1 :- module(code2vec, [
2 leaf_paths/2
3 ]).
4
5 % The code2vec module contains predicates to split a B AST into a list of
6 % paths leading from leaf to leaf.
7 % Such paths are the base for the code2vec algorithm by Alon et al.
8 %
9 % Uri Alon, Meital Zilberstein, Omer Levy, and Eran Yahav. 2019.
10 % Code2vec: learning distributed representations of code.
11 % Proc. ACM Program. Lang. 3, POPL, Article 40 (January 2019), 29 pages.
12 % https://doi.org/10.1145/3290353
13
14
15 :- use_module(probsrc(bsyntaxtree), [safe_syntaxelement/5,
16 syntaxtraversion/6]).
17 :- use_module(library(lists)).
18
19
20 %% leaf_paths(+AST, -Paths).
21 %
22 % AST is a B AST. Paths is a list of paths leading from leaf to leaf.
23 leaf_paths(BAst, Paths) :-
24 unwrapped_b(BAst, Expr, Subs, Constants),
25 paths_and_fragments(Expr, Subs, Constants, Paths, _).
26
27 %% paths_and_fragments(RootExpression, Subexpressions, Constants, Paths, Fragments).
28 %
29 % RootExpression: Node type of the currently considered AST's root node.
30 % Subexpressions: List of sub-ASTs from the current node.
31 % constants: Constants that belong to the current root node.
32 % Paths: List of paths from leaf to leaf in the considered AST.
33 % Fragments: List of path fragments (only leaf to root sequences).
34 %
35 % Note: If no constants are given, the empty list is used.
36 % However, if a constant is given, it is usually not wrapped in a list.
37 paths_and_fragments(Expr, [], [], [], [Expr]).
38 paths_and_fragments(Expr, [], C, [], Fragments) :-
39 constant_fragments(Expr, C, Fragments).
40 paths_and_fragments(Expr, [Sub], [], Paths, Fragments) :-
41 % Here we only have a linear sub-path, so no new paths are generated.
42 unwrapped_b(Sub, SubExpr, SubSubs, SubConstants),
43 paths_and_fragments(SubExpr, SubSubs, SubConstants, Paths, SubFragments),
44 % Need to add the expression to all of the fragments.
45 maplist(couple_fragment(Expr), SubFragments, Fragments).
46 paths_and_fragments(Expr, [Sub1W, Sub2W | Subs], Constants, Paths, Fragments) :-
47 % Here we have multiple sub-paths so we need to actually generate new paths.
48 % Idea is to get all paths and fragments, then join their fragments over the root.
49 sub_paths_and_fragments(Expr, [Sub1W, Sub2W | Subs], SubPaths, SubFragments),
50 joined_sub_paths(Expr, SubFragments, NewPaths),
51 % Consider constants if necessary.
52 (Constants \= [] ->
53 paths_and_fragments(Expr, [], Constants, _, CFragments)
54 ;
55 CFragments = []
56 ),
57 % Note: SubFragments is list of lists.
58 append([CFragments | SubFragments], Fragments),
59 append(SubPaths, NewPaths, Paths).
60
61 sub_paths_and_fragments(Expression, Subs, Paths, Fragments) :-
62 sub_paths_and_fragments_aux(Subs, Expression, Paths, Fragments).
63
64 sub_paths_and_fragments_aux([], _, [], []).
65 sub_paths_and_fragments_aux([Sub1 | Subs], Expr, SubPaths, [Sub1Fragments | SubFragments]) :-
66 paths_and_fragments(Expr, [Sub1], [], Sub1Paths, Sub1Fragments),
67 % Append to add variable at end of list, then finish list in tail recursion.
68 append(Sub1Paths, SubRPaths, SubPaths),
69 sub_paths_and_fragments_aux(Subs, Expr, SubRPaths, SubFragments).
70
71
72 %% unwrapped_b(BAst, Expr, Subs, Constants).
73 %
74 % BAst: B AST to unwrap.
75 % Expr: Node type of the currently considered AST's root node, e.g. 'member'.
76 % Subs: List of sub-ASTs from the current node.
77 % Constants: Constants that belong to the current root node.
78 unwrapped_b(Sub, Expr, Subs, Constants) :-
79 syntaxtraversion(Sub, BExpr, _, _, _, _),
80 safe_syntaxelement(BExpr, Subs, _, _, Constants),
81 BExpr =.. [Expr | _].
82
83
84 % Needed solely for the maplist call in paths_and_fragments/5.
85 couple_fragment(F1, [F2], F2-F1) :- !. % Special case that can arise.
86 couple_fragment(F1, F2, F2-F1).
87
88 %% joined_paths_over_root(Root, Fragments1, Fragments2, Paths).
89 %
90 % Generates paths from the given fragments.
91 % This works as joined_paths/4, but only considers fragments of the form
92 % 'a-...-n-Root'.
93 % The fragments are then joined at the given root into paths.
94 joined_paths_over_root(Root, Sub1Fragments, Sub2Fragments, Sub12Paths) :-
95 % Strip the root and pass it to joined_paths/4.
96 findall(Frag1, member(Frag1-Root, Sub1Fragments), Frag1s),
97 findall(Frag2, member(Frag2-Root, Sub2Fragments), Frag2s),
98 joined_paths(Root, Frag1s, Frag2s, Sub12Paths).
99
100
101 %% joined_sub_paths(Root, SubFragments, Paths).
102 %
103 % Joins each set of sub fragments with each other over the root as path.
104 %
105 % Root: Root node of the AST.
106 % SubFragments: List of list of fragments of the form 'a-...-n-Root'.
107 % Paths: List of resulting paths connected at the Root.
108 joined_sub_paths(Root, [Right | Left], Paths) :-
109 joined_sub_paths_aux([], Right, Left, [], Root, Paths).
110
111 %% joined_sub_paths_aux(Remaining, Curr, LeftOrCurr, RightOfCurr, Root, Paths).
112 joined_sub_paths_aux([], _, [], _, _, []).
113 joined_sub_paths_aux([], OldCurrent, [NewCurrent | Left], Right, Root, Paths) :-
114 NewRight = [OldCurrent | Right],
115 joined_sub_paths_aux(NewRight, NewCurrent, Left, NewRight, Root, Paths).
116 joined_sub_paths_aux([Frags|Rest], Cur, Left, Right, Root, Paths) :-
117 joined_paths_over_root(Root, Cur, Frags, NewPaths),
118 % Append to add variable at end of list, then finish list in tail recursion.
119 append(NewPaths, NextPaths, Paths),
120 joined_sub_paths_aux(Rest, Cur, Left, Right, Root, NextPaths).
121
122
123
124 %% joined_paths(Root, LeftFragments, RightFragments, Paths).
125 %
126 % Generates paths from the given fragments.
127 % It is assumed that all fragments on the left should lead up to the root
128 % and the right fragments should lead down from the root.
129 %
130 % For a root 'q', left fragment 'l-frag' and right fragment 'r-frag',
131 % the generated path is 'l-frag-root(q)-(frag-r)'.
132 joined_paths(Expr, Sub1Fragments, Sub2Fragments, Sub12Paths) :-
133 joined_paths_aux([], [], Expr, Sub1Fragments, Sub2Fragments, Sub12Paths).
134
135 joined_paths_aux([], _, _, _, [], []).
136 joined_paths_aux([], _, Expr, F1s, [F2|F2s], Paths) :-
137 % Still some F2 fragments left, so we need to couple them with all F1s.
138 joined_paths_aux(F1s, [F2|F2s], Expr, F1s, F2s, Paths).
139 joined_paths_aux([_|F1s], [], Expr, AllF1s, F2s, Paths) :-
140 % Exhausted all F2s for our current F1, so consider remaining F1s.
141 joined_paths_aux(F1s, F2s, Expr, AllF1s, F2s, Paths).
142 joined_paths_aux([F1 | Sub1Fragments], [F2 | Sub2Fragments], Expr, AllSub1, RestSub2, [Path | Paths]) :-
143 reverse_fragment(F1, RevF1),
144 Path = (F2)-root(Expr)-(RevF1),
145 joined_paths_aux(Sub1Fragments, [F2 | Sub2Fragments], Expr, AllSub1, RestSub2, Paths).
146
147
148 %% reverse_fragment(Fragment, ReversedFragment).
149 %
150 % Reverses a fragment of the form a-...-n to n-...-a.
151 reverse_fragment(Fs-F, RevFragment) :-
152 !,
153 reverse_fragment_aux(Fs, F, RevFragment).
154 reverse_fragment(F, F).
155
156 reverse_fragment_aux(Fs-F, Rev, RevFragment) :-
157 !,
158 reverse_fragment_aux(Fs, Rev-F, RevFragment).
159 reverse_fragment_aux(F, Rev, Rev-F).
160
161
162 %% constant_fragments(Expr, Constants, Fragments).
163
164 % Generates fragments for the given expression with all constants.
165 % As constants via bsyntaxtree:syntaxelement/5 may be wrapped as a list,
166 % the case of unwrapping constants in such cases is handled here as well.
167 constant_fragments(_, [], []) :- !.
168 constant_fragments(Expr, [C|Cs], [C-Expr|Fragments]) :-
169 !,
170 constant_fragments(Expr, Cs, Fragments).
171 constant_fragments(Expr, C, [C-Expr]).