1 | :- module(code2vec, [ | |
2 | leaf_paths/2 | |
3 | ]). | |
4 | ||
5 | % The code2vec module contains predicates to split a B AST into a list of | |
6 | % paths leading from leaf to leaf. | |
7 | % Such paths are the base for the code2vec algorithm by Alon et al. | |
8 | % | |
9 | % Uri Alon, Meital Zilberstein, Omer Levy, and Eran Yahav. 2019. | |
10 | % Code2vec: learning distributed representations of code. | |
11 | % Proc. ACM Program. Lang. 3, POPL, Article 40 (January 2019), 29 pages. | |
12 | % https://doi.org/10.1145/3290353 | |
13 | ||
14 | ||
15 | :- use_module(probsrc(bsyntaxtree), [safe_syntaxelement/5, | |
16 | syntaxtraversion/6]). | |
17 | :- use_module(library(lists)). | |
18 | ||
19 | ||
20 | %% leaf_paths(+AST, -Paths). | |
21 | % | |
22 | % AST is a B AST. Paths is a list of paths leading from leaf to leaf. | |
23 | leaf_paths(BAst, Paths) :- | |
24 | unwrapped_b(BAst, Expr, Subs, Constants), | |
25 | paths_and_fragments(Expr, Subs, Constants, Paths, _). | |
26 | ||
27 | %% paths_and_fragments(RootExpression, Subexpressions, Constants, Paths, Fragments). | |
28 | % | |
29 | % RootExpression: Node type of the currently considered AST's root node. | |
30 | % Subexpressions: List of sub-ASTs from the current node. | |
31 | % constants: Constants that belong to the current root node. | |
32 | % Paths: List of paths from leaf to leaf in the considered AST. | |
33 | % Fragments: List of path fragments (only leaf to root sequences). | |
34 | % | |
35 | % Note: If no constants are given, the empty list is used. | |
36 | % However, if a constant is given, it is usually not wrapped in a list. | |
37 | paths_and_fragments(Expr, [], [], [], [Expr]). | |
38 | paths_and_fragments(Expr, [], C, [], Fragments) :- | |
39 | constant_fragments(Expr, C, Fragments). | |
40 | paths_and_fragments(Expr, [Sub], [], Paths, Fragments) :- | |
41 | % Here we only have a linear sub-path, so no new paths are generated. | |
42 | unwrapped_b(Sub, SubExpr, SubSubs, SubConstants), | |
43 | paths_and_fragments(SubExpr, SubSubs, SubConstants, Paths, SubFragments), | |
44 | % Need to add the expression to all of the fragments. | |
45 | maplist(couple_fragment(Expr), SubFragments, Fragments). | |
46 | paths_and_fragments(Expr, [Sub1W, Sub2W | Subs], Constants, Paths, Fragments) :- | |
47 | % Here we have multiple sub-paths so we need to actually generate new paths. | |
48 | % Idea is to get all paths and fragments, then join their fragments over the root. | |
49 | sub_paths_and_fragments(Expr, [Sub1W, Sub2W | Subs], SubPaths, SubFragments), | |
50 | joined_sub_paths(Expr, SubFragments, NewPaths), | |
51 | % Consider constants if necessary. | |
52 | (Constants \= [] -> | |
53 | paths_and_fragments(Expr, [], Constants, _, CFragments) | |
54 | ; | |
55 | CFragments = [] | |
56 | ), | |
57 | % Note: SubFragments is list of lists. | |
58 | append([CFragments | SubFragments], Fragments), | |
59 | append(SubPaths, NewPaths, Paths). | |
60 | ||
61 | sub_paths_and_fragments(Expression, Subs, Paths, Fragments) :- | |
62 | sub_paths_and_fragments_aux(Subs, Expression, Paths, Fragments). | |
63 | ||
64 | sub_paths_and_fragments_aux([], _, [], []). | |
65 | sub_paths_and_fragments_aux([Sub1 | Subs], Expr, SubPaths, [Sub1Fragments | SubFragments]) :- | |
66 | paths_and_fragments(Expr, [Sub1], [], Sub1Paths, Sub1Fragments), | |
67 | % Append to add variable at end of list, then finish list in tail recursion. | |
68 | append(Sub1Paths, SubRPaths, SubPaths), | |
69 | sub_paths_and_fragments_aux(Subs, Expr, SubRPaths, SubFragments). | |
70 | ||
71 | ||
72 | %% unwrapped_b(BAst, Expr, Subs, Constants). | |
73 | % | |
74 | % BAst: B AST to unwrap. | |
75 | % Expr: Node type of the currently considered AST's root node, e.g. 'member'. | |
76 | % Subs: List of sub-ASTs from the current node. | |
77 | % Constants: Constants that belong to the current root node. | |
78 | unwrapped_b(Sub, Expr, Subs, Constants) :- | |
79 | syntaxtraversion(Sub, BExpr, _, _, _, _), | |
80 | safe_syntaxelement(BExpr, Subs, _, _, Constants), | |
81 | BExpr =.. [Expr | _]. | |
82 | ||
83 | ||
84 | % Needed solely for the maplist call in paths_and_fragments/5. | |
85 | couple_fragment(F1, [F2], F2-F1) :- !. % Special case that can arise. | |
86 | couple_fragment(F1, F2, F2-F1). | |
87 | ||
88 | %% joined_paths_over_root(Root, Fragments1, Fragments2, Paths). | |
89 | % | |
90 | % Generates paths from the given fragments. | |
91 | % This works as joined_paths/4, but only considers fragments of the form | |
92 | % 'a-...-n-Root'. | |
93 | % The fragments are then joined at the given root into paths. | |
94 | joined_paths_over_root(Root, Sub1Fragments, Sub2Fragments, Sub12Paths) :- | |
95 | % Strip the root and pass it to joined_paths/4. | |
96 | findall(Frag1, member(Frag1-Root, Sub1Fragments), Frag1s), | |
97 | findall(Frag2, member(Frag2-Root, Sub2Fragments), Frag2s), | |
98 | joined_paths(Root, Frag1s, Frag2s, Sub12Paths). | |
99 | ||
100 | ||
101 | %% joined_sub_paths(Root, SubFragments, Paths). | |
102 | % | |
103 | % Joins each set of sub fragments with each other over the root as path. | |
104 | % | |
105 | % Root: Root node of the AST. | |
106 | % SubFragments: List of list of fragments of the form 'a-...-n-Root'. | |
107 | % Paths: List of resulting paths connected at the Root. | |
108 | joined_sub_paths(Root, [Right | Left], Paths) :- | |
109 | joined_sub_paths_aux([], Right, Left, [], Root, Paths). | |
110 | ||
111 | %% joined_sub_paths_aux(Remaining, Curr, LeftOrCurr, RightOfCurr, Root, Paths). | |
112 | joined_sub_paths_aux([], _, [], _, _, []). | |
113 | joined_sub_paths_aux([], OldCurrent, [NewCurrent | Left], Right, Root, Paths) :- | |
114 | NewRight = [OldCurrent | Right], | |
115 | joined_sub_paths_aux(NewRight, NewCurrent, Left, NewRight, Root, Paths). | |
116 | joined_sub_paths_aux([Frags|Rest], Cur, Left, Right, Root, Paths) :- | |
117 | joined_paths_over_root(Root, Cur, Frags, NewPaths), | |
118 | % Append to add variable at end of list, then finish list in tail recursion. | |
119 | append(NewPaths, NextPaths, Paths), | |
120 | joined_sub_paths_aux(Rest, Cur, Left, Right, Root, NextPaths). | |
121 | ||
122 | ||
123 | ||
124 | %% joined_paths(Root, LeftFragments, RightFragments, Paths). | |
125 | % | |
126 | % Generates paths from the given fragments. | |
127 | % It is assumed that all fragments on the left should lead up to the root | |
128 | % and the right fragments should lead down from the root. | |
129 | % | |
130 | % For a root 'q', left fragment 'l-frag' and right fragment 'r-frag', | |
131 | % the generated path is 'l-frag-root(q)-(frag-r)'. | |
132 | joined_paths(Expr, Sub1Fragments, Sub2Fragments, Sub12Paths) :- | |
133 | joined_paths_aux([], [], Expr, Sub1Fragments, Sub2Fragments, Sub12Paths). | |
134 | ||
135 | joined_paths_aux([], _, _, _, [], []). | |
136 | joined_paths_aux([], _, Expr, F1s, [F2|F2s], Paths) :- | |
137 | % Still some F2 fragments left, so we need to couple them with all F1s. | |
138 | joined_paths_aux(F1s, [F2|F2s], Expr, F1s, F2s, Paths). | |
139 | joined_paths_aux([_|F1s], [], Expr, AllF1s, F2s, Paths) :- | |
140 | % Exhausted all F2s for our current F1, so consider remaining F1s. | |
141 | joined_paths_aux(F1s, F2s, Expr, AllF1s, F2s, Paths). | |
142 | joined_paths_aux([F1 | Sub1Fragments], [F2 | Sub2Fragments], Expr, AllSub1, RestSub2, [Path | Paths]) :- | |
143 | reverse_fragment(F1, RevF1), | |
144 | Path = (F2)-root(Expr)-(RevF1), | |
145 | joined_paths_aux(Sub1Fragments, [F2 | Sub2Fragments], Expr, AllSub1, RestSub2, Paths). | |
146 | ||
147 | ||
148 | %% reverse_fragment(Fragment, ReversedFragment). | |
149 | % | |
150 | % Reverses a fragment of the form a-...-n to n-...-a. | |
151 | reverse_fragment(Fs-F, RevFragment) :- | |
152 | !, | |
153 | reverse_fragment_aux(Fs, F, RevFragment). | |
154 | reverse_fragment(F, F). | |
155 | ||
156 | reverse_fragment_aux(Fs-F, Rev, RevFragment) :- | |
157 | !, | |
158 | reverse_fragment_aux(Fs, Rev-F, RevFragment). | |
159 | reverse_fragment_aux(F, Rev, Rev-F). | |
160 | ||
161 | ||
162 | %% constant_fragments(Expr, Constants, Fragments). | |
163 | ||
164 | % Generates fragments for the given expression with all constants. | |
165 | % As constants via bsyntaxtree:syntaxelement/5 may be wrapped as a list, | |
166 | % the case of unwrapping constants in such cases is handled here as well. | |
167 | constant_fragments(_, [], []) :- !. | |
168 | constant_fragments(Expr, [C|Cs], [C-Expr|Fragments]) :- | |
169 | !, | |
170 | constant_fragments(Expr, Cs, Fragments). | |
171 | constant_fragments(Expr, C, [C-Expr]). |