1 :- module(data_generator, [generate_operation_data_from_machine_path/4]).
2
3 :- use_module(library(sets), [subtract/3]).
4 :- use_module(library(random), [random/3]).
5 :- use_module(library(lists), [append/2,maplist/3,remove_dups/2,select/3]).
6
7 :- use_module(probsrc(bmachine)).
8 :- use_module(probsrc(solver_interface), [solve_predicate/5]).
9 :- use_module(probsrc(translate), [translate_bexpression/2]).
10 :- use_module(probsrc(preferences), [set_preference/2]).
11 :- use_module(probsrc(tools), [flatten/2]).
12 :- use_module(probsrc(module_information), [module_info/2]).
13 :- use_module(probsrc(before_after_predicates), [before_after_predicate_for_operation/2]).
14 :- use_module(probsrc(bsyntaxtree), [conjunct_predicates/2,add_texpr_infos/3]).
15
16 :- use_module(synthesis('deep_learning/ground_truth')).
17 :- use_module(synthesis('deep_learning/b_machine_identifier_normalization')).
18 :- use_module(synthesis(synthesis_util), [get_precondition_from_operation_body/2,
19 b_get_typed_invariant_from_machine/1,
20 get_valid_inputs_for_operation/3,
21 create_equality_nodes_from_example/2,
22 get_output_nodes_from_bindings/2,
23 reduce_input_to_violating_vars/3]).
24
25 :- module_info(group, synthesis).
26 :- module_info(description, 'This module generates data to train a neural network that predicts the library components that are necessary to synthesize a B operation\'s substitution for given I/O examples.').
27
28 min_amount_of_examples(3).
29 max_amount_of_examples(16).
30
31 load_b_or_eventb_machine(AbsoluteFilePath) :-
32 is_classical_b_machine_path(AbsoluteFilePath),
33 !,
34 b_load_machine_from_file(AbsoluteFilePath),
35 b_machine_precompile.
36 load_b_or_eventb_machine(AbsoluteFilePath) :-
37 b_load_eventb_project(AbsoluteFilePath),
38 b_machine_precompile.
39 % TODO: set desired timeout from java
40 is_classical_b_machine_path(Path) :-
41 atom(Path),
42 atom_concat(_, Ext, Path),
43 Ext == '.mch'.
44
45 load_machine_for_data_generation(AbsoluteFilePath) :-
46 b_machine_reset,
47 set_preference(try_kodkod_on_load, false),
48 load_b_or_eventb_machine(AbsoluteFilePath),
49 \+ current_machine_uses_records,
50 b_get_machine_variables(MachineVars),
51 MachineVars \== [].
52
53 get_all_operation_bodies_from_current_machine(NOperationBodies) :-
54 findall((OperationName,OpReturnVars,OperationBody), b_get_machine_operation(OperationName, OpReturnVars, _, OperationBody), OperationBodies),
55 skip_operations_using_records(OperationBodies, [], NOperationBodies).
56
57 % We do not need records and we do not synthesize them by now.
58 skip_operations_using_records([], Acc, Acc).
59 skip_operations_using_records([OperationBody|T], Acc, NOperationBodies) :-
60 \+ contains_record_type(OperationBody),
61 !,
62 skip_operations_using_records(T, [OperationBody|Acc], NOperationBodies).
63 skip_operations_using_records([_|T], Acc, NOperationBodies) :-
64 skip_operations_using_records(T, Acc, NOperationBodies).
65
66 get_number_random_list(0, Acc, Acc) :-
67 !.
68 get_number_random_list(C, Acc, L) :-
69 C1 is C-1,
70 get_random_amount_of_examples(R),
71 \+ member(R, Acc),
72 !,
73 get_number_random_list(C1, [R|Acc], L).
74 get_number_random_list(C, Acc, L) :-
75 get_number_random_list(C, Acc, L).
76
77 b_get_machine_operation_any_parameters(OperationName, Ids) :-
78 b_get_machine_operation(OperationName, _, _, Body),
79 b_get_machine_operation_any_parameters_aux([], Body, Ids),
80 !.
81 b_get_machine_operation_any_parameters(_, []).
82
83 b_get_machine_operation_any_parameters_aux(Acc, b(Node,_,_), Ids) :-
84 Node =.. [Functor,AnyIds|Args],
85 ( Functor = any
86 ; Functor = let
87 ),
88 !,
89 maplist(b_get_machine_operation_any_parameters_aux([]), Args, TArgIds),
90 flatten(TArgIds, ArgIds),
91 append([Acc,ArgIds,AnyIds], Ids).
92 b_get_machine_operation_any_parameters_aux(Acc, b(Node,_,_), Ids) :-
93 Node =.. [_|Args],
94 maplist(b_get_machine_operation_any_parameters_aux([]), Args, TArgIds),
95 flatten(TArgIds, ArgIds),
96 append(Acc, ArgIds, Ids).
97 b_get_machine_operation_any_parameters_aux(Acc, _, Acc).
98
99 %%%
100 generate_operation_data_from_machine_path(AbsoluteFilePath, Augmentations, SolverTimeoutMs, OperationData) :-
101 retractall(tools:id_counter(_)),
102 load_machine_for_data_generation(AbsoluteFilePath),
103 get_normalized_id_name_mapping(NormalizedSets, NormalizedIds, NOperationNames, NRecordFieldNames),
104 get_all_operation_bodies_from_current_machine(OperationBodies),
105 b_machine_name(MachineName),
106 Env = [[],NormalizedSets,NormalizedIds,NOperationNames,NRecordFieldNames],
107 set_preference(time_out, SolverTimeoutMs),
108 generate_data_from_operation_substitutions(Env, Augmentations, AbsoluteFilePath-MachineName, OperationBodies, _NewEnv, [], OperationData).
109
110 generate_data_from_operation_substitutions(_, _, _, [], _, Acc, Acc) :-
111 !.
112 generate_data_from_operation_substitutions(Env, AR, AbsoluteFilePath-MachineName, [(OperationName,OpReturnVars,OperationBody)|T], NewEnv, Acc, OperationData) :-
113 format('.', []),
114 b_get_machine_operation_typed_parameters(OperationName, Parameters),
115 b_get_machine_operation_any_parameters(OperationName, AnyParameters),
116 append(Parameters, AnyParameters, TNParameters),
117 findall(NP, ( member(P, TNParameters),
118 add_texpr_infos(P, [parameter], NP)
119 ), NParameters),
120 get_ground_truth_from_operation_body(OpReturnVars, OperationBody, NParameters, GroundTruth),
121 !,
122 get_number_random_list(AR, [], RandomA),
123 length(NParameters, ParameterAmount),
124 select((global_ground_truth_params,TGlobalGt), GroundTruth, RestGt),
125 extend_ground_truth(parameter, ParameterAmount, TGlobalGt, GlobalGt),
126 enumerate_ids([], 'P', NParameters, EParameters),
127 Env = [NormalizedStrings,NormalizedSets,NormalizedIds,NOperationNames],
128 append(EParameters, NormalizedIds, NNormalizedIds),
129 Env1 = [NormalizedStrings,NormalizedSets,NNormalizedIds,NOperationNames],
130 normalize_ids_in_ground_truth(NormalizedIds, [(global_ground_truth_params,GlobalGt)|RestGt], NGroundTruth),
131 generate_data_from_operation_substitutions_aux(Env1, 0, RandomA, AbsoluteFilePath-MachineName, OperationName, Env2, [], Data),
132 generate_data_from_operation_substitutions(Env2, AR, AbsoluteFilePath-MachineName, T, NewEnv, [(Data,OperationName,NGroundTruth)|Acc], OperationData),
133 !.
134 generate_data_from_operation_substitutions(Env, AR, AbsoluteFilePath-MachineName, [(_,_,_)|T], NewEnv, Acc, OperationData) :-
135 generate_data_from_operation_substitutions(Env, AR, AbsoluteFilePath-MachineName, T, NewEnv, Acc, OperationData),
136 !.
137
138 % Generate several records for each operation using a varying amount of examples in order to obtain more data.
139 generate_data_from_operation_substitutions_aux(Env, _, [], _, _, Env, Acc, Acc) :-
140 !.
141 generate_data_from_operation_substitutions_aux(Env, _, [AmountOfExamples|AT], AbsoluteFilePath-MachineName, OperationName, NewEnv, Acc, Data) :-
142 get_valid_inputs_for_operation(OperationName, AmountOfExamples, InputExamples),
143 remove_dups(InputExamples, InputExamplesSet),
144 InputExamplesSet \== [],
145 InputExamplesSet \== [[]],
146 get_outputs_for_input_examples(Env, InputExamples, OperationName, InputOutputTuples, Env1),
147 !,
148 length(InputOutputTuples, Size),
149 min_amount_of_examples(MinExamples),
150 ( ( InputOutputTuples \== [],
151 Size >= MinExamples
152 )
153 ->
154 AtLeastOneSolution = 1,
155 NewAcc = [InputOutputTuples|Acc]
156 ; % skip this operation if no output examples could be generated or no ground truth can be inferred
157 AtLeastOneSolution = 0,
158 NewAcc = Acc
159 ),
160 generate_data_from_operation_substitutions_aux(Env1, AtLeastOneSolution, AT, AbsoluteFilePath-MachineName, OperationName, NewEnv, NewAcc, Data).
161 % no valid inputs can be generated
162 generate_data_from_operation_substitutions_aux(Env, AtLeastOneSolution, _, AbsoluteFilePath-_, OperationName, Env, Acc, Data) :-
163 AtLeastOneSolution == 0,
164 !,
165 format('~n~nWarning: No valid inputs could be generated for operation ~w in machine ~w.~n~n', [OperationName,AbsoluteFilePath]),
166 Data = Acc.
167 % at least one record could be generated but by excluding prior solutions we might not be able to generate as much records as initially planed
168 generate_data_from_operation_substitutions_aux(Env, _, _, _, _, Env, Acc, Acc).
169 %%%
170 normalize_ids_in_ground_truth(_, [], []).
171 normalize_ids_in_ground_truth(NormalizedIds, [(VarName,GroundTruth)|T], [(NVarName,GroundTruth)|NT]) :-
172 member((b(identifier(VarName),_,_),b(identifier(NVarName),_,_)), NormalizedIds),
173 !,
174 normalize_ids_in_ground_truth(NormalizedIds, T, NT).
175 normalize_ids_in_ground_truth(NormalizedIds, [(VarName,GroundTruth)|T], [(VarName,GroundTruth)|NT]) :-
176 !,
177 normalize_ids_in_ground_truth(NormalizedIds, T, NT).
178 normalize_ids_in_ground_truth(_, [H|_], _) :-
179 format("~nError: Ground truth accumulator ~w is not properly wrapped. Abort.~n~n", [H]),
180 !,
181 fail.
182
183 get_random_amount_of_examples(AmountOfExamples) :-
184 min_amount_of_examples(MinAmountOfExamples),
185 max_amount_of_examples(TempMaxAmountOfExamples),
186 MaxAmountOfExamples is TempMaxAmountOfExamples+1,
187 random(MinAmountOfExamples, MaxAmountOfExamples, AmountOfExamples),
188 !.
189
190 create_example_triple(Env, Ast, (Value,NType,NVarName)) :-
191 % (
192 Env = [_,NormalizedSets,_,_],
193 b_machine_identifier_normalization:normalize_ids_in_b_ast(Env, Ast, NAst, _), % TODO update env?
194 % ; trace , normalize_ids_in_b_ast(NormalizedSets,NormalizedIds,Ast,NAst)) ,
195 NAst = b(_,Type,NInfo),
196 normalize_id_type(NormalizedSets, Type, NType),
197 member(synthesis(machinevar,NVarName), NInfo),
198 translate_bexpression(NAst, Value).
199
200 get_outputs_for_input_examples(Env, InputExamples, OperationName, InputOutputTuples, NewEnv) :-
201 b_get_machine_operation(OperationName, _, _, Substitution),
202 get_precondition_from_operation_body(Substitution, Precondition),
203 b_get_typed_invariant_from_machine(Invariant),
204 conjunct_predicates([Invariant,Precondition], InvariantAndPrecondition),
205 get_outputs_for_input_examples(Env, InvariantAndPrecondition, InputExamples, OperationName, [], InputOutputTuples, NewEnv).
206
207 % Compute the corresponding output for each input example and return a list of I/O tuples.
208 get_outputs_for_input_examples(Env, _, [], _, Acc, Acc, Env).
209 get_outputs_for_input_examples(Env, InvariantAndPrecondition, [InputExampleAst|T], OperationName, Acc, InputOutputTuples, NewEnv) :-
210 create_equality_nodes_from_example(InputExampleAst, EqualityNodes),
211 conjunct_predicates(EqualityNodes, EqualityPredicate),
212 before_after_predicate_for_operation(OperationName, BeforeAfterPredicate),
213 conjunct_predicates([InvariantAndPrecondition,EqualityPredicate,BeforeAfterPredicate], BeforeAfterPredicateWithEqualities),
214 solve_predicate(BeforeAfterPredicateWithEqualities, _, 1, [force_evaluation], Solution),
215 Solution = solution(SolutionBindings),
216 get_output_nodes_from_bindings(SolutionBindings, OutputExampleAst),
217 \+ contains_symbolic_value(OutputExampleAst),
218 maplist(create_example_triple(Env), InputExampleAst, InputExample),
219 findall(Name, member((_,_,Name), InputExample), InputVarNames),
220 b_machine_identifier_normalization:map_normalize_ids_in_b_ast(Env, OutputExampleAst, OutputExampleAst2, Env1),
221 reduce_input_to_violating_vars(InputVarNames, OutputExampleAst2, OutputExampleAst3),
222 findall(N, ( member(b(_,_,I), OutputExampleAst3),
223 member(synthesis(machinevar,N), I)
224 ), OutputVarNames),
225 subtract(InputVarNames, OutputVarNames, Sub),
226 ( Sub == []
227 ->
228 maplist(create_example_triple(Env1), OutputExampleAst3, OutputExample),
229 get_outputs_for_input_examples(Env1, InvariantAndPrecondition, T, OperationName, [(InputExample,OutputExample)|Acc], InputOutputTuples, NewEnv)
230 ; format("Error: Missing Output value for at least one variable. Probably a bug in normalize_ids_in_b_ast/4.~n", []),
231 InputOutputTuples = Acc,
232 NewEnv = Env1
233 ).
234 get_outputs_for_input_examples(Env, _, _, _, Acc, Acc, Env).
235
236 contains_symbolic_value(LoE) :-
237 % TO DO: extend for lambda functions?
238 member(E, LoE),
239 contains_symbolic_value_aux(E).
240
241 contains_symbolic_value_aux(b(value(closure(_,_,b(member(_,_),_,_))),_,_)).
242 contains_symbolic_value_aux(b(Node,_,_)) :-
243 Node =.. [_|Args],
244 member(Arg, Args),
245 contains_symbolic_value_aux(Arg).