1 % (c) 2019-2022 Universitaet Duesseldorf
2 % This software is licenced under EPL 1.0 (http://www.eclipse.org/org/documents/epl-v10.html)
3
4 :- module(operation_data_generator, [generate_operation_data_from_machine_path/4,
5 generate_data_from_machine_operation/6]).
6
7 :- use_module(library(sets), [subtract/3]).
8 :- use_module(library(random), [random/3]).
9 :- use_module(library(lists), [append/2,maplist/3,remove_dups/2,select/3]).
10
11 :- use_module(probsrc(bmachine)).
12 :- use_module(probsrc(solver_interface), [solve_predicate/5]).
13 :- use_module(probsrc(translate), [translate_bexpression/2]).
14 :- use_module(probsrc(preferences), [set_preference/2]).
15 :- use_module(probsrc(tools), [flatten/2]).
16 :- use_module(probsrc(module_information), [module_info/2]).
17 :- use_module(extrasrc(before_after_predicates), [before_after_predicate_for_operation/2]).
18 :- use_module(probsrc(bsyntaxtree), [conjunct_predicates/2,add_texpr_infos/3]).
19
20 :- use_module(synthesis('deep_learning/ground_truth')).
21 :- use_module(synthesis('deep_learning/b_machine_identifier_normalization')).
22 :- use_module(synthesis(synthesis_util), [get_precondition_from_operation_body/2,
23 b_get_typed_invariant_from_machine/1,
24 get_valid_inputs_for_operation/3,
25 create_equality_nodes_from_example/2,
26 get_output_nodes_from_bindings/2,
27 reduce_input_to_violating_vars/3]).
28
29 :- module_info(group, synthesis).
30 :- module_info(description, 'This module generates data to train a neural network that predicts the library components that are necessary to synthesize a B operation\'s substitution for given I/O examples.').
31
32 min_amount_of_examples(3).
33 max_amount_of_examples(16).
34
35 load_b_or_eventb_machine(AbsoluteFilePath) :-
36 is_classical_b_machine_path(AbsoluteFilePath),
37 !,
38 b_load_machine_from_file(AbsoluteFilePath),
39 b_machine_precompile.
40 load_b_or_eventb_machine(AbsoluteFilePath) :-
41 b_load_eventb_project(AbsoluteFilePath),
42 b_machine_precompile.
43
44 is_classical_b_machine_path(Path) :-
45 atom(Path),
46 atom_concat(_, Ext, Path),
47 Ext == '.mch'.
48
49 load_machine_for_data_generation(AbsoluteFilePath) :-
50 b_machine_reset,
51 set_preference(use_solver_on_load, prob),
52 load_b_or_eventb_machine(AbsoluteFilePath),
53 \+ current_machine_uses_records,
54 b_get_machine_variables(MachineVars),
55 MachineVars \== [].
56
57 get_all_operation_bodies_from_current_machine(NOperationBodies) :-
58 findall((OperationName,OpReturnVars,OperationBody), b_get_machine_operation(OperationName, OpReturnVars, _, OperationBody), OperationBodies),
59 skip_operations_using_records(OperationBodies, [], NOperationBodies).
60
61 % We do not need records and we do not synthesize them by now.
62 skip_operations_using_records([], Acc, Acc).
63 skip_operations_using_records([OperationBody|T], Acc, NOperationBodies) :-
64 \+ contains_record_type(OperationBody),
65 !,
66 skip_operations_using_records(T, [OperationBody|Acc], NOperationBodies).
67 skip_operations_using_records([_|T], Acc, NOperationBodies) :-
68 skip_operations_using_records(T, Acc, NOperationBodies).
69
70 get_number_random_list(0, Acc, Acc) :-
71 !.
72 get_number_random_list(C, Acc, L) :-
73 C1 is C-1,
74 get_random_amount_of_examples(R),
75 \+ member(R, Acc),
76 !,
77 get_number_random_list(C1, [R|Acc], L).
78 get_number_random_list(C, Acc, L) :-
79 get_number_random_list(C, Acc, L).
80
81 b_get_machine_operation_any_parameters(OperationName, Ids) :-
82 b_get_machine_operation(OperationName, _, _, Body),
83 b_get_machine_operation_any_parameters_aux([], Body, Ids),
84 !.
85 b_get_machine_operation_any_parameters(_, []).
86
87 b_get_machine_operation_any_parameters_aux(Acc, b(Node,_,_), Ids) :-
88 Node =.. [Functor,AnyIds|Args],
89 ( Functor = any
90 ; Functor = let
91 ),
92 !,
93 maplist(b_get_machine_operation_any_parameters_aux([]), Args, TArgIds),
94 flatten(TArgIds, ArgIds),
95 append([Acc,ArgIds,AnyIds], Ids).
96 b_get_machine_operation_any_parameters_aux(Acc, b(Node,_,_), Ids) :-
97 Node =.. [_|Args],
98 maplist(b_get_machine_operation_any_parameters_aux([]), Args, TArgIds),
99 flatten(TArgIds, ArgIds),
100 append(Acc, ArgIds, Ids).
101 b_get_machine_operation_any_parameters_aux(Acc, _, Acc).
102
103 %%%
104 generate_operation_data_from_machine_path(AbsoluteFilePath, Augmentations, SolverTimeoutMs, OperationData) :-
105 retractall(tools:id_counter(_)),
106 load_machine_for_data_generation(AbsoluteFilePath),
107 get_normalized_id_name_mapping(NormalizedSets, NormalizedIds, NOperationNames, NRecordFieldNames),
108 get_all_operation_bodies_from_current_machine(OperationBodies),
109 b_machine_name(MachineName),
110 Env = [[],NormalizedSets,NormalizedIds,NOperationNames,NRecordFieldNames],
111 set_preference(time_out, SolverTimeoutMs),
112 generate_data_from_operation_substitutions(Env, Augmentations, AbsoluteFilePath-MachineName, OperationBodies, _NewEnv, [], OperationData).
113
114 %% generate_data_from_machine_operation(+OperationName, +Augmentations, +SolverTimeOut, +AbsoluteFilePath, +MachineName, -OperationData).
115 % Assuming that the corresponding machine is loaded.
116 generate_data_from_machine_operation(OperationName, Augmentations, SolverTimeoutMs, AbsoluteFilePath, MachineName, OperationData) :-
117 retractall(tools:id_counter(_)),
118 load_b_machine_if_unloaded(AbsoluteFilePath),
119 set_preference(time_out, SolverTimeoutMs),
120 b_get_machine_operation(OperationName, OpReturnVars, _, OperationBody),
121 get_normalized_id_name_mapping(NormalizedSets, NormalizedIds, NOperationNames, NRecordFieldNames),
122 Env = [[],NormalizedSets,NormalizedIds,NOperationNames,NRecordFieldNames],
123 generate_data_from_operation_substitutions(Env, Augmentations, AbsoluteFilePath-MachineName, [(OperationName,OpReturnVars,OperationBody)], _, [], OperationData).
124
125 generate_data_from_operation_substitutions(_, _, _, [], _, Acc, Acc) :-
126 !.
127 generate_data_from_operation_substitutions(Env, AR, AbsoluteFilePath-MachineName, [(OperationName,OpReturnVars,OperationBody)|T], NewEnv, Acc, OperationData) :-
128 format('.', []),
129 b_get_machine_operation_typed_parameters(OperationName, Parameters),
130 b_get_machine_operation_any_parameters(OperationName, AnyParameters),
131 append(Parameters, AnyParameters, TNParameters),
132 findall(NP, ( member(P, TNParameters),
133 add_texpr_infos(P, [parameter], NP)
134 ), NParameters),
135 get_ground_truth_from_operation_body(OpReturnVars, OperationBody, NParameters, GroundTruth),
136 !,
137 get_number_random_list(AR, [], RandomA),
138 length(NParameters, ParameterAmount),
139 select((global_ground_truth_params,TGlobalGt), GroundTruth, RestGt),
140 extend_ground_truth(parameter, ParameterAmount, TGlobalGt, GlobalGt),
141 enumerate_ids([], 'P', NParameters, EParameters),
142 Env = [NormalizedStrings,NormalizedSets,NormalizedIds,NOperationNames,NRecordFieldNames],
143 append(EParameters, NormalizedIds, NNormalizedIds),
144 Env1 = [NormalizedStrings,NormalizedSets,NNormalizedIds,NOperationNames,NRecordFieldNames],
145 normalize_ids_in_ground_truth(NormalizedIds, [(global_ground_truth_params,GlobalGt)|RestGt], NGroundTruth),
146 generate_data_from_operation_substitutions_aux(Env1, 0, RandomA, AbsoluteFilePath-MachineName, OperationName, Env2, [], Data),
147 generate_data_from_operation_substitutions(Env2, AR, AbsoluteFilePath-MachineName, T, NewEnv, [(Data,OperationName,NGroundTruth)|Acc], OperationData),
148 !.
149 generate_data_from_operation_substitutions(Env, AR, AbsoluteFilePath-MachineName, [(_,_,_)|T], NewEnv, Acc, OperationData) :-
150 generate_data_from_operation_substitutions(Env, AR, AbsoluteFilePath-MachineName, T, NewEnv, Acc, OperationData),
151 !.
152
153 load_b_machine_if_unloaded(MachinePath) :-
154 bmachine_is_precompiled,
155 b_get_main_filename(LoadedMachinePath),
156 atom_concat(_, MachinePath, LoadedMachinePath),
157 !.
158 load_b_machine_if_unloaded(MachinePath) :-
159 b_load_machine_from_file(MachinePath),
160 b_machine_precompile.
161
162 % Generate several records for each operation using a varying amount of examples in order to obtain more data.
163 generate_data_from_operation_substitutions_aux(Env, _, [], _, _, Env, Acc, Acc) :-
164 !.
165 generate_data_from_operation_substitutions_aux(Env, _, [AmountOfExamples|AT], AbsoluteFilePath-MachineName, OperationName, NewEnv, Acc, Data) :-
166 get_valid_inputs_for_operation(OperationName, AmountOfExamples, InputExamples),
167 remove_dups(InputExamples, InputExamplesSet),
168 InputExamplesSet \== [],
169 InputExamplesSet \== [[]],
170 get_outputs_for_input_examples(Env, InputExamples, OperationName, InputOutputTuples, Env1),
171 !,
172 length(InputOutputTuples, Size),
173 min_amount_of_examples(MinExamples),
174 ( ( InputOutputTuples \== [],
175 Size >= MinExamples
176 )
177 ->
178 AtLeastOneSolution = 1,
179 NewAcc = [InputOutputTuples|Acc]
180 ; % skip this operation if no output examples could be generated or no ground truth can be inferred
181 AtLeastOneSolution = 0,
182 NewAcc = Acc
183 ),
184 generate_data_from_operation_substitutions_aux(Env1, AtLeastOneSolution, AT, AbsoluteFilePath-MachineName, OperationName, NewEnv, NewAcc, Data).
185 % no valid inputs can be generated
186 generate_data_from_operation_substitutions_aux(Env, AtLeastOneSolution, _, AbsoluteFilePath-_, OperationName, Env, Acc, Data) :-
187 AtLeastOneSolution == 0,
188 !,
189 format('~n~nWarning: No valid inputs could be generated for operation ~w in machine ~w.~n~n', [OperationName,AbsoluteFilePath]),
190 Data = Acc.
191 % at least one record could be generated but by excluding prior solutions we might not be able to generate as much records as initially planed
192 generate_data_from_operation_substitutions_aux(Env, _, _, _, _, Env, Acc, Acc).
193 %%%
194 normalize_ids_in_ground_truth(_, [], []).
195 normalize_ids_in_ground_truth(NormalizedIds, [(VarName,GroundTruth)|T], [(NVarName,GroundTruth)|NT]) :-
196 member((b(identifier(VarName),_,_),b(identifier(NVarName),_,_)), NormalizedIds),
197 !,
198 normalize_ids_in_ground_truth(NormalizedIds, T, NT).
199 normalize_ids_in_ground_truth(NormalizedIds, [(VarName,GroundTruth)|T], [(VarName,GroundTruth)|NT]) :-
200 !,
201 normalize_ids_in_ground_truth(NormalizedIds, T, NT).
202 normalize_ids_in_ground_truth(_, [H|_], _) :-
203 format("~nError: Ground truth accumulator ~w is not properly wrapped. Abort.~n~n", [H]),
204 !,
205 fail.
206
207 get_random_amount_of_examples(AmountOfExamples) :-
208 min_amount_of_examples(MinAmountOfExamples),
209 max_amount_of_examples(TempMaxAmountOfExamples),
210 MaxAmountOfExamples is TempMaxAmountOfExamples+1,
211 random(MinAmountOfExamples, MaxAmountOfExamples, AmountOfExamples),
212 !.
213
214 create_example_triple(Env, Ast, (NVarName,NType,Value)) :-
215 % (
216 Env = [_,NormalizedSets,_,_,_],
217 b_machine_identifier_normalization:normalize_ids_in_b_ast(Env, Ast, NAst, _), % TODO update env?
218 % ; trace , normalize_ids_in_b_ast(NormalizedSets,NormalizedIds,Ast,NAst)) ,
219 NAst = b(_,Type,NInfo),
220 normalize_id_type(NormalizedSets, Type, NType),
221 member(synthesis(machinevar,NVarName), NInfo),
222 translate_bexpression(NAst, Value).
223
224 get_outputs_for_input_examples(Env, InputExamples, OperationName, InputOutputTuples, NewEnv) :-
225 b_get_machine_operation(OperationName, _, _, Substitution),
226 get_precondition_from_operation_body(Substitution, Precondition),
227 b_get_typed_invariant_from_machine(Invariant),
228 conjunct_predicates([Invariant,Precondition], InvariantAndPrecondition),
229 get_outputs_for_input_examples(Env, InvariantAndPrecondition, InputExamples, OperationName, [], InputOutputTuples, NewEnv).
230
231 % Compute the corresponding output for each input example and return a list of I/O tuples.
232 get_outputs_for_input_examples(Env, _, [], _, Acc, Acc, Env).
233 get_outputs_for_input_examples(Env, InvariantAndPrecondition, [InputExampleAst|T], OperationName, Acc, InputOutputTuples, NewEnv) :-
234 create_equality_nodes_from_example(InputExampleAst, EqualityNodes),
235 conjunct_predicates(EqualityNodes, EqualityPredicate),
236 before_after_predicate_for_operation(OperationName, BeforeAfterPredicate),
237 conjunct_predicates([InvariantAndPrecondition,EqualityPredicate,BeforeAfterPredicate], BeforeAfterPredicateWithEqualities),
238 solve_predicate(BeforeAfterPredicateWithEqualities, _, 1, [force_evaluation], Solution),
239 Solution = solution(SolutionBindings),
240 get_output_nodes_from_bindings(SolutionBindings, OutputExampleAst),
241 \+ contains_symbolic_value(OutputExampleAst),
242 maplist(create_example_triple(Env), InputExampleAst, InputExample),
243 findall(Name, member((Name,_,_), InputExample), InputVarNames),
244 b_machine_identifier_normalization:map_normalize_ids_in_b_ast(Env, OutputExampleAst, OutputExampleAst2, Env1),
245 reduce_input_to_violating_vars(InputVarNames, OutputExampleAst2, OutputExampleAst3),
246 findall(N, ( member(b(_,_,I), OutputExampleAst3),
247 member(synthesis(machinevar,N), I)
248 ), OutputVarNames),
249 subtract(InputVarNames, OutputVarNames, Sub),
250 ( Sub == []
251 ->
252 maplist(create_example_triple(Env1), OutputExampleAst3, OutputExample),
253 get_outputs_for_input_examples(Env1, InvariantAndPrecondition, T, OperationName, [(InputExample,OutputExample)|Acc], InputOutputTuples, NewEnv)
254 ; format("Error: Missing Output value for at least one variable. Probably a bug in normalize_ids_in_b_ast/4.~n", []),
255 InputOutputTuples = Acc,
256 NewEnv = Env1
257 ).
258 get_outputs_for_input_examples(Env, _, _, _, Acc, Acc, Env).
259
260 contains_symbolic_value(LoE) :-
261 % TO DO: extend for lambda functions?
262 member(E, LoE),
263 contains_symbolic_value_aux(E).
264
265 contains_symbolic_value_aux(b(value(closure(_,_,b(member(_,_),_,_))),_,_)).
266 contains_symbolic_value_aux(b(Node,_,_)) :-
267 Node =.. [_|Args],
268 member(Arg, Args),
269 contains_symbolic_value_aux(Arg).