1 | | % (c) 2016-2023 Lehrstuhl fuer Softwaretechnik und Programmiersprachen, |
2 | | % Heinrich Heine Universitaet Duesseldorf |
3 | | % This software is licenced under EPL 1.0 (http://www.eclipse.org/org/documents/epl-v10.html |
4 | | |
5 | | :- module(xml2b,[convert_xml_to_b/4, convert_json_to_b/3]). |
6 | | |
7 | | :- use_module(probsrc(module_information),[module_info/2]). |
8 | | :- module_info(group,external_functions). |
9 | | :- module_info(description,'This module transforms XML into a B representation.'). |
10 | | |
11 | | :- use_module(probsrc(xml_prob),[xml_parse/4]). |
12 | | %:- use_module(library(xml),[xml_parse/3]). |
13 | | :- use_module(library(lists)). |
14 | | :- use_module(probsrc(custom_explicit_sets),[convert_to_avl/2]). |
15 | | :- use_module(probsrc(error_manager)). |
16 | | :- use_module(probsrc(debug)). |
17 | | |
18 | | % type of generated XML elements: |
19 | | /* struct( |
20 | | recId: NATURAL1, |
21 | | pId:NATURAL, |
22 | | element:STRING, |
23 | | attributes: STRING +-> STRING, |
24 | | meta: STRING +-> STRING |
25 | | ) |
26 | | */ |
27 | | |
28 | | % convert a code list to a B sequence/set of records |
29 | | % Context is file name or string and is used for error messages only |
30 | | convert_xml_to_b(Codes,BContent,Context,Span) :- |
31 | | reset_counter(xml2b_current_line), |
32 | | set_counter(xml2b_current_line,1), |
33 | | peel_and_count_newlines(Codes,PeeledCodes), |
34 | | (debug_mode(off) -> true |
35 | | ; get_counter(xml2b_current_line,InitialLineNr), |
36 | | format('XML starting at line: ~w of ~w~n',[InitialLineNr,Context])), |
37 | | %statistics(walltime,[W1,_]), |
38 | | xml_parse(PeeledCodes,xml(_Atts,Content),[format(false)],Span), % format(false) means we also see all newlines |
39 | | %statistics(walltime,[W2,_]), W is W2-W1, formatsilent('% Walltime ~w ms to parse XML using Prolog library~n',[W]), |
40 | | % xml_prob:xml_pp(xml(_Atts,Content)),nl, |
41 | | % xml_parse(Codes2,xml(_Atts,Content),[]), format('XML:~n~s~n',[Codes2]), |
42 | | %print(Content),nl,nl, |
43 | | !, |
44 | ? | (convert_xml_content_to_b(Content,0,1,_,Conversion,[]) -> BContent = Conversion |
45 | | ; add_error(convert_xml_to_b,'Converting XML content to B failed:',Content,Span), % probably an internal error |
46 | | fail |
47 | | ). |
48 | | convert_xml_to_b(_,_,Context,Span) :- |
49 | | add_error(convert_xml_to_b,'XML Parsing failed, ensure that XML is valid in:',Context,Span),fail. |
50 | | |
51 | | |
52 | | convert_xml_content_to_b([],_ParentId,NextId,OutId) --> {OutId=NextId}. |
53 | | convert_xml_content_to_b([H|T],ParentId,NextId,OutId) --> |
54 | ? | convert_xml_element(H,ParentId,NextId,Id2), |
55 | ? | convert_xml_content_to_b(T,ParentId,Id2,OutId). |
56 | | |
57 | | % TO DO: do we need to parse namespaces... |
58 | | % TO DO: store meta-information |
59 | | % TO DO: adapt line numbers when newlines inside initial tag: <?xml version="1.0" encoding="ASCII"?> |
60 | | % TO DO: deal with new lines inside tags <Data version= "03.04"> : Prolog library hides those newlines ! |
61 | | |
62 | | convert_xml_element(Element, ParentId,NextId,OutId) --> |
63 | | % {get_counter(xml2b_current_line,CurNr),nl,print(el(CurNr,Element)),nl}, |
64 | | {xml_element(Element,Tag,Attributes,Content,LineNr)}, |
65 | | !, |
66 | | [(int(NextId), |
67 | | rec([ % note: fields have to be in order |
68 | | field(attributes,BAttributes), % partial function of Attributes |
69 | | field(element,string(Tag)), |
70 | | field(meta,MetaAttributes), % such as xmlLineNumber |
71 | | field(pId,int(ParentId)), % id of the parent XML record |
72 | | field(recId,int(NextId)) % id of the XML element/record |
73 | | ]) |
74 | | )], |
75 | | {Id2 is NextId+1, |
76 | | number_codes(LineNr,LCC), atom_codes(LineNrAtom,LCC), |
77 | | convert_to_avl([(string(xmlLineNumber),string(LineNrAtom))],MetaAttributes), |
78 | ? | maplist(convert_xml_attributes,Attributes,BAttributesList), |
79 | | convert_to_avl(BAttributesList,BAttributes) |
80 | | }, |
81 | ? | convert_xml_content_to_b(Content,NextId,Id2,OutId). |
82 | | convert_xml_element(namespace( URL, _, Element), ParentId,NextId,OutId) --> % what should we do with the name space ? |
83 | | {format('*** Ignoring XML namespace annotation: ~w~n',[URL])}, |
84 | ? | convert_xml_element(Element, ParentId,NextId,OutId). |
85 | | convert_xml_element(pcdata(_Codes),_,Id,Id) --> !. % should only happen when Codes consists solely of newlines |
86 | | % no need to count_newlines; already done below in xml_element before C1 \= []. |
87 | ? | convert_xml_element(comment(Codes),_,Id,Id) --> !,{count_newlines(Codes)}. |
88 | | convert_xml_element(instructions(Name, Chars),_,Id,Id) --> !, |
89 | | {format('Ignoring XML instructions annotation: ~w : ~w~n',[Name,Chars])}. |
90 | | convert_xml_element(doctype(Tag, DoctypeId),_,Id,Id) --> !, |
91 | | {format('Ignoring XML doctype annotation: ~w : ~w~n',[Tag,DoctypeId])}. |
92 | | convert_xml_element(cdata(Codes),ParentId,Id,OutId) --> !, |
93 | | % TO DO: tab expansion? see http://binding-time.co.uk/wiki/index.php/Parsing_XML_with_Prolog |
94 | | convert_xml_element(pcdata(Codes),ParentId,Id,OutId). |
95 | | convert_xml_element(El,_,Id,Id) --> |
96 | | {format('*** Unknown XML element: ~w~n',[El])}. |
97 | | |
98 | | |
99 | | xml_element(element(Tag,XAttributes,Content),Tag,Attributes,Content,LineNr) :- |
100 | | get_counter(xml2b_current_line,CurLineNr), |
101 | ? | (select('='('$attribute_linefeeds',LF),XAttributes,Rest) |
102 | | -> LineNr is CurLineNr+LF, Attributes = Rest, %print(inc_line_nr(CurLineNr,LF)),nl, |
103 | | set_counter(xml2b_current_line,LineNr) |
104 | | ; LineNr = CurLineNr, Attributes=XAttributes). |
105 | | xml_element(pcdata(Codes),'CText',['='(text,NewCodes)],[],LineNr) :- |
106 | ? | peel_and_count_newlines(Codes,C1), |
107 | | C1 \= [], |
108 | | get_counter(xml2b_current_line,LineNr), |
109 | | reverse(C1,RC), |
110 | | peel_and_count_newlines(RC,RC2), % remove newlines at end; try avoid B strings with newlines in them |
111 | | reverse(RC2,NewCodes). |
112 | | |
113 | | convert_xml_attributes('='(Attr,Codes),(string(Attr),string(Atom))) :- !,atom_codes(Atom,Codes), |
114 | ? | count_newlines(Codes). |
115 | | convert_xml_attributes(A,(string(error),string(A))) :- format('**** UNKNOWN ATTRIBUTE: ~w~n',[A]). |
116 | | |
117 | | |
118 | | :- use_module(extension('counter/counter'), |
119 | | [counter_init/0, new_counter/1, get_counter/2, inc_counter/1, reset_counter/1, set_counter/2]). |
120 | | |
121 | | count_newlines([]). |
122 | ? | count_newlines(HT) :- newline(HT,T),!, inc_counter(xml2b_current_line), count_newlines(T). |
123 | ? | count_newlines([_|T]) :- count_newlines(T). |
124 | | |
125 | | % peel leading newlines |
126 | | peel_and_count_newlines([],[]). |
127 | ? | peel_and_count_newlines(HT,Res) :- newline(HT,T),!, inc_counter(xml2b_current_line), peel_and_count_newlines(T,Res). |
128 | ? | peel_and_count_newlines([H|T],Res) :- whitespace(H),!, peel_and_count_newlines(T,Res). |
129 | | peel_and_count_newlines(R,R). |
130 | | |
131 | | |
132 | | newline([10|T],T). |
133 | | newline([13|X],T) :- (X=[10|TX] -> T=TX ; T=X). % TO DO: should we check if we are on Windows ? |
134 | | |
135 | | whitespace(9). % tab |
136 | | whitespace(32). |
137 | | |
138 | | xml2b_startup :- % call once at startup to ensure all counters exist |
139 | | counter_init, |
140 | | new_counter(xml2b_current_line). |
141 | | |
142 | | :- use_module(probsrc(eventhandling),[register_event_listener/3]). |
143 | | :- register_event_listener(startup_prob,xml2b_startup, |
144 | | 'Initialise xml2b Counters.'). |
145 | | |
146 | | % ------------------------------ |
147 | | % convert a format received by json_parser:json_parse_file into the same format as READ_XML |
148 | | |
149 | | convert_json_to_b(json(List),BContent,Span) :- !, |
150 | | (convert_json_content_to_b(['='(root,json(List),0-0)],0,1,_,0,Span,Conversion,[]) |
151 | | -> BContent=Conversion |
152 | | ; add_error(convert_xml_to_b,'Converting JSON content to B failed:',List,Span), % probably an internal error |
153 | | fail |
154 | | ). |
155 | | convert_json_to_b(T,_,Span) :- |
156 | | add_error(convert_json_to_b,'Ilegal JSON term:',T,Span),fail. |
157 | | |
158 | | convert_json_content_to_b([],_ParentId,NextId,OutId,_,_Span) --> {OutId=NextId}. |
159 | | convert_json_content_to_b([H|T],ParentId,NextId,OutId,LineNr,Span) --> |
160 | | convert_json_element(H,ParentId,NextId,Id2,LineNr,Span), |
161 | | convert_json_content_to_b(T,ParentId,Id2,OutId,LineNr,Span). |
162 | | |
163 | | |
164 | | :- use_module(probsrc(tools_strings),[ajoin/2]). |
165 | | convert_json_element(Element, ParentId,NextId,OutId,_,Span) --> |
166 | | {json_element(Element,Tag,BAttributesList,Content,LineNr)}, |
167 | | !, |
168 | | [(int(NextId), |
169 | | rec([ % note: fields have to be in order |
170 | | field(attributes,BAttributes), % partial function of Attributes |
171 | | field(element,string(Tag)), |
172 | | field(meta,MetaAttributes), % such as xmlLineNumber |
173 | | field(pId,int(ParentId)), % id of the parent XML record |
174 | | field(recId,int(NextId)) % id of the XML element/record |
175 | | ]) |
176 | | )], |
177 | | {Id2 is NextId+1, |
178 | | number_codes(LineNr,LCC), atom_codes(LineNrAtom,LCC), |
179 | | convert_to_avl([(string(xmlLineNumber),string(LineNrAtom))],MetaAttributes), |
180 | | convert_to_avl(BAttributesList,BAttributes) |
181 | | }, |
182 | | convert_json_content_to_b(Content,NextId,Id2,OutId,LineNr,Span). |
183 | | convert_json_element(El,_,Id,Id,LineNr,Span) --> |
184 | | {ajoin(['Cannot translate JSON element at line ',LineNr,' to B:'],Msg), |
185 | | add_error(convert_json_to_b,Msg,El,Span)}. |
186 | | |
187 | | json_element(json(List),Tag,BAttributesList,Content,LineNr) :- !, % this happens for list items |
188 | | Tag=list_item, |
189 | | get_attributes(List,BAttributesList,Content), |
190 | | get_line_nr(List,LineNr). |
191 | | json_element('='(Attr,Atom,Meta),Tag,BAttributesList,Content,LineNr) :- atom(Atom),!, |
192 | | Tag=list_item, BAttributesList=[(string(Attr),string(Atom))], |
193 | | Content=[], get_line_nr(Meta,LineNr). |
194 | | json_element('='(Tag,json(List),Meta),Tag,BAttributesList,Content,LineNr) :- !, |
195 | | get_attributes(List,BAttributesList,Content), get_line_nr(Meta,LineNr). |
196 | | json_element('='(Tag,List,Meta),Tag,BAttributesList,Content,LineNr) :- !, % we have a list of items |
197 | | BAttributesList=[], get_line_nr(Meta,LineNr), l_add_meta(List,LineNr,Content). |
198 | | json_element('='(Tag,Json),Tag,BAttributesList,Content,LineNr) :- !, % no meta info present |
199 | | json_element('='(Tag,Json,0-0),Tag,BAttributesList,Content,LineNr). |
200 | | json_element(atom(Atom,LineNr),Tag,BAttributesList,Content,LineNr) :- |
201 | | BAttributesList=[], Content=[], Tag=Atom. |
202 | | |
203 | | l_add_meta([],_,[]). |
204 | | l_add_meta([H|T],LineNr,[MH|MT]) :- |
205 | | add_meta(H,LineNr,MH), |
206 | | l_add_meta(T,LineNr,MT). |
207 | | add_meta(A,LineNr,Res) :- atom(A), !, Res= atom(A,LineNr). |
208 | | add_meta(A,_,A). |
209 | | |
210 | | get_line_nr(['='(_,_,Meta)|_],Line) :- !, get_line_nr(Meta,Line). |
211 | | get_line_nr('-'(L,_),Line) :- !, Line=L. |
212 | | get_line_nr(E,0) :- writeq(E), format('*** No position info: ~w~n',[E]). |
213 | | |
214 | | get_attributes([],[],[]). |
215 | | get_attributes(['='(Attr,Val,_Lines)|T],[(string(Attr),string(Atom))|TAttrs],Rest) :- atom(Attr), |
216 | | number_to_atom(Val,Atom),!, |
217 | | get_attributes(T,TAttrs,Rest). |
218 | | get_attributes([F|T],Attrs,[F|TRest]) :- |
219 | | get_attributes(T,Attrs,TRest). |
220 | | |
221 | | number_to_atom(A,Atom) :- atom(A),!,Atom=A. |
222 | | number_to_atom(N,Atom) :- number(N),!, number_codes(N,C), atom_codes(Atom,C). |