1 | % (c) 2016-2023 Lehrstuhl fuer Softwaretechnik und Programmiersprachen, | |
2 | % Heinrich Heine Universitaet Duesseldorf | |
3 | % This software is licenced under EPL 1.0 (http://www.eclipse.org/org/documents/epl-v10.html | |
4 | ||
5 | :- module(xml2b,[convert_xml_to_b/4, convert_json_to_b/3]). | |
6 | ||
7 | :- use_module(probsrc(module_information),[module_info/2]). | |
8 | :- module_info(group,external_functions). | |
9 | :- module_info(description,'This module transforms XML into a B representation.'). | |
10 | ||
11 | :- use_module(probsrc(xml_prob),[xml_parse/4]). | |
12 | %:- use_module(library(xml),[xml_parse/3]). | |
13 | :- use_module(library(lists)). | |
14 | :- use_module(probsrc(custom_explicit_sets),[convert_to_avl/2]). | |
15 | :- use_module(probsrc(error_manager)). | |
16 | :- use_module(probsrc(debug)). | |
17 | ||
18 | % type of generated XML elements: | |
19 | /* struct( | |
20 | recId: NATURAL1, | |
21 | pId:NATURAL, | |
22 | element:STRING, | |
23 | attributes: STRING +-> STRING, | |
24 | meta: STRING +-> STRING | |
25 | ) | |
26 | */ | |
27 | ||
28 | % convert a code list to a B sequence/set of records | |
29 | % Context is file name or string and is used for error messages only | |
30 | convert_xml_to_b(Codes,BContent,Context,Span) :- | |
31 | reset_counter(xml2b_current_line), | |
32 | set_counter(xml2b_current_line,1), | |
33 | peel_and_count_newlines(Codes,PeeledCodes), | |
34 | (debug_mode(off) -> true | |
35 | ; get_counter(xml2b_current_line,InitialLineNr), | |
36 | format('XML starting at line: ~w of ~w~n',[InitialLineNr,Context])), | |
37 | %statistics(walltime,[W1,_]), | |
38 | xml_parse(PeeledCodes,xml(_Atts,Content),[format(false)],Span), % format(false) means we also see all newlines | |
39 | %statistics(walltime,[W2,_]), W is W2-W1, formatsilent('% Walltime ~w ms to parse XML using Prolog library~n',[W]), | |
40 | % xml_prob:xml_pp(xml(_Atts,Content)),nl, | |
41 | % xml_parse(Codes2,xml(_Atts,Content),[]), format('XML:~n~s~n',[Codes2]), | |
42 | %print(Content),nl,nl, | |
43 | !, | |
44 | (convert_xml_content_to_b(Content,0,1,_,Conversion,[]) -> BContent = Conversion | |
45 | ; add_error(convert_xml_to_b,'Converting XML content to B failed:',Content,Span), % probably an internal error | |
46 | fail | |
47 | ). | |
48 | convert_xml_to_b(_,_,Context,Span) :- | |
49 | add_error(convert_xml_to_b,'XML Parsing failed, ensure that XML is valid in:',Context,Span),fail. | |
50 | ||
51 | ||
52 | convert_xml_content_to_b([],_ParentId,NextId,OutId) --> {OutId=NextId}. | |
53 | convert_xml_content_to_b([H|T],ParentId,NextId,OutId) --> | |
54 | convert_xml_element(H,ParentId,NextId,Id2), | |
55 | convert_xml_content_to_b(T,ParentId,Id2,OutId). | |
56 | ||
57 | % TO DO: do we need to parse namespaces... | |
58 | % TO DO: store meta-information | |
59 | % TO DO: adapt line numbers when newlines inside initial tag: <?xml version="1.0" encoding="ASCII"?> | |
60 | % TO DO: deal with new lines inside tags <Data version= "03.04"> : Prolog library hides those newlines ! | |
61 | ||
62 | convert_xml_element(Element, ParentId,NextId,OutId) --> | |
63 | % {get_counter(xml2b_current_line,CurNr),nl,print(el(CurNr,Element)),nl}, | |
64 | {xml_element(Element,Tag,Attributes,Content,LineNr)}, | |
65 | !, | |
66 | [(int(NextId), | |
67 | rec([ % note: fields have to be in order | |
68 | field(attributes,BAttributes), % partial function of Attributes | |
69 | field(element,string(Tag)), | |
70 | field(meta,MetaAttributes), % such as xmlLineNumber | |
71 | field(pId,int(ParentId)), % id of the parent XML record | |
72 | field(recId,int(NextId)) % id of the XML element/record | |
73 | ]) | |
74 | )], | |
75 | {Id2 is NextId+1, | |
76 | number_codes(LineNr,LCC), atom_codes(LineNrAtom,LCC), | |
77 | convert_to_avl([(string(xmlLineNumber),string(LineNrAtom))],MetaAttributes), | |
78 | maplist(convert_xml_attributes,Attributes,BAttributesList), | |
79 | convert_to_avl(BAttributesList,BAttributes) | |
80 | }, | |
81 | convert_xml_content_to_b(Content,NextId,Id2,OutId). | |
82 | convert_xml_element(namespace( URL, _, Element), ParentId,NextId,OutId) --> % what should we do with the name space ? | |
83 | {format('*** Ignoring XML namespace annotation: ~w~n',[URL])}, | |
84 | convert_xml_element(Element, ParentId,NextId,OutId). | |
85 | convert_xml_element(pcdata(_Codes),_,Id,Id) --> !. % should only happen when Codes consists solely of newlines | |
86 | % no need to count_newlines; already done below in xml_element before C1 \= []. | |
87 | convert_xml_element(comment(Codes),_,Id,Id) --> !,{count_newlines(Codes)}. | |
88 | convert_xml_element(instructions(Name, Chars),_,Id,Id) --> !, | |
89 | {format('Ignoring XML instructions annotation: ~w : ~w~n',[Name,Chars])}. | |
90 | convert_xml_element(doctype(Tag, DoctypeId),_,Id,Id) --> !, | |
91 | {format('Ignoring XML doctype annotation: ~w : ~w~n',[Tag,DoctypeId])}. | |
92 | convert_xml_element(cdata(Codes),ParentId,Id,OutId) --> !, | |
93 | % TO DO: tab expansion? see http://binding-time.co.uk/wiki/index.php/Parsing_XML_with_Prolog | |
94 | convert_xml_element(pcdata(Codes),ParentId,Id,OutId). | |
95 | convert_xml_element(El,_,Id,Id) --> | |
96 | {format('*** Unknown XML element: ~w~n',[El])}. | |
97 | ||
98 | ||
99 | xml_element(element(Tag,XAttributes,Content),Tag,Attributes,Content,LineNr) :- | |
100 | get_counter(xml2b_current_line,CurLineNr), | |
101 | (select('='('$attribute_linefeeds',LF),XAttributes,Rest) | |
102 | -> LineNr is CurLineNr+LF, Attributes = Rest, %print(inc_line_nr(CurLineNr,LF)),nl, | |
103 | set_counter(xml2b_current_line,LineNr) | |
104 | ; LineNr = CurLineNr, Attributes=XAttributes). | |
105 | xml_element(pcdata(Codes),'CText',['='(text,NewCodes)],[],LineNr) :- | |
106 | peel_and_count_newlines(Codes,C1), | |
107 | C1 \= [], | |
108 | get_counter(xml2b_current_line,LineNr), | |
109 | reverse(C1,RC), | |
110 | peel_and_count_newlines(RC,RC2), % remove newlines at end; try avoid B strings with newlines in them | |
111 | reverse(RC2,NewCodes). | |
112 | ||
113 | convert_xml_attributes('='(Attr,Codes),(string(Attr),string(Atom))) :- !,atom_codes(Atom,Codes), | |
114 | count_newlines(Codes). | |
115 | convert_xml_attributes(A,(string(error),string(A))) :- format('**** UNKNOWN ATTRIBUTE: ~w~n',[A]). | |
116 | ||
117 | ||
118 | :- use_module(extension('counter/counter'), | |
119 | [counter_init/0, new_counter/1, get_counter/2, inc_counter/1, reset_counter/1, set_counter/2]). | |
120 | ||
121 | count_newlines([]). | |
122 | count_newlines(HT) :- newline(HT,T),!, inc_counter(xml2b_current_line), count_newlines(T). | |
123 | count_newlines([_|T]) :- count_newlines(T). | |
124 | ||
125 | % peel leading newlines | |
126 | peel_and_count_newlines([],[]). | |
127 | peel_and_count_newlines(HT,Res) :- newline(HT,T),!, inc_counter(xml2b_current_line), peel_and_count_newlines(T,Res). | |
128 | peel_and_count_newlines([H|T],Res) :- whitespace(H),!, peel_and_count_newlines(T,Res). | |
129 | peel_and_count_newlines(R,R). | |
130 | ||
131 | ||
132 | newline([10|T],T). | |
133 | newline([13|X],T) :- (X=[10|TX] -> T=TX ; T=X). % TO DO: should we check if we are on Windows ? | |
134 | ||
135 | whitespace(9). % tab | |
136 | whitespace(32). | |
137 | ||
138 | xml2b_startup :- % call once at startup to ensure all counters exist | |
139 | counter_init, | |
140 | new_counter(xml2b_current_line). | |
141 | ||
142 | :- use_module(probsrc(eventhandling),[register_event_listener/3]). | |
143 | :- register_event_listener(startup_prob,xml2b_startup, | |
144 | 'Initialise xml2b Counters.'). | |
145 | ||
146 | % ------------------------------ | |
147 | % convert a format received by json_parser:json_parse_file into the same format as READ_XML | |
148 | ||
149 | convert_json_to_b(json(List),BContent,Span) :- !, | |
150 | (convert_json_content_to_b(['='(root,json(List),0-0)],0,1,_,0,Span,Conversion,[]) | |
151 | -> BContent=Conversion | |
152 | ; add_error(convert_xml_to_b,'Converting JSON content to B failed:',List,Span), % probably an internal error | |
153 | fail | |
154 | ). | |
155 | convert_json_to_b(T,_,Span) :- | |
156 | add_error(convert_json_to_b,'Ilegal JSON term:',T,Span),fail. | |
157 | ||
158 | convert_json_content_to_b([],_ParentId,NextId,OutId,_,_Span) --> {OutId=NextId}. | |
159 | convert_json_content_to_b([H|T],ParentId,NextId,OutId,LineNr,Span) --> | |
160 | convert_json_element(H,ParentId,NextId,Id2,LineNr,Span), | |
161 | convert_json_content_to_b(T,ParentId,Id2,OutId,LineNr,Span). | |
162 | ||
163 | ||
164 | :- use_module(probsrc(tools_strings),[ajoin/2]). | |
165 | convert_json_element(Element, ParentId,NextId,OutId,_,Span) --> | |
166 | {json_element(Element,Tag,BAttributesList,Content,LineNr)}, | |
167 | !, | |
168 | [(int(NextId), | |
169 | rec([ % note: fields have to be in order | |
170 | field(attributes,BAttributes), % partial function of Attributes | |
171 | field(element,string(Tag)), | |
172 | field(meta,MetaAttributes), % such as xmlLineNumber | |
173 | field(pId,int(ParentId)), % id of the parent XML record | |
174 | field(recId,int(NextId)) % id of the XML element/record | |
175 | ]) | |
176 | )], | |
177 | {Id2 is NextId+1, | |
178 | number_codes(LineNr,LCC), atom_codes(LineNrAtom,LCC), | |
179 | convert_to_avl([(string(xmlLineNumber),string(LineNrAtom))],MetaAttributes), | |
180 | convert_to_avl(BAttributesList,BAttributes) | |
181 | }, | |
182 | convert_json_content_to_b(Content,NextId,Id2,OutId,LineNr,Span). | |
183 | convert_json_element(El,_,Id,Id,LineNr,Span) --> | |
184 | {ajoin(['Cannot translate JSON element at line ',LineNr,' to B:'],Msg), | |
185 | add_error(convert_json_to_b,Msg,El,Span)}. | |
186 | ||
187 | json_element(json(List),Tag,BAttributesList,Content,LineNr) :- !, % this happens for list items | |
188 | Tag=list_item, | |
189 | get_attributes(List,BAttributesList,Content), | |
190 | get_line_nr(List,LineNr). | |
191 | json_element('='(Attr,Atom,Meta),Tag,BAttributesList,Content,LineNr) :- atom(Atom),!, | |
192 | Tag=list_item, BAttributesList=[(string(Attr),string(Atom))], | |
193 | Content=[], get_line_nr(Meta,LineNr). | |
194 | json_element('='(Tag,json(List),Meta),Tag,BAttributesList,Content,LineNr) :- !, | |
195 | get_attributes(List,BAttributesList,Content), get_line_nr(Meta,LineNr). | |
196 | json_element('='(Tag,List,Meta),Tag,BAttributesList,Content,LineNr) :- !, % we have a list of items | |
197 | BAttributesList=[], get_line_nr(Meta,LineNr), l_add_meta(List,LineNr,Content). | |
198 | json_element('='(Tag,Json),Tag,BAttributesList,Content,LineNr) :- !, % no meta info present | |
199 | json_element('='(Tag,Json,0-0),Tag,BAttributesList,Content,LineNr). | |
200 | json_element(atom(Atom,LineNr),Tag,BAttributesList,Content,LineNr) :- | |
201 | BAttributesList=[], Content=[], Tag=Atom. | |
202 | ||
203 | l_add_meta([],_,[]). | |
204 | l_add_meta([H|T],LineNr,[MH|MT]) :- | |
205 | add_meta(H,LineNr,MH), | |
206 | l_add_meta(T,LineNr,MT). | |
207 | add_meta(A,LineNr,Res) :- atom(A), !, Res= atom(A,LineNr). | |
208 | add_meta(A,_,A). | |
209 | ||
210 | get_line_nr(['='(_,_,Meta)|_],Line) :- !, get_line_nr(Meta,Line). | |
211 | get_line_nr('-'(L,_),Line) :- !, Line=L. | |
212 | get_line_nr(E,0) :- writeq(E), format('*** No position info: ~w~n',[E]). | |
213 | ||
214 | get_attributes([],[],[]). | |
215 | get_attributes(['='(Attr,Val,_Lines)|T],[(string(Attr),string(Atom))|TAttrs],Rest) :- atom(Attr), | |
216 | number_to_atom(Val,Atom),!, | |
217 | get_attributes(T,TAttrs,Rest). | |
218 | get_attributes([F|T],Attrs,[F|TRest]) :- | |
219 | get_attributes(T,Attrs,TRest). | |
220 | ||
221 | number_to_atom(A,Atom) :- atom(A),!,Atom=A. | |
222 | number_to_atom(N,Atom) :- number(N),!, number_codes(N,C), atom_codes(Atom,C). |