1 % (c) 2016-2023 Lehrstuhl fuer Softwaretechnik und Programmiersprachen,
2 % Heinrich Heine Universitaet Duesseldorf
3 % This software is licenced under EPL 1.0 (http://www.eclipse.org/org/documents/epl-v10.html
4
5 :- module(xml2b,[convert_xml_to_b/4, convert_json_to_b/3]).
6
7 :- use_module(probsrc(module_information),[module_info/2]).
8 :- module_info(group,external_functions).
9 :- module_info(description,'This module transforms XML into a B representation.').
10
11 :- use_module(probsrc(xml_prob),[xml_parse/4]).
12 %:- use_module(library(xml),[xml_parse/3]).
13 :- use_module(library(lists)).
14 :- use_module(probsrc(custom_explicit_sets),[convert_to_avl/2]).
15 :- use_module(probsrc(error_manager)).
16 :- use_module(probsrc(debug)).
17
18 % type of generated XML elements:
19 /* struct(
20 recId: NATURAL1,
21 pId:NATURAL,
22 element:STRING,
23 attributes: STRING +-> STRING,
24 meta: STRING +-> STRING
25 )
26 */
27
28 % convert a code list to a B sequence/set of records
29 % Context is file name or string and is used for error messages only
30 convert_xml_to_b(Codes,BContent,Context,Span) :-
31 reset_counter(xml2b_current_line),
32 set_counter(xml2b_current_line,1),
33 peel_and_count_newlines(Codes,PeeledCodes),
34 (debug_mode(off) -> true
35 ; get_counter(xml2b_current_line,InitialLineNr),
36 format('XML starting at line: ~w of ~w~n',[InitialLineNr,Context])),
37 %statistics(walltime,[W1,_]),
38 xml_parse(PeeledCodes,xml(_Atts,Content),[format(false)],Span), % format(false) means we also see all newlines
39 %statistics(walltime,[W2,_]), W is W2-W1, formatsilent('% Walltime ~w ms to parse XML using Prolog library~n',[W]),
40 % xml_prob:xml_pp(xml(_Atts,Content)),nl,
41 % xml_parse(Codes2,xml(_Atts,Content),[]), format('XML:~n~s~n',[Codes2]),
42 %print(Content),nl,nl,
43 !,
44 ? (convert_xml_content_to_b(Content,0,1,_,Conversion,[]) -> BContent = Conversion
45 ; add_error(convert_xml_to_b,'Converting XML content to B failed:',Content,Span), % probably an internal error
46 fail
47 ).
48 convert_xml_to_b(_,_,Context,Span) :-
49 add_error(convert_xml_to_b,'XML Parsing failed, ensure that XML is valid in:',Context,Span),fail.
50
51
52 convert_xml_content_to_b([],_ParentId,NextId,OutId) --> {OutId=NextId}.
53 convert_xml_content_to_b([H|T],ParentId,NextId,OutId) -->
54 ? convert_xml_element(H,ParentId,NextId,Id2),
55 ? convert_xml_content_to_b(T,ParentId,Id2,OutId).
56
57 % TO DO: do we need to parse namespaces...
58 % TO DO: store meta-information
59 % TO DO: adapt line numbers when newlines inside initial tag: <?xml version="1.0" encoding="ASCII"?>
60 % TO DO: deal with new lines inside tags <Data version= "03.04"> : Prolog library hides those newlines !
61
62 convert_xml_element(Element, ParentId,NextId,OutId) -->
63 % {get_counter(xml2b_current_line,CurNr),nl,print(el(CurNr,Element)),nl},
64 {xml_element(Element,Tag,Attributes,Content,LineNr)},
65 !,
66 [(int(NextId),
67 rec([ % note: fields have to be in order
68 field(attributes,BAttributes), % partial function of Attributes
69 field(element,string(Tag)),
70 field(meta,MetaAttributes), % such as xmlLineNumber
71 field(pId,int(ParentId)), % id of the parent XML record
72 field(recId,int(NextId)) % id of the XML element/record
73 ])
74 )],
75 {Id2 is NextId+1,
76 number_codes(LineNr,LCC), atom_codes(LineNrAtom,LCC),
77 convert_to_avl([(string(xmlLineNumber),string(LineNrAtom))],MetaAttributes),
78 ? maplist(convert_xml_attributes,Attributes,BAttributesList),
79 convert_to_avl(BAttributesList,BAttributes)
80 },
81 ? convert_xml_content_to_b(Content,NextId,Id2,OutId).
82 convert_xml_element(namespace( URL, _, Element), ParentId,NextId,OutId) --> % what should we do with the name space ?
83 {format('*** Ignoring XML namespace annotation: ~w~n',[URL])},
84 ? convert_xml_element(Element, ParentId,NextId,OutId).
85 convert_xml_element(pcdata(_Codes),_,Id,Id) --> !. % should only happen when Codes consists solely of newlines
86 % no need to count_newlines; already done below in xml_element before C1 \= [].
87 ?convert_xml_element(comment(Codes),_,Id,Id) --> !,{count_newlines(Codes)}.
88 convert_xml_element(instructions(Name, Chars),_,Id,Id) --> !,
89 {format('Ignoring XML instructions annotation: ~w : ~w~n',[Name,Chars])}.
90 convert_xml_element(doctype(Tag, DoctypeId),_,Id,Id) --> !,
91 {format('Ignoring XML doctype annotation: ~w : ~w~n',[Tag,DoctypeId])}.
92 convert_xml_element(cdata(Codes),ParentId,Id,OutId) --> !,
93 % TO DO: tab expansion? see http://binding-time.co.uk/wiki/index.php/Parsing_XML_with_Prolog
94 convert_xml_element(pcdata(Codes),ParentId,Id,OutId).
95 convert_xml_element(El,_,Id,Id) -->
96 {format('*** Unknown XML element: ~w~n',[El])}.
97
98
99 xml_element(element(Tag,XAttributes,Content),Tag,Attributes,Content,LineNr) :-
100 get_counter(xml2b_current_line,CurLineNr),
101 ? (select('='('$attribute_linefeeds',LF),XAttributes,Rest)
102 -> LineNr is CurLineNr+LF, Attributes = Rest, %print(inc_line_nr(CurLineNr,LF)),nl,
103 set_counter(xml2b_current_line,LineNr)
104 ; LineNr = CurLineNr, Attributes=XAttributes).
105 xml_element(pcdata(Codes),'CText',['='(text,NewCodes)],[],LineNr) :-
106 ? peel_and_count_newlines(Codes,C1),
107 C1 \= [],
108 get_counter(xml2b_current_line,LineNr),
109 reverse(C1,RC),
110 peel_and_count_newlines(RC,RC2), % remove newlines at end; try avoid B strings with newlines in them
111 reverse(RC2,NewCodes).
112
113 convert_xml_attributes('='(Attr,Codes),(string(Attr),string(Atom))) :- !,atom_codes(Atom,Codes),
114 ? count_newlines(Codes).
115 convert_xml_attributes(A,(string(error),string(A))) :- format('**** UNKNOWN ATTRIBUTE: ~w~n',[A]).
116
117
118 :- use_module(extension('counter/counter'),
119 [counter_init/0, new_counter/1, get_counter/2, inc_counter/1, reset_counter/1, set_counter/2]).
120
121 count_newlines([]).
122 ?count_newlines(HT) :- newline(HT,T),!, inc_counter(xml2b_current_line), count_newlines(T).
123 ?count_newlines([_|T]) :- count_newlines(T).
124
125 % peel leading newlines
126 peel_and_count_newlines([],[]).
127 ?peel_and_count_newlines(HT,Res) :- newline(HT,T),!, inc_counter(xml2b_current_line), peel_and_count_newlines(T,Res).
128 ?peel_and_count_newlines([H|T],Res) :- whitespace(H),!, peel_and_count_newlines(T,Res).
129 peel_and_count_newlines(R,R).
130
131
132 newline([10|T],T).
133 newline([13|X],T) :- (X=[10|TX] -> T=TX ; T=X). % TO DO: should we check if we are on Windows ?
134
135 whitespace(9). % tab
136 whitespace(32).
137
138 xml2b_startup :- % call once at startup to ensure all counters exist
139 counter_init,
140 new_counter(xml2b_current_line).
141
142 :- use_module(probsrc(eventhandling),[register_event_listener/3]).
143 :- register_event_listener(startup_prob,xml2b_startup,
144 'Initialise xml2b Counters.').
145
146 % ------------------------------
147 % convert a format received by json_parser:json_parse_file into the same format as READ_XML
148
149 convert_json_to_b(json(List),BContent,Span) :- !,
150 (convert_json_content_to_b(['='(root,json(List),0-0)],0,1,_,0,Span,Conversion,[])
151 -> BContent=Conversion
152 ; add_error(convert_xml_to_b,'Converting JSON content to B failed:',List,Span), % probably an internal error
153 fail
154 ).
155 convert_json_to_b(T,_,Span) :-
156 add_error(convert_json_to_b,'Ilegal JSON term:',T,Span),fail.
157
158 convert_json_content_to_b([],_ParentId,NextId,OutId,_,_Span) --> {OutId=NextId}.
159 convert_json_content_to_b([H|T],ParentId,NextId,OutId,LineNr,Span) -->
160 convert_json_element(H,ParentId,NextId,Id2,LineNr,Span),
161 convert_json_content_to_b(T,ParentId,Id2,OutId,LineNr,Span).
162
163
164 :- use_module(probsrc(tools_strings),[ajoin/2]).
165 convert_json_element(Element, ParentId,NextId,OutId,_,Span) -->
166 {json_element(Element,Tag,BAttributesList,Content,LineNr)},
167 !,
168 [(int(NextId),
169 rec([ % note: fields have to be in order
170 field(attributes,BAttributes), % partial function of Attributes
171 field(element,string(Tag)),
172 field(meta,MetaAttributes), % such as xmlLineNumber
173 field(pId,int(ParentId)), % id of the parent XML record
174 field(recId,int(NextId)) % id of the XML element/record
175 ])
176 )],
177 {Id2 is NextId+1,
178 number_codes(LineNr,LCC), atom_codes(LineNrAtom,LCC),
179 convert_to_avl([(string(xmlLineNumber),string(LineNrAtom))],MetaAttributes),
180 convert_to_avl(BAttributesList,BAttributes)
181 },
182 convert_json_content_to_b(Content,NextId,Id2,OutId,LineNr,Span).
183 convert_json_element(El,_,Id,Id,LineNr,Span) -->
184 {ajoin(['Cannot translate JSON element at line ',LineNr,' to B:'],Msg),
185 add_error(convert_json_to_b,Msg,El,Span)}.
186
187 json_element(json(List),Tag,BAttributesList,Content,LineNr) :- !, % this happens for list items
188 Tag=list_item,
189 get_attributes(List,BAttributesList,Content),
190 get_line_nr(List,LineNr).
191 json_element('='(Attr,Atom,Meta),Tag,BAttributesList,Content,LineNr) :- atom(Atom),!,
192 Tag=list_item, BAttributesList=[(string(Attr),string(Atom))],
193 Content=[], get_line_nr(Meta,LineNr).
194 json_element('='(Tag,json(List),Meta),Tag,BAttributesList,Content,LineNr) :- !,
195 get_attributes(List,BAttributesList,Content), get_line_nr(Meta,LineNr).
196 json_element('='(Tag,List,Meta),Tag,BAttributesList,Content,LineNr) :- !, % we have a list of items
197 BAttributesList=[], get_line_nr(Meta,LineNr), l_add_meta(List,LineNr,Content).
198 json_element('='(Tag,Json),Tag,BAttributesList,Content,LineNr) :- !, % no meta info present
199 json_element('='(Tag,Json,0-0),Tag,BAttributesList,Content,LineNr).
200 json_element(atom(Atom,LineNr),Tag,BAttributesList,Content,LineNr) :-
201 BAttributesList=[], Content=[], Tag=Atom.
202
203 l_add_meta([],_,[]).
204 l_add_meta([H|T],LineNr,[MH|MT]) :-
205 add_meta(H,LineNr,MH),
206 l_add_meta(T,LineNr,MT).
207 add_meta(A,LineNr,Res) :- atom(A), !, Res= atom(A,LineNr).
208 add_meta(A,_,A).
209
210 get_line_nr(['='(_,_,Meta)|_],Line) :- !, get_line_nr(Meta,Line).
211 get_line_nr('-'(L,_),Line) :- !, Line=L.
212 get_line_nr(E,0) :- writeq(E), format('*** No position info: ~w~n',[E]).
213
214 get_attributes([],[],[]).
215 get_attributes(['='(Attr,Val,_Lines)|T],[(string(Attr),string(Atom))|TAttrs],Rest) :- atom(Attr),
216 number_to_atom(Val,Atom),!,
217 get_attributes(T,TAttrs,Rest).
218 get_attributes([F|T],Attrs,[F|TRest]) :-
219 get_attributes(T,Attrs,TRest).
220
221 number_to_atom(A,Atom) :- atom(A),!,Atom=A.
222 number_to_atom(N,Atom) :- number(N),!, number_codes(N,C), atom_codes(Atom,C).