1 % (c) 2016-2019 Lehrstuhl fuer Softwaretechnik und Programmiersprachen,
2 % Heinrich Heine Universitaet Duesseldorf
3 % This software is licenced under EPL 1.0 (http://www.eclipse.org/org/documents/epl-v10.html
4
5 :- module(xml2b,[convert_xml_to_b/2]).
6
7 :- use_module(module_information,[module_info/2]).
8 :- module_info(group,external_functions).
9 :- module_info(description,'This module transforms XML into a B representation.').
10
11 %:- use_module(library(xml)).
12 :- use_module(xml_prob).
13 :- use_module(library(lists)).
14 :- use_module(custom_explicit_sets,[convert_to_avl/2]).
15 :- use_module(error_manager).
16 :- use_module(debug).
17
18 % type of generated XML elements:
19 /* struct(
20 recId: NATURAL1,
21 pId:NATURAL,
22 element:STRING,
23 attributes: STRING +-> STRING,
24 meta: STRING +-> STRING
25 )
26 */
27
28 % convert a code list to a B sequence/set of records
29 convert_xml_to_b(Codes,BContent) :-
30 reset_counter(xml2b_current_line),
31 set_counter(xml2b_current_line,1),
32 peel_and_count_newlines(Codes,PeeledCodes),
33 (debug_mode(off) -> true
34 ; get_counter(xml2b_current_line,InitialLineNr), format('XML starting at line: ~w~n',[InitialLineNr])),
35 %statistics(walltime,[W1,_]),
36 xml_parse(PeeledCodes,xml(_Atts,Content),[format(false)]), % format(false) means we also see all newlines
37 %statistics(walltime,[W2,_]), W is W2-W1, formatsilent('% Walltime ~w ms to parse XML using Prolog library~n',[W]),
38 % xml_pp(xml(_Atts,Content)),nl,
39 % xml_parse(Codes2,xml(_Atts,Content),[]), format('XML:~n~s~n',[Codes2]),
40 %print(Content),nl,nl,
41 !,
42 convert_xml_content_to_b(Content,0,1,_,BContent,[]).
43 convert_xml_to_b(_,_) :- add_error(convert_xml_to_b,'XML Parsing failed'),fail.
44
45
46 convert_xml_content_to_b([],_ParentId,NextId,OutId) --> {OutId=NextId}.
47 convert_xml_content_to_b([H|T],ParentId,NextId,OutId) -->
48 convert_xml_element(H,ParentId,NextId,Id2),
49 convert_xml_content_to_b(T,ParentId,Id2,OutId).
50
51 % TO DO: do we need to parse namespaces...
52 % TO DO: store meta-information
53 % TO DO: adapt line numbers when newlines inside initial tag: <?xml version="1.0" encoding="ASCII"?>
54 % TO DO: deal with new lines inside tags <Data version= "03.04"> : Prolog library hides those newlines !
55
56 convert_xml_element(Element, ParentId,NextId,OutId) -->
57 % {get_counter(xml2b_current_line,CurNr),nl,print(el(CurNr,Element)),nl},
58 {xml_element(Element,Tag,Attributes,Content,LineNr)},
59 !,
60 [(int(NextId),
61 rec([ % note: fields have to be in order
62 field(attributes,BAttributes), % partial function of Attributes
63 field(element,string(Tag)),
64 field(meta,MetaAttributes), % such as xmlLineNumber
65 field(pId,int(ParentId)), % id of the parent XML record
66 field(recId,int(NextId)) % id of the XML element/record
67 ])
68 )],
69 {Id2 is NextId+1,
70 number_codes(LineNr,LCC), atom_codes(LineNrAtom,LCC),
71 convert_to_avl([(string(xmlLineNumber),string(LineNrAtom))],MetaAttributes),
72 maplist(convert_xml_attributes,Attributes,BAttributesList),
73 convert_to_avl(BAttributesList,BAttributes)
74 },
75 convert_xml_content_to_b(Content,NextId,Id2,OutId).
76 convert_xml_element(namespace( URL, _, Element), ParentId,NextId,OutId) --> % what should we do with the name space ?
77 {format('*** Ignoring XML namespace annotation: ~w~n',[URL])},
78 convert_xml_element(Element, ParentId,NextId,OutId).
79 convert_xml_element(pcdata(_Codes),_,Id,Id) --> !. % should only happen when Codes consists solely of newlines
80 % no need to count_newlines; already done below in xml_element before C1 \= [].
81 convert_xml_element(comment(Codes),_,Id,Id) --> !,{count_newlines(Codes)}.
82 convert_xml_element(instructions(Name, Chars),_,Id,Id) --> !,
83 {format('Ignoring XML instructions annotation: ~w : ~w~n',[Name,Chars])}.
84 convert_xml_element(doctype(Tag, DoctypeId),_,Id,Id) --> !,
85 {format('Ignoring XML doctype annotation: ~w : ~w~n',[Tag,DoctypeId])}.
86 convert_xml_element(cdata(Codes),ParentId,Id,OutId) --> !,
87 % TO DO: tab expansion? see http://binding-time.co.uk/wiki/index.php/Parsing_XML_with_Prolog
88 convert_xml_element(pcdata(Codes),ParentId,Id,OutId).
89 convert_xml_element(El,_,Id,Id) -->
90 {format('*** Unknown XML element: ~w~n',[El])}.
91
92
93 xml_element(element(Tag,XAttributes,Content),Tag,Attributes,Content,LineNr) :-
94 get_counter(xml2b_current_line,CurLineNr),
95 (select('='('$attribute_linefeeds',LF),XAttributes,Rest)
96 -> LineNr is CurLineNr+LF, Attributes = Rest, %print(inc_line_nr(CurLineNr,LF)),nl,
97 set_counter(xml2b_current_line,LineNr)
98 ; LineNr = CurLineNr, Attributes=XAttributes).
99 xml_element(pcdata(Codes),'CText',['='(text,NewCodes)],[],LineNr) :-
100 peel_and_count_newlines(Codes,C1),
101 C1 \= [],
102 get_counter(xml2b_current_line,LineNr),
103 reverse(C1,RC),
104 peel_and_count_newlines(RC,RC2), % remove newlines at end; try avoid B strings with newlines in them
105 reverse(RC2,NewCodes).
106
107 convert_xml_attributes('='(Attr,Codes),(string(Attr),string(Atom))) :- !,atom_codes(Atom,Codes),
108 count_newlines(Codes).
109 convert_xml_attributes(A,(string(error),string(A))) :- format('**** UNKNOWN ATTRIBUTE: ~w~n',[A]).
110
111
112 :- use_module(extension('counter/counter')).
113
114 count_newlines([]).
115 count_newlines(HT) :- newline(HT,T),!, inc_counter(xml2b_current_line), count_newlines(T).
116 count_newlines([_|T]) :- count_newlines(T).
117
118 % peel leading newlines
119 peel_and_count_newlines([],[]).
120 peel_and_count_newlines(HT,Res) :- newline(HT,T),!, inc_counter(xml2b_current_line), peel_and_count_newlines(T,Res).
121 peel_and_count_newlines([H|T],Res) :- whitespace(H),!, peel_and_count_newlines(T,Res).
122 peel_and_count_newlines(R,R).
123
124
125 newline([10|T],T).
126 newline([13|X],T) :- (X=[10|TX] -> T=TX ; T=X). % TO DO: should we check if we are on Windows ?
127
128 whitespace(9). % tab
129 whitespace(32).
130
131 xml2b_startup :- % call once at startup to ensure all counters exist
132 counter_init,
133 new_counter(xml2b_current_line).
134
135 :- use_module(eventhandling,[register_event_listener/3]).
136 :- register_event_listener(startup_prob,xml2b_startup,
137 'Initialise xml2b Counters.').