1 | % (c) 2016-2019 Lehrstuhl fuer Softwaretechnik und Programmiersprachen, | |
2 | % Heinrich Heine Universitaet Duesseldorf | |
3 | % This software is licenced under EPL 1.0 (http://www.eclipse.org/org/documents/epl-v10.html | |
4 | ||
5 | :- module(xml2b,[convert_xml_to_b/2]). | |
6 | ||
7 | :- use_module(module_information,[module_info/2]). | |
8 | :- module_info(group,external_functions). | |
9 | :- module_info(description,'This module transforms XML into a B representation.'). | |
10 | ||
11 | %:- use_module(library(xml)). | |
12 | :- use_module(xml_prob). | |
13 | :- use_module(library(lists)). | |
14 | :- use_module(custom_explicit_sets,[convert_to_avl/2]). | |
15 | :- use_module(error_manager). | |
16 | :- use_module(debug). | |
17 | ||
18 | % type of generated XML elements: | |
19 | /* struct( | |
20 | recId: NATURAL1, | |
21 | pId:NATURAL, | |
22 | element:STRING, | |
23 | attributes: STRING +-> STRING, | |
24 | meta: STRING +-> STRING | |
25 | ) | |
26 | */ | |
27 | ||
28 | % convert a code list to a B sequence/set of records | |
29 | convert_xml_to_b(Codes,BContent) :- | |
30 | reset_counter(xml2b_current_line), | |
31 | set_counter(xml2b_current_line,1), | |
32 | peel_and_count_newlines(Codes,PeeledCodes), | |
33 | (debug_mode(off) -> true | |
34 | ; get_counter(xml2b_current_line,InitialLineNr), format('XML starting at line: ~w~n',[InitialLineNr])), | |
35 | %statistics(walltime,[W1,_]), | |
36 | xml_parse(PeeledCodes,xml(_Atts,Content),[format(false)]), % format(false) means we also see all newlines | |
37 | %statistics(walltime,[W2,_]), W is W2-W1, formatsilent('% Walltime ~w ms to parse XML using Prolog library~n',[W]), | |
38 | % xml_pp(xml(_Atts,Content)),nl, | |
39 | % xml_parse(Codes2,xml(_Atts,Content),[]), format('XML:~n~s~n',[Codes2]), | |
40 | %print(Content),nl,nl, | |
41 | !, | |
42 | convert_xml_content_to_b(Content,0,1,_,BContent,[]). | |
43 | convert_xml_to_b(_,_) :- add_error(convert_xml_to_b,'XML Parsing failed'),fail. | |
44 | ||
45 | ||
46 | convert_xml_content_to_b([],_ParentId,NextId,OutId) --> {OutId=NextId}. | |
47 | convert_xml_content_to_b([H|T],ParentId,NextId,OutId) --> | |
48 | convert_xml_element(H,ParentId,NextId,Id2), | |
49 | convert_xml_content_to_b(T,ParentId,Id2,OutId). | |
50 | ||
51 | % TO DO: do we need to parse namespaces... | |
52 | % TO DO: store meta-information | |
53 | % TO DO: adapt line numbers when newlines inside initial tag: <?xml version="1.0" encoding="ASCII"?> | |
54 | % TO DO: deal with new lines inside tags <Data version= "03.04"> : Prolog library hides those newlines ! | |
55 | ||
56 | convert_xml_element(Element, ParentId,NextId,OutId) --> | |
57 | % {get_counter(xml2b_current_line,CurNr),nl,print(el(CurNr,Element)),nl}, | |
58 | {xml_element(Element,Tag,Attributes,Content,LineNr)}, | |
59 | !, | |
60 | [(int(NextId), | |
61 | rec([ % note: fields have to be in order | |
62 | field(attributes,BAttributes), % partial function of Attributes | |
63 | field(element,string(Tag)), | |
64 | field(meta,MetaAttributes), % such as xmlLineNumber | |
65 | field(pId,int(ParentId)), % id of the parent XML record | |
66 | field(recId,int(NextId)) % id of the XML element/record | |
67 | ]) | |
68 | )], | |
69 | {Id2 is NextId+1, | |
70 | number_codes(LineNr,LCC), atom_codes(LineNrAtom,LCC), | |
71 | convert_to_avl([(string(xmlLineNumber),string(LineNrAtom))],MetaAttributes), | |
72 | maplist(convert_xml_attributes,Attributes,BAttributesList), | |
73 | convert_to_avl(BAttributesList,BAttributes) | |
74 | }, | |
75 | convert_xml_content_to_b(Content,NextId,Id2,OutId). | |
76 | convert_xml_element(namespace( URL, _, Element), ParentId,NextId,OutId) --> % what should we do with the name space ? | |
77 | {format('*** Ignoring XML namespace annotation: ~w~n',[URL])}, | |
78 | convert_xml_element(Element, ParentId,NextId,OutId). | |
79 | convert_xml_element(pcdata(_Codes),_,Id,Id) --> !. % should only happen when Codes consists solely of newlines | |
80 | % no need to count_newlines; already done below in xml_element before C1 \= []. | |
81 | convert_xml_element(comment(Codes),_,Id,Id) --> !,{count_newlines(Codes)}. | |
82 | convert_xml_element(instructions(Name, Chars),_,Id,Id) --> !, | |
83 | {format('Ignoring XML instructions annotation: ~w : ~w~n',[Name,Chars])}. | |
84 | convert_xml_element(doctype(Tag, DoctypeId),_,Id,Id) --> !, | |
85 | {format('Ignoring XML doctype annotation: ~w : ~w~n',[Tag,DoctypeId])}. | |
86 | convert_xml_element(cdata(Codes),ParentId,Id,OutId) --> !, | |
87 | % TO DO: tab expansion? see http://binding-time.co.uk/wiki/index.php/Parsing_XML_with_Prolog | |
88 | convert_xml_element(pcdata(Codes),ParentId,Id,OutId). | |
89 | convert_xml_element(El,_,Id,Id) --> | |
90 | {format('*** Unknown XML element: ~w~n',[El])}. | |
91 | ||
92 | ||
93 | xml_element(element(Tag,XAttributes,Content),Tag,Attributes,Content,LineNr) :- | |
94 | get_counter(xml2b_current_line,CurLineNr), | |
95 | (select('='('$attribute_linefeeds',LF),XAttributes,Rest) | |
96 | -> LineNr is CurLineNr+LF, Attributes = Rest, %print(inc_line_nr(CurLineNr,LF)),nl, | |
97 | set_counter(xml2b_current_line,LineNr) | |
98 | ; LineNr = CurLineNr, Attributes=XAttributes). | |
99 | xml_element(pcdata(Codes),'CText',['='(text,NewCodes)],[],LineNr) :- | |
100 | peel_and_count_newlines(Codes,C1), | |
101 | C1 \= [], | |
102 | get_counter(xml2b_current_line,LineNr), | |
103 | reverse(C1,RC), | |
104 | peel_and_count_newlines(RC,RC2), % remove newlines at end; try avoid B strings with newlines in them | |
105 | reverse(RC2,NewCodes). | |
106 | ||
107 | convert_xml_attributes('='(Attr,Codes),(string(Attr),string(Atom))) :- !,atom_codes(Atom,Codes), | |
108 | count_newlines(Codes). | |
109 | convert_xml_attributes(A,(string(error),string(A))) :- format('**** UNKNOWN ATTRIBUTE: ~w~n',[A]). | |
110 | ||
111 | ||
112 | :- use_module(extension('counter/counter')). | |
113 | ||
114 | count_newlines([]). | |
115 | count_newlines(HT) :- newline(HT,T),!, inc_counter(xml2b_current_line), count_newlines(T). | |
116 | count_newlines([_|T]) :- count_newlines(T). | |
117 | ||
118 | % peel leading newlines | |
119 | peel_and_count_newlines([],[]). | |
120 | peel_and_count_newlines(HT,Res) :- newline(HT,T),!, inc_counter(xml2b_current_line), peel_and_count_newlines(T,Res). | |
121 | peel_and_count_newlines([H|T],Res) :- whitespace(H),!, peel_and_count_newlines(T,Res). | |
122 | peel_and_count_newlines(R,R). | |
123 | ||
124 | ||
125 | newline([10|T],T). | |
126 | newline([13|X],T) :- (X=[10|TX] -> T=TX ; T=X). % TO DO: should we check if we are on Windows ? | |
127 | ||
128 | whitespace(9). % tab | |
129 | whitespace(32). | |
130 | ||
131 | xml2b_startup :- % call once at startup to ensure all counters exist | |
132 | counter_init, | |
133 | new_counter(xml2b_current_line). | |
134 | ||
135 | :- use_module(eventhandling,[register_event_listener/3]). | |
136 | :- register_event_listener(startup_prob,xml2b_startup, | |
137 | 'Initialise xml2b Counters.'). |