1		/* xml_prob.pl : Contains xml_parse/[2,3] a bi-directional XML parser written in
2		* Prolog.
3		*
4		* Copyright (C) 2001-2005 Binding Time Limited
5		* Copyright (C) 2005 John Fletcher
6		*
7		* Current Release: $Revision$
8		*
9		* TERMS AND CONDITIONS:
10		*
11		* This program is offered free of charge, as unsupported source code. You may
12		* use it, copy it, distribute it, modify it or sell it without restriction,
13		* but entirely at your own risk.
14		*
15		*/
16
17		% minimally adapted for SICStus Prolog
18		% Mats Carlsson, 2003-2006
19		% added linefeed counting for attributes (attributes_count_lf,spaces_count_linebreaks)
20		% Michael Leuschel, 2017-2022 (changes marked by PATCH LEUSCHEL)
21		% replaces library(xml) for SICStus
22
23		:- module( xml_prob, [
24		xml_parse/2,
25		xml_parse/3,
26		xml_subterm/2,
27		xml_pp/1,
28
29		xml_parse/4
30		]).
31
32		:- use_module(module_information,[module_info/2]).
33		:- module_info(group,external_functions).
34		:- module_info(description,'This module transforms XML into a Prolog representation, adaptation of SICStus library module.').
35
36		:- use_module(probsrc(error_manager)).
37		:- use_module(probsrc(debug),[debug_format/3]).
38		:- use_module(probsrc(tools_strings),[ajoin/2]).
39
40		% added by Leuschel:
41		xml_parse( Chars, Document, Controls, Span ) :-
42		bb_put(xml_prob_current_span,Span),
43		call_cleanup(xml_parse( Chars, Document, Controls),
44		bb_delete(xml_prob_current_span,_)).
45
46		add_xml_warning(Msg,Term) :-
47		bb_get(xml_prob_current_span,Span),!,
48		add_warning(xml_prob,Msg,Term,Span).
49		add_xml_warning(Msg,Term) :-
50		add_warning(xml_prob,Msg,Term).
51
52		add_xml_error(Msg,Term) :-
53		bb_get(xml_prob_current_span,Span),!,
54		add_error(xml_prob,Msg,Term,Span).
55		add_xml_error(Msg,Term) :-
56		add_error(xml_prob,Msg,Term).
57		:- use_module(library(lists),[append/2]).
58		expect_tokens(Tokens,_) --> Tokens,!.
59		expect_tokens(Tokens,Ctxt,Input,_) :-
60		append(["XML parse error, expected '",Tokens,"' in ",Ctxt,", but got:"],Codes),
61		atom_codes(Msg,Codes),
62		length(Tokens,Len),
63		get_next_tokens(Input,Len,Nxt),
64		atom_codes(NxtAtom,Nxt),
65		add_xml_error(Msg,NxtAtom), % TO DO: extract next tokens
66		fail.
67
68		get_next_tokens(_,N,R) :- N<1,!, R=[].
69		get_next_tokens(Var,_,Res) :- var(Var),!, Res="<EOF>".
70		get_next_tokens([],_,"<EOF>").
71		get_next_tokens([H\|T],Nr,[H\|R]) :- N1 is Nr-1, get_next_tokens(T,N1,R).
72
73		:- use_module(library(types), [
74		must_be/4,
75		illarg/3,
76		illarg/4
77		]).
78		:- use_module(library(lists), [
79		select/3,
80		is_list/1
81		]).
82
83		:- set_prolog_flag(double_quotes, codes).
84
85		%@ This is a package for parsing XML with Prolog, which
86		%@ provides Prolog applications with a simple ``Document Value Model''
87		%@ interface to XML documents. A description of the subset of XML that it
88		%@ supports can be found at:
89		%@ @c [PM] 4.1 link updated 2009-08-14
90		%@ @uref{http://www.binding-time.co.uk/xmlpl.html}
91		%@
92		%@ The package, originally written by Binding Time Ltd., is in the public
93		%@ domain and unsupported. To use the package, enter the query:
94		%@
95		%@ @example
96		%@ @group
97		%@ \| ?- use_module(library(xml)).
98		%@ @end group
99		%@ @end example
100		%@
101		%@ The package represents XML documents by the abstract data type
102		%@ @var{document}, which is defined by the following grammar:
103		%@
104		%@ @multitable @columnfractions .2 .3 .5
105		%@ @item @var{document} @tab ::= @code{xml(@var{attributes},@var{content})} @tab @r{@{ well-formed document @}}
106		%@ @item @tab \| @code{malformed(@var{attributes},@var{content})} @tab @r{@{ malformed document @}}
107		%@ @item
108		%@ @item @var{attributes} @tab ::= @code{[]}
109		%@ @item @tab \| @code{[@var{name}=@var{char-data}\|@var{attributes}]}
110		%@ @item
111		%@ @item @var{content} @tab ::= @code{[]}
112		%@ @item @tab \| @code{[@var{cterm}\|@var{content}]}
113		%@ @item
114		%@ @item @var{cterm} @tab ::= @code{pcdata(@var{char-data})} @tab @r{@{ text @}}
115		%@ @item @tab \| @code{comment(@var{char-data})} @tab @r{@{ an XML comment @}}
116		%@ @item @tab \| @code{namespace(@var{URI},@var{prefix},@var{element})} @tab @r{@{ a Namespace @}}
117		%@ @item @tab \| @code{element(@var{tag},@var{attributes},@var{content})} @tab @r{@{ <@var{tag}>..</@var{tag}> encloses @var{content} or <@var{tag} /> if empty @}}
118		%@ @item @tab \| @code{instructions(@var{name},@var{char-data})} @tab @r{@{ A PI <? @var{name} @var{char-data} ?> @}}
119		%@ @item @tab \| @code{cdata(@var{char-data})} @tab @r{@{ <![CDATA[@var{char-data}]]> @}}
120		%@ @item @tab \| @code{doctype(@var{tag},@var{doctype-id})} @tab @r{@{ DTD <!DOCTYPE .. > @}}
121		%@ @item @tab \| @code{unparsed(@var{char-data})} @tab @r{@{ text that hasn't been parsed @}}
122		%@ @item @tab \| @code{out_of_context(@var{tag})} @tab @r{@{ @var{tag} is not closed @}}
123		%@ @item
124		%@ @item @var{tag} @tab ::= @dfn{atom} @tab @r{@{ naming an element @}}
125		%@ @item
126		%@ @item @var{name} @tab ::= @dfn{atom} @tab @r{@{ not naming an element @}}
127		%@ @item
128		%@ @item @var{URI} @tab ::= @dfn{atom} @tab @r{@{ giving the URI of a namespace @}}
129		%@ @item
130		%@ @item @var{char-data} @tab ::= @dfn{code-list}
131		%@ @item
132		%@ @item @var{doctype-id} @tab ::= @code{public(@var{char-data},@var{char-data})}
133		%@ @item @tab \| @code{public(@var{char-data},@var{dtd-literals})}
134		%@ @item @tab \| @code{system(@var{char-data})}
135		%@ @item @tab \| @code{system(@var{char-data},@var{dtd-literals})}
136		%@ @item @tab \| @code{local}
137		%@ @item @tab \| @code{local,@var{dtd-literals}}
138		%@ @item
139		%@ @item @var{dtd-literals} @tab ::= @code{[]}
140		%@ @item @tab \| @code{[dtd_literal(@var{char-data})\|@var{dtd-literals}]}
141		%@ @end multitable
142		%@
143		%@ The following predicates are exported by the package:
144		%@
145		%@ @table @code
146		%@ @item xml_parse(@var{?Chars}, @var{?Document})
147		%@ @itemx xml_parse(@var{?Chars}, @var{?Document}, @var{+Options})
148		%@ @PLXindex {xml_parse/[2,3] (xml)}
149		%@ Either parses @var{Chars}, a @dfn{code-list}, to @var{Document}, a
150		%@ @var{document}. @var{Chars} is not required to represent strictly
151		%@ well-formed XML.
152		%@ Or generates @var{Chars}, a @dfn{code-list},
153		%@ from @var{Document}, a @var{document}.
154		%@ If @var{Document} is not a valid @var{document} term representing
155		%@ well-formed XML, an exception is raised.
156		%@ In the second usage of the predicate, the only option available is @code{format/1}.
157		%@
158		%@ @var{Options} is a list of zero or more of the following, where
159		%@ @var{Boolean} must be @code{true} or @code{false}:
160		%@
161		%@ @table @code
162		%@ @item format(@var{Boolean})
163		%@ @findex format/1 (xml_parse/3 option)
164		%@ Indent the element content (default @code{true}).
165		%@
166		%@ @item extended_characters(@var{Boolean})
167		%@ @findex extended_characters/1 (xml_parse/3 option)
168		%@ Use the extended character entities for XHTML (default @code{true}).
169		%@
170		%@ @item remove_attribute_prefixes(@var{Boolean})
171		%@ @findex remove_attribute_prefixes/1 (xml_parse/3 option)
172		%@ Remove namespace prefixes from attributes when it's the same as the
173		%@ prefix of the parent element (default @code{false}).
174		%@ @end table
175		%@
176		%@ @item xml_subterm(@var{+Term}, @var{?Subterm})
177		%@ @PLXindex {xml_subterm/2 (xml)}
178		%@ Unifies @var{Subterm} with a sub-term of @var{Term}, a
179		%@ @var{document}. This can be especially useful when trying to test or
180		%@ retrieve a deeply-nested subterm from a document.
181		%@
182		%@ @item xml_pp(@var{+Document})
183		%@ @PLXindex {xml_pp/1 (xml)}
184		%@ ``Pretty prints'' @var{Document}, a @var{document}, on the current
185		%@ output stream.
186		%@ @end table
187
188
189		/* xml_parse(+?Chars, ?+Document[, +Controls]) parses Chars to/from a data
190		* structure of the form xml(<atts>, <content>). <atts> is a list of
191		* <atom>=<string> attributes from the (possibly implicit) XML signature of the
192		* document. <content> is a (possibly empty) list comprising occurrences of :
193		*
194		* pcdata(<string>) : Text
195		* comment(<string>) : An xml comment;
196		* element(<tag>,<atts>,<content>) : <tag>..</tag> encloses <content>
197		* : <tag /> if empty
198		* instructions(<atom>, <string>) : Processing <? <atom> <params> ?>"
199		* cdata( <string> ) : <![CDATA[ <string> ]]>
200		* doctype(<atom>, <doctype id>) : DTD <!DOCTYPE .. >
201		*
202		* The conversions are not completely symmetrical, in that weaker XML is
203		* accepted than can be generated. Specifically, in-bound (Chars -> Document)
204		* does not require strictly well-formed XML. Document is instantiated to the
205		* term malformed(Attributes, Content) if Chars does not represent well-formed
206		* XML. The Content of a malformed/2 structure can contain:
207		*
208		* unparsed( <string> ) : Text which has not been parsed
209		* out_of_context( <tag> ) : <tag> is not closed
210		*
211		* in addition to the standard term types.
212		*
213		* Out-bound (Document -> Chars) parsing _does_ require that Document defines
214		* strictly well-formed XML. If an error is detected a 'domain' exception is
215		* raised.
216		*
217		* The domain exception will attempt to identify the particular sub-term in
218		* error and the message will show a list of its ancestor elements in the form
219		* <tag>{(id)}* where <id> is the value of any attribute _named_ id.
220		*
221		* At this release, the Controls applying to in-bound (Chars -> Document)
222		* parsing are:
223		*
224		* extended_characters(<bool>) : Use the extended character
225		* : entities for XHTML (default true)
226		*
227		* format(<bool>) : Strip layouts when no character data
228		* : appears between elements.
229		* : (default true)
230		*
231		* remove_attribute_prefixes( <bool>) : Remove namespace prefixes from
232		* : attributes when it's the same as the
233		* : prefix of the parent element
234		* : (default false).
235		*
236		* [<bool> is one of 'true' or 'false']
237		*
238		* For out-bound (Document -> Chars) parsing, the only available option is:
239		*
240		* format(<Bool>) : Indent the element content
241		* : (default true)
242		*
243		* Different DCGs for input and output are used because input parsing is
244		* more flexible than output parsing. Errors in input are recorded as part
245		* of the data structure. Output parsing throws an exception if the document
246		* is not well-formed, diagnosis tries to identify the specific culprit term.
247		*/
248		xml_parse( Chars, Document ) :-
249		xml_parse( Chars, Document, [] ).
250
251		xml_parse( Chars, Document, Controls ) :-
252		Goal = xml_parse(Chars,Document,Controls),
253		% format, extended_characters, remove_attribute_prefixes
254		must_be(Controls, proper_list, Goal, 3),
255		xml_options(Controls, opt(true,true,false), Options, Goal, 3),
256		( ground( Chars ) ->
257		xml_to_document( Options, Chars, Document )
258		; ground(Document) ->
259		document_to_xml( Options, Document, Chars )
260		; illarg(var, Goal, 0)
261		), !. /MC/
262
263		xml_options([], Opt, Opt, _, _) :- !.
264		xml_options([X\|L], Opt0, Opt, Goal, ArgNo) :- !,
265		( callable(X),
266		xml_option(X, Opt0, Opt1) -> true
267		; illarg(domain(term,xml_parse_option), Goal, ArgNo, X)
268		),
269		xml_options(L, Opt1, Opt, Goal, ArgNo).
270
271		xml_option(format(X), opt(_,B,C), opt(X,B,C)) :-
272		bool_option(X).
273		xml_option(extended_characters(X), opt(A,_,C), opt(A,X,C)) :-
274		bool_option(X).
275		xml_option(remove_attribute_prefixes(X), opt(A,B,_), opt(A,B,X)) :-
276		bool_option(X).
277
278		bool_option(X) :- var(X), !, fail.
279		bool_option(false).
280		bool_option(true).
281
282		document_to_xml( opt(Format,_,_), Document, Chars ) :-
283	?	( document_generation(Format, Document, Chars0, [] ) ->
284		Chars = Chars0
285		;
286		xml_fault( Document, [], Culprit, Path, Message ),
287		throw(xml_parse(Message,Document,Culprit,Path))
288		).
289
290		:- multifile user:generate_message_hook/3.
291		user:generate_message_hook(xml_parse(Message,Document,Culprit,Path)) --> !,
292		['XML Parse: ~a in ~q'-[Message,Document],nl,
293		'Culprit: ~q'-[Culprit],nl],
294		( {Path==[]} -> []
295		; ['Path: ~s'-[Path],nl]
296		).
297
298
299		/* xml_subterm( +XMLTerm, ?Subterm ) unifies Subterm with a sub-term of Term.
300		* Note that XMLTerm is a sub-term of itself.
301		*/
302		xml_subterm( Term, Term ).
303		xml_subterm( xml(_Attributes, Content), Term ) :-
304		xml_subterm( Content, Term ).
305		xml_subterm( [H\|T], Term ) :-
306		( xml_subterm( H, Term )
307		; xml_subterm( T, Term )
308		).
309		xml_subterm( element(_Name,_Attributes,Content), Term ) :-
310		xml_subterm( Content, Term ).
311		xml_subterm( namespace(_URI,_Prefix,Content), Term ) :-
312		xml_subterm( Content, Term ).
313
314		/* xml is intended to be a rather modular module: it should be easy to
315		* build a program that can output XML, but not read it, or vice versa.
316		* Similarly, you may be happy to dispense with diagnosis once you are
317		* sure that your code will only try to make valid calls to xml_parse/2.
318		*
319		* It is intended that the code should be very portable too. Clearly,
320		* some small changes will be needed between platforms, but these should
321		* be limited to xml_utilities. xml_utilities contains most of the shared
322		* code and most of the potentially non-portable code.
323		*/
324		/* xml_acquisition.pl : XML -> Document translation.
325		*
326		* $Revision$
327		*
328		*/
329
330		/* xml_to_document( +Options, +XML, ?Document ) translates the list of
331		* character codes XML into the Prolog term Document. Options is
332		* the parsed options list.
333		*/
334		xml_to_document( Options, XML, Document ) :-
335		initial_context( Options, Context ),
336		debug_format(19,'Parsing XML:~n~s~n',[XML]), %PATCH: LEUSCHEL
337		( xml_declaration( Attributes0, XML, XML1 ) ->
338		Attributes = Attributes0
339		;
340		format('No <xml> header declaration~n',[]), %PATCH LEUSCHEL
341		XML1 = XML,
342		Attributes = []
343		),
344		xml_to_document( XML1, Context, Terms, [], WellFormed ),
345		xml_to_document1( WellFormed, Attributes, Terms, Document ).
346
347		xml_to_document1( true, Attributes, Terms, xml(Attributes, Terms) ).
348		xml_to_document1( false, Attributes, Terms, malformed(Attributes, Terms) ).
349
350		% unparsed( +Unparsed, +Context, ?Terms, ?Residue, ?WellFormed )
351		unparsed( Unparsed, _Context, [unparsed(Unparsed)], [], false ).
352
353		xml_declaration( Attributes ) -->
354		spaces,
355		"<?",
356		nmtoken( xml ),
357		xml_declaration_attributes( Attributes ),
358		spaces,
359		"?>".
360
361		xml_to_document( [], Context, Terms, [], WF ) :-
362		close_context( Context, Terms, WF ).
363		xml_to_document( [Char\|Chars], Context, Terms, Residue, WF ) :-
364		( Char =:= "<" ->
365		xml_markup_structure( Chars, Context, Terms, Residue, WF )
366		; Char =:= "&" ->
367		entity_reference( Chars, Context, Terms, Residue, WF )
368		; Char =< " ",
369		\+ space_preserve( Context ) ->
370		layouts( Chars, Context, [Char\|T], T, Terms, Residue, WF )
371		; void_context( Context ) ->
372		unparsed( [Char\|Chars], Context, Terms, Residue, WF )
373		;
374		Terms = [pcdata([Char\|Chars1])\|Terms1],
375		acquire_pcdata( Chars, Context, Chars1, Terms1, Residue, WF )
376		).
377
378		layouts( [], Context, _Plus, _Minus, Terms, [], WF ) :-
379		close_context( Context, Terms, WF ).
380		layouts( [Char\|Chars], Context, Plus, Minus, Terms, Residue, WF ) :-
381		( Char =:= "<" ->
382		xml_markup_structure( Chars, Context, Terms, Residue, WF )
383		; Char =:= "&" ->
384		entity_reference( Chars, Context, Terms, Residue, WF )
385		; Char =< " " ->
386		Minus = [Char\|Minus1],
387		layouts( Chars, Context, Plus, Minus1, Terms, Residue, WF )
388		; void_context( Context ) ->
389		unparsed( [Char\|Chars], Context, Terms, Residue, WF )
390		;
391		Terms = [pcdata(Plus)\|Terms1],
392		Minus = [Char\|Chars1],
393		context_update( space_preserve, Context, true, Context1 ),
394		acquire_pcdata( Chars, Context1, Chars1, Terms1, Residue, WF )
395		).
396
397		acquire_pcdata( [], Context, [], Terms, [], WF ) :-
398		close_context( Context, Terms, WF ).
399		acquire_pcdata( [Char\|Chars], Context, Chars1, Terms, Residue, WF ) :-
400		( Char =:= "<" ->
401		Chars1 = [],
402		xml_markup_structure( Chars, Context, Terms, Residue, WF )
403		; Char =:= "&" ->
404		reference_in_pcdata( Chars, Context, Chars1, Terms, Residue, WF )
405		;
406		Chars1 = [Char\|Chars2],
407		acquire_pcdata( Chars, Context, Chars2, Terms, Residue, WF )
408		).
409
410		xml_markup_structure( [], Context, Terms, Residue, WF ) :-
411		unparsed( "<", Context, Terms, Residue, WF ).
412		xml_markup_structure( Chars, Context, Terms, Residue, WF ) :-
413		Chars = [Char\|Chars1],
414		( Char =:= "/" ->
415		closing_tag( Context, Chars1, Terms, Residue, WF )
416		; Char =:= "?" ->
417		pi_acquisition( Chars1, Context, Terms, Residue, WF )
418		; Char =:= "!" ->
419		declaration_acquisition( Chars1, Context, Terms, Residue, WF )
420		; open_tag(Tag,Context,Attributes,Type, Chars, Chars2 ) ->
421		push_tag( Tag, Chars2, Context, Attributes, Type, Terms, Residue, WF )
422		;
423		unparsed( [0'<\|Chars], Context, Terms, Residue, WF ) %'
424		).
425
426		push_tag( Tag, Chars, Context, Attributes, Type, Terms, Residue, WF ) :-
427		new_element(Tag, Chars, Context, Attributes, Type, Term, Rest, WF0),
428		push_tag1( WF0, Context, Term, Rest, Terms, Residue, WF ).
429
430		push_tag1( true, Context, Term, Chars, [Term\|Terms], Residue, WF ) :-
431		xml_to_document( Chars, Context, Terms, Residue, WF ).
432		push_tag1( false, _Context, Term, Chars, [Term], Chars, false ).
433
434		new_element( TagChars, Chars, Context, Attributes0, Type, Term, Residue, WF ) :-
435		namespace_attributes( Attributes0, Context, Context1, Attributes1 ),
436		( append( NSChars, [0':\|TagChars1], TagChars ), %'
437		specific_namespace( NSChars, Context1, SpecificNamespace ) ->
438		Namespace0 = SpecificNamespace
439		;
440		NSChars = "",
441		TagChars1 = TagChars,
442		default_namespace( Context1, Namespace0 )
443		),
444		current_namespace( Context1, CurrentNamespace ),
445		( Namespace0 == CurrentNamespace ->
446		Term = element(Tag, Attributes, Contents),
447		Context2 = Context1
448		;
449		Term = namespace( Namespace0, NSChars,
450		element(Tag, Attributes, Contents)
451		),
452		context_update( current_namespace, Context1, Namespace0, Context2 )
453		),
454		input_attributes( Attributes1, Context2, Attributes ),
455		atom_codes( Tag, TagChars1 ),
456		close_tag( Type, Chars, Context2, Contents, Residue, WF ).
457
458		close_tag( empty, Residue, _Context, [], Residue, true ).
459		close_tag( push(Tag), Chars, Context0, Contents, Residue, WF ) :-
460		context_update( element, Context0, Tag, Context1 ),
461		xml_to_document( Chars, Context1, Contents, Residue, WF ).
462
463		pi_acquisition( Chars, Context, Terms, Residue, WellFormed ) :-
464		( inline_instruction(Target, Processing, Chars, Rest ),
465		Target \== xml ->
466		Terms = [instructions(Target, Processing)\|Terms1],
467		xml_to_document( Rest, Context, Terms1, Residue, WellFormed )
468		;
469		unparsed( [0'<,0'?\|Chars], Context, Terms, Residue, WellFormed )
470		).
471
472		declaration_acquisition( Chars, Context, Terms, Residue, WF ) :-
473		( declaration_type( Chars, Type, Chars1 ),
474		declaration_parse( Type, Context, Term, Context1, Chars1, Rest ) ->
475		Terms = [Term\|Terms1],
476		xml_to_document( Rest, Context1, Terms1, Residue, WF )
477		;
478		unparsed( [0'<,0'!\|Chars], Context, Terms, Residue, WF )
479		).
480
481		open_tag( Tag, Namespaces, Attributes, Termination ) -->
482		nmtoken_chars( Tag ),
483		attributes( Attributes, [], Namespaces ),
484		spaces,
485		(open_tag_terminator( Tag, Termination ) -> []
486		; {atom_codes(ATag,Tag), add_xml_warning('XML tag not properly terminated with >: ',ATag),fail}
487		).
488
489		open_tag_terminator( Tag, push(Tag) ) -->
490		">".
491		open_tag_terminator( _Tag, empty ) -->
492		"/>".
493
494		declaration_parse( comment, Namespaces, comment(Comment), Namespaces ) -->
495		comment(Comment).
496		declaration_parse( cdata, Namespaces, cdata(CData), Namespaces ) -->
497		cdata( CData ).
498		declaration_parse( doctype, Namespaces0, doctype(Name, Names), Namespaces ) -->
499		doctype( Name, Names, Namespaces0, Namespaces ),
500		spaces,
501		">".
502
503		inline_instruction( Target, Processing, Plus, Minus ) :-
504		nmtoken(Target, Plus, Mid0 ),
505		spaces( Mid0, Mid1 ),
506		append( Processing, [0'?,0'>\|Minus], Mid1 ),
507		!.
508
509		entity_reference_name( Reference ) -->
510		nmtoken_chars( Reference ),
511		";".
512
513		declaration_type( [Char1,Char2\|Chars1], Class, Rest ) :-
514		Chars = [Char1,Char2\|Chars1],
515		( declaration_type1( Char1, Char2, Chars1, Class0, Residue ) ->
516		Class = Class0,
517		Rest = Residue
518		;
519		Class = generic,
520		Rest = Chars
521		).
522
523		declaration_type1( 0'-, 0'-, Chars, comment, Chars ).
524		declaration_type1( 0'[, 0'C, Chars, cdata, Residue ) :-
525		append( "DATA[", Residue, Chars ).
526		declaration_type1( 0'D, 0'O, Chars, doctype, Residue ) :-
527		append( "CTYPE", Residue, Chars ).
528
529		closing_tag( Context, Chars, Terms, Residue, WellFormed ) :-
530		( closing_tag_name( Tag, Chars, Rest ),
531		current_tag( Context, Tag ) ->
532		Terms = [],
533		Residue = Rest,
534		WellFormed = true
535		;
536		unparsed( [0'<,0'/\|Chars], Context, Terms, Residue, WellFormed )
537		).
538
539		closing_tag_name( Tag ) -->
540		nmtoken_chars( Tag ),
541		spaces,
542		">".
543
544		entity_reference( Chars, Context, Terms, Residue, WF ) :-
545		( standard_character_entity( Char, Chars, Rest ) ->
546		Terms = [pcdata([Char\|Chars1])\|Terms1],
547		acquire_pcdata( Rest, Context, Chars1, Terms1, Residue, WF )
548		; entity_reference_name( Reference, Chars, Rest ),
549		defined_entity( Reference, Context, String ) ->
550		append( String, Rest, Full ),
551		xml_to_document( Full, Context, Terms, Residue, WF )
552		;
553		unparsed( [0'&\|Chars], Context, Terms, Residue, WF ) %'
554		).
555
556		reference_in_pcdata( Chars0, Context, Chars1, Terms, Residue, WF ) :-
557		( standard_character_entity(Char, Chars0, Rest ) ->
558		Chars1 = [Char\|Chars2],
559		acquire_pcdata( Rest, Context, Chars2, Terms, Residue, WF )
560		; entity_reference_name(Reference, Chars0, Rest ),
561		defined_entity( Reference, Context, String ) ->
562		append( String, Rest, Full ),
563		acquire_pcdata( Full, Context, Chars1, Terms, Residue, WF )
564		;
565		Chars1 = [],
566		unparsed( [0'&\|Chars0], Context, Terms, Residue, WF ) %'
567		).
568
569		namespace_attributes( [], Context, Context, [] ).
570		namespace_attributes( Attributes0, Context0, Context, Attributes ) :-
571		Attributes0 = [_\|_],
572		append( "xmlns:", Unqualified, QualifiedNameChars ),
573		( select( "xmlns"=Value, Attributes0, Attributes1 ) ->
574		atom_codes( URI, Value ),
575		context_update( default_namespace, Context0, URI, Context1 ),
576		namespace_attributes( Attributes1, Context1, Context, Attributes )
577		; select( QualifiedNameChars=Value, Attributes0, Attributes1 ) ->
578		Attributes = [QualifiedNameChars=Value\|Attributes2],
579		atom_codes( URI, Value ),
580		context_update( ns_prefix(Unqualified), Context0, URI, Context1 ),
581		namespace_attributes( Attributes1, Context1, Context, Attributes2 )
582		; member( "xml:space"="preserve", Attributes0 ) ->
583		Attributes = Attributes0,
584		context_update( space_preserve, Context0, true, Context )
585		;
586		Context = Context0,
587		Attributes = Attributes0
588		).
589
590		input_attributes( [], _Context, [] ).
591		input_attributes( [NameChars=Value\|Attributes0], Context,
592		[Name=Value\|Attributes] ) :-
593		( remove_attribute_prefixes( Context ),
594		append( NSChars, [0':\|NameChars1], NameChars ), %'
595		NSChars \== "xmlns",
596		specific_namespace( NSChars, Context, Namespace ),
597		current_namespace( Context, Namespace ) ->
598		atom_codes( Name, NameChars1 )
599		;
600		atom_codes( Name, NameChars )
601		),
602		input_attributes( Attributes0, Context, Attributes ).
603
604		attributes( Attributes, Seen, Namespaces ) -->
605		attributes_count_lf(0, Attributes, Seen, Namespaces ). % PATCH LEUSCHEL: add count parameter
606
607		attributes_count_lf(Count0, [Name=Value\|Attributes], Seen, Namespaces ) -->
608		spaces_count_linebreaks(Count0,C1),
609		nmtoken_chars( Name ),
610		{\+ member(Name, Seen)},
611		spaces_count_linebreaks(C1,C2),
612		expect_tokens("=","attributes"),
613		spaces_count_linebreaks(C2,C3),
614		attribute_value( Value, Namespaces, Name ),
615		attributes_count_lf( C3, Attributes, [Name\|Seen], Namespaces ).
616		attributes_count_lf( 0, [], _Seen, _Namespaces ) --> !, "". % ,{print(no_linefeeds),nl}.
617		attributes_count_lf( Count, ["$attribute_linefeeds"=Count], _Seen, _Namespaces ) --> "". %, {print(linefeeds(Count)),nl}.
618
619		spaces_count_linebreaks(C,C, [], [] ).
620		spaces_count_linebreaks(Count,ResCount, [Char\|Chars0], Chars1 ) :-
621		( Char=10 ->
622		Count1 is Count+1, spaces_count_linebreaks(Count1, ResCount, Chars0, Chars1 )
623		; Char =< 32 ->
624		spaces_count_linebreaks(Count,ResCount, Chars0, Chars1 )
625		;
626		Count=ResCount, Chars1 = [Char\|Chars0]
627		).
628		% END PATCH
629
630		xml_declaration_attributes( [] ) --> "".
631		xml_declaration_attributes( Res ) -->
632		spaces,
633		nmtoken( Name ),
634		spaces,
635		expect_tokens("=","declaration attribute"),
636		spaces,
637		xml_string( Value ),
638		{xml_declaration_attribute_valid(Name, Value)
639		-> Res = [Name-Value\|Attributes] ; Res = [Attributes]}, % PATCH LEUSCHEL: also continue with invalid declarations
640		xml_declaration_attributes( Attributes ),
641		spaces.
642
643		doctype( Name, External, Namespaces0, Namespaces1 ) -->
644		spaces,
645		nmtoken( Name ),
646		spaces,
647		doctype_id( External0 ),
648		spaces,
649		doctype1( Namespaces0, Literals, Namespaces1 ),
650		{doctype_extension(Literals, External0, External)}.
651
652		doctype_extension( [], External, External ).
653		doctype_extension( [Literal\|Literals], External0, External ) :-
654		extended_doctype( External0, [Literal\|Literals], External ).
655
656		extended_doctype( system(URL), Literals, system(URL,Literals) ).
657		extended_doctype( public(URN,URL), Literals, public(URN,URL,Literals) ).
658		extended_doctype( local, Literals, local(Literals) ).
659
660		doctype1( Namespaces0, Literals, Namespaces1 ) -->
661		"[",
662		!,
663		dtd( Namespaces0, Literals, Namespaces1 ),
664		expect_tokens("]","doctype").
665		doctype1( Namespaces, [], Namespaces ) --> "".
666
667		doctype_id( system(URL) ) -->
668		"SYSTEM",
669		spaces,
670		uri( URL ).
671		doctype_id( public(URN,URL) ) -->
672		"PUBLIC",
673		spaces,
674		uri( URN ),
675		spaces,
676		uri( URL ).
677		doctype_id( local ) --> "".
678
679		dtd( Namespaces0, Literals, Namespaces1 ) -->
680		spaces,
681		"<!ENTITY",
682		!,
683		spaces,
684		nmtoken_chars( Name ),
685		spaces,
686		quote( Quote ),
687		entity_value( Quote, Namespaces0, String ),
688		spaces,
689		expect_tokens(">","ENTITY"),
690		{\+ character_entity( Name, _StandardChar ),
691		% Don't allow < "e; etc. to be updated
692		context_update( entity(Name), Namespaces0, String, Namespaces2 )
693		},
694		dtd( Namespaces2, Literals, Namespaces1 ).
695		dtd( Namespaces0, Literals, Namespaces1 ) -->
696		spaces,
697		"<!--",
698		!,
699		dtd_comment,
700		expect_tokens(">","comment"),
701		dtd( Namespaces0, Literals, Namespaces1 ).
702		dtd( Namespaces0, [dtd_literal(Literal)\|Literals], Namespaces1 ) -->
703		spaces,
704		"<!",
705		!,
706		dtd_literal( Literal ),
707		dtd( Namespaces0, Literals, Namespaces1 ).
708		dtd( Namespaces, [], Namespaces ) --> spaces.
709
710		dtd_literal( [] ) --> ">", !.
711		dtd_literal( Chars ) -->
712		"--",
713		!,
714		dtd_comment,
715		dtd_literal( Chars ).
716		dtd_literal( [Char\|Chars] ) -->
717		[Char],
718		dtd_literal( Chars ).
719
720		dtd_comment( Plus, Minus ) :-
721		append( _Chars, [0'-,0'-\|Minus], Plus ),
722		!.
723
724		entity_value( Quote, Namespaces, String, [Char\|Plus], Minus ) :-
725		( Char == Quote ->
726		String = [],
727		Minus = Plus
728		; Char =:= "&" ->
729		reference_in_entity( Namespaces, Quote, String, Plus, Minus )
730		;
731		String = [Char\|String1],
732		entity_value( Quote, Namespaces, String1, Plus, Minus )
733		).
734
735		attribute_value( String, Namespaces, Name ) -->
736		(quote( Quote ) -> []
737		; {atom_codes(AN,Name), add_xml_warning('Expecting starting quotes for value of XML attribute: ',AN),fail}),
738		attribute_leading_layouts( Quote, Namespaces, String ).
739
740		attribute_leading_layouts( _Quote, _Namespace, [], [], [] ).
741		attribute_leading_layouts( Quote, Namespaces, String, [Char\|Plus], Minus ) :-
742		( Char == Quote ->
743		String = [],
744		Minus = Plus
745		; Char =:= "&" ->
746		reference_in_layout( Namespaces, Quote, String, Plus, Minus )
747		; Char > 32, Char \== 160 ->
748		String = [Char\|String1],
749		attribute_layouts( Quote, Namespaces, false, String1, Plus, Minus )
750		;
751		attribute_leading_layouts( Quote, Namespaces, String, Plus, Minus )
752		).
753
754		attribute_layouts( _Quote, _Namespaces, _Layout, [], [], [] ).
755		attribute_layouts( Quote, Namespaces, Layout, String, [Char\|Plus], Minus ) :-
756		( Char == Quote ->
757		String = [],
758		Minus = Plus
759		; Char =:= "&" ->
760		reference_in_value( Namespaces, Quote, Layout, String, Plus, Minus )
761		; Char > 32, Char \== 160 ->
762		( Layout == true ->
763		String = [0' ,Char\|String1] %'
764		;
765		String = [Char\|String1]
766		),
767		attribute_layouts( Quote, Namespaces, false, String1, Plus, Minus )
768		;
769		attribute_layouts( Quote, Namespaces, true, String, Plus, Minus )
770		).
771
772		reference_in_layout( NS, Quote, String, Plus, Minus ) :-
773		( standard_character_entity( Char, Plus, Mid ) ->
774		String = [Char\|String1],
775		attribute_layouts( Quote, NS, false, String1, Mid, Minus )
776		; entity_reference_name( Name, Plus, Suffix ),
777		defined_entity( Name, NS, Text ) ->
778		append( Text, Suffix, Mid ),
779		attribute_leading_layouts( Quote, NS, String, Mid, Minus )
780		; % Just & is okay in a value
781		String = [0'&\|String1], %'
782		attribute_layouts( Quote, NS, false, String1, Plus, Minus )
783		).
784
785		reference_in_value( Namespaces, Quote, Layout, String, Plus, Minus ) :-
786		( standard_character_entity( Char, Plus, Mid ) ->
787		( Layout == true ->
788		String = [0' ,Char\|String1] %'
789		;
790		String = [Char\|String1]
791		),
792		Layout1 = false
793		; entity_reference_name( Name, Plus, Suffix ),
794		defined_entity( Name, Namespaces, Text ) ->
795		String = String1,
796		append( Text, Suffix, Mid ),
797		Layout1 = Layout
798		; % Just & is okay in a value
799		Mid = Plus,
800		String = [0'&\|String1], %'
801		Layout1 = false
802		),
803		attribute_layouts( Quote, Namespaces, Layout1, String1, Mid, Minus ).
804
805		/* References are resolved backwards in Entity defintions so that
806		* circularity is avoided.
807		*/
808		reference_in_entity( Namespaces, Quote, String, Plus, Minus ) :-
809		( standard_character_entity( _SomeChar, Plus, _Rest ) ->
810		String = [0'&\|String1], % ' Character entities are unparsed
811		Mid = Plus
812		; entity_reference_name( Name, Plus, Suffix ),
813		defined_entity( Name, Namespaces, Text ) ->
814		String = String1,
815		append( Text, Suffix, Mid )
816		),
817		entity_value( Quote, Namespaces, String1, Mid, Minus ).
818
819		standard_character_entity( Char ) -->
820		"#x", hex_character_reference( Char ), ";".
821		standard_character_entity( Char ) -->
822		"#", digit( Digit ), digits( Digits ), ";",
823		{number_codes( Char, [Digit\|Digits])}.
824		standard_character_entity( C ) -->
825		chars( String ),
826		";",
827		!,
828		{character_entity(String, C)}.
829
830		uri( URI ) -->
831		quote( Quote ),
832		uri1( Quote, URI ).
833
834		uri1( Quote, [] ) -->
835		quote( Quote ),
836		!.
837		uri1( Quote, [Char\|Chars] ) -->
838		[Char],
839		uri1( Quote, Chars ).
840
841		comment( Chars, Plus, Minus ) :-
842		append( Chars, [0'-,0'-,0'>\|Minus], Plus ), %'
843		!.
844
845		cdata( Chars, Plus, Minus ) :-
846		append( Chars, [0'],0'],0'>\|Minus], Plus ), %'
847		!.
848		% Syntax Components
849
850		hex_character_reference( Code ) -->
851		hex_character_reference1( 0, Code ).
852
853		hex_character_reference1( Current, Code ) -->
854		hex_digit_char( Value ),
855		!,
856		{New is (Current << 4) + Value},
857		hex_character_reference1( New, Code ).
858		hex_character_reference1( Code, Code ) --> "".
859
860		hex_digit_char( 0 ) --> "0".
861		hex_digit_char( 1 ) --> "1".
862		hex_digit_char( 2 ) --> "2".
863		hex_digit_char( 3 ) --> "3".
864		hex_digit_char( 4 ) --> "4".
865		hex_digit_char( 5 ) --> "5".
866		hex_digit_char( 6 ) --> "6".
867		hex_digit_char( 7 ) --> "7".
868		hex_digit_char( 8 ) --> "8".
869		hex_digit_char( 9 ) --> "9".
870		hex_digit_char( 10 ) --> "A".
871		hex_digit_char( 11 ) --> "B".
872		hex_digit_char( 12 ) --> "C".
873		hex_digit_char( 13 ) --> "D".
874		hex_digit_char( 14 ) --> "E".
875		hex_digit_char( 15 ) --> "F".
876		hex_digit_char( 10 ) --> "a".
877		hex_digit_char( 11 ) --> "b".
878		hex_digit_char( 12 ) --> "c".
879		hex_digit_char( 13 ) --> "d".
880		hex_digit_char( 14 ) --> "e".
881		hex_digit_char( 15 ) --> "f".
882
883		quote( 0'" ) --> %'
884		"""".
885		quote( 0'\' ) -->
886		"'".
887
888		spaces( [], [] ).
889		spaces( [Char\|Chars0], Chars1 ) :-
890		( Char =< 32 ->
891		spaces( Chars0, Chars1 )
892		;
893		Chars1 = [Char\|Chars0]
894		).
895
896		nmtoken( Name ) -->
897		nmtoken_chars( Chars ),
898		{atom_codes(Name, Chars)}.
899
900		nmtoken_chars( [Char\|Chars] ) -->
901		[Char],
902		{nmtoken_first( Char )},
903		nmtoken_chars_tail( Chars ).
904
905		nmtoken_chars_tail( [Char\|Chars] ) -->
906		[Char],
907		{nmtoken_char(Char)},
908		!,
909		nmtoken_chars_tail( Chars ).
910		nmtoken_chars_tail([]) --> "".
911
912		nmtoken_first( 0': ).
913		nmtoken_first( 0'_ ).
914		nmtoken_first( Char ) :-
915		alphabet( Char ).
916
917		nmtoken_char( 0'a ).
918		nmtoken_char( 0'b ).
919		nmtoken_char( 0'c ).
920		nmtoken_char( 0'd ).
921		nmtoken_char( 0'e ).
922		nmtoken_char( 0'f ).
923		nmtoken_char( 0'g ).
924		nmtoken_char( 0'h ).
925		nmtoken_char( 0'i ).
926		nmtoken_char( 0'j ).
927		nmtoken_char( 0'k ).
928		nmtoken_char( 0'l ).
929		nmtoken_char( 0'm ).
930		nmtoken_char( 0'n ).
931		nmtoken_char( 0'o ).
932		nmtoken_char( 0'p ).
933		nmtoken_char( 0'q ).
934		nmtoken_char( 0'r ).
935		nmtoken_char( 0's ).
936		nmtoken_char( 0't ).
937		nmtoken_char( 0'u ).
938		nmtoken_char( 0'v ).
939		nmtoken_char( 0'w ).
940		nmtoken_char( 0'x ).
941		nmtoken_char( 0'y ).
942		nmtoken_char( 0'z ).
943		nmtoken_char( 0'A ).
944		nmtoken_char( 0'B ).
945		nmtoken_char( 0'C ).
946		nmtoken_char( 0'D ).
947		nmtoken_char( 0'E ).
948		nmtoken_char( 0'F ).
949		nmtoken_char( 0'G ).
950		nmtoken_char( 0'H ).
951		nmtoken_char( 0'I ).
952		nmtoken_char( 0'J ).
953		nmtoken_char( 0'K ).
954		nmtoken_char( 0'L ).
955		nmtoken_char( 0'M ).
956		nmtoken_char( 0'N ).
957		nmtoken_char( 0'O ).
958		nmtoken_char( 0'P ).
959		nmtoken_char( 0'Q ).
960		nmtoken_char( 0'R ).
961		nmtoken_char( 0'S ).
962		nmtoken_char( 0'T ).
963		nmtoken_char( 0'U ).
964		nmtoken_char( 0'V ).
965		nmtoken_char( 0'W ).
966		nmtoken_char( 0'X ).
967		nmtoken_char( 0'Y ).
968		nmtoken_char( 0'Z ).
969		nmtoken_char( 0'0 ).
970		nmtoken_char( 0'1 ).
971		nmtoken_char( 0'2 ).
972		nmtoken_char( 0'3 ).
973		nmtoken_char( 0'4 ).
974		nmtoken_char( 0'5 ).
975		nmtoken_char( 0'6 ).
976		nmtoken_char( 0'7 ).
977		nmtoken_char( 0'8 ).
978		nmtoken_char( 0'9 ).
979		nmtoken_char( 0'. ).
980		nmtoken_char( 0'- ).
981		nmtoken_char( 0'_ ).
982		nmtoken_char( 0': ).
983
984		xml_string( String ) -->
985		quote( Quote ),
986		xml_string1( Quote, String ).
987
988		xml_string1( Quote, [] ) -->
989		quote( Quote ),
990		!.
991		xml_string1( Quote, [Char\|Chars] ) -->
992		[Char],
993		xml_string1( Quote, Chars ).
994
995		alphabet( 0'a ).
996		alphabet( 0'b ).
997		alphabet( 0'c ).
998		alphabet( 0'd ).
999		alphabet( 0'e ).
1000		alphabet( 0'f ).
1001		alphabet( 0'g ).
1002		alphabet( 0'h ).
1003		alphabet( 0'i ).
1004		alphabet( 0'j ).
1005		alphabet( 0'k ).
1006		alphabet( 0'l ).
1007		alphabet( 0'm ).
1008		alphabet( 0'n ).
1009		alphabet( 0'o ).
1010		alphabet( 0'p ).
1011		alphabet( 0'q ).
1012		alphabet( 0'r ).
1013		alphabet( 0's ).
1014		alphabet( 0't ).
1015		alphabet( 0'u ).
1016		alphabet( 0'v ).
1017		alphabet( 0'w ).
1018		alphabet( 0'x ).
1019		alphabet( 0'y ).
1020		alphabet( 0'z ).
1021		alphabet( 0'A ).
1022		alphabet( 0'B ).
1023		alphabet( 0'C ).
1024		alphabet( 0'D ).
1025		alphabet( 0'E ).
1026		alphabet( 0'F ).
1027		alphabet( 0'G ).
1028		alphabet( 0'H ).
1029		alphabet( 0'I ).
1030		alphabet( 0'J ).
1031		alphabet( 0'K ).
1032		alphabet( 0'L ).
1033		alphabet( 0'M ).
1034		alphabet( 0'N ).
1035		alphabet( 0'O ).
1036		alphabet( 0'P ).
1037		alphabet( 0'Q ).
1038		alphabet( 0'R ).
1039		alphabet( 0'S ).
1040		alphabet( 0'T ).
1041		alphabet( 0'U ).
1042		alphabet( 0'V ).
1043		alphabet( 0'W ).
1044		alphabet( 0'X ).
1045		alphabet( 0'Y ).
1046		alphabet( 0'Z ).
1047
1048		digit( C ) --> [C], {digit_table( C )}.
1049
1050		digit_table( 0'0 ).
1051		digit_table( 0'1 ).
1052		digit_table( 0'2 ).
1053		digit_table( 0'3 ).
1054		digit_table( 0'4 ).
1055		digit_table( 0'5 ).
1056		digit_table( 0'6 ).
1057		digit_table( 0'7 ).
1058		digit_table( 0'8 ).
1059		digit_table( 0'9 ).
1060
1061		digits( [Digit\|Digits] ) -->
1062		digit( Digit ),
1063		digits( Digits ).
1064		digits( [] ) --> [].
1065
1066		character_entity( "quot", 0'" ). %'
1067		character_entity( "amp", 0'& ). %'
1068		character_entity( "lt", 0'< ). %'
1069		character_entity( "gt", 0'> ). %'
1070		character_entity( "apos", 0'\' ).
1071		/* xml_diagnosis.pl : XML exception diagnosis.
1072		*
1073		* $Revision$
1074		*/
1075
1076		/* xml_fault( +Term, +Indentation, ?SubTerm, ?Path, ?Message ) identifies SubTerm
1077		* as a sub-term of Term which cannot be serialized after Indentation.
1078		* Message is an atom naming the type of error; Path is a string encoding a
1079		* list of SubTerm's ancestor elements in the form <tag>{(id)}* where <tag> is the
1080		* element tag and <id> is the value of any attribute _named_ id.
1081		*/
1082		xml_fault( Term, _Indent, Term, [], 'Illegal Variable' ) :-
1083		var( Term ).
1084		xml_fault( xml(Attributes,_Content), _Indent, Term, [], Message ) :-
1085		member( Attribute, Attributes ),
1086		attribute_fault( Attribute, Term, Message ).
1087		xml_fault( xml(_Attributes,Content), Indent, Culprit, Path, Message ) :-
1088		xml_content_fault( Content, Indent, Culprit, Path, Message ).
1089		xml_fault( Term, _Indent, Term, [], 'Illegal Term' ).
1090
1091		xml_content_fault( Term, _Indent, Term, [], 'Illegal Variable' ) :-
1092		var( Term ).
1093		xml_content_fault( pcdata(Chars), _Indent, Chars, [], 'Invalid Character Data' ) :-
1094		\+ is_chars( Chars ).
1095		xml_content_fault( cdata(Chars), _Indent, Chars, [], 'Invalid Character Data' ) :-
1096		\+ is_chars( Chars ).
1097		xml_content_fault( [H\|_T], Indent, Culprit, Path, Message ) :-
1098		xml_content_fault( H, Indent, Culprit, Path, Message ).
1099		xml_content_fault( [_H\|T], Indent, Culprit, Path, Message ) :-
1100		xml_content_fault( T, Indent, Culprit, Path, Message ).
1101		xml_content_fault( namespace(_URI,_Prefix,Element), Indent, Culprit, Path, Message ) :-
1102		element_fault( Element, [0' \|Indent], Culprit, Path, Message ).
1103		xml_content_fault( Element, Indent, Culprit, Path, Message ) :-
1104		element_fault( Element, [0' \|Indent], Culprit, Path, Message ).
1105		xml_content_fault( Term, Indent, Term, [], 'Illegal Term' ) :-
1106		\+ generation(Term, "", false, Indent, _Format, _Plus, _Minus ).
1107
1108		element_fault( element(Tag, _Attributes, _Contents), _Indent, Tag, [], 'Tag must be an atom' ) :-
1109		\+ atom( Tag ).
1110		element_fault( element(Tag, Attributes, _Contents), _Indent, Tag, [], 'Attributes must be instantiated' ) :-
1111		var( Attributes ).
1112		element_fault( element(Tag, Attributes, _Contents), _Indent, Faulty, Path, Message ) :-
1113		fault_path( Tag, Attributes, Path, [] ),
1114		member( Attribute, Attributes ),
1115		attribute_fault( Attribute, Faulty, Message ).
1116		element_fault( element(Tag, Attributes, Contents), Indent, Culprit, Path, Message ) :-
1117		fault_path( Tag, Attributes, Path, Path1 ),
1118		xml_content_fault( Contents, Indent, Culprit, Path1, Message ).
1119
1120		attribute_fault( Attribute, Attribute, 'Illegal Variable' ) :-
1121		var( Attribute ).
1122		attribute_fault( Name=Value, Name=Value, 'Attribute Name must be atom' ) :-
1123		\+ atom(Name).
1124		attribute_fault( Name=Value, Name=Value, 'Attribute Value must be chars' ) :-
1125		\+ is_chars( Value ).
1126		attribute_fault( Attribute, Attribute, 'Malformed Attribute' ) :-
1127		Attribute \= (_Name=_Value).
1128
1129		is_chars( Chars ) :-
1130		is_list( Chars ),
1131		\+ (member( Char, Chars ), \+ (integer(Char), Char >=0, Char =< 255)).
1132
1133		fault_path( Tag, Attributes ) -->
1134		{atom_codes( Tag, Chars )},
1135		chars( Chars ),
1136		fault_id( Attributes ),
1137		" ".
1138
1139		fault_id( Attributes ) -->
1140		{member( id=Chars, Attributes ), is_chars( Chars )},
1141		!,
1142		"(", chars(Chars), ")".
1143		fault_id( _Attributes ) --> "".
1144		/* xml_generation.pl : Document -> XML translation
1145		*
1146		* $Revision$
1147		*/
1148
1149		/* document_generation( +Format, +Document ) is a DCG generating Document
1150		* as a list of character codes. Format is true\|false defining whether layouts,
1151		* to provide indentation, should be added between the element content of
1152		* the resultant "string". Note that formatting is disabled for elements that
1153		* are interspersed with pcdata/1 terms, such as XHTML's 'inline' elements.
1154		* Also, Format is over-ridden, for an individual element, by an explicit
1155		* 'xml:space'="preserve" attribute.
1156		*/
1157		document_generation( Format, xml(Attributes, Document) ) -->
1158	?	document_generation_body( Attributes, Format, Document ).
1159
1160		document_generation_body( [], Format, Document ) -->
1161		generation( Document, "", Format, [], _Format1 ).
1162		document_generation_body( Attributes, Format, Document ) -->
1163		{ Attributes = [_\|_],
1164	?	xml_declaration_attributes_valid( Attributes )
1165		},
1166		"<?xml",
1167		generated_attributes( Attributes, Format, Format0 ),
1168		"?>",
1169		indent( true, [] ),
1170		generation( Document, "", Format0, [], _Format1 ).
1171
1172		generation( [], _Prefix, Format, _Indent, Format ) --> [].
1173		generation( [Term\|Terms], Prefix, Format0, Indent, Format ) -->
1174		generation( Term, Prefix, Format0, Indent, Format1 ),
1175		generation( Terms, Prefix, Format1, Indent, Format ).
1176		generation( doctype(Name, External), _Prefix, Format, [], Format ) -->
1177		"<!DOCTYPE ",
1178		generated_name( Name ),
1179		generated_external_id( External ),
1180		">".
1181		generation( instructions(Target,Process), _Prefix, Format, Indent, Format ) -->
1182		indent( Format, Indent ),
1183		"<?", generated_name(Target), " ", chars( Process ) ,"?>".
1184		generation( pcdata(Chars), _Prefix, _Format, _Indent, false ) -->
1185		pcdata_generation( Chars ).
1186		generation( comment( Comment ), _Prefix, Format, Indent, Format ) -->
1187		indent( Format, Indent ),
1188		"<!--", chars( Comment ), "-->".
1189		generation( namespace(URI, Prefix, element(Name, Atts, Content)),
1190		_Prefix0, Format, Indent, Format ) -->
1191		indent( Format, Indent ),
1192		"<", generated_prefixed_name( Prefix, Name ),
1193		generated_prefixed_attributes( Prefix, URI, Atts, Format, Format1 ),
1194		generated_content( Content, Format1, Indent, Prefix, Name ).
1195		generation( element(Name, Atts, Content), Prefix, Format, Indent, Format ) -->
1196		indent( Format, Indent ),
1197		"<", generated_prefixed_name( Prefix, Name ),
1198		generated_attributes( Atts, Format, Format1 ),
1199		generated_content( Content, Format1, Indent, Prefix, Name ).
1200		generation( cdata(CData), _Prefix, Format, Indent, Format ) -->
1201		indent( Format, Indent ),
1202		"<![CDATA[", cdata_generation(CData), "]]>".
1203
1204		generated_attributes( [], Format, Format ) --> [].
1205		generated_attributes( [Name=Value\|Attributes], Format0, Format ) -->
1206		{( Name == 'xml:space',
1207		Value="preserve" ->
1208		Format1 = false
1209		;
1210		Format1 = Format0
1211		)},
1212		" ",
1213		generated_name( Name ),
1214		"=""",
1215		quoted_string( Value ),
1216		"""",
1217		generated_attributes( Attributes, Format1, Format ).
1218
1219		generated_prefixed_name( [], Name ) -->
1220		generated_name( Name ).
1221		generated_prefixed_name( Prefix, Name ) -->
1222		{Prefix = [_\|_]},
1223		chars( Prefix ), ":",
1224		generated_name( Name ).
1225
1226		generated_content( [], _Format, _Indent, _Prefix, _Namespace ) -->
1227		" />". % Leave an extra space for XHTML output.
1228		generated_content( [H\|T], Format, Indent, Prefix, Namespace ) -->
1229		">",
1230		generation( H, Prefix, Format, [0' \|Indent], Format1 ),
1231		generation( T, Prefix, Format1, [0' \|Indent], Format2 ),
1232		indent( Format2, Indent ),
1233		"</", generated_prefixed_name( Prefix, Namespace ), ">".
1234
1235		generated_prefixed_attributes( [_\|_Prefix], _URI, Atts, Format0, Format ) -->
1236		generated_attributes( Atts, Format0, Format ).
1237		generated_prefixed_attributes( [], URI, Atts, Format0, Format ) -->
1238		{atom_codes( URI, Namespace ),
1239		findall( Attr, (member(Attr, Atts), Attr \= (xmlns=_Val)), Atts1 )
1240		},
1241		generated_attributes( [xmlns=Namespace\|Atts1], Format0, Format ).
1242
1243		generated_name( Name, Plus, Minus ) :-
1244		atom_codes( Name, Chars ),
1245		append( Chars, Minus, Plus ).
1246
1247		generated_external_id( local ) --> "".
1248		generated_external_id( local(Literals) ) --> " [",
1249		generated_doctype_literals( Literals ), "\n\t]".
1250		generated_external_id( system(URL) ) -->
1251		" SYSTEM """,
1252		chars( URL ),
1253		"""".
1254		generated_external_id( system(URL,Literals) ) -->
1255		" SYSTEM """,
1256		chars( URL ),
1257		""" [",
1258		generated_doctype_literals( Literals ), "\n\t]".
1259		generated_external_id( public(URN,URL) ) -->
1260		" PUBLIC """,
1261		chars( URN ),
1262		""" """,
1263		chars( URL ),
1264		"""".
1265		generated_external_id( public(URN,URL,Literals) ) -->
1266		" PUBLIC """,
1267		chars( URN ),
1268		""" """,
1269		chars( URL ),
1270		""" [",
1271		generated_doctype_literals( Literals ), "\n\t]".
1272
1273		generated_doctype_literals( [] ) --> "".
1274		generated_doctype_literals( [dtd_literal(String)\|Literals] ) --> "\n\t",
1275		"<!", cdata_generation( String ), ">",
1276		generated_doctype_literals( Literals ).
1277
1278		/* quoted_string( +Chars ) is a DCG representing Chars, a list of character
1279		* codes, as a legal XML attribute string. Any leading or trailing layout
1280		* characters are removed. &, " and < characters are replaced by &, "
1281		* and < respectively.
1282		*/
1283		quoted_string( Raw, Plus, Minus ) :-
1284		quoted_string1( Raw, NoLeadingLayouts ),
1285		quoted_string2( NoLeadingLayouts, Layout, Layout, Plus, Minus ).
1286
1287		quoted_string1( [], [] ).
1288		quoted_string1( [Char\|Chars], NoLeadingLayouts ) :-
1289		( Char > 32 ->
1290		NoLeadingLayouts = [Char\|Chars]
1291		;
1292		quoted_string1( Chars, NoLeadingLayouts )
1293		).
1294
1295		quoted_string2( [], _LayoutPlus, _LayoutMinus, List, List ).
1296		quoted_string2( [Char\|Chars], LayoutPlus, LayoutMinus, Plus, Minus ) :-
1297		( Char =< " " ->
1298		Plus = Plus1,
1299		LayoutMinus = [Char\|LayoutMinus1],
1300		LayoutPlus = LayoutPlus1
1301		; Char =< 127 ->
1302		Plus = LayoutPlus,
1303		pcdata_7bit( Char, LayoutMinus, Plus1 ),
1304		LayoutPlus1 = LayoutMinus1
1305		; legal_xml_unicode( Char ) ->
1306		Plus = LayoutPlus,
1307		number_codes( Char, Codes ),
1308		pcdata_8bits_plus( Codes, LayoutMinus, Plus1 ),
1309		LayoutPlus1 = LayoutMinus1
1310		;
1311		LayoutPlus = LayoutPlus1,
1312		LayoutMinus = LayoutMinus1,
1313		Plus = Plus1
1314		),
1315		quoted_string2( Chars, LayoutPlus1, LayoutMinus1, Plus1, Minus ).
1316
1317		indent( false, _Indent ) --> [].
1318		indent( true, Indent ) --> "\n",
1319		chars( Indent ).
1320
1321		/* pcdata_generation( +Chars ) is a DCG representing Chars, a list of character
1322		* codes as legal XML "Parsed character data" (PCDATA) string. Any codes
1323		* which cannot be represented by a 7-bit character are replaced by their
1324		* decimal numeric character entity e.g. code 160 (non-breaking space) is
1325		* represented as . Any character codes disallowed by the XML
1326		* specification are not encoded.
1327		*/
1328		pcdata_generation( [], Plus, Plus ).
1329		pcdata_generation( [Char\|Chars], Plus, Minus ) :-
1330		( Char =< 127 ->
1331		pcdata_7bit( Char, Plus, Mid )
1332		; legal_xml_unicode( Char ) ->
1333		number_codes( Char, Codes ),
1334		pcdata_8bits_plus( Codes, Plus, Mid )
1335		;
1336		Plus = Mid
1337		),
1338		pcdata_generation( Chars, Mid, Minus ).
1339
1340		/* pcdata_7bit(+Char) represents the ascii character set in its
1341		* simplest format, using the character entities & " < and >
1342		* which are common to both XML and HTML. The numeric entity ' is used in
1343		* place of ', because browsers don't recognize it in HTML.
1344		*/
1345		pcdata_7bit( 0 ) --> "".
1346		pcdata_7bit( 1 ) --> "".
1347		pcdata_7bit( 2 ) --> "".
1348		pcdata_7bit( 3 ) --> "".
1349		pcdata_7bit( 4 ) --> "".
1350		pcdata_7bit( 5 ) --> "".
1351		pcdata_7bit( 6 ) --> "".
1352		pcdata_7bit( 7 ) --> "".
1353		pcdata_7bit( 8 ) --> "".
1354		pcdata_7bit( 9 ) --> [9].
1355		pcdata_7bit( 10 ) --> [10].
1356		pcdata_7bit( 11 ) --> "".
1357		pcdata_7bit( 12 ) --> "".
1358		pcdata_7bit( 13 ) --> [13].
1359		pcdata_7bit( 14 ) --> "".
1360		pcdata_7bit( 15 ) --> "".
1361		pcdata_7bit( 16 ) --> "".
1362		pcdata_7bit( 17 ) --> "".
1363		pcdata_7bit( 18 ) --> "".
1364		pcdata_7bit( 19 ) --> "".
1365		pcdata_7bit( 20 ) --> "".
1366		pcdata_7bit( 21 ) --> "".
1367		pcdata_7bit( 22 ) --> "".
1368		pcdata_7bit( 23 ) --> "".
1369		pcdata_7bit( 24 ) --> "".
1370		pcdata_7bit( 25 ) --> "".
1371		pcdata_7bit( 26 ) --> "".
1372		pcdata_7bit( 27 ) --> "".
1373		pcdata_7bit( 28 ) --> "".
1374		pcdata_7bit( 29 ) --> "".
1375		pcdata_7bit( 30 ) --> "".
1376		pcdata_7bit( 31 ) --> "".
1377		pcdata_7bit( 32 ) --> " ".
1378		pcdata_7bit( 33 ) --> "!".
1379		pcdata_7bit( 34 ) --> """.
1380		pcdata_7bit( 35 ) --> "#".
1381		pcdata_7bit( 36 ) --> "$".
1382		pcdata_7bit( 37 ) --> "%".
1383		pcdata_7bit( 38 ) --> "&".
1384		pcdata_7bit( 39 ) --> "'".
1385		pcdata_7bit( 40 ) --> "(".
1386		pcdata_7bit( 41 ) --> ")".
1387		pcdata_7bit( 42 ) --> "*".
1388		pcdata_7bit( 43 ) --> "+".
1389		pcdata_7bit( 44 ) --> ",".
1390		pcdata_7bit( 45 ) --> "-".
1391		pcdata_7bit( 46 ) --> ".".
1392		pcdata_7bit( 47 ) --> "/".
1393		pcdata_7bit( 48 ) --> "0".
1394		pcdata_7bit( 49 ) --> "1".
1395		pcdata_7bit( 50 ) --> "2".
1396		pcdata_7bit( 51 ) --> "3".
1397		pcdata_7bit( 52 ) --> "4".
1398		pcdata_7bit( 53 ) --> "5".
1399		pcdata_7bit( 54 ) --> "6".
1400		pcdata_7bit( 55 ) --> "7".
1401		pcdata_7bit( 56 ) --> "8".
1402		pcdata_7bit( 57 ) --> "9".
1403		pcdata_7bit( 58 ) --> ":".
1404		pcdata_7bit( 59 ) --> ";".
1405		pcdata_7bit( 60 ) --> "<".
1406		pcdata_7bit( 61 ) --> "=".
1407		pcdata_7bit( 62 ) --> ">".
1408		pcdata_7bit( 63 ) --> "?".
1409		pcdata_7bit( 64 ) --> "@".
1410		pcdata_7bit( 65 ) --> "A".
1411		pcdata_7bit( 66 ) --> "B".
1412		pcdata_7bit( 67 ) --> "C".
1413		pcdata_7bit( 68 ) --> "D".
1414		pcdata_7bit( 69 ) --> "E".
1415		pcdata_7bit( 70 ) --> "F".
1416		pcdata_7bit( 71 ) --> "G".
1417		pcdata_7bit( 72 ) --> "H".
1418		pcdata_7bit( 73 ) --> "I".
1419		pcdata_7bit( 74 ) --> "J".
1420		pcdata_7bit( 75 ) --> "K".
1421		pcdata_7bit( 76 ) --> "L".
1422		pcdata_7bit( 77 ) --> "M".
1423		pcdata_7bit( 78 ) --> "N".
1424		pcdata_7bit( 79 ) --> "O".
1425		pcdata_7bit( 80 ) --> "P".
1426		pcdata_7bit( 81 ) --> "Q".
1427		pcdata_7bit( 82 ) --> "R".
1428		pcdata_7bit( 83 ) --> "S".
1429		pcdata_7bit( 84 ) --> "T".
1430		pcdata_7bit( 85 ) --> "U".
1431		pcdata_7bit( 86 ) --> "V".
1432		pcdata_7bit( 87 ) --> "W".
1433		pcdata_7bit( 88 ) --> "X".
1434		pcdata_7bit( 89 ) --> "Y".
1435		pcdata_7bit( 90 ) --> "Z".
1436		pcdata_7bit( 91 ) --> "[".
1437		pcdata_7bit( 92 ) --> [92].
1438		pcdata_7bit( 93 ) --> "]".
1439		pcdata_7bit( 94 ) --> "^".
1440		pcdata_7bit( 95 ) --> "_".
1441		pcdata_7bit( 96 ) --> "`".
1442		pcdata_7bit( 97 ) --> "a".
1443		pcdata_7bit( 98 ) --> "b".
1444		pcdata_7bit( 99 ) --> "c".
1445		pcdata_7bit( 100 ) --> "d".
1446		pcdata_7bit( 101 ) --> "e".
1447		pcdata_7bit( 102 ) --> "f".
1448		pcdata_7bit( 103 ) --> "g".
1449		pcdata_7bit( 104 ) --> "h".
1450		pcdata_7bit( 105 ) --> "i".
1451		pcdata_7bit( 106 ) --> "j".
1452		pcdata_7bit( 107 ) --> "k".
1453		pcdata_7bit( 108 ) --> "l".
1454		pcdata_7bit( 109 ) --> "m".
1455		pcdata_7bit( 110 ) --> "n".
1456		pcdata_7bit( 111 ) --> "o".
1457		pcdata_7bit( 112 ) --> "p".
1458		pcdata_7bit( 113 ) --> "q".
1459		pcdata_7bit( 114 ) --> "r".
1460		pcdata_7bit( 115 ) --> "s".
1461		pcdata_7bit( 116 ) --> "t".
1462		pcdata_7bit( 117 ) --> "u".
1463		pcdata_7bit( 118 ) --> "v".
1464		pcdata_7bit( 119 ) --> "w".
1465		pcdata_7bit( 120 ) --> "x".
1466		pcdata_7bit( 121 ) --> "y".
1467		pcdata_7bit( 122 ) --> "z".
1468		pcdata_7bit( 123 ) --> "{".
1469		pcdata_7bit( 124 ) --> "\|".
1470		pcdata_7bit( 125 ) --> "}".
1471		pcdata_7bit( 126 ) --> "~".
1472		pcdata_7bit( 127 ) --> "".
1473
1474		pcdata_8bits_plus( Codes ) -->
1475		"&#", chars( Codes ), ";".
1476
1477		/* cdata_generation( +Chars ) is a DCG representing Chars, a list of character
1478		* codes as a legal XML CDATA string. Any character codes disallowed by the XML
1479		* specification are not encoded.
1480		*/
1481		cdata_generation( [] ) --> "".
1482		cdata_generation( [Char\|Chars] ) -->
1483		( {legal_xml_unicode( Char )}, !, [Char]
1484		\| ""
1485		),
1486		cdata_generation( Chars ).
1487
1488		legal_xml_unicode( 9 ).
1489		legal_xml_unicode( 10 ).
1490		legal_xml_unicode( 13 ).
1491		legal_xml_unicode( Code ) :-
1492		Code >= 32,
1493		Code =< 55295.
1494		legal_xml_unicode( Code ) :-
1495		Code >= 57344,
1496		Code =< 65533.
1497		legal_xml_unicode( Code ) :-
1498		Code >= 65536,
1499		Code =< 1114111.
1500
1501		/* xml_pp: "pretty print" an XML Document on the current output stream.
1502		*
1503		* Current Release: 1.9
1504		*/
1505		/* xml_pp( +XMLDocument ) "pretty prints" XMLDocument on the current
1506		* output stream.
1507		*/
1508		xml_pp( xml(Attributes, Document) ) :-
1509		write( 'xml( ' ), pp_attributes( Attributes, "" ), put_code( 0', ), nl,
1510		xml_pp_list( Document, "\t" ),
1511		format( ' ).~n', [] ).
1512		xml_pp( malformed(Attributes, Document) ) :-
1513		write( 'malformed( ' ), pp_attributes( Attributes, "" ), put_code( 0', ), nl,
1514		xml_pp_list( Document, "\t" ),
1515		format( ' ).~n', [] ).
1516
1517		xml_pp_indented( [], Indent ) :-
1518		format( '~s[]', [Indent] ).
1519		xml_pp_indented( List, Indent ) :-
1520		List = [_\|_],
1521		format( '~s', [Indent] ),
1522		xml_pp_list( List, Indent ).
1523		xml_pp_indented( comment(Text), Indent ) :-
1524		format( '~scomment(', [Indent] ), pp_string(Text), put_code( 0') ).
1525		xml_pp_indented( namespace(URI,Prefix,Element), Indent ) :-
1526		format( '~snamespace( ~q, "~s",~n', [Indent,URI,Prefix] ),
1527		xml_pp_indented( Element, [0'\t\|Indent] ),
1528		format( '~n~s)', [[0'\t\|Indent]] ).
1529		xml_pp_indented( element(Tag,Attributes,Contents), Indent ) :-
1530		format( '~selement( ~q,~n', [Indent,Tag] ),
1531		pp_attributes( Attributes, [0'\t\|Indent] ), put_code(0',), nl,
1532		xml_pp_list( Contents, [0'\t\|Indent] ), write( ' )' ).
1533		xml_pp_indented( instructions(Target, Processing), Indent ) :-
1534		format( '~sinstructions( ~q, ', [Indent,Target] ),
1535		pp_string(Processing), put_code( 0') ).
1536		xml_pp_indented( doctype(Name, DoctypeId), Indent ) :-
1537		format( '~sdoctype( ~q, ', [Indent,Name] ),
1538		xml_pp_indented( DoctypeId, [0'\t\|Indent] ),
1539		write( ' )' ).
1540		xml_pp_indented( cdata(CData), Indent ) :-
1541		format( '~scdata(', [Indent] ), pp_string(CData), put_code( 0') ).
1542		xml_pp_indented( pcdata(PCData), Indent ) :-
1543		format( '~spcdata(', [Indent] ), pp_string(PCData), put_code( 0') ).
1544		xml_pp_indented( public(URN,URL), _Indent ) :-
1545		format( 'public( "~s", "~s" )', [URN,URL] ).
1546		xml_pp_indented( public(URN,URL,Literals), Indent ) :-
1547		format( 'public( "~s", "~s",~n', [URN,URL] ),
1548		xml_pp_list( Literals, [0'\t\|Indent] ), write( ' )' ). %'
1549		xml_pp_indented( system(URL), _Indent ) :-
1550		format( 'system( "~s" )', [URL] ).
1551		xml_pp_indented( system(URL,Literals), Indent ) :-
1552		format( 'system( "~s",~n', [URL] ),
1553		xml_pp_list( Literals, [0'\t\|Indent] ), write( ' )' ). %'
1554		xml_pp_indented( local, _Indent ) :-
1555		write( local ).
1556		xml_pp_indented( local(Literals), Indent ) :-
1557		write( 'local(' ), nl,
1558		xml_pp_list( Literals, [0'\t\|Indent] ), write( ' )' ). %'
1559		xml_pp_indented( dtd_literal(String), Indent ) :-
1560		format( '~sdtd_literal(', [Indent] ), pp_string(String), put_code( 0') ). %'
1561		xml_pp_indented( out_of_context(Tag), Indent ) :-
1562		format( '~s/* SYNTAX ERROR */ out_of_context( ~q )', [Indent,Tag] ).
1563		xml_pp_indented( unparsed(String), Indent ) :-
1564		format( '~s/* SYNTAX ERROR */ unparsed( ', [Indent] ),
1565		pp_string(String), put_code( 0') ).
1566
1567		xml_pp_list( [], Indent ) :-
1568		format( '~s[]', [Indent] ).
1569		xml_pp_list( [H\|T], Indent ) :-
1570		format( '~s[~n', [Indent] ),
1571		xml_pp_indented( H, Indent ),
1572		xml_pp_list1( T, Indent ),
1573		format( '~s]', [Indent] ).
1574
1575		xml_pp_list1( [], _Indent ) :-
1576		nl.
1577		xml_pp_list1( [H\|T], Indent ) :-
1578		put_code( 0', ), nl,
1579		xml_pp_indented( H, Indent ),
1580		xml_pp_list1( T, Indent ).
1581
1582		% PATCH LEUSCHEL:
1583		pp_attributes( Attributes, Indent ) :-
1584		(select('$attribute_linefeeds'=_Count,Attributes,Rest)
1585		-> pp_attributes2(Rest,Indent) % TO DO: insert linefeeds
1586		; pp_attributes2(Attributes,Indent)).
1587
1588		pp_attributes2( [], Indent ) :-
1589		format( '~s[]', [Indent] ).
1590		pp_attributes2( [Attribute\|Attributes], Indent ) :-
1591		format( '~s[', [Indent] ),
1592		pp_attributes1( Attributes, Attribute ),
1593		put_code( 0'] ).
1594		% END PATCH
1595		pp_attributes1( [], Name=Value ) :-
1596		format( '~q=', [Name] ), pp_string( Value ).
1597		pp_attributes1( [H\|T], Name=Value ) :-
1598		format( '~q=', [Name] ), pp_string( Value ), write( ', ' ),
1599		pp_attributes1( T, H ).
1600		/* XML Utilities
1601		*
1602		* $Revision$
1603		*/
1604
1605		% Entity and Namespace map operations: these maps are usually quite small, so
1606		% a linear list lookup is okay. They could be substituted by a logarithmic
1607		% data structure - in extremis.
1608
1609		/* empty_map( ?Map ) is true if Map is a null map.
1610		*/
1611		empty_map( [] ).
1612
1613		/* map_member( +Key, +Map, ?Data ) is true if Map is a ordered map structure
1614		* which records the pair Key-Data. Key must be ground.
1615		*/
1616		map_member( Key0, [Key1-Data1\|Rest], Data0 ) :-
1617		( Key0 == Key1 ->
1618		Data0 = Data1
1619		; Key0 @> Key1 ->
1620		map_member( Key0, Rest, Data0 )
1621		).
1622
1623		/* map_store( +Map0, +Key, +Data, ?Map1 ) is true if Map0 is an ordered map
1624		* structure, Key must be ground, and Map1 is identical to Map0 except that
1625		* the pair Key-Data is recorded by Map1.
1626		*/
1627		map_store( [], Key, Data, [Key-Data] ).
1628		map_store( [Key0-Data0\|Map0], Key, Data, Map ) :-
1629		( Key == Key0 ->
1630		Map = [Key-Data\|Map0]
1631		; Key @< Key0 ->
1632		Map = [Key-Data,Key0-Data0\|Map0]
1633		; % >
1634		Map = [Key0-Data0\|Map1],
1635		map_store( Map0, Key, Data, Map1 )
1636		).
1637
1638		/* context(?Element, ?PreserveSpace, ?CurrentNS, ?DefaultNS, ?Entities, ?Namespaces )
1639		* is an ADT hiding the "state" arguments for XML Acquisition
1640		*/
1641		initial_context(
1642		opt(Fmt,Ext,RemoveAttributePrefixes),
1643		context(void,PreserveSpace,'','',Entities,Empty,
1644		RemoveAttributePrefixes)
1645		) :-
1646		empty_map( Empty ),
1647		( Ext==false ->
1648		Entities = Empty
1649		;
1650		extended_character_entities(Entities)
1651		),
1652		( Fmt==false ->
1653		PreserveSpace = true
1654		;
1655		PreserveSpace = false
1656		).
1657
1658		context_update( current_namespace, Context0, URI, Context1 ) :-
1659		Context0 = context(Element,Preserve,_Current,Default,Entities,
1660		Namespaces,RemoveAttributePrefixes),
1661		Context1 = context(Element,Preserve,URI,Default,Entities,
1662		Namespaces,RemoveAttributePrefixes).
1663		context_update( element, Context0, Tag, Context1 ) :-
1664		Context0 = context(_Element,Preserve,Current,Default,Entities,
1665		Namespaces,RemoveAttributePrefixes),
1666		Context1 = context(tag(Tag),Preserve,Current,Default,Entities,
1667		Namespaces,RemoveAttributePrefixes).
1668		context_update( default_namespace, Context0, URI, Context1 ):-
1669		Context0 = context(Element,Preserve,Current,_Default,Entities,
1670		Namespaces,RemoveAttributePrefixes),
1671		Context1 = context(Element,Preserve,Current,URI,Entities,
1672		Namespaces,RemoveAttributePrefixes).
1673		context_update( space_preserve, Context0, Boolean, Context1 ):-
1674		Context0 = context(Element,_Preserve,Current,Default,Entities,
1675		Namespaces,RemoveAttributePrefixes),
1676		Context1 = context(Element,Boolean,Current,Default,Entities,
1677		Namespaces,RemoveAttributePrefixes).
1678		context_update( ns_prefix(Prefix), Context0, URI, Context1 ) :-
1679		Context0 = context(Element,Preserve,Current,Default,Entities,
1680		Namespaces0,RemoveAttributePrefixes),
1681		Context1 = context(Element,Preserve,Current,Default,Entities,
1682		Namespaces1,RemoveAttributePrefixes),
1683		map_store( Namespaces0, Prefix, URI, Namespaces1 ).
1684		context_update( entity(Name), Context0, String, Context1 ) :-
1685		Context0 = context(Element,Preserve,Current,Default,Entities0,
1686		Namespaces,RemoveAttributePrefixes),
1687		Context1 = context(Element,Preserve,Current,Default,Entities1,
1688		Namespaces,RemoveAttributePrefixes),
1689		map_store( Entities0, Name, String, Entities1 ).
1690
1691		remove_attribute_prefixes( Context ) :-
1692		Context = context(_Element,_Preserve,_Current,_Default,_Entities,
1693		_Namespaces,true).
1694
1695		current_tag( Context, Tag ) :-
1696		Context = context(tag(Tag),_Preserve,_Current,_Default,_Entities,
1697		_Namespaces,_RPFA).
1698
1699		current_namespace( Context, Current ) :-
1700		Context = context(_Element,_Preserve,Current,_Default,_Entities,
1701		_Namespaces,_RPFA).
1702
1703		default_namespace( Context, Default ) :-
1704		Context = context(_Element,_Preserve,_Current,Default,_Entities,
1705		_Namespaces,_RPFA).
1706
1707		space_preserve( Context ) :-
1708		Context = context(_Element,true,_Current,_Default,_Entities,
1709		_Namespaces,_RPFA).
1710
1711		specific_namespace( Prefix, Context, URI ) :-
1712		Context = context(_Element,_Preserve,_Current,_Default,_Entities,
1713		Namespaces,_RPFA),
1714		map_member( Prefix, Namespaces, URI ).
1715
1716		defined_entity( Reference, Context, String ) :-
1717		Context = context(_Element,_Preserve,_Current,_Default,Entities,
1718		_Namespaces,_RPFA),
1719		map_member( Reference, Entities, String ).
1720
1721		close_context( Context, Terms, WellFormed ) :-
1722		Context = context(Element,_Preserve,_Current,_Default,_Entities,
1723		_Namespaces,_RPFA),
1724		close_context1( Element, Terms, WellFormed ).
1725
1726		close_context1( void, [], true ).
1727		close_context1( tag(TagChars), [out_of_context(Tag)], false ) :-
1728		atom_codes( Tag, TagChars ).
1729
1730		void_context(
1731		context(void,_Preserve,_Current,_Default,_Entities,_Namespaces)
1732		).
1733
1734		/* pp_string( +String ) prints String onto the current output stream.
1735		* If String contains only 7-bit chars it is printed in shorthand quoted
1736		* format, otherwise it is written as a list.
1737		* If your Prolog uses " to delimit a special string type, just use write/1.
1738		*/
1739		% [MC] rewritten
1740		pp_string(Chars) :-
1741		put_code(0'"),
1742		pp_string1(Chars),
1743		put_code(0'").
1744
1745		pp_string1([]).
1746		pp_string1([Char\|Chars]) :-
1747		pp_string2(Char),
1748		pp_string1(Chars).
1749
1750		pp_string2(0'\a) :- !,
1751		put_code(0'\\),
1752		put_code(0'a).
1753		pp_string2(0'\b) :- !,
1754		put_code(0'\\),
1755		put_code(0'b).
1756		pp_string2(0'\t) :- !,
1757		put_code(0'\\),
1758		put_code(0't).
1759		pp_string2(0'\n) :- !,
1760		put_code(0'\\),
1761		put_code(0'n).
1762		pp_string2(0'\v) :- !,
1763		put_code(0'\\),
1764		put_code(0'v).
1765		pp_string2(0'\f) :- !,
1766		put_code(0'\\),
1767		put_code(0'f).
1768		pp_string2(0'\r) :- !,
1769		put_code(0'\\),
1770		put_code(0'r).
1771		pp_string2(0'\e) :- !,
1772		put_code(0'\\),
1773		put_code(0'e).
1774		pp_string2(0'\") :- !,
1775		put_code(0'\\),
1776		put_code(0'").
1777		pp_string2(0'\\) :- !,
1778		put_code(0'\\),
1779		put_code(0'\\).
1780		pp_string2(Char) :-
1781		Char>=32, Char=<126, !,
1782		put_code(Char).
1783		pp_string2(Char) :-
1784		format('\\~8r\\', [Char]).
1785
1786		xml_declaration_attributes_valid( [] ).
1787		xml_declaration_attributes_valid( [Name=Value\|Attributes] ) :-
1788	?	xml_declaration_attribute_valid( Name, Value ),
1789	?	xml_declaration_attributes_valid( Attributes ).
1790
1791		xml_declaration_attribute_valid( Name, Value ) :-
1792		lowercase( Value, Lowercase ),
1793		debug_format(19,'Checking xml declaration attribute ~s=~s~n',[Name,Lowercase]),
1794	?	if(canonical_xml_declaration_attribute( Name, Lowercase ), true,
1795		(format(user_error,'Illegal <xml> declaration attribute: ~s = ~s~n',[Name,Value]),
1796		(valid_declaration_attribute_name(Name)
1797		-> ajoin(['Ignoring illegal value for attribute ',Name,': '],Msg),
1798		atom_codes(AValue,Value),
1799		add_xml_warning(Msg,AValue)
1800		; add_xml_warning('Ignoring illegal <xml> declaration attribute: ',Name)
1801		),
1802		fail
1803		)). % PATCH LEUSCHEL: add warning
1804
1805		% PATCH LEUSCHEL:
1806		valid_declaration_attribute_name(version).
1807		valid_declaration_attribute_name(standalone).
1808		valid_declaration_attribute_name(encoding).
1809
1810
1811		canonical_xml_declaration_attribute( version, "1.0" ).
1812		canonical_xml_declaration_attribute( standalone, "yes" ).
1813		canonical_xml_declaration_attribute( standalone, "no" ).
1814		canonical_xml_declaration_attribute( encoding, "utf-8" ).
1815		canonical_xml_declaration_attribute( encoding, "utf-16" ).
1816		canonical_xml_declaration_attribute( encoding, "ascii" ).
1817		canonical_xml_declaration_attribute( encoding, "iso-8859-1" ).
1818		canonical_xml_declaration_attribute( encoding, "iso-8859-2" ).
1819		canonical_xml_declaration_attribute( encoding, "iso-8859-15" ).
1820		canonical_xml_declaration_attribute( encoding, "windows-1252" ).
1821
1822		% PATCH LEUSCHEL: some more sets from https://www.iana.org/assignments/character-sets/character-sets.xhtml
1823		%canonical_xml_declaration_attribute( encoding, "iso-8859-3" ).
1824		%canonical_xml_declaration_attribute( encoding, "iso-8859-4" ).
1825		canonical_xml_declaration_attribute( encoding, Encoding ) :- atom_codes(Atom,Encoding),
1826		add_xml_warning('Unsupported xml encoding (which can lead to unexpected results): ',Atom).
1827
1828
1829
1830		/* lowercase( +MixedCase, ?Lowercase ) holds when Lowercase and MixedCase are
1831		* lists of character codes, and Lowercase is identical to MixedCase with
1832		* every uppercase character replaced by its lowercase equivalent.
1833		*/
1834		lowercase( [], [] ).
1835		lowercase( [Char\|Chars], [Lower\|LowerCase] ) :-
1836		( Char >= 0'A, Char =< 0'Z
1837		->
1838		Lower is Char + 0'a - 0'A
1839		;
1840		Lower = Char
1841		),
1842		lowercase( Chars, LowerCase ).
1843
1844		:- dynamic extended_character_entities/1. % no point compiling it
1845		extended_character_entities( [
1846		"Aacute"-[193], % latin capital letter A with acute,
1847		"aacute"-[225], % latin small letter a with acute,
1848		"Acirc"-[194], % latin capital letter A with circumflex,
1849		"acirc"-[226], % latin small letter a with circumflex,
1850		"acute"-[180], % acute accent = spacing acute,
1851		"AElig"-[198], % latin capital letter AE
1852		"aelig"-[230], % latin small letter ae
1853		"Agrave"-[192], % latin capital letter A with grave
1854		"agrave"-[224], % latin small letter a with grave
1855		"alefsym"-[8501], % alef symbol = first transfinite cardinal,
1856		"Alpha"-[913], % greek capital letter alpha, U+0391
1857		"alpha"-[945], % greek small letter alpha,
1858		"and"-[8743], % logical and = wedge, U+2227 ISOtech
1859		"ang"-[8736], % angle, U+2220 ISOamso
1860		"Aring"-[197], % latin capital letter A with ring above
1861		"aring"-[229], % latin small letter a with ring above
1862		"asymp"-[8776], % almost equal to = asymptotic to,
1863		"Atilde"-[195], % latin capital letter A with tilde,
1864		"atilde"-[227], % latin small letter a with tilde,
1865		"Auml"-[196], % latin capital letter A with diaeresis,
1866		"auml"-[228], % latin small letter a with diaeresis,
1867		"bdquo"-[8222], % double low-9 quotation mark, U+201E NEW
1868		"Beta"-[914], % greek capital letter beta, U+0392
1869		"beta"-[946], % greek small letter beta, U+03B2 ISOgrk3
1870		"brvbar"-[166], % broken bar = broken vertical bar,
1871		"bull"-[8226], % bullet = black small circle,
1872		"cap"-[8745], % intersection = cap, U+2229 ISOtech
1873		"Ccedil"-[199], % latin capital letter C with cedilla,
1874		"ccedil"-[231], % latin small letter c with cedilla,
1875		"cedil"-[184], % cedilla = spacing cedilla, U+00B8 ISOdia>
1876		"cent"-[162], % cent sign, U+00A2 ISOnum>
1877		"Chi"-[935], % greek capital letter chi, U+03A7
1878		"chi"-[967], % greek small letter chi, U+03C7 ISOgrk3
1879		"circ"-[710], % modifier letter circumflex accent,
1880		"clubs"-[9827], % black club suit = shamrock,
1881		"cong"-[8773], % approximately equal to, U+2245 ISOtech
1882		"copy"-[169], % copyright sign, U+00A9 ISOnum>
1883		"crarr"-[8629], % downwards arrow with corner leftwards
1884		"cup"-[8746], % union = cup, U+222A ISOtech
1885		"curren"-[164], % currency sign, U+00A4 ISOnum>
1886		"dagger"-[8224], % dagger, U+2020 ISOpub
1887		"Dagger"-[8225], % double dagger, U+2021 ISOpub
1888		"darr"-[8595], % downwards arrow, U+2193 ISOnum
1889		"dArr"-[8659], % downwards double arrow, U+21D3 ISOamsa
1890		"deg"-[176], % degree sign, U+00B0 ISOnum>
1891		"Delta"-[916], % greek capital letter delta,
1892		"delta"-[948], % greek small letter delta,
1893		"diams"-[9830], % black diamond suit, U+2666 ISOpub
1894		"divide"-[247], % division sign, U+00F7 ISOnum>
1895		"Eacute"-[201], % latin capital letter E with acute,
1896		"eacute"-[233], % latin small letter e with acute,
1897		"Ecirc"-[202], % latin capital letter E with circumflex,
1898		"ecirc"-[234], % latin small letter e with circumflex,
1899		"Egrave"-[200], % latin capital letter E with grave,
1900		"egrave"-[232], % latin small letter e with grave,
1901		"empty"-[8709], % empty set = null set = diameter,
1902		"emsp"-[8195], % em space, U+2003 ISOpub
1903		"ensp"-[8194], % en space, U+2002 ISOpub
1904		"Epsilon"-[917], % greek capital letter epsilon, U+0395
1905		"epsilon"-[949], % greek small letter epsilon,
1906		"equiv"-[8801], % identical to, U+2261 ISOtech
1907		"Eta"-[919], % greek capital letter eta, U+0397
1908		"eta"-[951], % greek small letter eta, U+03B7 ISOgrk3
1909		"ETH"-[208], % latin capital letter ETH, U+00D0 ISOlat1>
1910		"eth"-[240], % latin small letter eth, U+00F0 ISOlat1>
1911		"Euml"-[203], % latin capital letter E with diaeresis,
1912		"euml"-[235], % latin small letter e with diaeresis,
1913		"euro"-[8364], % euro sign, U+20AC NEW
1914		"exist"-[8707], % there exists, U+2203 ISOtech
1915		"fnof"-[402], % latin small f with hook = function
1916		"forall"-[8704], % for all, U+2200 ISOtech
1917		"frac12"-[189], % vulgar fraction one half
1918		"frac14"-[188], % vulgar fraction one quarter
1919		"frac34"-[190], % vulgar fraction three quarters
1920		"frasl"-[8260], % fraction slash, U+2044 NEW
1921		"Gamma"-[915], % greek capital letter gamma,
1922		"gamma"-[947], % greek small letter gamma,
1923		"ge"-[8805], % greater-than or equal to,
1924		"harr"-[8596], % left right arrow, U+2194 ISOamsa
1925		"hArr"-[8660], % left right double arrow,
1926		"hearts"-[9829], % black heart suit = valentine,
1927		"hellip"-[8230], % horizontal ellipsis = three dot leader,
1928		"Iacute"-[205], % latin capital letter I with acute,
1929		"iacute"-[237], % latin small letter i with acute,
1930		"Icirc"-[206], % latin capital letter I with circumflex,
1931		"icirc"-[238], % latin small letter i with circumflex,
1932		"iexcl"-[161], % inverted exclamation mark, U+00A1 ISOnum>
1933		"Igrave"-[204], % latin capital letter I with grave,
1934		"igrave"-[236], % latin small letter i with grave,
1935		"image"-[8465], % blackletter capital I = imaginary part,
1936		"infin"-[8734], % infinity, U+221E ISOtech
1937		"int"-[8747], % integral, U+222B ISOtech
1938		"Iota"-[921], % greek capital letter iota, U+0399
1939		"iota"-[953], % greek small letter iota, U+03B9 ISOgrk3
1940		"iquest"-[191], % inverted question mark
1941		"isin"-[8712], % element of, U+2208 ISOtech
1942		"Iuml"-[207], % latin capital letter I with diaeresis,
1943		"iuml"-[239], % latin small letter i with diaeresis,
1944		"Kappa"-[922], % greek capital letter kappa, U+039A
1945		"kappa"-[954], % greek small letter kappa,
1946		"Lambda"-[923], % greek capital letter lambda,
1947		"lambda"-[955], % greek small letter lambda,
1948		"lang"-[9001], % left-pointing angle bracket = bra,
1949		"laquo"-[171], % left-pointing double angle quotation mark
1950		"larr"-[8592], % leftwards arrow, U+2190 ISOnum
1951		"lArr"-[8656], % leftwards double arrow, U+21D0 ISOtech
1952		"lceil"-[8968], % left ceiling = apl upstile,
1953		"ldquo"-[8220], % left double quotation mark,
1954		"le"-[8804], % less-than or equal to, U+2264 ISOtech
1955		"lfloor"-[8970], % left floor = apl downstile,
1956		"lowast"-[8727], % asterisk operator, U+2217 ISOtech
1957		"loz"-[9674], % lozenge, U+25CA ISOpub
1958		"lrm"-[8206], % left-to-right mark, U+200E NEW RFC 2070
1959		"lsaquo"-[8249], % single left-pointing angle quotation mark,
1960		"lsquo"-[8216], % left single quotation mark,
1961		"macr"-[175], % macron = spacing macron = overline
1962		"mdash"-[8212], % em dash, U+2014 ISOpub
1963		"micro"-[181], % micro sign, U+00B5 ISOnum>
1964		"middot"-[183], % middle dot = Georgian comma
1965		"minus"-[8722], % minus sign, U+2212 ISOtech
1966		"Mu"-[924], % greek capital letter mu, U+039C
1967		"mu"-[956], % greek small letter mu, U+03BC ISOgrk3
1968		"nabla"-[8711], % nabla = backward difference,
1969		"nbsp"-[160], % no-break space = non-breaking space,
1970		"ndash"-[8211], % en dash, U+2013 ISOpub
1971		"ne"-[8800], % not equal to, U+2260 ISOtech
1972		"ni"-[8715], % contains as member, U+220B ISOtech
1973		"not"-[172], % not sign, U+00AC ISOnum>
1974		"notin"-[8713], % not an element of, U+2209 ISOtech
1975		"nsub"-[8836], % not a subset of, U+2284 ISOamsn
1976		"Ntilde"-[209], % latin capital letter N with tilde,
1977		"ntilde"-[241], % latin small letter n with tilde,
1978		"Nu"-[925], % greek capital letter nu, U+039D
1979		"nu"-[957], % greek small letter nu, U+03BD ISOgrk3
1980		"Oacute"-[211], % latin capital letter O with acute,
1981		"oacute"-[243], % latin small letter o with acute,
1982		"Ocirc"-[212], % latin capital letter O with circumflex,
1983		"ocirc"-[244], % latin small letter o with circumflex,
1984		"OElig"-[338], % latin capital ligature OE,
1985		"oelig"-[339], % latin small ligature oe, U+0153 ISOlat2
1986		"Ograve"-[210], % latin capital letter O with grave,
1987		"ograve"-[242], % latin small letter o with grave,
1988		"oline"-[8254], % overline = spacing overscore,
1989		"Omega"-[937], % greek capital letter omega,
1990		"omega"-[969], % greek small letter omega,
1991		"Omicron"-[927], % greek capital letter omicron, U+039F
1992		"omicron"-[959], % greek small letter omicron, U+03BF NEW
1993		"oplus"-[8853], % circled plus = direct sum,
1994		"or"-[8744], % logical or = vee, U+2228 ISOtech
1995		"ordf"-[170], % feminine ordinal indicator, U+00AA ISOnum>
1996		"ordm"-[186], % masculine ordinal indicator,
1997		"Oslash"-[216], % latin capital letter O with stroke
1998		"oslash"-[248], % latin small letter o with stroke,
1999		"Otilde"-[213], % latin capital letter O with tilde,
2000		"otilde"-[245], % latin small letter o with tilde,
2001		"otimes"-[8855], % circled times = vector product,
2002		"Ouml"-[214], % latin capital letter O with diaeresis,
2003		"ouml"-[246], % latin small letter o with diaeresis,
2004		"para"-[182], % pilcrow sign = paragraph sign,
2005		"part"-[8706], % partial differential, U+2202 ISOtech
2006		"permil"-[8240], % per mille sign, U+2030 ISOtech
2007		"perp"-[8869], % up tack = orthogonal to = perpendicular,
2008		"Phi"-[934], % greek capital letter phi,
2009		"phi"-[966], % greek small letter phi, U+03C6 ISOgrk3
2010		"Pi"-[928], % greek capital letter pi, U+03A0 ISOgrk3
2011		"pi"-[960], % greek small letter pi, U+03C0 ISOgrk3
2012		"piv"-[982], % greek pi symbol, U+03D6 ISOgrk3
2013		"plusmn"-[177], % plus-minus sign = plus-or-minus sign,
2014		"pound"-[163], % pound sign, U+00A3 ISOnum>
2015		"prime"-[8242], % prime = minutes = feet, U+2032 ISOtech
2016		"Prime"-[8243], % double prime = seconds = inches,
2017		"prod"-[8719], % n-ary product = product sign,
2018		"prop"-[8733], % proportional to, U+221D ISOtech
2019		"Psi"-[936], % greek capital letter psi,
2020		"psi"-[968], % greek small letter psi, U+03C8 ISOgrk3
2021		"radic"-[8730], % square root = radical sign,
2022		"rang"-[9002], % right-pointing angle bracket = ket,
2023		"raquo"-[187], % right-pointing double angle quotation mark
2024		"rarr"-[8594], % rightwards arrow, U+2192 ISOnum
2025		"rArr"-[8658], % rightwards double arrow,
2026		"rceil"-[8969], % right ceiling, U+2309 ISOamsc
2027		"rdquo"-[8221], % right double quotation mark,
2028		"real"-[8476], % blackletter capital R = real part symbol,
2029		"reg"-[174], % registered sign = registered trade mark sign,
2030		"rfloor"-[8971], % right floor, U+230B ISOamsc
2031		"Rho"-[929], % greek capital letter rho, U+03A1
2032		"rho"-[961], % greek small letter rho, U+03C1 ISOgrk3
2033		"rlm"-[8207], % right-to-left mark, U+200F NEW RFC 2070
2034		"rsaquo"-[8250], % single right-pointing angle quotation mark,
2035		"rsquo"-[8217], % right single quotation mark,
2036		"sbquo"-[8218], % single low-9 quotation mark, U+201A NEW
2037		"Scaron"-[352], % latin capital letter S with caron,
2038		"scaron"-[353], % latin small letter s with caron,
2039		"sdot"-[8901], % dot operator, U+22C5 ISOamsb
2040		"sect"-[167], % section sign, U+00A7 ISOnum>
2041		"shy"-[173], % soft hyphen = discretionary hyphen,
2042		"Sigma"-[931], % greek capital letter sigma,
2043		"sigma"-[963], % greek small letter sigma,
2044		"sigmaf"-[962], % greek small letter final sigma,
2045		"sim"-[8764], % tilde operator = varies with = similar to,
2046		"spades"-[9824], % black spade suit, U+2660 ISOpub
2047		"sub"-[8834], % subset of, U+2282 ISOtech
2048		"sube"-[8838], % subset of or equal to, U+2286 ISOtech
2049		"sum"-[8721], % n-ary sumation, U+2211 ISOamsb
2050		"sup"-[8835], % superset of, U+2283 ISOtech
2051		"sup1"-[185], % superscript one = superscript digit one,
2052		"sup2"-[178], % superscript two = superscript digit two
2053		"sup3"-[179], % superscript three = superscript digit three
2054		"supe"-[8839], % superset of or equal to,
2055		"szlig"-[223], % latin small letter sharp s = ess-zed,
2056		"Tau"-[932], % greek capital letter tau, U+03A4
2057		"tau"-[964], % greek small letter tau, U+03C4 ISOgrk3
2058		"there4"-[8756], % therefore, U+2234 ISOtech
2059		"Theta"-[920], % greek capital letter theta,
2060		"theta"-[952], % greek small letter theta,
2061		"thetasym"-[977], % greek small letter theta symbol,
2062		"thinsp"-[8201], % thin space, U+2009 ISOpub
2063		"THORN"-[222], % latin capital letter THORN,
2064		"thorn"-[254], % latin small letter thorn with,
2065		"tilde"-[732], % small tilde, U+02DC ISOdia
2066		"times"-[215], % multiplication sign, U+00D7 ISOnum>
2067		"trade"-[8482], % trade mark sign, U+2122 ISOnum
2068		"Uacute"-[218], % latin capital letter U with acute,
2069		"uacute"-[250], % latin small letter u with acute,
2070		"uarr"-[8593], % upwards arrow, U+2191 ISOnum
2071		"uArr"-[8657], % upwards double arrow, U+21D1 ISOamsa
2072		"Ucirc"-[219], % latin capital letter U with circumflex,
2073		"ucirc"-[251], % latin small letter u with circumflex,
2074		"Ugrave"-[217], % latin capital letter U with grave,
2075		"ugrave"-[249], % latin small letter u with grave,
2076		"uml"-[168], % diaeresis = spacing diaeresis,
2077		"upsih"-[978], % greek upsilon with hook symbol,
2078		"Upsilon"-[933], % greek capital letter upsilon,
2079		"upsilon"-[965], % greek small letter upsilon,
2080		"Uuml"-[220], % latin capital letter U with diaeresis,
2081		"uuml"-[252], % latin small letter u with diaeresis,
2082		"weierp"-[8472], % script capital P = power set
2083		"Xi"-[926], % greek capital letter xi, U+039E ISOgrk3
2084		"xi"-[958], % greek small letter xi, U+03BE ISOgrk3
2085		"Yacute"-[221], % latin capital letter Y with acute,
2086		"yacute"-[253], % latin small letter y with acute,
2087		"yen"-[165], % yen sign = yuan sign, U+00A5 ISOnum>
2088		"yuml"-[255], % latin small letter y with diaeresis,
2089		"Yuml"-[376], % latin capital letter Y with diaeresis,
2090		"Zeta"-[918], % greek capital letter zeta, U+0396
2091		"zeta"-[950], % greek small letter zeta, U+03B6 ISOgrk3
2092		"zwj"-[8205], % zero width joiner, U+200D NEW RFC 2070
2093		"zwnj"-[8204] % zero width non-joiner,
2094		] ).
2095
2096		/* chars( ?Chars, ?Plus, ?Minus ) used as chars( ?Chars ) in a DCG to
2097		* copy the list Chars inline.
2098		*
2099		* This is best expressed in terms of append/3 where append/3 is built-in.
2100		* For other Prologs, a straightforward specification can be used:
2101		*
2102		* chars( [] ) --> "".
2103		* chars( [Char\|Chars] ) -->
2104		* [Char],
2105		* chars( Chars ).
2106		*/
2107		chars( Chars, Plus, Minus ) :-
2108		append( Chars, Minus, Plus ).