1 /* xml_prob.pl : Contains xml_parse/[2,3] a bi-directional XML parser written in
2 * Prolog.
3 *
4 * Copyright (C) 2001-2005 Binding Time Limited
5 * Copyright (C) 2005 John Fletcher
6 *
7 * Current Release: $Revision$
8 *
9 * TERMS AND CONDITIONS:
10 *
11 * This program is offered free of charge, as unsupported source code. You may
12 * use it, copy it, distribute it, modify it or sell it without restriction,
13 * but entirely at your own risk.
14 *
15 */
16
17 % minimally adapted for SICStus Prolog
18 % Mats Carlsson, 2003-2006
19 % added linefeed counting for attributes (attributes_count_lf,spaces_count_linebreaks)
20 % Michael Leuschel, 2017-2022 (changes marked by PATCH LEUSCHEL)
21 % replaces library(xml) for SICStus
22
23 :- module( xml_prob, [
24 xml_parse/2,
25 xml_parse/3,
26 xml_subterm/2,
27 xml_pp/1,
28
29 xml_parse/4
30 ]).
31
32 :- use_module(module_information,[module_info/2]).
33 :- module_info(group,external_functions).
34 :- module_info(description,'This module transforms XML into a Prolog representation, adaptation of SICStus library module.').
35
36 :- use_module(probsrc(error_manager)).
37 :- use_module(probsrc(debug),[debug_format/3]).
38 :- use_module(probsrc(tools_strings),[ajoin/2]).
39
40 % added by Leuschel:
41 xml_parse( Chars, Document, Controls, Span ) :-
42 bb_put(xml_prob_current_span,Span),
43 call_cleanup(xml_parse( Chars, Document, Controls),
44 bb_delete(xml_prob_current_span,_)).
45
46 add_xml_warning(Msg,Term) :-
47 bb_get(xml_prob_current_span,Span),!,
48 add_warning(xml_prob,Msg,Term,Span).
49 add_xml_warning(Msg,Term) :-
50 add_warning(xml_prob,Msg,Term).
51
52 add_xml_error(Msg,Term) :-
53 bb_get(xml_prob_current_span,Span),!,
54 add_error(xml_prob,Msg,Term,Span).
55 add_xml_error(Msg,Term) :-
56 add_error(xml_prob,Msg,Term).
57 :- use_module(library(lists),[append/2]).
58 expect_tokens(Tokens,_) --> Tokens,!.
59 expect_tokens(Tokens,Ctxt,Input,_) :-
60 append(["XML parse error, expected '",Tokens,"' in ",Ctxt,", but got:"],Codes),
61 atom_codes(Msg,Codes),
62 length(Tokens,Len),
63 get_next_tokens(Input,Len,Nxt),
64 atom_codes(NxtAtom,Nxt),
65 add_xml_error(Msg,NxtAtom), % TO DO: extract next tokens
66 fail.
67
68 get_next_tokens(_,N,R) :- N<1,!, R=[].
69 get_next_tokens(Var,_,Res) :- var(Var),!, Res="<EOF>".
70 get_next_tokens([],_,"<EOF>").
71 get_next_tokens([H|T],Nr,[H|R]) :- N1 is Nr-1, get_next_tokens(T,N1,R).
72
73 :- use_module(library(types), [
74 must_be/4,
75 illarg/3,
76 illarg/4
77 ]).
78 :- use_module(library(lists), [
79 select/3,
80 is_list/1
81 ]).
82
83 :- set_prolog_flag(double_quotes, codes).
84
85 %@ This is a package for parsing XML with Prolog, which
86 %@ provides Prolog applications with a simple ``Document Value Model''
87 %@ interface to XML documents. A description of the subset of XML that it
88 %@ supports can be found at:
89 %@ @c [PM] 4.1 link updated 2009-08-14
90 %@ @uref{http://www.binding-time.co.uk/xmlpl.html}
91 %@
92 %@ The package, originally written by Binding Time Ltd., is in the public
93 %@ domain and unsupported. To use the package, enter the query:
94 %@
95 %@ @example
96 %@ @group
97 %@ | ?- use_module(library(xml)).
98 %@ @end group
99 %@ @end example
100 %@
101 %@ The package represents XML documents by the abstract data type
102 %@ @var{document}, which is defined by the following grammar:
103 %@
104 %@ @multitable @columnfractions .2 .3 .5
105 %@ @item @var{document} @tab ::= @code{xml(@var{attributes},@var{content})} @tab @r{@{ well-formed document @}}
106 %@ @item @tab | @code{malformed(@var{attributes},@var{content})} @tab @r{@{ malformed document @}}
107 %@ @item
108 %@ @item @var{attributes} @tab ::= @code{[]}
109 %@ @item @tab | @code{[@var{name}=@var{char-data}|@var{attributes}]}
110 %@ @item
111 %@ @item @var{content} @tab ::= @code{[]}
112 %@ @item @tab | @code{[@var{cterm}|@var{content}]}
113 %@ @item
114 %@ @item @var{cterm} @tab ::= @code{pcdata(@var{char-data})} @tab @r{@{ text @}}
115 %@ @item @tab | @code{comment(@var{char-data})} @tab @r{@{ an XML comment @}}
116 %@ @item @tab | @code{namespace(@var{URI},@var{prefix},@var{element})} @tab @r{@{ a Namespace @}}
117 %@ @item @tab | @code{element(@var{tag},@var{attributes},@var{content})} @tab @r{@{ <@var{tag}>..</@var{tag}> encloses @var{content} or <@var{tag} /> if empty @}}
118 %@ @item @tab | @code{instructions(@var{name},@var{char-data})} @tab @r{@{ A PI <? @var{name} @var{char-data} ?> @}}
119 %@ @item @tab | @code{cdata(@var{char-data})} @tab @r{@{ <![CDATA[@var{char-data}]]> @}}
120 %@ @item @tab | @code{doctype(@var{tag},@var{doctype-id})} @tab @r{@{ DTD <!DOCTYPE .. > @}}
121 %@ @item @tab | @code{unparsed(@var{char-data})} @tab @r{@{ text that hasn't been parsed @}}
122 %@ @item @tab | @code{out_of_context(@var{tag})} @tab @r{@{ @var{tag} is not closed @}}
123 %@ @item
124 %@ @item @var{tag} @tab ::= @dfn{atom} @tab @r{@{ naming an element @}}
125 %@ @item
126 %@ @item @var{name} @tab ::= @dfn{atom} @tab @r{@{ not naming an element @}}
127 %@ @item
128 %@ @item @var{URI} @tab ::= @dfn{atom} @tab @r{@{ giving the URI of a namespace @}}
129 %@ @item
130 %@ @item @var{char-data} @tab ::= @dfn{code-list}
131 %@ @item
132 %@ @item @var{doctype-id} @tab ::= @code{public(@var{char-data},@var{char-data})}
133 %@ @item @tab | @code{public(@var{char-data},@var{dtd-literals})}
134 %@ @item @tab | @code{system(@var{char-data})}
135 %@ @item @tab | @code{system(@var{char-data},@var{dtd-literals})}
136 %@ @item @tab | @code{local}
137 %@ @item @tab | @code{local,@var{dtd-literals}}
138 %@ @item
139 %@ @item @var{dtd-literals} @tab ::= @code{[]}
140 %@ @item @tab | @code{[dtd_literal(@var{char-data})|@var{dtd-literals}]}
141 %@ @end multitable
142 %@
143 %@ The following predicates are exported by the package:
144 %@
145 %@ @table @code
146 %@ @item xml_parse(@var{?Chars}, @var{?Document})
147 %@ @itemx xml_parse(@var{?Chars}, @var{?Document}, @var{+Options})
148 %@ @PLXindex {xml_parse/[2,3] (xml)}
149 %@ Either parses @var{Chars}, a @dfn{code-list}, to @var{Document}, a
150 %@ @var{document}. @var{Chars} is not required to represent strictly
151 %@ well-formed XML.
152 %@ Or generates @var{Chars}, a @dfn{code-list},
153 %@ from @var{Document}, a @var{document}.
154 %@ If @var{Document} is not a valid @var{document} term representing
155 %@ well-formed XML, an exception is raised.
156 %@ In the second usage of the predicate, the only option available is @code{format/1}.
157 %@
158 %@ @var{Options} is a list of zero or more of the following, where
159 %@ @var{Boolean} must be @code{true} or @code{false}:
160 %@
161 %@ @table @code
162 %@ @item format(@var{Boolean})
163 %@ @findex format/1 (xml_parse/3 option)
164 %@ Indent the element content (default @code{true}).
165 %@
166 %@ @item extended_characters(@var{Boolean})
167 %@ @findex extended_characters/1 (xml_parse/3 option)
168 %@ Use the extended character entities for XHTML (default @code{true}).
169 %@
170 %@ @item remove_attribute_prefixes(@var{Boolean})
171 %@ @findex remove_attribute_prefixes/1 (xml_parse/3 option)
172 %@ Remove namespace prefixes from attributes when it's the same as the
173 %@ prefix of the parent element (default @code{false}).
174 %@ @end table
175 %@
176 %@ @item xml_subterm(@var{+Term}, @var{?Subterm})
177 %@ @PLXindex {xml_subterm/2 (xml)}
178 %@ Unifies @var{Subterm} with a sub-term of @var{Term}, a
179 %@ @var{document}. This can be especially useful when trying to test or
180 %@ retrieve a deeply-nested subterm from a document.
181 %@
182 %@ @item xml_pp(@var{+Document})
183 %@ @PLXindex {xml_pp/1 (xml)}
184 %@ ``Pretty prints'' @var{Document}, a @var{document}, on the current
185 %@ output stream.
186 %@ @end table
187
188
189 /* xml_parse(+?Chars, ?+Document[, +Controls]) parses Chars to/from a data
190 * structure of the form xml(<atts>, <content>). <atts> is a list of
191 * <atom>=<string> attributes from the (possibly implicit) XML signature of the
192 * document. <content> is a (possibly empty) list comprising occurrences of :
193 *
194 * pcdata(<string>) : Text
195 * comment(<string>) : An xml comment;
196 * element(<tag>,<atts>,<content>) : <tag>..</tag> encloses <content>
197 * : <tag /> if empty
198 * instructions(<atom>, <string>) : Processing <? <atom> <params> ?>"
199 * cdata( <string> ) : <![CDATA[ <string> ]]>
200 * doctype(<atom>, <doctype id>) : DTD <!DOCTYPE .. >
201 *
202 * The conversions are not completely symmetrical, in that weaker XML is
203 * accepted than can be generated. Specifically, in-bound (Chars -> Document)
204 * does not require strictly well-formed XML. Document is instantiated to the
205 * term malformed(Attributes, Content) if Chars does not represent well-formed
206 * XML. The Content of a malformed/2 structure can contain:
207 *
208 * unparsed( <string> ) : Text which has not been parsed
209 * out_of_context( <tag> ) : <tag> is not closed
210 *
211 * in addition to the standard term types.
212 *
213 * Out-bound (Document -> Chars) parsing _does_ require that Document defines
214 * strictly well-formed XML. If an error is detected a 'domain' exception is
215 * raised.
216 *
217 * The domain exception will attempt to identify the particular sub-term in
218 * error and the message will show a list of its ancestor elements in the form
219 * <tag>{(id)}* where <id> is the value of any attribute _named_ id.
220 *
221 * At this release, the Controls applying to in-bound (Chars -> Document)
222 * parsing are:
223 *
224 * extended_characters(<bool>) : Use the extended character
225 * : entities for XHTML (default true)
226 *
227 * format(<bool>) : Strip layouts when no character data
228 * : appears between elements.
229 * : (default true)
230 *
231 * remove_attribute_prefixes( <bool>) : Remove namespace prefixes from
232 * : attributes when it's the same as the
233 * : prefix of the parent element
234 * : (default false).
235 *
236 * [<bool> is one of 'true' or 'false']
237 *
238 * For out-bound (Document -> Chars) parsing, the only available option is:
239 *
240 * format(<Bool>) : Indent the element content
241 * : (default true)
242 *
243 * Different DCGs for input and output are used because input parsing is
244 * more flexible than output parsing. Errors in input are recorded as part
245 * of the data structure. Output parsing throws an exception if the document
246 * is not well-formed, diagnosis tries to identify the specific culprit term.
247 */
248 xml_parse( Chars, Document ) :-
249 xml_parse( Chars, Document, [] ).
250
251 xml_parse( Chars, Document, Controls ) :-
252 Goal = xml_parse(Chars,Document,Controls),
253 % format, extended_characters, remove_attribute_prefixes
254 must_be(Controls, proper_list, Goal, 3),
255 xml_options(Controls, opt(true,true,false), Options, Goal, 3),
256 ( ground( Chars ) ->
257 xml_to_document( Options, Chars, Document )
258 ; ground(Document) ->
259 document_to_xml( Options, Document, Chars )
260 ; illarg(var, Goal, 0)
261 ), !. /*MC*/
262
263 xml_options([], Opt, Opt, _, _) :- !.
264 xml_options([X|L], Opt0, Opt, Goal, ArgNo) :- !,
265 ( callable(X),
266 xml_option(X, Opt0, Opt1) -> true
267 ; illarg(domain(term,xml_parse_option), Goal, ArgNo, X)
268 ),
269 xml_options(L, Opt1, Opt, Goal, ArgNo).
270
271 xml_option(format(X), opt(_,B,C), opt(X,B,C)) :-
272 bool_option(X).
273 xml_option(extended_characters(X), opt(A,_,C), opt(A,X,C)) :-
274 bool_option(X).
275 xml_option(remove_attribute_prefixes(X), opt(A,B,_), opt(A,B,X)) :-
276 bool_option(X).
277
278 bool_option(X) :- var(X), !, fail.
279 bool_option(false).
280 bool_option(true).
281
282 document_to_xml( opt(Format,_,_), Document, Chars ) :-
283 ? ( document_generation(Format, Document, Chars0, [] ) ->
284 Chars = Chars0
285 ;
286 xml_fault( Document, [], Culprit, Path, Message ),
287 throw(xml_parse(Message,Document,Culprit,Path))
288 ).
289
290 :- multifile user:generate_message_hook/3.
291 user:generate_message_hook(xml_parse(Message,Document,Culprit,Path)) --> !,
292 ['XML Parse: ~a in ~q'-[Message,Document],nl,
293 'Culprit: ~q'-[Culprit],nl],
294 ( {Path==[]} -> []
295 ; ['Path: ~s'-[Path],nl]
296 ).
297
298
299 /* xml_subterm( +XMLTerm, ?Subterm ) unifies Subterm with a sub-term of Term.
300 * Note that XMLTerm is a sub-term of itself.
301 */
302 xml_subterm( Term, Term ).
303 xml_subterm( xml(_Attributes, Content), Term ) :-
304 xml_subterm( Content, Term ).
305 xml_subterm( [H|T], Term ) :-
306 ( xml_subterm( H, Term )
307 ; xml_subterm( T, Term )
308 ).
309 xml_subterm( element(_Name,_Attributes,Content), Term ) :-
310 xml_subterm( Content, Term ).
311 xml_subterm( namespace(_URI,_Prefix,Content), Term ) :-
312 xml_subterm( Content, Term ).
313
314 /* xml is intended to be a rather modular module: it should be easy to
315 * build a program that can output XML, but not read it, or vice versa.
316 * Similarly, you may be happy to dispense with diagnosis once you are
317 * sure that your code will only try to make valid calls to xml_parse/2.
318 *
319 * It is intended that the code should be very portable too. Clearly,
320 * some small changes will be needed between platforms, but these should
321 * be limited to xml_utilities. xml_utilities contains most of the shared
322 * code and most of the potentially non-portable code.
323 */
324 /* xml_acquisition.pl : XML -> Document translation.
325 *
326 * $Revision$
327 *
328 */
329
330 /* xml_to_document( +Options, +XML, ?Document ) translates the list of
331 * character codes XML into the Prolog term Document. Options is
332 * the parsed options list.
333 */
334 xml_to_document( Options, XML, Document ) :-
335 initial_context( Options, Context ),
336 debug_format(19,'Parsing XML:~n~s~n',[XML]), %PATCH: LEUSCHEL
337 ( xml_declaration( Attributes0, XML, XML1 ) ->
338 Attributes = Attributes0
339 ;
340 format('No <xml> header declaration~n',[]), %PATCH LEUSCHEL
341 XML1 = XML,
342 Attributes = []
343 ),
344 xml_to_document( XML1, Context, Terms, [], WellFormed ),
345 xml_to_document1( WellFormed, Attributes, Terms, Document ).
346
347 xml_to_document1( true, Attributes, Terms, xml(Attributes, Terms) ).
348 xml_to_document1( false, Attributes, Terms, malformed(Attributes, Terms) ).
349
350 % unparsed( +Unparsed, +Context, ?Terms, ?Residue, ?WellFormed )
351 unparsed( Unparsed, _Context, [unparsed(Unparsed)], [], false ).
352
353 xml_declaration( Attributes ) -->
354 spaces,
355 "<?",
356 nmtoken( xml ),
357 xml_declaration_attributes( Attributes ),
358 spaces,
359 "?>".
360
361 xml_to_document( [], Context, Terms, [], WF ) :-
362 close_context( Context, Terms, WF ).
363 xml_to_document( [Char|Chars], Context, Terms, Residue, WF ) :-
364 ( Char =:= "<" ->
365 xml_markup_structure( Chars, Context, Terms, Residue, WF )
366 ; Char =:= "&" ->
367 entity_reference( Chars, Context, Terms, Residue, WF )
368 ; Char =< " ",
369 \+ space_preserve( Context ) ->
370 layouts( Chars, Context, [Char|T], T, Terms, Residue, WF )
371 ; void_context( Context ) ->
372 unparsed( [Char|Chars], Context, Terms, Residue, WF )
373 ;
374 Terms = [pcdata([Char|Chars1])|Terms1],
375 acquire_pcdata( Chars, Context, Chars1, Terms1, Residue, WF )
376 ).
377
378 layouts( [], Context, _Plus, _Minus, Terms, [], WF ) :-
379 close_context( Context, Terms, WF ).
380 layouts( [Char|Chars], Context, Plus, Minus, Terms, Residue, WF ) :-
381 ( Char =:= "<" ->
382 xml_markup_structure( Chars, Context, Terms, Residue, WF )
383 ; Char =:= "&" ->
384 entity_reference( Chars, Context, Terms, Residue, WF )
385 ; Char =< " " ->
386 Minus = [Char|Minus1],
387 layouts( Chars, Context, Plus, Minus1, Terms, Residue, WF )
388 ; void_context( Context ) ->
389 unparsed( [Char|Chars], Context, Terms, Residue, WF )
390 ;
391 Terms = [pcdata(Plus)|Terms1],
392 Minus = [Char|Chars1],
393 context_update( space_preserve, Context, true, Context1 ),
394 acquire_pcdata( Chars, Context1, Chars1, Terms1, Residue, WF )
395 ).
396
397 acquire_pcdata( [], Context, [], Terms, [], WF ) :-
398 close_context( Context, Terms, WF ).
399 acquire_pcdata( [Char|Chars], Context, Chars1, Terms, Residue, WF ) :-
400 ( Char =:= "<" ->
401 Chars1 = [],
402 xml_markup_structure( Chars, Context, Terms, Residue, WF )
403 ; Char =:= "&" ->
404 reference_in_pcdata( Chars, Context, Chars1, Terms, Residue, WF )
405 ;
406 Chars1 = [Char|Chars2],
407 acquire_pcdata( Chars, Context, Chars2, Terms, Residue, WF )
408 ).
409
410 xml_markup_structure( [], Context, Terms, Residue, WF ) :-
411 unparsed( "<", Context, Terms, Residue, WF ).
412 xml_markup_structure( Chars, Context, Terms, Residue, WF ) :-
413 Chars = [Char|Chars1],
414 ( Char =:= "/" ->
415 closing_tag( Context, Chars1, Terms, Residue, WF )
416 ; Char =:= "?" ->
417 pi_acquisition( Chars1, Context, Terms, Residue, WF )
418 ; Char =:= "!" ->
419 declaration_acquisition( Chars1, Context, Terms, Residue, WF )
420 ; open_tag(Tag,Context,Attributes,Type, Chars, Chars2 ) ->
421 push_tag( Tag, Chars2, Context, Attributes, Type, Terms, Residue, WF )
422 ;
423 unparsed( [0'<|Chars], Context, Terms, Residue, WF ) %'
424 ).
425
426 push_tag( Tag, Chars, Context, Attributes, Type, Terms, Residue, WF ) :-
427 new_element(Tag, Chars, Context, Attributes, Type, Term, Rest, WF0),
428 push_tag1( WF0, Context, Term, Rest, Terms, Residue, WF ).
429
430 push_tag1( true, Context, Term, Chars, [Term|Terms], Residue, WF ) :-
431 xml_to_document( Chars, Context, Terms, Residue, WF ).
432 push_tag1( false, _Context, Term, Chars, [Term], Chars, false ).
433
434 new_element( TagChars, Chars, Context, Attributes0, Type, Term, Residue, WF ) :-
435 namespace_attributes( Attributes0, Context, Context1, Attributes1 ),
436 ( append( NSChars, [0':|TagChars1], TagChars ), %'
437 specific_namespace( NSChars, Context1, SpecificNamespace ) ->
438 Namespace0 = SpecificNamespace
439 ;
440 NSChars = "",
441 TagChars1 = TagChars,
442 default_namespace( Context1, Namespace0 )
443 ),
444 current_namespace( Context1, CurrentNamespace ),
445 ( Namespace0 == CurrentNamespace ->
446 Term = element(Tag, Attributes, Contents),
447 Context2 = Context1
448 ;
449 Term = namespace( Namespace0, NSChars,
450 element(Tag, Attributes, Contents)
451 ),
452 context_update( current_namespace, Context1, Namespace0, Context2 )
453 ),
454 input_attributes( Attributes1, Context2, Attributes ),
455 atom_codes( Tag, TagChars1 ),
456 close_tag( Type, Chars, Context2, Contents, Residue, WF ).
457
458 close_tag( empty, Residue, _Context, [], Residue, true ).
459 close_tag( push(Tag), Chars, Context0, Contents, Residue, WF ) :-
460 context_update( element, Context0, Tag, Context1 ),
461 xml_to_document( Chars, Context1, Contents, Residue, WF ).
462
463 pi_acquisition( Chars, Context, Terms, Residue, WellFormed ) :-
464 ( inline_instruction(Target, Processing, Chars, Rest ),
465 Target \== xml ->
466 Terms = [instructions(Target, Processing)|Terms1],
467 xml_to_document( Rest, Context, Terms1, Residue, WellFormed )
468 ;
469 unparsed( [0'<,0'?|Chars], Context, Terms, Residue, WellFormed )
470 ).
471
472 declaration_acquisition( Chars, Context, Terms, Residue, WF ) :-
473 ( declaration_type( Chars, Type, Chars1 ),
474 declaration_parse( Type, Context, Term, Context1, Chars1, Rest ) ->
475 Terms = [Term|Terms1],
476 xml_to_document( Rest, Context1, Terms1, Residue, WF )
477 ;
478 unparsed( [0'<,0'!|Chars], Context, Terms, Residue, WF )
479 ).
480
481 open_tag( Tag, Namespaces, Attributes, Termination ) -->
482 nmtoken_chars( Tag ),
483 attributes( Attributes, [], Namespaces ),
484 spaces,
485 (open_tag_terminator( Tag, Termination ) -> []
486 ; {atom_codes(ATag,Tag), add_xml_warning('XML tag not properly terminated with >: ',ATag),fail}
487 ).
488
489 open_tag_terminator( Tag, push(Tag) ) -->
490 ">".
491 open_tag_terminator( _Tag, empty ) -->
492 "/>".
493
494 declaration_parse( comment, Namespaces, comment(Comment), Namespaces ) -->
495 comment(Comment).
496 declaration_parse( cdata, Namespaces, cdata(CData), Namespaces ) -->
497 cdata( CData ).
498 declaration_parse( doctype, Namespaces0, doctype(Name, Names), Namespaces ) -->
499 doctype( Name, Names, Namespaces0, Namespaces ),
500 spaces,
501 ">".
502
503 inline_instruction( Target, Processing, Plus, Minus ) :-
504 nmtoken(Target, Plus, Mid0 ),
505 spaces( Mid0, Mid1 ),
506 append( Processing, [0'?,0'>|Minus], Mid1 ),
507 !.
508
509 entity_reference_name( Reference ) -->
510 nmtoken_chars( Reference ),
511 ";".
512
513 declaration_type( [Char1,Char2|Chars1], Class, Rest ) :-
514 Chars = [Char1,Char2|Chars1],
515 ( declaration_type1( Char1, Char2, Chars1, Class0, Residue ) ->
516 Class = Class0,
517 Rest = Residue
518 ;
519 Class = generic,
520 Rest = Chars
521 ).
522
523 declaration_type1( 0'-, 0'-, Chars, comment, Chars ).
524 declaration_type1( 0'[, 0'C, Chars, cdata, Residue ) :-
525 append( "DATA[", Residue, Chars ).
526 declaration_type1( 0'D, 0'O, Chars, doctype, Residue ) :-
527 append( "CTYPE", Residue, Chars ).
528
529 closing_tag( Context, Chars, Terms, Residue, WellFormed ) :-
530 ( closing_tag_name( Tag, Chars, Rest ),
531 current_tag( Context, Tag ) ->
532 Terms = [],
533 Residue = Rest,
534 WellFormed = true
535 ;
536 unparsed( [0'<,0'/|Chars], Context, Terms, Residue, WellFormed )
537 ).
538
539 closing_tag_name( Tag ) -->
540 nmtoken_chars( Tag ),
541 spaces,
542 ">".
543
544 entity_reference( Chars, Context, Terms, Residue, WF ) :-
545 ( standard_character_entity( Char, Chars, Rest ) ->
546 Terms = [pcdata([Char|Chars1])|Terms1],
547 acquire_pcdata( Rest, Context, Chars1, Terms1, Residue, WF )
548 ; entity_reference_name( Reference, Chars, Rest ),
549 defined_entity( Reference, Context, String ) ->
550 append( String, Rest, Full ),
551 xml_to_document( Full, Context, Terms, Residue, WF )
552 ;
553 unparsed( [0'&|Chars], Context, Terms, Residue, WF ) %'
554 ).
555
556 reference_in_pcdata( Chars0, Context, Chars1, Terms, Residue, WF ) :-
557 ( standard_character_entity(Char, Chars0, Rest ) ->
558 Chars1 = [Char|Chars2],
559 acquire_pcdata( Rest, Context, Chars2, Terms, Residue, WF )
560 ; entity_reference_name(Reference, Chars0, Rest ),
561 defined_entity( Reference, Context, String ) ->
562 append( String, Rest, Full ),
563 acquire_pcdata( Full, Context, Chars1, Terms, Residue, WF )
564 ;
565 Chars1 = [],
566 unparsed( [0'&|Chars0], Context, Terms, Residue, WF ) %'
567 ).
568
569 namespace_attributes( [], Context, Context, [] ).
570 namespace_attributes( Attributes0, Context0, Context, Attributes ) :-
571 Attributes0 = [_|_],
572 append( "xmlns:", Unqualified, QualifiedNameChars ),
573 ( select( "xmlns"=Value, Attributes0, Attributes1 ) ->
574 atom_codes( URI, Value ),
575 context_update( default_namespace, Context0, URI, Context1 ),
576 namespace_attributes( Attributes1, Context1, Context, Attributes )
577 ; select( QualifiedNameChars=Value, Attributes0, Attributes1 ) ->
578 Attributes = [QualifiedNameChars=Value|Attributes2],
579 atom_codes( URI, Value ),
580 context_update( ns_prefix(Unqualified), Context0, URI, Context1 ),
581 namespace_attributes( Attributes1, Context1, Context, Attributes2 )
582 ; member( "xml:space"="preserve", Attributes0 ) ->
583 Attributes = Attributes0,
584 context_update( space_preserve, Context0, true, Context )
585 ;
586 Context = Context0,
587 Attributes = Attributes0
588 ).
589
590 input_attributes( [], _Context, [] ).
591 input_attributes( [NameChars=Value|Attributes0], Context,
592 [Name=Value|Attributes] ) :-
593 ( remove_attribute_prefixes( Context ),
594 append( NSChars, [0':|NameChars1], NameChars ), %'
595 NSChars \== "xmlns",
596 specific_namespace( NSChars, Context, Namespace ),
597 current_namespace( Context, Namespace ) ->
598 atom_codes( Name, NameChars1 )
599 ;
600 atom_codes( Name, NameChars )
601 ),
602 input_attributes( Attributes0, Context, Attributes ).
603
604 attributes( Attributes, Seen, Namespaces ) -->
605 attributes_count_lf(0, Attributes, Seen, Namespaces ). % PATCH LEUSCHEL: add count parameter
606
607 attributes_count_lf(Count0, [Name=Value|Attributes], Seen, Namespaces ) -->
608 spaces_count_linebreaks(Count0,C1),
609 nmtoken_chars( Name ),
610 {\+ member(Name, Seen)},
611 spaces_count_linebreaks(C1,C2),
612 expect_tokens("=","attributes"),
613 spaces_count_linebreaks(C2,C3),
614 attribute_value( Value, Namespaces, Name ),
615 attributes_count_lf( C3, Attributes, [Name|Seen], Namespaces ).
616 attributes_count_lf( 0, [], _Seen, _Namespaces ) --> !, "". % ,{print(no_linefeeds),nl}.
617 attributes_count_lf( Count, ["$attribute_linefeeds"=Count], _Seen, _Namespaces ) --> "". %, {print(linefeeds(Count)),nl}.
618
619 spaces_count_linebreaks(C,C, [], [] ).
620 spaces_count_linebreaks(Count,ResCount, [Char|Chars0], Chars1 ) :-
621 ( Char=10 ->
622 Count1 is Count+1, spaces_count_linebreaks(Count1, ResCount, Chars0, Chars1 )
623 ; Char =< 32 ->
624 spaces_count_linebreaks(Count,ResCount, Chars0, Chars1 )
625 ;
626 Count=ResCount, Chars1 = [Char|Chars0]
627 ).
628 % END PATCH
629
630 xml_declaration_attributes( [] ) --> "".
631 xml_declaration_attributes( Res ) -->
632 spaces,
633 nmtoken( Name ),
634 spaces,
635 expect_tokens("=","declaration attribute"),
636 spaces,
637 xml_string( Value ),
638 {xml_declaration_attribute_valid(Name, Value)
639 -> Res = [Name-Value|Attributes] ; Res = [Attributes]}, % PATCH LEUSCHEL: also continue with invalid declarations
640 xml_declaration_attributes( Attributes ),
641 spaces.
642
643 doctype( Name, External, Namespaces0, Namespaces1 ) -->
644 spaces,
645 nmtoken( Name ),
646 spaces,
647 doctype_id( External0 ),
648 spaces,
649 doctype1( Namespaces0, Literals, Namespaces1 ),
650 {doctype_extension(Literals, External0, External)}.
651
652 doctype_extension( [], External, External ).
653 doctype_extension( [Literal|Literals], External0, External ) :-
654 extended_doctype( External0, [Literal|Literals], External ).
655
656 extended_doctype( system(URL), Literals, system(URL,Literals) ).
657 extended_doctype( public(URN,URL), Literals, public(URN,URL,Literals) ).
658 extended_doctype( local, Literals, local(Literals) ).
659
660 doctype1( Namespaces0, Literals, Namespaces1 ) -->
661 "[",
662 !,
663 dtd( Namespaces0, Literals, Namespaces1 ),
664 expect_tokens("]","doctype").
665 doctype1( Namespaces, [], Namespaces ) --> "".
666
667 doctype_id( system(URL) ) -->
668 "SYSTEM",
669 spaces,
670 uri( URL ).
671 doctype_id( public(URN,URL) ) -->
672 "PUBLIC",
673 spaces,
674 uri( URN ),
675 spaces,
676 uri( URL ).
677 doctype_id( local ) --> "".
678
679 dtd( Namespaces0, Literals, Namespaces1 ) -->
680 spaces,
681 "<!ENTITY",
682 !,
683 spaces,
684 nmtoken_chars( Name ),
685 spaces,
686 quote( Quote ),
687 entity_value( Quote, Namespaces0, String ),
688 spaces,
689 expect_tokens(">","ENTITY"),
690 {\+ character_entity( Name, _StandardChar ),
691 % Don't allow < "e; etc. to be updated
692 context_update( entity(Name), Namespaces0, String, Namespaces2 )
693 },
694 dtd( Namespaces2, Literals, Namespaces1 ).
695 dtd( Namespaces0, Literals, Namespaces1 ) -->
696 spaces,
697 "<!--",
698 !,
699 dtd_comment,
700 expect_tokens(">","comment"),
701 dtd( Namespaces0, Literals, Namespaces1 ).
702 dtd( Namespaces0, [dtd_literal(Literal)|Literals], Namespaces1 ) -->
703 spaces,
704 "<!",
705 !,
706 dtd_literal( Literal ),
707 dtd( Namespaces0, Literals, Namespaces1 ).
708 dtd( Namespaces, [], Namespaces ) --> spaces.
709
710 dtd_literal( [] ) --> ">", !.
711 dtd_literal( Chars ) -->
712 "--",
713 !,
714 dtd_comment,
715 dtd_literal( Chars ).
716 dtd_literal( [Char|Chars] ) -->
717 [Char],
718 dtd_literal( Chars ).
719
720 dtd_comment( Plus, Minus ) :-
721 append( _Chars, [0'-,0'-|Minus], Plus ),
722 !.
723
724 entity_value( Quote, Namespaces, String, [Char|Plus], Minus ) :-
725 ( Char == Quote ->
726 String = [],
727 Minus = Plus
728 ; Char =:= "&" ->
729 reference_in_entity( Namespaces, Quote, String, Plus, Minus )
730 ;
731 String = [Char|String1],
732 entity_value( Quote, Namespaces, String1, Plus, Minus )
733 ).
734
735 attribute_value( String, Namespaces, Name ) -->
736 (quote( Quote ) -> []
737 ; {atom_codes(AN,Name), add_xml_warning('Expecting starting quotes for value of XML attribute: ',AN),fail}),
738 attribute_leading_layouts( Quote, Namespaces, String ).
739
740 attribute_leading_layouts( _Quote, _Namespace, [], [], [] ).
741 attribute_leading_layouts( Quote, Namespaces, String, [Char|Plus], Minus ) :-
742 ( Char == Quote ->
743 String = [],
744 Minus = Plus
745 ; Char =:= "&" ->
746 reference_in_layout( Namespaces, Quote, String, Plus, Minus )
747 ; Char > 32, Char \== 160 ->
748 String = [Char|String1],
749 attribute_layouts( Quote, Namespaces, false, String1, Plus, Minus )
750 ;
751 attribute_leading_layouts( Quote, Namespaces, String, Plus, Minus )
752 ).
753
754 attribute_layouts( _Quote, _Namespaces, _Layout, [], [], [] ).
755 attribute_layouts( Quote, Namespaces, Layout, String, [Char|Plus], Minus ) :-
756 ( Char == Quote ->
757 String = [],
758 Minus = Plus
759 ; Char =:= "&" ->
760 reference_in_value( Namespaces, Quote, Layout, String, Plus, Minus )
761 ; Char > 32, Char \== 160 ->
762 ( Layout == true ->
763 String = [0' ,Char|String1] %'
764 ;
765 String = [Char|String1]
766 ),
767 attribute_layouts( Quote, Namespaces, false, String1, Plus, Minus )
768 ;
769 attribute_layouts( Quote, Namespaces, true, String, Plus, Minus )
770 ).
771
772 reference_in_layout( NS, Quote, String, Plus, Minus ) :-
773 ( standard_character_entity( Char, Plus, Mid ) ->
774 String = [Char|String1],
775 attribute_layouts( Quote, NS, false, String1, Mid, Minus )
776 ; entity_reference_name( Name, Plus, Suffix ),
777 defined_entity( Name, NS, Text ) ->
778 append( Text, Suffix, Mid ),
779 attribute_leading_layouts( Quote, NS, String, Mid, Minus )
780 ; % Just & is okay in a value
781 String = [0'&|String1], %'
782 attribute_layouts( Quote, NS, false, String1, Plus, Minus )
783 ).
784
785 reference_in_value( Namespaces, Quote, Layout, String, Plus, Minus ) :-
786 ( standard_character_entity( Char, Plus, Mid ) ->
787 ( Layout == true ->
788 String = [0' ,Char|String1] %'
789 ;
790 String = [Char|String1]
791 ),
792 Layout1 = false
793 ; entity_reference_name( Name, Plus, Suffix ),
794 defined_entity( Name, Namespaces, Text ) ->
795 String = String1,
796 append( Text, Suffix, Mid ),
797 Layout1 = Layout
798 ; % Just & is okay in a value
799 Mid = Plus,
800 String = [0'&|String1], %'
801 Layout1 = false
802 ),
803 attribute_layouts( Quote, Namespaces, Layout1, String1, Mid, Minus ).
804
805 /* References are resolved backwards in Entity defintions so that
806 * circularity is avoided.
807 */
808 reference_in_entity( Namespaces, Quote, String, Plus, Minus ) :-
809 ( standard_character_entity( _SomeChar, Plus, _Rest ) ->
810 String = [0'&|String1], % ' Character entities are unparsed
811 Mid = Plus
812 ; entity_reference_name( Name, Plus, Suffix ),
813 defined_entity( Name, Namespaces, Text ) ->
814 String = String1,
815 append( Text, Suffix, Mid )
816 ),
817 entity_value( Quote, Namespaces, String1, Mid, Minus ).
818
819 standard_character_entity( Char ) -->
820 "#x", hex_character_reference( Char ), ";".
821 standard_character_entity( Char ) -->
822 "#", digit( Digit ), digits( Digits ), ";",
823 {number_codes( Char, [Digit|Digits])}.
824 standard_character_entity( C ) -->
825 chars( String ),
826 ";",
827 !,
828 {character_entity(String, C)}.
829
830 uri( URI ) -->
831 quote( Quote ),
832 uri1( Quote, URI ).
833
834 uri1( Quote, [] ) -->
835 quote( Quote ),
836 !.
837 uri1( Quote, [Char|Chars] ) -->
838 [Char],
839 uri1( Quote, Chars ).
840
841 comment( Chars, Plus, Minus ) :-
842 append( Chars, [0'-,0'-,0'>|Minus], Plus ), %'
843 !.
844
845 cdata( Chars, Plus, Minus ) :-
846 append( Chars, [0'],0'],0'>|Minus], Plus ), %'
847 !.
848 % Syntax Components
849
850 hex_character_reference( Code ) -->
851 hex_character_reference1( 0, Code ).
852
853 hex_character_reference1( Current, Code ) -->
854 hex_digit_char( Value ),
855 !,
856 {New is (Current << 4) + Value},
857 hex_character_reference1( New, Code ).
858 hex_character_reference1( Code, Code ) --> "".
859
860 hex_digit_char( 0 ) --> "0".
861 hex_digit_char( 1 ) --> "1".
862 hex_digit_char( 2 ) --> "2".
863 hex_digit_char( 3 ) --> "3".
864 hex_digit_char( 4 ) --> "4".
865 hex_digit_char( 5 ) --> "5".
866 hex_digit_char( 6 ) --> "6".
867 hex_digit_char( 7 ) --> "7".
868 hex_digit_char( 8 ) --> "8".
869 hex_digit_char( 9 ) --> "9".
870 hex_digit_char( 10 ) --> "A".
871 hex_digit_char( 11 ) --> "B".
872 hex_digit_char( 12 ) --> "C".
873 hex_digit_char( 13 ) --> "D".
874 hex_digit_char( 14 ) --> "E".
875 hex_digit_char( 15 ) --> "F".
876 hex_digit_char( 10 ) --> "a".
877 hex_digit_char( 11 ) --> "b".
878 hex_digit_char( 12 ) --> "c".
879 hex_digit_char( 13 ) --> "d".
880 hex_digit_char( 14 ) --> "e".
881 hex_digit_char( 15 ) --> "f".
882
883 quote( 0'" ) --> %'
884 """".
885 quote( 0'\' ) -->
886 "'".
887
888 spaces( [], [] ).
889 spaces( [Char|Chars0], Chars1 ) :-
890 ( Char =< 32 ->
891 spaces( Chars0, Chars1 )
892 ;
893 Chars1 = [Char|Chars0]
894 ).
895
896 nmtoken( Name ) -->
897 nmtoken_chars( Chars ),
898 {atom_codes(Name, Chars)}.
899
900 nmtoken_chars( [Char|Chars] ) -->
901 [Char],
902 {nmtoken_first( Char )},
903 nmtoken_chars_tail( Chars ).
904
905 nmtoken_chars_tail( [Char|Chars] ) -->
906 [Char],
907 {nmtoken_char(Char)},
908 !,
909 nmtoken_chars_tail( Chars ).
910 nmtoken_chars_tail([]) --> "".
911
912 nmtoken_first( 0': ).
913 nmtoken_first( 0'_ ).
914 nmtoken_first( Char ) :-
915 alphabet( Char ).
916
917 nmtoken_char( 0'a ).
918 nmtoken_char( 0'b ).
919 nmtoken_char( 0'c ).
920 nmtoken_char( 0'd ).
921 nmtoken_char( 0'e ).
922 nmtoken_char( 0'f ).
923 nmtoken_char( 0'g ).
924 nmtoken_char( 0'h ).
925 nmtoken_char( 0'i ).
926 nmtoken_char( 0'j ).
927 nmtoken_char( 0'k ).
928 nmtoken_char( 0'l ).
929 nmtoken_char( 0'm ).
930 nmtoken_char( 0'n ).
931 nmtoken_char( 0'o ).
932 nmtoken_char( 0'p ).
933 nmtoken_char( 0'q ).
934 nmtoken_char( 0'r ).
935 nmtoken_char( 0's ).
936 nmtoken_char( 0't ).
937 nmtoken_char( 0'u ).
938 nmtoken_char( 0'v ).
939 nmtoken_char( 0'w ).
940 nmtoken_char( 0'x ).
941 nmtoken_char( 0'y ).
942 nmtoken_char( 0'z ).
943 nmtoken_char( 0'A ).
944 nmtoken_char( 0'B ).
945 nmtoken_char( 0'C ).
946 nmtoken_char( 0'D ).
947 nmtoken_char( 0'E ).
948 nmtoken_char( 0'F ).
949 nmtoken_char( 0'G ).
950 nmtoken_char( 0'H ).
951 nmtoken_char( 0'I ).
952 nmtoken_char( 0'J ).
953 nmtoken_char( 0'K ).
954 nmtoken_char( 0'L ).
955 nmtoken_char( 0'M ).
956 nmtoken_char( 0'N ).
957 nmtoken_char( 0'O ).
958 nmtoken_char( 0'P ).
959 nmtoken_char( 0'Q ).
960 nmtoken_char( 0'R ).
961 nmtoken_char( 0'S ).
962 nmtoken_char( 0'T ).
963 nmtoken_char( 0'U ).
964 nmtoken_char( 0'V ).
965 nmtoken_char( 0'W ).
966 nmtoken_char( 0'X ).
967 nmtoken_char( 0'Y ).
968 nmtoken_char( 0'Z ).
969 nmtoken_char( 0'0 ).
970 nmtoken_char( 0'1 ).
971 nmtoken_char( 0'2 ).
972 nmtoken_char( 0'3 ).
973 nmtoken_char( 0'4 ).
974 nmtoken_char( 0'5 ).
975 nmtoken_char( 0'6 ).
976 nmtoken_char( 0'7 ).
977 nmtoken_char( 0'8 ).
978 nmtoken_char( 0'9 ).
979 nmtoken_char( 0'. ).
980 nmtoken_char( 0'- ).
981 nmtoken_char( 0'_ ).
982 nmtoken_char( 0': ).
983
984 xml_string( String ) -->
985 quote( Quote ),
986 xml_string1( Quote, String ).
987
988 xml_string1( Quote, [] ) -->
989 quote( Quote ),
990 !.
991 xml_string1( Quote, [Char|Chars] ) -->
992 [Char],
993 xml_string1( Quote, Chars ).
994
995 alphabet( 0'a ).
996 alphabet( 0'b ).
997 alphabet( 0'c ).
998 alphabet( 0'd ).
999 alphabet( 0'e ).
1000 alphabet( 0'f ).
1001 alphabet( 0'g ).
1002 alphabet( 0'h ).
1003 alphabet( 0'i ).
1004 alphabet( 0'j ).
1005 alphabet( 0'k ).
1006 alphabet( 0'l ).
1007 alphabet( 0'm ).
1008 alphabet( 0'n ).
1009 alphabet( 0'o ).
1010 alphabet( 0'p ).
1011 alphabet( 0'q ).
1012 alphabet( 0'r ).
1013 alphabet( 0's ).
1014 alphabet( 0't ).
1015 alphabet( 0'u ).
1016 alphabet( 0'v ).
1017 alphabet( 0'w ).
1018 alphabet( 0'x ).
1019 alphabet( 0'y ).
1020 alphabet( 0'z ).
1021 alphabet( 0'A ).
1022 alphabet( 0'B ).
1023 alphabet( 0'C ).
1024 alphabet( 0'D ).
1025 alphabet( 0'E ).
1026 alphabet( 0'F ).
1027 alphabet( 0'G ).
1028 alphabet( 0'H ).
1029 alphabet( 0'I ).
1030 alphabet( 0'J ).
1031 alphabet( 0'K ).
1032 alphabet( 0'L ).
1033 alphabet( 0'M ).
1034 alphabet( 0'N ).
1035 alphabet( 0'O ).
1036 alphabet( 0'P ).
1037 alphabet( 0'Q ).
1038 alphabet( 0'R ).
1039 alphabet( 0'S ).
1040 alphabet( 0'T ).
1041 alphabet( 0'U ).
1042 alphabet( 0'V ).
1043 alphabet( 0'W ).
1044 alphabet( 0'X ).
1045 alphabet( 0'Y ).
1046 alphabet( 0'Z ).
1047
1048 digit( C ) --> [C], {digit_table( C )}.
1049
1050 digit_table( 0'0 ).
1051 digit_table( 0'1 ).
1052 digit_table( 0'2 ).
1053 digit_table( 0'3 ).
1054 digit_table( 0'4 ).
1055 digit_table( 0'5 ).
1056 digit_table( 0'6 ).
1057 digit_table( 0'7 ).
1058 digit_table( 0'8 ).
1059 digit_table( 0'9 ).
1060
1061 digits( [Digit|Digits] ) -->
1062 digit( Digit ),
1063 digits( Digits ).
1064 digits( [] ) --> [].
1065
1066 character_entity( "quot", 0'" ). %'
1067 character_entity( "amp", 0'& ). %'
1068 character_entity( "lt", 0'< ). %'
1069 character_entity( "gt", 0'> ). %'
1070 character_entity( "apos", 0'\' ).
1071 /* xml_diagnosis.pl : XML exception diagnosis.
1072 *
1073 * $Revision$
1074 */
1075
1076 /* xml_fault( +Term, +Indentation, ?SubTerm, ?Path, ?Message ) identifies SubTerm
1077 * as a sub-term of Term which cannot be serialized after Indentation.
1078 * Message is an atom naming the type of error; Path is a string encoding a
1079 * list of SubTerm's ancestor elements in the form <tag>{(id)}* where <tag> is the
1080 * element tag and <id> is the value of any attribute _named_ id.
1081 */
1082 xml_fault( Term, _Indent, Term, [], 'Illegal Variable' ) :-
1083 var( Term ).
1084 xml_fault( xml(Attributes,_Content), _Indent, Term, [], Message ) :-
1085 member( Attribute, Attributes ),
1086 attribute_fault( Attribute, Term, Message ).
1087 xml_fault( xml(_Attributes,Content), Indent, Culprit, Path, Message ) :-
1088 xml_content_fault( Content, Indent, Culprit, Path, Message ).
1089 xml_fault( Term, _Indent, Term, [], 'Illegal Term' ).
1090
1091 xml_content_fault( Term, _Indent, Term, [], 'Illegal Variable' ) :-
1092 var( Term ).
1093 xml_content_fault( pcdata(Chars), _Indent, Chars, [], 'Invalid Character Data' ) :-
1094 \+ is_chars( Chars ).
1095 xml_content_fault( cdata(Chars), _Indent, Chars, [], 'Invalid Character Data' ) :-
1096 \+ is_chars( Chars ).
1097 xml_content_fault( [H|_T], Indent, Culprit, Path, Message ) :-
1098 xml_content_fault( H, Indent, Culprit, Path, Message ).
1099 xml_content_fault( [_H|T], Indent, Culprit, Path, Message ) :-
1100 xml_content_fault( T, Indent, Culprit, Path, Message ).
1101 xml_content_fault( namespace(_URI,_Prefix,Element), Indent, Culprit, Path, Message ) :-
1102 element_fault( Element, [0' |Indent], Culprit, Path, Message ).
1103 xml_content_fault( Element, Indent, Culprit, Path, Message ) :-
1104 element_fault( Element, [0' |Indent], Culprit, Path, Message ).
1105 xml_content_fault( Term, Indent, Term, [], 'Illegal Term' ) :-
1106 \+ generation(Term, "", false, Indent, _Format, _Plus, _Minus ).
1107
1108 element_fault( element(Tag, _Attributes, _Contents), _Indent, Tag, [], 'Tag must be an atom' ) :-
1109 \+ atom( Tag ).
1110 element_fault( element(Tag, Attributes, _Contents), _Indent, Tag, [], 'Attributes must be instantiated' ) :-
1111 var( Attributes ).
1112 element_fault( element(Tag, Attributes, _Contents), _Indent, Faulty, Path, Message ) :-
1113 fault_path( Tag, Attributes, Path, [] ),
1114 member( Attribute, Attributes ),
1115 attribute_fault( Attribute, Faulty, Message ).
1116 element_fault( element(Tag, Attributes, Contents), Indent, Culprit, Path, Message ) :-
1117 fault_path( Tag, Attributes, Path, Path1 ),
1118 xml_content_fault( Contents, Indent, Culprit, Path1, Message ).
1119
1120 attribute_fault( Attribute, Attribute, 'Illegal Variable' ) :-
1121 var( Attribute ).
1122 attribute_fault( Name=Value, Name=Value, 'Attribute Name must be atom' ) :-
1123 \+ atom(Name).
1124 attribute_fault( Name=Value, Name=Value, 'Attribute Value must be chars' ) :-
1125 \+ is_chars( Value ).
1126 attribute_fault( Attribute, Attribute, 'Malformed Attribute' ) :-
1127 Attribute \= (_Name=_Value).
1128
1129 is_chars( Chars ) :-
1130 is_list( Chars ),
1131 \+ (member( Char, Chars ), \+ (integer(Char), Char >=0, Char =< 255)).
1132
1133 fault_path( Tag, Attributes ) -->
1134 {atom_codes( Tag, Chars )},
1135 chars( Chars ),
1136 fault_id( Attributes ),
1137 " ".
1138
1139 fault_id( Attributes ) -->
1140 {member( id=Chars, Attributes ), is_chars( Chars )},
1141 !,
1142 "(", chars(Chars), ")".
1143 fault_id( _Attributes ) --> "".
1144 /* xml_generation.pl : Document -> XML translation
1145 *
1146 * $Revision$
1147 */
1148
1149 /* document_generation( +Format, +Document ) is a DCG generating Document
1150 * as a list of character codes. Format is true|false defining whether layouts,
1151 * to provide indentation, should be added between the element content of
1152 * the resultant "string". Note that formatting is disabled for elements that
1153 * are interspersed with pcdata/1 terms, such as XHTML's 'inline' elements.
1154 * Also, Format is over-ridden, for an individual element, by an explicit
1155 * 'xml:space'="preserve" attribute.
1156 */
1157 document_generation( Format, xml(Attributes, Document) ) -->
1158 ? document_generation_body( Attributes, Format, Document ).
1159
1160 document_generation_body( [], Format, Document ) -->
1161 generation( Document, "", Format, [], _Format1 ).
1162 document_generation_body( Attributes, Format, Document ) -->
1163 { Attributes = [_|_],
1164 ? xml_declaration_attributes_valid( Attributes )
1165 },
1166 "<?xml",
1167 generated_attributes( Attributes, Format, Format0 ),
1168 "?>",
1169 indent( true, [] ),
1170 generation( Document, "", Format0, [], _Format1 ).
1171
1172 generation( [], _Prefix, Format, _Indent, Format ) --> [].
1173 generation( [Term|Terms], Prefix, Format0, Indent, Format ) -->
1174 generation( Term, Prefix, Format0, Indent, Format1 ),
1175 generation( Terms, Prefix, Format1, Indent, Format ).
1176 generation( doctype(Name, External), _Prefix, Format, [], Format ) -->
1177 "<!DOCTYPE ",
1178 generated_name( Name ),
1179 generated_external_id( External ),
1180 ">".
1181 generation( instructions(Target,Process), _Prefix, Format, Indent, Format ) -->
1182 indent( Format, Indent ),
1183 "<?", generated_name(Target), " ", chars( Process ) ,"?>".
1184 generation( pcdata(Chars), _Prefix, _Format, _Indent, false ) -->
1185 pcdata_generation( Chars ).
1186 generation( comment( Comment ), _Prefix, Format, Indent, Format ) -->
1187 indent( Format, Indent ),
1188 "<!--", chars( Comment ), "-->".
1189 generation( namespace(URI, Prefix, element(Name, Atts, Content)),
1190 _Prefix0, Format, Indent, Format ) -->
1191 indent( Format, Indent ),
1192 "<", generated_prefixed_name( Prefix, Name ),
1193 generated_prefixed_attributes( Prefix, URI, Atts, Format, Format1 ),
1194 generated_content( Content, Format1, Indent, Prefix, Name ).
1195 generation( element(Name, Atts, Content), Prefix, Format, Indent, Format ) -->
1196 indent( Format, Indent ),
1197 "<", generated_prefixed_name( Prefix, Name ),
1198 generated_attributes( Atts, Format, Format1 ),
1199 generated_content( Content, Format1, Indent, Prefix, Name ).
1200 generation( cdata(CData), _Prefix, Format, Indent, Format ) -->
1201 indent( Format, Indent ),
1202 "<![CDATA[", cdata_generation(CData), "]]>".
1203
1204 generated_attributes( [], Format, Format ) --> [].
1205 generated_attributes( [Name=Value|Attributes], Format0, Format ) -->
1206 {( Name == 'xml:space',
1207 Value="preserve" ->
1208 Format1 = false
1209 ;
1210 Format1 = Format0
1211 )},
1212 " ",
1213 generated_name( Name ),
1214 "=""",
1215 quoted_string( Value ),
1216 """",
1217 generated_attributes( Attributes, Format1, Format ).
1218
1219 generated_prefixed_name( [], Name ) -->
1220 generated_name( Name ).
1221 generated_prefixed_name( Prefix, Name ) -->
1222 {Prefix = [_|_]},
1223 chars( Prefix ), ":",
1224 generated_name( Name ).
1225
1226 generated_content( [], _Format, _Indent, _Prefix, _Namespace ) -->
1227 " />". % Leave an extra space for XHTML output.
1228 generated_content( [H|T], Format, Indent, Prefix, Namespace ) -->
1229 ">",
1230 generation( H, Prefix, Format, [0' |Indent], Format1 ),
1231 generation( T, Prefix, Format1, [0' |Indent], Format2 ),
1232 indent( Format2, Indent ),
1233 "</", generated_prefixed_name( Prefix, Namespace ), ">".
1234
1235 generated_prefixed_attributes( [_|_Prefix], _URI, Atts, Format0, Format ) -->
1236 generated_attributes( Atts, Format0, Format ).
1237 generated_prefixed_attributes( [], URI, Atts, Format0, Format ) -->
1238 {atom_codes( URI, Namespace ),
1239 findall( Attr, (member(Attr, Atts), Attr \= (xmlns=_Val)), Atts1 )
1240 },
1241 generated_attributes( [xmlns=Namespace|Atts1], Format0, Format ).
1242
1243 generated_name( Name, Plus, Minus ) :-
1244 atom_codes( Name, Chars ),
1245 append( Chars, Minus, Plus ).
1246
1247 generated_external_id( local ) --> "".
1248 generated_external_id( local(Literals) ) --> " [",
1249 generated_doctype_literals( Literals ), "\n\t]".
1250 generated_external_id( system(URL) ) -->
1251 " SYSTEM """,
1252 chars( URL ),
1253 """".
1254 generated_external_id( system(URL,Literals) ) -->
1255 " SYSTEM """,
1256 chars( URL ),
1257 """ [",
1258 generated_doctype_literals( Literals ), "\n\t]".
1259 generated_external_id( public(URN,URL) ) -->
1260 " PUBLIC """,
1261 chars( URN ),
1262 """ """,
1263 chars( URL ),
1264 """".
1265 generated_external_id( public(URN,URL,Literals) ) -->
1266 " PUBLIC """,
1267 chars( URN ),
1268 """ """,
1269 chars( URL ),
1270 """ [",
1271 generated_doctype_literals( Literals ), "\n\t]".
1272
1273 generated_doctype_literals( [] ) --> "".
1274 generated_doctype_literals( [dtd_literal(String)|Literals] ) --> "\n\t",
1275 "<!", cdata_generation( String ), ">",
1276 generated_doctype_literals( Literals ).
1277
1278 /* quoted_string( +Chars ) is a DCG representing Chars, a list of character
1279 * codes, as a legal XML attribute string. Any leading or trailing layout
1280 * characters are removed. &, " and < characters are replaced by &, "
1281 * and < respectively.
1282 */
1283 quoted_string( Raw, Plus, Minus ) :-
1284 quoted_string1( Raw, NoLeadingLayouts ),
1285 quoted_string2( NoLeadingLayouts, Layout, Layout, Plus, Minus ).
1286
1287 quoted_string1( [], [] ).
1288 quoted_string1( [Char|Chars], NoLeadingLayouts ) :-
1289 ( Char > 32 ->
1290 NoLeadingLayouts = [Char|Chars]
1291 ;
1292 quoted_string1( Chars, NoLeadingLayouts )
1293 ).
1294
1295 quoted_string2( [], _LayoutPlus, _LayoutMinus, List, List ).
1296 quoted_string2( [Char|Chars], LayoutPlus, LayoutMinus, Plus, Minus ) :-
1297 ( Char =< " " ->
1298 Plus = Plus1,
1299 LayoutMinus = [Char|LayoutMinus1],
1300 LayoutPlus = LayoutPlus1
1301 ; Char =< 127 ->
1302 Plus = LayoutPlus,
1303 pcdata_7bit( Char, LayoutMinus, Plus1 ),
1304 LayoutPlus1 = LayoutMinus1
1305 ; legal_xml_unicode( Char ) ->
1306 Plus = LayoutPlus,
1307 number_codes( Char, Codes ),
1308 pcdata_8bits_plus( Codes, LayoutMinus, Plus1 ),
1309 LayoutPlus1 = LayoutMinus1
1310 ;
1311 LayoutPlus = LayoutPlus1,
1312 LayoutMinus = LayoutMinus1,
1313 Plus = Plus1
1314 ),
1315 quoted_string2( Chars, LayoutPlus1, LayoutMinus1, Plus1, Minus ).
1316
1317 indent( false, _Indent ) --> [].
1318 indent( true, Indent ) --> "\n",
1319 chars( Indent ).
1320
1321 /* pcdata_generation( +Chars ) is a DCG representing Chars, a list of character
1322 * codes as legal XML "Parsed character data" (PCDATA) string. Any codes
1323 * which cannot be represented by a 7-bit character are replaced by their
1324 * decimal numeric character entity e.g. code 160 (non-breaking space) is
1325 * represented as  . Any character codes disallowed by the XML
1326 * specification are not encoded.
1327 */
1328 pcdata_generation( [], Plus, Plus ).
1329 pcdata_generation( [Char|Chars], Plus, Minus ) :-
1330 ( Char =< 127 ->
1331 pcdata_7bit( Char, Plus, Mid )
1332 ; legal_xml_unicode( Char ) ->
1333 number_codes( Char, Codes ),
1334 pcdata_8bits_plus( Codes, Plus, Mid )
1335 ;
1336 Plus = Mid
1337 ),
1338 pcdata_generation( Chars, Mid, Minus ).
1339
1340 /* pcdata_7bit(+Char) represents the ascii character set in its
1341 * simplest format, using the character entities & " < and >
1342 * which are common to both XML and HTML. The numeric entity ' is used in
1343 * place of ', because browsers don't recognize it in HTML.
1344 */
1345 pcdata_7bit( 0 ) --> "".
1346 pcdata_7bit( 1 ) --> "".
1347 pcdata_7bit( 2 ) --> "".
1348 pcdata_7bit( 3 ) --> "".
1349 pcdata_7bit( 4 ) --> "".
1350 pcdata_7bit( 5 ) --> "".
1351 pcdata_7bit( 6 ) --> "".
1352 pcdata_7bit( 7 ) --> "".
1353 pcdata_7bit( 8 ) --> "".
1354 pcdata_7bit( 9 ) --> [9].
1355 pcdata_7bit( 10 ) --> [10].
1356 pcdata_7bit( 11 ) --> "".
1357 pcdata_7bit( 12 ) --> "".
1358 pcdata_7bit( 13 ) --> [13].
1359 pcdata_7bit( 14 ) --> "".
1360 pcdata_7bit( 15 ) --> "".
1361 pcdata_7bit( 16 ) --> "".
1362 pcdata_7bit( 17 ) --> "".
1363 pcdata_7bit( 18 ) --> "".
1364 pcdata_7bit( 19 ) --> "".
1365 pcdata_7bit( 20 ) --> "".
1366 pcdata_7bit( 21 ) --> "".
1367 pcdata_7bit( 22 ) --> "".
1368 pcdata_7bit( 23 ) --> "".
1369 pcdata_7bit( 24 ) --> "".
1370 pcdata_7bit( 25 ) --> "".
1371 pcdata_7bit( 26 ) --> "".
1372 pcdata_7bit( 27 ) --> "".
1373 pcdata_7bit( 28 ) --> "".
1374 pcdata_7bit( 29 ) --> "".
1375 pcdata_7bit( 30 ) --> "".
1376 pcdata_7bit( 31 ) --> "".
1377 pcdata_7bit( 32 ) --> " ".
1378 pcdata_7bit( 33 ) --> "!".
1379 pcdata_7bit( 34 ) --> """.
1380 pcdata_7bit( 35 ) --> "#".
1381 pcdata_7bit( 36 ) --> "$".
1382 pcdata_7bit( 37 ) --> "%".
1383 pcdata_7bit( 38 ) --> "&".
1384 pcdata_7bit( 39 ) --> "'".
1385 pcdata_7bit( 40 ) --> "(".
1386 pcdata_7bit( 41 ) --> ")".
1387 pcdata_7bit( 42 ) --> "*".
1388 pcdata_7bit( 43 ) --> "+".
1389 pcdata_7bit( 44 ) --> ",".
1390 pcdata_7bit( 45 ) --> "-".
1391 pcdata_7bit( 46 ) --> ".".
1392 pcdata_7bit( 47 ) --> "/".
1393 pcdata_7bit( 48 ) --> "0".
1394 pcdata_7bit( 49 ) --> "1".
1395 pcdata_7bit( 50 ) --> "2".
1396 pcdata_7bit( 51 ) --> "3".
1397 pcdata_7bit( 52 ) --> "4".
1398 pcdata_7bit( 53 ) --> "5".
1399 pcdata_7bit( 54 ) --> "6".
1400 pcdata_7bit( 55 ) --> "7".
1401 pcdata_7bit( 56 ) --> "8".
1402 pcdata_7bit( 57 ) --> "9".
1403 pcdata_7bit( 58 ) --> ":".
1404 pcdata_7bit( 59 ) --> ";".
1405 pcdata_7bit( 60 ) --> "<".
1406 pcdata_7bit( 61 ) --> "=".
1407 pcdata_7bit( 62 ) --> ">".
1408 pcdata_7bit( 63 ) --> "?".
1409 pcdata_7bit( 64 ) --> "@".
1410 pcdata_7bit( 65 ) --> "A".
1411 pcdata_7bit( 66 ) --> "B".
1412 pcdata_7bit( 67 ) --> "C".
1413 pcdata_7bit( 68 ) --> "D".
1414 pcdata_7bit( 69 ) --> "E".
1415 pcdata_7bit( 70 ) --> "F".
1416 pcdata_7bit( 71 ) --> "G".
1417 pcdata_7bit( 72 ) --> "H".
1418 pcdata_7bit( 73 ) --> "I".
1419 pcdata_7bit( 74 ) --> "J".
1420 pcdata_7bit( 75 ) --> "K".
1421 pcdata_7bit( 76 ) --> "L".
1422 pcdata_7bit( 77 ) --> "M".
1423 pcdata_7bit( 78 ) --> "N".
1424 pcdata_7bit( 79 ) --> "O".
1425 pcdata_7bit( 80 ) --> "P".
1426 pcdata_7bit( 81 ) --> "Q".
1427 pcdata_7bit( 82 ) --> "R".
1428 pcdata_7bit( 83 ) --> "S".
1429 pcdata_7bit( 84 ) --> "T".
1430 pcdata_7bit( 85 ) --> "U".
1431 pcdata_7bit( 86 ) --> "V".
1432 pcdata_7bit( 87 ) --> "W".
1433 pcdata_7bit( 88 ) --> "X".
1434 pcdata_7bit( 89 ) --> "Y".
1435 pcdata_7bit( 90 ) --> "Z".
1436 pcdata_7bit( 91 ) --> "[".
1437 pcdata_7bit( 92 ) --> [92].
1438 pcdata_7bit( 93 ) --> "]".
1439 pcdata_7bit( 94 ) --> "^".
1440 pcdata_7bit( 95 ) --> "_".
1441 pcdata_7bit( 96 ) --> "`".
1442 pcdata_7bit( 97 ) --> "a".
1443 pcdata_7bit( 98 ) --> "b".
1444 pcdata_7bit( 99 ) --> "c".
1445 pcdata_7bit( 100 ) --> "d".
1446 pcdata_7bit( 101 ) --> "e".
1447 pcdata_7bit( 102 ) --> "f".
1448 pcdata_7bit( 103 ) --> "g".
1449 pcdata_7bit( 104 ) --> "h".
1450 pcdata_7bit( 105 ) --> "i".
1451 pcdata_7bit( 106 ) --> "j".
1452 pcdata_7bit( 107 ) --> "k".
1453 pcdata_7bit( 108 ) --> "l".
1454 pcdata_7bit( 109 ) --> "m".
1455 pcdata_7bit( 110 ) --> "n".
1456 pcdata_7bit( 111 ) --> "o".
1457 pcdata_7bit( 112 ) --> "p".
1458 pcdata_7bit( 113 ) --> "q".
1459 pcdata_7bit( 114 ) --> "r".
1460 pcdata_7bit( 115 ) --> "s".
1461 pcdata_7bit( 116 ) --> "t".
1462 pcdata_7bit( 117 ) --> "u".
1463 pcdata_7bit( 118 ) --> "v".
1464 pcdata_7bit( 119 ) --> "w".
1465 pcdata_7bit( 120 ) --> "x".
1466 pcdata_7bit( 121 ) --> "y".
1467 pcdata_7bit( 122 ) --> "z".
1468 pcdata_7bit( 123 ) --> "{".
1469 pcdata_7bit( 124 ) --> "|".
1470 pcdata_7bit( 125 ) --> "}".
1471 pcdata_7bit( 126 ) --> "~".
1472 pcdata_7bit( 127 ) --> "".
1473
1474 pcdata_8bits_plus( Codes ) -->
1475 "&#", chars( Codes ), ";".
1476
1477 /* cdata_generation( +Chars ) is a DCG representing Chars, a list of character
1478 * codes as a legal XML CDATA string. Any character codes disallowed by the XML
1479 * specification are not encoded.
1480 */
1481 cdata_generation( [] ) --> "".
1482 cdata_generation( [Char|Chars] ) -->
1483 ( {legal_xml_unicode( Char )}, !, [Char]
1484 | ""
1485 ),
1486 cdata_generation( Chars ).
1487
1488 legal_xml_unicode( 9 ).
1489 legal_xml_unicode( 10 ).
1490 legal_xml_unicode( 13 ).
1491 legal_xml_unicode( Code ) :-
1492 Code >= 32,
1493 Code =< 55295.
1494 legal_xml_unicode( Code ) :-
1495 Code >= 57344,
1496 Code =< 65533.
1497 legal_xml_unicode( Code ) :-
1498 Code >= 65536,
1499 Code =< 1114111.
1500
1501 /* xml_pp: "pretty print" an XML Document on the current output stream.
1502 *
1503 * Current Release: 1.9
1504 */
1505 /* xml_pp( +XMLDocument ) "pretty prints" XMLDocument on the current
1506 * output stream.
1507 */
1508 xml_pp( xml(Attributes, Document) ) :-
1509 write( 'xml( ' ), pp_attributes( Attributes, "" ), put_code( 0', ), nl,
1510 xml_pp_list( Document, "\t" ),
1511 format( ' ).~n', [] ).
1512 xml_pp( malformed(Attributes, Document) ) :-
1513 write( 'malformed( ' ), pp_attributes( Attributes, "" ), put_code( 0', ), nl,
1514 xml_pp_list( Document, "\t" ),
1515 format( ' ).~n', [] ).
1516
1517 xml_pp_indented( [], Indent ) :-
1518 format( '~s[]', [Indent] ).
1519 xml_pp_indented( List, Indent ) :-
1520 List = [_|_],
1521 format( '~s', [Indent] ),
1522 xml_pp_list( List, Indent ).
1523 xml_pp_indented( comment(Text), Indent ) :-
1524 format( '~scomment(', [Indent] ), pp_string(Text), put_code( 0') ).
1525 xml_pp_indented( namespace(URI,Prefix,Element), Indent ) :-
1526 format( '~snamespace( ~q, "~s",~n', [Indent,URI,Prefix] ),
1527 xml_pp_indented( Element, [0'\t|Indent] ),
1528 format( '~n~s)', [[0'\t|Indent]] ).
1529 xml_pp_indented( element(Tag,Attributes,Contents), Indent ) :-
1530 format( '~selement( ~q,~n', [Indent,Tag] ),
1531 pp_attributes( Attributes, [0'\t|Indent] ), put_code(0',), nl,
1532 xml_pp_list( Contents, [0'\t|Indent] ), write( ' )' ).
1533 xml_pp_indented( instructions(Target, Processing), Indent ) :-
1534 format( '~sinstructions( ~q, ', [Indent,Target] ),
1535 pp_string(Processing), put_code( 0') ).
1536 xml_pp_indented( doctype(Name, DoctypeId), Indent ) :-
1537 format( '~sdoctype( ~q, ', [Indent,Name] ),
1538 xml_pp_indented( DoctypeId, [0'\t|Indent] ),
1539 write( ' )' ).
1540 xml_pp_indented( cdata(CData), Indent ) :-
1541 format( '~scdata(', [Indent] ), pp_string(CData), put_code( 0') ).
1542 xml_pp_indented( pcdata(PCData), Indent ) :-
1543 format( '~spcdata(', [Indent] ), pp_string(PCData), put_code( 0') ).
1544 xml_pp_indented( public(URN,URL), _Indent ) :-
1545 format( 'public( "~s", "~s" )', [URN,URL] ).
1546 xml_pp_indented( public(URN,URL,Literals), Indent ) :-
1547 format( 'public( "~s", "~s",~n', [URN,URL] ),
1548 xml_pp_list( Literals, [0'\t|Indent] ), write( ' )' ). %'
1549 xml_pp_indented( system(URL), _Indent ) :-
1550 format( 'system( "~s" )', [URL] ).
1551 xml_pp_indented( system(URL,Literals), Indent ) :-
1552 format( 'system( "~s",~n', [URL] ),
1553 xml_pp_list( Literals, [0'\t|Indent] ), write( ' )' ). %'
1554 xml_pp_indented( local, _Indent ) :-
1555 write( local ).
1556 xml_pp_indented( local(Literals), Indent ) :-
1557 write( 'local(' ), nl,
1558 xml_pp_list( Literals, [0'\t|Indent] ), write( ' )' ). %'
1559 xml_pp_indented( dtd_literal(String), Indent ) :-
1560 format( '~sdtd_literal(', [Indent] ), pp_string(String), put_code( 0') ). %'
1561 xml_pp_indented( out_of_context(Tag), Indent ) :-
1562 format( '~s/* SYNTAX ERROR */ out_of_context( ~q )', [Indent,Tag] ).
1563 xml_pp_indented( unparsed(String), Indent ) :-
1564 format( '~s/* SYNTAX ERROR */ unparsed( ', [Indent] ),
1565 pp_string(String), put_code( 0') ).
1566
1567 xml_pp_list( [], Indent ) :-
1568 format( '~s[]', [Indent] ).
1569 xml_pp_list( [H|T], Indent ) :-
1570 format( '~s[~n', [Indent] ),
1571 xml_pp_indented( H, Indent ),
1572 xml_pp_list1( T, Indent ),
1573 format( '~s]', [Indent] ).
1574
1575 xml_pp_list1( [], _Indent ) :-
1576 nl.
1577 xml_pp_list1( [H|T], Indent ) :-
1578 put_code( 0', ), nl,
1579 xml_pp_indented( H, Indent ),
1580 xml_pp_list1( T, Indent ).
1581
1582 % PATCH LEUSCHEL:
1583 pp_attributes( Attributes, Indent ) :-
1584 (select('$attribute_linefeeds'=_Count,Attributes,Rest)
1585 -> pp_attributes2(Rest,Indent) % TO DO: insert linefeeds
1586 ; pp_attributes2(Attributes,Indent)).
1587
1588 pp_attributes2( [], Indent ) :-
1589 format( '~s[]', [Indent] ).
1590 pp_attributes2( [Attribute|Attributes], Indent ) :-
1591 format( '~s[', [Indent] ),
1592 pp_attributes1( Attributes, Attribute ),
1593 put_code( 0'] ).
1594 % END PATCH
1595 pp_attributes1( [], Name=Value ) :-
1596 format( '~q=', [Name] ), pp_string( Value ).
1597 pp_attributes1( [H|T], Name=Value ) :-
1598 format( '~q=', [Name] ), pp_string( Value ), write( ', ' ),
1599 pp_attributes1( T, H ).
1600 /* XML Utilities
1601 *
1602 * $Revision$
1603 */
1604
1605 % Entity and Namespace map operations: these maps are usually quite small, so
1606 % a linear list lookup is okay. They could be substituted by a logarithmic
1607 % data structure - in extremis.
1608
1609 /* empty_map( ?Map ) is true if Map is a null map.
1610 */
1611 empty_map( [] ).
1612
1613 /* map_member( +Key, +Map, ?Data ) is true if Map is a ordered map structure
1614 * which records the pair Key-Data. Key must be ground.
1615 */
1616 map_member( Key0, [Key1-Data1|Rest], Data0 ) :-
1617 ( Key0 == Key1 ->
1618 Data0 = Data1
1619 ; Key0 @> Key1 ->
1620 map_member( Key0, Rest, Data0 )
1621 ).
1622
1623 /* map_store( +Map0, +Key, +Data, ?Map1 ) is true if Map0 is an ordered map
1624 * structure, Key must be ground, and Map1 is identical to Map0 except that
1625 * the pair Key-Data is recorded by Map1.
1626 */
1627 map_store( [], Key, Data, [Key-Data] ).
1628 map_store( [Key0-Data0|Map0], Key, Data, Map ) :-
1629 ( Key == Key0 ->
1630 Map = [Key-Data|Map0]
1631 ; Key @< Key0 ->
1632 Map = [Key-Data,Key0-Data0|Map0]
1633 ; % >
1634 Map = [Key0-Data0|Map1],
1635 map_store( Map0, Key, Data, Map1 )
1636 ).
1637
1638 /* context(?Element, ?PreserveSpace, ?CurrentNS, ?DefaultNS, ?Entities, ?Namespaces )
1639 * is an ADT hiding the "state" arguments for XML Acquisition
1640 */
1641 initial_context(
1642 opt(Fmt,Ext,RemoveAttributePrefixes),
1643 context(void,PreserveSpace,'','',Entities,Empty,
1644 RemoveAttributePrefixes)
1645 ) :-
1646 empty_map( Empty ),
1647 ( Ext==false ->
1648 Entities = Empty
1649 ;
1650 extended_character_entities(Entities)
1651 ),
1652 ( Fmt==false ->
1653 PreserveSpace = true
1654 ;
1655 PreserveSpace = false
1656 ).
1657
1658 context_update( current_namespace, Context0, URI, Context1 ) :-
1659 Context0 = context(Element,Preserve,_Current,Default,Entities,
1660 Namespaces,RemoveAttributePrefixes),
1661 Context1 = context(Element,Preserve,URI,Default,Entities,
1662 Namespaces,RemoveAttributePrefixes).
1663 context_update( element, Context0, Tag, Context1 ) :-
1664 Context0 = context(_Element,Preserve,Current,Default,Entities,
1665 Namespaces,RemoveAttributePrefixes),
1666 Context1 = context(tag(Tag),Preserve,Current,Default,Entities,
1667 Namespaces,RemoveAttributePrefixes).
1668 context_update( default_namespace, Context0, URI, Context1 ):-
1669 Context0 = context(Element,Preserve,Current,_Default,Entities,
1670 Namespaces,RemoveAttributePrefixes),
1671 Context1 = context(Element,Preserve,Current,URI,Entities,
1672 Namespaces,RemoveAttributePrefixes).
1673 context_update( space_preserve, Context0, Boolean, Context1 ):-
1674 Context0 = context(Element,_Preserve,Current,Default,Entities,
1675 Namespaces,RemoveAttributePrefixes),
1676 Context1 = context(Element,Boolean,Current,Default,Entities,
1677 Namespaces,RemoveAttributePrefixes).
1678 context_update( ns_prefix(Prefix), Context0, URI, Context1 ) :-
1679 Context0 = context(Element,Preserve,Current,Default,Entities,
1680 Namespaces0,RemoveAttributePrefixes),
1681 Context1 = context(Element,Preserve,Current,Default,Entities,
1682 Namespaces1,RemoveAttributePrefixes),
1683 map_store( Namespaces0, Prefix, URI, Namespaces1 ).
1684 context_update( entity(Name), Context0, String, Context1 ) :-
1685 Context0 = context(Element,Preserve,Current,Default,Entities0,
1686 Namespaces,RemoveAttributePrefixes),
1687 Context1 = context(Element,Preserve,Current,Default,Entities1,
1688 Namespaces,RemoveAttributePrefixes),
1689 map_store( Entities0, Name, String, Entities1 ).
1690
1691 remove_attribute_prefixes( Context ) :-
1692 Context = context(_Element,_Preserve,_Current,_Default,_Entities,
1693 _Namespaces,true).
1694
1695 current_tag( Context, Tag ) :-
1696 Context = context(tag(Tag),_Preserve,_Current,_Default,_Entities,
1697 _Namespaces,_RPFA).
1698
1699 current_namespace( Context, Current ) :-
1700 Context = context(_Element,_Preserve,Current,_Default,_Entities,
1701 _Namespaces,_RPFA).
1702
1703 default_namespace( Context, Default ) :-
1704 Context = context(_Element,_Preserve,_Current,Default,_Entities,
1705 _Namespaces,_RPFA).
1706
1707 space_preserve( Context ) :-
1708 Context = context(_Element,true,_Current,_Default,_Entities,
1709 _Namespaces,_RPFA).
1710
1711 specific_namespace( Prefix, Context, URI ) :-
1712 Context = context(_Element,_Preserve,_Current,_Default,_Entities,
1713 Namespaces,_RPFA),
1714 map_member( Prefix, Namespaces, URI ).
1715
1716 defined_entity( Reference, Context, String ) :-
1717 Context = context(_Element,_Preserve,_Current,_Default,Entities,
1718 _Namespaces,_RPFA),
1719 map_member( Reference, Entities, String ).
1720
1721 close_context( Context, Terms, WellFormed ) :-
1722 Context = context(Element,_Preserve,_Current,_Default,_Entities,
1723 _Namespaces,_RPFA),
1724 close_context1( Element, Terms, WellFormed ).
1725
1726 close_context1( void, [], true ).
1727 close_context1( tag(TagChars), [out_of_context(Tag)], false ) :-
1728 atom_codes( Tag, TagChars ).
1729
1730 void_context(
1731 context(void,_Preserve,_Current,_Default,_Entities,_Namespaces)
1732 ).
1733
1734 /* pp_string( +String ) prints String onto the current output stream.
1735 * If String contains only 7-bit chars it is printed in shorthand quoted
1736 * format, otherwise it is written as a list.
1737 * If your Prolog uses " to delimit a special string type, just use write/1.
1738 */
1739 % [MC] rewritten
1740 pp_string(Chars) :-
1741 put_code(0'"),
1742 pp_string1(Chars),
1743 put_code(0'").
1744
1745 pp_string1([]).
1746 pp_string1([Char|Chars]) :-
1747 pp_string2(Char),
1748 pp_string1(Chars).
1749
1750 pp_string2(0'\a) :- !,
1751 put_code(0'\\),
1752 put_code(0'a).
1753 pp_string2(0'\b) :- !,
1754 put_code(0'\\),
1755 put_code(0'b).
1756 pp_string2(0'\t) :- !,
1757 put_code(0'\\),
1758 put_code(0't).
1759 pp_string2(0'\n) :- !,
1760 put_code(0'\\),
1761 put_code(0'n).
1762 pp_string2(0'\v) :- !,
1763 put_code(0'\\),
1764 put_code(0'v).
1765 pp_string2(0'\f) :- !,
1766 put_code(0'\\),
1767 put_code(0'f).
1768 pp_string2(0'\r) :- !,
1769 put_code(0'\\),
1770 put_code(0'r).
1771 pp_string2(0'\e) :- !,
1772 put_code(0'\\),
1773 put_code(0'e).
1774 pp_string2(0'\") :- !,
1775 put_code(0'\\),
1776 put_code(0'").
1777 pp_string2(0'\\) :- !,
1778 put_code(0'\\),
1779 put_code(0'\\).
1780 pp_string2(Char) :-
1781 Char>=32, Char=<126, !,
1782 put_code(Char).
1783 pp_string2(Char) :-
1784 format('\\~8r\\', [Char]).
1785
1786 xml_declaration_attributes_valid( [] ).
1787 xml_declaration_attributes_valid( [Name=Value|Attributes] ) :-
1788 ? xml_declaration_attribute_valid( Name, Value ),
1789 ? xml_declaration_attributes_valid( Attributes ).
1790
1791 xml_declaration_attribute_valid( Name, Value ) :-
1792 lowercase( Value, Lowercase ),
1793 debug_format(19,'Checking xml declaration attribute ~s=~s~n',[Name,Lowercase]),
1794 ? if(canonical_xml_declaration_attribute( Name, Lowercase ), true,
1795 (format(user_error,'Illegal <xml> declaration attribute: ~s = ~s~n',[Name,Value]),
1796 (valid_declaration_attribute_name(Name)
1797 -> ajoin(['Ignoring illegal value for attribute ',Name,': '],Msg),
1798 atom_codes(AValue,Value),
1799 add_xml_warning(Msg,AValue)
1800 ; add_xml_warning('Ignoring illegal <xml> declaration attribute: ',Name)
1801 ),
1802 fail
1803 )). % PATCH LEUSCHEL: add warning
1804
1805 % PATCH LEUSCHEL:
1806 valid_declaration_attribute_name(version).
1807 valid_declaration_attribute_name(standalone).
1808 valid_declaration_attribute_name(encoding).
1809
1810
1811 canonical_xml_declaration_attribute( version, "1.0" ).
1812 canonical_xml_declaration_attribute( standalone, "yes" ).
1813 canonical_xml_declaration_attribute( standalone, "no" ).
1814 canonical_xml_declaration_attribute( encoding, "utf-8" ).
1815 canonical_xml_declaration_attribute( encoding, "utf-16" ).
1816 canonical_xml_declaration_attribute( encoding, "ascii" ).
1817 canonical_xml_declaration_attribute( encoding, "iso-8859-1" ).
1818 canonical_xml_declaration_attribute( encoding, "iso-8859-2" ).
1819 canonical_xml_declaration_attribute( encoding, "iso-8859-15" ).
1820 canonical_xml_declaration_attribute( encoding, "windows-1252" ).
1821
1822 % PATCH LEUSCHEL: some more sets from https://www.iana.org/assignments/character-sets/character-sets.xhtml
1823 %canonical_xml_declaration_attribute( encoding, "iso-8859-3" ).
1824 %canonical_xml_declaration_attribute( encoding, "iso-8859-4" ).
1825 canonical_xml_declaration_attribute( encoding, Encoding ) :- atom_codes(Atom,Encoding),
1826 add_xml_warning('Unsupported xml encoding (which can lead to unexpected results): ',Atom).
1827
1828
1829
1830 /* lowercase( +MixedCase, ?Lowercase ) holds when Lowercase and MixedCase are
1831 * lists of character codes, and Lowercase is identical to MixedCase with
1832 * every uppercase character replaced by its lowercase equivalent.
1833 */
1834 lowercase( [], [] ).
1835 lowercase( [Char|Chars], [Lower|LowerCase] ) :-
1836 ( Char >= 0'A, Char =< 0'Z
1837 ->
1838 Lower is Char + 0'a - 0'A
1839 ;
1840 Lower = Char
1841 ),
1842 lowercase( Chars, LowerCase ).
1843
1844 :- dynamic extended_character_entities/1. % no point compiling it
1845 extended_character_entities( [
1846 "Aacute"-[193], % latin capital letter A with acute,
1847 "aacute"-[225], % latin small letter a with acute,
1848 "Acirc"-[194], % latin capital letter A with circumflex,
1849 "acirc"-[226], % latin small letter a with circumflex,
1850 "acute"-[180], % acute accent = spacing acute,
1851 "AElig"-[198], % latin capital letter AE
1852 "aelig"-[230], % latin small letter ae
1853 "Agrave"-[192], % latin capital letter A with grave
1854 "agrave"-[224], % latin small letter a with grave
1855 "alefsym"-[8501], % alef symbol = first transfinite cardinal,
1856 "Alpha"-[913], % greek capital letter alpha, U+0391
1857 "alpha"-[945], % greek small letter alpha,
1858 "and"-[8743], % logical and = wedge, U+2227 ISOtech
1859 "ang"-[8736], % angle, U+2220 ISOamso
1860 "Aring"-[197], % latin capital letter A with ring above
1861 "aring"-[229], % latin small letter a with ring above
1862 "asymp"-[8776], % almost equal to = asymptotic to,
1863 "Atilde"-[195], % latin capital letter A with tilde,
1864 "atilde"-[227], % latin small letter a with tilde,
1865 "Auml"-[196], % latin capital letter A with diaeresis,
1866 "auml"-[228], % latin small letter a with diaeresis,
1867 "bdquo"-[8222], % double low-9 quotation mark, U+201E NEW
1868 "Beta"-[914], % greek capital letter beta, U+0392
1869 "beta"-[946], % greek small letter beta, U+03B2 ISOgrk3
1870 "brvbar"-[166], % broken bar = broken vertical bar,
1871 "bull"-[8226], % bullet = black small circle,
1872 "cap"-[8745], % intersection = cap, U+2229 ISOtech
1873 "Ccedil"-[199], % latin capital letter C with cedilla,
1874 "ccedil"-[231], % latin small letter c with cedilla,
1875 "cedil"-[184], % cedilla = spacing cedilla, U+00B8 ISOdia>
1876 "cent"-[162], % cent sign, U+00A2 ISOnum>
1877 "Chi"-[935], % greek capital letter chi, U+03A7
1878 "chi"-[967], % greek small letter chi, U+03C7 ISOgrk3
1879 "circ"-[710], % modifier letter circumflex accent,
1880 "clubs"-[9827], % black club suit = shamrock,
1881 "cong"-[8773], % approximately equal to, U+2245 ISOtech
1882 "copy"-[169], % copyright sign, U+00A9 ISOnum>
1883 "crarr"-[8629], % downwards arrow with corner leftwards
1884 "cup"-[8746], % union = cup, U+222A ISOtech
1885 "curren"-[164], % currency sign, U+00A4 ISOnum>
1886 "dagger"-[8224], % dagger, U+2020 ISOpub
1887 "Dagger"-[8225], % double dagger, U+2021 ISOpub
1888 "darr"-[8595], % downwards arrow, U+2193 ISOnum
1889 "dArr"-[8659], % downwards double arrow, U+21D3 ISOamsa
1890 "deg"-[176], % degree sign, U+00B0 ISOnum>
1891 "Delta"-[916], % greek capital letter delta,
1892 "delta"-[948], % greek small letter delta,
1893 "diams"-[9830], % black diamond suit, U+2666 ISOpub
1894 "divide"-[247], % division sign, U+00F7 ISOnum>
1895 "Eacute"-[201], % latin capital letter E with acute,
1896 "eacute"-[233], % latin small letter e with acute,
1897 "Ecirc"-[202], % latin capital letter E with circumflex,
1898 "ecirc"-[234], % latin small letter e with circumflex,
1899 "Egrave"-[200], % latin capital letter E with grave,
1900 "egrave"-[232], % latin small letter e with grave,
1901 "empty"-[8709], % empty set = null set = diameter,
1902 "emsp"-[8195], % em space, U+2003 ISOpub
1903 "ensp"-[8194], % en space, U+2002 ISOpub
1904 "Epsilon"-[917], % greek capital letter epsilon, U+0395
1905 "epsilon"-[949], % greek small letter epsilon,
1906 "equiv"-[8801], % identical to, U+2261 ISOtech
1907 "Eta"-[919], % greek capital letter eta, U+0397
1908 "eta"-[951], % greek small letter eta, U+03B7 ISOgrk3
1909 "ETH"-[208], % latin capital letter ETH, U+00D0 ISOlat1>
1910 "eth"-[240], % latin small letter eth, U+00F0 ISOlat1>
1911 "Euml"-[203], % latin capital letter E with diaeresis,
1912 "euml"-[235], % latin small letter e with diaeresis,
1913 "euro"-[8364], % euro sign, U+20AC NEW
1914 "exist"-[8707], % there exists, U+2203 ISOtech
1915 "fnof"-[402], % latin small f with hook = function
1916 "forall"-[8704], % for all, U+2200 ISOtech
1917 "frac12"-[189], % vulgar fraction one half
1918 "frac14"-[188], % vulgar fraction one quarter
1919 "frac34"-[190], % vulgar fraction three quarters
1920 "frasl"-[8260], % fraction slash, U+2044 NEW
1921 "Gamma"-[915], % greek capital letter gamma,
1922 "gamma"-[947], % greek small letter gamma,
1923 "ge"-[8805], % greater-than or equal to,
1924 "harr"-[8596], % left right arrow, U+2194 ISOamsa
1925 "hArr"-[8660], % left right double arrow,
1926 "hearts"-[9829], % black heart suit = valentine,
1927 "hellip"-[8230], % horizontal ellipsis = three dot leader,
1928 "Iacute"-[205], % latin capital letter I with acute,
1929 "iacute"-[237], % latin small letter i with acute,
1930 "Icirc"-[206], % latin capital letter I with circumflex,
1931 "icirc"-[238], % latin small letter i with circumflex,
1932 "iexcl"-[161], % inverted exclamation mark, U+00A1 ISOnum>
1933 "Igrave"-[204], % latin capital letter I with grave,
1934 "igrave"-[236], % latin small letter i with grave,
1935 "image"-[8465], % blackletter capital I = imaginary part,
1936 "infin"-[8734], % infinity, U+221E ISOtech
1937 "int"-[8747], % integral, U+222B ISOtech
1938 "Iota"-[921], % greek capital letter iota, U+0399
1939 "iota"-[953], % greek small letter iota, U+03B9 ISOgrk3
1940 "iquest"-[191], % inverted question mark
1941 "isin"-[8712], % element of, U+2208 ISOtech
1942 "Iuml"-[207], % latin capital letter I with diaeresis,
1943 "iuml"-[239], % latin small letter i with diaeresis,
1944 "Kappa"-[922], % greek capital letter kappa, U+039A
1945 "kappa"-[954], % greek small letter kappa,
1946 "Lambda"-[923], % greek capital letter lambda,
1947 "lambda"-[955], % greek small letter lambda,
1948 "lang"-[9001], % left-pointing angle bracket = bra,
1949 "laquo"-[171], % left-pointing double angle quotation mark
1950 "larr"-[8592], % leftwards arrow, U+2190 ISOnum
1951 "lArr"-[8656], % leftwards double arrow, U+21D0 ISOtech
1952 "lceil"-[8968], % left ceiling = apl upstile,
1953 "ldquo"-[8220], % left double quotation mark,
1954 "le"-[8804], % less-than or equal to, U+2264 ISOtech
1955 "lfloor"-[8970], % left floor = apl downstile,
1956 "lowast"-[8727], % asterisk operator, U+2217 ISOtech
1957 "loz"-[9674], % lozenge, U+25CA ISOpub
1958 "lrm"-[8206], % left-to-right mark, U+200E NEW RFC 2070
1959 "lsaquo"-[8249], % single left-pointing angle quotation mark,
1960 "lsquo"-[8216], % left single quotation mark,
1961 "macr"-[175], % macron = spacing macron = overline
1962 "mdash"-[8212], % em dash, U+2014 ISOpub
1963 "micro"-[181], % micro sign, U+00B5 ISOnum>
1964 "middot"-[183], % middle dot = Georgian comma
1965 "minus"-[8722], % minus sign, U+2212 ISOtech
1966 "Mu"-[924], % greek capital letter mu, U+039C
1967 "mu"-[956], % greek small letter mu, U+03BC ISOgrk3
1968 "nabla"-[8711], % nabla = backward difference,
1969 "nbsp"-[160], % no-break space = non-breaking space,
1970 "ndash"-[8211], % en dash, U+2013 ISOpub
1971 "ne"-[8800], % not equal to, U+2260 ISOtech
1972 "ni"-[8715], % contains as member, U+220B ISOtech
1973 "not"-[172], % not sign, U+00AC ISOnum>
1974 "notin"-[8713], % not an element of, U+2209 ISOtech
1975 "nsub"-[8836], % not a subset of, U+2284 ISOamsn
1976 "Ntilde"-[209], % latin capital letter N with tilde,
1977 "ntilde"-[241], % latin small letter n with tilde,
1978 "Nu"-[925], % greek capital letter nu, U+039D
1979 "nu"-[957], % greek small letter nu, U+03BD ISOgrk3
1980 "Oacute"-[211], % latin capital letter O with acute,
1981 "oacute"-[243], % latin small letter o with acute,
1982 "Ocirc"-[212], % latin capital letter O with circumflex,
1983 "ocirc"-[244], % latin small letter o with circumflex,
1984 "OElig"-[338], % latin capital ligature OE,
1985 "oelig"-[339], % latin small ligature oe, U+0153 ISOlat2
1986 "Ograve"-[210], % latin capital letter O with grave,
1987 "ograve"-[242], % latin small letter o with grave,
1988 "oline"-[8254], % overline = spacing overscore,
1989 "Omega"-[937], % greek capital letter omega,
1990 "omega"-[969], % greek small letter omega,
1991 "Omicron"-[927], % greek capital letter omicron, U+039F
1992 "omicron"-[959], % greek small letter omicron, U+03BF NEW
1993 "oplus"-[8853], % circled plus = direct sum,
1994 "or"-[8744], % logical or = vee, U+2228 ISOtech
1995 "ordf"-[170], % feminine ordinal indicator, U+00AA ISOnum>
1996 "ordm"-[186], % masculine ordinal indicator,
1997 "Oslash"-[216], % latin capital letter O with stroke
1998 "oslash"-[248], % latin small letter o with stroke,
1999 "Otilde"-[213], % latin capital letter O with tilde,
2000 "otilde"-[245], % latin small letter o with tilde,
2001 "otimes"-[8855], % circled times = vector product,
2002 "Ouml"-[214], % latin capital letter O with diaeresis,
2003 "ouml"-[246], % latin small letter o with diaeresis,
2004 "para"-[182], % pilcrow sign = paragraph sign,
2005 "part"-[8706], % partial differential, U+2202 ISOtech
2006 "permil"-[8240], % per mille sign, U+2030 ISOtech
2007 "perp"-[8869], % up tack = orthogonal to = perpendicular,
2008 "Phi"-[934], % greek capital letter phi,
2009 "phi"-[966], % greek small letter phi, U+03C6 ISOgrk3
2010 "Pi"-[928], % greek capital letter pi, U+03A0 ISOgrk3
2011 "pi"-[960], % greek small letter pi, U+03C0 ISOgrk3
2012 "piv"-[982], % greek pi symbol, U+03D6 ISOgrk3
2013 "plusmn"-[177], % plus-minus sign = plus-or-minus sign,
2014 "pound"-[163], % pound sign, U+00A3 ISOnum>
2015 "prime"-[8242], % prime = minutes = feet, U+2032 ISOtech
2016 "Prime"-[8243], % double prime = seconds = inches,
2017 "prod"-[8719], % n-ary product = product sign,
2018 "prop"-[8733], % proportional to, U+221D ISOtech
2019 "Psi"-[936], % greek capital letter psi,
2020 "psi"-[968], % greek small letter psi, U+03C8 ISOgrk3
2021 "radic"-[8730], % square root = radical sign,
2022 "rang"-[9002], % right-pointing angle bracket = ket,
2023 "raquo"-[187], % right-pointing double angle quotation mark
2024 "rarr"-[8594], % rightwards arrow, U+2192 ISOnum
2025 "rArr"-[8658], % rightwards double arrow,
2026 "rceil"-[8969], % right ceiling, U+2309 ISOamsc
2027 "rdquo"-[8221], % right double quotation mark,
2028 "real"-[8476], % blackletter capital R = real part symbol,
2029 "reg"-[174], % registered sign = registered trade mark sign,
2030 "rfloor"-[8971], % right floor, U+230B ISOamsc
2031 "Rho"-[929], % greek capital letter rho, U+03A1
2032 "rho"-[961], % greek small letter rho, U+03C1 ISOgrk3
2033 "rlm"-[8207], % right-to-left mark, U+200F NEW RFC 2070
2034 "rsaquo"-[8250], % single right-pointing angle quotation mark,
2035 "rsquo"-[8217], % right single quotation mark,
2036 "sbquo"-[8218], % single low-9 quotation mark, U+201A NEW
2037 "Scaron"-[352], % latin capital letter S with caron,
2038 "scaron"-[353], % latin small letter s with caron,
2039 "sdot"-[8901], % dot operator, U+22C5 ISOamsb
2040 "sect"-[167], % section sign, U+00A7 ISOnum>
2041 "shy"-[173], % soft hyphen = discretionary hyphen,
2042 "Sigma"-[931], % greek capital letter sigma,
2043 "sigma"-[963], % greek small letter sigma,
2044 "sigmaf"-[962], % greek small letter final sigma,
2045 "sim"-[8764], % tilde operator = varies with = similar to,
2046 "spades"-[9824], % black spade suit, U+2660 ISOpub
2047 "sub"-[8834], % subset of, U+2282 ISOtech
2048 "sube"-[8838], % subset of or equal to, U+2286 ISOtech
2049 "sum"-[8721], % n-ary sumation, U+2211 ISOamsb
2050 "sup"-[8835], % superset of, U+2283 ISOtech
2051 "sup1"-[185], % superscript one = superscript digit one,
2052 "sup2"-[178], % superscript two = superscript digit two
2053 "sup3"-[179], % superscript three = superscript digit three
2054 "supe"-[8839], % superset of or equal to,
2055 "szlig"-[223], % latin small letter sharp s = ess-zed,
2056 "Tau"-[932], % greek capital letter tau, U+03A4
2057 "tau"-[964], % greek small letter tau, U+03C4 ISOgrk3
2058 "there4"-[8756], % therefore, U+2234 ISOtech
2059 "Theta"-[920], % greek capital letter theta,
2060 "theta"-[952], % greek small letter theta,
2061 "thetasym"-[977], % greek small letter theta symbol,
2062 "thinsp"-[8201], % thin space, U+2009 ISOpub
2063 "THORN"-[222], % latin capital letter THORN,
2064 "thorn"-[254], % latin small letter thorn with,
2065 "tilde"-[732], % small tilde, U+02DC ISOdia
2066 "times"-[215], % multiplication sign, U+00D7 ISOnum>
2067 "trade"-[8482], % trade mark sign, U+2122 ISOnum
2068 "Uacute"-[218], % latin capital letter U with acute,
2069 "uacute"-[250], % latin small letter u with acute,
2070 "uarr"-[8593], % upwards arrow, U+2191 ISOnum
2071 "uArr"-[8657], % upwards double arrow, U+21D1 ISOamsa
2072 "Ucirc"-[219], % latin capital letter U with circumflex,
2073 "ucirc"-[251], % latin small letter u with circumflex,
2074 "Ugrave"-[217], % latin capital letter U with grave,
2075 "ugrave"-[249], % latin small letter u with grave,
2076 "uml"-[168], % diaeresis = spacing diaeresis,
2077 "upsih"-[978], % greek upsilon with hook symbol,
2078 "Upsilon"-[933], % greek capital letter upsilon,
2079 "upsilon"-[965], % greek small letter upsilon,
2080 "Uuml"-[220], % latin capital letter U with diaeresis,
2081 "uuml"-[252], % latin small letter u with diaeresis,
2082 "weierp"-[8472], % script capital P = power set
2083 "Xi"-[926], % greek capital letter xi, U+039E ISOgrk3
2084 "xi"-[958], % greek small letter xi, U+03BE ISOgrk3
2085 "Yacute"-[221], % latin capital letter Y with acute,
2086 "yacute"-[253], % latin small letter y with acute,
2087 "yen"-[165], % yen sign = yuan sign, U+00A5 ISOnum>
2088 "yuml"-[255], % latin small letter y with diaeresis,
2089 "Yuml"-[376], % latin capital letter Y with diaeresis,
2090 "Zeta"-[918], % greek capital letter zeta, U+0396
2091 "zeta"-[950], % greek small letter zeta, U+03B6 ISOgrk3
2092 "zwj"-[8205], % zero width joiner, U+200D NEW RFC 2070
2093 "zwnj"-[8204] % zero width non-joiner,
2094 ] ).
2095
2096 /* chars( ?Chars, ?Plus, ?Minus ) used as chars( ?Chars ) in a DCG to
2097 * copy the list Chars inline.
2098 *
2099 * This is best expressed in terms of append/3 where append/3 is built-in.
2100 * For other Prologs, a straightforward specification can be used:
2101 *
2102 * chars( [] ) --> "".
2103 * chars( [Char|Chars] ) -->
2104 * [Char],
2105 * chars( Chars ).
2106 */
2107 chars( Chars, Plus, Minus ) :-
2108 append( Chars, Minus, Plus ).