1 :- module( json_parser, [ json_parse/2, json_parse/3,
2 json_parse_file/2, json_parse_file/3
3 ] ).
4
5 :- set_prolog_flag(double_quotes, codes).
6
7 % based on https://github.com/yoroto/pl-json
8 % have added line counting and error reporting, reading directly from file
9 % fixed a few issues with whitespace and avoid backtracking
10 % return strings using string(.) wrapper to avoid ambiguity with arrays of integers
11
12 :- use_module(probsrc(error_manager),[add_error/3, add_error/2, add_warning/3]).
13 :- use_module(library(lists)).
14
15 json_parse_file(File, Json) :-
16 json_parse_file(File, [strings_as_atoms(true),position_infos(false)], Json).
17
18 % parse a JSON file
19 json_parse_file(File, Options, Json) :-
20 maplist(process_option,Options),
21 catch(read_file(File,Codes), error(existence_error(_,_),_E),
22 (add_error(json_parse_file,'JSON file does not exist:',File),fail)),
23 %format('Read:~n~s~n',[Codes]),
24 (json_parse(Codes,Json) -> true
25 ; Codes = [] -> add_error(json_parse_file,'JSON file is empty:',File),fail
26 ; add_error(json_parse_file,'Could not parse JSON file:',File),fail
27 ).
28
29 process_option(strings_as_atoms(X)) :- !,retractall(convert_strings_to_atoms(_)), assertz(convert_strings_to_atoms(X)).
30 process_option(position_infos(X)) :- !,retractall(add_position_infos(_)), assertz(add_position_infos(X)).
31 process_option(O) :- add_warning(json_parser,'Unrecognized option:',O).
32
33
34 read_file(Filename,Codes) :-
35 open(Filename,read,S,[encoding(utf8)]),
36 read_codes(S,Codes),
37 close(S).
38 read_codes(S,Codes) :-
39 get_code(S,Code),
40 ( Code < 0 ->
41 Codes = []
42 ;
43 Codes = [Code|Rest],
44 read_codes(S,Rest)).
45
46 % --------------
47 :- dynamic convert_strings_to_atoms/1, add_position_infos/1.
48 convert_strings_to_atoms(true). % if true compatible with SICStus library; otherwise string(Codes) used
49 add_position_infos(false). % if true we add FromLineNr-ToLineNr as extra argument to pairs (=/3 instead of =/2)
50
51 json_parse( Chars, Json ) :-
52 json_parse( Chars, Json, [] ).
53
54 json_parse( Chars, Json, _Options ) :-
55 reset_line_nr,
56 json(Json, Chars, _).
57
58 json(Json) --> ws,!,json(Json).
59 json(Json) -->
60 json1(Json),
61 !,
62 spaces.
63 json(Json) --> {var(Json)},check_string("[{"""), {fail}. % generate error message
64 %json(Json) --> {ground(Json)}, json(JSon2), {format('### Mismatch:~n ~w~n~w~n',[Json,Json2]),fail}.
65
66 % a version that does not print an error message
67 try_json(Json) --> ws,!,try_json(Json).
68 try_json(Json) -->
69 json1(Json),
70 !,
71 spaces.
72
73 json1(null) --> "null",!.
74 json1(true) --> "true",!.
75 json1(false) --> "false",!.
76
77 json1(Number) --> number(Number),!.
78
79 json1(String) -->
80 json_start_string_quote(EndQuote),!,
81 %{print_info(start_string)},
82 string2(Codes,EndQuote),
83 {convert_strings_to_atoms(true) -> atom_codes(String,Codes) ; String=string(Codes)}.
84 json1(Array) -->
85 "[",!,
86 %{print_info(start_array)},
87 array(Array),
88 check_string("]").
89 json1(json(Pairs)) -->
90 "{",!,
91 %{print_info(start_object)},
92 spaces,
93 pairs(Pairs),
94 spaces,
95 check_string("}").
96
97 number(Number) -->
98 nm_token(NmCodes),
99 { number_codes(Number, NmCodes) }.
100
101 nm_token([H|T]) -->
102 [H],
103 { minus(H);digit_table(H) },
104 nm_token1(T).
105
106 nm_token1([0'\x2E\|T]) -->
107 ".",!,
108 nm_frac(T).
109
110 nm_token1([H|T]) -->
111 [H],
112 { digit_table(H) }, !,
113 nm_token1(T).
114
115 nm_token1([]) --> [].
116
117
118 nm_frac([0'\x45\,H|T]) -->
119 ("e";"E"),!,
120 [H], {minus(H);plus(H)},
121 nm_exp(T).
122
123 nm_frac([H|T]) -->
124 [H],
125 { digit_table(H) }, !,
126 nm_frac(T).
127
128 nm_frac([]) --> [].
129
130 nm_exp([H|T]) -->
131 [H],
132 { digit_table(H) }, !,
133 nm_exp1(T).
134
135 nm_exp1([H|T]) -->
136 [H],
137 { digit_table(H) }, !,
138 nm_exp1(T).
139
140 nm_exp1([]) --> [].
141
142 % detect valid start string quote
143 json_start_string_quote(0'\x22\) --> """",!. % regular JSON string syntax
144 %json_start_string_quote(39) --> "'". % possible ProB JSON extension, so that we do not have to quote B strings in formulas
145
146 % regular JSON string between double quotes
147 string(X) --> string2(X,0'\x22\).
148
149 string2([],EndQuote) --> [EndQuote],
150 !.
151 string2([EscapedChar|T],EndQuote) -->
152 [0'\x5C\],!, % \
153 escape_char(EscapedChar),
154 string2(T,EndQuote).
155 string2([10|T],EndQuote) --> [10],
156 !,
157 %{generate_json_error("string content",10),fail}, % comment in to disallow newlines
158 {inc_line_nr},
159 string2(T,EndQuote).
160 % TODO: check for other illegal characters like tab? ...
161 string2([H|T],EndQuote) -->
162 [H],
163 string2(T,EndQuote).
164
165 escape_char( 0'\x22\ ) --> [0'\x22\]. %" 34 decimal
166 escape_char( 0'\x5C\ ) --> [0'\x5C\]. %\
167 escape_char( 0'\x2F\ ) --> [0'\x2F\]. %/
168 escape_char( 0'\x08\ ) --> [0'\x62\]. %b
169 escape_char( 0'\x0C\ ) --> [0'\x66\]. %f
170 escape_char( 0'\x0A\ ) --> [0'\x6E\]. %n
171 escape_char( 0'\x0D\ ) --> [0'\x72\]. %r
172 escape_char( 0'\x09\ ) --> [0'\x74\]. %t
173
174 escape_char( Code ) -->
175 "u",
176 hex_digit_char( H1 ),
177 hex_digit_char( H2 ),
178 hex_digit_char( H3 ),
179 hex_digit_char( H4 ),
180 { Code is (((H1 << 4 + H2) << 4 + H3) << 4 + H4) }.
181
182 hex_digit_char( 0 ) --> "0".
183 hex_digit_char( 1 ) --> "1".
184 hex_digit_char( 2 ) --> "2".
185 hex_digit_char( 3 ) --> "3".
186 hex_digit_char( 4 ) --> "4".
187 hex_digit_char( 5 ) --> "5".
188 hex_digit_char( 6 ) --> "6".
189 hex_digit_char( 7 ) --> "7".
190 hex_digit_char( 8 ) --> "8".
191 hex_digit_char( 9 ) --> "9".
192 hex_digit_char( 10 ) --> "A".
193 hex_digit_char( 11 ) --> "B".
194 hex_digit_char( 12 ) --> "C".
195 hex_digit_char( 13 ) --> "D".
196 hex_digit_char( 14 ) --> "E".
197 hex_digit_char( 15 ) --> "F".
198 hex_digit_char( 10 ) --> "a".
199 hex_digit_char( 11 ) --> "b".
200 hex_digit_char( 12 ) --> "c".
201 hex_digit_char( 13 ) --> "d".
202 hex_digit_char( 14 ) --> "e".
203 hex_digit_char( 15 ) --> "f".
204
205 array([H|T]) -->
206 try_json(H), !,{print_info(first_array)},
207 array1(T).
208 array([]) --> [], {print_info(end_array)}.
209
210 array1([H|T]) -->
211 ",", !,
212 json(H),!, {print_info(next_array)},
213 array1(T).
214 array1([]) --> [], {print_info(empty_array)}.
215
216 pair(ResPair,Optional) -->
217 spaces,
218 ({Optional=optional} -> opt_pair_name(Codes) ; pair_name(Codes)),
219 {add_position_infos(false) -> ResPair = '='(Name, Value)
220 ; cur_line(FromLineNr), ResPair = '='(Name, Value,FromLineNr-ToLineNr)},
221 % TODO: usually we want the position info of the value, ideally with start column info
222 check_string(":"),
223 { atom_codes(Name, Codes) },
224 {print_info(pair_value_for(Name))},
225 json(Value),
226 {add_position_infos(false) -> true ; cur_line(ToLineNr)},
227 {print_info(found_value_vor(Name,Value,FromLineNr-ToLineNr))}.
228
229 opt_pair_name(Name) --> """", string(Name), spaces.
230 pair_name(Name) --> check_string(""""), string(Name), spaces.
231
232
233 pairs(List) --> ws,!,pairs(List).
234 pairs([H|T]) -->
235 pair(H,optional), !,
236 pairs1(T).
237 pairs([]) --> [], {print_info(empty_pairs)}.
238
239 pairs1(List) --> ws,!,pairs1(List).
240 pairs1([H|T]) -->
241 ",", !,
242 pair(H,required),!,
243 pairs1(T).
244 pairs1([]) --> [], {print_info(end_pairs)}.
245
246 :- use_module(library(lists),[append/2]).
247 % check if the next char matches a given character or one of the given chars
248 check_string(List) --> ws,!,check_string(List).
249 check_string(List) --> [Char], {member(Char,List)},!.
250 check_string(ExpectedString, [Char|_],_) :-
251 generate_json_error(ExpectedString,Char),
252 fail.
253
254 generate_json_error(ExpectedString,Char) :-
255 cur_line(LineNr),
256 number_codes(LineNr,LC),
257 % TO DO: if String has more than one char: write one of:
258 convert_char(Char,CStr),
259 append(["JSON error on line ",LC,": expecting ",ExpectedString," obtained ",CStr],Codes),
260 atom_codes(Msg,Codes),
261 %format(user_error,'! Error on line ~w: expecting ~s obtained ~s~n',[LineNr,String,[Char]]),
262 add_error(json_parser,Msg).
263
264 convert_char(8,Res) :- !, Res="tabulation character".
265 convert_char(10,Res) :- !, Res="newline".
266 convert_char(13,Res) :- !, Res="return character".
267 convert_char(Char,[Char]).
268
269
270 minus( 0'- ).
271 plus( 0'+ ).
272 digit_table( 0'0 ).
273 digit_table( 0'1 ).
274 digit_table( 0'2 ).
275 digit_table( 0'3 ).
276 digit_table( 0'4 ).
277 digit_table( 0'5 ).
278 digit_table( 0'6 ).
279 digit_table( 0'7 ).
280 digit_table( 0'8 ).
281 digit_table( 0'9 ).
282
283
284 spaces --> ws,!,spaces.
285 spaces --> [].
286
287 % whitespace
288 ws --> new_line,!.
289 ws --> " "; "\t" ; [10] ; [13].
290
291 new_line --> "\n",{inc_line_nr}.
292
293 % use a fact to keep track of line numbers
294 :- dynamic cur_line/1.
295 cur_line(1).
296 inc_line_nr :- retract(cur_line(N)), N1 is N+1, assertz(cur_line(N1)).
297 reset_line_nr :- retract(cur_line(_)), assertz(cur_line(1)).
298
299 print_info(_) :- !. % comment out to view debug info
300 print_info(Error) :- print_error(Error).
301 print_error(Error) :-
302 cur_line(LineNr),
303 nl(user_error),
304 write(user_error,'! Line: '),write_term(user_error,LineNr,[]),nl(user_error),
305 (var(Error) -> print_message(error,'_')
306 ; write(user_error,'! '),write_term(user_error,Error,[max_depth(20),numbervars(true)]),nl(user_error)).
307 %% flush_output(user_error), %%.