1 | :- module( json_parser, [ json_parse/2, json_parse/3, | |
2 | json_parse_file/2, json_parse_file/3 | |
3 | ] ). | |
4 | ||
5 | :- set_prolog_flag(double_quotes, codes). | |
6 | ||
7 | % based on https://github.com/yoroto/pl-json | |
8 | % have added line counting and error reporting, reading directly from file | |
9 | % fixed a few issues with whitespace and avoid backtracking | |
10 | % return strings using string(.) wrapper to avoid ambiguity with arrays of integers | |
11 | ||
12 | :- use_module(probsrc(error_manager),[add_error/3, add_error/2, add_warning/3]). | |
13 | :- use_module(library(lists)). | |
14 | ||
15 | json_parse_file(File, Json) :- | |
16 | json_parse_file(File, [strings_as_atoms(true),position_infos(false)], Json). | |
17 | ||
18 | % parse a JSON file | |
19 | json_parse_file(File, Options, Json) :- | |
20 | maplist(process_option,Options), | |
21 | catch(read_file(File,Codes), error(existence_error(_,_),_E), | |
22 | (add_error(json_parse_file,'JSON file does not exist:',File),fail)), | |
23 | %format('Read:~n~s~n',[Codes]), | |
24 | (json_parse(Codes,Json) -> true | |
25 | ; Codes = [] -> add_error(json_parse_file,'JSON file is empty:',File),fail | |
26 | ; add_error(json_parse_file,'Could not parse JSON file:',File),fail | |
27 | ). | |
28 | ||
29 | process_option(strings_as_atoms(X)) :- !,retractall(convert_strings_to_atoms(_)), assertz(convert_strings_to_atoms(X)). | |
30 | process_option(position_infos(X)) :- !,retractall(add_position_infos(_)), assertz(add_position_infos(X)). | |
31 | process_option(O) :- add_warning(json_parser,'Unrecognized option:',O). | |
32 | ||
33 | ||
34 | read_file(Filename,Codes) :- | |
35 | open(Filename,read,S,[encoding(utf8)]), | |
36 | read_codes(S,Codes), | |
37 | close(S). | |
38 | read_codes(S,Codes) :- | |
39 | get_code(S,Code), | |
40 | ( Code < 0 -> | |
41 | Codes = [] | |
42 | ; | |
43 | Codes = [Code|Rest], | |
44 | read_codes(S,Rest)). | |
45 | ||
46 | % -------------- | |
47 | :- dynamic convert_strings_to_atoms/1, add_position_infos/1. | |
48 | convert_strings_to_atoms(true). % if true compatible with SICStus library; otherwise string(Codes) used | |
49 | add_position_infos(false). % if true we add FromLineNr-ToLineNr as extra argument to pairs (=/3 instead of =/2) | |
50 | ||
51 | json_parse( Chars, Json ) :- | |
52 | json_parse( Chars, Json, [] ). | |
53 | ||
54 | json_parse( Chars, Json, _Options ) :- | |
55 | reset_line_nr, | |
56 | json(Json, Chars, _). | |
57 | ||
58 | json(Json) --> ws,!,json(Json). | |
59 | json(Json) --> | |
60 | json1(Json), | |
61 | !, | |
62 | spaces. | |
63 | json(Json) --> {var(Json)},check_string("[{"""), {fail}. % generate error message | |
64 | %json(Json) --> {ground(Json)}, json(JSon2), {format('### Mismatch:~n ~w~n~w~n',[Json,Json2]),fail}. | |
65 | ||
66 | % a version that does not print an error message | |
67 | try_json(Json) --> ws,!,try_json(Json). | |
68 | try_json(Json) --> | |
69 | json1(Json), | |
70 | !, | |
71 | spaces. | |
72 | ||
73 | json1(null) --> "null",!. | |
74 | json1(true) --> "true",!. | |
75 | json1(false) --> "false",!. | |
76 | ||
77 | json1(Number) --> number(Number),!. | |
78 | ||
79 | json1(String) --> | |
80 | json_start_string_quote(EndQuote),!, | |
81 | %{print_info(start_string)}, | |
82 | string2(Codes,EndQuote), | |
83 | {convert_strings_to_atoms(true) -> atom_codes(String,Codes) ; String=string(Codes)}. | |
84 | json1(Array) --> | |
85 | "[",!, | |
86 | %{print_info(start_array)}, | |
87 | array(Array), | |
88 | check_string("]"). | |
89 | json1(json(Pairs)) --> | |
90 | "{",!, | |
91 | %{print_info(start_object)}, | |
92 | spaces, | |
93 | pairs(Pairs), | |
94 | spaces, | |
95 | check_string("}"). | |
96 | ||
97 | number(Number) --> | |
98 | nm_token(NmCodes), | |
99 | { number_codes(Number, NmCodes) }. | |
100 | ||
101 | nm_token([H|T]) --> | |
102 | [H], | |
103 | { minus(H);digit_table(H) }, | |
104 | nm_token1(T). | |
105 | ||
106 | nm_token1([0'\x2E\|T]) --> | |
107 | ".",!, | |
108 | nm_frac(T). | |
109 | ||
110 | nm_token1([H|T]) --> | |
111 | [H], | |
112 | { digit_table(H) }, !, | |
113 | nm_token1(T). | |
114 | ||
115 | nm_token1([]) --> []. | |
116 | ||
117 | ||
118 | nm_frac([0'\x45\,H|T]) --> | |
119 | ("e";"E"),!, | |
120 | [H], {minus(H);plus(H)}, | |
121 | nm_exp(T). | |
122 | ||
123 | nm_frac([H|T]) --> | |
124 | [H], | |
125 | { digit_table(H) }, !, | |
126 | nm_frac(T). | |
127 | ||
128 | nm_frac([]) --> []. | |
129 | ||
130 | nm_exp([H|T]) --> | |
131 | [H], | |
132 | { digit_table(H) }, !, | |
133 | nm_exp1(T). | |
134 | ||
135 | nm_exp1([H|T]) --> | |
136 | [H], | |
137 | { digit_table(H) }, !, | |
138 | nm_exp1(T). | |
139 | ||
140 | nm_exp1([]) --> []. | |
141 | ||
142 | % detect valid start string quote | |
143 | json_start_string_quote(0'\x22\) --> """",!. % regular JSON string syntax | |
144 | %json_start_string_quote(39) --> "'". % possible ProB JSON extension, so that we do not have to quote B strings in formulas | |
145 | ||
146 | % regular JSON string between double quotes | |
147 | string(X) --> string2(X,0'\x22\). | |
148 | ||
149 | string2([],EndQuote) --> [EndQuote], | |
150 | !. | |
151 | string2([EscapedChar|T],EndQuote) --> | |
152 | [0'\x5C\],!, % \ | |
153 | escape_char(EscapedChar), | |
154 | string2(T,EndQuote). | |
155 | string2([10|T],EndQuote) --> [10], | |
156 | !, | |
157 | %{generate_json_error("string content",10),fail}, % comment in to disallow newlines | |
158 | {inc_line_nr}, | |
159 | string2(T,EndQuote). | |
160 | % TODO: check for other illegal characters like tab? ... | |
161 | string2([H|T],EndQuote) --> | |
162 | [H], | |
163 | string2(T,EndQuote). | |
164 | ||
165 | escape_char( 0'\x22\ ) --> [0'\x22\]. %" 34 decimal | |
166 | escape_char( 0'\x5C\ ) --> [0'\x5C\]. %\ | |
167 | escape_char( 0'\x2F\ ) --> [0'\x2F\]. %/ | |
168 | escape_char( 0'\x08\ ) --> [0'\x62\]. %b | |
169 | escape_char( 0'\x0C\ ) --> [0'\x66\]. %f | |
170 | escape_char( 0'\x0A\ ) --> [0'\x6E\]. %n | |
171 | escape_char( 0'\x0D\ ) --> [0'\x72\]. %r | |
172 | escape_char( 0'\x09\ ) --> [0'\x74\]. %t | |
173 | ||
174 | escape_char( Code ) --> | |
175 | "u", | |
176 | hex_digit_char( H1 ), | |
177 | hex_digit_char( H2 ), | |
178 | hex_digit_char( H3 ), | |
179 | hex_digit_char( H4 ), | |
180 | { Code is (((H1 << 4 + H2) << 4 + H3) << 4 + H4) }. | |
181 | ||
182 | hex_digit_char( 0 ) --> "0". | |
183 | hex_digit_char( 1 ) --> "1". | |
184 | hex_digit_char( 2 ) --> "2". | |
185 | hex_digit_char( 3 ) --> "3". | |
186 | hex_digit_char( 4 ) --> "4". | |
187 | hex_digit_char( 5 ) --> "5". | |
188 | hex_digit_char( 6 ) --> "6". | |
189 | hex_digit_char( 7 ) --> "7". | |
190 | hex_digit_char( 8 ) --> "8". | |
191 | hex_digit_char( 9 ) --> "9". | |
192 | hex_digit_char( 10 ) --> "A". | |
193 | hex_digit_char( 11 ) --> "B". | |
194 | hex_digit_char( 12 ) --> "C". | |
195 | hex_digit_char( 13 ) --> "D". | |
196 | hex_digit_char( 14 ) --> "E". | |
197 | hex_digit_char( 15 ) --> "F". | |
198 | hex_digit_char( 10 ) --> "a". | |
199 | hex_digit_char( 11 ) --> "b". | |
200 | hex_digit_char( 12 ) --> "c". | |
201 | hex_digit_char( 13 ) --> "d". | |
202 | hex_digit_char( 14 ) --> "e". | |
203 | hex_digit_char( 15 ) --> "f". | |
204 | ||
205 | array([H|T]) --> | |
206 | try_json(H), !,{print_info(first_array)}, | |
207 | array1(T). | |
208 | array([]) --> [], {print_info(end_array)}. | |
209 | ||
210 | array1([H|T]) --> | |
211 | ",", !, | |
212 | json(H),!, {print_info(next_array)}, | |
213 | array1(T). | |
214 | array1([]) --> [], {print_info(empty_array)}. | |
215 | ||
216 | pair(ResPair,Optional) --> | |
217 | spaces, | |
218 | ({Optional=optional} -> opt_pair_name(Codes) ; pair_name(Codes)), | |
219 | {add_position_infos(false) -> ResPair = '='(Name, Value) | |
220 | ; cur_line(FromLineNr), ResPair = '='(Name, Value,FromLineNr-ToLineNr)}, | |
221 | % TODO: usually we want the position info of the value, ideally with start column info | |
222 | check_string(":"), | |
223 | { atom_codes(Name, Codes) }, | |
224 | {print_info(pair_value_for(Name))}, | |
225 | json(Value), | |
226 | {add_position_infos(false) -> true ; cur_line(ToLineNr)}, | |
227 | {print_info(found_value_vor(Name,Value,FromLineNr-ToLineNr))}. | |
228 | ||
229 | opt_pair_name(Name) --> """", string(Name), spaces. | |
230 | pair_name(Name) --> check_string(""""), string(Name), spaces. | |
231 | ||
232 | ||
233 | pairs(List) --> ws,!,pairs(List). | |
234 | pairs([H|T]) --> | |
235 | pair(H,optional), !, | |
236 | pairs1(T). | |
237 | pairs([]) --> [], {print_info(empty_pairs)}. | |
238 | ||
239 | pairs1(List) --> ws,!,pairs1(List). | |
240 | pairs1([H|T]) --> | |
241 | ",", !, | |
242 | pair(H,required),!, | |
243 | pairs1(T). | |
244 | pairs1([]) --> [], {print_info(end_pairs)}. | |
245 | ||
246 | :- use_module(library(lists),[append/2]). | |
247 | % check if the next char matches a given character or one of the given chars | |
248 | check_string(List) --> ws,!,check_string(List). | |
249 | check_string(List) --> [Char], {member(Char,List)},!. | |
250 | check_string(ExpectedString, [Char|_],_) :- | |
251 | generate_json_error(ExpectedString,Char), | |
252 | fail. | |
253 | ||
254 | generate_json_error(ExpectedString,Char) :- | |
255 | cur_line(LineNr), | |
256 | number_codes(LineNr,LC), | |
257 | % TO DO: if String has more than one char: write one of: | |
258 | convert_char(Char,CStr), | |
259 | append(["JSON error on line ",LC,": expecting ",ExpectedString," obtained ",CStr],Codes), | |
260 | atom_codes(Msg,Codes), | |
261 | %format(user_error,'! Error on line ~w: expecting ~s obtained ~s~n',[LineNr,String,[Char]]), | |
262 | add_error(json_parser,Msg). | |
263 | ||
264 | convert_char(8,Res) :- !, Res="tabulation character". | |
265 | convert_char(10,Res) :- !, Res="newline". | |
266 | convert_char(13,Res) :- !, Res="return character". | |
267 | convert_char(Char,[Char]). | |
268 | ||
269 | ||
270 | minus( 0'- ). | |
271 | plus( 0'+ ). | |
272 | digit_table( 0'0 ). | |
273 | digit_table( 0'1 ). | |
274 | digit_table( 0'2 ). | |
275 | digit_table( 0'3 ). | |
276 | digit_table( 0'4 ). | |
277 | digit_table( 0'5 ). | |
278 | digit_table( 0'6 ). | |
279 | digit_table( 0'7 ). | |
280 | digit_table( 0'8 ). | |
281 | digit_table( 0'9 ). | |
282 | ||
283 | ||
284 | spaces --> ws,!,spaces. | |
285 | spaces --> []. | |
286 | ||
287 | % whitespace | |
288 | ws --> new_line,!. | |
289 | ws --> " "; "\t" ; [10] ; [13]. | |
290 | ||
291 | new_line --> "\n",{inc_line_nr}. | |
292 | ||
293 | % use a fact to keep track of line numbers | |
294 | :- dynamic cur_line/1. | |
295 | cur_line(1). | |
296 | inc_line_nr :- retract(cur_line(N)), N1 is N+1, assertz(cur_line(N1)). | |
297 | reset_line_nr :- retract(cur_line(_)), assertz(cur_line(1)). | |
298 | ||
299 | print_info(_) :- !. % comment out to view debug info | |
300 | print_info(Error) :- print_error(Error). | |
301 | print_error(Error) :- | |
302 | cur_line(LineNr), | |
303 | nl(user_error), | |
304 | write(user_error,'! Line: '),write_term(user_error,LineNr,[]),nl(user_error), | |
305 | (var(Error) -> print_message(error,'_') | |
306 | ; write(user_error,'! '),write_term(user_error,Error,[max_depth(20),numbervars(true)]),nl(user_error)). | |
307 | %% flush_output(user_error), %%. |