1 | /* |
---|
2 | * jsonsl |
---|
3 | * https://github.com/mnunberg/jsonsl |
---|
4 | * |
---|
5 | * Copyright (c) 2012 M. Nunberg, mnunberg@haskalah.org |
---|
6 | * |
---|
7 | * Permission is hereby granted, free of charge, to any person obtaining |
---|
8 | * a copy of this software and associated documentation files (the |
---|
9 | * "Software"), to deal in the Software without restriction, including |
---|
10 | * without limitation the rights to use, copy, modify, merge, publish, |
---|
11 | * distribute, sublicense, and/or sell copies of the Software, and to |
---|
12 | * permit persons to whom the Software is furnished to do so, subject to |
---|
13 | * the following conditions: |
---|
14 | * |
---|
15 | * The above copyright notice and this permission notice shall be |
---|
16 | * included in all copies or substantial portions of the Software. |
---|
17 | * |
---|
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
---|
19 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
---|
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
---|
21 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE |
---|
22 | * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
---|
23 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
---|
24 | * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
---|
25 | */ |
---|
26 | |
---|
27 | /** |
---|
28 | * JSON Simple/Stacked/Stateful Lexer. |
---|
29 | * - Does not buffer data |
---|
30 | * - Maintains state |
---|
31 | * - Callback oriented |
---|
32 | * - Lightweight and fast. One source file and one header file |
---|
33 | */ |
---|
34 | |
---|
35 | #ifndef JSONSL_H_ |
---|
36 | #define JSONSL_H_ |
---|
37 | |
---|
38 | #include <stdio.h> |
---|
39 | #include <stdlib.h> |
---|
40 | #include <stddef.h> |
---|
41 | #include <string.h> |
---|
42 | #include <sys/types.h> |
---|
43 | #include <wchar.h> |
---|
44 | |
---|
45 | #ifdef __cplusplus |
---|
46 | extern "C" { |
---|
47 | #endif /* __cplusplus */ |
---|
48 | |
---|
/*
 * Character types used throughout the lexer API.
 *
 * When JSONSL_USE_WCHAR is defined the lexer works on wchar_t (from
 * <wchar.h>); otherwise it works on plain bytes.
 *
 * C has no `unsigned wchar_t`, so in wide mode jsonsl_uchar_t is simply an
 * alias of wchar_t, which keeps it the same width as jsonsl_char_t.
 * NOTE(review): confirm the implementation does not rely on jsonsl_uchar_t
 * being unsigned when built in wide mode.
 */
#ifdef JSONSL_USE_WCHAR
typedef wchar_t jsonsl_char_t;
typedef wchar_t jsonsl_uchar_t;
#else
typedef char jsonsl_char_t;
typedef unsigned char jsonsl_uchar_t;
#endif /* JSONSL_USE_WCHAR */
56 | |
---|
/*
 * Fixed-width integer types (adapted from http-parser.h, and possibly others).
 *
 * Windows toolchains without <stdint.h> get hand-written typedefs. MinGW, and
 * MSVC from _MSC_VER 1600 onwards, provide <stdint.h> themselves; using the
 * real header there avoids conflicting redefinitions when another header
 * also includes it.
 */
#if defined(_WIN32) && !defined(__MINGW32__) && \
    (!defined(_MSC_VER) || _MSC_VER < 1600)
typedef __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
#if !defined(_MSC_VER) || _MSC_VER < 1400
typedef unsigned int size_t;
typedef int ssize_t;
#endif
#else
#include <stdint.h>
#endif
74 | |
---|
75 | |
---|
/*
 * State-structure customisation.
 *
 * Define JSONSL_STATE_USER_FIELDS to a list of member declarations to have
 * them spliced into struct jsonsl_state_st, or define JSONSL_STATE_GENERIC to
 * get a single `void *data` member instead. When neither is defined the
 * generic layout is selected.
 *
 * The reminder is only emitted on compilers known to accept #warning, which
 * is not a standard directive before C23.
 */
#if !defined(JSONSL_STATE_GENERIC) && !defined(JSONSL_STATE_USER_FIELDS)
#if defined(__GNUC__) || defined(__clang__)
#warning "JSONSL_STATE_USER_FIELDS not defined. Define this for extra structure fields"
#warning "or define JSONSL_STATE_GENERIC"
#endif
#define JSONSL_STATE_GENERIC
#endif /* !defined JSONSL_STATE_GENERIC */

#ifdef JSONSL_STATE_GENERIC
/* Drop any user definition first so the empty definition is not a redefinition. */
#undef JSONSL_STATE_USER_FIELDS
#define JSONSL_STATE_USER_FIELDS
#endif /* JSONSL_STATE_GENERIC */
85 | |
---|
/*
 * Decoration placed in front of every public prototype in this header.
 * It expands to nothing unless the build defines it beforehand.
 */
#ifndef JSONSL_API
#define JSONSL_API
#endif /* JSONSL_API */
89 | |
---|
/*
 * Default value 512; may be overridden by defining it before this header is
 * included.
 * NOTE(review): presumably the upper bound on nesting depth; it is not
 * referenced anywhere else in this header.
 */
#ifndef JSONSL_MAX_LEVELS
#define JSONSL_MAX_LEVELS 512
#endif /* JSONSL_MAX_LEVELS */
91 | |
---|
/*
 * Handle types. Both are pointers to structures defined further down in this
 * header: jsonsl_t refers to a lexer instance, jsonsl_jpr_t to a parsed
 * JSON-pointer path.
 */
struct jsonsl_st;
typedef struct jsonsl_st *jsonsl_t;

struct jsonsl_jpr_st;
typedef struct jsonsl_jpr_st *jsonsl_jpr_t;
96 | |
---|
/**
 * Flag bits OR'd into the value of every type whose text must appear in
 * "quotes", i.e. T_HKEY and T_STRING. AND a type against this flag to test
 * whether it is string-like.
 */
#define JSONSL_Tf_STRINGY 0xffff00

/**
 * Constants representing the special JSON types.
 * The values are special and aid in speed (the OBJECT and LIST
 * values are the char literals of their openings).
 *
 * Their actual value is a character which attempts to resemble
 * some mnemonic reference to the actual type.
 *
 * If new types are added, they must fit into the ASCII printable
 * range (so they should be AND'd with 0x7f and yield something
 * meaningful).
 */
#define JSONSL_XTYPE \
    X(STRING,   '"'|JSONSL_Tf_STRINGY) \
    X(HKEY,     '#'|JSONSL_Tf_STRINGY) \
    X(OBJECT,   '{') \
    X(LIST,     '[') \
    X(SPECIAL,  '^') \
    X(UESCAPE,  'u')
typedef enum {
/* One JSONSL_T_<name> enumerator per JSONSL_XTYPE row. */
#define X(o, c) \
    JSONSL_T_##o = c,
    JSONSL_XTYPE
#undef X
    JSONSL_T_UNKNOWN = '?',
    /* Abstract 'root' object */
    JSONSL_T_ROOT = 0
} jsonsl_type_t;
131 | |
---|
/**
 * Subtypes for T_SPECIAL. We define them as flags
 * because more than one type can be applied to a
 * given object.
 *
 * The row names are only ever token-pasted (never macro-expanded), so names
 * such as NULL are safe to use here even when a macro of that name exists.
 */
#define JSONSL_XSPECIAL \
    X(NONE,     0) \
    X(SIGNED,   1<<0) \
    X(UNSIGNED, 1<<1) \
    X(TRUE,     1<<2) \
    X(FALSE,    1<<3) \
    X(NULL,     1<<4) \
    X(FLOAT,    1<<5) \
    X(EXPONENT, 1<<6) \
    X(NONASCII, 1<<7)
typedef enum {
/* One JSONSL_SPECIALf_<name> enumerator per JSONSL_XSPECIAL row. */
#define X(o,b) \
    JSONSL_SPECIALf_##o = b,
    JSONSL_XSPECIAL
#undef X
    /* Handy flags for checking */
    JSONSL_SPECIALf_UNKNOWN = 1 << 8,
    JSONSL_SPECIALf_NUMERIC = (JSONSL_SPECIALf_SIGNED|JSONSL_SPECIALf_UNSIGNED),
    JSONSL_SPECIALf_BOOLEAN = (JSONSL_SPECIALf_TRUE|JSONSL_SPECIALf_FALSE),
    /* For non-simple numeric types */
    JSONSL_SPECIALf_NUMNOINT = (JSONSL_SPECIALf_FLOAT|JSONSL_SPECIALf_EXPONENT)
} jsonsl_special_t;
160 | |
---|
161 | |
---|
/**
 * These are the various types of stack (or other) events
 * which will trigger a callback.
 * Like the type constants, these are also mnemonic.
 */
#define JSONSL_XACTION \
    X(PUSH,     '+') \
    X(POP,      '-') \
    X(UESCAPE,  'U') \
    X(ERROR,    '!')
typedef enum {
/* One JSONSL_ACTION_<name> enumerator per JSONSL_XACTION row. */
#define X(a,c) \
    JSONSL_ACTION_##a = c,
    JSONSL_XACTION
#undef X
    JSONSL_ACTION_UNKNOWN = '?'
} jsonsl_action_t;
179 | |
---|
180 | |
---|
/**
 * Various errors which may be thrown while parsing JSON.
 *
 * Enumerators are numbered consecutively from JSONSL_ERROR_SUCCESS (0) in
 * the order listed; JSONSL_ERROR_GENERIC follows the last listed entry.
 */
#define JSONSL_XERR \
    X(SUCCESS) \
/* Trailing garbage characters */ \
    X(GARBAGE_TRAILING) \
/* We were expecting a 'special' (numeric, true, false, null) */ \
    X(SPECIAL_EXPECTED) \
/* Found a stray token */ \
    X(STRAY_TOKEN) \
/* We were expecting a token before this one */ \
    X(MISSING_TOKEN) \
/* Cannot insert because the container is not ready */ \
    X(CANT_INSERT) \
/* Found a '\' outside a string */ \
    X(ESCAPE_OUTSIDE_STRING) \
/* Found a ':' outside of a hash */ \
    X(KEY_OUTSIDE_OBJECT) \
/* Found a string outside of a container */ \
    X(STRING_OUTSIDE_CONTAINER) \
/* Found a null byte in middle of string */ \
    X(FOUND_NULL_BYTE) \
/* Current level exceeds limit specified in constructor */ \
    X(LEVELS_EXCEEDED) \
/* Got a } as a result of an opening [ or vice versa */ \
    X(BRACKET_MISMATCH) \
/* We expected a key, but got something else instead */ \
    X(HKEY_EXPECTED) \
/* We got an illegal control character (bad whitespace or something) */ \
    X(WEIRD_WHITESPACE) \
/* Found a \u-escape, but there were fewer than 4 following hex digits */ \
    X(UESCAPE_TOOSHORT) \
/* Invalid two-character escape */ \
    X(ESCAPE_INVALID) \
/* Trailing comma */ \
    X(TRAILING_COMMA) \
/* An invalid number was passed in a numeric field */ \
    X(INVALID_NUMBER) \
/* The following are for JPR Stuff */ \
    \
/* Found a literal '%' but it was only followed by a single valid hex digit */ \
    X(PERCENT_BADHEX) \
/* jsonpointer URI is malformed '/' */ \
    X(JPR_BADPATH) \
/* Duplicate slash */ \
    X(JPR_DUPSLASH) \
/* No leading root */ \
    X(JPR_NOROOT)

typedef enum {
/* One JSONSL_ERROR_<name> enumerator per JSONSL_XERR row. */
#define X(e) \
    JSONSL_ERROR_##e,
    JSONSL_XERR
#undef X
    JSONSL_ERROR_GENERIC
} jsonsl_error_t;
238 | |
---|
239 | |
---|
240 | /** |
---|
241 | * A state is a single level of the stack. |
---|
242 | * Non-private data (i.e. the 'data' field, see the STATE_GENERIC section) |
---|
243 | * will remain in tact until the item is popped. |
---|
244 | * |
---|
245 | * As a result, it means a parent state object may be accessed from a child |
---|
246 | * object, (the parents fields will all be valid). This allows a user to create |
---|
247 | * an ad-hoc hierarchy on top of the JSON one. |
---|
248 | * |
---|
249 | */ |
---|
250 | struct jsonsl_state_st { |
---|
251 | /** |
---|
252 | * The JSON object type |
---|
253 | */ |
---|
254 | jsonsl_type_t type; |
---|
255 | |
---|
256 | /** If this element is special, then its extended type is here */ |
---|
257 | jsonsl_special_t special_flags; |
---|
258 | |
---|
259 | /** |
---|
260 | * Position offset variables. These are relative to jsn->pos. |
---|
261 | * pos_begin is the position at which this state was first pushed |
---|
262 | * to the stack. pos_cur is the position at which return last controlled |
---|
263 | * to this state (i.e. an immediate child state was popped from it). |
---|
264 | */ |
---|
265 | |
---|
266 | /** |
---|
267 | * The position at which this state was first PUSHed |
---|
268 | */ |
---|
269 | size_t pos_begin; |
---|
270 | |
---|
271 | /** |
---|
272 | * The position at which any immediate child was last POPped. |
---|
273 | * Note that this field is only set when the item is popped. |
---|
274 | */ |
---|
275 | size_t pos_cur; |
---|
276 | |
---|
277 | |
---|
278 | /** |
---|
279 | * Level of recursion into nesting. This is mainly a convenience |
---|
280 | * variable, as this can technically be deduced from the lexer's |
---|
281 | * level parameter (though the logic is not that simple) |
---|
282 | */ |
---|
283 | unsigned int level; |
---|
284 | |
---|
285 | |
---|
286 | /** |
---|
287 | * how many elements in the object/list. |
---|
288 | * For objects (hashes), an element is either |
---|
289 | * a key or a value. Thus for one complete pair, |
---|
290 | * nelem will be 2. |
---|
291 | * |
---|
292 | * For special types, this will hold the sum of the digits. |
---|
293 | * This only holds true for values which are simple signed/unsigned |
---|
294 | * numbers. Otherwise a special flag is set, and extra handling is not |
---|
295 | * performed. |
---|
296 | */ |
---|
297 | uint64_t nelem; |
---|
298 | |
---|
299 | |
---|
300 | |
---|
301 | /*TODO: merge this and special_flags into a union */ |
---|
302 | |
---|
303 | |
---|
304 | /** |
---|
305 | * Useful for an opening nest, this will prevent a callback from being |
---|
306 | * invoked on this item or any of its children |
---|
307 | */ |
---|
308 | int ignore_callback; |
---|
309 | |
---|
310 | /** |
---|
311 | * Counter which is incremented each time an escape ('\') is encountered. |
---|
312 | */ |
---|
313 | unsigned int nescapes; |
---|
314 | |
---|
315 | /** |
---|
316 | * Put anything you want here. if JSONSL_STATE_USER_FIELDS is here, then |
---|
317 | * the macro expansion happens here. |
---|
318 | * |
---|
319 | * You can use these fields to store hierarchical or 'tagging' information |
---|
320 | * for specific objects. |
---|
321 | * |
---|
322 | * See the documentation above for the lifetime of the state object (i.e. |
---|
323 | * if the private data points to allocated memory, it should be freed |
---|
324 | * when the object is popped, as the state object will be re-used) |
---|
325 | */ |
---|
326 | #ifndef JSONSL_STATE_GENERIC |
---|
327 | JSONSL_STATE_USER_FIELDS |
---|
328 | #else |
---|
329 | |
---|
330 | /** |
---|
331 | * Otherwise, this is a simple void * pointer for anything you want |
---|
332 | */ |
---|
333 | void *data; |
---|
334 | #endif /* JSONSL_STATE_USER_FIELDS */ |
---|
335 | }; |
---|
336 | |
---|
/*
 * So now we need some special structure for keeping the
 * JPR info in sync. Preferably all in a single block
 * of memory (there's no need for separate allocations).
 * So we will define a 'table' with the following layout:
 *
 * Level    nPosbl  JPR1_last   JPR2_last   JPR3_last
 *
 * 0        1       NOMATCH     POSSIBLE    POSSIBLE
 * 1        0       NOMATCH     NOMATCH     COMPLETE
 * [ table ends here because no further path is possible ]
 *
 * Where the JPR..n corresponds to the number of JPRs
 * requested, and nPosbl is a quick flag to determine
 * the number of possibilities. In the future this might
 * be made into a proper 'jump' table.
 *
 * Since we always mark JPRs from the higher levels descending
 * into the lower ones, a prospective child match would first
 * look at the parent table to check the possibilities, and then
 * see which ones were possible.
 *
 * Thus, the size of this blob would be (and these are all ints here)
 * nLevels * nJPR * 2.
 *
 * The 'width' of the table would be nJPR*2, and the 'height' would be
 * nLevels.
 */
366 | |
---|
367 | /** |
---|
368 | * This is called when a stack change ocurs. |
---|
369 | * |
---|
370 | * @param jsn The lexer |
---|
371 | * @param action The type of action, this can be PUSH or POP |
---|
372 | * @param state A pointer to the stack currently affected by the action |
---|
373 | * @param at A pointer to the position of the input buffer which triggered |
---|
374 | * this action. |
---|
375 | */ |
---|
376 | typedef void (*jsonsl_stack_callback)( |
---|
377 | jsonsl_t jsn, |
---|
378 | jsonsl_action_t action, |
---|
379 | struct jsonsl_state_st* state, |
---|
380 | const jsonsl_char_t *at); |
---|
381 | |
---|
382 | |
---|
383 | /** |
---|
384 | * This is called when an error is encountered. |
---|
385 | * Sometimes it's possible to 'erase' characters (by replacing them |
---|
386 | * with whitespace). If you think you have corrected the error, you |
---|
387 | * can return a true value, in which case the parser will backtrack |
---|
388 | * and try again. |
---|
389 | * |
---|
390 | * @param jsn The lexer |
---|
391 | * @param error The error which was thrown |
---|
392 | * @param state the current state |
---|
393 | * @param a pointer to the position of the input buffer which triggered |
---|
394 | * the error. Note that this is not const, this is because you have the |
---|
395 | * possibility of modifying the character in an attempt to correct the |
---|
396 | * error |
---|
397 | * |
---|
398 | * @return zero to bail, nonzero to try again (this only makes sense if |
---|
399 | * the input buffer has been modified by this callback) |
---|
400 | */ |
---|
401 | typedef int (*jsonsl_error_callback)( |
---|
402 | jsonsl_t jsn, |
---|
403 | jsonsl_error_t error, |
---|
404 | struct jsonsl_state_st* state, |
---|
405 | jsonsl_char_t *at); |
---|
406 | |
---|
407 | struct jsonsl_st { |
---|
408 | /** Public, read-only */ |
---|
409 | |
---|
410 | /** This is the current level of the stack */ |
---|
411 | unsigned int level; |
---|
412 | |
---|
413 | /** |
---|
414 | * This is the current position, relative to the beginning |
---|
415 | * of the stream. |
---|
416 | */ |
---|
417 | size_t pos; |
---|
418 | |
---|
419 | /** This is the 'bytes' variable passed to feed() */ |
---|
420 | const jsonsl_char_t *base; |
---|
421 | |
---|
422 | /** Callback invoked for PUSH actions */ |
---|
423 | jsonsl_stack_callback action_callback_PUSH; |
---|
424 | |
---|
425 | /** Callback invoked for POP actions */ |
---|
426 | jsonsl_stack_callback action_callback_POP; |
---|
427 | |
---|
428 | /** Default callback for any action, if neither PUSH or POP callbacks are defined */ |
---|
429 | jsonsl_stack_callback action_callback; |
---|
430 | |
---|
431 | /** Do not invoke callbacks for objects deeper than this level */ |
---|
432 | unsigned int max_callback_level; |
---|
433 | |
---|
434 | /** The error callback. Invoked when an error happens. Should not be NULL */ |
---|
435 | jsonsl_error_callback error_callback; |
---|
436 | |
---|
437 | /* these are boolean flags you can modify. You will be called |
---|
438 | * about notification for each of these types if the corresponding |
---|
439 | * variable is true. |
---|
440 | */ |
---|
441 | |
---|
442 | /** |
---|
443 | * @name Callback Booleans. |
---|
444 | * These determine whether a callback is to be invoked for certain types of objects |
---|
445 | * @{*/ |
---|
446 | |
---|
447 | /** Boolean flag to enable or disable the invokcation for events on this type*/ |
---|
448 | int call_SPECIAL; |
---|
449 | int call_OBJECT; |
---|
450 | int call_LIST; |
---|
451 | int call_STRING; |
---|
452 | int call_HKEY; |
---|
453 | /*@}*/ |
---|
454 | |
---|
455 | /** |
---|
456 | * @name u-Escape handling |
---|
457 | * Special handling for the \\u-f00d type sequences. These are meant |
---|
458 | * to be translated back into the corresponding octet(s). |
---|
459 | * A special callback (if set) is invoked with *at=='u'. An application |
---|
460 | * may wish to temporarily suspend parsing and handle the 'u-' sequence |
---|
461 | * internally (or not). |
---|
462 | */ |
---|
463 | |
---|
464 | /*@{*/ |
---|
465 | |
---|
466 | /** Callback to be invoked for a u-escape */ |
---|
467 | jsonsl_stack_callback action_callback_UESCAPE; |
---|
468 | |
---|
469 | /** Boolean flag, whether to invoke the callback */ |
---|
470 | int call_UESCAPE; |
---|
471 | |
---|
472 | /** Boolean flag, whether we should return after encountering a u-escape: |
---|
473 | * the callback is invoked and then we return if this is true |
---|
474 | */ |
---|
475 | int return_UESCAPE; |
---|
476 | /*@}*/ |
---|
477 | |
---|
478 | struct { |
---|
479 | int allow_trailing_comma; |
---|
480 | } options; |
---|
481 | |
---|
482 | /** Put anything here */ |
---|
483 | void *data; |
---|
484 | |
---|
485 | /*@{*/ |
---|
486 | /** Private */ |
---|
487 | int in_escape; |
---|
488 | char expecting; |
---|
489 | char tok_last; |
---|
490 | int can_insert; |
---|
491 | unsigned int levels_max; |
---|
492 | |
---|
493 | #ifndef JSONSL_NO_JPR |
---|
494 | size_t jpr_count; |
---|
495 | jsonsl_jpr_t *jprs; |
---|
496 | |
---|
497 | /* Root pointer for JPR matching information */ |
---|
498 | size_t *jpr_root; |
---|
499 | #endif /* JSONSL_NO_JPR */ |
---|
500 | /*@}*/ |
---|
501 | |
---|
502 | /** |
---|
503 | * This is the stack. Its upper bound is levels_max, or the |
---|
504 | * nlevels argument passed to jsonsl_new. If you modify this structure, |
---|
505 | * make sure that this member is last. |
---|
506 | */ |
---|
507 | struct jsonsl_state_st stack[1]; |
---|
508 | }; |
---|
509 | |
---|
510 | |
---|
511 | /** |
---|
512 | * Creates a new lexer object, with capacity for recursion up to nlevels |
---|
513 | * |
---|
514 | * @param nlevels maximum recursion depth |
---|
515 | */ |
---|
516 | JSONSL_API |
---|
517 | jsonsl_t jsonsl_new(int nlevels); |
---|
518 | |
---|
519 | /** |
---|
520 | * Feeds data into the lexer. |
---|
521 | * |
---|
522 | * @param jsn the lexer object |
---|
523 | * @param bytes new data to be fed |
---|
524 | * @param nbytes size of new data |
---|
525 | */ |
---|
526 | JSONSL_API |
---|
527 | void jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes); |
---|
528 | |
---|
529 | /** |
---|
530 | * Resets the internal parser state. This does not free the parser |
---|
531 | * but does clean it internally, so that the next time feed() is called, |
---|
532 | * it will be treated as a new stream |
---|
533 | * |
---|
534 | * @param jsn the lexer |
---|
535 | */ |
---|
536 | JSONSL_API |
---|
537 | void jsonsl_reset(jsonsl_t jsn); |
---|
538 | |
---|
539 | /** |
---|
540 | * Frees the lexer, cleaning any allocated memory taken |
---|
541 | * |
---|
542 | * @param jsn the lexer |
---|
543 | */ |
---|
544 | JSONSL_API |
---|
545 | void jsonsl_destroy(jsonsl_t jsn); |
---|
546 | |
---|
/**
 * Gets the 'parent' element, given the current one.
 *
 * Evaluates to jsn->stack + (cur->level - 1) when cur->level is greater
 * than 1, and to NULL otherwise.
 *
 * Both arguments are parenthesized and the whole expansion is a single
 * parenthesized expression, so the macro can be combined with other
 * operators and given arbitrary pointer expressions. `cur` is evaluated
 * more than once; do not pass an expression with side effects.
 *
 * @param jsn the lexer
 * @param cur the current state, which should be a struct jsonsl_state_st *
 */
#define jsonsl_last_state(jsn, cur) \
    (((cur)->level > 1) \
        ? ((jsn)->stack + ((cur)->level - 1)) \
        : NULL)
557 | |
---|
558 | |
---|
/**
 * This enables receiving callbacks on all events. Doesn't do
 * anything special but helps avoid some boilerplate.
 * This does not touch the UESCAPE callbacks or flags.
 *
 * Expands to a single statement, so it is safe in an unbraced if/else;
 * terminate the invocation with a semicolon. `jsn` is evaluated once per
 * flag; do not pass an expression with side effects.
 *
 * @param jsn the lexer
 */
#define jsonsl_enable_all_callbacks(jsn) \
    do { \
        (jsn)->call_HKEY = 1; \
        (jsn)->call_STRING = 1; \
        (jsn)->call_OBJECT = 1; \
        (jsn)->call_SPECIAL = 1; \
        (jsn)->call_LIST = 1; \
    } while (0)
570 | |
---|
/**
 * A macro which returns true if the current state object can
 * have children. This means a list type or an object type.
 *
 * @param state pointer to the state to test; it is evaluated up to twice,
 * so do not pass an expression with side effects.
 */
#define JSONSL_STATE_IS_CONTAINER(state) \
    ((state)->type == JSONSL_T_OBJECT || (state)->type == JSONSL_T_LIST)
577 | |
---|
578 | /** |
---|
579 | * These two functions, dump a string representation |
---|
580 | * of the error or type, respectively. They will never |
---|
581 | * return NULL |
---|
582 | */ |
---|
583 | JSONSL_API |
---|
584 | const char* jsonsl_strerror(jsonsl_error_t err); |
---|
585 | JSONSL_API |
---|
586 | const char* jsonsl_strtype(jsonsl_type_t jt); |
---|
587 | |
---|
588 | /** |
---|
589 | * Dumps global metrics to the screen. This is a noop unless |
---|
590 | * jsonsl was compiled with JSONSL_USE_METRICS |
---|
591 | */ |
---|
592 | JSONSL_API |
---|
593 | void jsonsl_dump_global_metrics(void); |
---|
594 | |
---|
595 | /* This macro just here for editors to do code folding */ |
---|
596 | #ifndef JSONSL_NO_JPR |
---|
597 | |
---|
/**
 * @name JSON Pointer API
 *
 * JSONPointer API. This isn't really related to the lexer (at least not yet).
 * JSONPointer provides an extremely simple specification for providing
 * locations within JSON objects. We will extend it a bit and allow for
 * providing 'wildcard' characters by which to be able to 'query' the stream.
 *
 * See http://tools.ietf.org/html/draft-pbryan-zyp-json-pointer-00
 *
 * Currently I'm implementing the 'single query' API which can only use a single
 * query component. In the future I will integrate my yet-to-be-published
 * Boyer-Moore-esque prefix searching implementation, in order to allow
 * multiple paths to be merged into one for quick and efficient searching.
 *
 *
 * JPR (as we'll refer to it within the source) can be used by splitting
 * the components into multiple sections, and incrementally 'tracking' each
 * component. When JSONSL delivers a 'pop' callback for a string, or a 'push'
 * callback for an object, we will check to see whether the index matching
 * the component corresponding to the current level contains a match
 * for our path.
 *
 * In order to do this properly, a structure must be maintained within the
 * parent indicating whether its children are possible matches. This flag
 * will be 'inherited' by all children which may conform to the match
 * specification, and discarded by all which do not (thereby eliminating
 * their children from inheriting it).
 *
 * A successful match is a complete one. One can provide multiple paths with
 * multiple levels of matches e.g.
 * /foo/bar/baz/^/blah
 *
 * @{
 */
633 | |
---|
/** The wildcard character; may be overridden by defining it beforehand */
#ifndef JSONSL_PATH_WILDCARD_CHAR
#define JSONSL_PATH_WILDCARD_CHAR '^'
#endif /* WILDCARD_CHAR */
638 | |
---|
/**
 * Result of matching a path against a position in the document.
 */
#define JSONSL_XMATCH \
    X(COMPLETE,1) \
    X(POSSIBLE,0) \
    X(NOMATCH,-1)

typedef enum {
/* One JSONSL_MATCH_<name> enumerator per JSONSL_XMATCH row. */
#define X(T,v) \
    JSONSL_MATCH_##T = v,
    JSONSL_XMATCH
#undef X
    /*
     * Given an explicit value: left implicit it would be NOMATCH + 1 == 0,
     * which is indistinguishable from JSONSL_MATCH_POSSIBLE.
     */
    JSONSL_MATCH_UNKNOWN = -2
} jsonsl_jpr_match_t;
653 | |
---|
/** Kind of a single path component (see struct jsonsl_jpr_component_st) */
typedef enum {
    /* Special */
    JSONSL_PATH_INVALID = -1,
    JSONSL_PATH_NONE = 0,

    JSONSL_PATH_STRING = 1,
    JSONSL_PATH_WILDCARD = 2,
    JSONSL_PATH_NUMERIC = 3,
    JSONSL_PATH_ROOT = 4
} jsonsl_jpr_type_t;
664 | |
---|
665 | struct jsonsl_jpr_component_st { |
---|
666 | char *pstr; |
---|
667 | /** if this is a numeric type, the number is 'cached' here */ |
---|
668 | unsigned long idx; |
---|
669 | size_t len; |
---|
670 | jsonsl_jpr_type_t ptype; |
---|
671 | }; |
---|
672 | |
---|
/** A path object, as created by jsonsl_jpr_new() */
struct jsonsl_jpr_st {
    /** Path components */
    struct jsonsl_jpr_component_st *components;
    /** Number of entries in components */
    size_t ncomponents;

    /** Base of allocated string for components */
    char *basestr;

    /** The original match string. Useful for returning to the user */
    char *orig;
    /* NOTE(review): presumably the length of orig; confirm against the implementation */
    size_t norig;
};
685 | |
---|
686 | |
---|
687 | |
---|
688 | /** |
---|
689 | * Create a new JPR object. |
---|
690 | * |
---|
691 | * @param path the JSONPointer path specification. |
---|
692 | * @param errp a pointer to a jsonsl_error_t. If this function returns NULL, |
---|
693 | * then more details will be in this variable. |
---|
694 | * |
---|
695 | * @return a new jsonsl_jpr_t object, or NULL on error. |
---|
696 | */ |
---|
697 | JSONSL_API |
---|
698 | jsonsl_jpr_t jsonsl_jpr_new(const char *path, jsonsl_error_t *errp); |
---|
699 | |
---|
700 | /** |
---|
701 | * Destroy a JPR object |
---|
702 | */ |
---|
703 | JSONSL_API |
---|
704 | void jsonsl_jpr_destroy(jsonsl_jpr_t jpr); |
---|
705 | |
---|
706 | /** |
---|
707 | * Match a JSON object against a type and specific level |
---|
708 | * |
---|
709 | * @param jpr the JPR object |
---|
710 | * @param parent_type the type of the parent (should be T_LIST or T_OBJECT) |
---|
711 | * @param parent_level the level of the parent |
---|
712 | * @param key the 'key' of the child. If the parent is an array, this should be |
---|
713 | * empty. |
---|
714 | * @param nkey - the length of the key. If the parent is an array (T_LIST), then |
---|
715 | * this should be the current index. |
---|
716 | * |
---|
717 | * NOTE: The key of the child means any kind of associative data related to the |
---|
718 | * element. Thus: <<< { "foo" : [ >>, |
---|
719 | * the opening array's key is "foo". |
---|
720 | * |
---|
721 | * @return a status constant. This indicates whether a match was excluded, possible, |
---|
722 | * or successful. |
---|
723 | */ |
---|
724 | JSONSL_API |
---|
725 | jsonsl_jpr_match_t jsonsl_jpr_match(jsonsl_jpr_t jpr, |
---|
726 | jsonsl_type_t parent_type, |
---|
727 | unsigned int parent_level, |
---|
728 | const char *key, size_t nkey); |
---|
729 | |
---|
730 | |
---|
731 | /** |
---|
732 | * Associate a set of JPR objects with a lexer instance. |
---|
733 | * This should be called before the lexer has been fed any data (and |
---|
734 | * behavior is undefined if you don't adhere to this). |
---|
735 | * |
---|
736 | * After using this function, you may subsequently call match_state() on |
---|
737 | * given states (presumably from within the callbacks). |
---|
738 | * |
---|
739 | * Note that currently the first JPR is the quickest and comes |
---|
740 | * pre-allocated with the state structure. Further JPR objects |
---|
741 | * are chained. |
---|
742 | * |
---|
743 | * @param jsn The lexer |
---|
744 | * @param jprs An array of jsonsl_jpr_t objects |
---|
745 | * @param njprs How many elements in the jprs array. |
---|
746 | */ |
---|
747 | JSONSL_API |
---|
748 | void jsonsl_jpr_match_state_init(jsonsl_t jsn, |
---|
749 | jsonsl_jpr_t *jprs, |
---|
750 | size_t njprs); |
---|
751 | |
---|
752 | /** |
---|
753 | * This follows the same semantics as the normal match, |
---|
754 | * except we infer parent and type information from the relevant state objects. |
---|
755 | * The match status (for all possible JPR objects) is set in the *out parameter. |
---|
756 | * |
---|
757 | * If a match has succeeded, then its JPR object will be returned. In all other |
---|
758 | * instances, NULL is returned; |
---|
759 | * |
---|
760 | * @param jpr The jsonsl_jpr_t handle |
---|
761 | * @param state The jsonsl_state_st which is a candidate |
---|
762 | * @param key The hash key (if applicable, can be NULL if parent is list) |
---|
763 | * @param nkey Length of hash key (if applicable, can be zero if parent is list) |
---|
764 | * @param out A pointer to a jsonsl_jpr_match_t. This will be populated with |
---|
765 | * the match result |
---|
766 | * |
---|
767 | * @return If a match was completed in full, then the JPR object containing |
---|
768 | * the matching path will be returned. Otherwise, the return is NULL (note, this |
---|
769 | * does not mean matching has failed, it can still be part of the match: check |
---|
770 | * the out parameter). |
---|
771 | */ |
---|
772 | JSONSL_API |
---|
773 | jsonsl_jpr_t jsonsl_jpr_match_state(jsonsl_t jsn, |
---|
774 | struct jsonsl_state_st *state, |
---|
775 | const char *key, |
---|
776 | size_t nkey, |
---|
777 | jsonsl_jpr_match_t *out); |
---|
778 | |
---|
779 | |
---|
780 | /** |
---|
781 | * Cleanup any memory allocated and any states set by |
---|
782 | * match_state_init() and match_state() |
---|
783 | * @param jsn The lexer |
---|
784 | */ |
---|
785 | JSONSL_API |
---|
786 | void jsonsl_jpr_match_state_cleanup(jsonsl_t jsn); |
---|
787 | |
---|
788 | /** |
---|
789 | * Return a string representation of the match result returned by match() |
---|
790 | */ |
---|
791 | JSONSL_API |
---|
792 | const char *jsonsl_strmatchtype(jsonsl_jpr_match_t match); |
---|
793 | |
---|
794 | /* @}*/ |
---|
795 | |
---|
796 | /** |
---|
797 | * Utility function to convert escape sequences into their original form. |
---|
798 | * |
---|
799 | * The decoders I've sampled do not seem to specify a standard behavior of what |
---|
800 | * to escape/unescape. |
---|
801 | * |
---|
802 | * RFC 4627 Mandates only that the quoute, backslash, and ASCII control |
---|
803 | * characters (0x00-0x1f) be escaped. It is often common for applications |
---|
804 | * to escape a '/' - however this may also be desired behavior. the JSON |
---|
805 | * spec is not clear on this, and therefore jsonsl leaves it up to you. |
---|
806 | * |
---|
807 | * @param in The input string. |
---|
808 | * @param out An allocated output (should be the same size as in) |
---|
809 | * @param len the size of the buffer |
---|
810 | * @param toEscape - A sparse array of characters to unescape. Characters |
---|
811 | * which are not present in this array, e.g. toEscape['c'] == 0 will be |
---|
812 | * ignored and passed to the output in their original form. |
---|
813 | * @param oflags If not null, and a \uXXXX escape expands to a non-ascii byte, |
---|
814 | * then this variable will have the SPECIALf_NONASCII flag on. |
---|
815 | * |
---|
816 | * @param err A pointer to an error variable. If an error ocurrs, it will be |
---|
817 | * set in this variable |
---|
818 | * @param errat If not null and an error occurs, this will be set to point |
---|
819 | * to the position within the string at which the offending character was |
---|
820 | * encountered. |
---|
821 | * |
---|
822 | * @return The effective size of the output buffer. |
---|
823 | */ |
---|
824 | JSONSL_API |
---|
825 | size_t jsonsl_util_unescape_ex(const char *in, |
---|
826 | char *out, |
---|
827 | size_t len, |
---|
828 | const int toEscape[128], |
---|
829 | jsonsl_special_t *oflags, |
---|
830 | jsonsl_error_t *err, |
---|
831 | const char **errat); |
---|
832 | |
---|
/**
 * Shorthand for jsonsl_util_unescape_ex() which spares the caller the two
 * optional output parameters: NULL is passed for both oflags and errat.
 */
#define jsonsl_util_unescape(in, out, len, toEscape, err) \
    jsonsl_util_unescape_ex((in), (out), (len), (toEscape), \
                            NULL, (err), NULL)
---|
838 | |
---|
839 | #endif /* JSONSL_NO_JPR */ |
---|
840 | |
---|
/**
 * HERE BE CHARACTER TABLES!
 *
 * JSONSL_CHARTABLE_string_nopass expands to a comma-separated initializer
 * list of exactly 256 integers, one per byte value (0x00-0xff), suitable for
 * initializing a lookup array:
 *
 *     static const int tbl[0x100] = { JSONSL_CHARTABLE_string_nopass };
 *
 * An entry is 1 for every byte which RFC 4627 does not allow to appear
 * unescaped inside a string, and 0 otherwise:
 *   - the ASCII control characters 0x00-0x1f
 *   - the quote '"' (0x22)
 *   - the backslash (0x5c)
 *
 * Each row below holds 16 entries; the leading comment gives the byte value
 * of the first entry in the row.
 */
#define JSONSL_CHARTABLE_string_nopass \
/* 0x00 (control)   */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, \
/* 0x10 (control)   */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, \
/* 0x20 (0x22: '"') */ 0,0,1,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, \
/* 0x30             */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, \
/* 0x40             */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, \
/* 0x50 (0x5c: bsl) */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 1,0,0,0, \
/* 0x60             */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, \
/* 0x70             */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, \
/* 0x80             */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, \
/* 0x90             */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, \
/* 0xa0             */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, \
/* 0xb0             */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, \
/* 0xc0             */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, \
/* 0xd0             */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, \
/* 0xe0             */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, \
/* 0xf0             */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0
---|
876 | |
---|
877 | |
---|
878 | |
---|
879 | #ifdef __cplusplus |
---|
880 | } |
---|
881 | #endif /* __cplusplus */ |
---|
882 | |
---|
883 | #endif /* JSONSL_H_ */ |
---|