/Users/alexjokela/projects/lattice/src/json.c
Line | Count | Source |
#include "json.h"
#include <ctype.h>
#include <errno.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
7 | | |
8 | | /* ======================================================================== |
9 | | * Internal: JSON Parser (recursive descent) |
10 | | * ======================================================================== */ |
11 | | |
/*
 * Cursor over a NUL-terminated JSON text.
 *
 * The parse functions advance `pos` as they consume input and report the
 * first failure through `err`.
 */
typedef struct {
    const char *src; /* input text; scanning stops at the NUL terminator */
    size_t pos;      /* index of the next unread byte in src */
    char *err;       /* malloc'd message for the first error, or NULL */
} JsonParser;

/* Advance past any run of JSON whitespace (space, tab, LF, CR). */
static void jp_skip_ws(JsonParser *p) {
    for (;;) {
        switch (p->src[p->pos]) {
            case ' ':
            case '\t':
            case '\n':
            case '\r':
                p->pos++;
                continue; /* keep scanning: 'continue' targets the for loop */
            default:
                return;
        }
    }
}

/* Return the byte at the cursor without consuming it ('\0' at end of input). */
static char jp_peek(JsonParser *p) {
    const char *cursor = p->src + p->pos;
    return *cursor;
}

/*
 * Record a parse error, prefixed with the current cursor position.
 *
 * Only the first error is kept: when p->err is already set the call does
 * nothing. The message is heap-allocated with malloc; the allocation result
 * is not checked here.
 */
static void jp_error(JsonParser *p, const char *msg) {
    if (p->err) {
        return;
    }

    /* 64 spare bytes cover the fixed prefix and the printed position. */
    size_t room = strlen(msg) + 64;
    char *text = malloc(room);
    snprintf(text, room, "json_parse error at position %zu: %s", p->pos, msg);
    p->err = text;
}
36 | | |
37 | | /* Forward declaration */ |
38 | | static LatValue jp_parse_value(JsonParser *p); |
39 | | |
40 | | /* ── Parse string ── */ |
41 | | |
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', or -1 for any other byte.
 */
static int hex_digit(char c) {
    if (c >= '0' && c <= '9') {
        return c - '0';
    }

    /* Setting bit 0x20 folds ASCII 'A'-'F' onto 'a'-'f'; no other byte
     * lands in that range, so a single comparison covers both cases. */
    int folded = c | 0x20;
    if (folded >= 'a' && folded <= 'f') {
        return folded - 'a' + 10;
    }
    return -1;
}
48 | | |
49 | 39 | static LatValue jp_parse_string(JsonParser *p) { |
50 | | /* Opening " already verified by caller; consume it */ |
51 | 39 | p->pos++; /* skip '"' */ |
52 | | |
53 | 39 | size_t cap = 64; |
54 | 39 | size_t len = 0; |
55 | 39 | char *buf = malloc(cap); |
56 | | |
57 | 183 | while (p->src[p->pos] != '\0') { |
58 | 183 | char c = p->src[p->pos]; |
59 | 183 | if (c == '"') { |
60 | 39 | p->pos++; /* consume closing quote */ |
61 | 39 | buf[len] = '\0'; |
62 | 39 | LatValue v = value_string(buf); |
63 | 39 | free(buf); |
64 | 39 | return v; |
65 | 39 | } |
66 | 144 | if (c == '\\') { |
67 | 0 | p->pos++; |
68 | 0 | char esc = p->src[p->pos]; |
69 | 0 | if (esc == '\0') { jp_error(p, "unexpected end of string"); free(buf); return value_unit(); } |
70 | 0 | p->pos++; |
71 | 0 | switch (esc) { |
72 | 0 | case '"': c = '"'; break; |
73 | 0 | case '\\': c = '\\'; break; |
74 | 0 | case '/': c = '/'; break; |
75 | 0 | case 'b': c = '\b'; break; |
76 | 0 | case 'f': c = '\f'; break; |
77 | 0 | case 'n': c = '\n'; break; |
78 | 0 | case 'r': c = '\r'; break; |
79 | 0 | case 't': c = '\t'; break; |
80 | 0 | case 'u': { |
81 | | /* \uXXXX - parse 4 hex digits */ |
82 | 0 | int codepoint = 0; |
83 | 0 | for (int i = 0; i < 4; i++) { |
84 | 0 | int d = hex_digit(p->src[p->pos]); |
85 | 0 | if (d < 0) { jp_error(p, "invalid \\uXXXX escape"); free(buf); return value_unit(); } |
86 | 0 | codepoint = (codepoint << 4) | d; |
87 | 0 | p->pos++; |
88 | 0 | } |
89 | | /* Encode as UTF-8 (or just ASCII for codepoints < 128) */ |
90 | 0 | if (codepoint < 0x80) { |
91 | 0 | if (len + 1 >= cap) { cap *= 2; buf = realloc(buf, cap); } |
92 | 0 | buf[len++] = (char)codepoint; |
93 | 0 | } else if (codepoint < 0x800) { |
94 | 0 | if (len + 2 >= cap) { cap *= 2; buf = realloc(buf, cap); } |
95 | 0 | buf[len++] = (char)(0xC0 | (codepoint >> 6)); |
96 | 0 | buf[len++] = (char)(0x80 | (codepoint & 0x3F)); |
97 | 0 | } else { |
98 | 0 | if (len + 3 >= cap) { cap *= 2; buf = realloc(buf, cap); } |
99 | 0 | buf[len++] = (char)(0xE0 | (codepoint >> 12)); |
100 | 0 | buf[len++] = (char)(0x80 | ((codepoint >> 6) & 0x3F)); |
101 | 0 | buf[len++] = (char)(0x80 | (codepoint & 0x3F)); |
102 | 0 | } |
103 | 0 | continue; /* don't fall through to the single-char append below */ |
104 | 0 | } |
105 | 0 | default: |
106 | 0 | jp_error(p, "invalid escape sequence"); |
107 | 0 | free(buf); |
108 | 0 | return value_unit(); |
109 | 0 | } |
110 | 144 | } else { |
111 | 144 | p->pos++; |
112 | 144 | } |
113 | 144 | if (len + 1 >= cap) { cap *= 2; buf = realloc(buf, cap); } |
114 | 144 | buf[len++] = c; |
115 | 144 | } |
116 | | |
117 | 0 | jp_error(p, "unterminated string"); |
118 | 0 | free(buf); |
119 | 0 | return value_unit(); |
120 | 39 | } |
121 | | |
122 | | /* ── Parse number ── */ |
123 | | |
124 | 45 | static LatValue jp_parse_number(JsonParser *p) { |
125 | 45 | const char *start = p->src + p->pos; |
126 | 45 | bool is_float = false; |
127 | | |
128 | | /* Optional leading minus */ |
129 | 45 | if (p->src[p->pos] == '-') p->pos++; |
130 | | |
131 | | /* Integer part */ |
132 | 45 | if (p->src[p->pos] == '0') { |
133 | 0 | p->pos++; |
134 | 45 | } else if (p->src[p->pos] >= '1' && p->src[p->pos] <= '9') { |
135 | 96 | while (p->src[p->pos] >= '0' && p->src[p->pos] <= '9') p->pos++; |
136 | 45 | } else { |
137 | 0 | jp_error(p, "invalid number"); |
138 | 0 | return value_unit(); |
139 | 0 | } |
140 | | |
141 | | /* Fractional part */ |
142 | 45 | if (p->src[p->pos] == '.') { |
143 | 3 | is_float = true; |
144 | 3 | p->pos++; |
145 | 3 | if (!(p->src[p->pos] >= '0' && p->src[p->pos] <= '9')) { |
146 | 0 | jp_error(p, "invalid number: expected digit after '.'"); |
147 | 0 | return value_unit(); |
148 | 0 | } |
149 | 9 | while (p->src[p->pos] >= '0' && p->src[p->pos] <= '9') p->pos++; |
150 | 3 | } |
151 | | |
152 | | /* Exponent part */ |
153 | 45 | if (p->src[p->pos] == 'e' || p->src[p->pos] == 'E') { |
154 | 0 | is_float = true; |
155 | 0 | p->pos++; |
156 | 0 | if (p->src[p->pos] == '+' || p->src[p->pos] == '-') p->pos++; |
157 | 0 | if (!(p->src[p->pos] >= '0' && p->src[p->pos] <= '9')) { |
158 | 0 | jp_error(p, "invalid number: expected digit in exponent"); |
159 | 0 | return value_unit(); |
160 | 0 | } |
161 | 0 | while (p->src[p->pos] >= '0' && p->src[p->pos] <= '9') p->pos++; |
162 | 0 | } |
163 | | |
164 | | /* Extract the substring and convert */ |
165 | 45 | size_t numlen = (size_t)((p->src + p->pos) - start); |
166 | 45 | char *numstr = malloc(numlen + 1); |
167 | 45 | memcpy(numstr, start, numlen); |
168 | 45 | numstr[numlen] = '\0'; |
169 | | |
170 | 45 | LatValue result; |
171 | 45 | if (is_float) { |
172 | 3 | double d = strtod(numstr, NULL); |
173 | 3 | result = value_float(d); |
174 | 42 | } else { |
175 | 42 | int64_t i = strtoll(numstr, NULL, 10); |
176 | 42 | result = value_int(i); |
177 | 42 | } |
178 | 45 | free(numstr); |
179 | 45 | return result; |
180 | 45 | } |
181 | | |
182 | | /* ── Parse array ── */ |
183 | | |
184 | 9 | static LatValue jp_parse_array(JsonParser *p) { |
185 | 9 | p->pos++; /* skip '[' */ |
186 | | |
187 | | /* Build a dynamically-growing array */ |
188 | 9 | size_t cap = 8; |
189 | 9 | size_t len = 0; |
190 | 9 | LatValue *elems = malloc(cap * sizeof(LatValue)); |
191 | | |
192 | 9 | jp_skip_ws(p); |
193 | 9 | if (jp_peek(p) == ']') { |
194 | 0 | p->pos++; |
195 | 0 | LatValue arr = value_array(elems, 0); |
196 | 0 | free(elems); |
197 | 0 | return arr; |
198 | 0 | } |
199 | | |
200 | 24 | for (;;) { |
201 | 24 | if (p->err) { free(elems); return value_unit(); } |
202 | | |
203 | 24 | LatValue elem = jp_parse_value(p); |
204 | 24 | if (p->err) { |
205 | 0 | value_free(&elem); |
206 | 0 | for (size_t i = 0; i < len; i++) value_free(&elems[i]); |
207 | 0 | free(elems); |
208 | 0 | return value_unit(); |
209 | 0 | } |
210 | | |
211 | 24 | if (len >= cap) { cap *= 2; elems = realloc(elems, cap * sizeof(LatValue)); } |
212 | 24 | elems[len++] = elem; |
213 | | |
214 | 24 | jp_skip_ws(p); |
215 | 24 | if (jp_peek(p) == ',') { |
216 | 15 | p->pos++; |
217 | 15 | continue; |
218 | 15 | } |
219 | 9 | if (jp_peek(p) == ']') { |
220 | 9 | p->pos++; |
221 | 9 | break; |
222 | 9 | } |
223 | 0 | jp_error(p, "expected ',' or ']' in array"); |
224 | 0 | for (size_t i = 0; i < len; i++) value_free(&elems[i]); |
225 | 0 | free(elems); |
226 | 0 | return value_unit(); |
227 | 9 | } |
228 | | |
229 | 9 | LatValue arr = value_array(elems, len); |
230 | 9 | free(elems); |
231 | 9 | return arr; |
232 | 9 | } |
233 | | |
234 | | /* ── Parse object ── */ |
235 | | |
236 | 24 | static LatValue jp_parse_object(JsonParser *p) { |
237 | 24 | p->pos++; /* skip '{' */ |
238 | | |
239 | 24 | LatValue map = value_map_new(); |
240 | | |
241 | 24 | jp_skip_ws(p); |
242 | 24 | if (jp_peek(p) == '}') { |
243 | 0 | p->pos++; |
244 | 0 | return map; |
245 | 0 | } |
246 | | |
247 | 33 | for (;;) { |
248 | 33 | if (p->err) { value_free(&map); return value_unit(); } |
249 | | |
250 | 33 | jp_skip_ws(p); |
251 | 33 | if (jp_peek(p) != '"') { |
252 | 3 | jp_error(p, "expected string key in object"); |
253 | 3 | value_free(&map); |
254 | 3 | return value_unit(); |
255 | 3 | } |
256 | | |
257 | | /* Parse key as a string value, then extract */ |
258 | 30 | LatValue key_val = jp_parse_string(p); |
259 | 30 | if (p->err) { value_free(&key_val); value_free(&map); return value_unit(); } |
260 | 30 | char *key = strdup(key_val.as.str_val); |
261 | 30 | value_free(&key_val); |
262 | | |
263 | 30 | jp_skip_ws(p); |
264 | 30 | if (jp_peek(p) != ':') { |
265 | 0 | jp_error(p, "expected ':' after object key"); |
266 | 0 | free(key); |
267 | 0 | value_free(&map); |
268 | 0 | return value_unit(); |
269 | 0 | } |
270 | 30 | p->pos++; /* skip ':' */ |
271 | | |
272 | 30 | LatValue val = jp_parse_value(p); |
273 | 30 | if (p->err) { |
274 | 0 | value_free(&val); |
275 | 0 | free(key); |
276 | 0 | value_free(&map); |
277 | 0 | return value_unit(); |
278 | 0 | } |
279 | | |
280 | 30 | lat_map_set(map.as.map.map, key, &val); |
281 | 30 | free(key); |
282 | | |
283 | 30 | jp_skip_ws(p); |
284 | 30 | if (jp_peek(p) == ',') { |
285 | 9 | p->pos++; |
286 | 9 | continue; |
287 | 9 | } |
288 | 21 | if (jp_peek(p) == '}') { |
289 | 21 | p->pos++; |
290 | 21 | break; |
291 | 21 | } |
292 | 0 | jp_error(p, "expected ',' or '}' in object"); |
293 | 0 | value_free(&map); |
294 | 0 | return value_unit(); |
295 | 21 | } |
296 | | |
297 | 21 | return map; |
298 | 24 | } |
299 | | |
300 | | /* ── Parse value (top-level dispatch) ── */ |
301 | | |
302 | 102 | static LatValue jp_parse_value(JsonParser *p) { |
303 | 102 | jp_skip_ws(p); |
304 | 102 | char c = jp_peek(p); |
305 | | |
306 | 102 | if (c == '"') return jp_parse_string(p); |
307 | 93 | if (c == '{') return jp_parse_object(p); |
308 | 69 | if (c == '[') return jp_parse_array(p); |
309 | 60 | if (c == '-' || (c >= '0' && c <= '9')) return jp_parse_number(p); |
310 | | |
311 | | /* true */ |
312 | 15 | if (strncmp(p->src + p->pos, "true", 4) == 0 && |
313 | 15 | !isalnum((unsigned char)p->src[p->pos + 4])) { |
314 | 6 | p->pos += 4; |
315 | 6 | return value_bool(true); |
316 | 6 | } |
317 | | /* false */ |
318 | 9 | if (strncmp(p->src + p->pos, "false", 5) == 0 && |
319 | 9 | !isalnum((unsigned char)p->src[p->pos + 5])) { |
320 | 3 | p->pos += 5; |
321 | 3 | return value_bool(false); |
322 | 3 | } |
323 | | /* null */ |
324 | 6 | if (strncmp(p->src + p->pos, "null", 4) == 0 && |
325 | 6 | !isalnum((unsigned char)p->src[p->pos + 4])) { |
326 | 6 | p->pos += 4; |
327 | 6 | return value_nil(); |
328 | 6 | } |
329 | | |
330 | 0 | jp_error(p, "unexpected character"); |
331 | 0 | return value_unit(); |
332 | 6 | } |
333 | | |
334 | | /* ── Public API: json_parse ── */ |
335 | | |
336 | 48 | LatValue json_parse(const char *json, char **err) { |
337 | 48 | *err = NULL; |
338 | 48 | JsonParser p = { .src = json, .pos = 0, .err = NULL }; |
339 | | |
340 | 48 | LatValue result = jp_parse_value(&p); |
341 | 48 | if (p.err) { |
342 | 3 | value_free(&result); |
343 | 3 | *err = p.err; |
344 | 3 | return value_unit(); |
345 | 3 | } |
346 | | |
347 | | /* Verify no trailing non-whitespace */ |
348 | 45 | jp_skip_ws(&p); |
349 | 45 | if (p.src[p.pos] != '\0') { |
350 | 0 | value_free(&result); |
351 | 0 | jp_error(&p, "unexpected trailing content"); |
352 | 0 | *err = p.err; |
353 | 0 | return value_unit(); |
354 | 0 | } |
355 | | |
356 | 45 | return result; |
357 | 45 | } |
358 | | |
359 | | |
360 | | /* ======================================================================== |
361 | | * Internal: JSON Serializer |
362 | | * ======================================================================== */ |
363 | | |
/*
 * Growable byte buffer used by the serializer.
 *
 * buf holds cap allocated bytes, of which the first len are in use. The
 * append helpers do not NUL-terminate; the caller appends '\0' explicitly.
 */
typedef struct {
    char *buf;
    size_t len;
    size_t cap;
} JsonBuf;

/*
 * The append helpers return void, so an allocation failure cannot be
 * reported to the caller; stop with a message instead of writing through a
 * NULL or undersized buffer.
 */
static void jb_oom(void) {
    fputs("json_stringify: out of memory\n", stderr);
    abort();
}

/* Initialise b as an empty buffer with 128 bytes of capacity. */
static void jb_init(JsonBuf *b) {
    b->cap = 128;
    b->len = 0;
    b->buf = malloc(b->cap);
    if (!b->buf) {
        jb_oom();
    }
}

/*
 * Guarantee that b->len + extra < b->cap, doubling the capacity as needed.
 * The target capacity is computed first so the buffer is reallocated once.
 */
static void jb_ensure(JsonBuf *b, size_t extra) {
    const size_t size_max = (size_t)-1;

    if (extra > size_max - b->len) {
        jb_oom(); /* len + extra would wrap around */
    }
    size_t need = b->len + extra;
    if (need < b->cap) {
        return;
    }

    size_t new_cap = b->cap;
    while (need >= new_cap) {
        if (new_cap > size_max / 2) {
            jb_oom();
        }
        new_cap *= 2;
    }

    /* Assign only after success so the old block is never lost. */
    char *grown = realloc(b->buf, new_cap);
    if (!grown) {
        jb_oom();
    }
    b->buf = grown;
    b->cap = new_cap;
}

/* Append slen bytes from s. */
static void jb_append(JsonBuf *b, const char *s, size_t slen) {
    jb_ensure(b, slen + 1);
    memcpy(b->buf + b->len, s, slen);
    b->len += slen;
}

/* Append the NUL-terminated string s (without its terminator). */
static void jb_append_str(JsonBuf *b, const char *s) {
    jb_append(b, s, strlen(s));
}

/* Append a single byte. */
static void jb_append_char(JsonBuf *b, char c) {
    jb_ensure(b, 2);
    b->buf[b->len++] = c;
}

/*
 * Append s as a JSON string literal, including the surrounding quotes.
 *
 * '"', '\\' and the control characters \b \f \n \r \t use their two-character
 * escapes; any other byte below 0x20 is written as \u00XX; every other byte
 * (including bytes >= 0x80) is copied through unchanged.
 */
static void jb_append_escaped_string(JsonBuf *b, const char *s) {
    jb_append_char(b, '"');

    for (const char *p = s; *p; p++) {
        unsigned char c = (unsigned char)*p;
        const char *two_char = NULL;

        switch (c) {
            case '"':  two_char = "\\\""; break;
            case '\\': two_char = "\\\\"; break;
            case '\b': two_char = "\\b";  break;
            case '\f': two_char = "\\f";  break;
            case '\n': two_char = "\\n";  break;
            case '\r': two_char = "\\r";  break;
            case '\t': two_char = "\\t";  break;
            default:   break;
        }

        if (two_char) {
            jb_append(b, two_char, 2);
        } else if (c < 0x20) {
            /* Remaining control characters: \u00XX (6 chars + NUL). */
            char esc[7];
            snprintf(esc, sizeof(esc), "\\u%04x", c);
            jb_append_str(b, esc);
        } else {
            jb_append_char(b, (char)c);
        }
    }

    jb_append_char(b, '"');
}
426 | | |
427 | | /* Forward declaration */ |
428 | | static bool jb_serialize(JsonBuf *b, const LatValue *val, char **err); |
429 | | |
430 | 33 | static bool jb_serialize(JsonBuf *b, const LatValue *val, char **err) { |
431 | 33 | switch (val->type) { |
432 | 15 | case VAL_INT: { |
433 | 15 | char num[32]; |
434 | 15 | snprintf(num, sizeof(num), "%lld", (long long)val->as.int_val); |
435 | 15 | jb_append_str(b, num); |
436 | 15 | return true; |
437 | 0 | } |
438 | 0 | case VAL_FLOAT: { |
439 | 0 | char num[64]; |
440 | 0 | double d = val->as.float_val; |
441 | 0 | if (isinf(d) || isnan(d)) { |
442 | 0 | jb_append_str(b, "null"); /* JSON has no Inf/NaN */ |
443 | 0 | } else { |
444 | 0 | snprintf(num, sizeof(num), "%.17g", d); |
445 | 0 | jb_append_str(b, num); |
446 | 0 | } |
447 | 0 | return true; |
448 | 0 | } |
449 | 6 | case VAL_BOOL: |
450 | 6 | jb_append_str(b, val->as.bool_val ? "true" : "false"); |
451 | 6 | return true; |
452 | 3 | case VAL_STR: |
453 | 3 | jb_append_escaped_string(b, val->as.str_val); |
454 | 3 | return true; |
455 | 0 | case VAL_UNIT: |
456 | 3 | case VAL_NIL: |
457 | 3 | jb_append_str(b, "null"); |
458 | 3 | return true; |
459 | 3 | case VAL_ARRAY: { |
460 | 3 | jb_append_char(b, '['); |
461 | 12 | for (size_t i = 0; i < val->as.array.len; i++) { |
462 | 9 | if (i > 0) jb_append_char(b, ','); |
463 | 9 | if (!jb_serialize(b, &val->as.array.elems[i], err)) return false; |
464 | 9 | } |
465 | 3 | jb_append_char(b, ']'); |
466 | 3 | return true; |
467 | 3 | } |
468 | 3 | case VAL_MAP: { |
469 | 3 | jb_append_char(b, '{'); |
470 | 3 | LatMap *m = val->as.map.map; |
471 | 3 | bool first = true; |
472 | 51 | for (size_t i = 0; i < m->cap; i++) { |
473 | 48 | if (m->entries[i].state != MAP_OCCUPIED) continue; |
474 | 3 | if (!first) jb_append_char(b, ','); |
475 | 3 | first = false; |
476 | 3 | jb_append_escaped_string(b, m->entries[i].key); |
477 | 3 | jb_append_char(b, ':'); |
478 | 3 | LatValue *mv = (LatValue *)m->entries[i].value; |
479 | 3 | if (!jb_serialize(b, mv, err)) return false; |
480 | 3 | } |
481 | 3 | jb_append_char(b, '}'); |
482 | 3 | return true; |
483 | 3 | } |
484 | 0 | case VAL_TUPLE: { |
485 | 0 | jb_append_char(b, '['); |
486 | 0 | for (size_t i = 0; i < val->as.tuple.len; i++) { |
487 | 0 | if (i > 0) jb_append_char(b, ','); |
488 | 0 | if (!jb_serialize(b, &val->as.tuple.elems[i], err)) return false; |
489 | 0 | } |
490 | 0 | jb_append_char(b, ']'); |
491 | 0 | return true; |
492 | 0 | } |
493 | 0 | case VAL_BUFFER: { |
494 | 0 | jb_append_char(b, '['); |
495 | 0 | for (size_t i = 0; i < val->as.buffer.len; i++) { |
496 | 0 | if (i > 0) jb_append_char(b, ','); |
497 | 0 | char num[8]; |
498 | 0 | snprintf(num, sizeof(num), "%u", val->as.buffer.data[i]); |
499 | 0 | jb_append_str(b, num); |
500 | 0 | } |
501 | 0 | jb_append_char(b, ']'); |
502 | 0 | return true; |
503 | 0 | } |
504 | 0 | case VAL_REF: |
505 | 0 | return jb_serialize(b, &val->as.ref.ref->value, err); |
506 | 0 | case VAL_STRUCT: |
507 | 0 | case VAL_CLOSURE: |
508 | 0 | case VAL_RANGE: |
509 | 0 | case VAL_CHANNEL: |
510 | 0 | case VAL_ENUM: |
511 | 0 | case VAL_SET: |
512 | 0 | *err = strdup("json_stringify: unsupported value type"); |
513 | 0 | return false; |
514 | 33 | } |
515 | 0 | *err = strdup("json_stringify: unknown value type"); |
516 | 0 | return false; |
517 | 33 | } |
518 | | |
519 | | /* ── Public API: json_stringify ── */ |
520 | | |
521 | 21 | char *json_stringify(const LatValue *val, char **err) { |
522 | 21 | *err = NULL; |
523 | 21 | JsonBuf b; |
524 | 21 | jb_init(&b); |
525 | | |
526 | 21 | if (!jb_serialize(&b, val, err)) { |
527 | 0 | free(b.buf); |
528 | 0 | return NULL; |
529 | 0 | } |
530 | | |
531 | 21 | jb_append_char(&b, '\0'); |
532 | 21 | return b.buf; |
533 | 21 | } |