/Users/alexjokela/projects/lattice/src/latc.c
Line | Count | Source |
1 | | #include "latc.h" |
2 | | #include <stdlib.h> |
3 | | #include <string.h> |
4 | | #include <stdio.h> |
5 | | |
6 | | /* ── Constant type tags for serialization ── */ |
/*
 * One-byte tag written in front of every constant-pool entry.
 * The numeric values are stored in the serialized image, so they must
 * stay exactly as listed.
 */
enum {
    TAG_INT     = 0, /* followed by an 8-byte little-endian integer */
    TAG_FLOAT   = 1, /* followed by an 8-byte double */
    TAG_BOOL    = 2, /* followed by one byte, 0 or 1 */
    TAG_STR     = 3, /* followed by u32 length + raw bytes (no NUL) */
    TAG_NIL     = 4, /* no payload */
    TAG_UNIT    = 5, /* no payload */
    TAG_CLOSURE = 6  /* followed by u32 param_count, u8 variadic flag, nested chunk */
};
14 | | |
15 | | /* ── Growable byte buffer (writer) ── */ |
16 | | |
/*
 * Growable byte buffer used to assemble a serialized image.
 *
 * Allocation failure "poisons" the buffer: its storage is released and it is
 * left with data == NULL and len == cap == 0. Every later write on a poisoned
 * buffer is a no-op, so callers may keep writing unconditionally and inspect
 * `data` once at the end.
 */
typedef struct {
    uint8_t *data; /* heap storage, or NULL once poisoned */
    size_t len;    /* bytes written so far */
    size_t cap;    /* bytes allocated at `data` */
} ByteBuf;

/* Start an empty buffer with 1024 bytes of storage (poisoned if malloc fails). */
static void bb_init(ByteBuf *bb) {
    bb->len = 0;
    bb->cap = 1024;
    bb->data = malloc(bb->cap);
    if (bb->data == NULL)
        bb->cap = 0;
}

/* Release the storage and put the buffer into the poisoned state. */
static void bb_poison(ByteBuf *bb) {
    free(bb->data);
    bb->data = NULL;
    bb->len = 0;
    bb->cap = 0;
}

/*
 * Make room for `need` more bytes after `len`, doubling the capacity as
 * required. Returns true when the space is available; returns false (with
 * the buffer poisoned) when the size would overflow or realloc fails.
 */
static bool bb_ensure(ByteBuf *bb, size_t need) {
    if (bb->data == NULL)
        return false;
    if (need > SIZE_MAX - bb->len) { /* len + need would wrap */
        bb_poison(bb);
        return false;
    }

    size_t want = bb->len + need;
    if (want <= bb->cap)
        return true;

    size_t new_cap = bb->cap ? bb->cap : 1024;
    while (new_cap < want) {
        if (new_cap > SIZE_MAX / 2) { /* doubling would wrap: take the exact size */
            new_cap = want;
            break;
        }
        new_cap *= 2;
    }

    /* Keep the old pointer until realloc succeeds so it is never leaked. */
    uint8_t *grown = realloc(bb->data, new_cap);
    if (grown == NULL) {
        bb_poison(bb);
        return false;
    }
    bb->data = grown;
    bb->cap = new_cap;
    return true;
}

/* Append `n` bytes from `src`. A zero-length write never touches `src`. */
static void bb_write_bytes(ByteBuf *bb, const void *src, size_t n) {
    if (n == 0)
        return;
    if (!bb_ensure(bb, n))
        return;
    memcpy(bb->data + bb->len, src, n);
    bb->len += n;
}

/* Append a single byte. */
static void bb_write_u8(ByteBuf *bb, uint8_t v) {
    if (!bb_ensure(bb, 1))
        return;
    bb->data[bb->len++] = v;
}
46 | | |
47 | 36 | static void bb_write_u16_le(ByteBuf *bb, uint16_t v) { |
48 | 36 | uint8_t buf[2] = { (uint8_t)(v & 0xff), (uint8_t)((v >> 8) & 0xff) }; |
49 | 36 | bb_write_bytes(bb, buf, 2); |
50 | 36 | } |
51 | | |
52 | 1.00k | static void bb_write_u32_le(ByteBuf *bb, uint32_t v) { |
53 | 1.00k | uint8_t buf[4] = { |
54 | 1.00k | (uint8_t)(v & 0xff), (uint8_t)((v >> 8) & 0xff), |
55 | 1.00k | (uint8_t)((v >> 16) & 0xff), (uint8_t)((v >> 24) & 0xff) |
56 | 1.00k | }; |
57 | 1.00k | bb_write_bytes(bb, buf, 4); |
58 | 1.00k | } |
59 | | |
60 | 0 | static void bb_write_i64_le(ByteBuf *bb, int64_t v) { |
61 | 0 | uint64_t u = (uint64_t)v; |
62 | 0 | uint8_t buf[8]; |
63 | 0 | for (int i = 0; i < 8; i++) buf[i] = (uint8_t)((u >> (i * 8)) & 0xff); |
64 | 0 | bb_write_bytes(bb, buf, 8); |
65 | 0 | } |
66 | | |
67 | 0 | static void bb_write_f64_le(ByteBuf *bb, double v) { |
68 | 0 | uint8_t buf[8]; |
69 | 0 | memcpy(buf, &v, 8); |
70 | 0 | bb_write_bytes(bb, buf, 8); |
71 | 0 | } |
72 | | |
73 | | /* ── Bounds-checked byte reader ── */ |
74 | | |
/*
 * Cursor over an immutable byte range. Every br_read_* function either
 * consumes exactly the bytes it needs and returns true, or leaves `pos`
 * untouched and returns false when too few bytes remain.
 */
typedef struct {
    const uint8_t *data; /* start of the input */
    size_t len;          /* total bytes available at `data` */
    size_t pos;          /* offset of the next unread byte */
} ByteReader;

/*
 * True when at least `n` unread bytes remain. Written as a subtraction so a
 * huge `n` cannot wrap the comparison the way `pos + n > len` can.
 */
static bool br_has(const ByteReader *br, size_t n) {
    return br->pos <= br->len && n <= br->len - br->pos;
}

/* Assemble `nbytes` (1..8) little-endian bytes at the cursor without advancing it. */
static uint64_t br_peek_le(const ByteReader *br, unsigned nbytes) {
    uint64_t acc = 0;
    for (unsigned i = 0; i < nbytes; i++)
        acc |= (uint64_t)br->data[br->pos + i] << (i * 8);
    return acc;
}

/* Read one byte. */
static bool br_read_u8(ByteReader *br, uint8_t *out) {
    if (!br_has(br, 1)) return false;
    *out = br->data[br->pos++];
    return true;
}

/* Read a little-endian 16-bit value. */
static bool br_read_u16_le(ByteReader *br, uint16_t *out) {
    if (!br_has(br, 2)) return false;
    *out = (uint16_t)br_peek_le(br, 2);
    br->pos += 2;
    return true;
}

/* Read a little-endian 32-bit value. */
static bool br_read_u32_le(ByteReader *br, uint32_t *out) {
    if (!br_has(br, 4)) return false;
    *out = (uint32_t)br_peek_le(br, 4);
    br->pos += 4;
    return true;
}

/* Read a little-endian 64-bit value and reinterpret it as signed. */
static bool br_read_i64_le(ByteReader *br, int64_t *out) {
    if (!br_has(br, 8)) return false;
    *out = (int64_t)br_peek_le(br, 8);
    br->pos += 8;
    return true;
}

/*
 * Read a double stored as 8 little-endian bytes. The bytes are assembled
 * into a uint64_t first so the result is the same on any host byte order.
 * Assumes a double occupies 8 bytes, as the original copy did.
 */
static bool br_read_f64_le(ByteReader *br, double *out) {
    if (!br_has(br, 8)) return false;
    uint64_t bits = br_peek_le(br, 8);
    memcpy(out, &bits, sizeof bits);
    br->pos += 8;
    return true;
}

/* Copy the next `n` bytes into `dst`. A zero-length read never touches `dst`. */
static bool br_read_bytes(ByteReader *br, void *dst, size_t n) {
    if (!br_has(br, n)) return false;
    if (n > 0)
        memcpy(dst, br->data + br->pos, n);
    br->pos += n;
    return true;
}
128 | | |
129 | | /* ── Serialize a single chunk (recursive) ── */ |
130 | | |
131 | 48 | static void serialize_chunk(ByteBuf *bb, const Chunk *c) { |
132 | | /* Bytecode */ |
133 | 48 | bb_write_u32_le(bb, (uint32_t)c->code_len); |
134 | 48 | bb_write_bytes(bb, c->code, c->code_len); |
135 | | |
136 | | /* Line numbers (parallel to code, same count) */ |
137 | 48 | bb_write_u32_le(bb, (uint32_t)c->lines_len); |
138 | 750 | for (size_t i = 0; i < c->lines_len; i++) |
139 | 702 | bb_write_u32_le(bb, (uint32_t)c->lines[i]); |
140 | | |
141 | | /* Constants */ |
142 | 48 | bb_write_u32_le(bb, (uint32_t)c->const_len); |
143 | 144 | for (size_t i = 0; i < c->const_len; i++) { |
144 | 96 | const LatValue *v = &c->constants[i]; |
145 | 96 | switch (v->type) { |
146 | 0 | case VAL_INT: |
147 | 0 | bb_write_u8(bb, TAG_INT); |
148 | 0 | bb_write_i64_le(bb, v->as.int_val); |
149 | 0 | break; |
150 | 0 | case VAL_FLOAT: |
151 | 0 | bb_write_u8(bb, TAG_FLOAT); |
152 | 0 | bb_write_f64_le(bb, v->as.float_val); |
153 | 0 | break; |
154 | 0 | case VAL_BOOL: |
155 | 0 | bb_write_u8(bb, TAG_BOOL); |
156 | 0 | bb_write_u8(bb, v->as.bool_val ? 1 : 0); |
157 | 0 | break; |
158 | 66 | case VAL_STR: { |
159 | 66 | bb_write_u8(bb, TAG_STR); |
160 | 66 | uint32_t slen = (uint32_t)strlen(v->as.str_val); |
161 | 66 | bb_write_u32_le(bb, slen); |
162 | 66 | bb_write_bytes(bb, v->as.str_val, slen); |
163 | 66 | break; |
164 | 0 | } |
165 | 0 | case VAL_NIL: |
166 | 0 | bb_write_u8(bb, TAG_NIL); |
167 | 0 | break; |
168 | 0 | case VAL_UNIT: |
169 | 0 | bb_write_u8(bb, TAG_UNIT); |
170 | 0 | break; |
171 | 30 | case VAL_CLOSURE: |
172 | | /* Compiled sub-chunk: body==NULL, native_fn holds Chunk* */ |
173 | 30 | if (v->as.closure.body == NULL && v->as.closure.native_fn != NULL) { |
174 | 30 | bb_write_u8(bb, TAG_CLOSURE); |
175 | 30 | bb_write_u32_le(bb, (uint32_t)v->as.closure.param_count); |
176 | 30 | bb_write_u8(bb, v->as.closure.has_variadic ? 1 : 0); |
177 | 30 | serialize_chunk(bb, (const Chunk *)v->as.closure.native_fn); |
178 | 30 | } else { |
179 | | /* Shouldn't appear in compiler output, treat as nil */ |
180 | 0 | bb_write_u8(bb, TAG_NIL); |
181 | 0 | } |
182 | 30 | break; |
183 | 0 | default: |
184 | | /* Unknown type in constant pool — write nil as fallback */ |
185 | 0 | bb_write_u8(bb, TAG_NIL); |
186 | 0 | break; |
187 | 96 | } |
188 | 96 | } |
189 | | |
190 | | /* Local names (debug info) */ |
191 | 48 | bb_write_u32_le(bb, (uint32_t)c->local_name_cap); |
192 | 144 | for (size_t i = 0; i < c->local_name_cap; i++) { |
193 | 96 | if (c->local_names && c->local_names[i]) { |
194 | 15 | bb_write_u8(bb, 1); |
195 | 15 | uint32_t nlen = (uint32_t)strlen(c->local_names[i]); |
196 | 15 | bb_write_u32_le(bb, nlen); |
197 | 15 | bb_write_bytes(bb, c->local_names[i], nlen); |
198 | 81 | } else { |
199 | 81 | bb_write_u8(bb, 0); |
200 | 81 | } |
201 | 96 | } |
202 | 48 | } |
203 | | |
204 | | /* ── Deserialize a single chunk (recursive) ── */ |
205 | | |
206 | 48 | static Chunk *deserialize_chunk(ByteReader *br, char **err) { |
207 | 48 | uint32_t code_len; |
208 | 48 | if (!br_read_u32_le(br, &code_len)) { |
209 | 0 | *err = strdup("truncated: missing code_len"); |
210 | 0 | return NULL; |
211 | 0 | } |
212 | | |
213 | 48 | Chunk *c = chunk_new(); |
214 | | |
215 | | /* Bytecode */ |
216 | 48 | if (code_len > 0) { |
217 | 48 | if (c->code_cap < code_len) { |
218 | 0 | c->code_cap = code_len; |
219 | 0 | c->code = realloc(c->code, c->code_cap); |
220 | 0 | } |
221 | 48 | if (!br_read_bytes(br, c->code, code_len)) { |
222 | 0 | *err = strdup("truncated: incomplete bytecode"); |
223 | 0 | chunk_free(c); |
224 | 0 | return NULL; |
225 | 0 | } |
226 | 48 | } |
227 | 48 | c->code_len = code_len; |
228 | | |
229 | | /* Line numbers */ |
230 | 48 | uint32_t line_count; |
231 | 48 | if (!br_read_u32_le(br, &line_count)) { |
232 | 0 | *err = strdup("truncated: missing line_count"); |
233 | 0 | chunk_free(c); |
234 | 0 | return NULL; |
235 | 0 | } |
236 | 48 | if (c->lines_cap < line_count) { |
237 | 0 | c->lines_cap = line_count; |
238 | 0 | c->lines = realloc(c->lines, c->lines_cap * sizeof(int)); |
239 | 0 | } |
240 | 750 | for (uint32_t i = 0; i < line_count; i++) { |
241 | 702 | uint32_t line_val; |
242 | 702 | if (!br_read_u32_le(br, &line_val)) { |
243 | 0 | *err = strdup("truncated: incomplete line data"); |
244 | 0 | chunk_free(c); |
245 | 0 | return NULL; |
246 | 0 | } |
247 | 702 | c->lines[i] = (int)line_val; |
248 | 702 | } |
249 | 48 | c->lines_len = line_count; |
250 | | |
251 | | /* Constants */ |
252 | 48 | uint32_t const_count; |
253 | 48 | if (!br_read_u32_le(br, &const_count)) { |
254 | 0 | *err = strdup("truncated: missing const_count"); |
255 | 0 | chunk_free(c); |
256 | 0 | return NULL; |
257 | 0 | } |
258 | 144 | for (uint32_t i = 0; i < const_count; i++) { |
259 | 96 | uint8_t tag; |
260 | 96 | if (!br_read_u8(br, &tag)) { |
261 | 0 | *err = strdup("truncated: missing constant type tag"); |
262 | 0 | chunk_free(c); |
263 | 0 | return NULL; |
264 | 0 | } |
265 | 96 | switch (tag) { |
266 | 0 | case TAG_INT: { |
267 | 0 | int64_t val; |
268 | 0 | if (!br_read_i64_le(br, &val)) { |
269 | 0 | *err = strdup("truncated: incomplete int constant"); |
270 | 0 | chunk_free(c); |
271 | 0 | return NULL; |
272 | 0 | } |
273 | 0 | chunk_add_constant(c, value_int(val)); |
274 | 0 | break; |
275 | 0 | } |
276 | 0 | case TAG_FLOAT: { |
277 | 0 | double val; |
278 | 0 | if (!br_read_f64_le(br, &val)) { |
279 | 0 | *err = strdup("truncated: incomplete float constant"); |
280 | 0 | chunk_free(c); |
281 | 0 | return NULL; |
282 | 0 | } |
283 | 0 | chunk_add_constant(c, value_float(val)); |
284 | 0 | break; |
285 | 0 | } |
286 | 0 | case TAG_BOOL: { |
287 | 0 | uint8_t val; |
288 | 0 | if (!br_read_u8(br, &val)) { |
289 | 0 | *err = strdup("truncated: incomplete bool constant"); |
290 | 0 | chunk_free(c); |
291 | 0 | return NULL; |
292 | 0 | } |
293 | 0 | chunk_add_constant(c, value_bool(val != 0)); |
294 | 0 | break; |
295 | 0 | } |
296 | 66 | case TAG_STR: { |
297 | 66 | uint32_t slen; |
298 | 66 | if (!br_read_u32_le(br, &slen)) { |
299 | 0 | *err = strdup("truncated: incomplete string length"); |
300 | 0 | chunk_free(c); |
301 | 0 | return NULL; |
302 | 0 | } |
303 | 66 | char *s = malloc(slen + 1); |
304 | 66 | if (!br_read_bytes(br, s, slen)) { |
305 | 0 | free(s); |
306 | 0 | *err = strdup("truncated: incomplete string data"); |
307 | 0 | chunk_free(c); |
308 | 0 | return NULL; |
309 | 0 | } |
310 | 66 | s[slen] = '\0'; |
311 | 66 | chunk_add_constant(c, value_string_owned(s)); |
312 | 66 | break; |
313 | 66 | } |
314 | 0 | case TAG_NIL: |
315 | 0 | chunk_add_constant(c, value_nil()); |
316 | 0 | break; |
317 | 0 | case TAG_UNIT: |
318 | 0 | chunk_add_constant(c, value_unit()); |
319 | 0 | break; |
320 | 30 | case TAG_CLOSURE: { |
321 | 30 | uint32_t param_count; |
322 | 30 | uint8_t has_variadic; |
323 | 30 | if (!br_read_u32_le(br, ¶m_count)) { |
324 | 0 | *err = strdup("truncated: incomplete closure param_count"); |
325 | 0 | chunk_free(c); |
326 | 0 | return NULL; |
327 | 0 | } |
328 | 30 | if (!br_read_u8(br, &has_variadic)) { |
329 | 0 | *err = strdup("truncated: incomplete closure has_variadic"); |
330 | 0 | chunk_free(c); |
331 | 0 | return NULL; |
332 | 0 | } |
333 | 30 | Chunk *sub = deserialize_chunk(br, err); |
334 | 30 | if (!sub) { |
335 | 0 | chunk_free(c); |
336 | 0 | return NULL; |
337 | 0 | } |
338 | 30 | LatValue fn_val; |
339 | 30 | memset(&fn_val, 0, sizeof(fn_val)); |
340 | 30 | fn_val.type = VAL_CLOSURE; |
341 | 30 | fn_val.phase = VTAG_UNPHASED; |
342 | 30 | fn_val.region_id = (size_t)-1; |
343 | 30 | fn_val.as.closure.param_names = NULL; |
344 | 30 | fn_val.as.closure.param_count = (size_t)param_count; |
345 | 30 | fn_val.as.closure.body = NULL; |
346 | 30 | fn_val.as.closure.captured_env = NULL; |
347 | 30 | fn_val.as.closure.default_values = NULL; |
348 | 30 | fn_val.as.closure.has_variadic = (has_variadic != 0); |
349 | 30 | fn_val.as.closure.native_fn = sub; |
350 | 30 | chunk_add_constant(c, fn_val); |
351 | 30 | break; |
352 | 30 | } |
353 | 0 | default: { |
354 | 0 | char msg[64]; |
355 | 0 | snprintf(msg, sizeof(msg), "unknown constant type tag: %d", tag); |
356 | 0 | *err = strdup(msg); |
357 | 0 | chunk_free(c); |
358 | 0 | return NULL; |
359 | 30 | } |
360 | 96 | } |
361 | 96 | } |
362 | | |
363 | | /* Local names */ |
364 | 48 | uint32_t local_name_count; |
365 | 48 | if (!br_read_u32_le(br, &local_name_count)) { |
366 | 0 | *err = strdup("truncated: missing local_name_count"); |
367 | 0 | chunk_free(c); |
368 | 0 | return NULL; |
369 | 0 | } |
370 | 144 | for (uint32_t i = 0; i < local_name_count; i++) { |
371 | 96 | uint8_t present; |
372 | 96 | if (!br_read_u8(br, &present)) { |
373 | 0 | *err = strdup("truncated: incomplete local name flag"); |
374 | 0 | chunk_free(c); |
375 | 0 | return NULL; |
376 | 0 | } |
377 | 96 | if (present) { |
378 | 15 | uint32_t nlen; |
379 | 15 | if (!br_read_u32_le(br, &nlen)) { |
380 | 0 | *err = strdup("truncated: incomplete local name length"); |
381 | 0 | chunk_free(c); |
382 | 0 | return NULL; |
383 | 0 | } |
384 | 15 | char *name = malloc(nlen + 1); |
385 | 15 | if (!br_read_bytes(br, name, nlen)) { |
386 | 0 | free(name); |
387 | 0 | *err = strdup("truncated: incomplete local name data"); |
388 | 0 | chunk_free(c); |
389 | 0 | return NULL; |
390 | 0 | } |
391 | 15 | name[nlen] = '\0'; |
392 | 15 | chunk_set_local_name(c, (size_t)i, name); |
393 | 15 | free(name); |
394 | 15 | } |
395 | 96 | } |
396 | | |
397 | 48 | return c; |
398 | 48 | } |
399 | | |
400 | | /* ── Public API ── */ |
401 | | |
402 | 18 | uint8_t *chunk_serialize(const Chunk *c, size_t *out_len) { |
403 | 18 | ByteBuf bb; |
404 | 18 | bb_init(&bb); |
405 | | |
406 | | /* Header */ |
407 | 18 | bb_write_bytes(&bb, LATC_MAGIC, 4); |
408 | 18 | bb_write_u16_le(&bb, LATC_FORMAT); |
409 | 18 | bb_write_u16_le(&bb, 0); /* reserved */ |
410 | | |
411 | 18 | serialize_chunk(&bb, c); |
412 | | |
413 | 18 | *out_len = bb.len; |
414 | 18 | return bb.data; |
415 | 18 | } |
416 | | |
417 | 24 | Chunk *chunk_deserialize(const uint8_t *data, size_t len, char **err) { |
418 | 24 | ByteReader br = { data, len, 0 }; |
419 | | |
420 | | /* Validate header */ |
421 | 24 | if (len < 8) { |
422 | 3 | *err = strdup("file too small for .latc header"); |
423 | 3 | return NULL; |
424 | 3 | } |
425 | 21 | if (memcmp(data, LATC_MAGIC, 4) != 0) { |
426 | 3 | *err = strdup("invalid magic: not a .latc file"); |
427 | 3 | return NULL; |
428 | 3 | } |
429 | 18 | br.pos = 4; |
430 | | |
431 | 18 | uint16_t version; |
432 | 18 | if (!br_read_u16_le(&br, &version)) { |
433 | 0 | *err = strdup("truncated: missing format version"); |
434 | 0 | return NULL; |
435 | 0 | } |
436 | 18 | if (version != LATC_FORMAT) { |
437 | 0 | char msg[64]; |
438 | 0 | snprintf(msg, sizeof(msg), "unsupported .latc format version: %u", version); |
439 | 0 | *err = strdup(msg); |
440 | 0 | return NULL; |
441 | 0 | } |
442 | | |
443 | 18 | uint16_t reserved; |
444 | 18 | if (!br_read_u16_le(&br, &reserved)) { |
445 | 0 | *err = strdup("truncated: missing reserved field"); |
446 | 0 | return NULL; |
447 | 0 | } |
448 | | |
449 | 18 | return deserialize_chunk(&br, err); |
450 | 18 | } |
451 | | |
452 | 3 | int chunk_save(const Chunk *c, const char *path) { |
453 | 3 | size_t len; |
454 | 3 | uint8_t *data = chunk_serialize(c, &len); |
455 | 3 | if (!data) return -1; |
456 | | |
457 | 3 | FILE *f = fopen(path, "wb"); |
458 | 3 | if (!f) { |
459 | 0 | free(data); |
460 | 0 | return -1; |
461 | 0 | } |
462 | 3 | size_t written = fwrite(data, 1, len, f); |
463 | 3 | fclose(f); |
464 | 3 | free(data); |
465 | 3 | return (written == len) ? 0 : -1; |
466 | 3 | } |
467 | | |
468 | 3 | Chunk *chunk_load(const char *path, char **err) { |
469 | 3 | FILE *f = fopen(path, "rb"); |
470 | 3 | if (!f) { |
471 | 0 | char msg[256]; |
472 | 0 | snprintf(msg, sizeof(msg), "cannot open '%s'", path); |
473 | 0 | *err = strdup(msg); |
474 | 0 | return NULL; |
475 | 0 | } |
476 | | |
477 | 3 | fseek(f, 0, SEEK_END); |
478 | 3 | long flen = ftell(f); |
479 | 3 | fseek(f, 0, SEEK_SET); |
480 | | |
481 | 3 | if (flen < 0) { |
482 | 0 | fclose(f); |
483 | 0 | *err = strdup("cannot determine file size"); |
484 | 0 | return NULL; |
485 | 0 | } |
486 | | |
487 | 3 | size_t len = (size_t)flen; |
488 | 3 | uint8_t *data = malloc(len); |
489 | 3 | size_t n = fread(data, 1, len, f); |
490 | 3 | fclose(f); |
491 | | |
492 | 3 | if (n != len) { |
493 | 0 | free(data); |
494 | 0 | *err = strdup("failed to read file completely"); |
495 | 0 | return NULL; |
496 | 0 | } |
497 | | |
498 | 3 | Chunk *c = chunk_deserialize(data, len, err); |
499 | 3 | free(data); |
500 | 3 | return c; |
501 | 3 | } |
502 | | |
503 | | /* ═══════════════════════════════════════════════════════ |
504 | | * Register VM bytecode serialization (.rlatc) |
505 | | * |
506 | | * Format: RLATC_MAGIC(4) + version(u16) + reserved(u16) |
507 | | * + serialize_regchunk(...) |
508 | | * |
509 | | * RegChunk layout: |
510 | | * code_len(u32) + code(u32[] LE) |
511 | | * lines_len(u32) + lines(u32[]) |
512 | | * const_len(u32) + tagged constants |
513 | | * local_name_cap(u32) + local names |
514 | | * ═══════════════════════════════════════════════════════ */ |
515 | | |
516 | 0 | static void serialize_regchunk(ByteBuf *bb, const RegChunk *c) { |
517 | | /* Instructions (fixed-width u32) */ |
518 | 0 | bb_write_u32_le(bb, (uint32_t)c->code_len); |
519 | 0 | for (size_t i = 0; i < c->code_len; i++) |
520 | 0 | bb_write_u32_le(bb, c->code[i]); |
521 | | |
522 | | /* Line numbers */ |
523 | 0 | bb_write_u32_le(bb, (uint32_t)c->lines_len); |
524 | 0 | for (size_t i = 0; i < c->lines_len; i++) |
525 | 0 | bb_write_u32_le(bb, (uint32_t)c->lines[i]); |
526 | | |
527 | | /* Constants — same tagging as stack VM */ |
528 | 0 | bb_write_u32_le(bb, (uint32_t)c->const_len); |
529 | 0 | for (size_t i = 0; i < c->const_len; i++) { |
530 | 0 | const LatValue *v = &c->constants[i]; |
531 | 0 | switch (v->type) { |
532 | 0 | case VAL_INT: |
533 | 0 | bb_write_u8(bb, TAG_INT); |
534 | 0 | bb_write_i64_le(bb, v->as.int_val); |
535 | 0 | break; |
536 | 0 | case VAL_FLOAT: |
537 | 0 | bb_write_u8(bb, TAG_FLOAT); |
538 | 0 | bb_write_f64_le(bb, v->as.float_val); |
539 | 0 | break; |
540 | 0 | case VAL_BOOL: |
541 | 0 | bb_write_u8(bb, TAG_BOOL); |
542 | 0 | bb_write_u8(bb, v->as.bool_val ? 1 : 0); |
543 | 0 | break; |
544 | 0 | case VAL_STR: { |
545 | 0 | bb_write_u8(bb, TAG_STR); |
546 | 0 | uint32_t slen = (uint32_t)strlen(v->as.str_val); |
547 | 0 | bb_write_u32_le(bb, slen); |
548 | 0 | bb_write_bytes(bb, v->as.str_val, slen); |
549 | 0 | break; |
550 | 0 | } |
551 | 0 | case VAL_NIL: |
552 | 0 | bb_write_u8(bb, TAG_NIL); |
553 | 0 | break; |
554 | 0 | case VAL_UNIT: |
555 | 0 | bb_write_u8(bb, TAG_UNIT); |
556 | 0 | break; |
557 | 0 | case VAL_CLOSURE: |
558 | | /* Compiled sub-chunk: body==NULL, native_fn holds RegChunk* */ |
559 | 0 | if (v->as.closure.body == NULL && v->as.closure.native_fn != NULL) { |
560 | 0 | bb_write_u8(bb, TAG_CLOSURE); |
561 | 0 | bb_write_u32_le(bb, (uint32_t)v->as.closure.param_count); |
562 | 0 | bb_write_u8(bb, v->as.closure.has_variadic ? 1 : 0); |
563 | 0 | serialize_regchunk(bb, (const RegChunk *)v->as.closure.native_fn); |
564 | 0 | } else { |
565 | 0 | bb_write_u8(bb, TAG_NIL); |
566 | 0 | } |
567 | 0 | break; |
568 | 0 | default: |
569 | 0 | bb_write_u8(bb, TAG_NIL); |
570 | 0 | break; |
571 | 0 | } |
572 | 0 | } |
573 | | |
574 | | /* Local names */ |
575 | 0 | bb_write_u32_le(bb, (uint32_t)c->local_name_cap); |
576 | 0 | for (size_t i = 0; i < c->local_name_cap; i++) { |
577 | 0 | if (c->local_names && c->local_names[i]) { |
578 | 0 | bb_write_u8(bb, 1); |
579 | 0 | uint32_t nlen = (uint32_t)strlen(c->local_names[i]); |
580 | 0 | bb_write_u32_le(bb, nlen); |
581 | 0 | bb_write_bytes(bb, c->local_names[i], nlen); |
582 | 0 | } else { |
583 | 0 | bb_write_u8(bb, 0); |
584 | 0 | } |
585 | 0 | } |
586 | 0 | } |
587 | | |
588 | 0 | static RegChunk *deserialize_regchunk(ByteReader *br, char **err) { |
589 | 0 | uint32_t code_len; |
590 | 0 | if (!br_read_u32_le(br, &code_len)) { |
591 | 0 | *err = strdup("truncated: missing code_len"); |
592 | 0 | return NULL; |
593 | 0 | } |
594 | | |
595 | 0 | RegChunk *c = regchunk_new(); |
596 | | |
597 | | /* Instructions */ |
598 | 0 | for (uint32_t i = 0; i < code_len; i++) { |
599 | 0 | uint32_t instr; |
600 | 0 | if (!br_read_u32_le(br, &instr)) { |
601 | 0 | *err = strdup("truncated: incomplete instructions"); |
602 | 0 | regchunk_free(c); |
603 | 0 | return NULL; |
604 | 0 | } |
605 | 0 | regchunk_write(c, instr, 0); |
606 | 0 | } |
607 | | |
608 | | /* Line numbers — overwrite the zeros written by regchunk_write */ |
609 | 0 | uint32_t line_count; |
610 | 0 | if (!br_read_u32_le(br, &line_count)) { |
611 | 0 | *err = strdup("truncated: missing line_count"); |
612 | 0 | regchunk_free(c); |
613 | 0 | return NULL; |
614 | 0 | } |
615 | 0 | if (line_count <= c->lines_len) { |
616 | 0 | for (uint32_t i = 0; i < line_count; i++) { |
617 | 0 | uint32_t line_val; |
618 | 0 | if (!br_read_u32_le(br, &line_val)) { |
619 | 0 | *err = strdup("truncated: incomplete line data"); |
620 | 0 | regchunk_free(c); |
621 | 0 | return NULL; |
622 | 0 | } |
623 | 0 | c->lines[i] = (int)line_val; |
624 | 0 | } |
625 | 0 | } |
626 | | |
627 | | /* Constants */ |
628 | 0 | uint32_t const_count; |
629 | 0 | if (!br_read_u32_le(br, &const_count)) { |
630 | 0 | *err = strdup("truncated: missing const_count"); |
631 | 0 | regchunk_free(c); |
632 | 0 | return NULL; |
633 | 0 | } |
634 | 0 | for (uint32_t i = 0; i < const_count; i++) { |
635 | 0 | uint8_t tag; |
636 | 0 | if (!br_read_u8(br, &tag)) { |
637 | 0 | *err = strdup("truncated: missing constant type tag"); |
638 | 0 | regchunk_free(c); |
639 | 0 | return NULL; |
640 | 0 | } |
641 | 0 | switch (tag) { |
642 | 0 | case TAG_INT: { |
643 | 0 | int64_t val; |
644 | 0 | if (!br_read_i64_le(br, &val)) { *err = strdup("truncated int"); regchunk_free(c); return NULL; } |
645 | 0 | regchunk_add_constant(c, value_int(val)); |
646 | 0 | break; |
647 | 0 | } |
648 | 0 | case TAG_FLOAT: { |
649 | 0 | double val; |
650 | 0 | if (!br_read_f64_le(br, &val)) { *err = strdup("truncated float"); regchunk_free(c); return NULL; } |
651 | 0 | regchunk_add_constant(c, value_float(val)); |
652 | 0 | break; |
653 | 0 | } |
654 | 0 | case TAG_BOOL: { |
655 | 0 | uint8_t val; |
656 | 0 | if (!br_read_u8(br, &val)) { *err = strdup("truncated bool"); regchunk_free(c); return NULL; } |
657 | 0 | regchunk_add_constant(c, value_bool(val != 0)); |
658 | 0 | break; |
659 | 0 | } |
660 | 0 | case TAG_STR: { |
661 | 0 | uint32_t slen; |
662 | 0 | if (!br_read_u32_le(br, &slen)) { *err = strdup("truncated string len"); regchunk_free(c); return NULL; } |
663 | 0 | char *s = malloc(slen + 1); |
664 | 0 | if (!br_read_bytes(br, s, slen)) { free(s); *err = strdup("truncated string data"); regchunk_free(c); return NULL; } |
665 | 0 | s[slen] = '\0'; |
666 | 0 | regchunk_add_constant(c, value_string_owned(s)); |
667 | 0 | break; |
668 | 0 | } |
669 | 0 | case TAG_NIL: |
670 | 0 | regchunk_add_constant(c, value_nil()); |
671 | 0 | break; |
672 | 0 | case TAG_UNIT: |
673 | 0 | regchunk_add_constant(c, value_unit()); |
674 | 0 | break; |
675 | 0 | case TAG_CLOSURE: { |
676 | 0 | uint32_t param_count; |
677 | 0 | uint8_t has_variadic; |
678 | 0 | if (!br_read_u32_le(br, ¶m_count)) { *err = strdup("truncated closure"); regchunk_free(c); return NULL; } |
679 | 0 | if (!br_read_u8(br, &has_variadic)) { *err = strdup("truncated closure"); regchunk_free(c); return NULL; } |
680 | 0 | RegChunk *sub = deserialize_regchunk(br, err); |
681 | 0 | if (!sub) { regchunk_free(c); return NULL; } |
682 | 0 | LatValue fn_val; |
683 | 0 | memset(&fn_val, 0, sizeof(fn_val)); |
684 | 0 | fn_val.type = VAL_CLOSURE; |
685 | 0 | fn_val.phase = VTAG_UNPHASED; |
686 | 0 | fn_val.region_id = (size_t)-1; |
687 | 0 | fn_val.as.closure.param_names = NULL; |
688 | 0 | fn_val.as.closure.param_count = (size_t)param_count; |
689 | 0 | fn_val.as.closure.body = NULL; |
690 | 0 | fn_val.as.closure.captured_env = NULL; |
691 | 0 | fn_val.as.closure.default_values = NULL; |
692 | 0 | fn_val.as.closure.has_variadic = (has_variadic != 0); |
693 | 0 | fn_val.as.closure.native_fn = sub; |
694 | 0 | regchunk_add_constant(c, fn_val); |
695 | 0 | break; |
696 | 0 | } |
697 | 0 | default: { |
698 | 0 | char msg[64]; |
699 | 0 | snprintf(msg, sizeof(msg), "unknown constant type tag: %d", tag); |
700 | 0 | *err = strdup(msg); |
701 | 0 | regchunk_free(c); |
702 | 0 | return NULL; |
703 | 0 | } |
704 | 0 | } |
705 | 0 | } |
706 | | |
707 | | /* Local names */ |
708 | 0 | uint32_t local_name_count; |
709 | 0 | if (!br_read_u32_le(br, &local_name_count)) { |
710 | 0 | *err = strdup("truncated: missing local_name_count"); |
711 | 0 | regchunk_free(c); |
712 | 0 | return NULL; |
713 | 0 | } |
714 | 0 | for (uint32_t i = 0; i < local_name_count; i++) { |
715 | 0 | uint8_t present; |
716 | 0 | if (!br_read_u8(br, &present)) { *err = strdup("truncated local name"); regchunk_free(c); return NULL; } |
717 | 0 | if (present) { |
718 | 0 | uint32_t nlen; |
719 | 0 | if (!br_read_u32_le(br, &nlen)) { *err = strdup("truncated local name len"); regchunk_free(c); return NULL; } |
720 | 0 | char *name = malloc(nlen + 1); |
721 | 0 | if (!br_read_bytes(br, name, nlen)) { free(name); *err = strdup("truncated local name data"); regchunk_free(c); return NULL; } |
722 | 0 | name[nlen] = '\0'; |
723 | 0 | regchunk_set_local_name(c, (size_t)i, name); |
724 | 0 | free(name); |
725 | 0 | } |
726 | 0 | } |
727 | | |
728 | 0 | return c; |
729 | 0 | } |
730 | | |
731 | 0 | uint8_t *regchunk_serialize(const RegChunk *c, size_t *out_len) { |
732 | 0 | ByteBuf bb; |
733 | 0 | bb_init(&bb); |
734 | 0 | bb_write_bytes(&bb, RLATC_MAGIC, 4); |
735 | 0 | bb_write_u16_le(&bb, RLATC_FORMAT); |
736 | 0 | bb_write_u16_le(&bb, 0); |
737 | 0 | serialize_regchunk(&bb, c); |
738 | 0 | *out_len = bb.len; |
739 | 0 | return bb.data; |
740 | 0 | } |
741 | | |
742 | 0 | RegChunk *regchunk_deserialize(const uint8_t *data, size_t len, char **err) { |
743 | 0 | ByteReader br = { data, len, 0 }; |
744 | 0 | if (len < 8) { *err = strdup("file too small for .rlatc header"); return NULL; } |
745 | 0 | if (memcmp(data, RLATC_MAGIC, 4) != 0) { *err = strdup("invalid magic: not a .rlatc file"); return NULL; } |
746 | 0 | br.pos = 4; |
747 | 0 | uint16_t version; |
748 | 0 | if (!br_read_u16_le(&br, &version)) { *err = strdup("truncated version"); return NULL; } |
749 | 0 | if (version != RLATC_FORMAT) { *err = strdup("unsupported .rlatc format version"); return NULL; } |
750 | 0 | uint16_t reserved; |
751 | 0 | br_read_u16_le(&br, &reserved); |
752 | 0 | return deserialize_regchunk(&br, err); |
753 | 0 | } |
754 | | |
755 | 0 | int regchunk_save(const RegChunk *c, const char *path) { |
756 | 0 | size_t len; |
757 | 0 | uint8_t *data = regchunk_serialize(c, &len); |
758 | 0 | if (!data) return -1; |
759 | 0 | FILE *f = fopen(path, "wb"); |
760 | 0 | if (!f) { free(data); return -1; } |
761 | 0 | size_t written = fwrite(data, 1, len, f); |
762 | 0 | fclose(f); |
763 | 0 | free(data); |
764 | 0 | return (written == len) ? 0 : -1; |
765 | 0 | } |
766 | | |
767 | 0 | RegChunk *regchunk_load(const char *path, char **err) { |
768 | 0 | FILE *f = fopen(path, "rb"); |
769 | 0 | if (!f) { |
770 | 0 | char msg[256]; |
771 | 0 | snprintf(msg, sizeof(msg), "cannot open '%s'", path); |
772 | 0 | *err = strdup(msg); |
773 | 0 | return NULL; |
774 | 0 | } |
775 | 0 | fseek(f, 0, SEEK_END); |
776 | 0 | long flen = ftell(f); |
777 | 0 | fseek(f, 0, SEEK_SET); |
778 | 0 | if (flen < 0) { fclose(f); *err = strdup("cannot determine file size"); return NULL; } |
779 | 0 | size_t len = (size_t)flen; |
780 | 0 | uint8_t *data = malloc(len); |
781 | 0 | size_t n = fread(data, 1, len, f); |
782 | 0 | fclose(f); |
783 | 0 | if (n != len) { free(data); *err = strdup("failed to read file"); return NULL; } |
784 | 0 | RegChunk *c = regchunk_deserialize(data, len, err); |
785 | 0 | free(data); |
786 | 0 | return c; |
787 | 0 | } |