/Users/alexjokela/projects/lattice/src/regex_ops.c
Line | Count | Source |
1 | | #include "regex_ops.h" |
2 | | #include <regex.h> |
3 | | #include <stdlib.h> |
4 | | #include <string.h> |
5 | | #include <stdio.h> |
6 | | |
/* Helper: compile a POSIX extended regex.
 *
 * re      - caller-provided regex_t to fill. On success the caller is
 *           responsible for releasing it with regfree().
 * pattern - NUL-terminated extended regular expression (REG_EXTENDED).
 * err     - on failure, *err receives a malloc'd string of the form
 *           "regex error: <regerror text>"; this function keeps no
 *           reference to it. *err is set to NULL if that message could
 *           not be allocated. Left untouched on success. May be NULL, in
 *           which case the message is discarded.
 *
 * Returns 0 on success, otherwise the non-zero regcomp() error code. */
static int compile_regex(regex_t *re, const char *pattern, char **err) {
    static const char prefix[] = "regex error: ";
    const size_t prefix_len = sizeof prefix - 1;

    int rc = regcomp(re, pattern, REG_EXTENDED);
    if (rc == 0) {
        return 0;
    }

    /* With a zero-sized buffer regerror() only reports the size it needs,
     * terminating NUL included. Clamp to 1 so there is always room for
     * the terminator. */
    size_t detail_size = regerror(rc, re, NULL, 0);
    if (detail_size == 0) {
        detail_size = 1;
    }

    /* One allocation holds prefix + detail; regerror() writes the detail
     * (and its NUL) directly after the prefix. */
    char *msg = malloc(prefix_len + detail_size);
    if (msg != NULL) {
        memcpy(msg, prefix, prefix_len);
        msg[prefix_len] = '\0';
        regerror(rc, re, msg + prefix_len, detail_size);
    }

    if (err != NULL) {
        *err = msg;
    } else {
        free(msg);
    }
    return rc;
}
23 | | |
24 | | /* ── regex_match ── */ |
25 | | |
26 | 12 | LatValue regex_match(const char *pattern, const char *str, char **err) { |
27 | 12 | regex_t re; |
28 | 12 | if (compile_regex(&re, pattern, err) != 0) { |
29 | 3 | return value_unit(); |
30 | 3 | } |
31 | 9 | int result = regexec(&re, str, 0, NULL, 0); |
32 | 9 | regfree(&re); |
33 | 9 | return value_bool(result == 0); |
34 | 12 | } |
35 | | |
36 | | /* ── regex_find_all ── */ |
37 | | |
38 | 12 | LatValue regex_find_all(const char *pattern, const char *str, char **err) { |
39 | 12 | regex_t re; |
40 | 12 | if (compile_regex(&re, pattern, err) != 0) { |
41 | 0 | return value_unit(); |
42 | 0 | } |
43 | | |
44 | | /* Collect matches into a dynamic array */ |
45 | 12 | size_t cap = 8; |
46 | 12 | size_t len = 0; |
47 | 12 | LatValue *elems = malloc(cap * sizeof(LatValue)); |
48 | | |
49 | 12 | regmatch_t match; |
50 | 12 | const char *cursor = str; |
51 | | |
52 | 33 | while (regexec(&re, cursor, 1, &match, 0) == 0) { |
53 | | /* Guard against zero-length matches to avoid infinite loop */ |
54 | 21 | if (match.rm_so == match.rm_eo) { |
55 | 0 | if (cursor[match.rm_eo] == '\0') break; |
56 | 0 | cursor += match.rm_eo + 1; |
57 | 0 | continue; |
58 | 0 | } |
59 | | |
60 | 21 | size_t match_len = (size_t)(match.rm_eo - match.rm_so); |
61 | 21 | char *substr = malloc(match_len + 1); |
62 | 21 | memcpy(substr, cursor + match.rm_so, match_len); |
63 | 21 | substr[match_len] = '\0'; |
64 | | |
65 | 21 | if (len >= cap) { |
66 | 0 | cap *= 2; |
67 | 0 | elems = realloc(elems, cap * sizeof(LatValue)); |
68 | 0 | } |
69 | 21 | elems[len++] = value_string_owned(substr); |
70 | | |
71 | 21 | cursor += match.rm_eo; |
72 | 21 | } |
73 | | |
74 | 12 | regfree(&re); |
75 | 12 | return value_array(elems, len); |
76 | 12 | } |
77 | | |
78 | | /* ── regex_replace ── */ |
79 | | |
/* Heap-copy a message for reporting through an `err` out-parameter.
 * Returns NULL if the copy cannot be allocated. */
static char *replace_dup_message(const char *text) {
    size_t size = strlen(text) + 1;
    char *copy = malloc(size);
    if (copy != NULL) {
        memcpy(copy, text, size);
    }
    return copy;
}

/* Make sure *buf has room for at least `needed` bytes, growing it
 * (to twice the requested size) when it does not.
 * Returns 0 on success; -1 on allocation failure, leaving *buf valid. */
static int replace_reserve(char **buf, size_t *cap, size_t needed) {
    if (needed <= *cap) {
        return 0;
    }
    size_t new_cap = needed * 2;
    if (new_cap < needed) {
        new_cap = needed; /* doubling wrapped around; ask for the minimum */
    }
    char *grown = realloc(*buf, new_cap);
    if (grown == NULL) {
        return -1;
    }
    *buf = grown;
    *cap = new_cap;
    return 0;
}

/* Append `n` bytes from `src` to the buffer, always keeping one spare
 * byte for the final NUL terminator. Returns 0 on success, -1 on
 * allocation failure. */
static int replace_append(char **buf, size_t *cap, size_t *len,
                          const char *src, size_t n) {
    if (replace_reserve(buf, cap, *len + n + 1) != 0) {
        return -1;
    }
    memcpy(*buf + *len, src, n);
    *len += n;
    return 0;
}

/* Replace every non-empty match of `pattern` in `str` with `replacement`.
 *
 * The replacement text is inserted literally (no back-reference
 * expansion). Zero-length matches are not replaced; the text around them
 * is copied through unchanged.
 *
 * Returns a malloc'd, NUL-terminated string on success. Returns NULL on
 * failure: a pattern that does not compile is reported by
 * compile_regex() through `err`; an allocation failure sets *err (when
 * `err` is non-NULL) to a malloc'd "regex error: out of memory" message,
 * or to NULL if even that cannot be allocated. */
char *regex_replace(const char *pattern, const char *str, const char *replacement, char **err) {
    regex_t re;
    if (compile_regex(&re, pattern, err) != 0) {
        return NULL;
    }

    size_t repl_len = strlen(replacement);
    size_t result_cap = strlen(str) + 64;
    size_t result_len = 0;
    char *result = malloc(result_cap);
    int failed = (result == NULL);

    regmatch_t match;
    const char *cursor = str;

    while (!failed) {
        /* Once the cursor has moved past the start of `str`, it no longer
         * sits at a beginning of line: without REG_NOTBOL a '^' anchor
         * would match again at every resume position. */
        int eflags = (cursor == str) ? 0 : REG_NOTBOL;
        if (regexec(&re, cursor, 1, &match, eflags) != 0) {
            break;
        }

        size_t prefix_len = (size_t)match.rm_so;

        /* Guard against zero-length matches (would otherwise loop forever). */
        if (match.rm_so == match.rm_eo) {
            if (cursor[match.rm_eo] == '\0') {
                break; /* the tail copy below emits what is left */
            }
            /* Copy everything up to and including the character at the
             * match position, then resume after it. The text before the
             * match position must be copied too, or it would be lost. */
            failed = replace_append(&result, &result_cap, &result_len,
                                    cursor, prefix_len + 1) != 0;
            cursor += match.rm_eo + 1;
            continue;
        }

        /* Unmatched text before the match, then the replacement. */
        failed = replace_append(&result, &result_cap, &result_len,
                                cursor, prefix_len) != 0
              || replace_append(&result, &result_cap, &result_len,
                                replacement, repl_len) != 0;

        cursor += match.rm_eo;
    }

    /* Copy remainder */
    if (!failed) {
        failed = replace_append(&result, &result_cap, &result_len,
                                cursor, strlen(cursor)) != 0;
    }

    regfree(&re);

    if (failed) {
        free(result);
        if (err != NULL) {
            *err = replace_dup_message("regex error: out of memory");
        }
        return NULL;
    }

    /* replace_append always leaves one spare byte for this terminator. */
    result[result_len] = '\0';
    return result;
}