NeoMutt  2024-10-02-37-gfa9146
Teaching an old dog new tricks
DOXYGEN
Loading...
Searching...
No Matches
regex.c
Go to the documentation of this file.
1
33#include "config.h"
34#include <ctype.h>
35#include <regex.h>
36#include <stdbool.h>
37#include <stdint.h>
38#include <stdio.h>
39#include <stdlib.h>
40#include "config/types.h"
41#include "atoi.h"
42#include "buffer.h"
43#include "logging2.h"
44#include "mbyte.h"
45#include "memory.h"
46#include "message.h"
47#include "pool.h"
48#include "queue.h"
49#include "regex3.h"
50#include "string2.h"
51
59struct Regex *mutt_regex_compile(const char *str, uint16_t flags)
60{
61 if (!str || (*str == '\0'))
62 return NULL;
63 struct Regex *rx = mutt_mem_calloc(1, sizeof(struct Regex));
64 rx->pattern = mutt_str_dup(str);
65 rx->regex = mutt_mem_calloc(1, sizeof(regex_t));
66 if (REG_COMP(rx->regex, str, flags) != 0)
67 mutt_regex_free(&rx);
68
69 return rx;
70}
71
80struct Regex *mutt_regex_new(const char *str, uint32_t flags, struct Buffer *err)
81{
82 if (!str || (*str == '\0'))
83 return NULL;
84
85 uint16_t rflags = 0;
86 struct Regex *reg = mutt_mem_calloc(1, sizeof(struct Regex));
87
88 reg->regex = mutt_mem_calloc(1, sizeof(regex_t));
89 reg->pattern = mutt_str_dup(str);
90
91 /* Should we use smart case matching? */
92 if (((flags & D_REGEX_MATCH_CASE) == 0) && mutt_mb_is_lower(str))
93 rflags |= REG_ICASE;
94
95 /* Is a prefix of '!' allowed? */
96 if (((flags & D_REGEX_ALLOW_NOT) != 0) && (str[0] == '!'))
97 {
98 reg->pat_not = true;
99 str++;
100 }
101
102 int rc = REG_COMP(reg->regex, str, rflags);
103 if (rc != 0)
104 {
105 if (err)
106 regerror(rc, reg->regex, err->data, err->dsize);
107 mutt_regex_free(&reg);
108 return NULL;
109 }
110
111 return reg;
112}
113
118void mutt_regex_free(struct Regex **ptr)
119{
120 if (!ptr || !*ptr)
121 return;
122
123 struct Regex *rx = *ptr;
124 FREE(&rx->pattern);
125 if (rx->regex)
126 regfree(rx->regex);
127 FREE(&rx->regex);
128 FREE(ptr);
129}
130
140int mutt_regexlist_add(struct RegexList *rl, const char *str, uint16_t flags,
141 struct Buffer *err)
142{
143 if (!rl || !str || (*str == '\0'))
144 return 0;
145
146 struct Regex *rx = mutt_regex_compile(str, flags);
147 if (!rx)
148 {
149 buf_printf(err, "Bad regex: %s\n", str);
150 return -1;
151 }
152
153 /* check to make sure the item is not already on this rl */
154 struct RegexNode *np = NULL;
155 STAILQ_FOREACH(np, rl, entries)
156 {
157 if (mutt_istr_equal(rx->pattern, np->regex->pattern))
158 break; /* already on the rl */
159 }
160
161 if (np)
162 {
163 mutt_regex_free(&rx);
164 }
165 else
166 {
167 np = mutt_regexlist_new();
168 np->regex = rx;
169 STAILQ_INSERT_TAIL(rl, np, entries);
170 }
171
172 return 0;
173}
174
179void mutt_regexlist_free(struct RegexList *rl)
180{
181 if (!rl)
182 return;
183
184 struct RegexNode *np = NULL, *tmp = NULL;
185 STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
186 {
187 STAILQ_REMOVE(rl, np, RegexNode, entries);
189 FREE(&np);
190 }
191 STAILQ_INIT(rl);
192}
193
200bool mutt_regexlist_match(struct RegexList *rl, const char *str)
201{
202 if (!rl || !str)
203 return false;
204 struct RegexNode *np = NULL;
205 STAILQ_FOREACH(np, rl, entries)
206 {
207 if (mutt_regex_match(np->regex, str))
208 {
209 mutt_debug(LL_DEBUG5, "%s matches %s\n", str, np->regex->pattern);
210 return true;
211 }
212 }
213
214 return false;
215}
216
222{
223 return mutt_mem_calloc(1, sizeof(struct RegexNode));
224}
225
235int mutt_regexlist_remove(struct RegexList *rl, const char *str)
236{
237 if (!rl || !str)
238 return -1;
239
240 if (mutt_str_equal("*", str))
241 {
242 mutt_regexlist_free(rl); /* "unCMD *" means delete all current entries */
243 return 0;
244 }
245
246 int rc = -1;
247 struct RegexNode *np = NULL, *tmp = NULL;
248 STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
249 {
250 if (mutt_istr_equal(str, np->regex->pattern))
251 {
252 STAILQ_REMOVE(rl, np, RegexNode, entries);
254 FREE(&np);
255 rc = 0;
256 }
257 }
258
259 return rc;
260}
261
271int mutt_replacelist_add(struct ReplaceList *rl, const char *pat,
272 const char *templ, struct Buffer *err)
273{
274 if (!rl || !pat || (*pat == '\0') || !templ)
275 return 0;
276
277 struct Regex *rx = mutt_regex_compile(pat, REG_ICASE);
278 if (!rx)
279 {
280 buf_printf(err, _("Bad regex: %s"), pat);
281 return -1;
282 }
283
284 /* check to make sure the item is not already on this rl */
285 struct Replace *np = NULL;
286 STAILQ_FOREACH(np, rl, entries)
287 {
288 if (mutt_istr_equal(rx->pattern, np->regex->pattern))
289 {
290 /* Already on the rl. Formerly we just skipped this case, but
291 * now we're supporting removals, which means we're supporting
292 * re-adds conceptually. So we probably want this to imply a
293 * removal, then do an add. We can achieve the removal by freeing
294 * the template, and leaving t pointed at the current item. */
295 FREE(&np->templ);
296 break;
297 }
298 }
299
300 /* If np is set, it's pointing into an extant ReplaceList* that we want to
301 * update. Otherwise we want to make a new one to link at the rl's end. */
302 if (np)
303 {
304 mutt_regex_free(&rx);
305 }
306 else
307 {
309 np->regex = rx;
310 rx = NULL;
311 STAILQ_INSERT_TAIL(rl, np, entries);
312 }
313
314 /* Now np is the Replace that we want to modify. It is prepared. */
315 np->templ = mutt_str_dup(templ);
316
317 /* Find highest match number in template string */
318 np->nmatch = 0;
319 for (const char *p = templ; *p;)
320 {
321 if (*p == '%')
322 {
323 int n = 0;
324 const char *end = mutt_str_atoi(++p, &n);
325 if (!end)
326 {
327 // this is not an error, we might have matched %R or %L in subjectrx
328 mutt_debug(LL_DEBUG2, "Invalid match number in replacelist: '%s'\n", p);
329 }
330 if (n > np->nmatch)
331 {
332 np->nmatch = n;
333 }
334 if (end)
335 {
336 p = end;
337 }
338 else
339 {
340 p++;
341 }
342 }
343 else
344 {
345 p++;
346 }
347 }
348
349 if (np->nmatch > np->regex->regex->re_nsub)
350 {
351 if (err)
352 buf_addstr(err, _("Not enough subexpressions for template"));
354 return -1;
355 }
356
357 np->nmatch++; /* match 0 is always the whole expr */
358 return 0;
359}
360
369char *mutt_replacelist_apply(struct ReplaceList *rl, const char *str)
370{
371 if (!rl || !str || (*str == '\0'))
372 return NULL;
373
374 static regmatch_t *pmatch = NULL;
375 static size_t nmatch = 0;
376 char *p = NULL;
377
378 struct Buffer *src = buf_pool_get();
379 struct Buffer *dst = buf_pool_get();
380
381 buf_strcpy(src, str);
382
383 struct Replace *np = NULL;
384 STAILQ_FOREACH(np, rl, entries)
385 {
386 /* If this pattern needs more matches, expand pmatch. */
387 if (np->nmatch > nmatch)
388 {
389 mutt_mem_realloc(&pmatch, np->nmatch * sizeof(regmatch_t));
390 nmatch = np->nmatch;
391 }
392
393 if (mutt_regex_capture(np->regex, buf_string(src), np->nmatch, pmatch))
394 {
395 mutt_debug(LL_DEBUG5, "%s matches %s\n", buf_string(src), np->regex->pattern);
396
397 buf_reset(dst);
398 if (np->templ)
399 {
400 for (p = np->templ; *p;)
401 {
402 if (*p == '%')
403 {
404 p++;
405 if (*p == 'L')
406 {
407 p++;
408 buf_addstr_n(dst, buf_string(src), pmatch[0].rm_so);
409 }
410 else if (*p == 'R')
411 {
412 p++;
413 buf_addstr(dst, src->data + pmatch[0].rm_eo);
414 }
415 else
416 {
417 long n = strtoul(p, &p, 10); /* get subst number */
418 if (n < np->nmatch)
419 {
420 buf_addstr_n(dst, src->data + pmatch[n].rm_so,
421 pmatch[n].rm_eo - pmatch[n].rm_so);
422 }
423 while (isdigit((unsigned char) *p)) /* skip subst token */
424 p++;
425 }
426 }
427 else
428 {
429 buf_addch(dst, *p++);
430 }
431 }
432 }
433
434 buf_strcpy(src, buf_string(dst));
435 mutt_debug(LL_DEBUG5, "subst %s\n", buf_string(dst));
436 }
437 }
438
439 char *result = buf_strdup(src);
440
441 buf_pool_release(&src);
442 buf_pool_release(&dst);
443 return result;
444}
445
450void mutt_replacelist_free(struct ReplaceList *rl)
451{
452 if (!rl)
453 return;
454
455 struct Replace *np = NULL, *tmp = NULL;
456 STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
457 {
458 STAILQ_REMOVE(rl, np, Replace, entries);
460 FREE(&np->templ);
461 FREE(&np);
462 }
463}
464
478bool mutt_replacelist_match(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
479{
480 if (!rl || !buf || !str)
481 return false;
482
483 static regmatch_t *pmatch = NULL;
484 static size_t nmatch = 0;
485 int tlen = 0;
486 char *p = NULL;
487
488 struct Replace *np = NULL;
489 STAILQ_FOREACH(np, rl, entries)
490 {
491 /* If this pattern needs more matches, expand pmatch. */
492 if (np->nmatch > nmatch)
493 {
494 mutt_mem_realloc(&pmatch, np->nmatch * sizeof(regmatch_t));
495 nmatch = np->nmatch;
496 }
497
498 /* Does this pattern match? */
499 if (mutt_regex_capture(np->regex, str, (size_t) np->nmatch, pmatch))
500 {
501 mutt_debug(LL_DEBUG5, "%s matches %s\n", str, np->regex->pattern);
502 mutt_debug(LL_DEBUG5, "%d subs\n", (int) np->regex->regex->re_nsub);
503
504 /* Copy template into buf, with substitutions. */
505 for (p = np->templ; *p && (tlen < (buflen - 1));)
506 {
507 /* backreference to pattern match substring, eg. %1, %2, etc) */
508 if (*p == '%')
509 {
510 char *e = NULL; /* used as pointer to end of integer backreference in strtol() call */
511
512 p++; /* skip over % char */
513 long n = strtol(p, &e, 10);
514 /* Ensure that the integer conversion succeeded (e!=p) and bounds check. The upper bound check
515 * should not strictly be necessary since add_to_spam_list() finds the largest value, and
516 * the static array above is always large enough based on that value. */
517 if ((e != p) && (n >= 0) && (n < np->nmatch) && (pmatch[n].rm_so != -1))
518 {
519 /* copy as much of the substring match as will fit in the output buffer, saving space for
520 * the terminating nul char */
521 for (int idx = pmatch[n].rm_so;
522 (idx < pmatch[n].rm_eo) && (tlen < (buflen - 1)); idx++)
523 {
524 buf[tlen++] = str[idx];
525 }
526 }
527 p = e; /* skip over the parsed integer */
528 }
529 else
530 {
531 buf[tlen++] = *p++;
532 }
533 }
534 /* tlen should always be less than buflen except when buflen<=0
535 * because the bounds checks in the above code leave room for the
536 * terminal nul char. This should avoid returning an unterminated
537 * string to the caller. When buflen<=0 we make no assumption about
538 * the validity of the buf pointer. */
539 if (tlen < buflen)
540 {
541 buf[tlen] = '\0';
542 mutt_debug(LL_DEBUG5, "\"%s\"\n", buf);
543 }
544 return true;
545 }
546 }
547
548 return false;
549}
550
556{
557 return mutt_mem_calloc(1, sizeof(struct Replace));
558}
559
566int mutt_replacelist_remove(struct ReplaceList *rl, const char *pat)
567{
568 if (!rl || !pat)
569 return 0;
570
571 int nremoved = 0;
572 struct Replace *np = NULL, *tmp = NULL;
573 STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
574 {
575 if (mutt_str_equal(np->regex->pattern, pat))
576 {
577 STAILQ_REMOVE(rl, np, Replace, entries);
579 FREE(&np->templ);
580 FREE(&np);
581 nremoved++;
582 }
583 }
584
585 return nremoved;
586}
587
597bool mutt_regex_capture(const struct Regex *regex, const char *str,
598 size_t nmatch, regmatch_t matches[])
599{
600 if (!regex || !str || !regex->regex)
601 return false;
602
603 int rc = regexec(regex->regex, str, nmatch, matches, 0);
604 return ((rc == 0) ^ regex->pat_not);
605}
606
/**
 * mutt_regex_match - Shorthand to mutt_regex_capture()
 * @param regex Regex to execute
 * @param str   String to apply the regex to
 * @retval bool Result of mutt_regex_capture() with no capture array
 */
bool mutt_regex_match(const struct Regex *regex, const char *str)
{
  /* Only the yes/no answer is wanted, so ask for zero captures */
  const size_t num_captures = 0;
  regmatch_t *no_captures = NULL;

  return mutt_regex_capture(regex, str, num_captures, no_captures);
}
const char * mutt_str_atoi(const char *str, int *dst)
Convert ASCII string to an integer.
Definition: atoi.c:188
Parse a number in a string.
int buf_printf(struct Buffer *buf, const char *fmt,...)
Format a string overwriting a Buffer.
Definition: buffer.c:161
size_t buf_addstr_n(struct Buffer *buf, const char *s, size_t len)
Add a string to a Buffer, expanding it if necessary.
Definition: buffer.c:96
void buf_reset(struct Buffer *buf)
Reset an existing Buffer.
Definition: buffer.c:76
size_t buf_addch(struct Buffer *buf, char c)
Add a single character to a Buffer.
Definition: buffer.c:241
size_t buf_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:226
size_t buf_strcpy(struct Buffer *buf, const char *s)
Copy a string into a Buffer.
Definition: buffer.c:395
char * buf_strdup(const struct Buffer *buf)
Copy a Buffer's string.
Definition: buffer.c:571
General purpose object for storing and parsing strings.
static const char * buf_string(const struct Buffer *buf)
Convert a buffer to a const char * "string".
Definition: buffer.h:96
#define mutt_debug(LEVEL,...)
Definition: logging2.h:89
Logging Dispatcher.
@ LL_DEBUG5
Log at debug level 5.
Definition: logging2.h:47
@ LL_DEBUG2
Log at debug level 2.
Definition: logging2.h:44
bool mutt_mb_is_lower(const char *s)
Does a multi-byte string contain only lowercase characters?
Definition: mbyte.c:354
Multi-byte String manipulation functions.
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:51
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:115
Memory management wrappers.
#define FREE(x)
Definition: memory.h:45
Message logging.
#define _(a)
Definition: message.h:28
int mutt_replacelist_remove(struct ReplaceList *rl, const char *pat)
Remove a pattern from a list.
Definition: regex.c:566
struct Regex * mutt_regex_new(const char *str, uint32_t flags, struct Buffer *err)
Create a Regex from a string.
Definition: regex.c:80
struct RegexNode * mutt_regexlist_new(void)
Create a new RegexList node (RegexNode).
Definition: regex.c:221
struct Regex * mutt_regex_compile(const char *str, uint16_t flags)
Create a Regex from a string.
Definition: regex.c:59
void mutt_regexlist_free(struct RegexList *rl)
Free a RegexList object.
Definition: regex.c:179
int mutt_regexlist_add(struct RegexList *rl, const char *str, uint16_t flags, struct Buffer *err)
Compile a regex string and add it to a list.
Definition: regex.c:140
bool mutt_regex_capture(const struct Regex *regex, const char *str, size_t nmatch, regmatch_t matches[])
Match a regex against a string, with provided options.
Definition: regex.c:597
void mutt_replacelist_free(struct ReplaceList *rl)
Free a ReplaceList object.
Definition: regex.c:450
int mutt_regexlist_remove(struct RegexList *rl, const char *str)
Remove a Regex from a list.
Definition: regex.c:235
bool mutt_replacelist_match(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
Does a string match a pattern?
Definition: regex.c:478
char * mutt_replacelist_apply(struct ReplaceList *rl, const char *str)
Apply replacements to a buffer.
Definition: regex.c:369
struct Replace * mutt_replacelist_new(void)
Create a new ReplaceList entry (Replace).
Definition: regex.c:555
int mutt_replacelist_add(struct ReplaceList *rl, const char *pat, const char *templ, struct Buffer *err)
Add a pattern and a template to a list.
Definition: regex.c:271
bool mutt_regexlist_match(struct RegexList *rl, const char *str)
Does a string match any Regex in the list?
Definition: regex.c:200
void mutt_regex_free(struct Regex **ptr)
Free a Regex object.
Definition: regex.c:118
bool mutt_regex_match(const struct Regex *regex, const char *str)
Shorthand to mutt_regex_capture()
Definition: regex.c:614
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
Definition: string.c:672
char * mutt_str_dup(const char *str)
Copy a string, safely.
Definition: string.c:253
bool mutt_str_equal(const char *a, const char *b)
Compare two strings.
Definition: string.c:660
struct Buffer * buf_pool_get(void)
Get a Buffer from the pool.
Definition: pool.c:81
void buf_pool_release(struct Buffer **ptr)
Return a Buffer to the pool.
Definition: pool.c:94
A global pool of Buffers.
#define STAILQ_REMOVE(head, elm, type, field)
Definition: queue.h:402
#define STAILQ_INIT(head)
Definition: queue.h:372
#define STAILQ_FOREACH(var, head, field)
Definition: queue.h:352
#define STAILQ_INSERT_TAIL(head, elm, field)
Definition: queue.h:389
#define STAILQ_FOREACH_SAFE(var, head, field, tvar)
Definition: queue.h:362
Manage regular expressions.
#define REG_COMP(preg, regex, cflags)
Compile a regular expression.
Definition: regex3.h:50
String manipulation functions.
String manipulation buffer.
Definition: buffer.h:36
size_t dsize
Length of data.
Definition: buffer.h:39
char * data
Pointer to data.
Definition: buffer.h:37
List of regular expressions.
Definition: regex3.h:96
struct Regex * regex
Regex containing a regular expression.
Definition: regex3.h:97
Cached regular expression.
Definition: regex3.h:86
char * pattern
printable version
Definition: regex3.h:87
bool pat_not
do not match
Definition: regex3.h:89
regex_t * regex
compiled expression
Definition: regex3.h:88
List of regular expressions.
Definition: regex3.h:106
char * templ
Template to match.
Definition: regex3.h:109
size_t nmatch
Match the 'nth' occurrence (0 means the whole expression)
Definition: regex3.h:108
struct Regex * regex
Regex containing a regular expression.
Definition: regex3.h:107
Constants for all the config types.
#define D_REGEX_ALLOW_NOT
Regex can begin with '!'.
Definition: types.h:107
#define D_REGEX_MATCH_CASE
Case-sensitive matching.
Definition: types.h:106