From f0c0feff4f559f4631d87e3c797537a2a049cef9 Mon Sep 17 00:00:00 2001 From: Bram Matthys Date: Fri, 5 Jun 2026 09:43:22 +0200 Subject: [PATCH] Set PCRE2 limits explicitly (to more sensible defaults), reported by Link420. --- doc/RELEASE-NOTES.md | 8 +++++++- include/config.h | 10 ++++++++++ include/h.h | 4 +++- src/aliases.c | 2 +- src/conf.c | 1 + src/ircd.c | 1 + src/match.c | 47 ++++++++++++++++++++++++++++++++++++-------- src/modules/tkl.c | 42 +++++++++++++++++++++++++++++++++++---- 8 files changed, 100 insertions(+), 15 deletions(-) diff --git a/doc/RELEASE-NOTES.md b/doc/RELEASE-NOTES.md index c18e209b2..fa175efc7 100644 --- a/doc/RELEASE-NOTES.md +++ b/doc/RELEASE-NOTES.md @@ -7,15 +7,21 @@ This is work in progress and may not always be a stable version. ### Enhancements: ### Changes: +* Spamfilter regexes now use more sensible defaults in terms of "max effort", + similar to what PHP has been using for years. This means very slow regexes + will now raise a `SPAMFILTER_REGEX_ERROR` warning during execution if + this happens (should be extremely rare). ### Fixes: -* Harden the built-in HTTPS client +* Hardening of the built-in HTTPS client ### Developers and protocol: * URL API: The OutgoingWebRequest `max_size` (introduced last release) now also caps file-backed downloads. Default for file-backed when left at 0 is 50MB (`DOWNLOAD_MAX_SIZE_FILE_BACKED`). For memory-backed, it stays at 1MB like in 6.2.5 (`DOWNLOAD_MAX_SIZE_MEMORY_BACKED`). +* The `unreal_match()` function now has a 3rd argument `const char **error` + for communicating regex errors back. Just set to `NULL` if you don't care. * If you do something to a user that would (potentially) move the user from `unknown-users` to `known-users` (or vice versa) then you should call `update_known_user_cache(client);` to update the known users cache. 
diff --git a/include/config.h b/include/config.h index 36b888917..ae259e791 100644 --- a/include/config.h +++ b/include/config.h @@ -238,6 +238,16 @@ #define SPAMFILTER_DETECTSLOW #endif +/* Limits for PCRE2 regex matching (eg. spamfilter, badwords). A regex that + * exceeds these is aborted and treated as no match, instead of running + * unbounded. The match limit is honoured by JIT. The depth limit only applies + * to the non-JIT interpreter, since PCRE2 ignores it under JIT. + * We use the same defaults that PHP has been using for a long time (which is + * actually 10 times lower than PCRE2 defaults, as of 2026). + */ +#define UNREAL_PCRE2_MATCH_LIMIT 1000000 +#define UNREAL_PCRE2_DEPTH_LIMIT 100000 + /* Maximum number of ModData objects that may be attached to an object */ /* UnrealIRCd 4.0.0: 8, 8, 4, 4 * UnrealIRCd 4.0.14: 12, 8, 4, 4 diff --git a/include/h.h b/include/h.h index 233707863..85c074631 100644 --- a/include/h.h +++ b/include/h.h @@ -1091,9 +1091,11 @@ extern void read_packet(int fd, int revents, void *data); extern int process_packet(Client *cptr, char *readbuf, int length, int killsafely); extern int parse_chanmode(ParseMode *pm, const char *modebuf_in, const char *parabuf_in); extern int dead_socket(Client *to, const char *notice); +extern MODVAR pcre2_match_context *unreal_pcre2_match_ctx; +extern void init_match(void); extern Match *unreal_create_match(MatchType type, const char *str, char **error); extern void unreal_delete_match(Match *m); -extern int unreal_match(Match *m, const char *str); +extern int unreal_match(Match *m, const char *str, const char **error); extern int unreal_match_method_strtoval(const char *str); extern char *unreal_match_method_valtostr(int val); #ifdef _WIN32 diff --git a/src/aliases.c b/src/aliases.c index 6b6390498..b66154f6f 100644 --- a/src/aliases.c +++ b/src/aliases.c @@ -143,7 +143,7 @@ void cmd_alias(ClientContext *clictx, Client *client, MessageTag *mtags, int par for (format = alias->format; format; 
format = format->next) { - if (unreal_match(format->expr, ptr)) + if (unreal_match(format->expr, ptr, NULL)) { /* Parse the parameters */ int i = 0, j = 0, k = 1; diff --git a/src/conf.c b/src/conf.c index 0e51e0867..0c91ec64e 100644 --- a/src/conf.c +++ b/src/conf.c @@ -1906,6 +1906,7 @@ void config_setdefaultsettings(Configuration *i) add_log_throttle_config(&i->log_throttle, "BUG_CT_BUCKET_MISSING", 5, 60, 0); add_log_throttle_config(&i->log_throttle, "BUG_CT_NEGATIVE_COUNTER", 5, 60, 0); add_log_throttle_config(&i->log_throttle, "BUG_DECREASE_IPUSERS_BUCKET", 5, 60, 0); + add_log_throttle_config(&i->log_throttle, "SPAMFILTER_REGEX_ERROR", 5, 60, 0); /* TLS options */ i->tls_options = safe_alloc(sizeof(TLSOptions)); diff --git a/src/ircd.c b/src/ircd.c index 74b9b27ba..b40ec8606 100644 --- a/src/ircd.c +++ b/src/ircd.c @@ -547,6 +547,7 @@ int InitUnrealIRCd(int argc, char *argv[]) init_hash(); log_throttle_init(); + init_match(); SetupEvents(); diff --git a/src/match.c b/src/match.c index 7b6e16440..36e55c4b4 100644 --- a/src/match.c +++ b/src/match.c @@ -35,6 +35,8 @@ u_char touppertab[], tolowertab[]; #define tolowertab2 tolowertab #define lc(x) tolowertab2[x] +pcre2_match_context *unreal_pcre2_match_ctx = NULL; + /* Match routine for special cases where escaping is needed in a normal fashion. * Checks a string ('name') against a globbing(+more) pattern ('mask'). * Original by Douglas A Lewis (dalewis@acsu.buffalo.edu). @@ -369,6 +371,17 @@ u_char char_atribs[] = { /* f0-ff */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; +/* Set up global match state. Called once at startup. */ +void init_match(void) +{ + unreal_pcre2_match_ctx = pcre2_match_context_create(NULL); + if (unreal_pcre2_match_ctx) + { + pcre2_set_match_limit(unreal_pcre2_match_ctx, UNREAL_PCRE2_MATCH_LIMIT); + pcre2_set_depth_limit(unreal_pcre2_match_ctx, UNREAL_PCRE2_DEPTH_LIMIT); + } +} + /** Free up all resources of an Match entry (including the struct itself). 
* NOTE: this function may (also) be called for Match structs that have only been * setup half-way, so use special care when accessing members (NULL checks!) @@ -441,28 +454,46 @@ Match *unreal_create_match(MatchType type, const char *str, char **error) } /** Try to match an Match entry ('m') against a string ('str'). + * @param error If non-NULL, set to an error string when the regex could not + * complete (eg. a resource limit was hit), or to NULL otherwise. + * Points to a static buffer, valid until the next unreal_match(). * @returns 1 if matched, 0 if not. * @note These (more logical) return values are opposite to the match_simple() function. */ -int unreal_match(Match *m, const char *str) +int unreal_match(Match *m, const char *str, const char **error) { + static char errbuf[256]; + + if (error) + *error = NULL; + if (m->type == MATCH_SIMPLE) { if (match_simple(m->str, str)) return 1; return 0; } - + if (m->type == MATCH_PCRE_REGEX) { pcre2_match_data *md = pcre2_match_data_create(9, NULL); int ret; - - ret = pcre2_match(m->ext.pcre2_expr, str, PCRE2_ZERO_TERMINATED, 0, 0, md, NULL); /* run the regex */ + + ret = pcre2_match(m->ext.pcre2_expr, str, PCRE2_ZERO_TERMINATED, 0, 0, md, unreal_pcre2_match_ctx); /* run the regex */ pcre2_match_data_free(md); /* yeah, we never use it. unfortunately argument must be non-NULL for pcre2_match() */ - + if (ret > 0) - return 1; /* MATCH */ + return 1; /* MATCH */ + + if (error && (ret < 0) && (ret != PCRE2_ERROR_NOMATCH) && (ret != PCRE2_ERROR_PARTIAL)) + { + /* Regex did not finish (eg. hit the match, depth or JIT stack limit). + * Report it so the caller can warn. We still return no-match. 
+ */ + *errbuf = '\0'; + pcre2_get_error_message(ret, errbuf, sizeof(errbuf)); + *error = errbuf; + } return 0; /* NO MATCH */ } @@ -681,7 +712,7 @@ const char *stripbadwords(const char *str, ConfigItem_badword *start_bw, int *bl pcre2_match_data *md = pcre2_match_data_create(9, NULL); int ret; - ret = pcre2_match(this_word->pcre2_expr, cleanstr, PCRE2_ZERO_TERMINATED, 0, 0, md, NULL); /* run the regex */ + ret = pcre2_match(this_word->pcre2_expr, cleanstr, PCRE2_ZERO_TERMINATED, 0, 0, md, unreal_pcre2_match_ctx); /* run the regex */ pcre2_match_data_free(md); /* yeah, we never use it. unfortunately argument must be non-NULL for pcre2_match() */ if (ret > 0) { @@ -702,7 +733,7 @@ const char *stripbadwords(const char *str, ConfigItem_badword *start_bw, int *bl /* ^^ we need to free 'md' in ALL circumstances. * remember this if you break or continue in this loop! */ - ret = pcre2_match(this_word->pcre2_expr, ptr, PCRE2_ZERO_TERMINATED, 0, 0, md, NULL); /* run the regex */ + ret = pcre2_match(this_word->pcre2_expr, ptr, PCRE2_ZERO_TERMINATED, 0, 0, md, unreal_pcre2_match_ctx); /* run the regex */ if (ret > 0) { dd = pcre2_get_ovector_pointer(md); diff --git a/src/modules/tkl.c b/src/modules/tkl.c index 38e7a7059..d144aba15 100644 --- a/src/modules/tkl.c +++ b/src/modules/tkl.c @@ -1281,6 +1281,29 @@ char *spamfilter_id(TKL *tk) return buf; } +/* Warn opers when a spamfilter regex could not finish (eg. it hit the + * PCRE2 match or depth limit). The match is treated as no-match, so we + * only warn and do not remove the spamfilter. + */ +static void spamfilter_regex_error(TKL *tkl, const char *regex_error) +{ + if (tkl->type & TKL_GLOBAL) + { + unreal_log(ULOG_WARNING, "tkl", "SPAMFILTER_REGEX_ERROR", NULL, + "[Spamfilter] Regex aborted ($regex_error) for '$tkl'. Possibly too complex regex? 
" + "To delete, use: /SPAMFILTER del $spamfilter_id", + log_data_string("regex_error", regex_error), + log_data_string("spamfilter_id", spamfilter_id(tkl)), + log_data_tkl("tkl", tkl)); + } else { + unreal_log(ULOG_WARNING, "tkl", "SPAMFILTER_REGEX_ERROR", NULL, + "[Spamfilter] Regex aborted ($regex_error) for '$tkl'. Possibly too complex regex? " + "To remove it, edit your config file", + log_data_string("regex_error", regex_error), + log_data_tkl("tkl", tkl)); + } +} + int tkl_ip_change(Client *client, const char *oldip) { TKL *tkl; @@ -3774,9 +3797,15 @@ int spamfilter_check_users(TKL *tkl) { if (MyUser(client)) { + const char *regex_error = NULL; + spamfilter_build_user_string(spamfilter_user, client->name, client); - if (!unreal_match(tkl->ptr.spamfilter->match, spamfilter_user)) + if (!unreal_match(tkl->ptr.spamfilter->match, spamfilter_user, &regex_error)) + { + if (regex_error) + spamfilter_regex_error(tkl, regex_error); continue; /* No match */ + } /* matched! */ unreal_log(ULOG_INFO, "tkl", "SPAMFILTER_MATCH", client, @@ -5589,6 +5618,8 @@ int _match_spamfilter(Client *client, const char *str_in, int target, const char if (tkl->ptr.spamfilter->match && (tkl->ptr.spamfilter->match->type != MATCH_NONE)) { + const char *regex_error = NULL; + #ifdef SPAMFILTER_DETECTSLOW if (tkl->ptr.spamfilter->match->type == MATCH_PCRE_REGEX) { @@ -5600,11 +5631,11 @@ int _match_spamfilter(Client *client, const char *str_in, int target, const char #endif if (tkl->ptr.spamfilter->input_conversion == INPUT_CONVERSION_STRIP_CONTROL_CODES) - ret = unreal_match(tkl->ptr.spamfilter->match, str); /* StripControlCodes() */ + ret = unreal_match(tkl->ptr.spamfilter->match, str, &regex_error); /* StripControlCodes() */ else if (tkl->ptr.spamfilter->input_conversion == INPUT_CONVERSION_CONFUSABLES) - ret = unreal_match(tkl->ptr.spamfilter->match, str_deconfused ? 
str_deconfused : str); /* utf8_convert_confusables(), with fallback */ + ret = unreal_match(tkl->ptr.spamfilter->match, str_deconfused ? str_deconfused : str, &regex_error); /* utf8_convert_confusables(), with fallback */ else - ret = unreal_match(tkl->ptr.spamfilter->match, str_in); /* raw */ + ret = unreal_match(tkl->ptr.spamfilter->match, str_in, &regex_error); /* raw */ #ifdef SPAMFILTER_DETECTSLOW if (tkl->ptr.spamfilter->match->type == MATCH_PCRE_REGEX) @@ -5633,6 +5664,9 @@ int _match_spamfilter(Client *client, const char *str_in, int target, const char } } #endif + + if (regex_error) + spamfilter_regex_error(tkl, regex_error); } else { /* There is no ::match but there was a ::rule, and that is enough for a match.. */ if (tkl->ptr.spamfilter->rule)