1
0
mirror of https://github.com/unrealircd/unrealircd.git synced 2026-06-12 17:34:46 +02:00

Set PCRE2 limits explicitly (to more sensible defaults), reported by Link420.

This commit is contained in:
Bram Matthys
2026-06-05 09:43:22 +02:00
parent caa01c9c8c
commit f0c0feff4f
8 changed files with 100 additions and 15 deletions
+7 -1
View File
@@ -7,15 +7,21 @@ This is work in progress and may not always be a stable version.
### Enhancements:
### Changes:
* Spamfilter regexes now use more sensible defaults in terms of "max effort",
similar to what PHP has been using for years. A very slow regex is now
aborted (treated as no match) and raises a `SPAMFILTER_REGEX_ERROR` warning
when it hits the limit (this should be extremely rare).
### Fixes:
* Harden the built-in HTTPS client
* Hardening of the built-in HTTPS client
### Developers and protocol:
* URL API: The OutgoingWebRequest `max_size` (introduced last release) now
also caps file-backed downloads. Default for file-backed when left at 0
is 50MB (`DOWNLOAD_MAX_SIZE_FILE_BACKED`). For memory-backed, it stays
at 1MB like in 6.2.5 (`DOWNLOAD_MAX_SIZE_MEMORY_BACKED`).
* The `unreal_match()` function now has a 3rd argument `const char **error`
for communicating regex errors back. Just set to `NULL` if you don't care.
* If you do something to a user that would (potentially) move the user from
`unknown-users` to `known-users` (or vice versa) then you should call
`update_known_user_cache(client);` to update the known users cache.
+10
View File
@@ -238,6 +238,16 @@
#define SPAMFILTER_DETECTSLOW
#endif
/* Limits for PCRE2 regex matching (eg. spamfilter, badwords). A regex that
 * exceeds these is aborted and treated as no match, instead of running
 * unbounded. The match limit is honoured by JIT. The depth limit only applies
 * to the non-JIT interpreter, since PCRE2 ignores it under JIT.
 * We use the same defaults that PHP has been using for a long time (which is
 * actually 10 times lower than PCRE2 defaults, as of 2026).
 * Both values are installed on the shared match context by init_match().
 */
/* Value handed to pcre2_set_match_limit() */
#define UNREAL_PCRE2_MATCH_LIMIT 1000000
/* Value handed to pcre2_set_depth_limit() */
#define UNREAL_PCRE2_DEPTH_LIMIT 100000
/* Maximum number of ModData objects that may be attached to an object */
/* UnrealIRCd 4.0.0: 8, 8, 4, 4
* UnrealIRCd 4.0.14: 12, 8, 4, 4
+3 -1
View File
@@ -1091,9 +1091,11 @@ extern void read_packet(int fd, int revents, void *data);
extern int process_packet(Client *cptr, char *readbuf, int length, int killsafely);
extern int parse_chanmode(ParseMode *pm, const char *modebuf_in, const char *parabuf_in);
extern int dead_socket(Client *to, const char *notice);
extern MODVAR pcre2_match_context *unreal_pcre2_match_ctx;
extern void init_match(void);
extern Match *unreal_create_match(MatchType type, const char *str, char **error);
extern void unreal_delete_match(Match *m);
extern int unreal_match(Match *m, const char *str);
extern int unreal_match(Match *m, const char *str, const char **error);
extern int unreal_match_method_strtoval(const char *str);
extern char *unreal_match_method_valtostr(int val);
#ifdef _WIN32
+1 -1
View File
@@ -143,7 +143,7 @@ void cmd_alias(ClientContext *clictx, Client *client, MessageTag *mtags, int par
for (format = alias->format; format; format = format->next)
{
if (unreal_match(format->expr, ptr))
if (unreal_match(format->expr, ptr, NULL))
{
/* Parse the parameters */
int i = 0, j = 0, k = 1;
+1
View File
@@ -1906,6 +1906,7 @@ void config_setdefaultsettings(Configuration *i)
add_log_throttle_config(&i->log_throttle, "BUG_CT_BUCKET_MISSING", 5, 60, 0);
add_log_throttle_config(&i->log_throttle, "BUG_CT_NEGATIVE_COUNTER", 5, 60, 0);
add_log_throttle_config(&i->log_throttle, "BUG_DECREASE_IPUSERS_BUCKET", 5, 60, 0);
add_log_throttle_config(&i->log_throttle, "SPAMFILTER_REGEX_ERROR", 5, 60, 0);
/* TLS options */
i->tls_options = safe_alloc(sizeof(TLSOptions));
+1
View File
@@ -547,6 +547,7 @@ int InitUnrealIRCd(int argc, char *argv[])
init_hash();
log_throttle_init();
init_match();
SetupEvents();
+35 -4
View File
@@ -35,6 +35,8 @@ u_char touppertab[], tolowertab[];
#define tolowertab2 tolowertab
#define lc(x) tolowertab2[x]
pcre2_match_context *unreal_pcre2_match_ctx = NULL;
/* Match routine for special cases where escaping is needed in a normal fashion.
* Checks a string ('name') against a globbing(+more) pattern ('mask').
* Original by Douglas A Lewis (dalewis@acsu.buffalo.edu).
@@ -369,6 +371,17 @@ u_char char_atribs[] = {
/* f0-ff */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
/* Set up global match state. Called once at startup. */
void init_match(void)
{
unreal_pcre2_match_ctx = pcre2_match_context_create(NULL);
if (unreal_pcre2_match_ctx)
{
pcre2_set_match_limit(unreal_pcre2_match_ctx, UNREAL_PCRE2_MATCH_LIMIT);
pcre2_set_depth_limit(unreal_pcre2_match_ctx, UNREAL_PCRE2_DEPTH_LIMIT);
}
}
/** Free up all resources of an Match entry (including the struct itself).
* NOTE: this function may (also) be called for Match structs that have only been
* setup half-way, so use special care when accessing members (NULL checks!)
@@ -441,11 +454,19 @@ Match *unreal_create_match(MatchType type, const char *str, char **error)
}
/** Try to match an Match entry ('m') against a string ('str').
* @param error If non-NULL, set to an error string when the regex could not
* complete (eg. a resource limit was hit), or to NULL otherwise.
* Points to a static buffer, valid until the next unreal_match().
* @returns 1 if matched, 0 if not.
* @note These (more logical) return values are opposite to the match_simple() function.
*/
int unreal_match(Match *m, const char *str)
int unreal_match(Match *m, const char *str, const char **error)
{
static char errbuf[256];
if (error)
*error = NULL;
if (m->type == MATCH_SIMPLE)
{
if (match_simple(m->str, str))
@@ -458,11 +479,21 @@ int unreal_match(Match *m, const char *str)
pcre2_match_data *md = pcre2_match_data_create(9, NULL);
int ret;
ret = pcre2_match(m->ext.pcre2_expr, str, PCRE2_ZERO_TERMINATED, 0, 0, md, NULL); /* run the regex */
ret = pcre2_match(m->ext.pcre2_expr, str, PCRE2_ZERO_TERMINATED, 0, 0, md, unreal_pcre2_match_ctx); /* run the regex */
pcre2_match_data_free(md); /* yeah, we never use it. unfortunately argument must be non-NULL for pcre2_match() */
if (ret > 0)
return 1; /* MATCH */
if (error && (ret < 0) && (ret != PCRE2_ERROR_NOMATCH) && (ret != PCRE2_ERROR_PARTIAL))
{
/* Regex did not finish (eg. hit the match, depth or JIT stack limit).
* Report it so the caller can warn. We still return no-match.
*/
*errbuf = '\0';
pcre2_get_error_message(ret, errbuf, sizeof(errbuf));
*error = errbuf;
}
return 0; /* NO MATCH */
}
@@ -681,7 +712,7 @@ const char *stripbadwords(const char *str, ConfigItem_badword *start_bw, int *bl
pcre2_match_data *md = pcre2_match_data_create(9, NULL);
int ret;
ret = pcre2_match(this_word->pcre2_expr, cleanstr, PCRE2_ZERO_TERMINATED, 0, 0, md, NULL); /* run the regex */
ret = pcre2_match(this_word->pcre2_expr, cleanstr, PCRE2_ZERO_TERMINATED, 0, 0, md, unreal_pcre2_match_ctx); /* run the regex */
pcre2_match_data_free(md); /* yeah, we never use it. unfortunately argument must be non-NULL for pcre2_match() */
if (ret > 0)
{
@@ -702,7 +733,7 @@ const char *stripbadwords(const char *str, ConfigItem_badword *start_bw, int *bl
/* ^^ we need to free 'md' in ALL circumstances.
* remember this if you break or continue in this loop!
*/
ret = pcre2_match(this_word->pcre2_expr, ptr, PCRE2_ZERO_TERMINATED, 0, 0, md, NULL); /* run the regex */
ret = pcre2_match(this_word->pcre2_expr, ptr, PCRE2_ZERO_TERMINATED, 0, 0, md, unreal_pcre2_match_ctx); /* run the regex */
if (ret > 0)
{
dd = pcre2_get_ovector_pointer(md);
+38 -4
View File
@@ -1281,6 +1281,29 @@ char *spamfilter_id(TKL *tk)
return buf;
}
/* Log a warning that a spamfilter regex was aborted before it could finish
 * (eg. the PCRE2 match or depth limit was reached). Such a run counts as
 * no-match, so this only warns; the spamfilter itself is left in place.
 *
 * @param tkl          The spamfilter entry whose regex was aborted.
 * @param regex_error  Error text describing why the regex did not complete.
 */
static void spamfilter_regex_error(TKL *tkl, const char *regex_error)
{
	if (!(tkl->type & TKL_GLOBAL))
	{
		/* Not a global entry: the message points at the config file */
		unreal_log(ULOG_WARNING, "tkl", "SPAMFILTER_REGEX_ERROR", NULL,
		           "[Spamfilter] Regex aborted ($regex_error) for '$tkl'. Possibly too complex regex? "
		           "To remove it, edit your config file",
		           log_data_string("regex_error", regex_error),
		           log_data_tkl("tkl", tkl));
		return;
	}

	/* Global entry: include the id so it can be removed via /SPAMFILTER del */
	unreal_log(ULOG_WARNING, "tkl", "SPAMFILTER_REGEX_ERROR", NULL,
	           "[Spamfilter] Regex aborted ($regex_error) for '$tkl'. Possibly too complex regex? "
	           "To delete, use: /SPAMFILTER del $spamfilter_id",
	           log_data_string("regex_error", regex_error),
	           log_data_string("spamfilter_id", spamfilter_id(tkl)),
	           log_data_tkl("tkl", tkl));
}
int tkl_ip_change(Client *client, const char *oldip)
{
TKL *tkl;
@@ -3774,9 +3797,15 @@ int spamfilter_check_users(TKL *tkl)
{
if (MyUser(client))
{
const char *regex_error = NULL;
spamfilter_build_user_string(spamfilter_user, client->name, client);
if (!unreal_match(tkl->ptr.spamfilter->match, spamfilter_user))
if (!unreal_match(tkl->ptr.spamfilter->match, spamfilter_user, &regex_error))
{
if (regex_error)
spamfilter_regex_error(tkl, regex_error);
continue; /* No match */
}
/* matched! */
unreal_log(ULOG_INFO, "tkl", "SPAMFILTER_MATCH", client,
@@ -5589,6 +5618,8 @@ int _match_spamfilter(Client *client, const char *str_in, int target, const char
if (tkl->ptr.spamfilter->match && (tkl->ptr.spamfilter->match->type != MATCH_NONE))
{
const char *regex_error = NULL;
#ifdef SPAMFILTER_DETECTSLOW
if (tkl->ptr.spamfilter->match->type == MATCH_PCRE_REGEX)
{
@@ -5600,11 +5631,11 @@ int _match_spamfilter(Client *client, const char *str_in, int target, const char
#endif
if (tkl->ptr.spamfilter->input_conversion == INPUT_CONVERSION_STRIP_CONTROL_CODES)
ret = unreal_match(tkl->ptr.spamfilter->match, str); /* StripControlCodes() */
ret = unreal_match(tkl->ptr.spamfilter->match, str, &regex_error); /* StripControlCodes() */
else if (tkl->ptr.spamfilter->input_conversion == INPUT_CONVERSION_CONFUSABLES)
ret = unreal_match(tkl->ptr.spamfilter->match, str_deconfused ? str_deconfused : str); /* utf8_convert_confusables(), with fallback */
ret = unreal_match(tkl->ptr.spamfilter->match, str_deconfused ? str_deconfused : str, &regex_error); /* utf8_convert_confusables(), with fallback */
else
ret = unreal_match(tkl->ptr.spamfilter->match, str_in); /* raw */
ret = unreal_match(tkl->ptr.spamfilter->match, str_in, &regex_error); /* raw */
#ifdef SPAMFILTER_DETECTSLOW
if (tkl->ptr.spamfilter->match->type == MATCH_PCRE_REGEX)
@@ -5633,6 +5664,9 @@ int _match_spamfilter(Client *client, const char *str_in, int target, const char
}
}
#endif
if (regex_error)
spamfilter_regex_error(tkl, regex_error);
} else {
/* There is no ::match but there was a ::rule, and that is enough for a match.. */
if (tkl->ptr.spamfilter->rule)