mirror of
https://github.com/unrealircd/unrealircd.git
synced 2026-06-12 15:34:47 +02:00
Set PCRE2 limits explicitly (to more sensible defaults), reported by Link420.
This commit is contained in:
@@ -7,15 +7,21 @@ This is work in progress and may not always be a stable version.
|
||||
### Enhancements:
|
||||
|
||||
### Changes:
|
||||
* Spamfilter regexes now use more sensible defaults in terms of "max effort",
|
||||
similar to what PHP has been using for years. This means very slow regexes
|
||||
will now raise a `SPAMFILTER_REGEX_ERROR` warning during execution if
|
||||
this happens (should be extremely rare).
|
||||
|
||||
### Fixes:
|
||||
* Harden the built-in HTTPS client
|
||||
* Hardening of the built-in HTTPS client
|
||||
|
||||
### Developers and protocol:
|
||||
* URL API: The OutgoingWebRequest `max_size` (introduced last release) now
|
||||
also caps file-backed downloads. Default for file-backed when left at 0
|
||||
is 50MB (`DOWNLOAD_MAX_SIZE_FILE_BACKED`). For memory-backed, it stays
|
||||
at 1MB like in 6.2.5 (`DOWNLOAD_MAX_SIZE_MEMORY_BACKED`).
|
||||
* The `unreal_match()` function now has a 3rd argument `const char **error`
|
||||
for communicating regex errors back. Just set to `NULL` if you don't care.
|
||||
* If you do something to a user that would (potentially) move the user from
|
||||
`unknown-users` to `known-users` (or vice versa) then you should call
|
||||
`update_known_user_cache(client);` to update the known users cache.
|
||||
|
||||
@@ -238,6 +238,16 @@
|
||||
#define SPAMFILTER_DETECTSLOW
|
||||
#endif
|
||||
|
||||
/* Limits for PCRE2 regex matching (eg. spamfilter, badwords). A regex that
|
||||
* exceeds these is aborted and treated as no match, instead of running
|
||||
* unbounded. The match limit is honoured by JIT. The depth limit only applies
|
||||
* to the non-JIT interpreter, since PCRE2 ignores it under JIT.
|
||||
* We use the same defaults that PHP has been using for a long time (which is
|
||||
* actually 10 times lower than PCRE2 defaults, as of 2026).
|
||||
*/
|
||||
#define UNREAL_PCRE2_MATCH_LIMIT 1000000
|
||||
#define UNREAL_PCRE2_DEPTH_LIMIT 100000
|
||||
|
||||
/* Maximum number of ModData objects that may be attached to an object */
|
||||
/* UnrealIRCd 4.0.0: 8, 8, 4, 4
|
||||
* UnrealIRCd 4.0.14: 12, 8, 4, 4
|
||||
|
||||
+3
-1
@@ -1091,9 +1091,11 @@ extern void read_packet(int fd, int revents, void *data);
|
||||
extern int process_packet(Client *cptr, char *readbuf, int length, int killsafely);
|
||||
extern int parse_chanmode(ParseMode *pm, const char *modebuf_in, const char *parabuf_in);
|
||||
extern int dead_socket(Client *to, const char *notice);
|
||||
extern MODVAR pcre2_match_context *unreal_pcre2_match_ctx;
|
||||
extern void init_match(void);
|
||||
extern Match *unreal_create_match(MatchType type, const char *str, char **error);
|
||||
extern void unreal_delete_match(Match *m);
|
||||
extern int unreal_match(Match *m, const char *str);
|
||||
extern int unreal_match(Match *m, const char *str, const char **error);
|
||||
extern int unreal_match_method_strtoval(const char *str);
|
||||
extern char *unreal_match_method_valtostr(int val);
|
||||
#ifdef _WIN32
|
||||
|
||||
+1
-1
@@ -143,7 +143,7 @@ void cmd_alias(ClientContext *clictx, Client *client, MessageTag *mtags, int par
|
||||
|
||||
for (format = alias->format; format; format = format->next)
|
||||
{
|
||||
if (unreal_match(format->expr, ptr))
|
||||
if (unreal_match(format->expr, ptr, NULL))
|
||||
{
|
||||
/* Parse the parameters */
|
||||
int i = 0, j = 0, k = 1;
|
||||
|
||||
@@ -1906,6 +1906,7 @@ void config_setdefaultsettings(Configuration *i)
|
||||
add_log_throttle_config(&i->log_throttle, "BUG_CT_BUCKET_MISSING", 5, 60, 0);
|
||||
add_log_throttle_config(&i->log_throttle, "BUG_CT_NEGATIVE_COUNTER", 5, 60, 0);
|
||||
add_log_throttle_config(&i->log_throttle, "BUG_DECREASE_IPUSERS_BUCKET", 5, 60, 0);
|
||||
add_log_throttle_config(&i->log_throttle, "SPAMFILTER_REGEX_ERROR", 5, 60, 0);
|
||||
|
||||
/* TLS options */
|
||||
i->tls_options = safe_alloc(sizeof(TLSOptions));
|
||||
|
||||
@@ -547,6 +547,7 @@ int InitUnrealIRCd(int argc, char *argv[])
|
||||
|
||||
init_hash();
|
||||
log_throttle_init();
|
||||
init_match();
|
||||
|
||||
SetupEvents();
|
||||
|
||||
|
||||
+39
-8
@@ -35,6 +35,8 @@ u_char touppertab[], tolowertab[];
|
||||
#define tolowertab2 tolowertab
|
||||
#define lc(x) tolowertab2[x]
|
||||
|
||||
pcre2_match_context *unreal_pcre2_match_ctx = NULL;
|
||||
|
||||
/* Match routine for special cases where escaping is needed in a normal fashion.
|
||||
* Checks a string ('name') against a globbing(+more) pattern ('mask').
|
||||
* Original by Douglas A Lewis (dalewis@acsu.buffalo.edu).
|
||||
@@ -369,6 +371,17 @@ u_char char_atribs[] = {
|
||||
/* f0-ff */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
};
|
||||
|
||||
/* Set up global match state. Called once at startup. */
|
||||
void init_match(void)
|
||||
{
|
||||
unreal_pcre2_match_ctx = pcre2_match_context_create(NULL);
|
||||
if (unreal_pcre2_match_ctx)
|
||||
{
|
||||
pcre2_set_match_limit(unreal_pcre2_match_ctx, UNREAL_PCRE2_MATCH_LIMIT);
|
||||
pcre2_set_depth_limit(unreal_pcre2_match_ctx, UNREAL_PCRE2_DEPTH_LIMIT);
|
||||
}
|
||||
}
|
||||
|
||||
/** Free up all resources of an Match entry (including the struct itself).
|
||||
* NOTE: this function may (also) be called for Match structs that have only been
|
||||
* setup half-way, so use special care when accessing members (NULL checks!)
|
||||
@@ -441,28 +454,46 @@ Match *unreal_create_match(MatchType type, const char *str, char **error)
|
||||
}
|
||||
|
||||
/** Try to match an Match entry ('m') against a string ('str').
|
||||
* @param error If non-NULL, set to an error string when the regex could not
|
||||
* complete (eg. a resource limit was hit), or to NULL otherwise.
|
||||
* Points to a static buffer, valid until the next unreal_match().
|
||||
* @returns 1 if matched, 0 if not.
|
||||
* @note These (more logical) return values are opposite to the match_simple() function.
|
||||
*/
|
||||
int unreal_match(Match *m, const char *str)
|
||||
int unreal_match(Match *m, const char *str, const char **error)
|
||||
{
|
||||
static char errbuf[256];
|
||||
|
||||
if (error)
|
||||
*error = NULL;
|
||||
|
||||
if (m->type == MATCH_SIMPLE)
|
||||
{
|
||||
if (match_simple(m->str, str))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
if (m->type == MATCH_PCRE_REGEX)
|
||||
{
|
||||
pcre2_match_data *md = pcre2_match_data_create(9, NULL);
|
||||
int ret;
|
||||
|
||||
ret = pcre2_match(m->ext.pcre2_expr, str, PCRE2_ZERO_TERMINATED, 0, 0, md, NULL); /* run the regex */
|
||||
|
||||
ret = pcre2_match(m->ext.pcre2_expr, str, PCRE2_ZERO_TERMINATED, 0, 0, md, unreal_pcre2_match_ctx); /* run the regex */
|
||||
pcre2_match_data_free(md); /* yeah, we never use it. unfortunately argument must be non-NULL for pcre2_match() */
|
||||
|
||||
|
||||
if (ret > 0)
|
||||
return 1; /* MATCH */
|
||||
return 1; /* MATCH */
|
||||
|
||||
if (error && (ret < 0) && (ret != PCRE2_ERROR_NOMATCH) && (ret != PCRE2_ERROR_PARTIAL))
|
||||
{
|
||||
/* Regex did not finish (eg. hit the match, depth or JIT stack limit).
|
||||
* Report it so the caller can warn. We still return no-match.
|
||||
*/
|
||||
*errbuf = '\0';
|
||||
pcre2_get_error_message(ret, errbuf, sizeof(errbuf));
|
||||
*error = errbuf;
|
||||
}
|
||||
return 0; /* NO MATCH */
|
||||
}
|
||||
|
||||
@@ -681,7 +712,7 @@ const char *stripbadwords(const char *str, ConfigItem_badword *start_bw, int *bl
|
||||
pcre2_match_data *md = pcre2_match_data_create(9, NULL);
|
||||
int ret;
|
||||
|
||||
ret = pcre2_match(this_word->pcre2_expr, cleanstr, PCRE2_ZERO_TERMINATED, 0, 0, md, NULL); /* run the regex */
|
||||
ret = pcre2_match(this_word->pcre2_expr, cleanstr, PCRE2_ZERO_TERMINATED, 0, 0, md, unreal_pcre2_match_ctx); /* run the regex */
|
||||
pcre2_match_data_free(md); /* yeah, we never use it. unfortunately argument must be non-NULL for pcre2_match() */
|
||||
if (ret > 0)
|
||||
{
|
||||
@@ -702,7 +733,7 @@ const char *stripbadwords(const char *str, ConfigItem_badword *start_bw, int *bl
|
||||
/* ^^ we need to free 'md' in ALL circumstances.
|
||||
* remember this if you break or continue in this loop!
|
||||
*/
|
||||
ret = pcre2_match(this_word->pcre2_expr, ptr, PCRE2_ZERO_TERMINATED, 0, 0, md, NULL); /* run the regex */
|
||||
ret = pcre2_match(this_word->pcre2_expr, ptr, PCRE2_ZERO_TERMINATED, 0, 0, md, unreal_pcre2_match_ctx); /* run the regex */
|
||||
if (ret > 0)
|
||||
{
|
||||
dd = pcre2_get_ovector_pointer(md);
|
||||
|
||||
+38
-4
@@ -1281,6 +1281,29 @@ char *spamfilter_id(TKL *tk)
|
||||
return buf;
|
||||
}
|
||||
|
||||
/* Warn opers when a spamfilter regex could not finish (eg. it hit the
|
||||
* PCRE2 match or depth limit). The match is treated as no-match, so we
|
||||
* only warn and do not remove the spamfilter.
|
||||
*/
|
||||
static void spamfilter_regex_error(TKL *tkl, const char *regex_error)
|
||||
{
|
||||
if (tkl->type & TKL_GLOBAL)
|
||||
{
|
||||
unreal_log(ULOG_WARNING, "tkl", "SPAMFILTER_REGEX_ERROR", NULL,
|
||||
"[Spamfilter] Regex aborted ($regex_error) for '$tkl'. Possibly too complex regex? "
|
||||
"To delete, use: /SPAMFILTER del $spamfilter_id",
|
||||
log_data_string("regex_error", regex_error),
|
||||
log_data_string("spamfilter_id", spamfilter_id(tkl)),
|
||||
log_data_tkl("tkl", tkl));
|
||||
} else {
|
||||
unreal_log(ULOG_WARNING, "tkl", "SPAMFILTER_REGEX_ERROR", NULL,
|
||||
"[Spamfilter] Regex aborted ($regex_error) for '$tkl'. Possibly too complex regex? "
|
||||
"To remove it, edit your config file",
|
||||
log_data_string("regex_error", regex_error),
|
||||
log_data_tkl("tkl", tkl));
|
||||
}
|
||||
}
|
||||
|
||||
int tkl_ip_change(Client *client, const char *oldip)
|
||||
{
|
||||
TKL *tkl;
|
||||
@@ -3774,9 +3797,15 @@ int spamfilter_check_users(TKL *tkl)
|
||||
{
|
||||
if (MyUser(client))
|
||||
{
|
||||
const char *regex_error = NULL;
|
||||
|
||||
spamfilter_build_user_string(spamfilter_user, client->name, client);
|
||||
if (!unreal_match(tkl->ptr.spamfilter->match, spamfilter_user))
|
||||
if (!unreal_match(tkl->ptr.spamfilter->match, spamfilter_user, &regex_error))
|
||||
{
|
||||
if (regex_error)
|
||||
spamfilter_regex_error(tkl, regex_error);
|
||||
continue; /* No match */
|
||||
}
|
||||
|
||||
/* matched! */
|
||||
unreal_log(ULOG_INFO, "tkl", "SPAMFILTER_MATCH", client,
|
||||
@@ -5589,6 +5618,8 @@ int _match_spamfilter(Client *client, const char *str_in, int target, const char
|
||||
|
||||
if (tkl->ptr.spamfilter->match && (tkl->ptr.spamfilter->match->type != MATCH_NONE))
|
||||
{
|
||||
const char *regex_error = NULL;
|
||||
|
||||
#ifdef SPAMFILTER_DETECTSLOW
|
||||
if (tkl->ptr.spamfilter->match->type == MATCH_PCRE_REGEX)
|
||||
{
|
||||
@@ -5600,11 +5631,11 @@ int _match_spamfilter(Client *client, const char *str_in, int target, const char
|
||||
#endif
|
||||
|
||||
if (tkl->ptr.spamfilter->input_conversion == INPUT_CONVERSION_STRIP_CONTROL_CODES)
|
||||
ret = unreal_match(tkl->ptr.spamfilter->match, str); /* StripControlCodes() */
|
||||
ret = unreal_match(tkl->ptr.spamfilter->match, str, &regex_error); /* StripControlCodes() */
|
||||
else if (tkl->ptr.spamfilter->input_conversion == INPUT_CONVERSION_CONFUSABLES)
|
||||
ret = unreal_match(tkl->ptr.spamfilter->match, str_deconfused ? str_deconfused : str); /* utf8_convert_confusables(), with fallback */
|
||||
ret = unreal_match(tkl->ptr.spamfilter->match, str_deconfused ? str_deconfused : str, &regex_error); /* utf8_convert_confusables(), with fallback */
|
||||
else
|
||||
ret = unreal_match(tkl->ptr.spamfilter->match, str_in); /* raw */
|
||||
ret = unreal_match(tkl->ptr.spamfilter->match, str_in, &regex_error); /* raw */
|
||||
|
||||
#ifdef SPAMFILTER_DETECTSLOW
|
||||
if (tkl->ptr.spamfilter->match->type == MATCH_PCRE_REGEX)
|
||||
@@ -5633,6 +5664,9 @@ int _match_spamfilter(Client *client, const char *str_in, int target, const char
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (regex_error)
|
||||
spamfilter_regex_error(tkl, regex_error);
|
||||
} else {
|
||||
/* There is no ::match but there was a ::rule, and that is enough for a match.. */
|
||||
if (tkl->ptr.spamfilter->rule)
|
||||
|
||||
Reference in New Issue
Block a user