From 696533a16303c19497ad73e22ec0e73a24fbb6d5 Mon Sep 17 00:00:00 2001 From: codemastr Date: Fri, 13 Dec 2002 22:06:17 +0000 Subject: [PATCH] Rewrote some +G code --- Changes | 4 ++++ include/struct.h | 6 ++++++ src/badwords.c | 26 ++++-------------------- src/s_conf.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 64 insertions(+), 23 deletions(-) diff --git a/Changes b/Changes index 2e049de8f..e2aedb523 100644 --- a/Changes +++ b/Changes @@ -1721,3 +1721,7 @@ seen. gmtime warning still there *** beta13 released *** - Added set::htm to set HTM options via the config. set::htm is only valid if either commands.so or m_htm.so is loaded. +- Recoded some of +G to work MUCH faster. A compiled copy of the regex is now stored + in memory rather than compiling the regex each time the swear stripping was triggered. + This uses slightly more memory, but saves a significant amount of execution time. + Idea inspired by a suggestion from CaliMonk diff --git a/include/struct.h b/include/struct.h index 439bc9e28..c094089cd 100644 --- a/include/struct.h +++ b/include/struct.h @@ -57,6 +57,11 @@ #include #endif #include "auth.h" +#ifdef HAVE_REGEX +#include +#else +#include "../extras/regex/regex.h" +#endif extern int sendanyways; @@ -1012,6 +1017,7 @@ struct _configitem_badword { ConfigItem *prev, *next; ConfigFlag flag; char *word, *replace; + regex_t expr; }; struct _configitem_deny_dcc { diff --git a/src/badwords.c b/src/badwords.c index 09ec72174..f0f974a5c 100644 --- a/src/badwords.c +++ b/src/badwords.c @@ -44,11 +44,10 @@ void badwords_stats(aClient *sptr) char *stripbadwords_channel(char *str) { regmatch_t pmatch[MAX_MATCH]; - regex_t pcomp; static char cleanstr[4096]; char buf[4096]; char *ptr; - int errorcode, matchlen, stringlen; + int matchlen, stringlen; ConfigItem_badword *this_word; if (!conf_badword_channel) return str; @@ -63,19 +62,12 @@ char *stripbadwords_channel(char *str) for (this_word = conf_badword_channel; this_word; this_word = 
(ConfigItem_badword *)this_word->next) { - if ((errorcode = - regcomp(&pcomp, this_word->word, REG_ICASE)) > 0) - { - regfree(&pcomp); - return cleanstr; - } - /* * Set pointer to start of string */ ptr = cleanstr; - while (regexec(&pcomp, ptr, MAX_MATCH, pmatch, + while (regexec(&this_word->expr, ptr, MAX_MATCH, pmatch, 0) != REG_NOMATCH) { if (pmatch[0].rm_so == -1) @@ -93,7 +85,6 @@ char *stripbadwords_channel(char *str) strlcat(buf, ptr, sizeof buf); memcpy(cleanstr, buf, sizeof cleanstr); memset(buf, 0, sizeof(buf)); - regfree(&pcomp); if (matchlen == stringlen) break; } @@ -104,11 +95,10 @@ char *stripbadwords_channel(char *str) char *stripbadwords_message(char *str) { regmatch_t pmatch[MAX_MATCH]; - regex_t pcomp; static char cleanstr[4096]; char buf[4096]; char *ptr; - int errorcode, matchlen, stringlen; + int matchlen, stringlen; ConfigItem_badword *this_word; if (!conf_badword_message) return str; @@ -123,19 +113,12 @@ char *stripbadwords_message(char *str) for (this_word = conf_badword_message; this_word; this_word = (ConfigItem_badword *)this_word->next) { - if ((errorcode = - regcomp(&pcomp, this_word->word, REG_ICASE)) > 0) - { - regfree(&pcomp); - return cleanstr; - } - /* * Set pointer to start of string */ ptr = cleanstr; - while (regexec(&pcomp, ptr, MAX_MATCH, pmatch, + while (regexec(&this_word->expr, ptr, MAX_MATCH, pmatch, 0) != REG_NOMATCH) { if (pmatch[0].rm_so == -1) @@ -153,7 +136,6 @@ char *stripbadwords_message(char *str) strlcat(buf, ptr, sizeof buf); memcpy(cleanstr, buf, sizeof cleanstr); memset(buf, 0, sizeof(buf)); - regfree(&pcomp); if (matchlen == stringlen) break; } diff --git a/src/s_conf.c b/src/s_conf.c index 3b8c9c780..9b09da96b 100644 --- a/src/s_conf.c +++ b/src/s_conf.c @@ -49,6 +49,7 @@ #ifdef _WIN32 #undef GLOBH #endif +#include "badwords.h" #define ircstrdup(x,y) if (x) MyFree(x); if (!y) x = NULL; else x = strdup(y) #define ircfree(x) if (x) MyFree(x); x = NULL @@ -1286,7 +1287,9 @@ void config_rehash() badword_ptr = 
(ConfigItem_badword *) next) { next = (ListStruct *)badword_ptr->next; ircfree(badword_ptr->word); + if (badword_ptr->replace) ircfree(badword_ptr->replace); + regfree(badword_ptr->expr); DelListItem(badword_ptr, conf_badword_channel); MyFree(badword_ptr); } @@ -1294,7 +1297,9 @@ void config_rehash() badword_ptr = (ConfigItem_badword *) next) { next = (ListStruct *)badword_ptr->next; ircfree(badword_ptr->word); + if (badword_ptr->replace) ircfree(badword_ptr->replace); + regfree(badword_ptr->expr); DelListItem(badword_ptr, conf_badword_message); MyFree(badword_ptr); } @@ -3819,9 +3824,15 @@ int _conf_badword(ConfigFile *conf, ConfigEntry *ce) ca->word = MyMalloc(strlen(cep->ce_vardata) + strlen(PATTERN) -1); ircsprintf(ca->word, PATTERN, cep->ce_vardata); } + /* Yes this is called twice, once in test, and once here, but it is still MUCH + faster than calling it each time a message is received like before. -- codemastr + */ + regcomp(&ca->expr, ca->word, REG_ICASE); if ((cep = config_find_entry(ce->ce_entries, "replace"))) { ircstrdup(ca->replace, cep->ce_vardata); } + else + ca->replace = NULL; if (!strcmp(ce->ce_vardata, "channel")) AddListItem(ca, conf_badword_channel); else if (!strcmp(ce->ce_vardata, "message")) @@ -3833,6 +3844,7 @@ int _conf_badword(ConfigFile *conf, ConfigEntry *ce) int _test_badword(ConfigFile *conf, ConfigEntry *ce) { int errors = 0; ConfigEntry *word, *replace, *cep; + regex_t expr; if (!ce->ce_entries) { config_error("%s:%i: empty badword block", @@ -3863,7 +3875,44 @@ int _test_badword(ConfigFile *conf, ConfigEntry *ce) { config_error("%s:%i: badword::word without contents", word->ce_fileptr->cf_filename, word->ce_varlinenum); errors++; - } + } + else + { + + int errorcode, errorbufsize, regex; + char *errorbuf, *tmp, *tmpbuf; + for (tmp = word->ce_vardata; *tmp; tmp++) { + if ((int)*tmp < 65 || (int)*tmp > 123) { + regex = 1; + break; + } + } + if (regex) + errorcode = regcomp(&expr, word->ce_vardata, REG_ICASE); + else + { + tmpbuf = 
malloc(strlen(word->ce_vardata) + + strlen(PATTERN) -1); + ircsprintf(tmpbuf, PATTERN, word->ce_vardata); + errorcode = regcomp(&expr, tmpbuf, REG_ICASE); + } + if (errorcode > 0) + { + errorbufsize = regerror(errorcode, &expr, NULL, 0)+1; + errorbuf = malloc(errorbufsize); + regerror(errorcode, &expr, errorbuf, errorbufsize); + config_error("%s:%i: badword::%s contains an invalid regex: %s", + word->ce_fileptr->cf_filename, + word->ce_varlinenum, + word->ce_varname, errorbuf); + errors++; + free(errorbuf); + } + if (!regex) + free(tmpbuf); + regfree(&expr); + } + } if ((replace = config_find_entry(ce->ce_entries, "replace"))) {