1
0
mirror of https://github.com/unrealircd/unrealircd.git synced 2026-07-01 21:46:37 +02:00

Add spamfilter { input-conversion confusables; ..... } for UTF8 conversion

of lookalike characters to simple Latin characters.

Also add SPAMINFO command so you can see the result of the conversion.
This commit is contained in:
Bram Matthys
2025-03-22 08:31:22 +01:00
parent 9b3d219743
commit e1fac402d5
3 changed files with 54 additions and 4 deletions
+52 -2
View File
@@ -52,6 +52,7 @@ CMD_FUNC(cmd_kline);
CMD_FUNC(cmd_zline);
CMD_FUNC(cmd_spamfilter);
CMD_FUNC(cmd_eline);
CMD_FUNC(cmd_spaminfo);
void cmd_tkl_line(Client *client, int parc, const char *parv[], char *type);
int _tkl_hash(unsigned int c);
char _tkl_typetochar(int type);
@@ -170,6 +171,7 @@ TKLTypeTable tkl_types[] = {
int max_stats_matches = 1000;
int mtag_spamfilters_present = 0; /**< Are any spamfilters with type SPAMF_MTAG present? */
int raw_spamfilters_present = 0; /**< Are any spamfilters with type SPAMF_RAW present? */
int confusables_spamfilters_present = 0; /**< Are any spamfilters with input-conversion confusables present? */
long previous_spamfilter_utf8 = 0;
static int firstboot = 0;
@@ -253,6 +255,7 @@ MOD_INIT()
CommandAdd(modinfo->handle, "SPAMFILTER", cmd_spamfilter, 7, CMD_OPER);
CommandAdd(modinfo->handle, "ELINE", cmd_eline, 4, CMD_OPER);
CommandAdd(modinfo->handle, "TKL", _cmd_tkl, MAXPARA, CMD_OPER|CMD_SERVER);
CommandAdd(modinfo->handle, "SPAMINFO", cmd_spaminfo, 1, CMD_OPER);
add_default_exempts();
return MOD_SUCCESS;
}
@@ -278,6 +281,8 @@ int input_conversion_strtoval(const char *name)
return 0;
if (!strcmp(name, "strip-control-codes"))
return INPUT_CONVERSION_STRIP_CONTROL_CODES;
if (!strcmp(name, "confusables"))
return INPUT_CONVERSION_CONFUSABLES;
return -1;
}
@@ -678,7 +683,12 @@ int tkl_config_run_spamfilter(ConfigFile *cf, ConfigEntry *ce, int type)
else
{
for (cepp = cep->items; cepp; cepp = cepp->next)
input_conversion |= input_conversion_strtoval(cepp->name);
{
if (!strcmp(cepp->name, "none"))
input_conversion = 0;
else
input_conversion |= input_conversion_strtoval(cepp->name);
}
}
}
else if (!strcmp(cep->name, "action"))
@@ -2968,6 +2978,8 @@ TKL *_tkl_add_spamfilter(int type, const char *id, unsigned short target, BanAct
mtag_spamfilters_present = 1;
if (target & SPAMF_RAW)
raw_spamfilters_present = 1;
if (input_conversion & INPUT_CONVERSION_CONFUSABLES)
confusables_spamfilters_present = 1;
return tkl;
}
@@ -5459,6 +5471,8 @@ int _match_spamfilter(Client *client, const char *str_in, int target, const char
TKL *tkl;
TKL *winner_tkl = NULL;
const char *str;
const char *str_deconfused = NULL;
char deconfused[512];
int ret = -1;
char *reason = NULL;
#ifdef SPAMFILTER_DETECTSLOW
@@ -5484,6 +5498,9 @@ int _match_spamfilter(Client *client, const char *str_in, int target, const char
else
str = StripControlCodes(str_in);
if (confusables_spamfilters_present)
str_deconfused = utf8_convert_confusables(str, deconfused, sizeof(deconfused));
/* (note: using client->user check here instead of IsUser()
* due to SPAMF_USER where user isn't marked as client/person yet.
*/
@@ -5559,7 +5576,9 @@ int _match_spamfilter(Client *client, const char *str_in, int target, const char
#endif
if (tkl->ptr.spamfilter->input_conversion == INPUT_CONVERSION_STRIP_CONTROL_CODES)
ret = unreal_match(tkl->ptr.spamfilter->match, str); /* stripcontrolcodes */
ret = unreal_match(tkl->ptr.spamfilter->match, str); /* StripControlCodes() */
else if (tkl->ptr.spamfilter->input_conversion == INPUT_CONVERSION_CONFUSABLES)
ret = unreal_match(tkl->ptr.spamfilter->match, str_deconfused); /* utf8_convert_confusables() */
else
ret = unreal_match(tkl->ptr.spamfilter->match, str_in); /* raw */
@@ -5825,6 +5844,8 @@ int check_special_spamfilters_present(void)
mtag_spamfilters_present = 1;
if (tkl->ptr.spamfilter->target & SPAMF_RAW)
raw_spamfilters_present = 1;
if (tkl->ptr.spamfilter->input_conversion & INPUT_CONVERSION_CONFUSABLES)
confusables_spamfilters_present = 1;
}
return 0;
@@ -6169,3 +6190,32 @@ int spamfilter_pre_command(Client *from, MessageTag *mtags, const char *buf)
return 0;
}
/** SPAMINFO - show how a line of text looks after confusables conversion.
 *
 * Syntax: SPAMINFO <line with spam text>
 *
 * parv[1] is the text to inspect. The command sends the original text back
 * to the requesting client as a notice, followed by the result of
 * utf8_convert_confusables() on that text, so the oper can see what a
 * spamfilter using "input-conversion confusables" would be matched against.
 *
 * All output goes to the client via sendnotice(); nothing is returned.
 */
CMD_FUNC(cmd_spaminfo)
{
	const char *line;
	/* Same buffer size as the one _match_spamfilter() uses for this conversion */
	char deconfused[512], *s;

	/* Explicit oper check, in addition to the CMD_OPER flag used at registration */
	if (!IsOper(client))
	{
		sendnumeric(client, ERR_NOPRIVILEGES);
		return;
	}

	if ((parc < 2) || BadPtr(parv[1]))
	{
		sendnotice(client, "Use: /SPAMINFO <line with spam text>");
		return;
	}

	sendnotice(client, "*** SPAMINFO ***");
	/* The config option is called "confusables" (see input_conversion_strtoval()),
	 * so that is the name we must advertise here.
	 */
	sendnotice(client, "This will show the original text and the deconfused text which can be used in a spamfilter block with input-conversion confusables;");

	line = parv[1];
	sendnotice(client, "Original spam text: %s", line);

	s = utf8_convert_confusables(line, deconfused, sizeof(deconfused));
	/* Only print the converted text if the conversion yielded a result */
	if (s)
		sendnotice(client, "Deconfused spam text: %s", s);
}
+1 -2
View File
@@ -1959,7 +1959,6 @@ char *_utf8_convert_confusables(const char *i, char *obuf, int olen)
utfchar = utf8_to_utf32(i);
len = utf8_charlen(i); // can't utfchar() set this too?
conv = utf8_lookup_confusable(utfchar);
config_status("char 0x%x to 0x%x", utfchar, conv); // DEBUG
if (conv == 0)
{
/* use as-is */
@@ -1979,4 +1978,4 @@ char *_utf8_convert_confusables(const char *i, char *obuf, int olen)
*o = '\0';
return obuf;
}
}