1
0
mirror of https://github.com/unrealircd/unrealircd.git synced 2026-06-30 03:16:38 +02:00
Files
unrealircd/src/modules/antimixedutf8.c
T
Bram Matthys 93980ee004 Include TextAnalysis in antimixedutf8 hit as well. And use "text_analysis"
and not "textanalysis" for the JSON, to keep naming of multi-word stuff
consistent.

Example:
--snip--
  "text_analysis": {
    "antimixedutf8_points": 20,
    "unicode_blocks": 9,
    "num_bytes": 55,
    "num_unicode_characters": 20,
    "deconfused": "Valware is ualwaring",
    "deconfused": "This is a testtestte",
    "unicode_blockmap": {
      "Basic Latin": 2,
      "Latin Extended-B": 2,
      "IPA Extensions": 1,
      "Greek and Coptic": 1,
      "Latin Extended Additional": 2,
      "Greek Extended": 1,
      "Number Forms": 1,
      "Tifinagh": 1,
      "Mathematical Alphanumeric Symbols": 7
    }
  },
2025-07-14 18:41:04 +02:00

258 lines
7.3 KiB
C

/*
* Anti mixed UTF8 - a filter written by Bram Matthys ("Syzop").
* Reported by Mr_Smoke in https://bugs.unrealircd.org/view.php?id=5163
* Tested by PeGaSuS (The_Myth) with some of the most used spam lines.
* Help with testing and fixing Cyrillic from 'i' <info@servx.org>
* In 2025 a major overhaul, with a lot of the detection code moved
* to generic text analysis in src/modules/utf8functions.c (and
* no longer in the file you are viewing right now).
*
* ==[ ABOUT ]==
* This module will detect and stop spam consisting of characters of
* mixed "scripts", where some characters are in Latin script and other
* characters are in Cyrillic.
* This unusual behavior can be detected easily and action can be taken.
*
* ==[ MODULE LOADING AND CONFIGURATION ]==
* loadmodule "antimixedutf8";
* set {
* antimixedutf8 {
* score 10;
* ban-action block;
* ban-reason "Possible mixed character spam";
* ban-time 4h; // For other types
* except {
* }
* };
* };
*
* ==[ LICENSE AND PORTING ]==
* Feel free to copy/move the idea or code to other IRCds.
* The license is GPLv1 (or later, at your option):
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 1, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include "unrealircd.h"
/* Module header for the antimixedutf8 module.
 * Going by their contents, the strings below are (in order): the module
 * name (the same name used with loadmodule), the module version, a short
 * description and the author. The final "unrealircd-6" string is
 * presumably the module API version tag -- verify against the
 * ModuleHeader definition.
 */
ModuleHeader MOD_HEADER
= {
"antimixedutf8",
"1.0",
"Mixed UTF8 character filter (look-alike character spam) - by Syzop",
"UnrealIRCd Team",
"unrealircd-6",
};
/* Runtime configuration of this module, filled in from set::antimixedutf8.
 * Defaults are assigned in init_config() and everything is released in
 * free_config().
 */
struct {
/* Threshold: action is taken once a message's antimixedutf8_points is >= this value */
int score;
/* Action(s) handed to take_action() when the threshold is reached */
BanAction *ban_action;
/* Allocated string: passed to take_action() and reported back as the error message on deny */
char *ban_reason;
/* Duration in seconds handed to take_action() */
long ban_time;
/* Clients matching this security group are never checked */
SecurityGroup *except;
} cfg;
/* Forward declarations */
static void free_config(void);
static void init_config(void);
int antimixedutf8_config_test(ConfigFile *, ConfigEntry *, int, int *);
int antimixedutf8_config_run(ConfigFile *, ConfigEntry *, int);
/* The two message hooks registered in MOD_INIT(). */
int antimixedutf8_can_send_to_channel(Client *client, Channel *channel, Membership *lp, const char **msg, const char **errmsg, SendType sendtype, ClientContext *clictx);
int antimixedutf8_can_send_to_user(Client *client, Client *target, const char **text, const char **errmsg, SendType sendtype, ClientContext *clictx);
/** Decide whether a message may be sent, based on its text analysis.
 * Shared by the channel and the private message hooks.
 * @param client  The client that is sending the message.
 * @param txa     Text analysis of the message. May be NULL, in which case
 *                the message is let through unchecked.
 * @param errmsg  Set to cfg.ban_reason when the message is denied.
 * @returns HOOK_DENY if the message must not be delivered,
 *          HOOK_CONTINUE otherwise.
 */
int antimixedutf8_check(Client *client, TextAnalysis *txa, const char **errmsg)
{
	int retval;

	/* Nothing to check without an analysis, for clients for which
	 * MyUser() is false, or for clients in the configured except group.
	 */
	if (!txa || !MyUser(client) || user_allowed_by_security_group(client, cfg.except))
		return HOOK_CONTINUE;

	/* Below the threshold, or exempted through a TKL exception.
	 * The exception lookup only happens once the threshold is reached.
	 */
	if ((txa->antimixedutf8_points < cfg.score) || find_tkl_exception(TKL_ANTIMIXEDUTF8, client))
		return HOOK_CONTINUE;

	unreal_log(ULOG_INFO, "antimixedutf8", "ANTIMIXEDUTF8_HIT", client,
	           "[antimixedutf8] Client $client.details hit score $score -- taking action",
	           log_data_integer("score", txa->antimixedutf8_points),
	           log_data_textanalysis("text_analysis",txa));

	/* Take the action */
	retval = take_action(client, cfg.ban_action, cfg.ban_reason, cfg.ban_time, 0, NULL);

	/* A (soft) warn does not stop the message */
	if ((retval == BAN_ACT_WARN) || (retval == BAN_ACT_SOFT_WARN))
		return HOOK_CONTINUE;

	/* A (soft) block, or any other positive result, stops the message.
	 * TODO: verify this works correctly with like kill/gline/etc
	 */
	if ((retval == BAN_ACT_BLOCK) || (retval == BAN_ACT_SOFT_BLOCK) || (retval > 0))
	{
		*errmsg = cfg.ban_reason;
		return HOOK_DENY;
	}

	/* retval <= 0: let the message through */
	return HOOK_CONTINUE;
}
/** Channel message hook: hands the text analysis attached to the client
 * context to antimixedutf8_check() and returns its verdict.
 * The channel, membership, message and send type are not looked at here.
 */
int antimixedutf8_can_send_to_channel(Client *client, Channel *channel, Membership *lp, const char **msg, const char **errmsg, SendType sendtype, ClientContext *clictx)
{
	int verdict;

	verdict = antimixedutf8_check(client, clictx->textanalysis, errmsg);
	return verdict;
}
/** Private message hook: hands the text analysis attached to the client
 * context to antimixedutf8_check() and returns its verdict.
 * The target, message text and send type are not looked at here.
 */
int antimixedutf8_can_send_to_user(Client *client, Client *target, const char **text, const char **errmsg, SendType sendtype, ClientContext *clictx)
{
	int verdict;

	verdict = antimixedutf8_check(client, clictx->textanalysis, errmsg);
	return verdict;
}
/*** rest is module and config stuff ****/
/* Module test entry point: registers antimixedutf8_config_test() as the
 * HOOKTYPE_CONFIGTEST hook (priority 0) so that set::antimixedutf8 gets
 * validated.
 */
MOD_TEST()
{
HookAdd(modinfo->handle, HOOKTYPE_CONFIGTEST, 0, antimixedutf8_config_test);
return MOD_SUCCESS;
}
/* Module init entry point: marks the module as official, loads the default
 * configuration and registers the message hooks and the config-run hook.
 */
MOD_INIT()
{
MARK_AS_OFFICIAL_MODULE(modinfo);
/* Set defaults; antimixedutf8_config_run() overrides them with set::antimixedutf8 values */
init_config();
/* Both message hooks end up in antimixedutf8_check() */
HookAdd(modinfo->handle, HOOKTYPE_CAN_SEND_TO_CHANNEL, 0, antimixedutf8_can_send_to_channel);
HookAdd(modinfo->handle, HOOKTYPE_CAN_SEND_TO_USER, 0, antimixedutf8_can_send_to_user);
HookAdd(modinfo->handle, HOOKTYPE_CONFIGRUN, 0, antimixedutf8_config_run);
return MOD_SUCCESS;
}
/* Module load entry point: nothing to do here, always reports success. */
MOD_LOAD()
{
return MOD_SUCCESS;
}
/* Module unload entry point: releases everything held in cfg. */
MOD_UNLOAD()
{
free_config();
return MOD_SUCCESS;
}
/** Reset cfg to all-zero and then fill in the built-in defaults:
 * score 10, ban-action block, a 4 hour ban-time and a default ban-reason.
 */
static void init_config(void)
{
	memset(&cfg, 0, sizeof(cfg));

	/* Four hours. Irrelevant for block, but a sane default for other actions. */
	cfg.ban_time = 60 * 60 * 4;
	cfg.ban_action = banact_value_to_struct(BAN_ACT_BLOCK);
	safe_strdup(cfg.ban_reason, "Possible mixed character spam");
	cfg.score = 10;
}
/** Release everything that cfg owns and wipe the structure afterwards. */
static void free_config(void)
{
	safe_free_all_ban_actions(cfg.ban_action);
	free_security_group(cfg.except);
	safe_free(cfg.ban_reason);

	/* Required: leaves no stale values or pointers behind in cfg */
	memset(&cfg, 0, sizeof(cfg));
}
/** Validate the set::antimixedutf8 block.
 * @param cf    The configuration file being tested.
 * @param ce    The configuration entry to test.
 * @param type  The type of block; only CONFIG_SET is handled.
 * @param errs  Receives the number of errors found (only written when
 *              the entry is set::antimixedutf8).
 * @returns 0 if the entry is not ours, -1 if errors were found,
 *          1 if the block is valid.
 */
int antimixedutf8_config_test(ConfigFile *cf, ConfigEntry *ce, int type, int *errs)
{
	int errors = 0;
	ConfigEntry *cep;

	if (type != CONFIG_SET)
		return 0;

	/* We are only interested in set::antimixedutf8... */
	if (!ce || !ce->name || strcmp(ce->name, "antimixedutf8"))
		return 0;

	for (cep = ce->items; cep; cep = cep->next)
	{
		if (!strcmp(cep->name, "except"))
		{
			/* Must come before the "no value" check below: except { }
			 * is a block with sub-items and has no value of its own.
			 */
			test_match_block(cf, cep, &errors);
		} else
		if (!cep->value)
		{
			config_error("%s:%i: set::antimixedutf8::%s with no value",
				cep->file->filename, cep->line_number, cep->name);
			errors++;
		} else
		if (!strcmp(cep->name, "score"))
		{
			int v = atoi(cep->value);
			if ((v < 1) || (v > 99))
			{
				config_error("%s:%i: set::antimixedutf8::score: must be between 1 - 99 (got: %d)",
					cep->file->filename, cep->line_number, v);
				errors++;
			}
		} else
		if (!strcmp(cep->name, "ban-action"))
		{
			errors += test_ban_action_config(cep);
		} else
		if (!strcmp(cep->name, "ban-reason"))
		{
			/* Any value is accepted */
		} else
		if (!strcmp(cep->name, "ban-time"))
		{
			/* Not validated here */
		} else
		{
			config_error("%s:%i: unknown directive set::antimixedutf8::%s",
				cep->file->filename, cep->line_number, cep->name);
			errors++;
		}
	}

	*errs = errors;
	return errors ? -1 : 1;
}
/** Apply the set::antimixedutf8 block to cfg.
 * @param cf    The configuration file being processed.
 * @param ce    The configuration entry to process.
 * @param type  The type of block; only CONFIG_SET is handled.
 * @returns 1 if the entry was set::antimixedutf8 and has been processed,
 *          0 if the entry is not ours.
 */
int antimixedutf8_config_run(ConfigFile *cf, ConfigEntry *ce, int type)
{
	ConfigEntry *item;

	/* Only set::antimixedutf8 is handled here */
	if ((type != CONFIG_SET) || !ce || !ce->name)
		return 0;
	if (strcmp(ce->name, "antimixedutf8") != 0)
		return 0;

	for (item = ce->items; item; item = item->next)
	{
		const char *key = item->name;

		if (strcmp(key, "score") == 0)
		{
			cfg.score = atoi(item->value);
		}
		else if (strcmp(key, "ban-action") == 0)
		{
			parse_ban_action_config(item, &cfg.ban_action);
		}
		else if (strcmp(key, "ban-reason") == 0)
		{
			safe_strdup(cfg.ban_reason, item->value);
		}
		else if (strcmp(key, "ban-time") == 0)
		{
			cfg.ban_time = config_checkval(item->value, CFG_TIME);
		}
		else if (strcmp(key, "except") == 0)
		{
			conf_match_block(cf, item, &cfg.except);
		}
		/* Anything else is silently skipped */
	}

	return 1;
}