1
0
mirror of https://github.com/weechat/weechat.git synced 2026-06-12 14:14:48 +02:00

core: add function string_levenshtein (issue #1877)

This commit is contained in:
Sébastien Helleu
2023-01-30 20:41:36 +01:00
parent 269b8fc66e
commit 38ffac78f3
3 changed files with 113 additions and 0 deletions
+57
View File
@@ -63,6 +63,7 @@
/*
 * Converts one hexadecimal digit character to its numeric value:
 * 'a'-'f' and 'A'-'F' give 10-15, anything else gives (c) - '0'
 * (no validation is done on the input character).
 *
 * The argument is fully parenthesized so that an expression (for example a
 * conditional) can be passed safely; it is still evaluated several times,
 * so it must not have side effects.
 */
#define HEX2DEC(c) ((((c) >= 'a') && ((c) <= 'f')) ? (c) - 'a' + 10 :   \
                    (((c) >= 'A') && ((c) <= 'F')) ? (c) - 'A' + 10 :   \
                    (c) - '0')

/*
 * Smallest of three values; each argument may be evaluated more than once,
 * so arguments must not have side effects.
 */
#define MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c)))
/*
 * File-scope hashtable pointer, initialized to NULL here.
 * NOTE(review): judging by the name it presumably backs shared strings;
 * confirm against the code that creates and uses it.
 */
struct t_hashtable *string_hashtable_shared = NULL;
@@ -3959,6 +3960,62 @@ string_input_for_buffer (const char *string)
return NULL;
}
/*
 * Computes the Levenshtein (edit) distance between two strings.
 * See: https://en.wikipedia.org/wiki/Levenshtein_distance
 *
 * Characters are read one at a time with utf8_char_int() and
 * utf8_next_char(); when case_sensitive is 0, each character goes through
 * towlower() before being compared.
 *
 * A NULL string is handled like an empty one; if either string is empty,
 * the length of the other one (as given by utf8_strlen()) is returned.
 *
 * Note: the work array is a variable-length array on the stack, with one
 * entry per character of string1 plus one.
 */

int
string_levenshtein (const char *string1, const char *string2,
                    int case_sensitive)
{
    int num_chars1, num_chars2, idx1, idx2, diagonal, saved, best, candidate;
    wint_t wc1, wc2;
    const char *pos1, *pos2;

    num_chars1 = 0;
    if (string1)
        num_chars1 = utf8_strlen (string1);
    num_chars2 = 0;
    if (string2)
        num_chars2 = utf8_strlen (string2);

    /* trivial cases: the distance is the length of the non-empty string */
    if (num_chars1 == 0)
        return num_chars2;
    if (num_chars2 == 0)
        return num_chars1;

    /* single rolling line of the distance matrix, indexed by string1 */
    int distances[num_chars1 + 1];

    idx1 = 1;
    while (idx1 <= num_chars1)
    {
        distances[idx1] = idx1;
        idx1++;
    }

    pos2 = string2;
    for (idx2 = 1; idx2 <= num_chars2; idx2++)
    {
        wc2 = (wint_t)utf8_char_int (pos2);
        if (!case_sensitive)
            wc2 = towlower (wc2);

        /* entry 0 is (re)written on every pass over string2 */
        distances[0] = idx2;
        diagonal = idx2 - 1;

        pos1 = string1;
        for (idx1 = 1; idx1 <= num_chars1; idx1++)
        {
            wc1 = (wint_t)utf8_char_int (pos1);
            if (!case_sensitive)
                wc1 = towlower (wc1);

            /* keep the value about to be overwritten: next diagonal */
            saved = distances[idx1];

            /* minimum of the three possible predecessors */
            best = saved + 1;
            candidate = distances[idx1 - 1] + 1;
            if (candidate < best)
                best = candidate;
            candidate = diagonal;
            if (wc1 != wc2)
                candidate++;
            if (candidate < best)
                best = candidate;
            distances[idx1] = best;

            diagonal = saved;
            pos1 = utf8_next_char (pos1);
        }

        pos2 = utf8_next_char (pos2);
    }

    return distances[num_chars1];
}
/*
* Replaces ${vars} using a callback that returns replacement value (this value
* must be newly allocated because it will be freed in this function).
+2
View File
@@ -134,6 +134,8 @@ extern char *string_hex_dump (const char *data, int data_size,
const char *prefix, const char *suffix);
extern int string_is_command_char (const char *string);
extern const char *string_input_for_buffer (const char *string);
/* returns the Levenshtein distance between string1 and string2 */
extern int string_levenshtein (const char *string1, const char *string2,
int case_sensitive);
extern char *string_replace_with_callback (const char *string,
const char *prefix,
const char *suffix,
+54
View File
@@ -2540,6 +2540,60 @@ TEST(CoreString, InputForBuffer)
config_file_option_reset (config_look_command_chars, 1);
}
/*
 * Tests functions:
 *   string_levenshtein
 */

TEST(CoreString, Levenshtein)
{
    /* NULL and empty strings: distance is the length of the other string */
    LONGS_EQUAL(0, string_levenshtein (NULL, NULL, 1));
    LONGS_EQUAL(0, string_levenshtein ("", "", 1));
    LONGS_EQUAL(3, string_levenshtein (NULL, "abc", 1));
    LONGS_EQUAL(3, string_levenshtein ("abc", NULL, 1));
    LONGS_EQUAL(3, string_levenshtein ("", "abc", 1));
    LONGS_EQUAL(3, string_levenshtein ("abc", "", 1));

    /* case sensitive, same case */
    LONGS_EQUAL(0, string_levenshtein ("abc", "abc", 1));
    LONGS_EQUAL(1, string_levenshtein ("abc", "ab", 1));
    LONGS_EQUAL(1, string_levenshtein ("ab", "abc", 1));
    LONGS_EQUAL(2, string_levenshtein ("abc", "a", 1));
    LONGS_EQUAL(2, string_levenshtein ("a", "abc", 1));
    LONGS_EQUAL(3, string_levenshtein ("abc", "", 1));
    LONGS_EQUAL(3, string_levenshtein ("", "abc", 1));

    /* case sensitive, different case: every char is a substitution */
    LONGS_EQUAL(3, string_levenshtein ("abc", "ABC", 1));
    LONGS_EQUAL(3, string_levenshtein ("abc", "AB", 1));
    LONGS_EQUAL(3, string_levenshtein ("ab", "ABC", 1));
    LONGS_EQUAL(3, string_levenshtein ("abc", "A", 1));
    LONGS_EQUAL(3, string_levenshtein ("a", "ABC", 1));
    LONGS_EQUAL(3, string_levenshtein ("abc", "", 1));
    LONGS_EQUAL(3, string_levenshtein ("", "ABC", 1));

    /* case insensitive */
    LONGS_EQUAL(0, string_levenshtein ("abc", "ABC", 0));
    LONGS_EQUAL(1, string_levenshtein ("abc", "AB", 0));
    LONGS_EQUAL(1, string_levenshtein ("ab", "ABC", 0));
    LONGS_EQUAL(2, string_levenshtein ("abc", "A", 0));
    LONGS_EQUAL(2, string_levenshtein ("a", "ABC", 0));
    LONGS_EQUAL(3, string_levenshtein ("abc", "", 0));
    LONGS_EQUAL(3, string_levenshtein ("", "ABC", 0));

    /* longer words */
    LONGS_EQUAL(2, string_levenshtein ("response", "respond", 1));
    LONGS_EQUAL(4, string_levenshtein ("response", "resist", 1));
    LONGS_EQUAL(2, string_levenshtein ("response", "responsive", 1));

    /* with UTF-8 chars */
    LONGS_EQUAL(1, string_levenshtein ("é", "É", 1));
    LONGS_EQUAL(0, string_levenshtein ("é", "É", 0));
    LONGS_EQUAL(1, string_levenshtein ("é", "à", 1));
    LONGS_EQUAL(1, string_levenshtein ("é", "à", 0));
    /*
     * the first string must not be empty here: an empty string against
     * "to" gives 2 (length of the other string), not 1
     */
    LONGS_EQUAL(1, string_levenshtein ("té", "to", 1));
    LONGS_EQUAL(1, string_levenshtein ("noël", "noel", 1));
    LONGS_EQUAL(2, string_levenshtein ("bôô", "boo", 1));
    LONGS_EQUAL(2, string_levenshtein ("界世", "こん", 1));
}
/*
* Tests functions:
* string_get_priority_and_name