mirror of
https://github.com/weechat/weechat.git
synced 2026-06-12 14:14:48 +02:00
core: add function string_levenshtein (issue #1877)
This commit is contained in:
@@ -63,6 +63,7 @@
|
||||
#define HEX2DEC(c) (((c >= 'a') && (c <= 'f')) ? c - 'a' + 10 : \
|
||||
((c >= 'A') && (c <= 'F')) ? c - 'A' + 10 : \
|
||||
c - '0')
|
||||
#define MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c)))
|
||||
|
||||
struct t_hashtable *string_hashtable_shared = NULL;
|
||||
|
||||
@@ -3959,6 +3960,62 @@ string_input_for_buffer (const char *string)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the distance between two strings using the Levenshtein algorithm.
|
||||
* See: https://en.wikipedia.org/wiki/Levenshtein_distance
|
||||
*/
|
||||
|
||||
int
|
||||
string_levenshtein (const char *string1, const char *string2,
|
||||
int case_sensitive)
|
||||
{
|
||||
int x, y, length1, length2, last_diag, old_diag;
|
||||
wint_t char1, char2;
|
||||
const char *ptr_str1, *ptr_str2;
|
||||
|
||||
length1 = (string1) ? utf8_strlen (string1) : 0;
|
||||
length2 = (string2) ? utf8_strlen (string2) : 0;
|
||||
|
||||
if (length1 == 0)
|
||||
return length2;
|
||||
if (length2 == 0)
|
||||
return length1;
|
||||
|
||||
int column[length1 + 1];
|
||||
|
||||
for (y = 1; y <= length1; y++)
|
||||
{
|
||||
column[y] = y;
|
||||
}
|
||||
|
||||
ptr_str2 = string2;
|
||||
|
||||
for (x = 1; x <= length2; x++)
|
||||
{
|
||||
char2 = (case_sensitive) ?
|
||||
(wint_t)utf8_char_int (ptr_str2) :
|
||||
towlower (utf8_char_int (ptr_str2));
|
||||
column[0] = x;
|
||||
ptr_str1 = string1;
|
||||
for (y = 1, last_diag = x - 1; y <= length1; y++)
|
||||
{
|
||||
char1 = (case_sensitive) ?
|
||||
(wint_t)utf8_char_int (ptr_str1) :
|
||||
towlower (utf8_char_int (ptr_str1));
|
||||
old_diag = column[y];
|
||||
column[y] = MIN3(
|
||||
column[y] + 1,
|
||||
column[y - 1] + 1,
|
||||
last_diag + ((char1 == char2) ? 0 : 1));
|
||||
last_diag = old_diag;
|
||||
ptr_str1 = utf8_next_char (ptr_str1);
|
||||
}
|
||||
ptr_str2 = utf8_next_char (ptr_str2);
|
||||
}
|
||||
|
||||
return column[length1];
|
||||
}
|
||||
|
||||
/*
|
||||
* Replaces ${vars} using a callback that returns replacement value (this value
|
||||
* must be newly allocated because it will be freed in this function).
|
||||
|
||||
@@ -134,6 +134,8 @@ extern char *string_hex_dump (const char *data, int data_size,
|
||||
const char *prefix, const char *suffix);
|
||||
extern int string_is_command_char (const char *string);
|
||||
extern const char *string_input_for_buffer (const char *string);
|
||||
extern int string_levenshtein (const char *string1, const char *string2,
|
||||
int case_sensitive);
|
||||
extern char *string_replace_with_callback (const char *string,
|
||||
const char *prefix,
|
||||
const char *suffix,
|
||||
|
||||
@@ -2540,6 +2540,60 @@ TEST(CoreString, InputForBuffer)
|
||||
config_file_option_reset (config_look_command_chars, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Tests functions:
|
||||
* string_levenshtein
|
||||
*/
|
||||
|
||||
TEST(CoreString, Levenshtein)
|
||||
{
|
||||
LONGS_EQUAL(0, string_levenshtein (NULL, NULL, 1));
|
||||
LONGS_EQUAL(0, string_levenshtein ("", "", 1));
|
||||
LONGS_EQUAL(3, string_levenshtein (NULL, "abc", 1));
|
||||
LONGS_EQUAL(3, string_levenshtein ("abc", NULL, 1));
|
||||
LONGS_EQUAL(3, string_levenshtein ("", "abc", 1));
|
||||
LONGS_EQUAL(3, string_levenshtein ("abc", "", 1));
|
||||
|
||||
LONGS_EQUAL(0, string_levenshtein ("abc", "abc", 1));
|
||||
LONGS_EQUAL(1, string_levenshtein ("abc", "ab", 1));
|
||||
LONGS_EQUAL(1, string_levenshtein ("ab", "abc", 1));
|
||||
LONGS_EQUAL(2, string_levenshtein ("abc", "a", 1));
|
||||
LONGS_EQUAL(2, string_levenshtein ("a", "abc", 1));
|
||||
LONGS_EQUAL(3, string_levenshtein ("abc", "", 1));
|
||||
LONGS_EQUAL(3, string_levenshtein ("", "abc", 1));
|
||||
|
||||
LONGS_EQUAL(3, string_levenshtein ("abc", "ABC", 1));
|
||||
LONGS_EQUAL(3, string_levenshtein ("abc", "AB", 1));
|
||||
LONGS_EQUAL(3, string_levenshtein ("ab", "ABC", 1));
|
||||
LONGS_EQUAL(3, string_levenshtein ("abc", "A", 1));
|
||||
LONGS_EQUAL(3, string_levenshtein ("a", "ABC", 1));
|
||||
LONGS_EQUAL(3, string_levenshtein ("abc", "", 1));
|
||||
LONGS_EQUAL(3, string_levenshtein ("", "ABC", 1));
|
||||
|
||||
LONGS_EQUAL(0, string_levenshtein ("abc", "ABC", 0));
|
||||
LONGS_EQUAL(1, string_levenshtein ("abc", "AB", 0));
|
||||
LONGS_EQUAL(1, string_levenshtein ("ab", "ABC", 0));
|
||||
LONGS_EQUAL(2, string_levenshtein ("abc", "A", 0));
|
||||
LONGS_EQUAL(2, string_levenshtein ("a", "ABC", 0));
|
||||
LONGS_EQUAL(3, string_levenshtein ("abc", "", 0));
|
||||
LONGS_EQUAL(3, string_levenshtein ("", "ABC", 0));
|
||||
|
||||
LONGS_EQUAL(2, string_levenshtein ("response", "respond", 1));
|
||||
LONGS_EQUAL(4, string_levenshtein ("response", "resist", 1));
|
||||
|
||||
LONGS_EQUAL(2, string_levenshtein ("response", "responsive", 1));
|
||||
|
||||
/* with UTF-8 chars */
|
||||
LONGS_EQUAL(1, string_levenshtein ("é", "É", 1));
|
||||
LONGS_EQUAL(0, string_levenshtein ("é", "É", 0));
|
||||
LONGS_EQUAL(1, string_levenshtein ("é", "à", 1));
|
||||
LONGS_EQUAL(1, string_levenshtein ("é", "à", 0));
|
||||
LONGS_EQUAL(1, string_levenshtein ("té", "to", 1));
|
||||
LONGS_EQUAL(1, string_levenshtein ("noël", "noel", 1));
|
||||
LONGS_EQUAL(2, string_levenshtein ("bôô", "boo", 1));
|
||||
LONGS_EQUAL(2, string_levenshtein ("界世", "こん", 1));
|
||||
}
|
||||
|
||||
/*
|
||||
* Tests functions:
|
||||
* string_get_priority_and_name
|
||||
|
||||
Reference in New Issue
Block a user