1
0
mirror of https://github.com/weechat/weechat.git synced 2026-06-12 14:14:48 +02:00

core: optimize and fix function utf8_strlen_screen with non printable chars

When the string contained non-printable chars, the function returned 1.
For example utf8_strlen_screen("abc\x01") now returns 4 instead of 1.

In addition the function has been optimized to not use the `mbstowcs` function
which is slow; result is up to 15% faster.
This commit is contained in:
Sébastien Helleu
2022-12-03 11:40:30 +01:00
parent 0e6677fbcb
commit e5cbbd781d
2 changed files with 40 additions and 77 deletions
+31 -70
View File
@@ -480,6 +480,29 @@ utf8_strnlen (const char *string, int bytes)
return length;
}
/*
 * Computes how many columns are needed on screen to display one UTF-8 char.
 *
 * A NULL string gives 0 and a tabulation gives the value of the option
 * "config_look_tab_width"; any other char is measured with wcwidth().
 *
 * Returns the number of chars (>= 0).
 */

int
utf8_char_size_screen (const char *string)
{
    int columns;

    if (string == NULL)
        return 0;

    if (*string == '\t')
        return CONFIG_INTEGER(config_look_tab_width);

    columns = wcwidth ((wchar_t)utf8_char_int (string));
    if (columns < 0)
    {
        /* negative width: the char is displayed with a space (size = 1) */
        columns = 1;
    }

    return columns;
}
/*
* Gets number of chars needed on screen to display the UTF-8 string.
*
@@ -489,61 +512,24 @@ utf8_strnlen (const char *string, int bytes)
/*
 * NOTE(review): this span appears to interleave the lines of two successive
 * implementations of the function (diff view without +/- markers): one based
 * on mbstowcs/wcswidth, one summing utf8_char_size_screen() char by char.
 * Confirm against the real file before relying on the statement order.
 */
int
utf8_strlen_screen (const char *string)
{
int length, num_char, add_for_tab;
wchar_t *alloc_wstring, *ptr_wstring, wstring[4+2];
int size_screen;
const char *ptr_string;
if (!string || !string[0])
if (!string)
return 0;
/* local_utf8 not set: return the length given by utf8_strlen instead */
if (!local_utf8)
return utf8_strlen (string);
alloc_wstring = NULL;
if (!string[1] || !string[2] || !string[3] || !string[4])
size_screen = 0;
ptr_string = string;
while (ptr_string && ptr_string[0])
{
/* optimization for max 4 chars: no malloc */
num_char = 4 + 1;
ptr_wstring = wstring;
}
else
{
num_char = mbstowcs (NULL, string, 0) + 1;
alloc_wstring = malloc ((num_char + 1) * sizeof (alloc_wstring[0]));
/* allocation failed: fall back to the length given by utf8_strlen */
if (!alloc_wstring)
return utf8_strlen (string);
ptr_wstring = alloc_wstring;
/* add the screen size of the current char, then move to the next char */
size_screen += utf8_char_size_screen (ptr_string);
ptr_string = utf8_next_char (ptr_string);
}
if (mbstowcs (ptr_wstring, string, num_char) != (size_t)(-1))
{
length = wcswidth (ptr_wstring, num_char);
/*
* if the char is non-printable, wcswidth returns -1
* (for example the length of the snowman without snow (U+26C4) == -1)
* => in this case, consider the length is 1, to prevent any display bug
*/
if (length < 0)
length = 1;
}
else
length = utf8_strlen (string);
if (alloc_wstring)
free (alloc_wstring);
/*
 * each tabulation found in the string adds (tab width - 1) to the length,
 * where tab width is the value of the option "config_look_tab_width"
 */
add_for_tab = CONFIG_INTEGER(config_look_tab_width) - 1;
if (add_for_tab > 0)
{
for (ptr_string = string; ptr_string[0]; ptr_string++)
{
if (ptr_string[0] == '\t')
length += add_for_tab;
}
}
return length;
return size_screen;
}
/*
@@ -649,31 +635,6 @@ utf8_charcasecmp_range (const char *string1, const char *string2, int range)
return (wchar1 < wchar2) ? -1 : ((wchar1 == wchar2) ? 0 : 1);
}
/*
 * Computes how many columns are needed on screen to display one UTF-8 char.
 *
 * The bytes of the char (as counted by utf8_char_size()) are copied into a
 * NUL-terminated scratch buffer, which is then measured with
 * utf8_strlen_screen().
 *
 * Returns the number of chars (>= 0).
 */

int
utf8_char_size_screen (const char *string)
{
    char single_char[16];
    int num_bytes;

    /* a NULL string is handled like a char of size 0 */
    num_bytes = (string) ? utf8_char_size (string) : 0;
    if (num_bytes == 0)
        return 0;

    memcpy (single_char, string, num_bytes);
    single_char[num_bytes] = '\0';

    return utf8_strlen_screen (single_char);
}
/*
* Moves forward N chars in an UTF-8 string.
*
+9 -7
View File
@@ -495,9 +495,9 @@ TEST(CoreUtf8, Size)
/* ël as iso-8859-15: invalid UTF-8 */
LONGS_EQUAL(1, utf8_char_size_screen ("\xebl"));
/* ëlm as iso-8859-15: invalid UTF-8 */
LONGS_EQUAL(1, utf8_char_size_screen ("\xeblm"));
LONGS_EQUAL(2, utf8_char_size_screen ("\xeblm"));
/* ëlmn as iso-8859-15: invalid UTF-8 */
LONGS_EQUAL(1, utf8_char_size_screen ("\xeblmn"));
LONGS_EQUAL(2, utf8_char_size_screen ("\xeblmn"));
/* length of string (in chars) */
LONGS_EQUAL(0, utf8_strlen (NULL));
@@ -530,16 +530,18 @@ TEST(CoreUtf8, Size)
LONGS_EQUAL(1, utf8_strlen_screen ("\x7f"));
LONGS_EQUAL(1, utf8_strlen_screen ("\x01"));
LONGS_EQUAL(4, utf8_strlen_screen (UTF8_NOEL_VALID));
LONGS_EQUAL(4, utf8_strlen_screen ("abc\x01"));
LONGS_EQUAL(8, utf8_strlen_screen ("a" "\x01" UTF8_NOEL_VALID "\x02" "b"));
LONGS_EQUAL(1, utf8_strlen_screen (UNICODE_SOFT_HYPHEN));
LONGS_EQUAL(3, utf8_strlen_screen ("a" UNICODE_SOFT_HYPHEN "b"));
LONGS_EQUAL(5, utf8_strlen_screen ("a" "\x01" UNICODE_SOFT_HYPHEN "\x02" "b"));
LONGS_EQUAL(0, utf8_strlen_screen (UNICODE_ZERO_WIDTH_SPACE));
LONGS_EQUAL(2, utf8_strlen_screen ("a" UNICODE_ZERO_WIDTH_SPACE "b"));
LONGS_EQUAL(4, utf8_strlen_screen ("a" "\x01" UNICODE_ZERO_WIDTH_SPACE "\x02" "b"));
LONGS_EQUAL(2, utf8_strlen_screen (UNICODE_SNOWMAN));
LONGS_EQUAL(4, utf8_strlen_screen ("a" UNICODE_SNOWMAN "b"));
LONGS_EQUAL(6, utf8_strlen_screen ("a" "\x01" UNICODE_SNOWMAN "\x02" "b"));
LONGS_EQUAL(2, utf8_strlen_screen (UNICODE_CJK_YELLOW));
LONGS_EQUAL(4, utf8_strlen_screen ("a" UNICODE_CJK_YELLOW "b"));
LONGS_EQUAL(6, utf8_strlen_screen ("a" "\x01" UNICODE_CJK_YELLOW "\x02" "b"));
LONGS_EQUAL(2, utf8_strlen_screen (UNICODE_HAN_CHAR));
LONGS_EQUAL(4, utf8_strlen_screen ("a" UNICODE_HAN_CHAR "b"));
LONGS_EQUAL(6, utf8_strlen_screen ("a" "\x01" UNICODE_HAN_CHAR "\x02" "b"));
/* length of Tabulation */
LONGS_EQUAL(1, utf8_strlen_screen ("\t"));