1
0
mirror of https://github.com/weechat/weechat.git synced 2026-06-30 06:46:38 +02:00

core: return number of bytes for UTF-8 char in function utf8_int_string

This commit is contained in:
Sébastien Helleu
2022-12-17 20:25:07 +01:00
parent 6aedddd351
commit eb6cc0bc2a
4 changed files with 27 additions and 12 deletions
+1 -2
View File
@@ -1073,10 +1073,9 @@ string_convert_escaped_chars (const char *string)
{
value = (value * 16) + HEX2DEC(ptr_string[i + 1]);
}
utf8_int_string (value, utf_char);
length = utf8_int_string (value, utf_char);
if (utf_char[0])
{
length = strlen (utf_char);
memcpy (output + pos_output, utf_char, length);
pos_output += length;
}
+19 -3
View File
@@ -331,21 +331,32 @@ utf8_char_int (const char *string)
*
* In case of error (if unicode value is > 0x1FFFFF), the string is set to an
* empty string (string[0] == '\0').
*
* Returns the number of bytes in the UTF-8 char (not counting the final '\0').
*/
void
int
utf8_int_string (unsigned int unicode_value, char *string)
{
int num_bytes;
num_bytes = 0;
if (!string)
return;
return num_bytes;
string[0] = '\0';
if (unicode_value <= 0x007F)
if (unicode_value == 0)
{
/* NUL char */
}
else if (unicode_value <= 0x007F)
{
/* UTF-8, 1 byte: 0vvvvvvv */
string[0] = unicode_value;
string[1] = '\0';
num_bytes = 1;
}
else if (unicode_value <= 0x07FF)
{
@@ -353,6 +364,7 @@ utf8_int_string (unsigned int unicode_value, char *string)
string[0] = 0xC0 | ((unicode_value >> 6) & 0x1F);
string[1] = 0x80 | (unicode_value & 0x3F);
string[2] = '\0';
num_bytes = 2;
}
else if (unicode_value <= 0xFFFF)
{
@@ -361,6 +373,7 @@ utf8_int_string (unsigned int unicode_value, char *string)
string[1] = 0x80 | ((unicode_value >> 6) & 0x3F);
string[2] = 0x80 | (unicode_value & 0x3F);
string[3] = '\0';
num_bytes = 3;
}
else if (unicode_value <= 0x1FFFFF)
{
@@ -370,7 +383,10 @@ utf8_int_string (unsigned int unicode_value, char *string)
string[2] = 0x80 | ((unicode_value >> 6) & 0x3F);
string[3] = 0x80 | (unicode_value & 0x3F);
string[4] = '\0';
num_bytes = 4;
}
return num_bytes;
}
/*
+1 -1
View File
@@ -36,7 +36,7 @@ extern const char *utf8_prev_char (const char *string_start,
const char *string);
extern const char *utf8_next_char (const char *string);
extern int utf8_char_int (const char *string);
extern void utf8_int_string (unsigned int unicode_value, char *string);
extern int utf8_int_string (unsigned int unicode_value, char *string);
extern wint_t utf8_wide_char (const char *string);
extern int utf8_char_size (const char *string);
extern int utf8_strlen (const char *string);
+6 -6
View File
@@ -458,16 +458,16 @@ TEST(CoreUtf8, Convert)
LONGS_EQUAL(0x92d, utf8_char_int (utf8_4bytes_truncated_3));
/* convert unicode char to a string */
utf8_int_string (0, NULL);
utf8_int_string (0, result);
LONGS_EQUAL(0, utf8_int_string (0, NULL));
LONGS_EQUAL(0, utf8_int_string (0, result));
STRCMP_EQUAL("", result);
utf8_int_string (235, result);
LONGS_EQUAL(2, utf8_int_string (L'ë', result));
STRCMP_EQUAL("ë", result);
utf8_int_string (0x20ac, result);
LONGS_EQUAL(3, utf8_int_string (L'€', result));
STRCMP_EQUAL("€", result);
utf8_int_string (0x2ee9, result);
LONGS_EQUAL(3, utf8_int_string (0x2ee9, result));
STRCMP_EQUAL(UNICODE_CJK_YELLOW, result);
utf8_int_string (0x24b62, result);
LONGS_EQUAL(4, utf8_int_string (0x24b62, result));
STRCMP_EQUAL(UNICODE_HAN_CHAR, result);
/* get wide char */