api: add argument "length" in function utf8_is_valid()

v2.8-utf8proc
Sébastien Helleu 2015-08-18 07:36:48 +02:00
parent fd1886e883
commit 46a9d17ac3
13 changed files with 188 additions and 46 deletions

View File

@ -15,6 +15,14 @@ https://weechat.org/files/releasenotes/ReleaseNotes-devel.html[release notes]
(file 'ReleaseNotes.asciidoc' in sources).
== Version 1.4 (under dev)
=== New features
* api: add argument "length" in function utf8_is_valid()
=== Bugs fixed
== Version 1.3 (2015-08-16)
=== New features

View File

@ -2151,18 +2151,22 @@ This function is not available in scripting API.
==== utf8_is_valid
_Updated in 1.4._
Check if a string is UTF-8 valid.
Prototype:
[source,C]
----
int weechat_utf8_is_valid (const char *string, char **error);
int weechat_utf8_is_valid (const char *string, int length, char **error);
----
Arguments:
* 'string': string
* 'length': max number of UTF-8 chars to check; if ≤ 0, the whole string is
checked _(WeeChat ≥ 1.4)_
* 'error': if not NULL, '*error' is set with pointer to first non valid UTF-8
char in string, if any
@ -2175,7 +2179,7 @@ C example:
[source,C]
----
char *error;
if (weechat_utf8_is_valid (string, &error))
if (weechat_utf8_is_valid (string, -1, &error))
{
/* ... */
}

View File

@ -2193,18 +2193,22 @@ Cette fonction n'est pas disponible dans l'API script.
==== utf8_is_valid
_Mis à jour dans la 1.4._
Vérifier si une chaîne est valide UTF-8.
Prototype :
[source,C]
----
int weechat_utf8_is_valid (const char *string, char **error);
int weechat_utf8_is_valid (const char *string, int length, char **error);
----
Paramètres :
* 'string' : chaîne
* 'length' : nombre maximum de caractères UTF-8 à vérifier ; si ≤ 0, la chaîne
complète est vérifiée _(WeeChat ≥ 1.4)_
* 'error' : si non NULL, '*error' est alimenté avec le pointeur vers le premier
caractère non valide dans la chaîne, s'il y en a
@ -2217,7 +2221,7 @@ Exemple en C :
[source,C]
----
char *error;
if (weechat_utf8_is_valid (string, &error))
if (weechat_utf8_is_valid (string, -1, &error))
{
/* ... */
}

View File

@ -2226,18 +2226,24 @@ Questa funzione non è disponibile nelle API per lo scripting.
==== utf8_is_valid
// TRANSLATION MISSING
_Updated in 1.4._
Verifica che una stringa sia valida in UTF-8.
Prototipo:
[source,C]
----
int weechat_utf8_is_valid (const char *string, char **error);
int weechat_utf8_is_valid (const char *string, int length, char **error);
----
Argomenti:
* 'string': stringa
// TRANSLATION MISSING
* 'length': max number of UTF-8 chars to check; if ≤ 0, the whole string is
checked _(WeeChat ≥ 1.4)_
* 'error': se non NULL, '*error' è impostato con il puntatore al primo
carattere UTF-8 non valido nella stringa, se esiste
@ -2250,7 +2256,7 @@ Esempio in C:
[source,C]
----
char *error;
if (weechat_utf8_is_valid (string, &error))
if (weechat_utf8_is_valid (string, -1, &error))
{
/* ... */
}

View File

@ -2152,18 +2152,23 @@ if (weechat_utf8_has_8bits (string))
==== utf8_is_valid
_バージョン 1.4 で更新。_
文字列が妥当な UTF-8 表現か確認。
プロトタイプ:
[source,C]
----
int weechat_utf8_is_valid (const char *string, char **error);
int weechat_utf8_is_valid (const char *string, int length, char **error);
----
引数:
* 'string': 文字列
// TRANSLATION MISSING
* 'length': max number of UTF-8 chars to check; if ≤ 0, the whole string is
checked _(WeeChat ≥ 1.4)_
* 'error': NULL でない場合は '*error'
は文字列に含まれる最初の妥当でない UTF-8 文字へのポインタ
@ -2176,7 +2181,7 @@ C 言語での使用例:
[source,C]
----
char *error;
if (weechat_utf8_is_valid (string, &error))
if (weechat_utf8_is_valid (string, -1, &error))
{
/* ... */
}

View File

@ -2307,7 +2307,7 @@ string_iconv_to_internal (const char *charset, const char *string)
if (local_utf8 && (!charset || !charset[0]))
return input;
if (utf8_has_8bits (input) && utf8_is_valid (input, NULL))
if (utf8_has_8bits (input) && utf8_is_valid (input, -1, NULL))
return input;
output = string_iconv (0,

View File

@ -70,18 +70,24 @@ utf8_has_8bits (const char *string)
/*
* Checks if a string is UTF-8 valid.
*
* If length is <= 0, checks whole string.
* If length is > 0, checks only this number of chars (not bytes).
*
* Returns:
* 1: string is UTF-8 valid
* 0: string it not UTF-8 valid, and then if error is not NULL, it is set with
* first non valid UTF-8 char in string
* 0: string is not UTF-8 valid, and then if error is not NULL, it is set
* with first non valid UTF-8 char in string
*/
int
utf8_is_valid (const char *string, char **error)
utf8_is_valid (const char *string, int length, char **error)
{
int code_point;
int code_point, current_char;
while (string && string[0])
current_char = 0;
while (string && string[0]
&& ((length <= 0) || (current_char < length)))
{
/*
* UTF-8, 2 bytes, should be: 110vvvvv 10vvvvvv
@ -142,6 +148,7 @@ utf8_is_valid (const char *string, char **error)
goto invalid;
else
string++;
current_char++;
}
if (error)
*error = NULL;
@ -165,7 +172,7 @@ utf8_normalize (char *string, char replacement)
while (string && string[0])
{
if (utf8_is_valid (string, &error))
if (utf8_is_valid (string, -1, &error))
return;
error[0] = replacement;
string = error + 1;

View File

@ -30,7 +30,7 @@ extern int local_utf8;
extern void utf8_init ();
extern int utf8_has_8bits (const char *string);
extern int utf8_is_valid (const char *string, char **error);
extern int utf8_is_valid (const char *string, int length, char **error);
extern void utf8_normalize (char *string, char replacement);
extern const char *utf8_prev_char (const char *string_start,
const char *string);

View File

@ -378,7 +378,7 @@ gui_key_flush (int paste)
ptr_char = key_str;
while (ptr_char && ptr_char[0])
{
(void) utf8_is_valid (ptr_char, &ptr_error);
(void) utf8_is_valid (ptr_char, -1, &ptr_error);
if (!ptr_error)
break;
next_char = (char *)utf8_next_char (ptr_error);

View File

@ -265,7 +265,7 @@ gui_mouse_event_code2key (const char *code)
* mouse code must have at least:
* one code (for event) + X + Y == 3 bytes or 3 UTF-8 chars
*/
code_utf8 = utf8_is_valid (code, NULL);
code_utf8 = utf8_is_valid (code, -1, NULL);
length = (code_utf8) ? utf8_strlen (code) : (int)strlen (code);
if (length < 3)
return NULL;

View File

@ -214,7 +214,7 @@ gui_key_grab_end_timer_cb (void *data, int remaining_calls)
* but some mouse codes can return ISO chars (for coordinates),
* then we will convert them to UTF-8 string
*/
if (!utf8_is_valid (expanded_key, NULL))
if (!utf8_is_valid (expanded_key, -1, NULL))
{
expanded_key2 = string_iconv_to_internal ("iso-8859-1",
expanded_key);

View File

@ -57,7 +57,7 @@ struct timeval;
* please change the date with current one; for a second change at same
* date, increment the 01, otherwise please keep 01.
*/
#define WEECHAT_PLUGIN_API_VERSION "20150704-02"
#define WEECHAT_PLUGIN_API_VERSION "20150818-01"
/* macros for defining plugin infos */
#define WEECHAT_PLUGIN_NAME(__name) \
@ -317,7 +317,7 @@ struct t_weechat_plugin
/* UTF-8 strings */
int (*utf8_has_8bits) (const char *string);
int (*utf8_is_valid) (const char *string, char **error);
int (*utf8_is_valid) (const char *string, int length, char **error);
void (*utf8_normalize) (char *string, char replacement);
const char *(*utf8_prev_char) (const char *string_start,
const char *string);
@ -1110,8 +1110,8 @@ extern int weechat_plugin_end (struct t_weechat_plugin *plugin);
/* UTF-8 strings */
#define weechat_utf8_has_8bits(__string) \
(weechat_plugin->utf8_has_8bits)(__string)
#define weechat_utf8_is_valid(__string, __error) \
(weechat_plugin->utf8_is_valid)(__string, __error)
#define weechat_utf8_is_valid(__string, __length, __error) \
(weechat_plugin->utf8_is_valid)(__string, __length, __error)
#define weechat_utf8_normalize(__string, __char) \
(weechat_plugin->utf8_normalize)(__string, __char)
#define weechat_utf8_prev_char(__start, __string) \

View File

@ -59,38 +59,146 @@ TEST(Utf8, Validity)
LONGS_EQUAL(1, utf8_has_8bits ("no\xc3\xabl"));
/* check validity */
LONGS_EQUAL(1, utf8_is_valid (NULL, NULL));
LONGS_EQUAL(1, utf8_is_valid (NULL, &error));
LONGS_EQUAL(1, utf8_is_valid ("", NULL));
LONGS_EQUAL(1, utf8_is_valid ("", &error));
LONGS_EQUAL(1, utf8_is_valid ("abc", &error));
LONGS_EQUAL(1, utf8_is_valid (NULL, -1, NULL));
LONGS_EQUAL(1, utf8_is_valid (NULL, 0, NULL));
LONGS_EQUAL(1, utf8_is_valid (NULL, 1, NULL));
LONGS_EQUAL(1, utf8_is_valid (NULL, -1, &error));
LONGS_EQUAL(1, utf8_is_valid (NULL, 0, &error));
LONGS_EQUAL(1, utf8_is_valid (NULL, 1, &error));
LONGS_EQUAL(1, utf8_is_valid ("", -1, NULL));
LONGS_EQUAL(1, utf8_is_valid ("", 0, NULL));
LONGS_EQUAL(1, utf8_is_valid ("", 1, NULL));
LONGS_EQUAL(1, utf8_is_valid ("", -1, &error));
LONGS_EQUAL(1, utf8_is_valid ("", 0, &error));
LONGS_EQUAL(1, utf8_is_valid ("", 1, &error));
LONGS_EQUAL(1, utf8_is_valid ("abc", -1, &error));
POINTERS_EQUAL(NULL, error);
LONGS_EQUAL(1, utf8_is_valid (noel_valid, &error));
LONGS_EQUAL(1, utf8_is_valid ("abc", 0, &error));
POINTERS_EQUAL(NULL, error);
LONGS_EQUAL(0, utf8_is_valid (noel_invalid, &error));
LONGS_EQUAL(1, utf8_is_valid ("abc", 1, &error));
POINTERS_EQUAL(NULL, error);
LONGS_EQUAL(1, utf8_is_valid (noel_valid, -1, &error));
POINTERS_EQUAL(NULL, error);
LONGS_EQUAL(1, utf8_is_valid (noel_valid, 0, &error));
POINTERS_EQUAL(NULL, error);
LONGS_EQUAL(1, utf8_is_valid (noel_valid, 1, &error));
POINTERS_EQUAL(NULL, error);
LONGS_EQUAL(0, utf8_is_valid (noel_invalid, -1, &error));
POINTERS_EQUAL(noel_invalid + 2, error);
LONGS_EQUAL(0, utf8_is_valid (noel_invalid, 0, &error));
POINTERS_EQUAL(noel_invalid + 2, error);
LONGS_EQUAL(1, utf8_is_valid (noel_invalid, 1, &error));
POINTERS_EQUAL(NULL, error);
LONGS_EQUAL(1, utf8_is_valid (noel_invalid, 2, &error));
POINTERS_EQUAL(NULL, error);
LONGS_EQUAL(0, utf8_is_valid (noel_invalid, 3, &error));
POINTERS_EQUAL(noel_invalid + 2, error);
LONGS_EQUAL(0, utf8_is_valid (noel_invalid, 4, &error));
POINTERS_EQUAL(noel_invalid + 2, error);
LONGS_EQUAL(0, utf8_is_valid (noel_invalid, 5, &error));
POINTERS_EQUAL(noel_invalid + 2, error);
/* 2 bytes: code point must be in range U+0080-07FF */
LONGS_EQUAL(0, utf8_is_valid ("\xc0\x80", NULL)); /* U+0 */
LONGS_EQUAL(0, utf8_is_valid ("\xc1\xbf", NULL)); /* U+7F */
LONGS_EQUAL(1, utf8_is_valid ("\xc2\x80", NULL)); /* U+80 */
LONGS_EQUAL(1, utf8_is_valid ("\xdf\xbf", NULL)); /* U+7FF */
/* U+0 */
LONGS_EQUAL(0, utf8_is_valid ("\xc0\x80", -1, NULL));
LONGS_EQUAL(0, utf8_is_valid ("\xc0\x80", 0, NULL));
LONGS_EQUAL(0, utf8_is_valid ("\xc0\x80", 1, NULL));
LONGS_EQUAL(0, utf8_is_valid ("\xc0\x80", 2, NULL));
/* U+7F */
LONGS_EQUAL(0, utf8_is_valid ("\xc1\xbf", -1, NULL));
LONGS_EQUAL(0, utf8_is_valid ("\xc1\xbf", 0, NULL));
LONGS_EQUAL(0, utf8_is_valid ("\xc1\xbf", 1, NULL));
LONGS_EQUAL(0, utf8_is_valid ("\xc1\xbf", 2, NULL));
/* U+80 */
LONGS_EQUAL(1, utf8_is_valid ("\xc2\x80", -1, NULL));
LONGS_EQUAL(1, utf8_is_valid ("\xc2\x80", 0, NULL));
LONGS_EQUAL(1, utf8_is_valid ("\xc2\x80", 1, NULL));
LONGS_EQUAL(1, utf8_is_valid ("\xc2\x80", 2, NULL));
/* U+7FF */
LONGS_EQUAL(1, utf8_is_valid ("\xdf\xbf", -1, NULL));
LONGS_EQUAL(1, utf8_is_valid ("\xdf\xbf", 0, NULL));
LONGS_EQUAL(1, utf8_is_valid ("\xdf\xbf", 1, NULL));
LONGS_EQUAL(1, utf8_is_valid ("\xdf\xbf", 2, NULL));
/* 3 bytes: code point must be in range: U+0800-FFFF */
LONGS_EQUAL(0, utf8_is_valid ("\xe0\x80\x80", NULL)); /* U+0 */
LONGS_EQUAL(0, utf8_is_valid ("\xe0\x9f\xbf", NULL)); /* U+7FF */
LONGS_EQUAL(0, utf8_is_valid ("\xed\xa0\x80", NULL)); /* U+D800 */
LONGS_EQUAL(0, utf8_is_valid ("\xed\xbf\xbf", NULL)); /* U+DFFF */
LONGS_EQUAL(1, utf8_is_valid ("\xe0\xa0\x80", NULL)); /* U+800 */
LONGS_EQUAL(1, utf8_is_valid ("\xed\x9f\xbf", NULL)); /* U+D7FF */
LONGS_EQUAL(1, utf8_is_valid ("\xe7\x80\x80", NULL)); /* U+E000 */
LONGS_EQUAL(1, utf8_is_valid ("\xef\xbf\xbf", NULL)); /* U+FFFF */
/* U+0 */
LONGS_EQUAL(0, utf8_is_valid ("\xe0\x80\x80", -1, NULL));
LONGS_EQUAL(0, utf8_is_valid ("\xe0\x80\x80", 0, NULL));
LONGS_EQUAL(0, utf8_is_valid ("\xe0\x80\x80", 1, NULL));
LONGS_EQUAL(0, utf8_is_valid ("\xe0\x80\x80", 2, NULL));
/* U+7FF */
LONGS_EQUAL(0, utf8_is_valid ("\xe0\x9f\xbf", -1, NULL));
LONGS_EQUAL(0, utf8_is_valid ("\xe0\x9f\xbf", 0, NULL));
LONGS_EQUAL(0, utf8_is_valid ("\xe0\x9f\xbf", 1, NULL));
LONGS_EQUAL(0, utf8_is_valid ("\xe0\x9f\xbf", 2, NULL));
/* U+D800 */
LONGS_EQUAL(0, utf8_is_valid ("\xed\xa0\x80", -1, NULL));
LONGS_EQUAL(0, utf8_is_valid ("\xed\xa0\x80", 0, NULL));
LONGS_EQUAL(0, utf8_is_valid ("\xed\xa0\x80", 1, NULL));
LONGS_EQUAL(0, utf8_is_valid ("\xed\xa0\x80", 2, NULL));
/* U+DFFF */
LONGS_EQUAL(0, utf8_is_valid ("\xed\xbf\xbf", -1, NULL));
LONGS_EQUAL(0, utf8_is_valid ("\xed\xbf\xbf", 0, NULL));
LONGS_EQUAL(0, utf8_is_valid ("\xed\xbf\xbf", 1, NULL));
LONGS_EQUAL(0, utf8_is_valid ("\xed\xbf\xbf", 2, NULL));
/* U+800 */
LONGS_EQUAL(1, utf8_is_valid ("\xe0\xa0\x80", -1, NULL));
LONGS_EQUAL(1, utf8_is_valid ("\xe0\xa0\x80", 0, NULL));
LONGS_EQUAL(1, utf8_is_valid ("\xe0\xa0\x80", 1, NULL));
LONGS_EQUAL(1, utf8_is_valid ("\xe0\xa0\x80", 2, NULL));
/* U+D7FF */
LONGS_EQUAL(1, utf8_is_valid ("\xed\x9f\xbf", -1, NULL));
LONGS_EQUAL(1, utf8_is_valid ("\xed\x9f\xbf", 0, NULL));
LONGS_EQUAL(1, utf8_is_valid ("\xed\x9f\xbf", 1, NULL));
LONGS_EQUAL(1, utf8_is_valid ("\xed\x9f\xbf", 2, NULL));
/* U+7000 (note: U+E000 would be encoded as "\xee\x80\x80") */
LONGS_EQUAL(1, utf8_is_valid ("\xe7\x80\x80", -1, NULL));
LONGS_EQUAL(1, utf8_is_valid ("\xe7\x80\x80", 0, NULL));
LONGS_EQUAL(1, utf8_is_valid ("\xe7\x80\x80", 1, NULL));
LONGS_EQUAL(1, utf8_is_valid ("\xe7\x80\x80", 2, NULL));
/* U+FFFF */
LONGS_EQUAL(1, utf8_is_valid ("\xef\xbf\xbf", -1, NULL));
LONGS_EQUAL(1, utf8_is_valid ("\xef\xbf\xbf", 0, NULL));
LONGS_EQUAL(1, utf8_is_valid ("\xef\xbf\xbf", 1, NULL));
LONGS_EQUAL(1, utf8_is_valid ("\xef\xbf\xbf", 2, NULL));
/* 4 bytes: code point must be in range: U+10000-1FFFFF */
LONGS_EQUAL(0, utf8_is_valid ("\xf0\x80\x80\x80", NULL)); /* U+0 */
LONGS_EQUAL(0, utf8_is_valid ("\xf0\x8f\xbf\xbf", NULL)); /* U+FFFF */
LONGS_EQUAL(1, utf8_is_valid ("\xf0\x90\x80\x80", NULL)); /* U+10000 */
LONGS_EQUAL(1, utf8_is_valid ("\xf7\xbf\xbf\xbf", NULL)); /* U+1FFFFF */
/* U+0 */
LONGS_EQUAL(0, utf8_is_valid ("\xf0\x80\x80\x80", -1, NULL));
LONGS_EQUAL(0, utf8_is_valid ("\xf0\x80\x80\x80", 0, NULL));
LONGS_EQUAL(0, utf8_is_valid ("\xf0\x80\x80\x80", 1, NULL));
LONGS_EQUAL(0, utf8_is_valid ("\xf0\x80\x80\x80", 2, NULL));
/* U+FFFF */
LONGS_EQUAL(0, utf8_is_valid ("\xf0\x8f\xbf\xbf", -1, NULL));
LONGS_EQUAL(0, utf8_is_valid ("\xf0\x8f\xbf\xbf", 0, NULL));
LONGS_EQUAL(0, utf8_is_valid ("\xf0\x8f\xbf\xbf", 1, NULL));
LONGS_EQUAL(0, utf8_is_valid ("\xf0\x8f\xbf\xbf", 2, NULL));
/* U+10000 */
LONGS_EQUAL(1, utf8_is_valid ("\xf0\x90\x80\x80", -1, NULL));
LONGS_EQUAL(1, utf8_is_valid ("\xf0\x90\x80\x80", 0, NULL));
LONGS_EQUAL(1, utf8_is_valid ("\xf0\x90\x80\x80", 1, NULL));
LONGS_EQUAL(1, utf8_is_valid ("\xf0\x90\x80\x80", 2, NULL));
/* U+1FFFFF */
LONGS_EQUAL(1, utf8_is_valid ("\xf7\xbf\xbf\xbf", -1, NULL));
LONGS_EQUAL(1, utf8_is_valid ("\xf7\xbf\xbf\xbf", 0, NULL));
LONGS_EQUAL(1, utf8_is_valid ("\xf7\xbf\xbf\xbf", 1, NULL));
LONGS_EQUAL(1, utf8_is_valid ("\xf7\xbf\xbf\xbf", 2, NULL));
}
/*