core: improve the string_replace_regex function (add reference char, change syntax for match refs)

The reference char is now an argument for the function.
The references are now $0 .. $99 (instead of \0 .. \9), and $+ was added
(the last match, i.e. the one with the highest number).
The syntax to replace all chars of a match with one char is now: $.cN or $.c+
(for example: "$.*3" replaces each char of match 3 with '*').
v2.8-utf8proc
Sebastien Helleu 2014-02-09 15:14:07 +01:00
parent f666a356d7
commit 7c0d9fe850
6 changed files with 102 additions and 43 deletions

View File

@ -1153,11 +1153,12 @@ string_has_highlight_regex (const char *string, const char *regex)
char *
string_replace_regex_get_replace (const char *string, regmatch_t *regex_match,
const char *replace)
int last_match, const char *replace,
const char reference_char)
{
int length, length_current, length_add, match;
const char *ptr_replace, *ptr_add;
char *result, *result2, *modified_replace, *temp;
char *result, *result2, *modified_replace, *temp, char_replace;
/* default length is length*2, it will grow later if needed */
length = (strlen (string) * 2);
@ -1174,29 +1175,68 @@ string_replace_regex_get_replace (const char *string, regmatch_t *regex_match,
length_add = 0;
modified_replace = NULL;
if (ptr_replace[0] == '\\')
if ((ptr_replace[0] == '\\') && (ptr_replace[1] == reference_char))
{
if (ptr_replace[1] == '\\')
/* escaped reference char */
ptr_add = ptr_replace + 1;
length_add = 1;
ptr_replace += 2;
}
else if (ptr_replace[0] == reference_char)
{
if ((ptr_replace[1] == '+') || isdigit ((unsigned char)ptr_replace[1]))
{
ptr_add = ptr_replace;
length_add = 1;
ptr_replace += 2;
}
else if (isdigit ((unsigned char)ptr_replace[1]))
{
match = ptr_replace[1] - '0';
if (ptr_replace[1] == '+')
{
/* reference to last match */
match = last_match;
ptr_replace += 2;
}
else
{
/* reference to match 0 .. 99 */
if (isdigit ((unsigned char)ptr_replace[2]))
{
match = ((ptr_replace[1] - '0') * 10) + (ptr_replace[2] - '0');
ptr_replace += 3;
}
else
{
match = ptr_replace[1] - '0';
ptr_replace += 2;
}
}
if (regex_match[match].rm_so >= 0)
{
ptr_add = string + regex_match[match].rm_so;
length_add = regex_match[match].rm_eo - regex_match[match].rm_so;
}
ptr_replace += 2;
}
else if ((ptr_replace[1] >= 32)
&& (ptr_replace[1] <= 126)
&& isdigit ((unsigned char)ptr_replace[2]))
else if ((ptr_replace[1] == '.')
&& (ptr_replace[2] >= 32) && (ptr_replace[2] <= 126)
&& ((ptr_replace[3] == '+') || isdigit ((unsigned char)ptr_replace[3])))
{
match = ptr_replace[2] - '0';
char_replace = ptr_replace[2];
if (ptr_replace[3] == '+')
{
/* reference to last match */
match = last_match;
ptr_replace += 4;
}
else
{
/* reference to match 0 .. 99 */
if (isdigit ((unsigned char)ptr_replace[4]))
{
match = ((ptr_replace[3] - '0') * 10) + (ptr_replace[4] - '0');
ptr_replace += 5;
}
else
{
match = ptr_replace[3] - '0';
ptr_replace += 4;
}
}
if (regex_match[match].rm_so >= 0)
{
temp = string_strndup (string + regex_match[match].rm_so,
@ -1207,19 +1247,17 @@ string_replace_regex_get_replace (const char *string, regmatch_t *regex_match,
modified_replace = malloc (length_add + 1);
if (modified_replace)
{
memset (modified_replace, ptr_replace[1],
length_add);
memset (modified_replace, char_replace, length_add);
modified_replace[length_add] = '\0';
ptr_add = modified_replace;
}
free (temp);
}
}
ptr_replace += 3;
}
else
{
/* just ignore the '\' */
/* just ignore the reference char */
ptr_replace++;
}
}
@ -1264,21 +1302,32 @@ string_replace_regex_get_replace (const char *string, regmatch_t *regex_match,
* The argument "regex" is a pointer to a regex compiled with function regcomp
* (or WeeChat function string_regcomp).
*
* The argument "replace" can contain references to matching groups, from \1
* to \9 for match 1 to 9 (\0 is the whole match).
* Special references \c0 to \c9 can be used to replace all matching chars by
* the char 'c', which can be between space (32) and '~' (126).
* For example \*1 will replace matching chars in group 1 by '*'.
* The argument "replace" can contain references to matches:
* $0 .. $99 match 0 to 99 (0 is the whole match, 1 .. 99 are captured groups)
* $+ the last match (with highest number)
* $.*N match N (can be '+' or 0 to 99), with all chars replaced by '*'
* (the char '*' can be replaced by any char between space (32)
* and '~' (126))
*
* Examples:
*
* string | regex | replace | result
* ----------+---------------+-----------+-------------
* test foo | test | Z | Z foo
* test foo | ^(test +)(.*) | $2 | foo
* test foo | ^(test +)(.*) | $1 / $.*2 | test / ***
* test foo | ^(test +)(.*) | $.%+ | %%%
*
* Note: result must be freed after use.
*/
char *
string_replace_regex (const char *string, void *regex, const char *replace)
string_replace_regex (const char *string, void *regex, const char *replace,
const char reference_char)
{
char *result, *result2, *str_replace;
int length, length_replace, start_offset, i, rc, end;
regmatch_t regex_match[10];
int length, length_replace, start_offset, i, rc, end, last_match;
regmatch_t regex_match[100];
if (!string)
return NULL;
@ -1292,12 +1341,12 @@ string_replace_regex (const char *string, void *regex, const char *replace)
start_offset = 0;
while (result && result[start_offset])
{
for (i = 0; i < 10; i++)
for (i = 0; i < 100; i++)
{
regex_match[i].rm_so = -1;
}
rc = regexec ((regex_t *)regex, result + start_offset, 10, regex_match,
rc = regexec ((regex_t *)regex, result + start_offset, 100, regex_match,
0);
/*
* no match found: exit the loop (if rm_eo == 0, it is an empty match
@ -1311,10 +1360,12 @@ string_replace_regex (const char *string, void *regex, const char *replace)
}
/* adjust the start/end offsets */
for (i = 0; i < 10; i++)
last_match = 0;
for (i = 0; i < 100; i++)
{
if (regex_match[i].rm_so >= 0)
{
last_match = i;
regex_match[i].rm_so += start_offset;
regex_match[i].rm_eo += start_offset;
}
@ -1324,7 +1375,8 @@ string_replace_regex (const char *string, void *regex, const char *replace)
end = !result[regex_match[0].rm_eo];
str_replace = string_replace_regex_get_replace (result, regex_match,
replace);
last_match,
replace, reference_char);
length_replace = (str_replace) ? strlen (str_replace) : 0;
length = regex_match[0].rm_so + length_replace +

View File

@ -58,7 +58,8 @@ extern int string_has_highlight_regex_compiled (const char *string,
regex_t *regex);
extern int string_has_highlight_regex (const char *string, const char *regex);
extern char *string_replace_regex (const char *string, void *regex,
const char *replace);
const char *replace,
const char reference_char);
extern char **string_split (const char *string, const char *separators,
int keep_eol, int num_items_max, int *num_items);
extern char **string_split_shared (const char *string, const char *separators,

View File

@ -114,7 +114,8 @@ trigger_callback_replace_regex (struct t_trigger *trigger,
value = weechat_string_replace_regex (ptr_value,
trigger->regex[i].regex,
trigger->regex[i].replace_eval);
trigger->regex[i].replace_eval,
'$');
if (!value)
continue;

View File

@ -618,12 +618,12 @@ trigger_command_init ()
" replace password with '*' in /oper command (in command line and "
"command history):\n"
" /trigger add oper modifier input_text_display;history_add "
"\"\" \"==^(/oper +\\S+ +)(.*)==\\1\\*2\"\n"
"\"\" \"==^(/oper +\\S+ +)(.*)==$1$.*2\"\n"
" add text attributes in *bold*, _underline_ and /italic/:\n"
" /trigger add effects modifier weechat_print \"\" "
"\"==\\*(\\S+)\\*==*${color:bold}\\1${color:-bold}*== "
"==_(\\S+)_==_${color:underline}\\1${color:-underline}_== "
"==/(\\S+)/==/${color:italic}\\1${color:-italic}/\"\n"
"\"==\\*(\\S+)\\*==*${color:bold}$1${color:-bold}*== "
"==_(\\S+)_==_${color:underline}$1${color:-underline}_== "
"==/(\\S+)/==/${color:italic}$1${color:-italic}/\"\n"
" silently save config each hour:\n"
" /trigger add cfgsave timer 3600000;0;0 \"\" \"\" \"/mute /save\""),
"list|listfull"

View File

@ -221,7 +221,9 @@ trigger_config_create_option (const char *trigger_name, int index_option,
"many regex can be separated by a space, for example: "
"\"/regex1/replace1/var1 /regex2/replace2/var2\"; the "
"separator \"/\" can be replaced by any char (one or more "
"identical chars), except '\\' and parentheses"),
"identical chars), except '\\' and parentheses; matching "
"groups can be used in replace: $0 to $99, $+ for last "
"match and $.cN to replace all chars of group N by char c"),
NULL, 0, 0, value, NULL, 0,
NULL, NULL, &trigger_config_change_regex, NULL, NULL, NULL);
break;

View File

@ -57,7 +57,7 @@ struct timeval;
* please change the date with current one; for a second change at same
* date, increment the 01, otherwise please keep 01.
*/
#define WEECHAT_PLUGIN_API_VERSION "20140131-01"
#define WEECHAT_PLUGIN_API_VERSION "20140208-01"
/* macros for defining plugin infos */
#define WEECHAT_PLUGIN_NAME(__name) \
@ -248,7 +248,8 @@ struct t_weechat_plugin
const char *highlight_words);
int (*string_has_highlight_regex) (const char *string, const char *regex);
char *(*string_replace_regex) (const char *string, void *regex,
const char *replace);
const char *replace,
const char reference_char);
char **(*string_split) (const char *string, const char *separators,
int keep_eol, int num_items_max, int *num_items);
char **(*string_split_shell) (const char *string, int *num_items);
@ -1012,8 +1013,10 @@ extern int weechat_plugin_end (struct t_weechat_plugin *plugin);
weechat_plugin->string_has_highlight(__string, __highlight_words)
#define weechat_string_has_highlight_regex(__string, __regex) \
weechat_plugin->string_has_highlight_regex(__string, __regex)
#define weechat_string_replace_regex(__string, __regex, __replace) \
weechat_plugin->string_replace_regex(__string, __regex, __replace)
#define weechat_string_replace_regex(__string, __regex, __replace, \
__reference_char) \
weechat_plugin->string_replace_regex(__string, __regex, __replace, \
__reference_char)
#define weechat_string_split(__string, __separator, __eol, __max, \
__num_items) \
weechat_plugin->string_split(__string, __separator, __eol, \