From 7c0d9fe850efa23d72725878e44d419caabd114b Mon Sep 17 00:00:00 2001 From: Sebastien Helleu Date: Sun, 9 Feb 2014 15:14:07 +0100 Subject: [PATCH] core: improve the string_replace_regex function (add reference char, change syntax for match refs) The reference char is now an argument for the function. The references are now $0 .. $99 and $+ was added (last match, with highest number). The syntax to replace a match with one char is now: $.cN or $.c+ (for example: "$.*3"). --- src/core/wee-string.c | 116 ++++++++++++++++++------- src/core/wee-string.h | 3 +- src/plugins/trigger/trigger-callback.c | 3 +- src/plugins/trigger/trigger-command.c | 8 +- src/plugins/trigger/trigger-config.c | 4 +- src/plugins/weechat-plugin.h | 11 ++- 6 files changed, 102 insertions(+), 43 deletions(-) diff --git a/src/core/wee-string.c b/src/core/wee-string.c index d54e24ec2..2e7cc662c 100644 --- a/src/core/wee-string.c +++ b/src/core/wee-string.c @@ -1153,11 +1153,12 @@ string_has_highlight_regex (const char *string, const char *regex) char * string_replace_regex_get_replace (const char *string, regmatch_t *regex_match, - const char *replace) + int last_match, const char *replace, + const char reference_char) { int length, length_current, length_add, match; const char *ptr_replace, *ptr_add; - char *result, *result2, *modified_replace, *temp; + char *result, *result2, *modified_replace, *temp, char_replace; /* default length is length*2, it will grow later if needed */ length = (strlen (string) * 2); @@ -1174,29 +1175,68 @@ string_replace_regex_get_replace (const char *string, regmatch_t *regex_match, length_add = 0; modified_replace = NULL; - if (ptr_replace[0] == '\\') + if ((ptr_replace[0] == '\\') && (ptr_replace[1] == reference_char)) { - if (ptr_replace[1] == '\\') + /* escaped reference char */ + ptr_add = ptr_replace + 1; + length_add = 1; + ptr_replace += 2; + } + else if (ptr_replace[0] == reference_char) + { + if ((ptr_replace[1] == '+') || isdigit ((unsigned char)ptr_replace[1])) { - ptr_add = ptr_replace; - length_add = 1; - ptr_replace += 2; - } - else if (isdigit ((unsigned char)ptr_replace[1])) - { - match = ptr_replace[1] - '0'; + if (ptr_replace[1] == '+') + { + /* reference to last match */ + match = last_match; + ptr_replace += 2; + } + else + { + /* reference to match 0 .. 99 */ + if (isdigit ((unsigned char)ptr_replace[2])) + { + match = ((ptr_replace[1] - '0') * 10) + (ptr_replace[2] - '0'); + ptr_replace += 3; + } + else + { + match = ptr_replace[1] - '0'; + ptr_replace += 2; + } + } if (regex_match[match].rm_so >= 0) { ptr_add = string + regex_match[match].rm_so; length_add = regex_match[match].rm_eo - regex_match[match].rm_so; } - ptr_replace += 2; } - else if ((ptr_replace[1] >= 32) - && (ptr_replace[1] <= 126) - && isdigit ((unsigned char)ptr_replace[2])) + else if ((ptr_replace[1] == '.') + && (ptr_replace[2] >= 32) && (ptr_replace[2] <= 126) + && ((ptr_replace[3] == '+') || isdigit ((unsigned char)ptr_replace[3]))) { - match = ptr_replace[2] - '0'; + char_replace = ptr_replace[2]; + if (ptr_replace[3] == '+') + { + /* reference to last match */ + match = last_match; + ptr_replace += 4; + } + else + { + /* reference to match 0 .. 99 */ + if (isdigit ((unsigned char)ptr_replace[4])) + { + match = ((ptr_replace[3] - '0') * 10) + (ptr_replace[4] - '0'); + ptr_replace += 5; + } + else + { + match = ptr_replace[3] - '0'; + ptr_replace += 4; + } + } if (regex_match[match].rm_so >= 0) { temp = string_strndup (string + regex_match[match].rm_so, @@ -1207,19 +1247,17 @@ string_replace_regex_get_replace (const char *string, regmatch_t *regex_match, modified_replace = malloc (length_add + 1); if (modified_replace) { - memset (modified_replace, ptr_replace[1], - length_add); + memset (modified_replace, char_replace, length_add); modified_replace[length_add] = '\0'; ptr_add = modified_replace; } free (temp); } } - ptr_replace += 3; } else { - /* just ignore the '\' */ + /* just ignore the reference char */ ptr_replace++; } } @@ -1264,21 +1302,32 @@ string_replace_regex_get_replace (const char *string, regmatch_t *regex_match, * The argument "regex" is a pointer to a regex compiled with function regcomp * (or WeeChat function string_regcomp). * - * The argument "replace" can contain references to matching groups, from \1 - * to \9 for match 1 to 9 (\0 is the whole match). - * Special references \c0 to \c9 can be used to replace all matching chars by - * the char 'c', which can be between space (32) and '~' (126). - * For example \*1 will replace matching chars in group 1 by '*'. + * The argument "replace" can contain references to matches: + * $0 .. $99 match 0 to 99 (0 is whole match, 1 .. 99 are groups captured) + * $+ the last match (with highest number) + * $.*N match N (can be '+' or 0 to 99), with all chars replaced by '*' + * (the char '*' can be replaced by any char between space (32) + * and '~' (126)) + * + * Examples: + * + * string | regex | replace | result + * ----------+---------------+-----------+------------- + * test foo | test | Z | Z foo + * test foo | ^(test +)(.*) | $2 | foo + * test foo | ^(test +)(.*) | $1 / $.*2 | test / *** + * test foo | ^(test +)(.*) | $.%+ | %%% * * Note: result must be freed after use. */ char * -string_replace_regex (const char *string, void *regex, const char *replace) +string_replace_regex (const char *string, void *regex, const char *replace, + const char reference_char) { char *result, *result2, *str_replace; - int length, length_replace, start_offset, i, rc, end; - regmatch_t regex_match[10]; + int length, length_replace, start_offset, i, rc, end, last_match; + regmatch_t regex_match[100]; if (!string) return NULL; @@ -1292,12 +1341,12 @@ string_replace_regex (const char *string, void *regex, const char *replace) start_offset = 0; while (result && result[start_offset]) { - for (i = 0; i < 10; i++) + for (i = 0; i < 100; i++) { regex_match[i].rm_so = -1; } - rc = regexec ((regex_t *)regex, result + start_offset, 10, regex_match, + rc = regexec ((regex_t *)regex, result + start_offset, 100, regex_match, 0); /* * no match found: exit the loop (if rm_eo == 0, it is an empty match @@ -1311,10 +1360,12 @@ string_replace_regex (const char *string, void *regex, const char *replace) } /* adjust the start/end offsets */ - for (i = 0; i < 10; i++) + last_match = 0; + for (i = 0; i < 100; i++) { if (regex_match[i].rm_so >= 0) { + last_match = i; regex_match[i].rm_so += start_offset; regex_match[i].rm_eo += start_offset; } @@ -1324,7 +1375,8 @@ string_replace_regex (const char *string, void *regex, const char *replace) end = !result[regex_match[0].rm_eo]; str_replace = string_replace_regex_get_replace (result, regex_match, - replace); + last_match, + replace, reference_char); length_replace = (str_replace) ? strlen (str_replace) : 0; length = regex_match[0].rm_so + length_replace + diff --git a/src/core/wee-string.h b/src/core/wee-string.h index 15c3cb697..1ae8cb9b2 100644 --- a/src/core/wee-string.h +++ b/src/core/wee-string.h @@ -58,7 +58,8 @@ extern int string_has_highlight_regex_compiled (const char *string, regex_t *regex); extern int string_has_highlight_regex (const char *string, const char *regex); extern char *string_replace_regex (const char *string, void *regex, - const char *replace); + const char *replace, + const char reference_char); extern char **string_split (const char *string, const char *separators, int keep_eol, int num_items_max, int *num_items); extern char **string_split_shared (const char *string, const char *separators, diff --git a/src/plugins/trigger/trigger-callback.c b/src/plugins/trigger/trigger-callback.c index 16d6e6ca6..2feadd795 100644 --- a/src/plugins/trigger/trigger-callback.c +++ b/src/plugins/trigger/trigger-callback.c @@ -114,7 +114,8 @@ trigger_callback_replace_regex (struct t_trigger *trigger, value = weechat_string_replace_regex (ptr_value, trigger->regex[i].regex, - trigger->regex[i].replace_eval); + trigger->regex[i].replace_eval, + '$'); if (!value) continue; diff --git a/src/plugins/trigger/trigger-command.c b/src/plugins/trigger/trigger-command.c index 079644ce1..c6f9be770 100644 --- a/src/plugins/trigger/trigger-command.c +++ b/src/plugins/trigger/trigger-command.c @@ -618,12 +618,12 @@ trigger_command_init () " replace password with '*' in /oper command (in command line and " "command history):\n" " /trigger add oper modifier input_text_display;history_add " - "\"\" \"==^(/oper +\\S+ +)(.*)==\\1\\*2\"\n" + "\"\" \"==^(/oper +\\S+ +)(.*)==$1$.*2\"\n" " add text attributes in *bold*, _underline_ and /italic/:\n" " /trigger add effects modifier weechat_print \"\" " - "\"==\\*(\\S+)\\*==*${color:bold}\\1${color:-bold}*== " - "==_(\\S+)_==_${color:underline}\\1${color:-underline}_== " - "==/(\\S+)/==/${color:italic}\\1${color:-italic}/\"\n" + "\"==\\*(\\S+)\\*==*${color:bold}$1${color:-bold}*== " + "==_(\\S+)_==_${color:underline}$1${color:-underline}_== " + "==/(\\S+)/==/${color:italic}$1${color:-italic}/\"\n" " silently save config each hour:\n" " /trigger add cfgsave timer 3600000;0;0 \"\" \"\" \"/mute /save\""), "list|listfull" diff --git a/src/plugins/trigger/trigger-config.c b/src/plugins/trigger/trigger-config.c index 1dc27c0c2..dc48cfdbe 100644 --- a/src/plugins/trigger/trigger-config.c +++ b/src/plugins/trigger/trigger-config.c @@ -221,7 +221,9 @@ trigger_config_create_option (const char *trigger_name, int index_option, "many regex can be separated by a space, for example: " "\"/regex1/replace1/var1 /regex2/replace2/var2\"; the " "separator \"/\" can be replaced by any char (one or more " - "identical chars), except '\\' and parentheses"), + "identical chars), except '\\' and parentheses; matching " + "groups can be used in replace: $0 to $99, $+ for last " + "match and $.cN to replace all chars of group N by char c"), NULL, 0, 0, value, NULL, 0, NULL, NULL, &trigger_config_change_regex, NULL, NULL, NULL); break; diff --git a/src/plugins/weechat-plugin.h b/src/plugins/weechat-plugin.h index 2cb562cdb..36e1399e6 100644 --- a/src/plugins/weechat-plugin.h +++ b/src/plugins/weechat-plugin.h @@ -57,7 +57,7 @@ struct timeval; * please change the date with current one; for a second change at same * date, increment the 01, otherwise please keep 01. */ -#define WEECHAT_PLUGIN_API_VERSION "20140131-01" +#define WEECHAT_PLUGIN_API_VERSION "20140208-01" /* macros for defining plugin infos */ #define WEECHAT_PLUGIN_NAME(__name) \ @@ -248,7 +248,8 @@ struct t_weechat_plugin const char *highlight_words); int (*string_has_highlight_regex) (const char *string, const char *regex); char *(*string_replace_regex) (const char *string, void *regex, - const char *replace); + const char *replace, + const char reference_char); char **(*string_split) (const char *string, const char *separators, int keep_eol, int num_items_max, int *num_items); char **(*string_split_shell) (const char *string, int *num_items); @@ -1012,8 +1013,10 @@ extern int weechat_plugin_end (struct t_weechat_plugin *plugin); weechat_plugin->string_has_highlight(__string, __highlight_words) #define weechat_string_has_highlight_regex(__string, __regex) \ weechat_plugin->string_has_highlight_regex(__string, __regex) -#define weechat_string_replace_regex(__string, __regex, __replace) \ - weechat_plugin->string_replace_regex(__string, __regex, __replace) +#define weechat_string_replace_regex(__string, __regex, __replace, \ + __reference_char) \ + weechat_plugin->string_replace_regex(__string, __regex, __replace, \ + __reference_char) #define weechat_string_split(__string, __separator, __eol, __max, \ __num_items) \ weechat_plugin->string_split(__string, __separator, __eol, \