/**
 * @file op_regex.cpp
 * This file contains the implementation of a lightweight wrapper around
 * libc regex, providing regular expression match and replace facilities.
 *
 * @remark Copyright 2003 OProfile authors
 * @remark Read the file COPYING
 * @remark Idea comes from TextFilt project <http://textfilt.sourceforge.net>
 *
 * @author Philippe Elie
 */
| |
| #include <cerrno> |
| |
| #include <iostream> |
| #include <fstream> |
| |
| #include "string_manip.h" |
| |
| #include "op_regex.h" |
| |
| using namespace std; |
| |
| namespace { |
| |
// Translate a regcomp()/regexec() error code into its regerror() message.
//   err    - error code returned by the failing regex call
//   regexp - the regex_t object that call operated on
// Returns the message text, without the terminating NUL.
std::string op_regerror(int err, regex_t const & regexp)
{
	// Called with an empty buffer, regerror() only reports the size needed
	// to hold the message, terminating NUL included.
	size_t const needed_size = regerror(err, &regexp, 0, 0);
	if (needed_size == 0)
		return std::string();

	// A std::string owns the storage so nothing has to be released by hand
	// (the previous new[] buffer was never deleted).
	std::string buffer(needed_size, '\0');
	regerror(err, &regexp, &buffer[0], needed_size);

	// Keep only the characters in front of the terminating NUL.
	return std::string(buffer.c_str());
}
| |
| |
| void op_regcomp(regex_t & regexp, string const & pattern) |
| { |
| int err = regcomp(®exp, pattern.c_str(), REG_EXTENDED); |
| if (err) { |
| throw bad_regex("regcomp error: " + op_regerror(err, regexp) |
| + " for pattern : " + pattern); |
| } |
| } |
| |
| |
// Run the compiled expression regex against str.
//   match  - array of at least nmatch entries receiving the offsets of the
//            whole match (entry 0) and of the sub-expressions
//   nmatch - number of entries available in match
// Returns true only when regexec() reports a match. Every non-zero result,
// REG_NOMATCH as well as an execution error, is reported as "no match" so
// that callers never read match after a failed call.
bool op_regexec(regex_t const & regex, std::string const & str, regmatch_t * match,
		size_t nmatch)
{
	return regexec(&regex, str.c_str(), nmatch, match, 0) == 0;
}
| |
| |
// Release what regcomp() allocated for regexp (thin wrapper over regfree()).
void op_regfree(regex_t & regexp)
{
	regfree(&regexp);
}
| |
| |
// Return the sub-expression index designated by the character x seen in a
// "\x" sequence: '0'-'9' give 0-9 and 'a'-'z' give 10-35. Any other character
// gives size_t(-1).
// Plain range comparisons are used instead of isdigit(): handing a negative
// char to isdigit() is undefined behaviour.
size_t subexpr_index(char ch)
{
	if (ch >= '0' && ch <= '9')
		return ch - '0';
	if (ch >= 'a' && ch <= 'z')
		return ch - 'a' + 10;
	return size_t(-1);
}
| |
| } // anonymous namespace |
| |
| |
| bad_regex::bad_regex(string const & pattern) |
| : op_exception(pattern) |
| { |
| } |
| |
| |
| regular_expression_replace::regular_expression_replace(size_t limit_, |
| size_t limit_defs) |
| : |
| limit(limit_), |
| limit_defs_expansion(limit_defs) |
| { |
| } |
| |
| |
| regular_expression_replace::~regular_expression_replace() |
| { |
| for (size_t i = 0 ; i < regex_replace.size() ; ++i) |
| op_regfree(regex_replace[i].regexp); |
| } |
| |
| |
| void regular_expression_replace::add_definition(string const & name, |
| string const & definition) |
| { |
| defs[name] = expand_string(definition); |
| } |
| |
| |
| void regular_expression_replace::add_pattern(string const & pattern, |
| string const & replace) |
| { |
| string expanded_pattern = expand_string(pattern); |
| |
| regex_t regexp; |
| op_regcomp(regexp, expanded_pattern); |
| replace_t regex = { regexp, replace }; |
| regex_replace.push_back(regex); |
| } |
| |
| |
| string regular_expression_replace::expand_string(string const & input) |
| { |
| string last, expanded(input); |
| size_t i = 0; |
| for (i = 0 ; i < limit_defs_expansion ; ++i) { |
| last = expanded; |
| expanded = substitute_definition(last); |
| if (expanded == last) |
| break; |
| } |
| |
| if (i == limit_defs_expansion) |
| throw bad_regex("too many substitution for: + input"); |
| |
| return last; |
| } |
| |
| |
| string regular_expression_replace::substitute_definition(string const & pattern) |
| { |
| string result; |
| bool previous_is_escape = false; |
| |
| for (size_t i = 0 ; i < pattern.length() ; ++i) { |
| if (pattern[i] == '$' && !previous_is_escape) { |
| size_t pos = pattern.find('{', i); |
| if (pos != i + 1) { |
| throw bad_regex("invalid $ in pattern: " + pattern); |
| } |
| size_t end = pattern.find('}', i); |
| if (end == string::npos) { |
| throw bad_regex("no matching '}' in pattern: " + pattern); |
| } |
| string def_name = pattern.substr(pos+1, (end-pos) - 1); |
| if (defs.find(def_name) == defs.end()) { |
| throw bad_regex("definition not found and used in pattern: (" |
| + def_name + ") " + pattern); |
| } |
| result += defs[def_name]; |
| i = end; |
| } else { |
| if (pattern[i] == '\\' && !previous_is_escape) |
| previous_is_escape = true; |
| else |
| previous_is_escape = false; |
| result += pattern[i]; |
| } |
| } |
| |
| return result; |
| } |
| |
| |
| // FIXME limit output string size ? (cause we can have exponential growing |
| // of output string through a rule "a" = "aa") |
| bool regular_expression_replace::execute(string & str) const |
| { |
| bool changed = true; |
| for (size_t nr_iter = 0; changed && nr_iter < limit ; ++nr_iter) { |
| changed = false; |
| for (size_t i = 0 ; i < regex_replace.size() ; ++i) { |
| if (do_execute(str, regex_replace[i])) |
| changed = true; |
| } |
| } |
| |
| // this don't return if the input string has been changed but if |
| // we reach the limit number of iteration. |
| return changed == false; |
| } |
| |
| |
| bool regular_expression_replace::do_execute(string & str, |
| replace_t const & regexp) const |
| { |
| bool changed = false; |
| |
| regmatch_t match[max_match]; |
| for (size_t iter = 0; |
| op_regexec(regexp.regexp, str, match, max_match) && iter < limit; |
| iter++) { |
| changed = true; |
| do_replace(str, regexp.replace, match); |
| } |
| |
| return changed; |
| } |
| |
| |
| regmatch_t const & |
| regular_expression_replace::get_match(regmatch_t const * match, char idx) const |
| { |
| size_t sub_expr = subexpr_index(idx); |
| if (sub_expr == size_t(-1)) |
| throw bad_regex("expect group index: " + idx); |
| if (sub_expr >= max_match) |
| throw bad_regex("illegal group index :" + idx); |
| return match[sub_expr]; |
| } |
| |
| void regular_expression_replace::do_replace |
| (string & str, string const & replace, regmatch_t const * match) const |
| { |
| string inserted; |
| for (size_t i = 0 ; i < replace.length() ; ++i) { |
| if (replace[i] == '\\') { |
| if (i == replace.length() - 1) { |
| throw bad_regex("illegal \\ trailer: " + |
| replace); |
| } |
| ++i; |
| if (replace[i] == '\\') { |
| inserted += '\\'; |
| } else { |
| regmatch_t const & matched = get_match(match, |
| replace[i]); |
| if (matched.rm_so == -1 && |
| matched.rm_eo == -1) { |
| // empty match: nothing todo |
| } else if (matched.rm_so == -1 || |
| matched.rm_eo == -1) { |
| throw bad_regex("illegal match: " + |
| replace); |
| } else { |
| inserted += str.substr(matched.rm_so, |
| matched.rm_eo - matched.rm_so); |
| } |
| } |
| } else { |
| inserted += replace[i]; |
| } |
| } |
| |
| size_t first = match[0].rm_so; |
| size_t count = match[0].rm_eo - match[0].rm_so; |
| |
| str.replace(first, count, inserted); |
| } |
| |
| |
| void setup_regex(regular_expression_replace & regex, |
| string const & filename) |
| { |
| ifstream in(filename.c_str()); |
| if (!in) { |
| throw op_runtime_error("Can't open file " + filename + |
| " for reading", errno); |
| } |
| |
| regular_expression_replace var_name_rule; |
| var_name_rule.add_pattern("^\\$([_a-zA-Z][_a-zA-Z0-9]*)[ ]*=.*", "\\1"); |
| regular_expression_replace var_value_rule; |
| var_value_rule.add_pattern(".*=[ ]*\"(.*)\"", "\\1"); |
| |
| regular_expression_replace left_rule; |
| left_rule.add_pattern("[ ]*\"(.*)\"[ ]*=.*", "\\1"); |
| regular_expression_replace right_rule; |
| right_rule.add_pattern(".*=[ ]*\"(.*)\"", "\\1"); |
| |
| string line; |
| while (getline(in, line)) { |
| line = trim(line); |
| if (line.empty() || line[0] == '#') |
| continue; |
| |
| string temp = line; |
| var_name_rule.execute(temp); |
| if (temp == line) { |
| string left = line; |
| left_rule.execute(left); |
| if (left == line) { |
| throw bad_regex("invalid input file: \"" + line + '"'); |
| } |
| |
| string right = line; |
| right_rule.execute(right); |
| if (right == line) { |
| throw bad_regex("invalid input file: \"" + line + '"'); |
| } |
| |
| regex.add_pattern(left, right); |
| } else { |
| // temp != line ==> var_name_rule succeed to substitute |
| // into temp the var_name present in line |
| string var_name = temp; |
| string var_value = line; |
| var_value_rule.execute(var_value); |
| if (var_value == line) { |
| throw bad_regex("invalid input file: \"" + line + '"'); |
| } |
| |
| regex.add_definition(var_name, var_value); |
| } |
| } |
| } |