2 * @file SubstitutionList.cpp
4 * @brief Implementation file for SubstitutionList.
8 #include "SubstitutionList.h"
10 #include <Poco/RegularExpression.h>
/**
 * @brief Build a copy of @p input in which escape sequences are replaced.
 *
 * Only part of the body is visible in this excerpt. The visible portion
 * handles a two-character hexadecimal form (the two characters located two
 * positions after index i) and a form where the character following index i
 * is a decimal digit. The test that recognises the escape introducer and the
 * remaining cases are outside the visible lines.
 *
 * @param input Text to scan.
 * @return The processed text (accumulated in `result`, which is declared
 *         outside the visible lines).
 */
13 static std::string replaceEscapeSequences(const std::string& input)
// Index-based scan: the code below looks ahead of i (i + 1, i + 2, i + 3).
17 for (size_t i = 0; i < input.size(); ++i)
// Make sure there is a character after position i before inspecting it.
21 if (i + 1 < input.size())
// Hex form: both input[i + 2] and input[i + 3] must exist.
47 if (i + 3 < input.size())
// The two characters expected to be hexadecimal digits.
49 std::string hexValue = input.substr(i + 2, 2);
// NOTE(review): std::stoul stops at the first character that is not a hex
// digit and also accepts leading whitespace, a sign and a "0x" prefix, so an
// input such as "4Z" converts to 0x4 instead of being rejected - confirm this
// leniency is intended.
51 unsigned int intValue = std::stoul(hexValue, nullptr, 16);
// Append the parsed value as a single byte.
52 result += static_cast<char>(intValue);
// std::stoul throws std::invalid_argument when no conversion can be
// performed at all; the handler body is outside the visible lines.
55 catch (const std::invalid_argument&) {
// NOTE(review): input[i + 1] is a plain char; passing a negative value to
// isdigit is undefined behaviour, so a cast to unsigned char is normally
// required for bytes >= 0x80 - confirm.
65 if (isdigit(input[i + 1]))
// The digit itself is copied to the output (presumably so numbered
// references in the replacement text survive - verify against the lines not
// shown here).
67 result += input[i + 1];
/**
 * @brief Construct a substitution item from a pattern and a replacement.
 *
 * @param pattern              Regular expression source text.
 * @param replacement          Replacement text; it is passed through
 *                             replaceEscapeSequences() once, here, and the
 *                             processed form is what gets stored.
 * @param regexpCompileOptions Option flags handed to the regexp member's
 *                             constructor together with @p pattern.
 *
 * The first entry of the member initializer list is outside the visible
 * lines of this excerpt.
 */
82 SubstitutionItem::SubstitutionItem(const std::string& pattern,
83 const std::string& replacement, int regexpCompileOptions)
// Store the replacement with its escape sequences already processed.
85 , replacement(replaceEscapeSequences(replacement))
86 , regexpCompileOptions(regexpCompileOptions)
// Build the regexp member from the pattern text and the option flags.
87 , regexp(pattern, regexpCompileOptions)
/**
 * @brief Copy constructor.
 *
 * Copies the pattern, the stored replacement and the option flags from
 * @p other, and constructs a fresh regexp member from the copied pattern and
 * options instead of copying other.regexp.
 *
 * @param other Item to copy from.
 */
91 SubstitutionItem::SubstitutionItem(const SubstitutionItem& other)
92 : pattern(other.pattern)
// Copied as stored: the replacement is not run through
// replaceEscapeSequences() a second time.
93 , replacement(other.replacement)
94 , regexpCompileOptions(other.regexpCompileOptions)
// Rebuilt from the pattern text rather than copied (presumably because the
// regexp member's type is not copyable - confirm against its declaration).
95 , regexp(other.pattern, other.regexpCompileOptions)
/**
 * @brief Append a substitution whose pattern is used as given.
 *
 * @param pattern              Pattern text forwarded unchanged to the new item.
 * @param replacement          Replacement text forwarded to the new item.
 * @param regexpCompileOptions Option flags forwarded to the new item.
 */
99 void SubstitutionList::Add(const std::string& pattern, const std::string& replacement, int regexpCompileOptions)
// Construct the item in place at the end of m_list.
101 m_list.emplace_back(pattern, replacement, regexpCompileOptions);
/**
 * @brief Append a substitution for a literal (non-regex) search text.
 *
 * The search text is converted into a pattern by putting a backslash in
 * front of the characters listed in the case labels below, optionally
 * wrapped in `\b` markers, and then stored like any other item.
 *
 * @param pattern            Literal text to search for.
 * @param replacement        Replacement text forwarded to the new item.
 * @param caseSensitive      When false, RE_CASELESS is used as the option
 *                           flags; when true, the flags are 0.
 * @param matchWholeWordOnly When true, the generated pattern is surrounded
 *                           by `\b` on both sides.
 */
104 void SubstitutionList::Add(
105 const std::string& pattern, const std::string& replacement,
106 bool caseSensitive, bool matchWholeWordOnly)
// Translate the boolean into the option flags used for the new item.
108 int regexpCompileOptions =
109 caseSensitive ? 0 : Poco::RegularExpression::RE_CASELESS;
// Pattern text generated from the literal search text.
110 std::string rePattern;
111 for (auto c: pattern)
// Characters that get a backslash prefix (the enclosing switch statement is
// outside the visible lines of this excerpt).
115 case '\\': case '.': case '^': case '$': case '|':
116 case '[': case ']': case '(': case ')': case '!':
117 case '?': case '*': case '+': case '{': case '}':
118 rePattern.push_back('\\');
// The character itself is appended to the generated pattern.
123 rePattern.push_back(c);
// Surround the generated pattern with \b on both sides.
125 if (matchWholeWordOnly)
126 rePattern = "\\b" + rePattern + "\\b";
// Construct the item in place at the end of m_list.
127 m_list.emplace_back(rePattern, replacement, regexpCompileOptions);
/**
 * @brief Apply every substitution in the list to @p subject.
 *
 * When @p codepage is not ucr::CP_UTF_8 the subject is first converted to
 * UTF-8. Each item of m_list is then applied, in list order, to the working
 * string. The branch taken for UTF-8 input and the return statement are
 * outside the visible lines of this excerpt.
 *
 * @param subject  Text to process.
 * @param codepage Code page of @p subject.
 * @return The processed text (the return statement is not visible here;
 *         presumably the working string `replaced` - confirm).
 */
130 std::string SubstitutionList::Subst(const std::string& subject, int codepage/*=CP_UTF8*/) const
// Working string that the substitutions modify in place.
132 std::string replaced;
134 if (codepage != ucr::CP_UTF_8)
136 // convert string into UTF-8
// NOTE(review): the buffer is created with twice the subject length; some
// single-byte code page characters need three bytes in UTF-8, so verify that
// ucr::convert grows the buffer when required.
137 ucr::buffer buf(subject.length() * 2);
139 ucr::convert(ucr::NONE, codepage, reinterpret_cast<const unsigned char*>(subject.c_str()),
140 subject.length(), ucr::UTF8, ucr::CP_UTF_8, &buf);
// Copy buf.size bytes of the converted text into the working string.
142 replaced.assign(reinterpret_cast<const char *>(buf.ptr), buf.size);
// Items are applied in list order, so each one sees the output of the
// previous substitutions.
149 for (const auto& item : m_list)
// RE_GLOBAL is passed so the call is not limited to the first match.
153 item.regexp.subst(replaced, item.replacement, Poco::RegularExpression::RE_GLOBAL);
164 void SubstitutionList::RemoveAllFilters()