1 #include "pseudolocalize.h"
5 // String basis to generate expansion
// pseudo_generate_expansion() starts from a copy of this word list and, when
// more length is requested than the list holds, appends a space followed by a
// recursively generated continuation. The literal is runtime data, so its
// exact spelling is intentionally left as it is.
6 static const String16 k_expansion_string = String16("one two three "
7 "four five six seven eight nine ten eleven twelve thirteen "
8 "fourteen fiveteen sixteen seventeen nineteen twenty");
10 // Special unicode characters to override directionality of the words
// The escapes are the UTF-8 byte sequences for U+200F (right-to-left mark),
// U+202E (right-to-left override) and U+202C (pop directional formatting).
11 static const String16 k_rlm = String16("\xe2\x80\x8f");
12 static const String16 k_rlo = String16("\xE2\x80\xae");
13 static const String16 k_pdf = String16("\xE2\x80\xac");
// Marks that PseudoMethodAccent::placeholder() puts before and after a
// placeholder: the UTF-8 byte sequences for U+00BB and U+00AB.
16 static const String16 k_placeholder_open = String16("\xc2\xbb");
17 static const String16 k_placeholder_close = String16("\xc2\xab");
// Delimiters that Pseudolocalizer::text() compares each character against
// when it tracks nesting depth.
19 static const char16_t k_arg_start = '{';
20 static const char16_t k_arg_end = '}';
// Constructs a Pseudolocalizer whose implementation pointer starts out as
// nullptr and whose remembered nesting depth (mLastDepth) starts at 0.
// NOTE(review): the constructor body is not visible in this view; presumably
// it selects the implementation for `m` (e.g. through setMethod) -- confirm.
22 Pseudolocalizer::Pseudolocalizer(PseudolocalizationMethod m)
23 : mImpl(nullptr), mLastDepth(0) {
// Selects the implementation object stored in mImpl:
//   PSEUDO_ACCENTED -> PseudoMethodAccent
//   PSEUDO_BIDI     -> PseudoMethodBidi
// A PseudoMethodNone is allocated as well, presumably for every other value
// of `m` (the branch header for it is not visible in this view).
// NOTE(review): mImpl is a raw pointer assigned from `new`. The statements
// preceding these branches are not visible here, so confirm that a previously
// assigned mImpl is released before it is overwritten.
27 void Pseudolocalizer::setMethod(PseudolocalizationMethod m) {
31 if (m == PSEUDO_ACCENTED) {
32 mImpl = new PseudoMethodAccent();
33 } else if (m == PSEUDO_BIDI) {
34 mImpl = new PseudoMethodBidi();
36 mImpl = new PseudoMethodNone();
// Pseudolocalizes `text` chunk by chunk. Chunk boundaries are derived from
// the '{' / '}' nesting depth compared with mLastDepth, a member that the
// constructor initialises to 0; each call starts counting from mLastDepth.
// Chunks are passed to mImpl->text() or mImpl->placeholder(), and
// mImpl->start() / mImpl->end() output is appended at message boundaries.
// NOTE(review): several statements of this function are not visible in this
// view; the notes below describe only the visible lines.
40 String16 Pseudolocalizer::text(const String16& text) {
// Continue from the depth remembered in mLastDepth.
42 size_t depth = mLastDepth;
44 const size_t length= text.size();
45 const char16_t* str = text.string();
47 for (lastpos = pos = 0; pos < length; pos++) {
48 char16_t c = str[pos];
// k_arg_start and k_arg_end drive the depth bookkeeping; k_arg_end is only
// considered while depth is non-zero.
58 if (c == k_arg_start) {
60 } else if (c == k_arg_end && depth) {
// A chunk is processed when the depth differs from mLastDepth or when the
// last character of the input has been reached.
64 if (mLastDepth != depth || pos == length - 1) {
// An even mLastDepth marks the pending chunk as pseudolocalizable text.
65 bool pseudo = ((mLastDepth % 2) == 0);
67 if (!pseudo || depth == mLastDepth) {
70 size_t size = nextpos - lastpos;
72 String16 chunk = String16(text, size, lastpos);
// NOTE(review): presumably guarded by `pseudo` (the guard is not visible).
74 chunk = mImpl->text(chunk);
// Otherwise a chunk that begins with '{' and ends with '}' is handed to
// placeholder().
75 } else if (str[lastpos] == k_arg_start &&
76 str[nextpos - 1] == k_arg_end) {
77 chunk = mImpl->placeholder(chunk);
81 if (pseudo && depth < mLastDepth) { // End of message
82 out.append(mImpl->end());
83 } else if (!pseudo && depth > mLastDepth) { // Start of message
84 out.append(mImpl->start());
/**
 * Looks up the pseudolocalized replacement for a single character.
 *
 * @param c  the character to replace
 * @return   a NUL-terminated byte string holding the replacement, or nullptr
 *           when the table has no entry for c
 *
 * Most entries are multi-byte UTF-8 sequences. A few are plain ASCII:
 * 'q' -> "Q", 'Q' -> "q" and 'v' -> "V". '!', '?' and '$' map to the UTF-8
 * encodings of U+00A1, U+00BF and U+20AC.
 *
 * NOTE(review): there is no entry for 'F', so it yields nullptr like any
 * other unmapped character -- confirm whether that omission is intentional.
 */
static const char*
pseudolocalize_char(const char16_t c)
{
    switch (c) {
        case 'a':   return "\xc3\xa5";
        case 'b':   return "\xc9\x93";
        case 'c':   return "\xc3\xa7";
        case 'd':   return "\xc3\xb0";
        case 'e':   return "\xc3\xa9";
        case 'f':   return "\xc6\x92";
        case 'g':   return "\xc4\x9d";
        case 'h':   return "\xc4\xa5";
        case 'i':   return "\xc3\xae";
        case 'j':   return "\xc4\xb5";
        case 'k':   return "\xc4\xb7";
        case 'l':   return "\xc4\xbc";
        case 'm':   return "\xe1\xb8\xbf";
        case 'n':   return "\xc3\xb1";
        case 'o':   return "\xc3\xb6";
        case 'p':   return "\xc3\xbe";
        case 'q':   return "\x51";
        case 'r':   return "\xc5\x95";
        case 's':   return "\xc5\xa1";
        case 't':   return "\xc5\xa3";
        case 'u':   return "\xc3\xbb";
        case 'v':   return "\x56";
        case 'w':   return "\xc5\xb5";
        case 'x':   return "\xd1\x85";
        case 'y':   return "\xc3\xbd";
        case 'z':   return "\xc5\xbe";
        case 'A':   return "\xc3\x85";
        case 'B':   return "\xce\xb2";
        case 'C':   return "\xc3\x87";
        case 'D':   return "\xc3\x90";
        case 'E':   return "\xc3\x89";
        case 'G':   return "\xc4\x9c";
        case 'H':   return "\xc4\xa4";
        case 'I':   return "\xc3\x8e";
        case 'J':   return "\xc4\xb4";
        case 'K':   return "\xc4\xb6";
        case 'L':   return "\xc4\xbb";
        case 'M':   return "\xe1\xb8\xbe";
        case 'N':   return "\xc3\x91";
        case 'O':   return "\xc3\x96";
        case 'P':   return "\xc3\x9e";
        case 'Q':   return "\x71";
        case 'R':   return "\xc5\x94";
        case 'S':   return "\xc5\xa0";
        case 'T':   return "\xc5\xa2";
        case 'U':   return "\xc3\x9b";
        case 'V':   return "\xce\xbd";
        case 'W':   return "\xc5\xb4";
        case 'X':   return "\xc3\x97";
        case 'Y':   return "\xc3\x9d";
        case 'Z':   return "\xc5\xbd";
        case '!':   return "\xc2\xa1";
        case '?':   return "\xc2\xbf";
        case '$':   return "\xe2\x82\xac";
        // Unmapped characters: the caller decides what to emit instead.
        default:    return nullptr;
    }
}
/**
 * Tells whether `c` is one of the characters that can close an ordinary
 * format placeholder: s S c C d o x X f e E g G a A b B h H % n.
 */
static bool is_possible_normal_placeholder_end(const char16_t c) {
    // Every accepted terminator, scanned linearly; the list is tiny.
    static const char kTerminators[] = "sScCdoxXfeEgGaAbBhH%n";
    for (const char* candidate = kTerminators; *candidate != '\0'; ++candidate) {
        if (c == static_cast<char16_t>(*candidate)) {
            return true;
        }
    }
    return false;
}
// Produces filler text derived from k_expansion_string for a requested
// `length`. When the base string is shorter than `length`, a space and a
// recursively generated remainder are appended; the other visible path scans
// forward from `length` and trims the result.
// NOTE(review): several statements (including the loop body and the
// declaration of `ext`) are not visible in this view.
182 static String16 pseudo_generate_expansion(const unsigned int length) {
183 String16 result = k_expansion_string;
// NOTE(review): `s` is taken before `result` is appended to below; confirm it
// is only dereferenced on the path that does not modify `result`.
184 const char16_t* s = result.string();
185 if (result.size() < length) {
186 result += String16(" ");
// result.size() now includes the space just appended.
187 result += pseudo_generate_expansion(length - result.size());
190 // Should contain only whole words, so looking for a space
191 for (unsigned int i = length + 1; i < result.size(); ++i) {
// NOTE(review): presumably keeps the first length + ext characters -- confirm
// against the String16::remove parameter order.
197 result.remove(length + ext, 0);
/** Reports whether `c` is a blank, a horizontal tab or a newline. */
static bool is_space(const char16_t c) {
    switch (c) {
        case ' ':
        case '\t':
        case '\n':
            return true;
        default:
            return false;
    }
}
// Begins a message: the result is set to "[" and the word and length
// counters are reset to 0.
// NOTE(review): the declaration of `result` and the return statement are not
// visible in this view.
206 String16 PseudoMethodAccent::start() {
209 result = String16(String8("["));
211 mWordCount = mLength = 0;
// Finishes a message: appends a space and generated expansion text, resets
// the word and length counters, and appends "]".
// The expansion is requested at the full mLength when more than 3 words were
// counted, and at half of mLength otherwise.
// NOTE(review): some lines are not visible in this view, so any condition
// guarding the expansion cannot be confirmed from here.
216 String16 PseudoMethodAccent::end() {
219 result.append(String16(String8(" ")));
220 result.append(pseudo_generate_expansion(
221 mWordCount > 3 ? mLength : mLength / 2));
223 mWordCount = mLength = 0;
226 result.append(String16(String8("]")));
232 * Converts characters so they look like they've been localized.
234 * Note: This leaves escape sequences untouched so they can later be
235 * processed by ResTable::collectString in the normal way.
// Builds the accented form of `source`. The visible branches copy escape
// sequences, '%' placeholders and '<' / '&' HTML syntax through without
// accenting, and pass all remaining characters to pseudolocalize_char().
// NOTE(review): many statements of this function are not visible in this
// view; the notes below describe only the visible lines.
237 String16 PseudoMethodAccent::text(const String16& source)
239 const char16_t* s = source.string();
241 const size_t I = source.size();
// Starts as true so that the first non-space character counts as following
// a space.
242 bool lastspace = true;
243 for (size_t i=0; i<I; i++) {
246 // Escape syntax, no need to pseudolocalize
248 result += String16("\\");
253 // this one takes up 5 chars
// NOTE(review): copies 5 code units starting at s+i; no bounds check is
// visible here -- confirm that i + 5 cannot run past the end of source.
254 result += String16(s+i, 5);
266 result.append(&c, 1);
270 result.append(&c, 1);
272 } else if (c == '%') {
273 // Placeholder syntax, no need to pseudolocalize
277 while (!end && i < I) {
281 if (is_possible_normal_placeholder_end(c)) {
283 } else if (c == 't') {
290 // Treat chunk as a placeholder unless it ends with %.
291 result += ((c == '%') ? chunk : placeholder(chunk));
292 } else if (c == '<' || c == '&') {
293 // html syntax, no need to pseudolocalize
294 bool tag_closed = false;
295 while (!tag_closed && i < I) {
297 String16 escape_text;
298 escape_text.append(&c, 1);
// Scan ahead with a separate index so that `i` is left where it was.
300 size_t htmlCodePos = i;
301 while (!end && htmlCodePos < I) {
304 escape_text.append(&c, 1);
// Taken for any character other than '#', an ASCII letter or a digit.
311 else if (!((c == '#' ||
312 (c >= 'a' && c <= 'z') ||
313 (c >= 'A' && c <= 'Z') ||
314 (c >= '0' && c <= '9')))) {
318 result += escape_text;
319 if (escape_text != String16("<")) {
326 result.append(&c, 1);
329 result.append(&c, 1);
334 // This is a pure text that should be pseudolocalized
// pseudolocalize_char() returns NULL for characters without a replacement.
// NOTE(review): presumably the String16(p) append below runs only for a
// non-null p and the character is copied unchanged otherwise -- confirm.
335 const char* p = pseudolocalize_char(c);
337 result += String16(p);
339 bool space = is_space(c);
// True when a non-space character follows a space or the start of input.
340 if (lastspace && !space) {
344 result.append(&c, 1);
346 // Count only pseudolocalizable chars and delimiters
352 String16 PseudoMethodAccent::placeholder(const String16& source) {
353 // Surround a placeholder with brackets
354 return k_placeholder_open + source + k_placeholder_close;
// Wraps runs of non-space characters of `source` in directionality marks:
// k_rlm + k_rlo is emitted where a run begins, k_pdf + k_rlm where a space
// follows a run, and each character is appended to the result.
// NOTE(review): `c`, `space` and `result` are defined on lines not visible in
// this view; `space` is presumably is_space(c).
357 String16 PseudoMethodBidi::text(const String16& source)
359 const char16_t* s = source.string();
// Starts as true so that a leading non-space character opens a run.
361 bool lastspace = true;
363 for (size_t i=0; i<source.size(); i++) {
366 if (lastspace && !space) {
368 result += k_rlm + k_rlo;
369 } else if (!lastspace && space) {
371 result += k_pdf + k_rlm;
374 result.append(&c, 1);
// NOTE(review): presumably runs after the loop to close a run that reaches
// the end of source; the enclosing condition is not visible -- confirm.
378 result += k_pdf + k_rlm;
383 String16 PseudoMethodBidi::placeholder(const String16& source) {
384 // Surround a placeholder with directionality change sequence
385 return k_rlm + k_rlo + source + k_pdf + k_rlm;