2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 #include <runtime/UString.h>
31 #include <wtf/Vector.h>
32 #include <wtf/unicode/Unicode.h>
34 namespace JSC { namespace Yarr {
36 struct PatternDisjunction;
38 struct CharacterRange {
42 CharacterRange(UChar begin, UChar end)
49 struct CharacterClassTable : RefCounted<CharacterClassTable> {
52 static PassRefPtr<CharacterClassTable> create(const char* table, bool inverted)
54 return adoptRef(new CharacterClassTable(table, inverted));
58 CharacterClassTable(const char* table, bool inverted)
60 , m_inverted(inverted)
65 struct CharacterClass {
66 WTF_MAKE_FAST_ALLOCATED;
68 // All CharacterClass instances have to have the full set of matches and ranges,
69 // they may have an optional table for faster lookups (which must match the
70 // specified matches and ranges)
71 CharacterClass(PassRefPtr<CharacterClassTable> table)
75 Vector<UChar> m_matches;
76 Vector<CharacterRange> m_ranges;
77 Vector<UChar> m_matchesUnicode;
78 Vector<CharacterRange> m_rangesUnicode;
79 RefPtr<CharacterClassTable> m_table;
92 TypeAssertionWordBoundary,
97 TypeParenthesesSubpattern,
98 TypeParentheticalAssertion,
103 UChar patternCharacter;
104 CharacterClass* characterClass;
105 unsigned backReferenceSubpatternId;
107 PatternDisjunction* disjunction;
108 unsigned subpatternId;
109 unsigned lastSubpatternId;
114 QuantifierType quantityType;
115 unsigned quantityCount;
117 unsigned frameLocation;
119 PatternTerm(UChar ch)
120 : type(PatternTerm::TypePatternCharacter)
124 patternCharacter = ch;
125 quantityType = QuantifierFixedCount;
129 PatternTerm(CharacterClass* charClass, bool invert)
130 : type(PatternTerm::TypeCharacterClass)
134 characterClass = charClass;
135 quantityType = QuantifierFixedCount;
139 PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool capture = false, bool invert = false)
144 parentheses.disjunction = disjunction;
145 parentheses.subpatternId = subpatternId;
146 parentheses.isCopy = false;
147 parentheses.isTerminal = false;
148 quantityType = QuantifierFixedCount;
152 PatternTerm(Type type, bool invert = false)
157 quantityType = QuantifierFixedCount;
161 PatternTerm(unsigned spatternId)
162 : type(TypeBackReference)
166 backReferenceSubpatternId = spatternId;
167 quantityType = QuantifierFixedCount;
171 static PatternTerm ForwardReference()
173 return PatternTerm(TypeForwardReference);
176 static PatternTerm BOL()
178 return PatternTerm(TypeAssertionBOL);
181 static PatternTerm EOL()
183 return PatternTerm(TypeAssertionEOL);
186 static PatternTerm WordBoundary(bool invert)
188 return PatternTerm(TypeAssertionWordBoundary, invert);
201 void quantify(unsigned count, QuantifierType type)
203 quantityCount = count;
208 struct PatternAlternative {
209 WTF_MAKE_FAST_ALLOCATED;
211 PatternAlternative(PatternDisjunction* disjunction)
212 : m_parent(disjunction)
213 , m_onceThrough(false)
214 , m_hasFixedSize(false)
215 , m_startsWithBOL(false)
216 , m_containsBOL(false)
220 PatternTerm& lastTerm()
222 ASSERT(m_terms.size());
223 return m_terms[m_terms.size() - 1];
226 void removeLastTerm()
228 ASSERT(m_terms.size());
229 m_terms.shrink(m_terms.size() - 1);
232 void setOnceThrough()
234 m_onceThrough = true;
239 return m_onceThrough;
242 Vector<PatternTerm> m_terms;
243 PatternDisjunction* m_parent;
244 unsigned m_minimumSize;
245 bool m_onceThrough : 1;
246 bool m_hasFixedSize : 1;
247 bool m_startsWithBOL : 1;
248 bool m_containsBOL : 1;
251 struct PatternDisjunction {
252 WTF_MAKE_FAST_ALLOCATED;
254 PatternDisjunction(PatternAlternative* parent = 0)
256 , m_hasFixedSize(false)
260 ~PatternDisjunction()
262 deleteAllValues(m_alternatives);
265 PatternAlternative* addNewAlternative()
267 PatternAlternative* alternative = new PatternAlternative(this);
268 m_alternatives.append(alternative);
272 Vector<PatternAlternative*> m_alternatives;
273 PatternAlternative* m_parent;
274 unsigned m_minimumSize;
275 unsigned m_callFrameSize;
279 // You probably don't want to be calling these functions directly
280 // (please to be calling newlineCharacterClass() et al on your
281 // friendly neighborhood YarrPattern instance to get nicely
283 CharacterClass* newlineCreate();
284 CharacterClass* digitsCreate();
285 CharacterClass* spacesCreate();
286 CharacterClass* wordcharCreate();
287 CharacterClass* nondigitsCreate();
288 CharacterClass* nonspacesCreate();
289 CharacterClass* nonwordcharCreate();
292 TermChain(PatternTerm term)
297 Vector<TermChain> hotTerms;
306 BeginChar(unsigned value, unsigned mask)
316 YarrPattern(const UString& pattern, bool ignoreCase, bool multiline, const char** error);
320 deleteAllValues(m_disjunctions);
321 deleteAllValues(m_userCharacterClasses);
326 m_numSubpatterns = 0;
327 m_maxBackReference = 0;
329 m_containsBackreferences = false;
330 m_containsBeginChars = false;
331 m_containsBOL = false;
339 nonwordcharCached = 0;
341 deleteAllValues(m_disjunctions);
342 m_disjunctions.clear();
343 deleteAllValues(m_userCharacterClasses);
344 m_userCharacterClasses.clear();
345 m_beginChars.clear();
348 bool containsIllegalBackReference()
350 return m_maxBackReference > m_numSubpatterns;
353 CharacterClass* newlineCharacterClass()
356 m_userCharacterClasses.append(newlineCached = newlineCreate());
357 return newlineCached;
359 CharacterClass* digitsCharacterClass()
362 m_userCharacterClasses.append(digitsCached = digitsCreate());
365 CharacterClass* spacesCharacterClass()
368 m_userCharacterClasses.append(spacesCached = spacesCreate());
371 CharacterClass* wordcharCharacterClass()
374 m_userCharacterClasses.append(wordcharCached = wordcharCreate());
375 return wordcharCached;
377 CharacterClass* nondigitsCharacterClass()
379 if (!nondigitsCached)
380 m_userCharacterClasses.append(nondigitsCached = nondigitsCreate());
381 return nondigitsCached;
383 CharacterClass* nonspacesCharacterClass()
385 if (!nonspacesCached)
386 m_userCharacterClasses.append(nonspacesCached = nonspacesCreate());
387 return nonspacesCached;
389 CharacterClass* nonwordcharCharacterClass()
391 if (!nonwordcharCached)
392 m_userCharacterClasses.append(nonwordcharCached = nonwordcharCreate());
393 return nonwordcharCached;
396 bool m_ignoreCase : 1;
397 bool m_multiline : 1;
398 bool m_containsBackreferences : 1;
399 bool m_containsBeginChars : 1;
400 bool m_containsBOL : 1;
401 unsigned m_numSubpatterns;
402 unsigned m_maxBackReference;
403 PatternDisjunction* m_body;
404 Vector<PatternDisjunction*, 4> m_disjunctions;
405 Vector<CharacterClass*> m_userCharacterClasses;
406 Vector<BeginChar> m_beginChars;
409 const char* compile(const UString& patternString);
411 CharacterClass* newlineCached;
412 CharacterClass* digitsCached;
413 CharacterClass* spacesCached;
414 CharacterClass* wordcharCached;
415 CharacterClass* nondigitsCached;
416 CharacterClass* nonspacesCached;
417 CharacterClass* nonwordcharCached;
420 } } // namespace JSC::Yarr
422 #endif // YarrPattern_h