1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "golang.org/x/text/internal/colltab"
11 // TODO: handle variable primary weights?
// deleteEmptyElements walks the pattern's collation elements (p.ce) and tests
// each one with isIgnorable against the pattern's matcher (p.m).
// NOTE(review): the body of the branch and the remainder of the function are
// not visible in this excerpt; going by the name, ignorable elements are
// presumably dropped from p.ce and the others kept — confirm against the
// full source.
13 func (p *Pattern) deleteEmptyElements() {
15 for _, e := range p.ce {
// Only elements that are NOT ignorable under the matcher's options enter
// this branch.
16 if !isIgnorable(p.m, e) {
// isIgnorable inspects the secondary and tertiary weights of e together with
// the matcher options ignoreDiacritics, ignoreWidth and ignoreCase.
// NOTE(review): the return statements (and any primary-weight check) are not
// visible in this excerpt; presumably the result reports whether e carries no
// weight that is significant under m's options — confirm.
24 func isIgnorable(m *Matcher, e colltab.Elem) bool {
// An element with a non-zero secondary weight is handled according to
// whether diacritics are being ignored.
28 if e.Secondary() > 0 {
29 if !m.ignoreDiacritics {
32 // Primary value is 0 and ignoreDiacritics is true. In this case we
33 // ignore the tertiary element, as it only pertains to the modifier.
36 // TODO: further distinguish once we have the new implementation.
// The tertiary weight is consulted only when neither width nor case is
// being ignored.
37 if !(m.ignoreWidth || m.ignoreCase) && e.Tertiary() > 0 {
40 // TODO: we ignore the Quaternary level for now.
44 // TODO: Use a Boyer-Moore-like algorithm (probably Sunday) for searching.
// forwardSearch steps through it with it.Next and, at each step, attempts a
// match with searchOnce. The loop's post statement calls it.Reset(start)
// after every iteration that does not leave the loop.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably the start and end of the first match are returned, with a
// sentinel pair when nothing matches — confirm.
46 func (p *Pattern) forwardSearch(it *colltab.Iter) (start, end int) {
// The start declared here is scoped to the loop and shadows the named
// result of the same name.
47 for start := 0; it.Next(); it.Reset(start) {
// searchOnce is documented to return -1 when there is no match; this end is
// likewise local to the if statement and shadows the named result.
49 if end := p.searchOnce(it); end != -1 {
// anchoredForwardSearch attempts a match with searchOnce on it.
// NOTE(review): only a single searchOnce call is visible in this excerpt and
// no loop is in view, so the match is presumably tried only at the
// iterator's current position — confirm, along with the values returned on
// success and failure.
57 func (p *Pattern) anchoredForwardSearch(it *colltab.Iter) (start, end int) {
// A result of -1 from searchOnce means no match; end here is local to the if
// statement and shadows the named result.
59 if end := p.searchOnce(it); end != -1 {
66 // next advances to the next weight in a pattern. f must return one of the
67 // weights of a collation element. next will advance to the first non-zero
68 // weight and return this weight and true if it exists, or 0, false otherwise.
// i points at the caller's cursor into p.ce; the cursor is advanced in place
// through the pointer, so the caller sees the new position after the call.
69 func (p *Pattern) next(i *int, f func(colltab.Elem) int) (weight int, ok bool) {
74 // Skip successive ignorable values.
// The loop stops at the first element for which f yields a non-zero weight,
// or when *i reaches len(p.ce).
75 for ; *i < len(p.ce) && f(p.ce[*i]) == 0; *i++ {
// tertiary returns the tertiary weight of e converted to int. This gives it
// the func(colltab.Elem) int shape that next takes as its f argument, which
// is how searchOnce uses it.
83 // TODO: remove this function once Elem is internal and Tertiary returns int.
84 func tertiary(e colltab.Elem) int {
85 return int(e.Tertiary())
88 // searchOnce tries to match the pattern p at the current text position of it.
89 // The iterator's buffer (it.Elems) needs to be filled with the collation
90 // elements of the first segment before the call. It will return the end
91 // position of the match or -1.
// NOTE(review): the earlier wording of this comment referred to s.p, i, s.buf
// and n, none of which appear in the visible signature; the mapping onto p
// and it used above is presumed — confirm.
92 func (p *Pattern) searchOnce(it *colltab.Iter) (end int) {
// pLevel[0], pLevel[1] and pLevel[2] are the cursors into p.ce that p.next
// advances for the primary, secondary and tertiary weights respectively.
// Primary level: a non-zero text weight must equal the next non-zero primary
// weight of the pattern.
99 if v := it.Elems[k].Primary(); v > 0 {
100 if w, ok := p.next(&pLevel[0], colltab.Elem.Primary); !ok || v != w {
// Secondary level: compared only when diacritics are significant.
105 if !m.ignoreDiacritics {
106 if v := it.Elems[k].Secondary(); v > 0 {
107 if w, ok := p.next(&pLevel[1], colltab.Elem.Secondary); !ok || v != w {
// Diacritics are ignored here and the element has no primary weight.
111 } else if it.Elems[k].Primary() == 0 {
112 // We ignore tertiary values of collation elements of the
117 // TODO: distinguish between case and width. This will be easier to
118 // implement after we moved to the new collation implementation.
// Tertiary level: compared only when neither width nor case is ignored. The
// text weight is converted to int to match what the tertiary helper returns.
119 if !m.ignoreWidth && !m.ignoreCase {
120 if v := it.Elems[k].Tertiary(); v > 0 {
121 if w, ok := p.next(&pLevel[2], tertiary); !ok || int(v) != w {
126 // TODO: check quaternary weight
128 it.Discard() // Remove the current segment from the buffer.
130 // Check for completion.
132 // If any of these cases match, we are not at the end.
// A cursor that is still short of len(p.ce) on a level that is being
// compared means pattern elements remain unmatched on that level.
133 case pLevel[0] < len(p.ce):
134 case !m.ignoreDiacritics && pLevel[1] < len(p.ce):
135 case !(m.ignoreWidth || m.ignoreCase) && pLevel[2] < len(p.ce):
137 // At this point, both the segment and pattern have matched fully.
138 // However, the segment may still have trailing modifiers.
139 // This can be verified by another call to next.
// A following element whose primary weight is 0 is treated as such a
// trailing modifier.
141 if it.Next() && it.Elems[0].Primary() == 0 {
// NOTE(review): the body of this branch is not visible in this excerpt;
// presumably a trailing modifier defeats the match when diacritics are
// significant — confirm.
142 if !m.ignoreDiacritics {
150 // Fill the buffer with the next batch of collation elements.