vendor/golang.org/x/text/search/pattern.go

   1 // Copyright 2015 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package search
   6
   7 import (
   8         "golang.org/x/text/internal/colltab"
   9 )
  10
  11 // TODO: handle variable primary weights?
  12
  13 func (p *Pattern) deleteEmptyElements() {
  14         k := 0
  15         for _, e := range p.ce {
  16                 if !isIgnorable(p.m, e) {
  17                         p.ce[k] = e
  18                         k++
  19                 }
  20         }
  21         p.ce = p.ce[:k]
  22 }
  23
  24 func isIgnorable(m *Matcher, e colltab.Elem) bool {
  25         if e.Primary() > 0 {
  26                 return false
  27         }
  28         if e.Secondary() > 0 {
  29                 if !m.ignoreDiacritics {
  30                         return false
  31                 }
  32                 // Primary value is 0 and ignoreDiacritics is true. In this case we
  33                 // ignore the tertiary element, as it only pertains to the modifier.
  34                 return true
  35         }
  36         // TODO: further distinguish once we have the new implementation.
  37         if !(m.ignoreWidth || m.ignoreCase) && e.Tertiary() > 0 {
  38                 return false
  39         }
  40         // TODO: we ignore the Quaternary level for now.
  41         return true
  42 }
  43
  44 // TODO: Use a Boyer-Moore-like algorithm (probably Sunday) for searching.
  45
  46 func (p *Pattern) forwardSearch(it *colltab.Iter) (start, end int) {
  47         for start := 0; it.Next(); it.Reset(start) {
  48                 nextStart := it.End()
  49                 if end := p.searchOnce(it); end != -1 {
  50                         return start, end
  51                 }
  52                 start = nextStart
  53         }
  54         return -1, -1
  55 }
  56
  57 func (p *Pattern) anchoredForwardSearch(it *colltab.Iter) (start, end int) {
  58         if it.Next() {
  59                 if end := p.searchOnce(it); end != -1 {
  60                         return 0, end
  61                 }
  62         }
  63         return -1, -1
  64 }
  65
  66 // next advances to the next weight in a pattern. f must return one of the
  67 // weights of a collation element. next will advance to the first non-zero
  68 // weight and return this weight and true if it exists, or 0, false otherwise.
  69 func (p *Pattern) next(i *int, f func(colltab.Elem) int) (weight int, ok bool) {
  70         for *i < len(p.ce) {
  71                 v := f(p.ce[*i])
  72                 *i++
  73                 if v != 0 {
  74                         // Skip successive ignorable values.
  75                         for ; *i < len(p.ce) && f(p.ce[*i]) == 0; *i++ {
  76                         }
  77                         return v, true
  78                 }
  79         }
  80         return 0, false
  81 }
  82
  83 // TODO: remove this function once Elem is internal and Tertiary returns int.
  84 func tertiary(e colltab.Elem) int {
  85         return int(e.Tertiary())
  86 }
  87
  88 // searchOnce tries to match the pattern s.p at the text position i. s.buf needs
  89 // to be filled with collation elements of the first segment, where n is the
  90 // number of source bytes consumed for this segment. It will return the end
  91 // position of the match or -1.
  92 func (p *Pattern) searchOnce(it *colltab.Iter) (end int) {
  93         var pLevel [4]int
  94
  95         m := p.m
  96         for {
  97                 k := 0
  98                 for ; k < it.N; k++ {
  99                         if v := it.Elems[k].Primary(); v > 0 {
 100                                 if w, ok := p.next(&pLevel[0], colltab.Elem.Primary); !ok || v != w {
 101                                         return -1
 102                                 }
 103                         }
 104
 105                         if !m.ignoreDiacritics {
 106                                 if v := it.Elems[k].Secondary(); v > 0 {
 107                                         if w, ok := p.next(&pLevel[1], colltab.Elem.Secondary); !ok || v != w {
 108                                                 return -1
 109                                         }
 110                                 }
 111                         } else if it.Elems[k].Primary() == 0 {
 112                                 // We ignore tertiary values of collation elements of the
 113                                 // secondary level.
 114                                 continue
 115                         }
 116
 117                         // TODO: distinguish between case and width. This will be easier to
 118                         // implement after we moved to the new collation implementation.
 119                         if !m.ignoreWidth && !m.ignoreCase {
 120                                 if v := it.Elems[k].Tertiary(); v > 0 {
 121                                         if w, ok := p.next(&pLevel[2], tertiary); !ok || int(v) != w {
 122                                                 return -1
 123                                         }
 124                                 }
 125                         }
 126                         // TODO: check quaternary weight
 127                 }
 128                 it.Discard() // Remove the current segment from the buffer.
 129
 130                 // Check for completion.
 131                 switch {
 132                 // If any of these cases match, we are not at the end.
 133                 case pLevel[0] < len(p.ce):
 134                 case !m.ignoreDiacritics && pLevel[1] < len(p.ce):
 135                 case !(m.ignoreWidth || m.ignoreCase) && pLevel[2] < len(p.ce):
 136                 default:
 137                         // At this point, both the segment and pattern has matched fully.
 138                         // However, the segment may still be have trailing modifiers.
 139                         // This can be verified by another call to next.
 140                         end = it.End()
 141                         if it.Next() && it.Elems[0].Primary() == 0 {
 142                                 if !m.ignoreDiacritics {
 143                                         return -1
 144                                 }
 145                                 end = it.End()
 146                         }
 147                         return end
 148                 }
 149
 150                 // Fill the buffer with the next batch of collation elements.
 151                 if !it.Next() {
 152                         return -1
 153                 }
 154         }
 155 }