1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
7 // An Iter incrementally converts chunks of the input text to collation
8 // elements, while ensuring that the collation elements are in normalized order
9 // (that is, they are in the order as if the input text were normalized first).
13 // N is the number of elements in Elems that will not be reordered on
14 // subsequent iterations, N <= len(Elems).
19 // Because the Elems buffer may contain collation elements that are needed
20 // for look-ahead, we need two positions in the text (bytes or str): one for
21 // the end position in the text for the current iteration and one for the
22 // start of the next call to appendNext.
23 pEnd int // end position in text corresponding to N.
24 pNext int // pEnd <= pNext.
27 // Reset sets the position in the current input text to p and discards any
28 // results obtained so far.
29 func (i *Iter) Reset(p int) {
36 // Len returns the length of the input text.
37 func (i *Iter) Len() int {
44 // Discard removes the collation elements up to N.
45 func (i *Iter) Discard() {
46 // TODO: change this such that only modifiers following starters will have
48 i.Elems = i.Elems[:copy(i.Elems, i.Elems[i.N:])]
52 // End returns the end position of the input text for which Next has returned
54 func (i *Iter) End() int {
58 // SetInput resets i to input s.
59 func (i *Iter) SetInput(s []byte) {
65 // SetInputString resets i to input s.
66 func (i *Iter) SetInputString(s string) {
72 func (i *Iter) done() bool {
73 return i.pNext >= len(i.str) && i.pNext >= len(i.bytes)
76 func (i *Iter) appendNext() bool {
82 i.Elems, sz = i.Weighter.AppendNextString(i.Elems, i.str[i.pNext:])
84 i.Elems, sz = i.Weighter.AppendNext(i.Elems, i.bytes[i.pNext:])
93 // Next appends Elems to the internal array. On each iteration, it will either
94 // add starters or modifiers. In the majority of cases, an Elem with a primary
95 // value > 0 will have a CCC of 0. The CCC values of collation elements are also
96 // used to detect if the input string was not normalized and to adjust the
97 // result accordingly.
98 func (i *Iter) Next() bool {
99 if i.N == len(i.Elems) && !i.appendNext() {
103 // Check if the current segment starts with a starter.
104 prevCCC := i.Elems[len(i.Elems)-1].CCC()
109 } else if i.Elems[i.N].CCC() == 0 {
110 // Advance i.N past the run of CCC == 0 elements that starts the current
111 // segment; the remaining elements are left for the next call to Next.
112 for i.N++; i.N < len(i.Elems) && i.Elems[i.N].CCC() == 0; i.N++ {
118 // The current (partial) segment starts with modifiers. We need to collect
119 // all successive modifiers to ensure that they are normalized.
127 if ccc := i.Elems[p].CCC(); ccc == 0 || len(i.Elems)-i.N > maxCombiningCharacters {
128 // Leave the starter for the next iteration. This ensures that we
129 // do not return sequences of collation elements that cross two
132 // TODO: handle large number of combining characters by fully
133 // normalizing the input segment before iteration. This ensures
134 // results are consistent across the text repo.
137 } else if ccc < prevCCC {
138 i.doNorm(p, ccc) // should be rare, never occurs for NFD and FCC.
144 done := len(i.Elems) != i.N
149 // nextNoNorm is the same as Next, but does not "normalize" the collation
151 func (i *Iter) nextNoNorm() bool {
152 // TODO: remove this function. Using this instead of Next does not seem
153 // to improve performance in any significant way. We retain this until
154 // later for evaluation purposes.
163 const maxCombiningCharacters = 30
165 // doNorm reorders the collation elements in i.Elems.
166 // It assumes that blocks of collation elements added with appendNext
167 // either start and end with the same CCC or start with CCC == 0.
168 // This allows for a single insertion point for the entire block.
169 // The correctness of this assumption is verified in builder.go.
170 func (i *Iter) doNorm(p int, ccc uint8) {
173 for p--; p > i.N && ccc < i.Elems[p-1].CCC(); p-- {
175 i.Elems = append(i.Elems, i.Elems[p:k]...)
176 copy(i.Elems[p:], i.Elems[k:])
177 i.Elems = i.Elems[:n]