OSDN Git Service

new repo
[bytom/vapor.git] / vendor / golang.org / x / text / internal / colltab / table.go
1 // Copyright 2012 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package colltab
6
7 import (
8         "unicode/utf8"
9
10         "golang.org/x/text/unicode/norm"
11 )
12
13 // Table holds all collation data for a given collation ordering.
14 type Table struct {
15         Index Trie // main trie
16
17         // expansion info
18         ExpandElem []uint32
19
20         // contraction info
21         ContractTries  ContractTrieSet
22         ContractElem   []uint32
23         MaxContractLen int
24         VariableTop    uint32
25 }
26
27 func (t *Table) AppendNext(w []Elem, b []byte) (res []Elem, n int) {
28         return t.appendNext(w, source{bytes: b})
29 }
30
31 func (t *Table) AppendNextString(w []Elem, s string) (res []Elem, n int) {
32         return t.appendNext(w, source{str: s})
33 }
34
35 func (t *Table) Start(p int, b []byte) int {
36         // TODO: implement
37         panic("not implemented")
38 }
39
40 func (t *Table) StartString(p int, s string) int {
41         // TODO: implement
42         panic("not implemented")
43 }
44
45 func (t *Table) Domain() []string {
46         // TODO: implement
47         panic("not implemented")
48 }
49
50 func (t *Table) Top() uint32 {
51         return t.VariableTop
52 }
53
54 type source struct {
55         str   string
56         bytes []byte
57 }
58
59 func (src *source) lookup(t *Table) (ce Elem, sz int) {
60         if src.bytes == nil {
61                 return t.Index.lookupString(src.str)
62         }
63         return t.Index.lookup(src.bytes)
64 }
65
66 func (src *source) tail(sz int) {
67         if src.bytes == nil {
68                 src.str = src.str[sz:]
69         } else {
70                 src.bytes = src.bytes[sz:]
71         }
72 }
73
74 func (src *source) nfd(buf []byte, end int) []byte {
75         if src.bytes == nil {
76                 return norm.NFD.AppendString(buf[:0], src.str[:end])
77         }
78         return norm.NFD.Append(buf[:0], src.bytes[:end]...)
79 }
80
81 func (src *source) rune() (r rune, sz int) {
82         if src.bytes == nil {
83                 return utf8.DecodeRuneInString(src.str)
84         }
85         return utf8.DecodeRune(src.bytes)
86 }
87
88 func (src *source) properties(f norm.Form) norm.Properties {
89         if src.bytes == nil {
90                 return f.PropertiesString(src.str)
91         }
92         return f.Properties(src.bytes)
93 }
94
95 // appendNext appends the weights corresponding to the next rune or
96 // contraction in s.  If a contraction is matched to a discontinuous
97 // sequence of runes, the weights for the interstitial runes are
98 // appended as well.  It returns a new slice that includes the appended
99 // weights and the number of bytes consumed from s.
100 func (t *Table) appendNext(w []Elem, src source) (res []Elem, n int) {
101         ce, sz := src.lookup(t)
102         tp := ce.ctype()
103         if tp == ceNormal {
104                 if ce == 0 {
105                         r, _ := src.rune()
106                         const (
107                                 hangulSize  = 3
108                                 firstHangul = 0xAC00
109                                 lastHangul  = 0xD7A3
110                         )
111                         if r >= firstHangul && r <= lastHangul {
112                                 // TODO: performance can be considerably improved here.
113                                 n = sz
114                                 var buf [16]byte // Used for decomposing Hangul.
115                                 for b := src.nfd(buf[:0], hangulSize); len(b) > 0; b = b[sz:] {
116                                         ce, sz = t.Index.lookup(b)
117                                         w = append(w, ce)
118                                 }
119                                 return w, n
120                         }
121                         ce = makeImplicitCE(implicitPrimary(r))
122                 }
123                 w = append(w, ce)
124         } else if tp == ceExpansionIndex {
125                 w = t.appendExpansion(w, ce)
126         } else if tp == ceContractionIndex {
127                 n := 0
128                 src.tail(sz)
129                 if src.bytes == nil {
130                         w, n = t.matchContractionString(w, ce, src.str)
131                 } else {
132                         w, n = t.matchContraction(w, ce, src.bytes)
133                 }
134                 sz += n
135         } else if tp == ceDecompose {
136                 // Decompose using NFKD and replace tertiary weights.
137                 t1, t2 := splitDecompose(ce)
138                 i := len(w)
139                 nfkd := src.properties(norm.NFKD).Decomposition()
140                 for p := 0; len(nfkd) > 0; nfkd = nfkd[p:] {
141                         w, p = t.appendNext(w, source{bytes: nfkd})
142                 }
143                 w[i] = w[i].updateTertiary(t1)
144                 if i++; i < len(w) {
145                         w[i] = w[i].updateTertiary(t2)
146                         for i++; i < len(w); i++ {
147                                 w[i] = w[i].updateTertiary(maxTertiary)
148                         }
149                 }
150         }
151         return w, sz
152 }
153
154 func (t *Table) appendExpansion(w []Elem, ce Elem) []Elem {
155         i := splitExpandIndex(ce)
156         n := int(t.ExpandElem[i])
157         i++
158         for _, ce := range t.ExpandElem[i : i+n] {
159                 w = append(w, Elem(ce))
160         }
161         return w
162 }
163
164 func (t *Table) matchContraction(w []Elem, ce Elem, suffix []byte) ([]Elem, int) {
165         index, n, offset := splitContractIndex(ce)
166
167         scan := t.ContractTries.scanner(index, n, suffix)
168         buf := [norm.MaxSegmentSize]byte{}
169         bufp := 0
170         p := scan.scan(0)
171
172         if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf {
173                 // By now we should have filtered most cases.
174                 p0 := p
175                 bufn := 0
176                 rune := norm.NFD.Properties(suffix[p:])
177                 p += rune.Size()
178                 if rune.LeadCCC() != 0 {
179                         prevCC := rune.TrailCCC()
180                         // A gap may only occur in the last normalization segment.
181                         // This also ensures that len(scan.s) < norm.MaxSegmentSize.
182                         if end := norm.NFD.FirstBoundary(suffix[p:]); end != -1 {
183                                 scan.s = suffix[:p+end]
184                         }
185                         for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf {
186                                 rune = norm.NFD.Properties(suffix[p:])
187                                 if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc {
188                                         break
189                                 }
190                                 prevCC = rune.TrailCCC()
191                                 if pp := scan.scan(p); pp != p {
192                                         // Copy the interstitial runes for later processing.
193                                         bufn += copy(buf[bufn:], suffix[p0:p])
194                                         if scan.pindex == pp {
195                                                 bufp = bufn
196                                         }
197                                         p, p0 = pp, pp
198                                 } else {
199                                         p += rune.Size()
200                                 }
201                         }
202                 }
203         }
204         // Append weights for the matched contraction, which may be an expansion.
205         i, n := scan.result()
206         ce = Elem(t.ContractElem[i+offset])
207         if ce.ctype() == ceNormal {
208                 w = append(w, ce)
209         } else {
210                 w = t.appendExpansion(w, ce)
211         }
212         // Append weights for the runes in the segment not part of the contraction.
213         for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] {
214                 w, p = t.appendNext(w, source{bytes: b})
215         }
216         return w, n
217 }
218
219 // TODO: unify the two implementations. This is best done after first simplifying
220 // the algorithm taking into account the inclusion of both NFC and NFD forms
221 // in the table.
222 func (t *Table) matchContractionString(w []Elem, ce Elem, suffix string) ([]Elem, int) {
223         index, n, offset := splitContractIndex(ce)
224
225         scan := t.ContractTries.scannerString(index, n, suffix)
226         buf := [norm.MaxSegmentSize]byte{}
227         bufp := 0
228         p := scan.scan(0)
229
230         if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf {
231                 // By now we should have filtered most cases.
232                 p0 := p
233                 bufn := 0
234                 rune := norm.NFD.PropertiesString(suffix[p:])
235                 p += rune.Size()
236                 if rune.LeadCCC() != 0 {
237                         prevCC := rune.TrailCCC()
238                         // A gap may only occur in the last normalization segment.
239                         // This also ensures that len(scan.s) < norm.MaxSegmentSize.
240                         if end := norm.NFD.FirstBoundaryInString(suffix[p:]); end != -1 {
241                                 scan.s = suffix[:p+end]
242                         }
243                         for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf {
244                                 rune = norm.NFD.PropertiesString(suffix[p:])
245                                 if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc {
246                                         break
247                                 }
248                                 prevCC = rune.TrailCCC()
249                                 if pp := scan.scan(p); pp != p {
250                                         // Copy the interstitial runes for later processing.
251                                         bufn += copy(buf[bufn:], suffix[p0:p])
252                                         if scan.pindex == pp {
253                                                 bufp = bufn
254                                         }
255                                         p, p0 = pp, pp
256                                 } else {
257                                         p += rune.Size()
258                                 }
259                         }
260                 }
261         }
262         // Append weights for the matched contraction, which may be an expansion.
263         i, n := scan.result()
264         ce = Elem(t.ContractElem[i+offset])
265         if ce.ctype() == ceNormal {
266                 w = append(w, ce)
267         } else {
268                 w = t.appendExpansion(w, ce)
269         }
270         // Append weights for the runes in the segment not part of the contraction.
271         for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] {
272                 w, p = t.appendNext(w, source{bytes: b})
273         }
274         return w, n
275 }