1 // Copyright 2014 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
7 import "golang.org/x/text/transform"
9 // A context is used for iterating over source bytes, fetching case info and
10 // writing to a destination buffer.
12 // Casing operations may need more than one rune of context to decide how a rune
13 // should be cased. Casing implementations should call checkpoint on context
14 // whenever it is known to be safe to return the runes processed so far.
16 // It is recommended for implementations to not allow for more than 30 case
17 // ignorables as lookahead (analogous to the limit in norm) and to use state if
18 // unbounded lookahead is needed for cased runes.
23 pDst int // pDst points past the last written rune in dst.
24 pSrc int // pSrc points to the start of the currently scanned rune.
26 // checkpoints safe to return in Transform, where nDst <= pDst and nSrc <= pSrc.
30 sz int // size of current rune
31 info info // case information of currently scanned rune
33 // State preserved across calls to Transform.
34 isMidWord bool // false if next cased letter needs to be title-cased.
37 func (c *context) Reset() {
41 // ret returns the return values for the Transform method. It checks whether
42 // there were insufficient bytes in src to complete and introduces an error
43 // accordingly, if necessary.
44 func (c *context) ret() (nDst, nSrc int, err error) {
45 if c.err != nil || c.nSrc == len(c.src) {
46 return c.nDst, c.nSrc, c.err
48 // This point is only reached by mappers if there was no short destination
49 // buffer. This means that the source buffer was exhausted and that c.sz was
51 if c.atEOF && c.pSrc == len(c.src) {
52 return c.pDst, c.pSrc, nil
54 return c.nDst, c.nSrc, transform.ErrShortSrc
57 // retSpan returns the return values for the Span method. It checks whether
58 // there were insufficient bytes in src to complete and introduces an error
59 // accordingly, if necessary.
60 func (c *context) retSpan() (n int, err error) {
61 _, nSrc, err := c.ret()
65 // checkpoint sets the return value buffer points for Transform to the current
67 func (c *context) checkpoint() {
69 c.nDst, c.nSrc = c.pDst, c.pSrc+c.sz
73 // unreadRune causes the last rune read by next to be reread on the next
74 // invocation of next. Only one unreadRune may be called after a call to next.
75 func (c *context) unreadRune() {
79 func (c *context) next() bool {
81 if c.pSrc == len(c.src) || c.err != nil {
85 v, sz := trie.lookup(c.src[c.pSrc:])
86 c.info, c.sz = info(v), sz
89 // A zero size means we have an incomplete rune. If we are atEOF,
90 // this means it is an illegal rune, which we will consume one
94 c.err = transform.ErrShortSrc
101 // writeBytes adds bytes to dst.
102 func (c *context) writeBytes(b []byte) bool {
103 if len(c.dst)-c.pDst < len(b) {
104 c.err = transform.ErrShortDst
107 // This loop is faster than using copy.
108 for _, ch := range b {
115 // writeString writes the given string to dst.
116 func (c *context) writeString(s string) bool {
117 if len(c.dst)-c.pDst < len(s) {
118 c.err = transform.ErrShortDst
121 // This loop is faster than using copy.
122 for i := 0; i < len(s); i++ {
129 // copy writes the current rune to dst.
130 func (c *context) copy() bool {
131 return c.writeBytes(c.src[c.pSrc : c.pSrc+c.sz])
134 // copyXOR copies the current rune to dst and modifies it by applying the XOR
135 // pattern of the case info. It is the responsibility of the caller to ensure
136 // that this is a rune with a XOR pattern defined.
137 func (c *context) copyXOR() bool {
141 if c.info&xorIndexBit == 0 {
142 // Fast path for 6-bit XOR pattern, which covers most cases.
143 c.dst[c.pDst-1] ^= byte(c.info >> xorShift)
145 // Interpret XOR bits as an index.
146 // TODO: test performance for unrolling this loop. Verify that we have
147 // at least two bytes and at most three.
148 idx := c.info >> xorShift
149 for p := c.pDst - 1; ; p-- {
150 c.dst[p] ^= xorData[idx]
152 if xorData[idx] == 0 {
160 // hasPrefix returns true if src[pSrc:] starts with the given string.
161 func (c *context) hasPrefix(s string) bool {
166 for i, c := range b[:len(s)] {
174 // caseType returns an info with only the case bits, normalized to either
175 // cLower, cUpper, cTitle or cUncased.
176 func (c *context) caseType() info {
182 // xor the last bit of the rune with the case type bits.
183 b := c.src[c.pSrc+c.sz-1]
184 return info(b&1) ^ cm&0x3
186 if cm == cIgnorableCased {
192 // lower writes the lowercase version of the current rune to dst.
193 func lower(c *context) bool {
195 if c.info&hasMappingMask == 0 || ct == cLower {
198 if c.info&exceptionBit == 0 {
201 e := exceptions[c.info>>exceptionShift:]
202 offset := 2 + e[0]&lengthMask // size of header + fold string
203 if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
204 return c.writeString(e[offset : offset+nLower])
209 func isLower(c *context) bool {
211 if c.info&hasMappingMask == 0 || ct == cLower {
214 if c.info&exceptionBit == 0 {
215 c.err = transform.ErrEndOfSpan
218 e := exceptions[c.info>>exceptionShift:]
219 if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
220 c.err = transform.ErrEndOfSpan
226 // upper writes the uppercase version of the current rune to dst.
227 func upper(c *context) bool {
229 if c.info&hasMappingMask == 0 || ct == cUpper {
232 if c.info&exceptionBit == 0 {
235 e := exceptions[c.info>>exceptionShift:]
236 offset := 2 + e[0]&lengthMask // size of header + fold string
237 // Get length of first special case mapping.
238 n := (e[1] >> lengthBits) & lengthMask
240 // The first special case mapping is for lower. Set n to the second.
244 n, e = e[1]&lengthMask, e[n:]
247 return c.writeString(e[offset : offset+n])
252 // isUpper reports whether the current rune is in upper case.
253 func isUpper(c *context) bool {
255 if c.info&hasMappingMask == 0 || ct == cUpper {
258 if c.info&exceptionBit == 0 {
259 c.err = transform.ErrEndOfSpan
262 e := exceptions[c.info>>exceptionShift:]
263 // Get length of first special case mapping.
264 n := (e[1] >> lengthBits) & lengthMask
266 n = e[1] & lengthMask
269 c.err = transform.ErrEndOfSpan
275 // title writes the title case version of the current rune to dst.
276 func title(c *context) bool {
278 if c.info&hasMappingMask == 0 || ct == cTitle {
281 if c.info&exceptionBit == 0 {
287 // Get the exception data.
288 e := exceptions[c.info>>exceptionShift:]
289 offset := 2 + e[0]&lengthMask // size of header + fold string
291 nFirst := (e[1] >> lengthBits) & lengthMask
292 if nTitle := e[1] & lengthMask; nTitle != noChange {
293 if nFirst != noChange {
296 return c.writeString(e[offset : offset+nTitle])
298 if ct == cLower && nFirst != noChange {
299 // Use the uppercase version instead.
300 return c.writeString(e[offset : offset+nFirst])
302 // Already in correct case.
306 // isTitle reports whether the current rune is in title case.
307 func isTitle(c *context) bool {
309 if c.info&hasMappingMask == 0 || ct == cTitle {
312 if c.info&exceptionBit == 0 {
314 c.err = transform.ErrEndOfSpan
319 // Get the exception data.
320 e := exceptions[c.info>>exceptionShift:]
321 if nTitle := e[1] & lengthMask; nTitle != noChange {
322 c.err = transform.ErrEndOfSpan
325 nFirst := (e[1] >> lengthBits) & lengthMask
326 if ct == cLower && nFirst != noChange {
327 c.err = transform.ErrEndOfSpan
333 // foldFull writes the foldFull version of the current rune to dst.
334 func foldFull(c *context) bool {
335 if c.info&hasMappingMask == 0 {
339 if c.info&exceptionBit == 0 {
340 if ct != cLower || c.info&inverseFoldBit != 0 {
345 e := exceptions[c.info>>exceptionShift:]
346 n := e[0] & lengthMask
351 n = (e[1] >> lengthBits) & lengthMask
353 return c.writeString(e[2 : 2+n])
356 // isFoldFull reports whether the current rune is mapped to foldFull.
357 func isFoldFull(c *context) bool {
358 if c.info&hasMappingMask == 0 {
362 if c.info&exceptionBit == 0 {
363 if ct != cLower || c.info&inverseFoldBit != 0 {
364 c.err = transform.ErrEndOfSpan
369 e := exceptions[c.info>>exceptionShift:]
370 n := e[0] & lengthMask
371 if n == 0 && ct == cLower {
374 c.err = transform.ErrEndOfSpan