1 // Copyright 2014 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
12 "golang.org/x/text/internal/testtext"
13 "golang.org/x/text/language"
14 "golang.org/x/text/transform"
15 "golang.org/x/text/unicode/norm"
16 "golang.org/x/text/unicode/rangetable"
19 // The following definitions are taken directly from Chapter 3 of The Unicode Standard.
// propCased reports whether r counts as a cased rune for these tests: it is
// lowercase per propLower, uppercase per propUpper, or a title-case letter
// according to unicode.IsTitle.
22 func propCased(r rune) bool {
23 return propLower(r) || propUpper(r) || unicode.IsTitle(r)
// propLower reports whether r is a lowercase letter (unicode.IsLower) or is
// listed in the unicode.Other_Lowercase range table.
26 func propLower(r rune) bool {
27 return unicode.IsLower(r) || unicode.Is(unicode.Other_Lowercase, r)
// propUpper reports whether r is an uppercase letter (unicode.IsUpper) or is
// listed in the unicode.Other_Uppercase range table.
30 func propUpper(r rune) bool {
31 return unicode.IsUpper(r) || unicode.Is(unicode.Other_Uppercase, r)
// propIgnore is the reference predicate that TestCaseProperties compares
// against isCaseIgnorable. It first checks whether r belongs to one of the
// general categories Mn, Me, Cf, Lm or Sk, and otherwise falls back to a
// lookup in caseIgnorable.
// NOTE(review): the body of the category check is elided in this listing;
// presumably it returns true - confirm against the full file.
34 func propIgnore(r rune) bool {
35 if unicode.In(r, unicode.Mn, unicode.Me, unicode.Cf, unicode.Lm, unicode.Sk) {
// Not in any of the categories above: defer to the caseIgnorable lookup,
// which is declared outside this listing.
38 return caseIgnorable[r]
// hasBreakProp is the reference predicate that TestCaseProperties compares
// against isBreak. Per the comment below it performs a binary search over
// ranges, testing r against the inclusive bounds lo and hi of a candidate
// range.
// NOTE(review): the search loop, the table being searched and the return
// statements are elided in this listing - confirm against the full file.
41 func hasBreakProp(r rune) bool {
42 // binary search over ranges
// r matches when it lies within the candidate range, bounds included.
48 if bp.lo <= r && r <= bp.hi {
// contextFromRune builds a context whose source is the UTF-8 encoding of the
// single rune r, with a 128-byte destination buffer and atEOF set, and returns
// a pointer to a context.
// NOTE(review): the statements between the construction and the return are
// elided in this listing; callers read c.info right after this call, so it is
// presumably populated here - confirm against the full file.
60 func contextFromRune(r rune) *context {
61 c := context{dst: make([]byte, 128), src: []byte(string(r)), atEOF: true}
// TestCaseProperties checks the per-rune properties stored in context.info
// against reference predicates built on the standard unicode package:
// isCaseIgnorable vs propIgnore, isCased vs propCased, caseType() == cUpper
// vs propUpper, caseType() == cLower vs propLower, and isBreak vs
// hasBreakProp. The test is skipped when the core unicode tables are at a
// different Unicode version than this package (UnicodeVersion).
66 func TestCaseProperties(t *testing.T) {
67 if unicode.Version != UnicodeVersion {
68 // Properties of existing code points may change by Unicode version, so
70 t.Skipf("Skipping as core Unicode version %s different than %s", unicode.Version, UnicodeVersion)
// Range tables of the code points assigned in this package's Unicode version
// and in the core library's version.
72 assigned := rangetable.Assigned(UnicodeVersion)
73 coreVersion := rangetable.Assigned(unicode.Version)
74 for r := rune(0); r <= lastRuneForTesting; r++ {
// Filter on runes assigned in both versions.
// NOTE(review): the branch body is elided here; presumably it skips the rune.
75 if !unicode.In(r, assigned) || !unicode.In(r, coreVersion) {
78 c := contextFromRune(r)
79 if got, want := c.info.isCaseIgnorable(), propIgnore(r); got != want {
80 t.Errorf("caseIgnorable(%U): got %v; want %v (%x)", r, got, want, c.info)
82 // New letters may change case types, but existing case pairings should
83 // not change. See Case Pair Stability in
84 // http://unicode.org/policies/stability_policy.html.
// The cased/upper/lower checks below are guarded by this condition: r has a
// simple fold other than itself and that fold is assigned in UnicodeVersion.
85 if rf := unicode.SimpleFold(r); rf != r && unicode.In(rf, assigned) {
86 if got, want := c.info.isCased(), propCased(r); got != want {
87 t.Errorf("cased(%U): got %v; want %v (%x)", r, got, want, c.info)
89 if got, want := c.caseType() == cUpper, propUpper(r); got != want {
90 t.Errorf("upper(%U): got %v; want %v (%x)", r, got, want, c.info)
92 if got, want := c.caseType() == cLower, propLower(r); got != want {
93 t.Errorf("lower(%U): got %v; want %v (%x)", r, got, want, c.info)
96 if got, want := c.info.isBreak(), hasBreakProp(r); got != want {
97 t.Errorf("isBreak(%U): got %v; want %v (%x)", r, got, want, c.info)
100 // TODO: get title case from unicode file.
// TestMapping checks the lower, title and upper mappings of single runes.
// Entries of the special table are compared with the toLower, toTitle and
// toUpper strings recorded in that table; the main loop over all runes up to
// lastRuneForTesting compares each mapping with unicode.ToLower,
// unicode.ToUpper and unicode.ToTitle.
103 func TestMapping(t *testing.T) {
104 assigned := rangetable.Assigned(UnicodeVersion)
105 coreVersion := rangetable.Assigned(unicode.Version)
// Fall back to this package's table when rangetable.Assigned returns nil for
// the core library's Unicode version.
106 if coreVersion == nil {
107 coreVersion = assigned
// apply takes a mapping function f and returns the bytes written to the
// destination buffer of a fresh context for r, as a string.
// NOTE(review): the call that runs f is elided in this listing.
109 apply := func(r rune, f func(c *context) bool) string {
110 c := contextFromRune(r)
112 return string(c.dst[:c.pDst])
// Runes with special mappings: expected values come from the special table.
115 for r, tt := range special {
116 if got, want := apply(r, lower), tt.toLower; got != want {
117 t.Errorf("lowerSpecial:(%U): got %+q; want %+q", r, got, want)
119 if got, want := apply(r, title), tt.toTitle; got != want {
120 t.Errorf("titleSpecial:(%U): got %+q; want %+q", r, got, want)
122 if got, want := apply(r, upper), tt.toUpper; got != want {
123 t.Errorf("upperSpecial:(%U): got %+q; want %+q", r, got, want)
// All other runes: expected values come from the standard unicode package.
// NOTE(review): the bodies of the three filters below are elided; presumably
// each one skips the rune (unassigned, no assigned simple fold, or already
// covered by the special table).
127 for r := rune(0); r <= lastRuneForTesting; r++ {
128 if !unicode.In(r, assigned) || !unicode.In(r, coreVersion) {
131 if rf := unicode.SimpleFold(r); rf == r || !unicode.In(rf, assigned) {
134 if _, ok := special[r]; ok {
137 want := string(unicode.ToLower(r))
138 if got := apply(r, lower); got != want {
139 t.Errorf("lower:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want))
142 want = string(unicode.ToUpper(r))
143 if got := apply(r, upper); got != want {
144 t.Errorf("upper:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want))
147 want = string(unicode.ToTitle(r))
148 if got := apply(r, title); got != want {
149 t.Errorf("title:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want))
// runeFoldData returns the reference case-folding strings for r in the named
// result x, which has the fields simple, full and special. The visible
// assignments set x.simple and x.full to the lowercase mapping of r from
// unicode.ToLower.
// NOTE(review): the surrounding statements are elided in this listing, so the
// conditions under which these assignments run, and how x.special is set,
// cannot be seen here - confirm against the full file.
154 func runeFoldData(r rune) (x struct{ simple, full, special string }) {
157 x.simple = string(unicode.ToLower(r))
160 x.full = string(unicode.ToLower(r))
// TestFoldData checks the full case folding (foldFull) of every rune up to
// lastRuneForTesting against the expected string x.full. The simple and
// special foldings are not checked yet (see the TODO below).
// NOTE(review): the declaration of x is elided in this listing; presumably it
// is the result of runeFoldData(r) - confirm against the full file.
168 func TestFoldData(t *testing.T) {
169 assigned := rangetable.Assigned(UnicodeVersion)
170 coreVersion := rangetable.Assigned(unicode.Version)
// Fall back to this package's table when rangetable.Assigned returns nil for
// the core library's Unicode version.
171 if coreVersion == nil {
172 coreVersion = assigned
// apply takes a mapping function f and returns the bytes written to the
// destination buffer of a fresh context for r, together with c.info.cccType(),
// which is only used in the failure message.
// NOTE(review): the call that runs f is elided in this listing.
174 apply := func(r rune, f func(c *context) bool) (string, info) {
175 c := contextFromRune(r)
177 return string(c.dst[:c.pDst]), c.info.cccType()
179 for r := rune(0); r <= lastRuneForTesting; r++ {
// Filter on runes assigned in both versions (branch body elided here).
180 if !unicode.In(r, assigned) || !unicode.In(r, coreVersion) {
184 if got, info := apply(r, foldFull); got != x.full {
185 t.Errorf("full:%q (%U): got %q %U; want %q %U (ccc=%x)", r, r, got, []rune(got), x.full, []rune(x.full), info)
187 // TODO: special and simple.
// TestCCC checks c.info.cccType() for every rune up to lastRuneForTesting
// against an expected value want. Runes are filtered on being assigned both in
// this package's Unicode version and in the version of the norm package.
// NOTE(review): the lines deriving want are elided in this listing; presumably
// it is computed from the NFC properties p obtained below - confirm against
// the full file.
191 func TestCCC(t *testing.T) {
192 assigned := rangetable.Assigned(UnicodeVersion)
193 normVersion := rangetable.Assigned(norm.Version)
194 for r := rune(0); r <= lastRuneForTesting; r++ {
// Filter on runes assigned in both versions (branch body elided here).
195 if !unicode.In(r, assigned) || !unicode.In(r, normVersion) {
198 c := contextFromRune(r)
// Normalization properties of r, taken from the norm package as the reference.
200 p := norm.NFC.PropertiesString(string(r))
208 if got := c.info.cccType(); got != want {
209 t.Errorf("%U: got %x; want %x", r, got, want)
// TestWordBreaks runs one subtest per entry of breakTest. Each entry is split
// on "|" into parts; an expected string is built from the parts using
// unicode.ToTitle and unicode.ToLower, and the parts joined without the
// separator are passed through Title(language.Und), whose output is reported
// alongside the expected string on failure.
// NOTE(review): the declarations of want and found, the branch structure of
// the inner loop and the final comparison are elided in this listing -
// confirm against the full file.
214 func TestWordBreaks(t *testing.T) {
215 for _, tt := range breakTest {
216 testtext.Run(t, tt, func(t *testing.T) {
217 parts := strings.Split(tt, "|")
219 for _, s := range parts {
221 // This algorithm implements title casing given word breaks
222 // as defined in the Unicode standard 3.13 R3.
223 for _, r := range s {
224 title := unicode.ToTitle(r)
225 lower := unicode.ToLower(r)
// A rune whose title and lower forms differ, seen while found is still false,
// contributes its title form to want; the lowercase append below is
// presumably the alternative branch.
226 if !found && title != lower {
228 want += string(title)
230 want += string(lower)
// The input handed to Title is the parts re-joined with the "|" markers removed.
234 src := strings.Join(parts, "")
235 got := Title(language.Und).String(src)
237 t.Errorf("got %q; want %q", got, want)
// TestContext is a table-driven test of the context type. For each case a
// context is built from the case's dstSize, src and atEOF fields, the
// semicolon-separated operations in ops are applied in order, and the test
// then checks hasPrefix(prefixArg) against prefixWant and the values returned
// by c.ret() against the expected error, output string and nSrc.
// NOTE(review): most fields of the test table, the dispatch that maps each op
// name to an action, and the end of the function are elided in this listing -
// confirm against the full file.
243 func TestContext(t *testing.T) {
256 desc: "next: past end, atEOF, no checkpoint",
262 ops: "next;next;next",
263 // Test that calling prefix with a non-empty argument when the buffer
264 // is depleted returns false.
268 desc: "next: not at end, atEOF, no checkpoint",
274 err: transform.ErrShortSrc,
279 desc: "next: past end, !atEOF, no checkpoint",
285 err: transform.ErrShortSrc,
286 ops: "next;next;next",
290 desc: "next: past end, !atEOF, checkpoint",
296 ops: "next;next;checkpoint;next",
300 desc: "copy: exact count, atEOF, no checkpoint",
306 ops: "next;copy;next;copy;next",
310 desc: "copy: past end, !atEOF, no checkpoint",
316 err: transform.ErrShortSrc,
317 ops: "next;copy;next;copy;next",
321 desc: "copy: past end, !atEOF, checkpoint",
327 ops: "next;copy;next;copy;checkpoint;next",
331 desc: "copy: short dst",
337 err: transform.ErrShortDst,
338 ops: "next;copy;next;copy;checkpoint;next",
342 desc: "copy: short dst, checkpointed",
348 err: transform.ErrShortDst,
349 ops: "next;copy;checkpoint;next;copy;next",
353 desc: "writeString: simple",
359 ops: "next;copy;writeab;next",
363 desc: "writeString: short dst",
369 err: transform.ErrShortDst,
370 ops: "next;copy;writeab;next",
374 desc: "writeString: simple",
380 ops: "next;copy;next;writeab;next",
384 desc: "writeString: short dst",
390 err: transform.ErrShortDst,
391 ops: "next;copy;next;writeab;next",
401 // Context will assign an ErrShortSrc if the input wasn't exhausted.
402 err: transform.ErrShortSrc,
// Run every table entry against a fresh context.
406 for _, tt := range tests {
407 c := context{dst: make([]byte, tt.dstSize), src: []byte(tt.src), atEOF: tt.atEOF}
// Apply the scripted operations in order; an unrecognised op name aborts the
// test via t.Fatalf.
409 for _, op := range strings.Split(tt.ops, ";") {
421 t.Fatalf("unknown op %q", op)
424 if got := c.hasPrefix(tt.prefixArg); got != tt.prefixWant {
425 t.Errorf("%s:\nprefix was %v; want %v", tt.desc, got, tt.prefixWant)
// ret yields the destination byte count, the source byte count and the error
// that are checked below.
427 nDst, nSrc, err := c.ret()
429 t.Errorf("%s:\nerror was %v; want %v", tt.desc, err, tt.err)
431 if out := string(c.dst[:nDst]); out != tt.out {
432 t.Errorf("%s:\nout was %q; want %q", tt.desc, out, tt.out)
435 t.Errorf("%s:\nnSrc was %d; want %d", tt.desc, nSrc, tt.nSrc)