1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
11 "golang.org/x/text/internal/testtext"
12 "golang.org/x/text/transform"
13 "golang.org/x/text/unicode/bidi"
17 strL = "ABC" // Left to right - most letters in LTR scripts
18 strR = "עברית" // Right to left - most letters in non-Arabic RTL scripts
19 strAL = "دبي" // Arabic letters - most letters in the Arabic script
20 strEN = "123" // European Number (0-9, and Extended Arabic-Indic numbers)
21 strES = "+-" // European Number Separator (+ and -)
22 strET = "$" // European Number Terminator (currency symbols, the hash sign, the percent sign and so on)
23 strAN = "\u0660" // Arabic Number; this encompasses the Arabic-Indic numbers, but not the Extended Arabic-Indic numbers
24 strCS = "," // Common Number Separator (. , / : et al)
25 strNSM = "\u0300" // Nonspacing Mark - most combining accents
26 strBN = "\u200d" // Boundary Neutral - control characters (ZWNJ, ZWJ, and others)
27 strB = "\u2029" // Paragraph Separator
28 strS = "\u0009" // Segment Separator
29 strWS = " " // Whitespace, including the SPACE character
30 strON = "@" // Other Neutrals, including @, &, parentheses, MIDDLE DOT
33 type ruleTest struct {
36 n int // position at which the rule fails
39 // For tests that split the string in two.
40 pSrc int // number of source bytes to consume first
41 szDst int // size of destination buffer
42 nSrc int // source bytes consumed and bytes written
43 err0 error // error after first run
46 var testCases = [][]ruleTest{
48 // Invalid UTF-8 is invalid.
51 dir: bidi.LeftToRight,
54 dir: bidi.LeftToRight,
59 dir: bidi.LeftToRight,
64 dir: bidi.LeftToRight,
69 dir: bidi.LeftToRight,
74 dir: bidi.LeftToRight,
79 dir: bidi.LeftToRight,
84 dir: bidi.RightToLeft,
89 dir: bidi.RightToLeft,
93 in: strAL + "\xcc" + strR,
94 dir: bidi.RightToLeft,
99 dir: bidi.RightToLeft,
104 // Rule 2.1: The first character must be a character with Bidi property L,
105 // R, or AL. If it has the R or AL property, it is an RTL label; if it has
106 // the L property, it is an LTR label.
109 dir: bidi.LeftToRight,
112 dir: bidi.RightToLeft,
115 dir: bidi.RightToLeft,
118 dir: bidi.RightToLeft,
122 dir: bidi.LeftToRight,
123 err: nil, // not an RTL string
126 dir: bidi.LeftToRight,
127 err: nil, // not an RTL string
130 dir: bidi.LeftToRight,
131 err: nil, // not an RTL string
134 dir: bidi.LeftToRight,
135 err: nil, // not an RTL string
138 dir: bidi.LeftToRight,
139 err: nil, // not an RTL string
142 dir: bidi.LeftToRight,
143 err: nil, // not an RTL string
146 dir: bidi.LeftToRight,
147 err: nil, // not an RTL string
150 dir: bidi.LeftToRight,
151 err: nil, // not an RTL string
154 dir: bidi.LeftToRight,
155 err: nil, // not an RTL string
158 dir: bidi.LeftToRight,
159 err: nil, // not an RTL string
162 dir: bidi.RightToLeft,
167 dir: bidi.RightToLeft,
172 dir: bidi.RightToLeft,
177 dir: bidi.RightToLeft,
182 dir: bidi.RightToLeft,
187 dir: bidi.RightToLeft,
192 dir: bidi.RightToLeft,
197 dir: bidi.RightToLeft,
202 dir: bidi.RightToLeft,
207 dir: bidi.RightToLeft,
212 // Rule 2.2: In an RTL label, only characters with the Bidi properties R,
213 // AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed.
215 in: strR + strR + strAL,
216 dir: bidi.RightToLeft,
218 in: strR + strAL + strR,
219 dir: bidi.RightToLeft,
221 in: strR + strAN + strAL,
222 dir: bidi.RightToLeft,
224 in: strR + strEN + strR,
225 dir: bidi.RightToLeft,
227 in: strR + strES + strR,
228 dir: bidi.RightToLeft,
230 in: strR + strCS + strR,
231 dir: bidi.RightToLeft,
233 in: strR + strET + strAL,
234 dir: bidi.RightToLeft,
236 in: strR + strON + strR,
237 dir: bidi.RightToLeft,
239 in: strR + strBN + strR,
240 dir: bidi.RightToLeft,
242 in: strR + strNSM + strAL,
243 dir: bidi.RightToLeft,
245 in: strR + strL + strR,
246 dir: bidi.RightToLeft,
250 in: strR + strB + strR,
251 dir: bidi.RightToLeft,
255 in: strR + strS + strAL,
256 dir: bidi.RightToLeft,
260 in: strR + strWS + strAL,
261 dir: bidi.RightToLeft,
265 in: strAL + strR + strAL,
266 dir: bidi.RightToLeft,
268 in: strAL + strAL + strR,
269 dir: bidi.RightToLeft,
271 in: strAL + strAN + strAL,
272 dir: bidi.RightToLeft,
274 in: strAL + strEN + strR,
275 dir: bidi.RightToLeft,
277 in: strAL + strES + strR,
278 dir: bidi.RightToLeft,
280 in: strAL + strCS + strR,
281 dir: bidi.RightToLeft,
283 in: strAL + strET + strAL,
284 dir: bidi.RightToLeft,
286 in: strAL + strON + strR,
287 dir: bidi.RightToLeft,
289 in: strAL + strBN + strR,
290 dir: bidi.RightToLeft,
292 in: strAL + strNSM + strAL,
293 dir: bidi.RightToLeft,
295 in: strAL + strL + strR,
296 dir: bidi.RightToLeft,
300 in: strAL + strB + strR,
301 dir: bidi.RightToLeft,
305 in: strAL + strS + strAL,
306 dir: bidi.RightToLeft,
310 in: strAL + strWS + strAL,
311 dir: bidi.RightToLeft,
316 // Rule 2.3: In an RTL label, the end of the label must be a character with
317 // Bidi property R, AL, EN, or AN, followed by zero or more characters with
318 // Bidi property NSM.
321 dir: bidi.RightToLeft,
324 dir: bidi.RightToLeft,
326 in: strR + strAL + strNSM,
327 dir: bidi.RightToLeft,
329 in: strR + strEN + strNSM + strNSM,
330 dir: bidi.RightToLeft,
333 dir: bidi.RightToLeft,
335 in: strR + strES + strNSM,
336 dir: bidi.RightToLeft,
337 n: len(strR + strES + strNSM),
340 in: strR + strCS + strNSM + strNSM,
341 dir: bidi.RightToLeft,
342 n: len(strR + strCS + strNSM + strNSM),
346 dir: bidi.RightToLeft,
347 n: len(strR + strET),
350 in: strR + strON + strNSM,
351 dir: bidi.RightToLeft,
352 n: len(strR + strON + strNSM),
355 in: strR + strBN + strNSM + strNSM,
356 dir: bidi.RightToLeft,
357 n: len(strR + strBN + strNSM + strNSM),
360 in: strR + strL + strNSM,
361 dir: bidi.RightToLeft,
365 in: strR + strB + strNSM + strNSM,
366 dir: bidi.RightToLeft,
371 dir: bidi.RightToLeft,
376 dir: bidi.RightToLeft,
381 dir: bidi.RightToLeft,
384 dir: bidi.RightToLeft,
386 in: strAL + strAL + strNSM,
387 dir: bidi.RightToLeft,
389 in: strAL + strEN + strNSM + strNSM,
390 dir: bidi.RightToLeft,
393 dir: bidi.RightToLeft,
395 in: strAL + strES + strNSM,
396 dir: bidi.RightToLeft,
397 n: len(strAL + strES + strNSM),
400 in: strAL + strCS + strNSM + strNSM,
401 dir: bidi.RightToLeft,
402 n: len(strAL + strCS + strNSM + strNSM),
406 dir: bidi.RightToLeft,
407 n: len(strAL + strET),
410 in: strAL + strON + strNSM,
411 dir: bidi.RightToLeft,
412 n: len(strAL + strON + strNSM),
415 in: strAL + strBN + strNSM + strNSM,
416 dir: bidi.RightToLeft,
417 n: len(strAL + strBN + strNSM + strNSM),
420 in: strAL + strL + strNSM,
421 dir: bidi.RightToLeft,
425 in: strAL + strB + strNSM + strNSM,
426 dir: bidi.RightToLeft,
431 dir: bidi.RightToLeft,
436 dir: bidi.RightToLeft,
441 // Rule 2.4: In an RTL label, if an EN is present, no AN may be present,
444 in: strR + strEN + strAN,
445 dir: bidi.RightToLeft,
446 n: len(strR + strEN),
449 in: strR + strAN + strEN + strNSM,
450 dir: bidi.RightToLeft,
451 n: len(strR + strAN),
454 in: strAL + strEN + strAN,
455 dir: bidi.RightToLeft,
456 n: len(strAL + strEN),
459 in: strAL + strAN + strEN + strNSM,
460 dir: bidi.RightToLeft,
461 n: len(strAL + strAN),
465 // Rule 2.5: In an LTR label, only characters with the Bidi properties L,
466 // EN, ES, CS, ET, ON, BN, or NSM are allowed.
468 in: strL + strL + strL,
469 dir: bidi.LeftToRight,
471 in: strL + strEN + strL,
472 dir: bidi.LeftToRight,
474 in: strL + strES + strL,
475 dir: bidi.LeftToRight,
477 in: strL + strCS + strL,
478 dir: bidi.LeftToRight,
480 in: strL + strET + strL,
481 dir: bidi.LeftToRight,
483 in: strL + strON + strL,
484 dir: bidi.LeftToRight,
486 in: strL + strBN + strL,
487 dir: bidi.LeftToRight,
489 in: strL + strNSM + strL,
490 dir: bidi.LeftToRight,
492 in: strL + strR + strL,
493 dir: bidi.RightToLeft,
497 in: strL + strAL + strL,
498 dir: bidi.RightToLeft,
502 in: strL + strAN + strL,
503 dir: bidi.RightToLeft,
507 in: strL + strB + strL,
508 dir: bidi.LeftToRight,
509 n: len(strL + strAN + strL),
512 in: strL + strB + strL + strR,
513 dir: bidi.RightToLeft,
514 n: len(strL + strB + strL),
517 in: strL + strS + strL,
518 dir: bidi.LeftToRight,
519 n: len(strL + strS + strL),
522 in: strL + strS + strL + strR,
523 dir: bidi.RightToLeft,
524 n: len(strL + strS + strL),
527 in: strL + strWS + strL,
528 dir: bidi.LeftToRight,
529 n: len(strL + strWS + strL),
532 in: strL + strWS + strL + strR,
533 dir: bidi.RightToLeft,
534 n: len(strL + strWS + strL),
538 // Rule 2.6: In an LTR label, the end of the label must be a character with
539 // Bidi property L or EN, followed by zero or more characters with Bidi
543 dir: bidi.LeftToRight,
546 dir: bidi.LeftToRight,
548 in: strL + strNSM + strNSM,
549 dir: bidi.LeftToRight,
552 dir: bidi.LeftToRight,
554 in: strL + strEN + strNSM,
555 dir: bidi.LeftToRight,
557 in: strL + strEN + strNSM + strNSM,
558 dir: bidi.LeftToRight,
561 dir: bidi.LeftToRight,
562 n: len(strL + strES),
565 in: strL + strES + strR,
566 dir: bidi.RightToLeft,
567 n: len(strL + strES),
571 dir: bidi.LeftToRight,
572 n: len(strL + strCS),
575 in: strL + strCS + strR,
576 dir: bidi.RightToLeft,
577 n: len(strL + strCS),
581 dir: bidi.LeftToRight,
582 n: len(strL + strET),
585 in: strL + strET + strR,
586 dir: bidi.RightToLeft,
587 n: len(strL + strET),
591 dir: bidi.LeftToRight,
592 n: len(strL + strON),
595 in: strL + strON + strR,
596 dir: bidi.RightToLeft,
597 n: len(strL + strON),
601 dir: bidi.LeftToRight,
602 n: len(strL + strBN),
605 in: strL + strBN + strR,
606 dir: bidi.RightToLeft,
607 n: len(strL + strBN),
611 dir: bidi.RightToLeft,
616 dir: bidi.RightToLeft,
621 dir: bidi.RightToLeft,
626 dir: bidi.LeftToRight,
630 in: strL + strB + strR,
631 dir: bidi.RightToLeft,
636 dir: bidi.LeftToRight,
640 in: strL + strB + strR,
641 dir: bidi.RightToLeft,
646 dir: bidi.LeftToRight,
650 in: strL + strB + strR,
651 dir: bidi.RightToLeft,
656 // Incremental processing.
659 dir: bidi.LeftToRight,
663 err0: transform.ErrShortSrc,
666 dir: bidi.LeftToRight,
670 err0: transform.ErrShortSrc,
672 // Remain invalid once invalid.
674 dir: bidi.RightToLeft,
684 dir: bidi.LeftToRight,
689 err0: transform.ErrShortDst,
691 // Short destination splitting input rune
693 dir: bidi.LeftToRight,
698 err0: transform.ErrShortDst,
703 for rule, cases := range testCases {
704 for i, tc := range cases {
706 testCases[rule][i].n = len(tc.in)
712 func doTests(t *testing.T, fn func(t *testing.T, tc ruleTest)) {
713 for rule, cases := range testCases {
714 for i, tc := range cases {
715 name := fmt.Sprintf("%d/%d:%+q:%s", rule, i, tc.in, tc.in)
716 testtext.Run(t, name, func(t *testing.T) {
723 func TestDirection(t *testing.T) {
724 doTests(t, func(t *testing.T, tc ruleTest) {
725 dir := Direction([]byte(tc.in))
727 t.Errorf("dir was %v; want %v", dir, tc.dir)
732 func TestDirectionString(t *testing.T) {
733 doTests(t, func(t *testing.T, tc ruleTest) {
734 dir := DirectionString(tc.in)
736 t.Errorf("dir was %v; want %v", dir, tc.dir)
741 func TestValid(t *testing.T) {
742 doTests(t, func(t *testing.T, tc ruleTest) {
743 got := Valid([]byte(tc.in))
744 want := tc.err == nil
746 t.Fatalf("Valid: got %v; want %v", got, want)
749 got = ValidString(tc.in)
752 t.Fatalf("Valid: got %v; want %v", got, want)
757 func TestSpan(t *testing.T) {
758 doTests(t, func(t *testing.T, tc ruleTest) {
759 // Skip tests that test for limited destination buffer size.
767 n, err := r.Span(src[:tc.pSrc], tc.pSrc == len(tc.in))
769 t.Errorf("err0 was %v; want %v", err, tc.err0)
772 t.Fatalf("nSrc was %d; want %d", n, tc.nSrc)
775 n, err = r.Span(src[n:], true)
777 t.Errorf("error was %v; want %v", err, tc.err)
779 if got := n + tc.nSrc; got != tc.n {
780 t.Errorf("n was %d; want %d", got, tc.n)
785 func TestTransform(t *testing.T) {
786 doTests(t, func(t *testing.T, tc ruleTest) {
790 dst := make([]byte, len(tc.in))
792 dst = make([]byte, tc.szDst)
795 // First transform operates on a zero-length string for most tests.
796 nDst, nSrc, err := r.Transform(dst, src[:tc.pSrc], tc.pSrc == len(tc.in))
798 t.Errorf("err0 was %v; want %v", err, tc.err0)
801 t.Fatalf("nDst (%d) and nSrc (%d) should match", nDst, nSrc)
804 t.Fatalf("nSrc was %d; want %d", nSrc, tc.nSrc)
807 dst1 := make([]byte, len(tc.in))
808 copy(dst1, dst[:nDst])
810 nDst, nSrc, err = r.Transform(dst1[nDst:], src[nSrc:], true)
812 t.Errorf("error was %v; want %v", err, tc.err)
815 t.Fatalf("nDst (%d) and nSrc (%d) should match", nDst, nSrc)
819 t.Fatalf("n was %d; want %d", n, tc.n)
821 if got, want := string(dst1[:n]), tc.in[:tc.n]; got != want {
822 t.Errorf("got %+q; want %+q", got, want)