OSDN Git Service

Hulk did something
[bytom/vapor.git] / vendor / golang.org / x / text / search / pattern_test.go
diff --git a/vendor/golang.org/x/text/search/pattern_test.go b/vendor/golang.org/x/text/search/pattern_test.go
new file mode 100644 (file)
index 0000000..931fa65
--- /dev/null
@@ -0,0 +1,357 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package search
+
+import (
+       "reflect"
+       "strings"
+       "testing"
+
+       "golang.org/x/text/language"
+)
+
+func TestCompile(t *testing.T) {
+       for i, tc := range []struct {
+               desc    string
+               pattern string
+               options []Option
+               n       int
+       }{{
+               desc:    "empty",
+               pattern: "",
+               n:       0,
+       }, {
+               desc:    "single",
+               pattern: "a",
+               n:       1,
+       }, {
+               desc:    "keep modifier",
+               pattern: "a\u0300", // U+0300: COMBINING GRAVE ACCENT
+               n:       2,
+       }, {
+               desc:    "remove modifier",
+               pattern: "a\u0300", // U+0300: COMBINING GRAVE ACCENT
+               options: []Option{IgnoreDiacritics},
+               n:       1,
+       }, {
+               desc:    "single with double collation element",
+               pattern: "ä",
+               n:       2,
+       }, {
+               desc:    "leading variable",
+               pattern: " a",
+               n:       2,
+       }, {
+               desc:    "trailing variable",
+               pattern: "aa ",
+               n:       3,
+       }, {
+               desc:    "leading and trailing variable",
+               pattern: " äb ",
+               n:       5,
+       }, {
+               desc:    "keep interior variable",
+               pattern: " ä b ",
+               n:       6,
+       }, {
+               desc:    "keep interior variables",
+               pattern: " b  ä ",
+               n:       7,
+       }, {
+               desc:    "remove ignoreables (zero-weights across the board)",
+               pattern: "\u009Db\u009Dä\u009D", // U+009D: OPERATING SYSTEM COMMAND
+               n:       3,
+       }} {
+               m := New(language.Und, tc.options...)
+               p := m.CompileString(tc.pattern)
+               if len(p.ce) != tc.n {
+                       t.Errorf("%d:%s: Compile(%+q): got %d; want %d", i, tc.desc, tc.pattern, len(p.ce), tc.n)
+               }
+       }
+}
+
+func TestNorm(t *testing.T) {
+       // U+0300: COMBINING GRAVE ACCENT (CCC=230)
+       // U+031B: COMBINING HORN (CCC=216)
+       for _, tc := range []struct {
+               desc string
+               a    string
+               b    string
+               want bool // a and b compile into the same pattern?
+       }{{
+               "simple",
+               "eee\u0300\u031b",
+               "eee\u031b\u0300",
+               true,
+       }, {
+               "large number of modifiers in pattern",
+               strings.Repeat("\u0300", 29) + "\u0318",
+               "\u0318" + strings.Repeat("\u0300", 29),
+               true,
+       }, {
+               "modifier overflow in pattern",
+               strings.Repeat("\u0300", 30) + "\u0318",
+               "\u0318" + strings.Repeat("\u0300", 30),
+               false,
+       }} {
+               m := New(language.Und)
+               a := m.CompileString(tc.a)
+               b := m.CompileString(tc.b)
+               if got := reflect.DeepEqual(a, b); got != tc.want {
+                       t.Errorf("Compile(a) == Compile(b) == %v; want %v", got, tc.want)
+               }
+       }
+}
+
+func TestForwardSearch(t *testing.T) {
+       for i, tc := range []struct {
+               desc    string
+               tag     string
+               options []Option
+               pattern string
+               text    string
+               want    []int
+       }{{
+               // The semantics of an empty search is to match nothing.
+               // TODO: change this to be in line with strings.Index? It is quite a
+               // different beast, so not sure yet.
+
+               desc:    "empty pattern and text",
+               tag:     "und",
+               pattern: "",
+               text:    "",
+               want:    nil, // TODO: consider: []int{0, 0},
+       }, {
+               desc:    "non-empty pattern and empty text",
+               tag:     "und",
+               pattern: " ",
+               text:    "",
+               want:    nil,
+       }, {
+               desc:    "empty pattern and non-empty text",
+               tag:     "und",
+               pattern: "",
+               text:    "abc",
+               want:    nil, // TODO: consider: []int{0, 0, 1, 1, 2, 2, 3, 3},
+       }, {
+               // Variable-only patterns. We don't support variables at the moment,
+               // but verify that, given this, the behavior is indeed as expected.
+
+               desc:    "exact match of variable",
+               tag:     "und",
+               pattern: " ",
+               text:    " ",
+               want:    []int{0, 1},
+       }, {
+               desc:    "variables not handled by default",
+               tag:     "und",
+               pattern: "- ",
+               text:    " -",
+               want:    nil, // Would be (1, 2) for a median match with variable}.
+       }, {
+               desc:    "multiple subsequent identical variables",
+               tag:     "und",
+               pattern: " ",
+               text:    "    ",
+               want:    []int{0, 1, 1, 2, 2, 3, 3, 4},
+       }, {
+               desc:    "text with variables",
+               tag:     "und",
+               options: []Option{IgnoreDiacritics},
+               pattern: "abc",
+               text:    "3 abc 3",
+               want:    []int{2, 5},
+       }, {
+               desc:    "pattern with interior variables",
+               tag:     "und",
+               options: []Option{IgnoreDiacritics},
+               pattern: "a b c",
+               text:    "3 a b c abc a  b  c 3",
+               want:    []int{2, 7}, // Would have 3 matches using variable.
+
+               // TODO: Different variable handling settings.
+       }, {
+               // Options.
+
+               desc:    "match all levels",
+               tag:     "und",
+               pattern: "Abc",
+               text:    "abcAbcABCÁbcábc",
+               want:    []int{3, 6},
+       }, {
+               desc:    "ignore diacritics in text",
+               tag:     "und",
+               options: []Option{IgnoreDiacritics},
+               pattern: "Abc",
+               text:    "Ábc",
+               want:    []int{0, 4},
+       }, {
+               desc:    "ignore diacritics in pattern",
+               tag:     "und",
+               options: []Option{IgnoreDiacritics},
+               pattern: "Ábc",
+               text:    "Abc",
+               want:    []int{0, 3},
+       }, {
+               desc:    "ignore diacritics",
+               tag:     "und",
+               options: []Option{IgnoreDiacritics},
+               pattern: "Abc",
+               text:    "abcAbcABCÁbcábc",
+               want:    []int{3, 6, 9, 13},
+       }, {
+               desc:    "ignore case",
+               tag:     "und",
+               options: []Option{IgnoreCase},
+               pattern: "Abc",
+               text:    "abcAbcABCÁbcábc",
+               want:    []int{0, 3, 3, 6, 6, 9},
+       }, {
+               desc:    "ignore case and diacritics",
+               tag:     "und",
+               options: []Option{IgnoreCase, IgnoreDiacritics},
+               pattern: "Abc",
+               text:    "abcAbcABCÁbcábc",
+               want:    []int{0, 3, 3, 6, 6, 9, 9, 13, 13, 17},
+       }, {
+               desc:    "ignore width to fullwidth",
+               tag:     "und",
+               options: []Option{IgnoreWidth},
+               pattern: "abc",
+               text:    "123 \uFF41\uFF42\uFF43 123", // U+FF41-3: FULLWIDTH LATIN SMALL LETTER A-C
+               want:    []int{4, 13},
+       }, {
+               // TODO: distinguish between case and width.
+               desc:    "don't ignore width to fullwidth, ignoring only case",
+               tag:     "und",
+               options: []Option{IgnoreCase},
+               pattern: "abc",
+               text:    "123 \uFF41\uFF42\uFF43 123", // U+FF41-3: FULLWIDTH LATIN SMALL LETTER A-C
+               want:    []int{4, 13},
+       }, {
+               desc:    "ignore width to fullwidth and diacritics",
+               tag:     "und",
+               options: []Option{IgnoreWidth, IgnoreDiacritics},
+               pattern: "abc",
+               text:    "123 \uFF41\uFF42\uFF43 123", // U+FF41-3: FULLWIDTH LATIN SMALL LETTER A-C
+               want:    []int{4, 13},
+       }, {
+               desc:    "whole grapheme, single rune",
+               tag:     "und",
+               pattern: "eee",
+               text:    "123 eeé 123",
+               want:    nil,
+       }, {
+               // Note: rules on when to apply contractions may, for certain languages,
+               // differ between search and collation. For example, "ch" is not
+               // considered a contraction for the purpose of searching in Spanish.
+               // Therefore, be careful picking this test.
+               desc:    "whole grapheme, contractions",
+               tag:     "da",
+               pattern: "aba",
+               // Fails at the primary level, because "aa" is a contraction.
+               text: "123 abaa 123",
+               want: []int{},
+       }, {
+               desc:    "whole grapheme, trailing modifier",
+               tag:     "und",
+               pattern: "eee",
+               text:    "123 eee\u0300 123", // U+0300: COMBINING GRAVE ACCENT
+               want:    nil,
+       }, {
+               // Language-specific matching.
+
+               desc:    "",
+               tag:     "da",
+               options: []Option{IgnoreCase},
+               pattern: "Århus",
+               text:    "AarhusÅrhus  Århus  ",
+               want:    []int{0, 6, 6, 12, 14, 20},
+       }, {
+               desc:    "",
+               tag:     "da",
+               options: []Option{IgnoreCase},
+               pattern: "Aarhus",
+               text:    "Århus Aarhus",
+               want:    []int{0, 6, 7, 13},
+       }, {
+               desc:    "",
+               tag:     "en", // Å does not match A for English.
+               options: []Option{IgnoreCase},
+               pattern: "Aarhus",
+               text:    "Århus",
+               want:    nil,
+       }, {
+               desc:    "ignore modifier in text",
+               options: []Option{IgnoreDiacritics},
+               tag:     "und",
+               pattern: "eee",
+               text:    "123 eee\u0300 123", // U+0300: COMBINING GRAVE ACCENT
+               want:    []int{4, 9},         // Matches on grapheme boundary.
+       }, {
+               desc:    "ignore multiple modifiers in text",
+               options: []Option{IgnoreDiacritics},
+               tag:     "und",
+               pattern: "eee",
+               text:    "123 eee\u0300\u0300 123", // U+0300: COMBINING GRAVE ACCENT
+               want:    []int{4, 11},              // Matches on grapheme boundary.
+       }, {
+               desc:    "ignore modifier in pattern",
+               options: []Option{IgnoreDiacritics},
+               tag:     "und",
+               pattern: "eee\u0300", // U+0300: COMBINING GRAVE ACCENT
+               text:    "123 eee 123",
+               want:    []int{4, 7},
+       }, {
+               desc:    "ignore multiple modifiers in pattern",
+               options: []Option{IgnoreDiacritics},
+               tag:     "und",
+               pattern: "eee\u0300\u0300", // U+0300: COMBINING GRAVE ACCENT
+               text:    "123 eee 123",
+               want:    []int{4, 7},
+       }, {
+               desc: "match non-normalized pattern",
+               tag:  "und",
+               // U+0300: COMBINING GRAVE ACCENT (CCC=230)
+               // U+031B: COMBINING HORN (CCC=216)
+               pattern: "eee\u0300\u031b",
+               text:    "123 eee\u031b\u0300 123",
+               want:    []int{4, 11},
+       }, {
+               desc: "match non-normalized text",
+               tag:  "und",
+               // U+0300: COMBINING GRAVE ACCENT (CCC=230)
+               // U+031B: COMBINING HORN (CCC=216)
+               pattern: "eee\u031b\u0300",
+               text:    "123 eee\u0300\u031b 123",
+               want:    []int{4, 11},
+       }} {
+               m := New(language.MustParse(tc.tag), tc.options...)
+               p := m.CompileString(tc.pattern)
+               for j := 0; j < len(tc.text); {
+                       start, end := p.IndexString(tc.text[j:])
+                       if start == -1 && end == -1 {
+                               j++
+                               continue
+                       }
+                       start += j
+                       end += j
+                       j = end
+                       if len(tc.want) == 0 {
+                               t.Errorf("%d:%s: found unexpected result [%d %d]", i, tc.desc, start, end)
+                               break
+                       }
+                       if tc.want[0] != start || tc.want[1] != end {
+                               t.Errorf("%d:%s: got [%d %d]; want %v", i, tc.desc, start, end, tc.want[:2])
+                               tc.want = tc.want[2:]
+                               break
+                       }
+                       tc.want = tc.want[2:]
+               }
+               if len(tc.want) != 0 {
+                       t.Errorf("%d:%s: %d extra results", i, tc.desc, len(tc.want)/2)
+               }
+       }
+}