X-Git-Url: http://git.osdn.net/view?a=blobdiff_plain;ds=sidebyside;f=vendor%2Fgolang.org%2Fx%2Ftext%2Fsearch%2Fsearch.go;fp=vendor%2Fgolang.org%2Fx%2Ftext%2Fsearch%2Fsearch.go;h=894b6c6c2b44f3b631e5edf338d0a5d68b38c87f;hb=db158dcf09436b003defd333f1a665e7e051d820;hp=0000000000000000000000000000000000000000;hpb=d09b7a78d44dc259725902b8141cdba0d716b121;p=bytom%2Fvapor.git diff --git a/vendor/golang.org/x/text/search/search.go b/vendor/golang.org/x/text/search/search.go new file mode 100644 index 00000000..894b6c6c --- /dev/null +++ b/vendor/golang.org/x/text/search/search.go @@ -0,0 +1,237 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:generate go run ../collate/maketables.go -cldr=23 -unicode=6.2.0 -types=search,searchjl -package=search + +// Package search provides language-specific search and string matching. +// +// Natural language matching can be intricate. For example, Danish will insist +// "Århus" and "Aarhus" are the same name and Turkish will match I to ı (note +// the lack of a dot) in a case-insensitive match. This package handles such +// language-specific details. +// +// Text passed to any of the calls in this message does not need to be +// normalized. +package search // import "golang.org/x/text/search" + +import ( + "strings" + + "golang.org/x/text/internal/colltab" + "golang.org/x/text/language" +) + +// An Option configures a Matcher. +type Option func(*Matcher) + +var ( + // WholeWord restricts matches to complete words. The default is to match at + // the character level. + WholeWord Option = nil + + // Exact requires that two strings are their exact equivalent. For example + // å would not match aa in Danish. It overrides any of the ignore options. + Exact Option = nil + + // Loose causes case, diacritics and width to be ignored. + Loose Option = loose + + // IgnoreCase enables case-insensitive search. + IgnoreCase Option = ignoreCase + + // IgnoreDiacritics causes diacritics to be ignored ("ö" == "o"). + IgnoreDiacritics Option = ignoreDiacritics + + // IgnoreWidth equates narrow with wide variants. + IgnoreWidth Option = ignoreWidth +) + +func ignoreDiacritics(m *Matcher) { m.ignoreDiacritics = true } +func ignoreCase(m *Matcher) { m.ignoreCase = true } +func ignoreWidth(m *Matcher) { m.ignoreWidth = true } +func loose(m *Matcher) { + ignoreDiacritics(m) + ignoreCase(m) + ignoreWidth(m) +} + +var ( + // Supported lists the languages for which search differs from its parent. + Supported language.Coverage + + tags []language.Tag +) + +func init() { + ids := strings.Split(availableLocales, ",") + tags = make([]language.Tag, len(ids)) + for i, s := range ids { + tags[i] = language.Raw.MustParse(s) + } + Supported = language.NewCoverage(tags) +} + +// New returns a new Matcher for the given language and options. +func New(t language.Tag, opts ...Option) *Matcher { + m := &Matcher{ + w: getTable(locales[colltab.MatchLang(t, tags)]), + } + for _, f := range opts { + f(m) + } + return m +} + +// A Matcher implements language-specific string matching. +type Matcher struct { + w colltab.Weighter + ignoreCase bool + ignoreWidth bool + ignoreDiacritics bool +} + +// An IndexOption specifies how the Index methods of Pattern or Matcher should +// match the input. +type IndexOption byte + +const ( + // Anchor restricts the search to the start (or end for Backwards) of the + // text. + Anchor IndexOption = 1 << iota + + // Backwards starts the search from the end of the text. + Backwards + + anchorBackwards = Anchor | Backwards +) + +// Index reports the start and end position of the first occurrence of pat in b +// or -1, -1 if pat is not present. +func (m *Matcher) Index(b, pat []byte, opts ...IndexOption) (start, end int) { + // TODO: implement optimized version that does not use a pattern. + return m.Compile(pat).Index(b, opts...) +} + +// IndexString reports the start and end position of the first occurrence of pat +// in s or -1, -1 if pat is not present. +func (m *Matcher) IndexString(s, pat string, opts ...IndexOption) (start, end int) { + // TODO: implement optimized version that does not use a pattern. + return m.CompileString(pat).IndexString(s, opts...) +} + +// Equal reports whether a and b are equivalent. +func (m *Matcher) Equal(a, b []byte) bool { + _, end := m.Index(a, b, Anchor) + return end == len(a) +} + +// EqualString reports whether a and b are equivalent. +func (m *Matcher) EqualString(a, b string) bool { + _, end := m.IndexString(a, b, Anchor) + return end == len(a) +} + +// Compile compiles and returns a pattern that can be used for faster searching. +func (m *Matcher) Compile(b []byte) *Pattern { + p := &Pattern{m: m} + iter := colltab.Iter{Weighter: m.w} + for iter.SetInput(b); iter.Next(); { + } + p.ce = iter.Elems + p.deleteEmptyElements() + return p +} + +// CompileString compiles and returns a pattern that can be used for faster +// searching. +func (m *Matcher) CompileString(s string) *Pattern { + p := &Pattern{m: m} + iter := colltab.Iter{Weighter: m.w} + for iter.SetInputString(s); iter.Next(); { + } + p.ce = iter.Elems + p.deleteEmptyElements() + return p +} + +// A Pattern is a compiled search string. It is safe for concurrent use. +type Pattern struct { + m *Matcher + ce []colltab.Elem +} + +// Design note (TODO remove): +// The cost of retrieving collation elements for each rune, which is used for +// search as well, is not trivial. Also, algorithms like Boyer-Moore and +// Sunday require some additional precomputing. + +// Index reports the start and end position of the first occurrence of p in b +// or -1, -1 if p is not present. +func (p *Pattern) Index(b []byte, opts ...IndexOption) (start, end int) { + // Pick a large enough buffer such that we likely do not need to allocate + // and small enough to not cause too much overhead initializing. + var buf [8]colltab.Elem + + it := &colltab.Iter{ + Weighter: p.m.w, + Elems: buf[:0], + } + it.SetInput(b) + + var optMask IndexOption + for _, o := range opts { + optMask |= o + } + + switch optMask { + case 0: + return p.forwardSearch(it) + case Anchor: + return p.anchoredForwardSearch(it) + case Backwards, anchorBackwards: + panic("TODO: implement") + default: + panic("unrecognized option") + } +} + +// IndexString reports the start and end position of the first occurrence of p +// in s or -1, -1 if p is not present. +func (p *Pattern) IndexString(s string, opts ...IndexOption) (start, end int) { + // Pick a large enough buffer such that we likely do not need to allocate + // and small enough to not cause too much overhead initializing. + var buf [8]colltab.Elem + + it := &colltab.Iter{ + Weighter: p.m.w, + Elems: buf[:0], + } + it.SetInputString(s) + + var optMask IndexOption + for _, o := range opts { + optMask |= o + } + + switch optMask { + case 0: + return p.forwardSearch(it) + case Anchor: + return p.anchoredForwardSearch(it) + case Backwards, anchorBackwards: + panic("TODO: implement") + default: + panic("unrecognized option") + } +} + +// TODO: +// - Maybe IndexAll methods (probably not necessary). +// - Some way to match patterns in a Reader (a bit tricky). +// - Some fold transformer that folds text to comparable text, based on the +// search options. This is a common technique, though very different from the +// collation-based design of this package. It has a somewhat different use +// case, so probably makes sense to support both. Should probably be in a +// different package, though, as it uses completely different kind of tables +// (based on norm, cases, width and range tables.)