vendor/golang.org/x/text/collate/tools/colcmp/colcmp.go

   1 // Copyright 2012 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package main // import "golang.org/x/text/collate/tools/colcmp"
   6
   7 import (
   8         "bytes"
   9         "flag"
  10         "fmt"
  11         "io"
  12         "log"
  13         "os"
  14         "runtime/pprof"
  15         "sort"
  16         "strconv"
  17         "strings"
  18         "text/template"
  19         "time"
  20
  21         "golang.org/x/text/unicode/norm"
  22 )
  23
  24 var (
  25         doNorm  = flag.Bool("norm", false, "normalize input strings")
  26         cases   = flag.Bool("case", false, "generate case variants")
  27         verbose = flag.Bool("verbose", false, "print results")
  28         debug   = flag.Bool("debug", false, "output debug information")
  29         locales = flag.String("locale", "en_US", "the locale to use. May be a comma-separated list for some commands.")
  30         col     = flag.String("col", "go", "collator to test")
  31         gold    = flag.String("gold", "go", "collator used as the gold standard")
  32         usecmp  = flag.Bool("usecmp", false,
  33                 `use comparison instead of sort keys when sorting.  Must be "test", "gold" or "both"`)
  34         cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file")
  35         exclude    = flag.String("exclude", "", "exclude errors that contain any of the characters")
  36         limit      = flag.Int("limit", 5000000, "maximum number of samples to generate for one run")
  37 )
  38
  39 func failOnError(err error) {
  40         if err != nil {
  41                 log.Panic(err)
  42         }
  43 }
  44
  45 // Test holds test data for testing a locale-collator pair.
  46 // Test also provides functionality that is commonly used by the various commands.
  47 type Test struct {
  48         ctxt    *Context
  49         Name    string
  50         Locale  string
  51         ColName string
  52
  53         Col        Collator
  54         UseCompare bool
  55
  56         Input    []Input
  57         Duration time.Duration
  58
  59         start time.Time
  60         msg   string
  61         count int
  62 }
  63
  64 func (t *Test) clear() {
  65         t.Col = nil
  66         t.Input = nil
  67 }
  68
  69 const (
  70         msgGeneratingInput = "generating input"
  71         msgGeneratingKeys  = "generating keys"
  72         msgSorting         = "sorting"
  73 )
  74
  75 var lastLen = 0
  76
  77 func (t *Test) SetStatus(msg string) {
  78         if *debug || *verbose {
  79                 fmt.Printf("%s: %s...\n", t.Name, msg)
  80         } else if t.ctxt.out != nil {
  81                 fmt.Fprint(t.ctxt.out, strings.Repeat(" ", lastLen))
  82                 fmt.Fprint(t.ctxt.out, strings.Repeat("\b", lastLen))
  83                 fmt.Fprint(t.ctxt.out, msg, "...")
  84                 lastLen = len(msg) + 3
  85                 fmt.Fprint(t.ctxt.out, strings.Repeat("\b", lastLen))
  86         }
  87 }
  88
  89 // Start is used by commands to signal the start of an operation.
  90 func (t *Test) Start(msg string) {
  91         t.SetStatus(msg)
  92         t.count = 0
  93         t.msg = msg
  94         t.start = time.Now()
  95 }
  96
  97 // Stop is used by commands to signal the end of an operation.
  98 func (t *Test) Stop() (time.Duration, int) {
  99         d := time.Now().Sub(t.start)
 100         t.Duration += d
 101         if *debug || *verbose {
 102                 fmt.Printf("%s: %s done. (%.3fs /%dK ops)\n", t.Name, t.msg, d.Seconds(), t.count/1000)
 103         }
 104         return d, t.count
 105 }
 106
 107 // generateKeys generates sort keys for all the inputs.
 108 func (t *Test) generateKeys() {
 109         for i, s := range t.Input {
 110                 b := t.Col.Key(s)
 111                 t.Input[i].key = b
 112                 if *debug {
 113                         fmt.Printf("%s (%X): %X\n", string(s.UTF8), s.UTF16, b)
 114                 }
 115         }
 116 }
 117
 118 // Sort sorts the inputs. It generates sort keys if this is required by the
 119 // chosen sort method.
 120 func (t *Test) Sort() (tkey, tsort time.Duration, nkey, nsort int) {
 121         if *cpuprofile != "" {
 122                 f, err := os.Create(*cpuprofile)
 123                 failOnError(err)
 124                 pprof.StartCPUProfile(f)
 125                 defer pprof.StopCPUProfile()
 126         }
 127         if t.UseCompare || t.Col.Key(t.Input[0]) == nil {
 128                 t.Start(msgSorting)
 129                 sort.Sort(&testCompare{*t})
 130                 tsort, nsort = t.Stop()
 131         } else {
 132                 t.Start(msgGeneratingKeys)
 133                 t.generateKeys()
 134                 t.count = len(t.Input)
 135                 tkey, nkey = t.Stop()
 136                 t.Start(msgSorting)
 137                 sort.Sort(t)
 138                 tsort, nsort = t.Stop()
 139         }
 140         return
 141 }
 142
 143 func (t *Test) Swap(a, b int) {
 144         t.Input[a], t.Input[b] = t.Input[b], t.Input[a]
 145 }
 146
 147 func (t *Test) Less(a, b int) bool {
 148         t.count++
 149         return bytes.Compare(t.Input[a].key, t.Input[b].key) == -1
 150 }
 151
 152 func (t Test) Len() int {
 153         return len(t.Input)
 154 }
 155
 156 type testCompare struct {
 157         Test
 158 }
 159
 160 func (t *testCompare) Less(a, b int) bool {
 161         t.count++
 162         return t.Col.Compare(t.Input[a], t.Input[b]) == -1
 163 }
 164
 165 type testRestore struct {
 166         Test
 167 }
 168
 169 func (t *testRestore) Less(a, b int) bool {
 170         return t.Input[a].index < t.Input[b].index
 171 }
 172
 173 // GenerateInput generates input phrases for the locale tested by t.
 174 func (t *Test) GenerateInput() {
 175         t.Input = nil
 176         if t.ctxt.lastLocale != t.Locale {
 177                 gen := phraseGenerator{}
 178                 gen.init(t.Locale)
 179                 t.SetStatus(msgGeneratingInput)
 180                 t.ctxt.lastInput = nil // allow the previous value to be garbage collected.
 181                 t.Input = gen.generate(*doNorm)
 182                 t.ctxt.lastInput = t.Input
 183                 t.ctxt.lastLocale = t.Locale
 184         } else {
 185                 t.Input = t.ctxt.lastInput
 186                 for i := range t.Input {
 187                         t.Input[i].key = nil
 188                 }
 189                 sort.Sort(&testRestore{*t})
 190         }
 191 }
 192
 193 // Context holds all tests and settings translated from command line options.
 194 type Context struct {
 195         test []*Test
 196         last *Test
 197
 198         lastLocale string
 199         lastInput  []Input
 200
 201         out io.Writer
 202 }
 203
 204 func (ts *Context) Printf(format string, a ...interface{}) {
 205         ts.assertBuf()
 206         fmt.Fprintf(ts.out, format, a...)
 207 }
 208
 209 func (ts *Context) Print(a ...interface{}) {
 210         ts.assertBuf()
 211         fmt.Fprint(ts.out, a...)
 212 }
 213
 214 // assertBuf sets up an io.Writer for output, if it doesn't already exist.
 215 // In debug and verbose mode, output is buffered so that the regular output
 216 // will not interfere with the additional output.  Otherwise, output is
 217 // written directly to stdout for a more responsive feel.
 218 func (ts *Context) assertBuf() {
 219         if ts.out != nil {
 220                 return
 221         }
 222         if *debug || *verbose {
 223                 ts.out = &bytes.Buffer{}
 224         } else {
 225                 ts.out = os.Stdout
 226         }
 227 }
 228
 229 // flush flushes the contents of ts.out to stdout, if it is not stdout already.
 230 func (ts *Context) flush() {
 231         if ts.out != nil {
 232                 if _, ok := ts.out.(io.ReadCloser); !ok {
 233                         io.Copy(os.Stdout, ts.out.(io.Reader))
 234                 }
 235         }
 236 }
 237
 238 // parseTests creates all tests from command lines and returns
 239 // a Context to hold them.
 240 func parseTests() *Context {
 241         ctxt := &Context{}
 242         colls := strings.Split(*col, ",")
 243         for _, loc := range strings.Split(*locales, ",") {
 244                 loc = strings.TrimSpace(loc)
 245                 for _, name := range colls {
 246                         name = strings.TrimSpace(name)
 247                         col := getCollator(name, loc)
 248                         ctxt.test = append(ctxt.test, &Test{
 249                                 ctxt:       ctxt,
 250                                 Locale:     loc,
 251                                 ColName:    name,
 252                                 UseCompare: *usecmp,
 253                                 Col:        col,
 254                         })
 255                 }
 256         }
 257         return ctxt
 258 }
 259
 260 func (c *Context) Len() int {
 261         return len(c.test)
 262 }
 263
 264 func (c *Context) Test(i int) *Test {
 265         if c.last != nil {
 266                 c.last.clear()
 267         }
 268         c.last = c.test[i]
 269         return c.last
 270 }
 271
 272 func parseInput(args []string) []Input {
 273         input := []Input{}
 274         for _, s := range args {
 275                 rs := []rune{}
 276                 for len(s) > 0 {
 277                         var r rune
 278                         r, _, s, _ = strconv.UnquoteChar(s, '\'')
 279                         rs = append(rs, r)
 280                 }
 281                 s = string(rs)
 282                 if *doNorm {
 283                         s = norm.NFD.String(s)
 284                 }
 285                 input = append(input, makeInputString(s))
 286         }
 287         return input
 288 }
 289
 290 // A Command is an implementation of a colcmp command.
 291 type Command struct {
 292         Run   func(cmd *Context, args []string)
 293         Usage string
 294         Short string
 295         Long  string
 296 }
 297
 298 func (cmd Command) Name() string {
 299         return strings.SplitN(cmd.Usage, " ", 2)[0]
 300 }
 301
 302 var commands = []*Command{
 303         cmdSort,
 304         cmdBench,
 305         cmdRegress,
 306 }
 307
 308 const sortHelp = `
 309 Sort sorts a given list of strings.  Strings are separated by whitespace.
 310 `
 311
 312 var cmdSort = &Command{
 313         Run:   runSort,
 314         Usage: "sort <string>*",
 315         Short: "sort a given list of strings",
 316         Long:  sortHelp,
 317 }
 318
 319 func runSort(ctxt *Context, args []string) {
 320         input := parseInput(args)
 321         if len(input) == 0 {
 322                 log.Fatalf("Nothing to sort.")
 323         }
 324         if ctxt.Len() > 1 {
 325                 ctxt.Print("COLL  LOCALE RESULT\n")
 326         }
 327         for i := 0; i < ctxt.Len(); i++ {
 328                 t := ctxt.Test(i)
 329                 t.Input = append(t.Input, input...)
 330                 t.Sort()
 331                 if ctxt.Len() > 1 {
 332                         ctxt.Printf("%-5s %-5s  ", t.ColName, t.Locale)
 333                 }
 334                 for _, s := range t.Input {
 335                         ctxt.Print(string(s.UTF8), " ")
 336                 }
 337                 ctxt.Print("\n")
 338         }
 339 }
 340
 341 const benchHelp = `
 342 Bench runs a benchmark for the given list of collator implementations.
 343 If no collator implementations are given, the go collator will be used.
 344 `
 345
 346 var cmdBench = &Command{
 347         Run:   runBench,
 348         Usage: "bench",
 349         Short: "benchmark a given list of collator implementations",
 350         Long:  benchHelp,
 351 }
 352
 353 func runBench(ctxt *Context, args []string) {
 354         ctxt.Printf("%-7s %-5s %-6s %-24s %-24s %-5s %s\n", "LOCALE", "COLL", "N", "KEYS", "SORT", "AVGLN", "TOTAL")
 355         for i := 0; i < ctxt.Len(); i++ {
 356                 t := ctxt.Test(i)
 357                 ctxt.Printf("%-7s %-5s ", t.Locale, t.ColName)
 358                 t.GenerateInput()
 359                 ctxt.Printf("%-6s ", fmt.Sprintf("%dK", t.Len()/1000))
 360                 tkey, tsort, nkey, nsort := t.Sort()
 361                 p := func(dur time.Duration, n int) {
 362                         s := ""
 363                         if dur > 0 {
 364                                 s = fmt.Sprintf("%6.3fs ", dur.Seconds())
 365                                 if n > 0 {
 366                                         s += fmt.Sprintf("%15s", fmt.Sprintf("(%4.2f ns/op)", float64(dur)/float64(n)))
 367                                 }
 368                         }
 369                         ctxt.Printf("%-24s ", s)
 370                 }
 371                 p(tkey, nkey)
 372                 p(tsort, nsort)
 373
 374                 total := 0
 375                 for _, s := range t.Input {
 376                         total += len(s.key)
 377                 }
 378                 ctxt.Printf("%-5d ", total/t.Len())
 379                 ctxt.Printf("%6.3fs\n", t.Duration.Seconds())
 380                 if *debug {
 381                         for _, s := range t.Input {
 382                                 fmt.Print(string(s.UTF8), " ")
 383                         }
 384                         fmt.Println()
 385                 }
 386         }
 387 }
 388
 389 const regressHelp = `
 390 Regress runs a monkey test by comparing the results of randomly generated tests
 391 between two implementations of a collator. The user may optionally pass a list
 392 of strings to regress against instead of the default test set.
 393 `
 394
 395 var cmdRegress = &Command{
 396         Run:   runRegress,
 397         Usage: "regress -gold=<col> -test=<col> [string]*",
 398         Short: "run a monkey test between two collators",
 399         Long:  regressHelp,
 400 }
 401
 402 const failedKeyCompare = `
 403 %s:%d: incorrect comparison result for input:
 404     a:   %q (%.4X)
 405     key: %s
 406     b:   %q (%.4X)
 407     key: %s
 408     Compare(a, b) = %d; want %d.
 409
 410   gold keys:
 411         a:   %s
 412         b:   %s
 413 `
 414
 415 const failedCompare = `
 416 %s:%d: incorrect comparison result for input:
 417     a:   %q (%.4X)
 418     b:   %q (%.4X)
 419     Compare(a, b) = %d; want %d.
 420 `
 421
 422 func keyStr(b []byte) string {
 423         buf := &bytes.Buffer{}
 424         for _, v := range b {
 425                 fmt.Fprintf(buf, "%.2X ", v)
 426         }
 427         return buf.String()
 428 }
 429
 430 func runRegress(ctxt *Context, args []string) {
 431         input := parseInput(args)
 432         for i := 0; i < ctxt.Len(); i++ {
 433                 t := ctxt.Test(i)
 434                 if len(input) > 0 {
 435                         t.Input = append(t.Input, input...)
 436                 } else {
 437                         t.GenerateInput()
 438                 }
 439                 t.Sort()
 440                 count := 0
 441                 gold := getCollator(*gold, t.Locale)
 442                 for i := 1; i < len(t.Input); i++ {
 443                         ia := t.Input[i-1]
 444                         ib := t.Input[i]
 445                         if bytes.IndexAny(ib.UTF8, *exclude) != -1 {
 446                                 i++
 447                                 continue
 448                         }
 449                         if bytes.IndexAny(ia.UTF8, *exclude) != -1 {
 450                                 continue
 451                         }
 452                         goldCmp := gold.Compare(ia, ib)
 453                         if cmp := bytes.Compare(ia.key, ib.key); cmp != goldCmp {
 454                                 count++
 455                                 a := string(ia.UTF8)
 456                                 b := string(ib.UTF8)
 457                                 fmt.Printf(failedKeyCompare, t.Locale, i-1, a, []rune(a), keyStr(ia.key), b, []rune(b), keyStr(ib.key), cmp, goldCmp, keyStr(gold.Key(ia)), keyStr(gold.Key(ib)))
 458                         } else if cmp := t.Col.Compare(ia, ib); cmp != goldCmp {
 459                                 count++
 460                                 a := string(ia.UTF8)
 461                                 b := string(ib.UTF8)
 462                                 fmt.Printf(failedCompare, t.Locale, i-1, a, []rune(a), b, []rune(b), cmp, goldCmp)
 463                         }
 464                 }
 465                 if count > 0 {
 466                         ctxt.Printf("Found %d inconsistencies in %d entries.\n", count, t.Len()-1)
 467                 }
 468         }
 469 }
 470
 471 const helpTemplate = `
 472 colcmp is a tool for testing and benchmarking collation
 473
 474 Usage: colcmp command [arguments]
 475
 476 The commands are:
 477 {{range .}}
 478     {{.Name | printf "%-11s"}} {{.Short}}{{end}}
 479
 480 Use "col help [topic]" for more information about that topic.
 481 `
 482
 483 const detailedHelpTemplate = `
 484 Usage: colcmp {{.Usage}}
 485
 486 {{.Long | trim}}
 487 `
 488
 489 func runHelp(args []string) {
 490         t := template.New("help")
 491         t.Funcs(template.FuncMap{"trim": strings.TrimSpace})
 492         if len(args) < 1 {
 493                 template.Must(t.Parse(helpTemplate))
 494                 failOnError(t.Execute(os.Stderr, &commands))
 495         } else {
 496                 for _, cmd := range commands {
 497                         if cmd.Name() == args[0] {
 498                                 template.Must(t.Parse(detailedHelpTemplate))
 499                                 failOnError(t.Execute(os.Stderr, cmd))
 500                                 os.Exit(0)
 501                         }
 502                 }
 503                 log.Fatalf("Unknown command %q. Run 'colcmp help'.", args[0])
 504         }
 505         os.Exit(0)
 506 }
 507
 508 func main() {
 509         flag.Parse()
 510         log.SetFlags(0)
 511
 512         ctxt := parseTests()
 513
 514         if flag.NArg() < 1 {
 515                 runHelp(nil)
 516         }
 517         args := flag.Args()[1:]
 518         if flag.Arg(0) == "help" {
 519                 runHelp(args)
 520         }
 521         for _, cmd := range commands {
 522                 if cmd.Name() == flag.Arg(0) {
 523                         cmd.Run(ctxt, args)
 524                         ctxt.flush()
 525                         return
 526                 }
 527         }
 528         runHelp(flag.Args())
 529 }