vendor/golang.org/x/net/publicsuffix/gen.go

   1 // Copyright 2012 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 // +build ignore
   6
   7 package main
   8
   9 // This program generates table.go and table_test.go based on the authoritative
  10 // public suffix list at https://publicsuffix.org/list/effective_tld_names.dat
  11 //
  12 // The version is derived from
  13 // https://api.github.com/repos/publicsuffix/list/commits?path=public_suffix_list.dat
  14 // and a human-readable form is at
  15 // https://github.com/publicsuffix/list/commits/master/public_suffix_list.dat
  16 //
  17 // To fetch a particular git revision, such as 5c70ccd250, pass
  18 // -url "https://raw.githubusercontent.com/publicsuffix/list/5c70ccd250/public_suffix_list.dat"
  19 // and -version "an explicit version string".
  20
  21 import (
  22         "bufio"
  23         "bytes"
  24         "flag"
  25         "fmt"
  26         "go/format"
  27         "io"
  28         "io/ioutil"
  29         "net/http"
  30         "os"
  31         "regexp"
  32         "sort"
  33         "strings"
  34
  35         "golang.org/x/net/idna"
  36 )
  37
  38 const (
  39         // These sum of these four values must be no greater than 32.
  40         nodesBitsChildren   = 9
  41         nodesBitsICANN      = 1
  42         nodesBitsTextOffset = 15
  43         nodesBitsTextLength = 6
  44
  45         // These sum of these four values must be no greater than 32.
  46         childrenBitsWildcard = 1
  47         childrenBitsNodeType = 2
  48         childrenBitsHi       = 14
  49         childrenBitsLo       = 14
  50 )
  51
  52 var (
  53         maxChildren   int
  54         maxTextOffset int
  55         maxTextLength int
  56         maxHi         uint32
  57         maxLo         uint32
  58 )
  59
  60 func max(a, b int) int {
  61         if a < b {
  62                 return b
  63         }
  64         return a
  65 }
  66
  67 func u32max(a, b uint32) uint32 {
  68         if a < b {
  69                 return b
  70         }
  71         return a
  72 }
  73
  74 const (
  75         nodeTypeNormal     = 0
  76         nodeTypeException  = 1
  77         nodeTypeParentOnly = 2
  78         numNodeType        = 3
  79 )
  80
  81 func nodeTypeStr(n int) string {
  82         switch n {
  83         case nodeTypeNormal:
  84                 return "+"
  85         case nodeTypeException:
  86                 return "!"
  87         case nodeTypeParentOnly:
  88                 return "o"
  89         }
  90         panic("unreachable")
  91 }
  92
  93 const (
  94         defaultURL   = "https://publicsuffix.org/list/effective_tld_names.dat"
  95         gitCommitURL = "https://api.github.com/repos/publicsuffix/list/commits?path=public_suffix_list.dat"
  96 )
  97
  98 var (
  99         labelEncoding = map[string]uint32{}
 100         labelsList    = []string{}
 101         labelsMap     = map[string]bool{}
 102         rules         = []string{}
 103
 104         // validSuffixRE is used to check that the entries in the public suffix
 105         // list are in canonical form (after Punycode encoding). Specifically,
 106         // capital letters are not allowed.
 107         validSuffixRE = regexp.MustCompile(`^[a-z0-9_\!\*\-\.]+$`)
 108
 109         shaRE  = regexp.MustCompile(`"sha":"([^"]+)"`)
 110         dateRE = regexp.MustCompile(`"committer":{[^{]+"date":"([^"]+)"`)
 111
 112         comments = flag.Bool("comments", false, "generate table.go comments, for debugging")
 113         subset   = flag.Bool("subset", false, "generate only a subset of the full table, for debugging")
 114         url      = flag.String("url", defaultURL, "URL of the publicsuffix.org list. If empty, stdin is read instead")
 115         v        = flag.Bool("v", false, "verbose output (to stderr)")
 116         version  = flag.String("version", "", "the effective_tld_names.dat version")
 117 )
 118
 119 func main() {
 120         if err := main1(); err != nil {
 121                 fmt.Fprintln(os.Stderr, err)
 122                 os.Exit(1)
 123         }
 124 }
 125
 126 func main1() error {
 127         flag.Parse()
 128         if nodesBitsTextLength+nodesBitsTextOffset+nodesBitsICANN+nodesBitsChildren > 32 {
 129                 return fmt.Errorf("not enough bits to encode the nodes table")
 130         }
 131         if childrenBitsLo+childrenBitsHi+childrenBitsNodeType+childrenBitsWildcard > 32 {
 132                 return fmt.Errorf("not enough bits to encode the children table")
 133         }
 134         if *version == "" {
 135                 if *url != defaultURL {
 136                         return fmt.Errorf("-version was not specified, and the -url is not the default one")
 137                 }
 138                 sha, date, err := gitCommit()
 139                 if err != nil {
 140                         return err
 141                 }
 142                 *version = fmt.Sprintf("publicsuffix.org's public_suffix_list.dat, git revision %s (%s)", sha, date)
 143         }
 144         var r io.Reader = os.Stdin
 145         if *url != "" {
 146                 res, err := http.Get(*url)
 147                 if err != nil {
 148                         return err
 149                 }
 150                 if res.StatusCode != http.StatusOK {
 151                         return fmt.Errorf("bad GET status for %s: %d", *url, res.Status)
 152                 }
 153                 r = res.Body
 154                 defer res.Body.Close()
 155         }
 156
 157         var root node
 158         icann := false
 159         br := bufio.NewReader(r)
 160         for {
 161                 s, err := br.ReadString('\n')
 162                 if err != nil {
 163                         if err == io.EOF {
 164                                 break
 165                         }
 166                         return err
 167                 }
 168                 s = strings.TrimSpace(s)
 169                 if strings.Contains(s, "BEGIN ICANN DOMAINS") {
 170                         icann = true
 171                         continue
 172                 }
 173                 if strings.Contains(s, "END ICANN DOMAINS") {
 174                         icann = false
 175                         continue
 176                 }
 177                 if s == "" || strings.HasPrefix(s, "//") {
 178                         continue
 179                 }
 180                 s, err = idna.ToASCII(s)
 181                 if err != nil {
 182                         return err
 183                 }
 184                 if !validSuffixRE.MatchString(s) {
 185                         return fmt.Errorf("bad publicsuffix.org list data: %q", s)
 186                 }
 187
 188                 if *subset {
 189                         switch {
 190                         case s == "ac.jp" || strings.HasSuffix(s, ".ac.jp"):
 191                         case s == "ak.us" || strings.HasSuffix(s, ".ak.us"):
 192                         case s == "ao" || strings.HasSuffix(s, ".ao"):
 193                         case s == "ar" || strings.HasSuffix(s, ".ar"):
 194                         case s == "arpa" || strings.HasSuffix(s, ".arpa"):
 195                         case s == "cy" || strings.HasSuffix(s, ".cy"):
 196                         case s == "dyndns.org" || strings.HasSuffix(s, ".dyndns.org"):
 197                         case s == "jp":
 198                         case s == "kobe.jp" || strings.HasSuffix(s, ".kobe.jp"):
 199                         case s == "kyoto.jp" || strings.HasSuffix(s, ".kyoto.jp"):
 200                         case s == "om" || strings.HasSuffix(s, ".om"):
 201                         case s == "uk" || strings.HasSuffix(s, ".uk"):
 202                         case s == "uk.com" || strings.HasSuffix(s, ".uk.com"):
 203                         case s == "tw" || strings.HasSuffix(s, ".tw"):
 204                         case s == "zw" || strings.HasSuffix(s, ".zw"):
 205                         case s == "xn--p1ai" || strings.HasSuffix(s, ".xn--p1ai"):
 206                                 // xn--p1ai is Russian-Cyrillic "рф".
 207                         default:
 208                                 continue
 209                         }
 210                 }
 211
 212                 rules = append(rules, s)
 213
 214                 nt, wildcard := nodeTypeNormal, false
 215                 switch {
 216                 case strings.HasPrefix(s, "*."):
 217                         s, nt = s[2:], nodeTypeParentOnly
 218                         wildcard = true
 219                 case strings.HasPrefix(s, "!"):
 220                         s, nt = s[1:], nodeTypeException
 221                 }
 222                 labels := strings.Split(s, ".")
 223                 for n, i := &root, len(labels)-1; i >= 0; i-- {
 224                         label := labels[i]
 225                         n = n.child(label)
 226                         if i == 0 {
 227                                 if nt != nodeTypeParentOnly && n.nodeType == nodeTypeParentOnly {
 228                                         n.nodeType = nt
 229                                 }
 230                                 n.icann = n.icann && icann
 231                                 n.wildcard = n.wildcard || wildcard
 232                         }
 233                         labelsMap[label] = true
 234                 }
 235         }
 236         labelsList = make([]string, 0, len(labelsMap))
 237         for label := range labelsMap {
 238                 labelsList = append(labelsList, label)
 239         }
 240         sort.Strings(labelsList)
 241
 242         if err := generate(printReal, &root, "table.go"); err != nil {
 243                 return err
 244         }
 245         if err := generate(printTest, &root, "table_test.go"); err != nil {
 246                 return err
 247         }
 248         return nil
 249 }
 250
 251 func generate(p func(io.Writer, *node) error, root *node, filename string) error {
 252         buf := new(bytes.Buffer)
 253         if err := p(buf, root); err != nil {
 254                 return err
 255         }
 256         b, err := format.Source(buf.Bytes())
 257         if err != nil {
 258                 return err
 259         }
 260         return ioutil.WriteFile(filename, b, 0644)
 261 }
 262
 263 func gitCommit() (sha, date string, retErr error) {
 264         res, err := http.Get(gitCommitURL)
 265         if err != nil {
 266                 return "", "", err
 267         }
 268         if res.StatusCode != http.StatusOK {
 269                 return "", "", fmt.Errorf("bad GET status for %s: %d", gitCommitURL, res.Status)
 270         }
 271         defer res.Body.Close()
 272         b, err := ioutil.ReadAll(res.Body)
 273         if err != nil {
 274                 return "", "", err
 275         }
 276         if m := shaRE.FindSubmatch(b); m != nil {
 277                 sha = string(m[1])
 278         }
 279         if m := dateRE.FindSubmatch(b); m != nil {
 280                 date = string(m[1])
 281         }
 282         if sha == "" || date == "" {
 283                 retErr = fmt.Errorf("could not find commit SHA and date in %s", gitCommitURL)
 284         }
 285         return sha, date, retErr
 286 }
 287
 288 func printTest(w io.Writer, n *node) error {
 289         fmt.Fprintf(w, "// generated by go run gen.go; DO NOT EDIT\n\n")
 290         fmt.Fprintf(w, "package publicsuffix\n\nvar rules = [...]string{\n")
 291         for _, rule := range rules {
 292                 fmt.Fprintf(w, "%q,\n", rule)
 293         }
 294         fmt.Fprintf(w, "}\n\nvar nodeLabels = [...]string{\n")
 295         if err := n.walk(w, printNodeLabel); err != nil {
 296                 return err
 297         }
 298         fmt.Fprintf(w, "}\n")
 299         return nil
 300 }
 301
 302 func printReal(w io.Writer, n *node) error {
 303         const header = `// generated by go run gen.go; DO NOT EDIT
 304
 305 package publicsuffix
 306
 307 const version = %q
 308
 309 const (
 310         nodesBitsChildren   = %d
 311         nodesBitsICANN      = %d
 312         nodesBitsTextOffset = %d
 313         nodesBitsTextLength = %d
 314
 315         childrenBitsWildcard = %d
 316         childrenBitsNodeType = %d
 317         childrenBitsHi       = %d
 318         childrenBitsLo       = %d
 319 )
 320
 321 const (
 322         nodeTypeNormal     = %d
 323         nodeTypeException  = %d
 324         nodeTypeParentOnly = %d
 325 )
 326
 327 // numTLD is the number of top level domains.
 328 const numTLD = %d
 329
 330 `
 331         fmt.Fprintf(w, header, *version,
 332                 nodesBitsChildren, nodesBitsICANN, nodesBitsTextOffset, nodesBitsTextLength,
 333                 childrenBitsWildcard, childrenBitsNodeType, childrenBitsHi, childrenBitsLo,
 334                 nodeTypeNormal, nodeTypeException, nodeTypeParentOnly, len(n.children))
 335
 336         text := combineText(labelsList)
 337         if text == "" {
 338                 return fmt.Errorf("internal error: makeText returned no text")
 339         }
 340         for _, label := range labelsList {
 341                 offset, length := strings.Index(text, label), len(label)
 342                 if offset < 0 {
 343                         return fmt.Errorf("internal error: could not find %q in text %q", label, text)
 344                 }
 345                 maxTextOffset, maxTextLength = max(maxTextOffset, offset), max(maxTextLength, length)
 346                 if offset >= 1<<nodesBitsTextOffset {
 347                         return fmt.Errorf("text offset %d is too large, or nodeBitsTextOffset is too small", offset)
 348                 }
 349                 if length >= 1<<nodesBitsTextLength {
 350                         return fmt.Errorf("text length %d is too large, or nodeBitsTextLength is too small", length)
 351                 }
 352                 labelEncoding[label] = uint32(offset)<<nodesBitsTextLength | uint32(length)
 353         }
 354         fmt.Fprintf(w, "// Text is the combined text of all labels.\nconst text = ")
 355         for len(text) > 0 {
 356                 n, plus := len(text), ""
 357                 if n > 64 {
 358                         n, plus = 64, " +"
 359                 }
 360                 fmt.Fprintf(w, "%q%s\n", text[:n], plus)
 361                 text = text[n:]
 362         }
 363
 364         if err := n.walk(w, assignIndexes); err != nil {
 365                 return err
 366         }
 367
 368         fmt.Fprintf(w, `
 369
 370 // nodes is the list of nodes. Each node is represented as a uint32, which
 371 // encodes the node's children, wildcard bit and node type (as an index into
 372 // the children array), ICANN bit and text.
 373 //
 374 // If the table was generated with the -comments flag, there is a //-comment
 375 // after each node's data. In it is the nodes-array indexes of the children,
 376 // formatted as (n0x1234-n0x1256), with * denoting the wildcard bit. The
 377 // nodeType is printed as + for normal, ! for exception, and o for parent-only
 378 // nodes that have children but don't match a domain label in their own right.
 379 // An I denotes an ICANN domain.
 380 //
 381 // The layout within the uint32, from MSB to LSB, is:
 382 //      [%2d bits] unused
 383 //      [%2d bits] children index
 384 //      [%2d bits] ICANN bit
 385 //      [%2d bits] text index
 386 //      [%2d bits] text length
 387 var nodes = [...]uint32{
 388 `,
 389                 32-nodesBitsChildren-nodesBitsICANN-nodesBitsTextOffset-nodesBitsTextLength,
 390                 nodesBitsChildren, nodesBitsICANN, nodesBitsTextOffset, nodesBitsTextLength)
 391         if err := n.walk(w, printNode); err != nil {
 392                 return err
 393         }
 394         fmt.Fprintf(w, `}
 395
 396 // children is the list of nodes' children, the parent's wildcard bit and the
 397 // parent's node type. If a node has no children then their children index
 398 // will be in the range [0, 6), depending on the wildcard bit and node type.
 399 //
 400 // The layout within the uint32, from MSB to LSB, is:
 401 //      [%2d bits] unused
 402 //      [%2d bits] wildcard bit
 403 //      [%2d bits] node type
 404 //      [%2d bits] high nodes index (exclusive) of children
 405 //      [%2d bits] low nodes index (inclusive) of children
 406 var children=[...]uint32{
 407 `,
 408                 32-childrenBitsWildcard-childrenBitsNodeType-childrenBitsHi-childrenBitsLo,
 409                 childrenBitsWildcard, childrenBitsNodeType, childrenBitsHi, childrenBitsLo)
 410         for i, c := range childrenEncoding {
 411                 s := "---------------"
 412                 lo := c & (1<<childrenBitsLo - 1)
 413                 hi := (c >> childrenBitsLo) & (1<<childrenBitsHi - 1)
 414                 if lo != hi {
 415                         s = fmt.Sprintf("n0x%04x-n0x%04x", lo, hi)
 416                 }
 417                 nodeType := int(c>>(childrenBitsLo+childrenBitsHi)) & (1<<childrenBitsNodeType - 1)
 418                 wildcard := c>>(childrenBitsLo+childrenBitsHi+childrenBitsNodeType) != 0
 419                 if *comments {
 420                         fmt.Fprintf(w, "0x%08x, // c0x%04x (%s)%s %s\n",
 421                                 c, i, s, wildcardStr(wildcard), nodeTypeStr(nodeType))
 422                 } else {
 423                         fmt.Fprintf(w, "0x%x,\n", c)
 424                 }
 425         }
 426         fmt.Fprintf(w, "}\n\n")
 427         fmt.Fprintf(w, "// max children %d (capacity %d)\n", maxChildren, 1<<nodesBitsChildren-1)
 428         fmt.Fprintf(w, "// max text offset %d (capacity %d)\n", maxTextOffset, 1<<nodesBitsTextOffset-1)
 429         fmt.Fprintf(w, "// max text length %d (capacity %d)\n", maxTextLength, 1<<nodesBitsTextLength-1)
 430         fmt.Fprintf(w, "// max hi %d (capacity %d)\n", maxHi, 1<<childrenBitsHi-1)
 431         fmt.Fprintf(w, "// max lo %d (capacity %d)\n", maxLo, 1<<childrenBitsLo-1)
 432         return nil
 433 }
 434
 435 type node struct {
 436         label    string
 437         nodeType int
 438         icann    bool
 439         wildcard bool
 440         // nodesIndex and childrenIndex are the index of this node in the nodes
 441         // and the index of its children offset/length in the children arrays.
 442         nodesIndex, childrenIndex int
 443         // firstChild is the index of this node's first child, or zero if this
 444         // node has no children.
 445         firstChild int
 446         // children are the node's children, in strictly increasing node label order.
 447         children []*node
 448 }
 449
 450 func (n *node) walk(w io.Writer, f func(w1 io.Writer, n1 *node) error) error {
 451         if err := f(w, n); err != nil {
 452                 return err
 453         }
 454         for _, c := range n.children {
 455                 if err := c.walk(w, f); err != nil {
 456                         return err
 457                 }
 458         }
 459         return nil
 460 }
 461
 462 // child returns the child of n with the given label. The child is created if
 463 // it did not exist beforehand.
 464 func (n *node) child(label string) *node {
 465         for _, c := range n.children {
 466                 if c.label == label {
 467                         return c
 468                 }
 469         }
 470         c := &node{
 471                 label:    label,
 472                 nodeType: nodeTypeParentOnly,
 473                 icann:    true,
 474         }
 475         n.children = append(n.children, c)
 476         sort.Sort(byLabel(n.children))
 477         return c
 478 }
 479
 480 type byLabel []*node
 481
 482 func (b byLabel) Len() int           { return len(b) }
 483 func (b byLabel) Swap(i, j int)      { b[i], b[j] = b[j], b[i] }
 484 func (b byLabel) Less(i, j int) bool { return b[i].label < b[j].label }
 485
 486 var nextNodesIndex int
 487
 488 // childrenEncoding are the encoded entries in the generated children array.
 489 // All these pre-defined entries have no children.
 490 var childrenEncoding = []uint32{
 491         0 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeNormal.
 492         1 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeException.
 493         2 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeParentOnly.
 494         4 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeNormal.
 495         5 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeException.
 496         6 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeParentOnly.
 497 }
 498
 499 var firstCallToAssignIndexes = true
 500
 501 func assignIndexes(w io.Writer, n *node) error {
 502         if len(n.children) != 0 {
 503                 // Assign nodesIndex.
 504                 n.firstChild = nextNodesIndex
 505                 for _, c := range n.children {
 506                         c.nodesIndex = nextNodesIndex
 507                         nextNodesIndex++
 508                 }
 509
 510                 // The root node's children is implicit.
 511                 if firstCallToAssignIndexes {
 512                         firstCallToAssignIndexes = false
 513                         return nil
 514                 }
 515
 516                 // Assign childrenIndex.
 517                 maxChildren = max(maxChildren, len(childrenEncoding))
 518                 if len(childrenEncoding) >= 1<<nodesBitsChildren {
 519                         return fmt.Errorf("children table size %d is too large, or nodeBitsChildren is too small", len(childrenEncoding))
 520                 }
 521                 n.childrenIndex = len(childrenEncoding)
 522                 lo := uint32(n.firstChild)
 523                 hi := lo + uint32(len(n.children))
 524                 maxLo, maxHi = u32max(maxLo, lo), u32max(maxHi, hi)
 525                 if lo >= 1<<childrenBitsLo {
 526                         return fmt.Errorf("children lo %d is too large, or childrenBitsLo is too small", lo)
 527                 }
 528                 if hi >= 1<<childrenBitsHi {
 529                         return fmt.Errorf("children hi %d is too large, or childrenBitsHi is too small", hi)
 530                 }
 531                 enc := hi<<childrenBitsLo | lo
 532                 enc |= uint32(n.nodeType) << (childrenBitsLo + childrenBitsHi)
 533                 if n.wildcard {
 534                         enc |= 1 << (childrenBitsLo + childrenBitsHi + childrenBitsNodeType)
 535                 }
 536                 childrenEncoding = append(childrenEncoding, enc)
 537         } else {
 538                 n.childrenIndex = n.nodeType
 539                 if n.wildcard {
 540                         n.childrenIndex += numNodeType
 541                 }
 542         }
 543         return nil
 544 }
 545
 546 func printNode(w io.Writer, n *node) error {
 547         for _, c := range n.children {
 548                 s := "---------------"
 549                 if len(c.children) != 0 {
 550                         s = fmt.Sprintf("n0x%04x-n0x%04x", c.firstChild, c.firstChild+len(c.children))
 551                 }
 552                 encoding := labelEncoding[c.label]
 553                 if c.icann {
 554                         encoding |= 1 << (nodesBitsTextLength + nodesBitsTextOffset)
 555                 }
 556                 encoding |= uint32(c.childrenIndex) << (nodesBitsTextLength + nodesBitsTextOffset + nodesBitsICANN)
 557                 if *comments {
 558                         fmt.Fprintf(w, "0x%08x, // n0x%04x c0x%04x (%s)%s %s %s %s\n",
 559                                 encoding, c.nodesIndex, c.childrenIndex, s, wildcardStr(c.wildcard),
 560                                 nodeTypeStr(c.nodeType), icannStr(c.icann), c.label,
 561                         )
 562                 } else {
 563                         fmt.Fprintf(w, "0x%x,\n", encoding)
 564                 }
 565         }
 566         return nil
 567 }
 568
 569 func printNodeLabel(w io.Writer, n *node) error {
 570         for _, c := range n.children {
 571                 fmt.Fprintf(w, "%q,\n", c.label)
 572         }
 573         return nil
 574 }
 575
 576 func icannStr(icann bool) string {
 577         if icann {
 578                 return "I"
 579         }
 580         return " "
 581 }
 582
 583 func wildcardStr(wildcard bool) string {
 584         if wildcard {
 585                 return "*"
 586         }
 587         return " "
 588 }
 589
 590 // combineText combines all the strings in labelsList to form one giant string.
 591 // Overlapping strings will be merged: "arpa" and "parliament" could yield
 592 // "arparliament".
 593 func combineText(labelsList []string) string {
 594         beforeLength := 0
 595         for _, s := range labelsList {
 596                 beforeLength += len(s)
 597         }
 598
 599         text := crush(removeSubstrings(labelsList))
 600         if *v {
 601                 fmt.Fprintf(os.Stderr, "crushed %d bytes to become %d bytes\n", beforeLength, len(text))
 602         }
 603         return text
 604 }
 605
 606 type byLength []string
 607
 608 func (s byLength) Len() int           { return len(s) }
 609 func (s byLength) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
 610 func (s byLength) Less(i, j int) bool { return len(s[i]) < len(s[j]) }
 611
 612 // removeSubstrings returns a copy of its input with any strings removed
 613 // that are substrings of other provided strings.
 614 func removeSubstrings(input []string) []string {
 615         // Make a copy of input.
 616         ss := append(make([]string, 0, len(input)), input...)
 617         sort.Sort(byLength(ss))
 618
 619         for i, shortString := range ss {
 620                 // For each string, only consider strings higher than it in sort order, i.e.
 621                 // of equal length or greater.
 622                 for _, longString := range ss[i+1:] {
 623                         if strings.Contains(longString, shortString) {
 624                                 ss[i] = ""
 625                                 break
 626                         }
 627                 }
 628         }
 629
 630         // Remove the empty strings.
 631         sort.Strings(ss)
 632         for len(ss) > 0 && ss[0] == "" {
 633                 ss = ss[1:]
 634         }
 635         return ss
 636 }
 637
 638 // crush combines a list of strings, taking advantage of overlaps. It returns a
 639 // single string that contains each input string as a substring.
 640 func crush(ss []string) string {
 641         maxLabelLen := 0
 642         for _, s := range ss {
 643                 if maxLabelLen < len(s) {
 644                         maxLabelLen = len(s)
 645                 }
 646         }
 647
 648         for prefixLen := maxLabelLen; prefixLen > 0; prefixLen-- {
 649                 prefixes := makePrefixMap(ss, prefixLen)
 650                 for i, s := range ss {
 651                         if len(s) <= prefixLen {
 652                                 continue
 653                         }
 654                         mergeLabel(ss, i, prefixLen, prefixes)
 655                 }
 656         }
 657
 658         return strings.Join(ss, "")
 659 }
 660
 661 // mergeLabel merges the label at ss[i] with the first available matching label
 662 // in prefixMap, where the last "prefixLen" characters in ss[i] match the first
 663 // "prefixLen" characters in the matching label.
 664 // It will merge ss[i] repeatedly until no more matches are available.
 665 // All matching labels merged into ss[i] are replaced by "".
 666 func mergeLabel(ss []string, i, prefixLen int, prefixes prefixMap) {
 667         s := ss[i]
 668         suffix := s[len(s)-prefixLen:]
 669         for _, j := range prefixes[suffix] {
 670                 // Empty strings mean "already used." Also avoid merging with self.
 671                 if ss[j] == "" || i == j {
 672                         continue
 673                 }
 674                 if *v {
 675                         fmt.Fprintf(os.Stderr, "%d-length overlap at (%4d,%4d): %q and %q share %q\n",
 676                                 prefixLen, i, j, ss[i], ss[j], suffix)
 677                 }
 678                 ss[i] += ss[j][prefixLen:]
 679                 ss[j] = ""
 680                 // ss[i] has a new suffix, so merge again if possible.
 681                 // Note: we only have to merge again at the same prefix length. Shorter
 682                 // prefix lengths will be handled in the next iteration of crush's for loop.
 683                 // Can there be matches for longer prefix lengths, introduced by the merge?
 684                 // I believe that any such matches would by necessity have been eliminated
 685                 // during substring removal or merged at a higher prefix length. For
 686                 // instance, in crush("abc", "cde", "bcdef"), combining "abc" and "cde"
 687                 // would yield "abcde", which could be merged with "bcdef." However, in
 688                 // practice "cde" would already have been elimintated by removeSubstrings.
 689                 mergeLabel(ss, i, prefixLen, prefixes)
 690                 return
 691         }
 692 }
 693
 694 // prefixMap maps from a prefix to a list of strings containing that prefix. The
 695 // list of strings is represented as indexes into a slice of strings stored
 696 // elsewhere.
 697 type prefixMap map[string][]int
 698
 699 // makePrefixMap constructs a prefixMap from a slice of strings.
 700 func makePrefixMap(ss []string, prefixLen int) prefixMap {
 701         prefixes := make(prefixMap)
 702         for i, s := range ss {
 703                 // We use < rather than <= because if a label matches on a prefix equal to
 704                 // its full length, that's actually a substring match handled by
 705                 // removeSubstrings.
 706                 if prefixLen < len(s) {
 707                         prefix := s[:prefixLen]
 708                         prefixes[prefix] = append(prefixes[prefix], i)
 709                 }
 710         }
 711
 712         return prefixes
 713 }