1 // Copyright 2012 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
// TestNodeLabel walks every entry of nodeLabels and calls nodeLabel with the
// entry's index converted to uint32. Failures are reported through t.Errorf
// with the index followed by the got/want pair.
13 func TestNodeLabel(t *testing.T) {
14 for i, want := range nodeLabels {
15 got := nodeLabel(uint32(i))
17 t.Errorf("%d: got %q, want %q", i, got, want)
// TestFind looks up each string of testCases with find(tc, 0, numTLD) and
// cross-checks the answer against a linear scan over the node indexes below
// numTLD. Failures are reported with the test case and the got/want values.
22 func TestFind(t *testing.T) {
23 testCases := []string{
49 for _, tc := range testCases {
50 got := find(tc, 0, numTLD)
// Brute-force reference: compare tc with the label of every node index in
// [0, numTLD) (presumably the matching index becomes want — confirm).
52 for i := uint32(0); i < numTLD; i++ {
53 if tc == nodeLabel(i) {
59 t.Errorf("%q: got %d, want %d", tc, got, want)
// TestICANN calls PublicSuffix for every domain key of testCases and reports
// the second result next to the bool stored in the map. The first result of
// PublicSuffix is discarded.
// NOTE(review): going by the test name, the second result is presumably the
// "ICANN-managed" flag — confirm against PublicSuffix's documentation.
64 func TestICANN(t *testing.T) {
65 testCases := map[string]bool{
68 "foo.dyndns.org": false,
69 "foo.go.dyndns.org": false,
70 "foo.blogspot.co.uk": false,
// A domain under a TLD with no rules also expects false.
71 "foo.intranet": false,
73 for domain, want := range testCases {
74 _, got := PublicSuffix(domain)
76 t.Errorf("%q: got %v, want %v", domain, got, want)
// publicSuffixTestCases is the table shared by BenchmarkPublicSuffix,
// TestPublicSuffix and TestSlowPublicSuffix. Each entry pairs a domain with
// the public suffix expected for it; the tests read the two values as
// tc.domain and tc.want.
81 var publicSuffixTestCases = []struct {
98 {"www.pb.ao", "pb.ao"},
99 {"www.xxx.yyy.zzz.pb.ao", "pb.ao"},
101 // The .ar rules are:
116 {"www.nic.ar", "ar"},
117 {"com.ar", "com.ar"},
118 {"www.com.ar", "com.ar"},
119 {"blogspot.com.ar", "blogspot.com.ar"},
120 {"www.blogspot.com.ar", "blogspot.com.ar"},
121 {"www.xxx.yyy.zzz.blogspot.com.ar", "blogspot.com.ar"},
// Near-misses of "blogspot" must fall back to the shorter com.ar suffix.
122 {"logspot.com.ar", "com.ar"},
123 {"zlogspot.com.ar", "com.ar"},
124 {"zblogspot.com.ar", "com.ar"},
126 // The .arpa rules are:
135 {"www.arpa", "arpa"},
136 {"urn.arpa", "urn.arpa"},
137 {"www.urn.arpa", "urn.arpa"},
138 {"www.xxx.yyy.zzz.urn.arpa", "urn.arpa"},
140 // The relevant {kobe,kyoto}.jp rules are:
148 {"c.kobe.jp", "c.kobe.jp"},
149 {"b.c.kobe.jp", "c.kobe.jp"},
150 {"a.b.c.kobe.jp", "c.kobe.jp"},
151 {"city.kobe.jp", "kobe.jp"},
152 {"www.city.kobe.jp", "kobe.jp"},
153 {"kyoto.jp", "kyoto.jp"},
154 {"test.kyoto.jp", "kyoto.jp"},
155 {"ide.kyoto.jp", "ide.kyoto.jp"},
156 {"b.ide.kyoto.jp", "ide.kyoto.jp"},
157 {"a.b.ide.kyoto.jp", "ide.kyoto.jp"},
159 // The .tw rules are:
171 // 網路.tw (xn--zf0ao64a.tw)
172 // 組織.tw (xn--uc0atv.tw)
173 // 商業.tw (xn--czrw28b.tw)
177 {"www.aaa.tw", "tw"},
178 {"xn--czrw28b.aaa.tw", "tw"},
179 {"edu.tw", "edu.tw"},
180 {"www.edu.tw", "edu.tw"},
181 {"xn--czrw28b.edu.tw", "edu.tw"},
182 {"xn--czrw28b.tw", "xn--czrw28b.tw"},
183 {"www.xn--czrw28b.tw", "xn--czrw28b.tw"},
184 {"xn--uc0atv.xn--czrw28b.tw", "xn--czrw28b.tw"},
185 {"xn--kpry57d.tw", "tw"},
187 // The .uk rules are:
203 {"www.aaa.uk", "uk"},
205 {"www.mod.uk", "uk"},
207 {"mod.sch.uk", "mod.sch.uk"},
208 {"www.sch.uk", "www.sch.uk"},
209 {"blogspot.co.uk", "blogspot.co.uk"},
210 {"blogspot.nic.uk", "uk"},
211 {"blogspot.sch.uk", "blogspot.sch.uk"},
// A punycoded TLD behaves like any other single-label suffix.
215 {"xn--p1ai", "xn--p1ai"},
216 {"aaa.xn--p1ai", "xn--p1ai"},
217 {"www.xxx.yyy.xn--p1ai", "xn--p1ai"},
219 // The .bd rules are:
222 {"www.bd", "www.bd"},
223 {"zzz.bd", "zzz.bd"},
224 {"www.zzz.bd", "zzz.bd"},
225 {"www.xxx.yyy.zzz.bd", "zzz.bd"},
227 // There are no .nosuchtld rules.
228 {"nosuchtld", "nosuchtld"},
229 {"foo.nosuchtld", "nosuchtld"},
230 {"bar.foo.nosuchtld", "nosuchtld"},
// BenchmarkPublicSuffix runs List.PublicSuffix over every domain of
// publicSuffixTestCases, b.N times. The returned suffix is discarded.
233 func BenchmarkPublicSuffix(b *testing.B) {
234 for i := 0; i < b.N; i++ {
235 for _, tc := range publicSuffixTestCases {
236 List.PublicSuffix(tc.domain)
// TestPublicSuffix runs List.PublicSuffix on each domain of
// publicSuffixTestCases and reports the domain together with the returned
// suffix and tc.want.
241 func TestPublicSuffix(t *testing.T) {
242 for _, tc := range publicSuffixTestCases {
243 got := List.PublicSuffix(tc.domain)
245 t.Errorf("%q: got %q, want %q", tc.domain, got, tc.want)
// TestSlowPublicSuffix runs the same publicSuffixTestCases table through
// slowPublicSuffix, so both implementations are held to the same expected
// suffixes.
250 func TestSlowPublicSuffix(t *testing.T) {
251 for _, tc := range publicSuffixTestCases {
252 got := slowPublicSuffix(tc.domain)
254 t.Errorf("%q: got %q, want %q", tc.domain, got, tc.want)
259 // slowPublicSuffix implements the canonical (but O(number of rules)) public
260 // suffix algorithm described at http://publicsuffix.org/list/.
262 // 1. Match domain against all rules and take note of the matching ones.
263 // 2. If no rules match, the prevailing rule is "*".
264 // 3. If more than one rule matches, the prevailing rule is the one which is an exception rule.
265 // 4. If there is no matching exception rule, the prevailing rule is the one with the most labels.
266 // 5. If the prevailing rule is an exception rule, modify it by removing the leftmost label.
267 // 6. The public suffix is the set of labels from the domain which directly match the labels of the prevailing rule (joined by dots).
268 // 7. The registered or registrable domain is the public suffix plus one additional label.
270 // This function returns the public suffix, not the registrable domain, and so
271 // it stops after step 6.
// It is the reference implementation that TestSlowPublicSuffix runs against
// publicSuffixTestCases.
272 func slowPublicSuffix(domain string) string {
// match reports whether a single rule label matches a single domain label.
273 match := func(rulePart, domainPart string) bool {
// Here the rule label is compared without its first byte (presumably the
// "!" exception marker — confirm).
278 return rulePart[1:] == domainPart
// Plain labels must be equal.
280 return rulePart == domainPart
// Work on dot-separated labels of the domain.
283 domainParts := strings.Split(domain, ".")
284 var matchingRules [][]string
// Step 1: test the domain against every entry of rules.
287 for _, rule := range rules {
288 ruleParts := strings.Split(rule, ".")
// The domain has fewer labels than the rule (presumably the rule is then
// skipped — confirm).
289 if len(domainParts) < len(ruleParts) {
// Compare labels pairwise starting from the rightmost one: i counts back from
// the end of both slices.
292 for i := range ruleParts {
293 rulePart := ruleParts[len(ruleParts)-1-i]
294 domainPart := domainParts[len(domainParts)-1-i]
295 if !match(rulePart, domainPart) {
// Record the rule, already split into labels, in matchingRules.
299 matchingRules = append(matchingRules, ruleParts)
// Step 2: nothing matched, so the default rule "*" is used.
301 if len(matchingRules) == 0 {
302 matchingRules = append(matchingRules, []string{"*"})
// Order the candidates with byPriority; the prevailing rule is then read from
// index 0.
304 sort.Sort(byPriority(matchingRules))
306 prevailing := matchingRules[0]
// Step 5: an exception rule loses its leftmost label.
307 if prevailing[0][0] == '!' {
308 prevailing = prevailing[1:]
// A leading wildcard label is replaced by the domain's own label at the same
// position counted from the right. A fresh slice is built for the result.
310 if prevailing[0][0] == '*' {
311 replaced := domainParts[len(domainParts)-len(prevailing)]
312 prevailing = append([]string{replaced}, prevailing[1:]...)
// Step 6: the public suffix is the prevailing rule's labels joined by dots.
314 return strings.Join(prevailing, ".")
// byPriority is a list of rules, each rule held as its slice of labels. It
// provides Len, Swap and Less so slowPublicSuffix can pass it to sort.Sort and
// take the prevailing rule from index 0.
317 type byPriority [][]string
319 func (b byPriority) Len() int { return len(b) }
320 func (b byPriority) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
// Less special-cases rules whose first label starts with '!' (presumably so
// that exception rules sort ahead of all others — confirm). Otherwise the rule
// with more labels sorts first.
321 func (b byPriority) Less(i, j int) bool {
322 if b[i][0][0] == '!' {
325 if b[j][0][0] == '!' {
328 return len(b[i]) > len(b[j])
331 // eTLDPlusOneTestCases come from
332 // https://github.com/publicsuffix/list/blob/master/tests/test_psl.txt
// Each entry pairs a domain with the first result expected from
// EffectiveTLDPlusOne; TestEffectiveTLDPlusOne reads the two values as
// tc.domain and tc.want. The expected value is "" for some inputs, for example
// "ide.kyoto.jp", which publicSuffixTestCases lists as being its own public
// suffix.
333 var eTLDPlusOneTestCases = []struct {
340 {"example.example", "example.example"},
341 {"b.example.example", "example.example"},
342 {"a.b.example.example", "example.example"},
343 // TLD with only 1 rule.
345 {"domain.biz", "domain.biz"},
346 {"b.domain.biz", "domain.biz"},
347 {"a.b.domain.biz", "domain.biz"},
348 // TLD with some 2-level rules.
350 {"example.com", "example.com"},
351 {"b.example.com", "example.com"},
352 {"a.b.example.com", "example.com"},
354 {"example.uk.com", "example.uk.com"},
355 {"b.example.uk.com", "example.uk.com"},
356 {"a.b.example.uk.com", "example.uk.com"},
357 {"test.ac", "test.ac"},
358 // TLD with only 1 (wildcard) rule.
361 {"b.c.mm", "b.c.mm"},
362 {"a.b.c.mm", "b.c.mm"},
365 {"test.jp", "test.jp"},
366 {"www.test.jp", "test.jp"},
368 {"test.ac.jp", "test.ac.jp"},
369 {"www.test.ac.jp", "test.ac.jp"},
371 {"test.kyoto.jp", "test.kyoto.jp"},
372 {"ide.kyoto.jp", ""},
373 {"b.ide.kyoto.jp", "b.ide.kyoto.jp"},
374 {"a.b.ide.kyoto.jp", "b.ide.kyoto.jp"},
376 {"b.c.kobe.jp", "b.c.kobe.jp"},
377 {"a.b.c.kobe.jp", "b.c.kobe.jp"},
378 {"city.kobe.jp", "city.kobe.jp"},
379 {"www.city.kobe.jp", "city.kobe.jp"},
380 // TLD with a wildcard rule and exceptions.
383 {"b.test.ck", "b.test.ck"},
384 {"a.b.test.ck", "b.test.ck"},
385 {"www.ck", "www.ck"},
386 {"www.www.ck", "www.ck"},
389 {"test.us", "test.us"},
390 {"www.test.us", "test.us"},
392 {"test.ak.us", "test.ak.us"},
393 {"www.test.ak.us", "test.ak.us"},
395 {"test.k12.ak.us", "test.k12.ak.us"},
396 {"www.test.k12.ak.us", "test.k12.ak.us"},
397 // Punycoded IDN labels
398 {"xn--85x722f.com.cn", "xn--85x722f.com.cn"},
399 {"xn--85x722f.xn--55qx5d.cn", "xn--85x722f.xn--55qx5d.cn"},
400 {"www.xn--85x722f.xn--55qx5d.cn", "xn--85x722f.xn--55qx5d.cn"},
401 {"shishi.xn--55qx5d.cn", "shishi.xn--55qx5d.cn"},
402 {"xn--55qx5d.cn", ""},
403 {"xn--85x722f.xn--fiqs8s", "xn--85x722f.xn--fiqs8s"},
404 {"www.xn--85x722f.xn--fiqs8s", "xn--85x722f.xn--fiqs8s"},
405 {"shishi.xn--fiqs8s", "shishi.xn--fiqs8s"},
// TestEffectiveTLDPlusOne calls EffectiveTLDPlusOne on each domain of
// eTLDPlusOneTestCases and reports the domain with the first result and
// tc.want. The second result is deliberately discarded, so only the returned
// string is checked.
409 func TestEffectiveTLDPlusOne(t *testing.T) {
410 for _, tc := range eTLDPlusOneTestCases {
411 got, _ := EffectiveTLDPlusOne(tc.domain)
413 t.Errorf("%q: got %q, want %q", tc.domain, got, tc.want)