OSDN Git Service

wsboards.com noticed, had been removed due to spam (Thanks Chris)
[pukiwiki/pukiwiki_sandbox.git] / spam / SpamPickupTest.php
1 <?php
2 // $Id: SpamPickupTest.php,v 1.11 2009/01/04 08:56:07 henoheno Exp $
3 // Copyright (C) 2007-2009 heno
4 //
5 // Design test case for spam.php (called from runner.php)
6
// Use the default spam configuration file name unless SPAM_INI_FILE
// was already defined before this file was loaded.
if (! defined('SPAM_INI_FILE')) {
        define('SPAM_INI_FILE', 'spam.ini.php');
}
8
9 require_once('spam_pickup.php');
10 require_once('PHPUnit/PHPUnit.php');
11
12 class SpamPickupTest extends PHPUnit_TestCase
13 {
14         function setup_string_null()
15         {
16                 return array(
17                         '[NULL]'        => NULL,
18                         '[TRUE]'        => TRUE,
19                         '[FALSE]'       => FALSE,
20                         '[array(foobar)]' => array('foobar'),
21                         '[]'            => '',
22                         '[0]'           => 0,
23                         '[1]'           => 1
24                 );
25         }
26
27         function testFunc_scheme_normalize()
28         {
29                 // Null
30                 foreach($this->setup_string_null() as $key => $value){
31                         $this->assertEquals('', scheme_normalize($value), $key);
32                 }
33
34                 // CASE
35                 $this->assertEquals('http', scheme_normalize('HTTP'));
36
37                 // Aliases
38                 $this->assertEquals('pop3',  scheme_normalize('pop'));
39                 $this->assertEquals('nntp',  scheme_normalize('news'));
40                 $this->assertEquals('imap',  scheme_normalize('imap4'));
41                 $this->assertEquals('nntps', scheme_normalize('snntp'));
42                 $this->assertEquals('nntps', scheme_normalize('snews'));
43                 $this->assertEquals('pop3s', scheme_normalize('spop3'));
44                 $this->assertEquals('pop3s', scheme_normalize('pops'));
45                 
46                 // Abbrevs
47                 $this->assertEquals('http',  scheme_normalize('ttp'));
48                 $this->assertEquals('https', scheme_normalize('ttps'));
49
50                 // Abbrevs considererd harmless
51                 $this->assertEquals('', scheme_normalize('ttp',  FALSE));
52                 $this->assertEquals('', scheme_normalize('ttps', FALSE));
53         }
54
55         function testFunc_host_normalize()
56         {
57                 // Invalid: Null
58                 foreach($this->setup_string_null() as $key => $value){
59                         $this->assertEquals('', host_normalize($value), $key);
60                 }
61
62                 // Hostname is case-insensitive
63                 $this->assertEquals('example.org', host_normalize('ExAMPle.ORG'));
64
65                 // Cut 'www' with traditional ASCII-based FQDN (destructive)
66                 $this->assertEquals('example.org', host_normalize('WWW.example.org'));
67
68                 // Don't cut 'www' with Non-ASCII-based string such as IDN
69                 $this->assertEquals("www.example.org\0foobar",
70                          host_normalize("WWW.example.org\0foobar"));
71         }
72
73         function testFunc_port_normalize()
74         {
75                 $scheme = 'dont_care';
76
77                 // 1st argument: Null
78                 $this->assertEquals('', port_normalize(NULL, $scheme));
79                 $this->assertEquals('', port_normalize(TRUE, $scheme));
80                 $this->assertEquals('', port_normalize(FALSE, $scheme));
81                 $this->assertEquals('', port_normalize(array('foobar'), $scheme));
82                 $this->assertEquals('', port_normalize('',   $scheme));
83
84                 // 1st argument: Known port
85                 $this->assertEquals('',    port_normalize(   -1, $scheme));
86                 $this->assertEquals(0,     port_normalize(    0, $scheme));
87                 $this->assertEquals(1,     port_normalize(    1, $scheme));
88                 $this->assertEquals('',    port_normalize(   21, 'ftp'));
89                 $this->assertEquals('',    port_normalize(   22, 'ssh'));
90                 $this->assertEquals('',    port_normalize(   23, 'telnet'));
91                 $this->assertEquals('',    port_normalize(   25, 'smtp'));
92                 $this->assertEquals('',    port_normalize(   69, 'tftp'));
93                 $this->assertEquals('',    port_normalize(   70, 'gopher'));
94                 $this->assertEquals('',    port_normalize(   79, 'finger'));
95                 $this->assertEquals('',    port_normalize(   80, 'http'));
96                 $this->assertEquals('',    port_normalize(  110, 'pop3'));
97                 $this->assertEquals('',    port_normalize(  115, 'sftp'));
98                 $this->assertEquals('',    port_normalize(  119, 'nntp'));
99                 $this->assertEquals('',    port_normalize(  143, 'imap'));
100                 $this->assertEquals('',    port_normalize(  194, 'irc'));
101                 $this->assertEquals('',    port_normalize(  210, 'wais'));
102                 $this->assertEquals('',    port_normalize(  443, 'https'));
103                 $this->assertEquals('',    port_normalize(  563, 'nntps'));
104                 $this->assertEquals('',    port_normalize(  873, 'rsync'));
105                 $this->assertEquals('',    port_normalize(  990, 'ftps'));
106                 $this->assertEquals('',    port_normalize(  992, 'telnets'));
107                 $this->assertEquals('',    port_normalize(  993, 'imaps'));
108                 $this->assertEquals('',    port_normalize(  994, 'ircs'));
109                 $this->assertEquals('',    port_normalize(  995, 'pop3s'));
110                 $this->assertEquals('',    port_normalize( 3306, 'mysql'));
111                 $this->assertEquals(8080,  port_normalize( 8080, $scheme));
112                 $this->assertEquals(65535, port_normalize(65535, $scheme));
113                 $this->assertEquals(65536, port_normalize(65536, $scheme)); // Seems not invalid in RFC
114
115                 // 1st argument: Invalid type
116                 $this->assertEquals('1x',  port_normalize('001', $scheme) . 'x');
117                 $this->assertEquals('',    port_normalize('+0',  $scheme));
118                 $this->assertEquals('',    port_normalize('0-1', $scheme)); // intval() says '0'
119                 $this->assertEquals('',    port_normalize('str', $scheme));
120
121                 // 2nd and 3rd argument: Null
122                 $this->assertEquals(80,    port_normalize(80, NULL,  TRUE));
123                 $this->assertEquals(80,    port_normalize(80, TRUE,  TRUE));
124                 $this->assertEquals(80,    port_normalize(80, FALSE, TRUE));
125                 $this->assertEquals(80,    port_normalize(80, array('foobar'), TRUE));
126                 $this->assertEquals(80,    port_normalize(80, '', TRUE));
127
128                 // 2nd and 3rd argument: Do $scheme_normalize
129                 $this->assertEquals('',    port_normalize(80,  'TTP',  TRUE));
130                 $this->assertEquals('',    port_normalize(110, 'POP',  TRUE));
131                 $this->assertEquals(80,    port_normalize(80,  'HTTP', FALSE));
132         }
133
134         function testFunc_path_normalize()
135         {
136                 // 1st argument: Null
137                 foreach($this->setup_string_null() as $key => $value){
138                         $this->assertEquals('/', path_normalize($value), $key);
139                 }
140
141                 // 1st argument: CASE sensitive
142                 $this->assertEquals('/ExAMPle', path_normalize('ExAMPle'));
143                 $this->assertEquals('/#hoge',   path_normalize('#hoge'));
144                 $this->assertEquals('/a/b/c/d', path_normalize('/a/b/./c////./d'));
145                 $this->assertEquals('/b/',      path_normalize('/a/../../../b/'));
146
147                 // 2nd argument
148                 $this->assertEquals('\\b\\c\\d\\', path_normalize('\\a\\..\\b\\.\\c\\\\.\\d\\', '\\'));
149                 $this->assertEquals('str1str3str', path_normalize('str1strstr2str..str3str', 'str'));
150                 $this->assertEquals('/do/../nothing/', path_normalize('/do/../nothing/', TRUE));
151                 $this->assertEquals('/do/../nothing/', path_normalize('/do/../nothing/', array('a')));
152                 $this->assertEquals('',            path_normalize(array('a'), array('b')));
153         }
154
155         function testFunc_query_normalize()
156         {
157                 // 1st argument: Null
158                 foreach($this->setup_string_null() as $key => $value){
159                         $this->assertEquals('', query_normalize($value), $key);
160                 }
161
162                 $this->assertEquals('a=0dd&b&c&d&f=d', query_normalize('&&&&f=d&b&d&c&a=0dd'));
163                 $this->assertEquals('eg=foobar',       query_normalize('nothing==&eg=dummy&eg=padding&eg=foobar'));
164         }
165
166         function testFunc_file_normalize()
167         {
168                 // 1st argument: Null
169                 foreach($this->setup_string_null() as $key => $value){
170                         $this->assertEquals('', file_normalize($value), $key);
171                 }
172
173                 // 1st argument: Cut DirectoryIndexes (Destructive)
174                 foreach(array(
175                         'default.htm',
176                         'default.html',
177                         'default.asp',
178                         'default.aspx',
179 \r                       'index',
180                         'index.htm',
181                         'index.html',
182                         'index.shtml',
183                         'index.jsp',
184                         'index.php',
185                         'index.php',
186                         'index.php3',
187                         'index.php4',
188                         'index.pl',
189                         'index.py',
190                         'index.rb',
191                         'index.cgi',
192
193                         // Apache 2.0.59 default 'index.html' variants
194                         'index.html.ca',
195                         'index.html.cz.iso8859-2',
196                         'index.html.de',
197                         'index.html.dk',
198                         'index.html.ee',
199                         'index.html.el',
200                         'index.html.en',
201                         'index.html.es',
202                         'index.html.et',
203                         'index.html.fr',
204                         'index.html.he.iso8859-8',
205                         'index.html.hr.iso8859-2',
206                         'index.html.it',
207                         'index.html.ja.iso2022-jp',
208                         'index.html.ko.euc-kr',
209                         'index.html.lb.utf8',
210                         'index.html.nl',
211                         'index.html.nn',
212                         'index.html.no',
213                         'index.html.po.iso8859-2',
214                         'index.html.pt',
215                         'index.html.pt-br',
216                         'index.html.ru.cp866',
217                         'index.html.ru.cp-1251',
218                         'index.html.ru.iso-ru',
219                         'index.html.ru.koi8-r',
220                         'index.html.ru.utf8',
221                         'index.html.sv',
222                         'index.html.var',       // default
223                         'index.html.zh-cn.gb2312',
224                         'index.html.zh-tw.big5',
225
226                         'index.html.po.iso8859-2',
227                         'index.html.zh-tw.big5',
228
229                         'index.ja.en.de.html',
230                 
231                         // .gz
232                         'index.html.ca.gz',
233                         'index.html.en.ja.ca.z',
234                 ) as $arg){
235                         $this->assertEquals('', file_normalize($arg));
236                 }
237
238                 //$this->assertEquals('foo/', file_normalize('foo/index.html'));
239
240                 //$this->assertEquals('ExAMPle', file_normalize('ExAMPle'));
241                 //$this->assertEquals('exe.exe', file_normalize('exe.exe'));
242                 //$this->assertEquals('sample.html', file_normalize('sample.html.en'));
243                 //$this->assertEquals('sample.html', file_normalize('sample.html.pt-br'));
244                 //$this->assertEquals('sample.html', file_normalize('sample.html.po.iso8859-2'));
245                 //$this->assertEquals('sample.html', file_normalize('sample.html.zh-tw.big5'));
246         }
247
248         function testFunc_uri_pickup()
249         {
250                 // 1st argument: Null
251                 foreach($this->setup_string_null() as $key => $value){
252                         $this->assertEquals(0, count(uri_pickup($value)), $key);
253                 }
254
255                 // 1st argument: Some
256                 $test_string = <<<EOF
257                         TTP://wwW.Example.Org#TTP_and_www
258                         https://nasty.example.org:443/foo/xxx#port443/slash
259                         sftp://foobar.example.org:80/dfsdfs#ftp_bat_port80
260                         ftp://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm
261                         http://192.168.1.4:443#IPv4
262                         http://localhost/index.php?%2Fofficial&word=f
263 EOF;
264                 $results = uri_pickup_normalize(uri_pickup($test_string));
265                 $this->assertEquals(6, count($results));
266
267                 // ttp://wwW.Example.Org:80#TTP_and_www
268                 $this->assertEquals('http',           $results[0]['scheme']);
269                 $this->assertEquals('',               $results[0]['userinfo']);
270                 $this->assertEquals('example.org',    $results[0]['host']);
271                 $this->assertEquals('',               $results[0]['port']);
272                 $this->assertEquals('/',              $results[0]['path']);
273                 $this->assertEquals('',               $results[0]['file']);
274                 $this->assertEquals('',               $results[0]['query']);
275                 $this->assertEquals('ttp_and_www',    $results[0]['fragment']);
276
277                 // https://nasty.example.org:443/foo/xxx#port443/slash
278                 $this->assertEquals('https',          $results[1]['scheme']);
279                 $this->assertEquals('',               $results[1]['userinfo']);
280                 $this->assertEquals('nasty.example.org', $results[1]['host']);
281                 $this->assertEquals('',               $results[1]['port']);
282                 $this->assertEquals('/foo/',          $results[1]['path']);
283                 $this->assertEquals('xxx',            $results[1]['file']);
284                 $this->assertEquals('',               $results[1]['query']);
285                 $this->assertEquals('port443',        $results[1]['fragment']);
286
287                 // sftp://foobar.example.org:80/dfsdfs#sftp_bat_port80
288                 $this->assertEquals('sftp',           $results[2]['scheme']);
289                 $this->assertEquals('',               $results[2]['userinfo']);
290                 $this->assertEquals('foobar.example.org', $results[2]['host']);
291                 $this->assertEquals('80',             $results[2]['port']);
292                 $this->assertEquals('/',              $results[2]['path']);
293                 $this->assertEquals('dfsdfs',         $results[2]['file']);
294                 $this->assertEquals('',               $results[2]['query']);
295                 $this->assertEquals('ftp_bat_port80', $results[2]['fragment']);
296
297                 // ftp://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm
298                 $this->assertEquals('ftp',            $results[3]['scheme']);
299                 $this->assertEquals('cnn.example.com&story=breaking_news', $results[3]['userinfo']);
300                 $this->assertEquals('10.0.0.1',       $results[3]['host']);
301                 $this->assertEquals('',               $results[3]['port']);
302                 $this->assertEquals('/',              $results[3]['path']);
303                 $this->assertEquals('top_story.htm',  $results[3]['file']);
304                 $this->assertEquals('',               $results[3]['query']);
305                 $this->assertEquals('',               $results[3]['fragment']);
306
307                 // http://192.168.1.4:443#IPv4
308                 $this->assertEquals('http',           $results[4]['scheme']);
309                 $this->assertEquals('',               $results[4]['userinfo']);
310                 $this->assertEquals('192.168.1.4',    $results[4]['host']);
311                 $this->assertEquals('443',            $results[4]['port']);
312                 $this->assertEquals('/',              $results[4]['path']);
313                 $this->assertEquals('',               $results[4]['file']);
314                 $this->assertEquals('',               $results[4]['query']);
315                 $this->assertEquals('ipv4',           $results[4]['fragment']);
316
317                 // http://localhost/index.php?%2Fofficial&word=f
318                 $this->assertEquals('http',           $results[5]['scheme']);
319                 $this->assertEquals('',               $results[5]['userinfo']);
320                 $this->assertEquals('localhost',      $results[5]['host']);
321                 $this->assertEquals('',               $results[5]['port']);
322                 $this->assertEquals('/',              $results[5]['path']);
323                 $this->assertEquals('',               $results[5]['file']);
324                 $this->assertEquals('%2fofficial&word=f', $results[5]['query']);
325                 $this->assertEquals('',               $results[5]['fragment']);
326
327
328                 // Specific tests ----
329
330                 // Divider: Back-slash
331                 $test_string = ' http:\\backslash.org\fobar.html ';
332                 $results = uri_pickup_normalize(uri_pickup($test_string));
333                 $this->assertEquals('backslash.org',  $results[0]['host']);
334
335                 // Divider: percent-encoded
336                 //$test_string = ' http%3A%2F%5Cpercent-encoded.org%5Cfobar.html ';
337                 //$results = uri_pickup_normalize(uri_pickup($test_string));
338                 //$this->assertEquals('percent-encoded.org',  $results[0]['host']);
339
340                 // Host: Without path
341                 $test_string = ' http://nopathstring.com ';
342                 $results = uri_pickup($test_string);
343                 $this->assertEquals('', $results[0]['path']);
344                 $this->assertEquals('', $results[0]['file']);
345                 $results[0]['path'] = '/';
346                 $this->assertEquals('', $results[0]['file'], '[Seems referense trouble]');
347                 //
348                 $results = uri_pickup($test_string);
349                 $results = uri_pickup_normalize($results);
350                 $this->assertEquals('/',$results[0]['path']);
351                 $this->assertEquals('', $results[0]['file']);
352
353                 // Host: Underscore
354                 $test_string = ' http://under_score.org/fobar.html ';
355                 $results = uri_pickup_normalize(uri_pickup($test_string));
356                 $this->assertEquals('under_score.org',$results[0]['host']);     // Not 'under'
357
358                 // Host: IPv4
359                 $test_string = ' http://192.168.0.1/fobar.html ';
360                 $results = uri_pickup_normalize(uri_pickup($test_string));
361                 $this->assertEquals('192.168.0.1',    $results[0]['host']);
362
363                 // Host: Starts
364                 $test_string = ' http://_sss/foo.html ';
365                 $results = uri_pickup_normalize(uri_pickup($test_string));
366                 $this->assertEquals('_sss',           $results[0]['host']);
367                 $this->assertEquals('foo.html',       $results[0]['file']);
368
369                 // Host: Ends
370                 $test_string = ' http://sss_/foo.html ';
371                 $results = uri_pickup_normalize(uri_pickup($test_string));
372                 $this->assertEquals('sss_',           $results[0]['host']);
373                 $this->assertEquals('foo.html',       $results[0]['file']);
374
375
376                 // Specific tests ---- Fails
377
378                 // Divider: Colon only (Too sensitive to capture)
379                 $test_string = ' http:colon.org ';
380                 $results = uri_pickup_normalize(uri_pickup($test_string));
381                 $this->assertEquals(0, count($results));
382
383                 // Host: Too short
384                 $test_string = ' http://s/foo.html http://ss/foo.html ';
385                 $results = uri_pickup_normalize(uri_pickup($test_string));
386                 $this->assertEquals(0, count($results));
387
388                 $test_string = ' http://sss/foo.html ';
389                 $results = uri_pickup_normalize(uri_pickup($test_string));
390                 $this->assertEquals('sss',            $results[0]['host']);
391                 $this->assertEquals('foo.html',       $results[0]['file']);
392
393                 // uri_pickup_normalize_pathfile()
394                 $test_string = ' http://example.com/path/to/directory-accidentally-not-ended-with-slash ';
395                 $results = uri_pickup_normalize_pathfile(uri_pickup($test_string));
396                 $this->assertEquals('/path/to/directory-accidentally-not-ended-with-slash', $results[0]['path']);
397                 $this->assertEquals(TRUE,  isset($results[0]['path']));
398                 $this->assertEquals(FALSE, isset($results[0]['file']));
399                 $this->assertEquals('http://example.com/path/to/directory-accidentally-not-ended-with-slash',
400                         uri_pickup_implode($results[0]));
401         }
402
403         function testFunc_spam_uri_pickup()
404         {
405                 // Divider: percent-encoded
406                 $test_string = ' http://victim.example.org/http%3A%2F%5Cnasty.example.org ';
407                 $results = spam_uri_pickup($test_string);
408                 $this->assertEquals('victim.example.org', $results[0]['host']);
409                 $this->assertEquals('nasty.example.org',  $results[1]['host']);
410
411                 // Domain exposure (site:)
412                 $test_string = ' http://search.example.org/?q=%20site:nasty.example.org ';
413                 $results = spam_uri_pickup($test_string);
414                 $this->assertEquals('nasty.example.org', $results[0]['host']);
415                 $this->assertEquals('search.example.org',  $results[1]['host']);
416                 
417                 // Domain exposure (%20site:)
418                 $test_string = ' http://search2.example.org/?q=%20site:nasty2.example.org ';
419                 $results = spam_uri_pickup($test_string);
420                 $this->assertEquals('nasty2.example.org', $results[0]['host']);
421                 $this->assertEquals('search2.example.org',  $results[1]['host']);
422         }
423
424         function testFunc_is_ip()
425         {
426                 // 1st argument: Null
427                 foreach($this->setup_string_null() as $key => $value){
428                         $this->assertEquals(FALSE, is_ip($value), $key);
429                 }
430
431                 // IPv4
432                 foreach(array(
433                                 '192.168.1.1',
434                         ) as $value){
435                         $this->assertEquals(4,  is_ip($value), $key, '[' . $value . ']');
436                 }
437
438                 // IPv6
439                 foreach(array(
440                                 '::',                           // 0:0:0:0:0:0:0:0
441                                 '::192.168.1.1',        // IPv4 within IPv6 network
442                         ) as $value){
443                         $this->assertEquals(6,  is_ip($value), $key, '[' . $value . ']');
444                 }
445
446                 // Invalid
447                 foreach(array(
448                                 '',
449                                 '.',
450                         ) as $value){
451                         $this->assertEquals(FALSE,      is_ip($value), $key, '[' . $value . ']');
452                 }
453         }
454
455         function testFunc_whois_responsibility()
456         {
457                 // 1st argument: Null
458                 foreach($this->setup_string_null() as $key => $value){
459                         $this->assertEquals('',        whois_responsibility($value), $key);
460                 }
461
462                 // 'act.edu.au' is known as 3rd level domain
463                 $this->AssertEquals('bar.act.edu.au', whois_responsibility('foo.bar.act.edu.au'));
464                 $this->AssertEquals('bar.act.edu.au', whois_responsibility('bar.act.edu.au'));
465                 $this->AssertEquals('act.edu.au',  whois_responsibility('act.edu.au'));
466                 $this->AssertEquals('edu.au',      whois_responsibility('edu.au'));
467                 $this->AssertEquals('au',          whois_responsibility('au'));
468
469                 // 'co.uk' is known as 2nd level domain
470                 $this->AssertEquals('bar.co.uk',   whois_responsibility('foo.bar.co.uk'));
471                 $this->AssertEquals('bar.co.uk',   whois_responsibility('bar.co.uk'));
472                 $this->AssertEquals('co.uk',       whois_responsibility('co.uk'));
473                 $this->AssertEquals('uk',          whois_responsibility('uk'));
474
475                 // 'bar.uk' is not 2nd level (implicit responsibility)
476                 $this->AssertEquals('bar.uk',      whois_responsibility('foo.bar.uk'));
477                 $this->AssertEquals('bar.uk',      whois_responsibility('bar.uk'));
478
479                 // IPv4
480                 $this->AssertEquals('192.168.0.1', whois_responsibility('192.168.0.1'));
481
482                 // Invalid Top-Level Domain (With implicit)
483                 $this->AssertEquals('bar.local',  whois_responsibility('foo.bar.local'));       // Implicit responsibility
484                 $this->AssertEquals('bar.local',  whois_responsibility('bar.local'));
485                 $this->AssertEquals('local',      whois_responsibility('local'));
486                 $this->AssertEquals('localhost',  whois_responsibility('localhost'));
487                 $this->AssertEquals('s',          whois_responsibility('s'));
488         }
489 }
490
491 ?>