2 // $Id: SpamPickupTest.php,v 1.10 2009/01/02 10:37:47 henoheno Exp $
3 // Copyright (C) 2007 heno
5 // Design test case for spam.php (called from runner.php)
// Use the default ini file name unless SPAM_INI_FILE was already defined
// before this file was loaded.
7 if (! defined('SPAM_INI_FILE')) define('SPAM_INI_FILE', 'spam.ini.php');
// Load the code under test and the PHPUnit library.
// NOTE(review): presumably spam_pickup.php defines the *_normalize(), uri_pickup*(),
// is_ip() and whois_responsibility() functions exercised below, and
// PHPUnit/PHPUnit.php provides PHPUnit_TestCase -- confirm.
9 require_once('spam_pickup.php');
10 require_once('PHPUnit/PHPUnit.php');
// Test case for the URI helpers: scheme/host/port/path/query/file normalizers,
// uri_pickup() and its normalizers, spam_uri_pickup(), is_ip() and
// whois_responsibility(). Each testFunc_* method covers one function.
12 class SpamPickupTest extends PHPUnit_TestCase
// Fixture shared by the tests in this class: yields "label => value" pairs that
// are fed to each function as null-like / non-string input. The label is passed
// on as the assertion message so a failure names the offending value.
// NOTE(review): only the array('foobar') entry is visible in this excerpt; the
// remaining entries are not shown here.
14 function setup_string_null()
20 '[array(foobar)]' => array('foobar'),
// Exercises scheme_normalize(): null-like input, case folding, alias mapping,
// and the optional 2nd argument that controls abbreviated schemes.
27 function testFunc_scheme_normalize()
// Every null-like fixture value is expected to normalize to ''
30 foreach($this->setup_string_null() as $key => $value){
31 $this->assertEquals('', scheme_normalize($value), $key);
// Scheme names are expected to come back lower-cased
35 $this->assertEquals('http', scheme_normalize('HTTP'));
// Aliases are expected to map onto one canonical scheme name
38 $this->assertEquals('pop3', scheme_normalize('pop'));
39 $this->assertEquals('nntp', scheme_normalize('news'));
40 $this->assertEquals('imap', scheme_normalize('imap4'));
41 $this->assertEquals('nntps', scheme_normalize('snntp'));
42 $this->assertEquals('nntps', scheme_normalize('snews'));
43 $this->assertEquals('pop3s', scheme_normalize('spop3'));
44 $this->assertEquals('pop3s', scheme_normalize('pops'));
// By default the abbreviations 'ttp' / 'ttps' are expanded to 'http' / 'https'
47 $this->assertEquals('http', scheme_normalize('ttp'));
48 $this->assertEquals('https', scheme_normalize('ttps'));
50 // Abbreviations considered harmless: with the 2nd argument FALSE they yield ''
51 $this->assertEquals('', scheme_normalize('ttp', FALSE));
52 $this->assertEquals('', scheme_normalize('ttps', FALSE));
// Exercises host_normalize(): null-like input, case folding, and removal of a
// leading 'www.' label.
55 function testFunc_host_normalize()
// Every null-like fixture value is expected to normalize to ''
58 foreach($this->setup_string_null() as $key => $value){
59 $this->assertEquals('', host_normalize($value), $key);
62 // Hostname is case-insensitive
63 $this->assertEquals('example.org', host_normalize('ExAMPle.ORG'));
65 // Cut 'www' with traditional ASCII-based FQDN (destructive)
66 $this->assertEquals('example.org', host_normalize('WWW.example.org'));
68 // Don't cut 'www' with Non-ASCII-based string such as IDN
// (a NUL byte stands in for the non-ASCII part; the name is still lower-cased)
69 $this->assertEquals("www.example.org\0foobar",
70 host_normalize("WWW.example.org\0foobar"));
// Exercises port_normalize($port, $scheme, $scheme_normalize): a port that is the
// well-known default for its scheme is expected to collapse to '', other valid
// ports are returned as-is, and invalid input yields ''.
73 function testFunc_port_normalize()
// Placeholder scheme for cases where the scheme must not influence the result
75 $scheme = 'dont_care';
// 1st argument: null-like or empty values yield ''
78 $this->assertEquals('', port_normalize(NULL, $scheme));
79 $this->assertEquals('', port_normalize(TRUE, $scheme));
80 $this->assertEquals('', port_normalize(FALSE, $scheme));
81 $this->assertEquals('', port_normalize(array('foobar'), $scheme));
82 $this->assertEquals('', port_normalize('', $scheme));
84 // 1st argument: Known port
// A negative port is rejected; 0 and 1 pass through unchanged
85 $this->assertEquals('', port_normalize( -1, $scheme));
86 $this->assertEquals(0, port_normalize( 0, $scheme));
87 $this->assertEquals(1, port_normalize( 1, $scheme));
// The default port of each known scheme is dropped (normalized to '')
88 $this->assertEquals('', port_normalize( 21, 'ftp'));
89 $this->assertEquals('', port_normalize( 22, 'ssh'));
90 $this->assertEquals('', port_normalize( 23, 'telnet'));
91 $this->assertEquals('', port_normalize( 25, 'smtp'));
92 $this->assertEquals('', port_normalize( 69, 'tftp'));
93 $this->assertEquals('', port_normalize( 70, 'gopher'));
94 $this->assertEquals('', port_normalize( 79, 'finger'));
95 $this->assertEquals('', port_normalize( 80, 'http'));
96 $this->assertEquals('', port_normalize( 110, 'pop3'));
97 $this->assertEquals('', port_normalize( 115, 'sftp'));
98 $this->assertEquals('', port_normalize( 119, 'nntp'));
99 $this->assertEquals('', port_normalize( 143, 'imap'));
100 $this->assertEquals('', port_normalize( 194, 'irc'));
101 $this->assertEquals('', port_normalize( 210, 'wais'));
102 $this->assertEquals('', port_normalize( 443, 'https'));
103 $this->assertEquals('', port_normalize( 563, 'nntps'));
104 $this->assertEquals('', port_normalize( 873, 'rsync'));
105 $this->assertEquals('', port_normalize( 990, 'ftps'));
106 $this->assertEquals('', port_normalize( 992, 'telnets'));
107 $this->assertEquals('', port_normalize( 993, 'imaps'));
108 $this->assertEquals('', port_normalize( 994, 'ircs'));
109 $this->assertEquals('', port_normalize( 995, 'pop3s'));
110 $this->assertEquals('', port_normalize( 3306, 'mysql'));
// Ports that are not the default of the given scheme are kept
111 $this->assertEquals(8080, port_normalize( 8080, $scheme));
112 $this->assertEquals(65535, port_normalize(65535, $scheme));
113 $this->assertEquals(65536, port_normalize(65536, $scheme)); // Seems not invalid in RFC
115 // 1st argument: Invalid type
// '001' must come back as 1; appending 'x' compares the string form, so a result
// that kept its leading zeros would fail
116 $this->assertEquals('1x', port_normalize('001', $scheme) . 'x');
117 $this->assertEquals('', port_normalize('+0', $scheme));
118 $this->assertEquals('', port_normalize('0-1', $scheme)); // intval() says '0'
119 $this->assertEquals('', port_normalize('str', $scheme));
121 // 2nd and 3rd argument: Null
// With a null-like scheme no default port is known, so 80 is kept
122 $this->assertEquals(80, port_normalize(80, NULL, TRUE));
123 $this->assertEquals(80, port_normalize(80, TRUE, TRUE));
124 $this->assertEquals(80, port_normalize(80, FALSE, TRUE));
125 $this->assertEquals(80, port_normalize(80, array('foobar'), TRUE));
126 $this->assertEquals(80, port_normalize(80, '', TRUE));
128 // 2nd and 3rd argument: Do $scheme_normalize
// With the 3rd argument TRUE the scheme is normalized first ('TTP' -> http,
// 'POP' -> pop3), so the default port is recognized and dropped; with FALSE the
// raw 'HTTP' is not matched and 80 is kept
129 $this->assertEquals('', port_normalize(80, 'TTP', TRUE));
130 $this->assertEquals('', port_normalize(110, 'POP', TRUE));
131 $this->assertEquals(80, port_normalize(80, 'HTTP', FALSE));
// Exercises path_normalize($path, $divider): null-like input, '.' / '..' and
// duplicate-separator removal, and custom or invalid dividers.
134 function testFunc_path_normalize()
136 // 1st argument: Null
// Null-like input is expected to normalize to the root path '/'
137 foreach($this->setup_string_null() as $key => $value){
138 $this->assertEquals('/', path_normalize($value), $key);
141 // 1st argument: CASE sensitive
// A leading '/' is added, case is preserved, './' and repeated '/' are removed,
// and '..' cannot climb above the root
142 $this->assertEquals('/ExAMPle', path_normalize('ExAMPle'));
143 $this->assertEquals('/#hoge', path_normalize('#hoge'));
144 $this->assertEquals('/a/b/c/d', path_normalize('/a/b/./c////./d'));
145 $this->assertEquals('/b/', path_normalize('/a/../../../b/'));
// 2nd argument: a custom divider string (single- or multi-character) is honoured
148 $this->assertEquals('\\b\\c\\d\\', path_normalize('\\a\\..\\b\\.\\c\\\\.\\d\\', '\\'));
149 $this->assertEquals('str1str3str', path_normalize('str1strstr2str..str3str', 'str'));
// 2nd argument: a non-string divider leaves a string path untouched
150 $this->assertEquals('/do/../nothing/', path_normalize('/do/../nothing/', TRUE));
151 $this->assertEquals('/do/../nothing/', path_normalize('/do/../nothing/', array('a')));
// Both arguments invalid: the result is ''
152 $this->assertEquals('', path_normalize(array('a'), array('b')));
// Exercises query_normalize(): null-like input and canonical ordering of a
// query string.
155 function testFunc_query_normalize()
157 // 1st argument: Null
158 foreach($this->setup_string_null() as $key => $value){
159 $this->assertEquals('', query_normalize($value), $key);
// Empty pairs are dropped and the remaining parameters come back sorted by key
162 $this->assertEquals('a=0dd&b&c&d&f=d', query_normalize('&&&&f=d&b&d&c&a=0dd'));
// Only the last value of the repeated key 'eg' survives; 'nothing==' is dropped
163 $this->assertEquals('eg=foobar', query_normalize('nothing==&eg=dummy&eg=padding&eg=foobar'));
// Exercises file_normalize(): null-like input and removal of directory-index
// file names.
// NOTE(review): the array/loop header and most list entries are not visible in
// this excerpt; only the longer variant names are shown.
166 function testFunc_file_normalize()
168 // 1st argument: Null
169 foreach($this->setup_string_null() as $key => $value){
170 $this->assertEquals('', file_normalize($value), $key);
173 // 1st argument: Cut DirectoryIndexes (Destructive)
193 // Apache 2.0.59 default 'index.html' variants
195 'index.html.cz.iso8859-2',
204 'index.html.he.iso8859-8',
205 'index.html.hr.iso8859-2',
207 'index.html.ja.iso2022-jp',
208 'index.html.ko.euc-kr',
209 'index.html.lb.utf8',
213 'index.html.po.iso8859-2',
216 'index.html.ru.cp866',
217 'index.html.ru.cp-1251',
218 'index.html.ru.iso-ru',
219 'index.html.ru.koi8-r',
220 'index.html.ru.utf8',
222 'index.html.var', // default
223 'index.html.zh-cn.gb2312',
224 'index.html.zh-tw.big5',
226 'index.html.po.iso8859-2',
227 'index.html.zh-tw.big5',
229 'index.ja.en.de.html',
233 'index.html.en.ja.ca.z',
// Each directory-index name is expected to be cut down to ''
// (presumably $arg iterates over the names listed above -- confirm)
235 $this->assertEquals('', file_normalize($arg));
// The assertions below are commented out and therefore not executed
238 //$this->assertEquals('foo/', file_normalize('foo/index.html'));
240 //$this->assertEquals('ExAMPle', file_normalize('ExAMPle'));
241 //$this->assertEquals('exe.exe', file_normalize('exe.exe'));
242 //$this->assertEquals('sample.html', file_normalize('sample.html.en'));
243 //$this->assertEquals('sample.html', file_normalize('sample.html.pt-br'));
244 //$this->assertEquals('sample.html', file_normalize('sample.html.po.iso8859-2'));
245 //$this->assertEquals('sample.html', file_normalize('sample.html.zh-tw.big5'));
// Exercises uri_pickup() together with uri_pickup_normalize(),
// uri_pickup_normalize_pathfile() and uri_pickup_implode(): URIs are extracted
// from free text and each one is checked component by component
// (scheme, userinfo, host, port, path, file, query, fragment).
248 function testFunc_uri_pickup()
250 // 1st argument: Null
// Null-like input must produce an empty result list
251 foreach($this->setup_string_null() as $key => $value){
252 $this->assertEquals(0, count(uri_pickup($value)), $key);
255 // 1st argument: Some
// Six URIs, one per line; results are expected in the same order
256 $test_string = <<<EOF
257 TTP://wwW.Example.Org#TTP_and_www
258 https://nasty.example.org:443/foo/xxx#port443/slash
259 sftp://foobar.example.org:80/dfsdfs#ftp_bat_port80
260 ftp://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm
261 http://192.168.1.4:443#IPv4
262 http://localhost/index.php?%2Fofficial&word=f
264 $results = uri_pickup_normalize(uri_pickup($test_string));
265 $this->assertEquals(6, count($results));
267 // TTP://wwW.Example.Org#TTP_and_www
// 'TTP' is expanded to 'http', 'www.' is cut, and the fragment is lower-cased
268 $this->assertEquals('http', $results[0]['scheme']);
269 $this->assertEquals('', $results[0]['userinfo']);
270 $this->assertEquals('example.org', $results[0]['host']);
271 $this->assertEquals('', $results[0]['port']);
272 $this->assertEquals('/', $results[0]['path']);
273 $this->assertEquals('', $results[0]['file']);
274 $this->assertEquals('', $results[0]['query']);
275 $this->assertEquals('ttp_and_www', $results[0]['fragment']);
277 // https://nasty.example.org:443/foo/xxx#port443/slash
// Default https port 443 is dropped; the fragment ends before the '/'
278 $this->assertEquals('https', $results[1]['scheme']);
279 $this->assertEquals('', $results[1]['userinfo']);
280 $this->assertEquals('nasty.example.org', $results[1]['host']);
281 $this->assertEquals('', $results[1]['port']);
282 $this->assertEquals('/foo/', $results[1]['path']);
283 $this->assertEquals('xxx', $results[1]['file']);
284 $this->assertEquals('', $results[1]['query']);
285 $this->assertEquals('port443', $results[1]['fragment']);
287 // sftp://foobar.example.org:80/dfsdfs#ftp_bat_port80
// Port 80 is not the sftp default, so it is kept
288 $this->assertEquals('sftp', $results[2]['scheme']);
289 $this->assertEquals('', $results[2]['userinfo']);
290 $this->assertEquals('foobar.example.org', $results[2]['host']);
291 $this->assertEquals('80', $results[2]['port']);
292 $this->assertEquals('/', $results[2]['path']);
293 $this->assertEquals('dfsdfs', $results[2]['file']);
294 $this->assertEquals('', $results[2]['query']);
295 $this->assertEquals('ftp_bat_port80', $results[2]['fragment']);
297 // ftp://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm
// Everything before '@' is userinfo; the real host is the IP address after it
298 $this->assertEquals('ftp', $results[3]['scheme']);
299 $this->assertEquals('cnn.example.com&story=breaking_news', $results[3]['userinfo']);
300 $this->assertEquals('10.0.0.1', $results[3]['host']);
301 $this->assertEquals('', $results[3]['port']);
302 $this->assertEquals('/', $results[3]['path']);
303 $this->assertEquals('top_story.htm', $results[3]['file']);
304 $this->assertEquals('', $results[3]['query']);
305 $this->assertEquals('', $results[3]['fragment']);
307 // http://192.168.1.4:443#IPv4
// Port 443 is not the http default, so it is kept
308 $this->assertEquals('http', $results[4]['scheme']);
309 $this->assertEquals('', $results[4]['userinfo']);
310 $this->assertEquals('192.168.1.4', $results[4]['host']);
311 $this->assertEquals('443', $results[4]['port']);
312 $this->assertEquals('/', $results[4]['path']);
313 $this->assertEquals('', $results[4]['file']);
314 $this->assertEquals('', $results[4]['query']);
315 $this->assertEquals('ipv4', $results[4]['fragment']);
317 // http://localhost/index.php?%2Fofficial&word=f
// 'index.php' is cut as a directory index; the query is lower-cased
318 $this->assertEquals('http', $results[5]['scheme']);
319 $this->assertEquals('', $results[5]['userinfo']);
320 $this->assertEquals('localhost', $results[5]['host']);
321 $this->assertEquals('', $results[5]['port']);
322 $this->assertEquals('/', $results[5]['path']);
323 $this->assertEquals('', $results[5]['file']);
324 $this->assertEquals('%2fofficial&word=f', $results[5]['query']);
325 $this->assertEquals('', $results[5]['fragment']);
328 // Specific tests ----
330 // Divider: Back-slash
331 $test_string = ' http:\\backslash.org\fobar.html ';
332 $results = uri_pickup_normalize(uri_pickup($test_string));
333 $this->assertEquals('backslash.org', $results[0]['host']);
335 // Divider: percent-encoded
// (disabled here; spam_uri_pickup() is tested with a percent-encoded URI separately)
336 //$test_string = ' http%3A%2F%5Cpercent-encoded.org%5Cfobar.html ';
337 //$results = uri_pickup_normalize(uri_pickup($test_string));
338 //$this->assertEquals('percent-encoded.org', $results[0]['host']);
340 // Host: Without path
// Raw uri_pickup() output has an empty path and file
341 $test_string = ' http://nopathstring.com ';
342 $results = uri_pickup($test_string);
343 $this->assertEquals('', $results[0]['path']);
344 $this->assertEquals('', $results[0]['file']);
// Writing to 'path' must not change 'file' (guards against the two entries
// sharing a reference)
345 $results[0]['path'] = '/';
346 $this->assertEquals('', $results[0]['file'], '[Seems referense trouble]');
// After normalization the empty path becomes '/'
348 $results = uri_pickup($test_string);
349 $results = uri_pickup_normalize($results);
350 $this->assertEquals('/',$results[0]['path']);
351 $this->assertEquals('', $results[0]['file']);
// Host: an underscore inside the name must not truncate the host
354 $test_string = ' http://under_score.org/fobar.html ';
355 $results = uri_pickup_normalize(uri_pickup($test_string));
356 $this->assertEquals('under_score.org',$results[0]['host']); // Not 'under'
// Host: IPv4 address
359 $test_string = ' http://192.168.0.1/fobar.html ';
360 $results = uri_pickup_normalize(uri_pickup($test_string));
361 $this->assertEquals('192.168.0.1', $results[0]['host']);
// Host: leading underscore
364 $test_string = ' http://_sss/foo.html ';
365 $results = uri_pickup_normalize(uri_pickup($test_string));
366 $this->assertEquals('_sss', $results[0]['host']);
367 $this->assertEquals('foo.html', $results[0]['file']);
// Host: trailing underscore
370 $test_string = ' http://sss_/foo.html ';
371 $results = uri_pickup_normalize(uri_pickup($test_string));
372 $this->assertEquals('sss_', $results[0]['host']);
373 $this->assertEquals('foo.html', $results[0]['file']);
376 // Specific tests ---- Fails
378 // Divider: Colon only (Too sensitive to capture)
379 $test_string = ' http:colon.org ';
380 $results = uri_pickup_normalize(uri_pickup($test_string));
381 $this->assertEquals(0, count($results));
// Host: one- and two-character names are not picked up ...
384 $test_string = ' http://s/foo.html http://ss/foo.html ';
385 $results = uri_pickup_normalize(uri_pickup($test_string));
386 $this->assertEquals(0, count($results));
// ... but a three-character name is
388 $test_string = ' http://sss/foo.html ';
389 $results = uri_pickup_normalize(uri_pickup($test_string));
390 $this->assertEquals('sss', $results[0]['host']);
391 $this->assertEquals('foo.html', $results[0]['file']);
393 // uri_pickup_normalize_pathfile()
// The whole trailing part is expected in 'path', the 'file' key is expected to be
// unset, and uri_pickup_implode() must rebuild the original URI from the result
394 $test_string = ' http://example.com/path/to/directory-accidentally-not-ended-with-slash ';
395 $results = uri_pickup_normalize_pathfile(uri_pickup($test_string));
396 $this->assertEquals('/path/to/directory-accidentally-not-ended-with-slash', $results[0]['path']);
397 $this->assertEquals(TRUE, isset($results[0]['path']));
398 $this->assertEquals(FALSE, isset($results[0]['file']));
399 $this->assertEquals('http://example.com/path/to/directory-accidentally-not-ended-with-slash',
400 uri_pickup_implode($results[0]));
// Exercises spam_uri_pickup(): besides the visible URI, hosts hidden inside it
// (a percent-encoded nested URI, or a 'site:' search operator) must be reported.
403 function testFunc_spam_uri_pickup()
405 // Divider: percent-encoded
// The outer host comes first, the percent-encoded nested host second
406 $test_string = ' http://victim.example.org/http%3A%2F%5Cnasty.example.org ';
407 $results = spam_uri_pickup($test_string);
408 $this->assertEquals('victim.example.org', $results[0]['host']);
409 $this->assertEquals('nasty.example.org', $results[1]['host']);
411 // Domain exposure (site:)
// The exposed domain is expected before the search-engine host
// NOTE(review): this input already contains '%20site:', the same shape as the
// '(%20site:)' case below -- confirm whether a plain 'site:' was intended here
412 $test_string = ' http://search.example.org/?q=%20site:nasty.example.org ';
413 $results = spam_uri_pickup($test_string);
414 $this->assertEquals('nasty.example.org', $results[0]['host']);
415 $this->assertEquals('search.example.org', $results[1]['host']);
417 // Domain exposure (%20site:)
418 $test_string = ' http://search2.example.org/?q=%20site:nasty2.example.org ';
419 $results = spam_uri_pickup($test_string);
420 $this->assertEquals('nasty2.example.org', $results[0]['host']);
421 $this->assertEquals('search2.example.org', $results[1]['host']);
// Exercises is_ip(): expected to return 4 for an IPv4 address, 6 for an IPv6
// address and FALSE for anything else.
// NOTE(review): the fixture arrays and loop headers are not visible in this
// excerpt; only two IPv6 samples are shown.
// NOTE(review): the three assertions with four arguments pass '[value]' after
// the message; with an assertEquals($expected, $actual, $message, $delta)
// signature that string would land in the $delta position rather than extend
// the message -- confirm against the PHPUnit version in use.
424 function testFunc_is_ip()
426 // 1st argument: Null
427 foreach($this->setup_string_null() as $key => $value){
428 $this->assertEquals(FALSE, is_ip($value), $key);
// IPv4 samples
435 $this->assertEquals(4, is_ip($value), $key, '[' . $value . ']');
// IPv6 samples
440 '::', // 0:0:0:0:0:0:0:0
441 '::192.168.1.1', // IPv4 within IPv6 network
443 $this->assertEquals(6, is_ip($value), $key, '[' . $value . ']');
// Invalid samples
451 $this->assertEquals(FALSE, is_ip($value), $key, '[' . $value . ']');
// Exercises whois_responsibility(): a host name is reduced to the domain that is
// expected to be responsible for it (one label below a known registry suffix),
// while names at or above that suffix, IP addresses and single labels are
// returned unchanged.
// Note: the 'AssertEquals' spelling below resolves to assertEquals because PHP
// method names are case-insensitive.
455 function testFunc_whois_responsibility()
457 // 1st argument: Null
458 foreach($this->setup_string_null() as $key => $value){
459 $this->assertEquals('', whois_responsibility($value), $key);
462 // 'act.edu.au' is known as 3rd level domain
463 $this->AssertEquals('bar.act.edu.au', whois_responsibility('foo.bar.act.edu.au'));
464 $this->AssertEquals('bar.act.edu.au', whois_responsibility('bar.act.edu.au'));
465 $this->AssertEquals('act.edu.au', whois_responsibility('act.edu.au'));
466 $this->AssertEquals('edu.au', whois_responsibility('edu.au'));
467 $this->AssertEquals('au', whois_responsibility('au'));
469 // 'co.uk' is known as 2nd level domain
470 $this->AssertEquals('bar.co.uk', whois_responsibility('foo.bar.co.uk'));
471 $this->AssertEquals('bar.co.uk', whois_responsibility('bar.co.uk'));
472 $this->AssertEquals('co.uk', whois_responsibility('co.uk'));
473 $this->AssertEquals('uk', whois_responsibility('uk'));
475 // 'bar.uk' is not 2nd level (implicit responsibility)
476 $this->AssertEquals('bar.uk', whois_responsibility('foo.bar.uk'));
477 $this->AssertEquals('bar.uk', whois_responsibility('bar.uk'));
// An IPv4 address is returned as-is
480 $this->AssertEquals('192.168.0.1', whois_responsibility('192.168.0.1'));
482 // Invalid Top-Level Domain (With implicit)
483 $this->AssertEquals('bar.local', whois_responsibility('foo.bar.local')); // Implicit responsibility
484 $this->AssertEquals('bar.local', whois_responsibility('bar.local'));
485 $this->AssertEquals('local', whois_responsibility('local'));
486 $this->AssertEquals('localhost', whois_responsibility('localhost'));
487 $this->AssertEquals('s', whois_responsibility('s'));