OSDN Git Service

host_normalize(): Don't cut 'www' with Non-ASCII-based string such as IDN
[pukiwiki/pukiwiki_sandbox.git] / spam / SpamPickupTest.php
1 <?php
2 // $Id: SpamPickupTest.php,v 1.5 2008/12/27 11:50:55 henoheno Exp $
3 // Copyright (C) 2007 heno
4 //
5 // Design test case for spam.php (called from runner.php)
6
// Default SPAM_INI_FILE to 'spam.ini.php' unless it was already defined
// before this file was loaded
if (! defined('SPAM_INI_FILE')) {
        define('SPAM_INI_FILE', 'spam.ini.php');
}
8
9 require_once('spam_pickup.php');
10 require_once('PHPUnit/PHPUnit.php');
11
// Test cases for the normalizer and URI pickup functions loaded from
// spam_pickup.php. Each testFunc_*() method exercises the function(s)
// named after the 'testFunc_' prefix.
class SpamPickupTest extends PHPUnit_TestCase
{
        // Fixture: values the tests below feed to each normalizer as
        // "null-like" input.
        // Returns: array of (label => value); the label is passed to
        // assertEquals() as the failure message
        function setup_string_null()
        {
                return array(
                        '[NULL]'        => NULL,
                        '[TRUE]'        => TRUE,
                        '[FALSE]'       => FALSE,
                        '[array(foobar)]' => array('foobar'),
                        '[]'            => '',
                        '[0]'           => 0,
                        '[1]'           => 1
                );
        }

        // scheme_normalize(): null-like input, case, aliases and abbreviations
        function testFunc_scheme_normalize()
        {
                // Null
                foreach($this->setup_string_null() as $key => $value){
                        $this->assertEquals('', scheme_normalize($value), $key);
                }

                // CASE
                $this->assertEquals('http', scheme_normalize('HTTP'));

                // Aliases
                $this->assertEquals('pop3',  scheme_normalize('pop'));
                $this->assertEquals('nntp',  scheme_normalize('news'));
                $this->assertEquals('imap',  scheme_normalize('imap4'));
                $this->assertEquals('nntps', scheme_normalize('snntp'));
                $this->assertEquals('nntps', scheme_normalize('snews'));
                $this->assertEquals('pop3s', scheme_normalize('spop3'));
                $this->assertEquals('pop3s', scheme_normalize('pops'));

                // Abbrevs
                $this->assertEquals('http',  scheme_normalize('ttp'));
                $this->assertEquals('https', scheme_normalize('ttps'));

                // Abbrevs considered harmless (2nd argument: FALSE)
                $this->assertEquals('', scheme_normalize('ttp',  FALSE));
                $this->assertEquals('', scheme_normalize('ttps', FALSE));
        }

        // host_normalize(): null-like input, case folding and 'www' cutting
        function testFunc_host_normalize()
        {
                // Invalid: Null
                foreach($this->setup_string_null() as $key => $value){
                        $this->assertEquals('', host_normalize($value), $key);
                }

                // Hostname is case-insensitive
                $this->assertEquals('example.org', host_normalize('ExAMPle.ORG'));

                // Cut 'www' with traditional ASCII-based FQDN (destructive)
                $this->assertEquals('example.org', host_normalize('WWW.example.org'));

                // Don't cut 'www' with Non-ASCII-based string such as IDN
                // (a NUL byte stands in for the non-FQDN content here;
                //  the string is still expected to be lower-cased)
                $this->assertEquals("www.example.org\0foobar",
                        host_normalize("WWW.example.org\0foobar"));
        }

        // port_normalize(): null-like ports, scheme-default ports,
        // non-integer input, and the 2nd/3rd (scheme) arguments
        function testFunc_port_normalize()
        {
                $scheme = 'dont_care';

                // 1st argument: Null
                $this->assertEquals('', port_normalize(NULL, $scheme));
                $this->assertEquals('', port_normalize(TRUE, $scheme));
                $this->assertEquals('', port_normalize(FALSE, $scheme));
                $this->assertEquals('', port_normalize(array('foobar'), $scheme));
                $this->assertEquals('', port_normalize('',   $scheme));

                // 1st argument: Known port
                // (The port is expected to vanish when paired with the
                //  scheme listed next to it)
                $this->assertEquals('',    port_normalize(   -1, $scheme));
                $this->assertEquals(0,     port_normalize(    0, $scheme));
                $this->assertEquals(1,     port_normalize(    1, $scheme));
                $this->assertEquals('',    port_normalize(   21, 'ftp'));
                $this->assertEquals('',    port_normalize(   22, 'ssh'));
                $this->assertEquals('',    port_normalize(   23, 'telnet'));
                $this->assertEquals('',    port_normalize(   25, 'smtp'));
                $this->assertEquals('',    port_normalize(   69, 'tftp'));
                $this->assertEquals('',    port_normalize(   70, 'gopher'));
                $this->assertEquals('',    port_normalize(   79, 'finger'));
                $this->assertEquals('',    port_normalize(   80, 'http'));
                $this->assertEquals('',    port_normalize(  110, 'pop3'));
                $this->assertEquals('',    port_normalize(  115, 'sftp'));
                $this->assertEquals('',    port_normalize(  119, 'nntp'));
                $this->assertEquals('',    port_normalize(  143, 'imap'));
                $this->assertEquals('',    port_normalize(  194, 'irc'));
                $this->assertEquals('',    port_normalize(  210, 'wais'));
                $this->assertEquals('',    port_normalize(  443, 'https'));
                $this->assertEquals('',    port_normalize(  563, 'nntps'));
                $this->assertEquals('',    port_normalize(  873, 'rsync'));
                $this->assertEquals('',    port_normalize(  990, 'ftps'));
                $this->assertEquals('',    port_normalize(  992, 'telnets'));
                $this->assertEquals('',    port_normalize(  993, 'imaps'));
                $this->assertEquals('',    port_normalize(  994, 'ircs'));
                $this->assertEquals('',    port_normalize(  995, 'pop3s'));
                $this->assertEquals('',    port_normalize( 3306, 'mysql'));
                $this->assertEquals(8080,  port_normalize( 8080, $scheme));
                $this->assertEquals(65535, port_normalize(65535, $scheme));
                $this->assertEquals(65536, port_normalize(65536, $scheme)); // Seems not invalid in RFC

                // 1st argument: Invalid type
                // ('x' is appended so that the comparison is done between strings)
                $this->assertEquals('1x',  port_normalize('001', $scheme) . 'x');
                $this->assertEquals('',    port_normalize('+0',  $scheme));
                $this->assertEquals('',    port_normalize('0-1', $scheme)); // intval() says '0'
                $this->assertEquals('',    port_normalize('str', $scheme));

                // 2nd and 3rd argument: Null
                $this->assertEquals(80,    port_normalize(80, NULL,  TRUE));
                $this->assertEquals(80,    port_normalize(80, TRUE,  TRUE));
                $this->assertEquals(80,    port_normalize(80, FALSE, TRUE));
                $this->assertEquals(80,    port_normalize(80, array('foobar'), TRUE));
                $this->assertEquals(80,    port_normalize(80, '', TRUE));

                // 2nd and 3rd argument: Do $scheme_normalize
                $this->assertEquals('',    port_normalize(80,  'TTP',  TRUE));
                $this->assertEquals('',    port_normalize(110, 'POP',  TRUE));
                $this->assertEquals(80,    port_normalize(80,  'HTTP', FALSE));
        }

        // path_normalize(): null-like input, case, dot-segments, and the
        // 2nd (divider) argument
        function testFunc_path_normalize()
        {
                // 1st argument: Null
                foreach($this->setup_string_null() as $key => $value){
                        $this->assertEquals('/', path_normalize($value), $key);
                }

                // 1st argument: CASE sensitive
                $this->assertEquals('/ExAMPle', path_normalize('ExAMPle'));
                $this->assertEquals('/#hoge',   path_normalize('#hoge'));
                $this->assertEquals('/a/b/c/d', path_normalize('/a/b/./c////./d'));
                $this->assertEquals('/b/',      path_normalize('/a/../../../b/'));

                // 2nd argument
                $this->assertEquals('\\b\\c\\d\\', path_normalize('\\a\\..\\b\\.\\c\\\\.\\d\\', '\\'));
                $this->assertEquals('str1str3str', path_normalize('str1strstr2str..str3str', 'str'));
                $this->assertEquals('/do/../nothing/', path_normalize('/do/../nothing/', TRUE));
                $this->assertEquals('/do/../nothing/', path_normalize('/do/../nothing/', array('a')));
                $this->assertEquals('',            path_normalize(array('a'), array('b')));
        }

        // query_normalize(): null-like input, ordering and duplicated keys
        function testFunc_query_normalize()
        {
                // 1st argument: Null
                foreach($this->setup_string_null() as $key => $value){
                        $this->assertEquals('', query_normalize($value), $key);
                }

                // Expected: empty pairs are dropped and the rest comes back sorted
                $this->assertEquals('a=0dd&b&c&d&f=d', query_normalize('&&&&f=d&b&d&c&a=0dd'));

                // Expected: only the last value of a repeated key remains
                $this->assertEquals('eg=foobar',       query_normalize('nothing==&eg=dummy&eg=padding&eg=foobar'));
        }

        // file_normalize(): null-like input and directory-index file names
        function testFunc_file_normalize()
        {
                // 1st argument: Null
                foreach($this->setup_string_null() as $key => $value){
                        $this->assertEquals('', file_normalize($value), $key);
                }

                // 1st argument: Cut DirectoryIndexes (Destructive)
                foreach(array(
                        'default.htm',
                        'default.html',
                        'default.asp',
                        'default.aspx',
                        'index',
                        'index.htm',
                        'index.html',
                        'index.shtml',
                        'index.jsp',
                        'index.php',
                        'index.php3',
                        'index.php4',
                        'index.pl',
                        'index.py',
                        'index.rb',
                        'index.cgi',

                        // Apache 2.0.59 default 'index.html' variants
                        'index.html.ca',
                        'index.html.cz.iso8859-2',
                        'index.html.de',
                        'index.html.dk',
                        'index.html.ee',
                        'index.html.el',
                        'index.html.en',
                        'index.html.es',
                        'index.html.et',
                        'index.html.fr',
                        'index.html.he.iso8859-8',
                        'index.html.hr.iso8859-2',
                        'index.html.it',
                        'index.html.ja.iso2022-jp',
                        'index.html.ko.euc-kr',
                        'index.html.lb.utf8',
                        'index.html.nl',
                        'index.html.nn',
                        'index.html.no',
                        'index.html.po.iso8859-2',
                        'index.html.pt',
                        'index.html.pt-br',
                        'index.html.ru.cp866',
                        'index.html.ru.cp-1251',
                        'index.html.ru.iso-ru',
                        'index.html.ru.koi8-r',
                        'index.html.ru.utf8',
                        'index.html.sv',
                        'index.html.var',       // default
                        'index.html.zh-cn.gb2312',
                        'index.html.zh-tw.big5',

                        // Several suffixes around '.html'
                        'index.ja.en.de.html',

                        // .gz
                        'index.html.ca.gz',
                        'index.html.en.ja.ca.z',
                ) as $arg){
                        // $arg as the message tells which name failed
                        $this->assertEquals('', file_normalize($arg), $arg);
                }

                // Pending cases (not enabled yet)

                //$this->assertEquals('foo/', file_normalize('foo/index.html'));

                //$this->assertEquals('ExAMPle', file_normalize('ExAMPle'));
                //$this->assertEquals('exe.exe', file_normalize('exe.exe'));
                //$this->assertEquals('sample.html', file_normalize('sample.html.en'));
                //$this->assertEquals('sample.html', file_normalize('sample.html.pt-br'));
                //$this->assertEquals('sample.html', file_normalize('sample.html.po.iso8859-2'));
                //$this->assertEquals('sample.html', file_normalize('sample.html.zh-tw.big5'));
        }

        // uri_pickup() and uri_pickup_normalize(): picking URIs up from
        // free text, and the components of each normalized result
        function testFunc_uri_pickup()
        {
                // 1st argument: Null
                foreach($this->setup_string_null() as $key => $value){
                        $this->assertEquals(0, count(uri_pickup($value)), $key);
                }

                // 1st argument: Some
                // (The closing 'EOF;' must stay at the beginning of its line)
                $test_string = <<<EOF
                        TTP://wwW.Example.Org#TTP_and_www
                        https://nasty.example.org:443/foo/xxx#port443/slash
                        sftp://foobar.example.org:80/dfsdfs#ftp_bat_port80
                        ftp://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm
                        http://192.168.1.4:443#IPv4
EOF;
                $results = uri_pickup_normalize(uri_pickup($test_string));
                $this->assertEquals(5, count($results));

                // TTP://wwW.Example.Org#TTP_and_www
                $this->assertEquals('http',           $results[0]['scheme']);
                $this->assertEquals('',               $results[0]['userinfo']);
                $this->assertEquals('example.org',    $results[0]['host']);
                $this->assertEquals('',               $results[0]['port']);
                $this->assertEquals('/',              $results[0]['path']);
                $this->assertEquals('',               $results[0]['file']);
                $this->assertEquals('',               $results[0]['query']);
                $this->assertEquals('ttp_and_www',    $results[0]['fragment']);

                // https://nasty.example.org:443/foo/xxx#port443/slash
                $this->assertEquals('https',          $results[1]['scheme']);
                $this->assertEquals('',               $results[1]['userinfo']);
                $this->assertEquals('nasty.example.org', $results[1]['host']);
                $this->assertEquals('',               $results[1]['port']);
                $this->assertEquals('/foo/',          $results[1]['path']);
                $this->assertEquals('xxx',            $results[1]['file']);
                $this->assertEquals('',               $results[1]['query']);
                $this->assertEquals('port443',        $results[1]['fragment']);

                // sftp://foobar.example.org:80/dfsdfs#ftp_bat_port80
                $this->assertEquals('sftp',           $results[2]['scheme']);
                $this->assertEquals('',               $results[2]['userinfo']);
                $this->assertEquals('foobar.example.org', $results[2]['host']);
                $this->assertEquals('80',             $results[2]['port']);
                $this->assertEquals('/',              $results[2]['path']);
                $this->assertEquals('dfsdfs',         $results[2]['file']);
                $this->assertEquals('',               $results[2]['query']);
                $this->assertEquals('ftp_bat_port80', $results[2]['fragment']);

                // ftp://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm
                $this->assertEquals('ftp',            $results[3]['scheme']);
                $this->assertEquals('cnn.example.com&story=breaking_news', $results[3]['userinfo']);
                $this->assertEquals('10.0.0.1',       $results[3]['host']);
                $this->assertEquals('',               $results[3]['port']);
                $this->assertEquals('/',              $results[3]['path']);
                $this->assertEquals('top_story.htm',  $results[3]['file']);
                $this->assertEquals('',               $results[3]['query']);
                $this->assertEquals('',               $results[3]['fragment']);

                // http://192.168.1.4:443#IPv4
                // NOTE(review): This one is only counted above; its components
                // are not asserted yet -- TODO add them after confirming the
                // expected values against spam_pickup.php


                // Specific tests ----

                // Divider: Back-slash
                $test_string = ' http:\\backslash.org\fobar.html ';
                $results = uri_pickup_normalize(uri_pickup($test_string));
                $this->assertEquals('backslash.org',  $results[0]['host']);

                // Divider: percent-encoded
                //$test_string = ' http%3A%2F%5Cpercent-encoded.org%5Cfobar.html ';
                //$results = uri_pickup_normalize(uri_pickup($test_string));
                //$this->assertEquals('percent-encoded.org',  $results[0]['host']);

                // Host: Underscore
                $test_string = ' http://under_score.org/fobar.html ';
                $results = uri_pickup_normalize(uri_pickup($test_string));
                $this->assertEquals('under_score.org',$results[0]['host']);     // Not 'under'

                // Host: IPv4
                $test_string = ' http://192.168.0.1/fobar.html ';
                $results = uri_pickup_normalize(uri_pickup($test_string));
                $this->assertEquals('192.168.0.1',    $results[0]['host']);

                // Host: Starts
                $test_string = ' http://_sss/foo.html ';
                $results = uri_pickup_normalize(uri_pickup($test_string));
                $this->assertEquals('_sss',           $results[0]['host']);
                $this->assertEquals('foo.html',       $results[0]['file']);

                // Host: Ends
                $test_string = ' http://sss_/foo.html ';
                $results = uri_pickup_normalize(uri_pickup($test_string));
                $this->assertEquals('sss_',           $results[0]['host']);
                $this->assertEquals('foo.html',       $results[0]['file']);


                // Specific tests ---- Fails

                // Divider: Colon only (Too sensitive to capture)
                $test_string = ' http:colon.org ';
                $results = uri_pickup_normalize(uri_pickup($test_string));
                $this->assertEquals(0, count($results));

                // Host: Too short
                $test_string = ' http://s/foo.html http://ss/foo.html ';
                $results = uri_pickup_normalize(uri_pickup($test_string));
                $this->assertEquals(0, count($results));

                // Host: Three characters (boundary: this one is picked up)
                $test_string = ' http://sss/foo.html ';
                $results = uri_pickup_normalize(uri_pickup($test_string));
                $this->assertEquals('sss',            $results[0]['host']);
                $this->assertEquals('foo.html',       $results[0]['file']);
        }

        // spam_uri_pickup(): URIs and domains hidden inside another URI
        function testFunc_spam_uri_pickup()
        {
                // Divider: percent-encoded
                // (Both the outer URI and the encoded inner one are expected)
                $test_string = ' http://victim.example.org/http%3A%2F%5Cnasty.example.org ';
                $results = spam_uri_pickup($test_string);
                $this->assertEquals('victim.example.org', $results[0]['host']);
                $this->assertEquals('nasty.example.org',  $results[1]['host']);

                // Domain exposure (site:)
                // (The exposed domain is expected before the search site itself)
                // NOTE(review): This input also contains '%20' before 'site:',
                // the same as the next case; a plain 'site:' may have been
                // intended -- confirm
                $test_string = ' http://search.example.org/?q=%20site:nasty.example.org ';
                $results = spam_uri_pickup($test_string);
                $this->assertEquals('nasty.example.org', $results[0]['host']);
                $this->assertEquals('search.example.org',  $results[1]['host']);

                // Domain exposure (%20site:)
                $test_string = ' http://search2.example.org/?q=%20site:nasty2.example.org ';
                $results = spam_uri_pickup($test_string);
                $this->assertEquals('nasty2.example.org', $results[0]['host']);
                $this->assertEquals('search2.example.org',  $results[1]['host']);
        }
}
381
382 ?>