<?php
-// $Id: spam.php,v 1.134 2007/04/28 06:33:30 henoheno Exp $
+// $Id: spam.php,v 1.137 2007/04/29 14:40:12 henoheno Exp $
// Copyright (C) 2006-2007 PukiWiki Developers Team
// License: GPL v2 or (at your option) any later version
//
}
}
-// Very roughly strings(1)
-function strings($binary = '')
+// Roughly strings(1) using PCRE
+// This function is useful to:
+// * Reduce the size of data, by removing unprintable binary data
+// * Detect _bare_strings_ from binary data
+// References:
+// http://www.freebsd.org/cgi/man.cgi?query=strings (Man-page of GNU strings)
+// http://www.pcre.org/pcre.txt
+function strings($binary = '', $min_len = 4, $ignore_space = FALSE)
{
- // http://www.pcre.org/pcre.txt
- return preg_replace('/[[:^graph:]]+/', "\n", $binary);
+ if ($ignore_space) { // TRUE: besides splitting on unprintable data, also squeeze and trim blanks
+ $binary = preg_replace( // Array form: each pattern is applied in order to the previous result
+ array(
+ '/(?:[^[:graph:] \t\n]|[\r])+/s', // Runs of anything but graph chars, space, tab, LF (and any CR)
+ '/[ \t]{2,}/', // Runs of two or more spaces/tabs
+ '/^[ \t]/m', // One space/tab at the beginning of each line
+ '/[ \t]$/m', // One space/tab at the end of each line
+ ),
+ array(
+ "\n", // ... become a line break
+ ' ', // ... become a single space
+ '', // ... is removed
+ '' // ... is removed
+ ),
+ $binary);
+ } else {
+ $binary = preg_replace('/(?:[^[:graph:][:space:]]|[\r])+/s', "\n", $binary); // Keep whitespace as-is; only unprintable runs (and CR) become LF
+ }
+
+ if ($min_len > 1) { // Filtering is skipped when every non-empty line would pass anyway
+ $min_len = min(1024, intval($min_len)); // Force an integer and cap the quantifier at 1024
+ $binary =
+ implode("\n", // Re-join the surviving lines with LF
+ preg_grep('/^.{' . $min_len . ',}/S', // Keep only lines at least $min_len long (no 'u' modifier, so presumably counted in bytes)
+ explode("\n", $binary)
+ )
+ );
+ }
+
+ return $binary; // LF-separated printable strings
}
// Preprocess: apply rawurldecode(), then insert space(s) and extra text
// so that some embedded URIs can be detected/counted _if possible_
// NOTE: It may be dangerous to var_dump() the result. [e.g. 'javascript:']
+// [OK] http://victim.example.org/?site:nasty.example.org
+// [OK] http://victim.example.org/nasty.example.org
// [OK] http://victim.example.org/go?http%3A%2F%2Fnasty.example.org
// [OK] http://victim.example.org/http://nasty.example.org
-// TODO: link.toolbot.com, urlx.org
function spam_uri_pickup_preprocess($string = '')
{
if (! is_string($string)) return '';
$string = rawurldecode($string);
+ // Domain exposure (simple)
+ // http://victim.example.org/nasty.example.org/path#frag
+ // => http://nasty.example.org/?refer=victim.example.org and original
+ $string = preg_replace(
+ '#http://' .
+ '(' .
+ 'ime\.nu' . '|' . // 2ch.net
+ 'ime\.st' . '|' . // 2ch.net
+ 'link\.toolbot\.com' . '|' .
+ 'urlx\.org' .
+ ')' .
+ '/([a-z0-9.%_-]+\.[a-z0-9.%_-]+)#i', // nasty.example.org
+ 'http://$2/?refer=$1 $0', // Insert
+ $string
+ );
+
// Domain exposure (See _preg_replace_callback_domain_exposure())
$string = preg_replace_callback(
array(