<?php
// PukiWiki - Yet another WikiWikiWeb clone.
-// $Id: pukiwiki.php,v 1.21 2007/08/26 15:17:28 henoheno Exp $
+// $Id: pukiwiki.php,v 1.22 2011/01/24 15:21:28 henoheno Exp $
//
// PukiWiki 1.4.*
// Copyright (C) 2002-2007 by PukiWiki Developers Team
if ($_spam) {
require(LIB_DIR . 'spam.php');
- require(LIB_DIR . 'spam_pickup.php');
if (isset($spam['method'][$_plugin])) {
$_method = & $spam['method'][$_plugin];
<?php
-// $Id: spam.php,v 1.33 2008/12/28 08:33:05 henoheno Exp $
-// Copyright (C) 2006-2007 PukiWiki Developers Team
+// $Id: spam.php,v 1.34 2011/01/24 15:19:36 henoheno Exp $
+// Copyright (C) 2006-2009, 2011 PukiWiki Developers Team
// License: GPL v2 or (at your option) any later version
//
// Functions for Concept-work of spam-uri metrics
//
// (PHP 4 >= 4.3.0): preg_match_all(PREG_OFFSET_CAPTURE): $method['uri_XXX'] related feature
-if (! defined('SPAM_INI_FILE')) define('SPAM_INI_FILE', 'spam.ini.php');
-if (! defined('DOMAIN_INI_FILE')) define('DOMAIN_INI_FILE', 'domain.ini.php');
-
-// ---------------------
-// Compat etc
-
-// (PHP 4 >= 4.2.0): var_export(): mail-reporting and dump related
-if (! function_exists('var_export')) {
- function var_export() {
- return 'var_export() is not found on this server' . "\n";
- }
-}
-
-// (PHP 4 >= 4.2.0): preg_grep() enables invert option
-function preg_grep_invert($pattern = '//', $input = array())
-{
- static $invert;
- if (! isset($invert)) $invert = defined('PREG_GREP_INVERT');
-
- if ($invert) {
- return preg_grep($pattern, $input, PREG_GREP_INVERT);
- } else {
- $result = preg_grep($pattern, $input);
- if ($result) {
- return array_diff($input, preg_grep($pattern, $input));
- } else {
- return $input;
- }
- }
-}
-
-
-// ---------------------
-// Utilities
-
-// Very roughly, shrink the lines of var_export()
-// NOTE: If the same data exists, it must be corrupted.
-function var_export_shrink($expression, $return = FALSE, $ignore_numeric_keys = FALSE)
-{
- $result = var_export($expression, TRUE);
-
- $result = preg_replace(
- // Remove a newline and spaces
- '# => \n *array \(#', ' => array (',
- $result
- );
-
- if ($ignore_numeric_keys) {
- $result =preg_replace(
- // Remove numeric keys
- '#^( *)[0-9]+ => #m', '$1',
- $result
- );
- }
-
- if ($return) {
- return $result;
- } else {
- echo $result;
- return NULL;
- }
-}
-
-// Data structure: Create an array they _refer_only_one_ value
-function one_value_array($num = 0, $value = NULL)
-{
- $num = max(0, intval($num));
- $array = array();
-
- for ($i = 0; $i < $num; $i++) {
- $array[] = & $value;
- }
-
- return $array;
-}
-
-// Reverse $string with specified delimiter
-function delimiter_reverse($string = 'foo.bar.example.com', $from_delim = '.', $to_delim = NULL)
-{
- $to_null = ($to_delim === NULL);
-
- if (! is_string($from_delim) || (! $to_null && ! is_string($to_delim))) {
- return FALSE;
- }
- if (is_array($string)) {
- // Map, Recurse
- $count = count($string);
- $from = one_value_array($count, $from_delim);
- if ($to_null) {
- // Note: array_map() vanishes all keys
- return array_map('delimiter_reverse', $string, $from);
- } else {
- $to = one_value_array($count, $to_delim);
- // Note: array_map() vanishes all keys
- return array_map('delimiter_reverse', $string, $from, $to);
- }
- }
- if (! is_string($string)) {
- return FALSE;
- }
-
- // Returns com.example.bar.foo
- if ($to_null) $to_delim = & $from_delim;
- return implode($to_delim, array_reverse(explode($from_delim, $string)));
-}
-
-// ksort() by domain
-function ksort_by_domain(& $array)
-{
- $sort = array();
- foreach(array_keys($array) as $key) {
- $reversed = delimiter_reverse($key);
- if ($reversed !== FALSE) {
- $sort[$reversed] = $key;
- }
- }
- ksort($sort, SORT_STRING);
-
- $result = array();
- foreach($sort as $key) {
- $result[$key] = & $array[$key];
- }
-
- $array = $result;
-}
-
-// Roughly strings(1) using PCRE
-// This function is useful to:
-// * Reduce the size of data, from removing unprintable binary data
-// * Detect _bare_strings_ from binary data
-// References:
-// http://www.freebsd.org/cgi/man.cgi?query=strings (Man-page of GNU strings)
-// http://www.pcre.org/pcre.txt
-// Note: mb_ereg_replace() is one of mbstring extension's functions
-// and need to init its encoding.
-function strings($binary = '', $min_len = 4, $ignore_space = FALSE, $multibyte = FALSE)
-{
- // String only
- $binary = (is_array($binary) || $binary === TRUE) ? '' : strval($binary);
-
- $regex = $ignore_space ?
- '[^[:graph:] \t\n]+' : // Remove "\0" etc, and readable spaces
- '[^[:graph:][:space:]]+'; // Preserve readable spaces if possible
-
- $binary = $multibyte ?
- mb_ereg_replace($regex, "\n", $binary) :
- preg_replace('/' . $regex . '/s', "\n", $binary);
-
- if ($ignore_space) {
- $binary = preg_replace(
- array(
- '/[ \t]{2,}/',
- '/^[ \t]/m',
- '/[ \t]$/m',
- ),
- array(
- ' ',
- '',
- ''
- ),
- $binary);
- }
-
- if ($min_len > 1) {
- // The last character seems "\n" or not
- $br = (! empty($binary) && $binary[strlen($binary) - 1] == "\n") ? "\n" : '';
-
- $min_len = min(1024, intval($min_len));
- $regex = '/^.{' . $min_len . ',}/S';
- $binary = implode("\n", preg_grep($regex, explode("\n", $binary))) . $br;
- }
-
- return $binary;
-}
-
-
-// ---------------------
-// Utilities: Arrays
-
-// Count leaves (A leaf = value that is not an array, or an empty array)
-function array_count_leaves($array = array(), $count_empty = FALSE)
-{
- if (! is_array($array) || (empty($array) && $count_empty)) return 1;
-
- // Recurse
- $count = 0;
- foreach ($array as $part) {
- $count += array_count_leaves($part, $count_empty);
- }
- return $count;
-}
-
-// Merge two leaves
-// Similar to PHP array_merge_leaves(), except strictly preserving keys as string
-function array_merge_leaves($array1, $array2, $sort_keys = TRUE)
-{
- // Array(s) only
- $is_array1 = is_array($array1);
- $is_array2 = is_array($array2);
- if ($is_array1) {
- if ($is_array2) {
- ; // Pass
- } else {
- return $array1;
- }
- } else if ($is_array2) {
- return $array2;
- } else {
- return $array2; // Not array ($array1 is overwritten)
- }
-
- $keys_all = array_merge(array_keys($array1), array_keys($array2));
- if ($sort_keys) sort($keys_all, SORT_STRING);
-
- $result = array();
- foreach($keys_all as $key) {
- $isset1 = isset($array1[$key]);
- $isset2 = isset($array2[$key]);
- if ($isset1 && $isset2) {
- // Recurse
- $result[$key] = array_merge_leaves($array1[$key], $array2[$key], $sort_keys);
- } else if ($isset1) {
- $result[$key] = & $array1[$key];
- } else {
- $result[$key] = & $array2[$key];
- }
- }
- return $result;
-}
-
-// An array-leaves to a flat array
-function array_flat_leaves($array, $unique = TRUE)
-{
- if (! is_array($array)) return $array;
-
- $tmp = array();
- foreach(array_keys($array) as $key) {
- if (is_array($array[$key])) {
- // Recurse
- foreach(array_flat_leaves($array[$key]) as $_value) {
- $tmp[] = $_value;
- }
- } else {
- $tmp[] = & $array[$key];
- }
- }
-
- return $unique ? array_values(array_unique($tmp)) : $tmp;
-}
-
-// $array['something'] => $array['wanted']
-function array_rename_keys(& $array, $keys = array('from' => 'to'), $force = FALSE, $default = '')
-{
- if (! is_array($array) || ! is_array($keys)) return FALSE;
- // Nondestructive test
- if (! $force) {
- foreach(array_keys($keys) as $from) {
- if (! isset($array[$from])) {
- return FALSE;
- }
- }
- }
-
- foreach($keys as $from => $to) {
- if ($from === $to) continue;
- if (! $force || isset($array[$from])) {
- $array[$to] = & $array[$from];
- unset($array[$from]);
- } else {
- $array[$to] = $default;
- }
- }
-
- return TRUE;
-}
-
-// Remove redundant values from array()
-function array_unique_recursive($array = array())
-{
- if (! is_array($array)) return $array;
+if (! defined('LIB_DIR')) define('LIB_DIR', './');
+require(LIB_DIR . 'spam_pickup.php');
+require(LIB_DIR . 'spam_util.php');
- $tmp = array();
- foreach($array as $key => $value){
- if (is_array($value)) {
- $array[$key] = array_unique_recursive($value);
- } else {
- if (isset($tmp[$value])) {
- unset($array[$key]);
- } else {
- $tmp[$value] = TRUE;
- }
- }
- }
-
- return $array;
-}
+if (! defined('SPAM_INI_FILE')) define('SPAM_INI_FILE', 'spam.ini.php');
// ---------------------
-// Part One : Checker
+// Regex
// Rough implementation of globbing
//
{
if (! is_string($string)) return '';
- if (mb_strpos($string, '.') === FALSE) {
- // localhost
+ if (mb_strpos($string, '.') === FALSE || is_ip($string)) {
+ // "localhost", IPv4, etc
return generate_glob_regex($string, $divider);
}
- if (is_ip($string)) {
- // IPv4
- return generate_glob_regex($string, $divider);
+ // FQDN or something
+ $part = explode('.', $string, 2);
+ if ($part[0] == '') {
+ // ".example.org"
+ $part[0] = '(?:.*\.)?';
+ } else if ($part[0] == '*') {
+ // "*.example.org"
+ $part[0] = '.*\.';
} else {
- // FQDN or something
- $part = explode('.', $string, 2);
- if ($part[0] == '') {
- // .example.org
- $part[0] = '(?:.*\.)?';
- } else if ($part[0] == '*') {
- // *.example.org
- $part[0] = '.*\.';
- } else {
- // example.org, etc
- return generate_glob_regex($string, $divider);
- }
- $part[1] = generate_glob_regex($part[1], $divider);
- return implode('', $part);
+ // example.org, etc
+ return generate_glob_regex($string, $divider);
}
-}
-// Rough hostname checker
-// TODO: Strict digit, 0x, CIDR, '999.999.999.999', ':', '::G'
-function is_ip($string = '')
-{
- if (! is_string($string)) return FALSE;
+ $part[1] = generate_glob_regex($part[1], $divider);
- if (strpos($string, ':') !== FALSE) {
- return 6; // Seems IPv6
- }
+ return implode('', $part);
+}
- if (preg_match('/^' .
- '(?:[0-9]{1,3}\.){3}[0-9]{1,3}' . '|' .
- '(?:[0-9]{1,3}\.){1,3}' . '$/',
- $string)) {
- return 4; // Seems IPv4(dot-decimal)
- }
- return FALSE; // Seems not IP
-}
+// ---------------------
+// Load
// Load SPAM_INI_FILE and return parsed one
function get_blocklist($list = '')
if (is_string($key)) {
$array[$key] = & $value; // Treat $value as a regex for FQDN(host)s
} else {
- $array[$value] = '#^' . generate_host_regex($value, '#') . '$#i';
+ $regex = generate_host_regex($value, '#');
+ if (! empty($regex)) {
+ $array[$value] = '#^' . $regex . '$#i';
+ }
}
}
// ----------------------------------------
// Area measure
- // Area: There's HTML anchor tag
- if ((! $asap || ! $is_spam) && isset($method['area_anchor'])) {
- $key = 'area_anchor';
- $_asap = isset($method['asap']) ? array('asap' => TRUE) : array();
- $result = area_pickup($target, array($key => TRUE) + $_asap);
- if ($result) {
- $sum[$key] = $result[$key];
- if (isset($method[$key]) && $sum[$key] > $method[$key]) {
- $is_spam[$key] = TRUE;
- }
+ if (! $asap || ! $is_spam) {
+
+ // Method pickup
+ $_method = array();
+ foreach(array(
+ 'area_anchor', // There's HTML anchor tag
+ 'area_bbcode', // There's 'BBCode' linking tag
+ ) as $key) {
+ if (isset($method[$key])) $_method[$key] = TRUE;
}
- }
- // Area: There's 'BBCode' linking tag
- if ((! $asap || ! $is_spam) && isset($method['area_bbcode'])) {
- $key = 'area_bbcode';
- $_asap = isset($method['asap']) ? array('asap' => TRUE) : array();
- $result = area_pickup($target, array($key => TRUE) + $_asap);
- if ($result) {
- $sum[$key] = $result[$key];
- if (isset($method[$key]) && $sum[$key] > $method[$key]) {
- $is_spam[$key] = TRUE;
+ if ($_method) {
+ $_asap = isset($method['asap']) ? array('asap' => TRUE) : array();
+ $_result = area_pickup($target, $_method + $_asap);
+ $_asap = NULL;
+ } else {
+ $_result = FALSE;
+ }
+
+ if ($_result) {
+ foreach(array_keys($_method) as $key) {
+ if (isset($_result[$key])) {
+ $sum[$key] = $_result[$key];
+ if (isset($method[$key]) && $sum[$key] > $method[$key]) {
+ $is_spam[$key] = TRUE;
+ }
+ }
}
}
+
+ unset($_asap, $_method, $_result);
}
// Return if ...
// ----------------------------------------
// URI: Pickup
- $pickups = uri_pickup_normalize(spam_uri_pickup($target, $method));
+ $pickups = spam_uri_pickup($target, $method);
+
+
+ // Return if ...
+ if (empty($pickups)) return $progress;
+
+ // Normalize all
+ $pickups = uri_pickup_normalize($pickups);
+
+ // ----------------------------------------
+ // Pickup some part of URI
+
$hosts = array();
foreach ($pickups as $key => $pickup) {
$hosts[$key] = & $pickup['host'];
}
- // Return if ...
- if (empty($pickups)) return $progress;
-
// ----------------------------------------
// URI: Bad host <pre-filter> (Separate good/bad hosts from $hosts)
if ((! $asap || ! $is_spam) && isset($method['badhost'])) {
$list = get_blocklist('pre');
$blocked = blocklist_distiller($hosts, array_keys($list), $asap);
- foreach($list as $key=>$type){
+ foreach($list as $key => $type){
if (! $type) unset($blocked[$key]); // Ignore goodhost etc
}
unset($list);
$subs = array();
foreach(array_keys($trie[$key]) as $sub) {
if ($sub == '') {
- $subs[] = $key;
+ $subs[] = $key; // 'example.com'
} else {
- $subs[] = $sub . '.' . $key;
+ $subs[] = $sub . '. '; // 'A.foo.bar. '
}
}
$result[] = ' \'' . $key . '\' => \'' . implode(', ', $subs) . '\',';
}
-// Check responsibility-root of the FQDN
-// 'foo.bar.example.com' => 'example.com' (.com has the last whois for it)
-// 'foo.bar.example.au' => 'example.au' (.au has the last whois for it)
-// 'foo.bar.example.edu.au' => 'example.edu.au' (.edu.au has the last whois for it)
-// 'foo.bar.example.act.edu.au' => 'example.act.edu.au' (.act.edu.au has the last whois for it)
-function whois_responsibility($fqdn = 'foo.bar.example.com', $parent = FALSE, $implicit = TRUE)
-{
- static $domain;
-
- if ($fqdn === NULL) {
- $domain = NULL; // Unset
- return '';
- }
- if (! is_string($fqdn)) return '';
-
- if (is_ip($fqdn)) return $fqdn;
-
- if (! isset($domain)) {
- $domain = array();
- if (file_exists(DOMAIN_INI_FILE)) {
- include(DOMAIN_INI_FILE); // Set
- }
- }
-
- $result = array();
- $dcursor = & $domain;
- $array = array_reverse(explode('.', $fqdn));
- $i = 0;
- while(TRUE) {
- if (! isset($array[$i])) break;
- $acursor = $array[$i];
- if (is_array($dcursor) && isset($dcursor[$acursor])) {
- $result[] = & $array[$i];
- $dcursor = & $dcursor[$acursor];
- } else {
- if (! $parent && isset($acursor)) {
- $result[] = & $array[$i]; // Whois servers must know this subdomain
- }
- break;
- }
- ++$i;
- }
-
- // Implicit responsibility: Top-Level-Domains must not be yours
- // 'bar.foo.something' => 'foo.something'
- if ($implicit && count($result) == 1 && count($array) > 1) {
- $result[] = & $array[1];
- }
-
- return $result ? implode('.', array_reverse($result)) : '';
-}
-
-
// ---------------------
// Exit
break;
case 'dump':
echo('<pre>' . "\n");
- echo htmlspecialchars(var_export($data, TRUE));
+ echo htmlsc(var_export($data, TRUE));
echo('</pre>' . "\n");
break;
};
<?php
-// $Id: spam_pickup.php,v 1.5 2007/10/20 04:44:08 henoheno Exp $
-// Copyright (C) 2006-2007 PukiWiki Developers Team
+// $Id: spam_pickup.php,v 1.6 2011/01/24 15:19:36 henoheno Exp $
+// Copyright (C) 2006-2009 PukiWiki Developers Team
// License: GPL v2 or (at your option) any later version
//
// Functions for Concept-work of spam-uri metrics
//
+// (PHP 4 >= 4.3.0): preg_match_all(PREG_OFFSET_CAPTURE): $method['uri_XXX'] related feature
+//
+
+if (! defined('DOMAIN_INI_FILE')) define('DOMAIN_INI_FILE', 'domain.ini.php');
// ---------------------
// URI pickup
// [OK] http://nasty.example.org:80/foo/xxx#nasty_string/bar
// [OK] ftp://nasty.example.org:80/dfsdfs
// [OK] ftp://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm (from RFC3986)
+// Not available for: IDN(ignored)
function uri_pickup($string = '')
{
if (! is_string($string)) return array();
- // Not available for: IDN(ignored)
$array = array();
preg_match_all(
// scheme://userinfo@host:port/path/or/pathinfo/maybefile.and?query=string#fragment
// Refer RFC3986 (Regex below is not strict)
'#(\b[a-z][a-z0-9.+-]{1,8}):[/\\\]+' . // 1: Scheme
'(?:' .
- '([^\s<>"\'\[\]/\#?@]*)' . // 2: Userinfo (Username)
+ '([^\s<>"\'\[\]/\#?@]*)' . // 2: Userinfo (Username and/or password)
'@)?' .
'(' .
// 3: Host
'[a-z0-9_-][a-z0-9_.-]+[a-z0-9_-]' . // hostname(FQDN) : foo.example.org
')' .
'(?::([0-9]*))?' . // 4: Port
- '((?:/+[^\s<>"\'\[\]/\#]+)*/+)?' . // 5: Directory path or path-info
+ '((?:/+[^\s<>"\'\[\]/\#?]+)*/+)?' . // 5: Directory path
'([^\s<>"\'\[\]\#?]+)?' . // 6: File?
'(?:\?([^\s<>"\'\[\]\#]+))?' . // 7: Query string
'(?:\#([a-z0-9._~%!$&\'()*+,;=:@-]*))?' . // 8: Fragment
$string, $array, PREG_SET_ORDER | PREG_OFFSET_CAPTURE
);
- // Format the $array
+ // Reformat the $array
static $parts = array(
1 => 'scheme', 2 => 'userinfo', 3 => 'host', 4 => 'port',
5 => 'path', 6 => 'file', 7 => 'query', 8 => 'fragment'
);
- $default = array('');
+ $default = array(0 => '', 1 => -1);
foreach(array_keys($array) as $uri) {
$_uri = & $array[$uri];
array_rename_keys($_uri, $parts, TRUE, $default);
$offset = $_uri['scheme'][1]; // Scheme's offset = URI's offset
foreach(array_keys($_uri) as $part) {
- $_uri[$part] = & $_uri[$part][0]; // Remove offsets
+ $_uri[$part] = $_uri[$part][0]; // Remove offsets
}
}
$tmp[] = & $uri['scheme'];
$tmp[] = '://';
}
+
if (isset($uri['userinfo']) && $uri['userinfo'] !== '') {
$tmp[] = & $uri['userinfo'];
$tmp[] = '@';
+ } else if (isset($uri['user']) || isset($uri['pass'])) {
+ if (isset($uri['user']) && $uri['user'] !== '') {
+ $tmp[] = & $uri['user'];
+ }
+ $tmp[] = ':';
+ if (isset($uri['pass']) && $uri['pass'] !== '') {
+ $tmp[] = & $uri['pass'];
+ }
+ $tmp[] = '@';
}
+
if (isset($uri['host']) && $uri['host'] !== '') {
$tmp[] = & $uri['host'];
}
+
if (isset($uri['port']) && $uri['port'] !== '') {
$tmp[] = ':';
$tmp[] = & $uri['port'];
}
+
if (isset($uri['path']) && $uri['path'] !== '') {
$tmp[] = & $uri['path'];
}
+
if (isset($uri['file']) && $uri['file'] !== '') {
$tmp[] = & $uri['file'];
}
+
if (isset($uri['query']) && $uri['query'] !== '') {
$tmp[] = '?';
$tmp[] = & $uri['query'];
}
+
if (isset($uri['fragment']) && $uri['fragment'] !== '') {
$tmp[] = '#';
$tmp[] = & $uri['fragment'];
return implode('', $tmp);
}
+
// ---------------------
// URI normalization
// Normalize an array of URI arrays
// NOTE: Give me the uri_pickup() results
-function uri_pickup_normalize(& $pickups, $destructive = TRUE)
+function uri_pickup_normalize(& $pickups, $destructive = TRUE, $pathfile = FALSE)
{
if (! is_array($pickups)) return $pickups;
}
}
+ if ($pathfile) {
+ return uri_pickup_normalize_pathfile($pickups);
+ } else {
+ return $pickups;
+ }
+}
+
+// Normalize: 'path' + 'file' = 'path' (Similar structure using PHP's "parse_url()" function)
+// For each entry of $pickups that has both 'path' and 'file' set,
+// 'file' is appended to 'path' and then removed from the entry.
+// $pickups is taken by reference: It is modified in place, and also returned.
+// A non-array $pickups is returned untouched.
+// NOTE: In some cases, 'file' DOES NOT mean _filename_.
+// [EXAMPLE] http://example.com/path/to/directory-accidentally-not-ended-with-slash
+function uri_pickup_normalize_pathfile(& $pickups)
+{
+ if (! is_array($pickups)) return $pickups;
+
+ foreach (array_keys($pickups) as $key) {
+ // Work on the entry itself, not on a copy
+ $_key = & $pickups[$key];
+ // Entries lacking either part (or having NULL there) are left as is
+ if (isset($_key['path'], $_key['file'])) {
+ $_key['path'] = $_key['path'] . $_key['file'];
+ unset($_key['file']);
+ }
+ }
+
return $pickups;
}
// www.foo.bar => foo.bar
// www.10.20 => www.10.20 (Invalid hostname)
// NOTE:
-// 'www' is mostly used as traditional hostname of WWW server.
-// 'www.foo.bar' may be identical with 'foo.bar'.
+// 'www' is basically the traditional hostname of a WWW server.
+// In such cases, 'www.foo.bar' MAY be identical with 'foo.bar'.
function host_normalize($host = '')
{
if (! is_string($host)) return '';
$host = strtolower($host);
+
$matches = array();
if (preg_match('/^www\.(.+\.[a-z]+)$/', $host, $matches)) {
return $matches[1];
}
// Remove 'offset's for area_measure()
- foreach(array_keys($array) as $key)
+ foreach(array_keys($array) as $key) {
unset($array[$key]['area']['offset']);
+ }
return $array;
}
+// Rough IP-address checker (NOT a strict validator)
+// Returns:
+//   6     -- $string contains ':' (Seems IPv6)
+//   4     -- $string is dot-decimal like '192.168.0.1', or a leading part
+//            of it ended with a dot: '10.', '10.0.', '10.0.0.'
+//   FALSE -- Anything else, including non-strings
+// TODO: Strict digit, 0x, CIDR, '999.999.999.999', ':', '::G'
+function is_ip($string = '')
+{
+	if (! is_string($string)) return FALSE;
+
+	if (strpos($string, ':') !== FALSE) {
+		return 6;	// Seems IPv6
+	}
+
+	// The alternatives are grouped so that BOTH of them are anchored at
+	// BOTH ends. Without the group, '^' belongs to the first alternative
+	// only and '$' to the second only, then hostnames such as
+	// '1.2.3.4.example.com' or 'example1.' were taken as IPv4.
+	if (preg_match('/^(?:' .
+		'(?:[0-9]{1,3}\.){3}[0-9]{1,3}' . '|' .
+		'(?:[0-9]{1,3}\.){1,3}' . ')$/',
+		$string)) {
+		return 4;	// Seems IPv4(dot-decimal)
+	}
+
+	return FALSE;	// Seems not IP
+}
+
+// Check responsibility-root of the FQDN
+// 'foo.bar.example.com' => 'example.com' (.com has the last whois for it)
+// 'foo.bar.example.au' => 'example.au' (.au has the last whois for it)
+// 'foo.bar.example.edu.au' => 'example.edu.au' (.edu.au has the last whois for it)
+// 'foo.bar.example.act.edu.au' => 'example.act.edu.au' (.act.edu.au has the last whois for it)
+//
+// $fqdn     : Hostname to check. NULL is special: It only drops the cached
+//             domain tree, and returns ''
+// $parent   : If TRUE, the first label that is NOT found in the domain tree
+//             is not added to the result
+// $implicit : If TRUE, and only one label was collected from a hostname of
+//             two or more labels, the second label from the right is added
+// Returns the joined labels, or '' if nothing was collected or $fqdn is not
+// a string. What is_ip() accepts is returned as is.
+function whois_responsibility($fqdn = 'foo.bar.example.com', $parent = FALSE, $implicit = TRUE)
+{
+ // Domain tree, kept between calls
+ static $domain;
+
+ if ($fqdn === NULL) {
+ $domain = NULL; // Unset
+ return '';
+ }
+ if (! is_string($fqdn)) return '';
+
+ if (is_ip($fqdn)) return $fqdn;
+
+ // Load the tree at the first call, and at the first call after a reset
+ if (! isset($domain)) {
+ $domain = array();
+ if (file_exists(DOMAIN_INI_FILE)) {
+ // NOTE(review): Presumably this file fills $domain -- Confirm with the file
+ include(DOMAIN_INI_FILE); // Set
+ }
+ }
+
+ // Walk the labels from the rightmost one (TLD side),
+ // going down the tree while the label is known there
+ $result = array();
+ $dcursor = & $domain;
+ $array = array_reverse(explode('.', $fqdn));
+ $i = 0;
+ while(TRUE) {
+ if (! isset($array[$i])) break;
+ $acursor = $array[$i];
+ if (is_array($dcursor) && isset($dcursor[$acursor])) {
+ $result[] = & $array[$i];
+ $dcursor = & $dcursor[$acursor];
+ } else {
+ // isset($acursor) always holds here: $array[$i] was checked just above
+ if (! $parent && isset($acursor)) {
+ $result[] = & $array[$i]; // Whois servers must know this subdomain
+ }
+ break;
+ }
+ ++$i;
+ }
+
+ // Implicit responsibility: Top-Level-Domains must not be yours
+ // 'bar.foo.something' => 'foo.something'
+ if ($implicit && count($result) == 1 && count($array) > 1) {
+ $result[] = & $array[1];
+ }
+
+ // $result is ordered from the TLD side: Turn it back
+ return $result ? implode('.', array_reverse($result)) : '';
+}
+
?>
--- /dev/null
+<?php
+// $Id: spam_util.php,v 1.1 2011/01/24 15:19:36 henoheno Exp $
+// Copyright (C) 2006-2009, 2011 PukiWiki Developers Team
+// License: GPL v2 or (at your option) any later version
+//
+// Functions for Concept-work of spam-uri metrics
+
+
+// ---------------------
+// Compat etc
+
+// Fallback for servers without var_export() (It needs PHP 4 >= 4.2.0):
+// Mail-reporting and dump related features get this notice instead of data
+if (function_exists('var_export') === FALSE) {
+	function var_export()
+	{
+		$notice = 'var_export() is not found on this server' . "\n";
+		return $notice;
+	}
+}
+
+// Return the entries of $input that do NOT match $pattern
+// (PHP 4 >= 4.2.0): preg_grep() enables invert option
+function preg_grep_invert($pattern = '//', $input = array())
+{
+	// Checked only once per request
+	static $invert;
+	if (! isset($invert)) $invert = defined('PREG_GREP_INVERT');
+
+	if ($invert) {
+		return preg_grep($pattern, $input, PREG_GREP_INVERT);
+	}
+
+	// Without the invert option: Take away what matched from $input.
+	// The matched ones are picked up only once, and reused.
+	$matched = preg_grep($pattern, $input);
+	if ($matched) {
+		return array_diff($input, $matched);
+	} else {
+		return $input;
+	}
+}
+
+
+// ---------------------
+// Utilities
+
+
+// htmlsc() is defined here only when nobody has defined it yet
+// (Interface with PukiWiki)
+if (function_exists('htmlsc') === FALSE) {
+	// Charset to use when nobody has set CONTENT_CHARSET yet
+	if (defined('CONTENT_CHARSET') === FALSE) {
+		define('CONTENT_CHARSET', 'ISO-8859-1');
+	}
+
+	// Sugar: htmlspecialchars() with default settings
+	// (ENT_QUOTES, and CONTENT_CHARSET as the charset)
+	function htmlsc($string = '', $flags = ENT_QUOTES, $charset = CONTENT_CHARSET)
+	{
+		$escaped = htmlspecialchars($string, $flags, $charset);
+		return $escaped;
+	}
+}
+
+// Very roughly, shrink the lines of var_export()
+//   $return              : TRUE = return the text, FALSE = echo it and return NULL
+//   $ignore_numeric_keys : TRUE = remove '123 => ' from the heads of lines
+// NOTE: The replacement is textual: If a string value has the same text as
+//       the patterns below, that value gets rewritten too (corrupted).
+function var_export_shrink($expression, $return = FALSE, $ignore_numeric_keys = FALSE)
+{
+	// Remove a newline and spaces between ' => ' and 'array ('
+	$patterns     = array('# => \n *array \(#');
+	$replacements = array(' => array (');
+
+	if ($ignore_numeric_keys) {
+		// Remove numeric keys, keeping the indent
+		$patterns[]     = '#^( *)[0-9]+ => #m';
+		$replacements[] = '$1';
+	}
+
+	// The patterns are applied one by one, in this order
+	$text = preg_replace($patterns, $replacements, var_export($expression, TRUE));
+
+	if (! $return) {
+		echo $text;
+		return NULL;
+	}
+	return $text;
+}
+
+// Data structure: Create a list of $num elements,
+// where every element _refers_to_the_one_ $value
+//   $num : Taken by intval(). Less than 1 means an empty array
+function one_value_array($num = 0, $value = NULL)
+{
+	$list = array();
+	$left = max(0, intval($num));
+
+	while ($left > 0) {
+		$list[] = & $value;	// By reference: Not a copy per element
+		--$left;
+	}
+
+	return $list;
+}
+
+// Reverse $string with specified delimiter
+//   $string     : A string, or an array of them (Mapped one by one)
+//   $from_delim : Delimiter to split $string with. Must be a non-empty string
+//   $to_delim   : Delimiter to join with. NULL means the same as $from_delim
+// Returns the reversed string (or an array of the results, without the
+// original keys), or FALSE for invalid arguments
+function delimiter_reverse($string = 'foo.bar.example.com', $from_delim = '.', $to_delim = NULL)
+{
+	$to_null = ($to_delim === NULL);
+
+	// An empty delimiter is refused here, because explode() does not accept it
+	if (! is_string($from_delim) || $from_delim === '' ||
+		(! $to_null && ! is_string($to_delim))) {
+		return FALSE;
+	}
+	if (is_array($string)) {
+		// Map, Recurse
+		$count = count($string);
+		$from  = one_value_array($count, $from_delim);
+		if ($to_null) {
+			// Note: array_map() vanishes all keys
+			return array_map('delimiter_reverse', $string, $from);
+		} else {
+			$to = one_value_array($count, $to_delim);
+			// Note: array_map() vanishes all keys
+			return array_map('delimiter_reverse', $string, $from, $to);
+		}
+	}
+	if (! is_string($string)) {
+		return FALSE;
+	}
+
+	// Returns com.example.bar.foo
+	if ($to_null) $to_delim = & $from_delim;
+	return implode($to_delim, array_reverse(explode($from_delim, $string)));
+}
+
+// ksort() by domain: Sort $array by its keys, comparing them as
+// delimiter_reverse() gives ('foo.example.com' as 'com.example.foo')
+// NOTE: Keys that delimiter_reverse() refuses (returns FALSE) are dropped
+function ksort_by_domain(& $array)
+{
+	// Reversed key => Original key
+	$map = array();
+	foreach(array_keys($array) as $name) {
+		$flipped = delimiter_reverse($name);
+		if ($flipped === FALSE) continue;
+		$map[$flipped] = $name;
+	}
+	ksort($map, SORT_STRING);
+
+	// Rebuild in the sorted order
+	$sorted = array();
+	foreach($map as $name) {
+		$sorted[$name] = & $array[$name];
+	}
+
+	$array = $sorted;
+}
+
+// Roughly strings(1) using PCRE
+// This function is useful to:
+// * Reduce the size of data, from removing unprintable binary data
+// * Detect _bare_strings_ from binary data
+// Arguments:
+// $binary : Data to filter. An array or TRUE is taken as '', others by strval()
+// $min_len : If more than 1, lines shorter than this are removed (1024 at most)
+// $ignore_space : If TRUE, runs of spaces/tabs become one space, and a
+// space/tab at the head or the tail of each line is removed
+// $multibyte : If TRUE, mb_ereg_replace() is used instead of preg_replace()
+// Returns the filtered string. Each removed run becomes one "\n".
+// References:
+// http://www.freebsd.org/cgi/man.cgi?query=strings (Man-page of GNU strings)
+// http://www.pcre.org/pcre.txt
+// Note: mb_ereg_replace() is one of mbstring extension's functions
+// and need to init its encoding.
+function strings($binary = '', $min_len = 4, $ignore_space = FALSE, $multibyte = FALSE)
+{
+ // String only
+ $binary = (is_array($binary) || $binary === TRUE) ? '' : strval($binary);
+
+ // What to remove: Runs of characters not listed in the bracket
+ $regex = $ignore_space ?
+ '[^[:graph:] \t\n]+' : // Remove "\0" etc. Only ' ', "\t" and "\n" survive as spaces
+ '[^[:graph:][:space:]]+'; // Preserve readable spaces if possible
+
+ // Each removed run is replaced with one newline
+ $binary = $multibyte ?
+ mb_ereg_replace($regex, "\n", $binary) :
+ preg_replace('/' . $regex . '/s', "\n", $binary);
+
+ if ($ignore_space) {
+ // In this order: Shrink the runs, then trim the both ends of each line
+ $binary = preg_replace(
+ array(
+ '/[ \t]{2,}/',
+ '/^[ \t]/m',
+ '/[ \t]$/m',
+ ),
+ array(
+ ' ',
+ '',
+ ''
+ ),
+ $binary);
+ }
+
+ if ($min_len > 1) {
+ // The last character seems "\n" or not
+ // (Remembered here, because implode() below does not add the last one)
+ $br = (! empty($binary) && $binary[strlen($binary) - 1] == "\n") ? "\n" : '';
+
+ $min_len = min(1024, intval($min_len));
+ // Keep only the lines having $min_len or more characters
+ $regex = '/^.{' . $min_len . ',}/S';
+ $binary = implode("\n", preg_grep($regex, explode("\n", $binary))) . $br;
+ }
+
+ return $binary;
+}
+
+
+// ---------------------
+// Utilities: Arrays
+
+// Count leaves of a (nested) array
+//   A leaf = A value that is not an array
+//   An empty array is counted as one leaf only when $count_empty is TRUE
+function array_count_leaves($array = array(), $count_empty = FALSE)
+{
+	if (! is_array($array)) return 1;	// A leaf itself
+	if ($count_empty && empty($array)) return 1;
+
+	// Recurse, and sum up
+	$leaves = 0;
+	foreach (array_keys($array) as $key) {
+		$leaves += array_count_leaves($array[$key], $count_empty);
+	}
+	return $leaves;
+}
+
+// Merge two arrays, leaf by leaf
+// Similar to PHP array_merge_recursive(), except strictly preserving keys as string
+//   $sort_keys : If TRUE, the keys of the result are sorted as strings
+// Returns:
+//   Both are arrays  -- The merged array
+//   One is an array  -- That array
+//   None is an array -- $array2 ($array1 is overwritten)
+function array_merge_leaves($array1, $array2, $sort_keys = TRUE)
+{
+	// Array(s) only
+	if (! is_array($array2)) {
+		return is_array($array1) ? $array1 : $array2;
+	}
+	if (! is_array($array1)) return $array2;
+
+	$keys_all = array_merge(array_keys($array1), array_keys($array2));
+	if ($sort_keys) sort($keys_all, SORT_STRING);
+
+	$result = array();
+	foreach($keys_all as $key) {
+		// A key in both arrays is listed twice in $keys_all: Merge it only
+		// once, or the same subtree is walked again at every depth
+		if (array_key_exists($key, $result)) continue;
+
+		$isset1 = isset($array1[$key]);
+		$isset2 = isset($array2[$key]);
+		if ($isset1 && $isset2) {
+			// Recurse
+			$result[$key] = array_merge_leaves($array1[$key], $array2[$key], $sort_keys);
+		} else if ($isset1) {
+			$result[$key] = & $array1[$key];
+		} else {
+			$result[$key] = & $array2[$key];
+		}
+	}
+	return $result;
+}
+
+// An array-leaves to a flat array
+//   $unique : If TRUE, duplicated values are removed from the result
+// Returns a list of all the leaves (The keys are not preserved),
+// or $array itself if it is not an array
+function array_flat_leaves($array, $unique = TRUE)
+{
+	if (! is_array($array)) return $array;
+
+	$tmp = array();
+	foreach(array_keys($array) as $key) {
+		if (is_array($array[$key])) {
+			// Recurse, without removing duplicates: It is done only once, below.
+			// (With the default $unique = TRUE here, duplicates inside of
+			//  sub-arrays vanished even when the caller specified FALSE)
+			foreach(array_flat_leaves($array[$key], FALSE) as $_value) {
+				$tmp[] = $_value;
+			}
+		} else {
+			$tmp[] = & $array[$key];
+		}
+	}
+
+	return $unique ? array_values(array_unique($tmp)) : $tmp;
+}
+
+// Rename keys: $array['something'] => $array['wanted']
+//   $keys    : array('from' => 'to', ...)
+//   $force   : FALSE = All or nothing: Do nothing unless every 'from' isset()
+//              TRUE  = Rename what isset(), and set $default to 'to' of the others
+//   $default : The value for $force
+// Returns FALSE if the arguments are not arrays, or the test without $force
+// fails. Otherwise TRUE. $array is modified in place.
+function array_rename_keys(& $array, $keys = array('from' => 'to'), $force = FALSE, $default = '')
+{
+	if (! is_array($array) || ! is_array($keys)) return FALSE;
+
+	if (! $force) {
+		// Nondestructive test
+		foreach($keys as $from => $to) {
+			if (! isset($array[$from])) return FALSE;
+		}
+	}
+
+	foreach($keys as $from => $to) {
+		if ($from === $to) continue;
+
+		if ($force && ! isset($array[$from])) {
+			// Nothing to move
+			$array[$to] = $default;
+			continue;
+		}
+
+		$array[$to] = & $array[$from];
+		unset($array[$from]);
+	}
+
+	return TRUE;
+}
+
+// Remove redundant (duplicated) values from array(), at every depth
+//   * Keys are preserved. The first one of the same values remains
+//   * Values are compared only with the others in the same array
+// Returns the result, or $array itself if it is not an array
+function array_unique_recursive($array = array())
+{
+	if (! is_array($array)) return $array;
+
+	$seen = array();	// Values found so far, as keys
+	foreach(array_keys($array) as $key) {
+		$value = $array[$key];
+
+		if (is_array($value)) {
+			// Recurse
+			$array[$key] = array_unique_recursive($value);
+			continue;
+		}
+
+		if (isset($seen[$value])) {
+			unset($array[$key]);
+			continue;
+		}
+		$seen[$value] = TRUE;
+	}
+
+	return $array;
+}
+
+?>
<?php
-// $Id: spam.ini.php,v 1.93 2010/09/04 13:36:25 henoheno Exp $
+// $Id: spam.ini.php,v 1.94 2011/01/24 15:19:36 henoheno Exp $
// Spam-related setting
// NOTE FOR ADMINISTRATORS:
$blocklist['A-1'] = array(
- // A-1: General redirection services -- by HTML meta, HTML frame, JavaScript,
+ // A-1: General redirection or masking services -- by HTML meta, HTML frame, JavaScript,
// web-based proxy, DNS subdomains, etc
// http://en.wikipedia.org/wiki/URL_redirection
//
'gzurl.com',
'url.grillsportverein.de',
'Harudake.net' => array('*.hyu.jp'),
+ 'hatena.ne.jp related' => array(
+ 'htn.to', // 2010-09 59.106.108.106 (hatena.ne.jp is 59.106.108.106)
+ ),
'Hattinger Linux User Group' => array('short.hatlug.de'),
'Hexten.net' => array('lyxus.net'),
'here.is',
'trimurl.com',
//'ttu.cc', // Seems closed
'turl.jp',
- 'Twitter' => array(
+ 'Twitter.com' => array(
't.co', // by (cofounders at cointernet.co)
'twt.tl',
),
'useurl.us', // by Edward Beauchamp (mail at ebvk.com)
'utun.jp',
'uxxy.com',
+ 'uzo.in', // 2010-09 redirects, and subdomain
'*.v27.net',
'V3.com by FortuneCity.com' => array( // http://www.v3.com/sub-domain-list.shtml
'*.all.at',
),
'.onlinecasinoinformation.com', // 2010/08 66.96.147.105
'.onlinecasinoresources.com', // 2010/08 74.220.215.62
+
+ 'moshenhm at gmail.com' => array( // by Nahum, Moshe (moshenhm at gmail.com)
+
+ // 2010/08 65.254.248.143
+ '.odinhosting.com', // seems no link today
+ '.nycdivers.com', // link to mainalpha.com
+ '.robertlhines.com', // link to mainalpha.com, etc
+ '.shadowsonmyshift.com', // link to mainalpha.com, etc
+
+ // 2010/08 69.89.31.187
+ '.bcsliding.com', //
+ '.msthirteen.com', // link to mainalpha.com
+ // 2010/08 74.53.239.27
+ '.thetravelerscafe.com', // cheap tickets
+
+ // 2010/08 74.81.92.55
+ '.sonicparthenon.com', // link to mainalpha.com
+ '.staroftheevening.com', // link to mainalpha.com
+ '.sanjosecosmeticdental.com',
+
+ // 2010/08 173.45.103.74
+ '.digitalexperts.com',
+
+ // 2010/08 173.236.48.82
+ '.sunshinetesting.com', // link to mainalpha.com
+ '.sports-and-concert-tickets.com', // seems no link today
+
+ // 2010/08 174.120.82.124
+ '.blueysretreat.com', // seems no link today
+ '.lamborghinidenveronline.com', // seems no link today
+ '.buckandbb.net', // link to kqzyfj.com
+
+ // 2010/08 174.132.149.98
+ '.2008-national-n-scale-convention.com', // link to mainalpha.com
+ '.creativejuicecompetition.com', // seems no link today
+
+ // 2010/08 216.119.132.2
+ '.kennybrown.net',
+
+ // 2010/08 no address today
+ '.bestblackdatingonline.com',
+ ),
+
+ 'info at dvishnu.com' => array( // by Vishnu Prasath (info at dvishnu.com)
+
+ // 2010/08 69.89.31.187
+ '.dinuzzollc.com', // link to mainalpha.com etc
+ '.laruesbackdoor.com', // link to mainalpha.com etc
+ '.okrenters.com', // link to mainalpha.co
+ '.pandaitaid.com', // link to mainalpha.com, etc
+ '.vicariouscollection.com', // by Vishnu Prasath (info at dvishnu.com)
+ '.middlefingerproductions.net', // link to mainalpha.com
+
+ // 2010/08 74.81.92.55
+ '.ecoxfinancial.com', // link to mainalpha.com etc
+ '.fightingspirit-comics.com', // link to mainalpha.com etc
+ '.learntoplaythedobro.com', // link to mainalpha.com etc
+ '.montcalm4hfair.com', // link to mainalpha.com etc
+ '.oaads.com', // link to mainalpha.com etc
+ '.pabloblum.com', // link to mainalpha.com
+ '.renaissancequartet.com', // link to mainalpha.com
+ '.sbi-limited.com', // link to mainalpha.com
+ '.showeroffire.com', // link to mainalpha.com
+ '.soccerfestcolumbus.com', // link to mainalpha.com
+
+ // 2010/08 173.236.48.82
+ '.anniedguesthouse.com', // link to mainalpha.com
+ '.finnfest2009.com', // link to mainalpha.com
+ '.hietalasoldworldmeats.com', // link to mainalpha.com etc
+ '.splendoreimport.com', // link to mainalpha.com
+
+ // 2010/08 174.120.82.124
+ '.jenurbanandthebox.com', // link to mainalpha.com
+
+ // 2010/08 174.132.149.98
+ '.segwaybykar.com', // link to mainalpha.com
+ ),
+
'mainalpha.com related' => array(
// 2010/08 65.254.248.143
'.archivecdbooksus.com', // by (offpista at gmail.com), "sports betting"
'.highrollersonlinecasinos.com', // casios
'.onlinecasinocenter.com', // casios
- '.odinhosting.com', // by Nahum, Moshe (moshenhm at gmail.com), seems no link today
- '.nycdivers.com', // by Nahum, Moshe (moshenhm at gmail.com), link to mainalpha.com
- '.robertlhines.com', // by Nahum, Moshe (moshenhm at gmail.com), link to mainalpha.com, etc
- '.shadowsonmyshift.com', // by Nahum, Moshe (moshenhm at gmail.com), link to mainalpha.com, etc
- // 2010/08 69.89.31.187
- '.bcsliding.com', // by Nahum, Moshe (moshenhm at gmail.com)
- '.dinuzzollc.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com etc
- '.laruesbackdoor.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com etc
- '.msthirteen.com', // by Nahum, Moshe (moshenhm at gmail.com), link to mainalpha.com
- '.okrenters.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.co
- '.pandaitaid.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com, etc
- '.vicariouscollection.com', // by Vishnu Prasath (info at dvishnu.com)
- '.middlefingerproductions.net', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com
-
- // 2010/08 74.53.239.27
- '.thetravelerscafe.com', // by Nahum, Moshe (moshenhm at gmail.com), cheap tickets
// 2010/08 74.81.92.55
'.2ndrose.com', // by Victor Zrovanov (victor3239 at gmail.com), link to mainalpha.com
- '.ecoxfinancial.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com etc
'.edgewatertowers.com', // by Victor Frankl (victor3239 at gmail.com), link to mainalpha.com
- '.fightingspirit-comics.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com etc
- '.learntoplaythedobro.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com etc
- '.montcalm4hfair.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com etc
- '.oaads.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com etc
- '.pabloblum.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com
- '.renaissancequartet.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com
- '.sbi-limited.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com
- '.showeroffire.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com
- '.soccerfestcolumbus.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com
- '.sonicparthenon.com', // by Nahum, Moshe (moshenhm at gmail.com), link to mainalpha.com
- '.staroftheevening.com', // by Nahum, Moshe (moshenhm at gmail.com), link to mainalpha.com
- // 2010/08 173.45.103.74
- '.digitalexperts.com', // by Nahum, Moshe (moshenhm at gmail.com)
-
// 2010/08 173.236.48.82
- '.anniedguesthouse.com', // by Vishnu Prasath (info dvishnu.com), link to mainalpha.com
'.ciprogram.com', // link to mainalpha.com
- '.finnfest2009.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com
- '.hietalasoldworldmeats.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com etc
- '.splendoreimport.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com
- '.sunshinetesting.com', // by Nahum, Moshe (moshenhm at gmail.com), link to mainalpha.com
- '.sports-and-concert-tickets.com', // by Nahum, Moshe (moshenhm at gmail.com), seems no link today
-
- // 2010/08 174.120.82.124
- '.blueysretreat.com', // by Nahum, Moshe (moshenhm at gmail.com), seems no link today
- '.jenurbanandthebox.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com
- '.lamborghinidenveronline.com', // by Nahum, Moshe (moshenhm at gmail.com), seems no link today
- '.buckandbb.net', // by Nahum, Moshe (moshenhm at gmail.com), link to kqzyfj.com
// 2010/08 174.132.149.98
- '.2008-national-n-scale-convention.com', // by Moshe Nahum (moshenhm at gmail.com), link to mainalpha.com
'.msgulfcoastbnbs.com', // by Michael John (cpajourney at yahoo.com), link to mainalpha.com etc
- '.segwaybykar.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com
- '.creativejuicecompetition.com', // by Nahum, Moshe (moshenhm at gmail.com), seems no link today
-
- // 2010/08 216.119.132.2
- '.kennybrown.net', // by Moshe Nahum (moshenhm at gmail.com),
-
- // 2010/08 no address today
- '.bestblackdatingonline.com', // by Nahum, Moshe (moshenhm at gmail.com)
),
+
'kouvald at gmail.com' => array( // by Vlad Kouvaldin (kouvald at gmail.com)
// 2010/08 91.205.156.73
'.analsextube247.com', // 2010/08 99.192.176.58
'.eroticandy.com', // 2010/08 99.192.176.54
),
- 'nikiforov501@mail.ru' => array(
+ 'nikiforov501 at mail.ru' => array(
// 2010/09 91.205.156.74
'.211park.com',
'.atlantajuniorthrashers.com',
$blocklist['E'] = array(
// E: Sample setting of
- // Promoters
- // (Affiliates, Hypes, Catalog retailers, Multi-level marketings, Resellers,
+ // Promotions
+ // (Phenomena with Affiliates, Hypes, Catalog retailers, Multi-level marketing, Resellers,
// Ads, Business promotions, SEO, etc)
//
- // They often promotes near you using blog article, mail-magazines, tools(search engines, blogs, etc), etc.
+ // Promotions near you using blog articles, mail-magazines, tools (search engines, blogs, etc.), etc.
// Sometimes they may promote each other
+ //
+ // Please notify us about this list, with your reason:
+ // http://pukiwiki.sourceforge.jp/dev/?BugTrack2/342
'15-Mail.com related' => array(
'.15-mail.com', // 202.218.109.45(*.netassist.jp) by yukiyo yamamoto (sunkusu5268 at m4.ktplan.ne.jp)