<?php
-// $Id: spam.php,v 1.127 2007/03/25 13:46:43 henoheno Exp $
+// $Id: spam.php,v 1.131 2007/04/22 08:04:19 henoheno Exp $
// Copyright (C) 2006-2007 PukiWiki Developers Team
// License: GPL v2 or (at your option) any later version
//
if ($destructive) {
foreach (array_keys($pickups) as $key) {
$_key = & $pickups[$key];
- $_key['scheme'] = isset($_key['scheme']) ? scheme_normalize($_key['scheme']) : '';
+ $_key['scheme'] = isset($_key['scheme']) ? scheme_normalize($_key['scheme']) : '';
$_key['host'] = isset($_key['host']) ? host_normalize($_key['host']) : '';
- $_key['port'] = isset($_key['port']) ? port_normalize($_key['port'], $_key['scheme'], FALSE) : '';
+ $_key['port'] = isset($_key['port']) ? port_normalize($_key['port'], $_key['scheme'], FALSE) : '';
$_key['path'] = isset($_key['path']) ? strtolower(path_normalize($_key['path'])) : '';
$_key['file'] = isset($_key['file']) ? file_normalize($_key['file']) : '';
$_key['query'] = isset($_key['query']) ? query_normalize($_key['query']) : '';
} else {
foreach (array_keys($pickups) as $key) {
$_key = & $pickups[$key];
- $_key['scheme'] = isset($_key['scheme']) ? scheme_normalize($_key['scheme']) : '';
- $_key['host'] = isset($_key['host']) ? strtolower($_key['host']) : '';
- $_key['port'] = isset($_key['port']) ? port_normalize($_key['port'], $_key['scheme'], FALSE) : '';
- $_key['path'] = isset($_key['path']) ? path_normalize($_key['path']) : '';
+ $_key['scheme'] = isset($_key['scheme']) ? scheme_normalize($_key['scheme']) : '';
+ $_key['host'] = isset($_key['host']) ? strtolower($_key['host']) : '';
+ $_key['port'] = isset($_key['port']) ? port_normalize($_key['port'], $_key['scheme'], FALSE) : '';
+ $_key['path'] = isset($_key['path']) ? path_normalize($_key['path']) : '';
}
}
-
return $pickups;
}
if (isset($simple_defaults[$_file])) return '';
- // [Apache 2 Content-negotiation (type-map)]
- // Roughly removing language/character-set/encoding suffixes,
- // (See Apache 2 document about 'mod_mime' and 'mod_negotiation',
- // http://www.iana.org/assignments/character-sets, RFC3066, and ISO 639))
+ // Roughly removing language/character-set/encoding suffixes
+ // References:
+ // * Apache 2 document about 'Content-negotiation', 'mod_mime' and 'mod_negotiation'
+ // http://httpd.apache.org/docs/2.0/content-negotiation.html
+ // http://httpd.apache.org/docs/2.0/mod/mod_mime.html
+ // http://httpd.apache.org/docs/2.0/mod/mod_negotiation.html
+ // * http://www.iana.org/assignments/character-sets
+ // * RFC3066: Tags for the Identification of Languages
+ // http://www.ietf.org/rfc/rfc3066.txt
+ // * ISO 639: codes of 'language names'
$suffixes = explode('.', $_file);
$body = array_shift($suffixes);
if ($suffixes) {
- // Remove the liast .gz/.z
+ // Remove the last .gz/.z
$last_key = end(array_keys($suffixes));
if (isset($encoding_suffix[$suffixes[$last_key]])) {
unset($suffixes[$last_key]);
// [OK] nothing==&eg=dummy&eg=padding&eg=foobar => eg=foobar
function query_normalize($string = '', $equal = TRUE, $equal_cutempty = TRUE, $stortolower = TRUE)
{
+ if (! is_string($string)) return '';
if ($stortolower) $string = strtolower($string);
$array = explode('&', $string);
// 23 => ']',
);
+ if (! is_string($string)) return '';
+
$string = str_replace($from, $mid, $string); // Hide
$string = preg_quote($string, $divider);
$string = str_replace($mid, $to, $string); // Unhide
// TODO: IPv4, CIDR?, IPv6
function generate_host_regex($string = '', $divider = '/')
{
+ if (! is_string($string)) return '';
+
if (mb_strpos($string, '.') === FALSE)
return generate_glob_regex($string, $divider);
// '*.blogspot.com', // Blog services's subdomains (only)
// 'IANA-examples' => '#^(?:.*\.)?example\.(?:com|net|org)$#',
// );
- foreach(array('goodhost', 'badhost') as $_list) {
- if (! isset($blocklist[$list])) continue;
+ if (isset($blocklist['list'])) {
+ $regexs['list'] = & $blocklist['list'];
+ } else {
+ // Default
+ $blocklist['list'] = array(
+ 'goodhost' => FALSE,
+ 'badhost' => TRUE,
+ );
+ }
+ foreach(array_keys($blocklist['list']) as $_list) {
+ if (! isset($blocklist[$_list])) continue;
foreach ($blocklist[$_list] as $key => $value) {
if (is_array($value)) {
$regexs[$_list][$key] = array();
get_blocklist_add($regexs[$_list], $key, $value);
}
}
+ unset($blocklist[$_list]);
}
}
}
} else {
$array[$value] = '/^' . generate_host_regex($value, '/') . '$/i';
}
-}
+}
function is_badhost($hosts = array(), $asap = TRUE, & $remains)
{
$result = array();
if (! is_array($hosts)) $hosts = array($hosts);
foreach(array_keys($hosts) as $key) {
- if (! is_string($hosts[$key])) unset($hosts[$key]);
- }
- if (empty($hosts)) return $result;
-
- foreach (get_blocklist('goodhost') as $regex) {
- $hosts = preg_grep_invert($regex, $hosts);
+ if (! is_string($hosts[$key])) {
+ unset($hosts[$key]);
+ }
}
if (empty($hosts)) return $result;
- $tmp = array();
- foreach (get_blocklist('badhost') as $label => $regex) {
- if (is_array($regex)) {
- $result[$label] = array();
- foreach($regex as $_label => $_regex) {
- if (is_badhost_avail($_label, $_regex, $hosts, $result[$label]) && $asap) break;
+ foreach(get_blocklist('list') as $key=>$value){
+ if ($value) {
+ foreach (get_blocklist($key) as $label => $regex) {
+ if (is_array($regex)) {
+ $result[$label] = array();
+ foreach($regex as $_label => $_regex) {
+ if (is_badhost_avail($_label, $_regex, $hosts, $result[$label]) && $asap) {
+ break;
+ }
+ }
+ if (empty($result[$label])) unset($result[$label]);
+ } else {
+ if (is_badhost_avail($label, $regex, $hosts, $result) && $asap) {
+ break;
+ }
+ }
}
- if (empty($result[$label])) unset($result[$label]);
} else {
- if (is_badhost_avail($label, $regex, $hosts, $result) && $asap) break;
+ foreach (get_blocklist($key) as $regex) {
+ $hosts = preg_grep_invert($regex, $hosts);
+ }
+ if (empty($hosts)) return $result;
}
}
$remains = $hosts;
-
return $result;
}
{
$group = preg_grep($regex, $hosts);
if ($group) {
+
+ // DEBUG var_dump($group); // badhost detail
+
$result[$label] = & $group;
$hosts = array_diff($hosts, $result[$label]);
return TRUE;
if ($asap && $is_spam) return $progress;
// URI: Pickup
- $pickups = spam_uri_pickup($target, $method);
+ $pickups = uri_pickup_normalize(spam_uri_pickup($target, $method));
//$remains['uri_pickup'] = & $pickups;
// Return if ...
// URI: Uniqueness (and removing non-uniques)
if ((! $asap || ! $is_spam) && isset($method['non_uniquri'])) {
- uri_pickup_normalize($pickups);
-
$uris = array();
foreach (array_keys($pickups) as $key) {
$uris[$key] = uri_pickup_implode($pickups[$key]);