<?php
-// $Id: spam.php,v 1.197 2007/07/03 14:51:07 henoheno Exp $
+// $Id: spam.php,v 1.202 2007/08/18 09:10:58 henoheno Exp $
// Copyright (C) 2006-2007 PukiWiki Developers Team
// License: GPL v2 or (at your option) any later version
//
return $count;
}
+// Merge two array-leaves (nested arrays) recursively
+// Similar to PHP array_merge_recursive(), except strictly preserving keys as string:
+// a key found on both sides becomes ONE merged entry, and keys are never renumbered
+//
+// $array1, $array2 : Values to merge
+//   * Both are arrays    => merged key by key (recursively)
+//   * Only one is array  => that array is returned as is
+//   * Neither is array   => $array2 is returned ($array1 is overwritten)
+// $sort_keys : If TRUE, keys of each merged level are sorted as strings
+//
+// NOTE: Existence is tested by isset(), so a NULL value is treated as
+//       'not set' and never overwrites a non-NULL value of the other side
+function array_merge_leaves($array1, $array2, $sort_keys = TRUE)
+{
+	// Array(s) only
+	if (! is_array($array1)) return $array2;	// $array1 is overwritten, or not an array at all
+	if (! is_array($array2)) return $array1;
+
+	// Visit each key only once. Without array_unique(), a key shared by
+	// both arrays is listed twice and merged (recursively) twice per level
+	$keys_all = array_unique(array_merge(array_keys($array1), array_keys($array2)));
+	if ($sort_keys) sort($keys_all, SORT_STRING);
+
+	$result = array();
+	foreach($keys_all as $key) {
+		$isset1 = isset($array1[$key]);
+		$isset2 = isset($array2[$key]);
+		if ($isset1 && $isset2) {
+			// Recurse
+			$result[$key] = array_merge_leaves($array1[$key], $array2[$key], $sort_keys);
+		} else if ($isset1) {
+			$result[$key] = $array1[$key];
+		} else if ($isset2) {
+			$result[$key] = $array2[$key];
+		} else {
+			// The key exists, but holds NULL wherever it exists
+			$result[$key] = NULL;
+		}
+	}
+	return $result;
+}
+
// An array-leaves to a flat array
function array_flat_leaves($array, $unique = TRUE)
{
if (! is_array($array) || ! is_array($keys)) return FALSE;
// Nondestructive test
- if (! $force)
- foreach(array_keys($keys) as $from)
- if (! isset($array[$from]))
+ if (! $force) {
+ foreach(array_keys($keys) as $from) {
+ if (! isset($array[$from])) {
return FALSE;
+ }
+ }
+ }
foreach($keys as $from => $to) {
if ($from === $to) continue;
if (mb_strpos($string, '.') === FALSE)
return generate_glob_regex($string, $divider);
- $result = '';
if (is_ip($string)) {
// IPv4
return generate_glob_regex($string, $divider);
if (file_exists(SPAM_INI_FILE)) {
$blocklist = array();
include(SPAM_INI_FILE);
+ // $blocklist['list'] = array(
+ // //'goodhost' => FALSE,
+ // 'badhost' => TRUE,
+ // );
// $blocklist['badhost'] = array(
// '*.blogspot.com', // Blog services's subdomains (only)
// 'IANA-examples' => '#^(?:.*\.)?example\.(?:com|net|org)$#',
// );
- if (isset($blocklist['list'])) {
- $regexes['list'] = & $blocklist['list'];
- } else {
- // Default
- $blocklist['list'] = array(
- 'goodhost' => FALSE,
- 'badhost' => TRUE,
- );
- }
- foreach(array_keys($blocklist['list']) as $_list) {
- if (! isset($blocklist[$_list])) continue;
- foreach ($blocklist[$_list] as $key => $value) {
- if (is_array($value)) {
- $regexes[$_list][$key] = array();
- foreach($value as $_key => $_value) {
- get_blocklist_add($regexes[$_list][$key], $_key, $_value);
+ foreach(array('pre', 'list') as $special) {
+ if (! isset($blocklist[$special])) continue;
+ $regexes[$special] = $blocklist[$special];
+ foreach(array_keys($blocklist[$special]) as $_list) {
+ if (! isset($blocklist[$_list])) continue;
+ foreach ($blocklist[$_list] as $key => $value) {
+ if (is_array($value)) {
+ $regexes[$_list][$key] = array();
+ foreach($value as $_key => $_value) {
+ get_blocklist_add($regexes[$_list][$key], $_key, $_value);
+ }
+ } else {
+ get_blocklist_add($regexes[$_list], $key, $value);
}
- } else {
- get_blocklist_add($regexes[$_list], $key, $value);
}
+ unset($blocklist[$_list]);
}
- unset($blocklist[$_list]);
}
}
}
),
);
+ // ----------------------------------------
// Aliases
+
$sum = & $progress['sum'];
$is_spam = & $progress['is_spam'];
$progress['method'] = & $method; // Argument
$hosts = & $progress['hosts'];
$asap = isset($method['asap']);
+ // ----------------------------------------
// Init
+
if (! is_array($method) || empty($method)) {
$method = check_uri_spam_method();
}
}
if (! isset($sum['quantity'])) $sum['quantity'] = 0;
+ // ----------------------------------------
+ // Recurse
+
if (is_array($target)) {
foreach($target as $str) {
if (! is_string($str)) continue;
if ($asap && $is_spam) break;
// Merge only
- $blocked = array_merge_recursive($blocked, $_progress['blocked']);
- $hosts = array_merge_recursive($hosts, $_progress['hosts']);
+ $blocked = array_merge_leaves($blocked, $_progress['blocked'], FALSE);
+ $hosts = array_merge_leaves($hosts, $_progress['hosts'], FALSE);
}
// Unique values
return $progress;
}
+ // ----------------------------------------
+ // Area measure
+
// Area: There's HTML anchor tag
if ((! $asap || ! $is_spam) && isset($method['area_anchor'])) {
$key = 'area_anchor';
// Return if ...
if ($asap && $is_spam) return $progress;
+ // ----------------------------------------
// URI: Pickup
+
$pickups = uri_pickup_normalize(spam_uri_pickup($target, $method));
+ $hosts = array();
+ foreach ($pickups as $key => $pickup) {
+ $hosts[$key] = & $pickup['host'];
+ }
// Return if ...
if (empty($pickups)) return $progress;
+ // ----------------------------------------
+ // URI: Bad host <pre-filter> (Separate good/bad hosts from $hosts)
+
+ if ((! $asap || ! $is_spam) && isset($method['badhost'])) {
+ $list = get_blocklist('pre');
+ $blocked = blocklist_distiller($hosts, array_keys($list), $asap);
+ foreach($list as $key=>$type){
+ if (! $type) unset($blocked[$key]); // Ignore goodhost etc
+ }
+ unset($list);
+ if (! empty($blocked)) $is_spam['badhost'] = TRUE;
+ }
+
+ // Return if ...
+ if ($asap && $is_spam) return $progress;
+
+ // Remove blocked from $pickups
+ foreach(array_keys($pickups) as $key) {
+ if (! isset($hosts[$key])) {
+ unset($pickups[$key]);
+ }
+ }
+
+ // ----------------------------------------
// URI: Check quantity
+
$sum['quantity'] += count($pickups);
// URI quantity
if ((! $asap || ! $is_spam) && isset($method['quantity']) &&
$is_spam['quantity'] = TRUE;
}
+ // ----------------------------------------
// URI: used inside HTML anchor tag pair
+
if ((! $asap || ! $is_spam) && isset($method['uri_anchor'])) {
$key = 'uri_anchor';
foreach($pickups as $pickup) {
}
}
+ // ----------------------------------------
// URI: used inside 'BBCode' pair
+
if ((! $asap || ! $is_spam) && isset($method['uri_bbcode'])) {
$key = 'uri_bbcode';
foreach($pickups as $pickup) {
}
}
+ // ----------------------------------------
// URI: Uniqueness (and removing non-uniques)
+
if ((! $asap || ! $is_spam) && isset($method['non_uniquri'])) {
$uris = array();
// Return if ...
if ($asap && $is_spam) return $progress;
+ // ----------------------------------------
// Host: Uniqueness (uniq / non-uniq)
- foreach ($pickups as $pickup) $hosts[] = & $pickup['host'];
+
$hosts = array_unique($hosts);
- $sum['uniqhost'] += count($hosts);
+
+ if (isset($sum['uniqhost'])) $sum['uniqhost'] += count($hosts);
if ((! $asap || ! $is_spam) && isset($method['non_uniqhost'])) {
$sum['non_uniqhost'] = $sum['quantity'] - $sum['uniqhost'];
if ($sum['non_uniqhost'] > $method['non_uniqhost']) {
// Return if ...
if ($asap && $is_spam) return $progress;
+ // ----------------------------------------
// URI: Bad host (Separate good/bad hosts from $hosts)
- if ((! $asap || ! $is_spam) && isset($method['badhost'])) {
- // is_badhost()
- $list = get_blocklist('list');
- $blocked = blocklist_distiller($hosts, array_keys($list), $asap);
+ if ((! $asap || ! $is_spam) && isset($method['badhost'])) {
+ $list = get_blocklist('list');
+ $blocked = array_merge_leaves(
+ $blocked,
+ blocklist_distiller($hosts, array_keys($list), $asap),
+ FALSE
+ );
foreach($list as $key=>$type){
if (! $type) unset($blocked[$key]); // Ignore goodhost etc
}
unset($list);
-
if (! empty($blocked)) $is_spam['badhost'] = TRUE;
}
+ // Return if ...
+ //if ($asap && $is_spam) return $progress;
+
+ // ----------------------------------------
+ // End
+
return $progress;
}
} else {
$rest = rtrim(substr($value, 0, - strlen($resp)), '.'); // 'A.foo.bar'
}
- $trie = array_merge_recursive($trie, array($resp => array($rest => NULL)));
+ $trie = array_merge_leaves($trie, array($resp => array($rest => NULL)), FALSE);
}
// Format: var_export_shrink() -like output