X-Git-Url: http://git.osdn.net/view?a=blobdiff_plain;f=spam%2Fspam.php;h=caf7d9804b86c91c1d5ed04bb79fcf3b6d0ec043;hb=79a1cc2771df77a3a19a79b5b9a06374b71daf2f;hp=3e8f5a31b6d42d92bd05c9e2354fb0978b73df01;hpb=1dfc553dd916e74636db79acc9c4550afe5e0e43;p=pukiwiki%2Fpukiwiki_sandbox.git diff --git a/spam/spam.php b/spam/spam.php index 3e8f5a3..caf7d98 100644 --- a/spam/spam.php +++ b/spam/spam.php @@ -1,5 +1,5 @@ "\'\[\]/\#?@]*)' . // 2: Userinfo (Username) '@)?' . @@ -179,7 +179,7 @@ function uri_pickup($string = '') // 3: Host '\[[0-9a-f:.]+\]' . '|' . // IPv6([colon-hex and dot]): RFC2732 '(?:[0-9]{1,3}\.){3}[0-9]{1,3}' . '|' . // IPv4(dot-decimal): 001.22.3.44 - '[a-z0-9.-]+' . // hostname(FQDN) : foo.example.org + '[a-z0-9][a-z0-9.-]+[a-z0-9]' . // hostname(FQDN) : foo.example.org ')' . '(?::([0-9]*))?' . // 4: Port '((?:/+[^\s<>"\'\[\]/\#]+)*/+)?' . // 5: Directory path or path-info @@ -462,7 +462,7 @@ function spam_uri_pickup_preprocess($string = '') // http://victim.example.org/nasty.example.org/path#frag // => http://nasty.example.org/?refer=victim.example.org and original $string = preg_replace( - '#http://' . + '#h?ttp://' . '(' . 'ime\.nu' . '|' . // 2ch.net 'ime\.st' . '|' . // 2ch.net @@ -474,6 +474,23 @@ function spam_uri_pickup_preprocess($string = '') $string ); + // Domain exposure (gate-big5) + // http://victim.example.org/gate/big5/nasty.example.org/path + // => http://nasty.example.org/?refer=victim.example.org and original + $string = preg_replace( + '#h?ttp://' . + '(' . + 'big5.51job.com' . '|' . + 'big5.china.com' . '|' . + 'big5.xinhuanet.com' . '|' . + ')' . + '/gate/big5' . + '/([a-z0-9.%_-]+\.[a-z0-9.%_-]+)' . + '#i', // nasty.example.org + 'http://$2/?refer=$1 $0', // Preserve $0 or remove? + $string + ); + // Domain exposure (See _preg_replace_callback_domain_exposure()) $string = preg_replace_callback( array( @@ -1001,10 +1018,18 @@ function generate_host_regex($string = '', $divider = '/') function get_blocklist($list = '') { - static $regexs; + static $f_dispose = FALSE, $regexes; + + if ($list === NULL) { + $f_dispose = TRUE; + $regexes = NULL; // Unset + return array(); + } + + if (! isset($regexes)) { + if ($f_dispose === TRUE) die(__FUNCTION__ . '(): Memory already disposed'); - if (! isset($regexs)) { - $regexs = array(); + $regexes = array(); if (file_exists(SPAM_INI_FILE)) { $blocklist = array(); include(SPAM_INI_FILE); @@ -1013,7 +1038,7 @@ function get_blocklist($list = '') // 'IANA-examples' => '#^(?:.*\.)?example\.(?:com|net|org)$#', // ); if (isset($blocklist['list'])) { - $regexs['list'] = & $blocklist['list']; + $regexes['list'] = & $blocklist['list']; } else { // Default $blocklist['list'] = array( @@ -1025,12 +1050,12 @@ function get_blocklist($list = '') if (! isset($blocklist[$_list])) continue; foreach ($blocklist[$_list] as $key => $value) { if (is_array($value)) { - $regexs[$_list][$key] = array(); + $regexes[$_list][$key] = array(); foreach($value as $_key => $_value) { - get_blocklist_add($regexs[$_list][$key], $_key, $_value); + get_blocklist_add($regexes[$_list][$key], $_key, $_value); } } else { - get_blocklist_add($regexs[$_list], $key, $value); + get_blocklist_add($regexes[$_list], $key, $value); } } unset($blocklist[$_list]); @@ -1038,11 +1063,11 @@ function get_blocklist($list = '') } } - if ($list == '') { - return $regexs; // ALL - } else if (isset($regexs[$list])) { - return $regexs[$list]; - } else { + if ($list === '') { + return $regexes; // ALL + } else if (isset($regexes[$list])) { + return $regexes[$list]; + } else { return array(); } } @@ -1091,19 +1116,6 @@ function blocklist_distiller(& $hosts, $keys = array('goodhost', 'badhost'), $as return $blocked; } -// Simple example for badhost (not used now) -function is_badhost($hosts = array(), $asap = TRUE, $bool = TRUE) -{ - $list = get_blocklist('list'); - $blocked = blocklist_distiller($hosts, array_keys($list), $asap); - foreach($list as $key=>$type){ - if (! $type) unset($blocked[$key]); // Ignore goodhost etc - } - - return $bool ? ! empty($blocked) : $blocked; -} - - // Default (enabled) methods and thresholds (for content insertion) function check_uri_spam_method($times = 1, $t_area = 0, $rule = TRUE) { @@ -1218,18 +1230,14 @@ function check_uri_spam($target = '', $method = array()) if ($asap && $is_spam) break; // Merge only - $blocked = array_merge_leaves($blocked, $_progress['blocked'], FALSE, FALSE); - $hosts = array_merge_leaves($hosts, $_progress['hosts'], FALSE, FALSE); + $blocked = array_merge_recursive($blocked, $_progress['blocked']); + $hosts = array_merge_recursive($hosts, $_progress['hosts']); } // Unique values $blocked = array_unique_recursive($blocked); $hosts = array_unique_recursive($hosts); - // Renumber numeric keys - array_renumber_numeric_keys($blocked); - array_renumber_numeric_keys($hosts); - // Recount $sum['badhost'] $sum['badhost'] = array_count_leaves($blocked); @@ -1380,68 +1388,50 @@ function array_count_leaves($array = array(), $count_empty = FALSE) return $count; } -// Merge two leaves' value -function array_merge_leaves(& $array1, & $array2, $unique_values = TRUE, $renumber_numeric = TRUE) -{ - $array = array_merge_recursive($array1, $array2); - - // Redundant values (and keys) are vanished - if ($unique_values) $array = array_unique_recursive($array); - - // All NUMERIC keys are always renumbered from 0 - if ($renumber_numeric) array_renumber_numeric_keys($array); - - return $array; -} - -// Shrink array('key' => array('key')) to array('key') (Not used now) -function array_shrink_leaves(& $array) +// An array-leaves to a flat array +function array_flat_leaves($array, $unique = TRUE) { if (! is_array($array)) return $array; - foreach($array as $key => $value){ - // Recurse. Removing more leaves beforehand - if (is_array($value)) array_shrink_leaves($array[$key]); - } - $tmp = array(); - foreach($array as $key => $value){ - if (is_array($value)) { - $count = count($value); - if ($count == 1 && current($value) == $key) { - unset($array[$key]); - $array[] = $key; + foreach(array_keys($array) as $key) { + if (is_array($array[$key])) { + // Recurse + foreach(array_flat_leaves($array[$key]) as $_value) { + $tmp[] = $_value; } + } else { + $tmp[] = & $array[$key]; } } - return $array; + return $unique ? array_values(array_unique($tmp)) : $tmp; } -// array-leave to flat array() (with unique) -function array_flat_leaves($array) +// An array() to an array leaf +function array_leaf($array = array('A', 'B', 'C.D'), $stem = FALSE, $edge = array()) { - //var_dump($array); - if (! is_array($array)) return $array; - - $tmp = array(); - foreach($array as $key => $value) { - if (is_array($value)) { - foreach(array_flat_leaves($value) as $_value) { - $tmp[$_value] = TRUE; - } - } else { - $tmp[$value] = TRUE; - } + $leaf = array(); + $tmp = & $leaf; + foreach($array as $arg) { + if (! is_string($arg) && ! is_int($arg)) continue; + $tmp[$arg] = array(); + $parent = & $tmp; + $tmp = & $tmp[$arg]; + } + if ($stem) { + $parent[key($parent)] = & $edge; + } else { + $parent = key($parent); } - return array_keys($tmp); + return $leaf; // array('A' => array('B' => 'C.D')) } + // --------------------- // Reporting -// TODO: Don't show unused $method! // Summarize $progress (blocked only) function summarize_spam_progress($progress = array(), $blockedonly = FALSE) { @@ -1464,13 +1454,14 @@ function summarize_spam_progress($progress = array(), $blockedonly = FALSE) function summarize_detail_badhost($progress = array()) { - if (! isset($progress['blocked'])) return ''; + if (! isset($progress['blocked']) || empty($progress['blocked'])) return ''; + // Flat per group $blocked = array(); foreach($progress['blocked'] as $list => $lvalue) { foreach($lvalue as $group => $gvalue) { $flat = implode(', ', array_flat_leaves($gvalue)); - if ($flat == $group) { + if ($flat === $group) { $blocked[$list][] = $flat; } else { $blocked[$list][$group] = $flat; @@ -1478,6 +1469,17 @@ function summarize_detail_badhost($progress = array()) } } + // Shrink per list + // From: 'A-1' => array('ie.to') + // To: 'A-1' => 'ie.to' + foreach($blocked as $list => $lvalue) { + if (is_array($lvalue) && + count($lvalue) == 1 && + is_numeric(key($lvalue))) { + $blocked[$list] = current($lvalue); + } + } + return var_export_shrink($blocked, TRUE, TRUE); } @@ -1487,14 +1489,66 @@ function summarize_detail_newtral($progress = array()) ! is_array($progress['hosts']) || empty($progress['hosts'])) return ''; - // Sort by domain - $tmp = array(); - foreach($progress['hosts'] as $value) { - $tmp[delimiter_reverse($value)] = $value; + $result = ''; + if (FALSE) { + // Sort by domain + $tmp = array(); + foreach($progress['hosts'] as $value) { + $tmp[delimiter_reverse($value)] = $value; + } + ksort($tmp, SORT_STRING); + $result = count($tmp) . ' (' .implode(', ', $tmp) . ')'; + } else { + $tmp = array(); + foreach($progress['hosts'] as $value) { + $tmp = array_merge_recursive( + $tmp, + array_leaf(explode('.', delimiter_reverse($value) . '.'), TRUE, $value) + ); + } + ksort($tmp, SORT_STRING); + + separate_and_joinkey_leaves($tmp, '.', TRUE, TRUE); + separate_and_joinkey_leaves($tmp, '.', TRUE, FALSE); + separate_and_joinkey_leaves($tmp, '.', TRUE, FALSE); + //separate_and_joinkey_leaves($tmp, '.', TRUE, FALSE); + + foreach($tmp as $key => $value) { + if (is_array($value)) { + ksort($tmp[$key]); + // $tmp[$key] = implode(', ', array_flat_leaves($value)); + } + } + + $result = var_export_shrink($tmp, TRUE, TRUE); } - ksort($tmp); - return count($tmp) . ' (' .implode(', ', $tmp) . ')'; + return $result; +} + +function separate_and_joinkey_leaves( + & $array, // array('A' => array('B' => 'C.D')), + $delim = '.', $reversejoin = FALSE, $allowmulti = FALSE) +{ + if (! is_array($array)) return $array; + + $result = array(); + foreach(array_keys($array) as $key) { + if (! is_array($array[$key]) || (! $allowmulti && count($array[$key]) > 1)) { + $result[$key] = & $array[$key]; // Do nothing + } else { + foreach(array_keys($array[$key]) as $_key) { + $joinkey = $reversejoin ? + $_key . $delim . $key : + $key . $delim . $_key; + $result[$joinkey] = & $array[$key][$_key]; + } + } + } + + $array = & $result; + + return $result; // array('A.B' => 'C.D') } @@ -1505,8 +1559,14 @@ function summarize_detail_newtral($progress = array()) // NOTE: Call this function from various blocking feature, to disgueise the reason 'why blocked' function spam_exit($mode = '', $data = array()) { + // Dispose + get_blocklist(NULL); + + $exit = TRUE; switch ($mode) { - case '': echo("\n"); break; + case '': + echo("\n"); + break; case 'dump': echo('
' . "\n");
 			echo htmlspecialchars(var_export($data, TRUE));
@@ -1514,8 +1574,7 @@ function spam_exit($mode = '', $data = array())
 			break;
 	};
 
-	// Force exit
-	exit;
+	if ($exit) exit;	// Force exit
 }