<?php
-// $Id: spam.php,v 1.178 2007/06/16 03:23:34 henoheno Exp $
+// $Id: spam.php,v 1.182 2007/06/16 13:38:00 henoheno Exp $
// Copyright (C) 2006-2007 PukiWiki Developers Team
// License: GPL v2 or (at your option) any later version
//
// References:
// http://www.freebsd.org/cgi/man.cgi?query=strings (Man-page of GNU strings)
// http://www.pcre.org/pcre.txt
-function strings($binary = '', $min_len = 4, $ignore_space = FALSE, $multibyte = TRUE)
+// Note: mb_ereg_replace() is one of the mbstring extension's functions,
+// and its encoding needs to be initialized before use.
+function strings($binary = '', $min_len = 4, $ignore_space = FALSE, $multibyte = FALSE)
{
// String only
$binary = (is_array($binary) || $binary === TRUE) ? '' : strval($binary);
}
// Removing sequential spaces and too short lines
- $binary = strings($binary, $length, TRUE, TRUE);
+ $binary = strings($binary, $length, TRUE, FALSE); // Multibyte NOT needed
- // Words between spaces
- $binary = preg_replace('/[ \t][\w \t]+[ \t]/', ' ', $binary);
+ // Remove words (those containing none of '<>[]:') between spaces
+ $binary = preg_replace('/[ \t][\w.,()\ \t]+[ \t]/', ' ', $binary);
return $binary;
}
if (! is_string($string)) return '';
$string = spam_uri_removing_hocus_pocus(rawurldecode($string), $method);
+ //var_dump(htmlspecialchars($string));
// Domain exposure (simple)
// http://victim.example.org/nasty.example.org/path#frag
$trie = array();
foreach($progress['hosts'] as $value) {
- // Try to shorten (pre) -- array('example.com', 'bar', 'foo')
+ // array('example.com', 'bar', 'foo')
$resp = whois_responsibility($value); // 'example.com'
$rest = rtrim(substr($value, 0, - strlen($resp)), '.'); // 'foo.bar'
if ($rest) {
);
}
- // Try to shorten (post, non-recursive) -- 'foo.bar.example.com'
- array_joinbranch_leaf($trie, '.', 0, TRUE);
-
// Sort and flatten -- 'A.foo.bar.example.com, B.foo.bar.example.com'
foreach(array_keys($trie) as $key) {
if (is_array($trie[$key])) {
ksort_by_domain($trie[$key]);
+
$trie[$key] = implode(', ', array_flat_leaves($trie[$key]));
+ //$trie[$key] = var_export($trie[$key], TRUE); // DEBUG
}
}
- // TODO: ltrim('.') from $trie
-
ksort_by_domain($trie);
- // TODO: from array('foobar' => 'foobar') to 'foobar'
+ // Format: From array('foobar' => 'foobar') to 'foobar'
+ $tmp = array();
+ foreach($trie as $key => $value) {
+ $tmp[] = ' \'' .
+ (($trie[$key] == $key) ? $key : $key . '\' => \'' . $trie[$key])
+ . '\',';
+ unset($trie[$key]);
+ }
- return var_export_shrink($trie, TRUE, TRUE);
+ return 'array (' . "\n" . implode("\n", $tmp) . "\n" . ')';
}
// ksort() by domain
$array = $result;
}
-// array('F' => array('B' => array('C' => array('d' => array('' => 'foobar')))))
-// to
-// array('F.B.C.d.' => 'foobar')
-function array_joinbranch_leaf(& $array, $delim = '.', $limit = 0, $reverse = FALSE)
-{
- $result = array();
- if (! is_array($array)) return $result; // Nothing to do
-
- $limit = max(0, intval($limit));
- $cstack = array();
-
- foreach(array_keys($array) as $key) {
- $kstack = array();
- $k = -1;
-
- $single = array($key => & $array[$key]); // Keep it single
- $cursor = & $single;
- while(is_array($cursor) && count($cursor) == 1) { // Once
- ++$k;
- $kstack[] = key($cursor);
- $cursor = & $cursor[$kstack[$k]];
- if ($limit != 0 && $k == $limit) break;
- }
-
- // Relink
- if ($k != 0) {
- if ($reverse) $kstack = array_reverse($kstack);
- $joinkey = implode($delim, $kstack);
-
- unset($array[$key]);
- $array[$joinkey] = & $cursor;
- $result[$joinkey] = $k + 1; // Key seems not an single array => joined length
- }
- }
-
- return $result;
-}
-
-
// Check responsibility-root of the FQDN
// 'foo.bar.example.com' => 'example.com' (.com has the last whois for it)
// 'foo.bar.example.au' => 'example.au' (.au has the last whois for it)
if (empty($progress['is_spam'])) {
spam_dispose();
} else {
- $target = string($target, 0); // Removing "\0" etc
+
+// TODO: detect encoding from $target for mbstring functions
+// $tmp = array();
+// foreach(array_keys($target) as $key) {
+// $tmp[strings($key, 0, FALSE, TRUE)] = strings($target[$key], 0, FALSE, TRUE); // Removing "\0" etc
+// }
+// $target = & $tmp;
+
pkwk_spamnotify($action, $page, $target, $progress, $method);
spam_exit($exitmode, $progress);
}