HTML tag
//'uri_bbcode' => $t_area, // URI inside [url] or [link] BBCode
);
if ($rule) {
$bool = array(
// Rules
//'asap' => TRUE, // Quit or return As Soon As Possible
'uniqhost' => TRUE, // Show uniq host (at block notification mail)
'badhost' => TRUE, // Check badhost
);
} else {
$bool = array();
}
// Remove non-$positive values
foreach (array_keys($positive) as $key) {
if ($positive[$key] < 0) unset($positive[$key]);
}
return $positive + $bool;
}
// Simple/fast spam check
//
// Runs the checks enabled in $method against $target and returns a
// $progress report array with these keys:
//   'method'  : the $method array actually used (bound by reference)
//   'sum'     : counters per check name
//   'is_spam' : one TRUE entry per check that flagged the input
//   'blocked' : what blocklist_distiller() returned, minus falsy-typed lists
//   'hosts'   : unique hosts taken from the picked-up URIs
//
// $target : a string, or an array of strings (non-string elements are
//           skipped); an array is checked element by element through
//           recursion and the partial reports are merged
// $method : check name => limit/flag; when it is not a non-empty array,
//           check_uri_spam_method() supplies it
//
// When $method['asap'] is set, the function returns as soon as one check
// has flagged the input.
function check_uri_spam($target = '', $method = array())
{
// Return value
$progress = array(
'method' => array(
// Theme to do => Dummy, optional value, or optional array()
//'quantity' => 8,
//'uniqhost' => TRUE,
//'non_uniqhost'=> 3,
//'non_uniquri' => 3,
//'badhost' => TRUE,
//'area_anchor' => 0,
//'area_bbcode' => 0,
//'uri_anchor' => 0,
//'uri_bbcode' => 0,
),
'sum' => array(
// Theme => Volume found (int)
),
'is_spam' => array(
// Flag. If something is defined here,
// one or more spam will be included
// in this report
),
'blocked' => array(
// Hosts blocked
//'category' => array(
// 'host',
//)
),
'hosts' => array(
// Hosts not blocked
// NOTE(review): nothing visible here removes blocked hosts from this
// list; that only holds if blocklist_distiller() edits $hosts by
// reference -- confirm
),
);
// Aliases (references into $progress, so writes below fill the report)
$sum = & $progress['sum'];
$is_spam = & $progress['is_spam'];
$progress['method'] = & $method; // Argument
$blocked = & $progress['blocked'];
$hosts = & $progress['hosts'];
// NOTE(review): $asap is read before $method is defaulted just below, so
// an 'asap' entry coming from check_uri_spam_method() would not be seen here
$asap = isset($method['asap']);
// Init
if (! is_array($method) || empty($method)) {
$method = check_uri_spam_method();
}
// Start every enabled check's counter at zero
foreach(array_keys($method) as $key) {
if (! isset($sum[$key])) $sum[$key] = 0;
}
if (is_array($target)) {
foreach($target as $str) {
if (! is_string($str)) continue;
$_progress = check_uri_spam($str, $method); // Recurse
// Merge $sum
$_sum = & $_progress['sum'];
foreach (array_keys($_sum) as $key) {
if (! isset($sum[$key])) {
$sum[$key] = & $_sum[$key];
} else {
$sum[$key] += $_sum[$key];
}
}
// Merge $is_spam
$_is_spam = & $_progress['is_spam'];
foreach (array_keys($_is_spam) as $key) {
$is_spam[$key] = TRUE;
if ($asap) break;
}
// With 'asap', this break happens before the 'blocked'/'hosts' of the
// current element are merged
if ($asap && $is_spam) break;
// Merge only
$blocked = array_merge_recursive($blocked, $_progress['blocked']);
$hosts = array_merge_recursive($hosts, $_progress['hosts']);
}
// Unique values
$blocked = array_unique_recursive($blocked);
$hosts = array_unique_recursive($hosts);
// Recount $sum['badhost']
$sum['badhost'] = array_count_leaves($blocked);
return $progress;
}
// From here on $target is handled as a single string
// Area: There's HTML anchor tag
if ((! $asap || ! $is_spam) && isset($method['area_anchor'])) {
$key = 'area_anchor';
$_asap = isset($method['asap']) ? array('asap' => TRUE) : array();
$result = area_pickup($target, array($key => TRUE) + $_asap);
if ($result) {
$sum[$key] = $result[$key];
// Flag when the count exceeds the configured limit
if (isset($method[$key]) && $sum[$key] > $method[$key]) {
$is_spam[$key] = TRUE;
}
}
}
// Area: There's 'BBCode' linking tag
if ((! $asap || ! $is_spam) && isset($method['area_bbcode'])) {
$key = 'area_bbcode';
$_asap = isset($method['asap']) ? array('asap' => TRUE) : array();
$result = area_pickup($target, array($key => TRUE) + $_asap);
if ($result) {
$sum[$key] = $result[$key];
if (isset($method[$key]) && $sum[$key] > $method[$key]) {
$is_spam[$key] = TRUE;
}
}
}
// Return if ...
if ($asap && $is_spam) return $progress;
// URI: Pickup
$pickups = uri_pickup_normalize(spam_uri_pickup($target, $method));
// Return if ...
if (empty($pickups)) return $progress;
// URI: Check quantity
// NOTE(review): $sum['quantity'] is only pre-initialised when 'quantity'
// is a key of $method; otherwise this += works on an undefined index
$sum['quantity'] += count($pickups);
// URI quantity
if ((! $asap || ! $is_spam) && isset($method['quantity']) &&
$sum['quantity'] > $method['quantity']) {
$is_spam['quantity'] = TRUE;
}
// URI: used inside HTML anchor tag pair
if ((! $asap || ! $is_spam) && isset($method['uri_anchor'])) {
$key = 'uri_anchor';
foreach($pickups as $pickup) {
if (isset($pickup['area'][$key])) {
$sum[$key] += $pickup['area'][$key];
if(isset($method[$key]) &&
$sum[$key] > $method[$key]) {
$is_spam[$key] = TRUE;
if ($asap && $is_spam) break;
}
if ($asap && $is_spam) break;
}
}
}
// URI: used inside 'BBCode' pair
if ((! $asap || ! $is_spam) && isset($method['uri_bbcode'])) {
$key = 'uri_bbcode';
foreach($pickups as $pickup) {
if (isset($pickup['area'][$key])) {
$sum[$key] += $pickup['area'][$key];
if(isset($method[$key]) &&
$sum[$key] > $method[$key]) {
$is_spam[$key] = TRUE;
if ($asap && $is_spam) break;
}
if ($asap && $is_spam) break;
}
}
}
// URI: Uniqueness (and removing non-uniques)
if ((! $asap || ! $is_spam) && isset($method['non_uniquri'])) {
$uris = array();
foreach (array_keys($pickups) as $key) {
$uris[$key] = uri_pickup_implode($pickups[$key]);
}
$count = count($uris);
$uris = array_unique($uris);
// Number of duplicated URIs = entries dropped by array_unique()
$sum['non_uniquri'] += $count - count($uris);
if ($sum['non_uniquri'] > $method['non_uniquri']) {
$is_spam['non_uniquri'] = TRUE;
}
if (! $asap || ! $is_spam) {
// Drop the duplicated pickups so later checks see each URI once
foreach (array_diff(array_keys($pickups),
array_keys($uris)) as $remove) {
unset($pickups[$remove]);
}
}
unset($uris);
}
// Return if ...
if ($asap && $is_spam) return $progress;
// Host: Uniqueness (uniq / non-uniq)
foreach ($pickups as $pickup) $hosts[] = & $pickup['host'];
$hosts = array_unique($hosts);
// NOTE(review): like 'quantity' above, $sum['uniqhost'] is only
// pre-initialised when 'uniqhost' is a key of $method
$sum['uniqhost'] += count($hosts);
if ((! $asap || ! $is_spam) && isset($method['non_uniqhost'])) {
$sum['non_uniqhost'] = $sum['quantity'] - $sum['uniqhost'];
if ($sum['non_uniqhost'] > $method['non_uniqhost']) {
$is_spam['non_uniqhost'] = TRUE;
}
}
// Return if ...
if ($asap && $is_spam) return $progress;
// URI: Bad host (Separate good/bad hosts from $hosts)
if ((! $asap || ! $is_spam) && isset($method['badhost'])) {
// is_badhost()
$list = get_blocklist('list');
$blocked = blocklist_distiller($hosts, array_keys($list), $asap);
foreach($list as $key=>$type){
if (! $type) unset($blocked[$key]); // Ignore goodhost etc
}
unset($list);
// Anything left after dropping the falsy-typed lists counts as a bad host
if (! empty($blocked)) $is_spam['badhost'] = TRUE;
}
return $progress;
}
// Count the leaves of a (possibly nested) array.
// A leaf is any value that is not an array; an empty array is counted
// as a leaf too, but only when $count_empty is TRUE.
//
// $array       : value to inspect (a non-array counts as one leaf)
// $count_empty : count empty arrays as leaves
// Returns the number of leaves (int)
function array_count_leaves($array = array(), $count_empty = FALSE)
{
    $total = 0;

    // Walk the tree with an explicit stack instead of recursion
    $pending = array($array);
    while ($pending) {
        $node = array_pop($pending);

        if (! is_array($node)) {
            ++$total;
            continue;
        }
        if (empty($node)) {
            if ($count_empty) ++$total;
            continue;
        }
        foreach ($node as $child) {
            $pending[] = $child;
        }
    }

    return $total;
}
// Flatten the leaves of a nested array into one flat, 0-indexed array.
//
// $array  : nested array (a non-array value is returned unchanged)
// $unique : TRUE  => duplicated leaves are removed (first occurrence wins)
//           FALSE => every leaf is kept, in traversal order
// Returns the flat array of leaves
function array_flat_leaves($array, $unique = TRUE)
{
    if (! is_array($array)) return $array;

    $flat = array();
    foreach ($array as $value) {
        if (is_array($value)) {
            // Recurse, handing $unique down so that $unique = FALSE really
            // keeps the duplicates that live inside nested arrays
            foreach (array_flat_leaves($value, $unique) as $leaf) {
                $flat[] = $leaf;
            }
        } else {
            $flat[] = $value;
        }
    }

    return $unique ? array_values(array_unique($flat)) : $flat;
}
// Turn a flat list of keys into one nested single-branch array.
//
//   array_leaf(array('A', 'B', 'C.D'))            => array('A' => array('B' => 'C.D'))
//   array_leaf(array('A', 'B', 'C.D'), TRUE)      => array('A' => array('B' => array('C.D' => TRUE)))
//   array_leaf(array('A', 'B'), TRUE, 'value')    => array('A' => array('B' => 'value'))
//
// $array : list of keys; elements that are neither string nor int are skipped
//          (a non-array value is returned unchanged)
// $stem  : FALSE => the last key becomes the innermost value
//          TRUE  => the last key stays a key, and holds $edge
// $edge  : innermost value used when $stem is TRUE
// Returns the nested array. A single usable key with $stem = FALSE yields
// that key itself, and no usable key at all yields an empty array.
function array_leaf($array = array('A', 'B', 'C.D'), $stem = FALSE, $edge = TRUE)
{
    if (! is_array($array)) return $array;

    // Keep usable keys only
    $keys = array();
    foreach ($array as $arg) {
        if (is_string($arg) || is_int($arg)) $keys[] = $arg;
    }

    // Nothing to build from: calling key() on an unset variable would fail
    if (empty($keys)) return array();

    // Innermost part first. Going through a real array key keeps PHP's key
    // normalisation (e.g. '1' => 1) for the $stem = FALSE case
    $last = array_pop($keys);
    $node = array($last => $edge);
    if (! $stem) $node = key($node);

    // Wrap from the inside out
    foreach (array_reverse($keys) as $key) {
        $node = array($key => $node);
    }

    return $node;
}
// ---------------------
// Reporting
// Summarize $progress as one comma separated line.
//
// $progress    : report array as built by check_uri_spam()
// $blockedonly : TRUE  => names of the checks listed in $progress['is_spam']
//                FALSE => 'name(count)' for each entry of $progress['sum']
//                         that is also a key of $progress['method'] and has
//                         a non-zero count
// Returns the summary string ('' when there is nothing to report)
function summarize_spam_progress($progress = array(), $blockedonly = FALSE)
{
    $parts = array();

    if ($blockedonly) {
        // Guarded like the 'sum' branch, so an incomplete report (such as
        // the default argument) yields '' instead of failing in array_keys()
        if (isset($progress['is_spam']) && is_array($progress['is_spam'])) {
            $parts = array_keys($progress['is_spam']);
        }
    } else if (isset($progress['sum']) && is_array($progress['sum'])) {
        $method = isset($progress['method']) ? $progress['method'] : array();
        foreach ($progress['sum'] as $key => $value) {
            if (isset($method[$key]) && $value) {
                $parts[] = $key . '(' . $value . ')';
            }
        }
    }

    return implode(', ', $parts);
}
// Summarize the blocked hosts of $progress['blocked'] as text.
// $progress['blocked'] is walked as list => group => hosts; the hosts of
// each group are flattened into one comma separated string and the result
// is rendered with var_export_shrink().
// Returns '' when $progress has no (or an empty) 'blocked' entry.
function summarize_detail_badhost($progress = array())
{
    if (empty($progress['blocked'])) return '';

    // Flat per group
    $summary = array();
    foreach ($progress['blocked'] as $listname => $groups) {
        foreach ($groups as $groupname => $members) {
            $joined = implode(', ', array_flat_leaves($members));
            if ($joined !== $groupname) {
                $summary[$listname][$groupname] = $joined;
                continue;
            }
            // The group name says it all: no need to repeat it as a key
            $summary[$listname][] = $joined;
        }
    }

    // Shrink per list
    // From: 'A-1' => array('ie.to')
    // To:   'A-1' => 'ie.to'
    foreach (array_keys($summary) as $listname) {
        $entries = $summary[$listname];
        if (! is_array($entries) || count($entries) != 1) continue;
        if (is_numeric(key($entries))) {
            $summary[$listname] = current($entries);
        }
    }

    return var_export_shrink($summary, TRUE, TRUE);
}
// Summarize the hosts of $progress['hosts'] as text, grouped per domain.
//
// Each host is turned into a nested array keyed by the labels of
// delimiter_reverse($host) (presumably the host with its label order
// reversed -- confirm against delimiter_reverse()), the branches are joined
// into "domain.tld"-like keys, and every group is rendered as one comma
// separated string. The result is formatted by var_export_shrink().
//
// Returns '' when $progress['hosts'] is missing, not an array, or empty.
function summarize_detail_newtral($progress = array())
{
    if (! isset($progress['hosts']) ||
        ! is_array($progress['hosts']) ||
        empty($progress['hosts'])) return '';

    $tree = array();
    foreach ($progress['hosts'] as $host) {
        $tree = array_merge_recursive(
            $tree,
            array_leaf(explode('.', delimiter_reverse($host)), TRUE, $host)
        );
    }

    // TODO: IP address 1.2.3.4 => "0"-3-2-1 by array_shrinkbranch_leaves()
    array_shrinkbranch_leaves($tree, '.', TRUE); // "domain.tld"
    array_joinbranch_leaf($tree, '.', 0, TRUE);

    // Iterate over the keys and work on $tree[$key] itself: sorting the
    // element and then flattening a foreach copy of it would throw the
    // sort away
    foreach (array_keys($tree) as $key) {
        if (is_array($tree[$key])) {
            ksort($tree[$key], SORT_STRING);
            $tree[$key] = implode(', ', array_flat_leaves($tree[$key]));
        }
    }
    ksort($tree, SORT_STRING);

    return var_export_shrink($tree, TRUE, TRUE);
}
// Join every single-child chain that starts at a top-level key into one key.
//
//   array('F' => array('B' => array('C' => array('d' => array('' => 'foobar')))))
//   to
//   array('F.B.C.d.' => 'foobar')
//
// $array   : array to rewrite in place (passed by reference)
// $delim   : glue put between the joined keys
// $limit   : 0 = no limit; otherwise stop after descending $limit levels
//            below the top-level key
// $reverse : join the keys innermost-first ('.d.C.B.F' in the example)
// Returns array(joined key => number of keys joined) for every top-level
// entry that was rewritten; entries with nothing to join are left alone.
function array_joinbranch_leaf(& $array, $delim = '.', $limit = 0, $reverse = FALSE)
{
    $joined = array();
    if (! is_array($array)) return $joined; // Nothing to do

    $limit = max(0, intval($limit));

    foreach (array_keys($array) as $top) {
        $path  = array($top);
        $depth = 0;
        $node  = & $array[$top];

        // Descend as long as there is exactly one way to go
        while (is_array($node) && count($node) == 1) {
            ++$depth;
            $path[] = key($node);
            $node = & $node[$path[$depth]];
            if ($limit != 0 && $depth == $limit) break;
        }

        // Nothing below the top-level key could be joined
        if ($depth == 0) continue;

        // Relink: the value found at the end of the chain moves under
        // the joined key
        if ($reverse) $path = array_reverse($path);
        $joinkey = implode($delim, $path);
        unset($array[$top]);
        $array[$joinkey] = & $node;
        $joined[$joinkey] = $depth + 1; // Joined length
    }

    return $joined;
}
// Pull the children of every top-level branch up one level, joining the
// parent key and the child key with $delim.
//
//   array('A' => array('B' => 'C')) to
//   array('A.B' => 'C')
//
// With $recurse = TRUE the array is rescanned until nothing is left to pull up:
//
//   array(
//     'A' => array('B' => array('C' => array('D' => '1'))),
//     'G' => array('H' => '2'),
//   )
//   to
//   array(
//     'G.H'     => '2',
//     'A.B.C.D' => '1',
//   )
//
// $array   : array to rewrite in place (passed by reference)
// $delim   : glue put between parent key and child key
// $reverse : TRUE => 'child.parent' instead of 'parent.child'
// $recurse : repeat until the array is flat
// Returns how many children were pulled up, summed over all passes
function array_shrinkbranch_leaves(& $array, $delim = '.', $reverse = FALSE, $recurse = FALSE)
{
    $result = 0;
    if (! is_array($array) || empty($array)) return $result;

    foreach (array_keys($array) as $key) {
        $branch = & $array[$key];
        if (! is_array($branch) || empty($branch)) continue;

        foreach (array_keys($branch) as $bkey) {
            $joinkey = $reverse ?
                $bkey . $delim . $key :
                $key . $delim . $bkey;
            $array[$joinkey] = & $branch[$bkey];
            ++$result;
        }
        // The branch is empty of interest now; $branch keeps the children
        // alive for the references stored above
        unset($array[$key]);
    }
    unset($branch);

    // Rescan (Recurse). Add the deeper passes to the count: the last
    // pass always finds nothing, so its result alone would always be 0
    if ($recurse && $result) {
        $result += array_shrinkbranch_leaves($array, $delim, $reverse, $recurse);
    }

    return $result; // Tell me how many
}
//$a = array (
// 'edu' => array (
// 'berkeley' => array (
// 'polisci' => array (
// '' => 'polisci.berkeley.edu',
// ),
// ),
// 'cmich' => array (
// 'rso' => array (
// '' => 'rso.cmich.edu',
// ),
// ),
// ),
//);
//array_shrinkbranch_leaves($a, '.', TRUE);
//var_export($a);
//$a = array (
// '4' => array (
// '5' => array (
// '6' => array (
// '' => '7.8.9',
// ),
// ),
// ),
//);
//array_shrinkbranch_leaves($a, '.', TRUE);
//var_export($a);
// Find the part of $fqdn that somebody other than the registry answers for.
//
// The labels of $fqdn are read from the right. As long as they match the
// table of registries known to delegate below the 2nd (or 3rd) level they
// are kept, and the first label that does not match is kept as well:
//
//   'www.example.co.jp'  => 'example.co.jp'
//   'co.jp'              => 'co.jp'
//   'bar.foo.something'  => 'foo.something'  ($implicit = TRUE)
//   'bar.foo.something'  => 'something'      ($implicit = FALSE)
//
// $fqdn     : host name (matching is case-sensitive; the table is lower-case)
// $implicit : when only the top-level label was kept, add the 2nd label too
// Returns the shortened name, or '' when $fqdn is not a string
function domain_responsibility($fqdn = 'fqdn.example.com', $implicit = TRUE)
{
    // Domains who have 2nd and/or 3rd level domains
    static $domain = array(
        // ccTLD Australia http://www.auda.org.au/ http://www.aunic.net/ http://www.ausregistry.com.au/
        'au' => array(
            // .au Second Level Domains http://www.auda.org.au/domains/
            'asn' => TRUE,
            'com' => TRUE,
            'conf' => TRUE,
            'csiro' => TRUE,
            'edu' => array( // http://www.domainname.edu.au/
                'act' => TRUE,
                'nt' => TRUE,
                'nsw' => TRUE,
                'qld' => TRUE,
                'sa' => TRUE,
                'tas' => TRUE,
                'vic' => TRUE,
                'wa' => TRUE,
            ),
            'gov' => array(
                'act' => TRUE, // Australian Capital Territory
                'nt' => TRUE, // Northern Territory
                'nsw' => TRUE, // New South Wales
                'qld' => TRUE, // Queensland
                'sa' => TRUE, // South Australia
                'tas' => TRUE, // Tasmania
                'vic' => TRUE, // Victoria
                'wa' => TRUE, // Western Australia
            ),
            'id' => TRUE,
            'net' => TRUE,
            'org' => TRUE,
            'info' => TRUE,
        ),
        // ccTLD Japan http://jprs.co.jp/en/ http://whois.jprs.jp/en/
        'jp' => array(
            // http://jprs.co.jp/en/jpdomain.html
            'ac' => TRUE,
            'ad' => TRUE,
            'co' => TRUE,
            'geo' => TRUE,
            'go' => TRUE,
            'gr' => TRUE,
            'lg' => TRUE,
            'ne' => TRUE,
            'or' => TRUE,
        ),
        // ccTLD Ukraine http://www.nic.net.ua/ http://whois.com.ua/
        'ua' => array(
            'cherkassy' => TRUE, // www.cherkassy.ua
            'chernigov' => TRUE,
            'chernovtsy' => TRUE,
            'ck' => TRUE,
            'cn' => TRUE,
            'com' => TRUE,
            'crimea' => TRUE,
            'cv' => TRUE,
            'dn' => TRUE,
            'dnepropetrovsk' => TRUE,
            'donetsk' => TRUE,
            'dp' => TRUE,
            'edu' => TRUE,
            'gov' => TRUE,
            'if' => TRUE,
            'ivano-frankivsk' => TRUE,
            'kh' => TRUE,
            'kharkov' => TRUE,
            'kherson' => TRUE,
            'kiev' => TRUE,
            'kirovograd' => TRUE,
            'km' => TRUE,
            'kr' => TRUE,
            'ks' => TRUE,
            'lg' => TRUE,
            'lugansk' => TRUE,
            'lutsk' => TRUE,
            'lviv' => TRUE,
            'mk' => TRUE,
            'net' => TRUE,
            'nikolaev' => TRUE,
            'od' => TRUE,
            'odessa' => TRUE,
            'org' => TRUE,
            'pl' => TRUE,
            'poltava' => TRUE,
            'rovno' => TRUE,
            'rv' => TRUE,
            'sebastopol' => TRUE,
            'sumy' => TRUE,
            'te' => TRUE,
            'ternopil' => TRUE,
            'uz' => TRUE,
            'uzhgorod' => TRUE,
            'vinnica' => TRUE,
            'vn' => TRUE,
            'zaporizhzhe' => TRUE,
            'zhitomir' => TRUE,
            'zp' => TRUE,
            'zt' => TRUE,
        ),
    );

    if (! is_string($fqdn)) return '';

    // Labels, top-level first
    $labels = array_reverse(explode('.', $fqdn));
    $count  = count($labels);

    $result = array();
    $cursor = $domain;
    // Bounded by $count: a name made of registry labels only (e.g. 'co.jp')
    // must not read one label past the end
    for ($i = 0; $i < $count; ++$i) {
        $label = $labels[$i];
        $result[] = $label;
        if (! is_array($cursor) || ! isset($cursor[$label])) {
            // First label the registries do not know:
            // Whois servers must know this subdomain
            break;
        }
        $cursor = $cursor[$label];
    }

    // Implicit responsibility: Top-Level-Domains must not be yours
    // 'bar.foo.something' => 'foo.something'
    if ($implicit && count($result) == 1 && $count > 1) {
        $result[] = $labels[1];
    }

    return $result ? implode('.', array_reverse($result)) : '';
}
// ---------------------
// Exit
// Free memory
// Calls get_blocklist(NULL); presumably that tells get_blocklist() to drop
// whatever it keeps loaded -- confirm against get_blocklist().
// Takes no arguments and returns nothing.
function spam_dispose()
{
get_blocklist(NULL);
}
// Common behavior for blocking
// NOTE: Call this function from the various blocking features, to disguise the reason 'why blocked'
//
// $mode : ''     => print a single newline
//         'dump' => print $data via var_export(), HTML-escaped
//         other  => print nothing
// $data : value to dump in 'dump' mode
// The script is terminated in every case.
//
// NOTE(review): the empty '' literal and the literal holding just a line
// break in 'dump' mode look like the remains of markup wrapped around the
// dump (perhaps a <pre> pair) -- confirm against the upstream file
function spam_exit($mode = '', $data = array())
{
    $terminate = TRUE;

    // Loose comparisons on purpose, in the same order a switch would test them
    if ($mode == '') {
        echo("\n");
    } else if ($mode == 'dump') {
        echo('' . "\n");
        echo htmlspecialchars(var_export($data, TRUE));
        echo('
' . "\n");
    }

    if ($terminate) exit; // Force exit
}
// ---------------------
// Simple filtering
// TODO: Record them
// Simple/fast spam filter ($target: 'a string' or an array())
//
// Runs check_uri_spam($target, $method). When nothing was flagged, only
// spam_dispose() is called and the function returns. Otherwise the
// administrator is notified through pkwk_spamnotify() and the request is
// ended by spam_exit($exitmode, ...).
//
// $action   : text describing the action, passed on to pkwk_spamnotify()
// $page     : page name, passed on to pkwk_spamnotify()
// $target   : text(s) to check
// $method   : checks to run (see check_uri_spam())
// $exitmode : mode for spam_exit()
function pkwk_spamfilter($action, $page, $target = array('title' => ''), $method = array(), $exitmode = '')
{
    $progress = check_uri_spam($target, $method);

    // Clean: release what the check loaded, and carry on
    if (empty($progress['is_spam'])) {
        spam_dispose();
        return;
    }

    // NOTE(review): string() is neither a PHP builtin nor defined in the
    // visible part of this file -- confirm it exists where this runs
    $target = string($target, 0); // Removing "\0" etc
    pkwk_spamnotify($action, $page, $target, $progress, $method);
    spam_exit($exitmode, $progress);
}
// ---------------------
// PukiWiki original
// Mail to administrator(s)
//
// Builds a summary of a blocked request from $progress and hands it to
// pkwk_mail_notify(), with var_export($target) as the message.
// Does nothing unless the global $notify is truthy.
//
// $action   : text reported as COMMENT
// $page     : page name (reported as PAGE only if is_pagename() accepts it)
// $target   : the checked text(s)
// $progress : report array from check_uri_spam()
// $method   : checks that were run; with 'asap' set, METRICS and
//             DETAIL_NEUTRAL_HOST are left out
function pkwk_spamnotify($action, $page, $target = array('title' => ''), $progress = array(), $method = array())
{
    global $notify, $notify_subject;

    if (! $notify) return;

    // With 'asap' the check stopped early, so the full metrics are not reported
    $verbose = ! isset($method['asap']);

    $summary = array(
        'ACTION' => 'Blocked by: ' . summarize_spam_progress($progress, TRUE),
    );
    if ($verbose) {
        $summary['METRICS'] = summarize_spam_progress($progress);
    }

    $badhost = summarize_detail_badhost($progress);
    if ($badhost != '') $summary['DETAIL_BADHOST'] = $badhost;

    $neutral = summarize_detail_newtral($progress);
    if ($verbose && $neutral != '') $summary['DETAIL_NEUTRAL_HOST'] = $neutral;

    // None of these keys exists yet, so the union appends them in this order
    $summary += array(
        'COMMENT'     => $action,
        'PAGE'        => '[blocked] ' . (is_pagename($page) ? $page : ''),
        'URI'         => get_script_uri() . '?' . rawurlencode($page),
        'USER_AGENT'  => TRUE,
        'REMOTE_ADDR' => TRUE,
    );

    pkwk_mail_notify($notify_subject, var_export($target, TRUE), $summary, TRUE);
}
?>