<?php
-// $Id: spam.php,v 1.204 2008/12/27 11:25:30 henoheno Exp $
+// $Id: spam.php,v 1.210 2008/12/28 15:37:07 henoheno Exp $
// Copyright (C) 2006-2007 PukiWiki Developers Team
// License: GPL v2 or (at your option) any later version
//
{
if (! is_string($string)) return '';
- if (mb_strpos($string, '.') === FALSE)
+ if (mb_strpos($string, '.') === FALSE) {
+ // localhost
return generate_glob_regex($string, $divider);
+ }
if (is_ip($string)) {
// IPv4
// FQDN or something
$part = explode('.', $string, 2);
if ($part[0] == '') {
- $part[0] = '(?:.*\.)?'; // And all related FQDN
+ // .example.org
+ $part[0] = '(?:.*\.)?';
} else if ($part[0] == '*') {
- $part[0] = '.*\.'; // All subdomains/hosts only
+ // *.example.org
+ $part[0] = '.*\.';
} else {
+ // example.org, etc
return generate_glob_regex($string, $divider);
}
$part[1] = generate_glob_regex($part[1], $divider);
}
// Rough IP address checker
// Returns:
//   6     when $string contains ':' (seems IPv6),
//   4     when $string seems IPv4 dot-decimal: either a complete 'a.b.c.d'
//         or a leading part that ends with a dot, like '192.168.',
//   FALSE when $string is not a string, or seems not an IP address.
// Every "seems IP" result is truthy, so callers can also use it as a boolean.
// TODO: Strict digit, 0x, CIDR, '999.999.999.999', ':', '::G'
function is_ip($string = '')
{
	if (! is_string($string)) return FALSE;

	if (strpos($string, ':') !== FALSE) {
		return 6;	// Seems IPv6
	}

	// The alternatives are grouped so that '^' and '$' anchor BOTH of them.
	// Without the group, '^' binds only to the first alternative and '$' only
	// to the second, so '1.2.3.4.example.org' or 'example.192.168.' would be
	// taken as IPv4.
	if (preg_match('/^(?:' .
		'(?:[0-9]{1,3}\.){3}[0-9]{1,3}' . '|' .	// a.b.c.d
		'(?:[0-9]{1,3}\.){1,3}' .		// a. / a.b. / a.b.c.
		')$/',
		$string)) {
		return 4;	// Seems IPv4(dot-decimal)
	}

	return FALSE;	// Seems not IP
}
+// Load SPAM_INI_FILE and return parsed one
// $list: '' returns every list that was loaded; a list name returns only
//        that list; a name that was not loaded returns an empty array.
// NOTE(review): this block is shown as patch output ('-' = old line,
// '+' = new line) and unchanged lines may have been left out between
// hunks -- confirm against the complete file before relying on it.
function get_blocklist($list = '')
{
static $regexes;
// NOTE(review): as shown, $regexes is reset on every call, so 'static'
// gives no caching here -- confirm whether a guard was elided from this view
$regexes = array();
if (file_exists(SPAM_INI_FILE)) {
$blocklist = array();
+
// The included file presumably fills in $blocklist (see the sample below)
include(SPAM_INI_FILE);
// $blocklist['list'] = array(
// //'goodhost' => FALSE;
// '*.blogspot.com', // Blog services's subdomains (only)
// 'IANA-examples' => '#^(?:.*\.)?example\.(?:com|net|org)$#',
// );
- foreach(array('pre', 'list') as $special) {
+
// 'pre' and 'list' act as indexes: each is copied as-is, and each of its
// keys names another $blocklist entry to be loaded
+ foreach(array(
+ 'pre',
+ 'list',
+ ) as $special) {
+
if (! isset($blocklist[$special])) continue;
+
$regexes[$special] = $blocklist[$special];
+
foreach(array_keys($blocklist[$special]) as $_list) {
if (! isset($blocklist[$_list])) continue;
+
foreach ($blocklist[$_list] as $key => $value) {
// NOTE(review): only array values are handled in the lines visible here
if (is_array($value)) {
$regexes[$_list][$key] = array();
get_blocklist_add($regexes[$_list], $key, $value);
}
}
+
// Drop the entry so that a name listed again is not loaded twice
unset($blocklist[$_list]);
}
}
}
if ($list === '') {
- return $regexes; // ALL
+ return $regexes; // ALL of
} else if (isset($regexes[$list])) {
- return $regexes[$list];
+ return $regexes[$list]; // A part of
} else {
- return array();
+ return array(); // Found nothing
}
}
// Subroutine of get_blocklist(): Add new regex to the $array
//   $array : The array to add to (modified in place)
//   $key   : When it is a string, $value is stored under $key unchanged,
//            that is, $value is treated as a ready-made regex.
//            Otherwise $value is treated as a host pattern: it is converted
//            by generate_host_regex() and stored under $value itself.
//   $value : A regex, or a host pattern like '*.example.org'
function get_blocklist_add(& $array, $key = 0, $value = '*.example.org/path/to/file.html')
{
	if (is_string($key)) {
		// Treat $value as a regex for FQDN(host)s.
		// $value is a by-value parameter, so a plain assignment is enough
		$array[$key] = $value;
	} else {
		// '#' is the delimiter, and is also passed as the divider argument
		// of generate_host_regex()
		$array[$value] = '#^' . generate_host_regex($value, '#') . '$#i';
	}
}
// ----------------------------------------
// Area measure
- // Area: There's HTML anchor tag
- if ((! $asap || ! $is_spam) && isset($method['area_anchor'])) {
- $key = 'area_anchor';
- $_asap = isset($method['asap']) ? array('asap' => TRUE) : array();
- $result = area_pickup($target, array($key => TRUE) + $_asap);
- if ($result) {
- $sum[$key] = $result[$key];
- if (isset($method[$key]) && $sum[$key] > $method[$key]) {
- $is_spam[$key] = TRUE;
- }
+ if (! $asap || ! $is_spam) {
+
+ // Method pickup
+ $_method = array();
+ foreach(array(
+ 'area_anchor', // There's HTML anchor tag
+ 'area_bbcode', // There's 'BBCode' linking tag
+ ) as $key) {
+ if (isset($method[$key])) $_method[$key] = TRUE;
}
- }
- // Area: There's 'BBCode' linking tag
- if ((! $asap || ! $is_spam) && isset($method['area_bbcode'])) {
- $key = 'area_bbcode';
- $_asap = isset($method['asap']) ? array('asap' => TRUE) : array();
- $result = area_pickup($target, array($key => TRUE) + $_asap);
- if ($result) {
- $sum[$key] = $result[$key];
- if (isset($method[$key]) && $sum[$key] > $method[$key]) {
- $is_spam[$key] = TRUE;
+ if ($_method) {
+ $_asap = isset($method['asap']) ? array('asap' => TRUE) : array();
+ $_result = area_pickup($target, $_method + $_asap);
+ } else {
+ $_result = FALSE;
+ }
+
+ if ($_result) {
+ foreach(array_keys($_method) as $key) {
+ $sum[$key] = $_result[$key];
+ if (isset($method[$key]) && $sum[$key] > $method[$key]) {
+ $is_spam[$key] = TRUE;
+ }
}
+ $_result = NULL;
}
}
$subs = array();
foreach(array_keys($trie[$key]) as $sub) {
if ($sub == '') {
- $subs[] = $key;
+ $subs[] = $key; // 'example.com'
} else {
- $subs[] = $sub . '.' . $key;
+ $subs[] = $sub . '. '; // 'A.foo.bar. '
}
}
$result[] = ' \'' . $key . '\' => \'' . implode(', ', $subs) . '\',';