<?php
-// $Id: spam_pickup.php,v 1.67 2009/01/02 10:18:38 henoheno Exp $
-// Copyright (C) 2006-2007 PukiWiki Developers Team
+// $Id: spam_pickup.php,v 1.71 2009/01/04 08:56:07 henoheno Exp $
+// Copyright (C) 2006-2009 PukiWiki Developers Team
// License: GPL v2 or (at your option) any later version
//
// Functions for Concept-work of spam-uri metrics
//
+// (PHP 4 >= 4.3.0): preg_match_all(PREG_OFFSET_CAPTURE): $method['uri_XXX'] related feature
+//
+
// Path of the file that whois_responsibility() include()s to load its domain
// table. Defined only when nothing earlier has defined it, so it can be
// overridden before this file is loaded.
+if (! defined('DOMAIN_INI_FILE')) define('DOMAIN_INI_FILE', 'domain.ini.php');
// ---------------------
// URI pickup
// Refer to RFC 3986 (the regex below is not strict)
'#(\b[a-z][a-z0-9.+-]{1,8}):[/\\\]+' . // 1: Scheme
'(?:' .
- '([^\s<>"\'\[\]/\#?@]*)' . // 2: Userinfo (Username)
+ '([^\s<>"\'\[\]/\#?@]*)' . // 2: Userinfo (Username and/or password)
'@)?' .
'(' .
// 3: Host
$tmp[] = & $uri['scheme'];
$tmp[] = '://';
}
+
if (isset($uri['userinfo']) && $uri['userinfo'] !== '') {
$tmp[] = & $uri['userinfo'];
$tmp[] = '@';
+ } else if (isset($uri['user']) || isset($uri['pass'])) {
+ if (isset($uri['user']) && $uri['user'] !== '') {
+ $tmp[] = & $uri['user'];
+ }
+ $tmp[] = ':';
+ if (isset($uri['pass']) && $uri['pass'] !== '') {
+ $tmp[] = & $uri['pass'];
+ }
+ $tmp[] = '@';
}
+
if (isset($uri['host']) && $uri['host'] !== '') {
$tmp[] = & $uri['host'];
}
+
if (isset($uri['port']) && $uri['port'] !== '') {
$tmp[] = ':';
$tmp[] = & $uri['port'];
}
+
if (isset($uri['path']) && $uri['path'] !== '') {
$tmp[] = & $uri['path'];
}
+
if (isset($uri['file']) && $uri['file'] !== '') {
$tmp[] = & $uri['file'];
}
+
if (isset($uri['query']) && $uri['query'] !== '') {
$tmp[] = '?';
$tmp[] = & $uri['query'];
}
+
if (isset($uri['fragment']) && $uri['fragment'] !== '') {
$tmp[] = '#';
$tmp[] = & $uri['fragment'];
return implode('', $tmp);
}
+
// ---------------------
// URI normalization
return $array;
}
// Rough hostname checker: guess whether a host string is an IP address
//
// $string : Host part to examine
// Returns : 6     if the string contains ':' (treated as IPv6),
//           4     if the whole string looks like dot-decimal IPv4
//                 ('192.168.0.1'), or like 1 to 3 octets each followed
//                 by a dot ('192.168.'),
//           FALSE for non-strings and for everything else
//
// NOTE: Digits are not range-checked, so '999.999.999.999' still returns 4
// TODO: Strict digit, 0x, CIDR, '999.999.999.999', ':', '::G'
function is_ip($string = '')
{
	if (! is_string($string)) return FALSE;

	if (strpos($string, ':') !== FALSE) {
		return 6;	// Seems IPv6
	}

	// Both alternatives live inside one group so that '^' and '$' anchor
	// each of them. Without the group, '^' binds only to the first
	// alternative and '$' only to the second, so '1.2.3.4.example.com' and
	// 'example1.' would be taken for IPv4.
	// 'D' keeps '$' from matching just before a trailing newline.
	if (preg_match('/^(?:' .
		'(?:[0-9]{1,3}\.){3}[0-9]{1,3}' . '|' .
		'(?:[0-9]{1,3}\.){1,3}' .
		')$/D', $string)) {
		return 4;	// Seems IPv4(dot-decimal)
	}

	return FALSE;	// Seems not IP
}
+
// Find the responsibility-root of an FQDN: the registry suffix found in the
// domain table, plus the one label registered directly under it
//
// Examples (each assumes the domain table holds the suffix in question):
// 'foo.bar.example.com'        => 'example.com'        (.com has the last whois for it)
// 'foo.bar.example.au'         => 'example.au'         (.au has the last whois for it)
// 'foo.bar.example.edu.au'     => 'example.edu.au'     (.edu.au has the last whois for it)
// 'foo.bar.example.act.edu.au' => 'example.act.edu.au' (.act.edu.au has the last whois for it)
//
// $fqdn     : Host name to examine
//             NULL drops the cached domain table and returns ''
//             A non-string returns ''
//             Anything is_ip() accepts is returned unchanged
// $parent   : TRUE = do not add the first label that is missing from the table
// $implicit : TRUE = when only one label was collected and the name has more
//             than one, add the second label from the right as well
// Returns   : The collected labels joined with '.', or '' when none were collected
function whois_responsibility($fqdn = 'foo.bar.example.com', $parent = FALSE, $implicit = TRUE)
{
	static $domain;	// Domain table, kept between calls

	if ($fqdn === NULL) {
		$domain = NULL;	// Forget the table; the next call loads it again
		return '';
	}
	if (! is_string($fqdn)) return '';
	if (is_ip($fqdn)) return $fqdn;

	// Load the table on first use. The included file runs in this scope,
	// so it is expected to fill $domain
	if (! isset($domain)) {
		$domain = array();
		if (file_exists(DOMAIN_INI_FILE)) {
			include(DOMAIN_INI_FILE);
		}
	}

	// Walk the labels from the right, descending the table one level per label
	$labels = array_reverse(explode('.', $fqdn));
	$picked = array();
	$node   = $domain;
	foreach ($labels as $label) {
		$known = is_array($node) && isset($node[$label]);
		if (! $known) {
			// Whois servers must know this subdomain
			if (! $parent) $picked[] = $label;
			break;
		}
		$picked[] = $label;
		$node     = $node[$label];
	}

	// Implicit responsibility: Top-Level-Domains must not be yours
	// 'bar.foo.something' => 'foo.something'
	if ($implicit && count($picked) == 1 && count($labels) > 1) {
		$picked[] = $labels[1];
	}

	if (! $picked) return '';

	return implode('.', array_reverse($picked));
}
+
?>