<?php
-// $Id: spam.php,v 1.14 2006/11/12 04:27:11 henoheno Exp $
+// $Id: spam.php,v 1.15 2006/11/12 10:59:56 henoheno Exp $
// Copyright (C) 2006 PukiWiki Developers Team
// License: GPL v2 or (at your option) any later version
// Functions for Concept-work of spam-uri metrics
-// Return an array of normalized/parsed URIs in the $string
+// Return an array of URIs in the $string
// [OK] http://nasty.example.org#nasty_string
// [OK] http://nasty.example.org/foo/xxx#nasty_string/bar
// [OK] ftp://dfshodfs:80/dfsdfs
-// [OK] http://victim.example.org/go?http%3A%2F%2Fnasty.example.org
-// [OK] http://victim.example.org/gphttp://nasty.example.org
-function spam_pickup($string = '')
+function uri_pickup($string = '')
{
- // Preprocess: urldecode() and adding space(s)
- $string = preg_replace(
- array(
- '#(?:https?|ftp):/#',
- '#\b[a-z][a-z0-9.+-]{1,8}://#i',
- '#[a-z][a-z0-9.+-]{1,8}://#i'
- ), ' $0', rawurldecode($string));
-
- // URI pickup: Not available for user@password, IDN, Fragment(=ignored)
+ // Not available for: user@password, IDN, Fragment(=ignored)
$array = array();
preg_match_all(
// Refer RFC3986
'#i',
$string, $array, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
//var_dump(recursive_map('htmlspecialchars', $array));
+
// Shrink $array
$parts = array(1 => 'scheme', 2 => 'host', 3 => 'port',
4 => 'path', 5 => 'file');
array_rename_keys($array[$uri], $parts, TRUE, $default);
$offset = $array[$uri]['scheme'][1]; // Scheme's offset
- // Remove offsets (with normalization)
+ // Remove offsets for each part
foreach(array_keys($array[$uri]) as $part) {
- $array[$uri][$part] =
- strtolower($array[$uri][$part][0]);
+ $array[$uri][$part] = & $array[$uri][$part][0];
}
- //$array[$uri]['path'] = path_normalize($array[$uri]['path']);
+
$array[$uri]['offset'] = $offset;
$array[$uri]['area'] = 0;
}
+ return $array;
+}
+
+// Preprocess: rawurldecode() and adding space(s) to detect/count some URIs if possible
+// NOTE: It's maybe danger to var_dump() these results.
+// [OK] http://victim.example.org/go?http%3A%2F%2Fnasty.example.org
+// [OK] http://victim.example.org/http://nasty.example.org
+function spam_uri_pickup_preprocess($string = '')
+{
+ if (is_string($string)) {
+ return preg_replace(
+ array(
+ '#(?:https?|ftp):/#',
+ '#\b[a-z][a-z0-9.+-]{1,8}://#i',
+ '#[a-z][a-z0-9.+-]{1,8}://#i'
+ ),
+ ' $0',
+ rawurldecode($string)
+ );
+ } else {
+ return '';
+ }
+}
+
+// Main function of spam-uri pickup
+function spam_uri_pickup($string = '')
+{
+ $string = spam_uri_pickup_preprocess($string);
+
+ $array = uri_pickup($string);
+
// Area elevation for '(especially external)link' intension
if (! empty($array)) {
// Anchor tags by preg_match_all()
}
area_measure($areas, $array);
- // Various Wiki syntax
- // [text_or_uri>text_or_uri]
- // [text_or_uri:text_or_uri]
- // [text_or_uri|text_or_uri]
- // [text_or_uri->text_or_uri]
- // [text_or_uri text_or_uri] // MediaWiki
- // MediaWiki: [http://nasty.example.com/ visit http://nasty.example.com/]
-
// phpBB's "BBCode" by preg_match_all()
// [url]http://nasty.example.com/[/url]
// [link]http://nasty.example.com/[/link]
}
area_measure($areas, $array);
+ // Various Wiki syntax
+ // [text_or_uri>text_or_uri]
+ // [text_or_uri:text_or_uri]
+ // [text_or_uri|text_or_uri]
+ // [text_or_uri->text_or_uri]
+ // [text_or_uri text_or_uri] // MediaWiki
+ // MediaWiki: [http://nasty.example.com/ visit http://nasty.example.com/]
+
// Remove 'offset's for area_measure()
- foreach(array_keys($array) as $key)
- unset($array[$key]['offset']);
+ //foreach(array_keys($array) as $key)
+ // unset($array[$key]['offset']);
}
return $array;
if ($is_spam) break;
}
} else {
- $pickups = spam_pickup($target);
+ $pickups = spam_uri_pickup($target);
$urinum += count($pickups);
if (! empty($pickups)) {
// Some users want to post some URLs, but ...
<?php
-// $Id: spam.php,v 1.14 2006/11/12 04:27:11 henoheno Exp $
+// $Id: spam.php,v 1.15 2006/11/12 10:59:56 henoheno Exp $
// Copyright (C) 2006 PukiWiki Developers Team
// License: GPL v2 or (at your option) any later version
// Functions for Concept-work of spam-uri metrics
-// Return an array of normalized/parsed URIs in the $string
+// Return an array of URIs in the $string
// [OK] http://nasty.example.org#nasty_string
// [OK] http://nasty.example.org/foo/xxx#nasty_string/bar
// [OK] ftp://dfshodfs:80/dfsdfs
-// [OK] http://victim.example.org/go?http%3A%2F%2Fnasty.example.org
-// [OK] http://victim.example.org/gphttp://nasty.example.org
-function spam_pickup($string = '')
+function uri_pickup($string = '')
{
- // Preprocess: urldecode() and adding space(s)
- $string = preg_replace(
- array(
- '#(?:https?|ftp):/#',
- '#\b[a-z][a-z0-9.+-]{1,8}://#i',
- '#[a-z][a-z0-9.+-]{1,8}://#i'
- ), ' $0', rawurldecode($string));
-
- // URI pickup: Not available for user@password, IDN, Fragment(=ignored)
+ // Not available for: user@password, IDN, Fragment(=ignored)
$array = array();
preg_match_all(
// Refer RFC3986
'#i',
$string, $array, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
//var_dump(recursive_map('htmlspecialchars', $array));
+
// Shrink $array
$parts = array(1 => 'scheme', 2 => 'host', 3 => 'port',
4 => 'path', 5 => 'file');
array_rename_keys($array[$uri], $parts, TRUE, $default);
$offset = $array[$uri]['scheme'][1]; // Scheme's offset
- // Remove offsets (with normalization)
+ // Remove offsets for each part
foreach(array_keys($array[$uri]) as $part) {
- $array[$uri][$part] =
- strtolower($array[$uri][$part][0]);
+ $array[$uri][$part] = & $array[$uri][$part][0];
}
- //$array[$uri]['path'] = path_normalize($array[$uri]['path']);
+
$array[$uri]['offset'] = $offset;
$array[$uri]['area'] = 0;
}
+ return $array;
+}
+
+// Preprocess: rawurldecode() and adding space(s) to detect/count some URIs if possible
+// NOTE: It's maybe danger to var_dump() these results.
+// [OK] http://victim.example.org/go?http%3A%2F%2Fnasty.example.org
+// [OK] http://victim.example.org/http://nasty.example.org
+function spam_uri_pickup_preprocess($string = '')
+{
+ if (is_string($string)) {
+ return preg_replace(
+ array(
+ '#(?:https?|ftp):/#',
+ '#\b[a-z][a-z0-9.+-]{1,8}://#i',
+ '#[a-z][a-z0-9.+-]{1,8}://#i'
+ ),
+ ' $0',
+ rawurldecode($string)
+ );
+ } else {
+ return '';
+ }
+}
+
+// Main function of spam-uri pickup
+function spam_uri_pickup($string = '')
+{
+ $string = spam_uri_pickup_preprocess($string);
+
+ $array = uri_pickup($string);
+
// Area elevation for '(especially external)link' intension
if (! empty($array)) {
// Anchor tags by preg_match_all()
}
area_measure($areas, $array);
- // Various Wiki syntax
- // [text_or_uri>text_or_uri]
- // [text_or_uri:text_or_uri]
- // [text_or_uri|text_or_uri]
- // [text_or_uri->text_or_uri]
- // [text_or_uri text_or_uri] // MediaWiki
- // MediaWiki: [http://nasty.example.com/ visit http://nasty.example.com/]
-
// phpBB's "BBCode" by preg_match_all()
// [url]http://nasty.example.com/[/url]
// [link]http://nasty.example.com/[/link]
}
area_measure($areas, $array);
+ // Various Wiki syntax
+ // [text_or_uri>text_or_uri]
+ // [text_or_uri:text_or_uri]
+ // [text_or_uri|text_or_uri]
+ // [text_or_uri->text_or_uri]
+ // [text_or_uri text_or_uri] // MediaWiki
+ // MediaWiki: [http://nasty.example.com/ visit http://nasty.example.com/]
+
// Remove 'offset's for area_measure()
- foreach(array_keys($array) as $key)
- unset($array[$key]['offset']);
+ //foreach(array_keys($array) as $key)
+ // unset($array[$key]['offset']);
}
return $array;
if ($is_spam) break;
}
} else {
- $pickups = spam_pickup($target);
+ $pickups = spam_uri_pickup($target);
$urinum += count($pickups);
if (! empty($pickups)) {
// Some users want to post some URLs, but ...
<?php
-// $Id: spam_pickup.php,v 1.10 2006/11/03 07:47:57 henoheno Exp $
+// $Id: spam_pickup.php,v 1.11 2006/11/12 10:59:56 henoheno Exp $
// Concept-work of spam-uri metrics
// Copyright (C) 2006 PukiWiki Developers Team
// License: GPL v2 or (at your option) any later version
show_form($msg);
echo '<pre>';
-$results = spam_pickup($msg);
+$pickup = TRUE;
+if ($pickup) {
+ $results = spam_uri_pickup($msg);
-// Measure
-$count = count($results);
-$area = 0;
-foreach($results as $result)
- if (isset($result['area']))
- $area += $result['area'];
-$average = $count ? ($area / $count) : 'NULL';
+ // Measure
+ $count = count($results);
+ $area = 0;
+ foreach($results as $result)
+ if (isset($result['area']))
+ $area += $result['area'];
+ $average = $count ? ($area / $count) : 'NULL';
+
+ echo "TOTAL = $count URIs, AREA_TOTAL = $area, AREA_AVERAGE = " . $average . "</br >" . "</br >";
+
+ $a = array();
+ var_dump(array_tree('/a/b/c/d/e', '/', false));
+ var_dump($a);
+ echo "<br/>";
+}
-echo "TOTAL = $count URIs, AREA_TOTAL = $area, AREA_AVERAGE = " . $average . "</br >" . "</br >";
var_dump('is_uri_spam($msg)', is_uri_spam($msg));
//$notify = TRUE;
//var_dump('pkwk_spamfilter($msg)', pkwk_spamfilter('A', 'PAGE', array('msg' => $msg)));
//echo "\n";
-var_dump('$results', $results);
+if ($pickup) {
+ var_dump('$results', $results);
+}
echo '</pre>';
?>
<?php
-// $Id: spam_pickup.php,v 1.10 2006/11/03 07:47:57 henoheno Exp $
+// $Id: spam_pickup.php,v 1.11 2006/11/12 10:59:56 henoheno Exp $
// Concept-work of spam-uri metrics
// Copyright (C) 2006 PukiWiki Developers Team
// License: GPL v2 or (at your option) any later version
show_form($msg);
echo '<pre>';
-$results = spam_pickup($msg);
+$pickup = TRUE;
+if ($pickup) {
+ $results = spam_uri_pickup($msg);
-// Measure
-$count = count($results);
-$area = 0;
-foreach($results as $result)
- if (isset($result['area']))
- $area += $result['area'];
-$average = $count ? ($area / $count) : 'NULL';
+ // Measure
+ $count = count($results);
+ $area = 0;
+ foreach($results as $result)
+ if (isset($result['area']))
+ $area += $result['area'];
+ $average = $count ? ($area / $count) : 'NULL';
+
+ echo "TOTAL = $count URIs, AREA_TOTAL = $area, AREA_AVERAGE = " . $average . "</br >" . "</br >";
+
+ $a = array();
+ var_dump(array_tree('/a/b/c/d/e', '/', false));
+ var_dump($a);
+ echo "<br/>";
+}
-echo "TOTAL = $count URIs, AREA_TOTAL = $area, AREA_AVERAGE = " . $average . "</br >" . "</br >";
var_dump('is_uri_spam($msg)', is_uri_spam($msg));
//$notify = TRUE;
//var_dump('pkwk_spamfilter($msg)', pkwk_spamfilter('A', 'PAGE', array('msg' => $msg)));
//echo "\n";
-var_dump('$results', $results);
+if ($pickup) {
+ var_dump('$results', $results);
+}
echo '</pre>';
?>