BugTrack/2108 Use case insensitive index chars for page list

[pukiwiki/pukiwiki.git] / lib / func.php
diff --git a/lib/func.php b/lib/func.php

index 2519d2b..05a6fe8 100644 (file)
--- a/lib/func.php
+++ b/lib/func.php
@@ -1,65 +1,65 @@
  <?php
-/////////////////////////////////////////////////
  // PukiWiki - Yet another WikiWikiWeb clone.
+// func.php
+// Copyright
+//   2002-2016 PukiWiki Development Team
+//   2001-2002 Originally written by yu-ji
+// License: GPL v2 or (at your option) any later version
  //
-// $Id: func.php,v 1.6 2004/08/06 15:39:52 henoheno Exp $
-//
+// General functions
  
-// 文字列がInterWikiNameかどうか
  function is_interwiki($str)
  {
         global $InterWikiName;
-
-       return preg_match("/^$InterWikiName$/", $str);
+       return preg_match('/^' . $InterWikiName . '$/', $str);
  }
  
-// 文字列がページ名かどうか
  function is_pagename($str)
  {
         global $BracketName;
  
-       $is_pagename = (! is_interwiki($str) and
-               preg_match("/^(?!\/)$BracketName$(?<!\/$)/", $str) and
-               ! preg_match('/(^|\/)\.{1,2}(\/|$)/', $str));
+       $is_pagename = (! is_interwiki($str) &&
+                 preg_match('/^(?!\/)' . $BracketName . '$(?<!\/$)/', $str) &&
+               ! preg_match('#(^|/)\.{1,2}(/|$)#', $str));
  
         if (defined('SOURCE_ENCODING')) {
-               if (SOURCE_ENCODING == 'UTF-8') {
-                       $pattern = '/^(?:[\x00-\x7F]|(?:[\xC0-\xDF][\x80-\xBF])|(?:[\xE0-\xEF][\x80-\xBF][\x80-\xBF]))+$/';
-                       $is_pagename = ($is_pagename and preg_match($pattern, $str));
-               } else if (SOURCE_ENCODING == 'EUC-JP') {
-                       $pattern = '/^(?:[\x00-\x7F]|(?:[\x8E\xA1-\xFE][\xA1-\xFE])|(?:\x8F[\xA1-\xFE][\xA1-\xFE]))+$/';
-                       $is_pagename = ($is_pagename and preg_match($pattern, $str));
+               switch(SOURCE_ENCODING){
+               case 'UTF-8': $pattern =
+                       '/^(?:[\x00-\x7F]|(?:[\xC0-\xDF][\x80-\xBF])|(?:[\xE0-\xEF][\x80-\xBF][\x80-\xBF]))+$/';
+                       break;
+               case 'EUC-JP': $pattern =
+                       '/^(?:[\x00-\x7F]|(?:[\x8E\xA1-\xFE][\xA1-\xFE])|(?:\x8F[\xA1-\xFE][\xA1-\xFE]))+$/';
+                       break;
                 }
+               if (isset($pattern) && $pattern != '')
+                       $is_pagename = ($is_pagename && preg_match($pattern, $str));
         }
  
         return $is_pagename;
  }
  
-// 文字列がURLかどうか
  function is_url($str, $only_http = FALSE)
  {
         $scheme = $only_http ? 'https?' : 'https?|ftp|news';
-       return preg_match('/^('.$scheme.')(:\/\/[-_.!~*\'()a-zA-Z0-9;\/?:\@&=+\$,%#]*)$/', $str);
+       return preg_match('/^(' . $scheme . ')(:\/\/[-_.!~*\'()a-zA-Z0-9;\/?:\@&=+\$,%#]*)$/', $str);
  }
  
-// ページが存在するか
-function is_page($page, $reload = FALSE)
+// If the page exists
+function is_page($page, $clearcache = FALSE)
  {
-       if ($reload) clearstatcache();
+       if ($clearcache) clearstatcache();
         return file_exists(get_filename($page));
  }
  
-// ページが編集可能か
  function is_editable($page)
  {
         global $cantedit;
         static $is_editable = array();
  
-       if (!array_key_exists($page, $is_editable))
-       {
+       if (! isset($is_editable[$page])) {
                 $is_editable[$page] = (
-                       is_pagename($page) and
-                       ! is_freeze($page) and
+                       is_pagename($page) &&
+                       ! is_freeze($page) &&
                         ! in_array($page, $cantedit)
                 );
         }
@@ -67,18 +67,44 @@ function is_editable($page)
         return $is_editable[$page];
  }
  
-// ページが凍結されているか
-function is_freeze($page)
+function is_freeze($page, $clearcache = FALSE)
  {
         global $function_freeze;
+       static $is_freeze = array();
+
+       if ($clearcache === TRUE) $is_freeze = array();
+       if (isset($is_freeze[$page])) return $is_freeze[$page];
+
+       if (! $function_freeze || ! is_page($page)) {
+               $is_freeze[$page] = FALSE;
+               return FALSE;
+       } else {
+               $fp = fopen(get_filename($page), 'rb') or
+                       die('is_freeze(): fopen() failed: ' . htmlsc($page));
+               flock($fp, LOCK_SH) or die('is_freeze(): flock() failed');
+               rewind($fp);
+               $buffer = fgets($fp, 9);
+               flock($fp, LOCK_UN) or die('is_freeze(): flock() failed');
+               fclose($fp) or die('is_freeze(): fclose() failed: ' . htmlsc($page));
+
+               $is_freeze[$page] = ($buffer != FALSE && rtrim($buffer, "\r\n") == '#freeze');
+               return $is_freeze[$page];
+       }
+}
  
-       if (! $function_freeze or ! is_page($page)) return FALSE;
+// Handling $non_list
+// $non_list will be preg_quote($str, '/') later.
+function check_non_list($page = '')
+{
+       global $non_list;
+       static $regex;
+
+       if (! isset($regex)) $regex = '/' . $non_list . '/';
  
-       list($lines) = get_source($page);
-       return (rtrim($lines) == '#freeze');
+       return preg_match($regex, $page);
  }
  
-// 自動テンプレート
+// Auto template
  function auto_template($page)
  {
         global $auto_template_func, $auto_template_rules;
@@ -87,172 +113,221 @@ function auto_template($page)
  
         $body = '';
         $matches = array();
-       foreach ($auto_template_rules as $rule => $template)
-       {
-               if (preg_match("/$rule/", $page, $matches))
-               {
-                       $template_page = preg_replace("/$rule/", $template, $page);
-                       if (is_page($template_page))
-                       {
-                               $body = join('', get_source($template_page));
-                               // ¸«½Ð¤·¤Î¸ÇÍIDÉô¤òºï½ü
-                               $body = preg_replace('/^(\*{1,3}.*)\[#[A-Za-z][\w-]+\](.*)$/m', '$1$2', $body);
-                               // #freeze¤òºï½ü
-                               $body = preg_replace('/^#freeze\s*$/m', '', $body);
-
-                               for ($i = 0; $i < count($matches); $i++) {
-                                       $body = str_replace("\$$i", $matches[$i], $body);
-                               }
-                               break;
-                       }
-               }
+       foreach ($auto_template_rules as $rule => $template) {
+               $rule_pattrn = '/' . $rule . '/';
+
+               if (! preg_match($rule_pattrn, $page, $matches)) continue;
+
+               $template_page = preg_replace($rule_pattrn, $template, $page);
+               if (! is_page($template_page)) continue;
+
+               $body = join('', get_source($template_page));
+
+               // Remove fixed-heading anchors
+               $body = preg_replace('/^(\*{1,3}.*)\[#[A-Za-z][\w-]+\](.*)$/m', '$1$2', $body);
+
+               // Remove '#freeze'
+               $body = preg_replace('/^#freeze\s*$/m', '', $body);
+
+               $count = count($matches);
+               for ($i = 0; $i < $count; $i++)
+                       $body = str_replace('$' . $i, $matches[$i], $body);
+
+               break;
         }
         return $body;
  }
  
-// 検索語を展開する
-function get_search_words($words, $special = FALSE)
+// Expand all search-words to regexes and push them into an array
+function get_search_words($words = array(), $do_escape = FALSE)
  {
-       $retval = array();
-       // Perlメモ - 正しくパターンマッチさせる
-       // http://www.din.or.jp/~ohzaki/perl.htm#JP_Match
-       $eucpre = $eucpost = '';
-       if (SOURCE_ENCODING == 'EUC-JP')
-       {
-               $eucpre = '(?<!\x8F)';
-               // # JIS X 0208 が 0文字以上続いて # ASCII, SS2, SS3 または終端
-               $eucpost = '(?=(?:[\xA1-\xFE][\xA1-\xFE])*(?:[\x00-\x7F\x8E\x8F]|\z))';
+       static $init, $mb_convert_kana, $pre, $post, $quote = '/';
+
+       if (! isset($init)) {
+               // function: mb_convert_kana() is for Japanese code only
+               if (LANG == 'ja' && function_exists('mb_convert_kana')) {
+                       $mb_convert_kana = create_function('$str, $option',
+                               'return mb_convert_kana($str, $option, SOURCE_ENCODING);');
+               } else {
+                       $mb_convert_kana = create_function('$str, $option',
+                               'return $str;');
+               }
+               if (SOURCE_ENCODING == 'EUC-JP') {
+                       // Perl memo - Correct pattern-matching with EUC-JP
+                       // http://www.din.or.jp/~ohzaki/perl.htm#JP_Match (Japanese)
+                       $pre  = '(?<!\x8F)';
+                       $post = '(?=(?:[\xA1-\xFE][\xA1-\xFE])*' . // JIS X 0208
+                               '(?:[\x00-\x7F\x8E\x8F]|\z))';     // ASCII, SS2, SS3, or the last
+               } else {
+                       $pre = $post = '';
+               }
+               $init = TRUE;
         }
-       $quote_func = create_function('$str', 'return preg_quote($str, "/");');
  
-       // LANG=='ja'で、mb_convert_kanaが使える場合はmb_convert_kanaを使用
-       $convert_kana = create_function('$str, $option',
-               (LANG == 'ja' and function_exists('mb_convert_kana')) ?
-                       'return mb_convert_kana($str, $option);' : 'return $str;'
-       );
+       if (! is_array($words)) $words = array($words);
  
-       foreach ($words as $word)
-       {
-               // 英数字は半角,カタカナは全角,ひらがなはカタカナに
-               $word_zk = $convert_kana($word, 'aKCV');
+       // Generate regex for the words
+       $regex = array();
+       foreach ($words as $word) {
+               $word = trim($word);
+               if ($word == '') continue;
+
+               // Normalize: ASCII letters = to single-byte. Others = to Zenkaku and Katakana
+               $word_nm = $mb_convert_kana($word, 'aKCV');
+               $nmlen   = mb_strlen($word_nm, SOURCE_ENCODING);
+
+               // Each chars may be served ...
                 $chars = array();
-               for ($pos = 0; $pos < mb_strlen($word_zk); $pos++)
-               {
-                       $char = mb_substr($word_zk, $pos,1);
-                       // $special : htmlspecialchars()を通すか
-                       $arr = array($quote_func($special ? htmlspecialchars($char) : $char));
-                       if (strlen($char) == 1) // ±Ñ¿ô»ú
-                       {
-                               foreach (array(strtoupper($char), strtolower($char)) as $_char)
-                               {
-                                       if ($char != '&') {
-                                               $arr[] = $quote_func($_char);
-                                       }
-                                       $ord = ord($_char);
-                                       $arr[] = sprintf('&#(?:%d|x%x);', $ord, $ord); // ¼ÂÂÎ»²¾È
-                                       $arr[] = $quote_func($convert_kana($_char, 'A')); // Á´³Ñ
+               for ($pos = 0; $pos < $nmlen; $pos++) {
+                       $char = mb_substr($word_nm, $pos, 1, SOURCE_ENCODING);
+
+                       // Just normalized one? (ASCII char or Zenkaku-Katakana?)
+                       $or = array(preg_quote($do_escape ? htmlsc($char) : $char, $quote));
+                       if (strlen($char) == 1) {
+                               // An ASCII (single-byte) character
+                               foreach (array(strtoupper($char), strtolower($char)) as $_char) {
+                                       if ($char != '&') $or[] = preg_quote($_char, $quote); // As-is?
+                                       $ascii = ord($_char);
+                                       $or[] = sprintf('&#(?:%d|x%x);', $ascii, $ascii); // As an entity reference?
+                                       $or[] = preg_quote($mb_convert_kana($_char, 'A'), $quote); // As Zenkaku?
                                 }
+                       } else {
+                               // NEVER COME HERE with mb_substr(string, start, length, 'ASCII')
+                               // A multi-byte character
+                               $or[] = preg_quote($mb_convert_kana($char, 'c'), $quote); // As Hiragana?
+                               $or[] = preg_quote($mb_convert_kana($char, 'k'), $quote); // As Hankaku-Katakana?
                         }
-                       else // ¥Þ¥ë¥Á¥Ð¥¤¥ÈÊ¸»ú
-                       {
-                               $arr[] = $quote_func($convert_kana($char, 'c')); // ¤Ò¤é¤¬¤Ê
-                               $arr[] = $quote_func($convert_kana($char, 'k')); // È¾³Ñ¥«¥¿¥«¥Ê
-                       }
-                       $chars[] = '(?:' . join('|', array_unique($arr)) . ')';
+                       $chars[] = '(?:' . join('|', array_unique($or)) . ')'; // Regex for the character
                 }
-               $retval[$word] = $eucpre.join('', $chars) . $eucpost;
+
+               $regex[$word] = $pre . join('', $chars) . $post; // For the word
         }
-       return $retval;
+
+       return $regex; // For all words
  }
  
-// 検索
-function do_search($word, $type = 'AND', $non_format = FALSE)
+// 'Search' main function
+function do_search($word, $type = 'AND', $non_format = FALSE, $base = '')
  {
         global $script, $whatsnew, $non_list, $search_non_list;
         global $_msg_andresult, $_msg_orresult, $_msg_notfoundresult;
-       global $search_auth;
+       global $search_auth, $show_passage;
  
         $retval = array();
  
         $b_type = ($type == 'AND'); // AND:TRUE OR:FALSE
         $keys = get_search_words(preg_split('/\s+/', $word, -1, PREG_SPLIT_NO_EMPTY));
+       foreach ($keys as $key=>$value)
+               $keys[$key] = '/' . $value . '/S';
  
-       $_pages = get_existpages();
-       $pages = array();
+       $pages = get_existpages();
  
-       foreach ($_pages as $page)
-       {
-               if ($page == $whatsnew or (! $search_non_list and preg_match("/$non_list/", $page)))
-                       continue;
+       // Avoid
+       if ($base != '') {
+               $pages = preg_grep('/^' . preg_quote($base, '/') . '/S', $pages);
+       }
+       if (! $search_non_list) {
+               $pages = array_diff($pages, preg_grep('/' . $non_list . '/S', $pages));
+       }
+       $pages = array_flip($pages);
+       unset($pages[$whatsnew]);
  
-               // 検索対象ページの制限をかけるかどうか (ページ名は制限外)
-               if ($search_auth and ! check_readable($page, false, false)) {
-                       $source = get_source(); // ¸¡º÷ÂÐ¾Ý¥Ú¡¼¥¸ÆâÍÆ¤ò¶õ¤Ë¡£
-               } else {
-                       $source = get_source($page);
+       $count = count($pages);
+       foreach (array_keys($pages) as $page) {
+               $b_match = FALSE;
+
+               // Search for page name
+               if (! $non_format) {
+                       foreach ($keys as $key) {
+                               $b_match = preg_match($key, $page);
+                               if ($b_type xor $b_match) break; // OR
+                       }
+                       if ($b_match) continue;
                 }
-               if (! $non_format)
-                       array_unshift($source, $page); // ¥Ú¡¼¥¸Ì¾¤â¸¡º÷ÂÐ¾Ý¤Ë
  
-               $b_match = FALSE;
-               foreach ($keys as $key) {
-                       $tmp = preg_grep("/$key/", $source);
-                       $b_match = (count($tmp) > 0);
-                       if ($b_match xor $b_type)
-                               break;
+               // Search auth for page contents
+               if ($search_auth && ! check_readable($page, false, false)) {
+                       unset($pages[$page]);
+                       --$count;
                 }
-               if ($b_match) {
-                       $pages[$page] = get_filetime($page);
+
+               // Search for page contents
+               foreach ($keys as $key) {
+                       $b_match = preg_match($key, get_source($page, TRUE, TRUE));
+                       if ($b_type xor $b_match) break; // OR
                 }
+               if ($b_match) continue;
+
+               unset($pages[$page]); // Miss
         }
-       if ($non_format) {
-               return array_keys($pages);
-       }
+       if ($non_format) return array_keys($pages);
  
         $r_word = rawurlencode($word);
-       $s_word = htmlspecialchars($word);
-       if (count($pages) == 0) {
+       $s_word = htmlsc($word);
+       if (empty($pages))
                 return str_replace('$1', $s_word, $_msg_notfoundresult);
-       }
  
-       ksort($pages);
-       $retval = "<ul>\n";
-       foreach ($pages as $page=>$time)
-       {
-               $r_page = rawurlencode($page);
-               $s_page = htmlspecialchars($page);
-               $passage = get_passage($time);
-               $retval .= " <li><a href=\"$script?cmd=read&amp;page=$r_page&amp;word=$r_word\">$s_page</a>$passage</li>\n";
+       ksort($pages, SORT_STRING);
+
+       $retval = '<ul>' . "\n";
+       foreach (array_keys($pages) as $page) {
+               $r_page  = rawurlencode($page);
+               $s_page  = htmlsc($page);
+               $passage = $show_passage ? ' ' . get_passage(get_filetime($page)) : '';
+               $retval .= ' <li><a href="' . $script . '?cmd=read&amp;page=' .
+                       $r_page . '&amp;word=' . $r_word . '">' . $s_page .
+                       '</a>' . $passage . '</li>' . "\n";
         }
-       $retval .= "</ul>\n";
+       $retval .= '</ul>' . "\n";
  
         $retval .= str_replace('$1', $s_word, str_replace('$2', count($pages),
-               str_replace('$3', count($_pages), $b_type ? $_msg_andresult : $_msg_orresult)));
+               str_replace('$3', $count, $b_type ? $_msg_andresult : $_msg_orresult)));
  
         return $retval;
  }
  
-// プログラムへの引数のチェック
+// Argument check for program
  function arg_check($str)
  {
         global $vars;
+       return isset($vars['cmd']) && (strpos($vars['cmd'], $str) === 0);
+}
  
-       return array_key_exists('cmd', $vars) and (strpos($vars['cmd'], $str) === 0);
+function _pagename_urlencode_callback($matches)
+{
+       return rawurlencode($matches[0]);
  }
  
-// ページ名のエンコード
-function encode($key)
+function pagename_urlencode($page)
  {
-       return ($key == '') ? '' : strtoupper(join('', unpack('H*0', $key)));
+       return preg_replace_callback('|[^/:]+|', '_pagename_urlencode_callback', $page);
  }
  
-// ページ名のデコード
-function decode($key)
+// Encode page-name
+function encode($str)
  {
-       return ($key == '') ? '' : substr(pack('H*', '20202020' . $key), 4);
+       $str = strval($str);
+       return ($str == '') ? '' : strtoupper(bin2hex($str));
+       // Equal to strtoupper(join('', unpack('H*0', $key)));
+       // But PHP 4.3.10 says 'Warning: unpack(): Type H: outside of string in ...'
  }
  
-// [[ ]] を取り除く
+// Decode page name
+function decode($str)
+{
+       return pkwk_hex2bin($str);
+}
+
+// Inversion of bin2hex()
+function pkwk_hex2bin($hex_string)
+{
+       // preg_match : Avoid warning : pack(): Type H: illegal hex digit ...
+       // (string)   : Always treat as string (not int etc). See BugTrack2/31
+       return preg_match('/^[0-9a-f]+$/i', $hex_string) ?
+               pack('H*', (string)$hex_string) : $hex_string;
+}
+
+// Remove [[ ]] (brackets)
  function strip_bracket($str)
  {
         $match = array();
@@ -263,14 +338,14 @@ function strip_bracket($str)
         }
  }
  
-// ページ一覧の作成
+// Create list of pages
  function page_list($pages, $cmd = 'read', $withfilename = FALSE)
  {
-       global $script, $list_index, $top;
+       global $script, $list_index;
         global $_msg_symbol, $_msg_other;
         global $pagereading_enable;
  
-       // ソートキーを決定する。 ' ' < '[a-zA-Z]' < 'zz'という前提。
+       // ソートキーを決定する。 ' ' < '[a-zA-Z]' < 'zz'という前提。
         $symbol = ' ';
         $other = 'zz';
  
@@ -282,112 +357,122 @@ function page_list($pages, $cmd = 'read', $withfilename = FALSE)
         }
  
         $list = $matches = array();
-       foreach($pages as $file=>$page)
-       {
-               $r_page = rawurlencode($page);
-               $s_page = htmlspecialchars($page, ENT_QUOTES);
+
+       // Shrink URI for read
+       if ($cmd == 'read') {
+               $href = $script . '?';
+       } else {
+               $href = $script . '?cmd=' . $cmd . '&amp;page=';
+       }
+
+       foreach($pages as $file=>$page) {
+               $r_page  = pagename_urlencode($page);
+               $s_page  = htmlsc($page, ENT_QUOTES);
                 $passage = get_pg_passage($page);
  
-               $str = "   <li><a href=\"$script?cmd=$cmd&amp;page=$r_page\">$s_page</a>$passage";
+               $str = '   <li><a href="' . $href . $r_page . '">' .
+                       $s_page . '</a>' . $passage;
  
                 if ($withfilename) {
-                       $s_file = htmlspecialchars($file);
-                       $str .= "\n    <ul><li>$s_file</li></ul>\n   ";
+                       $s_file = htmlsc($file);
+                       $str .= "\n" . '    <ul><li>' . $s_file . '</li></ul>' .
+                               "\n" . '   ';
                 }
-               $str .= "</li>";
+               $str .= '</li>';
  
+               // WARNING: Japanese code hard-wired
                 if($pagereading_enable) {
                         if(mb_ereg('^([A-Za-z])', mb_convert_kana($page, 'a'), $matches)) {
+                               $head = strtoupper($matches[1]);
+                       } elseif (isset($readings[$page]) && mb_ereg('^([ァ-ヶ])', $readings[$page], $matches)) { // here
                                 $head = $matches[1];
-                       }
-                       elseif(mb_ereg('^([¥¡-¥ö])', $readings[$page], $matches)) {
-                               $head = $matches[1];
-                       }
-                       elseif (mb_ereg('^[ -~]|[^¤¡-¤ó°¡-ô¦]', $page)) {
+                       } elseif (mb_ereg('^[ -~]|[^ぁ-ん亜-熙]', $page)) { // and here
                                 $head = $symbol;
-                       }
-                       else {
+                       } else {
                                 $head = $other;
                         }
-               }
-               else {
-                       $head = (preg_match('/^([A-Za-z])/', $page, $matches)) ? $matches[1] :
-                               (preg_match('/^([ -~])/', $page, $matches) ? $symbol : $other);
+               } else {
+                       $head = (preg_match('/^([A-Za-z])/', $page, $matches)) ? strtoupper($matches[1]) :
+                               (preg_match('/^([ -~])/', $page) ? $symbol : $other);
                 }
  
                 $list[$head][$page] = $str;
         }
-       ksort($list);
+       uksort($pages, 'strnatcmp');
  
         $cnt = 0;
         $arr_index = array();
-       $retval .= "<ul>\n";
-       foreach ($list as $head=>$pages)
-       {
+       $retval .= '<ul>' . "\n";
+       foreach ($list as $head=>$pages) {
                 if ($head === $symbol) {
                         $head = $_msg_symbol;
                 } else if ($head === $other) {
                         $head = $_msg_other;
                 }
  
-               if ($list_index)
-               {
+               if ($list_index) {
                         ++$cnt;
-                       $arr_index[] = "<a id=\"top_$cnt\" href=\"#head_$cnt\"><strong>$head</strong></a>";
-                       $retval .= " <li><a id=\"head_$cnt\" href=\"#top_$cnt\"><strong>$head</strong></a>\n  <ul>\n";
+                       $arr_index[] = '<a id="top_' . $cnt .
+                               '" href="#head_' . $cnt . '"><strong>' .
+                               $head . '</strong></a>';
+                       $retval .= ' <li><a id="head_' . $cnt . '" href="#top_' . $cnt .
+                               '"><strong>' . $head . '</strong></a>' . "\n" .
+                               '  <ul>' . "\n";
                 }
-               ksort($pages);
+               ksort($pages, SORT_STRING);
                 $retval .= join("\n", $pages);
                 if ($list_index)
-               {
                         $retval .= "\n  </ul>\n </li>\n";
-               }
         }
-       $retval .= "</ul>\n";
-       if ($list_index and $cnt > 0)
-       {
+       $retval .= '</ul>' . "\n";
+       if ($list_index && $cnt > 0) {
                 $top = array();
-               while (count($arr_index) > 0) {
-                       $top[] = join(" | \n", array_splice($arr_index, 0, 16)) . "\n";
-               }
-               $retval = "<div id=\"top\" style=\"text-align:center\">\n" .
-                       join('<br />', $top) . "</div>\n" . $retval;
+               while (! empty($arr_index))
+                       $top[] = join(' | ' . "\n", array_splice($arr_index, 0, 16)) . "\n";
+
+               $retval = '<div id="top" style="text-align:center">' . "\n" .
+                       join('<br />', $top) . '</div>' . "\n" . $retval;
         }
         return $retval;
  }
  
-// テキスト整形ルールを表示する
+// Show text formatting rules
  function catrule()
  {
         global $rule_page;
  
-       if (! is_page($rule_page))
-       {
-               return "<p>sorry, $rule_page unavailable.</p>";
+       if (! is_page($rule_page)) {
+               return '<p>Sorry, page \'' . htmlsc($rule_page) .
+                       '\' unavailable.</p>';
+       } else {
+               return convert_html(get_source($rule_page));
         }
-       return convert_html(get_source($rule_page));
  }
  
-// エラーメッセージを表示する
+// Show (critical) error message
  function die_message($msg)
  {
         $title = $page = 'Runtime error';
-
         $body = <<<EOD
  <h3>Runtime error</h3>
  <strong>Error message : $msg</strong>
  EOD;
  
+       pkwk_common_headers();
         if(defined('SKIN_FILE') && file_exists(SKIN_FILE) && is_readable(SKIN_FILE)) {
                 catbody($title, $page, $body);
         } else {
-               header('Content-Type: text/html; charset=euc-jp');
+               $charset = 'utf-8';
+               if(defined('CONTENT_CHARSET')) {
+                       $charset = CONTENT_CHARSET;
+               }
+               header("Content-Type: text/html; charset=$charset");
                 print <<<EOD
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!DOCTYPE html>
  <html>
   <head>
+  <meta http-equiv="content-type" content="text/html; charset=$charset">
    <title>$title</title>
-  <meta http-equiv="content-type" content="text/html; charset=euc-jp">
   </head>
   <body>
   $body
@@ -395,43 +480,50 @@ EOD;
  </html>
  EOD;
         }
-       die();
+       exit;
  }
  
-// 現在時刻をマイクロ秒で取得
+// Have the time (as microtime)
  function getmicrotime()
  {
         list($usec, $sec) = explode(' ', microtime());
         return ((float)$sec + (float)$usec);
  }
  
-// 日時を得る
+// Elapsed time by second
+//define('MUTIME', getmicrotime());
+function elapsedtime()
+{
+       $at_the_microtime = MUTIME;
+       return sprintf('%01.03f', getmicrotime() - $at_the_microtime);
+}
+
+// Get the date
  function get_date($format, $timestamp = NULL)
  {
-       $time = ($timestamp === NULL) ? UTIME : $timestamp;
-       $time += ZONETIME;
+       $format = preg_replace('/(?<!\\\)T/',
+               preg_replace('/(.)/', '\\\$1', ZONE), $format);
  
-       $format = preg_replace('/(?<!\\\)T/', preg_replace('/(.)/', '\\\$1', ZONE), $format);
+       $time = ZONETIME + (($timestamp !== NULL) ? $timestamp : UTIME);
  
         return date($format, $time);
  }
  
-// 日時文字列を作る
+// Format date string
  function format_date($val, $paren = FALSE)
  {
         global $date_format, $time_format, $weeklabels;
  
         $val += ZONETIME;
  
-       $ins_date = date($date_format, $val);
-       $ins_time = date($time_format, $val);
-       $ins_week = '(' . $weeklabels[date('w', $val)] . ')';
+       $date = date($date_format, $val) .
+               ' (' . $weeklabels[date('w', $val)] . ') ' .
+               date($time_format, $val);
  
-       $ins = "$ins_date $ins_week $ins_time";
-       return $paren ? "($ins)" : $ins;
+       return $paren ? '(' . $date . ')' : $date;
  }
  
-// 経過時刻文字列を作る
+// Get short string of the passage, 'N seconds/minutes/hours/days/years ago'
  function get_passage($time, $paren = TRUE)
  {
         static $units = array('m'=>60, 'h'=>24, 'd'=>1);
@@ -444,51 +536,45 @@ function get_passage($time, $paren = TRUE)
         }
         $time = floor($time) . $unit;
  
-       return $paren ? "($time)" : $time;
+       return $paren ? '(' . $time . ')' : $time;
  }
  
-// <input type="(submit|button|image)"...>を隠す
+// Hide <input type="(submit|button|image)"...>
  function drop_submit($str)
  {
-       return preg_replace(
-               '/<input([^>]+)type="(submit|button|image)"/i',
-               '<input$1type="hidden"',
-               $str
-       );
+       return preg_replace('/<input([^>]+)type="(submit|button|image)"/i',
+               '<input$1type="hidden"', $str);
  }
  
-// AutoLinkのパターンを生成する
-// thx for hirofummy
+// Generate AutoLink patterns (thx to hirofummy)
  function get_autolink_pattern(& $pages)
  {
         global $WikiName, $autolink, $nowikiname;
  
-       $config = &new Config('AutoLink');
+       $config = new Config('AutoLink');
         $config->read();
-       $ignorepages = $config->get('IgnoreList');
+       $ignorepages      = $config->get('IgnoreList');
         $forceignorepages = $config->get('ForceIgnoreList');
         unset($config);
         $auto_pages = array_merge($ignorepages, $forceignorepages);
  
-       foreach ($pages as $page) {
-               if (preg_match("/^$WikiName$/", $page) ?
+       foreach ($pages as $page)
+               if (preg_match('/^' . $WikiName . '$/', $page) ?
                     $nowikiname : strlen($page) >= $autolink)
                         $auto_pages[] = $page;
-       }
-
-       if (count($auto_pages) == 0) {
-               return $nowikiname ? '(?!)' : $WikiName;
-       }
  
-       $auto_pages = array_unique($auto_pages);
-       sort($auto_pages, SORT_STRING);
-
-       $auto_pages_a = array_values(preg_grep('/^[A-Z]+$/i', $auto_pages));
-       $auto_pages   = array_values(array_diff($auto_pages, $auto_pages_a));
+       if (empty($auto_pages)) {
+               $result = $result_a = $nowikiname ? '(?!)' : $WikiName;
+       } else {
+               $auto_pages = array_unique($auto_pages);
+               sort($auto_pages, SORT_STRING);
  
-       $result   = get_autolink_pattern_sub($auto_pages,   0, count($auto_pages),   0);
-       $result_a = get_autolink_pattern_sub($auto_pages_a, 0, count($auto_pages_a), 0);
+               $auto_pages_a = array_values(preg_grep('/^[A-Z]+$/i', $auto_pages));
+               $auto_pages   = array_values(array_diff($auto_pages,  $auto_pages_a));
  
+               $result   = get_autolink_pattern_sub($auto_pages,   0, count($auto_pages),   0);
+               $result_a = get_autolink_pattern_sub($auto_pages_a, 0, count($auto_pages_a), 0);
+       }
         return array($result, $result_a, $forceignorepages);
  }
  
@@ -497,23 +583,16 @@ function get_autolink_pattern_sub(& $pages, $start, $end, $pos)
         if ($end == 0) return '(?!)';
  
         $result = '';
-       $count = 0;
+       $count = $i = $j = 0;
         $x = (mb_strlen($pages[$start]) <= $pos);
+       if ($x) ++$start;
  
-       if ($x) {
-               ++$start;
-       }
-
-       for ($i = $start; $i < $end; $i = $j) // What is the initial state of $j?
-       {
+       for ($i = $start; $i < $end; $i = $j) {
                 $char = mb_substr($pages[$i], $pos, 1);
-               for ($j = $i; $j < $end; $j++) {
-                       if (mb_substr($pages[$j], $pos, 1) != $char)
-                               break;
-               }
-               if ($i != $start) {
-                       $result .= '|';
-               }
+               for ($j = $i; $j < $end; $j++)
+                       if (mb_substr($pages[$j], $pos, 1) != $char) break;
+
+               if ($i != $start) $result .= '|';
                 if ($i >= ($j - 1)) {
                         $result .= str_replace(' ', '\\ ', preg_quote(mb_substr($pages[$i], $pos), '/'));
                 } else {
@@ -522,51 +601,77 @@ function get_autolink_pattern_sub(& $pages, $start, $end, $pos)
                 }
                 ++$count;
         }
-       if ($x or $count > 1) {
-               $result = '(?:' . $result . ')';
-       }
-       if ($x) {
-               $result .= '?';
-       }
+       if ($x || $count > 1) $result = '(?:' . $result . ')';
+       if ($x)               $result .= '?';
+
         return $result;
  }
  
-// pukiwiki.phpスクリプトのabsolute-uriを生成
-function get_script_uri()
+// Get absolute-URI of this script (cached after the first call); a non-empty $init_uri sets it manually, once
+function get_script_uri($init_uri = '')
  {
-       $script  = (SERVER_PORT == 443 ? 'https://' : 'http://');       // scheme
-       $script .= SERVER_NAME; // host
-       $script .= (SERVER_PORT == 80 ? '' : ':' . SERVER_PORT); // port
+       global $script_directory_index;
+       static $script;
  
-       // SCRIPT_NAME が'/'で始まっていない場合(cgiなど) REQUEST_URIを使ってみる
-       $path    = SCRIPT_NAME;
-       if ($path{0} != '/')
-       {
-               if (!isset($_SERVER['REQUEST_URI']) or $_SERVER['REQUEST_URI']{0} != '/')
-                       return FALSE;
+       if ($init_uri == '') {
+               // Get
+               if (isset($script)) return $script;
+
+               // Set automatically
+               $msg     = 'get_script_uri() failed: Please set $script at INI_FILE manually';
+
+               $script  = (SERVER_PORT == 443 ? 'https://' : 'http://'); // scheme
+               $script .= SERVER_NAME; // host
+               $script .= (SERVER_PORT == 80 ? '' : ':' . SERVER_PORT);  // port
  
-               // REQUEST_URIをパースし、path部分だけを取り出す
-               $parse_url = parse_url($script.$_SERVER['REQUEST_URI']);
-               if (! isset($parse_url['path']) or $parse_url['path']{0} != '/')
-                       return FALSE;
+               // If SCRIPT_NAME does not start with '/' (e.g. CGI), try REQUEST_URI instead
+               $path    = SCRIPT_NAME;
+               if ($path[0] != '/') {
+                       if (! isset($_SERVER['REQUEST_URI']) || $_SERVER['REQUEST_URI'][0] != '/')
+                               die_message($msg);
  
-               $path = $parse_url['path'];
+                       // Parse REQUEST_URI and take only its path component
+                       $parse_url = parse_url($script . $_SERVER['REQUEST_URI']);
+                       if (! isset($parse_url['path']) || $parse_url['path'][0] != '/')
+                               die_message($msg);
+
+                       $path = $parse_url['path'];
+               }
+               $script .= $path;
+
+               if (! is_url($script, TRUE) && php_sapi_name() == 'cgi')
+                       die_message($msg);
+               unset($msg);
+
+       } else {
+               // Set manually
+               if (isset($script)) die_message('$script: Already init');
+               if (! is_url($init_uri, TRUE)) die_message('$script: Invalid URI');
+               $script = $init_uri;
+       }
+
+       // Cut filename or not
+       if (isset($script_directory_index)) {
+               if (! file_exists($script_directory_index))
+                       die_message('Directory index file not found: ' .
+                               htmlsc($script_directory_index));
+               $matches = array();
+               if (preg_match('#^(.+/)' . preg_quote($script_directory_index, '#') . '$#',
+                       $script, $matches)) $script = $matches[1];
         }
-       $script .= $path;       // path
  
         return $script;
  }
  
-/*
-変数内のnull(\0)バイトを削除する
-PHPはfopen("hoge.php\0.txt")で"hoge.php"を開いてしまうなどの問題あり
-
-http://ns1.php.gr.jp/pipermail/php-users/2003-January/012742.html
-[PHP-users 12736] null byte attack
-
-2003-05-16: magic quotes gpcの復元処理を統合
-2003-05-21: 連想配列のキーはbinary safe
-*/
+// Remove null(\0) bytes from variables
+//
+// NOTE: PHP had vulnerabilities that opens "hoge.php" via fopen("hoge.php\0.txt") etc.
+// [PHP-users 12736] null byte attack
+// http://ns1.php.gr.jp/pipermail/php-users/2003-January/012742.html
+//
+// 2003-05-16: Merged in the processing that restores (unescapes) magic quotes gpc
+// 2003-05-21: Keys of associative arrays are binary safe
+//
  function input_filter($param)
  {
         static $magic_quotes_gpc = NULL;
@@ -587,7 +692,7 @@ function sanitize($param) {
         return input_filter($param);
  }
  
-// CSV形式の文字列を配列に
+// Explode Comma-Separated Values to an array
  function csv_explode($separator, $string)
  {
         $retval = $matches = array();
@@ -599,40 +704,30 @@ function csv_explode($separator, $string)
  
         foreach ($matches[1] as $str) {
                 $len = strlen($str);
-               if ($len > 1 and $str{0} == '"' and $str{$len - 1} == '"') {
+               if ($len > 1 && $str{0} == '"' && $str{$len - 1} == '"')
                         $str = str_replace('""', '"', substr($str, 1, -1));
-               }
                 $retval[] = $str;
         }
         return $retval;
  }
  
-// 配列をCSV形式の文字列に
+// Implode an array with CSV data format: fields containing '"', CR, LF or the first glue byte are quoted, '"' is doubled
  function csv_implode($glue, $pieces)
  {
-       $_glue = ($glue != '') ? '\\'.$glue{0} : '';
+       $_glue = ($glue != '') ? preg_quote($glue[0], '/') : ''; // literal 1st glue byte ('\\' . 'd' would mean \d)
         $arr = array();
-       foreach ($pieces as $str)
-       {
-               if (ereg("[$_glue\"\n\r]",$str))
-               {
-                       $str = '"'.str_replace('"', '""', $str).'"';
-               }
+       foreach ($pieces as $str) {
+               if (preg_match('/[' . '"' . "\n\r" . $_glue . ']/', $str))
+                       $str = '"' . str_replace('"', '""', $str) . '"';
                 $arr[] = $str;
         }
         return join($glue, $arr);
  }
  
-function pkwk_login($pass = '')
+// Sugar with default settings
+function htmlsc($string = '', $flags = ENT_COMPAT, $charset = CONTENT_CHARSET)
  {
-       global $adminpass;
-
-       if ($pass != '' && md5($pass) == $adminpass) {
-               return TRUE;
-       } else {
-               sleep (2);      // Blocking brute force attack
-               return FALSE;
-       }
+       return htmlspecialchars($string, $flags, $charset);     // htmlsc()
  }
  
  
@@ -640,15 +735,15 @@ function pkwk_login($pass = '')
  
  // is_a --  Returns TRUE if the object is of this class or has this class as one of its parents
  // (PHP 4 >= 4.2.0)
-if (! function_exists('is_a'))
-{
+if (! function_exists('is_a')) {
+
         function is_a($class, $match)
         {
-               if (empty($class)) return false;
+               if (empty($class)) return FALSE; 
  
                 $class = is_object($class) ? get_class($class) : $class;
                 if (strtolower($class) == strtolower($match)) {
-                       return true;
+                       return TRUE;
                 } else {
                         return is_a(get_parent_class($class), $match);  // Recurse
                 }
@@ -657,8 +752,8 @@ if (! function_exists('is_a'))
  
  // array_fill -- Fill an array with values
  // (PHP 4 >= 4.2.0)
-if (! function_exists('array_fill'))
-{
+if (! function_exists('array_fill')) {
+
         function array_fill($start_index, $num, $value)
         {
                 $ret = array();
@@ -669,16 +764,27 @@ if (! function_exists('array_fill'))
  
  // md5_file -- Calculates the md5 hash of a given filename
  // (PHP 4 >= 4.2.0)
-if (! function_exists('md5_file'))
-{
+if (! function_exists('md5_file')) {
+
         function md5_file($filename)
         {
                 if (! file_exists($filename)) return FALSE;
  
                 $fd = fopen($filename, 'rb');
+               if ($fd === FALSE ) return FALSE;
                 $data = fread($fd, filesize($filename));
                 fclose($fd);
                 return md5($data);
         }
  }
-?>
+
+// sha1 -- Compute SHA-1 hash
+// (PHP 4 >= 4.3.0, PHP5)
+if (! function_exists('sha1')) {
+       if (extension_loaded('mhash')) {
+               function sha1($str)
+               {
+                       return bin2hex(mhash(MHASH_SHA1, $str));
+               }
+       }
+}