2 // PukiWiki - Yet another WikiWikiWeb clone.
3 // $Id: file.php,v 1.95 2011/01/25 15:01:01 henoheno Exp $
5 // 2002-2006 PukiWiki Developers Team
6 // 2001-2002 Originally written by yu-ji
7 // License: GPL v2 or (at your option) any later version
9 // File related functions
12 define('PKWK_MAXSHOW_ALLOWANCE', 10);
13 define('PKWK_MAXSHOW_CACHE', 'recent.dat');
16 define('PKWK_AUTOLINK_REGEX_CACHE', 'autolink.dat');
18 // Get source(wiki text) data of the page
19 // Returns FALSE if an error occurred
20 function get_source($page = NULL, $lock = TRUE, $join = FALSE)
22 //$result = NULL; // File is not found
23 $result = $join ? '' : array();
24 // Compat for "implode('', get_source($file))",
25 // -- this is slower than "get_source($file, TRUE, TRUE)"
26 // Compat for "foreach(get_source($file) as $line) {}" so that it does not emit warnings
28 $path = get_filename($page);
29 if (file_exists($path)) {
32 $fp = @fopen($path, 'r');
33 if ($fp === FALSE) return FALSE;
39 $size = filesize($path);
40 if ($size === FALSE) {
42 } else if ($size == 0) {
45 $result = fread($fp, $size);
46 if ($result !== FALSE) {
47 // Removing line-feeds
48 $result = str_replace("\r", '', $result);
53 $result = file($path);
54 if ($result !== FALSE) {
55 // Removing line-feeds
56 $result = str_replace("\r", '', $result);
69 // Get last-modified filetime of the page
70 function get_filetime($page)
72 return is_page($page) ? filemtime(get_filename($page)) - LOCALZONE : 0;
75 // Get physical file name of the page
76 function get_filename($page)
78 return DATA_DIR . encode($page) . '.txt';
81 // Put a data(wiki text) into a physical file(diff, backup, text)
82 function page_write($page, $postdata, $notimestamp = FALSE)
84 if (PKWK_READONLY) return; // Do nothing
86 $postdata = make_str_rules($postdata);
88 // Create and write diff
89 $oldpostdata = is_page($page) ? join('', get_source($page)) : '';
90 $diffdata = do_diff($oldpostdata, $postdata);
91 file_write(DIFF_DIR, $page, $diffdata);
94 make_backup($page, $postdata == ''); // Is $postdata null?
97 file_write(DATA_DIR, $page, $postdata, $notimestamp);
102 // Modify original text with user-defined / system-defined rules
103 function make_str_rules($source)
105 global $str_rules, $fixed_heading_anchor;
107 $lines = explode("\n", $source);
108 $count = count($lines);
113 for ($i = 0; $i < $count; $i++) {
114 $line = & $lines[$i]; // Modify directly
116 // Ignore null string and preformatted texts
117 if ($line == '' || $line{0} == ' ' || $line{0} == "\t") continue;
121 if (! PKWKEXP_DISABLE_MULTILINE_PLUGIN_HACK &&
123 preg_match('/#[^{]*(\{\{+)\s*$/', $line, $matches)) {
124 // Multiline convert plugin start
126 $multiline = strlen($matches[1]); // Set specific number
129 if (! PKWKEXP_DISABLE_MULTILINE_PLUGIN_HACK &&
131 preg_match('/^\}{' . $multiline . '}\s*$/', $line)) {
132 // Multiline convert plugin end
137 if ($modify === FALSE) continue;
139 // Replace with $str_rules
140 foreach ($str_rules as $pattern => $replacement)
141 $line = preg_replace('/' . $pattern . '/', $replacement, $line);
143 // Adding fixed anchor into headings
144 if ($fixed_heading_anchor &&
145 preg_match('/^(\*{1,3}.*?)(?:\[#([A-Za-z][\w-]*)\]\s*)?$/', $line, $matches) &&
146 (! isset($matches[2]) || $matches[2] == '')) {
147 // Generate unique id
148 $anchor = generate_fixed_heading_anchor_id($matches[1]);
149 $line = rtrim($matches[1]) . ' [#' . $anchor . ']';
153 // Multiline part has no stopper
154 if (! PKWKEXP_DISABLE_MULTILINE_PLUGIN_HACK &&
155 $modify === FALSE && $multiline != 0)
156 $lines[] = str_repeat('}', $multiline);
158 return implode("\n", $lines);
162 function generate_fixed_heading_anchor_id($seed)
164 // A random alphabetic letter + 7 letters of random strings from md5()
165 return chr(mt_rand(ord('a'), ord('z'))) .
166 substr(md5(uniqid(substr($seed, 0, 100), TRUE)),
170 // Read top N lines as an array
171 // (Use PHP file() function if you want to get ALL lines)
172 function file_head($file, $count = 1, $lock = TRUE, $buffer = 8192)
176 $fp = @fopen($file, 'r');
177 if ($fp === FALSE) return FALSE;
178 set_file_buffer($fp, 0);
179 if ($lock) flock($fp, LOCK_SH);
182 while (! feof($fp)) {
183 $line = fgets($fp, $buffer);
184 if ($line != FALSE) $array[] = $line;
185 if (++$index >= $count) break;
187 if ($lock) flock($fp, LOCK_UN);
188 if (! fclose($fp)) return FALSE;
194 function file_write($dir, $page, $str, $notimestamp = FALSE)
196 global $_msg_invalidiwn, $notify, $notify_diff_only, $notify_subject;
197 global $whatsdeleted, $maxshow_deleted;
199 if (PKWK_READONLY) return; // Do nothing
200 if ($dir != DATA_DIR && $dir != DIFF_DIR) die('file_write(): Invalid directory');
202 $page = strip_bracket($page);
203 $file = $dir . encode($page) . '.txt';
204 $file_exists = file_exists($file);
209 if ($dir == DATA_DIR && $str === '') {
211 if (! $file_exists) return; // Ignore null posting for DATA_DIR
213 // Update RecentDeleted (Add the $page)
214 add_recent($page, $whatsdeleted, '', $maxshow_deleted);
219 // Update RecentDeleted, and remove the page from RecentChanges
220 lastmodified_add($whatsdeleted, $page);
222 // Clear is_page() cache
223 is_page($page, TRUE);
227 } else if ($dir == DIFF_DIR && $str === " \n") {
228 return; // Ignore null posting for DIFF_DIR
232 // File replacement (Edit)
234 if (! is_pagename($page))
235 die_message(str_replace('$1', htmlsc($page),
236 str_replace('$2', 'WikiName', $_msg_invalidiwn)));
238 $str = rtrim(preg_replace('/' . "\r" . '/', '', $str)) . "\n";
239 $timestamp = ($file_exists && $notimestamp) ? filemtime($file) : FALSE;
241 $fp = fopen($file, 'a') or die('fopen() failed: ' .
242 htmlsc(basename($dir) . '/' . encode($page) . '.txt') .
244 'Maybe permission is not writable or filename is too long');
245 set_file_buffer($fp, 0);
253 if ($timestamp) pkwk_touch_file($file, $timestamp);
256 if ($dir == DATA_DIR) {
257 // Update RecentChanges (Add or renew the $page)
258 if ($timestamp === FALSE) lastmodified_add($page);
260 // Command execution per update
261 if (defined('PKWK_UPDATE_EXEC') && PKWK_UPDATE_EXEC)
262 system(PKWK_UPDATE_EXEC . ' > /dev/null &');
264 } else if ($dir == DIFF_DIR && $notify) {
265 if ($notify_diff_only) $str = preg_replace('/^[^-+].*\n/m', '', $str);
266 $footer['ACTION'] = 'Page update';
267 $footer['PAGE'] = & $page;
268 $footer['URI'] = get_script_uri() . '?' . rawurlencode($page);
269 $footer['USER_AGENT'] = TRUE;
270 $footer['REMOTE_ADDR'] = TRUE;
271 pkwk_mail_notify($notify_subject, $str, $footer) or
272 die('pkwk_mail_notify(): Failed');
275 is_page($page, TRUE); // Clear is_page() cache
278 // Update RecentDeleted
279 function add_recent($page, $recentpage, $subject = '', $limit = 0)
281 if (PKWK_READONLY || $limit == 0 || $page == '' || $recentpage == '' ||
282 check_non_list($page)) return;
285 $lines = $matches = array();
286 foreach (get_source($recentpage) as $line)
287 if (preg_match('/^-(.+) - (\[\[.+\]\])$/', $line, $matches))
288 $lines[$matches[2]] = $line;
290 $_page = '[[' . $page . ']]';
292 // Remove a report about the same page
293 if (isset($lines[$_page])) unset($lines[$_page]);
296 array_unshift($lines, '-' . format_date(UTIME) . ' - ' . $_page .
297 htmlsc($subject) . "\n");
299 // Get latest $limit reports
300 $lines = array_splice($lines, 0, $limit);
303 $fp = fopen(get_filename($recentpage), 'w') or
304 die_message('Cannot write page file ' .
305 htmlsc($recentpage) .
306 '<br />Maybe permission is not writable or filename is too long');
307 set_file_buffer($fp, 0);
310 fputs($fp, '#freeze' . "\n");
311 fputs($fp, '#norelated' . "\n"); // :)
312 fputs($fp, join('', $lines));
317 // Update PKWK_MAXSHOW_CACHE itself (Add or renew about the $page) (Light)
318 // Use without $autolink
319 function lastmodified_add($update = '', $remove = '')
321 global $maxshow, $whatsnew, $autolink;
323 // AutoLink implementation needs everything, for now
325 put_lastmodified(); // Try to (re)create ALL
329 if (($update == '' || check_non_list($update)) && $remove == '')
332 $file = CACHE_DIR . PKWK_MAXSHOW_CACHE;
333 if (! file_exists($file)) {
334 put_lastmodified(); // Try to (re)create ALL
339 pkwk_touch_file($file);
340 $fp = fopen($file, 'r+') or
341 die_message('Cannot open ' . 'CACHE_DIR/' . PKWK_MAXSHOW_CACHE);
342 set_file_buffer($fp, 0);
345 // Read (keep the order of the lines)
346 $recent_pages = $matches = array();
347 foreach(file_head($file, $maxshow + PKWK_MAXSHOW_ALLOWANCE, FALSE) as $line)
348 if (preg_match('/^([0-9]+)\t(.+)/', $line, $matches))
349 $recent_pages[$matches[2]] = $matches[1];
351 // Remove if it exists inside
352 if (isset($recent_pages[$update])) unset($recent_pages[$update]);
353 if (isset($recent_pages[$remove])) unset($recent_pages[$remove]);
355 // Add to the top: like array_unshift()
357 $recent_pages = array($update => get_filetime($update)) + $recent_pages;
360 $abort = count($recent_pages) < $maxshow;
366 foreach ($recent_pages as $_page=>$time)
367 fputs($fp, $time . "\t" . $_page . "\n");
374 put_lastmodified(); // Try to (re)create ALL
381 // Update the page 'RecentChanges'
383 $recent_pages = array_splice($recent_pages, 0, $maxshow);
384 $file = get_filename($whatsnew);
387 pkwk_touch_file($file);
388 $fp = fopen($file, 'r+') or
389 die_message('Cannot open ' . htmlsc($whatsnew));
390 set_file_buffer($fp, 0);
396 foreach ($recent_pages as $_page=>$time)
397 fputs($fp, '-' . htmlsc(format_date($time)) .
398 ' - ' . '[[' . htmlsc($_page) . ']]' . "\n");
399 fputs($fp, '#norelated' . "\n"); // :)
405 // Re-create PKWK_MAXSHOW_CACHE (Heavy)
406 function put_lastmodified()
408 global $maxshow, $whatsnew, $autolink;
410 if (PKWK_READONLY) return; // Do nothing
412 // Get WHOLE page list
413 $pages = get_existpages();
415 // Check ALL filetime
416 $recent_pages = array();
417 foreach($pages as $page)
418 if ($page != $whatsnew && ! check_non_list($page))
419 $recent_pages[$page] = get_filetime($page);
421 // Sort in descending order of last-modification date
422 arsort($recent_pages, SORT_NUMERIC);
425 // BugTrack2/179: array_splice() will break integer keys in hashtable
426 $count = $maxshow + PKWK_MAXSHOW_ALLOWANCE;
428 foreach($recent_pages as $key=>$value) {
429 unset($recent_pages[$key]);
430 $_recent[$key] = $value;
431 if (--$count < 1) break;
433 $recent_pages = & $_recent;
435 // Re-create PKWK_MAXSHOW_CACHE
436 $file = CACHE_DIR . PKWK_MAXSHOW_CACHE;
437 pkwk_touch_file($file);
438 $fp = fopen($file, 'r+') or
439 die_message('Cannot open' . 'CACHE_DIR/' . PKWK_MAXSHOW_CACHE);
440 set_file_buffer($fp, 0);
444 foreach ($recent_pages as $page=>$time)
445 fputs($fp, $time . "\t" . $page . "\n");
449 // Create RecentChanges
450 $file = get_filename($whatsnew);
451 pkwk_touch_file($file);
452 $fp = fopen($file, 'r+') or
453 die_message('Cannot open ' . htmlsc($whatsnew));
454 set_file_buffer($fp, 0);
458 foreach (array_keys($recent_pages) as $page) {
459 $time = $recent_pages[$page];
460 $s_lastmod = htmlsc(format_date($time));
461 $s_page = htmlsc($page);
462 fputs($fp, '-' . $s_lastmod . ' - [[' . $s_page . ']]' . "\n");
464 fputs($fp, '#norelated' . "\n"); // :)
470 list($pattern, $pattern_a, $forceignorelist) =
471 get_autolink_pattern($pages);
473 $file = CACHE_DIR . PKWK_AUTOLINK_REGEX_CACHE;
474 pkwk_touch_file($file);
475 $fp = fopen($file, 'r+') or
476 die_message('Cannot open ' . 'CACHE_DIR/' . PKWK_AUTOLINK_REGEX_CACHE);
477 set_file_buffer($fp, 0);
481 fputs($fp, $pattern . "\n");
482 fputs($fp, $pattern_a . "\n");
483 fputs($fp, join("\t", $forceignorelist) . "\n");
489 // Get elapsed date of the page
490 function get_pg_passage($page, $sw = TRUE)
492 global $show_passage;
493 if (! $show_passage) return '';
495 $time = get_filetime($page);
496 $pg_passage = ($time != 0) ? get_passage($time) : '';
498 return $sw ? '<small>' . $pg_passage . '</small>' : ' ' . $pg_passage;
501 // Last-Modified header
502 function header_lastmod($page = NULL)
506 if ($lastmod && is_page($page)) {
508 header('Last-Modified: ' .
509 date('D, d M Y H:i:s', get_filetime($page)) . ' GMT');
513 // Get a page list of this wiki
514 function get_existpages($dir = DATA_DIR, $ext = '.txt')
518 $pattern = '((?:[0-9A-F]{2})+)';
519 if ($ext != '') $ext = preg_quote($ext, '/');
520 $pattern = '/^' . $pattern . $ext . '$/';
522 $dp = @opendir($dir) or
523 die_message($dir . ' is not found or not readable.');
525 while ($file = readdir($dp))
526 if (preg_match($pattern, $file, $matches))
527 $aryret[$file] = decode($matches[1]);
533 // Get PageReading(pronounce-annotated) data in an array()
534 function get_readings()
536 global $pagereading_enable, $pagereading_kanji2kana_converter;
537 global $pagereading_kanji2kana_encoding, $pagereading_chasen_path;
538 global $pagereading_kakasi_path, $pagereading_config_page;
539 global $pagereading_config_dict;
541 $pages = get_existpages();
544 foreach ($pages as $page)
545 $readings[$page] = '';
547 $deletedPage = FALSE;
549 foreach (get_source($pagereading_config_page) as $line) {
551 if(preg_match('/^-\[\[([^]]+)\]\]\s+(.+)$/', $line, $matches)) {
552 if(isset($readings[$matches[1]])) {
553 // This page is not clear how to be pronounced
554 $readings[$matches[1]] = $matches[2];
556 // This page seems deleted
562 // If enabled ChaSen/KAKASI execution
563 if($pagereading_enable) {
565 // Check whether there is a page whose pronunciation is not clear
566 $unknownPage = FALSE;
567 foreach ($readings as $page => $reading) {
574 // Execute ChaSen/KAKASI, and get annotation
576 switch(strtolower($pagereading_kanji2kana_converter)) {
578 if(! file_exists($pagereading_chasen_path))
579 die_message('ChaSen not found: ' . $pagereading_chasen_path);
581 $tmpfname = tempnam(realpath(CACHE_DIR), 'PageReading');
582 $fp = fopen($tmpfname, 'w') or
583 die_message('Cannot write temporary file "' . $tmpfname . '".' . "\n");
584 foreach ($readings as $page => $reading) {
585 if($reading != '') continue;
586 fputs($fp, mb_convert_encoding($page . "\n",
587 $pagereading_kanji2kana_encoding, SOURCE_ENCODING));
591 $chasen = "$pagereading_chasen_path -F %y $tmpfname";
592 $fp = popen($chasen, 'r');
595 die_message('ChaSen execution failed: ' . $chasen);
597 foreach ($readings as $page => $reading) {
598 if($reading != '') continue;
601 $line = mb_convert_encoding($line, SOURCE_ENCODING,
602 $pagereading_kanji2kana_encoding);
604 $readings[$page] = $line;
609 die_message('Temporary file can not be removed: ' . $tmpfname);
612 case 'kakasi': /*FALLTHROUGH*/
614 if(! file_exists($pagereading_kakasi_path))
615 die_message('KAKASI not found: ' . $pagereading_kakasi_path);
617 $tmpfname = tempnam(realpath(CACHE_DIR), 'PageReading');
618 $fp = fopen($tmpfname, 'w') or
619 die_message('Cannot write temporary file "' . $tmpfname . '".' . "\n");
620 foreach ($readings as $page => $reading) {
621 if($reading != '') continue;
622 fputs($fp, mb_convert_encoding($page . "\n",
623 $pagereading_kanji2kana_encoding, SOURCE_ENCODING));
627 $kakasi = "$pagereading_kakasi_path -kK -HK -JK < $tmpfname";
628 $fp = popen($kakasi, 'r');
631 die_message('KAKASI execution failed: ' . $kakasi);
634 foreach ($readings as $page => $reading) {
635 if($reading != '') continue;
638 $line = mb_convert_encoding($line, SOURCE_ENCODING,
639 $pagereading_kanji2kana_encoding);
641 $readings[$page] = $line;
646 die_message('Temporary file can not be removed: ' . $tmpfname);
650 $patterns = $replacements = $matches = array();
651 foreach (get_source($pagereading_config_dict) as $line) {
653 if(preg_match('|^ /([^/]+)/,\s*(.+)$|', $line, $matches)) {
654 $patterns[] = $matches[1];
655 $replacements[] = $matches[2];
658 foreach ($readings as $page => $reading) {
659 if($reading != '') continue;
661 $readings[$page] = $page;
662 foreach ($patterns as $no => $pattern)
663 $readings[$page] = mb_convert_kana(mb_ereg_replace($pattern,
664 $replacements[$no], $readings[$page]), 'aKCV');
669 die_message('Unknown kanji-kana converter: ' . $pagereading_kanji2kana_converter . '.');
674 if($unknownPage || $deletedPage) {
676 asort($readings, SORT_STRING); // Sort by pronouncing(alphabetical/reading) order
678 foreach ($readings as $page => $reading)
679 $body .= '-[[' . $page . ']] ' . $reading . "\n";
681 page_write($pagereading_config_page, $body);
685 // For pages whose pronunciation is not clear, use the page name itself as the reading
686 foreach ($pages as $page) {
687 if($readings[$page] == '')
688 $readings[$page] = $page;
694 // Get a list of encoded files (must specify a directory and a suffix)
695 function get_existfiles($dir, $ext)
697 $pattern = '/^(?:[0-9A-F]{2})+' . preg_quote($ext, '/') . '$/';
699 $dp = @opendir($dir) or die_message($dir . ' is not found or not readable.');
700 while ($file = readdir($dp))
701 if (preg_match($pattern, $file))
702 $aryret[] = $dir . $file;
707 // Get a list of related pages of the page
708 function links_get_related($page)
710 global $vars, $related;
711 static $links = array();
713 if (isset($links[$page])) return $links[$page];
715 // If possible, merge related pages generated by make_link()
716 $links[$page] = ($page == $vars['page']) ? $related : array();
718 // Get related pages from DB
719 $links[$page] += links_get_related_db($vars['page']);
721 return $links[$page];
724 // _If needed_, re-create the file to change/correct ownership into PHP's
725 // NOTE: Does not work on Windows
726 function pkwk_chown($filename, $preserve_time = TRUE)
728 static $php_uid; // PHP's UID
730 if (! isset($php_uid)) {
731 if (extension_loaded('posix')) {
732 $php_uid = posix_getuid(); // Unix
734 $php_uid = 0; // Windows
738 // Lock for pkwk_chown()
739 $lockfile = CACHE_DIR . 'pkwk_chown.lock';
740 $flock = fopen($lockfile, 'a') or
741 die('pkwk_chown(): fopen() failed for: CACHEDIR/' .
742 basename(htmlsc($lockfile)));
743 flock($flock, LOCK_EX) or die('pkwk_chown(): flock() failed for lock');
746 $stat = stat($filename) or
747 die('pkwk_chown(): stat() failed for: ' . basename(htmlsc($filename)));
748 if ($stat[4] === $php_uid) {
749 // NOTE: Windows always here
750 $result = TRUE; // Seems the same UID. Nothing to do
752 $tmp = $filename . '.' . getmypid() . '.tmp';
754 // Lock source $filename to avoid file corruption
755 // NOTE: Not 'r+'. Don't check write permission here
756 $ffile = fopen($filename, 'r') or
757 die('pkwk_chown(): fopen() failed for: ' .
758 basename(htmlsc($filename)));
760 // Try to chown by re-creating files
762 // * touch() before copy() is for 'rw-r--r--' instead of 'rwxr-xr-x' (with umask 022).
763 // * (PHP 4 < PHP 4.2.0) touch() with the third argument is not implemented; it returns NULL and emits a warning.
764 // * @unlink() before rename() is for Windows but here's for Unix only
765 flock($ffile, LOCK_EX) or die('pkwk_chown(): flock() failed');
766 $result = touch($tmp) && copy($filename, $tmp) &&
767 ($preserve_time ? (touch($tmp, $stat[9], $stat[8]) || touch($tmp, $stat[9])) : TRUE) &&
768 rename($tmp, $filename);
769 flock($ffile, LOCK_UN) or die('pkwk_chown(): flock() failed');
771 fclose($ffile) or die('pkwk_chown(): fclose() failed');
773 if ($result === FALSE) @unlink($tmp);
776 // Unlock for pkwk_chown()
777 flock($flock, LOCK_UN) or die('pkwk_chown(): flock() failed for lock');
778 fclose($flock) or die('pkwk_chown(): fclose() failed for lock');
783 // touch() with trying pkwk_chown()
784 function pkwk_touch_file($filename, $time = FALSE, $atime = FALSE)
786 // Is the owner incorrect and impossible to correct?
787 if (! file_exists($filename) || pkwk_chown($filename)) {
788 if ($time === FALSE) {
789 $result = touch($filename);
790 } else if ($atime === FALSE) {
791 $result = touch($filename, $time);
793 $result = touch($filename, $time, $atime);
797 die('pkwk_touch_file(): Invalid UID and (not writable for the directory or not a flie): ' .
798 htmlsc(basename($filename)));