4 * written by Takashi Sakamoto as of Dec.24, 2011
5 * This class provides wrapper functions around iconv and mbstring
6 * and a mail function with a 7bit character encoder
7 * for multibyte processing.
8 * It also holds the members related to locale.
11 private static $mode = FALSE;
13 private static $charset = '';
14 private static $language = '';
15 private static $script = '';
16 private static $region = '';
17 private static $locale_list = array();
18 private static $timezone = 'UTC';
/**
 * i18n::init
 * Initializing i18n class: builds the list of locale files found in $dir
 * for $charset, then selects the multibyte backend (iconv or mbstring).
 *
 * @param string $charset character set
 * @param string $dir directory scanned for "$language_$script_$region.$charset.php" files
 * @return boolean FALSE when $dir cannot be opened, otherwise TRUE
 *
 * NOTE(review): the return statements and the closedir() call were restored
 * from a truncated listing; confirm them against the repository copy.
 */
public static function init($charset, $dir)
{
	/* i18n is already initialized */
	if ( self::$mode )
	{
		return TRUE;
	}

	/* make locale list in this Nucleus CMS */
	if ( ($handle = opendir($dir)) === FALSE )
	{
		return FALSE;
	}
	/* quote the charset so that it is matched literally */
	$pattern = '#^(.+_.+_.+)\.' . preg_quote($charset, '#') . '\.php$#';
	/* compare with FALSE explicitly: a file named "0" must not end the scan */
	while ( ($filename = readdir($handle)) !== FALSE )
	{
		if ( preg_match($pattern, $filename, $matches)
		  && !in_array($matches[1], self::$locale_list, TRUE) )
		{
			self::$locale_list[] = $matches[1];
		}
	}
	closedir($handle);

	/* set i18n backend and validate character set */
	if ( extension_loaded('iconv') )
	{
		/* this is just for checking the charset: input, output and internal encoding */
		if ( iconv_set_encoding('input_encoding', $charset)
		  && iconv_set_encoding('output_encoding', $charset)
		  && iconv_set_encoding('internal_encoding', $charset) )
		{
			self::$charset = $charset;
			self::$mode = 'iconv';
		}
	}
	else if ( extension_loaded('mbstring') )
	{
		/* this is just for checking the charset. */
		if ( mb_http_output($charset)
		  && mb_internal_encoding($charset)
		  && mb_regex_encoding($charset) )
		{
			self::$charset = $charset;
			self::$mode = 'mbstring';
		}
	}

	return TRUE;
}
/**
 * i18n::get_available_locale_list
 * Hands back the locale names collected for the current charset.
 *
 * @param void
 * @return array available locale list
 */
public static function get_available_locale_list()
{
	$available = self::$locale_list;
	return $available;
}
/**
 * i18n::get_current_charset
 * Reports the character set currently stored in the class.
 *
 * @param void
 * @return string current character set
 */
public static function get_current_charset()
{
	$current = self::$charset;
	return $current;
}
/**
 * i18n::set_current_locale
 * Splits a locale name into language, script and region and stores them.
 *
 * NOTE: naming rule is "$language_$script_$region.$charset.php".
 * NOTE(review): the TRUE/FALSE return statements were restored from a
 * truncated listing; confirm them against the repository copy.
 *
 * @param string $locale locale name written as language_script_region
 * @return bool TRUE when $locale has the three parts, otherwise FALSE
 */
public static function set_current_locale($locale)
{
	$parts = array();
	if ( !preg_match('#^(.+)_(.+)_(.+)$#', $locale, $parts) )
	{
		return FALSE;
	}

	self::$language = $parts[1];
	self::$script = $parts[2];
	self::$region = $parts[3];
	return TRUE;
}
/**
 * i18n::get_current_locale
 * Joins the stored language, script and region with underscores.
 *
 * @param void
 * @return string locale name written as language_script_region
 */
public static function get_current_locale()
{
	return self::$language . '_' . self::$script . '_' . self::$region;
}
/**
 * i18n::confirm_default_date_timezone
 * Avoids the E_NOTICE or E_WARNING generated on every call to a date/time
 * function when a server lacks its timezone setting: the timezone PHP
 * reports is remembered and then set explicitly as the default.
 * http://www.php.net/manual/en/function.date-default-timezone-set.php
 *
 * @param void
 * @return void
 */
public static function confirm_default_date_timezone()
{
	if ( function_exists('date_default_timezone_get') )
	{
		/* warnings about a missing setting are silenced on purpose */
		$detected = @date_default_timezone_get();
		if ( $detected !== FALSE )
		{
			self::$timezone = $detected;
		}
	}

	if ( function_exists('date_default_timezone_set') )
	{
		@date_default_timezone_set(self::$timezone);
	}
	return;
}
/**
 * i18n::get_date_timezone
 * Reports the timezone name stored in the class.
 *
 * @param void
 * @return string timezone name
 */
public static function get_date_timezone()
{
	$zone = self::$timezone;
	return $zone;
}
/**
 * i18n::hen
 * htmlentities wrapper. The string is decoded first, so entities that are
 * already present are not encoded a second time.
 *
 * @param string $string target string
 * @param string $quotation quotation mode. please refer to the argument of PHP built-in htmlentities
 * @return string escaped string
 */
public static function hen($string, $quotation=ENT_QUOTES)
{
	return (string) htmlentities(
		html_entity_decode($string, $quotation, self::$charset),
		$quotation,
		self::$charset
	);
}
/**
 * i18n::hsc
 * htmlspecialchars wrapper. The string is decoded first, so special
 * characters that are already escaped are not escaped a second time.
 *
 * @param string $string target string
 * @param string $quotation quotation mode. please refer to the argument of PHP built-in htmlspecialchars
 * @return string escaped string
 *
 * NOTE: htmlspecialchars_decode() is an ASCII-to-ASCII conversion
 *  and its target consists of only a few letters,
 *  so calling it without a character set causes no problems.
 */
public static function hsc($string, $quotation=ENT_QUOTES)
{
	return (string) htmlspecialchars(
		htmlspecialchars_decode($string, $quotation),
		$quotation,
		self::$charset
	);
}
/**
 * i18n::convert
 * character set converter
 *
 * @param string $string target string binary
 * @param string $from original character set encoding
 * @param string $to target character set encoding; the current charset when left empty
 * @return string converted string (unchanged when no backend is selected)
 *
 * NOTE(review): the empty-$to test was restored from a truncated listing;
 * confirm its exact form against the repository copy.
 */
public static function convert($string, $from, $to='')
{
	$target_charset = ( $to == '' ) ? self::$charset : $to;

	switch ( self::$mode )
	{
		case 'iconv':
			$string = iconv($from, $target_charset . '//TRANSLIT', $string);
			break;
		case 'mbstring':
			$string = mb_convert_encoding($string, $target_charset, $from);
			break;
	}
	return (string) $string;
}
/**
 * i18n::strlen
 * strlen wrapper: counts letters with the selected backend and falls back
 * to the byte-oriented strlen() when no backend is selected.
 *
 * @param string $string target string
 * @return integer the number of letters
 */
public static function strlen($string)
{
	switch ( self::$mode )
	{
		case 'iconv':
			$count = iconv_strlen($string, self::$charset);
			break;
		case 'mbstring':
			$count = mb_strlen($string, self::$charset);
			break;
		default:
			$count = strlen($string);
	}
	return (int) $count;
}
/**
 * i18n::strpos
 * strpos wrapper: searches with the selected backend and falls back to the
 * byte-oriented strpos() when no backend is selected.
 *
 * @param string $haystack string to search
 * @param string $needle string for search
 * @param integer $offset the position from which the search should be performed.
 * @return integer/FALSE the numeric position of the first occurrence of needle in haystack
 */
public static function strpos($haystack, $needle, $offset=0)
{
	switch ( self::$mode )
	{
		case 'iconv':
			$found = iconv_strpos($haystack, $needle, $offset, self::$charset);
			break;
		case 'mbstring':
			$found = mb_strpos($haystack, $needle, $offset, self::$charset);
			break;
		default:
			$found = strpos($haystack, $needle, $offset);
	}

	/* FALSE must stay FALSE; only a real position is cast */
	return ( $found === FALSE ) ? FALSE : (int) $found;
}
/**
 * i18n::strrpos
 * strrpos wrapper: searches with the selected backend and falls back to the
 * byte-oriented strrpos() when no backend is selected.
 *
 * @param string $haystack string to search
 * @param string $needle string for search
 * @return integer/FALSE the numeric position of the last occurrence of needle in haystack
 */
public static function strrpos ($haystack, $needle)
{
	switch ( self::$mode )
	{
		case 'iconv':
			$found = iconv_strrpos($haystack, $needle, self::$charset);
			break;
		case 'mbstring':
			$found = mb_strrpos($haystack, $needle, 0, self::$charset);
			break;
		default:
			$found = strrpos($haystack, $needle, 0);
	}

	/* FALSE must stay FALSE; only a real position is cast */
	return ( $found === FALSE ) ? FALSE : (int) $found;
}
/**
 * i18n::substr
 * substr wrapper: cuts by letters with the selected backend and falls back
 * to the byte-oriented substr() when no backend is selected.
 *
 * NOTE: $length defaults to 0, which is passed to the backend as it is,
 *  so the callers in this class always give the length explicitly.
 *
 * @param string $string string to be cut
 * @param integer $start the position of starting
 * @param integer $length the length to be cut
 * @return string the extracted part of string
 */
public static function substr($string, $start, $length=0)
{
	if ( self::$mode == 'iconv' )
	{
		$return = iconv_substr($string, $start, $length, self::$charset);
	}
	else if ( self::$mode == 'mbstring' )
	{
		$return = mb_substr($string, $start, $length, self::$charset);
	}
	else
	{
		/* fallback must cut the string as well (it used to call strrpos) */
		$return = substr($string, $start, $length);
	}
	return (string) $return;
}
/**
 * i18n::explode
 * explode function based on multibyte processing with non-pcre regular expressions
 *
 * NOTE: we SHOULD use preg_split function instead of this,
 *  and I hope this is obsoleted near future...
 * NOTE: unlike PHP's explode(), a positive $limit keeps only the first
 *  $limit pieces; the rest of the string is dropped.
 * NOTE(review): the original control flow was restored from a truncated
 *  listing; confirm the $limit behaviour against the repository copy.
 *
 * @param string $delimiter singlebyte or multibyte delimiter
 * @param string $target target string
 * @param integer $limit the number of index for returned array, 0 for no limit
 * @return array array splitted by $delimiter
 */
public static function explode($delimiter, $target, $limit=0)
{
	$delimiter_length = self::strlen($delimiter);

	/* nothing to split on: hand the target back as the only piece */
	if ( $delimiter_length == 0 || self::strpos($target, $delimiter) === FALSE )
	{
		return (array) $target;
	}

	$array = array();
	for ( $count=0; $limit == 0 || $count < $limit; $count++ )
	{
		$offset = self::strpos($target, $delimiter);
		/* compare with FALSE strictly: position 0 is an empty field, not "not found" */
		if ( $offset === FALSE )
		{
			$array[] = $target;
			break;
		}
		$array[] = self::substr($target, 0, $offset);
		/* skip the whole delimiter, which may be longer than one letter */
		$length = self::strlen($target) - $offset - $delimiter_length;
		$target = self::substr($target, $offset + $delimiter_length, $length);
	}

	return (array) $array;
}
/**
 * i18n::strftime
 * strftime function based on multibyte processing: only the conversion
 * specifiers are handed to the built-in strftime(), one at a time, and the
 * text between them is copied as it is.
 *
 * NOTE: the built-in strftime() is deprecated as of PHP 8.1.
 *
 * @param string $format format with singlebyte or multibyte
 * @param timestamp $timestamp UNIX timestamp, the current time when omitted
 * @return string formatted timestamp
 */
public static function strftime($format, $timestamp='')
{
	/* strict tests, so that timestamp 0 (the epoch) is honoured */
	if ( $timestamp === '' || $timestamp === NULL || $timestamp === FALSE )
	{
		$timestamp = time();
	}

	/*
	 * Split around "%%" and "%x" and keep them in the result:
	 * even indexes hold literal text, odd indexes hold specifiers.
	 * No marker character is inserted, so commas in $format survive.
	 */
	$elements = preg_split('#(%%|%[^%])#', (string) $format, -1, PREG_SPLIT_DELIM_CAPTURE);
	if ( $elements === FALSE )
	{
		return (string) $format;
	}

	$formatted = '';
	foreach ( $elements as $index => $element )
	{
		if ( $index % 2 == 0 )
		{
			$formatted .= $element;
		}
		else if ( $element === '%%' )
		{
			$formatted .= '%';
		}
		else
		{
			$formatted .= strftime($element, $timestamp);
		}
	}

	return (string) $formatted;
}
/**
 * i18n::mail
 * Send mails with headers including 7bit-encoded multibyte string
 *
 * @param string $to receivers including singlebyte and multibyte strings, based on RFC 5322
 * @param string $subject subject including singlebyte and multibyte strings
 * @param string $message message including singlebyte and multibyte strings
 * @param string $from senders including singlebyte and multibyte strings, based on RFC 5322
 * @param string(B/Q) $scheme 7bit-encoder scheme based on RFC 2047
 * @return boolean accepted delivery or not; FALSE when $to holds no usable mailbox
 */
public static function mail($to, $subject, $message, $from, $scheme='B')
{
	$to = self::mailbox_list_encoder($to, $scheme);
	/* no usable receiver: do not hand an empty recipient to mail() */
	if ( $to === FALSE || $to === '' )
	{
		return FALSE;
	}

	$subject = self::seven_bit_characters_encoder($subject, $scheme);
	$from = self::mailbox_list_encoder($from, $scheme);

	/*
	 * All of 7bit character encoding derives from ISO/IEC 646
	 * So we can decide the body's encoding bit count by this regular expression.
	 */
	$bitcount = '8bit';
	if ( preg_match('#\A[\x00-\x7f]*\z#', $message) )
	{
		$bitcount = '7bit';
	}

	$headers = '';
	/* an unusable sender is left out instead of sending an empty "From:" header */
	if ( $from !== FALSE && $from !== '' )
	{
		$headers .= "From: {$from}\n";
	}
	$headers .= 'Content-Type: text/html; charset=' . self::$charset . "; format=flowed; delsp=yes\n"
	          . "Content-Transfer-Encoding: {$bitcount}\n"
	          . "X-Mailer: Nucleus CMS i18n class\n";

	return mail($to, $subject, $message, $headers);
}
/**
 * i18n::mailbox_list_encoder
 * Encode multi byte strings included in mailbox.
 * The format of mailbox is based on RFC 5322, which obsoletes RFC 2822
 *
 * @param string $mailbox_list mailbox list, separated by commas
 * @param string $scheme 7bit-encoder scheme, 'B' or 'Q'
 * @return string/FALSE encoded string, FALSE when the list holds no usable mailbox
 * @link http://www.faqs.org/rfcs/rfc5322.html
 * @see 3.4. Address Specification
 *
 * NOTE(review): the FALSE return was restored from a truncated listing;
 * confirm it against the repository copy.
 */
private static function mailbox_list_encoder ($mailbox_list, $scheme='B')
{
	$encoded_mailboxes = array();
	$mailboxes = preg_split('#,#', $mailbox_list);
	foreach ( $mailboxes as $mailbox )
	{
		$mailbox = trim($mailbox);

		/* a raw line break inside an address would start a new header line */
		if ( $mailbox === '' || preg_match('#[\r\n]#', $mailbox) )
		{
			continue;
		}

		if ( preg_match("#^([^,]+)?<([^,]+)?@([^,]+)?>$#", $mailbox, $match) )
		{
			/* optional groups at the end are missing from $match when they did not take part */
			$display_name = trim($match[1]);
			$local_part = isset($match[2]) ? trim($match[2]) : '';
			$domain = isset($match[3]) ? trim($match[3]) : '';

			if ( $display_name === '' )
			{
				$encoded_mailboxes[] = "<{$local_part}@{$domain}>";
			}
			else
			{
				$display_name = self::seven_bit_characters_encoder($display_name, $scheme);
				$encoded_mailboxes[] = "{$display_name} <{$local_part}@{$domain}>";
			}
		}
		else if ( preg_match("#([^,]+)?@([^,]+)?#", $mailbox) )
		{
			$encoded_mailboxes[] = $mailbox;
		}
	}

	if ( $encoded_mailboxes == array() )
	{
		return FALSE;
	}
	return implode(',', $encoded_mailboxes);
}
/**
 * i18n::seven_bit_characters_encoder
 * Encoder into 7bit ASCII expression for Non-ASCII Text based on RFC 2047.
 * The text is cut into encoded-words, each one placed on its own folded
 * line (CR LF SPACE in front of it).
 *
 * @link http://www.faqs.org/rfcs/rfc2047.html
 * @see 2. Syntax of encoded-words
 * @param string $string Target string with header field
 * @param string $scheme type of 7 bit encoding, should be 'B' or 'Q'; anything else means 'B'
 * @return string encoded string
 *
 * NOTE: iconv extension give the same functions as this and each encoder in PHP5
 *  These implementation are for the servers which is lack of iconv extension
 *
 * NOTE: RFC 2047 has an ambiguity for dealing with 'linear-white-space'.
 *  This causes a trouble related to line breaking between single byte and multi byte strings.
 *  To avoid this, single byte string is encoded as well as multi byte string here.
 *
 * NOTE: RFC 2231 allows the specification of the language to be used
 *  for display as well as the character set but isn't applied here.
 */
private static function seven_bit_characters_encoder($string, $scheme='B')
{
	if ( $scheme != 'Q' )
	{
		$scheme = 'B';
	}

	$header = chr(13) . chr(10) . chr(32) . '=?' . self::$charset . "?{$scheme}?";
	$footer = "?=";
	/* room left for the encoded text in one line */
	$restriction = 78 - strlen($header) - strlen($footer);

	$encoded_words = array();
	/* pending content of the current word: raw bytes for 'B', encoded text for 'Q' */
	$pending = '';
	$total = self::strlen($string);

	for ( $i = 0; $i < $total; $i++ )
	{
		$letter = self::substr($string, $i, 1);

		if ( $scheme == 'B' )
		{
			$piece = $letter;
			/* base64 turns every started group of 3 bytes into 4 characters */
			$expected_length = 4 * ceil((strlen($pending) + strlen($piece)) / 3);
		}
		else
		{
			$piece = self::q_encoder($letter);
			$expected_length = strlen($pending) + strlen($piece);
		}

		/* the word is full: close it before adding the letter (never emit an empty word) */
		if ( $expected_length > $restriction && $pending !== '' )
		{
			$encoded_text = ( $scheme == 'B' ) ? self::b_encoder($pending) : $pending;
			$encoded_words[] = "{$header}{$encoded_text}{$footer}";
			$pending = '';
		}

		$pending .= $piece;
	}

	if ( $pending !== '' )
	{
		$encoded_text = ( $scheme == 'B' ) ? self::b_encoder($pending) : $pending;
		$encoded_words[] = "{$header}{$encoded_text}{$footer}";
	}

	return implode('', $encoded_words);
}
/**
 * i18n::b_encoder
 * B encoder according to RFC 2047.
 * The "B" encoding is identical to the "BASE64" encoding defined by RFC 4648.
 *
 * @link http://tools.ietf.org/html/rfc4648
 * @see 6.8. Base64 Content-Transfer-Encoding
 * @param string $target targeted string
 * @return string encoded string
 *
 * NOTE: According to RFC 4648
 * (1) The final quantum of encoding input is an integral multiple of 24 bits;
 *     here, the final unit of encoded output will be an integral multiple
 *     of 4 characters with no "=" padding.
 * (2) The final quantum of encoding input is exactly 8 bits; here,
 *     the final unit of encoded output will be two characters followed
 *     by two "=" padding characters.
 * (3) The final quantum of encoding input is exactly 16 bits; here,
 *     the final unit of encoded output will be three characters followed
 *     by one "=" padding character.
 */
private static function b_encoder($target)
{
	$encoded = base64_encode($target);
	return $encoded;
}
/**
 * i18n::q_encoder
 * Q encoder according to RFC 2047.
 * The "Q" encoding is similar to "Quoted-Printable" content-transfer-encoding defined in RFC 2045,
 * but the "Q" encoding and the "Quoted-Printable" are different a bit.
 *
 * @link http://www.faqs.org/rfcs/rfc2047.html
 * @see 4.2. The "Q" encoding
 * @param string $target targeted string
 * @return string encoded string
 *
 * NOTE: According to RFC 2047
 * (1) Any 8-bit value may be represented by a "=" followed by two hexadecimal digits.
 *     For example, if the character set in use were ISO-8859-1,
 *     the "=" character would thus be encoded as "=3D", and a SPACE by "=20".
 *     (Upper case should be used for hexadecimal digits "A" through "F".)
 * (2) The 8-bit hexadecimal value 20 (e.g., ISO-8859-1 SPACE) may be
 *     represented as "_" (underscore, ASCII 95.).
 *     (This character may not pass through some internetwork mail gateways,
 *     but its use will greatly enhance readability of "Q" encoded data
 *     with mail readers that do not support this encoding.)
 *     Note that the "_" always represents hexadecimal 20,
 *     even if the SPACE character occupies a different code position
 *     in the character set in use.
 * (3) 8-bit values which correspond to printable ASCII characters
 *     other than "=", "?", and "_" (underscore), MAY be represented as those characters.
 *     (But see section 5 for restrictions.)
 *     In particular, SPACE and TAB MUST NOT be represented as themselves within encoded words.
 */
private static function q_encoder($target)
{
	$target = (string) $target;
	$string = '';
	$length = strlen($target);

	for ( $i = 0; $i < $length; $i++ )
	{
		$letter = $target[$i];
		$order = ord($letter);

		// Space should be encoded as "_"
		if ( $order == 32 )
		{
			$string .= '_';
		}
		// Printable ASCII characters without "=", "?", "_" stand for themselves
		else if ( 33 <= $order && $order <= 126
		       && $letter !== '=' && $letter !== '?' && $letter !== '_' )
		{
			$string .= $letter;
		}
		// Everything else becomes "=" and exactly two upper case hexadecimal digits
		else
		{
			$string .= sprintf('=%02X', $order);
		}
	}

	return $string;
}
/**
 * i18n::convert_locale_to_old_language_file_name()
 * Looks up the old language file name for a locale in the reference table.
 * Old names holding a "-" are tied to a charset and are compared with
 * "locale.charset"; the other names are compared with the locale alone.
 *
 * NOTE: this should be obsoleted near future.
 * NOTE(review): the "break" after a charset-specific hit was restored from
 *  a truncated listing; confirm it against the repository copy.
 *
 * @param string $target_locale locale name as language_script_region
 * @return string old language file name, empty when nothing fits
 */
public static function convert_locale_to_old_language_file_name($target_locale)
{
	$old_name = '';
	$locale_with_charset = $target_locale . '.' . self::$charset;

	foreach ( self::$lang_refs as $language => $locale )
	{
		if ( !preg_match('#-#', $language) )
		{
			/* charset-neutral name: keep looking, a later entry may replace it */
			if ( $target_locale == $locale )
			{
				$old_name = $language;
			}
			continue;
		}

		if ( $locale_with_charset == $locale )
		{
			$old_name = $language;
			break;
		}
	}
	return $old_name;
}
/**
 * i18n::convert_old_language_file_name_to_locale()
 * Looks up the locale for an old language file name in the reference table
 * and strips the ".charset" part when the table entry carries one.
 *
 * NOTE: this should be obsoleted near future.
 * NOTE(review): stopping at the first hit was restored from a truncated
 *  listing; confirm it against the repository copy.
 *
 * @param string $target_language old language file name
 * @return string locale name as language_script_region, empty when unknown
 */
public static function convert_old_language_file_name_to_locale($target_language)
{
	foreach ( self::$lang_refs as $language => $locale )
	{
		if ( $target_language != $language )
		{
			continue;
		}
		return preg_match('#^(.+)\.(.+)$#', $locale, $match) ? $match[1] : $locale;
	}
	return '';
}
/**
 * i18n::$lang_refs
 * reference table to convert old and new way to name language files.
 * Keys are the old language file names; values are locale names, followed
 * by ".charset" where the old name was tied to one character set.
 *
 * NOTE: this should be obsoleted as soon as possible.
 * NOTE(review): "fi_Latn_FU" looks like a typo for "fi_Latn_FI"; it is kept
 *  as it is because locale file names may depend on it. Confirm and fix.
 */
private static $lang_refs = array(
	'english'             => 'en_Latn_US',
	'english-utf8'        => 'en_Latn_US.UTF-8',
	'bulgarian'           => 'bg_Cyrl_BG',
	'finnish'             => 'fi_Latn_FU',
	'catalan'             => 'ca_Latn_ES',
	'french'              => 'fr_Latn_FR',
	'russian'             => 'ru_Cyrl_RU',
	'chinese'             => 'zh_Hans_CN',
	'simchinese'          => 'zh_Hans_CN',
	'chineseb5'           => 'zh_Hant_TW',
	'traditional_chinese' => 'zh_Hant_TW',
	'galego'              => 'gl_Latn_ES',
	'german'              => 'de_Latn_DE',
	'korean-utf'          => 'ko_Kore_KR.UTF-8',
	'korean-euc-kr'       => 'ko_Kore_KR.EUC-KR',
	'slovak'              => 'sk_Latn_SK',
	'czech'               => 'cs_Latn_CZ',
	'hungarian'           => 'hu_Latn_HU',
	'latvian'             => 'lv_Latn_LV',
	'nederlands'          => 'nl_Latn_NL',
	'italiano'            => 'it_Latn_IT',
	'persian'             => 'fa_Arab_IR',
	'spanish'             => 'es_Latn_ES',
	'spanish-utf8'        => 'es_Latn_ES.UTF-8',
	'japanese-euc'        => 'ja_Jpan_JP.EUC-JP',
	'japanese-utf8'       => 'ja_Jpan_JP.UTF-8',
	'portuguese_brazil'   => 'pt_Latn_BR'
);