OSDN Git Service

CHANGE:SKINIMPORTクラスでXMLをパースする前後にi18n::convert()による文字符号化方式の変換を追加
author sakamocchi <o-takashi@sakamocchi.jp>
Sun, 15 Jan 2012 07:24:06 +0000 (16:24 +0900)
committer sakamocchi <o-takashi@sakamocchi.jp>
Sun, 15 Jan 2012 07:24:06 +0000 (16:24 +0900)
CHANGE: the 'SKINIMPORT' class now converts XML to UTF-8 before parsing
and converts the parsed data back afterwards with i18n::convert().

Nucleus CMSのXMLパーサーはJames ClarkのexpatによるPHP拡張を用いているが、このパーサーはUS-ASCII/ISO-8859-1/UTF-8によって符号化された文字列しか扱えない。このコミットでは、パースする前後にi18n::convert()による文字符号化方式の変換処理を入れることで、さまざまな文字符号化方式を適用したXMLを扱えるようにした。

Nucleus CMS uses the PHP extension based on James Clark's expat to parse
XML. This parser can only handle strings encoded in US-ASCII, ISO-8859-1
or UTF-8. By converting the character encoding with i18n::convert() before
and after parsing, this commit makes it possible to handle XML files
written in various character encoding schemes.

この問題は日本語フォーラムの以下の投稿で報告されている。この場合EUC-JP文字符号化方式が問題となっていた。
This bug was reported on the Japanese forum, where the EUC-JP character
encoding scheme was the cause of the problem. See the threads below.
http://japan.nucleuscms.org/bb/viewtopic.php?t=2014
http://japan.nucleuscms.org/bb/viewtopic.php?t=2932

nucleus/libs/skinie.php

index 656983a..bacb61f 100644 (file)
 class SKINIMPORT {
        // hardcoded value (see constructor). When 1, interesting info about the
        // parsing process is sent to the output
-       var $debug;
+       private $debug;
        
        // parser/file pointer
-       var $parser;
-       var $fp;
+       private $parser;
+       private $fp;
+       
+       // parser internal charset: one of US-ASCII/ISO-8859-1/UTF-8
+       private $parse_charset = 'UTF-8';
        
        // which data has been read?
-       var $metaDataRead;
-       var $allRead;
+       private $metaDataRead;
+       private $allRead;
        
        // extracted data
-       var $skins;
-       var $templates;
-       var $info;
+       private $skins;
+       private $templates;
+       private $info;
        
        // to maintain track of where we are inside the XML file
-       var $inXml;
-       var $inData;
-       var $inMeta;
-       var $inSkin;
-       var $inTemplate;
-       var $currentName;
-       var $currentPartName;
-       var $cdata;
+       private $inXml;
+       private $inData;
+       private $inMeta;
+       private $inSkin;
+       private $inTemplate;
+       private $currentName;
+       private $currentPartName;
+       private $cdata;
        
        /**
         * constructor initializes data structures
         */
-       function SKINIMPORT()
+       public function __construct()
        {
                // disable magic_quotes_runtime if it's turned on
                //set_magic_quotes_runtime(0);
@@ -62,9 +65,15 @@ class SKINIMPORT {
                $this->debug = 0;
                
                $this->reset();
+               return;
        }
-
-       function reset()
+       
+       public function __destruct()
+       {
+               return;
+       }
+       
+       public function reset()
        {
                if ( $this->parser )
                {
@@ -97,12 +106,14 @@ class SKINIMPORT {
                // extra info included in the XML files (e.g. installation notes)
                $this->info = '';
                
-               // init XML parser
-               $this->parser = xml_parser_create();
+               // init XML parser; this parser deals with characters encoded as UTF-8
+               $this->parser = xml_parser_create($this->parse_charset);
                xml_set_object($this->parser, $this);
-               xml_set_element_handler($this->parser, 'startElement', 'endElement');
-               xml_set_character_data_handler($this->parser, 'characterData');
+               xml_set_element_handler($this->parser, 'start_element', 'end_element');
+               xml_set_character_data_handler($this->parser, 'character_data');
                xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, 0);
+               
+               return;
        }
        
        /**
@@ -113,7 +124,7 @@ class SKINIMPORT {
         * @param $metaOnly
         *              Set to 1 when only the metadata needs to be read (optional, default 0)
         */
-       function readFile($filename, $metaOnly = 0)
+       public function readFile($filename, $metaOnly = 0)
        {
                // open file
                $this->fp = @fopen($filename, 'r');
@@ -133,23 +144,17 @@ class SKINIMPORT {
                }
                fclose($this->fp);
                
-/*
-       [2004-08-04] dekarma - Took this out since it messes up good XML if it has skins/templates
-                                                  with CDATA sections. need to investigate consequences.
-                                                  see bug [ 999914 ] Import fails (multiple skins in XML/one of them with CDATA)
-
-               // backwards compatibility with the non-wellformed skinbackup.xml files
-               // generated by v2/v3 (when CDATA sections were present in skins)
-               // split up those CDATA sections into multiple ones
-               $tempbuffer = preg_replace_callback(
-                       "/(<!\[CDATA\[[^]]*?<!\[CDATA\[[^]]*)((?:\]\].*?<!\[CDATA.*?)*)(\]\])(.*\]\])/ms",
-                       create_function(
-                               '$matches',
-                               'return $matches[1] . preg_replace("/(\]\])(.*?<!\[CDATA)/ms","]]]]><![CDATA[$2",$matches[2])."]]]]><![CDATA[".$matches[4];'
-                       ),
-                       $tempbuffer
-               );
-*/
+               /*
+                * NOTE: convert the character set.
+                * We assume every character in the file can also be represented in UTF-8,
+                *  because PHP's XML extension is built on James Clark's expat
+                *   and supports only the US-ASCII, ISO-8859-1 and UTF-8 character encoding schemes.
+                */
+               if ( i18n::get_current_charset() != $this->parse_charset )
+               {
+                       $tempbuffer = i18n::convert($tempbuffer, i18n::get_current_charset(), $this->parse_charset);
+               }
+               
                $temp = tmpfile();
                fwrite($temp, $tempbuffer);
                rewind($temp);
@@ -167,12 +172,14 @@ class SKINIMPORT {
                // all done
                $this->inXml = 0;
                fclose($temp);
+               
+               return;
        }
        
        /**
         * Returns the list of skin names
         */
-       function getSkinNames()
+       public function getSkinNames()
        {
                return array_keys($this->skins);
        }
@@ -180,7 +187,7 @@ class SKINIMPORT {
        /**
         * Returns the list of template names
         */
-       function getTemplateNames()
+       public function getTemplateNames()
        {
                return array_keys($this->templates);
        }
@@ -188,7 +195,7 @@ class SKINIMPORT {
        /**
         * Returns the extra information included in the XML file
         */
-       function getInfo()
+       public function getInfo()
        {
                return $this->info;
        }
@@ -200,7 +207,7 @@ class SKINIMPORT {
         *              set to 1 when allowed to overwrite existing skins with the same name
         *              (default = 0)
         */
-       function writeToDatabase($allowOverwrite = 0)
+       public function writeToDatabase($allowOverwrite = 0)
        {
                $existingSkins = $this->checkSkinNameClashes();
                $existingTemplates = $this->checkTemplateNameClashes();
@@ -298,12 +305,13 @@ class SKINIMPORT {
                                $templateObj->update($partName, $partContent);
                        }
                }
+               return;
        }
        
        /**
          * returns an array of all the skin nameclashes (empty array when no name clashes)
          */
-       function checkSkinNameClashes()
+       public function checkSkinNameClashes()
        {
                $clashes = array();
                
@@ -321,7 +329,7 @@ class SKINIMPORT {
          * returns an array of all the template nameclashes
          * (empty array when no name clashes)
          */
-       function checkTemplateNameClashes()
+       public function checkTemplateNameClashes()
        {
                $clashes = array();
                
@@ -338,7 +346,7 @@ class SKINIMPORT {
        /**
          * returns an array of all the invalid skin names (empty array when no invalid names )
          */
-       function checkSkinNamesValid()
+       private function checkSkinNamesValid()
        {
                $notValid = array();
                
@@ -355,7 +363,7 @@ class SKINIMPORT {
        /**
          * returns an array of all the invalid template names (empty array when no invalid names )
          */
-       function checkTemplateNamesValid()
+       private function checkTemplateNamesValid()
        {
                $notValid = array();
                
@@ -372,11 +380,17 @@ class SKINIMPORT {
        /**
         * Called by XML parser for each new start element encountered
         */
-       function startElement($parser, $name, $attrs)
+       private function start_element($parser, $name, $attrs)
        {
                foreach( $attrs as $key=>$value )
                {
-                       $attrs[$key] = i18n::hsc($value);
+                       if ( $this->parse_charset != i18n::get_current_charset() )
+                       {
+                               $name = i18n::convert($name, $this->parse_charset, i18n::get_current_charset());
+                               $value = i18n::convert($value, $this->parse_charset, i18n::get_current_charset());
+                       }
+                       
+                       $attrs[$key] = $value;
                }
                
                if ( $this->debug )
@@ -435,19 +449,26 @@ class SKINIMPORT {
                                break;
                }
                // character data never contains other tags
-               $this->clearCharacterData();
+               $this->clear_character_data();
+               return;
        }
        
        /**
          * Called by the XML parser for each closing tag encountered
          */
-       function endElement($parser, $name)
+       private function end_elment($parser, $name)
        {
                if ( $this->debug )
                {
                        echo 'END: ' . i18n::hsc($name) . '<br />';
                }
                
+               if ( $this->parse_charset != i18n::get_current_charset() )
+               {
+                       $name = i18n::convert($name, $this->parse_charset, i18n::get_current_charset());
+                       $charset_data = i18n::convert($this->get_character_data(), $this->parse_charset, i18n::get_current_charset());
+               }
+               
                switch ( $name )
                {
                        case 'nucleusskin':
@@ -459,7 +480,7 @@ class SKINIMPORT {
                                $this->metaDataRead = 1;
                                break;
                        case 'info':
-                               $this->info = $this->getCharacterData();
+                               $this->info = $charset_data;
                        case 'skin':
                                if ( !$this->inMeta )
                                {
@@ -475,46 +496,48 @@ class SKINIMPORT {
                        case 'description':
                                if ( $this->inSkin )
                                {
-                                       $this->skins[$this->currentName]['description'] = $this->getCharacterData();
+                                       $this->skins[$this->currentName]['description'] = $charset_data;
                                }
                                else
                                {
-                                       $this->templates[$this->currentName]['description'] = $this->getCharacterData();
+                                       $this->templates[$this->currentName]['description'] = $charset_data;
                                }
                                break;
                        case 'part':
                                if ( $this->inSkin )
                                {
-                                       $this->skins[$this->currentName]['parts'][$this->currentPartName] = $this->getCharacterData();
+                                       $this->skins[$this->currentName]['parts'][$this->currentPartName] = $charset_data;
                                }
                                else
                                {
-                                       $this->templates[$this->currentName]['parts'][$this->currentPartName] = $this->getCharacterData();
+                                       $this->templates[$this->currentName]['parts'][$this->currentPartName] = $charset_data;
                                }
                                break;
                        default:
                                echo _SKINIE_SEELEMENT_UNEXPECTEDTAG . i18n::hsc($name) . '<br />';
                                break;
                }
-               $this->clearCharacterData();
+               $this->clear_character_data();
+               return;
        }
        
        /**
         * Called by XML parser for data inside elements
         */
-       function characterData ($parser, $data)
+       private function character_data ($parser, $data)
        {
                if ( $this->debug )
                {
                        echo 'NEW DATA: ' . i18n::hsc($data) . '<br />';
                }
                $this->cdata .= $data;
+               return;
        }
        
        /**
         * Returns the data collected so far
         */
-       function getCharacterData()
+       private function get_character_data()
        {
                return $this->cdata;
        }
@@ -522,15 +545,16 @@ class SKINIMPORT {
        /**
         * Clears the data buffer
         */
-       function clearCharacterData()
+       private function clear_character_data()
        {
                $this->cdata = '';
+               return;
        }
        
        /**
         * Static method that looks for importable XML files in subdirs of the given dir
         */
-       function searchForCandidates($dir)
+       static public function searchForCandidates($dir)
        {
                $candidates = array();
                
@@ -561,14 +585,14 @@ class SKINIMPORT {
 }
 
 class SKINEXPORT {
-       var $templates;
-       var $skins;
-       var $info;
+       private $templates;
+       private $skins;
+       private $info;
        
        /**
         * Constructor initializes data structures
         */
-       function SKINEXPORT()
+       public function __construct()
        {
                // list of templateIDs to export
                $this->templates = array();
@@ -587,7 +611,7 @@ class SKINEXPORT {
         *              template ID
         * @result false when no such ID exists
         */
-       function addTemplate($id)
+       public function addTemplate($id)
        {
                if ( !TEMPLATE::existsID($id) )
                {
@@ -605,7 +629,7 @@ class SKINEXPORT {
         *              skin ID
         * @result false when no such ID exists
         */
-       function addSkin($id)
+       public function addSkin($id)
        {
                if ( !SKIN::existsID($id) )
                {
@@ -619,7 +643,7 @@ class SKINEXPORT {
        /**
         * Sets the extra info to be included in the exported file
         */
-       function setInfo($info)
+       public function setInfo($info)
        {
                $this->info = $info;
        }
@@ -631,13 +655,13 @@ class SKINEXPORT {
         *              set to 0 if you don't want to send out headers
         *              (optional, default 1)
         */
-       function export($setHeaders = 1)
+       public function export($setHeaders = 1)
        {
                if ( $setHeaders )
                {
                        // make sure the mimetype is correct, and that the data does not show up
                        // in the browser, but gets saved into an XML file (popup download window)
-                       header('Content-Type: text/xml');
+                       header('Content-Type: text/xml; charset=' . i18n::get_current_charset());
                        header('Content-Disposition: attachment; filename="skinbackup.xml"');
                        header('Expires: 0');
                        header('Pragma: no-cache');
@@ -707,10 +731,8 @@ class SKINEXPORT {
        /**
         * Escapes CDATA content so it can be included in another CDATA section
         */
-       function escapeCDATA($cdata)
+       private function escapeCDATA($cdata)
        {
                return preg_replace('/]]>/', ']]]]><![CDATA[>', $cdata);
        }
-}
-
-?>
\ No newline at end of file
+}
\ No newline at end of file