OSDN Git Service

Fix encoding handling in xml binary input function. If the XML header didn't
authorHeikki Linnakangas <heikki.linnakangas@iki.fi>
Fri, 4 Sep 2009 10:49:29 +0000 (10:49 +0000)
committerHeikki Linnakangas <heikki.linnakangas@iki.fi>
Fri, 4 Sep 2009 10:49:29 +0000 (10:49 +0000)
specify an encoding explicitly, we used to treat it as being in database
encoding when we parsed it, but then perform a UTF-8 -> database encoding
conversion on it, which was completely bogus. It's now consistently treated as
UTF-8.

src/backend/utils/adt/xml.c

index 80e1133..c746c71 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.93 2009/08/10 05:46:50 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.94 2009/09/04 10:49:29 heikki Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -109,7 +109,7 @@ static int parse_xml_decl(const xmlChar *str, size_t *lenp,
 static bool print_xml_decl(StringInfo buf, const xmlChar *version,
                           pg_enc encoding, int standalone);
 static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
-                 bool preserve_whitespace, xmlChar *encoding);
+                 bool preserve_whitespace, int encoding);
 static text *xml_xmlnodetoxmltype(xmlNodePtr cur);
 #endif   /* USE_LIBXML */
 
@@ -183,7 +183,7 @@ xml_in(PG_FUNCTION_ARGS)
         * Parse the data to check if it is well-formed XML data.  Assume that
         * ERROR occurred if parsing failed.
         */
-       doc = xml_parse(vardata, xmloption, true, NULL);
+       doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding());
        xmlFreeDoc(doc);
 
        PG_RETURN_XML_P(vardata);
@@ -272,7 +272,8 @@ xml_recv(PG_FUNCTION_ARGS)
        char       *newstr;
        int                     nbytes;
        xmlDocPtr       doc;
-       xmlChar    *encoding = NULL;
+       xmlChar    *encodingStr = NULL;
+       int                     encoding;
 
        /*
         * Read the data in raw format. We don't know yet what the encoding is, as
@@ -293,7 +294,15 @@ xml_recv(PG_FUNCTION_ARGS)
        str = VARDATA(result);
        str[nbytes] = '\0';
 
-       parse_xml_decl((xmlChar *) str, NULL, NULL, &encoding, NULL);
+       parse_xml_decl((xmlChar *) str, NULL, NULL, &encodingStr, NULL);
+
+       /*
+        * If encoding wasn't explicitly specified in the XML header, treat it as
+        * UTF-8, as that's the default in XML. This is different from xml_in(),
+        * where the input has to go through the normal client to server encoding
+        * conversion.
+        */
+       encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
 
        /*
         * Parse the data to check if it is well-formed XML data.  Assume that
@@ -305,9 +314,7 @@ xml_recv(PG_FUNCTION_ARGS)
        /* Now that we know what we're dealing with, convert to server encoding */
        newstr = (char *) pg_do_encoding_conversion((unsigned char *) str,
                                                                                                nbytes,
-                                                                                               encoding ?
-                                                                                         xmlChar_to_encoding(encoding) :
-                                                                                               PG_UTF8,
+                                                                                               encoding,
                                                                                                GetDatabaseEncoding());
 
        if (newstr != str)
@@ -659,7 +666,8 @@ xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
 #ifdef USE_LIBXML
        xmlDocPtr       doc;
 
-       doc = xml_parse(data, xmloption_arg, preserve_whitespace, NULL);
+       doc = xml_parse(data, xmloption_arg, preserve_whitespace,
+                                       GetDatabaseEncoding());
        xmlFreeDoc(doc);
 
        return (xmltype *) data;
@@ -799,7 +807,8 @@ xml_is_document(xmltype *arg)
        /* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */
        PG_TRY();
        {
-               doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true, NULL);
+               doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
+                                               GetDatabaseEncoding());
                result = true;
        }
        PG_CATCH();
@@ -1152,7 +1161,7 @@ print_xml_decl(StringInfo buf, const xmlChar *version,
  */
 static xmlDocPtr
 xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
-                 xmlChar *encoding)
+                 int encoding)
 {
        int32           len;
        xmlChar    *string;
@@ -1165,9 +1174,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
 
        utf8string = pg_do_encoding_conversion(string,
                                                                                   len,
-                                                                                  encoding ?
-                                                                                  xmlChar_to_encoding(encoding) :
-                                                                                  GetDatabaseEncoding(),
+                                                                                  encoding,
                                                                                   PG_UTF8);
 
        /* Start up libxml and its parser (no-ops if already done) */