OSDN Git Service

Fix up memory management problems in contrib/xml2.
authorTom Lane <tgl@sss.pgh.pa.us>
Sun, 28 Feb 2010 21:31:57 +0000 (21:31 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Sun, 28 Feb 2010 21:31:57 +0000 (21:31 +0000)
Get rid of the code that attempted to funnel libxml2's memory allocations
into palloc.   We already knew from experience with the core xml datatype
that trying to do this is simply not reliable.  Unlike the core code, I
did not bother adding a lot of PG_TRY/PG_CATCH logic to try to ensure that
everything is cleaned up on error exit.  Hence, we might leak some memory
if one of these functions fails partway through.  Given the deprecated
status of this contrib module and the fact that errors partway through
the functions shouldn't be too common, it doesn't seem worth worrying about.

Also fix a separate bug in xpath_table, that it did the wrong things
if given a result tuple descriptor with less than 2 columns.  While
such a case isn't very useful in practice, we shouldn't fail or stomp
memory when it occurs.

Add some simple regression tests based on all the reported crash cases
that I have on hand.

This should be back-patched, but let's see if the buildfarm likes it first.

contrib/xml2/Makefile
contrib/xml2/expected/xml2.out [new file with mode: 0644]
contrib/xml2/sql/xml2.sql [new file with mode: 0644]
contrib/xml2/xpath.c

index b96defa..3426dea 100644 (file)
@@ -1,4 +1,4 @@
-# $PostgreSQL: pgsql/contrib/xml2/Makefile,v 1.12 2008/05/08 16:49:37 tgl Exp $
+# $PostgreSQL: pgsql/contrib/xml2/Makefile,v 1.13 2010/02/28 21:31:57 tgl Exp $
 
 MODULE_big = pgxml
 
@@ -8,6 +8,7 @@ SHLIB_LINK += $(filter -lxslt, $(LIBS)) $(filter -lxml2, $(LIBS))
 
 DATA_built = pgxml.sql
 DATA = uninstall_pgxml.sql
+REGRESS = xml2
 
 ifdef USE_PGXS
 PG_CONFIG = pg_config
diff --git a/contrib/xml2/expected/xml2.out b/contrib/xml2/expected/xml2.out
new file mode 100644 (file)
index 0000000..74896b0
--- /dev/null
@@ -0,0 +1,147 @@
+--
+-- first, define the functions.  Turn off echoing so that expected file
+-- does not depend on contents of pgxml.sql.
+--
+SET client_min_messages = warning;
+\set ECHO none
+RESET client_min_messages;
+select query_to_xml('select 1 as x',true,false,'');
+                         query_to_xml                          
+---------------------------------------------------------------
+ <table xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">+
+                                                              +
+ <row>                                                        +
+   <x>1</x>                                                   +
+ </row>                                                       +
+                                                              +
+ </table>                                                     +
+(1 row)
+
+select xslt_process( query_to_xml('select x from generate_series(1,5) as 
+x',true,false,'')::text,
+$$<xsl:stylesheet version="1.0"
+               xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+<xsl:output method="xml" indent="yes" />
+<xsl:template match="*">
+  <xsl:copy>
+     <xsl:copy-of select="@*" />
+     <xsl:apply-templates />
+  </xsl:copy>
+</xsl:template>
+<xsl:template match="comment()|processing-instruction()">
+  <xsl:copy />
+</xsl:template>
+</xsl:stylesheet>
+$$::text);
+                         xslt_process                          
+---------------------------------------------------------------
+ <?xml version="1.0"?>                                        +
+ <table xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">+
+                                                              +
+ <row>                                                        +
+   <x>1</x>                                                   +
+ </row>                                                       +
+                                                              +
+ <row>                                                        +
+   <x>2</x>                                                   +
+ </row>                                                       +
+                                                              +
+ <row>                                                        +
+   <x>3</x>                                                   +
+ </row>                                                       +
+                                                              +
+ <row>                                                        +
+   <x>4</x>                                                   +
+ </row>                                                       +
+                                                              +
+ <row>                                                        +
+   <x>5</x>                                                   +
+ </row>                                                       +
+                                                              +
+ </table>                                                     +
+(1 row)
+
+CREATE TABLE xpath_test (id integer NOT NULL, t xml);
+INSERT INTO xpath_test VALUES (1, '<doc><int>1</int></doc>');
+SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true')
+as t(id int4);
+ id 
+----
+(0 rows)
+
+SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true')
+as t(id int4, doc int4);
+ id | doc 
+----+-----
+  1 |   1
+(1 row)
+
+DROP TABLE xpath_test;
+CREATE TABLE xpath_test (id integer NOT NULL, t text);
+INSERT INTO xpath_test VALUES (1, '<doc><int>1</int></doc>');
+SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true')
+as t(id int4);
+ id 
+----
+(0 rows)
+
+SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true')
+as t(id int4, doc int4);
+ id | doc 
+----+-----
+  1 |   1
+(1 row)
+
+create table articles (article_id integer, article_xml xml, date_entered date);
+insert into articles (article_id, article_xml, date_entered)
+values (2, '<article><author>test</author><pages>37</pages></article>', now());
+SELECT * FROM
+xpath_table('article_id',
+            'article_xml',
+            'articles',
+            '/article/author|/article/pages|/article/title',
+            'date_entered > ''2003-01-01'' ')
+AS t(article_id integer, author text, page_count integer, title text);
+ article_id | author | page_count | title 
+------------+--------+------------+-------
+          2 | test   |         37 | 
+(1 row)
+
+-- this used to fail when invoked a second time
+select xslt_process('<aaa/>',$$<xsl:stylesheet version="1.0"
+xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+<xsl:template match="@*|node()">
+      <xsl:copy>
+         <xsl:apply-templates select="@*|node()"/>
+      </xsl:copy>
+   </xsl:template>
+</xsl:stylesheet>$$)::xml;
+ xslt_process 
+--------------
+ <aaa/>      +
+(1 row)
+
+select xslt_process('<aaa/>',$$<xsl:stylesheet version="1.0"
+xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+<xsl:template match="@*|node()">
+      <xsl:copy>
+         <xsl:apply-templates select="@*|node()"/>
+      </xsl:copy>
+   </xsl:template>
+</xsl:stylesheet>$$)::xml;
+ xslt_process 
+--------------
+ <aaa/>      +
+(1 row)
+
+create table t1 (id integer, xml_data xml);
+insert into t1 (id, xml_data)
+values
+(1, '<attributes><attribute name="attr_1">Some
+Value</attribute></attributes>');
+create index idx_xpath on t1 ( xpath_string
+('/attributes/attribute[@name="attr_1"]/text()', xml_data::text));
diff --git a/contrib/xml2/sql/xml2.sql b/contrib/xml2/sql/xml2.sql
new file mode 100644 (file)
index 0000000..73723b6
--- /dev/null
@@ -0,0 +1,82 @@
+--
+-- first, define the functions.  Turn off echoing so that expected file
+-- does not depend on contents of pgxml.sql.
+--
+SET client_min_messages = warning;
+\set ECHO none
+\i pgxml.sql
+\set ECHO all
+RESET client_min_messages;
+
+select query_to_xml('select 1 as x',true,false,'');
+
+select xslt_process( query_to_xml('select x from generate_series(1,5) as 
+x',true,false,'')::text,
+$$<xsl:stylesheet version="1.0"
+               xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+<xsl:output method="xml" indent="yes" />
+<xsl:template match="*">
+  <xsl:copy>
+     <xsl:copy-of select="@*" />
+     <xsl:apply-templates />
+  </xsl:copy>
+</xsl:template>
+<xsl:template match="comment()|processing-instruction()">
+  <xsl:copy />
+</xsl:template>
+</xsl:stylesheet>
+$$::text);
+
+CREATE TABLE xpath_test (id integer NOT NULL, t xml);
+INSERT INTO xpath_test VALUES (1, '<doc><int>1</int></doc>');
+SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true')
+as t(id int4);
+SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true')
+as t(id int4, doc int4);
+
+DROP TABLE xpath_test;
+CREATE TABLE xpath_test (id integer NOT NULL, t text);
+INSERT INTO xpath_test VALUES (1, '<doc><int>1</int></doc>');
+SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true')
+as t(id int4);
+SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true')
+as t(id int4, doc int4);
+
+create table articles (article_id integer, article_xml xml, date_entered date);
+insert into articles (article_id, article_xml, date_entered)
+values (2, '<article><author>test</author><pages>37</pages></article>', now());
+SELECT * FROM
+xpath_table('article_id',
+            'article_xml',
+            'articles',
+            '/article/author|/article/pages|/article/title',
+            'date_entered > ''2003-01-01'' ')
+AS t(article_id integer, author text, page_count integer, title text);
+
+-- this used to fail when invoked a second time
+select xslt_process('<aaa/>',$$<xsl:stylesheet version="1.0"
+xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+<xsl:template match="@*|node()">
+      <xsl:copy>
+         <xsl:apply-templates select="@*|node()"/>
+      </xsl:copy>
+   </xsl:template>
+</xsl:stylesheet>$$)::xml;
+
+select xslt_process('<aaa/>',$$<xsl:stylesheet version="1.0"
+xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+<xsl:template match="@*|node()">
+      <xsl:copy>
+         <xsl:apply-templates select="@*|node()"/>
+      </xsl:copy>
+   </xsl:template>
+</xsl:stylesheet>$$)::xml;
+
+create table t1 (id integer, xml_data xml);
+insert into t1 (id, xml_data)
+values
+(1, '<attributes><attribute name="attr_1">Some
+Value</attribute></attributes>');
+
+create index idx_xpath on t1 ( xpath_string
+('/attributes/attribute[@name="attr_1"]/text()', xml_data::text));
index b2458e9..8e289bd 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * $PostgreSQL: pgsql/contrib/xml2/xpath.c,v 1.26 2010/02/28 19:51:37 tgl Exp $
+ * $PostgreSQL: pgsql/contrib/xml2/xpath.c,v 1.27 2010/02/28 21:31:57 tgl Exp $
  *
  * Parser interface for DOM-based parser (libxml) rather than
  * stream-based SAX-type parser
@@ -42,10 +42,6 @@ void         pgxml_parser_init(void);
 
 /* local declarations */
 
-static void *pgxml_palloc(size_t size);
-static void *pgxml_repalloc(void *ptr, size_t size);
-static void pgxml_pfree(void *ptr);
-static char *pgxml_pstrdup(const char *string);
 static void pgxml_errorHandler(void *ctxt, const char *msg,...);
 
 static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
@@ -59,41 +55,10 @@ static xmlChar *pgxml_texttoxmlchar(text *textstring);
 
 static xmlXPathObjectPtr pgxml_xpath(text *document, xmlChar *xpath);
 
-
 /* Global variables */
 static char *pgxml_errorMsg = NULL;            /* overall error message */
 
 
-/* memory handling passthrough functions (e.g. palloc, pstrdup are
-   currently macros, and the others might become so...) */
-
-static void *
-pgxml_palloc(size_t size)
-{
-/*     elog(DEBUG1,"Alloc %d in CMC %p",size,CurrentMemoryContext); */
-       return palloc(size);
-}
-
-static void *
-pgxml_repalloc(void *ptr, size_t size)
-{
-/*     elog(DEBUG1,"ReAlloc in CMC %p",CurrentMemoryContext);*/
-       return repalloc(ptr, size);
-}
-
-static void
-pgxml_pfree(void *ptr)
-{
-/*     elog(DEBUG1,"Free in CMC %p",CurrentMemoryContext); */
-       pfree(ptr);
-}
-
-static char *
-pgxml_pstrdup(const char *string)
-{
-       return pstrdup(string);
-}
-
 /*
  * The error handling function. This formats an error message and sets
  * a flag - an ereport will be issued prior to return
@@ -157,9 +122,6 @@ pgxml_parser_init(void)
        pgxml_errorMsg = NULL;
        xmlSetGenericErrorFunc(NULL, pgxml_errorHandler);
 
-       /* Replace libxml memory handling (DANGEROUS) */
-       xmlMemSetup(pgxml_pfree, pgxml_palloc, pgxml_repalloc, pgxml_pstrdup);
-
        /* Initialize libxml */
        xmlInitParser();
 
@@ -627,7 +589,7 @@ xpath_table(PG_FUNCTION_ARGS)
        int                     j;
        int                     rownr;                  /* For issuing multiple rows from one original
                                                                 * document */
-       int                     had_values;             /* To determine end of nodeset results */
+       bool            had_values;             /* To determine end of nodeset results */
        StringInfoData query_buf;
 
        /* We only have a valid tuple description in table function mode */
@@ -654,7 +616,6 @@ xpath_table(PG_FUNCTION_ARGS)
         * The tuplestore must exist in a higher context than this function call
         * (per_query_ctx is used)
         */
-
        per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
        oldcontext = MemoryContextSwitchTo(per_query_ctx);
 
@@ -671,6 +632,12 @@ xpath_table(PG_FUNCTION_ARGS)
        /* get the requested return tuple description */
        ret_tupdesc = CreateTupleDescCopy(rsinfo->expectedDesc);
 
+       /* must have at least one output column (for the pkey) */
+       if (ret_tupdesc->natts < 1)
+               ereport(ERROR,
+                               (errcode(ERRCODE_SYNTAX_ERROR),
+                                errmsg("xpath_table must have at least one output column")));
+
        /*
         * At the moment we assume that the returned attributes make sense for the
         * XPath specififed (i.e. we trust the caller). It's not fatal if they get
@@ -686,26 +653,27 @@ xpath_table(PG_FUNCTION_ARGS)
        rsinfo->setDesc = ret_tupdesc;
 
        values = (char **) palloc(ret_tupdesc->natts * sizeof(char *));
-
        xpaths = (xmlChar **) palloc(ret_tupdesc->natts * sizeof(xmlChar *));
 
-       /* Split XPaths. xpathset is a writable CString. */
-
-       /* Note that we stop splitting once we've done all needed for tupdesc */
-
+       /*
+        * Split XPaths. xpathset is a writable CString.
+        *
+        * Note that we stop splitting once we've done all needed for tupdesc
+        */
        numpaths = 0;
        pos = xpathset;
-       do
+       while (numpaths < (ret_tupdesc->natts - 1))
        {
-               xpaths[numpaths] = (xmlChar *) pos;
+               xpaths[numpaths++] = (xmlChar *) pos;
                pos = strstr(pos, pathsep);
                if (pos != NULL)
                {
                        *pos = '\0';
                        pos++;
                }
-               numpaths++;
-       } while ((pos != NULL) && (numpaths < (ret_tupdesc->natts - 1)));
+               else
+                       break;
+       }
 
        /* Now build query */
        initStringInfo(&query_buf);
@@ -800,7 +768,7 @@ xpath_table(PG_FUNCTION_ARGS)
                        do
                        {
                                /* Now evaluate the set of xpaths. */
-                               had_values = 0;
+                               had_values = false;
                                for (j = 0; j < numpaths; j++)
                                {
                                        ctxt = xmlXPathNewContext(doctree);
@@ -813,10 +781,7 @@ xpath_table(PG_FUNCTION_ARGS)
                                        {
                                                xmlCleanupParser();
                                                xmlFreeDoc(doctree);
-
                                                elog_error("XPath Syntax Error", true);
-
-                                               PG_RETURN_NULL();               /* Keep compiler happy */
                                        }
 
                                        /* Now evaluate the path expression. */
@@ -829,11 +794,12 @@ xpath_table(PG_FUNCTION_ARGS)
                                                {
                                                        case XPATH_NODESET:
                                                                /* We see if this nodeset has enough nodes */
-                                                               if ((res->nodesetval != NULL) && (rownr < res->nodesetval->nodeNr))
+                                                               if (res->nodesetval != NULL &&
+                                                                       rownr < res->nodesetval->nodeNr)
                                                                {
                                                                        resstr =
                                                                                xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]);
-                                                                       had_values = 1;
+                                                                       had_values = true;
                                                                }
                                                                else
                                                                        resstr = NULL;