OSDN Git Service

Allow for arbitrary data types as content in XMLELEMENT. The original
[pg-rex/syncrep.git] / src / backend / utils / adt / xml.c
1 /*-------------------------------------------------------------------------
2  *
3  * xml.c
4  *        XML data type support.
5  *
6  *
7  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.15 2007/01/12 16:29:24 petere Exp $
11  *
12  *-------------------------------------------------------------------------
13  */
14
15 /*
16  * Generally, XML type support is only available when libxml use was
17  * configured during the build.  But even if that is not done, the
18  * type and all the functions are available, but most of them will
19  * fail.  For one thing, this avoids having to manage variant catalog
20  * installations.  But it also has nice effects such as that you can
21  * dump a database containing XML type data even if the server is not
22  * linked with libxml.  Thus, make sure xml_out() works even if nothing
23  * else does.
24  */
25
26 #include "postgres.h"
27
28 #ifdef USE_LIBXML
29 #include <libxml/chvalid.h>
30 #include <libxml/parser.h>
31 #include <libxml/tree.h>
32 #include <libxml/uri.h>
33 #include <libxml/xmlerror.h>
34 #include <libxml/xmlsave.h>
35 #include <libxml/xmlwriter.h>
36 #endif /* USE_LIBXML */
37
38 #include "catalog/pg_type.h"
39 #include "executor/executor.h"
40 #include "fmgr.h"
41 #include "libpq/pqformat.h"
42 #include "mb/pg_wchar.h"
43 #include "nodes/execnodes.h"
44 #include "parser/parse_expr.h"
45 #include "utils/array.h"
46 #include "utils/builtins.h"
47 #include "utils/lsyscache.h"
48 #include "utils/memutils.h"
49 #include "utils/xml.h"
50
51
52 #ifdef USE_LIBXML
53
54 #define PG_XML_DEFAULT_URI "dummy.xml"
55
56 static StringInfo xml_err_buf = NULL;
57
58 static void     xml_init(void);
59 #ifdef NOT_USED
60 static void    *xml_palloc(size_t size);
61 static void    *xml_repalloc(void *ptr, size_t size);
62 static void     xml_pfree(void *ptr);
63 static char    *xml_pstrdup(const char *string);
64 #endif
65 static void     xml_ereport(int level, int sqlcode,
66                                                         const char *msg, void *ctxt);
67 static void     xml_errorHandler(void *ctxt, const char *msg, ...);
68 static void     xml_ereport_by_code(int level, int sqlcode,
69                                                                         const char *msg, int errcode);
70 static xmlChar *xml_text2xmlChar(text *in);
71 static xmlDocPtr xml_parse(text *data, bool is_document, bool preserve_whitespace);
72
73 static char *map_sql_value_to_xml_value(Datum value, Oid type);
74
75 #endif /* USE_LIBXML */
76
77 #define NO_XML_SUPPORT() \
78         ereport(ERROR, \
79                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
80                          errmsg("no XML support in this installation")))
81
82
83 Datum
84 xml_in(PG_FUNCTION_ARGS)
85 {
86 #ifdef USE_LIBXML
87         char            *s = PG_GETARG_CSTRING(0);
88         size_t          len;
89         xmltype         *vardata;
90         xmlDocPtr        doc;
91
92         len = strlen(s);
93         vardata = palloc(len + VARHDRSZ);
94         VARATT_SIZEP(vardata) = len + VARHDRSZ;
95         memcpy(VARDATA(vardata), s, len);
96
97         /*
98          * Parse the data to check if it is well-formed XML data.  Assume
99          * that ERROR occurred if parsing failed.
100          */
101         doc = xml_parse(vardata, false, true);
102         xmlFreeDoc(doc);
103
104         PG_RETURN_XML_P(vardata);
105 #else
106         NO_XML_SUPPORT();
107         return 0;
108 #endif
109 }
110
111
112 Datum
113 xml_out(PG_FUNCTION_ARGS)
114 {
115         xmltype         *s = PG_GETARG_XML_P(0);
116         char            *result;
117         int32           len;
118
119         len = VARSIZE(s) - VARHDRSZ;
120         result = palloc(len + 1);
121         memcpy(result, VARDATA(s), len);
122         result[len] = '\0';
123
124         PG_RETURN_CSTRING(result);
125 }
126
127
128 Datum
129 xml_recv(PG_FUNCTION_ARGS)
130 {
131 #ifdef USE_LIBXML
132         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
133         xmltype    *result;
134         char       *str;
135         int                     nbytes;
136         xmlDocPtr       doc;
137
138         str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
139
140         result = (xmltype *) palloc(nbytes + VARHDRSZ);
141         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
142         memcpy(VARDATA(result), str, nbytes);
143         pfree(str);
144
145         /*
146          * Parse the data to check if it is well-formed XML data.  Assume
147          * that ERROR occurred if parsing failed.
148          */
149         doc = xml_parse(result, false, true);
150         xmlFreeDoc(doc);
151
152         PG_RETURN_XML_P(result);
153 #else
154         NO_XML_SUPPORT();
155         return 0;
156 #endif
157 }
158
159
160 Datum
161 xml_send(PG_FUNCTION_ARGS)
162 {
163         xmltype    *x = PG_GETARG_XML_P(0);
164         StringInfoData buf;
165
166         pq_begintypsend(&buf);
167         pq_sendbytes(&buf, VARDATA(x), VARSIZE(x) - VARHDRSZ);
168         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
169 }
170
171
172 #ifdef USE_LIBXML
173 static void
174 appendStringInfoText(StringInfo str, const text *t)
175 {
176         appendBinaryStringInfo(str, VARDATA(t), VARSIZE(t) - VARHDRSZ);
177 }
178
179
180 static xmltype *
181 stringinfo_to_xmltype(StringInfo buf)
182 {
183         int32 len;
184         xmltype *result;
185
186         len = buf->len + VARHDRSZ;
187         result = palloc(len);
188         VARATT_SIZEP(result) = len;
189         memcpy(VARDATA(result), buf->data, buf->len);
190
191         return result;
192 }
193
194
195 static xmltype *
196 xmlBuffer_to_xmltype(xmlBufferPtr buf)
197 {
198         int32           len;
199         xmltype    *result;
200
201         len = xmlBufferLength(buf) + VARHDRSZ;
202         result = palloc(len);
203         VARATT_SIZEP(result) = len;
204         memcpy(VARDATA(result), xmlBufferContent(buf), len - VARHDRSZ);
205
206         return result;
207 }
208 #endif
209
210
211 Datum
212 xmlcomment(PG_FUNCTION_ARGS)
213 {
214 #ifdef USE_LIBXML
215         text *arg = PG_GETARG_TEXT_P(0);
216         int len =  VARATT_SIZEP(arg) - VARHDRSZ;
217         StringInfoData buf;
218         int i;
219
220         /* check for "--" in string or "-" at the end */
221         for (i = 1; i < len; i++)
222                 if ((VARDATA(arg)[i] == '-' && VARDATA(arg)[i - 1] == '-')
223                         || (VARDATA(arg)[i] == '-' && i == len - 1))
224                                         ereport(ERROR,
225                                                         (errcode(ERRCODE_INVALID_XML_COMMENT),
226                                                          errmsg("invalid XML comment")));
227
228         initStringInfo(&buf);
229         appendStringInfo(&buf, "<!--");
230         appendStringInfoText(&buf, arg);
231         appendStringInfo(&buf, "-->");
232
233         PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
234 #else
235         NO_XML_SUPPORT();
236         return 0;
237 #endif
238 }
239
240
241 Datum
242 texttoxml(PG_FUNCTION_ARGS)
243 {
244         text       *data = PG_GETARG_TEXT_P(0);
245
246         PG_RETURN_XML_P(xmlparse(data, false, true));
247 }
248
249
250 xmltype *
251 xmlelement(XmlExprState *xmlExpr, ExprContext *econtext)
252 {
253 #ifdef USE_LIBXML
254         XmlExpr    *xexpr = (XmlExpr *) xmlExpr->xprstate.expr;
255         int                     i;
256         ListCell   *arg;
257         ListCell   *narg;
258         bool            isnull;
259         xmltype    *result;
260         Datum           value;
261         char       *str;
262
263         xmlBufferPtr buf;
264         xmlTextWriterPtr writer;
265
266         buf = xmlBufferCreate();
267         writer = xmlNewTextWriterMemory(buf, 0);
268
269         xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
270
271         i = 0;
272         forboth(arg, xmlExpr->named_args, narg, xexpr->arg_names)
273         {
274                 ExprState       *e = (ExprState *) lfirst(arg);
275                 char    *argname = strVal(lfirst(narg));
276
277                 value = ExecEvalExpr(e, econtext, &isnull, NULL);
278                 if (!isnull)
279                 {
280                         str = OutputFunctionCall(&xmlExpr->named_outfuncs[i], value);
281                         xmlTextWriterWriteAttribute(writer, (xmlChar *) argname, (xmlChar *) str);
282                         pfree(str);
283                 }
284                 i++;
285         }
286
287         foreach(arg, xmlExpr->args)
288         {
289                 ExprState       *e = (ExprState *) lfirst(arg);
290
291                 value = ExecEvalExpr(e, econtext, &isnull, NULL);
292                 if (!isnull)
293                         xmlTextWriterWriteRaw(writer, (xmlChar *) map_sql_value_to_xml_value(value, exprType((Node *) e->expr)));
294         }
295
296         xmlTextWriterEndElement(writer);
297         xmlFreeTextWriter(writer);
298
299         result = xmlBuffer_to_xmltype(buf);
300         xmlBufferFree(buf);
301         return result;
302 #else
303         NO_XML_SUPPORT();
304         return NULL;
305 #endif
306 }
307
308
309 xmltype *
310 xmlparse(text *data, bool is_document, bool preserve_whitespace)
311 {
312 #ifdef USE_LIBXML
313         xmlDocPtr       doc;
314
315         doc = xml_parse(data, is_document, preserve_whitespace);
316         xmlFreeDoc(doc);
317
318         return (xmltype *) data;
319 #else
320         NO_XML_SUPPORT();
321         return NULL;
322 #endif
323 }
324
325
326 xmltype *
327 xmlpi(char *target, text *arg, bool arg_is_null, bool *result_is_null)
328 {
329 #ifdef USE_LIBXML
330         xmltype *result;
331         StringInfoData buf;
332
333         if (pg_strncasecmp(target, "xml", 3) == 0)
334                 ereport(ERROR,
335                                 (errcode(ERRCODE_SYNTAX_ERROR), /* really */
336                                  errmsg("invalid XML processing instruction"),
337                                  errdetail("XML processing instruction target name cannot start with \"xml\".")));
338
339         /*
340          * Following the SQL standard, the null check comes after the
341          * syntax check above.
342          */
343         *result_is_null = arg_is_null;
344         if (*result_is_null)
345                 return NULL;            
346
347         initStringInfo(&buf);
348
349         appendStringInfo(&buf, "<?%s", target);
350
351         if (arg != NULL)
352         {
353                 char *string;
354
355                 string = DatumGetCString(DirectFunctionCall1(textout,
356                                                                                                          PointerGetDatum(arg)));
357                 if (strstr(string, "?>") != NULL)
358                 ereport(ERROR,
359                                 (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
360                                  errmsg("invalid XML processing instruction"),
361                                  errdetail("XML processing instruction cannot contain \"?>\".")));
362
363                 appendStringInfoChar(&buf, ' ');
364                 appendStringInfoString(&buf, string + strspn(string, " "));
365                 pfree(string);
366         }
367         appendStringInfoString(&buf, "?>");
368
369         result = stringinfo_to_xmltype(&buf);
370         pfree(buf.data);
371         return result;
372 #else
373         NO_XML_SUPPORT();
374         return NULL;
375 #endif
376 }
377
378
379 xmltype *
380 xmlroot(xmltype *data, text *version, int standalone)
381 {
382 #ifdef USE_LIBXML
383         xmltype    *result;
384         xmlDocPtr       doc;
385         xmlBufferPtr buffer;
386         xmlSaveCtxtPtr save;
387
388         doc = xml_parse((text *) data, true, true);
389
390         if (version)
391                 doc->version = xmlStrdup(xml_text2xmlChar(version));
392         else
393                 doc->version = NULL;
394
395         switch (standalone)
396         {
397                 case 1:
398                         doc->standalone = 1;
399                         break;
400                 case -1:
401                         doc->standalone = 0;
402                         break;
403                 default:
404                         doc->standalone = -1;
405                         break;
406         }
407
408         buffer = xmlBufferCreate();
409         save = xmlSaveToBuffer(buffer, NULL, 0);
410         xmlSaveDoc(save, doc);
411         xmlSaveClose(save);
412
413         xmlFreeDoc(doc);
414
415         result = xmlBuffer_to_xmltype(buffer);
416         xmlBufferFree(buffer);
417         return result;
418 #else
419         NO_XML_SUPPORT();
420         return NULL;
421 #endif
422 }
423
424
425 /*
426  * Validate document (given as string) against DTD (given as external link)
427  * TODO !!! use text instead of cstring for second arg
428  * TODO allow passing DTD as a string value (not only as an URI)
429  * TODO redesign (see comment with '!!!' below)
430  */
431 Datum
432 xmlvalidate(PG_FUNCTION_ARGS)
433 {
434 #ifdef USE_LIBXML
435         text                            *data = PG_GETARG_TEXT_P(0);
436         text                            *dtdOrUri = PG_GETARG_TEXT_P(1);
437         bool                            result = false;
438         xmlParserCtxtPtr        ctxt = NULL;
439         xmlDocPtr                       doc = NULL;
440         xmlDtdPtr                       dtd = NULL;
441
442         xml_init();
443
444         /* We use a PG_TRY block to ensure libxml is cleaned up on error */
445         PG_TRY();
446         {
447                 ctxt = xmlNewParserCtxt();
448                 if (ctxt == NULL)
449                         xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
450                                                 "could not allocate parser context", ctxt);
451
452                 doc = xmlCtxtReadMemory(ctxt, (char *) VARDATA(data),
453                                                                 VARSIZE(data) - VARHDRSZ,
454                                                                 PG_XML_DEFAULT_URI, NULL, 0);
455                 if (doc == NULL)
456                         xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
457                                                 "could not parse XML data", ctxt);
458
459 #if 0
460                 uri = xmlCreateURI();
461                 elog(NOTICE, "dtd - %s", dtdOrUri);
462                 dtd = palloc(sizeof(xmlDtdPtr));
463                 uri = xmlParseURI(dtdOrUri);
464                 if (uri == NULL)
465                         xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
466                                                 "not implemented yet... (TODO)", ctxt);
467                 else
468 #endif
469                         dtd = xmlParseDTD(NULL, xml_text2xmlChar(dtdOrUri));
470
471                 if (dtd == NULL)
472                         xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
473                                                 "could not load DTD", ctxt);
474
475                 if (xmlValidateDtd(xmlNewValidCtxt(), doc, dtd) == 1)
476                         result = true;
477
478                 if (!result)
479                         xml_ereport(NOTICE, ERRCODE_INVALID_XML_DOCUMENT,
480                                                 "validation against DTD failed", ctxt);
481
482 #if 0
483                 if (uri)
484                         xmlFreeURI(uri);
485 #endif
486                 if (dtd)
487                         xmlFreeDtd(dtd);
488                 if (doc)
489                         xmlFreeDoc(doc);
490                 if (ctxt)
491                         xmlFreeParserCtxt(ctxt);
492                 xmlCleanupParser();
493         }
494         PG_CATCH();
495         {
496 #if 0
497                 if (uri)
498                         xmlFreeURI(uri);
499 #endif
500                 if (dtd)
501                         xmlFreeDtd(dtd);
502                 if (doc)
503                         xmlFreeDoc(doc);
504                 if (ctxt)
505                         xmlFreeParserCtxt(ctxt);
506                 xmlCleanupParser();
507
508                 PG_RE_THROW();
509         }
510         PG_END_TRY();
511
512         PG_RETURN_BOOL(result);
513 #else /* not USE_LIBXML */
514         NO_XML_SUPPORT();
515         return 0;
516 #endif /* not USE_LIBXML */
517 }
518
519
520 #ifdef USE_LIBXML
521
522 /*
523  * Container for some init stuff (not good design!)
524  * TODO xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and check)
525  */
526 static void
527 xml_init(void)
528 {
529         /*
530          * Currently, we have no pure UTF-8 support for internals -- check
531          * if we can work.
532          */
533         if (sizeof (char) != sizeof (xmlChar))
534                 ereport(ERROR,
535                                 (errmsg("could not initialize XML library"),
536                                  errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.",
537                                                    (int) sizeof(char), (int) sizeof(xmlChar))));
538
539         if (xml_err_buf == NULL)
540         {
541                 /* First time through: create error buffer in permanent context */
542                 MemoryContext oldcontext;
543
544                 oldcontext = MemoryContextSwitchTo(TopMemoryContext);
545                 xml_err_buf = makeStringInfo();
546                 MemoryContextSwitchTo(oldcontext);
547         }
548         else
549         {
550                 /* Reset pre-existing buffer to empty */
551                 xml_err_buf->data[0] = '\0';
552                 xml_err_buf->len = 0;
553         }
554         /* Now that xml_err_buf exists, safe to call xml_errorHandler */
555         xmlSetGenericErrorFunc(NULL, xml_errorHandler);
556
557 #ifdef NOT_USED
558         /*
559          * FIXME: This doesn't work because libxml assumes that whatever
560          * libxml allocates, only libxml will free, so we can't just drop
561          * memory contexts behind it.  This needs to be refined.
562          */
563         xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
564 #endif
565         xmlInitParser();
566         LIBXML_TEST_VERSION;
567 }
568
569
570 /*
571  * SQL/XML allows storing "XML documents" or "XML content".  "XML
572  * documents" are specified by the XML specification and are parsed
573  * easily by libxml.  "XML content" is specified by SQL/XML as the
574  * production "XMLDecl? content".  But libxml can only parse the
575  * "content" part, so we have to parse the XML declaration ourselves
576  * to complete this.
577  */
578
579 #define CHECK_XML_SPACE(p) if (!xmlIsBlank_ch(*(p))) return XML_ERR_SPACE_REQUIRED
580 #define SKIP_XML_SPACE(p) while (xmlIsBlank_ch(*(p))) (p)++
581
582 static int
583 parse_xml_decl(const xmlChar *str, size_t *len, xmlChar **encoding, int *standalone)
584 {
585         const xmlChar *p;
586         const xmlChar *save_p;
587
588         p = str;
589
590         if (xmlStrncmp(p, (xmlChar *)"<?xml", 5) != 0)
591                 goto finished;
592
593         p += 5;
594
595         /* version */
596         CHECK_XML_SPACE(p);
597         SKIP_XML_SPACE(p);
598         if (xmlStrncmp(p, (xmlChar *)"version", 7) != 0)
599                 return XML_ERR_VERSION_MISSING;
600         p += 7;
601         SKIP_XML_SPACE(p);
602         if (*p != '=')
603                 return XML_ERR_VERSION_MISSING;
604         p += 1;
605         SKIP_XML_SPACE(p);
606         if (xmlStrncmp(p, (xmlChar *)"'1.0'", 5) != 0 && xmlStrncmp(p, (xmlChar *)"\"1.0\"", 5) != 0)
607                 return XML_ERR_VERSION_MISSING;
608         p += 5;
609
610         /* encoding */
611         save_p = p;
612         SKIP_XML_SPACE(p);
613         if (xmlStrncmp(p, (xmlChar *)"encoding", 8) == 0)
614         {
615                 CHECK_XML_SPACE(save_p);
616                 p += 8;
617                 SKIP_XML_SPACE(p);
618                 if (*p != '=')
619                         return XML_ERR_MISSING_ENCODING;
620                 p += 1;
621                 SKIP_XML_SPACE(p);
622
623                 if (*p == '\'' || *p == '"')
624                 {
625                         const xmlChar *q;
626
627                         q = xmlStrchr(p + 1, *p);
628                         if (!q)
629                                 return XML_ERR_MISSING_ENCODING;
630
631                         *encoding = xmlStrndup(p + 1, q - p - 1);
632                         p = q + 1;
633                 }
634                 else
635                         return XML_ERR_MISSING_ENCODING;
636         }
637         else
638         {
639                 p = save_p;
640                 *encoding = NULL;
641         }
642
643         /* standalone */
644         save_p = p;
645         SKIP_XML_SPACE(p);
646         if (xmlStrncmp(p, (xmlChar *)"standalone", 10) == 0)
647         {
648                 CHECK_XML_SPACE(save_p);
649                 p += 10;
650                 SKIP_XML_SPACE(p);
651                 if (*p != '=')
652                         return XML_ERR_STANDALONE_VALUE;
653                 p += 1;
654                 SKIP_XML_SPACE(p);
655                 if (xmlStrncmp(p, (xmlChar *)"'yes'", 5) == 0 || xmlStrncmp(p, (xmlChar *)"\"yes\"", 5) == 0)
656                 {
657                         *standalone = 1;
658                         p += 5;
659                 }
660                 else if (xmlStrncmp(p, (xmlChar *)"'no'", 4) == 0 || xmlStrncmp(p, (xmlChar *)"\"no\"", 4) == 0)
661                 {
662                         *standalone = 0;
663                         p += 4;
664                 }
665                 else
666                         return XML_ERR_STANDALONE_VALUE;
667         }
668         else
669         {
670                 p = save_p;
671                 *standalone = -1;
672         }
673
674         SKIP_XML_SPACE(p);
675         if (xmlStrncmp(p, (xmlChar *)"?>", 2) != 0)
676                 return XML_ERR_XMLDECL_NOT_FINISHED;
677         p += 2;
678
679 finished:
680         if (len)
681                 *len = (p - str);
682         return XML_ERR_OK;
683 }
684
685
686 /*
687  * Convert a C string to XML internal representation
688  *
689  * TODO maybe, libxml2's xmlreader is better? (do not construct DOM, yet do not use SAX - see xml_reader.c)
690  * TODO what about internal URI for docs? (see PG_XML_DEFAULT_URI below)
691  */
692 static xmlDocPtr
693 xml_parse(text *data, bool is_document, bool preserve_whitespace)
694 {
695         int                                     res_code;
696         int32                           len;
697         xmlChar                         *string;
698         xmlParserCtxtPtr        ctxt = NULL;
699         xmlDocPtr                       doc = NULL;
700
701         len = VARSIZE(data) - VARHDRSZ; /* will be useful later */
702         string = xml_text2xmlChar(data);
703
704         xml_init();
705
706         /* We use a PG_TRY block to ensure libxml is cleaned up on error */
707         PG_TRY();
708         {
709                 ctxt = xmlNewParserCtxt();
710                 if (ctxt == NULL)
711                         xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
712                                                 "could not allocate parser context", ctxt);
713
714                 if (is_document)
715                 {
716                         /*
717                          * Note, that here we try to apply DTD defaults
718                          * (XML_PARSE_DTDATTR) according to SQL/XML:10.16.7.d:
719                          * 'Default valies defined by internal DTD are applied'.
720                          * As for external DTDs, we try to support them too, (see
721                          * SQL/XML:10.16.7.e)
722                          */
723                         doc = xmlCtxtReadMemory(ctxt, (char *) string, len,
724                                                                         PG_XML_DEFAULT_URI, NULL,
725                                                                         XML_PARSE_NOENT | XML_PARSE_DTDATTR
726                                                                         | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
727                         if (doc == NULL)
728                                 xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
729                                                         "invalid XML document", ctxt);
730                 }
731                 else
732                 {
733                         size_t count;
734                         xmlChar *encoding = NULL;
735                         int standalone = -1;
736
737                         doc = xmlNewDoc(NULL);
738
739                         res_code = parse_xml_decl(string, &count, &encoding, &standalone);
740
741                         /* TODO resolve: xmlParseBalancedChunkMemory assumes that string is UTF8 encoded! */
742                         if (res_code == 0)
743                                 res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, string + count, NULL);
744                         if (res_code != 0)
745                                 xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
746                                                                         "invalid XML content", res_code);
747
748                         doc->encoding = encoding;
749                         doc->standalone = standalone;
750                 }
751
752                 /* TODO encoding issues
753                  * (thoughts:
754                  *              CASE:
755                  *              - XML data has explicit encoding attribute in its prolog
756                  *              - if not, assume that enc. of XML data is the same as client's one
757                  *
758                  *              The common rule is to accept the XML data only if its encoding
759                  *              is the same as encoding of the storage (server's). The other possible
760                  *              option is to accept all the docs, but DO TRANSFORMATION and, if needed,
761                  *              change the prolog.
762                  *
763                  *              I think I'd stick the first way (for the 1st version),
764                  *              it's much simplier (less errors...)
765                  * ) */
766                 /* ... */
767
768                 if (ctxt)
769                         xmlFreeParserCtxt(ctxt);
770                 xmlCleanupParser();
771         }
772         PG_CATCH();
773         {
774                 if (doc)
775                         xmlFreeDoc(doc);
776                 doc = NULL;
777                 if (ctxt)
778                         xmlFreeParserCtxt(ctxt);
779                 xmlCleanupParser();
780
781                 PG_RE_THROW();
782         }
783         PG_END_TRY();
784
785         return doc;
786 }
787
788
789 /*
790  * xmlChar<->text convertions
791  */
792 static xmlChar *
793 xml_text2xmlChar(text *in)
794 {
795         int32           len = VARSIZE(in) - VARHDRSZ;
796         xmlChar         *res;
797
798         res = palloc(len + 1);
799         memcpy(res, VARDATA(in), len);
800         res[len] = '\0';
801
802         return(res);
803 }
804
805
#ifdef NOT_USED
/*
 * Thin adapters that give the palloc family the signatures expected by
 * xmlMemSetup().  Currently compiled out; see the FIXME in xml_init().
 */

/* Allocate size bytes with palloc. */
static void *
xml_palloc(size_t size)
{
	void	   *chunk = palloc(size);

	return chunk;
}


/* Resize a chunk with repalloc. */
static void *
xml_repalloc(void *ptr, size_t size)
{
	void	   *chunk = repalloc(ptr, size);

	return chunk;
}


/* Release a chunk with pfree. */
static void
xml_pfree(void *ptr)
{
	pfree(ptr);
}


/* Duplicate a C string with pstrdup. */
static char *
xml_pstrdup(const char *string)
{
	char	   *copy = pstrdup(string);

	return copy;
}
#endif /* NOT_USED */
837
838
839 /*
840  * Wrapper for "ereport" function.
841  * Adds detail - libxml's native error message, if any.
842  */
843 static void
844 xml_ereport(int level, int sqlcode,
845                         const char *msg, void *ctxt)
846 {
847         xmlErrorPtr libxmlErr = NULL;
848
849         if (xml_err_buf->len > 0)
850         {
851                 ereport(DEBUG1,
852                                 (errmsg("%s", xml_err_buf->data)));
853                 xml_err_buf->data[0] = '\0';
854                 xml_err_buf->len = 0;
855         }
856
857         if (ctxt != NULL)
858                 libxmlErr = xmlCtxtGetLastError(ctxt);
859
860         if (libxmlErr == NULL)
861         {
862                 ereport(level,
863                                 (errcode(sqlcode),
864                                  errmsg("%s", msg)));
865         }
866         else
867         {
868                 /* as usual, libxml error message contains '\n'; get rid of it */
869                 char *xmlErrDetail;
870                 int xmlErrLen, i;
871
872                 xmlErrDetail = pstrdup(libxmlErr->message);
873                 xmlErrLen = strlen(xmlErrDetail);
874                 for (i = 0; i < xmlErrLen; i++)
875                 {
876                         if (xmlErrDetail[i] == '\n')
877                                 xmlErrDetail[i] = '.';
878                 }
879                 ereport(level,
880                                 (errcode(sqlcode),
881                                  errmsg("%s", msg),
882                                  errdetail("%s", xmlErrDetail)));
883         }
884 }
885
886
887 /*
888  * Error handler for libxml error messages
889  */
890 static void
891 xml_errorHandler(void *ctxt, const char *msg,...)
892 {
893         /* Append the formatted text to xml_err_buf */
894         for (;;)
895         {
896                 va_list         args;
897                 bool            success;
898
899                 /* Try to format the data. */
900                 va_start(args, msg);
901                 success = appendStringInfoVA(xml_err_buf, msg, args);
902                 va_end(args);
903
904                 if (success)
905                         break;
906
907                 /* Double the buffer size and try again. */
908                 enlargeStringInfo(xml_err_buf, xml_err_buf->maxlen);
909         }
910 }
911
912
913 /*
914  * Return error message by libxml error code
915  * TODO make them closer to recommendations from Postgres manual
916  */
917 static void
918 xml_ereport_by_code(int level, int sqlcode,
919                                         const char *msg, int code)
920 {
921     const char *det;
922
923         if (xml_err_buf->len > 0)
924         {
925                 ereport(DEBUG1,
926                                 (errmsg("%s", xml_err_buf->data)));
927                 xml_err_buf->data[0] = '\0';
928                 xml_err_buf->len = 0;
929         }
930
931     switch (code)
932         {
933         case XML_ERR_INTERNAL_ERROR:
934             det = "libxml internal error";
935             break;
936         case XML_ERR_ENTITY_LOOP:
937             det = "Detected an entity reference loop";
938             break;
939         case XML_ERR_ENTITY_NOT_STARTED:
940             det = "EntityValue: \" or ' expected";
941             break;
942         case XML_ERR_ENTITY_NOT_FINISHED:
943             det = "EntityValue: \" or ' expected";
944             break;
945         case XML_ERR_ATTRIBUTE_NOT_STARTED:
946             det = "AttValue: \" or ' expected";
947             break;
948         case XML_ERR_LT_IN_ATTRIBUTE:
949             det = "Unescaped '<' not allowed in attributes values";
950             break;
951         case XML_ERR_LITERAL_NOT_STARTED:
952             det = "SystemLiteral \" or ' expected";
953             break;
954         case XML_ERR_LITERAL_NOT_FINISHED:
955             det = "Unfinished System or Public ID \" or ' expected";
956             break;
957         case XML_ERR_MISPLACED_CDATA_END:
958             det = "Sequence ']]>' not allowed in content";
959             break;
960         case XML_ERR_URI_REQUIRED:
961             det = "SYSTEM or PUBLIC, the URI is missing";
962             break;
963         case XML_ERR_PUBID_REQUIRED:
964             det = "PUBLIC, the Public Identifier is missing";
965             break;
966         case XML_ERR_HYPHEN_IN_COMMENT:
967             det = "Comment must not contain '--' (double-hyphen)";
968             break;
969         case XML_ERR_PI_NOT_STARTED:
970             det = "xmlParsePI : no target name";
971             break;
972         case XML_ERR_RESERVED_XML_NAME:
973             det = "Invalid PI name";
974             break;
975         case XML_ERR_NOTATION_NOT_STARTED:
976             det = "NOTATION: Name expected here";
977             break;
978         case XML_ERR_NOTATION_NOT_FINISHED:
979             det = "'>' required to close NOTATION declaration";
980             break;
981         case XML_ERR_VALUE_REQUIRED:
982             det = "Entity value required";
983             break;
984         case XML_ERR_URI_FRAGMENT:
985             det = "Fragment not allowed";
986             break;
987         case XML_ERR_ATTLIST_NOT_STARTED:
988             det = "'(' required to start ATTLIST enumeration";
989             break;
990         case XML_ERR_NMTOKEN_REQUIRED:
991             det = "NmToken expected in ATTLIST enumeration";
992             break;
993         case XML_ERR_ATTLIST_NOT_FINISHED:
994             det = "')' required to finish ATTLIST enumeration";
995             break;
996         case XML_ERR_MIXED_NOT_STARTED:
997             det = "MixedContentDecl : '|' or ')*' expected";
998             break;
999         case XML_ERR_PCDATA_REQUIRED:
1000             det = "MixedContentDecl : '#PCDATA' expected";
1001             break;
1002         case XML_ERR_ELEMCONTENT_NOT_STARTED:
1003             det = "ContentDecl : Name or '(' expected";
1004             break;
1005         case XML_ERR_ELEMCONTENT_NOT_FINISHED:
1006             det = "ContentDecl : ',' '|' or ')' expected";
1007             break;
1008         case XML_ERR_PEREF_IN_INT_SUBSET:
1009             det = "PEReference: forbidden within markup decl in internal subset";
1010             break;
1011         case XML_ERR_GT_REQUIRED:
1012             det = "Expected '>'";
1013             break;
1014         case XML_ERR_CONDSEC_INVALID:
1015             det = "XML conditional section '[' expected";
1016             break;
1017         case XML_ERR_EXT_SUBSET_NOT_FINISHED:
1018             det = "Content error in the external subset";
1019             break;
1020         case XML_ERR_CONDSEC_INVALID_KEYWORD:
1021             det = "conditional section INCLUDE or IGNORE keyword expected";
1022             break;
1023         case XML_ERR_CONDSEC_NOT_FINISHED:
1024             det = "XML conditional section not closed";
1025             break;
1026         case XML_ERR_XMLDECL_NOT_STARTED:
1027             det = "Text declaration '<?xml' required";
1028             break;
1029         case XML_ERR_XMLDECL_NOT_FINISHED:
1030             det = "parsing XML declaration: '?>' expected";
1031             break;
1032         case XML_ERR_EXT_ENTITY_STANDALONE:
1033             det = "external parsed entities cannot be standalone";
1034             break;
1035         case XML_ERR_ENTITYREF_SEMICOL_MISSING:
1036             det = "EntityRef: expecting ';'";
1037             break;
1038         case XML_ERR_DOCTYPE_NOT_FINISHED:
1039             det = "DOCTYPE improperly terminated";
1040             break;
1041         case XML_ERR_LTSLASH_REQUIRED:
1042             det = "EndTag: '</' not found";
1043             break;
1044         case XML_ERR_EQUAL_REQUIRED:
1045             det = "Expected '='";
1046             break;
1047         case XML_ERR_STRING_NOT_CLOSED:
1048             det = "String not closed expecting \" or '";
1049             break;
1050         case XML_ERR_STRING_NOT_STARTED:
1051             det = "String not started expecting ' or \"";
1052             break;
1053         case XML_ERR_ENCODING_NAME:
1054             det = "Invalid XML encoding name";
1055             break;
1056         case XML_ERR_STANDALONE_VALUE:
1057             det = "Standalone accepts only 'yes' or 'no'";
1058             break;
1059         case XML_ERR_DOCUMENT_EMPTY:
1060             det = "Document is empty";
1061             break;
1062         case XML_ERR_DOCUMENT_END:
1063             det = "Extra content at the end of the document";
1064             break;
1065         case XML_ERR_NOT_WELL_BALANCED:
1066             det = "Chunk is not well balanced";
1067             break;
1068         case XML_ERR_EXTRA_CONTENT:
1069             det = "Extra content at the end of well balanced chunk";
1070             break;
1071         case XML_ERR_VERSION_MISSING:
1072             det = "Malformed declaration expecting version";
1073             break;
1074         /* more err codes... Please, keep the order! */
1075         case XML_ERR_ATTRIBUTE_WITHOUT_VALUE: /* 41 */
1076                 det ="Attribute without value";
1077                 break;
1078         case XML_ERR_ATTRIBUTE_REDEFINED:
1079                 det ="Attribute defined more than once in the same element";
1080                 break;
1081         case XML_ERR_COMMENT_NOT_FINISHED: /* 45 */
1082             det = "Comment is not finished";
1083             break;
1084         case XML_ERR_NAME_REQUIRED: /* 68 */
1085             det = "Element name not found";
1086             break;
1087         case XML_ERR_TAG_NOT_FINISHED: /* 77 */
1088             det = "Closing tag not found";
1089             break;
1090         default:
1091             det = "Unrecognized libxml error code: %d";
1092                         break;
1093         }
1094
1095         ereport(level,
1096                         (errcode(sqlcode),
1097                          errmsg("%s", msg),
1098                          errdetail(det, code)));
1099 }
1100
1101
1102 /*
1103  * Convert one char in the current server encoding to a Unicode codepoint.
1104  */
1105 static pg_wchar
1106 sqlchar_to_unicode(char *s)
1107 {
1108         char *utf8string;
1109         pg_wchar ret[2];                        /* need space for trailing zero */
1110
1111         utf8string = (char *) pg_do_encoding_conversion((unsigned char *) s,
1112                                                                                                         pg_mblen(s),
1113                                                                                                         GetDatabaseEncoding(),
1114                                                                                                         PG_UTF8);
1115
1116         pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret, pg_mblen(s));
1117
1118         return ret[0];
1119 }
1120
1121
1122 static bool
1123 is_valid_xml_namefirst(pg_wchar c)
1124 {
1125         /* (Letter | '_' | ':') */
1126         return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
1127                         || c == '_' || c == ':');
1128 }
1129
1130
1131 static bool
1132 is_valid_xml_namechar(pg_wchar c)
1133 {
1134         /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
1135         return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
1136                         || xmlIsDigitQ(c)
1137                         || c == '.' || c == '-' || c == '_' || c == ':'
1138                         || xmlIsCombiningQ(c)
1139                         || xmlIsExtenderQ(c));
1140 }
1141 #endif /* USE_LIBXML */
1142
1143
1144 /*
1145  * Map SQL identifier to XML name; see SQL/XML:2003 section 9.1.
1146  */
1147 char *
1148 map_sql_identifier_to_xml_name(char *ident, bool fully_escaped)
1149 {
1150 #ifdef USE_LIBXML
1151         StringInfoData buf;
1152         char *p;
1153
1154         initStringInfo(&buf);
1155
1156         for (p = ident; *p; p += pg_mblen(p))
1157         {
1158                 if (*p == ':' && (p == ident || fully_escaped))
1159                         appendStringInfo(&buf, "_x003A_");
1160                 else if (*p == '_' && *(p+1) == 'x')
1161                         appendStringInfo(&buf, "_x005F_");
1162                 else if (fully_escaped && p == ident &&
1163                                  pg_strncasecmp(p, "xml", 3) == 0)
1164                 {
1165                         if (*p == 'x')
1166                                 appendStringInfo(&buf, "_x0078_");
1167                         else
1168                                 appendStringInfo(&buf, "_x0058_");
1169                 }
1170                 else
1171                 {
1172                         pg_wchar u = sqlchar_to_unicode(p);
1173
1174                         if ((p == ident)
1175                                 ? !is_valid_xml_namefirst(u)
1176                                 : !is_valid_xml_namechar(u))
1177                                 appendStringInfo(&buf, "_x%04X_", (unsigned int) u);
1178                         else
1179                                 appendBinaryStringInfo(&buf, p, pg_mblen(p));
1180                 }
1181         }
1182
1183         return buf.data;
1184 #else /* not USE_LIBXML */
1185         NO_XML_SUPPORT();
1186         return NULL;
1187 #endif /* not USE_LIBXML */
1188 }
1189
1190
1191 /*
1192  * Map a Unicode codepoint into the current server encoding.
1193  */
1194 static char *
1195 unicode_to_sqlchar(pg_wchar c)
1196 {
1197         static unsigned char utf8string[4];
1198
1199         if (c <= 0x7F)
1200         {
1201                 utf8string[0] = c;
1202         }
1203         else if (c <= 0x7FF)
1204         {
1205                 utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
1206                 utf8string[1] = 0x80 | (c & 0x3F);
1207         }
1208         else if (c <= 0xFFFF)
1209         {
1210                 utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
1211                 utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
1212                 utf8string[2] = 0x80 | (c & 0x3F);
1213         }
1214         else
1215         {
1216                 utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
1217                 utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
1218                 utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
1219                 utf8string[3] = 0x80 | (c & 0x3F);
1220         }
1221
1222         return (char *) pg_do_encoding_conversion(utf8string,
1223                                                                                           pg_mblen((char *) utf8string),
1224                                                                                           PG_UTF8,
1225                                                                                           GetDatabaseEncoding());
1226 }
1227
1228
1229 /*
1230  * Map XML name to SQL identifier; see SQL/XML:2003 section 9.17.
1231  */
1232 char *
1233 map_xml_name_to_sql_identifier(char *name)
1234 {
1235         StringInfoData buf;
1236         char *p;
1237
1238         initStringInfo(&buf);
1239
1240         for (p = name; *p; p += pg_mblen(p))
1241         {
1242                 if (*p == '_' && *(p+1) == 'x'
1243                         && isxdigit((unsigned char) *(p+2))
1244                         && isxdigit((unsigned char) *(p+3))
1245                         && isxdigit((unsigned char) *(p+4))
1246                         && isxdigit((unsigned char) *(p+5))
1247                         && *(p+6) == '_')
1248                 {
1249                         unsigned int u;
1250
1251                         sscanf(p + 2, "%X", &u);
1252                         appendStringInfoString(&buf, unicode_to_sqlchar(u));
1253                         p += 6;
1254                 }
1255                 else
1256                         appendBinaryStringInfo(&buf, p, pg_mblen(p));
1257         }
1258
1259         return buf.data;
1260 }
1261
1262
1263 #ifdef USE_LIBXML
1264 /*
1265  * Map SQL value to XML value; see SQL/XML:2003 section 9.16.
1266  */
1267 static char *
1268 map_sql_value_to_xml_value(Datum value, Oid type)
1269 {
1270         StringInfoData buf;
1271
1272         initStringInfo(&buf);
1273
1274         if (is_array_type(type))
1275         {
1276                 int i;
1277                 ArrayType *array;
1278                 Oid elmtype;
1279                 int16 elmlen;
1280                 bool elmbyval;
1281                 char elmalign;
1282
1283                 array = DatumGetArrayTypeP(value);
1284
1285                 /* TODO: need some code-fu here to remove this limitation */
1286                 if (ARR_NDIM(array) != 1)
1287                         ereport(ERROR,
1288                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1289                                          errmsg("only supported for one-dimensional array")));
1290
1291                 elmtype = ARR_ELEMTYPE(array);
1292                 get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
1293
1294                 for (i = ARR_LBOUND(array)[0];
1295                          i < ARR_LBOUND(array)[0] + ARR_DIMS(array)[0];
1296                          i++)
1297                 {
1298                         Datum subval;
1299                         bool isnull;
1300
1301                         subval = array_ref(array, 1, &i, -1, elmlen, elmbyval, elmalign, &isnull);
1302                         appendStringInfoString(&buf, "<element>");
1303                         appendStringInfoString(&buf, map_sql_value_to_xml_value(subval, elmtype));
1304                         appendStringInfoString(&buf, "</element>");
1305                 }
1306         }
1307         else
1308         {
1309                 Oid typeOut;
1310                 bool isvarlena;
1311                 char *p, *str;
1312
1313                 getTypeOutputInfo(type, &typeOut, &isvarlena);
1314                 str = OidOutputFunctionCall(typeOut, value);
1315
1316                 if (type == XMLOID)
1317                         return str;
1318
1319                 for (p = str; *p; p += pg_mblen(p))
1320                 {
1321                         switch (*p)
1322                         {
1323                                 case '&':
1324                                         appendStringInfo(&buf, "&amp;");
1325                                         break;
1326                                 case '<':
1327                                         appendStringInfo(&buf, "&lt;");
1328                                         break;
1329                                 case '>':
1330                                         appendStringInfo(&buf, "&gt;");
1331                                         break;
1332                                 case '\r':
1333                                         appendStringInfo(&buf, "&#x0d;");
1334                                         break;
1335                                 default:
1336                                         appendBinaryStringInfo(&buf, p, pg_mblen(p));
1337                                         break;
1338                         }
1339                 }
1340         }
1341
1342         return buf.data;
1343 }
1344 #endif /* USE_LIBXML */