OSDN Git Service

Allow XML fragment to contain an XML declaration. For that, we need a small
[pg-rex/syncrep.git] / src / backend / utils / adt / xml.c
1 /*-------------------------------------------------------------------------
2  *
3  * xml.c
4  *        XML data type support.
5  *
6  *
7  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.12 2007/01/07 00:13:55 petere Exp $
11  *
12  *-------------------------------------------------------------------------
13  */
14
15 /*
16  * Generally, XML type support is only available when libxml use was
17  * configured during the build.  But even if that is not done, the
18  * type and all the functions are available, but most of them will
19  * fail.  For one thing, this avoids having to manage variant catalog
20  * installations.  But it also has nice effects such as that you can
21  * dump a database containing XML type data even if the server is not
22  * linked with libxml.  Thus, make sure xml_out() works even if nothing
23  * else does.
24  */
25
26 #include "postgres.h"
27
28 #ifdef USE_LIBXML
29 #include <libxml/chvalid.h>
30 #include <libxml/parser.h>
31 #include <libxml/tree.h>
32 #include <libxml/uri.h>
33 #include <libxml/xmlerror.h>
34 #include <libxml/xmlsave.h>
35 #endif /* USE_LIBXML */
36
37 #include "fmgr.h"
38 #include "libpq/pqformat.h"
39 #include "mb/pg_wchar.h"
40 #include "nodes/execnodes.h"
41 #include "utils/builtins.h"
42 #include "utils/memutils.h"
43 #include "utils/xml.h"
44
45
46 #ifdef USE_LIBXML
47
48 #define PG_XML_DEFAULT_URI "dummy.xml"
49
50 static StringInfo xml_err_buf = NULL;
51
52 static void     xml_init(void);
53 #ifdef NOT_USED
54 static void    *xml_palloc(size_t size);
55 static void    *xml_repalloc(void *ptr, size_t size);
56 static void     xml_pfree(void *ptr);
57 static char    *xml_pstrdup(const char *string);
58 #endif
59 static void     xml_ereport(int level, int sqlcode,
60                                                         const char *msg, void *ctxt);
61 static void     xml_errorHandler(void *ctxt, const char *msg, ...);
62 static void     xml_ereport_by_code(int level, int sqlcode,
63                                                                         const char *msg, int errcode);
64 static xmlChar *xml_text2xmlChar(text *in);
65 static xmlDocPtr xml_parse(text *data, bool is_document, bool preserve_whitespace);
66
67 #endif /* USE_LIBXML */
68
69 #define NO_XML_SUPPORT() \
70         ereport(ERROR, \
71                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
72                          errmsg("no XML support in this installation")))
73
74
75 Datum
76 xml_in(PG_FUNCTION_ARGS)
77 {
78 #ifdef USE_LIBXML
79         char            *s = PG_GETARG_CSTRING(0);
80         size_t          len;
81         xmltype         *vardata;
82         xmlDocPtr        doc;
83
84         len = strlen(s);
85         vardata = palloc(len + VARHDRSZ);
86         VARATT_SIZEP(vardata) = len + VARHDRSZ;
87         memcpy(VARDATA(vardata), s, len);
88
89         /*
90          * Parse the data to check if it is well-formed XML data.  Assume
91          * that ERROR occurred if parsing failed.
92          */
93         doc = xml_parse(vardata, false, true);
94         xmlFreeDoc(doc);
95
96         PG_RETURN_XML_P(vardata);
97 #else
98         NO_XML_SUPPORT();
99         return 0;
100 #endif
101 }
102
103
104 Datum
105 xml_out(PG_FUNCTION_ARGS)
106 {
107         xmltype         *s = PG_GETARG_XML_P(0);
108         char            *result;
109         int32           len;
110
111         len = VARSIZE(s) - VARHDRSZ;
112         result = palloc(len + 1);
113         memcpy(result, VARDATA(s), len);
114         result[len] = '\0';
115
116         PG_RETURN_CSTRING(result);
117 }
118
119
120 Datum
121 xml_recv(PG_FUNCTION_ARGS)
122 {
123 #ifdef USE_LIBXML
124         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
125         xmltype    *result;
126         char       *str;
127         int                     nbytes;
128         xmlDocPtr       doc;
129
130         str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
131
132         result = (xmltype *) palloc(nbytes + VARHDRSZ);
133         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
134         memcpy(VARDATA(result), str, nbytes);
135         pfree(str);
136
137         /*
138          * Parse the data to check if it is well-formed XML data.  Assume
139          * that ERROR occurred if parsing failed.
140          */
141         doc = xml_parse(result, false, true);
142         xmlFreeDoc(doc);
143
144         PG_RETURN_XML_P(result);
145 #else
146         NO_XML_SUPPORT();
147         return 0;
148 #endif
149 }
150
151
152 Datum
153 xml_send(PG_FUNCTION_ARGS)
154 {
155         xmltype    *x = PG_GETARG_XML_P(0);
156         StringInfoData buf;
157
158         pq_begintypsend(&buf);
159         pq_sendbytes(&buf, VARDATA(x), VARSIZE(x) - VARHDRSZ);
160         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
161 }
162
163
164 #ifdef USE_LIBXML
165 static void
166 appendStringInfoText(StringInfo str, const text *t)
167 {
168         appendBinaryStringInfo(str, VARDATA(t), VARSIZE(t) - VARHDRSZ);
169 }
170
171
172 static xmltype *
173 stringinfo_to_xmltype(StringInfo buf)
174 {
175         int32 len;
176         xmltype *result;
177
178         len = buf->len + VARHDRSZ;
179         result = palloc(len);
180         VARATT_SIZEP(result) = len;
181         memcpy(VARDATA(result), buf->data, buf->len);
182
183         return result;
184 }
185
186
187 static xmltype *
188 xmlBuffer_to_xmltype(xmlBufferPtr buf)
189 {
190         int32           len;
191         xmltype    *result;
192
193         len = xmlBufferLength(buf) + VARHDRSZ;
194         result = palloc(len);
195         VARATT_SIZEP(result) = len;
196         memcpy(VARDATA(result), xmlBufferContent(buf), len - VARHDRSZ);
197
198         return result;
199 }
200 #endif
201
202
203 Datum
204 xmlcomment(PG_FUNCTION_ARGS)
205 {
206 #ifdef USE_LIBXML
207         text *arg = PG_GETARG_TEXT_P(0);
208         int len =  VARATT_SIZEP(arg) - VARHDRSZ;
209         StringInfoData buf;
210         int i;
211
212         /* check for "--" in string or "-" at the end */
213         for (i = 1; i < len; i++)
214                 if ((VARDATA(arg)[i] == '-' && VARDATA(arg)[i - 1] == '-')
215                         || (VARDATA(arg)[i] == '-' && i == len - 1))
216                                         ereport(ERROR,
217                                                         (errcode(ERRCODE_INVALID_XML_COMMENT),
218                                                          errmsg("invalid XML comment")));
219
220         initStringInfo(&buf);
221         appendStringInfo(&buf, "<!--");
222         appendStringInfoText(&buf, arg);
223         appendStringInfo(&buf, "-->");
224
225         PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
226 #else
227         NO_XML_SUPPORT();
228         return 0;
229 #endif
230 }
231
232
233 Datum
234 texttoxml(PG_FUNCTION_ARGS)
235 {
236         text       *data = PG_GETARG_TEXT_P(0);
237
238         PG_RETURN_XML_P(xmlparse(data, false, true));
239 }
240
241
242 xmltype *
243 xmlparse(text *data, bool is_document, bool preserve_whitespace)
244 {
245 #ifdef USE_LIBXML
246         xmlDocPtr       doc;
247
248         doc = xml_parse(data, is_document, preserve_whitespace);
249         xmlFreeDoc(doc);
250
251         return (xmltype *) data;
252 #else
253         NO_XML_SUPPORT();
254         return NULL;
255 #endif
256 }
257
258
259 xmltype *
260 xmlpi(char *target, text *arg)
261 {
262 #ifdef USE_LIBXML
263         xmltype *result;
264         StringInfoData buf;
265
266         if (pg_strncasecmp(target, "xml", 3) == 0)
267                 ereport(ERROR,
268                                 (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
269                                  errmsg("invalid XML processing instruction"),
270                                  errdetail("XML processing instruction target name cannot start with \"xml\".")));
271
272         initStringInfo(&buf);
273
274         appendStringInfo(&buf, "<?%s", target);
275
276         if (arg != NULL)
277         {
278                 char *string;
279
280                 string = DatumGetCString(DirectFunctionCall1(textout,
281                                                                                                          PointerGetDatum(arg)));
282                 if (strstr(string, "?>") != NULL)
283                 ereport(ERROR,
284                                 (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
285                                  errmsg("invalid XML processing instruction"),
286                                  errdetail("XML processing instruction cannot contain \"?>\".")));
287
288                 appendStringInfoChar(&buf, ' ');
289                 appendStringInfoString(&buf, string);
290                 pfree(string);
291         }
292         appendStringInfoString(&buf, "?>");
293
294         result = stringinfo_to_xmltype(&buf);
295         pfree(buf.data);
296         return result;
297 #else
298         NO_XML_SUPPORT();
299         return NULL;
300 #endif
301 }
302
303
304 xmltype *
305 xmlroot(xmltype *data, text *version, int standalone)
306 {
307 #ifdef USE_LIBXML
308         xmlDocPtr       doc;
309         xmlBufferPtr buffer;
310         xmlSaveCtxtPtr save;
311
312         doc = xml_parse((text *) data, true, true);
313
314         if (version)
315                 doc->version = xmlStrdup(xml_text2xmlChar(version));
316         else
317                 doc->version = NULL;
318
319         switch (standalone)
320         {
321                 case 1:
322                         doc->standalone = 1;
323                         break;
324                 case -1:
325                         doc->standalone = 0;
326                         break;
327                 default:
328                         doc->standalone = -1;
329                         break;
330         }
331
332         buffer = xmlBufferCreate();
333         save = xmlSaveToBuffer(buffer, NULL, 0);
334         xmlSaveDoc(save, doc);
335         xmlSaveClose(save);
336
337         xmlFreeDoc(doc);
338
339         return xmlBuffer_to_xmltype(buffer);
340 #else
341         NO_XML_SUPPORT();
342         return NULL;
343 #endif
344 }
345
346
347 /*
348  * Validate document (given as string) against DTD (given as external link)
349  * TODO !!! use text instead of cstring for second arg
350  * TODO allow passing DTD as a string value (not only as an URI)
351  * TODO redesign (see comment with '!!!' below)
352  */
353 Datum
354 xmlvalidate(PG_FUNCTION_ARGS)
355 {
356 #ifdef USE_LIBXML
357         text                            *data = PG_GETARG_TEXT_P(0);
358         text                            *dtdOrUri = PG_GETARG_TEXT_P(1);
359         bool                            result = false;
360         xmlParserCtxtPtr        ctxt = NULL;
361         xmlDocPtr                       doc = NULL;
362         xmlDtdPtr                       dtd = NULL;
363
364         xml_init();
365
366         /* We use a PG_TRY block to ensure libxml is cleaned up on error */
367         PG_TRY();
368         {
369                 ctxt = xmlNewParserCtxt();
370                 if (ctxt == NULL)
371                         xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
372                                                 "could not allocate parser context", ctxt);
373
374                 doc = xmlCtxtReadMemory(ctxt, (char *) VARDATA(data),
375                                                                 VARSIZE(data) - VARHDRSZ,
376                                                                 PG_XML_DEFAULT_URI, NULL, 0);
377                 if (doc == NULL)
378                         xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
379                                                 "could not parse XML data", ctxt);
380
381 #if 0
382                 uri = xmlCreateURI();
383                 elog(NOTICE, "dtd - %s", dtdOrUri);
384                 dtd = palloc(sizeof(xmlDtdPtr));
385                 uri = xmlParseURI(dtdOrUri);
386                 if (uri == NULL)
387                         xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
388                                                 "not implemented yet... (TODO)", ctxt);
389                 else
390 #endif
391                         dtd = xmlParseDTD(NULL, xml_text2xmlChar(dtdOrUri));
392
393                 if (dtd == NULL)
394                         xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
395                                                 "could not load DTD", ctxt);
396
397                 if (xmlValidateDtd(xmlNewValidCtxt(), doc, dtd) == 1)
398                         result = true;
399
400                 if (!result)
401                         xml_ereport(NOTICE, ERRCODE_INVALID_XML_DOCUMENT,
402                                                 "validation against DTD failed", ctxt);
403
404 #if 0
405                 if (uri)
406                         xmlFreeURI(uri);
407 #endif
408                 if (dtd)
409                         xmlFreeDtd(dtd);
410                 if (doc)
411                         xmlFreeDoc(doc);
412                 if (ctxt)
413                         xmlFreeParserCtxt(ctxt);
414                 xmlCleanupParser();
415         }
416         PG_CATCH();
417         {
418 #if 0
419                 if (uri)
420                         xmlFreeURI(uri);
421 #endif
422                 if (dtd)
423                         xmlFreeDtd(dtd);
424                 if (doc)
425                         xmlFreeDoc(doc);
426                 if (ctxt)
427                         xmlFreeParserCtxt(ctxt);
428                 xmlCleanupParser();
429
430                 PG_RE_THROW();
431         }
432         PG_END_TRY();
433
434         PG_RETURN_BOOL(result);
435 #else /* not USE_LIBXML */
436         NO_XML_SUPPORT();
437         return 0;
438 #endif /* not USE_LIBXML */
439 }
440
441
442 #ifdef USE_LIBXML
443
444 /*
445  * Container for some init stuff (not good design!)
446  * TODO xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and check)
447  */
448 static void
449 xml_init(void)
450 {
451         /*
452          * Currently, we have no pure UTF-8 support for internals -- check
453          * if we can work.
454          */
455         if (sizeof (char) != sizeof (xmlChar))
456                 ereport(ERROR,
457                                 (errmsg("could not initialize XML library"),
458                                  errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.",
459                                                    (int) sizeof(char), (int) sizeof(xmlChar))));
460
461         if (xml_err_buf == NULL)
462         {
463                 /* First time through: create error buffer in permanent context */
464                 MemoryContext oldcontext;
465
466                 oldcontext = MemoryContextSwitchTo(TopMemoryContext);
467                 xml_err_buf = makeStringInfo();
468                 MemoryContextSwitchTo(oldcontext);
469         }
470         else
471         {
472                 /* Reset pre-existing buffer to empty */
473                 xml_err_buf->data[0] = '\0';
474                 xml_err_buf->len = 0;
475         }
476         /* Now that xml_err_buf exists, safe to call xml_errorHandler */
477         xmlSetGenericErrorFunc(NULL, xml_errorHandler);
478
479 #ifdef NOT_USED
480         /*
481          * FIXME: This doesn't work because libxml assumes that whatever
482          * libxml allocates, only libxml will free, so we can't just drop
483          * memory contexts behind it.  This needs to be refined.
484          */
485         xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
486 #endif
487         xmlInitParser();
488         LIBXML_TEST_VERSION;
489 }
490
491
492 /*
493  * SQL/XML allows storing "XML documents" or "XML content".  "XML
494  * documents" are specified by the XML specification and are parsed
495  * easily by libxml.  "XML content" is specified by SQL/XML as the
496  * production "XMLDecl? content".  But libxml can only parse the
497  * "content" part, so we have to parse the XML declaration ourselves
498  * to complete this.
499  */
500
501 #define CHECK_XML_SPACE(p) if (!xmlIsBlank_ch(*(p))) return XML_ERR_SPACE_REQUIRED
502 #define SKIP_XML_SPACE(p) while (xmlIsBlank_ch(*(p))) (p)++
503
504 static int
505 parse_xml_decl(const xmlChar *str, size_t *len, xmlChar **encoding, int *standalone)
506 {
507         const xmlChar *p;
508         const xmlChar *save_p;
509
510         p = str;
511
512         if (xmlStrncmp(p, (xmlChar *)"<?xml", 5) != 0)
513                 goto finished;
514
515         p += 5;
516
517         /* version */
518         CHECK_XML_SPACE(p);
519         SKIP_XML_SPACE(p);
520         if (xmlStrncmp(p, (xmlChar *)"version", 7) != 0)
521                 return XML_ERR_VERSION_MISSING;
522         p += 7;
523         SKIP_XML_SPACE(p);
524         if (*p != '=')
525                 return XML_ERR_VERSION_MISSING;
526         p += 1;
527         SKIP_XML_SPACE(p);
528         if (xmlStrncmp(p, (xmlChar *)"'1.0'", 5) != 0 && xmlStrncmp(p, (xmlChar *)"\"1.0\"", 5) != 0)
529                 return XML_ERR_VERSION_MISSING;
530         p += 5;
531
532         /* encoding */
533         save_p = p;
534         SKIP_XML_SPACE(p);
535         if (xmlStrncmp(p, (xmlChar *)"encoding", 8) == 0)
536         {
537                 CHECK_XML_SPACE(save_p);
538                 p += 8;
539                 SKIP_XML_SPACE(p);
540                 if (*p != '=')
541                         return XML_ERR_MISSING_ENCODING;
542                 p += 1;
543                 SKIP_XML_SPACE(p);
544
545                 if (*p == '\'' || *p == '"')
546                 {
547                         const xmlChar *q;
548
549                         q = xmlStrchr(p + 1, *p);
550                         if (!q)
551                                 return XML_ERR_MISSING_ENCODING;
552
553                         *encoding = xmlStrndup(p + 1, q - p - 1);
554                         p = q + 1;
555                 }
556                 else
557                         return XML_ERR_MISSING_ENCODING;
558         }
559         else
560         {
561                 p = save_p;
562                 *encoding = NULL;
563         }
564
565         /* standalone */
566         save_p = p;
567         SKIP_XML_SPACE(p);
568         if (xmlStrncmp(p, (xmlChar *)"standalone", 10) == 0)
569         {
570                 CHECK_XML_SPACE(save_p);
571                 p += 10;
572                 SKIP_XML_SPACE(p);
573                 if (*p != '=')
574                         return XML_ERR_STANDALONE_VALUE;
575                 p += 1;
576                 SKIP_XML_SPACE(p);
577                 if (xmlStrncmp(p, (xmlChar *)"'yes'", 5) == 0 || xmlStrncmp(p, (xmlChar *)"\"yes\"", 5) == 0)
578                 {
579                         *standalone = 1;
580                         p += 5;
581                 }
582                 else if (xmlStrncmp(p, (xmlChar *)"'no'", 4) == 0 || xmlStrncmp(p, (xmlChar *)"\"no\"", 4) == 0)
583                 {
584                         *standalone = 0;
585                         p += 4;
586                 }
587                 else
588                         return XML_ERR_STANDALONE_VALUE;
589         }
590         else
591         {
592                 p = save_p;
593                 *standalone = -1;
594         }
595
596         SKIP_XML_SPACE(p);
597         if (xmlStrncmp(p, (xmlChar *)"?>", 2) != 0)
598                 return XML_ERR_XMLDECL_NOT_FINISHED;
599         p += 2;
600
601 finished:
602         if (len)
603                 *len = (p - str);
604         return XML_ERR_OK;
605 }
606
607
608 /*
609  * Convert a C string to XML internal representation
610  *
611  * TODO maybe, libxml2's xmlreader is better? (do not construct DOM, yet do not use SAX - see xml_reader.c)
612  * TODO what about internal URI for docs? (see PG_XML_DEFAULT_URI below)
613  */
614 static xmlDocPtr
615 xml_parse(text *data, bool is_document, bool preserve_whitespace)
616 {
617         int                                     res_code;
618         int32                           len;
619         xmlChar                         *string;
620         xmlParserCtxtPtr        ctxt = NULL;
621         xmlDocPtr                       doc = NULL;
622
623         len = VARSIZE(data) - VARHDRSZ; /* will be useful later */
624         string = xml_text2xmlChar(data);
625
626         xml_init();
627
628         /* We use a PG_TRY block to ensure libxml is cleaned up on error */
629         PG_TRY();
630         {
631                 ctxt = xmlNewParserCtxt();
632                 if (ctxt == NULL)
633                         xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
634                                                 "could not allocate parser context", ctxt);
635
636                 if (is_document)
637                 {
638                         /*
639                          * Note, that here we try to apply DTD defaults
640                          * (XML_PARSE_DTDATTR) according to SQL/XML:10.16.7.d:
641                          * 'Default valies defined by internal DTD are applied'.
642                          * As for external DTDs, we try to support them too, (see
643                          * SQL/XML:10.16.7.e)
644                          */
645                         doc = xmlCtxtReadMemory(ctxt, (char *) string, len,
646                                                                         PG_XML_DEFAULT_URI, NULL,
647                                                                         XML_PARSE_NOENT | XML_PARSE_DTDATTR
648                                                                         | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
649                         if (doc == NULL)
650                                 xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
651                                                         "invalid XML document", ctxt);
652                 }
653                 else
654                 {
655                         size_t count;
656                         xmlChar *encoding = NULL;
657                         int standalone = -1;
658
659                         doc = xmlNewDoc(NULL);
660
661                         res_code = parse_xml_decl(string, &count, &encoding, &standalone);
662
663                         /* TODO resolve: xmlParseBalancedChunkMemory assumes that string is UTF8 encoded! */
664                         if (res_code == 0)
665                                 res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, string + count, NULL);
666                         if (res_code != 0)
667                                 xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
668                                                                         "invalid XML content", res_code);
669
670                         doc->encoding = encoding;
671                         doc->standalone = standalone;
672                 }
673
674                 /* TODO encoding issues
675                  * (thoughts:
676                  *              CASE:
677                  *              - XML data has explicit encoding attribute in its prolog
678                  *              - if not, assume that enc. of XML data is the same as client's one
679                  *
680                  *              The common rule is to accept the XML data only if its encoding
681                  *              is the same as encoding of the storage (server's). The other possible
682                  *              option is to accept all the docs, but DO TRANSFORMATION and, if needed,
683                  *              change the prolog.
684                  *
685                  *              I think I'd stick the first way (for the 1st version),
686                  *              it's much simplier (less errors...)
687                  * ) */
688                 /* ... */
689
690                 if (ctxt)
691                         xmlFreeParserCtxt(ctxt);
692                 xmlCleanupParser();
693         }
694         PG_CATCH();
695         {
696                 if (doc)
697                         xmlFreeDoc(doc);
698                 doc = NULL;
699                 if (ctxt)
700                         xmlFreeParserCtxt(ctxt);
701                 xmlCleanupParser();
702
703                 PG_RE_THROW();
704         }
705         PG_END_TRY();
706
707         return doc;
708 }
709
710
711 /*
712  * xmlChar<->text convertions
713  */
714 static xmlChar *
715 xml_text2xmlChar(text *in)
716 {
717         int32           len = VARSIZE(in) - VARHDRSZ;
718         xmlChar         *res;
719
720         res = palloc(len + 1);
721         memcpy(res, VARDATA(in), len);
722         res[len] = '\0';
723
724         return(res);
725 }
726
727
728 #ifdef NOT_USED
/*
 * Wrappers for memory management functions
 *
 * Each one forwards to the corresponding palloc-family routine.  They
 * exist so they can be handed to xmlMemSetup(); see the disabled call in
 * xml_init().
 */

/* Allocate size bytes with palloc(). */
static void *
xml_palloc(size_t size)
{
	void	   *chunk = palloc(size);

	return chunk;
}


/* Resize a chunk with repalloc(). */
static void *
xml_repalloc(void *ptr, size_t size)
{
	void	   *chunk = repalloc(ptr, size);

	return chunk;
}


/* Release a chunk with pfree(). */
static void
xml_pfree(void *ptr)
{
	pfree(ptr);
	return;
}


/* Duplicate a C string with pstrdup(). */
static char *
xml_pstrdup(const char *string)
{
	char	   *copy = pstrdup(string);

	return copy;
}
758 #endif /* NOT_USED */
759
760
761 /*
762  * Wrapper for "ereport" function.
763  * Adds detail - libxml's native error message, if any.
764  */
765 static void
766 xml_ereport(int level, int sqlcode,
767                         const char *msg, void *ctxt)
768 {
769         xmlErrorPtr libxmlErr = NULL;
770
771         if (xml_err_buf->len > 0)
772         {
773                 ereport(DEBUG1,
774                                 (errmsg("%s", xml_err_buf->data)));
775                 xml_err_buf->data[0] = '\0';
776                 xml_err_buf->len = 0;
777         }
778
779         if (ctxt != NULL)
780                 libxmlErr = xmlCtxtGetLastError(ctxt);
781
782         if (libxmlErr == NULL)
783         {
784                 ereport(level,
785                                 (errcode(sqlcode),
786                                  errmsg("%s", msg)));
787         }
788         else
789         {
790                 /* as usual, libxml error message contains '\n'; get rid of it */
791                 char *xmlErrDetail;
792                 int xmlErrLen, i;
793
794                 xmlErrDetail = pstrdup(libxmlErr->message);
795                 xmlErrLen = strlen(xmlErrDetail);
796                 for (i = 0; i < xmlErrLen; i++)
797                 {
798                         if (xmlErrDetail[i] == '\n')
799                                 xmlErrDetail[i] = '.';
800                 }
801                 ereport(level,
802                                 (errcode(sqlcode),
803                                  errmsg("%s", msg),
804                                  errdetail("%s", xmlErrDetail)));
805         }
806 }
807
808
809 /*
810  * Error handler for libxml error messages
811  */
812 static void
813 xml_errorHandler(void *ctxt, const char *msg,...)
814 {
815         /* Append the formatted text to xml_err_buf */
816         for (;;)
817         {
818                 va_list         args;
819                 bool            success;
820
821                 /* Try to format the data. */
822                 va_start(args, msg);
823                 success = appendStringInfoVA(xml_err_buf, msg, args);
824                 va_end(args);
825
826                 if (success)
827                         break;
828
829                 /* Double the buffer size and try again. */
830                 enlargeStringInfo(xml_err_buf, xml_err_buf->maxlen);
831         }
832 }
833
834
835 /*
836  * Return error message by libxml error code
837  * TODO make them closer to recommendations from Postgres manual
838  */
839 static void
840 xml_ereport_by_code(int level, int sqlcode,
841                                         const char *msg, int code)
842 {
843     const char *det;
844
845         if (xml_err_buf->len > 0)
846         {
847                 ereport(DEBUG1,
848                                 (errmsg("%s", xml_err_buf->data)));
849                 xml_err_buf->data[0] = '\0';
850                 xml_err_buf->len = 0;
851         }
852
853     switch (code)
854         {
855         case XML_ERR_INTERNAL_ERROR:
856             det = "libxml internal error";
857             break;
858         case XML_ERR_ENTITY_LOOP:
859             det = "Detected an entity reference loop";
860             break;
861         case XML_ERR_ENTITY_NOT_STARTED:
862             det = "EntityValue: \" or ' expected";
863             break;
864         case XML_ERR_ENTITY_NOT_FINISHED:
865             det = "EntityValue: \" or ' expected";
866             break;
867         case XML_ERR_ATTRIBUTE_NOT_STARTED:
868             det = "AttValue: \" or ' expected";
869             break;
870         case XML_ERR_LT_IN_ATTRIBUTE:
871             det = "Unescaped '<' not allowed in attributes values";
872             break;
873         case XML_ERR_LITERAL_NOT_STARTED:
874             det = "SystemLiteral \" or ' expected";
875             break;
876         case XML_ERR_LITERAL_NOT_FINISHED:
877             det = "Unfinished System or Public ID \" or ' expected";
878             break;
879         case XML_ERR_MISPLACED_CDATA_END:
880             det = "Sequence ']]>' not allowed in content";
881             break;
882         case XML_ERR_URI_REQUIRED:
883             det = "SYSTEM or PUBLIC, the URI is missing";
884             break;
885         case XML_ERR_PUBID_REQUIRED:
886             det = "PUBLIC, the Public Identifier is missing";
887             break;
888         case XML_ERR_HYPHEN_IN_COMMENT:
889             det = "Comment must not contain '--' (double-hyphen)";
890             break;
891         case XML_ERR_PI_NOT_STARTED:
892             det = "xmlParsePI : no target name";
893             break;
894         case XML_ERR_RESERVED_XML_NAME:
895             det = "Invalid PI name";
896             break;
897         case XML_ERR_NOTATION_NOT_STARTED:
898             det = "NOTATION: Name expected here";
899             break;
900         case XML_ERR_NOTATION_NOT_FINISHED:
901             det = "'>' required to close NOTATION declaration";
902             break;
903         case XML_ERR_VALUE_REQUIRED:
904             det = "Entity value required";
905             break;
906         case XML_ERR_URI_FRAGMENT:
907             det = "Fragment not allowed";
908             break;
909         case XML_ERR_ATTLIST_NOT_STARTED:
910             det = "'(' required to start ATTLIST enumeration";
911             break;
912         case XML_ERR_NMTOKEN_REQUIRED:
913             det = "NmToken expected in ATTLIST enumeration";
914             break;
915         case XML_ERR_ATTLIST_NOT_FINISHED:
916             det = "')' required to finish ATTLIST enumeration";
917             break;
918         case XML_ERR_MIXED_NOT_STARTED:
919             det = "MixedContentDecl : '|' or ')*' expected";
920             break;
921         case XML_ERR_PCDATA_REQUIRED:
922             det = "MixedContentDecl : '#PCDATA' expected";
923             break;
924         case XML_ERR_ELEMCONTENT_NOT_STARTED:
925             det = "ContentDecl : Name or '(' expected";
926             break;
927         case XML_ERR_ELEMCONTENT_NOT_FINISHED:
928             det = "ContentDecl : ',' '|' or ')' expected";
929             break;
930         case XML_ERR_PEREF_IN_INT_SUBSET:
931             det = "PEReference: forbidden within markup decl in internal subset";
932             break;
933         case XML_ERR_GT_REQUIRED:
934             det = "Expected '>'";
935             break;
936         case XML_ERR_CONDSEC_INVALID:
937             det = "XML conditional section '[' expected";
938             break;
939         case XML_ERR_EXT_SUBSET_NOT_FINISHED:
940             det = "Content error in the external subset";
941             break;
942         case XML_ERR_CONDSEC_INVALID_KEYWORD:
943             det = "conditional section INCLUDE or IGNORE keyword expected";
944             break;
945         case XML_ERR_CONDSEC_NOT_FINISHED:
946             det = "XML conditional section not closed";
947             break;
948         case XML_ERR_XMLDECL_NOT_STARTED:
949             det = "Text declaration '<?xml' required";
950             break;
951         case XML_ERR_XMLDECL_NOT_FINISHED:
952             det = "parsing XML declaration: '?>' expected";
953             break;
954         case XML_ERR_EXT_ENTITY_STANDALONE:
955             det = "external parsed entities cannot be standalone";
956             break;
957         case XML_ERR_ENTITYREF_SEMICOL_MISSING:
958             det = "EntityRef: expecting ';'";
959             break;
960         case XML_ERR_DOCTYPE_NOT_FINISHED:
961             det = "DOCTYPE improperly terminated";
962             break;
963         case XML_ERR_LTSLASH_REQUIRED:
964             det = "EndTag: '</' not found";
965             break;
966         case XML_ERR_EQUAL_REQUIRED:
967             det = "Expected '='";
968             break;
969         case XML_ERR_STRING_NOT_CLOSED:
970             det = "String not closed expecting \" or '";
971             break;
972         case XML_ERR_STRING_NOT_STARTED:
973             det = "String not started expecting ' or \"";
974             break;
975         case XML_ERR_ENCODING_NAME:
976             det = "Invalid XML encoding name";
977             break;
978         case XML_ERR_STANDALONE_VALUE:
979             det = "Standalone accepts only 'yes' or 'no'";
980             break;
981         case XML_ERR_DOCUMENT_EMPTY:
982             det = "Document is empty";
983             break;
984         case XML_ERR_DOCUMENT_END:
985             det = "Extra content at the end of the document";
986             break;
987         case XML_ERR_NOT_WELL_BALANCED:
988             det = "Chunk is not well balanced";
989             break;
990         case XML_ERR_EXTRA_CONTENT:
991             det = "Extra content at the end of well balanced chunk";
992             break;
993         case XML_ERR_VERSION_MISSING:
994             det = "Malformed declaration expecting version";
995             break;
996         /* more err codes... Please, keep the order! */
997         case XML_ERR_ATTRIBUTE_WITHOUT_VALUE: /* 41 */
998                 det ="Attribute without value";
999                 break;
1000         case XML_ERR_ATTRIBUTE_REDEFINED:
1001                 det ="Attribute defined more than once in the same element";
1002                 break;
1003         case XML_ERR_COMMENT_NOT_FINISHED: /* 45 */
1004             det = "Comment is not finished";
1005             break;
1006         case XML_ERR_NAME_REQUIRED: /* 68 */
1007             det = "Element name not found";
1008             break;
1009         case XML_ERR_TAG_NOT_FINISHED: /* 77 */
1010             det = "Closing tag not found";
1011             break;
1012         default:
1013             det = "Unrecognized libxml error code: %d";
1014                         break;
1015         }
1016
1017         ereport(level,
1018                         (errcode(sqlcode),
1019                          errmsg("%s", msg),
1020                          errdetail(det, code)));
1021 }
1022
1023
1024 /*
1025  * Convert one char in the current server encoding to a Unicode codepoint.
1026  */
1027 static pg_wchar
1028 sqlchar_to_unicode(char *s)
1029 {
1030         char *utf8string;
1031         pg_wchar ret[2];                        /* need space for trailing zero */
1032
1033         utf8string = (char *) pg_do_encoding_conversion((unsigned char *) s,
1034                                                                                                         pg_mblen(s),
1035                                                                                                         GetDatabaseEncoding(),
1036                                                                                                         PG_UTF8);
1037
1038         pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret, pg_mblen(s));
1039
1040         return ret[0];
1041 }
1042
1043
1044 static bool
1045 is_valid_xml_namefirst(pg_wchar c)
1046 {
1047         /* (Letter | '_' | ':') */
1048         return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
1049                         || c == '_' || c == ':');
1050 }
1051
1052
1053 static bool
1054 is_valid_xml_namechar(pg_wchar c)
1055 {
1056         /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
1057         return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
1058                         || xmlIsDigitQ(c)
1059                         || c == '.' || c == '-' || c == '_' || c == ':'
1060                         || xmlIsCombiningQ(c)
1061                         || xmlIsExtenderQ(c));
1062 }
1063 #endif /* USE_LIBXML */
1064
1065
1066 /*
1067  * Map SQL identifier to XML name; see SQL/XML:2003 section 9.1.
1068  */
1069 char *
1070 map_sql_identifier_to_xml_name(char *ident, bool fully_escaped)
1071 {
1072 #ifdef USE_LIBXML
1073         StringInfoData buf;
1074         char *p;
1075
1076         initStringInfo(&buf);
1077
1078         for (p = ident; *p; p += pg_mblen(p))
1079         {
1080                 if (*p == ':' && (p == ident || fully_escaped))
1081                         appendStringInfo(&buf, "_x003A_");
1082                 else if (*p == '_' && *(p+1) == 'x')
1083                         appendStringInfo(&buf, "_x005F_");
1084                 else if (fully_escaped && p == ident &&
1085                                  pg_strncasecmp(p, "xml", 3) == 0)
1086                 {
1087                         if (*p == 'x')
1088                                 appendStringInfo(&buf, "_x0078_");
1089                         else
1090                                 appendStringInfo(&buf, "_x0058_");
1091                 }
1092                 else
1093                 {
1094                         pg_wchar u = sqlchar_to_unicode(p);
1095
1096                         if ((p == ident)
1097                                 ? !is_valid_xml_namefirst(u)
1098                                 : !is_valid_xml_namechar(u))
1099                                 appendStringInfo(&buf, "_x%04X_", (unsigned int) u);
1100                         else
1101                                 appendBinaryStringInfo(&buf, p, pg_mblen(p));
1102                 }
1103         }
1104
1105         return buf.data;
1106 #else /* not USE_LIBXML */
1107         NO_XML_SUPPORT();
1108         return NULL;
1109 #endif /* not USE_LIBXML */
1110 }
1111
1112
1113 /*
1114  * Map a Unicode codepoint into the current server encoding.
1115  */
1116 static char *
1117 unicode_to_sqlchar(pg_wchar c)
1118 {
1119         static unsigned char utf8string[4];
1120
1121         if (c <= 0x7F)
1122         {
1123                 utf8string[0] = c;
1124         }
1125         else if (c <= 0x7FF)
1126         {
1127                 utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
1128                 utf8string[1] = 0x80 | (c & 0x3F);
1129         }
1130         else if (c <= 0xFFFF)
1131         {
1132                 utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
1133                 utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
1134                 utf8string[2] = 0x80 | (c & 0x3F);
1135         }
1136         else
1137         {
1138                 utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
1139                 utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
1140                 utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
1141                 utf8string[3] = 0x80 | (c & 0x3F);
1142         }
1143
1144         return (char *) pg_do_encoding_conversion(utf8string,
1145                                                                                           pg_mblen((char *) utf8string),
1146                                                                                           PG_UTF8,
1147                                                                                           GetDatabaseEncoding());
1148 }
1149
1150
1151 /*
1152  * Map XML name to SQL identifier; see SQL/XML:2003 section 9.17.
1153  */
1154 char *
1155 map_xml_name_to_sql_identifier(char *name)
1156 {
1157         StringInfoData buf;
1158         char *p;
1159
1160         initStringInfo(&buf);
1161
1162         for (p = name; *p; p += pg_mblen(p))
1163         {
1164                 if (*p == '_' && *(p+1) == 'x'
1165                         && isxdigit((unsigned char) *(p+2))
1166                         && isxdigit((unsigned char) *(p+3))
1167                         && isxdigit((unsigned char) *(p+4))
1168                         && isxdigit((unsigned char) *(p+5))
1169                         && *(p+6) == '_')
1170                 {
1171                         unsigned int u;
1172
1173                         sscanf(p + 2, "%X", &u);
1174                         appendStringInfoString(&buf, unicode_to_sqlchar(u));
1175                         p += 6;
1176                 }
1177                 else
1178                         appendBinaryStringInfo(&buf, p, pg_mblen(p));
1179         }
1180
1181         return buf.data;
1182 }