1 /*-------------------------------------------------------------------------
4 * XML data type support.
7 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
10 * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.19 2007/01/19 16:58:46 petere Exp $
12 *-------------------------------------------------------------------------
16 * Generally, XML type support is only available when libxml use was
17 * configured during the build. Even if that is not done, the type
18 * and all the functions are still available, although most of them
19 * will fail. For one thing, this avoids having to manage variant catalog
20 * installations. It also has nice effects, such as being able to
21 * dump a database containing XML type data even if the server is not
22 * linked with libxml. Thus, make sure xml_out() works even if nothing
29 #include <libxml/chvalid.h>
30 #include <libxml/parser.h>
31 #include <libxml/tree.h>
32 #include <libxml/uri.h>
33 #include <libxml/xmlerror.h>
34 #include <libxml/xmlsave.h>
35 #include <libxml/xmlwriter.h>
36 #endif /* USE_LIBXML */
38 #include "catalog/pg_type.h"
39 #include "executor/executor.h"
41 #include "libpq/pqformat.h"
42 #include "mb/pg_wchar.h"
43 #include "nodes/execnodes.h"
44 #include "parser/parse_expr.h"
45 #include "utils/array.h"
46 #include "utils/builtins.h"
47 #include "utils/lsyscache.h"
48 #include "utils/memutils.h"
49 #include "utils/xml.h"
54 #define PG_XML_DEFAULT_URI "dummy.xml"
56 static StringInfo xml_err_buf = NULL;
58 static void xml_init(void);
60 static void *xml_palloc(size_t size);
61 static void *xml_repalloc(void *ptr, size_t size);
62 static void xml_pfree(void *ptr);
63 static char *xml_pstrdup(const char *string);
65 static void xml_ereport(int level, int sqlcode,
66 const char *msg, void *ctxt);
67 static void xml_errorHandler(void *ctxt, const char *msg, ...);
68 static void xml_ereport_by_code(int level, int sqlcode,
69 const char *msg, int errcode);
70 static xmlChar *xml_text2xmlChar(text *in);
71 static int parse_xml_decl(const xmlChar *str, size_t *lenp, xmlChar **version, xmlChar **encoding, int *standalone);
72 static xmlDocPtr xml_parse(text *data, bool is_document, bool preserve_whitespace, xmlChar *encoding);
74 #endif /* USE_LIBXML */
76 XmlBinaryType xmlbinary;
78 #define NO_XML_SUPPORT() \
80 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
81 errmsg("no XML support in this installation")))
85 xml_in(PG_FUNCTION_ARGS)
88 char *s = PG_GETARG_CSTRING(0);
94 vardata = palloc(len + VARHDRSZ);
95 VARATT_SIZEP(vardata) = len + VARHDRSZ;
96 memcpy(VARDATA(vardata), s, len);
99 * Parse the data to check if it is well-formed XML data. Assume
100 * that ERROR occurred if parsing failed.
102 doc = xml_parse(vardata, false, true, NULL);
105 PG_RETURN_XML_P(vardata);
113 #define PG_XML_DEFAULT_VERSION "1.0"
117 xml_out_internal(xmltype *x, pg_enc target_encoding)
128 len = VARSIZE(x) - VARHDRSZ;
129 str = palloc(len + 1);
130 memcpy(str, VARDATA(x), len);
135 * On output, we adjust the XML declaration as follows. (These
136 * rules are the moral equivalent of the clause "Serialization of
137 * an XML value" in the SQL standard.)
139 * We try to avoid generating an XML declaration if possible.
140 * This is so that you don't get trivial things like xml '<foo/>'
141 * resulting in '<?xml version="1.0"?><foo/>', which would surely
142 * be annoying. We must provide a declaration if the standalone
143 * property is specified or if we include an encoding
144 * specification. If we have a declaration, we must specify a
145 * version (XML requires this). Otherwise we only make a
146 * declaration if the version is not "1.0", which is the default
147 * version specified in SQL:2003.
149 if ((res_code = parse_xml_decl((xmlChar *) str, &len, &version, &encoding, &standalone)) == 0)
153 initStringInfo(&buf);
155 if ((version && strcmp((char *) version, PG_XML_DEFAULT_VERSION) != 0)
156 || (target_encoding && target_encoding != PG_UTF8)
159 appendStringInfoString(&buf, "<?xml");
161 appendStringInfo(&buf, " version=\"%s\"", version);
163 appendStringInfo(&buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
164 if (target_encoding && target_encoding != PG_UTF8)
165 /* XXX might be useful to convert this to IANA names
166 * (ISO-8859-1 instead of LATIN1 etc.); needs field
168 appendStringInfo(&buf, " encoding=\"%s\"", pg_encoding_to_char(target_encoding));
170 appendStringInfoString(&buf, " standalone=\"yes\"");
171 else if (standalone == 0)
172 appendStringInfoString(&buf, " standalone=\"no\"");
173 appendStringInfoString(&buf, "?>");
178 * If we are not going to produce an XML declaration, eat
179 * a single newline in the original string to prevent
180 * empty first lines in the output.
182 if (*(str + len) == '\n')
185 appendStringInfoString(&buf, str + len);
190 xml_ereport_by_code(WARNING, ERRCODE_INTERNAL_ERROR,
191 "could not parse XML declaration in stored value", res_code);
198 xml_out(PG_FUNCTION_ARGS)
200 xmltype *x = PG_GETARG_XML_P(0);
203 * xml_out removes the encoding property in all cases. This is
204 * because we cannot control from here whether the datum will be
205 * converted to a different client encoding, so we'd do more harm
206 * than good by including it.
208 PG_RETURN_CSTRING(xml_out_internal(x, 0));
213 xml_recv(PG_FUNCTION_ARGS)
216 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
222 xmlChar *encoding = NULL;
224 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
226 result = palloc(nbytes + VARHDRSZ);
227 VARATT_SIZEP(result) = nbytes + VARHDRSZ;
228 memcpy(VARDATA(result), str, nbytes);
230 parse_xml_decl((xmlChar *) str, NULL, NULL, &encoding, NULL);
233 * Parse the data to check if it is well-formed XML data. Assume
234 * that ERROR occurred if parsing failed.
236 doc = xml_parse(result, false, true, encoding);
239 newstr = (char *) pg_do_encoding_conversion((unsigned char *) str,
241 encoding ? pg_char_to_encoding((char *) encoding) : PG_UTF8,
242 GetDatabaseEncoding());
250 nbytes = strlen(newstr);
252 result = palloc(nbytes + VARHDRSZ);
253 VARATT_SIZEP(result) = nbytes + VARHDRSZ;
254 memcpy(VARDATA(result), newstr, nbytes);
257 PG_RETURN_XML_P(result);
266 xml_send(PG_FUNCTION_ARGS)
268 xmltype *x = PG_GETARG_XML_P(0);
269 char *outval = xml_out_internal(x, pg_get_client_encoding());
272 pq_begintypsend(&buf);
273 pq_sendstring(&buf, outval);
274 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
280 appendStringInfoText(StringInfo str, const text *t)
282 appendBinaryStringInfo(str, VARDATA(t), VARSIZE(t) - VARHDRSZ);
287 stringinfo_to_xmltype(StringInfo buf)
292 len = buf->len + VARHDRSZ;
293 result = palloc(len);
294 VARATT_SIZEP(result) = len;
295 memcpy(VARDATA(result), buf->data, buf->len);
302 cstring_to_xmltype(const char *string)
307 len = strlen(string) + VARHDRSZ;
308 result = palloc(len);
309 VARATT_SIZEP(result) = len;
310 memcpy(VARDATA(result), string, len - VARHDRSZ);
317 xmlBuffer_to_xmltype(xmlBufferPtr buf)
322 len = xmlBufferLength(buf) + VARHDRSZ;
323 result = palloc(len);
324 VARATT_SIZEP(result) = len;
325 memcpy(VARDATA(result), xmlBufferContent(buf), len - VARHDRSZ);
333 xmlcomment(PG_FUNCTION_ARGS)
336 text *arg = PG_GETARG_TEXT_P(0);
337 int len = VARSIZE(arg) - VARHDRSZ;
341 /* check for "--" in string or "-" at the end */
342 for (i = 1; i < len; i++)
343 if ((VARDATA(arg)[i] == '-' && VARDATA(arg)[i - 1] == '-')
344 || (VARDATA(arg)[i] == '-' && i == len - 1))
346 (errcode(ERRCODE_INVALID_XML_COMMENT),
347 errmsg("invalid XML comment")));
349 initStringInfo(&buf);
350 appendStringInfo(&buf, "<!--");
351 appendStringInfoText(&buf, arg);
352 appendStringInfo(&buf, "-->");
354 PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
363 texttoxml(PG_FUNCTION_ARGS)
365 text *data = PG_GETARG_TEXT_P(0);
367 PG_RETURN_XML_P(xmlparse(data, false, true));
372 xmlelement(XmlExprState *xmlExpr, ExprContext *econtext)
375 XmlExpr *xexpr = (XmlExpr *) xmlExpr->xprstate.expr;
385 xmlTextWriterPtr writer;
387 buf = xmlBufferCreate();
388 writer = xmlNewTextWriterMemory(buf, 0);
390 xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
393 forboth(arg, xmlExpr->named_args, narg, xexpr->arg_names)
395 ExprState *e = (ExprState *) lfirst(arg);
396 char *argname = strVal(lfirst(narg));
398 value = ExecEvalExpr(e, econtext, &isnull, NULL);
401 str = OutputFunctionCall(&xmlExpr->named_outfuncs[i], value);
402 xmlTextWriterWriteAttribute(writer, (xmlChar *) argname, (xmlChar *) str);
408 foreach(arg, xmlExpr->args)
410 ExprState *e = (ExprState *) lfirst(arg);
412 value = ExecEvalExpr(e, econtext, &isnull, NULL);
414 xmlTextWriterWriteRaw(writer, (xmlChar *) map_sql_value_to_xml_value(value, exprType((Node *) e->expr)));
417 xmlTextWriterEndElement(writer);
418 xmlFreeTextWriter(writer);
420 result = xmlBuffer_to_xmltype(buf);
431 xmlparse(text *data, bool is_document, bool preserve_whitespace)
436 doc = xml_parse(data, is_document, preserve_whitespace, NULL);
439 return (xmltype *) data;
448 xmlpi(char *target, text *arg, bool arg_is_null, bool *result_is_null)
454 if (pg_strncasecmp(target, "xml", 3) == 0)
456 (errcode(ERRCODE_SYNTAX_ERROR), /* really */
457 errmsg("invalid XML processing instruction"),
458 errdetail("XML processing instruction target name cannot start with \"xml\".")));
461 * Following the SQL standard, the null check comes after the
462 * syntax check above.
464 *result_is_null = arg_is_null;
468 initStringInfo(&buf);
470 appendStringInfo(&buf, "<?%s", target);
476 string = DatumGetCString(DirectFunctionCall1(textout,
477 PointerGetDatum(arg)));
478 if (strstr(string, "?>") != NULL)
480 (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
481 errmsg("invalid XML processing instruction"),
482 errdetail("XML processing instruction cannot contain \"?>\".")));
484 appendStringInfoChar(&buf, ' ');
485 appendStringInfoString(&buf, string + strspn(string, " "));
488 appendStringInfoString(&buf, "?>");
490 result = stringinfo_to_xmltype(&buf);
501 xmlroot(xmltype *data, text *version, int standalone)
509 doc = xml_parse((text *) data, true, true, NULL);
512 doc->version = xmlStrdup(xml_text2xmlChar(version));
525 doc->standalone = -1;
529 buffer = xmlBufferCreate();
530 save = xmlSaveToBuffer(buffer, "UTF-8", 0);
531 xmlSaveDoc(save, doc);
536 result = cstring_to_xmltype((char *) pg_do_encoding_conversion((unsigned char *) xmlBufferContent(buffer),
537 xmlBufferLength(buffer),
539 GetDatabaseEncoding()));
540 xmlBufferFree(buffer);
550 * Validate document (given as string) against DTD (given as external link)
551 * TODO !!! use text instead of cstring for second arg
552 * TODO allow passing DTD as a string value (not only as a URI)
553 * TODO redesign (see comment with '!!!' below)
556 xmlvalidate(PG_FUNCTION_ARGS)
559 text *data = PG_GETARG_TEXT_P(0);
560 text *dtdOrUri = PG_GETARG_TEXT_P(1);
562 xmlParserCtxtPtr ctxt = NULL;
563 xmlDocPtr doc = NULL;
564 xmlDtdPtr dtd = NULL;
568 /* We use a PG_TRY block to ensure libxml is cleaned up on error */
571 ctxt = xmlNewParserCtxt();
573 xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
574 "could not allocate parser context", ctxt);
576 doc = xmlCtxtReadMemory(ctxt, (char *) VARDATA(data),
577 VARSIZE(data) - VARHDRSZ,
578 PG_XML_DEFAULT_URI, NULL, 0);
580 xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
581 "could not parse XML data", ctxt);
584 uri = xmlCreateURI();
585 elog(NOTICE, "dtd - %s", dtdOrUri);
586 dtd = palloc(sizeof(xmlDtdPtr));
587 uri = xmlParseURI(dtdOrUri);
589 xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
590 "not implemented yet... (TODO)", ctxt);
593 dtd = xmlParseDTD(NULL, xml_text2xmlChar(dtdOrUri));
596 xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
597 "could not load DTD", ctxt);
599 if (xmlValidateDtd(xmlNewValidCtxt(), doc, dtd) == 1)
603 xml_ereport(NOTICE, ERRCODE_INVALID_XML_DOCUMENT,
604 "validation against DTD failed", ctxt);
615 xmlFreeParserCtxt(ctxt);
629 xmlFreeParserCtxt(ctxt);
636 PG_RETURN_BOOL(result);
637 #else /* not USE_LIBXML */
640 #endif /* not USE_LIBXML */
645 xml_is_document(xmltype *arg)
649 xmlDocPtr doc = NULL;
650 MemoryContext ccxt = CurrentMemoryContext;
654 doc = xml_parse((text *) arg, true, true, NULL);
662 ecxt = MemoryContextSwitchTo(ccxt);
663 errdata = CopyErrorData();
664 if (errdata->sqlerrcode == ERRCODE_INVALID_XML_DOCUMENT)
671 MemoryContextSwitchTo(ecxt);
681 #else /* not USE_LIBXML */
684 #endif /* not USE_LIBXML */
691 * Container for some init stuff (not good design!)
692 * TODO xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and check)
698 * Currently, we have no pure UTF-8 support for internals -- check
701 if (sizeof (char) != sizeof (xmlChar))
703 (errmsg("could not initialize XML library"),
704 errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.",
705 (int) sizeof(char), (int) sizeof(xmlChar))));
707 if (xml_err_buf == NULL)
709 /* First time through: create error buffer in permanent context */
710 MemoryContext oldcontext;
712 oldcontext = MemoryContextSwitchTo(TopMemoryContext);
713 xml_err_buf = makeStringInfo();
714 MemoryContextSwitchTo(oldcontext);
718 /* Reset pre-existing buffer to empty */
719 xml_err_buf->data[0] = '\0';
720 xml_err_buf->len = 0;
722 /* Now that xml_err_buf exists, safe to call xml_errorHandler */
723 xmlSetGenericErrorFunc(NULL, xml_errorHandler);
727 * FIXME: This doesn't work because libxml assumes that whatever
728 * libxml allocates, only libxml will free, so we can't just drop
729 * memory contexts behind it. This needs to be refined.
731 xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
739 * SQL/XML allows storing "XML documents" or "XML content". "XML
740 * documents" are specified by the XML specification and are parsed
741 * easily by libxml. "XML content" is specified by SQL/XML as the
742 * production "XMLDecl? content". But libxml can only parse the
743 * "content" part, so we have to parse the XML declaration ourselves
747 #define CHECK_XML_SPACE(p) if (!xmlIsBlank_ch(*(p))) return XML_ERR_SPACE_REQUIRED
748 #define SKIP_XML_SPACE(p) while (xmlIsBlank_ch(*(p))) (p)++
751 parse_xml_decl(const xmlChar *str, size_t *lenp, xmlChar **version, xmlChar **encoding, int *standalone)
754 const xmlChar *save_p;
766 if (xmlStrncmp(p, (xmlChar *)"<?xml", 5) != 0)
774 if (xmlStrncmp(p, (xmlChar *)"version", 7) != 0)
775 return XML_ERR_VERSION_MISSING;
779 return XML_ERR_VERSION_MISSING;
783 if (*p == '\'' || *p == '"')
787 q = xmlStrchr(p + 1, *p);
789 return XML_ERR_VERSION_MISSING;
792 *version = xmlStrndup(p + 1, q - p - 1);
796 return XML_ERR_VERSION_MISSING;
801 if (xmlStrncmp(p, (xmlChar *)"encoding", 8) == 0)
803 CHECK_XML_SPACE(save_p);
807 return XML_ERR_MISSING_ENCODING;
811 if (*p == '\'' || *p == '"')
815 q = xmlStrchr(p + 1, *p);
817 return XML_ERR_MISSING_ENCODING;
820 *encoding = xmlStrndup(p + 1, q - p - 1);
824 return XML_ERR_MISSING_ENCODING;
834 if (xmlStrncmp(p, (xmlChar *)"standalone", 10) == 0)
836 CHECK_XML_SPACE(save_p);
840 return XML_ERR_STANDALONE_VALUE;
843 if (xmlStrncmp(p, (xmlChar *)"'yes'", 5) == 0 || xmlStrncmp(p, (xmlChar *)"\"yes\"", 5) == 0)
848 else if (xmlStrncmp(p, (xmlChar *)"'no'", 4) == 0 || xmlStrncmp(p, (xmlChar *)"\"no\"", 4) == 0)
854 return XML_ERR_STANDALONE_VALUE;
862 if (xmlStrncmp(p, (xmlChar *)"?>", 2) != 0)
863 return XML_ERR_XMLDECL_NOT_FINISHED;
869 for (p = str; p < str + len; p++)
871 return XML_ERR_INVALID_CHAR;
881 * Convert a C string to XML internal representation
883 * TODO maybe, libxml2's xmlreader is better? (do not construct DOM, yet do not use SAX - see xml_reader.c)
884 * TODO what about internal URI for docs? (see PG_XML_DEFAULT_URI below)
887 xml_parse(text *data, bool is_document, bool preserve_whitespace, xmlChar *encoding)
892 xmlParserCtxtPtr ctxt = NULL;
893 xmlDocPtr doc = NULL;
895 len = VARSIZE(data) - VARHDRSZ; /* will be useful later */
896 string = xml_text2xmlChar(data);
898 utf8string = pg_do_encoding_conversion(string,
901 ? pg_char_to_encoding((char *) encoding)
902 : GetDatabaseEncoding(),
907 /* We use a PG_TRY block to ensure libxml is cleaned up on error */
910 ctxt = xmlNewParserCtxt();
912 xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
913 "could not allocate parser context", ctxt);
918 * Note that here we try to apply DTD defaults
919 * (XML_PARSE_DTDATTR) according to SQL/XML:10.16.7.d:
920 * 'Default values defined by internal DTD are applied'.
921 * As for external DTDs, we try to support them too, (see
924 doc = xmlCtxtReadDoc(ctxt, utf8string,
927 XML_PARSE_NOENT | XML_PARSE_DTDATTR
928 | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
930 xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
931 "invalid XML document", ctxt);
937 xmlChar *version = NULL;
940 doc = xmlNewDoc(NULL);
942 res_code = parse_xml_decl(utf8string, &count, &version, NULL, &standalone);
945 res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, utf8string + count, NULL);
947 xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
948 "invalid XML content", res_code);
950 doc->version = xmlStrdup(version);
951 doc->encoding = xmlStrdup((xmlChar *) "UTF-8");
952 doc->standalone = standalone;
956 xmlFreeParserCtxt(ctxt);
965 xmlFreeParserCtxt(ctxt);
977 * xmlChar<->text conversions
980 xml_text2xmlChar(text *in)
982 int32 len = VARSIZE(in) - VARHDRSZ;
985 res = palloc(len + 1);
986 memcpy(res, VARDATA(in), len);
995 * Wrappers for memory management functions
998 xml_palloc(size_t size)
1000 return palloc(size);
1005 xml_repalloc(void *ptr, size_t size)
1007 return repalloc(ptr, size);
1012 xml_pfree(void *ptr)
1019 xml_pstrdup(const char *string)
1021 return pstrdup(string);
1023 #endif /* NOT_USED */
1027 * Wrapper for "ereport" function.
1028 * Adds detail - libxml's native error message, if any.
1031 xml_ereport(int level, int sqlcode,
1032 const char *msg, void *ctxt)
1034 xmlErrorPtr libxmlErr = NULL;
1036 if (xml_err_buf->len > 0)
1039 (errmsg("%s", xml_err_buf->data)));
1040 xml_err_buf->data[0] = '\0';
1041 xml_err_buf->len = 0;
1045 libxmlErr = xmlCtxtGetLastError(ctxt);
1047 if (libxmlErr == NULL)
1051 errmsg("%s", msg)));
1055 /* as usual, libxml error message contains '\n'; get rid of it */
1059 xmlErrDetail = pstrdup(libxmlErr->message);
1060 xmlErrLen = strlen(xmlErrDetail);
1061 for (i = 0; i < xmlErrLen; i++)
1063 if (xmlErrDetail[i] == '\n')
1064 xmlErrDetail[i] = '.';
1069 errdetail("%s", xmlErrDetail)));
1075 * Error handler for libxml error messages
1078 xml_errorHandler(void *ctxt, const char *msg,...)
1080 /* Append the formatted text to xml_err_buf */
1086 /* Try to format the data. */
1087 va_start(args, msg);
1088 success = appendStringInfoVA(xml_err_buf, msg, args);
1094 /* Double the buffer size and try again. */
1095 enlargeStringInfo(xml_err_buf, xml_err_buf->maxlen);
1101 * Return error message by libxml error code
1102 * TODO make them closer to recommendations from Postgres manual
1105 xml_ereport_by_code(int level, int sqlcode,
1106 const char *msg, int code)
1110 if (xml_err_buf->len > 0)
1113 (errmsg("%s", xml_err_buf->data)));
1114 xml_err_buf->data[0] = '\0';
1115 xml_err_buf->len = 0;
1120 case XML_ERR_INTERNAL_ERROR:
1121 det = "libxml internal error";
1123 case XML_ERR_ENTITY_LOOP:
1124 det = "Detected an entity reference loop";
1126 case XML_ERR_ENTITY_NOT_STARTED:
1127 det = "EntityValue: \" or ' expected";
1129 case XML_ERR_ENTITY_NOT_FINISHED:
1130 det = "EntityValue: \" or ' expected";
1132 case XML_ERR_ATTRIBUTE_NOT_STARTED:
1133 det = "AttValue: \" or ' expected";
1135 case XML_ERR_LT_IN_ATTRIBUTE:
1136 det = "Unescaped '<' not allowed in attributes values";
1138 case XML_ERR_LITERAL_NOT_STARTED:
1139 det = "SystemLiteral \" or ' expected";
1141 case XML_ERR_LITERAL_NOT_FINISHED:
1142 det = "Unfinished System or Public ID \" or ' expected";
1144 case XML_ERR_MISPLACED_CDATA_END:
1145 det = "Sequence ']]>' not allowed in content";
1147 case XML_ERR_URI_REQUIRED:
1148 det = "SYSTEM or PUBLIC, the URI is missing";
1150 case XML_ERR_PUBID_REQUIRED:
1151 det = "PUBLIC, the Public Identifier is missing";
1153 case XML_ERR_HYPHEN_IN_COMMENT:
1154 det = "Comment must not contain '--' (double-hyphen)";
1156 case XML_ERR_PI_NOT_STARTED:
1157 det = "xmlParsePI : no target name";
1159 case XML_ERR_RESERVED_XML_NAME:
1160 det = "Invalid PI name";
1162 case XML_ERR_NOTATION_NOT_STARTED:
1163 det = "NOTATION: Name expected here";
1165 case XML_ERR_NOTATION_NOT_FINISHED:
1166 det = "'>' required to close NOTATION declaration";
1168 case XML_ERR_VALUE_REQUIRED:
1169 det = "Entity value required";
1171 case XML_ERR_URI_FRAGMENT:
1172 det = "Fragment not allowed";
1174 case XML_ERR_ATTLIST_NOT_STARTED:
1175 det = "'(' required to start ATTLIST enumeration";
1177 case XML_ERR_NMTOKEN_REQUIRED:
1178 det = "NmToken expected in ATTLIST enumeration";
1180 case XML_ERR_ATTLIST_NOT_FINISHED:
1181 det = "')' required to finish ATTLIST enumeration";
1183 case XML_ERR_MIXED_NOT_STARTED:
1184 det = "MixedContentDecl : '|' or ')*' expected";
1186 case XML_ERR_PCDATA_REQUIRED:
1187 det = "MixedContentDecl : '#PCDATA' expected";
1189 case XML_ERR_ELEMCONTENT_NOT_STARTED:
1190 det = "ContentDecl : Name or '(' expected";
1192 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
1193 det = "ContentDecl : ',' '|' or ')' expected";
1195 case XML_ERR_PEREF_IN_INT_SUBSET:
1196 det = "PEReference: forbidden within markup decl in internal subset";
1198 case XML_ERR_GT_REQUIRED:
1199 det = "Expected '>'";
1201 case XML_ERR_CONDSEC_INVALID:
1202 det = "XML conditional section '[' expected";
1204 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
1205 det = "Content error in the external subset";
1207 case XML_ERR_CONDSEC_INVALID_KEYWORD:
1208 det = "conditional section INCLUDE or IGNORE keyword expected";
1210 case XML_ERR_CONDSEC_NOT_FINISHED:
1211 det = "XML conditional section not closed";
1213 case XML_ERR_XMLDECL_NOT_STARTED:
1214 det = "Text declaration '<?xml' required";
1216 case XML_ERR_XMLDECL_NOT_FINISHED:
1217 det = "parsing XML declaration: '?>' expected";
1219 case XML_ERR_EXT_ENTITY_STANDALONE:
1220 det = "external parsed entities cannot be standalone";
1222 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
1223 det = "EntityRef: expecting ';'";
1225 case XML_ERR_DOCTYPE_NOT_FINISHED:
1226 det = "DOCTYPE improperly terminated";
1228 case XML_ERR_LTSLASH_REQUIRED:
1229 det = "EndTag: '</' not found";
1231 case XML_ERR_EQUAL_REQUIRED:
1232 det = "Expected '='";
1234 case XML_ERR_STRING_NOT_CLOSED:
1235 det = "String not closed expecting \" or '";
1237 case XML_ERR_STRING_NOT_STARTED:
1238 det = "String not started expecting ' or \"";
1240 case XML_ERR_ENCODING_NAME:
1241 det = "Invalid XML encoding name";
1243 case XML_ERR_STANDALONE_VALUE:
1244 det = "Standalone accepts only 'yes' or 'no'";
1246 case XML_ERR_DOCUMENT_EMPTY:
1247 det = "Document is empty";
1249 case XML_ERR_DOCUMENT_END:
1250 det = "Extra content at the end of the document";
1252 case XML_ERR_NOT_WELL_BALANCED:
1253 det = "Chunk is not well balanced";
1255 case XML_ERR_EXTRA_CONTENT:
1256 det = "Extra content at the end of well balanced chunk";
1258 case XML_ERR_VERSION_MISSING:
1259 det = "Malformed declaration expecting version";
1261 /* more err codes... Please, keep the order! */
1262 case XML_ERR_ATTRIBUTE_WITHOUT_VALUE: /* 41 */
1263 det ="Attribute without value";
1265 case XML_ERR_ATTRIBUTE_REDEFINED:
1266 det ="Attribute defined more than once in the same element";
1268 case XML_ERR_COMMENT_NOT_FINISHED: /* 45 */
1269 det = "Comment is not finished";
1271 case XML_ERR_NAME_REQUIRED: /* 68 */
1272 det = "Element name not found";
1274 case XML_ERR_TAG_NOT_FINISHED: /* 77 */
1275 det = "Closing tag not found";
1278 det = "Unrecognized libxml error code: %d";
1285 errdetail(det, code)));
1290 * Convert one char in the current server encoding to a Unicode codepoint.
1293 sqlchar_to_unicode(char *s)
1296 pg_wchar ret[2]; /* need space for trailing zero */
1298 utf8string = (char *) pg_do_encoding_conversion((unsigned char *) s,
1300 GetDatabaseEncoding(),
1303 pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret, pg_mblen(s));
1310 is_valid_xml_namefirst(pg_wchar c)
1312 /* (Letter | '_' | ':') */
1313 return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
1314 || c == '_' || c == ':');
1319 is_valid_xml_namechar(pg_wchar c)
1321 /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
1322 return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
1324 || c == '.' || c == '-' || c == '_' || c == ':'
1325 || xmlIsCombiningQ(c)
1326 || xmlIsExtenderQ(c));
1328 #endif /* USE_LIBXML */
1332 * Map SQL identifier to XML name; see SQL/XML:2003 section 9.1.
1335 map_sql_identifier_to_xml_name(char *ident, bool fully_escaped)
1341 initStringInfo(&buf);
1343 for (p = ident; *p; p += pg_mblen(p))
1345 if (*p == ':' && (p == ident || fully_escaped))
1346 appendStringInfo(&buf, "_x003A_");
1347 else if (*p == '_' && *(p+1) == 'x')
1348 appendStringInfo(&buf, "_x005F_");
1349 else if (fully_escaped && p == ident &&
1350 pg_strncasecmp(p, "xml", 3) == 0)
1353 appendStringInfo(&buf, "_x0078_");
1355 appendStringInfo(&buf, "_x0058_");
1359 pg_wchar u = sqlchar_to_unicode(p);
1362 ? !is_valid_xml_namefirst(u)
1363 : !is_valid_xml_namechar(u))
1364 appendStringInfo(&buf, "_x%04X_", (unsigned int) u);
1366 appendBinaryStringInfo(&buf, p, pg_mblen(p));
1371 #else /* not USE_LIBXML */
1374 #endif /* not USE_LIBXML */
1379 * Map a Unicode codepoint into the current server encoding.
1382 unicode_to_sqlchar(pg_wchar c)
1384 static unsigned char utf8string[4];
1390 else if (c <= 0x7FF)
1392 utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
1393 utf8string[1] = 0x80 | (c & 0x3F);
1395 else if (c <= 0xFFFF)
1397 utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
1398 utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
1399 utf8string[2] = 0x80 | (c & 0x3F);
1403 utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
1404 utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
1405 utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
1406 utf8string[3] = 0x80 | (c & 0x3F);
1409 return (char *) pg_do_encoding_conversion(utf8string,
1410 pg_mblen((char *) utf8string),
1412 GetDatabaseEncoding());
1417 * Map XML name to SQL identifier; see SQL/XML:2003 section 9.17.
1420 map_xml_name_to_sql_identifier(char *name)
1425 initStringInfo(&buf);
1427 for (p = name; *p; p += pg_mblen(p))
1429 if (*p == '_' && *(p+1) == 'x'
1430 && isxdigit((unsigned char) *(p+2))
1431 && isxdigit((unsigned char) *(p+3))
1432 && isxdigit((unsigned char) *(p+4))
1433 && isxdigit((unsigned char) *(p+5))
1438 sscanf(p + 2, "%X", &u);
1439 appendStringInfoString(&buf, unicode_to_sqlchar(u));
1443 appendBinaryStringInfo(&buf, p, pg_mblen(p));
1451 * Map SQL value to XML value; see SQL/XML:2003 section 9.16.
1454 map_sql_value_to_xml_value(Datum value, Oid type)
1458 initStringInfo(&buf);
1460 if (is_array_type(type))
1469 array = DatumGetArrayTypeP(value);
1471 /* TODO: need some code-fu here to remove this limitation */
1472 if (ARR_NDIM(array) != 1)
1474 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1475 errmsg("only supported for one-dimensional array")));
1477 elmtype = ARR_ELEMTYPE(array);
1478 get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
1480 for (i = ARR_LBOUND(array)[0];
1481 i < ARR_LBOUND(array)[0] + ARR_DIMS(array)[0];
1487 subval = array_ref(array, 1, &i, -1, elmlen, elmbyval, elmalign, &isnull);
1488 appendStringInfoString(&buf, "<element>");
1489 appendStringInfoString(&buf, map_sql_value_to_xml_value(subval, elmtype));
1490 appendStringInfoString(&buf, "</element>");
1499 getTypeOutputInfo(type, &typeOut, &isvarlena);
1500 str = OidOutputFunctionCall(typeOut, value);
1506 if (type == BYTEAOID)
1509 xmlTextWriterPtr writer;
1512 buf = xmlBufferCreate();
1513 writer = xmlNewTextWriterMemory(buf, 0);
1515 if (xmlbinary == XMLBINARY_BASE64)
1516 xmlTextWriterWriteBase64(writer, VARDATA(value), 0, VARSIZE(value) - VARHDRSZ);
1518 xmlTextWriterWriteBinHex(writer, VARDATA(value), 0, VARSIZE(value) - VARHDRSZ);
1520 xmlFreeTextWriter(writer);
1521 result = pstrdup((const char *) xmlBufferContent(buf));
1525 #endif /* USE_LIBXML */
1527 for (p = str; *p; p += pg_mblen(p))
1532 appendStringInfo(&buf, "&amp;");
1535 appendStringInfo(&buf, "&lt;");
1538 appendStringInfo(&buf, "&gt;");
1541 appendStringInfo(&buf, "&#x0d;");
1544 appendBinaryStringInfo(&buf, p, pg_mblen(p));