1 /*-------------------------------------------------------------------------
4 * XML data type support.
7 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
10 * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.27 2007/02/11 22:18:15 petere Exp $
12 *-------------------------------------------------------------------------
16 * Generally, XML type support is only available when libxml use was
17 * configured during the build. But even if that is not done, the
18 * type and all the functions are available, but most of them will
19 * fail. For one thing, this avoids having to manage variant catalog
20 * installations. But it also has nice effects such as that you can
21 * dump a database containing XML type data even if the server is not
22 * linked with libxml. Thus, make sure xml_out() works even if nothing
27 * Note on memory management: Via callbacks, libxml is told to use
28 * palloc and friends for memory management. Sometimes, libxml
29 * allocates global structures in the hope that it can reuse them
30 * later on, but if "later" is much later, the memory context
31 * management of PostgreSQL will have blown those structures away
32 * without telling libxml about it. Therefore, it is important to
33 * call xmlCleanupParser() or perhaps some other cleanup function
34 * after using such functions, for example something from
35 * libxml/parser.h or libxml/xmlsave.h. Unfortunately, you cannot
36 * readily tell from the API documentation when that happens, so
37 * careful evaluation is necessary when introducing new libxml APIs
44 #include <libxml/chvalid.h>
45 #include <libxml/parser.h>
46 #include <libxml/tree.h>
47 #include <libxml/uri.h>
48 #include <libxml/xmlerror.h>
49 #include <libxml/xmlwriter.h>
50 #endif /* USE_LIBXML */
52 #include "catalog/pg_type.h"
53 #include "executor/executor.h"
55 #include "libpq/pqformat.h"
56 #include "mb/pg_wchar.h"
57 #include "nodes/execnodes.h"
58 #include "parser/parse_expr.h"
59 #include "utils/array.h"
60 #include "utils/builtins.h"
61 #include "utils/lsyscache.h"
62 #include "utils/memutils.h"
63 #include "utils/xml.h"
68 static StringInfo xml_err_buf = NULL;
70 static void xml_init(void);
71 static void *xml_palloc(size_t size);
72 static void *xml_repalloc(void *ptr, size_t size);
73 static void xml_pfree(void *ptr);
74 static char *xml_pstrdup(const char *string);
75 static void xml_ereport(int level, int sqlcode,
77 static void xml_errorHandler(void *ctxt, const char *msg, ...);
78 static void xml_ereport_by_code(int level, int sqlcode,
79 const char *msg, int errcode);
80 static xmlChar *xml_text2xmlChar(text *in);
81 static int parse_xml_decl(const xmlChar *str, size_t *lenp, xmlChar **version, xmlChar **encoding, int *standalone);
82 static bool print_xml_decl(StringInfo buf, const xmlChar *version, pg_enc encoding, int standalone);
83 static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, xmlChar *encoding);
85 #endif /* USE_LIBXML */
87 XmlBinaryType xmlbinary;
88 XmlOptionType xmloption;
91 #define NO_XML_SUPPORT() \
93 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
94 errmsg("no XML support in this installation")))
98 xml_in(PG_FUNCTION_ARGS)
101 char *s = PG_GETARG_CSTRING(0);
107 vardata = palloc(len + VARHDRSZ);
108 VARATT_SIZEP(vardata) = len + VARHDRSZ;
109 memcpy(VARDATA(vardata), s, len);
112 * Parse the data to check if it is well-formed XML data. Assume
113 * that ERROR occurred if parsing failed.
115 doc = xml_parse(vardata, xmloption, true, NULL);
118 PG_RETURN_XML_P(vardata);
126 #define PG_XML_DEFAULT_VERSION "1.0"
130 xml_out_internal(xmltype *x, pg_enc target_encoding)
141 len = VARSIZE(x) - VARHDRSZ;
142 str = palloc(len + 1);
143 memcpy(str, VARDATA(x), len);
147 if ((res_code = parse_xml_decl((xmlChar *) str, &len, &version, &encoding, &standalone)) == 0)
151 initStringInfo(&buf);
153 if (!print_xml_decl(&buf, version, target_encoding, standalone))
156 * If we are not going to produce an XML declaration, eat
157 * a single newline in the original string to prevent
158 * empty first lines in the output.
160 if (*(str + len) == '\n')
163 appendStringInfoString(&buf, str + len);
168 xml_ereport_by_code(WARNING, ERRCODE_INTERNAL_ERROR,
169 "could not parse XML declaration in stored value", res_code);
176 xml_out(PG_FUNCTION_ARGS)
178 xmltype *x = PG_GETARG_XML_P(0);
181 * xml_out removes the encoding property in all cases. This is
182 * because we cannot control from here whether the datum will be
183 * converted to a different client encoding, so we'd do more harm
184 * than good by including it.
186 PG_RETURN_CSTRING(xml_out_internal(x, 0));
191 xml_recv(PG_FUNCTION_ARGS)
194 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
200 xmlChar *encoding = NULL;
202 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
204 result = palloc(nbytes + VARHDRSZ);
205 VARATT_SIZEP(result) = nbytes + VARHDRSZ;
206 memcpy(VARDATA(result), str, nbytes);
208 parse_xml_decl((xmlChar *) str, NULL, NULL, &encoding, NULL);
211 * Parse the data to check if it is well-formed XML data. Assume
212 * that ERROR occurred if parsing failed.
214 doc = xml_parse(result, xmloption, true, encoding);
217 newstr = (char *) pg_do_encoding_conversion((unsigned char *) str,
219 encoding ? pg_char_to_encoding((char *) encoding) : PG_UTF8,
220 GetDatabaseEncoding());
228 nbytes = strlen(newstr);
230 result = palloc(nbytes + VARHDRSZ);
231 VARATT_SIZEP(result) = nbytes + VARHDRSZ;
232 memcpy(VARDATA(result), newstr, nbytes);
235 PG_RETURN_XML_P(result);
244 xml_send(PG_FUNCTION_ARGS)
246 xmltype *x = PG_GETARG_XML_P(0);
247 char *outval = xml_out_internal(x, pg_get_client_encoding());
250 pq_begintypsend(&buf);
251 pq_sendstring(&buf, outval);
252 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
258 appendStringInfoText(StringInfo str, const text *t)
260 appendBinaryStringInfo(str, VARDATA(t), VARSIZE(t) - VARHDRSZ);
265 stringinfo_to_xmltype(StringInfo buf)
270 len = buf->len + VARHDRSZ;
271 result = palloc(len);
272 VARATT_SIZEP(result) = len;
273 memcpy(VARDATA(result), buf->data, buf->len);
281 cstring_to_xmltype(const char *string)
286 len = strlen(string) + VARHDRSZ;
287 result = palloc(len);
288 VARATT_SIZEP(result) = len;
289 memcpy(VARDATA(result), string, len - VARHDRSZ);
297 xmlBuffer_to_xmltype(xmlBufferPtr buf)
302 len = xmlBufferLength(buf) + VARHDRSZ;
303 result = palloc(len);
304 VARATT_SIZEP(result) = len;
305 memcpy(VARDATA(result), xmlBufferContent(buf), len - VARHDRSZ);
313 xmlcomment(PG_FUNCTION_ARGS)
316 text *arg = PG_GETARG_TEXT_P(0);
317 int len = VARSIZE(arg) - VARHDRSZ;
321 /* check for "--" in string or "-" at the end */
322 for (i = 1; i < len; i++)
323 if ((VARDATA(arg)[i] == '-' && VARDATA(arg)[i - 1] == '-')
324 || (VARDATA(arg)[i] == '-' && i == len - 1))
326 (errcode(ERRCODE_INVALID_XML_COMMENT),
327 errmsg("invalid XML comment")));
329 initStringInfo(&buf);
330 appendStringInfo(&buf, "<!--");
331 appendStringInfoText(&buf, arg);
332 appendStringInfo(&buf, "-->");
334 PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
344 * TODO: xmlconcat needs to merge the notations and unparsed entities
345 * of the argument values. Not very important in practice, though.
348 xmlconcat(List *args)
354 int global_standalone = 1;
355 xmlChar *global_version = NULL;
356 bool global_version_no_value = false;
358 initStringInfo(&buf);
364 xmltype *x = DatumGetXmlP(PointerGetDatum(lfirst(v)));
367 len = VARSIZE(x) - VARHDRSZ;
368 str = palloc(len + 1);
369 memcpy(str, VARDATA(x), len);
372 parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone);
374 if (standalone == 0 && global_standalone == 1)
375 global_standalone = 0;
377 global_standalone = -1;
380 global_version_no_value = true;
381 else if (!global_version)
382 global_version = xmlStrdup(version);
383 else if (xmlStrcmp(version, global_version) != 0)
384 global_version_no_value = true;
386 appendStringInfoString(&buf, str + len);
390 if (!global_version_no_value || global_standalone >= 0)
394 initStringInfo(&buf2);
396 print_xml_decl(&buf2,
397 (!global_version_no_value && global_version) ? global_version : NULL,
401 appendStringInfoString(&buf2, buf.data);
405 return stringinfo_to_xmltype(&buf);
417 xmlconcat2(PG_FUNCTION_ARGS)
424 PG_RETURN_XML_P(PG_GETARG_XML_P(1));
426 else if (PG_ARGISNULL(1))
427 PG_RETURN_XML_P(PG_GETARG_XML_P(0));
429 PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0), PG_GETARG_XML_P(1))));
434 texttoxml(PG_FUNCTION_ARGS)
436 text *data = PG_GETARG_TEXT_P(0);
438 PG_RETURN_XML_P(xmlparse(data, xmloption, true));
443 xmltotext(PG_FUNCTION_ARGS)
445 xmltype *data = PG_GETARG_XML_P(0);
447 PG_RETURN_TEXT_P(xmltotext_with_xmloption(data, xmloption));
452 xmltotext_with_xmloption(xmltype *data, XmlOptionType xmloption_arg)
454 if (xmloption_arg == XMLOPTION_DOCUMENT && !xml_is_document(data))
456 (errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
457 errmsg("not an XML document")));
459 /* It's actually binary compatible, save for the above check. */
460 return (text *) data;
465 xmlelement(XmlExprState *xmlExpr, ExprContext *econtext)
468 XmlExpr *xexpr = (XmlExpr *) xmlExpr->xprstate.expr;
478 xmlTextWriterPtr writer;
480 buf = xmlBufferCreate();
481 writer = xmlNewTextWriterMemory(buf, 0);
483 xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
486 forboth(arg, xmlExpr->named_args, narg, xexpr->arg_names)
488 ExprState *e = (ExprState *) lfirst(arg);
489 char *argname = strVal(lfirst(narg));
491 value = ExecEvalExpr(e, econtext, &isnull, NULL);
494 str = OutputFunctionCall(&xmlExpr->named_outfuncs[i], value);
495 xmlTextWriterWriteAttribute(writer, (xmlChar *) argname, (xmlChar *) str);
501 foreach(arg, xmlExpr->args)
503 ExprState *e = (ExprState *) lfirst(arg);
505 value = ExecEvalExpr(e, econtext, &isnull, NULL);
507 xmlTextWriterWriteRaw(writer, (xmlChar *) map_sql_value_to_xml_value(value, exprType((Node *) e->expr)));
510 xmlTextWriterEndElement(writer);
511 xmlFreeTextWriter(writer);
513 result = xmlBuffer_to_xmltype(buf);
524 xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
529 doc = xml_parse(data, xmloption_arg, preserve_whitespace, NULL);
532 return (xmltype *) data;
541 xmlpi(char *target, text *arg, bool arg_is_null, bool *result_is_null)
547 if (pg_strncasecmp(target, "xml", 3) == 0)
549 (errcode(ERRCODE_SYNTAX_ERROR), /* really */
550 errmsg("invalid XML processing instruction"),
551 errdetail("XML processing instruction target name cannot start with \"xml\".")));
554 * Following the SQL standard, the null check comes after the
555 * syntax check above.
557 *result_is_null = arg_is_null;
561 initStringInfo(&buf);
563 appendStringInfo(&buf, "<?%s", target);
569 string = DatumGetCString(DirectFunctionCall1(textout,
570 PointerGetDatum(arg)));
571 if (strstr(string, "?>") != NULL)
573 (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
574 errmsg("invalid XML processing instruction"),
575 errdetail("XML processing instruction cannot contain \"?>\".")));
577 appendStringInfoChar(&buf, ' ');
578 appendStringInfoString(&buf, string + strspn(string, " "));
581 appendStringInfoString(&buf, "?>");
583 result = stringinfo_to_xmltype(&buf);
594 xmlroot(xmltype *data, text *version, int standalone)
599 xmlChar *orig_version;
603 len = VARSIZE(data) - VARHDRSZ;
604 str = palloc(len + 1);
605 memcpy(str, VARDATA(data), len);
608 parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone);
611 orig_version = xml_text2xmlChar(version);
617 case XML_STANDALONE_YES:
620 case XML_STANDALONE_NO:
623 case XML_STANDALONE_NO_VALUE:
624 orig_standalone = -1;
626 case XML_STANDALONE_OMITTED:
627 /* leave original value */
631 initStringInfo(&buf);
632 print_xml_decl(&buf, orig_version, 0, orig_standalone);
633 appendStringInfoString(&buf, str + len);
635 return stringinfo_to_xmltype(&buf);
644 * Validate document (given as string) against DTD (given as external link)
645 * TODO !!! use text instead of cstring for second arg
646 * TODO allow passing DTD as a string value (not only as a URI)
647 * TODO redesign (see comment with '!!!' below)
650 xmlvalidate(PG_FUNCTION_ARGS)
653 text *data = PG_GETARG_TEXT_P(0);
654 text *dtdOrUri = PG_GETARG_TEXT_P(1);
656 xmlParserCtxtPtr ctxt = NULL;
657 xmlDocPtr doc = NULL;
658 xmlDtdPtr dtd = NULL;
662 /* We use a PG_TRY block to ensure libxml is cleaned up on error */
665 ctxt = xmlNewParserCtxt();
667 xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
668 "could not allocate parser context");
670 doc = xmlCtxtReadMemory(ctxt, (char *) VARDATA(data),
671 VARSIZE(data) - VARHDRSZ,
674 xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
675 "could not parse XML data");
678 uri = xmlCreateURI();
679 elog(NOTICE, "dtd - %s", dtdOrUri);
680 dtd = palloc(sizeof(xmlDtdPtr));
681 uri = xmlParseURI(dtdOrUri);
683 xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
684 "not implemented yet... (TODO)");
687 dtd = xmlParseDTD(NULL, xml_text2xmlChar(dtdOrUri));
690 xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
691 "could not load DTD");
693 if (xmlValidateDtd(xmlNewValidCtxt(), doc, dtd) == 1)
697 xml_ereport(NOTICE, ERRCODE_INVALID_XML_DOCUMENT,
698 "validation against DTD failed");
709 xmlFreeParserCtxt(ctxt);
723 xmlFreeParserCtxt(ctxt);
730 PG_RETURN_BOOL(result);
731 #else /* not USE_LIBXML */
734 #endif /* not USE_LIBXML */
739 xml_is_document(xmltype *arg)
743 xmlDocPtr doc = NULL;
744 MemoryContext ccxt = CurrentMemoryContext;
748 doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true, NULL);
756 ecxt = MemoryContextSwitchTo(ccxt);
757 errdata = CopyErrorData();
758 if (errdata->sqlerrcode == ERRCODE_INVALID_XML_DOCUMENT)
765 MemoryContextSwitchTo(ecxt);
775 #else /* not USE_LIBXML */
778 #endif /* not USE_LIBXML */
785 * Container for some init stuff (not good design!)
786 * TODO xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and check)
792 * Currently, we have no pure UTF-8 support for internals -- check
795 if (sizeof (char) != sizeof (xmlChar))
797 (errmsg("could not initialize XML library"),
798 errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.",
799 (int) sizeof(char), (int) sizeof(xmlChar))));
801 if (xml_err_buf == NULL)
803 /* First time through: create error buffer in permanent context */
804 MemoryContext oldcontext;
806 oldcontext = MemoryContextSwitchTo(TopMemoryContext);
807 xml_err_buf = makeStringInfo();
808 MemoryContextSwitchTo(oldcontext);
812 /* Reset pre-existing buffer to empty */
813 xml_err_buf->data[0] = '\0';
814 xml_err_buf->len = 0;
816 /* Now that xml_err_buf exists, safe to call xml_errorHandler */
817 xmlSetGenericErrorFunc(NULL, xml_errorHandler);
819 xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
827 * SQL/XML allows storing "XML documents" or "XML content". "XML
828 * documents" are specified by the XML specification and are parsed
829 * easily by libxml. "XML content" is specified by SQL/XML as the
830 * production "XMLDecl? content". But libxml can only parse the
831 * "content" part, so we have to parse the XML declaration ourselves
835 #define CHECK_XML_SPACE(p) if (!xmlIsBlank_ch(*(p))) return XML_ERR_SPACE_REQUIRED
836 #define SKIP_XML_SPACE(p) while (xmlIsBlank_ch(*(p))) (p)++
839 parse_xml_decl(const xmlChar *str, size_t *lenp, xmlChar **version, xmlChar **encoding, int *standalone)
842 const xmlChar *save_p;
854 if (xmlStrncmp(p, (xmlChar *)"<?xml", 5) != 0)
862 if (xmlStrncmp(p, (xmlChar *)"version", 7) != 0)
863 return XML_ERR_VERSION_MISSING;
867 return XML_ERR_VERSION_MISSING;
871 if (*p == '\'' || *p == '"')
875 q = xmlStrchr(p + 1, *p);
877 return XML_ERR_VERSION_MISSING;
880 *version = xmlStrndup(p + 1, q - p - 1);
884 return XML_ERR_VERSION_MISSING;
889 if (xmlStrncmp(p, (xmlChar *)"encoding", 8) == 0)
891 CHECK_XML_SPACE(save_p);
895 return XML_ERR_MISSING_ENCODING;
899 if (*p == '\'' || *p == '"')
903 q = xmlStrchr(p + 1, *p);
905 return XML_ERR_MISSING_ENCODING;
908 *encoding = xmlStrndup(p + 1, q - p - 1);
912 return XML_ERR_MISSING_ENCODING;
922 if (xmlStrncmp(p, (xmlChar *)"standalone", 10) == 0)
924 CHECK_XML_SPACE(save_p);
928 return XML_ERR_STANDALONE_VALUE;
931 if (xmlStrncmp(p, (xmlChar *)"'yes'", 5) == 0 || xmlStrncmp(p, (xmlChar *)"\"yes\"", 5) == 0)
936 else if (xmlStrncmp(p, (xmlChar *)"'no'", 4) == 0 || xmlStrncmp(p, (xmlChar *)"\"no\"", 4) == 0)
942 return XML_ERR_STANDALONE_VALUE;
950 if (xmlStrncmp(p, (xmlChar *)"?>", 2) != 0)
951 return XML_ERR_XMLDECL_NOT_FINISHED;
957 for (p = str; p < str + len; p++)
959 return XML_ERR_INVALID_CHAR;
969 * Write an XML declaration. On output, we adjust the XML declaration
970 * as follows. (These rules are the moral equivalent of the clause
971 * "Serialization of an XML value" in the SQL standard.)
973 * We try to avoid generating an XML declaration if possible. This is
974 * so that you don't get trivial things like xml '<foo/>' resulting in
975 * '<?xml version="1.0"?><foo/>', which would surely be annoying. We
976 * must provide a declaration if the standalone property is specified
977 * or if we include an encoding declaration. If we have a
978 * declaration, we must specify a version (XML requires this).
979 * Otherwise we only make a declaration if the version is not "1.0",
980 * which is the default version specified in SQL:2003.
983 print_xml_decl(StringInfo buf, const xmlChar *version, pg_enc encoding, int standalone)
985 if ((version && strcmp((char *) version, PG_XML_DEFAULT_VERSION) != 0)
986 || (encoding && encoding != PG_UTF8)
989 appendStringInfoString(buf, "<?xml");
992 appendStringInfo(buf, " version=\"%s\"", version);
994 appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
996 if (encoding && encoding != PG_UTF8)
997 /* XXX might be useful to convert this to IANA names
998 * (ISO-8859-1 instead of LATIN1 etc.); needs field
1000 appendStringInfo(buf, " encoding=\"%s\"", pg_encoding_to_char(encoding));
1002 if (standalone == 1)
1003 appendStringInfoString(buf, " standalone=\"yes\"");
1004 else if (standalone == 0)
1005 appendStringInfoString(buf, " standalone=\"no\"");
1006 appendStringInfoString(buf, "?>");
1016 * Convert a C string to XML internal representation
1018 * TODO maybe, libxml2's xmlreader is better? (do not construct DOM, yet do not use SAX - see xml_reader.c)
1021 xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, xmlChar *encoding)
1025 xmlChar *utf8string;
1026 xmlParserCtxtPtr ctxt = NULL;
1027 xmlDocPtr doc = NULL;
1029 len = VARSIZE(data) - VARHDRSZ; /* will be useful later */
1030 string = xml_text2xmlChar(data);
1032 utf8string = pg_do_encoding_conversion(string,
1035 ? pg_char_to_encoding((char *) encoding)
1036 : GetDatabaseEncoding(),
1041 /* We use a PG_TRY block to ensure libxml is cleaned up on error */
1044 ctxt = xmlNewParserCtxt();
1046 xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
1047 "could not allocate parser context");
1049 if (xmloption_arg == XMLOPTION_DOCUMENT)
1052 * Note that here we try to apply DTD defaults
1053 * (XML_PARSE_DTDATTR) according to SQL/XML:10.16.7.d:
1054 * 'Default values defined by internal DTD are applied'.
1055 * As for external DTDs, we try to support them too (see
1056 * SQL/XML:10.16.7.e)
1058 doc = xmlCtxtReadDoc(ctxt, utf8string,
1061 XML_PARSE_NOENT | XML_PARSE_DTDATTR
1062 | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
1064 xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
1065 "invalid XML document");
1071 xmlChar *version = NULL;
1072 int standalone = -1;
1074 doc = xmlNewDoc(NULL);
1076 res_code = parse_xml_decl(utf8string, &count, &version, NULL, &standalone);
1078 xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
1079 "invalid XML content: invalid XML declaration", res_code);
1081 res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, utf8string + count, NULL);
1083 xml_ereport(ERROR, ERRCODE_INVALID_XML_CONTENT,
1084 "invalid XML content");
1086 doc->version = xmlStrdup(version);
1087 doc->encoding = xmlStrdup((xmlChar *) "UTF-8");
1088 doc->standalone = standalone;
1092 xmlFreeParserCtxt(ctxt);
1101 xmlFreeParserCtxt(ctxt);
1113 * xmlChar<->text conversions
1116 xml_text2xmlChar(text *in)
1118 int32 len = VARSIZE(in) - VARHDRSZ;
1121 res = palloc(len + 1);
1122 memcpy(res, VARDATA(in), len);
1130 * Wrappers for memory management functions
1133 xml_palloc(size_t size)
1135 return palloc(size);
1140 xml_repalloc(void *ptr, size_t size)
1142 return repalloc(ptr, size);
1147 xml_pfree(void *ptr)
1154 xml_pstrdup(const char *string)
1156 return pstrdup(string);
1161 * Wrapper for "ereport" function for XML-related errors. The "msg"
1162 * is the SQL-level message; some can be adopted from the SQL/XML
1163 * standard. This function adds libxml's native error messages, if
1167 xml_ereport(int level, int sqlcode,
1172 if (xml_err_buf->len > 0)
1174 detail = pstrdup(xml_err_buf->data);
1175 xml_err_buf->data[0] = '\0';
1176 xml_err_buf->len = 0;
1181 /* libxml error messages end in '\n'; get rid of it */
1186 len = strlen(detail);
1187 if (len > 0 && detail[len-1] == '\n')
1188 detail[len-1] = '\0';
1193 errdetail("%s", detail)));
1199 errmsg("%s", msg)));
1205 * Error handler for libxml error messages
1208 xml_errorHandler(void *ctxt, const char *msg,...)
1210 /* Append the formatted text to xml_err_buf */
1216 /* Try to format the data. */
1217 va_start(args, msg);
1218 success = appendStringInfoVA(xml_err_buf, msg, args);
1224 /* Double the buffer size and try again. */
1225 enlargeStringInfo(xml_err_buf, xml_err_buf->maxlen);
1231 * Wrapper for "ereport" function for XML-related errors. The "msg"
1232 * is the SQL-level message; some can be adopted from the SQL/XML
1233 * standard. This function uses "code" to create a textual detail
1234 * message. At the moment, we only need to cover those codes that we
1235 * may raise in this file.
1238 xml_ereport_by_code(int level, int sqlcode,
1239 const char *msg, int code)
1245 case XML_ERR_INVALID_CHAR:
1246 det = "Invalid character value";
1248 case XML_ERR_SPACE_REQUIRED:
1249 det = "Space required";
1251 case XML_ERR_STANDALONE_VALUE:
1252 det = "standalone accepts only 'yes' or 'no'";
1254 case XML_ERR_VERSION_MISSING:
1255 det = "Malformed declaration expecting version";
1257 case XML_ERR_MISSING_ENCODING:
1258 det = "Missing encoding in text declaration";
1260 case XML_ERR_XMLDECL_NOT_FINISHED:
1261 det = "Parsing XML declaration: '?>' expected";
1264 det = "Unrecognized libxml error code: %d";
1271 errdetail(det, code)));
1276 * Convert one char in the current server encoding to a Unicode codepoint.
1279 sqlchar_to_unicode(char *s)
1282 pg_wchar ret[2]; /* need space for trailing zero */
1284 utf8string = (char *) pg_do_encoding_conversion((unsigned char *) s,
1286 GetDatabaseEncoding(),
1289 pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret, pg_mblen(s));
1296 is_valid_xml_namefirst(pg_wchar c)
1298 /* (Letter | '_' | ':') */
1299 return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
1300 || c == '_' || c == ':');
1305 is_valid_xml_namechar(pg_wchar c)
1307 /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
1308 return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
1310 || c == '.' || c == '-' || c == '_' || c == ':'
1311 || xmlIsCombiningQ(c)
1312 || xmlIsExtenderQ(c));
1314 #endif /* USE_LIBXML */
1318 * Map SQL identifier to XML name; see SQL/XML:2003 section 9.1.
1321 map_sql_identifier_to_xml_name(char *ident, bool fully_escaped, bool escape_period)
1324 * SQL/XML doesn't make use of this case anywhere, so it's
1325 * probably a mistake.
1327 Assert(fully_escaped || !escape_period);
1333 initStringInfo(&buf);
1335 for (p = ident; *p; p += pg_mblen(p))
1337 if (*p == ':' && (p == ident || fully_escaped))
1338 appendStringInfo(&buf, "_x003A_");
1339 else if (*p == '_' && *(p+1) == 'x')
1340 appendStringInfo(&buf, "_x005F_");
1341 else if (fully_escaped && p == ident &&
1342 pg_strncasecmp(p, "xml", 3) == 0)
1345 appendStringInfo(&buf, "_x0078_");
1347 appendStringInfo(&buf, "_x0058_");
1349 else if (escape_period && *p == '.')
1350 appendStringInfo(&buf, "_x002E_");
1353 pg_wchar u = sqlchar_to_unicode(p);
1356 ? !is_valid_xml_namefirst(u)
1357 : !is_valid_xml_namechar(u))
1358 appendStringInfo(&buf, "_x%04X_", (unsigned int) u);
1360 appendBinaryStringInfo(&buf, p, pg_mblen(p));
1365 #else /* not USE_LIBXML */
1368 #endif /* not USE_LIBXML */
1373 * Map a Unicode codepoint into the current server encoding.
1376 unicode_to_sqlchar(pg_wchar c)
1378 static unsigned char utf8string[5]; /* need trailing zero */
1384 else if (c <= 0x7FF)
1386 utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
1387 utf8string[1] = 0x80 | (c & 0x3F);
1389 else if (c <= 0xFFFF)
1391 utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
1392 utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
1393 utf8string[2] = 0x80 | (c & 0x3F);
1397 utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
1398 utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
1399 utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
1400 utf8string[3] = 0x80 | (c & 0x3F);
1403 return (char *) pg_do_encoding_conversion(utf8string,
1404 pg_mblen((char *) utf8string),
1406 GetDatabaseEncoding());
1411 * Map XML name to SQL identifier; see SQL/XML:2003 section 9.17.
1414 map_xml_name_to_sql_identifier(char *name)
1419 initStringInfo(&buf);
1421 for (p = name; *p; p += pg_mblen(p))
1423 if (*p == '_' && *(p+1) == 'x'
1424 && isxdigit((unsigned char) *(p+2))
1425 && isxdigit((unsigned char) *(p+3))
1426 && isxdigit((unsigned char) *(p+4))
1427 && isxdigit((unsigned char) *(p+5))
1432 sscanf(p + 2, "%X", &u);
1433 appendStringInfoString(&buf, unicode_to_sqlchar(u));
1437 appendBinaryStringInfo(&buf, p, pg_mblen(p));
1445 * Map SQL value to XML value; see SQL/XML:2003 section 9.16.
1448 map_sql_value_to_xml_value(Datum value, Oid type)
1452 initStringInfo(&buf);
1454 if (is_array_type(type))
1463 array = DatumGetArrayTypeP(value);
1465 /* TODO: need some code-fu here to remove this limitation */
1466 if (ARR_NDIM(array) != 1)
1468 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1469 errmsg("only supported for one-dimensional array")));
1471 elmtype = ARR_ELEMTYPE(array);
1472 get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
1474 for (i = ARR_LBOUND(array)[0];
1475 i < ARR_LBOUND(array)[0] + ARR_DIMS(array)[0];
1481 subval = array_ref(array, 1, &i, -1, elmlen, elmbyval, elmalign, &isnull);
1482 appendStringInfoString(&buf, "<element>");
1483 appendStringInfoString(&buf, map_sql_value_to_xml_value(subval, elmtype));
1484 appendStringInfoString(&buf, "</element>");
1493 if (type == BOOLOID)
1495 if (DatumGetBool(value))
1501 getTypeOutputInfo(type, &typeOut, &isvarlena);
1502 str = OidOutputFunctionCall(typeOut, value);
1508 if (type == BYTEAOID)
1511 xmlTextWriterPtr writer;
1514 buf = xmlBufferCreate();
1515 writer = xmlNewTextWriterMemory(buf, 0);
1517 if (xmlbinary == XMLBINARY_BASE64)
1518 xmlTextWriterWriteBase64(writer, VARDATA(value), 0, VARSIZE(value) - VARHDRSZ);
1520 xmlTextWriterWriteBinHex(writer, VARDATA(value), 0, VARSIZE(value) - VARHDRSZ);
1522 xmlFreeTextWriter(writer);
1523 result = pstrdup((const char *) xmlBufferContent(buf));
1527 #endif /* USE_LIBXML */
1529 for (p = str; *p; p += pg_mblen(p))
1534 appendStringInfo(&buf, "&");
1537 appendStringInfo(&buf, "<");
1540 appendStringInfo(&buf, ">");
1543 appendStringInfo(&buf, "
");
1546 appendBinaryStringInfo(&buf, p, pg_mblen(p));