1 /*-------------------------------------------------------------------------
4 * XML data type support.
7 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
10 * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.14 2007/01/10 20:33:54 petere Exp $
12 *-------------------------------------------------------------------------
16 * Generally, XML type support is only available when libxml use was
17 * configured during the build. But even if that is not done, the
18 * type and all the functions are available, but most of them will
19 * fail. For one thing, this avoids having to manage variant catalog
20 * installations. But it also has nice effects such as that you can
21 * dump a database containing XML type data even if the server is not
22 * linked with libxml. Thus, make sure xml_out() works even if nothing
29 #include <libxml/chvalid.h>
30 #include <libxml/parser.h>
31 #include <libxml/tree.h>
32 #include <libxml/uri.h>
33 #include <libxml/xmlerror.h>
34 #include <libxml/xmlsave.h>
35 #include <libxml/xmlwriter.h>
36 #endif /* USE_LIBXML */
38 #include "executor/executor.h"
40 #include "libpq/pqformat.h"
41 #include "mb/pg_wchar.h"
42 #include "nodes/execnodes.h"
43 #include "utils/builtins.h"
44 #include "utils/memutils.h"
45 #include "utils/xml.h"
50 #define PG_XML_DEFAULT_URI "dummy.xml"
52 static StringInfo xml_err_buf = NULL;
54 static void xml_init(void);
56 static void *xml_palloc(size_t size);
57 static void *xml_repalloc(void *ptr, size_t size);
58 static void xml_pfree(void *ptr);
59 static char *xml_pstrdup(const char *string);
61 static void xml_ereport(int level, int sqlcode,
62 const char *msg, void *ctxt);
63 static void xml_errorHandler(void *ctxt, const char *msg, ...);
64 static void xml_ereport_by_code(int level, int sqlcode,
65 const char *msg, int errcode);
66 static xmlChar *xml_text2xmlChar(text *in);
67 static xmlDocPtr xml_parse(text *data, bool is_document, bool preserve_whitespace);
69 #endif /* USE_LIBXML */
71 #define NO_XML_SUPPORT() \
73 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
74 errmsg("no XML support in this installation")))
/*
 * xml_in - text input function for the xml type.
 *
 * Copies the C-string argument into a freshly palloc'd varlena (header plus
 * len payload bytes) and hands that copy to xml_parse() as non-document
 * content with whitespace preserved.  The parse is done only as a
 * well-formedness check; the value returned is the unparsed copy.
 *
 * NOTE(review): the statement that computes len is not visible in this
 * excerpt -- presumably strlen(s); confirm.
 */
78 xml_in(PG_FUNCTION_ARGS)
81 char *s = PG_GETARG_CSTRING(0);
87 vardata = palloc(len + VARHDRSZ);
88 VARATT_SIZEP(vardata) = len + VARHDRSZ;
89 memcpy(VARDATA(vardata), s, len);
92 * Parse the data to check if it is well-formed XML data. Assume
93 * that ERROR occurred if parsing failed.
95 doc = xml_parse(vardata, false, true);
98 PG_RETURN_XML_P(vardata);
/*
 * xml_out - text output function for the xml type.
 *
 * Copies the payload bytes of the stored value into a palloc'd buffer that
 * is one byte larger than the payload and returns it as a C string.  None
 * of the visible statements calls into libxml.
 *
 * NOTE(review): the store of the terminating NUL into the extra byte is not
 * visible in this excerpt -- confirm it is present.
 */
107 xml_out(PG_FUNCTION_ARGS)
109 xmltype *s = PG_GETARG_XML_P(0);
113 len = VARSIZE(s) - VARHDRSZ;
114 result = palloc(len + 1);
115 memcpy(result, VARDATA(s), len);
118 PG_RETURN_CSTRING(result);
/*
 * xml_recv - binary receive function for the xml type.
 *
 * Reads everything left in the message buffer (buf->len - buf->cursor
 * bytes) as text via pq_getmsgtext(), copies the nbytes it reports into a
 * new varlena, and validates that copy with xml_parse() as non-document
 * content with whitespace preserved.  The unparsed copy is returned.
 */
123 xml_recv(PG_FUNCTION_ARGS)
126 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
132 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
134 result = (xmltype *) palloc(nbytes + VARHDRSZ);
135 VARATT_SIZEP(result) = nbytes + VARHDRSZ;
136 memcpy(VARDATA(result), str, nbytes);
140 * Parse the data to check if it is well-formed XML data. Assume
141 * that ERROR occurred if parsing failed.
143 doc = xml_parse(result, false, true);
146 PG_RETURN_XML_P(result);
/*
 * xml_send - binary send function for the xml type.
 *
 * Starts a typsend buffer, appends the value's payload bytes (everything
 * after the varlena header) unchanged, and returns the finished bytea.
 */
155 xml_send(PG_FUNCTION_ARGS)
157 xmltype *x = PG_GETARG_XML_P(0);
160 pq_begintypsend(&buf);
161 pq_sendbytes(&buf, VARDATA(x), VARSIZE(x) - VARHDRSZ);
162 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
/*
 * appendStringInfoText - append the payload of a text value to a StringInfo.
 *
 * The byte count is taken from the varlena header (VARSIZE minus the
 * header size), so t does not need to be NUL-terminated.
 */
168 appendStringInfoText(StringInfo str, const text *t)
170 appendBinaryStringInfo(str, VARDATA(t), VARSIZE(t) - VARHDRSZ);
/*
 * stringinfo_to_xmltype - copy the contents of a StringInfo into a varlena.
 *
 * Allocates buf->len bytes plus the varlena header, records the total size
 * in the header and copies buf->data into the payload.  The source buffer
 * is not modified by the visible statements.
 *
 * NOTE(review): the return statement is not visible here -- presumably
 * result is returned; confirm.
 */
175 stringinfo_to_xmltype(StringInfo buf)
180 len = buf->len + VARHDRSZ;
181 result = palloc(len);
182 VARATT_SIZEP(result) = len;
183 memcpy(VARDATA(result), buf->data, buf->len);
/*
 * xmlBuffer_to_xmltype - copy the contents of a libxml buffer into a varlena.
 *
 * Allocates xmlBufferLength(buf) bytes plus the varlena header, records the
 * total size in the header and copies the buffer content into the payload.
 * The libxml buffer is not freed by the visible statements; xmlroot() frees
 * it itself after calling this.
 */
190 xmlBuffer_to_xmltype(xmlBufferPtr buf)
195 len = xmlBufferLength(buf) + VARHDRSZ;
196 result = palloc(len);
197 VARATT_SIZEP(result) = len;
198 memcpy(VARDATA(result), xmlBufferContent(buf), len - VARHDRSZ);
/*
 * xmlcomment - build an XML comment from a text argument.
 *
 * Scans the argument for "--" anywhere or "-" as the last byte and reports
 * ERRCODE_INVALID_XML_COMMENT if found; otherwise wraps the argument in
 * "<!--" and "-->" and returns it as an xml value.
 *
 * NOTE(review): the length is read here with VARATT_SIZEP(), while the
 * other read-only length computations in this file use VARSIZE().
 */
206 xmlcomment(PG_FUNCTION_ARGS)
209 text *arg = PG_GETARG_TEXT_P(0);
210 int len = VARATT_SIZEP(arg) - VARHDRSZ;
214 /* check for "--" in string or "-" at the end */
/*
 * NOTE(review): the scan starts at i = 1, so for a one-byte argument the
 * loop body never runs.  The argument "-" is therefore not rejected even
 * though it ends in "-".
 */
215 for (i = 1; i < len; i++)
216 if ((VARDATA(arg)[i] == '-' && VARDATA(arg)[i - 1] == '-')
217 || (VARDATA(arg)[i] == '-' && i == len - 1))
219 (errcode(ERRCODE_INVALID_XML_COMMENT),
220 errmsg("invalid XML comment")));
222 initStringInfo(&buf);
223 appendStringInfo(&buf, "<!--");
224 appendStringInfoText(&buf, arg);
225 appendStringInfo(&buf, "-->");
227 PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
/*
 * texttoxml - convert a text value to xml.
 *
 * Delegates to xmlparse() with is_document = false and
 * preserve_whitespace = true.
 */
236 texttoxml(PG_FUNCTION_ARGS)
238 text *data = PG_GETARG_TEXT_P(0);
240 PG_RETURN_XML_P(xmlparse(data, false, true));
/*
 * xmlelement - build an XML element from an XmlExpr node.
 *
 * Writes into a libxml memory buffer through a text writer:
 *   - opens an element named xexpr->name;
 *   - walks named_args and arg_names in parallel, evaluates each expression,
 *     converts the value with the matching entry of named_outfuncs and
 *     writes it as an attribute;
 *   - walks args, evaluates each one, converts it with xml_out and writes
 *     the string unescaped (xmlTextWriterWriteRaw) as element content;
 *   - closes the element, frees the writer and converts the buffer to an
 *     xml value.
 *
 * NOTE(review): not visible in this excerpt, to be confirmed: how isnull is
 * handled after each ExecEvalExpr(), where the index i into named_outfuncs
 * is initialised and advanced, whether the results of xmlBufferCreate() and
 * xmlNewTextWriterMemory() are checked, and where buf is freed.
 */
245 xmlelement(XmlExprState *xmlExpr, ExprContext *econtext)
248 XmlExpr *xexpr = (XmlExpr *) xmlExpr->xprstate.expr;
258 xmlTextWriterPtr writer;
260 buf = xmlBufferCreate();
261 writer = xmlNewTextWriterMemory(buf, 0);
263 xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
266 forboth(arg, xmlExpr->named_args, narg, xexpr->arg_names)
268 ExprState *e = (ExprState *) lfirst(arg);
269 char *argname = strVal(lfirst(narg));
271 value = ExecEvalExpr(e, econtext, &isnull, NULL);
274 str = OutputFunctionCall(&xmlExpr->named_outfuncs[i], value);
275 xmlTextWriterWriteAttribute(writer, (xmlChar *) argname, (xmlChar *) str);
281 foreach(arg, xmlExpr->args)
283 ExprState *e = (ExprState *) lfirst(arg);
285 value = ExecEvalExpr(e, econtext, &isnull, NULL);
288 /* we know the value is XML type */
289 str = DatumGetCString(DirectFunctionCall1(xml_out,
291 xmlTextWriterWriteRaw(writer, (xmlChar *) str);
296 xmlTextWriterEndElement(writer);
297 xmlFreeTextWriter(writer);
299 result = xmlBuffer_to_xmltype(buf);
/*
 * xmlparse - validate a text value as XML and return it as xml.
 *
 * Runs xml_parse() on the input with the caller's is_document and
 * preserve_whitespace settings, then returns the input pointer itself cast
 * to xmltype; the parsed document is not serialized back.
 *
 * NOTE(review): what happens to doc after the parse is not visible here.
 */
310 xmlparse(text *data, bool is_document, bool preserve_whitespace)
315 doc = xml_parse(data, is_document, preserve_whitespace);
318 return (xmltype *) data;
/*
 * xmlpi - build an XML processing instruction "<?target content?>".
 *
 * target          PI target name
 * arg             optional content (see arg_is_null)
 * arg_is_null     whether arg is SQL NULL
 * result_is_null  output: set to arg_is_null after the target check
 *
 * The content, when used, must not contain "?>"; leading spaces are
 * stripped from it and a single space separates it from the target.
 *
 * NOTE(review): the comparison below is a 3-byte, case-insensitive prefix
 * match, so every target that merely starts with "xml" (for example
 * "xml-stylesheet") is rejected.  The XML 1.0 PITarget production appears
 * to exclude only the exact name "xml" -- confirm which is intended.
 */
327 xmlpi(char *target, text *arg, bool arg_is_null, bool *result_is_null)
333 if (pg_strncasecmp(target, "xml", 3) == 0)
335 (errcode(ERRCODE_SYNTAX_ERROR), /* really */
336 errmsg("invalid XML processing instruction"),
337 errdetail("XML processing instruction target name cannot start with \"xml\".")));
340 * Following the SQL standard, the null check comes after the
341 * syntax check above.
343 *result_is_null = arg_is_null;
347 initStringInfo(&buf);
349 appendStringInfo(&buf, "<?%s", target);
355 string = DatumGetCString(DirectFunctionCall1(textout,
356 PointerGetDatum(arg)));
357 if (strstr(string, "?>") != NULL)
359 (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
360 errmsg("invalid XML processing instruction"),
361 errdetail("XML processing instruction cannot contain \"?>\".")));
363 appendStringInfoChar(&buf, ' ');
364 appendStringInfoString(&buf, string + strspn(string, " "));
367 appendStringInfoString(&buf, "?>");
369 result = stringinfo_to_xmltype(&buf);
/*
 * xmlroot - rewrite the version / standalone properties of an XML document.
 *
 * Parses data as a full document (is_document = true), updates fields of
 * the resulting xmlDoc, serializes the document into a libxml buffer with
 * xmlSaveToBuffer()/xmlSaveDoc(), converts that buffer into an xml value
 * and frees the buffer.
 *
 * NOTE(review): the conditions guarding the version and standalone
 * assignments are not visible here.  Also not visible: whether the previous
 * doc->version is released before being overwritten, and whether save and
 * doc are closed/freed -- confirm.
 */
380 xmlroot(xmltype *data, text *version, int standalone)
388 doc = xml_parse((text *) data, true, true);
391 doc->version = xmlStrdup(xml_text2xmlChar(version));
404 doc->standalone = -1;
408 buffer = xmlBufferCreate();
409 save = xmlSaveToBuffer(buffer, NULL, 0);
410 xmlSaveDoc(save, doc);
415 result = xmlBuffer_to_xmltype(buffer);
416 xmlBufferFree(buffer);
426 * Validate document (given as string) against DTD (given as external link)
427 * TODO !!! use text instead of cstring for second arg
428 * TODO allow passing DTD as a string value (not only as a URI)
429 * TODO redesign (see comment with '!!!' below)
/*
 * xmlvalidate - validate an XML document against a DTD.
 *
 * Argument 0 is the document text, argument 1 identifies the DTD.  The
 * document is parsed with xmlCtxtReadMemory(), the DTD is loaded with
 * xmlParseDTD() and the two are checked with xmlValidateDtd().  A failed
 * validation is reported at NOTICE level; the function returns a boolean.
 *
 * NOTE(review): the PG_TRY/PG_CATCH lines, the conditions in front of each
 * xml_ereport() call and the assignments to result are not visible in this
 * excerpt.
 */
432 xmlvalidate(PG_FUNCTION_ARGS)
435 text *data = PG_GETARG_TEXT_P(0);
436 text *dtdOrUri = PG_GETARG_TEXT_P(1);
438 xmlParserCtxtPtr ctxt = NULL;
439 xmlDocPtr doc = NULL;
440 xmlDtdPtr dtd = NULL;
444 /* We use a PG_TRY block to ensure libxml is cleaned up on error */
447 ctxt = xmlNewParserCtxt();
449 xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
450 "could not allocate parser context", ctxt);
452 doc = xmlCtxtReadMemory(ctxt, (char *) VARDATA(data),
453 VARSIZE(data) - VARHDRSZ,
454 PG_XML_DEFAULT_URI, NULL, 0);
456 xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
457 "could not parse XML data", ctxt);
/*
 * NOTE(review): the statements from xmlCreateURI() through the "not
 * implemented yet" report pass dtdOrUri, which is declared as text *, both
 * to a "%s" format and to xmlParseURI(), and they overwrite uri right after
 * assigning it.  This looks like draft code that is presumably compiled out
 * by preprocessor lines not visible here -- confirm before enabling.
 */
460 uri = xmlCreateURI();
461 elog(NOTICE, "dtd - %s", dtdOrUri);
462 dtd = palloc(sizeof(xmlDtdPtr));
463 uri = xmlParseURI(dtdOrUri);
465 xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
466 "not implemented yet... (TODO)", ctxt);
469 dtd = xmlParseDTD(NULL, xml_text2xmlChar(dtdOrUri));
472 xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
473 "could not load DTD", ctxt);
/*
 * NOTE(review): the validation context is created inline in the call, so
 * no pointer to it is kept and it cannot be freed afterwards.
 */
475 if (xmlValidateDtd(xmlNewValidCtxt(), doc, dtd) == 1)
479 xml_ereport(NOTICE, ERRCODE_INVALID_XML_DOCUMENT,
480 "validation against DTD failed", ctxt);
491 xmlFreeParserCtxt(ctxt);
505 xmlFreeParserCtxt(ctxt);
512 PG_RETURN_BOOL(result);
513 #else /* not USE_LIBXML */
516 #endif /* not USE_LIBXML */
523 * Container for some init stuff (not good design!)
524 * TODO xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and check)
530 * Currently, we have no pure UTF-8 support for internals -- check
/*
 * Body of the libxml initialisation routine (presumably xml_init(); its
 * signature line is not visible in this excerpt).
 *
 * Steps visible here:
 *   1. refuse to continue if char and xmlChar differ in size;
 *   2. create the error buffer xml_err_buf in TopMemoryContext on first
 *      use, otherwise reset the existing buffer to empty;
 *   3. install xml_errorHandler as libxml's generic error function.
 *
 * NOTE(review): the errdetail format uses %u but both arguments are cast to
 * int; the specifier and the cast should agree.
 */
533 if (sizeof (char) != sizeof (xmlChar))
535 (errmsg("could not initialize XML library"),
536 errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.",
537 (int) sizeof(char), (int) sizeof(xmlChar))));
539 if (xml_err_buf == NULL)
541 /* First time through: create error buffer in permanent context */
542 MemoryContext oldcontext;
544 oldcontext = MemoryContextSwitchTo(TopMemoryContext);
545 xml_err_buf = makeStringInfo();
546 MemoryContextSwitchTo(oldcontext);
550 /* Reset pre-existing buffer to empty */
551 xml_err_buf->data[0] = '\0';
552 xml_err_buf->len = 0;
554 /* Now that xml_err_buf exists, safe to call xml_errorHandler */
555 xmlSetGenericErrorFunc(NULL, xml_errorHandler);
559 * FIXME: This doesn't work because libxml assumes that whatever
560 * libxml allocates, only libxml will free, so we can't just drop
561 * memory contexts behind it. This needs to be refined.
/*
 * NOTE(review): given the FIXME above and the NOT_USED guard around the
 * wrapper functions, this call is presumably compiled out -- confirm.
 */
563 xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
571 * SQL/XML allows storing "XML documents" or "XML content". "XML
572 * documents" are specified by the XML specification and are parsed
573 * easily by libxml. "XML content" is specified by SQL/XML as the
574 * production "XMLDecl? content". But libxml can only parse the
575 * "content" part, so we have to parse the XML declaration ourselves
579 #define CHECK_XML_SPACE(p) if (!xmlIsBlank_ch(*(p))) return XML_ERR_SPACE_REQUIRED
580 #define SKIP_XML_SPACE(p) while (xmlIsBlank_ch(*(p))) (p)++
/*
 * parse_xml_decl - hand-parse an optional XML declaration at the start of str.
 *
 * str         input string
 * len         output parameter (its assignment is not visible here;
 *             presumably the number of bytes consumed -- confirm)
 * encoding    output: a copy, made with xmlStrndup(), of the quoted
 *             encoding name when one is present
 * standalone  output: standalone setting (assignments not visible here)
 *
 * Recognises, in order: "<?xml", a "version" pseudo-attribute whose value
 * must be '1.0' or "1.0", an optional "encoding" pseudo-attribute with a
 * quoted value, an optional "standalone" pseudo-attribute whose value must
 * be yes or no, and the closing "?>".  Failures are reported by returning a
 * libxml XML_ERR_* code.
 *
 * NOTE(review): the handling of input that does not start with "<?xml" and
 * the success return value are not visible in this excerpt.
 */
583 parse_xml_decl(const xmlChar *str, size_t *len, xmlChar **encoding, int *standalone)
586 const xmlChar *save_p;
590 if (xmlStrncmp(p, (xmlChar *)"<?xml", 5) != 0)
598 if (xmlStrncmp(p, (xmlChar *)"version", 7) != 0)
599 return XML_ERR_VERSION_MISSING;
603 return XML_ERR_VERSION_MISSING;
606 if (xmlStrncmp(p, (xmlChar *)"'1.0'", 5) != 0 && xmlStrncmp(p, (xmlChar *)"\"1.0\"", 5) != 0)
607 return XML_ERR_VERSION_MISSING;
613 if (xmlStrncmp(p, (xmlChar *)"encoding", 8) == 0)
615 CHECK_XML_SPACE(save_p);
619 return XML_ERR_MISSING_ENCODING;
623 if (*p == '\'' || *p == '"')
627 q = xmlStrchr(p + 1, *p);
629 return XML_ERR_MISSING_ENCODING;
631 *encoding = xmlStrndup(p + 1, q - p - 1);
635 return XML_ERR_MISSING_ENCODING;
646 if (xmlStrncmp(p, (xmlChar *)"standalone", 10) == 0)
648 CHECK_XML_SPACE(save_p);
652 return XML_ERR_STANDALONE_VALUE;
655 if (xmlStrncmp(p, (xmlChar *)"'yes'", 5) == 0 || xmlStrncmp(p, (xmlChar *)"\"yes\"", 5) == 0)
660 else if (xmlStrncmp(p, (xmlChar *)"'no'", 4) == 0 || xmlStrncmp(p, (xmlChar *)"\"no\"", 4) == 0)
666 return XML_ERR_STANDALONE_VALUE;
675 if (xmlStrncmp(p, (xmlChar *)"?>", 2) != 0)
676 return XML_ERR_XMLDECL_NOT_FINISHED;
687 * Convert a C string to XML internal representation
689 * TODO maybe, libxml2's xmlreader is better? (do not construct DOM, yet do not use SAX - see xml_reader.c)
690 * TODO what about internal URI for docs? (see PG_XML_DEFAULT_URI below)
/*
 * xml_parse - parse a text value with libxml and produce an xmlDoc.
 *
 * data                 value to parse
 * is_document          selects between the two parsing paths visible below
 * preserve_whitespace  when false, XML_PARSE_NOBLANKS is added to the
 *                      parser options of the xmlCtxtReadMemory() path
 *
 * Two paths are visible:
 *   - xmlCtxtReadMemory() on the whole string, reporting "invalid XML
 *     document" on failure;
 *   - parse_xml_decl() for the XML declaration followed by
 *     xmlParseBalancedChunkMemory() on the rest of the string, reporting
 *     "invalid XML content" on failure, after which the declared encoding
 *     and standalone value are stored in the new document.
 *
 * NOTE(review): the conditions selecting each path, the PG_TRY/PG_CATCH
 * lines, the test on the result of parse_xml_decl() and the return
 * statement are not visible in this excerpt.
 */
693 xml_parse(text *data, bool is_document, bool preserve_whitespace)
698 xmlParserCtxtPtr ctxt = NULL;
699 xmlDocPtr doc = NULL;
701 len = VARSIZE(data) - VARHDRSZ; /* will be useful later */
702 string = xml_text2xmlChar(data);
706 /* We use a PG_TRY block to ensure libxml is cleaned up on error */
709 ctxt = xmlNewParserCtxt();
711 xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
712 "could not allocate parser context", ctxt);
717 * Note, that here we try to apply DTD defaults
718 * (XML_PARSE_DTDATTR) according to SQL/XML:10.16.7.d:
719 * 'Default valies defined by internal DTD are applied'.
720 * As for external DTDs, we try to support them too, (see
/*
 * NOTE(review): XML_PARSE_NOENT asks libxml to substitute entity
 * references.  This function is used on stored and client-supplied values,
 * so confirm that entity substitution (including entities declared with
 * external identifiers) is acceptable for untrusted input.
 */
723 doc = xmlCtxtReadMemory(ctxt, (char *) string, len,
724 PG_XML_DEFAULT_URI, NULL,
725 XML_PARSE_NOENT | XML_PARSE_DTDATTR
726 | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
728 xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
729 "invalid XML document", ctxt);
734 xmlChar *encoding = NULL;
737 doc = xmlNewDoc(NULL);
739 res_code = parse_xml_decl(string, &count, &encoding, &standalone);
741 /* TODO resolve: xmlParseBalancedChunkMemory assumes that string is UTF8 encoded! */
743 res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, string + count, NULL);
745 xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
746 "invalid XML content", res_code);
748 doc->encoding = encoding;
749 doc->standalone = standalone;
752 /* TODO encoding issues
755 * - XML data has explicit encoding attribute in its prolog
756 * - if not, assume that enc. of XML data is the same as client's one
758 * The common rule is to accept the XML data only if its encoding
759 * is the same as encoding of the storage (server's). The other possible
760 * option is to accept all the docs, but DO TRANSFORMATION and, if needed,
763 * I think I'd stick the first way (for the 1st version),
764 * it's much simplier (less errors...)
769 xmlFreeParserCtxt(ctxt);
778 xmlFreeParserCtxt(ctxt);
790 * xmlChar<->text conversions
/*
 * xml_text2xmlChar - copy the payload of a text value into a new buffer.
 *
 * Allocates one byte more than the payload length with palloc() and copies
 * the payload bytes into it without any encoding conversion.
 *
 * NOTE(review): the store of the terminating NUL and the return statement
 * are not visible in this excerpt -- confirm.
 */
793 xml_text2xmlChar(text *in)
795 int32 len = VARSIZE(in) - VARHDRSZ;
798 res = palloc(len + 1);
799 memcpy(res, VARDATA(in), len);
808 * Wrappers for memory management functions
811 xml_palloc(size_t size)
/*
 * xml_repalloc - realloc-style wrapper that forwards to repalloc().
 *
 * Has the (ptr, size) shape that xmlMemSetup() is given for its realloc
 * slot.
 */
818 xml_repalloc(void *ptr, size_t size)
820 return repalloc(ptr, size);
/*
 * xml_pstrdup - strdup-style wrapper that forwards to pstrdup().
 *
 * Has the shape that xmlMemSetup() is given for its strdup slot.
 */
832 xml_pstrdup(const char *string)
834 return pstrdup(string);
836 #endif /* NOT_USED */
840 * Wrapper for "ereport" function.
841 * Adds detail - libxml's native error message, if any.
/*
 * xml_ereport - report an error, adding libxml's own message as detail.
 *
 * level    ereport severity
 * sqlcode  SQLSTATE error code
 * msg      primary message
 * ctxt     libxml parser context to query for its last error
 *
 * Any text collected in xml_err_buf by the libxml error handler is emitted
 * first and the buffer is reset to empty.  Then the last error of ctxt is
 * fetched; its message is copied, each newline in the copy is replaced by
 * '.', and the copy is attached as errdetail.
 *
 * xml_err_buf is dereferenced without a NULL check, so the buffer must
 * already have been created by the initialisation code.
 *
 * NOTE(review): the severity used when flushing xml_err_buf and the report
 * issued when libxml has no error recorded are not visible in this excerpt.
 */
844 xml_ereport(int level, int sqlcode,
845 const char *msg, void *ctxt)
847 xmlErrorPtr libxmlErr = NULL;
849 if (xml_err_buf->len > 0)
852 (errmsg("%s", xml_err_buf->data)));
853 xml_err_buf->data[0] = '\0';
854 xml_err_buf->len = 0;
858 libxmlErr = xmlCtxtGetLastError(ctxt);
860 if (libxmlErr == NULL)
868 /* as usual, libxml error message contains '\n'; get rid of it */
872 xmlErrDetail = pstrdup(libxmlErr->message);
873 xmlErrLen = strlen(xmlErrDetail);
874 for (i = 0; i < xmlErrLen; i++)
876 if (xmlErrDetail[i] == '\n')
877 xmlErrDetail[i] = '.';
882 errdetail("%s", xmlErrDetail)));
888 * Error handler for libxml error messages
/*
 * xml_errorHandler - generic error callback registered with libxml via
 * xmlSetGenericErrorFunc().
 *
 * Formats the printf-style message into xml_err_buf.  When the formatted
 * text does not fit, the buffer is enlarged by its current maxlen and the
 * formatting is attempted again.  The ctxt argument is not used by the
 * visible statements.
 *
 * NOTE(review): the retry loop and the va_start/va_end lines are not
 * visible in this excerpt.
 */
891 xml_errorHandler(void *ctxt, const char *msg,...)
893 /* Append the formatted text to xml_err_buf */
899 /* Try to format the data. */
901 success = appendStringInfoVA(xml_err_buf, msg, args);
907 /* Double the buffer size and try again. */
908 enlargeStringInfo(xml_err_buf, xml_err_buf->maxlen);
914 * Return error message by libxml error code
915 * TODO make them closer to recommendations from Postgres manual
/*
 * xml_ereport_by_code - report an error, deriving the detail text from a
 * libxml error code instead of from a parser context.
 *
 * level    ereport severity
 * sqlcode  SQLSTATE error code
 * msg      primary message
 * code     libxml XML_ERR_* value to translate
 *
 * Any text collected in xml_err_buf is emitted first and the buffer is
 * reset.  The switch then maps the code to a fixed English sentence; the
 * fallback text contains "%d" and is the only one that consumes the code
 * argument passed to errdetail().
 *
 * NOTE(review): parse_xml_decl() can return XML_ERR_MISSING_ENCODING, which
 * has no case among the ones visible here; if that result is reported
 * through this function it falls into the "Unrecognized" fallback.
 */
918 xml_ereport_by_code(int level, int sqlcode,
919 const char *msg, int code)
923 if (xml_err_buf->len > 0)
926 (errmsg("%s", xml_err_buf->data)));
927 xml_err_buf->data[0] = '\0';
928 xml_err_buf->len = 0;
933 case XML_ERR_INTERNAL_ERROR:
934 det = "libxml internal error";
936 case XML_ERR_ENTITY_LOOP:
937 det = "Detected an entity reference loop";
939 case XML_ERR_ENTITY_NOT_STARTED:
940 det = "EntityValue: \" or ' expected";
942 case XML_ERR_ENTITY_NOT_FINISHED:
943 det = "EntityValue: \" or ' expected";
945 case XML_ERR_ATTRIBUTE_NOT_STARTED:
946 det = "AttValue: \" or ' expected";
948 case XML_ERR_LT_IN_ATTRIBUTE:
949 det = "Unescaped '<' not allowed in attributes values";
951 case XML_ERR_LITERAL_NOT_STARTED:
952 det = "SystemLiteral \" or ' expected";
954 case XML_ERR_LITERAL_NOT_FINISHED:
955 det = "Unfinished System or Public ID \" or ' expected";
957 case XML_ERR_MISPLACED_CDATA_END:
958 det = "Sequence ']]>' not allowed in content";
960 case XML_ERR_URI_REQUIRED:
961 det = "SYSTEM or PUBLIC, the URI is missing";
963 case XML_ERR_PUBID_REQUIRED:
964 det = "PUBLIC, the Public Identifier is missing";
966 case XML_ERR_HYPHEN_IN_COMMENT:
967 det = "Comment must not contain '--' (double-hyphen)";
969 case XML_ERR_PI_NOT_STARTED:
970 det = "xmlParsePI : no target name";
972 case XML_ERR_RESERVED_XML_NAME:
973 det = "Invalid PI name";
975 case XML_ERR_NOTATION_NOT_STARTED:
976 det = "NOTATION: Name expected here";
978 case XML_ERR_NOTATION_NOT_FINISHED:
979 det = "'>' required to close NOTATION declaration";
981 case XML_ERR_VALUE_REQUIRED:
982 det = "Entity value required";
984 case XML_ERR_URI_FRAGMENT:
985 det = "Fragment not allowed";
987 case XML_ERR_ATTLIST_NOT_STARTED:
988 det = "'(' required to start ATTLIST enumeration";
990 case XML_ERR_NMTOKEN_REQUIRED:
991 det = "NmToken expected in ATTLIST enumeration";
993 case XML_ERR_ATTLIST_NOT_FINISHED:
994 det = "')' required to finish ATTLIST enumeration";
996 case XML_ERR_MIXED_NOT_STARTED:
997 det = "MixedContentDecl : '|' or ')*' expected";
999 case XML_ERR_PCDATA_REQUIRED:
1000 det = "MixedContentDecl : '#PCDATA' expected";
1002 case XML_ERR_ELEMCONTENT_NOT_STARTED:
1003 det = "ContentDecl : Name or '(' expected";
1005 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
1006 det = "ContentDecl : ',' '|' or ')' expected";
1008 case XML_ERR_PEREF_IN_INT_SUBSET:
1009 det = "PEReference: forbidden within markup decl in internal subset";
1011 case XML_ERR_GT_REQUIRED:
1012 det = "Expected '>'";
1014 case XML_ERR_CONDSEC_INVALID:
1015 det = "XML conditional section '[' expected";
1017 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
1018 det = "Content error in the external subset";
1020 case XML_ERR_CONDSEC_INVALID_KEYWORD:
1021 det = "conditional section INCLUDE or IGNORE keyword expected";
1023 case XML_ERR_CONDSEC_NOT_FINISHED:
1024 det = "XML conditional section not closed";
1026 case XML_ERR_XMLDECL_NOT_STARTED:
1027 det = "Text declaration '<?xml' required";
1029 case XML_ERR_XMLDECL_NOT_FINISHED:
1030 det = "parsing XML declaration: '?>' expected";
1032 case XML_ERR_EXT_ENTITY_STANDALONE:
1033 det = "external parsed entities cannot be standalone";
1035 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
1036 det = "EntityRef: expecting ';'";
1038 case XML_ERR_DOCTYPE_NOT_FINISHED:
1039 det = "DOCTYPE improperly terminated";
1041 case XML_ERR_LTSLASH_REQUIRED:
1042 det = "EndTag: '</' not found";
1044 case XML_ERR_EQUAL_REQUIRED:
1045 det = "Expected '='";
1047 case XML_ERR_STRING_NOT_CLOSED:
1048 det = "String not closed expecting \" or '";
1050 case XML_ERR_STRING_NOT_STARTED:
1051 det = "String not started expecting ' or \"";
1053 case XML_ERR_ENCODING_NAME:
1054 det = "Invalid XML encoding name";
1056 case XML_ERR_STANDALONE_VALUE:
1057 det = "Standalone accepts only 'yes' or 'no'";
1059 case XML_ERR_DOCUMENT_EMPTY:
1060 det = "Document is empty";
1062 case XML_ERR_DOCUMENT_END:
1063 det = "Extra content at the end of the document";
1065 case XML_ERR_NOT_WELL_BALANCED:
1066 det = "Chunk is not well balanced";
1068 case XML_ERR_EXTRA_CONTENT:
1069 det = "Extra content at the end of well balanced chunk";
1071 case XML_ERR_VERSION_MISSING:
1072 det = "Malformed declaration expecting version";
1074 /* more err codes... Please, keep the order! */
1075 case XML_ERR_ATTRIBUTE_WITHOUT_VALUE: /* 41 */
1076 det ="Attribute without value";
1078 case XML_ERR_ATTRIBUTE_REDEFINED:
1079 det ="Attribute defined more than once in the same element";
1081 case XML_ERR_COMMENT_NOT_FINISHED: /* 45 */
1082 det = "Comment is not finished";
1084 case XML_ERR_NAME_REQUIRED: /* 68 */
1085 det = "Element name not found";
1087 case XML_ERR_TAG_NOT_FINISHED: /* 77 */
1088 det = "Closing tag not found";
1091 det = "Unrecognized libxml error code: %d";
1098 errdetail(det, code)));
1103 * Convert one char in the current server encoding to a Unicode codepoint.
/*
 * sqlchar_to_unicode - return the Unicode code point of the first character
 * of s, where s is in the database encoding.
 *
 * The character is converted from the database encoding with
 * pg_do_encoding_conversion() and the converted bytes are decoded as UTF-8
 * into a two-element pg_wchar array (the second slot is for the trailing
 * zero).
 *
 * NOTE(review): the byte length handed to the UTF-8 decoder is
 * pg_mblen(s), i.e. it is measured on the unconverted input rather than on
 * utf8string.  If the character occupies a different number of bytes after
 * conversion, the decoder would see too few or too many bytes -- confirm.
 */
1106 sqlchar_to_unicode(char *s)
1109 pg_wchar ret[2]; /* need space for trailing zero */
1111 utf8string = (char *) pg_do_encoding_conversion((unsigned char *) s,
1113 GetDatabaseEncoding(),
1116 pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret, pg_mblen(s));
/*
 * is_valid_xml_namefirst - can code point c start an XML name?
 *
 * True for libxml's BaseChar and Ideographic classes and for '_' and ':'.
 */
1123 is_valid_xml_namefirst(pg_wchar c)
1125 /* (Letter | '_' | ':') */
1126 return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
1127 || c == '_' || c == ':');
/*
 * is_valid_xml_namechar - can code point c appear inside an XML name?
 *
 * The visible tests accept libxml's BaseChar, Ideographic, CombiningChar
 * and Extender classes plus '.', '-', '_' and ':'.
 *
 * NOTE(review): the existing comment also lists Digit; the line carrying
 * that test is not visible in this excerpt.
 */
1132 is_valid_xml_namechar(pg_wchar c)
1134 /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
1135 return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
1137 || c == '.' || c == '-' || c == '_' || c == ':'
1138 || xmlIsCombiningQ(c)
1139 || xmlIsExtenderQ(c));
1141 #endif /* USE_LIBXML */
1145 * Map SQL identifier to XML name; see SQL/XML:2003 section 9.1.
/*
 * map_sql_identifier_to_xml_name - escape an SQL identifier so that it can
 * be used as an XML name.
 *
 * ident          identifier, walked one multibyte character at a time
 * fully_escaped  selects the stricter escaping rules below
 *
 * Rules applied per character, in this order:
 *   - ':' becomes "_x003A_" when it is the first character or when
 *     fully_escaped is set;
 *   - '_' followed by 'x' becomes "_x005F_", so that literal text cannot
 *     be mistaken for an escape sequence;
 *   - when fully_escaped is set and the identifier starts with "xml" in any
 *     letter case, the first letter is written as "_x0078_" or "_x0058_";
 *   - a character that is not valid at its position in an XML name is
 *     written as "_xHHHH_" using its Unicode code point;
 *   - anything else is copied through unchanged.
 *
 * NOTE(review): the condition choosing between "_x0078_" and "_x0058_", the
 * return statement and the non-libxml branch are not visible here.
 */
1148 map_sql_identifier_to_xml_name(char *ident, bool fully_escaped)
1154 initStringInfo(&buf);
1156 for (p = ident; *p; p += pg_mblen(p))
1158 if (*p == ':' && (p == ident || fully_escaped))
1159 appendStringInfo(&buf, "_x003A_");
1160 else if (*p == '_' && *(p+1) == 'x')
1161 appendStringInfo(&buf, "_x005F_");
1162 else if (fully_escaped && p == ident &&
1163 pg_strncasecmp(p, "xml", 3) == 0)
1166 appendStringInfo(&buf, "_x0078_");
1168 appendStringInfo(&buf, "_x0058_");
1172 pg_wchar u = sqlchar_to_unicode(p);
1175 ? !is_valid_xml_namefirst(u)
1176 : !is_valid_xml_namechar(u))
1177 appendStringInfo(&buf, "_x%04X_", (unsigned int) u);
1179 appendBinaryStringInfo(&buf, p, pg_mblen(p));
1184 #else /* not USE_LIBXML */
1187 #endif /* not USE_LIBXML */
1192 * Map a Unicode codepoint into the current server encoding.
/*
 * unicode_to_sqlchar - convert one Unicode code point to a string in the
 * database encoding.
 *
 * The code point is first encoded as UTF-8 by hand (two bytes up to 0x7FF,
 * three bytes up to 0xFFFF, four bytes otherwise; the single-byte case is
 * not visible in this excerpt) and the bytes are then passed to
 * pg_do_encoding_conversion().
 *
 * NOTE(review): the scratch buffer is static and exactly four bytes long.
 * A four-byte sequence fills it completely, leaving no room for a
 * terminating NUL, and bytes written by an earlier call remain in the
 * positions a shorter sequence does not overwrite.  Confirm that no
 * consumer relies on the buffer being NUL-terminated.
 */
1195 unicode_to_sqlchar(pg_wchar c)
1197 static unsigned char utf8string[4];
1203 else if (c <= 0x7FF)
1205 utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
1206 utf8string[1] = 0x80 | (c & 0x3F);
1208 else if (c <= 0xFFFF)
1210 utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
1211 utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
1212 utf8string[2] = 0x80 | (c & 0x3F);
1216 utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
1217 utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
1218 utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
1219 utf8string[3] = 0x80 | (c & 0x3F);
1222 return (char *) pg_do_encoding_conversion(utf8string,
1223 pg_mblen((char *) utf8string),
1225 GetDatabaseEncoding());
1230 * Map XML name to SQL identifier; see SQL/XML:2003 section 9.17.
1233 map_xml_name_to_sql_identifier(char *name)
1238 initStringInfo(&buf);
1240 for (p = name; *p; p += pg_mblen(p))
1242 if (*p == '_' && *(p+1) == 'x'
1243 && isxdigit((unsigned char) *(p+2))
1244 && isxdigit((unsigned char) *(p+3))
1245 && isxdigit((unsigned char) *(p+4))
1246 && isxdigit((unsigned char) *(p+5))
1251 sscanf(p + 2, "%X", &u);
1252 appendStringInfoString(&buf, unicode_to_sqlchar(u));
1256 appendBinaryStringInfo(&buf, p, pg_mblen(p));