-<!-- $PostgreSQL: pgsql/doc/src/sgml/array.sgml,v 1.36 2004/08/05 03:29:11 joe Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/array.sgml,v 1.37 2004/08/08 05:01:51 joe Exp $ -->
<sect1 id="arrays">
<title>Arrays</title>
</synopsis>
where <replaceable>delim</replaceable> is the delimiter character
for the type, as recorded in its <literal>pg_type</literal> entry.
- (For all built-in types, this is the comma character
- <quote><literal>,</literal></>.) Each
- <replaceable>val</replaceable> is either a constant of the array
- element type, or a subarray. An example of an array constant is
+ Among the standard data types provided in the
+ <productname>PostgreSQL</productname> distribution, type
+ <literal>box</> uses a semicolon (<literal>;</>) but all the others
+ use comma (<literal>,</>). Each <replaceable>val</replaceable> is
+ either a constant of the array element type, or a subarray. An example
+ of an array constant is
<programlisting>
'{{1,2,3},{4,5,6},{7,8,9}}'
</programlisting>
</para>
<para>
- The <literal>ARRAY</literal> expression syntax may also be used:
+ The <literal>ARRAY</> constructor syntax may also be used:
<programlisting>
INSERT INTO sal_emp
VALUES ('Bill',
Notice that the array elements are ordinary SQL constants or
expressions; for instance, string literals are single quoted, instead of
double quoted as they would be in an array literal. The <literal>ARRAY</>
- expression syntax is discussed in more detail in <xref
- linkend="sql-syntax-array-constructors">.
+ constructor syntax is discussed in more detail in
+ <xref linkend="sql-syntax-array-constructors">.
</para>
</sect2>
use comma.) In a multidimensional array, each dimension (row, plane,
cube, etc.) gets its own level of curly braces, and delimiters
must be written between adjacent curly-braced entities of the same level.
- You may write whitespace before a left brace, after a right
- brace, or before any individual item string. Whitespace after an item
- is not ignored, however: after skipping leading whitespace, everything
- up to the next right brace or delimiter is taken as the item value.
+ </para>
+
+ <para>
+ The array output routine will put double quotes around element values
+ if they are empty strings or contain curly braces, delimiter characters,
+ double quotes, backslashes, or white space. Double quotes and backslashes
+ embedded in element values will be backslash-escaped. For numeric
+ data types it is safe to assume that double quotes will never appear, but
+ for textual data types one should be prepared to cope with either presence
+ or absence of quotes. (This is a change in behavior from pre-7.2
+ <productname>PostgreSQL</productname> releases.)
</para>
<para>
<para>
As shown previously, when writing an array value you may write double
- quotes around any individual array
- element. You <emphasis>must</> do so if the element value would otherwise
- confuse the array-value parser. For example, elements containing curly
- braces, commas (or whatever the delimiter character is), double quotes,
- backslashes, or leading white space must be double-quoted. To put a double
- quote or backslash in a quoted array element value, precede it with a
- backslash.
- Alternatively, you can use backslash-escaping to protect all data characters
- that would otherwise be taken as array syntax or ignorable white space.
+ quotes around any individual array element. You <emphasis>must</> do so
+ if the element value would otherwise confuse the array-value parser.
+ For example, elements containing curly braces, commas (or whatever the
+ delimiter character is), double quotes, backslashes, or leading white
+ space must be double-quoted. To put a double quote or backslash in a
+ quoted array element value, precede it with a backslash. Alternatively,
+ you can use backslash-escaping to protect all data characters that would
+ otherwise be taken as array syntax.
</para>
<para>
- The array output routine will put double quotes around element values
- if they are empty strings or contain curly braces, delimiter characters,
- double quotes, backslashes, or white space. Double quotes and backslashes
- embedded in element values will be backslash-escaped. For numeric
- data types it is safe to assume that double quotes will never appear, but
- for textual data types one should be prepared to cope with either presence
- or absence of quotes. (This is a change in behavior from pre-7.2
- <productname>PostgreSQL</productname> releases.)
+ You may write whitespace before a left brace or after a right
+ brace. You may also write whitespace before or after any individual item
+ string. In all of these cases the whitespace will be ignored. However,
+ whitespace within double-quoted elements, or surrounded on both sides by
+ non-whitespace characters of an element, is not ignored.
</para>
<note>
<tip>
<para>
- The <literal>ARRAY</> constructor syntax is often easier to work with
- than the array-literal syntax when writing array values in SQL commands.
- In <literal>ARRAY</>, individual element values are written the same way
- they would be written when not members of an array.
+ The <literal>ARRAY</> constructor syntax (see
+ <xref linkend="sql-syntax-array-constructors">) is often easier to work
+ with than the array-literal syntax when writing array values in SQL
+ commands. In <literal>ARRAY</>, individual element values are written the
+ same way they would be written when not members of an array.
</para>
</tip>
</sect2>
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/arrayfuncs.c,v 1.106 2004/08/05 03:29:37 joe Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/arrayfuncs.c,v 1.107 2004/08/08 05:01:55 joe Exp $
*
*-------------------------------------------------------------------------
*/
* The syntax for array input is C-like nested curly braces
*-----------------------------------------------------------------------------
*/
+typedef enum /* parse states tracked by ArrayCount while checking an array literal */
+{
+ ARRAY_NO_LEVEL, /* initial state: no left brace seen yet */
+ ARRAY_LEVEL_STARTED, /* just saw an unquoted left brace */
+ ARRAY_ELEM_STARTED, /* in an unquoted element (after an escape or a non-space character) */
+ ARRAY_ELEM_COMPLETED, /* unquoted element finished; NOTE(review): never assigned in the visible hunks -- confirm */
+ ARRAY_QUOTED_ELEM_STARTED, /* just saw the opening double quote of an element */
+ ARRAY_QUOTED_ELEM_COMPLETED, /* just saw the closing double quote of an element */
+ ARRAY_ELEM_DELIMITED, /* just saw a delimiter that followed an element */
+ ARRAY_LEVEL_COMPLETED, /* just saw an unquoted right brace */
+ ARRAY_LEVEL_DELIMITED /* just saw a delimiter that followed a right brace */
+} ArrayParseState;
+
static int
ArrayCount(char *str, int *dim, char typdelim)
{
- int nest_level = 0,
- i;
- int ndim = 1,
- temp[MAXDIM],
- nelems[MAXDIM],
- nelems_last[MAXDIM];
- bool scanning_string = false;
- bool eoArray = false;
- char *ptr;
+ int nest_level = 0,
+ i;
+ int ndim = 1,
+ temp[MAXDIM],
+ nelems[MAXDIM],
+ nelems_last[MAXDIM];
+ bool scanning_string = false;
+ bool eoArray = false;
+ char *ptr;
+ ArrayParseState parse_state = ARRAY_NO_LEVEL;
for (i = 0; i < MAXDIM; ++i)
{
nelems_last[i] = nelems[i] = 1;
}
+ /* special case for an empty array */
if (strncmp(str, "{}", 2) == 0)
return 0;
errmsg("malformed array literal: \"%s\"", str)));
break;
case '\\':
+ /*
+ * An escape must be after a level start, after an
+ * element start (quoted or not), or after an element
+ * delimiter. In any case we now must be past an element start.
+ */
+ if (parse_state != ARRAY_LEVEL_STARTED &&
+ parse_state != ARRAY_ELEM_STARTED &&
+ parse_state != ARRAY_QUOTED_ELEM_STARTED &&
+ parse_state != ARRAY_ELEM_DELIMITED)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str)));
+ if (parse_state != ARRAY_QUOTED_ELEM_STARTED)
+ parse_state = ARRAY_ELEM_STARTED;
/* skip the escaped character */
if (*(ptr + 1))
ptr++;
errmsg("malformed array literal: \"%s\"", str)));
break;
case '\"':
+ /*
+ * A quote must be after a level start, after a quoted
+ * element start, or after an element delimiter. In any
+ * case we now must be past an element start.
+ */
+ if (parse_state != ARRAY_LEVEL_STARTED &&
+ parse_state != ARRAY_QUOTED_ELEM_STARTED &&
+ parse_state != ARRAY_ELEM_DELIMITED)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str)));
scanning_string = !scanning_string;
+ if (scanning_string)
+ parse_state = ARRAY_QUOTED_ELEM_STARTED;
+ else
+ parse_state = ARRAY_QUOTED_ELEM_COMPLETED;
break;
case '{':
if (!scanning_string)
{
+ /*
+ * A left brace can occur if no nesting has
+ * occurred yet, after a level start, or
+ * after a level delimiter.
+ */
+ if (parse_state != ARRAY_NO_LEVEL &&
+ parse_state != ARRAY_LEVEL_STARTED &&
+ parse_state != ARRAY_LEVEL_DELIMITED)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str)));
+ parse_state = ARRAY_LEVEL_STARTED;
if (nest_level >= MAXDIM)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
case '}':
if (!scanning_string)
{
+ /*
+ * A right brace can occur after an element start,
+ * an element completion, a quoted element completion,
+ * or a level completion.
+ */
+ if (parse_state != ARRAY_ELEM_STARTED &&
+ parse_state != ARRAY_ELEM_COMPLETED &&
+ parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
+ parse_state != ARRAY_LEVEL_COMPLETED)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str)));
+ parse_state = ARRAY_LEVEL_COMPLETED;
if (nest_level == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
}
break;
default:
- if (*ptr == typdelim && !scanning_string)
+ if (!scanning_string)
{
- itemdone = true;
- nelems[nest_level - 1]++;
+ if (*ptr == typdelim)
+ {
+ /*
+ * Delimiters can occur after an element start,
+ * an element completion, a quoted element
+ * completion, or a level completion.
+ */
+ if (parse_state != ARRAY_ELEM_STARTED &&
+ parse_state != ARRAY_ELEM_COMPLETED &&
+ parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
+ parse_state != ARRAY_LEVEL_COMPLETED)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str)));
+ if (parse_state == ARRAY_LEVEL_COMPLETED)
+ parse_state = ARRAY_LEVEL_DELIMITED;
+ else
+ parse_state = ARRAY_ELEM_DELIMITED;
+ itemdone = true;
+ nelems[nest_level - 1]++;
+ }
+ else if (!isspace(*ptr))
+ {
+ /*
+ * Other non-space characters must be after a level
+ * start, after an element start, or after an element
+ * delimiter. In any case we now must be past an
+ * element start.
+ */
+ if (parse_state != ARRAY_LEVEL_STARTED &&
+ parse_state != ARRAY_ELEM_STARTED &&
+ parse_state != ARRAY_ELEM_DELIMITED)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str)));
+ parse_state = ARRAY_ELEM_STARTED;
+ }
}
break;
}
while (!eoArray)
{
bool itemdone = false;
+ bool itemquoted = false;
int i = -1;
char *itemstart;
+ char *eptr;
/* skip leading whitespace */
while (isspace((unsigned char) *ptr))
ptr++;
+
itemstart = ptr;
while (!itemdone)
char *cptr;
scanning_string = !scanning_string;
- /* Crunch the string on top of the quote. */
- for (cptr = ptr; *cptr != '\0'; cptr++)
- *cptr = *(cptr + 1);
- /* Back up to not miss following character. */
- ptr--;
+ if (scanning_string)
+ {
+ itemquoted = true;
+ /* Crunch the string on top of the first quote. */
+ for (cptr = ptr; *cptr != '\0'; cptr++)
+ *cptr = *(cptr + 1);
+ /* Back up to not miss following character. */
+ ptr--;
+ }
break;
}
case '{':
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("malformed array literal: \"%s\"", arrayStr)));
+ /*
+ * skip trailing whitespace
+ */
+ eptr = ptr - 1;
+ if (!itemquoted)
+ {
+ /* skip to last non-NUL, non-space character */
+ while ((*eptr == '\0') || (isspace((unsigned char) *eptr)))
+ eptr--;
+ *(++eptr) = '\0';
+ }
+ else
+ {
+ /* skip to last quote character */
+ while (*eptr != '"')
+ eptr--;
+ *eptr = '\0';
+ }
+
values[i] = FunctionCall3(inputproc,
CStringGetDatum(itemstart),
ObjectIdGetDatum(typioparam),