OSDN Git Service

Tightened up syntax checking of array input processing considerably. Junk that
authorJoe Conway <mail@joeconway.com>
Sun, 8 Aug 2004 05:01:55 +0000 (05:01 +0000)
committerJoe Conway <mail@joeconway.com>
Sun, 8 Aug 2004 05:01:55 +0000 (05:01 +0000)
was previously allowed in odd places with odd results now causes an ERROR.
Also changed behavior with respect to whitespace -- trailing whitespace is
now ignored as well as leading whitespace (which has always been ignored).

Documentation updated to reflect change in whitespace handling. Also some
refactoring to what I believe is a more sensible order of several paragraphs.

doc/src/sgml/array.sgml
src/backend/utils/adt/arrayfuncs.c

index ae2d74e..e5ca426 100644 (file)
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/array.sgml,v 1.36 2004/08/05 03:29:11 joe Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/array.sgml,v 1.37 2004/08/08 05:01:51 joe Exp $ -->
 
 <sect1 id="arrays">
  <title>Arrays</title>
@@ -95,10 +95,12 @@ CREATE TABLE tictactoe (
 </synopsis>
    where <replaceable>delim</replaceable> is the delimiter character
    for the type, as recorded in its <literal>pg_type</literal> entry.
-   (For all built-in types, this is the comma character
-   <quote><literal>,</literal></>.)  Each
-   <replaceable>val</replaceable> is either a constant of the array
-   element type, or a subarray.  An example of an array constant is
+   Among the standard data types provided in the
+   <productname>PostgreSQL</productname> distribution, type
+   <literal>box</> uses a semicolon (<literal>;</>) but all the others
+   use comma (<literal>,</>). Each <replaceable>val</replaceable> is
+   either a constant of the array element type, or a subarray. An example
+   of an array constant is
 <programlisting>
 '{{1,2,3},{4,5,6},{7,8,9}}'
 </programlisting>
@@ -161,7 +163,7 @@ SELECT * FROM sal_emp;
  </para>
 
  <para>
-  The <literal>ARRAY</literal> expression syntax may also be used:
+  The <literal>ARRAY</> constructor syntax may also be used:
 <programlisting>
 INSERT INTO sal_emp
     VALUES ('Bill',
@@ -176,8 +178,8 @@ INSERT INTO sal_emp
   Notice that the array elements are ordinary SQL constants or
   expressions; for instance, string literals are single quoted, instead of
   double quoted as they would be in an array literal.  The <literal>ARRAY</>
-  expression syntax is discussed in more detail in <xref
-  linkend="sql-syntax-array-constructors">.
+  constructor syntax is discussed in more detail in
+  <xref linkend="sql-syntax-array-constructors">.
  </para>
  </sect2>
 
@@ -524,10 +526,17 @@ SELECT * FROM sal_emp WHERE 10000 = ALL (pay_by_quarter);
    use comma.)  In a multidimensional array, each dimension (row, plane,
    cube, etc.) gets its own level of curly braces, and delimiters
    must be written between adjacent curly-braced entities of the same level.
-   You may write whitespace before a left brace, after a right
-   brace, or before any individual item string.  Whitespace after an item
-   is not ignored, however: after skipping leading whitespace, everything
-   up to the next right brace or delimiter is taken as the item value.
+  </para>
+
+  <para>
+   The array output routine will put double quotes around element values
+   if they are empty strings or contain curly braces, delimiter characters,
+   double quotes, backslashes, or white space.  Double quotes and backslashes
+   embedded in element values will be backslash-escaped.  For numeric
+   data types it is safe to assume that double quotes will never appear, but
+   for textual data types one should be prepared to cope with either presence
+   or absence of quotes.  (This is a change in behavior from pre-7.2
+   <productname>PostgreSQL</productname> releases.)
   </para>
 
   <para>
@@ -573,26 +582,22 @@ SELECT f1[1][-2][3] AS e1, f1[1][-1][5] AS e2
 
   <para>
    As shown previously, when writing an array value you may write double
-   quotes around any individual array
-   element.  You <emphasis>must</> do so if the element value would otherwise
-   confuse the array-value parser.  For example, elements containing curly
-   braces, commas (or whatever the delimiter character is), double quotes,
-   backslashes, or leading white space must be double-quoted.  To put a double
-   quote or backslash in a quoted array element value, precede it with a
-   backslash.
-   Alternatively, you can use backslash-escaping to protect all data characters
-   that would otherwise be taken as array syntax or ignorable white space.
+   quotes around any individual array element. You <emphasis>must</> do so
+   if the element value would otherwise confuse the array-value parser.
+   For example, elements containing curly braces, commas (or whatever the
+   delimiter character is), double quotes, backslashes, or leading white
+   space must be double-quoted.  To put a double quote or backslash in a
+   quoted array element value, precede it with a backslash. Alternatively,
+   you can use backslash-escaping to protect all data characters that would
+   otherwise be taken as array syntax.
   </para>
 
   <para>
-   The array output routine will put double quotes around element values
-   if they are empty strings or contain curly braces, delimiter characters,
-   double quotes, backslashes, or white space.  Double quotes and backslashes
-   embedded in element values will be backslash-escaped.  For numeric
-   data types it is safe to assume that double quotes will never appear, but
-   for textual data types one should be prepared to cope with either presence
-   or absence of quotes.  (This is a change in behavior from pre-7.2
-   <productname>PostgreSQL</productname> releases.)
+   You may write whitespace before a left brace or after a right
+   brace. You may also write whitespace before or after any individual item
+   string. In all of these cases the whitespace will be ignored. However,
+   whitespace within double-quoted elements, or surrounded on both sides by
+   non-whitespace characters of an element, is not ignored.
   </para>
 
  <note>
@@ -616,10 +621,11 @@ INSERT ... VALUES ('{"\\\\","\\""}');
 
  <tip>
   <para>
-   The <literal>ARRAY</> constructor syntax is often easier to work with
-   than the array-literal syntax when writing array values in SQL commands.
-   In <literal>ARRAY</>, individual element values are written the same way
-   they would be written when not members of an array.
+   The <literal>ARRAY</> constructor syntax (see
+   <xref linkend="sql-syntax-array-constructors">) is often easier to work
+   with than the array-literal syntax when writing array values in SQL
+   commands. In <literal>ARRAY</>, individual element values are written the
+   same way they would be written when not members of an array.
   </para>
  </tip>
  </sect2>
index a8b1b63..8e46c2f 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/arrayfuncs.c,v 1.106 2004/08/05 03:29:37 joe Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/arrayfuncs.c,v 1.107 2004/08/08 05:01:55 joe Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -351,18 +351,32 @@ array_in(PG_FUNCTION_ARGS)
  *              The syntax for array input is C-like nested curly braces
  *-----------------------------------------------------------------------------
  */
+typedef enum
+{
+       ARRAY_NO_LEVEL,
+       ARRAY_LEVEL_STARTED,
+       ARRAY_ELEM_STARTED,
+       ARRAY_ELEM_COMPLETED,
+       ARRAY_QUOTED_ELEM_STARTED,
+       ARRAY_QUOTED_ELEM_COMPLETED,
+       ARRAY_ELEM_DELIMITED,
+       ARRAY_LEVEL_COMPLETED,
+       ARRAY_LEVEL_DELIMITED
+} ArrayParseState;
+
 static int
 ArrayCount(char *str, int *dim, char typdelim)
 {
-       int                     nest_level = 0,
-                               i;
-       int                     ndim = 1,
-                               temp[MAXDIM],
-                               nelems[MAXDIM],
-                               nelems_last[MAXDIM];
-       bool            scanning_string = false;
-       bool            eoArray = false;
-       char       *ptr;
+       int                             nest_level = 0,
+                                       i;
+       int                             ndim = 1,
+                                       temp[MAXDIM],
+                                       nelems[MAXDIM],
+                                       nelems_last[MAXDIM];
+       bool                    scanning_string = false;
+       bool                    eoArray = false;
+       char               *ptr;
+       ArrayParseState parse_state = ARRAY_NO_LEVEL;
 
        for (i = 0; i < MAXDIM; ++i)
        {
@@ -370,6 +384,7 @@ ArrayCount(char *str, int *dim, char typdelim)
                nelems_last[i] = nelems[i] = 1;
        }
 
+       /* special case for an empty array */
        if (strncmp(str, "{}", 2) == 0)
                return 0;
 
@@ -389,6 +404,20 @@ ArrayCount(char *str, int *dim, char typdelim)
                                                errmsg("malformed array literal: \"%s\"", str)));
                                        break;
                                case '\\':
+                                       /*
+                                        * An escape must be after a level start, after an
+                                        * element start, or after an element delimiter. In any
+                                        * case we now must be past an element start.
+                                        */
+                                       if (parse_state != ARRAY_LEVEL_STARTED &&
+                                               parse_state != ARRAY_ELEM_STARTED &&
+                                               parse_state != ARRAY_QUOTED_ELEM_STARTED &&
+                                               parse_state != ARRAY_ELEM_DELIMITED)
+                                               ereport(ERROR,
+                                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                                       errmsg("malformed array literal: \"%s\"", str)));
+                                       if (parse_state != ARRAY_QUOTED_ELEM_STARTED)
+                                               parse_state = ARRAY_ELEM_STARTED;
                                        /* skip the escaped character */
                                        if (*(ptr + 1))
                                                ptr++;
@@ -398,11 +427,38 @@ ArrayCount(char *str, int *dim, char typdelim)
                                                errmsg("malformed array literal: \"%s\"", str)));
                                        break;
                                case '\"':
+                                       /*
+                                        * A quote must be after a level start, after a quoted
+                                        * element start, or after an element delimiter. In any
+                                        * case we now must be past an element start.
+                                        */
+                                       if (parse_state != ARRAY_LEVEL_STARTED &&
+                                               parse_state != ARRAY_QUOTED_ELEM_STARTED &&
+                                               parse_state != ARRAY_ELEM_DELIMITED)
+                                               ereport(ERROR,
+                                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                                       errmsg("malformed array literal: \"%s\"", str)));
                                        scanning_string = !scanning_string;
+                                       if (scanning_string)
+                                               parse_state = ARRAY_QUOTED_ELEM_STARTED;
+                                       else
+                                               parse_state = ARRAY_QUOTED_ELEM_COMPLETED;
                                        break;
                                case '{':
                                        if (!scanning_string)
                                        {
+                                               /*
+                                                * A left brace can occur if no nesting has
+                                                * occurred yet, after a level start, or
+                                                * after a level delimiter.
+                                                */
+                                               if (parse_state != ARRAY_NO_LEVEL &&
+                                                       parse_state != ARRAY_LEVEL_STARTED &&
+                                                       parse_state != ARRAY_LEVEL_DELIMITED)
+                                                       ereport(ERROR,
+                                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                                               errmsg("malformed array literal: \"%s\"", str)));
+                                               parse_state = ARRAY_LEVEL_STARTED;
                                                if (nest_level >= MAXDIM)
                                                        ereport(ERROR,
                                                                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
@@ -417,6 +473,19 @@ ArrayCount(char *str, int *dim, char typdelim)
                                case '}':
                                        if (!scanning_string)
                                        {
+                                               /*
+                                                * A right brace can occur after an element start,
+                                                * an element completion, a quoted element completion,
+                                                * or a level completion.
+                                                */
+                                               if (parse_state != ARRAY_ELEM_STARTED &&
+                                                       parse_state != ARRAY_ELEM_COMPLETED &&
+                                                       parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
+                                                       parse_state != ARRAY_LEVEL_COMPLETED)
+                                                       ereport(ERROR,
+                                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                                               errmsg("malformed array literal: \"%s\"", str)));
+                                               parse_state = ARRAY_LEVEL_COMPLETED;
                                                if (nest_level == 0)
                                                        ereport(ERROR,
                                                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
@@ -445,10 +514,45 @@ ArrayCount(char *str, int *dim, char typdelim)
                                        }
                                        break;
                                default:
-                                       if (*ptr == typdelim && !scanning_string)
+                                       if (!scanning_string)
                                        {
-                                               itemdone = true;
-                                               nelems[nest_level - 1]++;
+                                               if (*ptr == typdelim)
+                                               {
+                                                       /*
+                                                       * Delimiters can occur after an element start,
+                                                       * an element completion, a quoted element
+                                                       * completion, or a level completion.
+                                                       */
+                                                       if (parse_state != ARRAY_ELEM_STARTED &&
+                                                               parse_state != ARRAY_ELEM_COMPLETED &&
+                                                               parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
+                                                               parse_state != ARRAY_LEVEL_COMPLETED)
+                                                               ereport(ERROR,
+                                                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                                                       errmsg("malformed array literal: \"%s\"", str)));
+                                                       if (parse_state == ARRAY_LEVEL_COMPLETED)
+                                                               parse_state = ARRAY_LEVEL_DELIMITED;
+                                                       else
+                                                               parse_state = ARRAY_ELEM_DELIMITED;
+                                                       itemdone = true;
+                                                       nelems[nest_level - 1]++;
+                                               }
+                                               else if (!isspace(*ptr))
+                                               {
+                                                       /*
+                                                       * Other non-space characters must be after a level
+                                                       * start, after an element start, or after an element
+                                                       * delimiter. In any case we now must be past an
+                                                       * element start.
+                                                       */
+                                                       if (parse_state != ARRAY_LEVEL_STARTED &&
+                                                               parse_state != ARRAY_ELEM_STARTED &&
+                                                               parse_state != ARRAY_ELEM_DELIMITED)
+                                                               ereport(ERROR,
+                                                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                                                       errmsg("malformed array literal: \"%s\"", str)));
+                                                       parse_state = ARRAY_ELEM_STARTED;
+                                               }
                                        }
                                        break;
                        }
@@ -511,12 +615,15 @@ ReadArrayStr(char *arrayStr,
        while (!eoArray)
        {
                bool            itemdone = false;
+               bool            itemquoted = false;
                int                     i = -1;
                char       *itemstart;
+               char       *eptr;
 
                /* skip leading whitespace */
                while (isspace((unsigned char) *ptr))
                        ptr++;
+
                itemstart = ptr;
 
                while (!itemdone)
@@ -547,11 +654,15 @@ ReadArrayStr(char *arrayStr,
                                                char       *cptr;
 
                                                scanning_string = !scanning_string;
-                                               /* Crunch the string on top of the quote. */
-                                               for (cptr = ptr; *cptr != '\0'; cptr++)
-                                                       *cptr = *(cptr + 1);
-                                               /* Back up to not miss following character. */
-                                               ptr--;
+                                               if (scanning_string)
+                                               {
+                                                       itemquoted = true;
+                                                       /* Crunch the string on top of the first quote. */
+                                                       for (cptr = ptr; *cptr != '\0'; cptr++)
+                                                               *cptr = *(cptr + 1);
+                                                       /* Back up to not miss following character. */
+                                                       ptr--;
+                                               }
                                                break;
                                        }
                                case '{':
@@ -615,6 +726,25 @@ ReadArrayStr(char *arrayStr,
                                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                   errmsg("malformed array literal: \"%s\"", arrayStr)));
 
+               /*
+                * skip trailing whitespace
+                */
+               eptr = ptr - 1;
+               if (!itemquoted)
+               {
+                       /* skip to last non-NULL, non-space, character */
+                       while ((*eptr == '\0') || (isspace((unsigned char) *eptr)))
+                               eptr--;
+                       *(++eptr) = '\0';
+               }
+               else
+               {
+                       /* skip to last quote character */
+                       while (*eptr != '"')
+                               eptr--;
+                       *eptr = '\0';
+               }
+
                values[i] = FunctionCall3(inputproc,
                                                                  CStringGetDatum(itemstart),
                                                                  ObjectIdGetDatum(typioparam),