OSDN Git Service

Add regression tests for CSV and \., and add automatic quoting of a
authorBruce Momjian <bruce@momjian.us>
Wed, 28 Dec 2005 03:25:32 +0000 (03:25 +0000)
committerBruce Momjian <bruce@momjian.us>
Wed, 28 Dec 2005 03:25:32 +0000 (03:25 +0000)
single column dump that has a \. value, so the load works properly.  I
also added documentation describing this issue.

doc/src/sgml/ref/copy.sgml
src/backend/commands/copy.c
src/test/regress/expected/copy2.out
src/test/regress/sql/copy2.sql

index 2d8dc94..becaecf 100644 (file)
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.70 2005/10/15 20:12:33 neilc Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.71 2005/12/28 03:25:32 momjian Exp $
 PostgreSQL documentation
 -->
 
@@ -511,17 +511,28 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
     comparisons for specific columns.
    </para>
 
+   <para> 
+    Because backslash is not a special character in the <literal>CSV</>
+    format, <literal>\.</>, the end-of-data marker, could also appear
+    as a data value.  To avoid any misinterpretation, a <literal>\.</>
+    data value appearing as a lone entry on a line is automatically 
+    quoted on output, and on input, if quoted, is not interpreted as the 
+    end-of-data marker.  If you are loading a file produced by another
+    program into a single-column table, and a line might consist solely of
+    an unquoted <literal>\.</>, you need to quote that value in the input file.
+   </para>
+
    <note>
-       <para>
-        In <literal>CSV</> mode, all characters are significant. A quoted value 
-        surrounded by white space, or any characters other than 
-        <literal>DELIMITER</>, will include those characters. This can cause 
-        errors if  you import data from a system that pads <literal>CSV</> 
-        lines with white space out to some fixed width. If such a situation 
-        arises you might need to preprocess the <literal>CSV</> file to remove 
-        the trailing white space, before importing the data into 
-        <productname>PostgreSQL</>. 
-       </para>
+    <para>
+     In <literal>CSV</> mode, all characters are significant. A quoted value 
+     surrounded by white space, or any characters other than 
+     <literal>DELIMITER</>, will include those characters. This can cause 
+     errors if you import data from a system that pads <literal>CSV</> 
+     lines with white space out to some fixed width. If such a situation 
+     arises you might need to preprocess the <literal>CSV</> file to remove 
+     the trailing white space, before importing the data into 
+     <productname>PostgreSQL</>. 
+    </para>
    </note>
 
    <note>
index ae1d40e..f97aafc 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.256 2005/12/27 18:10:48 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.257 2005/12/28 03:25:32 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -244,7 +244,7 @@ static Datum CopyReadBinaryAttribute(CopyState cstate,
                                                bool *isnull);
 static void CopyAttributeOutText(CopyState cstate, char *server_string);
 static void CopyAttributeOutCSV(CopyState cstate, char *server_string,
-                                       bool use_quote);
+                                       bool use_quote, bool single_attr);
 static List *CopyGetAttnums(Relation rel, List *attnamelist);
 static char *limit_printout_length(const char *str);
 
@@ -1284,7 +1284,8 @@ CopyTo(CopyState cstate)
 
                                colname = NameStr(attr[attnum - 1]->attname);
 
-                               CopyAttributeOutCSV(cstate, colname, false);
+                               CopyAttributeOutCSV(cstate, colname, false,
+                                                                       list_length(cstate->attnumlist) == 1);
                        }
 
                        CopySendEndOfRow(cstate);
@@ -1359,7 +1360,8 @@ CopyTo(CopyState cstate)
                                                                                                                   value));
                                        if (cstate->csv_mode)
                                                CopyAttributeOutCSV(cstate, string,
-                                                                                       force_quote[attnum - 1]);
+                                                                                       force_quote[attnum - 1],
+                                                                                       list_length(cstate->attnumlist) == 1);
                                        else
                                                CopyAttributeOutText(cstate, string);
                                }
@@ -2968,7 +2970,7 @@ CopyAttributeOutText(CopyState cstate, char *server_string)
  */
 static void
 CopyAttributeOutCSV(CopyState cstate, char *server_string,
-                                       bool use_quote)
+                                       bool use_quote, bool single_attr)
 {
        char       *string;
        char            c;
@@ -2993,17 +2995,27 @@ CopyAttributeOutCSV(CopyState cstate, char *server_string,
         */
        if (!use_quote)
        {
-               for (tstring = string; (c = *tstring) != '\0'; tstring += mblen)
-               {
-                       if (c == delimc || c == quotec || c == '\n' || c == '\r')
+               /*
+                *      Because '\.' can be a data value, quote it if it appears
+                *      alone on a line so it is not interpreted as the end-of-data
+                *      marker.
+                */
+               if (single_attr && strcmp(string, "\\.") == 0)
+                       use_quote = true;
+               else
+               {
+                       for (tstring = string; (c = *tstring) != '\0'; tstring += mblen)
                        {
-                               use_quote = true;
-                               break;
+                               if (c == delimc || c == quotec || c == '\n' || c == '\r')
+                               {
+                                       use_quote = true;
+                                       break;
+                               }
+                               if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
+                                       mblen = pg_encoding_mblen(cstate->client_encoding, tstring);
+                               else
+                                       mblen = 1;
                        }
-                       if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
-                               mblen = pg_encoding_mblen(cstate->client_encoding, tstring);
-                       else
-                               mblen = 1;
                }
        }
 
index 78f2060..524e88c 100644 (file)
@@ -194,6 +194,9 @@ COPY y TO stdout WITH CSV FORCE QUOTE col2 ESCAPE E'\\';
 --test that we read consecutive LFs properly
 CREATE TEMP TABLE testnl (a int, b text, c int);
 COPY testnl FROM stdin CSV;
+-- test end of copy marker
+CREATE TEMP TABLE testeoc (a text);
+COPY testeoc FROM stdin CSV;
 DROP TABLE x, y;
 DROP FUNCTION fn_x_before();
 DROP FUNCTION fn_x_after();
index add8214..d962d2e 100644 (file)
@@ -139,6 +139,16 @@ COPY testnl FROM stdin CSV;
 inside",2
 \.
 
+-- test end of copy marker
+CREATE TEMP TABLE testeoc (a text);
+
+COPY testeoc FROM stdin CSV;
+a\.
+\.b
+c\.d
+"\."
+\.
+
 
 DROP TABLE x, y;
 DROP FUNCTION fn_x_before();