OSDN Git Service

9f7263d59a54107afdf0fd44a79154e32225ad2c
[pg-rex/syncrep.git] / src / backend / commands / copy.c
1 /*-------------------------------------------------------------------------
2  *
3  * copy.c
4  *              Implements the COPY utility command
5  *
6  * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        src/backend/commands/copy.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16
17 #include <ctype.h>
18 #include <unistd.h>
19 #include <sys/stat.h>
20 #include <netinet/in.h>
21 #include <arpa/inet.h>
22
23 #include "access/heapam.h"
24 #include "access/sysattr.h"
25 #include "access/xact.h"
26 #include "catalog/namespace.h"
27 #include "catalog/pg_type.h"
28 #include "commands/copy.h"
29 #include "commands/defrem.h"
30 #include "commands/trigger.h"
31 #include "executor/executor.h"
32 #include "libpq/libpq.h"
33 #include "libpq/pqformat.h"
34 #include "mb/pg_wchar.h"
35 #include "miscadmin.h"
36 #include "optimizer/planner.h"
37 #include "parser/parse_relation.h"
38 #include "rewrite/rewriteHandler.h"
39 #include "storage/fd.h"
40 #include "tcop/tcopprot.h"
41 #include "utils/acl.h"
42 #include "utils/builtins.h"
43 #include "utils/lsyscache.h"
44 #include "utils/memutils.h"
45 #include "utils/snapmgr.h"
46
47
/* Nonzero when c is one of the characters '0' through '7'. */
#define ISOCTAL(c) (!(((c) < '0') || ((c) > '7')))
/* Converts a digit character to its numeric value by subtracting '0'. */
#define OCTVALUE(c) ((c) - '0')
50
51 /*
52  * Represents the different source/dest cases we need to worry about at
53  * the bottom level
54  */
typedef enum CopyDest
{
	COPY_FILE = 0,				/* data goes to / comes from a file */
	COPY_OLD_FE,				/* data goes to / comes from a frontend using
								 * the 2.0 protocol */
	COPY_NEW_FE					/* data goes to / comes from a frontend using
								 * the 3.0 protocol */
} CopyDest;
61
62 /*
63  *      Represents the end-of-line terminator type of the input
64  */
typedef enum EolType
{
	EOL_UNKNOWN = 0,			/* terminator style not known */
	EOL_NL,						/* NL */
	EOL_CR,						/* CR */
	EOL_CRNL					/* CR followed by NL */
} EolType;
72
73 /*
74  * This struct contains all the state variables used throughout a COPY
75  * operation. For simplicity, we use the same struct for all variants of COPY,
76  * even though some fields are used in only some cases.
77  *
78  * Multi-byte encodings: all supported client-side encodings encode multi-byte
79  * characters by having the first byte's high bit set. Subsequent bytes of the
80  * character can have the high bit not set. When scanning data in such an
81  * encoding to look for a match to a single-byte (ie ASCII) character, we must
82  * use the full pg_encoding_mblen() machinery to skip over multibyte
83  * characters, else we might find a false match to a trailing byte. In
84  * supported server encodings, there is no possibility of a false match, and
85  * it's faster to make useless comparisons to trailing bytes than it is to
86  * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is TRUE
87  * when we have to do it the hard way.
88  */
89 typedef struct CopyStateData
90 {
91         /* low-level state data */
92         CopyDest        copy_dest;              /* type of copy source/destination */
93         FILE       *copy_file;          /* used if copy_dest == COPY_FILE */
94         StringInfo      fe_msgbuf;              /* used for all dests during COPY TO, only for
95                                                                  * dest == COPY_NEW_FE in COPY FROM */
96         bool            fe_eof;                 /* true if detected end of copy data */
97         EolType         eol_type;               /* EOL type of input */
98         int                     client_encoding;        /* remote side's character encoding */
99         bool            need_transcoding;               /* client encoding diff from server? */
100         bool            encoding_embeds_ascii;  /* ASCII can be non-first byte? */
101
102         /* parameters from the COPY command */
103         Relation        rel;                    /* relation to copy to or from */
104         QueryDesc  *queryDesc;          /* executable query to copy from */
105         List       *attnumlist;         /* integer list of attnums to copy */
106         char       *filename;           /* filename, or NULL for STDIN/STDOUT */
107         bool            binary;                 /* binary format? */
108         bool            oids;                   /* include OIDs? */
109         bool            csv_mode;               /* Comma Separated Value format? */
110         bool            header_line;    /* CSV header line? */
111         char       *null_print;         /* NULL marker string (server encoding!) */
112         int                     null_print_len; /* length of same */
113         char       *null_print_client;          /* same converted to client encoding */
114         char       *delim;                      /* column delimiter (must be 1 byte) */
115         char       *quote;                      /* CSV quote char (must be 1 byte) */
116         char       *escape;                     /* CSV escape char (must be 1 byte) */
117         bool       *force_quote_flags;          /* per-column CSV FQ flags */
118         bool       *force_notnull_flags;        /* per-column CSV FNN flags */
119
120         /* these are just for error messages, see CopyFromErrorCallback */
121         const char *cur_relname;        /* table name for error messages */
122         int                     cur_lineno;             /* line number for error messages */
123         const char *cur_attname;        /* current att for error messages */
124         const char *cur_attval;         /* current att value for error messages */
125
126         /*
127          * Working state for COPY TO/FROM
128          */
129         MemoryContext copycontext;      /* per-copy execution context */
130
131         /*
132          * Working state for COPY TO
133          */
134         FmgrInfo   *out_functions;      /* lookup info for output functions */
135         MemoryContext rowcontext;       /* per-row evaluation context */
136
137         /*
138          * Working state for COPY FROM
139          */
140         AttrNumber      num_defaults;
141         bool            file_has_oids;
142         FmgrInfo        oid_in_function;
143         Oid                     oid_typioparam;
144         FmgrInfo   *in_functions;       /* array of input functions for each attrs */
145         Oid                *typioparams;        /* array of element types for in_functions */
146         int                *defmap;                     /* array of default att numbers */
147         ExprState **defexprs;           /* array of default att expressions */
148
149         /*
150          * These variables are used to reduce overhead in textual COPY FROM.
151          *
152          * attribute_buf holds the separated, de-escaped text for each field of
153          * the current line.  The CopyReadAttributes functions return arrays of
154          * pointers into this buffer.  We avoid palloc/pfree overhead by re-using
155          * the buffer on each cycle.
156          */
157         StringInfoData attribute_buf;
158
159         /* field raw data pointers found by COPY FROM */
160
161         int max_fields;
162         char ** raw_fields;
163
164         /*
165          * Similarly, line_buf holds the whole input line being processed. The
166          * input cycle is first to read the whole line into line_buf, convert it
167          * to server encoding there, and then extract the individual attribute
168          * fields into attribute_buf.  line_buf is preserved unmodified so that we
169          * can display it in error messages if appropriate.
170          */
171         StringInfoData line_buf;
172         bool            line_buf_converted;             /* converted to server encoding? */
173
174         /*
175          * Finally, raw_buf holds raw data read from the data source (file or
176          * client connection).  CopyReadLine parses this data sufficiently to
177          * locate line boundaries, then transfers the data to line_buf and
178          * converts it.  Note: we guarantee that there is a \0 at
179          * raw_buf[raw_buf_len].
180          */
181 #define RAW_BUF_SIZE 65536              /* we palloc RAW_BUF_SIZE+1 bytes */
182         char       *raw_buf;
183         int                     raw_buf_index;  /* next byte to process */
184         int                     raw_buf_len;    /* total # of bytes stored */
185 } CopyStateData;
186
187 /* DestReceiver for COPY (SELECT) TO */
188 typedef struct
189 {
190         DestReceiver pub;                       /* publicly-known function pointers */
191         CopyState       cstate;                 /* CopyStateData for the command */
192         uint64          processed;              /* # of tuples processed */
193 } DR_copy;
194
195
196 /*
197  * These macros centralize code used to process line_buf and raw_buf buffers.
198  * They are macros because they often do continue/break control and to avoid
199  * function call overhead in tight COPY loops.
200  *
201  * We must use "if (1)" because the usual "do {...} while(0)" wrapper would
202  * prevent the continue/break processing from working.  We end the "if (1)"
203  * with "else ((void) 0)" to ensure the "if" does not unintentionally match
204  * any "else" in the calling code, and to avoid any compiler warnings about
205  * empty statements.  See http://www.cit.gu.edu.au/~anthony/info/C/C.macros.
206  */
207
208 /*
209  * This keeps the character read at the top of the loop in the buffer
210  * even if there is more than one read-ahead.
211  */
#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen) \
if (1) \
{ \
	if (raw_buf_ptr + (extralen) >= copy_buf_len) \
	{ \
		if (!hit_eof) \
		{ \
			/* not at EOF yet: undo the fetch and request more data */ \
			raw_buf_ptr = prev_raw_ptr; \
			need_data = true; \
			continue; \
		} \
	} \
} else ((void) 0)
222
223 /* This consumes the remainder of the buffer and breaks */
#define IF_NEED_REFILL_AND_EOF_BREAK(extralen) \
if (1) \
{ \
	if (raw_buf_ptr + (extralen) >= copy_buf_len) \
	{ \
		if (hit_eof) \
		{ \
			/* with a nonzero extralen, swallow the partial character */ \
			if (extralen) \
				raw_buf_ptr = copy_buf_len; \
			/* backslash just before EOF, treat as data char */ \
			result = true; \
			break; \
		} \
	} \
} else ((void) 0)
236
237 /*
238  * Transfer any approved data to line_buf; must do this to be sure
239  * there is some room in raw_buf.
240  */
#define REFILL_LINEBUF \
if (1) \
{ \
	if (cstate->raw_buf_index < raw_buf_ptr) \
	{ \
		/* append raw_buf[raw_buf_index .. raw_buf_ptr) to line_buf */ \
		appendBinaryStringInfo(&cstate->line_buf, \
							   cstate->raw_buf + cstate->raw_buf_index, \
							   raw_buf_ptr - cstate->raw_buf_index); \
		/* those bytes now count as consumed */ \
		cstate->raw_buf_index = raw_buf_ptr; \
	} \
} else ((void) 0)
252
253 /* Undo any read-ahead and jump out of the block. */
#define NO_END_OF_COPY_GOTO \
if (1) \
{ \
	/* resume one byte past the position saved in prev_raw_ptr */ \
	raw_buf_ptr = 1 + prev_raw_ptr; \
	goto not_end_of_copy; \
} else ((void) 0)
260
/* The 11 signature bytes: "PGCOPY", NL, 0377, CR, NL, NUL. */
static const char BinarySignature[11] = {
	'P', 'G', 'C', 'O', 'P', 'Y', '\n', '\377', '\r', '\n', '\0'
};
262
263
264 /* non-export function prototypes */
265 static CopyState BeginCopy(bool is_from, Relation rel, Node *raw_query,
266                                 const char *queryString, List *attnamelist, List *options);
267 static void EndCopy(CopyState cstate);
268 static CopyState BeginCopyTo(Relation rel, Node *query, const char *queryString,
269                                 const char *filename, List *attnamelist, List *options);
270 static void EndCopyTo(CopyState cstate);
271 static uint64 DoCopyTo(CopyState cstate);
272 static uint64 CopyTo(CopyState cstate);
273 static void CopyOneRowTo(CopyState cstate, Oid tupleOid,
274                          Datum *values, bool *nulls);
275 static uint64 CopyFrom(CopyState cstate);
276 static bool CopyReadLine(CopyState cstate);
277 static bool CopyReadLineText(CopyState cstate);
278 static int CopyReadAttributesText(CopyState cstate);
279 static int CopyReadAttributesCSV(CopyState cstate);
280 static Datum CopyReadBinaryAttribute(CopyState cstate,
281                                                 int column_no, FmgrInfo *flinfo,
282                                                 Oid typioparam, int32 typmod,
283                                                 bool *isnull);
284 static void CopyAttributeOutText(CopyState cstate, char *string);
285 static void CopyAttributeOutCSV(CopyState cstate, char *string,
286                                         bool use_quote, bool single_attr);
287 static List *CopyGetAttnums(TupleDesc tupDesc, Relation rel,
288                            List *attnamelist);
289 static char *limit_printout_length(const char *str);
290
291 /* Low-level communications functions */
292 static void SendCopyBegin(CopyState cstate);
293 static void ReceiveCopyBegin(CopyState cstate);
294 static void SendCopyEnd(CopyState cstate);
295 static void CopySendData(CopyState cstate, void *databuf, int datasize);
296 static void CopySendString(CopyState cstate, const char *str);
297 static void CopySendChar(CopyState cstate, char c);
298 static void CopySendEndOfRow(CopyState cstate);
299 static int CopyGetData(CopyState cstate, void *databuf,
300                         int minread, int maxread);
301 static void CopySendInt32(CopyState cstate, int32 val);
302 static bool CopyGetInt32(CopyState cstate, int32 *val);
303 static void CopySendInt16(CopyState cstate, int16 val);
304 static bool CopyGetInt16(CopyState cstate, int16 *val);
305
306
307 /*
308  * Send copy start/stop messages for frontend copies.  These have changed
309  * in past protocol redesigns.
310  */
311 static void
312 SendCopyBegin(CopyState cstate)
313 {
314         if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
315         {
316                 /* new way */
317                 StringInfoData buf;
318                 int                     natts = list_length(cstate->attnumlist);
319                 int16           format = (cstate->binary ? 1 : 0);
320                 int                     i;
321
322                 pq_beginmessage(&buf, 'H');
323                 pq_sendbyte(&buf, format);              /* overall format */
324                 pq_sendint(&buf, natts, 2);
325                 for (i = 0; i < natts; i++)
326                         pq_sendint(&buf, format, 2);            /* per-column formats */
327                 pq_endmessage(&buf);
328                 cstate->copy_dest = COPY_NEW_FE;
329         }
330         else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
331         {
332                 /* old way */
333                 if (cstate->binary)
334                         ereport(ERROR,
335                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
336                         errmsg("COPY BINARY is not supported to stdout or from stdin")));
337                 pq_putemptymessage('H');
338                 /* grottiness needed for old COPY OUT protocol */
339                 pq_startcopyout();
340                 cstate->copy_dest = COPY_OLD_FE;
341         }
342         else
343         {
344                 /* very old way */
345                 if (cstate->binary)
346                         ereport(ERROR,
347                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
348                         errmsg("COPY BINARY is not supported to stdout or from stdin")));
349                 pq_putemptymessage('B');
350                 /* grottiness needed for old COPY OUT protocol */
351                 pq_startcopyout();
352                 cstate->copy_dest = COPY_OLD_FE;
353         }
354 }
355
356 static void
357 ReceiveCopyBegin(CopyState cstate)
358 {
359         if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
360         {
361                 /* new way */
362                 StringInfoData buf;
363                 int                     natts = list_length(cstate->attnumlist);
364                 int16           format = (cstate->binary ? 1 : 0);
365                 int                     i;
366
367                 pq_beginmessage(&buf, 'G');
368                 pq_sendbyte(&buf, format);              /* overall format */
369                 pq_sendint(&buf, natts, 2);
370                 for (i = 0; i < natts; i++)
371                         pq_sendint(&buf, format, 2);            /* per-column formats */
372                 pq_endmessage(&buf);
373                 cstate->copy_dest = COPY_NEW_FE;
374                 cstate->fe_msgbuf = makeStringInfo();
375         }
376         else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
377         {
378                 /* old way */
379                 if (cstate->binary)
380                         ereport(ERROR,
381                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
382                         errmsg("COPY BINARY is not supported to stdout or from stdin")));
383                 pq_putemptymessage('G');
384                 cstate->copy_dest = COPY_OLD_FE;
385         }
386         else
387         {
388                 /* very old way */
389                 if (cstate->binary)
390                         ereport(ERROR,
391                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
392                         errmsg("COPY BINARY is not supported to stdout or from stdin")));
393                 pq_putemptymessage('D');
394                 cstate->copy_dest = COPY_OLD_FE;
395         }
396         /* We *must* flush here to ensure FE knows it can send. */
397         pq_flush();
398 }
399
400 static void
401 SendCopyEnd(CopyState cstate)
402 {
403         if (cstate->copy_dest == COPY_NEW_FE)
404         {
405                 /* Shouldn't have any unsent data */
406                 Assert(cstate->fe_msgbuf->len == 0);
407                 /* Send Copy Done message */
408                 pq_putemptymessage('c');
409         }
410         else
411         {
412                 CopySendData(cstate, "\\.", 2);
413                 /* Need to flush out the trailer (this also appends a newline) */
414                 CopySendEndOfRow(cstate);
415                 pq_endcopyout(false);
416         }
417 }
418
419 /*----------
420  * CopySendData sends output data to the destination (file or frontend)
421  * CopySendString does the same for null-terminated strings
422  * CopySendChar does the same for single characters
423  * CopySendEndOfRow does the appropriate thing at end of each data row
424  *      (data is not actually flushed except by CopySendEndOfRow)
425  *
426  * NB: no data conversion is applied by these functions
427  *----------
428  */
429 static void
430 CopySendData(CopyState cstate, void *databuf, int datasize)
431 {
432         appendBinaryStringInfo(cstate->fe_msgbuf, (char *) databuf, datasize);
433 }
434
435 static void
436 CopySendString(CopyState cstate, const char *str)
437 {
438         appendBinaryStringInfo(cstate->fe_msgbuf, str, strlen(str));
439 }
440
441 static void
442 CopySendChar(CopyState cstate, char c)
443 {
444         appendStringInfoCharMacro(cstate->fe_msgbuf, c);
445 }
446
447 static void
448 CopySendEndOfRow(CopyState cstate)
449 {
450         StringInfo      fe_msgbuf = cstate->fe_msgbuf;
451
452         switch (cstate->copy_dest)
453         {
454                 case COPY_FILE:
455                         if (!cstate->binary)
456                         {
457                                 /* Default line termination depends on platform */
458 #ifndef WIN32
459                                 CopySendChar(cstate, '\n');
460 #else
461                                 CopySendString(cstate, "\r\n");
462 #endif
463                         }
464
465                         (void) fwrite(fe_msgbuf->data, fe_msgbuf->len,
466                                                   1, cstate->copy_file);
467                         if (ferror(cstate->copy_file))
468                                 ereport(ERROR,
469                                                 (errcode_for_file_access(),
470                                                  errmsg("could not write to COPY file: %m")));
471                         break;
472                 case COPY_OLD_FE:
473                         /* The FE/BE protocol uses \n as newline for all platforms */
474                         if (!cstate->binary)
475                                 CopySendChar(cstate, '\n');
476
477                         if (pq_putbytes(fe_msgbuf->data, fe_msgbuf->len))
478                         {
479                                 /* no hope of recovering connection sync, so FATAL */
480                                 ereport(FATAL,
481                                                 (errcode(ERRCODE_CONNECTION_FAILURE),
482                                                  errmsg("connection lost during COPY to stdout")));
483                         }
484                         break;
485                 case COPY_NEW_FE:
486                         /* The FE/BE protocol uses \n as newline for all platforms */
487                         if (!cstate->binary)
488                                 CopySendChar(cstate, '\n');
489
490                         /* Dump the accumulated row as one CopyData message */
491                         (void) pq_putmessage('d', fe_msgbuf->data, fe_msgbuf->len);
492                         break;
493         }
494
495         resetStringInfo(fe_msgbuf);
496 }
497
498 /*
499  * CopyGetData reads data from the source (file or frontend)
500  *
501  * We attempt to read at least minread, and at most maxread, bytes from
502  * the source.  The actual number of bytes read is returned; if this is
503  * less than minread, EOF was detected.
504  *
505  * Note: when copying from the frontend, we expect a proper EOF mark per
506  * protocol; if the frontend simply drops the connection, we raise error.
507  * It seems unwise to allow the COPY IN to complete normally in that case.
508  *
509  * NB: no data conversion is applied here.
510  */
511 static int
512 CopyGetData(CopyState cstate, void *databuf, int minread, int maxread)
513 {
514         int                     bytesread = 0;
515
516         switch (cstate->copy_dest)
517         {
518                 case COPY_FILE:
519                         bytesread = fread(databuf, 1, maxread, cstate->copy_file);
520                         if (ferror(cstate->copy_file))
521                                 ereport(ERROR,
522                                                 (errcode_for_file_access(),
523                                                  errmsg("could not read from COPY file: %m")));
524                         break;
525                 case COPY_OLD_FE:
526
527                         /*
528                          * We cannot read more than minread bytes (which in practice is 1)
529                          * because old protocol doesn't have any clear way of separating
530                          * the COPY stream from following data.  This is slow, but not any
531                          * slower than the code path was originally, and we don't care
532                          * much anymore about the performance of old protocol.
533                          */
534                         if (pq_getbytes((char *) databuf, minread))
535                         {
536                                 /* Only a \. terminator is legal EOF in old protocol */
537                                 ereport(ERROR,
538                                                 (errcode(ERRCODE_CONNECTION_FAILURE),
539                                                  errmsg("unexpected EOF on client connection")));
540                         }
541                         bytesread = minread;
542                         break;
543                 case COPY_NEW_FE:
544                         while (maxread > 0 && bytesread < minread && !cstate->fe_eof)
545                         {
546                                 int                     avail;
547
548                                 while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)
549                                 {
550                                         /* Try to receive another message */
551                                         int                     mtype;
552
553                         readmessage:
554                                         mtype = pq_getbyte();
555                                         if (mtype == EOF)
556                                                 ereport(ERROR,
557                                                                 (errcode(ERRCODE_CONNECTION_FAILURE),
558                                                          errmsg("unexpected EOF on client connection")));
559                                         if (pq_getmessage(cstate->fe_msgbuf, 0))
560                                                 ereport(ERROR,
561                                                                 (errcode(ERRCODE_CONNECTION_FAILURE),
562                                                          errmsg("unexpected EOF on client connection")));
563                                         switch (mtype)
564                                         {
565                                                 case 'd':               /* CopyData */
566                                                         break;
567                                                 case 'c':               /* CopyDone */
568                                                         /* COPY IN correctly terminated by frontend */
569                                                         cstate->fe_eof = true;
570                                                         return bytesread;
571                                                 case 'f':               /* CopyFail */
572                                                         ereport(ERROR,
573                                                                         (errcode(ERRCODE_QUERY_CANCELED),
574                                                                          errmsg("COPY from stdin failed: %s",
575                                                                            pq_getmsgstring(cstate->fe_msgbuf))));
576                                                         break;
577                                                 case 'H':               /* Flush */
578                                                 case 'S':               /* Sync */
579
580                                                         /*
581                                                          * Ignore Flush/Sync for the convenience of client
582                                                          * libraries (such as libpq) that may send those
583                                                          * without noticing that the command they just
584                                                          * sent was COPY.
585                                                          */
586                                                         goto readmessage;
587                                                 default:
588                                                         ereport(ERROR,
589                                                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
590                                                                          errmsg("unexpected message type 0x%02X during COPY from stdin",
591                                                                                         mtype)));
592                                                         break;
593                                         }
594                                 }
595                                 avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
596                                 if (avail > maxread)
597                                         avail = maxread;
598                                 pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
599                                 databuf = (void *) ((char *) databuf + avail);
600                                 maxread -= avail;
601                                 bytesread += avail;
602                         }
603                         break;
604         }
605
606         return bytesread;
607 }
608
609
610 /*
611  * These functions do apply some data conversion
612  */
613
614 /*
615  * CopySendInt32 sends an int32 in network byte order
616  */
617 static void
618 CopySendInt32(CopyState cstate, int32 val)
619 {
620         uint32          buf;
621
622         buf = htonl((uint32) val);
623         CopySendData(cstate, &buf, sizeof(buf));
624 }
625
626 /*
627  * CopyGetInt32 reads an int32 that appears in network byte order
628  *
629  * Returns true if OK, false if EOF
630  */
631 static bool
632 CopyGetInt32(CopyState cstate, int32 *val)
633 {
634         uint32          buf;
635
636         if (CopyGetData(cstate, &buf, sizeof(buf), sizeof(buf)) != sizeof(buf))
637         {
638                 *val = 0;                               /* suppress compiler warning */
639                 return false;
640         }
641         *val = (int32) ntohl(buf);
642         return true;
643 }
644
645 /*
646  * CopySendInt16 sends an int16 in network byte order
647  */
648 static void
649 CopySendInt16(CopyState cstate, int16 val)
650 {
651         uint16          buf;
652
653         buf = htons((uint16) val);
654         CopySendData(cstate, &buf, sizeof(buf));
655 }
656
657 /*
658  * CopyGetInt16 reads an int16 that appears in network byte order
659  */
660 static bool
661 CopyGetInt16(CopyState cstate, int16 *val)
662 {
663         uint16          buf;
664
665         if (CopyGetData(cstate, &buf, sizeof(buf), sizeof(buf)) != sizeof(buf))
666         {
667                 *val = 0;                               /* suppress compiler warning */
668                 return false;
669         }
670         *val = (int16) ntohs(buf);
671         return true;
672 }
673
674
675 /*
676  * CopyLoadRawBuf loads some more data into raw_buf
677  *
678  * Returns TRUE if able to obtain at least one more byte, else FALSE.
679  *
680  * If raw_buf_index < raw_buf_len, the unprocessed bytes are transferred
681  * down to the start of the buffer and then we load more data after that.
682  * This case is used only when a frontend multibyte character crosses a
683  * bufferload boundary.
684  */
685 static bool
686 CopyLoadRawBuf(CopyState cstate)
687 {
688         int                     nbytes;
689         int                     inbytes;
690
691         if (cstate->raw_buf_index < cstate->raw_buf_len)
692         {
693                 /* Copy down the unprocessed data */
694                 nbytes = cstate->raw_buf_len - cstate->raw_buf_index;
695                 memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
696                                 nbytes);
697         }
698         else
699                 nbytes = 0;                             /* no data need be saved */
700
701         inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes,
702                                                   1, RAW_BUF_SIZE - nbytes);
703         nbytes += inbytes;
704         cstate->raw_buf[nbytes] = '\0';
705         cstate->raw_buf_index = 0;
706         cstate->raw_buf_len = nbytes;
707         return (inbytes > 0);
708 }
709
710
711 /*
712  *       DoCopy executes the SQL COPY statement
713  *
714  * Either unload or reload contents of table <relation>, depending on <from>.
715  * (<from> = TRUE means we are inserting into the table.)  In the "TO" case
716  * we also support copying the output of an arbitrary SELECT query.
717  *
718  * If <pipe> is false, transfer is between the table and the file named
719  * <filename>.  Otherwise, transfer is between the table and our regular
720  * input/output stream. The latter could be either stdin/stdout or a
721  * socket, depending on whether we're running under Postmaster control.
722  *
723  * Do not allow a Postgres user without superuser privilege to read from
724  * or write to a file.
725  *
726  * Do not allow the copy if user doesn't have proper permission to access
727  * the table or the specifically requested columns.
728  */
729 uint64
730 DoCopy(const CopyStmt *stmt, const char *queryString)
731 {
732         CopyState       cstate;
733         bool            is_from = stmt->is_from;
734         bool            pipe = (stmt->filename == NULL);
735         Relation        rel;
736         uint64          processed;
737
738         /* Disallow file COPY except to superusers. */
739         if (!pipe && !superuser())
740                 ereport(ERROR,
741                                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
742                                  errmsg("must be superuser to COPY to or from a file"),
743                                  errhint("Anyone can COPY to stdout or from stdin. "
744                                                  "psql's \\copy command also works for anyone.")));
745
746         if (stmt->relation)
747         {
748                 TupleDesc               tupDesc;
749                 AclMode                 required_access = (is_from ? ACL_INSERT : ACL_SELECT);
750                 RangeTblEntry  *rte;
751                 List               *attnums;
752                 ListCell           *cur;
753
754                 Assert(!stmt->query);
755
756                 /* Open and lock the relation, using the appropriate lock type. */
757                 rel = heap_openrv(stmt->relation,
758                                                          (is_from ? RowExclusiveLock : AccessShareLock));
759
760                 rte = makeNode(RangeTblEntry);
761                 rte->rtekind = RTE_RELATION;
762                 rte->relid = RelationGetRelid(rel);
763                 rte->requiredPerms = required_access;
764
765                 tupDesc = RelationGetDescr(rel);
766                 attnums = CopyGetAttnums(tupDesc, rel, stmt->attlist);
767                 foreach (cur, attnums)
768                 {
769                         int             attno = lfirst_int(cur) -
770                                                         FirstLowInvalidHeapAttributeNumber;
771
772                         if (is_from)
773                                 rte->modifiedCols = bms_add_member(rte->modifiedCols, attno);
774                         else
775                                 rte->selectedCols = bms_add_member(rte->selectedCols, attno);
776                 }
777                 ExecCheckRTPerms(list_make1(rte), true);
778         }
779         else
780         {
781                 Assert(stmt->query);
782
783                 rel = NULL;
784         }
785
786         if (is_from)
787         {
788                 /* check read-only transaction */
789                 if (XactReadOnly && rel->rd_backend != MyBackendId)
790                         PreventCommandIfReadOnly("COPY FROM");
791
792                 cstate = BeginCopyFrom(rel, stmt->filename,
793                                                            stmt->attlist, stmt->options);
794                 processed = CopyFrom(cstate);   /* copy from file to database */
795                 EndCopyFrom(cstate);
796         }
797         else
798         {
799                 cstate = BeginCopyTo(rel, stmt->query, queryString, stmt->filename,
800                                                          stmt->attlist, stmt->options);
801                 processed = DoCopyTo(cstate);   /* copy from database to file */
802                 EndCopyTo(cstate);
803         }
804
805         /*
806          * Close the relation. If reading, we can release the AccessShareLock we
807          * got; if writing, we should hold the lock until end of transaction to
808          * ensure that updates will be committed before lock is released.
809          */
810         if (rel != NULL)
811                 heap_close(rel, (is_from ? NoLock : AccessShareLock));
812
813         return processed;
814 }
815
816 /*
817  * Common setup routines used by BeginCopyFrom and BeginCopyTo.
818  *
819  * Iff <binary>, unload or reload in the binary format, as opposed to the
820  * more wasteful but more robust and portable text format.
821  *
822  * Iff <oids>, unload or reload the format that includes OID information.
823  * On input, we accept OIDs whether or not the table has an OID column,
824  * but silently drop them if it does not.  On output, we report an error
825  * if the user asks for OIDs in a table that has none (not providing an
826  * OID column might seem friendlier, but could seriously confuse programs).
827  *
828  * If in the text format, delimit columns with delimiter <delim> and print
829  * NULL values as <null_print>.
830  */
831 static CopyState
832 BeginCopy(bool is_from,
833                   Relation rel,
834                   Node *raw_query,
835                   const char *queryString,
836                   List *attnamelist,
837                   List *options)
838 {
839         CopyState       cstate;
840         List       *force_quote = NIL;
841         List       *force_notnull = NIL;
842         bool            force_quote_all = false;
843         bool            format_specified = false;
844         ListCell   *option;
845         TupleDesc       tupDesc;
846         int                     num_phys_attrs;
847         MemoryContext oldcontext;
848
849         /* Allocate workspace and zero all fields */
850         cstate = (CopyStateData *) palloc0(sizeof(CopyStateData));
851
852         /*
853          * We allocate everything used by a cstate in a new memory context.
854          * This would avoid memory leaks repeated uses of COPY in a query.
855          */
856         cstate->copycontext = AllocSetContextCreate(CurrentMemoryContext,
857                                                                                                 "COPY",
858                                                                                                 ALLOCSET_DEFAULT_MINSIZE,
859                                                                                                 ALLOCSET_DEFAULT_INITSIZE,
860                                                                                                 ALLOCSET_DEFAULT_MAXSIZE);
861
862         oldcontext = MemoryContextSwitchTo(cstate->copycontext);
863
864         /* Extract options from the statement node tree */
865         foreach(option, options)
866         {
867                 DefElem    *defel = (DefElem *) lfirst(option);
868
869                 if (strcmp(defel->defname, "format") == 0)
870                 {
871                         char       *fmt = defGetString(defel);
872
873                         if (format_specified)
874                                 ereport(ERROR,
875                                                 (errcode(ERRCODE_SYNTAX_ERROR),
876                                                  errmsg("conflicting or redundant options")));
877                         format_specified = true;
878                         if (strcmp(fmt, "text") == 0)
879                                  /* default format */ ;
880                         else if (strcmp(fmt, "csv") == 0)
881                                 cstate->csv_mode = true;
882                         else if (strcmp(fmt, "binary") == 0)
883                                 cstate->binary = true;
884                         else
885                                 ereport(ERROR,
886                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
887                                                  errmsg("COPY format \"%s\" not recognized", fmt)));
888                 }
889                 else if (strcmp(defel->defname, "oids") == 0)
890                 {
891                         if (cstate->oids)
892                                 ereport(ERROR,
893                                                 (errcode(ERRCODE_SYNTAX_ERROR),
894                                                  errmsg("conflicting or redundant options")));
895                         cstate->oids = defGetBoolean(defel);
896                 }
897                 else if (strcmp(defel->defname, "delimiter") == 0)
898                 {
899                         if (cstate->delim)
900                                 ereport(ERROR,
901                                                 (errcode(ERRCODE_SYNTAX_ERROR),
902                                                  errmsg("conflicting or redundant options")));
903                         cstate->delim = defGetString(defel);
904                 }
905                 else if (strcmp(defel->defname, "null") == 0)
906                 {
907                         if (cstate->null_print)
908                                 ereport(ERROR,
909                                                 (errcode(ERRCODE_SYNTAX_ERROR),
910                                                  errmsg("conflicting or redundant options")));
911                         cstate->null_print = defGetString(defel);
912                 }
913                 else if (strcmp(defel->defname, "header") == 0)
914                 {
915                         if (cstate->header_line)
916                                 ereport(ERROR,
917                                                 (errcode(ERRCODE_SYNTAX_ERROR),
918                                                  errmsg("conflicting or redundant options")));
919                         cstate->header_line = defGetBoolean(defel);
920                 }
921                 else if (strcmp(defel->defname, "quote") == 0)
922                 {
923                         if (cstate->quote)
924                                 ereport(ERROR,
925                                                 (errcode(ERRCODE_SYNTAX_ERROR),
926                                                  errmsg("conflicting or redundant options")));
927                         cstate->quote = defGetString(defel);
928                 }
929                 else if (strcmp(defel->defname, "escape") == 0)
930                 {
931                         if (cstate->escape)
932                                 ereport(ERROR,
933                                                 (errcode(ERRCODE_SYNTAX_ERROR),
934                                                  errmsg("conflicting or redundant options")));
935                         cstate->escape = defGetString(defel);
936                 }
937                 else if (strcmp(defel->defname, "force_quote") == 0)
938                 {
939                         if (force_quote || force_quote_all)
940                                 ereport(ERROR,
941                                                 (errcode(ERRCODE_SYNTAX_ERROR),
942                                                  errmsg("conflicting or redundant options")));
943                         if (defel->arg && IsA(defel->arg, A_Star))
944                                 force_quote_all = true;
945                         else if (defel->arg && IsA(defel->arg, List))
946                                 force_quote = (List *) defel->arg;
947                         else
948                                 ereport(ERROR,
949                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
950                                                  errmsg("argument to option \"%s\" must be a list of column names",
951                                                                 defel->defname)));
952                 }
953                 else if (strcmp(defel->defname, "force_not_null") == 0)
954                 {
955                         if (force_notnull)
956                                 ereport(ERROR,
957                                                 (errcode(ERRCODE_SYNTAX_ERROR),
958                                                  errmsg("conflicting or redundant options")));
959                         if (defel->arg && IsA(defel->arg, List))
960                                 force_notnull = (List *) defel->arg;
961                         else
962                                 ereport(ERROR,
963                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
964                                                  errmsg("argument to option \"%s\" must be a list of column names",
965                                                                 defel->defname)));
966                 }
967                 else
968                         ereport(ERROR,
969                                         (errcode(ERRCODE_SYNTAX_ERROR),
970                                          errmsg("option \"%s\" not recognized",
971                                                         defel->defname)));
972         }
973
974         /*
975          * Check for incompatible options (must do these two before inserting
976          * defaults)
977          */
978         if (cstate->binary && cstate->delim)
979                 ereport(ERROR,
980                                 (errcode(ERRCODE_SYNTAX_ERROR),
981                                  errmsg("cannot specify DELIMITER in BINARY mode")));
982
983         if (cstate->binary && cstate->null_print)
984                 ereport(ERROR,
985                                 (errcode(ERRCODE_SYNTAX_ERROR),
986                                  errmsg("cannot specify NULL in BINARY mode")));
987
988         /* Set defaults for omitted options */
989         if (!cstate->delim)
990                 cstate->delim = cstate->csv_mode ? "," : "\t";
991
992         if (!cstate->null_print)
993                 cstate->null_print = cstate->csv_mode ? "" : "\\N";
994         cstate->null_print_len = strlen(cstate->null_print);
995
996         if (cstate->csv_mode)
997         {
998                 if (!cstate->quote)
999                         cstate->quote = "\"";
1000                 if (!cstate->escape)
1001                         cstate->escape = cstate->quote;
1002         }
1003
1004         /* Only single-byte delimiter strings are supported. */
1005         if (strlen(cstate->delim) != 1)
1006                 ereport(ERROR,
1007                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1008                           errmsg("COPY delimiter must be a single one-byte character")));
1009
1010         /* Disallow end-of-line characters */
1011         if (strchr(cstate->delim, '\r') != NULL ||
1012                 strchr(cstate->delim, '\n') != NULL)
1013                 ereport(ERROR,
1014                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1015                          errmsg("COPY delimiter cannot be newline or carriage return")));
1016
1017         if (strchr(cstate->null_print, '\r') != NULL ||
1018                 strchr(cstate->null_print, '\n') != NULL)
1019                 ereport(ERROR,
1020                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1021                                  errmsg("COPY null representation cannot use newline or carriage return")));
1022
1023         /*
1024          * Disallow unsafe delimiter characters in non-CSV mode.  We can't allow
1025          * backslash because it would be ambiguous.  We can't allow the other
1026          * cases because data characters matching the delimiter must be
1027          * backslashed, and certain backslash combinations are interpreted
1028          * non-literally by COPY IN.  Disallowing all lower case ASCII letters is
1029          * more than strictly necessary, but seems best for consistency and
1030          * future-proofing.  Likewise we disallow all digits though only octal
1031          * digits are actually dangerous.
1032          */
1033         if (!cstate->csv_mode &&
1034                 strchr("\\.abcdefghijklmnopqrstuvwxyz0123456789",
1035                            cstate->delim[0]) != NULL)
1036                 ereport(ERROR,
1037                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1038                                  errmsg("COPY delimiter cannot be \"%s\"", cstate->delim)));
1039
1040         /* Check header */
1041         if (!cstate->csv_mode && cstate->header_line)
1042                 ereport(ERROR,
1043                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1044                                  errmsg("COPY HEADER available only in CSV mode")));
1045
1046         /* Check quote */
1047         if (!cstate->csv_mode && cstate->quote != NULL)
1048                 ereport(ERROR,
1049                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1050                                  errmsg("COPY quote available only in CSV mode")));
1051
1052         if (cstate->csv_mode && strlen(cstate->quote) != 1)
1053                 ereport(ERROR,
1054                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1055                                  errmsg("COPY quote must be a single one-byte character")));
1056
1057         if (cstate->csv_mode && cstate->delim[0] == cstate->quote[0])
1058                 ereport(ERROR,
1059                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1060                                  errmsg("COPY delimiter and quote must be different")));
1061
1062         /* Check escape */
1063         if (!cstate->csv_mode && cstate->escape != NULL)
1064                 ereport(ERROR,
1065                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1066                                  errmsg("COPY escape available only in CSV mode")));
1067
1068         if (cstate->csv_mode && strlen(cstate->escape) != 1)
1069                 ereport(ERROR,
1070                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1071                                  errmsg("COPY escape must be a single one-byte character")));
1072
1073         /* Check force_quote */
1074         if (!cstate->csv_mode && (force_quote != NIL || force_quote_all))
1075                 ereport(ERROR,
1076                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1077                                  errmsg("COPY force quote available only in CSV mode")));
1078         if ((force_quote != NIL || force_quote_all) && is_from)
1079                 ereport(ERROR,
1080                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1081                                  errmsg("COPY force quote only available using COPY TO")));
1082
1083         /* Check force_notnull */
1084         if (!cstate->csv_mode && force_notnull != NIL)
1085                 ereport(ERROR,
1086                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1087                                  errmsg("COPY force not null available only in CSV mode")));
1088         if (force_notnull != NIL && !is_from)
1089                 ereport(ERROR,
1090                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1091                           errmsg("COPY force not null only available using COPY FROM")));
1092
1093         /* Don't allow the delimiter to appear in the null string. */
1094         if (strchr(cstate->null_print, cstate->delim[0]) != NULL)
1095                 ereport(ERROR,
1096                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1097                 errmsg("COPY delimiter must not appear in the NULL specification")));
1098
1099         /* Don't allow the CSV quote char to appear in the null string. */
1100         if (cstate->csv_mode &&
1101                 strchr(cstate->null_print, cstate->quote[0]) != NULL)
1102                 ereport(ERROR,
1103                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1104                                  errmsg("CSV quote character must not appear in the NULL specification")));
1105
1106         if (rel)
1107         {
1108                 Assert(!raw_query);
1109
1110                 cstate->rel = rel;
1111
1112                 tupDesc = RelationGetDescr(cstate->rel);
1113
1114                 /* Don't allow COPY w/ OIDs to or from a table without them */
1115                 if (cstate->oids && !cstate->rel->rd_rel->relhasoids)
1116                         ereport(ERROR,
1117                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
1118                                          errmsg("table \"%s\" does not have OIDs",
1119                                                         RelationGetRelationName(cstate->rel))));
1120         }
1121         else
1122         {
1123                 List       *rewritten;
1124                 Query      *query;
1125                 PlannedStmt *plan;
1126                 DestReceiver *dest;
1127
1128                 Assert(!is_from);
1129                 cstate->rel = NULL;
1130
1131                 /* Don't allow COPY w/ OIDs from a select */
1132                 if (cstate->oids)
1133                         ereport(ERROR,
1134                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1135                                          errmsg("COPY (SELECT) WITH OIDS is not supported")));
1136
1137                 /*
1138                  * Run parse analysis and rewrite.      Note this also acquires sufficient
1139                  * locks on the source table(s).
1140                  *
1141                  * Because the parser and planner tend to scribble on their input, we
1142                  * make a preliminary copy of the source querytree.  This prevents
1143                  * problems in the case that the COPY is in a portal or plpgsql
1144                  * function and is executed repeatedly.  (See also the same hack in
1145                  * DECLARE CURSOR and PREPARE.)  XXX FIXME someday.
1146                  */
1147                 rewritten = pg_analyze_and_rewrite((Node *) copyObject(raw_query),
1148                                                                                    queryString, NULL, 0);
1149
1150                 /* We don't expect more or less than one result query */
1151                 if (list_length(rewritten) != 1)
1152                         elog(ERROR, "unexpected rewrite result");
1153
1154                 query = (Query *) linitial(rewritten);
1155                 Assert(query->commandType == CMD_SELECT);
1156                 Assert(query->utilityStmt == NULL);
1157
1158                 /* Query mustn't use INTO, either */
1159                 if (query->intoClause)
1160                         ereport(ERROR,
1161                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1162                                          errmsg("COPY (SELECT INTO) is not supported")));
1163
1164                 /* plan the query */
1165                 plan = planner(query, 0, NULL);
1166
1167                 /*
1168                  * Use a snapshot with an updated command ID to ensure this query sees
1169                  * results of any previously executed queries.
1170                  */
1171                 PushUpdatedSnapshot(GetActiveSnapshot());
1172
1173                 /* Create dest receiver for COPY OUT */
1174                 dest = CreateDestReceiver(DestCopyOut);
1175                 ((DR_copy *) dest)->cstate = cstate;
1176
1177                 /* Create a QueryDesc requesting no output */
1178                 cstate->queryDesc = CreateQueryDesc(plan, queryString,
1179                                                                                         GetActiveSnapshot(),
1180                                                                                         InvalidSnapshot,
1181                                                                                         dest, NULL, 0);
1182
1183                 /*
1184                  * Call ExecutorStart to prepare the plan for execution.
1185                  *
1186                  * ExecutorStart computes a result tupdesc for us
1187                  */
1188                 ExecutorStart(cstate->queryDesc, 0);
1189
1190                 tupDesc = cstate->queryDesc->tupDesc;
1191         }
1192
1193         /* Generate or convert list of attributes to process */
1194         cstate->attnumlist = CopyGetAttnums(tupDesc, cstate->rel, attnamelist);
1195
1196         num_phys_attrs = tupDesc->natts;
1197
1198         /* Convert FORCE QUOTE name list to per-column flags, check validity */
1199         cstate->force_quote_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
1200         if (force_quote_all)
1201         {
1202                 int                     i;
1203
1204                 for (i = 0; i < num_phys_attrs; i++)
1205                         cstate->force_quote_flags[i] = true;
1206         }
1207         else if (force_quote)
1208         {
1209                 List       *attnums;
1210                 ListCell   *cur;
1211
1212                 attnums = CopyGetAttnums(tupDesc, cstate->rel, force_quote);
1213
1214                 foreach(cur, attnums)
1215                 {
1216                         int                     attnum = lfirst_int(cur);
1217
1218                         if (!list_member_int(cstate->attnumlist, attnum))
1219                                 ereport(ERROR,
1220                                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1221                                    errmsg("FORCE QUOTE column \"%s\" not referenced by COPY",
1222                                                   NameStr(tupDesc->attrs[attnum - 1]->attname))));
1223                         cstate->force_quote_flags[attnum - 1] = true;
1224                 }
1225         }
1226
1227         /* Convert FORCE NOT NULL name list to per-column flags, check validity */
1228         cstate->force_notnull_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
1229         if (force_notnull)
1230         {
1231                 List       *attnums;
1232                 ListCell   *cur;
1233
1234                 attnums = CopyGetAttnums(tupDesc, cstate->rel, force_notnull);
1235
1236                 foreach(cur, attnums)
1237                 {
1238                         int                     attnum = lfirst_int(cur);
1239
1240                         if (!list_member_int(cstate->attnumlist, attnum))
1241                                 ereport(ERROR,
1242                                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1243                                 errmsg("FORCE NOT NULL column \"%s\" not referenced by COPY",
1244                                            NameStr(tupDesc->attrs[attnum - 1]->attname))));
1245                         cstate->force_notnull_flags[attnum - 1] = true;
1246                 }
1247         }
1248
1249         /*
1250          * Set up encoding conversion info.  Even if the client and server
1251          * encodings are the same, we must apply pg_client_to_server() to validate
1252          * data in multibyte encodings.
1253          */
1254         cstate->client_encoding = pg_get_client_encoding();
1255         cstate->need_transcoding =
1256                 (cstate->client_encoding != GetDatabaseEncoding() ||
1257                  pg_database_encoding_max_length() > 1);
1258         /* See Multibyte encoding comment above */
1259         cstate->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate->client_encoding);
1260
1261         cstate->copy_dest = COPY_FILE;          /* default */
1262
1263         MemoryContextSwitchTo(oldcontext);
1264
1265         return cstate;
1266 }
1267
1268 /*
1269  * Release resources allocated in a cstate for COPY TO/FROM.
1270  */
1271 static void
1272 EndCopy(CopyState cstate)
1273 {
1274         if (cstate->filename != NULL && FreeFile(cstate->copy_file))
1275                 ereport(ERROR,
1276                                 (errcode_for_file_access(),
1277                                  errmsg("could not close file \"%s\": %m",
1278                                                 cstate->filename)));
1279
1280         MemoryContextDelete(cstate->copycontext);
1281         pfree(cstate);
1282 }
1283
1284 /*
1285  * Setup CopyState to read tuples from a table or a query for COPY TO.
1286  */
1287 static CopyState
1288 BeginCopyTo(Relation rel,
1289                         Node *query,
1290                         const char *queryString,
1291                         const char *filename,
1292                         List *attnamelist,
1293                         List *options)
1294 {
1295         CopyState       cstate;
1296         bool            pipe = (filename == NULL);
1297         MemoryContext oldcontext;
1298
1299         if (rel != NULL && rel->rd_rel->relkind != RELKIND_RELATION)
1300         {
1301                 if (rel->rd_rel->relkind == RELKIND_VIEW)
1302                         ereport(ERROR,
1303                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1304                                          errmsg("cannot copy from view \"%s\"",
1305                                                         RelationGetRelationName(rel)),
1306                                          errhint("Try the COPY (SELECT ...) TO variant.")));
1307                 else if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
1308                         ereport(ERROR,
1309                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1310                                          errmsg("cannot copy from foreign table \"%s\"",
1311                                                         RelationGetRelationName(rel)),
1312                                          errhint("Try the COPY (SELECT ...) TO variant.")));
1313                 else if (rel->rd_rel->relkind == RELKIND_SEQUENCE)
1314                         ereport(ERROR,
1315                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1316                                          errmsg("cannot copy from sequence \"%s\"",
1317                                                         RelationGetRelationName(rel))));
1318                 else
1319                         ereport(ERROR,
1320                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1321                                          errmsg("cannot copy from non-table relation \"%s\"",
1322                                                         RelationGetRelationName(rel))));
1323         }
1324
1325         cstate = BeginCopy(false, rel, query, queryString, attnamelist, options);
1326         oldcontext = MemoryContextSwitchTo(cstate->copycontext);
1327
1328         if (pipe)
1329         {
1330                 if (whereToSendOutput != DestRemote)
1331                         cstate->copy_file = stdout;
1332         }
1333         else
1334         {
1335                 mode_t          oumask;         /* Pre-existing umask value */
1336                 struct stat st;
1337
1338                 /*
1339                  * Prevent write to relative path ... too easy to shoot oneself in the
1340                  * foot by overwriting a database file ...
1341                  */
1342                 if (!is_absolute_path(filename))
1343                         ereport(ERROR,
1344                                         (errcode(ERRCODE_INVALID_NAME),
1345                                          errmsg("relative path not allowed for COPY to file")));
1346
1347                 cstate->filename = pstrdup(filename);
1348                 oumask = umask(S_IWGRP | S_IWOTH);
1349                 cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_W);
1350                 umask(oumask);
1351
1352                 if (cstate->copy_file == NULL)
1353                         ereport(ERROR,
1354                                         (errcode_for_file_access(),
1355                                          errmsg("could not open file \"%s\" for writing: %m",
1356                                                         cstate->filename)));
1357
1358                 fstat(fileno(cstate->copy_file), &st);
1359                 if (S_ISDIR(st.st_mode))
1360                         ereport(ERROR,
1361                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1362                                          errmsg("\"%s\" is a directory", cstate->filename)));
1363         }
1364
1365         MemoryContextSwitchTo(oldcontext);
1366
1367         return cstate;
1368 }
1369
1370 /*
1371  * This intermediate routine exists mainly to localize the effects of setjmp
1372  * so we don't need to plaster a lot of variables with "volatile".
1373  */
1374 static uint64
1375 DoCopyTo(CopyState cstate)
1376 {
1377         bool            pipe = (cstate->filename == NULL);
1378         bool            fe_copy = (pipe && whereToSendOutput == DestRemote);
1379         uint64          processed;
1380
1381         PG_TRY();
1382         {
1383                 if (fe_copy)
1384                         SendCopyBegin(cstate);
1385
1386                 processed = CopyTo(cstate);
1387
1388                 if (fe_copy)
1389                         SendCopyEnd(cstate);
1390         }
1391         PG_CATCH();
1392         {
1393                 /*
1394                  * Make sure we turn off old-style COPY OUT mode upon error. It is
1395                  * okay to do this in all cases, since it does nothing if the mode is
1396                  * not on.
1397                  */
1398                 pq_endcopyout(true);
1399                 PG_RE_THROW();
1400         }
1401         PG_END_TRY();
1402
1403         return processed;
1404 }
1405
1406 /*
1407  * Clean up storage and release resources for COPY TO.
1408  */
1409 static void
1410 EndCopyTo(CopyState cstate)
1411 {
1412         if (cstate->queryDesc != NULL)
1413         {
1414                 /* Close down the query and free resources. */
1415                 ExecutorEnd(cstate->queryDesc);
1416                 FreeQueryDesc(cstate->queryDesc);
1417                 PopActiveSnapshot();
1418         }
1419
1420         /* Clean up storage */
1421         EndCopy(cstate);
1422 }
1423
1424 /*
1425  * Copy from relation or query TO file.
1426  */
1427 static uint64
1428 CopyTo(CopyState cstate)
1429 {
1430         TupleDesc       tupDesc;
1431         int                     num_phys_attrs;
1432         Form_pg_attribute *attr;
1433         ListCell   *cur;
1434         uint64          processed;
1435
1436         if (cstate->rel)
1437                 tupDesc = RelationGetDescr(cstate->rel);
1438         else
1439                 tupDesc = cstate->queryDesc->tupDesc;
1440         attr = tupDesc->attrs;
1441         num_phys_attrs = tupDesc->natts;
1442         cstate->null_print_client = cstate->null_print;         /* default */
1443
1444         /* We use fe_msgbuf as a per-row buffer regardless of copy_dest */
1445         cstate->fe_msgbuf = makeStringInfo();
1446
1447         /* Get info about the columns we need to process. */
1448         cstate->out_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
1449         foreach(cur, cstate->attnumlist)
1450         {
1451                 int                     attnum = lfirst_int(cur);
1452                 Oid                     out_func_oid;
1453                 bool            isvarlena;
1454
1455                 if (cstate->binary)
1456                         getTypeBinaryOutputInfo(attr[attnum - 1]->atttypid,
1457                                                                         &out_func_oid,
1458                                                                         &isvarlena);
1459                 else
1460                         getTypeOutputInfo(attr[attnum - 1]->atttypid,
1461                                                           &out_func_oid,
1462                                                           &isvarlena);
1463                 fmgr_info(out_func_oid, &cstate->out_functions[attnum - 1]);
1464         }
1465
1466         /*
1467          * Create a temporary memory context that we can reset once per row to
1468          * recover palloc'd memory.  This avoids any problems with leaks inside
1469          * datatype output routines, and should be faster than retail pfree's
1470          * anyway.      (We don't need a whole econtext as CopyFrom does.)
1471          */
1472         cstate->rowcontext = AllocSetContextCreate(CurrentMemoryContext,
1473                                                                                            "COPY TO",
1474                                                                                            ALLOCSET_DEFAULT_MINSIZE,
1475                                                                                            ALLOCSET_DEFAULT_INITSIZE,
1476                                                                                            ALLOCSET_DEFAULT_MAXSIZE);
1477
1478         if (cstate->binary)
1479         {
1480                 /* Generate header for a binary copy */
1481                 int32           tmp;
1482
1483                 /* Signature */
1484                 CopySendData(cstate, (char *) BinarySignature, 11);
1485                 /* Flags field */
1486                 tmp = 0;
1487                 if (cstate->oids)
1488                         tmp |= (1 << 16);
1489                 CopySendInt32(cstate, tmp);
1490                 /* No header extension */
1491                 tmp = 0;
1492                 CopySendInt32(cstate, tmp);
1493         }
1494         else
1495         {
1496                 /*
1497                  * For non-binary copy, we need to convert null_print to client
1498                  * encoding, because it will be sent directly with CopySendString.
1499                  */
1500                 if (cstate->need_transcoding)
1501                         cstate->null_print_client = pg_server_to_client(cstate->null_print,
1502                                                                                                          cstate->null_print_len);
1503
1504                 /* if a header has been requested send the line */
1505                 if (cstate->header_line)
1506                 {
1507                         bool            hdr_delim = false;
1508
1509                         foreach(cur, cstate->attnumlist)
1510                         {
1511                                 int                     attnum = lfirst_int(cur);
1512                                 char       *colname;
1513
1514                                 if (hdr_delim)
1515                                         CopySendChar(cstate, cstate->delim[0]);
1516                                 hdr_delim = true;
1517
1518                                 colname = NameStr(attr[attnum - 1]->attname);
1519
1520                                 CopyAttributeOutCSV(cstate, colname, false,
1521                                                                         list_length(cstate->attnumlist) == 1);
1522                         }
1523
1524                         CopySendEndOfRow(cstate);
1525                 }
1526         }
1527
1528         if (cstate->rel)
1529         {
1530                 Datum      *values;
1531                 bool       *nulls;
1532                 HeapScanDesc scandesc;
1533                 HeapTuple       tuple;
1534
1535                 values = (Datum *) palloc(num_phys_attrs * sizeof(Datum));
1536                 nulls = (bool *) palloc(num_phys_attrs * sizeof(bool));
1537
1538                 scandesc = heap_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL);
1539
1540                 processed = 0;
1541                 while ((tuple = heap_getnext(scandesc, ForwardScanDirection)) != NULL)
1542                 {
1543                         CHECK_FOR_INTERRUPTS();
1544
1545                         /* Deconstruct the tuple ... faster than repeated heap_getattr */
1546                         heap_deform_tuple(tuple, tupDesc, values, nulls);
1547
1548                         /* Format and send the data */
1549                         CopyOneRowTo(cstate, HeapTupleGetOid(tuple), values, nulls);
1550                         processed++;
1551                 }
1552
1553                 heap_endscan(scandesc);
1554
1555                 pfree(values);
1556                 pfree(nulls);
1557         }
1558         else
1559         {
1560                 /* run the plan --- the dest receiver will send tuples */
1561                 ExecutorRun(cstate->queryDesc, ForwardScanDirection, 0L);
1562                 processed = ((DR_copy *) cstate->queryDesc->dest)->processed;
1563         }
1564
1565         if (cstate->binary)
1566         {
1567                 /* Generate trailer for a binary copy */
1568                 CopySendInt16(cstate, -1);
1569                 /* Need to flush out the trailer */
1570                 CopySendEndOfRow(cstate);
1571         }
1572
1573         MemoryContextDelete(cstate->rowcontext);
1574
1575         return processed;
1576 }
1577
1578 /*
1579  * Emit one row during CopyTo().
1580  */
1581 static void
1582 CopyOneRowTo(CopyState cstate, Oid tupleOid, Datum *values, bool *nulls)
1583 {
1584         bool            need_delim = false;
1585         FmgrInfo   *out_functions = cstate->out_functions;
1586         MemoryContext oldcontext;
1587         ListCell   *cur;
1588         char       *string;
1589
1590         MemoryContextReset(cstate->rowcontext);
1591         oldcontext = MemoryContextSwitchTo(cstate->rowcontext);
1592
1593         if (cstate->binary)
1594         {
1595                 /* Binary per-tuple header */
1596                 CopySendInt16(cstate, list_length(cstate->attnumlist));
1597                 /* Send OID if wanted --- note attnumlist doesn't include it */
1598                 if (cstate->oids)
1599                 {
1600                         /* Hack --- assume Oid is same size as int32 */
1601                         CopySendInt32(cstate, sizeof(int32));
1602                         CopySendInt32(cstate, tupleOid);
1603                 }
1604         }
1605         else
1606         {
1607                 /* Text format has no per-tuple header, but send OID if wanted */
1608                 /* Assume digits don't need any quoting or encoding conversion */
1609                 if (cstate->oids)
1610                 {
1611                         string = DatumGetCString(DirectFunctionCall1(oidout,
1612                                                                                                 ObjectIdGetDatum(tupleOid)));
1613                         CopySendString(cstate, string);
1614                         need_delim = true;
1615                 }
1616         }
1617
1618         foreach(cur, cstate->attnumlist)
1619         {
1620                 int                     attnum = lfirst_int(cur);
1621                 Datum           value = values[attnum - 1];
1622                 bool            isnull = nulls[attnum - 1];
1623
1624                 if (!cstate->binary)
1625                 {
1626                         if (need_delim)
1627                                 CopySendChar(cstate, cstate->delim[0]);
1628                         need_delim = true;
1629                 }
1630
1631                 if (isnull)
1632                 {
1633                         if (!cstate->binary)
1634                                 CopySendString(cstate, cstate->null_print_client);
1635                         else
1636                                 CopySendInt32(cstate, -1);
1637                 }
1638                 else
1639                 {
1640                         if (!cstate->binary)
1641                         {
1642                                 string = OutputFunctionCall(&out_functions[attnum - 1],
1643                                                                                         value);
1644                                 if (cstate->csv_mode)
1645                                         CopyAttributeOutCSV(cstate, string,
1646                                                                                 cstate->force_quote_flags[attnum - 1],
1647                                                                                 list_length(cstate->attnumlist) == 1);
1648                                 else
1649                                         CopyAttributeOutText(cstate, string);
1650                         }
1651                         else
1652                         {
1653                                 bytea      *outputbytes;
1654
1655                                 outputbytes = SendFunctionCall(&out_functions[attnum - 1],
1656                                                                                            value);
1657                                 CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
1658                                 CopySendData(cstate, VARDATA(outputbytes),
1659                                                          VARSIZE(outputbytes) - VARHDRSZ);
1660                         }
1661                 }
1662         }
1663
1664         CopySendEndOfRow(cstate);
1665
1666         MemoryContextSwitchTo(oldcontext);
1667 }
1668
1669
1670 /*
1671  * error context callback for COPY FROM
1672  *
1673  * The argument for the error context must be CopyState.
1674  */
1675 void
1676 CopyFromErrorCallback(void *arg)
1677 {
1678         CopyState       cstate = (CopyState) arg;
1679
1680         if (cstate->binary)
1681         {
1682                 /* can't usefully display the data */
1683                 if (cstate->cur_attname)
1684                         errcontext("COPY %s, line %d, column %s",
1685                                            cstate->cur_relname, cstate->cur_lineno,
1686                                            cstate->cur_attname);
1687                 else
1688                         errcontext("COPY %s, line %d",
1689                                            cstate->cur_relname, cstate->cur_lineno);
1690         }
1691         else
1692         {
1693                 if (cstate->cur_attname && cstate->cur_attval)
1694                 {
1695                         /* error is relevant to a particular column */
1696                         char       *attval;
1697
1698                         attval = limit_printout_length(cstate->cur_attval);
1699                         errcontext("COPY %s, line %d, column %s: \"%s\"",
1700                                            cstate->cur_relname, cstate->cur_lineno,
1701                                            cstate->cur_attname, attval);
1702                         pfree(attval);
1703                 }
1704                 else if (cstate->cur_attname)
1705                 {
1706                         /* error is relevant to a particular column, value is NULL */
1707                         errcontext("COPY %s, line %d, column %s: null input",
1708                                            cstate->cur_relname, cstate->cur_lineno,
1709                                            cstate->cur_attname);
1710                 }
1711                 else
1712                 {
1713                         /* error is relevant to a particular line */
1714                         if (cstate->line_buf_converted || !cstate->need_transcoding)
1715                         {
1716                                 char       *lineval;
1717
1718                                 lineval = limit_printout_length(cstate->line_buf.data);
1719                                 errcontext("COPY %s, line %d: \"%s\"",
1720                                                    cstate->cur_relname, cstate->cur_lineno, lineval);
1721                                 pfree(lineval);
1722                         }
1723                         else
1724                         {
1725                                 /*
1726                                  * Here, the line buffer is still in a foreign encoding, and
1727                                  * indeed it's quite likely that the error is precisely a
1728                                  * failure to do encoding conversion (ie, bad data).  We dare
1729                                  * not try to convert it, and at present there's no way to
1730                                  * regurgitate it without conversion.  So we have to punt and
1731                                  * just report the line number.
1732                                  */
1733                                 errcontext("COPY %s, line %d",
1734                                                    cstate->cur_relname, cstate->cur_lineno);
1735                         }
1736                 }
1737         }
1738 }
1739
/*
 * Keep the amount of COPY data quoted in a message within reason.
 *
 * Using the sprintf "precision" limit to truncate would look simpler, but
 * some versions of glibc have a bug/misfeature whereby vsnprintf always
 * fails (returns -1) when asked to truncate a string containing byte
 * sequences that are invalid in the current encoding.  So the truncation is
 * done by hand.
 *
 * The result is always a freshly palloc'd string: a plain copy of the input
 * when it is short enough, otherwise a clipped prefix followed by "...".
 */
static char *
limit_printout_length(const char *str)
{
#define MAX_COPY_DATA_DISPLAY 100

	int			slen = strlen(str);
	int			cliplen;
	char	   *res;

	if (slen > MAX_COPY_DATA_DISPLAY)
	{
		/* Encoding-dependent truncation point */
		cliplen = pg_mbcliplen(str, slen, MAX_COPY_DATA_DISPLAY);

		/* Room for the prefix, the "..." marker and the terminating NUL */
		res = (char *) palloc(cliplen + 4);
		memcpy(res, str, cliplen);
		memcpy(res + cliplen, "...", 4);

		return res;
	}

	/* Definitely short enough: hand back an unmodified copy */
	return pstrdup(str);
}
1774
1775 /*
1776  * Copy FROM file to relation.
1777  */
1778 static uint64
1779 CopyFrom(CopyState cstate)
1780 {
1781         HeapTuple       tuple;
1782         TupleDesc       tupDesc;
1783         Datum      *values;
1784         bool       *nulls;
1785         ResultRelInfo *resultRelInfo;
1786         EState     *estate = CreateExecutorState(); /* for ExecConstraints() */
1787         ExprContext *econtext;
1788         TupleTableSlot *slot;
1789         MemoryContext oldcontext = CurrentMemoryContext;
1790         ErrorContextCallback errcontext;
1791         CommandId       mycid = GetCurrentCommandId(true);
1792         int                     hi_options = 0; /* start with default heap_insert options */
1793         BulkInsertState bistate;
1794         uint64          processed = 0;
1795
1796         Assert(cstate->rel);
1797
1798         if (cstate->rel->rd_rel->relkind != RELKIND_RELATION)
1799         {
1800                 if (cstate->rel->rd_rel->relkind == RELKIND_VIEW)
1801                         ereport(ERROR,
1802                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1803                                          errmsg("cannot copy to view \"%s\"",
1804                                                         RelationGetRelationName(cstate->rel))));
1805                 else if (cstate->rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
1806                         ereport(ERROR,
1807                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1808                                          errmsg("cannot copy to foreign table \"%s\"",
1809                                                         RelationGetRelationName(cstate->rel))));
1810                 else if (cstate->rel->rd_rel->relkind == RELKIND_SEQUENCE)
1811                         ereport(ERROR,
1812                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1813                                          errmsg("cannot copy to sequence \"%s\"",
1814                                                         RelationGetRelationName(cstate->rel))));
1815                 else
1816                         ereport(ERROR,
1817                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1818                                          errmsg("cannot copy to non-table relation \"%s\"",
1819                                                         RelationGetRelationName(cstate->rel))));
1820         }
1821
1822         tupDesc = RelationGetDescr(cstate->rel);
1823
1824         /*----------
1825          * Check to see if we can avoid writing WAL
1826          *
1827          * If archive logging/streaming is not enabled *and* either
1828          *      - table was created in same transaction as this COPY
1829          *      - data is being written to relfilenode created in this transaction
1830          * then we can skip writing WAL.  It's safe because if the transaction
1831          * doesn't commit, we'll discard the table (or the new relfilenode file).
1832          * If it does commit, we'll have done the heap_sync at the bottom of this
1833          * routine first.
1834          *
1835          * As mentioned in comments in utils/rel.h, the in-same-transaction test
1836          * is not completely reliable, since in rare cases rd_createSubid or
1837          * rd_newRelfilenodeSubid can be cleared before the end of the transaction.
1838          * However this is OK since at worst we will fail to make the optimization.
1839          *
1840          * Also, if the target file is new-in-transaction, we assume that checking
1841          * FSM for free space is a waste of time, even if we must use WAL because
1842          * of archiving.  This could possibly be wrong, but it's unlikely.
1843          *
1844          * The comments for heap_insert and RelationGetBufferForTuple specify that
1845          * skipping WAL logging is only safe if we ensure that our tuples do not
1846          * go into pages containing tuples from any other transactions --- but this
1847          * must be the case if we have a new table or new relfilenode, so we need
1848          * no additional work to enforce that.
1849          *----------
1850          */
1851         if (cstate->rel->rd_createSubid != InvalidSubTransactionId ||
1852                 cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId)
1853         {
1854                 hi_options |= HEAP_INSERT_SKIP_FSM;
1855                 if (!XLogIsNeeded())
1856                         hi_options |= HEAP_INSERT_SKIP_WAL;
1857         }
1858
1859         /*
1860          * We need a ResultRelInfo so we can use the regular executor's
1861          * index-entry-making machinery.  (There used to be a huge amount of code
1862          * here that basically duplicated execUtils.c ...)
1863          */
1864         resultRelInfo = makeNode(ResultRelInfo);
1865         resultRelInfo->ri_RangeTableIndex = 1;          /* dummy */
1866         resultRelInfo->ri_RelationDesc = cstate->rel;
1867         resultRelInfo->ri_TrigDesc = CopyTriggerDesc(cstate->rel->trigdesc);
1868         if (resultRelInfo->ri_TrigDesc)
1869         {
1870                 resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
1871                         palloc0(resultRelInfo->ri_TrigDesc->numtriggers * sizeof(FmgrInfo));
1872                 resultRelInfo->ri_TrigWhenExprs = (List **)
1873                         palloc0(resultRelInfo->ri_TrigDesc->numtriggers * sizeof(List *));
1874         }
1875         resultRelInfo->ri_TrigInstrument = NULL;
1876
1877         ExecOpenIndices(resultRelInfo);
1878
1879         estate->es_result_relations = resultRelInfo;
1880         estate->es_num_result_relations = 1;
1881         estate->es_result_relation_info = resultRelInfo;
1882
1883         /* Set up a tuple slot too */
1884         slot = ExecInitExtraTupleSlot(estate);
1885         ExecSetSlotDescriptor(slot, tupDesc);
1886
1887         /* Prepare to catch AFTER triggers. */
1888         AfterTriggerBeginQuery();
1889
1890         /*
1891          * Check BEFORE STATEMENT insertion triggers. It's debateable whether we
1892          * should do this for COPY, since it's not really an "INSERT" statement as
1893          * such. However, executing these triggers maintains consistency with the
1894          * EACH ROW triggers that we already fire on COPY.
1895          */
1896         ExecBSInsertTriggers(estate, resultRelInfo);
1897
1898         values = (Datum *) palloc(tupDesc->natts * sizeof(Datum));
1899         nulls = (bool *) palloc(tupDesc->natts * sizeof(bool));
1900
1901         bistate = GetBulkInsertState();
1902         econtext = GetPerTupleExprContext(estate);
1903
1904         /* Set up callback to identify error line number */
1905         errcontext.callback = CopyFromErrorCallback;
1906         errcontext.arg = (void *) cstate;
1907         errcontext.previous = error_context_stack;
1908         error_context_stack = &errcontext;
1909
1910         for (;;)
1911         {
1912                 bool            skip_tuple;
1913                 Oid                     loaded_oid = InvalidOid;
1914
1915                 CHECK_FOR_INTERRUPTS();
1916
1917                 /* Reset the per-tuple exprcontext */
1918                 ResetPerTupleExprContext(estate);
1919
1920                 /* Switch into its memory context */
1921                 MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
1922
1923                 if (!NextCopyFrom(cstate, econtext, values, nulls, &loaded_oid))
1924                         break;
1925
1926                 /* And now we can form the input tuple. */
1927                 tuple = heap_form_tuple(tupDesc, values, nulls);
1928
1929                 if (loaded_oid != InvalidOid)
1930                         HeapTupleSetOid(tuple, loaded_oid);
1931
1932                 /* Triggers and stuff need to be invoked in query context. */
1933                 MemoryContextSwitchTo(oldcontext);
1934
1935                 skip_tuple = false;
1936
1937                 /* BEFORE ROW INSERT Triggers */
1938                 if (resultRelInfo->ri_TrigDesc &&
1939                         resultRelInfo->ri_TrigDesc->trig_insert_before_row)
1940                 {
1941                         HeapTuple       newtuple;
1942
1943                         newtuple = ExecBRInsertTriggers(estate, resultRelInfo, tuple);
1944
1945                         if (newtuple == NULL)           /* "do nothing" */
1946                                 skip_tuple = true;
1947                         else if (newtuple != tuple) /* modified by Trigger(s) */
1948                         {
1949                                 heap_freetuple(tuple);
1950                                 tuple = newtuple;
1951                         }
1952                 }
1953
1954                 if (!skip_tuple)
1955                 {
1956                         List       *recheckIndexes = NIL;
1957
1958                         /* Place tuple in tuple slot */
1959                         ExecStoreTuple(tuple, slot, InvalidBuffer, false);
1960
1961                         /* Check the constraints of the tuple */
1962                         if (cstate->rel->rd_att->constr)
1963                                 ExecConstraints(resultRelInfo, slot, estate);
1964
1965                         /* OK, store the tuple and create index entries for it */
1966                         heap_insert(cstate->rel, tuple, mycid, hi_options, bistate);
1967
1968                         if (resultRelInfo->ri_NumIndices > 0)
1969                                 recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
1970                                                                                                            estate);
1971
1972                         /* AFTER ROW INSERT Triggers */
1973                         ExecARInsertTriggers(estate, resultRelInfo, tuple,
1974                                                                  recheckIndexes);
1975
1976                         list_free(recheckIndexes);
1977
1978                         /*
1979                          * We count only tuples not suppressed by a BEFORE INSERT trigger;
1980                          * this is the same definition used by execMain.c for counting
1981                          * tuples inserted by an INSERT command.
1982                          */
1983                         processed++;
1984                 }
1985         }
1986
1987         /* Done, clean up */
1988         error_context_stack = errcontext.previous;
1989
1990         FreeBulkInsertState(bistate);
1991
1992         MemoryContextSwitchTo(oldcontext);
1993
1994         /* Execute AFTER STATEMENT insertion triggers */
1995         ExecASInsertTriggers(estate, resultRelInfo);
1996
1997         /* Handle queued AFTER triggers */
1998         AfterTriggerEndQuery(estate);
1999
2000         pfree(values);
2001         pfree(nulls);
2002
2003         ExecResetTupleTable(estate->es_tupleTable, false);
2004
2005         ExecCloseIndices(resultRelInfo);
2006
2007         FreeExecutorState(estate);
2008
2009         /*
2010          * If we skipped writing WAL, then we need to sync the heap (but not
2011          * indexes since those use WAL anyway)
2012          */
2013         if (hi_options & HEAP_INSERT_SKIP_WAL)
2014                 heap_sync(cstate->rel);
2015
2016         return processed;
2017 }
2018
2019 /*
2020  * Setup to read tuples from a file for COPY FROM.
2021  *
2022  * 'rel': Used as a template for the tuples
2023  * 'filename': Name of server-local file to read
2024  * 'attnamelist': List of char *, columns to include. NIL selects all cols.
2025  * 'options': List of DefElem. See copy_opt_item in gram.y for selections.
2026  *
2027  * Returns a CopyState, to be passed to NextCopyFrom and related functions.
2028  */
2029 CopyState
2030 BeginCopyFrom(Relation rel,
2031                           const char *filename,
2032                           List *attnamelist,
2033                           List *options)
2034 {
2035         CopyState       cstate;
2036         bool            pipe = (filename == NULL);
2037         TupleDesc       tupDesc;
2038         Form_pg_attribute *attr;
2039         AttrNumber      num_phys_attrs,
2040                                 num_defaults;
2041         FmgrInfo   *in_functions;
2042         Oid                *typioparams;
2043         int                     attnum;
2044         Oid                     in_func_oid;
2045         int                *defmap;
2046         ExprState **defexprs;
2047         MemoryContext oldcontext;
2048
2049         cstate = BeginCopy(true, rel, NULL, NULL, attnamelist, options);
2050         oldcontext = MemoryContextSwitchTo(cstate->copycontext);
2051
2052         /* Initialize state variables */
2053         cstate->fe_eof = false;
2054         cstate->eol_type = EOL_UNKNOWN;
2055         cstate->cur_relname = RelationGetRelationName(cstate->rel);
2056         cstate->cur_lineno = 0;
2057         cstate->cur_attname = NULL;
2058         cstate->cur_attval = NULL;
2059
2060         /* Set up variables to avoid per-attribute overhead. */
2061         initStringInfo(&cstate->attribute_buf);
2062         initStringInfo(&cstate->line_buf);
2063         cstate->line_buf_converted = false;
2064         cstate->raw_buf = (char *) palloc(RAW_BUF_SIZE + 1);
2065         cstate->raw_buf_index = cstate->raw_buf_len = 0;
2066
2067         tupDesc = RelationGetDescr(cstate->rel);
2068         attr = tupDesc->attrs;
2069         num_phys_attrs = tupDesc->natts;
2070         num_defaults = 0;
2071
2072         /*
2073          * Pick up the required catalog information for each attribute in the
2074          * relation, including the input function, the element type (to pass to
2075          * the input function), and info about defaults and constraints. (Which
2076          * input function we use depends on text/binary format choice.)
2077          */
2078         in_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
2079         typioparams = (Oid *) palloc(num_phys_attrs * sizeof(Oid));
2080         defmap = (int *) palloc(num_phys_attrs * sizeof(int));
2081         defexprs = (ExprState **) palloc(num_phys_attrs * sizeof(ExprState *));
2082
2083         for (attnum = 1; attnum <= num_phys_attrs; attnum++)
2084         {
2085                 /* We don't need info for dropped attributes */
2086                 if (attr[attnum - 1]->attisdropped)
2087                         continue;
2088
2089                 /* Fetch the input function and typioparam info */
2090                 if (cstate->binary)
2091                         getTypeBinaryInputInfo(attr[attnum - 1]->atttypid,
2092                                                                    &in_func_oid, &typioparams[attnum - 1]);
2093                 else
2094                         getTypeInputInfo(attr[attnum - 1]->atttypid,
2095                                                          &in_func_oid, &typioparams[attnum - 1]);
2096                 fmgr_info(in_func_oid, &in_functions[attnum - 1]);
2097
2098                 /* Get default info if needed */
2099                 if (!list_member_int(cstate->attnumlist, attnum))
2100                 {
2101                         /* attribute is NOT to be copied from input */
2102                         /* use default value if one exists */
2103                         Node       *defexpr = build_column_default(cstate->rel, attnum);
2104
2105                         if (defexpr != NULL)
2106                         {
2107                                 /* Initialize expressions in copycontext. */
2108                                 defexprs[num_defaults] = ExecInitExpr(
2109                                                                 expression_planner((Expr *) defexpr), NULL);
2110                                 defmap[num_defaults] = attnum - 1;
2111                                 num_defaults++;
2112                         }
2113                 }
2114         }
2115
2116         /* We keep those variables in cstate. */
2117         cstate->in_functions = in_functions;
2118         cstate->typioparams = typioparams;
2119         cstate->defmap = defmap;
2120         cstate->defexprs = defexprs;
2121         cstate->num_defaults = num_defaults;
2122
2123         if (pipe)
2124         {
2125                 if (whereToSendOutput == DestRemote)
2126                         ReceiveCopyBegin(cstate);
2127                 else
2128                         cstate->copy_file = stdin;
2129         }
2130         else
2131         {
2132                 struct stat st;
2133
2134                 cstate->filename = pstrdup(filename);
2135                 cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_R);
2136
2137                 if (cstate->copy_file == NULL)
2138                         ereport(ERROR,
2139                                         (errcode_for_file_access(),
2140                                          errmsg("could not open file \"%s\" for reading: %m",
2141                                                         cstate->filename)));
2142
2143                 fstat(fileno(cstate->copy_file), &st);
2144                 if (S_ISDIR(st.st_mode))
2145                         ereport(ERROR,
2146                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2147                                          errmsg("\"%s\" is a directory", cstate->filename)));
2148         }
2149
2150         if (!cstate->binary)
2151         {
2152                 /* must rely on user to tell us... */
2153                 cstate->file_has_oids = cstate->oids;
2154         }
2155         else
2156         {
2157                 /* Read and verify binary header */
2158                 char            readSig[11];
2159                 int32           tmp;
2160
2161                 /* Signature */
2162                 if (CopyGetData(cstate, readSig, 11, 11) != 11 ||
2163                         memcmp(readSig, BinarySignature, 11) != 0)
2164                         ereport(ERROR,
2165                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2166                                          errmsg("COPY file signature not recognized")));
2167                 /* Flags field */
2168                 if (!CopyGetInt32(cstate, &tmp))
2169                         ereport(ERROR,
2170                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2171                                          errmsg("invalid COPY file header (missing flags)")));
2172                 cstate->file_has_oids = (tmp & (1 << 16)) != 0;
2173                 tmp &= ~(1 << 16);
2174                 if ((tmp >> 16) != 0)
2175                         ereport(ERROR,
2176                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2177                                  errmsg("unrecognized critical flags in COPY file header")));
2178                 /* Header extension length */
2179                 if (!CopyGetInt32(cstate, &tmp) ||
2180                         tmp < 0)
2181                         ereport(ERROR,
2182                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2183                                          errmsg("invalid COPY file header (missing length)")));
2184                 /* Skip extension header, if present */
2185                 while (tmp-- > 0)
2186                 {
2187                         if (CopyGetData(cstate, readSig, 1, 1) != 1)
2188                                 ereport(ERROR,
2189                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2190                                                  errmsg("invalid COPY file header (wrong length)")));
2191                 }
2192         }
2193
2194         if (cstate->file_has_oids && cstate->binary)
2195         {
2196                 getTypeBinaryInputInfo(OIDOID,
2197                                                            &in_func_oid, &cstate->oid_typioparam);
2198                 fmgr_info(in_func_oid, &cstate->oid_in_function);
2199         }
2200
2201         /* create workspace for CopyReadAttributes results */
2202         if (!cstate->binary)
2203         {
2204                 AttrNumber      attr_count = list_length(cstate->attnumlist);
2205                 int     nfields = cstate->file_has_oids ? (attr_count + 1) : attr_count;
2206
2207                 cstate->max_fields = nfields;
2208                 cstate->raw_fields = (char **) palloc(nfields * sizeof(char *));
2209         }
2210
2211         MemoryContextSwitchTo(oldcontext);
2212
2213         return cstate;
2214 }
2215
2216 /*
2217  * Read raw fields in the next line for COPY FROM in text or csv mode.
2218  * Return false if no more lines.
2219  *
2220  * An internal temporary buffer is returned via 'fields'. It is valid until
2221  * the next call of the function. Since the function returns all raw fields
2222  * in the input file, 'nfields' could be different from the number of columns
2223  * in the relation.
2224  *
2225  * NOTE: force_not_null option are not applied to the returned fields.
2226  */
2227 bool
2228 NextCopyFromRawFields(CopyState cstate, char ***fields, int *nfields)
2229 {
2230         int                     fldct;
2231         bool            done;
2232
2233         /* only available for text or csv input */
2234         Assert(!cstate->binary);
2235
2236         /* on input just throw the header line away */
2237         if (cstate->cur_lineno == 0 && cstate->header_line)
2238         {
2239                 cstate->cur_lineno++;
2240                 if (CopyReadLine(cstate))
2241                         return false;   /* done */
2242         }
2243
2244         cstate->cur_lineno++;
2245
2246         /* Actually read the line into memory here */
2247         done = CopyReadLine(cstate);
2248
2249         /*
2250          * EOF at start of line means we're done.  If we see EOF after
2251          * some characters, we act as though it was newline followed by
2252          * EOF, ie, process the line and then exit loop on next iteration.
2253          */
2254         if (done && cstate->line_buf.len == 0)
2255                 return false;
2256
2257         /* Parse the line into de-escaped field values */
2258         if (cstate->csv_mode)
2259                 fldct = CopyReadAttributesCSV(cstate);
2260         else
2261                 fldct = CopyReadAttributesText(cstate);
2262
2263         *fields = cstate->raw_fields;
2264         *nfields = fldct;
2265         return true;
2266 }
2267
2268 /*
2269  * Read next tuple from file for COPY FROM. Return false if no more tuples.
2270  *
2271  * 'econtext' is used to evaluate default expression for each columns not
2272  * read from the file. It can be NULL when no default values are used, i.e.
2273  * when all columns are read from the file.
2274  *
2275  * 'values' and 'nulls' arrays must be the same length as columns of the
2276  * relation passed to BeginCopyFrom. This function fills the arrays.
2277  * Oid of the tuple is returned with 'tupleOid' separately.
2278  */
2279 bool
2280 NextCopyFrom(CopyState cstate, ExprContext *econtext,
2281                          Datum *values, bool *nulls, Oid *tupleOid)
2282 {
2283         TupleDesc       tupDesc;
2284         Form_pg_attribute *attr;
2285         AttrNumber      num_phys_attrs,
2286                                 attr_count,
2287                                 num_defaults = cstate->num_defaults;
2288         FmgrInfo   *in_functions = cstate->in_functions;
2289         Oid                *typioparams = cstate->typioparams;
2290         int                     i;
2291         int         nfields;
2292         bool            isnull;
2293         bool            file_has_oids = cstate->file_has_oids;
2294         int                *defmap = cstate->defmap;
2295         ExprState **defexprs = cstate->defexprs;
2296
2297         tupDesc = RelationGetDescr(cstate->rel);
2298         attr = tupDesc->attrs;
2299         num_phys_attrs = tupDesc->natts;
2300         attr_count = list_length(cstate->attnumlist);
2301         nfields = file_has_oids ? (attr_count + 1) : attr_count;
2302
2303         /* Initialize all values for row to NULL */
2304         MemSet(values, 0, num_phys_attrs * sizeof(Datum));
2305         MemSet(nulls, true, num_phys_attrs * sizeof(bool));
2306
2307         if (!cstate->binary)
2308         {
2309                 char      **field_strings;
2310                 ListCell   *cur;
2311                 int                     fldct;
2312                 int                     fieldno;
2313                 char       *string;
2314
2315                 /* read raw fields in the next line */
2316                 if (!NextCopyFromRawFields(cstate, &field_strings, &fldct))
2317                         return false;
2318
2319                 /* check for overflowing fields */
2320                 if (nfields > 0 && fldct > nfields)
2321                         ereport(ERROR,
2322                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2323                                          errmsg("extra data after last expected column")));
2324
2325                 fieldno = 0;
2326
2327                 /* Read the OID field if present */
2328                 if (file_has_oids)
2329                 {
2330                         if (fieldno >= fldct)
2331                                 ereport(ERROR,
2332                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2333                                                  errmsg("missing data for OID column")));
2334                         string = field_strings[fieldno++];
2335
2336                         if (string == NULL)
2337                                 ereport(ERROR,
2338                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2339                                                  errmsg("null OID in COPY data")));
2340                         else if (cstate->oids && tupleOid != NULL)
2341                         {
2342                                 cstate->cur_attname = "oid";
2343                                 cstate->cur_attval = string;
2344                                 *tupleOid = DatumGetObjectId(DirectFunctionCall1(oidin,
2345                                                                                                    CStringGetDatum(string)));
2346                                 if (*tupleOid == InvalidOid)
2347                                         ereport(ERROR,
2348                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2349                                                          errmsg("invalid OID in COPY data")));
2350                                 cstate->cur_attname = NULL;
2351                                 cstate->cur_attval = NULL;
2352                         }
2353                 }
2354
2355                 /* Loop to read the user attributes on the line. */
2356                 foreach(cur, cstate->attnumlist)
2357                 {
2358                         int                     attnum = lfirst_int(cur);
2359                         int                     m = attnum - 1;
2360
2361                         if (fieldno >= fldct)
2362                                 ereport(ERROR,
2363                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2364                                                  errmsg("missing data for column \"%s\"",
2365                                                                 NameStr(attr[m]->attname))));
2366                         string = field_strings[fieldno++];
2367
2368                         if (cstate->csv_mode && string == NULL &&
2369                                 cstate->force_notnull_flags[m])
2370                         {
2371                                 /* Go ahead and read the NULL string */
2372                                 string = cstate->null_print;
2373                         }
2374
2375                         cstate->cur_attname = NameStr(attr[m]->attname);
2376                         cstate->cur_attval = string;
2377                         values[m] = InputFunctionCall(&in_functions[m],
2378                                                                                   string,
2379                                                                                   typioparams[m],
2380                                                                                   attr[m]->atttypmod);
2381                         if (string != NULL)
2382                                 nulls[m] = false;
2383                         cstate->cur_attname = NULL;
2384                         cstate->cur_attval = NULL;
2385                 }
2386
2387                 Assert(fieldno == nfields);
2388         }
2389         else
2390         {
2391                 /* binary */
2392                 int16           fld_count;
2393                 ListCell   *cur;
2394
2395                 cstate->cur_lineno++;
2396
2397                 if (!CopyGetInt16(cstate, &fld_count))
2398                 {
2399                         /* EOF detected (end of file, or protocol-level EOF) */
2400                         return false;
2401                 }
2402
2403                 if (fld_count == -1)
2404                 {
2405                         /*
2406                          * Received EOF marker.  In a V3-protocol copy, wait for
2407                          * the protocol-level EOF, and complain if it doesn't come
2408                          * immediately.  This ensures that we correctly handle
2409                          * CopyFail, if client chooses to send that now.
2410                          *
2411                          * Note that we MUST NOT try to read more data in an
2412                          * old-protocol copy, since there is no protocol-level EOF
2413                          * marker then.  We could go either way for copy from file,
2414                          * but choose to throw error if there's data after the EOF
2415                          * marker, for consistency with the new-protocol case.
2416                          */
2417                         char    dummy;
2418
2419                         if (cstate->copy_dest != COPY_OLD_FE &&
2420                                 CopyGetData(cstate, &dummy, 1, 1) > 0)
2421                                 ereport(ERROR,
2422                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2423                                                  errmsg("received copy data after EOF marker")));
2424                         return false;
2425                 }
2426
2427                 if (fld_count != attr_count)
2428                         ereport(ERROR,
2429                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2430                                          errmsg("row field count is %d, expected %d",
2431                                                         (int) fld_count, attr_count)));
2432
2433                 if (file_has_oids)
2434                 {
2435                         Oid             loaded_oid;
2436
2437                         cstate->cur_attname = "oid";
2438                         loaded_oid =
2439                                 DatumGetObjectId(CopyReadBinaryAttribute(cstate,
2440                                                                                                                  0,
2441                                                                                                                  &cstate->oid_in_function,
2442                                                                                                                  cstate->oid_typioparam,
2443                                                                                                                  -1,
2444                                                                                                                  &isnull));
2445                         if (isnull || loaded_oid == InvalidOid)
2446                                 ereport(ERROR,
2447                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2448                                                  errmsg("invalid OID in COPY data")));
2449                         cstate->cur_attname = NULL;
2450                         if (cstate->oids && tupleOid != NULL)
2451                                 *tupleOid = loaded_oid;
2452                 }
2453
2454                 i = 0;
2455                 foreach(cur, cstate->attnumlist)
2456                 {
2457                         int                     attnum = lfirst_int(cur);
2458                         int                     m = attnum - 1;
2459
2460                         cstate->cur_attname = NameStr(attr[m]->attname);
2461                         i++;
2462                         values[m] = CopyReadBinaryAttribute(cstate,
2463                                                                                                 i,
2464                                                                                                 &in_functions[m],
2465                                                                                                 typioparams[m],
2466                                                                                                 attr[m]->atttypmod,
2467                                                                                                 &nulls[m]);
2468                         cstate->cur_attname = NULL;
2469                 }
2470         }
2471
2472         /*
2473          * Now compute and insert any defaults available for the columns not
2474          * provided by the input data.  Anything not processed here or above
2475          * will remain NULL.
2476          */
2477         for (i = 0; i < num_defaults; i++)
2478         {
2479                 /*
2480                  * The caller must supply econtext and have switched into the
2481                  * per-tuple memory context in it.
2482                  */
2483                 Assert(econtext != NULL);
2484                 Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory);
2485
2486                 values[defmap[i]] = ExecEvalExpr(defexprs[i], econtext,
2487                                                                                  &nulls[defmap[i]], NULL);
2488         }
2489
2490         return true;
2491 }
2492
2493 /*
2494  * Clean up storage and release resources for COPY FROM.
2495  */
2496 void
2497 EndCopyFrom(CopyState cstate)
2498 {
2499         /* No COPY FROM related resources except memory. */
2500
2501         EndCopy(cstate);
2502 }
2503
2504 /*
2505  * Read the next input line and stash it in line_buf, with conversion to
2506  * server encoding.
2507  *
2508  * Result is true if read was terminated by EOF, false if terminated
2509  * by newline.  The terminating newline or EOF marker is not included
2510  * in the final value of line_buf.
2511  */
2512 static bool
2513 CopyReadLine(CopyState cstate)
2514 {
2515         bool            result;
2516
2517         resetStringInfo(&cstate->line_buf);
2518
2519         /* Mark that encoding conversion hasn't occurred yet */
2520         cstate->line_buf_converted = false;
2521
2522         /* Parse data and transfer into line_buf */
2523         result = CopyReadLineText(cstate);
2524
2525         if (result)
2526         {
2527                 /*
2528                  * Reached EOF.  In protocol version 3, we should ignore anything
2529                  * after \. up to the protocol end of copy data.  (XXX maybe better
2530                  * not to treat \. as special?)
2531                  */
2532                 if (cstate->copy_dest == COPY_NEW_FE)
2533                 {
2534                         do
2535                         {
2536                                 cstate->raw_buf_index = cstate->raw_buf_len;
2537                         } while (CopyLoadRawBuf(cstate));
2538                 }
2539         }
2540         else
2541         {
2542                 /*
2543                  * If we didn't hit EOF, then we must have transferred the EOL marker
2544                  * to line_buf along with the data.  Get rid of it.
2545                  */
2546                 switch (cstate->eol_type)
2547                 {
2548                         case EOL_NL:
2549                                 Assert(cstate->line_buf.len >= 1);
2550                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
2551                                 cstate->line_buf.len--;
2552                                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
2553                                 break;
2554                         case EOL_CR:
2555                                 Assert(cstate->line_buf.len >= 1);
2556                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r');
2557                                 cstate->line_buf.len--;
2558                                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
2559                                 break;
2560                         case EOL_CRNL:
2561                                 Assert(cstate->line_buf.len >= 2);
2562                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r');
2563                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
2564                                 cstate->line_buf.len -= 2;
2565                                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
2566                                 break;
2567                         case EOL_UNKNOWN:
2568                                 /* shouldn't get here */
2569                                 Assert(false);
2570                                 break;
2571                 }
2572         }
2573
2574         /* Done reading the line.  Convert it to server encoding. */
2575         if (cstate->need_transcoding)
2576         {
2577                 char       *cvt;
2578
2579                 cvt = pg_client_to_server(cstate->line_buf.data,
2580                                                                   cstate->line_buf.len);
2581                 if (cvt != cstate->line_buf.data)
2582                 {
2583                         /* transfer converted data back to line_buf */
2584                         resetStringInfo(&cstate->line_buf);
2585                         appendBinaryStringInfo(&cstate->line_buf, cvt, strlen(cvt));
2586                         pfree(cvt);
2587                 }
2588         }
2589
2590         /* Now it's safe to use the buffer in error messages */
2591         cstate->line_buf_converted = true;
2592
2593         return result;
2594 }
2595
2596 /*
2597  * CopyReadLineText - inner loop of CopyReadLine for text mode
2598  */
2599 static bool
2600 CopyReadLineText(CopyState cstate)
2601 {
2602         char       *copy_raw_buf;
2603         int                     raw_buf_ptr;
2604         int                     copy_buf_len;
2605         bool            need_data = false;
2606         bool            hit_eof = false;
2607         bool            result = false;
2608         char            mblen_str[2];
2609
2610         /* CSV variables */
2611         bool            first_char_in_line = true;
2612         bool            in_quote = false,
2613                                 last_was_esc = false;
2614         char            quotec = '\0';
2615         char            escapec = '\0';
2616
2617         if (cstate->csv_mode)
2618         {
2619                 quotec = cstate->quote[0];
2620                 escapec = cstate->escape[0];
2621                 /* ignore special escape processing if it's the same as quotec */
2622                 if (quotec == escapec)
2623                         escapec = '\0';
2624         }
2625
2626         mblen_str[1] = '\0';
2627
2628         /*
2629          * The objective of this loop is to transfer the entire next input line
2630          * into line_buf.  Hence, we only care for detecting newlines (\r and/or
2631          * \n) and the end-of-copy marker (\.).
2632          *
2633          * In CSV mode, \r and \n inside a quoted field are just part of the data
2634          * value and are put in line_buf.  We keep just enough state to know if we
2635          * are currently in a quoted field or not.
2636          *
2637          * These four characters, and the CSV escape and quote characters, are
2638          * assumed the same in frontend and backend encodings.
2639          *
2640          * For speed, we try to move data from raw_buf to line_buf in chunks
2641          * rather than one character at a time.  raw_buf_ptr points to the next
2642          * character to examine; any characters from raw_buf_index to raw_buf_ptr
2643          * have been determined to be part of the line, but not yet transferred to
2644          * line_buf.
2645          *
2646          * For a little extra speed within the loop, we copy raw_buf and
2647          * raw_buf_len into local variables.
2648          */
2649         copy_raw_buf = cstate->raw_buf;
2650         raw_buf_ptr = cstate->raw_buf_index;
2651         copy_buf_len = cstate->raw_buf_len;
2652
2653         for (;;)
2654         {
2655                 int                     prev_raw_ptr;
2656                 char            c;
2657
2658                 /*
2659                  * Load more data if needed.  Ideally we would just force four bytes
2660                  * of read-ahead and avoid the many calls to
2661                  * IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(), but the COPY_OLD_FE protocol
2662                  * does not allow us to read too far ahead or we might read into the
2663                  * next data, so we read-ahead only as far we know we can.      One
2664                  * optimization would be to read-ahead four byte here if
2665                  * cstate->copy_dest != COPY_OLD_FE, but it hardly seems worth it,
2666                  * considering the size of the buffer.
2667                  */
2668                 if (raw_buf_ptr >= copy_buf_len || need_data)
2669                 {
2670                         REFILL_LINEBUF;
2671
2672                         /*
2673                          * Try to read some more data.  This will certainly reset
2674                          * raw_buf_index to zero, and raw_buf_ptr must go with it.
2675                          */
2676                         if (!CopyLoadRawBuf(cstate))
2677                                 hit_eof = true;
2678                         raw_buf_ptr = 0;
2679                         copy_buf_len = cstate->raw_buf_len;
2680
2681                         /*
2682                          * If we are completely out of data, break out of the loop,
2683                          * reporting EOF.
2684                          */
2685                         if (copy_buf_len <= 0)
2686                         {
2687                                 result = true;
2688                                 break;
2689                         }
2690                         need_data = false;
2691                 }
2692
2693                 /* OK to fetch a character */
2694                 prev_raw_ptr = raw_buf_ptr;
2695                 c = copy_raw_buf[raw_buf_ptr++];
2696
2697                 if (cstate->csv_mode)
2698                 {
2699                         /*
2700                          * If character is '\\' or '\r', we may need to look ahead below.
2701                          * Force fetch of the next character if we don't already have it.
2702                          * We need to do this before changing CSV state, in case one of
2703                          * these characters is also the quote or escape character.
2704                          *
2705                          * Note: old-protocol does not like forced prefetch, but it's OK
2706                          * here since we cannot validly be at EOF.
2707                          */
2708                         if (c == '\\' || c == '\r')
2709                         {
2710                                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
2711                         }
2712
2713                         /*
2714                          * Dealing with quotes and escapes here is mildly tricky. If the
2715                          * quote char is also the escape char, there's no problem - we
2716                          * just use the char as a toggle. If they are different, we need
2717                          * to ensure that we only take account of an escape inside a
2718                          * quoted field and immediately preceding a quote char, and not
2719                          * the second in a escape-escape sequence.
2720                          */
2721                         if (in_quote && c == escapec)
2722                                 last_was_esc = !last_was_esc;
2723                         if (c == quotec && !last_was_esc)
2724                                 in_quote = !in_quote;
2725                         if (c != escapec)
2726                                 last_was_esc = false;
2727
2728                         /*
2729                          * Updating the line count for embedded CR and/or LF chars is
2730                          * necessarily a little fragile - this test is probably about the
2731                          * best we can do.      (XXX it's arguable whether we should do this
2732                          * at all --- is cur_lineno a physical or logical count?)
2733                          */
2734                         if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r'))
2735                                 cstate->cur_lineno++;
2736                 }
2737
2738                 /* Process \r */
2739                 if (c == '\r' && (!cstate->csv_mode || !in_quote))
2740                 {
2741                         /* Check for \r\n on first line, _and_ handle \r\n. */
2742                         if (cstate->eol_type == EOL_UNKNOWN ||
2743                                 cstate->eol_type == EOL_CRNL)
2744                         {
2745                                 /*
2746                                  * If need more data, go back to loop top to load it.
2747                                  *
2748                                  * Note that if we are at EOF, c will wind up as '\0' because
2749                                  * of the guaranteed pad of raw_buf.
2750                                  */
2751                                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
2752
2753                                 /* get next char */
2754                                 c = copy_raw_buf[raw_buf_ptr];
2755
2756                                 if (c == '\n')
2757                                 {
2758                                         raw_buf_ptr++;          /* eat newline */
2759                                         cstate->eol_type = EOL_CRNL;            /* in case not set yet */
2760                                 }
2761                                 else
2762                                 {
2763                                         /* found \r, but no \n */
2764                                         if (cstate->eol_type == EOL_CRNL)
2765                                                 ereport(ERROR,
2766                                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2767                                                                  !cstate->csv_mode ?
2768                                                         errmsg("literal carriage return found in data") :
2769                                                         errmsg("unquoted carriage return found in data"),
2770                                                                  !cstate->csv_mode ?
2771                                                 errhint("Use \"\\r\" to represent carriage return.") :
2772                                                                  errhint("Use quoted CSV field to represent carriage return.")));
2773
2774                                         /*
2775                                          * if we got here, it is the first line and we didn't find
2776                                          * \n, so don't consume the peeked character
2777                                          */
2778                                         cstate->eol_type = EOL_CR;
2779                                 }
2780                         }
2781                         else if (cstate->eol_type == EOL_NL)
2782                                 ereport(ERROR,
2783                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2784                                                  !cstate->csv_mode ?
2785                                                  errmsg("literal carriage return found in data") :
2786                                                  errmsg("unquoted carriage return found in data"),
2787                                                  !cstate->csv_mode ?
2788                                            errhint("Use \"\\r\" to represent carriage return.") :
2789                                                  errhint("Use quoted CSV field to represent carriage return.")));
2790                         /* If reach here, we have found the line terminator */
2791                         break;
2792                 }
2793
2794                 /* Process \n */
2795                 if (c == '\n' && (!cstate->csv_mode || !in_quote))
2796                 {
2797                         if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
2798                                 ereport(ERROR,
2799                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2800                                                  !cstate->csv_mode ?
2801                                                  errmsg("literal newline found in data") :
2802                                                  errmsg("unquoted newline found in data"),
2803                                                  !cstate->csv_mode ?
2804                                                  errhint("Use \"\\n\" to represent newline.") :
2805                                          errhint("Use quoted CSV field to represent newline.")));
2806                         cstate->eol_type = EOL_NL;      /* in case not set yet */
2807                         /* If reach here, we have found the line terminator */
2808                         break;
2809                 }
2810
2811                 /*
2812                  * In CSV mode, we only recognize \. alone on a line.  This is because
2813                  * \. is a valid CSV data value.
2814                  */
2815                 if (c == '\\' && (!cstate->csv_mode || first_char_in_line))
2816                 {
2817                         char            c2;
2818
2819                         IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
2820                         IF_NEED_REFILL_AND_EOF_BREAK(0);
2821
2822                         /* -----
2823                          * get next character
2824                          * Note: we do not change c so if it isn't \., we can fall
2825                          * through and continue processing for client encoding.
2826                          * -----
2827                          */
2828                         c2 = copy_raw_buf[raw_buf_ptr];
2829
2830                         if (c2 == '.')
2831                         {
2832                                 raw_buf_ptr++;  /* consume the '.' */
2833
2834                                 /*
2835                                  * Note: if we loop back for more data here, it does not
2836                                  * matter that the CSV state change checks are re-executed; we
2837                                  * will come back here with no important state changed.
2838                                  */
2839                                 if (cstate->eol_type == EOL_CRNL)
2840                                 {
2841                                         /* Get the next character */
2842                                         IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
2843                                         /* if hit_eof, c2 will become '\0' */
2844                                         c2 = copy_raw_buf[raw_buf_ptr++];
2845
2846                                         if (c2 == '\n')
2847                                         {
2848                                                 if (!cstate->csv_mode)
2849                                                         ereport(ERROR,
2850                                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2851                                                                          errmsg("end-of-copy marker does not match previous newline style")));
2852                                                 else
2853                                                         NO_END_OF_COPY_GOTO;
2854                                         }
2855                                         else if (c2 != '\r')
2856                                         {
2857                                                 if (!cstate->csv_mode)
2858                                                         ereport(ERROR,
2859                                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2860                                                                          errmsg("end-of-copy marker corrupt")));
2861                                                 else
2862                                                         NO_END_OF_COPY_GOTO;
2863                                         }
2864                                 }
2865
2866                                 /* Get the next character */
2867                                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
2868                                 /* if hit_eof, c2 will become '\0' */
2869                                 c2 = copy_raw_buf[raw_buf_ptr++];
2870
2871                                 if (c2 != '\r' && c2 != '\n')
2872                                 {
2873                                         if (!cstate->csv_mode)
2874                                                 ereport(ERROR,
2875                                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2876                                                                  errmsg("end-of-copy marker corrupt")));
2877                                         else
2878                                                 NO_END_OF_COPY_GOTO;
2879                                 }
2880
2881                                 if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
2882                                         (cstate->eol_type == EOL_CRNL && c2 != '\n') ||
2883                                         (cstate->eol_type == EOL_CR && c2 != '\r'))
2884                                 {
2885                                         ereport(ERROR,
2886                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2887                                                          errmsg("end-of-copy marker does not match previous newline style")));
2888                                 }
2889
2890                                 /*
2891                                  * Transfer only the data before the \. into line_buf, then
2892                                  * discard the data and the \. sequence.
2893                                  */
2894                                 if (prev_raw_ptr > cstate->raw_buf_index)
2895                                         appendBinaryStringInfo(&cstate->line_buf,
2896                                                                          cstate->raw_buf + cstate->raw_buf_index,
2897                                                                            prev_raw_ptr - cstate->raw_buf_index);
2898                                 cstate->raw_buf_index = raw_buf_ptr;
2899                                 result = true;  /* report EOF */
2900                                 break;
2901                         }
2902                         else if (!cstate->csv_mode)
2903
2904                                 /*
2905                                  * If we are here, it means we found a backslash followed by
2906                                  * something other than a period.  In non-CSV mode, anything
2907                                  * after a backslash is special, so we skip over that second
2908                                  * character too.  If we didn't do that \\. would be
2909                                  * considered an eof-of copy, while in non-CSV mode it is a
2910                                  * literal backslash followed by a period.      In CSV mode,
2911                                  * backslashes are not special, so we want to process the
2912                                  * character after the backslash just like a normal character,
2913                                  * so we don't increment in those cases.
2914                                  */
2915                                 raw_buf_ptr++;
2916                 }
2917
2918                 /*
2919                  * This label is for CSV cases where \. appears at the start of a
2920                  * line, but there is more text after it, meaning it was a data value.
2921                  * We are more strict for \. in CSV mode because \. could be a data
2922                  * value, while in non-CSV mode, \. cannot be a data value.
2923                  */
2924 not_end_of_copy:
2925
2926                 /*
2927                  * Process all bytes of a multi-byte character as a group.
2928                  *
2929                  * We only support multi-byte sequences where the first byte has the
2930                  * high-bit set, so as an optimization we can avoid this block
2931                  * entirely if it is not set.
2932                  */
2933                 if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
2934                 {
2935                         int                     mblen;
2936
2937                         mblen_str[0] = c;
2938                         /* All our encodings only read the first byte to get the length */
2939                         mblen = pg_encoding_mblen(cstate->client_encoding, mblen_str);
2940                         IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(mblen - 1);
2941                         IF_NEED_REFILL_AND_EOF_BREAK(mblen - 1);
2942                         raw_buf_ptr += mblen - 1;
2943                 }
2944                 first_char_in_line = false;
2945         }                                                       /* end of outer loop */
2946
2947         /*
2948          * Transfer any still-uncopied data to line_buf.
2949          */
2950         REFILL_LINEBUF;
2951
2952         return result;
2953 }
2954
/*
 *	Map one hexadecimal digit character to its numeric value (0..15).
 *
 *	Both upper- and lower-case letters are accepted.  The argument is
 *	expected to be a valid hex digit; the callers in this file test it
 *	with isxdigit() before calling.  No validation is done here.
 */
static int
GetDecimalFromHex(char hex)
{
	/* <ctype.h> functions require a value representable as unsigned char */
	unsigned char digit = (unsigned char) hex;

	return isdigit(digit) ? (hex - '0') : (tolower(digit) - 'a' + 10);
}
2966
2967 /*
2968  * Parse the current line into separate attributes (fields),
2969  * performing de-escaping as needed.
2970  *
2971  * The input is in line_buf.  We use attribute_buf to hold the result
2972  * strings.  cstate->raw_fields[k] is set to point to the k'th attribute 
2973  * string, or NULL when the input matches the null marker string.  
2974  * This array is expanded as necessary.
2975  *
2976  * (Note that the caller cannot check for nulls since the returned 
2977  * string would be the post-de-escaping equivalent, which may look 
2978  * the same as some valid data string.)
2979  *
2980  * delim is the column delimiter string (must be just one byte for now).
2981  * null_print is the null marker string.  Note that this is compared to
2982  * the pre-de-escaped input string.
2983  *
2984  * The return value is the number of fields actually read.
2985  */
2986 static int
2987 CopyReadAttributesText(CopyState cstate)
2988 {
2989         char            delimc = cstate->delim[0];
2990         int                     fieldno;
2991         char       *output_ptr;
2992         char       *cur_ptr;
2993         char       *line_end_ptr;
2994
2995         /*
2996          * We need a special case for zero-column tables: check that the input
2997          * line is empty, and return.
2998          */
2999         if (cstate->max_fields <= 0)
3000         {
3001                 if (cstate->line_buf.len != 0)
3002                         ereport(ERROR,
3003                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3004                                          errmsg("extra data after last expected column")));
3005                 return 0;
3006         }
3007
3008         resetStringInfo(&cstate->attribute_buf);
3009
3010         /*
3011          * The de-escaped attributes will certainly not be longer than the input
3012          * data line, so we can just force attribute_buf to be large enough and
3013          * then transfer data without any checks for enough space.      We need to do
3014          * it this way because enlarging attribute_buf mid-stream would invalidate
3015          * pointers already stored into cstate->raw_fields[].
3016          */
3017         if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
3018                 enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
3019         output_ptr = cstate->attribute_buf.data;
3020
3021         /* set pointer variables for loop */
3022         cur_ptr = cstate->line_buf.data;
3023         line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
3024
3025         /* Outer loop iterates over fields */
3026         fieldno = 0;
3027         for (;;)
3028         {
3029                 bool            found_delim = false;
3030                 char       *start_ptr;
3031                 char       *end_ptr;
3032                 int                     input_len;
3033                 bool            saw_non_ascii = false;
3034
3035                 /* Make sure there is enough space for the next value */
3036                 if (fieldno >= cstate->max_fields)
3037                 {
3038                         cstate->max_fields *= 2;
3039                         cstate->raw_fields = 
3040                                 repalloc(cstate->raw_fields, cstate->max_fields*sizeof(char *));
3041                 }
3042
3043                 /* Remember start of field on both input and output sides */
3044                 start_ptr = cur_ptr;
3045                 cstate->raw_fields[fieldno] = output_ptr;
3046
3047                 /* Scan data for field */
3048                 for (;;)
3049                 {
3050                         char            c;
3051
3052                         end_ptr = cur_ptr;
3053                         if (cur_ptr >= line_end_ptr)
3054                                 break;
3055                         c = *cur_ptr++;
3056                         if (c == delimc)
3057                         {
3058                                 found_delim = true;
3059                                 break;
3060                         }
3061                         if (c == '\\')
3062                         {
3063                                 if (cur_ptr >= line_end_ptr)
3064                                         break;
3065                                 c = *cur_ptr++;
3066                                 switch (c)
3067                                 {
3068                                         case '0':
3069                                         case '1':
3070                                         case '2':
3071                                         case '3':
3072                                         case '4':
3073                                         case '5':
3074                                         case '6':
3075                                         case '7':
3076                                                 {
3077                                                         /* handle \013 */
3078                                                         int                     val;
3079
3080                                                         val = OCTVALUE(c);
3081                                                         if (cur_ptr < line_end_ptr)
3082                                                         {
3083                                                                 c = *cur_ptr;
3084                                                                 if (ISOCTAL(c))
3085                                                                 {
3086                                                                         cur_ptr++;
3087                                                                         val = (val << 3) + OCTVALUE(c);
3088                                                                         if (cur_ptr < line_end_ptr)
3089                                                                         {
3090                                                                                 c = *cur_ptr;
3091                                                                                 if (ISOCTAL(c))
3092                                                                                 {
3093                                                                                         cur_ptr++;
3094                                                                                         val = (val << 3) + OCTVALUE(c);
3095                                                                                 }
3096                                                                         }
3097                                                                 }
3098                                                         }
3099                                                         c = val & 0377;
3100                                                         if (c == '\0' || IS_HIGHBIT_SET(c))
3101                                                                 saw_non_ascii = true;
3102                                                 }
3103                                                 break;
3104                                         case 'x':
3105                                                 /* Handle \x3F */
3106                                                 if (cur_ptr < line_end_ptr)
3107                                                 {
3108                                                         char            hexchar = *cur_ptr;
3109
3110                                                         if (isxdigit((unsigned char) hexchar))
3111                                                         {
3112                                                                 int                     val = GetDecimalFromHex(hexchar);
3113
3114                                                                 cur_ptr++;
3115                                                                 if (cur_ptr < line_end_ptr)
3116                                                                 {
3117                                                                         hexchar = *cur_ptr;
3118                                                                         if (isxdigit((unsigned char) hexchar))
3119                                                                         {
3120                                                                                 cur_ptr++;
3121                                                                                 val = (val << 4) + GetDecimalFromHex(hexchar);
3122                                                                         }
3123                                                                 }
3124                                                                 c = val & 0xff;
3125                                                                 if (c == '\0' || IS_HIGHBIT_SET(c))
3126                                                                         saw_non_ascii = true;
3127                                                         }
3128                                                 }
3129                                                 break;
3130                                         case 'b':
3131                                                 c = '\b';
3132                                                 break;
3133                                         case 'f':
3134                                                 c = '\f';
3135                                                 break;
3136                                         case 'n':
3137                                                 c = '\n';
3138                                                 break;
3139                                         case 'r':
3140                                                 c = '\r';
3141                                                 break;
3142                                         case 't':
3143                                                 c = '\t';
3144                                                 break;
3145                                         case 'v':
3146                                                 c = '\v';
3147                                                 break;
3148
3149                                                 /*
3150                                                  * in all other cases, take the char after '\'
3151                                                  * literally
3152                                                  */
3153                                 }
3154                         }
3155
3156                         /* Add c to output string */
3157                         *output_ptr++ = c;
3158                 }
3159
3160                 /* Terminate attribute value in output area */
3161                 *output_ptr++ = '\0';
3162
3163                 /*
3164                  * If we de-escaped a non-7-bit-ASCII char, make sure we still have
3165                  * valid data for the db encoding. Avoid calling strlen here for the
3166                  * sake of efficiency.
3167                  */
3168                 if (saw_non_ascii)
3169                 {
3170                         char       *fld = cstate->raw_fields[fieldno];
3171
3172                         pg_verifymbstr(fld, output_ptr - (fld + 1), false);
3173                 }
3174
3175                 /* Check whether raw input matched null marker */
3176                 input_len = end_ptr - start_ptr;
3177                 if (input_len == cstate->null_print_len &&
3178                         strncmp(start_ptr, cstate->null_print, input_len) == 0)
3179                         cstate->raw_fields[fieldno] = NULL;
3180
3181                 fieldno++;
3182                 /* Done if we hit EOL instead of a delim */
3183                 if (!found_delim)
3184                         break;
3185         }
3186
3187         /* Clean up state of attribute_buf */
3188         output_ptr--;
3189         Assert(*output_ptr == '\0');
3190         cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
3191
3192         return fieldno;
3193 }
3194
3195 /*
3196  * Parse the current line into separate attributes (fields),
3197  * performing de-escaping as needed.  This has exactly the same API as
3198  * CopyReadAttributesText, except we parse the fields according to
3199  * "standard" (i.e. common) CSV usage.
3200  */
3201 static int
3202 CopyReadAttributesCSV(CopyState cstate)
3203 {
3204         char            delimc = cstate->delim[0];
3205         char            quotec = cstate->quote[0];
3206         char            escapec = cstate->escape[0];
3207         int                     fieldno;
3208         char       *output_ptr;
3209         char       *cur_ptr;
3210         char       *line_end_ptr;
3211
3212         /*
3213          * We need a special case for zero-column tables: check that the input
3214          * line is empty, and return.
3215          */
3216         if (cstate->max_fields <= 0)
3217         {
3218                 if (cstate->line_buf.len != 0)
3219                         ereport(ERROR,
3220                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3221                                          errmsg("extra data after last expected column")));
3222                 return 0;
3223         }
3224
3225         resetStringInfo(&cstate->attribute_buf);
3226
3227         /*
3228          * The de-escaped attributes will certainly not be longer than the input
3229          * data line, so we can just force attribute_buf to be large enough and
3230          * then transfer data without any checks for enough space.      We need to do
3231          * it this way because enlarging attribute_buf mid-stream would invalidate
3232          * pointers already stored into cstate->raw_fields[].
3233          */
3234         if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
3235                 enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
3236         output_ptr = cstate->attribute_buf.data;
3237
3238         /* set pointer variables for loop */
3239         cur_ptr = cstate->line_buf.data;
3240         line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
3241
3242         /* Outer loop iterates over fields */
3243         fieldno = 0;
3244         for (;;)
3245         {
3246                 bool            found_delim = false;
3247                 bool            saw_quote = false;
3248                 char       *start_ptr;
3249                 char       *end_ptr;
3250                 int                     input_len;
3251
3252                 /* Make sure there is enough space for the next value */
3253                 if (fieldno >= cstate->max_fields)
3254                 {
3255                         cstate->max_fields *= 2;
3256                         cstate->raw_fields = 
3257                                 repalloc(cstate->raw_fields, cstate->max_fields*sizeof(char *));
3258                 }
3259
3260                 /* Remember start of field on both input and output sides */
3261                 start_ptr = cur_ptr;
3262                 cstate->raw_fields[fieldno] = output_ptr;
3263
3264                 /*
3265                  * Scan data for field,
3266                  *
3267                  * The loop starts in "not quote" mode and then toggles between that
3268                  * and "in quote" mode. The loop exits normally if it is in "not
3269                  * quote" mode and a delimiter or line end is seen.
3270                  */
3271                 for (;;)
3272                 {
3273                         char            c;
3274
3275                         /* Not in quote */
3276                         for (;;)
3277                         {
3278                                 end_ptr = cur_ptr;
3279                                 if (cur_ptr >= line_end_ptr)
3280                                         goto endfield;
3281                                 c = *cur_ptr++;
3282                                 /* unquoted field delimiter */
3283                                 if (c == delimc)
3284                                 {
3285                                         found_delim = true;
3286                                         goto endfield;
3287                                 }
3288                                 /* start of quoted field (or part of field) */
3289                                 if (c == quotec)
3290                                 {
3291                                         saw_quote = true;
3292                                         break;
3293                                 }
3294                                 /* Add c to output string */
3295                                 *output_ptr++ = c;
3296                         }
3297
3298                         /* In quote */
3299                         for (;;)
3300                         {
3301                                 end_ptr = cur_ptr;
3302                                 if (cur_ptr >= line_end_ptr)
3303                                         ereport(ERROR,
3304                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3305                                                          errmsg("unterminated CSV quoted field")));
3306
3307                                 c = *cur_ptr++;
3308
3309                                 /* escape within a quoted field */
3310                                 if (c == escapec)
3311                                 {
3312                                         /*
3313                                          * peek at the next char if available, and escape it if it
3314                                          * is an escape char or a quote char
3315                                          */
3316                                         if (cur_ptr < line_end_ptr)
3317                                         {
3318                                                 char            nextc = *cur_ptr;
3319
3320                                                 if (nextc == escapec || nextc == quotec)
3321                                                 {
3322                                                         *output_ptr++ = nextc;
3323                                                         cur_ptr++;
3324                                                         continue;
3325                                                 }
3326                                         }
3327                                 }
3328
3329                                 /*
3330                                  * end of quoted field. Must do this test after testing for
3331                                  * escape in case quote char and escape char are the same
3332                                  * (which is the common case).
3333                                  */
3334                                 if (c == quotec)
3335                                         break;
3336
3337                                 /* Add c to output string */
3338                                 *output_ptr++ = c;
3339                         }
3340                 }
3341 endfield:
3342
3343                 /* Terminate attribute value in output area */
3344                 *output_ptr++ = '\0';
3345
3346                 /* Check whether raw input matched null marker */
3347                 input_len = end_ptr - start_ptr;
3348                 if (!saw_quote && input_len == cstate->null_print_len &&
3349                         strncmp(start_ptr, cstate->null_print, input_len) == 0)
3350                         cstate->raw_fields[fieldno] = NULL;
3351
3352                 fieldno++;
3353                 /* Done if we hit EOL instead of a delim */
3354                 if (!found_delim)
3355                         break;
3356         }
3357
3358         /* Clean up state of attribute_buf */
3359         output_ptr--;
3360         Assert(*output_ptr == '\0');
3361         cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
3362
3363         return fieldno;
3364 }
3365
3366
3367 /*
3368  * Read a binary attribute
3369  */
3370 static Datum
3371 CopyReadBinaryAttribute(CopyState cstate,
3372                                                 int column_no, FmgrInfo *flinfo,
3373                                                 Oid typioparam, int32 typmod,
3374                                                 bool *isnull)
3375 {
3376         int32           fld_size;
3377         Datum           result;
3378
3379         if (!CopyGetInt32(cstate, &fld_size))
3380                 ereport(ERROR,
3381                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3382                                  errmsg("unexpected EOF in COPY data")));
3383         if (fld_size == -1)
3384         {
3385                 *isnull = true;
3386                 return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod);
3387         }
3388         if (fld_size < 0)
3389                 ereport(ERROR,
3390                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3391                                  errmsg("invalid field size")));
3392
3393         /* reset attribute_buf to empty, and load raw data in it */
3394         resetStringInfo(&cstate->attribute_buf);
3395
3396         enlargeStringInfo(&cstate->attribute_buf, fld_size);
3397         if (CopyGetData(cstate, cstate->attribute_buf.data,
3398                                         fld_size, fld_size) != fld_size)
3399                 ereport(ERROR,
3400                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3401                                  errmsg("unexpected EOF in COPY data")));
3402
3403         cstate->attribute_buf.len = fld_size;
3404         cstate->attribute_buf.data[fld_size] = '\0';
3405
3406         /* Call the column type's binary input converter */
3407         result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf,
3408                                                                  typioparam, typmod);
3409
3410         /* Trouble if it didn't eat the whole buffer */
3411         if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
3412                 ereport(ERROR,
3413                                 (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
3414                                  errmsg("incorrect binary data format")));
3415
3416         *isnull = false;
3417         return result;
3418 }
3419
3420 /*
3421  * Send text representation of one attribute, with conversion and escaping
3422  */
3423 #define DUMPSOFAR() \
3424         do { \
3425                 if (ptr > start) \
3426                         CopySendData(cstate, start, ptr - start); \
3427         } while (0)
3428
3429 static void
3430 CopyAttributeOutText(CopyState cstate, char *string)
3431 {
3432         char       *ptr;
3433         char       *start;
3434         char            c;
3435         char            delimc = cstate->delim[0];
3436
3437         if (cstate->need_transcoding)
3438                 ptr = pg_server_to_client(string, strlen(string));
3439         else
3440                 ptr = string;
3441
3442         /*
3443          * We have to grovel through the string searching for control characters
3444          * and instances of the delimiter character.  In most cases, though, these
3445          * are infrequent.      To avoid overhead from calling CopySendData once per
3446          * character, we dump out all characters between escaped characters in a
3447          * single call.  The loop invariant is that the data from "start" to "ptr"
3448          * can be sent literally, but hasn't yet been.
3449          *
3450          * We can skip pg_encoding_mblen() overhead when encoding is safe, because
3451          * in valid backend encodings, extra bytes of a multibyte character never
3452          * look like ASCII.  This loop is sufficiently performance-critical that
3453          * it's worth making two copies of it to get the IS_HIGHBIT_SET() test out
3454          * of the normal safe-encoding path.
3455          */
3456         if (cstate->encoding_embeds_ascii)
3457         {
3458                 start = ptr;
3459                 while ((c = *ptr) != '\0')
3460                 {
3461                         if ((unsigned char) c < (unsigned char) 0x20)
3462                         {
3463                                 /*
3464                                  * \r and \n must be escaped, the others are traditional. We
3465                                  * prefer to dump these using the C-like notation, rather than
3466                                  * a backslash and the literal character, because it makes the
3467                                  * dump file a bit more proof against Microsoftish data
3468                                  * mangling.
3469                                  */
3470                                 switch (c)
3471                                 {
3472                                         case '\b':
3473                                                 c = 'b';
3474                                                 break;
3475                                         case '\f':
3476                                                 c = 'f';
3477                                                 break;
3478                                         case '\n':
3479                                                 c = 'n';
3480                                                 break;
3481                                         case '\r':
3482                                                 c = 'r';
3483                                                 break;
3484                                         case '\t':
3485                                                 c = 't';
3486                                                 break;
3487                                         case '\v':
3488                                                 c = 'v';
3489                                                 break;
3490                                         default:
3491                                                 /* If it's the delimiter, must backslash it */
3492                                                 if (c == delimc)
3493                                                         break;
3494                                                 /* All ASCII control chars are length 1 */
3495                                                 ptr++;
3496                                                 continue;               /* fall to end of loop */
3497                                 }
3498                                 /* if we get here, we need to convert the control char */
3499                                 DUMPSOFAR();
3500                                 CopySendChar(cstate, '\\');
3501                                 CopySendChar(cstate, c);
3502                                 start = ++ptr;  /* do not include char in next run */
3503                         }
3504                         else if (c == '\\' || c == delimc)
3505                         {
3506                                 DUMPSOFAR();
3507                                 CopySendChar(cstate, '\\');
3508                                 start = ptr++;  /* we include char in next run */
3509                         }
3510                         else if (IS_HIGHBIT_SET(c))
3511                                 ptr += pg_encoding_mblen(cstate->client_encoding, ptr);
3512                         else
3513                                 ptr++;
3514                 }
3515         }
3516         else
3517         {
3518                 start = ptr;
3519                 while ((c = *ptr) != '\0')
3520                 {
3521                         if ((unsigned char) c < (unsigned char) 0x20)
3522                         {
3523                                 /*
3524                                  * \r and \n must be escaped, the others are traditional. We
3525                                  * prefer to dump these using the C-like notation, rather than
3526                                  * a backslash and the literal character, because it makes the
3527                                  * dump file a bit more proof against Microsoftish data
3528                                  * mangling.
3529                                  */
3530                                 switch (c)
3531                                 {
3532                                         case '\b':
3533                                                 c = 'b';
3534                                                 break;
3535                                         case '\f':
3536                                                 c = 'f';
3537                                                 break;
3538                                         case '\n':
3539                                                 c = 'n';
3540                                                 break;
3541                                         case '\r':
3542                                                 c = 'r';
3543                                                 break;
3544                                         case '\t':
3545                                                 c = 't';
3546                                                 break;
3547                                         case '\v':
3548                                                 c = 'v';
3549                                                 break;
3550                                         default:
3551                                                 /* If it's the delimiter, must backslash it */
3552                                                 if (c == delimc)
3553                                                         break;
3554                                                 /* All ASCII control chars are length 1 */
3555                                                 ptr++;
3556                                                 continue;               /* fall to end of loop */
3557                                 }
3558                                 /* if we get here, we need to convert the control char */
3559                                 DUMPSOFAR();
3560                                 CopySendChar(cstate, '\\');
3561                                 CopySendChar(cstate, c);
3562                                 start = ++ptr;  /* do not include char in next run */
3563                         }
3564                         else if (c == '\\' || c == delimc)
3565                         {
3566                                 DUMPSOFAR();
3567                                 CopySendChar(cstate, '\\');
3568                                 start = ptr++;  /* we include char in next run */
3569                         }
3570                         else
3571                                 ptr++;
3572                 }
3573         }
3574
3575         DUMPSOFAR();
3576 }
3577
3578 /*
3579  * Send text representation of one attribute, with conversion and
3580  * CSV-style escaping
3581  */
3582 static void
3583 CopyAttributeOutCSV(CopyState cstate, char *string,
3584                                         bool use_quote, bool single_attr)
3585 {
3586         char       *ptr;
3587         char       *start;
3588         char            c;
3589         char            delimc = cstate->delim[0];
3590         char            quotec = cstate->quote[0];
3591         char            escapec = cstate->escape[0];
3592
3593         /* force quoting if it matches null_print (before conversion!) */
3594         if (!use_quote && strcmp(string, cstate->null_print) == 0)
3595                 use_quote = true;
3596
3597         if (cstate->need_transcoding)
3598                 ptr = pg_server_to_client(string, strlen(string));
3599         else
3600                 ptr = string;
3601
3602         /*
3603          * Make a preliminary pass to discover if it needs quoting
3604          */
3605         if (!use_quote)
3606         {
3607                 /*
3608                  * Because '\.' can be a data value, quote it if it appears alone on a
3609                  * line so it is not interpreted as the end-of-data marker.
3610                  */
3611                 if (single_attr && strcmp(ptr, "\\.") == 0)
3612                         use_quote = true;
3613                 else
3614                 {
3615                         char       *tptr = ptr;
3616
3617                         while ((c = *tptr) != '\0')
3618                         {
3619                                 if (c == delimc || c == quotec || c == '\n' || c == '\r')
3620                                 {
3621                                         use_quote = true;
3622                                         break;
3623                                 }
3624                                 if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
3625                                         tptr += pg_encoding_mblen(cstate->client_encoding, tptr);
3626                                 else
3627                                         tptr++;
3628                         }
3629                 }
3630         }
3631
3632         if (use_quote)
3633         {
3634                 CopySendChar(cstate, quotec);
3635
3636                 /*
3637                  * We adopt the same optimization strategy as in CopyAttributeOutText
3638                  */
3639                 start = ptr;
3640                 while ((c = *ptr) != '\0')
3641                 {
3642                         if (c == quotec || c == escapec)
3643                         {
3644                                 DUMPSOFAR();
3645                                 CopySendChar(cstate, escapec);
3646                                 start = ptr;    /* we include char in next run */
3647                         }
3648                         if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
3649                                 ptr += pg_encoding_mblen(cstate->client_encoding, ptr);
3650                         else
3651                                 ptr++;
3652                 }
3653                 DUMPSOFAR();
3654
3655                 CopySendChar(cstate, quotec);
3656         }
3657         else
3658         {
3659                 /* If it doesn't need quoting, we can just dump it as-is */
3660                 CopySendString(cstate, ptr);
3661         }
3662 }
3663
3664 /*
3665  * CopyGetAttnums - build an integer list of attnums to be copied
3666  *
3667  * The input attnamelist is either the user-specified column list,
3668  * or NIL if there was none (in which case we want all the non-dropped
3669  * columns).
3670  *
3671  * rel can be NULL ... it's only used for error reports.
3672  */
3673 static List *
3674 CopyGetAttnums(TupleDesc tupDesc, Relation rel, List *attnamelist)
3675 {
3676         List       *attnums = NIL;
3677
3678         if (attnamelist == NIL)
3679         {
3680                 /* Generate default column list */
3681                 Form_pg_attribute *attr = tupDesc->attrs;
3682                 int                     attr_count = tupDesc->natts;
3683                 int                     i;
3684
3685                 for (i = 0; i < attr_count; i++)
3686                 {
3687                         if (attr[i]->attisdropped)
3688                                 continue;
3689                         attnums = lappend_int(attnums, i + 1);
3690                 }
3691         }
3692         else
3693         {
3694                 /* Validate the user-supplied list and extract attnums */
3695                 ListCell   *l;
3696
3697                 foreach(l, attnamelist)
3698                 {
3699                         char       *name = strVal(lfirst(l));
3700                         int                     attnum;
3701                         int                     i;
3702
3703                         /* Lookup column name */
3704                         attnum = InvalidAttrNumber;
3705                         for (i = 0; i < tupDesc->natts; i++)
3706                         {
3707                                 if (tupDesc->attrs[i]->attisdropped)
3708                                         continue;
3709                                 if (namestrcmp(&(tupDesc->attrs[i]->attname), name) == 0)
3710                                 {
3711                                         attnum = tupDesc->attrs[i]->attnum;
3712                                         break;
3713                                 }
3714                         }
3715                         if (attnum == InvalidAttrNumber)
3716                         {
3717                                 if (rel != NULL)
3718                                         ereport(ERROR,
3719                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
3720                                         errmsg("column \"%s\" of relation \"%s\" does not exist",
3721                                                    name, RelationGetRelationName(rel))));
3722                                 else
3723                                         ereport(ERROR,
3724                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
3725                                                          errmsg("column \"%s\" does not exist",
3726                                                                         name)));
3727                         }
3728                         /* Check for duplicates */
3729                         if (list_member_int(attnums, attnum))
3730                                 ereport(ERROR,
3731                                                 (errcode(ERRCODE_DUPLICATE_COLUMN),
3732                                                  errmsg("column \"%s\" specified more than once",
3733                                                                 name)));
3734                         attnums = lappend_int(attnums, attnum);
3735                 }
3736         }
3737
3738         return attnums;
3739 }
3740
3741
3742 /*
3743  * copy_dest_startup --- executor startup
3744  */
3745 static void
3746 copy_dest_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
3747 {
3748         /* no-op */
3749 }
3750
3751 /*
3752  * copy_dest_receive --- receive one tuple
3753  */
3754 static void
3755 copy_dest_receive(TupleTableSlot *slot, DestReceiver *self)
3756 {
3757         DR_copy    *myState = (DR_copy *) self;
3758         CopyState       cstate = myState->cstate;
3759
3760         /* Make sure the tuple is fully deconstructed */
3761         slot_getallattrs(slot);
3762
3763         /* And send the data */
3764         CopyOneRowTo(cstate, InvalidOid, slot->tts_values, slot->tts_isnull);
3765         myState->processed++;
3766 }
3767
3768 /*
3769  * copy_dest_shutdown --- executor end
3770  */
3771 static void
3772 copy_dest_shutdown(DestReceiver *self)
3773 {
3774         /* no-op */
3775 }
3776
3777 /*
3778  * copy_dest_destroy --- release DestReceiver object
3779  */
3780 static void
3781 copy_dest_destroy(DestReceiver *self)
3782 {
3783         pfree(self);
3784 }
3785
3786 /*
3787  * CreateCopyDestReceiver -- create a suitable DestReceiver object
3788  */
3789 DestReceiver *
3790 CreateCopyDestReceiver(void)
3791 {
3792         DR_copy    *self = (DR_copy *) palloc(sizeof(DR_copy));
3793
3794         self->pub.receiveSlot = copy_dest_receive;
3795         self->pub.rStartup = copy_dest_startup;
3796         self->pub.rShutdown = copy_dest_shutdown;
3797         self->pub.rDestroy = copy_dest_destroy;
3798         self->pub.mydest = DestCopyOut;
3799
3800         self->cstate = NULL;            /* will be set later */
3801
3802         return (DestReceiver *) self;
3803 }