OSDN Git Service

Use _() macro consistently rather than gettext(). Add translation
[pg-rex/syncrep.git] / src / backend / libpq / pqcomm.c
1 /*-------------------------------------------------------------------------
2  *
3  * pqcomm.c
4  *        Communication functions between the Frontend and the Backend
5  *
6  * These routines handle the low-level details of communication between
7  * frontend and backend.  They just shove data across the communication
8  * channel, and are ignorant of the semantics of the data --- or would be,
9  * except for major brain damage in the design of the old COPY OUT protocol.
10  * Unfortunately, COPY OUT was designed to commandeer the communication
11  * channel (it just transfers data without wrapping it into messages).
12  * No other messages can be sent while COPY OUT is in progress; and if the
13  * copy is aborted by an ereport(ERROR), we need to close out the copy so that
14  * the frontend gets back into sync.  Therefore, these routines have to be
15  * aware of COPY OUT state.  (New COPY-OUT is message-based and does *not*
16  * set the DoingCopyOut flag.)
17  *
18  * NOTE: generally, it's a bad idea to emit outgoing messages directly with
19  * pq_putbytes(), especially if the message would require multiple calls
20  * to send.  Instead, use the routines in pqformat.c to construct the message
21  * in a buffer and then emit it in one call to pq_putmessage.  This ensures
22  * that the channel will not be clogged by an incomplete message if execution
23  * is aborted by ereport(ERROR) partway through the message.  The only
24  * non-libpq code that should call pq_putbytes directly is old-style COPY OUT.
25  *
26  * At one time, libpq was shared between frontend and backend, but now
27  * the backend's "backend/libpq" is quite separate from "interfaces/libpq".
28  * All that remains is similarities of names to trap the unwary...
29  *
30  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
31  * Portions Copyright (c) 1994, Regents of the University of California
32  *
33  *      $PostgreSQL: pgsql/src/backend/libpq/pqcomm.c,v 1.176 2005/02/22 04:35:57 momjian Exp $
34  *
35  *-------------------------------------------------------------------------
36  */
37
38 /*------------------------
39  * INTERFACE ROUTINES
40  *
41  * setup/teardown:
42  *              StreamServerPort        - Open postmaster's server port
43  *              StreamConnection        - Create new connection with client
44  *              StreamClose                     - Close a client/backend connection
45  *              TouchSocketFile         - Protect socket file against /tmp cleaners
46  *              pq_init                 - initialize libpq at backend startup
47  *              pq_comm_reset   - reset libpq during error recovery
48  *              pq_close                - shutdown libpq at backend exit
49  *
50  * low-level I/O:
51  *              pq_getbytes             - get a known number of bytes from connection
52  *              pq_getstring    - get a null terminated string from connection
53  *              pq_getmessage   - get a message with length word from connection
54  *              pq_getbyte              - get next byte from connection
55  *              pq_peekbyte             - peek at next byte from connection
56  *              pq_putbytes             - send bytes to connection (not flushed until pq_flush)
57  *              pq_flush                - flush pending output
58  *
59  * message-level I/O (and old-style-COPY-OUT cruft):
60  *              pq_putmessage   - send a normal message (suppressed in COPY OUT mode)
61  *              pq_startcopyout - inform libpq that a COPY OUT transfer is beginning
62  *              pq_endcopyout   - end a COPY OUT transfer
63  *
64  *------------------------
65  */
66 #include "postgres.h"
67
68 #include <signal.h>
69 #include <errno.h>
70 #include <fcntl.h>
71 #include <grp.h>
72 #include <unistd.h>
73 #include <sys/file.h>
74 #include <sys/socket.h>
75 #include <sys/stat.h>
76 #include <sys/time.h>
77 #include <netdb.h>
78 #include <netinet/in.h>
79 #ifdef HAVE_NETINET_TCP_H
80 #include <netinet/tcp.h>
81 #endif
82 #include <arpa/inet.h>
83 #ifdef HAVE_UTIME_H
84 #include <utime.h>
85 #endif
86
87 #include "libpq/libpq.h"
88 #include "miscadmin.h"
89 #include "storage/ipc.h"
90
91
92 /*
93  * Configuration options
94  */
95 int                     Unix_socket_permissions;
96 char       *Unix_socket_group;
97
98
99 /* Where the Unix socket file is */
100 static char sock_path[MAXPGPATH];
101
102
103 /*
104  * Buffers for low-level I/O
105  */
106
107 #define PQ_BUFFER_SIZE 8192
108
109 static unsigned char PqSendBuffer[PQ_BUFFER_SIZE];
110 static int      PqSendPointer;          /* Next index to store a byte in
111                                                                  * PqSendBuffer */
112
113 static unsigned char PqRecvBuffer[PQ_BUFFER_SIZE];
114 static int      PqRecvPointer;          /* Next index to read a byte from
115                                                                  * PqRecvBuffer */
116 static int      PqRecvLength;           /* End of data available in PqRecvBuffer */
117
118 /*
119  * Message status
120  */
121 static bool PqCommBusy;
122 static bool DoingCopyOut;
123
124
125 /* Internal functions */
126 static void pq_close(int code, Datum arg);
127 static int      internal_putbytes(const char *s, size_t len);
128 static int      internal_flush(void);
129 #ifdef HAVE_UNIX_SOCKETS
130 static int      Lock_AF_UNIX(unsigned short portNumber, char *unixSocketName);
131 static int      Setup_AF_UNIX(void);
132 #endif   /* HAVE_UNIX_SOCKETS */
133
134
135 /* --------------------------------
136  *              pq_init - initialize libpq at backend startup
137  * --------------------------------
138  */
139 void
140 pq_init(void)
141 {
142         PqSendPointer = PqRecvPointer = PqRecvLength = 0;
143         PqCommBusy = false;
144         DoingCopyOut = false;
145         on_proc_exit(pq_close, 0);
146 }
147
148 /* --------------------------------
149  *              pq_comm_reset - reset libpq during error recovery
150  *
151  * This is called from error recovery at the outer idle loop.  It's
152  * just to get us out of trouble if we somehow manage to elog() from
153  * inside a pqcomm.c routine (which ideally will never happen, but...)
154  * --------------------------------
155  */
156 void
157 pq_comm_reset(void)
158 {
159         /* Do not throw away pending data, but do reset the busy flag */
160         PqCommBusy = false;
161         /* We can abort any old-style COPY OUT, too */
162         pq_endcopyout(true);
163 }
164
165 /* --------------------------------
166  *              pq_close - shutdown libpq at backend exit
167  *
168  * Note: in a standalone backend MyProcPort will be null,
169  * don't crash during exit...
170  * --------------------------------
171  */
172 static void
173 pq_close(int code, Datum arg)
174 {
175         if (MyProcPort != NULL)
176         {
177                 /* Cleanly shut down SSL layer */
178                 secure_close(MyProcPort);
179
180                 /*
181                  * Formerly we did an explicit close() here, but it seems better
182                  * to leave the socket open until the process dies.  This allows
183                  * clients to perform a "synchronous close" if they care --- wait
184                  * till the transport layer reports connection closure, and you
185                  * can be sure the backend has exited.
186                  *
187                  * We do set sock to -1 to prevent any further I/O, though.
188                  */
189                 MyProcPort->sock = -1;
190         }
191 }
192
193
194
195 /*
196  * Streams -- wrapper around Unix socket system calls
197  *
198  *
199  *              Stream functions are used for vanilla TCP connection protocol.
200  */
201
202
203 /* StreamDoUnlink()
204  * Shutdown routine for backend connection
205  * If a Unix socket is used for communication, explicitly close it.
206  */
207 #ifdef HAVE_UNIX_SOCKETS
208 static void
209 StreamDoUnlink(int code, Datum arg)
210 {
211         Assert(sock_path[0]);
212         unlink(sock_path);
213 }
214 #endif   /* HAVE_UNIX_SOCKETS */
215
216 /*
217  * StreamServerPort -- open a "listening" port to accept connections.
218  *
219  * Successfully opened sockets are added to the ListenSocket[] array,
220  * at the first position that isn't -1.
221  *
222  * RETURNS: STATUS_OK or STATUS_ERROR
223  */
224
225 int
226 StreamServerPort(int family, char *hostName, unsigned short portNumber,
227                                  char *unixSocketName,
228                                  int ListenSocket[], int MaxListen)
229 {
230         int                     fd,
231                                 err;
232         int                     maxconn;
233         int                     one = 1;
234         int                     ret;
235         char            portNumberStr[32];
236         const char *familyDesc;
237         char            familyDescBuf[64];
238         char       *service;
239         struct addrinfo *addrs = NULL,
240                            *addr;
241         struct addrinfo hint;
242         int                     listen_index = 0;
243         int                     added = 0;
244
245         /* Initialize hint structure */
246         MemSet(&hint, 0, sizeof(hint));
247         hint.ai_family = family;
248         hint.ai_flags = AI_PASSIVE;
249         hint.ai_socktype = SOCK_STREAM;
250
251 #ifdef HAVE_UNIX_SOCKETS
252         if (family == AF_UNIX)
253         {
254                 /* Lock_AF_UNIX will also fill in sock_path. */
255                 if (Lock_AF_UNIX(portNumber, unixSocketName) != STATUS_OK)
256                         return STATUS_ERROR;
257                 service = sock_path;
258         }
259         else
260 #endif   /* HAVE_UNIX_SOCKETS */
261         {
262                 snprintf(portNumberStr, sizeof(portNumberStr), "%d", portNumber);
263                 service = portNumberStr;
264         }
265
266         ret = getaddrinfo_all(hostName, service, &hint, &addrs);
267         if (ret || !addrs)
268         {
269                 if (hostName)
270                         ereport(LOG,
271                                         (errmsg("could not translate host name \"%s\", service \"%s\" to address: %s",
272                                                         hostName, service, gai_strerror(ret))));
273                 else
274                         ereport(LOG,
275                          (errmsg("could not translate service \"%s\" to address: %s",
276                                          service, gai_strerror(ret))));
277                 if (addrs)
278                         freeaddrinfo_all(hint.ai_family, addrs);
279                 return STATUS_ERROR;
280         }
281
282         for (addr = addrs; addr; addr = addr->ai_next)
283         {
284                 if (!IS_AF_UNIX(family) && IS_AF_UNIX(addr->ai_family))
285                 {
286                         /*
287                          * Only set up a unix domain socket when they really asked for
288                          * it.  The service/port is different in that case.
289                          */
290                         continue;
291                 }
292
293                 /* See if there is still room to add 1 more socket. */
294                 for (; listen_index < MaxListen; listen_index++)
295                 {
296                         if (ListenSocket[listen_index] == -1)
297                                 break;
298                 }
299                 if (listen_index >= MaxListen)
300                 {
301                         ereport(LOG,
302                                         (errmsg("could not bind to all requested addresses: MAXLISTEN (%d) exceeded",
303                                                         MaxListen)));
304                         break;
305                 }
306
307                 /* set up family name for possible error messages */
308                 switch (addr->ai_family)
309                 {
310                         case AF_INET:
311                                 familyDesc = _("IPv4");
312                                 break;
313 #ifdef HAVE_IPV6
314                         case AF_INET6:
315                                 familyDesc = _("IPv6");
316                                 break;
317 #endif
318 #ifdef HAVE_UNIX_SOCKETS
319                         case AF_UNIX:
320                                 familyDesc = _("Unix");
321                                 break;
322 #endif
323                         default:
324                                 snprintf(familyDescBuf, sizeof(familyDescBuf),
325                                                  _("unrecognized address family %d"),
326                                                  addr->ai_family);
327                                 familyDesc = familyDescBuf;
328                                 break;
329                 }
330
331                 if ((fd = socket(addr->ai_family, SOCK_STREAM, 0)) < 0)
332                 {
333                         ereport(LOG,
334                                         (errcode_for_socket_access(),
335                         /* translator: %s is IPv4, IPv6, or Unix */
336                                          errmsg("could not create %s socket: %m",
337                                                         familyDesc)));
338                         continue;
339                 }
340
341                 if (!IS_AF_UNIX(addr->ai_family))
342                 {
343                         if ((setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
344                                                         (char *) &one, sizeof(one))) == -1)
345                         {
346                                 ereport(LOG,
347                                                 (errcode_for_socket_access(),
348                                                  errmsg("setsockopt(SO_REUSEADDR) failed: %m")));
349                                 closesocket(fd);
350                                 continue;
351                         }
352                 }
353
354 #ifdef IPV6_V6ONLY
355                 if (addr->ai_family == AF_INET6)
356                 {
357                         if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
358                                                    (char *) &one, sizeof(one)) == -1)
359                         {
360                                 ereport(LOG,
361                                                 (errcode_for_socket_access(),
362                                                  errmsg("setsockopt(IPV6_V6ONLY) failed: %m")));
363                                 closesocket(fd);
364                                 continue;
365                         }
366                 }
367 #endif
368
369                 /*
370                  * Note: This might fail on some OS's, like Linux older than
371                  * 2.4.21-pre3, that don't have the IPV6_V6ONLY socket option, and
372                  * map ipv4 addresses to ipv6.  It will show ::ffff:ipv4 for all
373                  * ipv4 connections.
374                  */
375                 err = bind(fd, addr->ai_addr, addr->ai_addrlen);
376                 if (err < 0)
377                 {
378                         ereport(LOG,
379                                         (errcode_for_socket_access(),
380                         /* translator: %s is IPv4, IPv6, or Unix */
381                                          errmsg("could not bind %s socket: %m",
382                                                         familyDesc),
383                                          (IS_AF_UNIX(addr->ai_family)) ?
384                           errhint("Is another postmaster already running on port %d?"
385                                           " If not, remove socket file \"%s\" and retry.",
386                                           (int) portNumber, sock_path) :
387                           errhint("Is another postmaster already running on port %d?"
388                                           " If not, wait a few seconds and retry.",
389                                           (int) portNumber)));
390                         closesocket(fd);
391                         continue;
392                 }
393
394 #ifdef HAVE_UNIX_SOCKETS
395                 if (addr->ai_family == AF_UNIX)
396                 {
397                         if (Setup_AF_UNIX() != STATUS_OK)
398                         {
399                                 closesocket(fd);
400                                 break;
401                         }
402                 }
403 #endif
404
405                 /*
406                  * Select appropriate accept-queue length limit.  PG_SOMAXCONN is
407                  * only intended to provide a clamp on the request on platforms
408                  * where an overly large request provokes a kernel error (are
409                  * there any?).
410                  */
411                 maxconn = MaxBackends * 2;
412                 if (maxconn > PG_SOMAXCONN)
413                         maxconn = PG_SOMAXCONN;
414
415                 err = listen(fd, maxconn);
416                 if (err < 0)
417                 {
418                         ereport(LOG,
419                                         (errcode_for_socket_access(),
420                         /* translator: %s is IPv4, IPv6, or Unix */
421                                          errmsg("could not listen on %s socket: %m",
422                                                         familyDesc)));
423                         closesocket(fd);
424                         continue;
425                 }
426                 ListenSocket[listen_index] = fd;
427                 added++;
428         }
429
430         freeaddrinfo_all(hint.ai_family, addrs);
431
432         if (!added)
433                 return STATUS_ERROR;
434
435         return STATUS_OK;
436 }
437
438
439 #ifdef HAVE_UNIX_SOCKETS
440
441 /*
442  * Lock_AF_UNIX -- configure unix socket file path
443  */
444 static int
445 Lock_AF_UNIX(unsigned short portNumber, char *unixSocketName)
446 {
447         UNIXSOCK_PATH(sock_path, portNumber, unixSocketName);
448
449         /*
450          * Grab an interlock file associated with the socket file.
451          */
452         CreateSocketLockFile(sock_path, true);
453
454         /*
455          * Once we have the interlock, we can safely delete any pre-existing
456          * socket file to avoid failure at bind() time.
457          */
458         unlink(sock_path);
459
460         return STATUS_OK;
461 }
462
463
464 /*
465  * Setup_AF_UNIX -- configure unix socket permissions
466  */
467 static int
468 Setup_AF_UNIX(void)
469 {
470         /* Arrange to unlink the socket file at exit */
471         on_proc_exit(StreamDoUnlink, 0);
472
473         /*
474          * Fix socket ownership/permission if requested.  Note we must do this
475          * before we listen() to avoid a window where unwanted connections
476          * could get accepted.
477          */
478         Assert(Unix_socket_group);
479         if (Unix_socket_group[0] != '\0')
480         {
481 #ifdef WIN32
482                 elog(WARNING, "configuration item unix_socket_group is not supported on this platform");
483 #else
484                 char       *endptr;
485                 unsigned long int val;
486                 gid_t           gid;
487
488                 val = strtoul(Unix_socket_group, &endptr, 10);
489                 if (*endptr == '\0')
490                 {                                               /* numeric group id */
491                         gid = val;
492                 }
493                 else
494                 {                                               /* convert group name to id */
495                         struct group *gr;
496
497                         gr = getgrnam(Unix_socket_group);
498                         if (!gr)
499                         {
500                                 ereport(LOG,
501                                                 (errmsg("group \"%s\" does not exist",
502                                                                 Unix_socket_group)));
503                                 return STATUS_ERROR;
504                         }
505                         gid = gr->gr_gid;
506                 }
507                 if (chown(sock_path, -1, gid) == -1)
508                 {
509                         ereport(LOG,
510                                         (errcode_for_file_access(),
511                                          errmsg("could not set group of file \"%s\": %m",
512                                                         sock_path)));
513                         return STATUS_ERROR;
514                 }
515 #endif
516         }
517
518         if (chmod(sock_path, Unix_socket_permissions) == -1)
519         {
520                 ereport(LOG,
521                                 (errcode_for_file_access(),
522                                  errmsg("could not set permissions of file \"%s\": %m",
523                                                 sock_path)));
524                 return STATUS_ERROR;
525         }
526         return STATUS_OK;
527 }
528 #endif   /* HAVE_UNIX_SOCKETS */
529
530
531 /*
532  * StreamConnection -- create a new connection with client using
533  *              server port.
534  *
535  * ASSUME: that this doesn't need to be non-blocking because
536  *              the Postmaster uses select() to tell when the server master
537  *              socket is ready for accept().
538  *
539  * RETURNS: STATUS_OK or STATUS_ERROR
540  */
541 int
542 StreamConnection(int server_fd, Port *port)
543 {
544         /* accept connection and fill in the client (remote) address */
545         port->raddr.salen = sizeof(port->raddr.addr);
546         if ((port->sock = accept(server_fd,
547                                                          (struct sockaddr *) & port->raddr.addr,
548                                                          &port->raddr.salen)) < 0)
549         {
550                 ereport(LOG,
551                                 (errcode_for_socket_access(),
552                                  errmsg("could not accept new connection: %m")));
553                 return STATUS_ERROR;
554         }
555
556 #ifdef SCO_ACCEPT_BUG
557
558         /*
559          * UnixWare 7+ and OpenServer 5.0.4 are known to have this bug, but it
560          * shouldn't hurt to catch it for all versions of those platforms.
561          */
562         if (port->raddr.addr.ss_family == 0)
563                 port->raddr.addr.ss_family = AF_UNIX;
564 #endif
565
566         /* fill in the server (local) address */
567         port->laddr.salen = sizeof(port->laddr.addr);
568         if (getsockname(port->sock,
569                                         (struct sockaddr *) & port->laddr.addr,
570                                         &port->laddr.salen) < 0)
571         {
572                 elog(LOG, "getsockname() failed: %m");
573                 return STATUS_ERROR;
574         }
575
576         /* select NODELAY and KEEPALIVE options if it's a TCP connection */
577         if (!IS_AF_UNIX(port->laddr.addr.ss_family))
578         {
579                 int                     on;
580
581 #ifdef  TCP_NODELAY
582                 on = 1;
583                 if (setsockopt(port->sock, IPPROTO_TCP, TCP_NODELAY,
584                                            (char *) &on, sizeof(on)) < 0)
585                 {
586                         elog(LOG, "setsockopt(TCP_NODELAY) failed: %m");
587                         return STATUS_ERROR;
588                 }
589 #endif
590                 on = 1;
591                 if (setsockopt(port->sock, SOL_SOCKET, SO_KEEPALIVE,
592                                            (char *) &on, sizeof(on)) < 0)
593                 {
594                         elog(LOG, "setsockopt(SO_KEEPALIVE) failed: %m");
595                         return STATUS_ERROR;
596                 }
597         }
598
599         return STATUS_OK;
600 }
601
602 /*
603  * StreamClose -- close a client/backend connection
604  *
605  * NOTE: this is NOT used to terminate a session; it is just used to release
606  * the file descriptor in a process that should no longer have the socket
607  * open.  (For example, the postmaster calls this after passing ownership
608  * of the connection to a child process.)  It is expected that someone else
609  * still has the socket open.  So, we only want to close the descriptor,
610  * we do NOT want to send anything to the far end.
611  */
612 void
613 StreamClose(int sock)
614 {
615         closesocket(sock);
616 }
617
618 /*
619  * TouchSocketFile -- mark socket file as recently accessed
620  *
621  * This routine should be called every so often to ensure that the socket
622  * file has a recent mod date (ordinary operations on sockets usually won't
623  * change the mod date).  That saves it from being removed by
624  * overenthusiastic /tmp-directory-cleaner daemons.  (Another reason we should
625  * never have put the socket file in /tmp...)
626  */
627 void
628 TouchSocketFile(void)
629 {
630         /* Do nothing if we did not create a socket... */
631         if (sock_path[0] != '\0')
632         {
633                 /*
634                  * utime() is POSIX standard, utimes() is a common alternative. If
635                  * we have neither, there's no way to affect the mod or access
636                  * time of the socket :-(
637                  *
638                  * In either path, we ignore errors; there's no point in complaining.
639                  */
640 #ifdef HAVE_UTIME
641                 utime(sock_path, NULL);
642 #else                                                   /* !HAVE_UTIME */
643 #ifdef HAVE_UTIMES
644                 utimes(sock_path, NULL);
645 #endif   /* HAVE_UTIMES */
646 #endif   /* HAVE_UTIME */
647         }
648 }
649
650
651 /* --------------------------------
652  * Low-level I/O routines begin here.
653  *
654  * These routines communicate with a frontend client across a connection
655  * already established by the preceding routines.
656  * --------------------------------
657  */
658
659
660 /* --------------------------------
661  *              pq_recvbuf - load some bytes into the input buffer
662  *
663  *              returns 0 if OK, EOF if trouble
664  * --------------------------------
665  */
666 static int
667 pq_recvbuf(void)
668 {
669         if (PqRecvPointer > 0)
670         {
671                 if (PqRecvLength > PqRecvPointer)
672                 {
673                         /* still some unread data, left-justify it in the buffer */
674                         memmove(PqRecvBuffer, PqRecvBuffer + PqRecvPointer,
675                                         PqRecvLength - PqRecvPointer);
676                         PqRecvLength -= PqRecvPointer;
677                         PqRecvPointer = 0;
678                 }
679                 else
680                         PqRecvLength = PqRecvPointer = 0;
681         }
682
683         /* Can fill buffer from PqRecvLength and upwards */
684         for (;;)
685         {
686                 int                     r;
687
688                 r = secure_read(MyProcPort, PqRecvBuffer + PqRecvLength,
689                                                 PQ_BUFFER_SIZE - PqRecvLength);
690
691                 if (r < 0)
692                 {
693                         if (errno == EINTR)
694                                 continue;               /* Ok if interrupted */
695
696                         /*
697                          * Careful: an ereport() that tries to write to the client
698                          * would cause recursion to here, leading to stack overflow
699                          * and core dump!  This message must go *only* to the
700                          * postmaster log.
701                          */
702                         ereport(COMMERROR,
703                                         (errcode_for_socket_access(),
704                                          errmsg("could not receive data from client: %m")));
705                         return EOF;
706                 }
707                 if (r == 0)
708                 {
709                         /*
710                          * EOF detected.  We used to write a log message here, but
711                          * it's better to expect the ultimate caller to do that.
712                          */
713                         return EOF;
714                 }
715                 /* r contains number of bytes read, so just incr length */
716                 PqRecvLength += r;
717                 return 0;
718         }
719 }
720
721 /* --------------------------------
722  *              pq_getbyte      - get a single byte from connection, or return EOF
723  * --------------------------------
724  */
725 int
726 pq_getbyte(void)
727 {
728         while (PqRecvPointer >= PqRecvLength)
729         {
730                 if (pq_recvbuf())               /* If nothing in buffer, then recv some */
731                         return EOF;                     /* Failed to recv data */
732         }
733         return PqRecvBuffer[PqRecvPointer++];
734 }
735
736 /* --------------------------------
737  *              pq_peekbyte             - peek at next byte from connection
738  *
739  *       Same as pq_getbyte() except we don't advance the pointer.
740  * --------------------------------
741  */
742 int
743 pq_peekbyte(void)
744 {
745         while (PqRecvPointer >= PqRecvLength)
746         {
747                 if (pq_recvbuf())               /* If nothing in buffer, then recv some */
748                         return EOF;                     /* Failed to recv data */
749         }
750         return PqRecvBuffer[PqRecvPointer];
751 }
752
753 /* --------------------------------
754  *              pq_getbytes             - get a known number of bytes from connection
755  *
756  *              returns 0 if OK, EOF if trouble
757  * --------------------------------
758  */
759 int
760 pq_getbytes(char *s, size_t len)
761 {
762         size_t          amount;
763
764         while (len > 0)
765         {
766                 while (PqRecvPointer >= PqRecvLength)
767                 {
768                         if (pq_recvbuf())       /* If nothing in buffer, then recv some */
769                                 return EOF;             /* Failed to recv data */
770                 }
771                 amount = PqRecvLength - PqRecvPointer;
772                 if (amount > len)
773                         amount = len;
774                 memcpy(s, PqRecvBuffer + PqRecvPointer, amount);
775                 PqRecvPointer += amount;
776                 s += amount;
777                 len -= amount;
778         }
779         return 0;
780 }
781
782 /* --------------------------------
783  *              pq_discardbytes         - throw away a known number of bytes
784  *
785  *              same as pq_getbytes except we do not copy the data to anyplace.
786  *              this is used for resynchronizing after read errors.
787  *
788  *              returns 0 if OK, EOF if trouble
789  * --------------------------------
790  */
791 static int
792 pq_discardbytes(size_t len)
793 {
794         size_t          amount;
795
796         while (len > 0)
797         {
798                 while (PqRecvPointer >= PqRecvLength)
799                 {
800                         if (pq_recvbuf())       /* If nothing in buffer, then recv some */
801                                 return EOF;             /* Failed to recv data */
802                 }
803                 amount = PqRecvLength - PqRecvPointer;
804                 if (amount > len)
805                         amount = len;
806                 PqRecvPointer += amount;
807                 len -= amount;
808         }
809         return 0;
810 }
811
812 /* --------------------------------
813  *              pq_getstring    - get a null terminated string from connection
814  *
815  *              The return value is placed in an expansible StringInfo, which has
816  *              already been initialized by the caller.
817  *
818  *              This is used only for dealing with old-protocol clients.  The idea
819  *              is to produce a StringInfo that looks the same as we would get from
820  *              pq_getmessage() with a newer client; we will then process it with
821  *              pq_getmsgstring.  Therefore, no character set conversion is done here,
822  *              even though this is presumably useful only for text.
823  *
824  *              returns 0 if OK, EOF if trouble
825  * --------------------------------
826  */
827 int
828 pq_getstring(StringInfo s)
829 {
830         int                     i;
831
832         /* Reset string to empty */
833         s->len = 0;
834         s->data[0] = '\0';
835         s->cursor = 0;
836
837         /* Read until we get the terminating '\0' */
838         for (;;)
839         {
840                 while (PqRecvPointer >= PqRecvLength)
841                 {
842                         if (pq_recvbuf())       /* If nothing in buffer, then recv some */
843                                 return EOF;             /* Failed to recv data */
844                 }
845
846                 for (i = PqRecvPointer; i < PqRecvLength; i++)
847                 {
848                         if (PqRecvBuffer[i] == '\0')
849                         {
850                                 /* include the '\0' in the copy */
851                                 appendBinaryStringInfo(s, PqRecvBuffer + PqRecvPointer,
852                                                                            i - PqRecvPointer + 1);
853                                 PqRecvPointer = i + 1;  /* advance past \0 */
854                                 return 0;
855                         }
856                 }
857
858                 /* If we're here we haven't got the \0 in the buffer yet. */
859                 appendBinaryStringInfo(s, PqRecvBuffer + PqRecvPointer,
860                                                            PqRecvLength - PqRecvPointer);
861                 PqRecvPointer = PqRecvLength;
862         }
863 }
864
865
866 /* --------------------------------
867  *              pq_getmessage   - get a message with length word from connection
868  *
869  *              The return value is placed in an expansible StringInfo, which has
870  *              already been initialized by the caller.
871  *              Only the message body is placed in the StringInfo; the length word
872  *              is removed.  Also, s->cursor is initialized to zero for convenience
873  *              in scanning the message contents.
874  *
875  *              If maxlen is not zero, it is an upper limit on the length of the
876  *              message we are willing to accept.  We abort the connection (by
877  *              returning EOF) if client tries to send more than that.
878  *
879  *              returns 0 if OK, EOF if trouble
880  * --------------------------------
881  */
882 int
883 pq_getmessage(StringInfo s, int maxlen)
884 {
885         int32           len;
886
887         /* Reset message buffer to empty */
888         s->len = 0;
889         s->data[0] = '\0';
890         s->cursor = 0;
891
892         /* Read message length word */
893         if (pq_getbytes((char *) &len, 4) == EOF)
894         {
895                 ereport(COMMERROR,
896                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
897                                  errmsg("unexpected EOF within message length word")));
898                 return EOF;
899         }
900
901         len = ntohl(len);
902
903         if (len < 4 ||
904                 (maxlen > 0 && len > maxlen))
905         {
906                 ereport(COMMERROR,
907                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
908                                  errmsg("invalid message length")));
909                 return EOF;
910         }
911
912         len -= 4;                                       /* discount length itself */
913
914         if (len > 0)
915         {
916                 /*
917                  * Allocate space for message.  If we run out of room (ridiculously
918                  * large message), we will elog(ERROR), but we want to discard the
919                  * message body so as not to lose communication sync.
920                  */
921                 PG_TRY();
922                 {
923                         enlargeStringInfo(s, len);
924                 }
925                 PG_CATCH();
926                 {
927                         if (pq_discardbytes(len) == EOF)
928                                 ereport(COMMERROR,
929                                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
930                                                  errmsg("incomplete message from client")));
931                         PG_RE_THROW();
932                 }
933                 PG_END_TRY();
934
935                 /* And grab the message */
936                 if (pq_getbytes(s->data, len) == EOF)
937                 {
938                         ereport(COMMERROR,
939                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
940                                          errmsg("incomplete message from client")));
941                         return EOF;
942                 }
943                 s->len = len;
944                 /* Place a trailing null per StringInfo convention */
945                 s->data[len] = '\0';
946         }
947
948         return 0;
949 }
950
951
952 /* --------------------------------
953  *              pq_putbytes             - send bytes to connection (not flushed until pq_flush)
954  *
955  *              returns 0 if OK, EOF if trouble
956  * --------------------------------
957  */
958 int
959 pq_putbytes(const char *s, size_t len)
960 {
961         int                     res;
962
963         /* Should only be called by old-style COPY OUT */
964         Assert(DoingCopyOut);
965         /* No-op if reentrant call */
966         if (PqCommBusy)
967                 return 0;
968         PqCommBusy = true;
969         res = internal_putbytes(s, len);
970         PqCommBusy = false;
971         return res;
972 }
973
974 static int
975 internal_putbytes(const char *s, size_t len)
976 {
977         size_t          amount;
978
979         while (len > 0)
980         {
981                 /* If buffer is full, then flush it out */
982                 if (PqSendPointer >= PQ_BUFFER_SIZE)
983                         if (internal_flush())
984                                 return EOF;
985                 amount = PQ_BUFFER_SIZE - PqSendPointer;
986                 if (amount > len)
987                         amount = len;
988                 memcpy(PqSendBuffer + PqSendPointer, s, amount);
989                 PqSendPointer += amount;
990                 s += amount;
991                 len -= amount;
992         }
993         return 0;
994 }
995
996 /* --------------------------------
997  *              pq_flush                - flush pending output
998  *
999  *              returns 0 if OK, EOF if trouble
1000  * --------------------------------
1001  */
1002 int
1003 pq_flush(void)
1004 {
1005         int                     res;
1006
1007         /* No-op if reentrant call */
1008         if (PqCommBusy)
1009                 return 0;
1010         PqCommBusy = true;
1011         res = internal_flush();
1012         PqCommBusy = false;
1013         return res;
1014 }
1015
1016 static int
1017 internal_flush(void)
1018 {
1019         static int      last_reported_send_errno = 0;
1020
1021         unsigned char *bufptr = PqSendBuffer;
1022         unsigned char *bufend = PqSendBuffer + PqSendPointer;
1023
1024         while (bufptr < bufend)
1025         {
1026                 int                     r;
1027
1028                 r = secure_write(MyProcPort, bufptr, bufend - bufptr);
1029
1030                 if (r <= 0)
1031                 {
1032                         if (errno == EINTR)
1033                                 continue;               /* Ok if we were interrupted */
1034
1035                         /*
1036                          * Careful: an ereport() that tries to write to the client
1037                          * would cause recursion to here, leading to stack overflow
1038                          * and core dump!  This message must go *only* to the
1039                          * postmaster log.
1040                          *
1041                          * If a client disconnects while we're in the midst of output, we
1042                          * might write quite a bit of data before we get to a safe
1043                          * query abort point.  So, suppress duplicate log messages.
1044                          */
1045                         if (errno != last_reported_send_errno)
1046                         {
1047                                 last_reported_send_errno = errno;
1048                                 ereport(COMMERROR,
1049                                                 (errcode_for_socket_access(),
1050                                                  errmsg("could not send data to client: %m")));
1051                         }
1052
1053                         /*
1054                          * We drop the buffered data anyway so that processing can
1055                          * continue, even though we'll probably quit soon.
1056                          */
1057                         PqSendPointer = 0;
1058                         return EOF;
1059                 }
1060
1061                 last_reported_send_errno = 0;   /* reset after any successful send */
1062                 bufptr += r;
1063         }
1064
1065         PqSendPointer = 0;
1066         return 0;
1067 }
1068
1069
1070 /* --------------------------------
1071  * Message-level I/O routines begin here.
1072  *
1073  * These routines understand about the old-style COPY OUT protocol.
1074  * --------------------------------
1075  */
1076
1077
1078 /* --------------------------------
1079  *              pq_putmessage   - send a normal message (suppressed in COPY OUT mode)
1080  *
1081  *              If msgtype is not '\0', it is a message type code to place before
1082  *              the message body.  If msgtype is '\0', then the message has no type
1083  *              code (this is only valid in pre-3.0 protocols).
1084  *
1085  *              len is the length of the message body data at *s.  In protocol 3.0
1086  *              and later, a message length word (equal to len+4 because it counts
1087  *              itself too) is inserted by this routine.
1088  *
1089  *              All normal messages are suppressed while old-style COPY OUT is in
1090  *              progress.  (In practice only a few notice messages might get emitted
1091  *              then; dropping them is annoying, but at least they will still appear
1092  *              in the postmaster log.)
1093  *
1094  *              We also suppress messages generated while pqcomm.c is busy.  This
1095  *              avoids any possibility of messages being inserted within other
1096  *              messages.  The only known trouble case arises if SIGQUIT occurs
1097  *              during a pqcomm.c routine --- quickdie() will try to send a warning
1098  *              message, and the most reasonable approach seems to be to drop it.
1099  *
1100  *              returns 0 if OK, EOF if trouble
1101  * --------------------------------
1102  */
1103 int
1104 pq_putmessage(char msgtype, const char *s, size_t len)
1105 {
1106         if (DoingCopyOut || PqCommBusy)
1107                 return 0;
1108         PqCommBusy = true;
1109         if (msgtype)
1110                 if (internal_putbytes(&msgtype, 1))
1111                         goto fail;
1112         if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
1113         {
1114                 uint32          n32;
1115
1116                 n32 = htonl((uint32) (len + 4));
1117                 if (internal_putbytes((char *) &n32, 4))
1118                         goto fail;
1119         }
1120         if (internal_putbytes(s, len))
1121                 goto fail;
1122         PqCommBusy = false;
1123         return 0;
1124
1125 fail:
1126         PqCommBusy = false;
1127         return EOF;
1128 }
1129
1130 /* --------------------------------
1131  *              pq_startcopyout - inform libpq that an old-style COPY OUT transfer
1132  *                      is beginning
1133  * --------------------------------
1134  */
1135 void
1136 pq_startcopyout(void)
1137 {
1138         DoingCopyOut = true;
1139 }
1140
1141 /* --------------------------------
1142  *              pq_endcopyout   - end an old-style COPY OUT transfer
1143  *
1144  *              If errorAbort is indicated, we are aborting a COPY OUT due to an error,
1145  *              and must send a terminator line.  Since a partial data line might have
1146  *              been emitted, send a couple of newlines first (the first one could
1147  *              get absorbed by a backslash...)  Note that old-style COPY OUT does
1148  *              not allow binary transfers, so a textual terminator is always correct.
1149  * --------------------------------
1150  */
1151 void
1152 pq_endcopyout(bool errorAbort)
1153 {
1154         if (!DoingCopyOut)
1155                 return;
1156         if (errorAbort)
1157                 pq_putbytes("\n\n\\.\n", 5);
1158         /* in non-error case, copy.c will have emitted the terminator line */
1159         DoingCopyOut = false;
1160 }