OSDN Git Service

Introduce Streaming Replication.
[pg-rex/syncrep.git] / src / backend / libpq / pqcomm.c
1 /*-------------------------------------------------------------------------
2  *
3  * pqcomm.c
4  *        Communication functions between the Frontend and the Backend
5  *
6  * These routines handle the low-level details of communication between
7  * frontend and backend.  They just shove data across the communication
8  * channel, and are ignorant of the semantics of the data --- or would be,
9  * except for major brain damage in the design of the old COPY OUT protocol.
10  * Unfortunately, COPY OUT was designed to commandeer the communication
11  * channel (it just transfers data without wrapping it into messages).
12  * No other messages can be sent while COPY OUT is in progress; and if the
13  * copy is aborted by an ereport(ERROR), we need to close out the copy so that
14  * the frontend gets back into sync.  Therefore, these routines have to be
15  * aware of COPY OUT state.  (New COPY-OUT is message-based and does *not*
16  * set the DoingCopyOut flag.)
17  *
18  * NOTE: generally, it's a bad idea to emit outgoing messages directly with
19  * pq_putbytes(), especially if the message would require multiple calls
20  * to send.  Instead, use the routines in pqformat.c to construct the message
21  * in a buffer and then emit it in one call to pq_putmessage.  This ensures
22  * that the channel will not be clogged by an incomplete message if execution
23  * is aborted by ereport(ERROR) partway through the message.  The only
24  * non-libpq code that should call pq_putbytes directly is old-style COPY OUT.
25  *
26  * At one time, libpq was shared between frontend and backend, but now
27  * the backend's "backend/libpq" is quite separate from "interfaces/libpq".
28  * All that remains is similarities of names to trap the unwary...
29  *
30  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
31  * Portions Copyright (c) 1994, Regents of the University of California
32  *
33  *      $PostgreSQL: pgsql/src/backend/libpq/pqcomm.c,v 1.202 2010/01/15 09:19:02 heikki Exp $
34  *
35  *-------------------------------------------------------------------------
36  */
37
38 /*------------------------
39  * INTERFACE ROUTINES
40  *
41  * setup/teardown:
42  *              StreamServerPort        - Open postmaster's server port
43  *              StreamConnection        - Create new connection with client
44  *              StreamClose                     - Close a client/backend connection
45  *              TouchSocketFile         - Protect socket file against /tmp cleaners
46  *              pq_init                 - initialize libpq at backend startup
47  *              pq_comm_reset   - reset libpq during error recovery
48  *              pq_close                - shutdown libpq at backend exit
49  *
50  * low-level I/O:
51  *              pq_getbytes             - get a known number of bytes from connection
52  *              pq_getstring    - get a null terminated string from connection
53  *              pq_getmessage   - get a message with length word from connection
54  *              pq_getbyte              - get next byte from connection
55  *              pq_peekbyte             - peek at next byte from connection
56  *              pq_putbytes             - send bytes to connection (not flushed until pq_flush)
57  *              pq_flush                - flush pending output
58  *              pq_getbyte_if_available - get a byte if available without blocking
59  *
60  * message-level I/O (and old-style-COPY-OUT cruft):
61  *              pq_putmessage   - send a normal message (suppressed in COPY OUT mode)
62  *              pq_startcopyout - inform libpq that a COPY OUT transfer is beginning
63  *              pq_endcopyout   - end a COPY OUT transfer
64  *
65  *------------------------
66  */
67 #include "postgres.h"
68
69 #include <signal.h>
70 #include <fcntl.h>
71 #include <grp.h>
72 #include <unistd.h>
73 #include <sys/file.h>
74 #include <sys/socket.h>
75 #include <sys/stat.h>
76 #include <sys/time.h>
77 #include <netdb.h>
78 #include <netinet/in.h>
79 #ifdef HAVE_NETINET_TCP_H
80 #include <netinet/tcp.h>
81 #endif
82 #include <arpa/inet.h>
83 #ifdef HAVE_UTIME_H
84 #include <utime.h>
85 #endif
86
87 #include "libpq/ip.h"
88 #include "libpq/libpq.h"
89 #include "miscadmin.h"
90 #include "storage/ipc.h"
91 #include "utils/guc.h"
92
93 /*
94  * Configuration options
95  */
96 int                     Unix_socket_permissions;
97 char       *Unix_socket_group;
98
99
100 /* Where the Unix socket file is */
101 static char sock_path[MAXPGPATH];
102
103
104 /*
105  * Buffers for low-level I/O
106  */
107
108 #define PQ_BUFFER_SIZE 8192
109
110 static char PqSendBuffer[PQ_BUFFER_SIZE];
111 static int      PqSendPointer;          /* Next index to store a byte in PqSendBuffer */
112
113 static char PqRecvBuffer[PQ_BUFFER_SIZE];
114 static int      PqRecvPointer;          /* Next index to read a byte from PqRecvBuffer */
115 static int      PqRecvLength;           /* End of data available in PqRecvBuffer */
116
117 /*
118  * Message status
119  */
120 static bool PqCommBusy;
121 static bool DoingCopyOut;
122
123
124 /* Internal functions */
125 static void pq_close(int code, Datum arg);
126 static int      internal_putbytes(const char *s, size_t len);
127 static int      internal_flush(void);
128
129 #ifdef HAVE_UNIX_SOCKETS
130 static int      Lock_AF_UNIX(unsigned short portNumber, char *unixSocketName);
131 static int      Setup_AF_UNIX(void);
132 #endif   /* HAVE_UNIX_SOCKETS */
133
134
135 /* --------------------------------
136  *              pq_init - initialize libpq at backend startup
137  * --------------------------------
138  */
139 void
140 pq_init(void)
141 {
142         PqSendPointer = PqRecvPointer = PqRecvLength = 0;
143         PqCommBusy = false;
144         DoingCopyOut = false;
145         on_proc_exit(pq_close, 0);
146 }
147
148 /* --------------------------------
149  *              pq_comm_reset - reset libpq during error recovery
150  *
151  * This is called from error recovery at the outer idle loop.  It's
152  * just to get us out of trouble if we somehow manage to elog() from
153  * inside a pqcomm.c routine (which ideally will never happen, but...)
154  * --------------------------------
155  */
156 void
157 pq_comm_reset(void)
158 {
159         /* Do not throw away pending data, but do reset the busy flag */
160         PqCommBusy = false;
161         /* We can abort any old-style COPY OUT, too */
162         pq_endcopyout(true);
163 }
164
165 /* --------------------------------
166  *              pq_close - shutdown libpq at backend exit
167  *
168  * Note: in a standalone backend MyProcPort will be null,
169  * don't crash during exit...
170  * --------------------------------
171  */
172 static void
173 pq_close(int code, Datum arg)
174 {
175         if (MyProcPort != NULL)
176         {
177 #if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
178 #ifdef ENABLE_GSS
179                 OM_uint32       min_s;
180
181                 /* Shutdown GSSAPI layer */
182                 if (MyProcPort->gss->ctx != GSS_C_NO_CONTEXT)
183                         gss_delete_sec_context(&min_s, &MyProcPort->gss->ctx, NULL);
184
185                 if (MyProcPort->gss->cred != GSS_C_NO_CREDENTIAL)
186                         gss_release_cred(&min_s, &MyProcPort->gss->cred);
187 #endif   /* ENABLE_GSS */
188                 /* GSS and SSPI share the port->gss struct */
189
190                 free(MyProcPort->gss);
191 #endif   /* ENABLE_GSS || ENABLE_SSPI */
192
193                 /* Cleanly shut down SSL layer */
194                 secure_close(MyProcPort);
195
196                 /*
197                  * Formerly we did an explicit close() here, but it seems better to
198                  * leave the socket open until the process dies.  This allows clients
199                  * to perform a "synchronous close" if they care --- wait till the
200                  * transport layer reports connection closure, and you can be sure the
201                  * backend has exited.
202                  *
203                  * We do set sock to PGINVALID_SOCKET to prevent any further I/O, though.
204                  */
205                 MyProcPort->sock = PGINVALID_SOCKET;
206         }
207 }
208
209
210
211 /*
212  * Streams -- wrapper around Unix socket system calls
213  *
214  *
215  *              Stream functions are used for vanilla TCP connection protocol.
216  */
217
218
/*
 * StreamDoUnlink -- exit callback that removes the Unix socket file
 *
 * Setup_AF_UNIX() registers this with on_proc_exit(), so sock_path is
 * expected to be filled in by the time it runs.  "code" and "arg" are
 * not used.
 */
#ifdef HAVE_UNIX_SOCKETS
static void
StreamDoUnlink(int code, Datum arg)
{
	Assert(sock_path[0] != '\0');
	(void) unlink(sock_path);
}
#endif   /* HAVE_UNIX_SOCKETS */
231
232 /*
233  * StreamServerPort -- open a "listening" port to accept connections.
234  *
235  * Successfully opened sockets are added to the ListenSocket[] array,
236  * at the first position that isn't PGINVALID_SOCKET.
237  *
238  * RETURNS: STATUS_OK or STATUS_ERROR
239  */
240
241 int
242 StreamServerPort(int family, char *hostName, unsigned short portNumber,
243                                  char *unixSocketName,
244                                  pgsocket ListenSocket[], int MaxListen)
245 {
246         pgsocket        fd;
247         int                     err;
248         int                     maxconn;
249         int                     ret;
250         char            portNumberStr[32];
251         const char *familyDesc;
252         char            familyDescBuf[64];
253         char       *service;
254         struct addrinfo *addrs = NULL,
255                            *addr;
256         struct addrinfo hint;
257         int                     listen_index = 0;
258         int                     added = 0;
259
260 #if !defined(WIN32) || defined(IPV6_V6ONLY)
261         int                     one = 1;
262 #endif
263
264         /* Initialize hint structure */
265         MemSet(&hint, 0, sizeof(hint));
266         hint.ai_family = family;
267         hint.ai_flags = AI_PASSIVE;
268         hint.ai_socktype = SOCK_STREAM;
269
270 #ifdef HAVE_UNIX_SOCKETS
271         if (family == AF_UNIX)
272         {
273                 /* Lock_AF_UNIX will also fill in sock_path. */
274                 if (Lock_AF_UNIX(portNumber, unixSocketName) != STATUS_OK)
275                         return STATUS_ERROR;
276                 service = sock_path;
277         }
278         else
279 #endif   /* HAVE_UNIX_SOCKETS */
280         {
281                 snprintf(portNumberStr, sizeof(portNumberStr), "%d", portNumber);
282                 service = portNumberStr;
283         }
284
285         ret = pg_getaddrinfo_all(hostName, service, &hint, &addrs);
286         if (ret || !addrs)
287         {
288                 if (hostName)
289                         ereport(LOG,
290                                         (errmsg("could not translate host name \"%s\", service \"%s\" to address: %s",
291                                                         hostName, service, gai_strerror(ret))));
292                 else
293                         ereport(LOG,
294                                  (errmsg("could not translate service \"%s\" to address: %s",
295                                                  service, gai_strerror(ret))));
296                 if (addrs)
297                         pg_freeaddrinfo_all(hint.ai_family, addrs);
298                 return STATUS_ERROR;
299         }
300
301         for (addr = addrs; addr; addr = addr->ai_next)
302         {
303                 if (!IS_AF_UNIX(family) && IS_AF_UNIX(addr->ai_family))
304                 {
305                         /*
306                          * Only set up a unix domain socket when they really asked for it.
307                          * The service/port is different in that case.
308                          */
309                         continue;
310                 }
311
312                 /* See if there is still room to add 1 more socket. */
313                 for (; listen_index < MaxListen; listen_index++)
314                 {
315                         if (ListenSocket[listen_index] == PGINVALID_SOCKET)
316                                 break;
317                 }
318                 if (listen_index >= MaxListen)
319                 {
320                         ereport(LOG,
321                                         (errmsg("could not bind to all requested addresses: MAXLISTEN (%d) exceeded",
322                                                         MaxListen)));
323                         break;
324                 }
325
326                 /* set up family name for possible error messages */
327                 switch (addr->ai_family)
328                 {
329                         case AF_INET:
330                                 familyDesc = _("IPv4");
331                                 break;
332 #ifdef HAVE_IPV6
333                         case AF_INET6:
334                                 familyDesc = _("IPv6");
335                                 break;
336 #endif
337 #ifdef HAVE_UNIX_SOCKETS
338                         case AF_UNIX:
339                                 familyDesc = _("Unix");
340                                 break;
341 #endif
342                         default:
343                                 snprintf(familyDescBuf, sizeof(familyDescBuf),
344                                                  _("unrecognized address family %d"),
345                                                  addr->ai_family);
346                                 familyDesc = familyDescBuf;
347                                 break;
348                 }
349
350                 if ((fd = socket(addr->ai_family, SOCK_STREAM, 0)) < 0)
351                 {
352                         ereport(LOG,
353                                         (errcode_for_socket_access(),
354                         /* translator: %s is IPv4, IPv6, or Unix */
355                                          errmsg("could not create %s socket: %m",
356                                                         familyDesc)));
357                         continue;
358                 }
359
360 #ifndef WIN32
361
362                 /*
363                  * Without the SO_REUSEADDR flag, a new postmaster can't be started
364                  * right away after a stop or crash, giving "address already in use"
365                  * error on TCP ports.
366                  *
367                  * On win32, however, this behavior only happens if the
368                  * SO_EXLUSIVEADDRUSE is set. With SO_REUSEADDR, win32 allows multiple
369                  * servers to listen on the same address, resulting in unpredictable
370                  * behavior. With no flags at all, win32 behaves as Unix with
371                  * SO_REUSEADDR.
372                  */
373                 if (!IS_AF_UNIX(addr->ai_family))
374                 {
375                         if ((setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
376                                                         (char *) &one, sizeof(one))) == -1)
377                         {
378                                 ereport(LOG,
379                                                 (errcode_for_socket_access(),
380                                                  errmsg("setsockopt(SO_REUSEADDR) failed: %m")));
381                                 closesocket(fd);
382                                 continue;
383                         }
384                 }
385 #endif
386
387 #ifdef IPV6_V6ONLY
388                 if (addr->ai_family == AF_INET6)
389                 {
390                         if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
391                                                    (char *) &one, sizeof(one)) == -1)
392                         {
393                                 ereport(LOG,
394                                                 (errcode_for_socket_access(),
395                                                  errmsg("setsockopt(IPV6_V6ONLY) failed: %m")));
396                                 closesocket(fd);
397                                 continue;
398                         }
399                 }
400 #endif
401
402                 /*
403                  * Note: This might fail on some OS's, like Linux older than
404                  * 2.4.21-pre3, that don't have the IPV6_V6ONLY socket option, and map
405                  * ipv4 addresses to ipv6.      It will show ::ffff:ipv4 for all ipv4
406                  * connections.
407                  */
408                 err = bind(fd, addr->ai_addr, addr->ai_addrlen);
409                 if (err < 0)
410                 {
411                         ereport(LOG,
412                                         (errcode_for_socket_access(),
413                         /* translator: %s is IPv4, IPv6, or Unix */
414                                          errmsg("could not bind %s socket: %m",
415                                                         familyDesc),
416                                          (IS_AF_UNIX(addr->ai_family)) ?
417                                   errhint("Is another postmaster already running on port %d?"
418                                                   " If not, remove socket file \"%s\" and retry.",
419                                                   (int) portNumber, sock_path) :
420                                   errhint("Is another postmaster already running on port %d?"
421                                                   " If not, wait a few seconds and retry.",
422                                                   (int) portNumber)));
423                         closesocket(fd);
424                         continue;
425                 }
426
427 #ifdef HAVE_UNIX_SOCKETS
428                 if (addr->ai_family == AF_UNIX)
429                 {
430                         if (Setup_AF_UNIX() != STATUS_OK)
431                         {
432                                 closesocket(fd);
433                                 break;
434                         }
435                 }
436 #endif
437
438                 /*
439                  * Select appropriate accept-queue length limit.  PG_SOMAXCONN is only
440                  * intended to provide a clamp on the request on platforms where an
441                  * overly large request provokes a kernel error (are there any?).
442                  */
443                 maxconn = MaxBackends * 2;
444                 if (maxconn > PG_SOMAXCONN)
445                         maxconn = PG_SOMAXCONN;
446
447                 err = listen(fd, maxconn);
448                 if (err < 0)
449                 {
450                         ereport(LOG,
451                                         (errcode_for_socket_access(),
452                         /* translator: %s is IPv4, IPv6, or Unix */
453                                          errmsg("could not listen on %s socket: %m",
454                                                         familyDesc)));
455                         closesocket(fd);
456                         continue;
457                 }
458                 ListenSocket[listen_index] = fd;
459                 added++;
460         }
461
462         pg_freeaddrinfo_all(hint.ai_family, addrs);
463
464         if (!added)
465                 return STATUS_ERROR;
466
467         return STATUS_OK;
468 }
469
470
471 #ifdef HAVE_UNIX_SOCKETS
472
473 /*
474  * Lock_AF_UNIX -- configure unix socket file path
475  */
476 static int
477 Lock_AF_UNIX(unsigned short portNumber, char *unixSocketName)
478 {
479         UNIXSOCK_PATH(sock_path, portNumber, unixSocketName);
480
481         /*
482          * Grab an interlock file associated with the socket file.
483          */
484         CreateSocketLockFile(sock_path, true);
485
486         /*
487          * Once we have the interlock, we can safely delete any pre-existing
488          * socket file to avoid failure at bind() time.
489          */
490         unlink(sock_path);
491
492         return STATUS_OK;
493 }
494
495
496 /*
497  * Setup_AF_UNIX -- configure unix socket permissions
498  */
499 static int
500 Setup_AF_UNIX(void)
501 {
502         /* Arrange to unlink the socket file at exit */
503         on_proc_exit(StreamDoUnlink, 0);
504
505         /*
506          * Fix socket ownership/permission if requested.  Note we must do this
507          * before we listen() to avoid a window where unwanted connections could
508          * get accepted.
509          */
510         Assert(Unix_socket_group);
511         if (Unix_socket_group[0] != '\0')
512         {
513 #ifdef WIN32
514                 elog(WARNING, "configuration item unix_socket_group is not supported on this platform");
515 #else
516                 char       *endptr;
517                 unsigned long val;
518                 gid_t           gid;
519
520                 val = strtoul(Unix_socket_group, &endptr, 10);
521                 if (*endptr == '\0')
522                 {                                               /* numeric group id */
523                         gid = val;
524                 }
525                 else
526                 {                                               /* convert group name to id */
527                         struct group *gr;
528
529                         gr = getgrnam(Unix_socket_group);
530                         if (!gr)
531                         {
532                                 ereport(LOG,
533                                                 (errmsg("group \"%s\" does not exist",
534                                                                 Unix_socket_group)));
535                                 return STATUS_ERROR;
536                         }
537                         gid = gr->gr_gid;
538                 }
539                 if (chown(sock_path, -1, gid) == -1)
540                 {
541                         ereport(LOG,
542                                         (errcode_for_file_access(),
543                                          errmsg("could not set group of file \"%s\": %m",
544                                                         sock_path)));
545                         return STATUS_ERROR;
546                 }
547 #endif
548         }
549
550         if (chmod(sock_path, Unix_socket_permissions) == -1)
551         {
552                 ereport(LOG,
553                                 (errcode_for_file_access(),
554                                  errmsg("could not set permissions of file \"%s\": %m",
555                                                 sock_path)));
556                 return STATUS_ERROR;
557         }
558         return STATUS_OK;
559 }
560 #endif   /* HAVE_UNIX_SOCKETS */
561
562
563 /*
564  * StreamConnection -- create a new connection with client using
565  *              server port.  Set port->sock to the FD of the new connection.
566  *
567  * ASSUME: that this doesn't need to be non-blocking because
568  *              the Postmaster uses select() to tell when the server master
569  *              socket is ready for accept().
570  *
571  * RETURNS: STATUS_OK or STATUS_ERROR
572  */
573 int
574 StreamConnection(pgsocket server_fd, Port *port)
575 {
576         /* accept connection and fill in the client (remote) address */
577         port->raddr.salen = sizeof(port->raddr.addr);
578         if ((port->sock = accept(server_fd,
579                                                          (struct sockaddr *) & port->raddr.addr,
580                                                          &port->raddr.salen)) < 0)
581         {
582                 ereport(LOG,
583                                 (errcode_for_socket_access(),
584                                  errmsg("could not accept new connection: %m")));
585
586                 /*
587                  * If accept() fails then postmaster.c will still see the server
588                  * socket as read-ready, and will immediately try again.  To avoid
589                  * uselessly sucking lots of CPU, delay a bit before trying again.
590                  * (The most likely reason for failure is being out of kernel file
591                  * table slots; we can do little except hope some will get freed up.)
592                  */
593                 pg_usleep(100000L);             /* wait 0.1 sec */
594                 return STATUS_ERROR;
595         }
596
597 #ifdef SCO_ACCEPT_BUG
598
599         /*
600          * UnixWare 7+ and OpenServer 5.0.4 are known to have this bug, but it
601          * shouldn't hurt to catch it for all versions of those platforms.
602          */
603         if (port->raddr.addr.ss_family == 0)
604                 port->raddr.addr.ss_family = AF_UNIX;
605 #endif
606
607         /* fill in the server (local) address */
608         port->laddr.salen = sizeof(port->laddr.addr);
609         if (getsockname(port->sock,
610                                         (struct sockaddr *) & port->laddr.addr,
611                                         &port->laddr.salen) < 0)
612         {
613                 elog(LOG, "getsockname() failed: %m");
614                 return STATUS_ERROR;
615         }
616
617         /* select NODELAY and KEEPALIVE options if it's a TCP connection */
618         if (!IS_AF_UNIX(port->laddr.addr.ss_family))
619         {
620                 int                     on;
621
622 #ifdef  TCP_NODELAY
623                 on = 1;
624                 if (setsockopt(port->sock, IPPROTO_TCP, TCP_NODELAY,
625                                            (char *) &on, sizeof(on)) < 0)
626                 {
627                         elog(LOG, "setsockopt(TCP_NODELAY) failed: %m");
628                         return STATUS_ERROR;
629                 }
630 #endif
631                 on = 1;
632                 if (setsockopt(port->sock, SOL_SOCKET, SO_KEEPALIVE,
633                                            (char *) &on, sizeof(on)) < 0)
634                 {
635                         elog(LOG, "setsockopt(SO_KEEPALIVE) failed: %m");
636                         return STATUS_ERROR;
637                 }
638
639 #ifdef WIN32
640
641                 /*
642                  * This is a Win32 socket optimization.  The ideal size is 32k.
643                  * http://support.microsoft.com/kb/823764/EN-US/
644                  */
645                 on = PQ_BUFFER_SIZE * 4;
646                 if (setsockopt(port->sock, SOL_SOCKET, SO_SNDBUF, (char *) &on,
647                                            sizeof(on)) < 0)
648                 {
649                         elog(LOG, "setsockopt(SO_SNDBUF) failed: %m");
650                         return STATUS_ERROR;
651                 }
652 #endif
653
654                 /*
655                  * Also apply the current keepalive parameters.  If we fail to set a
656                  * parameter, don't error out, because these aren't universally
657                  * supported.  (Note: you might think we need to reset the GUC
658                  * variables to 0 in such a case, but it's not necessary because the
659                  * show hooks for these variables report the truth anyway.)
660                  */
661                 (void) pq_setkeepalivesidle(tcp_keepalives_idle, port);
662                 (void) pq_setkeepalivesinterval(tcp_keepalives_interval, port);
663                 (void) pq_setkeepalivescount(tcp_keepalives_count, port);
664         }
665
666         return STATUS_OK;
667 }
668
669 /*
670  * StreamClose -- close a client/backend connection
671  *
672  * NOTE: this is NOT used to terminate a session; it is just used to release
673  * the file descriptor in a process that should no longer have the socket
674  * open.  (For example, the postmaster calls this after passing ownership
675  * of the connection to a child process.)  It is expected that someone else
676  * still has the socket open.  So, we only want to close the descriptor,
677  * we do NOT want to send anything to the far end.
678  */
679 void
680 StreamClose(pgsocket sock)
681 {
682         closesocket(sock);
683 }
684
685 /*
686  * TouchSocketFile -- mark socket file as recently accessed
687  *
688  * This routine should be called every so often to ensure that the socket
689  * file has a recent mod date (ordinary operations on sockets usually won't
690  * change the mod date).  That saves it from being removed by
691  * overenthusiastic /tmp-directory-cleaner daemons.  (Another reason we should
692  * never have put the socket file in /tmp...)
693  */
694 void
695 TouchSocketFile(void)
696 {
697         /* Do nothing if we did not create a socket... */
698         if (sock_path[0] != '\0')
699         {
700                 /*
701                  * utime() is POSIX standard, utimes() is a common alternative. If we
702                  * have neither, there's no way to affect the mod or access time of
703                  * the socket :-(
704                  *
705                  * In either path, we ignore errors; there's no point in complaining.
706                  */
707 #ifdef HAVE_UTIME
708                 utime(sock_path, NULL);
709 #else                                                   /* !HAVE_UTIME */
710 #ifdef HAVE_UTIMES
711                 utimes(sock_path, NULL);
712 #endif   /* HAVE_UTIMES */
713 #endif   /* HAVE_UTIME */
714         }
715 }
716
717
718 /* --------------------------------
719  * Low-level I/O routines begin here.
720  *
721  * These routines communicate with a frontend client across a connection
722  * already established by the preceding routines.
723  * --------------------------------
724  */
725
726
727 /* --------------------------------
728  *              pq_recvbuf - load some bytes into the input buffer
729  *
730  *              returns 0 if OK, EOF if trouble
731  * --------------------------------
732  */
733 static int
734 pq_recvbuf(void)
735 {
736         if (PqRecvPointer > 0)
737         {
738                 if (PqRecvLength > PqRecvPointer)
739                 {
740                         /* still some unread data, left-justify it in the buffer */
741                         memmove(PqRecvBuffer, PqRecvBuffer + PqRecvPointer,
742                                         PqRecvLength - PqRecvPointer);
743                         PqRecvLength -= PqRecvPointer;
744                         PqRecvPointer = 0;
745                 }
746                 else
747                         PqRecvLength = PqRecvPointer = 0;
748         }
749
750         /* Can fill buffer from PqRecvLength and upwards */
751         for (;;)
752         {
753                 int                     r;
754
755                 r = secure_read(MyProcPort, PqRecvBuffer + PqRecvLength,
756                                                 PQ_BUFFER_SIZE - PqRecvLength);
757
758                 if (r < 0)
759                 {
760                         if (errno == EINTR)
761                                 continue;               /* Ok if interrupted */
762
763                         /*
764                          * Careful: an ereport() that tries to write to the client would
765                          * cause recursion to here, leading to stack overflow and core
766                          * dump!  This message must go *only* to the postmaster log.
767                          */
768                         ereport(COMMERROR,
769                                         (errcode_for_socket_access(),
770                                          errmsg("could not receive data from client: %m")));
771                         return EOF;
772                 }
773                 if (r == 0)
774                 {
775                         /*
776                          * EOF detected.  We used to write a log message here, but it's
777                          * better to expect the ultimate caller to do that.
778                          */
779                         return EOF;
780                 }
781                 /* r contains number of bytes read, so just incr length */
782                 PqRecvLength += r;
783                 return 0;
784         }
785 }
786
787 /* --------------------------------
788  *              pq_getbyte      - get a single byte from connection, or return EOF
789  * --------------------------------
790  */
791 int
792 pq_getbyte(void)
793 {
794         while (PqRecvPointer >= PqRecvLength)
795         {
796                 if (pq_recvbuf())               /* If nothing in buffer, then recv some */
797                         return EOF;                     /* Failed to recv data */
798         }
799         return (unsigned char) PqRecvBuffer[PqRecvPointer++];
800 }
801
802 /* --------------------------------
803  *              pq_peekbyte             - peek at next byte from connection
804  *
805  *       Same as pq_getbyte() except we don't advance the pointer.
806  * --------------------------------
807  */
808 int
809 pq_peekbyte(void)
810 {
811         while (PqRecvPointer >= PqRecvLength)
812         {
813                 if (pq_recvbuf())               /* If nothing in buffer, then recv some */
814                         return EOF;                     /* Failed to recv data */
815         }
816         return (unsigned char) PqRecvBuffer[PqRecvPointer];
817 }
818
819
820 /* --------------------------------
821  *              pq_getbyte_if_available - get a single byte from connection,
822  *                      if available
823  *
824  * The received byte is stored in *c. Returns 1 if a byte was read, 0 if
825  * if no data was available, or EOF.
826  * --------------------------------
827  */
828 int
829 pq_getbyte_if_available(unsigned char *c)
830 {
831         int r;
832
833         if (PqRecvPointer < PqRecvLength)
834         {
835                 *c = PqRecvBuffer[PqRecvPointer++];
836                 return 1;
837         }
838
839         /* Temporarily put the socket into non-blocking mode */
840         if (!pg_set_noblock(MyProcPort->sock))
841                 ereport(ERROR,
842                                 (errmsg("couldn't put socket to non-blocking mode: %m")));
843         MyProcPort->noblock = true;
844         PG_TRY();
845         {
846                 r = secure_read(MyProcPort, c, 1);
847         }
848         PG_CATCH();
849         {
850                 /*
851                  * The rest of the backend code assumes the socket is in blocking
852                  * mode, so treat failure as FATAL.
853                  */
854                 if (!pg_set_block(MyProcPort->sock))
855                         ereport(FATAL,
856                                         (errmsg("couldn't put socket to blocking mode: %m")));
857                 MyProcPort->noblock = false;
858                 PG_RE_THROW();
859         }
860         PG_END_TRY();
861         if (!pg_set_block(MyProcPort->sock))
862                 ereport(FATAL,
863                                 (errmsg("couldn't put socket to blocking mode: %m")));
864         MyProcPort->noblock = false;
865
866         return r;
867 }
868
869 /* --------------------------------
870  *              pq_getbytes             - get a known number of bytes from connection
871  *
872  *              returns 0 if OK, EOF if trouble
873  * --------------------------------
874  */
875 int
876 pq_getbytes(char *s, size_t len)
877 {
878         size_t          amount;
879
880         while (len > 0)
881         {
882                 while (PqRecvPointer >= PqRecvLength)
883                 {
884                         if (pq_recvbuf())       /* If nothing in buffer, then recv some */
885                                 return EOF;             /* Failed to recv data */
886                 }
887                 amount = PqRecvLength - PqRecvPointer;
888                 if (amount > len)
889                         amount = len;
890                 memcpy(s, PqRecvBuffer + PqRecvPointer, amount);
891                 PqRecvPointer += amount;
892                 s += amount;
893                 len -= amount;
894         }
895         return 0;
896 }
897
898 /* --------------------------------
899  *              pq_discardbytes         - throw away a known number of bytes
900  *
901  *              same as pq_getbytes except we do not copy the data to anyplace.
902  *              this is used for resynchronizing after read errors.
903  *
904  *              returns 0 if OK, EOF if trouble
905  * --------------------------------
906  */
907 static int
908 pq_discardbytes(size_t len)
909 {
910         size_t          amount;
911
912         while (len > 0)
913         {
914                 while (PqRecvPointer >= PqRecvLength)
915                 {
916                         if (pq_recvbuf())       /* If nothing in buffer, then recv some */
917                                 return EOF;             /* Failed to recv data */
918                 }
919                 amount = PqRecvLength - PqRecvPointer;
920                 if (amount > len)
921                         amount = len;
922                 PqRecvPointer += amount;
923                 len -= amount;
924         }
925         return 0;
926 }
927
928 /* --------------------------------
929  *              pq_getstring    - get a null terminated string from connection
930  *
931  *              The return value is placed in an expansible StringInfo, which has
932  *              already been initialized by the caller.
933  *
934  *              This is used only for dealing with old-protocol clients.  The idea
935  *              is to produce a StringInfo that looks the same as we would get from
936  *              pq_getmessage() with a newer client; we will then process it with
937  *              pq_getmsgstring.  Therefore, no character set conversion is done here,
938  *              even though this is presumably useful only for text.
939  *
940  *              returns 0 if OK, EOF if trouble
941  * --------------------------------
942  */
943 int
944 pq_getstring(StringInfo s)
945 {
946         int                     i;
947
948         resetStringInfo(s);
949
950         /* Read until we get the terminating '\0' */
951         for (;;)
952         {
953                 while (PqRecvPointer >= PqRecvLength)
954                 {
955                         if (pq_recvbuf())       /* If nothing in buffer, then recv some */
956                                 return EOF;             /* Failed to recv data */
957                 }
958
959                 for (i = PqRecvPointer; i < PqRecvLength; i++)
960                 {
961                         if (PqRecvBuffer[i] == '\0')
962                         {
963                                 /* include the '\0' in the copy */
964                                 appendBinaryStringInfo(s, PqRecvBuffer + PqRecvPointer,
965                                                                            i - PqRecvPointer + 1);
966                                 PqRecvPointer = i + 1;  /* advance past \0 */
967                                 return 0;
968                         }
969                 }
970
971                 /* If we're here we haven't got the \0 in the buffer yet. */
972                 appendBinaryStringInfo(s, PqRecvBuffer + PqRecvPointer,
973                                                            PqRecvLength - PqRecvPointer);
974                 PqRecvPointer = PqRecvLength;
975         }
976 }
977
978
979 /* --------------------------------
980  *              pq_getmessage   - get a message with length word from connection
981  *
982  *              The return value is placed in an expansible StringInfo, which has
983  *              already been initialized by the caller.
984  *              Only the message body is placed in the StringInfo; the length word
985  *              is removed.  Also, s->cursor is initialized to zero for convenience
986  *              in scanning the message contents.
987  *
988  *              If maxlen is not zero, it is an upper limit on the length of the
989  *              message we are willing to accept.  We abort the connection (by
990  *              returning EOF) if client tries to send more than that.
991  *
992  *              returns 0 if OK, EOF if trouble
993  * --------------------------------
994  */
995 int
996 pq_getmessage(StringInfo s, int maxlen)
997 {
998         int32           len;
999
1000         resetStringInfo(s);
1001
1002         /* Read message length word */
1003         if (pq_getbytes((char *) &len, 4) == EOF)
1004         {
1005                 ereport(COMMERROR,
1006                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1007                                  errmsg("unexpected EOF within message length word")));
1008                 return EOF;
1009         }
1010
1011         len = ntohl(len);
1012
1013         if (len < 4 ||
1014                 (maxlen > 0 && len > maxlen))
1015         {
1016                 ereport(COMMERROR,
1017                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1018                                  errmsg("invalid message length")));
1019                 return EOF;
1020         }
1021
1022         len -= 4;                                       /* discount length itself */
1023
1024         if (len > 0)
1025         {
1026                 /*
1027                  * Allocate space for message.  If we run out of room (ridiculously
1028                  * large message), we will elog(ERROR), but we want to discard the
1029                  * message body so as not to lose communication sync.
1030                  */
1031                 PG_TRY();
1032                 {
1033                         enlargeStringInfo(s, len);
1034                 }
1035                 PG_CATCH();
1036                 {
1037                         if (pq_discardbytes(len) == EOF)
1038                                 ereport(COMMERROR,
1039                                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1040                                                  errmsg("incomplete message from client")));
1041                         PG_RE_THROW();
1042                 }
1043                 PG_END_TRY();
1044
1045                 /* And grab the message */
1046                 if (pq_getbytes(s->data, len) == EOF)
1047                 {
1048                         ereport(COMMERROR,
1049                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1050                                          errmsg("incomplete message from client")));
1051                         return EOF;
1052                 }
1053                 s->len = len;
1054                 /* Place a trailing null per StringInfo convention */
1055                 s->data[len] = '\0';
1056         }
1057
1058         return 0;
1059 }
1060
1061
1062 /* --------------------------------
1063  *              pq_putbytes             - send bytes to connection (not flushed until pq_flush)
1064  *
1065  *              returns 0 if OK, EOF if trouble
1066  * --------------------------------
1067  */
1068 int
1069 pq_putbytes(const char *s, size_t len)
1070 {
1071         int                     res;
1072
1073         /* Should only be called by old-style COPY OUT */
1074         Assert(DoingCopyOut);
1075         /* No-op if reentrant call */
1076         if (PqCommBusy)
1077                 return 0;
1078         PqCommBusy = true;
1079         res = internal_putbytes(s, len);
1080         PqCommBusy = false;
1081         return res;
1082 }
1083
1084 static int
1085 internal_putbytes(const char *s, size_t len)
1086 {
1087         size_t          amount;
1088
1089         while (len > 0)
1090         {
1091                 /* If buffer is full, then flush it out */
1092                 if (PqSendPointer >= PQ_BUFFER_SIZE)
1093                         if (internal_flush())
1094                                 return EOF;
1095                 amount = PQ_BUFFER_SIZE - PqSendPointer;
1096                 if (amount > len)
1097                         amount = len;
1098                 memcpy(PqSendBuffer + PqSendPointer, s, amount);
1099                 PqSendPointer += amount;
1100                 s += amount;
1101                 len -= amount;
1102         }
1103         return 0;
1104 }
1105
1106 /* --------------------------------
1107  *              pq_flush                - flush pending output
1108  *
1109  *              returns 0 if OK, EOF if trouble
1110  * --------------------------------
1111  */
1112 int
1113 pq_flush(void)
1114 {
1115         int                     res;
1116
1117         /* No-op if reentrant call */
1118         if (PqCommBusy)
1119                 return 0;
1120         PqCommBusy = true;
1121         res = internal_flush();
1122         PqCommBusy = false;
1123         return res;
1124 }
1125
1126 static int
1127 internal_flush(void)
1128 {
1129         static int      last_reported_send_errno = 0;
1130
1131         char       *bufptr = PqSendBuffer;
1132         char       *bufend = PqSendBuffer + PqSendPointer;
1133
1134         while (bufptr < bufend)
1135         {
1136                 int                     r;
1137
1138                 r = secure_write(MyProcPort, bufptr, bufend - bufptr);
1139
1140                 if (r <= 0)
1141                 {
1142                         if (errno == EINTR)
1143                                 continue;               /* Ok if we were interrupted */
1144
1145                         /*
1146                          * Careful: an ereport() that tries to write to the client would
1147                          * cause recursion to here, leading to stack overflow and core
1148                          * dump!  This message must go *only* to the postmaster log.
1149                          *
1150                          * If a client disconnects while we're in the midst of output, we
1151                          * might write quite a bit of data before we get to a safe query
1152                          * abort point.  So, suppress duplicate log messages.
1153                          */
1154                         if (errno != last_reported_send_errno)
1155                         {
1156                                 last_reported_send_errno = errno;
1157                                 ereport(COMMERROR,
1158                                                 (errcode_for_socket_access(),
1159                                                  errmsg("could not send data to client: %m")));
1160                         }
1161
1162                         /*
1163                          * We drop the buffered data anyway so that processing can
1164                          * continue, even though we'll probably quit soon.
1165                          */
1166                         PqSendPointer = 0;
1167                         return EOF;
1168                 }
1169
1170                 last_reported_send_errno = 0;   /* reset after any successful send */
1171                 bufptr += r;
1172         }
1173
1174         PqSendPointer = 0;
1175         return 0;
1176 }
1177
1178
1179 /* --------------------------------
1180  * Message-level I/O routines begin here.
1181  *
1182  * These routines understand about the old-style COPY OUT protocol.
1183  * --------------------------------
1184  */
1185
1186
1187 /* --------------------------------
1188  *              pq_putmessage   - send a normal message (suppressed in COPY OUT mode)
1189  *
1190  *              If msgtype is not '\0', it is a message type code to place before
1191  *              the message body.  If msgtype is '\0', then the message has no type
1192  *              code (this is only valid in pre-3.0 protocols).
1193  *
1194  *              len is the length of the message body data at *s.  In protocol 3.0
1195  *              and later, a message length word (equal to len+4 because it counts
1196  *              itself too) is inserted by this routine.
1197  *
1198  *              All normal messages are suppressed while old-style COPY OUT is in
1199  *              progress.  (In practice only a few notice messages might get emitted
1200  *              then; dropping them is annoying, but at least they will still appear
1201  *              in the postmaster log.)
1202  *
1203  *              We also suppress messages generated while pqcomm.c is busy.  This
1204  *              avoids any possibility of messages being inserted within other
1205  *              messages.  The only known trouble case arises if SIGQUIT occurs
1206  *              during a pqcomm.c routine --- quickdie() will try to send a warning
1207  *              message, and the most reasonable approach seems to be to drop it.
1208  *
1209  *              returns 0 if OK, EOF if trouble
1210  * --------------------------------
1211  */
1212 int
1213 pq_putmessage(char msgtype, const char *s, size_t len)
1214 {
1215         if (DoingCopyOut || PqCommBusy)
1216                 return 0;
1217         PqCommBusy = true;
1218         if (msgtype)
1219                 if (internal_putbytes(&msgtype, 1))
1220                         goto fail;
1221         if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
1222         {
1223                 uint32          n32;
1224
1225                 n32 = htonl((uint32) (len + 4));
1226                 if (internal_putbytes((char *) &n32, 4))
1227                         goto fail;
1228         }
1229         if (internal_putbytes(s, len))
1230                 goto fail;
1231         PqCommBusy = false;
1232         return 0;
1233
1234 fail:
1235         PqCommBusy = false;
1236         return EOF;
1237 }
1238
1239 /* --------------------------------
1240  *              pq_startcopyout - inform libpq that an old-style COPY OUT transfer
1241  *                      is beginning
1242  * --------------------------------
1243  */
1244 void
1245 pq_startcopyout(void)
1246 {
1247         DoingCopyOut = true;
1248 }
1249
1250 /* --------------------------------
1251  *              pq_endcopyout   - end an old-style COPY OUT transfer
1252  *
1253  *              If errorAbort is indicated, we are aborting a COPY OUT due to an error,
1254  *              and must send a terminator line.  Since a partial data line might have
1255  *              been emitted, send a couple of newlines first (the first one could
1256  *              get absorbed by a backslash...)  Note that old-style COPY OUT does
1257  *              not allow binary transfers, so a textual terminator is always correct.
1258  * --------------------------------
1259  */
1260 void
1261 pq_endcopyout(bool errorAbort)
1262 {
1263         if (!DoingCopyOut)
1264                 return;
1265         if (errorAbort)
1266                 pq_putbytes("\n\n\\.\n", 5);
1267         /* in non-error case, copy.c will have emitted the terminator line */
1268         DoingCopyOut = false;
1269 }
1270
1271
1272 /*
1273  * Support for TCP Keepalive parameters
1274  */
1275
1276 int
1277 pq_getkeepalivesidle(Port *port)
1278 {
1279 #ifdef TCP_KEEPIDLE
1280         if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1281                 return 0;
1282
1283         if (port->keepalives_idle != 0)
1284                 return port->keepalives_idle;
1285
1286         if (port->default_keepalives_idle == 0)
1287         {
1288                 ACCEPT_TYPE_ARG3 size = sizeof(port->default_keepalives_idle);
1289
1290                 if (getsockopt(port->sock, IPPROTO_TCP, TCP_KEEPIDLE,
1291                                            (char *) &port->default_keepalives_idle,
1292                                            &size) < 0)
1293                 {
1294                         elog(LOG, "getsockopt(TCP_KEEPIDLE) failed: %m");
1295                         port->default_keepalives_idle = -1; /* don't know */
1296                 }
1297         }
1298
1299         return port->default_keepalives_idle;
1300 #else
1301         return 0;
1302 #endif
1303 }
1304
1305 int
1306 pq_setkeepalivesidle(int idle, Port *port)
1307 {
1308         if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1309                 return STATUS_OK;
1310
1311 #ifdef TCP_KEEPIDLE
1312         if (idle == port->keepalives_idle)
1313                 return STATUS_OK;
1314
1315         if (port->default_keepalives_idle <= 0)
1316         {
1317                 if (pq_getkeepalivesidle(port) < 0)
1318                 {
1319                         if (idle == 0)
1320                                 return STATUS_OK;               /* default is set but unknown */
1321                         else
1322                                 return STATUS_ERROR;
1323                 }
1324         }
1325
1326         if (idle == 0)
1327                 idle = port->default_keepalives_idle;
1328
1329         if (setsockopt(port->sock, IPPROTO_TCP, TCP_KEEPIDLE,
1330                                    (char *) &idle, sizeof(idle)) < 0)
1331         {
1332                 elog(LOG, "setsockopt(TCP_KEEPIDLE) failed: %m");
1333                 return STATUS_ERROR;
1334         }
1335
1336         port->keepalives_idle = idle;
1337 #else
1338         if (idle != 0)
1339         {
1340                 elog(LOG, "setsockopt(TCP_KEEPIDLE) not supported");
1341                 return STATUS_ERROR;
1342         }
1343 #endif
1344
1345         return STATUS_OK;
1346 }
1347
1348 int
1349 pq_getkeepalivesinterval(Port *port)
1350 {
1351 #ifdef TCP_KEEPINTVL
1352         if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1353                 return 0;
1354
1355         if (port->keepalives_interval != 0)
1356                 return port->keepalives_interval;
1357
1358         if (port->default_keepalives_interval == 0)
1359         {
1360                 ACCEPT_TYPE_ARG3 size = sizeof(port->default_keepalives_interval);
1361
1362                 if (getsockopt(port->sock, IPPROTO_TCP, TCP_KEEPINTVL,
1363                                            (char *) &port->default_keepalives_interval,
1364                                            &size) < 0)
1365                 {
1366                         elog(LOG, "getsockopt(TCP_KEEPINTVL) failed: %m");
1367                         port->default_keepalives_interval = -1;         /* don't know */
1368                 }
1369         }
1370
1371         return port->default_keepalives_interval;
1372 #else
1373         return 0;
1374 #endif
1375 }
1376
1377 int
1378 pq_setkeepalivesinterval(int interval, Port *port)
1379 {
1380         if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1381                 return STATUS_OK;
1382
1383 #ifdef TCP_KEEPINTVL
1384         if (interval == port->keepalives_interval)
1385                 return STATUS_OK;
1386
1387         if (port->default_keepalives_interval <= 0)
1388         {
1389                 if (pq_getkeepalivesinterval(port) < 0)
1390                 {
1391                         if (interval == 0)
1392                                 return STATUS_OK;               /* default is set but unknown */
1393                         else
1394                                 return STATUS_ERROR;
1395                 }
1396         }
1397
1398         if (interval == 0)
1399                 interval = port->default_keepalives_interval;
1400
1401         if (setsockopt(port->sock, IPPROTO_TCP, TCP_KEEPINTVL,
1402                                    (char *) &interval, sizeof(interval)) < 0)
1403         {
1404                 elog(LOG, "setsockopt(TCP_KEEPINTVL) failed: %m");
1405                 return STATUS_ERROR;
1406         }
1407
1408         port->keepalives_interval = interval;
1409 #else
1410         if (interval != 0)
1411         {
1412                 elog(LOG, "setsockopt(TCP_KEEPINTVL) not supported");
1413                 return STATUS_ERROR;
1414         }
1415 #endif
1416
1417         return STATUS_OK;
1418 }
1419
1420 int
1421 pq_getkeepalivescount(Port *port)
1422 {
1423 #ifdef TCP_KEEPCNT
1424         if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1425                 return 0;
1426
1427         if (port->keepalives_count != 0)
1428                 return port->keepalives_count;
1429
1430         if (port->default_keepalives_count == 0)
1431         {
1432                 ACCEPT_TYPE_ARG3 size = sizeof(port->default_keepalives_count);
1433
1434                 if (getsockopt(port->sock, IPPROTO_TCP, TCP_KEEPCNT,
1435                                            (char *) &port->default_keepalives_count,
1436                                            &size) < 0)
1437                 {
1438                         elog(LOG, "getsockopt(TCP_KEEPCNT) failed: %m");
1439                         port->default_keepalives_count = -1;            /* don't know */
1440                 }
1441         }
1442
1443         return port->default_keepalives_count;
1444 #else
1445         return 0;
1446 #endif
1447 }
1448
1449 int
1450 pq_setkeepalivescount(int count, Port *port)
1451 {
1452         if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1453                 return STATUS_OK;
1454
1455 #ifdef TCP_KEEPCNT
1456         if (count == port->keepalives_count)
1457                 return STATUS_OK;
1458
1459         if (port->default_keepalives_count <= 0)
1460         {
1461                 if (pq_getkeepalivescount(port) < 0)
1462                 {
1463                         if (count == 0)
1464                                 return STATUS_OK;               /* default is set but unknown */
1465                         else
1466                                 return STATUS_ERROR;
1467                 }
1468         }
1469
1470         if (count == 0)
1471                 count = port->default_keepalives_count;
1472
1473         if (setsockopt(port->sock, IPPROTO_TCP, TCP_KEEPCNT,
1474                                    (char *) &count, sizeof(count)) < 0)
1475         {
1476                 elog(LOG, "setsockopt(TCP_KEEPCNT) failed: %m");
1477                 return STATUS_ERROR;
1478         }
1479
1480         port->keepalives_count = count;
1481 #else
1482         if (count != 0)
1483         {
1484                 elog(LOG, "setsockopt(TCP_KEEPCNT) not supported");
1485                 return STATUS_ERROR;
1486         }
1487 #endif
1488
1489         return STATUS_OK;
1490 }