OSDN Git Service

7cd9a1b048d2d9508d8ea591225ce83309f81d33
[pg-rex/syncrep.git] / src / backend / postmaster / postmaster.c
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  *        This program acts as a clearing house for requests to the
5  *        POSTGRES system.      Frontend programs send a startup message
6  *        to the Postmaster and the postmaster uses the info in the
7  *        message to set up a backend process.
8  *
9  *        The postmaster also manages system-wide operations such as
10  *        startup and shutdown. The postmaster itself doesn't do those
11  *        operations, mind you --- it just forks off a subprocess to do them
12  *        at the right times.  It also takes care of resetting the system
13  *        if a backend crashes.
14  *
15  *        The postmaster process creates the shared memory and semaphore
16  *        pools during startup, but as a rule does not touch them itself.
17  *        In particular, it is not a member of the PGPROC array of backends
18  *        and so it cannot participate in lock-manager operations.      Keeping
19  *        the postmaster away from shared memory operations makes it simpler
20  *        and more reliable.  The postmaster is almost always able to recover
21  *        from crashes of individual backends by resetting shared memory;
22  *        if it did much with shared memory then it would be prone to crashing
23  *        along with the backends.
24  *
25  *        When a request message is received, we now fork() immediately.
26  *        The child process performs authentication of the request, and
27  *        then becomes a backend if successful.  This allows the auth code
28  *        to be written in a simple single-threaded style (as opposed to the
29  *        crufty "poor man's multitasking" code that used to be needed).
30  *        More importantly, it ensures that blockages in non-multithreaded
31  *        libraries like SSL or PAM cannot cause denial of service to other
32  *        clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  *        $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.444 2005/02/20 02:21:54 tgl Exp $
41  *
42  * NOTES
43  *
44  * Initialization:
45  *              The Postmaster sets up shared memory data structures
46  *              for the backends.
47  *
48  * Synchronization:
49  *              The Postmaster shares memory with the backends but should avoid
50  *              touching shared memory, so as not to become stuck if a crashing
51  *              backend screws up locks or shared memory.  Likewise, the Postmaster
52  *              should never block on messages from frontend clients.
53  *
54  * Garbage Collection:
55  *              The Postmaster cleans up after backends if they have an emergency
56  *              exit and/or core dump.
57  *
58  * Error Reporting:
59  *              Use write_stderr() only for reporting "interactive" errors
60  *              (essentially, bogus arguments on the command line).  Once the
61  *              postmaster is launched, use ereport().  In particular, don't use
62  *              write_stderr() for anything that occurs after pmdaemonize.
63  *
64  *-------------------------------------------------------------------------
65  */
66
67 #include "postgres.h"
68
69 #include <unistd.h>
70 #include <signal.h>
71 #include <time.h>
72 #include <sys/wait.h>
73 #include <ctype.h>
74 #include <sys/stat.h>
75 #include <sys/socket.h>
76 #include <fcntl.h>
77 #include <sys/param.h>
78 #include <netinet/in.h>
79 #include <arpa/inet.h>
80 #include <netdb.h>
81 #include <limits.h>
82
83 #ifdef HAVE_SYS_SELECT_H
84 #include <sys/select.h>
85 #endif
86
87 #ifdef HAVE_GETOPT_H
88 #include <getopt.h>
89 #endif
90
91 #ifdef USE_RENDEZVOUS
92 #include <DNSServiceDiscovery/DNSServiceDiscovery.h>
93 #endif
94
95 #include "catalog/pg_control.h"
96 #include "catalog/pg_database.h"
97 #include "commands/async.h"
98 #include "lib/dllist.h"
99 #include "libpq/auth.h"
100 #include "libpq/crypt.h"
101 #include "libpq/libpq.h"
102 #include "libpq/pqcomm.h"
103 #include "libpq/pqsignal.h"
104 #include "miscadmin.h"
105 #include "nodes/nodes.h"
106 #include "postmaster/postmaster.h"
107 #include "postmaster/pgarch.h"
108 #include "postmaster/syslogger.h"
109 #include "storage/fd.h"
110 #include "storage/ipc.h"
111 #include "storage/pg_shmem.h"
112 #include "storage/pmsignal.h"
113 #include "storage/proc.h"
114 #include "storage/bufmgr.h"
115 #include "access/xlog.h"
116 #include "tcop/tcopprot.h"
117 #include "utils/builtins.h"
118 #include "utils/guc.h"
119 #include "utils/memutils.h"
120 #include "utils/ps_status.h"
121 #include "bootstrap/bootstrap.h"
122 #include "pgstat.h"
123
124 #ifdef EXEC_BACKEND
125 #include "storage/spin.h"
126 #endif
127
128
129 /*
130  * List of active backends (or child processes anyway; we don't actually
131  * know whether a given child has become a backend or is still in the
132  * authorization phase).  This is used mainly to keep track of how many
133  * children we have and send them appropriate signals when necessary.
134  *
135  * "Special" children such as the startup and bgwriter tasks are not in
136  * this list.
137  */
138 typedef struct bkend            /* bookkeeping record for one child process */
139 {
140         pid_t           pid;                    /* process ID of the child (a backend, or still authenticating) */
141         long            cancel_key;             /* cancel key assigned to this backend */
142 } Backend;
143
144 static Dllist *BackendList;     /* created with DLNewList() in PostmasterMain */
145
146 #ifdef EXEC_BACKEND
147 #define NUM_BACKENDARRAY_ELEMS (2*MaxBackends)  /* twice MaxBackends; also sizes the win32 child arrays */
148 static Backend *ShmemBackendArray;      /* has a matching field in BackendParameters */
149 #endif
150
151 /* Where we listen for connections: port number, Unix-socket directory, address list */
152 int                     PostPortNumber;     /* cast to unsigned short and passed to StreamServerPort() */
153 char       *UnixSocketDir;      /* passed to every StreamServerPort() call */
154 char       *ListenAddresses;    /* comma-separated list split in PostmasterMain; NULL skips the address loop */
155
156 /*
157  * ReservedBackends is the number of backends reserved for superuser use.
158  * This number is taken out of the pool size given by MaxBackends, so the
159  * number of backend slots available to non-superusers is
160  * (MaxBackends - ReservedBackends).  Note that what this really means is
161  * "if there are <= ReservedBackends connections available, only superusers
162  * can make new connections" --- pre-existing superuser connections don't
163  * count against the limit.
164  */
165 int                     ReservedBackends;   /* PostmasterMain exits if this is >= MaxBackends */
166
167
168 static const char *progname = NULL;     /* set from argv[0] via get_progname() in PostmasterMain */
169
170 /* The socket(s) we're listening on; PostmasterMain initializes every slot to -1. */
171 #define MAXLISTEN       64
172 static int      ListenSocket[MAXLISTEN];
173
174 /*
175  * Built up by the -o option: each occurrence appends " <arg>" (space first).
176  */
177 static char ExtraOptions[MAXPGPATH];
178
179 /*
180  * These globals control the behavior of the postmaster in case some
181  * backend dumps core.  Normally, it kills all peers of the dead backend
182  * and reinitializes shared memory.  By specifying -s or -n respectively,
183  * we can have the postmaster stop (rather than kill) peers and not
184  * reinitialize shared data structures.
185  */
186 static bool Reinit = true;      /* cleared by -n */
187 static int      SendStop = false;       /* set by -s; NOTE(review): only assigned false/true in the visible code, so bool (like Reinit) looks intended */
188
189 /* still more option variables */
190 bool            EnableSSL = false;      /* when set (and USE_SSL is defined) PostmasterMain calls secure_initialize() */
191 bool            SilentMode = false; /* silent mode (-S): PostmasterMain calls pmdaemonize() */
192
193 int                     PreAuthDelay = 0;       /* NOTE(review): units not visible here, presumably seconds -- confirm */
194 int                     AuthenticationTimeout = 60;     /* NOTE(review): presumably seconds -- confirm */
195
196 bool            log_hostname;           /* for ps display and logging */
197 bool            Log_connections = false;
198 bool            Db_user_namespace = false;
199
200 char       *rendezvous_name;    /* if non-NULL, name registered via DNSServiceRegistrationCreate() (USE_RENDEZVOUS only) */
201
202 /* list of library:init-function to be preloaded; NULL means nothing to preload */
203 char       *preload_libraries_string = NULL;
204
205 /* PIDs of special child processes (not kept in BackendList); 0 when not running */
206 static pid_t StartupPID = 0,
207                         BgWriterPID = 0,
208                         PgArchPID = 0,
209                         PgStatPID = 0,
210                         SysLoggerPID = 0;
211
212 /* Startup/shutdown state: the values Shutdown can take */
213 #define                 NoShutdown              0
214 #define                 SmartShutdown   1
215 #define                 FastShutdown    2
216
217 static int      Shutdown = NoShutdown;  /* one of the three codes above */
218
219 static bool FatalError = false; /* T if recovering from backend crash */
220
221 bool            ClientAuthInProgress = false;           /* T during new-client
222                                                                                                  * authentication */
223
224 /*
225  * State for assigning random salts and cancel keys.
226  * Also, the global MyCancelKey (declared elsewhere, not in this file section)
227  * passes the cancel key assigned to a backend from the postmaster to it (via fork).
228  */
229 static unsigned int random_seed = 0;
230
231 extern char *optarg;    /* getopt(3) state used by the option loop in PostmasterMain */
232 extern int      optind,
233                         opterr;
234
235 #ifdef HAVE_INT_OPTRESET
236 extern int      optreset;       /* set to 1 after option parsing so getopt works again in subprocesses */
237 #endif
238
239 /*
240  * postmaster.c - prototypes for file-local (static) functions
241  */
242 static void checkDataDir(void); /* called by PostmasterMain right after SelectConfigFiles() */
243
244 #ifdef USE_RENDEZVOUS
245 static void reg_reply(DNSServiceRegistrationReplyErrorType errorCode,
246                   void *context);       /* reply callback handed to DNSServiceRegistrationCreate() */
247 #endif
248 static void pmdaemonize(void);  /* called by PostmasterMain when SilentMode is set */
249 static Port *ConnCreate(int serverFd);
250 static void ConnFree(Port *port);
251 static void reset_shared(unsigned short port);  /* PostmasterMain calls this with PostPortNumber to set up shared memory and semaphores */
252 static void SIGHUP_handler(SIGNAL_ARGS);
253 static void pmdie(SIGNAL_ARGS);
254 static void reaper(SIGNAL_ARGS);
255 static void sigusr1_handler(SIGNAL_ARGS);
256 static void dummy_handler(SIGNAL_ARGS);
257 static void CleanupBackend(int pid, int exitstatus);
258 static void HandleChildCrash(int pid, int exitstatus, const char *procname);
259 static void LogChildExit(int lev, const char *procname,
260                          int pid, int exitstatus);
261 static int      BackendRun(Port *port);
262 static void ExitPostmaster(int status); /* used for every early exit in PostmasterMain */
263 static void usage(const char *);        /* invoked with progname for --help / -? */
264 static int      ServerLoop(void);
265 static int      BackendStartup(Port *port);
266 static int      ProcessStartupPacket(Port *port, bool SSLdone);
267 static void processCancelRequest(Port *port, void *pkt);
268 static int      initMasks(fd_set *rmask);
269 static void report_fork_failure_to_client(Port *port, int errnum);
270 static enum CAC_state canAcceptConnections(void);
271 static long PostmasterRandom(void);
272 static void RandomSalt(char *cryptSalt, char *md5Salt);
273 static void SignalChildren(int signal);
274 static int      CountChildren(void);
275 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
276 static pid_t StartChildProcess(int xlop);       /* xlop is a BS_XLOG_* code; see the StartupDataBase()/StartBackgroundWriter() macros */
277
278 #ifdef EXEC_BACKEND
279
280 #ifdef WIN32    /* nested inside the EXEC_BACKEND section */
281 static void win32_AddChild(pid_t pid, HANDLE handle);
282 static void win32_RemoveChild(pid_t pid);
283 static pid_t win32_waitpid(int *exitstatus);
284 static DWORD WINAPI win32_sigchld_waiter(LPVOID param);
285
286 static pid_t *win32_childPIDArray;      /* malloc'ed in PostmasterMain, NUM_BACKENDARRAY_ELEMS entries */
287 static HANDLE *win32_childHNDArray;     /* malloc'ed in PostmasterMain, NUM_BACKENDARRAY_ELEMS entries */
288 static unsigned long win32_numChildren = 0;
289
290 HANDLE          PostmasterHandle;       /* also carried in BackendParameters on WIN32 */
291 #endif
292
293 static pid_t backend_forkexec(Port *port);
294 static pid_t internal_forkexec(int argc, char *argv[], Port *port);
295
296 /* Type for a socket that can be inherited by a client (child) process */
297 #ifdef WIN32
298 typedef struct
299 {
300         SOCKET origsocket; /* Original socket value, or -1 if not a socket */
301         WSAPROTOCOL_INFO wsainfo;
302 } InheritableSocket;
303 #else
304 typedef int InheritableSocket;  /* non-WIN32: a plain int */
305 #endif
306
307 typedef struct LWLock LWLock;   /* ugly kluge: forward typedef so BackendParameters can hold an LWLock pointer */
308
309 /*
310  * Structure containing all variables passed to exec'ed backends; see the save/restore_backend_variables() prototypes
311  */
312 typedef struct
313 {
314         Port port;
315         InheritableSocket portsocket;
316         char DataDir[MAXPGPATH];
317         int ListenSocket[MAXLISTEN];    /* same shape as the file-level ListenSocket array */
318         long MyCancelKey;
319         unsigned long UsedShmemSegID;
320         void *UsedShmemSegAddr;
321         slock_t *ShmemLock;
322         slock_t *ShmemIndexLock;
323         VariableCache ShmemVariableCache;
324         void *ShmemIndexAlloc;
325         Backend *ShmemBackendArray;
326         LWLock *LWLockArray;    /* pointer only; LWLock is just forward-declared in this file */
327         slock_t *ProcStructLock;
328         InheritableSocket pgStatSock;
329         InheritableSocket pgStatPipe0;
330         InheritableSocket pgStatPipe1;
331         pid_t PostmasterPid;
332 #ifdef WIN32
333         HANDLE PostmasterHandle;
334         HANDLE initial_signal_pipe;
335         HANDLE syslogPipe[2];   /* WIN32: pipe ends are HANDLEs */
336 #else
337         int syslogPipe[2];      /* elsewhere: pipe ends are ints */
338 #endif
339         char my_exec_path[MAXPGPATH];
340         char ExtraOptions[MAXPGPATH];   /* same size as the file-level -o options buffer */
341         char lc_collate[LOCALE_NAME_BUFLEN];
342         char lc_ctype[LOCALE_NAME_BUFLEN];
343 } BackendParameters;
344
345 static void read_backend_variables(char *id, Port *port);
346 static void restore_backend_variables(BackendParameters *param, Port *port);
347 #ifndef WIN32
348 static bool save_backend_variables(BackendParameters *param, Port *port);
349 #else   /* WIN32 variant additionally takes the child's process handle and PID */
350 static bool save_backend_variables(BackendParameters *param, Port *port,
351                                                                    HANDLE childProcess, pid_t childPid);
352 #endif
353
354 static void ShmemBackendArrayAdd(Backend *bn);
355 static void ShmemBackendArrayRemove(pid_t pid);
356
357 #endif   /* EXEC_BACKEND */
358
359 #define StartupDataBase()               StartChildProcess(BS_XLOG_STARTUP)      /* yields the pid_t returned by StartChildProcess() */
360 #define StartBackgroundWriter() StartChildProcess(BS_XLOG_BGWRITER)     /* likewise, with the bgwriter code */
361
362
363 /*
364  * Postmaster main entry point
365  */
366 int
367 PostmasterMain(int argc, char *argv[])
368 {
369         int                     opt;
370         int                     status;
371         char       *userDoption = NULL;
372         int                     i;
373
374         /* This will call exit() if strdup() fails. */
375         progname = get_progname(argv[0]);       
376
377         MyProcPid = PostmasterPid = getpid();
378
379         IsPostmasterEnvironment = true;
380
381         /*
382          * Catch standard options before doing much else.  This even works on
383          * systems without getopt_long.
384          */
385         if (argc > 1)
386         {
387                 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
388                 {
389                         usage(progname);
390                         ExitPostmaster(0);
391                 }
392                 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
393                 {
394                         puts("postmaster (PostgreSQL) " PG_VERSION);
395                         ExitPostmaster(0);
396                 }
397         }
398
399 #ifdef WIN32
400         /* Start our win32 signal implementation */
401         pgwin32_signal_initialize();
402 #endif
403
404         /*
405          * for security, no dir or file created can be group or other
406          * accessible
407          */
408         umask((mode_t) 0077);
409
410         /*
411          * Fire up essential subsystems: memory management
412          */
413         MemoryContextInit();
414
415         /*
416          * By default, palloc() requests in the postmaster will be allocated
417          * in the PostmasterContext, which is space that can be recycled by
418          * backends.  Allocated data that needs to be available to backends
419          * should be allocated in TopMemoryContext.
420          */
421         PostmasterContext = AllocSetContextCreate(TopMemoryContext,
422                                                                                           "Postmaster",
423                                                                                           ALLOCSET_DEFAULT_MINSIZE,
424                                                                                           ALLOCSET_DEFAULT_INITSIZE,
425                                                                                           ALLOCSET_DEFAULT_MAXSIZE);
426         MemoryContextSwitchTo(PostmasterContext);
427
428         IgnoreSystemIndexes(false);
429
430         if (find_my_exec(argv[0], my_exec_path) < 0)
431                 elog(FATAL, "%s: could not locate my own executable path",
432                          argv[0]);
433
434         get_pkglib_path(my_exec_path, pkglib_path);
435
436         /*
437          * Options setup
438          */
439         InitializeGUCOptions();
440
441         opterr = 1;
442
443         while ((opt = getopt(argc, argv, "A:a:B:b:c:D:d:Fh:ik:lm:MN:no:p:Ss-:")) != -1)
444         {
445                 switch (opt)
446                 {
447                         case 'A':
448 #ifdef USE_ASSERT_CHECKING
449                                 SetConfigOption("debug_assertions", optarg, PGC_POSTMASTER, PGC_S_ARGV);
450 #else
451                                 write_stderr("%s: assert checking is not compiled in\n", progname);
452 #endif
453                                 break;
454                         case 'a':
455                                 /* Can no longer set authentication method. */
456                                 break;
457                         case 'B':
458                                 SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
459                                 break;
460                         case 'b':
461                                 /* Can no longer set the backend executable file to use. */
462                                 break;
463                         case 'D':
464                                 userDoption = optarg;
465                                 break;
466                         case 'd':
467                                 set_debug_options(atoi(optarg), PGC_POSTMASTER, PGC_S_ARGV);
468                                 break;
469                         case 'F':
470                                 SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
471                                 break;
472                         case 'h':
473                                 SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
474                                 break;
475                         case 'i':
476                                 SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
477                                 break;
478                         case 'k':
479                                 SetConfigOption("unix_socket_directory", optarg, PGC_POSTMASTER, PGC_S_ARGV);
480                                 break;
481 #ifdef USE_SSL
482                         case 'l':
483                                 SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
484                                 break;
485 #endif
486                         case 'm':
487                                 /* Multiplexed backends no longer supported. */
488                                 break;
489                         case 'M':
490
491                                 /*
492                                  * ignore this flag.  This may be passed in because the
493                                  * program was run as 'postgres -M' instead of
494                                  * 'postmaster'
495                                  */
496                                 break;
497                         case 'N':
498                                 /* The max number of backends to start. */
499                                 SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
500                                 break;
501                         case 'n':
502                                 /* Don't reinit shared mem after abnormal exit */
503                                 Reinit = false;
504                                 break;
505                         case 'o':
506
507                                 /*
508                                  * Other options to pass to the backend on the command
509                                  * line
510                                  */
511                                 snprintf(ExtraOptions + strlen(ExtraOptions),
512                                                  sizeof(ExtraOptions) - strlen(ExtraOptions),
513                                                  " %s", optarg);
514                                 break;
515                         case 'p':
516                                 SetConfigOption("port", optarg, PGC_POSTMASTER, PGC_S_ARGV);
517                                 break;
518                         case 'S':
519
520                                 /*
521                                  * Start in 'S'ilent mode (disassociate from controlling
522                                  * tty). You may also think of this as 'S'ysV mode since
523                                  * it's most badly needed on SysV-derived systems like
524                                  * SVR4 and HP-UX.
525                                  */
526                                 SetConfigOption("silent_mode", "true", PGC_POSTMASTER, PGC_S_ARGV);
527                                 break;
528                         case 's':
529
530                                 /*
531                                  * In the event that some backend dumps core, send
532                                  * SIGSTOP, rather than SIGQUIT, to all its peers.      This
533                                  * lets the wily post_hacker collect core dumps from
534                                  * everyone.
535                                  */
536                                 SendStop = true;
537                                 break;
538                         case 'c':
539                         case '-':
540                                 {
541                                         char       *name,
542                                                            *value;
543
544                                         ParseLongOption(optarg, &name, &value);
545                                         if (!value)
546                                         {
547                                                 if (opt == '-')
548                                                         ereport(ERROR,
549                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
550                                                                          errmsg("--%s requires a value",
551                                                                                         optarg)));
552                                                 else
553                                                         ereport(ERROR,
554                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
555                                                                          errmsg("-c %s requires a value",
556                                                                                         optarg)));
557                                         }
558
559                                         SetConfigOption(name, value, PGC_POSTMASTER, PGC_S_ARGV);
560                                         free(name);
561                                         if (value)
562                                                 free(value);
563                                         break;
564                                 }
565
566                         default:
567                                 write_stderr("Try \"%s --help\" for more information.\n",
568                                                          progname);
569                                 ExitPostmaster(1);
570                 }
571         }
572
573         /*
574          * Postmaster accepts no non-option switch arguments.
575          */
576         if (optind < argc)
577         {
578                 write_stderr("%s: invalid argument: \"%s\"\n",
579                                          progname, argv[optind]);
580                 write_stderr("Try \"%s --help\" for more information.\n",
581                                          progname);
582                 ExitPostmaster(1);
583         }
584
585         /*
586          * Locate the proper configuration files and data directory, and
587          * read postgresql.conf for the first time.
588          */
589         if (!SelectConfigFiles(userDoption, progname))
590                 ExitPostmaster(2);
591
592         /* Verify that DataDir looks reasonable */
593         checkDataDir();
594
595         /*
596          * Check for invalid combinations of GUC settings.
597          */
598         if (NBuffers < 2 * MaxBackends || NBuffers < 16)
599         {
600                 /*
601                  * Do not accept -B so small that backends are likely to starve
602                  * for lack of buffers.  The specific choices here are somewhat
603                  * arbitrary.
604                  */
605                 write_stderr("%s: the number of buffers (-B) must be at least twice the number of allowed connections (-N) and at least 16\n", progname);
606                 ExitPostmaster(1);
607         }
608
609         if (ReservedBackends >= MaxBackends)
610         {
611                 write_stderr("%s: superuser_reserved_connections must be less than max_connections\n", progname);
612                 ExitPostmaster(1);
613         }
614
615         /*
616          * Other one-time internal sanity checks can go here.
617          */
618         if (!CheckDateTokenTables())
619         {
620                 write_stderr("%s: invalid datetoken tables, please fix\n", progname);
621                 ExitPostmaster(1);
622         }
623
624         /*
625          * Now that we are done processing the postmaster arguments, reset
626          * getopt(3) library so that it will work correctly in subprocesses.
627          */
628         optind = 1;
629 #ifdef HAVE_INT_OPTRESET
630         optreset = 1;                           /* some systems need this too */
631 #endif
632
633         /* For debugging: display postmaster environment */
634         {
635                 extern char **environ;
636                 char      **p;
637
638                 ereport(DEBUG3,
639                         (errmsg_internal("%s: PostmasterMain: initial environ dump:",
640                                                          progname)));
641                 ereport(DEBUG3,
642                  (errmsg_internal("-----------------------------------------")));
643                 for (p = environ; *p; ++p)
644                         ereport(DEBUG3,
645                                         (errmsg_internal("\t%s", *p)));
646                 ereport(DEBUG3,
647                  (errmsg_internal("-----------------------------------------")));
648         }
649
650 #ifdef EXEC_BACKEND
651         if (find_other_exec(argv[0], "postgres", PG_VERSIONSTR,
652                                                 postgres_exec_path) < 0)
653                 ereport(FATAL,
654                          (errmsg("%s: could not locate matching postgres executable",
655                                          progname)));
656 #endif
657
658         /*
659          * Initialize SSL library, if specified.
660          */
661 #ifdef USE_SSL
662         if (EnableSSL)
663                 secure_initialize();
664 #endif
665
666         /*
667          * process any libraries that should be preloaded and optionally
668          * pre-initialized
669          */
670         if (preload_libraries_string)
671                 process_preload_libraries(preload_libraries_string);
672
673         /*
674          * Fork away from controlling terminal, if -S specified.
675          *
676          * Must do this before we grab any interlock files, else the interlocks
677          * will show the wrong PID.
678          */
679         if (SilentMode)
680                 pmdaemonize();
681
682         /*
683          * Create lockfile for data directory.
684          *
685          * We want to do this before we try to grab the input sockets, because
686          * the data directory interlock is more reliable than the socket-file
687          * interlock (thanks to whoever decided to put socket files in /tmp
688          * :-(). For the same reason, it's best to grab the TCP socket(s)
689          * before the Unix socket.
690          */
691         CreateDataDirLockFile(DataDir, true);
692
693         /*
694          * Remove old temporary files.  At this point there can be no other
695          * Postgres processes running in this directory, so this should be
696          * safe.
697          */
698         RemovePgTempFiles();
699
700         /*
701          * Establish input sockets.
702          */
703         for (i = 0; i < MAXLISTEN; i++)
704                 ListenSocket[i] = -1;
705
706         if (ListenAddresses)
707         {
708                 char       *rawstring;
709                 List       *elemlist;
710                 ListCell   *l;
711
712                 /* Need a modifiable copy of ListenAddresses */
713                 rawstring = pstrdup(ListenAddresses);
714
715                 /* Parse string into list of identifiers */
716                 if (!SplitIdentifierString(rawstring, ',', &elemlist))
717                 {
718                         /* syntax error in list */
719                         ereport(FATAL,
720                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
721                                 errmsg("invalid list syntax for \"listen_addresses\"")));
722                 }
723
724                 foreach(l, elemlist)
725                 {
726                         char       *curhost = (char *) lfirst(l);
727
728                         if (strcmp(curhost, "*") == 0)
729                                 status = StreamServerPort(AF_UNSPEC, NULL,
730                                                                                   (unsigned short) PostPortNumber,
731                                                                                   UnixSocketDir,
732                                                                                   ListenSocket, MAXLISTEN);
733                         else
734                                 status = StreamServerPort(AF_UNSPEC, curhost,
735                                                                                   (unsigned short) PostPortNumber,
736                                                                                   UnixSocketDir,
737                                                                                   ListenSocket, MAXLISTEN);
738                         if (status != STATUS_OK)
739                                 ereport(WARNING,
740                                          (errmsg("could not create listen socket for \"%s\"",
741                                                          curhost)));
742                 }
743
744                 list_free(elemlist);
745                 pfree(rawstring);
746         }
747
748 #ifdef USE_RENDEZVOUS
749         /* Register for Rendezvous only if we opened TCP socket(s) */
750         if (ListenSocket[0] != -1 && rendezvous_name != NULL)
751         {
752                 DNSServiceRegistrationCreate(rendezvous_name,
753                                                                          "_postgresql._tcp.",
754                                                                          "",
755                                                                          htonl(PostPortNumber),
756                                                                          "",
757                                                                  (DNSServiceRegistrationReply) reg_reply,
758                                                                          NULL);
759         }
760 #endif
761
762 #ifdef HAVE_UNIX_SOCKETS
763         status = StreamServerPort(AF_UNIX, NULL,
764                                                           (unsigned short) PostPortNumber,
765                                                           UnixSocketDir,
766                                                           ListenSocket, MAXLISTEN);
767         if (status != STATUS_OK)
768                 ereport(WARNING,
769                                 (errmsg("could not create Unix-domain socket")));
770 #endif
771
772         /*
773          * check that we have some socket to listen on
774          */
775         if (ListenSocket[0] == -1)
776                 ereport(FATAL,
777                                 (errmsg("no socket created for listening")));
778
779         XLOGPathInit();
780
781         /*
782          * Set up shared memory and semaphores.
783          */
784         reset_shared(PostPortNumber);
785
786         /*
787          * Estimate number of openable files.  This must happen after setting
788          * up semaphores, because on some platforms semaphores count as open
789          * files.
790          */
791         set_max_safe_fds();
792
793         /*
794          * Initialize the list of active backends.
795          */
796         BackendList = DLNewList();
797
798 #ifdef WIN32
799
800         /*
801          * Initialize the child pid/HANDLE arrays for signal handling.
802          */
803         win32_childPIDArray = (pid_t *)
804                 malloc(NUM_BACKENDARRAY_ELEMS * sizeof(pid_t));
805         win32_childHNDArray = (HANDLE *)
806                 malloc(NUM_BACKENDARRAY_ELEMS * sizeof(HANDLE));
807         if (!win32_childPIDArray || !win32_childHNDArray)
808                 ereport(FATAL,
809                                 (errcode(ERRCODE_OUT_OF_MEMORY),
810                                  errmsg("out of memory")));
811
812         /*
813          * Set up a handle that child processes can use to check whether the
814          * postmaster is still running.
815          */
816         if (DuplicateHandle(GetCurrentProcess(),
817                                                 GetCurrentProcess(),
818                                                 GetCurrentProcess(),
819                                                 &PostmasterHandle,
820                                                 0,
821                                                 TRUE,
822                                                 DUPLICATE_SAME_ACCESS) == 0)
823                 ereport(FATAL,
824                         (errmsg_internal("could not duplicate postmaster handle: error code %d",
825                                                          (int) GetLastError())));
826 #endif
827
828         /*
829          * Record postmaster options.  We delay this till now to avoid
830          * recording bogus options (eg, NBuffers too high for available
831          * memory).
832          */
833         if (!CreateOptsFile(argc, argv, my_exec_path))
834                 ExitPostmaster(1);
835
836 #ifdef EXEC_BACKEND
837         write_nondefault_variables(PGC_POSTMASTER);
838 #endif
839
840         /*
841          * Write the external PID file if requested
842          */
843         if (external_pid_file)
844         {
845                 FILE       *fpidfile = fopen(external_pid_file, "w");
846
847                 if (fpidfile)
848                 {
849                         fprintf(fpidfile, "%d\n", MyProcPid);
850                         fclose(fpidfile);
851                         /* Should we remove the pid file on postmaster exit? */
852                 }
853                 else
854                         write_stderr("%s: could not write external PID file \"%s\": %s\n",
855                                                  progname, external_pid_file, strerror(errno));
856         }
857
858         /*
859          * Set up signal handlers for the postmaster process.
860          *
861          * CAUTION: when changing this list, check for side-effects on the signal
862          * handling setup of child processes.  See tcop/postgres.c,
863          * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/pgarch.c,
864          * postmaster/pgstat.c, and postmaster/syslogger.c.
865          */
866         pqinitmask();
867         PG_SETMASK(&BlockSig);
868
869         pqsignal(SIGHUP, SIGHUP_handler);       /* reread config file and have
870                                                                                  * children do same */
871         pqsignal(SIGINT, pmdie);        /* send SIGTERM and shut down */
872         pqsignal(SIGQUIT, pmdie);       /* send SIGQUIT and die */
873         pqsignal(SIGTERM, pmdie);       /* wait for children and shut down */
874         pqsignal(SIGALRM, SIG_IGN); /* ignored */
875         pqsignal(SIGPIPE, SIG_IGN); /* ignored */
876         pqsignal(SIGUSR1, sigusr1_handler); /* message from child process */
877         pqsignal(SIGUSR2, dummy_handler);       /* unused, reserve for children */
878         pqsignal(SIGCHLD, reaper);      /* handle child termination */
879         pqsignal(SIGTTIN, SIG_IGN); /* ignored */
880         pqsignal(SIGTTOU, SIG_IGN); /* ignored */
881         /* ignore SIGXFSZ, so that ulimit violations work like disk full */
882 #ifdef SIGXFSZ
883         pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
884 #endif
885
886         /*
887          * If enabled, start up syslogger collection subprocess
888          */
889         SysLoggerPID = SysLogger_Start();
890
891         /*
892          * Reset whereToSendOutput from Debug (its starting state) to None.
893          * This stops ereport from sending log messages to stderr unless
894          * Log_destination permits.  We don't do this until the postmaster is
895          * fully launched, since startup failures may as well be reported to
896          * stderr.
897          */
898         whereToSendOutput = None;
899
900         /*
901          * Initialize the statistics collector stuff
902          */
903         pgstat_init();
904
905         /*
906          * Load configuration files for client authentication.
907          */
908         load_hba();
909         load_ident();
910
911         /*
912          * We're ready to rock and roll...
913          */
914         StartupPID = StartupDataBase();
915
916         status = ServerLoop();
917
918         /*
919          * ServerLoop probably shouldn't ever return, but if it does, close
920          * down.
921          */
922         ExitPostmaster(status != STATUS_OK);
923
924         return 0;                                       /* not reached */
925 }
926
927
928 /*
929  * Validate the proposed data directory
930  */
931 static void
932 checkDataDir(void)
933 {
934         char            path[MAXPGPATH];
935         FILE       *fp;
936         struct stat stat_buf;
937
938         Assert(DataDir);
939
940         if (stat(DataDir, &stat_buf) != 0)
941         {
942                 if (errno == ENOENT)
943                         ereport(FATAL,
944                                         (errcode_for_file_access(),
945                                          errmsg("data directory \"%s\" does not exist",
946                                                         DataDir)));
947                 else
948                         ereport(FATAL,
949                                         (errcode_for_file_access(),
950                          errmsg("could not read permissions of directory \"%s\": %m",
951                                         DataDir)));
952         }
953
954         /*
955          * Check if the directory has group or world access.  If so, reject.
956          *
957          * XXX temporarily suppress check when on Windows, because there may not
958          * be proper support for Unix-y file permissions.  Need to think of a
959          * reasonable check to apply on Windows.
960          */
961 #if !defined(WIN32) && !defined(__CYGWIN__)
962         if (stat_buf.st_mode & (S_IRWXG | S_IRWXO))
963                 ereport(FATAL,
964                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
965                                  errmsg("data directory \"%s\" has group or world access",
966                                                 DataDir),
967                                  errdetail("Permissions should be u=rwx (0700).")));
968 #endif
969
970         /* Look for PG_VERSION before looking for pg_control */
971         ValidatePgVersion(DataDir);
972
973         snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
974
975         fp = AllocateFile(path, PG_BINARY_R);
976         if (fp == NULL)
977         {
978                 write_stderr("%s: could not find the database system\n"
979                                          "Expected to find it in the directory \"%s\",\n"
980                                          "but could not open file \"%s\": %s\n",
981                                          progname, DataDir, path, strerror(errno));
982                 ExitPostmaster(2);
983         }
984         FreeFile(fp);
985 }
986
987
988 #ifdef USE_RENDEZVOUS
989
990 /*
991  * empty callback function for DNSServiceRegistrationCreate()
992  */
993 static void
994 reg_reply(DNSServiceRegistrationReplyErrorType errorCode, void *context)
995 {
996
997 }
998 #endif   /* USE_RENDEZVOUS */
999
1000
1001 /*
1002  * Fork away from the controlling terminal (-S option)
1003  */
1004 static void
1005 pmdaemonize(void)
1006 {
1007 #ifndef WIN32
1008         int                     i;
1009         pid_t           pid;
1010
1011 #ifdef LINUX_PROFILE
1012         struct itimerval prof_itimer;
1013 #endif
1014
1015 #ifdef LINUX_PROFILE
1016         /* see comments in BackendStartup */
1017         getitimer(ITIMER_PROF, &prof_itimer);
1018 #endif
1019
1020         pid = fork();
1021         if (pid == (pid_t) -1)
1022         {
1023                 write_stderr("%s: could not fork background process: %s\n",
1024                                          progname, strerror(errno));
1025                 ExitPostmaster(1);
1026         }
1027         else if (pid)
1028         {                                                       /* parent */
1029                 /* Parent should just exit, without doing any atexit cleanup */
1030                 _exit(0);
1031         }
1032
1033 #ifdef LINUX_PROFILE
1034         setitimer(ITIMER_PROF, &prof_itimer, NULL);
1035 #endif
1036
1037         MyProcPid = PostmasterPid = getpid();           /* reset PID vars to child */
1038
1039 /* GH: If there's no setsid(), we hopefully don't need silent mode.
1040  * Until there's a better solution.
1041  */
1042 #ifdef HAVE_SETSID
1043         if (setsid() < 0)
1044         {
1045                 write_stderr("%s: could not dissociate from controlling TTY: %s\n",
1046                                          progname, strerror(errno));
1047                 ExitPostmaster(1);
1048         }
1049 #endif
1050         i = open(NULL_DEV, O_RDWR);
1051         dup2(i, 0);
1052         dup2(i, 1);
1053         dup2(i, 2);
1054         close(i);
1055 #else                                                   /* WIN32 */
1056         /* not supported */
1057         elog(FATAL, "SilentMode not supported under WIN32");
1058 #endif   /* WIN32 */
1059 }
1060
1061
/*
 * Print out help message
 *
 * Writes the option summary to stdout, substituting progname into the
 * banner and usage lines.  Every message goes through gettext() as a
 * literal so that it remains visible to message extraction.
 */
static void
usage(const char *progname)
{
	/* banner */
	printf(gettext("%s is the PostgreSQL server.\n\n"), progname);
	printf(gettext("Usage:\n  %s [OPTION]...\n\n"), progname);

	/* regular options */
	printf(gettext("Options:\n"));
#ifdef USE_ASSERT_CHECKING
	printf(gettext("  -A 1|0          enable/disable run-time assert checking\n"));
#endif
	printf(gettext("  -B NBUFFERS     number of shared buffers\n"));
	printf(gettext("  -c NAME=VALUE   set run-time parameter\n"));
	printf(gettext("  -d 1-5          debugging level\n"));
	printf(gettext("  -D DATADIR      database directory\n"));
	printf(gettext("  -F              turn fsync off\n"));
	printf(gettext("  -h HOSTNAME     host name or IP address to listen on\n"));
	printf(gettext("  -i              enable TCP/IP connections\n"));
	printf(gettext("  -k DIRECTORY    Unix-domain socket location\n"));
#ifdef USE_SSL
	printf(gettext("  -l              enable SSL connections\n"));
#endif
	printf(gettext("  -N MAX-CONNECT  maximum number of allowed connections\n"));
	printf(gettext("  -o OPTIONS      pass \"OPTIONS\" to each server process\n"));
	printf(gettext("  -p PORT         port number to listen on\n"));
	printf(gettext("  -S              silent mode (start in background without logging output)\n"));
	printf(gettext("  --help          show this help, then exit\n"));
	printf(gettext("  --version       output version information, then exit\n"));

	/* options intended for developers */
	printf(gettext("\nDeveloper options:\n"));
	printf(gettext("  -n              do not reinitialize shared memory after abnormal exit\n"));
	printf(gettext("  -s              send SIGSTOP to all backend servers if one dies\n"));

	/* trailer */
	printf(gettext("\nPlease read the documentation for the complete list of run-time\n"
				   "configuration settings and how to set them on the command line or in\n"
				   "the configuration file.\n\n"
				   "Report bugs to <pgsql-bugs@postgresql.org>.\n"));
}
1101
1102
1103 /*
1104  * Main idle loop of postmaster
1105  */
1106 static int
1107 ServerLoop(void)
1108 {
1109         fd_set          readmask;
1110         int                     nSockets;
1111         time_t          now,
1112                                 last_touch_time;
1113         struct timeval earlier,
1114                                 later;
1115         struct timezone tz;
1116
1117         gettimeofday(&earlier, &tz);
1118         last_touch_time = time(NULL);
1119
1120         nSockets = initMasks(&readmask);
1121
1122         for (;;)
1123         {
1124                 Port       *port;
1125                 fd_set          rmask;
1126                 struct timeval timeout;
1127                 int                     selres;
1128                 int                     i;
1129
1130                 /*
1131                  * Wait for something to happen.
1132                  *
1133                  * We wait at most one minute, to ensure that the other background
1134                  * tasks handled below get done even when no requests are
1135                  * arriving.
1136                  */
1137                 memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
1138
1139                 timeout.tv_sec = 60;
1140                 timeout.tv_usec = 0;
1141
1142                 PG_SETMASK(&UnBlockSig);
1143
1144                 selres = select(nSockets, &rmask, NULL, NULL, &timeout);
1145
1146                 /*
1147                  * Block all signals until we wait again.  (This makes it safe for
1148                  * our signal handlers to do nontrivial work.)
1149                  */
1150                 PG_SETMASK(&BlockSig);
1151
1152                 if (selres < 0)
1153                 {
1154                         if (errno != EINTR && errno != EWOULDBLOCK)
1155                         {
1156                                 ereport(LOG,
1157                                                 (errcode_for_socket_access(),
1158                                                  errmsg("select() failed in postmaster: %m")));
1159                                 return STATUS_ERROR;
1160                         }
1161                 }
1162
1163                 /*
1164                  * New connection pending on any of our sockets? If so, fork a
1165                  * child process to deal with it.
1166                  */
1167                 if (selres > 0)
1168                 {
1169                         /*
1170                          * Select a random seed at the time of first receiving a
1171                          * request.
1172                          */
1173                         while (random_seed == 0)
1174                         {
1175                                 gettimeofday(&later, &tz);
1176
1177                                 /*
1178                                  * We are not sure how much precision is in tv_usec, so we
1179                                  * swap the high and low 16 bits of 'later' and XOR them with
1180                                  * 'earlier'. On the off chance that the result is 0, we
1181                                  * loop until it isn't.
1182                                  */
1183                                 random_seed = earlier.tv_usec ^
1184                                         ((later.tv_usec << 16) |
1185                                          ((later.tv_usec >> 16) & 0xffff));
1186                         }
1187
1188                         for (i = 0; i < MAXLISTEN; i++)
1189                         {
1190                                 if (ListenSocket[i] == -1)
1191                                         break;
1192                                 if (FD_ISSET(ListenSocket[i], &rmask))
1193                                 {
1194                                         port = ConnCreate(ListenSocket[i]);
1195                                         if (port)
1196                                         {
1197                                                 BackendStartup(port);
1198
1199                                                 /*
1200                                                  * We no longer need the open socket or port
1201                                                  * structure in this process
1202                                                  */
1203                                                 StreamClose(port->sock);
1204                                                 ConnFree(port);
1205                                         }
1206                                 }
1207                         }
1208                 }
1209
1210                 /* If we have lost the system logger, try to start a new one */
1211                 if (SysLoggerPID == 0 && Redirect_stderr)
1212                         SysLoggerPID = SysLogger_Start();
1213
1214                 /*
1215                  * If no background writer process is running, and we are not in a
1216                  * state that prevents it, start one.  It doesn't matter if this
1217                  * fails, we'll just try again later.
1218                  */
1219                 if (BgWriterPID == 0 && StartupPID == 0 && !FatalError)
1220                 {
1221                         BgWriterPID = StartBackgroundWriter();
1222                         /* If shutdown is pending, set it going */
1223                         if (Shutdown > NoShutdown && BgWriterPID != 0)
1224                                 kill(BgWriterPID, SIGUSR2);
1225                 }
1226
1227                 /* If we have lost the archiver, try to start a new one */
1228                 if (XLogArchivingActive() && PgArchPID == 0 &&
1229                         StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
1230                         PgArchPID = pgarch_start();
1231
1232                 /* If we have lost the stats collector, try to start a new one */
1233                 if (PgStatPID == 0 &&
1234                         StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
1235                         PgStatPID = pgstat_start();
1236
1237                 /*
1238                  * Touch the socket and lock file at least every ten minutes, to
1239                  * ensure that they are not removed by overzealous /tmp-cleaning
1240                  * tasks.
1241                  */
1242                 now = time(NULL);
1243                 if (now - last_touch_time >= 10 * 60)
1244                 {
1245                         TouchSocketFile();
1246                         TouchSocketLockFile();
1247                         last_touch_time = now;
1248                 }
1249         }
1250 }
1251
1252
1253 /*
1254  * Initialise the masks for select() for the ports we are listening on.
1255  * Return the number of sockets to listen on.
1256  */
1257 static int
1258 initMasks(fd_set *rmask)
1259 {
1260         int                     nsocks = -1;
1261         int                     i;
1262
1263         FD_ZERO(rmask);
1264
1265         for (i = 0; i < MAXLISTEN; i++)
1266         {
1267                 int                     fd = ListenSocket[i];
1268
1269                 if (fd == -1)
1270                         break;
1271                 FD_SET(fd, rmask);
1272                 if (fd > nsocks)
1273                         nsocks = fd;
1274         }
1275
1276         return nsocks + 1;
1277 }
1278
1279
1280 /*
1281  * Read the startup packet and do something according to it.
1282  *
1283  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1284  * not return at all.
1285  *
1286  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1287  * if that's what you want.  Return STATUS_ERROR if you don't want to
1288  * send anything to the client, which would typically be appropriate
1289  * if we detect a communications failure.)
1290  */
1291 static int
1292 ProcessStartupPacket(Port *port, bool SSLdone)
1293 {
1294         int32           len;
1295         void       *buf;
1296         ProtocolVersion proto;
1297         MemoryContext oldcontext;
1298
1299         if (pq_getbytes((char *) &len, 4) == EOF)
1300         {
1301                 /*
1302                  * EOF after SSLdone probably means the client didn't like our
1303                  * response to NEGOTIATE_SSL_CODE.      That's not an error condition,
1304                  * so don't clutter the log with a complaint.
1305                  */
1306                 if (!SSLdone)
1307                         ereport(COMMERROR,
1308                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1309                                          errmsg("incomplete startup packet")));
1310                 return STATUS_ERROR;
1311         }
1312
1313         len = ntohl(len);
1314         len -= 4;
1315
1316         if (len < (int32) sizeof(ProtocolVersion) ||
1317                 len > MAX_STARTUP_PACKET_LENGTH)
1318         {
1319                 ereport(COMMERROR,
1320                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1321                                  errmsg("invalid length of startup packet")));
1322                 return STATUS_ERROR;
1323         }
1324
1325         /*
1326          * Allocate at least the size of an old-style startup packet, plus one
1327          * extra byte, and make sure all are zeroes.  This ensures we will
1328          * have null termination of all strings, in both fixed- and
1329          * variable-length packet layouts.
1330          */
1331         if (len <= (int32) sizeof(StartupPacket))
1332                 buf = palloc0(sizeof(StartupPacket) + 1);
1333         else
1334                 buf = palloc0(len + 1);
1335
1336         if (pq_getbytes(buf, len) == EOF)
1337         {
1338                 ereport(COMMERROR,
1339                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1340                                  errmsg("incomplete startup packet")));
1341                 return STATUS_ERROR;
1342         }
1343
1344         /*
1345          * The first field is either a protocol version number or a special
1346          * request code.
1347          */
1348         port->proto = proto = ntohl(*((ProtocolVersion *) buf));
1349
1350         if (proto == CANCEL_REQUEST_CODE)
1351         {
1352                 processCancelRequest(port, buf);
1353                 return 127;                             /* XXX */
1354         }
1355
1356         if (proto == NEGOTIATE_SSL_CODE && !SSLdone)
1357         {
1358                 char            SSLok;
1359
1360 #ifdef USE_SSL
1361                 /* No SSL when disabled or on Unix sockets */
1362                 if (!EnableSSL || IS_AF_UNIX(port->laddr.addr.ss_family))
1363                         SSLok = 'N';
1364                 else
1365                         SSLok = 'S';            /* Support for SSL */
1366 #else
1367                 SSLok = 'N';                    /* No support for SSL */
1368 #endif
1369                 if (send(port->sock, &SSLok, 1, 0) != 1)
1370                 {
1371                         ereport(COMMERROR,
1372                                         (errcode_for_socket_access(),
1373                                  errmsg("failed to send SSL negotiation response: %m")));
1374                         return STATUS_ERROR;    /* close the connection */
1375                 }
1376
1377 #ifdef USE_SSL
1378                 if (SSLok == 'S' && secure_open_server(port) == -1)
1379                         return STATUS_ERROR;
1380 #endif
1381                 /* regular startup packet, cancel, etc packet should follow... */
1382                 /* but not another SSL negotiation request */
1383                 return ProcessStartupPacket(port, true);
1384         }
1385
1386         /* Could add additional special packet types here */
1387
1388         /*
1389          * Set FrontendProtocol now so that ereport() knows what format to
1390          * send if we fail during startup.
1391          */
1392         FrontendProtocol = proto;
1393
1394         /* Check we can handle the protocol the frontend is using. */
1395
1396         if (PG_PROTOCOL_MAJOR(proto) < PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST) ||
1397           PG_PROTOCOL_MAJOR(proto) > PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) ||
1398         (PG_PROTOCOL_MAJOR(proto) == PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) &&
1399          PG_PROTOCOL_MINOR(proto) > PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST)))
1400                 ereport(FATAL,
1401                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1402                                  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
1403                                           PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
1404                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST),
1405                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST),
1406                                                 PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST))));
1407
1408         /*
1409          * Now fetch parameters out of startup packet and save them into the
1410          * Port structure.      All data structures attached to the Port struct
1411          * must be allocated in TopMemoryContext so that they won't disappear
1412          * when we pass them to PostgresMain (see BackendRun).  We need not
1413          * worry about leaking this storage on failure, since we aren't in the
1414          * postmaster process anymore.
1415          */
1416         oldcontext = MemoryContextSwitchTo(TopMemoryContext);
1417
1418         if (PG_PROTOCOL_MAJOR(proto) >= 3)
1419         {
1420                 int32           offset = sizeof(ProtocolVersion);
1421
1422                 /*
1423                  * Scan packet body for name/option pairs.      We can assume any
1424                  * string beginning within the packet body is null-terminated,
1425                  * thanks to zeroing extra byte above.
1426                  */
1427                 port->guc_options = NIL;
1428
1429                 while (offset < len)
1430                 {
1431                         char       *nameptr = ((char *) buf) + offset;
1432                         int32           valoffset;
1433                         char       *valptr;
1434
1435                         if (*nameptr == '\0')
1436                                 break;                  /* found packet terminator */
1437                         valoffset = offset + strlen(nameptr) + 1;
1438                         if (valoffset >= len)
1439                                 break;                  /* missing value, will complain below */
1440                         valptr = ((char *) buf) + valoffset;
1441
1442                         if (strcmp(nameptr, "database") == 0)
1443                                 port->database_name = pstrdup(valptr);
1444                         else if (strcmp(nameptr, "user") == 0)
1445                                 port->user_name = pstrdup(valptr);
1446                         else if (strcmp(nameptr, "options") == 0)
1447                                 port->cmdline_options = pstrdup(valptr);
1448                         else
1449                         {
1450                                 /* Assume it's a generic GUC option */
1451                                 port->guc_options = lappend(port->guc_options,
1452                                                                                         pstrdup(nameptr));
1453                                 port->guc_options = lappend(port->guc_options,
1454                                                                                         pstrdup(valptr));
1455                         }
1456                         offset = valoffset + strlen(valptr) + 1;
1457                 }
1458
1459                 /*
1460                  * If we didn't find a packet terminator exactly at the end of the
1461                  * given packet length, complain.
1462                  */
1463                 if (offset != len - 1)
1464                         ereport(FATAL,
1465                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1466                                          errmsg("invalid startup packet layout: expected terminator as last byte")));
1467         }
1468         else
1469         {
1470                 /*
1471                  * Get the parameters from the old-style, fixed-width-fields
1472                  * startup packet as C strings.  The packet destination was
1473                  * cleared first so a short packet has zeros silently added.  We
1474                  * have to be prepared to truncate the pstrdup result for oversize
1475                  * fields, though.
1476                  */
1477                 StartupPacket *packet = (StartupPacket *) buf;
1478
1479                 port->database_name = pstrdup(packet->database);
1480                 if (strlen(port->database_name) > sizeof(packet->database))
1481                         port->database_name[sizeof(packet->database)] = '\0';
1482                 port->user_name = pstrdup(packet->user);
1483                 if (strlen(port->user_name) > sizeof(packet->user))
1484                         port->user_name[sizeof(packet->user)] = '\0';
1485                 port->cmdline_options = pstrdup(packet->options);
1486                 if (strlen(port->cmdline_options) > sizeof(packet->options))
1487                         port->cmdline_options[sizeof(packet->options)] = '\0';
1488                 port->guc_options = NIL;
1489         }
1490
1491         /* Check a user name was given. */
1492         if (port->user_name == NULL || port->user_name[0] == '\0')
1493                 ereport(FATAL,
1494                                 (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
1495                  errmsg("no PostgreSQL user name specified in startup packet")));
1496
1497         /* The database defaults to the user name. */
1498         if (port->database_name == NULL || port->database_name[0] == '\0')
1499                 port->database_name = pstrdup(port->user_name);
1500
1501         if (Db_user_namespace)
1502         {
1503                 /*
1504                  * If user@, it is a global user, remove '@'. We only want to do
1505                  * this if there is an '@' at the end and no earlier in the user
1506                  * string or they may fake as a local user of another database
1507                  * attaching to this database.
1508                  */
1509                 if (strchr(port->user_name, '@') ==
1510                         port->user_name + strlen(port->user_name) - 1)
1511                         *strchr(port->user_name, '@') = '\0';
1512                 else
1513                 {
1514                         /* Append '@' and dbname */
1515                         char       *db_user;
1516
1517                         db_user = palloc(strlen(port->user_name) +
1518                                                          strlen(port->database_name) + 2);
1519                         sprintf(db_user, "%s@%s", port->user_name, port->database_name);
1520                         port->user_name = db_user;
1521                 }
1522         }
1523
1524         /*
1525          * Truncate given database and user names to length of a Postgres
1526          * name.  This avoids lookup failures when overlength names are given.
1527          */
1528         if (strlen(port->database_name) >= NAMEDATALEN)
1529                 port->database_name[NAMEDATALEN - 1] = '\0';
1530         if (strlen(port->user_name) >= NAMEDATALEN)
1531                 port->user_name[NAMEDATALEN - 1] = '\0';
1532
1533         /*
1534          * Done putting stuff in TopMemoryContext.
1535          */
1536         MemoryContextSwitchTo(oldcontext);
1537
1538         /*
1539          * If we're going to reject the connection due to database state, say
1540          * so now instead of wasting cycles on an authentication exchange.
1541          * (This also allows a pg_ping utility to be written.)
1542          */
1543         switch (port->canAcceptConnections)
1544         {
1545                 case CAC_STARTUP:
1546                         ereport(FATAL,
1547                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1548                                          errmsg("the database system is starting up")));
1549                         break;
1550                 case CAC_SHUTDOWN:
1551                         ereport(FATAL,
1552                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1553                                          errmsg("the database system is shutting down")));
1554                         break;
1555                 case CAC_RECOVERY:
1556                         ereport(FATAL,
1557                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1558                                          errmsg("the database system is in recovery mode")));
1559                         break;
1560                 case CAC_TOOMANY:
1561                         ereport(FATAL,
1562                                         (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
1563                                          errmsg("sorry, too many clients already")));
1564                         break;
1565                 case CAC_OK:
1566                 default:
1567                         break;
1568         }
1569
1570         return STATUS_OK;
1571 }
1572
1573
1574 /*
1575  * The client has sent a cancel request packet, not a normal
1576  * start-a-new-connection packet.  Perform the necessary processing.
1577  * Nothing is sent back to the client.
1578  */
1579 static void
1580 processCancelRequest(Port *port, void *pkt)
1581 {
1582         CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
1583         int                     backendPID;
1584         long            cancelAuthCode;
1585         Backend    *bp;
1586 #ifndef EXEC_BACKEND
1587         Dlelem     *curr;
1588 #else
1589         int                     i;
1590 #endif
1591
1592         backendPID = (int) ntohl(canc->backendPID);
1593         cancelAuthCode = (long) ntohl(canc->cancelAuthCode);
1594
1595         /*
1596          * See if we have a matching backend.  In the EXEC_BACKEND case, we
1597          * can no longer access the postmaster's own backend list, and must
1598          * rely on the duplicate array in shared memory.
1599          */
1600 #ifndef EXEC_BACKEND
1601         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
1602         {
1603                 bp = (Backend *) DLE_VAL(curr);
1604 #else
1605         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
1606         {
1607                 bp = (Backend *) &ShmemBackendArray[i];
1608 #endif
1609                 if (bp->pid == backendPID)
1610                 {
1611                         if (bp->cancel_key == cancelAuthCode)
1612                         {
1613                                 /* Found a match; signal that backend to cancel current op */
1614                                 ereport(DEBUG2,
1615                                                 (errmsg_internal("processing cancel request: sending SIGINT to process %d",
1616                                                                                  backendPID)));
1617                                 kill(bp->pid, SIGINT);
1618                         }
1619                         else
1620                                 /* Right PID, wrong key: no way, Jose */
1621                                 ereport(DEBUG2,
1622                                                 (errmsg_internal("bad key in cancel request for process %d",
1623                                                                                  backendPID)));
1624                         return;
1625                 }
1626         }
1627
1628         /* No matching backend */
1629         ereport(DEBUG2,
1630                         (errmsg_internal("bad pid in cancel request for process %d",
1631                                                          backendPID)));
1632 }
1633
1634 /*
1635  * canAcceptConnections --- check to see if database state allows connections.
1636  */
1637 static enum CAC_state
1638 canAcceptConnections(void)
1639 {
1640         /* Can't start backends when in startup/shutdown/recovery state. */
1641         if (Shutdown > NoShutdown)
1642                 return CAC_SHUTDOWN;
1643         if (StartupPID)
1644                 return CAC_STARTUP;
1645         if (FatalError)
1646                 return CAC_RECOVERY;
1647
1648         /*
1649          * Don't start too many children.
1650          *
1651          * We allow more connections than we can have backends here because some
1652          * might still be authenticating; they might fail auth, or some
1653          * existing backend might exit before the auth cycle is completed. The
1654          * exact MaxBackends limit is enforced when a new backend tries to
1655          * join the shared-inval backend array.
1656          */
1657         if (CountChildren() >= 2 * MaxBackends)
1658                 return CAC_TOOMANY;
1659
1660         return CAC_OK;
1661 }
1662
1663
1664 /*
1665  * ConnCreate -- create a local connection data structure
1666  */
1667 static Port *
1668 ConnCreate(int serverFd)
1669 {
1670         Port       *port;
1671
1672         if (!(port = (Port *) calloc(1, sizeof(Port))))
1673         {
1674                 ereport(LOG,
1675                                 (errcode(ERRCODE_OUT_OF_MEMORY),
1676                                  errmsg("out of memory")));
1677                 ExitPostmaster(1);
1678         }
1679
1680         if (StreamConnection(serverFd, port) != STATUS_OK)
1681         {
1682                 StreamClose(port->sock);
1683                 ConnFree(port);
1684                 port = NULL;
1685         }
1686         else
1687         {
1688                 /*
1689                  * Precompute password salt values to use for this connection.
1690                  * It's slightly annoying to do this long in advance of knowing
1691                  * whether we'll need 'em or not, but we must do the random()
1692                  * calls before we fork, not after.  Else the postmaster's random
1693                  * sequence won't get advanced, and all backends would end up
1694                  * using the same salt...
1695                  */
1696                 RandomSalt(port->cryptSalt, port->md5Salt);
1697         }
1698
1699         return port;
1700 }
1701
1702
1703 /*
1704  * ConnFree -- free a local connection data structure
1705  */
1706 static void
1707 ConnFree(Port *conn)
1708 {
1709 #ifdef USE_SSL
1710         secure_close(conn);
1711 #endif
1712         free(conn);
1713 }
1714
1715
1716 /*
1717  * ClosePostmasterPorts -- close all the postmaster's open sockets
1718  *
1719  * This is called during child process startup to release file descriptors
1720  * that are not needed by that child process.  The postmaster still has
1721  * them open, of course.
1722  *
1723  * Note: we pass am_syslogger as a boolean because we don't want to set
1724  * the global variable yet when this is called.
1725  */
1726 void
1727 ClosePostmasterPorts(bool am_syslogger)
1728 {
1729         int                     i;
1730
1731         /* Close the listen sockets */
1732         for (i = 0; i < MAXLISTEN; i++)
1733         {
1734                 if (ListenSocket[i] != -1)
1735                 {
1736                         StreamClose(ListenSocket[i]);
1737                         ListenSocket[i] = -1;
1738                 }
1739         }
1740
1741         /* If using syslogger, close the read side of the pipe */
1742         if (!am_syslogger)
1743         {
1744 #ifndef WIN32
1745                 if (syslogPipe[0] >= 0)
1746                         close(syslogPipe[0]);
1747                 syslogPipe[0] = -1;
1748 #else
1749                 if (syslogPipe[0])
1750                         CloseHandle(syslogPipe[0]);
1751                 syslogPipe[0] = 0;
1752 #endif
1753         }
1754 }
1755
1756
1757 /*
1758  * reset_shared -- reset shared memory and semaphores
1759  */
1760 static void
1761 reset_shared(unsigned short port)
1762 {
1763         /*
1764          * Create or re-create shared memory and semaphores.
1765          *
1766          * Note: in each "cycle of life" we will normally assign the same IPC
1767          * keys (if using SysV shmem and/or semas), since the port number is
1768          * used to determine IPC keys.  This helps ensure that we will clean
1769          * up dead IPC objects if the postmaster crashes and is restarted.
1770          */
1771         CreateSharedMemoryAndSemaphores(false, MaxBackends, port);
1772 }
1773
1774
1775 /*
1776  * SIGHUP -- reread config files, and tell children to do same
1777  */
1778 static void
1779 SIGHUP_handler(SIGNAL_ARGS)
1780 {
1781         int                     save_errno = errno;
1782
1783         PG_SETMASK(&BlockSig);
1784
1785         if (Shutdown <= SmartShutdown)
1786         {
1787                 ereport(LOG,
1788                          (errmsg("received SIGHUP, reloading configuration files")));
1789                 ProcessConfigFile(PGC_SIGHUP);
1790                 SignalChildren(SIGHUP);
1791                 if (BgWriterPID != 0)
1792                         kill(BgWriterPID, SIGHUP);
1793                 if (PgArchPID != 0)
1794                         kill(PgArchPID, SIGHUP);
1795                 if (SysLoggerPID != 0)
1796                         kill(SysLoggerPID, SIGHUP);
1797                 /* PgStatPID does not currently need SIGHUP */
1798
1799                 /* Reload authentication config files too */
1800                 load_hba();
1801                 load_ident();
1802
1803 #ifdef EXEC_BACKEND
1804                 /* Update the starting-point file for future children */
1805                 write_nondefault_variables(PGC_SIGHUP);
1806 #endif
1807         }
1808
1809         PG_SETMASK(&UnBlockSig);
1810
1811         errno = save_errno;
1812 }
1813
1814
1815 /*
1816  * pmdie -- signal handler for processing various postmaster signals.
1817  */
1818 static void
1819 pmdie(SIGNAL_ARGS)
1820 {
1821         int                     save_errno = errno;
1822
1823         PG_SETMASK(&BlockSig);
1824
1825         ereport(DEBUG2,
1826                         (errmsg_internal("postmaster received signal %d",
1827                                                          postgres_signal_arg)));
1828
1829         switch (postgres_signal_arg)
1830         {
1831                 case SIGTERM:
1832
1833                         /*
1834                          * Smart Shutdown:
1835                          *
1836                          * Wait for children to end their work, then shut down.
1837                          */
1838                         if (Shutdown >= SmartShutdown)
1839                                 break;
1840                         Shutdown = SmartShutdown;
1841                         ereport(LOG,
1842                                         (errmsg("received smart shutdown request")));
1843
1844                         if (DLGetHead(BackendList))
1845                                 break;                  /* let reaper() handle this */
1846
1847                         /*
1848                          * No children left. Begin shutdown of data base system.
1849                          */
1850                         if (StartupPID != 0 || FatalError)
1851                                 break;                  /* let reaper() handle this */
1852                         /* Start the bgwriter if not running */
1853                         if (BgWriterPID == 0)
1854                                 BgWriterPID = StartBackgroundWriter();
1855                         /* And tell it to shut down */
1856                         if (BgWriterPID != 0)
1857                                 kill(BgWriterPID, SIGUSR2);
1858                         /* Tell pgarch to shut down too; nothing left for it to do */
1859                         if (PgArchPID != 0)
1860                                 kill(PgArchPID, SIGQUIT);
1861                         /* Tell pgstat to shut down too; nothing left for it to do */
1862                         if (PgStatPID != 0)
1863                                 kill(PgStatPID, SIGQUIT);
1864                         break;
1865
1866                 case SIGINT:
1867
1868                         /*
1869                          * Fast Shutdown:
1870                          *
1871                          * Abort all children with SIGTERM (rollback active transactions
1872                          * and exit) and shut down when they are gone.
1873                          */
1874                         if (Shutdown >= FastShutdown)
1875                                 break;
1876                         Shutdown = FastShutdown;
1877                         ereport(LOG,
1878                                         (errmsg("received fast shutdown request")));
1879
1880                         if (DLGetHead(BackendList))
1881                         {
1882                                 if (!FatalError)
1883                                 {
1884                                         ereport(LOG,
1885                                                         (errmsg("aborting any active transactions")));
1886                                         SignalChildren(SIGTERM);
1887                                         /* reaper() does the rest */
1888                                 }
1889                                 break;
1890                         }
1891
1892                         /*
1893                          * No children left. Begin shutdown of data base system.
1894                          *
1895                          * Note: if we previously got SIGTERM then we may send SIGUSR2 to
1896                          * the bgwriter a second time here.  This should be harmless.
1897                          */
1898                         if (StartupPID != 0 || FatalError)
1899                                 break;                  /* let reaper() handle this */
1900                         /* Start the bgwriter if not running */
1901                         if (BgWriterPID == 0)
1902                                 BgWriterPID = StartBackgroundWriter();
1903                         /* And tell it to shut down */
1904                         if (BgWriterPID != 0)
1905                                 kill(BgWriterPID, SIGUSR2);
1906                         /* Tell pgarch to shut down too; nothing left for it to do */
1907                         if (PgArchPID != 0)
1908                                 kill(PgArchPID, SIGQUIT);
1909                         /* Tell pgstat to shut down too; nothing left for it to do */
1910                         if (PgStatPID != 0)
1911                                 kill(PgStatPID, SIGQUIT);
1912                         break;
1913
1914                 case SIGQUIT:
1915
1916                         /*
1917                          * Immediate Shutdown:
1918                          *
1919                          * abort all children with SIGQUIT and exit without attempt to
1920                          * properly shut down data base system.
1921                          */
1922                         ereport(LOG,
1923                                         (errmsg("received immediate shutdown request")));
1924                         if (StartupPID != 0)
1925                                 kill(StartupPID, SIGQUIT);
1926                         if (BgWriterPID != 0)
1927                                 kill(BgWriterPID, SIGQUIT);
1928                         if (PgArchPID != 0)
1929                                 kill(PgArchPID, SIGQUIT);
1930                         if (PgStatPID != 0)
1931                                 kill(PgStatPID, SIGQUIT);
1932                         if (DLGetHead(BackendList))
1933                                 SignalChildren(SIGQUIT);
1934                         ExitPostmaster(0);
1935                         break;
1936         }
1937
1938         PG_SETMASK(&UnBlockSig);
1939
1940         errno = save_errno;
1941 }
1942
1943 /*
1944  * Reaper -- signal handler to cleanup after a backend (child) dies.
1945  */
1946 static void
1947 reaper(SIGNAL_ARGS)
1948 {
1949         int                     save_errno = errno;
1950
1951 #ifdef HAVE_WAITPID
1952         int                     status;                 /* backend exit status */
1953
1954 #else
1955 #ifndef WIN32
1956         union wait      status;                 /* backend exit status */
1957 #endif
1958 #endif
1959         int                     exitstatus;
1960         int                     pid;                    /* process id of dead backend */
1961
1962         PG_SETMASK(&BlockSig);
1963
1964         ereport(DEBUG4,
1965                         (errmsg_internal("reaping dead processes")));
1966 #ifdef HAVE_WAITPID
1967         while ((pid = waitpid(-1, &status, WNOHANG)) > 0)
1968         {
1969                 exitstatus = status;
1970 #else
1971 #ifndef WIN32
1972         while ((pid = wait3(&status, WNOHANG, NULL)) > 0)
1973         {
1974                 exitstatus = status.w_status;
1975 #else
1976         while ((pid = win32_waitpid(&exitstatus)) > 0)
1977         {
1978                 /*
1979                  * We need to do this here, and not in CleanupBackend, since this
1980                  * is to be called on all children when we are done with them.
1981                  * Could move to LogChildExit, but that seems like asking for
1982                  * future trouble...
1983                  */
1984                 win32_RemoveChild(pid);
1985 #endif   /* WIN32 */
1986 #endif   /* HAVE_WAITPID */
1987
1988                 /*
1989                  * Check if this child was a startup process.
1990                  */
1991                 if (StartupPID != 0 && pid == StartupPID)
1992                 {
1993                         StartupPID = 0;
1994                         if (exitstatus != 0)
1995                         {
1996                                 LogChildExit(LOG, gettext("startup process"),
1997                                                          pid, exitstatus);
1998                                 ereport(LOG,
1999                                                 (errmsg("aborting startup due to startup process failure")));
2000                                 ExitPostmaster(1);
2001                         }
2002
2003                         /*
2004                          * Startup succeeded - we are done with system startup or
2005                          * recovery.
2006                          */
2007                         FatalError = false;
2008
2009                         /*
2010                          * Load the flat user/group files into postmaster's caches.
2011                          * The startup process has recomputed these from the database
2012                          * contents, so we wait till it finishes before loading them.
2013                          */
2014                         load_user();
2015                         load_group();
2016
2017                         /*
2018                          * Crank up the background writer.      It doesn't matter if this
2019                          * fails, we'll just try again later.
2020                          */
2021                         Assert(BgWriterPID == 0);
2022                         BgWriterPID = StartBackgroundWriter();
2023
2024                         /*
2025                          * Go to shutdown mode if a shutdown request was pending.
2026                          * Otherwise, try to start the archiver and stats collector
2027                          * too.
2028                          */
2029                         if (Shutdown > NoShutdown && BgWriterPID != 0)
2030                                 kill(BgWriterPID, SIGUSR2);
2031                         else if (Shutdown == NoShutdown)
2032                         {
2033                                 if (XLogArchivingActive() && PgArchPID == 0)
2034                                         PgArchPID = pgarch_start();
2035                                 if (PgStatPID == 0)
2036                                         PgStatPID = pgstat_start();
2037                         }
2038
2039                         continue;
2040                 }
2041
2042                 /*
2043                  * Was it the bgwriter?
2044                  */
2045                 if (BgWriterPID != 0 && pid == BgWriterPID)
2046                 {
2047                         BgWriterPID = 0;
2048                         if (exitstatus == 0 && Shutdown > NoShutdown &&
2049                                 !FatalError && !DLGetHead(BackendList))
2050                         {
2051                                 /*
2052                                  * Normal postmaster exit is here: we've seen normal exit
2053                                  * of the bgwriter after it's been told to shut down. We
2054                                  * expect that it wrote a shutdown checkpoint.  (If for
2055                                  * some reason it didn't, recovery will occur on next
2056                                  * postmaster start.)
2057                                  *
2058                                  * Note: we do not wait around for exit of the archiver or
2059                                  * stats processes.  They've been sent SIGQUIT by this
2060                                  * point, and in any case contain logic to commit
2061                                  * hara-kiri if they notice the postmaster is gone.
2062                                  */
2063                                 ExitPostmaster(0);
2064                         }
2065
2066                         /*
2067                          * Any unexpected exit of the bgwriter is treated as a crash.
2068                          */
2069                         HandleChildCrash(pid, exitstatus,
2070                                                          gettext("background writer process"));
2071                         continue;
2072                 }
2073
2074                 /*
2075                  * Was it the archiver?  If so, just try to start a new one; no
2076                  * need to force reset of the rest of the system.  (If fail, we'll
2077                  * try again in future cycles of the main loop.)
2078                  */
2079                 if (PgArchPID != 0 && pid == PgArchPID)
2080                 {
2081                         PgArchPID = 0;
2082                         if (exitstatus != 0)
2083                                 LogChildExit(LOG, gettext("archiver process"),
2084                                                          pid, exitstatus);
2085                         if (XLogArchivingActive() &&
2086                                 StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
2087                                 PgArchPID = pgarch_start();
2088                         continue;
2089                 }
2090
2091                 /*
2092                  * Was it the statistics collector?  If so, just try to start a
2093                  * new one; no need to force reset of the rest of the system.  (If
2094                  * fail, we'll try again in future cycles of the main loop.)
2095                  */
2096                 if (PgStatPID != 0 && pid == PgStatPID)
2097                 {
2098                         PgStatPID = 0;
2099                         if (exitstatus != 0)
2100                                 LogChildExit(LOG, gettext("statistics collector process"),
2101                                                          pid, exitstatus);
2102                         if (StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
2103                                 PgStatPID = pgstat_start();
2104                         continue;
2105                 }
2106
2107                 /* Was it the system logger? try to start a new one */
2108                 if (SysLoggerPID != 0 && pid == SysLoggerPID)
2109                 {
2110                         SysLoggerPID = 0;
2111                         /* for safety's sake, launch new logger *first* */
2112                         SysLoggerPID = SysLogger_Start();
2113                         if (exitstatus != 0)
2114                                 LogChildExit(LOG, gettext("system logger process"),
2115                                                          pid, exitstatus);
2116                         continue;
2117                 }
2118
2119                 /*
2120                  * Else do standard backend child cleanup.
2121                  */
2122                 CleanupBackend(pid, exitstatus);
2123         }                                                       /* loop over pending child-death reports */
2124
2125         if (FatalError)
2126         {
2127                 /*
2128                  * Wait for all important children to exit, then reset shmem and
2129                  * StartupDataBase.  (We can ignore the archiver and stats
2130                  * processes here since they are not connected to shmem.)
2131                  */
2132                 if (DLGetHead(BackendList) || StartupPID != 0 || BgWriterPID != 0)
2133                         goto reaper_done;
2134                 ereport(LOG,
2135                         (errmsg("all server processes terminated; reinitializing")));
2136
2137                 shmem_exit(0);
2138                 reset_shared(PostPortNumber);
2139
2140                 StartupPID = StartupDataBase();
2141
2142                 goto reaper_done;
2143         }
2144
2145         if (Shutdown > NoShutdown)
2146         {
2147                 if (DLGetHead(BackendList) || StartupPID != 0)
2148                         goto reaper_done;
2149                 /* Start the bgwriter if not running */
2150                 if (BgWriterPID == 0)
2151                         BgWriterPID = StartBackgroundWriter();
2152                 /* And tell it to shut down */
2153                 if (BgWriterPID != 0)
2154                         kill(BgWriterPID, SIGUSR2);
2155                 /* Tell pgarch to shut down too; nothing left for it to do */
2156                 if (PgArchPID != 0)
2157                         kill(PgArchPID, SIGQUIT);
2158                 /* Tell pgstat to shut down too; nothing left for it to do */
2159                 if (PgStatPID != 0)
2160                         kill(PgStatPID, SIGQUIT);
2161         }
2162
2163 reaper_done:
2164         PG_SETMASK(&UnBlockSig);
2165
2166         errno = save_errno;
2167 }
2168
2169
2170 /*
2171  * CleanupBackend -- cleanup after terminated backend.
2172  *
2173  * Remove all local state associated with backend.
2174  */
2175 static void
2176 CleanupBackend(int pid,
2177                            int exitstatus)      /* child's exit status. */
2178 {
2179         Dlelem     *curr;
2180
2181         LogChildExit(DEBUG2, gettext("server process"), pid, exitstatus);
2182
2183         /*
2184          * If a backend dies in an ugly way (i.e. exit status not 0) then we
2185          * must signal all other backends to quickdie.  If exit status is zero
2186          * we assume everything is hunky dory and simply remove the backend
2187          * from the active backend list.
2188          */
2189         if (exitstatus != 0)
2190         {
2191                 HandleChildCrash(pid, exitstatus, gettext("server process"));
2192                 return;
2193         }
2194
2195         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
2196         {
2197                 Backend    *bp = (Backend *) DLE_VAL(curr);
2198
2199                 if (bp->pid == pid)
2200                 {
2201                         DLRemove(curr);
2202                         free(bp);
2203                         DLFreeElem(curr);
2204 #ifdef EXEC_BACKEND
2205                         ShmemBackendArrayRemove(pid);
2206 #endif
2207                         /* Tell the collector about backend termination */
2208                         pgstat_beterm(pid);
2209                         break;
2210                 }
2211         }
2212 }
2213
2214 /*
2215  * HandleChildCrash -- cleanup after failed backend or bgwriter.
2216  *
2217  * The objectives here are to clean up our local state about the child
2218  * process, and to signal all other remaining children to quickdie.
2219  */
2220 static void
2221 HandleChildCrash(int pid, int exitstatus, const char *procname)
2222 {
2223         Dlelem     *curr,
2224                            *next;
2225         Backend    *bp;
2226
2227         /*
2228          * Make log entry unless there was a previous crash (if so, nonzero
2229          * exit status is to be expected in SIGQUIT response; don't clutter
2230          * log)
2231          */
2232         if (!FatalError)
2233         {
2234                 LogChildExit(LOG, procname, pid, exitstatus);
2235                 ereport(LOG,
2236                           (errmsg("terminating any other active server processes")));
2237         }
2238
2239         /* Process regular backends */
2240         for (curr = DLGetHead(BackendList); curr; curr = next)
2241         {
2242                 next = DLGetSucc(curr);
2243                 bp = (Backend *) DLE_VAL(curr);
2244                 if (bp->pid == pid)
2245                 {
2246                         /*
2247                          * Found entry for freshly-dead backend, so remove it.
2248                          */
2249                         DLRemove(curr);
2250                         free(bp);
2251                         DLFreeElem(curr);
2252 #ifdef EXEC_BACKEND
2253                         ShmemBackendArrayRemove(pid);
2254 #endif
2255                         /* Tell the collector about backend termination */
2256                         pgstat_beterm(pid);
2257                         /* Keep looping so we can signal remaining backends */
2258                 }
2259                 else
2260                 {
2261                         /*
2262                          * This backend is still alive.  Unless we did so already,
2263                          * tell it to commit hara-kiri.
2264                          *
2265                          * SIGQUIT is the special signal that says exit without proc_exit
2266                          * and let the user know what's going on. But if SendStop is
2267                          * set (-s on command line), then we send SIGSTOP instead, so
2268                          * that we can get core dumps from all backends by hand.
2269                          */
2270                         if (!FatalError)
2271                         {
2272                                 ereport(DEBUG2,
2273                                                 (errmsg_internal("sending %s to process %d",
2274                                                                           (SendStop ? "SIGSTOP" : "SIGQUIT"),
2275                                                                                  (int) bp->pid)));
2276                                 kill(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
2277                         }
2278                 }
2279         }
2280
2281         /* Take care of the bgwriter too */
2282         if (pid == BgWriterPID)
2283                 BgWriterPID = 0;
2284         else if (BgWriterPID != 0 && !FatalError)
2285         {
2286                 ereport(DEBUG2,
2287                                 (errmsg_internal("sending %s to process %d",
2288                                                                  (SendStop ? "SIGSTOP" : "SIGQUIT"),
2289                                                                  (int) BgWriterPID)));
2290                 kill(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
2291         }
2292
2293         /* Force a power-cycle of the pgarch process too */
2294         /* (Shouldn't be necessary, but just for luck) */
2295         if (PgArchPID != 0 && !FatalError)
2296         {
2297                 ereport(DEBUG2,
2298                                 (errmsg_internal("sending %s to process %d",
2299                                                                  "SIGQUIT",
2300                                                                  (int) PgArchPID)));
2301                 kill(PgArchPID, SIGQUIT);
2302         }
2303
2304         /* Force a power-cycle of the pgstat processes too */
2305         /* (Shouldn't be necessary, but just for luck) */
2306         if (PgStatPID != 0 && !FatalError)
2307         {
2308                 ereport(DEBUG2,
2309                                 (errmsg_internal("sending %s to process %d",
2310                                                                  "SIGQUIT",
2311                                                                  (int) PgStatPID)));
2312                 kill(PgStatPID, SIGQUIT);
2313         }
2314
2315         /* We do NOT restart the syslogger */
2316
2317         FatalError = true;
2318 }
2319
/*
 * LogChildExit -- report how a child process ended.
 *
 * lev is the ereport level to use, procname a noun phrase describing the
 * child, pid its process ID and exitstatus its wait status.  The status
 * is decoded into "exited with code", "terminated by signal", or, when
 * it is neither, reported verbatim as an unexpected status.
 */
static void
LogChildExit(int lev, const char *procname, int pid, int exitstatus)
{
    if (WIFEXITED(exitstatus))
    {
        ereport(lev,

        /*
         * translator: %s is a noun phrase describing a child process,
         * such as "server process"
         */
                (errmsg("%s (PID %d) exited with exit code %d",
                        procname, pid, WEXITSTATUS(exitstatus))));
        return;
    }

    if (WIFSIGNALED(exitstatus))
    {
        ereport(lev,

        /*
         * translator: %s is a noun phrase describing a child process,
         * such as "server process"
         */
                (errmsg("%s (PID %d) was terminated by signal %d",
                        procname, pid, WTERMSIG(exitstatus))));
        return;
    }

    /* Neither a normal exit nor a signal: show the raw status */
    ereport(lev,

    /*
     * translator: %s is a noun phrase describing a child process,
     * such as "server process"
     */
            (errmsg("%s (PID %d) exited with unexpected status %d",
                    procname, pid, exitstatus)));
}
2354
2355 /*
2356  * Send a signal to all backend children (but NOT special children)
2357  */
2358 static void
2359 SignalChildren(int signal)
2360 {
2361         Dlelem     *curr;
2362
2363         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
2364         {
2365                 Backend    *bp = (Backend *) DLE_VAL(curr);
2366
2367                 ereport(DEBUG4,
2368                                 (errmsg_internal("sending signal %d to process %d",
2369                                                                  signal, (int) bp->pid)));
2370                 kill(bp->pid, signal);
2371         }
2372 }
2373
2374 /*
2375  * BackendStartup -- start backend process
2376  *
2377  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
2378  */
2379 static int
2380 BackendStartup(Port *port)
2381 {
2382         Backend    *bn;                         /* for backend cleanup */
2383         pid_t           pid;
2384
2385 #ifdef LINUX_PROFILE
2386         struct itimerval prof_itimer;
2387 #endif
2388
2389         /*
2390          * Compute the cancel key that will be assigned to this backend. The
2391          * backend will have its own copy in the forked-off process' value of
2392          * MyCancelKey, so that it can transmit the key to the frontend.
2393          */
2394         MyCancelKey = PostmasterRandom();
2395
2396         /*
2397          * Make room for backend data structure.  Better before the fork() so
2398          * we can handle failure cleanly.
2399          */
2400         bn = (Backend *) malloc(sizeof(Backend));
2401         if (!bn)
2402         {
2403                 ereport(LOG,
2404                                 (errcode(ERRCODE_OUT_OF_MEMORY),
2405                                  errmsg("out of memory")));
2406                 return STATUS_ERROR;
2407         }
2408
2409         /* Pass down canAcceptConnections state (kluge for EXEC_BACKEND case) */
2410         port->canAcceptConnections = canAcceptConnections();
2411
2412         /*
2413          * Flush stdio channels just before fork, to avoid double-output
2414          * problems. Ideally we'd use fflush(NULL) here, but there are still a
2415          * few non-ANSI stdio libraries out there (like SunOS 4.1.x) that
2416          * coredump if we do. Presently stdout and stderr are the only stdio
2417          * output channels used by the postmaster, so fflush'ing them should
2418          * be sufficient.
2419          */
2420         fflush(stdout);
2421         fflush(stderr);
2422
2423 #ifdef EXEC_BACKEND
2424
2425         pid = backend_forkexec(port);
2426
2427 #else                                                   /* !EXEC_BACKEND */
2428
2429 #ifdef LINUX_PROFILE
2430
2431         /*
2432          * Linux's fork() resets the profiling timer in the child process. If
2433          * we want to profile child processes then we need to save and restore
2434          * the timer setting.  This is a waste of time if not profiling,
2435          * however, so only do it if commanded by specific -DLINUX_PROFILE
2436          * switch.
2437          */
2438         getitimer(ITIMER_PROF, &prof_itimer);
2439 #endif
2440
2441 #ifdef __BEOS__
2442         /* Specific beos actions before backend startup */
2443         beos_before_backend_startup();
2444 #endif
2445
2446         pid = fork();
2447
2448         if (pid == 0)                           /* child */
2449         {
2450 #ifdef LINUX_PROFILE
2451                 setitimer(ITIMER_PROF, &prof_itimer, NULL);
2452 #endif
2453
2454 #ifdef __BEOS__
2455                 /* Specific beos backend startup actions */
2456                 beos_backend_startup();
2457 #endif
2458                 free(bn);
2459
2460                 proc_exit(BackendRun(port));
2461         }
2462 #endif   /* EXEC_BACKEND */
2463
2464         if (pid < 0)
2465         {
2466                 /* in parent, fork failed */
2467                 int                     save_errno = errno;
2468
2469 #ifdef __BEOS__
2470                 /* Specific beos backend startup actions */
2471                 beos_backend_startup_failed();
2472 #endif
2473                 free(bn);
2474                 errno = save_errno;
2475                 ereport(LOG,
2476                           (errmsg("could not fork new process for connection: %m")));
2477                 report_fork_failure_to_client(port, save_errno);
2478                 return STATUS_ERROR;
2479         }
2480
2481         /* in parent, successful fork */
2482         ereport(DEBUG2,
2483                         (errmsg_internal("forked new backend, pid=%d socket=%d",
2484                                                          (int) pid, port->sock)));
2485
2486         /*
2487          * Everything's been successful, it's safe to add this backend to our
2488          * list of backends.
2489          */
2490         bn->pid = pid;
2491         bn->cancel_key = MyCancelKey;
2492         DLAddHead(BackendList, DLNewElem(bn));
2493 #ifdef EXEC_BACKEND
2494         ShmemBackendArrayAdd(bn);
2495 #endif
2496
2497         return STATUS_OK;
2498 }
2499
2500 /*
2501  * Try to report backend fork() failure to client before we close the
2502  * connection.  Since we do not care to risk blocking the postmaster on
2503  * this connection, we set the connection to non-blocking and try only once.
2504  *
2505  * This is grungy special-purpose code; we cannot use backend libpq since
2506  * it's not up and running.
2507  */
2508 static void
2509 report_fork_failure_to_client(Port *port, int errnum)
2510 {
2511         char            buffer[1000];
2512
2513         /* Format the error message packet (always V2 protocol) */
2514         snprintf(buffer, sizeof(buffer), "E%s%s\n",
2515                          gettext("could not fork new process for connection: "),
2516                          strerror(errnum));
2517
2518         /* Set port to non-blocking.  Don't do send() if this fails */
2519         if (!set_noblock(port->sock))
2520                 return;
2521
2522         send(port->sock, buffer, strlen(buffer) + 1, 0);
2523 }
2524
2525
/*
 * split_opts -- chop an option string into words and append them to argv
 *
 * argv is the array being filled, *argcp the index of its next free slot
 * (advanced once per appended word), and s the option string, which may
 * be NULL.
 *
 * NB: s is modified in place!  Each word is NUL-terminated where it
 * stands, and the argv entries point into s.
 *
 * No current POSTGRES argument needs quoting characters, so the
 * simple-minded rule "every run of non-whitespace characters is one argv
 * element" is good enough.
 *
 * If you don't like that, well, we *used* to pass the whole option string
 * as ONE argument to execl(), which was even less intelligent...
 */
static void
split_opts(char **argv, int *argcp, char *s)
{
    char       *cursor = s;

    if (cursor == NULL)
        return;

    for (;;)
    {
        /* Step over whitespace in front of the next word */
        while (isspace((unsigned char) *cursor))
            cursor++;
        if (*cursor == '\0')
            return;

        /* A word starts here: record it */
        argv[*argcp] = cursor;
        *argcp += 1;

        /* Find the end of the word */
        while (*cursor != '\0' && !isspace((unsigned char) *cursor))
            cursor++;
        if (*cursor == '\0')
            return;

        /* Terminate the word and continue behind it */
        *cursor++ = '\0';
    }
}
2554
2555
2556 /*
2557  * BackendRun -- perform authentication, and if successful,
2558  *                              set up the backend's argument list and invoke PostgresMain()
2559  *
2560  * returns:
2561  *              Shouldn't return at all.
2562  *              If PostgresMain() fails, return status.
2563  */
2564 static int
2565 BackendRun(Port *port)
2566 {
2567         int                     status;
2568         char            remote_host[NI_MAXHOST];
2569         char            remote_port[NI_MAXSERV];
2570         char            remote_ps_data[NI_MAXHOST];
2571         char      **av;
2572         int                     maxac;
2573         int                     ac;
2574         char            protobuf[32];
2575         int                     i;
2576
2577         IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
2578
2579         /*
2580          * Let's clean up ourselves as the postmaster child, and close the
2581          * postmaster's listen sockets
2582          */
2583         ClosePostmasterPorts(false);
2584
2585         /* We don't want the postmaster's proc_exit() handlers */
2586         on_exit_reset();
2587
2588         /*
2589          * Signal handlers setting is moved to tcop/postgres...
2590          */
2591
2592         /* Save port etc. for ps status */
2593         MyProcPort = port;
2594
2595         /* Reset MyProcPid to new backend's pid */
2596         MyProcPid = getpid();
2597
2598         /*
2599          * PreAuthDelay is a debugging aid for investigating problems in the
2600          * authentication cycle: it can be set in postgresql.conf to allow
2601          * time to attach to the newly-forked backend with a debugger. (See
2602          * also the -W backend switch, which we allow clients to pass through
2603          * PGOPTIONS, but it is not honored until after authentication.)
2604          */
2605         if (PreAuthDelay > 0)
2606                 pg_usleep(PreAuthDelay * 1000000L);
2607
2608         ClientAuthInProgress = true;    /* limit visibility of log messages */
2609
2610         /* save start time for end of session reporting */
2611         gettimeofday(&(port->session_start), NULL);
2612
2613         /* set these to empty in case they are needed before we set them up */
2614         port->remote_host = "";
2615         port->remote_port = "";
2616         port->commandTag = "";
2617
2618         /*
2619          * Initialize libpq and enable reporting of ereport errors to the
2620          * client. Must do this now because authentication uses libpq to send
2621          * messages.
2622          */
2623         pq_init();                                      /* initialize libpq to talk to client */
2624         whereToSendOutput = Remote; /* now safe to ereport to client */
2625
2626         /*
2627          * We arrange for a simple exit(0) if we receive SIGTERM or SIGQUIT
2628          * during any client authentication related communication. Otherwise
2629          * the postmaster cannot shutdown the database FAST or IMMED cleanly
2630          * if a buggy client blocks a backend during authentication.
2631          */
2632         pqsignal(SIGTERM, authdie);
2633         pqsignal(SIGQUIT, authdie);
2634         pqsignal(SIGALRM, authdie);
2635         PG_SETMASK(&AuthBlockSig);
2636
2637         /*
2638          * Get the remote host name and port for logging and status display.
2639          */
2640         remote_host[0] = '\0';
2641         remote_port[0] = '\0';
2642         if (getnameinfo_all(&port->raddr.addr, port->raddr.salen,
2643                                                 remote_host, sizeof(remote_host),
2644                                                 remote_port, sizeof(remote_port),
2645                                    (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV))
2646         {
2647                 int                     ret = getnameinfo_all(&port->raddr.addr, port->raddr.salen,
2648                                                                                 remote_host, sizeof(remote_host),
2649                                                                                 remote_port, sizeof(remote_port),
2650                                                                                 NI_NUMERICHOST | NI_NUMERICSERV);
2651
2652                 if (ret)
2653                         ereport(WARNING,
2654                                         (errmsg("getnameinfo_all() failed: %s",
2655                                                         gai_strerror(ret))));
2656         }
2657         snprintf(remote_ps_data, sizeof(remote_ps_data),
2658                          remote_port[0] == '\0' ? "%s" : "%s(%s)",
2659                          remote_host, remote_port);
2660
2661         if (Log_connections)
2662                 ereport(LOG,
2663                                 (errmsg("connection received: host=%s port=%s",
2664                                                 remote_host, remote_port)));
2665
2666         /*
2667          * save remote_host and remote_port in port stucture
2668          */
2669         port->remote_host = strdup(remote_host);
2670         port->remote_port = strdup(remote_port);
2671
2672         /*
2673          * In EXEC_BACKEND case, we didn't inherit the contents of pg_hba.conf
2674          * etcetera from the postmaster, and have to load them ourselves.
2675          * Build the PostmasterContext (which didn't exist before, in this
2676          * process) to contain the data.
2677          *
2678          * FIXME: [fork/exec] Ugh.      Is there a way around this overhead?
2679          */
2680 #ifdef EXEC_BACKEND
2681         Assert(PostmasterContext == NULL);
2682         PostmasterContext = AllocSetContextCreate(TopMemoryContext,
2683                                                                                           "Postmaster",
2684                                                                                           ALLOCSET_DEFAULT_MINSIZE,
2685                                                                                           ALLOCSET_DEFAULT_INITSIZE,
2686                                                                                           ALLOCSET_DEFAULT_MAXSIZE);
2687         MemoryContextSwitchTo(PostmasterContext);
2688
2689         load_hba();
2690         load_ident();
2691         load_user();
2692         load_group();
2693 #endif
2694
2695         /*
2696          * Ready to begin client interaction.  We will give up and exit(0)
2697          * after a time delay, so that a broken client can't hog a connection
2698          * indefinitely.  PreAuthDelay doesn't count against the time limit.
2699          */
2700         if (!enable_sig_alarm(AuthenticationTimeout * 1000, false))
2701                 elog(FATAL, "could not set timer for authorization timeout");
2702
2703         /*
2704          * Receive the startup packet (which might turn out to be a cancel
2705          * request packet).
2706          */
2707         status = ProcessStartupPacket(port, false);
2708
2709         if (status != STATUS_OK)
2710                 proc_exit(0);
2711
2712         /*
2713          * Now that we have the user and database name, we can set the process
2714          * title for ps.  It's good to do this as early as possible in
2715          * startup.
2716          */
2717         init_ps_display(port->user_name, port->database_name, remote_ps_data);
2718         set_ps_display("authentication");
2719
2720         /*
2721          * Now perform authentication exchange.
2722          */
2723         ClientAuthentication(port); /* might not return, if failure */
2724
2725         /*
2726          * Done with authentication.  Disable timeout, and prevent
2727          * SIGTERM/SIGQUIT again until backend startup is complete.
2728          */
2729         if (!disable_sig_alarm(false))
2730                 elog(FATAL, "could not disable timer for authorization timeout");
2731         PG_SETMASK(&BlockSig);
2732
2733         if (Log_connections)
2734                 ereport(LOG,
2735                                 (errmsg("connection authorized: user=%s database=%s",
2736                                                 port->user_name, port->database_name)));
2737
2738         /*
2739          * Don't want backend to be able to see the postmaster random number
2740          * generator state.  We have to clobber the static random_seed *and*
2741          * start a new random sequence in the random() library function.
2742          */
2743         random_seed = 0;
2744         srandom((unsigned int) (MyProcPid ^ port->session_start.tv_usec));
2745
2746         /* ----------------
2747          * Now, build the argv vector that will be given to PostgresMain.
2748          *
2749          * The layout of the command line is
2750          *              postgres [secure switches] -p databasename [insecure switches]
2751          * where the switches after -p come from the client request.
2752          *
2753          * The maximum possible number of commandline arguments that could come
2754          * from ExtraOptions or port->cmdline_options is (strlen + 1) / 2; see
2755          * split_opts().
2756          * ----------------
2757          */
2758         maxac = 10;                                     /* for fixed args supplied below */
2759         maxac += (strlen(ExtraOptions) + 1) / 2;
2760         if (port->cmdline_options)
2761                 maxac += (strlen(port->cmdline_options) + 1) / 2;
2762
2763         av = (char **) MemoryContextAlloc(TopMemoryContext,
2764                                                                           maxac * sizeof(char *));
2765         ac = 0;
2766
2767         av[ac++] = "postgres";
2768
2769         /*
2770          * Pass any backend switches specified with -o in the postmaster's own
2771          * command line.  We assume these are secure.  (It's OK to mangle
2772          * ExtraOptions now, since we're safely inside a subprocess.)
2773          */
2774         split_opts(av, &ac, ExtraOptions);
2775
2776         /* Tell the backend what protocol the frontend is using. */
2777         snprintf(protobuf, sizeof(protobuf), "-v%u", port->proto);
2778         av[ac++] = protobuf;
2779
2780         /*
2781          * Tell the backend it is being called from the postmaster, and which
2782          * database to use.  -p marks the end of secure switches.
2783          */
2784         av[ac++] = "-p";
2785         av[ac++] = port->database_name;
2786
2787         /*
2788          * Pass the (insecure) option switches from the connection request.
2789          * (It's OK to mangle port->cmdline_options now.)
2790          */
2791         if (port->cmdline_options)
2792                 split_opts(av, &ac, port->cmdline_options);
2793
2794         av[ac] = NULL;
2795
2796         Assert(ac < maxac);
2797
2798         /*
2799          * Release postmaster's working memory context so that backend can
2800          * recycle the space.  Note this does not trash *MyProcPort, because
2801          * ConnCreate() allocated that space with malloc() ... else we'd need
2802          * to copy the Port data here.  Also, subsidiary data such as the
2803          * username isn't lost either; see ProcessStartupPacket().
2804          */
2805         MemoryContextSwitchTo(TopMemoryContext);
2806         MemoryContextDelete(PostmasterContext);
2807         PostmasterContext = NULL;
2808
2809         /*
2810          * Debug: print arguments being passed to backend
2811          */
2812         ereport(DEBUG3,
2813                         (errmsg_internal("%s child[%d]: starting with (",
2814                                                          progname, (int)getpid())));
2815         for (i = 0; i < ac; ++i)
2816                 ereport(DEBUG3,
2817                                 (errmsg_internal("\t%s", av[i])));
2818         ereport(DEBUG3,
2819                         (errmsg_internal(")")));
2820
2821         ClientAuthInProgress = false;           /* client_min_messages is active
2822                                                                                  * now */
2823
2824         return (PostgresMain(ac, av, port->user_name));
2825 }
2826
2827
2828 #ifdef EXEC_BACKEND
2829
2830 /*
2831  * postmaster_forkexec -- fork and exec a postmaster subprocess
2832  *
2833  * The caller must have set up the argv array already, except for argv[2]
2834  * which will be filled with the name of the temp variable file.
2835  *
2836  * Returns the child process PID, or -1 on fork failure (a suitable error
2837  * message has been logged on failure).
2838  *
2839  * All uses of this routine will dispatch to SubPostmasterMain in the
2840  * child process.
2841  */
2842 pid_t
2843 postmaster_forkexec(int argc, char *argv[])
2844 {
2845         Port            port;
2846
2847         /* This entry point passes dummy values for the Port variables */
2848         memset(&port, 0, sizeof(port));
2849         return internal_forkexec(argc, argv, &port);
2850 }
2851
2852 /*
2853  * backend_forkexec -- fork/exec off a backend process
2854  *
2855  * returns the pid of the fork/exec'd process, or -1 on failure
2856  */
2857 static pid_t
2858 backend_forkexec(Port *port)
2859 {
2860         char       *av[4];
2861         int                     ac = 0;
2862
2863         av[ac++] = "postgres";
2864         av[ac++] = "-forkbackend";
2865         av[ac++] = NULL;                        /* filled in by internal_forkexec */
2866
2867         av[ac] = NULL;
2868         Assert(ac < lengthof(av));
2869
2870         return internal_forkexec(ac, av, port);
2871 }
2872
2873 #ifndef WIN32
2874
2875 /*
2876  * internal_forkexec non-win32 implementation
2877  *
2878  * - writes out backend variables to the parameter file
2879  * - fork():s, and then exec():s the child process
2880  */
2881 static pid_t
2882 internal_forkexec(int argc, char *argv[], Port *port)
2883 {
2884         static unsigned long tmpBackendFileNum = 0;
2885         pid_t           pid;
2886         char            tmpfilename[MAXPGPATH];
2887         BackendParameters param;
2888         FILE       *fp;
2889
2890         if (!save_backend_variables(&param, port))
2891                 return -1;                              /* log made by save_backend_variables */
2892
2893         /* Calculate name for temp file */
2894         Assert(DataDir);
2895         snprintf(tmpfilename, MAXPGPATH, "%s/%s/%s.backend_var.%d.%lu",
2896                          DataDir, PG_TEMP_FILES_DIR, PG_TEMP_FILE_PREFIX,
2897                          MyProcPid, ++tmpBackendFileNum);
2898
2899         /* Open file */
2900         fp = AllocateFile(tmpfilename, PG_BINARY_W);
2901         if (!fp)
2902         {
2903                 /* As per OpenTemporaryFile... */
2904                 char            dirname[MAXPGPATH];
2905
2906                 snprintf(dirname, MAXPGPATH, "%s/%s", DataDir, PG_TEMP_FILES_DIR);
2907                 mkdir(dirname, S_IRWXU);
2908
2909                 fp = AllocateFile(tmpfilename, PG_BINARY_W);
2910                 if (!fp)
2911                 {
2912                         ereport(LOG,
2913                                         (errcode_for_file_access(),
2914                                          errmsg("could not create file \"%s\": %m",
2915                                                         tmpfilename)));
2916                         return -1;
2917                 }
2918         }
2919
2920         if (fwrite(&param, sizeof(param), 1, fp) != 1)
2921         {
2922                 ereport(LOG,
2923                                 (errcode_for_file_access(),
2924                                  errmsg("could not write to file \"%s\": %m", tmpfilename)));
2925                 FreeFile(fp);
2926                 return -1;
2927         }
2928
2929         /* Release file */
2930         if (FreeFile(fp))
2931         {
2932                 ereport(LOG,
2933                                 (errcode_for_file_access(),
2934                                  errmsg("could not write to file \"%s\": %m", tmpfilename)));
2935                 return -1;
2936         }
2937
2938         /* Make sure caller set up argv properly */
2939         Assert(argc >= 3);
2940         Assert(argv[argc] == NULL);
2941         Assert(strncmp(argv[1], "-fork", 5) == 0);
2942         Assert(argv[2] == NULL);
2943
2944         /* Insert temp file name after -fork argument */
2945         argv[2] = tmpfilename;
2946
2947         /* Fire off execv in child */
2948         if ((pid = fork()) == 0)
2949         {
2950                 if (execv(postgres_exec_path, argv) < 0)
2951                 {
2952                         ereport(LOG,
2953                                         (errmsg("could not execute server process \"%s\": %m",
2954                                                         postgres_exec_path)));
2955                         /* We're already in the child process here, can't return */
2956                         exit(1);
2957                 }
2958         }
2959
2960         return pid;                                     /* Parent returns pid, or -1 on fork
2961                                                                  * failure */
2962 }
2963
2964 #else /* WIN32 */
2965
2966 /*
2967  * internal_forkexec win32 implementation
2968  *
2969  * - starts backend using CreateProcess(), in suspended state
2970  * - writes out backend variables to the parameter file
2971  *  - during this, duplicates handles and sockets required for
2972  *    inheritance into the new process
2973  * - resumes execution of the new process once the backend parameter
2974  *   file is complete.
2975  */
2976 static pid_t
2977 internal_forkexec(int argc, char *argv[], Port *port)
2978 {
2979         STARTUPINFO si;
2980         PROCESS_INFORMATION pi;
2981         int                     i;
2982         int                     j;
2983         char            cmdLine[MAXPGPATH * 2];
2984         HANDLE          childHandleCopy;
2985         HANDLE          waiterThread;
2986         HANDLE      paramHandle;
2987         BackendParameters *param;
2988         SECURITY_ATTRIBUTES sa;
2989         char        paramHandleStr[32];
2990
2991         /* Make sure caller set up argv properly */
2992         Assert(argc >= 3);
2993         Assert(argv[argc] == NULL);
2994         Assert(strncmp(argv[1], "-fork", 5) == 0);
2995         Assert(argv[2] == NULL);
2996
2997         /* Set up shared memory for parameter passing */
2998         ZeroMemory(&sa,sizeof(sa));
2999         sa.nLength = sizeof(sa);
3000         sa.bInheritHandle = TRUE;
3001         paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
3002                                                                         &sa,
3003                                                                         PAGE_READWRITE,
3004                                                                         0,
3005                                                                         sizeof(BackendParameters),
3006                                                                         NULL);
3007         if (paramHandle == INVALID_HANDLE_VALUE)
3008         {
3009                 elog(LOG, "could not create backend parameter file mapping: error code %d",
3010                          (int) GetLastError());
3011                 return -1;
3012         }
3013
3014         param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters));
3015         if (!param)
3016         {
3017                 elog(LOG, "could not map backend parameter memory: error code %d",
3018                          (int) GetLastError());
3019                 CloseHandle(paramHandle);
3020                 return -1;
3021         }
3022
3023         /* Insert temp file name after -fork argument */
3024         sprintf(paramHandleStr, "%lu", (DWORD)paramHandle);
3025         argv[2] = paramHandleStr;
3026
3027         /* Format the cmd line */
3028         cmdLine[sizeof(cmdLine) - 1] = '\0';
3029         cmdLine[sizeof(cmdLine) - 2] = '\0';
3030         snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path);
3031         i = 0;
3032         while (argv[++i] != NULL)
3033         {
3034                 j = strlen(cmdLine);
3035                 snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
3036         }
3037         if (cmdLine[sizeof(cmdLine) - 2] != '\0')
3038         {
3039                 elog(LOG, "subprocess command line too long");
3040                 return -1;
3041         }
3042
3043         memset(&pi, 0, sizeof(pi));
3044         memset(&si, 0, sizeof(si));
3045         si.cb = sizeof(si);
3046         /*
3047          * Create the subprocess in a suspended state. This will be resumed
3048          * later, once we have written out the parameter file.
3049          */
3050         if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED,
3051                                            NULL, NULL, &si, &pi))
3052         {
3053                 elog(LOG, "CreateProcess call failed: %m (error code %d)",
3054                          (int) GetLastError());
3055                 return -1;
3056         }
3057
3058         if (!save_backend_variables(param, port, pi.hProcess, pi.dwProcessId))
3059         {
3060                 /*
3061                  * log made by save_backend_variables, but we have to clean
3062                  * up the mess with the half-started process
3063                  */
3064                 if (!TerminateProcess(pi.hProcess, 255))
3065                         ereport(ERROR,
3066                                         (errmsg_internal("could not terminate unstarted process: error code %d",
3067                                                                          (int) GetLastError())));
3068                 CloseHandle(pi.hProcess);
3069                 CloseHandle(pi.hThread);
3070                 return -1;                              /* log made by save_backend_variables */
3071         }
3072
3073         /* Drop the shared memory that is now inherited to the backend */
3074         if (!UnmapViewOfFile(param))
3075                 elog(LOG, "could not unmap view of backend parameter file: error code %d",
3076                          (int) GetLastError());
3077         if (!CloseHandle(paramHandle))
3078                 elog(LOG, "could not close handle to backend parameter file: error code %d",
3079                          (int) GetLastError());
3080
3081         /*
3082          * Now that the backend variables are written out, we start the
3083          * child thread so it can start initializing while we set up
3084          * the rest of the parent state.
3085          */
3086         if (ResumeThread(pi.hThread) == -1)
3087         {
3088                 if (!TerminateProcess(pi.hProcess, 255))
3089                 {
3090                         ereport(ERROR,
3091                                         (errmsg_internal("could not terminate unstartable process: error code %d",
3092                                                                          (int) GetLastError())));
3093                         CloseHandle(pi.hProcess);
3094                         CloseHandle(pi.hThread);
3095                         return -1;
3096                 }
3097                 CloseHandle(pi.hProcess);
3098                 CloseHandle(pi.hThread);
3099                 ereport(ERROR,
3100                                 (errmsg_internal("could not resume thread of unstarted process: error code %d",
3101                                                                  (int) GetLastError())));
3102                 return -1;
3103         }
3104
3105         if (!IsUnderPostmaster)
3106         {
3107                 /* We are the Postmaster creating a child... */
3108                 win32_AddChild(pi.dwProcessId, pi.hProcess);
3109         }
3110
3111         /* Set up the thread to handle the SIGCHLD for this process */
3112         if (DuplicateHandle(GetCurrentProcess(),
3113                                                 pi.hProcess,
3114                                                 GetCurrentProcess(),
3115                                                 &childHandleCopy,
3116                                                 0,
3117                                                 FALSE,
3118                                                 DUPLICATE_SAME_ACCESS) == 0)
3119                 ereport(FATAL,
3120                                 (errmsg_internal("could not duplicate child handle: error code %d",
3121                                                                  (int) GetLastError())));
3122
3123         waiterThread = CreateThread(NULL, 64 * 1024, win32_sigchld_waiter,
3124                                                                 (LPVOID) childHandleCopy, 0, NULL);
3125         if (!waiterThread)
3126                 ereport(FATAL,
3127                    (errmsg_internal("could not create sigchld waiter thread: error code %d",
3128                                                         (int) GetLastError())));
3129         CloseHandle(waiterThread);
3130
3131         if (IsUnderPostmaster)
3132                 CloseHandle(pi.hProcess);
3133         CloseHandle(pi.hThread);
3134
3135         return pi.dwProcessId;
3136 }
3137
3138 #endif /* WIN32 */
3139
3140
3141 /*
3142  * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
3143  *                      to what it would be if we'd simply forked on Unix, and then
3144  *                      dispatch to the appropriate place.
3145  *
3146  * The first two command line arguments are expected to be "-forkFOO"
3147  * (where FOO indicates which postmaster child we are to become), and
3148  * the name of a variables file that we can read to load data that would
3149  * have been inherited by fork() on Unix.  Remaining arguments go to the
3150  * subprocess FooMain() routine.
3151  */
3152 int
3153 SubPostmasterMain(int argc, char *argv[])
3154 {
3155         Port            port;
3156
3157         /* Do this sooner rather than later... */
3158         IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
3159
3160         MyProcPid = getpid();           /* reset MyProcPid */
3161
3162         /* In EXEC_BACKEND case we will not have inherited these settings */
3163         IsPostmasterEnvironment = true;
3164         whereToSendOutput = None;
3165
3166         /* Setup essential subsystems (to ensure elog() behaves sanely) */
3167         MemoryContextInit();
3168         InitializeGUCOptions();
3169
3170         /* Read in the variables file */
3171         memset(&port, 0, sizeof(Port));
3172         read_backend_variables(argv[2], &port);
3173
3174         /* Check we got appropriate args */
3175         if (argc < 3)
3176                 elog(FATAL, "invalid subpostmaster invocation");
3177
3178         /*
3179          * If appropriate, physically re-attach to shared memory segment.
3180          * We want to do this before going any further to ensure that we
3181          * can attach at the same address the postmaster used.
3182          */
3183         if (strcmp(argv[1], "-forkbackend") == 0 ||
3184                 strcmp(argv[1], "-forkboot") == 0)
3185                 PGSharedMemoryReAttach();
3186
3187         /*
3188          * Start our win32 signal implementation. This has to be done
3189          * after we read the backend variables, because we need to pick
3190          * up the signal pipe from the parent process.
3191          */
3192 #ifdef WIN32
3193         pgwin32_signal_initialize();
3194 #endif
3195
3196         /* In EXEC_BACKEND case we will not have inherited these settings */
3197         pqinitmask();
3198         PG_SETMASK(&BlockSig);
3199
3200         /* Read in remaining GUC variables */
3201         read_nondefault_variables();
3202
3203         /* Run backend or appropriate child */
3204         if (strcmp(argv[1], "-forkbackend") == 0)
3205         {
3206                 /* BackendRun will close sockets */
3207
3208                 /* Attach process to shared data structures */
3209                 CreateSharedMemoryAndSemaphores(false, MaxBackends, 0);
3210
3211 #ifdef USE_SSL
3212                 /*
3213                  *      Need to reinitialize the SSL library in the backend,
3214                  *      since the context structures contain function pointers
3215                  *      and cannot be passed through the parameter file.
3216                  */
3217                 if (EnableSSL)
3218                         secure_initialize();
3219 #endif
3220
3221                 Assert(argc == 3);              /* shouldn't be any more args */
3222                 proc_exit(BackendRun(&port));
3223         }
3224         if (strcmp(argv[1], "-forkboot") == 0)
3225         {
3226                 /* Close the postmaster's sockets */
3227                 ClosePostmasterPorts(false);
3228
3229                 /* Attach process to shared data structures */
3230                 CreateSharedMemoryAndSemaphores(false, MaxBackends, 0);
3231
3232                 BootstrapMain(argc - 2, argv + 2);
3233                 proc_exit(0);
3234         }
3235         if (strcmp(argv[1], "-forkarch") == 0)
3236         {
3237                 /* Close the postmaster's sockets */
3238                 ClosePostmasterPorts(false);
3239
3240                 /* Do not want to attach to shared memory */
3241
3242                 PgArchiverMain(argc, argv);
3243                 proc_exit(0);
3244         }
3245         if (strcmp(argv[1], "-forkbuf") == 0)
3246         {
3247                 /* Close the postmaster's sockets */
3248                 ClosePostmasterPorts(false);
3249
3250                 /* Do not want to attach to shared memory */
3251
3252                 PgstatBufferMain(argc, argv);
3253                 proc_exit(0);
3254         }
3255         if (strcmp(argv[1], "-forkcol") == 0)
3256         {
3257                 /*
3258                  * Do NOT close postmaster sockets here, because we are forking
3259                  * from pgstat buffer process, which already did it.
3260                  */
3261
3262                 /* Do not want to attach to shared memory */
3263
3264                 PgstatCollectorMain(argc, argv);
3265                 proc_exit(0);
3266         }
3267         if (strcmp(argv[1], "-forklog") == 0)
3268         {
3269                 /* Close the postmaster's sockets */
3270                 ClosePostmasterPorts(true);
3271
3272                 /* Do not want to attach to shared memory */
3273
3274                 SysLoggerMain(argc, argv);
3275                 proc_exit(0);
3276         }
3277
3278         return 1;                                       /* shouldn't get here */
3279 }
3280
3281 #endif   /* EXEC_BACKEND */
3282
3283
/*
 * ExitPostmaster -- common exit path for the postmaster
 *
 * Never call exit() directly from postmaster code; route every exit
 * through this function instead.
 *
 * status: exit status handed on to proc_exit().
 */
static void
ExitPostmaster(int status)
{
	/*
	 * Open design question carried over from the original authors: this
	 * function should clean up shared memory and kill all backends, but
	 * currently does neither.  Whether backends ought to die together with
	 * the postmaster was first doubted ("probably not") and later answered
	 * with "MUST" (vadim, 05-10-1999).  For now we only hand the status to
	 * proc_exit().
	 */
	proc_exit(status);
}
3303
3304 /*
3305  * sigusr1_handler - handle signal conditions from child processes
3306  */
3307 static void
3308 sigusr1_handler(SIGNAL_ARGS)
3309 {
3310         int                     save_errno = errno;
3311
3312         PG_SETMASK(&BlockSig);
3313
3314         if (CheckPostmasterSignal(PMSIGNAL_PASSWORD_CHANGE))
3315         {
3316                 /*
3317                  * Password or group file has changed.
3318                  */
3319                 load_user();
3320                 load_group();
3321         }
3322
3323         if (CheckPostmasterSignal(PMSIGNAL_WAKEN_CHILDREN))
3324         {
3325                 /*
3326                  * Send SIGUSR1 to all children (triggers
3327                  * CatchupInterruptHandler). See storage/ipc/sinval[adt].c for the
3328                  * use of this.
3329                  */
3330                 if (Shutdown <= SmartShutdown)
3331                         SignalChildren(SIGUSR1);
3332         }
3333
3334         if (PgArchPID != 0 && Shutdown == NoShutdown)
3335         {
3336                 if (CheckPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER))
3337                 {
3338                         /*
3339                          * Send SIGUSR1 to archiver process, to wake it up and begin
3340                          * archiving next transaction log file.
3341                          */
3342                         kill(PgArchPID, SIGUSR1);
3343                 }
3344         }
3345
3346         PG_SETMASK(&UnBlockSig);
3347
3348         errno = save_errno;
3349 }
3350
3351
/*
 * dummy_handler -- signal handler that deliberately does nothing
 *
 * Installed for signals the postmaster itself has no use for but that
 * backends do use.  Setting such signals to SIG_IGN in the postmaster
 * could make a freshly started backend lose a signal that arrives before
 * the backend has reconfigured its own signal handling.  (See notes in
 * tcop/postgres.c.)
 */
static void
dummy_handler(SIGNAL_ARGS)
{
	/* intentionally empty */
}
3365
3366
/*
 * CharRemap: produce the textual encoding of an integer per crypt(3)
 * conventions.
 *
 * ch: any long value.  Its magnitude is reduced modulo 62, so callers may
 *	   pass values outside 0..61 (including negative ones).
 *
 * Returns 'A'..'Z' for 0..25, 'a'..'z' for 26..51 and '0'..'9' for 52..61.
 * Like the rest of this file, this relies on letters and digits being
 * contiguous in the execution character set.
 */
static char
CharRemap(long ch)
{
	unsigned long magnitude;

	/*
	 * Take the absolute value in unsigned arithmetic.  Negating a signed
	 * long overflows for LONG_MIN, whereas unsigned negation is always
	 * well defined and yields the correct magnitude for every input.
	 */
	if (ch < 0)
		magnitude = 0UL - (unsigned long) ch;
	else
		magnitude = (unsigned long) ch;

	magnitude %= 62;

	if (magnitude < 26)
		return (char) ('A' + magnitude);

	magnitude -= 26;
	if (magnitude < 26)
		return (char) ('a' + magnitude);

	magnitude -= 26;
	return (char) ('0' + magnitude);
}
3388
/*
 * RandomSalt -- fill in fresh salts for password authentication
 *
 * cryptSalt: receives two characters (indexes 0 and 1).
 * md5Salt:   receives four non-zero bytes (indexes 0 through 3).
 */
static void
RandomSalt(char *cryptSalt, char *md5Salt)
{
	long		rnd = PostmasterRandom();
	int			i;

	cryptSalt[0] = CharRemap(rnd % 62);
	cryptSalt[1] = CharRemap(rnd / 62);

	/*
	 * It's okay to reuse the first random value for one of the MD5 salt
	 * bytes, since only one of the two salts will be sent to the client.
	 * Every further byte gets a random value of its own.
	 *
	 * We use % 255, sacrificing one possible byte value, so as to ensure
	 * that all bits of the random() value participate in the result.
	 * While at it, add one to avoid generating any null bytes.
	 */
	md5Salt[0] = (rnd % 255) + 1;
	for (i = 1; i < 4; i++)
	{
		rnd = PostmasterRandom();
		md5Salt[i] = (rnd % 255) + 1;
	}
}
3417
3418 /*
3419  * PostmasterRandom
3420  */
3421 static long
3422 PostmasterRandom(void)
3423 {
3424         static bool initialized = false;
3425
3426         if (!initialized)
3427         {
3428                 Assert(random_seed != 0);
3429                 srandom(random_seed);
3430                 initialized = true;
3431         }
3432
3433         return random();
3434 }
3435
3436 /*
3437  * Count up number of child processes (regular backends only)
3438  */
3439 static int
3440 CountChildren(void)
3441 {
3442         Dlelem     *curr;
3443         int                     cnt = 0;
3444
3445         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
3446                 cnt++;
3447         return cnt;
3448 }
3449
3450
3451 /*
3452  * StartChildProcess -- start a non-backend child process for the postmaster
3453  *
3454  * xlog determines what kind of child will be started.  All child types
3455  * initially go to BootstrapMain, which will handle common setup.
3456  *
3457  * Return value of StartChildProcess is subprocess' PID, or 0 if failed
3458  * to start subprocess.
3459  */
3460 static pid_t
3461 StartChildProcess(int xlop)
3462 {
3463         pid_t           pid;
3464         char       *av[10];
3465         int                     ac = 0;
3466         char            xlbuf[32];
3467
3468 #ifdef LINUX_PROFILE
3469         struct itimerval prof_itimer;
3470 #endif
3471
3472         /*
3473          * Set up command-line arguments for subprocess
3474          */
3475         av[ac++] = "postgres";
3476
3477 #ifdef EXEC_BACKEND
3478         av[ac++] = "-forkboot";
3479         av[ac++] = NULL;                        /* filled in by postmaster_forkexec */
3480 #endif
3481
3482         snprintf(xlbuf, sizeof(xlbuf), "-x%d", xlop);
3483         av[ac++] = xlbuf;
3484
3485         av[ac++] = "-p";
3486         av[ac++] = "template1";
3487
3488         av[ac] = NULL;
3489         Assert(ac < lengthof(av));
3490
3491         /*
3492          * Flush stdio channels (see comments in BackendStartup)
3493          */
3494         fflush(stdout);
3495         fflush(stderr);
3496
3497 #ifdef EXEC_BACKEND
3498
3499         pid = postmaster_forkexec(ac, av);
3500
3501 #else                                                   /* !EXEC_BACKEND */
3502
3503 #ifdef LINUX_PROFILE
3504         /* see comments in BackendStartup */
3505         getitimer(ITIMER_PROF, &prof_itimer);
3506 #endif
3507
3508 #ifdef __BEOS__
3509         /* Specific beos actions before backend startup */
3510         beos_before_backend_startup();
3511 #endif
3512
3513         pid = fork();
3514
3515         if (pid == 0)                           /* child */
3516         {
3517 #ifdef LINUX_PROFILE
3518                 setitimer(ITIMER_PROF, &prof_itimer, NULL);
3519 #endif
3520
3521 #ifdef __BEOS__
3522                 /* Specific beos actions after backend startup */
3523                 beos_backend_startup();
3524 #endif
3525
3526                 IsUnderPostmaster = true;               /* we are a postmaster subprocess
3527                                                                                  * now */
3528
3529                 /* Close the postmaster's sockets */
3530                 ClosePostmasterPorts(false);
3531
3532                 /* Lose the postmaster's on-exit routines and port connections */
3533                 on_exit_reset();
3534
3535                 /* Release postmaster's working memory context */
3536                 MemoryContextSwitchTo(TopMemoryContext);
3537                 MemoryContextDelete(PostmasterContext);
3538                 PostmasterContext = NULL;
3539
3540                 BootstrapMain(ac, av);
3541                 ExitPostmaster(0);
3542         }
3543 #endif   /* EXEC_BACKEND */
3544
3545         if (pid < 0)
3546         {
3547                 /* in parent, fork failed */
3548                 int                     save_errno = errno;
3549
3550 #ifdef __BEOS__
3551                 /* Specific beos actions before backend startup */
3552                 beos_backend_startup_failed();
3553 #endif
3554                 errno = save_errno;
3555                 switch (xlop)
3556                 {
3557                         case BS_XLOG_STARTUP:
3558                                 ereport(LOG,
3559                                                 (errmsg("could not fork startup process: %m")));
3560                                 break;
3561                         case BS_XLOG_BGWRITER:
3562                                 ereport(LOG,
3563                                 (errmsg("could not fork background writer process: %m")));
3564                                 break;
3565                         default:
3566                                 ereport(LOG,
3567                                                 (errmsg("could not fork process: %m")));
3568                                 break;
3569                 }
3570
3571                 /*
3572                  * fork failure is fatal during startup, but there's no need to
3573                  * choke immediately if starting other child types fails.
3574                  */
3575                 if (xlop == BS_XLOG_STARTUP)
3576                         ExitPostmaster(1);
3577                 return 0;
3578         }
3579
3580         /*
3581          * in parent, successful fork
3582          */
3583         return pid;
3584 }
3585
3586
3587 /*
3588  * Create the opts file
3589  */
3590 static bool
3591 CreateOptsFile(int argc, char *argv[], char *fullprogname)
3592 {
3593         char            filename[MAXPGPATH];
3594         FILE       *fp;
3595         int                     i;
3596
3597         snprintf(filename, sizeof(filename), "%s/postmaster.opts", DataDir);
3598
3599         if ((fp = fopen(filename, "w")) == NULL)
3600         {
3601                 elog(LOG, "could not create file \"%s\": %m", filename);
3602                 return false;
3603         }
3604
3605         fprintf(fp, "%s", fullprogname);
3606         for (i = 1; i < argc; i++)
3607                 fprintf(fp, " %s%s%s", SYSTEMQUOTE, argv[i], SYSTEMQUOTE);
3608         fputs("\n", fp);
3609
3610         if (fclose(fp))
3611         {
3612                 elog(LOG, "could not write file \"%s\": %m", filename);
3613                 return false;
3614         }
3615
3616         return true;
3617 }
3618
3619
3620 #ifdef EXEC_BACKEND
3621
3622 /*
3623  * The following need to be available to the save/restore_backend_variables
3624  * functions
3625  */
3626 extern slock_t *ShmemLock;
3627 extern slock_t *ShmemIndexLock;
3628 extern void *ShmemIndexAlloc;
3629 extern LWLock *LWLockArray;
3630 extern slock_t *ProcStructLock;
3631 extern int      pgStatSock;
3632 extern int pgStatPipe[2];
3633
3634 #ifndef WIN32
3635 #define write_inheritable_socket(dest, src, childpid) (*(dest) = (src))
3636 #define read_inheritable_socket(dest, src) (*(dest) = *(src))
3637 #else
3638 static void write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE child);
3639 static void write_inheritable_socket(InheritableSocket *dest, SOCKET src,
3640                                                                          pid_t childPid);
3641 static void read_inheritable_socket(SOCKET *dest, InheritableSocket *src);
3642 #endif
3643
3644
3645 /* Save critical backend variables into the BackendParameters struct */
3646 #ifndef WIN32
3647 static bool
3648 save_backend_variables(BackendParameters *param, Port *port)
3649 #else
3650 static bool
3651 save_backend_variables(BackendParameters *param, Port *port,
3652                                            HANDLE childProcess, pid_t childPid)
3653 #endif
3654 {
3655         memcpy(&param->port, port, sizeof(Port));
3656         write_inheritable_socket(&param->portsocket, port->sock, childPid);
3657
3658         StrNCpy(param->DataDir, DataDir, MAXPGPATH);
3659
3660         memcpy(&param->ListenSocket, &ListenSocket, sizeof(ListenSocket));
3661
3662         param->MyCancelKey = MyCancelKey;
3663
3664         param->UsedShmemSegID = UsedShmemSegID;
3665         param->UsedShmemSegAddr = UsedShmemSegAddr;
3666
3667         param->ShmemLock = ShmemLock;
3668         param->ShmemIndexLock = ShmemIndexLock;
3669         param->ShmemVariableCache = ShmemVariableCache;
3670         param->ShmemIndexAlloc = ShmemIndexAlloc;
3671         param->ShmemBackendArray = ShmemBackendArray;
3672
3673         param->LWLockArray = LWLockArray;
3674         param->ProcStructLock = ProcStructLock;
3675         write_inheritable_socket(&param->pgStatSock, pgStatSock, childPid);
3676         write_inheritable_socket(&param->pgStatPipe0, pgStatPipe[0], childPid);
3677         write_inheritable_socket(&param->pgStatPipe1, pgStatPipe[1], childPid);
3678
3679         param->PostmasterPid = PostmasterPid;
3680
3681 #ifdef WIN32
3682         param->PostmasterHandle = PostmasterHandle;
3683         write_duplicated_handle(&param->initial_signal_pipe,
3684                                                         pgwin32_create_signal_listener(childPid),
3685                                                         childProcess);
3686 #endif
3687
3688         memcpy(&param->syslogPipe, &syslogPipe, sizeof(syslogPipe));
3689
3690         StrNCpy(param->my_exec_path, my_exec_path, MAXPGPATH);
3691
3692         StrNCpy(param->ExtraOptions, ExtraOptions, MAXPGPATH);
3693
3694         StrNCpy(param->lc_collate, setlocale(LC_COLLATE, NULL), LOCALE_NAME_BUFLEN);
3695         StrNCpy(param->lc_ctype, setlocale(LC_CTYPE, NULL), LOCALE_NAME_BUFLEN);
3696
3697         return true;
3698 }
3699
3700
3701 #ifdef WIN32
3702 /*
3703  * Duplicate a handle for usage in a child process, and write the child
3704  * process instance of the handle to the parameter file.
3705  */
3706 static void
3707 write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE childProcess)
3708 {
3709         HANDLE hChild = INVALID_HANDLE_VALUE;
3710
3711         if (!DuplicateHandle(GetCurrentProcess(),
3712                                                  src,
3713                                                  childProcess,
3714                                                  &hChild,
3715                                                  0,
3716                                                  TRUE,
3717                                                  DUPLICATE_CLOSE_SOURCE | DUPLICATE_SAME_ACCESS))
3718                 ereport(ERROR,
3719                                 (errmsg_internal("could not duplicate handle to be written to backend parameter file: error code %d",
3720                                                                  (int) GetLastError())));
3721
3722         *dest = hChild;
3723 }
3724
3725 /*
3726  * Duplicate a socket for usage in a child process, and write the resulting
3727  * structure to the parameter file.
3728  * This is required because a number of LSPs (Layered Service Providers) very
3729  * common on Windows (antivirus, firewalls, download managers etc) break
3730  * straight socket inheritance.
3731  */
3732 static void
3733 write_inheritable_socket(InheritableSocket *dest, SOCKET src, pid_t childpid)
3734 {
3735         dest->origsocket = src;
3736         if (src != 0 && src != -1)
3737         {
3738                 /* Actual socket */
3739                 if (WSADuplicateSocket(src, childpid, &dest->wsainfo) != 0)
3740                         ereport(ERROR,
3741                                         (errmsg("could not duplicate socket %d for use in backend: error code %d",
3742                                                         src, WSAGetLastError())));
3743         }
3744 }
3745
3746 /*
3747  * Read a duplicate socket structure back, and get the socket descriptor.
3748  */
3749 static void
3750 read_inheritable_socket(SOCKET *dest, InheritableSocket *src)
3751 {
3752         SOCKET s;
3753
3754         if (src->origsocket == -1  || src->origsocket == 0)
3755         {
3756                 /* Not a real socket! */
3757                 *dest = src->origsocket;
3758         }
3759         else
3760         {
3761                 /* Actual socket, so create from structure */
3762                 s = WSASocket(FROM_PROTOCOL_INFO,
3763                                           FROM_PROTOCOL_INFO,
3764                                           FROM_PROTOCOL_INFO,
3765                                           &src->wsainfo,
3766                                           0,
3767                                           0);
3768                 if (s == INVALID_SOCKET)
3769                 {
3770                         write_stderr("could not create inherited socket: error code %d\n",
3771                                                  WSAGetLastError());
3772                         exit(1);
3773                 }
3774                 *dest = s;
3775
3776                 /*
3777                  * To make sure we don't get two references to the same socket,
3778                  * close the original one. (This would happen when inheritance
3779                  * actually works..
3780                  */
3781                 closesocket(src->origsocket);
3782         }
3783 }
3784 #endif
3785
3786 static void
3787 read_backend_variables(char *id, Port *port)
3788 {
3789         BackendParameters param;
3790
3791 #ifndef WIN32
3792         /* Non-win32 implementation reads from file */
3793         FILE *fp;
3794
3795         /* Open file */
3796         fp = AllocateFile(id, PG_BINARY_R);
3797         if (!fp)
3798         {
3799                 write_stderr("could not read from backend variables file \"%s\": %s\n",
3800                                          id, strerror(errno));
3801                 exit(1);
3802         }
3803
3804         if (fread(&param, sizeof(param), 1, fp) != 1)
3805         {
3806                 write_stderr("could not read from backend variables file \"%s\": %s\n",
3807                                          id, strerror(errno));
3808                 exit(1);
3809         }
3810
3811         /* Release file */
3812         FreeFile(fp);
3813         if (unlink(id) != 0)
3814         {
3815                 write_stderr("could not remove file \"%s\": %s\n",
3816                                          id, strerror(errno));
3817                 exit(1);
3818         }
3819 #else
3820         /* Win32 version uses mapped file */
3821         HANDLE paramHandle;
3822         BackendParameters *paramp;
3823
3824         paramHandle = (HANDLE)atol(id);
3825         paramp = MapViewOfFile(paramHandle, FILE_MAP_READ, 0, 0, 0);
3826         if (!paramp)
3827         {
3828                 write_stderr("could not map view of backend variables: error code %d\n",
3829                                          (int) GetLastError());
3830                 exit(1);
3831         }
3832
3833         memcpy(&param, paramp, sizeof(BackendParameters));
3834
3835         if (!UnmapViewOfFile(paramp))
3836         {
3837                 write_stderr("could not unmap view of backend variables: error code %d\n",
3838                                          (int) GetLastError());
3839                 exit(1);
3840         }
3841
3842         if (!CloseHandle(paramHandle))
3843         {
3844                 write_stderr("could not close handle to backend parameter variables: error code %d\n",
3845                                          (int) GetLastError());
3846                 exit(1);
3847         }
3848 #endif
3849
3850         restore_backend_variables(&param, port);
3851 }
3852
3853 /* Restore critical backend variables from the BackendParameters struct */
3854 static void
3855 restore_backend_variables(BackendParameters *param, Port *port)
3856 {
3857         memcpy(port, &param->port, sizeof(Port));
3858         read_inheritable_socket(&port->sock, &param->portsocket);
3859
3860         SetDataDir(param->DataDir);
3861
3862         memcpy(&ListenSocket, &param->ListenSocket, sizeof(ListenSocket));
3863
3864         MyCancelKey = param->MyCancelKey;
3865
3866         UsedShmemSegID = param->UsedShmemSegID;
3867         UsedShmemSegAddr = param->UsedShmemSegAddr;
3868
3869         ShmemLock = param->ShmemLock;
3870         ShmemIndexLock = param->ShmemIndexLock;
3871         ShmemVariableCache = param->ShmemVariableCache;
3872         ShmemIndexAlloc = param->ShmemIndexAlloc;
3873         ShmemBackendArray = param->ShmemBackendArray;
3874
3875         LWLockArray = param->LWLockArray;
3876         ProcStructLock = param->ProcStructLock;
3877         read_inheritable_socket(&pgStatSock, &param->pgStatSock);
3878         read_inheritable_socket(&pgStatPipe[0], &param->pgStatPipe0);
3879         read_inheritable_socket(&pgStatPipe[1], &param->pgStatPipe1);
3880
3881         PostmasterPid = param->PostmasterPid;
3882
3883 #ifdef WIN32
3884         PostmasterHandle = param->PostmasterHandle;
3885         pgwin32_initial_signal_pipe = param->initial_signal_pipe;
3886 #endif
3887
3888         memcpy(&syslogPipe, &param->syslogPipe, sizeof(syslogPipe));
3889
3890         StrNCpy(my_exec_path, param->my_exec_path, MAXPGPATH);
3891
3892         StrNCpy(ExtraOptions, param->ExtraOptions, MAXPGPATH);
3893
3894         setlocale(LC_COLLATE, param->lc_collate);
3895         setlocale(LC_CTYPE, param->lc_ctype);
3896 }
3897
3898
3899 size_t
3900 ShmemBackendArraySize(void)
3901 {
3902         return (NUM_BACKENDARRAY_ELEMS * sizeof(Backend));
3903 }
3904
3905 void
3906 ShmemBackendArrayAllocation(void)
3907 {
3908         size_t          size = ShmemBackendArraySize();
3909
3910         ShmemBackendArray = (Backend *) ShmemAlloc(size);
3911         /* Mark all slots as empty */
3912         memset(ShmemBackendArray, 0, size);
3913 }
3914
3915 static void
3916 ShmemBackendArrayAdd(Backend *bn)
3917 {
3918         int                     i;
3919
3920         /* Find an empty slot */
3921         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
3922         {
3923                 if (ShmemBackendArray[i].pid == 0)
3924                 {
3925                         ShmemBackendArray[i] = *bn;
3926                         return;
3927                 }
3928         }
3929
3930         ereport(FATAL,
3931                         (errmsg_internal("no free slots in shmem backend array")));
3932 }
3933
3934 static void
3935 ShmemBackendArrayRemove(pid_t pid)
3936 {
3937         int                     i;
3938
3939         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
3940         {
3941                 if (ShmemBackendArray[i].pid == pid)
3942                 {
3943                         /* Mark the slot as empty */
3944                         ShmemBackendArray[i].pid = 0;
3945                         return;
3946                 }
3947         }
3948
3949         ereport(WARNING,
3950                         (errmsg_internal("could not find backend entry with pid %d",
3951                                                          (int) pid)));
3952 }
3953
3954 #endif   /* EXEC_BACKEND */
3955
3956
3957 #ifdef WIN32
3958
3959 /*
3960  * Note: The following three functions must not be interrupted (eg. by
3961  * signals).  As the Postgres Win32 signalling architecture (currently)
3962  * requires polling, or APC checking functions which aren't used here, this
3963  * is not an issue.
3964  *
3965  * We keep two separate arrays, instead of a single array of pid/HANDLE
3966  * structs, to avoid having to re-create a handle array for
3967  * WaitForMultipleObjects on each call to win32_waitpid.
3968  */
3969
3970 static void
3971 win32_AddChild(pid_t pid, HANDLE handle)
3972 {
3973         Assert(win32_childPIDArray && win32_childHNDArray);
3974         if (win32_numChildren < NUM_BACKENDARRAY_ELEMS)
3975         {
3976                 win32_childPIDArray[win32_numChildren] = pid;
3977                 win32_childHNDArray[win32_numChildren] = handle;
3978                 ++win32_numChildren;
3979         }
3980         else
3981                 ereport(FATAL,
3982                                 (errmsg_internal("no room for child entry with pid %lu",
3983                                                                  (unsigned long) pid)));
3984 }
3985
3986 static void
3987 win32_RemoveChild(pid_t pid)
3988 {
3989         int                     i;
3990
3991         Assert(win32_childPIDArray && win32_childHNDArray);
3992
3993         for (i = 0; i < win32_numChildren; i++)
3994         {
3995                 if (win32_childPIDArray[i] == pid)
3996                 {
3997                         CloseHandle(win32_childHNDArray[i]);
3998
3999                         /* Swap last entry into the "removed" one */
4000                         --win32_numChildren;
4001                         win32_childPIDArray[i] = win32_childPIDArray[win32_numChildren];
4002                         win32_childHNDArray[i] = win32_childHNDArray[win32_numChildren];
4003                         return;
4004                 }
4005         }
4006
4007         ereport(WARNING,
4008                         (errmsg_internal("could not find child entry with pid %lu",
4009                                                          (unsigned long) pid)));
4010 }
4011
4012 static pid_t
4013 win32_waitpid(int *exitstatus)
4014 {
4015         /*
4016          * Note: Do NOT use WaitForMultipleObjectsEx, as we don't want to run
4017          * queued APCs here.
4018          */
4019         int                     index;
4020         DWORD           exitCode;
4021         DWORD           ret;
4022         unsigned long offset;
4023
4024         Assert(win32_childPIDArray && win32_childHNDArray);
4025         elog(DEBUG3, "waiting on %lu children", win32_numChildren);
4026
4027         for (offset = 0; offset < win32_numChildren; offset += MAXIMUM_WAIT_OBJECTS)
4028         {
4029                 unsigned long num = Min(MAXIMUM_WAIT_OBJECTS, win32_numChildren - offset);
4030
4031                 ret = WaitForMultipleObjects(num, &win32_childHNDArray[offset], FALSE, 0);
4032                 switch (ret)
4033                 {
4034                         case WAIT_FAILED:
4035                                 ereport(LOG,
4036                                                 (errmsg_internal("failed to wait on %lu of %lu children: error code %d",
4037                                                  num, win32_numChildren, (int) GetLastError())));
4038                                 return -1;
4039
4040                         case WAIT_TIMEOUT:
4041                                 /* No children (in this chunk) have finished */
4042                                 break;
4043
4044                         default:
4045
4046                                 /*
4047                                  * Get the exit code, and return the PID of, the
4048                                  * respective process
4049                                  */
4050                                 index = offset + ret - WAIT_OBJECT_0;
4051                                 Assert(index >= 0 && index < win32_numChildren);
4052                                 if (!GetExitCodeProcess(win32_childHNDArray[index], &exitCode))
4053                                 {
4054                                         /*
4055                                          * If we get this far, this should never happen, but,
4056                                          * then again... No choice other than to assume a
4057                                          * catastrophic failure.
4058                                          */
4059                                         ereport(FATAL,
4060                                                         (errmsg_internal("failed to get exit code for child %lu",
4061                                                                                          (unsigned long) win32_childPIDArray[index])));
4062                                 }
4063                                 *exitstatus = (int) exitCode;
4064                                 return win32_childPIDArray[index];
4065                 }
4066         }
4067
4068         /* No children have finished */
4069         return -1;
4070 }
4071
4072 /*
4073  * Note! Code below executes on separate threads, one for
4074  * each child process created
4075  */
4076 static DWORD WINAPI
4077 win32_sigchld_waiter(LPVOID param)
4078 {
4079         HANDLE          procHandle = (HANDLE) param;
4080
4081         DWORD           r = WaitForSingleObject(procHandle, INFINITE);
4082
4083         if (r == WAIT_OBJECT_0)
4084                 pg_queue_signal(SIGCHLD);
4085         else
4086                 write_stderr("could not wait on child process handle: error code %d\n",
4087                                          (int) GetLastError());
4088         CloseHandle(procHandle);
4089         return 0;
4090 }
4091
4092 #endif   /* WIN32 */