From 72a3902a664c7fbceb2034e28e444b28f96fa717 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 5 May 2002 00:03:29 +0000 Subject: [PATCH] Create an internal semaphore API that is not tied to SysV semaphores. As proof of concept, provide an alternate implementation based on POSIX semaphores. Also push the SysV shared-memory implementation into a separate file so that it can be replaced conveniently. --- configure | 40 +- configure.in | 24 +- src/backend/Makefile | 4 +- src/backend/bootstrap/bootstrap.c | 3 +- src/backend/catalog/namespace.c | 3 +- src/backend/commands/async.c | 3 +- src/backend/libpq/auth.c | 5 +- src/backend/libpq/pqcomm.c | 3 +- src/backend/port/Makefile | 4 +- src/backend/port/posix_sema.c | 357 ++++++++++++++++ src/backend/port/sysv_sema.c | 522 +++++++++++++++++++++++ src/backend/port/sysv_shmem.c | 400 ++++++++++++++++++ src/backend/postmaster/pgstat.c | 3 +- src/backend/postmaster/postmaster.c | 17 +- src/backend/storage/file/fd.c | 3 +- src/backend/storage/ipc/ipc.c | 812 +----------------------------------- src/backend/storage/ipc/ipci.c | 18 +- src/backend/storage/ipc/shmem.c | 3 +- src/backend/storage/ipc/sinvaladt.c | 3 +- src/backend/storage/lmgr/lwlock.c | 8 +- src/backend/storage/lmgr/proc.c | 296 +++---------- src/backend/storage/lmgr/spin.c | 143 ++----- src/backend/tcop/postgres.c | 7 +- src/backend/utils/error/elog.c | 3 +- src/backend/utils/init/miscinit.c | 26 +- src/backend/utils/init/postinit.c | 5 +- src/include/miscadmin.h | 9 +- src/include/pg_config.h.in | 11 +- src/include/storage/ipc.h | 71 +--- src/include/storage/pg_sema.h | 76 ++++ src/include/storage/pg_shmem.h | 44 ++ src/include/storage/proc.h | 49 +-- src/include/storage/s_lock.h | 15 +- src/include/storage/spin.h | 4 +- src/pl/plperl/SPI.xs | 16 - src/pl/plperl/plperl.c | 18 +- src/template/darwin | 3 + 37 files changed, 1660 insertions(+), 1371 deletions(-) create mode 100644 src/backend/port/posix_sema.c create mode 100644 src/backend/port/sysv_sema.c create 
mode 100644 src/backend/port/sysv_shmem.c create mode 100644 src/include/storage/pg_sema.h create mode 100644 src/include/storage/pg_shmem.h diff --git a/configure b/configure index bf8518d5c4..9389f7c8a5 100755 --- a/configure +++ b/configure @@ -15664,6 +15664,42 @@ HAVE_POSIX_SIGNALS=$pgac_cv_func_posix_signals +# Select semaphore implementation type. +if test x"$USE_NAMED_POSIX_SEMAPHORES" = x"1" ; then + +cat >>confdefs.h <<\_ACEOF +#define USE_NAMED_POSIX_SEMAPHORES 1 +_ACEOF + + SEMA_IMPLEMENTATION="src/backend/port/posix_sema.c" +else + if test x"$USE_UNNAMED_POSIX_SEMAPHORES" = x"1" ; then + +cat >>confdefs.h <<\_ACEOF +#define USE_UNNAMED_POSIX_SEMAPHORES 1 +_ACEOF + + SEMA_IMPLEMENTATION="src/backend/port/posix_sema.c" + else + +cat >>confdefs.h <<\_ACEOF +#define USE_SYSV_SEMAPHORES 1 +_ACEOF + + SEMA_IMPLEMENTATION="src/backend/port/sysv_sema.c" + fi +fi + + +# Select shared-memory implementation type. + +cat >>confdefs.h <<\_ACEOF +#define USE_SYSV_SHARED_MEMORY 1 +_ACEOF + +SHMEM_IMPLEMENTATION="src/backend/port/sysv_shmem.c" + + if test "$enable_nls" = yes ; then echo "$as_me:$LINENO: checking for library containing gettext" >&5 @@ -16724,7 +16760,7 @@ fi ac_config_files="$ac_config_files GNUmakefile src/Makefile.global" -ac_config_links="$ac_config_links src/backend/port/dynloader.c:src/backend/port/dynloader/${template}.c src/include/dynloader.h:src/backend/port/dynloader/${template}.h src/include/pg_config_os.h:src/include/port/${template}.h src/Makefile.port:src/makefiles/Makefile.${template}" +ac_config_links="$ac_config_links src/backend/port/dynloader.c:src/backend/port/dynloader/${template}.c src/backend/port/pg_sema.c:${SEMA_IMPLEMENTATION} src/backend/port/pg_shmem.c:${SHMEM_IMPLEMENTATION} src/include/dynloader.h:src/backend/port/dynloader/${template}.h src/include/pg_config_os.h:src/include/port/${template}.h src/Makefile.port:src/makefiles/Makefile.${template}" ac_config_headers="$ac_config_headers src/include/pg_config.h" @@ -17207,6 
+17243,8 @@ do "src/Makefile.global" ) CONFIG_FILES="$CONFIG_FILES src/Makefile.global" ;; "src/backend/port/tas.s" ) CONFIG_LINKS="$CONFIG_LINKS src/backend/port/tas.s:src/backend/port/tas/${tas_file}" ;; "src/backend/port/dynloader.c" ) CONFIG_LINKS="$CONFIG_LINKS src/backend/port/dynloader.c:src/backend/port/dynloader/${template}.c" ;; + "src/backend/port/pg_sema.c" ) CONFIG_LINKS="$CONFIG_LINKS src/backend/port/pg_sema.c:${SEMA_IMPLEMENTATION}" ;; + "src/backend/port/pg_shmem.c" ) CONFIG_LINKS="$CONFIG_LINKS src/backend/port/pg_shmem.c:${SHMEM_IMPLEMENTATION}" ;; "src/include/dynloader.h" ) CONFIG_LINKS="$CONFIG_LINKS src/include/dynloader.h:src/backend/port/dynloader/${template}.h" ;; "src/include/pg_config_os.h" ) CONFIG_LINKS="$CONFIG_LINKS src/include/pg_config_os.h:src/include/port/${template}.h" ;; "src/Makefile.port" ) CONFIG_LINKS="$CONFIG_LINKS src/Makefile.port:src/makefiles/Makefile.${template}" ;; diff --git a/configure.in b/configure.in index e0b4ca4fc6..342bf851e7 100644 --- a/configure.in +++ b/configure.in @@ -1,5 +1,5 @@ dnl Process this file with autoconf to produce a configure script. -dnl $Header: /cvsroot/pgsql/configure.in,v 1.183 2002/04/26 19:47:35 tgl Exp $ +dnl $Header: /cvsroot/pgsql/configure.in,v 1.184 2002/05/05 00:03:28 tgl Exp $ dnl Developers, please strive to achieve this order: dnl @@ -1168,6 +1168,26 @@ AC_CHECK_TYPES([sig_atomic_t], [], [], [#include ]) PGAC_FUNC_POSIX_SIGNALS +# Select semaphore implementation type. 
+if test x"$USE_NAMED_POSIX_SEMAPHORES" = x"1" ; then + AC_DEFINE(USE_NAMED_POSIX_SEMAPHORES, 1, [Define to select named POSIX semaphores]) + SEMA_IMPLEMENTATION="src/backend/port/posix_sema.c" +else + if test x"$USE_UNNAMED_POSIX_SEMAPHORES" = x"1" ; then + AC_DEFINE(USE_UNNAMED_POSIX_SEMAPHORES, 1, [Define to select unnamed POSIX semaphores]) + SEMA_IMPLEMENTATION="src/backend/port/posix_sema.c" + else + AC_DEFINE(USE_SYSV_SEMAPHORES, 1, [Define to select SysV-style semaphores]) + SEMA_IMPLEMENTATION="src/backend/port/sysv_sema.c" + fi +fi + + +# Select shared-memory implementation type. +AC_DEFINE(USE_SYSV_SHARED_MEMORY, 1, [Define to select SysV-style shared memory]) +SHMEM_IMPLEMENTATION="src/backend/port/sysv_shmem.c" + + if test "$enable_nls" = yes ; then PGAC_CHECK_GETTEXT fi @@ -1222,6 +1242,8 @@ AC_CONFIG_FILES([GNUmakefile src/Makefile.global]) AC_CONFIG_LINKS([ src/backend/port/dynloader.c:src/backend/port/dynloader/${template}.c + src/backend/port/pg_sema.c:${SEMA_IMPLEMENTATION} + src/backend/port/pg_shmem.c:${SHMEM_IMPLEMENTATION} src/include/dynloader.h:src/backend/port/dynloader/${template}.h src/include/pg_config_os.h:src/include/port/${template}.h src/Makefile.port:src/makefiles/Makefile.${template} diff --git a/src/backend/Makefile b/src/backend/Makefile index b12d828c7f..6e7512ee94 100644 --- a/src/backend/Makefile +++ b/src/backend/Makefile @@ -4,7 +4,7 @@ # # Copyright (c) 1994, Regents of the University of California # -# $Header: /cvsroot/pgsql/src/backend/Makefile,v 1.77 2002/03/13 00:05:05 petere Exp $ +# $Header: /cvsroot/pgsql/src/backend/Makefile,v 1.78 2002/05/05 00:03:28 tgl Exp $ # #------------------------------------------------------------------------- @@ -194,7 +194,7 @@ ifeq ($(enable_nls), yes) endif distclean: clean - rm -f port/tas.s port/dynloader.c + rm -f port/tas.s port/dynloader.c port/pg_sema.c port/pg_shmem.c maintainer-clean: distclean rm -f $(srcdir)/bootstrap/bootparse.c \ diff --git 
a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 6028b2db48..51f432b1a0 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/bootstrap/bootstrap.c,v 1.127 2002/04/27 21:24:33 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/bootstrap/bootstrap.c,v 1.128 2002/05/05 00:03:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -33,6 +33,7 @@ #include "catalog/pg_type.h" #include "libpq/pqsignal.h" #include "miscadmin.h" +#include "storage/ipc.h" #include "storage/proc.h" #include "tcop/tcopprot.h" #include "utils/builtins.h" diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c index 5c521da2a4..9e213edeae 100644 --- a/src/backend/catalog/namespace.c +++ b/src/backend/catalog/namespace.c @@ -13,7 +13,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/catalog/namespace.c,v 1.17 2002/05/01 23:06:41 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/catalog/namespace.c,v 1.18 2002/05/05 00:03:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -36,6 +36,7 @@ #include "miscadmin.h" #include "nodes/makefuncs.h" #include "storage/backendid.h" +#include "storage/ipc.h" #include "utils/acl.h" #include "utils/builtins.h" #include "utils/catcache.h" diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c index cbfe37f027..8193174aec 100644 --- a/src/backend/commands/async.c +++ b/src/backend/commands/async.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/async.c,v 1.83 2002/03/06 06:09:29 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/async.c,v 
1.84 2002/05/05 00:03:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -85,6 +85,7 @@ #include "libpq/libpq.h" #include "libpq/pqformat.h" #include "miscadmin.h" +#include "storage/ipc.h" #include "tcop/tcopprot.h" #include "utils/fmgroids.h" #include "utils/ps_status.h" diff --git a/src/backend/libpq/auth.c b/src/backend/libpq/auth.c index 81a494905e..d29d034e84 100644 --- a/src/backend/libpq/auth.c +++ b/src/backend/libpq/auth.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/libpq/auth.c,v 1.80 2002/04/04 04:25:47 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/libpq/auth.c,v 1.81 2002/05/05 00:03:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -25,6 +25,7 @@ #endif #include #include + #include "libpq/auth.h" #include "libpq/crypt.h" #include "libpq/hba.h" @@ -32,6 +33,8 @@ #include "libpq/password.h" #include "libpq/pqformat.h" #include "miscadmin.h" +#include "storage/ipc.h" + static void sendAuthRequest(Port *port, AuthRequest areq); static int old_be_recvauth(Port *port); diff --git a/src/backend/libpq/pqcomm.c b/src/backend/libpq/pqcomm.c index f0db43239c..a82771dcf3 100644 --- a/src/backend/libpq/pqcomm.c +++ b/src/backend/libpq/pqcomm.c @@ -29,7 +29,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: pqcomm.c,v 1.132 2002/04/21 01:03:33 tgl Exp $ + * $Id: pqcomm.c,v 1.133 2002/05/05 00:03:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -79,6 +79,7 @@ #include "libpq/libpq.h" #include "miscadmin.h" +#include "storage/ipc.h" static void pq_close(void); diff --git a/src/backend/port/Makefile b/src/backend/port/Makefile index 93823b44cd..1370cdbb78 100644 --- a/src/backend/port/Makefile +++ b/src/backend/port/Makefile @@ -13,7 +13,7 @@ # be converted to Method 2. 
# # IDENTIFICATION -# $Header: /cvsroot/pgsql/src/backend/port/Makefile,v 1.11 2002/03/13 00:05:06 petere Exp $ +# $Header: /cvsroot/pgsql/src/backend/port/Makefile,v 1.12 2002/05/05 00:03:28 tgl Exp $ # #------------------------------------------------------------------------- @@ -21,7 +21,7 @@ subdir = src/backend/port top_builddir = ../../.. include $(top_builddir)/src/Makefile.global -OBJS = dynloader.o +OBJS = dynloader.o pg_sema.o pg_shmem.o OBJS += $(GETHOSTNAME) $(GETRUSAGE) $(INET_ATON) $(ISINF) $(MEMCMP) \ $(MISSING_RANDOM) $(SNPRINTF) $(SRANDOM) $(STRCASECMP) $(STRERROR) \ diff --git a/src/backend/port/posix_sema.c b/src/backend/port/posix_sema.c new file mode 100644 index 0000000000..1dd02f8def --- /dev/null +++ b/src/backend/port/posix_sema.c @@ -0,0 +1,357 @@ +/*------------------------------------------------------------------------- + * + * posix_sema.c + * Implement PGSemaphores using POSIX semaphore facilities + * + * We prefer the unnamed style of POSIX semaphore (the kind made with + * sem_init). We can cope with the kind made with sem_open, however. 
+ * + * + * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/port/posix_sema.c,v 1.1 2002/05/05 00:03:28 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include +#include +#include + +#include "storage/pg_sema.h" + + +#ifdef USE_NAMED_POSIX_SEMAPHORES +/* PGSemaphore is pointer to pointer to sem_t */ +#define PG_SEM_REF(x) (*(x)) +#else +/* PGSemaphore is pointer to sem_t */ +#define PG_SEM_REF(x) (x) +#endif + + +#define IPCProtection (0600) /* access/modify by user only */ + +static sem_t **mySemPointers; /* keep track of created semaphores */ +static int numSems; /* number of semas acquired so far */ +static int maxSems; /* allocated size of mySemPointers array */ +static int nextSemKey; /* next name to try */ + + +static void ReleaseSemaphores(int status, Datum arg); + + +#ifdef USE_NAMED_POSIX_SEMAPHORES + +/* + * PosixSemaphoreCreate + * + * Attempt to create a new named semaphore. + * + * If we fail with a failure code other than collision-with-existing-sema, + * print out an error and abort. Other types of errors suggest nonrecoverable + * problems. + */ +static sem_t * +PosixSemaphoreCreate(void) +{ + int semKey; + char semname[64]; + sem_t *mySem; + + for (;;) + { + semKey = nextSemKey++; + + snprintf(semname, sizeof(semname), "/pgsql-%d", semKey); + + mySem = sem_open(semname, O_CREAT | O_EXCL, + (mode_t) IPCProtection, (unsigned) 1); + if (mySem != SEM_FAILED) + break; + + /* Loop if error indicates a collision */ + if (errno == EEXIST || errno == EACCES || errno == EINTR) + continue; + + /* + * Else complain and abort + */ + fprintf(stderr, "PosixSemaphoreCreate: sem_open(%s) failed: %s\n", + semname, strerror(errno)); + proc_exit(1); + } + + /* + * Unlink the semaphore immediately, so it can't be accessed externally. 
+ * This also ensures that it will go away if we crash. + */ + sem_unlink(semname); + + return mySem; +} + +#else /* !USE_NAMED_POSIX_SEMAPHORES */ + +/* + * PosixSemaphoreCreate + * + * Attempt to create a new unnamed semaphore. + */ +static void +PosixSemaphoreCreate(sem_t *sem) +{ + if (sem_init(sem, 1, 1) < 0) + { + fprintf(stderr, "PosixSemaphoreCreate: sem_init failed: %s\n", + strerror(errno)); + proc_exit(1); + } +} + +#endif /* USE_NAMED_POSIX_SEMAPHORES */ + + +/* + * PosixSemaphoreKill - removes a semaphore + */ +static void +PosixSemaphoreKill(sem_t *sem) +{ +#ifdef USE_NAMED_POSIX_SEMAPHORES + /* Got to use sem_close for named semaphores */ + if (sem_close(sem) < 0) + fprintf(stderr, "PosixSemaphoreKill: sem_close failed: %s\n", + strerror(errno)); +#else + /* Got to use sem_destroy for unnamed semaphores */ + if (sem_destroy(sem) < 0) + fprintf(stderr, "PosixSemaphoreKill: sem_destroy failed: %s\n", + strerror(errno)); +#endif +} + + +/* + * PGReserveSemaphores --- initialize semaphore support + * + * This is called during postmaster start or shared memory reinitialization. + * It should do whatever is needed to be able to support up to maxSemas + * subsequent PGSemaphoreCreate calls. Also, if any system resources + * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit + * callback to release them. + * + * The port number is passed for possible use as a key (for Posix, we use + * it to generate the starting semaphore name). In a standalone backend, + * zero will be passed. + * + * In the Posix implementation, we acquire semaphores on-demand; the + * maxSemas parameter is just used to size the array that keeps track of + * acquired semas for subsequent releasing. 
+ */ +void +PGReserveSemaphores(int maxSemas, int port) +{ + mySemPointers = (sem_t **) malloc(maxSemas * sizeof(sem_t *)); + if (mySemPointers == NULL) + elog(PANIC, "Out of memory in PGReserveSemaphores"); + numSems = 0; + maxSems = maxSemas; + nextSemKey = port * 1000; + + on_shmem_exit(ReleaseSemaphores, 0); +} + +/* + * Release semaphores at shutdown or shmem reinitialization + * + * (called as an on_shmem_exit callback, hence funny argument list) + */ +static void +ReleaseSemaphores(int status, Datum arg) +{ + int i; + + for (i = 0; i < numSems; i++) + PosixSemaphoreKill(mySemPointers[i]); + free(mySemPointers); +} + +/* + * PGSemaphoreCreate + * + * Initialize a PGSemaphore structure to represent a sema with count 1 + */ +void +PGSemaphoreCreate(PGSemaphore sema) +{ + sem_t *newsem; + + /* Can't do this in a backend, because static state is postmaster's */ + Assert(!IsUnderPostmaster); + + if (numSems >= maxSems) + elog(PANIC, "PGSemaphoreCreate: too many semaphores created"); + +#ifdef USE_NAMED_POSIX_SEMAPHORES + *sema = newsem = PosixSemaphoreCreate(); +#else + PosixSemaphoreCreate(sema); + newsem = sema; +#endif + + /* Remember new sema for ReleaseSemaphores */ + mySemPointers[numSems++] = newsem; +} + +/* + * PGSemaphoreReset + * + * Reset a previously-initialized PGSemaphore to have count 0 + */ +void +PGSemaphoreReset(PGSemaphore sema) +{ + /* + * There's no direct API for this in POSIX, so we have to ratchet the + * semaphore down to 0 with repeated trywait's. + */ + for (;;) + { + if (sem_trywait(PG_SEM_REF(sema)) < 0) + { + if (errno == EAGAIN || errno == EDEADLK) + break; /* got it down to 0 */ + if (errno == EINTR) + continue; /* can this happen? 
*/ + fprintf(stderr, "PGSemaphoreReset: sem_trywait failed: %s\n", + strerror(errno)); + proc_exit(1); + } + } +} + +/* + * PGSemaphoreLock + * + * Lock a semaphore (decrement count), blocking if count would be < 0 + */ +void +PGSemaphoreLock(PGSemaphore sema, bool interruptOK) +{ + int errStatus; + + /* + * Note: if errStatus is -1 and errno == EINTR then it means we + * returned from the operation prematurely because we were sent a + * signal. So we try and lock the semaphore again. + * + * Each time around the loop, we check for a cancel/die interrupt. We + * assume that if such an interrupt comes in while we are waiting, it + * will cause the sem_wait() call to exit with errno == EINTR, so that we + * will be able to service the interrupt (if not in a critical section + * already). + * + * Once we acquire the lock, we do NOT check for an interrupt before + * returning. The caller needs to be able to record ownership of the + * lock before any interrupt can be accepted. + * + * There is a window of a few instructions between CHECK_FOR_INTERRUPTS + * and entering the sem_wait() call. If a cancel/die interrupt occurs in + * that window, we would fail to notice it until after we acquire the + * lock (or get another interrupt to escape the sem_wait()). We can + * avoid this problem by temporarily setting ImmediateInterruptOK to + * true before we do CHECK_FOR_INTERRUPTS; then, a die() interrupt in + * this interval will execute directly. However, there is a huge + * pitfall: there is another window of a few instructions after the + * sem_wait() before we are able to reset ImmediateInterruptOK. If an + * interrupt occurs then, we'll lose control, which means that the + * lock has been acquired but our caller did not get a chance to + * record the fact. Therefore, we only set ImmediateInterruptOK if the + * caller tells us it's OK to do so, ie, the caller does not need to + * record acquiring the lock. 
(This is currently true for lockmanager + * locks, since the process that granted us the lock did all the + * necessary state updates. It's not true for Posix semaphores used to + * implement LW locks or emulate spinlocks --- but the wait time for + * such locks should not be very long, anyway.) + */ + do + { + ImmediateInterruptOK = interruptOK; + CHECK_FOR_INTERRUPTS(); + errStatus = sem_wait(PG_SEM_REF(sema)); + ImmediateInterruptOK = false; + } while (errStatus < 0 && errno == EINTR); + + if (errStatus < 0) + { + fprintf(stderr, "PGSemaphoreLock: sem_wait failed: %s\n", + strerror(errno)); + proc_exit(255); + } +} + +/* + * PGSemaphoreUnlock + * + * Unlock a semaphore (increment count) + */ +void +PGSemaphoreUnlock(PGSemaphore sema) +{ + int errStatus; + + /* + * Note: if errStatus is -1 and errno == EINTR then it means we + * returned from the operation prematurely because we were sent a + * signal. So we try and unlock the semaphore again. Not clear this + * can really happen, but might as well cope. + */ + do + { + errStatus = sem_post(PG_SEM_REF(sema)); + } while (errStatus < 0 && errno == EINTR); + + if (errStatus < 0) + { + fprintf(stderr, "PGSemaphoreUnlock: sem_post failed: %s\n", + strerror(errno)); + proc_exit(255); + } +} + +/* + * PGSemaphoreTryLock + * + * Lock a semaphore only if able to do so without blocking + */ +bool +PGSemaphoreTryLock(PGSemaphore sema) +{ + int errStatus; + + /* + * Note: if errStatus is -1 and errno == EINTR then it means we + * returned from the operation prematurely because we were sent a + * signal. So we try and lock the semaphore again. 
+ */ + do + { + errStatus = sem_trywait(PG_SEM_REF(sema)); + } while (errStatus < 0 && errno == EINTR); + + if (errStatus < 0) + { + if (errno == EAGAIN || errno == EDEADLK) + return false; /* failed to lock it */ + /* Otherwise we got trouble */ + fprintf(stderr, "PGSemaphoreTryLock: sem_trywait failed: %s\n", + strerror(errno)); + proc_exit(255); + } + + return true; +} diff --git a/src/backend/port/sysv_sema.c b/src/backend/port/sysv_sema.c new file mode 100644 index 0000000000..d868602de2 --- /dev/null +++ b/src/backend/port/sysv_sema.c @@ -0,0 +1,522 @@ +/*------------------------------------------------------------------------- + * + * sysv_sema.c + * Implement PGSemaphores using SysV semaphore facilities + * + * + * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/port/sysv_sema.c,v 1.1 2002/05/05 00:03:28 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include +#include +#include +#include +#include +#ifdef HAVE_SYS_IPC_H +#include +#endif +#ifdef HAVE_SYS_SEM_H +#include +#endif +#ifdef HAVE_KERNEL_OS_H +#include +#endif + +#include "miscadmin.h" +#include "storage/ipc.h" +#include "storage/pg_sema.h" + + +#ifndef HAVE_UNION_SEMUN +union semun +{ + int val; + struct semid_ds *buf; + unsigned short *array; +}; +#endif + +typedef uint32 IpcSemaphoreKey; /* semaphore key passed to semget(2) */ +typedef int IpcSemaphoreId; /* semaphore ID returned by semget(2) */ + +/* + * SEMAS_PER_SET is the number of useful semaphores in each semaphore set + * we allocate. It must be *less than* your kernel's SEMMSL (max semaphores + * per set) parameter, which is often around 25. (Less than, because we + * allocate one extra sema in each set for identification purposes.) 
+ */ +#define SEMAS_PER_SET 16 + +#define IPCProtection (0600) /* access/modify by user only */ + +#define PGSemaMagic 537 /* must be less than SEMVMX */ + + +static IpcSemaphoreId *mySemaSets; /* IDs of sema sets acquired so far */ +static int numSemaSets; /* number of sema sets acquired so far */ +static int maxSemaSets; /* allocated size of mySemaSets array */ +static IpcSemaphoreKey nextSemaKey; /* next key to try using */ +static int nextSemaNumber; /* next free sem num in last sema set */ + + +static IpcSemaphoreId InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, + int numSems); +static void IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum, + int value); +static void IpcSemaphoreKill(IpcSemaphoreId semId); +static int IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum); +static pid_t IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum); +static IpcSemaphoreId IpcSemaphoreCreate(int numSems); +static void ReleaseSemaphores(int status, Datum arg); + + +/* + * InternalIpcSemaphoreCreate + * + * Attempt to create a new semaphore set with the specified key. + * Will fail (return -1) if such a set already exists. + * + * If we fail with a failure code other than collision-with-existing-set, + * print out an error and abort. Other types of errors suggest nonrecoverable + * problems. + */ +static IpcSemaphoreId +InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, int numSems) +{ + int semId; + + semId = semget(semKey, numSems, IPC_CREAT | IPC_EXCL | IPCProtection); + + if (semId < 0) + { + /* + * Fail quietly if error indicates a collision with existing set. + * One would expect EEXIST, given that we said IPC_EXCL, but + * perhaps we could get a permission violation instead? Also, + * EIDRM might occur if an old set is slated for destruction but + * not gone yet. 
+ */ + if (errno == EEXIST || errno == EACCES +#ifdef EIDRM + || errno == EIDRM +#endif + ) + return -1; + + /* + * Else complain and abort + */ + fprintf(stderr, "IpcSemaphoreCreate: semget(key=%d, num=%d, 0%o) failed: %s\n", + (int) semKey, numSems, (IPC_CREAT | IPC_EXCL | IPCProtection), + strerror(errno)); + + if (errno == ENOSPC) + fprintf(stderr, + "\nThis error does *not* mean that you have run out of disk space.\n" + "\n" + "It occurs when either the system limit for the maximum number of\n" + "semaphore sets (SEMMNI), or the system wide maximum number of\n" + "semaphores (SEMMNS), would be exceeded. You need to raise the\n" + "respective kernel parameter. Alternatively, reduce PostgreSQL's\n" + "consumption of semaphores by reducing its max_connections parameter\n" + "(currently %d).\n" + "\n" + "The PostgreSQL Administrator's Guide contains more information about\n" + "configuring your system for PostgreSQL.\n\n", + MaxBackends); + + proc_exit(1); + } + + return semId; +} + +/* + * Initialize a semaphore to the specified value. + */ +static void +IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum, int value) +{ + union semun semun; + + semun.val = value; + if (semctl(semId, semNum, SETVAL, semun) < 0) + { + fprintf(stderr, "IpcSemaphoreInitialize: semctl(id=%d, %d, SETVAL, %d) failed: %s\n", + semId, semNum, value, strerror(errno)); + + if (errno == ERANGE) + fprintf(stderr, + "You possibly need to raise your kernel's SEMVMX value to be at least\n" + "%d. Look into the PostgreSQL documentation for details.\n", + value); + + proc_exit(1); + } +} + +/* + * IpcSemaphoreKill(semId) - removes a semaphore set + */ +static void +IpcSemaphoreKill(IpcSemaphoreId semId) +{ + union semun semun; + + semun.val = 0; /* unused, but keep compiler quiet */ + + if (semctl(semId, 0, IPC_RMID, semun) < 0) + fprintf(stderr, "IpcSemaphoreKill: semctl(%d, 0, IPC_RMID, ...) 
failed: %s\n", + semId, strerror(errno)); + + /* + * We used to report a failure via elog(WARNING), but that's pretty + * pointless considering any client has long since disconnected ... + */ +} + +/* Get the current value (semval) of the semaphore */ +static int +IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum) +{ + union semun dummy; /* for Solaris */ + + dummy.val = 0; /* unused */ + + return semctl(semId, semNum, GETVAL, dummy); +} + +/* Get the PID of the last process to do semop() on the semaphore */ +static pid_t +IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum) +{ + union semun dummy; /* for Solaris */ + + dummy.val = 0; /* unused */ + + return semctl(semId, semNum, GETPID, dummy); +} + + +/* + * Create a semaphore set with the given number of useful semaphores + * (an additional sema is actually allocated to serve as identifier). + * Dead Postgres sema sets are recycled if found, but we do not fail + * upon collision with non-Postgres sema sets. + * + * The idea here is to detect and re-use keys that may have been assigned + * by a crashed postmaster or backend. + */ +static IpcSemaphoreId +IpcSemaphoreCreate(int numSems) +{ + IpcSemaphoreId semId; + union semun semun; + PGSemaphoreData mysema; + + /* Loop till we find a free IPC key */ + for (nextSemaKey++; ; nextSemaKey++) + { + pid_t creatorPID; + + /* Try to create new semaphore set */ + semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1); + if (semId >= 0) + break; /* successful create */ + + /* See if it looks to be leftover from a dead Postgres process */ + semId = semget(nextSemaKey, numSems + 1, 0); + if (semId < 0) + continue; /* failed: must be some other app's */ + if (IpcSemaphoreGetValue(semId, numSems) != PGSemaMagic) + continue; /* sema belongs to a non-Postgres app */ + + /* + * If the creator PID is my own PID or does not belong to any + * extant process, it's safe to zap it. 
+ */ + creatorPID = IpcSemaphoreGetLastPID(semId, numSems); + if (creatorPID <= 0) + continue; /* oops, GETPID failed */ + if (creatorPID != getpid()) + { + if (kill(creatorPID, 0) == 0 || + errno != ESRCH) + continue; /* sema belongs to a live process */ + } + + /* + * The sema set appears to be from a dead Postgres process, or + * from a previous cycle of life in this same process. Zap it, if + * possible. This probably shouldn't fail, but if it does, assume + * the sema set belongs to someone else after all, and continue + * quietly. + */ + semun.val = 0; /* unused, but keep compiler quiet */ + if (semctl(semId, 0, IPC_RMID, semun) < 0) + continue; + + /* + * Now try again to create the sema set. + */ + semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1); + if (semId >= 0) + break; /* successful create */ + + /* + * Can only get here if some other process managed to create the + * same sema key before we did. Let him have that one, loop + * around to try next key. + */ + } + + /* + * OK, we created a new sema set. Mark it as created by this process. + * We do this by setting the spare semaphore to PGSemaMagic-1 and then + * incrementing it with semop(). That leaves it with value + * PGSemaMagic and sempid referencing this process. + */ + IpcSemaphoreInitialize(semId, numSems, PGSemaMagic - 1); + mysema.semId = semId; + mysema.semNum = numSems; + PGSemaphoreUnlock(&mysema); + + return semId; +} + + +/* + * PGReserveSemaphores --- initialize semaphore support + * + * This is called during postmaster start or shared memory reinitialization. + * It should do whatever is needed to be able to support up to maxSemas + * subsequent PGSemaphoreCreate calls. Also, if any system resources + * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit + * callback to release them. + * + * The port number is passed for possible use as a key (for SysV, we use + * it to generate the starting semaphore key). In a standalone backend, + * zero will be passed. 
+ * + * In the SysV implementation, we acquire semaphore sets on-demand; the + * maxSemas parameter is just used to size the array that keeps track of + * acquired sets for subsequent releasing. + */ +void +PGReserveSemaphores(int maxSemas, int port) +{ + maxSemaSets = (maxSemas + SEMAS_PER_SET-1) / SEMAS_PER_SET; + mySemaSets = (IpcSemaphoreId *) + malloc(maxSemaSets * sizeof(IpcSemaphoreId)); + if (mySemaSets == NULL) + elog(PANIC, "Out of memory in PGReserveSemaphores"); + numSemaSets = 0; + nextSemaKey = port * 1000; + nextSemaNumber = SEMAS_PER_SET; /* force sema set alloc on 1st call */ + + on_shmem_exit(ReleaseSemaphores, 0); +} + +/* + * Release semaphores at shutdown or shmem reinitialization + * + * (called as an on_shmem_exit callback, hence funny argument list) + */ +static void +ReleaseSemaphores(int status, Datum arg) +{ + int i; + + for (i = 0; i < numSemaSets; i++) + IpcSemaphoreKill(mySemaSets[i]); + free(mySemaSets); +} + +/* + * PGSemaphoreCreate + * + * Initialize a PGSemaphore structure to represent a sema with count 1 + */ +void +PGSemaphoreCreate(PGSemaphore sema) +{ + /* Can't do this in a backend, because static state is postmaster's */ + Assert(!IsUnderPostmaster); + + if (nextSemaNumber >= SEMAS_PER_SET) + { + /* Time to allocate another semaphore set */ + if (numSemaSets >= maxSemaSets) + elog(PANIC, "PGSemaphoreCreate: too many semaphores created"); + mySemaSets[numSemaSets] = IpcSemaphoreCreate(SEMAS_PER_SET); + numSemaSets++; + nextSemaNumber = 0; + } + /* Assign the next free semaphore in the current set */ + sema->semId = mySemaSets[numSemaSets-1]; + sema->semNum = nextSemaNumber++; + /* Initialize it to count 1 */ + IpcSemaphoreInitialize(sema->semId, sema->semNum, 1); +} + +/* + * PGSemaphoreReset + * + * Reset a previously-initialized PGSemaphore to have count 0 + */ +void +PGSemaphoreReset(PGSemaphore sema) +{ + IpcSemaphoreInitialize(sema->semId, sema->semNum, 0); +} + +/* + * PGSemaphoreLock + * + * Lock a semaphore (decrement 
count), blocking if count would be < 0 + */ +void +PGSemaphoreLock(PGSemaphore sema, bool interruptOK) +{ + int errStatus; + struct sembuf sops; + + sops.sem_op = -1; /* decrement */ + sops.sem_flg = 0; + sops.sem_num = sema->semNum; + + /* + * Note: if errStatus is -1 and errno == EINTR then it means we + * returned from the operation prematurely because we were sent a + * signal. So we try and lock the semaphore again. + * + * Each time around the loop, we check for a cancel/die interrupt. We + * assume that if such an interrupt comes in while we are waiting, it + * will cause the semop() call to exit with errno == EINTR, so that we + * will be able to service the interrupt (if not in a critical section + * already). + * + * Once we acquire the lock, we do NOT check for an interrupt before + * returning. The caller needs to be able to record ownership of the + * lock before any interrupt can be accepted. + * + * There is a window of a few instructions between CHECK_FOR_INTERRUPTS + * and entering the semop() call. If a cancel/die interrupt occurs in + * that window, we would fail to notice it until after we acquire the + * lock (or get another interrupt to escape the semop()). We can + * avoid this problem by temporarily setting ImmediateInterruptOK to + * true before we do CHECK_FOR_INTERRUPTS; then, a die() interrupt in + * this interval will execute directly. However, there is a huge + * pitfall: there is another window of a few instructions after the + * semop() before we are able to reset ImmediateInterruptOK. If an + * interrupt occurs then, we'll lose control, which means that the + * lock has been acquired but our caller did not get a chance to + * record the fact. Therefore, we only set ImmediateInterruptOK if the + * caller tells us it's OK to do so, ie, the caller does not need to + * record acquiring the lock. (This is currently true for lockmanager + * locks, since the process that granted us the lock did all the + * necessary state updates. 
It's not true for SysV semaphores used to + * implement LW locks or emulate spinlocks --- but the wait time for + * such locks should not be very long, anyway.) + */ + do + { + ImmediateInterruptOK = interruptOK; + CHECK_FOR_INTERRUPTS(); + errStatus = semop(sema->semId, &sops, 1); + ImmediateInterruptOK = false; + } while (errStatus < 0 && errno == EINTR); + + if (errStatus < 0) + { + fprintf(stderr, "PGSemaphoreLock: semop(id=%d) failed: %s\n", + sema->semId, strerror(errno)); + proc_exit(255); + } +} + +/* + * PGSemaphoreUnlock + * + * Unlock a semaphore (increment count) + */ +void +PGSemaphoreUnlock(PGSemaphore sema) +{ + int errStatus; + struct sembuf sops; + + sops.sem_op = 1; /* increment */ + sops.sem_flg = 0; + sops.sem_num = sema->semNum; + + /* + * Note: if errStatus is -1 and errno == EINTR then it means we + * returned from the operation prematurely because we were sent a + * signal. So we try and unlock the semaphore again. Not clear this + * can really happen, but might as well cope. + */ + do + { + errStatus = semop(sema->semId, &sops, 1); + } while (errStatus < 0 && errno == EINTR); + + if (errStatus < 0) + { + fprintf(stderr, "PGSemaphoreUnlock: semop(id=%d) failed: %s\n", + sema->semId, strerror(errno)); + proc_exit(255); + } +} + +/* + * PGSemaphoreTryLock + * + * Lock a semaphore only if able to do so without blocking + */ +bool +PGSemaphoreTryLock(PGSemaphore sema) +{ + int errStatus; + struct sembuf sops; + + sops.sem_op = -1; /* decrement */ + sops.sem_flg = IPC_NOWAIT; /* but don't block */ + sops.sem_num = sema->semNum; + + /* + * Note: if errStatus is -1 and errno == EINTR then it means we + * returned from the operation prematurely because we were sent a + * signal. So we try and lock the semaphore again. 
+ */ + do + { + errStatus = semop(sema->semId, &sops, 1); + } while (errStatus < 0 && errno == EINTR); + + if (errStatus < 0) + { + /* Expect EAGAIN or EWOULDBLOCK (platform-dependent) */ +#ifdef EAGAIN + if (errno == EAGAIN) + return false; /* failed to lock it */ +#endif +#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN)) + if (errno == EWOULDBLOCK) + return false; /* failed to lock it */ +#endif + /* Otherwise we got trouble */ + fprintf(stderr, "PGSemaphoreTryLock: semop(id=%d) failed: %s\n", + sema->semId, strerror(errno)); + proc_exit(255); + } + + return true; +} diff --git a/src/backend/port/sysv_shmem.c b/src/backend/port/sysv_shmem.c new file mode 100644 index 0000000000..41d5bdd374 --- /dev/null +++ b/src/backend/port/sysv_shmem.c @@ -0,0 +1,400 @@ +/*------------------------------------------------------------------------- + * + * sysv_shmem.c + * Implement shared memory using SysV facilities + * + * These routines represent a fairly thin layer on top of SysV shared + * memory functionality. 
+ * + * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/port/sysv_shmem.c,v 1.1 2002/05/05 00:03:28 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include +#include +#include +#include +#include +#ifdef HAVE_SYS_IPC_H +#include +#endif +#ifdef HAVE_SYS_SHM_H +#include +#endif +#ifdef HAVE_KERNEL_OS_H +#include +#endif + +#include "miscadmin.h" +#include "storage/ipc.h" +#include "storage/pg_shmem.h" + + +typedef uint32 IpcMemoryKey; /* shared memory key passed to shmget(2) */ +typedef int IpcMemoryId; /* shared memory ID returned by shmget(2) */ + +#define IPCProtection (0600) /* access/modify by user only */ + + +static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, uint32 size); +static void IpcMemoryDetach(int status, Datum shmaddr); +static void IpcMemoryDelete(int status, Datum shmId); +static void *PrivateMemoryCreate(uint32 size); +static void PrivateMemoryDelete(int status, Datum memaddr); + + +/* + * InternalIpcMemoryCreate(memKey, size) + * + * Attempt to create a new shared memory segment with the specified key. + * Will fail (return NULL) if such a segment already exists. If successful, + * attach the segment to the current process and return its attached address. + * On success, callbacks are registered with on_shmem_exit to detach and + * delete the segment when on_shmem_exit is called. + * + * If we fail with a failure code other than collision-with-existing-segment, + * print out an error and abort. Other types of errors are not recoverable. 
+ */ +static void * +InternalIpcMemoryCreate(IpcMemoryKey memKey, uint32 size) +{ + IpcMemoryId shmid; + void *memAddress; + + shmid = shmget(memKey, size, IPC_CREAT | IPC_EXCL | IPCProtection); + + if (shmid < 0) + { + /* + * Fail quietly if error indicates a collision with existing + * segment. One would expect EEXIST, given that we said IPC_EXCL, + * but perhaps we could get a permission violation instead? Also, + * EIDRM might occur if an old seg is slated for destruction but + * not gone yet. + */ + if (errno == EEXIST || errno == EACCES +#ifdef EIDRM + || errno == EIDRM +#endif + ) + return NULL; + + /* + * Else complain and abort + */ + fprintf(stderr, "IpcMemoryCreate: shmget(key=%d, size=%u, 0%o) failed: %s\n", + (int) memKey, size, (IPC_CREAT | IPC_EXCL | IPCProtection), + strerror(errno)); + + if (errno == EINVAL) + fprintf(stderr, + "\nThis error usually means that PostgreSQL's request for a shared memory\n" + "segment exceeded your kernel's SHMMAX parameter. You can either\n" + "reduce the request size or reconfigure the kernel with larger SHMMAX.\n" + "To reduce the request size (currently %u bytes), reduce\n" + "PostgreSQL's shared_buffers parameter (currently %d) and/or\n" + "its max_connections parameter (currently %d).\n" + "\n" + "If the request size is already small, it's possible that it is less than\n" + "your kernel's SHMMIN parameter, in which case raising the request size or\n" + "reconfiguring SHMMIN is called for.\n" + "\n" + "The PostgreSQL Administrator's Guide contains more information about\n" + "shared memory configuration.\n\n", + size, NBuffers, MaxBackends); + + else if (errno == ENOMEM) + fprintf(stderr, + "\nThis error usually means that PostgreSQL's request for a shared\n" + "memory segment exceeded available memory or swap space.\n" + "To reduce the request size (currently %u bytes), reduce\n" + "PostgreSQL's shared_buffers parameter (currently %d) and/or\n" + "its max_connections parameter (currently %d).\n" + "\n" + "The 
PostgreSQL Administrator's Guide contains more information about\n" + "shared memory configuration.\n\n", + size, NBuffers, MaxBackends); + + else if (errno == ENOSPC) + fprintf(stderr, + "\nThis error does *not* mean that you have run out of disk space.\n" + "\n" + "It occurs either if all available shared memory IDs have been taken,\n" + "in which case you need to raise the SHMMNI parameter in your kernel,\n" + "or because the system's overall limit for shared memory has been\n" + "reached. If you cannot increase the shared memory limit,\n" + "reduce PostgreSQL's shared memory request (currently %u bytes),\n" + "by reducing its shared_buffers parameter (currently %d) and/or\n" + "its max_connections parameter (currently %d).\n" + "\n" + "The PostgreSQL Administrator's Guide contains more information about\n" + "shared memory configuration.\n\n", + size, NBuffers, MaxBackends); + + proc_exit(1); + } + + /* Register on-exit routine to delete the new segment */ + on_shmem_exit(IpcMemoryDelete, Int32GetDatum(shmid)); + + /* OK, should be able to attach to the segment */ +#if defined(solaris) && defined(__sparc__) + /* use intimate shared memory on SPARC Solaris */ + memAddress = shmat(shmid, 0, SHM_SHARE_MMU); +#else + memAddress = shmat(shmid, 0, 0); +#endif + + if (memAddress == (void *) -1) + { + fprintf(stderr, "IpcMemoryCreate: shmat(id=%d) failed: %s\n", + shmid, strerror(errno)); + proc_exit(1); + } + + /* Register on-exit routine to detach new segment before deleting */ + on_shmem_exit(IpcMemoryDetach, PointerGetDatum(memAddress)); + + /* Record key and ID in lockfile for data directory. 
*/ + RecordSharedMemoryInLockFile((unsigned long) memKey, + (unsigned long) shmid); + + return memAddress; +} + +/****************************************************************************/ +/* IpcMemoryDetach(status, shmaddr) removes a shared memory segment */ +/* from process' address space */ +/* (called as an on_shmem_exit callback, hence funny argument list) */ +/****************************************************************************/ +static void +IpcMemoryDetach(int status, Datum shmaddr) +{ + if (shmdt(DatumGetPointer(shmaddr)) < 0) + fprintf(stderr, "IpcMemoryDetach: shmdt(%p) failed: %s\n", + DatumGetPointer(shmaddr), strerror(errno)); + + /* + * We used to report a failure via elog(WARNING), but that's pretty + * pointless considering any client has long since disconnected ... + */ +} + +/****************************************************************************/ +/* IpcMemoryDelete(status, shmId) deletes a shared memory segment */ +/* (called as an on_shmem_exit callback, hence funny argument list) */ +/****************************************************************************/ +static void +IpcMemoryDelete(int status, Datum shmId) +{ + if (shmctl(DatumGetInt32(shmId), IPC_RMID, (struct shmid_ds *) NULL) < 0) + fprintf(stderr, "IpcMemoryDelete: shmctl(%d, %d, 0) failed: %s\n", + DatumGetInt32(shmId), IPC_RMID, strerror(errno)); + + /* + * We used to report a failure via elog(WARNING), but that's pretty + * pointless considering any client has long since disconnected ... + */ +} + +/* + * PGSharedMemoryIsInUse + * + * Is a previously-existing shmem segment still existing and in use? + */ +bool +PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2) +{ + IpcMemoryId shmId = (IpcMemoryId) id2; + struct shmid_ds shmStat; + + /* + * We detect whether a shared memory segment is in use by seeing + * whether it (a) exists and (b) has any processes attached to it. 
+ * + * If we are unable to perform the stat operation for a reason other than + * nonexistence of the segment (most likely, because it doesn't belong + * to our userid), assume it is in use. + */ + if (shmctl(shmId, IPC_STAT, &shmStat) < 0) + { + /* + * EINVAL actually has multiple possible causes documented in the + * shmctl man page, but we assume it must mean the segment no + * longer exists. + */ + if (errno == EINVAL) + return false; + /* Else assume segment is in use */ + return true; + } + /* If it has attached processes, it's in use */ + if (shmStat.shm_nattch != 0) + return true; + return false; +} + + +/* ---------------------------------------------------------------- + * private memory support + * + * Rather than allocating shmem segments with IPC_PRIVATE key, we + * just malloc() the requested amount of space. This code emulates + * the needed shmem functions. + * ---------------------------------------------------------------- + */ + +static void * +PrivateMemoryCreate(uint32 size) +{ + void *memAddress; + + memAddress = malloc(size); + if (!memAddress) + { + fprintf(stderr, "PrivateMemoryCreate: malloc(%u) failed\n", size); + proc_exit(1); + } + MemSet(memAddress, 0, size); /* keep Purify quiet */ + + /* Register on-exit routine to release storage */ + on_shmem_exit(PrivateMemoryDelete, PointerGetDatum(memAddress)); + + return memAddress; +} + +static void +PrivateMemoryDelete(int status, Datum memaddr) +{ + free(DatumGetPointer(memaddr)); +} + + +/* + * PGSharedMemoryCreate + * + * Create a shared memory segment of the given size and initialize its + * standard header. Also, register an on_shmem_exit callback to release + * the storage. + * + * Dead Postgres segments are recycled if found, but we do not fail upon + * collision with non-Postgres shmem segments. The idea here is to detect and + * re-use keys that may have been assigned by a crashed postmaster or backend. 
+ * + * The port number is passed for possible use as a key (for SysV, we use + * it to generate the starting shmem key). In a standalone backend, + * zero will be passed. + */ +PGShmemHeader * +PGSharedMemoryCreate(uint32 size, bool makePrivate, int port) +{ + IpcMemoryKey NextShmemSegID; + void *memAddress; + PGShmemHeader *hdr; + + /* Room for a header? */ + Assert(size > MAXALIGN(sizeof(PGShmemHeader))); + + /* Loop till we find a free IPC key */ + NextShmemSegID = port * 1000; + + for (NextShmemSegID++;; NextShmemSegID++) + { + IpcMemoryId shmid; + + /* Special case if creating a private segment --- just malloc() it */ + if (makePrivate) + { + memAddress = PrivateMemoryCreate(size); + break; + } + + /* Try to create new segment */ + memAddress = InternalIpcMemoryCreate(NextShmemSegID, size); + if (memAddress) + break; /* successful create and attach */ + + /* See if it looks to be leftover from a dead Postgres process */ + shmid = shmget(NextShmemSegID, sizeof(PGShmemHeader), 0); + if (shmid < 0) + continue; /* failed: must be some other app's */ + +#if defined(solaris) && defined(__sparc__) + /* use intimate shared memory on SPARC Solaris */ + memAddress = shmat(shmid, 0, SHM_SHARE_MMU); +#else + memAddress = shmat(shmid, 0, 0); +#endif + + if (memAddress == (void *) -1) + continue; /* failed: must be some other app's */ + hdr = (PGShmemHeader *) memAddress; + if (hdr->magic != PGShmemMagic) + { + shmdt(memAddress); + continue; /* segment belongs to a non-Postgres app */ + } + + /* + * If the creator PID is my own PID or does not belong to any + * extant process, it's safe to zap it. + */ + if (hdr->creatorPID != getpid()) + { + if (kill(hdr->creatorPID, 0) == 0 || + errno != ESRCH) + { + shmdt(memAddress); + continue; /* segment belongs to a live process */ + } + } + + /* + * The segment appears to be from a dead Postgres process, or from + * a previous cycle of life in this same process. Zap it, if + * possible. 
This probably shouldn't fail, but if it does, assume + * the segment belongs to someone else after all, and continue + * quietly. + */ + shmdt(memAddress); + if (shmctl(shmid, IPC_RMID, (struct shmid_ds *) NULL) < 0) + continue; + + /* + * Now try again to create the segment. + */ + memAddress = InternalIpcMemoryCreate(NextShmemSegID, size); + if (memAddress) + break; /* successful create and attach */ + + /* + * Can only get here if some other process managed to create the + * same shmem key before we did. Let him have that one, loop + * around to try next key. + */ + } + + /* + * OK, we created a new segment. Mark it as created by this process. + * The order of assignments here is critical so that another Postgres + * process can't see the header as valid but belonging to an invalid + * PID! + */ + hdr = (PGShmemHeader *) memAddress; + hdr->creatorPID = getpid(); + hdr->magic = PGShmemMagic; + + /* + * Initialize space allocation status for segment. + */ + hdr->totalsize = size; + hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader)); + + return hdr; +} diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 8f13a9d674..15fbc31e60 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -16,7 +16,7 @@ * * Copyright (c) 2001, PostgreSQL Global Development Group * - * $Header: /cvsroot/pgsql/src/backend/postmaster/pgstat.c,v 1.19 2002/04/03 00:27:25 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/postmaster/pgstat.c,v 1.20 2002/05/05 00:03:28 tgl Exp $ * ---------- */ #include "postgres.h" @@ -44,6 +44,7 @@ #include "miscadmin.h" #include "utils/memutils.h" #include "storage/backendid.h" +#include "storage/ipc.h" #include "utils/rel.h" #include "utils/hsearch.h" #include "utils/ps_status.h" diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 0ce817b5b3..a3a8f2521e 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -37,7 +37,7 
@@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/postmaster/postmaster.c,v 1.272 2002/04/04 04:25:48 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/postmaster/postmaster.c,v 1.273 2002/05/05 00:03:28 tgl Exp $ * * NOTES * @@ -1362,17 +1362,14 @@ static void reset_shared(unsigned short port) { /* - * Reset assignment of shared mem and semaphore IPC keys. Doing this - * means that in normal cases we'll assign the same keys on each - * "cycle of life", and thereby avoid leaving dead IPC objects - * floating around if the postmaster crashes and is restarted. - */ - IpcInitKeyAssignment(port); - - /* * Create or re-create shared memory and semaphores. + * + * Note: in each "cycle of life" we will normally assign the same IPC + * keys (if using SysV shmem and/or semas), since the port number is + * used to determine IPC keys. This helps ensure that we will clean up + * dead IPC objects if the postmaster crashes and is restarted. */ - CreateSharedMemoryAndSemaphores(false, MaxBackends); + CreateSharedMemoryAndSemaphores(false, MaxBackends, port); } diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 8726a39459..473c5318d3 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/file/fd.c,v 1.90 2002/03/06 06:10:03 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/file/fd.c,v 1.91 2002/05/05 00:03:28 tgl Exp $ * * NOTES: * @@ -51,6 +51,7 @@ #include "miscadmin.h" #include "storage/fd.h" +#include "storage/ipc.h" /* Filename components for OpenTemporaryFile */ diff --git a/src/backend/storage/ipc/ipc.c b/src/backend/storage/ipc/ipc.c index 10d3d6e7ed..fad7883541 100644 --- a/src/backend/storage/ipc/ipc.c +++ b/src/backend/storage/ipc/ipc.c @@ -3,25 +3,17 @@ * ipc.c * POSTGRES inter-process communication definitions. 
* + * This file is misnamed, as it no longer has much of anything directly + * to do with IPC. The functionality here is concerned with managing + * exit-time cleanup for either a postmaster or a backend. + * + * * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipc.c,v 1.78 2002/04/13 19:52:51 momjian Exp $ - * - * NOTES - * - * Currently, semaphores are used (my understanding anyway) in two - * different ways: - * 1. as mutexes on machines that don't have test-and-set (eg. - * mips R3000). - * 2. for putting processes to sleep when waiting on a lock - * and waking them up when the lock is free. - * The number of semaphores in (1) is fixed and those are shared - * among all backends. In (2), there is 1 semaphore per process and those - * are not shared with anyone else. - * -ay 4/95 + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipc.c,v 1.79 2002/05/05 00:03:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -30,27 +22,9 @@ #include #include #include -#include - -#include "storage/ipc.h" -/* In Ultrix, sem.h and shm.h must be included AFTER ipc.h */ -#ifdef HAVE_SYS_SEM_H -#include -#endif -#ifdef HAVE_SYS_SHM_H -#include -#endif -#ifdef HAVE_KERNEL_OS_H -#include -#endif - -#if defined(__darwin__) -#include "port/darwin/sem.h" -#endif #include "miscadmin.h" -#include "utils/memutils.h" -#include "libpq/libpq.h" +#include "storage/ipc.h" /* @@ -60,17 +34,6 @@ */ bool proc_exit_inprogress = false; -static IpcSemaphoreId InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, - int numSems, int permission, - int semStartValue, bool removeOnExit); -static void CallbackSemaphoreKill(int status, Datum semId); -static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, uint32 size, - int permission); -static void IpcMemoryDetach(int status, Datum shmaddr); -static 
void IpcMemoryDelete(int status, Datum shmId); -static void *PrivateMemoryCreate(uint32 size); -static void PrivateMemoryDelete(int status, Datum memaddr); - /* ---------------------------------------------------------------- * exit() handling stuff @@ -83,8 +46,6 @@ static void PrivateMemoryDelete(int status, Datum memaddr); * Callback functions can take zero, one, or two args: the first passed * arg is the integer exitcode, the second is the Datum supplied when * the callback was registered. - * - * XXX these functions probably ought to live in some other module. * ---------------------------------------------------------------- */ @@ -230,762 +191,3 @@ on_exit_reset(void) on_shmem_exit_index = 0; on_proc_exit_index = 0; } - - -/* ---------------------------------------------------------------- - * Semaphore support - * - * These routines represent a fairly thin layer on top of SysV semaphore - * functionality. - * ---------------------------------------------------------------- - */ - -/* ---------------------------------------------------------------- - * InternalIpcSemaphoreCreate(semKey, numSems, permission, - * semStartValue, removeOnExit) - * - * Attempt to create a new semaphore set with the specified key. - * Will fail (return -1) if such a set already exists. - * On success, a callback is optionally registered with on_shmem_exit - * to delete the semaphore set when on_shmem_exit is called. - * - * If we fail with a failure code other than collision-with-existing-set, - * print out an error and abort. Other types of errors are not recoverable. 
- * ---------------------------------------------------------------- - */ -static IpcSemaphoreId -InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, - int numSems, int permission, - int semStartValue, bool removeOnExit) -{ - int semId; - int i; - u_short array[IPC_NMAXSEM]; - union semun semun; - - Assert(numSems > 0 && numSems <= IPC_NMAXSEM); - - semId = semget(semKey, numSems, IPC_CREAT | IPC_EXCL | permission); - - if (semId < 0) - { - /* - * Fail quietly if error indicates a collision with existing set. - * One would expect EEXIST, given that we said IPC_EXCL, but - * perhaps we could get a permission violation instead? Also, - * EIDRM might occur if an old set is slated for destruction but - * not gone yet. - */ - if (errno == EEXIST || errno == EACCES -#ifdef EIDRM - || errno == EIDRM -#endif - ) - return -1; - - /* - * Else complain and abort - */ - fprintf(stderr, "IpcSemaphoreCreate: semget(key=%d, num=%d, 0%o) failed: %s\n", - (int) semKey, numSems, (IPC_CREAT | IPC_EXCL | permission), - strerror(errno)); - - if (errno == ENOSPC) - fprintf(stderr, - "\nThis error does *not* mean that you have run out of disk space.\n" - "\n" - "It occurs when either the system limit for the maximum number of\n" - "semaphore sets (SEMMNI), or the system wide maximum number of\n" - "semaphores (SEMMNS), would be exceeded. You need to raise the\n" - "respective kernel parameter. Alternatively, reduce PostgreSQL's\n" - "consumption of semaphores by reducing its max_connections parameter\n" - "(currently %d).\n" - "\n" - "The PostgreSQL Administrator's Guide contains more information about\n" - "configuring your system for PostgreSQL.\n\n", - MaxBackends); - - proc_exit(1); - } - - /* Initialize new semas to specified start value */ - for (i = 0; i < numSems; i++) - array[i] = semStartValue; - semun.array = array; - if (semctl(semId, 0, SETALL, semun) < 0) - { - fprintf(stderr, "IpcSemaphoreCreate: semctl(id=%d, 0, SETALL, ...) 
failed: %s\n", - semId, strerror(errno)); - - if (errno == ERANGE) - fprintf(stderr, - "You possibly need to raise your kernel's SEMVMX value to be at least\n" - "%d. Look into the PostgreSQL documentation for details.\n", - semStartValue); - - IpcSemaphoreKill(semId); - proc_exit(1); - } - - /* Register on-exit routine to delete the new set */ - if (removeOnExit) - on_shmem_exit(CallbackSemaphoreKill, Int32GetDatum(semId)); - - return semId; -} - -/****************************************************************************/ -/* IpcSemaphoreKill(semId) - removes a semaphore set */ -/* */ -/****************************************************************************/ -void -IpcSemaphoreKill(IpcSemaphoreId semId) -{ - union semun semun; - - semun.val = 0; /* unused, but keep compiler quiet */ - - if (semctl(semId, 0, IPC_RMID, semun) < 0) - fprintf(stderr, "IpcSemaphoreKill: semctl(%d, 0, IPC_RMID, ...) failed: %s\n", - semId, strerror(errno)); - - /* - * We used to report a failure via elog(WARNING), but that's pretty - * pointless considering any client has long since disconnected ... 
- */ -} - -/****************************************************************************/ -/* CallbackSemaphoreKill(status, semId) */ -/* (called as an on_shmem_exit callback, hence funny argument list) */ -/****************************************************************************/ -static void -CallbackSemaphoreKill(int status, Datum semId) -{ - IpcSemaphoreKill(DatumGetInt32(semId)); -} - -/****************************************************************************/ -/* IpcSemaphoreLock(semId, sem) - locks a semaphore */ -/****************************************************************************/ -void -IpcSemaphoreLock(IpcSemaphoreId semId, int sem, bool interruptOK) -{ - int errStatus; - struct sembuf sops; - - sops.sem_op = -1; /* decrement */ - sops.sem_flg = 0; - sops.sem_num = sem; - - /* - * Note: if errStatus is -1 and errno == EINTR then it means we - * returned from the operation prematurely because we were sent a - * signal. So we try and lock the semaphore again. - * - * Each time around the loop, we check for a cancel/die interrupt. We - * assume that if such an interrupt comes in while we are waiting, it - * will cause the semop() call to exit with errno == EINTR, so that we - * will be able to service the interrupt (if not in a critical section - * already). - * - * Once we acquire the lock, we do NOT check for an interrupt before - * returning. The caller needs to be able to record ownership of the - * lock before any interrupt can be accepted. - * - * There is a window of a few instructions between CHECK_FOR_INTERRUPTS - * and entering the semop() call. If a cancel/die interrupt occurs in - * that window, we would fail to notice it until after we acquire the - * lock (or get another interrupt to escape the semop()). We can - * avoid this problem by temporarily setting ImmediateInterruptOK to - * true before we do CHECK_FOR_INTERRUPTS; then, a die() interrupt in - * this interval will execute directly. 
However, there is a huge - * pitfall: there is another window of a few instructions after the - * semop() before we are able to reset ImmediateInterruptOK. If an - * interrupt occurs then, we'll lose control, which means that the - * lock has been acquired but our caller did not get a chance to - * record the fact. Therefore, we only set ImmediateInterruptOK if the - * caller tells us it's OK to do so, ie, the caller does not need to - * record acquiring the lock. (This is currently true for lockmanager - * locks, since the process that granted us the lock did all the - * necessary state updates. It's not true for SysV semaphores used to - * implement LW locks or emulate spinlocks --- but the wait time for - * such locks should not be very long, anyway.) - */ - do - { - ImmediateInterruptOK = interruptOK; - CHECK_FOR_INTERRUPTS(); - errStatus = semop(semId, &sops, 1); - ImmediateInterruptOK = false; - } while (errStatus == -1 && errno == EINTR); - - if (errStatus == -1) - { - fprintf(stderr, "IpcSemaphoreLock: semop(id=%d) failed: %s\n", - semId, strerror(errno)); - proc_exit(255); - } -} - -/****************************************************************************/ -/* IpcSemaphoreUnlock(semId, sem) - unlocks a semaphore */ -/****************************************************************************/ -void -IpcSemaphoreUnlock(IpcSemaphoreId semId, int sem) -{ - int errStatus; - struct sembuf sops; - - sops.sem_op = 1; /* increment */ - sops.sem_flg = 0; - sops.sem_num = sem; - - - /* - * Note: if errStatus is -1 and errno == EINTR then it means we - * returned from the operation prematurely because we were sent a - * signal. So we try and unlock the semaphore again. Not clear this - * can really happen, but might as well cope. 
- */ - do - { - errStatus = semop(semId, &sops, 1); - } while (errStatus == -1 && errno == EINTR); - - if (errStatus == -1) - { - fprintf(stderr, "IpcSemaphoreUnlock: semop(id=%d) failed: %s\n", - semId, strerror(errno)); - proc_exit(255); - } -} - -/****************************************************************************/ -/* IpcSemaphoreTryLock(semId, sem) - conditionally locks a semaphore */ -/* Lock the semaphore if it's free, but don't block. */ -/****************************************************************************/ -bool -IpcSemaphoreTryLock(IpcSemaphoreId semId, int sem) -{ - int errStatus; - struct sembuf sops; - - sops.sem_op = -1; /* decrement */ - sops.sem_flg = IPC_NOWAIT; /* but don't block */ - sops.sem_num = sem; - - /* - * Note: if errStatus is -1 and errno == EINTR then it means we - * returned from the operation prematurely because we were sent a - * signal. So we try and lock the semaphore again. - */ - do - { - errStatus = semop(semId, &sops, 1); - } while (errStatus == -1 && errno == EINTR); - - if (errStatus == -1) - { - /* Expect EAGAIN or EWOULDBLOCK (platform-dependent) */ -#ifdef EAGAIN - if (errno == EAGAIN) - return false; /* failed to lock it */ -#endif -#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN)) - if (errno == EWOULDBLOCK) - return false; /* failed to lock it */ -#endif - /* Otherwise we got trouble */ - fprintf(stderr, "IpcSemaphoreTryLock: semop(id=%d) failed: %s\n", - semId, strerror(errno)); - proc_exit(255); - } - - return true; -} - -/* Get the current value (semval) of the semaphore */ -int -IpcSemaphoreGetValue(IpcSemaphoreId semId, int sem) -{ - union semun dummy; /* for Solaris */ - - dummy.val = 0; /* unused */ - - return semctl(semId, sem, GETVAL, dummy); -} - -/* Get the PID of the last process to do semop() on the semaphore */ -static pid_t -IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int sem) -{ - union semun dummy; /* for Solaris */ - - dummy.val = 0; /* unused */ - - return 
semctl(semId, sem, GETPID, dummy); -} - - -/* ---------------------------------------------------------------- - * Shared memory support - * - * These routines represent a fairly thin layer on top of SysV shared - * memory functionality. - * ---------------------------------------------------------------- - */ - -/* ---------------------------------------------------------------- - * InternalIpcMemoryCreate(memKey, size, permission) - * - * Attempt to create a new shared memory segment with the specified key. - * Will fail (return NULL) if such a segment already exists. If successful, - * attach the segment to the current process and return its attached address. - * On success, callbacks are registered with on_shmem_exit to detach and - * delete the segment when on_shmem_exit is called. - * - * If we fail with a failure code other than collision-with-existing-segment, - * print out an error and abort. Other types of errors are not recoverable. - * ---------------------------------------------------------------- - */ -static void * -InternalIpcMemoryCreate(IpcMemoryKey memKey, uint32 size, int permission) -{ - IpcMemoryId shmid; - void *memAddress; - - shmid = shmget(memKey, size, IPC_CREAT | IPC_EXCL | permission); - - if (shmid < 0) - { - /* - * Fail quietly if error indicates a collision with existing - * segment. One would expect EEXIST, given that we said IPC_EXCL, - * but perhaps we could get a permission violation instead? Also, - * EIDRM might occur if an old seg is slated for destruction but - * not gone yet. 
- */ - if (errno == EEXIST || errno == EACCES -#ifdef EIDRM - || errno == EIDRM -#endif - ) - return NULL; - - /* - * Else complain and abort - */ - fprintf(stderr, "IpcMemoryCreate: shmget(key=%d, size=%u, 0%o) failed: %s\n", - (int) memKey, size, (IPC_CREAT | IPC_EXCL | permission), - strerror(errno)); - - if (errno == EINVAL) - fprintf(stderr, - "\nThis error usually means that PostgreSQL's request for a shared memory\n" - "segment exceeded your kernel's SHMMAX parameter. You can either\n" - "reduce the request size or reconfigure the kernel with larger SHMMAX.\n" - "To reduce the request size (currently %u bytes), reduce\n" - "PostgreSQL's shared_buffers parameter (currently %d) and/or\n" - "its max_connections parameter (currently %d).\n" - "\n" - "If the request size is already small, it's possible that it is less than\n" - "your kernel's SHMMIN parameter, in which case raising the request size or\n" - "reconfiguring SHMMIN is called for.\n" - "\n" - "The PostgreSQL Administrator's Guide contains more information about\n" - "shared memory configuration.\n\n", - size, NBuffers, MaxBackends); - - else if (errno == ENOMEM) - fprintf(stderr, - "\nThis error usually means that PostgreSQL's request for a shared\n" - "memory segment exceeded available memory or swap space.\n" - "To reduce the request size (currently %u bytes), reduce\n" - "PostgreSQL's shared_buffers parameter (currently %d) and/or\n" - "its max_connections parameter (currently %d).\n" - "\n" - "The PostgreSQL Administrator's Guide contains more information about\n" - "shared memory configuration.\n\n", - size, NBuffers, MaxBackends); - - else if (errno == ENOSPC) - fprintf(stderr, - "\nThis error does *not* mean that you have run out of disk space.\n" - "\n" - "It occurs either if all available shared memory IDs have been taken,\n" - "in which case you need to raise the SHMMNI parameter in your kernel,\n" - "or because the system's overall limit for shared memory has been\n" - "reached. 
If you cannot increase the shared memory limit,\n" - "reduce PostgreSQL's shared memory request (currently %u bytes),\n" - "by reducing its shared_buffers parameter (currently %d) and/or\n" - "its max_connections parameter (currently %d).\n" - "\n" - "The PostgreSQL Administrator's Guide contains more information about\n" - "shared memory configuration.\n\n", - size, NBuffers, MaxBackends); - - proc_exit(1); - } - - /* Register on-exit routine to delete the new segment */ - on_shmem_exit(IpcMemoryDelete, Int32GetDatum(shmid)); - - /* OK, should be able to attach to the segment */ -#if defined(solaris) && defined(__sparc__) - /* use intimate shared memory on SPARC Solaris */ - memAddress = shmat(shmid, 0, SHM_SHARE_MMU); -#else - memAddress = shmat(shmid, 0, 0); -#endif - - if (memAddress == (void *) -1) - { - fprintf(stderr, "IpcMemoryCreate: shmat(id=%d) failed: %s\n", - shmid, strerror(errno)); - proc_exit(1); - } - - /* Register on-exit routine to detach new segment before deleting */ - on_shmem_exit(IpcMemoryDetach, PointerGetDatum(memAddress)); - - /* Record key and ID in lockfile for data directory. */ - RecordSharedMemoryInLockFile(memKey, shmid); - - return memAddress; -} - -/****************************************************************************/ -/* IpcMemoryDetach(status, shmaddr) removes a shared memory segment */ -/* from process' address spaceq */ -/* (called as an on_shmem_exit callback, hence funny argument list) */ -/****************************************************************************/ -static void -IpcMemoryDetach(int status, Datum shmaddr) -{ - if (shmdt(DatumGetPointer(shmaddr)) < 0) - fprintf(stderr, "IpcMemoryDetach: shmdt(%p) failed: %s\n", - DatumGetPointer(shmaddr), strerror(errno)); - - /* - * We used to report a failure via elog(WARNING), but that's pretty - * pointless considering any client has long since disconnected ... 
- */ -} - -/****************************************************************************/ -/* IpcMemoryDelete(status, shmId) deletes a shared memory segment */ -/* (called as an on_shmem_exit callback, hence funny argument list) */ -/****************************************************************************/ -static void -IpcMemoryDelete(int status, Datum shmId) -{ - if (shmctl(DatumGetInt32(shmId), IPC_RMID, (struct shmid_ds *) NULL) < 0) - fprintf(stderr, "IpcMemoryDelete: shmctl(%d, %d, 0) failed: %s\n", - DatumGetInt32(shmId), IPC_RMID, strerror(errno)); - - /* - * We used to report a failure via elog(WARNING), but that's pretty - * pointless considering any client has long since disconnected ... - */ -} - -/****************************************************************************/ -/* SharedMemoryIsInUse(shmKey, shmId) Is a shared memory segment in use? */ -/****************************************************************************/ -bool -SharedMemoryIsInUse(IpcMemoryKey shmKey, IpcMemoryId shmId) -{ - struct shmid_ds shmStat; - - /* - * We detect whether a shared memory segment is in use by seeing - * whether it (a) exists and (b) has any processes are attached to it. - * - * If we are unable to perform the stat operation for a reason other than - * nonexistence of the segment (most likely, because it doesn't belong - * to our userid), assume it is in use. - */ - if (shmctl(shmId, IPC_STAT, &shmStat) < 0) - { - /* - * EINVAL actually has multiple possible causes documented in the - * shmctl man page, but we assume it must mean the segment no - * longer exists. 
- */ - if (errno == EINVAL) - return false; - /* Else assume segment is in use */ - return true; - } - /* If it has attached processes, it's in use */ - if (shmStat.shm_nattch != 0) - return true; - return false; -} - - -/* ---------------------------------------------------------------- - * private memory support - * - * Rather than allocating shmem segments with IPC_PRIVATE key, we - * just malloc() the requested amount of space. This code emulates - * the needed shmem functions. - * ---------------------------------------------------------------- - */ - -static void * -PrivateMemoryCreate(uint32 size) -{ - void *memAddress; - - memAddress = malloc(size); - if (!memAddress) - { - fprintf(stderr, "PrivateMemoryCreate: malloc(%u) failed\n", size); - proc_exit(1); - } - MemSet(memAddress, 0, size); /* keep Purify quiet */ - - /* Register on-exit routine to release storage */ - on_shmem_exit(PrivateMemoryDelete, PointerGetDatum(memAddress)); - - return memAddress; -} - -static void -PrivateMemoryDelete(int status, Datum memaddr) -{ - free(DatumGetPointer(memaddr)); -} - - -/* ------------------ - * Routines to assign keys for new IPC objects - * - * The idea here is to detect and re-use keys that may have been assigned - * by a crashed postmaster or backend. - * ------------------ - */ - -static IpcMemoryKey NextShmemSegID = 0; -static IpcSemaphoreKey NextSemaID = 0; - -/* - * (Re) initialize key assignment at startup of postmaster or standalone - * backend, also at postmaster reset. - */ -void -IpcInitKeyAssignment(int port) -{ - NextShmemSegID = port * 1000; - NextSemaID = port * 1000; -} - -/* - * Create a shared memory segment of the given size and initialize its - * standard header. Dead Postgres segments are recycled if found, - * but we do not fail upon collision with non-Postgres shmem segments. - */ -PGShmemHeader * -IpcMemoryCreate(uint32 size, bool makePrivate, int permission) -{ - void *memAddress; - PGShmemHeader *hdr; - - /* Room for a header? 
*/ - Assert(size > MAXALIGN(sizeof(PGShmemHeader))); - - /* Loop till we find a free IPC key */ - for (NextShmemSegID++;; NextShmemSegID++) - { - IpcMemoryId shmid; - - /* Special case if creating a private segment --- just malloc() it */ - if (makePrivate) - { - memAddress = PrivateMemoryCreate(size); - break; - } - - /* Try to create new segment */ - memAddress = InternalIpcMemoryCreate(NextShmemSegID, size, permission); - if (memAddress) - break; /* successful create and attach */ - - /* See if it looks to be leftover from a dead Postgres process */ - shmid = shmget(NextShmemSegID, sizeof(PGShmemHeader), 0); - if (shmid < 0) - continue; /* failed: must be some other app's */ - -#if defined(solaris) && defined(__sparc__) - /* use intimate shared memory on SPARC Solaris */ - memAddress = shmat(shmid, 0, SHM_SHARE_MMU); -#else - memAddress = shmat(shmid, 0, 0); -#endif - - if (memAddress == (void *) -1) - continue; /* failed: must be some other app's */ - hdr = (PGShmemHeader *) memAddress; - if (hdr->magic != PGShmemMagic) - { - shmdt(memAddress); - continue; /* segment belongs to a non-Postgres app */ - } - - /* - * If the creator PID is my own PID or does not belong to any - * extant process, it's safe to zap it. - */ - if (hdr->creatorPID != getpid()) - { - if (kill(hdr->creatorPID, 0) == 0 || - errno != ESRCH) - { - shmdt(memAddress); - continue; /* segment belongs to a live process */ - } - } - - /* - * The segment appears to be from a dead Postgres process, or from - * a previous cycle of life in this same process. Zap it, if - * possible. This probably shouldn't fail, but if it does, assume - * the segment belongs to someone else after all, and continue - * quietly. - */ - shmdt(memAddress); - if (shmctl(shmid, IPC_RMID, (struct shmid_ds *) NULL) < 0) - continue; - - /* - * Now try again to create the segment. 
- */ - memAddress = InternalIpcMemoryCreate(NextShmemSegID, size, permission); - if (memAddress) - break; /* successful create and attach */ - - /* - * Can only get here if some other process managed to create the - * same shmem key before we did. Let him have that one, loop - * around to try next key. - */ - } - - /* - * OK, we created a new segment. Mark it as created by this process. - * The order of assignments here is critical so that another Postgres - * process can't see the header as valid but belonging to an invalid - * PID! - */ - hdr = (PGShmemHeader *) memAddress; - hdr->creatorPID = getpid(); - hdr->magic = PGShmemMagic; - - /* - * Initialize space allocation status for segment. - */ - hdr->totalsize = size; - hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader)); - - return hdr; -} - -/* - * Create a semaphore set with the given number of useful semaphores - * (an additional sema is actually allocated to serve as identifier). - * Dead Postgres sema sets are recycled if found, but we do not fail - * upon collision with non-Postgres sema sets. - */ -IpcSemaphoreId -IpcSemaphoreCreate(int numSems, int permission, - int semStartValue, bool removeOnExit) -{ - IpcSemaphoreId semId; - union semun semun; - - /* Loop till we find a free IPC key */ - for (NextSemaID++;; NextSemaID++) - { - pid_t creatorPID; - - /* Try to create new semaphore set */ - semId = InternalIpcSemaphoreCreate(NextSemaID, numSems + 1, - permission, semStartValue, - removeOnExit); - if (semId >= 0) - break; /* successful create */ - - /* See if it looks to be leftover from a dead Postgres process */ - semId = semget(NextSemaID, numSems + 1, 0); - if (semId < 0) - continue; /* failed: must be some other app's */ - if (IpcSemaphoreGetValue(semId, numSems) != PGSemaMagic) - continue; /* sema belongs to a non-Postgres app */ - - /* - * If the creator PID is my own PID or does not belong to any - * extant process, it's safe to zap it. 
- */ - creatorPID = IpcSemaphoreGetLastPID(semId, numSems); - if (creatorPID <= 0) - continue; /* oops, GETPID failed */ - if (creatorPID != getpid()) - { - if (kill(creatorPID, 0) == 0 || - errno != ESRCH) - continue; /* sema belongs to a live process */ - } - - /* - * The sema set appears to be from a dead Postgres process, or - * from a previous cycle of life in this same process. Zap it, if - * possible. This probably shouldn't fail, but if it does, assume - * the sema set belongs to someone else after all, and continue - * quietly. - */ - semun.val = 0; /* unused, but keep compiler quiet */ - if (semctl(semId, 0, IPC_RMID, semun) < 0) - continue; - - /* - * Now try again to create the sema set. - */ - semId = InternalIpcSemaphoreCreate(NextSemaID, numSems + 1, - permission, semStartValue, - removeOnExit); - if (semId >= 0) - break; /* successful create */ - - /* - * Can only get here if some other process managed to create the - * same sema key before we did. Let him have that one, loop - * around to try next key. - */ - } - - /* - * OK, we created a new sema set. Mark it as created by this process. - * We do this by setting the spare semaphore to PGSemaMagic-1 and then - * incrementing it with semop(). That leaves it with value - * PGSemaMagic and sempid referencing this process. - */ - semun.val = PGSemaMagic - 1; - if (semctl(semId, numSems, SETVAL, semun) < 0) - { - fprintf(stderr, "IpcSemaphoreCreate: semctl(id=%d, %d, SETVAL, %d) failed: %s\n", - semId, numSems, PGSemaMagic - 1, strerror(errno)); - - if (errno == ERANGE) - fprintf(stderr, - "You possibly need to raise your kernel's SEMVMX value to be at least\n" - "%d. 
Look into the PostgreSQL documentation for details.\n", - PGSemaMagic); - - proc_exit(1); - } - IpcSemaphoreUnlock(semId, numSems); - - return semId; -} diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index c2950dc767..34412e3ed4 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipci.c,v 1.46 2002/03/02 21:39:29 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipci.c,v 1.47 2002/05/05 00:03:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -21,8 +21,11 @@ #include "access/xlog.h" #include "storage/bufmgr.h" #include "storage/freespace.h" +#include "storage/ipc.h" #include "storage/lmgr.h" #include "storage/lwlock.h" +#include "storage/pg_sema.h" +#include "storage/pg_shmem.h" #include "storage/pmsignal.h" #include "storage/proc.h" #include "storage/sinval.h" @@ -41,9 +44,12 @@ * memory. This is true for a standalone backend, false for a postmaster. 
*/ void -CreateSharedMemoryAndSemaphores(bool makePrivate, int maxBackends) +CreateSharedMemoryAndSemaphores(bool makePrivate, + int maxBackends, + int port) { int size; + int numSemas; PGShmemHeader *seghdr; /* @@ -70,12 +76,14 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int maxBackends) /* * Create the shmem segment */ - seghdr = IpcMemoryCreate(size, makePrivate, IPCProtection); + seghdr = PGSharedMemoryCreate(size, makePrivate, port); /* - * First initialize spinlocks --- needed by InitShmemAllocation() + * Create semaphores */ - CreateSpinlocks(); + numSemas = ProcGlobalSemas(maxBackends); + numSemas += SpinlockSemas(); + PGReserveSemaphores(numSemas, port); /* * Set up shared memory allocation mechanism diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c index f9056ae375..3b558f528f 100644 --- a/src/backend/storage/ipc/shmem.c +++ b/src/backend/storage/ipc/shmem.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmem.c,v 1.64 2002/03/06 06:10:05 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmem.c,v 1.65 2002/05/05 00:03:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -61,6 +61,7 @@ #include "postgres.h" #include "access/transam.h" +#include "storage/pg_shmem.h" #include "storage/spin.h" #include "utils/tqual.h" diff --git a/src/backend/storage/ipc/sinvaladt.c b/src/backend/storage/ipc/sinvaladt.c index bae8983472..dbe75469d0 100644 --- a/src/backend/storage/ipc/sinvaladt.c +++ b/src/backend/storage/ipc/sinvaladt.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.45 2002/03/02 23:35:57 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.46 2002/05/05 00:03:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -16,6 +16,7 @@ #include "miscadmin.h" #include "storage/backendid.h" +#include 
"storage/ipc.h" #include "storage/pmsignal.h" #include "storage/proc.h" #include "storage/sinvaladt.h" diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index e85ef7ea11..8dbf55c078 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -15,7 +15,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lwlock.c,v 1.9 2002/03/02 21:39:29 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lwlock.c,v 1.10 2002/05/05 00:03:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -302,7 +302,7 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode) for (;;) { /* "false" means cannot accept cancel/die interrupt here. */ - IpcSemaphoreLock(proc->sem.semId, proc->sem.semNum, false); + PGSemaphoreLock(&proc->sem, false); if (!proc->lwWaiting) break; extraWaits++; @@ -325,7 +325,7 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode) * Fix the process wait semaphore's count for any absorbed wakeups. 
*/ while (extraWaits-- > 0) - IpcSemaphoreUnlock(proc->sem.semId, proc->sem.semNum); + PGSemaphoreUnlock(&proc->sem); } /* @@ -485,7 +485,7 @@ LWLockRelease(LWLockId lockid) head = proc->lwWaitLink; proc->lwWaitLink = NULL; proc->lwWaiting = false; - IpcSemaphoreUnlock(proc->sem.semId, proc->sem.semNum); + PGSemaphoreUnlock(&proc->sem); } /* diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 9982552955..8ccaff9477 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.118 2002/03/02 21:39:29 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.119 2002/05/05 00:03:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -37,12 +37,6 @@ * in the first place was to allow the lock table to grow larger * than available shared memory and that isn't going to work * without a lot of unimplemented support anyway. - * - * 4/7/95 -- instead of allocating a set of 1 semaphore per process, we - * allocate a semaphore from a set of PROC_NSEMS_PER_SET semaphores - * shared among backends (we keep a few sets of semaphores around). - * This is so that we can support more backends. (system-wide semaphore - * sets run out pretty fast.) -ay 4/95 */ #include "postgres.h" @@ -51,18 +45,9 @@ #include #include -#include "storage/ipc.h" -/* In Ultrix, sem.h and shm.h must be included AFTER ipc.h */ -#ifdef HAVE_SYS_SEM_H -#include -#endif - -#if defined(__darwin__) -#include "port/darwin/sem.h" -#endif - #include "miscadmin.h" #include "access/xact.h" +#include "storage/ipc.h" #include "storage/proc.h" #include "storage/sinval.h" #include "storage/spin.h" @@ -73,11 +58,11 @@ int DeadlockTimeout = 1000; PROC *MyProc = NULL; /* - * This spinlock protects the freelist of recycled PROC structures and the - * bitmap of free semaphores. 
We cannot use an LWLock because the LWLock - * manager depends on already having a PROC and a wait semaphore! But these - * structures are touched relatively infrequently (only at backend startup - * or shutdown) and not for very long, so a spinlock is okay. + * This spinlock protects the freelist of recycled PROC structures. + * We cannot use an LWLock because the LWLock manager depends on already + * having a PROC and a wait semaphore! But these structures are touched + * relatively infrequently (only at backend startup or shutdown) and not for + * very long, so a spinlock is okay. */ static slock_t *ProcStructLock = NULL; @@ -90,21 +75,24 @@ static bool waitingForSignal = false; static void ProcKill(void); static void DummyProcKill(void); -static void ProcGetNewSemIdAndNum(IpcSemaphoreId *semId, int *semNum); -static void ProcFreeSem(IpcSemaphoreId semId, int semNum); -static void ZeroProcSemaphore(PROC *proc); -static void ProcFreeAllSemaphores(void); /* + * Report number of semaphores needed by InitProcGlobal. + */ +int +ProcGlobalSemas(int maxBackends) +{ + /* We need a sema per backend, plus one for the dummy process. */ + return maxBackends + 1; +} + +/* * InitProcGlobal - * initializes the global process table. We put it here so that - * the postmaster can do this initialization. (ProcFreeAllSemaphores needs - * to read this table on exiting the postmaster. If we have the first - * backend do this, starting up and killing the postmaster without - * starting any backends will be a problem.) + * the postmaster can do this initialization. * - * We also allocate all the per-process semaphores we will need to support + * We also create all the per-process semaphores we will need to support * the requested number of backends. 
We used to allocate semaphores * only when backends were actually started up, but that is bad because * it lets Postgres fail under load --- a lot of Unix systems are @@ -114,28 +102,19 @@ static void ProcFreeAllSemaphores(void); * of backends immediately at initialization --- if the sysadmin has set * MaxBackends higher than his kernel will support, he'll find out sooner * rather than later. + * + * Another reason for creating semaphores here is that the semaphore + * implementation typically requires us to create semaphores in the + * postmaster, not in backends. */ void InitProcGlobal(int maxBackends) { - int semMapEntries; - Size procGlobalSize; bool found = false; - /* - * Compute size for ProcGlobal structure. Note we need one more sema - * besides those used for regular backends; this is accounted for in - * the PROC_SEM_MAP_ENTRIES macro. (We do it that way so that other - * modules that use PROC_SEM_MAP_ENTRIES(maxBackends) to size data - * structures don't have to know about this explicitly.) - */ - Assert(maxBackends > 0); - semMapEntries = PROC_SEM_MAP_ENTRIES(maxBackends); - procGlobalSize = sizeof(PROC_HDR) + (semMapEntries - 1) *sizeof(SEM_MAP_ENTRY); - /* Create or attach to the ProcGlobal shared structure */ ProcGlobal = (PROC_HDR *) - ShmemInitStruct("Proc Header", procGlobalSize, &found); + ShmemInitStruct("Proc Header", sizeof(PROC_HDR), &found); /* -------------------- * We're the first - initialize. @@ -148,47 +127,33 @@ InitProcGlobal(int maxBackends) int i; ProcGlobal->freeProcs = INVALID_OFFSET; - ProcGlobal->semMapEntries = semMapEntries; - - for (i = 0; i < semMapEntries; i++) - { - ProcGlobal->procSemMap[i].procSemId = -1; - ProcGlobal->procSemMap[i].freeSemMap = 0; - } - - /* - * Arrange to delete semas on exit --- set this up now so that we - * will clean up if pre-allocation fails. 
We use our own - * freeproc, rather than IpcSemaphoreCreate's removeOnExit option, - * because we don't want to fill up the on_shmem_exit list with a - * separate entry for each semaphore set. - */ - on_shmem_exit(ProcFreeAllSemaphores, 0); /* - * Pre-create the semaphores. + * Pre-create the PROC structures and create a semaphore for each. */ - for (i = 0; i < semMapEntries; i++) + for (i = 0; i < maxBackends; i++) { - IpcSemaphoreId semId; - - semId = IpcSemaphoreCreate(PROC_NSEMS_PER_SET, - IPCProtection, - 1, - false); - ProcGlobal->procSemMap[i].procSemId = semId; + PROC *proc; + + proc = (PROC *) ShmemAlloc(sizeof(PROC)); + if (!proc) + elog(FATAL, "cannot create new proc: out of memory"); + MemSet(proc, 0, sizeof(PROC)); + PGSemaphoreCreate(&proc->sem); + proc->links.next = ProcGlobal->freeProcs; + ProcGlobal->freeProcs = MAKE_OFFSET(proc); } /* * Pre-allocate a PROC structure for dummy (checkpoint) processes, - * and reserve the last sema of the precreated semas for it. + * too. This does not get linked into the freeProcs list. 
*/ DummyProc = (PROC *) ShmemAlloc(sizeof(PROC)); + if (!DummyProc) + elog(FATAL, "cannot create new proc: out of memory"); + MemSet(DummyProc, 0, sizeof(PROC)); DummyProc->pid = 0; /* marks DummyProc as not in use */ - i = semMapEntries - 1; - ProcGlobal->procSemMap[i].freeSemMap |= 1 << (PROC_NSEMS_PER_SET - 1); - DummyProc->sem.semId = ProcGlobal->procSemMap[i].procSemId; - DummyProc->sem.semNum = PROC_NSEMS_PER_SET - 1; + PGSemaphoreCreate(&DummyProc->sem); /* Create ProcStructLock spinlock, too */ ProcStructLock = (slock_t *) ShmemAlloc(sizeof(slock_t)); @@ -197,7 +162,7 @@ InitProcGlobal(int maxBackends) } /* - * InitProcess -- create a per-process data structure for this backend + * InitProcess -- initialize a per-process data structure for this backend */ void InitProcess(void) @@ -217,7 +182,8 @@ InitProcess(void) elog(ERROR, "InitProcess: you already exist"); /* - * try to get a proc struct from the free list first + * Try to get a proc struct from the free list. If this fails, + * we must be out of PROC structures (not to mention semaphores). */ SpinLockAcquire(ProcStructLock); @@ -232,20 +198,19 @@ InitProcess(void) else { /* - * have to allocate a new one. + * If we reach here, all the PROCs are in use. This is one of + * the possible places to detect "too many backends", so give the + * standard error message. */ SpinLockRelease(ProcStructLock); - MyProc = (PROC *) ShmemAlloc(sizeof(PROC)); - if (!MyProc) - elog(FATAL, "cannot create new proc: out of memory"); + elog(FATAL, "Sorry, too many clients already"); } /* - * Initialize all fields of MyProc. + * Initialize all fields of MyProc, except for the semaphore which + * was prepared for us by InitProcGlobal. 
*/ SHMQueueElemInit(&(MyProc->links)); - MyProc->sem.semId = -1; /* no wait-semaphore acquired yet */ - MyProc->sem.semNum = -1; MyProc->errType = STATUS_OK; MyProc->xid = InvalidTransactionId; MyProc->xmin = InvalidTransactionId; @@ -265,18 +230,10 @@ InitProcess(void) on_shmem_exit(ProcKill, 0); /* - * Set up a wait-semaphore for the proc. (We rely on ProcKill to - * clean up MyProc if this fails.) - */ - if (IsUnderPostmaster) - ProcGetNewSemIdAndNum(&MyProc->sem.semId, &MyProc->sem.semNum); - - /* * We might be reusing a semaphore that belonged to a failed process. * So be careful and reinitialize its value here. */ - if (MyProc->sem.semId >= 0) - ZeroProcSemaphore(MyProc); + PGSemaphoreReset(&MyProc->sem); /* * Now that we have a PROC, we could try to acquire locks, so @@ -340,25 +297,7 @@ InitDummyProcess(void) * We might be reusing a semaphore that belonged to a failed process. * So be careful and reinitialize its value here. */ - if (MyProc->sem.semId >= 0) - ZeroProcSemaphore(MyProc); -} - -/* - * Initialize the proc's wait-semaphore to count zero. - */ -static void -ZeroProcSemaphore(PROC *proc) -{ - union semun semun; - - semun.val = 0; - if (semctl(proc->sem.semId, proc->sem.semNum, SETVAL, semun) < 0) - { - fprintf(stderr, "ZeroProcSemaphore: semctl(id=%d,SETVAL) failed: %s\n", - proc->sem.semId, strerror(errno)); - proc_exit(255); - } + PGSemaphoreReset(&MyProc->sem); } /* @@ -397,7 +336,7 @@ LockWaitCancel(void) * to zero. Otherwise, our next attempt to wait for a lock will fall * through prematurely. 
*/ - ZeroProcSemaphore(MyProc); + PGSemaphoreReset(&MyProc->sem); /* * Return true even if we were kicked off the lock before we were able @@ -463,11 +402,7 @@ ProcKill(void) SpinLockAcquire(ProcStructLock); - /* Free up my wait semaphore, if I got one */ - if (MyProc->sem.semId >= 0) - ProcFreeSem(MyProc->sem.semId, MyProc->sem.semNum); - - /* Add PROC struct to freelist so space can be recycled in future */ + /* Return PROC structure (and semaphore) to freelist */ MyProc->links.next = procglobal->freeProcs; procglobal->freeProcs = MAKE_OFFSET(MyProc); @@ -701,10 +636,10 @@ ProcSleep(LOCKMETHODTABLE *lockMethodTable, elog(FATAL, "ProcSleep: Unable to set timer for process wakeup"); /* - * If someone wakes us between LWLockRelease and IpcSemaphoreLock, - * IpcSemaphoreLock will not block. The wakeup is "saved" by the + * If someone wakes us between LWLockRelease and PGSemaphoreLock, + * PGSemaphoreLock will not block. The wakeup is "saved" by the * semaphore implementation. Note also that if HandleDeadLock is - * invoked but does not detect a deadlock, IpcSemaphoreLock() will + * invoked but does not detect a deadlock, PGSemaphoreLock() will * continue to wait. There used to be a loop here, but it was useless * code... * @@ -714,7 +649,7 @@ ProcSleep(LOCKMETHODTABLE *lockMethodTable, * here. We don't, because we have no state-change work to do after * being granted the lock (the grantor did it all). */ - IpcSemaphoreLock(MyProc->sem.semId, MyProc->sem.semNum, true); + PGSemaphoreLock(&MyProc->sem, true); /* * Disable the timer, if it's still running @@ -775,7 +710,7 @@ ProcWakeup(PROC *proc, int errType) proc->errType = errType; /* And awaken it */ - IpcSemaphoreUnlock(proc->sem.semId, proc->sem.semNum); + PGSemaphoreUnlock(&proc->sem); return retProc; } @@ -914,7 +849,7 @@ HandleDeadLock(SIGNAL_ARGS) * Unlock my semaphore so that the interrupted ProcSleep() call can * finish. 
*/ - IpcSemaphoreUnlock(MyProc->sem.semId, MyProc->sem.semNum); + PGSemaphoreUnlock(&MyProc->sem); /* * We're done here. Transaction abort caused by the error that @@ -943,7 +878,7 @@ void ProcWaitForSignal(void) { waitingForSignal = true; - IpcSemaphoreLock(MyProc->sem.semId, MyProc->sem.semNum, true); + PGSemaphoreLock(&MyProc->sem, true); waitingForSignal = false; } @@ -957,7 +892,7 @@ ProcWaitForSignal(void) void ProcCancelWaitForSignal(void) { - ZeroProcSemaphore(MyProc); + PGSemaphoreReset(&MyProc->sem); waitingForSignal = false; } @@ -970,7 +905,7 @@ ProcSendSignal(BackendId procId) PROC *proc = BackendIdGetProc(procId); if (proc != NULL) - IpcSemaphoreUnlock(proc->sem.semId, proc->sem.semNum); + PGSemaphoreUnlock(&proc->sem); } @@ -1035,110 +970,3 @@ disable_sigalrm_interrupt(void) return true; } - - -/***************************************************************************** - * - *****************************************************************************/ - -/* - * ProcGetNewSemIdAndNum - - * scan the free semaphore bitmap and allocate a single semaphore from - * a semaphore set. - */ -static void -ProcGetNewSemIdAndNum(IpcSemaphoreId *semId, int *semNum) -{ - /* use volatile pointer to prevent code rearrangement */ - volatile PROC_HDR *procglobal = ProcGlobal; - int semMapEntries = procglobal->semMapEntries; - volatile SEM_MAP_ENTRY *procSemMap = procglobal->procSemMap; - int32 fullmask = (1 << PROC_NSEMS_PER_SET) - 1; - int i; - - SpinLockAcquire(ProcStructLock); - - for (i = 0; i < semMapEntries; i++) - { - int mask = 1; - int j; - - if (procSemMap[i].freeSemMap == fullmask) - continue; /* this set is fully allocated */ - if (procSemMap[i].procSemId < 0) - continue; /* this set hasn't been initialized */ - - for (j = 0; j < PROC_NSEMS_PER_SET; j++) - { - if ((procSemMap[i].freeSemMap & mask) == 0) - { - /* A free semaphore found. Mark it as allocated. 
*/ - procSemMap[i].freeSemMap |= mask; - - *semId = procSemMap[i].procSemId; - *semNum = j; - - SpinLockRelease(ProcStructLock); - - return; - } - mask <<= 1; - } - } - - SpinLockRelease(ProcStructLock); - - /* - * If we reach here, all the semaphores are in use. This is one of - * the possible places to detect "too many backends", so give the - * standard error message. (Whether we detect it here or in sinval.c - * depends on whether MaxBackends is a multiple of - * PROC_NSEMS_PER_SET.) - */ - elog(FATAL, "Sorry, too many clients already"); -} - -/* - * ProcFreeSem - - * free up our semaphore in the semaphore set. - * - * Caller is assumed to hold ProcStructLock. - */ -static void -ProcFreeSem(IpcSemaphoreId semId, int semNum) -{ - int32 mask; - int i; - int semMapEntries = ProcGlobal->semMapEntries; - - mask = ~(1 << semNum); - - for (i = 0; i < semMapEntries; i++) - { - if (ProcGlobal->procSemMap[i].procSemId == semId) - { - ProcGlobal->procSemMap[i].freeSemMap &= mask; - return; - } - } - /* can't elog here!!! */ - fprintf(stderr, "ProcFreeSem: no ProcGlobal entry for semId %d\n", semId); -} - -/* - * ProcFreeAllSemaphores - - * called at shmem_exit time, ie when exiting the postmaster or - * destroying shared state for a failed set of backends. - * Free up all the semaphores allocated to the lmgrs of the backends. - */ -static void -ProcFreeAllSemaphores(void) -{ - int i; - - for (i = 0; i < ProcGlobal->semMapEntries; i++) - { - if (ProcGlobal->procSemMap[i].procSemId >= 0) - IpcSemaphoreKill(ProcGlobal->procSemMap[i].procSemId); - } -} diff --git a/src/backend/storage/lmgr/spin.c b/src/backend/storage/lmgr/spin.c index f5bb8e3488..c384c4b158 100644 --- a/src/backend/storage/lmgr/spin.c +++ b/src/backend/storage/lmgr/spin.c @@ -6,7 +6,8 @@ * * For machines that have test-and-set (TAS) instructions, s_lock.h/.c * define the spinlock implementation. This file contains only a stub - * implementation for spinlocks using SysV semaphores. 
The semaphore method + * implementation for spinlocks using PGSemaphores. Unless semaphores + * are implemented in a way that doesn't involve a kernel call, this * is too slow to be very useful :-( * * @@ -15,143 +16,49 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/spin.c,v 1.7 2001/11/05 17:46:28 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/spin.c,v 1.8 2002/05/05 00:03:28 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" -#include - -#include "storage/ipc.h" -/* In Ultrix, sem.h and shm.h must be included AFTER ipc.h */ -#ifdef HAVE_SYS_SEM_H -#include -#endif - -#if defined(__darwin__) -#include "port/darwin/sem.h" -#endif - #include "storage/lwlock.h" -#include "storage/proc.h" +#include "storage/pg_sema.h" #include "storage/spin.h" #ifdef HAS_TEST_AND_SET /* - * CreateSpinlocks --- create and initialize spinlocks during startup + * Report number of semaphores needed to support spinlocks. */ -void -CreateSpinlocks(void) +int +SpinlockSemas(void) { - /* no-op when we have TAS spinlocks */ + return 0; } #else /* !HAS_TEST_AND_SET */ /* - * No TAS, so spinlocks are implemented using SysV semaphores. - * - * Typedef slock_t stores the semId and sem number of the sema to use. - * The semas needed are created by CreateSpinlocks and doled out by - * s_init_lock_sema. - * - * Since many systems have a rather small SEMMSL limit on semas per set, - * we allocate the semaphores required in sets of SPINLOCKS_PER_SET semas. - * This value is deliberately made equal to PROC_NSEMS_PER_SET so that all - * sema sets allocated by Postgres will be the same size; that eases the - * semaphore-recycling logic in IpcSemaphoreCreate(). - * - * Note that the SpinLockIds array is not in shared memory; it is filled - * by the postmaster and then inherited through fork() by backends. This - * is OK because its contents do not change after shmem initialization. 
+ * No TAS, so spinlocks are implemented as PGSemaphores. */ -#define SPINLOCKS_PER_SET PROC_NSEMS_PER_SET - -static IpcSemaphoreId *SpinLockIds = NULL; - -static int numSpinSets = 0; /* number of sema sets used */ -static int numSpinLocks = 0; /* total number of semas allocated */ -static int nextSpinLock = 0; /* next free spinlock index */ - -static void SpinFreeAllSemaphores(void); - /* - * CreateSpinlocks --- create and initialize spinlocks during startup + * Report number of semaphores needed to support spinlocks. */ -void -CreateSpinlocks(void) +int +SpinlockSemas(void) { - int i; - - if (SpinLockIds == NULL) - { - /* - * Compute number of spinlocks needed. It would be cleaner to - * distribute this logic into the affected modules, similar to the - * way shmem space estimation is handled. - * - * For now, though, we just need a few spinlocks (10 should be - * plenty) plus one for each LWLock. - */ - numSpinLocks = NumLWLocks() + 10; - - /* might as well round up to a multiple of SPINLOCKS_PER_SET */ - numSpinSets = (numSpinLocks - 1) / SPINLOCKS_PER_SET + 1; - numSpinLocks = numSpinSets * SPINLOCKS_PER_SET; - - SpinLockIds = (IpcSemaphoreId *) - malloc(numSpinSets * sizeof(IpcSemaphoreId)); - Assert(SpinLockIds != NULL); - } - - for (i = 0; i < numSpinSets; i++) - SpinLockIds[i] = -1; - /* - * Arrange to delete semas on exit --- set this up now so that we will - * clean up if allocation fails. We use our own freeproc, rather than - * IpcSemaphoreCreate's removeOnExit option, because we don't want to - * fill up the on_shmem_exit list with a separate entry for each - * semaphore set. + * It would be cleaner to distribute this logic into the affected modules, + * similar to the way shmem space estimation is handled. + * + * For now, though, we just need a few spinlocks (10 should be + * plenty) plus one for each LWLock. 
*/ - on_shmem_exit(SpinFreeAllSemaphores, 0); - - /* Create sema sets and set all semas to count 1 */ - for (i = 0; i < numSpinSets; i++) - { - SpinLockIds[i] = IpcSemaphoreCreate(SPINLOCKS_PER_SET, - IPCProtection, - 1, - false); - } - - /* Init counter for allocating dynamic spinlocks */ - nextSpinLock = 0; -} - -/* - * SpinFreeAllSemaphores - - * called at shmem_exit time, ie when exiting the postmaster or - * destroying shared state for a failed set of backends. - * Free up all the semaphores allocated for spinlocks. - */ -static void -SpinFreeAllSemaphores(void) -{ - int i; - - for (i = 0; i < numSpinSets; i++) - { - if (SpinLockIds[i] >= 0) - IpcSemaphoreKill(SpinLockIds[i]); - } - free(SpinLockIds); - SpinLockIds = NULL; + return NumLWLocks() + 10; } /* @@ -161,30 +68,28 @@ SpinFreeAllSemaphores(void) void s_init_lock_sema(volatile slock_t *lock) { - if (nextSpinLock >= numSpinLocks) - elog(FATAL, "s_init_lock_sema: not enough semaphores"); - lock->semId = SpinLockIds[nextSpinLock / SPINLOCKS_PER_SET]; - lock->sem = nextSpinLock % SPINLOCKS_PER_SET; - nextSpinLock++; + PGSemaphoreCreate((PGSemaphore) lock); } void s_unlock_sema(volatile slock_t *lock) { - IpcSemaphoreUnlock(lock->semId, lock->sem); + PGSemaphoreUnlock((PGSemaphore) lock); } bool s_lock_free_sema(volatile slock_t *lock) { - return IpcSemaphoreGetValue(lock->semId, lock->sem) > 0; + /* We don't currently use S_LOCK_FREE anyway */ + elog(ERROR, "spin.c does not support S_LOCK_FREE()"); + return false; } int tas_sema(volatile slock_t *lock) { /* Note that TAS macros return 0 if *success* */ - return !IpcSemaphoreTryLock(lock->semId, lock->sem); + return !PGSemaphoreTryLock((PGSemaphore) lock); } #endif /* !HAS_TEST_AND_SET */ diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 5305f1b2f7..75b740115c 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/tcop/postgres.c,v 
1.262 2002/04/20 21:56:15 petere Exp $ + * $Header: /cvsroot/pgsql/src/backend/tcop/postgres.c,v 1.263 2002/05/05 00:03:28 tgl Exp $ * * NOTES * this is the "main" module of the postgres backend and @@ -49,11 +49,12 @@ #include "parser/parse.h" #include "parser/parser.h" #include "rewrite/rewriteHandler.h" +#include "storage/ipc.h" +#include "storage/proc.h" #include "tcop/fastpath.h" #include "tcop/pquery.h" #include "tcop/tcopprot.h" #include "tcop/utility.h" -#include "storage/proc.h" #include "utils/exc.h" #include "utils/guc.h" #include "utils/memutils.h" @@ -1692,7 +1693,7 @@ PostgresMain(int argc, char *argv[], const char *username) if (!IsUnderPostmaster) { puts("\nPOSTGRES backend interactive interface "); - puts("$Revision: 1.262 $ $Date: 2002/04/20 21:56:15 $\n"); + puts("$Revision: 1.263 $ $Date: 2002/05/05 00:03:28 $\n"); } /* diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c index dedbb9f68d..572e21ba83 100644 --- a/src/backend/utils/error/elog.c +++ b/src/backend/utils/error/elog.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/error/elog.c,v 1.96 2002/04/21 00:22:52 ishii Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/error/elog.c,v 1.97 2002/05/05 00:03:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -30,6 +30,7 @@ #include "libpq/libpq.h" #include "libpq/pqformat.h" #include "miscadmin.h" +#include "storage/ipc.h" #include "storage/proc.h" #include "tcop/tcopprot.h" #include "utils/memutils.h" diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c index b708ffb279..91c5a3eb2f 100644 --- a/src/backend/utils/init/miscinit.c +++ b/src/backend/utils/init/miscinit.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/init/miscinit.c,v 1.88 2002/05/03 20:43:30 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/init/miscinit.c,v 1.89 2002/05/05 00:03:29 tgl Exp $ * 
*------------------------------------------------------------------------- */ @@ -32,6 +32,8 @@ #include "catalog/pg_shadow.h" #include "libpq/libpq-be.h" #include "miscadmin.h" +#include "storage/ipc.h" +#include "storage/pg_shmem.h" #include "utils/builtins.h" #include "utils/guc.h" #include "utils/lsyscache.h" @@ -819,25 +821,24 @@ CreateLockFile(const char *filename, bool amPostmaster, if (isDDLock) { char *ptr; - unsigned long shmKey, - shmId; + unsigned long id1, + id2; ptr = strchr(buffer, '\n'); if (ptr != NULL && (ptr = strchr(ptr + 1, '\n')) != NULL) { ptr++; - if (sscanf(ptr, "%lu %lu", &shmKey, &shmId) == 2) + if (sscanf(ptr, "%lu %lu", &id1, &id2) == 2) { - if (SharedMemoryIsInUse((IpcMemoryKey) shmKey, - (IpcMemoryId) shmId)) + if (PGSharedMemoryIsInUse(id1, id2)) { fprintf(stderr, - "Found a pre-existing shared memory block (ID %d) still in use.\n" + "Found a pre-existing shared memory block (key %lu, id %lu) still in use.\n" "If you're sure there are no old backends still running,\n" "remove the shared memory block with ipcrm(1), or just\n" "delete \"%s\".\n", - (int) shmId, filename); + id1, id2, filename); return false; } } @@ -941,11 +942,11 @@ TouchSocketLockFile(void) * This may be called multiple times in the life of a postmaster, if we * delete and recreate shmem due to backend crash. Therefore, be prepared * to overwrite existing information. (As of 7.1, a postmaster only creates - * one shm seg anyway; but for the purposes here, if we did have more than + * one shm seg at a time; but for the purposes here, if we did have more than * one then any one of them would do anyway.) */ void -RecordSharedMemoryInLockFile(IpcMemoryKey shmKey, IpcMemoryId shmId) +RecordSharedMemoryInLockFile(unsigned long id1, unsigned long id2) { int fd; int len; @@ -988,11 +989,10 @@ RecordSharedMemoryInLockFile(IpcMemoryKey shmKey, IpcMemoryId shmId) ptr++; /* - * Append shm key and ID. Format to try to keep it the same length + * Append key information. 
Format to try to keep it the same length * always (trailing junk won't hurt, but might confuse humans). */ - sprintf(ptr, "%9lu %9lu\n", - (unsigned long) shmKey, (unsigned long) shmId); + sprintf(ptr, "%9lu %9lu\n", id1, id2); /* * And rewrite the data. Since we write in a single kernel call, this diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index a2d9758cc1..95974c6d6a 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/init/postinit.c,v 1.103 2002/04/27 21:24:34 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/init/postinit.c,v 1.104 2002/05/05 00:03:29 tgl Exp $ * * *------------------------------------------------------------------------- @@ -32,6 +32,7 @@ #include "mb/pg_wchar.h" #include "miscadmin.h" #include "storage/backendid.h" +#include "storage/ipc.h" #include "storage/proc.h" #include "storage/sinval.h" #include "storage/smgr.h" @@ -181,7 +182,7 @@ InitCommunication(void) * postmaster. Create private "shmem" and semaphores. Setting * MaxBackends = 16 is arbitrary. 
*/ - CreateSharedMemoryAndSemaphores(true, 16); + CreateSharedMemoryAndSemaphores(true, 16, 0); } } diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 99f7fae88b..09e1c0fe63 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -12,7 +12,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: miscadmin.h,v 1.102 2002/04/04 04:25:51 momjian Exp $ + * $Id: miscadmin.h,v 1.103 2002/05/05 00:03:29 tgl Exp $ * * NOTES * some of the information in this file should be moved to @@ -23,7 +23,8 @@ #ifndef MISCADMIN_H #define MISCADMIN_H -#include "storage/ipc.h" +#include + /***************************************************************************** * System interrupt and critical section handling @@ -291,8 +292,8 @@ extern void BaseInit(void); extern bool CreateDataDirLockFile(const char *datadir, bool amPostmaster); extern bool CreateSocketLockFile(const char *socketfile, bool amPostmaster); extern void TouchSocketLockFile(void); -extern void RecordSharedMemoryInLockFile(IpcMemoryKey shmKey, - IpcMemoryId shmId); +extern void RecordSharedMemoryInLockFile(unsigned long id1, + unsigned long id2); extern void ValidatePgVersion(const char *path); diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 80b904181d..b3a5b8d6e1 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -8,7 +8,7 @@ * or in pg_config.h afterwards. Of course, if you edit pg_config.h, then your * changes will be overwritten the next time you run configure. 
* - * $Id: pg_config.h.in,v 1.23 2002/04/21 19:48:19 thomas Exp $ + * $Id: pg_config.h.in,v 1.24 2002/05/05 00:03:29 tgl Exp $ */ #ifndef PG_CONFIG_H @@ -711,6 +711,15 @@ extern int fdatasync(int fildes); /* Define if you have syslog() */ #undef HAVE_SYSLOG +/* Define exactly one of these symbols to select semaphore implementation */ +#undef USE_NAMED_POSIX_SEMAPHORES +#undef USE_UNNAMED_POSIX_SEMAPHORES +#undef USE_SYSV_SEMAPHORES + +/* Define exactly one of these symbols to select shared-mem implementation */ +#undef USE_SYSV_SHARED_MEMORY + + /* *------------------------------------------------------------------------ * Part 4: pull in system-specific declarations. diff --git a/src/include/storage/ipc.h b/src/include/storage/ipc.h index 6a96ea2d12..5ebee36b9f 100644 --- a/src/include/storage/ipc.h +++ b/src/include/storage/ipc.h @@ -3,63 +3,21 @@ * ipc.h * POSTGRES inter-process communication definitions. * + * This file is misnamed, as it no longer has much of anything directly + * to do with IPC. The functionality here is concerned with managing + * exit-time cleanup for either a postmaster or a backend. + * * * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: ipc.h,v 1.54 2001/11/05 17:46:35 momjian Exp $ + * $Id: ipc.h,v 1.55 2002/05/05 00:03:29 tgl Exp $ * - * Some files that would normally need to include only sys/ipc.h must - * instead include this file because on Ultrix, sys/ipc.h is not designed - * to be included multiple times. This file (by virtue of the ifndef IPC_H) - * is. 
*------------------------------------------------------------------------- */ #ifndef IPC_H #define IPC_H -#include -#ifdef HAVE_SYS_IPC_H -#include -#endif /* HAVE_SYS_IPC_H */ - -#ifndef HAVE_UNION_SEMUN -union semun -{ - int val; - struct semid_ds *buf; - unsigned short *array; -}; -#endif - - -/* generic IPC definitions */ - -#define IPCProtection (0600) /* access/modify by user only */ - -/* semaphore definitions */ - -typedef uint32 IpcSemaphoreKey; /* semaphore key passed to semget(2) */ -typedef int IpcSemaphoreId; /* semaphore ID returned by semget(2) */ - -#define IPC_NMAXSEM 32 /* maximum number of semaphores per semID */ - -#define PGSemaMagic 537 /* must be less than SEMVMX */ - -/* shared memory definitions */ - -typedef uint32 IpcMemoryKey; /* shared memory key passed to shmget(2) */ -typedef int IpcMemoryId; /* shared memory ID returned by shmget(2) */ - -typedef struct PGShmemHeader /* standard header for all Postgres shmem */ -{ - int32 magic; /* magic # to identify Postgres segments */ -#define PGShmemMagic 679834892 - pid_t creatorPID; /* PID of creating process */ - uint32 totalsize; /* total size of segment */ - uint32 freeoffset; /* offset to first free space */ -} PGShmemHeader; - /* ipc.c */ extern bool proc_exit_inprogress; @@ -70,24 +28,9 @@ extern void on_proc_exit(void (*function) (), Datum arg); extern void on_shmem_exit(void (*function) (), Datum arg); extern void on_exit_reset(void); -extern void IpcInitKeyAssignment(int port); - -extern IpcSemaphoreId IpcSemaphoreCreate(int numSems, int permission, - int semStartValue, - bool removeOnExit); -extern void IpcSemaphoreKill(IpcSemaphoreId semId); -extern void IpcSemaphoreLock(IpcSemaphoreId semId, int sem, bool interruptOK); -extern void IpcSemaphoreUnlock(IpcSemaphoreId semId, int sem); -extern bool IpcSemaphoreTryLock(IpcSemaphoreId semId, int sem); -extern int IpcSemaphoreGetValue(IpcSemaphoreId semId, int sem); - -extern PGShmemHeader *IpcMemoryCreate(uint32 size, bool makePrivate, 
- int permission); - -extern bool SharedMemoryIsInUse(IpcMemoryKey shmKey, IpcMemoryId shmId); - /* ipci.c */ extern void CreateSharedMemoryAndSemaphores(bool makePrivate, - int maxBackends); + int maxBackends, + int port); #endif /* IPC_H */ diff --git a/src/include/storage/pg_sema.h b/src/include/storage/pg_sema.h new file mode 100644 index 0000000000..65c1936c29 --- /dev/null +++ b/src/include/storage/pg_sema.h @@ -0,0 +1,76 @@ +/*------------------------------------------------------------------------- + * + * pg_sema.h + * Platform-independent API for semaphores. + * + * PostgreSQL requires counting semaphores (the kind that keep track of + * multiple unlock operations, and will allow an equal number of subsequent + * lock operations before blocking). The underlying implementation is + * not the same on every platform. This file defines the API that must + * be provided by each port. + * + * + * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * $Id: pg_sema.h,v 1.1 2002/05/05 00:03:29 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#ifndef PG_SEMA_H +#define PG_SEMA_H + +/* + * PGSemaphoreData and pointer type PGSemaphore are the data structure + * representing an individual semaphore. The contents of PGSemaphoreData + * vary across implementations and must never be touched by platform- + * independent code. PGSemaphoreData structures are always allocated + * in shared memory (to support implementations where the data changes during + * lock/unlock). + * + * pg_config.h must define exactly one of the USE_xxx_SEMAPHORES symbols. 
+ */ + +#ifdef USE_NAMED_POSIX_SEMAPHORES + +#include <semaphore.h> + +typedef sem_t *PGSemaphoreData; + +#endif + +#ifdef USE_UNNAMED_POSIX_SEMAPHORES + +#include <semaphore.h> + +typedef sem_t PGSemaphoreData; + +#endif + +#ifdef USE_SYSV_SEMAPHORES + +typedef struct PGSemaphoreData +{ + int semId; /* semaphore set identifier */ + int semNum; /* semaphore number within set */ +} PGSemaphoreData; + +#endif + +typedef PGSemaphoreData *PGSemaphore; + + +/* Module initialization (called during postmaster start or shmem reinit) */ +extern void PGReserveSemaphores(int maxSemas, int port); +/* Initialize a PGSemaphore structure to represent a sema with count 1 */ +extern void PGSemaphoreCreate(PGSemaphore sema); +/* Reset a previously-initialized PGSemaphore to have count 0 */ +extern void PGSemaphoreReset(PGSemaphore sema); +/* Lock a semaphore (decrement count), blocking if count would be < 0 */ +extern void PGSemaphoreLock(PGSemaphore sema, bool interruptOK); +/* Unlock a semaphore (increment count) */ +extern void PGSemaphoreUnlock(PGSemaphore sema); +/* Lock a semaphore only if able to do so without blocking */ +extern bool PGSemaphoreTryLock(PGSemaphore sema); + +#endif /* PG_SEMA_H */ diff --git a/src/include/storage/pg_shmem.h b/src/include/storage/pg_shmem.h new file mode 100644 index 0000000000..fa35e63f72 --- /dev/null +++ b/src/include/storage/pg_shmem.h @@ -0,0 +1,44 @@ +/*------------------------------------------------------------------------- + * + * pg_shmem.h + * Platform-independent API for shared memory support. + * + * Every port is expected to support shared memory with approximately + * SysV-ish semantics; in particular, a memory block is not anonymous + * but has an ID, and we must be able to tell whether there are any + * remaining processes attached to a block of a specified ID. + * + * To simplify life for the SysV implementation, the ID is assumed to + * consist of two unsigned long values (these are key and ID in SysV + * terms).
Other platforms may ignore the second value if they need + * only one ID number. + * + * + * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * $Id: pg_shmem.h,v 1.1 2002/05/05 00:03:29 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#ifndef PG_SHMEM_H +#define PG_SHMEM_H + +#include + + +typedef struct PGShmemHeader /* standard header for all Postgres shmem */ +{ + int32 magic; /* magic # to identify Postgres segments */ +#define PGShmemMagic 679834892 + pid_t creatorPID; /* PID of creating process */ + uint32 totalsize; /* total size of segment */ + uint32 freeoffset; /* offset to first free space */ +} PGShmemHeader; + + +extern PGShmemHeader *PGSharedMemoryCreate(uint32 size, bool makePrivate, + int port); +extern bool PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2); + +#endif /* PG_SHMEM_H */ diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index e1700b2372..e276fddf9c 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: proc.h,v 1.54 2001/11/06 00:38:26 tgl Exp $ + * $Id: proc.h,v 1.55 2002/05/05 00:03:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -16,16 +16,10 @@ #include "access/xlog.h" #include "storage/backendid.h" -#include "storage/ipc.h" #include "storage/lock.h" +#include "storage/pg_sema.h" -typedef struct -{ - IpcSemaphoreId semId; /* SysV semaphore set ID */ - int semNum; /* semaphore number within set */ -} SEMA; - /* * Each backend has a PROC struct in shared memory. There is also a list of * currently-unused PROC structs that will be reallocated to new backends. 
@@ -39,7 +33,7 @@ struct PROC /* proc->links MUST BE FIRST IN STRUCT (see ProcSleep,ProcWakeup,etc) */ SHM_QUEUE links; /* list link if process is in a list */ - SEMA sem; /* ONE semaphore to sleep on */ + PGSemaphoreData sem; /* ONE semaphore to sleep on */ int errType; /* STATUS_OK or STATUS_ERROR after wakeup */ TransactionId xid; /* transaction currently being executed by @@ -84,47 +78,11 @@ extern PROC *MyProc; /* * There is one ProcGlobal struct for the whole installation. - * - * PROC_NSEMS_PER_SET is the number of semaphores in each sys-V semaphore set - * we allocate. It must be no more than 32 (or however many bits in an int - * on your machine), or our free-semaphores bitmap won't work. It also must - * be *less than* your kernel's SEMMSL (max semaphores per set) parameter, - * which is often around 25. (Less than, because we allocate one extra sema - * in each set for identification purposes.) - * - * PROC_SEM_MAP_ENTRIES is the number of semaphore sets we need to allocate - * to keep track of up to maxBackends backends. Note that we need one extra - * semaphore (see storage/lmgr/proc.c), so the computation may look wrong, - * but it's right. */ -#define PROC_NSEMS_PER_SET 16 -#define PROC_SEM_MAP_ENTRIES(maxBackends) ((maxBackends)/PROC_NSEMS_PER_SET+1) - -typedef struct -{ - /* info about a single set of per-process semaphores */ - IpcSemaphoreId procSemId; - int32 freeSemMap; - - /* - * In freeSemMap, bit i is set if the i'th semaphore of this sema set - * is allocated to a process. (i counts from 0 at the LSB) - */ -} SEM_MAP_ENTRY; - typedef struct PROC_HDR { /* Head of list of free PROC structures */ SHMEM_OFFSET freeProcs; - - /* Info about semaphore sets used for per-process semaphores */ - int semMapEntries; - - /* - * VARIABLE LENGTH ARRAY: actual length is semMapEntries. THIS MUST BE - * LAST IN THE STRUCT DECLARATION. 
- */ - SEM_MAP_ENTRY procSemMap[1]; } PROC_HDR; @@ -135,6 +93,7 @@ extern int DeadlockTimeout; /* * Function Prototypes */ +extern int ProcGlobalSemas(int maxBackends); extern void InitProcGlobal(int maxBackends); extern void InitProcess(void); extern void InitDummyProcess(void); diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h index be79b51a17..41a79102d7 100644 --- a/src/include/storage/s_lock.h +++ b/src/include/storage/s_lock.h @@ -63,14 +63,14 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: s_lock.h,v 1.97 2002/01/29 15:44:42 tgl Exp $ + * $Id: s_lock.h,v 1.98 2002/05/05 00:03:29 tgl Exp $ * *------------------------------------------------------------------------- */ #ifndef S_LOCK_H #define S_LOCK_H -#include "storage/ipc.h" +#include "storage/pg_sema.h" #if defined(HAS_TEST_AND_SET) @@ -438,7 +438,6 @@ extern slock_t wc_tas(volatile slock_t *lock); * AIX (POWER) * * Note that slock_t on POWER/POWER2/PowerPC is int instead of char - * (see storage/ipc.h). */ #define TAS(lock) cs((int *) (lock), 0, 1) #endif /* _AIX */ @@ -462,17 +461,11 @@ extern slock_t wc_tas(volatile slock_t *lock); #else /* !HAS_TEST_AND_SET */ /* - * Fake spinlock implementation using SysV semaphores --- slow and prone + * Fake spinlock implementation using semaphores --- slow and prone * to fall foul of kernel limits on number of semaphores, so don't use this * unless you must! The subroutines appear in spin.c. 
*/ - -typedef struct -{ - /* reference to semaphore used to implement this spinlock */ - IpcSemaphoreId semId; - int sem; -} slock_t; +typedef PGSemaphoreData slock_t; extern bool s_lock_free_sema(volatile slock_t *lock); extern void s_unlock_sema(volatile slock_t *lock); diff --git a/src/include/storage/spin.h b/src/include/storage/spin.h index 6ec0552ca6..8279af0efb 100644 --- a/src/include/storage/spin.h +++ b/src/include/storage/spin.h @@ -40,7 +40,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: spin.h,v 1.19 2001/11/05 17:46:35 momjian Exp $ + * $Id: spin.h,v 1.20 2002/05/05 00:03:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -72,6 +72,6 @@ #define SpinLockFree(lock) S_LOCK_FREE(lock) -extern void CreateSpinlocks(void); +extern int SpinlockSemas(void); #endif /* SPIN_H */ diff --git a/src/pl/plperl/SPI.xs b/src/pl/plperl/SPI.xs index 61f59fa43c..ca8030ae3f 100644 --- a/src/pl/plperl/SPI.xs +++ b/src/pl/plperl/SPI.xs @@ -21,22 +21,6 @@ #include "catalog/pg_type.h" /* perl stuff */ -/* - * Evil Code Alert - * - * both posgreSQL and perl try to do 'the right thing' - * and provide union semun if the platform doesn't define - * it in a system header. - * psql uses HAVE_UNION_SEMUN - * perl uses HAS_UNION_SEMUN - * together, they cause compile errors. - * If we need it, the psql headers above will provide it. - * So we tell perl that we have it. - */ -#ifndef HAS_UNION_SEMUN -#define HAS_UNION_SEMUN -#endif - #include "EXTERN.h" #include "perl.h" #include "XSUB.h" diff --git a/src/pl/plperl/plperl.c b/src/pl/plperl/plperl.c index 5ba80b7b32..d33188ae28 100644 --- a/src/pl/plperl/plperl.c +++ b/src/pl/plperl/plperl.c @@ -33,7 +33,7 @@ * ENHANCEMENTS, OR MODIFICATIONS. 
* * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/pl/plperl/plperl.c,v 1.29 2002/03/06 18:50:26 momjian Exp $ + * $Header: /cvsroot/pgsql/src/pl/plperl/plperl.c,v 1.30 2002/05/05 00:03:29 tgl Exp $ * **********************************************************************/ @@ -62,22 +62,6 @@ #include "catalog/pg_type.h" /* perl stuff */ -/* - * Evil Code Alert - * - * both posgreSQL and perl try to do 'the right thing' - * and provide union semun if the platform doesn't define - * it in a system header. - * psql uses HAVE_UNION_SEMUN - * perl uses HAS_UNION_SEMUN - * together, they cause compile errors. - * If we need it, the psql headers above will provide it. - * So we tell perl that we have it. - */ -#ifndef HAS_UNION_SEMUN -#define HAS_UNION_SEMUN -#endif - #include "EXTERN.h" #include "perl.h" #include "XSUB.h" diff --git a/src/template/darwin b/src/template/darwin index 9685f4840a..3c9f0cbe8d 100644 --- a/src/template/darwin +++ b/src/template/darwin @@ -1,3 +1,6 @@ # -traditional-cpp means "don't use apple's cpp-precomp" on darwin # this should change to -no-cpp-precomp when that flag is implemented CC="$CC -traditional-cpp" + +# Select appropriate semaphore support +USE_NAMED_POSIX_SEMAPHORES=1 -- 2.11.0