1 /*-------------------------------------------------------------------------
4 * Implement PGSemaphores using SysV semaphore facilities
7 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
11 * $Header: /cvsroot/pgsql/src/backend/port/sysv_sema.c,v 1.2 2002/06/20 20:29:33 momjian Exp $
13 *-------------------------------------------------------------------------
21 #include <sys/types.h>
28 #ifdef HAVE_KERNEL_OS_H
29 #include <kernel/OS.h>
32 #include "miscadmin.h"
33 #include "storage/ipc.h"
34 #include "storage/pg_sema.h"
37 #ifndef HAVE_UNION_SEMUN
42 unsigned short *array;
46 typedef uint32 IpcSemaphoreKey; /* semaphore key passed to semget(2) */
47 typedef int IpcSemaphoreId; /* semaphore ID returned by semget(2) */
50 * SEMAS_PER_SET is the number of useful semaphores in each semaphore set
51 * we allocate. It must be *less than* your kernel's SEMMSL (max semaphores
52 * per set) parameter, which is often around 25. (Less than, because we
53 * allocate one extra sema in each set for identification purposes.)
55 #define SEMAS_PER_SET 16
57 #define IPCProtection (0600) /* access/modify by user only */
59 #define PGSemaMagic 537 /* must be less than SEMVMX */
62 static IpcSemaphoreId *mySemaSets; /* IDs of sema sets acquired so far */
63 static int numSemaSets; /* number of sema sets acquired so far */
64 static int maxSemaSets; /* allocated size of mySemaSets array */
65 static IpcSemaphoreKey nextSemaKey; /* next key to try using */
66 static int nextSemaNumber; /* next free sem num in last sema set */
69 static IpcSemaphoreId InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey,
71 static void IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum,
73 static void IpcSemaphoreKill(IpcSemaphoreId semId);
74 static int IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum);
75 static pid_t IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum);
76 static IpcSemaphoreId IpcSemaphoreCreate(int numSems);
77 static void ReleaseSemaphores(int status, Datum arg);
81 * InternalIpcSemaphoreCreate
83 * Attempt to create a new semaphore set with the specified key.
84 * Will fail (return -1) if such a set already exists.
86 * If we fail with a failure code other than collision-with-existing-set,
87 * print out an error and abort. Other types of errors suggest nonrecoverable
91 InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, int numSems)
95 semId = semget(semKey, numSems, IPC_CREAT | IPC_EXCL | IPCProtection);
100 * Fail quietly if error indicates a collision with existing set.
101 * One would expect EEXIST, given that we said IPC_EXCL, but
102 * perhaps we could get a permission violation instead? Also,
103 * EIDRM might occur if an old set is slated for destruction but
106 if (errno == EEXIST || errno == EACCES
114 * Else complain and abort
116 fprintf(stderr, "IpcSemaphoreCreate: semget(key=%d, num=%d, 0%o) failed: %s\n",
117 (int) semKey, numSems, (IPC_CREAT | IPC_EXCL | IPCProtection),
122 "\nThis error does *not* mean that you have run out of disk space.\n"
124 "It occurs when either the system limit for the maximum number of\n"
125 "semaphore sets (SEMMNI), or the system wide maximum number of\n"
126 "semaphores (SEMMNS), would be exceeded. You need to raise the\n"
127 "respective kernel parameter. Alternatively, reduce PostgreSQL's\n"
128 "consumption of semaphores by reducing its max_connections parameter\n"
131 "The PostgreSQL Administrator's Guide contains more information about\n"
132 "configuring your system for PostgreSQL.\n\n",
142 * Initialize a semaphore to the specified value.
145 IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum, int value)
150 if (semctl(semId, semNum, SETVAL, semun) < 0)
152 fprintf(stderr, "IpcSemaphoreInitialize: semctl(id=%d, %d, SETVAL, %d) failed: %s\n",
153 semId, semNum, value, strerror(errno));
157 "You possibly need to raise your kernel's SEMVMX value to be at least\n"
158 "%d. Look into the PostgreSQL documentation for details.\n",
166 * IpcSemaphoreKill(semId) - removes a semaphore set
169 IpcSemaphoreKill(IpcSemaphoreId semId)
173 semun.val = 0; /* unused, but keep compiler quiet */
175 if (semctl(semId, 0, IPC_RMID, semun) < 0)
176 fprintf(stderr, "IpcSemaphoreKill: semctl(%d, 0, IPC_RMID, ...) failed: %s\n",
177 semId, strerror(errno));
180 * We used to report a failure via elog(WARNING), but that's pretty
181 * pointless considering any client has long since disconnected ...
185 /* Get the current value (semval) of the semaphore */
187 IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum)
189 union semun dummy; /* for Solaris */
191 dummy.val = 0; /* unused */
193 return semctl(semId, semNum, GETVAL, dummy);
196 /* Get the PID of the last process to do semop() on the semaphore */
198 IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum)
200 union semun dummy; /* for Solaris */
202 dummy.val = 0; /* unused */
204 return semctl(semId, semNum, GETPID, dummy);
209 * Create a semaphore set with the given number of useful semaphores
210 * (an additional sema is actually allocated to serve as identifier).
211 * Dead Postgres sema sets are recycled if found, but we do not fail
212 * upon collision with non-Postgres sema sets.
214 * The idea here is to detect and re-use keys that may have been assigned
215 * by a crashed postmaster or backend.
217 static IpcSemaphoreId
218 IpcSemaphoreCreate(int numSems)
220 IpcSemaphoreId semId;
222 PGSemaphoreData mysema;
224 /* Loop till we find a free IPC key */
225 for (nextSemaKey++; ; nextSemaKey++)
229 /* Try to create new semaphore set */
230 semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1);
232 break; /* successful create */
234 /* See if it looks to be leftover from a dead Postgres process */
235 semId = semget(nextSemaKey, numSems + 1, 0);
237 continue; /* failed: must be some other app's */
238 if (IpcSemaphoreGetValue(semId, numSems) != PGSemaMagic)
239 continue; /* sema belongs to a non-Postgres app */
242 * If the creator PID (the sempid of the identification semaphore) is my
243 * own PID or does not belong to any extant process, it's safe to zap it.
245 creatorPID = IpcSemaphoreGetLastPID(semId, numSems);
247 continue; /* oops, GETPID failed */
248 if (creatorPID != getpid())
250 if (kill(creatorPID, 0) == 0 ||
252 continue; /* sema belongs to a live process */
256 * The sema set appears to be from a dead Postgres process, or
257 * from a previous cycle of life in this same process. Zap it, if
258 * possible. This probably shouldn't fail, but if it does, assume
259 * the sema set belongs to someone else after all, and continue
262 semun.val = 0; /* unused, but keep compiler quiet */
263 if (semctl(semId, 0, IPC_RMID, semun) < 0)
267 * Now try again to create the sema set.
269 semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1);
271 break; /* successful create */
274 * We can only get here if some other process managed to create the
275 * same sema key before we did. Let it have that one, and loop
276 * around to try the next key.
281 * OK, we created a new sema set. Mark it as created by this process.
282 * We do this by setting the spare semaphore to PGSemaMagic-1 and then
283 * incrementing it with semop(). That leaves it with value
284 * PGSemaMagic and sempid referencing this process.
286 IpcSemaphoreInitialize(semId, numSems, PGSemaMagic - 1);
287 mysema.semId = semId;
288 mysema.semNum = numSems;
289 PGSemaphoreUnlock(&mysema);
296 * PGReserveSemaphores --- initialize semaphore support
298 * This is called during postmaster start or shared memory reinitialization.
299 * It should do whatever is needed to be able to support up to maxSemas
300 * subsequent PGSemaphoreCreate calls. Also, if any system resources
301 * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit
302 * callback to release them.
304 * The port number is passed for possible use as a key (for SysV, we use
305 * it to generate the starting semaphore key). In a standalone backend,
306 * zero will be passed.
308 * In the SysV implementation, we acquire semaphore sets on-demand; the
309 * maxSemas parameter is just used to size the array that keeps track of
310 * acquired sets for subsequent releasing.
313 PGReserveSemaphores(int maxSemas, int port)
315 maxSemaSets = (maxSemas + SEMAS_PER_SET-1) / SEMAS_PER_SET;
316 mySemaSets = (IpcSemaphoreId *)
317 malloc(maxSemaSets * sizeof(IpcSemaphoreId));
318 if (mySemaSets == NULL)
319 elog(PANIC, "Out of memory in PGReserveSemaphores");
321 nextSemaKey = port * 1000;
322 nextSemaNumber = SEMAS_PER_SET; /* force sema set alloc on 1st call */
324 on_shmem_exit(ReleaseSemaphores, 0);
328 * Release semaphores at shutdown or shmem reinitialization
330 * (called as an on_shmem_exit callback, hence funny argument list)
333 ReleaseSemaphores(int status, Datum arg)
337 for (i = 0; i < numSemaSets; i++)
338 IpcSemaphoreKill(mySemaSets[i]);
345 * Initialize a PGSemaphore structure to represent a sema with count 1
348 PGSemaphoreCreate(PGSemaphore sema)
350 /* Can't do this in a backend, because static state is postmaster's */
351 Assert(!IsUnderPostmaster);
353 if (nextSemaNumber >= SEMAS_PER_SET)
355 /* Time to allocate another semaphore set */
356 if (numSemaSets >= maxSemaSets)
357 elog(PANIC, "PGSemaphoreCreate: too many semaphores created");
358 mySemaSets[numSemaSets] = IpcSemaphoreCreate(SEMAS_PER_SET);
362 /* Assign the next free semaphore in the current set */
363 sema->semId = mySemaSets[numSemaSets-1];
364 sema->semNum = nextSemaNumber++;
365 /* Initialize it to count 1 */
366 IpcSemaphoreInitialize(sema->semId, sema->semNum, 1);
372 * Reset a previously-initialized PGSemaphore to have count 0
375 PGSemaphoreReset(PGSemaphore sema)
377 IpcSemaphoreInitialize(sema->semId, sema->semNum, 0);
383 * Lock a semaphore (decrement count), blocking if count would be < 0
386 PGSemaphoreLock(PGSemaphore sema, bool interruptOK)
391 sops.sem_op = -1; /* decrement */
393 sops.sem_num = sema->semNum;
396 * Note: if errStatus is -1 and errno == EINTR, it means we
397 * returned from the operation prematurely because we were sent a
398 * signal, so we try to lock the semaphore again.
400 * Each time around the loop, we check for a cancel/die interrupt. We
401 * assume that if such an interrupt comes in while we are waiting, it
402 * will cause the semop() call to exit with errno == EINTR, so that we
403 * will be able to service the interrupt (if not in a critical section
406 * Once we acquire the lock, we do NOT check for an interrupt before
407 * returning. The caller needs to be able to record ownership of the
408 * lock before any interrupt can be accepted.
410 * There is a window of a few instructions between CHECK_FOR_INTERRUPTS
411 * and entering the semop() call. If a cancel/die interrupt occurs in
412 * that window, we would fail to notice it until after we acquire the
413 * lock (or get another interrupt to escape the semop()). We can
414 * avoid this problem by temporarily setting ImmediateInterruptOK to
415 * true before we do CHECK_FOR_INTERRUPTS; then, a die() interrupt in
416 * this interval will execute directly. However, there is a huge
417 * pitfall: there is another window of a few instructions after the
418 * semop() before we are able to reset ImmediateInterruptOK. If an
419 * interrupt occurs then, we'll lose control, which means that the
420 * lock has been acquired but our caller did not get a chance to
421 * record the fact. Therefore, we only set ImmediateInterruptOK if the
422 * caller tells us it's OK to do so, i.e., the caller does not need to
423 * record acquiring the lock. (This is currently true for lock manager
424 * locks, since the process that granted us the lock did all the
425 * necessary state updates. It's not true for SysV semaphores used to
426 * implement LW locks or emulate spinlocks --- but the wait time for
427 * such locks should not be very long, anyway.)
431 ImmediateInterruptOK = interruptOK;
432 CHECK_FOR_INTERRUPTS();
433 errStatus = semop(sema->semId, &sops, 1);
434 ImmediateInterruptOK = false;
435 } while (errStatus < 0 && errno == EINTR);
439 fprintf(stderr, "PGSemaphoreLock: semop(id=%d) failed: %s\n",
440 sema->semId, strerror(errno));
448 * Unlock a semaphore (increment count)
451 PGSemaphoreUnlock(PGSemaphore sema)
456 sops.sem_op = 1; /* increment */
458 sops.sem_num = sema->semNum;
461 * Note: if errStatus is -1 and errno == EINTR, it means we
462 * returned from the operation prematurely because we were sent a
463 * signal, so we try to unlock the semaphore again. It is not clear this
464 * can really happen, but we might as well cope.
468 errStatus = semop(sema->semId, &sops, 1);
469 } while (errStatus < 0 && errno == EINTR);
473 fprintf(stderr, "PGSemaphoreUnlock: semop(id=%d) failed: %s\n",
474 sema->semId, strerror(errno));
482 * Lock a semaphore only if able to do so without blocking
485 PGSemaphoreTryLock(PGSemaphore sema)
490 sops.sem_op = -1; /* decrement */
491 sops.sem_flg = IPC_NOWAIT; /* but don't block */
492 sops.sem_num = sema->semNum;
495 * Note: if errStatus is -1 and errno == EINTR, it means we
496 * returned from the operation prematurely because we were sent a
497 * signal, so we try to lock the semaphore again.
501 errStatus = semop(sema->semId, &sops, 1);
502 } while (errStatus < 0 && errno == EINTR);
506 /* Expect EAGAIN or EWOULDBLOCK (platform-dependent) */
509 return false; /* failed to lock it */
511 #if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
512 if (errno == EWOULDBLOCK)
513 return false; /* failed to lock it */
515 /* Otherwise we got trouble */
516 fprintf(stderr, "PGSemaphoreTryLock: semop(id=%d) failed: %s\n",
517 sema->semId, strerror(errno));