1 /* flock.cc. NT specific implementation of advisory file locking.
3 Copyright 2003, 2008, 2009, 2010, 2011 Red Hat, Inc.
5 This file is part of Cygwin.
7 This software is a copyrighted work licensed under the terms of the
8 Cygwin license. Please consult the file "CYGWIN_LICENSE" for
11 /* The basic mechanism as well as the data structures used in the
12 implementation below are taken from the FreeBSD repository on 2008-03-18.
13 The essential code of the lf_XXX functions has been taken from the
14 module src/sys/kern/kern_lockf.c. It has been adapted to use NT
15 global namespace subdirs and event objects for synchronization
18 So, the following copyright applies to most of the code in the lf_XXX
21 * Copyright (c) 1982, 1986, 1989, 1993
22 * The Regents of the University of California. All rights reserved.
24 * This code is derived from software contributed to Berkeley by
25 * Scooter Morris at Genentech Inc.
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
30 * 1. Redistributions of source code must retain the above copyright
31 * notice, this list of conditions and the following disclaimer.
32 * 2. Redistributions in binary form must reproduce the above copyright
33 * notice, this list of conditions and the following disclaimer in the
34 * documentation and/or other materials provided with the distribution.
35 * 4. Neither the name of the University nor the names of its contributors
36 * may be used to endorse or promote products derived from this software
37 * without specific prior written permission.
39 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
40 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
42 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
43 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
44 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
45 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
46 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
47 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
48 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * @(#)ufs_lockf.c 8.3 (Berkeley) 1/6/94
55 * The flock() function is based upon source taken from the Red Hat
56 * implementation used in their imap-2002d SRPM.
58 * $RH: flock.c,v 1.2 2000/08/23 17:07:00 nalin Exp $
61 /* The lockf function is based upon FreeBSD sources with the following
65 * Copyright (c) 1997 The NetBSD Foundation, Inc.
66 * All rights reserved.
68 * This code is derived from software contributed to The NetBSD Foundation
71 * Redistribution and use in source and binary forms, with or without
72 * modification, are permitted provided that the following conditions
74 * 1. Redistributions of source code must retain the above copyright
75 * notice, this list of conditions and the following disclaimer.
76 * 2. Redistributions in binary form must reproduce the above copyright
77 * notice, this list of conditions and the following disclaimer in the
78 * documentation and/or other materials provided with the distribution.
80 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
81 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
82 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
83 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
84 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
85 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
86 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
87 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
88 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
89 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
90 * POSSIBILITY OF SUCH DAMAGE.
100 #include "shared_info.h"
102 #include "fhandler.h"
108 #include "tls_pbuf.h"
109 #include "miscfuncs.h"
111 #include <sys/queue.h>
114 #define F_WAIT 0x10 /* Wait until lock is granted */
115 #define F_FLOCK 0x20 /* Use flock(2) semantics for lock */
116 #define F_POSIX 0x40 /* Use POSIX semantics for lock */
119 #define OFF_MAX LLONG_MAX
122 static NO_COPY muto lockf_guard;
124 #define INODE_LIST_LOCK() (lockf_guard.init ("lockf_guard")->acquire ())
125 #define INODE_LIST_UNLOCK() (lockf_guard.release ())
127 #define LOCK_OBJ_NAME_LEN 69
129 #define FLOCK_INODE_DIR_ACCESS (DIRECTORY_QUERY \
130 | DIRECTORY_TRAVERSE \
131 | DIRECTORY_CREATE_OBJECT \
134 #define FLOCK_EVENT_ACCESS (EVENT_QUERY_STATE \
138 /* This function takes this process's security descriptor DACL and adds
139 SYNCHRONIZE permissions for everyone. This allows all processes
140 to wait for this process to die when blocking in a F_SETLKW on a lock
141 which is held by this process. Failures of the NT calls are reported via debug_printf. */
143 allow_others_to_sync ()
145 static NO_COPY bool done;
155 /* Get this process DACL. We use a rather small stack buffer here which
156 should be more than sufficient for process ACLs. Can't use tls functions
157 at this point because this gets called during initialization when the tls
158 is not really available. */
159 #define MAX_PROCESS_SD_SIZE 3072
160 PSECURITY_DESCRIPTOR sd = (PSECURITY_DESCRIPTOR) alloca (MAX_PROCESS_SD_SIZE);
161 status = NtQuerySecurityObject (NtCurrentProcess (),
162 DACL_SECURITY_INFORMATION, sd,
163 MAX_PROCESS_SD_SIZE, &len);
164 if (!NT_SUCCESS (status))
166 debug_printf ("NtQuerySecurityObject: %p", status);
169 /* Create a valid dacl pointer and set its size to be as big as
170 there's room in the temporary buffer, i.e. MAX_PROCESS_SD_SIZE bytes minus the DACL's offset, so the ACE added below cannot be written past the end of the buffer. Note that the descriptor
171 is in self-relative format. */
172 dacl = (PACL) ((char *) sd + (uintptr_t) sd->Dacl);
173 dacl->AclSize = MAX_PROCESS_SD_SIZE - ((char *) dacl - (char *) sd);
174 /* Allow everyone to SYNCHRONIZE with this process. */
175 status = RtlAddAccessAllowedAce (dacl, ACL_REVISION, SYNCHRONIZE,
176 well_known_world_sid);
177 if (!NT_SUCCESS (status))
179 debug_printf ("RtlAddAccessAllowedAce: %p", status);
182 /* Set the size of the DACL correctly. */
183 status = RtlFirstFreeAce (dacl, &ace);
184 if (!NT_SUCCESS (status))
186 debug_printf ("RtlFirstFreeAce: %p", status);
189 dacl->AclSize = (char *) ace - (char *) dacl;
190 /* Write the DACL back. */
191 status = NtSetSecurityObject (NtCurrentProcess (), DACL_SECURITY_INFORMATION, sd);
192 if (!NT_SUCCESS (status))
194 debug_printf ("NtSetSecurityObject: %p", status);
200 /* Get the handle count of an object. */
202 get_obj_handle_count (HANDLE h)
204 OBJECT_BASIC_INFORMATION obi;
208 status = NtQueryObject (h, ObjectBasicInformation, &obi, sizeof obi, NULL);
209 if (!NT_SUCCESS (status))
210 debug_printf ("NtQueryObject: %p\n", status);
212 hdl_cnt = obi.HandleCount;
216 /* Helper struct to construct a local OBJECT_ATTRIBUTES on the stack. */
219 OBJECT_ATTRIBUTES attr;
220 UNICODE_STRING uname;
221 WCHAR name[LOCK_OBJ_NAME_LEN + 1];
224 /* Per lock class. */
228 short lf_flags; /* Semantics: F_POSIX, F_FLOCK, F_WAIT */
229 short lf_type; /* Lock type: F_RDLCK, F_WRLCK */
230 _off64_t lf_start; /* Byte # of the start of the lock */
231 _off64_t lf_end; /* Byte # of the end of the lock (-1=EOF) */
232 long long lf_id; /* Cygwin PID for POSIX locks, a unique id per
233 file table entry for BSD flock locks. */
234 DWORD lf_wid; /* Win PID of the resource holding the lock */
235 uint16_t lf_ver; /* Version number of the lock. If a released
236 lock event still exists because another process
237 is still waiting for it, we use the version
238 field to distinguish old from new locks. */
239 class lockf_t **lf_head; /* Back pointer to the head of the lockf_t list */
240 class inode_t *lf_inode; /* Back pointer to the inode_t */
241 class lockf_t *lf_next; /* Pointer to the next lock on this inode_t */
242 HANDLE lf_obj; /* Handle to the lock event object. */
245 : lf_flags (0), lf_type (0), lf_start (0), lf_end (0), lf_id (0),
246 lf_wid (0), lf_ver (0), lf_head (NULL), lf_inode (NULL),
247 lf_next (NULL), lf_obj (NULL)
249 lockf_t (class inode_t *node, class lockf_t **head,
250 short flags, short type, _off64_t start, _off64_t end,
251 long long id, DWORD wid, uint16_t ver)
252 : lf_flags (flags), lf_type (type), lf_start (start), lf_end (end),
253 lf_id (id), lf_wid (wid), lf_ver (ver), lf_head (head), lf_inode (node),
254 lf_next (NULL), lf_obj (NULL)
258 /* Used to create all locks list in a given TLS buffer. */
259 void *operator new (size_t size, void *p)
261 /* Used to store own lock list in the cygheap. */
262 void *operator new (size_t size)
263 { return cmalloc (HEAP_FHANDLER, sizeof (lockf_t)); }
264 /* Never call on node->i_all_lf! */
265 void operator delete (void *p)
268 POBJECT_ATTRIBUTES create_lock_obj_attr (lockfattr_t *attr,
271 void create_lock_obj ();
272 bool open_lock_obj ();
273 void close_lock_obj () { NtClose (lf_obj); lf_obj = NULL; }
274 void del_lock_obj (HANDLE fhdl, bool signal = false);
277 /* Per inode_t class */
280 friend class lockf_t;
283 LIST_ENTRY (inode_t) i_next;
284 lockf_t *i_lockf; /* List of locks of this process. */
285 lockf_t *i_all_lf; /* Temp list of all locks for this file. */
287 __dev32_t i_dev; /* Device ID */
288 __ino64_t i_ino; /* inode number */
293 uint32_t i_cnt; /* # of threads referencing this instance. */
295 void use () { ++i_cnt; }
296 void unuse () { if (i_cnt > 0) --i_cnt; }
297 bool inuse () { return i_cnt > 0; }
300 inode_t (__dev32_t dev, __ino64_t ino);
303 void *operator new (size_t size)
304 { return cmalloc (HEAP_FHANDLER, sizeof (inode_t)); }
305 void operator delete (void *p)
308 static inode_t *get (__dev32_t dev, __ino64_t ino, bool create_if_missing);
310 void LOCK () { WaitForSingleObject (i_mtx, INFINITE); }
311 void UNLOCK () { ReleaseMutex (i_mtx); }
313 void notused () { i_cnt = 0; }
315 void unlock_and_remove_if_unused ();
317 lockf_t *get_all_locks_list ();
319 bool del_my_locks (long long id, HANDLE fhdl);
324 lockf_t *lock, *n_lock;
325 for (lock = i_lockf; lock && (n_lock = lock->lf_next, 1); lock = n_lock)
332 inode_t::unlock_and_remove_if_unused ()
337 if (i_lockf == NULL && !inuse ())
339 LIST_REMOVE (this, i_next);
342 INODE_LIST_UNLOCK ();
346 inode_t::del_my_locks (long long id, HANDLE fhdl)
348 lockf_t *lock, *n_lock;
349 lockf_t **prev = &i_lockf;
351 for (lock = *prev; lock && (n_lock = lock->lf_next, 1); lock = n_lock)
353 if (lock->lf_flags & F_POSIX)
355 /* Delete all POSIX locks. */
360 else if (id && lock->lf_id == id)
363 cygheap_fdenum cfd (true);
364 while (cfd.next () >= 0)
365 if (cfd->get_unique_id () == lock->lf_id && ++cnt > 1)
367 /* Delete BSD flock lock when no other fd in this process references
372 lock->del_lock_obj (fhdl);
377 prev = &lock->lf_next;
379 return i_lockf == NULL;
382 /* Used to delete the locks on a file held by this process. Called from
383 close(2) and fixup_after_fork, as well as from fixup_after_exec in
384 case the close_on_exec flag is set. The whole inode is deleted as
385 soon as no lock exists on it anymore. */
387 fhandler_base::del_my_locks (del_lock_called_from from)
389 inode_t *node = inode_t::get (get_dev (), get_ino (), false);
392 /* When we're called from fixup_after_exec, the fhandler is a
393 close-on-exec fhandler. In this case our io handle is already
394 invalid. We can't use it to test for the object reference count.
395 However, that shouldn't be necessary for the following reason.
396 After exec, there are no threads in the current process waiting for
397 the lock. So, either we're the only process accessing the file table
398 entry and there are no threads which require signalling, or we have
399 a parent process still accessing the file object and signalling the
400 lock event would be premature. */
401 node->del_my_locks (from == after_fork ? 0 : get_unique_id (),
402 from == after_exec ? NULL : get_handle ());
403 node->unlock_and_remove_if_unused ();
407 /* Called in an execed child. The exec'ed process must allow SYNCHRONIZE
408 access to everyone if at least one inode exists.
409 The lock owner's Windows PID changed and all POSIX lock event objects
410 have to be relabeled so that waiting processes know which process to
411 wait on. If the node has been abandoned due to close_on_exec on the
412 referencing fhandlers, remove the inode entirely. */
414 fixup_lockf_after_exec ()
416 inode_t *node, *next_node;
419 if (LIST_FIRST (&cygheap->inode_list))
420 allow_others_to_sync ();
421 LIST_FOREACH_SAFE (node, &cygheap->inode_list, i_next, next_node)
425 cygheap_fdenum cfd (true);
426 while (cfd.next () >= 0)
427 if (cfd->get_dev () == node->i_dev
428 && cfd->get_ino () == node->i_ino
433 LIST_REMOVE (node, i_next);
439 for (lockf_t *lock = node->i_lockf; lock; lock = lock->lf_next)
440 if (lock->lf_flags & F_POSIX)
442 lock->del_lock_obj (NULL);
443 lock->lf_wid = myself->dwProcessId;
445 lock->create_lock_obj ();
450 INODE_LIST_UNLOCK ();
453 /* Static method to return a pointer to the inode_t structure for a specific
454 file. The file is specified by its device and inode numbers. If no inode_t
455 exists for it yet, one is created only when create_if_missing is set. */
457 inode_t::get (__dev32_t dev, __ino64_t ino, bool create_if_missing)
462 LIST_FOREACH (node, &cygheap->inode_list, i_next)
463 if (node->i_dev == dev && node->i_ino == ino)
465 if (!node && create_if_missing)
467 node = new inode_t (dev, ino);
469 LIST_INSERT_HEAD (&cygheap->inode_list, node, i_next);
473 INODE_LIST_UNLOCK ();
479 inode_t::inode_t (__dev32_t dev, __ino64_t ino)
480 : i_lockf (NULL), i_all_lf (NULL), i_dev (dev), i_ino (ino), i_cnt (0L)
484 UNICODE_STRING uname;
485 OBJECT_ATTRIBUTES attr;
488 parent_dir = get_shared_parent_dir ();
489 /* Create a subdir which is named after the device and inode numbers
490 of the given file, in hex notation. */
491 int len = __small_swprintf (name, L"flock-%08x-%016X", dev, ino);
492 RtlInitCountedUnicodeString (&uname, name, len * sizeof (WCHAR));
493 InitializeObjectAttributes (&attr, &uname, OBJ_INHERIT | OBJ_OPENIF,
494 parent_dir, everyone_sd (FLOCK_INODE_DIR_ACCESS));
495 status = NtCreateDirectoryObject (&i_dir, FLOCK_INODE_DIR_ACCESS, &attr);
496 if (!NT_SUCCESS (status))
497 api_fatal ("NtCreateDirectoryObject(inode): %p", status);
498 /* Create a mutex object in the file specific dir, which is used for
499 access synchronization on the dir and its objects. */
500 InitializeObjectAttributes (&attr, &ro_u_mtx, OBJ_INHERIT | OBJ_OPENIF, i_dir,
501 everyone_sd (CYG_MUTANT_ACCESS));
502 status = NtCreateMutant (&i_mtx, CYG_MUTANT_ACCESS, &attr, FALSE);
503 if (!NT_SUCCESS (status))
504 api_fatal ("NtCreateMutant(inode): %p", status);
507 /* Enumerate all lock event objects for this file and create a lockf_t
508 list in the i_all_lf member. This list is searched in lf_getblock
509 for locks which potentially block our lock request. */
511 /* Number of lockf_t structs which fit in the temporary buffer. */
512 #define MAX_LOCKF_CNT ((intptr_t)((NT_MAX_PATH * sizeof (WCHAR)) \
516 inode_t::get_all_locks_list ()
520 DIRECTORY_BASIC_INFORMATION dbi;
521 WCHAR buf[2][NAME_MAX + 1];
525 lockf_t *lock = i_all_lf;
527 for (BOOLEAN restart = TRUE;
528 NT_SUCCESS (status = NtQueryDirectoryObject (i_dir, &f, sizeof f, TRUE,
529 restart, &context, NULL));
532 if (f.dbi.ObjectName.Length != LOCK_OBJ_NAME_LEN * sizeof (WCHAR))
534 wchar_t *wc = f.dbi.ObjectName.Buffer, *endptr;
535 /* "%02x-%01x-%016X-%016X-%016X-%08x-%04x",
536 lf_flags, lf_type, lf_start, lf_end, lf_id, lf_wid, lf_ver */
537 wc[LOCK_OBJ_NAME_LEN] = L'\0';
538 short flags = wcstol (wc, &endptr, 16);
539 if ((flags & ~(F_FLOCK | F_POSIX)) != 0
540 || ((flags & (F_FLOCK | F_POSIX)) == (F_FLOCK | F_POSIX)))
542 short type = wcstol (endptr + 1, &endptr, 16);
543 if ((type != F_RDLCK && type != F_WRLCK) || !endptr || *endptr != L'-')
545 _off64_t start = (_off64_t) wcstoull (endptr + 1, &endptr, 16);
546 if (start < 0 || !endptr || *endptr != L'-')
548 _off64_t end = (_off64_t) wcstoull (endptr + 1, &endptr, 16);
549 if (end < -1LL || (end > 0 && end < start) || !endptr || *endptr != L'-')
551 long long id = wcstoll (endptr + 1, &endptr, 16);
552 if (!endptr || *endptr != L'-'
553 || ((flags & F_POSIX) && (id < 1 || id > ULONG_MAX)))
555 DWORD wid = wcstoul (endptr + 1, &endptr, 16);
556 if (!endptr || *endptr != L'-')
558 uint16_t ver = wcstoul (endptr + 1, &endptr, 16);
559 if (endptr && *endptr != L'\0')
561 if (lock - i_all_lf >= MAX_LOCKF_CNT)
563 system_printf ("Warning, can't handle more than %d locks per file.",
568 lock[-1].lf_next = lock;
569 new (lock++) lockf_t (this, &i_all_lf,
570 flags, type, start, end, id, wid, ver);
572 /* If no lock has been found, return NULL. */
573 if (lock == i_all_lf)
578 /* Create the lock object name. The name is constructed from the lock
579 properties which identify it uniquely, all values in hex. */
581 lockf_t::create_lock_obj_attr (lockfattr_t *attr, ULONG flags)
583 __small_swprintf (attr->name, L"%02x-%01x-%016X-%016X-%016X-%08x-%04x",
584 lf_flags & (F_POSIX | F_FLOCK), lf_type, lf_start, lf_end,
585 lf_id, lf_wid, lf_ver);
586 RtlInitCountedUnicodeString (&attr->uname, attr->name,
587 LOCK_OBJ_NAME_LEN * sizeof (WCHAR));
588 InitializeObjectAttributes (&attr->attr, &attr->uname, flags, lf_inode->i_dir,
589 everyone_sd (FLOCK_EVENT_ACCESS));
593 /* Create the lock event object in the file's subdir in the NT global
596 lockf_t::create_lock_obj ()
603 status = NtCreateEvent (&lf_obj, CYG_EVENT_ACCESS,
604 create_lock_obj_attr (&attr, OBJ_INHERIT),
605 NotificationEvent, FALSE);
606 if (!NT_SUCCESS (status))
608 if (status != STATUS_OBJECT_NAME_COLLISION)
609 api_fatal ("NtCreateEvent(lock): %p", status);
610 /* If we get a STATUS_OBJECT_NAME_COLLISION, the event still exists
611 because some other process is waiting for it in lf_setlock.
612 If so, check the event's signal state. If we can't open it, it
613 has been closed in the meantime, so just try again. If we can
614 open it and the object is not signalled, it's surely a bug in the
615 code somewhere. Otherwise, close the event and retry to create
616 a new event with another name. */
617 if (open_lock_obj ())
619 if (!IsEventSignalled (lf_obj))
620 api_fatal ("NtCreateEvent(lock): %p", status);
622 /* Increment the lf_ver field until we have no collision. */
627 while (!NT_SUCCESS (status));
630 /* Open a lock event object for SYNCHRONIZE access (to wait for it). */
632 lockf_t::open_lock_obj ()
637 status = NtOpenEvent (&lf_obj, FLOCK_EVENT_ACCESS,
638 create_lock_obj_attr (&attr, 0));
639 if (!NT_SUCCESS (status))
641 SetLastError (RtlNtStatusToDosError (status));
642 lf_obj = NULL; /* Paranoia... */
644 return lf_obj != NULL;
647 /* Delete a lock event handle. The important thing here is to signal it
648 before closing the handle. This way all threads waiting for this lock
651 lockf_t::del_lock_obj (HANDLE fhdl, bool signal)
655 /* Only signal the event if it's either a POSIX lock, or, in case of
656 BSD flock locks, if it's an explicit unlock or if the calling fhandler
657 holds the last reference to the file table entry. The file table
658 entry in UNIX terms is equivalent to the FILE_OBJECT in Windows NT
659 terms. It's what the handle/descriptor references when calling
660 CreateFile/open. Calling DuplicateHandle/dup only creates a new
661 handle/descriptor to the same FILE_OBJECT/file table entry. */
662 if ((lf_flags & F_POSIX) || signal
663 || (fhdl && get_obj_handle_count (fhdl) <= 1))
664 NtSetEvent (lf_obj, NULL);
675 * This variable controls the maximum number of processes that will
676 * be checked in doing deadlock detection.
680 static int maxlockdepth = MAXDEPTH;
683 #define NOLOCKF (struct lockf_t *)0
686 static int lf_clearlock (lockf_t *, lockf_t **, HANDLE);
687 static int lf_findoverlap (lockf_t *, lockf_t *, int, lockf_t ***, lockf_t **);
688 static lockf_t *lf_getblock (lockf_t *, inode_t *node);
689 static int lf_getlock (lockf_t *, inode_t *, struct __flock64 *);
690 static int lf_setlock (lockf_t *, inode_t *, lockf_t **, HANDLE);
691 static void lf_split (lockf_t *, lockf_t *, lockf_t **);
692 static void lf_wakelock (lockf_t *, HANDLE);
695 fhandler_disk_file::lock (int a_op, struct __flock64 *fl)
697 _off64_t start, end, oadd;
700 short a_flags = fl->l_type & (F_POSIX | F_FLOCK);
701 short type = fl->l_type & (F_RDLCK | F_WRLCK | F_UNLCK);
704 a_flags = F_POSIX; /* default */
705 if (a_op == F_SETLKW)
717 /* flock semantics don't specify a requirement that the file has
718 been opened with a specific open mode, in contrast to POSIX locks
719 which require that a file is opened for reading to place a read
720 lock and opened for writing to place a write lock. */
721 if ((a_flags & F_POSIX) && !(get_access () & GENERIC_READ))
728 /* See above comment. */
729 if ((a_flags & F_POSIX) && !(get_access () & GENERIC_WRITE))
741 * Convert the flock structure into a start and end.
743 switch (fl->l_whence)
750 if ((start = lseek (0, SEEK_CUR)) == ILLEGAL_SEEK)
758 FILE_STANDARD_INFORMATION fsi;
760 status = NtQueryInformationFile (get_handle (), &io, &fsi, sizeof fsi,
761 FileStandardInformation);
762 if (!NT_SUCCESS (status))
764 __seterrno_from_nt_status (status);
767 if (fl->l_start > 0 && fsi.EndOfFile.QuadPart > OFF_MAX - fl->l_start)
769 set_errno (EOVERFLOW);
772 start = fsi.EndOfFile.QuadPart + fl->l_start;
799 else if (fl->l_len == 0)
803 oadd = fl->l_len - 1;
804 if (oadd > OFF_MAX - start)
806 set_errno (EOVERFLOW);
812 restart: /* Entry point after a restartable signal came in. */
814 inode_t *node = inode_t::get (get_dev (), get_ino (), true);
821 /* Unlock the fd table which has been locked in fcntl_worker/lock_worker,
822 otherwise a blocking F_SETLKW never wakes up on a signal. */
823 cygheap->fdtab.unlock ();
825 lockf_t **head = &node->i_lockf;
829 * Avoid the common case of unlocking when inode_t has no locks.
831 * This shortcut is invalid for Cygwin because the above inode_t::get
832 * call returns with an empty lock list if this process has no locks
840 fl->l_type = F_UNLCK;
846 * Allocate a spare structure in case we have to split.
848 lockf_t *clean = NULL;
849 if (a_op == F_SETLK || a_op == F_UNLCK)
851 clean = new lockf_t ();
854 node->unlock_and_remove_if_unused ();
860 * Create the lockf_t structure
862 lockf_t *lock = new lockf_t (node, head, a_flags, type, start, end,
863 (a_flags & F_FLOCK) ? get_unique_id ()
865 myself->dwProcessId, 0);
868 node->unlock_and_remove_if_unused ();
876 error = lf_setlock (lock, node, &clean, get_handle ());
880 error = lf_clearlock (lock, &clean, get_handle ());
881 lock->lf_next = clean;
886 error = lf_getlock (lock, node, fl);
887 lock->lf_next = clean;
892 lock->lf_next = clean;
897 for (lock = clean; lock != NULL; )
899 lockf_t *n = lock->lf_next;
900 lock->del_lock_obj (get_handle (), a_op == F_UNLCK);
904 node->unlock_and_remove_if_unused ();
907 case 0: /* All is well. */
908 need_fork_fixup (true);
910 case EINTR: /* Signal came in. */
911 if (_my_tls.call_signal_handler ())
914 case ECANCELED: /* The thread has been sent a cancellation request. */
915 pthread::static_cancel_self ();
925 * Set a byte-range lock.
928 lf_setlock (lockf_t *lock, inode_t *node, lockf_t **clean, HANDLE fhdl)
931 lockf_t **head = lock->lf_head;
932 lockf_t **prev, *overlap;
933 int ovcase, priority, old_prio, needtolink;
939 priority = old_prio = GetThreadPriority (GetCurrentThread ());
940 if (lock->lf_type == F_WRLCK && priority <= THREAD_PRIORITY_ABOVE_NORMAL)
941 priority = THREAD_PRIORITY_HIGHEST;
943 * Scan lock list for this file looking for locks that would block us.
945 /* Create temporary space for the all locks list. */
946 node->i_all_lf = (lockf_t *) (void *) tp.w_get ();
947 while ((block = lf_getblock(lock, node)))
949 HANDLE obj = block->lf_obj;
950 block->lf_obj = NULL;
953 * Free the structure and return if nonblocking.
955 if ((lock->lf_flags & F_WAIT) == 0)
958 lock->lf_next = *clean;
963 * We are blocked. Since flock style locks cover
964 * the whole file, there is no chance for deadlock.
965 * For byte-range locks we must check for deadlock.
967 * Deadlock detection is done by looking through the
968 * wait channels to see if there are any cycles that
969 * involve us. MAXDEPTH is set just to make sure we
970 * do not go off into neverland.
972 /* FIXME: We check the handle count of all the lock event objects
973 this process holds. If it's > 1, another process is
974 waiting for one of our locks. This method isn't overly
975 intelligent. If it turns out to be too dumb, we might
976 have to remove it or to find another method. */
977 if (lock->lf_flags & F_POSIX)
978 for (lockf_t *lk = node->i_lockf; lk; lk = lk->lf_next)
979 if ((lk->lf_flags & F_POSIX) && get_obj_handle_count (lk->lf_obj) > 1)
986 * For flock type locks, we must first remove
987 * any shared locks that we hold before we sleep
988 * waiting for an exclusive lock.
990 if ((lock->lf_flags & F_FLOCK) && lock->lf_type == F_WRLCK)
992 lock->lf_type = F_UNLCK;
993 (void) lf_clearlock (lock, clean, fhdl);
994 lock->lf_type = F_WRLCK;
998 * Add our lock to the blocked list and sleep until we're free.
999 * Remember who blocked us (for deadlock detection).
1001 /* Cygwin: No locked list. See deadlock recognition above. */
1005 /* Create list of objects to wait for. */
1006 HANDLE w4[4] = { obj, NULL, NULL, NULL };
1007 DWORD wait_count = 1;
1010 if (lock->lf_flags & F_POSIX)
1012 proc = OpenProcess (SYNCHRONIZE, FALSE, block->lf_wid);
1014 debug_printf ("Can't sync with process holding a POSIX lock "
1015 "(Win32 pid %lu): %E", block->lf_wid);
1017 w4[wait_count++] = proc;
1019 DWORD WAIT_SIGNAL_ARRIVED = WAIT_OBJECT_0 + wait_count;
1020 w4[wait_count++] = signal_arrived;
1022 DWORD WAIT_THREAD_CANCELED = WAIT_TIMEOUT + 1;
1023 HANDLE cancel_event = pthread::get_cancel_event ();
1026 WAIT_THREAD_CANCELED = WAIT_OBJECT_0 + wait_count;
1027 w4[wait_count++] = cancel_event;
1030 /* Wait for the blocking object and, for POSIX locks, its holding process.
1031 Unfortunately, since BSD flock locks are not attached to a specific
1032 process, we can't recognize an abandoned lock by sync'ing with the
1033 creator process. We have to make sure the event object is in a
1034 signalled state, or that it has gone away. The latter we can only
1035 recognize by retrying to fetch the block list, so we must not wait
1036 infinitely. Same problem for POSIX locks if the process has already
1037 exited at the time we're trying to open the process. */
1038 SetThreadPriority (GetCurrentThread (), priority);
1039 DWORD ret = WaitForMultipleObjects (wait_count, w4, FALSE,
1040 proc ? INFINITE : 100L);
1041 SetThreadPriority (GetCurrentThread (), old_prio);
1042 /* Always close handles before locking the node. */
1047 if (ret == WAIT_SIGNAL_ARRIVED)
1049 /* A signal came in. */
1050 lock->lf_next = *clean;
1054 else if (ret == WAIT_THREAD_CANCELED)
1056 /* The thread has been sent a cancellation request. */
1057 lock->lf_next = *clean;
1062 /* The lock object has been set to signalled or ...
1063 for POSIX locks, the process holding the lock has exited, or ...
1064 just a timeout. Just retry. */
1067 allow_others_to_sync ();
1069 * No blocks!! Add the lock. Note that we will
1070 * downgrade or upgrade any overlapping locks this
1071 * process already owns.
1073 * Handle any locks that overlap.
1080 ovcase = lf_findoverlap (block, lock, SELF, &prev, &overlap);
1082 block = overlap->lf_next;
1086 * 1) overlap == lock
1087 * 2) overlap contains lock
1088 * 3) lock contains overlap
1089 * 4) overlap starts before lock
1090 * 5) overlap ends after lock
1094 case 0: /* no overlap */
1098 lock->lf_next = overlap;
1099 lock->create_lock_obj ();
1103 case 1: /* overlap == lock */
1105 * If downgrading lock, others may be
1106 * able to acquire it.
1107 * Cygwin: Always wake lock.
1109 lf_wakelock (overlap, fhdl);
1110 overlap->lf_type = lock->lf_type;
1111 overlap->create_lock_obj ();
1112 lock->lf_next = *clean;
1116 case 2: /* overlap contains lock */
1118 * Check for common starting point and different types.
1120 if (overlap->lf_type == lock->lf_type)
1122 lock->lf_next = *clean;
1126 if (overlap->lf_start == lock->lf_start)
1129 lock->lf_next = overlap;
1130 overlap->lf_start = lock->lf_end + 1;
1133 lf_split (overlap, lock, clean);
1134 lf_wakelock (overlap, fhdl);
1135 overlap->create_lock_obj ();
1136 lock->create_lock_obj ();
1137 if (lock->lf_next && !lock->lf_next->lf_obj)
1138 lock->lf_next->create_lock_obj ();
1141 case 3: /* lock contains overlap */
1143 * If downgrading lock, others may be able to
1144 * acquire it, otherwise take the list.
1145 * Cygwin: Always wake old lock and create new lock.
1147 lf_wakelock (overlap, fhdl);
1149 * Add the new lock if necessary and delete the overlap.
1154 lock->lf_next = overlap->lf_next;
1155 prev = &lock->lf_next;
1156 lock->create_lock_obj ();
1160 *prev = overlap->lf_next;
1161 overlap->lf_next = *clean;
1165 case 4: /* overlap starts before lock */
1167 * Add lock after overlap on the list.
1169 lock->lf_next = overlap->lf_next;
1170 overlap->lf_next = lock;
1171 overlap->lf_end = lock->lf_start - 1;
1172 prev = &lock->lf_next;
1173 lf_wakelock (overlap, fhdl);
1174 overlap->create_lock_obj ();
1175 lock->create_lock_obj ();
1179 case 5: /* overlap ends after lock */
1181 * Add the new lock before overlap.
1185 lock->lf_next = overlap;
1187 overlap->lf_start = lock->lf_end + 1;
1188 lf_wakelock (overlap, fhdl);
1189 lock->create_lock_obj ();
1190 overlap->create_lock_obj ();
1199 * Remove a byte-range lock on an inode_t.
1201 * Generally, find the lock (or an overlap to that lock)
1202 * and remove it (or shrink it), then wakeup anyone we can.
1205 lf_clearlock (lockf_t *unlock, lockf_t **clean, HANDLE fhdl)
1207 lockf_t **head = unlock->lf_head;
1208 lockf_t *lf = *head;
1209 lockf_t *overlap, **prev;
1215 while ((ovcase = lf_findoverlap (lf, unlock, SELF, &prev, &overlap)))
1218 * Wakeup the list of locks to be retried.
1220 lf_wakelock (overlap, fhdl);
1224 case 1: /* overlap == lock */
1225 *prev = overlap->lf_next;
1226 overlap->lf_next = *clean;
1230 case 2: /* overlap contains lock: split it */
1231 if (overlap->lf_start == unlock->lf_start)
1233 overlap->lf_start = unlock->lf_end + 1;
1234 overlap->create_lock_obj ();
1237 lf_split (overlap, unlock, clean);
1238 overlap->lf_next = unlock->lf_next;
1239 overlap->create_lock_obj ();
1240 if (overlap->lf_next && !overlap->lf_next->lf_obj)
1241 overlap->lf_next->create_lock_obj ();
1244 case 3: /* lock contains overlap */
1245 *prev = overlap->lf_next;
1246 lf = overlap->lf_next;
1247 overlap->lf_next = *clean;
1251 case 4: /* overlap starts before lock */
1252 overlap->lf_end = unlock->lf_start - 1;
1253 prev = &overlap->lf_next;
1254 lf = overlap->lf_next;
1255 overlap->create_lock_obj ();
1258 case 5: /* overlap ends after lock */
1259 overlap->lf_start = unlock->lf_end + 1;
1260 overlap->create_lock_obj ();
1269 * Check whether there is a blocking lock,
1270 * and if so return its process identifier.
1273 lf_getlock (lockf_t *lock, inode_t *node, struct __flock64 *fl)
1278 /* Create temporary space for the all locks list. */
1279 node->i_all_lf = (lockf_t *) (void * ) tp.w_get ();
1280 if ((block = lf_getblock (lock, node)))
1283 block->close_lock_obj ();
1284 fl->l_type = block->lf_type;
1285 fl->l_whence = SEEK_SET;
1286 fl->l_start = block->lf_start;
1287 if (block->lf_end == -1)
1290 fl->l_len = block->lf_end - block->lf_start + 1;
1291 if (block->lf_flags & F_POSIX)
1292 fl->l_pid = (pid_t) block->lf_id;
1297 fl->l_type = F_UNLCK;
1302 * Walk the list of locks for an inode_t and
1303 * return the first blocking lock.
/* Search the list obtained from node->get_all_locks_list () for a lock
   that blocks LOCK.  lf_findoverlap is called in OTHERS mode, so entries
   with the same lf_id as LOCK are never candidates.  An overlapping entry
   is examined further only if its lock object can be opened; otherwise
   the search moves on to the next entry. */
1306 lf_getblock (lockf_t *lock, inode_t *node)
1308 lockf_t **prev, *overlap;
1309 lockf_t *lf = node->get_all_locks_list ();
1312 prev = lock->lf_head;
1313 while ((ovcase = lf_findoverlap (lf, lock, OTHERS, &prev, &overlap)))
1316 * We've found an overlap, see if it blocks us
/* A conflict requires at least one of the two locks to be F_WRLCK. */
1318 if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
1320 /* Open the event object for synchronization. */
1321 if (overlap->open_lock_obj ())
1323 /* If we found a POSIX lock, it will block us. */
1324 if (overlap->lf_flags & F_POSIX)
1326 /* In case of BSD flock locks, check if the event object is
1327 signalled. If so, the overlap doesn't actually exist anymore.
1328 There are just a few open handles left. */
1329 if (!IsEventSignalled (overlap->lf_obj))
/* Close the lock object opened above before moving on. */
1331 overlap->close_lock_obj ();
1335 * Nope, point to the next one on the list and
1336 * see if it blocks us
1338 lf = overlap->lf_next;
1344 * Walk the list of locks for an inode_t to
1345 * find an overlapping lock (if any).
1347 * NOTE: this returns only the FIRST overlapping lock. There
1348 * may be more than one.
/* Scan the list starting at LF for the first entry whose range overlaps
   LOCK.  TYPE selects the candidates: with SELF only entries whose lf_id
   equals LOCK's are examined, with OTHERS only entries whose lf_id
   differs.  Entries whose F_POSIX/F_FLOCK flags differ from LOCK's are
   always skipped.  While entries are skipped, *PREV is advanced to the
   address of the skipped entry's lf_next and *OVERLAP to the following
   entry.  Callers treat the result as 0 for "no overlap" and as 1..5 for
   the overlap cases enumerated in the body.  An end offset of -1 is
   treated as unbounded throughout the range tests. */
1351 lf_findoverlap (lockf_t *lf, lockf_t *lock, int type, lockf_t ***prev,
1354 _off64_t start, end;
1360 start = lock->lf_start;
1362 while (lf != NOLOCKF)
/* Filter by owner (SELF/OTHERS) and by lock flavour before looking at
   the ranges at all. */
1364 if (((type & SELF) && lf->lf_id != lock->lf_id)
1365 || ((type & OTHERS) && lf->lf_id == lock->lf_id)
1366 /* As on Linux: POSIX locks and BSD flock locks don't interact. */
1367 || (lf->lf_flags & (F_POSIX | F_FLOCK))
1368 != (lock->lf_flags & (F_POSIX | F_FLOCK)))
1370 *prev = &lf->lf_next;
1371 *overlap = lf = lf->lf_next;
1375 * OK, check for overlap
1379 * 1) overlap == lock
1380 * 2) overlap contains lock
1381 * 3) lock contains overlap
1382 * 4) overlap starts before lock
1383 * 5) overlap ends after lock
/* Disjoint ranges: the entry ends before START or begins after END. */
1385 if ((lf->lf_end != -1 && start > lf->lf_end) ||
1386 (end != -1 && lf->lf_start > end))
1389 if ((type & SELF) && end != -1 && lf->lf_start > end)
1391 *prev = &lf->lf_next;
1392 *overlap = lf = lf->lf_next;
/* Case 1: identical start and end. */
1395 if ((lf->lf_start == start) && (lf->lf_end == end))
/* Case 2: the entry covers the whole (bounded) requested range. */
1400 if ((lf->lf_start <= start) && (end != -1) &&
1401 ((lf->lf_end >= end) || (lf->lf_end == -1)))
/* Case 3: the requested range covers the whole entry. */
1406 if (start <= lf->lf_start && (end == -1 ||
1407 (lf->lf_end != -1 && end >= lf->lf_end)))
/* Case 4: the entry begins before START and reaches into the range. */
1412 if ((lf->lf_start < start) &&
1413 ((lf->lf_end >= start) || (lf->lf_end == -1)))
/* Case 5: the entry begins inside the range and extends beyond END. */
1418 if ((lf->lf_start > start) && (end != -1) &&
1419 ((lf->lf_end > end) || (lf->lf_end == -1)))
/* Reached only if none of the range tests above matched. */
1424 api_fatal ("lf_findoverlap: default\n");
1430 * Split a lock and a contained region into
1431 * two or three locks as necessary.
/* Carve the range of LOCK2 out of the enclosing LOCK1 and link LOCK2 into
   the chain next to the remaining piece(s) of LOCK1.
   - Same start: LOCK1 is shortened at the front and LOCK2 is linked
     in front of it.
   - Same end: LOCK1 is shortened at the back and LOCK2 is linked
     behind it.
   - Otherwise LOCK1 keeps the leading part and a copy of LOCK1, stored in
     the preallocated splitlock, becomes the trailing part, giving the
     chain LOCK1 -> LOCK2 -> splitlock.  *SPLIT is advanced to
     splitlock->lf_next (splitlock is presumably the previous head of
     *SPLIT -- NOTE(review): confirm where it is assigned). */
1434 lf_split (lockf_t *lock1, lockf_t *lock2, lockf_t **split)
1439 * Check to see if spliting into only two pieces.
1441 if (lock1->lf_start == lock2->lf_start)
1443 lock1->lf_start = lock2->lf_end + 1;
1444 lock2->lf_next = lock1;
1447 if (lock1->lf_end == lock2->lf_end)
1449 lock1->lf_end = lock2->lf_start - 1;
1450 lock2->lf_next = lock1->lf_next;
1451 lock1->lf_next = lock2;
1455 * Make a new lock consisting of the last part of
1456 * the encompassing lock. We use the preallocated
1457 * splitlock so we don't have to block.
1460 assert (splitlock != NULL);
1461 *split = splitlock->lf_next;
/* Start from a full copy of LOCK1 so the tail piece inherits all of its
   attributes; only the fields changed below differ. */
1462 memcpy (splitlock, lock1, sizeof *splitlock);
1463 /* We have to unset the obj HANDLE here which has been copied by the
1464 above memcpy, so that the calling function recognizes the new object.
1465 See post-lf_split handling in lf_setlock and lf_clearlock. */
1466 splitlock->lf_obj = NULL;
/* The tail begins right behind LOCK2, the head ends right before it. */
1467 splitlock->lf_start = lock2->lf_end + 1;
1468 lock1->lf_end = lock2->lf_start - 1;
1470 * OK, now link it in
1472 splitlock->lf_next = lock1->lf_next;
1473 lock2->lf_next = splitlock;
1474 lock1->lf_next = lock2;
1478 * Wakeup a blocklist
1479 * Cygwin: Just signal the lock which gets removed. This unblocks
1480 * all threads waiting for this lock.
/* Thin wrapper: forwards to LISTHEAD's del_lock_obj with FHDL.
   NOTE(review): the meaning of the second argument (true) is defined by
   del_lock_obj and cannot be determined from this function. */
1483 lf_wakelock (lockf_t *listhead, HANDLE fhdl)
1485 listhead->del_lock_obj (fhdl, true);
/* flock entry point, implemented on top of the descriptor's lock method.
   OPERATION is translated into a struct __flock64 request whose l_type
   carries the F_FLOCK marker in addition to F_WRLCK, F_RDLCK or F_UNLCK.
   The result of cfd->lock is logged via syscall_printf. */
1489 flock (int fd, int operation)
/* The request starts out with SEEK_SET as whence and all other members
   zero; only l_type is filled in below. */
1493 struct __flock64 fl = { 0, SEEK_SET, 0, 0, 0 };
1496 if (efault.faulted (EFAULT))
1499 cygheap_fdget cfd (fd, true);
/* LOCK_NB selects the non-waiting F_SETLK, otherwise F_SETLKW is used. */
1503 cmd = (operation & LOCK_NB) ? F_SETLK : F_SETLKW;
/* Dispatch on the operation with the LOCK_NB bit masked out. */
1504 switch (operation & (~LOCK_NB))
1507 fl.l_type = F_WRLCK | F_FLOCK;
1510 fl.l_type = F_RDLCK | F_FLOCK;
1513 fl.l_type = F_UNLCK | F_FLOCK;
1519 res = cfd->lock (cmd, &fl);
/* Map the EAGAIN/EACCES failures of the underlying lock call to
   EWOULDBLOCK. */
1520 if ((res == -1) && ((get_errno () == EAGAIN) || (get_errno () == EACCES)))
1521 set_errno (EWOULDBLOCK);
1523 syscall_printf ("%d = flock (%d, %d)", res, fd, operation);
/* lockf entry point, implemented on top of the descriptor's lock method.
   FUNCTION is translated into a struct __flock64 request relative to the
   current file position (SEEK_CUR) with l_type F_UNLCK or F_WRLCK.  One
   of the functions first probes with F_GETLK.  The result of the final
   cfd->lock call is logged via syscall_printf. */
1528 lockf (int filedes, int function, _off64_t size)
1532 struct __flock64 fl;
/* Act on a pending cancellation request before doing any work. */
1534 pthread_testcancel ();
1537 if (efault.faulted (EFAULT))
1540 cygheap_fdget cfd (filedes, true);
1546 fl.l_whence = SEEK_CUR;
1552 fl.l_type = F_UNLCK;
1556 fl.l_type = F_WRLCK;
1560 fl.l_type = F_WRLCK;
/* Probe path: ask with F_GETLK which lock would block a write lock, then
   test whether nothing blocks (F_UNLCK) or the blocker is owned by this
   process. */
1563 fl.l_type = F_WRLCK;
1564 if (cfd->lock (F_GETLK, &fl) == -1)
1566 if (fl.l_type == F_UNLCK || fl.l_pid == getpid ())
1577 res = cfd->lock (cmd, &fl);
1579 syscall_printf ("%d = lockf (%d, %d, %D)", res, filedes, function, size);