winsup/cygwin/flock.cc

   1 /* flock.cc.  NT specific implementation of advisory file locking.
   2
   3    Copyright 2003, 2008, 2009, 2010, 2011 Red Hat, Inc.
   4
   5    This file is part of Cygwin.
   6
   7    This software is a copyrighted work licensed under the terms of the
   8    Cygwin license.  Please consult the file "CYGWIN_LICENSE" for
   9    details. */
  10
  11 /* The basic mechanism as well as the datastructures used in the below
  12    implementation are taken from the FreeBSD repository on 2008-03-18.
  13    The essential code of the lf_XXX functions has been taken from the
  14    module src/sys/kern/kern_lockf.c.  It has been adapted to use NT
  15    global namespace subdirs and event objects for synchronization
  16    purposes.
  17
  18    So, the following copyright applies to most of the code in the lf_XXX
  19    functions.
  20
  21  * Copyright (c) 1982, 1986, 1989, 1993
  22  *      The Regents of the University of California.  All rights reserved.
  23  *
  24  * This code is derived from software contributed to Berkeley by
  25  * Scooter Morris at Genentech Inc.
  26  *
  27  * Redistribution and use in source and binary forms, with or without
  28  * modification, are permitted provided that the following conditions
  29  * are met:
  30  * 1. Redistributions of source code must retain the above copyright
  31  *    notice, this list of conditions and the following disclaimer.
  32  * 2. Redistributions in binary form must reproduce the above copyright
  33  *    notice, this list of conditions and the following disclaimer in the
  34  *    documentation and/or other materials provided with the distribution.
  35  * 4. Neither the name of the University nor the names of its contributors
  36  *    may be used to endorse or promote products derived from this software
  37  *    without specific prior written permission.
  38  *
  39  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  40  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  41  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  42  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  43  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  44  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  45  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  46  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  47  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  48  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  49  * SUCH DAMAGE.
  50  *
  51  *      @(#)ufs_lockf.c 8.3 (Berkeley) 1/6/94
  52 */
  53
  54 /*
  55  * The flock() function is based upon source taken from the Red Hat
  56  * implementation used in their imap-2002d SRPM.
  57  *
  58  * $RH: flock.c,v 1.2 2000/08/23 17:07:00 nalin Exp $
  59  */
  60
  61 /* The lockf function is based upon FreeBSD sources with the following
  62  * copyright.
  63  */
  64 /*
  65  * Copyright (c) 1997 The NetBSD Foundation, Inc.
  66  * All rights reserved.
  67  *
  68  * This code is derived from software contributed to The NetBSD Foundation
  69  * by Klaus Klein.
  70  *
  71  * Redistribution and use in source and binary forms, with or without
  72  * modification, are permitted provided that the following conditions
  73  * are met:
  74  * 1. Redistributions of source code must retain the above copyright
  75  *    notice, this list of conditions and the following disclaimer.
  76  * 2. Redistributions in binary form must reproduce the above copyright
  77  *    notice, this list of conditions and the following disclaimer in the
  78  *    documentation and/or other materials provided with the distribution.
  79  *
  80  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  81  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  82  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  83  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  84  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  85  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  86  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  87  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  88  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  89  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  90  * POSSIBILITY OF SUCH DAMAGE.
  91  */
  92
  93 #include "winsup.h"
  94 #include <assert.h>
  95 #include <sys/file.h>
  96 #include <unistd.h>
  97 #include <stdlib.h>
  98 #include "cygerrno.h"
  99 #include "security.h"
 100 #include "shared_info.h"
 101 #include "path.h"
 102 #include "fhandler.h"
 103 #include "dtable.h"
 104 #include "cygheap.h"
 105 #include "pinfo.h"
 106 #include "sigproc.h"
 107 #include "cygtls.h"
 108 #include "tls_pbuf.h"
 109 #include "miscfuncs.h"
 110 #include "ntdll.h"
 111 #include <sys/queue.h>
 112 #include <wchar.h>
 113
 114 #define F_WAIT 0x10     /* Wait until lock is granted */
 115 #define F_FLOCK 0x20    /* Use flock(2) semantics for lock */
 116 #define F_POSIX 0x40    /* Use POSIX semantics for lock */
 117
 118 #ifndef OFF_MAX
 119 #define OFF_MAX LLONG_MAX
 120 #endif
 121
 122 static NO_COPY muto lockf_guard;
 123
 124 #define INODE_LIST_LOCK()       (lockf_guard.init ("lockf_guard")->acquire ())
 125 #define INODE_LIST_UNLOCK()     (lockf_guard.release ())
 126
 127 #define LOCK_OBJ_NAME_LEN       69
 128
 129 #define FLOCK_INODE_DIR_ACCESS  (DIRECTORY_QUERY \
 130                                  | DIRECTORY_TRAVERSE \
 131                                  | DIRECTORY_CREATE_OBJECT \
 132                                  | READ_CONTROL)
 133
 134 #define FLOCK_EVENT_ACCESS      (EVENT_QUERY_STATE \
 135                                  | SYNCHRONIZE \
 136                                  | READ_CONTROL)
 137
 138 /* This function takes the own process security descriptor DACL and adds
 139    SYNCHRONIZE permissions for everyone.  This allows all processes
 140    to wait for this process to die when blocking in a F_SETLKW on a lock
 141    which is hold by this process. */
 142 static void
 143 allow_others_to_sync ()
 144 {
 145   static NO_COPY bool done;
 146
 147   if (done)
 148     return;
 149
 150   NTSTATUS status;
 151   PACL dacl;
 152   LPVOID ace;
 153   ULONG len;
 154
 155   /* Get this process DACL.  We use a rather small stack buffer here which
 156      should be more than sufficient for process ACLs.  Can't use tls functions
 157      at this point because this gets called during initialization when the tls
 158      is not really available.  */
 159 #define MAX_PROCESS_SD_SIZE     3072
 160   PSECURITY_DESCRIPTOR sd = (PSECURITY_DESCRIPTOR) alloca (MAX_PROCESS_SD_SIZE);
 161   status = NtQuerySecurityObject (NtCurrentProcess (),
 162                                   DACL_SECURITY_INFORMATION, sd,
 163                                   MAX_PROCESS_SD_SIZE, &len);
 164   if (!NT_SUCCESS (status))
 165     {
 166       debug_printf ("NtQuerySecurityObject: %p", status);
 167       return;
 168     }
 169   /* Create a valid dacl pointer and set its size to be as big as
 170      there's room in the temporary buffer.  Note that the descriptor
 171      is in self-relative format. */
 172   dacl = (PACL) ((char *) sd + (uintptr_t) sd->Dacl);
 173   dacl->AclSize = NT_MAX_PATH * sizeof (WCHAR) - ((char *) dacl - (char *) sd);
 174   /* Allow everyone to SYNCHRONIZE with this process. */
 175   status = RtlAddAccessAllowedAce (dacl, ACL_REVISION, SYNCHRONIZE,
 176                                    well_known_world_sid);
 177   if (!NT_SUCCESS (status))
 178     {
 179       debug_printf ("RtlAddAccessAllowedAce: %p", status);
 180       return;
 181     }
 182   /* Set the size of the DACL correctly. */
 183   status = RtlFirstFreeAce (dacl, &ace);
 184   if (!NT_SUCCESS (status))
 185     {
 186       debug_printf ("RtlFirstFreeAce: %p", status);
 187       return;
 188     }
 189   dacl->AclSize = (char *) ace - (char *) dacl;
 190   /* Write the DACL back. */
 191   status = NtSetSecurityObject (NtCurrentProcess (), DACL_SECURITY_INFORMATION, sd);
 192   if (!NT_SUCCESS (status))
 193     {
 194       debug_printf ("NtSetSecurityObject: %p", status);
 195       return;
 196     }
 197   done = true;
 198 }
 199
 200 /* Get the handle count of an object. */
 201 static ULONG
 202 get_obj_handle_count (HANDLE h)
 203 {
 204   OBJECT_BASIC_INFORMATION obi;
 205   NTSTATUS status;
 206   ULONG hdl_cnt = 0;
 207
 208   status = NtQueryObject (h, ObjectBasicInformation, &obi, sizeof obi, NULL);
 209   if (!NT_SUCCESS (status))
 210     debug_printf ("NtQueryObject: %p\n", status);
 211   else
 212     hdl_cnt = obi.HandleCount;
 213   return hdl_cnt;
 214 }
 215
 216 /* Helper struct to construct a local OBJECT_ATTRIBUTES on the stack. */
 217 struct lockfattr_t
 218 {
 219   OBJECT_ATTRIBUTES attr;
 220   UNICODE_STRING uname;
 221   WCHAR name[LOCK_OBJ_NAME_LEN + 1];
 222 };
 223
 224 /* Per lock class. */
 225 class lockf_t
 226 {
 227   public:
 228     short           lf_flags; /* Semantics: F_POSIX, F_FLOCK, F_WAIT */
 229     short           lf_type;  /* Lock type: F_RDLCK, F_WRLCK */
 230     _off64_t        lf_start; /* Byte # of the start of the lock */
 231     _off64_t        lf_end;   /* Byte # of the end of the lock (-1=EOF) */
 232     long long       lf_id;    /* Cygwin PID for POSIX locks, a unique id per
 233                                  file table entry for BSD flock locks. */
 234     DWORD           lf_wid;   /* Win PID of the resource holding the lock */
 235     uint16_t        lf_ver;   /* Version number of the lock.  If a released
 236                                  lock event yet exists because another process
 237                                  is still waiting for it, we use the version
 238                                  field to distinguish old from new locks. */
 239     class lockf_t **lf_head;  /* Back pointer to the head of the lockf_t list */
 240     class inode_t  *lf_inode; /* Back pointer to the inode_t */
 241     class lockf_t  *lf_next;  /* Pointer to the next lock on this inode_t */
 242     HANDLE          lf_obj;   /* Handle to the lock event object. */
 243
 244     lockf_t ()
 245     : lf_flags (0), lf_type (0), lf_start (0), lf_end (0), lf_id (0),
 246       lf_wid (0), lf_ver (0), lf_head (NULL), lf_inode (NULL),
 247       lf_next (NULL), lf_obj (NULL)
 248     {}
 249     lockf_t (class inode_t *node, class lockf_t **head,
 250              short flags, short type, _off64_t start, _off64_t end,
 251              long long id, DWORD wid, uint16_t ver)
 252     : lf_flags (flags), lf_type (type), lf_start (start), lf_end (end),
 253       lf_id (id), lf_wid (wid), lf_ver (ver), lf_head (head), lf_inode (node),
 254       lf_next (NULL), lf_obj (NULL)
 255     {}
 256     ~lockf_t ();
 257
 258     /* Used to create all locks list in a given TLS buffer. */
 259     void *operator new (size_t size, void *p)
 260     { return p; }
 261     /* Used to store own lock list in the cygheap. */
 262     void *operator new (size_t size)
 263     { return cmalloc (HEAP_FHANDLER, sizeof (lockf_t)); }
 264     /* Never call on node->i_all_lf! */
 265     void operator delete (void *p)
 266     { cfree (p); }
 267
 268     POBJECT_ATTRIBUTES create_lock_obj_attr (lockfattr_t *attr,
 269                                              ULONG flags);
 270
 271     void create_lock_obj ();
 272     bool open_lock_obj ();
 273     void close_lock_obj () { NtClose (lf_obj); lf_obj = NULL; }
 274     void del_lock_obj (HANDLE fhdl, bool signal = false);
 275 };
 276
 277 /* Per inode_t class */
 278 class inode_t
 279 {
 280   friend class lockf_t;
 281
 282   public:
 283     LIST_ENTRY (inode_t) i_next;
 284     lockf_t             *i_lockf;  /* List of locks of this process. */
 285     lockf_t             *i_all_lf; /* Temp list of all locks for this file. */
 286
 287     __dev32_t            i_dev;    /* Device ID */
 288     __ino64_t            i_ino;    /* inode number */
 289
 290   private:
 291     HANDLE               i_dir;
 292     HANDLE               i_mtx;
 293     uint32_t             i_cnt;    /* # of threads referencing this instance. */
 294
 295     void use () { ++i_cnt; }
 296     void unuse () { if (i_cnt > 0) --i_cnt; }
 297     bool inuse () { return i_cnt > 0; }
 298
 299   public:
 300     inode_t (__dev32_t dev, __ino64_t ino);
 301     ~inode_t ();
 302
 303     void *operator new (size_t size)
 304     { return cmalloc (HEAP_FHANDLER, sizeof (inode_t)); }
 305     void operator delete (void *p)
 306     { cfree (p); }
 307
 308     static inode_t *get (__dev32_t dev, __ino64_t ino, bool create_if_missing);
 309
 310     void LOCK () { WaitForSingleObject (i_mtx, INFINITE); }
 311     void UNLOCK () { ReleaseMutex (i_mtx); }
 312
 313     void notused () { i_cnt = 0; }
 314
 315     void unlock_and_remove_if_unused ();
 316
 317     lockf_t *get_all_locks_list ();
 318
 319     bool del_my_locks (long long id, HANDLE fhdl);
 320 };
 321
 322 inode_t::~inode_t ()
 323 {
 324   lockf_t *lock, *n_lock;
 325   for (lock = i_lockf; lock && (n_lock = lock->lf_next, 1); lock = n_lock)
 326     delete lock;
 327   NtClose (i_mtx);
 328   NtClose (i_dir);
 329 }
 330
 331 void
 332 inode_t::unlock_and_remove_if_unused ()
 333 {
 334   UNLOCK ();
 335   INODE_LIST_LOCK ();
 336   unuse ();
 337   if (i_lockf == NULL && !inuse ())
 338     {
 339       LIST_REMOVE (this, i_next);
 340       delete this;
 341     }
 342   INODE_LIST_UNLOCK ();
 343 }
 344
 345 bool
 346 inode_t::del_my_locks (long long id, HANDLE fhdl)
 347 {
 348   lockf_t *lock, *n_lock;
 349   lockf_t **prev = &i_lockf;
 350   int lc = 0;
 351   for (lock = *prev; lock && (n_lock = lock->lf_next, 1); lock = n_lock)
 352     {
 353       if (lock->lf_flags & F_POSIX)
 354         {
 355           /* Delete all POSIX locks. */
 356           *prev = n_lock;
 357           ++lc;
 358           delete lock;
 359         }
 360       else if (id && lock->lf_id == id)
 361         {
 362           int cnt = 0;
 363           cygheap_fdenum cfd (true);
 364           while (cfd.next () >= 0)
 365             if (cfd->get_unique_id () == lock->lf_id && ++cnt > 1)
 366               break;
 367           /* Delete BSD flock lock when no other fd in this process references
 368              it anymore. */
 369           if (cnt <= 1)
 370             {
 371               *prev = n_lock;
 372               lock->del_lock_obj (fhdl);
 373               delete lock;
 374             }
 375         }
 376       else
 377         prev = &lock->lf_next;
 378     }
 379   return i_lockf == NULL;
 380 }
 381
 382 /* Used to delete the locks on a file hold by this process.  Called from
 383    close(2) and fixup_after_fork, as well as from fixup_after_exec in
 384    case the close_on_exec flag is set.  The whole inode is deleted as
 385    soon as no lock exists on it anymore. */
 386 void
 387 fhandler_base::del_my_locks (del_lock_called_from from)
 388 {
 389   inode_t *node = inode_t::get (get_dev (), get_ino (), false);
 390   if (node)
 391     {
 392       /* When we're called from fixup_after_exec, the fhandler is a
 393          close-on-exec fhandler.  In this case our io handle is already
 394          invalid.  We can't use it to test for the object reference count.
 395          However, that shouldn't be necessary for the following reason.
 396          After exec, there are no threads in the current process waiting for
 397          the lock.  So, either we're the only process accessing the file table
 398          entry and there are no threads which require signalling, or we have
 399          a parent process still accessing the file object and signalling the
 400          lock event would be premature. */
 401       node->del_my_locks (from == after_fork ? 0 : get_unique_id (),
 402                           from == after_exec ? NULL : get_handle ());
 403       node->unlock_and_remove_if_unused ();
 404     }
 405 }
 406
 407 /* Called in an execed child.  The exec'ed process must allow SYNCHRONIZE
 408    access to everyone if at least one inode exists.
 409    The lock owner's Windows PID changed and all POSIX lock event objects
 410    have to be relabeled so that waiting processes know which process to
 411    wait on.  If the node has been abandoned due to close_on_exec on the
 412    referencing fhandlers, remove the inode entirely. */
 413 void
 414 fixup_lockf_after_exec ()
 415 {
 416   inode_t *node, *next_node;
 417
 418   INODE_LIST_LOCK ();
 419   if (LIST_FIRST (&cygheap->inode_list))
 420     allow_others_to_sync ();
 421   LIST_FOREACH_SAFE (node, &cygheap->inode_list, i_next, next_node)
 422     {
 423       node->notused ();
 424       int cnt = 0;
 425       cygheap_fdenum cfd (true);
 426       while (cfd.next () >= 0)
 427         if (cfd->get_dev () == node->i_dev
 428             && cfd->get_ino () == node->i_ino
 429             && ++cnt > 1)
 430           break;
 431       if (cnt == 0)
 432         {
 433           LIST_REMOVE (node, i_next);
 434           delete node;
 435         }
 436       else
 437         {
 438           node->LOCK ();
 439           for (lockf_t *lock = node->i_lockf; lock; lock = lock->lf_next)
 440             if (lock->lf_flags & F_POSIX)
 441               {
 442                 lock->del_lock_obj (NULL);
 443                 lock->lf_wid = myself->dwProcessId;
 444                 lock->lf_ver = 0;
 445                 lock->create_lock_obj ();
 446               }
 447           node->UNLOCK ();
 448         }
 449     }
 450   INODE_LIST_UNLOCK ();
 451 }
 452
 453 /* static method to return a pointer to the inode_t structure for a specific
 454    file.  The file is specified by the device and inode_t number.  If inode_t
 455    doesn't exist, create it. */
 456 inode_t *
 457 inode_t::get (__dev32_t dev, __ino64_t ino, bool create_if_missing)
 458 {
 459   inode_t *node;
 460
 461   INODE_LIST_LOCK ();
 462   LIST_FOREACH (node, &cygheap->inode_list, i_next)
 463     if (node->i_dev == dev && node->i_ino == ino)
 464       break;
 465   if (!node && create_if_missing)
 466     {
 467       node = new inode_t (dev, ino);
 468       if (node)
 469         LIST_INSERT_HEAD (&cygheap->inode_list, node, i_next);
 470     }
 471   if (node)
 472     node->use ();
 473   INODE_LIST_UNLOCK ();
 474   if (node)
 475     node->LOCK ();
 476   return node;
 477 }
 478
 479 inode_t::inode_t (__dev32_t dev, __ino64_t ino)
 480 : i_lockf (NULL), i_all_lf (NULL), i_dev (dev), i_ino (ino), i_cnt (0L)
 481 {
 482   HANDLE parent_dir;
 483   WCHAR name[48];
 484   UNICODE_STRING uname;
 485   OBJECT_ATTRIBUTES attr;
 486   NTSTATUS status;
 487
 488   parent_dir = get_shared_parent_dir ();
 489   /* Create a subdir which is named after the device and inode_t numbers
 490      of the given file, in hex notation. */
 491   int len = __small_swprintf (name, L"flock-%08x-%016X", dev, ino);
 492   RtlInitCountedUnicodeString (&uname, name, len * sizeof (WCHAR));
 493   InitializeObjectAttributes (&attr, &uname, OBJ_INHERIT | OBJ_OPENIF,
 494                               parent_dir, everyone_sd (FLOCK_INODE_DIR_ACCESS));
 495   status = NtCreateDirectoryObject (&i_dir, FLOCK_INODE_DIR_ACCESS, &attr);
 496   if (!NT_SUCCESS (status))
 497     api_fatal ("NtCreateDirectoryObject(inode): %p", status);
 498   /* Create a mutex object in the file specific dir, which is used for
 499      access synchronization on the dir and its objects. */
 500   InitializeObjectAttributes (&attr, &ro_u_mtx, OBJ_INHERIT | OBJ_OPENIF, i_dir,
 501                               everyone_sd (CYG_MUTANT_ACCESS));
 502   status = NtCreateMutant (&i_mtx, CYG_MUTANT_ACCESS, &attr, FALSE);
 503   if (!NT_SUCCESS (status))
 504     api_fatal ("NtCreateMutant(inode): %p", status);
 505 }
 506
 507 /* Enumerate all lock event objects for this file and create a lockf_t
 508    list in the i_all_lf member.  This list is searched in lf_getblock
 509    for locks which potentially block our lock request. */
 510
 511 /* Number of lockf_t structs which fit in the temporary buffer. */
 512 #define MAX_LOCKF_CNT   ((intptr_t)((NT_MAX_PATH * sizeof (WCHAR)) \
 513                                     / sizeof (lockf_t)))
 514
 515 lockf_t *
 516 inode_t::get_all_locks_list ()
 517 {
 518   struct fdbi
 519   {
 520     DIRECTORY_BASIC_INFORMATION dbi;
 521     WCHAR buf[2][NAME_MAX + 1];
 522   } f;
 523   ULONG context;
 524   NTSTATUS status;
 525   lockf_t *lock = i_all_lf;
 526
 527   for (BOOLEAN restart = TRUE;
 528        NT_SUCCESS (status = NtQueryDirectoryObject (i_dir, &f, sizeof f, TRUE,
 529                                                     restart, &context, NULL));
 530        restart = FALSE)
 531     {
 532       if (f.dbi.ObjectName.Length != LOCK_OBJ_NAME_LEN * sizeof (WCHAR))
 533         continue;
 534       wchar_t *wc = f.dbi.ObjectName.Buffer, *endptr;
 535       /* "%02x-%01x-%016X-%016X-%016X-%08x-%04x",
 536          lf_flags, lf_type, lf_start, lf_end, lf_id, lf_wid, lf_ver */
 537       wc[LOCK_OBJ_NAME_LEN] = L'\0';
 538       short flags = wcstol (wc, &endptr, 16);
 539       if ((flags & ~(F_FLOCK | F_POSIX)) != 0
 540           || ((flags & (F_FLOCK | F_POSIX)) == (F_FLOCK | F_POSIX)))
 541         continue;
 542       short type = wcstol (endptr + 1, &endptr, 16);
 543       if ((type != F_RDLCK && type != F_WRLCK) || !endptr || *endptr != L'-')
 544         continue;
 545       _off64_t start = (_off64_t) wcstoull (endptr + 1, &endptr, 16);
 546       if (start < 0 || !endptr || *endptr != L'-')
 547         continue;
 548       _off64_t end = (_off64_t) wcstoull (endptr + 1, &endptr, 16);
 549       if (end < -1LL || (end > 0 && end < start) || !endptr || *endptr != L'-')
 550         continue;
 551       long long id = wcstoll (endptr + 1, &endptr, 16);
 552       if (!endptr || *endptr != L'-'
 553           || ((flags & F_POSIX) && (id < 1 || id > ULONG_MAX)))
 554         continue;
 555       DWORD wid = wcstoul (endptr + 1, &endptr, 16);
 556       if (!endptr || *endptr != L'-')
 557         continue;
 558       uint16_t ver = wcstoul (endptr + 1, &endptr, 16);
 559       if (endptr && *endptr != L'\0')
 560         continue;
 561       if (lock - i_all_lf >= MAX_LOCKF_CNT)
 562         {
 563           system_printf ("Warning, can't handle more than %d locks per file.",
 564                          MAX_LOCKF_CNT);
 565           break;
 566         }
 567       if (lock > i_all_lf)
 568         lock[-1].lf_next = lock;
 569       new (lock++) lockf_t (this, &i_all_lf,
 570                             flags, type, start, end, id, wid, ver);
 571     }
 572   /* If no lock has been found, return NULL. */
 573   if (lock == i_all_lf)
 574     return NULL;
 575   return i_all_lf;
 576 }
 577
 578 /* Create the lock object name.  The name is constructed from the lock
 579    properties which identify it uniquely, all values in hex. */
 580 POBJECT_ATTRIBUTES
 581 lockf_t::create_lock_obj_attr (lockfattr_t *attr, ULONG flags)
 582 {
 583   __small_swprintf (attr->name, L"%02x-%01x-%016X-%016X-%016X-%08x-%04x",
 584                     lf_flags & (F_POSIX | F_FLOCK), lf_type, lf_start, lf_end,
 585                     lf_id, lf_wid, lf_ver);
 586   RtlInitCountedUnicodeString (&attr->uname, attr->name,
 587                                LOCK_OBJ_NAME_LEN * sizeof (WCHAR));
 588   InitializeObjectAttributes (&attr->attr, &attr->uname, flags, lf_inode->i_dir,
 589                               everyone_sd (FLOCK_EVENT_ACCESS));
 590   return &attr->attr;
 591 }
 592
 593 /* Create the lock event object in the file's subdir in the NT global
 594    namespace. */
 595 void
 596 lockf_t::create_lock_obj ()
 597 {
 598   lockfattr_t attr;
 599   NTSTATUS status;
 600
 601   do
 602     {
 603       status = NtCreateEvent (&lf_obj, CYG_EVENT_ACCESS,
 604                               create_lock_obj_attr (&attr, OBJ_INHERIT),
 605                               NotificationEvent, FALSE);
 606       if (!NT_SUCCESS (status))
 607         {
 608           if (status != STATUS_OBJECT_NAME_COLLISION)
 609             api_fatal ("NtCreateEvent(lock): %p", status);
 610           /* If we get a STATUS_OBJECT_NAME_COLLISION, the event still exists
 611              because some other process is waiting for it in lf_setlock.
 612              If so, check the event's signal state.  If we can't open it, it
 613              has been closed in the meantime, so just try again.  If we can
 614              open it and the object is not signalled, it's surely a bug in the
 615              code somewhere.  Otherwise, close the event and retry to create
 616              a new event with another name. */
 617           if (open_lock_obj ())
 618             {
 619               if (!IsEventSignalled (lf_obj))
 620                 api_fatal ("NtCreateEvent(lock): %p", status);
 621               close_lock_obj ();
 622               /* Increment the lf_ver field until we have no collision. */
 623               ++lf_ver;
 624             }
 625         }
 626     }
 627   while (!NT_SUCCESS (status));
 628 }
 629
 630 /* Open a lock event object for SYNCHRONIZE access (to wait for it). */
 631 bool
 632 lockf_t::open_lock_obj ()
 633 {
 634   lockfattr_t attr;
 635   NTSTATUS status;
 636
 637   status = NtOpenEvent (&lf_obj, FLOCK_EVENT_ACCESS,
 638                         create_lock_obj_attr (&attr, 0));
 639   if (!NT_SUCCESS (status))
 640     {
 641       SetLastError (RtlNtStatusToDosError (status));
 642       lf_obj = NULL; /* Paranoia... */
 643     }
 644   return lf_obj != NULL;
 645 }
 646
 647 /* Delete a lock event handle.  The important thing here is to signal it
 648    before closing the handle.  This way all threads waiting for this lock
 649    can wake up. */
 650 void
 651 lockf_t::del_lock_obj (HANDLE fhdl, bool signal)
 652 {
 653   if (lf_obj)
 654     {
 655       /* Only signal the event if it's either a POSIX lock, or, in case of
 656          BSD flock locks, if it's an explicit unlock or if the calling fhandler
 657          holds the last reference to the file table entry.  The file table
 658          entry in UNIX terms is equivalent to the FILE_OBJECT in Windows NT
 659          terms.  It's what the handle/descriptor references when calling
 660          CreateFile/open.  Calling DuplicateHandle/dup only creates a new
 661          handle/descriptor to the same FILE_OBJECT/file table entry. */
 662       if ((lf_flags & F_POSIX) || signal
 663           || (fhdl && get_obj_handle_count (fhdl) <= 1))
 664         NtSetEvent (lf_obj, NULL);
 665       close_lock_obj ();
 666     }
 667 }
 668
 669 lockf_t::~lockf_t ()
 670 {
 671   del_lock_obj (NULL);
 672 }
 673
 674 /*
 675  * This variable controls the maximum number of processes that will
 676  * be checked in doing deadlock detection.
 677  */
 678 #ifndef __CYGWIN__
 679 #define MAXDEPTH 50
 680 static int maxlockdepth = MAXDEPTH;
 681 #endif
 682
 683 #define NOLOCKF (struct lockf_t *)0
 684 #define SELF    0x1
 685 #define OTHERS  0x2
 686 static int      lf_clearlock (lockf_t *, lockf_t **, HANDLE);
 687 static int      lf_findoverlap (lockf_t *, lockf_t *, int, lockf_t ***, lockf_t **);
 688 static lockf_t *lf_getblock (lockf_t *, inode_t *node);
 689 static int      lf_getlock (lockf_t *, inode_t *, struct __flock64 *);
 690 static int      lf_setlock (lockf_t *, inode_t *, lockf_t **, HANDLE);
 691 static void     lf_split (lockf_t *, lockf_t *, lockf_t **);
 692 static void     lf_wakelock (lockf_t *, HANDLE);
 693
 694 int
 695 fhandler_disk_file::lock (int a_op, struct __flock64 *fl)
 696 {
 697   _off64_t start, end, oadd;
 698   int error = 0;
 699
 700   short a_flags = fl->l_type & (F_POSIX | F_FLOCK);
 701   short type = fl->l_type & (F_RDLCK | F_WRLCK | F_UNLCK);
 702
 703   if (!a_flags)
 704     a_flags = F_POSIX; /* default */
 705   if (a_op == F_SETLKW)
 706     {
 707       a_op = F_SETLK;
 708       a_flags |= F_WAIT;
 709     }
 710   if (a_op == F_SETLK)
 711     switch (type)
 712       {
 713       case F_UNLCK:
 714         a_op = F_UNLCK;
 715         break;
 716       case F_RDLCK:
 717         /* flock semantics don't specify a requirement that the file has
 718            been opened with a specific open mode, in contrast to POSIX locks
 719            which require that a file is opened for reading to place a read
 720            lock and opened for writing to place a write lock. */
 721         if ((a_flags & F_POSIX) && !(get_access () & GENERIC_READ))
 722           {
 723             set_errno (EBADF);
 724             return -1;
 725           }
 726         break;
 727       case F_WRLCK:
 728         /* See above comment. */
 729         if ((a_flags & F_POSIX) && !(get_access () & GENERIC_WRITE))
 730           {
 731             set_errno (EBADF);
 732             return -1;
 733           }
 734         break;
 735       default:
 736         set_errno (EINVAL);
 737         return -1;
 738       }
 739
 740   /*
 741    * Convert the flock structure into a start and end.
 742    */
 743   switch (fl->l_whence)
 744     {
 745     case SEEK_SET:
 746       start = fl->l_start;
 747       break;
 748
 749     case SEEK_CUR:
 750       if ((start = lseek (0, SEEK_CUR)) == ILLEGAL_SEEK)
 751         return -1;
 752       break;
 753
 754     case SEEK_END:
 755       {
 756         NTSTATUS status;
 757         IO_STATUS_BLOCK io;
 758         FILE_STANDARD_INFORMATION fsi;
 759
 760         status = NtQueryInformationFile (get_handle (), &io, &fsi, sizeof fsi,
 761                                          FileStandardInformation);
 762         if (!NT_SUCCESS (status))
 763           {
 764             __seterrno_from_nt_status (status);
 765             return -1;
 766           }
 767         if (fl->l_start > 0 && fsi.EndOfFile.QuadPart > OFF_MAX - fl->l_start)
 768           {
 769             set_errno (EOVERFLOW);
 770             return -1;
 771           }
 772         start = fsi.EndOfFile.QuadPart + fl->l_start;
 773       }
 774       break;
 775
 776     default:
 777       return (EINVAL);
 778     }
 779   if (start < 0)
 780     {
 781       set_errno (EINVAL);
 782       return -1;
 783     }
 784   if (fl->l_len < 0)
 785     {
 786       if (start == 0)
 787         {
 788           set_errno (EINVAL);
 789           return -1;
 790         }
 791       end = start - 1;
 792       start += fl->l_len;
 793       if (start < 0)
 794         {
 795           set_errno (EINVAL);
 796           return -1;
 797         }
 798     }
 799   else if (fl->l_len == 0)
 800     end = -1;
 801   else
 802     {
 803       oadd = fl->l_len - 1;
 804       if (oadd > OFF_MAX - start)
 805         {
 806           set_errno (EOVERFLOW);
 807           return -1;
 808         }
 809       end = start + oadd;
 810     }
 811
 812 restart:        /* Entry point after a restartable signal came in. */
 813
 814   inode_t *node = inode_t::get (get_dev (), get_ino (), true);
 815   if (!node)
 816     {
 817       set_errno (ENOLCK);
 818       return -1;
 819     }
 820
 821   /* Unlock the fd table which has been locked in fcntl_worker/lock_worker,
 822      otherwise a blocking F_SETLKW never wakes up on a signal. */
 823   cygheap->fdtab.unlock ();
 824
 825   lockf_t **head = &node->i_lockf;
 826
 827 #if 0
 828   /*
 829    * Avoid the common case of unlocking when inode_t has no locks.
 830    *
 831    * This shortcut is invalid for Cygwin because the above inode_t::get
 832    * call returns with an empty lock list if this process has no locks
 833    * on the file yet.
 834    */
 835   if (*head == NULL)
 836     {
 837       if (a_op != F_SETLK)
 838         {
 839           node->UNLOCK ();
 840           fl->l_type = F_UNLCK;
 841           return 0;
 842         }
 843     }
 844 #endif
 845   /*
 846    * Allocate a spare structure in case we have to split.
 847    */
 848   lockf_t *clean = NULL;
 849   if (a_op == F_SETLK || a_op == F_UNLCK)
 850     {
 851       clean = new lockf_t ();
 852       if (!clean)
 853         {
 854           node->unlock_and_remove_if_unused ();
 855           set_errno (ENOLCK);
 856           return -1;
 857         }
 858     }
 859   /*
 860    * Create the lockf_t structure
 861    */
 862   lockf_t *lock = new lockf_t (node, head, a_flags, type, start, end,
 863                                (a_flags & F_FLOCK) ? get_unique_id ()
 864                                                    : getpid (),
 865                                myself->dwProcessId, 0);
 866   if (!lock)
 867     {
 868       node->unlock_and_remove_if_unused ();
 869       set_errno (ENOLCK);
 870       return -1;
 871     }
 872
 873   switch (a_op)
 874     {
 875     case F_SETLK:
 876       error = lf_setlock (lock, node, &clean, get_handle ());
 877       break;
 878
 879     case F_UNLCK:
 880       error = lf_clearlock (lock, &clean, get_handle ());
 881       lock->lf_next = clean;
 882       clean = lock;
 883       break;
 884
 885     case F_GETLK:
 886       error = lf_getlock (lock, node, fl);
 887       lock->lf_next = clean;
 888       clean = lock;
 889       break;
 890
 891     default:
 892       lock->lf_next = clean;
 893       clean = lock;
 894       error = EINVAL;
 895       break;
 896     }
 897   for (lock = clean; lock != NULL; )
 898     {
 899       lockf_t *n = lock->lf_next;
 900       lock->del_lock_obj (get_handle (), a_op == F_UNLCK);
 901       delete lock;
 902       lock = n;
 903     }
 904   node->unlock_and_remove_if_unused ();
 905   switch (error)
 906     {
 907     case 0:             /* All is well. */
 908       need_fork_fixup (true);
 909       return 0;
 910     case EINTR:         /* Signal came in. */
 911       if (_my_tls.call_signal_handler ())
 912         goto restart;
 913       break;
 914     case ECANCELED:     /* The thread has been sent a cancellation request. */
 915       pthread::static_cancel_self ();
 916       /*NOTREACHED*/
 917     default:
 918       break;
 919     }
 920   set_errno (error);
 921   return -1;
 922 }
 923
 924 /*
 925  * Set a byte-range lock.
 926  */
 927 static int
 928 lf_setlock (lockf_t *lock, inode_t *node, lockf_t **clean, HANDLE fhdl)
 929 {
 930   lockf_t *block;
 931   lockf_t **head = lock->lf_head;
 932   lockf_t **prev, *overlap;
 933   int ovcase, priority, old_prio, needtolink;
 934   tmp_pathbuf tp;
 935
 936   /*
 937    * Set the priority
 938    */
 939   priority = old_prio = GetThreadPriority (GetCurrentThread ());
 940   if (lock->lf_type == F_WRLCK && priority <= THREAD_PRIORITY_ABOVE_NORMAL)
 941     priority = THREAD_PRIORITY_HIGHEST;
 942   /*
 943    * Scan lock list for this file looking for locks that would block us.
 944    */
 945   /* Create temporary space for the all locks list. */
 946   node->i_all_lf = (lockf_t *) (void *) tp.w_get ();
 947   while ((block = lf_getblock(lock, node)))
 948     {
 949       HANDLE obj = block->lf_obj;
 950       block->lf_obj = NULL;
 951
 952       /*
 953        * Free the structure and return if nonblocking.
 954        */
 955       if ((lock->lf_flags & F_WAIT) == 0)
 956         {
 957           NtClose (obj);
 958           lock->lf_next = *clean;
 959           *clean = lock;
 960           return EAGAIN;
 961         }
 962       /*
 963        * We are blocked. Since flock style locks cover
 964        * the whole file, there is no chance for deadlock.
 965        * For byte-range locks we must check for deadlock.
 966        *
 967        * Deadlock detection is done by looking through the
 968        * wait channels to see if there are any cycles that
 969        * involve us. MAXDEPTH is set just to make sure we
 970        * do not go off into neverland.
 971        */
 972       /* FIXME: We check the handle count of all the lock event objects
 973                 this process holds.  If it's > 1, another process is
 974                 waiting for one of our locks.  This method isn't overly
 975                 intelligent.  If it turns out to be too dumb, we might
 976                 have to remove it or to find another method. */
 977       if (lock->lf_flags & F_POSIX)
 978         for (lockf_t *lk = node->i_lockf; lk; lk = lk->lf_next)
 979           if ((lk->lf_flags & F_POSIX) && get_obj_handle_count (lk->lf_obj) > 1)
 980             {
 981               NtClose (obj);
 982               return EDEADLK;
 983             }
 984
 985       /*
 986        * For flock type locks, we must first remove
 987        * any shared locks that we hold before we sleep
 988        * waiting for an exclusive lock.
 989        */
 990       if ((lock->lf_flags & F_FLOCK) && lock->lf_type == F_WRLCK)
 991         {
 992           lock->lf_type = F_UNLCK;
 993           (void) lf_clearlock (lock, clean, fhdl);
 994           lock->lf_type = F_WRLCK;
 995         }
 996
 997       /*
 998        * Add our lock to the blocked list and sleep until we're free.
 999        * Remember who blocked us (for deadlock detection).
1000        */
1001       /* Cygwin:  No locked list.  See deadlock recognition above. */
1002
1003       node->UNLOCK ();
1004
1005       /* Create list of objects to wait for. */
1006       HANDLE w4[4] = { obj, NULL, NULL, NULL };
1007       DWORD wait_count = 1;
1008
1009       HANDLE proc = NULL;
1010       if (lock->lf_flags & F_POSIX)
1011         {
1012           proc = OpenProcess (SYNCHRONIZE, FALSE, block->lf_wid);
1013           if (!proc)
1014             debug_printf ("Can't sync with process holding a POSIX lock "
1015                           "(Win32 pid %lu): %E", block->lf_wid);
1016           else
1017             w4[wait_count++] = proc;
1018         }
1019       DWORD WAIT_SIGNAL_ARRIVED = WAIT_OBJECT_0 + wait_count;
1020       w4[wait_count++] = signal_arrived;
1021
1022       DWORD WAIT_THREAD_CANCELED = WAIT_TIMEOUT + 1;
1023       HANDLE cancel_event = pthread::get_cancel_event ();
1024       if (cancel_event)
1025         {
1026           WAIT_THREAD_CANCELED = WAIT_OBJECT_0 + wait_count;
1027           w4[wait_count++] = cancel_event;
1028         }
1029
1030       /* Wait for the blocking object and, for POSIX locks, its holding process.
1031          Unfortunately, since BSD flock locks are not attached to a specific
1032          process, we can't recognize an abandoned lock by sync'ing with the
1033          creator process.  We have to make sure the event object is in a
1034          signalled state, or that it has gone away.  The latter we can only
1035          recognize by retrying to fetch the block list, so we must not wait
1036          infinitely.  Same problem for POSIX locks if the process has already
1037          exited at the time we're trying to open the process. */
1038       SetThreadPriority (GetCurrentThread (), priority);
1039       DWORD ret = WaitForMultipleObjects (wait_count, w4, FALSE,
1040                                           proc ? INFINITE : 100L);
1041       SetThreadPriority (GetCurrentThread (), old_prio);
1042       /* Always close handles before locking the node. */
1043       NtClose (obj);
1044       if (proc)
1045         CloseHandle (proc);
1046       node->LOCK ();
1047       if (ret == WAIT_SIGNAL_ARRIVED)
1048         {
1049           /* A signal came in. */
1050           lock->lf_next = *clean;
1051           *clean = lock;
1052           return EINTR;
1053         }
1054       else if (ret == WAIT_THREAD_CANCELED)
1055         {
1056           /* The thread has been sent a cancellation request. */
1057           lock->lf_next = *clean;
1058           *clean = lock;
1059           return ECANCELED;
1060         }
1061       else
1062         /* The lock object has been set to signalled or ...
1063            for POSIX locks, the process holding the lock has exited, or ...
1064            just a timeout.  Just retry. */
1065         continue;
1066     }
1067   allow_others_to_sync ();
1068   /*
1069    * No blocks!!  Add the lock.  Note that we will
1070    * downgrade or upgrade any overlapping locks this
1071    * process already owns.
1072    *
1073    * Handle any locks that overlap.
1074    */
1075   prev = head;
1076   block = *head;
1077   needtolink = 1;
1078   for (;;)
1079     {
1080       ovcase = lf_findoverlap (block, lock, SELF, &prev, &overlap);
1081       if (ovcase)
1082         block = overlap->lf_next;
1083       /*
1084        * Six cases:
1085        *  0) no overlap
1086        *  1) overlap == lock
1087        *  2) overlap contains lock
1088        *  3) lock contains overlap
1089        *  4) overlap starts before lock
1090        *  5) overlap ends after lock
1091        */
1092       switch (ovcase)
1093         {
1094         case 0: /* no overlap */
1095           if (needtolink)
1096             {
1097               *prev = lock;
1098               lock->lf_next = overlap;
1099               lock->create_lock_obj ();
1100             }
1101             break;
1102
1103         case 1: /* overlap == lock */
1104           /*
1105            * If downgrading lock, others may be
1106            * able to acquire it.
1107            * Cygwin: Always wake lock.
1108            */
1109           lf_wakelock (overlap, fhdl);
1110           overlap->lf_type = lock->lf_type;
1111           overlap->create_lock_obj ();
1112           lock->lf_next = *clean;
1113           *clean = lock;
1114           break;
1115
1116         case 2: /* overlap contains lock */
1117           /*
1118            * Check for common starting point and different types.
1119            */
1120           if (overlap->lf_type == lock->lf_type)
1121             {
1122               lock->lf_next = *clean;
1123               *clean = lock;
1124               break;
1125             }
1126           if (overlap->lf_start == lock->lf_start)
1127             {
1128               *prev = lock;
1129               lock->lf_next = overlap;
1130               overlap->lf_start = lock->lf_end + 1;
1131             }
1132           else
1133             lf_split (overlap, lock, clean);
1134           lf_wakelock (overlap, fhdl);
1135           overlap->create_lock_obj ();
1136           lock->create_lock_obj ();
1137           if (lock->lf_next && !lock->lf_next->lf_obj)
1138             lock->lf_next->create_lock_obj ();
1139           break;
1140
1141         case 3: /* lock contains overlap */
1142           /*
1143            * If downgrading lock, others may be able to
1144            * acquire it, otherwise take the list.
1145            * Cygwin: Always wake old lock and create new lock.
1146            */
1147           lf_wakelock (overlap, fhdl);
1148           /*
1149            * Add the new lock if necessary and delete the overlap.
1150            */
1151           if (needtolink)
1152             {
1153               *prev = lock;
1154               lock->lf_next = overlap->lf_next;
1155               prev = &lock->lf_next;
1156               lock->create_lock_obj ();
1157               needtolink = 0;
1158             }
1159           else
1160             *prev = overlap->lf_next;
1161           overlap->lf_next = *clean;
1162           *clean = overlap;
1163           continue;
1164
1165         case 4: /* overlap starts before lock */
1166           /*
1167            * Add lock after overlap on the list.
1168            */
1169           lock->lf_next = overlap->lf_next;
1170           overlap->lf_next = lock;
1171           overlap->lf_end = lock->lf_start - 1;
1172           prev = &lock->lf_next;
1173           lf_wakelock (overlap, fhdl);
1174           overlap->create_lock_obj ();
1175           lock->create_lock_obj ();
1176           needtolink = 0;
1177           continue;
1178
1179         case 5: /* overlap ends after lock */
1180           /*
1181            * Add the new lock before overlap.
1182            */
1183           if (needtolink) {
1184               *prev = lock;
1185               lock->lf_next = overlap;
1186           }
1187           overlap->lf_start = lock->lf_end + 1;
1188           lf_wakelock (overlap, fhdl);
1189           lock->create_lock_obj ();
1190           overlap->create_lock_obj ();
1191           break;
1192         }
1193       break;
1194     }
1195   return 0;
1196 }
1197
1198 /*
1199  * Remove a byte-range lock on an inode_t.
1200  *
1201  * Generally, find the lock (or an overlap to that lock)
1202  * and remove it (or shrink it), then wakeup anyone we can.
1203  */
1204 static int
1205 lf_clearlock (lockf_t *unlock, lockf_t **clean, HANDLE fhdl)
1206 {
1207   lockf_t **head = unlock->lf_head;
1208   lockf_t *lf = *head;
1209   lockf_t *overlap, **prev;
1210   int ovcase;
1211
1212   if (lf == NOLOCKF)
1213     return 0;
1214   prev = head;
1215   while ((ovcase = lf_findoverlap (lf, unlock, SELF, &prev, &overlap)))
1216     {
1217       /*
1218        * Wakeup the list of locks to be retried.
1219        */
1220       lf_wakelock (overlap, fhdl);
1221
1222       switch (ovcase)
1223         {
1224         case 1: /* overlap == lock */
1225           *prev = overlap->lf_next;
1226           overlap->lf_next = *clean;
1227           *clean = overlap;
1228           break;
1229
1230         case 2: /* overlap contains lock: split it */
1231           if (overlap->lf_start == unlock->lf_start)
1232             {
1233               overlap->lf_start = unlock->lf_end + 1;
1234               overlap->create_lock_obj ();
1235               break;
1236             }
1237           lf_split (overlap, unlock, clean);
1238           overlap->lf_next = unlock->lf_next;
1239           overlap->create_lock_obj ();
1240           if (overlap->lf_next && !overlap->lf_next->lf_obj)
1241             overlap->lf_next->create_lock_obj ();
1242           break;
1243
1244         case 3: /* lock contains overlap */
1245           *prev = overlap->lf_next;
1246           lf = overlap->lf_next;
1247           overlap->lf_next = *clean;
1248           *clean = overlap;
1249           continue;
1250
1251         case 4: /* overlap starts before lock */
1252             overlap->lf_end = unlock->lf_start - 1;
1253             prev = &overlap->lf_next;
1254             lf = overlap->lf_next;
1255             overlap->create_lock_obj ();
1256             continue;
1257
1258         case 5: /* overlap ends after lock */
1259             overlap->lf_start = unlock->lf_end + 1;
1260             overlap->create_lock_obj ();
1261             break;
1262         }
1263       break;
1264     }
1265   return 0;
1266 }
1267
1268 /*
1269  * Check whether there is a blocking lock,
1270  * and if so return its process identifier.
1271  */
1272 static int
1273 lf_getlock (lockf_t *lock, inode_t *node, struct __flock64 *fl)
1274 {
1275   lockf_t *block;
1276   tmp_pathbuf tp;
1277
1278   /* Create temporary space for the all locks list. */
1279   node->i_all_lf = (lockf_t *) (void * ) tp.w_get ();
1280   if ((block = lf_getblock (lock, node)))
1281     {
1282       if (block->lf_obj)
1283         block->close_lock_obj ();
1284       fl->l_type = block->lf_type;
1285       fl->l_whence = SEEK_SET;
1286       fl->l_start = block->lf_start;
1287       if (block->lf_end == -1)
1288         fl->l_len = 0;
1289       else
1290         fl->l_len = block->lf_end - block->lf_start + 1;
1291       if (block->lf_flags & F_POSIX)
1292         fl->l_pid = (pid_t) block->lf_id;
1293       else
1294         fl->l_pid = -1;
1295     }
1296   else
1297     fl->l_type = F_UNLCK;
1298   return 0;
1299 }
1300
1301 /*
1302  * Walk the list of locks for an inode_t and
1303  * return the first blocking lock.
1304  */
1305 static lockf_t *
1306 lf_getblock (lockf_t *lock, inode_t *node)
1307 {
1308   lockf_t **prev, *overlap;
1309   lockf_t *lf = node->get_all_locks_list ();
1310   int ovcase;
1311
1312   prev = lock->lf_head;
1313   while ((ovcase = lf_findoverlap (lf, lock, OTHERS, &prev, &overlap)))
1314     {
1315       /*
1316        * We've found an overlap, see if it blocks us
1317        */
1318       if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
1319         {
1320           /* Open the event object for synchronization. */
1321           if (overlap->open_lock_obj ())
1322             {
1323               /* If we found a POSIX lock, it will block us. */
1324               if (overlap->lf_flags & F_POSIX)
1325                 return overlap;
1326               /* In case of BSD flock locks, check if the event object is
1327                  signalled.  If so, the overlap doesn't actually exist anymore.
1328                  There are just a few open handles left. */
1329               if (!IsEventSignalled (overlap->lf_obj))
1330                 return overlap;
1331               overlap->close_lock_obj ();
1332             }
1333         }
1334       /*
1335        * Nope, point to the next one on the list and
1336        * see if it blocks us
1337        */
1338       lf = overlap->lf_next;
1339     }
1340   return NOLOCKF;
1341 }
1342
1343 /*
1344  * Walk the list of locks for an inode_t to
1345  * find an overlapping lock (if any).
1346  *
1347  * NOTE: this returns only the FIRST overlapping lock.  There
1348  *   may be more than one.
1349  */
1350 static int
1351 lf_findoverlap (lockf_t *lf, lockf_t *lock, int type, lockf_t ***prev,
1352                 lockf_t **overlap)
1353 {
1354   _off64_t start, end;
1355
1356   *overlap = lf;
1357   if (lf == NOLOCKF)
1358     return 0;
1359
1360   start = lock->lf_start;
1361   end = lock->lf_end;
1362   while (lf != NOLOCKF)
1363     {
1364       if (((type & SELF) && lf->lf_id != lock->lf_id)
1365           || ((type & OTHERS) && lf->lf_id == lock->lf_id)
1366           /* As on Linux: POSIX locks and BSD flock locks don't interact. */
1367           || (lf->lf_flags & (F_POSIX | F_FLOCK))
1368              != (lock->lf_flags & (F_POSIX | F_FLOCK)))
1369         {
1370           *prev = &lf->lf_next;
1371           *overlap = lf = lf->lf_next;
1372           continue;
1373         }
1374       /*
1375        * OK, check for overlap
1376        *
1377        * Six cases:
1378        *  0) no overlap
1379        *  1) overlap == lock
1380        *  2) overlap contains lock
1381        *  3) lock contains overlap
1382        *  4) overlap starts before lock
1383        *  5) overlap ends after lock
1384        */
1385       if ((lf->lf_end != -1 && start > lf->lf_end) ||
1386           (end != -1 && lf->lf_start > end))
1387         {
1388           /* Case 0 */
1389           if ((type & SELF) && end != -1 && lf->lf_start > end)
1390             return 0;
1391           *prev = &lf->lf_next;
1392           *overlap = lf = lf->lf_next;
1393           continue;
1394         }
1395       if ((lf->lf_start == start) && (lf->lf_end == end))
1396         {
1397           /* Case 1 */
1398           return 1;
1399         }
1400       if ((lf->lf_start <= start) && (end != -1) &&
1401           ((lf->lf_end >= end) || (lf->lf_end == -1)))
1402         {
1403           /* Case 2 */
1404           return 2;
1405         }
1406       if (start <= lf->lf_start && (end == -1 ||
1407           (lf->lf_end != -1 && end >= lf->lf_end)))
1408         {
1409           /* Case 3 */
1410           return 3;
1411         }
1412       if ((lf->lf_start < start) &&
1413           ((lf->lf_end >= start) || (lf->lf_end == -1)))
1414         {
1415           /* Case 4 */
1416           return 4;
1417         }
1418       if ((lf->lf_start > start) && (end != -1) &&
1419           ((lf->lf_end > end) || (lf->lf_end == -1)))
1420         {
1421           /* Case 5 */
1422           return 5;
1423         }
1424       api_fatal ("lf_findoverlap: default\n");
1425     }
1426   return 0;
1427 }
1428
1429 /*
1430  * Split a lock and a contained region into
1431  * two or three locks as necessary.
1432  */
1433 static void
1434 lf_split (lockf_t *lock1, lockf_t *lock2, lockf_t **split)
1435 {
1436   lockf_t *splitlock;
1437
1438   /*
1439    * Check to see if spliting into only two pieces.
1440    */
1441   if (lock1->lf_start == lock2->lf_start)
1442     {
1443       lock1->lf_start = lock2->lf_end + 1;
1444       lock2->lf_next = lock1;
1445       return;
1446     }
1447   if (lock1->lf_end == lock2->lf_end)
1448     {
1449       lock1->lf_end = lock2->lf_start - 1;
1450       lock2->lf_next = lock1->lf_next;
1451       lock1->lf_next = lock2;
1452       return;
1453     }
1454   /*
1455    * Make a new lock consisting of the last part of
1456    * the encompassing lock.  We use the preallocated
1457    * splitlock so we don't have to block.
1458    */
1459   splitlock = *split;
1460   assert (splitlock != NULL);
1461   *split = splitlock->lf_next;
1462   memcpy (splitlock, lock1, sizeof *splitlock);
1463   /* We have to unset the obj HANDLE here which has been copied by the
1464      above memcpy, so that the calling function recognizes the new object.
1465      See post-lf_split handling in lf_setlock and lf_clearlock. */
1466   splitlock->lf_obj = NULL;
1467   splitlock->lf_start = lock2->lf_end + 1;
1468   lock1->lf_end = lock2->lf_start - 1;
1469   /*
1470    * OK, now link it in
1471    */
1472   splitlock->lf_next = lock1->lf_next;
1473   lock2->lf_next = splitlock;
1474   lock1->lf_next = lock2;
1475 }
1476
1477 /*
1478  * Wakeup a blocklist
1479  * Cygwin: Just signal the lock which gets removed.  This unblocks
1480  * all threads waiting for this lock.
1481  */
1482 static void
1483 lf_wakelock (lockf_t *listhead, HANDLE fhdl)
1484 {
1485   listhead->del_lock_obj (fhdl, true);
1486 }
1487
1488 extern "C" int
1489 flock (int fd, int operation)
1490 {
1491   int res = -1;
1492   int cmd;
1493   struct __flock64 fl = { 0, SEEK_SET, 0, 0, 0 };
1494
1495   myfault efault;
1496   if (efault.faulted (EFAULT))
1497     return -1;
1498
1499   cygheap_fdget cfd (fd, true);
1500   if (cfd < 0)
1501     goto done;
1502
1503   cmd = (operation & LOCK_NB) ? F_SETLK : F_SETLKW;
1504   switch (operation & (~LOCK_NB))
1505     {
1506     case LOCK_EX:
1507       fl.l_type = F_WRLCK | F_FLOCK;
1508       break;
1509     case LOCK_SH:
1510       fl.l_type = F_RDLCK | F_FLOCK;
1511       break;
1512     case LOCK_UN:
1513       fl.l_type = F_UNLCK | F_FLOCK;
1514       break;
1515     default:
1516       set_errno (EINVAL);
1517       goto done;
1518     }
1519   res = cfd->lock (cmd, &fl);
1520   if ((res == -1) && ((get_errno () == EAGAIN) || (get_errno () == EACCES)))
1521     set_errno (EWOULDBLOCK);
1522 done:
1523   syscall_printf ("%d = flock (%d, %d)", res, fd, operation);
1524   return res;
1525 }
1526
1527 extern "C" int
1528 lockf (int filedes, int function, _off64_t size)
1529 {
1530   int res = -1;
1531   int cmd;
1532   struct __flock64 fl;
1533
1534   pthread_testcancel ();
1535
1536   myfault efault;
1537   if (efault.faulted (EFAULT))
1538     return -1;
1539
1540   cygheap_fdget cfd (filedes, true);
1541   if (cfd < 0)
1542     goto done;
1543
1544   fl.l_start = 0;
1545   fl.l_len = size;
1546   fl.l_whence = SEEK_CUR;
1547
1548   switch (function)
1549     {
1550     case F_ULOCK:
1551       cmd = F_SETLK;
1552       fl.l_type = F_UNLCK;
1553       break;
1554     case F_LOCK:
1555       cmd = F_SETLKW;
1556       fl.l_type = F_WRLCK;
1557       break;
1558     case F_TLOCK:
1559       cmd = F_SETLK;
1560       fl.l_type = F_WRLCK;
1561       break;
1562     case F_TEST:
1563       fl.l_type = F_WRLCK;
1564       if (cfd->lock (F_GETLK, &fl) == -1)
1565         goto done;
1566       if (fl.l_type == F_UNLCK || fl.l_pid == getpid ())
1567         res = 0;
1568       else
1569         errno = EAGAIN;
1570       goto done;
1571       /* NOTREACHED */
1572     default:
1573       errno = EINVAL;
1574       goto done;
1575       /* NOTREACHED */
1576     }
1577   res = cfd->lock (cmd, &fl);
1578 done:
1579   syscall_printf ("%d = lockf (%d, %d, %D)", res, filedes, function, size);
1580   return res;
1581 }