OSDN Git Service

Btrfs: fix lockdep deadlock warning due to dev_replace
author Liu Bo <bo.li.liu@oracle.com>
Fri, 17 Jul 2015 08:49:19 +0000 (16:49 +0800)
committer David Sterba <dsterba@suse.com>
Tue, 23 Feb 2016 12:10:10 +0000 (13:10 +0100)
Xfstests btrfs/011 triggers a lockdep deadlock warning:

[ 1226.649039] =========================================================
[ 1226.649039] [ INFO: possible irq lock inversion dependency detected ]
[ 1226.649039] 4.1.0+ #270 Not tainted
[ 1226.649039] ---------------------------------------------------------
[ 1226.652955] kswapd0/46 just changed the state of lock:
[ 1226.652955]  (&delayed_node->mutex){+.+.-.}, at: [<ffffffff81458735>] __btrfs_release_delayed_node+0x45/0x1d0
[ 1226.652955] but this lock took another, RECLAIM_FS-unsafe lock in the past:
[ 1226.652955]  (&fs_info->dev_replace.lock){+.+.+.}

and interrupts could create inverse lock ordering between them.

[ 1226.652955]
other info that might help us debug this:
[ 1226.652955] Chain exists of:
  &delayed_node->mutex --> &found->groups_sem --> &fs_info->dev_replace.lock

[ 1226.652955]  Possible interrupt unsafe locking scenario:

[ 1226.652955]        CPU0                    CPU1
[ 1226.652955]        ----                    ----
[ 1226.652955]   lock(&fs_info->dev_replace.lock);
[ 1226.652955]                                local_irq_disable();
[ 1226.652955]                                lock(&delayed_node->mutex);
[ 1226.652955]                                lock(&found->groups_sem);
[ 1226.652955]   <Interrupt>
[ 1226.652955]     lock(&delayed_node->mutex);
[ 1226.652955]
 *** DEADLOCK ***

Commit 084b6e7c7607 ("btrfs: Fix a lockdep warning when running xfstest.") tried
to fix a similar issue that produces exactly the same warning, but even with that
commit applied we still run into this one.

The above lock chain comes from
btrfs_commit_transaction
  ->btrfs_run_delayed_items
    ...
    ->__btrfs_update_delayed_inode
      ...
      ->__btrfs_cow_block
         ...
         ->find_free_extent
            ->cache_block_group
              ->load_free_space_cache
                ->btrfs_readpages
                  ->submit_one_bio
                    ...
                    ->__btrfs_map_block
                      ->btrfs_dev_replace_lock

However, under high memory pressure, a task that holds dev_replace.lock can be
interrupted by kswapd, which then tries to release memory occupied by
superblocks, inodes and dentries. In doing so it may call evict_inode, which
leads to

[ 1226.652955]  [<ffffffff81458735>] __btrfs_release_delayed_node+0x45/0x1d0
[ 1226.652955]  [<ffffffff81459e74>] btrfs_remove_delayed_node+0x24/0x30
[ 1226.652955]  [<ffffffff8140c5fe>] btrfs_evict_inode+0x34e/0x700

delayed_node->mutex may be acquired in __btrfs_release_delayed_node(), and this
leads to an ABBA deadlock.

To fix this, we can use the "blocking rwlock" scheme already used for
extent_buffer, but things are simpler here since we only need to turn the
read-side spinning lock into a blocking lock.

With this, btrfs/011 no longer produces warnings in dmesg.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/ctree.h
fs/btrfs/dev-replace.c
fs/btrfs/dev-replace.h
fs/btrfs/disk-io.c
fs/btrfs/reada.c
fs/btrfs/scrub.c
fs/btrfs/volumes.c

index bfe4a33..0b427a6 100644 (file)
@@ -1002,8 +1002,10 @@ struct btrfs_dev_replace {
        pid_t lock_owner;
        atomic_t nesting_level;
        struct mutex lock_finishing_cancel_unmount;
-       struct mutex lock_management_lock;
-       struct mutex lock;
+       rwlock_t lock;
+       atomic_t read_locks;
+       atomic_t blocking_readers;
+       wait_queue_head_t read_lock_wq;
 
        struct btrfs_scrub_progress scrub_progress;
 };
index cbb7dbf..8c8b489 100644 (file)
@@ -202,13 +202,13 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
        struct btrfs_dev_replace_item *ptr;
        struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
 
-       btrfs_dev_replace_lock(dev_replace);
+       btrfs_dev_replace_lock(dev_replace, 0);
        if (!dev_replace->is_valid ||
            !dev_replace->item_needs_writeback) {
-               btrfs_dev_replace_unlock(dev_replace);
+               btrfs_dev_replace_unlock(dev_replace, 0);
                return 0;
        }
-       btrfs_dev_replace_unlock(dev_replace);
+       btrfs_dev_replace_unlock(dev_replace, 0);
 
        key.objectid = 0;
        key.type = BTRFS_DEV_REPLACE_KEY;
@@ -264,7 +264,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
        ptr = btrfs_item_ptr(eb, path->slots[0],
                             struct btrfs_dev_replace_item);
 
-       btrfs_dev_replace_lock(dev_replace);
+       btrfs_dev_replace_lock(dev_replace, 1);
        if (dev_replace->srcdev)
                btrfs_set_dev_replace_src_devid(eb, ptr,
                        dev_replace->srcdev->devid);
@@ -287,7 +287,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
        btrfs_set_dev_replace_cursor_right(eb, ptr,
                dev_replace->cursor_right);
        dev_replace->item_needs_writeback = 0;
-       btrfs_dev_replace_unlock(dev_replace);
+       btrfs_dev_replace_unlock(dev_replace, 1);
 
        btrfs_mark_buffer_dirty(eb);
 
@@ -356,7 +356,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
                return PTR_ERR(trans);
        }
 
-       btrfs_dev_replace_lock(dev_replace);
+       btrfs_dev_replace_lock(dev_replace, 1);
        switch (dev_replace->replace_state) {
        case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
        case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
@@ -395,7 +395,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
        dev_replace->is_valid = 1;
        dev_replace->item_needs_writeback = 1;
        args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
-       btrfs_dev_replace_unlock(dev_replace);
+       btrfs_dev_replace_unlock(dev_replace, 1);
 
        ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device);
        if (ret)
@@ -407,7 +407,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
-               btrfs_dev_replace_lock(dev_replace);
+               btrfs_dev_replace_lock(dev_replace, 1);
                goto leave;
        }
 
@@ -433,7 +433,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
 leave:
        dev_replace->srcdev = NULL;
        dev_replace->tgtdev = NULL;
-       btrfs_dev_replace_unlock(dev_replace);
+       btrfs_dev_replace_unlock(dev_replace, 1);
        btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
        return ret;
 }
@@ -471,18 +471,18 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
        /* don't allow cancel or unmount to disturb the finishing procedure */
        mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
 
-       btrfs_dev_replace_lock(dev_replace);
+       btrfs_dev_replace_lock(dev_replace, 0);
        /* was the operation canceled, or is it finished? */
        if (dev_replace->replace_state !=
            BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED) {
-               btrfs_dev_replace_unlock(dev_replace);
+               btrfs_dev_replace_unlock(dev_replace, 0);
                mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
                return 0;
        }
 
        tgt_device = dev_replace->tgtdev;
        src_device = dev_replace->srcdev;
-       btrfs_dev_replace_unlock(dev_replace);
+       btrfs_dev_replace_unlock(dev_replace, 0);
 
        /*
         * flush all outstanding I/O and inode extent mappings before the
@@ -507,7 +507,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
        /* keep away write_all_supers() during the finishing procedure */
        mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
        mutex_lock(&root->fs_info->chunk_mutex);
-       btrfs_dev_replace_lock(dev_replace);
+       btrfs_dev_replace_lock(dev_replace, 1);
        dev_replace->replace_state =
                scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED
                          : BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED;
@@ -528,7 +528,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
                                rcu_str_deref(src_device->name),
                              src_device->devid,
                              rcu_str_deref(tgt_device->name), scrub_ret);
-               btrfs_dev_replace_unlock(dev_replace);
+               btrfs_dev_replace_unlock(dev_replace, 1);
                mutex_unlock(&root->fs_info->chunk_mutex);
                mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
                mutex_unlock(&uuid_mutex);
@@ -565,7 +565,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
        list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
        fs_info->fs_devices->rw_devices++;
 
-       btrfs_dev_replace_unlock(dev_replace);
+       btrfs_dev_replace_unlock(dev_replace, 1);
 
        btrfs_rm_dev_replace_blocked(fs_info);
 
@@ -649,7 +649,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
        struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
        struct btrfs_device *srcdev;
 
-       btrfs_dev_replace_lock(dev_replace);
+       btrfs_dev_replace_lock(dev_replace, 0);
        /* even if !dev_replace_is_valid, the values are good enough for
         * the replace_status ioctl */
        args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
@@ -675,7 +675,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
                        div_u64(btrfs_device_get_total_bytes(srcdev), 1000));
                break;
        }
-       btrfs_dev_replace_unlock(dev_replace);
+       btrfs_dev_replace_unlock(dev_replace, 0);
 }
 
 int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
@@ -698,13 +698,13 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
                return -EROFS;
 
        mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
-       btrfs_dev_replace_lock(dev_replace);
+       btrfs_dev_replace_lock(dev_replace, 1);
        switch (dev_replace->replace_state) {
        case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
        case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
        case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
                result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED;
-               btrfs_dev_replace_unlock(dev_replace);
+               btrfs_dev_replace_unlock(dev_replace, 1);
                goto leave;
        case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
        case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
@@ -717,7 +717,7 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
        dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED;
        dev_replace->time_stopped = get_seconds();
        dev_replace->item_needs_writeback = 1;
-       btrfs_dev_replace_unlock(dev_replace);
+       btrfs_dev_replace_unlock(dev_replace, 1);
        btrfs_scrub_cancel(fs_info);
 
        trans = btrfs_start_transaction(root, 0);
@@ -740,7 +740,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
        struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
 
        mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
-       btrfs_dev_replace_lock(dev_replace);
+       btrfs_dev_replace_lock(dev_replace, 1);
        switch (dev_replace->replace_state) {
        case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
        case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
@@ -756,7 +756,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
                break;
        }
 
-       btrfs_dev_replace_unlock(dev_replace);
+       btrfs_dev_replace_unlock(dev_replace, 1);
        mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
 }
 
@@ -766,12 +766,12 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
        struct task_struct *task;
        struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
 
-       btrfs_dev_replace_lock(dev_replace);
+       btrfs_dev_replace_lock(dev_replace, 1);
        switch (dev_replace->replace_state) {
        case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
        case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
        case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
-               btrfs_dev_replace_unlock(dev_replace);
+               btrfs_dev_replace_unlock(dev_replace, 1);
                return 0;
        case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
                break;
@@ -784,10 +784,10 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
                btrfs_info(fs_info, "cannot continue dev_replace, tgtdev is missing");
                btrfs_info(fs_info,
                        "you may cancel the operation after 'mount -o degraded'");
-               btrfs_dev_replace_unlock(dev_replace);
+               btrfs_dev_replace_unlock(dev_replace, 1);
                return 0;
        }
-       btrfs_dev_replace_unlock(dev_replace);
+       btrfs_dev_replace_unlock(dev_replace, 1);
 
        WARN_ON(atomic_xchg(
                &fs_info->mutually_exclusive_operation_running, 1));
@@ -865,48 +865,58 @@ int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace)
        return 1;
 }
 
-void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace)
+void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace, int rw)
 {
-       /* the beginning is just an optimization for the typical case */
-       if (atomic_read(&dev_replace->nesting_level) == 0) {
-acquire_lock:
-               /* this is not a nested case where the same thread
-                * is trying to acqurire the same lock twice */
-               mutex_lock(&dev_replace->lock);
-               mutex_lock(&dev_replace->lock_management_lock);
-               dev_replace->lock_owner = current->pid;
-               atomic_inc(&dev_replace->nesting_level);
-               mutex_unlock(&dev_replace->lock_management_lock);
-               return;
+       if (rw == 1) {
+               /* write */
+again:
+               wait_event(dev_replace->read_lock_wq,
+                          atomic_read(&dev_replace->blocking_readers) == 0);
+               write_lock(&dev_replace->lock);
+               if (atomic_read(&dev_replace->blocking_readers)) {
+                       write_unlock(&dev_replace->lock);
+                       goto again;
+               }
+       } else {
+               read_lock(&dev_replace->lock);
+               atomic_inc(&dev_replace->read_locks);
        }
+}
 
-       mutex_lock(&dev_replace->lock_management_lock);
-       if (atomic_read(&dev_replace->nesting_level) > 0 &&
-           dev_replace->lock_owner == current->pid) {
-               WARN_ON(!mutex_is_locked(&dev_replace->lock));
-               atomic_inc(&dev_replace->nesting_level);
-               mutex_unlock(&dev_replace->lock_management_lock);
-               return;
+void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace, int rw)
+{
+       if (rw == 1) {
+               /* write */
+               ASSERT(atomic_read(&dev_replace->blocking_readers) == 0);
+               write_unlock(&dev_replace->lock);
+       } else {
+               ASSERT(atomic_read(&dev_replace->read_locks) > 0);
+               atomic_dec(&dev_replace->read_locks);
+               read_unlock(&dev_replace->lock);
        }
+}
 
-       mutex_unlock(&dev_replace->lock_management_lock);
-       goto acquire_lock;
+/* inc blocking cnt and release read lock */
+void btrfs_dev_replace_set_lock_blocking(
+                                       struct btrfs_dev_replace *dev_replace)
+{
+       /* only set blocking for read lock */
+       ASSERT(atomic_read(&dev_replace->read_locks) > 0);
+       atomic_inc(&dev_replace->blocking_readers);
+       read_unlock(&dev_replace->lock);
 }
 
-void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace)
+/* acquire read lock and dec blocking cnt */
+void btrfs_dev_replace_clear_lock_blocking(
+                                       struct btrfs_dev_replace *dev_replace)
 {
-       WARN_ON(!mutex_is_locked(&dev_replace->lock));
-       mutex_lock(&dev_replace->lock_management_lock);
-       WARN_ON(atomic_read(&dev_replace->nesting_level) < 1);
-       WARN_ON(dev_replace->lock_owner != current->pid);
-       atomic_dec(&dev_replace->nesting_level);
-       if (atomic_read(&dev_replace->nesting_level) == 0) {
-               dev_replace->lock_owner = 0;
-               mutex_unlock(&dev_replace->lock_management_lock);
-               mutex_unlock(&dev_replace->lock);
-       } else {
-               mutex_unlock(&dev_replace->lock_management_lock);
-       }
+       /* only set blocking for read lock */
+       ASSERT(atomic_read(&dev_replace->read_locks) > 0);
+       ASSERT(atomic_read(&dev_replace->blocking_readers) > 0);
+       read_lock(&dev_replace->lock);
+       if (atomic_dec_and_test(&dev_replace->blocking_readers) &&
+           waitqueue_active(&dev_replace->read_lock_wq))
+               wake_up(&dev_replace->read_lock_wq);
 }
 
 void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info)
index 20035cb..29e3ef5 100644 (file)
@@ -34,8 +34,11 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
 void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info);
 int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info);
 int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace);
-void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace);
-void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace);
+void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace, int rw);
+void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace, int rw);
+void btrfs_dev_replace_set_lock_blocking(struct btrfs_dev_replace *dev_replace);
+void btrfs_dev_replace_clear_lock_blocking(
+                                       struct btrfs_dev_replace *dev_replace);
 
 static inline void btrfs_dev_replace_stats_inc(atomic64_t *stat_value)
 {
index 4545e2e..81d0f39 100644 (file)
@@ -2272,9 +2272,11 @@ static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
        fs_info->dev_replace.lock_owner = 0;
        atomic_set(&fs_info->dev_replace.nesting_level, 0);
        mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
-       mutex_init(&fs_info->dev_replace.lock_management_lock);
-       mutex_init(&fs_info->dev_replace.lock);
+       rwlock_init(&fs_info->dev_replace.lock);
+       atomic_set(&fs_info->dev_replace.read_locks, 0);
+       atomic_set(&fs_info->dev_replace.blocking_readers, 0);
        init_waitqueue_head(&fs_info->replace_wait);
+       init_waitqueue_head(&fs_info->dev_replace.read_lock_wq);
 }
 
 static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
index 619f929..fe3d4c7 100644 (file)
@@ -396,7 +396,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
        }
 
        /* insert extent in reada_tree + all per-device trees, all or nothing */
-       btrfs_dev_replace_lock(&fs_info->dev_replace);
+       btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
        spin_lock(&fs_info->reada_lock);
        ret = radix_tree_insert(&fs_info->reada_tree, index, re);
        if (ret == -EEXIST) {
@@ -404,12 +404,12 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
                BUG_ON(!re_exist);
                re_exist->refcnt++;
                spin_unlock(&fs_info->reada_lock);
-               btrfs_dev_replace_unlock(&fs_info->dev_replace);
+               btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
                goto error;
        }
        if (ret) {
                spin_unlock(&fs_info->reada_lock);
-               btrfs_dev_replace_unlock(&fs_info->dev_replace);
+               btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
                goto error;
        }
        prev_dev = NULL;
@@ -456,12 +456,12 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
                        BUG_ON(fs_info == NULL);
                        radix_tree_delete(&fs_info->reada_tree, index);
                        spin_unlock(&fs_info->reada_lock);
-                       btrfs_dev_replace_unlock(&fs_info->dev_replace);
+                       btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
                        goto error;
                }
        }
        spin_unlock(&fs_info->reada_lock);
-       btrfs_dev_replace_unlock(&fs_info->dev_replace);
+       btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
 
        btrfs_put_bbio(bbio);
        return re;
index 92bf5ee..4151792 100644 (file)
@@ -3857,16 +3857,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
                return -EIO;
        }
 
-       btrfs_dev_replace_lock(&fs_info->dev_replace);
+       btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
        if (dev->scrub_device ||
            (!is_dev_replace &&
             btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
-               btrfs_dev_replace_unlock(&fs_info->dev_replace);
+               btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
                mutex_unlock(&fs_info->scrub_lock);
                mutex_unlock(&fs_info->fs_devices->device_list_mutex);
                return -EINPROGRESS;
        }
-       btrfs_dev_replace_unlock(&fs_info->dev_replace);
+       btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
 
        ret = scrub_workers_get(fs_info, is_dev_replace);
        if (ret) {
index 366b335..858a2e4 100644 (file)
@@ -1714,12 +1714,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
        } while (read_seqretry(&root->fs_info->profiles_lock, seq));
 
        num_devices = root->fs_info->fs_devices->num_devices;
-       btrfs_dev_replace_lock(&root->fs_info->dev_replace);
+       btrfs_dev_replace_lock(&root->fs_info->dev_replace, 0);
        if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) {
                WARN_ON(num_devices < 1);
                num_devices--;
        }
-       btrfs_dev_replace_unlock(&root->fs_info->dev_replace);
+       btrfs_dev_replace_unlock(&root->fs_info->dev_replace, 0);
 
        if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) {
                ret = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET;
@@ -3686,12 +3686,12 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
        }
 
        num_devices = fs_info->fs_devices->num_devices;
-       btrfs_dev_replace_lock(&fs_info->dev_replace);
+       btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
        if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
                BUG_ON(num_devices < 1);
                num_devices--;
        }
-       btrfs_dev_replace_unlock(&fs_info->dev_replace);
+       btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
        allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
        if (num_devices == 1)
                allowed |= BTRFS_BLOCK_GROUP_DUP;
@@ -5062,10 +5062,10 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
                ret = 1;
        free_extent_map(em);
 
-       btrfs_dev_replace_lock(&fs_info->dev_replace);
+       btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
        if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))
                ret++;
-       btrfs_dev_replace_unlock(&fs_info->dev_replace);
+       btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
 
        return ret;
 }
@@ -5325,10 +5325,12 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
        if (!bbio_ret)
                goto out;
 
-       btrfs_dev_replace_lock(dev_replace);
+       btrfs_dev_replace_lock(dev_replace, 0);
        dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
        if (!dev_replace_is_ongoing)
-               btrfs_dev_replace_unlock(dev_replace);
+               btrfs_dev_replace_unlock(dev_replace, 0);
+       else
+               btrfs_dev_replace_set_lock_blocking(dev_replace);
 
        if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
            !(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) &&
@@ -5751,8 +5753,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
                bbio->mirror_num = map->num_stripes + 1;
        }
 out:
-       if (dev_replace_is_ongoing)
-               btrfs_dev_replace_unlock(dev_replace);
+       if (dev_replace_is_ongoing) {
+               btrfs_dev_replace_clear_lock_blocking(dev_replace);
+               btrfs_dev_replace_unlock(dev_replace, 0);
+       }
        free_extent_map(em);
        return ret;
 }