OSDN Git Service

writeback, memcg: Implement cgroup_writeback_by_id()
author Tejun Heo <tj@kernel.org>
Mon, 26 Aug 2019 16:06:55 +0000 (09:06 -0700)
committer Jens Axboe <axboe@kernel.dk>
Tue, 27 Aug 2019 15:22:38 +0000 (09:22 -0600)
Implement cgroup_writeback_by_id() which initiates cgroup writeback
from bdi and memcg IDs.  This will be used by memcg foreign inode
flushing.

v2: Use wb_get_lookup() instead of wb_get_create() to avoid creating
    spurious wbs.

v3: Interpret 0 @nr as 1.25 * nr_dirty to implement best-effort
    flushing while avoiding possible livelocks.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
fs/fs-writeback.c
include/linux/writeback.h

index 9442f1f..658dc16 100644 (file)
@@ -892,6 +892,89 @@ restart:
 }
 
 /**
+ * cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs
+ * @bdi_id: target bdi id
+ * @memcg_id: target memcg css id
+ * @nr: number of pages to write, 0 for best-effort dirty flushing
+ * @reason: reason why the writeback work was initiated
+ * @done: target wb_completion
+ *
+ * Initiate flush of the bdi_writeback identified by @bdi_id and @memcg_id
+ * with the specified parameters.
+ */
+int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr,
+                          enum wb_reason reason, struct wb_completion *done)
+{
+       struct backing_dev_info *bdi;
+       struct cgroup_subsys_state *memcg_css;
+       struct bdi_writeback *wb;
+       struct wb_writeback_work *work;
+       int ret;
+
+       /* look up bdi and memcg, taking a reference on each */
+       bdi = bdi_get_by_id(bdi_id);
+       if (!bdi)
+               return -ENOENT;
+
+       rcu_read_lock();
+       memcg_css = css_from_id(memcg_id, &memory_cgrp_subsys);
+       if (memcg_css && !css_tryget(memcg_css))
+               memcg_css = NULL;
+       rcu_read_unlock();
+       if (!memcg_css) {
+               ret = -ENOENT;
+               goto out_bdi_put;
+       }
+
+       /*
+        * And find the associated wb.  If the wb isn't there already
+        * there's nothing to flush, don't create one.
+        */
+       wb = wb_get_lookup(bdi, memcg_css);
+       if (!wb) {
+               ret = -ENOENT;
+               goto out_css_put;
+       }
+
+       /*
+        * If @nr is zero, the caller is attempting to write out most of
+        * the currently dirty pages.  Let's take the current dirty page
+        * count and inflate it by 25% which should be large enough to
+        * flush out most dirty pages while avoiding getting livelocked by
+        * concurrent dirtiers.
+        */
+       if (!nr) {
+               unsigned long filepages, headroom, dirty, writeback;
+
+               mem_cgroup_wb_stats(wb, &filepages, &headroom, &dirty,
+                                     &writeback);
+               nr = dirty * 10 / 8;
+       }
+
+       /* issue the writeback work */
+       work = kzalloc(sizeof(*work), GFP_NOWAIT | __GFP_NOWARN);
+       if (work) {
+               work->nr_pages = nr;
+               work->sync_mode = WB_SYNC_NONE;
+               work->range_cyclic = 1;
+               work->reason = reason;
+               work->done = done;
+               work->auto_free = 1;
+               wb_queue_work(wb, work);
+               ret = 0;
+       } else {
+               ret = -ENOMEM;
+       }
+
+       wb_put(wb);
+out_css_put:
+       css_put(memcg_css);
+out_bdi_put:
+       bdi_put(bdi);
+       return ret;
+}
+
+/**
  * cgroup_writeback_umount - flush inode wb switches for umount
  *
  * This function is called when a super_block is about to be destroyed and
index 8945aac..a19d845 100644 (file)
@@ -217,6 +217,8 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
 void wbc_detach_inode(struct writeback_control *wbc);
 void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
                              size_t bytes);
+int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr_pages,
+                          enum wb_reason reason, struct wb_completion *done);
 void cgroup_writeback_umount(void);
 
 /**