OSDN Git Service

block/diskstats: accumulate all per-cpu counters in one pass
authorKonstantin Khlebnikov <khlebnikov@yandex-team.ru>
Wed, 25 Mar 2020 13:07:06 +0000 (16:07 +0300)
committerJens Axboe <axboe@kernel.dk>
Wed, 25 Mar 2020 14:49:10 +0000 (08:49 -0600)
Reading /proc/diskstats iterates over all cpus for summing each field.
It's faster to sum all fields in one pass.

Hammering /proc/diskstats with fio shows 2x performance improvement:

fio --name=test --numjobs=$JOBS --filename=/proc/diskstats \
    --size=1k --bs=1k --fallocate=none --create_on_open=1 \
    --time_based=1 --runtime=10 --invalidate=0 --group_report

  JOBS=1 JOBS=10
Before:   7k iops 64k iops
After:  18k iops      120k iops

Also this way code is more compact:

add/remove: 1/0 grow/shrink: 0/2 up/down: 194/-1540 (-1346)
Function                                     old     new   delta
part_stat_read_all                             -     194    +194
diskstats_show                              1344     631    -713
part_stat_show                              1219     392    -827
Total: Before=14966947, After=14965601, chg -0.01%

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/genhd.c
include/linux/genhd.h

index f7d60b6..9eb981f 100644 (file)
@@ -92,6 +92,34 @@ const char *bdevname(struct block_device *bdev, char *buf)
 }
 EXPORT_SYMBOL(bdevname);
 
+#ifdef CONFIG_SMP
+static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat)
+{
+       int cpu;
+
+       memset(stat, 0, sizeof(struct disk_stats));
+       for_each_possible_cpu(cpu) {
+               struct disk_stats *ptr = per_cpu_ptr(part->dkstats, cpu);
+               int group;
+
+               for (group = 0; group < NR_STAT_GROUPS; group++) {
+                       stat->nsecs[group] += ptr->nsecs[group];
+                       stat->sectors[group] += ptr->sectors[group];
+                       stat->ios[group] += ptr->ios[group];
+                       stat->merges[group] += ptr->merges[group];
+               }
+
+               stat->io_ticks += ptr->io_ticks;
+               stat->time_in_queue += ptr->time_in_queue;
+       }
+}
+#else /* CONFIG_SMP */
+static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat)
+{
+       memcpy(stat, &part->dkstats, sizeof(struct disk_stats));
+}
+#endif /* CONFIG_SMP */
+
 void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, int rw)
 {
        if (queue_is_mq(q))
@@ -1214,9 +1242,12 @@ ssize_t part_stat_show(struct device *dev,
 {
        struct hd_struct *p = dev_to_part(dev);
        struct request_queue *q = part_to_disk(p)->queue;
+       struct disk_stats stat;
        unsigned int inflight;
 
+       part_stat_read_all(p, &stat);
        inflight = part_in_flight(q, p);
+
        return sprintf(buf,
                "%8lu %8lu %8llu %8u "
                "%8lu %8lu %8llu %8u "
@@ -1224,23 +1255,23 @@ ssize_t part_stat_show(struct device *dev,
                "%8lu %8lu %8llu %8u "
                "%8lu %8u"
                "\n",
-               part_stat_read(p, ios[STAT_READ]),
-               part_stat_read(p, merges[STAT_READ]),
-               (unsigned long long)part_stat_read(p, sectors[STAT_READ]),
-               (unsigned int)part_stat_read_msecs(p, STAT_READ),
-               part_stat_read(p, ios[STAT_WRITE]),
-               part_stat_read(p, merges[STAT_WRITE]),
-               (unsigned long long)part_stat_read(p, sectors[STAT_WRITE]),
-               (unsigned int)part_stat_read_msecs(p, STAT_WRITE),
+               stat.ios[STAT_READ],
+               stat.merges[STAT_READ],
+               (unsigned long long)stat.sectors[STAT_READ],
+               (unsigned int)div_u64(stat.nsecs[STAT_READ], NSEC_PER_MSEC),
+               stat.ios[STAT_WRITE],
+               stat.merges[STAT_WRITE],
+               (unsigned long long)stat.sectors[STAT_WRITE],
+               (unsigned int)div_u64(stat.nsecs[STAT_WRITE], NSEC_PER_MSEC),
                inflight,
-               jiffies_to_msecs(part_stat_read(p, io_ticks)),
-               jiffies_to_msecs(part_stat_read(p, time_in_queue)),
-               part_stat_read(p, ios[STAT_DISCARD]),
-               part_stat_read(p, merges[STAT_DISCARD]),
-               (unsigned long long)part_stat_read(p, sectors[STAT_DISCARD]),
-               (unsigned int)part_stat_read_msecs(p, STAT_DISCARD),
-               part_stat_read(p, ios[STAT_FLUSH]),
-               (unsigned int)part_stat_read_msecs(p, STAT_FLUSH));
+               jiffies_to_msecs(stat.io_ticks),
+               jiffies_to_msecs(stat.time_in_queue),
+               stat.ios[STAT_DISCARD],
+               stat.merges[STAT_DISCARD],
+               (unsigned long long)stat.sectors[STAT_DISCARD],
+               (unsigned int)div_u64(stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC),
+               stat.ios[STAT_FLUSH],
+               (unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC));
 }
 
 ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
@@ -1492,6 +1523,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
        struct hd_struct *hd;
        char buf[BDEVNAME_SIZE];
        unsigned int inflight;
+       struct disk_stats stat;
 
        /*
        if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
@@ -1503,7 +1535,9 @@ static int diskstats_show(struct seq_file *seqf, void *v)
 
        disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
        while ((hd = disk_part_iter_next(&piter))) {
+               part_stat_read_all(hd, &stat);
                inflight = part_in_flight(gp->queue, hd);
+
                seq_printf(seqf, "%4d %7d %s "
                           "%lu %lu %lu %u "
                           "%lu %lu %lu %u "
@@ -1513,23 +1547,27 @@ static int diskstats_show(struct seq_file *seqf, void *v)
                           "\n",
                           MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
                           disk_name(gp, hd->partno, buf),
-                          part_stat_read(hd, ios[STAT_READ]),
-                          part_stat_read(hd, merges[STAT_READ]),
-                          part_stat_read(hd, sectors[STAT_READ]),
-                          (unsigned int)part_stat_read_msecs(hd, STAT_READ),
-                          part_stat_read(hd, ios[STAT_WRITE]),
-                          part_stat_read(hd, merges[STAT_WRITE]),
-                          part_stat_read(hd, sectors[STAT_WRITE]),
-                          (unsigned int)part_stat_read_msecs(hd, STAT_WRITE),
+                          stat.ios[STAT_READ],
+                          stat.merges[STAT_READ],
+                          stat.sectors[STAT_READ],
+                          (unsigned int)div_u64(stat.nsecs[STAT_READ],
+                                                       NSEC_PER_MSEC),
+                          stat.ios[STAT_WRITE],
+                          stat.merges[STAT_WRITE],
+                          stat.sectors[STAT_WRITE],
+                          (unsigned int)div_u64(stat.nsecs[STAT_WRITE],
+                                                       NSEC_PER_MSEC),
                           inflight,
-                          jiffies_to_msecs(part_stat_read(hd, io_ticks)),
-                          jiffies_to_msecs(part_stat_read(hd, time_in_queue)),
-                          part_stat_read(hd, ios[STAT_DISCARD]),
-                          part_stat_read(hd, merges[STAT_DISCARD]),
-                          part_stat_read(hd, sectors[STAT_DISCARD]),
-                          (unsigned int)part_stat_read_msecs(hd, STAT_DISCARD),
-                          part_stat_read(hd, ios[STAT_FLUSH]),
-                          (unsigned int)part_stat_read_msecs(hd, STAT_FLUSH)
+                          jiffies_to_msecs(stat.io_ticks),
+                          jiffies_to_msecs(stat.time_in_queue),
+                          stat.ios[STAT_DISCARD],
+                          stat.merges[STAT_DISCARD],
+                          stat.sectors[STAT_DISCARD],
+                          (unsigned int)div_u64(stat.nsecs[STAT_DISCARD],
+                                                NSEC_PER_MSEC),
+                          stat.ios[STAT_FLUSH],
+                          (unsigned int)div_u64(stat.nsecs[STAT_FLUSH],
+                                                NSEC_PER_MSEC)
                        );
        }
        disk_part_iter_exit(&piter);
index 13bb51f..b0c588d 100644 (file)
@@ -380,9 +380,6 @@ static inline void free_part_stats(struct hd_struct *part)
 
 #endif /* CONFIG_SMP */
 
-#define part_stat_read_msecs(part, which)                              \
-       div_u64(part_stat_read(part, nsecs[which]), NSEC_PER_MSEC)
-
 #define part_stat_read_accum(part, field)                              \
        (part_stat_read(part, field[STAT_READ]) +                       \
         part_stat_read(part, field[STAT_WRITE]) +                      \