OSDN Git Service

Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block
authorLinus Torvalds <torvalds@woody.linux-foundation.org>
Fri, 8 Feb 2008 17:42:46 +0000 (09:42 -0800)
committerLinus Torvalds <torvalds@woody.linux-foundation.org>
Fri, 8 Feb 2008 17:42:46 +0000 (09:42 -0800)
* 'for-linus' of git://git.kernel.dk/linux-2.6-block:
  Enhanced partition statistics: documentation update
  Enhanced partition statistics: remove old partition statistics
  Enhanced partition statistics: procfs
  Enhanced partition statistics: sysfs
  Enhanced partition statistics: aoe fix
  Enhanced partition statistics: update partition statitics
  Enhanced partition statistics: core statistics
  block: fixup rq_init() a bit

Manually fixed conflict in drivers/block/aoe/aoecmd.c due to statistics
support.

Documentation/ABI/testing/procfs-diskstats [new file with mode: 0644]
Documentation/ABI/testing/sysfs-block [new file with mode: 0644]
Documentation/iostats.txt
block/blk-core.c
block/blk-merge.c
block/genhd.c
drivers/block/aoe/aoecmd.c
fs/partitions/check.c
include/linux/blkdev.h
include/linux/genhd.h

diff --git a/Documentation/ABI/testing/procfs-diskstats b/Documentation/ABI/testing/procfs-diskstats
new file mode 100644 (file)
index 0000000..9923390
--- /dev/null
@@ -0,0 +1,22 @@
+What:          /proc/diskstats
+Date:          February 2008
+Contact:       Jerome Marchand <jmarchan@redhat.com>
+Description:
+               The /proc/diskstats file displays the I/O statistics
+               of block devices. Each line contains the following 14
+               fields:
+                1 - major number
+                2 - minor number
+                3 - device name
+                4 - reads completed successfully
+                5 - reads merged
+                6 - sectors read
+                7 - time spent reading (ms)
+                8 - writes completed
+                9 - writes merged
+               10 - sectors written
+               11 - time spent writing (ms)
+               12 - I/Os currently in progress
+               13 - time spent doing I/Os (ms)
+               14 - weighted time spent doing I/Os (ms)
+               For more details refer to Documentation/iostats.txt
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
new file mode 100644 (file)
index 0000000..4bd9ea5
--- /dev/null
@@ -0,0 +1,28 @@
+What:          /sys/block/<disk>/stat
+Date:          February 2008
+Contact:       Jerome Marchand <jmarchan@redhat.com>
+Description:
+               The /sys/block/<disk>/stat files display the I/O
+               statistics of disk <disk>. They contain 11 fields:
+                1 - reads completed successfully
+                2 - reads merged
+                3 - sectors read
+                4 - time spent reading (ms)
+                5 - writes completed
+                6 - writes merged
+                7 - sectors written
+                8 - time spent writing (ms)
+                9 - I/Os currently in progress
+               10 - time spent doing I/Os (ms)
+               11 - weighted time spent doing I/Os (ms)
+               For more details refer to Documentation/iostats.txt
+
+
+What:          /sys/block/<disk>/<part>/stat
+Date:          February 2008
+Contact:       Jerome Marchand <jmarchan@redhat.com>
+Description:
+               The /sys/block/<disk>/<part>/stat files display the
+               I/O statistics of partition <part>. The format is the
+               same as the above-written /sys/block/<disk>/stat
+               format.
index b963c3b..5925c3c 100644 (file)
@@ -58,7 +58,7 @@ they should not wrap twice before you notice them.
 Each set of stats only applies to the indicated device; if you want
 system-wide stats you'll have to find all the devices and sum them all up.
 
-Field  1 -- # of reads issued
+Field  1 -- # of reads completed
     This is the total number of reads completed successfully.
 Field  2 -- # of reads merged, field 6 -- # of writes merged
     Reads and writes which are adjacent to each other may be merged for
@@ -132,6 +132,19 @@ words, the number of reads for partitions is counted slightly before time
 of queuing for partitions, and at completion for whole disks.  This is
 a subtle distinction that is probably uninteresting for most cases.
 
+More significant is the error induced by counting the numbers of
+reads/writes before merges for partitions and after for disks. Since a
+typical workload usually contains a lot of successive and adjacent requests,
+the number of reads/writes issued can be several times higher than the
+number of reads/writes completed.
+
+In 2.6.25, the full statistic set is again available for partitions, and
+disk and partition statistics are consistent again. Since we still don't
+keep record of the partition-relative address, an operation is attributed to
+the partition which contains the first sector of the request after the
+eventual merges. As requests can be merged across partitions, this could lead
+to some (probably insignificant) inaccuracy.
+
 Additional notes
 ----------------
 
index 4afb39c..e9754dc 100644 (file)
@@ -60,10 +60,15 @@ static void drive_stat_acct(struct request *rq, int new_io)
                return;
 
        if (!new_io) {
-               __disk_stat_inc(rq->rq_disk, merges[rw]);
+               __all_stat_inc(rq->rq_disk, merges[rw], rq->sector);
        } else {
+               struct hd_struct *part = get_part(rq->rq_disk, rq->sector);
                disk_round_stats(rq->rq_disk);
                rq->rq_disk->in_flight++;
+               if (part) {
+                       part_round_stats(part);
+                       part->in_flight++;
+               }
        }
 }
 
@@ -102,27 +107,38 @@ struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
 }
 EXPORT_SYMBOL(blk_get_backing_dev_info);
 
+/*
+ * We can't just memset() the structure, since the allocation path
+ * already stored some information in the request.
+ */
 void rq_init(struct request_queue *q, struct request *rq)
 {
        INIT_LIST_HEAD(&rq->queuelist);
        INIT_LIST_HEAD(&rq->donelist);
-
-       rq->errors = 0;
+       rq->q = q;
+       rq->sector = rq->hard_sector = (sector_t) -1;
+       rq->nr_sectors = rq->hard_nr_sectors = 0;
+       rq->current_nr_sectors = rq->hard_cur_sectors = 0;
        rq->bio = rq->biotail = NULL;
        INIT_HLIST_NODE(&rq->hash);
        RB_CLEAR_NODE(&rq->rb_node);
+       rq->rq_disk = NULL;
+       rq->nr_phys_segments = 0;
+       rq->nr_hw_segments = 0;
        rq->ioprio = 0;
+       rq->special = NULL;
        rq->buffer = NULL;
+       rq->tag = -1;
+       rq->errors = 0;
        rq->ref_count = 1;
-       rq->q = q;
-       rq->special = NULL;
+       rq->cmd_len = 0;
+       memset(rq->cmd, 0, sizeof(rq->cmd));
        rq->data_len = 0;
+       rq->sense_len = 0;
        rq->data = NULL;
-       rq->nr_phys_segments = 0;
        rq->sense = NULL;
        rq->end_io = NULL;
        rq->end_io_data = NULL;
-       rq->completion_data = NULL;
        rq->next_rq = NULL;
 }
 
@@ -986,6 +1002,21 @@ void disk_round_stats(struct gendisk *disk)
 }
 EXPORT_SYMBOL_GPL(disk_round_stats);
 
+void part_round_stats(struct hd_struct *part)
+{
+       unsigned long now = jiffies;
+
+       if (now == part->stamp)
+               return;
+
+       if (part->in_flight) {
+               __part_stat_add(part, time_in_queue,
+                               part->in_flight * (now - part->stamp));
+               __part_stat_add(part, io_ticks, (now - part->stamp));
+       }
+       part->stamp = now;
+}
+
 /*
  * queue lock must be held
  */
@@ -1188,10 +1219,6 @@ static inline void blk_partition_remap(struct bio *bio)
 
        if (bio_sectors(bio) && bdev != bdev->bd_contains) {
                struct hd_struct *p = bdev->bd_part;
-               const int rw = bio_data_dir(bio);
-
-               p->sectors[rw] += bio_sectors(bio);
-               p->ios[rw]++;
 
                bio->bi_sector += p->start_sect;
                bio->bi_bdev = bdev->bd_contains;
@@ -1519,7 +1546,8 @@ static int __end_that_request_first(struct request *req, int error,
        if (blk_fs_request(req) && req->rq_disk) {
                const int rw = rq_data_dir(req);
 
-               disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9);
+               all_stat_add(req->rq_disk, sectors[rw],
+                            nr_bytes >> 9, req->sector);
        }
 
        total_bytes = bio_nbytes = 0;
@@ -1704,11 +1732,16 @@ static void end_that_request_last(struct request *req, int error)
        if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
                unsigned long duration = jiffies - req->start_time;
                const int rw = rq_data_dir(req);
+               struct hd_struct *part = get_part(disk, req->sector);
 
-               __disk_stat_inc(disk, ios[rw]);
-               __disk_stat_add(disk, ticks[rw], duration);
+               __all_stat_inc(disk, ios[rw], req->sector);
+               __all_stat_add(disk, ticks[rw], duration, req->sector);
                disk_round_stats(disk);
                disk->in_flight--;
+               if (part) {
+                       part_round_stats(part);
+                       part->in_flight--;
+               }
        }
 
        if (req->end_io)
index 845ef81..d3b84bb 100644 (file)
@@ -454,8 +454,14 @@ static int attempt_merge(struct request_queue *q, struct request *req,
        elv_merge_requests(q, req, next);
 
        if (req->rq_disk) {
+               struct hd_struct *part
+                       = get_part(req->rq_disk, req->sector);
                disk_round_stats(req->rq_disk);
                req->rq_disk->in_flight--;
+               if (part) {
+                       part_round_stats(part);
+                       part->in_flight--;
+               }
        }
 
        req->ioprio = ioprio_best(req->ioprio, next->ioprio);
index de2ebb2..53f2238 100644 (file)
@@ -584,12 +584,28 @@ static int diskstats_show(struct seq_file *s, void *v)
        for (n = 0; n < gp->minors - 1; n++) {
                struct hd_struct *hd = gp->part[n];
 
-               if (hd && hd->nr_sects)
-                       seq_printf(s, "%4d %4d %s %u %u %u %u\n",
-                               gp->major, n + gp->first_minor + 1,
-                               disk_name(gp, n + 1, buf),
-                               hd->ios[0], hd->sectors[0],
-                               hd->ios[1], hd->sectors[1]);
+               if (!hd || !hd->nr_sects)
+                       continue;
+
+               preempt_disable();
+               part_round_stats(hd);
+               preempt_enable();
+               seq_printf(s, "%4d %4d %s %lu %lu %llu "
+                          "%u %lu %lu %llu %u %u %u %u\n",
+                          gp->major, n + gp->first_minor + 1,
+                          disk_name(gp, n + 1, buf),
+                          part_stat_read(hd, ios[0]),
+                          part_stat_read(hd, merges[0]),
+                          (unsigned long long)part_stat_read(hd, sectors[0]),
+                          jiffies_to_msecs(part_stat_read(hd, ticks[0])),
+                          part_stat_read(hd, ios[1]),
+                          part_stat_read(hd, merges[1]),
+                          (unsigned long long)part_stat_read(hd, sectors[1]),
+                          jiffies_to_msecs(part_stat_read(hd, ticks[1])),
+                          hd->in_flight,
+                          jiffies_to_msecs(part_stat_read(hd, io_ticks)),
+                          jiffies_to_msecs(part_stat_read(hd, time_in_queue))
+                       );
        }
  
        return 0;
index 44beb17..d00293b 100644 (file)
@@ -751,15 +751,15 @@ gettgt(struct aoedev *d, char *addr)
 }
 
 static inline void
-diskstats(struct gendisk *disk, struct bio *bio, ulong duration)
+diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector)
 {
        unsigned long n_sect = bio->bi_size >> 9;
        const int rw = bio_data_dir(bio);
 
-       disk_stat_inc(disk, ios[rw]);
-       disk_stat_add(disk, ticks[rw], duration);
-       disk_stat_add(disk, sectors[rw], n_sect);
-       disk_stat_add(disk, io_ticks, duration);
+       all_stat_inc(disk, ios[rw], sector);
+       all_stat_add(disk, ticks[rw], duration, sector);
+       all_stat_add(disk, sectors[rw], n_sect, sector);
+       all_stat_add(disk, io_ticks, duration, sector);
 }
 
 void
@@ -879,7 +879,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
        }
 
        if (buf && --buf->nframesout == 0 && buf->resid == 0) {
-               diskstats(d->gd, buf->bio, jiffies - buf->stime);
+               diskstats(d->gd, buf->bio, jiffies - buf->stime, buf->sector);
                n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
                bio_endio(buf->bio, n);
                mempool_free(buf, d->bufpool);
index 9a64045..03f808c 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/fs.h>
 #include <linux/kmod.h>
 #include <linux/ctype.h>
+#include <linux/genhd.h>
 
 #include "check.h"
 
@@ -215,9 +216,25 @@ static ssize_t part_stat_show(struct device *dev,
 {
        struct hd_struct *p = dev_to_part(dev);
 
-       return sprintf(buf, "%8u %8llu %8u %8llu\n",
-                      p->ios[0], (unsigned long long)p->sectors[0],
-                      p->ios[1], (unsigned long long)p->sectors[1]);
+       preempt_disable();
+       part_round_stats(p);
+       preempt_enable();
+       return sprintf(buf,
+               "%8lu %8lu %8llu %8u "
+               "%8lu %8lu %8llu %8u "
+               "%8u %8u %8u"
+               "\n",
+               part_stat_read(p, ios[READ]),
+               part_stat_read(p, merges[READ]),
+               (unsigned long long)part_stat_read(p, sectors[READ]),
+               jiffies_to_msecs(part_stat_read(p, ticks[READ])),
+               part_stat_read(p, ios[WRITE]),
+               part_stat_read(p, merges[WRITE]),
+               (unsigned long long)part_stat_read(p, sectors[WRITE]),
+               jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
+               p->in_flight,
+               jiffies_to_msecs(part_stat_read(p, io_ticks)),
+               jiffies_to_msecs(part_stat_read(p, time_in_queue)));
 }
 
 #ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -273,6 +290,7 @@ static struct attribute_group *part_attr_groups[] = {
 static void part_release(struct device *dev)
 {
        struct hd_struct *p = dev_to_part(dev);
+       free_part_stats(p);
        kfree(p);
 }
 
@@ -312,8 +330,7 @@ void delete_partition(struct gendisk *disk, int part)
        disk->part[part-1] = NULL;
        p->start_sect = 0;
        p->nr_sects = 0;
-       p->ios[0] = p->ios[1] = 0;
-       p->sectors[0] = p->sectors[1] = 0;
+       part_stat_set_all(p, 0);
        kobject_put(p->holder_dir);
        device_del(&p->dev);
        put_device(&p->dev);
@@ -336,6 +353,10 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len,
        if (!p)
                return;
 
+       if (!init_part_stats(p)) {
+               kfree(p);
+               return;
+       }
        p->start_sect = start;
        p->nr_sects = len;
        p->partno = part;
index 90392a9..e1888cc 100644 (file)
@@ -137,7 +137,9 @@ enum rq_flag_bits {
 #define BLK_MAX_CDB    16
 
 /*
- * try to put the fields that are referenced together in the same cacheline
+ * try to put the fields that are referenced together in the same cacheline.
+ * if you modify this structure, be sure to check block/blk-core.c:rq_init()
+ * as well!
  */
 struct request {
        struct list_head queuelist;
index 1dbea0a..09a3b18 100644 (file)
@@ -91,16 +91,31 @@ struct partition {
        __le32 nr_sects;                /* nr of sectors in partition */
 } __attribute__((packed));
 
+struct disk_stats {
+       unsigned long sectors[2];       /* READs and WRITEs */
+       unsigned long ios[2];
+       unsigned long merges[2];
+       unsigned long ticks[2];
+       unsigned long io_ticks;
+       unsigned long time_in_queue;
+};
+       
 struct hd_struct {
        sector_t start_sect;
        sector_t nr_sects;
        struct device dev;
        struct kobject *holder_dir;
-       unsigned ios[2], sectors[2];    /* READs and WRITEs */
        int policy, partno;
 #ifdef CONFIG_FAIL_MAKE_REQUEST
        int make_it_fail;
 #endif
+       unsigned long stamp;
+       int in_flight;
+#ifdef CONFIG_SMP
+       struct disk_stats *dkstats;
+#else
+       struct disk_stats dkstats;
+#endif
 };
 
 #define GENHD_FL_REMOVABLE                     1
@@ -111,15 +126,7 @@ struct hd_struct {
 #define GENHD_FL_SUPPRESS_PARTITION_INFO       32
 #define GENHD_FL_FAIL                          64
 
-struct disk_stats {
-       unsigned long sectors[2];       /* READs and WRITEs */
-       unsigned long ios[2];
-       unsigned long merges[2];
-       unsigned long ticks[2];
-       unsigned long io_ticks;
-       unsigned long time_in_queue;
-};
-       
+
 struct gendisk {
        int major;                      /* major number of driver */
        int first_minor;
@@ -158,6 +165,20 @@ struct gendisk {
  * The __ variants should only be called in critical sections. The full
  * variants disable/enable preemption.
  */
+static inline struct hd_struct *get_part(struct gendisk *gendiskp,
+                                        sector_t sector)
+{
+       struct hd_struct *part;
+       int i;
+       for (i = 0; i < gendiskp->minors - 1; i++) {
+               part = gendiskp->part[i];
+               if (part && part->start_sect <= sector
+                   && sector < part->start_sect + part->nr_sects)
+                       return part;
+       }
+       return NULL;
+}
+
 #ifdef CONFIG_SMP
 #define __disk_stat_add(gendiskp, field, addnd)        \
        (per_cpu_ptr(gendiskp->dkstats, smp_processor_id())->field += addnd)
@@ -177,15 +198,62 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) {
                memset(per_cpu_ptr(gendiskp->dkstats, i), value,
                                sizeof (struct disk_stats));
 }              
+
+#define __part_stat_add(part, field, addnd)                            \
+       (per_cpu_ptr(part->dkstats, smp_processor_id())->field += addnd)
+
+#define __all_stat_add(gendiskp, field, addnd, sector)         \
+({                                                             \
+       struct hd_struct *part = get_part(gendiskp, sector);    \
+       if (part)                                               \
+               __part_stat_add(part, field, addnd);            \
+       __disk_stat_add(gendiskp, field, addnd);                \
+})
+
+#define part_stat_read(part, field)                                    \
+({                                                                     \
+       typeof(part->dkstats->field) res = 0;                           \
+       int i;                                                          \
+       for_each_possible_cpu(i)                                        \
+               res += per_cpu_ptr(part->dkstats, i)->field;            \
+       res;                                                            \
+})
+
+static inline void part_stat_set_all(struct hd_struct *part, int value)        {
+       int i;
+       for_each_possible_cpu(i)
+               memset(per_cpu_ptr(part->dkstats, i), value,
+                      sizeof(struct disk_stats));
+}
                                
 #else
 #define __disk_stat_add(gendiskp, field, addnd) \
                                (gendiskp->dkstats.field += addnd)
 #define disk_stat_read(gendiskp, field)        (gendiskp->dkstats.field)
 
-static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)      {
+static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)
+{
        memset(&gendiskp->dkstats, value, sizeof (struct disk_stats));
 }
+
+#define __part_stat_add(part, field, addnd) \
+       (part->dkstats.field += addnd)
+
+#define __all_stat_add(gendiskp, field, addnd, sector)         \
+({                                                             \
+       struct hd_struct *part = get_part(gendiskp, sector);    \
+       if (part)                                               \
+               part->dkstats.field += addnd;                   \
+       __disk_stat_add(gendiskp, field, addnd);                \
+})
+
+#define part_stat_read(part, field)    (part->dkstats.field)
+
+static inline void part_stat_set_all(struct hd_struct *part, int value)
+{
+       memset(&part->dkstats, value, sizeof(struct disk_stats));
+}
+
 #endif
 
 #define disk_stat_add(gendiskp, field, addnd)                  \
@@ -206,6 +274,45 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)  {
 #define disk_stat_sub(gendiskp, field, subnd) \
                disk_stat_add(gendiskp, field, -subnd)
 
+#define part_stat_add(gendiskp, field, addnd)          \
+       do {                                            \
+               preempt_disable();                      \
+               __part_stat_add(gendiskp, field, addnd);\
+               preempt_enable();                       \
+       } while (0)
+
+#define __part_stat_dec(gendiskp, field) __part_stat_add(gendiskp, field, -1)
+#define part_stat_dec(gendiskp, field) part_stat_add(gendiskp, field, -1)
+
+#define __part_stat_inc(gendiskp, field) __part_stat_add(gendiskp, field, 1)
+#define part_stat_inc(gendiskp, field) part_stat_add(gendiskp, field, 1)
+
+#define __part_stat_sub(gendiskp, field, subnd) \
+               __part_stat_add(gendiskp, field, -subnd)
+#define part_stat_sub(gendiskp, field, subnd) \
+               part_stat_add(gendiskp, field, -subnd)
+
+#define all_stat_add(gendiskp, field, addnd, sector)           \
+       do {                                                    \
+               preempt_disable();                              \
+               __all_stat_add(gendiskp, field, addnd, sector); \
+               preempt_enable();                               \
+       } while (0)
+
+#define __all_stat_dec(gendiskp, field, sector) \
+               __all_stat_add(gendiskp, field, -1, sector)
+#define all_stat_dec(gendiskp, field, sector) \
+               all_stat_add(gendiskp, field, -1, sector)
+
+#define __all_stat_inc(gendiskp, field, sector) \
+               __all_stat_add(gendiskp, field, 1, sector)
+#define all_stat_inc(gendiskp, field, sector) \
+               all_stat_add(gendiskp, field, 1, sector)
+
+#define __all_stat_sub(gendiskp, field, subnd, sector) \
+               __all_stat_add(gendiskp, field, -subnd, sector)
+#define all_stat_sub(gendiskp, field, subnd, sector) \
+               all_stat_add(gendiskp, field, -subnd, sector)
 
 /* Inlines to alloc and free disk stats in struct gendisk */
 #ifdef  CONFIG_SMP
@@ -221,6 +328,20 @@ static inline void free_disk_stats(struct gendisk *disk)
 {
        free_percpu(disk->dkstats);
 }
+
+static inline int init_part_stats(struct hd_struct *part)
+{
+       part->dkstats = alloc_percpu(struct disk_stats);
+       if (!part->dkstats)
+               return 0;
+       return 1;
+}
+
+static inline void free_part_stats(struct hd_struct *part)
+{
+       free_percpu(part->dkstats);
+}
+
 #else  /* CONFIG_SMP */
 static inline int init_disk_stats(struct gendisk *disk)
 {
@@ -230,10 +351,20 @@ static inline int init_disk_stats(struct gendisk *disk)
 static inline void free_disk_stats(struct gendisk *disk)
 {
 }
+
+static inline int init_part_stats(struct hd_struct *part)
+{
+       return 1;
+}
+
+static inline void free_part_stats(struct hd_struct *part)
+{
+}
 #endif /* CONFIG_SMP */
 
 /* drivers/block/ll_rw_blk.c */
 extern void disk_round_stats(struct gendisk *disk);
+extern void part_round_stats(struct hd_struct *part);
 
 /* drivers/block/genhd.c */
 extern int get_blkdev_list(char *, int);