static const int cfq_hist_divisor = 4;
/*
- * offset from end of service tree
+ * offset from end of queue service tree for idle class
*/
#define CFQ_IDLE_DELAY (NSEC_PER_SEC / 5)
+/* offset from end of group service tree under time slice mode */
+#define CFQ_SLICE_MODE_GROUP_DELAY (NSEC_PER_SEC / 5)
+/* offset from end of group service tree under IOPS mode */
+#define CFQ_IOPS_MODE_GROUP_DELAY (HZ / 5)
/*
* below this threshold, we consider thinktime immediate
/* total time with empty current active q with other requests queued */
struct blkg_stat empty_time;
/* fields after this shouldn't be cleared on stat reset */
- uint64_t start_group_wait_time;
- uint64_t start_idle_time;
- uint64_t start_empty_time;
+ u64 start_group_wait_time;
+ u64 start_idle_time;
+ u64 start_empty_time;
uint16_t flags;
#endif /* CONFIG_DEBUG_BLK_CGROUP */
#endif /* CONFIG_CFQ_GROUP_IOSCHED */
unsigned int weight;
unsigned int leaf_weight;
+ u64 group_idle;
};
/* This is per cgroup per device grouping structure */
struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR];
struct cfq_queue *async_idle_cfqq;
+ u64 group_idle;
};
struct cfq_io_cq {
/* This should be called with the queue_lock held. */
static void cfqg_stats_update_group_wait_time(struct cfqg_stats *stats)
{
- unsigned long long now;
+ u64 now;
if (!cfqg_stats_waiting(stats))
return;
- now = sched_clock();
- if (time_after64(now, stats->start_group_wait_time))
+ now = ktime_get_ns();
+ if (now > stats->start_group_wait_time)
blkg_stat_add(&stats->group_wait_time,
now - stats->start_group_wait_time);
cfqg_stats_clear_waiting(stats);
return;
if (cfqg == curr_cfqg)
return;
- stats->start_group_wait_time = sched_clock();
+ stats->start_group_wait_time = ktime_get_ns();
cfqg_stats_mark_waiting(stats);
}
/* This should be called with the queue_lock held. */
static void cfqg_stats_end_empty_time(struct cfqg_stats *stats)
{
- unsigned long long now;
+ u64 now;
if (!cfqg_stats_empty(stats))
return;
- now = sched_clock();
- if (time_after64(now, stats->start_empty_time))
+ now = ktime_get_ns();
+ if (now > stats->start_empty_time)
blkg_stat_add(&stats->empty_time,
now - stats->start_empty_time);
cfqg_stats_clear_empty(stats);
if (cfqg_stats_empty(stats))
return;
- stats->start_empty_time = sched_clock();
+ stats->start_empty_time = ktime_get_ns();
cfqg_stats_mark_empty(stats);
}
struct cfqg_stats *stats = &cfqg->stats;
if (cfqg_stats_idling(stats)) {
- unsigned long long now = sched_clock();
+ u64 now = ktime_get_ns();
- if (time_after64(now, stats->start_idle_time))
+ if (now > stats->start_idle_time)
blkg_stat_add(&stats->idle_time,
now - stats->start_idle_time);
cfqg_stats_clear_idling(stats);
BUG_ON(cfqg_stats_idling(stats));
- stats->start_idle_time = sched_clock();
+ stats->start_idle_time = ktime_get_ns();
cfqg_stats_mark_idling(stats);
}
}
static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
- uint64_t start_time, uint64_t io_start_time, int rw)
+ u64 start_time_ns, u64 io_start_time_ns, int rw)
{
struct cfqg_stats *stats = &cfqg->stats;
- unsigned long long now = sched_clock();
+ u64 now = ktime_get_ns();
- if (time_after64(now, io_start_time))
- blkg_rwstat_add(&stats->service_time, rw, now - io_start_time);
- if (time_after64(io_start_time, start_time))
+ if (now > io_start_time_ns)
+ blkg_rwstat_add(&stats->service_time, rw,
+ now - io_start_time_ns);
+ if (io_start_time_ns > start_time_ns)
blkg_rwstat_add(&stats->wait_time, rw,
- io_start_time - start_time);
+ io_start_time_ns - start_time_ns);
}
/* @stats = 0 */
static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int rw) { }
static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int rw) { }
static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
- uint64_t start_time, uint64_t io_start_time, int rw) { }
+ u64 start_time_ns, u64 io_start_time_ns, int rw) { }
#endif /* CONFIG_CFQ_GROUP_IOSCHED */
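+/*
+ * Return the group_idle of the active queue's group when group scheduling
+ * is in use, falling back to the queue-wide cfq_group_idle tunable.
+ */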
+static inline u64 get_group_idle(struct cfq_data *cfqd)
+{
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+ struct cfq_queue *cfqq = cfqd->active_queue;
+
+ if (cfqq && cfqq->cfqg)
+ return cfqq->cfqg->group_idle;
+#endif
+ return cfqd->cfq_group_idle;
+}
+
#define cfq_log(cfqd, fmt, args...) \
blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args)
if (!sample_valid(ttime->ttime_samples))
return false;
if (group_idle)
- slice = cfqd->cfq_group_idle;
+ slice = get_group_idle(cfqd);
else
slice = cfqd->cfq_slice_idle;
return ttime->ttime_mean > slice;
return min_vdisktime;
}
-static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime)
-{
- s64 delta = (s64)(vdisktime - min_vdisktime);
- if (delta < 0)
- min_vdisktime = vdisktime;
-
- return min_vdisktime;
-}
-
static void update_min_vdisktime(struct cfq_rb_root *st)
{
struct cfq_group *cfqg;
cfqg->vfraction = max_t(unsigned, vfr, 1);
}
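+/*
+ * vdisktime advances in nanoseconds under time slice mode but is charged by
+ * the number of dispatched requests under IOPS mode, so a newly added group
+ * needs a much smaller offset from the end of the tree in IOPS mode.
+ */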
+static inline u64 cfq_get_cfqg_vdisktime_delay(struct cfq_data *cfqd)
+{
+ if (iops_mode(cfqd))
+ return CFQ_IOPS_MODE_GROUP_DELAY;
+ return CFQ_SLICE_MODE_GROUP_DELAY;
+}
+
static void
cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
{
n = rb_last(&st->rb);
if (n) {
__cfqg = rb_entry_cfqg(n);
- cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY;
+ cfqg->vdisktime = __cfqg->vdisktime +
+ cfq_get_cfqg_vdisktime_delay(cfqd);
} else
cfqg->vdisktime = st->min_vdisktime;
cfq_group_service_tree_add(st, cfqg);
cgd->weight = weight;
cgd->leaf_weight = weight;
+ cgd->group_idle = cfq_group_idle;
}
static void cfq_cpd_free(struct blkcg_policy_data *cpd)
cfqg->weight = cgd->weight;
cfqg->leaf_weight = cgd->leaf_weight;
+ cfqg->group_idle = cgd->group_idle;
}
static void cfq_pd_offline(struct blkg_policy_data *pd)
return 0;
}
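+/* Report group_idle to userspace in usecs; it is stored in nanoseconds. */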
+static int cfq_print_group_idle(struct seq_file *sf, void *v)
+{
+ struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+ struct cfq_group_data *cgd = blkcg_to_cfqgd(blkcg);
+ u64 val = 0;
+
+ if (cgd)
+ val = cgd->group_idle;
+
+ seq_printf(sf, "%llu\n", div_u64(val, NSEC_PER_USEC));
+ return 0;
+}
+
static ssize_t __cfqg_set_weight_device(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off,
bool on_dfl, bool is_leaf_weight)
return __cfq_set_weight(css, val, false, false, true);
}
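+/*
+ * Set this cgroup's group_idle from a value in microseconds and propagate
+ * it to all of the cgroup's existing cfq groups.
+ */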
+static int cfq_set_group_idle(struct cgroup_subsys_state *css,
+ struct cftype *cft, u64 val)
+{
+ struct blkcg *blkcg = css_to_blkcg(css);
+ struct cfq_group_data *cfqgd;
+ struct blkcg_gq *blkg;
+ int ret = 0;
+
+ spin_lock_irq(&blkcg->lock);
+ cfqgd = blkcg_to_cfqgd(blkcg);
+ if (!cfqgd) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ cfqgd->group_idle = val * NSEC_PER_USEC;
+
+ hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
+ struct cfq_group *cfqg = blkg_to_cfqg(blkg);
+
+ if (!cfqg)
+ continue;
+
+ cfqg->group_idle = cfqgd->group_idle;
+ }
+
+out:
+ spin_unlock_irq(&blkcg->lock);
+ return ret;
+}
+
static int cfqg_print_stat(struct seq_file *sf, void *v)
{
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
.seq_show = cfq_print_leaf_weight,
.write_u64 = cfq_set_leaf_weight,
},
+ {
+ .name = "group_idle",
+ .seq_show = cfq_print_group_idle,
+ .write_u64 = cfq_set_group_idle,
+ },
/* statistics, covers only the tasks in the cfqg */
{
if (!cfqg)
return NULL;
- for_each_cfqg_st(cfqg, i, j, st)
- if ((cfqq = cfq_rb_first(st)) != NULL)
+ for_each_cfqg_st(cfqg, i, j, st) {
+ cfqq = cfq_rb_first(st);
+ if (cfqq)
return cfqq;
+ }
return NULL;
}
static void cfq_arm_slice_timer(struct cfq_data *cfqd)
{
struct cfq_queue *cfqq = cfqd->active_queue;
+ struct cfq_rb_root *st = cfqq->service_tree;
struct cfq_io_cq *cic;
u64 sl, group_idle = 0;
u64 now = ktime_get_ns();
* otherwise we still have a problem with sync vs async workloads.
*/
if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag &&
- !cfqd->cfq_group_idle)
+ !get_group_idle(cfqd))
return;
WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list));
*/
if (!cfq_should_idle(cfqd, cfqq)) {
/* no queue idling. Check for group idling */
- if (cfqd->cfq_group_idle)
- group_idle = cfqd->cfq_group_idle;
- else
+ group_idle = get_group_idle(cfqd);
+ if (!group_idle)
return;
}
return;
}
- /* There are other queues in the group, don't do group idle */
- if (group_idle && cfqq->cfqg->nr_cfqq > 1)
+ /*
+ * There are other queues in the group, or this is the only queue in
+ * the group but the thinktime of its service tree is too big; either
+ * way, don't do group idle.
+ */
+ if (group_idle &&
+ (cfqq->cfqg->nr_cfqq > 1 ||
+ cfq_io_thinktime_big(cfqd, &st->ttime, true)))
return;
cfq_mark_cfqq_wait_request(cfqq);
if (group_idle)
- sl = cfqd->cfq_group_idle;
+ sl = group_idle;
else
sl = cfqd->cfq_slice_idle;
* this group, wait for requests to complete.
*/
check_group_idle:
- if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1 &&
+ if (get_group_idle(cfqd) && cfqq->cfqg->nr_cfqq == 1 &&
cfqq->cfqg->dispatched &&
!cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true)) {
cfqq = NULL;
switch (ioprio_class) {
default:
printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class);
+ /* fall through */
case IOPRIO_CLASS_NONE:
/*
* no prio set, inherit CPU scheduling settings
goto out;
}
+ /* cfq_init_cfqq() assumes cfqq->ioprio_class is initialized. */
+ cfqq->ioprio_class = IOPRIO_CLASS_NONE;
cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
cfq_init_prio_data(cfqq, cic);
cfq_link_cfqq_cfqg(cfqq, cfqg);
cfqd->cfq_slice_idle);
}
#ifdef CONFIG_CFQ_GROUP_IOSCHED
- __cfq_update_io_thinktime(&cfqq->cfqg->ttime, cfqd->cfq_group_idle);
+ __cfq_update_io_thinktime(&cfqq->cfqg->ttime, get_group_idle(cfqd));
#endif
}
if (cfq_should_wait_busy(cfqd, cfqq)) {
u64 extend_sl = cfqd->cfq_slice_idle;
if (!cfqd->cfq_slice_idle)
- extend_sl = cfqd->cfq_group_idle;
+ extend_sl = get_group_idle(cfqd);
cfqq->slice_end = now + extend_sl;
cfq_mark_cfqq_wait_busy(cfqq);
cfq_log_cfqq(cfqd, cfqq, "will busy wait");
static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
{ \
struct cfq_data *cfqd = e->elevator_data; \
- unsigned int __data; \
+ unsigned int __data, __min = (MIN), __max = (MAX); \
int ret = cfq_var_store(&__data, (page), count); \
- if (__data < (MIN)) \
- __data = (MIN); \
- else if (__data > (MAX)) \
- __data = (MAX); \
+ if (__data < __min) \
+ __data = __min; \
+ else if (__data > __max) \
+ __data = __max; \
if (__CONV) \
*(__PTR) = (u64)__data * NSEC_PER_MSEC; \
else \
static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
{ \
struct cfq_data *cfqd = e->elevator_data; \
- unsigned int __data; \
+ unsigned int __data, __min = (MIN), __max = (MAX); \
int ret = cfq_var_store(&__data, (page), count); \
- if (__data < (MIN)) \
- __data = (MIN); \
- else if (__data > (MAX)) \
- __data = (MAX); \
+ if (__data < __min) \
+ __data = __min; \
+ else if (__data > __max) \
+ __data = __max; \
*(__PTR) = (u64)__data * NSEC_PER_USEC; \
return ret; \
}