mm/damon/core.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Data Access Monitor
   4  *
   5  * Author: SeongJae Park <sjpark@amazon.de>
   6  */
   7
   8 #define pr_fmt(fmt) "damon: " fmt
   9
  10 #include <linux/damon.h>
  11 #include <linux/delay.h>
  12 #include <linux/kthread.h>
  13 #include <linux/mm.h>
  14 #include <linux/slab.h>
  15 #include <linux/string.h>
  16
  17 #define CREATE_TRACE_POINTS
  18 #include <trace/events/damon.h>
  19
  20 #ifdef CONFIG_DAMON_KUNIT_TEST
  21 #undef DAMON_MIN_REGION
  22 #define DAMON_MIN_REGION 1
  23 #endif
  24
  25 static DEFINE_MUTEX(damon_lock);
  26 static int nr_running_ctxs;
  27
  28 /*
  29  * Construct a damon_region struct
  30  *
  31  * Returns the pointer to the new struct if success, or NULL otherwise
  32  */
  33 struct damon_region *damon_new_region(unsigned long start, unsigned long end)
  34 {
  35         struct damon_region *region;
  36
  37         region = kmalloc(sizeof(*region), GFP_KERNEL);
  38         if (!region)
  39                 return NULL;
  40
  41         region->ar.start = start;
  42         region->ar.end = end;
  43         region->nr_accesses = 0;
  44         INIT_LIST_HEAD(&region->list);
  45
  46         region->age = 0;
  47         region->last_nr_accesses = 0;
  48
  49         return region;
  50 }
  51
  52 /*
  53  * Add a region between two other regions
  54  */
  55 inline void damon_insert_region(struct damon_region *r,
  56                 struct damon_region *prev, struct damon_region *next,
  57                 struct damon_target *t)
  58 {
  59         __list_add(&r->list, &prev->list, &next->list);
  60         t->nr_regions++;
  61 }
  62
  63 void damon_add_region(struct damon_region *r, struct damon_target *t)
  64 {
  65         list_add_tail(&r->list, &t->regions_list);
  66         t->nr_regions++;
  67 }
  68
  69 static void damon_del_region(struct damon_region *r, struct damon_target *t)
  70 {
  71         list_del(&r->list);
  72         t->nr_regions--;
  73 }
  74
  75 static void damon_free_region(struct damon_region *r)
  76 {
  77         kfree(r);
  78 }
  79
  80 void damon_destroy_region(struct damon_region *r, struct damon_target *t)
  81 {
  82         damon_del_region(r, t);
  83         damon_free_region(r);
  84 }
  85
  86 struct damos *damon_new_scheme(
  87                 unsigned long min_sz_region, unsigned long max_sz_region,
  88                 unsigned int min_nr_accesses, unsigned int max_nr_accesses,
  89                 unsigned int min_age_region, unsigned int max_age_region,
  90                 enum damos_action action, struct damos_quota *quota,
  91                 struct damos_watermarks *wmarks)
  92 {
  93         struct damos *scheme;
  94
  95         scheme = kmalloc(sizeof(*scheme), GFP_KERNEL);
  96         if (!scheme)
  97                 return NULL;
  98         scheme->min_sz_region = min_sz_region;
  99         scheme->max_sz_region = max_sz_region;
 100         scheme->min_nr_accesses = min_nr_accesses;
 101         scheme->max_nr_accesses = max_nr_accesses;
 102         scheme->min_age_region = min_age_region;
 103         scheme->max_age_region = max_age_region;
 104         scheme->action = action;
 105         scheme->stat_count = 0;
 106         scheme->stat_sz = 0;
 107         INIT_LIST_HEAD(&scheme->list);
 108
 109         scheme->quota.ms = quota->ms;
 110         scheme->quota.sz = quota->sz;
 111         scheme->quota.reset_interval = quota->reset_interval;
 112         scheme->quota.weight_sz = quota->weight_sz;
 113         scheme->quota.weight_nr_accesses = quota->weight_nr_accesses;
 114         scheme->quota.weight_age = quota->weight_age;
 115         scheme->quota.total_charged_sz = 0;
 116         scheme->quota.total_charged_ns = 0;
 117         scheme->quota.esz = 0;
 118         scheme->quota.charged_sz = 0;
 119         scheme->quota.charged_from = 0;
 120         scheme->quota.charge_target_from = NULL;
 121         scheme->quota.charge_addr_from = 0;
 122
 123         scheme->wmarks.metric = wmarks->metric;
 124         scheme->wmarks.interval = wmarks->interval;
 125         scheme->wmarks.high = wmarks->high;
 126         scheme->wmarks.mid = wmarks->mid;
 127         scheme->wmarks.low = wmarks->low;
 128         scheme->wmarks.activated = true;
 129
 130         return scheme;
 131 }
 132
 133 void damon_add_scheme(struct damon_ctx *ctx, struct damos *s)
 134 {
 135         list_add_tail(&s->list, &ctx->schemes);
 136 }
 137
 138 static void damon_del_scheme(struct damos *s)
 139 {
 140         list_del(&s->list);
 141 }
 142
 143 static void damon_free_scheme(struct damos *s)
 144 {
 145         kfree(s);
 146 }
 147
 148 void damon_destroy_scheme(struct damos *s)
 149 {
 150         damon_del_scheme(s);
 151         damon_free_scheme(s);
 152 }
 153
 154 /*
 155  * Construct a damon_target struct
 156  *
 157  * Returns the pointer to the new struct if success, or NULL otherwise
 158  */
 159 struct damon_target *damon_new_target(unsigned long id)
 160 {
 161         struct damon_target *t;
 162
 163         t = kmalloc(sizeof(*t), GFP_KERNEL);
 164         if (!t)
 165                 return NULL;
 166
 167         t->id = id;
 168         t->nr_regions = 0;
 169         INIT_LIST_HEAD(&t->regions_list);
 170
 171         return t;
 172 }
 173
 174 void damon_add_target(struct damon_ctx *ctx, struct damon_target *t)
 175 {
 176         list_add_tail(&t->list, &ctx->adaptive_targets);
 177 }
 178
 179 bool damon_targets_empty(struct damon_ctx *ctx)
 180 {
 181         return list_empty(&ctx->adaptive_targets);
 182 }
 183
 184 static void damon_del_target(struct damon_target *t)
 185 {
 186         list_del(&t->list);
 187 }
 188
 189 void damon_free_target(struct damon_target *t)
 190 {
 191         struct damon_region *r, *next;
 192
 193         damon_for_each_region_safe(r, next, t)
 194                 damon_free_region(r);
 195         kfree(t);
 196 }
 197
 198 void damon_destroy_target(struct damon_target *t)
 199 {
 200         damon_del_target(t);
 201         damon_free_target(t);
 202 }
 203
 204 unsigned int damon_nr_regions(struct damon_target *t)
 205 {
 206         return t->nr_regions;
 207 }
 208
 209 struct damon_ctx *damon_new_ctx(void)
 210 {
 211         struct damon_ctx *ctx;
 212
 213         ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 214         if (!ctx)
 215                 return NULL;
 216
 217         ctx->sample_interval = 5 * 1000;
 218         ctx->aggr_interval = 100 * 1000;
 219         ctx->primitive_update_interval = 60 * 1000 * 1000;
 220
 221         ktime_get_coarse_ts64(&ctx->last_aggregation);
 222         ctx->last_primitive_update = ctx->last_aggregation;
 223
 224         mutex_init(&ctx->kdamond_lock);
 225
 226         ctx->min_nr_regions = 10;
 227         ctx->max_nr_regions = 1000;
 228
 229         INIT_LIST_HEAD(&ctx->adaptive_targets);
 230         INIT_LIST_HEAD(&ctx->schemes);
 231
 232         return ctx;
 233 }
 234
 235 static void damon_destroy_targets(struct damon_ctx *ctx)
 236 {
 237         struct damon_target *t, *next_t;
 238
 239         if (ctx->primitive.cleanup) {
 240                 ctx->primitive.cleanup(ctx);
 241                 return;
 242         }
 243
 244         damon_for_each_target_safe(t, next_t, ctx)
 245                 damon_destroy_target(t);
 246 }
 247
 248 void damon_destroy_ctx(struct damon_ctx *ctx)
 249 {
 250         struct damos *s, *next_s;
 251
 252         damon_destroy_targets(ctx);
 253
 254         damon_for_each_scheme_safe(s, next_s, ctx)
 255                 damon_destroy_scheme(s);
 256
 257         kfree(ctx);
 258 }
 259
 260 /**
 261  * damon_set_targets() - Set monitoring targets.
 262  * @ctx:        monitoring context
 263  * @ids:        array of target ids
 264  * @nr_ids:     number of entries in @ids
 265  *
 266  * This function should not be called while the kdamond is running.
 267  *
 268  * Return: 0 on success, negative error code otherwise.
 269  */
 270 int damon_set_targets(struct damon_ctx *ctx,
 271                       unsigned long *ids, ssize_t nr_ids)
 272 {
 273         ssize_t i;
 274         struct damon_target *t, *next;
 275
 276         damon_destroy_targets(ctx);
 277
 278         for (i = 0; i < nr_ids; i++) {
 279                 t = damon_new_target(ids[i]);
 280                 if (!t) {
 281                         /* The caller should do cleanup of the ids itself */
 282                         damon_for_each_target_safe(t, next, ctx)
 283                                 damon_destroy_target(t);
 284                         return -ENOMEM;
 285                 }
 286                 damon_add_target(ctx, t);
 287         }
 288
 289         return 0;
 290 }
 291
 292 /**
 293  * damon_set_attrs() - Set attributes for the monitoring.
 294  * @ctx:                monitoring context
 295  * @sample_int:         time interval between samplings
 296  * @aggr_int:           time interval between aggregations
 297  * @primitive_upd_int:  time interval between monitoring primitive updates
 298  * @min_nr_reg:         minimal number of regions
 299  * @max_nr_reg:         maximum number of regions
 300  *
 301  * This function should not be called while the kdamond is running.
 302  * Every time interval is in micro-seconds.
 303  *
 304  * Return: 0 on success, negative error code otherwise.
 305  */
 306 int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
 307                     unsigned long aggr_int, unsigned long primitive_upd_int,
 308                     unsigned long min_nr_reg, unsigned long max_nr_reg)
 309 {
 310         if (min_nr_reg < 3)
 311                 return -EINVAL;
 312         if (min_nr_reg > max_nr_reg)
 313                 return -EINVAL;
 314
 315         ctx->sample_interval = sample_int;
 316         ctx->aggr_interval = aggr_int;
 317         ctx->primitive_update_interval = primitive_upd_int;
 318         ctx->min_nr_regions = min_nr_reg;
 319         ctx->max_nr_regions = max_nr_reg;
 320
 321         return 0;
 322 }
 323
 324 /**
 325  * damon_set_schemes() - Set data access monitoring based operation schemes.
 326  * @ctx:        monitoring context
 327  * @schemes:    array of the schemes
 328  * @nr_schemes: number of entries in @schemes
 329  *
 330  * This function should not be called while the kdamond of the context is
 331  * running.
 332  *
 333  * Return: 0 if success, or negative error code otherwise.
 334  */
 335 int damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes,
 336                         ssize_t nr_schemes)
 337 {
 338         struct damos *s, *next;
 339         ssize_t i;
 340
 341         damon_for_each_scheme_safe(s, next, ctx)
 342                 damon_destroy_scheme(s);
 343         for (i = 0; i < nr_schemes; i++)
 344                 damon_add_scheme(ctx, schemes[i]);
 345         return 0;
 346 }
 347
 348 /**
 349  * damon_nr_running_ctxs() - Return number of currently running contexts.
 350  */
 351 int damon_nr_running_ctxs(void)
 352 {
 353         int nr_ctxs;
 354
 355         mutex_lock(&damon_lock);
 356         nr_ctxs = nr_running_ctxs;
 357         mutex_unlock(&damon_lock);
 358
 359         return nr_ctxs;
 360 }
 361
 362 /* Returns the size upper limit for each monitoring region */
 363 static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
 364 {
 365         struct damon_target *t;
 366         struct damon_region *r;
 367         unsigned long sz = 0;
 368
 369         damon_for_each_target(t, ctx) {
 370                 damon_for_each_region(r, t)
 371                         sz += r->ar.end - r->ar.start;
 372         }
 373
 374         if (ctx->min_nr_regions)
 375                 sz /= ctx->min_nr_regions;
 376         if (sz < DAMON_MIN_REGION)
 377                 sz = DAMON_MIN_REGION;
 378
 379         return sz;
 380 }
 381
 382 static int kdamond_fn(void *data);
 383
 384 /*
 385  * __damon_start() - Starts monitoring with given context.
 386  * @ctx:        monitoring context
 387  *
 388  * This function should be called while damon_lock is hold.
 389  *
 390  * Return: 0 on success, negative error code otherwise.
 391  */
 392 static int __damon_start(struct damon_ctx *ctx)
 393 {
 394         int err = -EBUSY;
 395
 396         mutex_lock(&ctx->kdamond_lock);
 397         if (!ctx->kdamond) {
 398                 err = 0;
 399                 ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
 400                                 nr_running_ctxs);
 401                 if (IS_ERR(ctx->kdamond)) {
 402                         err = PTR_ERR(ctx->kdamond);
 403                         ctx->kdamond = NULL;
 404                 }
 405         }
 406         mutex_unlock(&ctx->kdamond_lock);
 407
 408         return err;
 409 }
 410
 411 /**
 412  * damon_start() - Starts the monitorings for a given group of contexts.
 413  * @ctxs:       an array of the pointers for contexts to start monitoring
 414  * @nr_ctxs:    size of @ctxs
 415  *
 416  * This function starts a group of monitoring threads for a group of monitoring
 417  * contexts.  One thread per each context is created and run in parallel.  The
 418  * caller should handle synchronization between the threads by itself.  If a
 419  * group of threads that created by other 'damon_start()' call is currently
 420  * running, this function does nothing but returns -EBUSY.
 421  *
 422  * Return: 0 on success, negative error code otherwise.
 423  */
 424 int damon_start(struct damon_ctx **ctxs, int nr_ctxs)
 425 {
 426         int i;
 427         int err = 0;
 428
 429         mutex_lock(&damon_lock);
 430         if (nr_running_ctxs) {
 431                 mutex_unlock(&damon_lock);
 432                 return -EBUSY;
 433         }
 434
 435         for (i = 0; i < nr_ctxs; i++) {
 436                 err = __damon_start(ctxs[i]);
 437                 if (err)
 438                         break;
 439                 nr_running_ctxs++;
 440         }
 441         mutex_unlock(&damon_lock);
 442
 443         return err;
 444 }
 445
 446 /*
 447  * __damon_stop() - Stops monitoring of given context.
 448  * @ctx:        monitoring context
 449  *
 450  * Return: 0 on success, negative error code otherwise.
 451  */
 452 static int __damon_stop(struct damon_ctx *ctx)
 453 {
 454         struct task_struct *tsk;
 455
 456         mutex_lock(&ctx->kdamond_lock);
 457         tsk = ctx->kdamond;
 458         if (tsk) {
 459                 get_task_struct(tsk);
 460                 mutex_unlock(&ctx->kdamond_lock);
 461                 kthread_stop(tsk);
 462                 put_task_struct(tsk);
 463                 return 0;
 464         }
 465         mutex_unlock(&ctx->kdamond_lock);
 466
 467         return -EPERM;
 468 }
 469
 470 /**
 471  * damon_stop() - Stops the monitorings for a given group of contexts.
 472  * @ctxs:       an array of the pointers for contexts to stop monitoring
 473  * @nr_ctxs:    size of @ctxs
 474  *
 475  * Return: 0 on success, negative error code otherwise.
 476  */
 477 int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
 478 {
 479         int i, err = 0;
 480
 481         for (i = 0; i < nr_ctxs; i++) {
 482                 /* nr_running_ctxs is decremented in kdamond_fn */
 483                 err = __damon_stop(ctxs[i]);
 484                 if (err)
 485                         return err;
 486         }
 487
 488         return err;
 489 }
 490
 491 /*
 492  * damon_check_reset_time_interval() - Check if a time interval is elapsed.
 493  * @baseline:   the time to check whether the interval has elapsed since
 494  * @interval:   the time interval (microseconds)
 495  *
 496  * See whether the given time interval has passed since the given baseline
 497  * time.  If so, it also updates the baseline to current time for next check.
 498  *
 499  * Return:      true if the time interval has passed, or false otherwise.
 500  */
 501 static bool damon_check_reset_time_interval(struct timespec64 *baseline,
 502                 unsigned long interval)
 503 {
 504         struct timespec64 now;
 505
 506         ktime_get_coarse_ts64(&now);
 507         if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) <
 508                         interval * 1000)
 509                 return false;
 510         *baseline = now;
 511         return true;
 512 }
 513
 514 /*
 515  * Check whether it is time to flush the aggregated information
 516  */
 517 static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx)
 518 {
 519         return damon_check_reset_time_interval(&ctx->last_aggregation,
 520                         ctx->aggr_interval);
 521 }
 522
 523 /*
 524  * Reset the aggregated monitoring results ('nr_accesses' of each region).
 525  */
 526 static void kdamond_reset_aggregated(struct damon_ctx *c)
 527 {
 528         struct damon_target *t;
 529
 530         damon_for_each_target(t, c) {
 531                 struct damon_region *r;
 532
 533                 damon_for_each_region(r, t) {
 534                         trace_damon_aggregated(t, r, damon_nr_regions(t));
 535                         r->last_nr_accesses = r->nr_accesses;
 536                         r->nr_accesses = 0;
 537                 }
 538         }
 539 }
 540
 541 static void damon_split_region_at(struct damon_ctx *ctx,
 542                 struct damon_target *t, struct damon_region *r,
 543                 unsigned long sz_r);
 544
 545 static bool __damos_valid_target(struct damon_region *r, struct damos *s)
 546 {
 547         unsigned long sz;
 548
 549         sz = r->ar.end - r->ar.start;
 550         return s->min_sz_region <= sz && sz <= s->max_sz_region &&
 551                 s->min_nr_accesses <= r->nr_accesses &&
 552                 r->nr_accesses <= s->max_nr_accesses &&
 553                 s->min_age_region <= r->age && r->age <= s->max_age_region;
 554 }
 555
 556 static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
 557                 struct damon_region *r, struct damos *s)
 558 {
 559         bool ret = __damos_valid_target(r, s);
 560
 561         if (!ret || !s->quota.esz || !c->primitive.get_scheme_score)
 562                 return ret;
 563
 564         return c->primitive.get_scheme_score(c, t, r, s) >= s->quota.min_score;
 565 }
 566
 567 static void damon_do_apply_schemes(struct damon_ctx *c,
 568                                    struct damon_target *t,
 569                                    struct damon_region *r)
 570 {
 571         struct damos *s;
 572
 573         damon_for_each_scheme(s, c) {
 574                 struct damos_quota *quota = &s->quota;
 575                 unsigned long sz = r->ar.end - r->ar.start;
 576                 struct timespec64 begin, end;
 577
 578                 if (!s->wmarks.activated)
 579                         continue;
 580
 581                 /* Check the quota */
 582                 if (quota->esz && quota->charged_sz >= quota->esz)
 583                         continue;
 584
 585                 /* Skip previously charged regions */
 586                 if (quota->charge_target_from) {
 587                         if (t != quota->charge_target_from)
 588                                 continue;
 589                         if (r == damon_last_region(t)) {
 590                                 quota->charge_target_from = NULL;
 591                                 quota->charge_addr_from = 0;
 592                                 continue;
 593                         }
 594                         if (quota->charge_addr_from &&
 595                                         r->ar.end <= quota->charge_addr_from)
 596                                 continue;
 597
 598                         if (quota->charge_addr_from && r->ar.start <
 599                                         quota->charge_addr_from) {
 600                                 sz = ALIGN_DOWN(quota->charge_addr_from -
 601                                                 r->ar.start, DAMON_MIN_REGION);
 602                                 if (!sz) {
 603                                         if (r->ar.end - r->ar.start <=
 604                                                         DAMON_MIN_REGION)
 605                                                 continue;
 606                                         sz = DAMON_MIN_REGION;
 607                                 }
 608                                 damon_split_region_at(c, t, r, sz);
 609                                 r = damon_next_region(r);
 610                                 sz = r->ar.end - r->ar.start;
 611                         }
 612                         quota->charge_target_from = NULL;
 613                         quota->charge_addr_from = 0;
 614                 }
 615
 616                 if (!damos_valid_target(c, t, r, s))
 617                         continue;
 618
 619                 /* Apply the scheme */
 620                 if (c->primitive.apply_scheme) {
 621                         if (quota->esz &&
 622                                         quota->charged_sz + sz > quota->esz) {
 623                                 sz = ALIGN_DOWN(quota->esz - quota->charged_sz,
 624                                                 DAMON_MIN_REGION);
 625                                 if (!sz)
 626                                         goto update_stat;
 627                                 damon_split_region_at(c, t, r, sz);
 628                         }
 629                         ktime_get_coarse_ts64(&begin);
 630                         c->primitive.apply_scheme(c, t, r, s);
 631                         ktime_get_coarse_ts64(&end);
 632                         quota->total_charged_ns += timespec64_to_ns(&end) -
 633                                 timespec64_to_ns(&begin);
 634                         quota->charged_sz += sz;
 635                         if (quota->esz && quota->charged_sz >= quota->esz) {
 636                                 quota->charge_target_from = t;
 637                                 quota->charge_addr_from = r->ar.end + 1;
 638                         }
 639                 }
 640                 if (s->action != DAMOS_STAT)
 641                         r->age = 0;
 642
 643 update_stat:
 644                 s->stat_count++;
 645                 s->stat_sz += sz;
 646         }
 647 }
 648
 649 /* Shouldn't be called if quota->ms and quota->sz are zero */
 650 static void damos_set_effective_quota(struct damos_quota *quota)
 651 {
 652         unsigned long throughput;
 653         unsigned long esz;
 654
 655         if (!quota->ms) {
 656                 quota->esz = quota->sz;
 657                 return;
 658         }
 659
 660         if (quota->total_charged_ns)
 661                 throughput = quota->total_charged_sz * 1000000 /
 662                         quota->total_charged_ns;
 663         else
 664                 throughput = PAGE_SIZE * 1024;
 665         esz = throughput * quota->ms;
 666
 667         if (quota->sz && quota->sz < esz)
 668                 esz = quota->sz;
 669         quota->esz = esz;
 670 }
 671
 672 static void kdamond_apply_schemes(struct damon_ctx *c)
 673 {
 674         struct damon_target *t;
 675         struct damon_region *r, *next_r;
 676         struct damos *s;
 677
 678         damon_for_each_scheme(s, c) {
 679                 struct damos_quota *quota = &s->quota;
 680                 unsigned long cumulated_sz;
 681                 unsigned int score, max_score = 0;
 682
 683                 if (!s->wmarks.activated)
 684                         continue;
 685
 686                 if (!quota->ms && !quota->sz)
 687                         continue;
 688
 689                 /* New charge window starts */
 690                 if (time_after_eq(jiffies, quota->charged_from +
 691                                         msecs_to_jiffies(
 692                                                 quota->reset_interval))) {
 693                         quota->total_charged_sz += quota->charged_sz;
 694                         quota->charged_from = jiffies;
 695                         quota->charged_sz = 0;
 696                         damos_set_effective_quota(quota);
 697                 }
 698
 699                 if (!c->primitive.get_scheme_score)
 700                         continue;
 701
 702                 /* Fill up the score histogram */
 703                 memset(quota->histogram, 0, sizeof(quota->histogram));
 704                 damon_for_each_target(t, c) {
 705                         damon_for_each_region(r, t) {
 706                                 if (!__damos_valid_target(r, s))
 707                                         continue;
 708                                 score = c->primitive.get_scheme_score(
 709                                                 c, t, r, s);
 710                                 quota->histogram[score] +=
 711                                         r->ar.end - r->ar.start;
 712                                 if (score > max_score)
 713                                         max_score = score;
 714                         }
 715                 }
 716
 717                 /* Set the min score limit */
 718                 for (cumulated_sz = 0, score = max_score; ; score--) {
 719                         cumulated_sz += quota->histogram[score];
 720                         if (cumulated_sz >= quota->esz || !score)
 721                                 break;
 722                 }
 723                 quota->min_score = score;
 724         }
 725
 726         damon_for_each_target(t, c) {
 727                 damon_for_each_region_safe(r, next_r, t)
 728                         damon_do_apply_schemes(c, t, r);
 729         }
 730 }
 731
 732 #define sz_damon_region(r) (r->ar.end - r->ar.start)
 733
 734 /*
 735  * Merge two adjacent regions into one region
 736  */
 737 static void damon_merge_two_regions(struct damon_target *t,
 738                 struct damon_region *l, struct damon_region *r)
 739 {
 740         unsigned long sz_l = sz_damon_region(l), sz_r = sz_damon_region(r);
 741
 742         l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
 743                         (sz_l + sz_r);
 744         l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r);
 745         l->ar.end = r->ar.end;
 746         damon_destroy_region(r, t);
 747 }
 748
 749 /*
 750  * Merge adjacent regions having similar access frequencies
 751  *
 752  * t            target affected by this merge operation
 753  * thres        '->nr_accesses' diff threshold for the merge
 754  * sz_limit     size upper limit of each region
 755  */
 756 static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
 757                                    unsigned long sz_limit)
 758 {
 759         struct damon_region *r, *prev = NULL, *next;
 760
 761         damon_for_each_region_safe(r, next, t) {
 762                 if (abs(r->nr_accesses - r->last_nr_accesses) > thres)
 763                         r->age = 0;
 764                 else
 765                         r->age++;
 766
 767                 if (prev && prev->ar.end == r->ar.start &&
 768                     abs(prev->nr_accesses - r->nr_accesses) <= thres &&
 769                     sz_damon_region(prev) + sz_damon_region(r) <= sz_limit)
 770                         damon_merge_two_regions(t, prev, r);
 771                 else
 772                         prev = r;
 773         }
 774 }
 775
 776 /*
 777  * Merge adjacent regions having similar access frequencies
 778  *
 779  * threshold    '->nr_accesses' diff threshold for the merge
 780  * sz_limit     size upper limit of each region
 781  *
 782  * This function merges monitoring target regions which are adjacent and their
 783  * access frequencies are similar.  This is for minimizing the monitoring
 784  * overhead under the dynamically changeable access pattern.  If a merge was
 785  * unnecessarily made, later 'kdamond_split_regions()' will revert it.
 786  */
 787 static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
 788                                   unsigned long sz_limit)
 789 {
 790         struct damon_target *t;
 791
 792         damon_for_each_target(t, c)
 793                 damon_merge_regions_of(t, threshold, sz_limit);
 794 }
 795
 796 /*
 797  * Split a region in two
 798  *
 799  * r            the region to be split
 800  * sz_r         size of the first sub-region that will be made
 801  */
 802 static void damon_split_region_at(struct damon_ctx *ctx,
 803                 struct damon_target *t, struct damon_region *r,
 804                 unsigned long sz_r)
 805 {
 806         struct damon_region *new;
 807
 808         new = damon_new_region(r->ar.start + sz_r, r->ar.end);
 809         if (!new)
 810                 return;
 811
 812         r->ar.end = new->ar.start;
 813
 814         new->age = r->age;
 815         new->last_nr_accesses = r->last_nr_accesses;
 816
 817         damon_insert_region(new, r, damon_next_region(r), t);
 818 }
 819
 820 /* Split every region in the given target into 'nr_subs' regions */
 821 static void damon_split_regions_of(struct damon_ctx *ctx,
 822                                      struct damon_target *t, int nr_subs)
 823 {
 824         struct damon_region *r, *next;
 825         unsigned long sz_region, sz_sub = 0;
 826         int i;
 827
 828         damon_for_each_region_safe(r, next, t) {
 829                 sz_region = r->ar.end - r->ar.start;
 830
 831                 for (i = 0; i < nr_subs - 1 &&
 832                                 sz_region > 2 * DAMON_MIN_REGION; i++) {
 833                         /*
 834                          * Randomly select size of left sub-region to be at
 835                          * least 10 percent and at most 90% of original region
 836                          */
 837                         sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
 838                                         sz_region / 10, DAMON_MIN_REGION);
 839                         /* Do not allow blank region */
 840                         if (sz_sub == 0 || sz_sub >= sz_region)
 841                                 continue;
 842
 843                         damon_split_region_at(ctx, t, r, sz_sub);
 844                         sz_region = sz_sub;
 845                 }
 846         }
 847 }
 848
 849 /*
 850  * Split every target region into randomly-sized small regions
 851  *
 852  * This function splits every target region into random-sized small regions if
 853  * current total number of the regions is equal or smaller than half of the
 854  * user-specified maximum number of regions.  This is for maximizing the
 855  * monitoring accuracy under the dynamically changeable access patterns.  If a
 856  * split was unnecessarily made, later 'kdamond_merge_regions()' will revert
 857  * it.
 858  */
 859 static void kdamond_split_regions(struct damon_ctx *ctx)
 860 {
 861         struct damon_target *t;
 862         unsigned int nr_regions = 0;
 863         static unsigned int last_nr_regions;
 864         int nr_subregions = 2;
 865
 866         damon_for_each_target(t, ctx)
 867                 nr_regions += damon_nr_regions(t);
 868
 869         if (nr_regions > ctx->max_nr_regions / 2)
 870                 return;
 871
 872         /* Maybe the middle of the region has different access frequency */
 873         if (last_nr_regions == nr_regions &&
 874                         nr_regions < ctx->max_nr_regions / 3)
 875                 nr_subregions = 3;
 876
 877         damon_for_each_target(t, ctx)
 878                 damon_split_regions_of(ctx, t, nr_subregions);
 879
 880         last_nr_regions = nr_regions;
 881 }
 882
 883 /*
 884  * Check whether it is time to check and apply the target monitoring regions
 885  *
 886  * Returns true if it is.
 887  */
 888 static bool kdamond_need_update_primitive(struct damon_ctx *ctx)
 889 {
 890         return damon_check_reset_time_interval(&ctx->last_primitive_update,
 891                         ctx->primitive_update_interval);
 892 }
 893
 894 /*
 895  * Check whether current monitoring should be stopped
 896  *
 897  * The monitoring is stopped when either the user requested to stop, or all
 898  * monitoring targets are invalid.
 899  *
 900  * Returns true if need to stop current monitoring.
 901  */
 902 static bool kdamond_need_stop(struct damon_ctx *ctx)
 903 {
 904         struct damon_target *t;
 905
 906         if (kthread_should_stop())
 907                 return true;
 908
 909         if (!ctx->primitive.target_valid)
 910                 return false;
 911
 912         damon_for_each_target(t, ctx) {
 913                 if (ctx->primitive.target_valid(t))
 914                         return false;
 915         }
 916
 917         return true;
 918 }
 919
 920 static unsigned long damos_wmark_metric_value(enum damos_wmark_metric metric)
 921 {
 922         struct sysinfo i;
 923
 924         switch (metric) {
 925         case DAMOS_WMARK_FREE_MEM_RATE:
 926                 si_meminfo(&i);
 927                 return i.freeram * 1000 / i.totalram;
 928         default:
 929                 break;
 930         }
 931         return -EINVAL;
 932 }
 933
 934 /*
 935  * Returns zero if the scheme is active.  Else, returns time to wait for next
 936  * watermark check in micro-seconds.
 937  */
 938 static unsigned long damos_wmark_wait_us(struct damos *scheme)
 939 {
 940         unsigned long metric;
 941
 942         if (scheme->wmarks.metric == DAMOS_WMARK_NONE)
 943                 return 0;
 944
 945         metric = damos_wmark_metric_value(scheme->wmarks.metric);
 946         /* higher than high watermark or lower than low watermark */
 947         if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) {
 948                 if (scheme->wmarks.activated)
 949                         pr_debug("deactivate a scheme (%d) for %s wmark\n",
 950                                         scheme->action,
 951                                         metric > scheme->wmarks.high ?
 952                                         "high" : "low");
 953                 scheme->wmarks.activated = false;
 954                 return scheme->wmarks.interval;
 955         }
 956
 957         /* inactive and higher than middle watermark */
 958         if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) &&
 959                         !scheme->wmarks.activated)
 960                 return scheme->wmarks.interval;
 961
 962         if (!scheme->wmarks.activated)
 963                 pr_debug("activate a scheme (%d)\n", scheme->action);
 964         scheme->wmarks.activated = true;
 965         return 0;
 966 }
 967
 968 static void kdamond_usleep(unsigned long usecs)
 969 {
 970         /* See Documentation/timers/timers-howto.rst for the thresholds */
 971         if (usecs > 20 * USEC_PER_MSEC)
 972                 schedule_timeout_idle(usecs_to_jiffies(usecs));
 973         else
 974                 usleep_idle_range(usecs, usecs + 1);
 975 }
 976
 977 /* Returns negative error code if it's not activated but should return */
 978 static int kdamond_wait_activation(struct damon_ctx *ctx)
 979 {
 980         struct damos *s;
 981         unsigned long wait_time;
 982         unsigned long min_wait_time = 0;
 983
 984         while (!kdamond_need_stop(ctx)) {
 985                 damon_for_each_scheme(s, ctx) {
 986                         wait_time = damos_wmark_wait_us(s);
 987                         if (!min_wait_time || wait_time < min_wait_time)
 988                                 min_wait_time = wait_time;
 989                 }
 990                 if (!min_wait_time)
 991                         return 0;
 992
 993                 kdamond_usleep(min_wait_time);
 994         }
 995         return -EBUSY;
 996 }
 997
 998 /*
 999  * The monitoring daemon that runs as a kernel thread
1000  */
1001 static int kdamond_fn(void *data)
1002 {
1003         struct damon_ctx *ctx = (struct damon_ctx *)data;
1004         struct damon_target *t;
1005         struct damon_region *r, *next;
1006         unsigned int max_nr_accesses = 0;
1007         unsigned long sz_limit = 0;
1008         bool done = false;
1009
1010         pr_debug("kdamond (%d) starts\n", current->pid);
1011
1012         if (ctx->primitive.init)
1013                 ctx->primitive.init(ctx);
1014         if (ctx->callback.before_start && ctx->callback.before_start(ctx))
1015                 done = true;
1016
1017         sz_limit = damon_region_sz_limit(ctx);
1018
1019         while (!kdamond_need_stop(ctx) && !done) {
1020                 if (kdamond_wait_activation(ctx))
1021                         continue;
1022
1023                 if (ctx->primitive.prepare_access_checks)
1024                         ctx->primitive.prepare_access_checks(ctx);
1025                 if (ctx->callback.after_sampling &&
1026                                 ctx->callback.after_sampling(ctx))
1027                         done = true;
1028
1029                 kdamond_usleep(ctx->sample_interval);
1030
1031                 if (ctx->primitive.check_accesses)
1032                         max_nr_accesses = ctx->primitive.check_accesses(ctx);
1033
1034                 if (kdamond_aggregate_interval_passed(ctx)) {
1035                         kdamond_merge_regions(ctx,
1036                                         max_nr_accesses / 10,
1037                                         sz_limit);
1038                         if (ctx->callback.after_aggregation &&
1039                                         ctx->callback.after_aggregation(ctx))
1040                                 done = true;
1041                         kdamond_apply_schemes(ctx);
1042                         kdamond_reset_aggregated(ctx);
1043                         kdamond_split_regions(ctx);
1044                         if (ctx->primitive.reset_aggregated)
1045                                 ctx->primitive.reset_aggregated(ctx);
1046                 }
1047
1048                 if (kdamond_need_update_primitive(ctx)) {
1049                         if (ctx->primitive.update)
1050                                 ctx->primitive.update(ctx);
1051                         sz_limit = damon_region_sz_limit(ctx);
1052                 }
1053         }
1054         damon_for_each_target(t, ctx) {
1055                 damon_for_each_region_safe(r, next, t)
1056                         damon_destroy_region(r, t);
1057         }
1058
1059         if (ctx->callback.before_terminate)
1060                 ctx->callback.before_terminate(ctx);
1061         if (ctx->primitive.cleanup)
1062                 ctx->primitive.cleanup(ctx);
1063
1064         pr_debug("kdamond (%d) finishes\n", current->pid);
1065         mutex_lock(&ctx->kdamond_lock);
1066         ctx->kdamond = NULL;
1067         mutex_unlock(&ctx->kdamond_lock);
1068
1069         mutex_lock(&damon_lock);
1070         nr_running_ctxs--;
1071         mutex_unlock(&damon_lock);
1072
1073         return 0;
1074 }
1075
1076 #include "core-test.h"