1 // SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2019 Sultan Alsawaf <sultan@kerneltoast.com>. */
6 #define pr_fmt(fmt) "simple_lmk: " fmt
8 #include <linux/kthread.h>
10 #include <linux/moduleparam.h>
11 #include <linux/oom.h>
12 #include <linux/sort.h>
13 #include <linux/version.h>
15 /* The sched_param struct is located elsewhere in newer kernels */
16 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
17 #include <uapi/linux/sched/types.h>
20 /* The minimum number of pages to free per reclaim */
21 #define MIN_FREE_PAGES (CONFIG_ANDROID_SIMPLE_LMK_MINFREE * SZ_1M / PAGE_SIZE)
23 /* Kill up to this many victims per reclaim */
24 #define MAX_VICTIMS 1024
/*
 * Bookkeeping for one candidate task selected for killing.
 *
 * NOTE(review): reconstructed from a truncated extract — the mm and size
 * members are required by find_victims()/process_victims()/
 * simple_lmk_mm_freed(); confirm field order against the original source.
 */
struct victim_info {
	struct task_struct *tsk;	/* task-locked via find_lock_task_mm() */
	struct mm_struct *mm;		/* victim's mm; cleared once it is freed */
	unsigned long size;		/* RSS in pages at selection time */
};
32 /* Pulled from the Android framework. Lower adj means higher priority. */
33 static const short adj_prio[] = {
34 906, /* CACHED_APP_MAX_ADJ */
40 900, /* CACHED_APP_MIN_ADJ */
41 800, /* SERVICE_B_ADJ */
42 700, /* PREVIOUS_APP_ADJ */
43 600, /* HOME_APP_ADJ */
44 500, /* SERVICE_ADJ */
45 400, /* HEAVY_WEIGHT_APP_ADJ */
46 300, /* BACKUP_APP_ADJ */
47 200, /* PERCEPTIBLE_APP_ADJ */
48 100, /* VISIBLE_APP_ADJ */
49 0 /* FOREGROUND_APP_ADJ */
/* Victim candidates gathered under tasklist_lock during a reclaim pass */
static struct victim_info victims[MAX_VICTIMS];
/* The reclaim kthread sleeps here until kswapd posts a reclaim request */
static DECLARE_WAIT_QUEUE_HEAD(oom_waitq);
/* Completed once every victim's mm from the current pass has been freed */
static DECLARE_COMPLETION(reclaim_done);
/* Number of victims being killed in the current pass; 0 when idle */
static atomic_t victims_to_kill = ATOMIC_INIT(0);
/* Count of outstanding reclaim requests posted by kswapd */
static atomic_t needs_reclaim = ATOMIC_INIT(0);
58 static int victim_size_cmp(const void *lhs_ptr, const void *rhs_ptr)
60 const struct victim_info *lhs = (typeof(lhs))lhs_ptr;
61 const struct victim_info *rhs = (typeof(rhs))rhs_ptr;
63 return rhs->size - lhs->size;
66 static bool vtsk_is_duplicate(struct victim_info *varr, int vlen,
67 struct task_struct *vtsk)
71 for (i = 0; i < vlen; i++) {
72 if (same_thread_group(varr[i].tsk, vtsk))
79 static unsigned long find_victims(struct victim_info *varr, int *vindex,
80 int vmaxlen, short target_adj)
82 unsigned long pages_found = 0;
83 int old_vindex = *vindex;
84 struct task_struct *tsk;
86 for_each_process(tsk) {
87 struct task_struct *vtsk;
90 * Search for tasks with the targeted importance (adj). Since
91 * only tasks with a positive adj can be targeted, that
92 * naturally excludes tasks which shouldn't be killed, like init
93 * and kthreads. Although oom_score_adj can still be changed
94 * while this code runs, it doesn't really matter. We just need
95 * to make sure that if the adj changes, we won't deadlock
96 * trying to lock a task that we locked earlier.
98 if (READ_ONCE(tsk->signal->oom_score_adj) != target_adj ||
99 vtsk_is_duplicate(varr, *vindex, tsk))
102 vtsk = find_lock_task_mm(tsk);
106 /* Store this potential victim away for later */
107 varr[*vindex].tsk = vtsk;
108 varr[*vindex].mm = vtsk->mm;
109 varr[*vindex].size = get_mm_rss(vtsk->mm);
111 /* Keep track of the number of pages that have been found */
112 pages_found += varr[*vindex].size;
114 /* Make sure there's space left in the victim array */
115 if (++*vindex == vmaxlen)
120 * Sort the victims in descending order of size to prioritize killing
121 * the larger ones first.
124 sort(&varr[old_vindex], *vindex - old_vindex, sizeof(*varr),
125 victim_size_cmp, NULL);
130 static int process_victims(struct victim_info *varr, int vlen,
131 unsigned long pages_needed)
133 unsigned long pages_found = 0;
134 int i, nr_to_kill = 0;
137 * Calculate the number of tasks that need to be killed and quickly
138 * release the references to those that'll live.
140 for (i = 0; i < vlen; i++) {
141 struct victim_info *victim = &victims[i];
142 struct task_struct *vtsk = victim->tsk;
144 /* The victim's mm lock is taken in find_victims; release it */
145 if (pages_found >= pages_needed) {
150 pages_found += victim->size;
157 static void scan_and_kill(unsigned long pages_needed)
159 int i, nr_to_kill = 0, nr_victims = 0;
160 unsigned long pages_found = 0;
163 * Hold the tasklist lock so tasks don't disappear while scanning. This
164 * is preferred to holding an RCU read lock so that the list of tasks
165 * is guaranteed to be up to date.
167 read_lock(&tasklist_lock);
168 for (i = 0; i < ARRAY_SIZE(adj_prio); i++) {
169 pages_found += find_victims(victims, &nr_victims, MAX_VICTIMS,
171 if (pages_found >= pages_needed || nr_victims == MAX_VICTIMS)
174 read_unlock(&tasklist_lock);
176 /* Pretty unlikely but it can happen */
177 if (unlikely(!nr_victims))
180 /* First round of victim processing to weed out unneeded victims */
181 nr_to_kill = process_victims(victims, nr_victims, pages_needed);
184 * Try to kill as few of the chosen victims as possible by sorting the
185 * chosen victims by size, which means larger victims that have a lower
186 * adj can be killed in place of smaller victims with a high adj.
188 sort(victims, nr_to_kill, sizeof(*victims), victim_size_cmp, NULL);
190 /* Second round of victim processing to finally select the victims */
191 nr_to_kill = process_victims(victims, nr_to_kill, pages_needed);
193 /* Kill the victims */
194 atomic_set_release(&victims_to_kill, nr_to_kill);
195 for (i = 0; i < nr_to_kill; i++) {
196 struct victim_info *victim = &victims[i];
197 struct task_struct *vtsk = victim->tsk;
199 pr_info("Killing %s with adj %d to free %lu KiB\n", vtsk->comm,
200 vtsk->signal->oom_score_adj,
201 victim->size << (PAGE_SHIFT - 10));
203 /* Send kill signal to the victim */
204 send_sig(SIGKILL, vtsk, 0);
209 /* Wait until all the victims die */
210 wait_for_completion(&reclaim_done);
213 static int simple_lmk_reclaim_thread(void *data)
215 static const struct sched_param sched_max_rt_prio = {
216 .sched_priority = MAX_RT_PRIO - 1
219 sched_setscheduler_nocheck(current, SCHED_FIFO, &sched_max_rt_prio);
222 wait_event(oom_waitq, atomic_add_unless(&needs_reclaim, -1, 0));
223 scan_and_kill(MIN_FREE_PAGES);
229 void simple_lmk_decide_reclaim(int kswapd_priority)
231 if (kswapd_priority == CONFIG_ANDROID_SIMPLE_LMK_AGGRESSION) {
234 for (v = 0;; v = v1) {
235 v1 = atomic_cmpxchg(&needs_reclaim, v, v + 1);
236 if (likely(v1 == v)) {
245 void simple_lmk_mm_freed(struct mm_struct *mm)
247 static atomic_t nr_killed = ATOMIC_INIT(0);
250 nr_to_kill = atomic_read_acquire(&victims_to_kill);
251 for (i = 0; i < nr_to_kill; i++) {
252 if (cmpxchg(&victims[i].mm, mm, NULL) == mm) {
253 if (atomic_inc_return(&nr_killed) == nr_to_kill) {
254 atomic_set(&victims_to_kill, 0);
255 nr_killed = (atomic_t)ATOMIC_INIT(0);
256 complete(&reclaim_done);
263 /* Initialize Simple LMK when lmkd in Android writes to the minfree parameter */
264 static int simple_lmk_init_set(const char *val, const struct kernel_param *kp)
266 static atomic_t init_done = ATOMIC_INIT(0);
267 struct task_struct *thread;
269 if (atomic_cmpxchg(&init_done, 0, 1))
272 thread = kthread_run_perf_critical(simple_lmk_reclaim_thread, NULL,
274 BUG_ON(IS_ERR(thread));
279 static const struct kernel_param_ops simple_lmk_init_ops = {
280 .set = simple_lmk_init_set
/* Needed to prevent Android from thinking there's no LMK and thus rebooting */
#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX "lowmemorykiller."
/* lmkd's first write to lowmemorykiller.minfree triggers initialization */
module_param_cb(minfree, &simple_lmk_init_ops, NULL, 0200);