sagit-ice-cold/kernel_xiaomi_msm8998: drivers/android/simple_lmk.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2019 Sultan Alsawaf <sultan@kerneltoast.com>.
 */

#define pr_fmt(fmt) "simple_lmk: " fmt

#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/moduleparam.h>
#include <linux/oom.h>
#include <linux/sort.h>
#include <linux/version.h>

/* The sched_param struct is located elsewhere in newer kernels */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
#include <uapi/linux/sched/types.h>
#endif

/* The minimum number of pages to free per reclaim */
#define MIN_FREE_PAGES (CONFIG_ANDROID_SIMPLE_LMK_MINFREE * SZ_1M / PAGE_SIZE)

/* Kill up to this many victims per reclaim */
#define MAX_VICTIMS 1024

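/*
 * Bookkeeping for one potential kill target: the task-locked victim task, its
 * mm (used later to detect when that mm is freed), and its RSS in pages.
 */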
struct victim_info {
        struct task_struct *tsk;
        struct mm_struct *mm;
        unsigned long size;
};

/* Pulled from the Android framework. Lower adj means higher priority. */
static const short adj_prio[] = {
        906, /* CACHED_APP_MAX_ADJ */
        905, /* Cached app */
        904, /* Cached app */
        903, /* Cached app */
        902, /* Cached app */
        901, /* Cached app */
        900, /* CACHED_APP_MIN_ADJ */
        800, /* SERVICE_B_ADJ */
        700, /* PREVIOUS_APP_ADJ */
        600, /* HOME_APP_ADJ */
        500, /* SERVICE_ADJ */
        400, /* HEAVY_WEIGHT_APP_ADJ */
        300, /* BACKUP_APP_ADJ */
        200, /* PERCEPTIBLE_APP_ADJ */
        100, /* VISIBLE_APP_ADJ */
        0    /* FOREGROUND_APP_ADJ */
};

/* Victims selected by the most recent scan, in kill order */
static struct victim_info victims[MAX_VICTIMS];
/* The reclaim thread sleeps here until a reclaim request is queued */
static DECLARE_WAIT_QUEUE_HEAD(oom_waitq);
/* Signaled once every killed victim's mm has been freed */
static DECLARE_COMPLETION(reclaim_done);
/* Number of victims from the current pass that are still dying */
static atomic_t victims_to_kill = ATOMIC_INIT(0);
/* Count of queued reclaim requests from kswapd */
static atomic_t needs_reclaim = ATOMIC_INIT(0);

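/*
 * sort() comparator: returns positive when rhs is larger, so victims end up
 * ordered by descending RSS and the largest victims are considered first.
 */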
static int victim_size_cmp(const void *lhs_ptr, const void *rhs_ptr)
{
        const struct victim_info *lhs = (typeof(lhs))lhs_ptr;
        const struct victim_info *rhs = (typeof(rhs))rhs_ptr;

        return rhs->size - lhs->size;
}

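/*
 * Kill signals are delivered per thread group, so a task that shares its
 * thread group with an already-recorded victim must not be recorded twice.
 */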
static bool vtsk_is_duplicate(struct victim_info *varr, int vlen,
                              struct task_struct *vtsk)
{
        int i;

        for (i = 0; i < vlen; i++) {
                if (same_thread_group(varr[i].tsk, vtsk))
                        return true;
        }

        return false;
}

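/*
 * Walk the process list and record every task whose oom_score_adj matches
 * target_adj. Each recorded victim is returned task-locked by
 * find_lock_task_mm(); the lock is dropped later, in process_victims() or
 * after the kill signal is sent. Returns the total RSS (in pages) found.
 */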
static unsigned long find_victims(struct victim_info *varr, int *vindex,
                                  int vmaxlen, short target_adj)
{
        unsigned long pages_found = 0;
        int old_vindex = *vindex;
        struct task_struct *tsk;

        for_each_process(tsk) {
                struct task_struct *vtsk;

                /*
                 * Search for tasks with the targeted importance (adj). Since
                 * only tasks with a positive adj can be targeted, that
                 * naturally excludes tasks which shouldn't be killed, like init
                 * and kthreads. Although oom_score_adj can still be changed
                 * while this code runs, it doesn't really matter. We just need
                 * to make sure that if the adj changes, we won't deadlock
                 * trying to lock a task that we locked earlier.
                 */
                if (READ_ONCE(tsk->signal->oom_score_adj) != target_adj ||
                    vtsk_is_duplicate(varr, *vindex, tsk))
                        continue;

                vtsk = find_lock_task_mm(tsk);
                if (!vtsk)
                        continue;

                /* Store this potential victim away for later */
                varr[*vindex].tsk = vtsk;
                varr[*vindex].mm = vtsk->mm;
                varr[*vindex].size = get_mm_rss(vtsk->mm);

                /* Keep track of the number of pages that have been found */
                pages_found += varr[*vindex].size;

                /* Make sure there's space left in the victim array */
                if (++*vindex == vmaxlen)
                        break;
        }

        /*
         * Sort the victims in descending order of size to prioritize killing
         * the larger ones first.
         */
        if (pages_found)
                sort(&varr[old_vindex], *vindex - old_vindex, sizeof(*varr),
                     victim_size_cmp, NULL);

        return pages_found;
}

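/*
 * Walk the victim array until enough pages have been accumulated, counting
 * how many victims must actually be killed. Victims beyond that point are
 * spared and their task locks are released here.
 */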
static int process_victims(struct victim_info *varr, int vlen,
                           unsigned long pages_needed)
{
        unsigned long pages_found = 0;
        int i, nr_to_kill = 0;

        /*
         * Calculate the number of tasks that need to be killed and quickly
         * release the references to those that'll live.
         */
        for (i = 0; i < vlen; i++) {
                struct victim_info *victim = &varr[i];
                struct task_struct *vtsk = victim->tsk;

                /* The victim's task lock is taken in find_victims; release it */
                if (pages_found >= pages_needed) {
                        task_unlock(vtsk);
                        continue;
                }

                pages_found += victim->size;
                nr_to_kill++;
        }

        return nr_to_kill;
}

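/*
 * Core reclaim path: gather victims from the lowest-priority adjs upward,
 * trim the selection down to the fewest (largest) victims that satisfy
 * pages_needed, SIGKILL them, and block until simple_lmk_mm_freed() reports
 * that every victim's mm has been freed.
 */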
static void scan_and_kill(unsigned long pages_needed)
{
        int i, nr_to_kill = 0, nr_victims = 0;
        unsigned long pages_found = 0;

        /*
         * Hold the tasklist lock so tasks don't disappear while scanning. This
         * is preferred to holding an RCU read lock so that the list of tasks
         * is guaranteed to be up to date.
         */
        read_lock(&tasklist_lock);
        for (i = 0; i < ARRAY_SIZE(adj_prio); i++) {
                pages_found += find_victims(victims, &nr_victims, MAX_VICTIMS,
                                            adj_prio[i]);
                if (pages_found >= pages_needed || nr_victims == MAX_VICTIMS)
                        break;
        }
        read_unlock(&tasklist_lock);

        /* Pretty unlikely but it can happen */
        if (unlikely(!nr_victims))
                return;

        /* First round of victim processing to weed out unneeded victims */
        nr_to_kill = process_victims(victims, nr_victims, pages_needed);

        /*
         * Try to kill as few of the chosen victims as possible by sorting the
         * chosen victims by size, which means larger victims that have a lower
         * adj can be killed in place of smaller victims with a high adj.
         */
        sort(victims, nr_to_kill, sizeof(*victims), victim_size_cmp, NULL);

        /* Second round of victim processing to finally select the victims */
        nr_to_kill = process_victims(victims, nr_to_kill, pages_needed);

        /* Kill the victims */
        atomic_set_release(&victims_to_kill, nr_to_kill);
        for (i = 0; i < nr_to_kill; i++) {
                struct victim_info *victim = &victims[i];
                struct task_struct *vtsk = victim->tsk;

                pr_info("Killing %s with adj %d to free %lu KiB\n", vtsk->comm,
                        vtsk->signal->oom_score_adj,
                        victim->size << (PAGE_SHIFT - 10));

                /* Send kill signal to the victim */
                send_sig(SIGKILL, vtsk, 0);

                task_unlock(vtsk);
        }

        /* Wait until all the victims die */
        wait_for_completion(&reclaim_done);
}

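/*
 * Dedicated reclaim kthread. It runs at maximum SCHED_FIFO priority so that
 * victim selection isn't starved under memory pressure, and consumes one
 * queued reclaim request per scan_and_kill() pass.
 */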
static int simple_lmk_reclaim_thread(void *data)
{
        static const struct sched_param sched_max_rt_prio = {
                .sched_priority = MAX_RT_PRIO - 1
        };

        sched_setscheduler_nocheck(current, SCHED_FIFO, &sched_max_rt_prio);

        while (1) {
                wait_event(oom_waitq, atomic_add_unless(&needs_reclaim, -1, 0));
                scan_and_kill(MIN_FREE_PAGES);
        }

        return 0;
}

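/*
 * Called by kswapd with its current scan priority. When kswapd reaches the
 * configured aggression priority, queue one reclaim request; the waitqueue
 * only needs to be woken on the 0 -> 1 transition, since otherwise the
 * reclaim thread is already awake or already has a request pending.
 */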
void simple_lmk_decide_reclaim(int kswapd_priority)
{
        if (kswapd_priority == CONFIG_ANDROID_SIMPLE_LMK_AGGRESSION) {
                int v, v1;

                for (v = 0;; v = v1) {
                        v1 = atomic_cmpxchg(&needs_reclaim, v, v + 1);
                        if (likely(v1 == v)) {
                                if (!v)
                                        wake_up(&oom_waitq);
                                break;
                        }
                }
        }
}

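/*
 * Called when an mm is freed. If it belongs to a pending victim, clear the
 * entry (the cmpxchg guarantees each mm is counted exactly once) and, once
 * every victim's mm is gone, reset the counters and let scan_and_kill()
 * resume.
 */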
void simple_lmk_mm_freed(struct mm_struct *mm)
{
        static atomic_t nr_killed = ATOMIC_INIT(0);
        int i, nr_to_kill;

        nr_to_kill = atomic_read_acquire(&victims_to_kill);
        for (i = 0; i < nr_to_kill; i++) {
                if (cmpxchg(&victims[i].mm, mm, NULL) == mm) {
                        if (atomic_inc_return(&nr_killed) == nr_to_kill) {
                                atomic_set(&victims_to_kill, 0);
                                nr_killed = (atomic_t)ATOMIC_INIT(0);
                                complete(&reclaim_done);
                        }
                        break;
                }
        }
}

/* Initialize Simple LMK when lmkd in Android writes to the minfree parameter */
static int simple_lmk_init_set(const char *val, const struct kernel_param *kp)
{
        static atomic_t init_done = ATOMIC_INIT(0);
        struct task_struct *thread;

        if (atomic_cmpxchg(&init_done, 0, 1))
                return 0;

        thread = kthread_run_perf_critical(simple_lmk_reclaim_thread, NULL,
                                           "simple_lmkd");
        BUG_ON(IS_ERR(thread));

        return 0;
}

static const struct kernel_param_ops simple_lmk_init_ops = {
        .set = simple_lmk_init_set
};

/* Needed to prevent Android from thinking there's no LMK and thus rebooting */
#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX "lowmemorykiller."
module_param_cb(minfree, &simple_lmk_init_ops, NULL, 0200);
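
/*
 * Illustration (not part of this file): with the "lowmemorykiller." prefix
 * above, the parameter surfaces as a built-in module param, so lmkd's usual
 * write to the legacy interface, e.g. (example values only):
 *
 *   echo "18432,23040,27648,32256,55296,80640" \
 *       > /sys/module/lowmemorykiller/parameters/minfree
 *
 * invokes simple_lmk_init_set(), which starts the reclaim thread exactly
 * once; the written values themselves are ignored.
 */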