sagit-ice-cold/kernel_xiaomi_msm8998.git: fs/f2fs/segment.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * fs/f2fs/segment.c
4  *
5  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
6  *             http://www.samsung.com/
7  */
8 #include <linux/fs.h>
9 #include <linux/f2fs_fs.h>
10 #include <linux/bio.h>
11 #include <linux/blkdev.h>
12 #include <linux/prefetch.h>
13 #include <linux/kthread.h>
14 #include <linux/swap.h>
15 #include <linux/timer.h>
16 #include <linux/freezer.h>
17 #include <linux/sched.h>
18
19 #include "f2fs.h"
20 #include "segment.h"
21 #include "node.h"
22 #include "gc.h"
23 #include "trace.h"
24 #include <trace/events/f2fs.h>
25
26 #define __reverse_ffz(x) __reverse_ffs(~(x))
27
28 static struct kmem_cache *discard_entry_slab;
29 static struct kmem_cache *discard_cmd_slab;
30 static struct kmem_cache *sit_entry_set_slab;
31 static struct kmem_cache *inmem_entry_slab;
32
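/*
 * Load one word of the bitmap with str[0] in the most significant byte,
 * so that the MSB<->LSB reversed layout produced by f2fs_set_bit() can be
 * scanned with ordinary word operations by the __find_rev_*() helpers
 * below, independent of host endianness.
 */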
33 static unsigned long __reverse_ulong(unsigned char *str)
34 {
35         unsigned long tmp = 0;
36         int shift = 24, idx = 0;
37
38 #if BITS_PER_LONG == 64
39         shift = 56;
40 #endif
41         while (shift >= 0) {
42                 tmp |= (unsigned long)str[idx++] << shift;
43                 shift -= BITS_PER_BYTE;
44         }
45         return tmp;
46 }
47
48 /*
49  * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
50  * MSB and LSB are reversed in a byte by f2fs_set_bit.
51  */
52 static inline unsigned long __reverse_ffs(unsigned long word)
53 {
54         int num = 0;
55
56 #if BITS_PER_LONG == 64
57         if ((word & 0xffffffff00000000UL) == 0)
58                 num += 32;
59         else
60                 word >>= 32;
61 #endif
62         if ((word & 0xffff0000) == 0)
63                 num += 16;
64         else
65                 word >>= 16;
66
67         if ((word & 0xff00) == 0)
68                 num += 8;
69         else
70                 word >>= 8;
71
72         if ((word & 0xf0) == 0)
73                 num += 4;
74         else
75                 word >>= 4;
76
77         if ((word & 0xc) == 0)
78                 num += 2;
79         else
80                 word >>= 2;
81
82         if ((word & 0x2) == 0)
83                 num += 1;
84         return num;
85 }
86
87 /*
88  * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
89  * f2fs_set_bit makes MSB and LSB reversed in a byte.
90  * @size must be an integral multiple of BITS_PER_LONG.
91  * Example:
92  *                             MSB <--> LSB
93  *   f2fs_set_bit(0, bitmap) => 1000 0000
94  *   f2fs_set_bit(7, bitmap) => 0000 0001
95  */
96 static unsigned long __find_rev_next_bit(const unsigned long *addr,
97                         unsigned long size, unsigned long offset)
98 {
99         const unsigned long *p = addr + BIT_WORD(offset);
100         unsigned long result = size;
101         unsigned long tmp;
102
103         if (offset >= size)
104                 return size;
105
106         size -= (offset & ~(BITS_PER_LONG - 1));
107         offset %= BITS_PER_LONG;
108
109         while (1) {
110                 if (*p == 0)
111                         goto pass;
112
113                 tmp = __reverse_ulong((unsigned char *)p);
114
115                 tmp &= ~0UL >> offset;
116                 if (size < BITS_PER_LONG)
117                         tmp &= (~0UL << (BITS_PER_LONG - size));
118                 if (tmp)
119                         goto found;
120 pass:
121                 if (size <= BITS_PER_LONG)
122                         break;
123                 size -= BITS_PER_LONG;
124                 offset = 0;
125                 p++;
126         }
127         return result;
128 found:
129         return result - size + __reverse_ffs(tmp);
130 }
131
132 static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
133                         unsigned long size, unsigned long offset)
134 {
135         const unsigned long *p = addr + BIT_WORD(offset);
136         unsigned long result = size;
137         unsigned long tmp;
138
139         if (offset >= size)
140                 return size;
141
142         size -= (offset & ~(BITS_PER_LONG - 1));
143         offset %= BITS_PER_LONG;
144
145         while (1) {
146                 if (*p == ~0UL)
147                         goto pass;
148
149                 tmp = __reverse_ulong((unsigned char *)p);
150
151                 if (offset)
152                         tmp |= ~0UL << (BITS_PER_LONG - offset);
153                 if (size < BITS_PER_LONG)
154                         tmp |= ~0UL >> size;
155                 if (tmp != ~0UL)
156                         goto found;
157 pass:
158                 if (size <= BITS_PER_LONG)
159                         break;
160                 size -= BITS_PER_LONG;
161                 offset = 0;
162                 p++;
163         }
164         return result;
165 found:
166         return result - size + __reverse_ffz(tmp);
167 }
168
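/*
 * Decide whether SSR (slack space recycling) allocation should be used:
 * never in LFS mode, always under urgent GC or while checkpointing is
 * disabled, otherwise only when free sections fall to the space needed to
 * flush dirty node/dentry/imeta pages plus the reserved and min_ssr margins.
 */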
169 bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
170 {
171         int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
172         int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
173         int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
174
175         if (test_opt(sbi, LFS))
176                 return false;
177         if (sbi->gc_mode == GC_URGENT)
178                 return true;
179         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
180                 return true;
181
182         return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
183                         SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
184 }
185
186 void f2fs_register_inmem_page(struct inode *inode, struct page *page)
187 {
188         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
189         struct f2fs_inode_info *fi = F2FS_I(inode);
190         struct inmem_pages *new;
191
192         f2fs_trace_pid(page);
193
194         f2fs_set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
195
196         new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
197
198         /* add atomic page indices to the list */
199         new->page = page;
200         INIT_LIST_HEAD(&new->list);
201
202         /* increase reference count with clean state */
203         mutex_lock(&fi->inmem_lock);
204         get_page(page);
205         list_add_tail(&new->list, &fi->inmem_pages);
206         spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
207         if (list_empty(&fi->inmem_ilist))
208                 list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
209         spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
210         inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
211         mutex_unlock(&fi->inmem_lock);
212
213         trace_f2fs_register_inmem_page(page, INMEM);
214 }
215
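/*
 * Walk @head and release the registered atomic pages.  With @recover,
 * restore each block's old address (or invalidate blocks that had been
 * newly allocated); with @drop, just discard the cached data.  @trylock
 * skips pages that cannot be locked, to avoid deadlocking with inmem_lock.
 */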
216 static int __revoke_inmem_pages(struct inode *inode,
217                                 struct list_head *head, bool drop, bool recover,
218                                 bool trylock)
219 {
220         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
221         struct inmem_pages *cur, *tmp;
222         int err = 0;
223
224         list_for_each_entry_safe(cur, tmp, head, list) {
225                 struct page *page = cur->page;
226
227                 if (drop)
228                         trace_f2fs_commit_inmem_page(page, INMEM_DROP);
229
230                 if (trylock) {
231                         /*
232                          * to avoid deadlock between the page lock and
233                          * inmem_lock.
234                          */
235                         if (!trylock_page(page))
236                                 continue;
237                 } else {
238                         lock_page(page);
239                 }
240
241                 f2fs_wait_on_page_writeback(page, DATA, true, true);
242
243                 if (recover) {
244                         struct dnode_of_data dn;
245                         struct node_info ni;
246
247                         trace_f2fs_commit_inmem_page(page, INMEM_REVOKE);
248 retry:
249                         set_new_dnode(&dn, inode, NULL, NULL, 0);
250                         err = f2fs_get_dnode_of_data(&dn, page->index,
251                                                                 LOOKUP_NODE);
252                         if (err) {
253                                 if (err == -ENOMEM) {
254                                         congestion_wait(BLK_RW_ASYNC, HZ/50);
255                                         cond_resched();
256                                         goto retry;
257                                 }
258                                 err = -EAGAIN;
259                                 goto next;
260                         }
261
262                         err = f2fs_get_node_info(sbi, dn.nid, &ni);
263                         if (err) {
264                                 f2fs_put_dnode(&dn);
265                                 return err;
266                         }
267
268                         if (cur->old_addr == NEW_ADDR) {
269                                 f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
270                                 f2fs_update_data_blkaddr(&dn, NEW_ADDR);
271                         } else
272                                 f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
273                                         cur->old_addr, ni.version, true, true);
274                         f2fs_put_dnode(&dn);
275                 }
276 next:
277                 /* we don't need to invalidate this in the successful status */
278                 if (drop || recover) {
279                         ClearPageUptodate(page);
280                         clear_cold_data(page);
281                 }
282                 f2fs_clear_page_private(page);
283                 f2fs_put_page(page, 1);
284
285                 list_del(&cur->list);
286                 kmem_cache_free(inmem_entry_slab, cur);
287                 dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
288         }
289         return err;
290 }
291
292 void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
293 {
294         struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
295         struct inode *inode;
296         struct f2fs_inode_info *fi;
297 next:
298         spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
299         if (list_empty(head)) {
300                 spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
301                 return;
302         }
303         fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
304         inode = igrab(&fi->vfs_inode);
305         spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
306
307         if (inode) {
308                 if (gc_failure) {
309                         if (fi->i_gc_failures[GC_FAILURE_ATOMIC])
310                                 goto drop;
311                         goto skip;
312                 }
313 drop:
314                 set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
315                 f2fs_drop_inmem_pages(inode);
316                 iput(inode);
317         }
318 skip:
319         congestion_wait(BLK_RW_ASYNC, HZ/50);
320         cond_resched();
321         goto next;
322 }
323
324 void f2fs_drop_inmem_pages(struct inode *inode)
325 {
326         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
327         struct f2fs_inode_info *fi = F2FS_I(inode);
328
329         while (!list_empty(&fi->inmem_pages)) {
330                 mutex_lock(&fi->inmem_lock);
331                 __revoke_inmem_pages(inode, &fi->inmem_pages,
332                                                 true, false, true);
333
334                 if (list_empty(&fi->inmem_pages)) {
335                         spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
336                         if (!list_empty(&fi->inmem_ilist))
337                                 list_del_init(&fi->inmem_ilist);
338                         spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
339                 }
340                 mutex_unlock(&fi->inmem_lock);
341         }
342
343         clear_inode_flag(inode, FI_ATOMIC_FILE);
344         fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
345         stat_dec_atomic_write(inode);
346 }
347
348 void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
349 {
350         struct f2fs_inode_info *fi = F2FS_I(inode);
351         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
352         struct list_head *head = &fi->inmem_pages;
353         struct inmem_pages *cur = NULL;
354
355         f2fs_bug_on(sbi, !IS_ATOMIC_WRITTEN_PAGE(page));
356
357         mutex_lock(&fi->inmem_lock);
358         list_for_each_entry(cur, head, list) {
359                 if (cur->page == page)
360                         break;
361         }
362
363         f2fs_bug_on(sbi, list_empty(head) || cur->page != page);
364         list_del(&cur->list);
365         mutex_unlock(&fi->inmem_lock);
366
367         dec_page_count(sbi, F2FS_INMEM_PAGES);
368         kmem_cache_free(inmem_entry_slab, cur);
369
370         ClearPageUptodate(page);
371         f2fs_clear_page_private(page);
372         f2fs_put_page(page, 0);
373
374         trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
375 }
376
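/*
 * Write back every registered atomic page, recording the old block address
 * of each so the commit can be revoked.  On failure, restore the
 * already-committed blocks and drop the remaining uncommitted pages; on
 * success, just release the revoke list.
 */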
377 static int __f2fs_commit_inmem_pages(struct inode *inode)
378 {
379         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
380         struct f2fs_inode_info *fi = F2FS_I(inode);
381         struct inmem_pages *cur, *tmp;
382         struct f2fs_io_info fio = {
383                 .sbi = sbi,
384                 .ino = inode->i_ino,
385                 .type = DATA,
386                 .op = REQ_OP_WRITE,
387                 .op_flags = REQ_SYNC | REQ_PRIO,
388                 .io_type = FS_DATA_IO,
389         };
390         struct list_head revoke_list;
391         bool submit_bio = false;
392         int err = 0;
393
394         INIT_LIST_HEAD(&revoke_list);
395
396         list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
397                 struct page *page = cur->page;
398
399                 lock_page(page);
400                 if (page->mapping == inode->i_mapping) {
401                         trace_f2fs_commit_inmem_page(page, INMEM);
402
403                         f2fs_wait_on_page_writeback(page, DATA, true, true);
404
405                         set_page_dirty(page);
406                         if (clear_page_dirty_for_io(page)) {
407                                 inode_dec_dirty_pages(inode);
408                                 f2fs_remove_dirty_inode(inode);
409                         }
410 retry:
411                         fio.page = page;
412                         fio.old_blkaddr = NULL_ADDR;
413                         fio.encrypted_page = NULL;
414                         fio.need_lock = LOCK_DONE;
415                         err = f2fs_do_write_data_page(&fio);
416                         if (err) {
417                                 if (err == -ENOMEM) {
418                                         congestion_wait(BLK_RW_ASYNC, HZ/50);
419                                         cond_resched();
420                                         goto retry;
421                                 }
422                                 unlock_page(page);
423                                 break;
424                         }
425                         /* record old blkaddr for revoking */
426                         cur->old_addr = fio.old_blkaddr;
427                         submit_bio = true;
428                 }
429                 unlock_page(page);
430                 list_move_tail(&cur->list, &revoke_list);
431         }
432
433         if (submit_bio)
434                 f2fs_submit_merged_write_cond(sbi, inode, NULL, 0, DATA);
435
436         if (err) {
437                 /*
438                  * try to revoke all committed pages, but this could still fail
439                  * due to lack of memory or some other reason; if that happens,
440                  * -EAGAIN is returned, which means the transaction's integrity
441                  * is already lost, and the caller should use a journal to do
442                  * the recovery or rewrite & commit the last transaction. For
443                  * other error numbers, revoking was done by the filesystem itself.
444                  */
445                 err = __revoke_inmem_pages(inode, &revoke_list,
446                                                 false, true, false);
447
448                 /* drop all uncommitted pages */
449                 __revoke_inmem_pages(inode, &fi->inmem_pages,
450                                                 true, false, false);
451         } else {
452                 __revoke_inmem_pages(inode, &revoke_list,
453                                                 false, false, false);
454         }
455
456         return err;
457 }
458
459 int f2fs_commit_inmem_pages(struct inode *inode)
460 {
461         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
462         struct f2fs_inode_info *fi = F2FS_I(inode);
463         int err;
464
465         f2fs_balance_fs(sbi, true);
466
467         down_write(&fi->i_gc_rwsem[WRITE]);
468
469         f2fs_lock_op(sbi);
470         set_inode_flag(inode, FI_ATOMIC_COMMIT);
471
472         mutex_lock(&fi->inmem_lock);
473         err = __f2fs_commit_inmem_pages(inode);
474
475         spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
476         if (!list_empty(&fi->inmem_ilist))
477                 list_del_init(&fi->inmem_ilist);
478         spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
479         mutex_unlock(&fi->inmem_lock);
480
481         clear_inode_flag(inode, FI_ATOMIC_COMMIT);
482
483         f2fs_unlock_op(sbi);
484         up_write(&fi->i_gc_rwsem[WRITE]);
485
486         return err;
487 }
488
489 /*
490  * This function balances dirty node and dentry pages.
491  * In addition, it controls garbage collection.
492  */
493 void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
494 {
495         if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
496                 f2fs_show_injection_info(FAULT_CHECKPOINT);
497                 f2fs_stop_checkpoint(sbi, false);
498         }
499
500         /* balance_fs_bg is able to be pending */
501         if (need && excess_cached_nats(sbi))
502                 f2fs_balance_fs_bg(sbi);
503
504         if (f2fs_is_checkpoint_ready(sbi))
505                 return;
506
507         /*
508          * We should do GC, or end up with a checkpoint, if there are too many
509          * dirty dir/node pages and not enough free segments.
510          */
511         if (has_not_enough_free_secs(sbi, 0, 0)) {
512                 mutex_lock(&sbi->gc_mutex);
513                 f2fs_gc(sbi, false, false, NULL_SEGNO);
514         }
515 }
516
517 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
518 {
519         if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
520                 return;
521
522         /* try to shrink extent cache when there is not enough memory */
523         if (!f2fs_available_free_memory(sbi, EXTENT_CACHE))
524                 f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);
525
526         /* check the # of cached NAT entries */
527         if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
528                 f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
529
530         if (!f2fs_available_free_memory(sbi, FREE_NIDS))
531                 f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
532         else
533                 f2fs_build_free_nids(sbi, false, false);
534
535         if (!is_idle(sbi, REQ_TIME) &&
536                 (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
537                 return;
538
539         /* checkpoint is the only way to shrink partial cached entries */
540         if (!f2fs_available_free_memory(sbi, NAT_ENTRIES) ||
541                         !f2fs_available_free_memory(sbi, INO_ENTRIES) ||
542                         excess_prefree_segs(sbi) ||
543                         excess_dirty_nats(sbi) ||
544                         excess_dirty_nodes(sbi) ||
545                         f2fs_time_over(sbi, CP_TIME)) {
546                 if (test_opt(sbi, DATA_FLUSH)) {
547                         struct blk_plug plug;
548
549                         blk_start_plug(&plug);
550                         f2fs_sync_dirty_inodes(sbi, FILE_INODE);
551                         blk_finish_plug(&plug);
552                 }
553                 f2fs_sync_fs(sbi->sb, true);
554                 stat_inc_bg_cp_count(sbi->stat_info);
555         }
556 }
557
558 static int __submit_flush_wait(struct f2fs_sb_info *sbi,
559                                 struct block_device *bdev)
560 {
561         struct bio *bio;
562         int ret;
563
564         bio = f2fs_bio_alloc(sbi, 0, false);
565         if (!bio)
566                 return -ENOMEM;
567
568         bio->bi_rw = REQ_OP_WRITE;
569         bio->bi_bdev = bdev;
570         ret = submit_bio_wait(WRITE_FLUSH, bio);
571         bio_put(bio);
572
573         trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
574                                 test_opt(sbi, FLUSH_MERGE), ret);
575         return ret;
576 }
577
578 static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
579 {
580         int ret = 0;
581         int i;
582
583         if (!f2fs_is_multi_device(sbi))
584                 return __submit_flush_wait(sbi, sbi->sb->s_bdev);
585
586         for (i = 0; i < sbi->s_ndevs; i++) {
587                 if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
588                         continue;
589                 ret = __submit_flush_wait(sbi, FDEV(i).bdev);
590                 if (ret)
591                         break;
592         }
593         return ret;
594 }
595
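/*
 * Flush-merge worker: drain the whole issue_list, submit a single device
 * flush on behalf of all queued requests, and complete each waiter with
 * the shared result.
 */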
596 static int issue_flush_thread(void *data)
597 {
598         struct f2fs_sb_info *sbi = data;
599         struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
600         wait_queue_head_t *q = &fcc->flush_wait_queue;
601 repeat:
602         if (kthread_should_stop())
603                 return 0;
604
605         sb_start_intwrite(sbi->sb);
606
607         if (!llist_empty(&fcc->issue_list)) {
608                 struct flush_cmd *cmd, *next;
609                 int ret;
610
611                 fcc->dispatch_list = llist_del_all(&fcc->issue_list);
612                 fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
613
614                 cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);
615
616                 ret = submit_flush_wait(sbi, cmd->ino);
617                 atomic_inc(&fcc->issued_flush);
618
619                 llist_for_each_entry_safe(cmd, next,
620                                           fcc->dispatch_list, llnode) {
621                         cmd->ret = ret;
622                         complete(&cmd->wait);
623                 }
624                 fcc->dispatch_list = NULL;
625         }
626
627         sb_end_intwrite(sbi->sb);
628
629         wait_event_interruptible(*q,
630                 kthread_should_stop() || !llist_empty(&fcc->issue_list));
631         goto repeat;
632 }
633
634 int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
635 {
636         struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
637         struct flush_cmd cmd;
638         int ret;
639
640         if (test_opt(sbi, NOBARRIER))
641                 return 0;
642
643         if (!test_opt(sbi, FLUSH_MERGE)) {
644                 atomic_inc(&fcc->queued_flush);
645                 ret = submit_flush_wait(sbi, ino);
646                 atomic_dec(&fcc->queued_flush);
647                 atomic_inc(&fcc->issued_flush);
648                 return ret;
649         }
650
651         if (atomic_inc_return(&fcc->queued_flush) == 1 ||
652             f2fs_is_multi_device(sbi)) {
653                 ret = submit_flush_wait(sbi, ino);
654                 atomic_dec(&fcc->queued_flush);
655
656                 atomic_inc(&fcc->issued_flush);
657                 return ret;
658         }
659
660         cmd.ino = ino;
661         init_completion(&cmd.wait);
662
663         llist_add(&cmd.llnode, &fcc->issue_list);
664
665         /* update issue_list before we wake up issue_flush thread */
666         smp_mb();
667
668         if (waitqueue_active(&fcc->flush_wait_queue))
669                 wake_up(&fcc->flush_wait_queue);
670
671         if (fcc->f2fs_issue_flush) {
672                 wait_for_completion(&cmd.wait);
673                 atomic_dec(&fcc->queued_flush);
674         } else {
675                 struct llist_node *list;
676
677                 list = llist_del_all(&fcc->issue_list);
678                 if (!list) {
679                         wait_for_completion(&cmd.wait);
680                         atomic_dec(&fcc->queued_flush);
681                 } else {
682                         struct flush_cmd *tmp, *next;
683
684                         ret = submit_flush_wait(sbi, ino);
685
686                         llist_for_each_entry_safe(tmp, next, list, llnode) {
687                                 if (tmp == &cmd) {
688                                         cmd.ret = ret;
689                                         atomic_dec(&fcc->queued_flush);
690                                         continue;
691                                 }
692                                 tmp->ret = ret;
693                                 complete(&tmp->wait);
694                         }
695                 }
696         }
697
698         return cmd.ret;
699 }
700
701 int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
702 {
703         dev_t dev = sbi->sb->s_bdev->bd_dev;
704         struct flush_cmd_control *fcc;
705         int err = 0;
706
707         if (SM_I(sbi)->fcc_info) {
708                 fcc = SM_I(sbi)->fcc_info;
709                 if (fcc->f2fs_issue_flush)
710                         return err;
711                 goto init_thread;
712         }
713
714         fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
715         if (!fcc)
716                 return -ENOMEM;
717         atomic_set(&fcc->issued_flush, 0);
718         atomic_set(&fcc->queued_flush, 0);
719         init_waitqueue_head(&fcc->flush_wait_queue);
720         init_llist_head(&fcc->issue_list);
721         SM_I(sbi)->fcc_info = fcc;
722         if (!test_opt(sbi, FLUSH_MERGE))
723                 return err;
724
725 init_thread:
726         fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
727                                 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
728         if (IS_ERR(fcc->f2fs_issue_flush)) {
729                 err = PTR_ERR(fcc->f2fs_issue_flush);
730                 kvfree(fcc);
731                 SM_I(sbi)->fcc_info = NULL;
732                 return err;
733         }
734
735         return err;
736 }
737
738 void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
739 {
740         struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
741
742         if (fcc && fcc->f2fs_issue_flush) {
743                 struct task_struct *flush_thread = fcc->f2fs_issue_flush;
744
745                 fcc->f2fs_issue_flush = NULL;
746                 kthread_stop(flush_thread);
747         }
748         if (free) {
749                 kvfree(fcc);
750                 SM_I(sbi)->fcc_info = NULL;
751         }
752 }
753
754 int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
755 {
756         int ret = 0, i;
757
758         if (!f2fs_is_multi_device(sbi))
759                 return 0;
760
761         for (i = 1; i < sbi->s_ndevs; i++) {
762                 if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
763                         continue;
764                 ret = __submit_flush_wait(sbi, FDEV(i).bdev);
765                 if (ret)
766                         break;
767
768                 spin_lock(&sbi->dev_lock);
769                 f2fs_clear_bit(i, (char *)&sbi->dirty_device);
770                 spin_unlock(&sbi->dev_lock);
771         }
772
773         return ret;
774 }
775
776 static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
777                 enum dirty_type dirty_type)
778 {
779         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
780
781         /* need not be added */
782         if (IS_CURSEG(sbi, segno))
783                 return;
784
785         if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
786                 dirty_i->nr_dirty[dirty_type]++;
787
788         if (dirty_type == DIRTY) {
789                 struct seg_entry *sentry = get_seg_entry(sbi, segno);
790                 enum dirty_type t = sentry->type;
791
792                 if (unlikely(t >= DIRTY)) {
793                         f2fs_bug_on(sbi, 1);
794                         return;
795                 }
796                 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
797                         dirty_i->nr_dirty[t]++;
798         }
799 }
800
801 static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
802                 enum dirty_type dirty_type)
803 {
804         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
805
806         if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
807                 dirty_i->nr_dirty[dirty_type]--;
808
809         if (dirty_type == DIRTY) {
810                 struct seg_entry *sentry = get_seg_entry(sbi, segno);
811                 enum dirty_type t = sentry->type;
812
813                 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
814                         dirty_i->nr_dirty[t]--;
815
816                 if (get_valid_blocks(sbi, segno, true) == 0)
817                         clear_bit(GET_SEC_FROM_SEG(sbi, segno),
818                                                 dirty_i->victim_secmap);
819         }
820 }
821
822 /*
823  * Errors such as -ENOMEM should not occur here.
824  * Adding a dirty entry into the seglist is not a critical operation.
825  * If a given segment is one of the current working segments, it won't be added.
826  */
827 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
828 {
829         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
830         unsigned short valid_blocks, ckpt_valid_blocks;
831
832         if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
833                 return;
834
835         mutex_lock(&dirty_i->seglist_lock);
836
837         valid_blocks = get_valid_blocks(sbi, segno, false);
838         ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
839
840         if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
841                                 ckpt_valid_blocks == sbi->blocks_per_seg)) {
842                 __locate_dirty_segment(sbi, segno, PRE);
843                 __remove_dirty_segment(sbi, segno, DIRTY);
844         } else if (valid_blocks < sbi->blocks_per_seg) {
845                 __locate_dirty_segment(sbi, segno, DIRTY);
846         } else {
847                 /* Recovery routine with SSR needs this */
848                 __remove_dirty_segment(sbi, segno, DIRTY);
849         }
850
851         mutex_unlock(&dirty_i->seglist_lock);
852 }
853
854 /* This moves currently empty dirty blocks to prefree. Must hold seglist_lock */
855 void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
856 {
857         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
858         unsigned int segno;
859
860         mutex_lock(&dirty_i->seglist_lock);
861         for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
862                 if (get_valid_blocks(sbi, segno, false))
863                         continue;
864                 if (IS_CURSEG(sbi, segno))
865                         continue;
866                 __locate_dirty_segment(sbi, segno, PRE);
867                 __remove_dirty_segment(sbi, segno, DIRTY);
868         }
869         mutex_unlock(&dirty_i->seglist_lock);
870 }
871
872 int f2fs_disable_cp_again(struct f2fs_sb_info *sbi)
873 {
874         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
875         block_t ovp = overprovision_segments(sbi) << sbi->log_blocks_per_seg;
876         block_t holes[2] = {0, 0};      /* DATA and NODE */
877         struct seg_entry *se;
878         unsigned int segno;
879
880         mutex_lock(&dirty_i->seglist_lock);
881         for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
882                 se = get_seg_entry(sbi, segno);
883                 if (IS_NODESEG(se->type))
884                         holes[NODE] += sbi->blocks_per_seg - se->valid_blocks;
885                 else
886                         holes[DATA] += sbi->blocks_per_seg - se->valid_blocks;
887         }
888         mutex_unlock(&dirty_i->seglist_lock);
889
890         if (holes[DATA] > ovp || holes[NODE] > ovp)
891                 return -EAGAIN;
892         if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
893                 dirty_segments(sbi) > overprovision_segments(sbi))
894                 return -EAGAIN;
895         return 0;
896 }
897
898 /* This is only used by SBI_CP_DISABLED */
899 static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
900 {
901         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
902         unsigned int segno = 0;
903
904         mutex_lock(&dirty_i->seglist_lock);
905         for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
906                 if (get_valid_blocks(sbi, segno, false))
907                         continue;
908                 if (get_ckpt_valid_blocks(sbi, segno))
909                         continue;
910                 mutex_unlock(&dirty_i->seglist_lock);
911                 return segno;
912         }
913         mutex_unlock(&dirty_i->seglist_lock);
914         return NULL_SEGNO;
915 }
916
917 static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
918                 struct block_device *bdev, block_t lstart,
919                 block_t start, block_t len)
920 {
921         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
922         struct list_head *pend_list;
923         struct discard_cmd *dc;
924
925         f2fs_bug_on(sbi, !len);
926
927         pend_list = &dcc->pend_list[plist_idx(len)];
928
929         dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
930         INIT_LIST_HEAD(&dc->list);
931         dc->bdev = bdev;
932         dc->lstart = lstart;
933         dc->start = start;
934         dc->len = len;
935         dc->ref = 0;
936         dc->state = D_PREP;
937         dc->queued = 0;
938         dc->error = 0;
939         init_completion(&dc->wait);
940         list_add_tail(&dc->list, pend_list);
941         spin_lock_init(&dc->lock);
942         dc->bio_ref = 0;
943         atomic_inc(&dcc->discard_cmd_cnt);
944         dcc->undiscard_blks += len;
945
946         return dc;
947 }
948
949 static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
950                                 struct block_device *bdev, block_t lstart,
951                                 block_t start, block_t len,
952                                 struct rb_node *parent, struct rb_node **p)
953 {
954         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
955         struct discard_cmd *dc;
956
957         dc = __create_discard_cmd(sbi, bdev, lstart, start, len);
958
959         rb_link_node(&dc->rb_node, parent, p);
960         rb_insert_color(&dc->rb_node, &dcc->root);
961
962         return dc;
963 }
964
965 static void __detach_discard_cmd(struct discard_cmd_control *dcc,
966                                                         struct discard_cmd *dc)
967 {
968         if (dc->state == D_DONE)
969                 atomic_sub(dc->queued, &dcc->queued_discard);
970
971         list_del(&dc->list);
972         rb_erase(&dc->rb_node, &dcc->root);
973         dcc->undiscard_blks -= dc->len;
974
975         kmem_cache_free(discard_cmd_slab, dc);
976
977         atomic_dec(&dcc->discard_cmd_cnt);
978 }
979
980 static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
981                                                         struct discard_cmd *dc)
982 {
983         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
984         unsigned long flags;
985
986         trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);
987
988         spin_lock_irqsave(&dc->lock, flags);
989         if (dc->bio_ref) {
990                 spin_unlock_irqrestore(&dc->lock, flags);
991                 return;
992         }
993         spin_unlock_irqrestore(&dc->lock, flags);
994
995         f2fs_bug_on(sbi, dc->ref);
996
997         if (dc->error == -EOPNOTSUPP)
998                 dc->error = 0;
999
1000         if (dc->error)
1001                 printk_ratelimited(
1002                         "%sF2FS-fs: Issue discard(%u, %u, %u) failed, ret: %d",
1003                         KERN_INFO, dc->lstart, dc->start, dc->len, dc->error);
1004         __detach_discard_cmd(dcc, dc);
1005 }
1006
1007 static void f2fs_submit_discard_endio(struct bio *bio)
1008 {
1009         struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
1010         unsigned long flags;
1011
1012         dc->error = bio->bi_error;
1013
1014         spin_lock_irqsave(&dc->lock, flags);
1015         dc->bio_ref--;
1016         if (!dc->bio_ref && dc->state == D_SUBMIT) {
1017                 dc->state = D_DONE;
1018                 complete_all(&dc->wait);
1019         }
1020         spin_unlock_irqrestore(&dc->lock, flags);
1021         bio_put(bio);
1022 }
1023
1024 /* copied from block/blk-lib.c in 4.10-rc1 */
1025 static int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
1026                 sector_t nr_sects, gfp_t gfp_mask, int flags,
1027                 struct bio **biop)
1028 {
1029         struct request_queue *q = bdev_get_queue(bdev);
1030         struct bio *bio = *biop;
1031         unsigned int granularity;
1032         int op = REQ_WRITE | REQ_DISCARD;
1033         int alignment;
1034         sector_t bs_mask;
1035
1036         if (!q)
1037                 return -ENXIO;
1038
1039         if (!blk_queue_discard(q))
1040                 return -EOPNOTSUPP;
1041
1042         if (flags & BLKDEV_DISCARD_SECURE) {
1043                 if (!blk_queue_secdiscard(q))
1044                         return -EOPNOTSUPP;
1045                 op |= REQ_SECURE;
1046         }
1047
1048         bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
1049         if ((sector | nr_sects) & bs_mask)
1050                 return -EINVAL;
1051
1052         /* Zero-sector (unknown) and one-sector granularities are the same.  */
1053         granularity = max(q->limits.discard_granularity >> 9, 1U);
1054         alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
1055
1056         while (nr_sects) {
1057                 unsigned int req_sects;
1058                 sector_t end_sect, tmp;
1059
1060                 /* Make sure bi_size doesn't overflow */
1061                 req_sects = min_t(sector_t, nr_sects, UINT_MAX >> 9);
1062
1063                 /**
1064                  * If splitting a request, and the next starting sector would be
1065                  * misaligned, stop the discard at the previous aligned sector.
1066                  */
1067                 end_sect = sector + req_sects;
1068                 tmp = end_sect;
1069                 if (req_sects < nr_sects &&
1070                     sector_div(tmp, granularity) != alignment) {
1071                         end_sect = end_sect - alignment;
1072                         sector_div(end_sect, granularity);
1073                         end_sect = end_sect * granularity + alignment;
1074                         req_sects = end_sect - sector;
1075                 }
1076
1077                 if (bio) {
1078                         int ret = submit_bio_wait(op, bio);
1079                         bio_put(bio);
1080                         if (ret)
1081                                 return ret;
1082                 }
1083                 bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, 1);
1084                 bio->bi_iter.bi_sector = sector;
1085                 bio->bi_bdev = bdev;
1086                 bio_set_op_attrs(bio, op, 0);
1087
1088                 bio->bi_iter.bi_size = req_sects << 9;
1089                 nr_sects -= req_sects;
1090                 sector = end_sect;
1091
1092                 /*
1093                  * We can loop for a long time in here, if someone does
1094                  * full device discards (like mkfs). Be nice and allow
1095                  * us to schedule out to avoid softlocking if preempt
1096                  * is disabled.
1097                  */
1098                 cond_resched();
1099         }
1100
1101         *biop = bio;
1102         return 0;
1103 }
1104
1105 static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
1106                                 block_t start, block_t end)
1107 {
1108 #ifdef CONFIG_F2FS_CHECK_FS
1109         struct seg_entry *sentry;
1110         unsigned int segno;
1111         block_t blk = start;
1112         unsigned long offset, size, max_blocks = sbi->blocks_per_seg;
1113         unsigned long *map;
1114
1115         while (blk < end) {
1116                 segno = GET_SEGNO(sbi, blk);
1117                 sentry = get_seg_entry(sbi, segno);
1118                 offset = GET_BLKOFF_FROM_SEG0(sbi, blk);
1119
1120                 if (end < START_BLOCK(sbi, segno + 1))
1121                         size = GET_BLKOFF_FROM_SEG0(sbi, end);
1122                 else
1123                         size = max_blocks;
1124                 map = (unsigned long *)(sentry->cur_valid_map);
1125                 offset = __find_rev_next_bit(map, size, offset);
1126                 f2fs_bug_on(sbi, offset != size);
1127                 blk = START_BLOCK(sbi, segno + 1);
1128         }
1129 #endif
1130 }
1131
1132 static void __init_discard_policy(struct f2fs_sb_info *sbi,
1133                                 struct discard_policy *dpolicy,
1134                                 int discard_type, unsigned int granularity)
1135 {
1136         /* common policy */
1137         dpolicy->type = discard_type;
1138         dpolicy->sync = true;
1139         dpolicy->ordered = false;
1140         dpolicy->granularity = granularity;
1141
1142         dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
1143         dpolicy->io_aware_gran = MAX_PLIST_NUM;
1144         dpolicy->timeout = 0;
1145
1146         if (discard_type == DPOLICY_BG) {
1147                 dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1148                 dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
1149                 dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
1150                 dpolicy->io_aware = true;
1151                 dpolicy->sync = false;
1152                 dpolicy->ordered = true;
1153                 if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
1154                         dpolicy->granularity = 1;
1155                         dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1156                 }
1157         } else if (discard_type == DPOLICY_FORCE) {
1158                 dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1159                 dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
1160                 dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
1161                 dpolicy->io_aware = false;
1162         } else if (discard_type == DPOLICY_FSTRIM) {
1163                 dpolicy->io_aware = false;
1164         } else if (discard_type == DPOLICY_UMOUNT) {
1165                 dpolicy->max_requests = UINT_MAX;
1166                 dpolicy->io_aware = false;
1167                 /* we need to issue all to keep CP_TRIMMED_FLAG */
1168                 dpolicy->granularity = 1;
1169         }
1170 }
1171
1172 static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1173                                 struct block_device *bdev, block_t lstart,
1174                                 block_t start, block_t len);
1175 /* this function is copied from blkdev_issue_discard() in block/blk-lib.c */
1176 static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
1177                                                 struct discard_policy *dpolicy,
1178                                                 struct discard_cmd *dc,
1179                                                 unsigned int *issued)
1180 {
1181         struct block_device *bdev = dc->bdev;
1182         struct request_queue *q = bdev_get_queue(bdev);
1183         unsigned int max_discard_blocks =
1184                         SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
1185         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1186         struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1187                                         &(dcc->fstrim_list) : &(dcc->wait_list);
1188         int flag = dpolicy->sync ? REQ_SYNC : 0;
1189         block_t lstart, start, len, total_len;
1190         int err = 0;
1191
1192         if (dc->state != D_PREP)
1193                 return 0;
1194
1195         if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
1196                 return 0;
1197
1198         trace_f2fs_issue_discard(bdev, dc->start, dc->len);
1199
1200         lstart = dc->lstart;
1201         start = dc->start;
1202         len = dc->len;
1203         total_len = len;
1204
1205         dc->len = 0;
1206
1207         while (total_len && *issued < dpolicy->max_requests && !err) {
1208                 struct bio *bio = NULL;
1209                 unsigned long flags;
1210                 bool last = true;
1211
1212                 if (len > max_discard_blocks) {
1213                         len = max_discard_blocks;
1214                         last = false;
1215                 }
1216
1217                 (*issued)++;
1218                 if (*issued == dpolicy->max_requests)
1219                         last = true;
1220
1221                 dc->len += len;
1222
1223                 if (time_to_inject(sbi, FAULT_DISCARD)) {
1224                         f2fs_show_injection_info(FAULT_DISCARD);
1225                         err = -EIO;
1226                         goto submit;
1227                 }
1228                 err = __blkdev_issue_discard(bdev,
1229                                         SECTOR_FROM_BLOCK(start),
1230                                         SECTOR_FROM_BLOCK(len),
1231                                         GFP_NOFS, 0, &bio);
1232 submit:
1233                 if (err) {
1234                         spin_lock_irqsave(&dc->lock, flags);
1235                         if (dc->state == D_PARTIAL)
1236                                 dc->state = D_SUBMIT;
1237                         spin_unlock_irqrestore(&dc->lock, flags);
1238
1239                         break;
1240                 }
1241
1242                 f2fs_bug_on(sbi, !bio);
1243
1244                 /*
1245                  * this must be done before submission so the endio handler
1246                  * does not mark the command D_DONE right away
1247                  */
1248                 spin_lock_irqsave(&dc->lock, flags);
1249                 if (last)
1250                         dc->state = D_SUBMIT;
1251                 else
1252                         dc->state = D_PARTIAL;
1253                 dc->bio_ref++;
1254                 spin_unlock_irqrestore(&dc->lock, flags);
1255
1256                 atomic_inc(&dcc->queued_discard);
1257                 dc->queued++;
1258                 list_move_tail(&dc->list, wait_list);
1259
1260                 /* sanity check on discard range */
1261                 __check_sit_bitmap(sbi, lstart, lstart + len);
1262
1263                 bio->bi_private = dc;
1264                 bio->bi_end_io = f2fs_submit_discard_endio;
1265                 submit_bio(flag, bio);
1266
1267                 atomic_inc(&dcc->issued_discard);
1268
1269                 f2fs_update_iostat(sbi, FS_DISCARD, 1);
1270
1271                 lstart += len;
1272                 start += len;
1273                 total_len -= len;
1274                 len = total_len;
1275         }
1276
1277         if (!err && len)
1278                 __update_discard_tree_range(sbi, bdev, lstart, start, len);
1279         return err;
1280 }
1281
1282 static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
1283                                 struct block_device *bdev, block_t lstart,
1284                                 block_t start, block_t len,
1285                                 struct rb_node **insert_p,
1286                                 struct rb_node *insert_parent)
1287 {
1288         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1289         struct rb_node **p;
1290         struct rb_node *parent = NULL;
1291         struct discard_cmd *dc = NULL;
1292
1293         if (insert_p && insert_parent) {
1294                 parent = insert_parent;
1295                 p = insert_p;
1296                 goto do_insert;
1297         }
1298
1299         p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart);
1300 do_insert:
1301         dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, p);
1302         if (!dc)
1303                 return NULL;
1304
1305         return dc;
1306 }
1307
1308 static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
1309                                                 struct discard_cmd *dc)
1310 {
1311         list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]);
1312 }
1313
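/*
 * @blkaddr is being reused, so carve it out of the pending discard command
 * @dc: trim the command and, if the block falls in the middle, split the
 * remainder into a new command.  A completed or single-block command is
 * simply removed.
 */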
1314 static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
1315                                 struct discard_cmd *dc, block_t blkaddr)
1316 {
1317         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1318         struct discard_info di = dc->di;
1319         bool modified = false;
1320
1321         if (dc->state == D_DONE || dc->len == 1) {
1322                 __remove_discard_cmd(sbi, dc);
1323                 return;
1324         }
1325
1326         dcc->undiscard_blks -= di.len;
1327
1328         if (blkaddr > di.lstart) {
1329                 dc->len = blkaddr - dc->lstart;
1330                 dcc->undiscard_blks += dc->len;
1331                 __relocate_discard_cmd(dcc, dc);
1332                 modified = true;
1333         }
1334
1335         if (blkaddr < di.lstart + di.len - 1) {
1336                 if (modified) {
1337                         __insert_discard_tree(sbi, dc->bdev, blkaddr + 1,
1338                                         di.start + blkaddr + 1 - di.lstart,
1339                                         di.lstart + di.len - 1 - blkaddr,
1340                                         NULL, NULL);
1341                 } else {
1342                         dc->lstart++;
1343                         dc->len--;
1344                         dc->start++;
1345                         dcc->undiscard_blks += dc->len;
1346                         __relocate_discard_cmd(dcc, dc);
1347                 }
1348         }
1349 }
1350
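/*
 * Add the discard range [lstart, lstart + len) to the rb-tree, merging it
 * with adjacent pending commands on the same device where the device's
 * max_discard_sectors limit allows, and inserting new commands for any
 * uncovered pieces.
 */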
1351 static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1352                                 struct block_device *bdev, block_t lstart,
1353                                 block_t start, block_t len)
1354 {
1355         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1356         struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1357         struct discard_cmd *dc;
1358         struct discard_info di = {0};
1359         struct rb_node **insert_p = NULL, *insert_parent = NULL;
1360         struct request_queue *q = bdev_get_queue(bdev);
1361         unsigned int max_discard_blocks =
1362                         SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
1363         block_t end = lstart + len;
1364
1365         dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
1366                                         NULL, lstart,
1367                                         (struct rb_entry **)&prev_dc,
1368                                         (struct rb_entry **)&next_dc,
1369                                         &insert_p, &insert_parent, true);
1370         if (dc)
1371                 prev_dc = dc;
1372
1373         if (!prev_dc) {
1374                 di.lstart = lstart;
1375                 di.len = next_dc ? next_dc->lstart - lstart : len;
1376                 di.len = min(di.len, len);
1377                 di.start = start;
1378         }
1379
1380         while (1) {
1381                 struct rb_node *node;
1382                 bool merged = false;
1383                 struct discard_cmd *tdc = NULL;
1384
1385                 if (prev_dc) {
1386                         di.lstart = prev_dc->lstart + prev_dc->len;
1387                         if (di.lstart < lstart)
1388                                 di.lstart = lstart;
1389                         if (di.lstart >= end)
1390                                 break;
1391
1392                         if (!next_dc || next_dc->lstart > end)
1393                                 di.len = end - di.lstart;
1394                         else
1395                                 di.len = next_dc->lstart - di.lstart;
1396                         di.start = start + di.lstart - lstart;
1397                 }
1398
1399                 if (!di.len)
1400                         goto next;
1401
1402                 if (prev_dc && prev_dc->state == D_PREP &&
1403                         prev_dc->bdev == bdev &&
1404                         __is_discard_back_mergeable(&di, &prev_dc->di,
1405                                                         max_discard_blocks)) {
1406                         prev_dc->di.len += di.len;
1407                         dcc->undiscard_blks += di.len;
1408                         __relocate_discard_cmd(dcc, prev_dc);
1409                         di = prev_dc->di;
1410                         tdc = prev_dc;
1411                         merged = true;
1412                 }
1413
1414                 if (next_dc && next_dc->state == D_PREP &&
1415                         next_dc->bdev == bdev &&
1416                         __is_discard_front_mergeable(&di, &next_dc->di,
1417                                                         max_discard_blocks)) {
1418                         next_dc->di.lstart = di.lstart;
1419                         next_dc->di.len += di.len;
1420                         next_dc->di.start = di.start;
1421                         dcc->undiscard_blks += di.len;
1422                         __relocate_discard_cmd(dcc, next_dc);
1423                         if (tdc)
1424                                 __remove_discard_cmd(sbi, tdc);
1425                         merged = true;
1426                 }
1427
1428                 if (!merged) {
1429                         __insert_discard_tree(sbi, bdev, di.lstart, di.start,
1430                                                         di.len, NULL, NULL);
1431                 }
1432  next:
1433                 prev_dc = next_dc;
1434                 if (!prev_dc)
1435                         break;
1436
1437                 node = rb_next(&prev_dc->rb_node);
1438                 next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1439         }
1440 }
1441
1442 static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
1443                 struct block_device *bdev, block_t blkstart, block_t blklen)
1444 {
1445         block_t lblkstart = blkstart;
1446
1447         if (!f2fs_bdev_support_discard(bdev))
1448                 return 0;
1449
1450         trace_f2fs_queue_discard(bdev, blkstart, blklen);
1451
1452         if (f2fs_is_multi_device(sbi)) {
1453                 int devi = f2fs_target_device_index(sbi, blkstart);
1454
1455                 blkstart -= FDEV(devi).start_blk;
1456         }
1457         mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
1458         __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
1459         mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
1460         return 0;
1461 }
1462
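/*
 * Issue pending discard commands in LBA order, resuming from dcc->next_pos,
 * until the policy's request budget is spent or, for io_aware policies,
 * other I/O needs the device.
 */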
1463 static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
1464                                         struct discard_policy *dpolicy)
1465 {
1466         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1467         struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1468         struct rb_node **insert_p = NULL, *insert_parent = NULL;
1469         struct discard_cmd *dc;
1470         struct blk_plug plug;
1471         unsigned int pos = dcc->next_pos;
1472         unsigned int issued = 0;
1473         bool io_interrupted = false;
1474
1475         mutex_lock(&dcc->cmd_lock);
1476         dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
1477                                         NULL, pos,
1478                                         (struct rb_entry **)&prev_dc,
1479                                         (struct rb_entry **)&next_dc,
1480                                         &insert_p, &insert_parent, true);
1481         if (!dc)
1482                 dc = next_dc;
1483
1484         blk_start_plug(&plug);
1485
1486         while (dc) {
1487                 struct rb_node *node;
1488                 int err = 0;
1489
1490                 if (dc->state != D_PREP)
1491                         goto next;
1492
1493                 if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
1494                         io_interrupted = true;
1495                         break;
1496                 }
1497
1498                 dcc->next_pos = dc->lstart + dc->len;
1499                 err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
1500
1501                 if (issued >= dpolicy->max_requests)
1502                         break;
1503 next:
1504                 node = rb_next(&dc->rb_node);
1505                 if (err)
1506                         __remove_discard_cmd(sbi, dc);
1507                 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1508         }
1509
1510         blk_finish_plug(&plug);
1511
1512         if (!dc)
1513                 dcc->next_pos = 0;
1514
1515         mutex_unlock(&dcc->cmd_lock);
1516
1517         if (!issued && io_interrupted)
1518                 issued = -1;
1519
1520         return issued;
1521 }
1522
1523 static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
1524                                         struct discard_policy *dpolicy)
1525 {
1526         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1527         struct list_head *pend_list;
1528         struct discard_cmd *dc, *tmp;
1529         struct blk_plug plug;
1530         int i, issued = 0;
1531         bool io_interrupted = false;
1532
1533         if (dpolicy->timeout != 0)
1534                 f2fs_update_time(sbi, dpolicy->timeout);
1535
1536         for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1537                 if (dpolicy->timeout != 0 &&
1538                                 f2fs_time_over(sbi, dpolicy->timeout))
1539                         break;
1540
1541                 if (i + 1 < dpolicy->granularity)
1542                         break;
1543
1544                 if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
1545                         return __issue_discard_cmd_orderly(sbi, dpolicy);
1546
1547                 pend_list = &dcc->pend_list[i];
1548
1549                 mutex_lock(&dcc->cmd_lock);
1550                 if (list_empty(pend_list))
1551                         goto next;
1552                 if (unlikely(dcc->rbtree_check))
1553                         f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
1554                                                                 &dcc->root));
1555                 blk_start_plug(&plug);
1556                 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1557                         f2fs_bug_on(sbi, dc->state != D_PREP);
1558
1559                         if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
1560                                                 !is_idle(sbi, DISCARD_TIME)) {
1561                                 io_interrupted = true;
1562                                 break;
1563                         }
1564
1565                         __submit_discard_cmd(sbi, dpolicy, dc, &issued);
1566
1567                         if (issued >= dpolicy->max_requests)
1568                                 break;
1569                 }
1570                 blk_finish_plug(&plug);
1571 next:
1572                 mutex_unlock(&dcc->cmd_lock);
1573
1574                 if (issued >= dpolicy->max_requests || io_interrupted)
1575                         break;
1576         }
1577
1578         if (!issued && io_interrupted)
1579                 issued = -1;
1580
1581         return issued;
1582 }
1583
1584 static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
1585 {
1586         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1587         struct list_head *pend_list;
1588         struct discard_cmd *dc, *tmp;
1589         int i;
1590         bool dropped = false;
1591
1592         mutex_lock(&dcc->cmd_lock);
1593         for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1594                 pend_list = &dcc->pend_list[i];
1595                 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1596                         f2fs_bug_on(sbi, dc->state != D_PREP);
1597                         __remove_discard_cmd(sbi, dc);
1598                         dropped = true;
1599                 }
1600         }
1601         mutex_unlock(&dcc->cmd_lock);
1602
1603         return dropped;
1604 }
1605
1606 void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
1607 {
1608         __drop_discard_cmd(sbi);
1609 }
1610
1611 static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
1612                                                         struct discard_cmd *dc)
1613 {
1614         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1615         unsigned int len = 0;
1616
1617         wait_for_completion_io(&dc->wait);
1618         mutex_lock(&dcc->cmd_lock);
1619         f2fs_bug_on(sbi, dc->state != D_DONE);
1620         dc->ref--;
1621         if (!dc->ref) {
1622                 if (!dc->error)
1623                         len = dc->len;
1624                 __remove_discard_cmd(sbi, dc);
1625         }
1626         mutex_unlock(&dcc->cmd_lock);
1627
1628         return len;
1629 }
1630
1631 static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
1632                                                 struct discard_policy *dpolicy,
1633                                                 block_t start, block_t end)
1634 {
1635         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1636         struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1637                                         &(dcc->fstrim_list) : &(dcc->wait_list);
1638         struct discard_cmd *dc, *tmp;
1639         bool need_wait;
1640         unsigned int trimmed = 0;
1641
1642 next:
1643         need_wait = false;
1644
1645         mutex_lock(&dcc->cmd_lock);
1646         list_for_each_entry_safe(dc, tmp, wait_list, list) {
1647                 if (dc->lstart + dc->len <= start || end <= dc->lstart)
1648                         continue;
1649                 if (dc->len < dpolicy->granularity)
1650                         continue;
1651                 if (dc->state == D_DONE && !dc->ref) {
1652                         wait_for_completion_io(&dc->wait);
1653                         if (!dc->error)
1654                                 trimmed += dc->len;
1655                         __remove_discard_cmd(sbi, dc);
1656                 } else {
1657                         dc->ref++;
1658                         need_wait = true;
1659                         break;
1660                 }
1661         }
1662         mutex_unlock(&dcc->cmd_lock);
1663
1664         if (need_wait) {
1665                 trimmed += __wait_one_discard_bio(sbi, dc);
1666                 goto next;
1667         }
1668
1669         return trimmed;
1670 }
1671
1672 static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1673                                                 struct discard_policy *dpolicy)
1674 {
1675         struct discard_policy dp;
1676         unsigned int discard_blks;
1677
1678         if (dpolicy)
1679                 return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
1680
1681         /* wait all */
1682         __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
1683         discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1684         __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
1685         discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1686
1687         return discard_blks;
1688 }
1689
1690 /* This must be called while holding the global lock, &sit_i->sentry_lock */
1691 static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
1692 {
1693         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1694         struct discard_cmd *dc;
1695         bool need_wait = false;
1696
1697         mutex_lock(&dcc->cmd_lock);
1698         dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root,
1699                                                         NULL, blkaddr);
1700         if (dc) {
1701                 if (dc->state == D_PREP) {
1702                         __punch_discard_cmd(sbi, dc, blkaddr);
1703                 } else {
1704                         dc->ref++;
1705                         need_wait = true;
1706                 }
1707         }
1708         mutex_unlock(&dcc->cmd_lock);
1709
1710         if (need_wait)
1711                 __wait_one_discard_bio(sbi, dc);
1712 }
1713
1714 void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
1715 {
1716         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1717
1718         if (dcc && dcc->f2fs_issue_discard) {
1719                 struct task_struct *discard_thread = dcc->f2fs_issue_discard;
1720
1721                 dcc->f2fs_issue_discard = NULL;
1722                 kthread_stop(discard_thread);
1723         }
1724 }
1725
1726 /* This comes from f2fs_put_super */
1727 bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
1728 {
1729         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1730         struct discard_policy dpolicy;
1731         bool dropped;
1732
1733         __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
1734                                         dcc->discard_granularity);
1735         dpolicy.timeout = UMOUNT_DISCARD_TIMEOUT;
1736         __issue_discard_cmd(sbi, &dpolicy);
1737         dropped = __drop_discard_cmd(sbi);
1738
1739         /* just to make sure there are no pending discard commands */
1740         __wait_all_discard_cmd(sbi, NULL);
1741
1742         f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
1743         return dropped;
1744 }
1745
1746 static int issue_discard_thread(void *data)
1747 {
1748         struct f2fs_sb_info *sbi = data;
1749         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1750         wait_queue_head_t *q = &dcc->discard_wait_queue;
1751         struct discard_policy dpolicy;
1752         unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
1753         int issued;
1754
1755         set_freezable();
1756
1757         do {
1758                 __init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
1759                                         dcc->discard_granularity);
1760
1761                 wait_event_interruptible_timeout(*q,
1762                                 kthread_should_stop() || freezing(current) ||
1763                                 dcc->discard_wake,
1764                                 msecs_to_jiffies(wait_ms));
1765
1766                 if (dcc->discard_wake)
1767                         dcc->discard_wake = 0;
1768
1769                 /* clean up pending candidates before going to sleep */
1770                 if (atomic_read(&dcc->queued_discard))
1771                         __wait_all_discard_cmd(sbi, NULL);
1772
1773                 if (try_to_freeze())
1774                         continue;
1775                 if (f2fs_readonly(sbi->sb))
1776                         continue;
1777                 if (kthread_should_stop())
1778                         return 0;
1779                 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
1780                         wait_ms = dpolicy.max_interval;
1781                         continue;
1782                 }
1783
1784                 if (sbi->gc_mode == GC_URGENT)
1785                         __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
1786
1787                 sb_start_intwrite(sbi->sb);
1788
1789                 issued = __issue_discard_cmd(sbi, &dpolicy);
1790                 if (issued > 0) {
1791                         __wait_all_discard_cmd(sbi, &dpolicy);
1792                         wait_ms = dpolicy.min_interval;
1793                 } else if (issued == -1) {
1794                         wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
1795                         if (!wait_ms)
1796                                 wait_ms = dpolicy.mid_interval;
1797                 } else {
1798                         wait_ms = dpolicy.max_interval;
1799                 }
1800
1801                 sb_end_intwrite(sbi->sb);
1802
1803         } while (!kthread_should_stop());
1804         return 0;
1805 }
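/*
 * Editor's note (illustrative sketch, not part of the original file): the
 * thread above adapts its sleep time to how the last pass went. A simplified
 * model of that choice, with the policy intervals passed in as plain values:
 */
#if 0
static unsigned int toy_pick_wait_ms(int issued, unsigned int min_ms,
				     unsigned int mid_ms, unsigned int max_ms,
				     unsigned int io_busy_ms)
{
	if (issued > 0)			/* made progress: poll again soon */
		return min_ms;
	if (issued == -1)		/* interrupted by foreground I/O */
		return io_busy_ms ? io_busy_ms : mid_ms;
	return max_ms;			/* nothing to do: sleep the longest */
}
#endif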
1806
1807 #ifdef CONFIG_BLK_DEV_ZONED
1808 static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
1809                 struct block_device *bdev, block_t blkstart, block_t blklen)
1810 {
1811         sector_t sector, nr_sects;
1812         block_t lblkstart = blkstart;
1813         int devi = 0;
1814
1815         if (f2fs_is_multi_device(sbi)) {
1816                 devi = f2fs_target_device_index(sbi, blkstart);
1817                 if (blkstart < FDEV(devi).start_blk ||
1818                     blkstart > FDEV(devi).end_blk) {
1819                         f2fs_msg(sbi->sb, KERN_ERR, "Invalid block %x",
1820                                  blkstart);
1821                         return -EIO;
1822                 }
1823                 blkstart -= FDEV(devi).start_blk;
1824         }
1825
1826         /* For sequential zones, reset the zone write pointer */
1827         if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
1828                 sector = SECTOR_FROM_BLOCK(blkstart);
1829                 nr_sects = SECTOR_FROM_BLOCK(blklen);
1830
1831                 if (sector & (bdev_zone_sectors(bdev) - 1) ||
1832                                 nr_sects != bdev_zone_sectors(bdev)) {
1833                         f2fs_msg(sbi->sb, KERN_ERR,
1834                                 "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
1835                                 devi, sbi->s_ndevs ? FDEV(devi).path : "",
1836                                 blkstart, blklen);
1837                         return -EIO;
1838                 }
1839                 trace_f2fs_issue_reset_zone(bdev, blkstart);
1840                 return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS);
1841         }
1842
1843         /* For conventional zones, use regular discard if supported */
1844         return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
1845 }
1846 #endif
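/*
 * Editor's note (illustrative sketch, not part of the original file): the
 * zone-reset path above only accepts a request that covers exactly one zone.
 * Assuming 4KB f2fs blocks and 512B sectors (so one block is 8 sectors), the
 * alignment check reduces to:
 */
#if 0
static bool toy_covers_one_zone(unsigned long long blkstart,
				unsigned long long blklen,
				unsigned long long zone_sectors)
{
	unsigned long long sector = blkstart << 3;	/* SECTOR_FROM_BLOCK */
	unsigned long long nr_sects = blklen << 3;

	/* must start on a zone boundary and span exactly one zone */
	return !(sector & (zone_sectors - 1)) && nr_sects == zone_sectors;
}
#endif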
1847
1848 static int __issue_discard_async(struct f2fs_sb_info *sbi,
1849                 struct block_device *bdev, block_t blkstart, block_t blklen)
1850 {
1851 #ifdef CONFIG_BLK_DEV_ZONED
1852         if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev))
1853                 return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
1854 #endif
1855         return __queue_discard_cmd(sbi, bdev, blkstart, blklen);
1856 }
1857
1858 static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
1859                                 block_t blkstart, block_t blklen)
1860 {
1861         sector_t start = blkstart, len = 0;
1862         struct block_device *bdev;
1863         struct seg_entry *se;
1864         unsigned int offset;
1865         block_t i;
1866         int err = 0;
1867
1868         bdev = f2fs_target_device(sbi, blkstart, NULL);
1869
1870         for (i = blkstart; i < blkstart + blklen; i++, len++) {
1871                 if (i != start) {
1872                         struct block_device *bdev2 =
1873                                 f2fs_target_device(sbi, i, NULL);
1874
1875                         if (bdev2 != bdev) {
1876                                 err = __issue_discard_async(sbi, bdev,
1877                                                 start, len);
1878                                 if (err)
1879                                         return err;
1880                                 bdev = bdev2;
1881                                 start = i;
1882                                 len = 0;
1883                         }
1884                 }
1885
1886                 se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
1887                 offset = GET_BLKOFF_FROM_SEG0(sbi, i);
1888
1889                 if (!f2fs_test_and_set_bit(offset, se->discard_map))
1890                         sbi->discard_blks--;
1891         }
1892
1893         if (len)
1894                 err = __issue_discard_async(sbi, bdev, start, len);
1895         return err;
1896 }
1897
1898 static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
1899                                                         bool check_only)
1900 {
1901         int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
1902         int max_blocks = sbi->blocks_per_seg;
1903         struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
1904         unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
1905         unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
1906         unsigned long *discard_map = (unsigned long *)se->discard_map;
1907         unsigned long *dmap = SIT_I(sbi)->tmp_map;
1908         unsigned int start = 0, end = -1;
1909         bool force = (cpc->reason & CP_DISCARD);
1910         struct discard_entry *de = NULL;
1911         struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
1912         int i;
1913
1914         if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi))
1915                 return false;
1916
1917         if (!force) {
1918                 if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
1919                         SM_I(sbi)->dcc_info->nr_discards >=
1920                                 SM_I(sbi)->dcc_info->max_discards)
1921                         return false;
1922         }
1923
1924         /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
1925         for (i = 0; i < entries; i++)
1926                 dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
1927                                 (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
1928
1929         while (force || SM_I(sbi)->dcc_info->nr_discards <=
1930                                 SM_I(sbi)->dcc_info->max_discards) {
1931                 start = __find_rev_next_bit(dmap, max_blocks, end + 1);
1932                 if (start >= max_blocks)
1933                         break;
1934
1935                 end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
1936                 if (force && start && end != max_blocks
1937                                         && (end - start) < cpc->trim_minlen)
1938                         continue;
1939
1940                 if (check_only)
1941                         return true;
1942
1943                 if (!de) {
1944                         de = f2fs_kmem_cache_alloc(discard_entry_slab,
1945                                                                 GFP_F2FS_ZERO);
1946                         de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
1947                         list_add_tail(&de->list, head);
1948                 }
1949
1950                 for (i = start; i < end; i++)
1951                         __set_bit_le(i, (void *)de->discard_map);
1952
1953                 SM_I(sbi)->dcc_info->nr_discards += end - start;
1954         }
1955         return false;
1956 }
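/*
 * Editor's note (illustrative, not part of the original file): a worked
 * example of the dmap computation above on one 8-bit slice of the per-segment
 * bitmaps (values are arbitrary):
 *
 *   cur_map     = 0b00001111   (valid now)
 *   ckpt_map    = 0b00111100   (valid at the last checkpoint)
 *   discard_map = 0b11000000   (already covered by a discard)
 *
 *   runtime case:   (cur ^ ckpt) & ckpt = 0b00110000
 *                   -> blocks valid at the checkpoint but freed since then
 *   force (fstrim): ~ckpt & ~discard   = 0b00000011
 *                   -> blocks not valid at the checkpoint and not yet
 *                      covered by a discard
 */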
1957
1958 static void release_discard_addr(struct discard_entry *entry)
1959 {
1960         list_del(&entry->list);
1961         kmem_cache_free(discard_entry_slab, entry);
1962 }
1963
1964 void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
1965 {
1966         struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
1967         struct discard_entry *entry, *this;
1968
1969         /* drop caches */
1970         list_for_each_entry_safe(entry, this, head, list)
1971                 release_discard_addr(entry);
1972 }
1973
1974 /*
1975  * Should call f2fs_clear_prefree_segments after checkpoint is done.
1976  */
1977 static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
1978 {
1979         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1980         unsigned int segno;
1981
1982         mutex_lock(&dirty_i->seglist_lock);
1983         for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
1984                 __set_test_and_free(sbi, segno);
1985         mutex_unlock(&dirty_i->seglist_lock);
1986 }
1987
1988 void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
1989                                                 struct cp_control *cpc)
1990 {
1991         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1992         struct list_head *head = &dcc->entry_list;
1993         struct discard_entry *entry, *this;
1994         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1995         unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
1996         unsigned int start = 0, end = -1;
1997         unsigned int secno, start_segno;
1998         bool force = (cpc->reason & CP_DISCARD);
1999         bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
2000
2001         mutex_lock(&dirty_i->seglist_lock);
2002
2003         while (1) {
2004                 int i;
2005
2006                 if (need_align && end != -1)
2007                         end--;
2008                 start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
2009                 if (start >= MAIN_SEGS(sbi))
2010                         break;
2011                 end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
2012                                                                 start + 1);
2013
2014                 if (need_align) {
2015                         start = rounddown(start, sbi->segs_per_sec);
2016                         end = roundup(end, sbi->segs_per_sec);
2017                 }
2018
2019                 for (i = start; i < end; i++) {
2020                         if (test_and_clear_bit(i, prefree_map))
2021                                 dirty_i->nr_dirty[PRE]--;
2022                 }
2023
2024                 if (!f2fs_realtime_discard_enable(sbi))
2025                         continue;
2026
2027                 if (force && start >= cpc->trim_start &&
2028                                         (end - 1) <= cpc->trim_end)
2029                                 continue;
2030
2031                 if (!test_opt(sbi, LFS) || !__is_large_section(sbi)) {
2032                         f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
2033                                 (end - start) << sbi->log_blocks_per_seg);
2034                         continue;
2035                 }
2036 next:
2037                 secno = GET_SEC_FROM_SEG(sbi, start);
2038                 start_segno = GET_SEG_FROM_SEC(sbi, secno);
2039                 if (!IS_CURSEC(sbi, secno) &&
2040                         !get_valid_blocks(sbi, start, true))
2041                         f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
2042                                 sbi->segs_per_sec << sbi->log_blocks_per_seg);
2043
2044                 start = start_segno + sbi->segs_per_sec;
2045                 if (start < end)
2046                         goto next;
2047                 else
2048                         end = start - 1;
2049         }
2050         mutex_unlock(&dirty_i->seglist_lock);
2051
2052         /* send small discards */
2053         list_for_each_entry_safe(entry, this, head, list) {
2054                 unsigned int cur_pos = 0, next_pos, len, total_len = 0;
2055                 bool is_valid = test_bit_le(0, entry->discard_map);
2056
2057 find_next:
2058                 if (is_valid) {
2059                         next_pos = find_next_zero_bit_le(entry->discard_map,
2060                                         sbi->blocks_per_seg, cur_pos);
2061                         len = next_pos - cur_pos;
2062
2063                         if (f2fs_sb_has_blkzoned(sbi) ||
2064                             (force && len < cpc->trim_minlen))
2065                                 goto skip;
2066
2067                         f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
2068                                                                         len);
2069                         total_len += len;
2070                 } else {
2071                         next_pos = find_next_bit_le(entry->discard_map,
2072                                         sbi->blocks_per_seg, cur_pos);
2073                 }
2074 skip:
2075                 cur_pos = next_pos;
2076                 is_valid = !is_valid;
2077
2078                 if (cur_pos < sbi->blocks_per_seg)
2079                         goto find_next;
2080
2081                 release_discard_addr(entry);
2082                 dcc->nr_discards -= total_len;
2083         }
2084
2085         wake_up_discard_thread(sbi, false);
2086 }
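/*
 * Editor's note (illustrative sketch, not part of the original file): the
 * "send small discards" loop above walks entry->discard_map as alternating
 * runs of set and clear bits and issues one discard per run of set bits.
 * A simplified model of that walk, using the non-LE bitmap helpers for
 * brevity:
 */
#if 0
static void toy_walk_discard_runs(const unsigned long *map, unsigned int nbits)
{
	unsigned int cur = 0;
	bool valid = test_bit(0, map);	/* does the map start with a set run? */

	while (cur < nbits) {
		unsigned int next = valid ?
			find_next_zero_bit(map, nbits, cur) :
			find_next_bit(map, nbits, cur);

		if (valid)
			pr_info("discard run [%u, %u)\n", cur, next);
		cur = next;
		valid = !valid;
	}
}
#endif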
2087
2088 static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
2089 {
2090         dev_t dev = sbi->sb->s_bdev->bd_dev;
2091         struct discard_cmd_control *dcc;
2092         int err = 0, i;
2093
2094         if (SM_I(sbi)->dcc_info) {
2095                 dcc = SM_I(sbi)->dcc_info;
2096                 goto init_thread;
2097         }
2098
2099         dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
2100         if (!dcc)
2101                 return -ENOMEM;
2102
2103         dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
2104         INIT_LIST_HEAD(&dcc->entry_list);
2105         for (i = 0; i < MAX_PLIST_NUM; i++)
2106                 INIT_LIST_HEAD(&dcc->pend_list[i]);
2107         INIT_LIST_HEAD(&dcc->wait_list);
2108         INIT_LIST_HEAD(&dcc->fstrim_list);
2109         mutex_init(&dcc->cmd_lock);
2110         atomic_set(&dcc->issued_discard, 0);
2111         atomic_set(&dcc->queued_discard, 0);
2112         atomic_set(&dcc->discard_cmd_cnt, 0);
2113         dcc->nr_discards = 0;
2114         dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
2115         dcc->undiscard_blks = 0;
2116         dcc->next_pos = 0;
2117         dcc->root = RB_ROOT;
2118         dcc->rbtree_check = false;
2119
2120         init_waitqueue_head(&dcc->discard_wait_queue);
2121         SM_I(sbi)->dcc_info = dcc;
2122 init_thread:
2123         dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
2124                                 "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
2125         if (IS_ERR(dcc->f2fs_issue_discard)) {
2126                 err = PTR_ERR(dcc->f2fs_issue_discard);
2127                 kvfree(dcc);
2128                 SM_I(sbi)->dcc_info = NULL;
2129                 return err;
2130         }
2131
2132         return err;
2133 }
2134
2135 static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
2136 {
2137         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2138
2139         if (!dcc)
2140                 return;
2141
2142         f2fs_stop_discard_thread(sbi);
2143
2144         kvfree(dcc);
2145         SM_I(sbi)->dcc_info = NULL;
2146 }
2147
2148 static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
2149 {
2150         struct sit_info *sit_i = SIT_I(sbi);
2151
2152         if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
2153                 sit_i->dirty_sentries++;
2154                 return false;
2155         }
2156
2157         return true;
2158 }
2159
2160 static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
2161                                         unsigned int segno, int modified)
2162 {
2163         struct seg_entry *se = get_seg_entry(sbi, segno);
2164         se->type = type;
2165         if (modified)
2166                 __mark_sit_entry_dirty(sbi, segno);
2167 }
2168
2169 static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
2170 {
2171         struct seg_entry *se;
2172         unsigned int segno, offset;
2173         long int new_vblocks;
2174         bool exist;
2175 #ifdef CONFIG_F2FS_CHECK_FS
2176         bool mir_exist;
2177 #endif
2178
2179         segno = GET_SEGNO(sbi, blkaddr);
2180
2181         se = get_seg_entry(sbi, segno);
2182         new_vblocks = se->valid_blocks + del;
2183         offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2184
2185         f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
2186                                 (new_vblocks > sbi->blocks_per_seg)));
2187
2188         se->valid_blocks = new_vblocks;
2189         se->mtime = get_mtime(sbi, false);
2190         if (se->mtime > SIT_I(sbi)->max_mtime)
2191                 SIT_I(sbi)->max_mtime = se->mtime;
2192
2193         /* Update valid block bitmap */
2194         if (del > 0) {
2195                 exist = f2fs_test_and_set_bit(offset, se->cur_valid_map);
2196 #ifdef CONFIG_F2FS_CHECK_FS
2197                 mir_exist = f2fs_test_and_set_bit(offset,
2198                                                 se->cur_valid_map_mir);
2199                 if (unlikely(exist != mir_exist)) {
2200                         f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
2201                                 "when setting bitmap, blk:%u, old bit:%d",
2202                                 blkaddr, exist);
2203                         f2fs_bug_on(sbi, 1);
2204                 }
2205 #endif
2206                 if (unlikely(exist)) {
2207                         f2fs_msg(sbi->sb, KERN_ERR,
2208                                 "Bitmap was wrongly set, blk:%u", blkaddr);
2209                         f2fs_bug_on(sbi, 1);
2210                         se->valid_blocks--;
2211                         del = 0;
2212                 }
2213
2214                 if (!f2fs_test_and_set_bit(offset, se->discard_map))
2215                         sbi->discard_blks--;
2216
2217                 /* don't overwrite by SSR to keep node chain */
2218                 if (IS_NODESEG(se->type) &&
2219                                 !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
2220                         if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
2221                                 se->ckpt_valid_blocks++;
2222                 }
2223         } else {
2224                 exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map);
2225 #ifdef CONFIG_F2FS_CHECK_FS
2226                 mir_exist = f2fs_test_and_clear_bit(offset,
2227                                                 se->cur_valid_map_mir);
2228                 if (unlikely(exist != mir_exist)) {
2229                         f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
2230                                 "when clearing bitmap, blk:%u, old bit:%d",
2231                                 blkaddr, exist);
2232                         f2fs_bug_on(sbi, 1);
2233                 }
2234 #endif
2235                 if (unlikely(!exist)) {
2236                         f2fs_msg(sbi->sb, KERN_ERR,
2237                                 "Bitmap was wrongly cleared, blk:%u", blkaddr);
2238                         f2fs_bug_on(sbi, 1);
2239                         se->valid_blocks++;
2240                         del = 0;
2241                 } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2242                         /*
2243                          * If checkpoints are off, we must not reuse data that
2244                          * was used in the previous checkpoint. If it was used
2245                          * before, we must track that to know how much space we
2246                          * really have.
2247                          */
2248                         if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
2249                                 spin_lock(&sbi->stat_lock);
2250                                 sbi->unusable_block_count++;
2251                                 spin_unlock(&sbi->stat_lock);
2252                         }
2253                 }
2254
2255                 if (f2fs_test_and_clear_bit(offset, se->discard_map))
2256                         sbi->discard_blks++;
2257         }
2258         if (!f2fs_test_bit(offset, se->ckpt_valid_map))
2259                 se->ckpt_valid_blocks += del;
2260
2261         __mark_sit_entry_dirty(sbi, segno);
2262
2263         /* update total number of valid blocks to be written in ckpt area */
2264         SIT_I(sbi)->written_valid_blocks += del;
2265
2266         if (__is_large_section(sbi))
2267                 get_sec_entry(sbi, segno)->valid_blocks += del;
2268 }
2269
2270 void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
2271 {
2272         unsigned int segno = GET_SEGNO(sbi, addr);
2273         struct sit_info *sit_i = SIT_I(sbi);
2274
2275         f2fs_bug_on(sbi, addr == NULL_ADDR);
2276         if (addr == NEW_ADDR)
2277                 return;
2278
2279         invalidate_mapping_pages(META_MAPPING(sbi), addr, addr);
2280
2281         /* add it into sit main buffer */
2282         down_write(&sit_i->sentry_lock);
2283
2284         update_sit_entry(sbi, addr, -1);
2285
2286         /* add it into dirty seglist */
2287         locate_dirty_segment(sbi, segno);
2288
2289         up_write(&sit_i->sentry_lock);
2290 }
2291
2292 bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
2293 {
2294         struct sit_info *sit_i = SIT_I(sbi);
2295         unsigned int segno, offset;
2296         struct seg_entry *se;
2297         bool is_cp = false;
2298
2299         if (!__is_valid_data_blkaddr(blkaddr))
2300                 return true;
2301
2302         down_read(&sit_i->sentry_lock);
2303
2304         segno = GET_SEGNO(sbi, blkaddr);
2305         se = get_seg_entry(sbi, segno);
2306         offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2307
2308         if (f2fs_test_bit(offset, se->ckpt_valid_map))
2309                 is_cp = true;
2310
2311         up_read(&sit_i->sentry_lock);
2312
2313         return is_cp;
2314 }
2315
2316 /*
2317  * This function should be called while holding the curseg_mutex lock
2318  */
2319 static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
2320                                         struct f2fs_summary *sum)
2321 {
2322         struct curseg_info *curseg = CURSEG_I(sbi, type);
2323         void *addr = curseg->sum_blk;
2324         addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
2325         memcpy(addr, sum, sizeof(struct f2fs_summary));
2326 }
2327
2328 /*
2329  * Calculate the number of current summary pages for writing
2330  */
2331 int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
2332 {
2333         int valid_sum_count = 0;
2334         int i, sum_in_page;
2335
2336         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2337                 if (sbi->ckpt->alloc_type[i] == SSR)
2338                         valid_sum_count += sbi->blocks_per_seg;
2339                 else {
2340                         if (for_ra)
2341                                 valid_sum_count += le16_to_cpu(
2342                                         F2FS_CKPT(sbi)->cur_data_blkoff[i]);
2343                         else
2344                                 valid_sum_count += curseg_blkoff(sbi, i);
2345                 }
2346         }
2347
2348         sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
2349                         SUM_FOOTER_SIZE) / SUMMARY_SIZE;
2350         if (valid_sum_count <= sum_in_page)
2351                 return 1;
2352         else if ((valid_sum_count - sum_in_page) <=
2353                 (PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
2354                 return 2;
2355         return 3;
2356 }
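/*
 * Editor's note (worked example, not part of the original file; assumes 4KB
 * pages and the usual on-disk sizes SUMMARY_SIZE = 7, SUM_FOOTER_SIZE = 5,
 * SUM_JOURNAL_SIZE = 253):
 *
 *   sum_in_page = (4096 - 2*253 - 5) / 7 = 512
 *
 * so up to 512 summary entries fit in the first page, a further
 * (4096 - 5) / 7 = 584 entries fit in a second page (1096 total), and
 * anything beyond that (at most 3 * 512 = 1536 for the three data cursegs)
 * needs three pages.
 */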
2357
2358 /*
2359  * Caller should put this summary page
2360  */
2361 struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
2362 {
2363         return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno));
2364 }
2365
2366 void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
2367                                         void *src, block_t blk_addr)
2368 {
2369         struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2370
2371         memcpy(page_address(page), src, PAGE_SIZE);
2372         set_page_dirty(page);
2373         f2fs_put_page(page, 1);
2374 }
2375
2376 static void write_sum_page(struct f2fs_sb_info *sbi,
2377                         struct f2fs_summary_block *sum_blk, block_t blk_addr)
2378 {
2379         f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr);
2380 }
2381
2382 static void write_current_sum_page(struct f2fs_sb_info *sbi,
2383                                                 int type, block_t blk_addr)
2384 {
2385         struct curseg_info *curseg = CURSEG_I(sbi, type);
2386         struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2387         struct f2fs_summary_block *src = curseg->sum_blk;
2388         struct f2fs_summary_block *dst;
2389
2390         dst = (struct f2fs_summary_block *)page_address(page);
2391         memset(dst, 0, PAGE_SIZE);
2392
2393         mutex_lock(&curseg->curseg_mutex);
2394
2395         down_read(&curseg->journal_rwsem);
2396         memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
2397         up_read(&curseg->journal_rwsem);
2398
2399         memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
2400         memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
2401
2402         mutex_unlock(&curseg->curseg_mutex);
2403
2404         set_page_dirty(page);
2405         f2fs_put_page(page, 1);
2406 }
2407
2408 static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
2409 {
2410         struct curseg_info *curseg = CURSEG_I(sbi, type);
2411         unsigned int segno = curseg->segno + 1;
2412         struct free_segmap_info *free_i = FREE_I(sbi);
2413
2414         if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
2415                 return !test_bit(segno, free_i->free_segmap);
2416         return 0;
2417 }
2418
2419 /*
2420  * Find a new segment in the free segment bitmap, in the right allocation order.
2421  * This function must succeed; otherwise it is a BUG.
2422  */
2423 static void get_new_segment(struct f2fs_sb_info *sbi,
2424                         unsigned int *newseg, bool new_sec, int dir)
2425 {
2426         struct free_segmap_info *free_i = FREE_I(sbi);
2427         unsigned int segno, secno, zoneno;
2428         unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
2429         unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
2430         unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
2431         unsigned int left_start = hint;
2432         bool init = true;
2433         int go_left = 0;
2434         int i;
2435
2436         spin_lock(&free_i->segmap_lock);
2437
2438         if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
2439                 segno = find_next_zero_bit(free_i->free_segmap,
2440                         GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
2441                 if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
2442                         goto got_it;
2443         }
2444 find_other_zone:
2445         secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
2446         if (secno >= MAIN_SECS(sbi)) {
2447                 if (dir == ALLOC_RIGHT) {
2448                         secno = find_next_zero_bit(free_i->free_secmap,
2449                                                         MAIN_SECS(sbi), 0);
2450                         f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
2451                 } else {
2452                         go_left = 1;
2453                         left_start = hint - 1;
2454                 }
2455         }
2456         if (go_left == 0)
2457                 goto skip_left;
2458
2459         while (test_bit(left_start, free_i->free_secmap)) {
2460                 if (left_start > 0) {
2461                         left_start--;
2462                         continue;
2463                 }
2464                 left_start = find_next_zero_bit(free_i->free_secmap,
2465                                                         MAIN_SECS(sbi), 0);
2466                 f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
2467                 break;
2468         }
2469         secno = left_start;
2470 skip_left:
2471         segno = GET_SEG_FROM_SEC(sbi, secno);
2472         zoneno = GET_ZONE_FROM_SEC(sbi, secno);
2473
2474         /* give up on finding another zone */
2475         if (!init)
2476                 goto got_it;
2477         if (sbi->secs_per_zone == 1)
2478                 goto got_it;
2479         if (zoneno == old_zoneno)
2480                 goto got_it;
2481         if (dir == ALLOC_LEFT) {
2482                 if (!go_left && zoneno + 1 >= total_zones)
2483                         goto got_it;
2484                 if (go_left && zoneno == 0)
2485                         goto got_it;
2486         }
2487         for (i = 0; i < NR_CURSEG_TYPE; i++)
2488                 if (CURSEG_I(sbi, i)->zone == zoneno)
2489                         break;
2490
2491         if (i < NR_CURSEG_TYPE) {
2492                 /* zone is in use, try another */
2493                 if (go_left)
2494                         hint = zoneno * sbi->secs_per_zone - 1;
2495                 else if (zoneno + 1 >= total_zones)
2496                         hint = 0;
2497                 else
2498                         hint = (zoneno + 1) * sbi->secs_per_zone;
2499                 init = false;
2500                 goto find_other_zone;
2501         }
2502 got_it:
2503         /* set it as dirty segment in free segmap */
2504         f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
2505         __set_inuse(sbi, segno);
2506         *newseg = segno;
2507         spin_unlock(&free_i->segmap_lock);
2508 }
2509
2510 static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
2511 {
2512         struct curseg_info *curseg = CURSEG_I(sbi, type);
2513         struct summary_footer *sum_footer;
2514
2515         curseg->segno = curseg->next_segno;
2516         curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
2517         curseg->next_blkoff = 0;
2518         curseg->next_segno = NULL_SEGNO;
2519
2520         sum_footer = &(curseg->sum_blk->footer);
2521         memset(sum_footer, 0, sizeof(struct summary_footer));
2522         if (IS_DATASEG(type))
2523                 SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
2524         if (IS_NODESEG(type))
2525                 SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
2526         __set_sit_entry_type(sbi, type, curseg->segno, modified);
2527 }
2528
2529 static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
2530 {
2531         /* if segs_per_sec is larger than 1, we need to keep the original policy. */
2532         if (__is_large_section(sbi))
2533                 return CURSEG_I(sbi, type)->segno;
2534
2535         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
2536                 return 0;
2537
2538         if (test_opt(sbi, NOHEAP) &&
2539                 (type == CURSEG_HOT_DATA || IS_NODESEG(type)))
2540                 return 0;
2541
2542         if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
2543                 return SIT_I(sbi)->last_victim[ALLOC_NEXT];
2544
2545         /* find segments from 0 to reuse freed segments */
2546         if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
2547                 return 0;
2548
2549         return CURSEG_I(sbi, type)->segno;
2550 }
2551
2552 /*
2553  * Allocate a current working segment.
2554  * This function always allocates a free segment in LFS manner.
2555  */
2556 static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
2557 {
2558         struct curseg_info *curseg = CURSEG_I(sbi, type);
2559         unsigned int segno = curseg->segno;
2560         int dir = ALLOC_LEFT;
2561
2562         write_sum_page(sbi, curseg->sum_blk,
2563                                 GET_SUM_BLOCK(sbi, segno));
2564         if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
2565                 dir = ALLOC_RIGHT;
2566
2567         if (test_opt(sbi, NOHEAP))
2568                 dir = ALLOC_RIGHT;
2569
2570         segno = __get_next_segno(sbi, type);
2571         get_new_segment(sbi, &segno, new_sec, dir);
2572         curseg->next_segno = segno;
2573         reset_curseg(sbi, type, 1);
2574         curseg->alloc_type = LFS;
2575 }
2576
2577 static void __next_free_blkoff(struct f2fs_sb_info *sbi,
2578                         struct curseg_info *seg, block_t start)
2579 {
2580         struct seg_entry *se = get_seg_entry(sbi, seg->segno);
2581         int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2582         unsigned long *target_map = SIT_I(sbi)->tmp_map;
2583         unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2584         unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2585         int i, pos;
2586
2587         for (i = 0; i < entries; i++)
2588                 target_map[i] = ckpt_map[i] | cur_map[i];
2589
2590         pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
2591
2592         seg->next_blkoff = pos;
2593 }
2594
2595 /*
2596  * If a segment is written in LFS manner, the next block offset is simply
2597  * obtained by incrementing the current block offset. However, if a segment is
2598  * written in SSR manner, the next block offset is obtained by calling __next_free_blkoff().
2599  */
2600 static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
2601                                 struct curseg_info *seg)
2602 {
2603         if (seg->alloc_type == SSR)
2604                 __next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
2605         else
2606                 seg->next_blkoff++;
2607 }
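/*
 * Editor's note (worked example, not part of the original file): how
 * __next_free_blkoff() picks the next writable block in SSR mode. Blocks that
 * are valid either now or in the last checkpoint are skipped (bit order is
 * the f2fs MSB-first convention):
 *
 *   cur_valid_map  = 1100 0101
 *   ckpt_valid_map = 1010 0001
 *   target_map     = 1110 0101   (bitwise OR)
 *
 *   __find_rev_next_zero_bit(target_map, 8, 0) = 3, so next_blkoff = 3;
 *   offsets 0..2 are in use in at least one of the two maps.
 */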
2608
2609 /*
2610  * This function always allocates a used segment (from the dirty seglist) in
2611  * SSR manner, so it should recover the existing valid-block information of the segment.
2612  */
2613 static void change_curseg(struct f2fs_sb_info *sbi, int type)
2614 {
2615         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2616         struct curseg_info *curseg = CURSEG_I(sbi, type);
2617         unsigned int new_segno = curseg->next_segno;
2618         struct f2fs_summary_block *sum_node;
2619         struct page *sum_page;
2620
2621         write_sum_page(sbi, curseg->sum_blk,
2622                                 GET_SUM_BLOCK(sbi, curseg->segno));
2623         __set_test_and_inuse(sbi, new_segno);
2624
2625         mutex_lock(&dirty_i->seglist_lock);
2626         __remove_dirty_segment(sbi, new_segno, PRE);
2627         __remove_dirty_segment(sbi, new_segno, DIRTY);
2628         mutex_unlock(&dirty_i->seglist_lock);
2629
2630         reset_curseg(sbi, type, 1);
2631         curseg->alloc_type = SSR;
2632         __next_free_blkoff(sbi, curseg, 0);
2633
2634         sum_page = f2fs_get_sum_page(sbi, new_segno);
2635         f2fs_bug_on(sbi, IS_ERR(sum_page));
2636         sum_node = (struct f2fs_summary_block *)page_address(sum_page);
2637         memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
2638         f2fs_put_page(sum_page, 1);
2639 }
2640
2641 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
2642 {
2643         struct curseg_info *curseg = CURSEG_I(sbi, type);
2644         const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
2645         unsigned segno = NULL_SEGNO;
2646         int i, cnt;
2647         bool reversed = false;
2648
2649         /* f2fs_need_SSR() already forces us to do this */
2650         if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) {
2651                 curseg->next_segno = segno;
2652                 return 1;
2653         }
2654
2655         /* For node segments, let's do SSR more intensively */
2656         if (IS_NODESEG(type)) {
2657                 if (type >= CURSEG_WARM_NODE) {
2658                         reversed = true;
2659                         i = CURSEG_COLD_NODE;
2660                 } else {
2661                         i = CURSEG_HOT_NODE;
2662                 }
2663                 cnt = NR_CURSEG_NODE_TYPE;
2664         } else {
2665                 if (type >= CURSEG_WARM_DATA) {
2666                         reversed = true;
2667                         i = CURSEG_COLD_DATA;
2668                 } else {
2669                         i = CURSEG_HOT_DATA;
2670                 }
2671                 cnt = NR_CURSEG_DATA_TYPE;
2672         }
2673
2674         for (; cnt-- > 0; reversed ? i-- : i++) {
2675                 if (i == type)
2676                         continue;
2677                 if (v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) {
2678                         curseg->next_segno = segno;
2679                         return 1;
2680                 }
2681         }
2682
2683         /* find valid_blocks=0 in dirty list */
2684         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2685                 segno = get_free_segment(sbi);
2686                 if (segno != NULL_SEGNO) {
2687                         curseg->next_segno = segno;
2688                         return 1;
2689                 }
2690         }
2691         return 0;
2692 }
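/*
 * Editor's note (illustrative, not part of the original file): the loop above
 * tries the other temperatures of the same kind. For type == CURSEG_WARM_DATA
 * the search is reversed and visits CURSEG_COLD_DATA, skips WARM itself, then
 * CURSEG_HOT_DATA; for type == CURSEG_HOT_NODE it ascends through
 * CURSEG_WARM_NODE and then CURSEG_COLD_NODE.
 */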
2693
2694 /*
2695  * Flush out the current segment and replace it with a new segment.
2696  * This function must succeed; otherwise it is a BUG.
2697  */
2698 static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
2699                                                 int type, bool force)
2700 {
2701         struct curseg_info *curseg = CURSEG_I(sbi, type);
2702
2703         if (force)
2704                 new_curseg(sbi, type, true);
2705         else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
2706                                         type == CURSEG_WARM_NODE)
2707                 new_curseg(sbi, type, false);
2708         else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
2709                         likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
2710                 new_curseg(sbi, type, false);
2711         else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
2712                 change_curseg(sbi, type);
2713         else
2714                 new_curseg(sbi, type, false);
2715
2716         stat_inc_seg_type(sbi, curseg);
2717 }
2718
2719 void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
2720 {
2721         struct curseg_info *curseg;
2722         unsigned int old_segno;
2723         int i;
2724
2725         down_write(&SIT_I(sbi)->sentry_lock);
2726
2727         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2728                 curseg = CURSEG_I(sbi, i);
2729                 old_segno = curseg->segno;
2730                 SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
2731                 locate_dirty_segment(sbi, old_segno);
2732         }
2733
2734         up_write(&SIT_I(sbi)->sentry_lock);
2735 }
2736
2737 static const struct segment_allocation default_salloc_ops = {
2738         .allocate_segment = allocate_segment_by_default,
2739 };
2740
2741 bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
2742                                                 struct cp_control *cpc)
2743 {
2744         __u64 trim_start = cpc->trim_start;
2745         bool has_candidate = false;
2746
2747         down_write(&SIT_I(sbi)->sentry_lock);
2748         for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
2749                 if (add_discard_addrs(sbi, cpc, true)) {
2750                         has_candidate = true;
2751                         break;
2752                 }
2753         }
2754         up_write(&SIT_I(sbi)->sentry_lock);
2755
2756         cpc->trim_start = trim_start;
2757         return has_candidate;
2758 }
2759
2760 static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
2761                                         struct discard_policy *dpolicy,
2762                                         unsigned int start, unsigned int end)
2763 {
2764         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2765         struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
2766         struct rb_node **insert_p = NULL, *insert_parent = NULL;
2767         struct discard_cmd *dc;
2768         struct blk_plug plug;
2769         int issued;
2770         unsigned int trimmed = 0;
2771
2772 next:
2773         issued = 0;
2774
2775         mutex_lock(&dcc->cmd_lock);
2776         if (unlikely(dcc->rbtree_check))
2777                 f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
2778                                                                 &dcc->root));
2779
2780         dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
2781                                         NULL, start,
2782                                         (struct rb_entry **)&prev_dc,
2783                                         (struct rb_entry **)&next_dc,
2784                                         &insert_p, &insert_parent, true);
2785         if (!dc)
2786                 dc = next_dc;
2787
2788         blk_start_plug(&plug);
2789
2790         while (dc && dc->lstart <= end) {
2791                 struct rb_node *node;
2792                 int err = 0;
2793
2794                 if (dc->len < dpolicy->granularity)
2795                         goto skip;
2796
2797                 if (dc->state != D_PREP) {
2798                         list_move_tail(&dc->list, &dcc->fstrim_list);
2799                         goto skip;
2800                 }
2801
2802                 err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
2803
2804                 if (issued >= dpolicy->max_requests) {
2805                         start = dc->lstart + dc->len;
2806
2807                         if (err)
2808                                 __remove_discard_cmd(sbi, dc);
2809
2810                         blk_finish_plug(&plug);
2811                         mutex_unlock(&dcc->cmd_lock);
2812                         trimmed += __wait_all_discard_cmd(sbi, NULL);
2813                         congestion_wait(BLK_RW_ASYNC, HZ/50);
2814                         goto next;
2815                 }
2816 skip:
2817                 node = rb_next(&dc->rb_node);
2818                 if (err)
2819                         __remove_discard_cmd(sbi, dc);
2820                 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
2821
2822                 if (fatal_signal_pending(current))
2823                         break;
2824         }
2825
2826         blk_finish_plug(&plug);
2827         mutex_unlock(&dcc->cmd_lock);
2828
2829         return trimmed;
2830 }
2831
2832 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
2833 {
2834         __u64 start = F2FS_BYTES_TO_BLK(range->start);
2835         __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
2836         unsigned int start_segno, end_segno;
2837         block_t start_block, end_block;
2838         struct cp_control cpc;
2839         struct discard_policy dpolicy;
2840         unsigned long long trimmed = 0;
2841         int err = 0;
2842         bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
2843
2844         if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
2845                 return -EINVAL;
2846
2847         if (end < MAIN_BLKADDR(sbi))
2848                 goto out;
2849
2850         if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
2851                 f2fs_msg(sbi->sb, KERN_WARNING,
2852                         "Found FS corruption, run fsck to fix.");
2853                 return -EIO;
2854         }
2855
2856         /* start/end segment number in main_area */
2857         start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
2858         end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
2859                                                 GET_SEGNO(sbi, end);
2860         if (need_align) {
2861                 start_segno = rounddown(start_segno, sbi->segs_per_sec);
2862                 end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
2863         }
2864
2865         cpc.reason = CP_DISCARD;
2866         cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
2867         cpc.trim_start = start_segno;
2868         cpc.trim_end = end_segno;
2869
2870         if (sbi->discard_blks == 0)
2871                 goto out;
2872
2873         mutex_lock(&sbi->gc_mutex);
2874         err = f2fs_write_checkpoint(sbi, &cpc);
2875         mutex_unlock(&sbi->gc_mutex);
2876         if (err)
2877                 goto out;
2878
2879         /*
2880          * We queued discard candidates, but we don't actually need to wait for
2881          * all of them here: with the runtime discard option enabled they will be
2882          * issued during idle time. The user's configuration suggests relying on
2883          * runtime discard (or periodic fstrim) rather than waiting synchronously.
2884          */
2885         if (f2fs_realtime_discard_enable(sbi))
2886                 goto out;
2887
2888         start_block = START_BLOCK(sbi, start_segno);
2889         end_block = START_BLOCK(sbi, end_segno + 1);
2890
2891         __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
2892         trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
2893                                         start_block, end_block);
2894
2895         trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
2896                                         start_block, end_block);
2897 out:
2898         if (!err)
2899                 range->len = F2FS_BLK_TO_BYTES(trimmed);
2900         return err;
2901 }
2902
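/* true while the current segment of @type still has room for one more block */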
2903 static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
2904 {
2905         struct curseg_info *curseg = CURSEG_I(sbi, type);
2906         if (curseg->next_blkoff < sbi->blocks_per_seg)
2907                 return true;
2908         return false;
2909 }
2910
2911 int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
2912 {
2913         switch (hint) {
2914         case WRITE_LIFE_SHORT:
2915                 return CURSEG_HOT_DATA;
2916         case WRITE_LIFE_EXTREME:
2917                 return CURSEG_COLD_DATA;
2918         default:
2919                 return CURSEG_WARM_DATA;
2920         }
2921 }
2922
2923 /* This returns write hints for each segment type. These hints will be
2924  * passed down to the block layer. There are mapping tables which depend on
2925  * the mount option 'whint_mode'.
2926  *
2927  * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET.
2928  *
2929  * 2) whint_mode=user-based. F2FS tries to pass down hints given by users.
2930  *
2931  * User                  F2FS                     Block
2932  * ----                  ----                     -----
2933  *                       META                     WRITE_LIFE_NOT_SET
2934  *                       HOT_NODE                 "
2935  *                       WARM_NODE                "
2936  *                       COLD_NODE                "
2937  * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
2938  * extension list        "                        "
2939  *
2940  * -- buffered io
2941  * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
2942  * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
2943  * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
2944  * WRITE_LIFE_NONE       "                        "
2945  * WRITE_LIFE_MEDIUM     "                        "
2946  * WRITE_LIFE_LONG       "                        "
2947  *
2948  * -- direct io
2949  * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
2950  * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
2951  * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
2952  * WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
2953  * WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
2954  * WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
2955  *
2956  * 3) whint_mode=fs-based. F2FS passes down hints with its policy.
2957  *
2958  * User                  F2FS                     Block
2959  * ----                  ----                     -----
2960  *                       META                     WRITE_LIFE_MEDIUM
2961  *                       HOT_NODE                 WRITE_LIFE_NOT_SET
2962  *                       WARM_NODE                "
2963  *                       COLD_NODE                WRITE_LIFE_NONE
2964  * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
2965  * extension list        "                        "
2966  *
2967  * -- buffered io
2968  * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
2969  * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
2970  * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_LONG
2971  * WRITE_LIFE_NONE       "                        "
2972  * WRITE_LIFE_MEDIUM     "                        "
2973  * WRITE_LIFE_LONG       "                        "
2974  *
2975  * -- direct io
2976  * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
2977  * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
2978  * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
2979  * WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
2980  * WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
2981  * WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
2982  */
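/*
 * For example, with whint_mode=user-based a buffered write issued after
 * fcntl(fd, F_SET_RW_HINT, &hint) with hint = RWH_WRITE_LIFE_SHORT is
 * routed to the HOT_DATA log and submitted to the block layer with
 * WRITE_LIFE_SHORT, as in the table above.
 */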
2983
2984 enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
2985                                 enum page_type type, enum temp_type temp)
2986 {
2987         if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) {
2988                 if (type == DATA) {
2989                         if (temp == WARM)
2990                                 return WRITE_LIFE_NOT_SET;
2991                         else if (temp == HOT)
2992                                 return WRITE_LIFE_SHORT;
2993                         else if (temp == COLD)
2994                                 return WRITE_LIFE_EXTREME;
2995                 } else {
2996                         return WRITE_LIFE_NOT_SET;
2997                 }
2998         } else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) {
2999                 if (type == DATA) {
3000                         if (temp == WARM)
3001                                 return WRITE_LIFE_LONG;
3002                         else if (temp == HOT)
3003                                 return WRITE_LIFE_SHORT;
3004                         else if (temp == COLD)
3005                                 return WRITE_LIFE_EXTREME;
3006                 } else if (type == NODE) {
3007                         if (temp == WARM || temp == HOT)
3008                                 return WRITE_LIFE_NOT_SET;
3009                         else if (temp == COLD)
3010                                 return WRITE_LIFE_NONE;
3011                 } else if (type == META) {
3012                         return WRITE_LIFE_MEDIUM;
3013                 }
3014         }
3015         return WRITE_LIFE_NOT_SET;
3016 }
3017
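/*
 * The __get_segment_type_{2,4,6} helpers map an I/O described by fio to a
 * current-segment (log) type.  Which helper is used depends on the number
 * of active logs configured at mount time (active_logs = 2, 4 or 6).
 */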
3018 static int __get_segment_type_2(struct f2fs_io_info *fio)
3019 {
3020         if (fio->type == DATA)
3021                 return CURSEG_HOT_DATA;
3022         else
3023                 return CURSEG_HOT_NODE;
3024 }
3025
3026 static int __get_segment_type_4(struct f2fs_io_info *fio)
3027 {
3028         if (fio->type == DATA) {
3029                 struct inode *inode = fio->page->mapping->host;
3030
3031                 if (S_ISDIR(inode->i_mode))
3032                         return CURSEG_HOT_DATA;
3033                 else
3034                         return CURSEG_COLD_DATA;
3035         } else {
3036                 if (IS_DNODE(fio->page) && is_cold_node(fio->page))
3037                         return CURSEG_WARM_NODE;
3038                 else
3039                         return CURSEG_COLD_NODE;
3040         }
3041 }
3042
3043 static int __get_segment_type_6(struct f2fs_io_info *fio)
3044 {
3045         if (fio->type == DATA) {
3046                 struct inode *inode = fio->page->mapping->host;
3047
3048                 if (is_cold_data(fio->page) || file_is_cold(inode))
3049                         return CURSEG_COLD_DATA;
3050                 if (file_is_hot(inode) ||
3051                                 is_inode_flag_set(inode, FI_HOT_DATA) ||
3052                                 f2fs_is_atomic_file(inode) ||
3053                                 f2fs_is_volatile_file(inode))
3054                         return CURSEG_HOT_DATA;
3055                 /* f2fs_rw_hint_to_seg_type(inode->i_write_hint); */
3056                 return CURSEG_WARM_DATA;
3057         } else {
3058                 if (IS_DNODE(fio->page))
3059                         return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
3060                                                 CURSEG_HOT_NODE;
3061                 return CURSEG_COLD_NODE;
3062         }
3063 }
3064
3065 static int __get_segment_type(struct f2fs_io_info *fio)
3066 {
3067         int type = 0;
3068
3069         switch (F2FS_OPTION(fio->sbi).active_logs) {
3070         case 2:
3071                 type = __get_segment_type_2(fio);
3072                 break;
3073         case 4:
3074                 type = __get_segment_type_4(fio);
3075                 break;
3076         case 6:
3077                 type = __get_segment_type_6(fio);
3078                 break;
3079         default:
3080                 f2fs_bug_on(fio->sbi, true);
3081         }
3082
3083         if (IS_HOT(type))
3084                 fio->temp = HOT;
3085         else if (IS_WARM(type))
3086                 fio->temp = WARM;
3087         else
3088                 fio->temp = COLD;
3089         return type;
3090 }
3091
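/*
 * Allocate the next free block from the current segment of @type:
 * record the summary entry, update SIT for the new (and, if valid, the
 * old) block address, open a new segment once the current one is full,
 * and optionally queue @fio on the per-temperature in-order write list.
 */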
3092 void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
3093                 block_t old_blkaddr, block_t *new_blkaddr,
3094                 struct f2fs_summary *sum, int type,
3095                 struct f2fs_io_info *fio, bool add_list)
3096 {
3097         struct sit_info *sit_i = SIT_I(sbi);
3098         struct curseg_info *curseg = CURSEG_I(sbi, type);
3099
3100         down_read(&SM_I(sbi)->curseg_lock);
3101
3102         mutex_lock(&curseg->curseg_mutex);
3103         down_write(&sit_i->sentry_lock);
3104
3105         *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
3106
3107         f2fs_wait_discard_bio(sbi, *new_blkaddr);
3108
3109         /*
3110          * __add_sum_entry should be called under the curseg_mutex
3111          * because this function updates a summary entry in the
3112          * current summary block.
3113          */
3114         __add_sum_entry(sbi, type, sum);
3115
3116         __refresh_next_blkoff(sbi, curseg);
3117
3118         stat_inc_block_count(sbi, curseg);
3119
3120         /*
3121          * SIT information should be updated before segment allocation,
3122          * since SSR needs the latest valid block information.
3123          */
3124         update_sit_entry(sbi, *new_blkaddr, 1);
3125         if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
3126                 update_sit_entry(sbi, old_blkaddr, -1);
3127
3128         if (!__has_curseg_space(sbi, type))
3129                 sit_i->s_ops->allocate_segment(sbi, type, false);
3130
3131         /*
3132          * The segment dirty status should be updated after segment allocation,
3133          * so we only need to update the status once, after the previous
3134          * segment has been closed.
3135          */
3136         locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3137         locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
3138
3139         up_write(&sit_i->sentry_lock);
3140
3141         if (page && IS_NODESEG(type)) {
3142                 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
3143
3144                 f2fs_inode_chksum_set(sbi, page);
3145         }
3146
3147         if (add_list) {
3148                 struct f2fs_bio_info *io;
3149
3150                 INIT_LIST_HEAD(&fio->list);
3151                 fio->in_list = true;
3152                 fio->retry = false;
3153                 io = sbi->write_io[fio->type] + fio->temp;
3154                 spin_lock(&io->io_lock);
3155                 list_add_tail(&fio->list, &io->io_list);
3156                 spin_unlock(&io->io_lock);
3157         }
3158
3159         mutex_unlock(&curseg->curseg_mutex);
3160
3161         up_read(&SM_I(sbi)->curseg_lock);
3162 }
3163
3164 static void update_device_state(struct f2fs_io_info *fio)
3165 {
3166         struct f2fs_sb_info *sbi = fio->sbi;
3167         unsigned int devidx;
3168
3169         if (!f2fs_is_multi_device(sbi))
3170                 return;
3171
3172         devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
3173
3174         /* update device state for fsync */
3175         f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
3176
3177         /* update device state for checkpoint */
3178         if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
3179                 spin_lock(&sbi->dev_lock);
3180                 f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
3181                 spin_unlock(&sbi->dev_lock);
3182         }
3183 }
3184
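/*
 * Allocate a new block for the page and submit the write.  When the write
 * cannot be submitted (fio->retry), the just-allocated block becomes the
 * old address and allocation is retried.  For LFS cold data, io_order_lock
 * keeps the block allocation order aligned with the submission order.
 */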
3185 static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
3186 {
3187         int type = __get_segment_type(fio);
3188         bool keep_order = (test_opt(fio->sbi, LFS) && type == CURSEG_COLD_DATA);
3189
3190         if (keep_order)
3191                 down_read(&fio->sbi->io_order_lock);
3192 reallocate:
3193         f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
3194                         &fio->new_blkaddr, sum, type, fio, true);
3195         if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
3196                 invalidate_mapping_pages(META_MAPPING(fio->sbi),
3197                                         fio->old_blkaddr, fio->old_blkaddr);
3198
3199         /* write out the dirty page to the block device */
3200         f2fs_submit_page_write(fio);
3201         if (fio->retry) {
3202                 fio->old_blkaddr = fio->new_blkaddr;
3203                 goto reallocate;
3204         }
3205
3206         update_device_state(fio);
3207
3208         if (keep_order)
3209                 up_read(&fio->sbi->io_order_lock);
3210 }
3211
3212 void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
3213                                         enum iostat_type io_type)
3214 {
3215         struct f2fs_io_info fio = {
3216                 .sbi = sbi,
3217                 .type = META,
3218                 .temp = HOT,
3219                 .op = REQ_OP_WRITE,
3220                 .op_flags = REQ_SYNC | REQ_NOIDLE | REQ_META | REQ_PRIO,
3221                 .old_blkaddr = page->index,
3222                 .new_blkaddr = page->index,
3223                 .page = page,
3224                 .encrypted_page = NULL,
3225                 .in_list = false,
3226         };
3227
3228         if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
3229                 fio.op_flags &= ~REQ_META;
3230
3231         set_page_writeback(page);
3232         ClearPageError(page);
3233         f2fs_submit_page_write(&fio);
3234
3235         stat_inc_meta_count(sbi, page->index);
3236         f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE);
3237 }
3238
3239 void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
3240 {
3241         struct f2fs_summary sum;
3242
3243         set_summary(&sum, nid, 0, 0);
3244         do_write_page(&sum, fio);
3245
3246         f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
3247 }
3248
3249 void f2fs_outplace_write_data(struct dnode_of_data *dn,
3250                                         struct f2fs_io_info *fio)
3251 {
3252         struct f2fs_sb_info *sbi = fio->sbi;
3253         struct f2fs_summary sum;
3254
3255         f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
3256         set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
3257         do_write_page(&sum, fio);
3258         f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
3259
3260         f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE);
3261 }
3262
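/*
 * In-place update: rewrite the block at its existing address instead of
 * allocating a new one.  The target segment must be a data segment;
 * otherwise the filesystem is flagged for fsck and -EFAULT is returned.
 */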
3263 int f2fs_inplace_write_data(struct f2fs_io_info *fio)
3264 {
3265         int err;
3266         struct f2fs_sb_info *sbi = fio->sbi;
3267         unsigned int segno;
3268
3269         fio->new_blkaddr = fio->old_blkaddr;
3270         /* i/o temperature is needed for passing down write hints */
3271         __get_segment_type(fio);
3272
3273         segno = GET_SEGNO(sbi, fio->new_blkaddr);
3274
3275         if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
3276                 set_sbi_flag(sbi, SBI_NEED_FSCK);
3277                 return -EFAULT;
3278         }
3279
3280         stat_inc_inplace_blocks(fio->sbi);
3281
3282         err = f2fs_submit_page_bio(fio);
3283         if (!err) {
3284                 update_device_state(fio);
3285                 f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
3286         }
3287
3288         return err;
3289 }
3290
3291 static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
3292                                                 unsigned int segno)
3293 {
3294         int i;
3295
3296         for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
3297                 if (CURSEG_I(sbi, i)->segno == segno)
3298                         break;
3299         }
3300         return i;
3301 }
3302
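/*
 * Place @sum at a caller-chosen @new_blkaddr: temporarily switch the
 * matching current segment to the segment that contains @new_blkaddr,
 * update SIT entries for the new (and old) addresses, and, when
 * @recover_curseg is set, restore the previous curseg position afterwards.
 */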
3303 void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
3304                                 block_t old_blkaddr, block_t new_blkaddr,
3305                                 bool recover_curseg, bool recover_newaddr)
3306 {
3307         struct sit_info *sit_i = SIT_I(sbi);
3308         struct curseg_info *curseg;
3309         unsigned int segno, old_cursegno;
3310         struct seg_entry *se;
3311         int type;
3312         unsigned short old_blkoff;
3313
3314         segno = GET_SEGNO(sbi, new_blkaddr);
3315         se = get_seg_entry(sbi, segno);
3316         type = se->type;
3317
3318         down_write(&SM_I(sbi)->curseg_lock);
3319
3320         if (!recover_curseg) {
3321                 /* for recovery flow */
3322                 if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
3323                         if (old_blkaddr == NULL_ADDR)
3324                                 type = CURSEG_COLD_DATA;
3325                         else
3326                                 type = CURSEG_WARM_DATA;
3327                 }
3328         } else {
3329                 if (IS_CURSEG(sbi, segno)) {
3330                         /* se->type is volatile as SSR allocation */
3331                         type = __f2fs_get_curseg(sbi, segno);
3332                         f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
3333                 } else {
3334                         type = CURSEG_WARM_DATA;
3335                 }
3336         }
3337
3338         f2fs_bug_on(sbi, !IS_DATASEG(type));
3339         curseg = CURSEG_I(sbi, type);
3340
3341         mutex_lock(&curseg->curseg_mutex);
3342         down_write(&sit_i->sentry_lock);
3343
3344         old_cursegno = curseg->segno;
3345         old_blkoff = curseg->next_blkoff;
3346
3347         /* change the current segment */
3348         if (segno != curseg->segno) {
3349                 curseg->next_segno = segno;
3350                 change_curseg(sbi, type);
3351         }
3352
3353         curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
3354         __add_sum_entry(sbi, type, sum);
3355
3356         if (!recover_curseg || recover_newaddr)
3357                 update_sit_entry(sbi, new_blkaddr, 1);
3358         if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
3359                 invalidate_mapping_pages(META_MAPPING(sbi),
3360                                         old_blkaddr, old_blkaddr);
3361                 update_sit_entry(sbi, old_blkaddr, -1);
3362         }
3363
3364         locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3365         locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
3366
3367         locate_dirty_segment(sbi, old_cursegno);
3368
3369         if (recover_curseg) {
3370                 if (old_cursegno != curseg->segno) {
3371                         curseg->next_segno = old_cursegno;
3372                         change_curseg(sbi, type);
3373                 }
3374                 curseg->next_blkoff = old_blkoff;
3375         }
3376
3377         up_write(&sit_i->sentry_lock);
3378         mutex_unlock(&curseg->curseg_mutex);
3379         up_write(&SM_I(sbi)->curseg_lock);
3380 }
3381
3382 void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
3383                                 block_t old_addr, block_t new_addr,
3384                                 unsigned char version, bool recover_curseg,
3385                                 bool recover_newaddr)
3386 {
3387         struct f2fs_summary sum;
3388
3389         set_summary(&sum, dn->nid, dn->ofs_in_node, version);
3390
3391         f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
3392                                         recover_curseg, recover_newaddr);
3393
3394         f2fs_update_data_blkaddr(dn, new_addr);
3395 }
3396
3397 void f2fs_wait_on_page_writeback(struct page *page,
3398                                 enum page_type type, bool ordered, bool locked)
3399 {
3400         if (PageWriteback(page)) {
3401                 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
3402
3403                 f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
3404                 if (ordered) {
3405                         wait_on_page_writeback(page);
3406                         f2fs_bug_on(sbi, locked && PageWriteback(page));
3407                 } else {
3408                         wait_for_stable_page(page);
3409                 }
3410         }
3411 }
3412
3413 void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
3414 {
3415         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3416         struct page *cpage;
3417
3418         if (!f2fs_post_read_required(inode))
3419                 return;
3420
3421         if (!__is_valid_data_blkaddr(blkaddr))
3422                 return;
3423
3424         cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
3425         if (cpage) {
3426                 f2fs_wait_on_page_writeback(cpage, DATA, true, true);
3427                 f2fs_put_page(cpage, 1);
3428         }
3429 }
3430
3431 void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
3432                                                                 block_t len)
3433 {
3434         block_t i;
3435
3436         for (i = 0; i < len; i++)
3437                 f2fs_wait_on_block_writeback(inode, blkaddr + i);
3438 }
3439
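/*
 * Restore the compacted data summaries written at checkpoint time:
 * the NAT journal, the SIT journal and the summary entries of the three
 * data logs are unpacked, in that order, from the checkpoint area.
 */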
3440 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
3441 {
3442         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3443         struct curseg_info *seg_i;
3444         unsigned char *kaddr;
3445         struct page *page;
3446         block_t start;
3447         int i, j, offset;
3448
3449         start = start_sum_block(sbi);
3450
3451         page = f2fs_get_meta_page(sbi, start++);
3452         if (IS_ERR(page))
3453                 return PTR_ERR(page);
3454         kaddr = (unsigned char *)page_address(page);
3455
3456         /* Step 1: restore nat cache */
3457         seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3458         memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
3459
3460         /* Step 2: restore sit cache */
3461         seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3462         memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
3463         offset = 2 * SUM_JOURNAL_SIZE;
3464
3465         /* Step 3: restore summary entries */
3466         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
3467                 unsigned short blk_off;
3468                 unsigned int segno;
3469
3470                 seg_i = CURSEG_I(sbi, i);
3471                 segno = le32_to_cpu(ckpt->cur_data_segno[i]);
3472                 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
3473                 seg_i->next_segno = segno;
3474                 reset_curseg(sbi, i, 0);
3475                 seg_i->alloc_type = ckpt->alloc_type[i];
3476                 seg_i->next_blkoff = blk_off;
3477
3478                 if (seg_i->alloc_type == SSR)
3479                         blk_off = sbi->blocks_per_seg;
3480
3481                 for (j = 0; j < blk_off; j++) {
3482                         struct f2fs_summary *s;
3483                         s = (struct f2fs_summary *)(kaddr + offset);
3484                         seg_i->sum_blk->entries[j] = *s;
3485                         offset += SUMMARY_SIZE;
3486                         if (offset + SUMMARY_SIZE <= PAGE_SIZE -
3487                                                 SUM_FOOTER_SIZE)
3488                                 continue;
3489
3490                         f2fs_put_page(page, 1);
3491                         page = NULL;
3492
3493                         page = f2fs_get_meta_page(sbi, start++);
3494                         if (IS_ERR(page))
3495                                 return PTR_ERR(page);
3496                         kaddr = (unsigned char *)page_address(page);
3497                         offset = 0;
3498                 }
3499         }
3500         f2fs_put_page(page, 1);
3501         return 0;
3502 }
3503
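/*
 * Restore one non-compacted summary block for log @type from the
 * checkpoint (or, for node logs without saved summaries, rebuild it by
 * scanning the node segment), then reload the matching curseg state.
 */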
3504 static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
3505 {
3506         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3507         struct f2fs_summary_block *sum;
3508         struct curseg_info *curseg;
3509         struct page *new;
3510         unsigned short blk_off;
3511         unsigned int segno = 0;
3512         block_t blk_addr = 0;
3513         int err = 0;
3514
3515         /* get segment number and block addr */
3516         if (IS_DATASEG(type)) {
3517                 segno = le32_to_cpu(ckpt->cur_data_segno[type]);
3518                 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
3519                                                         CURSEG_HOT_DATA]);
3520                 if (__exist_node_summaries(sbi))
3521                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
3522                 else
3523                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
3524         } else {
3525                 segno = le32_to_cpu(ckpt->cur_node_segno[type -
3526                                                         CURSEG_HOT_NODE]);
3527                 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
3528                                                         CURSEG_HOT_NODE]);
3529                 if (__exist_node_summaries(sbi))
3530                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
3531                                                         type - CURSEG_HOT_NODE);
3532                 else
3533                         blk_addr = GET_SUM_BLOCK(sbi, segno);
3534         }
3535
3536         new = f2fs_get_meta_page(sbi, blk_addr);
3537         if (IS_ERR(new))
3538                 return PTR_ERR(new);
3539         sum = (struct f2fs_summary_block *)page_address(new);
3540
3541         if (IS_NODESEG(type)) {
3542                 if (__exist_node_summaries(sbi)) {
3543                         struct f2fs_summary *ns = &sum->entries[0];
3544                         int i;
3545                         for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
3546                                 ns->version = 0;
3547                                 ns->ofs_in_node = 0;
3548                         }
3549                 } else {
3550                         err = f2fs_restore_node_summary(sbi, segno, sum);
3551                         if (err)
3552                                 goto out;
3553                 }
3554         }
3555
3556         /* set the uncompleted (in-progress) segment as curseg */
3557         curseg = CURSEG_I(sbi, type);
3558         mutex_lock(&curseg->curseg_mutex);
3559
3560         /* update journal info */
3561         down_write(&curseg->journal_rwsem);
3562         memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
3563         up_write(&curseg->journal_rwsem);
3564
3565         memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
3566         memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
3567         curseg->next_segno = segno;
3568         reset_curseg(sbi, type, 0);
3569         curseg->alloc_type = ckpt->alloc_type[type];
3570         curseg->next_blkoff = blk_off;
3571         mutex_unlock(&curseg->curseg_mutex);
3572 out:
3573         f2fs_put_page(new, 1);
3574         return err;
3575 }
3576
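/*
 * Restore all current segments from the on-disk summary blocks at mount
 * time: compacted data summaries first (if CP_COMPACT_SUM_FLAG is set),
 * then one normal summary block per remaining log, followed by a sanity
 * check of the restored NAT/SIT journal entry counts.
 */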
3577 static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
3578 {
3579         struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
3580         struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
3581         int type = CURSEG_HOT_DATA;
3582         int err;
3583
3584         if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
3585                 int npages = f2fs_npages_for_summary_flush(sbi, true);
3586
3587                 if (npages >= 2)
3588                         f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages,
3589                                                         META_CP, true);
3590
3591                 /* restore for compacted data summary */
3592                 err = read_compacted_summaries(sbi);
3593                 if (err)
3594                         return err;
3595                 type = CURSEG_HOT_NODE;
3596         }
3597
3598         if (__exist_node_summaries(sbi))
3599                 f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
3600                                         NR_CURSEG_TYPE - type, META_CP, true);
3601
3602         for (; type <= CURSEG_COLD_NODE; type++) {
3603                 err = read_normal_summaries(sbi, type);
3604                 if (err)
3605                         return err;
3606         }
3607
3608         /* sanity check for summary blocks */
3609         if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
3610                         sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES)
3611                 return -EINVAL;
3612
3613         return 0;
3614 }
3615
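/*
 * Write the data summaries in compacted form: the NAT and SIT journals
 * followed by the summary entries of the three data logs, packed into as
 * few meta pages as possible starting at @blkaddr.
 */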
3616 static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
3617 {
3618         struct page *page;
3619         unsigned char *kaddr;
3620         struct f2fs_summary *summary;
3621         struct curseg_info *seg_i;
3622         int written_size = 0;
3623         int i, j;
3624
3625         page = f2fs_grab_meta_page(sbi, blkaddr++);
3626         kaddr = (unsigned char *)page_address(page);
3627         memset(kaddr, 0, PAGE_SIZE);
3628
3629         /* Step 1: write nat cache */
3630         seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3631         memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
3632         written_size += SUM_JOURNAL_SIZE;
3633
3634         /* Step 2: write sit cache */
3635         seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3636         memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
3637         written_size += SUM_JOURNAL_SIZE;
3638
3639         /* Step 3: write summary entries */
3640         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
3641                 unsigned short blkoff;
3642                 seg_i = CURSEG_I(sbi, i);
3643                 if (sbi->ckpt->alloc_type[i] == SSR)
3644                         blkoff = sbi->blocks_per_seg;
3645                 else
3646                         blkoff = curseg_blkoff(sbi, i);
3647
3648                 for (j = 0; j < blkoff; j++) {
3649                         if (!page) {
3650                                 page = f2fs_grab_meta_page(sbi, blkaddr++);
3651                                 kaddr = (unsigned char *)page_address(page);
3652                                 memset(kaddr, 0, PAGE_SIZE);
3653                                 written_size = 0;
3654                         }
3655                         summary = (struct f2fs_summary *)(kaddr + written_size);
3656                         *summary = seg_i->sum_blk->entries[j];
3657                         written_size += SUMMARY_SIZE;
3658
3659                         if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
3660                                                         SUM_FOOTER_SIZE)
3661                                 continue;
3662
3663                         set_page_dirty(page);
3664                         f2fs_put_page(page, 1);
3665                         page = NULL;
3666                 }
3667         }
3668         if (page) {
3669                 set_page_dirty(page);
3670                 f2fs_put_page(page, 1);
3671         }
3672 }
3673
3674 static void write_normal_summaries(struct f2fs_sb_info *sbi,
3675                                         block_t blkaddr, int type)
3676 {
3677         int i, end;
3678         if (IS_DATASEG(type))
3679                 end = type + NR_CURSEG_DATA_TYPE;
3680         else
3681                 end = type + NR_CURSEG_NODE_TYPE;
3682
3683         for (i = type; i < end; i++)
3684                 write_current_sum_page(sbi, i, blkaddr + (i - type));
3685 }
3686
3687 void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
3688 {
3689         if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
3690                 write_compacted_summaries(sbi, start_blk);
3691         else
3692                 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
3693 }
3694
3695 void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
3696 {
3697         write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
3698 }
3699
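/*
 * Look up @val (a nid or segno) in the NAT or SIT journal of the current
 * summary block.  Returns the slot index if found; if @alloc is set and
 * there is still room, a new slot is reserved instead.  Returns -1 when
 * the entry is absent and no slot could be allocated.
 */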
3700 int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
3701                                         unsigned int val, int alloc)
3702 {
3703         int i;
3704
3705         if (type == NAT_JOURNAL) {
3706                 for (i = 0; i < nats_in_cursum(journal); i++) {
3707                         if (le32_to_cpu(nid_in_journal(journal, i)) == val)
3708                                 return i;
3709                 }
3710                 if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
3711                         return update_nats_in_cursum(journal, 1);
3712         } else if (type == SIT_JOURNAL) {
3713                 for (i = 0; i < sits_in_cursum(journal); i++)
3714                         if (le32_to_cpu(segno_in_journal(journal, i)) == val)
3715                                 return i;
3716                 if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
3717                         return update_sits_in_cursum(journal, 1);
3718         }
3719         return -1;
3720 }
3721
3722 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
3723                                         unsigned int segno)
3724 {
3725         return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno));
3726 }
3727
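/*
 * SIT blocks are double-buffered: copy the current in-memory state of the
 * SIT block covering @start into the alternate on-disk location, mark the
 * page dirty and flip the bitmap so that location becomes the valid one.
 */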
3728 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
3729                                         unsigned int start)
3730 {
3731         struct sit_info *sit_i = SIT_I(sbi);
3732         struct page *page;
3733         pgoff_t src_off, dst_off;
3734
3735         src_off = current_sit_addr(sbi, start);
3736         dst_off = next_sit_addr(sbi, src_off);
3737
3738         page = f2fs_grab_meta_page(sbi, dst_off);
3739         seg_info_to_sit_page(sbi, page, start);
3740
3741         set_page_dirty(page);
3742         set_to_next_sit(sit_i, start);
3743
3744         return page;
3745 }
3746
3747 static struct sit_entry_set *grab_sit_entry_set(void)
3748 {
3749         struct sit_entry_set *ses =
3750                         f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS);
3751
3752         ses->entry_cnt = 0;
3753         INIT_LIST_HEAD(&ses->set_list);
3754         return ses;
3755 }
3756
3757 static void release_sit_entry_set(struct sit_entry_set *ses)
3758 {
3759         list_del(&ses->set_list);
3760         kmem_cache_free(sit_entry_set_slab, ses);
3761 }
3762
3763 static void adjust_sit_entry_set(struct sit_entry_set *ses,
3764                                                 struct list_head *head)
3765 {
3766         struct sit_entry_set *next = ses;
3767
3768         if (list_is_last(&ses->set_list, head))
3769                 return;
3770
3771         list_for_each_entry_continue(next, head, set_list)
3772                 if (ses->entry_cnt <= next->entry_cnt)
3773                         break;
3774
3775         list_move_tail(&ses->set_list, &next->set_list);
3776 }
3777
3778 static void add_sit_entry(unsigned int segno, struct list_head *head)
3779 {
3780         struct sit_entry_set *ses;
3781         unsigned int start_segno = START_SEGNO(segno);
3782
3783         list_for_each_entry(ses, head, set_list) {
3784                 if (ses->start_segno == start_segno) {
3785                         ses->entry_cnt++;
3786                         adjust_sit_entry_set(ses, head);
3787                         return;
3788                 }
3789         }
3790
3791         ses = grab_sit_entry_set();
3792
3793         ses->start_segno = start_segno;
3794         ses->entry_cnt++;
3795         list_add(&ses->set_list, head);
3796 }
3797
3798 static void add_sits_in_set(struct f2fs_sb_info *sbi)
3799 {
3800         struct f2fs_sm_info *sm_info = SM_I(sbi);
3801         struct list_head *set_list = &sm_info->sit_entry_set;
3802         unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
3803         unsigned int segno;
3804
3805         for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
3806                 add_sit_entry(segno, set_list);
3807 }
3808
3809 static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
3810 {
3811         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
3812         struct f2fs_journal *journal = curseg->journal;
3813         int i;
3814
3815         down_write(&curseg->journal_rwsem);
3816         for (i = 0; i < sits_in_cursum(journal); i++) {
3817                 unsigned int segno;
3818                 bool dirtied;
3819
3820                 segno = le32_to_cpu(segno_in_journal(journal, i));
3821                 dirtied = __mark_sit_entry_dirty(sbi, segno);
3822
3823                 if (!dirtied)
3824                         add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
3825         }
3826         update_sits_in_cursum(journal, -i);
3827         up_write(&curseg->journal_rwsem);
3828 }
3829
3830 /*
3831  * CP calls this function, which flushes SIT entries including sit_journal,
3832  * and moves prefree segs to free segs.
3833  */
3834 void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
3835 {
3836         struct sit_info *sit_i = SIT_I(sbi);
3837         unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
3838         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
3839         struct f2fs_journal *journal = curseg->journal;
3840         struct sit_entry_set *ses, *tmp;
3841         struct list_head *head = &SM_I(sbi)->sit_entry_set;
3842         bool to_journal = true;
3843         struct seg_entry *se;
3844
3845         down_write(&sit_i->sentry_lock);
3846
3847         if (!sit_i->dirty_sentries)
3848                 goto out;
3849
3850         /*
3851          * temporarily add and account the sit entries of the dirty bitmap
3852          * in sit entry sets
3853          */
3854         add_sits_in_set(sbi);
3855
3856         /*
3857          * if there is not enough space in the journal to store all dirty sit
3858          * entries, remove every entry from the journal and add and account
3859          * for them in the sit entry sets instead.
3860          */
3861         if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL))
3862                 remove_sits_in_journal(sbi);
3863
3864         /*
3865          * there are two steps to flush sit entries:
3866          * #1, flush sit entries to the journal in the current cold data summary block.
3867          * #2, flush sit entries to the sit page.
3868          */
3869         list_for_each_entry_safe(ses, tmp, head, set_list) {
3870                 struct page *page = NULL;
3871                 struct f2fs_sit_block *raw_sit = NULL;
3872                 unsigned int start_segno = ses->start_segno;
3873                 unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
3874                                                 (unsigned long)MAIN_SEGS(sbi));
3875                 unsigned int segno = start_segno;
3876
3877                 if (to_journal &&
3878                         !__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
3879                         to_journal = false;
3880
3881                 if (to_journal) {
3882                         down_write(&curseg->journal_rwsem);
3883                 } else {
3884                         page = get_next_sit_page(sbi, start_segno);
3885                         raw_sit = page_address(page);
3886                 }
3887
3888                 /* flush dirty sit entries in region of current sit set */
3889                 for_each_set_bit_from(segno, bitmap, end) {
3890                         int offset, sit_offset;
3891
3892                         se = get_seg_entry(sbi, segno);
3893 #ifdef CONFIG_F2FS_CHECK_FS
3894                         if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
3895                                                 SIT_VBLOCK_MAP_SIZE))
3896                                 f2fs_bug_on(sbi, 1);
3897 #endif
3898
3899                         /* add discard candidates */
3900                         if (!(cpc->reason & CP_DISCARD)) {
3901                                 cpc->trim_start = segno;
3902                                 add_discard_addrs(sbi, cpc, false);
3903                         }
3904
3905                         if (to_journal) {
3906                                 offset = f2fs_lookup_journal_in_cursum(journal,
3907                                                         SIT_JOURNAL, segno, 1);
3908                                 f2fs_bug_on(sbi, offset < 0);
3909                                 segno_in_journal(journal, offset) =
3910                                                         cpu_to_le32(segno);
3911                                 seg_info_to_raw_sit(se,
3912                                         &sit_in_journal(journal, offset));
3913                                 check_block_count(sbi, segno,
3914                                         &sit_in_journal(journal, offset));
3915                         } else {
3916                                 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
3917                                 seg_info_to_raw_sit(se,
3918                                                 &raw_sit->entries[sit_offset]);
3919                                 check_block_count(sbi, segno,
3920                                                 &raw_sit->entries[sit_offset]);
3921                         }
3922
3923                         __clear_bit(segno, bitmap);
3924                         sit_i->dirty_sentries--;
3925                         ses->entry_cnt--;
3926                 }
3927
3928                 if (to_journal)
3929                         up_write(&curseg->journal_rwsem);
3930                 else
3931                         f2fs_put_page(page, 1);
3932
3933                 f2fs_bug_on(sbi, ses->entry_cnt);
3934                 release_sit_entry_set(ses);
3935         }
3936
3937         f2fs_bug_on(sbi, !list_empty(head));
3938         f2fs_bug_on(sbi, sit_i->dirty_sentries);
3939 out:
3940         if (cpc->reason & CP_DISCARD) {
3941                 __u64 trim_start = cpc->trim_start;
3942
3943                 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
3944                         add_discard_addrs(sbi, cpc, false);
3945
3946                 cpc->trim_start = trim_start;
3947         }
3948         up_write(&sit_i->sentry_lock);
3949
3950         set_prefree_as_free_segments(sbi);
3951 }
3952
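/*
 * Allocate and initialize the in-memory SIT: per-segment entries and
 * their validity/discard bitmaps, the optional per-section entries, and
 * a copy of the SIT bitmap from the current checkpoint pack.
 */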
3953 static int build_sit_info(struct f2fs_sb_info *sbi)
3954 {
3955         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
3956         struct sit_info *sit_i;
3957         unsigned int sit_segs, start;
3958         char *src_bitmap;
3959         unsigned int bitmap_size;
3960
3961         /* allocate memory for SIT information */
3962         sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
3963         if (!sit_i)
3964                 return -ENOMEM;
3965
3966         SM_I(sbi)->sit_info = sit_i;
3967
3968         sit_i->sentries =
3969                 f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
3970                                               MAIN_SEGS(sbi)),
3971                               GFP_KERNEL);
3972         if (!sit_i->sentries)
3973                 return -ENOMEM;
3974
3975         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
3976         sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, bitmap_size,
3977                                                                 GFP_KERNEL);
3978         if (!sit_i->dirty_sentries_bitmap)
3979                 return -ENOMEM;
3980
3981         for (start = 0; start < MAIN_SEGS(sbi); start++) {
3982                 sit_i->sentries[start].cur_valid_map
3983                         = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
3984                 sit_i->sentries[start].ckpt_valid_map
3985                         = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
3986                 if (!sit_i->sentries[start].cur_valid_map ||
3987                                 !sit_i->sentries[start].ckpt_valid_map)
3988                         return -ENOMEM;
3989
3990 #ifdef CONFIG_F2FS_CHECK_FS
3991                 sit_i->sentries[start].cur_valid_map_mir
3992                         = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
3993                 if (!sit_i->sentries[start].cur_valid_map_mir)
3994                         return -ENOMEM;
3995 #endif
3996
3997                 sit_i->sentries[start].discard_map
3998                         = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE,
3999                                                         GFP_KERNEL);
4000                 if (!sit_i->sentries[start].discard_map)
4001                         return -ENOMEM;
4002         }
4003
4004         sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
4005         if (!sit_i->tmp_map)
4006                 return -ENOMEM;
4007
4008         if (__is_large_section(sbi)) {
4009                 sit_i->sec_entries =
4010                         f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
4011                                                       MAIN_SECS(sbi)),
4012                                       GFP_KERNEL);
4013                 if (!sit_i->sec_entries)
4014                         return -ENOMEM;
4015         }
4016
4017         /* get information related to SIT */
4018         sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
4019
4020         /* set up the SIT bitmap from the checkpoint pack */
4021         bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
4022         src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
4023
4024         sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
4025         if (!sit_i->sit_bitmap)
4026                 return -ENOMEM;
4027
4028 #ifdef CONFIG_F2FS_CHECK_FS
4029         sit_i->sit_bitmap_mir = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
4030         if (!sit_i->sit_bitmap_mir)
4031                 return -ENOMEM;
4032 #endif
4033
4034         /* init SIT information */
4035         sit_i->s_ops = &default_salloc_ops;
4036
4037         sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
4038         sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
4039         sit_i->written_valid_blocks = 0;
4040         sit_i->bitmap_size = bitmap_size;
4041         sit_i->dirty_sentries = 0;
4042         sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
4043         sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
4044         sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec;
4045         init_rwsem(&sit_i->sentry_lock);
4046         return 0;
4047 }
4048
4049 static int build_free_segmap(struct f2fs_sb_info *sbi)
4050 {
4051         struct free_segmap_info *free_i;
4052         unsigned int bitmap_size, sec_bitmap_size;
4053
4054         /* allocate memory for free segmap information */
4055         free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
4056         if (!free_i)
4057                 return -ENOMEM;
4058
4059         SM_I(sbi)->free_info = free_i;
4060
4061         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4062         free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
4063         if (!free_i->free_segmap)
4064                 return -ENOMEM;
4065
4066         sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4067         free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
4068         if (!free_i->free_secmap)
4069                 return -ENOMEM;
4070
4071         /* set all segments as dirty temporarily */
4072         memset(free_i->free_segmap, 0xff, bitmap_size);
4073         memset(free_i->free_secmap, 0xff, sec_bitmap_size);
4074
4075         /* init free segmap information */
4076         free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
4077         free_i->free_segments = 0;
4078         free_i->free_sections = 0;
4079         spin_lock_init(&free_i->segmap_lock);
4080         return 0;
4081 }
4082
4083 static int build_curseg(struct f2fs_sb_info *sbi)
4084 {
4085         struct curseg_info *array;
4086         int i;
4087
4088         array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
4089                              GFP_KERNEL);
4090         if (!array)
4091                 return -ENOMEM;
4092
4093         SM_I(sbi)->curseg_array = array;
4094
4095         for (i = 0; i < NR_CURSEG_TYPE; i++) {
4096                 mutex_init(&array[i].curseg_mutex);
4097                 array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
4098                 if (!array[i].sum_blk)
4099                         return -ENOMEM;
4100                 init_rwsem(&array[i].journal_rwsem);
4101                 array[i].journal = f2fs_kzalloc(sbi,
4102                                 sizeof(struct f2fs_journal), GFP_KERNEL);
4103                 if (!array[i].journal)
4104                         return -ENOMEM;
4105                 array[i].segno = NULL_SEGNO;
4106                 array[i].next_blkoff = 0;
4107         }
4108         return restore_curseg_summaries(sbi);
4109 }
4110
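/*
 * Populate the in-memory SIT entries from the on-disk SIT blocks and then
 * overlay any newer entries recorded in the SIT journal, building the
 * discard maps and cross-checking the total node block count on the way.
 */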
4111 static int build_sit_entries(struct f2fs_sb_info *sbi)
4112 {
4113         struct sit_info *sit_i = SIT_I(sbi);
4114         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4115         struct f2fs_journal *journal = curseg->journal;
4116         struct seg_entry *se;
4117         struct f2fs_sit_entry sit;
4118         int sit_blk_cnt = SIT_BLK_CNT(sbi);
4119         unsigned int i, start, end;
4120         unsigned int readed, start_blk = 0;
4121         int err = 0;
4122         block_t total_node_blocks = 0;
4123
4124         do {
4125                 readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
4126                                                         META_SIT, true);
4127
4128                 start = start_blk * sit_i->sents_per_block;
4129                 end = (start_blk + readed) * sit_i->sents_per_block;
4130
4131                 for (; start < end && start < MAIN_SEGS(sbi); start++) {
4132                         struct f2fs_sit_block *sit_blk;
4133                         struct page *page;
4134
4135                         se = &sit_i->sentries[start];
4136                         page = get_current_sit_page(sbi, start);
4137                         if (IS_ERR(page))
4138                                 return PTR_ERR(page);
4139                         sit_blk = (struct f2fs_sit_block *)page_address(page);
4140                         sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
4141                         f2fs_put_page(page, 1);
4142
4143                         err = check_block_count(sbi, start, &sit);
4144                         if (err)
4145                                 return err;
4146                         seg_info_from_raw_sit(se, &sit);
4147                         if (IS_NODESEG(se->type))
4148                                 total_node_blocks += se->valid_blocks;
4149
4150                         /* build discard map only one time */
4151                         if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4152                                 memset(se->discard_map, 0xff,
4153                                         SIT_VBLOCK_MAP_SIZE);
4154                         } else {
4155                                 memcpy(se->discard_map,
4156                                         se->cur_valid_map,
4157                                         SIT_VBLOCK_MAP_SIZE);
4158                                 sbi->discard_blks +=
4159                                         sbi->blocks_per_seg -
4160                                         se->valid_blocks;
4161                         }
4162
4163                         if (__is_large_section(sbi))
4164                                 get_sec_entry(sbi, start)->valid_blocks +=
4165                                                         se->valid_blocks;
4166                 }
4167                 start_blk += readed;
4168         } while (start_blk < sit_blk_cnt);
4169
4170         down_read(&curseg->journal_rwsem);
4171         for (i = 0; i < sits_in_cursum(journal); i++) {
4172                 unsigned int old_valid_blocks;
4173
4174                 start = le32_to_cpu(segno_in_journal(journal, i));
4175                 if (start >= MAIN_SEGS(sbi)) {
4176                         f2fs_msg(sbi->sb, KERN_ERR,
4177                                         "Wrong journal entry on segno %u",
4178                                         start);
4179                         set_sbi_flag(sbi, SBI_NEED_FSCK);
4180                         err = -EINVAL;
4181                         break;
4182                 }
4183
4184                 se = &sit_i->sentries[start];
4185                 sit = sit_in_journal(journal, i);
4186
4187                 old_valid_blocks = se->valid_blocks;
4188                 if (IS_NODESEG(se->type))
4189                         total_node_blocks -= old_valid_blocks;
4190
4191                 err = check_block_count(sbi, start, &sit);
4192                 if (err)
4193                         break;
4194                 seg_info_from_raw_sit(se, &sit);
4195                 if (IS_NODESEG(se->type))
4196                         total_node_blocks += se->valid_blocks;
4197
4198                 if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4199                         memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
4200                 } else {
4201                         memcpy(se->discard_map, se->cur_valid_map,
4202                                                 SIT_VBLOCK_MAP_SIZE);
4203                         sbi->discard_blks += old_valid_blocks;
4204                         sbi->discard_blks -= se->valid_blocks;
4205                 }
4206
4207                 if (__is_large_section(sbi)) {
4208                         get_sec_entry(sbi, start)->valid_blocks +=
4209                                                         se->valid_blocks;
4210                         get_sec_entry(sbi, start)->valid_blocks -=
4211                                                         old_valid_blocks;
4212                 }
4213         }
4214         up_read(&curseg->journal_rwsem);
4215
4216         if (!err && total_node_blocks != valid_node_count(sbi)) {
4217                 f2fs_msg(sbi->sb, KERN_ERR,
4218                         "SIT is corrupted node# %u vs %u",
4219                         total_node_blocks, valid_node_count(sbi));
4220                 set_sbi_flag(sbi, SBI_NEED_FSCK);
4221                 err = -EINVAL;
4222         }
4223
4224         return err;
4225 }
4226
4227 static void init_free_segmap(struct f2fs_sb_info *sbi)
4228 {
4229         unsigned int start;
4230         int type;
4231
4232         for (start = 0; start < MAIN_SEGS(sbi); start++) {
4233                 struct seg_entry *sentry = get_seg_entry(sbi, start);
4234                 if (!sentry->valid_blocks)
4235                         __set_free(sbi, start);
4236                 else
4237                         SIT_I(sbi)->written_valid_blocks +=
4238                                                 sentry->valid_blocks;
4239         }
4240
4241         /* mark the current segments as in use */
4242         for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
4243                 struct curseg_info *curseg_t = CURSEG_I(sbi, type);
4244                 __set_test_and_inuse(sbi, curseg_t->segno);
4245         }
4246 }
4247
4248 static void init_dirty_segmap(struct f2fs_sb_info *sbi)
4249 {
4250         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4251         struct free_segmap_info *free_i = FREE_I(sbi);
4252         unsigned int segno = 0, offset = 0;
4253         unsigned short valid_blocks;
4254
4255         while (1) {
4256                 /* find dirty segment based on free segmap */
4257                 segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
4258                 if (segno >= MAIN_SEGS(sbi))
4259                         break;
4260                 offset = segno + 1;
4261                 valid_blocks = get_valid_blocks(sbi, segno, false);
4262                 if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
4263                         continue;
4264                 if (valid_blocks > sbi->blocks_per_seg) {
4265                         f2fs_bug_on(sbi, 1);
4266                         continue;
4267                 }
4268                 mutex_lock(&dirty_i->seglist_lock);
4269                 __locate_dirty_segment(sbi, segno, DIRTY);
4270                 mutex_unlock(&dirty_i->seglist_lock);
4271         }
4272 }
4273
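/*
 * Allocate the per-section victim bitmap consulted when selecting GC victims.
 * (Descriptive comment added for clarity; behavior unchanged.)
 */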
4274 static int init_victim_secmap(struct f2fs_sb_info *sbi)
4275 {
4276         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4277         unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4278
4279         dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4280         if (!dirty_i->victim_secmap)
4281                 return -ENOMEM;
4282         return 0;
4283 }
4284
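/*
 * Allocate the dirty segment list information and the per-type dirty bitmaps,
 * then populate them from the current segment usage.
 * (Descriptive comment added for clarity; behavior unchanged.)
 */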
4285 static int build_dirty_segmap(struct f2fs_sb_info *sbi)
4286 {
4287         struct dirty_seglist_info *dirty_i;
4288         unsigned int bitmap_size, i;
4289
4290         /* allocate memory for dirty segments list information */
4291         dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
4292                                                                 GFP_KERNEL);
4293         if (!dirty_i)
4294                 return -ENOMEM;
4295
4296         SM_I(sbi)->dirty_info = dirty_i;
4297         mutex_init(&dirty_i->seglist_lock);
4298
4299         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4300
4301         for (i = 0; i < NR_DIRTY_TYPE; i++) {
4302                 dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
4303                                                                 GFP_KERNEL);
4304                 if (!dirty_i->dirty_segmap[i])
4305                         return -ENOMEM;
4306         }
4307
4308         init_dirty_segmap(sbi);
4309         return init_victim_secmap(sbi);
4310 }
4311
4312 /*
4313  * Update min, max modified time for cost-benefit GC algorithm
4314  */
4315 static void init_min_max_mtime(struct f2fs_sb_info *sbi)
4316 {
4317         struct sit_info *sit_i = SIT_I(sbi);
4318         unsigned int segno;
4319
4320         down_write(&sit_i->sentry_lock);
4321
4322         sit_i->min_mtime = ULLONG_MAX;
4323
4324         for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
4325                 unsigned int i;
4326                 unsigned long long mtime = 0;
4327
4328                 for (i = 0; i < sbi->segs_per_sec; i++)
4329                         mtime += get_seg_entry(sbi, segno + i)->mtime;
4330
4331                 mtime = div_u64(mtime, sbi->segs_per_sec);
4332
4333                 if (sit_i->min_mtime > mtime)
4334                         sit_i->min_mtime = mtime;
4335         }
4336         sit_i->max_mtime = get_mtime(sbi, false);
4337         up_write(&sit_i->sentry_lock);
4338 }
4339
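/*
 * Build the segment manager: read geometry from the superblock and checkpoint,
 * start flush/discard control, and construct the SIT, free, curseg and dirty
 * segment information.
 * (Descriptive comment added for clarity; behavior unchanged.)
 */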
4340 int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
4341 {
4342         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
4343         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
4344         struct f2fs_sm_info *sm_info;
4345         int err;
4346
4347         sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
4348         if (!sm_info)
4349                 return -ENOMEM;
4350
4351         /* init sm info */
4352         sbi->sm_info = sm_info;
4353         sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
4354         sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
4355         sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
4356         sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
4357         sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
4358         sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
4359         sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
4360         sm_info->rec_prefree_segments = sm_info->main_segments *
4361                                         DEF_RECLAIM_PREFREE_SEGMENTS / 100;
4362         if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
4363                 sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;
4364
4365         if (!test_opt(sbi, LFS))
4366                 sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
4367         sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
4368         sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
4369         sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
4370         sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
4371         sm_info->min_ssr_sections = reserved_sections(sbi);
4372
4373         INIT_LIST_HEAD(&sm_info->sit_entry_set);
4374
4375         init_rwsem(&sm_info->curseg_lock);
4376
4377         if (!f2fs_readonly(sbi->sb)) {
4378                 err = f2fs_create_flush_cmd_control(sbi);
4379                 if (err)
4380                         return err;
4381         }
4382
4383         err = create_discard_cmd_control(sbi);
4384         if (err)
4385                 return err;
4386
4387         err = build_sit_info(sbi);
4388         if (err)
4389                 return err;
4390         err = build_free_segmap(sbi);
4391         if (err)
4392                 return err;
4393         err = build_curseg(sbi);
4394         if (err)
4395                 return err;
4396
4397         /* reinit free segmap based on SIT */
4398         err = build_sit_entries(sbi);
4399         if (err)
4400                 return err;
4401
4402         init_free_segmap(sbi);
4403         err = build_dirty_segmap(sbi);
4404         if (err)
4405                 return err;
4406
4407         init_min_max_mtime(sbi);
4408         return 0;
4409 }
4410
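/*
 * Release the dirty bitmap of the given type and reset its dirty count.
 * (Descriptive comment added for clarity; behavior unchanged.)
 */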
4411 static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
4412                 enum dirty_type dirty_type)
4413 {
4414         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4415
4416         mutex_lock(&dirty_i->seglist_lock);
4417         kvfree(dirty_i->dirty_segmap[dirty_type]);
4418         dirty_i->nr_dirty[dirty_type] = 0;
4419         mutex_unlock(&dirty_i->seglist_lock);
4420 }
4421
4422 static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
4423 {
4424         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4425         kvfree(dirty_i->victim_secmap);
4426 }
4427
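/*
 * Free the per-type dirty bitmaps, the victim bitmap and the dirty seglist
 * information itself.
 * (Descriptive comment added for clarity; behavior unchanged.)
 */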
4428 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
4429 {
4430         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4431         int i;
4432
4433         if (!dirty_i)
4434                 return;
4435
4436         /* discard pre-free/dirty segments list */
4437         for (i = 0; i < NR_DIRTY_TYPE; i++)
4438                 discard_dirty_segmap(sbi, i);
4439
4440         destroy_victim_secmap(sbi);
4441         SM_I(sbi)->dirty_info = NULL;
4442         kvfree(dirty_i);
4443 }
4444
4445 static void destroy_curseg(struct f2fs_sb_info *sbi)
4446 {
4447         struct curseg_info *array = SM_I(sbi)->curseg_array;
4448         int i;
4449
4450         if (!array)
4451                 return;
4452         SM_I(sbi)->curseg_array = NULL;
4453         for (i = 0; i < NR_CURSEG_TYPE; i++) {
4454                 kvfree(array[i].sum_blk);
4455                 kvfree(array[i].journal);
4456         }
4457         kvfree(array);
4458 }
4459
4460 static void destroy_free_segmap(struct f2fs_sb_info *sbi)
4461 {
4462         struct free_segmap_info *free_i = SM_I(sbi)->free_info;
4463         if (!free_i)
4464                 return;
4465         SM_I(sbi)->free_info = NULL;
4466         kvfree(free_i->free_segmap);
4467         kvfree(free_i->free_secmap);
4468         kvfree(free_i);
4469 }
4470
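/*
 * Free the per-segment validity/discard maps, the section entries, the dirty
 * sentries bitmap and the SIT bitmaps.
 * (Descriptive comment added for clarity; behavior unchanged.)
 */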
4471 static void destroy_sit_info(struct f2fs_sb_info *sbi)
4472 {
4473         struct sit_info *sit_i = SIT_I(sbi);
4474         unsigned int start;
4475
4476         if (!sit_i)
4477                 return;
4478
4479         if (sit_i->sentries) {
4480                 for (start = 0; start < MAIN_SEGS(sbi); start++) {
4481                         kvfree(sit_i->sentries[start].cur_valid_map);
4482 #ifdef CONFIG_F2FS_CHECK_FS
4483                         kvfree(sit_i->sentries[start].cur_valid_map_mir);
4484 #endif
4485                         kvfree(sit_i->sentries[start].ckpt_valid_map);
4486                         kvfree(sit_i->sentries[start].discard_map);
4487                 }
4488         }
4489         kvfree(sit_i->tmp_map);
4490
4491         kvfree(sit_i->sentries);
4492         kvfree(sit_i->sec_entries);
4493         kvfree(sit_i->dirty_sentries_bitmap);
4494
4495         SM_I(sbi)->sit_info = NULL;
4496         kvfree(sit_i->sit_bitmap);
4497 #ifdef CONFIG_F2FS_CHECK_FS
4498         kvfree(sit_i->sit_bitmap_mir);
4499 #endif
4500         kvfree(sit_i);
4501 }
4502
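/*
 * Tear down the segment manager: stop flush/discard control, then release the
 * dirty, curseg, free and SIT information before freeing sm_info.
 * (Descriptive comment added for clarity; behavior unchanged.)
 */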
4503 void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
4504 {
4505         struct f2fs_sm_info *sm_info = SM_I(sbi);
4506
4507         if (!sm_info)
4508                 return;
4509         f2fs_destroy_flush_cmd_control(sbi, true);
4510         destroy_discard_cmd_control(sbi);
4511         destroy_dirty_segmap(sbi);
4512         destroy_curseg(sbi);
4513         destroy_free_segmap(sbi);
4514         destroy_sit_info(sbi);
4515         sbi->sm_info = NULL;
4516         kvfree(sm_info);
4517 }
4518
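/*
 * Create the slab caches for discard entries, discard commands, SIT entry
 * sets and in-memory page entries, unwinding on allocation failure.
 * (Descriptive comment added for clarity; behavior unchanged.)
 */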
4519 int __init f2fs_create_segment_manager_caches(void)
4520 {
4521         discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
4522                         sizeof(struct discard_entry));
4523         if (!discard_entry_slab)
4524                 goto fail;
4525
4526         discard_cmd_slab = f2fs_kmem_cache_create("discard_cmd",
4527                         sizeof(struct discard_cmd));
4528         if (!discard_cmd_slab)
4529                 goto destroy_discard_entry;
4530
4531         sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
4532                         sizeof(struct sit_entry_set));
4533         if (!sit_entry_set_slab)
4534                 goto destroy_discard_cmd;
4535
4536         inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
4537                         sizeof(struct inmem_pages));
4538         if (!inmem_entry_slab)
4539                 goto destroy_sit_entry_set;
4540         return 0;
4541
4542 destroy_sit_entry_set:
4543         kmem_cache_destroy(sit_entry_set_slab);
4544 destroy_discard_cmd:
4545         kmem_cache_destroy(discard_cmd_slab);
4546 destroy_discard_entry:
4547         kmem_cache_destroy(discard_entry_slab);
4548 fail:
4549         return -ENOMEM;
4550 }
4551
4552 void f2fs_destroy_segment_manager_caches(void)
4553 {
4554         kmem_cache_destroy(sit_entry_set_slab);
4555         kmem_cache_destroy(discard_cmd_slab);
4556         kmem_cache_destroy(discard_entry_slab);
4557         kmem_cache_destroy(inmem_entry_slab);
4558 }