f2fs: introduce DATA_GENERIC_ENHANCE
[sagit-ice-cold/kernel_xiaomi_msm8998.git] / fs/f2fs/segment.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * fs/f2fs/segment.c
4  *
5  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
6  *             http://www.samsung.com/
7  */
8 #include <linux/fs.h>
9 #include <linux/f2fs_fs.h>
10 #include <linux/bio.h>
11 #include <linux/blkdev.h>
12 #include <linux/prefetch.h>
13 #include <linux/kthread.h>
14 #include <linux/swap.h>
15 #include <linux/timer.h>
16 #include <linux/freezer.h>
17 #include <linux/sched.h>
18
19 #include "f2fs.h"
20 #include "segment.h"
21 #include "node.h"
22 #include "gc.h"
23 #include "trace.h"
24 #include <trace/events/f2fs.h>
25
26 #define __reverse_ffz(x) __reverse_ffs(~(x))
27
28 static struct kmem_cache *discard_entry_slab;
29 static struct kmem_cache *discard_cmd_slab;
30 static struct kmem_cache *sit_entry_set_slab;
31 static struct kmem_cache *inmem_entry_slab;
32
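/*
 * Load BITS_PER_LONG bits from @str with str[0] placed in the most
 * significant byte, so that the MSB-first bit order used by f2fs bitmaps
 * can be scanned with the ordinary bit helpers below.
 */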
33 static unsigned long __reverse_ulong(unsigned char *str)
34 {
35         unsigned long tmp = 0;
36         int shift = 24, idx = 0;
37
38 #if BITS_PER_LONG == 64
39         shift = 56;
40 #endif
41         while (shift >= 0) {
42                 tmp |= (unsigned long)str[idx++] << shift;
43                 shift -= BITS_PER_BYTE;
44         }
45         return tmp;
46 }
47
48 /*
49  * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
50  * MSB and LSB are reversed in a byte by f2fs_set_bit.
51  */
52 static inline unsigned long __reverse_ffs(unsigned long word)
53 {
54         int num = 0;
55
56 #if BITS_PER_LONG == 64
57         if ((word & 0xffffffff00000000UL) == 0)
58                 num += 32;
59         else
60                 word >>= 32;
61 #endif
62         if ((word & 0xffff0000) == 0)
63                 num += 16;
64         else
65                 word >>= 16;
66
67         if ((word & 0xff00) == 0)
68                 num += 8;
69         else
70                 word >>= 8;
71
72         if ((word & 0xf0) == 0)
73                 num += 4;
74         else
75                 word >>= 4;
76
77         if ((word & 0xc) == 0)
78                 num += 2;
79         else
80                 word >>= 2;
81
82         if ((word & 0x2) == 0)
83                 num += 1;
84         return num;
85 }
86
87 /*
88  * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
89  * f2fs_set_bit makes MSB and LSB reversed in a byte.
90  * @size must be an integral multiple of BITS_PER_LONG.
91  * Example:
92  *                             MSB <--> LSB
93  *   f2fs_set_bit(0, bitmap) => 1000 0000
94  *   f2fs_set_bit(7, bitmap) => 0000 0001
95  */
96 static unsigned long __find_rev_next_bit(const unsigned long *addr,
97                         unsigned long size, unsigned long offset)
98 {
99         const unsigned long *p = addr + BIT_WORD(offset);
100         unsigned long result = size;
101         unsigned long tmp;
102
103         if (offset >= size)
104                 return size;
105
106         size -= (offset & ~(BITS_PER_LONG - 1));
107         offset %= BITS_PER_LONG;
108
109         while (1) {
110                 if (*p == 0)
111                         goto pass;
112
113                 tmp = __reverse_ulong((unsigned char *)p);
114
115                 tmp &= ~0UL >> offset;
116                 if (size < BITS_PER_LONG)
117                         tmp &= (~0UL << (BITS_PER_LONG - size));
118                 if (tmp)
119                         goto found;
120 pass:
121                 if (size <= BITS_PER_LONG)
122                         break;
123                 size -= BITS_PER_LONG;
124                 offset = 0;
125                 p++;
126         }
127         return result;
128 found:
129         return result - size + __reverse_ffs(tmp);
130 }
131
132 static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
133                         unsigned long size, unsigned long offset)
134 {
135         const unsigned long *p = addr + BIT_WORD(offset);
136         unsigned long result = size;
137         unsigned long tmp;
138
139         if (offset >= size)
140                 return size;
141
142         size -= (offset & ~(BITS_PER_LONG - 1));
143         offset %= BITS_PER_LONG;
144
145         while (1) {
146                 if (*p == ~0UL)
147                         goto pass;
148
149                 tmp = __reverse_ulong((unsigned char *)p);
150
151                 if (offset)
152                         tmp |= ~0UL << (BITS_PER_LONG - offset);
153                 if (size < BITS_PER_LONG)
154                         tmp |= ~0UL >> size;
155                 if (tmp != ~0UL)
156                         goto found;
157 pass:
158                 if (size <= BITS_PER_LONG)
159                         break;
160                 size -= BITS_PER_LONG;
161                 offset = 0;
162                 p++;
163         }
164         return result;
165 found:
166         return result - size + __reverse_ffz(tmp);
167 }
168
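/*
 * Return true if block allocation should fall back to SSR: never in LFS
 * mode; always under urgent GC or while checkpointing is disabled;
 * otherwise only when free sections drop below the projected dirty
 * node/dentry/imeta demand plus the reserved and min_ssr thresholds.
 */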
169 bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
170 {
171         int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
172         int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
173         int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
174
175         if (test_opt(sbi, LFS))
176                 return false;
177         if (sbi->gc_mode == GC_URGENT)
178                 return true;
179         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
180                 return true;
181
182         return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
183                         SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
184 }
185
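/*
 * Register @page as an atomic-written page: take an extra reference, queue
 * it on the inode's inmem_pages list and, on first use, add the inode to
 * the global ATOMIC_FILE inode list.
 */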
186 void f2fs_register_inmem_page(struct inode *inode, struct page *page)
187 {
188         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
189         struct f2fs_inode_info *fi = F2FS_I(inode);
190         struct inmem_pages *new;
191
192         f2fs_trace_pid(page);
193
194         f2fs_set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
195
196         new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
197
198         /* add atomic page indices to the list */
199         new->page = page;
200         INIT_LIST_HEAD(&new->list);
201
202         /* increase reference count with clean state */
203         mutex_lock(&fi->inmem_lock);
204         get_page(page);
205         list_add_tail(&new->list, &fi->inmem_pages);
206         spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
207         if (list_empty(&fi->inmem_ilist))
208                 list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
209         spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
210         inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
211         mutex_unlock(&fi->inmem_lock);
212
213         trace_f2fs_register_inmem_page(page, INMEM);
214 }
215
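/*
 * Walk @head: when @recover is set, roll each page's on-disk block address
 * back to cur->old_addr; when @drop is set, simply throw the pages away.
 * @trylock uses trylock_page() to avoid a deadlock between the page lock
 * and inmem_lock.
 */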
216 static int __revoke_inmem_pages(struct inode *inode,
217                                 struct list_head *head, bool drop, bool recover,
218                                 bool trylock)
219 {
220         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
221         struct inmem_pages *cur, *tmp;
222         int err = 0;
223
224         list_for_each_entry_safe(cur, tmp, head, list) {
225                 struct page *page = cur->page;
226
227                 if (drop)
228                         trace_f2fs_commit_inmem_page(page, INMEM_DROP);
229
230                 if (trylock) {
231                         /*
232                          * to avoid deadlock in between page lock and
233                          * inmem_lock.
234                          */
235                         if (!trylock_page(page))
236                                 continue;
237                 } else {
238                         lock_page(page);
239                 }
240
241                 f2fs_wait_on_page_writeback(page, DATA, true, true);
242
243                 if (recover) {
244                         struct dnode_of_data dn;
245                         struct node_info ni;
246
247                         trace_f2fs_commit_inmem_page(page, INMEM_REVOKE);
248 retry:
249                         set_new_dnode(&dn, inode, NULL, NULL, 0);
250                         err = f2fs_get_dnode_of_data(&dn, page->index,
251                                                                 LOOKUP_NODE);
252                         if (err) {
253                                 if (err == -ENOMEM) {
254                                         congestion_wait(BLK_RW_ASYNC, HZ/50);
255                                         cond_resched();
256                                         goto retry;
257                                 }
258                                 err = -EAGAIN;
259                                 goto next;
260                         }
261
262                         err = f2fs_get_node_info(sbi, dn.nid, &ni);
263                         if (err) {
264                                 f2fs_put_dnode(&dn);
265                                 return err;
266                         }
267
268                         if (cur->old_addr == NEW_ADDR) {
269                                 f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
270                                 f2fs_update_data_blkaddr(&dn, NEW_ADDR);
271                         } else
272                                 f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
273                                         cur->old_addr, ni.version, true, true);
274                         f2fs_put_dnode(&dn);
275                 }
276 next:
277                 /* we don't need to invalidate this in the successful status */
278                 if (drop || recover) {
279                         ClearPageUptodate(page);
280                         clear_cold_data(page);
281                 }
282                 f2fs_clear_page_private(page);
283                 f2fs_put_page(page, 1);
284
285                 list_del(&cur->list);
286                 kmem_cache_free(inmem_entry_slab, cur);
287                 dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
288         }
289         return err;
290 }
291
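/*
 * Drop the in-memory pages of every inode on the ATOMIC_FILE list; when
 * @gc_failure is set, only inodes that have recorded atomic GC failures
 * are dropped.
 */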
292 void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
293 {
294         struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
295         struct inode *inode;
296         struct f2fs_inode_info *fi;
297 next:
298         spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
299         if (list_empty(head)) {
300                 spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
301                 return;
302         }
303         fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
304         inode = igrab(&fi->vfs_inode);
305         spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
306
307         if (inode) {
308                 if (gc_failure) {
309                         if (fi->i_gc_failures[GC_FAILURE_ATOMIC])
310                                 goto drop;
311                         goto skip;
312                 }
313 drop:
314                 set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
315                 f2fs_drop_inmem_pages(inode);
316                 iput(inode);
317         }
318 skip:
319         congestion_wait(BLK_RW_ASYNC, HZ/50);
320         cond_resched();
321         goto next;
322 }
323
324 void f2fs_drop_inmem_pages(struct inode *inode)
325 {
326         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
327         struct f2fs_inode_info *fi = F2FS_I(inode);
328
329         while (!list_empty(&fi->inmem_pages)) {
330                 mutex_lock(&fi->inmem_lock);
331                 __revoke_inmem_pages(inode, &fi->inmem_pages,
332                                                 true, false, true);
333
334                 if (list_empty(&fi->inmem_pages)) {
335                         spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
336                         if (!list_empty(&fi->inmem_ilist))
337                                 list_del_init(&fi->inmem_ilist);
338                         spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
339                 }
340                 mutex_unlock(&fi->inmem_lock);
341         }
342
343         clear_inode_flag(inode, FI_ATOMIC_FILE);
344         fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
345         stat_dec_atomic_write(inode);
346 }
347
348 void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
349 {
350         struct f2fs_inode_info *fi = F2FS_I(inode);
351         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
352         struct list_head *head = &fi->inmem_pages;
353         struct inmem_pages *cur = NULL;
354
355         f2fs_bug_on(sbi, !IS_ATOMIC_WRITTEN_PAGE(page));
356
357         mutex_lock(&fi->inmem_lock);
358         list_for_each_entry(cur, head, list) {
359                 if (cur->page == page)
360                         break;
361         }
362
363         f2fs_bug_on(sbi, list_empty(head) || cur->page != page);
364         list_del(&cur->list);
365         mutex_unlock(&fi->inmem_lock);
366
367         dec_page_count(sbi, F2FS_INMEM_PAGES);
368         kmem_cache_free(inmem_entry_slab, cur);
369
370         ClearPageUptodate(page);
371         f2fs_clear_page_private(page);
372         f2fs_put_page(page, 0);
373
374         trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
375 }
376
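/*
 * Write back every queued atomic page in place, recording each old block
 * address so that already-committed pages can be revoked if a later write
 * fails.
 */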
377 static int __f2fs_commit_inmem_pages(struct inode *inode)
378 {
379         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
380         struct f2fs_inode_info *fi = F2FS_I(inode);
381         struct inmem_pages *cur, *tmp;
382         struct f2fs_io_info fio = {
383                 .sbi = sbi,
384                 .ino = inode->i_ino,
385                 .type = DATA,
386                 .op = REQ_OP_WRITE,
387                 .op_flags = REQ_SYNC | REQ_PRIO,
388                 .io_type = FS_DATA_IO,
389         };
390         struct list_head revoke_list;
391         bool submit_bio = false;
392         int err = 0;
393
394         INIT_LIST_HEAD(&revoke_list);
395
396         list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
397                 struct page *page = cur->page;
398
399                 lock_page(page);
400                 if (page->mapping == inode->i_mapping) {
401                         trace_f2fs_commit_inmem_page(page, INMEM);
402
403                         f2fs_wait_on_page_writeback(page, DATA, true, true);
404
405                         set_page_dirty(page);
406                         if (clear_page_dirty_for_io(page)) {
407                                 inode_dec_dirty_pages(inode);
408                                 f2fs_remove_dirty_inode(inode);
409                         }
410 retry:
411                         fio.page = page;
412                         fio.old_blkaddr = NULL_ADDR;
413                         fio.encrypted_page = NULL;
414                         fio.need_lock = LOCK_DONE;
415                         err = f2fs_do_write_data_page(&fio);
416                         if (err) {
417                                 if (err == -ENOMEM) {
418                                         congestion_wait(BLK_RW_ASYNC, HZ/50);
419                                         cond_resched();
420                                         goto retry;
421                                 }
422                                 unlock_page(page);
423                                 break;
424                         }
425                         /* record old blkaddr for revoking */
426                         cur->old_addr = fio.old_blkaddr;
427                         submit_bio = true;
428                 }
429                 unlock_page(page);
430                 list_move_tail(&cur->list, &revoke_list);
431         }
432
433         if (submit_bio)
434                 f2fs_submit_merged_write_cond(sbi, inode, NULL, 0, DATA);
435
436         if (err) {
437                 /*
438                  * Try to revoke all committed pages, but we could still fail
439                  * due to lack of memory or some other reason. If that happens,
440                  * EAGAIN is returned, which means the transaction's integrity
441                  * is already broken, and the caller should use a journal to do
442                  * the recovery or rewrite & commit the last transaction. For any
443                  * other error number, revoking was done by the filesystem itself.
444                  */
445                 err = __revoke_inmem_pages(inode, &revoke_list,
446                                                 false, true, false);
447
448                 /* drop all uncommitted pages */
449                 __revoke_inmem_pages(inode, &fi->inmem_pages,
450                                                 true, false, false);
451         } else {
452                 __revoke_inmem_pages(inode, &revoke_list,
453                                                 false, false, false);
454         }
455
456         return err;
457 }
458
459 int f2fs_commit_inmem_pages(struct inode *inode)
460 {
461         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
462         struct f2fs_inode_info *fi = F2FS_I(inode);
463         int err;
464
465         f2fs_balance_fs(sbi, true);
466
467         down_write(&fi->i_gc_rwsem[WRITE]);
468
469         f2fs_lock_op(sbi);
470         set_inode_flag(inode, FI_ATOMIC_COMMIT);
471
472         mutex_lock(&fi->inmem_lock);
473         err = __f2fs_commit_inmem_pages(inode);
474
475         spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
476         if (!list_empty(&fi->inmem_ilist))
477                 list_del_init(&fi->inmem_ilist);
478         spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
479         mutex_unlock(&fi->inmem_lock);
480
481         clear_inode_flag(inode, FI_ATOMIC_COMMIT);
482
483         f2fs_unlock_op(sbi);
484         up_write(&fi->i_gc_rwsem[WRITE]);
485
486         return err;
487 }
488
489 /*
490  * This function balances dirty node and dentry pages.
491  * In addition, it controls garbage collection.
492  */
493 void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
494 {
495         if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
496                 f2fs_show_injection_info(FAULT_CHECKPOINT);
497                 f2fs_stop_checkpoint(sbi, false);
498         }
499
500         /* balance_fs_bg can be left pending */
501         if (need && excess_cached_nats(sbi))
502                 f2fs_balance_fs_bg(sbi);
503
504         if (f2fs_is_checkpoint_ready(sbi))
505                 return;
506
507         /*
508          * We should do GC, or end up doing a checkpoint, if there are too many
509          * dirty dir/node pages without enough free segments.
510          */
511         if (has_not_enough_free_secs(sbi, 0, 0)) {
512                 mutex_lock(&sbi->gc_mutex);
513                 f2fs_gc(sbi, false, false, NULL_SEGNO);
514         }
515 }
516
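/*
 * Background balancing: shrink the extent cache, NAT entries and free nids
 * when memory runs low, and trigger a background checkpoint once dirty
 * NAT/node entries, prefree segments or the checkpoint timer pile up.
 */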
517 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
518 {
519         if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
520                 return;
521
522         /* try to shrink extent cache when there is not enough memory */
523         if (!f2fs_available_free_memory(sbi, EXTENT_CACHE))
524                 f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);
525
526         /* check the # of cached NAT entries */
527         if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
528                 f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
529
530         if (!f2fs_available_free_memory(sbi, FREE_NIDS))
531                 f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
532         else
533                 f2fs_build_free_nids(sbi, false, false);
534
535         if (!is_idle(sbi, REQ_TIME) &&
536                 (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
537                 return;
538
539         /* checkpoint is the only way to shrink partial cached entries */
540         if (!f2fs_available_free_memory(sbi, NAT_ENTRIES) ||
541                         !f2fs_available_free_memory(sbi, INO_ENTRIES) ||
542                         excess_prefree_segs(sbi) ||
543                         excess_dirty_nats(sbi) ||
544                         excess_dirty_nodes(sbi) ||
545                         f2fs_time_over(sbi, CP_TIME)) {
546                 if (test_opt(sbi, DATA_FLUSH)) {
547                         struct blk_plug plug;
548
549                         blk_start_plug(&plug);
550                         f2fs_sync_dirty_inodes(sbi, FILE_INODE);
551                         blk_finish_plug(&plug);
552                 }
553                 f2fs_sync_fs(sbi->sb, true);
554                 stat_inc_bg_cp_count(sbi->stat_info);
555         }
556 }
557
558 static int __submit_flush_wait(struct f2fs_sb_info *sbi,
559                                 struct block_device *bdev)
560 {
561         struct bio *bio;
562         int ret;
563
564         bio = f2fs_bio_alloc(sbi, 0, false);
565         if (!bio)
566                 return -ENOMEM;
567
568         bio->bi_rw = REQ_OP_WRITE;
569         bio->bi_bdev = bdev;
570         ret = submit_bio_wait(WRITE_FLUSH, bio);
571         bio_put(bio);
572
573         trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
574                                 test_opt(sbi, FLUSH_MERGE), ret);
575         return ret;
576 }
577
578 static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
579 {
580         int ret = 0;
581         int i;
582
583         if (!f2fs_is_multi_device(sbi))
584                 return __submit_flush_wait(sbi, sbi->sb->s_bdev);
585
586         for (i = 0; i < sbi->s_ndevs; i++) {
587                 if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
588                         continue;
589                 ret = __submit_flush_wait(sbi, FDEV(i).bdev);
590                 if (ret)
591                         break;
592         }
593         return ret;
594 }
595
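/*
 * Flush-merge worker: drain the issue list, submit one device flush on
 * behalf of all queued waiters, and complete each of them with the same
 * result.
 */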
596 static int issue_flush_thread(void *data)
597 {
598         struct f2fs_sb_info *sbi = data;
599         struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
600         wait_queue_head_t *q = &fcc->flush_wait_queue;
601 repeat:
602         if (kthread_should_stop())
603                 return 0;
604
605         sb_start_intwrite(sbi->sb);
606
607         if (!llist_empty(&fcc->issue_list)) {
608                 struct flush_cmd *cmd, *next;
609                 int ret;
610
611                 fcc->dispatch_list = llist_del_all(&fcc->issue_list);
612                 fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
613
614                 cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);
615
616                 ret = submit_flush_wait(sbi, cmd->ino);
617                 atomic_inc(&fcc->issued_flush);
618
619                 llist_for_each_entry_safe(cmd, next,
620                                           fcc->dispatch_list, llnode) {
621                         cmd->ret = ret;
622                         complete(&cmd->wait);
623                 }
624                 fcc->dispatch_list = NULL;
625         }
626
627         sb_end_intwrite(sbi->sb);
628
629         wait_event_interruptible(*q,
630                 kthread_should_stop() || !llist_empty(&fcc->issue_list));
631         goto repeat;
632 }
633
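/*
 * Issue a cache flush for @ino.  NOBARRIER skips it entirely; without
 * FLUSH_MERGE, when no other flush is queued, or on multi-device setups
 * the flush is submitted directly; otherwise the command is queued for the
 * flush-merge thread (or drained locally if the thread has exited).
 */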
634 int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
635 {
636         struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
637         struct flush_cmd cmd;
638         int ret;
639
640         if (test_opt(sbi, NOBARRIER))
641                 return 0;
642
643         if (!test_opt(sbi, FLUSH_MERGE)) {
644                 atomic_inc(&fcc->queued_flush);
645                 ret = submit_flush_wait(sbi, ino);
646                 atomic_dec(&fcc->queued_flush);
647                 atomic_inc(&fcc->issued_flush);
648                 return ret;
649         }
650
651         if (atomic_inc_return(&fcc->queued_flush) == 1 ||
652             f2fs_is_multi_device(sbi)) {
653                 ret = submit_flush_wait(sbi, ino);
654                 atomic_dec(&fcc->queued_flush);
655
656                 atomic_inc(&fcc->issued_flush);
657                 return ret;
658         }
659
660         cmd.ino = ino;
661         init_completion(&cmd.wait);
662
663         llist_add(&cmd.llnode, &fcc->issue_list);
664
665         /* update issue_list before we wake up issue_flush thread */
666         smp_mb();
667
668         if (waitqueue_active(&fcc->flush_wait_queue))
669                 wake_up(&fcc->flush_wait_queue);
670
671         if (fcc->f2fs_issue_flush) {
672                 wait_for_completion(&cmd.wait);
673                 atomic_dec(&fcc->queued_flush);
674         } else {
675                 struct llist_node *list;
676
677                 list = llist_del_all(&fcc->issue_list);
678                 if (!list) {
679                         wait_for_completion(&cmd.wait);
680                         atomic_dec(&fcc->queued_flush);
681                 } else {
682                         struct flush_cmd *tmp, *next;
683
684                         ret = submit_flush_wait(sbi, ino);
685
686                         llist_for_each_entry_safe(tmp, next, list, llnode) {
687                                 if (tmp == &cmd) {
688                                         cmd.ret = ret;
689                                         atomic_dec(&fcc->queued_flush);
690                                         continue;
691                                 }
692                                 tmp->ret = ret;
693                                 complete(&tmp->wait);
694                         }
695                 }
696         }
697
698         return cmd.ret;
699 }
700
701 int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
702 {
703         dev_t dev = sbi->sb->s_bdev->bd_dev;
704         struct flush_cmd_control *fcc;
705         int err = 0;
706
707         if (SM_I(sbi)->fcc_info) {
708                 fcc = SM_I(sbi)->fcc_info;
709                 if (fcc->f2fs_issue_flush)
710                         return err;
711                 goto init_thread;
712         }
713
714         fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
715         if (!fcc)
716                 return -ENOMEM;
717         atomic_set(&fcc->issued_flush, 0);
718         atomic_set(&fcc->queued_flush, 0);
719         init_waitqueue_head(&fcc->flush_wait_queue);
720         init_llist_head(&fcc->issue_list);
721         SM_I(sbi)->fcc_info = fcc;
722         if (!test_opt(sbi, FLUSH_MERGE))
723                 return err;
724
725 init_thread:
726         fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
727                                 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
728         if (IS_ERR(fcc->f2fs_issue_flush)) {
729                 err = PTR_ERR(fcc->f2fs_issue_flush);
730                 kvfree(fcc);
731                 SM_I(sbi)->fcc_info = NULL;
732                 return err;
733         }
734
735         return err;
736 }
737
738 void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
739 {
740         struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
741
742         if (fcc && fcc->f2fs_issue_flush) {
743                 struct task_struct *flush_thread = fcc->f2fs_issue_flush;
744
745                 fcc->f2fs_issue_flush = NULL;
746                 kthread_stop(flush_thread);
747         }
748         if (free) {
749                 kvfree(fcc);
750                 SM_I(sbi)->fcc_info = NULL;
751         }
752 }
753
754 int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
755 {
756         int ret = 0, i;
757
758         if (!f2fs_is_multi_device(sbi))
759                 return 0;
760
761         for (i = 1; i < sbi->s_ndevs; i++) {
762                 if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
763                         continue;
764                 ret = __submit_flush_wait(sbi, FDEV(i).bdev);
765                 if (ret)
766                         break;
767
768                 spin_lock(&sbi->dev_lock);
769                 f2fs_clear_bit(i, (char *)&sbi->dirty_device);
770                 spin_unlock(&sbi->dev_lock);
771         }
772
773         return ret;
774 }
775
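/*
 * Mark @segno in the dirty_segmap of @dirty_type (and in the per-type map
 * when @dirty_type is DIRTY); current segments are never added.
 */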
776 static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
777                 enum dirty_type dirty_type)
778 {
779         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
780
781         /* need not be added */
782         if (IS_CURSEG(sbi, segno))
783                 return;
784
785         if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
786                 dirty_i->nr_dirty[dirty_type]++;
787
788         if (dirty_type == DIRTY) {
789                 struct seg_entry *sentry = get_seg_entry(sbi, segno);
790                 enum dirty_type t = sentry->type;
791
792                 if (unlikely(t >= DIRTY)) {
793                         f2fs_bug_on(sbi, 1);
794                         return;
795                 }
796                 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
797                         dirty_i->nr_dirty[t]++;
798         }
799 }
800
801 static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
802                 enum dirty_type dirty_type)
803 {
804         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
805
806         if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
807                 dirty_i->nr_dirty[dirty_type]--;
808
809         if (dirty_type == DIRTY) {
810                 struct seg_entry *sentry = get_seg_entry(sbi, segno);
811                 enum dirty_type t = sentry->type;
812
813                 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
814                         dirty_i->nr_dirty[t]--;
815
816                 if (get_valid_blocks(sbi, segno, true) == 0)
817                         clear_bit(GET_SEC_FROM_SEG(sbi, segno),
818                                                 dirty_i->victim_secmap);
819         }
820 }
821
822 /*
823  * Errors such as -ENOMEM should not occur here.
824  * Adding a dirty entry into the seglist is not a critical operation.
825  * If a given segment is one of the current working segments, it won't be added.
826  */
827 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
828 {
829         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
830         unsigned short valid_blocks, ckpt_valid_blocks;
831
832         if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
833                 return;
834
835         mutex_lock(&dirty_i->seglist_lock);
836
837         valid_blocks = get_valid_blocks(sbi, segno, false);
838         ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
839
840         if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
841                                 ckpt_valid_blocks == sbi->blocks_per_seg)) {
842                 __locate_dirty_segment(sbi, segno, PRE);
843                 __remove_dirty_segment(sbi, segno, DIRTY);
844         } else if (valid_blocks < sbi->blocks_per_seg) {
845                 __locate_dirty_segment(sbi, segno, DIRTY);
846         } else {
847                 /* Recovery routine with SSR needs this */
848                 __remove_dirty_segment(sbi, segno, DIRTY);
849         }
850
851         mutex_unlock(&dirty_i->seglist_lock);
852 }
853
854 /* Move dirty segments that no longer have valid blocks to prefree; seglist_lock is taken internally. */
855 void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
856 {
857         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
858         unsigned int segno;
859
860         mutex_lock(&dirty_i->seglist_lock);
861         for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
862                 if (get_valid_blocks(sbi, segno, false))
863                         continue;
864                 if (IS_CURSEG(sbi, segno))
865                         continue;
866                 __locate_dirty_segment(sbi, segno, PRE);
867                 __remove_dirty_segment(sbi, segno, DIRTY);
868         }
869         mutex_unlock(&dirty_i->seglist_lock);
870 }
871
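/*
 * Check whether checkpointing may be disabled (again): return -EAGAIN if
 * the holes inside dirty data or node segments exceed the overprovision
 * area, or, in quick mode, if dirty segments outnumber overprovision
 * segments.
 */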
872 int f2fs_disable_cp_again(struct f2fs_sb_info *sbi)
873 {
874         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
875         block_t ovp = overprovision_segments(sbi) << sbi->log_blocks_per_seg;
876         block_t holes[2] = {0, 0};      /* DATA and NODE */
877         struct seg_entry *se;
878         unsigned int segno;
879
880         mutex_lock(&dirty_i->seglist_lock);
881         for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
882                 se = get_seg_entry(sbi, segno);
883                 if (IS_NODESEG(se->type))
884                         holes[NODE] += sbi->blocks_per_seg - se->valid_blocks;
885                 else
886                         holes[DATA] += sbi->blocks_per_seg - se->valid_blocks;
887         }
888         mutex_unlock(&dirty_i->seglist_lock);
889
890         if (holes[DATA] > ovp || holes[NODE] > ovp)
891                 return -EAGAIN;
892         if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
893                 dirty_segments(sbi) > overprovision_segments(sbi))
894                 return -EAGAIN;
895         return 0;
896 }
897
898 /* This is only used by SBI_CP_DISABLED */
899 static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
900 {
901         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
902         unsigned int segno = 0;
903
904         mutex_lock(&dirty_i->seglist_lock);
905         for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
906                 if (get_valid_blocks(sbi, segno, false))
907                         continue;
908                 if (get_ckpt_valid_blocks(sbi, segno))
909                         continue;
910                 mutex_unlock(&dirty_i->seglist_lock);
911                 return segno;
912         }
913         mutex_unlock(&dirty_i->seglist_lock);
914         return NULL_SEGNO;
915 }
916
917 static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
918                 struct block_device *bdev, block_t lstart,
919                 block_t start, block_t len)
920 {
921         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
922         struct list_head *pend_list;
923         struct discard_cmd *dc;
924
925         f2fs_bug_on(sbi, !len);
926
927         pend_list = &dcc->pend_list[plist_idx(len)];
928
929         dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
930         INIT_LIST_HEAD(&dc->list);
931         dc->bdev = bdev;
932         dc->lstart = lstart;
933         dc->start = start;
934         dc->len = len;
935         dc->ref = 0;
936         dc->state = D_PREP;
937         dc->queued = 0;
938         dc->error = 0;
939         init_completion(&dc->wait);
940         list_add_tail(&dc->list, pend_list);
941         spin_lock_init(&dc->lock);
942         dc->bio_ref = 0;
943         atomic_inc(&dcc->discard_cmd_cnt);
944         dcc->undiscard_blks += len;
945
946         return dc;
947 }
948
949 static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
950                                 struct block_device *bdev, block_t lstart,
951                                 block_t start, block_t len,
952                                 struct rb_node *parent, struct rb_node **p)
953 {
954         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
955         struct discard_cmd *dc;
956
957         dc = __create_discard_cmd(sbi, bdev, lstart, start, len);
958
959         rb_link_node(&dc->rb_node, parent, p);
960         rb_insert_color(&dc->rb_node, &dcc->root);
961
962         return dc;
963 }
964
965 static void __detach_discard_cmd(struct discard_cmd_control *dcc,
966                                                         struct discard_cmd *dc)
967 {
968         if (dc->state == D_DONE)
969                 atomic_sub(dc->queued, &dcc->queued_discard);
970
971         list_del(&dc->list);
972         rb_erase(&dc->rb_node, &dcc->root);
973         dcc->undiscard_blks -= dc->len;
974
975         kmem_cache_free(discard_cmd_slab, dc);
976
977         atomic_dec(&dcc->discard_cmd_cnt);
978 }
979
980 static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
981                                                         struct discard_cmd *dc)
982 {
983         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
984         unsigned long flags;
985
986         trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);
987
988         spin_lock_irqsave(&dc->lock, flags);
989         if (dc->bio_ref) {
990                 spin_unlock_irqrestore(&dc->lock, flags);
991                 return;
992         }
993         spin_unlock_irqrestore(&dc->lock, flags);
994
995         f2fs_bug_on(sbi, dc->ref);
996
997         if (dc->error == -EOPNOTSUPP)
998                 dc->error = 0;
999
1000         if (dc->error)
1001                 printk_ratelimited(
1002                         "%sF2FS-fs: Issue discard(%u, %u, %u) failed, ret: %d",
1003                         KERN_INFO, dc->lstart, dc->start, dc->len, dc->error);
1004         __detach_discard_cmd(dcc, dc);
1005 }
1006
1007 static void f2fs_submit_discard_endio(struct bio *bio)
1008 {
1009         struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
1010         unsigned long flags;
1011
1012         dc->error = bio->bi_error;
1013
1014         spin_lock_irqsave(&dc->lock, flags);
1015         dc->bio_ref--;
1016         if (!dc->bio_ref && dc->state == D_SUBMIT) {
1017                 dc->state = D_DONE;
1018                 complete_all(&dc->wait);
1019         }
1020         spin_unlock_irqrestore(&dc->lock, flags);
1021         bio_put(bio);
1022 }
1023
1024 /* copied from block/blk-lib.c in 4.10-rc1 */
1025 static int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
1026                 sector_t nr_sects, gfp_t gfp_mask, int flags,
1027                 struct bio **biop)
1028 {
1029         struct request_queue *q = bdev_get_queue(bdev);
1030         struct bio *bio = *biop;
1031         unsigned int granularity;
1032         int op = REQ_WRITE | REQ_DISCARD;
1033         int alignment;
1034         sector_t bs_mask;
1035
1036         if (!q)
1037                 return -ENXIO;
1038
1039         if (!blk_queue_discard(q))
1040                 return -EOPNOTSUPP;
1041
1042         if (flags & BLKDEV_DISCARD_SECURE) {
1043                 if (!blk_queue_secdiscard(q))
1044                         return -EOPNOTSUPP;
1045                 op |= REQ_SECURE;
1046         }
1047
1048         bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
1049         if ((sector | nr_sects) & bs_mask)
1050                 return -EINVAL;
1051
1052         /* Zero-sector (unknown) and one-sector granularities are the same.  */
1053         granularity = max(q->limits.discard_granularity >> 9, 1U);
1054         alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
1055
1056         while (nr_sects) {
1057                 unsigned int req_sects;
1058                 sector_t end_sect, tmp;
1059
1060                 /* Make sure bi_size doesn't overflow */
1061                 req_sects = min_t(sector_t, nr_sects, UINT_MAX >> 9);
1062
1063                 /**
1064                  * If splitting a request, and the next starting sector would be
1065                  * misaligned, stop the discard at the previous aligned sector.
1066                  */
1067                 end_sect = sector + req_sects;
1068                 tmp = end_sect;
1069                 if (req_sects < nr_sects &&
1070                     sector_div(tmp, granularity) != alignment) {
1071                         end_sect = end_sect - alignment;
1072                         sector_div(end_sect, granularity);
1073                         end_sect = end_sect * granularity + alignment;
1074                         req_sects = end_sect - sector;
1075                 }
1076
1077                 if (bio) {
1078                         int ret = submit_bio_wait(op, bio);
1079                         bio_put(bio);
1080                         if (ret)
1081                                 return ret;
1082                 }
1083                 bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, 1);
1084                 bio->bi_iter.bi_sector = sector;
1085                 bio->bi_bdev = bdev;
1086                 bio_set_op_attrs(bio, op, 0);
1087
1088                 bio->bi_iter.bi_size = req_sects << 9;
1089                 nr_sects -= req_sects;
1090                 sector = end_sect;
1091
1092                 /*
1093                  * We can loop for a long time in here, if someone does
1094                  * full device discards (like mkfs). Be nice and allow
1095                  * us to schedule out to avoid softlocking if preempt
1096                  * is disabled.
1097                  */
1098                 cond_resched();
1099         }
1100
1101         *biop = bio;
1102         return 0;
1103 }
1104
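/*
 * CONFIG_F2FS_CHECK_FS only: verify that no block in [start, end) is still
 * marked valid in the SIT bitmaps before the range is discarded.
 */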
1105 static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
1106                                 block_t start, block_t end)
1107 {
1108 #ifdef CONFIG_F2FS_CHECK_FS
1109         struct seg_entry *sentry;
1110         unsigned int segno;
1111         block_t blk = start;
1112         unsigned long offset, size, max_blocks = sbi->blocks_per_seg;
1113         unsigned long *map;
1114
1115         while (blk < end) {
1116                 segno = GET_SEGNO(sbi, blk);
1117                 sentry = get_seg_entry(sbi, segno);
1118                 offset = GET_BLKOFF_FROM_SEG0(sbi, blk);
1119
1120                 if (end < START_BLOCK(sbi, segno + 1))
1121                         size = GET_BLKOFF_FROM_SEG0(sbi, end);
1122                 else
1123                         size = max_blocks;
1124                 map = (unsigned long *)(sentry->cur_valid_map);
1125                 offset = __find_rev_next_bit(map, size, offset);
1126                 f2fs_bug_on(sbi, offset != size);
1127                 blk = START_BLOCK(sbi, segno + 1);
1128         }
1129 #endif
1130 }
1131
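/*
 * Fill @dpolicy with the defaults for @discard_type: background discards
 * are asynchronous, ordered and io-aware (dropping to granularity 1 under
 * high utilization), FORCE and FSTRIM ignore device idleness, and UMOUNT
 * issues everything at granularity 1 so that CP_TRIMMED_FLAG can be kept.
 */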
1132 static void __init_discard_policy(struct f2fs_sb_info *sbi,
1133                                 struct discard_policy *dpolicy,
1134                                 int discard_type, unsigned int granularity)
1135 {
1136         /* common policy */
1137         dpolicy->type = discard_type;
1138         dpolicy->sync = true;
1139         dpolicy->ordered = false;
1140         dpolicy->granularity = granularity;
1141
1142         dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
1143         dpolicy->io_aware_gran = MAX_PLIST_NUM;
1144         dpolicy->timeout = 0;
1145
1146         if (discard_type == DPOLICY_BG) {
1147                 dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1148                 dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
1149                 dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
1150                 dpolicy->io_aware = true;
1151                 dpolicy->sync = false;
1152                 dpolicy->ordered = true;
1153                 if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
1154                         dpolicy->granularity = 1;
1155                         dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1156                 }
1157         } else if (discard_type == DPOLICY_FORCE) {
1158                 dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1159                 dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
1160                 dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
1161                 dpolicy->io_aware = false;
1162         } else if (discard_type == DPOLICY_FSTRIM) {
1163                 dpolicy->io_aware = false;
1164         } else if (discard_type == DPOLICY_UMOUNT) {
1165                 dpolicy->max_requests = UINT_MAX;
1166                 dpolicy->io_aware = false;
1167                 /* we need to issue all to keep CP_TRIMMED_FLAG */
1168                 dpolicy->granularity = 1;
1169         }
1170 }
1171
1172 static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1173                                 struct block_device *bdev, block_t lstart,
1174                                 block_t start, block_t len);
1175 /* this function is copied from blkdev_issue_discard in block/blk-lib.c */
1176 static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
1177                                                 struct discard_policy *dpolicy,
1178                                                 struct discard_cmd *dc,
1179                                                 unsigned int *issued)
1180 {
1181         struct block_device *bdev = dc->bdev;
1182         struct request_queue *q = bdev_get_queue(bdev);
1183         unsigned int max_discard_blocks =
1184                         SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
1185         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1186         struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1187                                         &(dcc->fstrim_list) : &(dcc->wait_list);
1188         int flag = dpolicy->sync ? REQ_SYNC : 0;
1189         block_t lstart, start, len, total_len;
1190         int err = 0;
1191
1192         if (dc->state != D_PREP)
1193                 return 0;
1194
1195         if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
1196                 return 0;
1197
1198         trace_f2fs_issue_discard(bdev, dc->start, dc->len);
1199
1200         lstart = dc->lstart;
1201         start = dc->start;
1202         len = dc->len;
1203         total_len = len;
1204
1205         dc->len = 0;
1206
1207         while (total_len && *issued < dpolicy->max_requests && !err) {
1208                 struct bio *bio = NULL;
1209                 unsigned long flags;
1210                 bool last = true;
1211
1212                 if (len > max_discard_blocks) {
1213                         len = max_discard_blocks;
1214                         last = false;
1215                 }
1216
1217                 (*issued)++;
1218                 if (*issued == dpolicy->max_requests)
1219                         last = true;
1220
1221                 dc->len += len;
1222
1223                 if (time_to_inject(sbi, FAULT_DISCARD)) {
1224                         f2fs_show_injection_info(FAULT_DISCARD);
1225                         err = -EIO;
1226                         goto submit;
1227                 }
1228                 err = __blkdev_issue_discard(bdev,
1229                                         SECTOR_FROM_BLOCK(start),
1230                                         SECTOR_FROM_BLOCK(len),
1231                                         GFP_NOFS, 0, &bio);
1232 submit:
1233                 if (err) {
1234                         spin_lock_irqsave(&dc->lock, flags);
1235                         if (dc->state == D_PARTIAL)
1236                                 dc->state = D_SUBMIT;
1237                         spin_unlock_irqrestore(&dc->lock, flags);
1238
1239                         break;
1240                 }
1241
1242                 f2fs_bug_on(sbi, !bio);
1243
1244                 /*
1245                  * state and bio_ref must be updated before submission so the
1246                  * endio handler does not mark the command D_DONE right away
1247                  */
1248                 spin_lock_irqsave(&dc->lock, flags);
1249                 if (last)
1250                         dc->state = D_SUBMIT;
1251                 else
1252                         dc->state = D_PARTIAL;
1253                 dc->bio_ref++;
1254                 spin_unlock_irqrestore(&dc->lock, flags);
1255
1256                 atomic_inc(&dcc->queued_discard);
1257                 dc->queued++;
1258                 list_move_tail(&dc->list, wait_list);
1259
1260                 /* sanity check on discard range */
1261                 __check_sit_bitmap(sbi, lstart, lstart + len);
1262
1263                 bio->bi_private = dc;
1264                 bio->bi_end_io = f2fs_submit_discard_endio;
1265                 submit_bio(flag, bio);
1266
1267                 atomic_inc(&dcc->issued_discard);
1268
1269                 f2fs_update_iostat(sbi, FS_DISCARD, 1);
1270
1271                 lstart += len;
1272                 start += len;
1273                 total_len -= len;
1274                 len = total_len;
1275         }
1276
1277         if (!err && len)
1278                 __update_discard_tree_range(sbi, bdev, lstart, start, len);
1279         return err;
1280 }
1281
1282 static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
1283                                 struct block_device *bdev, block_t lstart,
1284                                 block_t start, block_t len,
1285                                 struct rb_node **insert_p,
1286                                 struct rb_node *insert_parent)
1287 {
1288         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1289         struct rb_node **p;
1290         struct rb_node *parent = NULL;
1291         struct discard_cmd *dc = NULL;
1292
1293         if (insert_p && insert_parent) {
1294                 parent = insert_parent;
1295                 p = insert_p;
1296                 goto do_insert;
1297         }
1298
1299         p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart);
1300 do_insert:
1301         dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, p);
1302         if (!dc)
1303                 return NULL;
1304
1305         return dc;
1306 }
1307
1308 static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
1309                                                 struct discard_cmd *dc)
1310 {
1311         list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]);
1312 }
1313
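/*
 * Carve @blkaddr out of an existing discard command: drop the command if
 * it is already done or only one block long, otherwise trim it and, when
 * the block falls in the middle, split the remainder into a new command.
 */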
1314 static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
1315                                 struct discard_cmd *dc, block_t blkaddr)
1316 {
1317         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1318         struct discard_info di = dc->di;
1319         bool modified = false;
1320
1321         if (dc->state == D_DONE || dc->len == 1) {
1322                 __remove_discard_cmd(sbi, dc);
1323                 return;
1324         }
1325
1326         dcc->undiscard_blks -= di.len;
1327
1328         if (blkaddr > di.lstart) {
1329                 dc->len = blkaddr - dc->lstart;
1330                 dcc->undiscard_blks += dc->len;
1331                 __relocate_discard_cmd(dcc, dc);
1332                 modified = true;
1333         }
1334
1335         if (blkaddr < di.lstart + di.len - 1) {
1336                 if (modified) {
1337                         __insert_discard_tree(sbi, dc->bdev, blkaddr + 1,
1338                                         di.start + blkaddr + 1 - di.lstart,
1339                                         di.lstart + di.len - 1 - blkaddr,
1340                                         NULL, NULL);
1341                 } else {
1342                         dc->lstart++;
1343                         dc->len--;
1344                         dc->start++;
1345                         dcc->undiscard_blks += dc->len;
1346                         __relocate_discard_cmd(dcc, dc);
1347                 }
1348         }
1349 }
1350
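/*
 * Add the logical range [lstart, lstart + len) to the discard rb-tree,
 * merging it with neighbouring D_PREP commands on the same device as long
 * as the merged length stays within max_discard_blocks.
 */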
1351 static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1352                                 struct block_device *bdev, block_t lstart,
1353                                 block_t start, block_t len)
1354 {
1355         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1356         struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1357         struct discard_cmd *dc;
1358         struct discard_info di = {0};
1359         struct rb_node **insert_p = NULL, *insert_parent = NULL;
1360         struct request_queue *q = bdev_get_queue(bdev);
1361         unsigned int max_discard_blocks =
1362                         SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
1363         block_t end = lstart + len;
1364
1365         dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
1366                                         NULL, lstart,
1367                                         (struct rb_entry **)&prev_dc,
1368                                         (struct rb_entry **)&next_dc,
1369                                         &insert_p, &insert_parent, true);
1370         if (dc)
1371                 prev_dc = dc;
1372
1373         if (!prev_dc) {
1374                 di.lstart = lstart;
1375                 di.len = next_dc ? next_dc->lstart - lstart : len;
1376                 di.len = min(di.len, len);
1377                 di.start = start;
1378         }
1379
1380         while (1) {
1381                 struct rb_node *node;
1382                 bool merged = false;
1383                 struct discard_cmd *tdc = NULL;
1384
1385                 if (prev_dc) {
1386                         di.lstart = prev_dc->lstart + prev_dc->len;
1387                         if (di.lstart < lstart)
1388                                 di.lstart = lstart;
1389                         if (di.lstart >= end)
1390                                 break;
1391
1392                         if (!next_dc || next_dc->lstart > end)
1393                                 di.len = end - di.lstart;
1394                         else
1395                                 di.len = next_dc->lstart - di.lstart;
1396                         di.start = start + di.lstart - lstart;
1397                 }
1398
1399                 if (!di.len)
1400                         goto next;
1401
1402                 if (prev_dc && prev_dc->state == D_PREP &&
1403                         prev_dc->bdev == bdev &&
1404                         __is_discard_back_mergeable(&di, &prev_dc->di,
1405                                                         max_discard_blocks)) {
1406                         prev_dc->di.len += di.len;
1407                         dcc->undiscard_blks += di.len;
1408                         __relocate_discard_cmd(dcc, prev_dc);
1409                         di = prev_dc->di;
1410                         tdc = prev_dc;
1411                         merged = true;
1412                 }
1413
1414                 if (next_dc && next_dc->state == D_PREP &&
1415                         next_dc->bdev == bdev &&
1416                         __is_discard_front_mergeable(&di, &next_dc->di,
1417                                                         max_discard_blocks)) {
1418                         next_dc->di.lstart = di.lstart;
1419                         next_dc->di.len += di.len;
1420                         next_dc->di.start = di.start;
1421                         dcc->undiscard_blks += di.len;
1422                         __relocate_discard_cmd(dcc, next_dc);
1423                         if (tdc)
1424                                 __remove_discard_cmd(sbi, tdc);
1425                         merged = true;
1426                 }
1427
1428                 if (!merged) {
1429                         __insert_discard_tree(sbi, bdev, di.lstart, di.start,
1430                                                         di.len, NULL, NULL);
1431                 }
1432  next:
1433                 prev_dc = next_dc;
1434                 if (!prev_dc)
1435                         break;
1436
1437                 node = rb_next(&prev_dc->rb_node);
1438                 next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1439         }
1440 }
1441
1442 static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
1443                 struct block_device *bdev, block_t blkstart, block_t blklen)
1444 {
1445         block_t lblkstart = blkstart;
1446
1447         if (!f2fs_bdev_support_discard(bdev))
1448                 return 0;
1449
1450         trace_f2fs_queue_discard(bdev, blkstart, blklen);
1451
1452         if (f2fs_is_multi_device(sbi)) {
1453                 int devi = f2fs_target_device_index(sbi, blkstart);
1454
1455                 blkstart -= FDEV(devi).start_blk;
1456         }
1457         mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
1458         __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
1459         mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
1460         return 0;
1461 }
1462
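/*
 * Issue pending discard commands in LBA order, resuming from
 * dcc->next_pos; stop early if the device becomes busy (io-aware policies)
 * or the per-policy request limit is reached.  Returns the number issued,
 * or -1 if it was interrupted before issuing anything.
 */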
1463 static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
1464                                         struct discard_policy *dpolicy)
1465 {
1466         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1467         struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1468         struct rb_node **insert_p = NULL, *insert_parent = NULL;
1469         struct discard_cmd *dc;
1470         struct blk_plug plug;
1471         unsigned int pos = dcc->next_pos;
1472         unsigned int issued = 0;
1473         bool io_interrupted = false;
1474
1475         mutex_lock(&dcc->cmd_lock);
1476         dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
1477                                         NULL, pos,
1478                                         (struct rb_entry **)&prev_dc,
1479                                         (struct rb_entry **)&next_dc,
1480                                         &insert_p, &insert_parent, true);
1481         if (!dc)
1482                 dc = next_dc;
1483
1484         blk_start_plug(&plug);
1485
1486         while (dc) {
1487                 struct rb_node *node;
1488                 int err = 0;
1489
1490                 if (dc->state != D_PREP)
1491                         goto next;
1492
1493                 if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
1494                         io_interrupted = true;
1495                         break;
1496                 }
1497
1498                 dcc->next_pos = dc->lstart + dc->len;
1499                 err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
1500
1501                 if (issued >= dpolicy->max_requests)
1502                         break;
1503 next:
1504                 node = rb_next(&dc->rb_node);
1505                 if (err)
1506                         __remove_discard_cmd(sbi, dc);
1507                 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1508         }
1509
1510         blk_finish_plug(&plug);
1511
1512         if (!dc)
1513                 dcc->next_pos = 0;
1514
1515         mutex_unlock(&dcc->cmd_lock);
1516
1517         if (!issued && io_interrupted)
1518                 issued = -1;
1519
1520         return issued;
1521 }
1522
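     /*
      * Walk the pending lists from the largest granularity downwards and
      * issue prepared discard commands according to @dpolicy, honoring its
      * timeout, granularity and io_aware settings. Returns the number of
      * commands issued, or -1 if issuing was interrupted by other I/O.
      */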
1523 static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
1524                                         struct discard_policy *dpolicy)
1525 {
1526         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1527         struct list_head *pend_list;
1528         struct discard_cmd *dc, *tmp;
1529         struct blk_plug plug;
1530         int i, issued = 0;
1531         bool io_interrupted = false;
1532
1533         if (dpolicy->timeout != 0)
1534                 f2fs_update_time(sbi, dpolicy->timeout);
1535
1536         for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1537                 if (dpolicy->timeout != 0 &&
1538                                 f2fs_time_over(sbi, dpolicy->timeout))
1539                         break;
1540
1541                 if (i + 1 < dpolicy->granularity)
1542                         break;
1543
1544                 if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
1545                         return __issue_discard_cmd_orderly(sbi, dpolicy);
1546
1547                 pend_list = &dcc->pend_list[i];
1548
1549                 mutex_lock(&dcc->cmd_lock);
1550                 if (list_empty(pend_list))
1551                         goto next;
1552                 if (unlikely(dcc->rbtree_check))
1553                         f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
1554                                                                 &dcc->root));
1555                 blk_start_plug(&plug);
1556                 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1557                         f2fs_bug_on(sbi, dc->state != D_PREP);
1558
1559                         if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
1560                                                 !is_idle(sbi, DISCARD_TIME)) {
1561                                 io_interrupted = true;
1562                                 break;
1563                         }
1564
1565                         __submit_discard_cmd(sbi, dpolicy, dc, &issued);
1566
1567                         if (issued >= dpolicy->max_requests)
1568                                 break;
1569                 }
1570                 blk_finish_plug(&plug);
1571 next:
1572                 mutex_unlock(&dcc->cmd_lock);
1573
1574                 if (issued >= dpolicy->max_requests || io_interrupted)
1575                         break;
1576         }
1577
1578         if (!issued && io_interrupted)
1579                 issued = -1;
1580
1581         return issued;
1582 }
1583
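     /* Drop all prepared (D_PREP) discard commands without issuing them. */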
1584 static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
1585 {
1586         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1587         struct list_head *pend_list;
1588         struct discard_cmd *dc, *tmp;
1589         int i;
1590         bool dropped = false;
1591
1592         mutex_lock(&dcc->cmd_lock);
1593         for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1594                 pend_list = &dcc->pend_list[i];
1595                 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1596                         f2fs_bug_on(sbi, dc->state != D_PREP);
1597                         __remove_discard_cmd(sbi, dc);
1598                         dropped = true;
1599                 }
1600         }
1601         mutex_unlock(&dcc->cmd_lock);
1602
1603         return dropped;
1604 }
1605
1606 void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
1607 {
1608         __drop_discard_cmd(sbi);
1609 }
1610
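     /*
      * Wait for one issued discard command to complete, drop our reference
      * and free it once unreferenced; returns the trimmed length on success.
      */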
1611 static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
1612                                                         struct discard_cmd *dc)
1613 {
1614         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1615         unsigned int len = 0;
1616
1617         wait_for_completion_io(&dc->wait);
1618         mutex_lock(&dcc->cmd_lock);
1619         f2fs_bug_on(sbi, dc->state != D_DONE);
1620         dc->ref--;
1621         if (!dc->ref) {
1622                 if (!dc->error)
1623                         len = dc->len;
1624                 __remove_discard_cmd(sbi, dc);
1625         }
1626         mutex_unlock(&dcc->cmd_lock);
1627
1628         return len;
1629 }
1630
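     /*
      * Wait for issued discard commands overlapping [@start, @end) on the
      * wait list (or the fstrim list for DPOLICY_FSTRIM); returns the total
      * number of blocks trimmed.
      */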
1631 static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
1632                                                 struct discard_policy *dpolicy,
1633                                                 block_t start, block_t end)
1634 {
1635         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1636         struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1637                                         &(dcc->fstrim_list) : &(dcc->wait_list);
1638         struct discard_cmd *dc, *tmp;
1639         bool need_wait;
1640         unsigned int trimmed = 0;
1641
1642 next:
1643         need_wait = false;
1644
1645         mutex_lock(&dcc->cmd_lock);
1646         list_for_each_entry_safe(dc, tmp, wait_list, list) {
1647                 if (dc->lstart + dc->len <= start || end <= dc->lstart)
1648                         continue;
1649                 if (dc->len < dpolicy->granularity)
1650                         continue;
1651                 if (dc->state == D_DONE && !dc->ref) {
1652                         wait_for_completion_io(&dc->wait);
1653                         if (!dc->error)
1654                                 trimmed += dc->len;
1655                         __remove_discard_cmd(sbi, dc);
1656                 } else {
1657                         dc->ref++;
1658                         need_wait = true;
1659                         break;
1660                 }
1661         }
1662         mutex_unlock(&dcc->cmd_lock);
1663
1664         if (need_wait) {
1665                 trimmed += __wait_one_discard_bio(sbi, dc);
1666                 goto next;
1667         }
1668
1669         return trimmed;
1670 }
1671
1672 static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1673                                                 struct discard_policy *dpolicy)
1674 {
1675         struct discard_policy dp;
1676         unsigned int discard_blks;
1677
1678         if (dpolicy)
1679                 return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
1680
1681         /* wait all */
1682         __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
1683         discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1684         __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
1685         discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1686
1687         return discard_blks;
1688 }
1689
1690 /* This should be covered by global mutex, &sit_i->sentry_lock */
1691 static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
1692 {
1693         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1694         struct discard_cmd *dc;
1695         bool need_wait = false;
1696
1697         mutex_lock(&dcc->cmd_lock);
1698         dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root,
1699                                                         NULL, blkaddr);
1700         if (dc) {
1701                 if (dc->state == D_PREP) {
1702                         __punch_discard_cmd(sbi, dc, blkaddr);
1703                 } else {
1704                         dc->ref++;
1705                         need_wait = true;
1706                 }
1707         }
1708         mutex_unlock(&dcc->cmd_lock);
1709
1710         if (need_wait)
1711                 __wait_one_discard_bio(sbi, dc);
1712 }
1713
1714 void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
1715 {
1716         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1717
1718         if (dcc && dcc->f2fs_issue_discard) {
1719                 struct task_struct *discard_thread = dcc->f2fs_issue_discard;
1720
1721                 dcc->f2fs_issue_discard = NULL;
1722                 kthread_stop(discard_thread);
1723         }
1724 }
1725
1726 /* This comes from f2fs_put_super */
1727 bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
1728 {
1729         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1730         struct discard_policy dpolicy;
1731         bool dropped;
1732
1733         __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
1734                                         dcc->discard_granularity);
1735         dpolicy.timeout = UMOUNT_DISCARD_TIMEOUT;
1736         __issue_discard_cmd(sbi, &dpolicy);
1737         dropped = __drop_discard_cmd(sbi);
1738
1739         /* just to make sure there are no pending discard commands */
1740         __wait_all_discard_cmd(sbi, NULL);
1741
1742         f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
1743         return dropped;
1744 }
1745
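     /*
      * Background thread that periodically issues pending discard commands
      * using a background policy (or a forced policy in urgent GC mode),
      * waits for them, and adapts its sleep interval to the outcome.
      */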
1746 static int issue_discard_thread(void *data)
1747 {
1748         struct f2fs_sb_info *sbi = data;
1749         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1750         wait_queue_head_t *q = &dcc->discard_wait_queue;
1751         struct discard_policy dpolicy;
1752         unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
1753         int issued;
1754
1755         set_freezable();
1756
1757         do {
1758                 __init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
1759                                         dcc->discard_granularity);
1760
1761                 wait_event_interruptible_timeout(*q,
1762                                 kthread_should_stop() || freezing(current) ||
1763                                 dcc->discard_wake,
1764                                 msecs_to_jiffies(wait_ms));
1765
1766                 if (dcc->discard_wake)
1767                         dcc->discard_wake = 0;
1768
1769                 /* clean up pending candidates before going to sleep */
1770                 if (atomic_read(&dcc->queued_discard))
1771                         __wait_all_discard_cmd(sbi, NULL);
1772
1773                 if (try_to_freeze())
1774                         continue;
1775                 if (f2fs_readonly(sbi->sb))
1776                         continue;
1777                 if (kthread_should_stop())
1778                         return 0;
1779                 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
1780                         wait_ms = dpolicy.max_interval;
1781                         continue;
1782                 }
1783
1784                 if (sbi->gc_mode == GC_URGENT)
1785                         __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
1786
1787                 sb_start_intwrite(sbi->sb);
1788
1789                 issued = __issue_discard_cmd(sbi, &dpolicy);
1790                 if (issued > 0) {
1791                         __wait_all_discard_cmd(sbi, &dpolicy);
1792                         wait_ms = dpolicy.min_interval;
1793                 } else if (issued == -1) {
1794                         wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
1795                         if (!wait_ms)
1796                                 wait_ms = dpolicy.mid_interval;
1797                 } else {
1798                         wait_ms = dpolicy.max_interval;
1799                 }
1800
1801                 sb_end_intwrite(sbi->sb);
1802
1803         } while (!kthread_should_stop());
1804         return 0;
1805 }
1806
1807 #ifdef CONFIG_BLK_DEV_ZONED
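     /*
      * For zoned block devices: reset the zone write pointer for sequential
      * zones (the request must cover exactly one zone), or fall back to a
      * regular queued discard for conventional zones.
      */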
1808 static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
1809                 struct block_device *bdev, block_t blkstart, block_t blklen)
1810 {
1811         sector_t sector, nr_sects;
1812         block_t lblkstart = blkstart;
1813         int devi = 0;
1814
1815         if (f2fs_is_multi_device(sbi)) {
1816                 devi = f2fs_target_device_index(sbi, blkstart);
1817                 if (blkstart < FDEV(devi).start_blk ||
1818                     blkstart > FDEV(devi).end_blk) {
1819                         f2fs_msg(sbi->sb, KERN_ERR, "Invalid block %x",
1820                                  blkstart);
1821                         return -EIO;
1822                 }
1823                 blkstart -= FDEV(devi).start_blk;
1824         }
1825
1826         /* For sequential zones, reset the zone write pointer */
1827         if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
1828                 sector = SECTOR_FROM_BLOCK(blkstart);
1829                 nr_sects = SECTOR_FROM_BLOCK(blklen);
1830
1831                 if (sector & (bdev_zone_sectors(bdev) - 1) ||
1832                                 nr_sects != bdev_zone_sectors(bdev)) {
1833                         f2fs_msg(sbi->sb, KERN_ERR,
1834                                 "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
1835                                 devi, sbi->s_ndevs ? FDEV(devi).path : "",
1836                                 blkstart, blklen);
1837                         return -EIO;
1838                 }
1839                 trace_f2fs_issue_reset_zone(bdev, blkstart);
1840                 return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS);
1841         }
1842
1843         /* For conventional zones, use regular discard if supported */
1844         return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
1845 }
1846 #endif
1847
1848 static int __issue_discard_async(struct f2fs_sb_info *sbi,
1849                 struct block_device *bdev, block_t blkstart, block_t blklen)
1850 {
1851 #ifdef CONFIG_BLK_DEV_ZONED
1852         if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev))
1853                 return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
1854 #endif
1855         return __queue_discard_cmd(sbi, bdev, blkstart, blklen);
1856 }
1857
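     /*
      * Split [@blkstart, @blkstart + @blklen) at device boundaries, mark the
      * blocks in each segment's discard bitmap, and queue one asynchronous
      * discard per contiguous per-device extent.
      */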
1858 static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
1859                                 block_t blkstart, block_t blklen)
1860 {
1861         sector_t start = blkstart, len = 0;
1862         struct block_device *bdev;
1863         struct seg_entry *se;
1864         unsigned int offset;
1865         block_t i;
1866         int err = 0;
1867
1868         bdev = f2fs_target_device(sbi, blkstart, NULL);
1869
1870         for (i = blkstart; i < blkstart + blklen; i++, len++) {
1871                 if (i != start) {
1872                         struct block_device *bdev2 =
1873                                 f2fs_target_device(sbi, i, NULL);
1874
1875                         if (bdev2 != bdev) {
1876                                 err = __issue_discard_async(sbi, bdev,
1877                                                 start, len);
1878                                 if (err)
1879                                         return err;
1880                                 bdev = bdev2;
1881                                 start = i;
1882                                 len = 0;
1883                         }
1884                 }
1885
1886                 se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
1887                 offset = GET_BLKOFF_FROM_SEG0(sbi, i);
1888
1889                 if (!f2fs_test_and_set_bit(offset, se->discard_map))
1890                         sbi->discard_blks--;
1891         }
1892
1893         if (len)
1894                 err = __issue_discard_async(sbi, bdev, start, len);
1895         return err;
1896 }
1897
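     /*
      * Collect discard candidates for the segment at cpc->trim_start from its
      * valid/checkpoint/discard bitmaps and add them to the discard entry
      * list; with @check_only, just report whether a candidate exists.
      */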
1898 static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
1899                                                         bool check_only)
1900 {
1901         int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
1902         int max_blocks = sbi->blocks_per_seg;
1903         struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
1904         unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
1905         unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
1906         unsigned long *discard_map = (unsigned long *)se->discard_map;
1907         unsigned long *dmap = SIT_I(sbi)->tmp_map;
1908         unsigned int start = 0, end = -1;
1909         bool force = (cpc->reason & CP_DISCARD);
1910         struct discard_entry *de = NULL;
1911         struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
1912         int i;
1913
1914         if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi))
1915                 return false;
1916
1917         if (!force) {
1918                 if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
1919                         SM_I(sbi)->dcc_info->nr_discards >=
1920                                 SM_I(sbi)->dcc_info->max_discards)
1921                         return false;
1922         }
1923
1924         /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
1925         for (i = 0; i < entries; i++)
1926                 dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
1927                                 (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
1928
1929         while (force || SM_I(sbi)->dcc_info->nr_discards <=
1930                                 SM_I(sbi)->dcc_info->max_discards) {
1931                 start = __find_rev_next_bit(dmap, max_blocks, end + 1);
1932                 if (start >= max_blocks)
1933                         break;
1934
1935                 end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
1936                 if (force && start && end != max_blocks
1937                                         && (end - start) < cpc->trim_minlen)
1938                         continue;
1939
1940                 if (check_only)
1941                         return true;
1942
1943                 if (!de) {
1944                         de = f2fs_kmem_cache_alloc(discard_entry_slab,
1945                                                                 GFP_F2FS_ZERO);
1946                         de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
1947                         list_add_tail(&de->list, head);
1948                 }
1949
1950                 for (i = start; i < end; i++)
1951                         __set_bit_le(i, (void *)de->discard_map);
1952
1953                 SM_I(sbi)->dcc_info->nr_discards += end - start;
1954         }
1955         return false;
1956 }
1957
1958 static void release_discard_addr(struct discard_entry *entry)
1959 {
1960         list_del(&entry->list);
1961         kmem_cache_free(discard_entry_slab, entry);
1962 }
1963
1964 void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
1965 {
1966         struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
1967         struct discard_entry *entry, *this;
1968
1969         /* drop caches */
1970         list_for_each_entry_safe(entry, this, head, list)
1971                 release_discard_addr(entry);
1972 }
1973
1974 /*
1975  * Should call f2fs_clear_prefree_segments after checkpoint is done.
1976  */
1977 static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
1978 {
1979         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1980         unsigned int segno;
1981
1982         mutex_lock(&dirty_i->seglist_lock);
1983         for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
1984                 __set_test_and_free(sbi, segno);
1985         mutex_unlock(&dirty_i->seglist_lock);
1986 }
1987
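     /*
      * Clear prefree segments after a checkpoint: free them in the dirty
      * bitmap, issue discards for whole sections/segments, and then send the
      * small per-segment discards collected by add_discard_addrs().
      */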
1988 void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
1989                                                 struct cp_control *cpc)
1990 {
1991         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1992         struct list_head *head = &dcc->entry_list;
1993         struct discard_entry *entry, *this;
1994         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1995         unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
1996         unsigned int start = 0, end = -1;
1997         unsigned int secno, start_segno;
1998         bool force = (cpc->reason & CP_DISCARD);
1999         bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
2000
2001         mutex_lock(&dirty_i->seglist_lock);
2002
2003         while (1) {
2004                 int i;
2005
2006                 if (need_align && end != -1)
2007                         end--;
2008                 start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
2009                 if (start >= MAIN_SEGS(sbi))
2010                         break;
2011                 end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
2012                                                                 start + 1);
2013
2014                 if (need_align) {
2015                         start = rounddown(start, sbi->segs_per_sec);
2016                         end = roundup(end, sbi->segs_per_sec);
2017                 }
2018
2019                 for (i = start; i < end; i++) {
2020                         if (test_and_clear_bit(i, prefree_map))
2021                                 dirty_i->nr_dirty[PRE]--;
2022                 }
2023
2024                 if (!f2fs_realtime_discard_enable(sbi))
2025                         continue;
2026
2027                 if (force && start >= cpc->trim_start &&
2028                                         (end - 1) <= cpc->trim_end)
2029                         continue;
2030
2031                 if (!test_opt(sbi, LFS) || !__is_large_section(sbi)) {
2032                         f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
2033                                 (end - start) << sbi->log_blocks_per_seg);
2034                         continue;
2035                 }
2036 next:
2037                 secno = GET_SEC_FROM_SEG(sbi, start);
2038                 start_segno = GET_SEG_FROM_SEC(sbi, secno);
2039                 if (!IS_CURSEC(sbi, secno) &&
2040                         !get_valid_blocks(sbi, start, true))
2041                         f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
2042                                 sbi->segs_per_sec << sbi->log_blocks_per_seg);
2043
2044                 start = start_segno + sbi->segs_per_sec;
2045                 if (start < end)
2046                         goto next;
2047                 else
2048                         end = start - 1;
2049         }
2050         mutex_unlock(&dirty_i->seglist_lock);
2051
2052         /* send small discards */
2053         list_for_each_entry_safe(entry, this, head, list) {
2054                 unsigned int cur_pos = 0, next_pos, len, total_len = 0;
2055                 bool is_valid = test_bit_le(0, entry->discard_map);
2056
2057 find_next:
2058                 if (is_valid) {
2059                         next_pos = find_next_zero_bit_le(entry->discard_map,
2060                                         sbi->blocks_per_seg, cur_pos);
2061                         len = next_pos - cur_pos;
2062
2063                         if (f2fs_sb_has_blkzoned(sbi) ||
2064                             (force && len < cpc->trim_minlen))
2065                                 goto skip;
2066
2067                         f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
2068                                                                         len);
2069                         total_len += len;
2070                 } else {
2071                         next_pos = find_next_bit_le(entry->discard_map,
2072                                         sbi->blocks_per_seg, cur_pos);
2073                 }
2074 skip:
2075                 cur_pos = next_pos;
2076                 is_valid = !is_valid;
2077
2078                 if (cur_pos < sbi->blocks_per_seg)
2079                         goto find_next;
2080
2081                 release_discard_addr(entry);
2082                 dcc->nr_discards -= total_len;
2083         }
2084
2085         wake_up_discard_thread(sbi, false);
2086 }
2087
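     /*
      * Allocate and initialize the discard command control structure (if not
      * already present) and start the f2fs_discard issuing thread.
      */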
2088 static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
2089 {
2090         dev_t dev = sbi->sb->s_bdev->bd_dev;
2091         struct discard_cmd_control *dcc;
2092         int err = 0, i;
2093
2094         if (SM_I(sbi)->dcc_info) {
2095                 dcc = SM_I(sbi)->dcc_info;
2096                 goto init_thread;
2097         }
2098
2099         dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
2100         if (!dcc)
2101                 return -ENOMEM;
2102
2103         dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
2104         INIT_LIST_HEAD(&dcc->entry_list);
2105         for (i = 0; i < MAX_PLIST_NUM; i++)
2106                 INIT_LIST_HEAD(&dcc->pend_list[i]);
2107         INIT_LIST_HEAD(&dcc->wait_list);
2108         INIT_LIST_HEAD(&dcc->fstrim_list);
2109         mutex_init(&dcc->cmd_lock);
2110         atomic_set(&dcc->issued_discard, 0);
2111         atomic_set(&dcc->queued_discard, 0);
2112         atomic_set(&dcc->discard_cmd_cnt, 0);
2113         dcc->nr_discards = 0;
2114         dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
2115         dcc->undiscard_blks = 0;
2116         dcc->next_pos = 0;
2117         dcc->root = RB_ROOT;
2118         dcc->rbtree_check = false;
2119
2120         init_waitqueue_head(&dcc->discard_wait_queue);
2121         SM_I(sbi)->dcc_info = dcc;
2122 init_thread:
2123         dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
2124                                 "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
2125         if (IS_ERR(dcc->f2fs_issue_discard)) {
2126                 err = PTR_ERR(dcc->f2fs_issue_discard);
2127                 kvfree(dcc);
2128                 SM_I(sbi)->dcc_info = NULL;
2129                 return err;
2130         }
2131
2132         return err;
2133 }
2134
2135 static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
2136 {
2137         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2138
2139         if (!dcc)
2140                 return;
2141
2142         f2fs_stop_discard_thread(sbi);
2143
2144         kvfree(dcc);
2145         SM_I(sbi)->dcc_info = NULL;
2146 }
2147
2148 static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
2149 {
2150         struct sit_info *sit_i = SIT_I(sbi);
2151
2152         if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
2153                 sit_i->dirty_sentries++;
2154                 return false;
2155         }
2156
2157         return true;
2158 }
2159
2160 static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
2161                                         unsigned int segno, int modified)
2162 {
2163         struct seg_entry *se = get_seg_entry(sbi, segno);
2164         se->type = type;
2165         if (modified)
2166                 __mark_sit_entry_dirty(sbi, segno);
2167 }
2168
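     /*
      * Apply a valid-block delta (@del) to the SIT entry covering @blkaddr:
      * update the valid/discard/checkpoint bitmaps and counters, refresh the
      * segment mtime and mark the SIT entry dirty.
      */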
2169 static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
2170 {
2171         struct seg_entry *se;
2172         unsigned int segno, offset;
2173         long int new_vblocks;
2174         bool exist;
2175 #ifdef CONFIG_F2FS_CHECK_FS
2176         bool mir_exist;
2177 #endif
2178
2179         segno = GET_SEGNO(sbi, blkaddr);
2180
2181         se = get_seg_entry(sbi, segno);
2182         new_vblocks = se->valid_blocks + del;
2183         offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2184
2185         f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
2186                                 (new_vblocks > sbi->blocks_per_seg)));
2187
2188         se->valid_blocks = new_vblocks;
2189         se->mtime = get_mtime(sbi, false);
2190         if (se->mtime > SIT_I(sbi)->max_mtime)
2191                 SIT_I(sbi)->max_mtime = se->mtime;
2192
2193         /* Update valid block bitmap */
2194         if (del > 0) {
2195                 exist = f2fs_test_and_set_bit(offset, se->cur_valid_map);
2196 #ifdef CONFIG_F2FS_CHECK_FS
2197                 mir_exist = f2fs_test_and_set_bit(offset,
2198                                                 se->cur_valid_map_mir);
2199                 if (unlikely(exist != mir_exist)) {
2200                         f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
2201                                 "when setting bitmap, blk:%u, old bit:%d",
2202                                 blkaddr, exist);
2203                         f2fs_bug_on(sbi, 1);
2204                 }
2205 #endif
2206                 if (unlikely(exist)) {
2207                         f2fs_msg(sbi->sb, KERN_ERR,
2208                                 "Bitmap was wrongly set, blk:%u", blkaddr);
2209                         f2fs_bug_on(sbi, 1);
2210                         se->valid_blocks--;
2211                         del = 0;
2212                 }
2213
2214                 if (!f2fs_test_and_set_bit(offset, se->discard_map))
2215                         sbi->discard_blks--;
2216
2217                 /* don't overwrite by SSR to keep node chain */
2218                 if (IS_NODESEG(se->type) &&
2219                                 !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
2220                         if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
2221                                 se->ckpt_valid_blocks++;
2222                 }
2223         } else {
2224                 exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map);
2225 #ifdef CONFIG_F2FS_CHECK_FS
2226                 mir_exist = f2fs_test_and_clear_bit(offset,
2227                                                 se->cur_valid_map_mir);
2228                 if (unlikely(exist != mir_exist)) {
2229                         f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
2230                                 "when clearing bitmap, blk:%u, old bit:%d",
2231                                 blkaddr, exist);
2232                         f2fs_bug_on(sbi, 1);
2233                 }
2234 #endif
2235                 if (unlikely(!exist)) {
2236                         f2fs_msg(sbi->sb, KERN_ERR,
2237                                 "Bitmap was wrongly cleared, blk:%u", blkaddr);
2238                         f2fs_bug_on(sbi, 1);
2239                         se->valid_blocks++;
2240                         del = 0;
2241                 } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2242                         /*
2243                          * If checkpoints are off, we must not reuse data that
2244                          * was used in the previous checkpoint. If it was used
2245                          * before, we must track that to know how much space we
2246                          * really have.
2247                          */
2248                         if (f2fs_test_bit(offset, se->ckpt_valid_map))
2249                                 sbi->unusable_block_count++;
2250                 }
2251
2252                 if (f2fs_test_and_clear_bit(offset, se->discard_map))
2253                         sbi->discard_blks++;
2254         }
2255         if (!f2fs_test_bit(offset, se->ckpt_valid_map))
2256                 se->ckpt_valid_blocks += del;
2257
2258         __mark_sit_entry_dirty(sbi, segno);
2259
2260         /* update total number of valid blocks to be written in ckpt area */
2261         SIT_I(sbi)->written_valid_blocks += del;
2262
2263         if (__is_large_section(sbi))
2264                 get_sec_entry(sbi, segno)->valid_blocks += del;
2265 }
2266
2267 void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
2268 {
2269         unsigned int segno = GET_SEGNO(sbi, addr);
2270         struct sit_info *sit_i = SIT_I(sbi);
2271
2272         f2fs_bug_on(sbi, addr == NULL_ADDR);
2273         if (addr == NEW_ADDR)
2274                 return;
2275
2276         invalidate_mapping_pages(META_MAPPING(sbi), addr, addr);
2277
2278         /* add it into sit main buffer */
2279         down_write(&sit_i->sentry_lock);
2280
2281         update_sit_entry(sbi, addr, -1);
2282
2283         /* add it into dirty seglist */
2284         locate_dirty_segment(sbi, segno);
2285
2286         up_write(&sit_i->sentry_lock);
2287 }
2288
2289 bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
2290 {
2291         struct sit_info *sit_i = SIT_I(sbi);
2292         unsigned int segno, offset;
2293         struct seg_entry *se;
2294         bool is_cp = false;
2295
2296         if (!__is_valid_data_blkaddr(blkaddr))
2297                 return true;
2298
2299         down_read(&sit_i->sentry_lock);
2300
2301         segno = GET_SEGNO(sbi, blkaddr);
2302         se = get_seg_entry(sbi, segno);
2303         offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2304
2305         if (f2fs_test_bit(offset, se->ckpt_valid_map))
2306                 is_cp = true;
2307
2308         up_read(&sit_i->sentry_lock);
2309
2310         return is_cp;
2311 }
2312
2313 /*
2314  * This function should be called with the curseg_mutex lock held.
2315  */
2316 static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
2317                                         struct f2fs_summary *sum)
2318 {
2319         struct curseg_info *curseg = CURSEG_I(sbi, type);
2320         void *addr = curseg->sum_blk;
2321         addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
2322         memcpy(addr, sum, sizeof(struct f2fs_summary));
2323 }
2324
2325 /*
2326  * Calculate the number of current summary pages for writing
2327  */
2328 int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
2329 {
2330         int valid_sum_count = 0;
2331         int i, sum_in_page;
2332
2333         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2334                 if (sbi->ckpt->alloc_type[i] == SSR)
2335                         valid_sum_count += sbi->blocks_per_seg;
2336                 else {
2337                         if (for_ra)
2338                                 valid_sum_count += le16_to_cpu(
2339                                         F2FS_CKPT(sbi)->cur_data_blkoff[i]);
2340                         else
2341                                 valid_sum_count += curseg_blkoff(sbi, i);
2342                 }
2343         }
2344
2345         sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
2346                         SUM_FOOTER_SIZE) / SUMMARY_SIZE;
2347         if (valid_sum_count <= sum_in_page)
2348                 return 1;
2349         else if ((valid_sum_count - sum_in_page) <=
2350                 (PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
2351                 return 2;
2352         return 3;
2353 }
2354
2355 /*
2356  * Caller should put this summary page
2357  */
2358 struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
2359 {
2360         return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno));
2361 }
2362
2363 void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
2364                                         void *src, block_t blk_addr)
2365 {
2366         struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2367
2368         memcpy(page_address(page), src, PAGE_SIZE);
2369         set_page_dirty(page);
2370         f2fs_put_page(page, 1);
2371 }
2372
2373 static void write_sum_page(struct f2fs_sb_info *sbi,
2374                         struct f2fs_summary_block *sum_blk, block_t blk_addr)
2375 {
2376         f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr);
2377 }
2378
2379 static void write_current_sum_page(struct f2fs_sb_info *sbi,
2380                                                 int type, block_t blk_addr)
2381 {
2382         struct curseg_info *curseg = CURSEG_I(sbi, type);
2383         struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2384         struct f2fs_summary_block *src = curseg->sum_blk;
2385         struct f2fs_summary_block *dst;
2386
2387         dst = (struct f2fs_summary_block *)page_address(page);
2388         memset(dst, 0, PAGE_SIZE);
2389
2390         mutex_lock(&curseg->curseg_mutex);
2391
2392         down_read(&curseg->journal_rwsem);
2393         memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
2394         up_read(&curseg->journal_rwsem);
2395
2396         memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
2397         memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
2398
2399         mutex_unlock(&curseg->curseg_mutex);
2400
2401         set_page_dirty(page);
2402         f2fs_put_page(page, 1);
2403 }
2404
2405 static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
2406 {
2407         struct curseg_info *curseg = CURSEG_I(sbi, type);
2408         unsigned int segno = curseg->segno + 1;
2409         struct free_segmap_info *free_i = FREE_I(sbi);
2410
2411         if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
2412                 return !test_bit(segno, free_i->free_segmap);
2413         return 0;
2414 }
2415
2416 /*
2417  * Find a new segment from the free segment bitmap in the right order.
2418  * This function must succeed; otherwise it is a BUG.
2419  */
2420 static void get_new_segment(struct f2fs_sb_info *sbi,
2421                         unsigned int *newseg, bool new_sec, int dir)
2422 {
2423         struct free_segmap_info *free_i = FREE_I(sbi);
2424         unsigned int segno, secno, zoneno;
2425         unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
2426         unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
2427         unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
2428         unsigned int left_start = hint;
2429         bool init = true;
2430         int go_left = 0;
2431         int i;
2432
2433         spin_lock(&free_i->segmap_lock);
2434
2435         if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
2436                 segno = find_next_zero_bit(free_i->free_segmap,
2437                         GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
2438                 if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
2439                         goto got_it;
2440         }
2441 find_other_zone:
2442         secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
2443         if (secno >= MAIN_SECS(sbi)) {
2444                 if (dir == ALLOC_RIGHT) {
2445                         secno = find_next_zero_bit(free_i->free_secmap,
2446                                                         MAIN_SECS(sbi), 0);
2447                         f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
2448                 } else {
2449                         go_left = 1;
2450                         left_start = hint - 1;
2451                 }
2452         }
2453         if (go_left == 0)
2454                 goto skip_left;
2455
2456         while (test_bit(left_start, free_i->free_secmap)) {
2457                 if (left_start > 0) {
2458                         left_start--;
2459                         continue;
2460                 }
2461                 left_start = find_next_zero_bit(free_i->free_secmap,
2462                                                         MAIN_SECS(sbi), 0);
2463                 f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
2464                 break;
2465         }
2466         secno = left_start;
2467 skip_left:
2468         segno = GET_SEG_FROM_SEC(sbi, secno);
2469         zoneno = GET_ZONE_FROM_SEC(sbi, secno);
2470
2471         /* give up on finding another zone */
2472         if (!init)
2473                 goto got_it;
2474         if (sbi->secs_per_zone == 1)
2475                 goto got_it;
2476         if (zoneno == old_zoneno)
2477                 goto got_it;
2478         if (dir == ALLOC_LEFT) {
2479                 if (!go_left && zoneno + 1 >= total_zones)
2480                         goto got_it;
2481                 if (go_left && zoneno == 0)
2482                         goto got_it;
2483         }
2484         for (i = 0; i < NR_CURSEG_TYPE; i++)
2485                 if (CURSEG_I(sbi, i)->zone == zoneno)
2486                         break;
2487
2488         if (i < NR_CURSEG_TYPE) {
2489                 /* zone is in use, try another */
2490                 if (go_left)
2491                         hint = zoneno * sbi->secs_per_zone - 1;
2492                 else if (zoneno + 1 >= total_zones)
2493                         hint = 0;
2494                 else
2495                         hint = (zoneno + 1) * sbi->secs_per_zone;
2496                 init = false;
2497                 goto find_other_zone;
2498         }
2499 got_it:
2500         /* set it as dirty segment in free segmap */
2501         f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
2502         __set_inuse(sbi, segno);
2503         *newseg = segno;
2504         spin_unlock(&free_i->segmap_lock);
2505 }
2506
2507 static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
2508 {
2509         struct curseg_info *curseg = CURSEG_I(sbi, type);
2510         struct summary_footer *sum_footer;
2511
2512         curseg->segno = curseg->next_segno;
2513         curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
2514         curseg->next_blkoff = 0;
2515         curseg->next_segno = NULL_SEGNO;
2516
2517         sum_footer = &(curseg->sum_blk->footer);
2518         memset(sum_footer, 0, sizeof(struct summary_footer));
2519         if (IS_DATASEG(type))
2520                 SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
2521         if (IS_NODESEG(type))
2522                 SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
2523         __set_sit_entry_type(sbi, type, curseg->segno, modified);
2524 }
2525
2526 static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
2527 {
2528         /* if segs_per_sec is larger than 1, we need to keep the original policy. */
2529         if (__is_large_section(sbi))
2530                 return CURSEG_I(sbi, type)->segno;
2531
2532         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
2533                 return 0;
2534
2535         if (test_opt(sbi, NOHEAP) &&
2536                 (type == CURSEG_HOT_DATA || IS_NODESEG(type)))
2537                 return 0;
2538
2539         if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
2540                 return SIT_I(sbi)->last_victim[ALLOC_NEXT];
2541
2542         /* find segments from 0 to reuse freed segments */
2543         if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
2544                 return 0;
2545
2546         return CURSEG_I(sbi, type)->segno;
2547 }
2548
2549 /*
2550  * Allocate a current working segment.
2551  * This function always allocates a free segment in LFS manner.
2552  */
2553 static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
2554 {
2555         struct curseg_info *curseg = CURSEG_I(sbi, type);
2556         unsigned int segno = curseg->segno;
2557         int dir = ALLOC_LEFT;
2558
2559         write_sum_page(sbi, curseg->sum_blk,
2560                                 GET_SUM_BLOCK(sbi, segno));
2561         if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
2562                 dir = ALLOC_RIGHT;
2563
2564         if (test_opt(sbi, NOHEAP))
2565                 dir = ALLOC_RIGHT;
2566
2567         segno = __get_next_segno(sbi, type);
2568         get_new_segment(sbi, &segno, new_sec, dir);
2569         curseg->next_segno = segno;
2570         reset_curseg(sbi, type, 1);
2571         curseg->alloc_type = LFS;
2572 }
2573
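     /*
      * Find the first block offset at or after @start that is free in both
      * the current and checkpoint valid bitmaps of @seg's segment.
      */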
2574 static void __next_free_blkoff(struct f2fs_sb_info *sbi,
2575                         struct curseg_info *seg, block_t start)
2576 {
2577         struct seg_entry *se = get_seg_entry(sbi, seg->segno);
2578         int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2579         unsigned long *target_map = SIT_I(sbi)->tmp_map;
2580         unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2581         unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2582         int i, pos;
2583
2584         for (i = 0; i < entries; i++)
2585                 target_map[i] = ckpt_map[i] | cur_map[i];
2586
2587         pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
2588
2589         seg->next_blkoff = pos;
2590 }
2591
2592 /*
2593  * If a segment is written in LFS manner, the next block offset is just obtained
2594  * by increasing the current block offset. However, if a segment is written in
2595  * SSR manner, the next block offset is obtained by calling __next_free_blkoff.
2596  */
2597 static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
2598                                 struct curseg_info *seg)
2599 {
2600         if (seg->alloc_type == SSR)
2601                 __next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
2602         else
2603                 seg->next_blkoff++;
2604 }
2605
2606 /*
2607  * This function always allocates a used segment (from the dirty seglist) in SSR
2608  * manner, so it should recover the existing segment information of valid blocks.
2609  */
2610 static void change_curseg(struct f2fs_sb_info *sbi, int type)
2611 {
2612         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2613         struct curseg_info *curseg = CURSEG_I(sbi, type);
2614         unsigned int new_segno = curseg->next_segno;
2615         struct f2fs_summary_block *sum_node;
2616         struct page *sum_page;
2617
2618         write_sum_page(sbi, curseg->sum_blk,
2619                                 GET_SUM_BLOCK(sbi, curseg->segno));
2620         __set_test_and_inuse(sbi, new_segno);
2621
2622         mutex_lock(&dirty_i->seglist_lock);
2623         __remove_dirty_segment(sbi, new_segno, PRE);
2624         __remove_dirty_segment(sbi, new_segno, DIRTY);
2625         mutex_unlock(&dirty_i->seglist_lock);
2626
2627         reset_curseg(sbi, type, 1);
2628         curseg->alloc_type = SSR;
2629         __next_free_blkoff(sbi, curseg, 0);
2630
2631         sum_page = f2fs_get_sum_page(sbi, new_segno);
2632         f2fs_bug_on(sbi, IS_ERR(sum_page));
2633         sum_node = (struct f2fs_summary_block *)page_address(sum_page);
2634         memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
2635         f2fs_put_page(sum_page, 1);
2636 }
2637
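     /*
      * Pick an SSR victim segment for @type, trying the other temperatures of
      * the same data/node class if needed; with checkpointing disabled, fall
      * back to any free segment. Returns 1 and sets curseg->next_segno on success.
      */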
2638 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
2639 {
2640         struct curseg_info *curseg = CURSEG_I(sbi, type);
2641         const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
2642         unsigned segno = NULL_SEGNO;
2643         int i, cnt;
2644         bool reversed = false;
2645
2646         /* f2fs_need_SSR() already forces to do this */
2647         if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) {
2648                 curseg->next_segno = segno;
2649                 return 1;
2650         }
2651
2652         /* For node segments, let's do SSR more intensively */
2653         if (IS_NODESEG(type)) {
2654                 if (type >= CURSEG_WARM_NODE) {
2655                         reversed = true;
2656                         i = CURSEG_COLD_NODE;
2657                 } else {
2658                         i = CURSEG_HOT_NODE;
2659                 }
2660                 cnt = NR_CURSEG_NODE_TYPE;
2661         } else {
2662                 if (type >= CURSEG_WARM_DATA) {
2663                         reversed = true;
2664                         i = CURSEG_COLD_DATA;
2665                 } else {
2666                         i = CURSEG_HOT_DATA;
2667                 }
2668                 cnt = NR_CURSEG_DATA_TYPE;
2669         }
2670
2671         for (; cnt-- > 0; reversed ? i-- : i++) {
2672                 if (i == type)
2673                         continue;
2674                 if (v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) {
2675                         curseg->next_segno = segno;
2676                         return 1;
2677                 }
2678         }
2679
2680         /* find valid_blocks=0 in dirty list */
2681         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2682                 segno = get_free_segment(sbi);
2683                 if (segno != NULL_SEGNO) {
2684                         curseg->next_segno = segno;
2685                         return 1;
2686                 }
2687         }
2688         return 0;
2689 }
2690
2691 /*
2692  * Flush out the current segment and replace it with a new segment.
2693  * This function must succeed; otherwise it is a BUG.
2694  */
2695 static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
2696                                                 int type, bool force)
2697 {
2698         struct curseg_info *curseg = CURSEG_I(sbi, type);
2699
2700         if (force)
2701                 new_curseg(sbi, type, true);
2702         else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
2703                                         type == CURSEG_WARM_NODE)
2704                 new_curseg(sbi, type, false);
2705         else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
2706                         likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
2707                 new_curseg(sbi, type, false);
2708         else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
2709                 change_curseg(sbi, type);
2710         else
2711                 new_curseg(sbi, type, false);
2712
2713         stat_inc_seg_type(sbi, curseg);
2714 }
2715
2716 void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
2717 {
2718         struct curseg_info *curseg;
2719         unsigned int old_segno;
2720         int i;
2721
2722         down_write(&SIT_I(sbi)->sentry_lock);
2723
2724         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2725                 curseg = CURSEG_I(sbi, i);
2726                 old_segno = curseg->segno;
2727                 SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
2728                 locate_dirty_segment(sbi, old_segno);
2729         }
2730
2731         up_write(&SIT_I(sbi)->sentry_lock);
2732 }
2733
2734 static const struct segment_allocation default_salloc_ops = {
2735         .allocate_segment = allocate_segment_by_default,
2736 };
2737
2738 bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
2739                                                 struct cp_control *cpc)
2740 {
2741         __u64 trim_start = cpc->trim_start;
2742         bool has_candidate = false;
2743
2744         down_write(&SIT_I(sbi)->sentry_lock);
2745         for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
2746                 if (add_discard_addrs(sbi, cpc, true)) {
2747                         has_candidate = true;
2748                         break;
2749                 }
2750         }
2751         up_write(&SIT_I(sbi)->sentry_lock);
2752
2753         cpc->trim_start = trim_start;
2754         return has_candidate;
2755 }
2756
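     /*
      * Issue prepared discard commands whose logical range falls within
      * [@start, @end] for fstrim, throttling via dpolicy->max_requests and
      * waiting for outstanding commands between batches; returns the number
      * of blocks trimmed.
      */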
2757 static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
2758                                         struct discard_policy *dpolicy,
2759                                         unsigned int start, unsigned int end)
2760 {
2761         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2762         struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
2763         struct rb_node **insert_p = NULL, *insert_parent = NULL;
2764         struct discard_cmd *dc;
2765         struct blk_plug plug;
2766         int issued;
2767         unsigned int trimmed = 0;
2768
2769 next:
2770         issued = 0;
2771
2772         mutex_lock(&dcc->cmd_lock);
2773         if (unlikely(dcc->rbtree_check))
2774                 f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
2775                                                                 &dcc->root));
2776
2777         dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
2778                                         NULL, start,
2779                                         (struct rb_entry **)&prev_dc,
2780                                         (struct rb_entry **)&next_dc,
2781                                         &insert_p, &insert_parent, true);
2782         if (!dc)
2783                 dc = next_dc;
2784
2785         blk_start_plug(&plug);
2786
2787         while (dc && dc->lstart <= end) {
2788                 struct rb_node *node;
2789                 int err = 0;
2790
2791                 if (dc->len < dpolicy->granularity)
2792                         goto skip;
2793
2794                 if (dc->state != D_PREP) {
2795                         list_move_tail(&dc->list, &dcc->fstrim_list);
2796                         goto skip;
2797                 }
2798
2799                 err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
2800
2801                 if (issued >= dpolicy->max_requests) {
2802                         start = dc->lstart + dc->len;
2803
2804                         if (err)
2805                                 __remove_discard_cmd(sbi, dc);
2806
2807                         blk_finish_plug(&plug);
2808                         mutex_unlock(&dcc->cmd_lock);
2809                         trimmed += __wait_all_discard_cmd(sbi, NULL);
2810                         congestion_wait(BLK_RW_ASYNC, HZ/50);
2811                         goto next;
2812                 }
2813 skip:
2814                 node = rb_next(&dc->rb_node);
2815                 if (err)
2816                         __remove_discard_cmd(sbi, dc);
2817                 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
2818
2819                 if (fatal_signal_pending(current))
2820                         break;
2821         }
2822
2823         blk_finish_plug(&plug);
2824         mutex_unlock(&dcc->cmd_lock);
2825
2826         return trimmed;
2827 }
2828
2829 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
2830 {
2831         __u64 start = F2FS_BYTES_TO_BLK(range->start);
2832         __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
2833         unsigned int start_segno, end_segno;
2834         block_t start_block, end_block;
2835         struct cp_control cpc;
2836         struct discard_policy dpolicy;
2837         unsigned long long trimmed = 0;
2838         int err = 0;
2839         bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
2840
2841         if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
2842                 return -EINVAL;
2843
2844         if (end < MAIN_BLKADDR(sbi))
2845                 goto out;
2846
2847         if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
2848                 f2fs_msg(sbi->sb, KERN_WARNING,
2849                         "Found FS corruption, run fsck to fix.");
2850                 return -EIO;
2851         }
2852
2853         /* start/end segment number in main_area */
2854         start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
2855         end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
2856                                                 GET_SEGNO(sbi, end);
2857         if (need_align) {
2858                 start_segno = rounddown(start_segno, sbi->segs_per_sec);
2859                 end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
2860         }
2861
2862         cpc.reason = CP_DISCARD;
2863         cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
2864         cpc.trim_start = start_segno;
2865         cpc.trim_end = end_segno;
2866
2867         if (sbi->discard_blks == 0)
2868                 goto out;
2869
2870         mutex_lock(&sbi->gc_mutex);
2871         err = f2fs_write_checkpoint(sbi, &cpc);
2872         mutex_unlock(&sbi->gc_mutex);
2873         if (err)
2874                 goto out;
2875
2876         /*
2877          * We filed discard candidates, but we don't actually need to wait for
2878          * all of them, since they'll be issued during idle time along with the
2879          * runtime discard option. The user configuration appears to rely on
2880          * runtime discard or periodic fstrim instead.
2881          */
2882         if (f2fs_realtime_discard_enable(sbi))
2883                 goto out;
2884
2885         start_block = START_BLOCK(sbi, start_segno);
2886         end_block = START_BLOCK(sbi, end_segno + 1);
2887
2888         __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
2889         trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
2890                                         start_block, end_block);
2891
2892         trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
2893                                         start_block, end_block);
2894 out:
2895         if (!err)
2896                 range->len = F2FS_BLK_TO_BYTES(trimmed);
2897         return err;
2898 }
2899
2900 static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
2901 {
2902         struct curseg_info *curseg = CURSEG_I(sbi, type);
2903         if (curseg->next_blkoff < sbi->blocks_per_seg)
2904                 return true;
2905         return false;
2906 }
2907
2908 int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
2909 {
2910         switch (hint) {
2911         case WRITE_LIFE_SHORT:
2912                 return CURSEG_HOT_DATA;
2913         case WRITE_LIFE_EXTREME:
2914                 return CURSEG_COLD_DATA;
2915         default:
2916                 return CURSEG_WARM_DATA;
2917         }
2918 }
2919
2920 /* This returns write hints for each segment type. These hints will be
2921  * passed down to the block layer. There are mapping tables which depend on
2922  * the mount option 'whint_mode'.
2923  *
2924  * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET.
2925  *
2926  * 2) whint_mode=user-based. F2FS tries to pass down hints given by users.
2927  *
2928  * User                  F2FS                     Block
2929  * ----                  ----                     -----
2930  *                       META                     WRITE_LIFE_NOT_SET
2931  *                       HOT_NODE                 "
2932  *                       WARM_NODE                "
2933  *                       COLD_NODE                "
2934  * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
2935  * extension list        "                        "
2936  *
2937  * -- buffered io
2938  * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
2939  * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
2940  * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
2941  * WRITE_LIFE_NONE       "                        "
2942  * WRITE_LIFE_MEDIUM     "                        "
2943  * WRITE_LIFE_LONG       "                        "
2944  *
2945  * -- direct io
2946  * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
2947  * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
2948  * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
2949  * WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
2950  * WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
2951  * WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
2952  *
2953  * 3) whint_mode=fs-based. F2FS passes down hints with its policy.
2954  *
2955  * User                  F2FS                     Block
2956  * ----                  ----                     -----
2957  *                       META                     WRITE_LIFE_MEDIUM
2958  *                       HOT_NODE                 WRITE_LIFE_NOT_SET
2959  *                       WARM_NODE                "
2960  *                       COLD_NODE                WRITE_LIFE_NONE
2961  * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
2962  * extension list        "                        "
2963  *
2964  * -- buffered io
2965  * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
2966  * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
2967  * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_LONG
2968  * WRITE_LIFE_NONE       "                        "
2969  * WRITE_LIFE_MEDIUM     "                        "
2970  * WRITE_LIFE_LONG       "                        "
2971  *
2972  * -- direct io
2973  * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
2974  * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
2975  * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
2976  * WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
2977  * WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
2978  * WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
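 *
 * For example, with whint_mode=fs-based a buffered write without a specific
 * hint lands in WARM_DATA and is issued with WRITE_LIFE_LONG, while with
 * whint_mode=off every write is passed down as WRITE_LIFE_NOT_SET.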
2979  */
2980
2981 enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
2982                                 enum page_type type, enum temp_type temp)
2983 {
2984         if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) {
2985                 if (type == DATA) {
2986                         if (temp == WARM)
2987                                 return WRITE_LIFE_NOT_SET;
2988                         else if (temp == HOT)
2989                                 return WRITE_LIFE_SHORT;
2990                         else if (temp == COLD)
2991                                 return WRITE_LIFE_EXTREME;
2992                 } else {
2993                         return WRITE_LIFE_NOT_SET;
2994                 }
2995         } else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) {
2996                 if (type == DATA) {
2997                         if (temp == WARM)
2998                                 return WRITE_LIFE_LONG;
2999                         else if (temp == HOT)
3000                                 return WRITE_LIFE_SHORT;
3001                         else if (temp == COLD)
3002                                 return WRITE_LIFE_EXTREME;
3003                 } else if (type == NODE) {
3004                         if (temp == WARM || temp == HOT)
3005                                 return WRITE_LIFE_NOT_SET;
3006                         else if (temp == COLD)
3007                                 return WRITE_LIFE_NONE;
3008                 } else if (type == META) {
3009                         return WRITE_LIFE_MEDIUM;
3010                 }
3011         }
3012         return WRITE_LIFE_NOT_SET;
3013 }
3014
3015 static int __get_segment_type_2(struct f2fs_io_info *fio)
3016 {
3017         if (fio->type == DATA)
3018                 return CURSEG_HOT_DATA;
3019         else
3020                 return CURSEG_HOT_NODE;
3021 }
3022
3023 static int __get_segment_type_4(struct f2fs_io_info *fio)
3024 {
3025         if (fio->type == DATA) {
3026                 struct inode *inode = fio->page->mapping->host;
3027
3028                 if (S_ISDIR(inode->i_mode))
3029                         return CURSEG_HOT_DATA;
3030                 else
3031                         return CURSEG_COLD_DATA;
3032         } else {
3033                 if (IS_DNODE(fio->page) && is_cold_node(fio->page))
3034                         return CURSEG_WARM_NODE;
3035                 else
3036                         return CURSEG_COLD_NODE;
3037         }
3038 }
3039
3040 static int __get_segment_type_6(struct f2fs_io_info *fio)
3041 {
3042         if (fio->type == DATA) {
3043                 struct inode *inode = fio->page->mapping->host;
3044
3045                 if (is_cold_data(fio->page) || file_is_cold(inode))
3046                         return CURSEG_COLD_DATA;
3047                 if (file_is_hot(inode) ||
3048                                 is_inode_flag_set(inode, FI_HOT_DATA) ||
3049                                 f2fs_is_atomic_file(inode) ||
3050                                 f2fs_is_volatile_file(inode))
3051                         return CURSEG_HOT_DATA;
3052                 /* f2fs_rw_hint_to_seg_type(inode->i_write_hint); */
3053                 return CURSEG_WARM_DATA;
3054         } else {
3055                 if (IS_DNODE(fio->page))
3056                         return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
3057                                                 CURSEG_HOT_NODE;
3058                 return CURSEG_COLD_NODE;
3059         }
3060 }
3061
3062 static int __get_segment_type(struct f2fs_io_info *fio)
3063 {
3064         int type = 0;
3065
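	/* the active_logs mount option (2, 4 or 6) decides how finely data/node temperatures are separated */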
3066         switch (F2FS_OPTION(fio->sbi).active_logs) {
3067         case 2:
3068                 type = __get_segment_type_2(fio);
3069                 break;
3070         case 4:
3071                 type = __get_segment_type_4(fio);
3072                 break;
3073         case 6:
3074                 type = __get_segment_type_6(fio);
3075                 break;
3076         default:
3077                 f2fs_bug_on(fio->sbi, true);
3078         }
3079
3080         if (IS_HOT(type))
3081                 fio->temp = HOT;
3082         else if (IS_WARM(type))
3083                 fio->temp = WARM;
3084         else
3085                 fio->temp = COLD;
3086         return type;
3087 }
3088
3089 void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
3090                 block_t old_blkaddr, block_t *new_blkaddr,
3091                 struct f2fs_summary *sum, int type,
3092                 struct f2fs_io_info *fio, bool add_list)
3093 {
3094         struct sit_info *sit_i = SIT_I(sbi);
3095         struct curseg_info *curseg = CURSEG_I(sbi, type);
3096
3097         down_read(&SM_I(sbi)->curseg_lock);
3098
3099         mutex_lock(&curseg->curseg_mutex);
3100         down_write(&sit_i->sentry_lock);
3101
3102         *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
3103
3104         f2fs_wait_discard_bio(sbi, *new_blkaddr);
3105
3106         /*
3107          * __add_sum_entry must be called while holding curseg_mutex,
3108          * because this function updates a summary entry in the
3109          * current summary block.
3110          */
3111         __add_sum_entry(sbi, type, sum);
3112
3113         __refresh_next_blkoff(sbi, curseg);
3114
3115         stat_inc_block_count(sbi, curseg);
3116
3117         /*
3118          * SIT information should be updated before segment allocation,
3119          * since SSR needs the latest valid block information.
3120          */
3121         update_sit_entry(sbi, *new_blkaddr, 1);
3122         if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
3123                 update_sit_entry(sbi, old_blkaddr, -1);
3124
3125         if (!__has_curseg_space(sbi, type))
3126                 sit_i->s_ops->allocate_segment(sbi, type, false);
3127
3128         /*
3129          * The segment dirty status should be updated after segment allocation,
3130          * so we only need to update the status once, after the previous
3131          * segment has been closed.
3132          */
3133         locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3134         locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
3135
3136         up_write(&sit_i->sentry_lock);
3137
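	/* record the next free block address in the node footer so roll-forward recovery can follow the node chain */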
3138         if (page && IS_NODESEG(type)) {
3139                 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
3140
3141                 f2fs_inode_chksum_set(sbi, page);
3142         }
3143
3144         if (add_list) {
3145                 struct f2fs_bio_info *io;
3146
3147                 INIT_LIST_HEAD(&fio->list);
3148                 fio->in_list = true;
3149                 fio->retry = false;
3150                 io = sbi->write_io[fio->type] + fio->temp;
3151                 spin_lock(&io->io_lock);
3152                 list_add_tail(&fio->list, &io->io_list);
3153                 spin_unlock(&io->io_lock);
3154         }
3155
3156         mutex_unlock(&curseg->curseg_mutex);
3157
3158         up_read(&SM_I(sbi)->curseg_lock);
3159 }
3160
3161 static void update_device_state(struct f2fs_io_info *fio)
3162 {
3163         struct f2fs_sb_info *sbi = fio->sbi;
3164         unsigned int devidx;
3165
3166         if (!f2fs_is_multi_device(sbi))
3167                 return;
3168
3169         devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
3170
3171         /* update device state for fsync */
3172         f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
3173
3174         /* update device state for checkpoint */
3175         if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
3176                 spin_lock(&sbi->dev_lock);
3177                 f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
3178                 spin_unlock(&sbi->dev_lock);
3179         }
3180 }
3181
3182 static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
3183 {
3184         int type = __get_segment_type(fio);
3185         bool keep_order = (test_opt(fio->sbi, LFS) && type == CURSEG_COLD_DATA);
3186
3187         if (keep_order)
3188                 down_read(&fio->sbi->io_order_lock);
3189 reallocate:
3190         f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
3191                         &fio->new_blkaddr, sum, type, fio, true);
3192         if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
3193                 invalidate_mapping_pages(META_MAPPING(fio->sbi),
3194                                         fio->old_blkaddr, fio->old_blkaddr);
3195
3196         /* writeout dirty page into bdev */
3197         f2fs_submit_page_write(fio);
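	/* if the write could not be submitted at this address, allocate a new block and retry */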
3198         if (fio->retry) {
3199                 fio->old_blkaddr = fio->new_blkaddr;
3200                 goto reallocate;
3201         }
3202
3203         update_device_state(fio);
3204
3205         if (keep_order)
3206                 up_read(&fio->sbi->io_order_lock);
3207 }
3208
3209 void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
3210                                         enum iostat_type io_type)
3211 {
3212         struct f2fs_io_info fio = {
3213                 .sbi = sbi,
3214                 .type = META,
3215                 .temp = HOT,
3216                 .op = REQ_OP_WRITE,
3217                 .op_flags = REQ_SYNC | REQ_NOIDLE | REQ_META | REQ_PRIO,
3218                 .old_blkaddr = page->index,
3219                 .new_blkaddr = page->index,
3220                 .page = page,
3221                 .encrypted_page = NULL,
3222                 .in_list = false,
3223         };
3224
3225         if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
3226                 fio.op_flags &= ~REQ_META;
3227
3228         set_page_writeback(page);
3229         ClearPageError(page);
3230         f2fs_submit_page_write(&fio);
3231
3232         stat_inc_meta_count(sbi, page->index);
3233         f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE);
3234 }
3235
3236 void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
3237 {
3238         struct f2fs_summary sum;
3239
3240         set_summary(&sum, nid, 0, 0);
3241         do_write_page(&sum, fio);
3242
3243         f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
3244 }
3245
3246 void f2fs_outplace_write_data(struct dnode_of_data *dn,
3247                                         struct f2fs_io_info *fio)
3248 {
3249         struct f2fs_sb_info *sbi = fio->sbi;
3250         struct f2fs_summary sum;
3251
3252         f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
3253         set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
3254         do_write_page(&sum, fio);
3255         f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
3256
3257         f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE);
3258 }
3259
3260 int f2fs_inplace_write_data(struct f2fs_io_info *fio)
3261 {
3262         int err;
3263         struct f2fs_sb_info *sbi = fio->sbi;
3264         unsigned int segno;
3265
3266         fio->new_blkaddr = fio->old_blkaddr;
3267         /* i/o temperature is needed for passing down write hints */
3268         __get_segment_type(fio);
3269
3270         segno = GET_SEGNO(sbi, fio->new_blkaddr);
3271
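	/* in-place updates must target a data segment; anything else indicates an inconsistent SIT */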
3272         if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
3273                 set_sbi_flag(sbi, SBI_NEED_FSCK);
3274                 return -EFAULT;
3275         }
3276
3277         stat_inc_inplace_blocks(fio->sbi);
3278
3279         err = f2fs_submit_page_bio(fio);
3280         if (!err) {
3281                 update_device_state(fio);
3282                 f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
3283         }
3284
3285         return err;
3286 }
3287
3288 static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
3289                                                 unsigned int segno)
3290 {
3291         int i;
3292
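	/* returns NO_CHECK_TYPE when the segment is not any current segment */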
3293         for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
3294                 if (CURSEG_I(sbi, i)->segno == segno)
3295                         break;
3296         }
3297         return i;
3298 }
3299
3300 void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
3301                                 block_t old_blkaddr, block_t new_blkaddr,
3302                                 bool recover_curseg, bool recover_newaddr)
3303 {
3304         struct sit_info *sit_i = SIT_I(sbi);
3305         struct curseg_info *curseg;
3306         unsigned int segno, old_cursegno;
3307         struct seg_entry *se;
3308         int type;
3309         unsigned short old_blkoff;
3310
3311         segno = GET_SEGNO(sbi, new_blkaddr);
3312         se = get_seg_entry(sbi, segno);
3313         type = se->type;
3314
3315         down_write(&SM_I(sbi)->curseg_lock);
3316
3317         if (!recover_curseg) {
3318                 /* for recovery flow */
3319                 if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
3320                         if (old_blkaddr == NULL_ADDR)
3321                                 type = CURSEG_COLD_DATA;
3322                         else
3323                                 type = CURSEG_WARM_DATA;
3324                 }
3325         } else {
3326                 if (IS_CURSEG(sbi, segno)) {
3327                         /* se->type is volatile as SSR allocation */
3328                         type = __f2fs_get_curseg(sbi, segno);
3329                         f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
3330                 } else {
3331                         type = CURSEG_WARM_DATA;
3332                 }
3333         }
3334
3335         f2fs_bug_on(sbi, !IS_DATASEG(type));
3336         curseg = CURSEG_I(sbi, type);
3337
3338         mutex_lock(&curseg->curseg_mutex);
3339         down_write(&sit_i->sentry_lock);
3340
3341         old_cursegno = curseg->segno;
3342         old_blkoff = curseg->next_blkoff;
3343
3344         /* change the current segment */
3345         if (segno != curseg->segno) {
3346                 curseg->next_segno = segno;
3347                 change_curseg(sbi, type);
3348         }
3349
3350         curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
3351         __add_sum_entry(sbi, type, sum);
3352
3353         if (!recover_curseg || recover_newaddr)
3354                 update_sit_entry(sbi, new_blkaddr, 1);
3355         if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
3356                 invalidate_mapping_pages(META_MAPPING(sbi),
3357                                         old_blkaddr, old_blkaddr);
3358                 update_sit_entry(sbi, old_blkaddr, -1);
3359         }
3360
3361         locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3362         locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
3363
3364         locate_dirty_segment(sbi, old_cursegno);
3365
3366         if (recover_curseg) {
3367                 if (old_cursegno != curseg->segno) {
3368                         curseg->next_segno = old_cursegno;
3369                         change_curseg(sbi, type);
3370                 }
3371                 curseg->next_blkoff = old_blkoff;
3372         }
3373
3374         up_write(&sit_i->sentry_lock);
3375         mutex_unlock(&curseg->curseg_mutex);
3376         up_write(&SM_I(sbi)->curseg_lock);
3377 }
3378
3379 void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
3380                                 block_t old_addr, block_t new_addr,
3381                                 unsigned char version, bool recover_curseg,
3382                                 bool recover_newaddr)
3383 {
3384         struct f2fs_summary sum;
3385
3386         set_summary(&sum, dn->nid, dn->ofs_in_node, version);
3387
3388         f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
3389                                         recover_curseg, recover_newaddr);
3390
3391         f2fs_update_data_blkaddr(dn, new_addr);
3392 }
3393
3394 void f2fs_wait_on_page_writeback(struct page *page,
3395                                 enum page_type type, bool ordered, bool locked)
3396 {
3397         if (PageWriteback(page)) {
3398                 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
3399
3400                 f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
3401                 if (ordered) {
3402                         wait_on_page_writeback(page);
3403                         f2fs_bug_on(sbi, locked && PageWriteback(page));
3404                 } else {
3405                         wait_for_stable_page(page);
3406                 }
3407         }
3408 }
3409
3410 void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
3411 {
3412         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3413         struct page *cpage;
3414
3415         if (!f2fs_post_read_required(inode))
3416                 return;
3417
3418         if (!__is_valid_data_blkaddr(blkaddr))
3419                 return;
3420
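	/* an in-flight copy of this block (e.g. an encrypted page written by GC) may be cached in META_MAPPING */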
3421         cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
3422         if (cpage) {
3423                 f2fs_wait_on_page_writeback(cpage, DATA, true, true);
3424                 f2fs_put_page(cpage, 1);
3425         }
3426 }
3427
3428 void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
3429                                                                 block_t len)
3430 {
3431         block_t i;
3432
3433         for (i = 0; i < len; i++)
3434                 f2fs_wait_on_block_writeback(inode, blkaddr + i);
3435 }
3436
3437 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
3438 {
3439         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3440         struct curseg_info *seg_i;
3441         unsigned char *kaddr;
3442         struct page *page;
3443         block_t start;
3444         int i, j, offset;
3445
3446         start = start_sum_block(sbi);
3447
3448         page = f2fs_get_meta_page(sbi, start++);
3449         if (IS_ERR(page))
3450                 return PTR_ERR(page);
3451         kaddr = (unsigned char *)page_address(page);
3452
3453         /* Step 1: restore nat cache */
3454         seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3455         memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
3456
3457         /* Step 2: restore sit cache */
3458         seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3459         memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
3460         offset = 2 * SUM_JOURNAL_SIZE;
3461
3462         /* Step 3: restore summary entries */
3463         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
3464                 unsigned short blk_off;
3465                 unsigned int segno;
3466
3467                 seg_i = CURSEG_I(sbi, i);
3468                 segno = le32_to_cpu(ckpt->cur_data_segno[i]);
3469                 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
3470                 seg_i->next_segno = segno;
3471                 reset_curseg(sbi, i, 0);
3472                 seg_i->alloc_type = ckpt->alloc_type[i];
3473                 seg_i->next_blkoff = blk_off;
3474
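		/* an SSR segment may have valid blocks anywhere, so restore every summary entry */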
3475                 if (seg_i->alloc_type == SSR)
3476                         blk_off = sbi->blocks_per_seg;
3477
3478                 for (j = 0; j < blk_off; j++) {
3479                         struct f2fs_summary *s;
3480                         s = (struct f2fs_summary *)(kaddr + offset);
3481                         seg_i->sum_blk->entries[j] = *s;
3482                         offset += SUMMARY_SIZE;
3483                         if (offset + SUMMARY_SIZE <= PAGE_SIZE -
3484                                                 SUM_FOOTER_SIZE)
3485                                 continue;
3486
3487                         f2fs_put_page(page, 1);
3488                         page = NULL;
3489
3490                         page = f2fs_get_meta_page(sbi, start++);
3491                         if (IS_ERR(page))
3492                                 return PTR_ERR(page);
3493                         kaddr = (unsigned char *)page_address(page);
3494                         offset = 0;
3495                 }
3496         }
3497         f2fs_put_page(page, 1);
3498         return 0;
3499 }
3500
3501 static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
3502 {
3503         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3504         struct f2fs_summary_block *sum;
3505         struct curseg_info *curseg;
3506         struct page *new;
3507         unsigned short blk_off;
3508         unsigned int segno = 0;
3509         block_t blk_addr = 0;
3510         int err = 0;
3511
3512         /* get segment number and block addr */
3513         if (IS_DATASEG(type)) {
3514                 segno = le32_to_cpu(ckpt->cur_data_segno[type]);
3515                 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
3516                                                         CURSEG_HOT_DATA]);
3517                 if (__exist_node_summaries(sbi))
3518                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
3519                 else
3520                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
3521         } else {
3522                 segno = le32_to_cpu(ckpt->cur_node_segno[type -
3523                                                         CURSEG_HOT_NODE]);
3524                 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
3525                                                         CURSEG_HOT_NODE]);
3526                 if (__exist_node_summaries(sbi))
3527                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
3528                                                         type - CURSEG_HOT_NODE);
3529                 else
3530                         blk_addr = GET_SUM_BLOCK(sbi, segno);
3531         }
3532
3533         new = f2fs_get_meta_page(sbi, blk_addr);
3534         if (IS_ERR(new))
3535                 return PTR_ERR(new);
3536         sum = (struct f2fs_summary_block *)page_address(new);
3537
3538         if (IS_NODESEG(type)) {
3539                 if (__exist_node_summaries(sbi)) {
3540                         struct f2fs_summary *ns = &sum->entries[0];
3541                         int i;
3542                         for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
3543                                 ns->version = 0;
3544                                 ns->ofs_in_node = 0;
3545                         }
3546                 } else {
3547                         err = f2fs_restore_node_summary(sbi, segno, sum);
3548                         if (err)
3549                                 goto out;
3550                 }
3551         }
3552
3553         /* set the uncompleted segment as the current segment */
3554         curseg = CURSEG_I(sbi, type);
3555         mutex_lock(&curseg->curseg_mutex);
3556
3557         /* update journal info */
3558         down_write(&curseg->journal_rwsem);
3559         memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
3560         up_write(&curseg->journal_rwsem);
3561
3562         memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
3563         memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
3564         curseg->next_segno = segno;
3565         reset_curseg(sbi, type, 0);
3566         curseg->alloc_type = ckpt->alloc_type[type];
3567         curseg->next_blkoff = blk_off;
3568         mutex_unlock(&curseg->curseg_mutex);
3569 out:
3570         f2fs_put_page(new, 1);
3571         return err;
3572 }
3573
3574 static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
3575 {
3576         struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
3577         struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
3578         int type = CURSEG_HOT_DATA;
3579         int err;
3580
3581         if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
3582                 int npages = f2fs_npages_for_summary_flush(sbi, true);
3583
3584                 if (npages >= 2)
3585                         f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages,
3586                                                         META_CP, true);
3587
3588                 /* restore for compacted data summary */
3589                 err = read_compacted_summaries(sbi);
3590                 if (err)
3591                         return err;
3592                 type = CURSEG_HOT_NODE;
3593         }
3594
3595         if (__exist_node_summaries(sbi))
3596                 f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
3597                                         NR_CURSEG_TYPE - type, META_CP, true);
3598
3599         for (; type <= CURSEG_COLD_NODE; type++) {
3600                 err = read_normal_summaries(sbi, type);
3601                 if (err)
3602                         return err;
3603         }
3604
3605         /* sanity check for summary blocks */
3606         if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
3607                         sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES)
3608                 return -EINVAL;
3609
3610         return 0;
3611 }
3612
3613 static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
3614 {
3615         struct page *page;
3616         unsigned char *kaddr;
3617         struct f2fs_summary *summary;
3618         struct curseg_info *seg_i;
3619         int written_size = 0;
3620         int i, j;
3621
3622         page = f2fs_grab_meta_page(sbi, blkaddr++);
3623         kaddr = (unsigned char *)page_address(page);
3624         memset(kaddr, 0, PAGE_SIZE);
3625
3626         /* Step 1: write nat cache */
3627         seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3628         memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
3629         written_size += SUM_JOURNAL_SIZE;
3630
3631         /* Step 2: write sit cache */
3632         seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3633         memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
3634         written_size += SUM_JOURNAL_SIZE;
3635
3636         /* Step 3: write summary entries */
3637         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
3638                 unsigned short blkoff;
3639                 seg_i = CURSEG_I(sbi, i);
3640                 if (sbi->ckpt->alloc_type[i] == SSR)
3641                         blkoff = sbi->blocks_per_seg;
3642                 else
3643                         blkoff = curseg_blkoff(sbi, i);
3644
3645                 for (j = 0; j < blkoff; j++) {
3646                         if (!page) {
3647                                 page = f2fs_grab_meta_page(sbi, blkaddr++);
3648                                 kaddr = (unsigned char *)page_address(page);
3649                                 memset(kaddr, 0, PAGE_SIZE);
3650                                 written_size = 0;
3651                         }
3652                         summary = (struct f2fs_summary *)(kaddr + written_size);
3653                         *summary = seg_i->sum_blk->entries[j];
3654                         written_size += SUMMARY_SIZE;
3655
3656                         if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
3657                                                         SUM_FOOTER_SIZE)
3658                                 continue;
3659
3660                         set_page_dirty(page);
3661                         f2fs_put_page(page, 1);
3662                         page = NULL;
3663                 }
3664         }
3665         if (page) {
3666                 set_page_dirty(page);
3667                 f2fs_put_page(page, 1);
3668         }
3669 }
3670
3671 static void write_normal_summaries(struct f2fs_sb_info *sbi,
3672                                         block_t blkaddr, int type)
3673 {
3674         int i, end;
3675         if (IS_DATASEG(type))
3676                 end = type + NR_CURSEG_DATA_TYPE;
3677         else
3678                 end = type + NR_CURSEG_NODE_TYPE;
3679
3680         for (i = type; i < end; i++)
3681                 write_current_sum_page(sbi, i, blkaddr + (i - type));
3682 }
3683
3684 void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
3685 {
3686         if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
3687                 write_compacted_summaries(sbi, start_blk);
3688         else
3689                 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
3690 }
3691
3692 void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
3693 {
3694         write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
3695 }
3696
3697 int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
3698                                         unsigned int val, int alloc)
3699 {
3700         int i;
3701
3702         if (type == NAT_JOURNAL) {
3703                 for (i = 0; i < nats_in_cursum(journal); i++) {
3704                         if (le32_to_cpu(nid_in_journal(journal, i)) == val)
3705                                 return i;
3706                 }
3707                 if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
3708                         return update_nats_in_cursum(journal, 1);
3709         } else if (type == SIT_JOURNAL) {
3710                 for (i = 0; i < sits_in_cursum(journal); i++)
3711                         if (le32_to_cpu(segno_in_journal(journal, i)) == val)
3712                                 return i;
3713                 if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
3714                         return update_sits_in_cursum(journal, 1);
3715         }
3716         return -1;
3717 }
3718
3719 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
3720                                         unsigned int segno)
3721 {
3722         return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno));
3723 }
3724
3725 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
3726                                         unsigned int start)
3727 {
3728         struct sit_info *sit_i = SIT_I(sbi);
3729         struct page *page;
3730         pgoff_t src_off, dst_off;
3731
3732         src_off = current_sit_addr(sbi, start);
3733         dst_off = next_sit_addr(sbi, src_off);
3734
3735         page = f2fs_grab_meta_page(sbi, dst_off);
3736         seg_info_to_sit_page(sbi, page, start);
3737
3738         set_page_dirty(page);
3739         set_to_next_sit(sit_i, start);
3740
3741         return page;
3742 }
3743
3744 static struct sit_entry_set *grab_sit_entry_set(void)
3745 {
3746         struct sit_entry_set *ses =
3747                         f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS);
3748
3749         ses->entry_cnt = 0;
3750         INIT_LIST_HEAD(&ses->set_list);
3751         return ses;
3752 }
3753
3754 static void release_sit_entry_set(struct sit_entry_set *ses)
3755 {
3756         list_del(&ses->set_list);
3757         kmem_cache_free(sit_entry_set_slab, ses);
3758 }
3759
3760 static void adjust_sit_entry_set(struct sit_entry_set *ses,
3761                                                 struct list_head *head)
3762 {
3763         struct sit_entry_set *next = ses;
3764
3765         if (list_is_last(&ses->set_list, head))
3766                 return;
3767
3768         list_for_each_entry_continue(next, head, set_list)
3769                 if (ses->entry_cnt <= next->entry_cnt)
3770                         break;
3771
3772         list_move_tail(&ses->set_list, &next->set_list);
3773 }
3774
3775 static void add_sit_entry(unsigned int segno, struct list_head *head)
3776 {
3777         struct sit_entry_set *ses;
3778         unsigned int start_segno = START_SEGNO(segno);
3779
3780         list_for_each_entry(ses, head, set_list) {
3781                 if (ses->start_segno == start_segno) {
3782                         ses->entry_cnt++;
3783                         adjust_sit_entry_set(ses, head);
3784                         return;
3785                 }
3786         }
3787
3788         ses = grab_sit_entry_set();
3789
3790         ses->start_segno = start_segno;
3791         ses->entry_cnt++;
3792         list_add(&ses->set_list, head);
3793 }
3794
3795 static void add_sits_in_set(struct f2fs_sb_info *sbi)
3796 {
3797         struct f2fs_sm_info *sm_info = SM_I(sbi);
3798         struct list_head *set_list = &sm_info->sit_entry_set;
3799         unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
3800         unsigned int segno;
3801
3802         for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
3803                 add_sit_entry(segno, set_list);
3804 }
3805
3806 static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
3807 {
3808         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
3809         struct f2fs_journal *journal = curseg->journal;
3810         int i;
3811
3812         down_write(&curseg->journal_rwsem);
3813         for (i = 0; i < sits_in_cursum(journal); i++) {
3814                 unsigned int segno;
3815                 bool dirtied;
3816
3817                 segno = le32_to_cpu(segno_in_journal(journal, i));
3818                 dirtied = __mark_sit_entry_dirty(sbi, segno);
3819
3820                 if (!dirtied)
3821                         add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
3822         }
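	/* the journal is emptied now that every entry is tracked in the sit entry sets */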
3823         update_sits_in_cursum(journal, -i);
3824         up_write(&curseg->journal_rwsem);
3825 }
3826
3827 /*
3828  * CP calls this function, which flushes SIT entries including sit_journal,
3829  * and moves prefree segs to free segs.
3830  */
3831 void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
3832 {
3833         struct sit_info *sit_i = SIT_I(sbi);
3834         unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
3835         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
3836         struct f2fs_journal *journal = curseg->journal;
3837         struct sit_entry_set *ses, *tmp;
3838         struct list_head *head = &SM_I(sbi)->sit_entry_set;
3839         bool to_journal = true;
3840         struct seg_entry *se;
3841
3842         down_write(&sit_i->sentry_lock);
3843
3844         if (!sit_i->dirty_sentries)
3845                 goto out;
3846
3847         /*
3848          * temporarily add the sit entries marked in the dirty bitmap to
3849          * the sit entry sets and account them there
3850          */
3851         add_sits_in_set(sbi);
3852
3853         /*
3854          * if there is not enough space in the journal to store the dirty sit
3855          * entries, remove all entries from the journal and add and account
3856          * them in the sit entry sets.
3857          */
3858         if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL))
3859                 remove_sits_in_journal(sbi);
3860
3861         /*
3862          * there are two steps to flush sit entries:
3863          * #1, flush sit entries to journal in current cold data summary block.
3864          * #2, flush sit entries to sit page.
3865          */
3866         list_for_each_entry_safe(ses, tmp, head, set_list) {
3867                 struct page *page = NULL;
3868                 struct f2fs_sit_block *raw_sit = NULL;
3869                 unsigned int start_segno = ses->start_segno;
3870                 unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
3871                                                 (unsigned long)MAIN_SEGS(sbi));
3872                 unsigned int segno = start_segno;
3873
3874                 if (to_journal &&
3875                         !__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
3876                         to_journal = false;
3877
3878                 if (to_journal) {
3879                         down_write(&curseg->journal_rwsem);
3880                 } else {
3881                         page = get_next_sit_page(sbi, start_segno);
3882                         raw_sit = page_address(page);
3883                 }
3884
3885                 /* flush dirty sit entries in region of current sit set */
3886                 for_each_set_bit_from(segno, bitmap, end) {
3887                         int offset, sit_offset;
3888
3889                         se = get_seg_entry(sbi, segno);
3890 #ifdef CONFIG_F2FS_CHECK_FS
3891                         if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
3892                                                 SIT_VBLOCK_MAP_SIZE))
3893                                 f2fs_bug_on(sbi, 1);
3894 #endif
3895
3896                         /* add discard candidates */
3897                         if (!(cpc->reason & CP_DISCARD)) {
3898                                 cpc->trim_start = segno;
3899                                 add_discard_addrs(sbi, cpc, false);
3900                         }
3901
3902                         if (to_journal) {
3903                                 offset = f2fs_lookup_journal_in_cursum(journal,
3904                                                         SIT_JOURNAL, segno, 1);
3905                                 f2fs_bug_on(sbi, offset < 0);
3906                                 segno_in_journal(journal, offset) =
3907                                                         cpu_to_le32(segno);
3908                                 seg_info_to_raw_sit(se,
3909                                         &sit_in_journal(journal, offset));
3910                                 check_block_count(sbi, segno,
3911                                         &sit_in_journal(journal, offset));
3912                         } else {
3913                                 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
3914                                 seg_info_to_raw_sit(se,
3915                                                 &raw_sit->entries[sit_offset]);
3916                                 check_block_count(sbi, segno,
3917                                                 &raw_sit->entries[sit_offset]);
3918                         }
3919
3920                         __clear_bit(segno, bitmap);
3921                         sit_i->dirty_sentries--;
3922                         ses->entry_cnt--;
3923                 }
3924
3925                 if (to_journal)
3926                         up_write(&curseg->journal_rwsem);
3927                 else
3928                         f2fs_put_page(page, 1);
3929
3930                 f2fs_bug_on(sbi, ses->entry_cnt);
3931                 release_sit_entry_set(ses);
3932         }
3933
3934         f2fs_bug_on(sbi, !list_empty(head));
3935         f2fs_bug_on(sbi, sit_i->dirty_sentries);
3936 out:
3937         if (cpc->reason & CP_DISCARD) {
3938                 __u64 trim_start = cpc->trim_start;
3939
3940                 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
3941                         add_discard_addrs(sbi, cpc, false);
3942
3943                 cpc->trim_start = trim_start;
3944         }
3945         up_write(&sit_i->sentry_lock);
3946
3947         set_prefree_as_free_segments(sbi);
3948 }
3949
3950 static int build_sit_info(struct f2fs_sb_info *sbi)
3951 {
3952         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
3953         struct sit_info *sit_i;
3954         unsigned int sit_segs, start;
3955         char *src_bitmap;
3956         unsigned int bitmap_size;
3957
3958         /* allocate memory for SIT information */
3959         sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
3960         if (!sit_i)
3961                 return -ENOMEM;
3962
3963         SM_I(sbi)->sit_info = sit_i;
3964
3965         sit_i->sentries =
3966                 f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
3967                                               MAIN_SEGS(sbi)),
3968                               GFP_KERNEL);
3969         if (!sit_i->sentries)
3970                 return -ENOMEM;
3971
3972         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
3973         sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, bitmap_size,
3974                                                                 GFP_KERNEL);
3975         if (!sit_i->dirty_sentries_bitmap)
3976                 return -ENOMEM;
3977
3978         for (start = 0; start < MAIN_SEGS(sbi); start++) {
3979                 sit_i->sentries[start].cur_valid_map
3980                         = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
3981                 sit_i->sentries[start].ckpt_valid_map
3982                         = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
3983                 if (!sit_i->sentries[start].cur_valid_map ||
3984                                 !sit_i->sentries[start].ckpt_valid_map)
3985                         return -ENOMEM;
3986
3987 #ifdef CONFIG_F2FS_CHECK_FS
3988                 sit_i->sentries[start].cur_valid_map_mir
3989                         = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
3990                 if (!sit_i->sentries[start].cur_valid_map_mir)
3991                         return -ENOMEM;
3992 #endif
3993
3994                 sit_i->sentries[start].discard_map
3995                         = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE,
3996                                                         GFP_KERNEL);
3997                 if (!sit_i->sentries[start].discard_map)
3998                         return -ENOMEM;
3999         }
4000
4001         sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
4002         if (!sit_i->tmp_map)
4003                 return -ENOMEM;
4004
4005         if (__is_large_section(sbi)) {
4006                 sit_i->sec_entries =
4007                         f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
4008                                                       MAIN_SECS(sbi)),
4009                                       GFP_KERNEL);
4010                 if (!sit_i->sec_entries)
4011                         return -ENOMEM;
4012         }
4013
4014         /* get information related with SIT */
4015         sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
4016
4017         /* set up the SIT bitmap from the checkpoint pack */
4018         bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
4019         src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
4020
4021         sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
4022         if (!sit_i->sit_bitmap)
4023                 return -ENOMEM;
4024
4025 #ifdef CONFIG_F2FS_CHECK_FS
4026         sit_i->sit_bitmap_mir = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
4027         if (!sit_i->sit_bitmap_mir)
4028                 return -ENOMEM;
4029 #endif
4030
4031         /* init SIT information */
4032         sit_i->s_ops = &default_salloc_ops;
4033
4034         sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
4035         sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
4036         sit_i->written_valid_blocks = 0;
4037         sit_i->bitmap_size = bitmap_size;
4038         sit_i->dirty_sentries = 0;
4039         sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
4040         sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
4041         sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec;
4042         init_rwsem(&sit_i->sentry_lock);
4043         return 0;
4044 }
4045
4046 static int build_free_segmap(struct f2fs_sb_info *sbi)
4047 {
4048         struct free_segmap_info *free_i;
4049         unsigned int bitmap_size, sec_bitmap_size;
4050
4051         /* allocate memory for free segmap information */
4052         free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
4053         if (!free_i)
4054                 return -ENOMEM;
4055
4056         SM_I(sbi)->free_info = free_i;
4057
4058         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4059         free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
4060         if (!free_i->free_segmap)
4061                 return -ENOMEM;
4062
4063         sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4064         free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
4065         if (!free_i->free_secmap)
4066                 return -ENOMEM;
4067
4068         /* set all segments as dirty temporarily */
4069         memset(free_i->free_segmap, 0xff, bitmap_size);
4070         memset(free_i->free_secmap, 0xff, sec_bitmap_size);
4071
4072         /* init free segmap information */
4073         free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
4074         free_i->free_segments = 0;
4075         free_i->free_sections = 0;
4076         spin_lock_init(&free_i->segmap_lock);
4077         return 0;
4078 }
4079
4080 static int build_curseg(struct f2fs_sb_info *sbi)
4081 {
4082         struct curseg_info *array;
4083         int i;
4084
4085         array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
4086                              GFP_KERNEL);
4087         if (!array)
4088                 return -ENOMEM;
4089
4090         SM_I(sbi)->curseg_array = array;
4091
4092         for (i = 0; i < NR_CURSEG_TYPE; i++) {
4093                 mutex_init(&array[i].curseg_mutex);
4094                 array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
4095                 if (!array[i].sum_blk)
4096                         return -ENOMEM;
4097                 init_rwsem(&array[i].journal_rwsem);
4098                 array[i].journal = f2fs_kzalloc(sbi,
4099                                 sizeof(struct f2fs_journal), GFP_KERNEL);
4100                 if (!array[i].journal)
4101                         return -ENOMEM;
4102                 array[i].segno = NULL_SEGNO;
4103                 array[i].next_blkoff = 0;
4104         }
4105         return restore_curseg_summaries(sbi);
4106 }
4107
4108 static int build_sit_entries(struct f2fs_sb_info *sbi)
4109 {
4110         struct sit_info *sit_i = SIT_I(sbi);
4111         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4112         struct f2fs_journal *journal = curseg->journal;
4113         struct seg_entry *se;
4114         struct f2fs_sit_entry sit;
4115         int sit_blk_cnt = SIT_BLK_CNT(sbi);
4116         unsigned int i, start, end;
4117         unsigned int readed, start_blk = 0;
4118         int err = 0;
4119         block_t total_node_blocks = 0;
4120
4121         do {
4122                 readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
4123                                                         META_SIT, true);
4124
4125                 start = start_blk * sit_i->sents_per_block;
4126                 end = (start_blk + readed) * sit_i->sents_per_block;
4127
4128                 for (; start < end && start < MAIN_SEGS(sbi); start++) {
4129                         struct f2fs_sit_block *sit_blk;
4130                         struct page *page;
4131
4132                         se = &sit_i->sentries[start];
4133                         page = get_current_sit_page(sbi, start);
4134                         if (IS_ERR(page))
4135                                 return PTR_ERR(page);
4136                         sit_blk = (struct f2fs_sit_block *)page_address(page);
4137                         sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
4138                         f2fs_put_page(page, 1);
4139
4140                         err = check_block_count(sbi, start, &sit);
4141                         if (err)
4142                                 return err;
4143                         seg_info_from_raw_sit(se, &sit);
4144                         if (IS_NODESEG(se->type))
4145                                 total_node_blocks += se->valid_blocks;
4146
4147                         /* build discard map only one time */
4148                         if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4149                                 memset(se->discard_map, 0xff,
4150                                         SIT_VBLOCK_MAP_SIZE);
4151                         } else {
4152                                 memcpy(se->discard_map,
4153                                         se->cur_valid_map,
4154                                         SIT_VBLOCK_MAP_SIZE);
4155                                 sbi->discard_blks +=
4156                                         sbi->blocks_per_seg -
4157                                         se->valid_blocks;
4158                         }
4159
4160                         if (__is_large_section(sbi))
4161                                 get_sec_entry(sbi, start)->valid_blocks +=
4162                                                         se->valid_blocks;
4163                 }
4164                 start_blk += readed;
4165         } while (start_blk < sit_blk_cnt);
4166
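	/* sit journal entries are newer than the on-disk sit blocks, so let them override */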
4167         down_read(&curseg->journal_rwsem);
4168         for (i = 0; i < sits_in_cursum(journal); i++) {
4169                 unsigned int old_valid_blocks;
4170
4171                 start = le32_to_cpu(segno_in_journal(journal, i));
4172                 if (start >= MAIN_SEGS(sbi)) {
4173                         f2fs_msg(sbi->sb, KERN_ERR,
4174                                         "Wrong journal entry on segno %u",
4175                                         start);
4176                         set_sbi_flag(sbi, SBI_NEED_FSCK);
4177                         err = -EINVAL;
4178                         break;
4179                 }
4180
4181                 se = &sit_i->sentries[start];
4182                 sit = sit_in_journal(journal, i);
4183
4184                 old_valid_blocks = se->valid_blocks;
4185                 if (IS_NODESEG(se->type))
4186                         total_node_blocks -= old_valid_blocks;
4187
4188                 err = check_block_count(sbi, start, &sit);
4189                 if (err)
4190                         break;
4191                 seg_info_from_raw_sit(se, &sit);
4192                 if (IS_NODESEG(se->type))
4193                         total_node_blocks += se->valid_blocks;
4194
4195                 if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4196                         memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
4197                 } else {
4198                         memcpy(se->discard_map, se->cur_valid_map,
4199                                                 SIT_VBLOCK_MAP_SIZE);
4200                         sbi->discard_blks += old_valid_blocks;
4201                         sbi->discard_blks -= se->valid_blocks;
4202                 }
4203
4204                 if (__is_large_section(sbi)) {
4205                         get_sec_entry(sbi, start)->valid_blocks +=
4206                                                         se->valid_blocks;
4207                         get_sec_entry(sbi, start)->valid_blocks -=
4208                                                         old_valid_blocks;
4209                 }
4210         }
4211         up_read(&curseg->journal_rwsem);
4212
4213         if (!err && total_node_blocks != valid_node_count(sbi)) {
4214                 f2fs_msg(sbi->sb, KERN_ERR,
4215                         "SIT is corrupted node# %u vs %u",
4216                         total_node_blocks, valid_node_count(sbi));
4217                 set_sbi_flag(sbi, SBI_NEED_FSCK);
4218                 err = -EINVAL;
4219         }
4220
4221         return err;
4222 }
4223
4224 static void init_free_segmap(struct f2fs_sb_info *sbi)
4225 {
4226         unsigned int start;
4227         int type;
4228
4229         for (start = 0; start < MAIN_SEGS(sbi); start++) {
4230                 struct seg_entry *sentry = get_seg_entry(sbi, start);
4231                 if (!sentry->valid_blocks)
4232                         __set_free(sbi, start);
4233                 else
4234                         SIT_I(sbi)->written_valid_blocks +=
4235                                                 sentry->valid_blocks;
4236         }
4237
4238         /* mark the current segments as in use */
4239         for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
4240                 struct curseg_info *curseg_t = CURSEG_I(sbi, type);
4241                 __set_test_and_inuse(sbi, curseg_t->segno);
4242         }
4243 }
4244
4245 static void init_dirty_segmap(struct f2fs_sb_info *sbi)
4246 {
4247         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4248         struct free_segmap_info *free_i = FREE_I(sbi);
4249         unsigned int segno = 0, offset = 0;
4250         unsigned short valid_blocks;
4251
4252         while (1) {
4253                 /* find dirty segment based on free segmap */
4254                 segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
4255                 if (segno >= MAIN_SEGS(sbi))
4256                         break;
4257                 offset = segno + 1;
4258                 valid_blocks = get_valid_blocks(sbi, segno, false);
4259                 if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
4260                         continue;
4261                 if (valid_blocks > sbi->blocks_per_seg) {
4262                         f2fs_bug_on(sbi, 1);
4263                         continue;
4264                 }
4265                 mutex_lock(&dirty_i->seglist_lock);
4266                 __locate_dirty_segment(sbi, segno, DIRTY);
4267                 mutex_unlock(&dirty_i->seglist_lock);
4268         }
4269 }
4270
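/* Allocate the per-section victim bitmap used during victim selection. */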
4271 static int init_victim_secmap(struct f2fs_sb_info *sbi)
4272 {
4273         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4274         unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4275
4276         dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4277         if (!dirty_i->victim_secmap)
4278                 return -ENOMEM;
4279         return 0;
4280 }
4281
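/*
 * Allocate the dirty segment list information: one bitmap per dirty type plus
 * the victim section bitmap, then populate the DIRTY bitmap from the current
 * segment usage.
 */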
4282 static int build_dirty_segmap(struct f2fs_sb_info *sbi)
4283 {
4284         struct dirty_seglist_info *dirty_i;
4285         unsigned int bitmap_size, i;
4286
4287         /* allocate memory for dirty segments list information */
4288         dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
4289                                                                 GFP_KERNEL);
4290         if (!dirty_i)
4291                 return -ENOMEM;
4292
4293         SM_I(sbi)->dirty_info = dirty_i;
4294         mutex_init(&dirty_i->seglist_lock);
4295
4296         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4297
4298         for (i = 0; i < NR_DIRTY_TYPE; i++) {
4299                 dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
4300                                                                 GFP_KERNEL);
4301                 if (!dirty_i->dirty_segmap[i])
4302                         return -ENOMEM;
4303         }
4304
4305         init_dirty_segmap(sbi);
4306         return init_victim_secmap(sbi);
4307 }
4308
4309 /*
4310  * Update min, max modified time for cost-benefit GC algorithm
4311  */
4312 static void init_min_max_mtime(struct f2fs_sb_info *sbi)
4313 {
4314         struct sit_info *sit_i = SIT_I(sbi);
4315         unsigned int segno;
4316
4317         down_write(&sit_i->sentry_lock);
4318
4319         sit_i->min_mtime = ULLONG_MAX;
4320
4321         for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
4322                 unsigned int i;
4323                 unsigned long long mtime = 0;
4324
4325                 for (i = 0; i < sbi->segs_per_sec; i++)
4326                         mtime += get_seg_entry(sbi, segno + i)->mtime;
4327
4328                 mtime = div_u64(mtime, sbi->segs_per_sec);
4329
4330                 if (sit_i->min_mtime > mtime)
4331                         sit_i->min_mtime = mtime;
4332         }
4333         sit_i->max_mtime = get_mtime(sbi, false);
4334         up_write(&sit_i->sentry_lock);
4335 }
4336
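/*
 * Build the segment manager: read layout and policy parameters from the
 * superblock and checkpoint, set up the flush and discard command controls,
 * and construct the SIT, free/dirty segmaps and current segments.
 */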
4337 int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
4338 {
4339         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
4340         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
4341         struct f2fs_sm_info *sm_info;
4342         int err;
4343
4344         sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
4345         if (!sm_info)
4346                 return -ENOMEM;
4347
4348         /* init sm info */
4349         sbi->sm_info = sm_info;
4350         sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
4351         sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
4352         sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
4353         sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
4354         sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
4355         sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
4356         sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
4357         sm_info->rec_prefree_segments = sm_info->main_segments *
4358                                         DEF_RECLAIM_PREFREE_SEGMENTS / 100;
4359         if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
4360                 sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;
4361
4362         if (!test_opt(sbi, LFS))
4363                 sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
4364         sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
4365         sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
4366         sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
4367         sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
4368         sm_info->min_ssr_sections = reserved_sections(sbi);
4369
4370         INIT_LIST_HEAD(&sm_info->sit_entry_set);
4371
4372         init_rwsem(&sm_info->curseg_lock);
4373
4374         if (!f2fs_readonly(sbi->sb)) {
4375                 err = f2fs_create_flush_cmd_control(sbi);
4376                 if (err)
4377                         return err;
4378         }
4379
4380         err = create_discard_cmd_control(sbi);
4381         if (err)
4382                 return err;
4383
4384         err = build_sit_info(sbi);
4385         if (err)
4386                 return err;
4387         err = build_free_segmap(sbi);
4388         if (err)
4389                 return err;
4390         err = build_curseg(sbi);
4391         if (err)
4392                 return err;
4393
4394         /* reinit free segmap based on SIT */
4395         err = build_sit_entries(sbi);
4396         if (err)
4397                 return err;
4398
4399         init_free_segmap(sbi);
4400         err = build_dirty_segmap(sbi);
4401         if (err)
4402                 return err;
4403
4404         init_min_max_mtime(sbi);
4405         return 0;
4406 }
4407
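/* Free the dirty bitmap of the given type and reset its dirty count. */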
4408 static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
4409                 enum dirty_type dirty_type)
4410 {
4411         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4412
4413         mutex_lock(&dirty_i->seglist_lock);
4414         kvfree(dirty_i->dirty_segmap[dirty_type]);
4415         dirty_i->nr_dirty[dirty_type] = 0;
4416         mutex_unlock(&dirty_i->seglist_lock);
4417 }
4418
4419 static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
4420 {
4421         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4422         kvfree(dirty_i->victim_secmap);
4423 }
4424
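/* Tear down the dirty segment lists and the victim section bitmap. */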
4425 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
4426 {
4427         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4428         int i;
4429
4430         if (!dirty_i)
4431                 return;
4432
4433         /* discard pre-free/dirty segments list */
4434         for (i = 0; i < NR_DIRTY_TYPE; i++)
4435                 discard_dirty_segmap(sbi, i);
4436
4437         destroy_victim_secmap(sbi);
4438         SM_I(sbi)->dirty_info = NULL;
4439         kvfree(dirty_i);
4440 }
4441
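/* Free the summary blocks and journals of all current segments. */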
4442 static void destroy_curseg(struct f2fs_sb_info *sbi)
4443 {
4444         struct curseg_info *array = SM_I(sbi)->curseg_array;
4445         int i;
4446
4447         if (!array)
4448                 return;
4449         SM_I(sbi)->curseg_array = NULL;
4450         for (i = 0; i < NR_CURSEG_TYPE; i++) {
4451                 kvfree(array[i].sum_blk);
4452                 kvfree(array[i].journal);
4453         }
4454         kvfree(array);
4455 }
4456
4457 static void destroy_free_segmap(struct f2fs_sb_info *sbi)
4458 {
4459         struct free_segmap_info *free_i = SM_I(sbi)->free_info;
4460         if (!free_i)
4461                 return;
4462         SM_I(sbi)->free_info = NULL;
4463         kvfree(free_i->free_segmap);
4464         kvfree(free_i->free_secmap);
4465         kvfree(free_i);
4466 }
4467
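/* Release the per-segment SIT entries, section entries and SIT bitmaps. */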
4468 static void destroy_sit_info(struct f2fs_sb_info *sbi)
4469 {
4470         struct sit_info *sit_i = SIT_I(sbi);
4471         unsigned int start;
4472
4473         if (!sit_i)
4474                 return;
4475
4476         if (sit_i->sentries) {
4477                 for (start = 0; start < MAIN_SEGS(sbi); start++) {
4478                         kvfree(sit_i->sentries[start].cur_valid_map);
4479 #ifdef CONFIG_F2FS_CHECK_FS
4480                         kvfree(sit_i->sentries[start].cur_valid_map_mir);
4481 #endif
4482                         kvfree(sit_i->sentries[start].ckpt_valid_map);
4483                         kvfree(sit_i->sentries[start].discard_map);
4484                 }
4485         }
4486         kvfree(sit_i->tmp_map);
4487
4488         kvfree(sit_i->sentries);
4489         kvfree(sit_i->sec_entries);
4490         kvfree(sit_i->dirty_sentries_bitmap);
4491
4492         SM_I(sbi)->sit_info = NULL;
4493         kvfree(sit_i->sit_bitmap);
4494 #ifdef CONFIG_F2FS_CHECK_FS
4495         kvfree(sit_i->sit_bitmap_mir);
4496 #endif
4497         kvfree(sit_i);
4498 }
4499
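/*
 * Tear down everything built by f2fs_build_segment_manager: stop the flush
 * and discard command controls, then free the dirty, current-segment,
 * free-segmap and SIT structures.
 */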
4500 void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
4501 {
4502         struct f2fs_sm_info *sm_info = SM_I(sbi);
4503
4504         if (!sm_info)
4505                 return;
4506         f2fs_destroy_flush_cmd_control(sbi, true);
4507         destroy_discard_cmd_control(sbi);
4508         destroy_dirty_segmap(sbi);
4509         destroy_curseg(sbi);
4510         destroy_free_segmap(sbi);
4511         destroy_sit_info(sbi);
4512         sbi->sm_info = NULL;
4513         kvfree(sm_info);
4514 }
4515
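/*
 * Create the slab caches used by the segment manager; on failure, the caches
 * that were already created are destroyed before returning -ENOMEM.
 */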
4516 int __init f2fs_create_segment_manager_caches(void)
4517 {
4518         discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
4519                         sizeof(struct discard_entry));
4520         if (!discard_entry_slab)
4521                 goto fail;
4522
4523         discard_cmd_slab = f2fs_kmem_cache_create("discard_cmd",
4524                         sizeof(struct discard_cmd));
4525         if (!discard_cmd_slab)
4526                 goto destroy_discard_entry;
4527
4528         sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
4529                         sizeof(struct sit_entry_set));
4530         if (!sit_entry_set_slab)
4531                 goto destroy_discard_cmd;
4532
4533         inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
4534                         sizeof(struct inmem_pages));
4535         if (!inmem_entry_slab)
4536                 goto destroy_sit_entry_set;
4537         return 0;
4538
4539 destroy_sit_entry_set:
4540         kmem_cache_destroy(sit_entry_set_slab);
4541 destroy_discard_cmd:
4542         kmem_cache_destroy(discard_cmd_slab);
4543 destroy_discard_entry:
4544         kmem_cache_destroy(discard_entry_slab);
4545 fail:
4546         return -ENOMEM;
4547 }
4548
4549 void f2fs_destroy_segment_manager_caches(void)
4550 {
4551         kmem_cache_destroy(sit_entry_set_slab);
4552         kmem_cache_destroy(discard_cmd_slab);
4553         kmem_cache_destroy(discard_entry_slab);
4554         kmem_cache_destroy(inmem_entry_slab);
4555 }