OSDN Git Service

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
author: Linus Torvalds <torvalds@linux-foundation.org>
Sun, 3 Mar 2013 03:33:21 +0000 (19:33 -0800)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Sun, 3 Mar 2013 03:33:21 +0000 (19:33 -0800)
Pull ext4 bug fixes from Ted Ts'o:
 "Various bug fixes for ext4.  The most important is a fix for the new
  extent cache's slab shrinker which can cause significant, user-visible
  pauses when the system is under memory pressure."

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: enable quotas before orphan cleanup
  ext4: don't allow quota mount options when quota feature enabled
  ext4: fix a warning from sparse check for ext4_dir_llseek
  ext4: convert number of blocks to clusters properly
  ext4: fix possible memory leak in ext4_remount()
  jbd2: fix ERR_PTR dereference in jbd2__journal_start
  ext4: use percpu counter for extent cache count
  ext4: optimize ext4_es_shrink()

fs/ext4/balloc.c
fs/ext4/dir.c
fs/ext4/ext4.h
fs/ext4/extents_status.c
fs/ext4/mballoc.c
fs/ext4/resize.c
fs/ext4/super.c
fs/jbd2/transaction.c
include/trace/events/ext4.h

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 2f2e0da..92e68b3 100644 (file)
@@ -635,7 +635,7 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
        brelse(bitmap_bh);
        printk(KERN_DEBUG "ext4_count_free_clusters: stored = %llu"
               ", computed = %llu, %llu\n",
-              EXT4_B2C(EXT4_SB(sb), ext4_free_blocks_count(es)),
+              EXT4_NUM_B2C(EXT4_SB(sb), ext4_free_blocks_count(es)),
               desc_count, bitmap_count);
        return bitmap_count;
 #else
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 6dda04f..d8cd1f0 100644 (file)
@@ -334,7 +334,7 @@ static inline loff_t ext4_get_htree_eof(struct file *filp)
  *
  * For non-htree, ext4_llseek already chooses the proper max offset.
  */
-loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence)
+static loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence)
 {
        struct inode *inode = file->f_mapping->host;
        int dx_dir = is_dx_dir(inode);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6e16c18..4a01ba3 100644 (file)
@@ -1309,6 +1309,7 @@ struct ext4_sb_info {
        /* Reclaim extents from extent status tree */
        struct shrinker s_es_shrinker;
        struct list_head s_es_lru;
+       struct percpu_counter s_extent_cache_cnt;
        spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
 };
 
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index f768f4a..95796a1 100644 (file)
@@ -147,11 +147,12 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
                              ext4_lblk_t end);
 static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
                                       int nr_to_scan);
-static int ext4_es_reclaim_extents_count(struct super_block *sb);
 
 int __init ext4_init_es(void)
 {
-       ext4_es_cachep = KMEM_CACHE(extent_status, SLAB_RECLAIM_ACCOUNT);
+       ext4_es_cachep = kmem_cache_create("ext4_extent_status",
+                                          sizeof(struct extent_status),
+                                          0, (SLAB_RECLAIM_ACCOUNT), NULL);
        if (ext4_es_cachep == NULL)
                return -ENOMEM;
        return 0;
@@ -302,8 +303,10 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
        /*
         * We don't count delayed extent because we never try to reclaim them
         */
-       if (!ext4_es_is_delayed(es))
+       if (!ext4_es_is_delayed(es)) {
                EXT4_I(inode)->i_es_lru_nr++;
+               percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt);
+       }
 
        return es;
 }
@@ -314,6 +317,7 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
        if (!ext4_es_is_delayed(es)) {
                BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0);
                EXT4_I(inode)->i_es_lru_nr--;
+               percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt);
        }
 
        kmem_cache_free(ext4_es_cachep, es);
@@ -674,10 +678,11 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
        int nr_to_scan = sc->nr_to_scan;
        int ret, nr_shrunk = 0;
 
-       trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan);
+       ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
+       trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret);
 
        if (!nr_to_scan)
-               return ext4_es_reclaim_extents_count(sbi->s_sb);
+               return ret;
 
        INIT_LIST_HEAD(&scanned);
 
@@ -705,9 +710,10 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
        }
        list_splice_tail(&scanned, &sbi->s_es_lru);
        spin_unlock(&sbi->s_es_lru_lock);
-       trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk);
 
-       return ext4_es_reclaim_extents_count(sbi->s_sb);
+       ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
+       trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret);
+       return ret;
 }
 
 void ext4_es_register_shrinker(struct super_block *sb)
@@ -751,25 +757,6 @@ void ext4_es_lru_del(struct inode *inode)
        spin_unlock(&sbi->s_es_lru_lock);
 }
 
-static int ext4_es_reclaim_extents_count(struct super_block *sb)
-{
-       struct ext4_sb_info *sbi = EXT4_SB(sb);
-       struct ext4_inode_info *ei;
-       struct list_head *cur;
-       int nr_cached = 0;
-
-       spin_lock(&sbi->s_es_lru_lock);
-       list_for_each(cur, &sbi->s_es_lru) {
-               ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
-               read_lock(&ei->i_es_lock);
-               nr_cached += ei->i_es_lru_nr;
-               read_unlock(&ei->i_es_lock);
-       }
-       spin_unlock(&sbi->s_es_lru_lock);
-       trace_ext4_es_reclaim_extents_count(sb, nr_cached);
-       return nr_cached;
-}
-
 static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
                                       int nr_to_scan)
 {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 6540ebe..7bb713a 100644 (file)
@@ -3419,7 +3419,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
                        win = offs;
 
                ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
-                       EXT4_B2C(sbi, win);
+                       EXT4_NUM_B2C(sbi, win);
                BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
                BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
        }
@@ -4565,7 +4565,7 @@ do_more:
                        EXT4_BLOCKS_PER_GROUP(sb);
                count -= overflow;
        }
-       count_clusters = EXT4_B2C(sbi, count);
+       count_clusters = EXT4_NUM_B2C(sbi, count);
        bitmap_bh = ext4_read_block_bitmap(sb, block_group);
        if (!bitmap_bh) {
                err = -EIO;
@@ -4807,11 +4807,11 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
        ext4_group_desc_csum_set(sb, block_group, desc);
        ext4_unlock_group(sb, block_group);
        percpu_counter_add(&sbi->s_freeclusters_counter,
-                          EXT4_B2C(sbi, blocks_freed));
+                          EXT4_NUM_B2C(sbi, blocks_freed));
 
        if (sbi->s_log_groups_per_flex) {
                ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
-               atomic_add(EXT4_B2C(sbi, blocks_freed),
+               atomic_add(EXT4_NUM_B2C(sbi, blocks_freed),
                           &sbi->s_flex_groups[flex_group].free_clusters);
        }
 
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index c7f4d75..b2c8ee5 100644 (file)
@@ -1247,7 +1247,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
 
                ext4_inode_table_set(sb, gdp, group_data->inode_table);
                ext4_free_group_clusters_set(sb, gdp,
-                                            EXT4_B2C(sbi, group_data->free_blocks_count));
+                       EXT4_NUM_B2C(sbi, group_data->free_blocks_count));
                ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
                if (ext4_has_group_desc_csum(sb))
                        ext4_itable_unused_set(sb, gdp,
@@ -1349,7 +1349,7 @@ static void ext4_update_super(struct super_block *sb,
 
        /* Update the free space counts */
        percpu_counter_add(&sbi->s_freeclusters_counter,
-                          EXT4_B2C(sbi, free_blocks));
+                          EXT4_NUM_B2C(sbi, free_blocks));
        percpu_counter_add(&sbi->s_freeinodes_counter,
                           EXT4_INODES_PER_GROUP(sb) * flex_gd->count);
 
@@ -1360,7 +1360,7 @@ static void ext4_update_super(struct super_block *sb,
            sbi->s_log_groups_per_flex) {
                ext4_group_t flex_group;
                flex_group = ext4_flex_group(sbi, group_data[0].group);
-               atomic_add(EXT4_B2C(sbi, free_blocks),
+               atomic_add(EXT4_NUM_B2C(sbi, free_blocks),
                           &sbi->s_flex_groups[flex_group].free_clusters);
                atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count,
                           &sbi->s_flex_groups[flex_group].free_inodes);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 620cf56..5e6c878 100644 (file)
@@ -783,6 +783,7 @@ static void ext4_put_super(struct super_block *sb)
        percpu_counter_destroy(&sbi->s_freeinodes_counter);
        percpu_counter_destroy(&sbi->s_dirs_counter);
        percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
+       percpu_counter_destroy(&sbi->s_extent_cache_cnt);
        brelse(sbi->s_sbh);
 #ifdef CONFIG_QUOTA
        for (i = 0; i < MAXQUOTAS; i++)
@@ -1247,6 +1248,11 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
                        "quota options when quota turned on");
                return -1;
        }
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) {
+               ext4_msg(sb, KERN_ERR, "Cannot set journaled quota options "
+                        "when QUOTA feature is enabled");
+               return -1;
+       }
        qname = match_strdup(args);
        if (!qname) {
                ext4_msg(sb, KERN_ERR,
@@ -1544,6 +1550,13 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
                                 "quota options when quota turned on");
                        return -1;
                }
+               if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                                              EXT4_FEATURE_RO_COMPAT_QUOTA)) {
+                       ext4_msg(sb, KERN_ERR,
+                                "Cannot set journaled quota options "
+                                "when QUOTA feature is enabled");
+                       return -1;
+               }
                sbi->s_jquota_fmt = m->mount_opt;
 #endif
        } else {
@@ -1592,6 +1605,12 @@ static int parse_options(char *options, struct super_block *sb,
                        return 0;
        }
 #ifdef CONFIG_QUOTA
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
+           (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) {
+               ext4_msg(sb, KERN_ERR, "Cannot set quota options when QUOTA "
+                        "feature is enabled");
+               return 0;
+       }
        if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
                if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
                        clear_opt(sb, USRQUOTA);
@@ -3161,7 +3180,7 @@ int ext4_calculate_overhead(struct super_block *sb)
        }
        /* Add the journal blocks as well */
        if (sbi->s_journal)
-               overhead += EXT4_B2C(sbi, sbi->s_journal->j_maxlen);
+               overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
 
        sbi->s_overhead = overhead;
        smp_wmb();
@@ -3688,6 +3707,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        if (!err) {
                err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0);
        }
+       if (!err) {
+               err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0);
+       }
        if (err) {
                ext4_msg(sb, KERN_ERR, "insufficient memory");
                goto failed_mount3;
@@ -3711,13 +3733,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        sb->s_export_op = &ext4_export_ops;
        sb->s_xattr = ext4_xattr_handlers;
 #ifdef CONFIG_QUOTA
-       sb->s_qcop = &ext4_qctl_operations;
        sb->dq_op = &ext4_quota_operations;
-
-       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) {
-               /* Use qctl operations for hidden quota files. */
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
                sb->s_qcop = &ext4_qctl_sysfile_operations;
-       }
+       else
+               sb->s_qcop = &ext4_qctl_operations;
 #endif
        memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
 
@@ -3913,6 +3933,16 @@ no_journal:
        if (err)
                goto failed_mount7;
 
+#ifdef CONFIG_QUOTA
+       /* Enable quota usage during mount. */
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
+           !(sb->s_flags & MS_RDONLY)) {
+               err = ext4_enable_quotas(sb);
+               if (err)
+                       goto failed_mount8;
+       }
+#endif  /* CONFIG_QUOTA */
+
        EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
        ext4_orphan_cleanup(sb, es);
        EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
@@ -3930,16 +3960,6 @@ no_journal:
        } else
                descr = "out journal";
 
-#ifdef CONFIG_QUOTA
-       /* Enable quota usage during mount. */
-       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
-           !(sb->s_flags & MS_RDONLY)) {
-               err = ext4_enable_quotas(sb);
-               if (err)
-                       goto failed_mount8;
-       }
-#endif  /* CONFIG_QUOTA */
-
        if (test_opt(sb, DISCARD)) {
                struct request_queue *q = bdev_get_queue(sb->s_bdev);
                if (!blk_queue_discard(q))
@@ -3993,6 +4013,7 @@ failed_mount3:
        percpu_counter_destroy(&sbi->s_freeinodes_counter);
        percpu_counter_destroy(&sbi->s_dirs_counter);
        percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
+       percpu_counter_destroy(&sbi->s_extent_cache_cnt);
        if (sbi->s_mmp_tsk)
                kthread_stop(sbi->s_mmp_tsk);
 failed_mount2:
@@ -4538,6 +4559,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
                        if (!old_opts.s_qf_names[i]) {
                                for (j = 0; j < i; j++)
                                        kfree(old_opts.s_qf_names[j]);
+                               kfree(orig_data);
                                return -ENOMEM;
                        }
                } else
@@ -4816,9 +4838,12 @@ static int ext4_release_dquot(struct dquot *dquot)
 
 static int ext4_mark_dquot_dirty(struct dquot *dquot)
 {
+       struct super_block *sb = dquot->dq_sb;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+
        /* Are we journaling quotas? */
-       if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
-           EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) ||
+           sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
                dquot_mark_dquot_dirty(dquot);
                return ext4_write_dquot(dquot);
        } else {
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index b7e2385..d6ee5ae 100644 (file)
@@ -382,7 +382,7 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask,
        if (err < 0) {
                jbd2_free_handle(handle);
                current->journal_info = NULL;
-               handle = ERR_PTR(err);
+               return ERR_PTR(err);
        }
        handle->h_type = type;
        handle->h_line_no = line_no;
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index c0457c0..4ee4710 100644 (file)
@@ -2255,64 +2255,48 @@ TRACE_EVENT(ext4_es_lookup_extent_exit,
                  __entry->found ? __entry->status : 0)
 );
 
-TRACE_EVENT(ext4_es_reclaim_extents_count,
-       TP_PROTO(struct super_block *sb, int nr_cached),
-
-       TP_ARGS(sb, nr_cached),
-
-       TP_STRUCT__entry(
-               __field(        dev_t,  dev                     )
-               __field(        int,    nr_cached               )
-       ),
-
-       TP_fast_assign(
-               __entry->dev            = sb->s_dev;
-               __entry->nr_cached      = nr_cached;
-       ),
-
-       TP_printk("dev %d,%d cached objects nr %d",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->nr_cached)
-);
-
 TRACE_EVENT(ext4_es_shrink_enter,
-       TP_PROTO(struct super_block *sb, int nr_to_scan),
+       TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt),
 
-       TP_ARGS(sb, nr_to_scan),
+       TP_ARGS(sb, nr_to_scan, cache_cnt),
 
        TP_STRUCT__entry(
                __field(        dev_t,  dev                     )
                __field(        int,    nr_to_scan              )
+               __field(        int,    cache_cnt               )
        ),
 
        TP_fast_assign(
                __entry->dev            = sb->s_dev;
                __entry->nr_to_scan     = nr_to_scan;
+               __entry->cache_cnt      = cache_cnt;
        ),
 
-       TP_printk("dev %d,%d nr to scan %d",
+       TP_printk("dev %d,%d nr_to_scan %d cache_cnt %d",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->nr_to_scan)
+                 __entry->nr_to_scan, __entry->cache_cnt)
 );
 
 TRACE_EVENT(ext4_es_shrink_exit,
-       TP_PROTO(struct super_block *sb, int shrunk_nr),
+       TP_PROTO(struct super_block *sb, int shrunk_nr, int cache_cnt),
 
-       TP_ARGS(sb, shrunk_nr),
+       TP_ARGS(sb, shrunk_nr, cache_cnt),
 
        TP_STRUCT__entry(
                __field(        dev_t,  dev                     )
                __field(        int,    shrunk_nr               )
+               __field(        int,    cache_cnt               )
        ),
 
        TP_fast_assign(
                __entry->dev            = sb->s_dev;
                __entry->shrunk_nr      = shrunk_nr;
+               __entry->cache_cnt      = cache_cnt;
        ),
 
-       TP_printk("dev %d,%d nr to scan %d",
+       TP_printk("dev %d,%d shrunk_nr %d cache_cnt %d",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->shrunk_nr)
+                 __entry->shrunk_nr, __entry->cache_cnt)
 );
 
 #endif /* _TRACE_EXT4_H */