diff options
| -rw-r--r-- | MAINTAINERS | 7 | ||||
| -rw-r--r-- | fs/ext4/Makefile | 5 | ||||
| -rw-r--r-- | fs/ext4/crypto.c | 9 | ||||
| -rw-r--r-- | fs/ext4/ext4.h | 6 | ||||
| -rw-r--r-- | fs/ext4/ext4_extents.h | 12 | ||||
| -rw-r--r-- | fs/ext4/extents-test.c | 12 | ||||
| -rw-r--r-- | fs/ext4/extents.c | 80 | ||||
| -rw-r--r-- | fs/ext4/fast_commit.c | 17 | ||||
| -rw-r--r-- | fs/ext4/fsync.c | 16 | ||||
| -rw-r--r-- | fs/ext4/ialloc.c | 6 | ||||
| -rw-r--r-- | fs/ext4/inline.c | 10 | ||||
| -rw-r--r-- | fs/ext4/inode.c | 75 | ||||
| -rw-r--r-- | fs/ext4/mballoc-test.c | 81 | ||||
| -rw-r--r-- | fs/ext4/mballoc.c | 132 | ||||
| -rw-r--r-- | fs/ext4/mballoc.h | 30 | ||||
| -rw-r--r-- | fs/ext4/page-io.c | 10 | ||||
| -rw-r--r-- | fs/ext4/super.c | 37 | ||||
| -rw-r--r-- | fs/ext4/sysfs.c | 10 | ||||
| -rw-r--r-- | fs/jbd2/checkpoint.c | 15 |
19 files changed, 455 insertions, 115 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 5969f7b00761..9d31ceeac4a5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9619,7 +9619,12 @@ F: include/linux/ext2* EXT4 FILE SYSTEM M: "Theodore Ts'o" <tytso@mit.edu> -M: Andreas Dilger <adilger.kernel@dilger.ca> +R: Andreas Dilger <adilger.kernel@dilger.ca> +R: Baokun Li <libaokun@linux.alibaba.com> +R: Jan Kara <jack@suse.cz> +R: Ojaswin Mujoo <ojaswin@linux.ibm.com> +R: Ritesh Harjani (IBM) <ritesh.list@gmail.com> +R: Zhang Yi <yi.zhang@huawei.com> L: linux-ext4@vger.kernel.org S: Maintained W: http://ext4.wiki.kernel.org diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index 72206a292676..3baee4e7c1cf 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -14,7 +14,8 @@ ext4-y := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \ ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o -ext4-inode-test-objs += inode-test.o -obj-$(CONFIG_EXT4_KUNIT_TESTS) += ext4-inode-test.o +ext4-test-objs += inode-test.o mballoc-test.o \ + extents-test.o +obj-$(CONFIG_EXT4_KUNIT_TESTS) += ext4-test.o ext4-$(CONFIG_FS_VERITY) += verity.o ext4-$(CONFIG_FS_ENCRYPTION) += crypto.o diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index cf0a0970c095..f41f320f4437 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -163,10 +163,17 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, */ if (handle) { + /* + * Since the inode is new it is ok to pass the + * XATTR_CREATE flag. This is necessary to match the + * remaining journal credits check in the set_handle + * function with the credits allocated for the new + * inode. + */ res = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_ENCRYPTION, EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, - ctx, len, 0); + ctx, len, XATTR_CREATE); if (!res) { ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); ext4_clear_inode_state(inode, diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 293f698b7042..7617e2d454ea 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1570,6 +1570,7 @@ struct ext4_sb_info { struct proc_dir_entry *s_proc; struct kobject s_kobj; struct completion s_kobj_unregister; + struct mutex s_error_notify_mutex; /* protects sysfs_notify vs kobject_del */ struct super_block *s_sb; struct buffer_head *s_mmp_bh; @@ -3944,6 +3945,11 @@ static inline bool ext4_inode_can_atomic_write(struct inode *inode) extern int ext4_block_write_begin(handle_t *handle, struct folio *folio, loff_t pos, unsigned len, get_block_t *get_block); + +#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS) +#define EXPORT_SYMBOL_FOR_EXT4_TEST(sym) \ + EXPORT_SYMBOL_FOR_MODULES(sym, "ext4-test") +#endif #endif /* __KERNEL__ */ #endif /* _EXT4_H */ diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index c484125d963f..ebaf7cc42430 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -264,5 +264,17 @@ static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix, 0xffff); } +extern int __ext4_ext_dirty(const char *where, unsigned int line, + handle_t *handle, struct inode *inode, + struct ext4_ext_path *path); +extern int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex); +#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS) +extern int ext4_ext_space_root_idx_test(struct inode *inode, int check); +extern struct ext4_ext_path *ext4_split_convert_extents_test( + handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map, + struct ext4_ext_path *path, + int flags, unsigned int *allocated); +#endif #endif /* _EXT4_EXTENTS */ diff --git a/fs/ext4/extents-test.c b/fs/ext4/extents-test.c index 7c4690eb7dad..5496b2c8e2cd 100644 --- a/fs/ext4/extents-test.c +++ b/fs/ext4/extents-test.c @@ -142,8 +142,10 @@ static struct file_system_type ext_fs_type = { static void extents_kunit_exit(struct kunit *test) { - struct ext4_sb_info *sbi = k_ctx.k_ei->vfs_inode.i_sb->s_fs_info; + struct super_block *sb = k_ctx.k_ei->vfs_inode.i_sb; + struct ext4_sb_info *sbi = sb->s_fs_info; + ext4_es_unregister_shrinker(sbi); kfree(sbi); kfree(k_ctx.k_ei); kfree(k_ctx.k_data); @@ -280,8 +282,8 @@ static int extents_kunit_init(struct kunit *test) eh->eh_depth = 0; eh->eh_entries = cpu_to_le16(1); eh->eh_magic = EXT4_EXT_MAGIC; - eh->eh_max = - cpu_to_le16(ext4_ext_space_root_idx(&k_ctx.k_ei->vfs_inode, 0)); + eh->eh_max = cpu_to_le16(ext4_ext_space_root_idx_test( + &k_ctx.k_ei->vfs_inode, 0)); eh->eh_generation = 0; /* @@ -384,8 +386,8 @@ static void test_split_convert(struct kunit *test) switch (param->type) { case TEST_SPLIT_CONVERT: - path = ext4_split_convert_extents(NULL, inode, &map, path, - param->split_flags, NULL); + path = ext4_split_convert_extents_test(NULL, inode, &map, + path, param->split_flags, NULL); break; case TEST_CREATE_BLOCKS: ext4_map_create_blocks_helper(test, inode, &map, param->split_flags); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index ae3804f36535..8cce1479be6d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -184,9 +184,9 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode, * - ENOMEM * - EIO */ -static int __ext4_ext_dirty(const char *where, unsigned int line, - handle_t *handle, struct inode *inode, - struct ext4_ext_path *path) +int __ext4_ext_dirty(const char *where, unsigned int line, + handle_t *handle, struct inode *inode, + struct ext4_ext_path *path) { int err; @@ -1736,6 +1736,13 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, err = ext4_ext_get_access(handle, inode, path + k); if (err) return err; + if (unlikely(path[k].p_idx > EXT_LAST_INDEX(path[k].p_hdr))) { + EXT4_ERROR_INODE(inode, + "path[%d].p_idx %p > EXT_LAST_INDEX %p", + k, path[k].p_idx, + EXT_LAST_INDEX(path[k].p_hdr)); + return -EFSCORRUPTED; + } path[k].p_idx->ei_block = border; err = ext4_ext_dirty(handle, inode, path + k); if (err) @@ -1748,6 +1755,14 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, err = ext4_ext_get_access(handle, inode, path + k); if (err) goto clean; + if (unlikely(path[k].p_idx > EXT_LAST_INDEX(path[k].p_hdr))) { + EXT4_ERROR_INODE(inode, + "path[%d].p_idx %p > EXT_LAST_INDEX %p", + k, path[k].p_idx, + EXT_LAST_INDEX(path[k].p_hdr)); + err = -EFSCORRUPTED; + goto clean; + } path[k].p_idx->ei_block = border; err = ext4_ext_dirty(handle, inode, path + k); if (err) @@ -3144,7 +3159,7 @@ static void ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex) } /* FIXME!! we need to try to merge to left or right after zero-out */ -static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) +int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) { ext4_fsblk_t ee_pblock; unsigned int ee_len; @@ -3239,6 +3254,9 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle, insert_err = PTR_ERR(path); err = 0; + if (insert_err != -ENOSPC && insert_err != -EDQUOT && + insert_err != -ENOMEM) + goto out_path; /* * Get a new path to try to zeroout or fix the extent length. @@ -3255,13 +3273,20 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle, goto out_path; } + depth = ext_depth(inode); + ex = path[depth].p_ext; + if (!ex) { + EXT4_ERROR_INODE(inode, + "bad extent address lblock: %lu, depth: %d pblock %llu", + (unsigned long)ee_block, depth, path[depth].p_block); + err = -EFSCORRUPTED; + goto out; + } + err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto out; - depth = ext_depth(inode); - ex = path[depth].p_ext; - fix_extent_len: ex->ee_len = orig_ex.ee_len; err = ext4_ext_dirty(handle, inode, path + path->p_depth); @@ -3363,7 +3388,7 @@ static int ext4_split_extent_zeroout(handle_t *handle, struct inode *inode, ext4_ext_mark_initialized(ex); - ext4_ext_dirty(handle, inode, path + depth); + err = ext4_ext_dirty(handle, inode, path + depth); if (err) return err; @@ -4457,9 +4482,13 @@ got_allocated_blocks: path = ext4_ext_insert_extent(handle, inode, path, &newex, flags); if (IS_ERR(path)) { err = PTR_ERR(path); - if (allocated_clusters) { + /* + * Gracefully handle out of space conditions. If the filesystem + * is inconsistent, we'll just leak allocated blocks to avoid + * causing even more damage. + */ + if (allocated_clusters && (err == -EDQUOT || err == -ENOSPC)) { int fb_flags = 0; - /* * free data blocks we just allocated. * not a good idea to call discard here directly, @@ -6238,6 +6267,33 @@ out: return 0; } -#ifdef CONFIG_EXT4_KUNIT_TESTS -#include "extents-test.c" +#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS) +int ext4_ext_space_root_idx_test(struct inode *inode, int check) +{ + return ext4_ext_space_root_idx(inode, check); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_ext_space_root_idx_test); + +struct ext4_ext_path *ext4_split_convert_extents_test(handle_t *handle, + struct inode *inode, struct ext4_map_blocks *map, + struct ext4_ext_path *path, int flags, + unsigned int *allocated) +{ + return ext4_split_convert_extents(handle, inode, map, path, + flags, allocated); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_split_convert_extents_test); + +EXPORT_SYMBOL_FOR_EXT4_TEST(__ext4_ext_dirty); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_ext_zeroout); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_register_shrinker); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_unregister_shrinker); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_map_create_blocks); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_init_tree); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_lookup_extent); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_insert_extent); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_ext_insert_extent); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_find_extent); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_issue_zeroout); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_map_query_blocks); #endif diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index f575751f1cae..2f0057e04934 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -975,13 +975,13 @@ static int ext4_fc_flush_data(journal_t *journal) int ret = 0; list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { - ret = jbd2_submit_inode_data(journal, ei->jinode); + ret = jbd2_submit_inode_data(journal, READ_ONCE(ei->jinode)); if (ret) return ret; } list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { - ret = jbd2_wait_inode_data(journal, ei->jinode); + ret = jbd2_wait_inode_data(journal, READ_ONCE(ei->jinode)); if (ret) return ret; } @@ -1613,19 +1613,21 @@ static int ext4_fc_replay_inode(struct super_block *sb, /* Immediately update the inode on disk. */ ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh); if (ret) - goto out; + goto out_brelse; ret = sync_dirty_buffer(iloc.bh); if (ret) - goto out; + goto out_brelse; ret = ext4_mark_inode_used(sb, ino); if (ret) - goto out; + goto out_brelse; /* Given that we just wrote the inode on disk, this SHOULD succeed. */ inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); if (IS_ERR(inode)) { ext4_debug("Inode not found."); - return -EFSCORRUPTED; + inode = NULL; + ret = -EFSCORRUPTED; + goto out_brelse; } /* @@ -1642,13 +1644,14 @@ static int ext4_fc_replay_inode(struct super_block *sb, ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode)); ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh); sync_dirty_buffer(iloc.bh); +out_brelse: brelse(iloc.bh); out: iput(inode); if (!ret) blkdev_issue_flush(sb->s_bdev); - return 0; + return ret; } /* diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index e476c6de3074..bd8f230fa507 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -83,11 +83,23 @@ static int ext4_fsync_nojournal(struct file *file, loff_t start, loff_t end, int datasync, bool *needs_barrier) { struct inode *inode = file->f_inode; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = 0, + }; int ret; ret = generic_buffers_fsync_noflush(file, start, end, datasync); - if (!ret) - ret = ext4_sync_parent(inode); + if (ret) + return ret; + + /* Force writeout of inode table buffer to disk */ + ret = ext4_write_inode(inode, &wbc); + if (ret) + return ret; + + ret = ext4_sync_parent(inode); + if (test_opt(inode->i_sb, BARRIER)) *needs_barrier = true; diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index b20a1bf866ab..b1bc1950c9f0 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -686,6 +686,12 @@ static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino) if (unlikely(!gdp)) return 0; + /* Inode was never used in this filesystem? */ + if (ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT) || + ino >= EXT4_INODES_PER_GROUP(sb) - ext4_itable_unused_count(sb, gdp))) + return 0; + bh = sb_find_get_block(sb, ext4_inode_table(sb, gdp) + (ino / inodes_per_block)); if (!bh || !buffer_uptodate(bh)) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 1f6bc05593df..408677fa8196 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -522,7 +522,15 @@ static int ext4_read_inline_folio(struct inode *inode, struct folio *folio) goto out; len = min_t(size_t, ext4_get_inline_size(inode), i_size_read(inode)); - BUG_ON(len > PAGE_SIZE); + + if (len > PAGE_SIZE) { + ext4_error_inode(inode, __func__, __LINE__, 0, + "inline size %zu exceeds PAGE_SIZE", len); + ret = -EFSCORRUPTED; + brelse(iloc.bh); + goto out; + } + kaddr = kmap_local_folio(folio, 0); ret = ext4_read_inline_data(inode, kaddr, len, &iloc); kaddr = folio_zero_tail(folio, len, kaddr + len); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 396dc3a5d16b..1123d995494b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -128,6 +128,8 @@ void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw, static inline int ext4_begin_ordered_truncate(struct inode *inode, loff_t new_size) { + struct jbd2_inode *jinode = READ_ONCE(EXT4_I(inode)->jinode); + trace_ext4_begin_ordered_truncate(inode, new_size); /* * If jinode is zero, then we never opened the file for @@ -135,10 +137,10 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode, * jbd2_journal_begin_ordered_truncate() since there's no * outstanding writes we need to flush. */ - if (!EXT4_I(inode)->jinode) + if (!jinode) return 0; return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode), - EXT4_I(inode)->jinode, + jinode, new_size); } @@ -184,6 +186,14 @@ void ext4_evict_inode(struct inode *inode) if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL) ext4_evict_ea_inode(inode); if (inode->i_nlink) { + /* + * If there's dirty page will lead to data loss, user + * could see stale data. + */ + if (unlikely(!ext4_emergency_state(inode->i_sb) && + mapping_tagged(&inode->i_data, PAGECACHE_TAG_DIRTY))) + ext4_warning_inode(inode, "data will be lost"); + truncate_inode_pages_final(&inode->i_data); goto no_delete; @@ -4451,8 +4461,13 @@ int ext4_inode_attach_jinode(struct inode *inode) spin_unlock(&inode->i_lock); return -ENOMEM; } - ei->jinode = jinode; - jbd2_journal_init_jbd_inode(ei->jinode, inode); + jbd2_journal_init_jbd_inode(jinode, inode); + /* + * Publish ->jinode only after it is fully initialized so that + * readers never observe a partially initialized jbd2_inode. + */ + smp_wmb(); + WRITE_ONCE(ei->jinode, jinode); jinode = NULL; } spin_unlock(&inode->i_lock); @@ -5401,18 +5416,36 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, inode->i_op = &ext4_encrypted_symlink_inode_operations; } else if (ext4_inode_is_fast_symlink(inode)) { inode->i_op = &ext4_fast_symlink_inode_operations; - if (inode->i_size == 0 || - inode->i_size >= sizeof(ei->i_data) || - strnlen((char *)ei->i_data, inode->i_size + 1) != - inode->i_size) { - ext4_error_inode(inode, function, line, 0, - "invalid fast symlink length %llu", - (unsigned long long)inode->i_size); - ret = -EFSCORRUPTED; - goto bad_inode; + + /* + * Orphan cleanup can see inodes with i_size == 0 + * and i_data uninitialized. Skip size checks in + * that case. This is safe because the first thing + * ext4_evict_inode() does for fast symlinks is + * clearing of i_data and i_size. + */ + if ((EXT4_SB(sb)->s_mount_state & EXT4_ORPHAN_FS)) { + if (inode->i_nlink != 0) { + ext4_error_inode(inode, function, line, 0, + "invalid orphan symlink nlink %d", + inode->i_nlink); + ret = -EFSCORRUPTED; + goto bad_inode; + } + } else { + if (inode->i_size == 0 || + inode->i_size >= sizeof(ei->i_data) || + strnlen((char *)ei->i_data, inode->i_size + 1) != + inode->i_size) { + ext4_error_inode(inode, function, line, 0, + "invalid fast symlink length %llu", + (unsigned long long)inode->i_size); + ret = -EFSCORRUPTED; + goto bad_inode; + } + inode_set_cached_link(inode, (char *)ei->i_data, + inode->i_size); } - inode_set_cached_link(inode, (char *)ei->i_data, - inode->i_size); } else { inode->i_op = &ext4_symlink_inode_operations; } @@ -5849,6 +5882,18 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (attr->ia_size == inode->i_size) inc_ivers = false; + /* + * If file has inline data but new size exceeds inline capacity, + * convert to extent-based storage first to prevent inconsistent + * state (inline flag set but size exceeds inline capacity). + */ + if (ext4_has_inline_data(inode) && + attr->ia_size > EXT4_I(inode)->i_inline_size) { + error = ext4_convert_inline_data(inode); + if (error) + goto err_out; + } + if (shrink) { if (ext4_should_order_data(inode)) { error = ext4_begin_ordered_truncate(inode, diff --git a/fs/ext4/mballoc-test.c b/fs/ext4/mballoc-test.c index 9fbdf6a09489..6f5bfbb0e8a4 100644 --- a/fs/ext4/mballoc-test.c +++ b/fs/ext4/mballoc-test.c @@ -8,6 +8,7 @@ #include <linux/random.h> #include "ext4.h" +#include "mballoc.h" struct mbt_grp_ctx { struct buffer_head bitmap_bh; @@ -336,7 +337,7 @@ ext4_mb_mark_context_stub(handle_t *handle, struct super_block *sb, bool state, if (state) mb_set_bits(bitmap_bh->b_data, blkoff, len); else - mb_clear_bits(bitmap_bh->b_data, blkoff, len); + mb_clear_bits_test(bitmap_bh->b_data, blkoff, len); return 0; } @@ -413,14 +414,14 @@ static void test_new_blocks_simple(struct kunit *test) /* get block at goal */ ar.goal = ext4_group_first_block_no(sb, goal_group); - found = ext4_mb_new_blocks_simple(&ar, &err); + found = ext4_mb_new_blocks_simple_test(&ar, &err); KUNIT_ASSERT_EQ_MSG(test, ar.goal, found, "failed to alloc block at goal, expected %llu found %llu", ar.goal, found); /* get block after goal in goal group */ ar.goal = ext4_group_first_block_no(sb, goal_group); - found = ext4_mb_new_blocks_simple(&ar, &err); + found = ext4_mb_new_blocks_simple_test(&ar, &err); KUNIT_ASSERT_EQ_MSG(test, ar.goal + EXT4_C2B(sbi, 1), found, "failed to alloc block after goal in goal group, expected %llu found %llu", ar.goal + 1, found); @@ -428,7 +429,7 @@ static void test_new_blocks_simple(struct kunit *test) /* get block after goal group */ mbt_ctx_mark_used(sb, goal_group, 0, EXT4_CLUSTERS_PER_GROUP(sb)); ar.goal = ext4_group_first_block_no(sb, goal_group); - found = ext4_mb_new_blocks_simple(&ar, &err); + found = ext4_mb_new_blocks_simple_test(&ar, &err); KUNIT_ASSERT_EQ_MSG(test, ext4_group_first_block_no(sb, goal_group + 1), found, "failed to alloc block after goal group, expected %llu found %llu", @@ -438,7 +439,7 @@ static void test_new_blocks_simple(struct kunit *test) for (i = goal_group; i < ext4_get_groups_count(sb); i++) mbt_ctx_mark_used(sb, i, 0, EXT4_CLUSTERS_PER_GROUP(sb)); ar.goal = ext4_group_first_block_no(sb, goal_group); - found = ext4_mb_new_blocks_simple(&ar, &err); + found = ext4_mb_new_blocks_simple_test(&ar, &err); KUNIT_ASSERT_EQ_MSG(test, ext4_group_first_block_no(sb, 0) + EXT4_C2B(sbi, 1), found, "failed to alloc block before goal group, expected %llu found %llu", @@ -448,7 +449,7 @@ static void test_new_blocks_simple(struct kunit *test) for (i = 0; i < ext4_get_groups_count(sb); i++) mbt_ctx_mark_used(sb, i, 0, EXT4_CLUSTERS_PER_GROUP(sb)); ar.goal = ext4_group_first_block_no(sb, goal_group); - found = ext4_mb_new_blocks_simple(&ar, &err); + found = ext4_mb_new_blocks_simple_test(&ar, &err); KUNIT_ASSERT_NE_MSG(test, err, 0, "unexpectedly get block when no block is available"); } @@ -492,16 +493,16 @@ validate_free_blocks_simple(struct kunit *test, struct super_block *sb, continue; bitmap = mbt_ctx_bitmap(sb, i); - bit = mb_find_next_zero_bit(bitmap, max, 0); + bit = mb_find_next_zero_bit_test(bitmap, max, 0); KUNIT_ASSERT_EQ_MSG(test, bit, max, "free block on unexpected group %d", i); } bitmap = mbt_ctx_bitmap(sb, goal_group); - bit = mb_find_next_zero_bit(bitmap, max, 0); + bit = mb_find_next_zero_bit_test(bitmap, max, 0); KUNIT_ASSERT_EQ(test, bit, start); - bit = mb_find_next_bit(bitmap, max, bit + 1); + bit = mb_find_next_bit_test(bitmap, max, bit + 1); KUNIT_ASSERT_EQ(test, bit, start + len); } @@ -524,7 +525,7 @@ test_free_blocks_simple_range(struct kunit *test, ext4_group_t goal_group, block = ext4_group_first_block_no(sb, goal_group) + EXT4_C2B(sbi, start); - ext4_free_blocks_simple(inode, block, len); + ext4_free_blocks_simple_test(inode, block, len); validate_free_blocks_simple(test, sb, goal_group, start, len); mbt_ctx_mark_used(sb, goal_group, 0, EXT4_CLUSTERS_PER_GROUP(sb)); } @@ -566,15 +567,15 @@ test_mark_diskspace_used_range(struct kunit *test, bitmap = mbt_ctx_bitmap(sb, TEST_GOAL_GROUP); memset(bitmap, 0, sb->s_blocksize); - ret = ext4_mb_mark_diskspace_used(ac, NULL); + ret = ext4_mb_mark_diskspace_used_test(ac, NULL); KUNIT_ASSERT_EQ(test, ret, 0); max = EXT4_CLUSTERS_PER_GROUP(sb); - i = mb_find_next_bit(bitmap, max, 0); + i = mb_find_next_bit_test(bitmap, max, 0); KUNIT_ASSERT_EQ(test, i, start); - i = mb_find_next_zero_bit(bitmap, max, i + 1); + i = mb_find_next_zero_bit_test(bitmap, max, i + 1); KUNIT_ASSERT_EQ(test, i, start + len); - i = mb_find_next_bit(bitmap, max, i + 1); + i = mb_find_next_bit_test(bitmap, max, i + 1); KUNIT_ASSERT_EQ(test, max, i); } @@ -617,54 +618,54 @@ static void mbt_generate_buddy(struct super_block *sb, void *buddy, max = EXT4_CLUSTERS_PER_GROUP(sb); bb_h = buddy + sbi->s_mb_offsets[1]; - off = mb_find_next_zero_bit(bb, max, 0); + off = mb_find_next_zero_bit_test(bb, max, 0); grp->bb_first_free = off; while (off < max) { grp->bb_counters[0]++; grp->bb_free++; - if (!(off & 1) && !mb_test_bit(off + 1, bb)) { + if (!(off & 1) && !mb_test_bit_test(off + 1, bb)) { grp->bb_free++; grp->bb_counters[0]--; - mb_clear_bit(off >> 1, bb_h); + mb_clear_bit_test(off >> 1, bb_h); grp->bb_counters[1]++; grp->bb_largest_free_order = 1; off++; } - off = mb_find_next_zero_bit(bb, max, off + 1); + off = mb_find_next_zero_bit_test(bb, max, off + 1); } for (order = 1; order < MB_NUM_ORDERS(sb) - 1; order++) { bb = buddy + sbi->s_mb_offsets[order]; bb_h = buddy + sbi->s_mb_offsets[order + 1]; max = max >> 1; - off = mb_find_next_zero_bit(bb, max, 0); + off = mb_find_next_zero_bit_test(bb, max, 0); while (off < max) { - if (!(off & 1) && !mb_test_bit(off + 1, bb)) { + if (!(off & 1) && !mb_test_bit_test(off + 1, bb)) { mb_set_bits(bb, off, 2); grp->bb_counters[order] -= 2; - mb_clear_bit(off >> 1, bb_h); + mb_clear_bit_test(off >> 1, bb_h); grp->bb_counters[order + 1]++; grp->bb_largest_free_order = order + 1; off++; } - off = mb_find_next_zero_bit(bb, max, off + 1); + off = mb_find_next_zero_bit_test(bb, max, off + 1); } } max = EXT4_CLUSTERS_PER_GROUP(sb); - off = mb_find_next_zero_bit(bitmap, max, 0); + off = mb_find_next_zero_bit_test(bitmap, max, 0); while (off < max) { grp->bb_fragments++; - off = mb_find_next_bit(bitmap, max, off + 1); + off = mb_find_next_bit_test(bitmap, max, off + 1); if (off + 1 >= max) break; - off = mb_find_next_zero_bit(bitmap, max, off + 1); + off = mb_find_next_zero_bit_test(bitmap, max, off + 1); } } @@ -706,7 +707,7 @@ do_test_generate_buddy(struct kunit *test, struct super_block *sb, void *bitmap, /* needed by validation in ext4_mb_generate_buddy */ ext4_grp->bb_free = mbt_grp->bb_free; memset(ext4_buddy, 0xff, sb->s_blocksize); - ext4_mb_generate_buddy(sb, ext4_buddy, bitmap, TEST_GOAL_GROUP, + ext4_mb_generate_buddy_test(sb, ext4_buddy, bitmap, TEST_GOAL_GROUP, ext4_grp); KUNIT_ASSERT_EQ(test, memcmp(mbt_buddy, ext4_buddy, sb->s_blocksize), @@ -760,7 +761,7 @@ test_mb_mark_used_range(struct kunit *test, struct ext4_buddy *e4b, ex.fe_group = TEST_GOAL_GROUP; ext4_lock_group(sb, TEST_GOAL_GROUP); - mb_mark_used(e4b, &ex); + mb_mark_used_test(e4b, &ex); ext4_unlock_group(sb, TEST_GOAL_GROUP); mb_set_bits(bitmap, start, len); @@ -769,7 +770,7 @@ test_mb_mark_used_range(struct kunit *test, struct ext4_buddy *e4b, memset(buddy, 0xff, sb->s_blocksize); for (i = 0; i < MB_NUM_ORDERS(sb); i++) grp->bb_counters[i] = 0; - ext4_mb_generate_buddy(sb, buddy, bitmap, 0, grp); + ext4_mb_generate_buddy_test(sb, buddy, bitmap, 0, grp); KUNIT_ASSERT_EQ(test, memcmp(buddy, e4b->bd_buddy, sb->s_blocksize), 0); @@ -798,7 +799,7 @@ static void test_mb_mark_used(struct kunit *test) bb_counters[MB_NUM_ORDERS(sb)]), GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, grp); - ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b); + ret = ext4_mb_load_buddy_test(sb, TEST_GOAL_GROUP, &e4b); KUNIT_ASSERT_EQ(test, ret, 0); grp->bb_free = EXT4_CLUSTERS_PER_GROUP(sb); @@ -809,7 +810,7 @@ static void test_mb_mark_used(struct kunit *test) test_mb_mark_used_range(test, &e4b, ranges[i].start, ranges[i].len, bitmap, buddy, grp); - ext4_mb_unload_buddy(&e4b); + ext4_mb_unload_buddy_test(&e4b); } static void @@ -825,16 +826,16 @@ test_mb_free_blocks_range(struct kunit *test, struct ext4_buddy *e4b, return; ext4_lock_group(sb, e4b->bd_group); - mb_free_blocks(NULL, e4b, start, len); + mb_free_blocks_test(NULL, e4b, start, len); ext4_unlock_group(sb, e4b->bd_group); - mb_clear_bits(bitmap, start, len); + mb_clear_bits_test(bitmap, start, len); /* bypass bb_free validatoin in ext4_mb_generate_buddy */ grp->bb_free += len; memset(buddy, 0xff, sb->s_blocksize); for (i = 0; i < MB_NUM_ORDERS(sb); i++) grp->bb_counters[i] = 0; - ext4_mb_generate_buddy(sb, buddy, bitmap, 0, grp); + ext4_mb_generate_buddy_test(sb, buddy, bitmap, 0, grp); KUNIT_ASSERT_EQ(test, memcmp(buddy, e4b->bd_buddy, sb->s_blocksize), 0); @@ -865,7 +866,7 @@ static void test_mb_free_blocks(struct kunit *test) bb_counters[MB_NUM_ORDERS(sb)]), GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, grp); - ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b); + ret = ext4_mb_load_buddy_test(sb, TEST_GOAL_GROUP, &e4b); KUNIT_ASSERT_EQ(test, ret, 0); ex.fe_start = 0; @@ -873,7 +874,7 @@ static void test_mb_free_blocks(struct kunit *test) ex.fe_group = TEST_GOAL_GROUP; ext4_lock_group(sb, TEST_GOAL_GROUP); - mb_mark_used(&e4b, &ex); + mb_mark_used_test(&e4b, &ex); ext4_unlock_group(sb, TEST_GOAL_GROUP); grp->bb_free = 0; @@ -886,7 +887,7 @@ static void test_mb_free_blocks(struct kunit *test) test_mb_free_blocks_range(test, &e4b, ranges[i].start, ranges[i].len, bitmap, buddy, grp); - ext4_mb_unload_buddy(&e4b); + ext4_mb_unload_buddy_test(&e4b); } #define COUNT_FOR_ESTIMATE 100000 @@ -904,7 +905,7 @@ static void test_mb_mark_used_cost(struct kunit *test) if (sb->s_blocksize > PAGE_SIZE) kunit_skip(test, "blocksize exceeds pagesize"); - ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b); + ret = ext4_mb_load_buddy_test(sb, TEST_GOAL_GROUP, &e4b); KUNIT_ASSERT_EQ(test, ret, 0); ex.fe_group = TEST_GOAL_GROUP; @@ -918,7 +919,7 @@ static void test_mb_mark_used_cost(struct kunit *test) ex.fe_start = ranges[i].start; ex.fe_len = ranges[i].len; ext4_lock_group(sb, TEST_GOAL_GROUP); - mb_mark_used(&e4b, &ex); + mb_mark_used_test(&e4b, &ex); ext4_unlock_group(sb, TEST_GOAL_GROUP); } end = jiffies; @@ -929,14 +930,14 @@ static void test_mb_mark_used_cost(struct kunit *test) continue; ext4_lock_group(sb, TEST_GOAL_GROUP); - mb_free_blocks(NULL, &e4b, ranges[i].start, + mb_free_blocks_test(NULL, &e4b, ranges[i].start, ranges[i].len); ext4_unlock_group(sb, TEST_GOAL_GROUP); } } kunit_info(test, "costed jiffies %lu\n", all); - ext4_mb_unload_buddy(&e4b); + ext4_mb_unload_buddy_test(&e4b); } static const struct mbt_ext4_block_layout mbt_test_layouts[] = { diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 20e9fdaf4301..bb58eafb87bc 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1199,6 +1199,8 @@ static int ext4_mb_scan_groups(struct ext4_allocation_context *ac) /* searching for the right group start from the goal value specified */ start = ac->ac_g_ex.fe_group; + if (start >= ngroups) + start = 0; ac->ac_prefetch_grp = start; ac->ac_prefetch_nr = 0; @@ -2443,8 +2445,12 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, return 0; err = ext4_mb_load_buddy(ac->ac_sb, group, e4b); - if (err) + if (err) { + if (EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info) && + !(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) + return 0; return err; + } ext4_lock_group(ac->ac_sb, group); if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) @@ -3580,9 +3586,7 @@ err_freebuddy: rcu_read_unlock(); iput(sbi->s_buddy_cache); err_freesgi: - rcu_read_lock(); - kvfree(rcu_dere |
