diff options
Diffstat (limited to 'fs/ext4/inode.c')
| -rw-r--r-- | fs/ext4/inode.c | 417 |
1 file changed, 277 insertions, 140 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 396dc3a5d16b..c2c2d6ac7f3d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -29,7 +29,7 @@ #include <linux/string.h> #include <linux/buffer_head.h> #include <linux/writeback.h> -#include <linux/pagevec.h> +#include <linux/folio_batch.h> #include <linux/mpage.h> #include <linux/rmap.h> #include <linux/namei.h> @@ -128,6 +128,8 @@ void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw, static inline int ext4_begin_ordered_truncate(struct inode *inode, loff_t new_size) { + struct jbd2_inode *jinode = READ_ONCE(EXT4_I(inode)->jinode); + trace_ext4_begin_ordered_truncate(inode, new_size); /* * If jinode is zero, then we never opened the file for @@ -135,10 +137,10 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode, * jbd2_journal_begin_ordered_truncate() since there's no * outstanding writes we need to flush. */ - if (!EXT4_I(inode)->jinode) + if (!jinode) return 0; return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode), - EXT4_I(inode)->jinode, + jinode, new_size); } @@ -184,8 +186,18 @@ void ext4_evict_inode(struct inode *inode) if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL) ext4_evict_ea_inode(inode); if (inode->i_nlink) { - truncate_inode_pages_final(&inode->i_data); + /* + * If there's dirty page will lead to data loss, user + * could see stale data. 
+ */ + if (unlikely(!ext4_emergency_state(inode->i_sb) && + mapping_tagged(&inode->i_data, PAGECACHE_TAG_DIRTY))) + ext4_warning_inode(inode, "data will be lost"); + truncate_inode_pages_final(&inode->i_data); + /* Avoid mballoc special inode which has no proper iops */ + if (!EXT4_SB(inode->i_sb)->s_journal) + mmb_sync(&EXT4_I(inode)->i_metadata_bhs); goto no_delete; } @@ -262,7 +274,7 @@ void ext4_evict_inode(struct inode *inode) err = ext4_truncate(inode); if (err) { ext4_error_err(inode->i_sb, -err, - "couldn't truncate inode %lu (err %d)", + "couldn't truncate inode %llu (err %d)", inode->i_ino, err); goto stop_handle; } @@ -342,7 +354,7 @@ void ext4_da_update_reserve_space(struct inode *inode, spin_lock(&ei->i_block_reservation_lock); trace_ext4_da_update_reserve_space(inode, used, quota_claim); if (unlikely(used > ei->i_reserved_data_blocks)) { - ext4_warning(inode->i_sb, "%s: ino %lu, used %d " + ext4_warning(inode->i_sb, "%s: ino %llu, used %d " "with only %d reserved data blocks", __func__, inode->i_ino, used, ei->i_reserved_data_blocks); @@ -405,7 +417,10 @@ int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk, KUNIT_STATIC_STUB_REDIRECT(ext4_issue_zeroout, inode, lblk, pblk, len); if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) - return fscrypt_zeroout_range(inode, lblk, pblk, len); + return fscrypt_zeroout_range(inode, + (loff_t)lblk << inode->i_blkbits, + pblk << (inode->i_blkbits - SECTOR_SHIFT), + (u64)len << inode->i_blkbits); ret = sb_issue_zeroout(inode->i_sb, pblk, len, GFP_NOFS); if (ret > 0) @@ -475,7 +490,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle, if (es_map->m_lblk != map->m_lblk || es_map->m_flags != map->m_flags || es_map->m_pblk != map->m_pblk) { - printk("ES cache assertion failed for inode: %lu " + printk("ES cache assertion failed for inode: %llu " "es_cached ex [%d/%d/%llu/%x] != " "found ex [%d/%d/%llu/%x] retval %d flags %x\n", inode->i_ino, es_map->m_lblk, es_map->m_len, @@ -515,7 
+530,7 @@ static int ext4_map_query_blocks_next_in_leaf(handle_t *handle, if (unlikely(retval != map2.m_len)) { ext4_warning(inode->i_sb, "ES len assertion failed for inode " - "%lu: retval %d != map->m_len %d", + "%llu: retval %d != map->m_len %d", inode->i_ino, retval, map2.m_len); WARN_ON(1); } @@ -563,7 +578,7 @@ int ext4_map_query_blocks(handle_t *handle, struct inode *inode, if (unlikely(retval != map->m_len)) { ext4_warning(inode->i_sb, "ES len assertion failed for inode " - "%lu: retval %d != map->m_len %d", + "%llu: retval %d != map->m_len %d", inode->i_ino, retval, map->m_len); WARN_ON(1); } @@ -630,7 +645,7 @@ int ext4_map_create_blocks(handle_t *handle, struct inode *inode, if (unlikely(retval != map->m_len)) { ext4_warning(inode->i_sb, - "ES len assertion failed for inode %lu: " + "ES len assertion failed for inode %llu: " "retval %d != map->m_len %d", inode->i_ino, retval, map->m_len); WARN_ON(1); @@ -937,7 +952,7 @@ int ext4_get_block_unwritten(struct inode *inode, sector_t iblock, { int ret = 0; - ext4_debug("ext4_get_block_unwritten: inode %lu, create flag %d\n", + ext4_debug("ext4_get_block_unwritten: inode %llu, create flag %d\n", inode->i_ino, create); ret = _ext4_get_block(inode, iblock, bh_result, EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT); @@ -1420,9 +1435,6 @@ static int write_end_fn(handle_t *handle, struct inode *inode, /* * We need to pick up the new inode size which generic_commit_write gave us * `iocb` can be NULL - eg, when called from page_symlink(). - * - * ext4 never places buffers on inode->i_mapping->i_private_list. metadata - * buffers are managed internally. 
*/ static int ext4_write_end(const struct kiocb *iocb, struct address_space *mapping, @@ -1456,10 +1468,9 @@ static int ext4_write_end(const struct kiocb *iocb, folio_unlock(folio); folio_put(folio); - if (old_size < pos && !verity) { + if (old_size < pos && !verity) pagecache_isize_extended(inode, old_size, pos); - ext4_zero_partial_blocks(handle, inode, old_size, pos - old_size); - } + /* * Don't mark the inode dirty under folio lock. First, it unnecessarily * makes the holding time of folio lock longer. Second, it forces lock @@ -1574,10 +1585,8 @@ static int ext4_journalled_write_end(const struct kiocb *iocb, folio_unlock(folio); folio_put(folio); - if (old_size < pos && !verity) { + if (old_size < pos && !verity) pagecache_isize_extended(inode, old_size, pos); - ext4_zero_partial_blocks(handle, inode, old_size, pos - old_size); - } if (size_changed) { ret2 = ext4_mark_inode_dirty(handle, inode); @@ -1659,7 +1668,7 @@ void ext4_da_release_space(struct inode *inode, int to_free) * harmless to return without any action. */ ext4_warning(inode->i_sb, "ext4_da_release_space: " - "ino %lu, to_free %d with only %d reserved " + "ino %llu, to_free %d with only %d reserved " "data blocks", inode->i_ino, to_free, ei->i_reserved_data_blocks); WARN_ON(1); @@ -1747,8 +1756,22 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd, BUG_ON(!folio_test_locked(folio)); BUG_ON(folio_test_writeback(folio)); if (invalidate) { - if (folio_mapped(folio)) + if (folio_mapped(folio)) { folio_clear_dirty_for_io(folio); + /* + * Unmap folio from page + * tables to prevent + * subsequent accesses through + * stale PTEs. This ensures + * future accesses trigger new + * page faults rather than + * reusing the invalidated + * folio. 
+ */ + unmap_mapping_pages(folio->mapping, + folio->index, + folio_nr_pages(folio), false); + } block_invalidate_folio(folio, 0, folio_size(folio)); folio_clear_uptodate(folio); @@ -2491,7 +2514,7 @@ static int mpage_map_and_submit_extent(handle_t *handle, } ext4_msg(sb, KERN_CRIT, "Delayed block allocation failed for " - "inode %lu at logical offset %llu with" + "inode %llu at logical offset %llu with" " max blocks %u with error %d", inode->i_ino, (unsigned long long)map->m_lblk, @@ -2535,7 +2558,7 @@ update_disksize: err2 = ext4_mark_inode_dirty(handle, inode); if (err2) { ext4_error_err(inode->i_sb, -err2, - "Failed to mark inode %lu dirty", + "Failed to mark inode %llu dirty", inode->i_ino); } if (!err) @@ -2909,7 +2932,7 @@ retry: if (IS_ERR(handle)) { ret = PTR_ERR(handle); ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " - "%ld pages, ino %lu; err %d", __func__, + "%ld pages, ino %llu; err %d", __func__, wbc->nr_to_write, inode->i_ino, ret); /* Release allocated io_end */ ext4_put_io_end(mpd->io_submit.io_end); @@ -3031,17 +3054,23 @@ static int ext4_writepages(struct address_space *mapping, int ext4_normal_submit_inode_data_buffers(struct jbd2_inode *jinode) { + loff_t range_start, range_end; struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, - .range_start = jinode->i_dirty_start, - .range_end = jinode->i_dirty_end, }; struct mpage_da_data mpd = { .inode = jinode->i_vfs_inode, .wbc = &wbc, .can_map = 0, }; + + if (!jbd2_jinode_get_dirty_range(jinode, &range_start, &range_end)) + return 0; + + wbc.range_start = range_start; + wbc.range_end = range_end; + return ext4_do_writepages(&mpd); } @@ -3196,7 +3225,7 @@ static int ext4_da_do_write_end(struct address_space *mapping, struct inode *inode = mapping->host; loff_t old_size = inode->i_size; bool disksize_changed = false; - loff_t new_i_size, zero_len = 0; + loff_t new_i_size; handle_t *handle; if (unlikely(!folio_buffers(folio))) { @@ -3240,19 +3269,15 @@ static int 
ext4_da_do_write_end(struct address_space *mapping, folio_unlock(folio); folio_put(folio); - if (pos > old_size) { + if (pos > old_size) pagecache_isize_extended(inode, old_size, pos); - zero_len = pos - old_size; - } - if (!disksize_changed && !zero_len) + if (!disksize_changed) return copied; - handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); + handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); if (IS_ERR(handle)) return PTR_ERR(handle); - if (zero_len) - ext4_zero_partial_blocks(handle, inode, old_size, zero_len); ext4_mark_inode_dirty(handle, inode); ext4_journal_stop(handle); @@ -3437,7 +3462,7 @@ static bool ext4_inode_datasync_dirty(struct inode *inode) } /* Any metadata buffers to write? */ - if (!list_empty(&inode->i_mapping->i_private_list)) + if (mmb_has_buffers(&EXT4_I(inode)->i_metadata_bhs)) return true; return inode_state_read_once(inode) & I_DIRTY_DATASYNC; } @@ -4002,12 +4027,11 @@ void ext4_set_aops(struct inode *inode) * ext4_punch_hole, etc) which needs to be properly zeroed out. Otherwise a * racing writeback can come later and flush the stale pagecache to disk. 
*/ -static int __ext4_block_zero_page_range(handle_t *handle, - struct address_space *mapping, loff_t from, loff_t length) +static struct buffer_head *ext4_load_tail_bh(struct inode *inode, loff_t from) { unsigned int offset, blocksize, pos; ext4_lblk_t iblock; - struct inode *inode = mapping->host; + struct address_space *mapping = inode->i_mapping; struct buffer_head *bh; struct folio *folio; int err = 0; @@ -4016,7 +4040,7 @@ static int __ext4_block_zero_page_range(handle_t *handle, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, mapping_gfp_constraint(mapping, ~__GFP_FS)); if (IS_ERR(folio)) - return PTR_ERR(folio); + return ERR_CAST(folio); blocksize = inode->i_sb->s_blocksize; @@ -4068,47 +4092,92 @@ static int __ext4_block_zero_page_range(handle_t *handle, } } } - if (ext4_should_journal_data(inode)) { - BUFFER_TRACE(bh, "get write access"); - err = ext4_journal_get_write_access(handle, inode->i_sb, bh, - EXT4_JTR_NONE); - if (err) - goto unlock; - } - folio_zero_range(folio, offset, length); + return bh; + +unlock: + folio_unlock(folio); + folio_put(folio); + return err ? ERR_PTR(err) : NULL; +} + +static int ext4_block_do_zero_range(struct inode *inode, loff_t from, + loff_t length, bool *did_zero, + bool *zero_written) +{ + struct buffer_head *bh; + struct folio *folio; + + bh = ext4_load_tail_bh(inode, from); + if (IS_ERR_OR_NULL(bh)) + return PTR_ERR_OR_ZERO(bh); + + folio = bh->b_folio; + folio_zero_range(folio, offset_in_folio(folio, from), length); BUFFER_TRACE(bh, "zeroed end of block"); - if (ext4_should_journal_data(inode)) { - err = ext4_dirty_journalled_data(handle, bh); - } else { - mark_buffer_dirty(bh); - /* - * Only the written block requires ordered data to prevent - * exposing stale data. 
- */ - if (!buffer_unwritten(bh) && !buffer_delay(bh) && - ext4_should_order_data(inode)) - err = ext4_jbd2_inode_add_write(handle, inode, from, - length); + mark_buffer_dirty(bh); + if (did_zero) + *did_zero = true; + if (zero_written && !buffer_unwritten(bh) && !buffer_delay(bh)) + *zero_written = true; + + folio_unlock(folio); + folio_put(folio); + return 0; +} + +static int ext4_block_journalled_zero_range(struct inode *inode, loff_t from, + loff_t length, bool *did_zero) +{ + struct buffer_head *bh; + struct folio *folio; + handle_t *handle; + int err; + + handle = ext4_journal_start(inode, EXT4_HT_MISC, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + bh = ext4_load_tail_bh(inode, from); + if (IS_ERR_OR_NULL(bh)) { + err = PTR_ERR_OR_ZERO(bh); + goto out_handle; } + folio = bh->b_folio; -unlock: + BUFFER_TRACE(bh, "get write access"); + err = ext4_journal_get_write_access(handle, inode->i_sb, bh, + EXT4_JTR_NONE); + if (err) + goto out; + + folio_zero_range(folio, offset_in_folio(folio, from), length); + BUFFER_TRACE(bh, "zeroed end of block"); + + err = ext4_dirty_journalled_data(handle, bh); + if (err) + goto out; + + if (did_zero) + *did_zero = true; +out: folio_unlock(folio); folio_put(folio); +out_handle: + ext4_journal_stop(handle); return err; } /* - * ext4_block_zero_page_range() zeros out a mapping of length 'length' - * starting from file offset 'from'. The range to be zero'd must - * be contained with in one block. If the specified range exceeds - * the end of the block it will be shortened to end of the block - * that corresponds to 'from' + * Zeros out a mapping of length 'length' starting from file offset + * 'from'. The range to be zero'd must be contained with in one block. + * If the specified range exceeds the end of the block it will be + * shortened to end of the block that corresponds to 'from'. 
*/ -static int ext4_block_zero_page_range(handle_t *handle, - struct address_space *mapping, loff_t from, loff_t length) +static int ext4_block_zero_range(struct inode *inode, + loff_t from, loff_t length, bool *did_zero, + bool *zero_written) { - struct inode *inode = mapping->host; unsigned blocksize = inode->i_sb->s_blocksize; unsigned int max = blocksize - (from & (blocksize - 1)); @@ -4120,40 +4189,73 @@ static int ext4_block_zero_page_range(handle_t *handle, length = max; if (IS_DAX(inode)) { - return dax_zero_range(inode, from, length, NULL, + return dax_zero_range(inode, from, length, did_zero, &ext4_iomap_ops); + } else if (ext4_should_journal_data(inode)) { + return ext4_block_journalled_zero_range(inode, from, length, + did_zero); } - return __ext4_block_zero_page_range(handle, mapping, from, length); + return ext4_block_do_zero_range(inode, from, length, did_zero, + zero_written); } /* - * ext4_block_truncate_page() zeroes out a mapping from file offset `from' - * up to the end of the block which corresponds to `from'. - * This required during truncate. We need to physically zero the tail end - * of that block so it doesn't yield old data if the file is later grown. + * Zero out a mapping from file offset 'from' up to the end of the block + * which corresponds to 'from' or to the given 'end' inside this block. + * This required during truncate up and performing append writes. We need + * to physically zero the tail end of that block so it doesn't yield old + * data if the file is grown. 
*/ -static int ext4_block_truncate_page(handle_t *handle, - struct address_space *mapping, loff_t from) +int ext4_block_zero_eof(struct inode *inode, loff_t from, loff_t end) { - unsigned length; - unsigned blocksize; - struct inode *inode = mapping->host; + unsigned int blocksize = i_blocksize(inode); + unsigned int offset; + loff_t length = end - from; + bool did_zero = false; + bool zero_written = false; + int err; + offset = from & (blocksize - 1); + if (!offset || from >= end) + return 0; /* If we are processing an encrypted inode during orphan list handling */ if (IS_ENCRYPTED(inode) && !fscrypt_has_encryption_key(inode)) return 0; - blocksize = i_blocksize(inode); - length = blocksize - (from & (blocksize - 1)); + if (length > blocksize - offset) + length = blocksize - offset; + + err = ext4_block_zero_range(inode, from, length, + &did_zero, &zero_written); + if (err) + return err; + /* + * It's necessary to order zeroed data before update i_disksize when + * truncating up or performing an append write, because there might be + * exposing stale on-disk data which may caused by concurrent post-EOF + * mmap write during folio writeback. 
+ */ + if (ext4_should_order_data(inode) && + did_zero && zero_written && !IS_DAX(inode)) { + handle_t *handle; + + handle = ext4_journal_start(inode, EXT4_HT_MISC, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + err = ext4_jbd2_inode_add_write(handle, inode, from, length); + ext4_journal_stop(handle); + if (err) + return err; + } - return ext4_block_zero_page_range(handle, mapping, from, length); + return 0; } -int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, - loff_t lstart, loff_t length) +int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart, loff_t length, + bool *did_zero) { struct super_block *sb = inode->i_sb; - struct address_space *mapping = inode->i_mapping; unsigned partial_start, partial_end; ext4_fsblk_t start, end; loff_t byte_end = (lstart + length - 1); @@ -4168,22 +4270,21 @@ int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, /* Handle partial zero within the single block */ if (start == end && (partial_start || (partial_end != sb->s_blocksize - 1))) { - err = ext4_block_zero_page_range(handle, mapping, - lstart, length); + err = ext4_block_zero_range(inode, lstart, length, did_zero, + NULL); return err; } /* Handle partial zero out on the start of the range */ if (partial_start) { - err = ext4_block_zero_page_range(handle, mapping, - lstart, sb->s_blocksize); + err = ext4_block_zero_range(inode, lstart, sb->s_blocksize, + did_zero, NULL); if (err) return err; } /* Handle partial zero out on the end of the range */ if (partial_end != sb->s_blocksize - 1) - err = ext4_block_zero_page_range(handle, mapping, - byte_end - partial_end, - partial_end + 1); + err = ext4_block_zero_range(inode, byte_end - partial_end, + partial_end + 1, did_zero, NULL); return err; } @@ -4332,6 +4433,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) loff_t end = offset + length; handle_t *handle; unsigned int credits; + bool partial_zeroed = false; int ret; trace_ext4_punch_hole(inode, offset, 
length, 0); @@ -4358,17 +4460,6 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) end = max_end; length = end - offset; - /* - * Attach jinode to inode for jbd2 if we do any zeroing of partial - * block. - */ - if (!IS_ALIGNED(offset | end, sb->s_blocksize)) { - ret = ext4_inode_attach_jinode(inode); - if (ret < 0) - return ret; - } - - ret = ext4_update_disksize_before_punch(inode, offset, length); if (ret) return ret; @@ -4378,8 +4469,18 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) if (ret) return ret; + ret = ext4_zero_partial_blocks(inode, offset, length, &partial_zeroed); + if (ret) + return ret; + if (((file->f_flags & O_SYNC) || IS_SYNC(inode)) && partial_zeroed) { + ret = filemap_write_and_wait_range(inode->i_mapping, offset, + end - 1); + if (ret) + return ret; + } + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) - credits = ext4_chunk_trans_extent(inode, 2); + credits = ext4_chunk_trans_extent(inode, 0); else credits = ext4_blocks_for_truncate(inode); handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); @@ -4389,10 +4490,6 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) return ret; } - ret = ext4_zero_partial_blocks(handle, inode, offset, length); - if (ret) - goto out_handle; - /* If there are blocks to remove, do it */ start_lblk = EXT4_B_TO_LBLK(inode, offset); end_lblk = end >> inode->i_blkbits; @@ -4429,7 +4526,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) goto out_handle; ext4_update_inode_fsync_trans(handle, inode, 1); - if (IS_SYNC(inode)) + if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) ext4_handle_sync(handle); out_handle: ext4_journal_stop(handle); @@ -4451,8 +4548,13 @@ int ext4_inode_attach_jinode(struct inode *inode) spin_unlock(&inode->i_lock); return -ENOMEM; } - ei->jinode = jinode; - jbd2_journal_init_jbd_inode(ei->jinode, inode); + jbd2_journal_init_jbd_inode(jinode, inode); + /* + * Publish ->jinode only after it is 
fully initialized so that + * readers never observe a partially initialized jbd2_inode. + */ + smp_wmb(); + WRITE_ONCE(ei->jinode, jinode); jinode = NULL; } spin_unlock(&inode->i_lock); @@ -4495,7 +4597,6 @@ int ext4_truncate(struct inode *inode) unsigned int credits; int err = 0, err2; handle_t *handle; - struct address_space *mapping = inode->i_mapping; /* * There is a possibility that we're either freeing the inode @@ -4525,6 +4626,11 @@ int ext4_truncate(struct inode *inode) err = ext4_inode_attach_jinode(inode); if (err) goto out_trace; + + /* Zero to the end of the block containing i_size */ + err = ext4_block_zero_eof(inode, inode->i_size, LLONG_MAX); + if (err) + goto out_trace; } if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) @@ -4538,9 +4644,6 @@ int ext4_truncate(struct inode *inode) goto out_trace; } - if (inode->i_size & (inode->i_sb->s_blocksize - 1)) - ext4_block_truncate_page(handle, mapping, inode->i_size); - /* * We add the inode to the orphan list, so that if this * truncate spans multiple transactions, and we crash, we will @@ -5401,18 +5504,36 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, inode->i_op = &ext4_encrypted_symlink_inode_operations; } else if (ext4_inode_is_fast_symlink(inode)) { inode->i_op = &ext4_fast_symlink_inode_operations; - if (inode->i_size == 0 || - inode->i_size >= sizeof(ei->i_data) || - strnlen((char *)ei->i_data, inode->i_size + 1) != - inode->i_size) { - ext4_error_inode(inode, function, line, 0, - "invalid fast symlink length %llu", - (unsigned long long)inode->i_size); - ret = -EFSCORRUPTED; - goto bad_inode; + + /* + * Orphan cleanup can see inodes with i_size == 0 + * and i_data uninitialized. Skip size checks in + * that case. This is safe because the first thing + * ext4_evict_inode() does for fast symlinks is + * clearing of i_data and i_size. 
+ */ + if ((EXT4_SB(sb)->s_mount_state & EXT4_ORPHAN_FS)) { + if (inode->i_nlink != 0) { + ext4_error_inode(inode, function, line, 0, + "invalid orphan symlink nlink %d", + inode->i_nlink); + ret = -EFSCORRUPTED; + goto bad_inode; + } + } else { + if (inode->i_size == 0 || + inode->i_size >= sizeof(ei->i_data) || + strnlen((char *)ei->i_data, inode->i_size + 1) != + inode->i_size) { + ext4_error_inode(inode, function, line, 0, + "invalid fast symlink length %llu", + (unsigned long long)inode->i_size); + ret = -EFSCORRUPTED; + goto bad_inode; + } + inode_set_cached_link(inode, (char *)ei->i_data, + inode->i_size); } - inode_set_cached_link(inode, (char *)ei->i_data, - inode->i_size); } else { inode->i_op = &ext4_symlink_inode_operations; } @@ -5849,6 +5970,18 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (attr->ia_size == inode->i_size) inc_ivers = false; + /* + * If file has inline data but new size exceeds inline capacity, + * convert to extent-based storage first to prevent inconsistent + * state (inline flag set but size exceeds inline capacity). + */ + if (ext4_has_inline_data(inode) && + attr->ia_size > EXT4_I(inode)->i_inline_size) { + error = ext4_convert_inline_data(inode); + if (error) + goto err_out; + } + if (shrink) { if (ext4_should_order_data(inode)) { error = ext4_begin_ordered_truncate(inode, @@ -5880,15 +6013,6 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry, goto out_mmap_sem; } - handle = ext4_journal_start(inode, EXT4_HT_INODE, 3); - if (IS_ERR(handle)) { - error = PTR_ERR(handle); - goto out_mmap_sem; - } - if (ext4_handle_valid(handle) && shrink) { - error = ext4_orphan_add(handle, inode); - orphan = 1; - } /* * Update c/mtime and tail zero the EOF folio on * truncate up. 
ext4_truncate() handles the shrink case @@ -5897,9 +6021,22 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (!shrink) { inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); - if (oldsize & (inode->i_sb->s_blocksize - 1)) - ext4_block_truncate_page(handle, - inode->i_mapping, oldsize); + if (oldsize & (inode->i_sb->s_blocksize - 1)) { + error = ext4_block_zero_eof(inode, + oldsize, LLONG_MAX); + if (error) + goto out_mmap_sem; + } + } + + handle = ext4_journal_start(inode, EXT4_HT_INODE, 3); + if (IS_ERR(handle)) { + error = PTR_ERR(handle); + goto out_mmap_sem; + } + if (ext4_handle_valid(handle) && shrink) { + error = ext4_orphan_add(handle, inode); + orphan = 1; } if (shrink) |
