aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorChristian Brauner <brauner@kernel.org>2026-05-12 14:42:38 +0200
committerChristian Brauner <brauner@kernel.org>2026-05-12 14:42:38 +0200
commit45205929a3a3310e4978f4097d8ed4fca36b2c32 (patch)
tree71d1c59b9aba1dfda3d10b71a75b3d301e71bdab /include
parent859c199bb3a90ec49a678cc0846694b06703bdde (diff)
parentc0410adf3da6db46f3513411fcf95e63c2f1d1ad (diff)
Merge patch series "netfs: Miscellaneous fixes"
David Howells <dhowells@redhat.com> says: Here are the outstanding miscellaneous fixes for netfslib gathered together and with some fixes-to-fixes folded down and one rearrangement. Various Sashiko review comments[1][2][3][4][5] are addressed: (1) Fix subrequest cancellation cleanup in DIO read and single-read. (2) Fix missing locking around retry adding new subrequests. (3) Fix read and write result collection to use barriering correctly to access a request's subrequest lists without taking a lock. This adds list_add_tail_release() and list_first_entry_or_null_acquire() to appropriate incorporate barriering into some list functions. (4) Fix netfs_read_to_pagecache() to pause on subrequest I/O failure. (5) Fix the potential for 64-bit tearing on a 32-bit machine when reading netfs_inode->remote_i_size and ->zero_point by using much the same mechanism as is used for ->i_size. (6) Fix the calculation of zero_point in netfs_release_folio() to limit it to ->remote_i_size, not ->i_size. (7) Fix triggering of a VM_BUG_ON_FOLIO() in netfs_write_begin(). (8) Fix a potentially uninitialised error value in netfs_extract_user_iter(). (9) Fix error handling in netfs_extract_user_iter(). (10) Fix overrun checking in netfs_extract_user_iter(). (11) Fix netfs_invalidate_folio() to clear the folio dirty bit if all dirty data removed. (12) Defer the emission of trace_netfs_folio() in netfs_perform_write(). This allows the next patch to emit the correct traces. (13) Fix the handling of a partially failed copy (ie. EFAULT) into a streaming write folio. Also remove the netfs_folio if a streaming write folio is entirely overwritten. (14) Fix a potential deadlock in writethrough writing. (15) Fix netfs_read_gaps() to remove the netfs_folio from a filled folio. (16) Fix netfs_perform_write() to not disable streaming writes when writing to an fd that's open O_RDWR. (17) Fix an early put of the sink page used in netfs_read_gaps(), before the request has completed. (18) Fix request leak in netfs_write_begin() error handling. (19) Fix a potential UAF in netfs_unlock_abandoned_read_pages() due to trying to check index of each folio we're abandoning to see if that folio is actually owned by the caller (in which case, we're not actually allowed to dereference it). (20) Fix incorrect adjustment of dirty region when partially invalidating a streaming write folio. (21) Fix the handling of folio->private in netfs_perform_write() and the attached netfs_folio and/or group when a streaming write folio is modified. (22) Fix netfs_read_folio() to wait on writeback first (it holds the folio lock) otherwise we aren't allowed to look at the netfs_folio struct as that could be modified at any time by the writeback collector. (23) Fix write skipping in dir/symlink writepages. (24) Fix the locking used by afs_get_link(). [1] https://sashiko.dev/#/patchset/20260414082004.3756080-1-dhowells%40redhat.com [2] https://sashiko.dev/#/patchset/20260326104544.509518-1-dhowells%40redhat.com [3] https://sashiko.dev/#/patchset/20260425125426.3855807-1-dhowells%40redhat.com [4] https://sashiko.dev/#/patchset/20260427154639.180684-1-dhowells%40redhat.com [5] https://sashiko.dev/#/patchset/20260428131756.922303-1-dhowells%40redhat.com * patches from https://patch.msgid.link/20260512123404.719402-1-dhowells@redhat.com: (24 commits) afs: Fix the locking used by afs_get_link() netfs, afs: Fix write skipping in dir/link writepages netfs: Fix netfs_read_folio() to wait on writeback netfs: Fix folio->private handling in netfs_perform_write() netfs: Fix partial invalidation of streaming-write folio netfs: Fix potential UAF in netfs_unlock_abandoned_read_pages() netfs: Fix leak of request in netfs_write_begin() error handling netfs: Fix early put of sink folio in netfs_read_gaps() netfs: Fix write streaming disablement if fd open O_RDWR netfs: Fix read-gaps to remove netfs_folio from filled folio netfs: Fix potential deadlock in write-through mode netfs: Fix streaming write being overwritten netfs: Defer the emission of trace_netfs_folio() netfs: Fix netfs_invalidate_folio() to clear dirty bit if all changes gone netfs: Fix overrun check in netfs_extract_user_iter() netfs: fix error handling in netfs_extract_user_iter() netfs: Fix potential uninitialised var in netfs_extract_user_iter() netfs: fix VM_BUG_ON_FOLIO() issue in netfs_write_begin() call netfs: Fix zeropoint update where i_size > remote_i_size netfs: Fix potential for tearing in ->remote_i_size and ->zero_point ... Link: https://patch.msgid.link/20260512123404.719402-1-dhowells@redhat.com Signed-off-by: Christian Brauner <brauner@kernel.org>
Diffstat (limited to 'include')
-rw-r--r--include/linux/list.h37
-rw-r--r--include/linux/netfs.h295
-rw-r--r--include/trace/events/netfs.h8
3 files changed, 330 insertions, 10 deletions
diff --git a/include/linux/list.h b/include/linux/list.h
index 00ea8e5fb88b..09d979976b3b 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -191,6 +191,29 @@ static inline void list_add_tail(struct list_head *new, struct list_head *head)
__list_add(new, head->prev, head);
}
+/**
+ * list_add_tail_release - add a new entry with release barrier
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head, using a release barrier to set
+ * the ->next pointer that points to it. This is useful for implementing
+ * queues, in particular one that the elements will be walked through forwards
+ * locklessly.
+ */
+static inline void list_add_tail_release(struct list_head *new,
+ struct list_head *head)
+{
+ struct list_head *prev = head->prev;
+
+ if (__list_add_valid(new, prev, head)) {
+ new->next = head;
+ new->prev = prev;
+ head->prev = new;
+ smp_store_release(&prev->next, new);
+ }
+}
+
/*
* Delete a list entry by making the prev/next entries
* point to each other.
@@ -645,6 +668,20 @@ static inline void list_splice_tail_init(struct list_head *list,
})
/**
+ * list_first_entry_or_null_acquire - get the first element from a list with barrier
+ * @ptr: the list head to take the element from.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_head within the struct.
+ *
+ * Note that if the list is empty, it returns NULL.
+ */
+#define list_first_entry_or_null_acquire(ptr, type, member) ({ \
+ struct list_head *head__ = (ptr); \
+ struct list_head *pos__ = smp_load_acquire(&head__->next); \
+ pos__ != head__ ? list_entry(pos__, type, member) : NULL; \
+})
+
+/**
* list_last_entry_or_null - get the last element from a list
* @ptr: the list head to take the element from.
* @type: the type of the struct this is embedded in.
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index ba17ac5bf356..243c0f737938 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -62,8 +62,8 @@ struct netfs_inode {
struct fscache_cookie *cache;
#endif
struct mutex wb_lock; /* Writeback serialisation */
- loff_t remote_i_size; /* Size of the remote file */
- loff_t zero_point; /* Size after which we assume there's no data
+ loff_t _remote_i_size; /* Size of the remote file */
+ loff_t _zero_point; /* Size after which we assume there's no data
* on the server */
atomic_t io_count; /* Number of outstanding reqs */
unsigned long flags;
@@ -252,7 +252,7 @@ struct netfs_io_request {
unsigned long long collected_to; /* Point we've collected to */
unsigned long long cleaned_to; /* Position we've cleaned folios to */
unsigned long long abandon_to; /* Position to abandon folios to */
- pgoff_t no_unlock_folio; /* Don't unlock this folio after read */
+ const struct folio *no_unlock_folio; /* Don't unlock this folio after read */
unsigned int direct_bv_count; /* Number of elements in direct_bv[] */
unsigned int debug_id;
unsigned int rsize; /* Maximum read size (0 for none) */
@@ -475,6 +475,254 @@ static inline struct netfs_inode *netfs_inode(struct inode *inode)
}
/**
+ * netfs_read_remote_i_size - Read remote_i_size safely
+ * @inode: The inode to access
+ *
+ * Read remote_i_size safely without the potential for tearing on 32-bit
+ * arches.
+ *
+ * NOTE: in a 32bit arch with a preemptable kernel and an UP compile the
+ * i_size_read/write must be atomic with respect to the local cpu (unlike with
+ * preempt disabled), but they don't need to be atomic with respect to other
+ * cpus like in true SMP (so they need either to either locally disable irq
+ * around the read or for example on x86 they can be still implemented as a
+ * cmpxchg8b without the need of the lock prefix). For SMP compiles and 64bit
+ * archs it makes no difference if preempt is enabled or not.
+ */
+static inline unsigned long long netfs_read_remote_i_size(const struct inode *inode)
+{
+ const struct netfs_inode *ictx = container_of(inode, struct netfs_inode, inode);
+ unsigned long long remote_i_size;
+
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+ unsigned int seq;
+
+ do {
+ seq = read_seqcount_begin(&inode->i_size_seqcount);
+ remote_i_size = ictx->_remote_i_size;
+ } while (read_seqcount_retry(&inode->i_size_seqcount, seq));
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
+ preempt_disable();
+ remote_i_size = ictx->_remote_i_size;
+ preempt_enable();
+#else
+ /* Pairs with smp_store_release() in netfs_write_remote_i_size() */
+ remote_i_size = smp_load_acquire(&ictx->_remote_i_size);
+#endif
+ return remote_i_size;
+}
+
+/*
+ * netfs_write_remote_i_size - Set remote_i_size safely
+ * @inode: The inode to access
+ * @remote_i_size: The new value for the size of the file on the server
+ *
+ * Set remote_i_size safely without the potential for tearing on 32-bit arches.
+ *
+ * Context: The caller must hold inode->i_lock.
+ *
+ * NOTE: unlike netfs_read_remote_i_size(), netfs_write_remote_i_size() does
+ * need locking around it (normally i_rwsem), otherwise on 32bit/SMP an update
+ * of i_size_seqcount can be lost, resulting in subsequent i_size_read() calls
+ * spinning forever.
+ */
+static inline void netfs_write_remote_i_size(struct inode *inode,
+ unsigned long long remote_i_size)
+{
+ struct netfs_inode *ictx = netfs_inode(inode);
+
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+ write_seqcount_begin(&inode->i_size_seqcount);
+ ictx->_remote_i_size = remote_i_size;
+ write_seqcount_end(&inode->i_size_seqcount);
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
+ preempt_disable();
+ ictx->_remote_i_size = remote_i_size;
+ preempt_enable();
+#else
+ /*
+ * Pairs with smp_load_acquire() in netfs_read_remote_i_size() to
+ * ensure changes related to inode size (such as page contents) are
+ * visible before we see the changed inode size.
+ */
+ smp_store_release(&ictx->_remote_i_size, remote_i_size);
+#endif
+}
+
+/**
+ * netfs_read_zero_point - Read zero_point safely
+ * @inode: The inode to access
+ *
+ * Read zero_point safely without the potential for tearing on 32-bit
+ * arches.
+ *
+ * NOTE: in a 32bit arch with a preemptable kernel and an UP compile the
+ * i_size_read/write must be atomic with respect to the local cpu (unlike with
+ * preempt disabled), but they don't need to be atomic with respect to other
+ * cpus like in true SMP (so they need either to either locally disable irq
+ * around the read or for example on x86 they can be still implemented as a
+ * cmpxchg8b without the need of the lock prefix). For SMP compiles and 64bit
+ * archs it makes no difference if preempt is enabled or not.
+ */
+static inline unsigned long long netfs_read_zero_point(const struct inode *inode)
+{
+ struct netfs_inode *ictx = container_of(inode, struct netfs_inode, inode);
+ unsigned long long zero_point;
+
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+ unsigned int seq;
+
+ do {
+ seq = read_seqcount_begin(&inode->i_size_seqcount);
+ zero_point = ictx->_zero_point;
+ } while (read_seqcount_retry(&inode->i_size_seqcount, seq));
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
+ preempt_disable();
+ zero_point = ictx->_zero_point;
+ preempt_enable();
+#else
+ /* Pairs with smp_store_release() in netfs_write_zero_point() */
+ zero_point = smp_load_acquire(&ictx->_zero_point);
+#endif
+ return zero_point;
+}
+
+/*
+ * netfs_write_zero_point - Set zero_point safely
+ * @inode: The inode to access
+ * @zero_point: The new value for the point beyond which the server has no data
+ *
+ * Set zero_point safely without the potential for tearing on 32-bit arches.
+ *
+ * Context: The caller must hold inode->i_lock.
+ *
+ * NOTE: unlike netfs_read_zero_point(), netfs_write_zero_point() does need
+ * locking around it (normally i_rwsem), otherwise on 32bit/SMP an update of
+ * i_size_seqcount can be lost, resulting in subsequent read calls spinning
+ * forever.
+ */
+static inline void netfs_write_zero_point(struct inode *inode,
+ unsigned long long zero_point)
+{
+ struct netfs_inode *ictx = netfs_inode(inode);
+
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+ write_seqcount_begin(&inode->i_size_seqcount);
+ ictx->_zero_point = zero_point;
+ write_seqcount_end(&inode->i_size_seqcount);
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
+ preempt_disable();
+ ictx->_zero_point = zero_point;
+ preempt_enable();
+#else
+ /*
+ * Pairs with smp_load_acquire() in netfs_read_zero_point() to
+ * ensure changes related to inode size (such as page contents) are
+ * visible before we see the changed inode size.
+ */
+ smp_store_release(&ictx->_zero_point, zero_point);
+#endif
+}
+
+/**
+ * netfs_read_sizes - Read remote_i_size and zero_point safely
+ * @inode: The inode to access
+ * @i_size: Where to return the local file size.
+ * @remote_i_size: Where to return the size of the file on the server
+ * @zero_point: Where to return the the point beyond which the server has no data
+ *
+ * Read remote_i_size and zero_point safely without the potential for tearing
+ * on 32-bit arches.
+ *
+ * NOTE: in a 32bit arch with a preemptable kernel and an UP compile the
+ * i_size_read/write must be atomic with respect to the local cpu (unlike with
+ * preempt disabled), but they don't need to be atomic with respect to other
+ * cpus like in true SMP (so they need either to either locally disable irq
+ * around the read or for example on x86 they can be still implemented as a
+ * cmpxchg8b without the need of the lock prefix). For SMP compiles and 64bit
+ * archs it makes no difference if preempt is enabled or not.
+ */
+static inline void netfs_read_sizes(const struct inode *inode,
+ unsigned long long *i_size,
+ unsigned long long *remote_i_size,
+ unsigned long long *zero_point)
+{
+ const struct netfs_inode *ictx = container_of(inode, struct netfs_inode, inode);
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+ unsigned int seq;
+
+ do {
+ seq = read_seqcount_begin(&inode->i_size_seqcount);
+ *i_size = inode->i_size;
+ *remote_i_size = ictx->_remote_i_size;
+ *zero_point = ictx->_zero_point;
+ } while (read_seqcount_retry(&inode->i_size_seqcount, seq));
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
+ preempt_disable();
+ *i_size = inode->i_size;
+ *remote_i_size = ictx->_remote_i_size;
+ *zero_point = ictx->_zero_point;
+ preempt_enable();
+#else
+ /* Pairs with smp_store_release() in i_size_write() */
+ *i_size = smp_load_acquire(&inode->i_size);
+ /* Pairs with smp_store_release() in netfs_write_remote_i_size() */
+ *remote_i_size = smp_load_acquire(&ictx->_remote_i_size);
+ /* Pairs with smp_store_release() in netfs_write_zero_point() */
+ *zero_point = smp_load_acquire(&ictx->_zero_point);
+#endif
+}
+
+/*
+ * netfs_write_sizes - Set i_size, remote_i_size and zero_point safely
+ * @inode: The inode to access
+ * @i_size: The new value for the local size of the file
+ * @remote_i_size: The new value for the size of the file on the server
+ * @zero_point: The new value for the point beyond which the server has no data
+ *
+ * Set both remote_i_size and zero_point safely without the potential for
+ * tearing on 32-bit arches.
+ *
+ * Context: The caller must hold inode->i_lock.
+ *
+ * NOTE: unlike netfs_read_zero_point(), netfs_write_zero_point() does need
+ * locking around it (normally i_rwsem), otherwise on 32bit/SMP an update of
+ * i_size_seqcount can be lost, resulting in subsequent read calls spinning
+ * forever.
+ */
+static inline void netfs_write_sizes(struct inode *inode,
+ unsigned long long i_size,
+ unsigned long long remote_i_size,
+ unsigned long long zero_point)
+{
+ struct netfs_inode *ictx = netfs_inode(inode);
+
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+ write_seqcount_begin(&inode->i_size_seqcount);
+ inode->i_size = i_size;
+ ictx->_remote_i_size = remote_i_size;
+ ictx->_zero_point = zero_point;
+ write_seqcount_end(&inode->i_size_seqcount);
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
+ preempt_disable();
+ inode->i_size = i_size;
+ ictx->_remote_i_size = remote_i_size;
+ ictx->_zero_point = zero_point;
+ preempt_enable();
+#else
+ /*
+ * Pairs with smp_load_acquire() in i_size_read(),
+ * netfs_read_remote_i_size() and netfs_read_zero_point() to ensure
+ * changes related to inode size (such as page contents) are visible
+ * before we see the changed inode size.
+ */
+ smp_store_release(&inode->i_size, i_size);
+ smp_store_release(&ictx->_remote_i_size, remote_i_size);
+ smp_store_release(&ictx->_zero_point, zero_point);
+#endif
+}
+
+/**
* netfs_inode_init - Initialise a netfslib inode context
* @ctx: The netfs inode to initialise
* @ops: The netfs's operations list
@@ -488,8 +736,8 @@ static inline void netfs_inode_init(struct netfs_inode *ctx,
bool use_zero_point)
{
ctx->ops = ops;
- ctx->remote_i_size = i_size_read(&ctx->inode);
- ctx->zero_point = LLONG_MAX;
+ ctx->_remote_i_size = i_size_read(&ctx->inode);
+ ctx->_zero_point = LLONG_MAX;
ctx->flags = 0;
atomic_set(&ctx->io_count, 0);
#if IS_ENABLED(CONFIG_FSCACHE)
@@ -498,7 +746,7 @@ static inline void netfs_inode_init(struct netfs_inode *ctx,
mutex_init(&ctx->wb_lock);
/* ->releasepage() drives zero_point */
if (use_zero_point) {
- ctx->zero_point = ctx->remote_i_size;
+ ctx->_zero_point = ctx->_remote_i_size;
mapping_set_release_always(ctx->inode.i_mapping);
}
}
@@ -511,13 +759,40 @@ static inline void netfs_inode_init(struct netfs_inode *ctx,
*
* Inform the netfs lib that a file got resized so that it can adjust its state.
*/
-static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size,
+static inline void netfs_resize_file(struct netfs_inode *ictx,
+ unsigned long long new_i_size,
bool changed_on_server)
{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+ struct inode *inode = &ictx->inode;
+
+ preempt_disable();
+ write_seqcount_begin(&inode->i_size_seqcount);
+ if (changed_on_server)
+ ictx->_remote_i_size = new_i_size;
+ if (new_i_size < ictx->_zero_point)
+ ictx->_zero_point = new_i_size;
+ write_seqcount_end(&inode->i_size_seqcount);
+ preempt_enable();
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
+ preempt_disable();
if (changed_on_server)
- ctx->remote_i_size = new_i_size;
- if (new_i_size < ctx->zero_point)
- ctx->zero_point = new_i_size;
+ ictx->_remote_i_size = new_i_size;
+ if (new_i_size < ictx->_zero_point)
+ ictx->_zero_point = new_i_size;
+ preempt_enable();
+#else
+ /*
+ * Pairs with smp_load_acquire() in netfs_read_remote_i_size and
+ * netfs_read_zero_point() to ensure changes related to inode size
+ * (such as page contents) are visible before we see the changed inode
+ * size.
+ */
+ if (changed_on_server)
+ smp_store_release(&ictx->_remote_i_size, new_i_size);
+ if (new_i_size < ictx->_zero_point)
+ smp_store_release(&ictx->_zero_point, new_i_size);
+#endif
}
/**
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index 8c936fc575d5..082cb03c6131 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -177,7 +177,11 @@
EM(netfs_folio_is_uptodate, "mod-uptodate") \
EM(netfs_just_prefetch, "mod-prefetch") \
EM(netfs_whole_folio_modify, "mod-whole-f") \
+ EM(netfs_whole_folio_modify_efault, "mod-whole-f!") \
+ EM(netfs_whole_folio_modify_filled, "mod-whole-f+") \
+ EM(netfs_whole_folio_modify_filled_efault, "mod-whole-f+!") \
EM(netfs_modify_and_clear, "mod-n-clear") \
+ EM(netfs_modify_and_clear_rm_finfo, "mod-n-clear+") \
EM(netfs_streaming_write, "mod-streamw") \
EM(netfs_streaming_write_cont, "mod-streamw+") \
EM(netfs_flush_content, "flush") \
@@ -194,6 +198,10 @@
EM(netfs_folio_trace_copy_to_cache, "mark-copy") \
EM(netfs_folio_trace_end_copy, "end-copy") \
EM(netfs_folio_trace_filled_gaps, "filled-gaps") \
+ EM(netfs_folio_trace_invalidate_all, "inval-all") \
+ EM(netfs_folio_trace_invalidate_front, "inval-front") \
+ EM(netfs_folio_trace_invalidate_middle, "inval-mid") \
+ EM(netfs_folio_trace_invalidate_tail, "inval-tail") \
EM(netfs_folio_trace_kill, "kill") \
EM(netfs_folio_trace_kill_cc, "kill-cc") \
EM(netfs_folio_trace_kill_g, "kill-g") \