From a260bd22a355bcdb74cedac6ab9b10739cd2c62c Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@grsecurity.net>
Date: Tue, 9 Dec 2025 22:09:03 +0100
Subject: media: mc: fix potential use-after-free in media_request_alloc()

Commit 6f504cbf108a ("media: convert media_request_alloc() to
FD_PREPARE()") moved the call to fd_install() (now hidden in
fd_publish()) before the snprintf(), making the later write go to
potentially already freed memory, as userland is free to call
close() concurrently right after the call to fd_install() which
may end up in the request_fops.release() handler freeing 'req'.

Fixes: 6f504cbf108a ("media: convert media_request_alloc() to FD_PREPARE()")
Signed-off-by: Mathias Krause <minipli@grsecurity.net>
Link: https://patch.msgid.link/20251209210903.603958-1-minipli@grsecurity.net
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 drivers/media/mc/mc-request.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/media/mc/mc-request.c b/drivers/media/mc/mc-request.c
index 2ac9ac0a740b..3cca9a0c7c97 100644
--- a/drivers/media/mc/mc-request.c
+++ b/drivers/media/mc/mc-request.c
@@ -315,12 +315,12 @@ int media_request_alloc(struct media_device *mdev, int *alloc_fd)
 
 	fd_prepare_file(fdf)->private_data = req;
 
-	*alloc_fd = fd_publish(fdf);
-
 	snprintf(req->debug_str, sizeof(req->debug_str), "%u:%d",
-		 atomic_inc_return(&mdev->request_id), *alloc_fd);
+		 atomic_inc_return(&mdev->request_id), fd_prepare_fd(fdf));
 	dev_dbg(mdev->dev, "request: allocated %s\n", req->debug_str);
 
+	*alloc_fd = fd_publish(fdf);
+
 	return 0;
 
 err_free_req:
-- 
cgit v1.2.3


From ed61378b4dc63efe76cb8c23a36b228043332da3 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Mon, 8 Dec 2025 09:05:48 -0500
Subject: iomap: replace folio_batch allocation with stack allocation

Zhang Yi points out that the dynamic folio_batch allocation in
iomap_fill_dirty_folios() is problematic for the ext4 on iomap work
that is under development because it doesn't sufficiently handle the
allocation failure case (by allowing a retry, for example). We've
also seen lockdep (via syzbot) complain recently about the scope of
the allocation.

The dynamic allocation was initially added for simplicity and to
help indicate whether the batch was used or not by the calling fs.
To address these issues, put the batch on the stack of
iomap_zero_range() and use a flag to control whether the batch
should be used in the iomap folio lookup path. This keeps things
simple and eliminates allocation issues with lockdep and for ext4 on
iomap.

While here, also clean up the fill helper signature to be more
consistent with the underlying filemap helper. Pass through the
return value of the filemap helper (folio count) and update the
lookup offset via an out param.

Fixes: 395ed1ef0012 ("iomap: optional zero range dirty folio processing")
Signed-off-by: Brian Foster <bfoster@redhat.com>
Link: https://patch.msgid.link/20251208140548.373411-1-bfoster@redhat.com
Acked-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/iomap/buffered-io.c | 50 +++++++++++++++++++++++++++++++++++---------------
 fs/iomap/iter.c        |  6 +++---
 fs/xfs/xfs_iomap.c     | 11 ++++++-----
 include/linux/iomap.h  |  8 ++++++--
 4 files changed, 50 insertions(+), 25 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index e5c1ca440d93..fd9a2cf95620 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -832,7 +832,7 @@ static struct folio *__iomap_get_folio(struct iomap_iter *iter,
 	if (!mapping_large_folio_support(iter->inode->i_mapping))
 		len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos));
 
-	if (iter->fbatch) {
+	if (iter->iomap.flags & IOMAP_F_FOLIO_BATCH) {
 		struct folio *folio = folio_batch_next(iter->fbatch);
 
 		if (!folio)
@@ -929,7 +929,7 @@ static int iomap_write_begin(struct iomap_iter *iter,
 	 * process so return and let the caller iterate and refill the batch.
 	 */
 	if (!folio) {
-		WARN_ON_ONCE(!iter->fbatch);
+		WARN_ON_ONCE(!(iter->iomap.flags & IOMAP_F_FOLIO_BATCH));
 		return 0;
 	}
 
@@ -1544,23 +1544,39 @@ static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero,
 	return status;
 }
 
-loff_t
+/**
+ * iomap_fill_dirty_folios - fill a folio batch with dirty folios
+ * @iter: Iteration structure
+ * @start: Start offset of range. Updated based on lookup progress.
+ * @end: End offset of range
+ * @iomap_flags: Flags to set on the associated iomap to track the batch.
+ *
+ * Returns the folio count directly. Also returns the associated control flag if
+ * the batch lookup is performed and the expected offset of a subsequent
+ * lookup via out params. The caller is responsible to set the flag on the
+ * associated iomap.
+ */
+unsigned int
 iomap_fill_dirty_folios(
 	struct iomap_iter	*iter,
-	loff_t			offset,
-	loff_t			length)
+	loff_t			*start,
+	loff_t			end,
+	unsigned int		*iomap_flags)
 {
 	struct address_space	*mapping = iter->inode->i_mapping;
-	pgoff_t			start = offset >> PAGE_SHIFT;
-	pgoff_t			end = (offset + length - 1) >> PAGE_SHIFT;
+	pgoff_t			pstart = *start >> PAGE_SHIFT;
+	pgoff_t			pend = (end - 1) >> PAGE_SHIFT;
+	unsigned int		count;
 
-	iter->fbatch = kmalloc(sizeof(struct folio_batch), GFP_KERNEL);
-	if (!iter->fbatch)
-		return offset + length;
-	folio_batch_init(iter->fbatch);
+	if (!iter->fbatch) {
+		*start = end;
+		return 0;
+	}
 
-	filemap_get_folios_dirty(mapping, &start, end, iter->fbatch);
-	return (start << PAGE_SHIFT);
+	count = filemap_get_folios_dirty(mapping, &pstart, pend, iter->fbatch);
+	*start = (pstart << PAGE_SHIFT);
+	*iomap_flags |= IOMAP_F_FOLIO_BATCH;
+	return count;
 }
 EXPORT_SYMBOL_GPL(iomap_fill_dirty_folios);
 
@@ -1569,17 +1585,21 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
 		const struct iomap_ops *ops,
 		const struct iomap_write_ops *write_ops, void *private)
 {
+	struct folio_batch fbatch;
 	struct iomap_iter iter = {
 		.inode		= inode,
 		.pos		= pos,
 		.len		= len,
 		.flags		= IOMAP_ZERO,
 		.private	= private,
+		.fbatch		= &fbatch,
 	};
 	struct address_space *mapping = inode->i_mapping;
 	int ret;
 	bool range_dirty;
 
+	folio_batch_init(&fbatch);
+
 	/*
 	 * To avoid an unconditional flush, check pagecache state and only flush
 	 * if dirty and the fs returns a mapping that might convert on
@@ -1590,11 +1610,11 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
 	while ((ret = iomap_iter(&iter, ops)) > 0) {
 		const struct iomap *srcmap = iomap_iter_srcmap(&iter);
 
-		if (WARN_ON_ONCE(iter.fbatch &&
+		if (WARN_ON_ONCE((iter.iomap.flags & IOMAP_F_FOLIO_BATCH) &&
 				 srcmap->type != IOMAP_UNWRITTEN))
 			return -EIO;
 
-		if (!iter.fbatch &&
+		if (!(iter.iomap.flags & IOMAP_F_FOLIO_BATCH) &&
 		    (srcmap->type == IOMAP_HOLE ||
 		     srcmap->type == IOMAP_UNWRITTEN)) {
 			s64 status;
diff --git a/fs/iomap/iter.c b/fs/iomap/iter.c
index 8692e5e41c6d..c04796f6e57f 100644
--- a/fs/iomap/iter.c
+++ b/fs/iomap/iter.c
@@ -8,10 +8,10 @@
 
 static inline void iomap_iter_reset_iomap(struct iomap_iter *iter)
 {
-	if (iter->fbatch) {
+	if (iter->iomap.flags & IOMAP_F_FOLIO_BATCH) {
 		folio_batch_release(iter->fbatch);
-		kfree(iter->fbatch);
-		iter->fbatch = NULL;
+		folio_batch_reinit(iter->fbatch);
+		iter->iomap.flags &= ~IOMAP_F_FOLIO_BATCH;
 	}
 
 	iter->status = 0;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 04f39ea15898..37a1b33e9045 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1831,7 +1831,6 @@ xfs_buffered_write_iomap_begin(
 	 */
 	if (flags & IOMAP_ZERO) {
 		xfs_fileoff_t eof_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
-		u64 end;
 
 		if (isnullstartblock(imap.br_startblock) &&
 		    offset_fsb >= eof_fsb)
@@ -1851,12 +1850,14 @@ xfs_buffered_write_iomap_begin(
 		 */
 		if (imap.br_state == XFS_EXT_UNWRITTEN &&
 		    offset_fsb < eof_fsb) {
-			loff_t len = min(count,
-					 XFS_FSB_TO_B(mp, imap.br_blockcount));
+			loff_t foffset = offset, fend;
 
-			end = iomap_fill_dirty_folios(iter, offset, len);
+			fend = offset +
+			       min(count, XFS_FSB_TO_B(mp, imap.br_blockcount));
+			iomap_fill_dirty_folios(iter, &foffset, fend,
+						&iomap_flags);
 			end_fsb = min_t(xfs_fileoff_t, end_fsb,
-					XFS_B_TO_FSB(mp, end));
+					XFS_B_TO_FSB(mp, foffset));
 		}
 
 		xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 520e967cb501..6bb941707d12 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -88,6 +88,9 @@ struct vm_fault;
 /*
  * Flags set by the core iomap code during operations:
  *
+ * IOMAP_F_FOLIO_BATCH indicates that the folio batch mechanism is active
+ * for this operation, set by iomap_fill_dirty_folios().
+ *
  * IOMAP_F_SIZE_CHANGED indicates to the iomap_end method that the file size
  * has changed as the result of this write operation.
  *
@@ -95,6 +98,7 @@ struct vm_fault;
  * range it covers needs to be remapped by the high level before the operation
  * can proceed.
  */
+#define IOMAP_F_FOLIO_BATCH	(1U << 13)
 #define IOMAP_F_SIZE_CHANGED	(1U << 14)
 #define IOMAP_F_STALE		(1U << 15)
 
@@ -352,8 +356,8 @@ bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio);
 int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
 		const struct iomap_ops *ops,
 		const struct iomap_write_ops *write_ops);
-loff_t iomap_fill_dirty_folios(struct iomap_iter *iter, loff_t offset,
-		loff_t length);
+unsigned int iomap_fill_dirty_folios(struct iomap_iter *iter, loff_t *start,
+		loff_t end, unsigned int *iomap_flags);
 int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
 		bool *did_zero, const struct iomap_ops *ops,
 		const struct iomap_write_ops *write_ops, void *private);
-- 
cgit v1.2.3


From 392e317a20c32d45eebe4de8dc24408c6d1765d1 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Thu, 4 Dec 2025 08:48:32 -0500
Subject: filelock: add lease_dispose_list() helper

The lease-handling code paths always know they're disposing of leases,
yet locks_dispose_list() checks flags at runtime to determine whether
to call locks_free_lease() or locks_free_lock().

Split out a dedicated lease_dispose_list() helper for lease code paths.
This makes the type handling explicit and prepares for the upcoming
lease_manager enhancements where lease-specific operations are being
consolidated.

Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Link: https://patch.msgid.link/20251204-dir-deleg-ro-v2-1-22d37f92ce2c@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/locks.c | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index 9f565802a88c..be0b79286da8 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -369,10 +369,19 @@ locks_dispose_list(struct list_head *dispose)
 	while (!list_empty(dispose)) {
 		flc = list_first_entry(dispose, struct file_lock_core, flc_list);
 		list_del_init(&flc->flc_list);
-		if (flc->flc_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT))
-			locks_free_lease(file_lease(flc));
-		else
-			locks_free_lock(file_lock(flc));
+		locks_free_lock(file_lock(flc));
+	}
+}
+
+static void
+lease_dispose_list(struct list_head *dispose)
+{
+	struct file_lock_core *flc;
+
+	while (!list_empty(dispose)) {
+		flc = list_first_entry(dispose, struct file_lock_core, flc_list);
+		list_del_init(&flc->flc_list);
+		locks_free_lease(file_lease(flc));
 	}
 }
 
@@ -1620,7 +1629,7 @@ restart:
 	spin_unlock(&ctx->flc_lock);
 	percpu_up_read(&file_rwsem);
 
-	locks_dispose_list(&dispose);
+	lease_dispose_list(&dispose);
 	error = wait_event_interruptible_timeout(new_fl->c.flc_wait,
 						 list_empty(&new_fl->c.flc_blocked_member),
 						 break_time);
@@ -1643,7 +1652,7 @@ restart:
 out:
 	spin_unlock(&ctx->flc_lock);
 	percpu_up_read(&file_rwsem);
-	locks_dispose_list(&dispose);
+	lease_dispose_list(&dispose);
 free_lock:
 	locks_free_lease(new_fl);
 	return error;
@@ -1727,7 +1736,7 @@ static int __fcntl_getlease(struct file *filp, unsigned int flavor)
 		spin_unlock(&ctx->flc_lock);
 		percpu_up_read(&file_rwsem);
 
-		locks_dispose_list(&dispose);
+		lease_dispose_list(&dispose);
 	}
 	return type;
 }
@@ -1896,7 +1905,7 @@ out_setup:
 out:
 	spin_unlock(&ctx->flc_lock);
 	percpu_up_read(&file_rwsem);
-	locks_dispose_list(&dispose);
+	lease_dispose_list(&dispose);
 	if (is_deleg)
 		inode_unlock(inode);
 	if (!error && !my_fl)
@@ -1932,7 +1941,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
 		error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
 	spin_unlock(&ctx->flc_lock);
 	percpu_up_read(&file_rwsem);
-	locks_dispose_list(&dispose);
+	lease_dispose_list(&dispose);
 	return error;
 }
 
@@ -2727,7 +2736,7 @@ locks_remove_lease(struct file *filp, struct file_lock_context *ctx)
 	spin_unlock(&ctx->flc_lock);
 	percpu_up_read(&file_rwsem);
 
-	locks_dispose_list(&dispose);
+	lease_dispose_list(&dispose);
 }
 
 /*
-- 
cgit v1.2.3


From 12965a190eaea614bb49e22041e8fc0d03d0310f Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Thu, 4 Dec 2025 08:48:33 -0500
Subject: filelock: allow lease_managers to dictate what qualifies as a
 conflict

Requesting a delegation on a file from the userland fcntl() interface
currently succeeds when there are conflicting opens present.

This is because the lease handling code ignores conflicting opens for
FL_LAYOUT and FL_DELEG leases. This was a hack put in place long ago,
because nfsd already checks for conflicts in its own way. The kernel
needs to perform this check for userland delegations the same way it is
done for leases, however.

Make this dependent on the lease_manager by adding a new
->lm_open_conflict() lease_manager operation and have
generic_add_lease() call that instead of check_conflicting_open().
Morph check_conflicting_open() into a ->lm_open_conflict() op that is
only called for userland leases/delegations. Set the
->lm_open_conflict() operations for nfsd to trivial functions that
always return 0.

Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Link: https://patch.msgid.link/20251204-dir-deleg-ro-v2-2-22d37f92ce2c@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 Documentation/filesystems/locking.rst |  1 +
 fs/locks.c                            | 90 ++++++++++++++++-------------------
 fs/nfsd/nfs4layouts.c                 | 23 ++++++++-
 fs/nfsd/nfs4state.c                   | 19 ++++++++
 include/linux/filelock.h              |  1 +
 5 files changed, 84 insertions(+), 50 deletions(-)

diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index 77704fde9845..04c7691e50e0 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -416,6 +416,7 @@ lm_change		yes		no			no
 lm_breaker_owns_lease:	yes     	no			no
 lm_lock_expirable	yes		no			no
 lm_expire_lock		no		no			yes
+lm_open_conflict	yes		no			no
 ======================	=============	=================	=========
 
 buffer_head
diff --git a/fs/locks.c b/fs/locks.c
index be0b79286da8..e75c8084d937 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -585,10 +585,50 @@ lease_setup(struct file_lease *fl, void **priv)
 	__f_setown(filp, task_pid(current), PIDTYPE_TGID, 0);
 }
 
+/**
+ * lease_open_conflict - see if the given file points to an inode that has
+ *			 an existing open that would conflict with the
+ *			 desired lease.
+ * @filp:	file to check
+ * @arg:	type of lease that we're trying to acquire
+ *
+ * Check to see if there's an existing open fd on this file that would
+ * conflict with the lease we're trying to set.
+ */
+static int
+lease_open_conflict(struct file *filp, const int arg)
+{
+	struct inode *inode = file_inode(filp);
+	int self_wcount = 0, self_rcount = 0;
+
+	if (arg == F_RDLCK)
+		return inode_is_open_for_write(inode) ? -EAGAIN : 0;
+	else if (arg != F_WRLCK)
+		return 0;
+
+	/*
+	 * Make sure that only read/write count is from lease requestor.
+	 * Note that this will result in denying write leases when i_writecount
+	 * is negative, which is what we want.  (We shouldn't grant write leases
+	 * on files open for execution.)
+	 */
+	if (filp->f_mode & FMODE_WRITE)
+		self_wcount = 1;
+	else if (filp->f_mode & FMODE_READ)
+		self_rcount = 1;
+
+	if (atomic_read(&inode->i_writecount) != self_wcount ||
+	    atomic_read(&inode->i_readcount) != self_rcount)
+		return -EAGAIN;
+
+	return 0;
+}
+
 static const struct lease_manager_operations lease_manager_ops = {
 	.lm_break = lease_break_callback,
 	.lm_change = lease_modify,
 	.lm_setup = lease_setup,
+	.lm_open_conflict = lease_open_conflict,
 };
 
 /*
@@ -1754,52 +1794,6 @@ int fcntl_getdeleg(struct file *filp, struct delegation *deleg)
 	return 0;
 }
 
-/**
- * check_conflicting_open - see if the given file points to an inode that has
- *			    an existing open that would conflict with the
- *			    desired lease.
- * @filp:	file to check
- * @arg:	type of lease that we're trying to acquire
- * @flags:	current lock flags
- *
- * Check to see if there's an existing open fd on this file that would
- * conflict with the lease we're trying to set.
- */
-static int
-check_conflicting_open(struct file *filp, const int arg, int flags)
-{
-	struct inode *inode = file_inode(filp);
-	int self_wcount = 0, self_rcount = 0;
-
-	if (flags & FL_LAYOUT)
-		return 0;
-	if (flags & FL_DELEG)
-		/* We leave these checks to the caller */
-		return 0;
-
-	if (arg == F_RDLCK)
-		return inode_is_open_for_write(inode) ? -EAGAIN : 0;
-	else if (arg != F_WRLCK)
-		return 0;
-
-	/*
-	 * Make sure that only read/write count is from lease requestor.
-	 * Note that this will result in denying write leases when i_writecount
-	 * is negative, which is what we want.  (We shouldn't grant write leases
-	 * on files open for execution.)
-	 */
-	if (filp->f_mode & FMODE_WRITE)
-		self_wcount = 1;
-	else if (filp->f_mode & FMODE_READ)
-		self_rcount = 1;
-
-	if (atomic_read(&inode->i_writecount) != self_wcount ||
-	    atomic_read(&inode->i_readcount) != self_rcount)
-		return -EAGAIN;
-
-	return 0;
-}
-
 static int
 generic_add_lease(struct file *filp, int arg, struct file_lease **flp, void **priv)
 {
@@ -1836,7 +1830,7 @@ generic_add_lease(struct file *filp, int arg, struct file_lease **flp, void **pr
 	percpu_down_read(&file_rwsem);
 	spin_lock(&ctx->flc_lock);
 	time_out_leases(inode, &dispose);
-	error = check_conflicting_open(filp, arg, lease->c.flc_flags);
+	error = lease->fl_lmops->lm_open_conflict(filp, arg);
 	if (error)
 		goto out;
 
@@ -1893,7 +1887,7 @@ generic_add_lease(struct file *filp, int arg, struct file_lease **flp, void **pr
 	 * precedes these checks.
 	 */
 	smp_mb();
-	error = check_conflicting_open(filp, arg, lease->c.flc_flags);
+	error = lease->fl_lmops->lm_open_conflict(filp, arg);
 	if (error) {
 		locks_unlink_lock_ctx(&lease->c);
 		goto out;
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 683bd1130afe..ad7af8cfcf1f 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -764,9 +764,28 @@ nfsd4_layout_lm_change(struct file_lease *onlist, int arg,
 	return lease_modify(onlist, arg, dispose);
 }
 
+/**
+ *  nfsd4_layout_lm_open_conflict - see if the given file points to an inode that has
+ *				    an existing open that would conflict with the
+ *				    desired lease.
+ * @filp:	file to check
+ * @arg:	type of lease that we're trying to acquire
+ *
+ * The kernel will call into this operation to determine whether there
+ * are conflicting opens that may prevent the layout from being granted.
+ * For nfsd, that check is done at a higher level, so this trivially
+ * returns 0.
+ */
+static int
+nfsd4_layout_lm_open_conflict(struct file *filp, int arg)
+{
+	return 0;
+}
+
 static const struct lease_manager_operations nfsd4_layouts_lm_ops = {
-	.lm_break	= nfsd4_layout_lm_break,
-	.lm_change	= nfsd4_layout_lm_change,
+	.lm_break		= nfsd4_layout_lm_break,
+	.lm_change		= nfsd4_layout_lm_change,
+	.lm_open_conflict	= nfsd4_layout_lm_open_conflict,
 };
 
 int
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 808c24fb5c9a..19d6d6db107f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -5552,10 +5552,29 @@ nfsd_change_deleg_cb(struct file_lease *onlist, int arg,
 		return -EAGAIN;
 }
 
+/**
+ *  nfsd4_deleg_lm_open_conflict - see if the given file points to an inode that has
+ *				   an existing open that would conflict with the
+ *				   desired lease.
+ * @filp:	file to check
+ * @arg:	type of lease that we're trying to acquire
+ *
+ * The kernel will call into this operation to determine whether there
+ * are conflicting opens that may prevent the deleg from being granted.
+ * For nfsd, that check is done at a higher level, so this trivially
+ * returns 0.
+ */
+static int
+nfsd4_deleg_lm_open_conflict(struct file *filp, int arg)
+{
+	return 0;
+}
+
 static const struct lease_manager_operations nfsd_lease_mng_ops = {
 	.lm_breaker_owns_lease = nfsd_breaker_owns_lease,
 	.lm_break = nfsd_break_deleg_cb,
 	.lm_change = nfsd_change_deleg_cb,
+	.lm_open_conflict = nfsd4_deleg_lm_open_conflict,
 };
 
 static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4_stateowner *so, u32 seqid)
diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index 54b824c05299..2f5e5588ee07 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -49,6 +49,7 @@ struct lease_manager_operations {
 	int (*lm_change)(struct file_lease *, int, struct list_head *);
 	void (*lm_setup)(struct file_lease *, void **);
 	bool (*lm_breaker_owns_lease)(struct file_lease *);
+	int (*lm_open_conflict)(struct file *, int);
 };
 
 struct lock_manager {
-- 
cgit v1.2.3


From 570ad253a3455a520f03c2136af8714bc780186d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 20 Dec 2025 12:31:40 +0000
Subject: netfs: Fix early read unlock of page with EOF in middle

The read result collection for buffered reads seems to run ahead of the
completion of subrequests under some circumstances, as can be seen in the
following log snippet:

    9p_client_res: client 18446612686390831168 response P9_TREAD tag  0 err 0
    ...
    netfs_sreq: R=00001b55[1] DOWN TERM  f=192 s=0 5fb2/5fb2 s=5 e=0
    ...
    netfs_collect_folio: R=00001b55 ix=00004 r=4000-5000 t=4000/5fb2
    netfs_folio: i=157f3 ix=00004-00004 read-done
    netfs_folio: i=157f3 ix=00004-00004 read-unlock
    netfs_collect_folio: R=00001b55 ix=00005 r=5000-5fb2 t=5000/5fb2
    netfs_folio: i=157f3 ix=00005-00005 read-done
    netfs_folio: i=157f3 ix=00005-00005 read-unlock
    ...
    netfs_collect_stream: R=00001b55[0:] cto=5fb2 frn=ffffffff
    netfs_collect_state: R=00001b55 col=5fb2 cln=6000 n=c
    netfs_collect_stream: R=00001b55[0:] cto=5fb2 frn=ffffffff
    netfs_collect_state: R=00001b55 col=5fb2 cln=6000 n=8
    ...
    netfs_sreq: R=00001b55[2] ZERO SUBMT f=000 s=5fb2 0/4e s=0 e=0
    netfs_sreq: R=00001b55[2] ZERO TERM  f=102 s=5fb2 4e/4e s=5 e=0

The 'cto=5fb2' indicates the collected file pos we've collected results to
so far - but we still have 0x4e more bytes to go - so we shouldn't have
collected folio ix=00005 yet.  The 'ZERO' subreq that clears the tail
happens after we unlock the folio, allowing the application to see the
uncleared tail through mmap.

The problem is that netfs_read_unlock_folios() will unlock a folio in which
the amount of read results collected hits EOF position - but the ZERO
subreq lies beyond that and so happens after.

Fix this by changing the end check to always be the end of the folio and
never the end of the file.

In the future, I should look at clearing to the end of the folio here rather
than adding a ZERO subreq to do this.  On the other hand, the ZERO subreq can
run in parallel with an async READ subreq.  Further, the ZERO subreq may still
be necessary to, say, handle extents in a ceph file that don't have any
backing store and are thus implicitly all zeros.

This can be reproduced by creating a file, the size of which doesn't align
to a page boundary, e.g. 24998 (0x5fb2) bytes and then doing something
like:

    xfs_io -c "mmap -r 0 0x6000" -c "madvise -d 0 0x6000" \
           -c "mread -v 0 0x6000" /xfstest.test/x

The last 0x4e bytes should all be 00, but if the tail hasn't been cleared
yet, you may see rubbish there.  This can be reproduced with kafs by
modifying the kernel to disable the call to netfs_read_subreq_progress()
and to stop afs_issue_read() from doing the async call for NETFS_READAHEAD.
Reproduction can be made easier by inserting an mdelay(100) in
netfs_issue_read() for the ZERO-subreq case.

AFS and CIFS are normally unlikely to show this as they dispatch READ ops
asynchronously, which allows the ZERO-subreq to finish first.  9P's READ op is
completely synchronous, so the ZERO-subreq will always happen after.  It isn't
seen all the time, though, because the collection may be done in a worker
thread.

Reported-by: Christian Schoenebeck <linux_oss@crudebyte.com>
Link: https://lore.kernel.org/r/8622834.T7Z3S40VBb@weasel/
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://patch.msgid.link/938162.1766233900@warthog.procyon.org.uk
Fixes: e2d46f2ec332 ("netfs: Change the read result collector to only use one work item")
Tested-by: Christian Schoenebeck <linux_oss@crudebyte.com>
Acked-by: Dominique Martinet <asmadeus@codewreck.org>
Suggested-by: Dominique Martinet <asmadeus@codewreck.org>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: v9fs@lists.linux.dev
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/netfs/read_collect.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c
index a95e7aadafd0..7a0ffa675fb1 100644
--- a/fs/netfs/read_collect.c
+++ b/fs/netfs/read_collect.c
@@ -137,7 +137,7 @@ static void netfs_read_unlock_folios(struct netfs_io_request *rreq,
 		rreq->front_folio_order = order;
 		fsize = PAGE_SIZE << order;
 		fpos = folio_pos(folio);
-		fend = umin(fpos + fsize, rreq->i_size);
+		fend = fpos + fsize;
 
 		trace_netfs_collect_folio(rreq, folio, fend, collected_to);
 
-- 
cgit v1.2.3


From 46af9ae1305f1025fd9ff7d8945de98a6ec0a52b Mon Sep 17 00:00:00 2001
From: Mateusz Guzik <mjguzik@gmail.com>
Date: Sat, 20 Dec 2025 06:40:22 +0100
Subject: fs: make sure to fail try_to_unlazy() and try_to_unlazy_next() for
 LOOKUP_CACHED

Otherwise the slowpath can be taken by the caller, defeating the flag.

This regressed after calls to legitimize_links() started being
conditionally elided and stems from the routine always failing
after seeing the flag, regardless of whether there were any links.

In order to address both the bug and the weird semantics make it illegal
to call legitimize_links() with LOOKUP_CACHED and handle the problem at
the two callsites.

Fixes: 7c179096e77eca21 ("fs: add predicts based on nd->depth")
Reported-by: Chris Mason <clm@meta.com>
Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Link: https://patch.msgid.link/20251220054023.142134-1-mjguzik@gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/namei.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index bf0f66f0e9b9..f7a8b5b000c2 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -830,11 +830,9 @@ static inline bool legitimize_path(struct nameidata *nd,
 static bool legitimize_links(struct nameidata *nd)
 {
 	int i;
-	if (unlikely(nd->flags & LOOKUP_CACHED)) {
-		drop_links(nd);
-		nd->depth = 0;
-		return false;
-	}
+
+	VFS_BUG_ON(nd->flags & LOOKUP_CACHED);
+
 	for (i = 0; i < nd->depth; i++) {
 		struct saved *last = nd->stack + i;
 		if (unlikely(!legitimize_path(nd, &last->link, last->seq))) {
@@ -883,6 +881,11 @@ static bool try_to_unlazy(struct nameidata *nd)
 
 	BUG_ON(!(nd->flags & LOOKUP_RCU));
 
+	if (unlikely(nd->flags & LOOKUP_CACHED)) {
+		drop_links(nd);
+		nd->depth = 0;
+		goto out1;
+	}
 	if (unlikely(nd->depth && !legitimize_links(nd)))
 		goto out1;
 	if (unlikely(!legitimize_path(nd, &nd->path, nd->seq)))
@@ -918,6 +921,11 @@ static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry)
 	int res;
 	BUG_ON(!(nd->flags & LOOKUP_RCU));
 
+	if (unlikely(nd->flags & LOOKUP_CACHED)) {
+		drop_links(nd);
+		nd->depth = 0;
+		goto out2;
+	}
 	if (unlikely(nd->depth && !legitimize_links(nd)))
 		goto out2;
 	res = __legitimize_mnt(nd->path.mnt, nd->m_seq);
-- 
cgit v1.2.3


From fe33729d2907415ff953d84673caebca628cbd77 Mon Sep 17 00:00:00 2001
From: Bagas Sanjaya <bagasdotme@gmail.com>
Date: Fri, 19 Dec 2025 09:46:19 +0700
Subject: fs: Describe @isnew parameter in ilookup5_nowait()

Sphinx reports kernel-doc warning:

WARNING: ./fs/inode.c:1607 function parameter 'isnew' not described in 'ilookup5_nowait'

Describe the parameter.

Fixes: a27628f4363435 ("fs: rework I_NEW handling to operate without fences")
Signed-off-by: Bagas Sanjaya <bagasdotme@gmail.com>
Link: https://patch.msgid.link/20251219024620.22880-2-bagasdotme@gmail.com
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/inode.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/inode.c b/fs/inode.c
index 521383223d8a..379f4c19845c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1593,6 +1593,9 @@ EXPORT_SYMBOL(igrab);
  * @hashval:	hash value (usually inode number) to search for
  * @test:	callback used for comparisons between inodes
  * @data:	opaque data pointer to pass to @test
+ * @isnew:	return argument telling whether I_NEW was set when
+ *		the inode was found in hash (the caller needs to
+ *		wait for I_NEW to clear)
  *
  * Search for the inode specified by @hashval and @data in the inode cache.
  * If the inode is in the cache, the inode is returned with an incremented
-- 
cgit v1.2.3


From 73a91ef328a9d728c7f3357f925980937f0d520c Mon Sep 17 00:00:00 2001
From: Bagas Sanjaya <bagasdotme@gmail.com>
Date: Fri, 19 Dec 2025 09:46:20 +0700
Subject: VFS: fix __start_dirop() kernel-doc warnings

Sphinx reports kernel-doc warnings:

WARNING: ./fs/namei.c:2853 function parameter 'state' not described in '__start_dirop'
WARNING: ./fs/namei.c:2853 expecting prototype for start_dirop(). Prototype was for __start_dirop() instead

Fix them up.

Fixes: ff7c4ea11a05c8 ("VFS: add start_creating_killable() and start_removing_killable()")
Signed-off-by: Bagas Sanjaya <bagasdotme@gmail.com>
Link: https://patch.msgid.link/20251219024620.22880-3-bagasdotme@gmail.com
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/namei.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/namei.c b/fs/namei.c
index f7a8b5b000c2..cf16b6822dd3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2844,10 +2844,11 @@ static int filename_parentat(int dfd, struct filename *name,
 }
 
 /**
- * start_dirop - begin a create or remove dirop, performing locking and lookup
+ * __start_dirop - begin a create or remove dirop, performing locking and lookup
  * @parent:       the dentry of the parent in which the operation will occur
  * @name:         a qstr holding the name within that parent
  * @lookup_flags: intent and other lookup flags.
+ * @state:        task state bitmask
  *
  * The lookup is performed and necessary locks are taken so that, on success,
  * the returned dentry can be operated on safely.
-- 
cgit v1.2.3


From 3dd57ddec9e3a98387196a3f53b8c036977d8c0f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 16 Dec 2025 08:19:39 +0000
Subject: get rid of bogus __user in struct xattr_args::value

	The first member of struct xattr_args is declared as
	__aligned_u64 __user value;
which makes no sense whatsoever; __user is a qualifier and what that
declaration says is "all struct xattr_args instances have .value
_stored_ in user address space, no matter where the rest of the
structure happens to be".

	Something like "int __user *p" stands for "value of p is a pointer
to an instance of int that happens to live in user address space"; it
says nothing about location of p itself, just as const char *p declares a
pointer to unmodifiable char rather than an unmodifiable pointer to char.

	With xattr_args the intent clearly had been "the 64bit value
represents a _pointer_ to object in user address space", but __user has
nothing to do with that.  All it gets us is a couple of bogus warnings
in fs/xattr.c where (userland) instance of xattr_args is copied to local
variable of that type (in kernel address space), followed by access
to its members.  Since we've told sparse that args.value must somehow be
located in userland memory, we get warned that looking at that 64bit
unsigned integer (in a variable already on kernel stack) is not allowed.

	Note that sparse has no way to express "this integer shall never
be cast into a pointer to be dereferenced directly" and I don't see any
way to assign sane semantics to that.  In any case, __user is not it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Link: https://patch.msgid.link/20251216081939.GQ1712166@ZenIV
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/uapi/linux/xattr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h
index c7c85bb504ba..2e5aef48fa7e 100644
--- a/include/uapi/linux/xattr.h
+++ b/include/uapi/linux/xattr.h
@@ -23,7 +23,7 @@
 #define XATTR_REPLACE	0x2	/* set value, fail if attr does not exist */
 
 struct xattr_args {
-	__aligned_u64 __user value;
+	__aligned_u64 value;
 	__u32 size;
 	__u32 flags;
 };
-- 
cgit v1.2.3


From 5f9ad16bccd351321d9cd65726fd09390d34b06c Mon Sep 17 00:00:00 2001
From: Tyler Hicks <code@tyhicks.com>
Date: Tue, 23 Dec 2025 13:41:52 -0600
Subject: ecryptfs: Fix improper mknod pairing of
 start_creating()/end_removing()

The ecryptfs_start_creating_dentry() function must be paired with the
end_creating() function. Fix ecryptfs_mknod() so that end_creating() is
properly called in the return path, instead of end_removing().

Fixes: f046fbb4d81d ("ecryptfs: use new start_creating/start_removing APIs")
Signed-off-by: Tyler Hicks <code@tyhicks.com>
Link: https://patch.msgid.link/20251223194153.2818445-2-code@tyhicks.com
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ecryptfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 3978248247dc..e73d9de676a6 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -584,7 +584,7 @@ ecryptfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
 	fsstack_copy_attr_times(dir, lower_dir);
 	fsstack_copy_inode_size(dir, lower_dir);
 out:
-	end_removing(lower_dentry);
+	end_creating(lower_dentry);
 	if (d_really_is_negative(dentry))
 		d_drop(dentry);
 	return rc;
-- 
cgit v1.2.3


From 5c56afd204ad266d23889ee8823fb65b2c3b63da Mon Sep 17 00:00:00 2001
From: Tyler Hicks <code@tyhicks.com>
Date: Tue, 23 Dec 2025 13:41:53 -0600
Subject: ecryptfs: Release lower parent dentry after creating dir

Fix a mkdir-induced usage count imbalance that tripped a umount_check()
BUG while unmounting the lower filesystem. Commit f046fbb4d81d
("ecryptfs: use new start_creating/start_removing APIs") added a new
dget() of the lower parent dir, in ecryptfs_mkdir(), but did not dput()
the dentry before returning from that function.

The BUG output as seen while running the eCryptfs test suite:

$ ./run_tests.sh -b 131072 -c safe,destructive -f ext4 -K -t lp-926292.sh
...
Running eCryptfs filesystem tests on ext4
lp-926292
------------[ cut here ]------------
BUG: Dentry ffff8e6692d11988{i=c,n=ECRYPTFS_FNEK_ENCRYPTED.FXZuRGZL7QAFtER.JeA46DtdKqkkQx9H2Vpmv234J5CU8YSsrUwZJK4AbXbrN5WkZ348wnqstovKKxA-}  still in use (1) [unmount of ext4 loop0]
WARNING: CPU: 7 PID: 950 at fs/dcache.c:1590 umount_check+0x5e/0x80
Modules linked in: md5 libmd5 ecryptfs encrypted_keys ext4 crc16 mbcache jbd2
CPU: 7 UID: 0 PID: 950 Comm: umount Not tainted 6.18.0-rc1-00013-gf046fbb4d81d #17 PREEMPT(full)
Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
RIP: 0010:umount_check+0x5e/0x80
Code: 88 38 06 00 00 48 8b 40 28 4c 8b 08 48 8b 46 68 48 85 c0 74 04 48 8b 50 38 51 48 c7 c7 60 32 9c b5 48 89 f1 e8 43 5e ca ff 90 <0f> 0b 90 90 58 31 c0 e9 46 9d 6c 00 41 83 f8 01 75 b8 eb a3 66 66
RSP: 0018:ffffa19940c4bdd0 EFLAGS: 00010282
RAX: 0000000000000000 RBX: ffff8e6692fad4c0 RCX: 0000000000000000
RDX: 0000000000000004 RSI: ffffa19940c4bc70 RDI: 00000000ffffffff
RBP: ffffffffb4eb5930 R08: 00000000ffffdfff R09: 0000000000000001
R10: 00000000ffffdfff R11: ffffffffb5c8a9e0 R12: ffff8e6692fad4c0
R13: ffff8e6692fad4c0 R14: ffff8e6692d11a40 R15: ffff8e6692d11988
FS:  00007f6b4b491800(0000) GS:ffff8e670506e000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f6b4b5f8d40 CR3: 0000000114eb7001 CR4: 0000000000772ef0
PKRU: 55555554
Call Trace:
 <TASK>
 d_walk+0xfd/0x370
 shrink_dcache_for_umount+0x4d/0x140
 generic_shutdown_super+0x20/0x160
 kill_block_super+0x1a/0x40
 ext4_kill_sb+0x22/0x40 [ext4]
 deactivate_locked_super+0x33/0xa0
 cleanup_mnt+0xba/0x150
 task_work_run+0x5c/0xa0
 exit_to_user_mode_loop+0xac/0xb0
 do_syscall_64+0x2ab/0xfa0
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f6b4b6c2a2b
Code: c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 f3 0f 1e fa 31 f6 e9 05 00 00 00 0f 1f 44 00 00 f3 0f 1e fa b8 a6 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 05 c3 0f 1f 40 00 48 8b 15 b9 83 0d 00 f7 d8
RSP: 002b:00007ffcd5b8b498 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
RAX: 0000000000000000 RBX: 000055b84af0b9e0 RCX: 00007f6b4b6c2a2b
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 000055b84af0bdf0
RBP: 00007ffcd5b8b570 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000103 R11: 0000000000000246 R12: 000055b84af0bae0
R13: 0000000000000000 R14: 000055b84af0bdf0 R15: 0000000000000000
 </TASK>
---[ end trace 0000000000000000 ]---
EXT4-fs (loop0): unmounting filesystem 00d9ea41-f61e-43d0-a449-6be03e7e8428.
EXT4-fs (loop0): sb orphan head is 12
sb_info orphan list:
  inode loop0:12 at ffff8e66950e1df0: mode 40700, nlink 0, next 0
Assertion failure in ext4_put_super() at fs/ext4/super.c:1345: 'list_empty(&sbi->s_orphan)'

Fixes: f046fbb4d81d ("ecryptfs: use new start_creating/start_removing APIs")
Signed-off-by: Tyler Hicks <code@tyhicks.com>
Link: https://patch.msgid.link/20251223194153.2818445-3-code@tyhicks.com
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ecryptfs/inode.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index e73d9de676a6..8ab014db3e03 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -533,6 +533,7 @@ static struct dentry *ecryptfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
 	fsstack_copy_inode_size(dir, lower_dir);
 	set_nlink(dir, lower_dir->i_nlink);
 out:
+	dput(lower_dir_dentry);
 	end_creating(lower_dentry);
 	if (d_really_is_negative(dentry))
 		d_drop(dentry);
-- 
cgit v1.2.3


From 75ddaa4ddc86d31edb15e50152adf4ddee77a6ba Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Wed, 24 Dec 2025 13:00:24 +0100
Subject: pidfs: protect PIDFD_GET_* ioctls() via ifdef

We originally protected PIDFD_GET_<ns-type>_NAMESPACE ioctls() through
ifdefs and recent rework made it possible to drop them. There was an
oversight though. When the relevant namespace is turned off ns->ops will
be NULL so even though opening a file descriptor is perfectly legitimate
it would fail during inode eviction when the file was closed.

The simple fix would be to check ns->ops for NULL and continue to allow
retrieving namespace fds from pidfds, but we don't allow retrieving them
when the relevant namespace type is turned off. So keep the
simplification but add the ifdefs back in.

Link: https://lore.kernel.org/20251222214907.GA189632@quark
Link: https://patch.msgid.link/20251224-ununterbrochen-gagen-ea949b83f8f2@brauner
Fixes: a71e4f103aed ("pidfs: simplify PIDFD_GET_<type>_NAMESPACE ioctls")
Tested-by: Brendan Jackman <jackmanb@kernel.org>
Tested-by: Eric Biggers <ebiggers@kernel.org>
Reported-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/pidfs.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/fs/pidfs.c b/fs/pidfs.c
index dba703d4ce4a..1e20e36e0ed5 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -517,14 +517,18 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	switch (cmd) {
 	/* Namespaces that hang of nsproxy. */
 	case PIDFD_GET_CGROUP_NAMESPACE:
+#ifdef CONFIG_CGROUPS
 		if (!ns_ref_get(nsp->cgroup_ns))
 			break;
 		ns_common = to_ns_common(nsp->cgroup_ns);
+#endif
 		break;
 	case PIDFD_GET_IPC_NAMESPACE:
+#ifdef CONFIG_IPC_NS
 		if (!ns_ref_get(nsp->ipc_ns))
 			break;
 		ns_common = to_ns_common(nsp->ipc_ns);
+#endif
 		break;
 	case PIDFD_GET_MNT_NAMESPACE:
 		if (!ns_ref_get(nsp->mnt_ns))
@@ -532,32 +536,43 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		ns_common = to_ns_common(nsp->mnt_ns);
 		break;
 	case PIDFD_GET_NET_NAMESPACE:
+#ifdef CONFIG_NET_NS
 		if (!ns_ref_get(nsp->net_ns))
 			break;
 		ns_common = to_ns_common(nsp->net_ns);
+#endif
 		break;
 	case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE:
+#ifdef CONFIG_PID_NS
 		if (!ns_ref_get(nsp->pid_ns_for_children))
 			break;
 		ns_common = to_ns_common(nsp->pid_ns_for_children);
+#endif
 		break;
 	case PIDFD_GET_TIME_NAMESPACE:
+#ifdef CONFIG_TIME_NS
 		if (!ns_ref_get(nsp->time_ns))
 			break;
 		ns_common = to_ns_common(nsp->time_ns);
+#endif
 		break;
 	case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE:
+#ifdef CONFIG_TIME_NS
 		if (!ns_ref_get(nsp->time_ns_for_children))
 			break;
 		ns_common = to_ns_common(nsp->time_ns_for_children);
+#endif
 		break;
 	case PIDFD_GET_UTS_NAMESPACE:
+#ifdef CONFIG_UTS_NS
 		if (!ns_ref_get(nsp->uts_ns))
 			break;
 		ns_common = to_ns_common(nsp->uts_ns);
+#endif
 		break;
 	/* Namespaces that don't hang of nsproxy. */
 	case PIDFD_GET_USER_NAMESPACE:
+#ifdef CONFIG_USER_NS
 		scoped_guard(rcu) {
 			struct user_namespace *user_ns;
 
@@ -566,8 +581,10 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 				break;
 			ns_common = to_ns_common(user_ns);
 		}
+#endif
 		break;
 	case PIDFD_GET_PID_NAMESPACE:
+#ifdef CONFIG_PID_NS
 		scoped_guard(rcu) {
 			struct pid_namespace *pid_ns;
 
@@ -576,6 +593,7 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 				break;
 			ns_common = to_ns_common(pid_ns);
 		}
+#endif
 		break;
 	default:
 		return -ENOIOCTLCMD;
-- 
cgit v1.2.3