diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-01-08 11:11:51 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-01-08 11:11:51 -0800 |
| commit | bb93c5ed457fe76597c14717eb994fc5aef22716 (patch) | |
| tree | 99774d9b73c3b7900711d88ee1680d51f23f6b32 | |
| parent | 8c9440fea77440772542d6dbcb5c36182495c164 (diff) | |
| parent | c39e2ae3943d4ee278af4e1b1dcfd5946da1089b (diff) | |
Merge tag 'vfs-6.8.rw' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs rw updates from Christian Brauner:
"This contains updates from Amir for read-write backing file helpers
for stacking filesystems such as overlayfs:
- Fanotify is currently in the process of introducing pre-content
events. Roughly, a new permission event will be added indicating
that it is safe to write to the file being accessed. These events
are used by hierarchical storage managers to e.g., fill the content
of files on first access.
During that work we noticed that our current permission checking is
inconsistent in rw_verify_area() and remap_verify_area().
Especially in the splice code permission checking is done multiple
times. For example, one time for the whole range and then again for
partial ranges inside the iterator.
In addition, we mostly do permission checking before we call
file_start_write() except for a few places where we call it after.
For pre-content events we need such permission checking to be done
before file_start_write(). So this is a nice reason to clean this
all up.
After this series, all permission checking is done before
file_start_write().
As part of this cleanup we also massaged the splice code a bit. We
got rid of a few helpers because we are already drowning in special
read-write helpers. We also cleaned up the return types for splice
helpers.
- Introduce generic read-write helpers for backing files. This lifts
some overlayfs code to common code so it can be used by the FUSE
passthrough work coming in over the next cycles. Make Amir and
Miklos the maintainers for this new subsystem of the vfs"
* tag 'vfs-6.8.rw' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (30 commits)
fs: fix __sb_write_started() kerneldoc formatting
fs: factor out backing_file_mmap() helper
fs: factor out backing_file_splice_{read,write}() helpers
fs: factor out backing_file_{read,write}_iter() helpers
fs: prepare for stackable filesystems backing file helpers
fsnotify: optionally pass access range in file permission hooks
fsnotify: assert that file_start_write() is not held in permission hooks
fsnotify: split fsnotify_perm() into two hooks
fs: use splice_copy_file_range() inline helper
splice: return type ssize_t from all helpers
fs: use do_splice_direct() for nfsd/ksmbd server-side-copy
fs: move file_start_write() into direct_splice_actor()
fs: fork splice_file_range() from do_splice_direct()
fs: create {sb,file}_write_not_started() helpers
fs: create file_write_started() helper
fs: create __sb_write_started() helper
fs: move kiocb_start_write() into vfs_iocb_iter_write()
fs: move permission hook out of do_iter_read()
fs: move permission hook out of do_iter_write()
fs: move file_start_write() into vfs_iter_write()
...
| -rw-r--r-- | MAINTAINERS | 9 | ||||
| -rw-r--r-- | drivers/block/loop.c | 2 | ||||
| -rw-r--r-- | fs/Kconfig | 4 | ||||
| -rw-r--r-- | fs/Makefile | 1 | ||||
| -rw-r--r-- | fs/backing-file.c | 336 | ||||
| -rw-r--r-- | fs/btrfs/ioctl.c | 12 | ||||
| -rw-r--r-- | fs/cachefiles/io.c | 5 | ||||
| -rw-r--r-- | fs/ceph/file.c | 13 | ||||
| -rw-r--r-- | fs/coda/file.c | 2 | ||||
| -rw-r--r-- | fs/fuse/file.c | 5 | ||||
| -rw-r--r-- | fs/internal.h | 8 | ||||
| -rw-r--r-- | fs/nfs/nfs4file.c | 5 | ||||
| -rw-r--r-- | fs/nfsd/vfs.c | 7 | ||||
| -rw-r--r-- | fs/open.c | 42 | ||||
| -rw-r--r-- | fs/overlayfs/Kconfig | 1 | ||||
| -rw-r--r-- | fs/overlayfs/copy_up.c | 30 | ||||
| -rw-r--r-- | fs/overlayfs/file.c | 247 | ||||
| -rw-r--r-- | fs/overlayfs/overlayfs.h | 8 | ||||
| -rw-r--r-- | fs/overlayfs/super.c | 11 | ||||
| -rw-r--r-- | fs/read_write.c | 235 | ||||
| -rw-r--r-- | fs/readdir.c | 4 | ||||
| -rw-r--r-- | fs/remap_range.c | 45 | ||||
| -rw-r--r-- | fs/smb/client/cifsfs.c | 5 | ||||
| -rw-r--r-- | fs/splice.c | 243 | ||||
| -rw-r--r-- | include/linux/backing-file.h | 42 | ||||
| -rw-r--r-- | include/linux/fs.h | 71 | ||||
| -rw-r--r-- | include/linux/fsnotify.h | 50 | ||||
| -rw-r--r-- | include/linux/splice.h | 51 | ||||
| -rw-r--r-- | io_uring/splice.c | 4 | ||||
| -rw-r--r-- | security/security.c | 10 |
30 files changed, 941 insertions, 567 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 5898fcb8640b..550d98cf14c6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8143,6 +8143,15 @@ S: Supported F: fs/iomap/ F: include/linux/iomap.h +FILESYSTEMS [STACKABLE] +M: Miklos Szeredi <miklos@szeredi.hu> +M: Amir Goldstein <amir73il@gmail.com> +L: linux-fsdevel@vger.kernel.org +L: linux-unionfs@vger.kernel.org +S: Maintained +F: fs/backing-file.c +F: include/linux/backing-file.h + FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER M: Riku Voipio <riku.voipio@iki.fi> L: linux-hwmon@vger.kernel.org diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 9f2d412fc560..8a8cd4fc9238 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -245,9 +245,7 @@ static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos) iov_iter_bvec(&i, ITER_SOURCE, bvec, 1, bvec->bv_len); - file_start_write(file); bw = vfs_iter_write(file, &i, ppos, 0); - file_end_write(file); if (likely(bw == bvec->bv_len)) return 0; diff --git a/fs/Kconfig b/fs/Kconfig index 42837617a55b..231c7703793a 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -18,6 +18,10 @@ config VALIDATE_FS_PARSER config FS_IOMAP bool +# Stackable filesystems +config FS_STACK + bool + config BUFFER_HEAD bool diff --git a/fs/Makefile b/fs/Makefile index 75522f88e763..a6962c588962 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -39,6 +39,7 @@ obj-$(CONFIG_COMPAT_BINFMT_ELF) += compat_binfmt_elf.o obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o +obj-$(CONFIG_FS_STACK) += backing-file.o obj-$(CONFIG_FS_MBCACHE) += mbcache.o obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o obj-$(CONFIG_NFS_COMMON) += nfs_common/ diff --git a/fs/backing-file.c b/fs/backing-file.c new file mode 100644 index 000000000000..a681f38d84d8 --- /dev/null +++ b/fs/backing-file.c @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Common helpers for stackable filesystems and backing files. 
+ * + * Forked from fs/overlayfs/file.c. + * + * Copyright (C) 2017 Red Hat, Inc. + * Copyright (C) 2023 CTERA Networks. + */ + +#include <linux/fs.h> +#include <linux/backing-file.h> +#include <linux/splice.h> +#include <linux/mm.h> + +#include "internal.h" + +/** + * backing_file_open - open a backing file for kernel internal use + * @user_path: path that the user reuqested to open + * @flags: open flags + * @real_path: path of the backing file + * @cred: credentials for open + * + * Open a backing file for a stackable filesystem (e.g., overlayfs). + * @user_path may be on the stackable filesystem and @real_path on the + * underlying filesystem. In this case, we want to be able to return the + * @user_path of the stackable filesystem. This is done by embedding the + * returned file into a container structure that also stores the stacked + * file's path, which can be retrieved using backing_file_user_path(). + */ +struct file *backing_file_open(const struct path *user_path, int flags, + const struct path *real_path, + const struct cred *cred) +{ + struct file *f; + int error; + + f = alloc_empty_backing_file(flags, cred); + if (IS_ERR(f)) + return f; + + path_get(user_path); + *backing_file_user_path(f) = *user_path; + error = vfs_open(real_path, f); + if (error) { + fput(f); + f = ERR_PTR(error); + } + + return f; +} +EXPORT_SYMBOL_GPL(backing_file_open); + +struct backing_aio { + struct kiocb iocb; + refcount_t ref; + struct kiocb *orig_iocb; + /* used for aio completion */ + void (*end_write)(struct file *); + struct work_struct work; + long res; +}; + +static struct kmem_cache *backing_aio_cachep; + +#define BACKING_IOCB_MASK \ + (IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND) + +static rwf_t iocb_to_rw_flags(int flags) +{ + return (__force rwf_t)(flags & BACKING_IOCB_MASK); +} + +static void backing_aio_put(struct backing_aio *aio) +{ + if (refcount_dec_and_test(&aio->ref)) { + fput(aio->iocb.ki_filp); + kmem_cache_free(backing_aio_cachep, 
aio); + } +} + +static void backing_aio_cleanup(struct backing_aio *aio, long res) +{ + struct kiocb *iocb = &aio->iocb; + struct kiocb *orig_iocb = aio->orig_iocb; + + if (aio->end_write) + aio->end_write(orig_iocb->ki_filp); + + orig_iocb->ki_pos = iocb->ki_pos; + backing_aio_put(aio); +} + +static void backing_aio_rw_complete(struct kiocb *iocb, long res) +{ + struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb); + struct kiocb *orig_iocb = aio->orig_iocb; + + if (iocb->ki_flags & IOCB_WRITE) + kiocb_end_write(iocb); + + backing_aio_cleanup(aio, res); + orig_iocb->ki_complete(orig_iocb, res); +} + +static void backing_aio_complete_work(struct work_struct *work) +{ + struct backing_aio *aio = container_of(work, struct backing_aio, work); + + backing_aio_rw_complete(&aio->iocb, aio->res); +} + +static void backing_aio_queue_completion(struct kiocb *iocb, long res) +{ + struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb); + + /* + * Punt to a work queue to serialize updates of mtime/size. 
+ */ + aio->res = res; + INIT_WORK(&aio->work, backing_aio_complete_work); + queue_work(file_inode(aio->orig_iocb->ki_filp)->i_sb->s_dio_done_wq, + &aio->work); +} + +static int backing_aio_init_wq(struct kiocb *iocb) +{ + struct super_block *sb = file_inode(iocb->ki_filp)->i_sb; + + if (sb->s_dio_done_wq) + return 0; + + return sb_init_dio_done_wq(sb); +} + + +ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter, + struct kiocb *iocb, int flags, + struct backing_file_ctx *ctx) +{ + struct backing_aio *aio = NULL; + const struct cred *old_cred; + ssize_t ret; + + if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING))) + return -EIO; + + if (!iov_iter_count(iter)) + return 0; + + if (iocb->ki_flags & IOCB_DIRECT && + !(file->f_mode & FMODE_CAN_ODIRECT)) + return -EINVAL; + + old_cred = override_creds(ctx->cred); + if (is_sync_kiocb(iocb)) { + rwf_t rwf = iocb_to_rw_flags(flags); + + ret = vfs_iter_read(file, iter, &iocb->ki_pos, rwf); + } else { + ret = -ENOMEM; + aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL); + if (!aio) + goto out; + + aio->orig_iocb = iocb; + kiocb_clone(&aio->iocb, iocb, get_file(file)); + aio->iocb.ki_complete = backing_aio_rw_complete; + refcount_set(&aio->ref, 2); + ret = vfs_iocb_iter_read(file, &aio->iocb, iter); + backing_aio_put(aio); + if (ret != -EIOCBQUEUED) + backing_aio_cleanup(aio, ret); + } +out: + revert_creds(old_cred); + + if (ctx->accessed) + ctx->accessed(ctx->user_file); + + return ret; +} +EXPORT_SYMBOL_GPL(backing_file_read_iter); + +ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter, + struct kiocb *iocb, int flags, + struct backing_file_ctx *ctx) +{ + const struct cred *old_cred; + ssize_t ret; + + if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING))) + return -EIO; + + if (!iov_iter_count(iter)) + return 0; + + ret = file_remove_privs(ctx->user_file); + if (ret) + return ret; + + if (iocb->ki_flags & IOCB_DIRECT && + !(file->f_mode & FMODE_CAN_ODIRECT)) + return -EINVAL; + + /* 
+ * Stacked filesystems don't support deferred completions, don't copy + * this property in case it is set by the issuer. + */ + flags &= ~IOCB_DIO_CALLER_COMP; + + old_cred = override_creds(ctx->cred); + if (is_sync_kiocb(iocb)) { + rwf_t rwf = iocb_to_rw_flags(flags); + + ret = vfs_iter_write(file, iter, &iocb->ki_pos, rwf); + if (ctx->end_write) + ctx->end_write(ctx->user_file); + } else { + struct backing_aio *aio; + + ret = backing_aio_init_wq(iocb); + if (ret) + goto out; + + ret = -ENOMEM; + aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL); + if (!aio) + goto out; + + aio->orig_iocb = iocb; + aio->end_write = ctx->end_write; + kiocb_clone(&aio->iocb, iocb, get_file(file)); + aio->iocb.ki_flags = flags; + aio->iocb.ki_complete = backing_aio_queue_completion; + refcount_set(&aio->ref, 2); + ret = vfs_iocb_iter_write(file, &aio->iocb, iter); + backing_aio_put(aio); + if (ret != -EIOCBQUEUED) + backing_aio_cleanup(aio, ret); + } +out: + revert_creds(old_cred); + + return ret; +} +EXPORT_SYMBOL_GPL(backing_file_write_iter); + +ssize_t backing_file_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags, + struct backing_file_ctx *ctx) +{ + const struct cred *old_cred; + ssize_t ret; + + if (WARN_ON_ONCE(!(in->f_mode & FMODE_BACKING))) + return -EIO; + + old_cred = override_creds(ctx->cred); + ret = vfs_splice_read(in, ppos, pipe, len, flags); + revert_creds(old_cred); + + if (ctx->accessed) + ctx->accessed(ctx->user_file); + + return ret; +} +EXPORT_SYMBOL_GPL(backing_file_splice_read); + +ssize_t backing_file_splice_write(struct pipe_inode_info *pipe, + struct file *out, loff_t *ppos, size_t len, + unsigned int flags, + struct backing_file_ctx *ctx) +{ + const struct cred *old_cred; + ssize_t ret; + + if (WARN_ON_ONCE(!(out->f_mode & FMODE_BACKING))) + return -EIO; + + ret = file_remove_privs(ctx->user_file); + if (ret) + return ret; + + old_cred = override_creds(ctx->cred); + file_start_write(out); + 
ret = iter_file_splice_write(pipe, out, ppos, len, flags); + file_end_write(out); + revert_creds(old_cred); + + if (ctx->end_write) + ctx->end_write(ctx->user_file); + + return ret; +} +EXPORT_SYMBOL_GPL(backing_file_splice_write); + +int backing_file_mmap(struct file *file, struct vm_area_struct *vma, + struct backing_file_ctx *ctx) +{ + const struct cred *old_cred; + int ret; + + if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)) || + WARN_ON_ONCE(ctx->user_file != vma->vm_file)) + return -EIO; + + if (!file->f_op->mmap) + return -ENODEV; + + vma_set_file(vma, file); + + old_cred = override_creds(ctx->cred); + ret = call_mmap(vma->vm_file, vma); + revert_creds(old_cred); + + if (ctx->accessed) + ctx->accessed(ctx->user_file); + + return ret; +} +EXPORT_SYMBOL_GPL(backing_file_mmap); + +static int __init backing_aio_init(void) +{ + backing_aio_cachep = kmem_cache_create("backing_aio", + sizeof(struct backing_aio), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!backing_aio_cachep) + return -ENOMEM; + + return 0; +} +fs_initcall(backing_aio_init); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a1743904202b..41b479861b3c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4533,29 +4533,29 @@ static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool if (ret < 0) goto out_acct; - file_start_write(file); - if (iov_iter_count(&iter) == 0) { ret = 0; - goto out_end_write; + goto out_iov; } pos = args.offset; ret = rw_verify_area(WRITE, file, &pos, args.len); if (ret < 0) - goto out_end_write; + goto out_iov; init_sync_kiocb(&kiocb, file); ret = kiocb_set_rw_flags(&kiocb, 0); if (ret) - goto out_end_write; + goto out_iov; kiocb.ki_pos = pos; + file_start_write(file); + ret = btrfs_do_write_iter(&kiocb, &iter, &args); if (ret > 0) fsnotify_modify(file); -out_end_write: file_end_write(file); +out_iov: kfree(iov); out_acct: if (ret > 0) diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c index 009d23cd435b..5857241c5918 100644 --- 
a/fs/cachefiles/io.c +++ b/fs/cachefiles/io.c @@ -259,7 +259,8 @@ static void cachefiles_write_complete(struct kiocb *iocb, long ret) _enter("%ld", ret); - kiocb_end_write(iocb); + if (ki->was_async) + kiocb_end_write(iocb); if (ret < 0) trace_cachefiles_io_error(object, inode, ret, @@ -319,8 +320,6 @@ int __cachefiles_write(struct cachefiles_object *object, ki->iocb.ki_complete = cachefiles_write_complete; atomic_long_add(ki->b_writing, &cache->b_writing); - kiocb_start_write(&ki->iocb); - get_file(ki->iocb.ki_filp); cachefiles_grab_object(object, cachefiles_obj_get_ioreq); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 3b5aae29e944..d380d9dad0e0 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -12,6 +12,7 @@ #include <linux/falloc.h> #include <linux/iversion.h> #include <linux/ktime.h> +#include <linux/splice.h> #include "super.h" #include "mds_client.h" @@ -3010,8 +3011,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, * {read,write}_iter, which will get caps again. 
*/ put_rd_wr_caps(src_ci, src_got, dst_ci, dst_got); - ret = do_splice_direct(src_file, &src_off, dst_file, - &dst_off, src_objlen, flags); + ret = splice_file_range(src_file, &src_off, dst_file, &dst_off, + src_objlen); /* Abort on short copies or on error */ if (ret < (long)src_objlen) { doutc(cl, "Failed partial copy (%zd)\n", ret); @@ -3065,8 +3066,8 @@ out_caps: */ if (len && (len < src_ci->i_layout.object_size)) { doutc(cl, "Final partial copy of %zu bytes\n", len); - bytes = do_splice_direct(src_file, &src_off, dst_file, - &dst_off, len, flags); + bytes = splice_file_range(src_file, &src_off, dst_file, + &dst_off, len); if (bytes > 0) ret += bytes; else @@ -3089,8 +3090,8 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off, len, flags); if (ret == -EOPNOTSUPP || ret == -EXDEV) - ret = generic_copy_file_range(src_file, src_off, dst_file, - dst_off, len, flags); + ret = splice_copy_file_range(src_file, src_off, dst_file, + dst_off, len); return ret; } diff --git a/fs/coda/file.c b/fs/coda/file.c index 16acc58311ea..148856a582a9 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -79,14 +79,12 @@ coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to) if (ret) goto finish_write; - file_start_write(host_file); inode_lock(coda_inode); ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos, 0); coda_inode->i_size = file_inode(host_file)->i_size; coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9; inode_set_mtime_to_ts(coda_inode, inode_set_ctime_current(coda_inode)); inode_unlock(coda_inode); - file_end_write(host_file); finish_write: venus_access_intent(coda_inode->i_sb, coda_i2f(coda_inode), diff --git a/fs/fuse/file.c b/fs/fuse/file.c index a660f1f21540..148a71b8b4d0 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -19,6 +19,7 @@ #include <linux/uio.h> #include <linux/fs.h> #include <linux/filelock.h> +#include <linux/splice.h> static int fuse_send_open(struct fuse_mount *fm, u64 nodeid, unsigned int open_flags, int 
opcode, @@ -3195,8 +3196,8 @@ static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off, len, flags); if (ret == -EOPNOTSUPP || ret == -EXDEV) - ret = generic_copy_file_range(src_file, src_off, dst_file, - dst_off, len, flags); + ret = splice_copy_file_range(src_file, src_off, dst_file, + dst_off, len); return ret; } diff --git a/fs/internal.h b/fs/internal.h index 3a970a0644ce..bf2ee2e0d45d 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -244,10 +244,10 @@ int do_statx(int dfd, struct filename *filename, unsigned int flags, /* * fs/splice.c: */ -long splice_file_to_pipe(struct file *in, - struct pipe_inode_info *opipe, - loff_t *offset, - size_t len, unsigned int flags); +ssize_t splice_file_to_pipe(struct file *in, + struct pipe_inode_info *opipe, + loff_t *offset, + size_t len, unsigned int flags); /* * fs/xattr.c: diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 02788c3c85e5..e238abc78a13 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -10,6 +10,7 @@ #include <linux/mount.h> #include <linux/nfs_fs.h> #include <linux/nfs_ssc.h> +#include <linux/splice.h> #include "delegation.h" #include "internal.h" #include "iostat.h" @@ -195,8 +196,8 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, ret = __nfs4_copy_file_range(file_in, pos_in, file_out, pos_out, count, flags); if (ret == -EOPNOTSUPP || ret == -EXDEV) - ret = generic_copy_file_range(file_in, pos_in, file_out, - pos_out, count, flags); + ret = splice_copy_file_range(file_in, pos_in, file_out, + pos_out, count); return ret; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index e01e4e2acbd9..707ef21f275b 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1039,7 +1039,10 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, ssize_t host_err; trace_nfsd_read_splice(rqstp, fhp, offset, *count); - host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); + host_err = rw_verify_area(READ, file, &offset, *count); + if (!host_err) 
+ host_err = splice_direct_to_actor(file, &sd, + nfsd_direct_splice_actor); return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err); } @@ -1176,9 +1179,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, since = READ_ONCE(file->f_wb_err); if (verf) nfsd_copy_write_verifier(verf, nn); - file_start_write(file); host_err = vfs_iter_write(file, &iter, &pos, flags); - file_end_write(file); if (host_err < 0) { commit_reset_write_verifier(nn, rqstp, host_err); goto out_nfserr; diff --git a/fs/open.c b/fs/open.c index 954d8fcbb635..a84d21e55c39 100644 --- a/fs/open.c +++ b/fs/open.c @@ -304,6 +304,10 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (ret) return ret; + ret = fsnotify_file_area_perm(file, MAY_WRITE, &offset, len); + if (ret) + return ret; + if (S_ISFIFO(inode->i_mode)) return -ESPIPE; @@ -1178,44 +1182,6 @@ struct file *kernel_file_open(const struct path *path, int flags, } EXPORT_SYMBOL_GPL(kernel_file_open); -/** - * backing_file_open - open a backing file for kernel internal use - * @user_path: path that the user reuqested to open - * @flags: open flags - * @real_path: path of the backing file - * @cred: credentials for open - * - * Open a backing file for a stackable filesystem (e.g., overlayfs). - * @user_path may be on the stackable filesystem and @real_path on the - * underlying filesystem. In this case, we want to be able to return the - * @user_path of the stackable filesystem. This is done by embedding the - * returned file into a container structure that also stores the stacked - * file's path, which can be retrieved using backing_file_user_path(). 
- */ -struct file *backing_file_open(const struct path *user_path, int flags, - const struct path *real_path, - const struct cred *cred) -{ - struct file *f; - int error; - - f = alloc_empty_backing_file(flags, cred); - if (IS_ERR(f)) - return f; - - path_get(user_path); - *backing_file_user_path(f) = *user_path; - f->f_path = *real_path; - error = do_dentry_open(f, d_inode(real_path->dentry), NULL); - if (error) { - fput(f); - f = ERR_PTR(error); - } - - return f; -} -EXPORT_SYMBOL_GPL(backing_file_open); - #define WILL_CREATE(flags) (flags & (O_CREAT | __O_TMPFILE)) #define O_PATH_FLAGS (O_DIRECTORY | O_NOFOLLOW | O_PATH | O_CLOEXEC) diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index fec5020c3495..2ac67e04a6fb 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config OVERLAY_FS tristate "Overlay filesystem support" + select FS_STACK select EXPORTFS help An overlay filesystem combines two filesystems - an 'upper' filesystem diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 8bea66c97316..45cadc3aed85 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -230,6 +230,19 @@ static int ovl_copy_fileattr(struct inode *inode, const struct path *old, return ovl_real_fileattr_set(new, &newfa); } +static int ovl_verify_area(loff_t pos, loff_t pos2, loff_t len, loff_t totlen) +{ + loff_t tmp; + + if (WARN_ON_ONCE(pos != pos2)) + return -EIO; + if (WARN_ON_ONCE(pos < 0 || len < 0 || totlen < 0)) + return -EIO; + if (WARN_ON_ONCE(check_add_overflow(pos, len, &tmp))) + return -EIO; + return 0; +} + static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, struct file *new_file, loff_t len) { @@ -244,13 +257,20 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, int error = 0; ovl_path_lowerdata(dentry, &datapath); - if (WARN_ON(datapath.dentry == NULL)) + if (WARN_ON_ONCE(datapath.dentry == NULL) || + WARN_ON_ONCE(len < 0)) return -EIO; 
old_file = ovl_path_open(&datapath, O_LARGEFILE | O_RDONLY); if (IS_ERR(old_file)) return PTR_ERR(old_file); + error = rw_verify_area(READ, old_file, &old_pos, len); + if (!error) + error = rw_verify_area(WRITE, new_file, &new_pos, len); + if (error) + goto out_fput; + /* Try to use clone_file_range to clone up within the same fs */ ovl_start_write(dentry); cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0); @@ -265,7 +285,7 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, while (len) { size_t this_len = OVL_COPY_UP_CHUNK_SIZE; - long bytes; + ssize_t bytes; if (len < this_len) this_len = len; @@ -309,11 +329,13 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, } } - ovl_start_write(dentry); + error = ovl_verify_area(old_pos, new_pos, this_len, len); + if (error) + break; + bytes = do_splice_direct(old_file, &old_pos, new_file, &new_pos, this_len, SPLICE_F_MOVE); - ovl_end_write(dentry); if (bytes <= 0) { error = bytes; break; diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 131621daeb13..05536964d37f 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -9,25 +9,11 @@ #include <linux/xattr.h> #include <linux/uio.h> #include <linux/uaccess.h> -#include <linux/splice.h> #include <linux/security.h> -#include <linux/mm.h> #include <linux/fs.h> +#include <linux/backing-file.h> #include "overlayfs.h" -#include "../internal.h" /* for sb_init_dio_done_wq */ - -struct ovl_aio_req { - struct kiocb iocb; - refcount_t ref; - struct kiocb *orig_iocb; - /* used for aio completion */ - struct work_struct work; - long res; -}; - -static struct kmem_cache *ovl_aio_request_cachep; - static char ovl_whatisit(struct inode *inode, struct inode *realinode) { if (realinode != ovl_inode_upper(inode)) @@ -274,83 +260,16 @@ static void ovl_file_accessed(struct file *file) touch_atime(&file->f_path); } -#define OVL_IOCB_MASK \ - (IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND) - -static rwf_t 
iocb_to_rw_flags(int flags) -{ - return (__force rwf_t)(flags & OVL_IOCB_MASK); -} - -static inline void ovl_aio_put(struct ovl_aio_req *aio_req) -{ - if (refcount_dec_and_test(&aio_req->ref)) { - fput(aio_req->iocb.ki_filp); - kmem_cache_free(ovl_aio_request_cachep, aio_req); - } -} - -static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req) -{ - struct kiocb *iocb = &aio_req->iocb; - struct kiocb * |
