diff options
64 files changed, 281 insertions, 187 deletions
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index 4f34749126f5..85f590254f07 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -1273,3 +1273,15 @@ to have them set. Better yet, think hard whether you need different ->d_op for different dentries - if not, just use set_default_d_op() at mount time and be done with that. Currently procfs is the only thing that really needs ->d_op varying between dentries. + +--- + +**highly recommended** + +The file operations mmap() callback is deprecated in favour of +mmap_prepare(). This passes a pointer to a vm_area_desc to the callback +rather than a VMA, as the VMA at this stage is not yet valid. + +The vm_area_desc provides the minimum required information for a filesystem +to initialise state upon memory mapping of a file-backed region, and output +parameters for the file system to set this state. diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst index 57604b07bdc9..d154d6fd0931 100644 --- a/Documentation/filesystems/vfs.rst +++ b/Documentation/filesystems/vfs.rst @@ -1072,12 +1072,14 @@ This describes how the VFS can manipulate an open file. As of kernel struct file_operations { struct module *owner; + fop_flags_t fop_flags; loff_t (*llseek) (struct file *, loff_t, int); ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); - int (*iopoll)(struct kiocb *kiocb, bool spin); + int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *, + unsigned int flags); int (*iterate_shared) (struct file *, struct dir_context *); __poll_t (*poll) (struct file *, struct poll_table_struct *); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); @@ -1094,18 +1096,24 @@ This describes how the VFS can manipulate an open file. As of kernel int (*flock) (struct file *, int, struct file_lock *); ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); - int (*setlease)(struct file *, long, struct file_lock **, void **); + void (*splice_eof)(struct file *file); + int (*setlease)(struct file *, int, struct file_lease **, void **); long (*fallocate)(struct file *file, int mode, loff_t offset, loff_t len); void (*show_fdinfo)(struct seq_file *m, struct file *f); #ifndef CONFIG_MMU unsigned (*mmap_capabilities)(struct file *); #endif - ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int); + ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, + loff_t, size_t, unsigned int); loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); int (*fadvise)(struct file *, loff_t, loff_t, int); + int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags); + int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *, + unsigned int poll_flags); + int (*mmap_prepare)(struct vm_area_desc *); }; Again, all methods are called without any locks being held, unless @@ -1145,7 +1153,8 @@ otherwise noted. used on 64 bit kernels. ``mmap`` - called by the mmap(2) system call + called by the mmap(2) system call. Deprecated in favour of + ``mmap_prepare``. ``open`` called by the VFS when an inode should be opened. When the VFS @@ -1222,6 +1231,11 @@ otherwise noted. ``fadvise`` possibly called by the fadvise64() system call. +``mmap_prepare`` + Called by the mmap(2) system call. Allows a VFS to set up a + file-backed memory mapping, most notably establishing relevant + private state and VMA callbacks. + Note that the file operations are implemented by the specific filesystem in which the inode resides. When opening a device node (character or block special) most filesystems will call special diff --git a/block/fops.c b/block/fops.c index 16c8d4593d57..14f706d3af6e 100644 --- a/block/fops.c +++ b/block/fops.c @@ -920,14 +920,14 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, return error; } -static int blkdev_mmap(struct file *file, struct vm_area_struct *vma) +static int blkdev_mmap_prepare(struct vm_area_desc *desc) { - struct inode *bd_inode = bdev_file_inode(file); + struct file *file = desc->file; - if (bdev_read_only(I_BDEV(bd_inode))) - return generic_file_readonly_mmap(file, vma); + if (bdev_read_only(I_BDEV(bdev_file_inode(file)))) + return generic_file_readonly_mmap_prepare(desc); - return generic_file_mmap(file, vma); + return generic_file_mmap_prepare(desc); } const struct file_operations def_blk_fops = { @@ -937,7 +937,7 @@ const struct file_operations def_blk_fops = { .read_iter = blkdev_read_iter, .write_iter = blkdev_write_iter, .iopoll = iocb_bio_iopoll, - .mmap = blkdev_mmap, + .mmap_prepare = blkdev_mmap_prepare, .fsync = blkdev_fsync, .unlocked_ioctl = blkdev_ioctl, #ifdef CONFIG_COMPAT diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 05e440643aa2..f4f1c979d1b9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -105,7 +105,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct * if (!obj->base.filp) return -ENODEV; - ret = call_mmap(obj->base.filp, vma); + ret = vfs_mmap(obj->base.filp, vma); if (ret) return ret; diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 348cc90bf9c5..eb0b083da269 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -454,9 +454,10 @@ int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end, } static int -v9fs_file_mmap(struct file *filp, struct vm_area_struct *vma) +v9fs_file_mmap_prepare(struct vm_area_desc *desc) { int retval; + struct file *filp = desc->file; struct inode *inode = file_inode(filp); struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); @@ -464,12 +465,12 @@ v9fs_file_mmap(struct file *filp, struct vm_area_struct *vma) if (!(v9ses->cache & CACHE_WRITEBACK)) { p9_debug(P9_DEBUG_CACHE, "(read-only mmap mode)"); - return generic_file_readonly_mmap(filp, vma); + return generic_file_readonly_mmap_prepare(desc); } - retval = generic_file_mmap(filp, vma); + retval = generic_file_mmap_prepare(desc); if (!retval) - vma->vm_ops = &v9fs_mmap_file_vm_ops; + desc->vm_ops = &v9fs_mmap_file_vm_ops; return retval; } @@ -516,7 +517,7 @@ const struct file_operations v9fs_file_operations = { .open = v9fs_file_open, .release = v9fs_dir_release, .lock = v9fs_file_lock, - .mmap = generic_file_readonly_mmap, + .mmap_prepare = generic_file_readonly_mmap_prepare, .splice_read = v9fs_file_splice_read, .splice_write = iter_file_splice_write, .fsync = v9fs_file_fsync, @@ -531,7 +532,7 @@ const struct file_operations v9fs_file_operations_dotl = { .release = v9fs_dir_release, .lock = v9fs_file_lock_dotl, .flock = v9fs_file_flock_dotl, - .mmap = v9fs_file_mmap, + .mmap_prepare = v9fs_file_mmap_prepare, .splice_read = v9fs_file_splice_read, .splice_write = iter_file_splice_write, .fsync = v9fs_file_fsync_dotl, diff --git a/fs/adfs/file.c b/fs/adfs/file.c index ee80718aaeec..cd13165fd904 100644 --- a/fs/adfs/file.c +++ b/fs/adfs/file.c @@ -25,7 +25,7 @@ const struct file_operations adfs_file_operations = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, - .mmap = generic_file_mmap, + .mmap_prepare = generic_file_mmap_prepare, .fsync = generic_file_fsync, .write_iter = generic_file_write_iter, .splice_read = filemap_splice_read, diff --git a/fs/affs/file.c b/fs/affs/file.c index 219ea0353906..765c3443663e 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -1003,7 +1003,7 @@ const struct file_operations affs_file_operations = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, - .mmap = generic_file_mmap, + .mmap_prepare = generic_file_mmap_prepare, .open = affs_file_open, .release = affs_file_release, .fsync = affs_file_fsync, diff --git a/fs/afs/file.c b/fs/afs/file.c index fc15497608c6..f66a92294284 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -19,7 +19,7 @@ #include <trace/events/netfs.h> #include "internal.h" -static int afs_file_mmap(struct file *file, struct vm_area_struct *vma); +static int afs_file_mmap_prepare(struct vm_area_desc *desc); static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter); static ssize_t afs_file_splice_read(struct file *in, loff_t *ppos, @@ -35,7 +35,7 @@ const struct file_operations afs_file_operations = { .llseek = generic_file_llseek, .read_iter = afs_file_read_iter, .write_iter = netfs_file_write_iter, - .mmap = afs_file_mmap, + .mmap_prepare = afs_file_mmap_prepare, .splice_read = afs_file_splice_read, .splice_write = iter_file_splice_write, .fsync = afs_fsync, @@ -492,16 +492,16 @@ static void afs_drop_open_mmap(struct afs_vnode *vnode) /* * Handle setting up a memory mapping on an AFS file. */ -static int afs_file_mmap(struct file *file, struct vm_area_struct *vma) +static int afs_file_mmap_prepare(struct vm_area_desc *desc) { - struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); + struct afs_vnode *vnode = AFS_FS_I(file_inode(desc->file)); int ret; afs_add_open_mmap(vnode); - ret = generic_file_mmap(file, vma); + ret = generic_file_mmap_prepare(desc); if (ret == 0) - vma->vm_ops = &afs_vm_ops; + desc->vm_ops = &afs_vm_ops; else afs_drop_open_mmap(vnode); return ret; @@ -392,15 +392,15 @@ static const struct vm_operations_struct aio_ring_vm_ops = { #endif }; -static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma) +static int aio_ring_mmap_prepare(struct vm_area_desc *desc) { - vm_flags_set(vma, VM_DONTEXPAND); - vma->vm_ops = &aio_ring_vm_ops; + desc->vm_flags |= VM_DONTEXPAND; + desc->vm_ops = &aio_ring_vm_ops; return 0; } static const struct file_operations aio_ring_fops = { - .mmap = aio_ring_mmap, + .mmap_prepare = aio_ring_mmap_prepare, }; #if IS_ENABLED(CONFIG_MIGRATION) diff --git a/fs/backing-file.c b/fs/backing-file.c index 8c7396bff121..15a7f8031084 100644 --- a/fs/backing-file.c +++ b/fs/backing-file.c @@ -333,13 +333,13 @@ int backing_file_mmap(struct file *file, struct vm_area_struct *vma, if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING))) return -EIO; - if (!file->f_op->mmap) + if (!can_mmap_file(file)) return -ENODEV; vma_set_file(vma, file); old_cred = override_creds(ctx->cred); - ret = call_mmap(vma->vm_file, vma); + ret = vfs_mmap(vma->vm_file, vma); revert_creds(old_cred); if (ctx->accessed) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index e54e4f255b22..159119079a88 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1551,11 +1551,11 @@ static const struct vm_operations_struct bch_vm_ops = { .page_mkwrite = bch2_page_mkwrite, }; -static int bch2_mmap(struct file *file, struct vm_area_struct *vma) +static int bch2_mmap_prepare(struct vm_area_desc *desc) { - file_accessed(file); + file_accessed(desc->file); - vma->vm_ops = &bch_vm_ops; + desc->vm_ops = &bch_vm_ops; return 0; } @@ -1739,7 +1739,7 @@ static const struct file_operations bch_file_operations = { .llseek = bch2_llseek, .read_iter = bch2_read_iter, .write_iter = bch2_write_iter, - .mmap = bch2_mmap, + .mmap_prepare = bch2_mmap_prepare, .get_unmapped_area = thp_get_unmapped_area, .fsync = bch2_fsync, .splice_read = filemap_splice_read, diff --git a/fs/bfs/file.c b/fs/bfs/file.c index 10dc0151ea55..d33d6bde992b 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -27,7 +27,7 @@ const struct file_operations bfs_file_operations = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, - .mmap = generic_file_mmap, + .mmap_prepare = generic_file_mmap_prepare, .splice_read = filemap_splice_read, }; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index a43363d593e5..e3b56b603192 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -646,7 +646,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, if (!elf_check_arch(interp_elf_ex) || elf_check_fdpic(interp_elf_ex)) goto out; - if (!interpreter->f_op->mmap) + if (!can_mmap_file(interpreter)) goto out; total_size = total_mapping_size(interp_elf_phdata, @@ -848,7 +848,7 @@ static int load_elf_binary(struct linux_binprm *bprm) goto out; if (elf_check_fdpic(elf_ex)) goto out; - if (!bprm->file->f_op->mmap) + if (!can_mmap_file(bprm->file)) goto out; elf_phdata = load_elf_phdrs(elf_ex, bprm->file); diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 9133f3827f90..59b138062352 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -109,7 +109,7 @@ static int is_elf(struct elfhdr *hdr, struct file *file) return 0; if (!elf_check_arch(hdr)) return 0; - if (!file->f_op->mmap) + if (!can_mmap_file(file)) return 0; return 1; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index bc1e00db96c9..204674934795 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2039,15 +2039,16 @@ static const struct vm_operations_struct btrfs_file_vm_ops = { .page_mkwrite = btrfs_page_mkwrite, }; -static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) +static int btrfs_file_mmap_prepare(struct vm_area_desc *desc) { + struct file *filp = desc->file; struct address_space *mapping = filp->f_mapping; if (!mapping->a_ops->read_folio) return -ENOEXEC; file_accessed(filp); - vma->vm_ops = &btrfs_file_vm_ops; + desc->vm_ops = &btrfs_file_vm_ops; return 0; } @@ -3821,7 +3822,7 @@ const struct file_operations btrfs_file_operations = { .splice_read = filemap_splice_read, .write_iter = btrfs_file_write_iter, .splice_write = iter_file_splice_write, - .mmap = btrfs_file_mmap, + .mmap_prepare = btrfs_file_mmap_prepare, .open = btrfs_file_open, .release = btrfs_release_file, .get_unmapped_area = thp_get_unmapped_area, diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 02468c848cce..8b202d789e93 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -2334,13 +2334,13 @@ static const struct vm_operations_struct ceph_vmops = { .page_mkwrite = ceph_page_mkwrite, }; -int ceph_mmap(struct file *file, struct vm_area_struct *vma) +int ceph_mmap_prepare(struct vm_area_desc *desc) { - struct address_space *mapping = file->f_mapping; + struct address_space *mapping = desc->file->f_mapping; if (!mapping->a_ops->read_folio) return -ENOEXEC; - vma->vm_ops = &ceph_vmops; + desc->vm_ops = &ceph_vmops; return 0; } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index a7254cab44cc..8c06dc4655a8 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -3171,7 +3171,7 @@ const struct file_operations ceph_file_fops = { .llseek = ceph_llseek, .read_iter = ceph_read_iter, .write_iter = ceph_write_iter, - .mmap = ceph_mmap, + .mmap_prepare = ceph_mmap_prepare, .fsync = ceph_fsync, .lock = ceph_lock, .setlease = simple_nosetlease, diff --git a/fs/ceph/super.h b/fs/ceph/super.h index bb0db0cc8003..cf176aab0f82 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1286,7 +1286,7 @@ extern void __ceph_touch_fmode(struct ceph_inode_info *ci, /* addr.c */ extern const struct address_space_operations ceph_aops; extern const struct netfs_request_ops ceph_netfs_ops; -extern int ceph_mmap(struct file *file, struct vm_area_struct *vma); +int ceph_mmap_prepare(struct vm_area_desc *desc); extern int ceph_uninline_data(struct file *file); extern int ceph_pool_perm_check(struct inode *inode, int need); extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc); diff --git a/fs/coda/file.c b/fs/coda/file.c index 148856a582a9..a390b5d21196 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -160,7 +160,7 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma) size_t count; int ret; - if (!host_file->f_op->mmap) + if (!can_mmap_file(host_file)) return -ENODEV; if (WARN_ON(coda_file != vma->vm_file)) @@ -199,10 +199,10 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma) spin_unlock(&cii->c_lock); vma->vm_file = get_file(host_file); - ret = call_mmap(vma->vm_file, vma); + ret = vfs_mmap(vma->vm_file, vma); if (ret) { - /* if call_mmap fails, our caller will put host_file so we + /* if vfs_mmap fails, our caller will put host_file so we * should drop the reference to the coda_file that we got. */ fput(coda_file); diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index ce0a3c5ed0ca..5f8f96da09fe 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -193,7 +193,7 @@ static int ecryptfs_mmap(struct file *file, struct vm_area_struct *vma) * natively. If FILESYSTEM_MAX_STACK_DEPTH > 2 or ecryptfs * allows recursive mounting, this will need to be extended. */ - if (!lower_file->f_op->mmap) + if (!can_mmap_file(lower_file)) return -ENODEV; return generic_file_mmap(file, vma); } diff --git a/fs/erofs/data.c b/fs/erofs/data.c index f46c47335b9c..3b1ba571c728 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -432,20 +432,20 @@ static const struct vm_operations_struct erofs_dax_vm_ops = { .huge_fault = erofs_dax_huge_fault, }; -static int erofs_file_mmap(struct file *file, struct vm_area_struct *vma) +static int erofs_file_mmap_prepare(struct vm_area_desc *desc) { - if (!IS_DAX(file_inode(file))) - return generic_file_readonly_mmap(file, vma); + if (!IS_DAX(file_inode(desc->file))) + return generic_file_readonly_mmap_prepare(desc); - if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) + if ((desc->vm_flags & VM_SHARED) && (desc->vm_flags & VM_MAYWRITE)) return -EINVAL; - vma->vm_ops = &erofs_dax_vm_ops; - vm_flags_set(vma, VM_HUGEPAGE); + desc->vm_ops = &erofs_dax_vm_ops; + desc->vm_flags |= VM_HUGEPAGE; return 0; } #else -#define erofs_file_mmap generic_file_readonly_mmap +#define erofs_file_mmap_prepare generic_file_readonly_mmap_prepare #endif static loff_t erofs_file_llseek(struct file *file, loff_t offset, int whence) @@ -475,7 +475,7 @@ static loff_t erofs_file_llseek(struct file *file, loff_t offset, int whence) const struct file_operations erofs_file_fops = { .llseek = erofs_file_llseek, .read_iter = erofs_file_read_iter, - .mmap = erofs_file_mmap, + .mmap_prepare = erofs_file_mmap_prepare, .get_unmapped_area = thp_get_unmapped_area, .splice_read = filemap_splice_read, }; diff --git a/fs/exfat/file.c b/fs/exfat/file.c index 70f53edd0a10..6b82497572b4 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -682,13 +682,15 @@ static const struct vm_operations_struct exfat_file_vm_ops = { .page_mkwrite = exfat_page_mkwrite, }; -static int exfat_file_mmap(struct file *file, struct vm_area_struct *vma) +static int exfat_file_mmap_prepare(struct vm_area_desc *desc) { - if (unlikely(exfat_forced_shutdown(file_inode(file)->i_sb))) + struct file *file = desc->file; |
