From c4bb667eaf520f21b3a3db0489682becc9c49bcc Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 2 Aug 2019 18:15:19 +0100 Subject: fuse: reserve values for mapping protocol SETUPMAPPING is a command for use with 'virtiofsd', a fuse-over-virtio implementation; it may find use in other fuse impelementations as well in which the kernel does not have access to the address space of the daemon directly. A SETUPMAPPING operation causes a section of a file to be mapped into a memory window visible to the kernel. The offsets in the file and the window are defined by the kernel performing the operation. The daemon may reject the request, for reasons including permissions and limited resources. When a request perfectly overlaps a previous mapping, the previous mapping is replaced. When a mapping partially overlaps a previous mapping, the previous mapping is split into one or two smaller mappings. REMOVEMAPPING is the complement to SETUPMAPPING; it unmaps a range of mapped files from the window visible to the kernel. The map_alignment field communicates the alignment constraint for FUSE_SETUPMAPPING/FUSE_REMOVEMAPPING and allows the daemon to constrain the addresses and file offsets chosen by the kernel. Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Vivek Goyal Signed-off-by: Stefan Hajnoczi Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index df2e12fb3381..802b0377a49e 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -133,6 +133,8 @@ * * 7.31 * - add FUSE_WRITE_KILL_PRIV flag + * - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING + * - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag */ #ifndef _LINUX_FUSE_H @@ -274,6 +276,7 @@ struct fuse_file_lock { * FUSE_CACHE_SYMLINKS: cache READLINK responses * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request + * FUSE_MAP_ALIGNMENT: map_alignment field is valid */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -301,6 +304,7 @@ struct fuse_file_lock { #define FUSE_CACHE_SYMLINKS (1 << 23) #define FUSE_NO_OPENDIR_SUPPORT (1 << 24) #define FUSE_EXPLICIT_INVAL_DATA (1 << 25) +#define FUSE_MAP_ALIGNMENT (1 << 26) /** * CUSE INIT request/reply flags @@ -422,6 +426,8 @@ enum fuse_opcode { FUSE_RENAME2 = 45, FUSE_LSEEK = 46, FUSE_COPY_FILE_RANGE = 47, + FUSE_SETUPMAPPING = 48, + FUSE_REMOVEMAPPING = 49, /* CUSE specific operations */ CUSE_INIT = 4096, @@ -656,7 +662,7 @@ struct fuse_init_out { uint32_t max_write; uint32_t time_gran; uint16_t max_pages; - uint16_t padding; + uint16_t map_alignment; uint32_t unused[8]; }; -- cgit v1.2.3 From a62a8ef9d97da23762a588592c8b8eb50a8deb6a Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 12 Jun 2018 09:41:17 +0100 Subject: virtio-fs: add virtiofs filesystem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a basic file system module for virtio-fs. This does not yet contain shared data support between host and guest or metadata coherency speedups. However it is already significantly faster than virtio-9p. Design Overview =============== With the goal of designing something with better performance and local file system semantics, a bunch of ideas were proposed. - Use fuse protocol (instead of 9p) for communication between guest and host. Guest kernel will be fuse client and a fuse server will run on host to serve the requests. - For data access inside guest, mmap portion of file in QEMU address space and guest accesses this memory using dax. That way guest page cache is bypassed and there is only one copy of data (on host). This will also enable mmap(MAP_SHARED) between guests. - For metadata coherency, there is a shared memory region which contains version number associated with metadata and any guest changing metadata updates version number and other guests refresh metadata on next access. This is yet to be implemented. How virtio-fs differs from existing approaches ============================================== The unique idea behind virtio-fs is to take advantage of the co-location of the virtual machine and hypervisor to avoid communication (vmexits). DAX allows file contents to be accessed without communication with the hypervisor. The shared memory region for metadata avoids communication in the common case where metadata is unchanged. By replacing expensive communication with cheaper shared memory accesses, we expect to achieve better performance than approaches based on network file system protocols. In addition, this also makes it easier to achieve local file system semantics (coherency). These techniques are not applicable to network file system protocols since the communications channel is bypassed by taking advantage of shared memory on a local machine. This is why we decided to build virtio-fs rather than focus on 9P or NFS. Caching Modes ============= Like virtio-9p, different caching modes are supported which determine the coherency level as well. The “cache=FOO” and “writeback” options control the level of coherence between the guest and host filesystems. - cache=none metadata, data and pathname lookup are not cached in guest. They are always fetched from host and any changes are immediately pushed to host. - cache=always metadata, data and pathname lookup are cached in guest and never expire. - cache=auto metadata and pathname lookup cache expires after a configured amount of time (default is 1 second). Data is cached while the file is open (close to open consistency). - writeback/no_writeback These options control the writeback strategy. If writeback is disabled, then normal writes will immediately be synchronized with the host fs. If writeback is enabled, then writes may be cached in the guest until the file is closed or an fsync(2) performed. This option has no effect on mmap-ed writes or writes going through the DAX mechanism. Signed-off-by: Stefan Hajnoczi Signed-off-by: Vivek Goyal Acked-by: Michael S. Tsirkin Signed-off-by: Miklos Szeredi --- include/uapi/linux/virtio_fs.h | 19 +++++++++++++++++++ include/uapi/linux/virtio_ids.h | 1 + 2 files changed, 20 insertions(+) create mode 100644 include/uapi/linux/virtio_fs.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_fs.h b/include/uapi/linux/virtio_fs.h new file mode 100644 index 000000000000..b02eb2ac3d99 --- /dev/null +++ b/include/uapi/linux/virtio_fs.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ + +#ifndef _UAPI_LINUX_VIRTIO_FS_H +#define _UAPI_LINUX_VIRTIO_FS_H + +#include +#include +#include +#include + +struct virtio_fs_config { + /* Filesystem name (UTF-8, not NUL-terminated, padded with NULs) */ + __u8 tag[36]; + + /* Number of request queues */ + __u32 num_request_queues; +} __attribute__((packed)); + +#endif /* _UAPI_LINUX_VIRTIO_FS_H */ diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 348fd0176f75..585e07b27333 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -44,6 +44,7 @@ #define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ #define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ #define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ +#define VIRTIO_ID_FS 26 /* virtio filesystem */ #define VIRTIO_ID_PMEM 27 /* virtio pmem */ #endif /* _LINUX_VIRTIO_IDS_H */ -- cgit v1.2.3