aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.clang-format3
-rw-r--r--Documentation/userspace-api/index.rst1
-rw-r--r--Documentation/userspace-api/ioctl/ioctl-number.rst1
-rw-r--r--Documentation/userspace-api/iommufd.rst223
-rw-r--r--MAINTAINERS12
-rw-r--r--drivers/dma/idxd/cdev.c3
-rw-r--r--drivers/dma/idxd/init.c25
-rw-r--r--drivers/gpu/drm/i915/gvt/kvmgt.c21
-rw-r--r--drivers/iommu/Kconfig1
-rw-r--r--drivers/iommu/Makefile4
-rw-r--r--drivers/iommu/amd/iommu.c14
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c104
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c32
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h23
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu.c3
-rw-r--r--drivers/iommu/arm/arm-smmu/qcom_iommu.c7
-rw-r--r--drivers/iommu/fsl_pamu.c2
-rw-r--r--drivers/iommu/fsl_pamu_domain.c4
-rw-r--r--drivers/iommu/intel/dmar.c7
-rw-r--r--drivers/iommu/intel/iommu.c56
-rw-r--r--drivers/iommu/intel/iommu.h18
-rw-r--r--drivers/iommu/intel/pasid.c6
-rw-r--r--drivers/iommu/intel/svm.c145
-rw-r--r--drivers/iommu/io-pgfault.c77
-rw-r--r--drivers/iommu/iommu-sva-lib.c71
-rw-r--r--drivers/iommu/iommu-sva.c240
-rw-r--r--drivers/iommu/iommu-sva.h (renamed from drivers/iommu/iommu-sva-lib.h)14
-rw-r--r--drivers/iommu/iommu.c416
-rw-r--r--drivers/iommu/iommufd/Kconfig44
-rw-r--r--drivers/iommu/iommufd/Makefile13
-rw-r--r--drivers/iommu/iommufd/device.c772
-rw-r--r--drivers/iommu/iommufd/double_span.h53
-rw-r--r--drivers/iommu/iommufd/hw_pagetable.c57
-rw-r--r--drivers/iommu/iommufd/io_pagetable.c1216
-rw-r--r--drivers/iommu/iommufd/io_pagetable.h241
-rw-r--r--drivers/iommu/iommufd/ioas.c398
-rw-r--r--drivers/iommu/iommufd/iommufd_private.h307
-rw-r--r--drivers/iommu/iommufd/iommufd_test.h93
-rw-r--r--drivers/iommu/iommufd/main.c460
-rw-r--r--drivers/iommu/iommufd/pages.c1977
-rw-r--r--drivers/iommu/iommufd/selftest.c853
-rw-r--r--drivers/iommu/iommufd/vfio_compat.c472
-rw-r--r--drivers/iommu/ipmmu-vmsa.c2
-rw-r--r--drivers/iommu/mtk_iommu.c4
-rw-r--r--drivers/iommu/omap-iommu.c6
-rw-r--r--drivers/iommu/sprd-iommu.c4
-rw-r--r--drivers/iommu/tegra-gart.c2
-rw-r--r--drivers/iommu/virtio-iommu.c7
-rw-r--r--drivers/misc/uacce/uacce.c2
-rw-r--r--drivers/pci/ats.c3
-rw-r--r--drivers/s390/cio/vfio_ccw_ops.c3
-rw-r--r--drivers/s390/crypto/vfio_ap_ops.c21
-rw-r--r--drivers/vfio/Kconfig36
-rw-r--r--drivers/vfio/Makefile6
-rw-r--r--drivers/vfio/container.c145
-rw-r--r--drivers/vfio/fsl-mc/vfio_fsl_mc.c3
-rw-r--r--drivers/vfio/group.c877
-rw-r--r--drivers/vfio/iommufd.c158
-rw-r--r--drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c6
-rw-r--r--drivers/vfio/pci/mlx5/main.c3
-rw-r--r--drivers/vfio/pci/vfio_pci.c3
-rw-r--r--drivers/vfio/platform/vfio_amba.c3
-rw-r--r--drivers/vfio/platform/vfio_platform.c3
-rw-r--r--drivers/vfio/vfio.h120
-rw-r--r--drivers/vfio/vfio_main.c983
-rw-r--r--include/linux/intel-svm.h13
-rw-r--r--include/linux/interval_tree.h58
-rw-r--r--include/linux/iommu.h145
-rw-r--r--include/linux/iommufd.h98
-rw-r--r--include/linux/sched/user.h2
-rw-r--r--include/linux/vfio.h39
-rw-r--r--include/uapi/linux/iommufd.h347
-rw-r--r--kernel/user.c1
-rw-r--r--lib/Kconfig4
-rw-r--r--lib/interval_tree.c132
-rwxr-xr-xscripts/kernel-doc12
-rw-r--r--tools/testing/selftests/Makefile1
-rw-r--r--tools/testing/selftests/iommu/.gitignore3
-rw-r--r--tools/testing/selftests/iommu/Makefile12
-rw-r--r--tools/testing/selftests/iommu/config2
-rw-r--r--tools/testing/selftests/iommu/iommufd.c1654
-rw-r--r--tools/testing/selftests/iommu/iommufd_fail_nth.c580
-rw-r--r--tools/testing/selftests/iommu/iommufd_utils.h278
83 files changed, 12804 insertions, 1466 deletions
diff --git a/.clang-format b/.clang-format
index bcf60344f9f5..b62836419ea3 100644
--- a/.clang-format
+++ b/.clang-format
@@ -441,8 +441,11 @@ ForEachMacros:
- 'inet_lhash2_for_each_icsk'
- 'inet_lhash2_for_each_icsk_continue'
- 'inet_lhash2_for_each_icsk_rcu'
+ - 'interval_tree_for_each_double_span'
+ - 'interval_tree_for_each_span'
- 'intlist__for_each_entry'
- 'intlist__for_each_entry_safe'
+ - 'iopt_for_each_contig_area'
- 'kcore_copy__for_each_phdr'
- 'key_for_each'
- 'key_for_each_safe'
diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst
index c78da9ce0ec4..f16337bdb852 100644
--- a/Documentation/userspace-api/index.rst
+++ b/Documentation/userspace-api/index.rst
@@ -25,6 +25,7 @@ place where this information is gathered.
ebpf/index
ioctl/index
iommu
+ iommufd
media/index
netlink/index
sysfs-platform_profile
diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 5f81e2a24a5c..eb045fc495a4 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -105,6 +105,7 @@ Code Seq# Include File Comments
'8' all SNP8023 advanced NIC card
<mailto:mcr@solidum.com>
';' 64-7F linux/vfio.h
+';' 80-FF linux/iommufd.h
'=' 00-3f uapi/linux/ptp_clock.h <mailto:richardcochran@gmail.com>
'@' 00-0F linux/radeonfb.h conflict!
'@' 00-0F drivers/video/aty/aty128fb.c conflict!
diff --git a/Documentation/userspace-api/iommufd.rst b/Documentation/userspace-api/iommufd.rst
new file mode 100644
index 000000000000..79dd9eb51587
--- /dev/null
+++ b/Documentation/userspace-api/iommufd.rst
@@ -0,0 +1,223 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+=======
+IOMMUFD
+=======
+
+:Author: Jason Gunthorpe
+:Author: Kevin Tian
+
+Overview
+========
+
+IOMMUFD is the user API to control the IOMMU subsystem as it relates to managing
+IO page tables from userspace using file descriptors. It intends to be general
+and consumable by any driver that wants to expose DMA to userspace. These
+drivers are eventually expected to deprecate any internal IOMMU logic
+they may already/historically implement (e.g. vfio_iommu_type1.c).
+
+At minimum iommufd provides universal support of managing I/O address spaces and
+I/O page tables for all IOMMUs, with room in the design to add non-generic
+features to cater to specific hardware functionality.
+
+In this context the capital letter (IOMMUFD) refers to the subsystem while the
+small letter (iommufd) refers to the file descriptors created via /dev/iommu for
+use by userspace.
+
+Key Concepts
+============
+
+User Visible Objects
+--------------------
+
+Following IOMMUFD objects are exposed to userspace:
+
+- IOMMUFD_OBJ_IOAS, representing an I/O address space (IOAS), allowing map/unmap
+ of user space memory into ranges of I/O Virtual Address (IOVA).
+
+ The IOAS is a functional replacement for the VFIO container, and like the VFIO
+ container it copies an IOVA map to a list of iommu_domains held within it.
+
+- IOMMUFD_OBJ_DEVICE, representing a device that is bound to iommufd by an
+ external driver.
+
+- IOMMUFD_OBJ_HW_PAGETABLE, representing an actual hardware I/O page table
+ (i.e. a single struct iommu_domain) managed by the iommu driver.
+
+ The IOAS has a list of HW_PAGETABLES that share the same IOVA mapping and
+ it will synchronize its mapping with each member HW_PAGETABLE.
+
+All user-visible objects are destroyed via the IOMMU_DESTROY uAPI.
+
+The diagram below shows relationship between user-visible objects and kernel
+datastructures (external to iommufd), with numbers referred to operations
+creating the objects and links::
+
+ _________________________________________________________
+ | iommufd |
+ | [1] |
+ | _________________ |
+ | | | |
+ | | | |
+ | | | |
+ | | | |
+ | | | |
+ | | | |
+ | | | [3] [2] |
+ | | | ____________ __________ |
+ | | IOAS |<--| |<------| | |
+ | | | |HW_PAGETABLE| | DEVICE | |
+ | | | |____________| |__________| |
+ | | | | | |
+ | | | | | |
+ | | | | | |
+ | | | | | |
+ | | | | | |
+ | |_________________| | | |
+ | | | | |
+ |_________|___________________|___________________|_______|
+ | | |
+ | _____v______ _______v_____
+ | PFN storage | | | |
+ |------------>|iommu_domain| |struct device|
+ |____________| |_____________|
+
+1. IOMMUFD_OBJ_IOAS is created via the IOMMU_IOAS_ALLOC uAPI. An iommufd can
+ hold multiple IOAS objects. IOAS is the most generic object and does not
+ expose interfaces that are specific to single IOMMU drivers. All operations
+ on the IOAS must operate equally on each of the iommu_domains inside of it.
+
+2. IOMMUFD_OBJ_DEVICE is created when an external driver calls the IOMMUFD kAPI
+ to bind a device to an iommufd. The driver is expected to implement a set of
+ ioctls to allow userspace to initiate the binding operation. Successful
+ completion of this operation establishes the desired DMA ownership over the
+ device. The driver must also set the driver_managed_dma flag and must not
+ touch the device until this operation succeeds.
+
+3. IOMMUFD_OBJ_HW_PAGETABLE is created when an external driver calls the IOMMUFD
+ kAPI to attach a bound device to an IOAS. Similarly the external driver uAPI
+ allows userspace to initiate the attaching operation. If a compatible
+ pagetable already exists then it is reused for the attachment. Otherwise a
+ new pagetable object and iommu_domain is created. Successful completion of
+ this operation sets up the linkages among IOAS, device and iommu_domain. Once
+ this completes the device could do DMA.
+
+ Every iommu_domain inside the IOAS is also represented to userspace as a
+ HW_PAGETABLE object.
+
+ .. note::
+
+ Future IOMMUFD updates will provide an API to create and manipulate the
+ HW_PAGETABLE directly.
+
+A device can only bind to an iommufd due to DMA ownership claim and attach to at
+most one IOAS object (no support of PASID yet).
+
+Kernel Datastructure
+--------------------
+
+User visible objects are backed by following datastructures:
+
+- iommufd_ioas for IOMMUFD_OBJ_IOAS.
+- iommufd_device for IOMMUFD_OBJ_DEVICE.
+- iommufd_hw_pagetable for IOMMUFD_OBJ_HW_PAGETABLE.
+
+Several terminologies when looking at these datastructures:
+
+- Automatic domain - refers to an iommu domain created automatically when
+ attaching a device to an IOAS object. This is compatible to the semantics of
+ VFIO type1.
+
+- Manual domain - refers to an iommu domain designated by the user as the
+ target pagetable to be attached to by a device. Though currently there are
+ no uAPIs to directly create such domain, the datastructure and algorithms
+ are ready for handling that use case.
+
+- In-kernel user - refers to something like a VFIO mdev that is using the
+ IOMMUFD access interface to access the IOAS. This starts by creating an
+ iommufd_access object that is similar to the domain binding a physical device
+ would do. The access object will then allow converting IOVA ranges into struct
+ page * lists, or doing direct read/write to an IOVA.
+
+iommufd_ioas serves as the metadata datastructure to manage how IOVA ranges are
+mapped to memory pages, composed of:
+
+- struct io_pagetable holding the IOVA map
+- struct iopt_area's representing populated portions of IOVA
+- struct iopt_pages representing the storage of PFNs
+- struct iommu_domain representing the IO page table in the IOMMU
+- struct iopt_pages_access representing in-kernel users of PFNs
+- struct xarray pinned_pfns holding a list of pages pinned by in-kernel users
+
+Each iopt_pages represents a logical linear array of full PFNs. The PFNs are
+ultimately derived from userspace VAs via an mm_struct. Once they have been
+pinned the PFNs are stored in IOPTEs of an iommu_domain or inside the pinned_pfns
+xarray if they have been pinned through an iommufd_access.
+
+PFN have to be copied between all combinations of storage locations, depending
+on what domains are present and what kinds of in-kernel "software access" users
+exist. The mechanism ensures that a page is pinned only once.
+
+An io_pagetable is composed of iopt_areas pointing at iopt_pages, along with a
+list of iommu_domains that mirror the IOVA to PFN map.
+
+Multiple io_pagetable-s, through their iopt_area-s, can share a single
+iopt_pages which avoids multi-pinning and double accounting of page
+consumption.
+
+iommufd_ioas is sharable between subsystems, e.g. VFIO and VDPA, as long as
+devices managed by different subsystems are bound to a same iommufd.
+
+IOMMUFD User API
+================
+
+.. kernel-doc:: include/uapi/linux/iommufd.h
+
+IOMMUFD Kernel API
+==================
+
+The IOMMUFD kAPI is device-centric with group-related tricks managed behind the
+scene. This allows the external drivers calling such kAPI to implement a simple
+device-centric uAPI for connecting its device to an iommufd, instead of
+explicitly imposing the group semantics in its uAPI as VFIO does.
+
+.. kernel-doc:: drivers/iommu/iommufd/device.c
+ :export:
+
+.. kernel-doc:: drivers/iommu/iommufd/main.c
+ :export:
+
+VFIO and IOMMUFD
+----------------
+
+Connecting a VFIO device to iommufd can be done in two ways.
+
+First is a VFIO compatible way by directly implementing the /dev/vfio/vfio
+container IOCTLs by mapping them into io_pagetable operations. Doing so allows
+the use of iommufd in legacy VFIO applications by symlinking /dev/vfio/vfio to
+/dev/iommufd or extending VFIO to SET_CONTAINER using an iommufd instead of a
+container fd.
+
+The second approach directly extends VFIO to support a new set of device-centric
+user API based on aforementioned IOMMUFD kernel API. It requires userspace
+change but better matches the IOMMUFD API semantics and easier to support new
+iommufd features when comparing it to the first approach.
+
+Currently both approaches are still work-in-progress.
+
+There are still a few gaps to be resolved to catch up with VFIO type1, as
+documented in iommufd_vfio_check_extension().
+
+Future TODOs
+============
+
+Currently IOMMUFD supports only kernel-managed I/O page table, similar to VFIO
+type1. New features on the radar include:
+
+ - Binding iommu_domain's to PASID/SSID
+ - Userspace page tables, for ARM, x86 and S390
+ - Kernel bypass'd invalidation of user page tables
+ - Re-use of the KVM page table in the IOMMU
+ - Dirty page tracking in the IOMMU
+ - Runtime Increase/Decrease of IOPTE size
+ - PRI support with faults resolved in userspace
diff --git a/MAINTAINERS b/MAINTAINERS
index 2ada2bddb809..abb08d7cc92d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10793,6 +10793,18 @@ F: drivers/iommu/dma-iommu.h
F: drivers/iommu/iova.c
F: include/linux/iova.h
+IOMMUFD
+M: Jason Gunthorpe <jgg@nvidia.com>
+M: Kevin Tian <kevin.tian@intel.com>
+L: iommu@lists.linux.dev
+S: Maintained
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd.git
+F: Documentation/userspace-api/iommufd.rst
+F: drivers/iommu/iommufd/
+F: include/linux/iommufd.h
+F: include/uapi/linux/iommufd.h
+F: tools/testing/selftests/iommu/
+
IOMMU SUBSYSTEM
M: Joerg Roedel <joro@8bytes.org>
M: Will Deacon <will@kernel.org>
diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index a9b96b18772f..e13e92609943 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -6,7 +6,6 @@
#include <linux/pci.h>
#include <linux/device.h>
#include <linux/sched/task.h>
-#include <linux/intel-svm.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/cdev.h>
#include <linux/fs.h>
@@ -100,7 +99,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
filp->private_data = ctx;
if (device_user_pasid_enabled(idxd)) {
- sva = iommu_sva_bind_device(dev, current->mm, NULL);
+ sva = iommu_sva_bind_device(dev, current->mm);
if (IS_ERR(sva)) {
rc = PTR_ERR(sva);
dev_err(dev, "pasid allocation failed: %d\n", rc);
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 09cbf0c179ba..529ea09c9094 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -14,7 +14,6 @@
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/device.h>
#include <linux/idr.h>
-#include <linux/intel-svm.h>
#include <linux/iommu.h>
#include <uapi/linux/idxd.h>
#include <linux/dmaengine.h>
@@ -502,29 +501,7 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_d
static int idxd_enable_system_pasid(struct idxd_device *idxd)
{
- int flags;
- unsigned int pasid;
- struct iommu_sva *sva;
-
- flags = SVM_FLAG_SUPERVISOR_MODE;
-
- sva = iommu_sva_bind_device(&idxd->pdev->dev, NULL, &flags);
- if (IS_ERR(sva)) {
- dev_warn(&idxd->pdev->dev,
- "iommu sva bind failed: %ld\n", PTR_ERR(sva));
- return PTR_ERR(sva);
- }
-
- pasid = iommu_sva_get_pasid(sva);
- if (pasid == IOMMU_PASID_INVALID) {
- iommu_sva_unbind_device(sva);
- return -ENODEV;
- }
-
- idxd->sva = sva;
- idxd->pasid = pasid;
- dev_dbg(&idxd->pdev->dev, "system pasid: %u\n", pasid);
- return 0;
+ return -EOPNOTSUPP;
}
static void idxd_disable_system_pasid(struct idxd_device *idxd)
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 077892a9aa8f..539447eda665 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -669,9 +669,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
vgpu->attached = true;
- kvmgt_protect_table_init(vgpu);
- gvt_cache_init(vgpu);
-
vgpu->track_node.track_write = kvmgt_page_track_write;
vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
kvm_get_kvm(vgpu->vfio_device.kvm);
@@ -715,6 +712,11 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
kvmgt_protect_table_destroy(vgpu);
gvt_cache_destroy(vgpu);
+ WARN_ON(vgpu->nr_cache_entries);
+
+ vgpu->gfn_cache = RB_ROOT;
+ vgpu->dma_addr_cache = RB_ROOT;
+
intel_vgpu_release_msi_eventfd_ctx(vgpu);