| author | Ankit Agrawal <ankita@nvidia.com> | 2025-11-02 18:44:34 +0000 |
|---|---|---|
| committer | Andrew Morton <akpm@linux-foundation.org> | 2025-11-16 17:28:30 -0800 |
| commit | ebb9aeb980e5d3d8d9505d187005b02942cc1cd9 (patch) | |
| tree | 822aca09f01e329cb40ea405f0cf3a7aaf3dd486 /drivers | |
| parent | 2ec41967189cd65a8f79c760dd1b50c4f56e8ac6 (diff) | |
vfio/nvgrace-gpu: register device memory for poison handling
The nvgrace-gpu-vfio-pci module [1] maps the device memory to the user VA
(QEMU) using remap_pfn_range() without adding the memory to the kernel, so
the device memory pages are not backed by struct page. The previous patch
implements the mechanism to handle ECC/poison on memory pages that lack a
struct page. That mechanism is used here.

The module registers its memory region and the associated address_space
with the kernel MM for ECC handling through the register_pfn_address_space()
API exposed by the kernel.
Link: https://lore.kernel.org/all/20240220115055.23546-1-ankita@nvidia.com/ [1]
Link: https://lkml.kernel.org/r/20251102184434.2406-4-ankita@nvidia.com
Signed-off-by: Ankit Agrawal <ankita@nvidia.com>
Acked-by: Alex Williamson <alex@shazbot.org>
Cc: Aniket Agashe <aniketa@nvidia.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hanjun Guo <guohanjun@huawei.com>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Kirti Wankhede <kwankhede@nvidia.com>
Cc: Len Brown <lenb@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Matthew R. Ochs <mochs@nvidia.com>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Cc: Neo Jia <cjia@nvidia.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Shuai Xue <xueshuai@linux.alibaba.com>
Cc: Smita Koralahalli Channabasappa <smita.koralahallichannabasappa@amd.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Tarun Gupta <targupta@nvidia.com>
Cc: Uwe Kleine-König <u.kleine-koenig@baylibre.com>
Cc: Vikram Sethi <vsethi@nvidia.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Zhi Wang <zhiw@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'drivers')
| -rw-r--r-- | drivers/vfio/pci/nvgrace-gpu/main.c | 45 |
1 file changed, 44 insertions, 1 deletion
diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c
index e346392b72f6..3ce56d039cbe 100644
--- a/drivers/vfio/pci/nvgrace-gpu/main.c
+++ b/drivers/vfio/pci/nvgrace-gpu/main.c
@@ -8,6 +8,10 @@
 #include <linux/delay.h>
 #include <linux/jiffies.h>
 
+#ifdef CONFIG_MEMORY_FAILURE
+#include <linux/memory-failure.h>
+#endif
+
 /*
  * The device memory usable to the workloads running in the VM is cached
  * and showcased as a 64b device BAR (comprising of BAR4 and BAR5 region)
@@ -47,6 +51,9 @@ struct mem_region {
 		void *memaddr;
 		void __iomem *ioaddr;
 	};			/* Base virtual address of the region */
+#ifdef CONFIG_MEMORY_FAILURE
+	struct pfn_address_space pfn_address_space;
+#endif
 };
 
 struct nvgrace_gpu_pci_core_device {
@@ -60,6 +67,28 @@ struct nvgrace_gpu_pci_core_device {
 	bool has_mig_hw_bug;
 };
 
+#ifdef CONFIG_MEMORY_FAILURE
+
+static int
+nvgrace_gpu_vfio_pci_register_pfn_range(struct mem_region *region,
+					struct vm_area_struct *vma)
+{
+	unsigned long nr_pages;
+	int ret = 0;
+
+	nr_pages = region->memlength >> PAGE_SHIFT;
+
+	region->pfn_address_space.node.start = vma->vm_pgoff;
+	region->pfn_address_space.node.last = vma->vm_pgoff + nr_pages - 1;
+	region->pfn_address_space.mapping = vma->vm_file->f_mapping;
+
+	ret = register_pfn_address_space(&region->pfn_address_space);
+
+	return ret;
+}
+
+#endif
+
 static void nvgrace_gpu_init_fake_bar_emu_regs(struct vfio_device *core_vdev)
 {
 	struct nvgrace_gpu_pci_core_device *nvdev =
@@ -127,6 +156,13 @@ static void nvgrace_gpu_close_device(struct vfio_device *core_vdev)
 
 	mutex_destroy(&nvdev->remap_lock);
 
+#ifdef CONFIG_MEMORY_FAILURE
+	if (nvdev->resmem.memlength)
+		unregister_pfn_address_space(&nvdev->resmem.pfn_address_space);
+
+	unregister_pfn_address_space(&nvdev->usemem.pfn_address_space);
+#endif
+
 	vfio_pci_core_close_device(core_vdev);
 }
 
@@ -202,7 +238,14 @@ static int nvgrace_gpu_mmap(struct vfio_device *core_vdev,
 
 	vma->vm_pgoff = start_pfn;
 
-	return 0;
+#ifdef CONFIG_MEMORY_FAILURE
+	if (nvdev->resmem.memlength && index == VFIO_PCI_BAR2_REGION_INDEX)
+		ret = nvgrace_gpu_vfio_pci_register_pfn_range(&nvdev->resmem, vma);
+	else if (index == VFIO_PCI_BAR4_REGION_INDEX)
+		ret = nvgrace_gpu_vfio_pci_register_pfn_range(&nvdev->usemem, vma);
+#endif
+
+	return ret;
 }
 
 static long
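For readers who want the gist without walking the hunks, the registration pattern added above reduces to the minimal sketch below. It assumes the struct pfn_address_space and the register_pfn_address_space()/unregister_pfn_address_space() interfaces introduced by the previous patch in this series; the example_region structure and function names are simplified placeholders, not the driver's actual types.

```c
#include <linux/memory-failure.h>	/* pfn_address_space API per the previous patch */
#include <linux/mm.h>
#include <linux/fs.h>

/* Simplified stand-in for the driver's per-region bookkeeping. */
struct example_region {
	size_t memlength;			/* size of the device-memory region */
	struct pfn_address_space pfn_space;	/* interval registered with the MM */
};

/*
 * Called from the driver's mmap path after remap_pfn_range() has created
 * the struct-page-less mapping.  The registered interval is in PFN units:
 * it starts at the VMA's vm_pgoff (which the driver points at the first
 * device PFN) and spans memlength >> PAGE_SHIFT pages.
 */
static int example_register_poison_range(struct example_region *region,
					 struct vm_area_struct *vma)
{
	unsigned long nr_pages = region->memlength >> PAGE_SHIFT;

	region->pfn_space.node.start = vma->vm_pgoff;
	region->pfn_space.node.last = vma->vm_pgoff + nr_pages - 1;
	region->pfn_space.mapping = vma->vm_file->f_mapping;

	/* Lets the MM find this mapping when an ECC error hits the range. */
	return register_pfn_address_space(&region->pfn_space);
}

/* Called from the driver's device-close path; drops the registration. */
static void example_unregister_poison_range(struct example_region *region)
{
	unregister_pfn_address_space(&region->pfn_space);
}
```

As in the driver above, each mapped region (usemem, and resmem when present) gets its own registration keyed by the VMA's pgoff range, and the registration is dropped in the device close path.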
