// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved
*/
#include <linux/sizes.h>
#include <linux/vfio_pci_core.h>
#include <linux/delay.h>
#include <linux/jiffies.h>
#include <linux/pci-p2pdma.h>
#include <linux/pm_runtime.h>
#include <linux/memory-failure.h>
/*
 * The device memory usable to the workloads running in the VM is cached
 * and exposed as a 64b device BAR (comprising the BAR4 and BAR5 regions)
 * to the VM and is represented as usemem.
 * Moreover, the VM GPU device driver needs a non-cacheable region to
 * support the MIG feature. This region is also exposed as a 64b BAR
 * (comprising the BAR2 and BAR3 regions) and represented as resmem.
 */
#define RESMEM_REGION_INDEX VFIO_PCI_BAR2_REGION_INDEX
#define USEMEM_REGION_INDEX VFIO_PCI_BAR4_REGION_INDEX
/* A hardwired and constant ABI value between the GPU FW and VFIO driver. */
#define MEMBLK_SIZE SZ_512M
#define DVSEC_BITMAP_OFFSET 0xA
#define MIG_SUPPORTED_WITH_CACHED_RESMEM BIT(0)
#define GPU_CAP_DVSEC_REGISTER 3
#define C2C_LINK_BAR0_OFFSET 0x1498
#define HBM_TRAINING_BAR0_OFFSET 0x200BC
#define STATUS_READY 0xFF
#define POLL_QUANTUM_MS 1000
#define POLL_TIMEOUT_MS (30 * 1000)
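/*
 * Illustration only: a minimal sketch of how the readiness constants
 * above could be used to poll BAR0 until the C2C link is up and HBM
 * training has completed. The helper name is hypothetical and the
 * caller is assumed to have already mapped BAR0; the driver's actual
 * wait logic may differ.
 */
static __maybe_unused int nvgrace_gpu_poll_ready_sketch(void __iomem *bar0)
{
	unsigned long timeout = jiffies + msecs_to_jiffies(POLL_TIMEOUT_MS);

	do {
		/* The device reports STATUS_READY in both registers once up */
		if (ioread32(bar0 + C2C_LINK_BAR0_OFFSET) == STATUS_READY &&
		    ioread32(bar0 + HBM_TRAINING_BAR0_OFFSET) == STATUS_READY)
			return 0;
		msleep(POLL_QUANTUM_MS);
	} while (!time_after(jiffies, timeout));

	return -ETIMEDOUT;
}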
/*
 * The state of the two device memory regions - resmem and usemem - is
 * saved as struct mem_region.
 */
struct mem_region {
phys_addr_t memphys; /* Base physical address of the region */
size_t memlength; /* Region size */
size_t bar_size; /* Reported region BAR size */
__le64 bar_val; /* Emulated BAR offset registers */
union {
void *memaddr;
void __iomem *ioaddr;
}; /* Base virtual address of the region */
struct pfn_address_space pfn_address_space;
};
struct nvgrace_gpu_pci_core_device {
struct vfio_pci_core_device core_device;
/* Cached and usable memory for the VM. */
struct mem_region usemem;
	/* Non-cached memory carved out from the end of device memory */
struct mem_region resmem;
/* Lock to control device memory kernel mapping */
struct mutex remap_lock;
bool has_mig_hw_bug;
/* GPU has just been reset */
bool reset_done;
};
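/*
 * Illustration only (a sketch, not necessarily the driver's probe
 * path): has_mig_hw_bug above could be derived from the NVIDIA DVSEC
 * capability using the constants defined earlier. A device that sets
 * MIG_SUPPORTED_WITH_CACHED_RESMEM in the capability bitmap would not
 * have the bug.
 */
static __maybe_unused bool nvgrace_gpu_mig_hw_bug_sketch(struct pci_dev *pdev)
{
	u16 dvsec_ctrl16;
	int pcie_dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_NVIDIA,
						   GPU_CAP_DVSEC_REGISTER);

	if (pcie_dvsec) {
		pci_read_config_word(pdev, pcie_dvsec + DVSEC_BITMAP_OFFSET,
				     &dvsec_ctrl16);
		if (dvsec_ctrl16 & MIG_SUPPORTED_WITH_CACHED_RESMEM)
			return false;
	}

	return true;
}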
static void nvgrace_gpu_init_fake_bar_emu_regs(struct vfio_device *core_vdev)
{
struct nvgrace_gpu_pci_core_device *nvdev =
container_of(core_vdev, struct nvgrace_gpu_pci_core_device,
core_device.vdev);
nvdev->resmem.bar_val = 0;
nvdev->usemem.bar_val = 0;
}
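/*
 * Illustration only: bar_val emulates the pair of 32-bit config-space
 * registers that back each fake 64-bit BAR. A hypothetical sketch of
 * folding a guest dword write into it, where @hi selects the upper
 * half of the pair (BAR5/BAR3); size-based alignment masking on
 * read-back is omitted here.
 */
static __maybe_unused void
nvgrace_gpu_fake_bar_write_sketch(struct mem_region *region, bool hi, u32 val)
{
	u64 bar = le64_to_cpu(region->bar_val);

	if (hi)
		bar = (bar & 0xffffffffULL) | ((u64)val << 32);
	else
		bar = (bar & ~0xffffffffULL) | val;

	region->bar_val = cpu_to_le64(bar);
}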
/* Choose the structure corresponding to the fake BAR with a given index. */
static struct mem_region *
nvgrace_gpu_memregion(int index,
struct nvgrace_gpu_pci_core_device *nvdev)
{
if (index == USEMEM_REGION_INDEX)
return &nvdev->usemem;
if (nvdev->resmem.memlength && index == RESMEM_REGION_INDEX)
return &nvdev->resmem;
return NULL;
}
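/*
 * Translate @pfn to its page offset within the memory region backing
 * the fake BAR @index, returned via @pfn_offset_in_region.
 */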
static int pfn_memregion_offset(struct nvgrace_gpu_pci_core_device *nvdev,
unsigned int index,
unsigned long pfn,
pgoff_t *pfn_offset_in_region)
{
struct mem_region *region;
unsigned long start_pfn, num_pages;
	region = nvgrace_gpu_memregion(index, nvdev);