// SPDX-License-Identifier: MIT
/*
* Copyright © 2021 Intel Corporation
*/
#include "xe_ggtt.h"
#include <kunit/visibility.h>
#include <linux/fault-inject.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/sizes.h>
#include <drm/drm_drv.h>
#include <drm/drm_managed.h>
#include <drm/intel/i915_drm.h>
#include <generated/xe_wa_oob.h>
#include "regs/xe_gt_regs.h"
#include "regs/xe_gtt_defs.h"
#include "regs/xe_regs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_map.h"
#include "xe_mmio.h"
#include "xe_pm.h"
#include "xe_res_cursor.h"
#include "xe_sriov.h"
#include "xe_tile_printk.h"
#include "xe_tile_sriov_vf.h"
#include "xe_tlb_inval.h"
#include "xe_wa.h"
#include "xe_wopcm.h"
/**
* DOC: Global Graphics Translation Table (GGTT)
*
* Xe GGTT implements support for a Global Virtual Address space that is used
* for resources that are accessible to privileged (i.e. kernel-mode) processes,
* and not tied to a specific user-level process. For example, the Graphics
* micro-Controller (GuC) and Display Engine (if present) utilize this Global
* address space.
*
* The Global GTT (GGTT) translates from the Global virtual address to a physical
* address that can be accessed by HW. The GGTT is a flat, single-level table.
*
* Xe implements a simplified version of the GGTT, specifically managing only the
* range that goes from the top of the Write Once Protected Content Memory (WOPCM)
* layout to a predefined GUC_GGTT_TOP. This approach avoids complications related to
* the GuC (Graphics Microcontroller) hardware limitations. The GuC address space
* is limited on both ends of the GGTT, because the GuC shim HW redirects
* accesses to those addresses to other HW areas instead of going through the
* GGTT. On the bottom end, the GuC can't access offsets below the WOPCM size,
* while on the top side the limit is fixed at GUC_GGTT_TOP. To keep things
* simple, instead of checking each object to see whether it is accessed by the
* GuC or not, we just exclude those areas from the allocator. Additionally, to
* simplify the driver load, we use the maximum WOPCM size in this logic instead
* of the programmed one, so we don't need to wait until the actual programmed
* size is determined (which requires a FW fetch) before initializing the GGTT.
* These simplifications might waste space in the GGTT (about 20-25 MBs depending
* on the platform), but we can live with this. Another benefit is that the GuC
* bootrom can't access anything below the WOPCM max size, so anything the
* bootrom needs to access (e.g. an RSA key) must be placed in the GGTT above the
* WOPCM max size. Starting the GGTT allocations above the WOPCM max size gives
* us the correct placement for free.
*/
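
/*
 * Illustrative sketch only (not used by the driver): with the drm_mm allocator
 * embedded in struct xe_ggtt, the range restriction described above amounts to
 * starting the allocator at the maximum WOPCM size and capping it at
 * GUC_GGTT_TOP. The wopcm_max_size and ggtt_size parameters here are
 * hypothetical stand-ins for the values the real init path derives at runtime.
 */
static void __maybe_unused
xe_ggtt_example_init_range(struct drm_mm *mm, u64 wopcm_max_size, u64 ggtt_size)
{
	/* Bottom: the GuC can't reach offsets below the WOPCM (max) size */
	u64 start = wopcm_max_size;
	/* Top: the GuC shim redirects accesses above GUC_GGTT_TOP elsewhere */
	u64 end = min_t(u64, ggtt_size, GUC_GGTT_TOP);

	drm_mm_init(mm, start, end - start);
}

/*
 * Encode the common Xe_LP GGTT PTE flags for @bo: mark the entry present and,
 * for VRAM or stolen-in-VRAM backed objects, set the Device Memory bit. The
 * PAT index is only encoded from Xe_LPG onwards.
 */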
static u64 xelp_ggtt_pte_flags(struct xe_bo *bo, u16 pat_index)
{
u64 pte = XE_PAGE_PRESENT;
if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
pte |= XE_GGTT_PTE_DM;
return pte;
}
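
/*
 * From Xe_LPG onwards the GGTT PTE also carries a 2-bit PAT index: bits 0 and 1
 * of @pat_index are encoded into the XELPG_GGTT_PTE_PAT0/PAT1 fields on top of
 * the common Xe_LP flags built above.
 */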
static u64 xelpg_ggtt_pte_flags(struct xe_bo *bo, u16 pat_index)
{
struct xe_device *xe = xe_bo_device(bo);
u64 pte;
pte = xelp_ggtt_pte_flags(bo, pat_index);
xe_assert(xe, pat_index <= 3);
if (pat_index & BIT(0))
pte |= XELPG_GGTT_PTE_PAT0;
if (pat_index & BIT(1))
pte |= XELPG_GGTT_PTE_PAT1;
return pte;
}
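
/*
 * GGMS in the GMCH control register encodes the GTT stolen memory (GSM) size as
 * SZ_1M << GGMS, with 0 meaning no GSM. Each 8-byte PTE in the GSM maps one
 * 4 KiB page, so e.g. an 8 MiB GSM covers a 4 GiB GGTT address space.
 */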
static unsigned int probe_gsm_size(struct pci_dev *pdev)
{
u16 gmch_ctl, ggms;
pci_read_config_word(pdev, SNB_GMCH_CTRL, &gmch_ctl);
ggms = (gmch_ctl >> BDW_GMCH_GGMS_SHIFT) & BDW_GMCH_GGMS_MASK;
return ggms ? SZ_1M << ggms : 0;
}
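
/*
 * CPU-side throttle for Wa_22019338487: count the GGTT writes done through the
 * CPU and, once the per-GT limit is reached, force the gunit to drain them with
 * a dummy register write (see the comment below).
 */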
static void ggtt_update_access_counter(struct xe_ggtt *ggtt)
{
struct xe_tile *tile = ggtt->tile;
struct xe_gt *affected_gt;
u32 max_gtt_writes;
if (tile->primary_gt && XE_GT_WA(tile->primary_gt, 22019338487)) {
affected_gt = tile->primary_gt;
max_gtt_writes = 1100;
/* Only expected to apply to primary GT on dgpu platforms */
xe_tile_assert(tile, IS_DGFX(tile_to_xe(tile)));
} else {
affected_gt = tile->media_gt;
max_gtt_writes = 63;
/* Only expected to apply to media GT on igpu platforms */
xe_tile_assert(tile, !IS_DGFX(tile_to_xe(tile)));
}
/*
* Wa_22019338487: GMD_ID is a RO register, a dummy write forces gunit
* to wait for completion of prior GTT writes before letting this through.
* This needs to be done for all GGTT writes originating from the CPU.
*/
lockdep_assert_held(&ggtt->lock);
if ((++ggtt->access_count % max_gtt_writes) == 0) {
xe_mmio_write32(&affected_gt->mmio, GMD_ID, 0x0);
ggtt->access_count = 0;
}
}
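
/* Write one 64-bit PTE for GGTT offset @addr directly into the GSM. */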
static void xe_ggtt_set_pte(struct xe_ggtt