diff options
| author | Jack Xiao <Jack.Xiao@amd.com> | 2025-05-14 17:01:21 +0800 |
|---|---|---|
| committer | Alex Deucher <alexander.deucher@amd.com> | 2025-12-08 13:56:39 -0500 |
| commit | e220edf2d6fd6dbf08fa93790809500a80217948 (patch) | |
| tree | 2e7291e91ea728052a57538eec72a49840bf173f /drivers/gpu | |
| parent | 73aa1550dfeda9fff742dbba724c69d46e0908bf (diff) | |
drm/amdgpu/mes_v12_1: initial support for mes_v12_1
Duplicate and rename the MES IP version name to v12_1_0.
Fix to access correct ring pipe by xcc_id.
Fix to access correct instance registers by xcc_id.
Fix to access correct index registers by grbm/xcc_id.
v2: rebase (Alex)
v3: fix sw_fini (Alex)
Signed-off-by: Jack Xiao <Jack.Xiao@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Mukul Joshi <mukul.joshi@amd.com>
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/Makefile | 1 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/mes_v12_1.c | 1762 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/mes_v12_1.h | 29 |
3 files changed, 1792 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 139ceafa164a..367f2da5f510 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -179,6 +179,7 @@ amdgpu-y += \ amdgpu_mes.o \ mes_v11_0.o \ mes_v12_0.o \ + mes_v12_1.o # add GFX userqueue support amdgpu-y += mes_userqueue.o diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c new file mode 100644 index 000000000000..ee5a2317e414 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c @@ -0,0 +1,1762 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include <linux/firmware.h> +#include <linux/module.h> +#include "amdgpu.h" +#include "soc15_common.h" +#include "soc_v1_0.h" +#include "gc/gc_12_1_0_offset.h" +#include "gc/gc_12_1_0_sh_mask.h" +#include "gc/gc_11_0_0_default.h" +#include "v12_structs.h" +#include "mes_v12_api_def.h" + +MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes1.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_1_0_uni_mes.bin"); + +static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block); +static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block); +static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id); +static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id); + +#define MES_EOP_SIZE 2048 + +#define regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT 0x100000 + +static void mes_v12_1_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else { + BUG(); + } +} + +static u64 mes_v12_1_ring_get_rptr(struct amdgpu_ring *ring) +{ + return *ring->rptr_cpu_addr; +} + +static u64 mes_v12_1_ring_get_wptr(struct amdgpu_ring *ring) +{ + u64 wptr; + + if (ring->use_doorbell) + wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); + else + BUG(); + return wptr; +} + +static const struct amdgpu_ring_funcs mes_v12_1_ring_funcs = { + .type = AMDGPU_RING_TYPE_MES, + .align_mask = 1, + .nop = 0, + .support_64bit_ptrs = true, + .get_rptr = mes_v12_1_ring_get_rptr, + .get_wptr = mes_v12_1_ring_get_wptr, + .set_wptr = mes_v12_1_ring_set_wptr, + .insert_nop = amdgpu_ring_insert_nop, +}; + +static const char *mes_v12_1_opcodes[] = { + "SET_HW_RSRC", + "SET_SCHEDULING_CONFIG", + "ADD_QUEUE", + "REMOVE_QUEUE", + "PERFORM_YIELD", + "SET_GANG_PRIORITY_LEVEL", + "SUSPEND", + "RESUME", + "RESET", + "SET_LOG_BUFFER", + "CHANGE_GANG_PRORITY", + 
"QUERY_SCHEDULER_STATUS", + "unused", + "SET_DEBUG_VMID", + "MISC", + "UPDATE_ROOT_PAGE_TABLE", + "AMD_LOG", + "SET_SE_MODE", + "SET_GANG_SUBMIT", + "SET_HW_RSRC_1", +}; + +static const char *mes_v12_1_misc_opcodes[] = { + "WRITE_REG", + "INV_GART", + "QUERY_STATUS", + "READ_REG", + "WAIT_REG_MEM", + "SET_SHADER_DEBUGGER", + "NOTIFY_WORK_ON_UNMAPPED_QUEUE", + "NOTIFY_TO_UNMAP_PROCESSES", +}; + +static const char *mes_v12_1_get_op_string(union MESAPI__MISC *x_pkt) +{ + const char *op_str = NULL; + + if (x_pkt->header.opcode < ARRAY_SIZE(mes_v12_1_opcodes)) + op_str = mes_v12_1_opcodes[x_pkt->header.opcode]; + + return op_str; +} + +static const char *mes_v12_1_get_misc_op_string(union MESAPI__MISC *x_pkt) +{ + const char *op_str = NULL; + + if ((x_pkt->header.opcode == MES_SCH_API_MISC) && + (x_pkt->opcode < ARRAY_SIZE(mes_v12_1_misc_opcodes))) + op_str = mes_v12_1_misc_opcodes[x_pkt->opcode]; + + return op_str; +} + +static int mes_v12_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, + int xcc_id, int pipe, void *pkt, + int size, int api_status_off) +{ + union MESAPI__QUERY_MES_STATUS mes_status_pkt; + signed long timeout = 2100000; /* 2100 ms */ + struct amdgpu_device *adev = mes->adev; + struct amdgpu_ring *ring = &mes->ring[MES_PIPE_INST(xcc_id, pipe)]; + spinlock_t *ring_lock = &mes->ring_lock[MES_PIPE_INST(xcc_id, pipe)]; + struct MES_API_STATUS *api_status; + union MESAPI__MISC *x_pkt = pkt; + const char *op_str, *misc_op_str; + unsigned long flags; + u64 status_gpu_addr; + u32 seq, status_offset; + u64 *status_ptr; + signed long r; + int ret; + + if (x_pkt->header.opcode >= MES_SCH_API_MAX) + return -EINVAL; + + if (amdgpu_emu_mode) { + timeout *= 1000; + } else if (amdgpu_sriov_vf(adev)) { + /* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */ + timeout = 15 * 600 * 1000; + } + + ret = amdgpu_device_wb_get(adev, &status_offset); + if (ret) + return ret; + + status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4); + 
status_ptr = (u64 *)&adev->wb.wb[status_offset]; + *status_ptr = 0; + + spin_lock_irqsave(ring_lock, flags); + r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); + if (r) + goto error_unlock_free; + + seq = ++ring->fence_drv.sync_seq; + r = amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + timeout); + if (r < 1) + goto error_undo; + + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); + api_status->api_completion_fence_addr = status_gpu_addr; + api_status->api_completion_fence_value = 1; + + amdgpu_ring_write_multiple(ring, pkt, size / 4); + + memset(&mes_status_pkt, 0, sizeof(mes_status_pkt)); + mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; + mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + mes_status_pkt.api_status.api_completion_fence_addr = + ring->fence_drv.gpu_addr; + mes_status_pkt.api_status.api_completion_fence_value = seq; + + amdgpu_ring_write_multiple(ring, &mes_status_pkt, + sizeof(mes_status_pkt) / 4); + + amdgpu_ring_commit(ring); + spin_unlock_irqrestore(ring_lock, flags); + + op_str = mes_v12_1_get_op_string(x_pkt); + misc_op_str = mes_v12_1_get_misc_op_string(x_pkt); + + if (misc_op_str) + dev_dbg(adev->dev, "MES(%d, %d) msg=%s (%s) was emitted\n", + xcc_id, pipe, op_str, misc_op_str); + else if (op_str) + dev_dbg(adev->dev, "MES(%d, %d) msg=%s was emitted\n", + xcc_id, pipe, op_str); + else + dev_dbg(adev->dev, "MES(%d, %d) msg=%d was emitted\n", + xcc_id, pipe, x_pkt->header.opcode); + + r = amdgpu_fence_wait_polling(ring, seq, timeout); + if (r < 1 || !*status_ptr) { + if (misc_op_str) + dev_err(adev->dev, + "MES(%d, %d) failed to respond to msg=%s (%s)\n", + xcc_id, pipe, op_str, misc_op_str); + else if (op_str) + dev_err(adev->dev, + "MES(%d, %d) failed to respond to msg=%s\n", + xcc_id, pipe, op_str); + else + dev_err(adev->dev, + "MES(%d, %d) failed to respond to msg=%d\n", + xcc_id, pipe, 
x_pkt->header.opcode); + + while (halt_if_hws_hang) + schedule(); + + r = -ETIMEDOUT; + goto error_wb_free; + } + + amdgpu_device_wb_free(adev, status_offset); + return 0; + +error_undo: + dev_err(adev->dev, "MES(%d, %d) ring buffer is full.\n", xcc_id, pipe); + amdgpu_ring_undo(ring); + +error_unlock_free: + spin_unlock_irqrestore(ring_lock, flags); + +error_wb_free: + amdgpu_device_wb_free(adev, status_offset); + return r; +} + +static int convert_to_mes_queue_type(int queue_type) +{ + if (queue_type == AMDGPU_RING_TYPE_GFX) + return MES_QUEUE_TYPE_GFX; + else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) + return MES_QUEUE_TYPE_COMPUTE; + else if (queue_type == AMDGPU_RING_TYPE_SDMA) + return MES_QUEUE_TYPE_SDMA; + else if (queue_type == AMDGPU_RING_TYPE_MES) + return MES_QUEUE_TYPE_SCHQ; + else + BUG(); + return -1; +} + +static int mes_v12_1_add_hw_queue(struct amdgpu_mes *mes, + struct mes_add_queue_input *input) +{ + struct amdgpu_device *adev = mes->adev; + union MESAPI__ADD_QUEUE mes_add_queue_pkt; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; + uint32_t vm_cntx_cntl = hub->vm_cntx_cntl; + + memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); + + mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE; + mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_add_queue_pkt.process_id = input->process_id; + mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr; + mes_add_queue_pkt.process_va_start = input->process_va_start; + mes_add_queue_pkt.process_va_end = input->process_va_end; + mes_add_queue_pkt.process_quantum = input->process_quantum; + mes_add_queue_pkt.process_context_addr = input->process_context_addr; + mes_add_queue_pkt.gang_quantum = input->gang_quantum; + mes_add_queue_pkt.gang_context_addr = input->gang_context_addr; + mes_add_queue_pkt.inprocess_gang_priority = + input->inprocess_gang_priority; + 
mes_add_queue_pkt.gang_global_priority_level = + input->gang_global_priority_level; + mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_add_queue_pkt.mqd_addr = input->mqd_addr; + + mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr; + + mes_add_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + mes_add_queue_pkt.paging = input->paging; + mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl; + mes_add_queue_pkt.gws_base = input->gws_base; + mes_add_queue_pkt.gws_size = input->gws_size; + mes_add_queue_pkt.trap_handler_addr = input->tba_addr; + mes_add_queue_pkt.tma_addr = input->tma_addr; + mes_add_queue_pkt.trap_en = input->trap_en; + mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear; + mes_add_queue_pkt.is_kfd_process = input->is_kfd_process; + + /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ + mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; + mes_add_queue_pkt.gds_size = input->queue_size; + + /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ + mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; + mes_add_queue_pkt.gds_size = input->queue_size; + + return mes_v12_1_submit_pkt_and_poll_completion(mes, + input->xcc_id, AMDGPU_MES_SCHED_PIPE, + &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), + offsetof(union MESAPI__ADD_QUEUE, api_status)); +} + +static int mes_v12_1_remove_hw_queue(struct amdgpu_mes *mes, + struct mes_remove_queue_input *input) +{ + union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; + + memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); + + mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE; + mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; + + return 
mes_v12_1_submit_pkt_and_poll_completion(mes, + input->xcc_id, AMDGPU_MES_SCHED_PIPE, + &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); +} + +static int mes_v12_1_reset_hw_queue(struct amdgpu_mes *mes, + struct mes_reset_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + int pipe; + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; + /* mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; */ + /*mes_reset_queue_pkt.reset_queue_only = 1;*/ + + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_1_submit_pkt_and_poll_completion(mes, + input->xcc_id, pipe, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); +} + +static int mes_v12_1_map_legacy_queue(struct amdgpu_mes *mes, + struct mes_map_legacy_queue_input *input) +{ + union MESAPI__ADD_QUEUE mes_add_queue_pkt; + int pipe; + + memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); + + mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE; + mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_add_queue_pkt.pipe_id = input->pipe_id; + mes_add_queue_pkt.queue_id = input->queue_id; + mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_add_queue_pkt.mqd_addr = input->mqd_addr; + mes_add_queue_pkt.wptr_addr = input->wptr_addr; + mes_add_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + mes_add_queue_pkt.map_legacy_kq = 1; + + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return 
mes_v12_1_submit_pkt_and_poll_completion(mes, + input->xcc_id, pipe, + &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), + offsetof(union MESAPI__ADD_QUEUE, api_status)); +} + +static int mes_v12_1_unmap_legacy_queue(struct amdgpu_mes *mes, + struct mes_unmap_legacy_queue_input *input) +{ + union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; + int pipe; + + memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); + + mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE; + mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_remove_queue_pkt.gang_context_addr = 0; + + mes_remove_queue_pkt.pipe_id = input->pipe_id; + mes_remove_queue_pkt.queue_id = input->queue_id; + + if (input->action == PREEMPT_QUEUES_NO_UNMAP) { + mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1; + mes_remove_queue_pkt.tf_addr = input->trail_fence_addr; + mes_remove_queue_pkt.tf_data = + lower_32_bits(input->trail_fence_data); + } else { + mes_remove_queue_pkt.unmap_legacy_queue = 1; + mes_remove_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + } + + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_1_submit_pkt_and_poll_completion(mes, + input->xcc_id, pipe, + &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); +} + +static int mes_v12_1_suspend_gang(struct amdgpu_mes *mes, + struct mes_suspend_gang_input *input) +{ + return 0; +} + +static int mes_v12_1_resume_gang(struct amdgpu_mes *mes, + struct mes_resume_gang_input *input) +{ + return 0; +} + +static int mes_v12_1_query_sched_status(struct amdgpu_mes *mes, + int pipe, int xcc_id) +{ + union MESAPI__QUERY_MES_STATUS mes_status_pkt; + + memset(&mes_status_pkt, 0, sizeof(mes_status_pkt)); + + mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER; + 
mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; + mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe, + &mes_status_pkt, sizeof(mes_status_pkt), + offsetof(union MESAPI__QUERY_MES_STATUS, api_status)); +} + +static int mes_v12_1_misc_op(struct amdgpu_mes *mes, + struct mes_misc_op_input *input) +{ + union MESAPI__MISC misc_pkt; + int pipe; + + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + memset(&misc_pkt, 0, sizeof(misc_pkt)); + + misc_pkt.header.type = MES_API_TYPE_SCHEDULER; + misc_pkt.header.opcode = MES_SCH_API_MISC; + misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + switch (input->op) { + case MES_MISC_OP_READ_REG: + misc_pkt.opcode = MESAPI_MISC__READ_REG; + misc_pkt.read_reg.reg_offset = input->read_reg.reg_offset; + misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr; + break; + case MES_MISC_OP_WRITE_REG: + misc_pkt.opcode = MESAPI_MISC__WRITE_REG; + misc_pkt.write_reg.reg_offset = input->write_reg.reg_offset; + misc_pkt.write_reg.reg_value = input->write_reg.reg_value; + break; + case MES_MISC_OP_WRM_REG_WAIT: + misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM; + misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM; + misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref; + misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask; + misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0; + misc_pkt.wait_reg_mem.reg_offset2 = 0; + break; + case MES_MISC_OP_WRM_REG_WR_WAIT: + misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM; + misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG; + misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref; + misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask; + misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0; + misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1; + break; + case MES_MISC_OP_SET_SHADER_DEBUGGER: + pipe = AMDGPU_MES_SCHED_PIPE; + misc_pkt.opcode = 
MESAPI_MISC__SET_SHADER_DEBUGGER; + misc_pkt.set_shader_debugger.process_context_addr = + input->set_shader_debugger.process_context_addr; + misc_pkt.set_shader_debugger.flags.u32all = + input->set_shader_debugger.flags.u32all; + misc_pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl = + input->set_shader_debugger.spi_gdbg_per_vmid_cntl; + memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl, + input->set_shader_debugger.tcp_watch_cntl, + sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl)); + misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en; + break; + case MES_MISC_OP_CHANGE_CONFIG: + misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG; + misc_pkt.change_config.opcode = + MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS; + misc_pkt.change_config.option.bits.limit_single_process = + input->change_config.option.limit_single_process; + break; + default: + DRM_ERROR("unsupported misc op (%d) \n", input->op); + return -EINVAL; + } + + return mes_v12_1_submit_pkt_and_poll_completion(mes, + input->xcc_id, pipe, + &misc_pkt, sizeof(misc_pkt), + offsetof(union MESAPI__MISC, api_status)); +} + +static int mes_v12_1_set_hw_resources_1(struct amdgpu_mes *mes, + int pipe, int xcc_id) +{ + union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt; + + memset(&mes_set_hw_res_1_pkt, 0, sizeof(mes_set_hw_res_1_pkt)); + + mes_set_hw_res_1_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_set_hw_res_1_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1; + mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 100; + + return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe, + &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt), + offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); +} + +static void mes_v12_1_set_gfx_hqd_mask(union MESAPI_SET_HW_RESOURCES *pkt) +{ + /* + * GFX V12 has only one GFX pipe, but 8 queues in it. + * GFX pipe 0 queue 0 is being used by Kernel queue. 
+ * Set GFX pipe 0 queue 1-7 for MES scheduling + * mask = 1111 1110b + */ + pkt->gfx_hqd_mask[0] = 0xFE; +} + +static int mes_v12_1_set_hw_resources(struct amdgpu_mes *mes, + int pipe, int xcc_id) +{ + int i; + struct amdgpu_device *adev = mes->adev; + union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt; + + memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt)); + + mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC; + mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + if (pipe == AMDGPU_MES_SCHED_PIPE) { + mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; + mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; + mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; + mes_set_hw_res_pkt.paging_vmid = 0; + + for (i = 0; i < MAX_COMPUTE_PIPES; i++) + mes_set_hw_res_pkt.compute_hqd_mask[i] = + mes->compute_hqd_mask[i]; + + mes_v12_1_set_gfx_hqd_mask(&mes_set_hw_res_pkt); + + for (i = 0; i < MAX_SDMA_PIPES; i++) + mes_set_hw_res_pkt.sdma_hqd_mask[i] = + mes->sdma_hqd_mask[i]; + + for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) + mes_set_hw_res_pkt.aggregated_doorbells[i] = + mes->aggregated_doorbells[i]; + } + + mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = + mes->sch_ctx_gpu_addr[pipe]; + mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = + mes->query_status_fence_gpu_addr[pipe]; + + for (i = 0; i < 5; i++) { + mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i]; + mes_set_hw_res_pkt.mmhub_base[i] = + adev->reg_offset[MMHUB_HWIP][0][i]; + mes_set_hw_res_pkt.osssys_base[i] = + adev->reg_offset[OSSSYS_HWIP][0][i]; + } + + mes_set_hw_res_pkt.disable_reset = 1; + mes_set_hw_res_pkt.disable_mes_log = 1; + mes_set_hw_res_pkt.use_different_vmid_compute = 1; + mes_set_hw_res_pkt.enable_reg_active_poll = 1; + mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; + + /* + * Keep oversubscribe timer for sdma . 
When we have unmapped doorbell + * handling support, other queue will not use the oversubscribe timer. + * handling mode - 0: disabled; 1: basic version; 2: basic+ version + */ + mes_set_hw_res_pkt.oversubscription_timer = 50; + mes_set_hw_res_pkt.unmapped_doorbell_handling = 1; + + if (amdgpu_mes_log_enable) { + mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; + mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = + mes->event_log_gpu_addr + pipe * AMDGPU_MES_LOG_BUFFER_SIZE; + } + + if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE) + mes_set_hw_res_pkt.limit_single_process = 1; + + return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe, + &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), + offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); +} + +static void mes_v12_1_init_aggregated_doorbell(struct amdgpu_mes *mes, + int xcc_id) +{ + struct amdgpu_device *adev = mes->adev; + uint32_t data; + + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1); + data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] << + CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1, data); + + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2); + data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] << + CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2, data); + + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), 
regCP_MES_DOORBELL_CONTROL3); + data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] << + CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3, data); + + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4); + data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] << + CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4, data); + + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5); + data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] << + CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5, data); + + data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT; + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_GFX_CONTROL, data); +} + + +static void mes_v12_1_enable_unmapped_doorbell_handling( + struct amdgpu_mes *mes, bool enable, int xcc_id) +{ + struct amdgpu_device *adev = mes->adev; + uint32_t data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL); + + /* + * The default PROC_LSB settng is 0xc which means doorbell + * addr[16:12] gives the doorbell page number. 
For kfd, each + * process will use 2 pages of doorbell, we need to change the + * setting to 0xd + */ + data &= ~CP_UNMAPPED_DOORBELL__PROC_LSB_MASK; + data |= 0xd << CP_UNMAPPED_DOORBELL__PROC_LSB__SHIFT; + + data |= (enable ? 1 : 0) << CP_UNMAPPED_DOORBELL__ENABLE__SHIFT; + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL, data); +} + +#if 0 +static int mes_v12_1_reset_legacy_queue(struct amdgpu_mes *mes, + struct mes_reset_legacy_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + int pipe; + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_reset_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + + if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) { + mes_reset_queue_pkt.reset_legacy_gfx = 1; + mes_reset_queue_pkt.pipe_id_lp = input->pipe_id; + mes_reset_queue_pkt.queue_id_lp = input->queue_id; + mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr; + mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset; + mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr; + mes_reset_queue_pkt.vmid_id_lp = input->vmid; + } else { + mes_reset_queue_pkt.reset_queue_only = 1; + mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; + } + + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_1_submit_pkt_and_poll_completion(mes, + input->xcc_id, pipe, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__RESET, api_status)); +} +#endif + +static const struct amdgpu_mes_funcs mes_v12_1_funcs = { + .add_hw_queue = mes_v12_1_add_hw_queue, + .remove_hw_queue = mes_v12_1_remove_hw_queue, + .map_legacy_queue = mes_v12_1_map_legacy_queue, + .unmap_legacy_queue = mes_v12_1_unmap_legacy_queue, + .suspend_gang = 
mes_v12_1_suspend_gang, + .resume_gang = mes_v12_1_resume_gang, + .misc_op = mes_v12_1_misc_op, + .reset_hw_queue = mes_v12_1_reset_hw_queue, +}; + +static int mes_v12_1_allocate_ucode_buffer(struct amdgpu_device *adev, + enum amdgpu_mes_pipe pipe, + int xcc_id) +{ + int r, inst = MES_PIPE_INST(xcc_id, pipe); + const struct mes_firmware_header_v1_0 *mes_hdr; + const __le32 *fw_data; + unsigned fw_size; + + mes_hdr = (const struct mes_firmware_header_v1_0 *) + adev->mes.fw[pipe]->data; + + fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + + le32_to_cpu(mes_hdr->mes_ucode_offset_bytes)); + fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); + + r = amdgpu_bo_create_reserved(adev, fw_size, + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->mes.ucode_fw_obj[inst], + &adev->mes.ucode_fw_gpu_addr[inst], + (void **)&adev->mes.ucode_fw_ptr[inst]); + if (r) { + dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r); + return r; + } + + memcpy(adev->mes.ucode_fw_ptr[inst], fw_data, fw_size); + + amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[inst]); + amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[inst]); + + return 0; +} + +static int mes_v12_1_allocate_ucode_data_buffer(struct amdgpu_device *adev, + enum amdgpu_mes_pipe pipe, + int xcc_id) +{ + int r, inst = MES_PIPE_INST(xcc_id, pipe); + const struct mes_firmware_header_v1_0 *mes_hdr; + const __le32 *fw_data; + unsigned fw_size; + + mes_hdr = (const struct mes_firmware_header_v1_0 *) + adev->mes.fw[pipe]->data; + + fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + + le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)); + fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes); + + r = amdgpu_bo_create_reserved(adev, fw_size, + 64 * 1024, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->mes.data_fw_obj[inst], + &adev->mes.data_fw_gpu_addr[inst], + (void **)&adev->mes.data_fw_ptr[inst]); + if (r) { + dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r); + return r; + } + + memcpy(adev->mes.data_fw_ptr[inst], fw_data, fw_size); 
+ + amdgpu_bo_kunmap(adev->mes.data_fw_obj[inst]); + amdgpu_bo_unreserve(adev->mes.data_fw_obj[inst]); + + return 0; +} + +static void mes_v12_1_free_ucode_buffers(struct amdgpu_device *adev, + enum amdgpu_mes_pipe pipe, + int xcc_id) +{ + int inst = MES_PIPE_INST(xcc_id, pipe); + + amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[inst], + &adev->mes.data_fw_gpu_addr[inst], + (void **)&adev->mes.data_fw_ptr[inst]); + + amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[inst], + &adev->mes.ucode_fw_gpu_addr[inst], + (void **)&adev->mes.ucode_fw_ptr[inst]); +} + +static void mes_v12_1_enable(struct amdgpu_device *adev, + bool enable, int xcc_id) +{ + uint64_t ucode_addr; + uint32_t pipe, data = 0; + + if (enable) { + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data); + + mutex_lock(&adev->srbm_mutex); + for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { + soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, + GET_INST(GC, xcc_id)); + + ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regCP_MES_PRGRM_CNTR_START, + lower_32_bits(ucode_addr)); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regCP_MES_PRGRM_CNTR_START_HI, + upper_32_bits(ucode_addr)); + } + soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); + mutex_unlock(&adev->srbm_mutex); + + /* unhalt MES and activate pipe0 */ + data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data); + + if (amdgpu_emu_mode) + msleep(500); + else if (adev->enable_uni_mes) + udelay(500); + else + udelay(50); + } else { + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0); + data = REG_SET_FIELD(data, 
CP_MES_CNTL, MES_PIPE1_ACTIVE, 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, + MES_INVALIDATE_ICACHE, 1); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data); + } +} + +static void mes_v12_1_set_ucode_start_addr(struct amdgpu_device *adev, + int xcc_id) +{ + uint64_t ucode_addr; + int pipe; + + mes_v12_1_enable(adev, false, xcc_id); + + mutex_lock(&adev->srbm_mutex); + for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { + /* me=3, queue=0 */ + soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id)); + + /* set ucode start address */ + ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START, + lower_32_bits(ucode_addr)); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START_HI, + upper_32_bits(ucode_addr)); + + soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); + } + mutex_unlock(&adev->srbm_mutex); +} + +/* This function is for backdoor MES firmware */ +static int mes_v12_1_load_microcode(struct amdgpu_device *adev, + enum amdgpu_mes_pipe pipe, + bool prime_icache, int xcc_id) +{ + int r, inst = MES_PIPE_INST(xcc_id, pipe); + uint32_t data; + + mes_v12_1_enable(adev, false, xcc_id); + + if (!adev->mes.fw[pipe]) + return -EINVAL; + + r = mes_v12_1_allocate_ucode_buffer(adev, pipe, xcc_id); + if (r) + return r; + + r = mes_v12_1_allocate_ucode_data_buffer(adev, pipe, xcc_id); + if (r) { |
