diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-11 18:14:06 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-11 18:14:06 -0700 |
| commit | b0402403e54ae9eb94ce1cbb53c7def776e97426 (patch) | |
| tree | 64198f48106bd0909cedf604df42ad8c667ce388 /drivers/ras | |
| parent | 1f75619a721d5149d9a947f2177d3cffc473fbb7 (diff) | |
| parent | af65545a0f82d7336f62e34f69d3c644806f5f95 (diff) | |
Merge tag 'edac_updates_for_v6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras
Pull EDAC updates from Borislav Petkov:
- Add a FRU (Field Replaceable Unit) memory poison manager which
collects and manages previously encountered hw errors in order to
save them to persistent storage across reboots. Previously recorded
errors are "replayed" upon reboot in order to poison memory which has
caused said errors in the past.
The main use case is stacked, on-chip memory which cannot simply be
replaced so poisoning faulty areas of it and thus making them
inaccessible is the only strategy to prolong its lifetime.
- Add an AMD address translation library glue which converts the
reported addresses of hw errors into system physical addresses in
order to be used by other subsystems like memory failure, for
example. Add support for MI300 accelerators to that library.
- igen6: Add support for Alder Lake-N SoC
- i10nm: Add Grand Ridge support
- The usual fixlets and cleanups
* tag 'edac_updates_for_v6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
EDAC/versal: Convert to platform remove callback returning void
RAS/AMD/FMPM: Fix off by one when unwinding on error
RAS/AMD/FMPM: Add debugfs interface to print record entries
RAS/AMD/FMPM: Save SPA values
RAS: Export helper to get ras_debugfs_dir
RAS/AMD/ATL: Fix bit overflow in denorm_addr_df4_np2()
RAS: Introduce a FRU memory poison manager
RAS/AMD/ATL: Add MI300 row retirement support
Documentation: Move RAS section to admin-guide
EDAC/versal: Make the bit position of injected errors configurable
EDAC/i10nm: Add Intel Grand Ridge micro-server support
EDAC/igen6: Add one more Intel Alder Lake-N SoC support
RAS/AMD/ATL: Add MI300 DRAM to normalized address translation support
RAS/AMD/ATL: Fix array overflow in get_logical_coh_st_fabric_id_mi300()
RAS/AMD/ATL: Add MI300 support
Documentation: RAS: Add index and address translation section
EDAC/amd64: Use new AMD Address Translation Library
RAS: Introduce AMD Address Translation Library
EDAC/synopsys: Convert to devm_platform_ioremap_resource()
Diffstat (limited to 'drivers/ras')
| -rw-r--r-- | drivers/ras/Kconfig | 13 | ||||
| -rw-r--r-- | drivers/ras/Makefile | 3 | ||||
| -rw-r--r-- | drivers/ras/amd/atl/Kconfig | 21 | ||||
| -rw-r--r-- | drivers/ras/amd/atl/Makefile | 18 | ||||
| -rw-r--r-- | drivers/ras/amd/atl/access.c | 133 | ||||
| -rw-r--r-- | drivers/ras/amd/atl/core.c | 225 | ||||
| -rw-r--r-- | drivers/ras/amd/atl/dehash.c | 500 | ||||
| -rw-r--r-- | drivers/ras/amd/atl/denormalize.c | 718 | ||||
| -rw-r--r-- | drivers/ras/amd/atl/internal.h | 306 | ||||
| -rw-r--r-- | drivers/ras/amd/atl/map.c | 682 | ||||
| -rw-r--r-- | drivers/ras/amd/atl/reg_fields.h | 606 | ||||
| -rw-r--r-- | drivers/ras/amd/atl/system.c | 288 | ||||
| -rw-r--r-- | drivers/ras/amd/atl/umc.c | 341 | ||||
| -rw-r--r-- | drivers/ras/amd/fmpm.c | 1013 | ||||
| -rw-r--r-- | drivers/ras/cec.c | 10 | ||||
| -rw-r--r-- | drivers/ras/debugfs.c | 8 | ||||
| -rw-r--r-- | drivers/ras/debugfs.h | 2 | ||||
| -rw-r--r-- | drivers/ras/ras.c | 31 |
18 files changed, 4914 insertions, 4 deletions
diff --git a/drivers/ras/Kconfig b/drivers/ras/Kconfig index c2a236f2e846..fc4f4bb94a4c 100644 --- a/drivers/ras/Kconfig +++ b/drivers/ras/Kconfig @@ -32,5 +32,18 @@ menuconfig RAS if RAS source "arch/x86/ras/Kconfig" +source "drivers/ras/amd/atl/Kconfig" + +config RAS_FMPM + tristate "FRU Memory Poison Manager" + default m + depends on AMD_ATL && ACPI_APEI + help + Support saving and restoring memory error information across reboot + using ACPI ERST as persistent storage. Error information is saved with + the UEFI CPER "FRU Memory Poison" section format. + + Memory will be retired during boot time and run time depending on + platform-specific policies. endif diff --git a/drivers/ras/Makefile b/drivers/ras/Makefile index 6f0404f50107..11f95d59d397 100644 --- a/drivers/ras/Makefile +++ b/drivers/ras/Makefile @@ -2,3 +2,6 @@ obj-$(CONFIG_RAS) += ras.o obj-$(CONFIG_DEBUG_FS) += debugfs.o obj-$(CONFIG_RAS_CEC) += cec.o + +obj-$(CONFIG_RAS_FMPM) += amd/fmpm.o +obj-y += amd/atl/ diff --git a/drivers/ras/amd/atl/Kconfig b/drivers/ras/amd/atl/Kconfig new file mode 100644 index 000000000000..df49c23e7f62 --- /dev/null +++ b/drivers/ras/amd/atl/Kconfig @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# +# AMD Address Translation Library Kconfig +# +# Copyright (c) 2023, Advanced Micro Devices, Inc. +# All Rights Reserved. +# +# Author: Yazen Ghannam <Yazen.Ghannam@amd.com> + +config AMD_ATL + tristate "AMD Address Translation Library" + depends on AMD_NB && X86_64 && RAS + depends on MEMORY_FAILURE + default N + help + This library includes support for implementation-specific + address translation procedures needed for various error + handling cases. + + Enable this option if using DRAM ECC on Zen-based systems + and OS-based error handling. diff --git a/drivers/ras/amd/atl/Makefile b/drivers/ras/amd/atl/Makefile new file mode 100644 index 000000000000..4acd5f05bd9c --- /dev/null +++ b/drivers/ras/amd/atl/Makefile @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# +# AMD Address Translation Library Makefile +# +# Copyright (c) 2023, Advanced Micro Devices, Inc. +# All Rights Reserved. +# +# Author: Yazen Ghannam <Yazen.Ghannam@amd.com> + +amd_atl-y := access.o +amd_atl-y += core.o +amd_atl-y += dehash.o +amd_atl-y += denormalize.o +amd_atl-y += map.o +amd_atl-y += system.o +amd_atl-y += umc.o + +obj-$(CONFIG_AMD_ATL) += amd_atl.o diff --git a/drivers/ras/amd/atl/access.c b/drivers/ras/amd/atl/access.c new file mode 100644 index 000000000000..ee4661ed28ba --- /dev/null +++ b/drivers/ras/amd/atl/access.c @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * AMD Address Translation Library + * + * access.c : DF Indirect Access functions + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam <Yazen.Ghannam@amd.com> + */ + +#include "internal.h" + +/* Protect the PCI config register pairs used for DF indirect access. */ +static DEFINE_MUTEX(df_indirect_mutex); + +/* + * Data Fabric Indirect Access uses FICAA/FICAD. + * + * Fabric Indirect Configuration Access Address (FICAA): constructed based + * on the device's Instance Id and the PCI function and register offset of + * the desired register. + * + * Fabric Indirect Configuration Access Data (FICAD): there are FICAD + * low and high registers but so far only the low register is needed. + * + * Use Instance Id 0xFF to indicate a broadcast read. + */ +#define DF_BROADCAST 0xFF + +#define DF_FICAA_INST_EN BIT(0) +#define DF_FICAA_REG_NUM GENMASK(10, 1) +#define DF_FICAA_FUNC_NUM GENMASK(13, 11) +#define DF_FICAA_INST_ID GENMASK(23, 16) + +#define DF_FICAA_REG_NUM_LEGACY GENMASK(10, 2) + +static u16 get_accessible_node(u16 node) +{ + /* + * On heterogeneous systems, not all AMD Nodes are accessible + * through software-visible registers. The Node ID needs to be + * adjusted for register accesses. But its value should not be + * changed for the translation methods. + */ + if (df_cfg.flags.heterogeneous) { + /* Only Node 0 is accessible on DF3.5 systems. */ + if (df_cfg.rev == DF3p5) + node = 0; + + /* + * Only the first Node in each Socket is accessible on + * DF4.5 systems, and this is visible to software as one + * Fabric per Socket. The Socket ID can be derived from + * the Node ID and global shift values. + */ + if (df_cfg.rev == DF4p5) + node >>= df_cfg.socket_id_shift - df_cfg.node_id_shift; + } + + return node; +} + +static int __df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) +{ + u32 ficaa_addr = 0x8C, ficad_addr = 0xB8; + struct pci_dev *F4; + int err = -ENODEV; + u32 ficaa = 0; + + node = get_accessible_node(node); + if (node >= amd_nb_num()) + goto out; + + F4 = node_to_amd_nb(node)->link; + if (!F4) + goto out; + + /* Enable instance-specific access. */ + if (instance_id != DF_BROADCAST) { + ficaa |= FIELD_PREP(DF_FICAA_INST_EN, 1); + ficaa |= FIELD_PREP(DF_FICAA_INST_ID, instance_id); + } + + /* + * The two least-significant bits are masked when inputing the + * register offset to FICAA. + */ + reg >>= 2; + + if (df_cfg.flags.legacy_ficaa) { + ficaa_addr = 0x5C; + ficad_addr = 0x98; + + ficaa |= FIELD_PREP(DF_FICAA_REG_NUM_LEGACY, reg); + } else { + ficaa |= FIELD_PREP(DF_FICAA_REG_NUM, reg); + } + + ficaa |= FIELD_PREP(DF_FICAA_FUNC_NUM, func); + + mutex_lock(&df_indirect_mutex); + + err = pci_write_config_dword(F4, ficaa_addr, ficaa); + if (err) { + pr_warn("Error writing DF Indirect FICAA, FICAA=0x%x\n", ficaa); + goto out_unlock; + } + + err = pci_read_config_dword(F4, ficad_addr, lo); + if (err) + pr_warn("Error reading DF Indirect FICAD LO, FICAA=0x%x.\n", ficaa); + + pr_debug("node=%u inst=0x%x func=0x%x reg=0x%x val=0x%x", + node, instance_id, func, reg << 2, *lo); + +out_unlock: + mutex_unlock(&df_indirect_mutex); + +out: + return err; +} + +int df_indirect_read_instance(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) +{ + return __df_indirect_read(node, func, reg, instance_id, lo); +} + +int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo) +{ + return __df_indirect_read(node, func, reg, DF_BROADCAST, lo); +} diff --git a/drivers/ras/amd/atl/core.c b/drivers/ras/amd/atl/core.c new file mode 100644 index 000000000000..6dc4e06305f7 --- /dev/null +++ b/drivers/ras/amd/atl/core.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * AMD Address Translation Library + * + * core.c : Module init and base translation functions + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam <Yazen.Ghannam@amd.com> + */ + +#include <linux/module.h> +#include <asm/cpu_device_id.h> + +#include "internal.h" + +struct df_config df_cfg __read_mostly; + +static int addr_over_limit(struct addr_ctx *ctx) +{ + u64 dram_limit_addr; + + if (df_cfg.rev >= DF4) + dram_limit_addr = FIELD_GET(DF4_DRAM_LIMIT_ADDR, ctx->map.limit); + else + dram_limit_addr = FIELD_GET(DF2_DRAM_LIMIT_ADDR, ctx->map.limit); + + dram_limit_addr <<= DF_DRAM_BASE_LIMIT_LSB; + dram_limit_addr |= GENMASK(DF_DRAM_BASE_LIMIT_LSB - 1, 0); + + /* Is calculated system address above DRAM limit address? */ + if (ctx->ret_addr > dram_limit_addr) { + atl_debug(ctx, "Calculated address (0x%016llx) > DRAM limit (0x%016llx)", + ctx->ret_addr, dram_limit_addr); + return -EINVAL; + } + + return 0; +} + +static bool legacy_hole_en(struct addr_ctx *ctx) +{ + u32 reg = ctx->map.base; + + if (df_cfg.rev >= DF4) + reg = ctx->map.ctl; + + return FIELD_GET(DF_LEGACY_MMIO_HOLE_EN, reg); +} + +static int add_legacy_hole(struct addr_ctx *ctx) +{ + u32 dram_hole_base; + u8 func = 0; + + if (!legacy_hole_en(ctx)) + return 0; + + if (df_cfg.rev >= DF4) + func = 7; + + if (df_indirect_read_broadcast(ctx->node_id, func, 0x104, &dram_hole_base)) + return -EINVAL; + + dram_hole_base &= DF_DRAM_HOLE_BASE_MASK; + + if (ctx->ret_addr >= dram_hole_base) + ctx->ret_addr += (BIT_ULL(32) - dram_hole_base); + + return 0; +} + +static u64 get_base_addr(struct addr_ctx *ctx) +{ + u64 base_addr; + + if (df_cfg.rev >= DF4) + base_addr = FIELD_GET(DF4_BASE_ADDR, ctx->map.base); + else + base_addr = FIELD_GET(DF2_BASE_ADDR, ctx->map.base); + + return base_addr << DF_DRAM_BASE_LIMIT_LSB; +} + +static int add_base_and_hole(struct addr_ctx *ctx) +{ + ctx->ret_addr += get_base_addr(ctx); + + if (add_legacy_hole(ctx)) + return -EINVAL; + + return 0; +} + +static bool late_hole_remove(struct addr_ctx *ctx) +{ + if (df_cfg.rev == DF3p5) + return true; + + if (df_cfg.rev == DF4) + return true; + + if (ctx->map.intlv_mode == DF3_6CHAN) + return true; + + return false; +} + +unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsigned long addr) +{ + struct addr_ctx ctx; + + if (df_cfg.rev == UNKNOWN) + return -EINVAL; + + memset(&ctx, 0, sizeof(ctx)); + + /* Start from the normalized address */ + ctx.ret_addr = addr; + ctx.inst_id = coh_st_inst_id; + + ctx.inputs.norm_addr = addr; + ctx.inputs.socket_id = socket_id; + ctx.inputs.die_id = die_id; + ctx.inputs.coh_st_inst_id = coh_st_inst_id; + + if (determine_node_id(&ctx, socket_id, die_id)) + return -EINVAL; + + if (get_address_map(&ctx)) + return -EINVAL; + + if (denormalize_address(&ctx)) + return -EINVAL; + + if (!late_hole_remove(&ctx) && add_base_and_hole(&ctx)) + return -EINVAL; + + if (dehash_address(&ctx)) + return -EINVAL; + + if (late_hole_remove(&ctx) && add_base_and_hole(&ctx)) + return -EINVAL; + + if (addr_over_limit(&ctx)) + return -EINVAL; + + return ctx.ret_addr; +} + +static void check_for_legacy_df_access(void) +{ + /* + * All Zen-based systems before Family 19h use the legacy + * DF Indirect Access (FICAA/FICAD) offsets. + */ + if (boot_cpu_data.x86 < 0x19) { + df_cfg.flags.legacy_ficaa = true; + return; + } + + /* All systems after Family 19h use the current offsets. */ + if (boot_cpu_data.x86 > 0x19) + return; + + /* Some Family 19h systems use the legacy offsets. */ + switch (boot_cpu_data.x86_model) { + case 0x00 ... 0x0f: + case 0x20 ... 0x5f: + df_cfg.flags.legacy_ficaa = true; + } +} + +/* + * This library provides functionality for AMD-based systems with a Data Fabric. + * The set of systems with a Data Fabric is equivalent to the set of Zen-based systems + * and the set of systems with the Scalable MCA feature at this time. However, these + * are technically independent things. + * + * It's possible to match on the PCI IDs of the Data Fabric devices, but this will be + * an ever expanding list. Instead, match on the SMCA and Zen features to cover all + * relevant systems. + */ +static const struct x86_cpu_id amd_atl_cpuids[] = { + X86_MATCH_FEATURE(X86_FEATURE_SMCA, NULL), + X86_MATCH_FEATURE(X86_FEATURE_ZEN, NULL), + { } +}; +MODULE_DEVICE_TABLE(x86cpu, amd_atl_cpuids); + +static int __init amd_atl_init(void) +{ + if (!x86_match_cpu(amd_atl_cpuids)) + return -ENODEV; + + if (!amd_nb_num()) + return -ENODEV; + + check_for_legacy_df_access(); + + if (get_df_system_info()) + return -ENODEV; + + /* Increment this module's recount so that it can't be easily unloaded. */ + __module_get(THIS_MODULE); + amd_atl_register_decoder(convert_umc_mca_addr_to_sys_addr); + + pr_info("AMD Address Translation Library initialized"); + return 0; +} + +/* + * Exit function is only needed for testing and debug. Module unload must be + * forced to override refcount check. + */ +static void __exit amd_atl_exit(void) +{ + amd_atl_unregister_decoder(); +} + +module_init(amd_atl_init); +module_exit(amd_atl_exit); + +MODULE_LICENSE("GPL"); diff --git a/drivers/ras/amd/atl/dehash.c b/drivers/ras/amd/atl/dehash.c new file mode 100644 index 000000000000..4ea46262c4f5 --- /dev/null +++ b/drivers/ras/amd/atl/dehash.c @@ -0,0 +1,500 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * AMD Address Translation Library + * + * dehash.c : Functions to account for hashing bits + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam <Yazen.Ghannam@amd.com> + */ + +#include "internal.h" + +/* + * Verify the interleave bits are correct in the different interleaving + * settings. + * + * If @num_intlv_dies and/or @num_intlv_sockets are 1, it means the + * respective interleaving is disabled. + */ +static inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2, + u8 num_intlv_dies, u8 num_intlv_sockets) +{ + if (!(ctx->map.intlv_bit_pos == bit1 || ctx->map.intlv_bit_pos == bit2)) { + pr_debug("Invalid interleave bit: %u", ctx->map.intlv_bit_pos); + return false; + } + + if (ctx->map.num_intlv_dies > num_intlv_dies) { + pr_debug("Invalid number of interleave dies: %u", ctx->map.num_intlv_dies); + return false; + } + + if (ctx->map.num_intlv_sockets > num_intlv_sockets) { + pr_debug("Invalid number of interleave sockets: %u", ctx->map.num_intlv_sockets); + return false; + } + + return true; +} + +static int df2_dehash_addr(struct addr_ctx *ctx) +{ + u8 hashed_bit, intlv_bit, intlv_bit_pos; + + if (!map_bits_valid(ctx, 8, 9, 1, 1)) + return -EINVAL; + + intlv_bit_pos = ctx->map.intlv_bit_pos; + intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(12), ctx->ret_addr); + hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr); + hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr); + hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr); + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(intlv_bit_pos); + + return 0; +} + +static int df3_dehash_addr(struct addr_ctx *ctx) +{ + bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G; + u8 hashed_bit, intlv_bit, intlv_bit_pos; + + if (!map_bits_valid(ctx, 8, 9, 1, 1)) + return -EINVAL; + + hash_ctl_64k = FIELD_GET(DF3_HASH_CTL_64K, ctx->map.ctl); + hash_ctl_2M = FIELD_GET(DF3_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF3_HASH_CTL_1G, ctx->map.ctl); + + intlv_bit_pos = ctx->map.intlv_bit_pos; + intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(14), ctx->ret_addr); + hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(intlv_bit_pos); + + /* Calculation complete for 2 channels. Continue for 4 and 8 channels. */ + if (ctx->map.intlv_mode == DF3_COD4_2CHAN_HASH) + return 0; + + intlv_bit = FIELD_GET(BIT_ULL(12), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(16), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(12); + + /* Calculation complete for 4 channels. Continue for 8 channels. */ + if (ctx->map.intlv_mode == DF3_COD2_4CHAN_HASH) + return 0; + + intlv_bit = FIELD_GET(BIT_ULL(13), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(17), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(13); + + return 0; +} + +static int df3_6chan_dehash_addr(struct addr_ctx *ctx) +{ + u8 intlv_bit_pos = ctx->map.intlv_bit_pos; + u8 hashed_bit, intlv_bit, num_intlv_bits; + bool hash_ctl_2M, hash_ctl_1G; + + if (ctx->map.intlv_mode != DF3_6CHAN) { + atl_debug_on_bad_intlv_mode(ctx); + return -EINVAL; + } + + num_intlv_bits = ilog2(ctx->map.num_intlv_chan) + 1; + + hash_ctl_2M = FIELD_GET(DF3_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF3_HASH_CTL_1G, ctx->map.ctl); + + intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= !!(BIT_ULL(intlv_bit_pos + num_intlv_bits) & ctx->ret_addr); + hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(intlv_bit_pos); + + intlv_bit_pos++; + intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(intlv_bit_pos); + + intlv_bit_pos++; + intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(intlv_bit_pos); + + return 0; +} + +static int df4_dehash_addr(struct addr_ctx *ctx) +{ + bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G; + u8 hashed_bit, intlv_bit; + + if (!map_bits_valid(ctx, 8, 8, 1, 2)) + return -EINVAL; + + hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); + hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); + + intlv_bit = FIELD_GET(BIT_ULL(8), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(16), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G; + + if (ctx->map.num_intlv_sockets == 1) + hashed_bit ^= FIELD_GET(BIT_ULL(14), ctx->ret_addr); + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(8); + + /* + * Hashing is possible with socket interleaving, so check the total number + * of channels in the system rather than DRAM map interleaving mode. + * + * Calculation complete for 2 channels. Continue for 4, 8, and 16 channels. + */ + if (ctx->map.total_intlv_chan <= 2) + return 0; + + intlv_bit = FIELD_GET(BIT_ULL(12), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(17), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(12); + + /* Calculation complete for 4 channels. Continue for 8 and 16 channels. */ + if (ctx->map.total_intlv_chan <= 4) + return 0; + + intlv_bit = FIELD_GET(BIT_ULL(13), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(13); + + /* Calculation complete for 8 channels. Continue for 16 channels. */ + if (ctx->map.total_intlv_chan <= 8) + return 0; + + intlv_bit = FIELD_GET(BIT_ULL(14), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(19), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(24), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(33), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(14); + + return 0; +} + +static int df4p5_dehash_addr(struct addr_ctx *ctx) +{ + bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T; + u8 hashed_bit, intlv_bit; + u64 rehash_vector; + + if (!map_bits_valid(ctx, 8, 8, 1, 2)) + return -EINVAL; + + hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); + hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); + hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl); + + /* + * Generate a unique address to determine which bits + * need to be dehashed. + * + * Start with a contiguous bitmask for the total + * number of channels starting at bit 8. + * + * Then make a gap in the proper place based on + * interleave mode. + */ + rehash_vector = ctx->map.total_intlv_chan - 1; + rehash_vector <<= 8; + + if (ctx->map.intlv_mode == DF4p5_NPS2_4CHAN_1K_HASH || + ctx->map.intlv_mode == DF4p5_NPS1_8CHAN_1K_HASH || + ctx->map.intlv_mode == DF4p5_NPS1_16CHAN_1K_HASH) + rehash_vector = expand_bits(10, 2, rehash_vector); + else + rehash_vector = expand_bits(9, 3, rehash_vector); + + if (rehash_vector & BIT_ULL(8)) { + intlv_bit = FIELD_GET(BIT_ULL(8), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(16), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(40), ctx->ret_addr) & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(8); + } + + if (rehash_vector & BIT_ULL(9)) { + intlv_bit = FIELD_GET(BIT_ULL(9), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(17), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(41), ctx->ret_addr) & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(9); + } + + if (rehash_vector & BIT_ULL(12)) { + intlv_bit = FIELD_GET(BIT_ULL(12), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(42), ctx->ret_addr) & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(12); + } + + if (rehash_vector & BIT_ULL(13)) { + intlv_bit = FIELD_GET(BIT_ULL(13), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(19), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(24), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(33), ctx->ret_addr) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(43), ctx->ret_addr) & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(13); + } + + if (rehash_vector & BIT_ULL(14)) { + intlv_bit = FIELD_GET(BIT_ULL(14), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(20), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(25), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(34), ctx->ret_addr) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(44), ctx->ret_addr) & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(14); + } + + return 0; +} + +/* + * MI300 hash bits + * 4K 64K 2M 1G 1T 1T + * COH_ST_Select[0] = XOR of addr{8, 12, 15, 22, 29, 36, 43} + * COH_ST_Select[1] = XOR of addr{9, 13, 16, 23, 30, 37, 44} + * COH_ST_Select[2] = XOR of addr{10, 14, 17, 24, 31, 38, 45} + * COH_ST_Select[3] = XOR of addr{11, 18, 25, 32, 39, 46} + * COH_ST_Select[4] = XOR of addr{14, 19, 26, 33, 40, 47} aka Stack + * DieID[0] = XOR of addr{12, 20, 27, 34, 41 } + * DieID[1] = XOR of addr{13, 21, 28, 35, 42 } + */ +static int mi300_dehash_addr(struct addr_ctx *ctx) +{ + bool hash_ctl_4k, hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T; + bool hashed_bit, intlv_bit, test_bit; + u8 num_intlv_bits, base_bit, i; + + if (!map_bits_valid(ctx, 8, 8, 4, 1)) + return -EINVAL; + + hash_ctl_4k = FIELD_GET(DF4p5_HASH_CTL_4K, ctx->map.ctl); + hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); + hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); + hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl); + + /* Channel bits */ + num_intlv_bits = ilog2(ctx->map.num_intlv_chan); + + for (i = 0; i < num_intlv_bits; i++) { + base_bit = 8 + i; + + /* COH_ST_Select[4] jumps to a base bit of 14. */ + if (i == 4) + base_bit = 14; + + intlv_bit = BIT_ULL(base_bit) & ctx->ret_addr; + + hashed_bit = intlv_bit; + + /* 4k hash bit only applies to the first 3 bits. */ + if (i <= 2) { + test_bit = BIT_ULL(12 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_4k; + } + + /* Use temporary 'test_bit' value to avoid Sparse warnings. */ + test_bit = BIT_ULL(15 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_64k; + test_bit = BIT_ULL(22 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_2M; + test_bit = BIT_ULL(29 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_1G; + test_bit = BIT_ULL(36 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_1T; + test_bit = BIT_ULL(43 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(base_bit); + } + + /* Die bits */ + num_intlv_bits = ilog2(ctx->map.num_intlv_dies); + + for (i = 0; i < num_intlv_bits; i++) { + base_bit = 12 + i; + + intlv_bit = BIT_ULL(base_bit) & ctx->ret_addr; + + hashed_bit = intlv_bit; + + test_bit = BIT_ULL(20 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_64k; + test_bit = BIT_ULL(27 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_2M; + test_bit = BIT_ULL(34 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_1G; + test_bit = BIT_ULL(41 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(base_bit); + } + + return 0; +} + +int dehash_address(struct addr_ctx *ctx) +{ + switch (ctx->map.intlv_mode) { + /* No hashing cases. */ + case NONE: + case NOHASH_2CHAN: + case NOHASH_4CHAN: + case NOHASH_8CHAN: + case NOHASH_16CHAN: + case NOHASH_32CHAN: + /* Hashing bits handled earlier during CS ID calculation. */ + case DF4_NPS4_3CHAN_HASH: + case DF4_NPS2_5CHAN_HASH: + case DF4_NPS2_6CHAN_HASH: + case DF4_NPS1_10CHAN_HASH: + case DF4_NPS1_12CHAN_HASH: + case DF4p5_NPS2_6CHAN_1K_HASH: + case DF4p5_NPS2_6CHAN_2K_HASH: + case DF4p5_NPS1_10CHAN_1K_HASH: + case DF4p5_NPS1_10CHAN_2K_HASH: + case DF4p5_NPS1_12CHAN_1K_HASH: + case DF4p5_NPS1_12CHAN_2K_HASH: + case DF4p5_NPS0_24CHAN_1K_HASH: + case DF4p5_NPS0_24CHAN_2K_HASH: + /* No hash physical address bits, so nothing to do. */ + case DF4p5_NPS4_3CHAN_1K_HASH: + case DF4p5_NPS4_3CHAN_2K_HASH: + case DF4p5_NPS2_5CHAN_1K_HASH: + case DF4p5_NPS2_5CHAN_2K_HASH: + return 0; + + case DF2_2CHAN_HASH: + return df2_dehash_addr(ctx); |
