diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-12-02 10:45:50 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-12-02 10:45:50 -0800 |
| commit | 49219bba0149157774b7091c3ea9ad22b2114285 (patch) | |
| tree | 6a310ca45d8e7e426e457d41899396ff12670062 /drivers | |
| parent | 7f8d5f70fffe2177afcc62f02feead5827dfe8dd (diff) | |
| parent | e2349c5811ae642f6d948d1ee4bfc4a93566c178 (diff) | |
Merge tag 'edac_updates_for_v6.19_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras
Pull EDAC updates from Borislav Petkov:
- imh_edac: Add a new EDAC driver for Intel Diamond Rapids and future
incarnations of this memory controller architecture
- amd64_edac: Remove the legacy csrow sysfs interface which has been
deprecated and unused (we assume) for at least a decade
- Add the capability to fall back to BIOS-provided address translation
functionality (ACPI PRM) which can be used on systems unsupported by
the current AMD address translation library
- The usual fixes, fixlets, cleanups and improvements all over the
place
* tag 'edac_updates_for_v6.19_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
RAS/AMD/ATL: Replace bitwise_xor_bits() with hweight16()
EDAC/igen6: Fix error handling in igen6_edac driver
EDAC/imh: Setup 'imh_test' debugfs testing node
EDAC/{skx_comm,imh}: Detect 2-level memory configuration
EDAC/skx_common: Extend the maximum number of DRAM chip row bits
EDAC/{skx_common,imh}: Add EDAC driver for Intel Diamond Rapids servers
EDAC/skx_common: Prepare for skx_set_hi_lo()
EDAC/skx_common: Prepare for skx_get_edac_list()
EDAC/{skx_common,skx,i10nm}: Make skx_register_mci() independent of pci_dev
EDAC/ghes: Replace deprecated strcpy() in ghes_edac_report_mem_error()
EDAC/ie31200: Fix error handling in ie31200_register_mci
RAS/CEC: Replace use of system_wq with system_percpu_wq
EDAC: Remove the legacy EDAC sysfs interface
EDAC/amd64: Remove NUM_CONTROLLERS macro
EDAC/amd64: Generate ctl_name string at runtime
RAS/AMD/ATL: Require PRM support for future systems
ACPI: PRM: Add acpi_prm_handler_available()
RAS/AMD/ATL: Return error codes from helper functions
Diffstat (limited to 'drivers')
| -rw-r--r-- | drivers/acpi/prmt.c | 6 | ||||
| -rw-r--r-- | drivers/edac/Kconfig | 20 | ||||
| -rw-r--r-- | drivers/edac/Makefile | 3 | ||||
| -rw-r--r-- | drivers/edac/amd64_edac.c | 61 | ||||
| -rw-r--r-- | drivers/edac/amd64_edac.h | 7 | ||||
| -rw-r--r-- | drivers/edac/edac_mc_sysfs.c | 404 | ||||
| -rw-r--r-- | drivers/edac/ghes_edac.c | 7 | ||||
| -rw-r--r-- | drivers/edac/i10nm_base.c | 3 | ||||
| -rw-r--r-- | drivers/edac/ie31200_edac.c | 2 | ||||
| -rw-r--r-- | drivers/edac/igen6_edac.c | 2 | ||||
| -rw-r--r-- | drivers/edac/imh_base.c | 602 | ||||
| -rw-r--r-- | drivers/edac/skx_base.c | 4 | ||||
| -rw-r--r-- | drivers/edac/skx_common.c | 33 | ||||
| -rw-r--r-- | drivers/edac/skx_common.h | 98 | ||||
| -rw-r--r-- | drivers/ras/amd/atl/core.c | 7 | ||||
| -rw-r--r-- | drivers/ras/amd/atl/internal.h | 6 | ||||
| -rw-r--r-- | drivers/ras/amd/atl/prm.c | 4 | ||||
| -rw-r--r-- | drivers/ras/amd/atl/system.c | 30 | ||||
| -rw-r--r-- | drivers/ras/amd/atl/umc.c | 23 | ||||
| -rw-r--r-- | drivers/ras/cec.c | 2 |
20 files changed, 791 insertions, 533 deletions
diff --git a/drivers/acpi/prmt.c b/drivers/acpi/prmt.c index 6792d4385eee..7b8b5d2015ec 100644 --- a/drivers/acpi/prmt.c +++ b/drivers/acpi/prmt.c @@ -244,6 +244,12 @@ static struct prm_handler_info *find_prm_handler(const guid_t *guid) return (struct prm_handler_info *) find_guid_info(guid, GET_HANDLER); } +bool acpi_prm_handler_available(const guid_t *guid) +{ + return find_prm_handler(guid) && find_prm_module(guid); +} +EXPORT_SYMBOL_GPL(acpi_prm_handler_available); + /* In-coming PRM commands */ #define PRM_CMD_RUN_SERVICE 0 diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 39352b9b7a7e..81e40543ffd8 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -23,14 +23,6 @@ menuconfig EDAC if EDAC -config EDAC_LEGACY_SYSFS - bool "EDAC legacy sysfs" - default y - help - Enable the compatibility sysfs nodes. - Use 'Y' if your edac utilities aren't ported to work with the newer - structures. - config EDAC_DEBUG bool "Debugging" select DEBUG_FS @@ -291,6 +283,18 @@ config EDAC_I10NM system has non-volatile DIMMs you should also manually select CONFIG_ACPI_NFIT. +config EDAC_IMH + tristate "Intel Integrated Memory/IO Hub MC" + depends on X86_64 && X86_MCE_INTEL && ACPI + depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_IMH can't be y + select DMI + select ACPI_ADXL + help + Support for error detection and correction the Intel + Integrated Memory/IO Hub Memory Controller. This MC IP is + first used on the Diamond Rapids servers but may appear on + others in the future. 
+ config EDAC_PND2 tristate "Intel Pondicherry2" depends on PCI && X86_64 && X86_MCE_INTEL diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 1c14796410a3..8429b1e856bc 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -65,6 +65,9 @@ obj-$(CONFIG_EDAC_SKX) += skx_edac.o skx_edac_common.o i10nm_edac-y := i10nm_base.o obj-$(CONFIG_EDAC_I10NM) += i10nm_edac.o skx_edac_common.o +imh_edac-y := imh_base.o +obj-$(CONFIG_EDAC_IMH) += imh_edac.o skx_edac_common.o + obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 2f6ab783bf20..2391f3469961 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3732,6 +3732,7 @@ static void hw_info_put(struct amd64_pvt *pvt) pci_dev_put(pvt->F1); pci_dev_put(pvt->F2); kfree(pvt->umc); + kfree(pvt->csels); } static struct low_ops umc_ops = { @@ -3766,6 +3767,7 @@ static int per_family_init(struct amd64_pvt *pvt) pvt->stepping = boot_cpu_data.x86_stepping; pvt->model = boot_cpu_data.x86_model; pvt->fam = boot_cpu_data.x86; + char *tmp_name = NULL; pvt->max_mcs = 2; /* @@ -3779,7 +3781,7 @@ static int per_family_init(struct amd64_pvt *pvt) switch (pvt->fam) { case 0xf: - pvt->ctl_name = (pvt->ext_model >= K8_REV_F) ? + tmp_name = (pvt->ext_model >= K8_REV_F) ? 
"K8 revF or later" : "K8 revE or earlier"; pvt->f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP; pvt->f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL; @@ -3788,7 +3790,6 @@ static int per_family_init(struct amd64_pvt *pvt) break; case 0x10: - pvt->ctl_name = "F10h"; pvt->f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP; pvt->f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM; pvt->ops->dbam_to_cs = f10_dbam_to_chip_select; @@ -3797,12 +3798,10 @@ static int per_family_init(struct amd64_pvt *pvt) case 0x15: switch (pvt->model) { case 0x30: - pvt->ctl_name = "F15h_M30h"; pvt->f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1; pvt->f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2; break; case 0x60: - pvt->ctl_name = "F15h_M60h"; pvt->f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1; pvt->f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2; pvt->ops->dbam_to_cs = f15_m60h_dbam_to_chip_select; @@ -3811,7 +3810,6 @@ static int per_family_init(struct amd64_pvt *pvt) /* Richland is only client */ return -ENODEV; default: - pvt->ctl_name = "F15h"; pvt->f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1; pvt->f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2; pvt->ops->dbam_to_cs = f15_dbam_to_chip_select; @@ -3822,12 +3820,10 @@ static int per_family_init(struct amd64_pvt *pvt) case 0x16: switch (pvt->model) { case 0x30: - pvt->ctl_name = "F16h_M30h"; pvt->f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1; pvt->f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2; break; default: - pvt->ctl_name = "F16h"; pvt->f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1; pvt->f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2; break; @@ -3836,76 +3832,51 @@ static int per_family_init(struct amd64_pvt *pvt) case 0x17: switch (pvt->model) { - case 0x10 ... 0x2f: - pvt->ctl_name = "F17h_M10h"; - break; case 0x30 ... 0x3f: - pvt->ctl_name = "F17h_M30h"; pvt->max_mcs = 8; break; - case 0x60 ... 0x6f: - pvt->ctl_name = "F17h_M60h"; - break; - case 0x70 ... 0x7f: - pvt->ctl_name = "F17h_M70h"; - break; default: - pvt->ctl_name = "F17h"; break; } break; case 0x18: - pvt->ctl_name = "F18h"; break; case 0x19: switch (pvt->model) { case 0x00 ... 
0x0f: - pvt->ctl_name = "F19h"; pvt->max_mcs = 8; break; case 0x10 ... 0x1f: - pvt->ctl_name = "F19h_M10h"; pvt->max_mcs = 12; pvt->flags.zn_regs_v2 = 1; break; - case 0x20 ... 0x2f: - pvt->ctl_name = "F19h_M20h"; - break; case 0x30 ... 0x3f: if (pvt->F3->device == PCI_DEVICE_ID_AMD_MI200_DF_F3) { - pvt->ctl_name = "MI200"; + tmp_name = "MI200"; pvt->max_mcs = 4; pvt->dram_type = MEM_HBM2; pvt->gpu_umc_base = 0x50000; pvt->ops = &gpu_ops; } else { - pvt->ctl_name = "F19h_M30h"; pvt->max_mcs = 8; } break; - case 0x50 ... 0x5f: - pvt->ctl_name = "F19h_M50h"; - break; case 0x60 ... 0x6f: - pvt->ctl_name = "F19h_M60h"; pvt->flags.zn_regs_v2 = 1; break; case 0x70 ... 0x7f: - pvt->ctl_name = "F19h_M70h"; pvt->max_mcs = 4; pvt->flags.zn_regs_v2 = 1; break; case 0x90 ... 0x9f: - pvt->ctl_name = "F19h_M90h"; pvt->max_mcs = 4; pvt->dram_type = MEM_HBM3; pvt->gpu_umc_base = 0x90000; pvt->ops = &gpu_ops; break; case 0xa0 ... 0xaf: - pvt->ctl_name = "F19h_MA0h"; pvt->max_mcs = 12; pvt->flags.zn_regs_v2 = 1; break; @@ -3915,34 +3886,22 @@ static int per_family_init(struct amd64_pvt *pvt) case 0x1A: switch (pvt->model) { case 0x00 ... 0x1f: - pvt->ctl_name = "F1Ah"; pvt->max_mcs = 12; pvt->flags.zn_regs_v2 = 1; break; case 0x40 ... 0x4f: - pvt->ctl_name = "F1Ah_M40h"; pvt->flags.zn_regs_v2 = 1; break; case 0x50 ... 0x57: - pvt->ctl_name = "F1Ah_M50h"; + case 0xc0 ... 0xc7: pvt->max_mcs = 16; pvt->flags.zn_regs_v2 = 1; break; case 0x90 ... 0x9f: - pvt->ctl_name = "F1Ah_M90h"; - pvt->max_mcs = 8; - pvt->flags.zn_regs_v2 = 1; - break; case 0xa0 ... 0xaf: - pvt->ctl_name = "F1Ah_MA0h"; pvt->max_mcs = 8; pvt->flags.zn_regs_v2 = 1; break; - case 0xc0 ... 
0xc7: - pvt->ctl_name = "F1Ah_MC0h"; - pvt->max_mcs = 16; - pvt->flags.zn_regs_v2 = 1; - break; } break; @@ -3951,6 +3910,16 @@ static int per_family_init(struct amd64_pvt *pvt) return -ENODEV; } + if (tmp_name) + scnprintf(pvt->ctl_name, sizeof(pvt->ctl_name), tmp_name); + else + scnprintf(pvt->ctl_name, sizeof(pvt->ctl_name), "F%02Xh_M%02Xh", + pvt->fam, pvt->model); + + pvt->csels = kcalloc(pvt->max_mcs, sizeof(*pvt->csels), GFP_KERNEL); + if (!pvt->csels) + return -ENOMEM; + return 0; } diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index d70b8a8d0b09..1757c1b99fc8 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -96,11 +96,12 @@ /* Hardware limit on ChipSelect rows per MC and processors per system */ #define NUM_CHIPSELECTS 8 #define DRAM_RANGES 8 -#define NUM_CONTROLLERS 16 #define ON true #define OFF false +#define MAX_CTL_NAMELEN 19 + /* * PCI-defined configuration space registers */ @@ -346,7 +347,7 @@ struct amd64_pvt { u32 dbam1; /* DRAM Base Address Mapping reg for DCT1 */ /* one for each DCT/UMC */ - struct chip_select csels[NUM_CONTROLLERS]; + struct chip_select *csels; /* DRAM base and limit pairs F1x[78,70,68,60,58,50,48,40] */ struct dram_range ranges[DRAM_RANGES]; @@ -362,7 +363,7 @@ struct amd64_pvt { /* x4, x8, or x16 syndromes in use */ u8 ecc_sym_sz; - const char *ctl_name; + char ctl_name[MAX_CTL_NAMELEN]; u16 f1_id, f2_id; /* Maximum number of memory controllers per die/node. 
*/ u8 max_mcs; diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 8689631f1905..091cc6aae8a9 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -115,401 +115,6 @@ static const char * const edac_caps[] = { [EDAC_S16ECD16ED] = "S16ECD16ED" }; -#ifdef CONFIG_EDAC_LEGACY_SYSFS -/* - * EDAC sysfs CSROW data structures and methods - */ - -#define to_csrow(k) container_of(k, struct csrow_info, dev) - -/* - * We need it to avoid namespace conflicts between the legacy API - * and the per-dimm/per-rank one - */ -#define DEVICE_ATTR_LEGACY(_name, _mode, _show, _store) \ - static struct device_attribute dev_attr_legacy_##_name = __ATTR(_name, _mode, _show, _store) - -struct dev_ch_attribute { - struct device_attribute attr; - unsigned int channel; -}; - -#define DEVICE_CHANNEL(_name, _mode, _show, _store, _var) \ - static struct dev_ch_attribute dev_attr_legacy_##_name = \ - { __ATTR(_name, _mode, _show, _store), (_var) } - -#define to_channel(k) (container_of(k, struct dev_ch_attribute, attr)->channel) - -/* Set of more default csrow<id> attribute show/store functions */ -static ssize_t csrow_ue_count_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - - return sysfs_emit(data, "%u\n", csrow->ue_count); -} - -static ssize_t csrow_ce_count_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - - return sysfs_emit(data, "%u\n", csrow->ce_count); -} - -static ssize_t csrow_size_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - int i; - u32 nr_pages = 0; - - for (i = 0; i < csrow->nr_channels; i++) - nr_pages += csrow->channels[i]->dimm->nr_pages; - return sysfs_emit(data, "%u\n", PAGES_TO_MiB(nr_pages)); -} - -static ssize_t csrow_mem_type_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct 
csrow_info *csrow = to_csrow(dev); - - return sysfs_emit(data, "%s\n", edac_mem_types[csrow->channels[0]->dimm->mtype]); -} - -static ssize_t csrow_dev_type_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - - return sysfs_emit(data, "%s\n", dev_types[csrow->channels[0]->dimm->dtype]); -} - -static ssize_t csrow_edac_mode_show(struct device *dev, - struct device_attribute *mattr, - char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - - return sysfs_emit(data, "%s\n", edac_caps[csrow->channels[0]->dimm->edac_mode]); -} - -/* show/store functions for DIMM Label attributes */ -static ssize_t channel_dimm_label_show(struct device *dev, - struct device_attribute *mattr, - char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - unsigned int chan = to_channel(mattr); - struct rank_info *rank = csrow->channels[chan]; - - /* if field has not been initialized, there is nothing to send */ - if (!rank->dimm->label[0]) - return 0; - - return sysfs_emit(data, "%s\n", rank->dimm->label); -} - -static ssize_t channel_dimm_label_store(struct device *dev, - struct device_attribute *mattr, - const char *data, size_t count) -{ - struct csrow_info *csrow = to_csrow(dev); - unsigned int chan = to_channel(mattr); - struct rank_info *rank = csrow->channels[chan]; - size_t copy_count = count; - - if (count == 0) - return -EINVAL; - - if (data[count - 1] == '\0' || data[count - 1] == '\n') - copy_count -= 1; - - if (copy_count == 0 || copy_count >= sizeof(rank->dimm->label)) - return -EINVAL; - - memcpy(rank->dimm->label, data, copy_count); - rank->dimm->label[copy_count] = '\0'; - - return count; -} - -/* show function for dynamic chX_ce_count attribute */ -static ssize_t channel_ce_count_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - unsigned int chan = to_channel(mattr); - struct rank_info *rank = csrow->channels[chan]; - - return 
sysfs_emit(data, "%u\n", rank->ce_count); -} - -/* cwrow<id>/attribute files */ -DEVICE_ATTR_LEGACY(size_mb, S_IRUGO, csrow_size_show, NULL); -DEVICE_ATTR_LEGACY(dev_type, S_IRUGO, csrow_dev_type_show, NULL); -DEVICE_ATTR_LEGACY(mem_type, S_IRUGO, csrow_mem_type_show, NULL); -DEVICE_ATTR_LEGACY(edac_mode, S_IRUGO, csrow_edac_mode_show, NULL); -DEVICE_ATTR_LEGACY(ue_count, S_IRUGO, csrow_ue_count_show, NULL); -DEVICE_ATTR_LEGACY(ce_count, S_IRUGO, csrow_ce_count_show, NULL); - -/* default attributes of the CSROW<id> object */ -static struct attribute *csrow_attrs[] = { - &dev_attr_legacy_dev_type.attr, - &dev_attr_legacy_mem_type.attr, - &dev_attr_legacy_edac_mode.attr, - &dev_attr_legacy_size_mb.attr, - &dev_attr_legacy_ue_count.attr, - &dev_attr_legacy_ce_count.attr, - NULL, -}; - -static const struct attribute_group csrow_attr_grp = { - .attrs = csrow_attrs, -}; - -static const struct attribute_group *csrow_attr_groups[] = { - &csrow_attr_grp, - NULL -}; - -static const struct device_type csrow_attr_type = { - .groups = csrow_attr_groups, -}; - -/* - * possible dynamic channel DIMM Label attribute files - * - */ -DEVICE_CHANNEL(ch0_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 0); -DEVICE_CHANNEL(ch1_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 1); -DEVICE_CHANNEL(ch2_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 2); -DEVICE_CHANNEL(ch3_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 3); -DEVICE_CHANNEL(ch4_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 4); -DEVICE_CHANNEL(ch5_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 5); -DEVICE_CHANNEL(ch6_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 6); -DEVICE_CHANNEL(ch7_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 7); 
-DEVICE_CHANNEL(ch8_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 8); -DEVICE_CHANNEL(ch9_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 9); -DEVICE_CHANNEL(ch10_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 10); -DEVICE_CHANNEL(ch11_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 11); -DEVICE_CHANNEL(ch12_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 12); -DEVICE_CHANNEL(ch13_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 13); -DEVICE_CHANNEL(ch14_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 14); -DEVICE_CHANNEL(ch15_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 15); - -/* Total possible dynamic DIMM Label attribute file table */ -static struct attribute *dynamic_csrow_dimm_attr[] = { - &dev_attr_legacy_ch0_dimm_label.attr.attr, - &dev_attr_legacy_ch1_dimm_label.attr.attr, - &dev_attr_legacy_ch2_dimm_label.attr.attr, - &dev_attr_legacy_ch3_dimm_label.attr.attr, - &dev_attr_legacy_ch4_dimm_label.attr.attr, - &dev_attr_legacy_ch5_dimm_label.attr.attr, - &dev_attr_legacy_ch6_dimm_label.attr.attr, - &dev_attr_legacy_ch7_dimm_label.attr.attr, - &dev_attr_legacy_ch8_dimm_label.attr.attr, - &dev_attr_legacy_ch9_dimm_label.attr.attr, - &dev_attr_legacy_ch10_dimm_label.attr.attr, - &dev_attr_legacy_ch11_dimm_label.attr.attr, - &dev_attr_legacy_ch12_dimm_label.attr.attr, - &dev_attr_legacy_ch13_dimm_label.attr.attr, - &dev_attr_legacy_ch14_dimm_label.attr.attr, - &dev_attr_legacy_ch15_dimm_label.attr.attr, - NULL -}; - -/* possible dynamic channel ce_count attribute files */ -DEVICE_CHANNEL(ch0_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 0); -DEVICE_CHANNEL(ch1_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 1); -DEVICE_CHANNEL(ch2_ce_count, S_IRUGO, - 
channel_ce_count_show, NULL, 2); -DEVICE_CHANNEL(ch3_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 3); -DEVICE_CHANNEL(ch4_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 4); -DEVICE_CHANNEL(ch5_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 5); -DEVICE_CHANNEL(ch6_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 6); -DEVICE_CHANNEL(ch7_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 7); -DEVICE_CHANNEL(ch8_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 8); -DEVICE_CHANNEL(ch9_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 9); -DEVICE_CHANNEL(ch10_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 10); -DEVICE_CHANNEL(ch11_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 11); -DEVICE_CHANNEL(ch12_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 12); -DEVICE_CHANNEL(ch13_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 13); -DEVICE_CHANNEL(ch14_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 14); -DEVICE_CHANNEL(ch15_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 15); - -/* Total possible dynamic ce_count attribute file table */ -static struct attribute *dynamic_csrow_ce_count_attr[] = { - &dev_attr_legacy_ch0_ce_count.attr.attr, - &dev_attr_legacy_ch1_ce_count.attr.attr, - &dev_attr_legacy_ch2_ce_count.attr.attr, - &dev_attr_legacy_ch3_ce_count.attr.attr, - &dev_attr_legacy_ch4_ce_count.attr.attr, - &dev_attr_legacy_ch5_ce_count.attr.attr, - &dev_attr_legacy_ch6_ce_count.attr.attr, - &dev_attr_legacy_ch7_ce_count.attr.attr, - &dev_attr_legacy_ch8_ce_count.attr.attr, - &dev_attr_legacy_ch9_ce_count.attr.attr, - &dev_attr_legacy_ch10_ce_count.attr.attr, - &dev_attr_legacy_ch11_ce_count.attr.attr, - &dev_attr_legacy_ch12_ce_count.attr.attr, - &dev_attr_legacy_ch13_ce_count.attr.attr, - &dev_attr_legacy_ch14_ce_count.attr.attr, - &dev_attr_legacy_ch15_ce_count.attr.attr, - NULL -}; - -static umode_t csrow_dev_is_visible(struct kobject *kobj, - struct attribute *attr, int idx) -{ - struct device *dev = kobj_to_dev(kobj); - struct csrow_info 
*csrow = container_of(dev, struct csrow_info, dev); - - if (idx >= csrow->nr_channels) - return 0; - - if (idx >= ARRAY_SIZE(dynamic_csrow_ce_count_attr) - 1) { - WARN_ONCE(1, "idx: %d\n", idx); - return 0; - } - - /* Only expose populated DIMMs */ - if (!csrow->channels[idx]->dimm->nr_pages) - return 0; - - return attr->mode; -} - - -static const struct attribute_group csrow_dev_dimm_group = { - .attrs = dynamic_csrow_dimm_attr, - .is_visible = csrow_dev_is_visible, -}; - -static const struct attribute_group csrow_dev_ce_count_group = { - .attrs = dynamic_csrow_ce_count_attr, - .is_visible = csrow_dev_is_visible, -}; - -static const struct attribute_group *csrow_dev_groups[] = { - &csrow_dev_dimm_group, - &csrow_dev_ce_count_group, - NULL -}; - -static void csrow_release(struct device *dev) -{ - /* - * Nothing to do, just unregister sysfs here. The mci - * device owns the data and will also release it. - */ -} - -static inline int nr_pages_per_csrow(struct csrow_info *csrow) -{ - int chan, nr_pages = 0; - - for (chan = 0; chan < csrow->nr_channels; chan++) - nr_pages += csrow->channels[chan]->dimm->nr_pages; - - return nr_pages; -} - -/* Create a CSROW object under specified edac_mc_device */ -static int edac_create_csrow_object(struct mem_ctl_info *mci, - struct csrow_info *csrow, int index) -{ - int err; - - csrow->dev.type = &csrow_attr_type; - csrow->dev.groups = csrow_dev_groups; - csrow->dev.release = csrow_release; - device_initialize(&csrow->dev); - csrow->dev.parent = &mci->dev; - csrow->mci = mci; - dev_set_name(&csrow->dev, "csrow%d", index); - dev_set_drvdata(&csrow->dev, csrow); - - err = device_add(&csrow->dev); - if (err) { - edac_dbg(1, "failure: create device %s\n", dev_name(&csrow->dev)); - put_device(&csrow->dev); - return err; - } - - edac_dbg(0, "device %s created\n", dev_name(&csrow->dev)); - - return 0; -} - -/* Create a CSROW object under specified edac_mc_device */ -static int edac_create_csrow_objects(struct mem_ctl_info *mci) -{ - int 
err, i; - struct csrow_info *csrow; - - for (i = 0; i < mci->nr_csrows; i++) { - csrow = mci->csrows[i]; - if (!nr_pages_per_csrow(csrow)) - continue; - err = edac_create_csrow_object(mci, mci->csrows[i], i); - if (err < 0) - goto error; - } - return 0; - -error: - for (--i; i >= 0; i--) { - if (device_is_registered(&mci->csrows[i]->dev)) - device_unregister(&mci->csrows[i]->dev); - } - - return err; -} - -static void edac_delete_csrow_objects(struct mem_ctl_info *mci) -{ - int i; - - for (i = 0; i < mci->nr_csrows; i++) { - if (device_is_registered(&mci->csrows[i]->dev)) - device_unregister(&mci->csrows[i]->dev); - } -} - -#endif - /* * Per-dimm (or per-rank) devices */ @@ -989,12 +594,6 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci, goto fail; } -#ifdef CONFIG_EDAC_LEGACY_SYSFS - err = edac_create_csrow_objects(mci); - if (err < 0) - goto fail; -#endif - edac_create_debugfs_nodes(mci); return 0; @@ -1019,9 +618,6 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci) #ifdef CONFIG_EDAC_DEBUG edac_debugfs_remove_recursive(mci->debugfs); #endif -#ifdef CONFIG_EDAC_LEGACY_SYSFS - edac_delete_csrow_objects(mci); -#endif mci_for_each_dimm(mci, dimm) { if (!device_is_registered(&dimm->dev)) diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index 1eb0136c6fbd..d80c88818691 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -15,6 +15,7 @@ #include "edac_module.h" #include <ras/ras_event.h> #include <linux/notifier.h> +#include <linux/string.h> #define OTHER_DETAIL_LEN 400 @@ -332,7 +333,7 @@ static int ghes_edac_report_mem_error(struct notifier_block *nb, p = pvt->msg; p += snprintf(p, sizeof(pvt->msg), "%s", cper_mem_err_type_str(etype)); } else { - strcpy(pvt->msg, "unknown error"); + strscpy(pvt->msg, "unknown error"); } /* Error address */ @@ -357,14 +358,14 @@ static int ghes_edac_report_mem_error(struct notifier_block *nb, dimm = find_dimm_by_handle(mci, mem_err->mem_dev_handle); if (dimm) { e->top_layer = 
dimm->idx; - strcpy(e->label, dimm->label); + strscpy(e->label, dimm->label); } } if (p > e->location) *(p - 1) = '\0'; if (!*e->label) - strcpy(e->label, "unknown memory"); + strscpy(e->label, "unknown memory"); /* All other fields are mapped on e->other_detail */ p = pvt->other_detail; diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 2010a47149f4..89b3e8cc38b1 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -1198,7 +1198,8 @@ static int __init i10nm_init(void) d->imc[i].num_dimms = cfg->ddr_dimm_num; } - rc = skx_register_mci(&d->imc[i], d->imc[i].mdev, + rc = skx_register_mci(&d->imc[i], &d->imc[i].mdev->dev, + pci_name(d->imc[i].mdev), "Intel_10nm Socket", EDAC_MOD_STR, i10nm_get_dimm_config, cfg); if (rc < 0) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 5a080ab65476..8d4ddaa85ae8 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -526,6 +526,7 @@ static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg, in ie31200_pvt.priv[mc] = priv; return 0; fail_unmap: + put_device(&priv->dev); iounmap(window); fail_free: edac_mc_free(mci); @@ -598,6 +599,7 @@ static void ie31200_unregister_mcis(void) mci = priv->mci; edac_mc_del_mc(mci->pdev); iounmap(priv->window); + put_device(&priv->dev); edac_mc_free(mci); } } diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c index 2fc59f9eed69..553c31a2d922 100644 --- a/drivers/edac/igen6_edac.c +++ b/drivers/edac/igen6_edac.c @@ -1300,6 +1300,7 @@ static int igen6_register_mci(int mc, void __iomem *window, struct pci_dev *pdev imc->mci = mci; return 0; fail3: + put_device(&imc->dev); mci->pvt_info = NULL; kfree(mci->ctl_name); fail2: @@ -1326,6 +1327,7 @@ static void igen6_unregister_mcis(void) kfree(mci->ctl_name); mci->pvt_info = NULL; edac_mc_free(mci); + put_device(&imc->dev); iounmap(imc->window); } } diff --git a/drivers/edac/imh_base.c b/drivers/edac/imh_base.c new file mode 100644 
index 000000000000..4348b3883b45 --- /dev/null +++ b/drivers/edac/imh_base.c @@ -0,0 +1,602 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Driver for Intel(R) servers with Integrated Memory/IO Hub-based memory controller. + * Copyright (c) 2025, Intel Corporation. + */ + +#include <linux/kernel.h> +#include <linux/io.h> +#include <asm/cpu_device_id.h> +#include <asm/intel-family.h> +#include <asm/mce.h> +#include <asm/cpu.h> +#include "edac_module.h" +#include "skx_common.h" + +#define IMH_REVISION "v0.0.1" +#define EDAC_MOD_STR "imh_edac" + +/* Debug macros */ +#define imh_printk(level, fmt, arg...) \ + edac_printk(level, "imh", fmt, ##arg) + +/* Configuration Agent(Ubox) */ +#define MMIO_BASE_H(reg) (((u64)GET_BITFIELD(reg, 0, 29)) << 23) +#define SOCKET_ID(reg) GET_BITFIELD(reg, 0, 3) + +/* PUNIT */ +#define DDR_IMC_BITMAP(reg) GET_BITFIELD(reg, 23, 30) + +/* Memory Controller */ +#define ECC_ENABLED(reg) GET_BITFIELD(reg, 2, 2) +#define DIMM_POPULATED(reg) GET_BITFIELD(reg, 15, 15) + +/* System Cache Agent(SCA) */ +#define TOLM(reg) (((u64)GET_BITFIELD(reg, 16, 31)) << 16) +#define TOHM(reg) (((u64)GET_BITFIELD(reg, 16, 51)) << 16) + +/* Home Agent (HA) */ +#define NMCACHING(reg) GET_BITFIELD(reg, 8, 8) < |
