diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-10 18:54:22 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-10 18:54:22 -0700 |
| commit | c5aec4c76af1a2d89ee2f2d4d5463b2ad2d85de5 (patch) | |
| tree | 628ae2d9370a6739fd98d8d2f055b46c87ab9316 /arch/powerpc/kernel | |
| parent | 2937f5efa5754754daf46de745f67350f7f06ec2 (diff) | |
| parent | 0c0a3e5a100bbc4aaedd140e82b429227a76701b (diff) | |
Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
Pull powerpc updates from Ben Herrenschmidt:
"Here is the bulk of the powerpc changes for this merge window. It got
a bit delayed in part because I wasn't paying attention, and in part
because I discovered I had a core PCI change without a PCI maintainer
ack in it. Bjorn eventually agreed it was ok to merge it though we'll
probably improve it later and I didn't want to rebase to add his ack.
There is going to be a bit more next week, essentially fixes that I
still want to sort through and test.
The biggest item this time is the support to build the ppc64 LE kernel
with our new v2 ABI. We previously supported v2 userspace but the
kernel itself was a tougher nut to crack. This is now sorted mostly
thanks to Anton and Rusty.
We also have a fairly big series from Cedric that add support for
64-bit LE zImage boot wrapper. This was made harder by the fact that
traditionally our zImage wrapper was always 32-bit, but our new LE
toolchains don't really support 32-bit anymore (it's somewhat there
but not really "supported") so we didn't want to rely on it. This
meant more churn that just endian fixes.
This brings some more LE bits as well, such as the ability to run in
LE mode without a hypervisor (ie. under OPAL firmware) by doing the
right OPAL call to reinitialize the CPU to take HV interrupts in the
right mode and the usual pile of endian fixes.
There's another series from Gavin adding EEH improvements (one day we
*will* have a release with less than 20 EEH patches, I promise!).
Another highlight is the support for the "Split core" functionality on
P8 by Michael. This allows a P8 core to be split into "sub cores" of
4 threads which allows the subcores to run different guests under KVM
(the HW still doesn't support a partition per thread).
And then the usual misc bits and fixes ..."
[ Further delayed by gmail deciding that BenH is a dirty spammer.
Google knows. ]
* 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: (155 commits)
powerpc/powernv: Add missing include to LPC code
selftests/powerpc: Test the THP bug we fixed in the previous commit
powerpc/mm: Check paca psize is up to date for huge mappings
powerpc/powernv: Pass buffer size to OPAL validate flash call
powerpc/pseries: hcall functions are exported to modules, need _GLOBAL_TOC()
powerpc: Exported functions __clear_user and copy_page use r2 so need _GLOBAL_TOC()
powerpc/powernv: Set memory_block_size_bytes to 256MB
powerpc: Allow ppc_md platform hook to override memory_block_size_bytes
powerpc/powernv: Fix endian issues in memory error handling code
powerpc/eeh: Skip eeh sysfs when eeh is disabled
powerpc: 64bit sendfile is capped at 2GB
powerpc/powernv: Provide debugfs access to the LPC bus via OPAL
powerpc/serial: Use saner flags when creating legacy ports
powerpc: Add cpu family documentation
powerpc/xmon: Fix up xmon format strings
powerpc/powernv: Add calls to support little endian host
powerpc: Document sysfs DSCR interface
powerpc: Fix regression of per-CPU DSCR setting
powerpc: Split __SYSFS_SPRSETUP macro
arch: powerpc/fadump: Cleaning up inconsistent NULL checks
...
Diffstat (limited to 'arch/powerpc/kernel')
33 files changed, 1195 insertions, 787 deletions
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 93e1465c8496..f5995a912213 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -248,6 +248,7 @@ int main(void) #endif DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); + DEFINE(PACA_DSCR, offsetof(struct paca_struct, dscr_default)); DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime)); DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user)); DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index cc2d8962e090..4f1393d20079 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -94,12 +94,12 @@ _GLOBAL(setup_altivec_idle) _GLOBAL(__setup_cpu_e6500) mflr r6 #ifdef CONFIG_PPC64 - bl .setup_altivec_ivors + bl setup_altivec_ivors /* Touch IVOR42 only if the CPU supports E.HV category */ mfspr r10,SPRN_MMUCFG rlwinm. r10,r10,0,MMUCFG_LPIDSIZE beq 1f - bl .setup_lrat_ivor + bl setup_lrat_ivor 1: #endif bl setup_pw20_idle @@ -164,15 +164,15 @@ _GLOBAL(__setup_cpu_e5500) #ifdef CONFIG_PPC_BOOK3E_64 _GLOBAL(__restore_cpu_e6500) mflr r5 - bl .setup_altivec_ivors + bl setup_altivec_ivors /* Touch IVOR42 only if the CPU supports E.HV category */ mfspr r10,SPRN_MMUCFG rlwinm. r10,r10,0,MMUCFG_LPIDSIZE beq 1f - bl .setup_lrat_ivor + bl setup_lrat_ivor 1: - bl .setup_pw20_idle - bl .setup_altivec_idle + bl setup_pw20_idle + bl setup_altivec_idle bl __restore_cpu_e5500 mtlr r5 blr @@ -181,9 +181,9 @@ _GLOBAL(__restore_cpu_e5500) mflr r4 bl __e500_icache_setup bl __e500_dcache_setup - bl .__setup_base_ivors - bl .setup_perfmon_ivor - bl .setup_doorbell_ivors + bl __setup_base_ivors + bl setup_perfmon_ivor + bl setup_doorbell_ivors /* * We only want to touch IVOR38-41 if we're running on hardware * that supports category E.HV. The architectural way to determine @@ -192,7 +192,7 @@ _GLOBAL(__restore_cpu_e5500) mfspr r10,SPRN_MMUCFG rlwinm. r10,r10,0,MMUCFG_LPIDSIZE beq 1f - bl .setup_ehv_ivors + bl setup_ehv_ivors 1: mtlr r4 blr @@ -201,9 +201,9 @@ _GLOBAL(__setup_cpu_e5500) mflr r5 bl __e500_icache_setup bl __e500_dcache_setup - bl .__setup_base_ivors - bl .setup_perfmon_ivor - bl .setup_doorbell_ivors + bl __setup_base_ivors + bl setup_perfmon_ivor + bl setup_doorbell_ivors /* * We only want to touch IVOR38-41 if we're running on hardware * that supports category E.HV. The architectural way to determine @@ -212,7 +212,7 @@ _GLOBAL(__setup_cpu_e5500) mfspr r10,SPRN_MMUCFG rlwinm. r10,r10,0,MMUCFG_LPIDSIZE beq 1f - bl .setup_ehv_ivors + bl setup_ehv_ivors b 2f 1: ld r10,CPU_SPEC_FEATURES(r4) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index e7b76a6bf150..7051ea3101b9 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -22,6 +22,7 @@ */ #include <linux/delay.h> +#include <linux/debugfs.h> #include <linux/sched.h> #include <linux/init.h> #include <linux/list.h> @@ -35,6 +36,7 @@ #include <linux/of.h> #include <linux/atomic.h> +#include <asm/debug.h> #include <asm/eeh.h> #include <asm/eeh_event.h> #include <asm/io.h> @@ -87,22 +89,21 @@ /* Time to wait for a PCI slot to report status, in milliseconds */ #define PCI_BUS_RESET_WAIT_MSEC (5*60*1000) -/* Platform dependent EEH operations */ -struct eeh_ops *eeh_ops = NULL; - -bool eeh_subsystem_enabled = false; -EXPORT_SYMBOL(eeh_subsystem_enabled); - /* - * EEH probe mode support. The intention is to support multiple - * platforms for EEH. Some platforms like pSeries do PCI emunation - * based on device tree. However, other platforms like powernv probe - * PCI devices from hardware. The flag is used to distinguish that. - * In addition, struct eeh_ops::probe would be invoked for particular - * OF node or PCI device so that the corresponding PE would be created - * there. + * EEH probe mode support, which is part of the flags, + * is to support multiple platforms for EEH. Some platforms + * like pSeries do PCI emunation based on device tree. + * However, other platforms like powernv probe PCI devices + * from hardware. The flag is used to distinguish that. + * In addition, struct eeh_ops::probe would be invoked for + * particular OF node or PCI device so that the corresponding + * PE would be created there. */ -int eeh_probe_mode; +int eeh_subsystem_flags; +EXPORT_SYMBOL(eeh_subsystem_flags); + +/* Platform dependent EEH operations */ +struct eeh_ops *eeh_ops = NULL; /* Lock to avoid races due to multiple reports of an error */ DEFINE_RAW_SPINLOCK(confirm_error_lock); @@ -133,6 +134,15 @@ static struct eeh_stats eeh_stats; #define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE) +static int __init eeh_setup(char *str) +{ + if (!strcmp(str, "off")) + eeh_subsystem_flags |= EEH_FORCE_DISABLED; + + return 1; +} +__setup("eeh=", eeh_setup); + /** * eeh_gather_pci_data - Copy assorted PCI config space registers to buff * @edev: device to report data for @@ -145,73 +155,67 @@ static struct eeh_stats eeh_stats; static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) { struct device_node *dn = eeh_dev_to_of_node(edev); - struct pci_dev *dev = eeh_dev_to_pci_dev(edev); u32 cfg; int cap, i; int n = 0; n += scnprintf(buf+n, len-n, "%s\n", dn->full_name); - printk(KERN_WARNING "EEH: of node=%s\n", dn->full_name); + pr_warn("EEH: of node=%s\n", dn->full_name); eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg); n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); - printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg); + pr_warn("EEH: PCI device/vendor: %08x\n", cfg); eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg); n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); - printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg); - - if (!dev) { - printk(KERN_WARNING "EEH: no PCI device for this of node\n"); - return n; - } + pr_warn("EEH: PCI cmd/status register: %08x\n", cfg); /* Gather bridge-specific registers */ - if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) { + if (edev->mode & EEH_DEV_BRIDGE) { eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg); n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); - printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg); + pr_warn("EEH: Bridge secondary status: %04x\n", cfg); eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg); n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg); - printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg); + pr_warn("EEH: Bridge control: %04x\n", cfg); } /* Dump out the PCI-X command and status regs */ - cap = pci_find_capability(dev, PCI_CAP_ID_PCIX); + cap = edev->pcix_cap; if (cap) { eeh_ops->read_config(dn, cap, 4, &cfg); n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); - printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg); + pr_warn("EEH: PCI-X cmd: %08x\n", cfg); eeh_ops->read_config(dn, cap+4, 4, &cfg); n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); - printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg); + pr_warn("EEH: PCI-X status: %08x\n", cfg); } - /* If PCI-E capable, dump PCI-E cap 10, and the AER */ - if (pci_is_pcie(dev)) { + /* If PCI-E capable, dump PCI-E cap 10 */ + cap = edev->pcie_cap; + if (cap) { n += scnprintf(buf+n, len-n, "pci-e cap10:\n"); - printk(KERN_WARNING - "EEH: PCI-E capabilities and status follow:\n"); + pr_warn("EEH: PCI-E capabilities and status follow:\n"); for (i=0; i<=8; i++) { - eeh_ops->read_config(dn, dev->pcie_cap+4*i, 4, &cfg); + eeh_ops->read_config(dn, cap+4*i, 4, &cfg); n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); - printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg); + pr_warn("EEH: PCI-E %02x: %08x\n", i, cfg); } + } - cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); - if (cap) { - n += scnprintf(buf+n, len-n, "pci-e AER:\n"); - printk(KERN_WARNING - "EEH: PCI-E AER capability register set follows:\n"); - - for (i=0; i<14; i++) { - eeh_ops->read_config(dn, cap+4*i, 4, &cfg); - n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); - printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg); - } + /* If AER capable, dump it */ + cap = edev->aer_cap; + if (cap) { + n += scnprintf(buf+n, len-n, "pci-e AER:\n"); + pr_warn("EEH: PCI-E AER capability register set follows:\n"); + + for (i=0; i<14; i++) { + eeh_ops->read_config(dn, cap+4*i, 4, &cfg); + n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); + pr_warn("EEH: PCI-E AER %02x: %08x\n", i, cfg); } } @@ -232,21 +236,19 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity) { size_t loglen = 0; struct eeh_dev *edev, *tmp; - bool valid_cfg_log = true; /* * When the PHB is fenced or dead, it's pointless to collect * the data from PCI config space because it should return * 0xFF's. For ER, we still retrieve the data from the PCI * config space. + * + * For pHyp, we have to enable IO for log retrieval. Otherwise, + * 0xFF's is always returned from PCI config space. */ - if (eeh_probe_mode_dev() && - (pe->type & EEH_PE_PHB) && - (pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD))) - valid_cfg_log = false; - - if (valid_cfg_log) { - eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); + if (!(pe->type & EEH_PE_PHB)) { + if (eeh_probe_mode_devtree()) + eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); eeh_ops->configure_bridge(pe); eeh_pe_restore_bars(pe); @@ -309,7 +311,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe) /* If the PHB has been in problematic state */ eeh_serialize_lock(&flags); - if (phb_pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD)) { + if (phb_pe->state & EEH_PE_ISOLATED) { ret = 0; goto out; } @@ -515,16 +517,42 @@ EXPORT_SYMBOL(eeh_check_failure); */ int eeh_pci_enable(struct eeh_pe *pe, int function) { - int rc; + int rc, flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); + + /* + * pHyp doesn't allow to enable IO or DMA on unfrozen PE. + * Also, it's pointless to enable them on unfrozen PE. So + * we have the check here. + */ + if (function == EEH_OPT_THAW_MMIO || + function == EEH_OPT_THAW_DMA) { + rc = eeh_ops->get_state(pe, NULL); + if (rc < 0) + return rc; + + /* Needn't to enable or already enabled */ + if ((rc == EEH_STATE_NOT_SUPPORT) || + ((rc & flags) == flags)) + return 0; + } rc = eeh_ops->set_option(pe, function); if (rc) - pr_warning("%s: Unexpected state change %d on PHB#%d-PE#%x, err=%d\n", - __func__, function, pe->phb->global_number, pe->addr, rc); + pr_warn("%s: Unexpected state change %d on " + "PHB#%d-PE#%x, err=%d\n", + __func__, function, pe->phb->global_number, + pe->addr, rc); rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) && - (function == EEH_OPT_THAW_MMIO)) + if (rc <= 0) + return rc; + + if ((function == EEH_OPT_THAW_MMIO) && + (rc & EEH_STATE_MMIO_ENABLED)) + return 0; + + if ((function == EEH_OPT_THAW_DMA) && + (rc & EEH_STATE_DMA_ENABLED)) return 0; return rc; @@ -612,26 +640,7 @@ static void eeh_reset_pe_once(struct eeh_pe *pe) else eeh_ops->reset(pe, EEH_RESET_HOT); - /* The PCI bus requires that the reset be held high for at least - * a 100 milliseconds. We wait a bit longer 'just in case'. - */ -#define PCI_BUS_RST_HOLD_TIME_MSEC 250 - msleep(PCI_BUS_RST_HOLD_TIME_MSEC); - - /* We might get hit with another EEH freeze as soon as the - * pci slot reset line is dropped. Make sure we don't miss - * these, and clear the flag now. - */ - eeh_pe_state_clear(pe, EEH_PE_ISOLATED); - eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); - - /* After a PCI slot has been reset, the PCI Express spec requires - * a 1.5 second idle time for the bus to stabilize, before starting - * up traffic. - */ -#define PCI_BUS_SETTLE_TIME_MSEC 1800 - msleep(PCI_BUS_SETTLE_TIME_MSEC); } /** @@ -651,6 +660,10 @@ int eeh_reset_pe(struct eeh_pe *pe) for (i=0; i<3; i++) { eeh_reset_pe_once(pe); + /* + * EEH_PE_ISOLATED is expected to be removed after + * BAR restore. + */ rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); if ((rc & flags) == flags) return 0; @@ -826,8 +839,8 @@ int eeh_init(void) &hose_list, list_node) pci_walk_bus(hose->bus, eeh_ops->dev_probe, NULL); } else { - pr_warning("%s: Invalid probe mode %d\n", - __func__, eeh_probe_mode); + pr_warn("%s: Invalid probe mode %x", + __func__, eeh_subsystem_flags); return -EINVAL; } @@ -1102,10 +1115,45 @@ static const struct file_operations proc_eeh_operations = { .release = single_release, }; +#ifdef CONFIG_DEBUG_FS +static int eeh_enable_dbgfs_set(void *data, u64 val) +{ + if (val) + eeh_subsystem_flags &= ~EEH_FORCE_DISABLED; + else + eeh_subsystem_flags |= EEH_FORCE_DISABLED; + + /* Notify the backend */ + if (eeh_ops->post_init) + eeh_ops->post_init(); + + return 0; +} + +static int eeh_enable_dbgfs_get(void *data, u64 *val) +{ + if (eeh_enabled()) + *val = 0x1ul; + else + *val = 0x0ul; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get, + eeh_enable_dbgfs_set, "0x%llx\n"); +#endif + static int __init eeh_init_proc(void) { - if (machine_is(pseries) || machine_is(powernv)) + if (machine_is(pseries) || machine_is(powernv)) { proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations); +#ifdef CONFIG_DEBUG_FS + debugfs_create_file("eeh_enable", 0600, + powerpc_debugfs_root, NULL, + &eeh_enable_dbgfs_ops); +#endif + } + return 0; } __initcall(eeh_init_proc); diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index bb61ca58ca6d..7100a5b96e70 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -171,6 +171,15 @@ static void eeh_enable_irq(struct pci_dev *dev) } } +static bool eeh_dev_removed(struct eeh_dev *edev) +{ + /* EEH device removed ? */ + if (!edev || (edev->mode & EEH_DEV_REMOVED)) + return true; + + return false; +} + /** * eeh_report_error - Report pci error to each device driver * @data: eeh device @@ -187,10 +196,8 @@ static void *eeh_report_error(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - /* We might not have the associated PCI device, - * then we should continue for next one. - */ - if (!dev) return NULL; + if (!dev || eeh_dev_removed(edev)) + return NULL; dev->error_state = pci_channel_io_frozen; driver = eeh_pcid_get(dev); @@ -230,6 +237,9 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; + if (!dev || eeh_dev_removed(edev)) + return NULL; + driver = eeh_pcid_get(dev); if (!driver) return NULL; @@ -267,7 +277,8 @@ static void *eeh_report_reset(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - if (!dev) return NULL; + if (!dev || eeh_dev_removed(edev)) + return NULL; dev->error_state = pci_channel_io_normal; driver = eeh_pcid_get(dev); @@ -307,7 +318,8 @@ static void *eeh_report_resume(void *data, void *userdata) struct pci_dev *dev = eeh_dev_to_pci_dev(edev); struct pci_driver *driver; - if (!dev) return NULL; + if (!dev || eeh_dev_removed(edev)) + return NULL; dev->error_state = pci_channel_io_normal; driver = eeh_pcid_get(dev); @@ -343,7 +355,8 @@ static void *eeh_report_failure(void *data, void *userdata) struct pci_dev *dev = eeh_dev_to_pci_dev(edev); struct pci_driver *driver; - if (!dev) return NULL; + if (!dev || eeh_dev_removed(edev)) + return NULL; dev->error_state = pci_channel_io_perm_failure; driver = eeh_pcid_get(dev); @@ -380,6 +393,16 @@ static void *eeh_rmv_device(void *data, void *userdata) if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE)) return NULL; + /* + * We rely on count-based pcibios_release_device() to + * detach permanently offlined PEs. Unfortunately, that's + * not reliable enough. We might have the permanently + * offlined PEs attached, but we needn't take care of + * them and their child devices. + */ + if (eeh_dev_removed(edev)) + return NULL; + driver = eeh_pcid_get(dev); if (driver) { eeh_pcid_put(dev); @@ -417,6 +440,36 @@ static void *eeh_pe_detach_dev(void *data, void *userdata) return NULL; } +/* + * Explicitly clear PE's frozen state for PowerNV where + * we have frozen PE until BAR restore is completed. It's + * harmless to clear it for pSeries. To be consistent with + * PE reset (for 3 times), we try to clear the frozen state + * for 3 times as well. + */ +static int eeh_clear_pe_frozen_state(struct eeh_pe *pe) +{ + int i, rc; + + for (i = 0; i < 3; i++) { + rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); + if (rc) + continue; + rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); + if (!rc) + break; + } + + /* The PE has been isolated, clear it */ + if (rc) + pr_warn("%s: Can't clear frozen PHB#%x-PE#%x (%d)\n", + __func__, pe->phb->global_number, pe->addr, rc); + else + eeh_pe_state_clear(pe, EEH_PE_ISOLATED); + + return rc; +} + /** * eeh_reset_device - Perform actual reset of a pci slot * @pe: EEH PE @@ -451,19 +504,33 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed); } - /* Reset the pci controller. (Asserts RST#; resets config space). + /* + * Reset the pci controller. (Asserts RST#; resets config space). * Reconfigure bridges and devices. Don't try to bring the system * up if the reset failed for some reason. + * + * During the reset, it's very dangerous to have uncontrolled PCI + * config accesses. So we prefer to block them. However, controlled + * PCI config accesses initiated from EEH itself are allowed. */ + eeh_pe_state_mark(pe, EEH_PE_RESET); rc = eeh_reset_pe(pe); - if (rc) + if (rc) { + eeh_pe_state_clear(pe, EEH_PE_RESET); return rc; + } pci_lock_rescan_remove(); /* Restore PE */ eeh_ops->configure_bridge(pe); eeh_pe_restore_bars(pe); + eeh_pe_state_clear(pe, EEH_PE_RESET); + + /* Clear frozen state */ + rc = eeh_clear_pe_frozen_state(pe); + if (rc) + return rc; /* Give the system 5 seconds to finish running the user-space * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes, @@ -573,7 +640,6 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) result = PCI_ERS_RESULT_NEED_RESET; } else { pr_info("EEH: Notify device drivers to resume I/O\n"); - result = PCI_ERS_RESULT_NONE; eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result); } } @@ -585,10 +651,17 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) if (rc < 0) goto hard_fail; - if (rc) + if (rc) { result = PCI_ERS_RESULT_NEED_RESET; - else + } else { + /* + * We didn't do PE reset for the case. The PE + * is still in frozen state. Clear it before + * resuming the PE. + */ + eeh_pe_state_clear(pe, EEH_PE_ISOLATED); result = PCI_ERS_RESULT_RECOVERED; + } } /* If any device has a hard failure, then shut off everything. */ @@ -650,8 +723,17 @@ perm_error: /* Notify all devices that they're about to go down. */ eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); - /* Shut down the device drivers for good. */ + /* Mark the PE to be removed permanently */ + pe->freeze_count = EEH_MAX_ALLOWED_FREEZES + 1; + + /* + * Shut down the device drivers for good. We mark + * all removed devices correctly to avoid access + * the their PCI config any more. + */ if (frozen_bus) { + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + pci_lock_rescan_remove(); pcibios_remove_pci_devices(frozen_bus); pci_unlock_rescan_remove(); @@ -682,8 +764,7 @@ static void eeh_handle_special_event(void) phb_pe = eeh_phb_pe_get(hose); if (!phb_pe) continue; - eeh_pe_state_mark(phb_pe, - EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); + eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); } eeh_serialize_unlock(flags); @@ -699,8 +780,7 @@ static void eeh_handle_special_event(void) eeh_remove_event(pe); if (rc == EEH_NEXT_ERR_DEAD_PHB) - eeh_pe_state_mark(pe, - EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); + eeh_pe_state_mark(pe, EEH_PE_ISOLATED); else eeh_pe_state_mark(pe, EEH_PE_ISOLATED | EEH_PE_RECOVERING); @@ -724,12 +804,14 @@ static void eeh_handle_special_event(void) if (rc == EEH_NEXT_ERR_FROZEN_PE || rc == EEH_NEXT_ERR_FENCED_PHB) { eeh_handle_normal_event(pe); + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); } else { pci_lock_rescan_remove(); list_for_each_entry(hose, &hose_list, list_node) { phb_pe = eeh_phb_pe_get(hose); if (!phb_pe || - !(phb_pe->state & EEH_PE_PHB_DEAD)) + !(phb_pe->state & EEH_PE_ISOLATED) || + (phb_pe->state & EEH_PE_RECOVERING)) continue; /* Notify all devices to be down */ diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index f0c353fa655a..995c2a284630 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -503,13 +503,17 @@ static void *__eeh_pe_state_mark(void *data, void *flag) struct eeh_dev *edev, *tmp; struct pci_dev *pdev; - /* - * Mark the PE with the indicated state. Also, - * the associated PCI device will be put into - * I/O frozen state to avoid I/O accesses from - * the PCI device driver. - */ + /* Keep the state of permanently removed PE intact */ + if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) && + (state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING))) + return NULL; + pe->state |= state; + + /* Offline PCI devices if applicable */ + if (state != EEH_PE_ISOLATED) + return NULL; + eeh_pe_for_each_dev(pe, edev, tmp) { pdev = eeh_dev_to_pci_dev(edev); if (pdev) @@ -532,6 +536,27 @@ void eeh_pe_state_mark(struct eeh_pe *pe, int state) eeh_pe_traverse(pe, __eeh_pe_state_mark, &state); } +static void *_ |
