diff options
| -rw-r--r-- | Documentation/ABI/testing/sysfs-devices-system-cpu | 11 | ||||
| -rw-r--r-- | Documentation/cpu-freq/boost.txt | 93 | ||||
| -rw-r--r-- | Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt | 55 | ||||
| -rw-r--r-- | Documentation/devicetree/bindings/power/opp.txt | 25 | ||||
| -rw-r--r-- | arch/arm/kernel/smp.c | 54 | ||||
| -rw-r--r-- | arch/x86/include/asm/msr-index.h | 3 | ||||
| -rw-r--r-- | drivers/acpi/processor_perflib.c | 30 | ||||
| -rw-r--r-- | drivers/base/power/opp.c | 47 | ||||
| -rw-r--r-- | drivers/cpufreq/Kconfig | 11 | ||||
| -rw-r--r-- | drivers/cpufreq/Kconfig.x86 | 18 | ||||
| -rw-r--r-- | drivers/cpufreq/Makefile | 4 | ||||
| -rw-r--r-- | drivers/cpufreq/acpi-cpufreq.c | 272 | ||||
| -rw-r--r-- | drivers/cpufreq/cpufreq-cpu0.c | 269 | ||||
| -rw-r--r-- | drivers/cpufreq/cpufreq_conservative.c | 2 | ||||
| -rw-r--r-- | drivers/cpufreq/cpufreq_ondemand.c | 1 | ||||
| -rw-r--r-- | drivers/cpufreq/longhaul.h | 26 | ||||
| -rw-r--r-- | drivers/cpufreq/omap-cpufreq.c | 35 | ||||
| -rw-r--r-- | drivers/cpufreq/powernow-k8.c | 406 | ||||
| -rw-r--r-- | drivers/cpufreq/powernow-k8.h | 32 | ||||
| -rw-r--r-- | include/linux/opp.h | 8 |
20 files changed, 947 insertions, 455 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 5dab36448b44..6943133afcb8 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -176,3 +176,14 @@ Description: Disable L3 cache indices All AMD processors with L3 caches provide this functionality. For details, see BKDGs at http://developer.amd.com/documentation/guides/Pages/default.aspx + + +What: /sys/devices/system/cpu/cpufreq/boost +Date: August 2012 +Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> +Description: Processor frequency boosting control + + This switch controls the boost setting for the whole system. + Boosting allows the CPU and the firmware to run at a frequency + beyound it's nominal limit. + More details can be found in Documentation/cpu-freq/boost.txt diff --git a/Documentation/cpu-freq/boost.txt b/Documentation/cpu-freq/boost.txt new file mode 100644 index 000000000000..9b4edfcf486f --- /dev/null +++ b/Documentation/cpu-freq/boost.txt @@ -0,0 +1,93 @@ +Processor boosting control + + - information for users - + +Quick guide for the impatient: +-------------------- +/sys/devices/system/cpu/cpufreq/boost +controls the boost setting for the whole system. You can read and write +that file with either "0" (boosting disabled) or "1" (boosting allowed). +Reading or writing 1 does not mean that the system is boosting at this +very moment, but only that the CPU _may_ raise the frequency at it's +discretion. +-------------------- + +Introduction +------------- +Some CPUs support a functionality to raise the operating frequency of +some cores in a multi-core package if certain conditions apply, mostly +if the whole chip is not fully utilized and below it's intended thermal +budget. This is done without operating system control by a combination +of hardware and firmware. +On Intel CPUs this is called "Turbo Boost", AMD calls it "Turbo-Core", +in technical documentation "Core performance boost". In Linux we use +the term "boost" for convenience. + +Rationale for disable switch +---------------------------- + +Though the idea is to just give better performance without any user +intervention, sometimes the need arises to disable this functionality. +Most systems offer a switch in the (BIOS) firmware to disable the +functionality at all, but a more fine-grained and dynamic control would +be desirable: +1. While running benchmarks, reproducible results are important. Since + the boosting functionality depends on the load of the whole package, + single thread performance can vary. By explicitly disabling the boost + functionality at least for the benchmark's run-time the system will run + at a fixed frequency and results are reproducible again. +2. To examine the impact of the boosting functionality it is helpful + to do tests with and without boosting. +3. Boosting means overclocking the processor, though under controlled + conditions. By raising the frequency and the voltage the processor + will consume more power than without the boosting, which may be + undesirable for instance for mobile users. Disabling boosting may + save power here, though this depends on the workload. + + +User controlled switch +---------------------- + +To allow the user to toggle the boosting functionality, the acpi-cpufreq +driver exports a sysfs knob to disable it. There is a file: +/sys/devices/system/cpu/cpufreq/boost +which can either read "0" (boosting disabled) or "1" (boosting enabled). +Reading the file is always supported, even if the processor does not +support boosting. In this case the file will be read-only and always +reads as "0". Explicitly changing the permissions and writing to that +file anyway will return EINVAL. + +On supported CPUs one can write either a "0" or a "1" into this file. +This will either disable the boost functionality on all cores in the +whole system (0) or will allow the hardware to boost at will (1). + +Writing a "1" does not explicitly boost the system, but just allows the +CPU (and the firmware) to boost at their discretion. Some implementations +take external factors like the chip's temperature into account, so +boosting once does not necessarily mean that it will occur every time +even using the exact same software setup. + + +AMD legacy cpb switch +--------------------- +The AMD powernow-k8 driver used to support a very similar switch to +disable or enable the "Core Performance Boost" feature of some AMD CPUs. +This switch was instantiated in each CPU's cpufreq directory +(/sys/devices/system/cpu[0-9]*/cpufreq) and was called "cpb". +Though the per CPU existence hints at a more fine grained control, the +actual implementation only supported a system-global switch semantics, +which was simply reflected into each CPU's file. Writing a 0 or 1 into it +would pull the other CPUs to the same state. +For compatibility reasons this file and its behavior is still supported +on AMD CPUs, though it is now protected by a config switch +(X86_ACPI_CPUFREQ_CPB). On Intel CPUs this file will never be created, +even with the config option set. +This functionality is considered legacy and will be removed in some future +kernel version. + +More fine grained boosting control +---------------------------------- + +Technically it is possible to switch the boosting functionality at least +on a per package basis, for some CPUs even per core. Currently the driver +does not support it, but this may be implemented in the future. diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt new file mode 100644 index 000000000000..4416ccc33472 --- /dev/null +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt @@ -0,0 +1,55 @@ +Generic CPU0 cpufreq driver + +It is a generic cpufreq driver for CPU0 frequency management. It +supports both uniprocessor (UP) and symmetric multiprocessor (SMP) +systems which share clock and voltage across all CPUs. + +Both required and optional properties listed below must be defined +under node /cpus/cpu@0. + +Required properties: +- operating-points: Refer to Documentation/devicetree/bindings/power/opp.txt + for details + +Optional properties: +- clock-latency: Specify the possible maximum transition latency for clock, + in unit of nanoseconds. +- voltage-tolerance: Specify the CPU voltage tolerance in percentage. + +Examples: + +cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + compatible = "arm,cortex-a9"; + reg = <0>; + next-level-cache = <&L2>; + operating-points = < + /* kHz uV */ + 792000 1100000 + 396000 950000 + 198000 850000 + >; + transition-latency = <61036>; /* two CLK32 periods */ + }; + + cpu@1 { + compatible = "arm,cortex-a9"; + reg = <1>; + next-level-cache = <&L2>; + }; + + cpu@2 { + compatible = "arm,cortex-a9"; + reg = <2>; + next-level-cache = <&L2>; + }; + + cpu@3 { + compatible = "arm,cortex-a9"; + reg = <3>; + next-level-cache = <&L2>; + }; +}; diff --git a/Documentation/devicetree/bindings/power/opp.txt b/Documentation/devicetree/bindings/power/opp.txt new file mode 100644 index 000000000000..74499e5033fc --- /dev/null +++ b/Documentation/devicetree/bindings/power/opp.txt @@ -0,0 +1,25 @@ +* Generic OPP Interface + +SoCs have a standard set of tuples consisting of frequency and +voltage pairs that the device will support per voltage domain. These +are called Operating Performance Points or OPPs. + +Properties: +- operating-points: An array of 2-tuples items, and each item consists + of frequency and voltage like <freq-kHz vol-uV>. + freq: clock frequency in kHz + vol: voltage in microvolt + +Examples: + +cpu@0 { + compatible = "arm,cortex-a9"; + reg = <0>; + next-level-cache = <&L2>; + operating-points = < + /* kHz uV */ + 792000 1100000 + 396000 950000 + 198000 850000 + >; +}; diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index ebd8ad274d76..8e03567c9583 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -25,6 +25,7 @@ #include <linux/percpu.h> #include <linux/clockchips.h> #include <linux/completion.h> +#include <linux/cpufreq.h> #include <linux/atomic.h> #include <asm/cacheflush.h> @@ -584,3 +585,56 @@ int setup_profiling_timer(unsigned int multiplier) { return -EINVAL; } + +#ifdef CONFIG_CPU_FREQ + +static DEFINE_PER_CPU(unsigned long, l_p_j_ref); +static DEFINE_PER_CPU(unsigned long, l_p_j_ref_freq); +static unsigned long global_l_p_j_ref; +static unsigned long global_l_p_j_ref_freq; + +static int cpufreq_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_freqs *freq = data; + int cpu = freq->cpu; + + if (freq->flags & CPUFREQ_CONST_LOOPS) + return NOTIFY_OK; + + if (!per_cpu(l_p_j_ref, cpu)) { + per_cpu(l_p_j_ref, cpu) = + per_cpu(cpu_data, cpu).loops_per_jiffy; + per_cpu(l_p_j_ref_freq, cpu) = freq->old; + if (!global_l_p_j_ref) { + global_l_p_j_ref = loops_per_jiffy; + global_l_p_j_ref_freq = freq->old; + } + } + + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || + (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) { + loops_per_jiffy = cpufreq_scale(global_l_p_j_ref, + global_l_p_j_ref_freq, + freq->new); + per_cpu(cpu_data, cpu).loops_per_jiffy = + cpufreq_scale(per_cpu(l_p_j_ref, cpu), + per_cpu(l_p_j_ref_freq, cpu), + freq->new); + } + return NOTIFY_OK; +} + +static struct notifier_block cpufreq_notifier = { + .notifier_call = cpufreq_callback, +}; + +static int __init register_cpufreq_notifier(void) +{ + return cpufreq_register_notifier(&cpufreq_notifier, + CPUFREQ_TRANSITION_NOTIFIER); +} +core_initcall(register_cpufreq_notifier); + +#endif diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 957ec87385af..fbee9714d9ab 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -248,6 +248,9 @@ #define MSR_IA32_PERF_STATUS 0x00000198 #define MSR_IA32_PERF_CTL 0x00000199 +#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 +#define MSR_AMD_PERF_STATUS 0xc0010063 +#define MSR_AMD_PERF_CTL 0xc0010062 #define MSR_IA32_MPERF 0x000000e7 #define MSR_IA32_APERF 0x000000e8 diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index a093dc163a42..836bfe069042 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -324,6 +324,34 @@ static int acpi_processor_get_performance_control(struct acpi_processor *pr) return result; } +#ifdef CONFIG_X86 +/* + * Some AMDs have 50MHz frequency multiples, but only provide 100MHz rounding + * in their ACPI data. Calculate the real values and fix up the _PSS data. + */ +static void amd_fixup_frequency(struct acpi_processor_px *px, int i) +{ + u32 hi, lo, fid, did; + int index = px->control & 0x00000007; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return; + + if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10) + || boot_cpu_data.x86 == 0x11) { + rdmsr(MSR_AMD_PSTATE_DEF_BASE + index, lo, hi); + fid = lo & 0x3f; + did = (lo >> 6) & 7; + if (boot_cpu_data.x86 == 0x10) + px->core_frequency = (100 * (fid + 0x10)) >> did; + else + px->core_frequency = (100 * (fid + 8)) >> did; + } +} +#else +static void amd_fixup_frequency(struct acpi_processor_px *px, int i) {}; +#endif + static int acpi_processor_get_performance_states(struct acpi_processor *pr) { int result = 0; @@ -379,6 +407,8 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr) goto end; } + amd_fixup_frequency(px, i); + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "State [%d]: core_frequency[%d] power[%d] transition_latency[%d] bus_master_latency[%d] control[0x%x] status[0x%x]\n", i, diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index ac993eafec82..d9468642fc41 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -22,6 +22,7 @@ #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/opp.h> +#include <linux/of.h> /* * Internal data structure organization with the OPP layer library is as @@ -674,3 +675,49 @@ struct srcu_notifier_head *opp_get_notifier(struct device *dev) return &dev_opp->head; } + +#ifdef CONFIG_OF +/** + * of_init_opp_table() - Initialize opp table from device tree + * @dev: device pointer used to lookup device OPPs. + * + * Register the initial OPP table with the OPP library for given device. + */ +int of_init_opp_table(struct device *dev) +{ + const struct property *prop; + const __be32 *val; + int nr; + + prop = of_find_property(dev->of_node, "operating-points", NULL); + if (!prop) + return -ENODEV; + if (!prop->value) + return -ENODATA; + + /* + * Each OPP is a set of tuples consisting of frequency and + * voltage like <freq-kHz vol-uV>. + */ + nr = prop->length / sizeof(u32); + if (nr % 2) { + dev_err(dev, "%s: Invalid OPP list\n", __func__); + return -EINVAL; + } + + val = prop->value; + while (nr) { + unsigned long freq = be32_to_cpup(val++) * 1000; + unsigned long volt = be32_to_cpup(val++); + + if (opp_add(dev, freq, volt)) { + dev_warn(dev, "%s: Failed to add OPP %ld\n", + __func__, freq); + continue; + } + nr -= 2; + } + + return 0; +} +#endif diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index e24a2a1b6666..ea512f47b789 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -179,6 +179,17 @@ config CPU_FREQ_GOV_CONSERVATIVE If in doubt, say N. +config GENERIC_CPUFREQ_CPU0 + bool "Generic CPU0 cpufreq driver" + depends on HAVE_CLK && REGULATOR && PM_OPP && OF + select CPU_FREQ_TABLE + help + This adds a generic cpufreq driver for CPU0 frequency management. + It supports both uniprocessor (UP) and symmetric multiprocessor (SMP) + systems which share clock and voltage across all CPUs. + + If in doubt, say N. + menu "x86 CPU frequency scaling drivers" depends on X86 source "drivers/cpufreq/Kconfig.x86" diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index 78ff7ee48951..934854ae5eb4 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -23,7 +23,8 @@ config X86_ACPI_CPUFREQ help This driver adds a CPUFreq driver which utilizes the ACPI Processor Performance States. - This driver also supports Intel Enhanced Speedstep. + This driver also supports Intel Enhanced Speedstep and newer + AMD CPUs. To compile this driver as a module, choose M here: the module will be called acpi-cpufreq. @@ -32,6 +33,18 @@ config X86_ACPI_CPUFREQ If in doubt, say N. +config X86_ACPI_CPUFREQ_CPB + default y + bool "Legacy cpb sysfs knob support for AMD CPUs" + depends on X86_ACPI_CPUFREQ && CPU_SUP_AMD + help + The powernow-k8 driver used to provide a sysfs knob called "cpb" + to disable the Core Performance Boosting feature of AMD CPUs. This + file has now been superseeded by the more generic "boost" entry. + + By enabling this option the acpi_cpufreq driver provides the old + entry in addition to the new boost ones, for compatibility reasons. + config ELAN_CPUFREQ tristate "AMD Elan SC400 and SC410" select CPU_FREQ_TABLE @@ -95,7 +108,8 @@ config X86_POWERNOW_K8 select CPU_FREQ_TABLE depends on ACPI && ACPI_PROCESSOR help - This adds the CPUFreq driver for K8/K10 Opteron/Athlon64 processors. + This adds the CPUFreq driver for K8/early Opteron/Athlon64 processors. + Support for K10 and newer processors is now in acpi-cpufreq. To compile this driver as a module, choose M here: the module will be called powernow-k8. diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 9531fc2eda22..1bc90e1306d8 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -13,13 +13,15 @@ obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE) += cpufreq_conservative.o # CPUfreq cross-arch helpers obj-$(CONFIG_CPU_FREQ_TABLE) += freq_table.o +obj-$(CONFIG_GENERIC_CPUFREQ_CPU0) += cpufreq-cpu0.o + ################################################################################## # x86 drivers. # Link order matters. K8 is preferred to ACPI because of firmware bugs in early # K8 systems. ACPI is preferred to all other hardware-specific drivers. # speedstep-* is preferred over p4-clockmod. -obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o mperf.o +obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o mperf.o obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 56c6c6b4eb4d..0d048f6a2b23 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -51,13 +51,19 @@ MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski"); MODULE_DESCRIPTION("ACPI Processor P-States Driver"); MODULE_LICENSE("GPL"); +#define PFX "acpi-cpufreq: " + enum { UNDEFINED_CAPABLE = 0, SYSTEM_INTEL_MSR_CAPABLE, + SYSTEM_AMD_MSR_CAPABLE, SYSTEM_IO_CAPABLE, }; #define INTEL_MSR_RANGE (0xffff) +#define AMD_MSR_RANGE (0x7) + +#define MSR_K7_HWCR_CPB_DIS (1ULL << 25) struct acpi_cpufreq_data { struct acpi_processor_performance *acpi_data; @@ -74,6 +80,116 @@ static struct acpi_processor_performance __percpu *acpi_perf_data; static struct cpufreq_driver acpi_cpufreq_driver; static unsigned int acpi_pstate_strict; +static bool boost_enabled, boost_supported; +static struct msr __percpu *msrs; + +static bool boost_state(unsigned int cpu) +{ + u32 lo, hi; + u64 msr; + + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + rdmsr_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &lo, &hi); + msr = lo | ((u64)hi << 32); + return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); + case X86_VENDOR_AMD: + rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi); + msr = lo | ((u64)hi << 32); + return !(msr & MSR_K7_HWCR_CPB_DIS); + } + return false; +} + +static void boost_set_msrs(bool enable, const struct cpumask *cpumask) +{ + u32 cpu; + u32 msr_addr; + u64 msr_mask; + + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + msr_addr = MSR_IA32_MISC_ENABLE; + msr_mask = MSR_IA32_MISC_ENABLE_TURBO_DISABLE; + break; + case X86_VENDOR_AMD: + msr_addr = MSR_K7_HWCR; + msr_mask = MSR_K7_HWCR_CPB_DIS; + break; + default: + return; + } + + rdmsr_on_cpus(cpumask, msr_addr, msrs); + + for_each_cpu(cpu, cpumask) { + struct msr *reg = per_cpu_ptr(msrs, cpu); + if (enable) + reg->q &= ~msr_mask; + else + reg->q |= msr_mask; + } + + wrmsr_on_cpus(cpumask, msr_addr, msrs); +} + +static ssize_t _store_boost(const char *buf, size_t count) +{ + int ret; + unsigned long val = 0; + + if (!boost_supported) + return -EINVAL; + + ret = kstrtoul(buf, 10, &val); + if (ret || (val > 1)) + return -EINVAL; + + if ((val && boost_enabled) || (!val && !boost_enabled)) + return count; + + get_online_cpus(); + + boost_set_msrs(val, cpu_online_mask); + + put_online_cpus(); + + boost_enabled = val; + pr_debug("Core Boosting %sabled.\n", val ? "en" : "dis"); + + return count; +} + +static ssize_t store_global_boost(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + return _store_boost(buf, count); +} + +static ssize_t show_global_boost(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", boost_enabled); +} + +static struct global_attr global_boost = __ATTR(boost, 0644, + show_global_boost, + store_global_boost); + +#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB +static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf, + size_t count) +{ + return _store_boost(buf, count); +} + +static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf) +{ + return sprintf(buf, "%u\n", boost_enabled); +} + +static struct freq_attr cpb = __ATTR(cpb, 0644, show_cpb, store_cpb); +#endif static int check_est_cpu(unsigned int cpuid) { @@ -82,6 +198,13 @@ static int check_est_cpu(unsigned int cpuid) return cpu_has(cpu, X86_FEATURE_EST); } +static int check_amd_hwpstate_cpu(unsigned int cpuid) +{ + struct cpuinfo_x86 *cpu = &cpu_data(cpuid); + + return cpu_has(cpu, X86_FEATURE_HW_PSTATE); +} + static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data) { struct acpi_processor_performance *perf; @@ -101,7 +224,11 @@ static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data) int i; struct acpi_processor_performance *perf; - msr &= INTEL_MSR_RANGE; + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + msr &= AMD_MSR_RANGE; + else + msr &= INTEL_MSR_RANGE; + perf = data->acpi_data; for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { @@ -115,6 +242,7 @@ static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data) { switch (data->cpu_feature) { case SYSTEM_INTEL_MSR_CAPABLE: + case SYSTEM_AMD_MSR_CAPABLE: return extract_msr(val, data); case SYSTEM_IO_CAPABLE: return extract_io(val, data); @@ -150,6 +278,7 @@ static void do_drv_read(void *_cmd) switch (cmd->type) { case SYSTEM_INTEL_MSR_CAPABLE: + case SYSTEM_AMD_MSR_CAPABLE: rdmsr(cmd->addr.msr.reg, cmd->val, h); break; case SYSTEM_IO_CAPABLE: @@ -174,6 +303,9 @@ static void do_drv_write(void *_cmd) lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE); wrmsr(cmd->addr.msr.reg, lo, hi); break; + case SYSTEM_AMD_MSR_CAPABLE: + wrmsr(cmd->addr.msr.reg, cmd->val, 0); + break; case SYSTEM_IO_CAPABLE: acpi_os_write_port((acpi_io_address)cmd->addr.io.port, cmd->val, @@ -217,6 +349,10 @@ static u32 get_cur_val(const struct cpumask *mask) cmd.type = SYSTEM_INTEL_MSR_CAPABLE; cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; break; + case SYSTEM_AMD_MSR_CAPABLE: + cmd.type = SYSTEM_AMD_MSR_CAPABLE; + cmd.addr.msr.reg = MSR_AMD_PERF_STATUS; + break; case SYSTEM_IO_CAPABLE: cmd.type = SYSTEM_IO_CAPABLE; perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data; @@ -326,6 +462,11 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, cmd.addr.msr.reg = MSR_IA32_PERF_CTL; cmd.val = (u32) perf->states[next_perf_state].control; break; + case SYSTEM_AMD_MSR_CAPABLE: + cmd.type = SYSTEM_AMD_MSR_CAPABLE; + cmd.addr.msr.reg = MSR_AMD_PERF_CTL; + cmd.val = (u32) perf->states[next_perf_state].control; + break; case SYSTEM_IO_CAPABLE: cmd.type = SYSTEM_IO_CAPABLE; cmd.addr.io.port = perf->control_register.address; @@ -419,6 +560,44 @@ static void free_acpi_perf_data(void) free_percpu(acpi_perf_data); } +static int boost_notify(struct notifier_block *nb, unsigned long action, + void *hcpu) +{ + unsigned cpu = (long)hcpu; + const struct cpumask *cpumask; + + cpumask = get_cpu_mask(cpu); + + /* + * Clear the boost-disable bit on the CPU_DOWN path so that + * this cpu cannot block the remaining ones from boosting. On + * the CPU_UP path we simply keep the boost-disable flag in + * sync with the current global state. + */ + + switch (action) { + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + boost_set_msrs(boost_enabled, cpumask); + break; + + case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: + boost_set_msrs(1, cpumask); + break; + + default: + break; + } + + return NOTIFY_OK; +} + + +static struct notifier_block boost_nb = { + .notifier_call = boost_notify, +}; + /* * acpi_cpufreq_early_init - initialize ACPI P-States library * @@ -559,6 +738,14 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; cpumask_copy(policy->cpus, cpu_core_mask(cpu)); } + + if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) { + cpumask_clear(policy->cpus); + cpumask_set_cpu(cpu, policy->cpus); + cpumask_copy(policy->related_cpus, cpu_sibling_mask(cpu)); + policy->shared_type = CPUFREQ_SHARED_TYPE_HW; + pr_info_once(PFX "overriding BIOS provided _PSD data\n"); + } #endif /* capability check */ @@ -580,12 +767,16 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) break; case ACPI_ADR_SPACE_FIXED_HARDWARE: pr_debug("HARDWARE addr space\n"); - if (!check_est_cpu(cpu)) { - result = -ENODEV; - goto err_unreg; + if (check_est_cpu(cpu)) { + data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE; + break; } - data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE; - break; + if (check_amd_hwpstate_cpu(cpu)) { + data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE; + break; + } + result = -ENODEV; + goto err_unreg; default: pr_debug("Unknown addr space %d\n", (u32) (perf->control_register.space_id)); @@ -718,6 +909,7 @@ static int acpi_cpufreq_resume(struct cpufreq_policy *policy) static struct freq_attr *acpi_cpufreq_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, + NULL, /* this is a placeholder for cpb, do not remove */ NULL, }; @@ -733,6 +925,49 @@ static struct cpufreq_driver acpi_cpufreq_driver = { .attr = acpi_cpufreq_attr, }; +static void __init acpi_cpufreq_boost_init(void) +{ + if (boot_cpu_has(X86_FEATURE_CPB) || boot_cpu_has(X86_FEATURE_IDA)) { + msrs = msrs_alloc(); + + if (!msrs) + return; + + boost_supported = true; + boost_enabled = boost_state(0); + + get_online_cpus(); + + /* Force all MSRs to the same value */ + boost_set_msrs(boost_enabled, cpu_online_mask); + + register_cpu_notifier(&boost_nb); + + put_online_cpus(); + } else + global_boost.attr.mode = 0444; + + /* We create the boost file in any case, though for systems without + * hardware support it will be read-only and hardwired to return 0. + */ + if (sysfs_create_file(cpufreq_global_kobject, &(global_boost.attr))) + pr_warn(PFX "could not register global boost sysfs file\n"); + else + pr_debug("registered global boost sysfs file\n"); +} + +static void __exit acpi_cpufreq_boost_exit(void) +{ + sysfs_remove_file(cpufreq_global_kobject, &(global_boost.attr)); + + if (msrs) { + unregister_cpu_notifier(&boost_nb); + + msrs_free(msrs); + msrs = NULL; + } +} + static int __init acpi_cpufreq_init(void) { int ret; @@ -746,9 +981,32 @@ static int __init acpi_cpufreq_init(void) if (ret) return ret; +#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB + /* this is a sysfs file with a strange name and an even stranger + * semantic - per CPU instantiation, but system global effect. + * Lets enable it only on AMD CPUs for compatibility reasons and + * only if configured. This is considered legacy code, which + * will probably be removed at some point in the future. + */ + if (check_amd_hwpstate_cpu(0)) { + struct freq_attr **iter; + + pr_debug("adding sysfs entry for cpb\n"); + + for (iter = acpi_cpufreq_attr; *iter != NULL; iter++) + ; + + /* make sure there is a terminator behind it */ + if (iter[1] == NULL) + *iter = &cpb; + } +#endif + ret = cpufreq_register_driver(&acpi_cpufreq_driver); if (ret) free_acpi_perf_data(); + else + acpi_cpufreq_boost_init(); return ret; } @@ -757,6 +1015,8 @@ static void __exit acpi_cpufreq_exit(void) { pr_debug("acpi_cpufreq_exit\n"); + acpi_cpufreq_boost_exit(); + cpufreq_unregister_driver(&acpi_cpufreq_driver); free_acpi_perf_data(); diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c new file mode 100644 index 000000000000..e9158278c71d --- /dev/null +++ b/drivers/cpufreq/cpufreq-cpu0.c @@ -0,0 +1,269 @@ +/* + * Copyright (C) 2012 Freescale Semiconductor, Inc. + * + * The OPP code in function cpu0_set_target() is reused from + * drivers/cpufreq/omap-cpufreq.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/clk.h> +#include <linux/cpu.h> +#include <linux/cpufreq.h> +#include <linux/err.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/opp.h> +#include <linux/regulator/consumer.h> +#include <linux/slab.h> + +static unsigned int transition_latency; +static unsigned int voltage_tolerance; /* in percentage */ + +static struct device *cpu_dev; +static struct clk *cpu_clk; +static struct regulator *cpu_reg; +static struct cpufreq_frequency_table *freq_table; + +static int cpu0_verify_speed(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, freq_table); +} + +static unsigned int cpu0_get_speed(unsigned int cpu) +{ + return clk_get_rate(cpu_clk) / 1000; +} + +static int cp |
