aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRafael J. Wysocki <rjw@sisk.pl>2012-09-17 20:26:02 +0200
committerRafael J. Wysocki <rjw@sisk.pl>2012-09-17 20:26:02 +0200
commitfa373abbbd01109b849fa331da73ee17748def66 (patch)
tree3a8a53e34891cfc4315b67a843dab1e441eb918b
parent87a2337abdd752d711724d7654a6db1b5b4c0d4d (diff)
parentcd664cc3a574b30988476143c1dcc9298b1fa531 (diff)
Merge branch 'pm-cpufreq'
* pm-cpufreq: cpufreq: OMAP: remove loops_per_jiffy recalculate for smp sections: fix section conflicts in drivers/cpufreq cpufreq: conservative: update frequency when limits are relaxed cpufreq / ondemand: update frequency when limits are relaxed cpufreq: Add a generic cpufreq-cpu0 driver PM / OPP: Initialize OPP table from device tree ARM: add cpufreq transiton notifier to adjust loops_per_jiffy for smp cpufreq: Remove support for hardware P-state chips from powernow-k8 acpi-cpufreq: Add compatibility for legacy AMD cpb sysfs knob acpi-cpufreq: Add support for disabling dynamic overclocking ACPI: Add fixups for AMD P-state figures powernow-k8: delay info messages until initialization has succeeded cpufreq: Add warning message to powernow-k8 acpi-cpufreq: Add quirk to disable _PSD usage on all AMD CPUs acpi-cpufreq: Add support for modern AMD CPUs cpufreq / powernow-k8: Fixup missing _PSS objects message PM / cpufreq: Initialise the cpu field during conservative governor start
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu11
-rw-r--r--Documentation/cpu-freq/boost.txt93
-rw-r--r--Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt55
-rw-r--r--Documentation/devicetree/bindings/power/opp.txt25
-rw-r--r--arch/arm/kernel/smp.c54
-rw-r--r--arch/x86/include/asm/msr-index.h3
-rw-r--r--drivers/acpi/processor_perflib.c30
-rw-r--r--drivers/base/power/opp.c47
-rw-r--r--drivers/cpufreq/Kconfig11
-rw-r--r--drivers/cpufreq/Kconfig.x8618
-rw-r--r--drivers/cpufreq/Makefile4
-rw-r--r--drivers/cpufreq/acpi-cpufreq.c272
-rw-r--r--drivers/cpufreq/cpufreq-cpu0.c269
-rw-r--r--drivers/cpufreq/cpufreq_conservative.c2
-rw-r--r--drivers/cpufreq/cpufreq_ondemand.c1
-rw-r--r--drivers/cpufreq/longhaul.h26
-rw-r--r--drivers/cpufreq/omap-cpufreq.c35
-rw-r--r--drivers/cpufreq/powernow-k8.c406
-rw-r--r--drivers/cpufreq/powernow-k8.h32
-rw-r--r--include/linux/opp.h8
20 files changed, 947 insertions, 455 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 5dab36448b44..6943133afcb8 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -176,3 +176,14 @@ Description: Disable L3 cache indices
All AMD processors with L3 caches provide this functionality.
For details, see BKDGs at
http://developer.amd.com/documentation/guides/Pages/default.aspx
+
+
+What: /sys/devices/system/cpu/cpufreq/boost
+Date: August 2012
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description: Processor frequency boosting control
+
+ This switch controls the boost setting for the whole system.
+ Boosting allows the CPU and the firmware to run at a frequency
+ beyound it's nominal limit.
+ More details can be found in Documentation/cpu-freq/boost.txt
diff --git a/Documentation/cpu-freq/boost.txt b/Documentation/cpu-freq/boost.txt
new file mode 100644
index 000000000000..9b4edfcf486f
--- /dev/null
+++ b/Documentation/cpu-freq/boost.txt
@@ -0,0 +1,93 @@
+Processor boosting control
+
+ - information for users -
+
+Quick guide for the impatient:
+--------------------
+/sys/devices/system/cpu/cpufreq/boost
+controls the boost setting for the whole system. You can read and write
+that file with either "0" (boosting disabled) or "1" (boosting allowed).
+Reading or writing 1 does not mean that the system is boosting at this
+very moment, but only that the CPU _may_ raise the frequency at it's
+discretion.
+--------------------
+
+Introduction
+-------------
+Some CPUs support a functionality to raise the operating frequency of
+some cores in a multi-core package if certain conditions apply, mostly
+if the whole chip is not fully utilized and below it's intended thermal
+budget. This is done without operating system control by a combination
+of hardware and firmware.
+On Intel CPUs this is called "Turbo Boost", AMD calls it "Turbo-Core",
+in technical documentation "Core performance boost". In Linux we use
+the term "boost" for convenience.
+
+Rationale for disable switch
+----------------------------
+
+Though the idea is to just give better performance without any user
+intervention, sometimes the need arises to disable this functionality.
+Most systems offer a switch in the (BIOS) firmware to disable the
+functionality at all, but a more fine-grained and dynamic control would
+be desirable:
+1. While running benchmarks, reproducible results are important. Since
+ the boosting functionality depends on the load of the whole package,
+ single thread performance can vary. By explicitly disabling the boost
+ functionality at least for the benchmark's run-time the system will run
+ at a fixed frequency and results are reproducible again.
+2. To examine the impact of the boosting functionality it is helpful
+ to do tests with and without boosting.
+3. Boosting means overclocking the processor, though under controlled
+ conditions. By raising the frequency and the voltage the processor
+ will consume more power than without the boosting, which may be
+ undesirable for instance for mobile users. Disabling boosting may
+ save power here, though this depends on the workload.
+
+
+User controlled switch
+----------------------
+
+To allow the user to toggle the boosting functionality, the acpi-cpufreq
+driver exports a sysfs knob to disable it. There is a file:
+/sys/devices/system/cpu/cpufreq/boost
+which can either read "0" (boosting disabled) or "1" (boosting enabled).
+Reading the file is always supported, even if the processor does not
+support boosting. In this case the file will be read-only and always
+reads as "0". Explicitly changing the permissions and writing to that
+file anyway will return EINVAL.
+
+On supported CPUs one can write either a "0" or a "1" into this file.
+This will either disable the boost functionality on all cores in the
+whole system (0) or will allow the hardware to boost at will (1).
+
+Writing a "1" does not explicitly boost the system, but just allows the
+CPU (and the firmware) to boost at their discretion. Some implementations
+take external factors like the chip's temperature into account, so
+boosting once does not necessarily mean that it will occur every time
+even using the exact same software setup.
+
+
+AMD legacy cpb switch
+---------------------
+The AMD powernow-k8 driver used to support a very similar switch to
+disable or enable the "Core Performance Boost" feature of some AMD CPUs.
+This switch was instantiated in each CPU's cpufreq directory
+(/sys/devices/system/cpu[0-9]*/cpufreq) and was called "cpb".
+Though the per CPU existence hints at a more fine grained control, the
+actual implementation only supported a system-global switch semantics,
+which was simply reflected into each CPU's file. Writing a 0 or 1 into it
+would pull the other CPUs to the same state.
+For compatibility reasons this file and its behavior is still supported
+on AMD CPUs, though it is now protected by a config switch
+(X86_ACPI_CPUFREQ_CPB). On Intel CPUs this file will never be created,
+even with the config option set.
+This functionality is considered legacy and will be removed in some future
+kernel version.
+
+More fine grained boosting control
+----------------------------------
+
+Technically it is possible to switch the boosting functionality at least
+on a per package basis, for some CPUs even per core. Currently the driver
+does not support it, but this may be implemented in the future.
diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
new file mode 100644
index 000000000000..4416ccc33472
--- /dev/null
+++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
@@ -0,0 +1,55 @@
+Generic CPU0 cpufreq driver
+
+It is a generic cpufreq driver for CPU0 frequency management. It
+supports both uniprocessor (UP) and symmetric multiprocessor (SMP)
+systems which share clock and voltage across all CPUs.
+
+Both required and optional properties listed below must be defined
+under node /cpus/cpu@0.
+
+Required properties:
+- operating-points: Refer to Documentation/devicetree/bindings/power/opp.txt
+ for details
+
+Optional properties:
+- clock-latency: Specify the possible maximum transition latency for clock,
+ in unit of nanoseconds.
+- voltage-tolerance: Specify the CPU voltage tolerance in percentage.
+
+Examples:
+
+cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ compatible = "arm,cortex-a9";
+ reg = <0>;
+ next-level-cache = <&L2>;
+ operating-points = <
+ /* kHz uV */
+ 792000 1100000
+ 396000 950000
+ 198000 850000
+ >;
+ transition-latency = <61036>; /* two CLK32 periods */
+ };
+
+ cpu@1 {
+ compatible = "arm,cortex-a9";
+ reg = <1>;
+ next-level-cache = <&L2>;
+ };
+
+ cpu@2 {
+ compatible = "arm,cortex-a9";
+ reg = <2>;
+ next-level-cache = <&L2>;
+ };
+
+ cpu@3 {
+ compatible = "arm,cortex-a9";
+ reg = <3>;
+ next-level-cache = <&L2>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/power/opp.txt b/Documentation/devicetree/bindings/power/opp.txt
new file mode 100644
index 000000000000..74499e5033fc
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/opp.txt
@@ -0,0 +1,25 @@
+* Generic OPP Interface
+
+SoCs have a standard set of tuples consisting of frequency and
+voltage pairs that the device will support per voltage domain. These
+are called Operating Performance Points or OPPs.
+
+Properties:
+- operating-points: An array of 2-tuples items, and each item consists
+ of frequency and voltage like <freq-kHz vol-uV>.
+ freq: clock frequency in kHz
+ vol: voltage in microvolt
+
+Examples:
+
+cpu@0 {
+ compatible = "arm,cortex-a9";
+ reg = <0>;
+ next-level-cache = <&L2>;
+ operating-points = <
+ /* kHz uV */
+ 792000 1100000
+ 396000 950000
+ 198000 850000
+ >;
+};
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index ebd8ad274d76..8e03567c9583 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -25,6 +25,7 @@
#include <linux/percpu.h>
#include <linux/clockchips.h>
#include <linux/completion.h>
+#include <linux/cpufreq.h>
#include <linux/atomic.h>
#include <asm/cacheflush.h>
@@ -584,3 +585,56 @@ int setup_profiling_timer(unsigned int multiplier)
{
return -EINVAL;
}
+
+#ifdef CONFIG_CPU_FREQ
+
+static DEFINE_PER_CPU(unsigned long, l_p_j_ref);
+static DEFINE_PER_CPU(unsigned long, l_p_j_ref_freq);
+static unsigned long global_l_p_j_ref;
+static unsigned long global_l_p_j_ref_freq;
+
+static int cpufreq_callback(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ struct cpufreq_freqs *freq = data;
+ int cpu = freq->cpu;
+
+ if (freq->flags & CPUFREQ_CONST_LOOPS)
+ return NOTIFY_OK;
+
+ if (!per_cpu(l_p_j_ref, cpu)) {
+ per_cpu(l_p_j_ref, cpu) =
+ per_cpu(cpu_data, cpu).loops_per_jiffy;
+ per_cpu(l_p_j_ref_freq, cpu) = freq->old;
+ if (!global_l_p_j_ref) {
+ global_l_p_j_ref = loops_per_jiffy;
+ global_l_p_j_ref_freq = freq->old;
+ }
+ }
+
+ if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
+ (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
+ (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
+ loops_per_jiffy = cpufreq_scale(global_l_p_j_ref,
+ global_l_p_j_ref_freq,
+ freq->new);
+ per_cpu(cpu_data, cpu).loops_per_jiffy =
+ cpufreq_scale(per_cpu(l_p_j_ref, cpu),
+ per_cpu(l_p_j_ref_freq, cpu),
+ freq->new);
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block cpufreq_notifier = {
+ .notifier_call = cpufreq_callback,
+};
+
+static int __init register_cpufreq_notifier(void)
+{
+ return cpufreq_register_notifier(&cpufreq_notifier,
+ CPUFREQ_TRANSITION_NOTIFIER);
+}
+core_initcall(register_cpufreq_notifier);
+
+#endif
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 957ec87385af..fbee9714d9ab 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -248,6 +248,9 @@
#define MSR_IA32_PERF_STATUS 0x00000198
#define MSR_IA32_PERF_CTL 0x00000199
+#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
+#define MSR_AMD_PERF_STATUS 0xc0010063
+#define MSR_AMD_PERF_CTL 0xc0010062
#define MSR_IA32_MPERF 0x000000e7
#define MSR_IA32_APERF 0x000000e8
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index a093dc163a42..836bfe069042 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -324,6 +324,34 @@ static int acpi_processor_get_performance_control(struct acpi_processor *pr)
return result;
}
+#ifdef CONFIG_X86
+/*
+ * Some AMDs have 50MHz frequency multiples, but only provide 100MHz rounding
+ * in their ACPI data. Calculate the real values and fix up the _PSS data.
+ */
+static void amd_fixup_frequency(struct acpi_processor_px *px, int i)
+{
+ u32 hi, lo, fid, did;
+ int index = px->control & 0x00000007;
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+ return;
+
+ if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10)
+ || boot_cpu_data.x86 == 0x11) {
+ rdmsr(MSR_AMD_PSTATE_DEF_BASE + index, lo, hi);
+ fid = lo & 0x3f;
+ did = (lo >> 6) & 7;
+ if (boot_cpu_data.x86 == 0x10)
+ px->core_frequency = (100 * (fid + 0x10)) >> did;
+ else
+ px->core_frequency = (100 * (fid + 8)) >> did;
+ }
+}
+#else
+static void amd_fixup_frequency(struct acpi_processor_px *px, int i) {};
+#endif
+
static int acpi_processor_get_performance_states(struct acpi_processor *pr)
{
int result = 0;
@@ -379,6 +407,8 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr)
goto end;
}
+ amd_fixup_frequency(px, i);
+
ACPI_DEBUG_PRINT((ACPI_DB_INFO,
"State [%d]: core_frequency[%d] power[%d] transition_latency[%d] bus_master_latency[%d] control[0x%x] status[0x%x]\n",
i,
diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c
index ac993eafec82..d9468642fc41 100644
--- a/drivers/base/power/opp.c
+++ b/drivers/base/power/opp.c
@@ -22,6 +22,7 @@
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/opp.h>
+#include <linux/of.h>
/*
* Internal data structure organization with the OPP layer library is as
@@ -674,3 +675,49 @@ struct srcu_notifier_head *opp_get_notifier(struct device *dev)
return &dev_opp->head;
}
+
+#ifdef CONFIG_OF
+/**
+ * of_init_opp_table() - Initialize opp table from device tree
+ * @dev: device pointer used to lookup device OPPs.
+ *
+ * Register the initial OPP table with the OPP library for given device.
+ */
+int of_init_opp_table(struct device *dev)
+{
+ const struct property *prop;
+ const __be32 *val;
+ int nr;
+
+ prop = of_find_property(dev->of_node, "operating-points", NULL);
+ if (!prop)
+ return -ENODEV;
+ if (!prop->value)
+ return -ENODATA;
+
+ /*
+ * Each OPP is a set of tuples consisting of frequency and
+ * voltage like <freq-kHz vol-uV>.
+ */
+ nr = prop->length / sizeof(u32);
+ if (nr % 2) {
+ dev_err(dev, "%s: Invalid OPP list\n", __func__);
+ return -EINVAL;
+ }
+
+ val = prop->value;
+ while (nr) {
+ unsigned long freq = be32_to_cpup(val++) * 1000;
+ unsigned long volt = be32_to_cpup(val++);
+
+ if (opp_add(dev, freq, volt)) {
+ dev_warn(dev, "%s: Failed to add OPP %ld\n",
+ __func__, freq);
+ continue;
+ }
+ nr -= 2;
+ }
+
+ return 0;
+}
+#endif
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index e24a2a1b6666..ea512f47b789 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -179,6 +179,17 @@ config CPU_FREQ_GOV_CONSERVATIVE
If in doubt, say N.
+config GENERIC_CPUFREQ_CPU0
+ bool "Generic CPU0 cpufreq driver"
+ depends on HAVE_CLK && REGULATOR && PM_OPP && OF
+ select CPU_FREQ_TABLE
+ help
+ This adds a generic cpufreq driver for CPU0 frequency management.
+ It supports both uniprocessor (UP) and symmetric multiprocessor (SMP)
+ systems which share clock and voltage across all CPUs.
+
+ If in doubt, say N.
+
menu "x86 CPU frequency scaling drivers"
depends on X86
source "drivers/cpufreq/Kconfig.x86"
diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
index 78ff7ee48951..934854ae5eb4 100644
--- a/drivers/cpufreq/Kconfig.x86
+++ b/drivers/cpufreq/Kconfig.x86
@@ -23,7 +23,8 @@ config X86_ACPI_CPUFREQ
help
This driver adds a CPUFreq driver which utilizes the ACPI
Processor Performance States.
- This driver also supports Intel Enhanced Speedstep.
+ This driver also supports Intel Enhanced Speedstep and newer
+ AMD CPUs.
To compile this driver as a module, choose M here: the
module will be called acpi-cpufreq.
@@ -32,6 +33,18 @@ config X86_ACPI_CPUFREQ
If in doubt, say N.
+config X86_ACPI_CPUFREQ_CPB
+ default y
+ bool "Legacy cpb sysfs knob support for AMD CPUs"
+ depends on X86_ACPI_CPUFREQ && CPU_SUP_AMD
+ help
+ The powernow-k8 driver used to provide a sysfs knob called "cpb"
+ to disable the Core Performance Boosting feature of AMD CPUs. This
+ file has now been superseeded by the more generic "boost" entry.
+
+ By enabling this option the acpi_cpufreq driver provides the old
+ entry in addition to the new boost ones, for compatibility reasons.
+
config ELAN_CPUFREQ
tristate "AMD Elan SC400 and SC410"
select CPU_FREQ_TABLE
@@ -95,7 +108,8 @@ config X86_POWERNOW_K8
select CPU_FREQ_TABLE
depends on ACPI && ACPI_PROCESSOR
help
- This adds the CPUFreq driver for K8/K10 Opteron/Athlon64 processors.
+ This adds the CPUFreq driver for K8/early Opteron/Athlon64 processors.
+ Support for K10 and newer processors is now in acpi-cpufreq.
To compile this driver as a module, choose M here: the
module will be called powernow-k8.
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 9531fc2eda22..1bc90e1306d8 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -13,13 +13,15 @@ obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE) += cpufreq_conservative.o
# CPUfreq cross-arch helpers
obj-$(CONFIG_CPU_FREQ_TABLE) += freq_table.o
+obj-$(CONFIG_GENERIC_CPUFREQ_CPU0) += cpufreq-cpu0.o
+
##################################################################################
# x86 drivers.
# Link order matters. K8 is preferred to ACPI because of firmware bugs in early
# K8 systems. ACPI is preferred to all other hardware-specific drivers.
# speedstep-* is preferred over p4-clockmod.
-obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o mperf.o
+obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o
obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o mperf.o
obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o
obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 56c6c6b4eb4d..0d048f6a2b23 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -51,13 +51,19 @@ MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
MODULE_DESCRIPTION("ACPI Processor P-States Driver");
MODULE_LICENSE("GPL");
+#define PFX "acpi-cpufreq: "
+
enum {
UNDEFINED_CAPABLE = 0,
SYSTEM_INTEL_MSR_CAPABLE,
+ SYSTEM_AMD_MSR_CAPABLE,
SYSTEM_IO_CAPABLE,
};
#define INTEL_MSR_RANGE (0xffff)
+#define AMD_MSR_RANGE (0x7)
+
+#define MSR_K7_HWCR_CPB_DIS (1ULL << 25)
struct acpi_cpufreq_data {
struct acpi_processor_performance *acpi_data;
@@ -74,6 +80,116 @@ static struct acpi_processor_performance __percpu *acpi_perf_data;
static struct cpufreq_driver acpi_cpufreq_driver;
static unsigned int acpi_pstate_strict;
+static bool boost_enabled, boost_supported;
+static struct msr __percpu *msrs;
+
+static bool boost_state(unsigned int cpu)
+{
+ u32 lo, hi;
+ u64 msr;
+
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_INTEL:
+ rdmsr_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &lo, &hi);
+ msr = lo | ((u64)hi << 32);
+ return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
+ case X86_VENDOR_AMD:
+ rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
+ msr = lo | ((u64)hi << 32);
+ return !(msr & MSR_K7_HWCR_CPB_DIS);
+ }
+ return false;
+}
+
+static void boost_set_msrs(bool enable, const struct cpumask *cpumask)
+{
+ u32 cpu;
+ u32 msr_addr;
+ u64 msr_mask;
+
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_INTEL:
+ msr_addr = MSR_IA32_MISC_ENABLE;
+ msr_mask = MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
+ break;
+ case X86_VENDOR_AMD:
+ msr_addr = MSR_K7_HWCR;
+ msr_mask = MSR_K7_HWCR_CPB_DIS;
+ break;
+ default:
+ return;
+ }
+
+ rdmsr_on_cpus(cpumask, msr_addr, msrs);
+
+ for_each_cpu(cpu, cpumask) {
+ struct msr *reg = per_cpu_ptr(msrs, cpu);
+ if (enable)
+ reg->q &= ~msr_mask;
+ else
+ reg->q |= msr_mask;
+ }
+
+ wrmsr_on_cpus(cpumask, msr_addr, msrs);
+}
+
+static ssize_t _store_boost(const char *buf, size_t count)
+{
+ int ret;
+ unsigned long val = 0;
+
+ if (!boost_supported)
+ return -EINVAL;
+
+ ret = kstrtoul(buf, 10, &val);
+ if (ret || (val > 1))
+ return -EINVAL;
+
+ if ((val && boost_enabled) || (!val && !boost_enabled))
+ return count;
+
+ get_online_cpus();
+
+ boost_set_msrs(val, cpu_online_mask);
+
+ put_online_cpus();
+
+ boost_enabled = val;
+ pr_debug("Core Boosting %sabled.\n", val ? "en" : "dis");
+
+ return count;
+}
+
+static ssize_t store_global_boost(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ return _store_boost(buf, count);
+}
+
+static ssize_t show_global_boost(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", boost_enabled);
+}
+
+static struct global_attr global_boost = __ATTR(boost, 0644,
+ show_global_boost,
+ store_global_boost);
+
+#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
+static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
+ size_t count)
+{
+ return _store_boost(buf, count);
+}
+
+static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
+{
+ return sprintf(buf, "%u\n", boost_enabled);
+}
+
+static struct freq_attr cpb = __ATTR(cpb, 0644, show_cpb, store_cpb);
+#endif
static int check_est_cpu(unsigned int cpuid)
{
@@ -82,6 +198,13 @@ static int check_est_cpu(unsigned int cpuid)
return cpu_has(cpu, X86_FEATURE_EST);
}
+static int check_amd_hwpstate_cpu(unsigned int cpuid)
+{
+ struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
+
+ return cpu_has(cpu, X86_FEATURE_HW_PSTATE);
+}
+
static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
{
struct acpi_processor_performance *perf;
@@ -101,7 +224,11 @@ static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
int i;
struct acpi_processor_performance *perf;
- msr &= INTEL_MSR_RANGE;
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ msr &= AMD_MSR_RANGE;
+ else
+ msr &= INTEL_MSR_RANGE;
+
perf = data->acpi_data;
for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
@@ -115,6 +242,7 @@ static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
{
switch (data->cpu_feature) {
case SYSTEM_INTEL_MSR_CAPABLE:
+ case SYSTEM_AMD_MSR_CAPABLE:
return extract_msr(val, data);
case SYSTEM_IO_CAPABLE:
return extract_io(val, data);
@@ -150,6 +278,7 @@ static void do_drv_read(void *_cmd)
switch (cmd->type) {
case SYSTEM_INTEL_MSR_CAPABLE:
+ case SYSTEM_AMD_MSR_CAPABLE:
rdmsr(cmd->addr.msr.reg, cmd->val, h);
break;
case SYSTEM_IO_CAPABLE:
@@ -174,6 +303,9 @@ static void do_drv_write(void *_cmd)
lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
wrmsr(cmd->addr.msr.reg, lo, hi);
break;
+ case SYSTEM_AMD_MSR_CAPABLE:
+ wrmsr(cmd->addr.msr.reg, cmd->val, 0);
+ break;
case SYSTEM_IO_CAPABLE:
acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
cmd->val,
@@ -217,6 +349,10 @@ static u32 get_cur_val(const struct cpumask *mask)
cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
break;
+ case SYSTEM_AMD_MSR_CAPABLE:
+ cmd.type = SYSTEM_AMD_MSR_CAPABLE;
+ cmd.addr.msr.reg = MSR_AMD_PERF_STATUS;
+ break;
case SYSTEM_IO_CAPABLE:
cmd.type = SYSTEM_IO_CAPABLE;
perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data;
@@ -326,6 +462,11 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
cmd.val = (u32) perf->states[next_perf_state].control;
break;
+ case SYSTEM_AMD_MSR_CAPABLE:
+ cmd.type = SYSTEM_AMD_MSR_CAPABLE;
+ cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
+ cmd.val = (u32) perf->states[next_perf_state].control;
+ break;
case SYSTEM_IO_CAPABLE:
cmd.type = SYSTEM_IO_CAPABLE;
cmd.addr.io.port = perf->control_register.address;
@@ -419,6 +560,44 @@ static void free_acpi_perf_data(void)
free_percpu(acpi_perf_data);
}
+static int boost_notify(struct notifier_block *nb, unsigned long action,
+ void *hcpu)
+{
+ unsigned cpu = (long)hcpu;
+ const struct cpumask *cpumask;
+
+ cpumask = get_cpu_mask(cpu);
+
+ /*
+ * Clear the boost-disable bit on the CPU_DOWN path so that
+ * this cpu cannot block the remaining ones from boosting. On
+ * the CPU_UP path we simply keep the boost-disable flag in
+ * sync with the current global state.
+ */
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ boost_set_msrs(boost_enabled, cpumask);
+ break;
+
+ case CPU_DOWN_PREPARE:
+ case CPU_DOWN_PREPARE_FROZEN:
+ boost_set_msrs(1, cpumask);
+ break;
+
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+
+static struct notifier_block boost_nb = {
+ .notifier_call = boost_notify,
+};
+
/*
* acpi_cpufreq_early_init - initialize ACPI P-States library
*
@@ -559,6 +738,14 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
cpumask_copy(policy->cpus, cpu_core_mask(cpu));
}
+
+ if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) {
+ cpumask_clear(policy->cpus);
+ cpumask_set_cpu(cpu, policy->cpus);
+ cpumask_copy(policy->related_cpus, cpu_sibling_mask(cpu));
+ policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
+ pr_info_once(PFX "overriding BIOS provided _PSD data\n");
+ }
#endif
/* capability check */
@@ -580,12 +767,16 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
break;
case ACPI_ADR_SPACE_FIXED_HARDWARE:
pr_debug("HARDWARE addr space\n");
- if (!check_est_cpu(cpu)) {
- result = -ENODEV;
- goto err_unreg;
+ if (check_est_cpu(cpu)) {
+ data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
+ break;
}
- data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
- break;
+ if (check_amd_hwpstate_cpu(cpu)) {
+ data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
+ break;
+ }
+ result = -ENODEV;
+ goto err_unreg;
default:
pr_debug("Unknown addr space %d\n",
(u32) (perf->control_register.space_id));
@@ -718,6 +909,7 @@ static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
static struct freq_attr *acpi_cpufreq_attr[] = {
&cpufreq_freq_attr_scaling_available_freqs,
+ NULL, /* this is a placeholder for cpb, do not remove */
NULL,
};
@@ -733,6 +925,49 @@ static struct cpufreq_driver acpi_cpufreq_driver = {
.attr = acpi_cpufreq_attr,
};
+static void __init acpi_cpufreq_boost_init(void)
+{
+ if (boot_cpu_has(X86_FEATURE_CPB) || boot_cpu_has(X86_FEATURE_IDA)) {
+ msrs = msrs_alloc();
+
+ if (!msrs)
+ return;
+
+ boost_supported = true;
+ boost_enabled = boost_state(0);
+
+ get_online_cpus();
+
+ /* Force all MSRs to the same value */
+ boost_set_msrs(boost_enabled, cpu_online_mask);
+
+ register_cpu_notifier(&boost_nb);
+
+ put_online_cpus();
+ } else
+ global_boost.attr.mode = 0444;
+
+ /* We create the boost file in any case, though for systems without
+ * hardware support it will be read-only and hardwired to return 0.
+ */
+ if (sysfs_create_file(cpufreq_global_kobject, &(global_boost.attr)))
+ pr_warn(PFX "could not register global boost sysfs file\n");
+ else
+ pr_debug("registered global boost sysfs file\n");
+}
+
+static void __exit acpi_cpufreq_boost_exit(void)
+{
+ sysfs_remove_file(cpufreq_global_kobject, &(global_boost.attr));
+
+ if (msrs) {
+ unregister_cpu_notifier(&boost_nb);
+
+ msrs_free(msrs);
+ msrs = NULL;
+ }
+}
+
static int __init acpi_cpufreq_init(void)
{
int ret;
@@ -746,9 +981,32 @@ static int __init acpi_cpufreq_init(void)
if (ret)
return ret;
+#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
+ /* this is a sysfs file with a strange name and an even stranger
+ * semantic - per CPU instantiation, but system global effect.
+ * Lets enable it only on AMD CPUs for compatibility reasons and
+ * only if configured. This is considered legacy code, which
+ * will probably be removed at some point in the future.
+ */
+ if (check_amd_hwpstate_cpu(0)) {
+ struct freq_attr **iter;
+
+ pr_debug("adding sysfs entry for cpb\n");
+
+ for (iter = acpi_cpufreq_attr; *iter != NULL; iter++)
+ ;
+
+ /* make sure there is a terminator behind it */
+ if (iter[1] == NULL)
+ *iter = &cpb;
+ }
+#endif
+
ret = cpufreq_register_driver(&acpi_cpufreq_driver);
if (ret)
free_acpi_perf_data();
+ else
+ acpi_cpufreq_boost_init();
return ret;
}
@@ -757,6 +1015,8 @@ static void __exit acpi_cpufreq_exit(void)
{
pr_debug("acpi_cpufreq_exit\n");
+ acpi_cpufreq_boost_exit();
+
cpufreq_unregister_driver(&acpi_cpufreq_driver);
free_acpi_perf_data();
diff