aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--tools/power/x86/turbostat/turbostat.827
-rw-r--r--tools/power/x86/turbostat/turbostat.c1205
-rw-r--r--tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c47
3 files changed, 660 insertions, 619 deletions
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 3340def58d01..1551fcdbfd8a 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -101,7 +101,7 @@ The column name "all" can be used to enable all disabled-by-default built-in cou
.PP
\fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names.
.PP
-\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "cpuidle", "hwidle", "swidle", "other". "idle" (enabled by default), includes "hwidle" and "pct_idle". "cpuidle" (default disabled) includes cpuidle software invocation counters. "swidle" includes "cpuidle" plus "pct_idle". "hwidle" includes only hardware based idle residency counters. Older versions of turbostat used the term "sysfs" for what is now "swidle".
+\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a comma-separated list of CATEGORIES of columns: "all", "topology", "idle", "frequency", "power", "cpuidle", "hwidle", "swidle", "cache", "llc", "other". "idle" (enabled by default), includes "hwidle" and "pct_idle". "cpuidle" (default disabled) includes cpuidle software invocation counters. "swidle" includes "cpuidle" plus "pct_idle". "hwidle" includes only hardware based idle residency counters. Older versions of turbostat used the term "sysfs" for what is now "swidle".
.PP
\fB--Dump\fP displays the raw counter values.
.PP
@@ -159,6 +159,10 @@ The system configuration dump (if --quiet is not used) is followed by statistics
.PP
\fBSMI\fP The number of System Management Interrupts serviced CPU during the measurement interval. While this counter is actually per-CPU, SMI are triggered on all processors, so the number should be the same for all CPUs.
.PP
+\fBLLCkRPS\fP Last Level Cache Thousands of References Per Second. For CPUs with an L3 LLC, this is the number of references that CPU made to the L3 (and the number of misses that CPU made to its L2). For CPUs with an L2 LLC, this is the number of references to the L2 (and the number of misses to the CPU's L1). The system summary row shows the sum for all CPUs. In both cases, the value displayed is the actual value divided by 1000 in the interest of usually fitting into 8 columns.
+.PP
+\fBLLC%hit\fP Last Level Cache Hit Rate %. Hit Rate Percent = 100.0 * (References - Misses)/References. The system summary row shows the weighted average for all CPUs (100.0 * (Sum_References - Sum_Misses)/Sum_References).
+.PP
\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. These counters are in the "cpuidle" group, which is disabled, by default.
.PP
\fBC1+, C2+, C3+...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a deeper idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/below file. These counters are in the "cpuidle" group, which is disabled, by default.
@@ -410,25 +414,24 @@ CPU pCPU%c1 CPU%c1
.fi
.SH ADD PERF COUNTER EXAMPLE #2 (using virtual cpu device)
-Here we run on hybrid, Raptor Lake platform.
-We limit turbostat to show output for just cpu0 (pcore) and cpu12 (ecore).
+Here we run on hybrid, Meteor Lake platform.
+We limit turbostat to show output for just cpu0 (pcore) and cpu4 (ecore).
We add a counter showing number of L3 cache misses, using virtual "cpu" device,
labeling it with the column header, "VCMISS".
We add a counter showing number of L3 cache misses, using virtual "cpu_core" device,
-labeling it with the column header, "PCMISS". This will fail on ecore cpu12.
+labeling it with the column header, "PCMISS". This will fail on ecore cpu4.
We add a counter showing number of L3 cache misses, using virtual "cpu_atom" device,
labeling it with the column header, "ECMISS". This will fail on pcore cpu0.
We display it only once, after the conclusion of 0.1 second sleep.
.nf
-sudo ./turbostat --quiet --cpu 0,12 --show CPU --add perf/cpu/cache-misses,cpu,delta,raw,VCMISS --add perf/cpu_core/cache-misses,cpu,delta,raw,PCMISS --add perf/cpu_atom/cache-misses,cpu,delta,raw,ECMISS sleep .1
+sudo ./turbostat --quiet --cpu 0,4 --show CPU --add perf/cpu/cache-misses,cpu,delta,VCMISS --add perf/cpu_core/cache-misses,cpu,delta,PCMISS --add perf/cpu_atom/cache-misses,cpu,delta,ECMISS sleep 5
turbostat: added_perf_counters_init_: perf/cpu_atom/cache-misses: failed to open counter on cpu0
-turbostat: added_perf_counters_init_: perf/cpu_core/cache-misses: failed to open counter on cpu12
-0.104630 sec
-CPU ECMISS PCMISS VCMISS
-- 0x0000000000000000 0x0000000000000000 0x0000000000000000
-0 0x0000000000000000 0x0000000000007951 0x0000000000007796
-12 0x000000000001137a 0x0000000000000000 0x0000000000011392
-
+turbostat: added_perf_counters_init_: perf/cpu_core/cache-misses: failed to open counter on cpu4
+5.001207 sec
+CPU ECMISS PCMISS VCMISS
+- 41586506 46291219 87877749
+4 83173012 0 83173040
+0 0 92582439 92582458
.fi
.SH ADD PMT COUNTER EXAMPLE
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index f2512d78bcbd..5ad45c2ac5bd 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -142,6 +142,7 @@ struct msr_counter {
#define FLAGS_SHOW (1 << 1)
#define SYSFS_PERCPU (1 << 1)
};
+static int use_android_msr_path;
struct msr_counter bic[] = {
{ 0x0, "usec", NULL, 0, 0, 0, NULL, 0 },
@@ -209,6 +210,8 @@ struct msr_counter bic[] = {
{ 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "pct_idle", NULL, 0, 0, 0, NULL, 0 },
+ { 0x0, "LLCkRPS", NULL, 0, 0, 0, NULL, 0 },
+ { 0x0, "LLC%hit", NULL, 0, 0, 0, NULL, 0 },
};
/* n.b. bic_names must match the order in bic[], above */
@@ -278,6 +281,8 @@ enum bic_names {
BIC_NMI,
BIC_CPU_c1e,
BIC_pct_idle,
+ BIC_LLC_RPS,
+ BIC_LLC_HIT,
MAX_BIC
};
@@ -305,6 +310,7 @@ static cpu_set_t bic_group_frequency;
static cpu_set_t bic_group_hw_idle;
static cpu_set_t bic_group_sw_idle;
static cpu_set_t bic_group_idle;
+static cpu_set_t bic_group_cache;
static cpu_set_t bic_group_other;
static cpu_set_t bic_group_disabled_by_default;
static cpu_set_t bic_enabled;
@@ -413,9 +419,14 @@ static void bic_groups_init(void)
SET_BIC(BIC_pct_idle, &bic_group_sw_idle);
BIC_INIT(&bic_group_idle);
+
CPU_OR(&bic_group_idle, &bic_group_idle, &bic_group_hw_idle);
SET_BIC(BIC_pct_idle, &bic_group_idle);
+ BIC_INIT(&bic_group_cache);
+ SET_BIC(BIC_LLC_RPS, &bic_group_cache);
+ SET_BIC(BIC_LLC_HIT, &bic_group_cache);
+
BIC_INIT(&bic_group_other);
SET_BIC(BIC_IRQ, &bic_group_other);
SET_BIC(BIC_NMI, &bic_group_other);
@@ -466,12 +477,11 @@ static void bic_groups_init(void)
#define PCL_10 14 /* PC10 */
#define PCLUNL 15 /* Unlimited */
-struct amperf_group_fd;
-
char *proc_stat = "/proc/stat";
FILE *outf;
int *fd_percpu;
int *fd_instr_count_percpu;
+int *fd_llc_percpu;
struct timeval interval_tv = { 5, 0 };
struct timespec interval_ts = { 5, 0 };
@@ -482,11 +492,12 @@ unsigned int quiet;
unsigned int shown;
unsigned int sums_need_wide_columns;
unsigned int rapl_joules;
+unsigned int valid_rapl_msrs;
unsigned int summary_only;
unsigned int list_header_only;
unsigned int dump_only;
unsigned int force_load;
-unsigned int has_aperf;
+unsigned int cpuid_has_aperf_mperf;
unsigned int has_aperf_access;
unsigned int has_epb;
unsigned int has_turbo;
@@ -552,8 +563,7 @@ static struct gfx_sysfs_info gfx_info[GFX_MAX];
int get_msr(int cpu, off_t offset, unsigned long long *msr);
int add_counter(unsigned int msr_num, char *path, char *name,
- unsigned int width, enum counter_scope scope,
- enum counter_type type, enum counter_format format, int flags, int package_num);
+ unsigned int width, enum counter_scope scope, enum counter_type type, enum counter_format format, int flags, int package_num);
/* Model specific support Start */
@@ -578,7 +588,7 @@ struct platform_features {
bool has_cst_prewake_bit; /* Cstate prewake bit in MSR_IA32_POWER_CTL */
int trl_msrs; /* MSR_TURBO_RATIO_LIMIT/LIMIT1/LIMIT2/SECONDARY, Atom TRL MSRs */
int plr_msrs; /* MSR_CORE/GFX/RING_PERF_LIMIT_REASONS */
- int rapl_msrs; /* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */
+ int plat_rapl_msrs; /* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */
bool has_per_core_rapl; /* Indicates cores energy collection is per-core, not per-package. AMD specific for now */
bool has_rapl_divisor; /* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */
bool has_fixed_rapl_unit; /* Fixed Energy Unit used for DRAM RAPL Domain */
@@ -733,7 +743,7 @@ static const struct platform_features snb_features = {
.cst_limit = CST_LIMIT_SNB,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
- .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+ .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};
static const struct platform_features snx_features = {
@@ -745,7 +755,7 @@ static const struct platform_features snx_features = {
.cst_limit = CST_LIMIT_SNB,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
};
static const struct platform_features ivb_features = {
@@ -758,7 +768,7 @@ static const struct platform_features ivb_features = {
.cst_limit = CST_LIMIT_SNB,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
- .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+ .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};
static const struct platform_features ivx_features = {
@@ -770,7 +780,7 @@ static const struct platform_features ivx_features = {
.cst_limit = CST_LIMIT_SNB,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE | TRL_LIMIT1,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
};
static const struct platform_features hsw_features = {
@@ -784,7 +794,7 @@ static const struct platform_features hsw_features = {
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
- .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+ .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};
static const struct platform_features hsx_features = {
@@ -798,7 +808,7 @@ static const struct platform_features hsx_features = {
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE | TRL_LIMIT1 | TRL_LIMIT2,
.plr_msrs = PLR_CORE | PLR_RING,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
.has_fixed_rapl_unit = 1,
};
@@ -813,7 +823,7 @@ static const struct platform_features hswl_features = {
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
- .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+ .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};
static const struct platform_features hswg_features = {
@@ -827,7 +837,7 @@ static const struct platform_features hswg_features = {
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
- .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+ .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};
static const struct platform_features bdw_features = {
@@ -840,7 +850,7 @@ static const struct platform_features bdw_features = {
.cst_limit = CST_LIMIT_HSW,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
- .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+ .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};
static const struct platform_features bdwg_features = {
@@ -853,7 +863,7 @@ static const struct platform_features bdwg_features = {
.cst_limit = CST_LIMIT_HSW,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
- .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+ .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};
static const struct platform_features bdx_features = {
@@ -867,7 +877,7 @@ static const struct platform_features bdx_features = {
.has_irtl_msrs = 1,
.has_cst_auto_convension = 1,
.trl_msrs = TRL_BASE,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
.has_fixed_rapl_unit = 1,
};
@@ -884,7 +894,7 @@ static const struct platform_features skl_features = {
.has_ext_cst_msrs = 1,
.trl_msrs = TRL_BASE,
.tcc_offset_bits = 6,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS,
.enable_tsc_tweak = 1,
};
@@ -901,7 +911,7 @@ static const struct platform_features cnl_features = {
.has_ext_cst_msrs = 1,
.trl_msrs = TRL_BASE,
.tcc_offset_bits = 6,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS,
.enable_tsc_tweak = 1,
};
@@ -919,7 +929,7 @@ static const struct platform_features adl_features = {
.has_ext_cst_msrs = cnl_features.has_ext_cst_msrs,
.trl_msrs = cnl_features.trl_msrs,
.tcc_offset_bits = cnl_features.tcc_offset_bits,
- .rapl_msrs = cnl_features.rapl_msrs,
+ .plat_rapl_msrs = cnl_features.plat_rapl_msrs,
.enable_tsc_tweak = cnl_features.enable_tsc_tweak,
};
@@ -937,7 +947,7 @@ static const struct platform_features lnl_features = {
.has_ext_cst_msrs = adl_features.has_ext_cst_msrs,
.trl_msrs = adl_features.trl_msrs,
.tcc_offset_bits = adl_features.tcc_offset_bits,
- .rapl_msrs = adl_features.rapl_msrs,
+ .plat_rapl_msrs = adl_features.plat_rapl_msrs,
.enable_tsc_tweak = adl_features.enable_tsc_tweak,
};
@@ -952,7 +962,7 @@ static const struct platform_features skx_features = {
.has_irtl_msrs = 1,
.has_cst_auto_convension = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
.has_fixed_rapl_unit = 1,
};
@@ -968,7 +978,7 @@ static const struct platform_features icx_features = {
.has_irtl_msrs = 1,
.has_cst_prewake_bit = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
.has_fixed_rapl_unit = 1,
};
@@ -985,7 +995,7 @@ static const struct platform_features spr_features = {
.has_cst_prewake_bit = 1,
.has_fixed_rapl_psys_unit = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
};
static const struct platform_features dmr_features = {
@@ -1000,7 +1010,7 @@ static const struct platform_features dmr_features = {
.has_fixed_rapl_psys_unit = spr_features.has_fixed_rapl_psys_unit,
.trl_msrs = spr_features.trl_msrs,
.has_msr_module_c6_res_ms = 1, /* DMR has Dual-Core-Module and MC6 MSR */
- .rapl_msrs = 0, /* DMR does not have RAPL MSRs */
+ .plat_rapl_msrs = 0, /* DMR does not have RAPL MSRs */
.plr_msrs = 0, /* DMR does not have PLR MSRs */
.has_irtl_msrs = 0, /* DMR does not have IRTL MSRs */
.has_config_tdp = 0, /* DMR does not have CTDP MSRs */
@@ -1019,7 +1029,7 @@ static const struct platform_features srf_features = {
.has_irtl_msrs = 1,
.has_cst_prewake_bit = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
};
static const struct platform_features grr_features = {
@@ -1035,7 +1045,7 @@ static const struct platform_features grr_features = {
.has_irtl_msrs = 1,
.has_cst_prewake_bit = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
};
static const struct platform_features slv_features = {
@@ -1048,7 +1058,7 @@ static const struct platform_features slv_features = {
.has_msr_c6_demotion_policy_config = 1,
.has_msr_atom_pkg_c6_residency = 1,
.trl_msrs = TRL_ATOM,
- .rapl_msrs = RAPL_PKG | RAPL_CORE,
+ .plat_rapl_msrs = RAPL_PKG | RAPL_CORE,
.has_rapl_divisor = 1,
.rapl_quirk_tdp = 30,
};
@@ -1061,7 +1071,7 @@ static const struct platform_features slvd_features = {
.cst_limit = CST_LIMIT_SLV,
.has_msr_atom_pkg_c6_residency = 1,
.trl_msrs = TRL_BASE,
- .rapl_msrs = RAPL_PKG | RAPL_CORE,
+ .plat_rapl_msrs = RAPL_PKG | RAPL_CORE,
.rapl_quirk_tdp = 30,
};
@@ -1082,7 +1092,7 @@ static const struct platform_features gmt_features = {
.cst_limit = CST_LIMIT_GMT,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
- .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
+ .plat_rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
};
static const struct platform_features gmtd_features = {
@@ -1095,7 +1105,7 @@ static const struct platform_features gmtd_features = {
.has_irtl_msrs = 1,
.has_msr_core_c1_res = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS,
};
static const struct platform_features gmtp_features = {
@@ -1107,7 +1117,7 @@ static const struct platform_features gmtp_features = {
.cst_limit = CST_LIMIT_GMT,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
- .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
+ .plat_rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
};
static const struct platform_features tmt_features = {
@@ -1118,7 +1128,7 @@ static const struct platform_features tmt_features = {
.cst_limit = CST_LIMIT_GMT,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
.enable_tsc_tweak = 1,
};
@@ -1130,7 +1140,7 @@ static const struct platform_features tmtd_features = {
.cst_limit = CST_LIMIT_GMT,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
- .rapl_msrs = RAPL_PKG_ALL,
+ .plat_rapl_msrs = RAPL_PKG_ALL,
};
static const struct platform_features knl_features = {
@@ -1142,7 +1152,7 @@ static const struct platform_features knl_features = {
.cst_limit = CST_LIMIT_KNL,
.has_msr_knl_core_c6_residency = 1,
.trl_msrs = TRL_KNL,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
.has_fixed_rapl_unit = 1,
.need_perf_multiplier = 1,
};
@@ -1151,7 +1161,7 @@ static const struct platform_features default_features = {
};
static const struct platform_features amd_features_with_rapl = {
- .rapl_msrs = RAPL_AMD_F17H,
+ .plat_rapl_msrs = RAPL_AMD_F17H,
.has_per_core_rapl = 1,
.rapl_quirk_tdp = 280, /* This is the max stock TDP of HEDT/Server Fam17h+ chips */
};
@@ -1210,6 +1220,9 @@ static const struct platform_data turbostat_pdata[] = {
{ INTEL_ARROWLAKE, &adl_features },
{ INTEL_LUNARLAKE_M, &lnl_features },
{ INTEL_PANTHERLAKE_L, &lnl_features },
+ { INTEL_NOVALAKE, &lnl_features },
+ { INTEL_NOVALAKE_L, &lnl_features },
+ { INTEL_WILDCATLAKE_L, &lnl_features },
{ INTEL_ATOM_SILVERMONT, &slv_features },
{ INTEL_ATOM_SILVERMONT_D, &slvd_features },
{ INTEL_ATOM_AIRMONT, &amt_features },
@@ -1294,8 +1307,7 @@ char *progname;
#define CPU_SUBSET_MAXCPUS 8192 /* need to use before probe... */
cpu_set_t *cpu_present_set, *cpu_possible_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset;
-size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize,
- cpu_subset_size;
+size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size;
#define MAX_ADDED_THREAD_COUNTERS 24
#define MAX_ADDED_CORE_COUNTERS 8
#define MAX_ADDED_PACKAGE_COUNTERS 16
@@ -1991,6 +2003,10 @@ void pmt_counter_resize(struct pmt_counter *pcounter, unsigned int new_size)
pmt_counter_resize_(pcounter, new_size);
}
+/*
+ * Last Level Cache (LLC) counts.
+ * One instance is embedded in each struct thread_data as "llc".
+ * NOTE(review): presumably the source for the LLCkRPS and LLC%hit columns -- confirm.
+ */
+struct llc_stats {
+ unsigned long long references;	/* count of LLC references */
+ unsigned long long misses;	/* count of LLC misses */
+};
struct thread_data {
struct timeval tv_begin;
struct timeval tv_end;
@@ -2003,6 +2019,7 @@ struct thread_data {
unsigned long long irq_count;
unsigned long long nmi_count;
unsigned int smi_count;
+ struct llc_stats llc;
unsigned int cpu_id;
unsigned int apic_id;
unsigned int x2apic_id;
@@ -2118,7 +2135,7 @@ off_t idx_to_offset(int idx)
switch (idx) {
case IDX_PKG_ENERGY:
- if (platform->rapl_msrs & RAPL_AMD_F17H)
+ if (valid_rapl_msrs & RAPL_AMD_F17H)
offset = MSR_PKG_ENERGY_STAT;
else
offset = MSR_PKG_ENERGY_STATUS;
@@ -2184,19 +2201,19 @@ int idx_valid(int idx)
{
switch (idx) {
case IDX_PKG_ENERGY:
- return platform->rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H);
+ return valid_rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H);
case IDX_DRAM_ENERGY:
- return platform->rapl_msrs & RAPL_DRAM;
+ return valid_rapl_msrs & RAPL_DRAM;
case IDX_PP0_ENERGY:
- return platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS;
+ return valid_rapl_msrs & RAPL_CORE_ENERGY_STATUS;
case IDX_PP1_ENERGY:
- return platform->rapl_msrs & RAPL_GFX;
+ return valid_rapl_msrs & RAPL_GFX;
case IDX_PKG_PERF:
- return platform->rapl_msrs & RAPL_PKG_PERF_STATUS;
+ return valid_rapl_msrs & RAPL_PKG_PERF_STATUS;
case IDX_DRAM_PERF:
- return platform->rapl_msrs & RAPL_DRAM_PERF_STATUS;
+ return valid_rapl_msrs & RAPL_DRAM_PERF_STATUS;
case IDX_PSYS_ENERGY:
- return platform->rapl_msrs & RAPL_PSYS;
+ return valid_rapl_msrs & RAPL_PSYS;
default:
return 0;
}
@@ -2362,23 +2379,19 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk
return retval;
}
-int is_cpu_first_thread_in_core(PER_THREAD_PARAMS)
+/*
+ * is_cpu_first_thread_in_core()
+ * Return non-zero when t->cpu_id equals the core's base_cpu,
+ * or when the core's base_cpu is negative.
+ * NOTE(review): a negative base_cpu presumably means "not yet assigned" -- confirm.
+ */
+int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c)
{
- UNUSED(p);
-
return ((int)t->cpu_id == c->base_cpu || c->base_cpu < 0);
}
-int is_cpu_first_core_in_package(PER_THREAD_PARAMS)
+/*
+ * is_cpu_first_core_in_package()
+ * Return non-zero when t->cpu_id equals the package's base_cpu,
+ * or when the package's base_cpu is negative.
+ * NOTE(review): a negative base_cpu presumably means "not yet assigned" -- confirm.
+ */
+int is_cpu_first_core_in_package(struct thread_data *t, struct pkg_data *p)
{
- UNUSED(c);
-
return ((int)t->cpu_id == p->base_cpu || p->base_cpu < 0);
}
-int is_cpu_first_thread_in_package(PER_THREAD_PARAMS)
+/*
+ * is_cpu_first_thread_in_package()
+ * Return non-zero only when both is_cpu_first_thread_in_core(t, c)
+ * and is_cpu_first_core_in_package(t, p) return non-zero.
+ */
+int is_cpu_first_thread_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
- return is_cpu_first_thread_in_core(t, c, p) && is_cpu_first_core_in_package(t, c, p);
+ return is_cpu_first_thread_in_core(t, c) && is_cpu_first_core_in_package(t, p);
}
int cpu_migrate(int cpu)
@@ -2400,20 +2413,11 @@ int get_msr_fd(int cpu)
if (fd)
return fd;
-#if defined(ANDROID)
- sprintf(pathname, "/dev/msr%d", cpu);
-#else
- sprintf(pathname, "/dev/cpu/%d/msr", cpu);
-#endif
+ sprintf(pathname, use_android_msr_path ? "/dev/msr%d" : "/dev/cpu/%d/msr", cpu);
fd = open(pathname, O_RDONLY);
if (fd < 0)
-#if defined(ANDROID)
- err(-1, "%s open failed, try chown or chmod +r /dev/msr*, "
- "or run with --no-msr, or run as root", pathname);
-#else
- err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, "
- "or run with --no-msr, or run as root", pathname);
-#endif
+ err(-1, "%s open failed, try chown or chmod +r %s, "
+ "or run with --no-msr, or run as root", pathname, use_android_msr_path ? "/dev/msr*" : "/dev/cpu/*/msr");
fd_percpu[cpu] = fd;
return fd;
@@ -2432,6 +2436,13 @@ static void bic_disable_msr_access(void)
free_sys_msr_counters();
}
+/*
+ * bic_disable_perf_access()
+ * Clear the BIC_IPC, BIC_LLC_RPS and BIC_LLC_HIT bits in bic_enabled.
+ * NOTE(review): presumably used when perf counters cannot be used, so that
+ * the perf-backed built-in columns are not shown -- confirm against callers.
+ */
+static void bic_disable_perf_access(void)
+{
+ CLR_BIC(BIC_IPC, &bic_enabled);
+ CLR_BIC(BIC_LLC_RPS, &bic_enabled);
+ CLR_BIC(BIC_LLC_HIT, &bic_enabled);
+}
+
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
{
assert(!no_perf);
@@ -2512,7 +2523,7 @@ int add_rapl_msr_counter(int cpu, const struct rapl_counter_arch_info *cai)
{
int ret;
- if (!(platform->rapl_msrs & cai->feature_mask))
+ if (!(valid_rapl_msrs & cai->feature_mask))
return -1;
ret = add_msr_counter(cpu, cai->msr);
@@ -2656,6 +2667,12 @@ void bic_lookup(cpu_set_t *ret_set, char *name_list, enum show_hide_mode mode)
} else if (!strcmp(name_list, "idle")) {
CPU_OR(ret_set, ret_set, &bic_group_idle);
break;
+ } else if (!strcmp(name_list, "cache")) {
+ CPU_OR(ret_set, ret_set, &bic_group_cache);
+ break;
+ } else if (!strcmp(name_list, "llc")) {
+ CPU_OR(ret_set, ret_set, &bic_group_cache);
+ break;
} else if (!strcmp(name_list, "swidle")) {
CPU_OR(ret_set, ret_set, &bic_group_sw_idle);
break;
@@ -2677,8 +2694,7 @@ void bic_lookup(cpu_set_t *ret_set, char *name_list, enum show_hide_mode mode)
if (mode == SHOW_LIST) {
deferred_add_names[deferred_add_index++] = name_list;
if (deferred_add_index >= MAX_DEFERRED) {
- fprintf(stderr, "More than max %d un-recognized --add options '%s'\n",
- MAX_DEFERRED, name_list);
+ fprintf(stderr, "More than max %d un-recognized --add options '%s'\n", MAX_DEFERRED, name_list);
help();
exit(1);
}
@@ -2687,8 +2703,7 @@ void bic_lookup(cpu_set_t *ret_set, char *name_list, enum show_hide_mode mode)
if (debug)
fprintf(stderr, "deferred \"%s\"\n", name_list);
if (deferred_skip_index >= MAX_DEFERRED) {
- fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
- MAX_DEFERRED, name_list);
+ fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n", MAX_DEFERRED, name_list);
help();
exit(1);
}
@@ -2702,6 +2717,47 @@ void bic_lookup(cpu_set_t *ret_set, char *name_list, enum show_hide_mode mode)
}
}
+/*
+ * print_name()
+ * Print column header name for raw 64-bit counter in 16 columns (at least 8-char plus a tab)
+ * Otherwise, allow the name + tab to fit within 8-column tab-stop.
+ * In both cases, left justified, just like other turbostat columns,
+ * to allow the column values to consume the tab.
+ *
+ * Yes, 32-bit counters can overflow 8-columns, and
+ * 64-bit counters can overflow 16-columns, but that is uncommon.
+ *
+ * width:   counter width in bits
+ * printed: count of columns already emitted; delim is printed only when
+ *          this count is non-zero on entry, and the count is incremented
+ * delim:   column delimiter string
+ * name:    column header text
+ * type:    unused
+ * format:  FORMAT_RAW with width >= 64 selects the padded "%-8s" form
+ *
+ * Writes at outp and returns the sprintf() result;
+ * the caller is expected to advance outp by that amount.
+ */
+static inline int print_name(int width, int *printed, char *delim, char *name, enum counter_type type, enum counter_format format)
+{
+	const char *sep;
+
+	UNUSED(type);
+
+	/*
+	 * The parentheses matter: "*printed++" would advance the local
+	 * pointer and leave the caller's column count unchanged.
+	 */
+	sep = (*printed)++ ? delim : "";
+
+	if (format == FORMAT_RAW && width >= 64)
+		return sprintf(outp, "%s%-8s", sep, name);
+
+	return sprintf(outp, "%s%s", sep, name);
+}
+
+/*
+ * print_hex_value()
+ * Print value in zero-padded hexadecimal at outp: 8 digits of the low
+ * 32 bits when width <= 32, otherwise 16 digits of the full value.
+ *
+ * printed: count of columns already emitted; delim is printed only when
+ *          this count is non-zero on entry, and the count is incremented
+ *
+ * Returns the sprintf() result.
+ */
+static inline int print_hex_value(int width, int *printed, char *delim, unsigned long long value)
+{
+	/* (*printed)++, not *printed++: increment the count, not the pointer */
+	const char *sep = (*printed)++ ? delim : "";
+
+	if (width <= 32)
+		return sprintf(outp, "%s%08x", sep, (unsigned int)value);
+
+	return sprintf(outp, "%s%016llx", sep, value);
+}
+
+/*
+ * print_decimal_value()
+ * Print value in decimal at outp: the low 32 bits when width <= 32,
+ * otherwise the full value, left-justified in at least 8 characters.
+ *
+ * printed: count of columns already emitted; delim is printed only when
+ *          this count is non-zero on entry, and the count is incremented
+ *
+ * Returns the sprintf() result.
+ *
+ * NOTE(review): "%d" and "%lld" are signed conversions applied to unsigned
+ * values, so values above INT_MAX / LLONG_MAX print as negative -- confirm
+ * whether that is intended before switching to "%u" / "%llu".
+ */
+static inline int print_decimal_value(int width, int *printed, char *delim, unsigned long long value)
+{
+	/* (*printed)++, not *printed++: increment the count, not the pointer */
+	const char *sep = (*printed)++ ? delim : "";
+
+	if (width <= 32)
+		return sprintf(outp, "%s%d", sep, (unsigned int)value);
+
+	return sprintf(outp, "%s%-8lld", sep, value);
+}
+
+/*
+ * print_float_value()
+ * Print value at outp with two digits after the decimal point.
+ *
+ * printed: count of columns already emitted; delim is printed only when
+ *          this count is non-zero on entry, and the count is incremented
+ *
+ * Returns the sprintf() result.
+ */
+static inline int print_float_value(int *printed, char *delim, double value)
+{
+	/* (*printed)++, not *printed++: increment the count, not the pointer */
+	const char *sep = (*printed)++ ? delim : "";
+
+	return sprintf(outp, "%s%0.2f", sep, value);
+}
+
void print_header(char *delim)
{
struct msr_counter *mp;
@@ -2757,50 +2813,28 @@ void print_header(char *delim)
if (DO_BIC(BIC_SMI))
outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
- for (mp = sys.tp; mp; mp = mp->next) {
+ if (DO_BIC(BIC_LLC_RPS))
+ outp += sprintf(outp, "%sLLCkRPS", (printed++ ? delim : ""));
- if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) {
- if (mp->width == 64)
- outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name);
- else
- outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name);
- } else {
- if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
- outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name);
- else
- outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name);
- }
- }
+ if (DO_BIC(BIC_LLC_HIT))
+ outp += sprintf(outp, "%sLLC%%hit", (printed++ ? delim : ""));
- for (pp = sys.perf_tp; pp; pp = pp->next) {
+ for (mp = sys.tp; mp; mp = mp->next)
+ outp += print_name(mp->width, &printed, delim, mp->name, mp->type, mp->format);
- if (pp->format == FORMAT_RAW) {
- if (pp->width == 64)
- outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name);
- else
- outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name);
- } else {
- if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns)
- outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name);
- else
- outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name);
- }
- }
+ for (pp = sys.perf_tp; pp; pp = pp->next)
+ outp += print_name(pp->width, &printed, delim, pp->name, pp->type, pp->format);
ppmt = sys.pmt_tp;
while (ppmt) {
switch (ppmt->type) {
case PMT_TYPE_RAW:
- if (pmt_counter_get_width(ppmt) <= 32)
- outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name);
- else
- outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name);
-
+ outp += print_name(pmt_counter_get_width(ppmt), &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format);
break;
case PMT_TYPE_XTAL_TIME:
case PMT_TYPE_TCORE_CLOCK:
- outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name);
+ outp += print_name(32, &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format);
break;
}
@@ -2825,63 +2859,36 @@ void print_header(char *delim)
if (DO_BIC(BIC_CORE_THROT_CNT))
outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : ""));
- if (platform->rapl_msrs && !rapl_joules) {
+ if (valid_rapl_msrs && !rapl_joules) {
if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
- } else if (platform->rapl_msrs && rapl_joules) {
+ } else if (valid_rapl_msrs && rapl_joules) {
if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
}
- for (mp = sys.cp; mp; mp = mp->next) {
- if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) {
- if (mp->width == 64)
- outp += sprintf(outp, "%s%18.18s", delim, mp->name);
- else
- outp += sprintf(outp, "%s%10.10s", delim, mp->name);
- } else {
- if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
- outp += sprintf(outp, "%s%8s", delim, mp->name);
- else
- outp += sprintf(outp, "%s%s", delim, mp->name);
- }
- }
+ for (mp = sys.cp; mp; mp = mp->next)
+ outp += print_name(mp->width, &printed, delim, mp->name, mp->type, mp->format);
- for (pp = sys.perf_cp; pp; pp = pp->next) {
-
- if (pp->format == FORMAT_RAW) {
- if (pp->width == 64)
- outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name);
- else
- outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name);
- } else {
- if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns)
- outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name);
- else
- outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name);
- }
- }
+ for (pp = sys.perf_cp; pp; pp = pp->next)
+ outp += print_name(pp->width, &printed, delim, pp->name, pp->type, pp->format);
ppmt = sys.pmt_cp;
while (ppmt) {
switch (ppmt->type) {
case PMT_TYPE_RAW:
- if (pmt_counter_get_width(ppmt) <= 32)
- outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name);
- else
- outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name);
+ outp += print_name(pmt_counter_get_width(ppmt), &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format);
break;
case PMT_TYPE_XTAL_TIME:
case PMT_TYPE_TCORE_CLOCK:
- outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name);
+ outp += print_name(32, &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format);
break;
}
ppmt = ppmt->next;
}
-
if (DO_BIC(BIC_PkgTmp))
outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));
@@ -2963,51 +2970,22 @@ void print_header(char *delim)
if (DO_BIC(BIC_UNCORE_MHZ))
outp += sprintf(outp, "%sUncMHz", (printed++ ? delim : ""));
- for (mp = sys.pp; mp; mp = mp->next) {
- if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) {
- if (mp->width == 64)
- outp += sprintf(outp, "%s%18.18s", delim, mp->name);
- else if (mp->width == 32)
- outp += sprintf(outp, "%s%10.10s", delim, mp->name);
- else
- outp += sprintf(outp, "%s%7.7s", delim, mp->name);
- } else {
- if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
- outp += sprintf(outp, "%s%8s", delim, mp->name);
- else
- outp += sprintf(outp, "%s%7.7s", delim, mp->name);
- }
- }
-
- for (pp = sys.perf_pp; pp; pp = pp->next) {
+ for (mp = sys.pp; mp; mp = mp->next)
+ outp += print_name(mp->width, &printed, delim, mp->name, mp->type, mp->format);
- if (pp->format == FORMAT_RAW) {
- if (pp->width == 64)
- outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name);
- else
- outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name);
- } else {
- if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns)
- outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name);
- else
- outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name);
- }
- }
+ for (pp = sys.perf_pp; pp; pp = pp->next)
+ outp += print_name(pp->width, &printed, delim, pp->name, pp->type, pp->format);
ppmt = sys.pmt_pp;
while (ppmt) {
switch (ppmt->type) {
case PMT_TYPE_RAW:
- if (pmt_counter_get_width(ppmt) <= 32)
- outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name);
- else
- outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name);
-
+ outp += print_name(pmt_counter_get_width(ppmt), &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format);
break;
case PMT_TYPE_XTAL_TIME:
case PMT_TYPE_TCORE_CLOCK:
- outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name);
+ outp += print_name(32, &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format);
break;
}
@@ -3022,6 +3000,25 @@ void print_header(char *delim)
outp += sprintf(outp, "\n");
}
+/*
+ * pct()
+ *
+ * Scale a ratio to a percentage suitable for printing with %f.
+ *
+ * A ratio whose magnitude is below 1.10 is multiplied by 100.0 and returned.
+ * Any other input (a NaN input included, since it fails the comparison)
+ * yields NaN, so an obviously erroneous value is flagged when printed.
+ */
+double pct(double d)
+{
+	const double magnitude = fabs(d);
+
+	if (!(magnitude < 1.10))
+		return nan("");
+
+	return 100.0 * d;
+}
+
int dump_counters(PER_THREAD_PARAMS)
{
int i;
@@ -3047,14 +3044,16 @@ int dump_counters(PER_THREAD_PARAMS)
if (DO_BIC(BIC_SMI))
outp += sprintf(outp, "SMI: %d\n", t->smi_count);
+ outp += sprintf(outp, "LLC refs: %lld", t->llc.references);
+ outp += sprintf(outp, "LLC miss: %lld", t->llc.misses);
+ outp += sprintf(outp, "LLC Hit%%: %.2f", pct((t->llc.references - t->llc.misses) / t->llc.references));
+
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
- outp +=
- sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
- t->counter[i], mp->sp->path);
+ outp += sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, t->counter[i], mp->sp->path);
}
}
- if (c && is_cpu_first_thread_in_core(t, c, p)) {
+ if (c && is_cpu_first_thread_in_core(t, c)) {
outp += sprintf(outp, "core: %d\n", c->core_id);
outp += sprintf(outp, "c3: %016llX\n", c->c3);
outp += sprintf(outp, "c6: %016llX\n", c->c6);
@@ -3069,14 +3068,12 @@ int dump_counters(PER_THREAD_PARAMS)
outp += sprintf(outp, "Joules: %0llX (scale: %lf)\n", energy_value, energy_scale);
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
- outp +=
- sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
- c->counter[i], mp->sp->path);
+ outp += sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, c->counter[i], mp->sp->path);
}
outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
}
- if (p && is_cpu_first_core_in_package(t, c, p)) {
+ if (p && is_cpu_first_core_in_package(t, p)