aboutsummaryrefslogtreecommitdiff
path: root/tools/perf/scripts/python
diff options
context:
space:
mode:
author: Tvrtko Ursulin <tursulin@ursulin.net> 2025-02-05 09:29:14 +0000
committer: Tvrtko Ursulin <tursulin@ursulin.net> 2025-02-05 09:29:14 +0000
commit: c771600c6af14749609b49565ffb4cac2959710d (patch)
tree: e9be426f1b32598527127fd0fa1b265a7cc83044 /tools/perf/scripts/python
parent: 4a82ceb04ad4bbb9cc20925abccb70938313e555 (diff)
parent: 2014c95afecee3e76ca4a56956a936e23283f05b (diff)
Merge drm/drm-next into drm-intel-gt-next
We need 4ba4f1afb6a9 ("perf: Generic hotplug support for a PMU with a scope") in order to land an i915 PMU simplification and a fix. That landed in 6.12 and we are stuck at 6.9, so let's bump things forward.

Signed-off-by: Tvrtko Ursulin <tursulin@ursulin.net>
Diffstat (limited to 'tools/perf/scripts/python')
-rw-r--r-- tools/perf/scripts/python/Perf-Trace-Util/Build | 2
-rw-r--r-- tools/perf/scripts/python/Perf-Trace-Util/Context.c | 31
-rwxr-xr-x tools/perf/scripts/python/arm-cs-trace-disasm.py | 152
-rw-r--r-- tools/perf/scripts/python/mem-phys-addr.py | 177
-rw-r--r-- tools/perf/scripts/python/netdev-times.py | 3
-rwxr-xr-x tools/perf/scripts/python/parallel-perf.py | 989
6 files changed, 1222 insertions, 132 deletions
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Build b/tools/perf/scripts/python/Perf-Trace-Util/Build
index 5b0b5ff7e14a..be3710c61320 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/Build
+++ b/tools/perf/scripts/python/Perf-Trace-Util/Build
@@ -1,4 +1,4 @@
-perf-y += Context.o
+perf-util-y += Context.o
# -Wno-declaration-after-statement: The python headers have mixed code with declarations (decls after asserts, for instance)
CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-declaration-after-statement
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Context.c b/tools/perf/scripts/python/Perf-Trace-Util/Context.c
index 3954bd1587ce..60dcfe56d4d9 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/Context.c
+++ b/tools/perf/scripts/python/Perf-Trace-Util/Context.c
@@ -12,6 +12,7 @@
#define PY_SSIZE_T_CLEAN
#include <Python.h>
+#include "../../../util/config.h"
#include "../../../util/trace-event.h"
#include "../../../util/event.h"
#include "../../../util/symbol.h"
@@ -23,16 +24,6 @@
#include "../../../util/srcline.h"
#include "../../../util/srccode.h"
-#if PY_MAJOR_VERSION < 3
-#define _PyCapsule_GetPointer(arg1, arg2) \
- PyCObject_AsVoidPtr(arg1)
-#define _PyBytes_FromStringAndSize(arg1, arg2) \
- PyString_FromStringAndSize((arg1), (arg2))
-#define _PyUnicode_AsUTF8(arg) \
- PyString_AsString(arg)
-
-PyMODINIT_FUNC initperf_trace_context(void);
-#else
#define _PyCapsule_GetPointer(arg1, arg2) \
PyCapsule_GetPointer((arg1), (arg2))
#define _PyBytes_FromStringAndSize(arg1, arg2) \
@@ -41,7 +32,6 @@ PyMODINIT_FUNC initperf_trace_context(void);
PyUnicode_AsUTF8(arg)
PyMODINIT_FUNC PyInit_perf_trace_context(void);
-#endif
static struct scripting_context *get_args(PyObject *args, const char *name, PyObject **arg2)
{
@@ -103,7 +93,7 @@ static PyObject *perf_sample_insn(PyObject *obj, PyObject *args)
if (c->sample->ip && !c->sample->insn_len && thread__maps(c->al->thread)) {
struct machine *machine = maps__machine(thread__maps(c->al->thread));
- script_fetch_insn(c->sample, c->al->thread, machine);
+ script_fetch_insn(c->sample, c->al->thread, machine, /*native_arch=*/true);
}
if (!c->sample->insn_len)
Py_RETURN_NONE; /* N.B. This is a return statement */
@@ -182,6 +172,15 @@ static PyObject *perf_sample_srccode(PyObject *obj, PyObject *args)
return perf_sample_src(obj, args, true);
}
+static PyObject *__perf_config_get(PyObject *obj, PyObject *args)
+{
+ const char *config_name;
+
+ if (!PyArg_ParseTuple(args, "s", &config_name))
+ return NULL;
+ return Py_BuildValue("s", perf_config_get(config_name));
+}
+
static PyMethodDef ContextMethods[] = {
#ifdef HAVE_LIBTRACEEVENT
{ "common_pc", perf_trace_context_common_pc, METH_VARARGS,
@@ -199,15 +198,10 @@ static PyMethodDef ContextMethods[] = {
METH_VARARGS, "Get source file name and line number."},
{ "perf_sample_srccode", perf_sample_srccode,
METH_VARARGS, "Get source file name, line number and line."},
+ { "perf_config_get", __perf_config_get, METH_VARARGS, "Get perf config entry"},
{ NULL, NULL, 0, NULL}
};
-#if PY_MAJOR_VERSION < 3
-PyMODINIT_FUNC initperf_trace_context(void)
-{
- (void) Py_InitModule("perf_trace_context", ContextMethods);
-}
-#else
PyMODINIT_FUNC PyInit_perf_trace_context(void)
{
static struct PyModuleDef moduledef = {
@@ -229,4 +223,3 @@ PyMODINIT_FUNC PyInit_perf_trace_context(void)
return mod;
}
-#endif
diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py
index d973c2baed1c..ba208c90d631 100755
--- a/tools/perf/scripts/python/arm-cs-trace-disasm.py
+++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py
@@ -11,36 +11,74 @@ import os
from os import path
import re
from subprocess import *
-from optparse import OptionParser, make_option
+import argparse
+import platform
-from perf_trace_context import perf_set_itrace_options, \
- perf_sample_insn, perf_sample_srccode
+from perf_trace_context import perf_sample_srccode, perf_config_get
# Below are some example commands for using this script.
+# Note a --kcore recording is required for accurate decode
+# due to the alternatives patching mechanism. However this
+# script only supports reading vmlinux for disassembly dump,
+# meaning that any patched instructions will appear
+# as unpatched, but the instruction ranges themselves will
+# be correct. In addition to this, source line info comes
+# from Perf, and when using kcore there is no debug info. The
+# following lists the supported features in each mode:
+#
+# +-----------+-----------------+------------------+------------------+
+# | Recording | Accurate decode | Source line dump | Disassembly dump |
+# +-----------+-----------------+------------------+------------------+
+# | --kcore | yes | no | yes |
+# | normal | no | yes | yes |
+# +-----------+-----------------+------------------+------------------+
+#
+# Output disassembly with objdump and auto detect vmlinux
+# (when running on same machine.)
+# perf script -s scripts/python/arm-cs-trace-disasm.py -d
#
-# Output disassembly with objdump:
-# perf script -s scripts/python/arm-cs-trace-disasm.py \
-# -- -d objdump -k path/to/vmlinux
# Output disassembly with llvm-objdump:
# perf script -s scripts/python/arm-cs-trace-disasm.py \
# -- -d llvm-objdump-11 -k path/to/vmlinux
+#
# Output only source line and symbols:
# perf script -s scripts/python/arm-cs-trace-disasm.py
-# Command line parsing.
-option_list = [
- # formatting options for the bottom entry of the stack
- make_option("-k", "--vmlinux", dest="vmlinux_name",
- help="Set path to vmlinux file"),
- make_option("-d", "--objdump", dest="objdump_name",
- help="Set path to objdump executable file"),
- make_option("-v", "--verbose", dest="verbose",
- action="store_true", default=False,
- help="Enable debugging log")
-]
+def default_objdump():
+ config = perf_config_get("annotate.objdump")
+ return config if config else "objdump"
-parser = OptionParser(option_list=option_list)
-(options, args) = parser.parse_args()
+# Command line parsing.
+def int_arg(v):
+ v = int(v)
+ if v < 0:
+ raise argparse.ArgumentTypeError("Argument must be a positive integer")
+ return v
+
+args = argparse.ArgumentParser()
+args.add_argument("-k", "--vmlinux",
+ help="Set path to vmlinux file. Omit to autodetect if running on same machine")
+args.add_argument("-d", "--objdump", nargs="?", const=default_objdump(),
+ help="Show disassembly. Can also be used to change the objdump path"),
+args.add_argument("-v", "--verbose", action="store_true", help="Enable debugging log")
+args.add_argument("--start-time", type=int_arg, help="Monotonic clock time of sample to start from. "
+ "See 'time' field on samples in -v mode.")
+args.add_argument("--stop-time", type=int_arg, help="Monotonic clock time of sample to stop at. "
+ "See 'time' field on samples in -v mode.")
+args.add_argument("--start-sample", type=int_arg, help="Index of sample to start from. "
+ "See 'index' field on samples in -v mode.")
+args.add_argument("--stop-sample", type=int_arg, help="Index of sample to stop at. "
+ "See 'index' field on samples in -v mode.")
+
+options = args.parse_args()
+if (options.start_time and options.stop_time and
+ options.start_time >= options.stop_time):
+ print("--start-time must less than --stop-time")
+ exit(2)
+if (options.start_sample and options.stop_sample and
+ options.start_sample >= options.stop_sample):
+ print("--start-sample must less than --stop-sample")
+ exit(2)
# Initialize global dicts and regular expression
disasm_cache = dict()
@@ -48,11 +86,23 @@ cpu_data = dict()
disasm_re = re.compile(r"^\s*([0-9a-fA-F]+):")
disasm_func_re = re.compile(r"^\s*([0-9a-fA-F]+)\s.*:")
cache_size = 64*1024
+sample_idx = -1
glb_source_file_name = None
glb_line_number = None
glb_dso = None
+kver = platform.release()
+vmlinux_paths = [
+ f"/usr/lib/debug/boot/vmlinux-{kver}.debug",
+ f"/usr/lib/debug/lib/modules/{kver}/vmlinux",
+ f"/lib/modules/{kver}/build/vmlinux",
+ f"/usr/lib/debug/boot/vmlinux-{kver}",
+ f"/boot/vmlinux-{kver}",
+ f"/boot/vmlinux",
+ f"vmlinux"
+]
+
def get_optional(perf_dict, field):
if field in perf_dict:
return perf_dict[field]
@@ -63,12 +113,25 @@ def get_offset(perf_dict, field):
return "+%#x" % perf_dict[field]
return ""
+def find_vmlinux():
+ if hasattr(find_vmlinux, "path"):
+ return find_vmlinux.path
+
+ for v in vmlinux_paths:
+ if os.access(v, os.R_OK):
+ find_vmlinux.path = v
+ break
+ else:
+ find_vmlinux.path = None
+
+ return find_vmlinux.path
+
def get_dso_file_path(dso_name, dso_build_id):
if (dso_name == "[kernel.kallsyms]" or dso_name == "vmlinux"):
- if (options.vmlinux_name):
- return options.vmlinux_name;
+ if (options.vmlinux):
+ return options.vmlinux;
else:
- return dso_name
+ return find_vmlinux() if find_vmlinux() else dso_name
if (dso_name == "[vdso]") :
append = "/vdso"
@@ -92,7 +155,7 @@ def read_disam(dso_fname, dso_start, start_addr, stop_addr):
else:
start_addr = start_addr - dso_start;
stop_addr = stop_addr - dso_start;
- disasm = [ options.objdump_name, "-d", "-z",
+ disasm = [ options.objdump, "-d", "-z",
"--start-address="+format(start_addr,"#x"),
"--stop-address="+format(stop_addr,"#x") ]
disasm += [ dso_fname ]
@@ -112,10 +175,10 @@ def print_disam(dso_fname, dso_start, start_addr, stop_addr):
def print_sample(sample):
print("Sample = { cpu: %04d addr: 0x%016x phys_addr: 0x%016x ip: 0x%016x " \
- "pid: %d tid: %d period: %d time: %d }" % \
+ "pid: %d tid: %d period: %d time: %d index: %d}" % \
(sample['cpu'], sample['addr'], sample['phys_addr'], \
sample['ip'], sample['pid'], sample['tid'], \
- sample['period'], sample['time']))
+ sample['period'], sample['time'], sample_idx))
def trace_begin():
print('ARM CoreSight Trace Data Assembler Dump')
@@ -177,6 +240,7 @@ def print_srccode(comm, param_dict, sample, symbol, dso):
def process_event(param_dict):
global cache_size
global options
+ global sample_idx
sample = param_dict["sample"]
comm = param_dict["comm"]
@@ -187,22 +251,36 @@ def process_event(param_dict):
dso_start = get_optional(param_dict, "dso_map_start")
dso_end = get_optional(param_dict, "dso_map_end")
symbol = get_optional(param_dict, "symbol")
+ map_pgoff = get_optional(param_dict, "map_pgoff")
+ # check for valid map offset
+ if (str(map_pgoff) == '[unknown]'):
+ map_pgoff = 0
cpu = sample["cpu"]
ip = sample["ip"]
addr = sample["addr"]
- # Initialize CPU data if it's empty, and directly return back
- # if this is the first tracing event for this CPU.
- if (cpu_data.get(str(cpu) + 'addr') == None):
- cpu_data[str(cpu) + 'addr'] = addr
- return
+ sample_idx += 1
+ if (options.start_time and sample["time"] < options.start_time):
+ return
+ if (options.stop_time and sample["time"] > options.stop_time):
+ exit(0)
+ if (options.start_sample and sample_idx < options.start_sample):
+ return
+ if (options.stop_sample and sample_idx > options.stop_sample):
+ exit(0)
if (options.verbose == True):
print("Event type: %s" % name)
print_sample(sample)
+ # Initialize CPU data if it's empty, and directly return back
+ # if this is the first tracing event for this CPU.
+ if (cpu_data.get(str(cpu) + 'addr') == None):
+ cpu_data[str(cpu) + 'addr'] = addr
+ return
+
# If cannot find dso so cannot dump assembler, bail out
if (dso == '[unknown]'):
return
@@ -244,9 +322,10 @@ def process_event(param_dict):
# Record for previous sample packet
cpu_data[str(cpu) + 'addr'] = addr
- # Handle CS_ETM_TRACE_ON packet if start_addr=0 and stop_addr=4
- if (start_addr == 0 and stop_addr == 4):
- print("CPU%d: CS_ETM_TRACE_ON packet is inserted" % cpu)
+ # Filter out zero start_address. Optionally identify CS_ETM_TRACE_ON packet
+ if (start_addr == 0):
+ if ((stop_addr == 4) and (options.verbose == True)):
+ print("CPU%d: CS_ETM_TRACE_ON packet is inserted" % cpu)
return
if (start_addr < int(dso_start) or start_addr > int(dso_end)):
@@ -257,19 +336,20 @@ def process_event(param_dict):
print("Stop address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (stop_addr, int(dso_start), int(dso_end), dso))
return
- if (options.objdump_name != None):
+ if (options.objdump != None):
# It doesn't need to decrease virtual memory offset for disassembly
# for kernel dso and executable file dso, so in this case we set
# vm_start to zero.
if (dso == "[kernel.kallsyms]" or dso_start == 0x400000):
dso_vm_start = 0
+ map_pgoff = 0
else:
dso_vm_start = int(dso_start)
dso_fname = get_dso_file_path(dso, dso_bid)
if path.exists(dso_fname):
- print_disam(dso_fname, dso_vm_start, start_addr, stop_addr)
+ print_disam(dso_fname, dso_vm_start, start_addr + map_pgoff, stop_addr + map_pgoff)
else:
- print("Failed to find dso %s for address range [ 0x%x .. 0x%x ]" % (dso, start_addr, stop_addr))
+ print("Failed to find dso %s for address range [ 0x%x .. 0x%x ]" % (dso, start_addr + map_pgoff, stop_addr + map_pgoff))
print_srccode(comm, param_dict, sample, symbol, dso)
diff --git a/tools/perf/scripts/python/mem-phys-addr.py b/tools/perf/scripts/python/mem-phys-addr.py
index 1f332e72b9b0..5e237a5a5f1b 100644
--- a/tools/perf/scripts/python/mem-phys-addr.py
+++ b/tools/perf/scripts/python/mem-phys-addr.py
@@ -3,98 +3,125 @@
#
# Copyright (c) 2018, Intel Corporation.
-from __future__ import division
-from __future__ import print_function
-
import os
import sys
-import struct
import re
import bisect
import collections
+from dataclasses import dataclass
+from typing import (Dict, Optional)
sys.path.append(os.environ['PERF_EXEC_PATH'] + \
- '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+@dataclass(frozen=True)
+class IomemEntry:
+ """Read from a line in /proc/iomem"""
+ begin: int
+ end: int
+ indent: int
+ label: str
-#physical address ranges for System RAM
-system_ram = []
-#physical address ranges for Persistent Memory
-pmem = []
-#file object for proc iomem
-f = None
-#Count for each type of memory
-load_mem_type_cnt = collections.Counter()
-#perf event name
-event_name = None
+# Physical memory layout from /proc/iomem. Key is the indent and then
+# a list of ranges.
+iomem: Dict[int, list[IomemEntry]] = collections.defaultdict(list)
+# Child nodes from the iomem parent.
+children: Dict[IomemEntry, set[IomemEntry]] = collections.defaultdict(set)
+# Maximum indent seen before an entry in the iomem file.
+max_indent: int = 0
+# Count for each range of memory.
+load_mem_type_cnt: Dict[IomemEntry, int] = collections.Counter()
+# Perf event name set from the first sample in the data.
+event_name: Optional[str] = None
def parse_iomem():
- global f
- f = open('/proc/iomem', 'r')
- for i, j in enumerate(f):
- m = re.split('-|:',j,2)
- if m[2].strip() == 'System RAM':
- system_ram.append(int(m[0], 16))
- system_ram.append(int(m[1], 16))
- if m[2].strip() == 'Persistent Memory':
- pmem.append(int(m[0], 16))
- pmem.append(int(m[1], 16))
+ """Populate iomem from /proc/iomem file"""
+ global iomem
+ global max_indent
+ global children
+ with open('/proc/iomem', 'r', encoding='ascii') as f:
+ for line in f:
+ indent = 0
+ while line[indent] == ' ':
+ indent += 1
+ if indent > max_indent:
+ max_indent = indent
+ m = re.split('-|:', line, 2)
+ begin = int(m[0], 16)
+ end = int(m[1], 16)
+ label = m[2].strip()
+ entry = IomemEntry(begin, end, indent, label)
+ # Before adding entry, search for a parent node using its begin.
+ if indent > 0:
+ parent = find_memory_type(begin)
+ assert parent, f"Given indent expected a parent for {label}"
+ children[parent].add(entry)
+ iomem[indent].append(entry)
-def print_memory_type():
- print("Event: %s" % (event_name))
- print("%-40s %10s %10s\n" % ("Memory type", "count", "percentage"), end='')
- print("%-40s %10s %10s\n" % ("----------------------------------------",
- "-----------", "-----------"),
- end='');
- total = sum(load_mem_type_cnt.values())
- for mem_type, count in sorted(load_mem_type_cnt.most_common(), \
- key = lambda kv: (kv[1], kv[0]), reverse = True):
- print("%-40s %10d %10.1f%%\n" %
- (mem_type, count, 100 * count / total),
- end='')
+def find_memory_type(phys_addr) -> Optional[IomemEntry]:
+ """Search iomem for the range containing phys_addr with the maximum indent"""
+ for i in range(max_indent, -1, -1):
+ if i not in iomem:
+ continue
+ position = bisect.bisect_right(iomem[i], phys_addr,
+ key=lambda entry: entry.begin)
+ if position is None:
+ continue
+ iomem_entry = iomem[i][position-1]
+ if iomem_entry.begin <= phys_addr <= iomem_entry.end:
+ return iomem_entry
+ print(f"Didn't find {phys_addr}")
+ return None
-def trace_begin():
- parse_iomem()
+def print_memory_type():
+ print(f"Event: {event_name}")
+ print(f"{'Memory type':<40} {'count':>10} {'percentage':>10}")
+ print(f"{'-' * 40:<40} {'-' * 10:>10} {'-' * 10:>10}")
+ total = sum(load_mem_type_cnt.values())
+ # Add count from children into the parent.
+ for i in range(max_indent, -1, -1):
+ if i not in iomem:
+ continue
+ for entry in iomem[i]:
+ global children
+ for child in children[entry]:
+ if load_mem_type_cnt[child] > 0:
+ load_mem_type_cnt[entry] += load_mem_type_cnt[child]
-def trace_end():
- print_memory_type()
- f.close()
+ def print_entries(entries):
+ """Print counts from parents down to their children"""
+ global children
+ for entry in sorted(entries,
+ key = lambda entry: load_mem_type_cnt[entry],
+ reverse = True):
+ count = load_mem_type_cnt[entry]
+ if count > 0:
+ mem_type = ' ' * entry.indent + f"{entry.begin:x}-{entry.end:x} : {entry.label}"
+ percent = 100 * count / total
+ print(f"{mem_type:<40} {count:>10} {percent:>10.1f}")
+ print_entries(children[entry])
-def is_system_ram(phys_addr):
- #/proc/iomem is sorted
- position = bisect.bisect(system_ram, phys_addr)
- if position % 2 == 0:
- return False
- return True
+ print_entries(iomem[0])
-def is_persistent_mem(phys_addr):
- position = bisect.bisect(pmem, phys_addr)
- if position % 2 == 0:
- return False
- return True
+def trace_begin():
+ parse_iomem()
-def find_memory_type(phys_addr):
- if phys_addr == 0:
- return "N/A"
- if is_system_ram(phys_addr):
- return "System RAM"
+def trace_end():
+ print_memory_type()
- if is_persistent_mem(phys_addr):
- return "Persistent Memory"
+def process_event(param_dict):
+ if "sample" not in param_dict:
+ return
- #slow path, search all
- f.seek(0, 0)
- for j in f:
- m = re.split('-|:',j,2)
- if int(m[0], 16) <= phys_addr <= int(m[1], 16):
- return m[2]
- return "N/A"
+ sample = param_dict["sample"]
+ if "phys_addr" not in sample:
+ return
-def process_event(param_dict):
- name = param_dict["ev_name"]
- sample = param_dict["sample"]
- phys_addr = sample["phys_addr"]
+ phys_addr = sample["phys_addr"]
+ entry = find_memory_type(phys_addr)
+ if entry:
+ load_mem_type_cnt[entry] += 1
- global event_name
- if event_name == None:
- event_name = name
- load_mem_type_cnt[find_memory_type(phys_addr)] += 1
+ global event_name
+ if event_name is None:
+ event_name = param_dict["ev_name"]
diff --git a/tools/perf/scripts/python/netdev-times.py b/tools/perf/scripts/python/netdev-times.py
index 00552eeb7178..30c4bccee5b2 100644
--- a/tools/perf/scripts/python/netdev-times.py
+++ b/tools/perf/scripts/python/netdev-times.py
@@ -293,7 +293,8 @@ def skb__kfree_skb(name, context, cpu, sec, nsec, pid, comm, callchain,
skbaddr, location, protocol, reason)
all_event_list.append(event_info)
-def skb__consume_skb(name, context, cpu, sec, nsec, pid, comm, callchain, skbaddr):
+def skb__consume_skb(name, context, cpu, sec, nsec, pid, comm, callchain,
+ skbaddr, location):
event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
skbaddr)
all_event_list.append(event_info)
diff --git a/tools/perf/scripts/python/parallel-perf.py b/tools/perf/scripts/python/parallel-perf.py
new file mode 100755
index 000000000000..be85fd7f6632
--- /dev/null
+++ b/tools/perf/scripts/python/parallel-perf.py
@@ -0,0 +1,989 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a perf script command multiple times in parallel, using perf script
+# options --cpu and --time so that each job processes a different chunk
+# of the data.
+#
+# Copyright (c) 2024, Intel Corporation.
+
+import subprocess
+import argparse
+import pathlib
+import shlex
+import time
+import copy
+import sys
+import os
+import re
+
+glb_prog_name = "parallel-perf.py"
+glb_min_interval = 10.0
+glb_min_samples = 64
+
+class Verbosity():
+
+ def __init__(self, quiet=False, verbose=False, debug=False):
+ self.normal = True
+ self.verbose = verbose
+ self.debug = debug
+ self.self_test = True
+ if self.debug:
+ self.verbose = True
+ if self.verbose:
+ quiet = False
+ if quiet:
+ self.normal = False
+
+# Manage work (Start/Wait/Kill), as represented by a subprocess.Popen command
+class Work():
+
+ def __init__(self, cmd, pipe_to, output_dir="."):
+ self.popen = None
+ self.consumer = None
+ self.cmd = cmd
+ self.pipe_to = pipe_to
+ self.output_dir = output_dir
+ self.cmdout_name = f"{output_dir}/cmd.txt"
+ self.stdout_name = f"{output_dir}/out.txt"
+ self.stderr_name = f"{output_dir}/err.txt"
+
+ def Command(self):
+ sh_cmd = [ shlex.quote(x) for x in self.cmd ]
+ return " ".join(self.cmd)
+
+ def Stdout(self):
+ return open(self.stdout_name, "w")
+
+ def Stderr(self):
+ return open(self.stderr_name, "w")
+
+ def CreateOutputDir(self):
+ pathlib.Path(self.output_dir).mkdir(parents=True, exist_ok=True)
+
+ def Start(self):
+ if self.popen:
+ return
+ self.CreateOutputDir()
+ with open(self.cmdout_name, "w") as f:
+ f.write(self.Command())
+ f.write("\n")
+ stdout = self.Stdout()
+ stderr = self.Stderr()
+ if self.pipe_to:
+ self.popen = subprocess.Popen(self.cmd, stdout=subprocess.PIPE, stderr=stderr)
+ args = shlex.split(self.pipe_to)
+ self.consumer = subprocess.Popen(args, stdin=self.popen.stdout, stdout=stdout, stderr=stderr)
+ else:
+ self.popen = subprocess.Popen(self.cmd, stdout=stdout, stderr=stderr)
+
+ def RemoveEmptyErrFile(self):
+ if os.path.exists(self.stderr_name):
+ if os.path.getsize(self.stderr_name) == 0:
+ os.unlink(self.stderr_name)
+
+ def Errors(self):
+ if os.path.exists(self.stderr_name):
+ if os.path.getsize(self.stderr_name) != 0:
+ return [ f"Non-empty error file {self.stderr_name}" ]
+ return []
+
+ def TidyUp(self):
+ self.RemoveEmptyErrFile()
+
+ def RawPollWait(self, p, wait):
+ if wait:
+ return p.wait()
+ return p.poll()
+
+ def Poll(self, wait=False):
+ if not self.popen:
+ return None
+ result = self.RawPollWait(self.popen, wait)
+ if self.consumer:
+ res = result
+ result = self.RawPollWait(self.consumer, wait)
+ if result != None and res == None:
+ self.popen.kill()
+ result = None
+ elif result == 0 and res != None and res != 0:
+ result = res
+ if result != None:
+ self.TidyUp()
+ return result
+
+ def Wait(self):
+ return self.Poll(wait=True)
+
+ def Kill(self):
+ if not self.popen:
+ return
+ self.popen.kill()
+ if self.consumer:
+ self.consumer.kill()
+
+def KillWork(worklist, verbosity):
+ for w in worklist:
+ w.Kill()
+ for w in worklist:
+ w.Wait()
+
+def NumberOfCPUs():
+ return os.sysconf("SC_NPROCESSORS_ONLN")
+
+def NanoSecsToSecsStr(x):
+ if x == None:
+ return ""
+ x = str(x)
+ if len(x) < 10:
+ x = "0" * (10 - len(x)) + x
+ return x[:len(x) - 9] + "." + x[-9:]
+
+def InsertOptionAfter(cmd, option, after):
+ try:
+ pos = cmd.index(after)
+ cmd.insert(pos + 1, option)
+ except:
+ cmd.append(option)
+
+def CreateWorkList(cmd, pipe_to, output_dir, cpus, time_ranges_by_cpu):
+ max_len = len(str(cpus[-1]))
+ cpu_dir_fmt = f"cpu-%.{max_len}u"
+ worklist = []
+ pos = 0
+ for cpu in cpus:
+ if cpu >= 0:
+ cpu_dir = os.path.join(output_dir, cpu_dir_fmt % cpu)
+ cpu_option = f"--cpu={cpu}"
+ else:
+ cpu_dir = output_dir
+ cpu_option = None
+
+ tr_dir_fmt = "time-range"
+
+ if len(time_ranges_by_cpu) > 1:
+ time_ranges = time_ranges_by_cpu[pos]
+ tr_dir_fmt += f"-{pos}"
+ pos += 1
+ else:
+ time_ranges = time_ranges_by_cpu[0]
+
+ max_len = len(str(len(time_ranges)))
+ tr_dir_fmt += f"-%.{max_len}u"
+
+ i = 0
+ for r in time_ranges:
+ if r == [None, None]:
+ time_option = None
+ work_output_dir = cpu_dir
+ else:
+ time_option = "--time=" + NanoSecsToSecsStr(r[0]) + "," + NanoSecsToSecsStr(r[1])
+ work_output_dir = os.path.join(cpu_dir, tr_dir_fmt % i)
+ i += 1
+ work_cmd = list(cmd)
+ if time_option != None:
+ InsertOptionAfter(work_cmd, time_option, "script")
+ if cpu_option != None:
+ InsertOptionAfter(work_cmd, cpu_option, "script")
+ w = Work(work_cmd, pipe_to, work_output_dir)
+ worklist.append(w)
+ return worklist
+
+def DoRunWork(worklist, nr_jobs, verbosity):
+ nr_to_do = len(worklist)
+ not_started = list(worklist)
+ running = []
+ done = []
+ chg = False
+ while True:
+ nr_done = len(done)
+ if chg and verbosity.normal:
+ nr_run = len(running)
+ print(f"\rThere are {nr_to_do} jobs: {nr_done} completed, {nr_run} running", flush=True, end=" ")
+ if verbosity.verbose:
+ print()
+ chg = False
+ if nr_done == nr_to_do:
+ break
+ while len(running) < nr_jobs and len(not_started):
+ w = not_started.pop(0)
+ running.append(w)
+ if verbosity.verbose:
+ print("Starting:", w.Command())
+ w.Start()
+ chg = True
+ if len(running):
+ time.sleep(0.1)
+ finished = []
+ not_finished = []
+ while len(running):
+ w = running.pop(0)
+ r = w.Poll()
+ if r == None:
+ not_finished.append(w)
+ continue
+ if r == 0:
+ if verbosity.verbose:
+ print("Finished:", w.Command())
+ finished.append(w)
+ chg = True
+ continue
+ if verbosity.normal and not verbosity.verbose:
+ print()
+ print("Job failed!\n return code:", r, "\n command: ", w.Command())
+ if w.pipe_to:
+ print(" piped to: ", w.pipe_to)
+ print("Killing outstanding jobs")
+ KillWork(not_finished, verbosity)
+ KillWork(running, verbosity)
+ return False
+ running = not_finished
+ done += finished
+ errorlist = []
+ for w in worklist:
+ errorlist += w.Errors()
+ if len(errorlist):
+ print("Errors:")
+ for e in errorlist:
+ print(e)
+ elif verbosity.normal:
+ print("\r"," "*50, "\rAll jobs finished successfully", flush=True)
+ return True
+
+def RunWork(worklist, nr_jobs=NumberOfCPUs(), verbosity=Verbosity()):
+ try:
+ return DoRunWork(worklist, nr_jobs, verbosity)
+ except:
+ for w in worklist:
+ w.Kill()
+ raise
+ return True
+
+def ReadHeader(perf, file_name):
+ return subprocess.Popen([perf, "script", "--header-only", "--input", file_name], stdout=subprocess.PIPE).stdout.read().decode("utf-8")
+
+def ParseHeader(hdr):
+ result = {}
+ lines = hdr.split("\n")
+ for line in lines:
+ if ":" in line and line[0] == "#":
+ pos = line.index(":")
+ name = line[1:pos-1].strip()
+ value = line[pos+1:].strip()
+ if name in result:
+ orig_name = name
+ nr = 2
+ while True:
+ name = f"{orig_name} {nr}"
+ if name not in result:
+ break
+ nr += 1
+ result[name] = value
+ return result
+
+def HeaderField(hdr_dict, hdr_fld):
+ if hdr_fld not in hdr_dict:
+ raise Exception(f"'{hdr_fld}' missing from header information")
+ return hdr_dict[hdr_fld]
+
+# Represent the position of an option within a command string
+# and provide the option value and/or remove the option
+class OptPos():
+
+ def Init(self, opt_element=-1, value_element=-1, opt_pos=-1, value_pos=-1, error=None):
+ self.opt_element = opt_element # list element that contains option
+ self.value_element = value_element # list element that contains option value
+ self.opt_pos = opt_pos # string position of option
+ self.value_pos = value_pos # string position of value
+ self.error = error # error message string
+
+ def __init__(self, args, short_name, long_name, default=None):
+ self.args = list(args)
+ self.default = default
+ n = 2 + len(long_name)
+ m = len(short_name)
+ pos = -1
+ for opt in args:
+ pos += 1
+ if m and opt[:2] == f"-{short_name}":
+ if len(opt) == 2:
+ if pos + 1 < len(args):
+ self.Init(pos, pos + 1, 0, 0)
+ else:
+ self.Init(error = f"-{short_name} option missing value")
+ else:
+ self.Init(pos, pos, 0, 2)
+ return
+ if opt[:n] == f"--{long_name}":
+ if len(opt) == n:
+ if pos + 1 < len(args):
+ self.Init(pos, pos + 1, 0, 0)
+ else:
+ self.Init(error = f"--{long_name} option missing value")
+ elif opt[n] == "=":
+ self.Init(pos, pos, 0, n + 1)
+ else:
+ self.Init(error = f"--{long_name} option expected '='")
+ return
+ if m and opt[:1] == "-" and opt[:2] != "--" and short_name in opt:
+ ipos = opt.index(short_name)
+ if "-" in opt[1:]:
+ hpos = opt[1:].index("-")
+ if hpos < ipos:
+ continue
+ if ipos + 1 == len(opt):
+ if pos + 1 < len(args):
+ self.Init(pos, pos + 1, ipos, 0)
+ else:
+ self.Init(error = f"-{short_name} option missing value")
+ else:
+ self.Init(pos, pos, ipos, ipos + 1)
+ return
+ self.Init()
+
+ def Value(self):
+ if self.opt_element >= 0:
+ if self.opt_element != self.value_element:
+ return self.args[self.value_element]
+ else:
+ return self.args[self.value_element][self.value_pos:]
+ return self.default
+
+ def Remove(self, args):
+ if self.opt_element == -1:
+ return
+ if self.opt_element != self.value_element:
+ del args[self.value_element]
+ if self.opt_pos:
+ args[self.opt_element] = args[self.opt_element][:self.opt_pos]
+ else:
+ del args[self.opt_element]
+
+def DetermineInputFileName(cmd):
+ p = OptPos(cmd, "i", "input", "perf.data")
+ if p.error:
+ raise Exception(f"perf command {p.error}")
+ file_name = p.Value()
+ if not os.path.exists(file_name):
+ raise Exception(f"perf command input file '{file_name}' not found")
+ return file_name
+
+def ReadOption(args, short_name, long_name, err_prefix, remove=False):
+ p = OptPos(args, short_name, long_name)
+ if p.error:
+ raise Exception(f"{err_prefix}{p.error}")
+ value = p.Value()
+ if remove:
+ p.Remove(args)
+ return value
+
+def ExtractOption(args, short_name, long_name, err_prefix):
+ return ReadOption(args, short_name, long_name, err_prefix, True)
+
+def ReadPerfOption(args, short_name, long_name):
+ return ReadOption(args, short_name, long_name, "perf command ")
+
+def ExtractPerfOption(args, short_name, long_name):
+ return ExtractOption(args, short_name, long_name, "perf command ")
+
+def PerfDoubleQuickCommands(cmd, file_name):
+ cpu_str = ReadPerfOption(cmd, "C", "cpu")
+ time_str = ReadPerfOption(cmd, "", "time")
+ # Use double-quick sampling to determine trace data density
+ times_cmd = ["perf", "script", "--ns", "--input", file_name, "--itrace=qqi"]