Merge drm/drm-next into drm-intel-gt-next

We need 4ba4f1afb6a9 ("perf: Generic hotplug support for a PMU with a scope") in order to land an i915 PMU simplification and a fix. That landed in 6.12 and we are stuck at 6.9, so let's bump things forward. Signed-off-by: Tvrtko Ursulin <tursulin@ursulin.net>
author: Tvrtko Ursulin <tursulin@ursulin.net> 2025-02-05 09:29:14 +0000
committer: Tvrtko Ursulin <tursulin@ursulin.net> 2025-02-05 09:29:14 +0000
commit: c771600c6af14749609b49565ffb4cac2959710d (patch)
tree: e9be426f1b32598527127fd0fa1b265a7cc83044 /tools/perf/scripts/python
parent: 4a82ceb04ad4bbb9cc20925abccb70938313e555 (diff)
parent: 2014c95afecee3e76ca4a56956a936e23283f05b (diff)
6 files changed, 1222 insertions, 132 deletions
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Build b/tools/perf/scripts/python/Perf-Trace-Util/Build
index 5b0b5ff7e14a..be3710c61320 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/Build
+++ b/tools/perf/scripts/python/Perf-Trace-Util/Build
@@ -1,4 +1,4 @@
-perf-y += Context.o
+perf-util-y += Context.o
 
 # -Wno-declaration-after-statement: The python headers have mixed code with declarations (decls after asserts, for instance)
 CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-declaration-after-statement
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Context.c b/tools/perf/scripts/python/Perf-Trace-Util/Context.c
index 3954bd1587ce..60dcfe56d4d9 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/Context.c
+++ b/tools/perf/scripts/python/Perf-Trace-Util/Context.c
@@ -12,6 +12,7 @@
 #define PY_SSIZE_T_CLEAN
 
 #include <Python.h>
+#include "../../../util/config.h"
 #include "../../../util/trace-event.h"
 #include "../../../util/event.h"
 #include "../../../util/symbol.h"
@@ -23,16 +24,6 @@
 #include "../../../util/srcline.h"
 #include "../../../util/srccode.h"
 
-#if PY_MAJOR_VERSION < 3
-#define _PyCapsule_GetPointer(arg1, arg2) \
-  PyCObject_AsVoidPtr(arg1)
-#define _PyBytes_FromStringAndSize(arg1, arg2) \
-  PyString_FromStringAndSize((arg1), (arg2))
-#define _PyUnicode_AsUTF8(arg) \
-  PyString_AsString(arg)
-
-PyMODINIT_FUNC initperf_trace_context(void);
-#else
 #define _PyCapsule_GetPointer(arg1, arg2) \
   PyCapsule_GetPointer((arg1), (arg2))
 #define _PyBytes_FromStringAndSize(arg1, arg2) \
@@ -41,7 +32,6 @@ PyMODINIT_FUNC initperf_trace_context(void);
   PyUnicode_AsUTF8(arg)
 
 PyMODINIT_FUNC PyInit_perf_trace_context(void);
-#endif
 
 static struct scripting_context *get_args(PyObject *args, const char *name, PyObject **arg2)
 {
@@ -103,7 +93,7 @@ static PyObject *perf_sample_insn(PyObject *obj, PyObject *args)
 	if (c->sample->ip && !c->sample->insn_len && thread__maps(c->al->thread)) {
 		struct machine *machine =  maps__machine(thread__maps(c->al->thread));
 
-		script_fetch_insn(c->sample, c->al->thread, machine);
+		script_fetch_insn(c->sample, c->al->thread, machine, /*native_arch=*/true);
 	}
 	if (!c->sample->insn_len)
 		Py_RETURN_NONE; /* N.B. This is a return statement */
@@ -182,6 +172,15 @@ static PyObject *perf_sample_srccode(PyObject *obj, PyObject *args)
 	return perf_sample_src(obj, args, true);
 }
 
+static PyObject *__perf_config_get(PyObject *obj, PyObject *args)
+{
+	const char *config_name;
+
+	if (!PyArg_ParseTuple(args, "s", &config_name))
+		return NULL;
+	return Py_BuildValue("s", perf_config_get(config_name));
+}
+
 static PyMethodDef ContextMethods[] = {
 #ifdef HAVE_LIBTRACEEVENT
 	{ "common_pc", perf_trace_context_common_pc, METH_VARARGS,
@@ -199,15 +198,10 @@ static PyMethodDef ContextMethods[] = {
 	  METH_VARARGS,	"Get source file name and line number."},
 	{ "perf_sample_srccode", perf_sample_srccode,
 	  METH_VARARGS,	"Get source file name, line number and line."},
+	{ "perf_config_get", __perf_config_get, METH_VARARGS, "Get perf config entry"},
 	{ NULL, NULL, 0, NULL}
 };
 
-#if PY_MAJOR_VERSION < 3
-PyMODINIT_FUNC initperf_trace_context(void)
-{
-	(void) Py_InitModule("perf_trace_context", ContextMethods);
-}
-#else
 PyMODINIT_FUNC PyInit_perf_trace_context(void)
 {
 	static struct PyModuleDef moduledef = {
@@ -229,4 +223,3 @@ PyMODINIT_FUNC PyInit_perf_trace_context(void)
 
 	return mod;
 }
-#endif
diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py
index d973c2baed1c..ba208c90d631 100755
--- a/tools/perf/scripts/python/arm-cs-trace-disasm.py
+++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py
@@ -11,36 +11,74 @@ import os
 from os import path
 import re
 from subprocess import *
-from optparse import OptionParser, make_option
+import argparse
+import platform
 
-from perf_trace_context import perf_set_itrace_options, \
-	perf_sample_insn, perf_sample_srccode
+from perf_trace_context import perf_sample_srccode, perf_config_get
 
 # Below are some example commands for using this script.
+# Note a --kcore recording is required for accurate decode
+# due to the alternatives patching mechanism. However this
+# script only supports reading vmlinux for disassembly dump,
+# meaning that any patched instructions will appear
+# as unpatched, but the instruction ranges themselves will
+# be correct. In addition to this, source line info comes
+# from Perf, and when using kcore there is no debug info. The
+# following lists the supported features in each mode:
+#
+# +-----------+-----------------+------------------+------------------+
+# | Recording | Accurate decode | Source line dump | Disassembly dump |
+# +-----------+-----------------+------------------+------------------+
+# | --kcore   | yes             | no               | yes              |
+# | normal    | no              | yes              | yes              |
+# +-----------+-----------------+------------------+------------------+
+#
+# Output disassembly with objdump and auto detect vmlinux
+# (when running on same machine.)
+#  perf script -s scripts/python/arm-cs-trace-disasm.py -d
 #
-# Output disassembly with objdump:
-#  perf script -s scripts/python/arm-cs-trace-disasm.py \
-#		-- -d objdump -k path/to/vmlinux
 # Output disassembly with llvm-objdump:
 #  perf script -s scripts/python/arm-cs-trace-disasm.py \
 #		-- -d llvm-objdump-11 -k path/to/vmlinux
+#
 # Output only source line and symbols:
 #  perf script -s scripts/python/arm-cs-trace-disasm.py
 
-# Command line parsing.
-option_list = [
-	# formatting options for the bottom entry of the stack
-	make_option("-k", "--vmlinux", dest="vmlinux_name",
-		    help="Set path to vmlinux file"),
-	make_option("-d", "--objdump", dest="objdump_name",
-		    help="Set path to objdump executable file"),
-	make_option("-v", "--verbose", dest="verbose",
-		    action="store_true", default=False,
-		    help="Enable debugging log")
-]
+def default_objdump():
+	config = perf_config_get("annotate.objdump")
+	return config if config else "objdump"
 
-parser = OptionParser(option_list=option_list)
-(options, args) = parser.parse_args()
+# Command line parsing.
+def int_arg(v):
+	v = int(v)
+	if v < 0:
+		raise argparse.ArgumentTypeError("Argument must be a positive integer")
+	return v
+
+args = argparse.ArgumentParser()
+args.add_argument("-k", "--vmlinux",
+		  help="Set path to vmlinux file. Omit to autodetect if running on same machine")
+args.add_argument("-d", "--objdump", nargs="?", const=default_objdump(),
+		  help="Show disassembly. Can also be used to change the objdump path"),
+args.add_argument("-v", "--verbose", action="store_true", help="Enable debugging log")
+args.add_argument("--start-time", type=int_arg, help="Monotonic clock time of sample to start from. "
+		  "See 'time' field on samples in -v mode.")
+args.add_argument("--stop-time", type=int_arg, help="Monotonic clock time of sample to stop at. "
+		  "See 'time' field on samples in -v mode.")
+args.add_argument("--start-sample", type=int_arg, help="Index of sample to start from. "
+		  "See 'index' field on samples in -v mode.")
+args.add_argument("--stop-sample", type=int_arg, help="Index of sample to stop at. "
+		  "See 'index' field on samples in -v mode.")
+
+options = args.parse_args()
+if (options.start_time and options.stop_time and
+    options.start_time >= options.stop_time):
+	print("--start-time must less than --stop-time")
+	exit(2)
+if (options.start_sample and options.stop_sample and
+    options.start_sample >= options.stop_sample):
+	print("--start-sample must less than --stop-sample")
+	exit(2)
 
 # Initialize global dicts and regular expression
 disasm_cache = dict()
@@ -48,11 +86,23 @@ cpu_data = dict()
 disasm_re = re.compile(r"^\s*([0-9a-fA-F]+):")
 disasm_func_re = re.compile(r"^\s*([0-9a-fA-F]+)\s.*:")
 cache_size = 64*1024
+sample_idx = -1
 
 glb_source_file_name	= None
 glb_line_number		= None
 glb_dso			= None
 
+kver = platform.release()
+vmlinux_paths = [
+	f"/usr/lib/debug/boot/vmlinux-{kver}.debug",
+	f"/usr/lib/debug/lib/modules/{kver}/vmlinux",
+	f"/lib/modules/{kver}/build/vmlinux",
+	f"/usr/lib/debug/boot/vmlinux-{kver}",
+	f"/boot/vmlinux-{kver}",
+	f"/boot/vmlinux",
+	f"vmlinux"
+]
+
 def get_optional(perf_dict, field):
        if field in perf_dict:
                return perf_dict[field]
@@ -63,12 +113,25 @@ def get_offset(perf_dict, field):
 		return "+%#x" % perf_dict[field]
 	return ""
 
+def find_vmlinux():
+	if hasattr(find_vmlinux, "path"):
+		return find_vmlinux.path
+
+	for v in vmlinux_paths:
+		if os.access(v, os.R_OK):
+			find_vmlinux.path = v
+			break
+	else:
+		find_vmlinux.path = None
+
+	return find_vmlinux.path
+
 def get_dso_file_path(dso_name, dso_build_id):
 	if (dso_name == "[kernel.kallsyms]" or dso_name == "vmlinux"):
-		if (options.vmlinux_name):
-			return options.vmlinux_name;
+		if (options.vmlinux):
+			return options.vmlinux;
 		else:
-			return dso_name
+			return find_vmlinux() if find_vmlinux() else dso_name
 
 	if (dso_name == "[vdso]") :
 		append = "/vdso"
@@ -92,7 +155,7 @@ def read_disam(dso_fname, dso_start, start_addr, stop_addr):
 	else:
 		start_addr = start_addr - dso_start;
 		stop_addr = stop_addr - dso_start;
-		disasm = [ options.objdump_name, "-d", "-z",
+		disasm = [ options.objdump, "-d", "-z",
 			   "--start-address="+format(start_addr,"#x"),
 			   "--stop-address="+format(stop_addr,"#x") ]
 		disasm += [ dso_fname ]
@@ -112,10 +175,10 @@ def print_disam(dso_fname, dso_start, start_addr, stop_addr):
 
 def print_sample(sample):
 	print("Sample = { cpu: %04d addr: 0x%016x phys_addr: 0x%016x ip: 0x%016x " \
-	      "pid: %d tid: %d period: %d time: %d }" % \
+	      "pid: %d tid: %d period: %d time: %d index: %d}" % \
 	      (sample['cpu'], sample['addr'], sample['phys_addr'], \
 	       sample['ip'], sample['pid'], sample['tid'], \
-	       sample['period'], sample['time']))
+	       sample['period'], sample['time'], sample_idx))
 
 def trace_begin():
 	print('ARM CoreSight Trace Data Assembler Dump')
@@ -177,6 +240,7 @@ def print_srccode(comm, param_dict, sample, symbol, dso):
 def process_event(param_dict):
 	global cache_size
 	global options
+	global sample_idx
 
 	sample = param_dict["sample"]
 	comm = param_dict["comm"]
@@ -187,22 +251,36 @@ def process_event(param_dict):
 	dso_start = get_optional(param_dict, "dso_map_start")
 	dso_end = get_optional(param_dict, "dso_map_end")
 	symbol = get_optional(param_dict, "symbol")
+	map_pgoff = get_optional(param_dict, "map_pgoff")
+	# check for valid map offset
+	if (str(map_pgoff) == '[unknown]'):
+		map_pgoff = 0
 
 	cpu = sample["cpu"]
 	ip = sample["ip"]
 	addr = sample["addr"]
 
-	# Initialize CPU data if it's empty, and directly return back
-	# if this is the first tracing event for this CPU.
-	if (cpu_data.get(str(cpu) + 'addr') == None):
-		cpu_data[str(cpu) + 'addr'] = addr
-		return
+	sample_idx += 1
 
+	if (options.start_time and sample["time"] < options.start_time):
+		return
+	if (options.stop_time and sample["time"] > options.stop_time):
+		exit(0)
+	if (options.start_sample and sample_idx < options.start_sample):
+		return
+	if (options.stop_sample and sample_idx > options.stop_sample):
+		exit(0)
 
 	if (options.verbose == True):
 		print("Event type: %s" % name)
 		print_sample(sample)
 
+	# Initialize CPU data if it's empty, and directly return back
+	# if this is the first tracing event for this CPU.
+	if (cpu_data.get(str(cpu) + 'addr') == None):
+		cpu_data[str(cpu) + 'addr'] = addr
+		return
+
 	# If cannot find dso so cannot dump assembler, bail out
 	if (dso == '[unknown]'):
 		return
@@ -244,9 +322,10 @@ def process_event(param_dict):
 	# Record for previous sample packet
 	cpu_data[str(cpu) + 'addr'] = addr
 
-	# Handle CS_ETM_TRACE_ON packet if start_addr=0 and stop_addr=4
-	if (start_addr == 0 and stop_addr == 4):
-		print("CPU%d: CS_ETM_TRACE_ON packet is inserted" % cpu)
+	# Filter out zero start_address. Optionally identify CS_ETM_TRACE_ON packet
+	if (start_addr == 0):
+		if ((stop_addr == 4) and (options.verbose == True)):
+			print("CPU%d: CS_ETM_TRACE_ON packet is inserted" % cpu)
 		return
 
 	if (start_addr < int(dso_start) or start_addr > int(dso_end)):
@@ -257,19 +336,20 @@ def process_event(param_dict):
 		print("Stop address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (stop_addr, int(dso_start), int(dso_end), dso))
 		return
 
-	if (options.objdump_name != None):
+	if (options.objdump != None):
 		# It doesn't need to decrease virtual memory offset for disassembly
 		# for kernel dso and executable file dso, so in this case we set
 		# vm_start to zero.
 		if (dso == "[kernel.kallsyms]" or dso_start == 0x400000):
 			dso_vm_start = 0
+			map_pgoff = 0
 		else:
 			dso_vm_start = int(dso_start)
 
 		dso_fname = get_dso_file_path(dso, dso_bid)
 		if path.exists(dso_fname):
-			print_disam(dso_fname, dso_vm_start, start_addr, stop_addr)
+			print_disam(dso_fname, dso_vm_start, start_addr + map_pgoff, stop_addr + map_pgoff)
 		else:
-			print("Failed to find dso %s for address range [ 0x%x .. 0x%x ]" % (dso, start_addr, stop_addr))
+			print("Failed to find dso %s for address range [ 0x%x .. 0x%x ]" % (dso, start_addr + map_pgoff, stop_addr + map_pgoff))
 
 	print_srccode(comm, param_dict, sample, symbol, dso)
diff --git a/tools/perf/scripts/python/mem-phys-addr.py b/tools/perf/scripts/python/mem-phys-addr.py
index 1f332e72b9b0..5e237a5a5f1b 100644
--- a/tools/perf/scripts/python/mem-phys-addr.py
+++ b/tools/perf/scripts/python/mem-phys-addr.py
@@ -3,98 +3,125 @@
 #
 # Copyright (c) 2018, Intel Corporation.
 
-from __future__ import division
-from __future__ import print_function
-
 import os
 import sys
-import struct
 import re
 import bisect
 import collections
+from dataclasses import dataclass
+from typing import (Dict, Optional)
 
 sys.path.append(os.environ['PERF_EXEC_PATH'] + \
-	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+    '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+@dataclass(frozen=True)
+class IomemEntry:
+    """Read from a line in /proc/iomem"""
+    begin: int
+    end: int
+    indent: int
+    label: str
 
-#physical address ranges for System RAM
-system_ram = []
-#physical address ranges for Persistent Memory
-pmem = []
-#file object for proc iomem
-f = None
-#Count for each type of memory
-load_mem_type_cnt = collections.Counter()
-#perf event name
-event_name = None
+# Physical memory layout from /proc/iomem. Key is the indent and then
+# a list of ranges.
+iomem: Dict[int, list[IomemEntry]] = collections.defaultdict(list)
+# Child nodes from the iomem parent.
+children: Dict[IomemEntry, set[IomemEntry]] = collections.defaultdict(set)
+# Maximum indent seen before an entry in the iomem file.
+max_indent: int = 0
+# Count for each range of memory.
+load_mem_type_cnt: Dict[IomemEntry, int] = collections.Counter()
+# Perf event name set from the first sample in the data.
+event_name: Optional[str] = None
 
 def parse_iomem():
-	global f
-	f = open('/proc/iomem', 'r')
-	for i, j in enumerate(f):
-		m = re.split('-|:',j,2)
-		if m[2].strip() == 'System RAM':
-			system_ram.append(int(m[0], 16))
-			system_ram.append(int(m[1], 16))
-		if m[2].strip() == 'Persistent Memory':
-			pmem.append(int(m[0], 16))
-			pmem.append(int(m[1], 16))
+    """Populate iomem from /proc/iomem file"""
+    global iomem
+    global max_indent
+    global children
+    with open('/proc/iomem', 'r', encoding='ascii') as f:
+        for line in f:
+            indent = 0
+            while line[indent] == ' ':
+                indent += 1
+            if indent > max_indent:
+                max_indent = indent
+            m = re.split('-|:', line, 2)
+            begin = int(m[0], 16)
+            end = int(m[1], 16)
+            label = m[2].strip()
+            entry = IomemEntry(begin, end, indent, label)
+            # Before adding entry, search for a parent node using its begin.
+            if indent > 0:
+                parent = find_memory_type(begin)
+                assert parent, f"Given indent expected a parent for {label}"
+                children[parent].add(entry)
+            iomem[indent].append(entry)
 
-def print_memory_type():
-	print("Event: %s" % (event_name))
-	print("%-40s  %10s  %10s\n" % ("Memory type", "count", "percentage"), end='')
-	print("%-40s  %10s  %10s\n" % ("----------------------------------------",
-					"-----------", "-----------"),
-					end='');
-	total = sum(load_mem_type_cnt.values())
-	for mem_type, count in sorted(load_mem_type_cnt.most_common(), \
-					key = lambda kv: (kv[1], kv[0]), reverse = True):
-		print("%-40s  %10d  %10.1f%%\n" %
-			(mem_type, count, 100 * count / total),
-			end='')
+def find_memory_type(phys_addr) -> Optional[IomemEntry]:
+    """Search iomem for the range containing phys_addr with the maximum indent"""
+    for i in range(max_indent, -1, -1):
+        if i not in iomem:
+            continue
+        position = bisect.bisect_right(iomem[i], phys_addr,
+                                       key=lambda entry: entry.begin)
+        if position is None:
+            continue
+        iomem_entry = iomem[i][position-1]
+        if  iomem_entry.begin <= phys_addr <= iomem_entry.end:
+            return iomem_entry
+    print(f"Didn't find {phys_addr}")
+    return None
 
-def trace_begin():
-	parse_iomem()
+def print_memory_type():
+    print(f"Event: {event_name}")
+    print(f"{'Memory type':<40}  {'count':>10}  {'percentage':>10}")
+    print(f"{'-' * 40:<40}  {'-' * 10:>10}  {'-' * 10:>10}")
+    total = sum(load_mem_type_cnt.values())
+    # Add count from children into the parent.
+    for i in range(max_indent, -1, -1):
+        if i not in iomem:
+            continue
+        for entry in iomem[i]:
+            global children
+            for child in children[entry]:
+                if load_mem_type_cnt[child] > 0:
+                    load_mem_type_cnt[entry] += load_mem_type_cnt[child]
 
-def trace_end():
-	print_memory_type()
-	f.close()
+    def print_entries(entries):
+        """Print counts from parents down to their children"""
+        global children
+        for entry in sorted(entries,
+                            key = lambda entry: load_mem_type_cnt[entry],
+                            reverse = True):
+            count = load_mem_type_cnt[entry]
+            if count > 0:
+                mem_type = ' ' * entry.indent + f"{entry.begin:x}-{entry.end:x} : {entry.label}"
+                percent = 100 * count / total
+                print(f"{mem_type:<40}  {count:>10}  {percent:>10.1f}")
+                print_entries(children[entry])
 
-def is_system_ram(phys_addr):
-	#/proc/iomem is sorted
-	position = bisect.bisect(system_ram, phys_addr)
-	if position % 2 == 0:
-		return False
-	return True
+    print_entries(iomem[0])
 
-def is_persistent_mem(phys_addr):
-	position = bisect.bisect(pmem, phys_addr)
-	if position % 2 == 0:
-		return False
-	return True
+def trace_begin():
+    parse_iomem()
 
-def find_memory_type(phys_addr):
-	if phys_addr == 0:
-		return "N/A"
-	if is_system_ram(phys_addr):
-		return "System RAM"
+def trace_end():
+    print_memory_type()
 
-	if is_persistent_mem(phys_addr):
-		return "Persistent Memory"
+def process_event(param_dict):
+    if "sample" not in param_dict:
+        return
 
-	#slow path, search all
-	f.seek(0, 0)
-	for j in f:
-		m = re.split('-|:',j,2)
-		if int(m[0], 16) <= phys_addr <= int(m[1], 16):
-			return m[2]
-	return "N/A"
+    sample = param_dict["sample"]
+    if "phys_addr" not in sample:
+        return
 
-def process_event(param_dict):
-	name       = param_dict["ev_name"]
-	sample     = param_dict["sample"]
-	phys_addr  = sample["phys_addr"]
+    phys_addr  = sample["phys_addr"]
+    entry = find_memory_type(phys_addr)
+    if entry:
+        load_mem_type_cnt[entry] += 1
 
-	global event_name
-	if event_name == None:
-		event_name = name
-	load_mem_type_cnt[find_memory_type(phys_addr)] += 1
+    global event_name
+    if event_name is None:
+        event_name  = param_dict["ev_name"]
diff --git a/tools/perf/scripts/python/netdev-times.py b/tools/perf/scripts/python/netdev-times.py
index 00552eeb7178..30c4bccee5b2 100644
--- a/tools/perf/scripts/python/netdev-times.py
+++ b/tools/perf/scripts/python/netdev-times.py
@@ -293,7 +293,8 @@ def skb__kfree_skb(name, context, cpu, sec, nsec, pid, comm, callchain,
 			skbaddr, location, protocol, reason)
 	all_event_list.append(event_info)
 
-def skb__consume_skb(name, context, cpu, sec, nsec, pid, comm, callchain, skbaddr):
+def skb__consume_skb(name, context, cpu, sec, nsec, pid, comm, callchain,
+			skbaddr, location):
 	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
 			skbaddr)
 	all_event_list.append(event_info)
diff --git a/tools/perf/scripts/python/parallel-perf.py b/tools/perf/scripts/python/parallel-perf.py
new file mode 100755
index 000000000000..be85fd7f6632
--- /dev/null
+++ b/tools/perf/scripts/python/parallel-perf.py
@@ -0,0 +1,989 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a perf script command multiple times in parallel, using perf script
+# options --cpu and --time so that each job processes a different chunk
+# of the data.
+#
+# Copyright (c) 2024, Intel Corporation.
+
+import subprocess
+import argparse
+import pathlib
+import shlex
+import time
+import copy
+import sys
+import os
+import re
+
+glb_prog_name = "parallel-perf.py"
+glb_min_interval = 10.0
+glb_min_samples = 64
+
+class Verbosity():
+
+	def __init__(self, quiet=False, verbose=False, debug=False):
+		self.normal    = True
+		self.verbose   = verbose
+		self.debug     = debug
+		self.self_test = True
+		if self.debug:
+			self.verbose = True
+		if self.verbose:
+			quiet = False
+		if quiet:
+			self.normal = False
+
+# Manage work (Start/Wait/Kill), as represented by a subprocess.Popen command
+class Work():
+
+	def __init__(self, cmd, pipe_to, output_dir="."):
+		self.popen = None
+		self.consumer = None
+		self.cmd = cmd
+		self.pipe_to = pipe_to
+		self.output_dir = output_dir
+		self.cmdout_name = f"{output_dir}/cmd.txt"
+		self.stdout_name = f"{output_dir}/out.txt"
+		self.stderr_name = f"{output_dir}/err.txt"
+
+	def Command(self):
+		sh_cmd = [ shlex.quote(x) for x in self.cmd ]
+		return " ".join(self.cmd)
+
+	def Stdout(self):
+		return open(self.stdout_name, "w")
+
+	def Stderr(self):
+		return open(self.stderr_name, "w")
+
+	def CreateOutputDir(self):
+		pathlib.Path(self.output_dir).mkdir(parents=True, exist_ok=True)
+
+	def Start(self):
+		if self.popen:
+			return
+		self.CreateOutputDir()
+		with open(self.cmdout_name, "w") as f:
+			f.write(self.Command())
+			f.write("\n")
+		stdout = self.Stdout()
+		stderr = self.Stderr()
+		if self.pipe_to:
+			self.popen = subprocess.Popen(self.cmd, stdout=subprocess.PIPE, stderr=stderr)
+			args = shlex.split(self.pipe_to)
+			self.consumer = subprocess.Popen(args, stdin=self.popen.stdout, stdout=stdout, stderr=stderr)
+		else:
+			self.popen = subprocess.Popen(self.cmd, stdout=stdout, stderr=stderr)
+
+	def RemoveEmptyErrFile(self):
+		if os.path.exists(self.stderr_name):
+			if os.path.getsize(self.stderr_name) == 0:
+				os.unlink(self.stderr_name)
+
+	def Errors(self):
+		if os.path.exists(self.stderr_name):
+			if os.path.getsize(self.stderr_name) != 0:
+				return [ f"Non-empty error file {self.stderr_name}" ]
+		return []
+
+	def TidyUp(self):
+		self.RemoveEmptyErrFile()
+
+	def RawPollWait(self, p, wait):
+		if wait:
+			return p.wait()
+		return p.poll()
+
+	def Poll(self, wait=False):
+		if not self.popen:
+			return None
+		result = self.RawPollWait(self.popen, wait)
+		if self.consumer:
+			res = result
+			result = self.RawPollWait(self.consumer, wait)
+			if result != None and res == None:
+				self.popen.kill()
+				result = None
+			elif result == 0 and res != None and res != 0:
+				result = res
+		if result != None:
+			self.TidyUp()
+		return result
+
+	def Wait(self):
+		return self.Poll(wait=True)
+
+	def Kill(self):
+		if not self.popen:
+			return
+		self.popen.kill()
+		if self.consumer:
+			self.consumer.kill()
+
+def KillWork(worklist, verbosity):
+	for w in worklist:
+		w.Kill()
+	for w in worklist:
+		w.Wait()
+
+def NumberOfCPUs():
+	return os.sysconf("SC_NPROCESSORS_ONLN")
+
+def NanoSecsToSecsStr(x):
+	if x == None:
+		return ""
+	x = str(x)
+	if len(x) < 10:
+		x = "0" * (10 - len(x)) + x
+	return x[:len(x) - 9] + "." + x[-9:]
+
+def InsertOptionAfter(cmd, option, after):
+	try:
+		pos = cmd.index(after)
+		cmd.insert(pos + 1, option)
+	except:
+		cmd.append(option)
+
+def CreateWorkList(cmd, pipe_to, output_dir, cpus, time_ranges_by_cpu):
+	max_len = len(str(cpus[-1]))
+	cpu_dir_fmt = f"cpu-%.{max_len}u"
+	worklist = []
+	pos = 0
+	for cpu in cpus:
+		if cpu >= 0:
+			cpu_dir = os.path.join(output_dir, cpu_dir_fmt % cpu)
+			cpu_option = f"--cpu={cpu}"
+		else:
+			cpu_dir = output_dir
+			cpu_option = None
+
+		tr_dir_fmt = "time-range"
+
+		if len(time_ranges_by_cpu) > 1:
+			time_ranges = time_ranges_by_cpu[pos]
+			tr_dir_fmt += f"-{pos}"
+			pos += 1
+		else:
+			time_ranges = time_ranges_by_cpu[0]
+
+		max_len = len(str(len(time_ranges)))
+		tr_dir_fmt += f"-%.{max_len}u"
+
+		i = 0
+		for r in time_ranges:
+			if r == [None, None]:
+				time_option = None
+				work_output_dir = cpu_dir
+			else:
+				time_option = "--time=" + NanoSecsToSecsStr(r[0]) + "," + NanoSecsToSecsStr(r[1])
+				work_output_dir = os.path.join(cpu_dir, tr_dir_fmt % i)
+				i += 1
+			work_cmd = list(cmd)
+			if time_option != None:
+				InsertOptionAfter(work_cmd, time_option, "script")
+			if cpu_option != None:
+				InsertOptionAfter(work_cmd, cpu_option, "script")
+			w = Work(work_cmd, pipe_to, work_output_dir)
+			worklist.append(w)
+	return worklist
+
+def DoRunWork(worklist, nr_jobs, verbosity):
+	nr_to_do = len(worklist)
+	not_started = list(worklist)
+	running = []
+	done = []
+	chg = False
+	while True:
+		nr_done = len(done)
+		if chg and verbosity.normal:
+			nr_run = len(running)
+			print(f"\rThere are {nr_to_do} jobs: {nr_done} completed, {nr_run} running", flush=True, end=" ")
+			if verbosity.verbose:
+				print()
+			chg = False
+		if nr_done == nr_to_do:
+			break
+		while len(running) < nr_jobs and len(not_started):
+			w = not_started.pop(0)
+			running.append(w)
+			if verbosity.verbose:
+				print("Starting:", w.Command())
+			w.Start()
+			chg = True
+		if len(running):
+			time.sleep(0.1)
+		finished = []
+		not_finished = []
+		while len(running):
+			w = running.pop(0)
+			r = w.Poll()
+			if r == None:
+				not_finished.append(w)
+				continue
+			if r == 0:
+				if verbosity.verbose:
+					print("Finished:", w.Command())
+				finished.append(w)
+				chg = True
+				continue
+			if verbosity.normal and not verbosity.verbose:
+				print()
+			print("Job failed!\n    return code:", r, "\n    command:    ", w.Command())
+			if w.pipe_to:
+				print("    piped to:   ", w.pipe_to)
+			print("Killing outstanding jobs")
+			KillWork(not_finished, verbosity)
+			KillWork(running, verbosity)
+			return False
+		running = not_finished
+		done += finished
+	errorlist = []
+	for w in worklist:
+		errorlist += w.Errors()
+	if len(errorlist):
+		print("Errors:")
+		for e in errorlist:
+			print(e)
+	elif verbosity.normal:
+		print("\r"," "*50, "\rAll jobs finished successfully", flush=True)
+	return True
+
+def RunWork(worklist, nr_jobs=NumberOfCPUs(), verbosity=Verbosity()):
+	try:
+		return DoRunWork(worklist, nr_jobs, verbosity)
+	except:
+		for w in worklist:
+			w.Kill()
+		raise
+	return True
+
+def ReadHeader(perf, file_name):
+	return subprocess.Popen([perf, "script", "--header-only", "--input", file_name], stdout=subprocess.PIPE).stdout.read().decode("utf-8")
+
+def ParseHeader(hdr):
+	result = {}
+	lines = hdr.split("\n")
+	for line in lines:
+		if ":" in line and line[0] == "#":
+			pos = line.index(":")
+			name = line[1:pos-1].strip()
+			value = line[pos+1:].strip()
+			if name in result:
+				orig_name = name
+				nr = 2
+				while True:
+					name = f"{orig_name} {nr}"
+					if name not in result:
+						break
+					nr += 1
+			result[name] = value
+	return result
+
+def HeaderField(hdr_dict, hdr_fld):
+	if hdr_fld not in hdr_dict:
+		raise Exception(f"'{hdr_fld}' missing from header information")
+	return hdr_dict[hdr_fld]
+
+# Represent the position of an option within a command string
+# and provide the option value and/or remove the option
+class OptPos():
+
+	def Init(self, opt_element=-1, value_element=-1, opt_pos=-1, value_pos=-1, error=None):
+		self.opt_element = opt_element		# list element that contains option
+		self.value_element = value_element	# list element that contains option value
+		self.opt_pos = opt_pos			# string position of option
+		self.value_pos = value_pos		# string position of value
+		self.error = error			# error message string
+
+	def __init__(self, args, short_name, long_name, default=None):
+		self.args = list(args)
+		self.default = default
+		n = 2 + len(long_name)
+		m = len(short_name)
+		pos = -1
+		for opt in args:
+			pos += 1
+			if m and opt[:2] == f"-{short_name}":
+				if len(opt) == 2:
+					if pos + 1 < len(args):
+						self.Init(pos, pos + 1, 0, 0)
+					else:
+						self.Init(error = f"-{short_name} option missing value")
+				else:
+					self.Init(pos, pos, 0, 2)
+				return
+			if opt[:n] == f"--{long_name}":
+				if len(opt) == n:
+					if pos + 1 < len(args):
+						self.Init(pos, pos + 1, 0, 0)
+					else:
+						self.Init(error = f"--{long_name} option missing value")
+				elif opt[n] == "=":
+					self.Init(pos, pos, 0, n + 1)
+				else:
+					self.Init(error = f"--{long_name} option expected '='")
+				return
+			if m and opt[:1] == "-" and opt[:2] != "--" and short_name in opt:
+				ipos = opt.index(short_name)
+				if "-" in opt[1:]:
+					hpos = opt[1:].index("-")
+					if hpos < ipos:
+						continue
+				if ipos + 1 == len(opt):
+					if pos + 1 < len(args):
+						self.Init(pos, pos + 1, ipos, 0)
+					else:
+						self.Init(error = f"-{short_name} option missing value")
+				else:
+					self.Init(pos, pos, ipos, ipos + 1)
+				return
+		self.Init()
+
+	def Value(self):
+		if self.opt_element >= 0:
+			if self.opt_element != self.value_element:
+				return self.args[self.value_element]
+			else:
+				return self.args[self.value_element][self.value_pos:]
+		return self.default
+
+	def Remove(self, args):
+		if self.opt_element == -1:
+			return
+		if self.opt_element != self.value_element:
+			del args[self.value_element]
+		if self.opt_pos:
+			args[self.opt_element] = args[self.opt_element][:self.opt_pos]
+		else:
+			del args[self.opt_element]
+
+def DetermineInputFileName(cmd):
+	p = OptPos(cmd, "i", "input", "perf.data")
+	if p.error:
+		raise Exception(f"perf command {p.error}")
+	file_name = p.Value()
+	if not os.path.exists(file_name):
+		raise Exception(f"perf command input file '{file_name}' not found")
+	return file_name
+
+def ReadOption(args, short_name, long_name, err_prefix, remove=False):
+	p = OptPos(args, short_name, long_name)
+	if p.error:
+		raise Exception(f"{err_prefix}{p.error}")
+	value = p.Value()
+	if remove:
+		p.Remove(args)
+	return value
+
+def ExtractOption(args, short_name, long_name, err_prefix):
+	return ReadOption(args, short_name, long_name, err_prefix, True)
+
+def ReadPerfOption(args, short_name, long_name):
+	return ReadOption(args, short_name, long_name, "perf command ")
+
+def ExtractPerfOption(args, short_name, long_name):
+	return ExtractOption(args, short_name, long_name, "perf command ")
+
+def PerfDoubleQuickCommands(cmd, file_name):
+	cpu_str = ReadPerfOption(cmd, "C", "cpu")
+	time_str = ReadPerfOption(cmd, "", "time")
+	# Use double-quick sampling to determine trace data density
+	times_cmd = ["perf", "script", "--ns", "--input", file_name, "--itrace=qqi"]
author	Tvrtko Ursulin <tursulin@ursulin.net>	2025-02-05 09:29:14 +0000
committer	Tvrtko Ursulin <tursulin@ursulin.net>	2025-02-05 09:29:14 +0000
commit	c771600c6af14749609b49565ffb4cac2959710d (patch)
tree	e9be426f1b32598527127fd0fa1b265a7cc83044 /tools/perf/scripts/python
parent	4a82ceb04ad4bbb9cc20925abccb70938313e555 (diff)
parent	2014c95afecee3e76ca4a56956a936e23283f05b (diff)