From 3839a8eedde57a9f5b869025ce2626a7a37f5ccb Mon Sep 17 00:00:00 2001 From: Yash Suthar Date: Mon, 20 Apr 2026 15:42:36 +0530 Subject: tracing: Remove redundant IS_ERR() check in trace_pipe_open() in trace_pipe_open() already check the IS_ERR(iter) and return early on error,so iter after will be valid and it is safe to return 0 at end. Link: https://patch.msgid.link/20260420101236.223919-1-yashsuthar983@gmail.com Signed-off-by: Yash Suthar Signed-off-by: Steven Rostedt --- kernel/trace/trace_remote.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace_remote.c b/kernel/trace/trace_remote.c index d6c3f94d67cd..2a6cc000ec98 100644 --- a/kernel/trace/trace_remote.c +++ b/kernel/trace/trace_remote.c @@ -602,7 +602,7 @@ static int trace_pipe_open(struct inode *inode, struct file *filp) filp->private_data = iter; - return IS_ERR(iter) ? PTR_ERR(iter) : 0; + return 0; } static int trace_pipe_release(struct inode *inode, struct file *filp) -- cgit v1.2.3 From f07883450eb14d1cf020b55d9f3a7ec5683bcd26 Mon Sep 17 00:00:00 2001 From: Pengpeng Hou Date: Thu, 30 Apr 2026 12:33:50 +0800 Subject: tracing: Bound synthetic-field strings with seq_buf The synthetic field helpers build a prefixed synthetic variable name and a generated hist command in fixed MAX_FILTER_STR_VAL buffers. The current code appends those strings with raw strcat(), so long key lists, field names, or saved filters can run past the end of the staging buffers. Build both strings with seq_buf and propagate -E2BIG if either the synthetic variable name or the generated command exceeds MAX_FILTER_STR_VAL. This keeps the existing tracing-side limit while using the helper intended for bounded command construction. Cc: Mathieu Desnoyers Cc: Tom Zanussi Link: https://patch.msgid.link/20260430043350.57928-1-pengpeng@iscas.ac.cn Fixes: 02205a6752f2 ("tracing: Add support for 'field variables'") Acked-by: Masami Hiramatsu (Google) Reviewed-by: Tom Zanussi Signed-off-by: Pengpeng Hou [ sdr: Moved struct seq_buf *s for upside-down x-mas tree formatting ] Signed-off-by: Steven Rostedt --- kernel/trace/trace_events_hist.c | 41 ++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index eb2c2bc8bc3d..9701650c89b2 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -2967,13 +2968,22 @@ find_synthetic_field_var(struct hist_trigger_data *target_hist_data, { struct hist_field *event_var; char *synthetic_name; + struct seq_buf s; synthetic_name = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL); if (!synthetic_name) return ERR_PTR(-ENOMEM); - strcpy(synthetic_name, "synthetic_"); - strcat(synthetic_name, field_name); + seq_buf_init(&s, synthetic_name, MAX_FILTER_STR_VAL); + seq_buf_printf(&s, "synthetic_%s", field_name); + + /* Terminate synthetic_name with a NUL. */ + seq_buf_str(&s); + + if (seq_buf_has_overflowed(&s)) { + kfree(synthetic_name); + return ERR_PTR(-E2BIG); + } event_var = find_event_var(target_hist_data, system, event_name, synthetic_name); @@ -3019,6 +3029,7 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data, struct hist_field *key_field; struct hist_field *event_var; char *saved_filter; + struct seq_buf s; char *cmd; int ret; @@ -3063,28 +3074,34 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data, return ERR_PTR(-ENOMEM); } + seq_buf_init(&s, cmd, MAX_FILTER_STR_VAL); + /* Use the same keys as the compatible histogram */ - strcat(cmd, "keys="); + seq_buf_puts(&s, "keys="); for_each_hist_key_field(i, hist_data) { key_field = hist_data->fields[i]; if (!first) - strcat(cmd, ","); - strcat(cmd, key_field->field->name); + seq_buf_putc(&s, ','); + seq_buf_puts(&s, key_field->field->name); first = false; } /* Create the synthetic field variable specification */ - strcat(cmd, ":synthetic_"); - strcat(cmd, field_name); - strcat(cmd, "="); - strcat(cmd, field_name); + seq_buf_printf(&s, ":synthetic_%s=%s", field_name, field_name); /* Use the same filter as the compatible histogram */ saved_filter = find_trigger_filter(hist_data, file); - if (saved_filter) { - strcat(cmd, " if "); - strcat(cmd, saved_filter); + if (saved_filter) + seq_buf_printf(&s, " if %s", saved_filter); + + /* Terminate cmd with a NUL. */ + seq_buf_str(&s); + + if (seq_buf_has_overflowed(&s)) { + kfree(cmd); + kfree(var_hist); + return ERR_PTR(-E2BIG); } var_hist->cmd = kstrdup(cmd, GFP_KERNEL); -- cgit v1.2.3 From 8a6e9af2fac8cf212f86cff282ae96f6c7d7ca18 Mon Sep 17 00:00:00 2001 From: Yash Suthar Date: Sat, 2 May 2026 23:17:41 +0530 Subject: tracing: Switch trace_recursion_record.c code over to use guard() Switch mutex_lock()/mutex_unlock() to guard(). also drop the ret local variable and return directly. Link: https://patch.msgid.link/20260502174741.39636-1-yashsuthar983@gmail.com Signed-off-by: Yash Suthar Signed-off-by: Steven Rostedt --- kernel/trace/trace_recursion_record.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_recursion_record.c b/kernel/trace/trace_recursion_record.c index 784fe1fbb866..bac4bc844ccd 100644 --- a/kernel/trace/trace_recursion_record.c +++ b/kernel/trace/trace_recursion_record.c @@ -180,9 +180,8 @@ static const struct seq_operations recursed_function_seq_ops = { static int recursed_function_open(struct inode *inode, struct file *file) { - int ret = 0; + guard(mutex)(&recursed_function_lock); - mutex_lock(&recursed_function_lock); /* If this file was opened for write, then erase contents */ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { /* disable updating records */ @@ -194,10 +193,9 @@ static int recursed_function_open(struct inode *inode, struct file *file) atomic_set(&nr_records, 0); } if (file->f_mode & FMODE_READ) - ret = seq_open(file, &recursed_function_seq_ops); - mutex_unlock(&recursed_function_lock); + return seq_open(file, &recursed_function_seq_ops); - return ret; + return 0; } static ssize_t recursed_function_write(struct file *file, -- cgit v1.2.3 From bb4613842e8033a714bacf3b62cc70638926cdcf Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 13 May 2026 15:00:07 -0400 Subject: tracing: Allow perf to read synthetic events Currently, perf can not enable synthetic events. When it does, it either causes a warning in the kernel or errors with "no such device". Add the necessary code to allow perf to also attach to synthetic events. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://patch.msgid.link/20260513150007.3b280e87@gandalf.local.home Reported-by: Ian Rogers Acked-by: Namhyung Kim Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_synth.c | 121 +++++++++++++++++++++++++++++--------- 1 file changed, 94 insertions(+), 27 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 39ac4eba0702..e6871230bde9 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -499,28 +499,19 @@ static unsigned int trace_stack(struct synth_trace_event *entry, return len; } -static void trace_event_raw_event_synth(void *__data, - u64 *var_ref_vals, - unsigned int *var_ref_idx) +static __always_inline int get_field_size(struct synth_event *event, + u64 *var_ref_vals, + unsigned int *var_ref_idx) { - unsigned int i, n_u64, val_idx, len, data_size = 0; - struct trace_event_file *trace_file = __data; - struct synth_trace_event *entry; - struct trace_event_buffer fbuffer; - struct trace_buffer *buffer; - struct synth_event *event; - int fields_size = 0; - - event = trace_file->event_call->data; - - if (trace_trigger_soft_disabled(trace_file)) - return; + int fields_size; fields_size = event->n_u64 * sizeof(u64); - for (i = 0; i < event->n_dynamic_fields; i++) { + for (int i = 0; i < event->n_dynamic_fields; i++) { unsigned int field_pos = event->dynamic_fields[i]->field_pos; char *str_val; + int val_idx; + int len; val_idx = var_ref_idx[field_pos]; str_val = (char *)(long)var_ref_vals[val_idx]; @@ -535,18 +526,18 @@ static void trace_event_raw_event_synth(void *__data, fields_size += len; } + return fields_size; +} - /* - * Avoid ring buffer recursion detection, as this event - * is being performed within another event. - */ - buffer = trace_file->tr->array_buffer.buffer; - guard(ring_buffer_nest)(buffer); - - entry = trace_event_buffer_reserve(&fbuffer, trace_file, - sizeof(*entry) + fields_size); - if (!entry) - return; +static __always_inline void write_synth_entry(struct synth_event *event, + struct synth_trace_event *entry, + u64 *var_ref_vals, + unsigned int *var_ref_idx) +{ + int data_size = 0; + int i, n_u64; + int val_idx; + int len; for (i = 0, n_u64 = 0; i < event->n_fields; i++) { val_idx = var_ref_idx[i]; @@ -587,10 +578,83 @@ static void trace_event_raw_event_synth(void *__data, n_u64++; } } +} + +static void trace_event_raw_event_synth(void *__data, + u64 *var_ref_vals, + unsigned int *var_ref_idx) +{ + struct trace_event_file *trace_file = __data; + struct synth_trace_event *entry; + struct trace_event_buffer fbuffer; + struct trace_buffer *buffer; + struct synth_event *event; + int fields_size; + + event = trace_file->event_call->data; + + if (trace_trigger_soft_disabled(trace_file)) + return; + + fields_size = get_field_size(event, var_ref_vals, var_ref_idx); + + /* + * Avoid ring buffer recursion detection, as this event + * is being performed within another event. + */ + buffer = trace_file->tr->array_buffer.buffer; + guard(ring_buffer_nest)(buffer); + + entry = trace_event_buffer_reserve(&fbuffer, trace_file, + sizeof(*entry) + fields_size); + if (!entry) + return; + + write_synth_entry(event, entry, var_ref_vals, var_ref_idx); trace_event_buffer_commit(&fbuffer); } +#ifdef CONFIG_PERF_EVENTS +static void perf_event_raw_event_synth(void *__data, + u64 *var_ref_vals, + unsigned int *var_ref_idx) +{ + struct trace_event_call *call = __data; + struct synth_trace_event *entry; + struct hlist_head *perf_head; + struct synth_event *event; + struct pt_regs *regs; + int fields_size; + size_t size; + int context; + + event = call->data; + + perf_head = this_cpu_ptr(call->perf_events); + + if (!perf_head || hlist_empty(perf_head)) + return; + + fields_size = get_field_size(event, var_ref_vals, var_ref_idx); + + size = ALIGN(sizeof(*entry) + fields_size, 8); + + entry = perf_trace_buf_alloc(size, ®s, &context); + + if (unlikely(!entry)) + return; + + write_synth_entry(event, entry, var_ref_vals, var_ref_idx); + + perf_fetch_caller_regs(regs); + + perf_trace_buf_submit(entry, size, context, + call->event.type, 1, regs, + perf_head, NULL); +} +#endif + static void free_synth_event_print_fmt(struct trace_event_call *call) { if (call) { @@ -917,6 +981,9 @@ static int register_synth_event(struct synth_event *event) call->flags = TRACE_EVENT_FL_TRACEPOINT; call->class->reg = synth_event_reg; call->class->probe = trace_event_raw_event_synth; +#ifdef CONFIG_PERF_EVENTS + call->class->perf_probe = perf_event_raw_event_synth; +#endif call->data = event; call->tp = event->tp; -- cgit v1.2.3 From 5ec75d6a1b5b8beebea3bcaa85baad295ee09dcb Mon Sep 17 00:00:00 2001 From: Yu Peng Date: Tue, 19 May 2026 16:16:20 +0800 Subject: tracing/branch: Use pr_warn() instead of printk(KERN_WARNING) Use pr_warn() instead of printk(KERN_WARNING ...) for the branch tracer warning messages. Keep the message text unchanged. The change only removes the open-coded log level from these warnings. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://patch.msgid.link/20260519081620.3874441-1-pengyu@kylinos.cn Signed-off-by: Yu Peng Signed-off-by: Steven Rostedt --- kernel/trace/trace_branch.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index d1564db95a8f..d8e97ad798f0 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -181,8 +181,7 @@ __init static int init_branch_tracer(void) ret = register_trace_event(&trace_branch_event); if (!ret) { - printk(KERN_WARNING "Warning: could not register " - "branch events\n"); + pr_warn("Warning: could not register branch events\n"); return 1; } return register_tracer(&branch_trace); @@ -374,8 +373,7 @@ __init static int init_annotated_branch_stats(void) ret = register_stat_tracer(&annotated_branch_stats); if (ret) { - printk(KERN_WARNING "Warning: could not register " - "annotated branches stats\n"); + pr_warn("Warning: could not register annotated branches stats\n"); return ret; } return 0; @@ -439,8 +437,7 @@ __init static int all_annotated_branch_stats(void) ret = register_stat_tracer(&all_branch_stats); if (ret) { - printk(KERN_WARNING "Warning: could not register " - "all branches stats\n"); + pr_warn("Warning: could not register all branches stats\n"); return ret; } return 0; -- cgit v1.2.3 From 153498f200d295f3efa6bd37fc6e7ff105ab344f Mon Sep 17 00:00:00 2001 From: Yu Peng Date: Tue, 19 May 2026 16:34:09 +0800 Subject: tracing: Use krealloc_array() for trace option array growth Use krealloc_array() when growing tr->topts instead of open-coding the size calculation in krealloc(). This makes the resize path use the helper intended for array allocations and avoids manual multiplication of the element count and element size. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://patch.msgid.link/20260519083409.3885032-1-pengyu@kylinos.cn Signed-off-by: Yu Peng Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6eb4d3097a4d..aec3f31ed027 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7928,8 +7928,8 @@ create_trace_option_files(struct trace_array *tr, struct tracer *tracer, if (!topts) return 0; - tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1), - GFP_KERNEL); + tr_topts = krealloc_array(tr->topts, tr->nr_topts + 1, sizeof(*tr->topts), + GFP_KERNEL); if (!tr_topts) { kfree(topts); return -ENOMEM; -- cgit v1.2.3 From 3a5b29657593fe7695337dfcf40c49eb6fa544d4 Mon Sep 17 00:00:00 2001 From: Ao Sun Date: Thu, 21 May 2026 01:52:34 +0000 Subject: tracing: Fix README path for synthetic_events The events/ prefix should be removed, since synthetic_events is now directly under the tracing root directory. Cc: Cc: Link: https://patch.msgid.link/20260521015211.111-1-ao.sun@transsion.com Signed-off-by: Ao Sun Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index aec3f31ed027..4218c174460b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4474,7 +4474,7 @@ static const char readme_msg[] = "\t snapshot() - snapshot the trace buffer\n\n" #endif #ifdef CONFIG_SYNTH_EVENTS - " events/synthetic_events\t- Create/append/remove/show synthetic events\n" + " synthetic_events\t- Create/append/remove/show synthetic events\n" "\t Write into this file to define/undefine new synthetic events.\n" "\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n" #endif -- cgit v1.2.3 From 817e148935dd76ce39bf468f51308ec77de87033 Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Wed, 20 May 2026 14:50:06 -0700 Subject: tracing: Simplify pages allocation for tracing_map logic Change to a flexible array member to allocate together with the array struct. Simplifies code slightly by removing no longer correct null checks for pages and removing kfrees. Cc: Mathieu Desnoyers Cc: Kees Cook Cc: "Gustavo A. R. Silva" Link: https://patch.msgid.link/20260520215006.12008-1-rosenp@gmail.com Signed-off-by: Rosen Penev Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt --- kernel/trace/tracing_map.c | 30 +++++++++++------------------- kernel/trace/tracing_map.h | 2 +- 2 files changed, 12 insertions(+), 20 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index 0dd7927df22a..d7922f40dbe2 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -288,9 +288,6 @@ static void tracing_map_array_clear(struct tracing_map_array *a) { unsigned int i; - if (!a->pages) - return; - for (i = 0; i < a->n_pages; i++) memset(a->pages[i], 0, PAGE_SIZE); } @@ -302,9 +299,6 @@ static void tracing_map_array_free(struct tracing_map_array *a) if (!a) return; - if (!a->pages) - goto free; - for (i = 0; i < a->n_pages; i++) { if (!a->pages[i]) break; @@ -312,9 +306,6 @@ static void tracing_map_array_free(struct tracing_map_array *a) free_page((unsigned long)a->pages[i]); } - kfree(a->pages); - - free: kfree(a); } @@ -322,24 +313,25 @@ static struct tracing_map_array *tracing_map_array_alloc(unsigned int n_elts, unsigned int entry_size) { struct tracing_map_array *a; + unsigned int entry_size_shift; + unsigned int entries_per_page; + unsigned int n_pages; unsigned int i; - a = kzalloc_obj(*a); + entry_size_shift = fls(roundup_pow_of_two(entry_size) - 1); + entries_per_page = PAGE_SIZE / (1 << entry_size_shift); + n_pages = max(1, n_elts / entries_per_page); + + a = kzalloc_flex(*a, pages, n_pages); if (!a) return NULL; - a->entry_size_shift = fls(roundup_pow_of_two(entry_size) - 1); - a->entries_per_page = PAGE_SIZE / (1 << a->entry_size_shift); - a->n_pages = n_elts / a->entries_per_page; - if (!a->n_pages) - a->n_pages = 1; + a->entry_size_shift = entry_size_shift; + a->entries_per_page = entries_per_page; + a->n_pages = n_pages; a->entry_shift = fls(a->entries_per_page) - 1; a->entry_mask = (1 << a->entry_shift) - 1; - a->pages = kcalloc(a->n_pages, sizeof(void *), GFP_KERNEL); - if (!a->pages) - goto free; - for (i = 0; i < a->n_pages; i++) { a->pages[i] = (void *)get_zeroed_page(GFP_KERNEL); if (!a->pages[i]) diff --git a/kernel/trace/tracing_map.h b/kernel/trace/tracing_map.h index 99c37eeebc16..18a02959d77b 100644 --- a/kernel/trace/tracing_map.h +++ b/kernel/trace/tracing_map.h @@ -167,7 +167,7 @@ struct tracing_map_array { unsigned int entry_shift; unsigned int entry_mask; unsigned int n_pages; - void **pages; + void *pages[] __counted_by(n_pages); }; #define TRACING_MAP_ARRAY_ELT(array, idx) \ -- cgit v1.2.3 From 292c8197e3685aa7ea5a3803ca240b2210fcd021 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 21 May 2026 09:50:26 -0400 Subject: tracing: Move trace_iterator_increment() into trace_find_next_entry_inc() trace_iterator_increment() is only called from trace_find_next_entry_inc(). It's a small enough function that really doesn't need to be separated. Move the code from trace_iterator_increment() into trace_find_next_entry_inc() and remove trace_iterator_increment(). Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://patch.msgid.link/20260521095026.20c9799d@gandalf.local.home Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4218c174460b..ae527c419508 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2338,15 +2338,6 @@ void trace_last_func_repeats(struct trace_array *tr, __buffer_unlock_commit(buffer, event); } -static void trace_iterator_increment(struct trace_iterator *iter) -{ - struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu); - - iter->idx++; - if (buf_iter) - ring_buffer_iter_advance(buf_iter); -} - static struct trace_entry * peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts, unsigned long *lost_events) @@ -2676,11 +2667,17 @@ struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, /* Find the next real entry, and increment the iterator to the next entry */ void *trace_find_next_entry_inc(struct trace_iterator *iter) { + struct ring_buffer_iter *buf_iter; + iter->ent = __find_next_entry(iter, &iter->cpu, &iter->lost_events, &iter->ts); - if (iter->ent) - trace_iterator_increment(iter); + if (iter->ent) { + iter->idx++; + buf_iter = trace_buffer_iter(iter, iter->cpu); + if (buf_iter) + ring_buffer_iter_advance(buf_iter); + } return iter->ent ? iter : NULL; } -- cgit v1.2.3 From bfda3b9424e02c6b2afac74410457c9c1743ba4a Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Fri, 22 May 2026 14:44:07 -0700 Subject: tracing: Turn hist_elt_data field_var_str into a flexible array The field_var_str array was allocated separately via kcalloc() with its length already known at elt_data allocation time. Convert it to a flexible array member and fold the two allocations into a single kzalloc_flex(), reordering hist_trigger_elt_data_alloc() so n_str is computed and bounds-checked before the struct allocation. hist_elt_data is only reached through tracing_map_elt::private_data (a void *), never embedded, so adding a FAM imposes no tail-position constraint on any enclosing struct. Added __counted_by for extra runtime analysis. Link: https://patch.msgid.link/20260522214407.18120-1-rosenp@gmail.com Assisted-by: Claude:Opus-4.7 Signed-off-by: Rosen Penev Signed-off-by: Steven Rostedt --- kernel/trace/trace_events_hist.c | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 9701650c89b2..82ce492ab268 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -683,8 +683,8 @@ struct track_data { struct hist_elt_data { char *comm; u64 *var_ref_vals; - char **field_var_str; int n_field_var_str; + char *field_var_str[] __counted_by(n_field_var_str); }; struct snapshot_context { @@ -1629,8 +1629,6 @@ static void hist_elt_data_free(struct hist_elt_data *elt_data) for (i = 0; i < elt_data->n_field_var_str; i++) kfree(elt_data->field_var_str[i]); - kfree(elt_data->field_var_str); - kfree(elt_data->comm); kfree(elt_data); } @@ -1650,10 +1648,19 @@ static int hist_trigger_elt_data_alloc(struct tracing_map_elt *elt) struct hist_field *hist_field; unsigned int i, n_str; - elt_data = kzalloc_obj(*elt_data); + BUILD_BUG_ON(STR_VAR_LEN_MAX & (sizeof(u64) - 1)); + + n_str = hist_data->n_field_var_str + hist_data->n_save_var_str + + hist_data->n_var_str; + if (n_str > SYNTH_FIELDS_MAX) + return -EINVAL; + + elt_data = kzalloc_flex(*elt_data, field_var_str, n_str); if (!elt_data) return -ENOMEM; + elt_data->n_field_var_str = n_str; + for_each_hist_field(i, hist_data) { hist_field = hist_data->fields[i]; @@ -1667,24 +1674,8 @@ static int hist_trigger_elt_data_alloc(struct tracing_map_elt *elt) } } - n_str = hist_data->n_field_var_str + hist_data->n_save_var_str + - hist_data->n_var_str; - if (n_str > SYNTH_FIELDS_MAX) { - hist_elt_data_free(elt_data); - return -EINVAL; - } - - BUILD_BUG_ON(STR_VAR_LEN_MAX & (sizeof(u64) - 1)); - size = STR_VAR_LEN_MAX; - elt_data->field_var_str = kcalloc(n_str, sizeof(char *), GFP_KERNEL); - if (!elt_data->field_var_str) { - hist_elt_data_free(elt_data); - return -EINVAL; - } - elt_data->n_field_var_str = n_str; - for (i = 0; i < n_str; i++) { elt_data->field_var_str[i] = kzalloc(size, GFP_KERNEL); if (!elt_data->field_var_str[i]) { -- cgit v1.2.3 From 01046072880b654dbadf71be2f645aad4a7b5d87 Mon Sep 17 00:00:00 2001 From: Karl Mehltretter Date: Mon, 25 May 2026 19:04:28 +0200 Subject: tracing: Disable KCOV instrumentation for trace_irqsoff.o When KCOV runs its boot selftest with whole-kernel instrumentation enabled, it sets current->kcov_mode to KCOV_MODE_TRACE_PC without installing a coverage area. Any instrumented code accepted as task-context coverage in that window dereferences current->kcov_area and crashes. On ARMv5 Versatile PB with CONFIG_KCOV_SELFTEST=y, CONFIG_KCOV_INSTRUMENT_ALL=y and CONFIG_IRQSOFF_TRACER=y, boot hits a NULL pointer fault during the selftest: kcov: running self test Internal error: Oops: 5 [#1] ARM PC is at __sanitizer_cov_trace_pc+0x4c/0x90 Kernel panic - not syncing: Fatal exception A diagnostic run showed the unwanted coverage comes from the IRQs-off tracer callbacks reached from ARM IRQ entry before hardirq context is visible to KCOV: __sanitizer_cov_trace_pc from tracer_hardirqs_off+0x18/0x1cc tracer_hardirqs_off from trace_hardirqs_off+0x34/0x54 trace_hardirqs_off from __irq_svc+0x58/0xb0 __irq_svc from kcov_init+0x7c/0xdc and similarly through tracer_hardirqs_on(). trace_preemptirq.o is already excluded because this tracing path can run from early interrupt code and produce coverage unrelated to syscall inputs. Exclude trace_irqsoff.o as well, instead of requiring users to turn off CONFIG_KCOV_INSTRUMENT_ALL=y, which is the default whole-kernel KCOV mode. With the exclusion in place, the same ARMv5 Versatile PB QEMU test boots through the KCOV selftest and reaches userspace. Tested on ARMv5 Versatile PB QEMU with CONFIG_KCOV_SELFTEST=y, CONFIG_KCOV_INSTRUMENT_ALL=y and CONFIG_IRQSOFF_TRACER=y. Link: https://patch.msgid.link/20260525170428.67211-1-kmehltretter@gmail.com Assisted-by: Codex:gpt-5 Signed-off-by: Karl Mehltretter Signed-off-by: Steven Rostedt --- kernel/trace/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 9b0834134cae..660675e1d426 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -48,9 +48,10 @@ ifdef CONFIG_GCOV_PROFILE_FTRACE GCOV_PROFILE := y endif -# Functions in this file could be invoked from early interrupt -# code and produce random code coverage. +# Functions in these files can run from IRQ entry before hardirq context +# is visible to KCOV, and produce coverage unrelated to syscall inputs. KCOV_INSTRUMENT_trace_preemptirq.o := n +KCOV_INSTRUMENT_trace_irqsoff.o := n CFLAGS_bpf_trace.o := -I$(src) -- cgit v1.2.3 From 9581123304b23049437324038698af9fb56ee663 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Wed, 27 May 2026 11:13:01 -0400 Subject: perf/ftrace: Fix WARNING in __unregister_ftrace_function perf_ftrace_function_unregister() unconditionally calls unregister_ftrace_function() without checking whether the ftrace_ops was ever successfully registered. This triggers a WARN_ON in __unregister_ftrace_function() when the ops doesn't have FTRACE_OPS_FL_ENABLED set. This can happen during perf_event_alloc() error cleanup when perf_trace_destroy() is called via __free_event() on an event whose ftrace_ops registration failed or was already torn down by perf_try_init_event()'s err_destroy path. The call path is: perf_event_alloc() error cleanup -> __free_event() -> event->destroy() [tp_perf_event_destroy] -> perf_trace_destroy() -> perf_trace_event_close() -> TRACE_REG_PERF_CLOSE -> perf_ftrace_function_unregister() -> unregister_ftrace_function() -> __unregister_ftrace_function() -> WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED)) Fix this by checking FTRACE_OPS_FL_ENABLED before attempting to unregister. If the ops is not enabled, just free the filter and return success. Link: https://patch.msgid.link/20260527111301.2d0d8256@fangorn Signed-off-by: Rik van Riel Signed-off-by: Steven Rostedt --- kernel/trace/trace_event_perf.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index a6bb7577e8c5..5b272856e5ab 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -497,7 +497,17 @@ static int perf_ftrace_function_register(struct perf_event *event) static int perf_ftrace_function_unregister(struct perf_event *event) { struct ftrace_ops *ops = &event->ftrace_ops; - int ret = unregister_ftrace_function(ops); + int ret = 0; + + /* + * Perf will call this unconditionally even if the ops is not + * enabled. The unregister_ftrace_function() will warn if called + * when not enabled. Just bypass the unregistering if ops isn't + * enabled here. + */ + if (ops->flags & FTRACE_OPS_FL_ENABLED) + ret = unregister_ftrace_function(ops); + ftrace_free_filter(ops); return ret; } -- cgit v1.2.3