diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/trace/Kconfig | 22 | ||||
| -rw-r--r-- | kernel/trace/bpf_trace.c | 28 | ||||
| -rw-r--r-- | kernel/trace/fgraph.c | 65 | ||||
| -rw-r--r-- | kernel/trace/fprobe.c | 664 | ||||
| -rw-r--r-- | kernel/trace/ftrace.c | 203 | ||||
| -rw-r--r-- | kernel/trace/trace.c | 8 | ||||
| -rw-r--r-- | kernel/trace/trace.h | 8 | ||||
| -rw-r--r-- | kernel/trace/trace_fprobe.c | 270 | ||||
| -rw-r--r-- | kernel/trace/trace_functions_graph.c | 47 | ||||
| -rw-r--r-- | kernel/trace/trace_irqsoff.c | 6 | ||||
| -rw-r--r-- | kernel/trace/trace_probe_tmpl.h | 2 | ||||
| -rw-r--r-- | kernel/trace/trace_sched_wakeup.c | 6 | ||||
| -rw-r--r-- | kernel/trace/trace_selftest.c | 11 |
13 files changed, 856 insertions, 484 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 74c2b1d43bb9..d570b8b9c0a9 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -31,9 +31,14 @@ config HAVE_FUNCTION_GRAPH_TRACER help See Documentation/trace/ftrace-design.rst -config HAVE_FUNCTION_GRAPH_RETVAL +config HAVE_FUNCTION_GRAPH_FREGS bool +config HAVE_FTRACE_GRAPH_FUNC + bool + help + True if ftrace_graph_func() is defined. + config HAVE_DYNAMIC_FTRACE bool help @@ -57,6 +62,12 @@ config HAVE_DYNAMIC_FTRACE_WITH_ARGS This allows for use of ftrace_regs_get_argument() and ftrace_regs_get_stack_pointer(). +config HAVE_FTRACE_REGS_HAVING_PT_REGS + bool + help + If this is set, ftrace_regs has pt_regs, thus it can convert to + pt_regs without allocating memory. + config HAVE_DYNAMIC_FTRACE_NO_PATCHABLE bool help @@ -232,7 +243,7 @@ config FUNCTION_GRAPH_TRACER config FUNCTION_GRAPH_RETVAL bool "Kernel Function Graph Return Value" - depends on HAVE_FUNCTION_GRAPH_RETVAL + depends on HAVE_FUNCTION_GRAPH_FREGS depends on FUNCTION_GRAPH_TRACER default n help @@ -296,10 +307,9 @@ config DYNAMIC_FTRACE_WITH_ARGS config FPROBE bool "Kernel Function Probe (fprobe)" - depends on FUNCTION_TRACER - depends on DYNAMIC_FTRACE_WITH_REGS - depends on HAVE_RETHOOK - select RETHOOK + depends on HAVE_FUNCTION_GRAPH_FREGS && HAVE_FTRACE_GRAPH_FUNC + depends on DYNAMIC_FTRACE_WITH_ARGS + select FUNCTION_GRAPH_TRACER default n help This option enables kernel function probe (fprobe) based on ftrace. diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 9f2f65767639..c462aca8b7e6 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2585,6 +2585,20 @@ struct user_syms { char *buf; }; +#ifndef CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS +static DEFINE_PER_CPU(struct pt_regs, bpf_kprobe_multi_pt_regs); +#define bpf_kprobe_multi_pt_regs_ptr() this_cpu_ptr(&bpf_kprobe_multi_pt_regs) +#else +#define bpf_kprobe_multi_pt_regs_ptr() (NULL) +#endif + +static unsigned long ftrace_get_entry_ip(unsigned long fentry_ip) +{ + unsigned long ip = ftrace_get_symaddr(fentry_ip); + + return ip ? : fentry_ip; +} + static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32 cnt) { unsigned long __user usymbol; @@ -2779,7 +2793,7 @@ static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx) static int kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link, - unsigned long entry_ip, struct pt_regs *regs, + unsigned long entry_ip, struct ftrace_regs *fregs, bool is_return, void *data) { struct bpf_kprobe_multi_run_ctx run_ctx = { @@ -2791,6 +2805,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link, .entry_ip = entry_ip, }; struct bpf_run_ctx *old_run_ctx; + struct pt_regs *regs; int err; if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { @@ -2801,6 +2816,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link, migrate_disable(); rcu_read_lock(); + regs = ftrace_partial_regs(fregs, bpf_kprobe_multi_pt_regs_ptr()); old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx); err = bpf_prog_run(link->link.prog, regs); bpf_reset_run_ctx(old_run_ctx); @@ -2814,26 +2830,28 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link, static int kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip, - unsigned long ret_ip, struct pt_regs *regs, + unsigned long ret_ip, struct ftrace_regs *fregs, void *data) { struct bpf_kprobe_multi_link *link; int err; link = container_of(fp, struct bpf_kprobe_multi_link, fp); - err = kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs, false, data); + err = kprobe_multi_link_prog_run(link, ftrace_get_entry_ip(fentry_ip), + fregs, false, data); return is_kprobe_session(link->link.prog) ? err : 0; } static void kprobe_multi_link_exit_handler(struct fprobe *fp, unsigned long fentry_ip, - unsigned long ret_ip, struct pt_regs *regs, + unsigned long ret_ip, struct ftrace_regs *fregs, void *data) { struct bpf_kprobe_multi_link *link; link = container_of(fp, struct bpf_kprobe_multi_link, fp); - kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs, true, data); + kprobe_multi_link_prog_run(link, ftrace_get_entry_ip(fentry_ip), + fregs, true, data); } static int symbols_cmp_r(const void *a, const void *b, const void *priv) diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 30e3ddc8a8a8..9e6b5a71555b 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -292,13 +292,15 @@ static inline unsigned long make_data_type_val(int idx, int size, int offset) } /* ftrace_graph_entry set to this to tell some archs to run function graph */ -static int entry_run(struct ftrace_graph_ent *trace, struct fgraph_ops *ops) +static int entry_run(struct ftrace_graph_ent *trace, struct fgraph_ops *ops, + struct ftrace_regs *fregs) { return 0; } /* ftrace_graph_return set to this to tell some archs to run function graph */ -static void return_run(struct ftrace_graph_ret *trace, struct fgraph_ops *ops) +static void return_run(struct ftrace_graph_ret *trace, struct fgraph_ops *ops, + struct ftrace_regs *fregs) { } @@ -520,13 +522,15 @@ int __weak ftrace_disable_ftrace_graph_caller(void) #endif int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops) + struct fgraph_ops *gops, + struct ftrace_regs *fregs) { return 0; } static void ftrace_graph_ret_stub(struct ftrace_graph_ret *trace, - struct fgraph_ops *gops) + struct fgraph_ops *gops, + struct ftrace_regs *fregs) { } @@ -644,14 +648,20 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, #endif /* If the caller does not use ftrace, call this function. */ -int function_graph_enter(unsigned long ret, unsigned long func, - unsigned long frame_pointer, unsigned long *retp) +int function_graph_enter_regs(unsigned long ret, unsigned long func, + unsigned long frame_pointer, unsigned long *retp, + struct ftrace_regs *fregs) { struct ftrace_graph_ent trace; unsigned long bitmap = 0; int offset; + int bit; int i; + bit = ftrace_test_recursion_trylock(func, ret); + if (bit < 0) + return -EBUSY; + trace.func = func; trace.depth = ++current->curr_ret_depth; @@ -663,7 +673,7 @@ int function_graph_enter(unsigned long ret, unsigned long func, if (static_branch_likely(&fgraph_do_direct)) { int save_curr_ret_stack = current->curr_ret_stack; - if (static_call(fgraph_func)(&trace, fgraph_direct_gops)) + if (static_call(fgraph_func)(&trace, fgraph_direct_gops, fregs)) bitmap |= BIT(fgraph_direct_gops->idx); else /* Clear out any saved storage */ @@ -681,7 +691,7 @@ int function_graph_enter(unsigned long ret, unsigned long func, save_curr_ret_stack = current->curr_ret_stack; if (ftrace_ops_test(&gops->ops, func, NULL) && - gops->entryfunc(&trace, gops)) + gops->entryfunc(&trace, gops, fregs)) bitmap |= BIT(i); else /* Clear out any saved storage */ @@ -697,12 +707,13 @@ int function_graph_enter(unsigned long ret, unsigned long func, * flag, set that bit always. */ set_bitmap(current, offset, bitmap | BIT(0)); - + ftrace_test_recursion_unlock(bit); return 0; out_ret: current->curr_ret_stack -= FGRAPH_FRAME_OFFSET + 1; out: current->curr_ret_depth--; + ftrace_test_recursion_unlock(bit); return -EBUSY; } @@ -792,15 +803,12 @@ static struct notifier_block ftrace_suspend_notifier = { .notifier_call = ftrace_suspend_notifier_call, }; -/* fgraph_ret_regs is not defined without CONFIG_FUNCTION_GRAPH_RETVAL */ -struct fgraph_ret_regs; - /* * Send the trace to the ring-buffer. * @return the original return address. */ -static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs, - unsigned long frame_pointer) +static inline unsigned long +__ftrace_return_to_handler(struct ftrace_regs *fregs, unsigned long frame_pointer) { struct ftrace_ret_stack *ret_stack; struct ftrace_graph_ret trace; @@ -819,8 +827,11 @@ static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs } trace.rettime = trace_clock_local(); + if (fregs) + ftrace_regs_set_instruction_pointer(fregs, ret); + #ifdef CONFIG_FUNCTION_GRAPH_RETVAL - trace.retval = fgraph_ret_regs_return_value(ret_regs); + trace.retval = ftrace_regs_get_return_value(fregs); #endif bitmap = get_bitmap_bits(current, offset); @@ -828,7 +839,7 @@ static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs #ifdef CONFIG_HAVE_STATIC_CALL if (static_branch_likely(&fgraph_do_direct)) { if (test_bit(fgraph_direct_gops->idx, &bitmap)) - static_call(fgraph_retfunc)(&trace, fgraph_direct_gops); + static_call(fgraph_retfunc)(&trace, fgraph_direct_gops, fregs); } else #endif { @@ -838,7 +849,7 @@ static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs if (gops == &fgraph_stub) continue; - gops->retfunc(&trace, gops); + gops->retfunc(&trace, gops, fregs); } } @@ -855,14 +866,14 @@ static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs } /* - * After all architecures have selected HAVE_FUNCTION_GRAPH_RETVAL, we can - * leave only ftrace_return_to_handler(ret_regs). + * After all architecures have selected HAVE_FUNCTION_GRAPH_FREGS, we can + * leave only ftrace_return_to_handler(fregs). */ -#ifdef CONFIG_HAVE_FUNCTION_GRAPH_RETVAL -unsigned long ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs) +#ifdef CONFIG_HAVE_FUNCTION_GRAPH_FREGS +unsigned long ftrace_return_to_handler(struct ftrace_regs *fregs) { - return __ftrace_return_to_handler(ret_regs, - fgraph_ret_regs_frame_pointer(ret_regs)); + return __ftrace_return_to_handler(fregs, + ftrace_regs_get_frame_pointer(fregs)); } #else unsigned long ftrace_return_to_handler(unsigned long frame_pointer) @@ -1010,7 +1021,8 @@ void ftrace_graph_sleep_time_control(bool enable) * Simply points to ftrace_stub, but with the proper protocol. * Defined by the linker script in linux/vmlinux.lds.h */ -void ftrace_stub_graph(struct ftrace_graph_ret *trace, struct fgraph_ops *gops); +void ftrace_stub_graph(struct ftrace_graph_ret *trace, struct fgraph_ops *gops, + struct ftrace_regs *fregs); /* The callbacks that hook a function */ trace_func_graph_ret_t ftrace_graph_return = ftrace_stub_graph; @@ -1174,7 +1186,8 @@ void ftrace_graph_exit_task(struct task_struct *t) #ifdef CONFIG_DYNAMIC_FTRACE static int fgraph_pid_func(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops) + struct fgraph_ops *gops, + struct ftrace_regs *fregs) { struct trace_array *tr = gops->ops.private; int pid; @@ -1188,7 +1201,7 @@ static int fgraph_pid_func(struct ftrace_graph_ent *trace, return 0; } - return gops->saved_func(trace, gops); + return gops->saved_func(trace, gops, fregs); } void fgraph_update_pid_func(void) diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index 9ff018245840..2560b312ad57 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -8,98 +8,224 @@ #include <linux/fprobe.h> #include <linux/kallsyms.h> #include <linux/kprobes.h> -#include <linux/rethook.h> +#include <linux/list.h> +#include <linux/mutex.h> #include <linux/slab.h> #include <linux/sort.h> +#include <asm/fprobe.h> + #include "trace.h" -struct fprobe_rethook_node { - struct rethook_node node; - unsigned long entry_ip; - unsigned long entry_parent_ip; - char data[]; -}; +#define FPROBE_IP_HASH_BITS 8 +#define FPROBE_IP_TABLE_SIZE (1 << FPROBE_IP_HASH_BITS) -static inline void __fprobe_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct ftrace_regs *fregs) -{ - struct fprobe_rethook_node *fpr; - struct rethook_node *rh = NULL; - struct fprobe *fp; - void *entry_data = NULL; - int ret = 0; +#define FPROBE_HASH_BITS 6 +#define FPROBE_TABLE_SIZE (1 << FPROBE_HASH_BITS) - fp = container_of(ops, struct fprobe, ops); +#define SIZE_IN_LONG(x) ((x + sizeof(long) - 1) >> (sizeof(long) == 8 ? 3 : 2)) - if (fp->exit_handler) { - rh = rethook_try_get(fp->rethook); - if (!rh) { - fp->nmissed++; - return; - } - fpr = container_of(rh, struct fprobe_rethook_node, node); - fpr->entry_ip = ip; - fpr->entry_parent_ip = parent_ip; - if (fp->entry_data_size) - entry_data = fpr->data; +/* + * fprobe_table: hold 'fprobe_hlist::hlist' for checking the fprobe still + * exists. The key is the address of fprobe instance. + * fprobe_ip_table: hold 'fprobe_hlist::array[*]' for searching the fprobe + * instance related to the funciton address. The key is the ftrace IP + * address. + * + * When unregistering the fprobe, fprobe_hlist::fp and fprobe_hlist::array[*].fp + * are set NULL and delete those from both hash tables (by hlist_del_rcu). + * After an RCU grace period, the fprobe_hlist itself will be released. + * + * fprobe_table and fprobe_ip_table can be accessed from either + * - Normal hlist traversal and RCU add/del under 'fprobe_mutex' is held. + * - RCU hlist traversal under disabling preempt + */ +static struct hlist_head fprobe_table[FPROBE_TABLE_SIZE]; +static struct hlist_head fprobe_ip_table[FPROBE_IP_TABLE_SIZE]; +static DEFINE_MUTEX(fprobe_mutex); + +/* + * Find first fprobe in the hlist. It will be iterated twice in the entry + * probe, once for correcting the total required size, the second time is + * calling back the user handlers. + * Thus the hlist in the fprobe_table must be sorted and new probe needs to + * be added *before* the first fprobe. + */ +static struct fprobe_hlist_node *find_first_fprobe_node(unsigned long ip) +{ + struct fprobe_hlist_node *node; + struct hlist_head *head; + + head = &fprobe_ip_table[hash_ptr((void *)ip, FPROBE_IP_HASH_BITS)]; + hlist_for_each_entry_rcu(node, head, hlist, + lockdep_is_held(&fprobe_mutex)) { + if (node->addr == ip) + return node; } + return NULL; +} +NOKPROBE_SYMBOL(find_first_fprobe_node); - if (fp->entry_handler) - ret = fp->entry_handler(fp, ip, parent_ip, ftrace_get_regs(fregs), entry_data); +/* Node insertion and deletion requires the fprobe_mutex */ +static void insert_fprobe_node(struct fprobe_hlist_node *node) +{ + unsigned long ip = node->addr; + struct fprobe_hlist_node *next; + struct hlist_head *head; - /* If entry_handler returns !0, nmissed is not counted. */ - if (rh) { - if (ret) - rethook_recycle(rh); - else - rethook_hook(rh, ftrace_get_regs(fregs), true); + lockdep_assert_held(&fprobe_mutex); + + next = find_first_fprobe_node(ip); + if (next) { + hlist_add_before_rcu(&node->hlist, &next->hlist); + return; } + head = &fprobe_ip_table[hash_ptr((void *)ip, FPROBE_IP_HASH_BITS)]; + hlist_add_head_rcu(&node->hlist, head); } -static void fprobe_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct ftrace_regs *fregs) +/* Return true if there are synonims */ +static bool delete_fprobe_node(struct fprobe_hlist_node *node) { - struct fprobe *fp; - int bit; + lockdep_assert_held(&fprobe_mutex); - fp = container_of(ops, struct fprobe, ops); - if (fprobe_disabled(fp)) - return; + WRITE_ONCE(node->fp, NULL); + hlist_del_rcu(&node->hlist); + return !!find_first_fprobe_node(node->addr); +} - /* recursion detection has to go before any traceable function and - * all functions before this point should be marked as notrace - */ - bit = ftrace_test_recursion_trylock(ip, parent_ip); - if (bit < 0) { - fp->nmissed++; - return; +/* Check existence of the fprobe */ +static bool is_fprobe_still_exist(struct fprobe *fp) +{ + struct hlist_head *head; + struct fprobe_hlist *fph; + + head = &fprobe_table[hash_ptr(fp, FPROBE_HASH_BITS)]; + hlist_for_each_entry_rcu(fph, head, hlist, + lockdep_is_held(&fprobe_mutex)) { + if (fph->fp == fp) + return true; } - __fprobe_handler(ip, parent_ip, ops, fregs); - ftrace_test_recursion_unlock(bit); + return false; +} +NOKPROBE_SYMBOL(is_fprobe_still_exist); + +static int add_fprobe_hash(struct fprobe *fp) +{ + struct fprobe_hlist *fph = fp->hlist_array; + struct hlist_head *head; + + lockdep_assert_held(&fprobe_mutex); + + if (WARN_ON_ONCE(!fph)) + return -EINVAL; + + if (is_fprobe_still_exist(fp)) + return -EEXIST; + + head = &fprobe_table[hash_ptr(fp, FPROBE_HASH_BITS)]; + hlist_add_head_rcu(&fp->hlist_array->hlist, head); + return 0; +} + +static int del_fprobe_hash(struct fprobe *fp) +{ + struct fprobe_hlist *fph = fp->hlist_array; + lockdep_assert_held(&fprobe_mutex); + + if (WARN_ON_ONCE(!fph)) + return -EINVAL; + + if (!is_fprobe_still_exist(fp)) + return -ENOENT; + + fph->fp = NULL; + hlist_del_rcu(&fph->hlist); + return 0; } -NOKPROBE_SYMBOL(fprobe_handler); -static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct ftrace_regs *fregs) +#ifdef ARCH_DEFINE_ENCODE_FPROBE_HEADER + +/* The arch should encode fprobe_header info into one unsigned long */ +#define FPROBE_HEADER_SIZE_IN_LONG 1 + +static inline bool write_fprobe_header(unsigned long *stack, + struct fprobe *fp, unsigned int size_words) { + if (WARN_ON_ONCE(size_words > MAX_FPROBE_DATA_SIZE_WORD || + !arch_fprobe_header_encodable(fp))) + return false; + + *stack = arch_encode_fprobe_header(fp, size_words); + return true; +} + +static inline void read_fprobe_header(unsigned long *stack, + struct fprobe **fp, unsigned int *size_words) +{ + *fp = arch_decode_fprobe_header_fp(*stack); + *size_words = arch_decode_fprobe_header_size(*stack); +} + +#else + +/* Generic fprobe_header */ +struct __fprobe_header { struct fprobe *fp; - int bit; + unsigned long size_words; +} __packed; - fp = container_of(ops, struct fprobe, ops); - if (fprobe_disabled(fp)) - return; +#define FPROBE_HEADER_SIZE_IN_LONG SIZE_IN_LONG(sizeof(struct __fprobe_header)) - /* recursion detection has to go before any traceable function and - * all functions called before this point should be marked as notrace - */ - bit = ftrace_test_recursion_trylock(ip, parent_ip); - if (bit < 0) { - fp->nmissed++; - return; - } +static inline bool write_fprobe_header(unsigned long *stack, + struct fprobe *fp, unsigned int size_words) +{ + struct __fprobe_header *fph = (struct __fprobe_header *)stack; + + if (WARN_ON_ONCE(size_words > MAX_FPROBE_DATA_SIZE_WORD)) + return false; + + fph->fp = fp; + fph->size_words = size_words; + return true; +} + +static inline void read_fprobe_header(unsigned long *stack, + struct fprobe **fp, unsigned int *size_words) +{ + struct __fprobe_header *fph = (struct __fprobe_header *)stack; + + *fp = fph->fp; + *size_words = fph->size_words; +} + +#endif + +/* + * fprobe shadow stack management: + * Since fprobe shares a single fgraph_ops, it needs to share the stack entry + * among the probes on the same function exit. Note that a new probe can be + * registered before a target function is returning, we can not use the hash + * table to find the corresponding probes. Thus the probe address is stored on + * the shadow stack with its entry data size. + * + */ +static inline int __fprobe_handler(unsigned long ip, unsigned long parent_ip, + struct fprobe *fp, struct ftrace_regs *fregs, + void *data) +{ + if (!fp->entry_handler) + return 0; + + return fp->entry_handler(fp, ip, parent_ip, fregs, data); +} +static inline int __fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip, + struct fprobe *fp, struct ftrace_regs *fregs, + void *data) +{ + int ret; /* * This user handler is shared with other kprobes and is not expected to be * called recursively. So if any other kprobe handler is running, this will @@ -108,44 +234,183 @@ static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip, */ if (unlikely(kprobe_running())) { fp->nmissed++; - goto recursion_unlock; + return 0; } kprobe_busy_begin(); - __fprobe_handler(ip, parent_ip, ops, fregs); + ret = __fprobe_handler(ip, parent_ip, fp, fregs, data); kprobe_busy_end(); - -recursion_unlock: - ftrace_test_recursion_unlock(bit); + return ret; } -static void fprobe_exit_handler(struct rethook_node *rh, void *data, - unsigned long ret_ip, struct pt_regs *regs) +static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops, + struct ftrace_regs *fregs) { - struct fprobe *fp = (struct fprobe *)data; - struct fprobe_rethook_node *fpr; - int bit; + struct fprobe_hlist_node *node, *first; + unsigned long *fgraph_data = NULL; + unsigned long func = trace->func; + unsigned long ret_ip; + int reserved_words; + struct fprobe *fp; + int used, ret; - if (!fp || fprobe_disabled(fp)) - return; + if (WARN_ON_ONCE(!fregs)) + return 0; - fpr = container_of(rh, struct fprobe_rethook_node, node); + first = node = find_first_fprobe_node(func); + if (unlikely(!first)) + return 0; + + reserved_words = 0; + hlist_for_each_entry_from_rcu(node, hlist) { + if (node->addr != func) + break; + fp = READ_ONCE(node->fp); + if (!fp || !fp->exit_handler) + continue; + /* + * Since fprobe can be enabled until the next loop, we ignore the + * fprobe's disabled flag in this loop. + */ + reserved_words += + FPROBE_HEADER_SIZE_IN_LONG + SIZE_IN_LONG(fp->entry_data_size); + } + node = first; + if (reserved_words) { + fgraph_data = fgraph_reserve_data(gops->idx, reserved_words * sizeof(long)); + if (unlikely(!fgraph_data)) { + hlist_for_each_entry_from_rcu(node, hlist) { + if (node->addr != func) + break; + fp = READ_ONCE(node->fp); + if (fp && !fprobe_disabled(fp)) + fp->nmissed++; + } + return 0; + } + } /* - * we need to assure no calls to traceable functions in-between the - * end of fprobe_handler and the beginning of fprobe_exit_handler. + * TODO: recursion detection has been done in the fgraph. Thus we need + * to add a callback to increment missed counter. */ - bit = ftrace_test_recursion_trylock(fpr->entry_ip, fpr->entry_parent_ip); - if (bit < 0) { - fp->nmissed++; + ret_ip = ftrace_regs_get_return_address(fregs); + used = 0; + hlist_for_each_entry_from_rcu(node, hlist) { + int data_size; + void *data; + + if (node->addr != func) + break; + fp = READ_ONCE(node->fp); + if (!fp || fprobe_disabled(fp)) + continue; + + data_size = fp->entry_data_size; + if (data_size && fp->exit_handler) + data = fgraph_data + used + FPROBE_HEADER_SIZE_IN_LONG; + else + data = NULL; + + if (fprobe_shared_with_kprobes(fp)) + ret = __fprobe_kprobe_handler(func, ret_ip, fp, fregs, data); + else + ret = __fprobe_handler(func, ret_ip, fp, fregs, data); + + /* If entry_handler returns !0, nmissed is not counted but skips exit_handler. */ + if (!ret && fp->exit_handler) { + int size_words = SIZE_IN_LONG(data_size); + + if (write_fprobe_header(&fgraph_data[used], fp, size_words)) + used += FPROBE_HEADER_SIZE_IN_LONG + size_words; + } + } + if (used < reserved_words) + memset(fgraph_data + used, 0, reserved_words - used); + + /* If any exit_handler is set, data must be used. */ + return used != 0; +} +NOKPROBE_SYMBOL(fprobe_entry); + +static void fprobe_return(struct ftrace_graph_ret *trace, + struct fgraph_ops *gops, + struct ftrace_regs *fregs) +{ + unsigned long *fgraph_data = NULL; + unsigned long ret_ip; + struct fprobe *fp; + int size, curr; + int size_words; + + fgraph_data = (unsigned long *)fgraph_retrieve_data(gops->idx, &size); + if (WARN_ON_ONCE(!fgraph_data)) return; + size_words = SIZE_IN_LONG(size); + ret_ip = ftrace_regs_get_instruction_pointer(fregs); + + preempt_disable(); + + curr = 0; + while (size_words > curr) { + read_fprobe_header(&fgraph_data[curr], &fp, &size); + if (!fp) + break; + curr += FPROBE_HEADER_SIZE_IN_LONG; + if (is_fprobe_still_exist(fp) && !fprobe_disabled(fp)) { + if (WARN_ON_ONCE(curr + size > size_words)) + break; + fp->exit_handler(fp, trace->func, ret_ip, fregs, + size ? fgraph_data + curr : NULL); + } + curr += size; } + preempt_enable(); +} +NOKPROBE_SYMBOL(fprobe_return); + +static struct fgraph_ops fprobe_graph_ops = { + .entryfunc = fprobe_entry, + .retfunc = fprobe_return, +}; +static int fprobe_graph_active; - fp->exit_handler(fp, fpr->entry_ip, ret_ip, regs, - fp->entry_data_size ? (void *)fpr->data : NULL); - ftrace_test_recursion_unlock(bit); +/* Add @addrs to the ftrace filter and register fgraph if needed. */ +static int fprobe_graph_add_ips(unsigned long *addrs, int num) +{ + int ret; + + lockdep_assert_held(&fprobe_mutex); + + ret = ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 0, 0); + if (ret) + return ret; + + if (!fprobe_graph_active) { + ret = register_ftrace_graph(&fprobe_graph_ops); + if (WARN_ON_ONCE(ret)) { + ftrace_free_filter(&fprobe_graph_ops.ops); + return ret; + } + } + fprobe_graph_active++; + return 0; +} + +/* Remove @addrs from the ftrace filter and unregister fgraph if possible. */ +static void fprobe_graph_remove_ips(unsigned long *addrs, int num) +{ + lockdep_assert_held(&fprobe_mutex); + + fprobe_graph_active--; + if (!fprobe_graph_active) { + /* Q: should we unregister it ? */ + unregister_ftrace_graph(&fprobe_graph_ops); + return; + } + + ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0); } -NOKPROBE_SYMBOL(fprobe_exit_handler); static int symbols_cmp(const void *a, const void *b) { @@ -175,53 +440,97 @@ static unsigned long *get_ftrace_locations(const char **syms, int num) return ERR_PTR(-ENOENT); } -static void fprobe_init(struct fprobe *fp) -{ - fp->nmissed = 0; - if (fprobe_shared_with_kprobes(fp)) - fp->ops.func = fprobe_kprobe_handler; - else - fp->ops.func = fprobe_handler; - fp->ops.flags |= FTRACE_OPS_FL_SAVE_REGS; -} +struct filter_match_data { + const char *filter; + const char *notfilter; + size_t index; + size_t size; + unsigned long *addrs; +}; -static int fprobe_init_rethook(struct fprobe *fp, int num) +static int filter_match_callback(void *data, const char *name, unsigned long addr) { - int size; + struct filter_match_data *match = data; - if (!fp->exit_handler) { - fp->rethook = NULL; + if (!glob_match(match->filter, name) || + (match->notfilter && glob_match(match->notfilter, name))) return 0; - } - /* Initialize rethook if needed */ - if (fp->nr_maxactive) - num = fp->nr_maxactive; - else - num *= num_possible_cpus() * 2; - if (num <= 0) - return -EINVAL; + if (!ftrace_location(addr)) + return 0; + + if (match->addrs) + match->addrs[match->index] = addr; - size = sizeof(struct fprobe_rethook_node) + fp->entry_data_size; + match->index++; + return match->index == match->size; +} - /* Initialize rethook */ - fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler, size, num); - if (IS_ERR(fp->rethook)) - return PTR_ERR(fp->rethook); +/* + * Make IP list from the filter/no-filter glob patterns. + * Return the number of matched symbols, or -ENOENT. + */ +static int ip_list_from_filter(const char *filter, const char *notfilter, + unsigned long *addrs, size_t size) +{ + struct filter_match_data match = { .filter = filter, .notfilter = notfilter, + .index = 0, .size = size, .addrs = addrs}; + int ret; - return 0; + ret = kallsyms_on_each_symbol(filter_match_callback, &match); + if (ret < 0) + return ret; + ret = module_kallsyms_on_each_symbol(NULL, filter_match_callback, &match); + if (ret < 0) + return ret; + + return match.index ?: -ENOENT; } static void fprobe_fail_cleanup(struct fprobe *fp) { - if (!IS_ERR_OR_NULL(fp->rethook)) { - /* Don't need to cleanup rethook->handler because this is not used. */ - rethook_free(fp->rethook); - fp->rethook = NULL; + kfree(fp->hlist_array); + fp->hlist_array = NULL; +} + +/* Initialize the fprobe data structure. */ +static int fprobe_init(struct fprobe *fp, unsigned long *addrs, int num) +{ + struct fprobe_hlist *hlist_array; + unsigned long addr; + int size, i; + + if (!fp || !addrs || num <= 0) + return -EINVAL; + + size = ALIGN(fp->entry_data_size, sizeof(long)); + if (size > MAX_FPROBE_DATA_SIZE) + return -E2BIG; + fp->entry_data_size = size; + + hlist_array = kzalloc(struct_size(hlist_array, array, num), GFP_KERNEL); + if (!hlist_array) + return -ENOMEM; + + fp->nmissed = 0; + + hlist_array->size = num; + fp->hlist_array = hlist_array; + hlist_array->fp = fp; + for (i = 0; i < num; i++) { + hlist_array->array[i].fp = fp; + addr = ftrace_location(addrs[i]); + if (!addr) { + fprobe_fail_cleanup(fp); + return -ENOENT; + } + hlist_array->array[i].addr = addr; } - ftrace_free_filter(&fp->ops); + return 0; } +#define FPROBE_IPS_MAX INT_MAX + /** * register_fprobe() - Register fprobe to ftrace by pattern. * @fp: A fprobe data structure to be registered. @@ -235,46 +544,24 @@ static void fprobe_fail_cleanup(struct fprobe *fp) */ int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter) { - struct ftrace_hash *hash; - unsigned char *str; - int ret, len; + unsigned long *addrs; + int ret; if (!fp || !filter) return -EINVAL; - fprobe_init(fp); - - len = strlen(filter); - str = kstrdup(filter, GFP_KERNEL); - ret = ftrace_set_filter(&fp->ops, str, len, 0); - kfree(str); - if (ret) + ret = ip_list_from_filter(filter, notfilter, NULL, FPROBE_IPS_MAX); + if (ret < 0) return ret; - if (notfilter) { - len = strlen(notfilter); - str = kstrdup(notfilter, GFP_KERNEL); - ret = ftrace_set_notrace(&fp->ops, str, len, 0); - kfree(str); - if (ret) - goto out; - } - - /* TODO: - * correctly calculate the total number of filtered symbols - * from both filter and notfilter. - */ - hash = rcu_access_pointer(fp->ops.local_hash.filter_hash); - if (WARN_ON_ONCE(!hash)) - goto out; - - ret = fprobe_init_rethook(fp, (int)hash->count); - if (!ret) - ret = register_ftrace_function(&fp->ops); + addrs = kcalloc(ret, sizeof(unsigned long), GFP_KERNEL); + if (!addrs) + return -ENOMEM; + ret = ip_list_from_filter(filter, notfilter, addrs, ret); + if (ret > 0) + ret = register_fprobe_ips(fp, addrs, ret); -out: - if (ret) - fprobe_fail_cleanup(fp); + kfree(addrs); return ret; } EXPORT_SYMBOL_GPL(register_fprobe); @@ -282,7 +569,7 @@ EXPORT_SYMBOL_GPL(register_fprobe); /** * register_fprobe_ips() - Register fprobe to ftrace by address. * @f |
