diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/trace/fgraph.c | 1054 | ||||
| -rw-r--r-- | kernel/trace/ftrace.c | 688 | ||||
| -rw-r--r-- | kernel/trace/ftrace_internal.h | 18 | ||||
| -rw-r--r-- | kernel/trace/trace.h | 93 | ||||
| -rw-r--r-- | kernel/trace/trace_functions.c | 15 | ||||
| -rw-r--r-- | kernel/trace/trace_functions_graph.c | 96 | ||||
| -rw-r--r-- | kernel/trace/trace_irqsoff.c | 10 | ||||
| -rw-r--r-- | kernel/trace/trace_sched_wakeup.c | 10 | ||||
| -rw-r--r-- | kernel/trace/trace_selftest.c | 259 |
9 files changed, 1894 insertions, 349 deletions
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index a130b2d898f7..fc205ad167a9 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -7,9 +7,11 @@ * * Highly modified by Steven Rostedt (VMware). */ +#include <linux/bits.h> #include <linux/jump_label.h> #include <linux/suspend.h> #include <linux/ftrace.h> +#include <linux/static_call.h> #include <linux/slab.h> #include <trace/events/sched.h> @@ -17,17 +19,447 @@ #include "ftrace_internal.h" #include "trace.h" -#ifdef CONFIG_DYNAMIC_FTRACE -#define ASSIGN_OPS_HASH(opsname, val) \ - .func_hash = val, \ - .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock), -#else -#define ASSIGN_OPS_HASH(opsname, val) -#endif +/* + * FGRAPH_FRAME_SIZE: Size in bytes of the meta data on the shadow stack + * FGRAPH_FRAME_OFFSET: Size in long words of the meta data frame + */ +#define FGRAPH_FRAME_SIZE sizeof(struct ftrace_ret_stack) +#define FGRAPH_FRAME_OFFSET DIV_ROUND_UP(FGRAPH_FRAME_SIZE, sizeof(long)) + +/* + * On entry to a function (via function_graph_enter()), a new fgraph frame + * (ftrace_ret_stack) is pushed onto the stack as well as a word that + * holds a bitmask and a type (called "bitmap"). The bitmap is defined as: + * + * bits: 0 - 9 offset in words from the previous ftrace_ret_stack + * + * bits: 10 - 11 Type of storage + * 0 - reserved + * 1 - bitmap of fgraph_array index + * 2 - reserved data + * + * For type with "bitmap of fgraph_array index" (FGRAPH_TYPE_BITMAP): + * bits: 12 - 27 The bitmap of fgraph_ops fgraph_array index + * That is, it's a bitmask of 0-15 (16 bits) + * where if a corresponding ops in the fgraph_array[] + * expects a callback from the return of the function + * it's corresponding bit will be set. + * + * + * The top of the ret_stack (when not empty) will always have a reference + * word that points to the last fgraph frame that was saved. + * + * For reserved data: + * bits: 12 - 17 The size in words that is stored + * bits: 18 - 23 The index of fgraph_array, which shows who is stored + * + * That is, at the end of function_graph_enter, if the first and forth + * fgraph_ops on the fgraph_array[] (index 0 and 3) needs their retfunc called + * on the return of the function being traced, and the forth fgraph_ops + * stored two words of data, this is what will be on the task's shadow + * ret_stack: (the stack grows upward) + * + * ret_stack[SHADOW_STACK_OFFSET] + * | SHADOW_STACK_TASK_VARS(ret_stack)[15] | + * ... + * | SHADOW_STACK_TASK_VARS(ret_stack)[0] | + * ret_stack[SHADOW_STACK_MAX_OFFSET] + * ... + * | | <- task->curr_ret_stack + * +--------------------------------------------+ + * | (3 << 12) | (3 << 10) | FGRAPH_FRAME_OFFSET| + * | *or put another way* | + * | (3 << FGRAPH_DATA_INDEX_SHIFT)| \ | This is for fgraph_ops[3]. + * | ((2 - 1) << FGRAPH_DATA_SHIFT)| \ | The data size is 2 words. + * | (FGRAPH_TYPE_DATA << FGRAPH_TYPE_SHIFT)| \ | + * | (offset2:FGRAPH_FRAME_OFFSET+3) | <- the offset2 is from here + * +--------------------------------------------+ ( It is 4 words from the ret_stack) + * | STORED DATA WORD 2 | + * | STORED DATA WORD 1 | + * +--------------------------------------------+ + * | (9 << 12) | (1 << 10) | FGRAPH_FRAME_OFFSET| + * | *or put another way* | + * | (BIT(3)|BIT(0)) << FGRAPH_INDEX_SHIFT | \ | + * | FGRAPH_TYPE_BITMAP << FGRAPH_TYPE_SHIFT| \ | + * | (offset1:FGRAPH_FRAME_OFFSET) | <- the offset1 is from here + * +--------------------------------------------+ + * | struct ftrace_ret_stack | + * | (stores the saved ret pointer) | <- the offset points here + * +--------------------------------------------+ + * | (X) | (N) | ( N words away from + * | | previous ret_stack) + * ... + * ret_stack[0] + * + * If a backtrace is required, and the real return pointer needs to be + * fetched, then it looks at the task's curr_ret_stack offset, if it + * is greater than zero (reserved, or right before popped), it would mask + * the value by FGRAPH_FRAME_OFFSET_MASK to get the offset of the + * ftrace_ret_stack structure stored on the shadow stack. + */ + +/* + * The following is for the top word on the stack: + * + * FGRAPH_FRAME_OFFSET (0-9) holds the offset delta to the fgraph frame + * FGRAPH_TYPE (10-11) holds the type of word this is. + * (RESERVED or BITMAP) + */ +#define FGRAPH_FRAME_OFFSET_BITS 10 +#define FGRAPH_FRAME_OFFSET_MASK GENMASK(FGRAPH_FRAME_OFFSET_BITS - 1, 0) + +#define FGRAPH_TYPE_BITS 2 +#define FGRAPH_TYPE_MASK GENMASK(FGRAPH_TYPE_BITS - 1, 0) +#define FGRAPH_TYPE_SHIFT FGRAPH_FRAME_OFFSET_BITS + +enum { + FGRAPH_TYPE_RESERVED = 0, + FGRAPH_TYPE_BITMAP = 1, + FGRAPH_TYPE_DATA = 2, +}; + +/* + * For BITMAP type: + * FGRAPH_INDEX (12-27) bits holding the gops index wanting return callback called + */ +#define FGRAPH_INDEX_BITS 16 +#define FGRAPH_INDEX_MASK GENMASK(FGRAPH_INDEX_BITS - 1, 0) +#define FGRAPH_INDEX_SHIFT (FGRAPH_TYPE_SHIFT + FGRAPH_TYPE_BITS) + +/* + * For DATA type: + * FGRAPH_DATA (12-17) bits hold the size of data (in words) + * FGRAPH_INDEX (18-23) bits hold the index for which gops->idx the data is for + * + * Note: + * data_size == 0 means 1 word, and 31 (=2^5 - 1) means 32 words. + */ +#define FGRAPH_DATA_BITS 5 +#define FGRAPH_DATA_MASK GENMASK(FGRAPH_DATA_BITS - 1, 0) +#define FGRAPH_DATA_SHIFT (FGRAPH_TYPE_SHIFT + FGRAPH_TYPE_BITS) +#define FGRAPH_MAX_DATA_SIZE (sizeof(long) * (1 << FGRAPH_DATA_BITS)) + +#define FGRAPH_DATA_INDEX_BITS 4 +#define FGRAPH_DATA_INDEX_MASK GENMASK(FGRAPH_DATA_INDEX_BITS - 1, 0) +#define FGRAPH_DATA_INDEX_SHIFT (FGRAPH_DATA_SHIFT + FGRAPH_DATA_BITS) + +#define FGRAPH_MAX_INDEX \ + ((FGRAPH_INDEX_SIZE << FGRAPH_DATA_BITS) + FGRAPH_RET_INDEX) + +#define FGRAPH_ARRAY_SIZE FGRAPH_INDEX_BITS + +/* + * SHADOW_STACK_SIZE: The size in bytes of the entire shadow stack + * SHADOW_STACK_OFFSET: The size in long words of the shadow stack + * SHADOW_STACK_MAX_OFFSET: The max offset of the stack for a new frame to be added + */ +#define SHADOW_STACK_SIZE (PAGE_SIZE) +#define SHADOW_STACK_OFFSET (SHADOW_STACK_SIZE / sizeof(long)) +/* Leave on a buffer at the end */ +#define SHADOW_STACK_MAX_OFFSET \ + (SHADOW_STACK_OFFSET - (FGRAPH_FRAME_OFFSET + 1 + FGRAPH_ARRAY_SIZE)) + +/* RET_STACK(): Return the frame from a given @offset from task @t */ +#define RET_STACK(t, offset) ((struct ftrace_ret_stack *)(&(t)->ret_stack[offset])) + +/* + * Each fgraph_ops has a reservered unsigned long at the end (top) of the + * ret_stack to store task specific state. + */ +#define SHADOW_STACK_TASK_VARS(ret_stack) \ + ((unsigned long *)(&(ret_stack)[SHADOW_STACK_OFFSET - FGRAPH_ARRAY_SIZE])) DEFINE_STATIC_KEY_FALSE(kill_ftrace_graph); int ftrace_graph_active; +static struct fgraph_ops *fgraph_array[FGRAPH_ARRAY_SIZE]; +static unsigned long fgraph_array_bitmask; + +/* LRU index table for fgraph_array */ +static int fgraph_lru_table[FGRAPH_ARRAY_SIZE]; +static int fgraph_lru_next; +static int fgraph_lru_last; + +/* Initialize fgraph_lru_table with unused index */ +static void fgraph_lru_init(void) +{ + int i; + + for (i = 0; i < FGRAPH_ARRAY_SIZE; i++) + fgraph_lru_table[i] = i; +} + +/* Release the used index to the LRU table */ +static int fgraph_lru_release_index(int idx) +{ + if (idx < 0 || idx >= FGRAPH_ARRAY_SIZE || + WARN_ON_ONCE(fgraph_lru_table[fgraph_lru_last] != -1)) + return -1; + + fgraph_lru_table[fgraph_lru_last] = idx; + fgraph_lru_last = (fgraph_lru_last + 1) % FGRAPH_ARRAY_SIZE; + + clear_bit(idx, &fgraph_array_bitmask); + return 0; +} + +/* Allocate a new index from LRU table */ +static int fgraph_lru_alloc_index(void) +{ + int idx = fgraph_lru_table[fgraph_lru_next]; + + /* No id is available */ + if (idx == -1) + return -1; + + fgraph_lru_table[fgraph_lru_next] = -1; + fgraph_lru_next = (fgraph_lru_next + 1) % FGRAPH_ARRAY_SIZE; + + set_bit(idx, &fgraph_array_bitmask); + return idx; +} + +/* Get the offset to the fgraph frame from a ret_stack value */ +static inline int __get_offset(unsigned long val) +{ + return val & FGRAPH_FRAME_OFFSET_MASK; +} + +/* Get the type of word from a ret_stack value */ +static inline int __get_type(unsigned long val) +{ + return (val >> FGRAPH_TYPE_SHIFT) & FGRAPH_TYPE_MASK; +} + +/* Get the data_index for a DATA type ret_stack word */ +static inline int __get_data_index(unsigned long val) +{ + return (val >> FGRAPH_DATA_INDEX_SHIFT) & FGRAPH_DATA_INDEX_MASK; +} + +/* Get the data_size for a DATA type ret_stack word */ +static inline int __get_data_size(unsigned long val) +{ + return ((val >> FGRAPH_DATA_SHIFT) & FGRAPH_DATA_MASK) + 1; +} + +/* Get the word from the ret_stack at @offset */ +static inline unsigned long get_fgraph_entry(struct task_struct *t, int offset) +{ + return t->ret_stack[offset]; +} + +/* Get the FRAME_OFFSET from the word from the @offset on ret_stack */ +static inline int get_frame_offset(struct task_struct *t, int offset) +{ + return __get_offset(t->ret_stack[offset]); +} + +/* For BITMAP type: get the bitmask from the @offset at ret_stack */ +static inline unsigned long +get_bitmap_bits(struct task_struct *t, int offset) +{ + return (t->ret_stack[offset] >> FGRAPH_INDEX_SHIFT) & FGRAPH_INDEX_MASK; +} + +/* Write the bitmap to the ret_stack at @offset (does index, offset and bitmask) */ +static inline void +set_bitmap(struct task_struct *t, int offset, unsigned long bitmap) +{ + t->ret_stack[offset] = (bitmap << FGRAPH_INDEX_SHIFT) | + (FGRAPH_TYPE_BITMAP << FGRAPH_TYPE_SHIFT) | FGRAPH_FRAME_OFFSET; +} + +/* For DATA type: get the data saved under the ret_stack word at @offset */ +static inline void *get_data_type_data(struct task_struct *t, int offset) +{ + unsigned long val = t->ret_stack[offset]; + + if (__get_type(val) != FGRAPH_TYPE_DATA) + return NULL; + offset -= __get_data_size(val); + return (void *)&t->ret_stack[offset]; +} + +/* Create the ret_stack word for a DATA type */ +static inline unsigned long make_data_type_val(int idx, int size, int offset) +{ + return (idx << FGRAPH_DATA_INDEX_SHIFT) | + ((size - 1) << FGRAPH_DATA_SHIFT) | + (FGRAPH_TYPE_DATA << FGRAPH_TYPE_SHIFT) | offset; +} + +/* ftrace_graph_entry set to this to tell some archs to run function graph */ +static int entry_run(struct ftrace_graph_ent *trace, struct fgraph_ops *ops) +{ + return 0; +} + +/* ftrace_graph_return set to this to tell some archs to run function graph */ +static void return_run(struct ftrace_graph_ret *trace, struct fgraph_ops *ops) +{ +} + +static void ret_stack_set_task_var(struct task_struct *t, int idx, long val) +{ + unsigned long *gvals = SHADOW_STACK_TASK_VARS(t->ret_stack); + + gvals[idx] = val; +} + +static unsigned long * +ret_stack_get_task_var(struct task_struct *t, int idx) +{ + unsigned long *gvals = SHADOW_STACK_TASK_VARS(t->ret_stack); + + return &gvals[idx]; +} + +static void ret_stack_init_task_vars(unsigned long *ret_stack) +{ + unsigned long *gvals = SHADOW_STACK_TASK_VARS(ret_stack); + + memset(gvals, 0, sizeof(*gvals) * FGRAPH_ARRAY_SIZE); +} + +/** + * fgraph_reserve_data - Reserve storage on the task's ret_stack + * @idx: The index of fgraph_array + * @size_bytes: The size in bytes to reserve + * + * Reserves space of up to FGRAPH_MAX_DATA_SIZE bytes on the + * task's ret_stack shadow stack, for a given fgraph_ops during + * the entryfunc() call. If entryfunc() returns zero, the storage + * is discarded. An entryfunc() can only call this once per iteration. + * The fgraph_ops retfunc() can retrieve this stored data with + * fgraph_retrieve_data(). + * + * Returns: On success, a pointer to the data on the stack. + * Otherwise, NULL if there's not enough space left on the + * ret_stack for the data, or if fgraph_reserve_data() was called + * more than once for a single entryfunc() call. + */ +void *fgraph_reserve_data(int idx, int size_bytes) +{ + unsigned long val; + void *data; + int curr_ret_stack = current->curr_ret_stack; + int data_size; + + if (size_bytes > FGRAPH_MAX_DATA_SIZE) + return NULL; + + /* Convert the data size to number of longs. */ + data_size = (size_bytes + sizeof(long) - 1) >> (sizeof(long) == 4 ? 2 : 3); + + val = get_fgraph_entry(current, curr_ret_stack - 1); + data = ¤t->ret_stack[curr_ret_stack]; + + curr_ret_stack += data_size + 1; + if (unlikely(curr_ret_stack >= SHADOW_STACK_MAX_OFFSET)) + return NULL; + + val = make_data_type_val(idx, data_size, __get_offset(val) + data_size + 1); + + /* Set the last word to be reserved */ + current->ret_stack[curr_ret_stack - 1] = val; + + /* Make sure interrupts see this */ + barrier(); + current->curr_ret_stack = curr_ret_stack; + /* Again sync with interrupts, and reset reserve */ + current->ret_stack[curr_ret_stack - 1] = val; + + return data; +} + +/** + * fgraph_retrieve_data - Retrieve stored data from fgraph_reserve_data() + * @idx: the index of fgraph_array (fgraph_ops::idx) + * @size_bytes: pointer to retrieved data size. + * + * This is to be called by a fgraph_ops retfunc(), to retrieve data that + * was stored by the fgraph_ops entryfunc() on the function entry. + * That is, this will retrieve the data that was reserved on the + * entry of the function that corresponds to the exit of the function + * that the fgraph_ops retfunc() is called on. + * + * Returns: The stored data from fgraph_reserve_data() called by the + * matching entryfunc() for the retfunc() this is called from. + * Or NULL if there was nothing stored. + */ +void *fgraph_retrieve_data(int idx, int *size_bytes) +{ + int offset = current->curr_ret_stack - 1; + unsigned long val; + + val = get_fgraph_entry(current, offset); + while (__get_type(val) == FGRAPH_TYPE_DATA) { + if (__get_data_index(val) == idx) + goto found; + offset -= __get_data_size(val) + 1; + val = get_fgraph_entry(current, offset); + } + return NULL; +found: + if (size_bytes) + *size_bytes = __get_data_size(val) * sizeof(long); + return get_data_type_data(current, offset); +} + +/** + * fgraph_get_task_var - retrieve a task specific state variable + * @gops: The ftrace_ops that owns the task specific variable + * + * Every registered fgraph_ops has a task state variable + * reserved on the task's ret_stack. This function returns the + * address to that variable. + * + * Returns the address to the fgraph_ops @gops tasks specific + * unsigned long variable. + */ +unsigned long *fgraph_get_task_var(struct fgraph_ops *gops) +{ + return ret_stack_get_task_var(current, gops->idx); +} + +/* + * @offset: The offset into @t->ret_stack to find the ret_stack entry + * @frame_offset: Where to place the offset into @t->ret_stack of that entry + * + * Returns a pointer to the previous ret_stack below @offset or NULL + * when it reaches the bottom of the stack. + * + * Calling this with: + * + * offset = task->curr_ret_stack; + * do { + * ret_stack = get_ret_stack(task, offset, &offset); + * } while (ret_stack); + * + * Will iterate through all the ret_stack entries from curr_ret_stack + * down to the first one. + */ +static inline struct ftrace_ret_stack * +get_ret_stack(struct task_struct *t, int offset, int *frame_offset) +{ + int offs; + + BUILD_BUG_ON(FGRAPH_FRAME_SIZE % sizeof(long)); + + if (unlikely(offset <= 0)) + return NULL; + + offs = get_frame_offset(t, --offset); + if (WARN_ON_ONCE(offs <= 0 || offs > offset)) + return NULL; + + offset -= offs; + + *frame_offset = offset; + return RET_STACK(t, offset); +} + /* Both enabled by default (can be cleared by function_graph tracer flags */ static bool fgraph_sleep_time = true; @@ -51,6 +483,27 @@ int __weak ftrace_disable_ftrace_graph_caller(void) } #endif +int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, + struct fgraph_ops *gops) +{ + return 0; +} + +static void ftrace_graph_ret_stub(struct ftrace_graph_ret *trace, + struct fgraph_ops *gops) +{ +} + +static struct fgraph_ops fgraph_stub = { + .entryfunc = ftrace_graph_entry_stub, + .retfunc = ftrace_graph_ret_stub, +}; + +static struct fgraph_ops *fgraph_direct_gops = &fgraph_stub; +DEFINE_STATIC_CALL(fgraph_func, ftrace_graph_entry_stub); +DEFINE_STATIC_CALL(fgraph_retfunc, ftrace_graph_ret_stub); +static DEFINE_STATIC_KEY_TRUE(fgraph_do_direct); + /** * ftrace_graph_stop - set to permanently disable function graph tracing * @@ -67,10 +520,13 @@ void ftrace_graph_stop(void) /* Add a function return address to the trace stack on thread info.*/ static int ftrace_push_return_trace(unsigned long ret, unsigned long func, - unsigned long frame_pointer, unsigned long *retp) + unsigned long frame_pointer, unsigned long *retp, + int fgraph_idx) { + struct ftrace_ret_stack *ret_stack; unsigned long long calltime; - int index; + unsigned long val; + int offset; if (unlikely(ftrace_graph_is_dead())) return -EBUSY; @@ -78,32 +534,67 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, if (!current->ret_stack) return -EBUSY; + BUILD_BUG_ON(SHADOW_STACK_SIZE % sizeof(long)); + + /* Set val to "reserved" with the delta to the new fgraph frame */ + val = (FGRAPH_TYPE_RESERVED << FGRAPH_TYPE_SHIFT) | FGRAPH_FRAME_OFFSET; + /* * We must make sure the ret_stack is tested before we read * anything else. */ smp_rmb(); - /* The return trace stack is full */ - if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) { + /* + * Check if there's room on the shadow stack to fit a fraph frame + * and a bitmap word. + */ + if (current->curr_ret_stack + FGRAPH_FRAME_OFFSET + 1 >= SHADOW_STACK_MAX_OFFSET) { atomic_inc(¤t->trace_overrun); return -EBUSY; } calltime = trace_clock_local(); - index = ++current->curr_ret_stack; + offset = READ_ONCE(current->curr_ret_stack); + ret_stack = RET_STACK(current, offset); + offset += FGRAPH_FRAME_OFFSET; + + /* ret offset = FGRAPH_FRAME_OFFSET ; type = reserved */ + current->ret_stack[offset] = val; + ret_stack->ret = ret; + /* + * The unwinders expect curr_ret_stack to point to either zero + * or an offset where to find the next ret_stack. Even though the + * ret stack might be bogus, we want to write the ret and the + * offset to find the ret_stack before we increment the stack point. + * If an interrupt comes in now before we increment the curr_ret_stack + * it may blow away what we wrote. But that's fine, because the + * offset will still be correct (even though the 'ret' won't be). + * What we worry about is the offset being correct after we increment + * the curr_ret_stack and before we update that offset, as if an + * interrupt comes in and does an unwind stack dump, it will need + * at least a correct offset! + */ + barrier(); + WRITE_ONCE(current->curr_ret_stack, offset + 1); + /* + * This next barrier is to ensure that an interrupt coming in + * will not corrupt what we are about to write. + */ barrier(); - current->ret_stack[index].ret = ret; - current->ret_stack[index].func = func; - current->ret_stack[index].calltime = calltime; + + /* Still keep it reserved even if an interrupt came in */ + current->ret_stack[offset] = val; + + ret_stack->ret = ret; + ret_stack->func = func; + ret_stack->calltime = calltime; #ifdef HAVE_FUNCTION_GRAPH_FP_TEST - current->ret_stack[index].fp = frame_pointer; -#endif -#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR - current->ret_stack[index].retp = retp; + ret_stack->fp = frame_pointer; #endif - return 0; + ret_stack->retp = retp; + return offset; } /* @@ -120,44 +611,85 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, # define MCOUNT_INSN_SIZE 0 #endif +/* If the caller does not use ftrace, call this function. */ int function_graph_enter(unsigned long ret, unsigned long func, unsigned long frame_pointer, unsigned long *retp) { struct ftrace_graph_ent trace; + unsigned long bitmap = 0; + int offset; + int i; trace.func = func; trace.depth = ++current->curr_ret_depth; - if (ftrace_push_return_trace(ret, func, frame_pointer, retp)) + offset = ftrace_push_return_trace(ret, func, frame_pointer, retp, 0); + if (offset < 0) goto out; - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) +#ifdef CONFIG_HAVE_STATIC_CALL + if (static_branch_likely(&fgraph_do_direct)) { + int save_curr_ret_stack = current->curr_ret_stack; + + if (static_call(fgraph_func)(&trace, fgraph_direct_gops)) + bitmap |= BIT(fgraph_direct_gops->idx); + else + /* Clear out any saved storage */ + current->curr_ret_stack = save_curr_ret_stack; + } else +#endif + { + for_each_set_bit(i, &fgraph_array_bitmask, + sizeof(fgraph_array_bitmask) * BITS_PER_BYTE) { + struct fgraph_ops *gops = READ_ONCE(fgraph_array[i]); + int save_curr_ret_stack; + + if (gops == &fgraph_stub) + continue; + + save_curr_ret_stack = current->curr_ret_stack; + if (ftrace_ops_test(&gops->ops, func, NULL) && + gops->entryfunc(&trace, gops)) + bitmap |= BIT(i); + else + /* Clear out any saved storage */ + current->curr_ret_stack = save_curr_ret_stack; + } + } + + if (!bitmap) goto out_ret; + /* + * Since this function uses fgraph_idx = 0 as a tail-call checking + * flag, set that bit always. + */ + set_bitmap(current, offset, bitmap | BIT(0)); + return 0; out_ret: - current->curr_ret_stack--; + current->curr_ret_stack -= FGRAPH_FRAME_OFFSET + 1; out: current->curr_ret_depth--; return -EBUSY; } /* Retrieve a function return address to the trace stack on thread info.*/ -static void +static struct ftrace_ret_stack * ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, - unsigned long frame_pointer) + unsigned long frame_pointer, int *offset) { - int index; + struct ftrace_ret_stack *ret_stack; - index = current->curr_ret_stack; + ret_stack = get_ret_stack(current, current->curr_ret_stack, offset); - if (unlikely(index < 0 || index >= FTRACE_RETFUNC_DEPTH)) { + if (unlikely(!ret_stack)) { ftrace_graph_stop(); - WARN_ON(1); + WARN(1, "Bad function graph ret_stack pointer: %d", + current->curr_ret_stack); /* Might as well panic, otherwise we have no where to go */ *ret = (unsigned long)panic; - return; + return NULL; } #ifdef HAVE_FUNCTION_GRAPH_FP_TEST @@ -175,30 +707,33 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, * Note, -mfentry does not use frame pointers, and this test * is not needed if CC_USING_FENTRY is set. */ - if (unlikely(current->ret_stack[index].fp != frame_pointer)) { + if (unlikely(ret_stack->fp != frame_pointer)) { ftrace_graph_stop(); WARN(1, "Bad frame pointer: expected %lx, received %lx\n" " from func %ps return to %lx\n", - current->ret_stack[index].fp, + ret_stack->fp, frame_pointer, - (void *)current->ret_stack[index].func, - current->ret_stack[index].ret); + (void *)ret_stack->func, + ret_stack->ret); *ret = (unsigned long)panic; - return; + return NULL; } #endif - *ret = current->ret_stack[index].ret; - trace->func = current->ret_stack[index].func; - trace->calltime = current->ret_stack[index].calltime; + *offset += FGRAPH_FRAME_OFFSET; + *ret = ret_stack->ret; + trace->func = ret_stack->func; + trace->calltime = ret_stack->calltime; trace->overrun = atomic_read(¤t->trace_overrun); - trace->depth = current->curr_ret_depth--; + trace->depth = current->curr_ret_depth; /* * We still want to trace interrupts coming in if * max_depth is set to 1. Make sure the decrement is * seen before ftrace_graph_return. */ barrier(); + + return ret_stack; } /* @@ -236,30 +771,55 @@ struct fgraph_ret_regs; static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs, unsigned long frame_pointer) { + struct ftrace_ret_stack *ret_stack; struct ftrace_graph_ret trace; + unsigned long bitmap; unsigned long ret; + int offset; + int i; + + ret_stack = ftrace_pop_return_trace(&trace, &ret, frame_pointer, &offset); + + if (unlikely(!ret_stack)) { + ftrace_graph_stop(); + WARN_ON(1); + /* Might as well panic. What else to do? */ + return (unsigned long)panic; + } - ftrace_pop_return_trace(&trace, &ret, frame_pointer); + trace.rettime = trace_clock_local(); #ifdef CONFIG_FUNCTION_GRAPH_RETVAL trace.retval = fgraph_ret_regs_return_value(ret_regs); #endif - trace.rettime = trace_clock_local(); - ftrace_graph_return(&trace); + + bitmap = get_bitmap_bits(current, offset); + +#ifdef CONFIG_HAVE_STATIC_CALL + if (static_branch_likely(&fgraph_do_direct)) { + if (test_bit(fgraph_direct_gops->idx, &bitmap)) + static_call(fgraph_retfunc)(&trace, fgraph_direct_gops); + } else +#endif + { + for_each_set_bit(i, &bitmap, sizeof(bitmap) * BITS_PER_BYTE) { + struct fgraph_ops *gops = fgraph_array[i]; + + if (gops == &fgraph_stub) + continue; + + gops->retfunc(&trace, gops); + } + } + /* * The ftrace_graph_return() may still access the current * ret_stack structure, we need to make sure the update of * curr_ret_stack is after that. */ barrier(); - current->curr_ret_stack--; - - if (unlikely(!ret)) { - ftrace_graph_stop(); - WARN_ON(1); - /* Might as well panic. What else to do? */ - ret = (unsigned long)panic; - } + current->curr_ret_stack = offset - FGRAPH_FRAME_OFFSET; + current->curr_ret_depth--; return ret; } @@ -282,7 +842,7 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer) /** * ftrace_graph_get_ret_stack - return the entry of the shadow stack - * @task: The task to read the shadow stack from + * @task: The task to read the shadow stack from. * @idx: Index down the shadow stack * * Return the ret_struct on the shadow stack of the @task at the @@ -294,104 +854,116 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer) struct ftrace_ret_stack * ftrace_graph_get_ret_stack(struct task_struct *task, int idx) { - idx = task->curr_ret_stack - idx; + struct ftrace_ret_stack *ret_stack = NULL; + int offset = task->curr_ret_stack; - if (idx >= 0 && idx <= task->curr_ret_stack) - return &task->ret_stack[idx]; + if (offset < 0) + return NULL; - return NULL; + do { + ret_stack = get_ret_stack(task, offset, &offset); + } while (ret_stack && --idx >= 0); + + return ret_stack; } /** - * ftrace_graph_ret_addr - convert a potentially modified stack return address - * to its original value + * ftrace_graph_ret_addr - return the original value of the return address + * @task: The task the unwinder is being executed on + * @idx: An initialized pointer to the next stack index to use + * @ret: The current return address (likely pointing to return_handler) + * @retp: The address on the stack of the current return location * * This function can be called by stack unwinding code to convert a found stack - * return address ('ret') to its original value, in case the function graph + * return address (@ret) to its original value, in case the function graph * tracer has modified it to be 'return_to_handler'. If the address hasn't - * been modified, the unchanged value of 'ret' is returned. + * been modified, the unchanged value of @ret is returned. * - * 'idx' is a state variable which should be initialized by the caller to zero - * before the first call. + * @idx holds the last index used to know where to start from. It should be + * initialized to zero for the first iteration as that will mean to start + * at the top of the shadow stack. If the location is found, this pointer + * will be assigned that location so that if called again, it will continue + * where it left off. * - * 'retp' is a pointer to the return address on the stack. It's ignored if - * the arch doesn't have HAVE_FUNCTION_GRAPH_RET_ADDR_PTR defined. + * @retp is a pointer to the return address on the stack. */ -#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret, unsigned long *retp) { - int index = task->curr_ret_stack; - int i; + struct ftrace_ret_stack *ret_stack; + unsigned long return_handler = (unsigned long)dereference_kernel_function_descriptor(return_to_handler); + int i = task->curr_ret_stack; - if (ret != (unsigned long)dereference_kernel_function_descriptor(return_to_handler)) + if (ret != return_handler) return ret; - if (index < 0) + if (!idx) return ret; - for (i = 0; i <= index; i++) - if (task->ret_stack[i].retp == retp) - return task->ret_stack[i].ret; + i = *idx ? : task->curr_ret_stack; + while (i > 0) { + ret_stack = get_ret_stack(current, i, &i); + if (!ret_stack) + break; + /* + * For the tail-call, there would be 2 or more ftrace_ret_stacks on + * the ret_stack, which records "return_to_handler" as the return + * address except for the last one. + * But on the real stack, there should be 1 entry because tail-call + * reuses the return address on the stack and jump to the next function. + * Thus we will continue to find real return address. + */ + if (ret_stack->retp == retp && + ret_stack->ret != return_handler) { + *idx = i; + return ret_stack->ret; + } + } return ret; } -#else /* !HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */ -unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, - unsigned long ret, unsigned long *retp) -{ - int task_idx; - - if (ret != (unsigned long)dereference_kernel_function_descriptor(return_to_handler)) - return ret; - - task_idx = task->curr_ret_stack; - - if (!task->ret_stack || task_idx < *idx) - return ret; - - task_idx -= *idx; - (*idx)++; - - return task->ret_stack[task_idx].ret; -} -#endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */ static struct ftrace_ops graph_ops = { .func = ftrace_graph_func, - .flags = FTRACE_OPS_FL_INITIALIZED | - FTRACE_OPS_FL_PID | - FTRACE_OPS_GRAPH_STUB, + .flags = FTRACE_OPS_GRAPH_STUB, #ifdef FTRACE_GRAPH_TRAMP_ADDR .trampoline = FTRACE_GRAPH_TRAMP_ADDR, /* trampoline_size is only needed for dynamically allocated tramps */ #endif - ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash) }; -void ftrace_graph_sleep_time_control(bool enable) +void fgraph_init_ops(struct ftrace_ops *dst_ops, + struct ftrace_ops *src_ops) { - fgraph_sleep_time = enable; + dst_ops->flags = FTRACE_OPS_FL_PID | FTRACE_OPS_GRAPH_STUB; + +#ifdef CONFIG_DYNAMIC_FTRACE + if (src_ops) { + dst_ops->func_hash = &src_ops->local_hash; + mutex_init(&dst_ops->local_hash.regex_lock); + INIT_LIST_HEAD(&dst_ops->subop_list); + dst_ops->flags |= FTRACE_OPS_FL_INITIALIZED; + } +#endif } -int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) +void ftrace_graph_sleep_time_control(bool enable) { - return 0; + fgraph_sleep_time = enable; } /* * Simply points to ftrace_stub, but with the proper protocol. * Defined by the linker script in linux/vmlinux.lds.h */ -extern void ftrace_stub_graph(struct ftrace_graph_ret *); +void ftrace_stub_graph(struct ftrace_graph_ret *trace, struct fgraph_ops *gops); /* The callbacks that hook a function */ trace_func_graph_ret_t ftrace_graph_return = ftrace_stub_graph; trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub; -static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub; /* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */ -static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) +static int alloc_retstack_tasklist(unsigned long **ret_stack_list) { int i; int ret = 0; @@ -399,10 +971,7 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) struct task_struct *g, *t; for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) { - ret_stack_list[i] = - kmalloc_array(FTRACE_RETFUNC_DEPTH, - sizeof(struct ftrace_ret_stack), - GFP_KERNEL); + ret_stack_list[i] = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); if (!ret_stack_list[i]) { start = 0; end = i; @@ -420,9 +989,10 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) if (t->ret_stack == NULL) { atomic_set(&t->trace_overrun, 0); - t->curr_ret_stack = -1; + ret_stack_init_task_vars(ret_stack_list[start]); + t->curr_ret_stack = 0; t->curr_ret_depth = -1; - /* Make sure the tasks see the -1 first: */ + /* Make sure the tasks see the 0 first: */ smp_wmb(); t->ret_stack = ret_stack_list[start++]; } @@ -442,8 +1012,9 @@ ftrace_graph_probe_sched_switch(void *ignore, bool preempt, struct task_struct *next, unsigned int prev_state) { + struct ftrace_ret_stack *ret_stack; unsigned long long timestamp; - int index; + int offs |
