• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /netgear-R7000-V1.0.7.12_1.2.5/components/opensource/linux/linux-2.6.36/kernel/trace/
1/*
2 * Kprobes-based tracing events
3 *
4 * Created by Masami Hiramatsu <mhiramat@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18 */
19
20#include <linux/module.h>
21#include <linux/uaccess.h>
22#include <linux/kprobes.h>
23#include <linux/seq_file.h>
24#include <linux/slab.h>
25#include <linux/smp.h>
26#include <linux/debugfs.h>
27#include <linux/types.h>
28#include <linux/string.h>
29#include <linux/ctype.h>
30#include <linux/ptrace.h>
31#include <linux/perf_event.h>
32#include <linux/stringify.h>
33#include <linux/limits.h>
34#include <linux/uaccess.h>
35#include <asm/bitsperlong.h>
36
37#include "trace.h"
38#include "trace_output.h"
39
/* Parser limits and default names */
#define MAX_TRACE_ARGS 128		/* Max fetch arguments per event */
#define MAX_ARGSTR_LEN 63		/* Max length of one argument string */
#define MAX_EVENT_NAME_LEN 64		/* Max length of an event name */
#define MAX_STRING_SIZE PATH_MAX	/* Max bytes fetched for a string */
#define KPROBE_EVENT_SYSTEM "kprobes"	/* Default event group name */

/* Reserved field names */
#define FIELD_STRING_IP "__probe_ip"
#define FIELD_STRING_RETIP "__probe_ret_ip"
#define FIELD_STRING_FUNC "__probe_func"
50
/*
 * Field names reserved by the common trace-event header or by kprobe
 * events themselves; user-supplied argument names must not collide with
 * these (checked by conflict_field_name()).
 */
const char *reserved_field_names[] = {
	"common_type",
	"common_flags",
	"common_preempt_count",
	"common_pid",
	"common_tgid",
	"common_lock_depth",
	FIELD_STRING_IP,
	FIELD_STRING_RETIP,
	FIELD_STRING_FUNC,
};
62
/* Printing function type: (seq buffer, field name, field data, entry base) */
typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *,
				 void *);
#define PRINT_TYPE_FUNC_NAME(type)	print_type_##type
#define PRINT_TYPE_FMT_NAME(type)	print_type_format_##type

/*
 * Printing  in basic type function template: expands to a
 * print_type_<type>() that emits " name=<value>" into the trace seq,
 * plus the matching printf-style format string used in the format file.
 */
#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast)			\
static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s,	\
						const char *name,	\
						void *data, void *ent)\
{									\
	return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\
}									\
static const char PRINT_TYPE_FMT_NAME(type)[] = fmt;

/* Unsigned types are printed in hex, signed types in decimal */
DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int)
DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int)
DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long)
DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long)
DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int)
DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int)
DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long)
DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long)
87
/*
 * data_rloc: data relative location, compatible with u32.
 * Packs a 16-bit length in the high half and a 16-bit relative offset
 * in the low half.
 */
#define make_data_rloc(len, roffs)	\
	(((u32)(len) << 16) | ((u32)(roffs) & 0xffff))
#define get_rloc_len(dl)	((u32)(dl) >> 16)
#define get_rloc_offs(dl)	((u32)(dl) & 0xffff)

/* Resolve a data_rloc to a pointer, offset relative to the rloc itself */
static inline void *get_rloc_data(u32 *dl)
{
	return (u8 *)dl + get_rloc_offs(*dl);
}

/* For data_loc conversion: offset is relative to the event entry @ent */
static inline void *get_loc_data(u32 *dl, void *ent)
{
	return (u8 *)ent + get_rloc_offs(*dl);
}

/*
 * Convert data_rloc to data_loc:
 *  data_rloc stores the offset from data_rloc itself, but data_loc
 *  stores the offset from event entry.
 */
#define convert_rloc_to_loc(dl, offs)	((u32)(dl) + (offs))

/* For defining macros, define string/string_size types */
typedef u32 string;		/* packed (len << 16 | offset) descriptor */
typedef u32 string_size;	/* string length including the NUL byte */
115
/*
 * Print type function for string type: prints " name=\"value\"", or
 * " name=(fault)" when the stored length is 0 (fetch faulted).
 */
static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,
						  const char *name,
						  void *data, void *ent)
{
	/* High 16 bits of the data_loc word carry the string length */
	int len = *(u32 *)data >> 16;

	if (!len)
		return trace_seq_printf(s, " %s=(fault)", name);
	else
		return trace_seq_printf(s, " %s=\"%s\"", name,
					(const char *)get_loc_data(data, ent));
}
static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
130
131/* Data fetch function type */
132typedef	void (*fetch_func_t)(struct pt_regs *, void *, void *);
133
134struct fetch_param {
135	fetch_func_t	fn;
136	void *data;
137};
138
139static __kprobes void call_fetch(struct fetch_param *fprm,
140				 struct pt_regs *regs, void *dest)
141{
142	return fprm->fn(regs, fprm->data, dest);
143}
144
#define FETCH_FUNC_NAME(method, type)	fetch_##method##_##type
/*
 * Define macro for basic types - we don't need to define s* types, because
 * we have to care only about bitwidth at recording time.
 */
#define DEFINE_BASIC_FETCH_FUNCS(method) \
DEFINE_FETCH_##method(u8)		\
DEFINE_FETCH_##method(u16)		\
DEFINE_FETCH_##method(u32)		\
DEFINE_FETCH_##method(u64)

/* True iff @fn is (a non-NULL) one of @method's fetch functions */
#define CHECK_FETCH_FUNCS(method, fn)			\
	(((FETCH_FUNC_NAME(method, u8) == fn) ||	\
	  (FETCH_FUNC_NAME(method, u16) == fn) ||	\
	  (FETCH_FUNC_NAME(method, u32) == fn) ||	\
	  (FETCH_FUNC_NAME(method, u64) == fn) ||	\
	  (FETCH_FUNC_NAME(method, string) == fn) ||	\
	  (FETCH_FUNC_NAME(method, string_size) == fn)) \
	 && (fn != NULL))
164
/* Data fetch function templates */

/* Fetch the register whose offset is encoded in @offset */
#define DEFINE_FETCH_reg(type)						\
static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs,	\
					void *offset, void *dest)	\
{									\
	*(type *)dest = (type)regs_get_register(regs,			\
				(unsigned int)((unsigned long)offset));	\
}
DEFINE_BASIC_FETCH_FUNCS(reg)
/* No string on the register */
#define fetch_reg_string NULL
#define fetch_reg_string_size NULL
177
/* Fetch the Nth kernel stack entry; N is encoded in @offset */
#define DEFINE_FETCH_stack(type)					\
static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
					  void *offset, void *dest)	\
{									\
	*(type *)dest = (type)regs_get_kernel_stack_nth(regs,		\
				(unsigned int)((unsigned long)offset));	\
}
DEFINE_BASIC_FETCH_FUNCS(stack)
/* No string on the stack entry */
#define fetch_stack_string NULL
#define fetch_stack_string_size NULL
189
/* Fetch the function return value (kretprobe only); @dummy is unused */
#define DEFINE_FETCH_retval(type)					\
static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\
					  void *dummy, void *dest)	\
{									\
	*(type *)dest = (type)regs_return_value(regs);			\
}
DEFINE_BASIC_FETCH_FUNCS(retval)
/* No string on the retval */
#define fetch_retval_string NULL
#define fetch_retval_string_size NULL
200
/* Fetch a value from kernel memory at @addr; stores 0 on fault */
#define DEFINE_FETCH_memory(type)					\
static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
					  void *addr, void *dest)	\
{									\
	type retval;							\
	if (probe_kernel_address(addr, retval))				\
		*(type *)dest = 0;					\
	else								\
		*(type *)dest = retval;					\
}
DEFINE_BASIC_FETCH_FUNCS(memory)
212/*
213 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
214 * length and relative data location.
215 */
216static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
217						      void *addr, void *dest)
218{
219	long ret;
220	int maxlen = get_rloc_len(*(u32 *)dest);
221	u8 *dst = get_rloc_data(dest);
222	u8 *src = addr;
223	mm_segment_t old_fs = get_fs();
224	if (!maxlen)
225		return;
226	/*
227	 * Try to get string again, since the string can be changed while
228	 * probing.
229	 */
230	set_fs(KERNEL_DS);
231	pagefault_disable();
232	do
233		ret = __copy_from_user_inatomic(dst++, src++, 1);
234	while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen);
235	dst[-1] = '\0';
236	pagefault_enable();
237	set_fs(old_fs);
238
239	if (ret < 0) {	/* Failed to fetch string */
240		((u8 *)get_rloc_data(dest))[0] = '\0';
241		*(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
242	} else
243		*(u32 *)dest = make_data_rloc(src - (u8 *)addr,
244					      get_rloc_offs(*(u32 *)dest));
245}
246/* Return the length of string -- including null terminal byte */
247static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
248							void *addr, void *dest)
249{
250	int ret, len = 0;
251	u8 c;
252	mm_segment_t old_fs = get_fs();
253
254	set_fs(KERNEL_DS);
255	pagefault_disable();
256	do {
257		ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
258		len++;
259	} while (c && ret == 0 && len < MAX_STRING_SIZE);
260	pagefault_enable();
261	set_fs(old_fs);
262
263	if (ret < 0)	/* Failed to check the length */
264		*(u32 *)dest = 0;
265	else
266		*(u32 *)dest = len;
267}
268
/* Memory fetching by symbol */
struct symbol_cache {
	char *symbol;		/* owned copy of the symbol name */
	long offset;		/* byte offset added to the symbol address */
	unsigned long addr;	/* cached resolved address (0 if unresolved) */
};

/* Re-resolve the symbol via kallsyms and cache symbol address + offset */
static unsigned long update_symbol_cache(struct symbol_cache *sc)
{
	sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
	if (sc->addr)
		sc->addr += sc->offset;
	return sc->addr;
}

/* Release a symbol_cache and its owned symbol string */
static void free_symbol_cache(struct symbol_cache *sc)
{
	kfree(sc->symbol);
	kfree(sc);
}
289
290static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
291{
292	struct symbol_cache *sc;
293
294	if (!sym || strlen(sym) == 0)
295		return NULL;
296	sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
297	if (!sc)
298		return NULL;
299
300	sc->symbol = kstrdup(sym, GFP_KERNEL);
301	if (!sc->symbol) {
302		kfree(sc);
303		return NULL;
304	}
305	sc->offset = offset;
306
307	update_symbol_cache(sc);
308	return sc;
309}
310
/* Fetch memory at a cached symbol address; stores 0 if unresolved */
#define DEFINE_FETCH_symbol(type)					\
static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\
					  void *data, void *dest)	\
{									\
	struct symbol_cache *sc = data;					\
	if (sc->addr)							\
		fetch_memory_##type(regs, (void *)sc->addr, dest);	\
	else								\
		*(type *)dest = 0;					\
}
DEFINE_BASIC_FETCH_FUNCS(symbol)
DEFINE_FETCH_symbol(string)
DEFINE_FETCH_symbol(string_size)
324
/* Dereference memory access function */
struct deref_fetch_param {
	struct fetch_param orig;	/* fetch producing the base address */
	long offset;			/* offset added before dereference */
};

/* Fetch orig's value as an address, add offset, then read memory there */
#define DEFINE_FETCH_deref(type)					\
static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\
					    void *data, void *dest)	\
{									\
	struct deref_fetch_param *dprm = data;				\
	unsigned long addr;						\
	call_fetch(&dprm->orig, regs, &addr);				\
	if (addr) {							\
		addr += dprm->offset;					\
		fetch_memory_##type(regs, (void *)addr, dest);		\
	} else								\
		*(type *)dest = 0;					\
}
DEFINE_BASIC_FETCH_FUNCS(deref)
DEFINE_FETCH_deref(string)
DEFINE_FETCH_deref(string_size)
347
/*
 * Free a deref_fetch_param, recursing through nested derefs and freeing
 * an inner symbol_cache if present.  Recursion depth is bounded by the
 * nesting of "+offs(...)" in the original command.
 */
static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
{
	if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
		free_deref_fetch_param(data->orig.data);
	else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
		free_symbol_cache(data->orig.data);
	kfree(data);
}
356
/* Default (unsigned long) fetch type: "u32" or "u64" per BITS_PER_LONG */
#define __DEFAULT_FETCH_TYPE(t) u##t
#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)

/* Fetch types: indices into struct fetch_type::fetch[] */
enum {
	FETCH_MTD_reg = 0,	/* CPU register (%REG) */
	FETCH_MTD_stack,	/* Nth stack slot ($stackN) */
	FETCH_MTD_retval,	/* function return value ($retval) */
	FETCH_MTD_memory,	/* memory at address (@ADDR) */
	FETCH_MTD_symbol,	/* memory at symbol (@SYM[+-offs]) */
	FETCH_MTD_deref,	/* dereference (+|-offs(ARG)) */
	FETCH_MTD_END,
};
373
/* Designated initializer for one fetch-method slot of a fetch_type */
#define ASSIGN_FETCH_FUNC(method, type)	\
	[FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type)

/* Build a complete struct fetch_type initializer for one type */
#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype)	\
	{.name = _name,				\
	 .size = _size,					\
	 .is_signed = sign,				\
	 .print = PRINT_TYPE_FUNC_NAME(ptype),		\
	 .fmt = PRINT_TYPE_FMT_NAME(ptype),		\
	 .fmttype = _fmttype,				\
	 .fetch = {					\
ASSIGN_FETCH_FUNC(reg, ftype),				\
ASSIGN_FETCH_FUNC(stack, ftype),			\
ASSIGN_FETCH_FUNC(retval, ftype),			\
ASSIGN_FETCH_FUNC(memory, ftype),			\
ASSIGN_FETCH_FUNC(symbol, ftype),			\
ASSIGN_FETCH_FUNC(deref, ftype),			\
	  }						\
	}

/* Shorthand where name, print type and format-file type coincide */
#define ASSIGN_FETCH_TYPE(ptype, ftype, sign)			\
	__ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype)
396
/* Fixed indices of the special string entries in fetch_type_table[] */
#define FETCH_TYPE_STRING 0
#define FETCH_TYPE_STRSIZE 1

/* Fetch type information table */
static const struct fetch_type {
	const char	*name;		/* Name of type */
	size_t		size;		/* Byte size of type */
	int		is_signed;	/* Signed flag */
	print_type_func_t	print;	/* Print functions */
	const char	*fmt;		/* Format string */
	const char	*fmttype;	/* Name in format file */
	/* Fetch functions */
	fetch_func_t	fetch[FETCH_MTD_END];
} fetch_type_table[] = {
	/* Special types */
	[FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
					sizeof(u32), 1, "__data_loc char[]"),
	[FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
					string_size, sizeof(u32), 0, "u32"),
	/* Basic types */
	ASSIGN_FETCH_TYPE(u8,  u8,  0),
	ASSIGN_FETCH_TYPE(u16, u16, 0),
	ASSIGN_FETCH_TYPE(u32, u32, 0),
	ASSIGN_FETCH_TYPE(u64, u64, 0),
	ASSIGN_FETCH_TYPE(s8,  u8,  1),
	ASSIGN_FETCH_TYPE(s16, u16, 1),
	ASSIGN_FETCH_TYPE(s32, u32, 1),
	ASSIGN_FETCH_TYPE(s64, u64, 1),
};
426
427static const struct fetch_type *find_fetch_type(const char *type)
428{
429	int i;
430
431	if (!type)
432		type = DEFAULT_FETCH_TYPE_STR;
433
434	for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++)
435		if (strcmp(type, fetch_type_table[i].name) == 0)
436			return &fetch_type_table[i];
437	return NULL;
438}
439
/*
 * Special function : only accept unsigned long.
 * Fetches the current kernel stack pointer ($stack with no index).
 */
static __kprobes void fetch_stack_address(struct pt_regs *regs,
					  void *dummy, void *dest)
{
	*(unsigned long *)dest = kernel_stack_pointer(regs);
}
446
/*
 * For a string-typed argument, find the string_size fetch function that
 * uses the same fetch method as @orig_fn.  Returns NULL for non-string
 * types (only strings need a separate size pass).
 */
static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
					    fetch_func_t orig_fn)
{
	int i;

	if (type != &fetch_type_table[FETCH_TYPE_STRING])
		return NULL;	/* Only string type needs size function */
	for (i = 0; i < FETCH_MTD_END; i++)
		if (type->fetch[i] == orig_fn)
			return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i];

	WARN_ON(1);	/* This should not happen */
	return NULL;
}
461
/**
 * Kprobe event core functions
 */

/* One fetch argument of a probe event */
struct probe_arg {
	struct fetch_param	fetch;		/* How to fetch the value */
	struct fetch_param	fetch_size;	/* Size fetch (strings only) */
	unsigned int		offset;	/* Offset from argument entry */
	const char		*name;	/* Name of this argument */
	const char		*comm;	/* Command of this argument */
	const struct fetch_type	*type;	/* Type of this argument */
};
474
/* Flags for trace_probe */
#define TP_FLAG_TRACE	1	/* enabled as an ftrace event */
#define TP_FLAG_PROFILE	2	/* enabled via perf */

/* A dynamically-created kprobe/kretprobe trace event */
struct trace_probe {
	struct list_head	list;
	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
	unsigned long 		nhit;	/* hit counter */
	unsigned int		flags;	/* For TP_FLAG_* */
	const char		*symbol;	/* symbol name */
	struct ftrace_event_class	class;
	struct ftrace_event_call	call;
	ssize_t			size;		/* trace entry size */
	unsigned int		nr_args;
	struct probe_arg	args[];		/* flexible array of args */
};

/* Allocation size of a trace_probe carrying n probe_args */
#define SIZEOF_TRACE_PROBE(n)			\
	(offsetof(struct trace_probe, args) +	\
	(sizeof(struct probe_arg) * (n)))
495
496
497static __kprobes int probe_is_return(struct trace_probe *tp)
498{
499	return tp->rp.handler != NULL;
500}
501
502static __kprobes const char *probe_symbol(struct trace_probe *tp)
503{
504	return tp->symbol ? tp->symbol : "unknown";
505}
506
static int register_probe_event(struct trace_probe *tp);
static void unregister_probe_event(struct trace_probe *tp);

/* Serializes probe_list and probe (un)registration */
static DEFINE_MUTEX(probe_lock);
/* All registered trace_probes; protected by probe_lock */
static LIST_HEAD(probe_list);

static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
				struct pt_regs *regs);
516
/*
 * Check the name is good for event/group/fields: it must look like a C
 * identifier -- a letter or '_' followed by letters, digits or '_'.
 * Returns 1 when valid, 0 otherwise.
 */
static int is_good_name(const char *name)
{
	const char *p = name;

	if (!isalpha(*p) && *p != '_')
		return 0;
	for (p++; *p != '\0'; p++) {
		if (!isalnum(*p) && *p != '_')
			return 0;
	}
	return 1;
}
528
/*
 * Allocate new trace_probe and initialize it (including kprobes).
 * @addr and @symbol are alternatives: a symbol(+offset) probe point or a
 * raw address.  Returns an ERR_PTR on failure; on success the caller owns
 * the probe and must eventually free_trace_probe() it.
 */
static struct trace_probe *alloc_trace_probe(const char *group,
					     const char *event,
					     void *addr,
					     const char *symbol,
					     unsigned long offs,
					     int nargs, int is_return)
{
	struct trace_probe *tp;
	int ret = -ENOMEM;

	tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
	if (!tp)
		return ERR_PTR(ret);

	if (symbol) {
		tp->symbol = kstrdup(symbol, GFP_KERNEL);
		if (!tp->symbol)
			goto error;
		tp->rp.kp.symbol_name = tp->symbol;
		tp->rp.kp.offset = offs;
	} else
		tp->rp.kp.addr = addr;

	/* The handler choice makes this a kretprobe or a plain kprobe */
	if (is_return)
		tp->rp.handler = kretprobe_dispatcher;
	else
		tp->rp.kp.pre_handler = kprobe_dispatcher;

	if (!event || !is_good_name(event)) {
		ret = -EINVAL;
		goto error;
	}

	tp->call.class = &tp->class;
	tp->call.name = kstrdup(event, GFP_KERNEL);
	if (!tp->call.name)
		goto error;

	if (!group || !is_good_name(group)) {
		ret = -EINVAL;
		goto error;
	}

	tp->class.system = kstrdup(group, GFP_KERNEL);
	if (!tp->class.system)
		goto error;

	INIT_LIST_HEAD(&tp->list);
	return tp;
error:
	/* kfree(NULL) is a no-op, so unset members are safe to free */
	kfree(tp->call.name);
	kfree(tp->symbol);
	kfree(tp);
	return ERR_PTR(ret);
}
587
/* Release one probe_arg: its fetch private data and owned strings */
static void free_probe_arg(struct probe_arg *arg)
{
	/* Only deref and symbol fetches own heap-allocated private data */
	if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
		free_deref_fetch_param(arg->fetch.data);
	else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
		free_symbol_cache(arg->fetch.data);
	kfree(arg->name);
	kfree(arg->comm);
}
597
/* Release a trace_probe, its arguments and all owned strings */
static void free_trace_probe(struct trace_probe *tp)
{
	int i;

	for (i = 0; i < tp->nr_args; i++)
		free_probe_arg(&tp->args[i]);

	kfree(tp->call.class->system);
	kfree(tp->call.name);
	kfree(tp->symbol);
	kfree(tp);
}
610
/*
 * Find a registered probe by event and group name.
 * Caller must hold probe_lock (walks probe_list unlocked here).
 */
static struct trace_probe *find_probe_event(const char *event,
					    const char *group)
{
	struct trace_probe *tp;

	list_for_each_entry(tp, &probe_list, list)
		if (strcmp(tp->call.name, event) == 0 &&
		    strcmp(tp->call.class->system, group) == 0)
			return tp;
	return NULL;
}
622
/* Unregister a trace_probe and probe_event: call with locking probe_lock */
static void unregister_trace_probe(struct trace_probe *tp)
{
	if (probe_is_return(tp))
		unregister_kretprobe(&tp->rp);
	else
		unregister_kprobe(&tp->rp.kp);
	list_del(&tp->list);
	unregister_probe_event(tp);
}
633
634/* Register a trace_probe and probe_event */
635static int register_trace_probe(struct trace_probe *tp)
636{
637	struct trace_probe *old_tp;
638	int ret;
639
640	mutex_lock(&probe_lock);
641
642	/* register as an event */
643	old_tp = find_probe_event(tp->call.name, tp->call.class->system);
644	if (old_tp) {
645		/* delete old event */
646		unregister_trace_probe(old_tp);
647		free_trace_probe(old_tp);
648	}
649	ret = register_probe_event(tp);
650	if (ret) {
651		pr_warning("Faild to register probe event(%d)\n", ret);
652		goto end;
653	}
654
655	tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
656	if (probe_is_return(tp))
657		ret = register_kretprobe(&tp->rp);
658	else
659		ret = register_kprobe(&tp->rp.kp);
660
661	if (ret) {
662		pr_warning("Could not insert probe(%d)\n", ret);
663		if (ret == -EILSEQ) {
664			pr_warning("Probing address(0x%p) is not an "
665				   "instruction boundary.\n",
666				   tp->rp.kp.addr);
667			ret = -EINVAL;
668		}
669		unregister_probe_event(tp);
670	} else
671		list_add_tail(&tp->list, &probe_list);
672end:
673	mutex_unlock(&probe_lock);
674	return ret;
675}
676
/*
 * Split symbol and offset.  "sym+offs" is split in place: @symbol is
 * NUL-terminated at the '+' and *@offset receives the parsed value
 * (0 when no '+' is present).  Returns 0 or a negative errno.
 */
static int split_symbol_offset(char *symbol, unsigned long *offset)
{
	char *tmp;
	int ret;

	if (!offset)
		return -EINVAL;

	tmp = strchr(symbol, '+');
	if (tmp) {
		/* skip sign because strict_strtol doesn't accept '+' */
		ret = strict_strtoul(tmp + 1, 0, offset);
		if (ret)
			return ret;
		*tmp = '\0';
	} else
		*offset = 0;
	return 0;
}
697
#define PARAM_MAX_ARGS 16
/* Highest addressable $stackN slot within one kernel stack */
#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))

/*
 * Parse a "$var" fetch argument (@arg points past the '$'):
 * "retval" (kretprobe only), "stack" (stack pointer, default type only),
 * or "stackN" (Nth stack slot).  Fills @f; returns 0 or -EINVAL.
 */
static int parse_probe_vars(char *arg, const struct fetch_type *t,
			    struct fetch_param *f, int is_return)
{
	int ret = 0;
	unsigned long param;

	if (strcmp(arg, "retval") == 0) {
		if (is_return)
			f->fn = t->fetch[FETCH_MTD_retval];
		else
			ret = -EINVAL;
	} else if (strncmp(arg, "stack", 5) == 0) {
		if (arg[5] == '\0') {
			/* Bare "$stack": only valid at the default type */
			if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0)
				f->fn = fetch_stack_address;
			else
				ret = -EINVAL;
		} else if (isdigit(arg[5])) {
			ret = strict_strtoul(arg + 5, 10, &param);
			if (ret || param > PARAM_MAX_STACK)
				ret = -EINVAL;
			else {
				f->fn = t->fetch[FETCH_MTD_stack];
				f->data = (void *)param;
			}
		} else
			ret = -EINVAL;
	} else
		ret = -EINVAL;
	return ret;
}
732
/*
 * Recursive argument parser.  Dispatches on the first character of @arg:
 *   $var            -> parse_probe_vars()
 *   %REG            -> register fetch
 *   @ADDR / @SYM+-o -> memory or symbol fetch
 *   +|-offs(ARG)    -> dereference; recurses on ARG
 * @arg is modified in place.  Fills @f; returns 0 or a negative errno.
 */
static int __parse_probe_arg(char *arg, const struct fetch_type *t,
			     struct fetch_param *f, int is_return)
{
	int ret = 0;
	unsigned long param;
	long offset;
	char *tmp;

	switch (arg[0]) {
	case '$':
		ret = parse_probe_vars(arg + 1, t, f, is_return);
		break;
	case '%':	/* named register */
		ret = regs_query_register_offset(arg + 1);
		if (ret >= 0) {
			f->fn = t->fetch[FETCH_MTD_reg];
			f->data = (void *)(unsigned long)ret;
			ret = 0;
		}
		break;
	case '@':	/* memory or symbol */
		if (isdigit(arg[1])) {
			ret = strict_strtoul(arg + 1, 0, &param);
			if (ret)
				break;
			f->fn = t->fetch[FETCH_MTD_memory];
			f->data = (void *)param;
		} else {
			ret = split_symbol_offset(arg + 1, &offset);
			if (ret)
				break;
			f->data = alloc_symbol_cache(arg + 1, offset);
			if (f->data)
				f->fn = t->fetch[FETCH_MTD_symbol];
		}
		break;
	case '+':	/* deref memory */
	case '-':
		tmp = strchr(arg, '(');
		if (!tmp)
			break;
		*tmp = '\0';
		/* Skip the sign; '-' is folded in below */
		ret = strict_strtol(arg + 1, 0, &offset);
		if (ret)
			break;
		if (arg[0] == '-')
			offset = -offset;
		arg = tmp + 1;
		tmp = strrchr(arg, ')');
		if (tmp) {
			struct deref_fetch_param *dprm;
			/* Inner fetch always uses the default (long) type */
			const struct fetch_type *t2 = find_fetch_type(NULL);
			*tmp = '\0';
			dprm = kzalloc(sizeof(struct deref_fetch_param),
				       GFP_KERNEL);
			if (!dprm)
				return -ENOMEM;
			dprm->offset = offset;
			ret = __parse_probe_arg(arg, t2, &dprm->orig,
						is_return);
			if (ret)
				kfree(dprm);
			else {
				f->fn = t->fetch[FETCH_MTD_deref];
				f->data = (void *)dprm;
			}
		}
		break;
	}
	if (!ret && !f->fn) {	/* Parsed, but do not find fetch method */
		pr_info("%s type has no corresponding fetch method.\n",
			t->name);
		ret = -EINVAL;
	}
	return ret;
}
810
/*
 * String length checking wrapper around __parse_probe_arg().
 * Keeps the raw command in parg->comm, splits an optional ":TYPE" suffix
 * (modifying @arg in place), resolves the type, and reserves space for
 * the value in the trace entry.  On failure, parg->comm is freed later
 * by free_probe_arg() in the caller's error path.
 */
static int parse_probe_arg(char *arg, struct trace_probe *tp,
			   struct probe_arg *parg, int is_return)
{
	const char *t;
	int ret;

	if (strlen(arg) > MAX_ARGSTR_LEN) {
		pr_info("Argument is too long.: %s\n",  arg);
		return -ENOSPC;
	}
	parg->comm = kstrdup(arg, GFP_KERNEL);
	if (!parg->comm) {
		pr_info("Failed to allocate memory for command '%s'.\n", arg);
		return -ENOMEM;
	}
	/* Search the copy so the ':' position maps back into @arg */
	t = strchr(parg->comm, ':');
	if (t) {
		arg[t - parg->comm] = '\0';
		t++;
	}
	/* t == NULL selects the default type */
	parg->type = find_fetch_type(t);
	if (!parg->type) {
		pr_info("Unsupported type: %s\n", t);
		return -EINVAL;
	}
	parg->offset = tp->size;
	tp->size += parg->type->size;
	ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
	if (ret >= 0) {
		/* Strings need a companion size fetch; NULL otherwise */
		parg->fetch_size.fn = get_fetch_size_function(parg->type,
							      parg->fetch.fn);
		parg->fetch_size.data = parg->fetch.data;
	}
	return ret;
}
847
848/* Return 1 if name is reserved or already used by another argument */
849static int conflict_field_name(const char *name,
850			       struct probe_arg *args, int narg)
851{
852	int i;
853	for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++)
854		if (strcmp(reserved_field_names[i], name) == 0)
855			return 1;
856	for (i = 0; i < narg; i++)
857		if (strcmp(args[i].name, name) == 0)
858			return 1;
859	return 0;
860}
861
/*
 * Parse one kprobe_events command line and create/delete a probe.
 * @argv[0] selects the operation and optional name, the rest describe
 * the probe point and fetch arguments.  Returns 0 or a negative errno.
 */
static int create_trace_probe(int argc, char **argv)
{
	/*
	 * Argument syntax:
	 *  - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
	 *  - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
	 * Fetch args:
	 *  $retval	: fetch return value
	 *  $stack	: fetch stack address
	 *  $stackN	: fetch Nth of stack (N:0-)
	 *  @ADDR	: fetch memory at ADDR (ADDR should be in kernel)
	 *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
	 *  %REG	: fetch register REG
	 * Dereferencing memory fetch:
	 *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
	 * Alias name of args:
	 *  NAME=FETCHARG : set NAME as alias of FETCHARG.
	 * Type of args:
	 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
	 */
	struct trace_probe *tp;
	int i, ret = 0;
	int is_return = 0, is_delete = 0;
	char *symbol = NULL, *event = NULL, *group = NULL;
	char *arg;
	unsigned long offset = 0;
	void *addr = NULL;
	char buf[MAX_EVENT_NAME_LEN];

	/* argc must be >= 1 */
	if (argv[0][0] == 'p')
		is_return = 0;
	else if (argv[0][0] == 'r')
		is_return = 1;
	else if (argv[0][0] == '-')
		is_delete = 1;
	else {
		pr_info("Probe definition must be started with 'p', 'r' or"
			" '-'.\n");
		return -EINVAL;
	}

	/* Optional ":[GRP/]EVENT" name; split in place */
	if (argv[0][1] == ':') {
		event = &argv[0][2];
		if (strchr(event, '/')) {
			group = event;
			event = strchr(group, '/') + 1;
			event[-1] = '\0';
			if (strlen(group) == 0) {
				pr_info("Group name is not specified\n");
				return -EINVAL;
			}
		}
		if (strlen(event) == 0) {
			pr_info("Event name is not specified\n");
			return -EINVAL;
		}
	}
	if (!group)
		group = KPROBE_EVENT_SYSTEM;

	if (is_delete) {
		if (!event) {
			pr_info("Delete command needs an event name.\n");
			return -EINVAL;
		}
		mutex_lock(&probe_lock);
		tp = find_probe_event(event, group);
		if (!tp) {
			mutex_unlock(&probe_lock);
			pr_info("Event %s/%s doesn't exist.\n", group, event);
			return -ENOENT;
		}
		/* delete an event */
		unregister_trace_probe(tp);
		free_trace_probe(tp);
		mutex_unlock(&probe_lock);
		return 0;
	}

	if (argc < 2) {
		pr_info("Probe point is not specified.\n");
		return -EINVAL;
	}
	if (isdigit(argv[1][0])) {
		/* A leading digit means a raw kernel address was given */
		if (is_return) {
			pr_info("Return probe point must be a symbol.\n");
			return -EINVAL;
		}
		/* an address specified */
		ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
		if (ret) {
			pr_info("Failed to parse address.\n");
			return ret;
		}
	} else {
		/* a symbol specified */
		symbol = argv[1];
		/* TODO: support .init module functions */
		ret = split_symbol_offset(symbol, &offset);
		if (ret) {
			pr_info("Failed to parse symbol.\n");
			return ret;
		}
		if (offset && is_return) {
			pr_info("Return probe must be used without offset.\n");
			return -EINVAL;
		}
	}
	argc -= 2; argv += 2;

	/* setup a probe */
	if (!event) {
		/* Make a new event name */
		if (symbol)
			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
				 is_return ? 'r' : 'p', symbol, offset);
		else
			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
				 is_return ? 'r' : 'p', addr);
		event = buf;
	}
	tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
			       is_return);
	if (IS_ERR(tp)) {
		pr_info("Failed to allocate trace_probe.(%d)\n",
			(int)PTR_ERR(tp));
		return PTR_ERR(tp);
	}

	/* parse arguments */
	ret = 0;
	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
		/* Increment count for freeing args in error case */
		tp->nr_args++;

		/* Parse argument name */
		arg = strchr(argv[i], '=');
		if (arg) {
			*arg++ = '\0';
			tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
		} else {
			arg = argv[i];
			/* If argument name is omitted, set "argN" */
			snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
			tp->args[i].name = kstrdup(buf, GFP_KERNEL);
		}

		if (!tp->args[i].name) {
			pr_info("Failed to allocate argument[%d] name.\n", i);
			ret = -ENOMEM;
			goto error;
		}

		if (!is_good_name(tp->args[i].name)) {
			pr_info("Invalid argument[%d] name: %s\n",
				i, tp->args[i].name);
			ret = -EINVAL;
			goto error;
		}

		if (conflict_field_name(tp->args[i].name, tp->args, i)) {
			pr_info("Argument[%d] name '%s' conflicts with "
				"another field.\n", i, argv[i]);
			ret = -EINVAL;
			goto error;
		}

		/* Parse fetch argument */
		ret = parse_probe_arg(arg, tp, &tp->args[i], is_return);
		if (ret) {
			pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
			goto error;
		}
	}

	ret = register_trace_probe(tp);
	if (ret)
		goto error;
	return 0;

error:
	/* Frees tp, its parsed args (nr_args of them) and owned strings */
	free_trace_probe(tp);
	return ret;
}
1047
/* Remove and free every registered probe (used on O_TRUNC open) */
static void cleanup_all_probes(void)
{
	struct trace_probe *tp;

	mutex_lock(&probe_lock);
	/* TODO: Use batch unregistration */
	while (!list_empty(&probe_list)) {
		tp = list_entry(probe_list.next, struct trace_probe, list);
		unregister_trace_probe(tp);
		free_trace_probe(tp);
	}
	mutex_unlock(&probe_lock);
}
1061

/* Probes listing interfaces */

/* Holds probe_lock across the whole seq iteration (released in stop) */
static void *probes_seq_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&probe_lock);
	return seq_list_start(&probe_list, *pos);
}

static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	return seq_list_next(v, &probe_list, pos);
}

static void probes_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&probe_lock);
}
1079
/*
 * Print one probe in the same syntax create_trace_probe() accepts,
 * e.g. "p:group/event symbol+offset arg1=%ax".
 */
static int probes_seq_show(struct seq_file *m, void *v)
{
	struct trace_probe *tp = v;
	int i;

	seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
	seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name);

	if (!tp->symbol)
		seq_printf(m, " 0x%p", tp->rp.kp.addr);
	else if (tp->rp.kp.offset)
		seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
	else
		seq_printf(m, " %s", probe_symbol(tp));

	/* args[i].comm is the original fetch command string */
	for (i = 0; i < tp->nr_args; i++)
		seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm);
	seq_printf(m, "\n");

	return 0;
}

static const struct seq_operations probes_seq_op = {
	.start  = probes_seq_start,
	.next   = probes_seq_next,
	.stop   = probes_seq_stop,
	.show   = probes_seq_show
};
1108
/* Open kprobe_events; O_TRUNC for writing wipes all existing probes */
static int probes_open(struct inode *inode, struct file *file)
{
	if ((file->f_mode & FMODE_WRITE) &&
	    (file->f_flags & O_TRUNC))
		cleanup_all_probes();

	return seq_open(file, &probes_seq_op);
}
1117
/* Split one command line into argv and hand it to create_trace_probe() */
static int command_trace_probe(const char *buf)
{
	char **argv;
	int argc = 0, ret = 0;

	argv = argv_split(GFP_KERNEL, buf, &argc);
	if (!argv)
		return -ENOMEM;

	/* Empty/whitespace-only lines are silently accepted */
	if (argc)
		ret = create_trace_probe(argc, argv);

	argv_free(argv);
	return ret;
}
1133
1134#define WRITE_BUFSIZE 128
1135
1136static ssize_t probes_write(struct file *file, const char __user *buffer,
1137			    size_t count, loff_t *ppos)
1138{
1139	char *kbuf, *tmp;
1140	int ret;
1141	size_t done;
1142	size_t size;
1143
1144	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
1145	if (!kbuf)
1146		return -ENOMEM;
1147
1148	ret = done = 0;
1149	while (done < count) {
1150		size = count - done;
1151		if (size >= WRITE_BUFSIZE)
1152			size = WRITE_BUFSIZE - 1;
1153		if (copy_from_user(kbuf, buffer + done, size)) {
1154			ret = -EFAULT;
1155			goto out;
1156		}
1157		kbuf[size] = '\0';
1158		tmp = strchr(kbuf, '\n');
1159		if (tmp) {
1160			*tmp = '\0';
1161			size = tmp - kbuf + 1;
1162		} else if (done + size < count) {
1163			pr_warning("Line length is too long: "
1164				   "Should be less than %d.", WRITE_BUFSIZE);
1165			ret = -EINVAL;
1166			goto out;
1167		}
1168		done += size;
1169		/* Remove comments */
1170		tmp = strchr(kbuf, '#');
1171		if (tmp)
1172			*tmp = '\0';
1173
1174		ret = command_trace_probe(kbuf);
1175		if (ret)
1176			goto out;
1177	}
1178	ret = done;
1179out:
1180	kfree(kbuf);
1181	return ret;
1182}
1183
/* File operations for the debugfs kprobe_events control file */
static const struct file_operations kprobe_events_ops = {
	.owner          = THIS_MODULE,
	.open           = probes_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release,
	.write		= probes_write,
};
1192
1193/* Probes profiling interfaces */
1194static int probes_profile_seq_show(struct seq_file *m, void *v)
1195{
1196	struct trace_probe *tp = v;
1197
1198	seq_printf(m, "  %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
1199		   tp->rp.kp.nmissed);
1200
1201	return 0;
1202}
1203
/* seq_file operations for the kprobe_profile statistics file */
static const struct seq_operations profile_seq_op = {
	.start  = probes_seq_start,
	.next   = probes_seq_next,
	.stop   = probes_seq_stop,
	.show   = probes_profile_seq_show
};
1210
/* Open the read-only kprobe_profile statistics file */
static int profile_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &profile_seq_op);
}
1215
/* File operations for the debugfs kprobe_profile file (read-only) */
static const struct file_operations kprobe_profile_ops = {
	.owner          = THIS_MODULE,
	.open           = profile_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release,
};
1223
1224/* Sum up total data length for dynamic arraies (strings) */
1225static __kprobes int __get_data_size(struct trace_probe *tp,
1226				     struct pt_regs *regs)
1227{
1228	int i, ret = 0;
1229	u32 len;
1230
1231	for (i = 0; i < tp->nr_args; i++)
1232		if (unlikely(tp->args[i].fetch_size.fn)) {
1233			call_fetch(&tp->args[i].fetch_size, regs, &len);
1234			ret += len;
1235		}
1236
1237	return ret;
1238}
1239
/*
 * Store the value of each argument
 *
 * Fixed-size arguments are written at their precomputed offsets inside
 * @data.  Dynamic (string) arguments instead store a packed
 * location/length word at the offset and append their payload after
 * the fixed-size region; @ent_size (size of the entry header that
 * precedes @data) is used to convert data-relative locations into
 * entry-relative ones for the output format.
 */
static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp,
				       struct pt_regs *regs,
				       u8 *data, int maxlen)
{
	int i;
	u32 end = tp->size;	/* running offset just past the fixed-size args */
	u32 *dl;	/* Data (relative) location */

	for (i = 0; i < tp->nr_args; i++) {
		if (unlikely(tp->args[i].fetch_size.fn)) {
			/*
			 * First, we set the relative location and
			 * maximum data length to *dl
			 */
			dl = (u32 *)(data + tp->args[i].offset);
			*dl = make_data_rloc(maxlen, end - tp->args[i].offset);
			/* Then try to fetch string or dynamic array data */
			call_fetch(&tp->args[i].fetch, regs, dl);
			/* Reduce maximum length */
			end += get_rloc_len(*dl);
			maxlen -= get_rloc_len(*dl);
			/* Trick here, convert data_rloc to data_loc */
			*dl = convert_rloc_to_loc(*dl,
				 ent_size + tp->args[i].offset);
		} else
			/* Just fetching data normally */
			call_fetch(&tp->args[i].fetch, regs,
				   data + tp->args[i].offset);
	}
}
1271
/* Kprobe handler */
/*
 * Record a kprobe hit into the ftrace ring buffer: reserve an event
 * sized for the fixed args plus any dynamic (string) data, fill it,
 * and commit it unless the event filter discards it.
 */
static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
	struct kprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size, dsize, pc;
	unsigned long irq_flags;
	struct ftrace_event_call *call = &tp->call;

	/* NOTE(review): plain increment, not atomic; count may be approximate */
	tp->nhit++;

	local_save_flags(irq_flags);
	pc = preempt_count();

	/* Dynamic (string) data is appended after the fixed-size args */
	dsize = __get_data_size(tp, regs);
	size = sizeof(*entry) + tp->size + dsize;

	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
						  size, irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->ip = (unsigned long)kp->addr;
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);

	if (!filter_current_check_discard(buffer, call, entry, event))
		trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
}
1303
/* Kretprobe handler */
/*
 * Record a function-return hit into the ftrace ring buffer; like
 * kprobe_trace_func() but the entry stores both the probed function
 * address and the return address.
 */
static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
					  struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
	struct kretprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size, pc, dsize;
	unsigned long irq_flags;
	struct ftrace_event_call *call = &tp->call;

	local_save_flags(irq_flags);
	pc = preempt_count();

	/* Dynamic (string) data is appended after the fixed-size args */
	dsize = __get_data_size(tp, regs);
	size = sizeof(*entry) + tp->size + dsize;

	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
						  size, irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->func = (unsigned long)tp->rp.kp.addr;
	entry->ret_ip = (unsigned long)ri->ret_addr;
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);

	if (!filter_current_check_discard(buffer, call, entry, event))
		trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
}
1335
/* Event entry printers */
/*
 * Render one kprobe hit for trace output as
 *   "<event>: (<ip sym+off>) name1=val1 name2=val2 ...\n".
 * Returns TRACE_TYPE_PARTIAL_LINE when the seq buffer runs out.
 */
enum print_line_t
print_kprobe_event(struct trace_iterator *iter, int flags,
		   struct trace_event *event)
{
	struct kprobe_trace_entry_head *field;
	struct trace_seq *s = &iter->seq;
	struct trace_probe *tp;
	u8 *data;
	int i;

	field = (struct kprobe_trace_entry_head *)iter->ent;
	tp = container_of(event, struct trace_probe, call.event);

	if (!trace_seq_printf(s, "%s: (", tp->call.name))
		goto partial;

	if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
		goto partial;

	if (!trace_seq_puts(s, ")"))
		goto partial;

	/* Argument values follow the fixed-size entry header */
	data = (u8 *)&field[1];
	for (i = 0; i < tp->nr_args; i++)
		if (!tp->args[i].type->print(s, tp->args[i].name,
					     data + tp->args[i].offset, field))
			goto partial;

	if (!trace_seq_puts(s, "\n"))
		goto partial;

	return TRACE_TYPE_HANDLED;
partial:
	return TRACE_TYPE_PARTIAL_LINE;
}
1372
/*
 * Render one kretprobe hit for trace output as
 *   "<event>: (<ret_ip sym+off> <- <func sym>) name1=val1 ...\n".
 * Returns TRACE_TYPE_PARTIAL_LINE when the seq buffer runs out.
 */
enum print_line_t
print_kretprobe_event(struct trace_iterator *iter, int flags,
		      struct trace_event *event)
{
	struct kretprobe_trace_entry_head *field;
	struct trace_seq *s = &iter->seq;
	struct trace_probe *tp;
	u8 *data;
	int i;

	field = (struct kretprobe_trace_entry_head *)iter->ent;
	tp = container_of(event, struct trace_probe, call.event);

	if (!trace_seq_printf(s, "%s: (", tp->call.name))
		goto partial;

	/* Return site is printed with its offset ... */
	if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
		goto partial;

	if (!trace_seq_puts(s, " <- "))
		goto partial;

	/* ... the probed function without one */
	if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
		goto partial;

	if (!trace_seq_puts(s, ")"))
		goto partial;

	/* Argument values follow the fixed-size entry header */
	data = (u8 *)&field[1];
	for (i = 0; i < tp->nr_args; i++)
		if (!tp->args[i].type->print(s, tp->args[i].name,
					     data + tp->args[i].offset, field))
			goto partial;

	if (!trace_seq_puts(s, "\n"))
		goto partial;

	return TRACE_TYPE_HANDLED;
partial:
	return TRACE_TYPE_PARTIAL_LINE;
}
1414
1415static int probe_event_enable(struct ftrace_event_call *call)
1416{
1417	struct trace_probe *tp = (struct trace_probe *)call->data;
1418
1419	tp->flags |= TP_FLAG_TRACE;
1420	if (probe_is_return(tp))
1421		return enable_kretprobe(&tp->rp);
1422	else
1423		return enable_kprobe(&tp->rp.kp);
1424}
1425
1426static void probe_event_disable(struct ftrace_event_call *call)
1427{
1428	struct trace_probe *tp = (struct trace_probe *)call->data;
1429
1430	tp->flags &= ~TP_FLAG_TRACE;
1431	if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) {
1432		if (probe_is_return(tp))
1433			disable_kretprobe(&tp->rp);
1434		else
1435			disable_kprobe(&tp->rp.kp);
1436	}
1437}
1438
#undef DEFINE_FIELD
/*
 * Register one fixed field of the probe event.  Expands to a
 * trace_define_field() call and returns from the *enclosing* function
 * on error; relies on 'event_call', 'field' and 'ret' locals existing
 * in the caller.
 */
#define DEFINE_FIELD(type, item, name, is_signed)			\
	do {								\
		ret = trace_define_field(event_call, #type, name,	\
					 offsetof(typeof(field), item),	\
					 sizeof(field.item), is_signed, \
					 FILTER_OTHER);			\
		if (ret)						\
			return ret;					\
	} while (0)
1449
/*
 * Describe the record layout of a kprobe event for the filter/format
 * machinery: the fixed IP field followed by one field per argument.
 * Argument data starts right after the fixed-size header, hence the
 * sizeof(field) base offset.
 */
static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
{
	int ret, i;
	struct kprobe_trace_entry_head field;
	struct trace_probe *tp = (struct trace_probe *)event_call->data;

	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
	/* Set argument names as fields */
	for (i = 0; i < tp->nr_args; i++) {
		ret = trace_define_field(event_call, tp->args[i].type->fmttype,
					 tp->args[i].name,
					 sizeof(field) + tp->args[i].offset,
					 tp->args[i].type->size,
					 tp->args[i].type->is_signed,
					 FILTER_OTHER);
		if (ret)
			return ret;
	}
	return 0;
}
1470
/*
 * Describe the record layout of a kretprobe event: fixed function and
 * return-IP fields followed by one field per argument (argument data
 * starts after the fixed-size header).
 */
static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
{
	int ret, i;
	struct kretprobe_trace_entry_head field;
	struct trace_probe *tp = (struct trace_probe *)event_call->data;

	DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
	DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
	/* Set argument names as fields */
	for (i = 0; i < tp->nr_args; i++) {
		ret = trace_define_field(event_call, tp->args[i].type->fmttype,
					 tp->args[i].name,
					 sizeof(field) + tp->args[i].offset,
					 tp->args[i].type->size,
					 tp->args[i].type->is_signed,
					 FILTER_OTHER);
		if (ret)
			return ret;
	}
	return 0;
}
1492
/*
 * Build the event's print_fmt string, e.g.
 *   "\"(%lx) a=%lx\", REC->__probe_ip, REC->a"
 * Called twice: once with len==0 just to measure, then with the real
 * buffer.  Returns the length that was (or would be) written,
 * excluding the terminating '\0'.
 */
static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
{
	int i;
	int pos = 0;

	const char *fmt, *arg;

	if (!probe_is_return(tp)) {
		fmt = "(%lx)";
		arg = "REC->" FIELD_STRING_IP;
	} else {
		fmt = "(%lx <- %lx)";
		arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
	}

	/* When len=0, we just calculate the needed length */
#define LEN_OR_ZERO (len ? len - pos : 0)

	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);

	/* Format-string half: one conversion per argument */
	for (i = 0; i < tp->nr_args; i++) {
		pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
				tp->args[i].name, tp->args[i].type->fmt);
	}

	pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);

	/* Argument half: strings are accessed via __get_str() */
	for (i = 0; i < tp->nr_args; i++) {
		if (strcmp(tp->args[i].type->name, "string") == 0)
			pos += snprintf(buf + pos, LEN_OR_ZERO,
					", __get_str(%s)",
					tp->args[i].name);
		else
			pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
					tp->args[i].name);
	}

#undef LEN_OR_ZERO

	/* return the length of print_fmt */
	return pos;
}
1535
1536static int set_print_fmt(struct trace_probe *tp)
1537{
1538	int len;
1539	char *print_fmt;
1540
1541	/* First: called with 0 length to calculate the needed length */
1542	len = __set_print_fmt(tp, NULL, 0);
1543	print_fmt = kmalloc(len + 1, GFP_KERNEL);
1544	if (!print_fmt)
1545		return -ENOMEM;
1546
1547	/* Second: actually write the @print_fmt */
1548	__set_print_fmt(tp, print_fmt, len + 1);
1549	tp->call.print_fmt = print_fmt;
1550
1551	return 0;
1552}
1553
1554#ifdef CONFIG_PERF_EVENTS
1555
/* Kprobe profile handler */
/*
 * Record a kprobe hit into the perf trace buffer.  The payload is
 * padded so that size + the u32 perf header rounds to a u64 boundary.
 */
static __kprobes void kprobe_perf_func(struct kprobe *kp,
					 struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
	struct ftrace_event_call *call = &tp->call;
	struct kprobe_trace_entry_head *entry;
	struct hlist_head *head;
	int size, __size, dsize;
	int rctx;

	dsize = __get_data_size(tp, regs);
	__size = sizeof(*entry) + tp->size + dsize;
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);
	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
		     "profile buffer not large enough"))
		return;

	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
	if (!entry)
		return;

	entry->ip = (unsigned long)kp->addr;
	/* Zero the dynamic area so padding never leaks stale data */
	memset(&entry[1], 0, dsize);
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);

	head = this_cpu_ptr(call->perf_events);
	perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
}
1586
/* Kretprobe profile handler */
/*
 * Record a function-return hit into the perf trace buffer; sizing and
 * submission mirror kprobe_perf_func().
 */
static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
					    struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
	struct ftrace_event_call *call = &tp->call;
	struct kretprobe_trace_entry_head *entry;
	struct hlist_head *head;
	int size, __size, dsize;
	int rctx;

	dsize = __get_data_size(tp, regs);
	__size = sizeof(*entry) + tp->size + dsize;
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);
	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
		     "profile buffer not large enough"))
		return;

	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
	if (!entry)
		return;

	entry->func = (unsigned long)tp->rp.kp.addr;
	entry->ret_ip = (unsigned long)ri->ret_addr;
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);

	head = this_cpu_ptr(call->perf_events);
	perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
}
1617
1618static int probe_perf_enable(struct ftrace_event_call *call)
1619{
1620	struct trace_probe *tp = (struct trace_probe *)call->data;
1621
1622	tp->flags |= TP_FLAG_PROFILE;
1623
1624	if (probe_is_return(tp))
1625		return enable_kretprobe(&tp->rp);
1626	else
1627		return enable_kprobe(&tp->rp.kp);
1628}
1629
1630static void probe_perf_disable(struct ftrace_event_call *call)
1631{
1632	struct trace_probe *tp = (struct trace_probe *)call->data;
1633
1634	tp->flags &= ~TP_FLAG_PROFILE;
1635
1636	if (!(tp->flags & TP_FLAG_TRACE)) {
1637		if (probe_is_return(tp))
1638			disable_kretprobe(&tp->rp);
1639		else
1640			disable_kprobe(&tp->rp.kp);
1641	}
1642}
1643#endif	/* CONFIG_PERF_EVENTS */
1644
/*
 * Single .reg callback multiplexing enable/disable requests from the
 * ftrace and perf interfaces onto the probe.
 */
static __kprobes
int kprobe_register(struct ftrace_event_call *event, enum trace_reg type)
{
	switch (type) {
	case TRACE_REG_REGISTER:
		return probe_event_enable(event);
	case TRACE_REG_UNREGISTER:
		probe_event_disable(event);
		return 0;

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return probe_perf_enable(event);
	case TRACE_REG_PERF_UNREGISTER:
		probe_perf_disable(event);
		return 0;
#endif
	}
	return 0;
}
1665
/*
 * Pre-handler installed in the kprobe: fan the hit out to whichever
 * consumers (ftrace and/or perf) are currently enabled.
 */
static __kprobes
int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);

	if (tp->flags & TP_FLAG_TRACE)
		kprobe_trace_func(kp, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tp->flags & TP_FLAG_PROFILE)
		kprobe_perf_func(kp, regs);
#endif
	return 0;	/* We don't tweek kernel, so just return 0 */
}
1679
/*
 * Return-handler installed in the kretprobe: fan the hit out to
 * whichever consumers (ftrace and/or perf) are currently enabled.
 */
static __kprobes
int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);

	if (tp->flags & TP_FLAG_TRACE)
		kretprobe_trace_func(ri, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tp->flags & TP_FLAG_PROFILE)
		kretprobe_perf_func(ri, regs);
#endif
	return 0;	/* We don't tweek kernel, so just return 0 */
}
1693
/* Output callbacks for the two probe flavors */
static struct trace_event_functions kretprobe_funcs = {
	.trace		= print_kretprobe_event
};

static struct trace_event_functions kprobe_funcs = {
	.trace		= print_kprobe_event
};
1701
/*
 * Register a probe's ftrace_event_call with the event subsystem.
 * On any failure every partially-built resource (print_fmt, ftrace
 * event registration) is torn down before returning.
 */
static int register_probe_event(struct trace_probe *tp)
{
	struct ftrace_event_call *call = &tp->call;
	int ret;

	/* Initialize ftrace_event_call */
	INIT_LIST_HEAD(&call->class->fields);
	if (probe_is_return(tp)) {
		call->event.funcs = &kretprobe_funcs;
		call->class->define_fields = kretprobe_event_define_fields;
	} else {
		call->event.funcs = &kprobe_funcs;
		call->class->define_fields = kprobe_event_define_fields;
	}
	if (set_print_fmt(tp) < 0)
		return -ENOMEM;
	/* register_ftrace_event() returns the event type id, 0 on failure */
	ret = register_ftrace_event(&call->event);
	if (!ret) {
		kfree(call->print_fmt);
		return -ENODEV;
	}
	call->flags = 0;
	call->class->reg = kprobe_register;
	call->data = tp;
	ret = trace_add_event_call(call);
	if (ret) {
		pr_info("Failed to register kprobe event: %s\n", call->name);
		kfree(call->print_fmt);
		unregister_ftrace_event(&call->event);
	}
	return ret;
}
1734
/* Undo register_probe_event(): remove the event call and free print_fmt */
static void unregister_probe_event(struct trace_probe *tp)
{
	/* tp->event is unregistered in trace_remove_event_call() */
	trace_remove_event_call(&tp->call);
	kfree(tp->call.print_fmt);
}
1741
1742/* Make a debugfs interface for controling probe points */
1743static __init int init_kprobe_trace(void)
1744{
1745	struct dentry *d_tracer;
1746	struct dentry *entry;
1747
1748	d_tracer = tracing_init_dentry();
1749	if (!d_tracer)
1750		return 0;
1751
1752	entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
1753				    NULL, &kprobe_events_ops);
1754
1755	/* Event list interface */
1756	if (!entry)
1757		pr_warning("Could not create debugfs "
1758			   "'kprobe_events' entry\n");
1759
1760	/* Profile interface */
1761	entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
1762				    NULL, &kprobe_profile_ops);
1763
1764	if (!entry)
1765		pr_warning("Could not create debugfs "
1766			   "'kprobe_profile' entry\n");
1767	return 0;
1768}
1769fs_initcall(init_kprobe_trace);
1770
1771
1772#ifdef CONFIG_FTRACE_STARTUP_TEST
1773
/* Dummy function the startup self-test attaches its probes to */
static int kprobe_trace_selftest_target(int a1, int a2, int a3,
					int a4, int a5, int a6)
{
	int sum = a1 + a2;

	sum += a3 + a4;
	sum += a5 + a6;
	return sum;
}
1779
/*
 * Boot-time self-test: create an entry probe and a return probe on
 * kprobe_trace_selftest_target(), enable both, call the target, then
 * delete the probes.  Any failure is WARNed and counted; all probes
 * are cleaned up at the end regardless of outcome.
 */
static __init int kprobe_trace_self_tests_init(void)
{
	int ret, warn = 0;
	int (*target)(int, int, int, int, int, int);
	struct trace_probe *tp;

	/* Call through a pointer so the call can't be optimized away */
	target = kprobe_trace_selftest_target;

	pr_info("Testing kprobe tracing: ");

	ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
				  "$stack $stack0 +0($stack)");
	if (WARN_ON_ONCE(ret)) {
		pr_warning("error on probing function entry.\n");
		warn++;
	} else {
		/* Enable trace point */
		tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM);
		if (WARN_ON_ONCE(tp == NULL)) {
			pr_warning("error on getting new probe.\n");
			warn++;
		} else
			probe_event_enable(&tp->call);
	}

	ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
				  "$retval");
	if (WARN_ON_ONCE(ret)) {
		pr_warning("error on probing function return.\n");
		warn++;
	} else {
		/* Enable trace point */
		tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM);
		if (WARN_ON_ONCE(tp == NULL)) {
			pr_warning("error on getting new probe.\n");
			warn++;
		} else
			probe_event_enable(&tp->call);
	}

	if (warn)
		goto end;

	/* Trigger both probes */
	ret = target(1, 2, 3, 4, 5, 6);

	ret = command_trace_probe("-:testprobe");
	if (WARN_ON_ONCE(ret)) {
		pr_warning("error on deleting a probe.\n");
		warn++;
	}

	ret = command_trace_probe("-:testprobe2");
	if (WARN_ON_ONCE(ret)) {
		pr_warning("error on deleting a probe.\n");
		warn++;
	}

end:
	cleanup_all_probes();
	if (warn)
		pr_cont("NG: Some tests are failed. Please check them.\n");
	else
		pr_cont("OK\n");
	return 0;
}

late_initcall(kprobe_trace_self_tests_init);
1847
1848#endif
1849