1/*
2 * Tracing hooks
3 *
4 * Copyright (C) 2008-2009 Red Hat, Inc.  All rights reserved.
5 *
6 * This copyrighted material is made available to anyone wishing to use,
7 * modify, copy, or redistribute it subject to the terms and conditions
8 * of the GNU General Public License v.2.
9 *
10 * This file defines hook entry points called by core code where
11 * user tracing/debugging support might need to do something.  These
12 * entry points are called tracehook_*().  Each hook declared below
13 * has a detailed kerneldoc comment giving the context (locking et
14 * al) from which it is called, and the meaning of its return value.
15 *
16 * Each function here typically has only one call site, so it is ok
17 * to have some nontrivial tracehook_*() inlines.  In all cases, the
18 * fast path when no tracing is enabled should be very short.
19 *
20 * The purpose of this file and the tracehook_* layer is to consolidate
21 * the interface that the kernel core and arch code uses to enable any
22 * user debugging or tracing facility (such as ptrace).  The interfaces
23 * here are carefully documented so that maintainers of core and arch
24 * code do not need to think about the implementation details of the
25 * tracing facilities.  Likewise, maintainers of the tracing code do not
26 * need to understand all the calling core or arch code in detail, just
27 * documented circumstances of each call, such as locking conditions.
28 *
29 * If the calling core code changes so that locking is different, then
30 * it is ok to change the interface documented here.  The maintainer of
31 * core code changing should notify the maintainers of the tracing code
32 * that they need to work out the change.
33 *
34 * Some tracehook_*() inlines take arguments that the current tracing
35 * implementations might not necessarily use.  These function signatures
36 * are chosen to pass in all the information that is on hand in the
37 * caller and might conceivably be relevant to a tracer, so that the
38 * core code won't have to be updated when tracing adds more features.
39 * If a call site changes so that some of those parameters are no longer
40 * already on hand without extra work, then the tracehook_* interface
41 * can change so there is no make-work burden on the core code.  The
42 * maintainer of core code changing should notify the maintainers of the
43 * tracing code that they need to work out the change.
44 */
45
46#ifndef _LINUX_TRACEHOOK_H
47#define _LINUX_TRACEHOOK_H	1
48
49#include <linux/sched.h>
50#include <linux/ptrace.h>
51#include <linux/security.h>
52struct linux_binprm;
53
54/**
55 * tracehook_expect_breakpoints - guess if task memory might be touched
56 * @task:		current task, making a new mapping
57 *
58 * Return nonzero if @task is expected to want breakpoint insertion in
59 * its memory at some point.  A zero return is no guarantee it won't
60 * be done, but this is a hint that it's known to be likely.
61 *
62 * May be called with @task->mm->mmap_sem held for writing.
63 */
64static inline int tracehook_expect_breakpoints(struct task_struct *task)
65{
66	return (task_ptrace(task) & PT_PTRACED) != 0;
67}
68
69/*
70 * ptrace report for syscall entry and exit looks identical.
71 */
72static inline void ptrace_report_syscall(struct pt_regs *regs)
73{
74	int ptrace = task_ptrace(current);
75
76	if (!(ptrace & PT_PTRACED))
77		return;
78
79	ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0));
80
81	/*
82	 * this isn't the same as continuing with a signal, but it will do
83	 * for normal use.  strace only continues with a signal if the
84	 * stopping signal is not SIGTRAP.  -brl
85	 */
86	if (current->exit_code) {
87		send_sig(current->exit_code, current, 1);
88		current->exit_code = 0;
89	}
90}
91
92/**
93 * tracehook_report_syscall_entry - task is about to attempt a system call
94 * @regs:		user register state of current task
95 *
96 * This will be called if %TIF_SYSCALL_TRACE has been set, when the
97 * current task has just entered the kernel for a system call.
98 * Full user register state is available here.  Changing the values
99 * in @regs can affect the system call number and arguments to be tried.
100 * It is safe to block here, preventing the system call from beginning.
101 *
102 * Returns zero normally, or nonzero if the calling arch code should abort
103 * the system call.  That must prevent normal entry so no system call is
104 * made.  If @task ever returns to user mode after this, its register state
105 * is unspecified, but should be something harmless like an %ENOSYS error
106 * return.  It should preserve enough information so that syscall_rollback()
107 * can work (see asm-generic/syscall.h).
108 *
109 * Called without locks, just after entering kernel mode.
110 */
111static inline __must_check int tracehook_report_syscall_entry(
112	struct pt_regs *regs)
113{
114	ptrace_report_syscall(regs);
115	return 0;
116}
117
118/**
119 * tracehook_report_syscall_exit - task has just finished a system call
120 * @regs:		user register state of current task
121 * @step:		nonzero if simulating single-step or block-step
122 *
123 * This will be called if %TIF_SYSCALL_TRACE has been set, when the
124 * current task has just finished an attempted system call.  Full
125 * user register state is available here.  It is safe to block here,
126 * preventing signals from being processed.
127 *
128 * If @step is nonzero, this report is also in lieu of the normal
129 * trap that would follow the system call instruction because
130 * user_enable_block_step() or user_enable_single_step() was used.
131 * In this case, %TIF_SYSCALL_TRACE might not be set.
132 *
133 * Called without locks, just before checking for pending signals.
134 */
135static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step)
136{
137	if (step) {
138		siginfo_t info;
139		user_single_step_siginfo(current, regs, &info);
140		force_sig_info(SIGTRAP, &info, current);
141		return;
142	}
143
144	ptrace_report_syscall(regs);
145}
146
147/**
148 * tracehook_unsafe_exec - check for exec declared unsafe due to tracing
149 * @task:		current task doing exec
150 *
151 * Return %LSM_UNSAFE_* bits applied to an exec because of tracing.
152 *
153 * @task->cred_guard_mutex is held by the caller through the do_execve().
154 */
155static inline int tracehook_unsafe_exec(struct task_struct *task)
156{
157	int unsafe = 0;
158	int ptrace = task_ptrace(task);
159	if (ptrace & PT_PTRACED) {
160		if (ptrace & PT_PTRACE_CAP)
161			unsafe |= LSM_UNSAFE_PTRACE_CAP;
162		else
163			unsafe |= LSM_UNSAFE_PTRACE;
164	}
165	return unsafe;
166}
167
/**
 * tracehook_tracer_task - return the task that is tracing the given task
 * @tsk:		task to consider
 *
 * Returns NULL if no one is tracing @tsk, or the &struct task_struct
 * pointer to its tracer.
 *
 * Must be called under rcu_read_lock().  The pointer returned might be kept
 * live only by RCU.  During exec, this may be called with task_lock()
 * held on @tsk, still held from when tracehook_unsafe_exec() was called.
 */
static inline struct task_struct *tracehook_tracer_task(struct task_struct *tsk)
{
	if (task_ptrace(tsk) & PT_PTRACED)
		return rcu_dereference(tsk->parent);
	return NULL;
}
185
186/**
187 * tracehook_report_exec - a successful exec was completed
188 * @fmt:		&struct linux_binfmt that performed the exec
189 * @bprm:		&struct linux_binprm containing exec details
190 * @regs:		user-mode register state
191 *
192 * An exec just completed, we are shortly going to return to user mode.
193 * The freshly initialized register state can be seen and changed in @regs.
194 * The name, file and other pointers in @bprm are still on hand to be
195 * inspected, but will be freed as soon as this returns.
196 *
197 * Called with no locks, but with some kernel resources held live
198 * and a reference on @fmt->module.
199 */
200static inline void tracehook_report_exec(struct linux_binfmt *fmt,
201					 struct linux_binprm *bprm,
202					 struct pt_regs *regs)
203{
204	if (!ptrace_event(PT_TRACE_EXEC, PTRACE_EVENT_EXEC, 0) &&
205	    unlikely(task_ptrace(current) & PT_PTRACED))
206		send_sig(SIGTRAP, current, 0);
207}
208
/**
 * tracehook_report_exit - task has begun to exit
 * @exit_code:		pointer to value destined for @current->exit_code
 *
 * @exit_code points to the value passed to do_exit(), which tracing
 * might change here.  This is almost the first thing in do_exit(),
 * before freeing any resources or setting the %PF_EXITING flag.
 *
 * Called with no locks held.
 */
static inline void tracehook_report_exit(long *exit_code)
{
	/* Hand *exit_code to ptrace_event() for a possible PTRACE_EVENT_EXIT stop. */
	ptrace_event(PT_TRACE_EXIT, PTRACE_EVENT_EXIT, *exit_code);
}
223
224/**
225 * tracehook_prepare_clone - prepare for new child to be cloned
226 * @clone_flags:	%CLONE_* flags from clone/fork/vfork system call
227 *
228 * This is called before a new user task is to be cloned.
229 * Its return value will be passed to tracehook_finish_clone().
230 *
231 * Called with no locks held.
232 */
233static inline int tracehook_prepare_clone(unsigned clone_flags)
234{
235	if (clone_flags & CLONE_UNTRACED)
236		return 0;
237
238	if (clone_flags & CLONE_VFORK) {
239		if (current->ptrace & PT_TRACE_VFORK)
240			return PTRACE_EVENT_VFORK;
241	} else if ((clone_flags & CSIGNAL) != SIGCHLD) {
242		if (current->ptrace & PT_TRACE_CLONE)
243			return PTRACE_EVENT_CLONE;
244	} else if (current->ptrace & PT_TRACE_FORK)
245		return PTRACE_EVENT_FORK;
246
247	return 0;
248}
249
/**
 * tracehook_finish_clone - new child created and being attached
 * @child:		new child task
 * @clone_flags:	%CLONE_* flags from clone/fork/vfork system call
 * @trace:		return value from tracehook_prepare_clone()
 *
 * This is called immediately after adding @child to its parent's children list.
 * The @trace value is that returned by tracehook_prepare_clone().
 *
 * Called with current's siglock and write_lock_irq(&tasklist_lock) held.
 */
static inline void tracehook_finish_clone(struct task_struct *child,
					  unsigned long clone_flags, int trace)
{
	/*
	 * The second argument tells ptrace_init_task() whether the child
	 * starts out traced: either CLONE_PTRACE was requested explicitly,
	 * or tracehook_prepare_clone() selected an event to report.
	 */
	ptrace_init_task(child, (clone_flags & CLONE_PTRACE) || trace);
}
266
267/**
268 * tracehook_report_clone - in parent, new child is about to start running
269 * @regs:		parent's user register state
270 * @clone_flags:	flags from parent's system call
271 * @pid:		new child's PID in the parent's namespace
272 * @child:		new child task
273 *
274 * Called after a child is set up, but before it has been started running.
275 * This is not a good place to block, because the child has not started
276 * yet.  Suspend the child here if desired, and then block in
277 * tracehook_report_clone_complete().  This must prevent the child from
278 * self-reaping if tracehook_report_clone_complete() uses the @child
279 * pointer; otherwise it might have died and been released by the time
280 * tracehook_report_clone_complete() is called.
281 *
282 * Called with no locks held, but the child cannot run until this returns.
283 */
284static inline void tracehook_report_clone(struct pt_regs *regs,
285					  unsigned long clone_flags,
286					  pid_t pid, struct task_struct *child)
287{
288	if (unlikely(task_ptrace(child))) {
289		/*
290		 * It doesn't matter who attached/attaching to this
291		 * task, the pending SIGSTOP is right in any case.
292		 */
293		sigaddset(&child->pending.signal, SIGSTOP);
294		set_tsk_thread_flag(child, TIF_SIGPENDING);
295	}
296}
297
298/**
299 * tracehook_report_clone_complete - new child is running
300 * @trace:		return value from tracehook_prepare_clone()
301 * @regs:		parent's user register state
302 * @clone_flags:	flags from parent's system call
303 * @pid:		new child's PID in the parent's namespace
304 * @child:		child task, already running
305 *
306 * This is called just after the child has started running.  This is
307 * just before the clone/fork syscall returns, or blocks for vfork
308 * child completion if @clone_flags has the %CLONE_VFORK bit set.
309 * The @child pointer may be invalid if a self-reaping child died and
310 * tracehook_report_clone() took no action to prevent it from self-reaping.
311 *
312 * Called with no locks held.
313 */
314static inline void tracehook_report_clone_complete(int trace,
315						   struct pt_regs *regs,
316						   unsigned long clone_flags,
317						   pid_t pid,
318						   struct task_struct *child)
319{
320	if (unlikely(trace))
321		ptrace_event(0, trace, pid);
322}
323
/**
 * tracehook_report_vfork_done - vfork parent's child has exited or exec'd
 * @child:		child task, already running
 * @pid:		new child's PID in the parent's namespace
 *
 * Called after a %CLONE_VFORK parent has waited for the child to complete.
 * The clone/vfork system call will return immediately after this.
 * The @child pointer may be invalid if a self-reaping child died and
 * tracehook_report_clone() took no action to prevent it from self-reaping.
 *
 * Called with no locks held.
 */
static inline void tracehook_report_vfork_done(struct task_struct *child,
					       pid_t pid)
{
	/* Possible PTRACE_EVENT_VFORK_DONE stop; @pid is the event message. */
	ptrace_event(PT_TRACE_VFORK_DONE, PTRACE_EVENT_VFORK_DONE, pid);
}
341
/**
 * tracehook_prepare_release_task - task is being reaped, clean up tracing
 * @task:		task in %EXIT_DEAD state
 *
 * This is called in release_task() just before @task gets finally reaped
 * and freed.  This would be the ideal place to remove and clean up any
 * tracing-related state for @task.
 *
 * Called with no locks held.
 */
static inline void tracehook_prepare_release_task(struct task_struct *task)
{
	/* Nothing to do here; ptrace cleanup is in tracehook_finish_release_task(). */
}
355
/**
 * tracehook_finish_release_task - final tracing clean-up
 * @task:		task in %EXIT_DEAD state
 *
 * This is called in release_task() when @task is being in the middle of
 * being reaped.  After this, there must be no tracing entanglements.
 *
 * Called with write_lock_irq(&tasklist_lock) held.
 */
static inline void tracehook_finish_release_task(struct task_struct *task)
{
	/* Drop the remaining ptrace bookkeeping for @task. */
	ptrace_release_task(task);
}
369
370/**
371 * tracehook_signal_handler - signal handler setup is complete
372 * @sig:		number of signal being delivered
373 * @info:		siginfo_t of signal being delivered
374 * @ka:			sigaction setting that chose the handler
375 * @regs:		user register state
376 * @stepping:		nonzero if debugger single-step or block-step in use
377 *
378 * Called by the arch code after a signal handler has been set up.
379 * Register and stack state reflects the user handler about to run.
380 * Signal mask changes have already been made.
381 *
382 * Called without locks, shortly before returning to user mode
383 * (or handling more signals).
384 */
385static inline void tracehook_signal_handler(int sig, siginfo_t *info,
386					    const struct k_sigaction *ka,
387					    struct pt_regs *regs, int stepping)
388{
389	if (stepping)
390		ptrace_notify(SIGTRAP);
391}
392
393/**
394 * tracehook_consider_ignored_signal - suppress short-circuit of ignored signal
395 * @task:		task receiving the signal
396 * @sig:		signal number being sent
397 *
398 * Return zero iff tracing doesn't care to examine this ignored signal,
399 * so it can short-circuit normal delivery and never even get queued.
400 *
401 * Called with @task->sighand->siglock held.
402 */
403static inline int tracehook_consider_ignored_signal(struct task_struct *task,
404						    int sig)
405{
406	return (task_ptrace(task) & PT_PTRACED) != 0;
407}
408
409/**
410 * tracehook_consider_fatal_signal - suppress special handling of fatal signal
411 * @task:		task receiving the signal
412 * @sig:		signal number being sent
413 *
414 * Return nonzero to prevent special handling of this termination signal.
415 * Normally handler for signal is %SIG_DFL.  It can be %SIG_IGN if @sig is
416 * ignored, in which case force_sig() is about to reset it to %SIG_DFL.
417 * When this returns zero, this signal might cause a quick termination
418 * that does not give the debugger a chance to intercept the signal.
419 *
420 * Called with or without @task->sighand->siglock held.
421 */
422static inline int tracehook_consider_fatal_signal(struct task_struct *task,
423						  int sig)
424{
425	return (task_ptrace(task) & PT_PTRACED) != 0;
426}
427
/**
 * tracehook_force_sigpending - let tracing force signal_pending(current) on
 *
 * Called when recomputing our signal_pending() flag.  Return nonzero
 * to force the signal_pending() flag on, so that tracehook_get_signal()
 * will be called before the next return to user mode.
 *
 * Called with @current->sighand->siglock held.
 */
static inline int tracehook_force_sigpending(void)
{
	/* No tracing facility here ever needs to force the flag on. */
	return 0;
}
441
/**
 * tracehook_get_signal - deliver synthetic signal to traced task
 * @task:		@current
 * @regs:		task_pt_regs(@current)
 * @info:		details of synthetic signal
 * @return_ka:		sigaction for synthetic signal
 *
 * Return zero to check for a real pending signal normally.
 * Return -1 after releasing the siglock to repeat the check.
 * Return a signal number to induce an artificial signal delivery,
 * setting *@info and *@return_ka to specify its details and behavior.
 *
 * The @return_ka->sa_handler value controls the disposition of the
 * signal, no matter the signal number.  For %SIG_DFL, the return value
 * is a representative signal to indicate the behavior (e.g. %SIGTERM
 * for death, %SIGQUIT for core dump, %SIGSTOP for job control stop,
 * %SIGTSTP for stop unless in an orphaned pgrp), but the signal number
 * reported will be @info->si_signo instead.
 *
 * Called with @task->sighand->siglock held, before dequeuing pending signals.
 */
static inline int tracehook_get_signal(struct task_struct *task,
				       struct pt_regs *regs,
				       siginfo_t *info,
				       struct k_sigaction *return_ka)
{
	/* No synthetic signals; fall through to the real pending queue. */
	return 0;
}
470
471/**
472 * tracehook_notify_jctl - report about job control stop/continue
473 * @notify:		zero, %CLD_STOPPED or %CLD_CONTINUED
474 * @why:		%CLD_STOPPED or %CLD_CONTINUED
475 *
476 * This is called when we might call do_notify_parent_cldstop().
477 *
478 * @notify is zero if we would not ordinarily send a %SIGCHLD,
479 * or is the %CLD_STOPPED or %CLD_CONTINUED .si_code for %SIGCHLD.
480 *
481 * @why is %CLD_STOPPED when about to stop for job control;
482 * we are already in %TASK_STOPPED state, about to call schedule().
483 * It might also be that we have just exited (check %PF_EXITING),
484 * but need to report that a group-wide stop is complete.
485 *
486 * @why is %CLD_CONTINUED when waking up after job control stop and
487 * ready to make a delayed @notify report.
488 *
489 * Return the %CLD_* value for %SIGCHLD, or zero to generate no signal.
490 *
491 * Called with the siglock held.
492 */
493static inline int tracehook_notify_jctl(int notify, int why)
494{
495	return notify ?: (current->ptrace & PT_PTRACED) ? why : 0;
496}
497
/**
 * tracehook_finish_jctl - report about return from job control stop
 *
 * This is called by do_signal_stop() after wakeup.
 */
static inline void tracehook_finish_jctl(void)
{
	/* Nothing to do when a job control stop ends. */
}
506
/* Special negative return values for tracehook_notify_death(), see below. */
#define DEATH_REAP			-1
#define DEATH_DELAYED_GROUP_LEADER	-2

/**
 * tracehook_notify_death - task is dead, ready to notify parent
 * @task:		@current task now exiting
 * @death_cookie:	value to pass to tracehook_report_death()
 * @group_dead:		nonzero if this was the last thread in the group to die
 *
 * A return value >= 0 means call do_notify_parent() with that signal
 * number.  Negative return value can be %DEATH_REAP to self-reap right
 * now, or %DEATH_DELAYED_GROUP_LEADER to a zombie without notifying our
 * parent.  Note that a return value of 0 means a do_notify_parent() call
 * that sends no signal, but still wakes up a parent blocked in wait*().
 *
 * Called with write_lock_irq(&tasklist_lock) held.
 */
static inline int tracehook_notify_death(struct task_struct *task,
					 void **death_cookie, int group_dead)
{
	/* A detached (non-reporting) task self-reaps unless it is ptraced. */
	if (task_detached(task))
		return task->ptrace ? SIGCHLD : DEATH_REAP;

	/*
	 * If something other than our normal parent is ptracing us, then
	 * send it a SIGCHLD instead of honoring exit_signal.  exit_signal
	 * only has special meaning to our real parent.
	 */
	if (thread_group_empty(task) && !ptrace_reparented(task))
		return task->exit_signal;

	/* Non-empty thread group or ptrace-reparented: delay or tell tracer. */
	return task->ptrace ? SIGCHLD : DEATH_DELAYED_GROUP_LEADER;
}
540
/**
 * tracehook_report_death - task is dead and ready to be reaped
 * @task:		@current task now exiting
 * @signal:		return value from tracehook_notify_death()
 * @death_cookie:	value passed back from tracehook_notify_death()
 * @group_dead:		nonzero if this was the last thread in the group to die
 *
 * Thread has just become a zombie or is about to self-reap.  If positive,
 * @signal is the signal number just sent to the parent (usually %SIGCHLD).
 * If @signal is %DEATH_REAP, this thread will self-reap.  If @signal is
 * %DEATH_DELAYED_GROUP_LEADER, this is a delayed_group_leader() zombie.
 * The @death_cookie was passed back by tracehook_notify_death().
 *
 * If normal reaping is not inhibited, @task->exit_state might be changing
 * in parallel.
 *
 * Called without locks.
 */
static inline void tracehook_report_death(struct task_struct *task,
					  int signal, void *death_cookie,
					  int group_dead)
{
	/* No tracing work to do at death-report time. */
}
564
565#ifdef TIF_NOTIFY_RESUME
566/**
567 * set_notify_resume - cause tracehook_notify_resume() to be called
568 * @task:		task that will call tracehook_notify_resume()
569 *
570 * Calling this arranges that @task will call tracehook_notify_resume()
571 * before returning to user mode.  If it's already running in user mode,
572 * it will enter the kernel and call tracehook_notify_resume() soon.
573 * If it's blocked, it will not be woken.
574 */
575static inline void set_notify_resume(struct task_struct *task)
576{
577	if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_RESUME))
578		kick_process(task);
579}
580
/**
 * tracehook_notify_resume - report when about to return to user mode
 * @regs:		user-mode registers of @current task
 *
 * This is called when %TIF_NOTIFY_RESUME has been set.  Now we are
 * about to return to user mode, and the user state in @regs can be
 * inspected or adjusted.  The caller in arch code has cleared
 * %TIF_NOTIFY_RESUME before the call.  If the flag gets set again
 * asynchronously, this will be called again before we return to
 * user mode.
 *
 * Called without locks.
 */
static inline void tracehook_notify_resume(struct pt_regs *regs)
{
	/* No tracing work to do on return to user mode. */
}
597#endif	/* TIF_NOTIFY_RESUME */
598
599#endif	/* <linux/tracehook.h> */
600