1/*
2 * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
/*  sysctl interface for parameters from user-land */
30
31#include <sys/param.h>
32#include <sys/mman.h>
33#include <sys/stat.h>
34#include <sys/sysctl.h>
35#include <sys/kauth.h>
36#include <libkern/libkern.h>
37#include <kern/debug.h>
38#include <pexpert/pexpert.h>
39
40#include <kperf/context.h>
41#include <kperf/action.h>
42#include <kperf/timetrigger.h>
43#include <kperf/pet.h>
44#include <kperf/kperfbsd.h>
45#include <kperf/kperf.h>
46
47
48/* a pid which is allowed to control kperf without requiring root access */
49static pid_t blessed_pid = -1;
50static boolean_t blessed_preempt = FALSE;
51
/*
 * Request identifiers. Each SYSCTL_PROC entry in this file passes one of
 * these as its arg1 cookie so that the shared handlers can work out which
 * knob is being accessed.
 */
enum {
	REQ_SAMPLING              = 1,
	REQ_ACTION_COUNT          = 2,
	REQ_ACTION_SAMPLERS       = 3,
	REQ_TIMER_COUNT           = 4,
	REQ_TIMER_PERIOD          = 5,
	REQ_TIMER_PET             = 6,
	REQ_TIMER_ACTION          = 7,
	REQ_BLESS                 = 8,
	REQ_ACTION_USERDATA       = 9,
	REQ_ACTION_FILTER_BY_TASK = 10,
	REQ_ACTION_FILTER_BY_PID  = 11,
	REQ_KDBG_CALLSTACKS       = 12,
	REQ_PET_IDLE_RATE         = 13,
	REQ_BLESS_PREEMPT         = 14
};
67
68/* simple state variables */
69int kperf_debug_level = 0;
70
71static lck_grp_attr_t *kperf_cfg_lckgrp_attr = NULL;
72static lck_grp_t      *kperf_cfg_lckgrp = NULL;
73static lck_mtx_t       kperf_cfg_lock;
74static boolean_t       kperf_cfg_initted = FALSE;
75
76void kdbg_swap_global_state_pid(pid_t old_pid, pid_t new_pid); /* bsd/kern/kdebug.c */
77
78/***************************
79 *
80 * lock init
81 *
82 ***************************/
83
84void
85kperf_bootstrap(void)
86{
87	kperf_cfg_lckgrp_attr = lck_grp_attr_alloc_init();
88	kperf_cfg_lckgrp = lck_grp_alloc_init("kperf cfg",
89                                          kperf_cfg_lckgrp_attr);
90	lck_mtx_init(&kperf_cfg_lock, kperf_cfg_lckgrp, LCK_ATTR_NULL);
91
92	kperf_cfg_initted = TRUE;
93}
94
95/***************************
96 *
97 * sysctl handlers
98 *
99 ***************************/
100
101static int
102sysctl_timer_period( __unused struct sysctl_oid *oidp, struct sysctl_req *req )
103{
104    int error = 0;
105    uint64_t inputs[2], retval;
106    unsigned timer, set = 0;
107
108    /* get 2x 64-bit words */
109    error = SYSCTL_IN( req, inputs, 2*sizeof(inputs[0]) );
110    if(error)
111	    return (error);
112
113    /* setup inputs */
114    timer = (unsigned) inputs[0];
115    if( inputs[1] != ~0ULL )
116	    set = 1;
117
118    if( set )
119    {
120	    error = kperf_timer_set_period( timer, inputs[1] );
121	    if( error )
122		    return error;
123    }
124
125    error = kperf_timer_get_period(timer, &retval);
126    if(error)
127	    return (error);
128
129    inputs[1] = retval;
130
131    if( error == 0 )
132	    error = SYSCTL_OUT( req, inputs, 2*sizeof(inputs[0]) );
133
134    return error;
135}
136
137static int
138sysctl_timer_action( __unused struct sysctl_oid *oidp, struct sysctl_req *req )
139{
140    int error = 0;
141    uint64_t inputs[2];
142    uint32_t retval;
143    unsigned timer, set = 0;
144
145    /* get 2x 64-bit words */
146    error = SYSCTL_IN( req, inputs, 2*sizeof(inputs[0]) );
147    if(error)
148	    return (error);
149
150    /* setup inputs */
151    timer = (unsigned) inputs[0];
152    if( inputs[1] != ~0ULL )
153	    set = 1;
154
155    if( set )
156    {
157	    error = kperf_timer_set_action( timer, inputs[1] );
158	    if( error )
159		    return error;
160    }
161
162    error = kperf_timer_get_action(timer, &retval);
163    if(error)
164	    return (error);
165
166    inputs[1] = retval;
167
168    if( error == 0 )
169	    error = SYSCTL_OUT( req, inputs, 2*sizeof(inputs[0]) );
170
171    return error;
172}
173
174static int
175sysctl_action_samplers( __unused struct sysctl_oid *oidp,
176                        struct sysctl_req *req )
177{
178    int error = 0;
179    uint64_t inputs[3];
180    uint32_t retval;
181    unsigned actionid, set = 0;
182
183    /* get 3x 64-bit words */
184    error = SYSCTL_IN( req, inputs, 3*sizeof(inputs[0]) );
185    if(error)
186	    return (error);
187
188    /* setup inputs */
189    set = (unsigned) inputs[0];
190    actionid = (unsigned) inputs[1];
191
192    if( set )
193    {
194	    error = kperf_action_set_samplers( actionid, inputs[2] );
195	    if( error )
196		    return error;
197    }
198
199    error = kperf_action_get_samplers(actionid, &retval);
200    if(error)
201	    return (error);
202
203    inputs[2] = retval;
204
205    if( error == 0 )
206	    error = SYSCTL_OUT( req, inputs, 3*sizeof(inputs[0]) );
207
208    return error;
209}
210
211static int
212sysctl_action_userdata( __unused struct sysctl_oid *oidp,
213                        struct sysctl_req *req )
214{
215    int error = 0;
216    uint64_t inputs[3];
217    uint32_t retval;
218    unsigned actionid, set = 0;
219
220    /* get 3x 64-bit words */
221    error = SYSCTL_IN( req, inputs, 3*sizeof(inputs[0]) );
222    if(error)
223	    return (error);
224
225    /* setup inputs */
226    set = (unsigned) inputs[0];
227    actionid = (unsigned) inputs[1];
228
229    if( set )
230    {
231	    error = kperf_action_set_userdata( actionid, inputs[2] );
232	    if( error )
233		    return error;
234    }
235
236    error = kperf_action_get_userdata(actionid, &retval);
237    if(error)
238	    return (error);
239
240    inputs[2] = retval;
241
242    if( error == 0 )
243	    error = SYSCTL_OUT( req, inputs, 3*sizeof(inputs[0]) );
244
245    return error;
246}
247
248static int
249sysctl_action_filter( __unused struct sysctl_oid *oidp,
250		      struct sysctl_req *req, int is_task_t )
251{
252    int error = 0;
253    uint64_t inputs[3];
254    int retval;
255    unsigned actionid, set = 0;
256    mach_port_name_t portname;
257    int pid;
258
259    /* get 3x 64-bit words */
260    error = SYSCTL_IN( req, inputs, 3*sizeof(inputs[0]) );
261    if(error)
262	    return (error);
263
264    /* setup inputs */
265    set = (unsigned) inputs[0];
266    actionid = (unsigned) inputs[1];
267
268    if( set )
269    {
270	    if( is_task_t )
271	    {
272		    portname = (mach_port_name_t) inputs[2];
273		    pid = kperf_port_to_pid(portname);
274	    }
275	    else
276		    pid = (int) inputs[2];
277
278	    error = kperf_action_set_filter( actionid, pid );
279	    if( error )
280		    return error;
281    }
282
283    error = kperf_action_get_filter(actionid, &retval);
284    if(error)
285	    return (error);
286
287    inputs[2] = retval;
288
289    if( error == 0 )
290	    error = SYSCTL_OUT( req, inputs, 3*sizeof(inputs[0]) );
291
292    return error;
293}
294
295static int
296sysctl_sampling( struct sysctl_oid *oidp, struct sysctl_req *req )
297{
298    int error = 0;
299    uint32_t value = 0;
300
301    /* get the old value and process it */
302    value = kperf_sampling_status();
303
304    /* copy out the old value, get the new value */
305    error = sysctl_handle_int(oidp, &value, 0, req);
306    if (error || !req->newptr)
307	    return (error);
308
309    /* if that worked, and we're writing... */
310    if( value )
311	    error = kperf_sampling_enable();
312    else
313	    error = kperf_sampling_disable();
314
315    return error;
316}
317
318static int
319sysctl_action_count( struct sysctl_oid *oidp, struct sysctl_req *req )
320{
321    int error = 0;
322    uint32_t value = 0;
323
324    /* get the old value and process it */
325    value = kperf_action_get_count();
326
327    /* copy out the old value, get the new value */
328    error = sysctl_handle_int(oidp, &value, 0, req);
329    if (error || !req->newptr)
330	    return (error);
331
332    /* if that worked, and we're writing... */
333    return kperf_action_set_count(value);
334}
335
336static int
337sysctl_timer_count( struct sysctl_oid *oidp, struct sysctl_req *req )
338{
339    int error = 0;
340    uint32_t value = 0;
341
342    /* get the old value and process it */
343    value = kperf_timer_get_count();
344
345    /* copy out the old value, get the new value */
346    error = sysctl_handle_int(oidp, &value, 0, req);
347    if (error || !req->newptr)
348	    return (error);
349
350    /* if that worked, and we're writing... */
351    return kperf_timer_set_count(value);
352}
353
354static int
355sysctl_timer_pet( struct sysctl_oid *oidp, struct sysctl_req *req )
356{
357    int error = 0;
358    uint32_t value = 0;
359
360    /* get the old value and process it */
361    value = kperf_timer_get_petid();
362
363    /* copy out the old value, get the new value */
364    error = sysctl_handle_int(oidp, &value, 0, req);
365    if (error || !req->newptr)
366	    return (error);
367
368    /* if that worked, and we're writing... */
369    return kperf_timer_set_petid(value);
370}
371
372static int
373sysctl_bless( struct sysctl_oid *oidp, struct sysctl_req *req )
374{
375    int error = 0;
376    int value = 0;
377
378    /* get the old value and process it */
379    value = blessed_pid;
380
381    /* copy out the old value, get the new value */
382    error = sysctl_handle_int(oidp, &value, 0, req);
383    if (error || !req->newptr)
384	    return (error);
385
386    /* if that worked, and we're writing... */
387    error = kperf_bless_pid(value);
388
389    return error;
390}
391
392static int
393sysctl_bless_preempt( struct sysctl_oid *oidp, struct sysctl_req *req )
394{
395    int error = 0;
396    int value = 0;
397
398    /* get the old value and process it */
399    value = blessed_preempt;
400
401    /* copy out the old value, get the new value */
402    error = sysctl_handle_int(oidp, &value, 0, req);
403    if (error || !req->newptr)
404	    return (error);
405
406    /* if that worked, and we're writing... */
407    blessed_preempt = value ? TRUE : FALSE;
408
409    return 0;
410}
411
412
413static int
414sysctl_kdbg_callstacks( struct sysctl_oid *oidp, struct sysctl_req *req )
415{
416    int error = 0;
417    int value = 0;
418
419    /* get the old value and process it */
420    value = kperf_kdbg_get_stacks();
421
422    /* copy out the old value, get the new value */
423    error = sysctl_handle_int(oidp, &value, 0, req);
424    if (error || !req->newptr)
425	    return (error);
426
427    /* if that worked, and we're writing... */
428    error = kperf_kdbg_set_stacks(value);
429
430    return error;
431}
432
433static int
434sysctl_pet_idle_rate( struct sysctl_oid *oidp, struct sysctl_req *req )
435{
436    int error = 0;
437    int value = 0;
438
439    /* get the old value and process it */
440    value = kperf_get_pet_idle_rate();
441
442    /* copy out the old value, get the new value */
443    error = sysctl_handle_int(oidp, &value, 0, req);
444    if (error || !req->newptr)
445	    return (error);
446
447    /* if that worked, and we're writing... */
448    kperf_set_pet_idle_rate(value);
449
450    return error;
451}
452
453/*
454 * #define SYSCTL_HANDLER_ARGS (struct sysctl_oid *oidp,         \
455 *                                void *arg1, int arg2,                 \
456 *                              struct sysctl_req *req )
457 */
458static int
459kperf_sysctl SYSCTL_HANDLER_ARGS
460{
461	int ret;
462
463	// __unused struct sysctl_oid *unused_oidp = oidp;
464	(void)arg2;
465
466	if ( !kperf_cfg_initted )
467		panic("kperf_bootstrap not called");
468
469	ret = kperf_access_check();
470	if (ret) {
471		return ret;
472	}
473
474	lck_mtx_lock(&kperf_cfg_lock);
475
476	/* which request */
477	switch( (uintptr_t) arg1 )
478	{
479	case REQ_ACTION_COUNT:
480		ret = sysctl_action_count( oidp, req );
481		break;
482	case REQ_ACTION_SAMPLERS:
483		ret = sysctl_action_samplers( oidp, req );
484		break;
485	case REQ_ACTION_USERDATA:
486		ret = sysctl_action_userdata( oidp, req );
487		break;
488	case REQ_TIMER_COUNT:
489		ret = sysctl_timer_count( oidp, req );
490		break;
491	case REQ_TIMER_PERIOD:
492		ret = sysctl_timer_period( oidp, req );
493		break;
494	case REQ_TIMER_PET:
495		ret = sysctl_timer_pet( oidp, req );
496		break;
497	case REQ_TIMER_ACTION:
498		ret = sysctl_timer_action( oidp, req );
499		break;
500	case REQ_SAMPLING:
501		ret = sysctl_sampling( oidp, req );
502		break;
503	case REQ_KDBG_CALLSTACKS:
504		ret = sysctl_kdbg_callstacks( oidp, req );
505		break;
506	case REQ_ACTION_FILTER_BY_TASK:
507		ret = sysctl_action_filter( oidp, req, 1 );
508		break;
509	case REQ_ACTION_FILTER_BY_PID:
510		ret = sysctl_action_filter( oidp, req, 0 );
511		break;
512	case REQ_PET_IDLE_RATE:
513		ret = sysctl_pet_idle_rate( oidp, req );
514		break;
515	case REQ_BLESS_PREEMPT:
516		ret = sysctl_bless_preempt( oidp, req );
517		break;
518	default:
519		ret = ENOENT;
520		break;
521	}
522
523	lck_mtx_unlock(&kperf_cfg_lock);
524
525	return ret;
526}
527
528static int
529kperf_sysctl_bless_handler SYSCTL_HANDLER_ARGS
530{
531	int ret;
532	// __unused struct sysctl_oid *unused_oidp = oidp;
533	(void)arg2;
534
535	if ( !kperf_cfg_initted )
536		panic("kperf_bootstrap not called");
537
538	lck_mtx_lock(&kperf_cfg_lock);
539
540	/* which request */
541	if ( (uintptr_t) arg1 == REQ_BLESS )
542		ret = sysctl_bless( oidp, req );
543	else
544		ret = ENOENT;
545
546	lck_mtx_unlock(&kperf_cfg_lock);
547
548	return ret;
549}
550
551
552/***************************
553 *
554 * Access control
555 *
556 ***************************/
557
558/* Validate whether the current process has priviledges to access
559 * kperf (and by extension, trace). Returns 0 if access is granted.
560 */
561int
562kperf_access_check(void)
563{
564	proc_t p = current_proc();
565	proc_t blessed_p;
566	int ret = 0;
567	boolean_t pid_gone = FALSE;
568
569	/* check if the pid that held the lock is gone */
570	blessed_p = proc_find(blessed_pid);
571
572	if ( blessed_p != NULL )
573		proc_rele(blessed_p);
574	else
575		pid_gone = TRUE;
576
577	if ( blessed_pid == -1 || pid_gone ) {
578		/* check for root */
579		ret = suser(kauth_cred_get(), &p->p_acflag);
580		if( !ret )
581			return ret;
582	}
583
584	/* check against blessed pid */
585	if( p->p_pid != blessed_pid )
586		return EACCES;
587
588	/* access granted. */
589	return 0;
590}
591
592/* specify a pid as being able to access kperf/trace, depiste not
593 * being root
594 */
595int
596kperf_bless_pid(pid_t newpid)
597{
598	proc_t p = NULL;
599	pid_t current_pid;
600
601	p = current_proc();
602	current_pid = p->p_pid;
603
604	/* are we allowed to preempt? */
605	if ( (newpid != -1) && (blessed_pid != -1) &&
606	     (blessed_pid != current_pid) && !blessed_preempt ) {
607		/* check if the pid that held the lock is gone */
608		p = proc_find(blessed_pid);
609
610		if ( p != NULL ) {
611			proc_rele(p);
612			return EACCES;
613		}
614	}
615
616	/* validate new pid */
617	if ( newpid != -1 ) {
618		p = proc_find(newpid);
619
620		if ( p == NULL )
621			return EINVAL;
622
623		proc_rele(p);
624	}
625
626	/* take trace facility as well */
627	kdbg_swap_global_state_pid(blessed_pid, newpid);
628
629	blessed_pid = newpid;
630	blessed_preempt = FALSE;
631
632	return 0;
633}
634
635/***************************
636 *
637 * sysctl hooks
638 *
639 ***************************/
640
641/* root kperf node */
642SYSCTL_NODE(, OID_AUTO, kperf, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
643            "kperf");
644
645/* action sub-section */
646SYSCTL_NODE(_kperf, OID_AUTO, action, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
647            "action");
648
649SYSCTL_PROC(_kperf_action, OID_AUTO, count,
650            CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY,
651            (void*)REQ_ACTION_COUNT,
652            sizeof(int), kperf_sysctl, "I", "Number of actions");
653
654SYSCTL_PROC(_kperf_action, OID_AUTO, samplers,
655            CTLFLAG_RW|CTLFLAG_ANYBODY,
656            (void*)REQ_ACTION_SAMPLERS,
657            3*sizeof(uint64_t), kperf_sysctl, "UQ",
658            "What to sample what a trigger fires an action");
659
660SYSCTL_PROC(_kperf_action, OID_AUTO, userdata,
661            CTLFLAG_RW|CTLFLAG_ANYBODY,
662            (void*)REQ_ACTION_USERDATA,
663            3*sizeof(uint64_t), kperf_sysctl, "UQ",
664            "User data to attribute to action");
665
666SYSCTL_PROC(_kperf_action, OID_AUTO, filter_by_task,
667            CTLFLAG_RW|CTLFLAG_ANYBODY,
668            (void*)REQ_ACTION_FILTER_BY_TASK,
669            3*sizeof(uint64_t), kperf_sysctl, "UQ",
670            "Apply a task filter to the action");
671
672SYSCTL_PROC(_kperf_action, OID_AUTO, filter_by_pid,
673            CTLFLAG_RW|CTLFLAG_ANYBODY,
674            (void*)REQ_ACTION_FILTER_BY_PID,
675            3*sizeof(uint64_t), kperf_sysctl, "UQ",
676            "Apply a pid filter to the action");
677
678/* timer sub-section */
679SYSCTL_NODE(_kperf, OID_AUTO, timer, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
680            "timer");
681
682SYSCTL_PROC(_kperf_timer, OID_AUTO, count,
683            CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY,
684            (void*)REQ_TIMER_COUNT,
685            sizeof(int), kperf_sysctl, "I", "Number of time triggers");
686
687SYSCTL_PROC(_kperf_timer, OID_AUTO, period,
688            CTLFLAG_RW|CTLFLAG_ANYBODY,
689            (void*)REQ_TIMER_PERIOD,
690            2*sizeof(uint64_t), kperf_sysctl, "UQ", "Timer number and period");
691
692SYSCTL_PROC(_kperf_timer, OID_AUTO, action,
693            CTLFLAG_RW|CTLFLAG_ANYBODY,
694            (void*)REQ_TIMER_ACTION,
695            2*sizeof(uint64_t), kperf_sysctl, "UQ", "Timer number and actionid");
696
697SYSCTL_PROC(_kperf_timer, OID_AUTO, pet_timer,
698            CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY,
699            (void*)REQ_TIMER_PET,
700            sizeof(int), kperf_sysctl, "I", "Which timer ID does PET");
701
702/* misc */
703SYSCTL_PROC(_kperf, OID_AUTO, sampling,
704            CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY,
705            (void*)REQ_SAMPLING,
706            sizeof(int), kperf_sysctl, "I", "Sampling running");
707
708SYSCTL_PROC(_kperf, OID_AUTO, blessed_pid,
709            CTLTYPE_INT|CTLFLAG_RW, /* must be root */
710            (void*)REQ_BLESS,
711            sizeof(int), kperf_sysctl_bless_handler, "I", "Blessed pid");
712
713SYSCTL_PROC(_kperf, OID_AUTO, blessed_preempt,
714            CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY,
715            (void*)REQ_BLESS_PREEMPT,
716            sizeof(int), kperf_sysctl, "I", "Blessed preemption");
717
718
719SYSCTL_PROC(_kperf, OID_AUTO, kdbg_callstacks,
720            CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY,
721            (void*)REQ_KDBG_CALLSTACKS,
722            sizeof(int), kperf_sysctl, "I", "Generate kdbg callstacks");
723
724SYSCTL_INT(_kperf, OID_AUTO, kdbg_cswitch,
725           CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY,
726           &kperf_cswitch_hook, 0, "Generate context switch info");
727
728SYSCTL_PROC(_kperf, OID_AUTO, pet_idle_rate,
729            CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY,
730            (void*)REQ_PET_IDLE_RATE,
731            sizeof(int), kperf_sysctl, "I", "Rate at which unscheduled threads are forced to be sampled in PET mode");
732
733/* debug */
734SYSCTL_INT(_kperf, OID_AUTO, debug_level, CTLFLAG_RW,
735           &kperf_debug_level, 0, "debug level");
736
737