/*-
 * Copyright (c) 2005-2007 Joseph Koshy
 * Copyright (c) 2007 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by A. Joseph Koshy under
 * sponsorship from the FreeBSD Foundation and Google, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * Logging code for hwpmc(4)
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
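/*
 * FreeBSD 11 and later provide <sys/capsicum.h>; older releases used
 * <sys/capability.h>.
 */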
#if (__FreeBSD_version >= 1100000)
#include <sys/capsicum.h>
#else
#include <sys/capability.h>
#endif
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pmc.h>
#include <sys/pmckern.h>
#include <sys/pmclog.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/unistd.h>
#include <sys/vnode.h>

/*
 * Sysctl tunables
 */

SYSCTL_DECL(_kern_hwpmc);

/*
 * kern.hwpmc.logbuffersize -- size of an individual log buffer, in kilobytes.
 */

static int pmclog_buffer_size = PMC_LOG_BUFFER_SIZE;
SYSCTL_INT(_kern_hwpmc, OID_AUTO, logbuffersize, CTLFLAG_RDTUN,
    &pmclog_buffer_size, 0, "size of log buffers in kilobytes");

/*
 * kern.hwpmc.nbuffers -- number of global log buffers
 */

static int pmc_nlogbuffers = PMC_NLOGBUFFERS;
SYSCTL_INT(_kern_hwpmc, OID_AUTO, nbuffers, CTLFLAG_RDTUN,
    &pmc_nlogbuffers, 0, "number of global log buffers");
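
/*
 * Both of the above are boot-time tunables (CTLFLAG_RDTUN).  As an
 * illustrative sketch (example values only), they would be set from
 * loader.conf(5):
 *
 *	kern.hwpmc.logbuffersize=16
 *	kern.hwpmc.nbuffers=64
 */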

/*
 * Global log buffer list and associated spin lock.
 */

TAILQ_HEAD(, pmclog_buffer) pmc_bufferlist =
	TAILQ_HEAD_INITIALIZER(pmc_bufferlist);
static struct mtx pmc_bufferlist_mtx;	/* spin lock */
static struct mtx pmc_kthread_mtx;	/* sleep lock */

#define	PMCLOG_INIT_BUFFER_DESCRIPTOR(D) do {				\
		const int __roundup = roundup(sizeof(*D),		\
			sizeof(uint32_t));				\
		(D)->plb_fence = ((char *) (D)) +			\
			 1024*pmclog_buffer_size;			\
		(D)->plb_base  = (D)->plb_ptr = ((char *) (D)) +	\
			__roundup;					\
	} while (0)
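
/*
 * A note on layout: each log buffer is a single malloc(9) allocation of
 * 1024 * pmclog_buffer_size bytes.  The struct pmclog_buffer descriptor
 * sits at the start of the allocation, plb_base and plb_ptr point just
 * past the word-aligned descriptor, and plb_fence points one byte past
 * the end of the allocation.
 */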


/*
 * Log file record constructors.
 */
#define	_PMCLOG_TO_HEADER(T,L)						\
	((PMCLOG_HEADER_MAGIC << 24) |					\
	 (PMCLOG_TYPE_ ## T << 16)   |					\
	 ((L) & 0xFFFF))
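
/*
 * The resulting 32-bit header word carries the magic value in bits 31-24,
 * the record type in bits 23-16 and the record length in bytes in the
 * low 16 bits.
 */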

/* reserve LEN bytes of space and initialize the entry header */
#define	_PMCLOG_RESERVE(PO,TYPE,LEN,ACTION) do {			\
		uint32_t *_le;						\
		int _len = roundup((LEN), sizeof(uint32_t));		\
		if ((_le = pmclog_reserve((PO), _len)) == NULL) {	\
			ACTION;						\
		}							\
		*_le = _PMCLOG_TO_HEADER(TYPE,_len);			\
		_le += 3	/* skip over the header and timestamp */

#define	PMCLOG_RESERVE(P,T,L)		_PMCLOG_RESERVE(P,T,L,return)
#define	PMCLOG_RESERVE_WITH_ERROR(P,T,L) _PMCLOG_RESERVE(P,T,L,		\
	error=ENOMEM;goto error)

#define	PMCLOG_EMIT32(V)	do { *_le++ = (V); } while (0)
#define	PMCLOG_EMIT64(V)	do { 					\
		*_le++ = (uint32_t) ((V) & 0xFFFFFFFF);			\
		*_le++ = (uint32_t) (((V) >> 32) & 0xFFFFFFFF);		\
	} while (0)
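
/* 64-bit quantities are emitted as two 32-bit words, low word first. */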


/* Emit a string.  Caution: does NOT update _le, so needs to be last */
#define	PMCLOG_EMITSTRING(S,L)	do { bcopy((S), _le, (L)); } while (0)
#define	PMCLOG_EMITNULLSTRING(L) do { bzero(_le, (L)); } while (0)

#define	PMCLOG_DESPATCH(PO)						\
		pmclog_release((PO));					\
	} while (0)
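
/*
 * Usage note: _PMCLOG_RESERVE() opens a "do {" block, writes the record
 * header and leaves '_le' pointing at the record payload, while
 * PMCLOG_DESPATCH() releases the buffer and supplies the matching
 * "} while (0)".  A PMCLOG_RESERVE*() must therefore always be paired
 * with a PMCLOG_DESPATCH() in the same scope, with PMCLOG_EMIT*() calls
 * in between, as in this example taken from pmclog_process_procfork()
 * below:
 *
 *	PMCLOG_RESERVE(po, PROCFORK, sizeof(struct pmclog_procfork));
 *	PMCLOG_EMIT32(oldpid);
 *	PMCLOG_EMIT32(newpid);
 *	PMCLOG_DESPATCH(po);
 */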


/*
 * Assertions about the log file format.
 */

CTASSERT(sizeof(struct pmclog_callchain) == 6*4 +
    PMC_CALLCHAIN_DEPTH_MAX*sizeof(uintfptr_t));
CTASSERT(sizeof(struct pmclog_closelog) == 3*4);
CTASSERT(sizeof(struct pmclog_dropnotify) == 3*4);
CTASSERT(sizeof(struct pmclog_map_in) == PATH_MAX +
    4*4 + sizeof(uintfptr_t));
CTASSERT(offsetof(struct pmclog_map_in,pl_pathname) ==
    4*4 + sizeof(uintfptr_t));
CTASSERT(sizeof(struct pmclog_map_out) == 4*4 + 2*sizeof(uintfptr_t));
CTASSERT(sizeof(struct pmclog_pcsample) == 6*4 + sizeof(uintfptr_t));
CTASSERT(sizeof(struct pmclog_pmcallocate) == 6*4);
CTASSERT(sizeof(struct pmclog_pmcattach) == 5*4 + PATH_MAX);
CTASSERT(offsetof(struct pmclog_pmcattach,pl_pathname) == 5*4);
CTASSERT(sizeof(struct pmclog_pmcdetach) == 5*4);
CTASSERT(sizeof(struct pmclog_proccsw) == 5*4 + 8);
CTASSERT(sizeof(struct pmclog_procexec) == 5*4 + PATH_MAX +
    sizeof(uintfptr_t));
CTASSERT(offsetof(struct pmclog_procexec,pl_pathname) == 5*4 +
    sizeof(uintfptr_t));
CTASSERT(sizeof(struct pmclog_procexit) == 5*4 + 8);
CTASSERT(sizeof(struct pmclog_procfork) == 5*4);
CTASSERT(sizeof(struct pmclog_sysexit) == 4*4);
CTASSERT(sizeof(struct pmclog_userdata) == 4*4);
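
/*
 * These compile-time assertions guard the on-file record layout that is
 * consumed by userland via pmclog(3): any change to a record structure
 * that alters its size or field offsets will fail to compile here.
 */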

/*
 * Log buffer structure
 */

struct pmclog_buffer {
	TAILQ_ENTRY(pmclog_buffer) plb_next;
	char		*plb_base;
	char		*plb_ptr;
	char		*plb_fence;
};

/*
 * Prototypes
 */

static int pmclog_get_buffer(struct pmc_owner *po);
static void pmclog_loop(void *arg);
static void pmclog_release(struct pmc_owner *po);
static uint32_t *pmclog_reserve(struct pmc_owner *po, int length);
static void pmclog_schedule_io(struct pmc_owner *po);
static void pmclog_stop_kthread(struct pmc_owner *po);

/*
 * Helper functions
 */

/*
 * Get a log buffer
 */

static int
pmclog_get_buffer(struct pmc_owner *po)
{
	struct pmclog_buffer *plb;

	mtx_assert(&po->po_mtx, MA_OWNED);

	KASSERT(po->po_curbuf == NULL,
	    ("[pmclog,%d] po=%p current buffer still valid", __LINE__, po));

	mtx_lock_spin(&pmc_bufferlist_mtx);
	if ((plb = TAILQ_FIRST(&pmc_bufferlist)) != NULL)
		TAILQ_REMOVE(&pmc_bufferlist, plb, plb_next);
	mtx_unlock_spin(&pmc_bufferlist_mtx);

	PMCDBG2(LOG,GTB,1, "po=%p plb=%p", po, plb);

#ifdef	HWPMC_DEBUG
	if (plb)
		KASSERT(plb->plb_ptr == plb->plb_base &&
		    plb->plb_base < plb->plb_fence,
		    ("[pmclog,%d] po=%p buffer invariants: ptr=%p "
		    "base=%p fence=%p", __LINE__, po, plb->plb_ptr,
		    plb->plb_base, plb->plb_fence));
#endif

	po->po_curbuf = plb;

	/* update stats */
	atomic_add_int(&pmc_stats.pm_buffer_requests, 1);
	if (plb == NULL)
		atomic_add_int(&pmc_stats.pm_buffer_requests_failed, 1);

	return (plb ? 0 : ENOMEM);
}

/*
 * Log handler loop.
 *
 * This function is executed by each pmc owner's helper thread.
 */

static void
pmclog_loop(void *arg)
{
	int error;
	struct pmc_owner *po;
	struct pmclog_buffer *lb;
	struct proc *p;
	struct ucred *ownercred;
	struct ucred *mycred;
	struct thread *td;
	struct uio auio;
	struct iovec aiov;
	size_t nbytes;

	po = (struct pmc_owner *) arg;
	p = po->po_owner;
	td = curthread;
	mycred = td->td_ucred;

	PROC_LOCK(p);
	ownercred = crhold(p->p_ucred);
	PROC_UNLOCK(p);
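
	/*
	 * The owner's credentials are held so that the log file writes
	 * issued below run with the owner's permissions rather than this
	 * kernel thread's; see the credential switch around fo_write()
	 * further down.
	 */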

	PMCDBG2(LOG,INI,1, "po=%p kt=%p", po, po->po_kthread);
	KASSERT(po->po_kthread == curthread->td_proc,
	    ("[pmclog,%d] proc mismatch po=%p po/kt=%p curproc=%p", __LINE__,
		po, po->po_kthread, curthread->td_proc));

	lb = NULL;


	/*
	 * Loop waiting for I/O requests to be added to the owner
	 * struct's queue.  The loop is exited when the log file
	 * is deconfigured.
	 */

	mtx_lock(&pmc_kthread_mtx);

	for (;;) {

		/* check if we've been asked to exit */
		if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)
			break;

		if (lb == NULL) { /* look for a fresh buffer to write */
			mtx_lock_spin(&po->po_mtx);
			if ((lb = TAILQ_FIRST(&po->po_logbuffers)) == NULL) {
				mtx_unlock_spin(&po->po_mtx);

				/* No more buffers and shutdown required. */
				if (po->po_flags & PMC_PO_SHUTDOWN) {
					mtx_unlock(&pmc_kthread_mtx);
					/*
					 * Close the file to get PMCLOG_EOF
					 * error in pmclog(3).
					 */
					fo_close(po->po_file, curthread);
					mtx_lock(&pmc_kthread_mtx);
					break;
				}

				(void) msleep(po, &pmc_kthread_mtx, PWAIT,
				    "pmcloop", 0);
				continue;
			}

			TAILQ_REMOVE(&po->po_logbuffers, lb, plb_next);
			mtx_unlock_spin(&po->po_mtx);
		}

		mtx_unlock(&pmc_kthread_mtx);

		/* process the request */
		PMCDBG3(LOG,WRI,2, "po=%p base=%p ptr=%p", po,
		    lb->plb_base, lb->plb_ptr);
		/* change our thread's credentials before issuing the I/O */

		aiov.iov_base = lb->plb_base;
		aiov.iov_len  = nbytes = lb->plb_ptr - lb->plb_base;

		auio.uio_iov    = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = -1;
		auio.uio_resid  = nbytes;
		auio.uio_rw     = UIO_WRITE;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_td     = td;

		/* switch thread credentials -- see kern_ktrace.c */
		td->td_ucred = ownercred;
		error = fo_write(po->po_file, &auio, ownercred, 0, td);
		td->td_ucred = mycred;

		if (error) {
			/* XXX some errors are recoverable */
			/* send a SIGIO to the owner and exit */
			PROC_LOCK(p);
			kern_psignal(p, SIGIO);
			PROC_UNLOCK(p);

			mtx_lock(&pmc_kthread_mtx);

			po->po_error = error; /* save for flush log */

			PMCDBG2(LOG,WRI,2, "po=%p error=%d", po, error);

			break;
		}

		mtx_lock(&pmc_kthread_mtx);

		/* put the used buffer back into the global pool */
		PMCLOG_INIT_BUFFER_DESCRIPTOR(lb);

		mtx_lock_spin(&pmc_bufferlist_mtx);
		TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next);
		mtx_unlock_spin(&pmc_bufferlist_mtx);

		lb = NULL;
	}

	wakeup_one(po->po_kthread);
	po->po_kthread = NULL;

	mtx_unlock(&pmc_kthread_mtx);

	/* return the current I/O buffer to the global pool */
	if (lb) {
		PMCLOG_INIT_BUFFER_DESCRIPTOR(lb);

		mtx_lock_spin(&pmc_bufferlist_mtx);
		TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next);
		mtx_unlock_spin(&pmc_bufferlist_mtx);
	}

	/*
	 * Exit this thread, signalling the waiter
	 */

	crfree(ownercred);

	kproc_exit(0);
}

/*
 * Release a log entry and schedule an I/O if needed.
 */

static void
pmclog_release(struct pmc_owner *po)
{
	KASSERT(po->po_curbuf->plb_ptr >= po->po_curbuf->plb_base,
	    ("[pmclog,%d] buffer invariants po=%p ptr=%p base=%p", __LINE__,
		po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base));
	KASSERT(po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence,
	    ("[pmclog,%d] buffer invariants po=%p ptr=%p fence=%p", __LINE__,
		po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_fence));

	/* schedule an I/O if we've filled a buffer */
	if (po->po_curbuf->plb_ptr >= po->po_curbuf->plb_fence)
		pmclog_schedule_io(po);

	mtx_unlock_spin(&po->po_mtx);

	PMCDBG1(LOG,REL,1, "po=%p", po);
}


/*
 * Attempt to reserve 'length' bytes of space in an owner's log
 * buffer.  The function returns a pointer to the reserved space on
 * success and NULL if no space was available.  A non-NULL return
 * leaves the owner's mutex locked; the caller must then invoke
 * pmclog_release() on the pmc owner structure when done.
 */

static uint32_t *
pmclog_reserve(struct pmc_owner *po, int length)
{
	uintptr_t newptr, oldptr;
	uint32_t *lh;
	struct timespec ts;

	PMCDBG2(LOG,ALL,1, "po=%p len=%d", po, length);

	KASSERT(length % sizeof(uint32_t) == 0,
	    ("[pmclog,%d] length not a multiple of word size", __LINE__));

	mtx_lock_spin(&po->po_mtx);

	/* No more data when shutdown in progress. */
	if (po->po_flags & PMC_PO_SHUTDOWN) {
		mtx_unlock_spin(&po->po_mtx);
		return (NULL);
	}

	if (po->po_curbuf == NULL)
		if (pmclog_get_buffer(po) != 0) {
			mtx_unlock_spin(&po->po_mtx);
			return (NULL);
		}

	KASSERT(po->po_curbuf != NULL,
	    ("[pmclog,%d] po=%p no current buffer", __LINE__, po));

	KASSERT(po->po_curbuf->plb_ptr >= po->po_curbuf->plb_base &&
	    po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence,
	    ("[pmclog,%d] po=%p buffer invariants: ptr=%p base=%p fence=%p",
		__LINE__, po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base,
		po->po_curbuf->plb_fence));

	oldptr = (uintptr_t) po->po_curbuf->plb_ptr;
	newptr = oldptr + length;

	KASSERT(oldptr != (uintptr_t) NULL,
	    ("[pmclog,%d] po=%p Null log buffer pointer", __LINE__, po));

	/*
	 * If we have space in the current buffer, return a pointer to
	 * available space with the PO structure locked.
	 */
	if (newptr <= (uintptr_t) po->po_curbuf->plb_fence) {
		po->po_curbuf->plb_ptr = (char *) newptr;
		goto done;
	}

	/*
	 * Otherwise, schedule the current buffer for output and get a
	 * fresh buffer.
	 */
	pmclog_schedule_io(po);

	if (pmclog_get_buffer(po) != 0) {
		mtx_unlock_spin(&po->po_mtx);
		return (NULL);
	}

	KASSERT(po->po_curbuf != NULL,
	    ("[pmclog,%d] po=%p no current buffer", __LINE__, po));

	KASSERT(po->po_curbuf->plb_ptr != NULL,
	    ("[pmclog,%d] null return from pmclog_get_buffer", __LINE__));

	KASSERT(po->po_curbuf->plb_ptr == po->po_curbuf->plb_base &&
	    po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence,
	    ("[pmclog,%d] po=%p buffer invariants: ptr=%p base=%p fence=%p",
		__LINE__, po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base,
		po->po_curbuf->plb_fence));

	oldptr = (uintptr_t) po->po_curbuf->plb_ptr;

 done:
	lh = (uint32_t *) oldptr;
	lh++;				/* skip header */
	getnanotime(&ts);		/* fill in the timestamp */
	*lh++ = ts.tv_sec & 0xFFFFFFFF;
	*lh++ = ts.tv_nsec & 0xFFFFFFFF;
	return ((uint32_t *) oldptr);
}

/*
 * Schedule an I/O.
 *
 * Transfer the current buffer to the helper kthread.
 */

static void
pmclog_schedule_io(struct pmc_owner *po)
{
	KASSERT(po->po_curbuf != NULL,
	    ("[pmclog,%d] schedule_io with null buffer po=%p", __LINE__, po));

	KASSERT(po->po_curbuf->plb_ptr >= po->po_curbuf->plb_base,
	    ("[pmclog,%d] buffer invariants po=%p ptr=%p base=%p", __LINE__,
		po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base));
	KASSERT(po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence,
	    ("[pmclog,%d] buffer invariants po=%p ptr=%p fence=%p", __LINE__,
		po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_fence));

	PMCDBG1(LOG,SIO, 1, "po=%p", po);

	mtx_assert(&po->po_mtx, MA_OWNED);

	/*
	 * Add the current buffer to the tail of the buffer list and
	 * wakeup the helper.
	 */
	TAILQ_INSERT_TAIL(&po->po_logbuffers, po->po_curbuf, plb_next);
	po->po_curbuf = NULL;
	wakeup_one(po);
}

/*
 * Stop the helper kthread.
 */

static void
pmclog_stop_kthread(struct pmc_owner *po)
{
	/*
	 * Close the file to force the helper thread out of fo_write(),
	 * clear the PMC_PO_OWNS_LOGFILE flag, wake up the helper and
	 * wait for it to exit.
	 */

	if (po->po_file != NULL)
		fo_close(po->po_file, curthread);

	mtx_lock(&pmc_kthread_mtx);
	po->po_flags &= ~PMC_PO_OWNS_LOGFILE;
	wakeup_one(po);
	if (po->po_kthread)
		msleep(po->po_kthread, &pmc_kthread_mtx, PPAUSE, "pmckstp", 0);
	mtx_unlock(&pmc_kthread_mtx);
}

/*
 * Public functions
 */

/*
 * Configure a log file for pmc owner 'po'.
 *
 * Parameter 'logfd' is a file handle referencing an open file in the
 * owner process.  This file needs to have been opened for writing.
 */

int
pmclog_configure_log(struct pmc_mdep *md, struct pmc_owner *po, int logfd)
{
	int error;
	struct proc *p;
	cap_rights_t rights;
	/*
	 * As long as it is possible to get a lock order reversal (LOR)
	 * between the pmc_sx lock and the proctree/allproc sx locks taken
	 * when adding a new process, ensure the former is not held here.
	 */
	sx_assert(&pmc_sx, SA_UNLOCKED);
	PMCDBG2(LOG,CFG,1, "config po=%p logfd=%d", po, logfd);

	p = po->po_owner;

	/* return EBUSY if a log file was already present */
	if (po->po_flags & PMC_PO_OWNS_LOGFILE)
		return (EBUSY);

	KASSERT(po->po_kthread == NULL,
	    ("[pmclog,%d] po=%p kthread (%p) already present", __LINE__, po,
		po->po_kthread));
	KASSERT(po->po_file == NULL,
	    ("[pmclog,%d] po=%p file (%p) already present", __LINE__, po,
		po->po_file));

	/* get a reference to the file state */
	error = fget_write(curthread, logfd,
	    cap_rights_init(&rights, CAP_WRITE), &po->po_file);
	if (error)
		goto error;

	/* mark process as owning a log file */
	po->po_flags |= PMC_PO_OWNS_LOGFILE;
	error = kproc_create(pmclog_loop, po, &po->po_kthread,
	    RFHIGHPID, 0, "hwpmc: proc(%d)", p->p_pid);
	if (error)
		goto error;

	/* mark process as using HWPMCs */
	PROC_LOCK(p);
	p->p_flag |= P_HWPMC;
	PROC_UNLOCK(p);

	/* create a log initialization entry */
	PMCLOG_RESERVE_WITH_ERROR(po, INITIALIZE,
	    sizeof(struct pmclog_initialize));
	PMCLOG_EMIT32(PMC_VERSION);
	PMCLOG_EMIT32(md->pmd_cputype);
	PMCLOG_DESPATCH(po);

	return (0);

 error:
	/* shutdown the thread */
	if (po->po_kthread)
		pmclog_stop_kthread(po);

	KASSERT(po->po_kthread == NULL, ("[pmclog,%d] po=%p kthread not "
	    "stopped", __LINE__, po));

	if (po->po_file)
		(void) fdrop(po->po_file, curthread);
	po->po_file  = NULL;	/* clear file and error state */
	po->po_error = 0;

	return (error);
}


/*
 * De-configure a log file.  This will throw away any buffers queued
 * for this owner process.
 */

int
pmclog_deconfigure_log(struct pmc_owner *po)
{
	int error;
	struct pmclog_buffer *lb;

	PMCDBG1(LOG,CFG,1, "de-config po=%p", po);

	if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)
		return (EINVAL);

	KASSERT(po->po_sscount == 0,
	    ("[pmclog,%d] po=%p still owning SS PMCs", __LINE__, po));
	KASSERT(po->po_file != NULL,
	    ("[pmclog,%d] po=%p no log file", __LINE__, po));

	/* stop the kthread, this will reset the 'OWNS_LOGFILE' flag */
	pmclog_stop_kthread(po);

	KASSERT(po->po_kthread == NULL,
	    ("[pmclog,%d] po=%p kthread not stopped", __LINE__, po));

	/* return all queued log buffers to the global pool */
	while ((lb = TAILQ_FIRST(&po->po_logbuffers)) != NULL) {
		TAILQ_REMOVE(&po->po_logbuffers, lb, plb_next);
		PMCLOG_INIT_BUFFER_DESCRIPTOR(lb);
		mtx_lock_spin(&pmc_bufferlist_mtx);
		TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next);
		mtx_unlock_spin(&pmc_bufferlist_mtx);
	}

	/* return the 'current' buffer to the global pool */
	if ((lb = po->po_curbuf) != NULL) {
		PMCLOG_INIT_BUFFER_DESCRIPTOR(lb);
		mtx_lock_spin(&pmc_bufferlist_mtx);
		TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next);
		mtx_unlock_spin(&pmc_bufferlist_mtx);
	}

	/* drop a reference to the fd */
	error = fdrop(po->po_file, curthread);
	po->po_file  = NULL;
	po->po_error = 0;

	return (error);
}

/*
 * Flush a process' log buffer.
 */

int
pmclog_flush(struct pmc_owner *po)
{
	int error;
	struct pmclog_buffer *lb;

	PMCDBG1(LOG,FLS,1, "po=%p", po);

	/*
	 * If there is a pending error recorded by the logger thread,
	 * return that.
	 */
	if (po->po_error)
		return (po->po_error);

	error = 0;

	/*
	 * Check that we do have an active log file.
	 */
	mtx_lock(&pmc_kthread_mtx);
	if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) {
		error = EINVAL;
		goto error;
	}

	/*
	 * Schedule the current buffer if there is one and it is not empty.
	 */
	mtx_lock_spin(&po->po_mtx);
	lb = po->po_curbuf;
	if (lb && lb->plb_ptr != lb->plb_base) {
		pmclog_schedule_io(po);
	} else
		error = ENOBUFS;
	mtx_unlock_spin(&po->po_mtx);

 error:
	mtx_unlock(&pmc_kthread_mtx);

	return (error);
}

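/*
 * Close a log file: schedule any partially filled buffer for output and
 * mark the owner as shutting down.  The helper thread closes the file
 * once the last queued buffer has been written.
 */
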
int
pmclog_close(struct pmc_owner *po)
{

	PMCDBG1(LOG,CLO,1, "po=%p", po);

	mtx_lock(&pmc_kthread_mtx);

	/*
	 * Schedule the current buffer.
	 */
	mtx_lock_spin(&po->po_mtx);
	if (po->po_curbuf)
		pmclog_schedule_io(po);
	else
		wakeup_one(po);
	mtx_unlock_spin(&po->po_mtx);

	/*
	 * Initiate shutdown: no new data will be queued; the helper
	 * thread will close the file after writing out the last buffer.
	 */
	po->po_flags |= PMC_PO_SHUTDOWN;

	mtx_unlock(&pmc_kthread_mtx);

	return (0);
}

void
pmclog_process_callchain(struct pmc *pm, struct pmc_sample *ps)
{
	int n, recordlen;
	uint32_t flags;
	struct pmc_owner *po;

	PMCDBG3(LOG,SAM,1,"pm=%p pid=%d n=%d", pm, ps->ps_pid,
	    ps->ps_nsamples);

	recordlen = offsetof(struct pmclog_callchain, pl_pc) +
	    ps->ps_nsamples * sizeof(uintfptr_t);
	po = pm->pm_owner;
	flags = PMC_CALLCHAIN_TO_CPUFLAGS(ps->ps_cpu,ps->ps_flags);
	PMCLOG_RESERVE(po, CALLCHAIN, recordlen);
	PMCLOG_EMIT32(ps->ps_pid);
	PMCLOG_EMIT32(pm->pm_id);
	PMCLOG_EMIT32(flags);
	for (n = 0; n < ps->ps_nsamples; n++)
		PMCLOG_EMITADDR(ps->ps_pc[n]);
	PMCLOG_DESPATCH(po);
}

void
pmclog_process_closelog(struct pmc_owner *po)
{
	PMCLOG_RESERVE(po,CLOSELOG,sizeof(struct pmclog_closelog));
	PMCLOG_DESPATCH(po);
}

void
pmclog_process_dropnotify(struct pmc_owner *po)
{
	PMCLOG_RESERVE(po,DROPNOTIFY,sizeof(struct pmclog_dropnotify));
	PMCLOG_DESPATCH(po);
}

void
pmclog_process_map_in(struct pmc_owner *po, pid_t pid, uintfptr_t start,
    const char *path)
{
	int pathlen, recordlen;

	KASSERT(path != NULL, ("[pmclog,%d] map-in, null path", __LINE__));

	pathlen = strlen(path) + 1;	/* #bytes for path name */
	recordlen = offsetof(struct pmclog_map_in, pl_pathname) +
	    pathlen;

	PMCLOG_RESERVE(po, MAP_IN, recordlen);
	PMCLOG_EMIT32(pid);
	PMCLOG_EMITADDR(start);
	PMCLOG_EMITSTRING(path,pathlen);
	PMCLOG_DESPATCH(po);
}

void
pmclog_process_map_out(struct pmc_owner *po, pid_t pid, uintfptr_t start,
    uintfptr_t end)
{
	KASSERT(start <= end, ("[pmclog,%d] start > end", __LINE__));

	PMCLOG_RESERVE(po, MAP_OUT, sizeof(struct pmclog_map_out));
	PMCLOG_EMIT32(pid);
	PMCLOG_EMITADDR(start);
	PMCLOG_EMITADDR(end);
	PMCLOG_DESPATCH(po);
}

void
pmclog_process_pmcallocate(struct pmc *pm)
{
	struct pmc_owner *po;
	struct pmc_soft *ps;

	po = pm->pm_owner;

	PMCDBG1(LOG,ALL,1, "pm=%p", pm);

	if (PMC_TO_CLASS(pm) == PMC_CLASS_SOFT) {
		PMCLOG_RESERVE(po, PMCALLOCATEDYN,
		    sizeof(struct pmclog_pmcallocatedyn));
		PMCLOG_EMIT32(pm->pm_id);
		PMCLOG_EMIT32(pm->pm_event);
		PMCLOG_EMIT32(pm->pm_flags);
		ps = pmc_soft_ev_acquire(pm->pm_event);
		if (ps != NULL)
			PMCLOG_EMITSTRING(ps->ps_ev.pm_ev_name,PMC_NAME_MAX);
		else
			PMCLOG_EMITNULLSTRING(PMC_NAME_MAX);
		pmc_soft_ev_release(ps);
		PMCLOG_DESPATCH(po);
	} else {
		PMCLOG_RESERVE(po, PMCALLOCATE,
		    sizeof(struct pmclog_pmcallocate));
		PMCLOG_EMIT32(pm->pm_id);
		PMCLOG_EMIT32(pm->pm_event);
		PMCLOG_EMIT32(pm->pm_flags);
		PMCLOG_DESPATCH(po);
	}
}

void
pmclog_process_pmcattach(struct pmc *pm, pid_t pid, char *path)
{
	int pathlen, recordlen;
	struct pmc_owner *po;

	PMCDBG2(LOG,ATT,1,"pm=%p pid=%d", pm, pid);

	po = pm->pm_owner;

	pathlen = strlen(path) + 1;	/* #bytes for the string */
	recordlen = offsetof(struct pmclog_pmcattach, pl_pathname) + pathlen;

	PMCLOG_RESERVE(po, PMCATTACH, recordlen);
	PMCLOG_EMIT32(pm->pm_id);
	PMCLOG_EMIT32(pid);
	PMCLOG_EMITSTRING(path, pathlen);
	PMCLOG_DESPATCH(po);
}

void
pmclog_process_pmcdetach(struct pmc *pm, pid_t pid)
{
	struct pmc_owner *po;

	PMCDBG2(LOG,ATT,1,"!pm=%p pid=%d", pm, pid);

	po = pm->pm_owner;

	PMCLOG_RESERVE(po, PMCDETACH, sizeof(struct pmclog_pmcdetach));
	PMCLOG_EMIT32(pm->pm_id);
	PMCLOG_EMIT32(pid);
	PMCLOG_DESPATCH(po);
}

/*
 * Log a context switch event to the log file.
 */

void
pmclog_process_proccsw(struct pmc *pm, struct pmc_process *pp, pmc_value_t v)
{
	struct pmc_owner *po;

	KASSERT(pm->pm_flags & PMC_F_LOG_PROCCSW,
	    ("[pmclog,%d] log-process-csw called gratuitously", __LINE__));

	PMCDBG3(LOG,SWO,1,"pm=%p pid=%d v=%jx", pm, pp->pp_proc->p_pid,
	    v);

	po = pm->pm_owner;

	PMCLOG_RESERVE(po, PROCCSW, sizeof(struct pmclog_proccsw));
	PMCLOG_EMIT32(pm->pm_id);
	PMCLOG_EMIT64(v);
	PMCLOG_EMIT32(pp->pp_proc->p_pid);
	PMCLOG_DESPATCH(po);
}

void
pmclog_process_procexec(struct pmc_owner *po, pmc_id_t pmid, pid_t pid,
    uintfptr_t startaddr, char *path)
{
	int pathlen, recordlen;

	PMCDBG3(LOG,EXC,1,"po=%p pid=%d path=\"%s\"", po, pid, path);

	pathlen   = strlen(path) + 1;	/* #bytes for the path */
	recordlen = offsetof(struct pmclog_procexec, pl_pathname) + pathlen;

	PMCLOG_RESERVE(po, PROCEXEC, recordlen);
	PMCLOG_EMIT32(pid);
	PMCLOG_EMITADDR(startaddr);
	PMCLOG_EMIT32(pmid);
	PMCLOG_EMITSTRING(path,pathlen);
	PMCLOG_DESPATCH(po);
}

/*
 * Log a process exit event (and accumulated pmc value) to the log file.
 */

void
pmclog_process_procexit(struct pmc *pm, struct pmc_process *pp)
{
	int ri;
	struct pmc_owner *po;

	ri = PMC_TO_ROWINDEX(pm);
	PMCDBG3(LOG,EXT,1,"pm=%p pid=%d v=%jx", pm, pp->pp_proc->p_pid,
	    pp->pp_pmcs[ri].pp_pmcval);

	po = pm->pm_owner;

	PMCLOG_RESERVE(po, PROCEXIT, sizeof(struct pmclog_procexit));
	PMCLOG_EMIT32(pm->pm_id);
	PMCLOG_EMIT64(pp->pp_pmcs[ri].pp_pmcval);
	PMCLOG_EMIT32(pp->pp_proc->p_pid);
	PMCLOG_DESPATCH(po);
}

/*
 * Log a fork event.
 */

void
pmclog_process_procfork(struct pmc_owner *po, pid_t oldpid, pid_t newpid)
{
	PMCLOG_RESERVE(po, PROCFORK, sizeof(struct pmclog_procfork));
	PMCLOG_EMIT32(oldpid);
	PMCLOG_EMIT32(newpid);
	PMCLOG_DESPATCH(po);
}

/*
 * Log a process exit event of the form suitable for system-wide PMCs.
 */

void
pmclog_process_sysexit(struct pmc_owner *po, pid_t pid)
{
	PMCLOG_RESERVE(po, SYSEXIT, sizeof(struct pmclog_sysexit));
	PMCLOG_EMIT32(pid);
	PMCLOG_DESPATCH(po);
}

/*
 * Write a user log entry.
 */

int
pmclog_process_userlog(struct pmc_owner *po, struct pmc_op_writelog *wl)
{
	int error;

	PMCDBG2(LOG,WRI,1, "writelog po=%p ud=0x%x", po, wl->pm_userdata);

	error = 0;

	PMCLOG_RESERVE_WITH_ERROR(po, USERDATA,
	    sizeof(struct pmclog_userdata));
	PMCLOG_EMIT32(wl->pm_userdata);
	PMCLOG_DESPATCH(po);

 error:
	return (error);
}

/*
 * Initialization.
 *
 * Create a pool of log buffers and initialize mutexes.
 */

void
pmclog_initialize()
{
	int n;
	struct pmclog_buffer *plb;

	if (pmclog_buffer_size <= 0) {
		(void) printf("hwpmc: tunable logbuffersize=%d must be "
		    "greater than zero.\n", pmclog_buffer_size);
		pmclog_buffer_size = PMC_LOG_BUFFER_SIZE;
	}

	if (pmc_nlogbuffers <= 0) {
		(void) printf("hwpmc: tunable nlogbuffers=%d must be greater "
		    "than zero.\n", pmc_nlogbuffers);
		pmc_nlogbuffers = PMC_NLOGBUFFERS;
	}

	/* create global pool of log buffers */
	for (n = 0; n < pmc_nlogbuffers; n++) {
		plb = malloc(1024 * pmclog_buffer_size, M_PMC,
		    M_WAITOK|M_ZERO);
		PMCLOG_INIT_BUFFER_DESCRIPTOR(plb);
		TAILQ_INSERT_HEAD(&pmc_bufferlist, plb, plb_next);
	}
	mtx_init(&pmc_bufferlist_mtx, "pmc-buffer-list", "pmc-leaf",
	    MTX_SPIN);
	mtx_init(&pmc_kthread_mtx, "pmc-kthread", "pmc-sleep", MTX_DEF);
}

/*
 * Shutdown logging.
 *
 * Destroy mutexes and return the log buffer memory to the system.
 */

void
pmclog_shutdown()
{
	struct pmclog_buffer *plb;

	mtx_destroy(&pmc_kthread_mtx);
	mtx_destroy(&pmc_bufferlist_mtx);

	while ((plb = TAILQ_FIRST(&pmc_bufferlist)) != NULL) {
		TAILQ_REMOVE(&pmc_bufferlist, plb, plb_next);
		free(plb, M_PMC);
	}
}
