1178479Sjb/*
2178479Sjb * CDDL HEADER START
3178479Sjb *
4178479Sjb * The contents of this file are subject to the terms of the
5178479Sjb * Common Development and Distribution License (the "License").
6178479Sjb * You may not use this file except in compliance with the License.
7178479Sjb *
8178479Sjb * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9178479Sjb * or http://www.opensolaris.org/os/licensing.
10178479Sjb * See the License for the specific language governing permissions
11178479Sjb * and limitations under the License.
12178479Sjb *
13178479Sjb * When distributing Covered Code, include this CDDL HEADER in each
14178479Sjb * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15178479Sjb * If applicable, add the following below this CDDL HEADER, with the
16178479Sjb * fields enclosed by brackets "[]" replaced with your own identifying
17178479Sjb * information: Portions Copyright [yyyy] [name of copyright owner]
18178479Sjb *
19178479Sjb * CDDL HEADER END
20178479Sjb */
21178479Sjb
22178479Sjb/*
23178479Sjb * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24178479Sjb * Use is subject to license terms.
25178479Sjb */
26178479Sjb
27237624Spfg/*
28268578Srpaulo * Copyright (c) 2013, Joyent, Inc. All rights reserved.
29253725Spfg * Copyright (c) 2012 by Delphix. All rights reserved.
30237624Spfg */
31178479Sjb
32178479Sjb#include <stdlib.h>
33178479Sjb#include <strings.h>
34178479Sjb#include <errno.h>
35178479Sjb#include <unistd.h>
36178479Sjb#include <dt_impl.h>
37178479Sjb#include <assert.h>
38297077Smav#ifdef illumos
39178479Sjb#include <alloca.h>
40178558Sjb#else
41178558Sjb#include <sys/sysctl.h>
42211554Srpaulo#include <libproc_compat.h>
43178558Sjb#endif
44178479Sjb#include <limits.h>
45178479Sjb
#define	DTRACE_AHASHSIZE	32779		/* hash bucket count (prime) */

/*
 * qsort(3C) offers no way to hand extra state to its comparison callback, so
 * the settings that steer our comparators have to live in file-scope
 * variables.  Access to them is serialized by the static dt_qsort_lock.
 */
static pthread_mutex_t dt_qsort_lock = PTHREAD_MUTEX_INITIALIZER;

static int dt_revsort;
static int dt_keysort;
static int dt_keypos;

/*
 * Results handed back by the comparators below.  A non-zero dt_revsort
 * flips the sense of both values.
 */
#define	DT_LESSTHAN	(dt_revsort != 0 ? 1 : -1)
#define	DT_GREATERTHAN	(dt_revsort != 0 ? -1 : 1)
61178479Sjb
/*
 * Aggregating action for data that combines by addition: each 64-bit element
 * of 'new' is added into the matching element of 'existing'.  'size' is the
 * length of the data in bytes; a trailing partial element is ignored.
 */
static void
dt_aggregate_count(int64_t *existing, int64_t *new, size_t size)
{
	size_t nelems = size / sizeof (int64_t);
	size_t i;

	/*
	 * The index shares the bound's type so that it can never wrap around
	 * before reaching the bound.
	 */
	for (i = 0; i < nelems; i++)
		existing[i] += new[i];
}
70178479Sjb
71178479Sjbstatic int
72178479Sjbdt_aggregate_countcmp(int64_t *lhs, int64_t *rhs)
73178479Sjb{
74178479Sjb	int64_t lvar = *lhs;
75178479Sjb	int64_t rvar = *rhs;
76178479Sjb
77178479Sjb	if (lvar < rvar)
78178479Sjb		return (DT_LESSTHAN);
79178479Sjb
80178479Sjb	if (lvar > rvar)
81178479Sjb		return (DT_GREATERTHAN);
82178479Sjb
83178479Sjb	return (0);
84178479Sjb}
85178479Sjb
/*
 * Aggregating action for min(): keep the smaller of the stored value and the
 * incoming one.  'size' is not consulted.
 */
/*ARGSUSED*/
static void
dt_aggregate_min(int64_t *existing, int64_t *new, size_t size)
{
	int64_t candidate = new[0];

	if (existing[0] > candidate)
		existing[0] = candidate;
}
93178479Sjb
/*
 * Aggregating action for max(): keep the larger of the stored value and the
 * incoming one.  'size' is not consulted.
 */
/*ARGSUSED*/
static void
dt_aggregate_max(int64_t *existing, int64_t *new, size_t size)
{
	int64_t candidate = new[0];

	if (existing[0] < candidate)
		existing[0] = candidate;
}
101178479Sjb
102178479Sjbstatic int
103178479Sjbdt_aggregate_averagecmp(int64_t *lhs, int64_t *rhs)
104178479Sjb{
105178479Sjb	int64_t lavg = lhs[0] ? (lhs[1] / lhs[0]) : 0;
106178479Sjb	int64_t ravg = rhs[0] ? (rhs[1] / rhs[0]) : 0;
107178479Sjb
108178479Sjb	if (lavg < ravg)
109178479Sjb		return (DT_LESSTHAN);
110178479Sjb
111178479Sjb	if (lavg > ravg)
112178479Sjb		return (DT_GREATERTHAN);
113178479Sjb
114178479Sjb	return (0);
115178479Sjb}
116178479Sjb
117178479Sjbstatic int
118178479Sjbdt_aggregate_stddevcmp(int64_t *lhs, int64_t *rhs)
119178479Sjb{
120178479Sjb	uint64_t lsd = dt_stddev((uint64_t *)lhs, 1);
121178479Sjb	uint64_t rsd = dt_stddev((uint64_t *)rhs, 1);
122178479Sjb
123178479Sjb	if (lsd < rsd)
124178479Sjb		return (DT_LESSTHAN);
125178479Sjb
126178479Sjb	if (lsd > rsd)
127178479Sjb		return (DT_GREATERTHAN);
128178479Sjb
129178479Sjb	return (0);
130178479Sjb}
131178479Sjb
/*
 * Aggregating action for lquantize().  The first element of 'existing' holds
 * the encoded lquantize() argument, from which the number of levels is
 * extracted; it is left untouched.  The levels + 2 bins that follow it are
 * summed element-wise with the corresponding bins of 'new'.  'size' is not
 * consulted.
 */
/*ARGSUSED*/
static void
dt_aggregate_lquantize(int64_t *existing, int64_t *new, size_t size)
{
	uint16_t levels = DTRACE_LQUANTIZE_LEVELS(existing[0]);
	int64_t *dst = existing + 1;
	int64_t *src = new + 1;
	int nbins = levels + 2;
	int i;

	for (i = 0; i < nbins; i++)
		dst[i] += src[i];
}
143178479Sjb
/*
 * Compute a weighted sum over an lquantize() result for sorting purposes.
 * The first element is the encoded lquantize() argument (base, step and
 * levels).  The first bin is weighted by base - 1, each of the 'levels' bins
 * after it by its own base value (advancing by 'step' per bin), and the
 * final bin by one more than the base reached after the last level.
 */
static long double
dt_aggregate_lquantizedsum(int64_t *lquanta)
{
	int64_t arg = lquanta[0];
	int64_t *bins = lquanta + 1;
	int32_t base = DTRACE_LQUANTIZE_BASE(arg);
	uint16_t step = DTRACE_LQUANTIZE_STEP(arg);
	uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg);
	uint16_t i = 0;
	long double total;

	total = (long double)bins[0] * (long double)(base - 1);

	while (i < levels) {
		total += (long double)bins[i + 1] * (long double)base;
		base += step;
		i++;
	}

	return (total + (long double)bins[levels + 1] *
	    (long double)(base + 1));
}
159178479Sjb
/*
 * Return the count stored in the lquantize() bin whose weight (as used by
 * dt_aggregate_lquantizedsum()) is zero, or 0 if no bin has a zero weight.
 */
static int64_t
dt_aggregate_lquantizedzero(int64_t *lquanta)
{
	int64_t arg = lquanta[0];
	int64_t *bins = lquanta + 1;
	int32_t base = DTRACE_LQUANTIZE_BASE(arg);
	uint16_t step = DTRACE_LQUANTIZE_STEP(arg);
	uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg);
	uint16_t i;

	/* The first bin is weighted by base - 1. */
	if (base == 1)
		return (bins[0]);

	for (i = 0; i < levels; i++) {
		if (base == 0)
			return (bins[i + 1]);

		base += step;
	}

	/* The final bin is weighted by base + 1. */
	return (base == -1 ? bins[levels + 1] : 0);
}
183178479Sjb
184178479Sjbstatic int
185178479Sjbdt_aggregate_lquantizedcmp(int64_t *lhs, int64_t *rhs)
186178479Sjb{
187178479Sjb	long double lsum = dt_aggregate_lquantizedsum(lhs);
188178479Sjb	long double rsum = dt_aggregate_lquantizedsum(rhs);
189178479Sjb	int64_t lzero, rzero;
190178479Sjb
191178479Sjb	if (lsum < rsum)
192178479Sjb		return (DT_LESSTHAN);
193178479Sjb
194178479Sjb	if (lsum > rsum)
195178479Sjb		return (DT_GREATERTHAN);
196178479Sjb
197178479Sjb	/*
198178479Sjb	 * If they're both equal, then we will compare based on the weights at
199178479Sjb	 * zero.  If the weights at zero are equal (or if zero is not within
200178479Sjb	 * the range of the linear quantization), then this will be judged a
201178479Sjb	 * tie and will be resolved based on the key comparison.
202178479Sjb	 */
203178479Sjb	lzero = dt_aggregate_lquantizedzero(lhs);
204178479Sjb	rzero = dt_aggregate_lquantizedzero(rhs);
205178479Sjb
206178479Sjb	if (lzero < rzero)
207178479Sjb		return (DT_LESSTHAN);
208178479Sjb
209178479Sjb	if (lzero > rzero)
210178479Sjb		return (DT_GREATERTHAN);
211178479Sjb
212178479Sjb	return (0);
213178479Sjb}
214178479Sjb
/*
 * Aggregating action for llquantize(): every 64-bit element of 'new' after
 * the first is added into the matching element of 'existing'.  Element 0 is
 * left untouched.  'size' is the length of the data in bytes.
 */
static void
dt_aggregate_llquantize(int64_t *existing, int64_t *new, size_t size)
{
	size_t nelems = size / sizeof (int64_t);
	size_t i;

	/*
	 * The index is unsigned and as wide as the bound, avoiding a
	 * signed/unsigned comparison.
	 */
	for (i = 1; i < nelems; i++)
		existing[i] += new[i];
}
223237624Spfg
/*
 * Compute a weighted sum over an llquantize() result for sorting purposes.
 * The first element is the encoded llquantize() argument (factor, low, high
 * and number of steps).  The first bin is weighted by factor^low - 1; each
 * subsequent bin is weighted by the value at which it starts, stepping
 * through every order of magnitude from 'low' to 'high'; the final bin is
 * weighted by the value reached after the last order.
 */
static long double
dt_aggregate_llquantizedsum(int64_t *llquanta)
{
	int64_t arg = llquanta[0];
	int64_t *bins = llquanta + 1;
	uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg);
	uint16_t low = DTRACE_LLQUANTIZE_LOW(arg);
	uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg);
	uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);
	int64_t value = 1;
	int64_t next, step;
	int order = 0;
	int bin = 0;
	long double total;

	assert(nsteps >= factor);
	assert(nsteps % factor == 0);

	/* Raise 'value' to factor^low. */
	while (order < low) {
		value *= factor;
		order++;
	}

	total = (long double)bins[bin] * (long double)(value - 1);
	bin++;

	next = value * factor;
	step = (next > nsteps) ? next / nsteps : 1;

	while (order <= high) {
		assert(value < next);
		total += (long double)bins[bin] * (long double)(value);
		bin++;

		value += step;

		if (value == next) {
			/* Crossed into the next order of magnitude. */
			next = value * factor;
			step = (next > nsteps) ? next / nsteps : 1;
			order++;
		}
	}

	return (total + (long double)bins[bin] * (long double)value);
}
261237624Spfg
262178479Sjbstatic int
263237624Spfgdt_aggregate_llquantizedcmp(int64_t *lhs, int64_t *rhs)
264237624Spfg{
265237624Spfg	long double lsum = dt_aggregate_llquantizedsum(lhs);
266237624Spfg	long double rsum = dt_aggregate_llquantizedsum(rhs);
267237624Spfg	int64_t lzero, rzero;
268237624Spfg
269237624Spfg	if (lsum < rsum)
270237624Spfg		return (DT_LESSTHAN);
271237624Spfg
272237624Spfg	if (lsum > rsum)
273237624Spfg		return (DT_GREATERTHAN);
274237624Spfg
275237624Spfg	/*
276237624Spfg	 * If they're both equal, then we will compare based on the weights at
277237624Spfg	 * zero.  If the weights at zero are equal, then this will be judged a
278237624Spfg	 * tie and will be resolved based on the key comparison.
279237624Spfg	 */
280237624Spfg	lzero = lhs[1];
281237624Spfg	rzero = rhs[1];
282237624Spfg
283237624Spfg	if (lzero < rzero)
284237624Spfg		return (DT_LESSTHAN);
285237624Spfg
286237624Spfg	if (lzero > rzero)
287237624Spfg		return (DT_GREATERTHAN);
288237624Spfg
289237624Spfg	return (0);
290237624Spfg}
291237624Spfg
292237624Spfgstatic int
293178479Sjbdt_aggregate_quantizedcmp(int64_t *lhs, int64_t *rhs)
294178479Sjb{
295178558Sjb	int nbuckets = DTRACE_QUANTIZE_NBUCKETS;
296178479Sjb	long double ltotal = 0, rtotal = 0;
297178479Sjb	int64_t lzero, rzero;
298178558Sjb	uint_t i;
299178479Sjb
300178479Sjb	for (i = 0; i < nbuckets; i++) {
301178479Sjb		int64_t bucketval = DTRACE_QUANTIZE_BUCKETVAL(i);
302178479Sjb
303178479Sjb		if (bucketval == 0) {
304178479Sjb			lzero = lhs[i];
305178479Sjb			rzero = rhs[i];
306178479Sjb		}
307178479Sjb
308178479Sjb		ltotal += (long double)bucketval * (long double)lhs[i];
309178479Sjb		rtotal += (long double)bucketval * (long double)rhs[i];
310178479Sjb	}
311178479Sjb
312178479Sjb	if (ltotal < rtotal)
313178479Sjb		return (DT_LESSTHAN);
314178479Sjb
315178479Sjb	if (ltotal > rtotal)
316178479Sjb		return (DT_GREATERTHAN);
317178479Sjb
318178479Sjb	/*
319178479Sjb	 * If they're both equal, then we will compare based on the weights at
320178479Sjb	 * zero.  If the weights at zero are equal, then this will be judged a
321178479Sjb	 * tie and will be resolved based on the key comparison.
322178479Sjb	 */
323178479Sjb	if (lzero < rzero)
324178479Sjb		return (DT_LESSTHAN);
325178479Sjb
326178479Sjb	if (lzero > rzero)
327178479Sjb		return (DT_GREATERTHAN);
328178479Sjb
329178479Sjb	return (0);
330178479Sjb}
331178479Sjb
332178479Sjbstatic void
333178479Sjbdt_aggregate_usym(dtrace_hdl_t *dtp, uint64_t *data)
334178479Sjb{
335178479Sjb	uint64_t pid = data[0];
336178479Sjb	uint64_t *pc = &data[1];
337178479Sjb	struct ps_prochandle *P;
338178479Sjb	GElf_Sym sym;
339178479Sjb
340178479Sjb	if (dtp->dt_vector != NULL)
341178479Sjb		return;
342178479Sjb
343178479Sjb	if ((P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0)) == NULL)
344178479Sjb		return;
345178479Sjb
346178479Sjb	dt_proc_lock(dtp, P);
347178479Sjb
348178479Sjb	if (Plookup_by_addr(P, *pc, NULL, 0, &sym) == 0)
349178479Sjb		*pc = sym.st_value;
350178479Sjb
351178479Sjb	dt_proc_unlock(dtp, P);
352178479Sjb	dt_proc_release(dtp, P);
353178479Sjb}
354178479Sjb
355178479Sjbstatic void
356178479Sjbdt_aggregate_umod(dtrace_hdl_t *dtp, uint64_t *data)
357178479Sjb{
358178479Sjb	uint64_t pid = data[0];
359178479Sjb	uint64_t *pc = &data[1];
360178479Sjb	struct ps_prochandle *P;
361178479Sjb	const prmap_t *map;
362178479Sjb
363178479Sjb	if (dtp->dt_vector != NULL)
364178479Sjb		return;
365178479Sjb
366178479Sjb	if ((P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0)) == NULL)
367178479Sjb		return;
368178479Sjb
369178479Sjb	dt_proc_lock(dtp, P);
370178479Sjb
371178479Sjb	if ((map = Paddr_to_map(P, *pc)) != NULL)
372178479Sjb		*pc = map->pr_vaddr;
373178479Sjb
374178479Sjb	dt_proc_unlock(dtp, P);
375178479Sjb	dt_proc_release(dtp, P);
376178479Sjb}
377178479Sjb
378178479Sjbstatic void
379178479Sjbdt_aggregate_sym(dtrace_hdl_t *dtp, uint64_t *data)
380178479Sjb{
381178479Sjb	GElf_Sym sym;
382178479Sjb	uint64_t *pc = data;
383178479Sjb
384178479Sjb	if (dtrace_lookup_by_addr(dtp, *pc, &sym, NULL) == 0)
385178479Sjb		*pc = sym.st_value;
386178479Sjb}
387178479Sjb
388178479Sjbstatic void
389178479Sjbdt_aggregate_mod(dtrace_hdl_t *dtp, uint64_t *data)
390178479Sjb{
391178479Sjb	uint64_t *pc = data;
392178479Sjb	dt_module_t *dmp;
393178479Sjb
394178479Sjb	if (dtp->dt_vector != NULL) {
395178479Sjb		/*
396178479Sjb		 * We don't have a way of just getting the module for a
397178479Sjb		 * vectored open, and it doesn't seem to be worth defining
398178479Sjb		 * one.  This means that use of mod() won't get true
399178479Sjb		 * aggregation in the postmortem case (some modules may
400178479Sjb		 * appear more than once in aggregation output).  It seems
401178479Sjb		 * unlikely that anyone will ever notice or care...
402178479Sjb		 */
403178479Sjb		return;
404178479Sjb	}
405178479Sjb
406178479Sjb	for (dmp = dt_list_next(&dtp->dt_modlist); dmp != NULL;
407178479Sjb	    dmp = dt_list_next(dmp)) {
408178479Sjb		if (*pc - dmp->dm_text_va < dmp->dm_text_size) {
409178479Sjb			*pc = dmp->dm_text_va;
410178479Sjb			return;
411178479Sjb		}
412178479Sjb	}
413178479Sjb}
414178479Sjb
415178479Sjbstatic dtrace_aggvarid_t
416178479Sjbdt_aggregate_aggvarid(dt_ahashent_t *ent)
417178479Sjb{
418178479Sjb	dtrace_aggdesc_t *agg = ent->dtahe_data.dtada_desc;
419178479Sjb	caddr_t data = ent->dtahe_data.dtada_data;
420178479Sjb	dtrace_recdesc_t *rec = agg->dtagd_rec;
421178479Sjb
422178479Sjb	/*
423178479Sjb	 * First, we'll check the variable ID in the aggdesc.  If it's valid,
424178479Sjb	 * we'll return it.  If not, we'll use the compiler-generated ID
425178479Sjb	 * present as the first record.
426178479Sjb	 */
427178479Sjb	if (agg->dtagd_varid != DTRACE_AGGVARIDNONE)
428178479Sjb		return (agg->dtagd_varid);
429178479Sjb
430178479Sjb	agg->dtagd_varid = *((dtrace_aggvarid_t *)(uintptr_t)(data +
431178479Sjb	    rec->dtrd_offset));
432178479Sjb
433178479Sjb	return (agg->dtagd_varid);
434178479Sjb}
435178479Sjb
436178479Sjb
437178479Sjbstatic int
438178479Sjbdt_aggregate_snap_cpu(dtrace_hdl_t *dtp, processorid_t cpu)
439178479Sjb{
440178479Sjb	dtrace_epid_t id;
441178479Sjb	uint64_t hashval;
442178479Sjb	size_t offs, roffs, size, ndx;
443178479Sjb	int i, j, rval;
444178479Sjb	caddr_t addr, data;
445178479Sjb	dtrace_recdesc_t *rec;
446178479Sjb	dt_aggregate_t *agp = &dtp->dt_aggregate;
447178479Sjb	dtrace_aggdesc_t *agg;
448178479Sjb	dt_ahash_t *hash = &agp->dtat_hash;
449178479Sjb	dt_ahashent_t *h;
450178479Sjb	dtrace_bufdesc_t b = agp->dtat_buf, *buf = &b;
451178479Sjb	dtrace_aggdata_t *aggdata;
452178479Sjb	int flags = agp->dtat_flags;
453178479Sjb
454178479Sjb	buf->dtbd_cpu = cpu;
455178479Sjb
456297077Smav#ifdef illumos
457178479Sjb	if (dt_ioctl(dtp, DTRACEIOC_AGGSNAP, buf) == -1) {
458178558Sjb#else
459178558Sjb	if (dt_ioctl(dtp, DTRACEIOC_AGGSNAP, &buf) == -1) {
460178558Sjb#endif
461178479Sjb		if (errno == ENOENT) {
462178479Sjb			/*
463178479Sjb			 * If that failed with ENOENT, it may be because the
464178479Sjb			 * CPU was unconfigured.  This is okay; we'll just
465178479Sjb			 * do nothing but return success.
466178479Sjb			 */
467178479Sjb			return (0);
468178479Sjb		}
469178479Sjb
470178479Sjb		return (dt_set_errno(dtp, errno));
471178479Sjb	}
472178479Sjb
473178479Sjb	if (buf->dtbd_drops != 0) {
474178479Sjb		if (dt_handle_cpudrop(dtp, cpu,
475178479Sjb		    DTRACEDROP_AGGREGATION, buf->dtbd_drops) == -1)
476178479Sjb			return (-1);
477178479Sjb	}
478178479Sjb
479178479Sjb	if (buf->dtbd_size == 0)
480178479Sjb		return (0);
481178479Sjb
482178479Sjb	if (hash->dtah_hash == NULL) {
483178479Sjb		size_t size;
484178479Sjb
485178479Sjb		hash->dtah_size = DTRACE_AHASHSIZE;
486178479Sjb		size = hash->dtah_size * sizeof (dt_ahashent_t *);
487178479Sjb
488178479Sjb		if ((hash->dtah_hash = malloc(size)) == NULL)
489178479Sjb			return (dt_set_errno(dtp, EDT_NOMEM));
490178479Sjb
491178479Sjb		bzero(hash->dtah_hash, size);
492178479Sjb	}
493178479Sjb
494178479Sjb	for (offs = 0; offs < buf->dtbd_size; ) {
495178479Sjb		/*
496178479Sjb		 * We're guaranteed to have an ID.
497178479Sjb		 */
498178479Sjb		id = *((dtrace_epid_t *)((uintptr_t)buf->dtbd_data +
499178479Sjb		    (uintptr_t)offs));
500178479Sjb
501178479Sjb		if (id == DTRACE_AGGIDNONE) {
502178479Sjb			/*
503178479Sjb			 * This is filler to assure proper alignment of the
504178479Sjb			 * next record; we simply ignore it.
505178479Sjb			 */
506178479Sjb			offs += sizeof (id);
507178479Sjb			continue;
508178479Sjb		}
509178479Sjb
510178479Sjb		if ((rval = dt_aggid_lookup(dtp, id, &agg)) != 0)
511178479Sjb			return (rval);
512178479Sjb
513178479Sjb		addr = buf->dtbd_data + offs;
514178479Sjb		size = agg->dtagd_size;
515178479Sjb		hashval = 0;
516178479Sjb
517178479Sjb		for (j = 0; j < agg->dtagd_nrecs - 1; j++) {
518178479Sjb			rec = &agg->dtagd_rec[j];
519178479Sjb			roffs = rec->dtrd_offset;
520178479Sjb
521178479Sjb			switch (rec->dtrd_action) {
522178479Sjb			case DTRACEACT_USYM:
523178479Sjb				dt_aggregate_usym(dtp,
524178479Sjb				    /* LINTED - alignment */
525178479Sjb				    (uint64_t *)&addr[roffs]);
526178479Sjb				break;
527178479Sjb
528178479Sjb			case DTRACEACT_UMOD:
529178479Sjb				dt_aggregate_umod(dtp,
530178479Sjb				    /* LINTED - alignment */
531178479Sjb				    (uint64_t *)&addr[roffs]);
532178479Sjb				break;
533178479Sjb
534178479Sjb			case DTRACEACT_SYM:
535178479Sjb				/* LINTED - alignment */
536178479Sjb				dt_aggregate_sym(dtp, (uint64_t *)&addr[roffs]);
537178479Sjb				break;
538178479Sjb
539178479Sjb			case DTRACEACT_MOD:
540178479Sjb				/* LINTED - alignment */
541178479Sjb				dt_aggregate_mod(dtp, (uint64_t *)&addr[roffs]);
542178479Sjb				break;
543178479Sjb
544178479Sjb			default:
545178479Sjb				break;
546178479Sjb			}
547178479Sjb
548178479Sjb			for (i = 0; i < rec->dtrd_size; i++)
549178479Sjb				hashval += addr[roffs + i];
550178479Sjb		}
551178479Sjb
552178479Sjb		ndx = hashval % hash->dtah_size;
553178479Sjb
554178479Sjb		for (h = hash->dtah_hash[ndx]; h != NULL; h = h->dtahe_next) {
555178479Sjb			if (h->dtahe_hashval != hashval)
556178479Sjb				continue;
557178479Sjb
558178479Sjb			if (h->dtahe_size != size)
559178479Sjb				continue;
560178479Sjb
561178479Sjb			aggdata = &h->dtahe_data;
562178479Sjb			data = aggdata->dtada_data;
563178479Sjb
564178479Sjb			for (j = 0; j < agg->dtagd_nrecs - 1; j++) {
565178479Sjb				rec = &agg->dtagd_rec[j];
566178479Sjb				roffs = rec->dtrd_offset;
567178479Sjb
568178479Sjb				for (i = 0; i < rec->dtrd_size; i++)
569178479Sjb					if (addr[roffs + i] != data[roffs + i])
570178479Sjb						goto hashnext;
571178479Sjb			}
572178479Sjb
573178479Sjb			/*
574178479Sjb			 * We found it.  Now we need to apply the aggregating
575178479Sjb			 * action on the data here.
576178479Sjb			 */
577178479Sjb			rec = &agg->dtagd_rec[agg->dtagd_nrecs - 1];
578178479Sjb			roffs = rec->dtrd_offset;
579178479Sjb			/* LINTED - alignment */
580178479Sjb			h->dtahe_aggregate((int64_t *)&data[roffs],
581178479Sjb			    /* LINTED - alignment */
582178479Sjb			    (int64_t *)&addr[roffs], rec->dtrd_size);
583178479Sjb
584178479Sjb			/*
585178479Sjb			 * If we're keeping per CPU data, apply the aggregating
586178479Sjb			 * action there as well.
587178479Sjb			 */
588178479Sjb			if (aggdata->dtada_percpu != NULL) {
589178479Sjb				data = aggdata->dtada_percpu[cpu];
590178479Sjb
591178479Sjb				/* LINTED - alignment */
592178479Sjb				h->dtahe_aggregate((int64_t *)data,
593178479Sjb				    /* LINTED - alignment */
594178479Sjb				    (int64_t *)&addr[roffs], rec->dtrd_size);
595178479Sjb			}
596178479Sjb
597178479Sjb			goto bufnext;
598178479Sjbhashnext:
599178479Sjb			continue;
600178479Sjb		}
601178479Sjb
602178479Sjb		/*
603178479Sjb		 * If we're here, we couldn't find an entry for this record.
604178479Sjb		 */
605178479Sjb		if ((h = malloc(sizeof (dt_ahashent_t))) == NULL)
606178479Sjb			return (dt_set_errno(dtp, EDT_NOMEM));
607178479Sjb		bzero(h, sizeof (dt_ahashent_t));
608178479Sjb		aggdata = &h->dtahe_data;
609178479Sjb
610178479Sjb		if ((aggdata->dtada_data = malloc(size)) == NULL) {
611178479Sjb			free(h);
612178479Sjb			return (dt_set_errno(dtp, EDT_NOMEM));
613178479Sjb		}
614178479Sjb
615178479Sjb		bcopy(addr, aggdata->dtada_data, size);
616178479Sjb		aggdata->dtada_size = size;
617178479Sjb		aggdata->dtada_desc = agg;
618178479Sjb		aggdata->dtada_handle = dtp;
619178479Sjb		(void) dt_epid_lookup(dtp, agg->dtagd_epid,
620178479Sjb		    &aggdata->dtada_edesc, &aggdata->dtada_pdesc);
621178479Sjb		aggdata->dtada_normal = 1;
622178479Sjb
623178479Sjb		h->dtahe_hashval = hashval;
624178479Sjb		h->dtahe_size = size;
625178479Sjb		(void) dt_aggregate_aggvarid(h);
626178479Sjb
627178479Sjb		rec = &agg->dtagd_rec[agg->dtagd_nrecs - 1];
628178479Sjb
629178479Sjb		if (flags & DTRACE_A_PERCPU) {
630178479Sjb			int max_cpus = agp->dtat_maxcpu;
631178479Sjb			caddr_t *percpu = malloc(max_cpus * sizeof (caddr_t));
632178479Sjb
633178479Sjb			if (percpu == NULL) {
634178479Sjb				free(aggdata->dtada_data);
635178479Sjb				free(h);
636178479Sjb				return (dt_set_errno(dtp, EDT_NOMEM));
637178479Sjb			}
638178479Sjb
639178479Sjb			for (j = 0; j < max_cpus; j++) {
640178479Sjb				percpu[j] = malloc(rec->dtrd_size);
641178479Sjb
642178479Sjb				if (percpu[j] == NULL) {
643178479Sjb					while (--j >= 0)
644178479Sjb						free(percpu[j]);
645178479Sjb
646178479Sjb					free(aggdata->dtada_data);
647178479Sjb					free(h);
648178479Sjb					return (dt_set_errno(dtp, EDT_NOMEM));
649178479Sjb				}
650178479Sjb
651178479Sjb				if (j == cpu) {
652178479Sjb					bcopy(&addr[rec->dtrd_offset],
653178479Sjb					    percpu[j], rec->dtrd_size);
654178479Sjb				} else {
655178479Sjb					bzero(percpu[j], rec->dtrd_size);
656178479Sjb				}
657178479Sjb			}
658178479Sjb
659178479Sjb			aggdata->dtada_percpu = percpu;
660178479Sjb		}
661178479Sjb
662178479Sjb		switch (rec->dtrd_action) {
663178479Sjb		case DTRACEAGG_MIN:
664178479Sjb			h->dtahe_aggregate = dt_aggregate_min;
665178479Sjb			break;
666178479Sjb
667178479Sjb		case DTRACEAGG_MAX:
668178479Sjb			h->dtahe_aggregate = dt_aggregate_max;
669178479Sjb			break;
670178479Sjb
671178479Sjb		case DTRACEAGG_LQUANTIZE:
672178479Sjb			h->dtahe_aggregate = dt_aggregate_lquantize;
673178479Sjb			break;
674178479Sjb
675237624Spfg		case DTRACEAGG_LLQUANTIZE:
676237624Spfg			h->dtahe_aggregate = dt_aggregate_llquantize;
677237624Spfg			break;
678237624Spfg
679178479Sjb		case DTRACEAGG_COUNT:
680178479Sjb		case DTRACEAGG_SUM:
681178479Sjb		case DTRACEAGG_AVG:
682178479Sjb		case DTRACEAGG_STDDEV:
683178479Sjb		case DTRACEAGG_QUANTIZE:
684178479Sjb			h->dtahe_aggregate = dt_aggregate_count;
685178479Sjb			break;
686178479Sjb
687178479Sjb		default:
688178479Sjb			return (dt_set_errno(dtp, EDT_BADAGG));
689178479Sjb		}
690178479Sjb
691178479Sjb		if (hash->dtah_hash[ndx] != NULL)
692178479Sjb			hash->dtah_hash[ndx]->dtahe_prev = h;
693178479Sjb
694178479Sjb		h->dtahe_next = hash->dtah_hash[ndx];
695178479Sjb		hash->dtah_hash[ndx] = h;
696178479Sjb
697178479Sjb		if (hash->dtah_all != NULL)
698178479Sjb			hash->dtah_all->dtahe_prevall = h;
699178479Sjb
700178479Sjb		h->dtahe_nextall = hash->dtah_all;
701178479Sjb		hash->dtah_all = h;
702178479Sjbbufnext:
703178479Sjb		offs += agg->dtagd_size;
704178479Sjb	}
705178479Sjb
706178479Sjb	return (0);
707178479Sjb}
708178479Sjb
709178479Sjbint
710178479Sjbdtrace_aggregate_snap(dtrace_hdl_t *dtp)
711178479Sjb{
712178479Sjb	int i, rval;
713178479Sjb	dt_aggregate_t *agp = &dtp->dt_aggregate;
714178479Sjb	hrtime_t now = gethrtime();
715178479Sjb	dtrace_optval_t interval = dtp->dt_options[DTRACEOPT_AGGRATE];
716178479Sjb
717178479Sjb	if (dtp->dt_lastagg != 0) {
718178479Sjb		if (now - dtp->dt_lastagg < interval)
719178479Sjb			return (0);
720178479Sjb
721178479Sjb		dtp->dt_lastagg += interval;
722178479Sjb	} else {
723178479Sjb		dtp->dt_lastagg = now;
724178479Sjb	}
725178479Sjb
726178479Sjb	if (!dtp->dt_active)
727178479Sjb		return (dt_set_errno(dtp, EINVAL));
728178479Sjb
729178479Sjb	if (agp->dtat_buf.dtbd_size == 0)
730178479Sjb		return (0);
731178479Sjb
732178479Sjb	for (i = 0; i < agp->dtat_ncpus; i++) {
733178558Sjb		if ((rval = dt_aggregate_snap_cpu(dtp, agp->dtat_cpus[i])))
734178479Sjb			return (rval);
735178479Sjb	}
736178479Sjb
737178479Sjb	return (0);
738178479Sjb}
739178479Sjb
740178479Sjbstatic int
741178479Sjbdt_aggregate_hashcmp(const void *lhs, const void *rhs)
742178479Sjb{
743178479Sjb	dt_ahashent_t *lh = *((dt_ahashent_t **)lhs);
744178479Sjb	dt_ahashent_t *rh = *((dt_ahashent_t **)rhs);
745178479Sjb	dtrace_aggdesc_t *lagg = lh->dtahe_data.dtada_desc;
746178479Sjb	dtrace_aggdesc_t *ragg = rh->dtahe_data.dtada_desc;
747178479Sjb
748178479Sjb	if (lagg->dtagd_nrecs < ragg->dtagd_nrecs)
749178479Sjb		return (DT_LESSTHAN);
750178479Sjb
751178479Sjb	if (lagg->dtagd_nrecs > ragg->dtagd_nrecs)
752178479Sjb		return (DT_GREATERTHAN);
753178479Sjb
754178479Sjb	return (0);
755178479Sjb}
756178479Sjb
757178479Sjbstatic int
758178479Sjbdt_aggregate_varcmp(const void *lhs, const void *rhs)
759178479Sjb{
760178479Sjb	dt_ahashent_t *lh = *((dt_ahashent_t **)lhs);
761178479Sjb	dt_ahashent_t *rh = *((dt_ahashent_t **)rhs);
762178479Sjb	dtrace_aggvarid_t lid, rid;
763178479Sjb
764178479Sjb	lid = dt_aggregate_aggvarid(lh);
765178479Sjb	rid = dt_aggregate_aggvarid(rh);
766178479Sjb
767178479Sjb	if (lid < rid)
768178479Sjb		return (DT_LESSTHAN);
769178479Sjb
770178479Sjb	if (lid > rid)
771178479Sjb		return (DT_GREATERTHAN);
772178479Sjb
773178479Sjb	return (0);
774178479Sjb}
775178479Sjb
776178479Sjbstatic int
777178479Sjbdt_aggregate_keycmp(const void *lhs, const void *rhs)
778178479Sjb{
779178479Sjb	dt_ahashent_t *lh = *((dt_ahashent_t **)lhs);
780178479Sjb	dt_ahashent_t *rh = *((dt_ahashent_t **)rhs);
781178479Sjb	dtrace_aggdesc_t *lagg = lh->dtahe_data.dtada_desc;
782178479Sjb	dtrace_aggdesc_t *ragg = rh->dtahe_data.dtada_desc;
783178479Sjb	dtrace_recdesc_t *lrec, *rrec;
784178479Sjb	char *ldata, *rdata;
785178479Sjb	int rval, i, j, keypos, nrecs;
786178479Sjb
787178479Sjb	if ((rval = dt_aggregate_hashcmp(lhs, rhs)) != 0)
788178479Sjb		return (rval);
789178479Sjb
790178479Sjb	nrecs = lagg->dtagd_nrecs - 1;
791178479Sjb	assert(nrecs == ragg->dtagd_nrecs - 1);
792178479Sjb
793178479Sjb	keypos = dt_keypos + 1 >= nrecs ? 0 : dt_keypos;
794178479Sjb
795178479Sjb	for (i = 1; i < nrecs; i++) {
796178479Sjb		uint64_t lval, rval;
797178479Sjb		int ndx = i + keypos;
798178479Sjb
799178479Sjb		if (ndx >= nrecs)
800178479Sjb			ndx = ndx - nrecs + 1;
801178479Sjb
802178479Sjb		lrec = &lagg->dtagd_rec[ndx];
803178479Sjb		rrec = &ragg->dtagd_rec[ndx];
804178479Sjb
805178479Sjb		ldata = lh->dtahe_data.dtada_data + lrec->dtrd_offset;
806178479Sjb		rdata = rh->dtahe_data.dtada_data + rrec->dtrd_offset;
807178479Sjb
808178479Sjb		if (lrec->dtrd_size < rrec->dtrd_size)
809178479Sjb			return (DT_LESSTHAN);
810178479Sjb
811178479Sjb		if (lrec->dtrd_size > rrec->dtrd_size)
812178479Sjb			return (DT_GREATERTHAN);
813178479Sjb
814178479Sjb		switch (lrec->dtrd_size) {
815178479Sjb		case sizeof (uint64_t):
816178479Sjb			/* LINTED - alignment */
817178479Sjb			lval = *((uint64_t *)ldata);
818178479Sjb			/* LINTED - alignment */
819178479Sjb			rval = *((uint64_t *)rdata);
820178479Sjb			break;
821178479Sjb
822178479Sjb		case sizeof (uint32_t):
823178479Sjb			/* LINTED - alignment */
824178479Sjb			lval = *((uint32_t *)ldata);
825178479Sjb			/* LINTED - alignment */
826178479Sjb			rval = *((uint32_t *)rdata);
827178479Sjb			break;
828178479Sjb
829178479Sjb		case sizeof (uint16_t):
830178479Sjb			/* LINTED - alignment */
831178479Sjb			lval = *((uint16_t *)ldata);
832178479Sjb			/* LINTED - alignment */
833178479Sjb			rval = *((uint16_t *)rdata);
834178479Sjb			break;
835178479Sjb
836178479Sjb		case sizeof (uint8_t):
837178479Sjb			lval = *((uint8_t *)ldata);
838178479Sjb			rval = *((uint8_t *)rdata);
839178479Sjb			break;
840178479Sjb
841178479Sjb		default:
842178479Sjb			switch (lrec->dtrd_action) {
843178479Sjb			case DTRACEACT_UMOD:
844178479Sjb			case DTRACEACT_UADDR:
845178479Sjb			case DTRACEACT_USYM:
846178479Sjb				for (j = 0; j < 2; j++) {
847178479Sjb					/* LINTED - alignment */
848178479Sjb					lval = ((uint64_t *)ldata)[j];
849178479Sjb					/* LINTED - alignment */
850178479Sjb					rval = ((uint64_t *)rdata)[j];
851178479Sjb
852178479Sjb					if (lval < rval)
853178479Sjb						return (DT_LESSTHAN);
854178479Sjb
855178479Sjb					if (lval > rval)
856178479Sjb						return (DT_GREATERTHAN);
857178479Sjb				}
858178479Sjb
859178479Sjb				break;
860178479Sjb
861178479Sjb			default:
862178479Sjb				for (j = 0; j < lrec->dtrd_size; j++) {
863178479Sjb					lval = ((uint8_t *)ldata)[j];
864178479Sjb					rval = ((uint8_t *)rdata)[j];
865178479Sjb
866178479Sjb					if (lval < rval)
867178479Sjb						return (DT_LESSTHAN);
868178479Sjb
869178479Sjb					if (lval > rval)
870178479Sjb						return (DT_GREATERTHAN);
871178479Sjb				}
872178479Sjb			}
873178479Sjb
874178479Sjb			continue;
875178479Sjb		}
876178479Sjb
877178479Sjb		if (lval < rval)
878178479Sjb			return (DT_LESSTHAN);
879178479Sjb
880178479Sjb		if (lval > rval)
881178479Sjb			return (DT_GREATERTHAN);
882178479Sjb	}
883178479Sjb
884178479Sjb	return (0);
885178479Sjb}
886178479Sjb
887178479Sjbstatic int
888178479Sjbdt_aggregate_valcmp(const void *lhs, const void *rhs)
889178479Sjb{
890178479Sjb	dt_ahashent_t *lh = *((dt_ahashent_t **)lhs);
891178479Sjb	dt_ahashent_t *rh = *((dt_ahashent_t **)rhs);
892178479Sjb	dtrace_aggdesc_t *lagg = lh->dtahe_data.dtada_desc;
893178479Sjb	dtrace_aggdesc_t *ragg = rh->dtahe_data.dtada_desc;
894178479Sjb	caddr_t ldata = lh->dtahe_data.dtada_data;
895178479Sjb	caddr_t rdata = rh->dtahe_data.dtada_data;
896178479Sjb	dtrace_recdesc_t *lrec, *rrec;
897178479Sjb	int64_t *laddr, *raddr;
898253725Spfg	int rval;
899178479Sjb
900253725Spfg	assert(lagg->dtagd_nrecs == ragg->dtagd_nrecs);
901178479Sjb
902253725Spfg	lrec = &lagg->dtagd_rec[lagg->dtagd_nrecs - 1];
903253725Spfg	rrec = &ragg->dtagd_rec[ragg->dtagd_nrecs - 1];
904178479Sjb
905253725Spfg	assert(lrec->dtrd_action == rrec->dtrd_action);
906178479Sjb
907178479Sjb	laddr = (int64_t *)(uintptr_t)(ldata + lrec->dtrd_offset);
908178479Sjb	raddr = (int64_t *)(uintptr_t)(rdata + rrec->dtrd_offset);
909178479Sjb
910178479Sjb	switch (lrec->dtrd_action) {
911178479Sjb	case DTRACEAGG_AVG:
912178479Sjb		rval = dt_aggregate_averagecmp(laddr, raddr);
913178479Sjb		break;
914178479Sjb
915178479Sjb	case DTRACEAGG_STDDEV:
916178479Sjb		rval = dt_aggregate_stddevcmp(laddr, raddr);
917178479Sjb		break;
918178479Sjb
919178479Sjb	case DTRACEAGG_QUANTIZE:
920178479Sjb		rval = dt_aggregate_quantizedcmp(laddr, raddr);
921178479Sjb		break;
922178479Sjb
923178479Sjb	case DTRACEAGG_LQUANTIZE:
924178479Sjb		rval = dt_aggregate_lquantizedcmp(laddr, raddr);
925178479Sjb		break;
926178479Sjb
927237624Spfg	case DTRACEAGG_LLQUANTIZE:
928237624Spfg		rval = dt_aggregate_llquantizedcmp(laddr, raddr);
929237624Spfg		break;
930237624Spfg
931178479Sjb	case DTRACEAGG_COUNT:
932178479Sjb	case DTRACEAGG_SUM:
933178479Sjb	case DTRACEAGG_MIN:
934178479Sjb	case DTRACEAGG_MAX:
935178479Sjb		rval = dt_aggregate_countcmp(laddr, raddr);
936178479Sjb		break;
937178479Sjb
938178479Sjb	default:
939178479Sjb		assert(0);
940178479Sjb	}
941178479Sjb
942178479Sjb	return (rval);
943178479Sjb}
944178479Sjb
/*
 * Order two aggregation elements by value, falling back to a comparison of
 * their keys when the values compare equal.
 */
static int
dt_aggregate_valkeycmp(const void *lhs, const void *rhs)
{
	int cmp = dt_aggregate_valcmp(lhs, rhs);

	/*
	 * Equal values:  the key layout is already known to be the same for
	 * both elements, so the keys themselves serve as the tie-breaker.
	 */
	if (cmp == 0)
		cmp = dt_aggregate_keycmp(lhs, rhs);

	return (cmp);
}
960178479Sjb
/*
 * Order two aggregation elements by key first; elements with equal keys are
 * then ordered by the aggregation variable comparison.
 */
static int
dt_aggregate_keyvarcmp(const void *lhs, const void *rhs)
{
	int cmp = dt_aggregate_keycmp(lhs, rhs);

	if (cmp == 0)
		cmp = dt_aggregate_varcmp(lhs, rhs);

	return (cmp);
}
971178479Sjb
/*
 * Order two aggregation elements by the aggregation variable comparison
 * first; elements that compare equal there are then ordered by key.
 */
static int
dt_aggregate_varkeycmp(const void *lhs, const void *rhs)
{
	int cmp = dt_aggregate_varcmp(lhs, rhs);

	if (cmp == 0)
		cmp = dt_aggregate_keycmp(lhs, rhs);

	return (cmp);
}
982178479Sjb
/*
 * Order two aggregation elements by value (with keys as the tie-breaker);
 * elements that are still equal are ordered by the aggregation variable
 * comparison.
 */
static int
dt_aggregate_valvarcmp(const void *lhs, const void *rhs)
{
	int cmp = dt_aggregate_valkeycmp(lhs, rhs);

	if (cmp == 0)
		cmp = dt_aggregate_varcmp(lhs, rhs);

	return (cmp);
}
993178479Sjb
/*
 * Order two aggregation elements by the aggregation variable comparison
 * first; elements that compare equal there are ordered by value, with keys
 * as the final tie-breaker.
 */
static int
dt_aggregate_varvalcmp(const void *lhs, const void *rhs)
{
	int cmp = dt_aggregate_varcmp(lhs, rhs);

	if (cmp == 0)
		cmp = dt_aggregate_valkeycmp(lhs, rhs);

	return (cmp);
}
1004178479Sjb
/*
 * Reverse of dt_aggregate_keyvarcmp():  the operands are swapped so that the
 * resulting order is inverted.
 */
static int
dt_aggregate_keyvarrevcmp(const void *lhs, const void *rhs)
{
	int cmp;

	cmp = dt_aggregate_keyvarcmp(rhs, lhs);

	return (cmp);
}
1010178479Sjb
/*
 * Reverse of dt_aggregate_varkeycmp():  the operands are swapped so that the
 * resulting order is inverted.
 */
static int
dt_aggregate_varkeyrevcmp(const void *lhs, const void *rhs)
{
	int cmp;

	cmp = dt_aggregate_varkeycmp(rhs, lhs);

	return (cmp);
}
1016178479Sjb
/*
 * Reverse of dt_aggregate_valvarcmp():  the operands are swapped so that the
 * resulting order is inverted.
 */
static int
dt_aggregate_valvarrevcmp(const void *lhs, const void *rhs)
{
	int cmp;

	cmp = dt_aggregate_valvarcmp(rhs, lhs);

	return (cmp);
}
1022178479Sjb
/*
 * Reverse of dt_aggregate_varvalcmp():  the operands are swapped so that the
 * resulting order is inverted.
 */
static int
dt_aggregate_varvalrevcmp(const void *lhs, const void *rhs)
{
	int cmp;

	cmp = dt_aggregate_varvalcmp(rhs, lhs);

	return (cmp);
}
1028178479Sjb
1029178479Sjbstatic int
1030178479Sjbdt_aggregate_bundlecmp(const void *lhs, const void *rhs)
1031178479Sjb{
1032178479Sjb	dt_ahashent_t **lh = *((dt_ahashent_t ***)lhs);
1033178479Sjb	dt_ahashent_t **rh = *((dt_ahashent_t ***)rhs);
1034178479Sjb	int i, rval;
1035178479Sjb
1036178479Sjb	if (dt_keysort) {
1037178479Sjb		/*
1038178479Sjb		 * If we're sorting on keys, we need to scan until we find the
1039178479Sjb		 * last entry -- that's the representative key.  (The order of
1040178479Sjb		 * the bundle is values followed by key to accommodate the
1041178479Sjb		 * default behavior of sorting by value.)  If the keys are
1042178479Sjb		 * equal, we'll fall into the value comparison loop, below.
1043178479Sjb		 */
1044178479Sjb		for (i = 0; lh[i + 1] != NULL; i++)
1045178479Sjb			continue;
1046178479Sjb
1047178479Sjb		assert(i != 0);
1048178479Sjb		assert(rh[i + 1] == NULL);
1049178479Sjb
1050178479Sjb		if ((rval = dt_aggregate_keycmp(&lh[i], &rh[i])) != 0)
1051178479Sjb			return (rval);
1052178479Sjb	}
1053178479Sjb
1054178479Sjb	for (i = 0; ; i++) {
1055178479Sjb		if (lh[i + 1] == NULL) {
1056178479Sjb			/*
1057178479Sjb			 * All of the values are equal; if we're sorting on
1058178479Sjb			 * keys, then we're only here because the keys were
1059178479Sjb			 * found to be equal and these records are therefore
1060178479Sjb			 * equal.  If we're not sorting on keys, we'll use the
1061178479Sjb			 * key comparison from the representative key as the
1062178479Sjb			 * tie-breaker.
1063178479Sjb			 */
1064178479Sjb			if (dt_keysort)
1065178479Sjb				return (0);
1066178479Sjb
1067178479Sjb			assert(i != 0);
1068178479Sjb			assert(rh[i + 1] == NULL);
1069178479Sjb			return (dt_aggregate_keycmp(&lh[i], &rh[i]));
1070178479Sjb		} else {
1071178479Sjb			if ((rval = dt_aggregate_valcmp(&lh[i], &rh[i])) != 0)
1072178479Sjb				return (rval);
1073178479Sjb		}
1074178479Sjb	}
1075178479Sjb}
1076178479Sjb
1077178479Sjbint
1078178479Sjbdt_aggregate_go(dtrace_hdl_t *dtp)
1079178479Sjb{
1080178479Sjb	dt_aggregate_t *agp = &dtp->dt_aggregate;
1081178479Sjb	dtrace_optval_t size, cpu;
1082178479Sjb	dtrace_bufdesc_t *buf = &agp->dtat_buf;
1083178479Sjb	int rval, i;
1084178479Sjb
1085178479Sjb	assert(agp->dtat_maxcpu == 0);
1086178479Sjb	assert(agp->dtat_ncpu == 0);
1087178479Sjb	assert(agp->dtat_cpus == NULL);
1088178479Sjb
1089178479Sjb	agp->dtat_maxcpu = dt_sysconf(dtp, _SC_CPUID_MAX) + 1;
1090178479Sjb	agp->dtat_ncpu = dt_sysconf(dtp, _SC_NPROCESSORS_MAX);
1091178479Sjb	agp->dtat_cpus = malloc(agp->dtat_ncpu * sizeof (processorid_t));
1092178479Sjb
1093178479Sjb	if (agp->dtat_cpus == NULL)
1094178479Sjb		return (dt_set_errno(dtp, EDT_NOMEM));
1095178479Sjb
1096178479Sjb	/*
1097178479Sjb	 * Use the aggregation buffer size as reloaded from the kernel.
1098178479Sjb	 */
1099178479Sjb	size = dtp->dt_options[DTRACEOPT_AGGSIZE];
1100178479Sjb
1101178479Sjb	rval = dtrace_getopt(dtp, "aggsize", &size);
1102178479Sjb	assert(rval == 0);
1103178479Sjb
1104178479Sjb	if (size == 0 || size == DTRACEOPT_UNSET)
1105178479Sjb		return (0);
1106178479Sjb
1107178479Sjb	buf = &agp->dtat_buf;
1108178479Sjb	buf->dtbd_size = size;
1109178479Sjb
1110178479Sjb	if ((buf->dtbd_data = malloc(buf->dtbd_size)) == NULL)
1111178479Sjb		return (dt_set_errno(dtp, EDT_NOMEM));
1112178479Sjb
1113178479Sjb	/*
1114178479Sjb	 * Now query for the CPUs enabled.
1115178479Sjb	 */
1116178479Sjb	rval = dtrace_getopt(dtp, "cpu", &cpu);
1117178479Sjb	assert(rval == 0 && cpu != DTRACEOPT_UNSET);
1118178479Sjb
1119178479Sjb	if (cpu != DTRACE_CPUALL) {
1120178479Sjb		assert(cpu < agp->dtat_ncpu);
1121178479Sjb		agp->dtat_cpus[agp->dtat_ncpus++] = (processorid_t)cpu;
1122178479Sjb
1123178479Sjb		return (0);
1124178479Sjb	}
1125178479Sjb
1126178479Sjb	agp->dtat_ncpus = 0;
1127178479Sjb	for (i = 0; i < agp->dtat_maxcpu; i++) {
1128178479Sjb		if (dt_status(dtp, i) == -1)
1129178479Sjb			continue;
1130178479Sjb
1131178479Sjb		agp->dtat_cpus[agp->dtat_ncpus++] = i;
1132178479Sjb	}
1133178479Sjb
1134178479Sjb	return (0);
1135178479Sjb}
1136178479Sjb
1137178479Sjbstatic int
1138178479Sjbdt_aggwalk_rval(dtrace_hdl_t *dtp, dt_ahashent_t *h, int rval)
1139178479Sjb{
1140178479Sjb	dt_aggregate_t *agp = &dtp->dt_aggregate;
1141178479Sjb	dtrace_aggdata_t *data;
1142178479Sjb	dtrace_aggdesc_t *aggdesc;
1143178479Sjb	dtrace_recdesc_t *rec;
1144178479Sjb	int i;
1145178479Sjb
1146178479Sjb	switch (rval) {
1147178479Sjb	case DTRACE_AGGWALK_NEXT:
1148178479Sjb		break;
1149178479Sjb
1150178479Sjb	case DTRACE_AGGWALK_CLEAR: {
1151178479Sjb		uint32_t size, offs = 0;
1152178479Sjb
1153178479Sjb		aggdesc = h->dtahe_data.dtada_desc;
1154178479Sjb		rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1];
1155178479Sjb		size = rec->dtrd_size;
1156178479Sjb		data = &h->dtahe_data;
1157178479Sjb
1158178479Sjb		if (rec->dtrd_action == DTRACEAGG_LQUANTIZE) {
1159178479Sjb			offs = sizeof (uint64_t);
1160178479Sjb			size -= sizeof (uint64_t);
1161178479Sjb		}
1162178479Sjb
1163178479Sjb		bzero(&data->dtada_data[rec->dtrd_offset] + offs, size);
1164178479Sjb
1165178479Sjb		if (data->dtada_percpu == NULL)
1166178479Sjb			break;
1167178479Sjb
1168178479Sjb		for (i = 0; i < dtp->dt_aggregate.dtat_maxcpu; i++)
1169178479Sjb			bzero(data->dtada_percpu[i] + offs, size);
1170178479Sjb		break;
1171178479Sjb	}
1172178479Sjb
1173178479Sjb	case DTRACE_AGGWALK_ERROR:
1174178479Sjb		/*
1175178479Sjb		 * We assume that errno is already set in this case.
1176178479Sjb		 */
1177178479Sjb		return (dt_set_errno(dtp, errno));
1178178479Sjb
1179178479Sjb	case DTRACE_AGGWALK_ABORT:
1180178479Sjb		return (dt_set_errno(dtp, EDT_DIRABORT));
1181178479Sjb
1182178479Sjb	case DTRACE_AGGWALK_DENORMALIZE:
1183178479Sjb		h->dtahe_data.dtada_normal = 1;
1184178479Sjb		return (0);
1185178479Sjb
1186178479Sjb	case DTRACE_AGGWALK_NORMALIZE:
1187178479Sjb		if (h->dtahe_data.dtada_normal == 0) {
1188178479Sjb			h->dtahe_data.dtada_normal = 1;
1189178479Sjb			return (dt_set_errno(dtp, EDT_BADRVAL));
1190178479Sjb		}
1191178479Sjb
1192178479Sjb		return (0);
1193178479Sjb
1194178479Sjb	case DTRACE_AGGWALK_REMOVE: {
1195178479Sjb		dtrace_aggdata_t *aggdata = &h->dtahe_data;
1196178558Sjb		int max_cpus = agp->dtat_maxcpu;
1197178479Sjb
1198178479Sjb		/*
1199178479Sjb		 * First, remove this hash entry from its hash chain.
1200178479Sjb		 */
1201178479Sjb		if (h->dtahe_prev != NULL) {
1202178479Sjb			h->dtahe_prev->dtahe_next = h->dtahe_next;
1203178479Sjb		} else {
1204178479Sjb			dt_ahash_t *hash = &agp->dtat_hash;
1205178479Sjb			size_t ndx = h->dtahe_hashval % hash->dtah_size;
1206178479Sjb
1207178479Sjb			assert(hash->dtah_hash[ndx] == h);
1208178479Sjb			hash->dtah_hash[ndx] = h->dtahe_next;
1209178479Sjb		}
1210178479Sjb
1211178479Sjb		if (h->dtahe_next != NULL)
1212178479Sjb			h->dtahe_next->dtahe_prev = h->dtahe_prev;
1213178479Sjb
1214178479Sjb		/*
1215178479Sjb		 * Now remove it from the list of all hash entries.
1216178479Sjb		 */
1217178479Sjb		if (h->dtahe_prevall != NULL) {
1218178479Sjb			h->dtahe_prevall->dtahe_nextall = h->dtahe_nextall;
1219178479Sjb		} else {
1220178479Sjb			dt_ahash_t *hash = &agp->dtat_hash;
1221178479Sjb
1222178479Sjb			assert(hash->dtah_all == h);
1223178479Sjb			hash->dtah_all = h->dtahe_nextall;
1224178479Sjb		}
1225178479Sjb
1226178479Sjb		if (h->dtahe_nextall != NULL)
1227178479Sjb			h->dtahe_nextall->dtahe_prevall = h->dtahe_prevall;
1228178479Sjb
1229178479Sjb		/*
1230178479Sjb		 * We're unlinked.  We can safely destroy the data.
1231178479Sjb		 */
1232178479Sjb		if (aggdata->dtada_percpu != NULL) {
1233178479Sjb			for (i = 0; i < max_cpus; i++)
1234178479Sjb				free(aggdata->dtada_percpu[i]);
1235178479Sjb			free(aggdata->dtada_percpu);
1236178479Sjb		}
1237178479Sjb
1238178479Sjb		free(aggdata->dtada_data);
1239178479Sjb		free(h);
1240178479Sjb
1241178479Sjb		return (0);
1242178479Sjb	}
1243178479Sjb
1244178479Sjb	default:
1245178479Sjb		return (dt_set_errno(dtp, EDT_BADRVAL));
1246178479Sjb	}
1247178479Sjb
1248178479Sjb	return (0);
1249178479Sjb}
1250178479Sjb
1251178479Sjbvoid
1252178479Sjbdt_aggregate_qsort(dtrace_hdl_t *dtp, void *base, size_t nel, size_t width,
1253178479Sjb    int (*compar)(const void *, const void *))
1254178479Sjb{
1255178479Sjb	int rev = dt_revsort, key = dt_keysort, keypos = dt_keypos;
1256178479Sjb	dtrace_optval_t keyposopt = dtp->dt_options[DTRACEOPT_AGGSORTKEYPOS];
1257178479Sjb
1258178479Sjb	dt_revsort = (dtp->dt_options[DTRACEOPT_AGGSORTREV] != DTRACEOPT_UNSET);
1259178479Sjb	dt_keysort = (dtp->dt_options[DTRACEOPT_AGGSORTKEY] != DTRACEOPT_UNSET);
1260178479Sjb
1261178479Sjb	if (keyposopt != DTRACEOPT_UNSET && keyposopt <= INT_MAX) {
1262178479Sjb		dt_keypos = (int)keyposopt;
1263178479Sjb	} else {
1264178479Sjb		dt_keypos = 0;
1265178479Sjb	}
1266178479Sjb
1267178479Sjb	if (compar == NULL) {
1268178479Sjb		if (!dt_keysort) {
1269178479Sjb			compar = dt_aggregate_varvalcmp;
1270178479Sjb		} else {
1271178479Sjb			compar = dt_aggregate_varkeycmp;
1272178479Sjb		}
1273178479Sjb	}
1274178479Sjb
1275178479Sjb	qsort(base, nel, width, compar);
1276178479Sjb
1277178479Sjb	dt_revsort = rev;
1278178479Sjb	dt_keysort = key;
1279178479Sjb	dt_keypos = keypos;
1280178479Sjb}
1281178479Sjb
1282178479Sjbint
1283178479Sjbdtrace_aggregate_walk(dtrace_hdl_t *dtp, dtrace_aggregate_f *func, void *arg)
1284178479Sjb{
1285178479Sjb	dt_ahashent_t *h, *next;
1286178479Sjb	dt_ahash_t *hash = &dtp->dt_aggregate.dtat_hash;
1287178479Sjb
1288178479Sjb	for (h = hash->dtah_all; h != NULL; h = next) {
1289178479Sjb		/*
1290178479Sjb		 * dt_aggwalk_rval() can potentially remove the current hash
1291178479Sjb		 * entry; we need to load the next hash entry before calling
1292178479Sjb		 * into it.
1293178479Sjb		 */
1294178479Sjb		next = h->dtahe_nextall;
1295178479Sjb
1296178479Sjb		if (dt_aggwalk_rval(dtp, h, func(&h->dtahe_data, arg)) == -1)
1297178479Sjb			return (-1);
1298178479Sjb	}
1299178479Sjb
1300178479Sjb	return (0);
1301178479Sjb}
1302178479Sjb
1303178479Sjbstatic int
1304268578Srpaulodt_aggregate_total(dtrace_hdl_t *dtp, boolean_t clear)
1305268578Srpaulo{
1306268578Srpaulo	dt_ahashent_t *h;
1307268578Srpaulo	dtrace_aggdata_t **total;
1308268578Srpaulo	dtrace_aggid_t max = DTRACE_AGGVARIDNONE, id;
1309268578Srpaulo	dt_aggregate_t *agp = &dtp->dt_aggregate;
1310268578Srpaulo	dt_ahash_t *hash = &agp->dtat_hash;
1311268578Srpaulo	uint32_t tflags;
1312268578Srpaulo
1313268578Srpaulo	tflags = DTRACE_A_TOTAL | DTRACE_A_HASNEGATIVES | DTRACE_A_HASPOSITIVES;
1314268578Srpaulo
1315268578Srpaulo	/*
1316268578Srpaulo	 * If we need to deliver per-aggregation totals, we're going to take
1317268578Srpaulo	 * three passes over the aggregate:  one to clear everything out and
1318268578Srpaulo	 * determine our maximum aggregation ID, one to actually total
1319268578Srpaulo	 * everything up, and a final pass to assign the totals to the
1320268578Srpaulo	 * individual elements.
1321268578Srpaulo	 */
1322268578Srpaulo	for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) {
1323268578Srpaulo		dtrace_aggdata_t *aggdata = &h->dtahe_data;
1324268578Srpaulo
1325268578Srpaulo		if ((id = dt_aggregate_aggvarid(h)) > max)
1326268578Srpaulo			max = id;
1327268578Srpaulo
1328268578Srpaulo		aggdata->dtada_total = 0;
1329268578Srpaulo		aggdata->dtada_flags &= ~tflags;
1330268578Srpaulo	}
1331268578Srpaulo
1332268578Srpaulo	if (clear || max == DTRACE_AGGVARIDNONE)
1333268578Srpaulo		return (0);
1334268578Srpaulo
1335268578Srpaulo	total = dt_zalloc(dtp, (max + 1) * sizeof (dtrace_aggdata_t *));
1336268578Srpaulo
1337268578Srpaulo	if (total == NULL)
1338268578Srpaulo		return (-1);
1339268578Srpaulo
1340268578Srpaulo	for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) {
1341268578Srpaulo		dtrace_aggdata_t *aggdata = &h->dtahe_data;
1342268578Srpaulo		dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1343268578Srpaulo		dtrace_recdesc_t *rec;
1344268578Srpaulo		caddr_t data;
1345268578Srpaulo		int64_t val, *addr;
1346268578Srpaulo
1347268578Srpaulo		rec = &agg->dtagd_rec[agg->dtagd_nrecs - 1];
1348268578Srpaulo		data = aggdata->dtada_data;
1349268578Srpaulo		addr = (int64_t *)(uintptr_t)(data + rec->dtrd_offset);
1350268578Srpaulo
1351268578Srpaulo		switch (rec->dtrd_action) {
1352268578Srpaulo		case DTRACEAGG_STDDEV:
1353268578Srpaulo			val = dt_stddev((uint64_t *)addr, 1);
1354268578Srpaulo			break;
1355268578Srpaulo
1356268578Srpaulo		case DTRACEAGG_SUM:
1357268578Srpaulo		case DTRACEAGG_COUNT:
1358268578Srpaulo			val = *addr;
1359268578Srpaulo			break;
1360268578Srpaulo
1361268578Srpaulo		case DTRACEAGG_AVG:
1362268578Srpaulo			val = addr[0] ? (addr[1] / addr[0]) : 0;
1363268578Srpaulo			break;
1364268578Srpaulo
1365268578Srpaulo		default:
1366268578Srpaulo			continue;
1367268578Srpaulo		}
1368268578Srpaulo
1369268578Srpaulo		if (total[agg->dtagd_varid] == NULL) {
1370268578Srpaulo			total[agg->dtagd_varid] = aggdata;
1371268578Srpaulo			aggdata->dtada_flags |= DTRACE_A_TOTAL;
1372268578Srpaulo		} else {
1373268578Srpaulo			aggdata = total[agg->dtagd_varid];
1374268578Srpaulo		}
1375268578Srpaulo
1376268578Srpaulo		if (val > 0)
1377268578Srpaulo			aggdata->dtada_flags |= DTRACE_A_HASPOSITIVES;
1378268578Srpaulo
1379268578Srpaulo		if (val < 0) {
1380268578Srpaulo			aggdata->dtada_flags |= DTRACE_A_HASNEGATIVES;
1381268578Srpaulo			val = -val;
1382268578Srpaulo		}
1383268578Srpaulo
1384268578Srpaulo		if (dtp->dt_options[DTRACEOPT_AGGZOOM] != DTRACEOPT_UNSET) {
1385268578Srpaulo			val = (int64_t)((long double)val *
1386268578Srpaulo			    (1 / DTRACE_AGGZOOM_MAX));
1387268578Srpaulo
1388268578Srpaulo			if (val > aggdata->dtada_total)
1389268578Srpaulo				aggdata->dtada_total = val;
1390268578Srpaulo		} else {
1391268578Srpaulo			aggdata->dtada_total += val;
1392268578Srpaulo		}
1393268578Srpaulo	}
1394268578Srpaulo
1395268578Srpaulo	/*
1396268578Srpaulo	 * And now one final pass to set everyone's total.
1397268578Srpaulo	 */
1398268578Srpaulo	for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) {
1399268578Srpaulo		dtrace_aggdata_t *aggdata = &h->dtahe_data, *t;
1400268578Srpaulo		dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1401268578Srpaulo
1402268578Srpaulo		if ((t = total[agg->dtagd_varid]) == NULL || aggdata == t)
1403268578Srpaulo			continue;
1404268578Srpaulo
1405268578Srpaulo		aggdata->dtada_total = t->dtada_total;
1406268578Srpaulo		aggdata->dtada_flags |= (t->dtada_flags & tflags);
1407268578Srpaulo	}
1408268578Srpaulo
1409268578Srpaulo	dt_free(dtp, total);
1410268578Srpaulo
1411268578Srpaulo	return (0);
1412268578Srpaulo}
1413268578Srpaulo
1414268578Srpaulostatic int
1415268578Srpaulodt_aggregate_minmaxbin(dtrace_hdl_t *dtp, boolean_t clear)
1416268578Srpaulo{
1417268578Srpaulo	dt_ahashent_t *h;
1418268578Srpaulo	dtrace_aggdata_t **minmax;
1419268578Srpaulo	dtrace_aggid_t max = DTRACE_AGGVARIDNONE, id;
1420268578Srpaulo	dt_aggregate_t *agp = &dtp->dt_aggregate;
1421268578Srpaulo	dt_ahash_t *hash = &agp->dtat_hash;
1422268578Srpaulo
1423268578Srpaulo	for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) {
1424268578Srpaulo		dtrace_aggdata_t *aggdata = &h->dtahe_data;
1425268578Srpaulo
1426268578Srpaulo		if ((id = dt_aggregate_aggvarid(h)) > max)
1427268578Srpaulo			max = id;
1428268578Srpaulo
1429268578Srpaulo		aggdata->dtada_minbin = 0;
1430268578Srpaulo		aggdata->dtada_maxbin = 0;
1431268578Srpaulo		aggdata->dtada_flags &= ~DTRACE_A_MINMAXBIN;
1432268578Srpaulo	}
1433268578Srpaulo
1434268578Srpaulo	if (clear || max == DTRACE_AGGVARIDNONE)
1435268578Srpaulo		return (0);
1436268578Srpaulo
1437268578Srpaulo	minmax = dt_zalloc(dtp, (max + 1) * sizeof (dtrace_aggdata_t *));
1438268578Srpaulo
1439268578Srpaulo	if (minmax == NULL)
1440268578Srpaulo		return (-1);
1441268578Srpaulo
1442268578Srpaulo	for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) {
1443268578Srpaulo		dtrace_aggdata_t *aggdata = &h->dtahe_data;
1444268578Srpaulo		dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1445268578Srpaulo		dtrace_recdesc_t *rec;
1446268578Srpaulo		caddr_t data;
1447268578Srpaulo		int64_t *addr;
1448268578Srpaulo		int minbin = -1, maxbin = -1, i;
1449268578Srpaulo		int start = 0, size;
1450268578Srpaulo
1451268578Srpaulo		rec = &agg->dtagd_rec[agg->dtagd_nrecs - 1];
1452268578Srpaulo		size = rec->dtrd_size / sizeof (int64_t);
1453268578Srpaulo		data = aggdata->dtada_data;
1454268578Srpaulo		addr = (int64_t *)(uintptr_t)(data + rec->dtrd_offset);
1455268578Srpaulo
1456268578Srpaulo		switch (rec->dtrd_action) {
1457268578Srpaulo		case DTRACEAGG_LQUANTIZE:
1458268578Srpaulo			/*
1459268578Srpaulo			 * For lquantize(), we always display the entire range
1460268578Srpaulo			 * of the aggregation when aggpack is set.
1461268578Srpaulo			 */
1462268578Srpaulo			start = 1;
1463268578Srpaulo			minbin = start;
1464268578Srpaulo			maxbin = size - 1 - start;
1465268578Srpaulo			break;
1466268578Srpaulo
1467268578Srpaulo		case DTRACEAGG_QUANTIZE:
1468268578Srpaulo			for (i = start; i < size; i++) {
1469268578Srpaulo				if (!addr[i])
1470268578Srpaulo					continue;
1471268578Srpaulo
1472268578Srpaulo				if (minbin == -1)
1473268578Srpaulo					minbin = i - start;
1474268578Srpaulo
1475268578Srpaulo				maxbin = i - start;
1476268578Srpaulo			}
1477268578Srpaulo
1478268578Srpaulo			if (minbin == -1) {
1479268578Srpaulo				/*
1480268578Srpaulo				 * If we have no data (e.g., due to a clear()
1481268578Srpaulo				 * or negative increments), we'll use the
1482268578Srpaulo				 * zero bucket as both our min and max.
1483268578Srpaulo				 */
1484268578Srpaulo				minbin = maxbin = DTRACE_QUANTIZE_ZEROBUCKET;
1485268578Srpaulo			}
1486268578Srpaulo
1487268578Srpaulo			break;
1488268578Srpaulo
1489268578Srpaulo		default:
1490268578Srpaulo			continue;
1491268578Srpaulo		}
1492268578Srpaulo
1493268578Srpaulo		if (minmax[agg->dtagd_varid] == NULL) {
1494268578Srpaulo			minmax[agg->dtagd_varid] = aggdata;
1495268578Srpaulo			aggdata->dtada_flags |= DTRACE_A_MINMAXBIN;
1496268578Srpaulo			aggdata->dtada_minbin = minbin;
1497268578Srpaulo			aggdata->dtada_maxbin = maxbin;
1498268578Srpaulo			continue;
1499268578Srpaulo		}
1500268578Srpaulo
1501268578Srpaulo		if (minbin < minmax[agg->dtagd_varid]->dtada_minbin)
1502268578Srpaulo			minmax[agg->dtagd_varid]->dtada_minbin = minbin;
1503268578Srpaulo
1504268578Srpaulo		if (maxbin > minmax[agg->dtagd_varid]->dtada_maxbin)
1505268578Srpaulo			minmax[agg->dtagd_varid]->dtada_maxbin = maxbin;
1506268578Srpaulo	}
1507268578Srpaulo
1508268578Srpaulo	/*
1509268578Srpaulo	 * And now one final pass to set everyone's minbin and maxbin.
1510268578Srpaulo	 */
1511268578Srpaulo	for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) {
1512268578Srpaulo		dtrace_aggdata_t *aggdata = &h->dtahe_data, *mm;
1513268578Srpaulo		dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1514268578Srpaulo
1515268578Srpaulo		if ((mm = minmax[agg->dtagd_varid]) == NULL || aggdata == mm)
1516268578Srpaulo			continue;
1517268578Srpaulo
1518268578Srpaulo		aggdata->dtada_minbin = mm->dtada_minbin;
1519268578Srpaulo		aggdata->dtada_maxbin = mm->dtada_maxbin;
1520268578Srpaulo		aggdata->dtada_flags |= DTRACE_A_MINMAXBIN;
1521268578Srpaulo	}
1522268578Srpaulo
1523268578Srpaulo	dt_free(dtp, minmax);
1524268578Srpaulo
1525268578Srpaulo	return (0);
1526268578Srpaulo}
1527268578Srpaulo
1528268578Srpaulostatic int
1529178479Sjbdt_aggregate_walk_sorted(dtrace_hdl_t *dtp,
1530178479Sjb    dtrace_aggregate_f *func, void *arg,
1531178479Sjb    int (*sfunc)(const void *, const void *))
1532178479Sjb{
1533178479Sjb	dt_aggregate_t *agp = &dtp->dt_aggregate;
1534178479Sjb	dt_ahashent_t *h, **sorted;
1535178479Sjb	dt_ahash_t *hash = &agp->dtat_hash;
1536178479Sjb	size_t i, nentries = 0;
1537268578Srpaulo	int rval = -1;
1538178479Sjb
1539268578Srpaulo	agp->dtat_flags &= ~(DTRACE_A_TOTAL | DTRACE_A_MINMAXBIN);
1540268578Srpaulo
1541268578Srpaulo	if (dtp->dt_options[DTRACEOPT_AGGHIST] != DTRACEOPT_UNSET) {
1542268578Srpaulo		agp->dtat_flags |= DTRACE_A_TOTAL;
1543268578Srpaulo
1544268578Srpaulo		if (dt_aggregate_total(dtp, B_FALSE) != 0)
1545268578Srpaulo			return (-1);
1546268578Srpaulo	}
1547268578Srpaulo
1548268578Srpaulo	if (dtp->dt_options[DTRACEOPT_AGGPACK] != DTRACEOPT_UNSET) {
1549268578Srpaulo		agp->dtat_flags |= DTRACE_A_MINMAXBIN;
1550268578Srpaulo
1551268578Srpaulo		if (dt_aggregate_minmaxbin(dtp, B_FALSE) != 0)
1552268578Srpaulo			return (-1);
1553268578Srpaulo	}
1554268578Srpaulo
1555178479Sjb	for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall)
1556178479Sjb		nentries++;
1557178479Sjb
1558178479Sjb	sorted = dt_alloc(dtp, nentries * sizeof (dt_ahashent_t *));
1559178479Sjb
1560178479Sjb	if (sorted == NULL)
1561268578Srpaulo		goto out;
1562178479Sjb
1563178479Sjb	for (h = hash->dtah_all, i = 0; h != NULL; h = h->dtahe_nextall)
1564178479Sjb		sorted[i++] = h;
1565178479Sjb
1566178479Sjb	(void) pthread_mutex_lock(&dt_qsort_lock);
1567178479Sjb
1568178479Sjb	if (sfunc == NULL) {
1569178479Sjb		dt_aggregate_qsort(dtp, sorted, nentries,
1570178479Sjb		    sizeof (dt_ahashent_t *), NULL);
1571178479Sjb	} else {
1572178479Sjb		/*
1573178479Sjb		 * If we've been explicitly passed a sorting function,
1574178479Sjb		 * we'll use that -- ignoring the values of the "aggsortrev",
1575178479Sjb		 * "aggsortkey" and "aggsortkeypos" options.
1576178479Sjb		 */
1577178479Sjb		qsort(sorted, nentries, sizeof (dt_ahashent_t *), sfunc);
1578178479Sjb	}
1579178479Sjb
1580178479Sjb	(void) pthread_mutex_unlock(&dt_qsort_lock);
1581178479Sjb
1582178479Sjb	for (i = 0; i < nentries; i++) {
1583178479Sjb		h = sorted[i];
1584178479Sjb
1585268578Srpaulo		if (dt_aggwalk_rval(dtp, h, func(&h->dtahe_data, arg)) == -1)
1586268578Srpaulo			goto out;
1587178479Sjb	}
1588178479Sjb
1589268578Srpaulo	rval = 0;
1590268578Srpauloout:
1591268578Srpaulo	if (agp->dtat_flags & DTRACE_A_TOTAL)
1592268578Srpaulo		(void) dt_aggregate_total(dtp, B_TRUE);
1593268578Srpaulo
1594268578Srpaulo	if (agp->dtat_flags & DTRACE_A_MINMAXBIN)
1595268578Srpaulo		(void) dt_aggregate_minmaxbin(dtp, B_TRUE);
1596268578Srpaulo
1597178479Sjb	dt_free(dtp, sorted);
1598268578Srpaulo	return (rval);
1599178479Sjb}
1600178479Sjb
1601178479Sjbint
1602178479Sjbdtrace_aggregate_walk_sorted(dtrace_hdl_t *dtp,
1603178479Sjb    dtrace_aggregate_f *func, void *arg)
1604178479Sjb{
1605178479Sjb	return (dt_aggregate_walk_sorted(dtp, func, arg, NULL));
1606178479Sjb}
1607178479Sjb
1608178479Sjbint
1609178479Sjbdtrace_aggregate_walk_keysorted(dtrace_hdl_t *dtp,
1610178479Sjb    dtrace_aggregate_f *func, void *arg)
1611178479Sjb{
1612178479Sjb	return (dt_aggregate_walk_sorted(dtp, func,
1613178479Sjb	    arg, dt_aggregate_varkeycmp));
1614178479Sjb}
1615178479Sjb
1616178479Sjbint
1617178479Sjbdtrace_aggregate_walk_valsorted(dtrace_hdl_t *dtp,
1618178479Sjb    dtrace_aggregate_f *func, void *arg)
1619178479Sjb{
1620178479Sjb	return (dt_aggregate_walk_sorted(dtp, func,
1621178479Sjb	    arg, dt_aggregate_varvalcmp));
1622178479Sjb}
1623178479Sjb
1624178479Sjbint
1625178479Sjbdtrace_aggregate_walk_keyvarsorted(dtrace_hdl_t *dtp,
1626178479Sjb    dtrace_aggregate_f *func, void *arg)
1627178479Sjb{
1628178479Sjb	return (dt_aggregate_walk_sorted(dtp, func,
1629178479Sjb	    arg, dt_aggregate_keyvarcmp));
1630178479Sjb}
1631178479Sjb
1632178479Sjbint
1633178479Sjbdtrace_aggregate_walk_valvarsorted(dtrace_hdl_t *dtp,
1634178479Sjb    dtrace_aggregate_f *func, void *arg)
1635178479Sjb{
1636178479Sjb	return (dt_aggregate_walk_sorted(dtp, func,
1637178479Sjb	    arg, dt_aggregate_valvarcmp));
1638178479Sjb}
1639178479Sjb
1640178479Sjbint
1641178479Sjbdtrace_aggregate_walk_keyrevsorted(dtrace_hdl_t *dtp,
1642178479Sjb    dtrace_aggregate_f *func, void *arg)
1643178479Sjb{
1644178479Sjb	return (dt_aggregate_walk_sorted(dtp, func,
1645178479Sjb	    arg, dt_aggregate_varkeyrevcmp));
1646178479Sjb}
1647178479Sjb
1648178479Sjbint
1649178479Sjbdtrace_aggregate_walk_valrevsorted(dtrace_hdl_t *dtp,
1650178479Sjb    dtrace_aggregate_f *func, void *arg)
1651178479Sjb{
1652178479Sjb	return (dt_aggregate_walk_sorted(dtp, func,
1653178479Sjb	    arg, dt_aggregate_varvalrevcmp));
1654178479Sjb}
1655178479Sjb
1656178479Sjbint
1657178479Sjbdtrace_aggregate_walk_keyvarrevsorted(dtrace_hdl_t *dtp,
1658178479Sjb    dtrace_aggregate_f *func, void *arg)
1659178479Sjb{
1660178479Sjb	return (dt_aggregate_walk_sorted(dtp, func,
1661178479Sjb	    arg, dt_aggregate_keyvarrevcmp));
1662178479Sjb}
1663178479Sjb
1664178479Sjbint
1665178479Sjbdtrace_aggregate_walk_valvarrevsorted(dtrace_hdl_t *dtp,
1666178479Sjb    dtrace_aggregate_f *func, void *arg)
1667178479Sjb{
1668178479Sjb	return (dt_aggregate_walk_sorted(dtp, func,
1669178479Sjb	    arg, dt_aggregate_valvarrevcmp));
1670178479Sjb}
1671178479Sjb
1672178479Sjbint
1673178479Sjbdtrace_aggregate_walk_joined(dtrace_hdl_t *dtp, dtrace_aggvarid_t *aggvars,
1674178479Sjb    int naggvars, dtrace_aggregate_walk_joined_f *func, void *arg)
1675178479Sjb{
1676178479Sjb	dt_aggregate_t *agp = &dtp->dt_aggregate;
1677178479Sjb	dt_ahashent_t *h, **sorted = NULL, ***bundle, **nbundle;
1678178479Sjb	const dtrace_aggdata_t **data;
1679178479Sjb	dt_ahashent_t *zaggdata = NULL;
1680178479Sjb	dt_ahash_t *hash = &agp->dtat_hash;
1681178479Sjb	size_t nentries = 0, nbundles = 0, start, zsize = 0, bundlesize;
1682178479Sjb	dtrace_aggvarid_t max = 0, aggvar;
1683178479Sjb	int rval = -1, *map, *remap = NULL;
1684178479Sjb	int i, j;
1685178479Sjb	dtrace_optval_t sortpos = dtp->dt_options[DTRACEOPT_AGGSORTPOS];
1686178479Sjb
1687178479Sjb	/*
1688178479Sjb	 * If the sorting position is greater than the number of aggregation
1689178479Sjb	 * variable IDs, we silently set it to 0.
1690178479Sjb	 */
1691178479Sjb	if (sortpos == DTRACEOPT_UNSET || sortpos >= naggvars)
1692178479Sjb		sortpos = 0;
1693178479Sjb
1694178479Sjb	/*
1695178479Sjb	 * First we need to translate the specified aggregation variable IDs
1696178479Sjb	 * into a linear map that will allow us to translate an aggregation
1697178479Sjb	 * variable ID into its position in the specified aggvars.
1698178479Sjb	 */
1699178479Sjb	for (i = 0; i < naggvars; i++) {
1700178479Sjb		if (aggvars[i] == DTRACE_AGGVARIDNONE || aggvars[i] < 0)
1701178479Sjb			return (dt_set_errno(dtp, EDT_BADAGGVAR));
1702178479Sjb
1703178479Sjb		if (aggvars[i] > max)
1704178479Sjb			max = aggvars[i];
1705178479Sjb	}
1706178479Sjb
1707178479Sjb	if ((map = dt_zalloc(dtp, (max + 1) * sizeof (int))) == NULL)
1708178479Sjb		return (-1);
1709178479Sjb
1710178479Sjb	zaggdata = dt_zalloc(dtp, naggvars * sizeof (dt_ahashent_t));
1711178479Sjb
1712178479Sjb	if (zaggdata == NULL)
1713178479Sjb		goto out;
1714178479Sjb
1715178479Sjb	for (i = 0; i < naggvars; i++) {
1716178479Sjb		int ndx = i + sortpos;
1717178479Sjb
1718178479Sjb		if (ndx >= naggvars)
1719178479Sjb			ndx -= naggvars;
1720178479Sjb
1721178479Sjb		aggvar = aggvars[ndx];
1722178479Sjb		assert(aggvar <= max);
1723178479Sjb
1724178479Sjb		if (map[aggvar]) {
1725178479Sjb			/*
1726178479Sjb			 * We have an aggregation variable that is present
1727178479Sjb			 * more than once in the array of aggregation
1728178479Sjb			 * variables.  While it's unclear why one might want
1729178479Sjb			 * to do this, it's legal.  To support this construct,
1730178479Sjb			 * we will allocate a remap that will indicate the
1731178479Sjb			 * position from which this aggregation variable
1732178479Sjb			 * should be pulled.  (That is, where the remap will
1733178479Sjb			 * map from one position to another.)
1734178479Sjb			 */
1735178479Sjb			if (remap == NULL) {
1736178479Sjb				remap = dt_zalloc(dtp, naggvars * sizeof (int));
1737178479Sjb
1738178479Sjb				if (remap == NULL)
1739178479Sjb					goto out;
1740178479Sjb			}
1741178479Sjb
1742178479Sjb			/*
1743178479Sjb			 * Given that the variable is already present, assert
1744178479Sjb			 * that following through the mapping and adjusting
1745178479Sjb			 * for the sort position yields the same aggregation
1746178479Sjb			 * variable ID.
1747178479Sjb			 */
1748178479Sjb			assert(aggvars[(map[aggvar] - 1 + sortpos) %
1749178479Sjb			    naggvars] == aggvars[ndx]);
1750178479Sjb
1751178479Sjb			remap[i] = map[aggvar];
1752178479Sjb			continue;
1753178479Sjb		}
1754178479Sjb
1755178479Sjb		map[aggvar] = i + 1;
1756178479Sjb	}
1757178479Sjb
1758178479Sjb	/*
1759178479Sjb	 * We need to take two passes over the data to size our allocation, so
1760178479Sjb	 * we'll use the first pass to also fill in the zero-filled data to be
1761178479Sjb	 * used to properly format a zero-valued aggregation.
1762178479Sjb	 */
1763178479Sjb	for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) {
1764178479Sjb		dtrace_aggvarid_t id;
1765178479Sjb		int ndx;
1766178479Sjb
1767178479Sjb		if ((id = dt_aggregate_aggvarid(h)) > max || !(ndx = map[id]))
1768178479Sjb			continue;
1769178479Sjb
1770178479Sjb		if (zaggdata[ndx - 1].dtahe_size == 0) {
1771178479Sjb			zaggdata[ndx - 1].dtahe_size = h->dtahe_size;
1772178479Sjb			zaggdata[ndx - 1].dtahe_data = h->dtahe_data;
1773178479Sjb		}
1774178479Sjb
1775178479Sjb		nentries++;
1776178479Sjb	}
1777178479Sjb
1778178479Sjb	if (nentries == 0) {
1779178479Sjb		/*
1780178479Sjb		 * We couldn't find any entries; there is nothing else to do.
1781178479Sjb		 */
1782178479Sjb		rval = 0;
1783178479Sjb		goto out;
1784178479Sjb	}
1785178479Sjb
1786178479Sjb	/*
1787178479Sjb	 * Before we sort the data, we're going to look for any holes in our
1788178479Sjb	 * zero-filled data.  This will occur if an aggregation variable that
1789178479Sjb	 * we are being asked to print has not yet been assigned the result of
1790178479Sjb	 * any aggregating action for _any_ tuple.  The issue becomes that we
1791178479Sjb	 * would like a zero value to be printed for all columns for this
1792178479Sjb	 * aggregation, but without any record description, we don't know the
1793178479Sjb	 * aggregating action that corresponds to the aggregation variable.  To
1794178479Sjb	 * try to find a match, we're simply going to lookup aggregation IDs
1795178479Sjb	 * (which are guaranteed to be contiguous and to start from 1), looking
1796178479Sjb	 * for the specified aggregation variable ID.  If we find a match,
1797178479Sjb	 * we'll use that.  If we iterate over all aggregation IDs and don't
1798178479Sjb	 * find a match, then we must be an anonymous enabling.  (Anonymous
1799178479Sjb	 * enablings can't currently derive either aggregation variable IDs or
1800178479Sjb	 * aggregation variable names given only an aggregation ID.)  In this
1801178479Sjb	 * obscure case (anonymous enabling, multiple aggregation printa() with
1802178479Sjb	 * some aggregations not represented for any tuple), our defined
1803178479Sjb	 * behavior is that the zero will be printed in the format of the first
1804178479Sjb	 * aggregation variable that contains any non-zero value.
1805178479Sjb	 */
1806178479Sjb	for (i = 0; i < naggvars; i++) {
1807178479Sjb		if (zaggdata[i].dtahe_size == 0) {
1808178479Sjb			dtrace_aggvarid_t aggvar;
1809178479Sjb
1810178479Sjb			aggvar = aggvars[(i - sortpos + naggvars) % naggvars];
1811178479Sjb			assert(zaggdata[i].dtahe_data.dtada_data == NULL);
1812178479Sjb
1813178479Sjb			for (j = DTRACE_AGGIDNONE + 1; ; j++) {
1814178479Sjb				dtrace_aggdesc_t *agg;
1815178479Sjb				dtrace_aggdata_t *aggdata;
1816178479Sjb
1817178479Sjb				if (dt_aggid_lookup(dtp, j, &agg) != 0)
1818178479Sjb					break;
1819178479Sjb
1820178479Sjb				if (agg->dtagd_varid != aggvar)
1821178479Sjb					continue;
1822178479Sjb
1823178479Sjb				/*
1824178479Sjb				 * We have our description -- now we need to
1825178479Sjb				 * cons up the zaggdata entry for it.
1826178479Sjb				 */
1827178479Sjb				aggdata = &zaggdata[i].dtahe_data;
1828178479Sjb				aggdata->dtada_size = agg->dtagd_size;
1829178479Sjb				aggdata->dtada_desc = agg;
1830178479Sjb				aggdata->dtada_handle = dtp;
1831178479Sjb				(void) dt_epid_lookup(dtp, agg->dtagd_epid,
1832178479Sjb				    &aggdata->dtada_edesc,
1833178479Sjb				    &aggdata->dtada_pdesc);
1834178479Sjb				aggdata->dtada_normal = 1;
1835178479Sjb				zaggdata[i].dtahe_hashval = 0;
1836178479Sjb				zaggdata[i].dtahe_size = agg->dtagd_size;
1837178479Sjb				break;
1838178479Sjb			}
1839178479Sjb
1840178479Sjb			if (zaggdata[i].dtahe_size == 0) {
1841178479Sjb				caddr_t data;
1842178479Sjb
1843178479Sjb				/*
1844178479Sjb				 * We couldn't find this aggregation, meaning
1845178479Sjb				 * that we have never seen it before for any
1846178479Sjb				 * tuple _and_ this is an anonymous enabling.
1847178479Sjb				 * That is, we're in the obscure case outlined
1848178479Sjb				 * above.  In this case, our defined behavior
1849178479Sjb				 * is to format the data in the format of the
1850178479Sjb				 * first non-zero aggregation -- of which, of
1851178479Sjb				 * course, we know there to be at least one
1852178479Sjb				 * (or nentries would have been zero).
1853178479Sjb				 */
1854178479Sjb				for (j = 0; j < naggvars; j++) {
1855178479Sjb					if (zaggdata[j].dtahe_size != 0)
1856178479Sjb						break;
1857178479Sjb				}
1858178479Sjb
1859178479Sjb				assert(j < naggvars);
1860178479Sjb				zaggdata[i] = zaggdata[j];
1861178479Sjb
1862178479Sjb				data = zaggdata[i].dtahe_data.dtada_data;
1863178479Sjb				assert(data != NULL);
1864178479Sjb			}
1865178479Sjb		}
1866178479Sjb	}
1867178479Sjb
1868178479Sjb	/*
1869178479Sjb	 * Now we need to allocate our zero-filled data for use for
1870178479Sjb	 * aggregations that don't have a value corresponding to a given key.
1871178479Sjb	 */
1872178479Sjb	for (i = 0; i < naggvars; i++) {
1873178479Sjb		dtrace_aggdata_t *aggdata = &zaggdata[i].dtahe_data;
1874178479Sjb		dtrace_aggdesc_t *aggdesc = aggdata->dtada_desc;
1875178479Sjb		dtrace_recdesc_t *rec;
1876178479Sjb		uint64_t larg;
1877178479Sjb		caddr_t zdata;
1878178479Sjb
1879178479Sjb		zsize = zaggdata[i].dtahe_size;
1880178479Sjb		assert(zsize != 0);
1881178479Sjb
1882178479Sjb		if ((zdata = dt_zalloc(dtp, zsize)) == NULL) {
1883178479Sjb			/*
1884178479Sjb			 * If we failed to allocated some zero-filled data, we
1885178479Sjb			 * need to zero out the remaining dtada_data pointers
1886178479Sjb			 * to prevent the wrong data from being freed below.
1887178479Sjb			 */
1888178479Sjb			for (j = i; j < naggvars; j++)
1889178479Sjb				zaggdata[j].dtahe_data.dtada_data = NULL;
1890178479Sjb			goto out;
1891178479Sjb		}
1892178479Sjb
1893178479Sjb		aggvar = aggvars[(i - sortpos + naggvars) % naggvars];
1894178479Sjb
1895178479Sjb		/*
1896178479Sjb		 * First, the easy bit.  To maintain compatibility with
1897178479Sjb		 * consumers that pull the compiler-generated ID out of the
1898178479Sjb		 * data, we put that ID at the top of the zero-filled data.
1899178479Sjb		 */
1900178479Sjb		rec = &aggdesc->dtagd_rec[0];
1901178479Sjb		/* LINTED - alignment */
1902178479Sjb		*((dtrace_aggvarid_t *)(zdata + rec->dtrd_offset)) = aggvar;
1903178479Sjb
1904178479Sjb		rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1];
1905178479Sjb
1906178479Sjb		/*
1907178479Sjb		 * Now for the more complicated part.  If (and only if) this
1908178479Sjb		 * is an lquantize() aggregating action, zero-filled data is
1909178479Sjb		 * not equivalent to an empty record:  we must also get the
1910178479Sjb		 * parameters for the lquantize().
1911178479Sjb		 */
1912178479Sjb		if (rec->dtrd_action == DTRACEAGG_LQUANTIZE) {
1913178479Sjb			if (aggdata->dtada_data != NULL) {
1914178479Sjb				/*
1915178479Sjb				 * The easier case here is if we actually have
1916178479Sjb				 * some prototype data -- in which case we
1917178479Sjb				 * manually dig it out of the aggregation
1918178479Sjb				 * record.
1919178479Sjb				 */
1920178479Sjb				/* LINTED - alignment */
1921178479Sjb				larg = *((uint64_t *)(aggdata->dtada_data +
1922178479Sjb				    rec->dtrd_offset));
1923178479Sjb			} else {
1924178479Sjb				/*
1925178479Sjb				 * We don't have any prototype data.  As a
1926178479Sjb				 * result, we know that we _do_ have the
1927178479Sjb				 * compiler-generated information.  (If this
1928178479Sjb				 * were an anonymous enabling, all of our
1929178479Sjb				 * zero-filled data would have prototype data
1930178479Sjb				 * -- either directly or indirectly.) So as
1931178479Sjb				 * gross as it is, we'll grovel around in the
1932178479Sjb				 * compiler-generated information to find the
1933178479Sjb				 * lquantize() parameters.
1934178479Sjb				 */
1935178479Sjb				dtrace_stmtdesc_t *sdp;
1936178479Sjb				dt_ident_t *aid;
1937178479Sjb				dt_idsig_t *isp;
1938178479Sjb
1939178479Sjb				sdp = (dtrace_stmtdesc_t *)(uintptr_t)
1940178479Sjb				    aggdesc->dtagd_rec[0].dtrd_uarg;
1941178479Sjb				aid = sdp->dtsd_aggdata;
1942178479Sjb				isp = (dt_idsig_t *)aid->di_data;
1943178479Sjb				assert(isp->dis_auxinfo != 0);
1944178479Sjb				larg = isp->dis_auxinfo;
1945178479Sjb			}
1946178479Sjb
1947178479Sjb			/* LINTED - alignment */
1948178479Sjb			*((uint64_t *)(zdata + rec->dtrd_offset)) = larg;
1949178479Sjb		}
1950178479Sjb
1951178479Sjb		aggdata->dtada_data = zdata;
1952178479Sjb	}
1953178479Sjb
1954178479Sjb	/*
1955178479Sjb	 * Now that we've dealt with setting up our zero-filled data, we can
1956178479Sjb	 * allocate our sorted array, and take another pass over the data to
1957178479Sjb	 * fill it.
1958178479Sjb	 */
1959178479Sjb	sorted = dt_alloc(dtp, nentries * sizeof (dt_ahashent_t *));
1960178479Sjb
1961178479Sjb	if (sorted == NULL)
1962178479Sjb		goto out;
1963178479Sjb
1964178479Sjb	for (h = hash->dtah_all, i = 0; h != NULL; h = h->dtahe_nextall) {
1965178479Sjb		dtrace_aggvarid_t id;
1966178479Sjb
1967178479Sjb		if ((id = dt_aggregate_aggvarid(h)) > max || !map[id])
1968178479Sjb			continue;
1969178479Sjb
1970178479Sjb		sorted[i++] = h;
1971178479Sjb	}
1972178479Sjb
1973178479Sjb	assert(i == nentries);
1974178479Sjb
1975178479Sjb	/*
1976178479Sjb	 * We've loaded our array; now we need to sort by value to allow us
1977178479Sjb	 * to create bundles of like value.  We're going to acquire the
1978178479Sjb	 * dt_qsort_lock here, and hold it across all of our subsequent
1979178479Sjb	 * comparison and sorting.
1980178479Sjb	 */
1981178479Sjb	(void) pthread_mutex_lock(&dt_qsort_lock);
1982178479Sjb
1983178479Sjb	qsort(sorted, nentries, sizeof (dt_ahashent_t *),
1984178479Sjb	    dt_aggregate_keyvarcmp);
1985178479Sjb
1986178479Sjb	/*
1987178479Sjb	 * Now we need to go through and create bundles.  Because the number
1988178479Sjb	 * of bundles is bounded by the size of the sorted array, we're going
1989178479Sjb	 * to reuse the underlying storage.  And note that "bundle" is an
1990178479Sjb	 * array of pointers to arrays of pointers to dt_ahashent_t -- making
1991178479Sjb	 * its type (regrettably) "dt_ahashent_t ***".  (Regrettable because
1992178479Sjb	 * '*' -- like '_' and 'X' -- should never appear in triplicate in
1993178479Sjb	 * an ideal world.)
1994178479Sjb	 */
1995178479Sjb	bundle = (dt_ahashent_t ***)sorted;
1996178479Sjb
1997178479Sjb	for (i = 1, start = 0; i <= nentries; i++) {
1998178479Sjb		if (i < nentries &&
1999178479Sjb		    dt_aggregate_keycmp(&sorted[i], &sorted[i - 1]) == 0)
2000178479Sjb			continue;
2001178479Sjb
2002178479Sjb		/*
2003178479Sjb		 * We have a bundle boundary.  Everything from start to
2004178479Sjb		 * (i - 1) belongs in one bundle.
2005178479Sjb		 */
2006178479Sjb		assert(i - start <= naggvars);
2007178479Sjb		bundlesize = (naggvars + 2) * sizeof (dt_ahashent_t *);
2008178479Sjb
2009178479Sjb		if ((nbundle = dt_zalloc(dtp, bundlesize)) == NULL) {
2010178479Sjb			(void) pthread_mutex_unlock(&dt_qsort_lock);
2011178479Sjb			goto out;
2012178479Sjb		}
2013178479Sjb
2014178479Sjb		for (j = start; j < i; j++) {
2015178479Sjb			dtrace_aggvarid_t id = dt_aggregate_aggvarid(sorted[j]);
2016178479Sjb
2017178479Sjb			assert(id <= max);
2018178479Sjb			assert(map[id] != 0);
2019178479Sjb			assert(map[id] - 1 < naggvars);
2020178479Sjb			assert(nbundle[map[id] - 1] == NULL);
2021178479Sjb			nbundle[map[id] - 1] = sorted[j];
2022178479Sjb
2023178479Sjb			if (nbundle[naggvars] == NULL)
2024178479Sjb				nbundle[naggvars] = sorted[j];
2025178479Sjb		}
2026178479Sjb
2027178479Sjb		for (j = 0; j < naggvars; j++) {
2028178479Sjb			if (nbundle[j] != NULL)
2029178479Sjb				continue;
2030178479Sjb
2031178479Sjb			/*
2032178479Sjb			 * Before we assume that this aggregation variable
2033178479Sjb			 * isn't present (and fall back to using the
2034178479Sjb			 * zero-filled data allocated earlier), check the
2035178479Sjb			 * remap.  If we have a remapping, we'll drop it in
2036178479Sjb			 * here.  Note that we might be remapping an
2037178479Sjb			 * aggregation variable that isn't present for this
2038178479Sjb			 * key; in this case, the aggregation data that we
2039178479Sjb			 * copy will point to the zeroed data.
2040178479Sjb			 */
2041178479Sjb			if (remap != NULL && remap[j]) {
2042178479Sjb				assert(remap[j] - 1 < j);
2043178479Sjb				assert(nbundle[remap[j] - 1] != NULL);
2044178479Sjb				nbundle[j] = nbundle[remap[j] - 1];
2045178479Sjb			} else {
2046178479Sjb				nbundle[j] = &zaggdata[j];
2047178479Sjb			}
2048178479Sjb		}
2049178479Sjb
2050178479Sjb		bundle[nbundles++] = nbundle;
2051178479Sjb		start = i;
2052178479Sjb	}
2053178479Sjb
2054178479Sjb	/*
2055178479Sjb	 * Now we need to re-sort based on the first value.
2056178479Sjb	 */
2057178479Sjb	dt_aggregate_qsort(dtp, bundle, nbundles, sizeof (dt_ahashent_t **),
2058178479Sjb	    dt_aggregate_bundlecmp);
2059178479Sjb
2060178479Sjb	(void) pthread_mutex_unlock(&dt_qsort_lock);
2061178479Sjb
2062178479Sjb	/*
2063178479Sjb	 * We're done!  Now we just need to go back over the sorted bundles,
2064178479Sjb	 * calling the function.
2065178479Sjb	 */
2066178479Sjb	data = alloca((naggvars + 1) * sizeof (dtrace_aggdata_t *));
2067178479Sjb
2068178479Sjb	for (i = 0; i < nbundles; i++) {
2069178479Sjb		for (j = 0; j < naggvars; j++)
2070178479Sjb			data[j + 1] = NULL;
2071178479Sjb
2072178479Sjb		for (j = 0; j < naggvars; j++) {
2073178479Sjb			int ndx = j - sortpos;
2074178479Sjb
2075178479Sjb			if (ndx < 0)
2076178479Sjb				ndx += naggvars;
2077178479Sjb
2078178479Sjb			assert(bundle[i][ndx] != NULL);
2079178479Sjb			data[j + 1] = &bundle[i][ndx]->dtahe_data;
2080178479Sjb		}
2081178479Sjb
2082178479Sjb		for (j = 0; j < naggvars; j++)
2083178479Sjb			assert(data[j + 1] != NULL);
2084178479Sjb
2085178479Sjb		/*
2086178479Sjb		 * The representative key is the last element in the bundle.
2087178479Sjb		 * Assert that we have one, and then set it to be the first
2088178479Sjb		 * element of data.
2089178479Sjb		 */
2090178479Sjb		assert(bundle[i][j] != NULL);
2091178479Sjb		data[0] = &bundle[i][j]->dtahe_data;
2092178479Sjb
2093178479Sjb		if ((rval = func(data, naggvars + 1, arg)) == -1)
2094178479Sjb			goto out;
2095178479Sjb	}
2096178479Sjb
2097178479Sjb	rval = 0;
2098178479Sjbout:
2099178479Sjb	for (i = 0; i < nbundles; i++)
2100178479Sjb		dt_free(dtp, bundle[i]);
2101178479Sjb
2102178479Sjb	if (zaggdata != NULL) {
2103178479Sjb		for (i = 0; i < naggvars; i++)
2104178479Sjb			dt_free(dtp, zaggdata[i].dtahe_data.dtada_data);
2105178479Sjb	}
2106178479Sjb
2107178479Sjb	dt_free(dtp, zaggdata);
2108178479Sjb	dt_free(dtp, sorted);
2109178479Sjb	dt_free(dtp, remap);
2110178479Sjb	dt_free(dtp, map);
2111178479Sjb
2112178479Sjb	return (rval);
2113178479Sjb}
2114178479Sjb
2115178479Sjbint
2116178479Sjbdtrace_aggregate_print(dtrace_hdl_t *dtp, FILE *fp,
2117178479Sjb    dtrace_aggregate_walk_f *func)
2118178479Sjb{
2119178479Sjb	dt_print_aggdata_t pd;
2120178479Sjb
2121268578Srpaulo	bzero(&pd, sizeof (pd));
2122268578Srpaulo
2123178479Sjb	pd.dtpa_dtp = dtp;
2124178479Sjb	pd.dtpa_fp = fp;
2125178479Sjb	pd.dtpa_allunprint = 1;
2126178479Sjb
2127178479Sjb	if (func == NULL)
2128178479Sjb		func = dtrace_aggregate_walk_sorted;
2129178479Sjb
2130178479Sjb	if ((*func)(dtp, dt_print_agg, &pd) == -1)
2131178479Sjb		return (dt_set_errno(dtp, dtp->dt_errno));
2132178479Sjb
2133178479Sjb	return (0);
2134178479Sjb}
2135178479Sjb
2136178479Sjbvoid
2137178479Sjbdtrace_aggregate_clear(dtrace_hdl_t *dtp)
2138178479Sjb{
2139178479Sjb	dt_aggregate_t *agp = &dtp->dt_aggregate;
2140178479Sjb	dt_ahash_t *hash = &agp->dtat_hash;
2141178479Sjb	dt_ahashent_t *h;
2142178479Sjb	dtrace_aggdata_t *data;
2143178479Sjb	dtrace_aggdesc_t *aggdesc;
2144178479Sjb	dtrace_recdesc_t *rec;
2145178479Sjb	int i, max_cpus = agp->dtat_maxcpu;
2146178479Sjb
2147178479Sjb	for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) {
2148178479Sjb		aggdesc = h->dtahe_data.dtada_desc;
2149178479Sjb		rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1];
2150178479Sjb		data = &h->dtahe_data;
2151178479Sjb
2152178479Sjb		bzero(&data->dtada_data[rec->dtrd_offset], rec->dtrd_size);
2153178479Sjb
2154178479Sjb		if (data->dtada_percpu == NULL)
2155178479Sjb			continue;
2156178479Sjb
2157178479Sjb		for (i = 0; i < max_cpus; i++)
2158178479Sjb			bzero(data->dtada_percpu[i], rec->dtrd_size);
2159178479Sjb	}
2160178479Sjb}
2161178479Sjb
2162178479Sjbvoid
2163178479Sjbdt_aggregate_destroy(dtrace_hdl_t *dtp)
2164178479Sjb{
2165178479Sjb	dt_aggregate_t *agp = &dtp->dt_aggregate;
2166178479Sjb	dt_ahash_t *hash = &agp->dtat_hash;
2167178479Sjb	dt_ahashent_t *h, *next;
2168178479Sjb	dtrace_aggdata_t *aggdata;
2169178479Sjb	int i, max_cpus = agp->dtat_maxcpu;
2170178479Sjb
2171178479Sjb	if (hash->dtah_hash == NULL) {
2172178479Sjb		assert(hash->dtah_all == NULL);
2173178479Sjb	} else {
2174178479Sjb		free(hash->dtah_hash);
2175178479Sjb
2176178479Sjb		for (h = hash->dtah_all; h != NULL; h = next) {
2177178479Sjb			next = h->dtahe_nextall;
2178178479Sjb
2179178479Sjb			aggdata = &h->dtahe_data;
2180178479Sjb
2181178479Sjb			if (aggdata->dtada_percpu != NULL) {
2182178479Sjb				for (i = 0; i < max_cpus; i++)
2183178479Sjb					free(aggdata->dtada_percpu[i]);
2184178479Sjb				free(aggdata->dtada_percpu);
2185178479Sjb			}
2186178479Sjb
2187178479Sjb			free(aggdata->dtada_data);
2188178479Sjb			free(h);
2189178479Sjb		}
2190178479Sjb
2191178479Sjb		hash->dtah_hash = NULL;
2192178479Sjb		hash->dtah_all = NULL;
2193178479Sjb		hash->dtah_size = 0;
2194178479Sjb	}
2195178479Sjb
2196178479Sjb	free(agp->dtat_buf.dtbd_data);
2197178479Sjb	free(agp->dtat_cpus);
2198178479Sjb}
2199