1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"@(#)dt_module.c	1.13	07/08/07 SMI"
27
28#include <sys/types.h>
29#include <sys/stat.h>
30
31#include "darwin_shim.h"
32#include "elf.h" /* In lieu of Solaris <sys/elf.h> */
33#include "decl.h"
34#include <mach-o/loader.h>
35#include <mach-o/nlist.h>
36#include <mach-o/stab.h>
37
38#include <fcntl.h>
39#define stat64 stat
40#define OBJFS_ROOT "/system/object"
41
42#include <unistd.h>
43#include <strings.h>
44#include <stdlib.h>
45#include <libelf.h>
46#include <limits.h>
47#include <assert.h>
48#include <errno.h>
49#include <dirent.h>
50#include <ctype.h>
51#include <sys/sysctl.h>
52
53#include <dtrace.h>
54
55#include <System/sys/kas_info.h>
56
57#include <dt_strtab.h>
58#include <dt_module.h>
59#include <dt_impl.h>
60
61static const char *dt_module_strtab; /* active strtab for qsort callbacks */
62
63int dtrace_kernel_path(char*, size_t);
64
65static void
66dt_module_symhash_insert(dt_module_t *dmp, const char *name, uint_t id)
67{
68	dt_sym_t *dsp = &dmp->dm_symchains[dmp->dm_symfree];
69	uint_t h;
70
71	assert(dmp->dm_symfree < dmp->dm_nsymelems + 1);
72
73	dsp->ds_symid = id;
74	h = dt_strtab_hash(name, NULL) % dmp->dm_nsymbuckets;
75	dsp->ds_next = dmp->dm_symbuckets[h];
76	dmp->dm_symbuckets[h] = dmp->dm_symfree++;
77}
78
79static uint_t
80dt_module_syminit32(dt_module_t *dmp)
81{
82	const Elf32_Sym *sym = dmp->dm_symtab.cts_data;
83	const char *base = dmp->dm_strtab.cts_data;
84	size_t ss_size = dmp->dm_strtab.cts_size;
85	uint_t i, n = dmp->dm_nsymelems;
86	uint_t asrsv = 0;
87
88	for (i = 0; i < n; i++, sym++) {
89		const char *name = base + sym->st_name;
90		uchar_t type = ELF32_ST_TYPE(sym->st_info);
91
92		if (type >= STT_NUM || type == STT_SECTION)
93			continue; /* skip sections and unknown types */
94
95		if (sym->st_name == 0 || sym->st_name >= ss_size)
96			continue; /* skip null or invalid names */
97
98		if (sym->st_value != 0 &&
99		    (ELF32_ST_BIND(sym->st_info) != STB_LOCAL || sym->st_size))
100			asrsv++; /* reserve space in the address map */
101
102		dt_module_symhash_insert(dmp, name, i);
103	}
104
105	return (asrsv);
106}
107
108static uint_t
109dt_module_syminit64(dt_module_t *dmp)
110{
111	const Elf64_Sym *sym = dmp->dm_symtab.cts_data;
112	const char *base = dmp->dm_strtab.cts_data;
113	size_t ss_size = dmp->dm_strtab.cts_size;
114	uint_t i, n = dmp->dm_nsymelems;
115	uint_t asrsv = 0;
116
117	for (i = 0; i < n; i++, sym++) {
118		const char *name = base + sym->st_name;
119		uchar_t type = ELF64_ST_TYPE(sym->st_info);
120
121		if (type >= STT_NUM || type == STT_SECTION)
122			continue; /* skip sections and unknown types */
123
124		if (sym->st_name == 0 || sym->st_name >= ss_size)
125			continue; /* skip null or invalid names */
126
127		if (sym->st_value != 0 &&
128		    (ELF64_ST_BIND(sym->st_info) != STB_LOCAL || sym->st_size))
129			asrsv++; /* reserve space in the address map */
130
131		dt_module_symhash_insert(dmp, name, i);
132	}
133
134	return (asrsv);
135}
136
137/*
138 * Sort comparison function for 32-bit symbol address-to-name lookups.  We sort
139 * symbols by value.  If values are equal, we prefer the symbol that is
140 * non-zero sized, typed, not weak, or lexically first, in that order.
141 */
142static int
143dt_module_symcomp32(const void *lp, const void *rp)
144{
145	Elf32_Sym *lhs = *((Elf32_Sym **)lp);
146	Elf32_Sym *rhs = *((Elf32_Sym **)rp);
147
148	if (lhs->st_value != rhs->st_value)
149		return (lhs->st_value > rhs->st_value ? 1 : -1);
150
151	if ((lhs->st_size == 0) != (rhs->st_size == 0))
152		return (lhs->st_size == 0 ? 1 : -1);
153
154	if ((ELF32_ST_TYPE(lhs->st_info) == STT_NOTYPE) !=
155	    (ELF32_ST_TYPE(rhs->st_info) == STT_NOTYPE))
156		return (ELF32_ST_TYPE(lhs->st_info) == STT_NOTYPE ? 1 : -1);
157
158	if ((ELF32_ST_BIND(lhs->st_info) == STB_WEAK) !=
159	    (ELF32_ST_BIND(rhs->st_info) == STB_WEAK))
160		return (ELF32_ST_BIND(lhs->st_info) == STB_WEAK ? 1 : -1);
161
162	return (strcmp(dt_module_strtab + lhs->st_name,
163	    dt_module_strtab + rhs->st_name));
164}
165
166/*
167 * Sort comparison function for 64-bit symbol address-to-name lookups.  We sort
168 * symbols by value.  If values are equal, we prefer the symbol that is
169 * non-zero sized, typed, not weak, or lexically first, in that order.
170 */
171static int
172dt_module_symcomp64(const void *lp, const void *rp)
173{
174	Elf64_Sym *lhs = *((Elf64_Sym **)lp);
175	Elf64_Sym *rhs = *((Elf64_Sym **)rp);
176
177	if (lhs->st_value != rhs->st_value)
178		return (lhs->st_value > rhs->st_value ? 1 : -1);
179
180	if ((lhs->st_size == 0) != (rhs->st_size == 0))
181		return (lhs->st_size == 0 ? 1 : -1);
182
183	if ((ELF64_ST_TYPE(lhs->st_info) == STT_NOTYPE) !=
184	    (ELF64_ST_TYPE(rhs->st_info) == STT_NOTYPE))
185		return (ELF64_ST_TYPE(lhs->st_info) == STT_NOTYPE ? 1 : -1);
186
187	if ((ELF64_ST_BIND(lhs->st_info) == STB_WEAK) !=
188	    (ELF64_ST_BIND(rhs->st_info) == STB_WEAK))
189		return (ELF64_ST_BIND(lhs->st_info) == STB_WEAK ? 1 : -1);
190
191	return (strcmp(dt_module_strtab + lhs->st_name,
192	    dt_module_strtab + rhs->st_name));
193}
194
195static void
196dt_module_symsort32(dt_module_t *dmp)
197{
198	Elf32_Sym *symtab = (Elf32_Sym *)dmp->dm_symtab.cts_data;
199	Elf32_Sym **sympp = (Elf32_Sym **)dmp->dm_asmap;
200	const dt_sym_t *dsp = dmp->dm_symchains + 1;
201	uint_t i, n = dmp->dm_symfree;
202
203	for (i = 1; i < n; i++, dsp++) {
204		Elf32_Sym *sym = symtab + dsp->ds_symid;
205		if (sym->st_value != 0 &&
206		    (ELF32_ST_BIND(sym->st_info) != STB_LOCAL || sym->st_size))
207			*sympp++ = sym;
208	}
209
210	dmp->dm_aslen = (uint_t)(sympp - (Elf32_Sym **)dmp->dm_asmap);
211	assert(dmp->dm_aslen <= dmp->dm_asrsv);
212
213	dt_module_strtab = dmp->dm_strtab.cts_data;
214	qsort(dmp->dm_asmap, dmp->dm_aslen,
215	    sizeof (Elf32_Sym *), dt_module_symcomp32);
216	dt_module_strtab = NULL;
217}
218
219static void
220dt_module_symsort64(dt_module_t *dmp)
221{
222	Elf64_Sym *symtab = (Elf64_Sym *)dmp->dm_symtab.cts_data;
223	Elf64_Sym **sympp = (Elf64_Sym **)dmp->dm_asmap;
224	const dt_sym_t *dsp = dmp->dm_symchains + 1;
225	uint_t i, n = dmp->dm_symfree;
226
227	for (i = 1; i < n; i++, dsp++) {
228		Elf64_Sym *sym = symtab + dsp->ds_symid;
229		if (sym->st_value != 0 &&
230		    (ELF64_ST_BIND(sym->st_info) != STB_LOCAL || sym->st_size))
231			*sympp++ = sym;
232	}
233
234	dmp->dm_aslen = (uint_t)(sympp - (Elf64_Sym **)dmp->dm_asmap);
235	assert(dmp->dm_aslen <= dmp->dm_asrsv);
236
237	dt_module_strtab = dmp->dm_strtab.cts_data;
238	qsort(dmp->dm_asmap, dmp->dm_aslen,
239	    sizeof (Elf64_Sym *), dt_module_symcomp64);
240	dt_module_strtab = NULL;
241}
242
243static GElf_Sym *
244dt_module_symgelf32(const Elf32_Sym *src, GElf_Sym *dst)
245{
246	if (dst != NULL) {
247		dst->st_name = src->st_name;
248		dst->st_info = src->st_info;
249		dst->st_other = src->st_other;
250		dst->st_shndx = src->st_shndx;
251		dst->st_value = src->st_value;
252		dst->st_size = src->st_size;
253	}
254
255	return (dst);
256}
257
258static GElf_Sym *
259dt_module_symgelf64(const Elf64_Sym *src, GElf_Sym *dst)
260{
261	if (dst != NULL) {
262		bcopy(src, dst, sizeof (GElf_Sym));
263        }
264
265	return (dst);
266}
267
268static GElf_Sym *
269dt_module_symname32(dt_module_t *dmp, const char *name,
270    GElf_Sym *symp, uint_t *idp)
271{
272	const Elf32_Sym *symtab = dmp->dm_symtab.cts_data;
273	const char *strtab = dmp->dm_strtab.cts_data;
274
275	const Elf32_Sym *sym;
276	const dt_sym_t *dsp;
277	uint_t i, h;
278
279	if (dmp->dm_nsymelems == 0)
280		return (NULL);
281
282	h = dt_strtab_hash(name, NULL) % dmp->dm_nsymbuckets;
283
284	for (i = dmp->dm_symbuckets[h]; i != 0; i = dsp->ds_next) {
285		dsp = &dmp->dm_symchains[i];
286		sym = symtab + dsp->ds_symid;
287
288		if (strcmp(name, strtab + sym->st_name) == 0) {
289			if (idp != NULL)
290				*idp = dsp->ds_symid;
291			return (dt_module_symgelf32(sym, symp));
292		}
293	}
294
295	return (NULL);
296}
297
298static GElf_Sym *
299dt_module_symname64(dt_module_t *dmp, const char *name,
300    GElf_Sym *symp, uint_t *idp)
301{
302	const Elf64_Sym *symtab = dmp->dm_symtab.cts_data;
303	const char *strtab = dmp->dm_strtab.cts_data;
304
305	const Elf64_Sym *sym;
306	const dt_sym_t *dsp;
307	uint_t i, h;
308
309	if (dmp->dm_nsymelems == 0)
310		return (NULL);
311
312	h = dt_strtab_hash(name, NULL) % dmp->dm_nsymbuckets;
313
314	for (i = dmp->dm_symbuckets[h]; i != 0; i = dsp->ds_next) {
315		dsp = &dmp->dm_symchains[i];
316		sym = symtab + dsp->ds_symid;
317
318		if (strcmp(name, strtab + sym->st_name) == 0) {
319			if (idp != NULL)
320				*idp = dsp->ds_symid;
321			return (dt_module_symgelf64(sym, symp));
322		}
323	}
324
325	return (NULL);
326}
327
328static GElf_Sym *
329dt_module_symaddr32(dt_module_t *dmp, GElf_Addr addr,
330    GElf_Sym *symp, uint_t *idp)
331{
332	const Elf32_Sym **asmap = (const Elf32_Sym **)dmp->dm_asmap;
333	const Elf32_Sym *symtab = dmp->dm_symtab.cts_data;
334	const Elf32_Sym *sym;
335
336	uint_t i, mid, lo = 0, hi = dmp->dm_aslen - 1;
337	Elf32_Addr v;
338
339	if (dmp->dm_aslen == 0)
340		return (NULL);
341
342	while (hi - lo > 1) {
343		mid = (lo + hi) / 2;
344		if (addr >= asmap[mid]->st_value)
345			lo = mid;
346		else
347			hi = mid;
348	}
349
350	i = addr < asmap[hi]->st_value ? lo : hi;
351	sym = asmap[i];
352	v = sym->st_value;
353
354	/*
355	 * If the previous entry has the same value, improve our choice.  The
356	 * order of equal-valued symbols is determined by the comparison func.
357	 */
358	while (i-- != 0 && asmap[i]->st_value == v)
359		sym = asmap[i];
360
361	if (addr - sym->st_value < MAX(sym->st_size, 1)) {
362		if (idp != NULL)
363			*idp = (uint_t)(sym - symtab);
364		return (dt_module_symgelf32(sym, symp));
365	}
366
367	return (NULL);
368}
369
370static GElf_Sym *
371dt_module_symaddr64(dt_module_t *dmp, GElf_Addr addr,
372    GElf_Sym *symp, uint_t *idp)
373{
374	const Elf64_Sym **asmap = (const Elf64_Sym **)dmp->dm_asmap;
375	const Elf64_Sym *symtab = dmp->dm_symtab.cts_data;
376	const Elf64_Sym *sym;
377
378	uint_t i, mid, lo = 0, hi = dmp->dm_aslen - 1;
379	Elf64_Addr v;
380
381	if (dmp->dm_aslen == 0)
382		return (NULL);
383
384	while (hi - lo > 1) {
385		mid = (lo + hi) / 2;
386		if (addr >= asmap[mid]->st_value)
387			lo = mid;
388		else
389			hi = mid;
390	}
391
392	i = addr < asmap[hi]->st_value ? lo : hi;
393	sym = asmap[i];
394	v = sym->st_value;
395
396	/*
397	 * If the previous entry has the same value, improve our choice.  The
398	 * order of equal-valued symbols is determined by the comparison func.
399	 */
400	while (i-- != 0 && asmap[i]->st_value == v)
401		sym = asmap[i];
402
403	if (addr - sym->st_value < MAX(sym->st_size, 1)) {
404		if (idp != NULL)
405			*idp = (uint_t)(sym - symtab);
406		return (dt_module_symgelf64(sym, symp));
407	}
408
409	return (NULL);
410}
411
412static const dt_modops_t dt_modops_32 = {
413	dt_module_syminit32,
414	dt_module_symsort32,
415	dt_module_symname32,
416	dt_module_symaddr32
417};
418
419static const dt_modops_t dt_modops_64 = {
420	dt_module_syminit64,
421	dt_module_symsort64,
422	dt_module_symname64,
423	dt_module_symaddr64
424};
425
/*
 * Thin wrapper that forwards `mangled` to the external demangle() routine
 * (noted in the original source as being linked from CoreSymbolication) and
 * returns its result with the const qualifier cast away.
 */
char *
demangleSymbolCString(const char *mangled)
{
	extern const char *demangle(const char *);
	const char *plain = demangle(mangled);

	return ((char *)plain);
}
433
434static uint_t
435dt_module_syminit_macho(dt_module_t *dmp)
436{
437	const struct nlist *sym = (const struct nlist *)(dmp->dm_symtab.cts_data);
438	const char *base = (const char *)dmp->dm_strtab.cts_data;
439	uint_t i, n = dmp->dm_nsymelems;
440	uint_t asrsv = 0;
441
442	for (i = 0; i < n; i++, sym++) {
443		const char *name = base + sym->n_un.n_strx;
444		uchar_t type = sym->n_type & (N_TYPE | N_EXT);
445
446        // Check that the symbol is a global and that it has a name.
447        if (((N_SECT | N_EXT) != type && (N_ABS | N_EXT) != type))
448            continue;
449
450		if (STT_FUNC != sym->n_desc && STT_OBJECT != sym->n_desc)
451			continue;
452
453		if (0 == sym->n_un.n_strx) // iff a null, "", name.
454			continue;
455
456		if ('_' == name[0])
457			name++; // Lop off omnipresent underscore to match DWARF convention
458
459		if (sym->n_value != 0)
460			asrsv++; /* reserve space in the address map */
461
462		dt_module_symhash_insert(dmp, name, i);
463	}
464
465	return (asrsv);
466}
467
468static int
469dt_module_symcomp_macho(const void *lp, const void *rp)
470{
471	struct nlist *lhs = *((struct nlist **)lp);
472	struct nlist *rhs = *((struct nlist **)rp);
473
474	if (lhs->n_value != rhs->n_value)
475		return (lhs->n_value > rhs->n_value ? 1 : -1);
476
477	if ((lhs->n_desc & N_WEAK_REF) != (rhs->n_desc & N_WEAK_REF))
478		return ((lhs->n_desc & N_WEAK_REF) ? 1 : -1);
479
480	return (strcmp(dt_module_strtab + lhs->n_un.n_strx,
481	    dt_module_strtab + rhs->n_un.n_strx)); // Leading underscores compare equal so leave them be
482}
483
484static void
485dt_module_symsort_macho(dt_module_t *dmp)
486{
487	struct nlist *symtab = (struct nlist *)(dmp->dm_symtab.cts_data);
488	struct nlist **sympp = (struct nlist **)dmp->dm_asmap;
489	const dt_sym_t *dsp = dmp->dm_symchains + 1;
490	uint_t i, n = dmp->dm_symfree;
491
492	for (i = 1; i < n; i++, dsp++) {
493		struct nlist *sym = symtab + dsp->ds_symid;
494		if (sym->n_value != 0)
495			*sympp++ = sym;
496	}
497
498	dmp->dm_aslen = (uint_t)(sympp - (struct nlist **)dmp->dm_asmap);
499	assert(dmp->dm_aslen <= dmp->dm_asrsv);
500
501	dt_module_strtab = ((char *)(dmp->dm_symtab.cts_data)) + dmp->dm_symtab.cts_size;
502	qsort(dmp->dm_asmap, dmp->dm_aslen,
503	    sizeof (struct nlist *), dt_module_symcomp_macho);
504	dt_module_strtab = NULL;
505}
506
507static uint64_t
508dt_module_slide()
509{
510        static bool initialized = FALSE;
511        static pthread_mutex_t guard = PTHREAD_MUTEX_INITIALIZER;
512        static uint64_t kernel_slide = 0;
513
514        if (!initialized) {
515                pthread_mutex_lock(&guard);
516                if (!initialized) {
517                        initialized = TRUE;
518
519                        size_t size = sizeof(kernel_slide);
520
521                        if (kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, &kernel_slide, &size) != KERN_SUCCESS)
522                                kernel_slide = 0;
523                }
524                pthread_mutex_unlock(&guard);
525        }
526
527        return kernel_slide;
528}
529
530static GElf_Sym *
531dt_module_symname_macho(dt_module_t *dmp, const char *name,
532    GElf_Sym *symp, uint_t *idp)
533{
534	const struct nlist *symtab = (const struct nlist *)(dmp->dm_symtab.cts_data);
535	const char *strtab = (const char *)dmp->dm_strtab.cts_data;
536
537	const struct nlist *sym;
538	const dt_sym_t *dsp;
539	uint_t i, h;
540
541	if (dmp->dm_nsymelems == 0)
542		return (NULL);
543
544	h = dt_strtab_hash(name, NULL) % dmp->dm_nsymbuckets;
545
546	for (i = dmp->dm_symbuckets[h]; i != 0; i = dsp->ds_next) {
547		dsp = &dmp->dm_symchains[i];
548		sym = symtab + dsp->ds_symid;
549		const char *sname = strtab + sym->n_un.n_strx;
550
551		if ('_' == sname[0])
552			sname++; // Lop off omnipresent underscore
553
554		if (strcmp(name, sname) == 0) {
555			if (idp != NULL)
556				*idp = dsp->ds_symid;
557
558			symp->st_name = (GElf_Sxword)(sname - strtab);
559			symp->st_info = STT_NOTYPE;
560			symp->st_other = 0;
561			symp->st_shndx = sym->n_sect;
562			symp->st_value = sym->n_value + dt_module_slide();
563			symp->st_size = 0;
564
565			if (sym->n_type & N_STAB) { /* Detect C++ methods */
566
567				switch(sym->n_type) {
568				case N_FUN:
569					symp->st_info = GELF_ST_INFO((STB_GLOBAL), (STT_FUNC));
570					break;
571				case N_GSYM:
572					symp->st_info = GELF_ST_INFO((STB_GLOBAL), (STT_OBJECT));
573					break;
574				default:
575					break;
576				}
577
578			} else if ((N_ABS | N_EXT) == (sym->n_type & (N_TYPE | N_EXT)) ||
579				(N_SECT | N_EXT) == (sym->n_type & (N_TYPE | N_EXT))) {
580
581				symp->st_info = GELF_ST_INFO((STB_GLOBAL), (sym->n_desc));
582			} else if ((N_UNDF | N_EXT) == (sym->n_type & (N_TYPE | N_EXT)) &&
583						sym->n_sect == NO_SECT) {
584				symp->st_info = GELF_ST_INFO((STB_GLOBAL), (STT_OBJECT)); /* Common */
585			}
586
587			return symp;
588		}
589	}
590
591	return (NULL);
592}
593
594static GElf_Sym *
595dt_module_symaddr_macho(dt_module_t *dmp, GElf_Addr addr,
596    GElf_Sym *symp, uint_t *idp)
597{
598	const struct nlist **asmap = (const struct nlist **)dmp->dm_asmap;
599	const struct nlist *symtab = (const struct nlist *)(dmp->dm_symtab.cts_data);
600	const char *strtab = (const char *)dmp->dm_strtab.cts_data;
601	const struct nlist *sym;
602
603	uint_t i, mid, lo = 0, hi = dmp->dm_aslen - 1;
604	Elf32_Addr v;
605
606	if (dmp->dm_aslen == 0)
607		return (NULL);
608
609	while (hi - lo > 1) {
610		mid = (lo + hi) / 2;
611		if (addr >= asmap[mid]->n_value)
612			lo = mid;
613		else
614			hi = mid;
615	}
616
617	i = addr < asmap[hi]->n_value ? lo : hi;
618	sym = asmap[i];
619	v = sym->n_value + dt_module_slide();
620
621	/*
622	 * If the previous entry has the same value, improve our choice.  The
623	 * order of equal-valued symbols is determined by the comparison func.
624	 */
625	while (i-- != 0 && asmap[i]->n_value == v)
626		sym = asmap[i];
627
628	/*
629	 * Either addr < asmap[0]->n_value (in which case i is zero), or
630	 * i is the smallest index s.t. asmap[i]->n_value <= addr <= asmap[i+1]->n_value.
631	 * We'd like a st_size bounds check: if (addr - sym->n_value < MAX(sym->st_size, 1)),
632	 * but Mach-o nlist entries don't capture the size attribute, sigh.
633	 * At least make sure addr isn't too small.
634	 */
635	if (sym->n_value <= addr) {
636		const char *name = strtab + sym->n_un.n_strx;
637
638		if ('_' == name[0])
639			name++; // Lop off omnipresent underscore
640
641		symp->st_name = (GElf_Sxword)(name - strtab);
642		symp->st_info = STT_NOTYPE;
643		symp->st_other = 0;
644		symp->st_shndx = sym->n_sect;
645		symp->st_value = sym->n_value;
646		symp->st_size = 0;
647
648		if (sym->n_type & N_STAB) { /* Detect C++ methods */
649
650			switch(sym->n_type) {
651			case N_FUN:
652				symp->st_info = GELF_ST_INFO((STB_GLOBAL), (STT_FUNC));
653				break;
654			case N_GSYM:
655				symp->st_info = GELF_ST_INFO((STB_GLOBAL), (STT_OBJECT));
656				break;
657			default:
658				break;
659			}
660
661		} else if ((N_ABS | N_EXT) == (sym->n_type & (N_TYPE | N_EXT)) ||
662			(N_SECT | N_EXT) == (sym->n_type & (N_TYPE | N_EXT))) {
663
664			symp->st_info = GELF_ST_INFO((STB_GLOBAL), (sym->n_desc));
665		} else if ((N_UNDF | N_EXT) == (sym->n_type & (N_TYPE | N_EXT)) &&
666					sym->n_sect == NO_SECT) {
667			symp->st_info = GELF_ST_INFO((STB_GLOBAL), (STT_OBJECT)); /* Common */
668		}
669
670		if (idp != NULL) {
671			*idp = (uint_t)(sym - symtab);
672		}
673
674		return symp;
675	}
676
677	return (NULL);
678}
679
680static const dt_modops_t dt_modops_macho_32 = {
681	dt_module_syminit_macho,
682	dt_module_symsort_macho,
683	dt_module_symname_macho,
684	dt_module_symaddr_macho
685};
686
687static uint_t
688dt_module_syminit_macho_64(dt_module_t *dmp)
689{
690	const struct nlist_64 *sym = (const struct nlist_64 *)(dmp->dm_symtab.cts_data);
691	const char *base = (const char *)dmp->dm_strtab.cts_data;
692	uint_t i, n = dmp->dm_nsymelems;
693	uint_t asrsv = 0;
694
695	for (i = 0; i < n; i++, sym++) {
696		const char *name = base + sym->n_un.n_strx;
697		uchar_t type = sym->n_type & (N_TYPE | N_EXT);
698
699        // Check that the symbol is a global and that it has a name.
700        if (((N_SECT | N_EXT) != type && (N_ABS | N_EXT) != type))
701            continue;
702
703		if (STT_FUNC != sym->n_desc && STT_OBJECT != sym->n_desc)
704			continue;
705
706		if (0 == sym->n_un.n_strx) // iff a null, "", name.
707			continue;
708
709		if ('_' == name[0])
710			name++; // Lop off omnipresent underscore to match DWARF convention
711
712		if (sym->n_value != 0)
713			asrsv++; /* reserve space in the address map */
714
715		dt_module_symhash_insert(dmp, name, i);
716	}
717
718	return (asrsv);
719}
720
721static int
722dt_module_symcomp_macho_64(const void *lp, const void *rp)
723{
724	struct nlist_64 *lhs = *((struct nlist_64 **)lp);
725	struct nlist_64 *rhs = *((struct nlist_64 **)rp);
726
727	if (lhs->n_value != rhs->n_value)
728		return (lhs->n_value > rhs->n_value ? 1 : -1);
729
730	if ((lhs->n_desc & N_WEAK_REF) != (rhs->n_desc & N_WEAK_REF))
731		return ((lhs->n_desc & N_WEAK_REF) ? 1 : -1);
732
733	return (strcmp(dt_module_strtab + lhs->n_un.n_strx,
734	    dt_module_strtab + rhs->n_un.n_strx)); // Leading underscores compare equal so leave them be
735}
736
737static void
738dt_module_symsort_macho_64(dt_module_t *dmp)
739{
740	struct nlist_64 *symtab = (struct nlist_64 *)(dmp->dm_symtab.cts_data);
741	struct nlist_64 **sympp = (struct nlist_64 **)dmp->dm_asmap;
742	const dt_sym_t *dsp = dmp->dm_symchains + 1;
743	uint_t i, n = dmp->dm_symfree;
744
745	for (i = 1; i < n; i++, dsp++) {
746		struct nlist_64 *sym = symtab + dsp->ds_symid;
747		if (sym->n_value != 0)
748			*sympp++ = sym;
749	}
750
751	dmp->dm_aslen = (uint_t)(sympp - (struct nlist_64 **)dmp->dm_asmap);
752	assert(dmp->dm_aslen <= dmp->dm_asrsv);
753
754	dt_module_strtab = ((char *)(dmp->dm_symtab.cts_data)) + dmp->dm_symtab.cts_size;
755	qsort(dmp->dm_asmap, dmp->dm_aslen,
756	    sizeof (struct nlist_64 *), dt_module_symcomp_macho_64);
757	dt_module_strtab = NULL;
758}
759
760static GElf_Sym *
761dt_module_symname_macho_64(dt_module_t *dmp, const char *name,
762    GElf_Sym *symp, uint_t *idp)
763{
764	const struct nlist_64 *symtab = (const struct nlist_64 *)(dmp->dm_symtab.cts_data);
765	const char *strtab = (const char *)dmp->dm_strtab.cts_data;
766
767	const struct nlist_64 *sym;
768	const dt_sym_t *dsp;
769	uint_t i, h;
770
771	if (dmp->dm_nsymelems == 0)
772		return (NULL);
773
774	h = dt_strtab_hash(name, NULL) % dmp->dm_nsymbuckets;
775
776	for (i = dmp->dm_symbuckets[h]; i != 0; i = dsp->ds_next) {
777		dsp = &dmp->dm_symchains[i];
778		sym = symtab + dsp->ds_symid;
779		const char *sname = strtab + sym->n_un.n_strx;
780
781		if ('_' == sname[0])
782			sname++; // Lop off omnipresent underscore
783
784		if (strcmp(name, sname) == 0) {
785			if (idp != NULL)
786				*idp = dsp->ds_symid;
787
788			symp->st_name = (GElf_Sxword)(sname - strtab);
789			symp->st_info = STT_NOTYPE;
790			symp->st_other = 0;
791			symp->st_shndx = sym->n_sect;
792			symp->st_value = sym->n_value + dt_module_slide();
793			symp->st_size = 0;
794
795			if (sym->n_type & N_STAB) { /* Detect C++ methods */
796
797				switch(sym->n_type) {
798				case N_FUN:
799					symp->st_info = GELF_ST_INFO((STB_GLOBAL), (STT_FUNC));
800					break;
801				case N_GSYM:
802					symp->st_info = GELF_ST_INFO((STB_GLOBAL), (STT_OBJECT));
803					break;
804				default:
805					break;
806				}
807
808			} else if ((N_ABS | N_EXT) == (sym->n_type & (N_TYPE | N_EXT)) ||
809				(N_SECT | N_EXT) == (sym->n_type & (N_TYPE | N_EXT))) {
810
811				symp->st_info = GELF_ST_INFO((STB_GLOBAL), (sym->n_desc));
812			} else if ((N_UNDF | N_EXT) == (sym->n_type & (N_TYPE | N_EXT)) &&
813						sym->n_sect == NO_SECT) {
814				symp->st_info = GELF_ST_INFO((STB_GLOBAL), (STT_OBJECT)); /* Common */
815			}
816
817			return symp;
818		}
819	}
820
821	return (NULL);
822}
823
824static GElf_Sym *
825dt_module_symaddr_macho_64(dt_module_t *dmp, GElf_Addr addr,
826    GElf_Sym *symp, uint_t *idp)
827{
828	const struct nlist_64 **asmap = (const struct nlist_64 **)dmp->dm_asmap;
829	const struct nlist_64 *symtab = (const struct nlist_64 *)(dmp->dm_symtab.cts_data);
830	const char *strtab = (const char *)dmp->dm_strtab.cts_data;
831	const struct nlist_64 *sym;
832
833	uint_t i, mid, lo = 0, hi = dmp->dm_aslen - 1;
834	Elf32_Addr v;
835
836	if (dmp->dm_aslen == 0)
837		return (NULL);
838
839	while (hi - lo > 1) {
840		mid = (lo + hi) / 2;
841		if (addr >= asmap[mid]->n_value)
842			lo = mid;
843		else
844			hi = mid;
845	}
846
847	i = addr < asmap[hi]->n_value ? lo : hi;
848	sym = asmap[i];
849	v = sym->n_value;
850
851	/*
852	 * If the previous entry has the same value, improve our choice.  The
853	 * order of equal-valued symbols is determined by the comparison func.
854	 */
855	while (i-- != 0 && asmap[i]->n_value == v)
856		sym = asmap[i];
857
858	/*
859	 * Either addr < asmap[0]->n_value (in which case i is zero), or
860	 * i is the smallest index s.t. asmap[i]->n_value <= addr <= asmap[i+1]->n_value.
861	 * We'd like a st_size bounds check: if (addr - sym->n_value < MAX(sym->st_size, 1)),
862	 * but Mach-o nlist entries don't capture the size attribute, sigh.
863	 * At least make sure addr isn't too small.
864	 */
865	if (sym->n_value <= addr) {
866		const char *name = strtab + sym->n_un.n_strx;
867
868		if ('_' == name[0])
869			name++; // Lop off omnipresent underscore
870
871		symp->st_name = (GElf_Sxword)(name - strtab);
872		symp->st_info = STT_NOTYPE;
873		symp->st_other = 0;
874		symp->st_shndx = sym->n_sect;
875		symp->st_value = sym->n_value + dt_module_slide();
876		symp->st_size = 0;
877
878		if (sym->n_type & N_STAB) { /* Detect C++ methods */
879
880			switch(sym->n_type) {
881			case N_FUN:
882				symp->st_info = GELF_ST_INFO((STB_GLOBAL), (STT_FUNC));
883				break;
884			case N_GSYM:
885				symp->st_info = GELF_ST_INFO((STB_GLOBAL), (STT_OBJECT));
886				break;
887			default:
888				break;
889			}
890
891		} else if ((N_ABS | N_EXT) == (sym->n_type & (N_TYPE | N_EXT)) ||
892			(N_SECT | N_EXT) == (sym->n_type & (N_TYPE | N_EXT))) {
893
894			symp->st_info = GELF_ST_INFO((STB_GLOBAL), (sym->n_desc));
895		} else if ((N_UNDF | N_EXT) == (sym->n_type & (N_TYPE | N_EXT)) &&
896					sym->n_sect == NO_SECT) {
897			symp->st_info = GELF_ST_INFO((STB_GLOBAL), (STT_OBJECT)); /* Common */
898		}
899
900		if (idp != NULL) {
901			*idp = (uint_t)(sym - symtab);
902		}
903
904		return symp;
905	}
906
907	return (NULL);
908}
909
910static const dt_modops_t dt_modops_macho_64 = {
911	dt_module_syminit_macho_64,
912	dt_module_symsort_macho_64,
913	dt_module_symname_macho_64,
914	dt_module_symaddr_macho_64
915};
916
917dt_module_t *
918dt_module_create(dtrace_hdl_t *dtp, const char *name)
919{
920	uint_t h = dt_strtab_hash(name, NULL) % dtp->dt_modbuckets;
921	dt_module_t *dmp;
922
923	for (dmp = dtp->dt_mods[h]; dmp != NULL; dmp = dmp->dm_next) {
924		if (strcmp(dmp->dm_name, name) == 0)
925			return (dmp);
926	}
927
928	if ((dmp = malloc(sizeof (dt_module_t))) == NULL)
929		return (NULL); /* caller must handle allocation failure */
930
931	bzero(dmp, sizeof (dt_module_t));
932	(void) strlcpy(dmp->dm_name, name, sizeof (dmp->dm_name));
933	dt_list_append(&dtp->dt_modlist, dmp);
934	dmp->dm_next = dtp->dt_mods[h];
935	dtp->dt_mods[h] = dmp;
936	dtp->dt_nmods++;
937
938	if (dtp->dt_conf.dtc_ctfmodel == CTF_MODEL_LP64)
939		dmp->dm_ops = &dt_modops_macho_64;
940	else
941		dmp->dm_ops = &dt_modops_macho_32;
942
943	return (dmp);
944}
945
946dt_module_t *
947dt_module_lookup_by_name(dtrace_hdl_t *dtp, const char *name)
948{
949	uint_t h = dt_strtab_hash(name, NULL) % dtp->dt_modbuckets;
950	dt_module_t *dmp;
951
952	for (dmp = dtp->dt_mods[h]; dmp != NULL; dmp = dmp->dm_next) {
953		if (strcmp(dmp->dm_name, name) == 0)
954			return (dmp);
955	}
956
957	return (NULL);
958}
959
960/*ARGSUSED*/
961dt_module_t *
962dt_module_lookup_by_ctf(dtrace_hdl_t *dtp, ctf_file_t *ctfp)
963{
964	return (ctfp ? ctf_getspecific(ctfp) : NULL);
965}
966
967static int
968dt_module_load_sect(dtrace_hdl_t *dtp, dt_module_t *dmp, ctf_sect_t *ctsp)
969{
970	const char *s;
971	size_t shstrs;
972	GElf_Shdr sh;
973	Elf_Data *dp;
974	Elf_Scn *sp;
975
976	if (elf_getshstrndx(dmp->dm_elf, &shstrs) == 0)
977		return (dt_set_errno(dtp, EDT_NOTLOADED));
978
979	for (sp = NULL; (sp = elf_nextscn(dmp->dm_elf, sp)) != NULL; ) {
980		if (gelf_getshdr(sp, &sh) == NULL || sh.sh_type == SHT_NULL ||
981		    (s = elf_strptr(dmp->dm_elf, shstrs, sh.sh_name)) == NULL)
982			continue; /* skip any malformed sections */
983
984		if (sh.sh_type == ctsp->cts_type &&
985		    sh.sh_entsize == ctsp->cts_entsize &&
986		    strcmp(s, ctsp->cts_name) == 0)
987			break; /* section matches specification */
988	}
989
990	/*
991	 * If the section isn't found, return success but leave cts_data set
992	 * to NULL and cts_size set to zero for our caller.
993	 */
994	if (sp == NULL || (dp = elf_getdata(sp, NULL)) == NULL)
995		return (0);
996
997	ctsp->cts_data = dp->d_buf;
998	ctsp->cts_size = dp->d_size;
999
1000	dt_dprintf("loaded %s [%s] (%lu bytes)\n",
1001	    dmp->dm_name, ctsp->cts_name, (ulong_t)ctsp->cts_size);
1002
1003	return (0);
1004}
1005
1006int
1007dt_module_load(dtrace_hdl_t *dtp, dt_module_t *dmp)
1008{
1009	if (dmp->dm_flags & DT_DM_LOADED)
1010		return (0); /* module is already loaded */
1011
1012	dmp->dm_ctdata.cts_name = ".SUNW_ctf";
1013	dmp->dm_ctdata.cts_type = SHT_UNKNOWN12;
1014	dmp->dm_ctdata.cts_flags = 0;
1015	dmp->dm_ctdata.cts_data = NULL;
1016	dmp->dm_ctdata.cts_size = 0;
1017	dmp->dm_ctdata.cts_entsize = 0;
1018	dmp->dm_ctdata.cts_offset = 0;
1019
1020	dmp->dm_symtab.cts_name = ".symtab";
1021	dmp->dm_symtab.cts_type = SHT_STRTAB;
1022	dmp->dm_symtab.cts_flags = 0;
1023	dmp->dm_symtab.cts_data = NULL;
1024	dmp->dm_symtab.cts_size = 0;
1025	dmp->dm_symtab.cts_entsize =
1026		dmp->dm_ops == &dt_modops_macho_32 ? sizeof(struct nlist) :
1027		dmp->dm_ops == &dt_modops_macho_64 ? sizeof(struct nlist_64) :
1028		(dmp->dm_ops == &dt_modops_64 ? sizeof (Elf64_Sym) : sizeof (Elf32_Sym));
1029	dmp->dm_symtab.cts_offset = 0;
1030
1031	dmp->dm_strtab.cts_name = ".dir_str_table";
1032	dmp->dm_strtab.cts_type = SHT_STRTAB;
1033	dmp->dm_strtab.cts_flags = 0;
1034	dmp->dm_strtab.cts_data = NULL;
1035	dmp->dm_strtab.cts_size = 0;
1036	dmp->dm_strtab.cts_entsize = 0;
1037	dmp->dm_strtab.cts_offset = 0;
1038
1039	/*
1040	 * Attempt to load the module's CTF section, symbol table section, and
1041	 * string table section.  Note that modules may not contain CTF data:
1042	 * this will result in a successful load_sect but data of size zero.
1043	 * We will then fail if dt_module_getctf() is called, as shown below.
1044	 */
1045	if (dt_module_load_sect(dtp, dmp, &dmp->dm_ctdata) == -1 ||
1046	    dt_module_load_sect(dtp, dmp, &dmp->dm_symtab) == -1 ||
1047	    dt_module_load_sect(dtp, dmp, &dmp->dm_strtab) == -1) {
1048		dt_module_unload(dtp, dmp);
1049		return (-1); /* dt_errno is set for us */
1050	}
1051
1052	/*
1053	 * Allocate the hash chains and hash buckets for symbol name lookup.
1054	 * This is relatively simple since the symbol table is of fixed size
1055	 * and is known in advance.  We allocate one extra element since we
1056	 * use element indices instead of pointers and zero is our sentinel.
1057	 */
1058	dmp->dm_nsymelems =
1059	    dmp->dm_symtab.cts_size / dmp->dm_symtab.cts_entsize;
1060
1061	dmp->dm_nsymbuckets = _dtrace_strbuckets;
1062	dmp->dm_symfree = 1;		/* first free element is index 1 */
1063
1064	dmp->dm_symbuckets = malloc(sizeof (uint_t) * dmp->dm_nsymbuckets);
1065	dmp->dm_symchains = malloc(sizeof (dt_sym_t) * dmp->dm_nsymelems + 1);
1066
1067	if (dmp->dm_symbuckets == NULL || dmp->dm_symchains == NULL) {
1068		dt_module_unload(dtp, dmp);
1069		return (dt_set_errno(dtp, EDT_NOMEM));
1070	}
1071
1072	bzero(dmp->dm_symbuckets, sizeof (uint_t) * dmp->dm_nsymbuckets);
1073	bzero(dmp->dm_symchains, sizeof (dt_sym_t) * dmp->dm_nsymelems + 1);
1074
1075	/*
1076	 * Iterate over the symbol table data buffer and insert each symbol
1077	 * name into the name hash if the name and type are valid.  Then
1078	 * allocate the address map, fill it in, and sort it.
1079	 */
1080	dmp->dm_asrsv = dmp->dm_ops->do_syminit(dmp);
1081
1082	dt_dprintf("hashed %s [%s] (%u symbols)\n",
1083	    dmp->dm_name, dmp->dm_symtab.cts_name, dmp->dm_symfree - 1);
1084
1085	if ((dmp->dm_asmap = malloc(sizeof (void *) * dmp->dm_asrsv)) == NULL) {
1086		dt_module_unload(dtp, dmp);
1087		return (dt_set_errno(dtp, EDT_NOMEM));
1088	}
1089
1090	dmp->dm_ops->do_symsort(dmp);
1091
1092	dt_dprintf("sorted %s [%s] (%u symbols)\n",
1093	    dmp->dm_name, dmp->dm_symtab.cts_name, dmp->dm_aslen);
1094
1095	dmp->dm_flags |= DT_DM_LOADED;
1096	return (0);
1097}
1098
1099ctf_file_t *
1100dt_module_getctf(dtrace_hdl_t *dtp, dt_module_t *dmp)
1101{
1102	const char *parent;
1103	dt_module_t *pmp;
1104	ctf_file_t *pfp;
1105	int model;
1106
1107	if (dmp->dm_ctfp != NULL || dt_module_load(dtp, dmp) != 0)
1108		return (dmp->dm_ctfp);
1109
1110	if (dmp->dm_ops == &dt_modops_macho_64)
1111		model = CTF_MODEL_LP64;
1112	else if (dmp->dm_ops == &dt_modops_macho_32)
1113		model = CTF_MODEL_ILP32;
1114	else
1115	if (dmp->dm_ops == &dt_modops_64)
1116		model = CTF_MODEL_LP64;
1117	else
1118		model = CTF_MODEL_ILP32;
1119
1120	if (dmp->dm_ctdata.cts_size == 0) {
1121		(void) dt_set_errno(dtp, EDT_NOCTF);
1122		return (NULL);
1123	}
1124
1125	dmp->dm_ctfp = ctf_bufopen(&dmp->dm_ctdata,
1126	    &dmp->dm_symtab, &dmp->dm_strtab, &dtp->dt_ctferr);
1127
1128	if (dmp->dm_ctfp == NULL) {
1129		(void) dt_set_errno(dtp, EDT_CTF);
1130		return (NULL);
1131	}
1132
1133	(void) ctf_setmodel(dmp->dm_ctfp, model);
1134	ctf_setspecific(dmp->dm_ctfp, dmp);
1135
1136	if ((parent = ctf_parent_name(dmp->dm_ctfp)) != NULL) {
1137		if ((pmp = dt_module_create(dtp, parent)) == NULL ||
1138		    (pfp = dt_module_getctf(dtp, pmp)) == NULL) {
1139			if (pmp == NULL)
1140				(void) dt_set_errno(dtp, EDT_NOMEM);
1141			goto err;
1142		}
1143
1144		if (ctf_import(dmp->dm_ctfp, pfp) == CTF_ERR) {
1145			dtp->dt_ctferr = ctf_errno(dmp->dm_ctfp);
1146			(void) dt_set_errno(dtp, EDT_CTF);
1147			goto err;
1148		}
1149	}
1150
1151	dt_dprintf("loaded CTF container for %s (%p)\n",
1152	    dmp->dm_name, (void *)dmp->dm_ctfp);
1153
1154	return (dmp->dm_ctfp);
1155
1156err:
1157	ctf_close(dmp->dm_ctfp);
1158	dmp->dm_ctfp = NULL;
1159	return (NULL);
1160}
1161
1162/*ARGSUSED*/
1163void
1164dt_module_unload(dtrace_hdl_t *dtp, dt_module_t *dmp)
1165{
1166	ctf_close(dmp->dm_ctfp);
1167	dmp->dm_ctfp = NULL;
1168
1169	bzero(&dmp->dm_ctdata, sizeof (ctf_sect_t));
1170	bzero(&dmp->dm_symtab, sizeof (ctf_sect_t));
1171	bzero(&dmp->dm_strtab, sizeof (ctf_sect_t));
1172
1173	if (dmp->dm_symbuckets != NULL) {
1174		free(dmp->dm_symbuckets);
1175		dmp->dm_symbuckets = NULL;
1176	}
1177
1178	if (dmp->dm_symchains != NULL) {
1179		free(dmp->dm_symchains);
1180		dmp->dm_symchains = NULL;
1181	}
1182
1183	if (dmp->dm_asmap != NULL) {
1184		free(dmp->dm_asmap);
1185		dmp->dm_asmap = NULL;
1186	}
1187
1188	dmp->dm_symfree = 0;
1189	dmp->dm_nsymbuckets = 0;
1190	dmp->dm_nsymelems = 0;
1191	dmp->dm_asrsv = 0;
1192	dmp->dm_aslen = 0;
1193
1194	dmp->dm_text_va = (GElf_Addr)0;
1195	dmp->dm_text_size = 0;
1196	dmp->dm_data_va = (GElf_Addr)0;
1197	dmp->dm_data_size = 0;
1198	dmp->dm_bss_va = (GElf_Addr)0;
1199	dmp->dm_bss_size = 0;
1200
1201	if (dmp->dm_extern != NULL) {
1202		dt_idhash_destroy(dmp->dm_extern);
1203		dmp->dm_extern = NULL;
1204	}
1205
1206	(void) elf_end(dmp->dm_elf);
1207	dmp->dm_elf = NULL;
1208
1209	dmp->dm_flags &= ~DT_DM_LOADED;
1210}
1211
1212void
1213dt_module_destroy(dtrace_hdl_t *dtp, dt_module_t *dmp)
1214{
1215	dt_list_delete(&dtp->dt_modlist, dmp);
1216	assert(dtp->dt_nmods != 0);
1217	dtp->dt_nmods--;
1218
1219	dt_module_unload(dtp, dmp);
1220	free(dmp);
1221}
1222
1223/*
1224 * Insert a new external symbol reference into the specified module.  The new
1225 * symbol will be marked as undefined and is assigned a symbol index beyond
1226 * any existing cached symbols from this module.  We use the ident's di_data
1227 * field to store a pointer to a copy of the dtrace_syminfo_t for this symbol.
1228 */
1229dt_ident_t *
1230dt_module_extern(dtrace_hdl_t *dtp, dt_module_t *dmp,
1231    const char *name, const dtrace_typeinfo_t *tip)
1232{
1233	dtrace_syminfo_t *sip;
1234	dt_ident_t *idp;
1235	uint_t id;
1236
1237	if (dmp->dm_extern == NULL && (dmp->dm_extern = dt_idhash_create(
1238	    "extern", NULL, dmp->dm_nsymelems, UINT_MAX)) == NULL) {
1239		(void) dt_set_errno(dtp, EDT_NOMEM);
1240		return (NULL);
1241	}
1242
1243	if (dt_idhash_nextid(dmp->dm_extern, &id) == -1) {
1244		(void) dt_set_errno(dtp, EDT_SYMOFLOW);
1245		return (NULL);
1246	}
1247
1248	if ((sip = malloc(sizeof (dtrace_syminfo_t))) == NULL) {
1249		(void) dt_set_errno(dtp, EDT_NOMEM);
1250		return (NULL);
1251	}
1252
1253	idp = dt_idhash_insert(dmp->dm_extern, name, DT_IDENT_SYMBOL, 0, id,
1254	    _dtrace_symattr, 0, &dt_idops_thaw, NULL, dtp->dt_gen);
1255
1256	if (idp == NULL) {
1257		(void) dt_set_errno(dtp, EDT_NOMEM);
1258		free(sip);
1259		return (NULL);
1260	}
1261
1262	sip->dts_object = dmp->dm_name;
1263	sip->dts_name = idp->di_name;
1264	sip->dts_id = idp->di_id;
1265
1266	idp->di_data = sip;
1267	idp->di_ctfp = tip->dtt_ctfp;
1268	idp->di_type = tip->dtt_type;
1269
1270	return (idp);
1271}
1272
1273const char *
1274dt_module_modelname(dt_module_t *dmp)
1275{
1276	if (dmp->dm_ops == &dt_modops_macho_64)
1277		return ("64-bit");
1278	else if (dmp->dm_ops == &dt_modops_32)
1279		return ("32-bit");
1280	else if (dmp->dm_ops == &dt_modops_64)
1281		return ("64-bit");
1282	else
1283		return ("32-bit");
1284}
1285
1286/*
1287 * Update our module cache by adding an entry for the specified module 'name'.
1288 * We create the dt_module_t and populate it using /system/object/<name>/.
1289 */
1290static void
1291dt_module_update(dtrace_hdl_t *dtp, const char *name)
1292{
1293	char fname[MAXPATHLEN];
1294	struct stat64 st;
1295	int fd, err, bits;
1296
1297	dt_module_t *dmp;
1298	const char *s;
1299	size_t shstrs;
1300	GElf_Shdr sh;
1301	Elf_Data *dp;
1302	Elf_Scn *sp;
1303
1304	unsigned read_cmd = ELF_C_READ;
1305
1306	if (0 == strcmp("mach_kernel", name)) {
1307		if (dtrace_kernel_path(fname, sizeof(fname)) != 0) {
1308			dt_dprintf("failed to retrieve the kernel binary, the module cache will not be updated\n");
1309			return;
1310		}
1311
1312		read_cmd = ELF_C_RDKERNTYPE;
1313	}
1314
1315	if ((fd = open(fname, O_RDONLY)) == -1 || fstat64(fd, &st) == -1 ||
1316	    (dmp = dt_module_create(dtp, name)) == NULL) {
1317		dt_dprintf("failed to open %s: %s\n", fname, strerror(errno));
1318		(void) close(fd);
1319		return;
1320	}
1321
1322	/*
1323	 * Since the module can unload out from under us (and /system/object
1324	 * will return ENOENT), tell libelf to cook the entire file now and
1325	 * then close the underlying file descriptor immediately.  If this
1326	 * succeeds, we know that we can continue safely using dmp->dm_elf.
1327	 */
1328	dmp->dm_elf = elf_begin(fd, read_cmd, NULL);
1329	err = elf_cntl(dmp->dm_elf, ELF_C_FDREAD);
1330	(void) close(fd);
1331
1332	if (dmp->dm_elf == NULL || err == -1 ||
1333	    elf_getshstrndx(dmp->dm_elf, &shstrs) == 0) {
1334		dt_dprintf("failed to load %s: %s\n",
1335		    fname, elf_errmsg(elf_errno()));
1336		dt_module_destroy(dtp, dmp);
1337		return;
1338	}
1339
1340	switch (gelf_getclass(dmp->dm_elf)) {
1341	case ELFCLASS32:
1342		dmp->dm_ops = (dmp->dm_elf->ed_kind == ELF_K_MACHO ? &dt_modops_macho_32 : &dt_modops_32);
1343		bits = 32;
1344		break;
1345	case ELFCLASS64:
1346		dmp->dm_ops = (dmp->dm_elf->ed_kind == ELF_K_MACHO ? &dt_modops_macho_64 : &dt_modops_64);
1347		bits = 64;
1348		break;
1349	default:
1350		dt_dprintf("failed to load %s: unknown ELF class\n", fname);
1351		dt_module_destroy(dtp, dmp);
1352		return;
1353	}
1354
1355	/*
1356	 * Iterate over the section headers locating various sections of
1357	 * interest and use their attributes to flesh out the dt_module_t.
1358	 */
1359	for (sp = NULL; (sp = elf_nextscn(dmp->dm_elf, sp)) != NULL; ) {
1360		if (gelf_getshdr(sp, &sh) == NULL || sh.sh_type == SHT_NULL ||
1361		    (s = elf_strptr(dmp->dm_elf, shstrs, sh.sh_name)) == NULL)
1362			continue; /* skip any malformed sections */
1363
1364		if (strcmp(s, ".text") == 0) {
1365			dmp->dm_text_size = sh.sh_size;
1366			dmp->dm_text_va = sh.sh_addr;
1367		} else if (strcmp(s, ".data") == 0) {
1368			dmp->dm_data_size = sh.sh_size;
1369			dmp->dm_data_va = sh.sh_addr;
1370		} else if (strcmp(s, ".bss") == 0) {
1371			dmp->dm_bss_size = sh.sh_size;
1372			dmp->dm_bss_va = sh.sh_addr;
1373		} else if (strcmp(s, ".info") == 0 &&
1374		    (dp = elf_getdata(sp, NULL)) != NULL) {
1375			bcopy(dp->d_buf, &dmp->dm_info,
1376			    MIN(sh.sh_size, sizeof (dmp->dm_info)));
1377		} else if (strcmp(s, ".filename") == 0 &&
1378		    (dp = elf_getdata(sp, NULL)) != NULL) {
1379			(void) strlcpy(dmp->dm_file,
1380			    dp->d_buf, sizeof (dmp->dm_file));
1381		}
1382	}
1383
1384	dmp->dm_flags |= DT_DM_KERNEL;
1385	dmp->dm_modid = (int)OBJFS_MODID(st.st_ino);
1386
1387	if (dmp->dm_info.objfs_info_primary)
1388		dmp->dm_flags |= DT_DM_PRIMARY;
1389
1390	dt_dprintf("opened %d-bit module %s (%s) [%d]\n",
1391	    bits, dmp->dm_name, dmp->dm_file, dmp->dm_modid);
1392}
1393
1394/*
1395 * Unload all the loaded modules and then refresh the module cache with the
1396 * latest list of loaded modules and their address ranges.
1397 */
1398void
1399dtrace_update(dtrace_hdl_t *dtp)
1400{
1401	dt_module_t *dmp;
1402	DIR *dirp;
1403
1404	for (dmp = dt_list_next(&dtp->dt_modlist);
1405	    dmp != NULL; dmp = dt_list_next(dmp))
1406		dt_module_unload(dtp, dmp);
1407
1408	if (!(dtp->dt_oflags & DTRACE_O_NOSYS)) {
1409		dt_module_update(dtp, "mach_kernel");
1410	}
1411
1412	/*
1413	 * Look up all the macro identifiers and set di_id to the latest value.
1414	 * This code collaborates with dt_lex.l on the use of di_id.  We will
1415	 * need to implement something fancier if we need to support non-ints.
1416	 */
1417	dt_idhash_lookup(dtp->dt_macros, "egid")->di_id = getegid();
1418	dt_idhash_lookup(dtp->dt_macros, "euid")->di_id = geteuid();
1419	dt_idhash_lookup(dtp->dt_macros, "gid")->di_id = getgid();
1420	dt_idhash_lookup(dtp->dt_macros, "pid")->di_id = getpid();
1421	dt_idhash_lookup(dtp->dt_macros, "pgid")->di_id = getpgid(0);
1422	dt_idhash_lookup(dtp->dt_macros, "ppid")->di_id = getppid();
1423	dt_idhash_lookup(dtp->dt_macros, "projid")->di_id = getprojid();
1424	dt_idhash_lookup(dtp->dt_macros, "sid")->di_id = getsid(0);
1425	dt_idhash_lookup(dtp->dt_macros, "taskid")->di_id = gettaskid();
1426	dt_idhash_lookup(dtp->dt_macros, "uid")->di_id = getuid();
1427
1428	/*
1429	 * Cache the pointers to the modules representing the base executable
1430	 * and the run-time linker in the dtrace client handle. Note that on
1431	 * x86 krtld is folded into unix, so if we don't find it, use unix
1432	 * instead.
1433	 */
1434	dtp->dt_exec = dt_module_lookup_by_name(dtp, "mach_kernel");
1435	dtp->dt_rtld = dt_module_lookup_by_name(dtp, "dyld"); /* XXX to what purpose? */
1436
1437	/*
1438	 * If this is the first time we are initializing the module list,
1439	 * remove the module for genunix from the module list and then move it
1440	 * to the front of the module list.  We do this so that type and symbol
1441	 * queries encounter genunix and thereby optimize for the common case
1442	 * in dtrace_lookup_by_name() and dtrace_lookup_by_type(), below.
1443	 */
1444	if (dtp->dt_exec != NULL &&
1445	    dtp->dt_cdefs == NULL && dtp->dt_ddefs == NULL) {
1446		dt_list_delete(&dtp->dt_modlist, dtp->dt_exec);
1447		dt_list_prepend(&dtp->dt_modlist, dtp->dt_exec);
1448	}
1449}
1450
1451static dt_module_t *
1452dt_module_from_object(dtrace_hdl_t *dtp, const char *object)
1453{
1454	int err = EDT_NOMOD;
1455	dt_module_t *dmp;
1456
1457	switch ((uintptr_t)object) {
1458	case (uintptr_t)DTRACE_OBJ_EXEC:
1459		dmp = dtp->dt_exec;
1460		break;
1461	case (uintptr_t)DTRACE_OBJ_RTLD:
1462		dmp = dtp->dt_rtld;
1463		break;
1464	case (uintptr_t)DTRACE_OBJ_CDEFS:
1465		dmp = dtp->dt_cdefs;
1466		break;
1467	case (uintptr_t)DTRACE_OBJ_DDEFS:
1468		dmp = dtp->dt_ddefs;
1469		break;
1470	default:
1471		dmp = dt_module_create(dtp, object);
1472		err = EDT_NOMEM;
1473	}
1474
1475	if (dmp == NULL)
1476		(void) dt_set_errno(dtp, err);
1477
1478	return (dmp);
1479}
1480
1481/*
1482 * Exported interface to look up a symbol by name.  We return the GElf_Sym and
1483 * complete symbol information for the matching symbol.
1484 */
1485int
1486dtrace_lookup_by_name(dtrace_hdl_t *dtp, const char *object, const char *name,
1487    GElf_Sym *symp, dtrace_syminfo_t *sip)
1488{
1489	dt_module_t *dmp;
1490	dt_ident_t *idp;
1491	uint_t n, id;
1492	GElf_Sym sym;
1493
1494	uint_t mask = 0; /* mask of dt_module flags to match */
1495	uint_t bits = 0; /* flag bits that must be present */
1496
1497	if (object != DTRACE_OBJ_EVERY &&
1498	    object != DTRACE_OBJ_KMODS &&
1499	    object != DTRACE_OBJ_UMODS) {
1500		if ((dmp = dt_module_from_object(dtp, object)) == NULL)
1501			return (-1); /* dt_errno is set for us */
1502
1503		if (dt_module_load(dtp, dmp) == -1)
1504			return (-1); /* dt_errno is set for us */
1505		n = 1;
1506
1507	} else {
1508		if (object == DTRACE_OBJ_KMODS)
1509			mask = bits = DT_DM_KERNEL;
1510		else if (object == DTRACE_OBJ_UMODS)
1511			mask = DT_DM_KERNEL;
1512
1513		dmp = dt_list_next(&dtp->dt_modlist);
1514		n = dtp->dt_nmods;
1515	}
1516
1517	if (symp == NULL)
1518		symp = &sym;
1519
1520	for (; n > 0; n--, dmp = dt_list_next(dmp)) {
1521		if ((dmp->dm_flags & mask) != bits)
1522			continue; /* failed to match required attributes */
1523
1524		if (dt_module_load(dtp, dmp) == -1)
1525			continue; /* failed to load symbol table */
1526
1527		if (dmp->dm_ops->do_symname(dmp, name, symp, &id) != NULL) {
1528			if (sip != NULL) {
1529				sip->dts_object = dmp->dm_name;
1530				sip->dts_name = (const char *)
1531				    dmp->dm_strtab.cts_data + symp->st_name;
1532				sip->dts_id = id;
1533			}
1534			return (0);
1535		}
1536
1537		if (dmp->dm_extern != NULL &&
1538		    (idp = dt_idhash_lookup(dmp->dm_extern, name)) != NULL) {
1539			if (symp != &sym) {
1540				symp->st_name = (uintptr_t)idp->di_name;
1541				symp->st_info =
1542				    GELF_ST_INFO(STB_GLOBAL, STT_NOTYPE);
1543				symp->st_other = 0;
1544				symp->st_shndx = SHN_UNDEF;
1545				symp->st_value = 0;
1546				symp->st_size =
1547				    ctf_type_size(idp->di_ctfp, idp->di_type);
1548			}
1549
1550			if (sip != NULL) {
1551				sip->dts_object = dmp->dm_name;
1552				sip->dts_name = idp->di_name;
1553				sip->dts_id = idp->di_id;
1554			}
1555
1556			return (0);
1557		}
1558	}
1559
1560	return (dt_set_errno(dtp, EDT_NOSYM));
1561}
1562
1563int
1564dtrace_lookup_by_type(dtrace_hdl_t *dtp, const char *object, const char *name,
1565    dtrace_typeinfo_t *tip)
1566{
1567	dtrace_typeinfo_t ti;
1568	dt_module_t *dmp;
1569	int found = 0;
1570	ctf_id_t id;
1571	uint_t n;
1572	int justone;
1573
1574	uint_t mask = 0; /* mask of dt_module flags to match */
1575	uint_t bits = 0; /* flag bits that must be present */
1576
1577	if (object != DTRACE_OBJ_EVERY &&
1578	    object != DTRACE_OBJ_KMODS &&
1579	    object != DTRACE_OBJ_UMODS) {
1580		if ((dmp = dt_module_from_object(dtp, object)) == NULL)
1581			return (-1); /* dt_errno is set for us */
1582
1583		if (dt_module_load(dtp, dmp) == -1)
1584			return (-1); /* dt_errno is set for us */
1585		n = 1;
1586		justone = 1;
1587
1588	} else {
1589		if (object == DTRACE_OBJ_KMODS)
1590			mask = bits = DT_DM_KERNEL;
1591		else if (object == DTRACE_OBJ_UMODS)
1592			mask = DT_DM_KERNEL;
1593
1594		dmp = dt_list_next(&dtp->dt_modlist);
1595		n = dtp->dt_nmods;
1596		justone = 0;
1597	}
1598
1599	if (tip == NULL)
1600		tip = &ti;
1601
1602	for (; n > 0; n--, dmp = dt_list_next(dmp)) {
1603		if ((dmp->dm_flags & mask) != bits)
1604			continue; /* failed to match required attributes */
1605
1606		/*
1607		 * If we can't load the CTF container, continue on to the next
1608		 * module.  If our search was scoped to only one module then
1609		 * return immediately leaving dt_errno unmodified.
1610		 */
1611		if (dt_module_getctf(dtp, dmp) == NULL) {
1612			if (justone)
1613				return (-1);
1614			continue;
1615		}
1616
1617		/*
1618		 * Look up the type in the module's CTF container.  If our
1619		 * match is a forward declaration tag, save this choice in
1620		 * 'tip' and keep going in the hope that we will locate the
1621		 * underlying structure definition.  Otherwise just return.
1622		 */
1623		if ((id = ctf_lookup_by_name(dmp->dm_ctfp, name)) != CTF_ERR) {
1624			tip->dtt_object = dmp->dm_name;
1625			tip->dtt_ctfp = dmp->dm_ctfp;
1626			tip->dtt_type = id;
1627
1628			if (ctf_type_kind(dmp->dm_ctfp, ctf_type_resolve(
1629			    dmp->dm_ctfp, id)) != CTF_K_FORWARD)
1630				return (0);
1631
1632			found++;
1633		}
1634	}
1635
1636	if (found == 0)
1637		return (dt_set_errno(dtp, EDT_NOTYPE));
1638
1639	return (0);
1640}
1641
1642int
1643dtrace_symbol_type(dtrace_hdl_t *dtp, const GElf_Sym *symp,
1644    const dtrace_syminfo_t *sip, dtrace_typeinfo_t *tip)
1645{
1646	dt_module_t *dmp;
1647
1648	tip->dtt_object = NULL;
1649	tip->dtt_ctfp = NULL;
1650	tip->dtt_type = CTF_ERR;
1651
1652	if ((dmp = dt_module_lookup_by_name(dtp, sip->dts_object)) == NULL)
1653		return (dt_set_errno(dtp, EDT_NOMOD));
1654
1655	if (symp->st_shndx == SHN_UNDEF && dmp->dm_extern != NULL) {
1656		dt_ident_t *idp =
1657		    dt_idhash_lookup(dmp->dm_extern, sip->dts_name);
1658
1659		if (idp == NULL)
1660			return (dt_set_errno(dtp, EDT_NOSYM));
1661
1662		tip->dtt_ctfp = idp->di_ctfp;
1663		tip->dtt_type = idp->di_type;
1664
1665	} else if (GELF_ST_TYPE(symp->st_info) != STT_FUNC) {
1666		if (dt_module_getctf(dtp, dmp) == NULL)
1667			return (-1); /* errno is set for us */
1668
1669		tip->dtt_ctfp = dmp->dm_ctfp;
1670		tip->dtt_type = ctf_lookup_by_symbol(dmp->dm_ctfp, sip->dts_id);
1671
1672		if (tip->dtt_type == CTF_ERR) {
1673			dtp->dt_ctferr = ctf_errno(tip->dtt_ctfp);
1674			return (dt_set_errno(dtp, EDT_CTF));
1675		}
1676
1677	} else {
1678		tip->dtt_ctfp = DT_FPTR_CTFP(dtp);
1679		tip->dtt_type = DT_FPTR_TYPE(dtp);
1680	}
1681
1682	tip->dtt_object = dmp->dm_name;
1683	return (0);
1684}
1685
1686static dtrace_objinfo_t *
1687dt_module_info(const dt_module_t *dmp, dtrace_objinfo_t *dto)
1688{
1689	dto->dto_name = dmp->dm_name;
1690	dto->dto_file = dmp->dm_file;
1691	dto->dto_id = dmp->dm_modid;
1692	dto->dto_flags = 0;
1693
1694	if (dmp->dm_flags & DT_DM_KERNEL)
1695		dto->dto_flags |= DTRACE_OBJ_F_KERNEL;
1696	if (dmp->dm_flags & DT_DM_PRIMARY)
1697		dto->dto_flags |= DTRACE_OBJ_F_PRIMARY;
1698
1699	dto->dto_text_va = dmp->dm_text_va;
1700	dto->dto_text_size = dmp->dm_text_size;
1701	dto->dto_data_va = dmp->dm_data_va;
1702	dto->dto_data_size = dmp->dm_data_size;
1703	dto->dto_bss_va = dmp->dm_bss_va;
1704	dto->dto_bss_size = dmp->dm_bss_size;
1705
1706	return (dto);
1707}
1708
1709int
1710dtrace_object_iter(dtrace_hdl_t *dtp, dtrace_obj_f *func, void *data)
1711{
1712	const dt_module_t *dmp = dt_list_next(&dtp->dt_modlist);
1713	dtrace_objinfo_t dto;
1714	int rv;
1715
1716	for (; dmp != NULL; dmp = dt_list_next(dmp)) {
1717		if ((rv = (*func)(dtp, dt_module_info(dmp, &dto), data)) != 0)
1718			return (rv);
1719	}
1720
1721	return (0);
1722}
1723
1724int
1725dtrace_object_info(dtrace_hdl_t *dtp, const char *object, dtrace_objinfo_t *dto)
1726{
1727	dt_module_t *dmp;
1728
1729	if (object == DTRACE_OBJ_EVERY || object == DTRACE_OBJ_KMODS ||
1730	    object == DTRACE_OBJ_UMODS || dto == NULL)
1731		return (dt_set_errno(dtp, EINVAL));
1732
1733	if ((dmp = dt_module_from_object(dtp, object)) == NULL)
1734		return (-1); /* dt_errno is set for us */
1735
1736	if (dt_module_load(dtp, dmp) == -1)
1737		return (-1); /* dt_errno is set for us */
1738
1739	(void) dt_module_info(dmp, dto);
1740	return (0);
1741}
1742