/* brand.c revision 12613:4c5722bc28dc */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
 */
24
25#include <sys/kmem.h>
26#include <sys/errno.h>
27#include <sys/systm.h>
28#include <sys/cmn_err.h>
29#include <sys/brand.h>
30#include <sys/machbrand.h>
31#include <sys/modctl.h>
32#include <sys/rwlock.h>
33#include <sys/zone.h>
34#include <sys/pathname.h>
35
/*
 * The only brand interface version accepted by brand_register(); a brand
 * whose b_version differs from this value is rejected with EINVAL.
 */
#define	SUPPORTED_BRAND_VERSION BRAND_VER_1
37
#if defined(__sparcv9)
/* sparcv9 uses system wide brand interposition hooks */
static void brand_plat_interposition_enable(void);
static void brand_plat_interposition_disable(void);

/*
 * Machine-dependent ops vector of the native brand.  Every slot is NULL;
 * on sparcv9 the structure is initialized with two entries.
 */
struct brand_mach_ops native_mach_ops  = {
		NULL, NULL
};
#else /* !__sparcv9 */
/*
 * Machine-dependent ops vector of the native brand.  Every slot is NULL;
 * on non-sparcv9 platforms the structure is initialized with four entries.
 */
struct brand_mach_ops native_mach_ops  = {
		NULL, NULL, NULL, NULL
};
#endif /* !__sparcv9 */
51
/*
 * Brand descriptor used for native (unbranded) processes.  brand_init()
 * installs it as the brand of p0 and brand_clearbrand() restores it on a
 * process that stops being branded.  The initializer is positional:
 * version BRAND_VER_1, the name "native", a NULL third member (presumably
 * the brand ops vector -- confirm against sys/brand.h), and a pointer to
 * native_mach_ops.
 */
brand_t native_brand = {
		BRAND_VER_1,
		"native",
		NULL,
		&native_mach_ops
};
58
/*
 * Used to maintain a list of all the brands currently loaded into the
 * kernel.  Every registered brand owns one node on a singly-linked list.
 */
struct brand_list {
	int			bl_refcnt;	/* zones using this brand */
	struct brand_list	*bl_next;	/* next node; NULL at the end */
	brand_t			*bl_brand;	/* the registered brand */
};

/* Head of the list of registered brands; NULL while no brand is loaded. */
static struct brand_list *brand_list = NULL;

/*
 * This lock protects the integrity of the brand list, including the
 * bl_refcnt of every node on it.
 */
static kmutex_t brand_list_lock;
75
76void
77brand_init()
78{
79	mutex_init(&brand_list_lock, NULL, MUTEX_DEFAULT, NULL);
80	p0.p_brand = &native_brand;
81}
82
83int
84brand_register(brand_t *brand)
85{
86	struct brand_list *list, *scan;
87
88	if (brand == NULL)
89		return (EINVAL);
90
91	if (brand->b_version != SUPPORTED_BRAND_VERSION) {
92		if (brand->b_version < SUPPORTED_BRAND_VERSION) {
93			cmn_err(CE_WARN,
94			    "brand '%s' was built to run on older versions "
95			    "of Solaris.",
96			    brand->b_name);
97		} else {
98			cmn_err(CE_WARN,
99			    "brand '%s' was built to run on a newer version "
100			    "of Solaris.",
101			    brand->b_name);
102		}
103		return (EINVAL);
104	}
105
106	/* Sanity checks */
107	if (brand->b_name == NULL || brand->b_ops == NULL ||
108	    brand->b_ops->b_brandsys == NULL) {
109		cmn_err(CE_WARN, "Malformed brand");
110		return (EINVAL);
111	}
112
113	list = kmem_alloc(sizeof (struct brand_list), KM_SLEEP);
114
115	/* Add the brand to the list of loaded brands. */
116	mutex_enter(&brand_list_lock);
117
118	/*
119	 * Check to be sure we haven't already registered this brand.
120	 */
121	for (scan = brand_list; scan != NULL; scan = scan->bl_next) {
122		if (strcmp(brand->b_name, scan->bl_brand->b_name) == 0) {
123			cmn_err(CE_WARN,
124			    "Invalid attempt to load a second instance of "
125			    "brand %s", brand->b_name);
126			mutex_exit(&brand_list_lock);
127			kmem_free(list, sizeof (struct brand_list));
128			return (EINVAL);
129		}
130	}
131
132#if defined(__sparcv9)
133	/* sparcv9 uses system wide brand interposition hooks */
134	if (brand_list == NULL)
135		brand_plat_interposition_enable();
136#endif /* __sparcv9 */
137
138	list->bl_brand = brand;
139	list->bl_refcnt = 0;
140	list->bl_next = brand_list;
141	brand_list = list;
142
143	mutex_exit(&brand_list_lock);
144
145	return (0);
146}
147
148/*
149 * The kernel module implementing this brand is being unloaded, so remove
150 * it from the list of active brands.
151 */
152int
153brand_unregister(brand_t *brand)
154{
155	struct brand_list *list, *prev;
156
157	/* Sanity checks */
158	if (brand == NULL || brand->b_name == NULL) {
159		cmn_err(CE_WARN, "Malformed brand");
160		return (EINVAL);
161	}
162
163	prev = NULL;
164	mutex_enter(&brand_list_lock);
165
166	for (list = brand_list; list != NULL; list = list->bl_next) {
167		if (list->bl_brand == brand)
168			break;
169		prev = list;
170	}
171
172	if (list == NULL) {
173		cmn_err(CE_WARN, "Brand %s wasn't registered", brand->b_name);
174		mutex_exit(&brand_list_lock);
175		return (EINVAL);
176	}
177
178	if (list->bl_refcnt > 0) {
179		cmn_err(CE_WARN, "Unregistering brand %s which is still in use",
180		    brand->b_name);
181		mutex_exit(&brand_list_lock);
182		return (EBUSY);
183	}
184
185	/* Remove brand from the list */
186	if (prev != NULL)
187		prev->bl_next = list->bl_next;
188	else
189		brand_list = list->bl_next;
190
191#if defined(__sparcv9)
192	/* sparcv9 uses system wide brand interposition hooks */
193	if (brand_list == NULL)
194		brand_plat_interposition_disable();
195#endif /* __sparcv9 */
196
197	mutex_exit(&brand_list_lock);
198
199	kmem_free(list, sizeof (struct brand_list));
200
201	return (0);
202}
203
204/*
205 * Record that a zone of this brand has been instantiated.  If the kernel
206 * module implementing this brand's functionality is not present, this
207 * routine attempts to load the module as a side effect.
208 */
209brand_t *
210brand_register_zone(struct brand_attr *attr)
211{
212	struct brand_list *l = NULL;
213	ddi_modhandle_t	hdl = NULL;
214	char *modname;
215	int err = 0;
216
217	if (is_system_labeled()) {
218		cmn_err(CE_WARN,
219		    "Branded zones are not allowed on labeled systems.");
220		return (NULL);
221	}
222
223	/*
224	 * We make at most two passes through this loop.  The first time
225	 * through, we're looking to see if this is a new user of an
226	 * already loaded brand.  If the brand hasn't been loaded, we
227	 * call ddi_modopen() to force it to be loaded and then make a
228	 * second pass through the list of brands.  If we don't find the
229	 * brand the second time through it means that the modname
230	 * specified in the brand_attr structure doesn't provide the brand
231	 * specified in the brandname field.  This would suggest a bug in
232	 * the brand's config.xml file.  We close the module and return
233	 * 'NULL' to the caller.
234	 */
235	for (;;) {
236		/*
237		 * Search list of loaded brands
238		 */
239		mutex_enter(&brand_list_lock);
240		for (l = brand_list; l != NULL; l = l->bl_next)
241			if (strcmp(attr->ba_brandname,
242			    l->bl_brand->b_name) == 0)
243				break;
244		if ((l != NULL) || (hdl != NULL))
245			break;
246		mutex_exit(&brand_list_lock);
247
248		/*
249		 * We didn't find that the requested brand has been loaded
250		 * yet, so we trigger the load of the appropriate kernel
251		 * module and search the list again.
252		 */
253		modname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
254		(void) strcpy(modname, "brand/");
255		(void) strcat(modname, attr->ba_modname);
256		hdl = ddi_modopen(modname, KRTLD_MODE_FIRST, &err);
257		kmem_free(modname, MAXPATHLEN);
258
259		if (err != 0)
260			return (NULL);
261	}
262
263	/*
264	 * If we found the matching brand, bump its reference count.
265	 */
266	if (l != NULL)
267		l->bl_refcnt++;
268
269	mutex_exit(&brand_list_lock);
270
271	if (hdl != NULL)
272		(void) ddi_modclose(hdl);
273
274	return ((l != NULL) ? l->bl_brand : NULL);
275}
276
277/*
278 * Return the number of zones currently using this brand.
279 */
280int
281brand_zone_count(struct brand *bp)
282{
283	struct brand_list *l;
284	int cnt = 0;
285
286	mutex_enter(&brand_list_lock);
287	for (l = brand_list; l != NULL; l = l->bl_next)
288		if (l->bl_brand == bp) {
289			cnt = l->bl_refcnt;
290			break;
291		}
292	mutex_exit(&brand_list_lock);
293
294	return (cnt);
295}
296
297void
298brand_unregister_zone(struct brand *bp)
299{
300	struct brand_list *list;
301
302	mutex_enter(&brand_list_lock);
303	for (list = brand_list; list != NULL; list = list->bl_next) {
304		if (list->bl_brand == bp) {
305			ASSERT(list->bl_refcnt > 0);
306			list->bl_refcnt--;
307			break;
308		}
309	}
310	mutex_exit(&brand_list_lock);
311}
312
313void
314brand_setbrand(proc_t *p)
315{
316	brand_t *bp = p->p_zone->zone_brand;
317
318	ASSERT(bp != NULL);
319	ASSERT(p->p_brand == &native_brand);
320
321	/*
322	 * We should only be called from exec(), when we know the process
323	 * is single-threaded.
324	 */
325	ASSERT(p->p_tlist == p->p_tlist->t_forw);
326
327	p->p_brand = bp;
328	ASSERT(PROC_IS_BRANDED(p));
329	BROP(p)->b_setbrand(p);
330}
331
332void
333brand_clearbrand(proc_t *p)
334{
335	brand_t *bp = p->p_zone->zone_brand;
336	ASSERT(bp != NULL);
337
338	/*
339	 * We should only be called from exec_common() or proc_exit(),
340	 * when we know the process is single-threaded.
341	 */
342	ASSERT(p->p_tlist == p->p_tlist->t_forw);
343
344	ASSERT(PROC_IS_BRANDED(p));
345	BROP(p)->b_proc_exit(p, p->p_tlist->t_lwp);
346	p->p_brand = &native_brand;
347}
348
349#if defined(__sparcv9)
350/*
351 * Currently, only sparc has system level brand syscall interposition.
352 * On x86 we're able to enable syscall interposition on a per-cpu basis
353 * when a branded thread is scheduled to run on a cpu.
354 */
355
/*
 * Local variables needed for dynamic syscall interposition support.
 * They hold the instructions found at the two patch points before
 * brand_plat_interposition_enable() overwrote them, so that
 * brand_plat_interposition_disable() can put them back.
 */
static uint32_t	syscall_trap_patch_instr_orig;
static uint32_t	syscall_trap32_patch_instr_orig;

/* Trap Table syscall entry hot patch points */
extern void	syscall_trap_patch_point(void);
extern void	syscall_trap32_patch_point(void);

/* Alternate syscall entry handlers used when branded zones are running */
extern void	syscall_wrapper(void);
extern void	syscall_wrapper32(void);

/*
 * Macros used to facilitate sparcv9 instruction generation.
 *
 * BA_A_INSTR is the opcode of a "ba,a" instruction with an empty
 * displacement field.  DISP22(from, to) yields the byte distance from
 * 'from' to 'to' divided by four and truncated to the low 22 bits, ready
 * to be OR-ed into BA_A_INSTR.
 */
#define	BA_A_INSTR	0x30800000	/* ba,a addr */
#define	DISP22(from, to) \
	((((uintptr_t)(to) - (uintptr_t)(from)) >> 2) & 0x3fffff)
372
/*
 * Turn on system wide syscall interposition by hot patching the 64-bit
 * and 32-bit syscall trap table entries so that they branch to
 * syscall_wrapper and syscall_wrapper32.  The instructions being replaced
 * are saved first so that brand_plat_interposition_disable() can restore
 * them.  The caller must hold brand_list_lock.
 */
/*ARGSUSED*/
static void
brand_plat_interposition_enable(void)
{
	ASSERT(MUTEX_HELD(&brand_list_lock));

	/*
	 * Before we hot patch the kernel save the current instructions
	 * so that we can restore them later.
	 */
	syscall_trap_patch_instr_orig =
	    *(uint32_t *)syscall_trap_patch_point;
	syscall_trap32_patch_instr_orig =
	    *(uint32_t *)syscall_trap32_patch_point;

	/*
	 * Modify the trap table at the patch points.
	 *
	 * We basically replace the first instruction at the patch
	 * point with a ba,a instruction that will transfer control
	 * to syscall_wrapper or syscall_wrapper32 for 64-bit and
	 * 32-bit syscalls respectively.  It's important to note that
	 * the annul bit is set in the branch so we don't execute
	 * the instruction directly following the one we're patching
	 * during the branch's delay slot.
	 *
	 * It also doesn't matter that we're not atomically updating both
	 * the 64 and 32 bit syscall paths at the same time since there's
	 * no actual branded processes running on the system yet.
	 */
	hot_patch_kernel_text((caddr_t)syscall_trap_patch_point,
	    BA_A_INSTR | DISP22(syscall_trap_patch_point, syscall_wrapper),
	    4);
	hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point,
	    BA_A_INSTR | DISP22(syscall_trap32_patch_point, syscall_wrapper32),
	    4);
}
410
/*
 * Turn off system wide syscall interposition by writing the instructions
 * saved by brand_plat_interposition_enable() back to the two syscall trap
 * table patch points.  The caller must hold brand_list_lock.
 */
/*ARGSUSED*/
static void
brand_plat_interposition_disable(void)
{
	ASSERT(MUTEX_HELD(&brand_list_lock));

	/*
	 * Restore the original instructions at the trap table syscall
	 * patch points to disable the brand syscall interposition
	 * mechanism.
	 */
	hot_patch_kernel_text((caddr_t)syscall_trap_patch_point,
	    syscall_trap_patch_instr_orig, 4);
	hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point,
	    syscall_trap32_patch_instr_orig, 4);
}
427#endif /* __sparcv9 */
428
429/*
430 * The following functions can be shared among kernel brand modules which
431 * implement Solaris-derived brands, all of which need to do similar tasks
432 * to manage the brand.
433 */
434
435#if defined(_LP64)
436static void
437Ehdr32to64(Elf32_Ehdr *src, Ehdr *dst)
438{
439	bcopy(src->e_ident, dst->e_ident, sizeof (src->e_ident));
440	dst->e_type =		src->e_type;
441	dst->e_machine =	src->e_machine;
442	dst->e_version =	src->e_version;
443	dst->e_entry =		src->e_entry;
444	dst->e_phoff =		src->e_phoff;
445	dst->e_shoff =		src->e_shoff;
446	dst->e_flags =		src->e_flags;
447	dst->e_ehsize =		src->e_ehsize;
448	dst->e_phentsize =	src->e_phentsize;
449	dst->e_phnum =		src->e_phnum;
450	dst->e_shentsize =	src->e_shentsize;
451	dst->e_shnum =		src->e_shnum;
452	dst->e_shstrndx =	src->e_shstrndx;
453}
454#endif /* _LP64 */
455
456/*
457 * Return -1 if the cmd was not handled by this function.
458 */
459/*ARGSUSED*/
460int
461brand_solaris_cmd(int cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
462    struct brand *pbrand, int brandvers)
463{
464	brand_proc_data_t	*spd;
465	brand_proc_reg_t	reg;
466	proc_t			*p = curproc;
467	int			err;
468
469	/*
470	 * There is one operation that is supported for a native
471	 * process; B_EXEC_BRAND.  This brand operaion is redundant
472	 * since the kernel assumes a native process doing an exec
473	 * in a branded zone is going to run a branded processes.
474	 * hence we don't support this operation.
475	 */
476	if (cmd == B_EXEC_BRAND)
477		return (ENOSYS);
478
479	/* For all other operations this must be a branded process. */
480	if (p->p_brand == &native_brand)
481		return (ENOSYS);
482
483	ASSERT(p->p_brand == pbrand);
484	ASSERT(p->p_brand_data != NULL);
485
486	spd = (brand_proc_data_t *)p->p_brand_data;
487
488	switch ((cmd)) {
489	case B_EXEC_NATIVE:
490		err = exec_common((char *)arg1, (const char **)arg2,
491		    (const char **)arg3, EBA_NATIVE);
492		return (err);
493
494	/*
495	 * Get the address of the user-space system call handler from
496	 * the user process and attach it to the proc structure.
497	 */
498	case B_REGISTER:
499		if (p->p_model == DATAMODEL_NATIVE) {
500			if (copyin((void *)arg1, &reg, sizeof (reg)) != 0)
501				return (EFAULT);
502		}
503#if defined(_LP64)
504		else {
505			brand_common_reg32_t reg32;
506
507			if (copyin((void *)arg1, &reg32, sizeof (reg32)) != 0)
508				return (EFAULT);
509			reg.sbr_version = reg32.sbr_version;
510			reg.sbr_handler = (caddr_t)(uintptr_t)reg32.sbr_handler;
511		}
512#endif /* _LP64 */
513
514		if (reg.sbr_version != brandvers)
515			return (ENOTSUP);
516		spd->spd_handler = reg.sbr_handler;
517		return (0);
518
519	case B_ELFDATA:
520		if (p->p_model == DATAMODEL_NATIVE) {
521			if (copyout(&spd->spd_elf_data, (void *)arg1,
522			    sizeof (brand_elf_data_t)) != 0)
523				return (EFAULT);
524		}
525#if defined(_LP64)
526		else {
527			brand_elf_data32_t sed32;
528
529			sed32.sed_phdr = spd->spd_elf_data.sed_phdr;
530			sed32.sed_phent = spd->spd_elf_data.sed_phent;
531			sed32.sed_phnum = spd->spd_elf_data.sed_phnum;
532			sed32.sed_entry = spd->spd_elf_data.sed_entry;
533			sed32.sed_base = spd->spd_elf_data.sed_base;
534			sed32.sed_ldentry = spd->spd_elf_data.sed_ldentry;
535			sed32.sed_lddata = spd->spd_elf_data.sed_lddata;
536			if (copyout(&sed32, (void *)arg1, sizeof (sed32))
537			    != 0)
538				return (EFAULT);
539		}
540#endif /* _LP64 */
541		return (0);
542
543	/*
544	 * The B_TRUSS_POINT subcommand exists so that we can see
545	 * truss output from interposed system calls that return
546	 * without first calling any other system call, meaning they
547	 * would be invisible to truss(1).
548	 * If the second argument is set non-zero, set errno to that
549	 * value as well.
550	 *
551	 * Common arguments seen with truss are:
552	 *
553	 *	arg1: syscall number
554	 *	arg2: errno
555	 */
556	case B_TRUSS_POINT:
557		return ((arg2 == 0) ? 0 : set_errno((uint_t)arg2));
558	}
559
560	return (-1);
561}
562
563/*ARGSUSED*/
564void
565brand_solaris_copy_procdata(proc_t *child, proc_t *parent, struct brand *pbrand)
566{
567	brand_proc_data_t	*spd;
568
569	ASSERT(parent->p_brand == pbrand);
570	ASSERT(child->p_brand == pbrand);
571	ASSERT(parent->p_brand_data != NULL);
572	ASSERT(child->p_brand_data == NULL);
573
574	/*
575	 * Just duplicate all the proc data of the parent for the
576	 * child
577	 */
578	spd = kmem_alloc(sizeof (brand_proc_data_t), KM_SLEEP);
579	bcopy(parent->p_brand_data, spd, sizeof (brand_proc_data_t));
580	child->p_brand_data = spd;
581}
582
583/*ARGSUSED*/
584int
585brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
586    intpdata_t *idatap, int level, long *execsz, int setid, caddr_t exec_file,
587    cred_t *cred, int brand_action, struct brand *pbrand, char *bname,
588    char *brandlib, char *brandlib32, char *brandlinker, char *brandlinker32)
589{
590
591	vnode_t		*nvp;
592	Ehdr		ehdr;
593	Addr		uphdr_vaddr;
594	intptr_t	voffset;
595	int		interp;
596	int		i, err;
597	struct execenv	env;
598	struct user	*up = PTOU(curproc);
599	brand_proc_data_t	*spd;
600	brand_elf_data_t sed, *sedp;
601	char		*linker;
602	uintptr_t	lddata; /* lddata of executable's linker */
603
604	ASSERT(curproc->p_brand == pbrand);
605	ASSERT(curproc->p_brand_data != NULL);
606
607	spd = (brand_proc_data_t *)curproc->p_brand_data;
608	sedp = &spd->spd_elf_data;
609
610	args->brandname = bname;
611
612	/*
613	 * We will exec the brand library and then map in the target
614	 * application and (optionally) the brand's default linker.
615	 */
616	if (args->to_model == DATAMODEL_NATIVE) {
617		args->emulator = brandlib;
618		linker = brandlinker;
619	}
620#if defined(_LP64)
621	else {
622		args->emulator = brandlib32;
623		linker = brandlinker32;
624	}
625#endif  /* _LP64 */
626
627	if ((err = lookupname(args->emulator, UIO_SYSSPACE, FOLLOW,
628	    NULLVPP, &nvp)) != 0) {
629		uprintf("%s: not found.", args->emulator);
630		return (err);
631	}
632
633	if (args->to_model == DATAMODEL_NATIVE) {
634		err = elfexec(nvp, uap, args, idatap, level + 1, execsz,
635		    setid, exec_file, cred, brand_action);
636	}
637#if defined(_LP64)
638	else {
639		err = elf32exec(nvp, uap, args, idatap, level + 1, execsz,
640		    setid, exec_file, cred, brand_action);
641	}
642#endif  /* _LP64 */
643	VN_RELE(nvp);
644	if (err != 0)
645		return (err);
646
647	/*
648	 * The u_auxv veCTors are set up by elfexec to point to the
649	 * brand emulation library and linker.  Save these so they can
650	 * be copied to the specific brand aux vectors.
651	 */
652	bzero(&sed, sizeof (sed));
653	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
654		switch (up->u_auxv[i].a_type) {
655		case AT_SUN_LDDATA:
656			sed.sed_lddata = up->u_auxv[i].a_un.a_val;
657			break;
658		case AT_BASE:
659			sed.sed_base = up->u_auxv[i].a_un.a_val;
660			break;
661		case AT_ENTRY:
662			sed.sed_entry = up->u_auxv[i].a_un.a_val;
663			break;
664		case AT_PHDR:
665			sed.sed_phdr = up->u_auxv[i].a_un.a_val;
666			break;
667		case AT_PHENT:
668			sed.sed_phent = up->u_auxv[i].a_un.a_val;
669			break;
670		case AT_PHNUM:
671			sed.sed_phnum = up->u_auxv[i].a_un.a_val;
672			break;
673		default:
674			break;
675		}
676	}
677	/* Make sure the emulator has an entry point */
678	ASSERT(sed.sed_entry != NULL);
679	ASSERT(sed.sed_phdr != NULL);
680
681	bzero(&env, sizeof (env));
682	if (args->to_model == DATAMODEL_NATIVE) {
683		err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr,
684		    &voffset, exec_file, &interp, &env.ex_bssbase,
685		    &env.ex_brkbase, &env.ex_brksize, NULL);
686	}
687#if defined(_LP64)
688	else {
689		Elf32_Ehdr ehdr32;
690		Elf32_Addr uphdr_vaddr32;
691		err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32,
692		    &voffset, exec_file, &interp, &env.ex_bssbase,
693		    &env.ex_brkbase, &env.ex_brksize, NULL);
694		Ehdr32to64(&ehdr32, &ehdr);
695
696		if (uphdr_vaddr32 == (Elf32_Addr)-1)
697			uphdr_vaddr = (Addr)-1;
698		else
699			uphdr_vaddr = uphdr_vaddr32;
700	}
701#endif  /* _LP64 */
702	if (err != 0)
703		return (err);
704
705	/*
706	 * Save off the important properties of the executable. The
707	 * brand library will ask us for this data later, when it is
708	 * initializing and getting ready to transfer control to the
709	 * brand application.
710	 */
711	if (uphdr_vaddr == (Addr)-1)
712		sedp->sed_phdr = voffset + ehdr.e_phoff;
713	else
714		sedp->sed_phdr = voffset + uphdr_vaddr;
715	sedp->sed_entry = voffset + ehdr.e_entry;
716	sedp->sed_phent = ehdr.e_phentsize;
717	sedp->sed_phnum = ehdr.e_phnum;
718
719	if (interp) {
720		if (ehdr.e_type == ET_DYN) {
721			/*
722			 * This is a shared object executable, so we
723			 * need to pick a reasonable place to put the
724			 * heap. Just don't use the first page.
725			 */
726			env.ex_brkbase = (caddr_t)PAGESIZE;
727			env.ex_bssbase = (caddr_t)PAGESIZE;
728		}
729
730		/*
731		 * If the program needs an interpreter (most do), map
732		 * it in and store relevant information about it in the
733		 * aux vector, where the brand library can find it.
734		 */
735		if ((err = lookupname(linker, UIO_SYSSPACE,
736		    FOLLOW, NULLVPP, &nvp)) != 0) {
737			uprintf("%s: not found.", brandlinker);
738			return (err);
739		}
740		if (args->to_model == DATAMODEL_NATIVE) {
741			err = mapexec_brand(nvp, args, &ehdr,
742			    &uphdr_vaddr, &voffset, exec_file, &interp,
743			    NULL, NULL, NULL, &lddata);
744		}
745#if defined(_LP64)
746		else {
747			Elf32_Ehdr ehdr32;
748			Elf32_Addr uphdr_vaddr32;
749			err = mapexec32_brand(nvp, args, &ehdr32,
750			    &uphdr_vaddr32, &voffset, exec_file, &interp,
751			    NULL, NULL, NULL, &lddata);
752			Ehdr32to64(&ehdr32, &ehdr);
753
754			if (uphdr_vaddr32 == (Elf32_Addr)-1)
755				uphdr_vaddr = (Addr)-1;
756			else
757				uphdr_vaddr = uphdr_vaddr32;
758		}
759#endif  /* _LP64 */
760		VN_RELE(nvp);
761		if (err != 0)
762			return (err);
763
764		/*
765		 * Now that we know the base address of the brand's
766		 * linker, place it in the aux vector.
767		 */
768		sedp->sed_base = voffset;
769		sedp->sed_ldentry = voffset + ehdr.e_entry;
770		sedp->sed_lddata = voffset + lddata;
771	} else {
772		/*
773		 * This program has no interpreter. The brand library
774		 * will jump to the address in the AT_SUN_BRAND_LDENTRY
775		 * aux vector, so in this case, put the entry point of
776		 * the main executable there.
777		 */
778		if (ehdr.e_type == ET_EXEC) {
779			/*
780			 * An executable with no interpreter, this must
781			 * be a statically linked executable, which
782			 * means we loaded it at the address specified
783			 * in the elf header, in which case the e_entry
784			 * field of the elf header is an absolute
785			 * address.
786			 */
787			sedp->sed_ldentry = ehdr.e_entry;
788			sedp->sed_entry = ehdr.e_entry;
789			sedp->sed_lddata = NULL;
790			sedp->sed_base = NULL;
791		} else {
792			/*
793			 * A shared object with no interpreter, we use
794			 * the calculated address from above.
795			 */
796			sedp->sed_ldentry = sedp->sed_entry;
797			sedp->sed_entry = NULL;
798			sedp->sed_phdr = NULL;
799			sedp->sed_phent = NULL;
800			sedp->sed_phnum = NULL;
801			sedp->sed_lddata = NULL;
802			sedp->sed_base = voffset;
803
804			if (ehdr.e_type == ET_DYN) {
805				/*
806				 * Delay setting the brkbase until the
807				 * first call to brk(); see elfexec()
808				 * for details.
809				 */
810				env.ex_bssbase = (caddr_t)0;
811				env.ex_brkbase = (caddr_t)0;
812				env.ex_brksize = 0;
813			}
814		}
815	}
816
817	env.ex_magic = elfmagic;
818	env.ex_vp = vp;
819	setexecenv(&env);
820
821	/*
822	 * It's time to manipulate the process aux vectors.  First
823	 * we need to update the AT_SUN_AUXFLAGS aux vector to set
824	 * the AF_SUN_NOPLM flag.
825	 */
826	if (args->to_model == DATAMODEL_NATIVE) {
827		auxv_t		auxflags_auxv;
828
829		if (copyin(args->auxp_auxflags, &auxflags_auxv,
830		    sizeof (auxflags_auxv)) != 0)
831			return (EFAULT);
832
833		ASSERT(auxflags_auxv.a_type == AT_SUN_AUXFLAGS);
834		auxflags_auxv.a_un.a_val |= AF_SUN_NOPLM;
835		if (copyout(&auxflags_auxv, args->auxp_auxflags,
836		    sizeof (auxflags_auxv)) != 0)
837			return (EFAULT);
838	}
839#if defined(_LP64)
840	else {
841		auxv32_t	auxflags_auxv32;
842
843		if (copyin(args->auxp_auxflags, &auxflags_auxv32,
844		    sizeof (auxflags_auxv32)) != 0)
845			return (EFAULT);
846
847		ASSERT(auxflags_auxv32.a_type == AT_SUN_AUXFLAGS);
848		auxflags_auxv32.a_un.a_val |= AF_SUN_NOPLM;
849		if (copyout(&auxflags_auxv32, args->auxp_auxflags,
850		    sizeof (auxflags_auxv32)) != 0)
851			return (EFAULT);
852	}
853#endif  /* _LP64 */
854
855	/* Second, copy out the brand specific aux vectors. */
856	if (args->to_model == DATAMODEL_NATIVE) {
857		auxv_t brand_auxv[] = {
858		    { AT_SUN_BRAND_AUX1, 0 },
859		    { AT_SUN_BRAND_AUX2, 0 },
860		    { AT_SUN_BRAND_AUX3, 0 }
861		};
862
863		ASSERT(brand_auxv[0].a_type ==
864		    AT_SUN_BRAND_COMMON_LDDATA);
865		brand_auxv[0].a_un.a_val = sed.sed_lddata;
866
867		if (copyout(&brand_auxv, args->auxp_brand,
868		    sizeof (brand_auxv)) != 0)
869			return (EFAULT);
870	}
871#if defined(_LP64)
872	else {
873		auxv32_t brand_auxv32[] = {
874		    { AT_SUN_BRAND_AUX1, 0 },
875		    { AT_SUN_BRAND_AUX2, 0 },
876		    { AT_SUN_BRAND_AUX3, 0 }
877		};
878
879		ASSERT(brand_auxv32[0].a_type == AT_SUN_BRAND_COMMON_LDDATA);
880		brand_auxv32[0].a_un.a_val = (uint32_t)sed.sed_lddata;
881		if (copyout(&brand_auxv32, args->auxp_brand,
882		    sizeof (brand_auxv32)) != 0)
883			return (EFAULT);
884	}
885#endif  /* _LP64 */
886
887	/*
888	 * Third, the /proc aux vectors set up by elfexec() point to
889	 * brand emulation library and it's linker.  Copy these to the
890	 * /proc brand specific aux vector, and update the regular
891	 * /proc aux vectors to point to the executable (and it's
892	 * linker).  This will enable debuggers to access the
893	 * executable via the usual /proc or elf notes aux vectors.
894	 *
895	 * The brand emulation library's linker will get it's aux
896	 * vectors off the stack, and then update the stack with the
897	 * executable's aux vectors before jumping to the executable's
898	 * linker.
899	 *
900	 * Debugging the brand emulation library must be done from
901	 * the global zone, where the librtld_db module knows how to
902	 * fetch the brand specific aux vectors to access the brand
903	 * emulation libraries linker.
904	 */
905	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
906		ulong_t val;
907
908		switch (up->u_auxv[i].a_type) {
909		case AT_SUN_BRAND_COMMON_LDDATA:
910			up->u_auxv[i].a_un.a_val = sed.sed_lddata;
911			continue;
912		case AT_BASE:
913			val = sedp->sed_base;
914			break;
915		case AT_ENTRY:
916			val = sedp->sed_entry;
917			break;
918		case AT_PHDR:
919			val = sedp->sed_phdr;
920			break;
921		case AT_PHENT:
922			val = sedp->sed_phent;
923			break;
924		case AT_PHNUM:
925			val = sedp->sed_phnum;
926			break;
927		case AT_SUN_LDDATA:
928			val = sedp->sed_lddata;
929			break;
930		default:
931			continue;
932		}
933
934		up->u_auxv[i].a_un.a_val = val;
935		if (val == NULL) {
936			/* Hide the entry for static binaries */
937			up->u_auxv[i].a_type = AT_IGNORE;
938		}
939	}
940
941	/*
942	 * The last thing we do here is clear spd->spd_handler.  This
943	 * is important because if we're already a branded process and
944	 * if this exec succeeds, there is a window between when the
945	 * exec() first returns to the userland of the new process and
946	 * when our brand library get's initialized, during which we
947	 * don't want system calls to be re-directed to our brand
948	 * library since it hasn't been initialized yet.
949	 */
950	spd->spd_handler = NULL;
951
952	return (0);
953}
954
955void
956brand_solaris_exec(struct brand *pbrand)
957{
958	brand_proc_data_t	*spd = curproc->p_brand_data;
959
960	ASSERT(curproc->p_brand == pbrand);
961	ASSERT(curproc->p_brand_data != NULL);
962	ASSERT(ttolwp(curthread)->lwp_brand != NULL);
963
964	/*
965	 * We should only be called from exec(), when we know the process
966	 * is single-threaded.
967	 */
968	ASSERT(curproc->p_tlist == curproc->p_tlist->t_forw);
969
970	/* Upon exec, reset our lwp brand data. */
971	(void) brand_solaris_freelwp(ttolwp(curthread), pbrand);
972	(void) brand_solaris_initlwp(ttolwp(curthread), pbrand);
973
974	/*
975	 * Upon exec, reset all the proc brand data, except for the elf
976	 * data associated with the executable we are exec'ing.
977	 */
978	spd->spd_handler = NULL;
979}
980
981int
982brand_solaris_fini(char **emul_table, struct modlinkage *modlinkage,
983    struct brand *pbrand)
984{
985	int err;
986
987	/*
988	 * If there are any zones using this brand, we can't allow it
989	 * to be unloaded.
990	 */
991	if (brand_zone_count(pbrand))
992		return (EBUSY);
993
994	kmem_free(*emul_table, NSYSCALL);
995	*emul_table = NULL;
996
997	err = mod_remove(modlinkage);
998	if (err)
999		cmn_err(CE_WARN, "Couldn't unload brand module");
1000
1001	return (err);
1002}
1003
/*
 * Brand hook for forking an lwp.  'p' is the parent lwp and 'c' the child.
 * No state is copied here; the function only asserts that both lwps
 * belong to processes of brand 'pbrand' which have brand data, and that
 * both lwps already carry lwp brand data.
 */
/*ARGSUSED*/
void
brand_solaris_forklwp(klwp_t *p, klwp_t *c, struct brand *pbrand)
{
	ASSERT(p->lwp_procp->p_brand == pbrand);
	ASSERT(c->lwp_procp->p_brand == pbrand);

	ASSERT(p->lwp_procp->p_brand_data != NULL);
	ASSERT(c->lwp_procp->p_brand_data != NULL);

	/*
	 * Both LWPs have already been initialized via
	 * brand_solaris_initlwp().
	 */
	ASSERT(p->lwp_brand != NULL);
	ASSERT(c->lwp_brand != NULL);
}
1021
/*
 * Release the brand data of lwp 'l'.  The lwp brand data is only a
 * non-NULL marker (see brand_solaris_initlwp()), so releasing it amounts
 * to resetting lwp_brand to NULL.
 */
/*ARGSUSED*/
void
brand_solaris_freelwp(klwp_t *l, struct brand *pbrand)
{
	ASSERT(l->lwp_procp->p_brand == pbrand);
	ASSERT(l->lwp_procp->p_brand_data != NULL);
	ASSERT(l->lwp_brand != NULL);
	l->lwp_brand = NULL;	/* nothing was allocated, so nothing to free */
}
1031
/*
 * Initialize the brand data of lwp 'l', which must not have any yet.  No
 * memory is allocated; lwp_brand is set to the non-NULL marker value
 * (void *)-1.  Always returns 0.
 */
/*ARGSUSED*/
int
brand_solaris_initlwp(klwp_t *l, struct brand *pbrand)
{
	ASSERT(l->lwp_procp->p_brand == pbrand);
	ASSERT(l->lwp_procp->p_brand_data != NULL);
	ASSERT(l->lwp_brand == NULL);
	l->lwp_brand = (void *)-1;	/* marker only, not a real pointer */
	return (0);
}
1042
1043/*ARGSUSED*/
1044void
1045brand_solaris_lwpexit(klwp_t *l, struct brand *pbrand)
1046{
1047	proc_t  *p = l->lwp_procp;
1048
1049	ASSERT(l->lwp_procp->p_brand == pbrand);
1050	ASSERT(l->lwp_procp->p_brand_data != NULL);
1051	ASSERT(l->lwp_brand != NULL);
1052
1053	/*
1054	 * We should never be called for the last thread in a process.
1055	 * (That case is handled by brand_solaris_proc_exit().)
1056	 * Therefore this lwp must be exiting from a multi-threaded
1057	 * process.
1058	 */
1059	ASSERT(p->p_tlist != p->p_tlist->t_forw);
1060
1061	l->lwp_brand = NULL;
1062}
1063
1064/*ARGSUSED*/
1065void
1066brand_solaris_proc_exit(struct proc *p, klwp_t *l, struct brand *pbrand)
1067{
1068	ASSERT(p->p_brand == pbrand);
1069	ASSERT(p->p_brand_data != NULL);
1070
1071	/*
1072	 * We should only be called from proc_exit(), when we know that
1073	 * process is single-threaded.
1074	 */
1075	ASSERT(p->p_tlist == p->p_tlist->t_forw);
1076
1077	/* upon exit, free our lwp brand data */
1078	(void) brand_solaris_freelwp(ttolwp(curthread), pbrand);
1079
1080	/* upon exit, free our proc brand data */
1081	kmem_free(p->p_brand_data, sizeof (brand_proc_data_t));
1082	p->p_brand_data = NULL;
1083}
1084
1085void
1086brand_solaris_setbrand(proc_t *p, struct brand *pbrand)
1087{
1088	ASSERT(p->p_brand == pbrand);
1089	ASSERT(p->p_brand_data == NULL);
1090
1091	/*
1092	 * We should only be called from exec(), when we know the process
1093	 * is single-threaded.
1094	 */
1095	ASSERT(p->p_tlist == p->p_tlist->t_forw);
1096
1097	p->p_brand_data = kmem_zalloc(sizeof (brand_proc_data_t), KM_SLEEP);
1098	(void) brand_solaris_initlwp(p->p_tlist->t_lwp, pbrand);
1099}
1100