s10_brand.c revision 11970:9c3f3660b754
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#include <sys/errno.h>
28#include <sys/exec.h>
29#include <sys/file.h>
30#include <sys/kmem.h>
31#include <sys/modctl.h>
32#include <sys/model.h>
33#include <sys/proc.h>
34#include <sys/syscall.h>
35#include <sys/systm.h>
36#include <sys/thread.h>
37#include <sys/cmn_err.h>
38#include <sys/archsystm.h>
39#include <sys/pathname.h>
40#include <sys/sunddi.h>
41
42#include <sys/machbrand.h>
43#include <sys/brand.h>
44#include "s10_brand.h"
45
46char *s10_emulation_table = NULL;
47
48void	s10_init_brand_data(zone_t *);
49void	s10_free_brand_data(zone_t *);
50void	s10_setbrand(proc_t *);
51int	s10_getattr(zone_t *, int, void *, size_t *);
52int	s10_setattr(zone_t *, int, void *, size_t);
53int	s10_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t,
54		uintptr_t, uintptr_t, uintptr_t);
55void	s10_copy_procdata(proc_t *, proc_t *);
56void	s10_proc_exit(struct proc *, klwp_t *);
57void	s10_exec();
58int	s10_initlwp(klwp_t *);
59void	s10_forklwp(klwp_t *, klwp_t *);
60void	s10_freelwp(klwp_t *);
61void	s10_lwpexit(klwp_t *);
62int	s10_elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int,
63	long *, int, caddr_t, cred_t *, int);
64void	s10_sigset_native_to_s10(sigset_t *);
65void	s10_sigset_s10_to_native(sigset_t *);
66
/*
 * s10 brand operations vector.
 *
 * The initializers are positional, so their order must match the member
 * order of struct brand_ops, which is declared outside this file.  Every
 * handler listed here except lwp_setrval is implemented in this file; the
 * final entry, S10_NSIG, is a constant rather than a function.
 */
struct brand_ops s10_brops = {
	s10_init_brand_data,
	s10_free_brand_data,
	s10_brandsys,
	s10_setbrand,
	s10_getattr,
	s10_setattr,
	s10_copy_procdata,
	s10_proc_exit,
	s10_exec,
	lwp_setrval,
	s10_initlwp,
	s10_forklwp,
	s10_freelwp,
	s10_lwpexit,
	s10_elfexec,
	s10_sigset_native_to_s10,
	s10_sigset_s10_to_native,
	S10_NSIG,
};
88
/*
 * Machine-dependent brand callbacks.  Exactly one definition of s10_mops is
 * compiled, selected by architecture.  The initializers are positional and
 * must match struct brand_mach_ops for that architecture (declared outside
 * this file).
 * NOTE(review): a NULL slot presumably means "no brand callback for this
 * entry path" -- confirm against the struct brand_mach_ops declaration.
 */
#ifdef	sparc

/* sparc: the syscall and syscall32 callbacks. */
struct brand_mach_ops s10_mops = {
	s10_brand_syscall_callback,
	s10_brand_syscall32_callback
};

#else	/* sparc */

#ifdef	__amd64

/* amd64: sysenter, int91, syscall and syscall32 callbacks; two NULL slots. */
struct brand_mach_ops s10_mops = {
	s10_brand_sysenter_callback,
	NULL,
	s10_brand_int91_callback,
	s10_brand_syscall_callback,
	s10_brand_syscall32_callback,
	NULL
};

#else	/* ! __amd64 */

/* other non-sparc builds: only the sysenter and syscall callbacks are set. */
struct brand_mach_ops s10_mops = {
	s10_brand_sysenter_callback,
	NULL,
	NULL,
	s10_brand_syscall_callback,
	NULL,
	NULL
};
#endif	/* __amd64 */

#endif	/* sparc */
122
/*
 * The brand definition itself: brand interface version, brand name, and
 * pointers to the brand operations (s10_brops) and machine-dependent
 * callbacks (s10_mops) defined earlier in this file.
 */
struct brand	s10_brand = {
	BRAND_VER_1,
	"solaris10",
	&s10_brops,
	&s10_mops
};

/* Brand module linkage element; referenced only from modlinkage below. */
static struct modlbrand modlbrand = {
	&mod_brandops,		/* type of module */
	"Solaris 10 Brand",	/* description of module */
	&s10_brand		/* brand definition */
};

/* Linkage passed to mod_install(), mod_info() and mod_remove(). */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlbrand, NULL
};
139
140void
141s10_setbrand(proc_t *p)
142{
143	ASSERT(p->p_brand == &s10_brand);
144	ASSERT(p->p_brand_data == NULL);
145
146	/*
147	 * We should only be called from exec(), when we know the process
148	 * is single-threaded.
149	 */
150	ASSERT(p->p_tlist == p->p_tlist->t_forw);
151
152	p->p_brand_data = kmem_zalloc(sizeof (s10_proc_data_t), KM_SLEEP);
153	(void) s10_initlwp(p->p_tlist->t_lwp);
154}
155
156/*ARGSUSED*/
157int
158s10_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize)
159{
160	ASSERT(zone->zone_brand == &s10_brand);
161	if (attr == S10_EMUL_BITMAP) {
162		if (buf == NULL || *bufsize != sizeof (s10_emul_bitmap_t))
163			return (EINVAL);
164		if (copyout(((s10_zone_data_t *)zone->zone_brand_data)->
165		    emul_bitmap, buf, sizeof (s10_emul_bitmap_t)) != 0)
166			return (EFAULT);
167		return (0);
168	}
169
170	return (EINVAL);
171}
172
173int
174s10_setattr(zone_t *zone, int attr, void *buf, size_t bufsize)
175{
176	ASSERT(zone->zone_brand == &s10_brand);
177	if (attr == S10_EMUL_BITMAP) {
178		if (buf == NULL || bufsize != sizeof (s10_emul_bitmap_t))
179			return (EINVAL);
180		if (copyin(buf, ((s10_zone_data_t *)zone->zone_brand_data)->
181		    emul_bitmap, sizeof (s10_emul_bitmap_t)) != 0)
182			return (EFAULT);
183		return (0);
184	}
185
186	return (EINVAL);
187}
188
189#ifdef	__amd64
190/*
191 * The Nevada kernel clears %fs for threads in 64-bit x86 processes but S10's
192 * libc expects %fs to be nonzero.  This causes some committed
193 * libc/libthread interfaces (e.g., thr_main()) to fail, which impacts several
194 * libraries, including libdoor.  This function sets the specified LWP's %fs
195 * register to the legacy S10 selector value (LWPFS_SEL).
196 *
197 * The best solution to the aforementioned problem is backporting CRs
198 * 6467491 to Solaris 10 so that 64-bit x86 Solaris 10 processes
199 * would accept zero for %fs.  Backporting the CRs is a requirement for running
200 * S10 Containers in PV domUs because 64-bit Xen clears %fsbase when %fs is
201 * nonzero.  Such behavior breaks 64-bit processes because Xen has to fetch the
202 * FS segments' base addresses from the LWPs' GDTs, which are only capable of
203 * 32-bit addressing.
204 */
205/*ARGSUSED*/
206static void
207s10_amd64_correct_fsreg(klwp_t *l)
208{
209	if (lwp_getdatamodel(l) == DATAMODEL_NATIVE) {
210		kpreempt_disable();
211		l->lwp_pcb.pcb_fs = LWPFS_SEL;
212		l->lwp_pcb.pcb_rupdate = 1;
213		lwptot(l)->t_post_sys = 1;	/* Guarantee update_sregs() */
214		kpreempt_enable();
215	}
216}
217#endif	/* __amd64 */
218
219int
220s10_native()
221{
222	struct user	*up = PTOU(curproc);
223	char		*args_new, *comm_new, *p;
224	int		len;
225
226	len = sizeof (S10_NATIVE_LINKER32 " ") - 1;
227
228	/*
229	 * Make sure that the process' interpreter is the native dynamic linker.
230	 * Convention dictates that native processes executing within solaris10-
231	 * branded zones are interpreted by the native dynamic linker (the
232	 * process and its arguments are specified as arguments to the dynamic
233	 * linker).  If this convention is violated (i.e.,
234	 * brandsys(B_S10_NATIVE, ...) is invoked by a process that shouldn't be
235	 * native), then do nothing and silently indicate success.
236	 */
237	if (strcmp(up->u_comm, S10_LINKER_NAME) != 0)
238		return (0);
239	if (strncmp(up->u_psargs, S10_NATIVE_LINKER64 " /", len + 4) == 0)
240		len += 3;		/* to account for "/64" in the path */
241	else if (strncmp(up->u_psargs, S10_NATIVE_LINKER32 " /", len + 1) != 0)
242		return (0);
243
244	args_new = strdup(&up->u_psargs[len]);
245	if ((p = strchr(args_new, ' ')) != NULL)
246		*p = '\0';
247	if ((comm_new = strrchr(args_new, '/')) != NULL)
248		comm_new = strdup(comm_new + 1);
249	else
250		comm_new = strdup(args_new);
251	if (p != NULL)
252		*p = ' ';
253
254	if ((strlen(args_new) != 0) && (strlen(comm_new) != 0)) {
255		mutex_enter(&curproc->p_lock);
256		(void) strlcpy(up->u_comm, comm_new, MAXCOMLEN+1);
257		(void) strlcpy(up->u_psargs, args_new, PSARGSZ);
258		mutex_exit(&curproc->p_lock);
259	}
260
261	strfree(args_new);
262	strfree(comm_new);
263	return (0);
264}
265
266/*
267 * Get the address of the user-space system call handler from the user
268 * process and attach it to the proc structure.
269 */
270/*ARGSUSED*/
271int
272s10_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
273    uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
274{
275	s10_proc_data_t	*spd;
276	s10_brand_reg_t	reg;
277	proc_t		*p = curproc;
278	int		err;
279
280	*rval = 0;
281
282	/*
283	 * B_EXEC_BRAND is redundant
284	 * since the kernel assumes a native process doing an exec
285	 * in a branded zone is going to run a branded processes.
286	 * hence we don't support this operation.
287	 */
288	if (cmd == B_EXEC_BRAND)
289		return (ENOSYS);
290
291	if (cmd == B_S10_NATIVE)
292		return (s10_native());
293
294	/* For all other operations this must be a branded process. */
295	if (p->p_brand == &native_brand)
296		return (ENOSYS);
297
298	ASSERT(p->p_brand == &s10_brand);
299	ASSERT(p->p_brand_data != NULL);
300
301	spd = (s10_proc_data_t *)p->p_brand_data;
302
303	switch (cmd) {
304	case B_EXEC_NATIVE:
305		err = exec_common(
306		    (char *)arg1, (const char **)arg2, (const char **)arg3,
307		    EBA_NATIVE);
308		return (err);
309
310	case B_REGISTER:
311		if (p->p_model == DATAMODEL_NATIVE) {
312			if (copyin((void *)arg1, &reg, sizeof (reg)) != 0)
313				return (EFAULT);
314#if defined(_LP64)
315		} else {
316			s10_brand_reg32_t reg32;
317
318			if (copyin((void *)arg1, &reg32, sizeof (reg32)) != 0)
319				return (EFAULT);
320			reg.sbr_version = reg32.sbr_version;
321			reg.sbr_handler = (caddr_t)(uintptr_t)reg32.sbr_handler;
322#endif /* _LP64 */
323		}
324
325		if (reg.sbr_version != S10_VERSION)
326			return (ENOTSUP);
327		spd->spd_handler = reg.sbr_handler;
328		return (0);
329
330	case B_ELFDATA:
331		if (p->p_model == DATAMODEL_NATIVE) {
332			if (copyout(&spd->spd_elf_data, (void *)arg1,
333			    sizeof (s10_elf_data_t)) != 0)
334				return (EFAULT);
335#if defined(_LP64)
336		} else {
337			s10_elf_data32_t sed32;
338
339			sed32.sed_phdr = spd->spd_elf_data.sed_phdr;
340			sed32.sed_phent = spd->spd_elf_data.sed_phent;
341			sed32.sed_phnum = spd->spd_elf_data.sed_phnum;
342			sed32.sed_entry = spd->spd_elf_data.sed_entry;
343			sed32.sed_base = spd->spd_elf_data.sed_base;
344			sed32.sed_ldentry = spd->spd_elf_data.sed_ldentry;
345			sed32.sed_lddata = spd->spd_elf_data.sed_lddata;
346			if (copyout(&sed32, (void *)arg1, sizeof (sed32)) != 0)
347				return (EFAULT);
348#endif /* _LP64 */
349		}
350		return (0);
351
352	case B_S10_PIDINFO:
353		/*
354		 * The s10 brand needs to be able to get the pid of the
355		 * current process and the pid of the zone's init, and it
356		 * needs to do this on every process startup.  Early in
357		 * brand startup, we can't call getpid() because calls to
358		 * getpid() represent a magical signal to some old-skool
359		 * debuggers.  By merging all of this into one call, we
360		 * make this quite a bit cheaper and easier to handle in
361		 * the brand module.
362		 */
363		if (copyout(&p->p_pid, (void *)arg1, sizeof (pid_t)) != 0)
364			return (EFAULT);
365		if (copyout(&p->p_zone->zone_proc_initpid, (void *)arg2,
366		    sizeof (pid_t)) != 0)
367			return (EFAULT);
368		return (0);
369
370	case B_S10_TRUSS_POINT:
371		/*
372		 * This subcommand exists so that we can see truss output
373		 * from interposed system calls that return without first
374		 * calling any other system call, meaning they would be
375		 * invisible to truss(1).
376		 *
377		 * If the second argument is set non-zero, set errno to that
378		 * value as well.
379		 *
380		 * Arguments are:
381		 *
382		 *    arg1: syscall number
383		 *    arg2: errno
384		 */
385		return ((arg2 == 0) ? 0 : set_errno((uint_t)arg2));
386
387	case B_S10_ISFDXATTRDIR: {
388		/*
389		 * This subcommand enables the userland brand emulation library
390		 * to determine whether a file descriptor refers to an extended
391		 * file attributes directory.  There is no standard syscall or
392		 * libc function that can make such a determination.
393		 */
394		file_t *dir_filep;
395
396		dir_filep = getf((int)arg1);
397		if (dir_filep == NULL)
398			return (EBADF);
399		ASSERT(dir_filep->f_vnode != NULL);
400		*rval = IS_XATTRDIR(dir_filep->f_vnode);
401		releasef((int)arg1);
402		return (0);
403	}
404
405#ifdef	__amd64
406	case B_S10_FSREGCORRECTION:
407		/*
408		 * This subcommand exists so that the SYS_lwp_private and
409		 * SYS_lwp_create syscalls can manually set the current thread's
410		 * %fs register to the legacy S10 selector value for 64-bit x86
411		 * processes.
412		 */
413		s10_amd64_correct_fsreg(ttolwp(curthread));
414		return (0);
415#endif	/* __amd64 */
416	}
417
418	return (EINVAL);
419}
420
421/*
422 * Copy the per-process brand data from a parent proc to a child.
423 */
424void
425s10_copy_procdata(proc_t *child, proc_t *parent)
426{
427	s10_proc_data_t	*spd;
428
429	ASSERT(parent->p_brand == &s10_brand);
430	ASSERT(child->p_brand == &s10_brand);
431	ASSERT(parent->p_brand_data != NULL);
432	ASSERT(child->p_brand_data == NULL);
433
434	/* Just duplicate all the proc data of the parent for the child */
435	spd = kmem_alloc(sizeof (s10_proc_data_t), KM_SLEEP);
436	bcopy(parent->p_brand_data, spd, sizeof (s10_proc_data_t));
437	child->p_brand_data = spd;
438}
439
440/*ARGSUSED*/
441void
442s10_proc_exit(struct proc *p, klwp_t *l)
443{
444	ASSERT(p->p_brand == &s10_brand);
445	ASSERT(p->p_brand_data != NULL);
446
447	/*
448	 * We should only be called from proc_exit(), when we know that
449	 * process is single-threaded.
450	 */
451	ASSERT(p->p_tlist == p->p_tlist->t_forw);
452
453	/* upon exit, free our lwp brand data */
454	(void) s10_freelwp(ttolwp(curthread));
455
456	/* upon exit, free our proc brand data */
457	kmem_free(p->p_brand_data, sizeof (s10_proc_data_t));
458	p->p_brand_data = NULL;
459}
460
461void
462s10_exec()
463{
464	s10_proc_data_t	*spd = curproc->p_brand_data;
465
466	ASSERT(curproc->p_brand == &s10_brand);
467	ASSERT(curproc->p_brand_data != NULL);
468	ASSERT(ttolwp(curthread)->lwp_brand != NULL);
469
470	/*
471	 * We should only be called from exec(), when we know the process
472	 * is single-threaded.
473	 */
474	ASSERT(curproc->p_tlist == curproc->p_tlist->t_forw);
475
476	/* Upon exec, reset our lwp brand data. */
477	(void) s10_freelwp(ttolwp(curthread));
478	(void) s10_initlwp(ttolwp(curthread));
479
480	/*
481	 * Upon exec, reset all the proc brand data, except for the elf
482	 * data associated with the executable we are exec'ing.
483	 */
484	spd->spd_handler = NULL;
485}
486
/*
 * Initialize the brand state of an LWP in an s10-branded process.
 * Always returns 0.
 */
/*ARGSUSED*/
int
s10_initlwp(klwp_t *l)
{
	ASSERT(l->lwp_procp->p_brand == &s10_brand);
	ASSERT(l->lwp_procp->p_brand_data != NULL);
	ASSERT(l->lwp_brand == NULL);
	/*
	 * Nothing is allocated per LWP; a non-NULL sentinel simply marks the
	 * LWP as initialized (s10_freelwp() resets it to NULL).
	 */
	l->lwp_brand = (void *)-1;
	return (0);
}
497
/*
 * Brand hook run when LWP p is forked into LWP c.  Apart from sanity checks
 * its only work is on amd64, where the child's %fs may need correcting.
 */
/*ARGSUSED*/
void
s10_forklwp(klwp_t *p, klwp_t *c)
{
	ASSERT(p->lwp_procp->p_brand == &s10_brand);
	ASSERT(c->lwp_procp->p_brand == &s10_brand);

	ASSERT(p->lwp_procp->p_brand_data != NULL);
	ASSERT(c->lwp_procp->p_brand_data != NULL);

	/* Both LWPs have already been initialized via s10_initlwp() */
	ASSERT(p->lwp_brand != NULL);
	ASSERT(c->lwp_brand != NULL);

#ifdef	__amd64
	/*
	 * Only correct the child's %fs register if the parent's %fs register
	 * is LWPFS_SEL.  If the parent's %fs register is zero, then the Solaris
	 * 10 environment that we're emulating uses a version of libc that
	 * works when %fs is zero (i.e., it contains backports of CRs 6467491
	 * and 6501650).
	 */
	if (p->lwp_pcb.pcb_fs == LWPFS_SEL)
		s10_amd64_correct_fsreg(c);
#endif	/* __amd64 */
}
524
/*
 * Release the brand state of an LWP.  Nothing was allocated for it, so this
 * only resets the lwp_brand marker to NULL.
 */
/*ARGSUSED*/
void
s10_freelwp(klwp_t *l)
{
	ASSERT(l->lwp_procp->p_brand == &s10_brand);
	ASSERT(l->lwp_procp->p_brand_data != NULL);
	ASSERT(l->lwp_brand != NULL);
	l->lwp_brand = NULL;
}
534
/*
 * Brand hook run when an LWP of a multi-threaded branded process exits;
 * clears the LWP's brand marker.
 */
/*ARGSUSED*/
void
s10_lwpexit(klwp_t *l)
{
	ASSERT(l->lwp_procp->p_brand == &s10_brand);
	ASSERT(l->lwp_procp->p_brand_data != NULL);
	ASSERT(l->lwp_brand != NULL);

	/*
	 * We should never be called for the last thread in a process.
	 * (That case is handled by s10_proc_exit().)  Therefore this lwp
	 * must be exiting from a multi-threaded process.
	 */
	ASSERT(l->lwp_procp->p_tlist != l->lwp_procp->p_tlist->t_forw);

	l->lwp_brand = NULL;
}
552
553void
554s10_free_brand_data(zone_t *zone)
555{
556	kmem_free(zone->zone_brand_data, sizeof (s10_zone_data_t));
557}
558
559void
560s10_init_brand_data(zone_t *zone)
561{
562	ASSERT(zone->zone_brand == &s10_brand);
563	ASSERT(zone->zone_brand_data == NULL);
564	zone->zone_brand_data = kmem_zalloc(sizeof (s10_zone_data_t), KM_SLEEP);
565}
566
567#if defined(_LP64)
568static void
569Ehdr32to64(Elf32_Ehdr *src, Ehdr *dst)
570{
571	bcopy(src->e_ident, dst->e_ident, sizeof (src->e_ident));
572	dst->e_type =		src->e_type;
573	dst->e_machine =	src->e_machine;
574	dst->e_version =	src->e_version;
575	dst->e_entry =		src->e_entry;
576	dst->e_phoff =		src->e_phoff;
577	dst->e_shoff =		src->e_shoff;
578	dst->e_flags =		src->e_flags;
579	dst->e_ehsize =		src->e_ehsize;
580	dst->e_phentsize =	src->e_phentsize;
581	dst->e_phnum =		src->e_phnum;
582	dst->e_shentsize =	src->e_shentsize;
583	dst->e_shnum =		src->e_shnum;
584	dst->e_shstrndx =	src->e_shstrndx;
585}
586#endif /* _LP64 */
587
588int
589s10_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
590	int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
591	int brand_action)
592{
593	vnode_t		*nvp;
594	Ehdr		ehdr;
595	Addr		uphdr_vaddr;
596	intptr_t	voffset;
597	int		interp;
598	int		i, err;
599	struct execenv	env;
600	struct user	*up = PTOU(curproc);
601	s10_proc_data_t	*spd;
602	s10_elf_data_t	sed, *sedp;
603	char		*linker;
604	uintptr_t	lddata; /* lddata of executable's linker */
605
606	ASSERT(curproc->p_brand == &s10_brand);
607	ASSERT(curproc->p_brand_data != NULL);
608
609	spd = (s10_proc_data_t *)curproc->p_brand_data;
610	sedp = &spd->spd_elf_data;
611
612	args->brandname = S10_BRANDNAME;
613
614	/*
615	 * We will exec the brand library and then map in the target
616	 * application and (optionally) the brand's default linker.
617	 */
618	if (args->to_model == DATAMODEL_NATIVE) {
619		args->emulator = S10_LIB;
620		linker = S10_LINKER;
621#if defined(_LP64)
622	} else {
623		args->emulator = S10_LIB32;
624		linker = S10_LINKER32;
625#endif /* _LP64 */
626	}
627
628	if ((err = lookupname(args->emulator, UIO_SYSSPACE, FOLLOW, NULLVPP,
629	    &nvp)) != 0) {
630		uprintf("%s: not found.", args->emulator);
631		return (err);
632	}
633
634	if (args->to_model == DATAMODEL_NATIVE) {
635		err = elfexec(nvp, uap, args, idatap, level + 1, execsz,
636		    setid, exec_file, cred, brand_action);
637#if defined(_LP64)
638	} else {
639		err = elf32exec(nvp, uap, args, idatap, level + 1, execsz,
640		    setid, exec_file, cred, brand_action);
641#endif /* _LP64 */
642	}
643	VN_RELE(nvp);
644	if (err != 0)
645		return (err);
646
647	/*
648	 * The u_auxv vectors are set up by elfexec to point to the brand
649	 * emulation library and linker.  Save these so they can be copied to
650	 * the specific brand aux vectors.
651	 */
652	bzero(&sed, sizeof (sed));
653	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
654		switch (up->u_auxv[i].a_type) {
655		case AT_SUN_LDDATA:
656			sed.sed_lddata = up->u_auxv[i].a_un.a_val;
657			break;
658		case AT_BASE:
659			sed.sed_base = up->u_auxv[i].a_un.a_val;
660			break;
661		case AT_ENTRY:
662			sed.sed_entry = up->u_auxv[i].a_un.a_val;
663			break;
664		case AT_PHDR:
665			sed.sed_phdr = up->u_auxv[i].a_un.a_val;
666			break;
667		case AT_PHENT:
668			sed.sed_phent = up->u_auxv[i].a_un.a_val;
669			break;
670		case AT_PHNUM:
671			sed.sed_phnum = up->u_auxv[i].a_un.a_val;
672			break;
673		default:
674			break;
675		}
676	}
677	/* Make sure the emulator has an entry point */
678	ASSERT(sed.sed_entry != NULL);
679	ASSERT(sed.sed_phdr != NULL);
680
681	bzero(&env, sizeof (env));
682	if (args->to_model == DATAMODEL_NATIVE) {
683		err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr, &voffset,
684		    exec_file, &interp, &env.ex_bssbase, &env.ex_brkbase,
685		    &env.ex_brksize, NULL);
686#if defined(_LP64)
687	} else {
688		Elf32_Ehdr ehdr32;
689		Elf32_Addr uphdr_vaddr32;
690		err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32,
691		    &voffset, exec_file, &interp, &env.ex_bssbase,
692		    &env.ex_brkbase, &env.ex_brksize, NULL);
693		Ehdr32to64(&ehdr32, &ehdr);
694		if (uphdr_vaddr32 == (Elf32_Addr)-1)
695			uphdr_vaddr = (Addr)-1;
696		else
697			uphdr_vaddr = uphdr_vaddr32;
698#endif /* _LP64 */
699	}
700	if (err != 0)
701		return (err);
702
703	/*
704	 * Save off the important properties of the executable. The brand
705	 * library will ask us for this data later, when it is initializing
706	 * and getting ready to transfer control to the brand application.
707	 */
708	if (uphdr_vaddr == (Addr)-1)
709		sedp->sed_phdr = voffset + ehdr.e_phoff;
710	else
711		sedp->sed_phdr = voffset + uphdr_vaddr;
712	sedp->sed_entry = voffset + ehdr.e_entry;
713	sedp->sed_phent = ehdr.e_phentsize;
714	sedp->sed_phnum = ehdr.e_phnum;
715
716	if (interp) {
717		if (ehdr.e_type == ET_DYN) {
718			/*
719			 * This is a shared object executable, so we need to
720			 * pick a reasonable place to put the heap. Just don't
721			 * use the first page.
722			 */
723			env.ex_brkbase = (caddr_t)PAGESIZE;
724			env.ex_bssbase = (caddr_t)PAGESIZE;
725		}
726
727		/*
728		 * If the program needs an interpreter (most do), map it in and
729		 * store relevant information about it in the aux vector, where
730		 * the brand library can find it.
731		 */
732		if ((err = lookupname(linker, UIO_SYSSPACE,
733		    FOLLOW, NULLVPP, &nvp)) != 0) {
734			uprintf("%s: not found.", S10_LINKER);
735			return (err);
736		}
737		if (args->to_model == DATAMODEL_NATIVE) {
738			err = mapexec_brand(nvp, args, &ehdr,
739			    &uphdr_vaddr, &voffset, exec_file, &interp,
740			    NULL, NULL, NULL, &lddata);
741#if defined(_LP64)
742		} else {
743			Elf32_Ehdr ehdr32;
744			Elf32_Addr uphdr_vaddr32;
745			err = mapexec32_brand(nvp, args, &ehdr32,
746			    &uphdr_vaddr32, &voffset, exec_file, &interp,
747			    NULL, NULL, NULL, &lddata);
748			Ehdr32to64(&ehdr32, &ehdr);
749			if (uphdr_vaddr32 == (Elf32_Addr)-1)
750				uphdr_vaddr = (Addr)-1;
751			else
752				uphdr_vaddr = uphdr_vaddr32;
753#endif /* _LP64 */
754		}
755		VN_RELE(nvp);
756		if (err != 0)
757			return (err);
758
759		/*
760		 * Now that we know the base address of the brand's linker,
761		 * place it in the aux vector.
762		 */
763		sedp->sed_base = voffset;
764		sedp->sed_ldentry = voffset + ehdr.e_entry;
765		sedp->sed_lddata = voffset + lddata;
766	} else {
767		/*
768		 * This program has no interpreter. The brand library will
769		 * jump to the address in the AT_SUN_BRAND_LDENTRY aux vector,
770		 * so in this case, put the entry point of the main executable
771		 * there.
772		 */
773		if (ehdr.e_type == ET_EXEC) {
774			/*
775			 * An executable with no interpreter, this must be a
776			 * statically linked executable, which means we loaded
777			 * it at the address specified in the elf header, in
778			 * which case the e_entry field of the elf header is an
779			 * absolute address.
780			 */
781			sedp->sed_ldentry = ehdr.e_entry;
782			sedp->sed_entry = ehdr.e_entry;
783			sedp->sed_lddata = NULL;
784			sedp->sed_base = NULL;
785		} else {
786			/*
787			 * A shared object with no interpreter, we use the
788			 * calculated address from above.
789			 */
790			sedp->sed_ldentry = sedp->sed_entry;
791			sedp->sed_entry = NULL;
792			sedp->sed_phdr = NULL;
793			sedp->sed_phent = NULL;
794			sedp->sed_phnum = NULL;
795			sedp->sed_lddata = NULL;
796			sedp->sed_base = voffset;
797
798			if (ehdr.e_type == ET_DYN) {
799				/*
800				 * Delay setting the brkbase until the first
801				 * call to brk(); see elfexec() for details.
802				 */
803				env.ex_bssbase = (caddr_t)0;
804				env.ex_brkbase = (caddr_t)0;
805				env.ex_brksize = 0;
806			}
807		}
808	}
809
810	env.ex_magic = elfmagic;
811	env.ex_vp = vp;
812	setexecenv(&env);
813
814	/*
815	 * It's time to manipulate the process aux vectors.  First
816	 * we need to update the AT_SUN_AUXFLAGS aux vector to set
817	 * the AF_SUN_NOPLM flag.
818	 */
819	if (args->to_model == DATAMODEL_NATIVE) {
820		auxv_t		auxflags_auxv;
821
822		if (copyin(args->auxp_auxflags, &auxflags_auxv,
823		    sizeof (auxflags_auxv)) != 0)
824			return (EFAULT);
825
826		ASSERT(auxflags_auxv.a_type == AT_SUN_AUXFLAGS);
827		auxflags_auxv.a_un.a_val |= AF_SUN_NOPLM;
828		if (copyout(&auxflags_auxv, args->auxp_auxflags,
829		    sizeof (auxflags_auxv)) != 0)
830			return (EFAULT);
831#if defined(_LP64)
832	} else {
833		auxv32_t	auxflags_auxv32;
834
835		if (copyin(args->auxp_auxflags, &auxflags_auxv32,
836		    sizeof (auxflags_auxv32)) != 0)
837			return (EFAULT);
838
839		ASSERT(auxflags_auxv32.a_type == AT_SUN_AUXFLAGS);
840		auxflags_auxv32.a_un.a_val |= AF_SUN_NOPLM;
841		if (copyout(&auxflags_auxv32, args->auxp_auxflags,
842		    sizeof (auxflags_auxv32)) != 0)
843			return (EFAULT);
844#endif /* _LP64 */
845	}
846
847	/* Second, copy out the brand specific aux vectors. */
848	if (args->to_model == DATAMODEL_NATIVE) {
849		auxv_t s10_auxv[] = {
850		    { AT_SUN_BRAND_AUX1, 0 },
851		    { AT_SUN_BRAND_AUX2, 0 },
852		    { AT_SUN_BRAND_AUX3, 0 }
853		};
854
855		ASSERT(s10_auxv[0].a_type == AT_SUN_BRAND_S10_LDDATA);
856		s10_auxv[0].a_un.a_val = sed.sed_lddata;
857
858		if (copyout(&s10_auxv, args->auxp_brand,
859		    sizeof (s10_auxv)) != 0)
860			return (EFAULT);
861#if defined(_LP64)
862	} else {
863		auxv32_t s10_auxv32[] = {
864		    { AT_SUN_BRAND_AUX1, 0 },
865		    { AT_SUN_BRAND_AUX2, 0 },
866		    { AT_SUN_BRAND_AUX3, 0 }
867		};
868
869		ASSERT(s10_auxv32[0].a_type == AT_SUN_BRAND_S10_LDDATA);
870		s10_auxv32[0].a_un.a_val = (uint32_t)sed.sed_lddata;
871		if (copyout(&s10_auxv32, args->auxp_brand,
872		    sizeof (s10_auxv32)) != 0)
873			return (EFAULT);
874#endif /* _LP64 */
875	}
876
877	/*
878	 * Third, the the /proc aux vectors set up by elfexec() point to brand
879	 * emulation library and it's linker.  Copy these to the /proc brand
880	 * specific aux vector, and update the regular /proc aux vectors to
881	 * point to the executable (and it's linker).  This will enable
882	 * debuggers to access the executable via the usual /proc or elf notes
883	 * aux vectors.
884	 *
885	 * The brand emulation library's linker will get it's aux vectors off
886	 * the stack, and then update the stack with the executable's aux
887	 * vectors before jumping to the executable's linker.
888	 *
889	 * Debugging the brand emulation library must be done from
890	 * the global zone, where the librtld_db module knows how to fetch the
891	 * brand specific aux vectors to access the brand emulation libraries
892	 * linker.
893	 */
894	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
895		ulong_t val;
896
897		switch (up->u_auxv[i].a_type) {
898		case AT_SUN_BRAND_S10_LDDATA:
899			up->u_auxv[i].a_un.a_val = sed.sed_lddata;
900			continue;
901		case AT_BASE:
902			val = sedp->sed_base;
903			break;
904		case AT_ENTRY:
905			val = sedp->sed_entry;
906			break;
907		case AT_PHDR:
908			val = sedp->sed_phdr;
909			break;
910		case AT_PHENT:
911			val = sedp->sed_phent;
912			break;
913		case AT_PHNUM:
914			val = sedp->sed_phnum;
915			break;
916		case AT_SUN_LDDATA:
917			val = sedp->sed_lddata;
918			break;
919		default:
920			continue;
921		}
922
923		up->u_auxv[i].a_un.a_val = val;
924		if (val == NULL) {
925			/* Hide the entry for static binaries */
926			up->u_auxv[i].a_type = AT_IGNORE;
927		}
928	}
929
930	/*
931	 * The last thing we do here is clear spd->spd_handler.  This is
932	 * important because if we're already a branded process and if this
933	 * exec succeeds, there is a window between when the exec() first
934	 * returns to the userland of the new process and when our brand
935	 * library get's initialized, during which we don't want system
936	 * calls to be re-directed to our brand library since it hasn't
937	 * been initialized yet.
938	 */
939	spd->spd_handler = NULL;
940
941	return (0);
942}
943
944void
945s10_sigset_native_to_s10(sigset_t *set)
946{
947	int nativesig;
948	int s10sig;
949	sigset_t s10set;
950
951	/*
952	 * Shortcut: we know the first 32 signals are the same in both
953	 * s10 and native Solaris.  Just assign the first word.
954	 */
955	s10set.__sigbits[0] = set->__sigbits[0];
956	s10set.__sigbits[1] = 0;
957	s10set.__sigbits[2] = 0;
958	s10set.__sigbits[3] = 0;
959
960	/*
961	 * Copy the remainder of the initial set of common signals.
962	 */
963	for (nativesig = 33; nativesig < S10_SIGRTMIN; nativesig++)
964		if (sigismember(set, nativesig))
965			sigaddset(&s10set, nativesig);
966
967	/*
968	 * Convert any native RT signals to their S10 values.
969	 */
970	for (nativesig = _SIGRTMIN, s10sig = S10_SIGRTMIN;
971	    nativesig <= _SIGRTMAX && s10sig <= S10_SIGRTMAX;
972	    nativesig++, s10sig++) {
973		if (sigismember(set, nativesig))
974			sigaddset(&s10set, s10sig);
975	}
976
977	*set = s10set;
978}
979
980void
981s10_sigset_s10_to_native(sigset_t *set)
982{
983	int s10sig;
984	int nativesig;
985	sigset_t nativeset;
986
987	/*
988	 * Shortcut: we know the first 32 signals are the same in both
989	 * s10 and native Solaris.  Just assign the first word.
990	 */
991	nativeset.__sigbits[0] = set->__sigbits[0];
992	nativeset.__sigbits[1] = 0;
993	nativeset.__sigbits[2] = 0;
994	nativeset.__sigbits[3] = 0;
995
996	/*
997	 * Copy the remainder of the initial set of common signals.
998	 */
999	for (s10sig = 33; s10sig < S10_SIGRTMIN; s10sig++)
1000		if (sigismember(set, s10sig))
1001			sigaddset(&nativeset, s10sig);
1002
1003	/*
1004	 * Convert any S10 RT signals to their native values.
1005	 */
1006	for (s10sig = S10_SIGRTMIN, nativesig = _SIGRTMIN;
1007	    s10sig <= S10_SIGRTMAX && nativesig <= _SIGRTMAX;
1008	    s10sig++, nativesig++) {
1009		if (sigismember(set, s10sig))
1010			sigaddset(&nativeset, nativesig);
1011	}
1012
1013	*set = nativeset;
1014}
1015
1016int
1017_init(void)
1018{
1019	int err;
1020
1021	/*
1022	 * Set up the table indicating which system calls we want to
1023	 * interpose on.  We should probably build this automatically from
1024	 * a list of system calls that is shared with the user-space
1025	 * library.
1026	 */
1027	s10_emulation_table = kmem_zalloc(NSYSCALL, KM_SLEEP);
1028	s10_emulation_table[S10_SYS_forkall] = 1;		/*   2 */
1029	s10_emulation_table[S10_SYS_open] = 1;			/*   5 */
1030	s10_emulation_table[S10_SYS_wait] = 1;			/*   7 */
1031	s10_emulation_table[S10_SYS_creat] = 1;			/*   8 */
1032	s10_emulation_table[S10_SYS_unlink] = 1;		/*  10 */
1033	s10_emulation_table[S10_SYS_exec] = 1;			/*  11 */
1034	s10_emulation_table[S10_SYS_chown] = 1;			/*  16 */
1035	s10_emulation_table[S10_SYS_stat] = 1;			/*  18 */
1036	s10_emulation_table[S10_SYS_umount] = 1;		/*  22 */
1037	s10_emulation_table[S10_SYS_fstat] = 1;			/*  28 */
1038	s10_emulation_table[S10_SYS_utime] = 1;			/*  30 */
1039	s10_emulation_table[S10_SYS_access] = 1;		/*  33 */
1040	s10_emulation_table[SYS_kill] = 1;			/*  37 */
1041	s10_emulation_table[S10_SYS_dup] = 1;			/*  41 */
1042	s10_emulation_table[SYS_ioctl] = 1;			/*  54 */
1043	s10_emulation_table[SYS_execve] = 1;			/*  59 */
1044	s10_emulation_table[SYS_acctctl] = 1;			/*  71 */
1045	s10_emulation_table[S10_SYS_issetugid] = 1;		/*  75 */
1046	s10_emulation_table[S10_SYS_fsat] = 1;			/*  76 */
1047	s10_emulation_table[S10_SYS_rmdir] = 1;			/*  79 */
1048	s10_emulation_table[SYS_getdents] = 1;			/*  81 */
1049	s10_emulation_table[S10_SYS_poll] = 1;			/*  87 */
1050	s10_emulation_table[S10_SYS_lstat] = 1;			/*  88 */
1051	s10_emulation_table[S10_SYS_fchown] = 1;		/*  94 */
1052	s10_emulation_table[SYS_sigprocmask] = 1;		/*  95 */
1053	s10_emulation_table[SYS_sigsuspend] = 1;		/*  96 */
1054	s10_emulation_table[SYS_sigaction] = 1;			/*  98 */
1055	s10_emulation_table[SYS_sigpending] = 1;		/*  99 */
1056	s10_emulation_table[SYS_waitid] = 1;			/* 107 */
1057	s10_emulation_table[SYS_sigsendsys] = 1;		/* 108 */
1058#if defined(__x86)
1059	s10_emulation_table[S10_SYS_xstat] = 1;			/* 123 */
1060	s10_emulation_table[S10_SYS_lxstat] = 1;		/* 124 */
1061	s10_emulation_table[S10_SYS_fxstat] = 1;		/* 125 */
1062	s10_emulation_table[S10_SYS_xmknod] = 1;		/* 126 */
1063#endif
1064	s10_emulation_table[S10_SYS_lchown] = 1;		/* 130 */
1065	s10_emulation_table[S10_SYS_rename] = 1;		/* 134 */
1066	s10_emulation_table[SYS_uname] = 1;			/* 135 */
1067	s10_emulation_table[SYS_sysconfig] = 1;			/* 137 */
1068	s10_emulation_table[SYS_systeminfo] = 1;		/* 139 */
1069	s10_emulation_table[S10_SYS_fork1] = 1;			/* 143 */
1070	s10_emulation_table[SYS_sigtimedwait] = 1;		/* 144 */
1071	s10_emulation_table[S10_SYS_lwp_sema_wait] = 1;		/* 147 */
1072	s10_emulation_table[S10_SYS_utimes] = 1;		/* 154 */
1073	s10_emulation_table[SYS_lwp_create] = 1;		/* 159 */
1074	s10_emulation_table[SYS_lwp_kill] = 1;			/* 163 */
1075	s10_emulation_table[SYS_lwp_sigmask] = 1;		/* 165 */
1076#if defined(__amd64)
1077	s10_emulation_table[SYS_lwp_private] = 1;		/* 166 */
1078#endif	/* __amd64 */
1079	s10_emulation_table[S10_SYS_lwp_mutex_lock] = 1;	/* 169 */
1080	s10_emulation_table[SYS_pwrite] = 1;			/* 174 */
1081	s10_emulation_table[SYS_auditsys] = 1;			/* 186 */
1082	s10_emulation_table[SYS_sigqueue] = 1;			/* 190 */
1083	s10_emulation_table[SYS_signotify] = 1;			/* 205 */
1084	s10_emulation_table[SYS_lwp_mutex_timedlock] = 1;	/* 210 */
1085	s10_emulation_table[SYS_getdents64] = 1;		/* 213 */
1086	s10_emulation_table[S10_SYS_stat64] = 1;		/* 215 */
1087	s10_emulation_table[S10_SYS_lstat64] = 1;		/* 216 */
1088	s10_emulation_table[S10_SYS_fstat64] = 1;		/* 217 */
1089	s10_emulation_table[SYS_pwrite64] = 1;			/* 223 */
1090	s10_emulation_table[S10_SYS_creat64] = 1;		/* 224 */
1091	s10_emulation_table[S10_SYS_open64] = 1;		/* 225 */
1092	s10_emulation_table[SYS_zone] = 1;			/* 227 */
1093	s10_emulation_table[SYS_lwp_mutex_trylock] = 1;		/* 251 */
1094
1095	err = mod_install(&modlinkage);
1096	if (err) {
1097		cmn_err(CE_WARN, "Couldn't install brand module");
1098		kmem_free(s10_emulation_table, NSYSCALL);
1099	}
1100
1101	return (err);
1102}
1103
/*
 * Module information entry point: hands the request straight to mod_info()
 * with this module's linkage and returns its result.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
1109
1110int
1111_fini(void)
1112{
1113	int err;
1114
1115	/*
1116	 * If there are any zones using this brand, we can't allow it to be
1117	 * unloaded.
1118	 */
1119	if (brand_zone_count(&s10_brand))
1120		return (EBUSY);
1121
1122	kmem_free(s10_emulation_table, NSYSCALL);
1123	s10_emulation_table = NULL;
1124
1125	err = mod_remove(&modlinkage);
1126	if (err)
1127		cmn_err(CE_WARN, "Couldn't unload s10 brand module");
1128
1129	return (err);
1130}
1131