s10_brand.c revision 11913:283e725df792
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#include <sys/errno.h>
28#include <sys/exec.h>
29#include <sys/file.h>
30#include <sys/kmem.h>
31#include <sys/modctl.h>
32#include <sys/model.h>
33#include <sys/proc.h>
34#include <sys/syscall.h>
35#include <sys/systm.h>
36#include <sys/thread.h>
37#include <sys/cmn_err.h>
38#include <sys/archsystm.h>
39#include <sys/pathname.h>
40#include <sys/sunddi.h>
41
42#include <sys/machbrand.h>
43#include <sys/brand.h>
44#include "s10_brand.h"
45
46char *s10_emulation_table = NULL;
47
48void	s10_init_brand_data(zone_t *);
49void	s10_free_brand_data(zone_t *);
50void	s10_setbrand(proc_t *);
51int	s10_getattr(zone_t *, int, void *, size_t *);
52int	s10_setattr(zone_t *, int, void *, size_t);
53int	s10_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t,
54		uintptr_t, uintptr_t, uintptr_t);
55void	s10_copy_procdata(proc_t *, proc_t *);
56void	s10_proc_exit(struct proc *, klwp_t *);
57void	s10_exec();
58int	s10_initlwp(klwp_t *);
59void	s10_forklwp(klwp_t *, klwp_t *);
60void	s10_freelwp(klwp_t *);
61void	s10_lwpexit(klwp_t *);
62int	s10_elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int,
63	long *, int, caddr_t, cred_t *, int);
64
65/* s10 brand */
66struct brand_ops s10_brops = {
67	s10_init_brand_data,
68	s10_free_brand_data,
69	s10_brandsys,
70	s10_setbrand,
71	s10_getattr,
72	s10_setattr,
73	s10_copy_procdata,
74	s10_proc_exit,
75	s10_exec,
76	lwp_setrval,
77	s10_initlwp,
78	s10_forklwp,
79	s10_freelwp,
80	s10_lwpexit,
81	s10_elfexec
82};
83
84#ifdef	sparc
85
86struct brand_mach_ops s10_mops = {
87	s10_brand_syscall_callback,
88	s10_brand_syscall32_callback
89};
90
91#else	/* sparc */
92
93#ifdef	__amd64
94
95struct brand_mach_ops s10_mops = {
96	s10_brand_sysenter_callback,
97	NULL,
98	s10_brand_int91_callback,
99	s10_brand_syscall_callback,
100	s10_brand_syscall32_callback,
101	NULL
102};
103
104#else	/* ! __amd64 */
105
106struct brand_mach_ops s10_mops = {
107	s10_brand_sysenter_callback,
108	NULL,
109	NULL,
110	s10_brand_syscall_callback,
111	NULL,
112	NULL
113};
114#endif	/* __amd64 */
115
116#endif	/* _sparc */
117
118struct brand	s10_brand = {
119	BRAND_VER_1,
120	"solaris10",
121	&s10_brops,
122	&s10_mops
123};
124
125static struct modlbrand modlbrand = {
126	&mod_brandops,		/* type of module */
127	"Solaris 10 Brand",	/* description of module */
128	&s10_brand		/* driver ops */
129};
130
131static struct modlinkage modlinkage = {
132	MODREV_1, (void *)&modlbrand, NULL
133};
134
135void
136s10_setbrand(proc_t *p)
137{
138	ASSERT(p->p_brand == &s10_brand);
139	ASSERT(p->p_brand_data == NULL);
140
141	/*
142	 * We should only be called from exec(), when we know the process
143	 * is single-threaded.
144	 */
145	ASSERT(p->p_tlist == p->p_tlist->t_forw);
146
147	p->p_brand_data = kmem_zalloc(sizeof (s10_proc_data_t), KM_SLEEP);
148	(void) s10_initlwp(p->p_tlist->t_lwp);
149}
150
151/*ARGSUSED*/
152int
153s10_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize)
154{
155	ASSERT(zone->zone_brand == &s10_brand);
156	if (attr == S10_EMUL_BITMAP) {
157		if (buf == NULL || *bufsize != sizeof (s10_emul_bitmap_t))
158			return (EINVAL);
159		if (copyout(((s10_zone_data_t *)zone->zone_brand_data)->
160		    emul_bitmap, buf, sizeof (s10_emul_bitmap_t)) != 0)
161			return (EFAULT);
162		return (0);
163	}
164
165	return (EINVAL);
166}
167
168int
169s10_setattr(zone_t *zone, int attr, void *buf, size_t bufsize)
170{
171	ASSERT(zone->zone_brand == &s10_brand);
172	if (attr == S10_EMUL_BITMAP) {
173		if (buf == NULL || bufsize != sizeof (s10_emul_bitmap_t))
174			return (EINVAL);
175		if (copyin(buf, ((s10_zone_data_t *)zone->zone_brand_data)->
176		    emul_bitmap, sizeof (s10_emul_bitmap_t)) != 0)
177			return (EFAULT);
178		return (0);
179	}
180
181	return (EINVAL);
182}
183
184#ifdef	__amd64
185/*
186 * The Nevada kernel clears %fs for threads in 64-bit x86 processes but S10's
187 * libc expects %fs to be nonzero.  This causes some committed
188 * libc/libthread interfaces (e.g., thr_main()) to fail, which impacts several
189 * libraries, including libdoor.  This function sets the specified LWP's %fs
190 * register to the legacy S10 selector value (LWPFS_SEL).
191 *
192 * The best solution to the aforementioned problem is backporting CRs
193 * 6467491 to Solaris 10 so that 64-bit x86 Solaris 10 processes
194 * would accept zero for %fs.  Backporting the CRs is a requirement for running
195 * S10 Containers in PV domUs because 64-bit Xen clears %fsbase when %fs is
196 * nonzero.  Such behavior breaks 64-bit processes because Xen has to fetch the
197 * FS segments' base addresses from the LWPs' GDTs, which are only capable of
198 * 32-bit addressing.
199 */
200/*ARGSUSED*/
201static void
202s10_amd64_correct_fsreg(klwp_t *l)
203{
204	if (lwp_getdatamodel(l) == DATAMODEL_NATIVE) {
205		kpreempt_disable();
206		l->lwp_pcb.pcb_fs = LWPFS_SEL;
207		l->lwp_pcb.pcb_rupdate = 1;
208		lwptot(l)->t_post_sys = 1;	/* Guarantee update_sregs() */
209		kpreempt_enable();
210	}
211}
212#endif	/* __amd64 */
213
214int
215s10_native()
216{
217	struct user	*up = PTOU(curproc);
218	char		*args_new, *comm_new, *p;
219	int		len;
220
221	len = sizeof (S10_NATIVE_LINKER32 " ") - 1;
222
223	/*
224	 * Make sure that the process' interpreter is the native dynamic linker.
225	 * Convention dictates that native processes executing within solaris10-
226	 * branded zones are interpreted by the native dynamic linker (the
227	 * process and its arguments are specified as arguments to the dynamic
228	 * linker).  If this convention is violated (i.e.,
229	 * brandsys(B_S10_NATIVE, ...) is invoked by a process that shouldn't be
230	 * native), then do nothing and silently indicate success.
231	 */
232	if (strcmp(up->u_comm, S10_LINKER_NAME) != 0)
233		return (0);
234	if (strncmp(up->u_psargs, S10_NATIVE_LINKER64 " /", len + 4) == 0)
235		len += 3;		/* to account for "/64" in the path */
236	else if (strncmp(up->u_psargs, S10_NATIVE_LINKER32 " /", len + 1) != 0)
237		return (0);
238
239	args_new = strdup(&up->u_psargs[len]);
240	if ((p = strchr(args_new, ' ')) != NULL)
241		*p = '\0';
242	if ((comm_new = strrchr(args_new, '/')) != NULL)
243		comm_new = strdup(comm_new + 1);
244	else
245		comm_new = strdup(args_new);
246	if (p != NULL)
247		*p = ' ';
248
249	if ((strlen(args_new) != 0) && (strlen(comm_new) != 0)) {
250		mutex_enter(&curproc->p_lock);
251		(void) strlcpy(up->u_comm, comm_new, MAXCOMLEN+1);
252		(void) strlcpy(up->u_psargs, args_new, PSARGSZ);
253		mutex_exit(&curproc->p_lock);
254	}
255
256	strfree(args_new);
257	strfree(comm_new);
258	return (0);
259}
260
261/*
262 * Get the address of the user-space system call handler from the user
263 * process and attach it to the proc structure.
264 */
265/*ARGSUSED*/
266int
267s10_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
268    uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
269{
270	s10_proc_data_t	*spd;
271	s10_brand_reg_t	reg;
272	proc_t		*p = curproc;
273	int		err;
274
275	*rval = 0;
276
277	/*
278	 * B_EXEC_BRAND is redundant
279	 * since the kernel assumes a native process doing an exec
280	 * in a branded zone is going to run a branded processes.
281	 * hence we don't support this operation.
282	 */
283	if (cmd == B_EXEC_BRAND)
284		return (ENOSYS);
285
286	if (cmd == B_S10_NATIVE)
287		return (s10_native());
288
289	/* For all other operations this must be a branded process. */
290	if (p->p_brand == &native_brand)
291		return (ENOSYS);
292
293	ASSERT(p->p_brand == &s10_brand);
294	ASSERT(p->p_brand_data != NULL);
295
296	spd = (s10_proc_data_t *)p->p_brand_data;
297
298	switch (cmd) {
299	case B_EXEC_NATIVE:
300		err = exec_common(
301		    (char *)arg1, (const char **)arg2, (const char **)arg3,
302		    EBA_NATIVE);
303		return (err);
304
305	case B_REGISTER:
306		if (p->p_model == DATAMODEL_NATIVE) {
307			if (copyin((void *)arg1, &reg, sizeof (reg)) != 0)
308				return (EFAULT);
309#if defined(_LP64)
310		} else {
311			s10_brand_reg32_t reg32;
312
313			if (copyin((void *)arg1, &reg32, sizeof (reg32)) != 0)
314				return (EFAULT);
315			reg.sbr_version = reg32.sbr_version;
316			reg.sbr_handler = (caddr_t)(uintptr_t)reg32.sbr_handler;
317#endif /* _LP64 */
318		}
319
320		if (reg.sbr_version != S10_VERSION)
321			return (ENOTSUP);
322		spd->spd_handler = reg.sbr_handler;
323		return (0);
324
325	case B_ELFDATA:
326		if (p->p_model == DATAMODEL_NATIVE) {
327			if (copyout(&spd->spd_elf_data, (void *)arg1,
328			    sizeof (s10_elf_data_t)) != 0)
329				return (EFAULT);
330#if defined(_LP64)
331		} else {
332			s10_elf_data32_t sed32;
333
334			sed32.sed_phdr = spd->spd_elf_data.sed_phdr;
335			sed32.sed_phent = spd->spd_elf_data.sed_phent;
336			sed32.sed_phnum = spd->spd_elf_data.sed_phnum;
337			sed32.sed_entry = spd->spd_elf_data.sed_entry;
338			sed32.sed_base = spd->spd_elf_data.sed_base;
339			sed32.sed_ldentry = spd->spd_elf_data.sed_ldentry;
340			sed32.sed_lddata = spd->spd_elf_data.sed_lddata;
341			if (copyout(&sed32, (void *)arg1, sizeof (sed32)) != 0)
342				return (EFAULT);
343#endif /* _LP64 */
344		}
345		return (0);
346
347	case B_S10_PIDINFO:
348		/*
349		 * The s10 brand needs to be able to get the pid of the
350		 * current process and the pid of the zone's init, and it
351		 * needs to do this on every process startup.  Early in
352		 * brand startup, we can't call getpid() because calls to
353		 * getpid() represent a magical signal to some old-skool
354		 * debuggers.  By merging all of this into one call, we
355		 * make this quite a bit cheaper and easier to handle in
356		 * the brand module.
357		 */
358		if (copyout(&p->p_pid, (void *)arg1, sizeof (pid_t)) != 0)
359			return (EFAULT);
360		if (copyout(&p->p_zone->zone_proc_initpid, (void *)arg2,
361		    sizeof (pid_t)) != 0)
362			return (EFAULT);
363		return (0);
364
365	case B_S10_TRUSS_POINT:
366		/*
367		 * This subcommand exists so that we can see truss output
368		 * from interposed system calls that return without first
369		 * calling any other system call, meaning they would be
370		 * invisible to truss(1).
371		 *
372		 * If the second argument is set non-zero, set errno to that
373		 * value as well.
374		 *
375		 * Arguments are:
376		 *
377		 *    arg1: syscall number
378		 *    arg2: errno
379		 */
380		return ((arg2 == 0) ? 0 : set_errno((uint_t)arg2));
381
382	case B_S10_ISFDXATTRDIR: {
383		/*
384		 * This subcommand enables the userland brand emulation library
385		 * to determine whether a file descriptor refers to an extended
386		 * file attributes directory.  There is no standard syscall or
387		 * libc function that can make such a determination.
388		 */
389		file_t *dir_filep;
390
391		dir_filep = getf((int)arg1);
392		if (dir_filep == NULL)
393			return (EBADF);
394		ASSERT(dir_filep->f_vnode != NULL);
395		*rval = IS_XATTRDIR(dir_filep->f_vnode);
396		releasef((int)arg1);
397		return (0);
398	}
399
400#ifdef	__amd64
401	case B_S10_FSREGCORRECTION:
402		/*
403		 * This subcommand exists so that the SYS_lwp_private and
404		 * SYS_lwp_create syscalls can manually set the current thread's
405		 * %fs register to the legacy S10 selector value for 64-bit x86
406		 * processes.
407		 */
408		s10_amd64_correct_fsreg(ttolwp(curthread));
409		return (0);
410#endif	/* __amd64 */
411	}
412
413	return (EINVAL);
414}
415
416/*
417 * Copy the per-process brand data from a parent proc to a child.
418 */
419void
420s10_copy_procdata(proc_t *child, proc_t *parent)
421{
422	s10_proc_data_t	*spd;
423
424	ASSERT(parent->p_brand == &s10_brand);
425	ASSERT(child->p_brand == &s10_brand);
426	ASSERT(parent->p_brand_data != NULL);
427	ASSERT(child->p_brand_data == NULL);
428
429	/* Just duplicate all the proc data of the parent for the child */
430	spd = kmem_alloc(sizeof (s10_proc_data_t), KM_SLEEP);
431	bcopy(parent->p_brand_data, spd, sizeof (s10_proc_data_t));
432	child->p_brand_data = spd;
433}
434
435/*ARGSUSED*/
436void
437s10_proc_exit(struct proc *p, klwp_t *l)
438{
439	ASSERT(p->p_brand == &s10_brand);
440	ASSERT(p->p_brand_data != NULL);
441
442	/*
443	 * We should only be called from proc_exit(), when we know that
444	 * process is single-threaded.
445	 */
446	ASSERT(p->p_tlist == p->p_tlist->t_forw);
447
448	/* upon exit, free our lwp brand data */
449	(void) s10_freelwp(ttolwp(curthread));
450
451	/* upon exit, free our proc brand data */
452	kmem_free(p->p_brand_data, sizeof (s10_proc_data_t));
453	p->p_brand_data = NULL;
454}
455
456void
457s10_exec()
458{
459	s10_proc_data_t	*spd = curproc->p_brand_data;
460
461	ASSERT(curproc->p_brand == &s10_brand);
462	ASSERT(curproc->p_brand_data != NULL);
463	ASSERT(ttolwp(curthread)->lwp_brand != NULL);
464
465	/*
466	 * We should only be called from exec(), when we know the process
467	 * is single-threaded.
468	 */
469	ASSERT(curproc->p_tlist == curproc->p_tlist->t_forw);
470
471	/* Upon exec, reset our lwp brand data. */
472	(void) s10_freelwp(ttolwp(curthread));
473	(void) s10_initlwp(ttolwp(curthread));
474
475	/*
476	 * Upon exec, reset all the proc brand data, except for the elf
477	 * data associated with the executable we are exec'ing.
478	 */
479	spd->spd_handler = NULL;
480}
481
482/*ARGSUSED*/
483int
484s10_initlwp(klwp_t *l)
485{
486	ASSERT(l->lwp_procp->p_brand == &s10_brand);
487	ASSERT(l->lwp_procp->p_brand_data != NULL);
488	ASSERT(l->lwp_brand == NULL);
489	l->lwp_brand = (void *)-1;
490	return (0);
491}
492
493/*ARGSUSED*/
494void
495s10_forklwp(klwp_t *p, klwp_t *c)
496{
497	ASSERT(p->lwp_procp->p_brand == &s10_brand);
498	ASSERT(c->lwp_procp->p_brand == &s10_brand);
499
500	ASSERT(p->lwp_procp->p_brand_data != NULL);
501	ASSERT(c->lwp_procp->p_brand_data != NULL);
502
503	/* Both LWPs have already had been initialized via s10_initlwp() */
504	ASSERT(p->lwp_brand != NULL);
505	ASSERT(c->lwp_brand != NULL);
506
507#ifdef	__amd64
508	/*
509	 * Only correct the child's %fs register if the parent's %fs register
510	 * is LWPFS_SEL.  If the parent's %fs register is zero, then the Solaris
511	 * 10 environment that we're emulating uses a version of libc that
512	 * works when %fs is zero (i.e., it contains backports of CRs 6467491
513	 * and 6501650).
514	 */
515	if (p->lwp_pcb.pcb_fs == LWPFS_SEL)
516		s10_amd64_correct_fsreg(c);
517#endif	/* __amd64 */
518}
519
520/*ARGSUSED*/
521void
522s10_freelwp(klwp_t *l)
523{
524	ASSERT(l->lwp_procp->p_brand == &s10_brand);
525	ASSERT(l->lwp_procp->p_brand_data != NULL);
526	ASSERT(l->lwp_brand != NULL);
527	l->lwp_brand = NULL;
528}
529
530/*ARGSUSED*/
531void
532s10_lwpexit(klwp_t *l)
533{
534	ASSERT(l->lwp_procp->p_brand == &s10_brand);
535	ASSERT(l->lwp_procp->p_brand_data != NULL);
536	ASSERT(l->lwp_brand != NULL);
537
538	/*
539	 * We should never be called for the last thread in a process.
540	 * (That case is handled by s10_proc_exit().)  There for this lwp
541	 * must be exiting from a multi-threaded process.
542	 */
543	ASSERT(l->lwp_procp->p_tlist != l->lwp_procp->p_tlist->t_forw);
544
545	l->lwp_brand = NULL;
546}
547
548void
549s10_free_brand_data(zone_t *zone)
550{
551	kmem_free(zone->zone_brand_data, sizeof (s10_zone_data_t));
552}
553
554void
555s10_init_brand_data(zone_t *zone)
556{
557	ASSERT(zone->zone_brand == &s10_brand);
558	ASSERT(zone->zone_brand_data == NULL);
559	zone->zone_brand_data = kmem_zalloc(sizeof (s10_zone_data_t), KM_SLEEP);
560}
561
562#if defined(_LP64)
563static void
564Ehdr32to64(Elf32_Ehdr *src, Ehdr *dst)
565{
566	bcopy(src->e_ident, dst->e_ident, sizeof (src->e_ident));
567	dst->e_type =		src->e_type;
568	dst->e_machine =	src->e_machine;
569	dst->e_version =	src->e_version;
570	dst->e_entry =		src->e_entry;
571	dst->e_phoff =		src->e_phoff;
572	dst->e_shoff =		src->e_shoff;
573	dst->e_flags =		src->e_flags;
574	dst->e_ehsize =		src->e_ehsize;
575	dst->e_phentsize =	src->e_phentsize;
576	dst->e_phnum =		src->e_phnum;
577	dst->e_shentsize =	src->e_shentsize;
578	dst->e_shnum =		src->e_shnum;
579	dst->e_shstrndx =	src->e_shstrndx;
580}
581#endif /* _LP64 */
582
583int
584s10_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
585	int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
586	int brand_action)
587{
588	vnode_t		*nvp;
589	Ehdr		ehdr;
590	Addr		uphdr_vaddr;
591	intptr_t	voffset;
592	int		interp;
593	int		i, err;
594	struct execenv	env;
595	struct user	*up = PTOU(curproc);
596	s10_proc_data_t	*spd;
597	s10_elf_data_t	sed, *sedp;
598	char		*linker;
599	uintptr_t	lddata; /* lddata of executable's linker */
600
601	ASSERT(curproc->p_brand == &s10_brand);
602	ASSERT(curproc->p_brand_data != NULL);
603
604	spd = (s10_proc_data_t *)curproc->p_brand_data;
605	sedp = &spd->spd_elf_data;
606
607	args->brandname = S10_BRANDNAME;
608
609	/*
610	 * We will exec the brand library and then map in the target
611	 * application and (optionally) the brand's default linker.
612	 */
613	if (args->to_model == DATAMODEL_NATIVE) {
614		args->emulator = S10_LIB;
615		linker = S10_LINKER;
616#if defined(_LP64)
617	} else {
618		args->emulator = S10_LIB32;
619		linker = S10_LINKER32;
620#endif /* _LP64 */
621	}
622
623	if ((err = lookupname(args->emulator, UIO_SYSSPACE, FOLLOW, NULLVPP,
624	    &nvp)) != 0) {
625		uprintf("%s: not found.", args->emulator);
626		return (err);
627	}
628
629	if (args->to_model == DATAMODEL_NATIVE) {
630		err = elfexec(nvp, uap, args, idatap, level + 1, execsz,
631		    setid, exec_file, cred, brand_action);
632#if defined(_LP64)
633	} else {
634		err = elf32exec(nvp, uap, args, idatap, level + 1, execsz,
635		    setid, exec_file, cred, brand_action);
636#endif /* _LP64 */
637	}
638	VN_RELE(nvp);
639	if (err != 0)
640		return (err);
641
642	/*
643	 * The u_auxv vectors are set up by elfexec to point to the brand
644	 * emulation library and linker.  Save these so they can be copied to
645	 * the specific brand aux vectors.
646	 */
647	bzero(&sed, sizeof (sed));
648	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
649		switch (up->u_auxv[i].a_type) {
650		case AT_SUN_LDDATA:
651			sed.sed_lddata = up->u_auxv[i].a_un.a_val;
652			break;
653		case AT_BASE:
654			sed.sed_base = up->u_auxv[i].a_un.a_val;
655			break;
656		case AT_ENTRY:
657			sed.sed_entry = up->u_auxv[i].a_un.a_val;
658			break;
659		case AT_PHDR:
660			sed.sed_phdr = up->u_auxv[i].a_un.a_val;
661			break;
662		case AT_PHENT:
663			sed.sed_phent = up->u_auxv[i].a_un.a_val;
664			break;
665		case AT_PHNUM:
666			sed.sed_phnum = up->u_auxv[i].a_un.a_val;
667			break;
668		default:
669			break;
670		}
671	}
672	/* Make sure the emulator has an entry point */
673	ASSERT(sed.sed_entry != NULL);
674	ASSERT(sed.sed_phdr != NULL);
675
676	bzero(&env, sizeof (env));
677	if (args->to_model == DATAMODEL_NATIVE) {
678		err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr, &voffset,
679		    exec_file, &interp, &env.ex_bssbase, &env.ex_brkbase,
680		    &env.ex_brksize, NULL);
681#if defined(_LP64)
682	} else {
683		Elf32_Ehdr ehdr32;
684		Elf32_Addr uphdr_vaddr32;
685		err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32,
686		    &voffset, exec_file, &interp, &env.ex_bssbase,
687		    &env.ex_brkbase, &env.ex_brksize, NULL);
688		Ehdr32to64(&ehdr32, &ehdr);
689		if (uphdr_vaddr32 == (Elf32_Addr)-1)
690			uphdr_vaddr = (Addr)-1;
691		else
692			uphdr_vaddr = uphdr_vaddr32;
693#endif /* _LP64 */
694	}
695	if (err != 0)
696		return (err);
697
698	/*
699	 * Save off the important properties of the executable. The brand
700	 * library will ask us for this data later, when it is initializing
701	 * and getting ready to transfer control to the brand application.
702	 */
703	if (uphdr_vaddr == (Addr)-1)
704		sedp->sed_phdr = voffset + ehdr.e_phoff;
705	else
706		sedp->sed_phdr = voffset + uphdr_vaddr;
707	sedp->sed_entry = voffset + ehdr.e_entry;
708	sedp->sed_phent = ehdr.e_phentsize;
709	sedp->sed_phnum = ehdr.e_phnum;
710
711	if (interp) {
712		if (ehdr.e_type == ET_DYN) {
713			/*
714			 * This is a shared object executable, so we need to
715			 * pick a reasonable place to put the heap. Just don't
716			 * use the first page.
717			 */
718			env.ex_brkbase = (caddr_t)PAGESIZE;
719			env.ex_bssbase = (caddr_t)PAGESIZE;
720		}
721
722		/*
723		 * If the program needs an interpreter (most do), map it in and
724		 * store relevant information about it in the aux vector, where
725		 * the brand library can find it.
726		 */
727		if ((err = lookupname(linker, UIO_SYSSPACE,
728		    FOLLOW, NULLVPP, &nvp)) != 0) {
729			uprintf("%s: not found.", S10_LINKER);
730			return (err);
731		}
732		if (args->to_model == DATAMODEL_NATIVE) {
733			err = mapexec_brand(nvp, args, &ehdr,
734			    &uphdr_vaddr, &voffset, exec_file, &interp,
735			    NULL, NULL, NULL, &lddata);
736#if defined(_LP64)
737		} else {
738			Elf32_Ehdr ehdr32;
739			Elf32_Addr uphdr_vaddr32;
740			err = mapexec32_brand(nvp, args, &ehdr32,
741			    &uphdr_vaddr32, &voffset, exec_file, &interp,
742			    NULL, NULL, NULL, &lddata);
743			Ehdr32to64(&ehdr32, &ehdr);
744			if (uphdr_vaddr32 == (Elf32_Addr)-1)
745				uphdr_vaddr = (Addr)-1;
746			else
747				uphdr_vaddr = uphdr_vaddr32;
748#endif /* _LP64 */
749		}
750		VN_RELE(nvp);
751		if (err != 0)
752			return (err);
753
754		/*
755		 * Now that we know the base address of the brand's linker,
756		 * place it in the aux vector.
757		 */
758		sedp->sed_base = voffset;
759		sedp->sed_ldentry = voffset + ehdr.e_entry;
760		sedp->sed_lddata = voffset + lddata;
761	} else {
762		/*
763		 * This program has no interpreter. The brand library will
764		 * jump to the address in the AT_SUN_BRAND_LDENTRY aux vector,
765		 * so in this case, put the entry point of the main executable
766		 * there.
767		 */
768		if (ehdr.e_type == ET_EXEC) {
769			/*
770			 * An executable with no interpreter, this must be a
771			 * statically linked executable, which means we loaded
772			 * it at the address specified in the elf header, in
773			 * which case the e_entry field of the elf header is an
774			 * absolute address.
775			 */
776			sedp->sed_ldentry = ehdr.e_entry;
777			sedp->sed_entry = ehdr.e_entry;
778			sedp->sed_lddata = NULL;
779			sedp->sed_base = NULL;
780		} else {
781			/*
782			 * A shared object with no interpreter, we use the
783			 * calculated address from above.
784			 */
785			sedp->sed_ldentry = sedp->sed_entry;
786			sedp->sed_entry = NULL;
787			sedp->sed_phdr = NULL;
788			sedp->sed_phent = NULL;
789			sedp->sed_phnum = NULL;
790			sedp->sed_lddata = NULL;
791			sedp->sed_base = voffset;
792
793			if (ehdr.e_type == ET_DYN) {
794				/*
795				 * Delay setting the brkbase until the first
796				 * call to brk(); see elfexec() for details.
797				 */
798				env.ex_bssbase = (caddr_t)0;
799				env.ex_brkbase = (caddr_t)0;
800				env.ex_brksize = 0;
801			}
802		}
803	}
804
805	env.ex_magic = elfmagic;
806	env.ex_vp = vp;
807	setexecenv(&env);
808
809	/*
810	 * It's time to manipulate the process aux vectors.  First
811	 * we need to update the AT_SUN_AUXFLAGS aux vector to set
812	 * the AF_SUN_NOPLM flag.
813	 */
814	if (args->to_model == DATAMODEL_NATIVE) {
815		auxv_t		auxflags_auxv;
816
817		if (copyin(args->auxp_auxflags, &auxflags_auxv,
818		    sizeof (auxflags_auxv)) != 0)
819			return (EFAULT);
820
821		ASSERT(auxflags_auxv.a_type == AT_SUN_AUXFLAGS);
822		auxflags_auxv.a_un.a_val |= AF_SUN_NOPLM;
823		if (copyout(&auxflags_auxv, args->auxp_auxflags,
824		    sizeof (auxflags_auxv)) != 0)
825			return (EFAULT);
826#if defined(_LP64)
827	} else {
828		auxv32_t	auxflags_auxv32;
829
830		if (copyin(args->auxp_auxflags, &auxflags_auxv32,
831		    sizeof (auxflags_auxv32)) != 0)
832			return (EFAULT);
833
834		ASSERT(auxflags_auxv32.a_type == AT_SUN_AUXFLAGS);
835		auxflags_auxv32.a_un.a_val |= AF_SUN_NOPLM;
836		if (copyout(&auxflags_auxv32, args->auxp_auxflags,
837		    sizeof (auxflags_auxv32)) != 0)
838			return (EFAULT);
839#endif /* _LP64 */
840	}
841
842	/* Second, copy out the brand specific aux vectors. */
843	if (args->to_model == DATAMODEL_NATIVE) {
844		auxv_t s10_auxv[] = {
845		    { AT_SUN_BRAND_AUX1, 0 },
846		    { AT_SUN_BRAND_AUX2, 0 },
847		    { AT_SUN_BRAND_AUX3, 0 }
848		};
849
850		ASSERT(s10_auxv[0].a_type == AT_SUN_BRAND_S10_LDDATA);
851		s10_auxv[0].a_un.a_val = sed.sed_lddata;
852
853		if (copyout(&s10_auxv, args->auxp_brand,
854		    sizeof (s10_auxv)) != 0)
855			return (EFAULT);
856#if defined(_LP64)
857	} else {
858		auxv32_t s10_auxv32[] = {
859		    { AT_SUN_BRAND_AUX1, 0 },
860		    { AT_SUN_BRAND_AUX2, 0 },
861		    { AT_SUN_BRAND_AUX3, 0 }
862		};
863
864		ASSERT(s10_auxv32[0].a_type == AT_SUN_BRAND_S10_LDDATA);
865		s10_auxv32[0].a_un.a_val = (uint32_t)sed.sed_lddata;
866		if (copyout(&s10_auxv32, args->auxp_brand,
867		    sizeof (s10_auxv32)) != 0)
868			return (EFAULT);
869#endif /* _LP64 */
870	}
871
872	/*
873	 * Third, the the /proc aux vectors set up by elfexec() point to brand
874	 * emulation library and it's linker.  Copy these to the /proc brand
875	 * specific aux vector, and update the regular /proc aux vectors to
876	 * point to the executable (and it's linker).  This will enable
877	 * debuggers to access the executable via the usual /proc or elf notes
878	 * aux vectors.
879	 *
880	 * The brand emulation library's linker will get it's aux vectors off
881	 * the stack, and then update the stack with the executable's aux
882	 * vectors before jumping to the executable's linker.
883	 *
884	 * Debugging the brand emulation library must be done from
885	 * the global zone, where the librtld_db module knows how to fetch the
886	 * brand specific aux vectors to access the brand emulation libraries
887	 * linker.
888	 */
889	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
890		ulong_t val;
891
892		switch (up->u_auxv[i].a_type) {
893		case AT_SUN_BRAND_S10_LDDATA:
894			up->u_auxv[i].a_un.a_val = sed.sed_lddata;
895			continue;
896		case AT_BASE:
897			val = sedp->sed_base;
898			break;
899		case AT_ENTRY:
900			val = sedp->sed_entry;
901			break;
902		case AT_PHDR:
903			val = sedp->sed_phdr;
904			break;
905		case AT_PHENT:
906			val = sedp->sed_phent;
907			break;
908		case AT_PHNUM:
909			val = sedp->sed_phnum;
910			break;
911		case AT_SUN_LDDATA:
912			val = sedp->sed_lddata;
913			break;
914		default:
915			continue;
916		}
917
918		up->u_auxv[i].a_un.a_val = val;
919		if (val == NULL) {
920			/* Hide the entry for static binaries */
921			up->u_auxv[i].a_type = AT_IGNORE;
922		}
923	}
924
925	/*
926	 * The last thing we do here is clear spd->spd_handler.  This is
927	 * important because if we're already a branded process and if this
928	 * exec succeeds, there is a window between when the exec() first
929	 * returns to the userland of the new process and when our brand
930	 * library get's initialized, during which we don't want system
931	 * calls to be re-directed to our brand library since it hasn't
932	 * been initialized yet.
933	 */
934	spd->spd_handler = NULL;
935
936	return (0);
937}
938
939
940int
941_init(void)
942{
943	int err;
944
945	/*
946	 * Set up the table indicating which system calls we want to
947	 * interpose on.  We should probably build this automatically from
948	 * a list of system calls that is shared with the user-space
949	 * library.
950	 */
951	s10_emulation_table = kmem_zalloc(NSYSCALL, KM_SLEEP);
952	s10_emulation_table[S10_SYS_forkall] = 1;		/*   2 */
953	s10_emulation_table[S10_SYS_open] = 1;			/*   5 */
954	s10_emulation_table[S10_SYS_wait] = 1;			/*   7 */
955	s10_emulation_table[S10_SYS_creat] = 1;			/*   8 */
956	s10_emulation_table[S10_SYS_unlink] = 1;		/*  10 */
957	s10_emulation_table[S10_SYS_exec] = 1;			/*  11 */
958	s10_emulation_table[S10_SYS_chown] = 1;			/*  16 */
959	s10_emulation_table[S10_SYS_stat] = 1;			/*  18 */
960	s10_emulation_table[S10_SYS_umount] = 1;		/*  22 */
961	s10_emulation_table[S10_SYS_fstat] = 1;			/*  28 */
962	s10_emulation_table[S10_SYS_utime] = 1;			/*  30 */
963	s10_emulation_table[S10_SYS_access] = 1;		/*  33 */
964	s10_emulation_table[SYS_kill] = 1;			/*  37 */
965	s10_emulation_table[S10_SYS_dup] = 1;			/*  41 */
966	s10_emulation_table[SYS_ioctl] = 1;			/*  54 */
967	s10_emulation_table[SYS_execve] = 1;			/*  59 */
968	s10_emulation_table[SYS_acctctl] = 1;			/*  71 */
969	s10_emulation_table[S10_SYS_issetugid] = 1;		/*  75 */
970	s10_emulation_table[S10_SYS_fsat] = 1;			/*  76 */
971	s10_emulation_table[S10_SYS_rmdir] = 1;			/*  79 */
972	s10_emulation_table[SYS_getdents] = 1;			/*  81 */
973	s10_emulation_table[S10_SYS_poll] = 1;			/*  87 */
974	s10_emulation_table[S10_SYS_lstat] = 1;			/*  88 */
975	s10_emulation_table[S10_SYS_fchown] = 1;		/*  94 */
976	s10_emulation_table[SYS_sigprocmask] = 1;		/*  95 */
977	s10_emulation_table[SYS_sigsuspend] = 1;		/*  96 */
978	s10_emulation_table[SYS_sigaction] = 1;			/*  98 */
979	s10_emulation_table[SYS_sigpending] = 1;		/*  99 */
980	s10_emulation_table[SYS_context] = 1;			/* 100 */
981	s10_emulation_table[SYS_waitid] = 1;			/* 107 */
982	s10_emulation_table[SYS_sigsendsys] = 1;		/* 108 */
983#if defined(__x86)
984	s10_emulation_table[S10_SYS_xstat] = 1;			/* 123 */
985	s10_emulation_table[S10_SYS_lxstat] = 1;		/* 124 */
986	s10_emulation_table[S10_SYS_fxstat] = 1;		/* 125 */
987	s10_emulation_table[S10_SYS_xmknod] = 1;		/* 126 */
988#endif
989	s10_emulation_table[S10_SYS_lchown] = 1;		/* 130 */
990	s10_emulation_table[S10_SYS_rename] = 1;		/* 134 */
991	s10_emulation_table[SYS_uname] = 1;			/* 135 */
992	s10_emulation_table[SYS_sysconfig] = 1;			/* 137 */
993	s10_emulation_table[SYS_systeminfo] = 1;		/* 139 */
994	s10_emulation_table[S10_SYS_fork1] = 1;			/* 143 */
995	s10_emulation_table[SYS_sigtimedwait] = 1;		/* 144 */
996	s10_emulation_table[S10_SYS_lwp_sema_wait] = 1;		/* 147 */
997	s10_emulation_table[S10_SYS_utimes] = 1;		/* 154 */
998	s10_emulation_table[SYS_lwp_create] = 1;		/* 159 */
999	s10_emulation_table[SYS_lwp_kill] = 1;			/* 163 */
1000	s10_emulation_table[SYS_lwp_sigmask] = 1;		/* 165 */
1001#if defined(__amd64)
1002	s10_emulation_table[SYS_lwp_private] = 1;		/* 166 */
1003#endif	/* __amd64 */
1004	s10_emulation_table[S10_SYS_lwp_mutex_lock] = 1;	/* 169 */
1005	s10_emulation_table[SYS_pwrite] = 1;			/* 174 */
1006	s10_emulation_table[SYS_auditsys] = 1;			/* 186 */
1007	s10_emulation_table[SYS_sigqueue] = 1;			/* 190 */
1008	s10_emulation_table[SYS_signotify] = 1;			/* 205 */
1009	s10_emulation_table[SYS_lwp_mutex_timedlock] = 1;	/* 210 */
1010	s10_emulation_table[SYS_getdents64] = 1;		/* 213 */
1011	s10_emulation_table[S10_SYS_stat64] = 1;		/* 215 */
1012	s10_emulation_table[S10_SYS_lstat64] = 1;		/* 216 */
1013	s10_emulation_table[S10_SYS_fstat64] = 1;		/* 217 */
1014	s10_emulation_table[SYS_pwrite64] = 1;			/* 223 */
1015	s10_emulation_table[S10_SYS_creat64] = 1;		/* 224 */
1016	s10_emulation_table[S10_SYS_open64] = 1;		/* 225 */
1017	s10_emulation_table[SYS_zone] = 1;			/* 227 */
1018	s10_emulation_table[SYS_lwp_mutex_trylock] = 1;		/* 251 */
1019
1020	err = mod_install(&modlinkage);
1021	if (err) {
1022		cmn_err(CE_WARN, "Couldn't install brand module");
1023		kmem_free(s10_emulation_table, NSYSCALL);
1024	}
1025
1026	return (err);
1027}
1028
1029int
1030_info(struct modinfo *modinfop)
1031{
1032	return (mod_info(&modlinkage, modinfop));
1033}
1034
1035int
1036_fini(void)
1037{
1038	int err;
1039
1040	/*
1041	 * If there are any zones using this brand, we can't allow it to be
1042	 * unloaded.
1043	 */
1044	if (brand_zone_count(&s10_brand))
1045		return (EBUSY);
1046
1047	kmem_free(s10_emulation_table, NSYSCALL);
1048	s10_emulation_table = NULL;
1049
1050	err = mod_remove(&modlinkage);
1051	if (err)
1052		cmn_err(CE_WARN, "Couldn't unload s10 brand module");
1053
1054	return (err);
1055}
1056