s10_brand.c revision 11970:9c3f3660b754
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22/* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27#include <sys/errno.h> 28#include <sys/exec.h> 29#include <sys/file.h> 30#include <sys/kmem.h> 31#include <sys/modctl.h> 32#include <sys/model.h> 33#include <sys/proc.h> 34#include <sys/syscall.h> 35#include <sys/systm.h> 36#include <sys/thread.h> 37#include <sys/cmn_err.h> 38#include <sys/archsystm.h> 39#include <sys/pathname.h> 40#include <sys/sunddi.h> 41 42#include <sys/machbrand.h> 43#include <sys/brand.h> 44#include "s10_brand.h" 45 46char *s10_emulation_table = NULL; 47 48void s10_init_brand_data(zone_t *); 49void s10_free_brand_data(zone_t *); 50void s10_setbrand(proc_t *); 51int s10_getattr(zone_t *, int, void *, size_t *); 52int s10_setattr(zone_t *, int, void *, size_t); 53int s10_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t, 54 uintptr_t, uintptr_t, uintptr_t); 55void s10_copy_procdata(proc_t *, proc_t *); 56void s10_proc_exit(struct proc *, klwp_t *); 57void s10_exec(); 58int s10_initlwp(klwp_t *); 59void s10_forklwp(klwp_t *, klwp_t *); 60void s10_freelwp(klwp_t *); 61void s10_lwpexit(klwp_t *); 62int s10_elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int, 63 long *, int, caddr_t, cred_t *, int); 64void s10_sigset_native_to_s10(sigset_t *); 65void s10_sigset_s10_to_native(sigset_t *); 66 67/* s10 brand */ 68struct brand_ops s10_brops = { 69 s10_init_brand_data, 70 s10_free_brand_data, 71 s10_brandsys, 72 s10_setbrand, 73 s10_getattr, 74 s10_setattr, 75 s10_copy_procdata, 76 s10_proc_exit, 77 s10_exec, 78 lwp_setrval, 79 s10_initlwp, 80 s10_forklwp, 81 s10_freelwp, 82 s10_lwpexit, 83 s10_elfexec, 84 s10_sigset_native_to_s10, 85 s10_sigset_s10_to_native, 86 S10_NSIG, 87}; 88 89#ifdef sparc 90 91struct brand_mach_ops s10_mops = { 92 s10_brand_syscall_callback, 93 s10_brand_syscall32_callback 94}; 95 96#else /* sparc */ 97 98#ifdef __amd64 99 100struct brand_mach_ops s10_mops = { 101 s10_brand_sysenter_callback, 102 NULL, 103 s10_brand_int91_callback, 104 s10_brand_syscall_callback, 105 s10_brand_syscall32_callback, 106 NULL 107}; 108 109#else /* ! __amd64 */ 110 111struct brand_mach_ops s10_mops = { 112 s10_brand_sysenter_callback, 113 NULL, 114 NULL, 115 s10_brand_syscall_callback, 116 NULL, 117 NULL 118}; 119#endif /* __amd64 */ 120 121#endif /* _sparc */ 122 123struct brand s10_brand = { 124 BRAND_VER_1, 125 "solaris10", 126 &s10_brops, 127 &s10_mops 128}; 129 130static struct modlbrand modlbrand = { 131 &mod_brandops, /* type of module */ 132 "Solaris 10 Brand", /* description of module */ 133 &s10_brand /* driver ops */ 134}; 135 136static struct modlinkage modlinkage = { 137 MODREV_1, (void *)&modlbrand, NULL 138}; 139 140void 141s10_setbrand(proc_t *p) 142{ 143 ASSERT(p->p_brand == &s10_brand); 144 ASSERT(p->p_brand_data == NULL); 145 146 /* 147 * We should only be called from exec(), when we know the process 148 * is single-threaded. 149 */ 150 ASSERT(p->p_tlist == p->p_tlist->t_forw); 151 152 p->p_brand_data = kmem_zalloc(sizeof (s10_proc_data_t), KM_SLEEP); 153 (void) s10_initlwp(p->p_tlist->t_lwp); 154} 155 156/*ARGSUSED*/ 157int 158s10_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize) 159{ 160 ASSERT(zone->zone_brand == &s10_brand); 161 if (attr == S10_EMUL_BITMAP) { 162 if (buf == NULL || *bufsize != sizeof (s10_emul_bitmap_t)) 163 return (EINVAL); 164 if (copyout(((s10_zone_data_t *)zone->zone_brand_data)-> 165 emul_bitmap, buf, sizeof (s10_emul_bitmap_t)) != 0) 166 return (EFAULT); 167 return (0); 168 } 169 170 return (EINVAL); 171} 172 173int 174s10_setattr(zone_t *zone, int attr, void *buf, size_t bufsize) 175{ 176 ASSERT(zone->zone_brand == &s10_brand); 177 if (attr == S10_EMUL_BITMAP) { 178 if (buf == NULL || bufsize != sizeof (s10_emul_bitmap_t)) 179 return (EINVAL); 180 if (copyin(buf, ((s10_zone_data_t *)zone->zone_brand_data)-> 181 emul_bitmap, sizeof (s10_emul_bitmap_t)) != 0) 182 return (EFAULT); 183 return (0); 184 } 185 186 return (EINVAL); 187} 188 189#ifdef __amd64 190/* 191 * The Nevada kernel clears %fs for threads in 64-bit x86 processes but S10's 192 * libc expects %fs to be nonzero. This causes some committed 193 * libc/libthread interfaces (e.g., thr_main()) to fail, which impacts several 194 * libraries, including libdoor. This function sets the specified LWP's %fs 195 * register to the legacy S10 selector value (LWPFS_SEL). 196 * 197 * The best solution to the aforementioned problem is backporting CRs 198 * 6467491 to Solaris 10 so that 64-bit x86 Solaris 10 processes 199 * would accept zero for %fs. Backporting the CRs is a requirement for running 200 * S10 Containers in PV domUs because 64-bit Xen clears %fsbase when %fs is 201 * nonzero. Such behavior breaks 64-bit processes because Xen has to fetch the 202 * FS segments' base addresses from the LWPs' GDTs, which are only capable of 203 * 32-bit addressing. 204 */ 205/*ARGSUSED*/ 206static void 207s10_amd64_correct_fsreg(klwp_t *l) 208{ 209 if (lwp_getdatamodel(l) == DATAMODEL_NATIVE) { 210 kpreempt_disable(); 211 l->lwp_pcb.pcb_fs = LWPFS_SEL; 212 l->lwp_pcb.pcb_rupdate = 1; 213 lwptot(l)->t_post_sys = 1; /* Guarantee update_sregs() */ 214 kpreempt_enable(); 215 } 216} 217#endif /* __amd64 */ 218 219int 220s10_native() 221{ 222 struct user *up = PTOU(curproc); 223 char *args_new, *comm_new, *p; 224 int len; 225 226 len = sizeof (S10_NATIVE_LINKER32 " ") - 1; 227 228 /* 229 * Make sure that the process' interpreter is the native dynamic linker. 230 * Convention dictates that native processes executing within solaris10- 231 * branded zones are interpreted by the native dynamic linker (the 232 * process and its arguments are specified as arguments to the dynamic 233 * linker). If this convention is violated (i.e., 234 * brandsys(B_S10_NATIVE, ...) is invoked by a process that shouldn't be 235 * native), then do nothing and silently indicate success. 236 */ 237 if (strcmp(up->u_comm, S10_LINKER_NAME) != 0) 238 return (0); 239 if (strncmp(up->u_psargs, S10_NATIVE_LINKER64 " /", len + 4) == 0) 240 len += 3; /* to account for "/64" in the path */ 241 else if (strncmp(up->u_psargs, S10_NATIVE_LINKER32 " /", len + 1) != 0) 242 return (0); 243 244 args_new = strdup(&up->u_psargs[len]); 245 if ((p = strchr(args_new, ' ')) != NULL) 246 *p = '\0'; 247 if ((comm_new = strrchr(args_new, '/')) != NULL) 248 comm_new = strdup(comm_new + 1); 249 else 250 comm_new = strdup(args_new); 251 if (p != NULL) 252 *p = ' '; 253 254 if ((strlen(args_new) != 0) && (strlen(comm_new) != 0)) { 255 mutex_enter(&curproc->p_lock); 256 (void) strlcpy(up->u_comm, comm_new, MAXCOMLEN+1); 257 (void) strlcpy(up->u_psargs, args_new, PSARGSZ); 258 mutex_exit(&curproc->p_lock); 259 } 260 261 strfree(args_new); 262 strfree(comm_new); 263 return (0); 264} 265 266/* 267 * Get the address of the user-space system call handler from the user 268 * process and attach it to the proc structure. 269 */ 270/*ARGSUSED*/ 271int 272s10_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, 273 uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6) 274{ 275 s10_proc_data_t *spd; 276 s10_brand_reg_t reg; 277 proc_t *p = curproc; 278 int err; 279 280 *rval = 0; 281 282 /* 283 * B_EXEC_BRAND is redundant 284 * since the kernel assumes a native process doing an exec 285 * in a branded zone is going to run a branded processes. 286 * hence we don't support this operation. 287 */ 288 if (cmd == B_EXEC_BRAND) 289 return (ENOSYS); 290 291 if (cmd == B_S10_NATIVE) 292 return (s10_native()); 293 294 /* For all other operations this must be a branded process. */ 295 if (p->p_brand == &native_brand) 296 return (ENOSYS); 297 298 ASSERT(p->p_brand == &s10_brand); 299 ASSERT(p->p_brand_data != NULL); 300 301 spd = (s10_proc_data_t *)p->p_brand_data; 302 303 switch (cmd) { 304 case B_EXEC_NATIVE: 305 err = exec_common( 306 (char *)arg1, (const char **)arg2, (const char **)arg3, 307 EBA_NATIVE); 308 return (err); 309 310 case B_REGISTER: 311 if (p->p_model == DATAMODEL_NATIVE) { 312 if (copyin((void *)arg1, ®, sizeof (reg)) != 0) 313 return (EFAULT); 314#if defined(_LP64) 315 } else { 316 s10_brand_reg32_t reg32; 317 318 if (copyin((void *)arg1, ®32, sizeof (reg32)) != 0) 319 return (EFAULT); 320 reg.sbr_version = reg32.sbr_version; 321 reg.sbr_handler = (caddr_t)(uintptr_t)reg32.sbr_handler; 322#endif /* _LP64 */ 323 } 324 325 if (reg.sbr_version != S10_VERSION) 326 return (ENOTSUP); 327 spd->spd_handler = reg.sbr_handler; 328 return (0); 329 330 case B_ELFDATA: 331 if (p->p_model == DATAMODEL_NATIVE) { 332 if (copyout(&spd->spd_elf_data, (void *)arg1, 333 sizeof (s10_elf_data_t)) != 0) 334 return (EFAULT); 335#if defined(_LP64) 336 } else { 337 s10_elf_data32_t sed32; 338 339 sed32.sed_phdr = spd->spd_elf_data.sed_phdr; 340 sed32.sed_phent = spd->spd_elf_data.sed_phent; 341 sed32.sed_phnum = spd->spd_elf_data.sed_phnum; 342 sed32.sed_entry = spd->spd_elf_data.sed_entry; 343 sed32.sed_base = spd->spd_elf_data.sed_base; 344 sed32.sed_ldentry = spd->spd_elf_data.sed_ldentry; 345 sed32.sed_lddata = spd->spd_elf_data.sed_lddata; 346 if (copyout(&sed32, (void *)arg1, sizeof (sed32)) != 0) 347 return (EFAULT); 348#endif /* _LP64 */ 349 } 350 return (0); 351 352 case B_S10_PIDINFO: 353 /* 354 * The s10 brand needs to be able to get the pid of the 355 * current process and the pid of the zone's init, and it 356 * needs to do this on every process startup. Early in 357 * brand startup, we can't call getpid() because calls to 358 * getpid() represent a magical signal to some old-skool 359 * debuggers. By merging all of this into one call, we 360 * make this quite a bit cheaper and easier to handle in 361 * the brand module. 362 */ 363 if (copyout(&p->p_pid, (void *)arg1, sizeof (pid_t)) != 0) 364 return (EFAULT); 365 if (copyout(&p->p_zone->zone_proc_initpid, (void *)arg2, 366 sizeof (pid_t)) != 0) 367 return (EFAULT); 368 return (0); 369 370 case B_S10_TRUSS_POINT: 371 /* 372 * This subcommand exists so that we can see truss output 373 * from interposed system calls that return without first 374 * calling any other system call, meaning they would be 375 * invisible to truss(1). 376 * 377 * If the second argument is set non-zero, set errno to that 378 * value as well. 379 * 380 * Arguments are: 381 * 382 * arg1: syscall number 383 * arg2: errno 384 */ 385 return ((arg2 == 0) ? 0 : set_errno((uint_t)arg2)); 386 387 case B_S10_ISFDXATTRDIR: { 388 /* 389 * This subcommand enables the userland brand emulation library 390 * to determine whether a file descriptor refers to an extended 391 * file attributes directory. There is no standard syscall or 392 * libc function that can make such a determination. 393 */ 394 file_t *dir_filep; 395 396 dir_filep = getf((int)arg1); 397 if (dir_filep == NULL) 398 return (EBADF); 399 ASSERT(dir_filep->f_vnode != NULL); 400 *rval = IS_XATTRDIR(dir_filep->f_vnode); 401 releasef((int)arg1); 402 return (0); 403 } 404 405#ifdef __amd64 406 case B_S10_FSREGCORRECTION: 407 /* 408 * This subcommand exists so that the SYS_lwp_private and 409 * SYS_lwp_create syscalls can manually set the current thread's 410 * %fs register to the legacy S10 selector value for 64-bit x86 411 * processes. 412 */ 413 s10_amd64_correct_fsreg(ttolwp(curthread)); 414 return (0); 415#endif /* __amd64 */ 416 } 417 418 return (EINVAL); 419} 420 421/* 422 * Copy the per-process brand data from a parent proc to a child. 423 */ 424void 425s10_copy_procdata(proc_t *child, proc_t *parent) 426{ 427 s10_proc_data_t *spd; 428 429 ASSERT(parent->p_brand == &s10_brand); 430 ASSERT(child->p_brand == &s10_brand); 431 ASSERT(parent->p_brand_data != NULL); 432 ASSERT(child->p_brand_data == NULL); 433 434 /* Just duplicate all the proc data of the parent for the child */ 435 spd = kmem_alloc(sizeof (s10_proc_data_t), KM_SLEEP); 436 bcopy(parent->p_brand_data, spd, sizeof (s10_proc_data_t)); 437 child->p_brand_data = spd; 438} 439 440/*ARGSUSED*/ 441void 442s10_proc_exit(struct proc *p, klwp_t *l) 443{ 444 ASSERT(p->p_brand == &s10_brand); 445 ASSERT(p->p_brand_data != NULL); 446 447 /* 448 * We should only be called from proc_exit(), when we know that 449 * process is single-threaded. 450 */ 451 ASSERT(p->p_tlist == p->p_tlist->t_forw); 452 453 /* upon exit, free our lwp brand data */ 454 (void) s10_freelwp(ttolwp(curthread)); 455 456 /* upon exit, free our proc brand data */ 457 kmem_free(p->p_brand_data, sizeof (s10_proc_data_t)); 458 p->p_brand_data = NULL; 459} 460 461void 462s10_exec() 463{ 464 s10_proc_data_t *spd = curproc->p_brand_data; 465 466 ASSERT(curproc->p_brand == &s10_brand); 467 ASSERT(curproc->p_brand_data != NULL); 468 ASSERT(ttolwp(curthread)->lwp_brand != NULL); 469 470 /* 471 * We should only be called from exec(), when we know the process 472 * is single-threaded. 473 */ 474 ASSERT(curproc->p_tlist == curproc->p_tlist->t_forw); 475 476 /* Upon exec, reset our lwp brand data. */ 477 (void) s10_freelwp(ttolwp(curthread)); 478 (void) s10_initlwp(ttolwp(curthread)); 479 480 /* 481 * Upon exec, reset all the proc brand data, except for the elf 482 * data associated with the executable we are exec'ing. 483 */ 484 spd->spd_handler = NULL; 485} 486 487/*ARGSUSED*/ 488int 489s10_initlwp(klwp_t *l) 490{ 491 ASSERT(l->lwp_procp->p_brand == &s10_brand); 492 ASSERT(l->lwp_procp->p_brand_data != NULL); 493 ASSERT(l->lwp_brand == NULL); 494 l->lwp_brand = (void *)-1; 495 return (0); 496} 497 498/*ARGSUSED*/ 499void 500s10_forklwp(klwp_t *p, klwp_t *c) 501{ 502 ASSERT(p->lwp_procp->p_brand == &s10_brand); 503 ASSERT(c->lwp_procp->p_brand == &s10_brand); 504 505 ASSERT(p->lwp_procp->p_brand_data != NULL); 506 ASSERT(c->lwp_procp->p_brand_data != NULL); 507 508 /* Both LWPs have already had been initialized via s10_initlwp() */ 509 ASSERT(p->lwp_brand != NULL); 510 ASSERT(c->lwp_brand != NULL); 511 512#ifdef __amd64 513 /* 514 * Only correct the child's %fs register if the parent's %fs register 515 * is LWPFS_SEL. If the parent's %fs register is zero, then the Solaris 516 * 10 environment that we're emulating uses a version of libc that 517 * works when %fs is zero (i.e., it contains backports of CRs 6467491 518 * and 6501650). 519 */ 520 if (p->lwp_pcb.pcb_fs == LWPFS_SEL) 521 s10_amd64_correct_fsreg(c); 522#endif /* __amd64 */ 523} 524 525/*ARGSUSED*/ 526void 527s10_freelwp(klwp_t *l) 528{ 529 ASSERT(l->lwp_procp->p_brand == &s10_brand); 530 ASSERT(l->lwp_procp->p_brand_data != NULL); 531 ASSERT(l->lwp_brand != NULL); 532 l->lwp_brand = NULL; 533} 534 535/*ARGSUSED*/ 536void 537s10_lwpexit(klwp_t *l) 538{ 539 ASSERT(l->lwp_procp->p_brand == &s10_brand); 540 ASSERT(l->lwp_procp->p_brand_data != NULL); 541 ASSERT(l->lwp_brand != NULL); 542 543 /* 544 * We should never be called for the last thread in a process. 545 * (That case is handled by s10_proc_exit().) There for this lwp 546 * must be exiting from a multi-threaded process. 547 */ 548 ASSERT(l->lwp_procp->p_tlist != l->lwp_procp->p_tlist->t_forw); 549 550 l->lwp_brand = NULL; 551} 552 553void 554s10_free_brand_data(zone_t *zone) 555{ 556 kmem_free(zone->zone_brand_data, sizeof (s10_zone_data_t)); 557} 558 559void 560s10_init_brand_data(zone_t *zone) 561{ 562 ASSERT(zone->zone_brand == &s10_brand); 563 ASSERT(zone->zone_brand_data == NULL); 564 zone->zone_brand_data = kmem_zalloc(sizeof (s10_zone_data_t), KM_SLEEP); 565} 566 567#if defined(_LP64) 568static void 569Ehdr32to64(Elf32_Ehdr *src, Ehdr *dst) 570{ 571 bcopy(src->e_ident, dst->e_ident, sizeof (src->e_ident)); 572 dst->e_type = src->e_type; 573 dst->e_machine = src->e_machine; 574 dst->e_version = src->e_version; 575 dst->e_entry = src->e_entry; 576 dst->e_phoff = src->e_phoff; 577 dst->e_shoff = src->e_shoff; 578 dst->e_flags = src->e_flags; 579 dst->e_ehsize = src->e_ehsize; 580 dst->e_phentsize = src->e_phentsize; 581 dst->e_phnum = src->e_phnum; 582 dst->e_shentsize = src->e_shentsize; 583 dst->e_shnum = src->e_shnum; 584 dst->e_shstrndx = src->e_shstrndx; 585} 586#endif /* _LP64 */ 587 588int 589s10_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, 590 int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred, 591 int brand_action) 592{ 593 vnode_t *nvp; 594 Ehdr ehdr; 595 Addr uphdr_vaddr; 596 intptr_t voffset; 597 int interp; 598 int i, err; 599 struct execenv env; 600 struct user *up = PTOU(curproc); 601 s10_proc_data_t *spd; 602 s10_elf_data_t sed, *sedp; 603 char *linker; 604 uintptr_t lddata; /* lddata of executable's linker */ 605 606 ASSERT(curproc->p_brand == &s10_brand); 607 ASSERT(curproc->p_brand_data != NULL); 608 609 spd = (s10_proc_data_t *)curproc->p_brand_data; 610 sedp = &spd->spd_elf_data; 611 612 args->brandname = S10_BRANDNAME; 613 614 /* 615 * We will exec the brand library and then map in the target 616 * application and (optionally) the brand's default linker. 617 */ 618 if (args->to_model == DATAMODEL_NATIVE) { 619 args->emulator = S10_LIB; 620 linker = S10_LINKER; 621#if defined(_LP64) 622 } else { 623 args->emulator = S10_LIB32; 624 linker = S10_LINKER32; 625#endif /* _LP64 */ 626 } 627 628 if ((err = lookupname(args->emulator, UIO_SYSSPACE, FOLLOW, NULLVPP, 629 &nvp)) != 0) { 630 uprintf("%s: not found.", args->emulator); 631 return (err); 632 } 633 634 if (args->to_model == DATAMODEL_NATIVE) { 635 err = elfexec(nvp, uap, args, idatap, level + 1, execsz, 636 setid, exec_file, cred, brand_action); 637#if defined(_LP64) 638 } else { 639 err = elf32exec(nvp, uap, args, idatap, level + 1, execsz, 640 setid, exec_file, cred, brand_action); 641#endif /* _LP64 */ 642 } 643 VN_RELE(nvp); 644 if (err != 0) 645 return (err); 646 647 /* 648 * The u_auxv vectors are set up by elfexec to point to the brand 649 * emulation library and linker. Save these so they can be copied to 650 * the specific brand aux vectors. 651 */ 652 bzero(&sed, sizeof (sed)); 653 for (i = 0; i < __KERN_NAUXV_IMPL; i++) { 654 switch (up->u_auxv[i].a_type) { 655 case AT_SUN_LDDATA: 656 sed.sed_lddata = up->u_auxv[i].a_un.a_val; 657 break; 658 case AT_BASE: 659 sed.sed_base = up->u_auxv[i].a_un.a_val; 660 break; 661 case AT_ENTRY: 662 sed.sed_entry = up->u_auxv[i].a_un.a_val; 663 break; 664 case AT_PHDR: 665 sed.sed_phdr = up->u_auxv[i].a_un.a_val; 666 break; 667 case AT_PHENT: 668 sed.sed_phent = up->u_auxv[i].a_un.a_val; 669 break; 670 case AT_PHNUM: 671 sed.sed_phnum = up->u_auxv[i].a_un.a_val; 672 break; 673 default: 674 break; 675 } 676 } 677 /* Make sure the emulator has an entry point */ 678 ASSERT(sed.sed_entry != NULL); 679 ASSERT(sed.sed_phdr != NULL); 680 681 bzero(&env, sizeof (env)); 682 if (args->to_model == DATAMODEL_NATIVE) { 683 err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr, &voffset, 684 exec_file, &interp, &env.ex_bssbase, &env.ex_brkbase, 685 &env.ex_brksize, NULL); 686#if defined(_LP64) 687 } else { 688 Elf32_Ehdr ehdr32; 689 Elf32_Addr uphdr_vaddr32; 690 err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32, 691 &voffset, exec_file, &interp, &env.ex_bssbase, 692 &env.ex_brkbase, &env.ex_brksize, NULL); 693 Ehdr32to64(&ehdr32, &ehdr); 694 if (uphdr_vaddr32 == (Elf32_Addr)-1) 695 uphdr_vaddr = (Addr)-1; 696 else 697 uphdr_vaddr = uphdr_vaddr32; 698#endif /* _LP64 */ 699 } 700 if (err != 0) 701 return (err); 702 703 /* 704 * Save off the important properties of the executable. The brand 705 * library will ask us for this data later, when it is initializing 706 * and getting ready to transfer control to the brand application. 707 */ 708 if (uphdr_vaddr == (Addr)-1) 709 sedp->sed_phdr = voffset + ehdr.e_phoff; 710 else 711 sedp->sed_phdr = voffset + uphdr_vaddr; 712 sedp->sed_entry = voffset + ehdr.e_entry; 713 sedp->sed_phent = ehdr.e_phentsize; 714 sedp->sed_phnum = ehdr.e_phnum; 715 716 if (interp) { 717 if (ehdr.e_type == ET_DYN) { 718 /* 719 * This is a shared object executable, so we need to 720 * pick a reasonable place to put the heap. Just don't 721 * use the first page. 722 */ 723 env.ex_brkbase = (caddr_t)PAGESIZE; 724 env.ex_bssbase = (caddr_t)PAGESIZE; 725 } 726 727 /* 728 * If the program needs an interpreter (most do), map it in and 729 * store relevant information about it in the aux vector, where 730 * the brand library can find it. 731 */ 732 if ((err = lookupname(linker, UIO_SYSSPACE, 733 FOLLOW, NULLVPP, &nvp)) != 0) { 734 uprintf("%s: not found.", S10_LINKER); 735 return (err); 736 } 737 if (args->to_model == DATAMODEL_NATIVE) { 738 err = mapexec_brand(nvp, args, &ehdr, 739 &uphdr_vaddr, &voffset, exec_file, &interp, 740 NULL, NULL, NULL, &lddata); 741#if defined(_LP64) 742 } else { 743 Elf32_Ehdr ehdr32; 744 Elf32_Addr uphdr_vaddr32; 745 err = mapexec32_brand(nvp, args, &ehdr32, 746 &uphdr_vaddr32, &voffset, exec_file, &interp, 747 NULL, NULL, NULL, &lddata); 748 Ehdr32to64(&ehdr32, &ehdr); 749 if (uphdr_vaddr32 == (Elf32_Addr)-1) 750 uphdr_vaddr = (Addr)-1; 751 else 752 uphdr_vaddr = uphdr_vaddr32; 753#endif /* _LP64 */ 754 } 755 VN_RELE(nvp); 756 if (err != 0) 757 return (err); 758 759 /* 760 * Now that we know the base address of the brand's linker, 761 * place it in the aux vector. 762 */ 763 sedp->sed_base = voffset; 764 sedp->sed_ldentry = voffset + ehdr.e_entry; 765 sedp->sed_lddata = voffset + lddata; 766 } else { 767 /* 768 * This program has no interpreter. The brand library will 769 * jump to the address in the AT_SUN_BRAND_LDENTRY aux vector, 770 * so in this case, put the entry point of the main executable 771 * there. 772 */ 773 if (ehdr.e_type == ET_EXEC) { 774 /* 775 * An executable with no interpreter, this must be a 776 * statically linked executable, which means we loaded 777 * it at the address specified in the elf header, in 778 * which case the e_entry field of the elf header is an 779 * absolute address. 780 */ 781 sedp->sed_ldentry = ehdr.e_entry; 782 sedp->sed_entry = ehdr.e_entry; 783 sedp->sed_lddata = NULL; 784 sedp->sed_base = NULL; 785 } else { 786 /* 787 * A shared object with no interpreter, we use the 788 * calculated address from above. 789 */ 790 sedp->sed_ldentry = sedp->sed_entry; 791 sedp->sed_entry = NULL; 792 sedp->sed_phdr = NULL; 793 sedp->sed_phent = NULL; 794 sedp->sed_phnum = NULL; 795 sedp->sed_lddata = NULL; 796 sedp->sed_base = voffset; 797 798 if (ehdr.e_type == ET_DYN) { 799 /* 800 * Delay setting the brkbase until the first 801 * call to brk(); see elfexec() for details. 802 */ 803 env.ex_bssbase = (caddr_t)0; 804 env.ex_brkbase = (caddr_t)0; 805 env.ex_brksize = 0; 806 } 807 } 808 } 809 810 env.ex_magic = elfmagic; 811 env.ex_vp = vp; 812 setexecenv(&env); 813 814 /* 815 * It's time to manipulate the process aux vectors. First 816 * we need to update the AT_SUN_AUXFLAGS aux vector to set 817 * the AF_SUN_NOPLM flag. 818 */ 819 if (args->to_model == DATAMODEL_NATIVE) { 820 auxv_t auxflags_auxv; 821 822 if (copyin(args->auxp_auxflags, &auxflags_auxv, 823 sizeof (auxflags_auxv)) != 0) 824 return (EFAULT); 825 826 ASSERT(auxflags_auxv.a_type == AT_SUN_AUXFLAGS); 827 auxflags_auxv.a_un.a_val |= AF_SUN_NOPLM; 828 if (copyout(&auxflags_auxv, args->auxp_auxflags, 829 sizeof (auxflags_auxv)) != 0) 830 return (EFAULT); 831#if defined(_LP64) 832 } else { 833 auxv32_t auxflags_auxv32; 834 835 if (copyin(args->auxp_auxflags, &auxflags_auxv32, 836 sizeof (auxflags_auxv32)) != 0) 837 return (EFAULT); 838 839 ASSERT(auxflags_auxv32.a_type == AT_SUN_AUXFLAGS); 840 auxflags_auxv32.a_un.a_val |= AF_SUN_NOPLM; 841 if (copyout(&auxflags_auxv32, args->auxp_auxflags, 842 sizeof (auxflags_auxv32)) != 0) 843 return (EFAULT); 844#endif /* _LP64 */ 845 } 846 847 /* Second, copy out the brand specific aux vectors. */ 848 if (args->to_model == DATAMODEL_NATIVE) { 849 auxv_t s10_auxv[] = { 850 { AT_SUN_BRAND_AUX1, 0 }, 851 { AT_SUN_BRAND_AUX2, 0 }, 852 { AT_SUN_BRAND_AUX3, 0 } 853 }; 854 855 ASSERT(s10_auxv[0].a_type == AT_SUN_BRAND_S10_LDDATA); 856 s10_auxv[0].a_un.a_val = sed.sed_lddata; 857 858 if (copyout(&s10_auxv, args->auxp_brand, 859 sizeof (s10_auxv)) != 0) 860 return (EFAULT); 861#if defined(_LP64) 862 } else { 863 auxv32_t s10_auxv32[] = { 864 { AT_SUN_BRAND_AUX1, 0 }, 865 { AT_SUN_BRAND_AUX2, 0 }, 866 { AT_SUN_BRAND_AUX3, 0 } 867 }; 868 869 ASSERT(s10_auxv32[0].a_type == AT_SUN_BRAND_S10_LDDATA); 870 s10_auxv32[0].a_un.a_val = (uint32_t)sed.sed_lddata; 871 if (copyout(&s10_auxv32, args->auxp_brand, 872 sizeof (s10_auxv32)) != 0) 873 return (EFAULT); 874#endif /* _LP64 */ 875 } 876 877 /* 878 * Third, the the /proc aux vectors set up by elfexec() point to brand 879 * emulation library and it's linker. Copy these to the /proc brand 880 * specific aux vector, and update the regular /proc aux vectors to 881 * point to the executable (and it's linker). This will enable 882 * debuggers to access the executable via the usual /proc or elf notes 883 * aux vectors. 884 * 885 * The brand emulation library's linker will get it's aux vectors off 886 * the stack, and then update the stack with the executable's aux 887 * vectors before jumping to the executable's linker. 888 * 889 * Debugging the brand emulation library must be done from 890 * the global zone, where the librtld_db module knows how to fetch the 891 * brand specific aux vectors to access the brand emulation libraries 892 * linker. 893 */ 894 for (i = 0; i < __KERN_NAUXV_IMPL; i++) { 895 ulong_t val; 896 897 switch (up->u_auxv[i].a_type) { 898 case AT_SUN_BRAND_S10_LDDATA: 899 up->u_auxv[i].a_un.a_val = sed.sed_lddata; 900 continue; 901 case AT_BASE: 902 val = sedp->sed_base; 903 break; 904 case AT_ENTRY: 905 val = sedp->sed_entry; 906 break; 907 case AT_PHDR: 908 val = sedp->sed_phdr; 909 break; 910 case AT_PHENT: 911 val = sedp->sed_phent; 912 break; 913 case AT_PHNUM: 914 val = sedp->sed_phnum; 915 break; 916 case AT_SUN_LDDATA: 917 val = sedp->sed_lddata; 918 break; 919 default: 920 continue; 921 } 922 923 up->u_auxv[i].a_un.a_val = val; 924 if (val == NULL) { 925 /* Hide the entry for static binaries */ 926 up->u_auxv[i].a_type = AT_IGNORE; 927 } 928 } 929 930 /* 931 * The last thing we do here is clear spd->spd_handler. This is 932 * important because if we're already a branded process and if this 933 * exec succeeds, there is a window between when the exec() first 934 * returns to the userland of the new process and when our brand 935 * library get's initialized, during which we don't want system 936 * calls to be re-directed to our brand library since it hasn't 937 * been initialized yet. 938 */ 939 spd->spd_handler = NULL; 940 941 return (0); 942} 943 944void 945s10_sigset_native_to_s10(sigset_t *set) 946{ 947 int nativesig; 948 int s10sig; 949 sigset_t s10set; 950 951 /* 952 * Shortcut: we know the first 32 signals are the same in both 953 * s10 and native Solaris. Just assign the first word. 954 */ 955 s10set.__sigbits[0] = set->__sigbits[0]; 956 s10set.__sigbits[1] = 0; 957 s10set.__sigbits[2] = 0; 958 s10set.__sigbits[3] = 0; 959 960 /* 961 * Copy the remainder of the initial set of common signals. 962 */ 963 for (nativesig = 33; nativesig < S10_SIGRTMIN; nativesig++) 964 if (sigismember(set, nativesig)) 965 sigaddset(&s10set, nativesig); 966 967 /* 968 * Convert any native RT signals to their S10 values. 969 */ 970 for (nativesig = _SIGRTMIN, s10sig = S10_SIGRTMIN; 971 nativesig <= _SIGRTMAX && s10sig <= S10_SIGRTMAX; 972 nativesig++, s10sig++) { 973 if (sigismember(set, nativesig)) 974 sigaddset(&s10set, s10sig); 975 } 976 977 *set = s10set; 978} 979 980void 981s10_sigset_s10_to_native(sigset_t *set) 982{ 983 int s10sig; 984 int nativesig; 985 sigset_t nativeset; 986 987 /* 988 * Shortcut: we know the first 32 signals are the same in both 989 * s10 and native Solaris. Just assign the first word. 990 */ 991 nativeset.__sigbits[0] = set->__sigbits[0]; 992 nativeset.__sigbits[1] = 0; 993 nativeset.__sigbits[2] = 0; 994 nativeset.__sigbits[3] = 0; 995 996 /* 997 * Copy the remainder of the initial set of common signals. 998 */ 999 for (s10sig = 33; s10sig < S10_SIGRTMIN; s10sig++) 1000 if (sigismember(set, s10sig)) 1001 sigaddset(&nativeset, s10sig); 1002 1003 /* 1004 * Convert any S10 RT signals to their native values. 1005 */ 1006 for (s10sig = S10_SIGRTMIN, nativesig = _SIGRTMIN; 1007 s10sig <= S10_SIGRTMAX && nativesig <= _SIGRTMAX; 1008 s10sig++, nativesig++) { 1009 if (sigismember(set, s10sig)) 1010 sigaddset(&nativeset, nativesig); 1011 } 1012 1013 *set = nativeset; 1014} 1015 1016int 1017_init(void) 1018{ 1019 int err; 1020 1021 /* 1022 * Set up the table indicating which system calls we want to 1023 * interpose on. We should probably build this automatically from 1024 * a list of system calls that is shared with the user-space 1025 * library. 1026 */ 1027 s10_emulation_table = kmem_zalloc(NSYSCALL, KM_SLEEP); 1028 s10_emulation_table[S10_SYS_forkall] = 1; /* 2 */ 1029 s10_emulation_table[S10_SYS_open] = 1; /* 5 */ 1030 s10_emulation_table[S10_SYS_wait] = 1; /* 7 */ 1031 s10_emulation_table[S10_SYS_creat] = 1; /* 8 */ 1032 s10_emulation_table[S10_SYS_unlink] = 1; /* 10 */ 1033 s10_emulation_table[S10_SYS_exec] = 1; /* 11 */ 1034 s10_emulation_table[S10_SYS_chown] = 1; /* 16 */ 1035 s10_emulation_table[S10_SYS_stat] = 1; /* 18 */ 1036 s10_emulation_table[S10_SYS_umount] = 1; /* 22 */ 1037 s10_emulation_table[S10_SYS_fstat] = 1; /* 28 */ 1038 s10_emulation_table[S10_SYS_utime] = 1; /* 30 */ 1039 s10_emulation_table[S10_SYS_access] = 1; /* 33 */ 1040 s10_emulation_table[SYS_kill] = 1; /* 37 */ 1041 s10_emulation_table[S10_SYS_dup] = 1; /* 41 */ 1042 s10_emulation_table[SYS_ioctl] = 1; /* 54 */ 1043 s10_emulation_table[SYS_execve] = 1; /* 59 */ 1044 s10_emulation_table[SYS_acctctl] = 1; /* 71 */ 1045 s10_emulation_table[S10_SYS_issetugid] = 1; /* 75 */ 1046 s10_emulation_table[S10_SYS_fsat] = 1; /* 76 */ 1047 s10_emulation_table[S10_SYS_rmdir] = 1; /* 79 */ 1048 s10_emulation_table[SYS_getdents] = 1; /* 81 */ 1049 s10_emulation_table[S10_SYS_poll] = 1; /* 87 */ 1050 s10_emulation_table[S10_SYS_lstat] = 1; /* 88 */ 1051 s10_emulation_table[S10_SYS_fchown] = 1; /* 94 */ 1052 s10_emulation_table[SYS_sigprocmask] = 1; /* 95 */ 1053 s10_emulation_table[SYS_sigsuspend] = 1; /* 96 */ 1054 s10_emulation_table[SYS_sigaction] = 1; /* 98 */ 1055 s10_emulation_table[SYS_sigpending] = 1; /* 99 */ 1056 s10_emulation_table[SYS_waitid] = 1; /* 107 */ 1057 s10_emulation_table[SYS_sigsendsys] = 1; /* 108 */ 1058#if defined(__x86) 1059 s10_emulation_table[S10_SYS_xstat] = 1; /* 123 */ 1060 s10_emulation_table[S10_SYS_lxstat] = 1; /* 124 */ 1061 s10_emulation_table[S10_SYS_fxstat] = 1; /* 125 */ 1062 s10_emulation_table[S10_SYS_xmknod] = 1; /* 126 */ 1063#endif 1064 s10_emulation_table[S10_SYS_lchown] = 1; /* 130 */ 1065 s10_emulation_table[S10_SYS_rename] = 1; /* 134 */ 1066 s10_emulation_table[SYS_uname] = 1; /* 135 */ 1067 s10_emulation_table[SYS_sysconfig] = 1; /* 137 */ 1068 s10_emulation_table[SYS_systeminfo] = 1; /* 139 */ 1069 s10_emulation_table[S10_SYS_fork1] = 1; /* 143 */ 1070 s10_emulation_table[SYS_sigtimedwait] = 1; /* 144 */ 1071 s10_emulation_table[S10_SYS_lwp_sema_wait] = 1; /* 147 */ 1072 s10_emulation_table[S10_SYS_utimes] = 1; /* 154 */ 1073 s10_emulation_table[SYS_lwp_create] = 1; /* 159 */ 1074 s10_emulation_table[SYS_lwp_kill] = 1; /* 163 */ 1075 s10_emulation_table[SYS_lwp_sigmask] = 1; /* 165 */ 1076#if defined(__amd64) 1077 s10_emulation_table[SYS_lwp_private] = 1; /* 166 */ 1078#endif /* __amd64 */ 1079 s10_emulation_table[S10_SYS_lwp_mutex_lock] = 1; /* 169 */ 1080 s10_emulation_table[SYS_pwrite] = 1; /* 174 */ 1081 s10_emulation_table[SYS_auditsys] = 1; /* 186 */ 1082 s10_emulation_table[SYS_sigqueue] = 1; /* 190 */ 1083 s10_emulation_table[SYS_signotify] = 1; /* 205 */ 1084 s10_emulation_table[SYS_lwp_mutex_timedlock] = 1; /* 210 */ 1085 s10_emulation_table[SYS_getdents64] = 1; /* 213 */ 1086 s10_emulation_table[S10_SYS_stat64] = 1; /* 215 */ 1087 s10_emulation_table[S10_SYS_lstat64] = 1; /* 216 */ 1088 s10_emulation_table[S10_SYS_fstat64] = 1; /* 217 */ 1089 s10_emulation_table[SYS_pwrite64] = 1; /* 223 */ 1090 s10_emulation_table[S10_SYS_creat64] = 1; /* 224 */ 1091 s10_emulation_table[S10_SYS_open64] = 1; /* 225 */ 1092 s10_emulation_table[SYS_zone] = 1; /* 227 */ 1093 s10_emulation_table[SYS_lwp_mutex_trylock] = 1; /* 251 */ 1094 1095 err = mod_install(&modlinkage); 1096 if (err) { 1097 cmn_err(CE_WARN, "Couldn't install brand module"); 1098 kmem_free(s10_emulation_table, NSYSCALL); 1099 } 1100 1101 return (err); 1102} 1103 1104int 1105_info(struct modinfo *modinfop) 1106{ 1107 return (mod_info(&modlinkage, modinfop)); 1108} 1109 1110int 1111_fini(void) 1112{ 1113 int err; 1114 1115 /* 1116 * If there are any zones using this brand, we can't allow it to be 1117 * unloaded. 1118 */ 1119 if (brand_zone_count(&s10_brand)) 1120 return (EBUSY); 1121 1122 kmem_free(s10_emulation_table, NSYSCALL); 1123 s10_emulation_table = NULL; 1124 1125 err = mod_remove(&modlinkage); 1126 if (err) 1127 cmn_err(CE_WARN, "Couldn't unload s10 brand module"); 1128 1129 return (err); 1130} 1131