s10_brand.c revision 11315:92ff2a8d2f86
1226586Sdim/* 2226586Sdim * CDDL HEADER START 3226586Sdim * 4226586Sdim * The contents of this file are subject to the terms of the 5226586Sdim * Common Development and Distribution License (the "License"). 6226586Sdim * You may not use this file except in compliance with the License. 7226586Sdim * 8226586Sdim * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9226586Sdim * or http://www.opensolaris.org/os/licensing. 10226586Sdim * See the License for the specific language governing permissions 11226586Sdim * and limitations under the License. 12226586Sdim * 13226586Sdim * When distributing Covered Code, include this CDDL HEADER in each 14239462Sdim * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15243830Sdim * If applicable, add the following below this CDDL HEADER, with the 16226586Sdim * fields enclosed by brackets "[]" replaced with your own identifying 17243830Sdim * information: Portions Copyright [yyyy] [name of copyright owner] 18239462Sdim * 19239462Sdim * CDDL HEADER END 20226586Sdim */ 21226586Sdim/* 22226586Sdim * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23226586Sdim * Use is subject to license terms. 24226586Sdim */ 25239462Sdim 26226586Sdim#include <sys/errno.h> 27226586Sdim#include <sys/exec.h> 28226586Sdim#include <sys/file.h> 29226586Sdim#include <sys/kmem.h> 30226586Sdim#include <sys/modctl.h> 31226586Sdim#include <sys/model.h> 32226586Sdim#include <sys/proc.h> 33226586Sdim#include <sys/syscall.h> 34226586Sdim#include <sys/systm.h> 35226586Sdim#include <sys/thread.h> 36243830Sdim#include <sys/cmn_err.h> 37243830Sdim#include <sys/archsystm.h> 38243830Sdim#include <sys/pathname.h> 39226586Sdim#include <sys/sunddi.h> 40226586Sdim 41226586Sdim#include <sys/machbrand.h> 42226586Sdim#include <sys/brand.h> 43226586Sdim#include "s10_brand.h" 44226586Sdim 45226586Sdimchar *s10_emulation_table = NULL; 46239462Sdim 47226586Sdimvoid s10_init_brand_data(zone_t *); 48239462Sdimvoid s10_free_brand_data(zone_t *); 49226586Sdimvoid s10_setbrand(proc_t *); 50226586Sdimint s10_getattr(zone_t *, int, void *, size_t *); 51226586Sdimint s10_setattr(zone_t *, int, void *, size_t); 52226586Sdimint s10_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t, 53239462Sdim uintptr_t, uintptr_t, uintptr_t); 54239462Sdimvoid s10_copy_procdata(proc_t *, proc_t *); 55239462Sdimvoid s10_proc_exit(struct proc *, klwp_t *); 56239462Sdimvoid s10_exec(); 57239462Sdimint s10_initlwp(klwp_t *); 58239462Sdimvoid s10_forklwp(klwp_t *, klwp_t *); 59239462Sdimvoid s10_freelwp(klwp_t *); 60239462Sdimvoid s10_lwpexit(klwp_t *); 61239462Sdimint s10_elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int, 62239462Sdim long *, int, caddr_t, cred_t *, int); 63239462Sdim 64251662Sdim/* s10 brand */ 65251662Sdimstruct brand_ops s10_brops = { 66251662Sdim s10_init_brand_data, 67251662Sdim s10_free_brand_data, 68251662Sdim s10_brandsys, 69251662Sdim s10_setbrand, 70251662Sdim s10_getattr, 71251662Sdim s10_setattr, 72251662Sdim s10_copy_procdata, 73251662Sdim s10_proc_exit, 74251662Sdim s10_exec, 75251662Sdim lwp_setrval, 76226586Sdim s10_initlwp, 77226586Sdim s10_forklwp, 78239462Sdim s10_freelwp, 79239462Sdim s10_lwpexit, 80239462Sdim s10_elfexec 81226586Sdim}; 82239462Sdim 83226586Sdim#ifdef sparc 84239462Sdim 85226586Sdimstruct brand_mach_ops s10_mops = { 86239462Sdim s10_brand_syscall_callback, 87239462Sdim s10_brand_syscall32_callback 88239462Sdim}; 89239462Sdim 90239462Sdim#else /* sparc */ 91239462Sdim 92239462Sdim#ifdef __amd64 93239462Sdim 94239462Sdimstruct brand_mach_ops s10_mops = { 95239462Sdim s10_brand_sysenter_callback, 96239462Sdim NULL, 97239462Sdim s10_brand_int91_callback, 98239462Sdim s10_brand_syscall_callback, 99239462Sdim s10_brand_syscall32_callback, 100239462Sdim NULL 101239462Sdim}; 102239462Sdim 103239462Sdim#else /* ! __amd64 */ 104239462Sdim 105239462Sdimstruct brand_mach_ops s10_mops = { 106239462Sdim s10_brand_sysenter_callback, 107239462Sdim NULL, 108239462Sdim NULL, 109239462Sdim s10_brand_syscall_callback, 110239462Sdim NULL, 111243830Sdim NULL 112226586Sdim}; 113243830Sdim#endif /* __amd64 */ 114226586Sdim 115243830Sdim#endif /* _sparc */ 116226586Sdim 117226586Sdimstruct brand s10_brand = { 118226586Sdim BRAND_VER_1, 119243830Sdim "solaris10", 120226586Sdim &s10_brops, 121226586Sdim &s10_mops 122226586Sdim}; 123226586Sdim 124226586Sdimstatic struct modlbrand modlbrand = { 125226586Sdim &mod_brandops, /* type of module */ 126226586Sdim "Solaris 10 Brand", /* description of module */ 127243830Sdim &s10_brand /* driver ops */ 128226586Sdim}; 129243830Sdim 130243830Sdimstatic struct modlinkage modlinkage = { 131243830Sdim MODREV_1, (void *)&modlbrand, NULL 132243830Sdim}; 133243830Sdim 134243830Sdimvoid 135243830Sdims10_setbrand(proc_t *p) 136243830Sdim{ 137239462Sdim ASSERT(p->p_brand == &s10_brand); 138239462Sdim ASSERT(p->p_brand_data == NULL); 139226586Sdim 140239462Sdim /* 141239462Sdim * We should only be called from exec(), when we know the process 142239462Sdim * is single-threaded. 143239462Sdim */ 144239462Sdim ASSERT(p->p_tlist == p->p_tlist->t_forw); 145239462Sdim 146239462Sdim p->p_brand_data = kmem_zalloc(sizeof (s10_proc_data_t), KM_SLEEP); 147239462Sdim (void) s10_initlwp(p->p_tlist->t_lwp); 148239462Sdim} 149239462Sdim 150239462Sdim/*ARGSUSED*/ 151239462Sdimint 152239462Sdims10_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize) 153239462Sdim{ 154239462Sdim ASSERT(zone->zone_brand == &s10_brand); 155239462Sdim if (attr == S10_EMUL_BITMAP) { 156239462Sdim if (buf == NULL || *bufsize != sizeof (s10_emul_bitmap_t)) 157239462Sdim return (EINVAL); 158239462Sdim if (copyout(((s10_zone_data_t *)zone->zone_brand_data)-> 159239462Sdim emul_bitmap, buf, sizeof (s10_emul_bitmap_t)) != 0) 160239462Sdim return (EFAULT); 161239462Sdim return (0); 162239462Sdim } 163239462Sdim 164239462Sdim return (EINVAL); 165239462Sdim} 166239462Sdim 167239462Sdimint 168239462Sdims10_setattr(zone_t *zone, int attr, void *buf, size_t bufsize) 169239462Sdim{ 170239462Sdim ASSERT(zone->zone_brand == &s10_brand); 171239462Sdim if (attr == S10_EMUL_BITMAP) { 172239462Sdim if (buf == NULL || bufsize != sizeof (s10_emul_bitmap_t)) 173239462Sdim return (EINVAL); 174239462Sdim if (copyin(buf, ((s10_zone_data_t *)zone->zone_brand_data)-> 175243830Sdim emul_bitmap, sizeof (s10_emul_bitmap_t)) != 0) 176243830Sdim return (EFAULT); 177239462Sdim return (0); 178239462Sdim } 179239462Sdim 180239462Sdim return (EINVAL); 181239462Sdim} 182239462Sdim 183239462Sdim#ifdef __amd64 184239462Sdim/* 185239462Sdim * The Nevada kernel clears %fs for threads in 64-bit x86 processes but S10's 186239462Sdim * libc expects %fs to be nonzero. This causes some committed 187239462Sdim * libc/libthread interfaces (e.g., thr_main()) to fail, which impacts several 188239462Sdim * libraries, including libdoor. This function sets the specified LWP's %fs 189239462Sdim * register to the legacy S10 selector value (LWPFS_SEL). 190239462Sdim * 191239462Sdim * The best solution to the aforementioned problem is backporting CRs 192239462Sdim * 6467491 to Solaris 10 so that 64-bit x86 Solaris 10 processes 193239462Sdim * would accept zero for %fs. Backporting the CRs is a requirement for running 194239462Sdim * S10 Containers in PV domUs because 64-bit Xen clears %fsbase when %fs is 195243830Sdim * nonzero. Such behavior breaks 64-bit processes because Xen has to fetch the 196243830Sdim * FS segments' base addresses from the LWPs' GDTs, which are only capable of 197243830Sdim * 32-bit addressing. 198243830Sdim */ 199243830Sdim/*ARGSUSED*/ 200243830Sdimstatic void 201239462Sdims10_amd64_correct_fsreg(klwp_t *l) 202239462Sdim{ 203226586Sdim if (lwp_getdatamodel(l) == DATAMODEL_NATIVE) { 204226586Sdim kpreempt_disable(); 205226586Sdim l->lwp_pcb.pcb_fs = LWPFS_SEL; 206234353Sdim l->lwp_pcb.pcb_rupdate = 1; 207239462Sdim lwptot(l)->t_post_sys = 1; /* Guarantee update_sregs() */ 208243830Sdim kpreempt_enable(); 209243830Sdim } 210239462Sdim} 211243830Sdim#endif /* __amd64 */ 212239462Sdim 213243830Sdimint 214226586Sdims10_native() 215243830Sdim{ 216243830Sdim struct user *up = PTOU(curproc); 217243830Sdim char *args_new, *comm_new, *p; 218243830Sdim int len; 219226586Sdim 220243830Sdim len = sizeof (S10_NATIVE_LINKER32 " ") - 1; 221243830Sdim 222243830Sdim /* 223243830Sdim * Make sure that the process' interpreter is the native dynamic linker. 224243830Sdim * Convention dictates that native processes executing within solaris10- 225243830Sdim * branded zones are interpreted by the native dynamic linker (the 226243830Sdim * process and its arguments are specified as arguments to the dynamic 227243830Sdim * linker). If this convention is violated (i.e., 228243830Sdim * brandsys(B_S10_NATIVE, ...) is invoked by a process that shouldn't be 229243830Sdim * native), then do nothing and silently indicate success. 230243830Sdim */ 231243830Sdim if (strcmp(up->u_comm, S10_LINKER_NAME) != 0) 232243830Sdim return (0); 233243830Sdim if (strncmp(up->u_psargs, S10_NATIVE_LINKER64 " /", len + 4) == 0) 234243830Sdim len += 3; /* to account for "/64" in the path */ 235243830Sdim else if (strncmp(up->u_psargs, S10_NATIVE_LINKER32 " /", len + 1) != 0) 236243830Sdim return (0); 237243830Sdim 238243830Sdim args_new = strdup(&up->u_psargs[len]); 239226586Sdim if ((p = strchr(args_new, ' ')) != NULL) 240239462Sdim *p = '\0'; 241226586Sdim if ((comm_new = strrchr(args_new, '/')) != NULL) 242226586Sdim comm_new = strdup(comm_new + 1); 243226586Sdim else 244226586Sdim comm_new = strdup(args_new); 245226586Sdim if (p != NULL) 246226586Sdim *p = ' '; 247226586Sdim 248226586Sdim if ((strlen(args_new) != 0) && (strlen(comm_new) != 0)) { 249226586Sdim mutex_enter(&curproc->p_lock); 250226586Sdim (void) strlcpy(up->u_comm, comm_new, MAXCOMLEN+1); 251243830Sdim (void) strlcpy(up->u_psargs, args_new, PSARGSZ); 252243830Sdim mutex_exit(&curproc->p_lock); 253243830Sdim } 254239462Sdim 255243830Sdim strfree(args_new); 256243830Sdim strfree(comm_new); 257243830Sdim return (0); 258243830Sdim} 259243830Sdim 260243830Sdim/* 261243830Sdim * Get the address of the user-space system call handler from the user 262243830Sdim * process and attach it to the proc structure. 263243830Sdim */ 264243830Sdim/*ARGSUSED*/ 265239462Sdimint 266239462Sdims10_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, 267226586Sdim uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6) 268226586Sdim{ 269226586Sdim s10_proc_data_t *spd; 270226586Sdim s10_brand_reg_t reg; 271226586Sdim proc_t *p = curproc; 272226586Sdim int err; 273226586Sdim 274 *rval = 0; 275 276 /* 277 * B_EXEC_BRAND is redundant 278 * since the kernel assumes a native process doing an exec 279 * in a branded zone is going to run a branded processes. 280 * hence we don't support this operation. 281 */ 282 if (cmd == B_EXEC_BRAND) 283 return (ENOSYS); 284 285 if (cmd == B_S10_NATIVE) 286 return (s10_native()); 287 288 /* For all other operations this must be a branded process. */ 289 if (p->p_brand == &native_brand) 290 return (ENOSYS); 291 292 ASSERT(p->p_brand == &s10_brand); 293 ASSERT(p->p_brand_data != NULL); 294 295 spd = (s10_proc_data_t *)p->p_brand_data; 296 297 switch (cmd) { 298 case B_EXEC_NATIVE: 299 err = exec_common( 300 (char *)arg1, (const char **)arg2, (const char **)arg3, 301 EBA_NATIVE); 302 return (err); 303 304 case B_REGISTER: 305 if (p->p_model == DATAMODEL_NATIVE) { 306 if (copyin((void *)arg1, ®, sizeof (reg)) != 0) 307 return (EFAULT); 308#if defined(_LP64) 309 } else { 310 s10_brand_reg32_t reg32; 311 312 if (copyin((void *)arg1, ®32, sizeof (reg32)) != 0) 313 return (EFAULT); 314 reg.sbr_version = reg32.sbr_version; 315 reg.sbr_handler = (caddr_t)(uintptr_t)reg32.sbr_handler; 316#endif /* _LP64 */ 317 } 318 319 if (reg.sbr_version != S10_VERSION) 320 return (ENOTSUP); 321 spd->spd_handler = reg.sbr_handler; 322 return (0); 323 324 case B_ELFDATA: 325 if (p->p_model == DATAMODEL_NATIVE) { 326 if (copyout(&spd->spd_elf_data, (void *)arg1, 327 sizeof (s10_elf_data_t)) != 0) 328 return (EFAULT); 329#if defined(_LP64) 330 } else { 331 s10_elf_data32_t sed32; 332 333 sed32.sed_phdr = spd->spd_elf_data.sed_phdr; 334 sed32.sed_phent = spd->spd_elf_data.sed_phent; 335 sed32.sed_phnum = spd->spd_elf_data.sed_phnum; 336 sed32.sed_entry = spd->spd_elf_data.sed_entry; 337 sed32.sed_base = spd->spd_elf_data.sed_base; 338 sed32.sed_ldentry = spd->spd_elf_data.sed_ldentry; 339 sed32.sed_lddata = spd->spd_elf_data.sed_lddata; 340 if (copyout(&sed32, (void *)arg1, sizeof (sed32)) != 0) 341 return (EFAULT); 342#endif /* _LP64 */ 343 } 344 return (0); 345 346 case B_S10_PIDINFO: 347 /* 348 * The s10 brand needs to be able to get the pid of the 349 * current process and the pid of the zone's init, and it 350 * needs to do this on every process startup. Early in 351 * brand startup, we can't call getpid() because calls to 352 * getpid() represent a magical signal to some old-skool 353 * debuggers. By merging all of this into one call, we 354 * make this quite a bit cheaper and easier to handle in 355 * the brand module. 356 */ 357 if (copyout(&p->p_pid, (void *)arg1, sizeof (pid_t)) != 0) 358 return (EFAULT); 359 if (copyout(&p->p_zone->zone_proc_initpid, (void *)arg2, 360 sizeof (pid_t)) != 0) 361 return (EFAULT); 362 return (0); 363 364 case B_S10_TRUSS_POINT: 365 /* 366 * This subcommand exists so that we can see truss output 367 * from interposed system calls that return without first 368 * calling any other system call, meaning they would be 369 * invisible to truss(1). 370 * 371 * If the second argument is set non-zero, set errno to that 372 * value as well. 373 * 374 * Arguments are: 375 * 376 * arg1: syscall number 377 * arg2: errno 378 */ 379 return ((arg2 == 0) ? 0 : set_errno((uint_t)arg2)); 380 381 case B_S10_ISFDXATTRDIR: { 382 /* 383 * This subcommand enables the userland brand emulation library 384 * to determine whether a file descriptor refers to an extended 385 * file attributes directory. There is no standard syscall or 386 * libc function that can make such a determination. 387 */ 388 file_t *dir_filep; 389 390 dir_filep = getf((int)arg1); 391 if (dir_filep == NULL) 392 return (EBADF); 393 ASSERT(dir_filep->f_vnode != NULL); 394 *rval = IS_XATTRDIR(dir_filep->f_vnode); 395 releasef((int)arg1); 396 return (0); 397 } 398 399#ifdef __amd64 400 case B_S10_FSREGCORRECTION: 401 /* 402 * This subcommand exists so that the SYS_lwp_private and 403 * SYS_lwp_create syscalls can manually set the current thread's 404 * %fs register to the legacy S10 selector value for 64-bit x86 405 * processes. 406 */ 407 s10_amd64_correct_fsreg(ttolwp(curthread)); 408 return (0); 409#endif /* __amd64 */ 410 } 411 412 return (EINVAL); 413} 414 415/* 416 * Copy the per-process brand data from a parent proc to a child. 417 */ 418void 419s10_copy_procdata(proc_t *child, proc_t *parent) 420{ 421 s10_proc_data_t *spd; 422 423 ASSERT(parent->p_brand == &s10_brand); 424 ASSERT(child->p_brand == &s10_brand); 425 ASSERT(parent->p_brand_data != NULL); 426 ASSERT(child->p_brand_data == NULL); 427 428 /* Just duplicate all the proc data of the parent for the child */ 429 spd = kmem_alloc(sizeof (s10_proc_data_t), KM_SLEEP); 430 bcopy(parent->p_brand_data, spd, sizeof (s10_proc_data_t)); 431 child->p_brand_data = spd; 432} 433 434/*ARGSUSED*/ 435void 436s10_proc_exit(struct proc *p, klwp_t *l) 437{ 438 ASSERT(p->p_brand == &s10_brand); 439 ASSERT(p->p_brand_data != NULL); 440 441 /* 442 * We should only be called from proc_exit(), when we know that 443 * process is single-threaded. 444 */ 445 ASSERT(p->p_tlist == p->p_tlist->t_forw); 446 447 /* upon exit, free our lwp brand data */ 448 (void) s10_freelwp(ttolwp(curthread)); 449 450 /* upon exit, free our proc brand data */ 451 kmem_free(p->p_brand_data, sizeof (s10_proc_data_t)); 452 p->p_brand_data = NULL; 453} 454 455void 456s10_exec() 457{ 458 s10_proc_data_t *spd = curproc->p_brand_data; 459 460 ASSERT(curproc->p_brand == &s10_brand); 461 ASSERT(curproc->p_brand_data != NULL); 462 ASSERT(ttolwp(curthread)->lwp_brand != NULL); 463 464 /* 465 * We should only be called from exec(), when we know the process 466 * is single-threaded. 467 */ 468 ASSERT(curproc->p_tlist == curproc->p_tlist->t_forw); 469 470 /* Upon exec, reset our lwp brand data. */ 471 (void) s10_freelwp(ttolwp(curthread)); 472 (void) s10_initlwp(ttolwp(curthread)); 473 474 /* 475 * Upon exec, reset all the proc brand data, except for the elf 476 * data associated with the executable we are exec'ing. 477 */ 478 spd->spd_handler = NULL; 479} 480 481/*ARGSUSED*/ 482int 483s10_initlwp(klwp_t *l) 484{ 485 ASSERT(l->lwp_procp->p_brand == &s10_brand); 486 ASSERT(l->lwp_procp->p_brand_data != NULL); 487 ASSERT(l->lwp_brand == NULL); 488 l->lwp_brand = (void *)-1; 489 return (0); 490} 491 492/*ARGSUSED*/ 493void 494s10_forklwp(klwp_t *p, klwp_t *c) 495{ 496 ASSERT(p->lwp_procp->p_brand == &s10_brand); 497 ASSERT(c->lwp_procp->p_brand == &s10_brand); 498 499 ASSERT(p->lwp_procp->p_brand_data != NULL); 500 ASSERT(c->lwp_procp->p_brand_data != NULL); 501 502 /* Both LWPs have already had been initialized via s10_initlwp() */ 503 ASSERT(p->lwp_brand != NULL); 504 ASSERT(c->lwp_brand != NULL); 505 506#ifdef __amd64 507 /* 508 * Only correct the child's %fs register if the parent's %fs register 509 * is LWPFS_SEL. If the parent's %fs register is zero, then the Solaris 510 * 10 environment that we're emulating uses a version of libc that 511 * works when %fs is zero (i.e., it contains backports of CRs 6467491 512 * and 6501650). 513 */ 514 if (p->lwp_pcb.pcb_fs == LWPFS_SEL) 515 s10_amd64_correct_fsreg(c); 516#endif /* __amd64 */ 517} 518 519/*ARGSUSED*/ 520void 521s10_freelwp(klwp_t *l) 522{ 523 ASSERT(l->lwp_procp->p_brand == &s10_brand); 524 ASSERT(l->lwp_procp->p_brand_data != NULL); 525 ASSERT(l->lwp_brand != NULL); 526 l->lwp_brand = NULL; 527} 528 529/*ARGSUSED*/ 530void 531s10_lwpexit(klwp_t *l) 532{ 533 ASSERT(l->lwp_procp->p_brand == &s10_brand); 534 ASSERT(l->lwp_procp->p_brand_data != NULL); 535 ASSERT(l->lwp_brand != NULL); 536 537 /* 538 * We should never be called for the last thread in a process. 539 * (That case is handled by s10_proc_exit().) There for this lwp 540 * must be exiting from a multi-threaded process. 541 */ 542 ASSERT(l->lwp_procp->p_tlist != l->lwp_procp->p_tlist->t_forw); 543 544 l->lwp_brand = NULL; 545} 546 547void 548s10_free_brand_data(zone_t *zone) 549{ 550 kmem_free(zone->zone_brand_data, sizeof (s10_zone_data_t)); 551} 552 553void 554s10_init_brand_data(zone_t *zone) 555{ 556 ASSERT(zone->zone_brand == &s10_brand); 557 ASSERT(zone->zone_brand_data == NULL); 558 zone->zone_brand_data = kmem_zalloc(sizeof (s10_zone_data_t), KM_SLEEP); 559} 560 561#if defined(_LP64) 562static void 563Ehdr32to64(Elf32_Ehdr *src, Ehdr *dst) 564{ 565 bcopy(src->e_ident, dst->e_ident, sizeof (src->e_ident)); 566 dst->e_type = src->e_type; 567 dst->e_machine = src->e_machine; 568 dst->e_version = src->e_version; 569 dst->e_entry = src->e_entry; 570 dst->e_phoff = src->e_phoff; 571 dst->e_shoff = src->e_shoff; 572 dst->e_flags = src->e_flags; 573 dst->e_ehsize = src->e_ehsize; 574 dst->e_phentsize = src->e_phentsize; 575 dst->e_phnum = src->e_phnum; 576 dst->e_shentsize = src->e_shentsize; 577 dst->e_shnum = src->e_shnum; 578 dst->e_shstrndx = src->e_shstrndx; 579} 580#endif /* _LP64 */ 581 582int 583s10_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, 584 int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred, 585 int brand_action) 586{ 587 vnode_t *nvp; 588 Ehdr ehdr; 589 Addr uphdr_vaddr; 590 intptr_t voffset; 591 int interp; 592 int i, err; 593 struct execenv env; 594 struct user *up = PTOU(curproc); 595 s10_proc_data_t *spd; 596 s10_elf_data_t sed, *sedp; 597 char *linker; 598 uintptr_t lddata; /* lddata of executable's linker */ 599 600 ASSERT(curproc->p_brand == &s10_brand); 601 ASSERT(curproc->p_brand_data != NULL); 602 603 spd = (s10_proc_data_t *)curproc->p_brand_data; 604 sedp = &spd->spd_elf_data; 605 606 args->brandname = S10_BRANDNAME; 607 608 /* 609 * We will exec the brand library and then map in the target 610 * application and (optionally) the brand's default linker. 611 */ 612 if (args->to_model == DATAMODEL_NATIVE) { 613 args->emulator = S10_LIB; 614 linker = S10_LINKER; 615#if defined(_LP64) 616 } else { 617 args->emulator = S10_LIB32; 618 linker = S10_LINKER32; 619#endif /* _LP64 */ 620 } 621 622 if ((err = lookupname(args->emulator, UIO_SYSSPACE, FOLLOW, NULLVPP, 623 &nvp)) != 0) { 624 uprintf("%s: not found.", args->emulator); 625 return (err); 626 } 627 628 if (args->to_model == DATAMODEL_NATIVE) { 629 err = elfexec(nvp, uap, args, idatap, level + 1, execsz, 630 setid, exec_file, cred, brand_action); 631#if defined(_LP64) 632 } else { 633 err = elf32exec(nvp, uap, args, idatap, level + 1, execsz, 634 setid, exec_file, cred, brand_action); 635#endif /* _LP64 */ 636 } 637 VN_RELE(nvp); 638 if (err != 0) 639 return (err); 640 641 /* 642 * The u_auxv vectors are set up by elfexec to point to the brand 643 * emulation library and linker. Save these so they can be copied to 644 * the specific brand aux vectors. 645 */ 646 bzero(&sed, sizeof (sed)); 647 for (i = 0; i < __KERN_NAUXV_IMPL; i++) { 648 switch (up->u_auxv[i].a_type) { 649 case AT_SUN_LDDATA: 650 sed.sed_lddata = up->u_auxv[i].a_un.a_val; 651 break; 652 case AT_BASE: 653 sed.sed_base = up->u_auxv[i].a_un.a_val; 654 break; 655 case AT_ENTRY: 656 sed.sed_entry = up->u_auxv[i].a_un.a_val; 657 break; 658 case AT_PHDR: 659 sed.sed_phdr = up->u_auxv[i].a_un.a_val; 660 break; 661 case AT_PHENT: 662 sed.sed_phent = up->u_auxv[i].a_un.a_val; 663 break; 664 case AT_PHNUM: 665 sed.sed_phnum = up->u_auxv[i].a_un.a_val; 666 break; 667 default: 668 break; 669 } 670 } 671 /* Make sure the emulator has an entry point */ 672 ASSERT(sed.sed_entry != NULL); 673 ASSERT(sed.sed_phdr != NULL); 674 675 bzero(&env, sizeof (env)); 676 if (args->to_model == DATAMODEL_NATIVE) { 677 err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr, &voffset, 678 exec_file, &interp, &env.ex_bssbase, &env.ex_brkbase, 679 &env.ex_brksize, NULL); 680#if defined(_LP64) 681 } else { 682 Elf32_Ehdr ehdr32; 683 Elf32_Addr uphdr_vaddr32; 684 err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32, 685 &voffset, exec_file, &interp, &env.ex_bssbase, 686 &env.ex_brkbase, &env.ex_brksize, NULL); 687 Ehdr32to64(&ehdr32, &ehdr); 688 if (uphdr_vaddr32 == (Elf32_Addr)-1) 689 uphdr_vaddr = (Addr)-1; 690 else 691 uphdr_vaddr = uphdr_vaddr32; 692#endif /* _LP64 */ 693 } 694 if (err != 0) 695 return (err); 696 697 /* 698 * Save off the important properties of the executable. The brand 699 * library will ask us for this data later, when it is initializing 700 * and getting ready to transfer control to the brand application. 701 */ 702 if (uphdr_vaddr == (Addr)-1) 703 sedp->sed_phdr = voffset + ehdr.e_phoff; 704 else 705 sedp->sed_phdr = voffset + uphdr_vaddr; 706 sedp->sed_entry = voffset + ehdr.e_entry; 707 sedp->sed_phent = ehdr.e_phentsize; 708 sedp->sed_phnum = ehdr.e_phnum; 709 710 if (interp) { 711 if (ehdr.e_type == ET_DYN) { 712 /* 713 * This is a shared object executable, so we need to 714 * pick a reasonable place to put the heap. Just don't 715 * use the first page. 716 */ 717 env.ex_brkbase = (caddr_t)PAGESIZE; 718 env.ex_bssbase = (caddr_t)PAGESIZE; 719 } 720 721 /* 722 * If the program needs an interpreter (most do), map it in and 723 * store relevant information about it in the aux vector, where 724 * the brand library can find it. 725 */ 726 if ((err = lookupname(linker, UIO_SYSSPACE, 727 FOLLOW, NULLVPP, &nvp)) != 0) { 728 uprintf("%s: not found.", S10_LINKER); 729 return (err); 730 } 731 if (args->to_model == DATAMODEL_NATIVE) { 732 err = mapexec_brand(nvp, args, &ehdr, 733 &uphdr_vaddr, &voffset, exec_file, &interp, 734 NULL, NULL, NULL, &lddata); 735#if defined(_LP64) 736 } else { 737 Elf32_Ehdr ehdr32; 738 Elf32_Addr uphdr_vaddr32; 739 err = mapexec32_brand(nvp, args, &ehdr32, 740 &uphdr_vaddr32, &voffset, exec_file, &interp, 741 NULL, NULL, NULL, &lddata); 742 Ehdr32to64(&ehdr32, &ehdr); 743 if (uphdr_vaddr32 == (Elf32_Addr)-1) 744 uphdr_vaddr = (Addr)-1; 745 else 746 uphdr_vaddr = uphdr_vaddr32; 747#endif /* _LP64 */ 748 } 749 VN_RELE(nvp); 750 if (err != 0) 751 return (err); 752 753 /* 754 * Now that we know the base address of the brand's linker, 755 * place it in the aux vector. 756 */ 757 sedp->sed_base = voffset; 758 sedp->sed_ldentry = voffset + ehdr.e_entry; 759 sedp->sed_lddata = voffset + lddata; 760 } else { 761 /* 762 * This program has no interpreter. The brand library will 763 * jump to the address in the AT_SUN_BRAND_LDENTRY aux vector, 764 * so in this case, put the entry point of the main executable 765 * there. 766 */ 767 if (ehdr.e_type == ET_EXEC) { 768 /* 769 * An executable with no interpreter, this must be a 770 * statically linked executable, which means we loaded 771 * it at the address specified in the elf header, in 772 * which case the e_entry field of the elf header is an 773 * absolute address. 774 */ 775 sedp->sed_ldentry = ehdr.e_entry; 776 sedp->sed_entry = ehdr.e_entry; 777 sedp->sed_lddata = NULL; 778 sedp->sed_base = NULL; 779 } else { 780 /* 781 * A shared object with no interpreter, we use the 782 * calculated address from above. 783 */ 784 sedp->sed_ldentry = sedp->sed_entry; 785 sedp->sed_entry = NULL; 786 sedp->sed_phdr = NULL; 787 sedp->sed_phent = NULL; 788 sedp->sed_phnum = NULL; 789 sedp->sed_lddata = NULL; 790 sedp->sed_base = voffset; 791 792 if (ehdr.e_type == ET_DYN) { 793 /* 794 * Delay setting the brkbase until the first 795 * call to brk(); see elfexec() for details. 796 */ 797 env.ex_bssbase = (caddr_t)0; 798 env.ex_brkbase = (caddr_t)0; 799 env.ex_brksize = 0; 800 } 801 } 802 } 803 804 env.ex_magic = elfmagic; 805 env.ex_vp = vp; 806 setexecenv(&env); 807 808 /* 809 * It's time to manipulate the process aux vectors. First 810 * we need to update the AT_SUN_AUXFLAGS aux vector to set 811 * the AF_SUN_NOPLM flag. 812 */ 813 if (args->to_model == DATAMODEL_NATIVE) { 814 auxv_t auxflags_auxv; 815 816 if (copyin(args->auxp_auxflags, &auxflags_auxv, 817 sizeof (auxflags_auxv)) != 0) 818 return (EFAULT); 819 820 ASSERT(auxflags_auxv.a_type == AT_SUN_AUXFLAGS); 821 auxflags_auxv.a_un.a_val |= AF_SUN_NOPLM; 822 if (copyout(&auxflags_auxv, args->auxp_auxflags, 823 sizeof (auxflags_auxv)) != 0) 824 return (EFAULT); 825#if defined(_LP64) 826 } else { 827 auxv32_t auxflags_auxv32; 828 829 if (copyin(args->auxp_auxflags, &auxflags_auxv32, 830 sizeof (auxflags_auxv32)) != 0) 831 return (EFAULT); 832 833 ASSERT(auxflags_auxv32.a_type == AT_SUN_AUXFLAGS); 834 auxflags_auxv32.a_un.a_val |= AF_SUN_NOPLM; 835 if (copyout(&auxflags_auxv32, args->auxp_auxflags, 836 sizeof (auxflags_auxv32)) != 0) 837 return (EFAULT); 838#endif /* _LP64 */ 839 } 840 841 /* Second, copy out the brand specific aux vectors. */ 842 if (args->to_model == DATAMODEL_NATIVE) { 843 auxv_t s10_auxv[] = { 844 { AT_SUN_BRAND_AUX1, 0 }, 845 { AT_SUN_BRAND_AUX2, 0 }, 846 { AT_SUN_BRAND_AUX3, 0 } 847 }; 848 849 ASSERT(s10_auxv[0].a_type == AT_SUN_BRAND_S10_LDDATA); 850 s10_auxv[0].a_un.a_val = sed.sed_lddata; 851 852 if (copyout(&s10_auxv, args->auxp_brand, 853 sizeof (s10_auxv)) != 0) 854 return (EFAULT); 855#if defined(_LP64) 856 } else { 857 auxv32_t s10_auxv32[] = { 858 { AT_SUN_BRAND_AUX1, 0 }, 859 { AT_SUN_BRAND_AUX2, 0 }, 860 { AT_SUN_BRAND_AUX3, 0 } 861 }; 862 863 ASSERT(s10_auxv32[0].a_type == AT_SUN_BRAND_S10_LDDATA); 864 s10_auxv32[0].a_un.a_val = (uint32_t)sed.sed_lddata; 865 if (copyout(&s10_auxv32, args->auxp_brand, 866 sizeof (s10_auxv32)) != 0) 867 return (EFAULT); 868#endif /* _LP64 */ 869 } 870 871 /* 872 * Third, the the /proc aux vectors set up by elfexec() point to brand 873 * emulation library and it's linker. Copy these to the /proc brand 874 * specific aux vector, and update the regular /proc aux vectors to 875 * point to the executable (and it's linker). This will enable 876 * debuggers to access the executable via the usual /proc or elf notes 877 * aux vectors. 878 * 879 * The brand emulation library's linker will get it's aux vectors off 880 * the stack, and then update the stack with the executable's aux 881 * vectors before jumping to the executable's linker. 882 * 883 * Debugging the brand emulation library must be done from 884 * the global zone, where the librtld_db module knows how to fetch the 885 * brand specific aux vectors to access the brand emulation libraries 886 * linker. 887 */ 888 for (i = 0; i < __KERN_NAUXV_IMPL; i++) { 889 ulong_t val; 890 891 switch (up->u_auxv[i].a_type) { 892 case AT_SUN_BRAND_S10_LDDATA: 893 up->u_auxv[i].a_un.a_val = sed.sed_lddata; 894 continue; 895 case AT_BASE: 896 val = sedp->sed_base; 897 break; 898 case AT_ENTRY: 899 val = sedp->sed_entry; 900 break; 901 case AT_PHDR: 902 val = sedp->sed_phdr; 903 break; 904 case AT_PHENT: 905 val = sedp->sed_phent; 906 break; 907 case AT_PHNUM: 908 val = sedp->sed_phnum; 909 break; 910 case AT_SUN_LDDATA: 911 val = sedp->sed_lddata; 912 break; 913 default: 914 continue; 915 } 916 917 up->u_auxv[i].a_un.a_val = val; 918 if (val == NULL) { 919 /* Hide the entry for static binaries */ 920 up->u_auxv[i].a_type = AT_IGNORE; 921 } 922 } 923 924 /* 925 * The last thing we do here is clear spd->spd_handler. This is 926 * important because if we're already a branded process and if this 927 * exec succeeds, there is a window between when the exec() first 928 * returns to the userland of the new process and when our brand 929 * library get's initialized, during which we don't want system 930 * calls to be re-directed to our brand library since it hasn't 931 * been initialized yet. 932 */ 933 spd->spd_handler = NULL; 934 935 return (0); 936} 937 938 939int 940_init(void) 941{ 942 int err; 943 944 /* 945 * Set up the table indicating which system calls we want to 946 * interpose on. We should probably build this automatically from 947 * a list of system calls that is shared with the user-space 948 * library. 949 */ 950 s10_emulation_table = kmem_zalloc(NSYSCALL, KM_SLEEP); 951 s10_emulation_table[SYS_exec] = 1; /* 11 */ 952 s10_emulation_table[SYS_ioctl] = 1; /* 54 */ 953 s10_emulation_table[SYS_execve] = 1; /* 59 */ 954 s10_emulation_table[SYS_acctctl] = 1; /* 71 */ 955 s10_emulation_table[S10_SYS_issetugid] = 1; /* 75 */ 956 s10_emulation_table[SYS_getdents] = 1; /* 81 */ 957 s10_emulation_table[SYS_uname] = 1; /* 135 */ 958 s10_emulation_table[SYS_systeminfo] = 1; /* 139 */ 959#ifdef __amd64 960 s10_emulation_table[SYS_lwp_create] = 1; /* 159 */ 961 s10_emulation_table[SYS_lwp_private] = 1; /* 166 */ 962#endif /* __amd64 */ 963 s10_emulation_table[SYS_pwrite] = 1; /* 174 */ 964 s10_emulation_table[SYS_auditsys] = 1; /* 186 */ 965 s10_emulation_table[SYS_sigqueue] = 1; /* 190 */ 966 s10_emulation_table[SYS_lwp_mutex_timedlock] = 1; /* 210 */ 967 s10_emulation_table[SYS_getdents64] = 1; /* 213 */ 968 s10_emulation_table[SYS_pwrite64] = 1; /* 223 */ 969 s10_emulation_table[SYS_zone] = 1; /* 227 */ 970 s10_emulation_table[SYS_lwp_mutex_trylock] = 1; /* 251 */ 971 972 err = mod_install(&modlinkage); 973 if (err) { 974 cmn_err(CE_WARN, "Couldn't install brand module"); 975 kmem_free(s10_emulation_table, NSYSCALL); 976 } 977 978 return (err); 979} 980 981int 982_info(struct modinfo *modinfop) 983{ 984 return (mod_info(&modlinkage, modinfop)); 985} 986 987int 988_fini(void) 989{ 990 int err; 991 992 /* 993 * If there are any zones using this brand, we can't allow it to be 994 * unloaded. 995 */ 996 if (brand_zone_count(&s10_brand)) 997 return (EBUSY); 998 999 kmem_free(s10_emulation_table, NSYSCALL); 1000 s10_emulation_table = NULL; 1001 1002 err = mod_remove(&modlinkage); 1003 if (err) 1004 cmn_err(CE_WARN, "Couldn't unload s10 brand module"); 1005 1006 return (err); 1007} 1008