sys_machdep.c revision 276084
/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)sys_machdep.c	5.5 (Berkeley) 1/19/91
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/i386/i386/sys_machdep.c 276084 2014-12-22 21:32:39Z jhb $");

#include "opt_capsicum.h"
#include "opt_kstack_pages.h"

#include <sys/param.h>
#include <sys/capability.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysproto.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>

#include <machine/cpu.h>
#include <machine/pcb.h>
#include <machine/pcb_ext.h>
#include <machine/proc.h>
#include <machine/sysarch.h>

#include <security/audit/audit.h>

#ifdef XEN
#include <machine/xen/xenfunc.h>

void i386_reset_ldt(struct proc_ldt *pldt);

void
i386_reset_ldt(struct proc_ldt *pldt)
{
	xen_set_ldt((vm_offset_t)pldt->ldt_base, pldt->ldt_len);
}
#else
#define i386_reset_ldt(x)
#endif

#include <vm/vm_kern.h>		/* for kernel_map */

#define MAX_LD 8192
#define LD_PER_PAGE 512
#define	NEW_MAX_LD(num)		((num + LD_PER_PAGE) & ~(LD_PER_PAGE-1))
#define SIZE_FROM_LARGEST_LD(num) (NEW_MAX_LD(num) << 3)
#define	NULL_LDT_BASE		((caddr_t)NULL)

#ifdef SMP
static void set_user_ldt_rv(struct vmspace *vmsp);
#endif
static int i386_set_ldt_data(struct thread *, int start, int num,
	union descriptor *descs);
static int i386_ldt_grow(struct thread *td, int len);

#ifndef _SYS_SYSPROTO_H_
struct sysarch_args {
	int op;
	char *parms;
};
#endif

int
sysarch(td, uap)
	struct thread *td;
	register struct sysarch_args *uap;
{
	int error;
	union descriptor *lp;
	union {
		struct i386_ldt_args largs;
		struct i386_ioperm_args iargs;
		struct i386_get_xfpustate xfpu;
	} kargs;
	uint32_t base;
	struct segment_descriptor sd, *sdp;

	AUDIT_ARG_CMD(uap->op);

#ifdef CAPABILITY_MODE
	/*
	 * When adding new operations, add a new case statement here to
	 * explicitly indicate whether or not the operation is safe to
	 * perform in capability mode.
	 */
	if (IN_CAPABILITY_MODE(td)) {
		switch (uap->op) {
		case I386_GET_LDT:
		case I386_SET_LDT:
		case I386_GET_IOPERM:
		case I386_GET_FSBASE:
		case I386_SET_FSBASE:
		case I386_GET_GSBASE:
		case I386_SET_GSBASE:
		case I386_GET_XFPUSTATE:
			break;

		case I386_SET_IOPERM:
		default:
#ifdef KTRACE
			if (KTRPOINT(td, KTR_CAPFAIL))
				ktrcapfail(CAPFAIL_SYSCALL, NULL, NULL);
#endif
			return (ECAPMODE);
		}
	}
#endif

	switch (uap->op) {
	case I386_GET_IOPERM:
	case I386_SET_IOPERM:
		if ((error = copyin(uap->parms, &kargs.iargs,
		    sizeof(struct i386_ioperm_args))) != 0)
			return (error);
		break;
	case I386_GET_LDT:
	case I386_SET_LDT:
		if ((error = copyin(uap->parms, &kargs.largs,
		    sizeof(struct i386_ldt_args))) != 0)
			return (error);
		if (kargs.largs.num > MAX_LD || kargs.largs.num <= 0)
			return (EINVAL);
		break;
	case I386_GET_XFPUSTATE:
		if ((error = copyin(uap->parms, &kargs.xfpu,
		    sizeof(struct i386_get_xfpustate))) != 0)
			return (error);
		break;
	default:
		break;
	}

	switch (uap->op) {
	case I386_GET_LDT:
		error = i386_get_ldt(td, &kargs.largs);
		break;
	case I386_SET_LDT:
		if (kargs.largs.descs != NULL) {
			lp = (union descriptor *)malloc(
			    kargs.largs.num * sizeof(union descriptor),
			    M_TEMP, M_WAITOK);
			error = copyin(kargs.largs.descs, lp,
			    kargs.largs.num * sizeof(union descriptor));
			if (error == 0)
				error = i386_set_ldt(td, &kargs.largs, lp);
			free(lp, M_TEMP);
		} else {
			error = i386_set_ldt(td, &kargs.largs, NULL);
		}
		break;
	case I386_GET_IOPERM:
		error = i386_get_ioperm(td, &kargs.iargs);
		if (error == 0)
			error = copyout(&kargs.iargs, uap->parms,
			    sizeof(struct i386_ioperm_args));
		break;
	case I386_SET_IOPERM:
		error = i386_set_ioperm(td, &kargs.iargs);
		break;
	case I386_VM86:
		error = vm86_sysarch(td, uap->parms);
		break;
	case I386_GET_FSBASE:
		sdp = &td->td_pcb->pcb_fsd;
		base = sdp->sd_hibase << 24 | sdp->sd_lobase;
		error = copyout(&base, uap->parms, sizeof(base));
		break;
	case I386_SET_FSBASE:
		error = copyin(uap->parms, &base, sizeof(base));
		if (!error) {
			/*
			 * Construct a descriptor and store it in the pcb for
			 * the next context switch.  Also store it in the gdt
			 * so that the load of tf_fs into %fs will activate it
			 * at return to userland.
			 */
			sd.sd_lobase = base & 0xffffff;
			sd.sd_hibase = (base >> 24) & 0xff;
#ifdef XEN
			/* need to do nosegneg like Linux */
			sd.sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff;
#else
			sd.sd_lolimit = 0xffff;	/* 4GB limit, wraps around */
#endif
			sd.sd_hilimit = 0xf;
			sd.sd_type  = SDT_MEMRWA;
			sd.sd_dpl   = SEL_UPL;
			sd.sd_p     = 1;
			sd.sd_xx    = 0;
			sd.sd_def32 = 1;
			sd.sd_gran  = 1;
			critical_enter();
			td->td_pcb->pcb_fsd = sd;
#ifdef XEN
			HYPERVISOR_update_descriptor(vtomach(&PCPU_GET(fsgs_gdt)[0]),
			    *(uint64_t *)&sd);
#else
			PCPU_GET(fsgs_gdt)[0] = sd;
#endif
			critical_exit();
			td->td_frame->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
		}
		break;
	case I386_GET_GSBASE:
		sdp = &td->td_pcb->pcb_gsd;
		base = sdp->sd_hibase << 24 | sdp->sd_lobase;
		error = copyout(&base, uap->parms, sizeof(base));
		break;
	case I386_SET_GSBASE:
		error = copyin(uap->parms, &base, sizeof(base));
		if (!error) {
			/*
			 * Construct a descriptor and store it in the pcb for
			 * the next context switch.  Also store it in the gdt
			 * because we have to do a load_gs() right now.
			 */
			sd.sd_lobase = base & 0xffffff;
			sd.sd_hibase = (base >> 24) & 0xff;

#ifdef XEN
			/* need to do nosegneg like Linux */
			sd.sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff;
#else
			sd.sd_lolimit = 0xffff;	/* 4GB limit, wraps around */
#endif
			sd.sd_hilimit = 0xf;
			sd.sd_type  = SDT_MEMRWA;
			sd.sd_dpl   = SEL_UPL;
			sd.sd_p     = 1;
			sd.sd_xx    = 0;
			sd.sd_def32 = 1;
			sd.sd_gran  = 1;
			critical_enter();
			td->td_pcb->pcb_gsd = sd;
#ifdef XEN
			HYPERVISOR_update_descriptor(vtomach(&PCPU_GET(fsgs_gdt)[1]),
			    *(uint64_t *)&sd);
#else
			PCPU_GET(fsgs_gdt)[1] = sd;
#endif
			critical_exit();
			load_gs(GSEL(GUGS_SEL, SEL_UPL));
		}
		break;
	case I386_GET_XFPUSTATE:
		if (kargs.xfpu.len > cpu_max_ext_state_size -
		    sizeof(union savefpu))
			return (EINVAL);
		npxgetregs(td);
		error = copyout((char *)(get_pcb_user_save_td(td) + 1),
		    kargs.xfpu.addr, kargs.xfpu.len);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

int
i386_extend_pcb(struct thread *td)
{
	int i, offset;
	u_long *addr;
	struct pcb_ext *ext;
	struct soft_segment_descriptor ssd = {
		0,			/* segment base address (overwritten) */
		ctob(IOPAGES + 1) - 1,	/* length */
		SDT_SYS386TSS,		/* segment type */
		0,			/* priority level */
		1,			/* descriptor present */
		0, 0,
		0,			/* default 32 size */
		0			/* granularity */
	};

	ext = (struct pcb_ext *)kmem_malloc(kernel_arena, ctob(IOPAGES+1),
	    M_WAITOK | M_ZERO);
	/* -16 is so we can convert a trapframe into vm86trapframe inplace */
	ext->ext_tss.tss_esp0 = td->td_kstack + ctob(KSTACK_PAGES) -
	    sizeof(struct pcb) - 16;
	ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
	/*
	 * The last byte of the i/o map must be followed by an 0xff byte.
	 * We arbitrarily allocate 16 bytes here, to keep the starting
	 * address on a doubleword boundary.
	 */
	offset = PAGE_SIZE - 16;
	ext->ext_tss.tss_ioopt =
	    (offset - ((unsigned)&ext->ext_tss - (unsigned)ext)) << 16;
	ext->ext_iomap = (caddr_t)ext + offset;
	ext->ext_vm86.vm86_intmap = (caddr_t)ext + offset - 32;

	addr = (u_long *)ext->ext_vm86.vm86_intmap;
	for (i = 0; i < (ctob(IOPAGES) + 32 + 16) / sizeof(u_long); i++)
		*addr++ = ~0;

	ssd.ssd_base = (unsigned)&ext->ext_tss;
	ssd.ssd_limit -= ((unsigned)&ext->ext_tss - (unsigned)ext);
	ssdtosd(&ssd, &ext->ext_tssd);

	KASSERT(td == curthread, ("giving TSS to !curthread"));
	KASSERT(td->td_pcb->pcb_ext == 0, ("already have a TSS!"));

	/* Switch to the new TSS. */
	critical_enter();
	td->td_pcb->pcb_ext = ext;
	PCPU_SET(private_tss, 1);
	*PCPU_GET(tss_gdt) = ext->ext_tssd;
	ltr(GSEL(GPROC0_SEL, SEL_KPL));
	critical_exit();

	return 0;
}

int
i386_set_ioperm(td, uap)
	struct thread *td;
	struct i386_ioperm_args *uap;
{
	int i, error;
	char *iomap;

	if ((error = priv_check(td, PRIV_IO)) != 0)
		return (error);
	if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
		return (error);
	/*
	 * XXX
	 * While this is restricted to root, we should probably figure out
	 * whether any other driver is using this i/o address, so as not to
	 * cause confusion.  This probably requires a global 'usage registry'.
	 */

	if (td->td_pcb->pcb_ext == 0)
		if ((error = i386_extend_pcb(td)) != 0)
			return (error);
	iomap = (char *)td->td_pcb->pcb_ext->ext_iomap;

	if (uap->start + uap->length > IOPAGES * PAGE_SIZE * NBBY)
		return (EINVAL);

	for (i = uap->start; i < uap->start + uap->length; i++) {
		if (uap->enable)
			iomap[i >> 3] &= ~(1 << (i & 7));
		else
			iomap[i >> 3] |= (1 << (i & 7));
	}
	return (error);
}

int
i386_get_ioperm(td, uap)
	struct thread *td;
	struct i386_ioperm_args *uap;
{
	int i, state;
	char *iomap;

	if (uap->start >= IOPAGES * PAGE_SIZE * NBBY)
		return (EINVAL);

	if (td->td_pcb->pcb_ext == 0) {
		uap->length = 0;
		goto done;
	}

	iomap = (char *)td->td_pcb->pcb_ext->ext_iomap;

	i = uap->start;
	state = (iomap[i >> 3] >> (i & 7)) & 1;
	uap->enable = !state;
	uap->length = 1;

	for (i = uap->start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) {
		if (state != ((iomap[i >> 3] >> (i & 7)) & 1))
			break;
		uap->length++;
	}

done:
	return (0);
}

/*
 * Update the GDT entry pointing to the LDT to point to the LDT of the
 * current process. Manage dt_lock holding/unholding autonomously.
 */
void
set_user_ldt(struct mdproc *mdp)
{
	struct proc_ldt *pldt;
	int dtlocked;

	dtlocked = 0;
	if (!mtx_owned(&dt_lock)) {
		mtx_lock_spin(&dt_lock);
		dtlocked = 1;
	}

	pldt = mdp->md_ldt;
#ifdef XEN
	i386_reset_ldt(pldt);
	PCPU_SET(currentldt, (int)pldt);
#else
#ifdef SMP
	gdt[PCPU_GET(cpuid) * NGDT + GUSERLDT_SEL].sd = pldt->ldt_sd;
#else
	gdt[GUSERLDT_SEL].sd = pldt->ldt_sd;
#endif
	lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
	PCPU_SET(currentldt, GSEL(GUSERLDT_SEL, SEL_KPL));
#endif /* XEN */
	if (dtlocked)
		mtx_unlock_spin(&dt_lock);
}

#ifdef SMP
static void
set_user_ldt_rv(struct vmspace *vmsp)
{
	struct thread *td;

	td = curthread;
	if (vmsp != td->td_proc->p_vmspace)
		return;

	set_user_ldt(&td->td_proc->p_md);
}
#endif

#ifdef XEN

/*
 * dt_lock must be held. Returns with dt_lock held.
 */
struct proc_ldt *
user_ldt_alloc(struct mdproc *mdp, int len)
{
	struct proc_ldt *pldt, *new_ldt;

	mtx_assert(&dt_lock, MA_OWNED);
	mtx_unlock_spin(&dt_lock);
	new_ldt = malloc(sizeof(struct proc_ldt),
		M_SUBPROC, M_WAITOK);

	new_ldt->ldt_len = len = NEW_MAX_LD(len);
	new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_arena,
	    round_page(len * sizeof(union descriptor)), M_WAITOK);
	new_ldt->ldt_refcnt = 1;
	new_ldt->ldt_active = 0;

	mtx_lock_spin(&dt_lock);
	if ((pldt = mdp->md_ldt)) {
		if (len > pldt->ldt_len)
			len = pldt->ldt_len;
		bcopy(pldt->ldt_base, new_ldt->ldt_base,
		    len * sizeof(union descriptor));
	} else {
		bcopy(ldt, new_ldt->ldt_base, PAGE_SIZE);
	}
	mtx_unlock_spin(&dt_lock);  /* XXX kill once pmap locking fixed. */
	pmap_map_readonly(kernel_pmap, (vm_offset_t)new_ldt->ldt_base,
	    new_ldt->ldt_len*sizeof(union descriptor));
	mtx_lock_spin(&dt_lock);  /* XXX kill once pmap locking fixed. */
	return (new_ldt);
}
#else
/*
 * dt_lock must be held. Returns with dt_lock held.
 */
struct proc_ldt *
user_ldt_alloc(struct mdproc *mdp, int len)
{
	struct proc_ldt *pldt, *new_ldt;

	mtx_assert(&dt_lock, MA_OWNED);
	mtx_unlock_spin(&dt_lock);
	new_ldt = malloc(sizeof(struct proc_ldt),
		M_SUBPROC, M_WAITOK);

	new_ldt->ldt_len = len = NEW_MAX_LD(len);
	new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_arena,
	    len * sizeof(union descriptor), M_WAITOK);
	new_ldt->ldt_refcnt = 1;
	new_ldt->ldt_active = 0;

	mtx_lock_spin(&dt_lock);
	gdt_segs[GUSERLDT_SEL].ssd_base = (unsigned)new_ldt->ldt_base;
	gdt_segs[GUSERLDT_SEL].ssd_limit = len * sizeof(union descriptor) - 1;
	ssdtosd(&gdt_segs[GUSERLDT_SEL], &new_ldt->ldt_sd);

	if ((pldt = mdp->md_ldt) != NULL) {
		if (len > pldt->ldt_len)
			len = pldt->ldt_len;
		bcopy(pldt->ldt_base, new_ldt->ldt_base,
		    len * sizeof(union descriptor));
	} else
		bcopy(ldt, new_ldt->ldt_base, sizeof(ldt));

	return (new_ldt);
}
#endif /* !XEN */

/*
 * Must be called with dt_lock held.  Returns with dt_lock unheld.
 */
void
user_ldt_free(struct thread *td)
{
	struct mdproc *mdp = &td->td_proc->p_md;
	struct proc_ldt *pldt;

	mtx_assert(&dt_lock, MA_OWNED);
	if ((pldt = mdp->md_ldt) == NULL) {
		mtx_unlock_spin(&dt_lock);
		return;
	}

	if (td == curthread) {
#ifdef XEN
		i386_reset_ldt(&default_proc_ldt);
		PCPU_SET(currentldt, (int)&default_proc_ldt);
#else
		lldt(_default_ldt);
		PCPU_SET(currentldt, _default_ldt);
#endif
	}

	mdp->md_ldt = NULL;
	user_ldt_deref(pldt);
}

void
user_ldt_deref(struct proc_ldt *pldt)
{

	mtx_assert(&dt_lock, MA_OWNED);
	if (--pldt->ldt_refcnt == 0) {
		mtx_unlock_spin(&dt_lock);
		kmem_free(kernel_arena, (vm_offset_t)pldt->ldt_base,
		    pldt->ldt_len * sizeof(union descriptor));
		free(pldt, M_SUBPROC);
	} else
		mtx_unlock_spin(&dt_lock);
}

/*
 * Note for the authors of compat layers (linux, etc): copyout() in
 * the function below is not a problem since it presents data in
 * arch-specific format (i.e. i386-specific in this case), not in
 * the OS-specific one.
 */
int
i386_get_ldt(td, uap)
	struct thread *td;
	struct i386_ldt_args *uap;
{
	int error = 0;
	struct proc_ldt *pldt;
	int nldt, num;
	union descriptor *lp;

#ifdef	DEBUG
	printf("i386_get_ldt: start=%d num=%d descs=%p\n",
	    uap->start, uap->num, (void *)uap->descs);
#endif

	mtx_lock_spin(&dt_lock);
	if ((pldt = td->td_proc->p_md.md_ldt) != NULL) {
		nldt = pldt->ldt_len;
		lp = &((union descriptor *)(pldt->ldt_base))[uap->start];
		mtx_unlock_spin(&dt_lock);
		num = min(uap->num, nldt);
	} else {
		mtx_unlock_spin(&dt_lock);
		nldt = sizeof(ldt)/sizeof(ldt[0]);
		num = min(uap->num, nldt);
		lp = &ldt[uap->start];
	}

	if ((uap->start > (unsigned int)nldt) ||
	    ((unsigned int)num > (unsigned int)nldt) ||
	    ((unsigned int)(uap->start + num) > (unsigned int)nldt))
		return(EINVAL);

	error = copyout(lp, uap->descs, num * sizeof(union descriptor));
	if (!error)
		td->td_retval[0] = num;

	return(error);
}

int
i386_set_ldt(td, uap, descs)
	struct thread *td;
	struct i386_ldt_args *uap;
	union descriptor *descs;
{
	int error = 0, i;
	int largest_ld;
	struct mdproc *mdp = &td->td_proc->p_md;
	struct proc_ldt *pldt;
	union descriptor *dp;

#ifdef	DEBUG
	printf("i386_set_ldt: start=%d num=%d descs=%p\n",
	    uap->start, uap->num, (void *)uap->descs);
#endif

	if (descs == NULL) {
		/* Free descriptors */
		if (uap->start == 0 && uap->num == 0) {
			/*
			 * Treat this as a special case, so userland needn't
			 * know magic number NLDT.
			 */
			uap->start = NLDT;
			uap->num = MAX_LD - NLDT;
		}
		if (uap->num == 0)
			return (EINVAL);
		mtx_lock_spin(&dt_lock);
		if ((pldt = mdp->md_ldt) == NULL ||
		    uap->start >= pldt->ldt_len) {
			mtx_unlock_spin(&dt_lock);
			return (0);
		}
		largest_ld = uap->start + uap->num;
		if (largest_ld > pldt->ldt_len)
			largest_ld = pldt->ldt_len;
		i = largest_ld - uap->start;
		bzero(&((union descriptor *)(pldt->ldt_base))[uap->start],
		    sizeof(union descriptor) * i);
		mtx_unlock_spin(&dt_lock);
		return (0);
	}

	if (!(uap->start == LDT_AUTO_ALLOC && uap->num == 1)) {
		/* verify range of descriptors to modify */
		largest_ld = uap->start + uap->num;
		if (uap->start >= MAX_LD || largest_ld > MAX_LD) {
			return (EINVAL);
		}
	}

	/* Check descriptors for access violations */
	for (i = 0; i < uap->num; i++) {
		dp = &descs[i];

		switch (dp->sd.sd_type) {
		case SDT_SYSNULL:	/* system null */
			dp->sd.sd_p = 0;
			break;
		case SDT_SYS286TSS: /* system 286 TSS available */
		case SDT_SYSLDT:    /* system local descriptor table */
		case SDT_SYS286BSY: /* system 286 TSS busy */
		case SDT_SYSTASKGT: /* system task gate */
		case SDT_SYS286IGT: /* system 286 interrupt gate */
		case SDT_SYS286TGT: /* system 286 trap gate */
		case SDT_SYSNULL2:  /* undefined by Intel */
		case SDT_SYS386TSS: /* system 386 TSS available */
		case SDT_SYSNULL3:  /* undefined by Intel */
		case SDT_SYS386BSY: /* system 386 TSS busy */
		case SDT_SYSNULL4:  /* undefined by Intel */
		case SDT_SYS386IGT: /* system 386 interrupt gate */
		case SDT_SYS386TGT: /* system 386 trap gate */
		case SDT_SYS286CGT: /* system 286 call gate */
		case SDT_SYS386CGT: /* system 386 call gate */
			/* I can't think of any reason to allow a user proc
			 * to create a segment of these types.  They are
			 * for OS use only.
			 */
			return (EACCES);
			/*NOTREACHED*/

		/* memory segment types */
		case SDT_MEMEC:   /* memory execute only conforming */
		case SDT_MEMEAC:  /* memory execute only accessed conforming */
		case SDT_MEMERC:  /* memory execute read conforming */
		case SDT_MEMERAC: /* memory execute read accessed conforming */
			/* Must be "present" if executable and conforming. */
			if (dp->sd.sd_p == 0)
				return (EACCES);
			break;
		case SDT_MEMRO:   /* memory read only */
		case SDT_MEMROA:  /* memory read only accessed */
		case SDT_MEMRW:   /* memory read write */
		case SDT_MEMRWA:  /* memory read write accessed */
		case SDT_MEMROD:  /* memory read only expand dwn limit */
		case SDT_MEMRODA: /* memory read only expand dwn lim accessed */
		case SDT_MEMRWD:  /* memory read write expand dwn limit */
		case SDT_MEMRWDA: /* memory read write expand dwn lim accessed */
		case SDT_MEME:    /* memory execute only */
		case SDT_MEMEA:   /* memory execute only accessed */
		case SDT_MEMER:   /* memory execute read */
		case SDT_MEMERA:  /* memory execute read accessed */
			break;
		default:
			return(EINVAL);
			/*NOTREACHED*/
		}

		/* Only user (ring-3) descriptors may be present. */
		if ((dp->sd.sd_p != 0) && (dp->sd.sd_dpl != SEL_UPL))
			return (EACCES);
	}

	if (uap->start == LDT_AUTO_ALLOC && uap->num == 1) {
		/* Allocate a free slot */
		mtx_lock_spin(&dt_lock);
		if ((pldt = mdp->md_ldt) == NULL) {
			if ((error = i386_ldt_grow(td, NLDT + 1))) {
				mtx_unlock_spin(&dt_lock);
				return (error);
			}
			pldt = mdp->md_ldt;
		}
again:
		/*
		 * start scanning a bit up to leave room for NVidia and
		 * Wine, which still use the "Blat" method of allocation.
		 */
		dp = &((union descriptor *)(pldt->ldt_base))[NLDT];
		for (i = NLDT; i < pldt->ldt_len; ++i) {
			if (dp->sd.sd_type == SDT_SYSNULL)
				break;
			dp++;
		}
		if (i >= pldt->ldt_len) {
			if ((error = i386_ldt_grow(td, pldt->ldt_len+1))) {
				mtx_unlock_spin(&dt_lock);
				return (error);
			}
			goto again;
		}
		uap->start = i;
		error = i386_set_ldt_data(td, i, 1, descs);
		mtx_unlock_spin(&dt_lock);
	} else {
		largest_ld = uap->start + uap->num;
		mtx_lock_spin(&dt_lock);
		if (!(error = i386_ldt_grow(td, largest_ld))) {
			error = i386_set_ldt_data(td, uap->start, uap->num,
			    descs);
		}
		mtx_unlock_spin(&dt_lock);
	}
	if (error == 0)
		td->td_retval[0] = uap->start;
	return (error);
}
#ifdef XEN
static int
i386_set_ldt_data(struct thread *td, int start, int num,
	union descriptor *descs)
{
	struct mdproc *mdp = &td->td_proc->p_md;
	struct proc_ldt *pldt = mdp->md_ldt;

	mtx_assert(&dt_lock, MA_OWNED);

	while (num) {
		xen_update_descriptor(
		    &((union descriptor *)(pldt->ldt_base))[start],
		    descs);
		num--;
		start++;
		descs++;
	}
	return (0);
}
#else
static int
i386_set_ldt_data(struct thread *td, int start, int num,
	union descriptor *descs)
{
	struct mdproc *mdp = &td->td_proc->p_md;
	struct proc_ldt *pldt = mdp->md_ldt;

	mtx_assert(&dt_lock, MA_OWNED);

	/* Fill in range */
	bcopy(descs,
	    &((union descriptor *)(pldt->ldt_base))[start],
	    num * sizeof(union descriptor));
	return (0);
}
#endif /* !XEN */

static int
i386_ldt_grow(struct thread *td, int len)
{
	struct mdproc *mdp = &td->td_proc->p_md;
	struct proc_ldt *new_ldt, *pldt;
	caddr_t old_ldt_base = NULL_LDT_BASE;
	int old_ldt_len = 0;

	mtx_assert(&dt_lock, MA_OWNED);

	if (len > MAX_LD)
		return (ENOMEM);
	if (len < NLDT + 1)
		len = NLDT + 1;

	/* Allocate a user ldt. */
	if ((pldt = mdp->md_ldt) == NULL || len > pldt->ldt_len) {
		new_ldt = user_ldt_alloc(mdp, len);
		if (new_ldt == NULL)
			return (ENOMEM);
		pldt = mdp->md_ldt;

		if (pldt != NULL) {
			if (new_ldt->ldt_len <= pldt->ldt_len) {
				/*
				 * We just lost the race for allocation, so
				 * free the new object and return.
				 */
				mtx_unlock_spin(&dt_lock);
				kmem_free(kernel_arena,
				   (vm_offset_t)new_ldt->ldt_base,
				   new_ldt->ldt_len * sizeof(union descriptor));
				free(new_ldt, M_SUBPROC);
				mtx_lock_spin(&dt_lock);
				return (0);
			}

			/*
			 * We have to substitute the current LDT entry for
			 * curproc with the new one since its size grew.
			 */
			old_ldt_base = pldt->ldt_base;
			old_ldt_len = pldt->ldt_len;
			pldt->ldt_sd = new_ldt->ldt_sd;
			pldt->ldt_base = new_ldt->ldt_base;
			pldt->ldt_len = new_ldt->ldt_len;
		} else
			mdp->md_ldt = pldt = new_ldt;
#ifdef SMP
		/*
		 * Signal other cpus to reload ldt.  We need to unlock dt_lock
		 * here because the other CPUs will contend for it since their
		 * curthreads won't hold the lock and will block when trying
		 * to acquire it.
		 */
		mtx_unlock_spin(&dt_lock);
		smp_rendezvous(NULL, (void (*)(void *))set_user_ldt_rv,
		    NULL, td->td_proc->p_vmspace);
#else
		set_user_ldt(&td->td_proc->p_md);
		mtx_unlock_spin(&dt_lock);
#endif
		if (old_ldt_base != NULL_LDT_BASE) {
			kmem_free(kernel_arena, (vm_offset_t)old_ldt_base,
			    old_ldt_len * sizeof(union descriptor));
			free(new_ldt, M_SUBPROC);
		}
		mtx_lock_spin(&dt_lock);
	}
	return (0);
}
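
For reference, the code above is normally reached from userland through sysarch(2): the caller passes an operation code plus a pointer to that operation's argument block, and the matching case in sysarch() copies arguments in and results out. The following is a minimal userland sketch (not part of sys_machdep.c) that exercises the I386_GET_FSBASE path shown earlier; it assumes a FreeBSD/i386 environment where <machine/sysarch.h> declares sysarch() and the I386_* operation codes.

/* Hypothetical userland example; not part of the kernel source above. */
#include <machine/sysarch.h>	/* sysarch(), I386_GET_FSBASE */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t base;

	/*
	 * The kernel services this in the I386_GET_FSBASE case of
	 * sysarch(), copying the 32-bit %fs segment base out through
	 * the parms pointer.
	 */
	if (sysarch(I386_GET_FSBASE, &base) != 0) {
		perror("sysarch");
		return (1);
	}
	printf("%%fs base: 0x%08x\n", base);
	return (0);
}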