14Srgrimes/*- 24Srgrimes * Copyright (c) 1990 The Regents of the University of California. 34Srgrimes * All rights reserved. 44Srgrimes * 54Srgrimes * Redistribution and use in source and binary forms, with or without 64Srgrimes * modification, are permitted provided that the following conditions 74Srgrimes * are met: 84Srgrimes * 1. Redistributions of source code must retain the above copyright 94Srgrimes * notice, this list of conditions and the following disclaimer. 104Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 114Srgrimes * notice, this list of conditions and the following disclaimer in the 124Srgrimes * documentation and/or other materials provided with the distribution. 134Srgrimes * 4. Neither the name of the University nor the names of its contributors 144Srgrimes * may be used to endorse or promote products derived from this software 154Srgrimes * without specific prior written permission. 164Srgrimes * 174Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 184Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 194Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 204Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 214Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 224Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 234Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 244Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 254Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 264Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 274Srgrimes * SUCH DAMAGE. 284Srgrimes * 29620Srgrimes * from: @(#)sys_machdep.c 5.5 (Berkeley) 1/19/91 304Srgrimes */ 314Srgrimes 32115683Sobrien#include <sys/cdefs.h> 33115683Sobrien__FBSDID("$FreeBSD: releng/10.3/sys/i386/i386/sys_machdep.c 292572 2015-12-21 22:16:09Z jhb $"); 34115683Sobrien 35223668Sjonathan#include "opt_capsicum.h" 3683366Sjulian#include "opt_kstack_pages.h" 3782309Speter 382056Swollman#include <sys/param.h> 39280258Srwatson#include <sys/capsicum.h> 402056Swollman#include <sys/systm.h> 4176166Smarkm#include <sys/lock.h> 4254188Sluoqi#include <sys/malloc.h> 4376166Smarkm#include <sys/mutex.h> 44164033Srwatson#include <sys/priv.h> 452056Swollman#include <sys/proc.h> 4676078Sjhb#include <sys/smp.h> 4776166Smarkm#include <sys/sysproto.h> 4812662Sdg 4912662Sdg#include <vm/vm.h> 5012662Sdg#include <vm/pmap.h> 5112662Sdg#include <vm/vm_map.h> 5212662Sdg#include <vm/vm_extern.h> 5312662Sdg 542056Swollman#include <machine/cpu.h> 55138129Sdas#include <machine/pcb.h> 56138129Sdas#include <machine/pcb_ext.h> 5785449Sjhb#include <machine/proc.h> 582056Swollman#include <machine/sysarch.h> 596874Sdg 60162175Srwatson#include <security/audit/audit.h> 61162175Srwatson 62181775Skmacy#ifdef XEN 63181775Skmacy#include <machine/xen/xenfunc.h> 64181775Skmacy 65181775Skmacyvoid i386_reset_ldt(struct proc_ldt *pldt); 66181775Skmacy 67181775Skmacyvoid 68181775Skmacyi386_reset_ldt(struct proc_ldt *pldt) 69181775Skmacy{ 70181775Skmacy xen_set_ldt((vm_offset_t)pldt->ldt_base, pldt->ldt_len); 71181775Skmacy} 72181775Skmacy#else 73181775Skmacy#define i386_reset_ldt(x) 74181775Skmacy#endif 75181775Skmacy 762056Swollman#include <vm/vm_kern.h> /* for kernel_map */ 771051Sdg 7814348Sjkh#define MAX_LD 8192 7914348Sjkh#define LD_PER_PAGE 512 8014348Sjkh#define NEW_MAX_LD(num) ((num + LD_PER_PAGE) & ~(LD_PER_PAGE-1)) 8114348Sjkh#define SIZE_FROM_LARGEST_LD(num) (NEW_MAX_LD(num) << 3) 82171309Sattilio#define NULL_LDT_BASE ((caddr_t)NULL) 8314348Sjkh 84171295Sattilio#ifdef SMP 85171295Sattiliostatic void set_user_ldt_rv(struct vmspace *vmsp); 86171295Sattilio#endif 87118242Sdavidxustatic int i386_set_ldt_data(struct thread *, int start, int num, 88118242Sdavidxu union descriptor *descs); 89118242Sdavidxustatic int i386_ldt_grow(struct thread *td, int len); 904Srgrimes 91286311Skibvoid 92286311Skibfill_based_sd(struct segment_descriptor *sdp, uint32_t base) 93286311Skib{ 94286311Skib 95286311Skib sdp->sd_lobase = base & 0xffffff; 96286311Skib sdp->sd_hibase = (base >> 24) & 0xff; 97286311Skib#ifdef XEN 98286311Skib /* need to do nosegneg like Linux */ 99286311Skib sdp->sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff; 100286311Skib#else 101286311Skib sdp->sd_lolimit = 0xffff; /* 4GB limit, wraps around */ 102286311Skib#endif 103286311Skib sdp->sd_hilimit = 0xf; 104286311Skib sdp->sd_type = SDT_MEMRWA; 105286311Skib sdp->sd_dpl = SEL_UPL; 106286311Skib sdp->sd_p = 1; 107286311Skib sdp->sd_xx = 0; 108286311Skib sdp->sd_def32 = 1; 109286311Skib sdp->sd_gran = 1; 110286311Skib} 111286311Skib 11212220Sbde#ifndef _SYS_SYSPROTO_H_ 1136874Sdgstruct sysarch_args { 1146874Sdg int op; 1156874Sdg char *parms; 116132Sdg}; 11712220Sbde#endif 118132Sdg 1196874Sdgint 12083366Sjuliansysarch(td, uap) 12183366Sjulian struct thread *td; 1226874Sdg register struct sysarch_args *uap; 1234Srgrimes{ 124114029Sjhb int error; 125140862Ssobomax union descriptor *lp; 126140862Ssobomax union { 127140862Ssobomax struct i386_ldt_args largs; 128140862Ssobomax struct i386_ioperm_args iargs; 129276084Sjhb struct i386_get_xfpustate xfpu; 130140862Ssobomax } kargs; 131145034Speter uint32_t base; 132145034Speter struct segment_descriptor sd, *sdp; 1334Srgrimes 134195104Srwatson AUDIT_ARG_CMD(uap->op); 135219134Srwatson 136223668Sjonathan#ifdef CAPABILITY_MODE 137219134Srwatson /* 138223692Sjonathan * When adding new operations, add a new case statement here to 139223692Sjonathan * explicitly indicate whether or not the operation is safe to 140223692Sjonathan * perform in capability mode. 141219134Srwatson */ 142219134Srwatson if (IN_CAPABILITY_MODE(td)) { 143219134Srwatson switch (uap->op) { 144223692Sjonathan case I386_GET_LDT: 145223692Sjonathan case I386_SET_LDT: 146223692Sjonathan case I386_GET_IOPERM: 147223692Sjonathan case I386_GET_FSBASE: 148223692Sjonathan case I386_SET_FSBASE: 149223692Sjonathan case I386_GET_GSBASE: 150223692Sjonathan case I386_SET_GSBASE: 151276084Sjhb case I386_GET_XFPUSTATE: 152223692Sjonathan break; 153219134Srwatson 154223692Sjonathan case I386_SET_IOPERM: 155223692Sjonathan default: 156226498Sdes#ifdef KTRACE 157226498Sdes if (KTRPOINT(td, KTR_CAPFAIL)) 158255677Spjd ktrcapfail(CAPFAIL_SYSCALL, NULL, NULL); 159226498Sdes#endif 160223692Sjonathan return (ECAPMODE); 161219134Srwatson } 162219134Srwatson } 163219134Srwatson#endif 164219134Srwatson 165140862Ssobomax switch (uap->op) { 166140862Ssobomax case I386_GET_IOPERM: 167140862Ssobomax case I386_SET_IOPERM: 168140862Ssobomax if ((error = copyin(uap->parms, &kargs.iargs, 169140862Ssobomax sizeof(struct i386_ioperm_args))) != 0) 170140862Ssobomax return (error); 171140862Ssobomax break; 172140862Ssobomax case I386_GET_LDT: 173140862Ssobomax case I386_SET_LDT: 174140862Ssobomax if ((error = copyin(uap->parms, &kargs.largs, 175140862Ssobomax sizeof(struct i386_ldt_args))) != 0) 176140862Ssobomax return (error); 177144013Sdas if (kargs.largs.num > MAX_LD || kargs.largs.num <= 0) 178144013Sdas return (EINVAL); 179140862Ssobomax break; 180276084Sjhb case I386_GET_XFPUSTATE: 181276084Sjhb if ((error = copyin(uap->parms, &kargs.xfpu, 182276084Sjhb sizeof(struct i386_get_xfpustate))) != 0) 183276084Sjhb return (error); 184276084Sjhb break; 185140862Ssobomax default: 186140862Ssobomax break; 187140862Ssobomax } 188140862Ssobomax 1896874Sdg switch(uap->op) { 1908876Srgrimes case I386_GET_LDT: 191140862Ssobomax error = i386_get_ldt(td, &kargs.largs); 1924Srgrimes break; 1938876Srgrimes case I386_SET_LDT: 194140862Ssobomax if (kargs.largs.descs != NULL) { 195267714Skib lp = (union descriptor *)malloc( 196253685Sjeff kargs.largs.num * sizeof(union descriptor), 197267714Skib M_TEMP, M_WAITOK); 198140862Ssobomax error = copyin(kargs.largs.descs, lp, 199140862Ssobomax kargs.largs.num * sizeof(union descriptor)); 200140862Ssobomax if (error == 0) 201140862Ssobomax error = i386_set_ldt(td, &kargs.largs, lp); 202267714Skib free(lp, M_TEMP); 203140862Ssobomax } else { 204140862Ssobomax error = i386_set_ldt(td, &kargs.largs, NULL); 205140862Ssobomax } 2064Srgrimes break; 20727993Sdyson case I386_GET_IOPERM: 208140862Ssobomax error = i386_get_ioperm(td, &kargs.iargs); 209140862Ssobomax if (error == 0) 210140862Ssobomax error = copyout(&kargs.iargs, uap->parms, 211140862Ssobomax sizeof(struct i386_ioperm_args)); 21227993Sdyson break; 21327993Sdyson case I386_SET_IOPERM: 214140862Ssobomax error = i386_set_ioperm(td, &kargs.iargs); 21527993Sdyson break; 21627993Sdyson case I386_VM86: 21783366Sjulian error = vm86_sysarch(td, uap->parms); 21828872Sjlemon break; 219145034Speter case I386_GET_FSBASE: 220145034Speter sdp = &td->td_pcb->pcb_fsd; 221145034Speter base = sdp->sd_hibase << 24 | sdp->sd_lobase; 222145034Speter error = copyout(&base, uap->parms, sizeof(base)); 223145034Speter break; 224145034Speter case I386_SET_FSBASE: 225145034Speter error = copyin(uap->parms, &base, sizeof(base)); 226286311Skib if (error == 0) { 227145034Speter /* 228145034Speter * Construct a descriptor and store it in the pcb for 229145034Speter * the next context switch. Also store it in the gdt 230145034Speter * so that the load of tf_fs into %fs will activate it 231145034Speter * at return to userland. 232145034Speter */ 233286311Skib fill_based_sd(&sd, base); 234145274Sdavidxu critical_enter(); 235145034Speter td->td_pcb->pcb_fsd = sd; 236181775Skmacy#ifdef XEN 237181775Skmacy HYPERVISOR_update_descriptor(vtomach(&PCPU_GET(fsgs_gdt)[0]), 238181775Skmacy *(uint64_t *)&sd); 239181775Skmacy#else 240145034Speter PCPU_GET(fsgs_gdt)[0] = sd; 241181775Skmacy#endif 242145276Sdavidxu critical_exit(); 243145034Speter td->td_frame->tf_fs = GSEL(GUFS_SEL, SEL_UPL); 244145034Speter } 245145034Speter break; 246145034Speter case I386_GET_GSBASE: 247145034Speter sdp = &td->td_pcb->pcb_gsd; 248145034Speter base = sdp->sd_hibase << 24 | sdp->sd_lobase; 249145034Speter error = copyout(&base, uap->parms, sizeof(base)); 250145034Speter break; 251145034Speter case I386_SET_GSBASE: 252145034Speter error = copyin(uap->parms, &base, sizeof(base)); 253286311Skib if (error == 0) { 254145034Speter /* 255145034Speter * Construct a descriptor and store it in the pcb for 256145034Speter * the next context switch. Also store it in the gdt 257145034Speter * because we have to do a load_gs() right now. 258145034Speter */ 259286311Skib fill_based_sd(&sd, base); 260145274Sdavidxu critical_enter(); 261145034Speter td->td_pcb->pcb_gsd = sd; 262181775Skmacy#ifdef XEN 263181775Skmacy HYPERVISOR_update_descriptor(vtomach(&PCPU_GET(fsgs_gdt)[1]), 264181775Skmacy *(uint64_t *)&sd); 265181775Skmacy#else 266145034Speter PCPU_GET(fsgs_gdt)[1] = sd; 267181775Skmacy#endif 268145274Sdavidxu critical_exit(); 269145034Speter load_gs(GSEL(GUGS_SEL, SEL_UPL)); 270145034Speter } 271145034Speter break; 272276084Sjhb case I386_GET_XFPUSTATE: 273276084Sjhb if (kargs.xfpu.len > cpu_max_ext_state_size - 274276084Sjhb sizeof(union savefpu)) 275276084Sjhb return (EINVAL); 276276084Sjhb npxgetregs(td); 277276084Sjhb error = copyout((char *)(get_pcb_user_save_td(td) + 1), 278276084Sjhb kargs.xfpu.addr, kargs.xfpu.len); 279276084Sjhb break; 2806874Sdg default: 281114029Sjhb error = EINVAL; 2824Srgrimes break; 2834Srgrimes } 28427993Sdyson return (error); 2854Srgrimes} 2864Srgrimes 28727993Sdysonint 28883366Sjuliani386_extend_pcb(struct thread *td) 28927993Sdyson{ 29027993Sdyson int i, offset; 29127993Sdyson u_long *addr; 29227993Sdyson struct pcb_ext *ext; 29327993Sdyson struct soft_segment_descriptor ssd = { 29427993Sdyson 0, /* segment base address (overwritten) */ 29527993Sdyson ctob(IOPAGES + 1) - 1, /* length */ 29627993Sdyson SDT_SYS386TSS, /* segment type */ 29727993Sdyson 0, /* priority level */ 29827993Sdyson 1, /* descriptor present */ 29927993Sdyson 0, 0, 30027993Sdyson 0, /* default 32 size */ 30127993Sdyson 0 /* granularity */ 30227993Sdyson }; 30327993Sdyson 304254025Sjeff ext = (struct pcb_ext *)kmem_malloc(kernel_arena, ctob(IOPAGES+1), 305267714Skib M_WAITOK | M_ZERO); 30683366Sjulian /* -16 is so we can convert a trapframe into vm86trapframe inplace */ 307292572Sjhb ext->ext_tss.tss_esp0 = (vm_offset_t)td->td_pcb - 16; 30854188Sluoqi ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); 30927993Sdyson /* 31027993Sdyson * The last byte of the i/o map must be followed by an 0xff byte. 31127993Sdyson * We arbitrarily allocate 16 bytes here, to keep the starting 31227993Sdyson * address on a doubleword boundary. 31327993Sdyson */ 31427993Sdyson offset = PAGE_SIZE - 16; 31527993Sdyson ext->ext_tss.tss_ioopt = 31627993Sdyson (offset - ((unsigned)&ext->ext_tss - (unsigned)ext)) << 16; 31727993Sdyson ext->ext_iomap = (caddr_t)ext + offset; 31827993Sdyson ext->ext_vm86.vm86_intmap = (caddr_t)ext + offset - 32; 31927993Sdyson 32027993Sdyson addr = (u_long *)ext->ext_vm86.vm86_intmap; 32127993Sdyson for (i = 0; i < (ctob(IOPAGES) + 32 + 16) / sizeof(u_long); i++) 32227993Sdyson *addr++ = ~0; 32327993Sdyson 32427993Sdyson ssd.ssd_base = (unsigned)&ext->ext_tss; 32527993Sdyson ssd.ssd_limit -= ((unsigned)&ext->ext_tss - (unsigned)ext); 32627993Sdyson ssdtosd(&ssd, &ext->ext_tssd); 32776434Sjhb 328150173Sjhb KASSERT(td == curthread, ("giving TSS to !curthread")); 32983366Sjulian KASSERT(td->td_pcb->pcb_ext == 0, ("already have a TSS!")); 330150173Sjhb 331150173Sjhb /* Switch to the new TSS. */ 332153726Sdavidxu critical_enter(); 33383366Sjulian td->td_pcb->pcb_ext = ext; 334153726Sdavidxu PCPU_SET(private_tss, 1); 335150173Sjhb *PCPU_GET(tss_gdt) = ext->ext_tssd; 336150173Sjhb ltr(GSEL(GPROC0_SEL, SEL_KPL)); 337153726Sdavidxu critical_exit(); 33827993Sdyson 33927993Sdyson return 0; 34027993Sdyson} 34127993Sdyson 342140862Ssobomaxint 343140862Ssobomaxi386_set_ioperm(td, uap) 34483366Sjulian struct thread *td; 345140862Ssobomax struct i386_ioperm_args *uap; 34627993Sdyson{ 34733306Sbde int i, error; 34827993Sdyson char *iomap; 34927993Sdyson 350164033Srwatson if ((error = priv_check(td, PRIV_IO)) != 0) 35154188Sluoqi return (error); 35291406Sjhb if ((error = securelevel_gt(td->td_ucred, 0)) != 0) 35383972Srwatson return (error); 35427993Sdyson /* 35527993Sdyson * XXX 35627993Sdyson * While this is restricted to root, we should probably figure out 35727993Sdyson * whether any other driver is using this i/o address, as so not to 35827993Sdyson * cause confusion. This probably requires a global 'usage registry'. 35927993Sdyson */ 36027993Sdyson 36183366Sjulian if (td->td_pcb->pcb_ext == 0) 36283366Sjulian if ((error = i386_extend_pcb(td)) != 0) 36327993Sdyson return (error); 36483366Sjulian iomap = (char *)td->td_pcb->pcb_ext->ext_iomap; 36527993Sdyson 366140862Ssobomax if (uap->start + uap->length > IOPAGES * PAGE_SIZE * NBBY) 36727993Sdyson return (EINVAL); 36827993Sdyson 369140862Ssobomax for (i = uap->start; i < uap->start + uap->length; i++) { 370140862Ssobomax if (uap->enable) 37127993Sdyson iomap[i >> 3] &= ~(1 << (i & 7)); 37227993Sdyson else 37327993Sdyson iomap[i >> 3] |= (1 << (i & 7)); 37427993Sdyson } 37527993Sdyson return (error); 37627993Sdyson} 37727993Sdyson 378140862Ssobomaxint 379140862Ssobomaxi386_get_ioperm(td, uap) 38083366Sjulian struct thread *td; 381140862Ssobomax struct i386_ioperm_args *uap; 38227993Sdyson{ 383140862Ssobomax int i, state; 38427993Sdyson char *iomap; 38527993Sdyson 386140862Ssobomax if (uap->start >= IOPAGES * PAGE_SIZE * NBBY) 38733306Sbde return (EINVAL); 38827993Sdyson 38983366Sjulian if (td->td_pcb->pcb_ext == 0) { 390140862Ssobomax uap->length = 0; 39127993Sdyson goto done; 39227993Sdyson } 39327993Sdyson 39483366Sjulian iomap = (char *)td->td_pcb->pcb_ext->ext_iomap; 39527993Sdyson 396140862Ssobomax i = uap->start; 39732012Speter state = (iomap[i >> 3] >> (i & 7)) & 1; 398140862Ssobomax uap->enable = !state; 399140862Ssobomax uap->length = 1; 40027993Sdyson 401140862Ssobomax for (i = uap->start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) { 40227993Sdyson if (state != ((iomap[i >> 3] >> (i & 7)) & 1)) 40327993Sdyson break; 404140862Ssobomax uap->length++; 40527993Sdyson } 406140862Ssobomax 40727993Sdysondone: 408140862Ssobomax return (0); 40927993Sdyson} 41027993Sdyson 41114348Sjkh/* 41214348Sjkh * Update the GDT entry pointing to the LDT to point to the LDT of the 413169802Sjeff * current process. Manage dt_lock holding/unholding autonomously. 41414348Sjkh */ 4151051Sdgvoid 41685449Sjhbset_user_ldt(struct mdproc *mdp) 4171051Sdg{ 41885449Sjhb struct proc_ldt *pldt; 419169802Sjeff int dtlocked; 42054188Sluoqi 421169802Sjeff dtlocked = 0; 422169802Sjeff if (!mtx_owned(&dt_lock)) { 423169802Sjeff mtx_lock_spin(&dt_lock); 424169802Sjeff dtlocked = 1; 425169802Sjeff } 426169802Sjeff 42785449Sjhb pldt = mdp->md_ldt; 428181775Skmacy#ifdef XEN 429181775Skmacy i386_reset_ldt(pldt); 430181775Skmacy PCPU_SET(currentldt, (int)pldt); 431181775Skmacy#else 43246129Sluoqi#ifdef SMP 43385449Sjhb gdt[PCPU_GET(cpuid) * NGDT + GUSERLDT_SEL].sd = pldt->ldt_sd; 43446129Sluoqi#else 43585449Sjhb gdt[GUSERLDT_SEL].sd = pldt->ldt_sd; 43646129Sluoqi#endif 4371051Sdg lldt(GSEL(GUSERLDT_SEL, SEL_KPL)); 43865597Sjake PCPU_SET(currentldt, GSEL(GUSERLDT_SEL, SEL_KPL)); 439181775Skmacy#endif /* XEN */ 440169802Sjeff if (dtlocked) 441169802Sjeff mtx_unlock_spin(&dt_lock); 44277486Sjhb} 44377486Sjhb 44477502Sjhb#ifdef SMP 44577502Sjhbstatic void 446171295Sattilioset_user_ldt_rv(struct vmspace *vmsp) 44777486Sjhb{ 448171295Sattilio struct thread *td; 44977486Sjhb 450171295Sattilio td = curthread; 451171295Sattilio if (vmsp != td->td_proc->p_vmspace) 45277486Sjhb return; 45377486Sjhb 45485449Sjhb set_user_ldt(&td->td_proc->p_md); 4551051Sdg} 45677502Sjhb#endif 4571051Sdg 458181775Skmacy#ifdef XEN 459181775Skmacy 460181775Skmacy/* 461181775Skmacy * dt_lock must be held. Returns with dt_lock held. 462181775Skmacy */ 463181775Skmacystruct proc_ldt * 464181775Skmacyuser_ldt_alloc(struct mdproc *mdp, int len) 465181775Skmacy{ 466181775Skmacy struct proc_ldt *pldt, *new_ldt; 467181775Skmacy 468181775Skmacy mtx_assert(&dt_lock, MA_OWNED); 469181775Skmacy mtx_unlock_spin(&dt_lock); 470184205Sdes new_ldt = malloc(sizeof(struct proc_ldt), 471181775Skmacy M_SUBPROC, M_WAITOK); 472181775Skmacy 473181775Skmacy new_ldt->ldt_len = len = NEW_MAX_LD(len); 474254025Sjeff new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_arena, 475267714Skib round_page(len * sizeof(union descriptor)), M_WAITOK); 476181775Skmacy new_ldt->ldt_refcnt = 1; 477181775Skmacy new_ldt->ldt_active = 0; 478181775Skmacy 479216845Scperciva mtx_lock_spin(&dt_lock); 480181775Skmacy if ((pldt = mdp->md_ldt)) { 481181775Skmacy if (len > pldt->ldt_len) 482181775Skmacy len = pldt->ldt_len; 483181775Skmacy bcopy(pldt->ldt_base, new_ldt->ldt_base, 484181775Skmacy len * sizeof(union descriptor)); 485181775Skmacy } else { 486181775Skmacy bcopy(ldt, new_ldt->ldt_base, PAGE_SIZE); 487181775Skmacy } 488216845Scperciva mtx_unlock_spin(&dt_lock); /* XXX kill once pmap locking fixed. */ 489181775Skmacy pmap_map_readonly(kernel_pmap, (vm_offset_t)new_ldt->ldt_base, 490181775Skmacy new_ldt->ldt_len*sizeof(union descriptor)); 491216845Scperciva mtx_lock_spin(&dt_lock); /* XXX kill once pmap locking fixed. */ 492182960Skib return (new_ldt); 493181775Skmacy} 494181775Skmacy#else 49576434Sjhb/* 496169802Sjeff * dt_lock must be held. Returns with dt_lock held. 49776434Sjhb */ 49885449Sjhbstruct proc_ldt * 49985449Sjhbuser_ldt_alloc(struct mdproc *mdp, int len) 50054188Sluoqi{ 50185449Sjhb struct proc_ldt *pldt, *new_ldt; 50254188Sluoqi 503169802Sjeff mtx_assert(&dt_lock, MA_OWNED); 504169802Sjeff mtx_unlock_spin(&dt_lock); 505184205Sdes new_ldt = malloc(sizeof(struct proc_ldt), 506111119Simp M_SUBPROC, M_WAITOK); 50754188Sluoqi 50854188Sluoqi new_ldt->ldt_len = len = NEW_MAX_LD(len); 509254025Sjeff new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_arena, 510267714Skib len * sizeof(union descriptor), M_WAITOK); 511171295Sattilio new_ldt->ldt_refcnt = 1; 51254188Sluoqi new_ldt->ldt_active = 0; 51354188Sluoqi 514169802Sjeff mtx_lock_spin(&dt_lock); 51554188Sluoqi gdt_segs[GUSERLDT_SEL].ssd_base = (unsigned)new_ldt->ldt_base; 51654188Sluoqi gdt_segs[GUSERLDT_SEL].ssd_limit = len * sizeof(union descriptor) - 1; 51754188Sluoqi ssdtosd(&gdt_segs[GUSERLDT_SEL], &new_ldt->ldt_sd); 51854188Sluoqi 519169802Sjeff if ((pldt = mdp->md_ldt) != NULL) { 52085449Sjhb if (len > pldt->ldt_len) 52185449Sjhb len = pldt->ldt_len; 52285449Sjhb bcopy(pldt->ldt_base, new_ldt->ldt_base, 52385449Sjhb len * sizeof(union descriptor)); 524169802Sjeff } else 52554188Sluoqi bcopy(ldt, new_ldt->ldt_base, sizeof(ldt)); 526169802Sjeff 527169802Sjeff return (new_ldt); 52854188Sluoqi} 529181775Skmacy#endif /* !XEN */ 53054188Sluoqi 53176434Sjhb/* 532170110Sattilio * Must be called with dt_lock held. Returns with dt_lock unheld. 53376434Sjhb */ 53454188Sluoqivoid 53585449Sjhbuser_ldt_free(struct thread *td) 53654188Sluoqi{ 53785449Sjhb struct mdproc *mdp = &td->td_proc->p_md; 538169802Sjeff struct proc_ldt *pldt; 53954188Sluoqi 540169802Sjeff mtx_assert(&dt_lock, MA_OWNED); 541182960Skib if ((pldt = mdp->md_ldt) == NULL) { 542182960Skib mtx_unlock_spin(&dt_lock); 54354188Sluoqi return; 544182960Skib } 54554188Sluoqi 546228962Sjhb if (td == curthread) { 547216847Scperciva#ifdef XEN 548216847Scperciva i386_reset_ldt(&default_proc_ldt); 549216847Scperciva PCPU_SET(currentldt, (int)&default_proc_ldt); 550216847Scperciva#else 55154188Sluoqi lldt(_default_ldt); 55265597Sjake PCPU_SET(currentldt, _default_ldt); 553216847Scperciva#endif 55454188Sluoqi } 55554188Sluoqi 55685449Sjhb mdp->md_ldt = NULL; 557182961Skib user_ldt_deref(pldt); 558182961Skib} 559182961Skib 560182961Skibvoid 561182961Skibuser_ldt_deref(struct proc_ldt *pldt) 562182961Skib{ 563182961Skib 564182961Skib mtx_assert(&dt_lock, MA_OWNED); 565171295Sattilio if (--pldt->ldt_refcnt == 0) { 566171295Sattilio mtx_unlock_spin(&dt_lock); 567254025Sjeff kmem_free(kernel_arena, (vm_offset_t)pldt->ldt_base, 56885449Sjhb pldt->ldt_len * sizeof(union descriptor)); 569184205Sdes free(pldt, M_SUBPROC); 570171295Sattilio } else 571171295Sattilio mtx_unlock_spin(&dt_lock); 57254188Sluoqi} 57354188Sluoqi 574140862Ssobomax/* 575140862Ssobomax * Note for the authors of compat layers (linux, etc): copyout() in 576140862Ssobomax * the function below is not a problem since it presents data in 577140862Ssobomax * arch-specific format (i.e. i386-specific in this case), not in 578140862Ssobomax * the OS-specific one. 579140862Ssobomax */ 580140862Ssobomaxint 581140862Ssobomaxi386_get_ldt(td, uap) 58283366Sjulian struct thread *td; 583140862Ssobomax struct i386_ldt_args *uap; 5841051Sdg{ 5851051Sdg int error = 0; 586169802Sjeff struct proc_ldt *pldt; 5871051Sdg int nldt, num; 5881051Sdg union descriptor *lp; 5891051Sdg 5901051Sdg#ifdef DEBUG 59138505Sbde printf("i386_get_ldt: start=%d num=%d descs=%p\n", 59250816Sluoqi uap->start, uap->num, (void *)uap->descs); 5931051Sdg#endif 5941051Sdg 595169802Sjeff mtx_lock_spin(&dt_lock); 596169802Sjeff if ((pldt = td->td_proc->p_md.md_ldt) != NULL) { 59785449Sjhb nldt = pldt->ldt_len; 598169802Sjeff lp = &((union descriptor *)(pldt->ldt_base))[uap->start]; 599169802Sjeff mtx_unlock_spin(&dt_lock); 6001051Sdg num = min(uap->num, nldt); 6011051Sdg } else { 602169802Sjeff mtx_unlock_spin(&dt_lock); 6031051Sdg nldt = sizeof(ldt)/sizeof(ldt[0]); 6041051Sdg num = min(uap->num, nldt); 6051051Sdg lp = &ldt[uap->start]; 6061051Sdg } 607145950Scperciva 608145950Scperciva if ((uap->start > (unsigned int)nldt) || 609145950Scperciva ((unsigned int)num > (unsigned int)nldt) || 610145950Scperciva ((unsigned int)(uap->start + num) > (unsigned int)nldt)) 6111051Sdg return(EINVAL); 6121051Sdg 61350816Sluoqi error = copyout(lp, uap->descs, num * sizeof(union descriptor)); 6141051Sdg if (!error) 61583366Sjulian td->td_retval[0] = num; 6161051Sdg 6171051Sdg return(error); 6181051Sdg} 6191051Sdg 620140862Ssobomaxint 621140862Ssobomaxi386_set_ldt(td, uap, descs) 62283366Sjulian struct thread *td; 623140862Ssobomax struct i386_ldt_args *uap; 624140862Ssobomax union descriptor *descs; 6251051Sdg{ 626118242Sdavidxu int error = 0, i; 62754188Sluoqi int largest_ld; 62885449Sjhb struct mdproc *mdp = &td->td_proc->p_md; 629147558Sjhb struct proc_ldt *pldt; 630140862Ssobomax union descriptor *dp; 6311051Sdg 6321051Sdg#ifdef DEBUG 63338505Sbde printf("i386_set_ldt: start=%d num=%d descs=%p\n", 634118253Sjulian uap->start, uap->num, (void *)uap->descs); 6351051Sdg#endif 6361051Sdg 637140862Ssobomax if (descs == NULL) { 638118242Sdavidxu /* Free descriptors */ 639118242Sdavidxu if (uap->start == 0 && uap->num == 0) { 640118242Sdavidxu /* 641118242Sdavidxu * Treat this as a special case, so userland needn't 642118242Sdavidxu * know magic number NLDT. 643139450Sjhb */ 644118242Sdavidxu uap->start = NLDT; 645118242Sdavidxu uap->num = MAX_LD - NLDT; 646118242Sdavidxu } 647217543Sjhb if (uap->num == 0) 648118242Sdavidxu return (EINVAL); 649169802Sjeff mtx_lock_spin(&dt_lock); 650169802Sjeff if ((pldt = mdp->md_ldt) == NULL || 651169802Sjeff uap->start >= pldt->ldt_len) { 652169802Sjeff mtx_unlock_spin(&dt_lock); 653118242Sdavidxu return (0); 65476434Sjhb } 655118242Sdavidxu largest_ld = uap->start + uap->num; 656118242Sdavidxu if (largest_ld > pldt->ldt_len) 657118242Sdavidxu largest_ld = pldt->ldt_len; 658118242Sdavidxu i = largest_ld - uap->start; 659118242Sdavidxu bzero(&((union descriptor *)(pldt->ldt_base))[uap->start], 660118242Sdavidxu sizeof(union descriptor) * i); 661169802Sjeff mtx_unlock_spin(&dt_lock); 662118242Sdavidxu return (0); 66354188Sluoqi } 66454188Sluoqi 665118440Sjulian if (!(uap->start == LDT_AUTO_ALLOC && uap->num == 1)) { 666118242Sdavidxu /* verify range of descriptors to modify */ 667118242Sdavidxu largest_ld = uap->start + uap->num; 668217543Sjhb if (uap->start >= MAX_LD || largest_ld > MAX_LD) { 669118242Sdavidxu return (EINVAL); 670118242Sdavidxu } 671118242Sdavidxu } 672118242Sdavidxu 6731051Sdg /* Check descriptors for access violations */ 674118242Sdavidxu for (i = 0; i < uap->num; i++) { 675104118Speter dp = &descs[i]; 6761051Sdg 677104118Speter switch (dp->sd.sd_type) { 67854188Sluoqi case SDT_SYSNULL: /* system null */ 679104118Speter dp->sd.sd_p = 0; 6801051Sdg break; 68154188Sluoqi case SDT_SYS286TSS: /* system 286 TSS available */ 68254188Sluoqi case SDT_SYSLDT: /* system local descriptor table */ 68354188Sluoqi case SDT_SYS286BSY: /* system 286 TSS busy */ 68454188Sluoqi case SDT_SYSTASKGT: /* system task gate */ 68554188Sluoqi case SDT_SYS286IGT: /* system 286 interrupt gate */ 68654188Sluoqi case SDT_SYS286TGT: /* system 286 trap gate */ 68754188Sluoqi case SDT_SYSNULL2: /* undefined by Intel */ 68854188Sluoqi case SDT_SYS386TSS: /* system 386 TSS available */ 68954188Sluoqi case SDT_SYSNULL3: /* undefined by Intel */ 69054188Sluoqi case SDT_SYS386BSY: /* system 386 TSS busy */ 69154188Sluoqi case SDT_SYSNULL4: /* undefined by Intel */ 69254188Sluoqi case SDT_SYS386IGT: /* system 386 interrupt gate */ 69354188Sluoqi case SDT_SYS386TGT: /* system 386 trap gate */ 69454188Sluoqi case SDT_SYS286CGT: /* system 286 call gate */ 69554188Sluoqi case SDT_SYS386CGT: /* system 386 call gate */ 69654188Sluoqi /* I can't think of any reason to allow a user proc 69754188Sluoqi * to create a segment of these types. They are 69854188Sluoqi * for OS use only. 69954188Sluoqi */ 700118242Sdavidxu return (EACCES); 70176434Sjhb /*NOTREACHED*/ 70254188Sluoqi 70354188Sluoqi /* memory segment types */ 70454188Sluoqi case SDT_MEMEC: /* memory execute only conforming */ 70554188Sluoqi case SDT_MEMEAC: /* memory execute only accessed conforming */ 70654188Sluoqi case SDT_MEMERC: /* memory execute read conforming */ 70754188Sluoqi case SDT_MEMERAC: /* memory execute read accessed conforming */ 70854188Sluoqi /* Must be "present" if executable and conforming. */ 709140862Ssobomax if (dp->sd.sd_p == 0) 71076434Sjhb return (EACCES); 71154188Sluoqi break; 71254188Sluoqi case SDT_MEMRO: /* memory read only */ 71354188Sluoqi case SDT_MEMROA: /* memory read only accessed */ 71454188Sluoqi case SDT_MEMRW: /* memory read write */ 71554188Sluoqi case SDT_MEMRWA: /* memory read write accessed */ 71654188Sluoqi case SDT_MEMROD: /* memory read only expand dwn limit */ 71754188Sluoqi case SDT_MEMRODA: /* memory read only expand dwn lim accessed */ 71854188Sluoqi case SDT_MEMRWD: /* memory read write expand dwn limit */ 71954188Sluoqi case SDT_MEMRWDA: /* memory read write expand dwn lim acessed */ 72054188Sluoqi case SDT_MEME: /* memory execute only */ 72154188Sluoqi case SDT_MEMEA: /* memory execute only accessed */ 72254188Sluoqi case SDT_MEMER: /* memory execute read */ 72354188Sluoqi case SDT_MEMERA: /* memory execute read accessed */ 72454188Sluoqi break; 7251051Sdg default: 72614348Sjkh return(EINVAL); 7271051Sdg /*NOTREACHED*/ 7281051Sdg } 72954188Sluoqi 73054188Sluoqi /* Only user (ring-3) descriptors may be present. */ 731140862Ssobomax if ((dp->sd.sd_p != 0) && (dp->sd.sd_dpl != SEL_UPL)) 73254188Sluoqi return (EACCES); 7331051Sdg } 7341051Sdg 735118440Sjulian if (uap->start == LDT_AUTO_ALLOC && uap->num == 1) { 736118242Sdavidxu /* Allocate a free slot */ 737169802Sjeff mtx_lock_spin(&dt_lock); 738169802Sjeff if ((pldt = mdp->md_ldt) == NULL) { 739169802Sjeff if ((error = i386_ldt_grow(td, NLDT + 1))) { 740169802Sjeff mtx_unlock_spin(&dt_lock); 741118242Sdavidxu return (error); 742169802Sjeff } 743118242Sdavidxu pldt = mdp->md_ldt; 744118242Sdavidxu } 745118242Sdavidxuagain: 746118345Sjulian /* 747118345Sjulian * start scanning a bit up to leave room for NVidia and 748118345Sjulian * Wine, which still user the "Blat" method of allocation. 749118345Sjulian */ 750118440Sjulian dp = &((union descriptor *)(pldt->ldt_base))[NLDT]; 751118360Sjulian for (i = NLDT; i < pldt->ldt_len; ++i) { 752118242Sdavidxu if (dp->sd.sd_type == SDT_SYSNULL) 753118242Sdavidxu break; 754118242Sdavidxu dp++; 755118242Sdavidxu } 756118242Sdavidxu if (i >= pldt->ldt_len) { 757169802Sjeff if ((error = i386_ldt_grow(td, pldt->ldt_len+1))) { 758169802Sjeff mtx_unlock_spin(&dt_lock); 759118242Sdavidxu return (error); 760169802Sjeff } 761118242Sdavidxu goto again; 762118242Sdavidxu } 763118242Sdavidxu uap->start = i; 764118242Sdavidxu error = i386_set_ldt_data(td, i, 1, descs); 765169802Sjeff mtx_unlock_spin(&dt_lock); 766118242Sdavidxu } else { 767118242Sdavidxu largest_ld = uap->start + uap->num; 768169802Sjeff mtx_lock_spin(&dt_lock); 769169802Sjeff if (!(error = i386_ldt_grow(td, largest_ld))) { 770118242Sdavidxu error = i386_set_ldt_data(td, uap->start, uap->num, 771118242Sdavidxu descs); 772118242Sdavidxu } 773169802Sjeff mtx_unlock_spin(&dt_lock); 774118242Sdavidxu } 775118242Sdavidxu if (error == 0) 776118242Sdavidxu td->td_retval[0] = uap->start; 777118242Sdavidxu return (error); 778118242Sdavidxu} 779181775Skmacy#ifdef XEN 780181775Skmacystatic int 781181775Skmacyi386_set_ldt_data(struct thread *td, int start, int num, 782181775Skmacy union descriptor *descs) 783181775Skmacy{ 784181775Skmacy struct mdproc *mdp = &td->td_proc->p_md; 785181775Skmacy struct proc_ldt *pldt = mdp->md_ldt; 786118242Sdavidxu 787181775Skmacy mtx_assert(&dt_lock, MA_OWNED); 788181775Skmacy 789216846Scperciva while (num) { 790216846Scperciva xen_update_descriptor( 791216846Scperciva &((union descriptor *)(pldt->ldt_base))[start], 792216846Scperciva descs); 793216846Scperciva num--; 794216846Scperciva start++; 795216846Scperciva descs++; 796216846Scperciva } 797181775Skmacy return (0); 798181775Skmacy} 799181775Skmacy#else 800118242Sdavidxustatic int 801118242Sdavidxui386_set_ldt_data(struct thread *td, int start, int num, 802118242Sdavidxu union descriptor *descs) 803118242Sdavidxu{ 804118242Sdavidxu struct mdproc *mdp = &td->td_proc->p_md; 805118242Sdavidxu struct proc_ldt *pldt = mdp->md_ldt; 806118242Sdavidxu 807169802Sjeff mtx_assert(&dt_lock, MA_OWNED); 808118242Sdavidxu 8091051Sdg /* Fill in range */ 810118242Sdavidxu bcopy(descs, 811118242Sdavidxu &((union descriptor *)(pldt->ldt_base))[start], 812118242Sdavidxu num * sizeof(union descriptor)); 813104118Speter return (0); 8141051Sdg} 815181775Skmacy#endif /* !XEN */ 816118242Sdavidxu 817118242Sdavidxustatic int 818118242Sdavidxui386_ldt_grow(struct thread *td, int len) 819118242Sdavidxu{ 820118242Sdavidxu struct mdproc *mdp = &td->td_proc->p_md; 821171295Sattilio struct proc_ldt *new_ldt, *pldt; 822171295Sattilio caddr_t old_ldt_base = NULL_LDT_BASE; 823171295Sattilio int old_ldt_len = 0; 824118242Sdavidxu 825169802Sjeff mtx_assert(&dt_lock, MA_OWNED); 826169802Sjeff 827118242Sdavidxu if (len > MAX_LD) 828118242Sdavidxu return (ENOMEM); 829139450Sjhb if (len < NLDT + 1) 830139450Sjhb len = NLDT + 1; 831147558Sjhb 832147558Sjhb /* Allocate a user ldt. */ 833170110Sattilio if ((pldt = mdp->md_ldt) == NULL || len > pldt->ldt_len) { 834147558Sjhb new_ldt = user_ldt_alloc(mdp, len); 835118242Sdavidxu if (new_ldt == NULL) 836118242Sdavidxu return (ENOMEM); 837118242Sdavidxu pldt = mdp->md_ldt; 838147558Sjhb 839169802Sjeff if (pldt != NULL) { 840171295Sattilio if (new_ldt->ldt_len <= pldt->ldt_len) { 841118242Sdavidxu /* 842171295Sattilio * We just lost the race for allocation, so 843171295Sattilio * free the new object and return. 844118242Sdavidxu */ 845170110Sattilio mtx_unlock_spin(&dt_lock); 846254025Sjeff kmem_free(kernel_arena, 847118242Sdavidxu (vm_offset_t)new_ldt->ldt_base, 848118242Sdavidxu new_ldt->ldt_len * sizeof(union descriptor)); 849184205Sdes free(new_ldt, M_SUBPROC); 850170110Sattilio mtx_lock_spin(&dt_lock); 851118242Sdavidxu return (0); 852118242Sdavidxu } 853171295Sattilio 854171295Sattilio /* 855171295Sattilio * We have to substitute the current LDT entry for 856171295Sattilio * curproc with the new one since its size grew. 857171295Sattilio */ 858171295Sattilio old_ldt_base = pldt->ldt_base; 859171295Sattilio old_ldt_len = pldt->ldt_len; 860171295Sattilio pldt->ldt_sd = new_ldt->ldt_sd; 861171295Sattilio pldt->ldt_base = new_ldt->ldt_base; 862171295Sattilio pldt->ldt_len = new_ldt->ldt_len; 863169802Sjeff } else 864118242Sdavidxu mdp->md_ldt = pldt = new_ldt; 865118242Sdavidxu#ifdef SMP 866170110Sattilio /* 867170110Sattilio * Signal other cpus to reload ldt. We need to unlock dt_lock 868170110Sattilio * here because other CPU will contest on it since their 869170110Sattilio * curthreads won't hold the lock and will block when trying 870170110Sattilio * to acquire it. 871170110Sattilio */ 872170110Sattilio mtx_unlock_spin(&dt_lock); 873118242Sdavidxu smp_rendezvous(NULL, (void (*)(void *))set_user_ldt_rv, 874171295Sattilio NULL, td->td_proc->p_vmspace); 875118242Sdavidxu#else 876171309Sattilio set_user_ldt(&td->td_proc->p_md); 877171295Sattilio mtx_unlock_spin(&dt_lock); 878118242Sdavidxu#endif 879171295Sattilio if (old_ldt_base != NULL_LDT_BASE) { 880254025Sjeff kmem_free(kernel_arena, (vm_offset_t)old_ldt_base, 881171295Sattilio old_ldt_len * sizeof(union descriptor)); 882184205Sdes free(new_ldt, M_SUBPROC); 883171295Sattilio } 884171295Sattilio mtx_lock_spin(&dt_lock); 885118242Sdavidxu } 886118242Sdavidxu return (0); 887118242Sdavidxu} 888