/* sys_machdep.c revision 77486 */
/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
32 * 33 * from: @(#)sys_machdep.c 5.5 (Berkeley) 1/19/91 34 * $FreeBSD: head/sys/i386/i386/sys_machdep.c 77486 2001-05-30 14:35:22Z jhb $ 35 * 36 */ 37 38#include <sys/param.h> 39#include <sys/systm.h> 40#include <sys/lock.h> 41#include <sys/malloc.h> 42#include <sys/mutex.h> 43#include <sys/proc.h> 44#include <sys/smp.h> 45#include <sys/sysproto.h> 46#include <sys/user.h> 47 48#include <vm/vm.h> 49#include <vm/pmap.h> 50#include <vm/vm_map.h> 51#include <vm/vm_extern.h> 52 53#include <machine/cpu.h> 54#include <machine/pcb_ext.h> /* pcb.h included by sys/user.h */ 55#include <machine/sysarch.h> 56 57#include <vm/vm_kern.h> /* for kernel_map */ 58 59#define MAX_LD 8192 60#define LD_PER_PAGE 512 61#define NEW_MAX_LD(num) ((num + LD_PER_PAGE) & ~(LD_PER_PAGE-1)) 62#define SIZE_FROM_LARGEST_LD(num) (NEW_MAX_LD(num) << 3) 63 64 65 66static int i386_get_ldt __P((struct proc *, char *)); 67static int i386_set_ldt __P((struct proc *, char *)); 68static int i386_get_ioperm __P((struct proc *, char *)); 69static int i386_set_ioperm __P((struct proc *, char *)); 70 71#ifndef _SYS_SYSPROTO_H_ 72struct sysarch_args { 73 int op; 74 char *parms; 75}; 76#endif 77 78int 79sysarch(p, uap) 80 struct proc *p; 81 register struct sysarch_args *uap; 82{ 83 int error = 0; 84 85 switch(uap->op) { 86 case I386_GET_LDT: 87 error = i386_get_ldt(p, uap->parms); 88 break; 89 90 case I386_SET_LDT: 91 error = i386_set_ldt(p, uap->parms); 92 break; 93 case I386_GET_IOPERM: 94 error = i386_get_ioperm(p, uap->parms); 95 break; 96 case I386_SET_IOPERM: 97 error = i386_set_ioperm(p, uap->parms); 98 break; 99 case I386_VM86: 100 error = vm86_sysarch(p, uap->parms); 101 break; 102 default: 103 error = EOPNOTSUPP; 104 break; 105 } 106 return (error); 107} 108 109int 110i386_extend_pcb(struct proc *p) 111{ 112 int i, offset; 113 u_long *addr; 114 struct pcb_ext *ext; 115 struct soft_segment_descriptor ssd = { 116 0, /* segment base address (overwritten) */ 117 ctob(IOPAGES + 1) - 1, /* length */ 118 
SDT_SYS386TSS, /* segment type */ 119 0, /* priority level */ 120 1, /* descriptor present */ 121 0, 0, 122 0, /* default 32 size */ 123 0 /* granularity */ 124 }; 125 126 ext = (struct pcb_ext *)kmem_alloc(kernel_map, ctob(IOPAGES+1)); 127 if (ext == 0) 128 return (ENOMEM); 129 bzero(ext, sizeof(struct pcb_ext)); 130 ext->ext_tss.tss_esp0 = (unsigned)p->p_addr + ctob(UPAGES) - 16; 131 ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); 132 /* 133 * The last byte of the i/o map must be followed by an 0xff byte. 134 * We arbitrarily allocate 16 bytes here, to keep the starting 135 * address on a doubleword boundary. 136 */ 137 offset = PAGE_SIZE - 16; 138 ext->ext_tss.tss_ioopt = 139 (offset - ((unsigned)&ext->ext_tss - (unsigned)ext)) << 16; 140 ext->ext_iomap = (caddr_t)ext + offset; 141 ext->ext_vm86.vm86_intmap = (caddr_t)ext + offset - 32; 142 143 addr = (u_long *)ext->ext_vm86.vm86_intmap; 144 for (i = 0; i < (ctob(IOPAGES) + 32 + 16) / sizeof(u_long); i++) 145 *addr++ = ~0; 146 147 ssd.ssd_base = (unsigned)&ext->ext_tss; 148 ssd.ssd_limit -= ((unsigned)&ext->ext_tss - (unsigned)ext); 149 ssdtosd(&ssd, &ext->ext_tssd); 150 151 KASSERT(p == curproc, ("giving a TSS to non-curproc")); 152 KASSERT(p->p_addr->u_pcb.pcb_ext == 0, ("already have a TSS!")); 153 mtx_lock_spin(&sched_lock); 154 p->p_addr->u_pcb.pcb_ext = ext; 155 156 /* switch to the new TSS after syscall completes */ 157 need_resched(p); 158 mtx_unlock_spin(&sched_lock); 159 160 return 0; 161} 162 163static int 164i386_set_ioperm(p, args) 165 struct proc *p; 166 char *args; 167{ 168 int i, error; 169 struct i386_ioperm_args ua; 170 char *iomap; 171 172 if ((error = copyin(args, &ua, sizeof(struct i386_ioperm_args))) != 0) 173 return (error); 174 175 if ((error = suser(p)) != 0) 176 return (error); 177 if (securelevel > 0) 178 return (EPERM); 179 /* 180 * XXX 181 * While this is restricted to root, we should probably figure out 182 * whether any other driver is using this i/o address, as so not to 183 * 
cause confusion. This probably requires a global 'usage registry'. 184 */ 185 186 if (p->p_addr->u_pcb.pcb_ext == 0) 187 if ((error = i386_extend_pcb(p)) != 0) 188 return (error); 189 iomap = (char *)p->p_addr->u_pcb.pcb_ext->ext_iomap; 190 191 if (ua.start + ua.length > IOPAGES * PAGE_SIZE * NBBY) 192 return (EINVAL); 193 194 for (i = ua.start; i < ua.start + ua.length; i++) { 195 if (ua.enable) 196 iomap[i >> 3] &= ~(1 << (i & 7)); 197 else 198 iomap[i >> 3] |= (1 << (i & 7)); 199 } 200 return (error); 201} 202 203static int 204i386_get_ioperm(p, args) 205 struct proc *p; 206 char *args; 207{ 208 int i, state, error; 209 struct i386_ioperm_args ua; 210 char *iomap; 211 212 if ((error = copyin(args, &ua, sizeof(struct i386_ioperm_args))) != 0) 213 return (error); 214 if (ua.start >= IOPAGES * PAGE_SIZE * NBBY) 215 return (EINVAL); 216 217 if (p->p_addr->u_pcb.pcb_ext == 0) { 218 ua.length = 0; 219 goto done; 220 } 221 222 iomap = (char *)p->p_addr->u_pcb.pcb_ext->ext_iomap; 223 224 i = ua.start; 225 state = (iomap[i >> 3] >> (i & 7)) & 1; 226 ua.enable = !state; 227 ua.length = 1; 228 229 for (i = ua.start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) { 230 if (state != ((iomap[i >> 3] >> (i & 7)) & 1)) 231 break; 232 ua.length++; 233 } 234 235done: 236 error = copyout(&ua, args, sizeof(struct i386_ioperm_args)); 237 return (error); 238} 239 240/* 241 * Update the GDT entry pointing to the LDT to point to the LDT of the 242 * current process. 243 * 244 * This must be called with sched_lock held. Unfortunately, we can't use a 245 * mtx_assert() here because cpu_switch() calls this function after changing 246 * curproc but before sched_lock's owner is updated in mi_switch(). 
247 */ 248void 249set_user_ldt(struct pcb *pcb) 250{ 251 struct pcb_ldt *pcb_ldt; 252 253 pcb_ldt = pcb->pcb_ldt; 254#ifdef SMP 255 gdt[PCPU_GET(cpuid) * NGDT + GUSERLDT_SEL].sd = pcb_ldt->ldt_sd; 256#else 257 gdt[GUSERLDT_SEL].sd = pcb_ldt->ldt_sd; 258#endif 259 lldt(GSEL(GUSERLDT_SEL, SEL_KPL)); 260 PCPU_SET(currentldt, GSEL(GUSERLDT_SEL, SEL_KPL)); 261} 262 263void 264set_user_ldt_rv(struct pcb *pcb) 265{ 266 267 if (pcb != PCPU_GET(curpcb)) 268 return; 269 270 mtx_lock_spin(&sched_lock); 271 set_user_ldt(pcb); 272 mtx_unlock_spin(&sched_lock); 273} 274 275/* 276 * Must be called with either sched_lock free or held but not recursed. 277 * If it does not return NULL, it will return with it owned. 278 */ 279struct pcb_ldt * 280user_ldt_alloc(struct pcb *pcb, int len) 281{ 282 struct pcb_ldt *pcb_ldt, *new_ldt; 283 284 if (mtx_owned(&sched_lock)) 285 mtx_unlock_spin(&sched_lock); 286 mtx_assert(&sched_lock, MA_NOTOWNED); 287 MALLOC(new_ldt, struct pcb_ldt *, sizeof(struct pcb_ldt), 288 M_SUBPROC, M_WAITOK); 289 290 new_ldt->ldt_len = len = NEW_MAX_LD(len); 291 new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map, 292 len * sizeof(union descriptor)); 293 if (new_ldt->ldt_base == NULL) { 294 FREE(new_ldt, M_SUBPROC); 295 return NULL; 296 } 297 new_ldt->ldt_refcnt = 1; 298 new_ldt->ldt_active = 0; 299 300 mtx_lock_spin(&sched_lock); 301 gdt_segs[GUSERLDT_SEL].ssd_base = (unsigned)new_ldt->ldt_base; 302 gdt_segs[GUSERLDT_SEL].ssd_limit = len * sizeof(union descriptor) - 1; 303 ssdtosd(&gdt_segs[GUSERLDT_SEL], &new_ldt->ldt_sd); 304 305 if ((pcb_ldt = pcb->pcb_ldt)) { 306 if (len > pcb_ldt->ldt_len) 307 len = pcb_ldt->ldt_len; 308 bcopy(pcb_ldt->ldt_base, new_ldt->ldt_base, 309 len * sizeof(union descriptor)); 310 } else { 311 bcopy(ldt, new_ldt->ldt_base, sizeof(ldt)); 312 } 313 return new_ldt; 314} 315 316/* 317 * Must be called either with sched_lock free or held but not recursed. 318 * If pcb->pcb_ldt is not NULL, it will return with sched_lock released. 
319 */ 320void 321user_ldt_free(struct pcb *pcb) 322{ 323 struct pcb_ldt *pcb_ldt = pcb->pcb_ldt; 324 325 if (pcb_ldt == NULL) 326 return; 327 328 if (!mtx_owned(&sched_lock)) 329 mtx_lock_spin(&sched_lock); 330 mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED); 331 if (pcb == PCPU_GET(curpcb)) { 332 lldt(_default_ldt); 333 PCPU_SET(currentldt, _default_ldt); 334 } 335 336 pcb->pcb_ldt = NULL; 337 if (--pcb_ldt->ldt_refcnt == 0) { 338 mtx_unlock_spin(&sched_lock); 339 kmem_free(kernel_map, (vm_offset_t)pcb_ldt->ldt_base, 340 pcb_ldt->ldt_len * sizeof(union descriptor)); 341 FREE(pcb_ldt, M_SUBPROC); 342 } else 343 mtx_unlock_spin(&sched_lock); 344} 345 346static int 347i386_get_ldt(p, args) 348 struct proc *p; 349 char *args; 350{ 351 int error = 0; 352 struct pcb *pcb = &p->p_addr->u_pcb; 353 struct pcb_ldt *pcb_ldt = pcb->pcb_ldt; 354 int nldt, num; 355 union descriptor *lp; 356 struct i386_ldt_args ua, *uap = &ua; 357 358 if ((error = copyin(args, uap, sizeof(struct i386_ldt_args))) < 0) 359 return(error); 360 361#ifdef DEBUG 362 printf("i386_get_ldt: start=%d num=%d descs=%p\n", 363 uap->start, uap->num, (void *)uap->descs); 364#endif 365 366 /* verify range of LDTs exist */ 367 if ((uap->start < 0) || (uap->num <= 0)) 368 return(EINVAL); 369 370 if (pcb_ldt) { 371 nldt = pcb_ldt->ldt_len; 372 num = min(uap->num, nldt); 373 lp = &((union descriptor *)(pcb_ldt->ldt_base))[uap->start]; 374 } else { 375 nldt = sizeof(ldt)/sizeof(ldt[0]); 376 num = min(uap->num, nldt); 377 lp = &ldt[uap->start]; 378 } 379 if (uap->start > nldt) 380 return(EINVAL); 381 382 error = copyout(lp, uap->descs, num * sizeof(union descriptor)); 383 if (!error) 384 p->p_retval[0] = num; 385 386 return(error); 387} 388 389static int 390i386_set_ldt(p, args) 391 struct proc *p; 392 char *args; 393{ 394 int error = 0, i, n; 395 int largest_ld; 396 struct pcb *pcb = &p->p_addr->u_pcb; 397 struct pcb_ldt *pcb_ldt = pcb->pcb_ldt; 398 struct i386_ldt_args ua, *uap = &ua; 399 caddr_t old_ldt_base; 
400 int old_ldt_len; 401 critical_t savecrit; 402 403 if ((error = copyin(args, uap, sizeof(struct i386_ldt_args))) < 0) 404 return(error); 405 406#ifdef DEBUG 407 printf("i386_set_ldt: start=%d num=%d descs=%p\n", 408 uap->start, uap->num, (void *)uap->descs); 409#endif 410 411 /* verify range of descriptors to modify */ 412 if ((uap->start < 0) || (uap->start >= MAX_LD) || (uap->num < 0) || 413 (uap->num > MAX_LD)) 414 { 415 return(EINVAL); 416 } 417 largest_ld = uap->start + uap->num - 1; 418 if (largest_ld >= MAX_LD) 419 return(EINVAL); 420 421 /* allocate user ldt */ 422 if (!pcb_ldt || largest_ld >= pcb_ldt->ldt_len) { 423 struct pcb_ldt *new_ldt = user_ldt_alloc(pcb, largest_ld); 424 if (new_ldt == NULL) 425 return ENOMEM; 426 if (pcb_ldt) { 427 old_ldt_base = pcb_ldt->ldt_base; 428 old_ldt_len = pcb_ldt->ldt_len; 429 pcb_ldt->ldt_sd = new_ldt->ldt_sd; 430 pcb_ldt->ldt_base = new_ldt->ldt_base; 431 pcb_ldt->ldt_len = new_ldt->ldt_len; 432 mtx_unlock_spin(&sched_lock); 433 kmem_free(kernel_map, (vm_offset_t)old_ldt_base, 434 old_ldt_len * sizeof(union descriptor)); 435 FREE(new_ldt, M_SUBPROC); 436#ifndef SMP 437 mtx_lock_spin(&sched_lock); 438#endif 439 } else { 440 pcb->pcb_ldt = pcb_ldt = new_ldt; 441#ifdef SMP 442 mtx_unlock_spin(&sched_lock); 443#endif 444 } 445#ifdef SMP 446 /* signal other cpus to reload ldt */ 447 smp_rendezvous(NULL, (void (*)(void *))set_user_ldt_rv, NULL, pcb); 448#else 449 set_user_ldt(pcb); 450 mtx_unlock_spin(&sched_lock); 451#endif 452 } 453 454 /* Check descriptors for access violations */ 455 for (i = 0, n = uap->start; i < uap->num; i++, n++) { 456 union descriptor desc, *dp; 457 dp = &uap->descs[i]; 458 error = copyin(dp, &desc, sizeof(union descriptor)); 459 if (error) 460 return(error); 461 462 switch (desc.sd.sd_type) { 463 case SDT_SYSNULL: /* system null */ 464 desc.sd.sd_p = 0; 465 break; 466 case SDT_SYS286TSS: /* system 286 TSS available */ 467 case SDT_SYSLDT: /* system local descriptor table */ 468 case 
SDT_SYS286BSY: /* system 286 TSS busy */ 469 case SDT_SYSTASKGT: /* system task gate */ 470 case SDT_SYS286IGT: /* system 286 interrupt gate */ 471 case SDT_SYS286TGT: /* system 286 trap gate */ 472 case SDT_SYSNULL2: /* undefined by Intel */ 473 case SDT_SYS386TSS: /* system 386 TSS available */ 474 case SDT_SYSNULL3: /* undefined by Intel */ 475 case SDT_SYS386BSY: /* system 386 TSS busy */ 476 case SDT_SYSNULL4: /* undefined by Intel */ 477 case SDT_SYS386IGT: /* system 386 interrupt gate */ 478 case SDT_SYS386TGT: /* system 386 trap gate */ 479 case SDT_SYS286CGT: /* system 286 call gate */ 480 case SDT_SYS386CGT: /* system 386 call gate */ 481 /* I can't think of any reason to allow a user proc 482 * to create a segment of these types. They are 483 * for OS use only. 484 */ 485 return EACCES; 486 /*NOTREACHED*/ 487 488 /* memory segment types */ 489 case SDT_MEMEC: /* memory execute only conforming */ 490 case SDT_MEMEAC: /* memory execute only accessed conforming */ 491 case SDT_MEMERC: /* memory execute read conforming */ 492 case SDT_MEMERAC: /* memory execute read accessed conforming */ 493 /* Must be "present" if executable and conforming. 
*/ 494 if (desc.sd.sd_p == 0) 495 return (EACCES); 496 break; 497 case SDT_MEMRO: /* memory read only */ 498 case SDT_MEMROA: /* memory read only accessed */ 499 case SDT_MEMRW: /* memory read write */ 500 case SDT_MEMRWA: /* memory read write accessed */ 501 case SDT_MEMROD: /* memory read only expand dwn limit */ 502 case SDT_MEMRODA: /* memory read only expand dwn lim accessed */ 503 case SDT_MEMRWD: /* memory read write expand dwn limit */ 504 case SDT_MEMRWDA: /* memory read write expand dwn lim acessed */ 505 case SDT_MEME: /* memory execute only */ 506 case SDT_MEMEA: /* memory execute only accessed */ 507 case SDT_MEMER: /* memory execute read */ 508 case SDT_MEMERA: /* memory execute read accessed */ 509 break; 510 default: 511 return(EINVAL); 512 /*NOTREACHED*/ 513 } 514 515 /* Only user (ring-3) descriptors may be present. */ 516 if ((desc.sd.sd_p != 0) && (desc.sd.sd_dpl != SEL_UPL)) 517 return (EACCES); 518 } 519 520 /* Fill in range */ 521 savecrit = critical_enter(); 522 error = copyin(uap->descs, 523 &((union descriptor *)(pcb_ldt->ldt_base))[uap->start], 524 uap->num * sizeof(union descriptor)); 525 if (!error) 526 p->p_retval[0] = uap->start; 527 critical_exit(savecrit); 528 529 return(error); 530} 531