/* sys_machdep.c revision 77486 */
1/*-
2 * Copyright (c) 1990 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	from: @(#)sys_machdep.c	5.5 (Berkeley) 1/19/91
34 * $FreeBSD: head/sys/i386/i386/sys_machdep.c 77486 2001-05-30 14:35:22Z jhb $
35 *
36 */
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/lock.h>
41#include <sys/malloc.h>
42#include <sys/mutex.h>
43#include <sys/proc.h>
44#include <sys/smp.h>
45#include <sys/sysproto.h>
46#include <sys/user.h>
47
48#include <vm/vm.h>
49#include <vm/pmap.h>
50#include <vm/vm_map.h>
51#include <vm/vm_extern.h>
52
53#include <machine/cpu.h>
54#include <machine/pcb_ext.h>	/* pcb.h included by sys/user.h */
55#include <machine/sysarch.h>
56
57#include <vm/vm_kern.h>		/* for kernel_map */
58
59#define MAX_LD 8192
60#define LD_PER_PAGE 512
61#define NEW_MAX_LD(num)  ((num + LD_PER_PAGE) & ~(LD_PER_PAGE-1))
62#define SIZE_FROM_LARGEST_LD(num) (NEW_MAX_LD(num) << 3)
63
64
65
66static int i386_get_ldt	__P((struct proc *, char *));
67static int i386_set_ldt	__P((struct proc *, char *));
68static int i386_get_ioperm	__P((struct proc *, char *));
69static int i386_set_ioperm	__P((struct proc *, char *));
70
71#ifndef _SYS_SYSPROTO_H_
72struct sysarch_args {
73	int op;
74	char *parms;
75};
76#endif
77
78int
79sysarch(p, uap)
80	struct proc *p;
81	register struct sysarch_args *uap;
82{
83	int error = 0;
84
85	switch(uap->op) {
86	case I386_GET_LDT:
87		error = i386_get_ldt(p, uap->parms);
88		break;
89
90	case I386_SET_LDT:
91		error = i386_set_ldt(p, uap->parms);
92		break;
93	case I386_GET_IOPERM:
94		error = i386_get_ioperm(p, uap->parms);
95		break;
96	case I386_SET_IOPERM:
97		error = i386_set_ioperm(p, uap->parms);
98		break;
99	case I386_VM86:
100		error = vm86_sysarch(p, uap->parms);
101		break;
102	default:
103		error = EOPNOTSUPP;
104		break;
105	}
106	return (error);
107}
108
/*
 * Allocate and initialize a pcb extension (TSS plus i/o permission
 * bitmap and vm86 interrupt map) for process p, and attach it to the
 * pcb.  Called the first time a process needs a private i/o bitmap.
 *
 * Returns 0 on success or ENOMEM if the backing pages cannot be
 * allocated.  Must be called for curproc only (asserted below).
 */
int
i386_extend_pcb(struct proc *p)
{
	int i, offset;
	u_long *addr;
	struct pcb_ext *ext;
	/* Template descriptor for the new TSS; base is patched in below. */
	struct soft_segment_descriptor ssd = {
		0,			/* segment base address (overwritten) */
		ctob(IOPAGES + 1) - 1,	/* length */
		SDT_SYS386TSS,		/* segment type */
		0,			/* priority level */
		1,			/* descriptor present */
		0, 0,
		0,			/* default 32 size */
		0			/* granularity */
	};

	ext = (struct pcb_ext *)kmem_alloc(kernel_map, ctob(IOPAGES+1));
	if (ext == 0)
		return (ENOMEM);
	bzero(ext, sizeof(struct pcb_ext));
	/* Ring-0 stack: top of the u-area, minus 16 bytes of slop. */
	ext->ext_tss.tss_esp0 = (unsigned)p->p_addr + ctob(UPAGES) - 16;
	ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
	/*
	 * The last byte of the i/o map must be followed by an 0xff byte.
	 * We arbitrarily allocate 16 bytes here, to keep the starting
	 * address on a doubleword boundary.
	 */
	offset = PAGE_SIZE - 16;
	/* i/o map base offset lives in the high word of tss_ioopt. */
	ext->ext_tss.tss_ioopt =
	    (offset - ((unsigned)&ext->ext_tss - (unsigned)ext)) << 16;
	ext->ext_iomap = (caddr_t)ext + offset;
	ext->ext_vm86.vm86_intmap = (caddr_t)ext + offset - 32;

	/*
	 * Fill the vm86 intmap, the i/o map, and the trailing pad with
	 * all-ones (the deny-by-default state for both maps).
	 */
	addr = (u_long *)ext->ext_vm86.vm86_intmap;
	for (i = 0; i < (ctob(IOPAGES) + 32 + 16) / sizeof(u_long); i++)
		*addr++ = ~0;

	ssd.ssd_base = (unsigned)&ext->ext_tss;
	ssd.ssd_limit -= ((unsigned)&ext->ext_tss - (unsigned)ext);
	ssdtosd(&ssd, &ext->ext_tssd);

	KASSERT(p == curproc, ("giving a TSS to non-curproc"));
	KASSERT(p->p_addr->u_pcb.pcb_ext == 0, ("already have a TSS!"));
	mtx_lock_spin(&sched_lock);
	p->p_addr->u_pcb.pcb_ext = ext;

	/* switch to the new TSS after syscall completes */
	need_resched(p);
	mtx_unlock_spin(&sched_lock);

	return 0;
}
162
163static int
164i386_set_ioperm(p, args)
165	struct proc *p;
166	char *args;
167{
168	int i, error;
169	struct i386_ioperm_args ua;
170	char *iomap;
171
172	if ((error = copyin(args, &ua, sizeof(struct i386_ioperm_args))) != 0)
173		return (error);
174
175	if ((error = suser(p)) != 0)
176		return (error);
177	if (securelevel > 0)
178		return (EPERM);
179	/*
180	 * XXX
181	 * While this is restricted to root, we should probably figure out
182	 * whether any other driver is using this i/o address, as so not to
183	 * cause confusion.  This probably requires a global 'usage registry'.
184	 */
185
186	if (p->p_addr->u_pcb.pcb_ext == 0)
187		if ((error = i386_extend_pcb(p)) != 0)
188			return (error);
189	iomap = (char *)p->p_addr->u_pcb.pcb_ext->ext_iomap;
190
191	if (ua.start + ua.length > IOPAGES * PAGE_SIZE * NBBY)
192		return (EINVAL);
193
194	for (i = ua.start; i < ua.start + ua.length; i++) {
195		if (ua.enable)
196			iomap[i >> 3] &= ~(1 << (i & 7));
197		else
198			iomap[i >> 3] |= (1 << (i & 7));
199	}
200	return (error);
201}
202
203static int
204i386_get_ioperm(p, args)
205	struct proc *p;
206	char *args;
207{
208	int i, state, error;
209	struct i386_ioperm_args ua;
210	char *iomap;
211
212	if ((error = copyin(args, &ua, sizeof(struct i386_ioperm_args))) != 0)
213		return (error);
214	if (ua.start >= IOPAGES * PAGE_SIZE * NBBY)
215		return (EINVAL);
216
217	if (p->p_addr->u_pcb.pcb_ext == 0) {
218		ua.length = 0;
219		goto done;
220	}
221
222	iomap = (char *)p->p_addr->u_pcb.pcb_ext->ext_iomap;
223
224	i = ua.start;
225	state = (iomap[i >> 3] >> (i & 7)) & 1;
226	ua.enable = !state;
227	ua.length = 1;
228
229	for (i = ua.start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) {
230		if (state != ((iomap[i >> 3] >> (i & 7)) & 1))
231			break;
232		ua.length++;
233	}
234
235done:
236	error = copyout(&ua, args, sizeof(struct i386_ioperm_args));
237	return (error);
238}
239
240/*
241 * Update the GDT entry pointing to the LDT to point to the LDT of the
242 * current process.
243 *
244 * This must be called with sched_lock held.  Unfortunately, we can't use a
245 * mtx_assert() here because cpu_switch() calls this function after changing
246 * curproc but before sched_lock's owner is updated in mi_switch().
247 */
248void
249set_user_ldt(struct pcb *pcb)
250{
251	struct pcb_ldt *pcb_ldt;
252
253	pcb_ldt = pcb->pcb_ldt;
254#ifdef SMP
255	gdt[PCPU_GET(cpuid) * NGDT + GUSERLDT_SEL].sd = pcb_ldt->ldt_sd;
256#else
257	gdt[GUSERLDT_SEL].sd = pcb_ldt->ldt_sd;
258#endif
259	lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
260	PCPU_SET(currentldt, GSEL(GUSERLDT_SEL, SEL_KPL));
261}
262
263void
264set_user_ldt_rv(struct pcb *pcb)
265{
266
267	if (pcb != PCPU_GET(curpcb))
268		return;
269
270	mtx_lock_spin(&sched_lock);
271	set_user_ldt(pcb);
272	mtx_unlock_spin(&sched_lock);
273}
274
275/*
276 * Must be called with either sched_lock free or held but not recursed.
277 * If it does not return NULL, it will return with it owned.
278 */
279struct pcb_ldt *
280user_ldt_alloc(struct pcb *pcb, int len)
281{
282	struct pcb_ldt *pcb_ldt, *new_ldt;
283
284	if (mtx_owned(&sched_lock))
285		mtx_unlock_spin(&sched_lock);
286	mtx_assert(&sched_lock, MA_NOTOWNED);
287	MALLOC(new_ldt, struct pcb_ldt *, sizeof(struct pcb_ldt),
288		M_SUBPROC, M_WAITOK);
289
290	new_ldt->ldt_len = len = NEW_MAX_LD(len);
291	new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map,
292		len * sizeof(union descriptor));
293	if (new_ldt->ldt_base == NULL) {
294		FREE(new_ldt, M_SUBPROC);
295		return NULL;
296	}
297	new_ldt->ldt_refcnt = 1;
298	new_ldt->ldt_active = 0;
299
300	mtx_lock_spin(&sched_lock);
301	gdt_segs[GUSERLDT_SEL].ssd_base = (unsigned)new_ldt->ldt_base;
302	gdt_segs[GUSERLDT_SEL].ssd_limit = len * sizeof(union descriptor) - 1;
303	ssdtosd(&gdt_segs[GUSERLDT_SEL], &new_ldt->ldt_sd);
304
305	if ((pcb_ldt = pcb->pcb_ldt)) {
306		if (len > pcb_ldt->ldt_len)
307			len = pcb_ldt->ldt_len;
308		bcopy(pcb_ldt->ldt_base, new_ldt->ldt_base,
309			len * sizeof(union descriptor));
310	} else {
311		bcopy(ldt, new_ldt->ldt_base, sizeof(ldt));
312	}
313	return new_ldt;
314}
315
316/*
317 * Must be called either with sched_lock free or held but not recursed.
318 * If pcb->pcb_ldt is not NULL, it will return with sched_lock released.
319 */
320void
321user_ldt_free(struct pcb *pcb)
322{
323	struct pcb_ldt *pcb_ldt = pcb->pcb_ldt;
324
325	if (pcb_ldt == NULL)
326		return;
327
328	if (!mtx_owned(&sched_lock))
329		mtx_lock_spin(&sched_lock);
330	mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED);
331	if (pcb == PCPU_GET(curpcb)) {
332		lldt(_default_ldt);
333		PCPU_SET(currentldt, _default_ldt);
334	}
335
336	pcb->pcb_ldt = NULL;
337	if (--pcb_ldt->ldt_refcnt == 0) {
338		mtx_unlock_spin(&sched_lock);
339		kmem_free(kernel_map, (vm_offset_t)pcb_ldt->ldt_base,
340			pcb_ldt->ldt_len * sizeof(union descriptor));
341		FREE(pcb_ldt, M_SUBPROC);
342	} else
343		mtx_unlock_spin(&sched_lock);
344}
345
346static int
347i386_get_ldt(p, args)
348	struct proc *p;
349	char *args;
350{
351	int error = 0;
352	struct pcb *pcb = &p->p_addr->u_pcb;
353	struct pcb_ldt *pcb_ldt = pcb->pcb_ldt;
354	int nldt, num;
355	union descriptor *lp;
356	struct i386_ldt_args ua, *uap = &ua;
357
358	if ((error = copyin(args, uap, sizeof(struct i386_ldt_args))) < 0)
359		return(error);
360
361#ifdef	DEBUG
362	printf("i386_get_ldt: start=%d num=%d descs=%p\n",
363	    uap->start, uap->num, (void *)uap->descs);
364#endif
365
366	/* verify range of LDTs exist */
367	if ((uap->start < 0) || (uap->num <= 0))
368		return(EINVAL);
369
370	if (pcb_ldt) {
371		nldt = pcb_ldt->ldt_len;
372		num = min(uap->num, nldt);
373		lp = &((union descriptor *)(pcb_ldt->ldt_base))[uap->start];
374	} else {
375		nldt = sizeof(ldt)/sizeof(ldt[0]);
376		num = min(uap->num, nldt);
377		lp = &ldt[uap->start];
378	}
379	if (uap->start > nldt)
380		return(EINVAL);
381
382	error = copyout(lp, uap->descs, num * sizeof(union descriptor));
383	if (!error)
384		p->p_retval[0] = num;
385
386	return(error);
387}
388
389static int
390i386_set_ldt(p, args)
391	struct proc *p;
392	char *args;
393{
394	int error = 0, i, n;
395	int largest_ld;
396	struct pcb *pcb = &p->p_addr->u_pcb;
397	struct pcb_ldt *pcb_ldt = pcb->pcb_ldt;
398	struct i386_ldt_args ua, *uap = &ua;
399	caddr_t old_ldt_base;
400	int old_ldt_len;
401	critical_t savecrit;
402
403	if ((error = copyin(args, uap, sizeof(struct i386_ldt_args))) < 0)
404		return(error);
405
406#ifdef	DEBUG
407	printf("i386_set_ldt: start=%d num=%d descs=%p\n",
408	    uap->start, uap->num, (void *)uap->descs);
409#endif
410
411	/* verify range of descriptors to modify */
412	if ((uap->start < 0) || (uap->start >= MAX_LD) || (uap->num < 0) ||
413		(uap->num > MAX_LD))
414	{
415		return(EINVAL);
416	}
417	largest_ld = uap->start + uap->num - 1;
418	if (largest_ld >= MAX_LD)
419		return(EINVAL);
420
421	/* allocate user ldt */
422	if (!pcb_ldt || largest_ld >= pcb_ldt->ldt_len) {
423		struct pcb_ldt *new_ldt = user_ldt_alloc(pcb, largest_ld);
424		if (new_ldt == NULL)
425			return ENOMEM;
426		if (pcb_ldt) {
427			old_ldt_base = pcb_ldt->ldt_base;
428			old_ldt_len = pcb_ldt->ldt_len;
429			pcb_ldt->ldt_sd = new_ldt->ldt_sd;
430			pcb_ldt->ldt_base = new_ldt->ldt_base;
431			pcb_ldt->ldt_len = new_ldt->ldt_len;
432			mtx_unlock_spin(&sched_lock);
433			kmem_free(kernel_map, (vm_offset_t)old_ldt_base,
434				old_ldt_len * sizeof(union descriptor));
435			FREE(new_ldt, M_SUBPROC);
436#ifndef SMP
437			mtx_lock_spin(&sched_lock);
438#endif
439		} else {
440			pcb->pcb_ldt = pcb_ldt = new_ldt;
441#ifdef SMP
442			mtx_unlock_spin(&sched_lock);
443#endif
444		}
445#ifdef SMP
446		/* signal other cpus to reload ldt */
447		smp_rendezvous(NULL, (void (*)(void *))set_user_ldt_rv, NULL, pcb);
448#else
449		set_user_ldt(pcb);
450		mtx_unlock_spin(&sched_lock);
451#endif
452	}
453
454	/* Check descriptors for access violations */
455	for (i = 0, n = uap->start; i < uap->num; i++, n++) {
456		union descriptor desc, *dp;
457		dp = &uap->descs[i];
458		error = copyin(dp, &desc, sizeof(union descriptor));
459		if (error)
460			return(error);
461
462		switch (desc.sd.sd_type) {
463		case SDT_SYSNULL:	/* system null */
464			desc.sd.sd_p = 0;
465			break;
466		case SDT_SYS286TSS: /* system 286 TSS available */
467		case SDT_SYSLDT:    /* system local descriptor table */
468		case SDT_SYS286BSY: /* system 286 TSS busy */
469		case SDT_SYSTASKGT: /* system task gate */
470		case SDT_SYS286IGT: /* system 286 interrupt gate */
471		case SDT_SYS286TGT: /* system 286 trap gate */
472		case SDT_SYSNULL2:  /* undefined by Intel */
473		case SDT_SYS386TSS: /* system 386 TSS available */
474		case SDT_SYSNULL3:  /* undefined by Intel */
475		case SDT_SYS386BSY: /* system 386 TSS busy */
476		case SDT_SYSNULL4:  /* undefined by Intel */
477		case SDT_SYS386IGT: /* system 386 interrupt gate */
478		case SDT_SYS386TGT: /* system 386 trap gate */
479		case SDT_SYS286CGT: /* system 286 call gate */
480		case SDT_SYS386CGT: /* system 386 call gate */
481			/* I can't think of any reason to allow a user proc
482			 * to create a segment of these types.  They are
483			 * for OS use only.
484			 */
485			return EACCES;
486			/*NOTREACHED*/
487
488		/* memory segment types */
489		case SDT_MEMEC:   /* memory execute only conforming */
490		case SDT_MEMEAC:  /* memory execute only accessed conforming */
491		case SDT_MEMERC:  /* memory execute read conforming */
492		case SDT_MEMERAC: /* memory execute read accessed conforming */
493			 /* Must be "present" if executable and conforming. */
494			if (desc.sd.sd_p == 0)
495				return (EACCES);
496			break;
497		case SDT_MEMRO:   /* memory read only */
498		case SDT_MEMROA:  /* memory read only accessed */
499		case SDT_MEMRW:   /* memory read write */
500		case SDT_MEMRWA:  /* memory read write accessed */
501		case SDT_MEMROD:  /* memory read only expand dwn limit */
502		case SDT_MEMRODA: /* memory read only expand dwn lim accessed */
503		case SDT_MEMRWD:  /* memory read write expand dwn limit */
504		case SDT_MEMRWDA: /* memory read write expand dwn lim acessed */
505		case SDT_MEME:    /* memory execute only */
506		case SDT_MEMEA:   /* memory execute only accessed */
507		case SDT_MEMER:   /* memory execute read */
508		case SDT_MEMERA:  /* memory execute read accessed */
509			break;
510		default:
511			return(EINVAL);
512			/*NOTREACHED*/
513		}
514
515		/* Only user (ring-3) descriptors may be present. */
516		if ((desc.sd.sd_p != 0) && (desc.sd.sd_dpl != SEL_UPL))
517			return (EACCES);
518	}
519
520	/* Fill in range */
521	savecrit = critical_enter();
522	error = copyin(uap->descs,
523	    &((union descriptor *)(pcb_ldt->ldt_base))[uap->start],
524	    uap->num * sizeof(union descriptor));
525	if (!error)
526		p->p_retval[0] = uap->start;
527	critical_exit(savecrit);
528
529	return(error);
530}
531