sys_machdep.c revision 296953
190075Sobrien/*-
290075Sobrien * Copyright (c) 2003 Peter Wemm.
390075Sobrien * Copyright (c) 1990 The Regents of the University of California.
490075Sobrien * All rights reserved.
590075Sobrien *
690075Sobrien * Redistribution and use in source and binary forms, with or without
790075Sobrien * modification, are permitted provided that the following conditions
8132718Skan * are met:
990075Sobrien * 1. Redistributions of source code must retain the above copyright
10132718Skan *    notice, this list of conditions and the following disclaimer.
11132718Skan * 2. Redistributions in binary form must reproduce the above copyright
12132718Skan *    notice, this list of conditions and the following disclaimer in the
13132718Skan *    documentation and/or other materials provided with the distribution.
1490075Sobrien * 4. Neither the name of the University nor the names of its contributors
15132718Skan *    may be used to endorse or promote products derived from this software
16132718Skan *    without specific prior written permission.
17132718Skan *
18132718Skan * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
1990075Sobrien * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20132718Skan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21132718Skan * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22132718Skan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23132718Skan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2490075Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2590075Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2690075Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2790075Sobrien * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2890075Sobrien * SUCH DAMAGE.
2990075Sobrien *
3090075Sobrien *	from: @(#)sys_machdep.c	5.5 (Berkeley) 1/19/91
3190075Sobrien */
3290075Sobrien
3390075Sobrien#include <sys/cdefs.h>
3490075Sobrien__FBSDID("$FreeBSD: releng/9.3/sys/amd64/amd64/sys_machdep.c 296953 2016-03-16 22:30:03Z glebius $");
3590075Sobrien
3690075Sobrien#include "opt_capsicum.h"
3790075Sobrien
3890075Sobrien#include <sys/param.h>
39117395Skan#include <sys/systm.h>
40117395Skan#include <sys/capability.h>
4190075Sobrien#include <sys/kernel.h>
4290075Sobrien#include <sys/lock.h>
4390075Sobrien#include <sys/malloc.h>
4490075Sobrien#include <sys/mutex.h>
4590075Sobrien#include <sys/priv.h>
4690075Sobrien#include <sys/proc.h>
47117395Skan#include <sys/sysproto.h>
48117395Skan#include <sys/uio.h>
49132718Skan
50117395Skan#include <vm/vm.h>
51117395Skan#include <vm/pmap.h>
5290075Sobrien#include <vm/vm_kern.h>		/* for kernel_map */
5390075Sobrien#include <vm/vm_extern.h>
5490075Sobrien
55117395Skan#include <machine/frame.h>
56117395Skan#include <machine/md_var.h>
57117395Skan#include <machine/pcb.h>
5890075Sobrien#include <machine/specialreg.h>
59117395Skan#include <machine/sysarch.h>
6090075Sobrien#include <machine/tss.h>
6190075Sobrien#include <machine/vmparam.h>
6290075Sobrien
6390075Sobrien#include <security/audit/audit.h>
6490075Sobrien
6590075Sobrien#define	MAX_LD		8192
6690075Sobrien
67117395Skanint max_ldt_segment = 1024;
6890075SobrienSYSCTL_INT(_machdep, OID_AUTO, max_ldt_segment, CTLFLAG_RDTUN,
6990075Sobrien    &max_ldt_segment, 0,
7090075Sobrien    "Maximum number of allowed LDT segments in the single address space");
71117395Skan
72117395Skanstatic void
73117395Skanmax_ldt_segment_init(void *arg __unused)
74117395Skan{
75117395Skan
76117395Skan	TUNABLE_INT_FETCH("machdep.max_ldt_segment", &max_ldt_segment);
77117395Skan	if (max_ldt_segment <= 0)
78117395Skan		max_ldt_segment = 1;
7990075Sobrien	if (max_ldt_segment > MAX_LD)
8090075Sobrien		max_ldt_segment = MAX_LD;
8190075Sobrien}
8290075SobrienSYSINIT(maxldt, SI_SUB_VM_CONF, SI_ORDER_ANY, max_ldt_segment_init, NULL);
8390075Sobrien
/* Forward declarations for file-local helpers defined further down. */
#if defined(notyet) && defined(SMP)
static void set_user_ldt_rv(struct vmspace *vmsp);
#endif
static void user_ldt_derefl(struct proc_ldt *pldt);
9090075Sobrien
/*
 * Fallback definition of the sysarch() argument structure, used only when
 * <sys/sysproto.h> has not already supplied it.
 */
#ifndef _SYS_SYSPROTO_H_
struct sysarch_args {
	int op;		/* requested operation (I386_* / AMD64_* code) */
	char *parms;	/* pointer to the operation-specific argument block */
};
#endif
9796263Sobrien
9890075Sobrienint
9990075Sobriensysarch_ldt(struct thread *td, struct sysarch_args *uap, int uap_space)
100117395Skan{
10190075Sobrien	struct i386_ldt_args *largs, la;
10290075Sobrien	struct user_segment_descriptor *lp;
10390075Sobrien	int error = 0;
10490075Sobrien
10590075Sobrien	/*
10690075Sobrien	 * XXXKIB check that the BSM generation code knows to encode
10790075Sobrien	 * the op argument.
10890075Sobrien	 */
109132718Skan	AUDIT_ARG_CMD(uap->op);
11090075Sobrien	if (uap_space == UIO_USERSPACE) {
11190075Sobrien		error = copyin(uap->parms, &la, sizeof(struct i386_ldt_args));
11290075Sobrien		if (error != 0)
11390075Sobrien			return (error);
11490075Sobrien		largs = &la;
11590075Sobrien	} else
11690075Sobrien		largs = (struct i386_ldt_args *)uap->parms;
117132718Skan
118132718Skan	switch (uap->op) {
11990075Sobrien	case I386_GET_LDT:
12090075Sobrien		error = amd64_get_ldt(td, largs);
12190075Sobrien		break;
12290075Sobrien	case I386_SET_LDT:
12390075Sobrien		if (largs->descs != NULL && largs->num > max_ldt_segment)
12490075Sobrien			return (EINVAL);
12590075Sobrien		set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
12690075Sobrien		if (largs->descs != NULL) {
12790075Sobrien			lp = malloc(largs->num * sizeof(struct
12890075Sobrien			    user_segment_descriptor), M_TEMP, M_WAITOK);
12990075Sobrien			error = copyin(largs->descs, lp, largs->num *
13090075Sobrien			    sizeof(struct user_segment_descriptor));
13190075Sobrien			if (error == 0)
13290075Sobrien				error = amd64_set_ldt(td, largs, lp);
13390075Sobrien			free(lp, M_TEMP);
13490075Sobrien		} else {
13590075Sobrien			error = amd64_set_ldt(td, largs, NULL);
13690075Sobrien		}
13790075Sobrien		break;
13890075Sobrien	}
13990075Sobrien	return (error);
14090075Sobrien}
14190075Sobrien
14290075Sobrienvoid
14390075Sobrienupdate_gdt_gsbase(struct thread *td, uint32_t base)
14490075Sobrien{
14590075Sobrien	struct user_segment_descriptor *sd;
14690075Sobrien
14790075Sobrien	if (td != curthread)
148		return;
149	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
150	critical_enter();
151	sd = PCPU_GET(gs32p);
152	sd->sd_lobase = base & 0xffffff;
153	sd->sd_hibase = (base >> 24) & 0xff;
154	critical_exit();
155}
156
157void
158update_gdt_fsbase(struct thread *td, uint32_t base)
159{
160	struct user_segment_descriptor *sd;
161
162	if (td != curthread)
163		return;
164	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
165	critical_enter();
166	sd = PCPU_GET(fs32p);
167	sd->sd_lobase = base & 0xffffff;
168	sd->sd_hibase = (base >> 24) & 0xff;
169	critical_exit();
170}
171
172int
173sysarch(td, uap)
174	struct thread *td;
175	register struct sysarch_args *uap;
176{
177	int error = 0;
178	struct pcb *pcb = curthread->td_pcb;
179	uint32_t i386base;
180	uint64_t a64base;
181	struct i386_ioperm_args iargs;
182	struct i386_get_xfpustate i386xfpu;
183	struct amd64_get_xfpustate a64xfpu;
184
185#ifdef CAPABILITY_MODE
186	/*
187	 * When adding new operations, add a new case statement here to
188	 * explicitly indicate whether or not the operation is safe to
189	 * perform in capability mode.
190	 */
191	if (IN_CAPABILITY_MODE(td)) {
192		switch (uap->op) {
193		case I386_GET_LDT:
194		case I386_SET_LDT:
195		case I386_GET_IOPERM:
196		case I386_GET_FSBASE:
197		case I386_SET_FSBASE:
198		case I386_GET_GSBASE:
199		case I386_SET_GSBASE:
200		case I386_GET_XFPUSTATE:
201		case AMD64_GET_FSBASE:
202		case AMD64_SET_FSBASE:
203		case AMD64_GET_GSBASE:
204		case AMD64_SET_GSBASE:
205		case AMD64_GET_XFPUSTATE:
206			break;
207
208		case I386_SET_IOPERM:
209		default:
210			return (ECAPMODE);
211		}
212	}
213#endif
214
215	if (uap->op == I386_GET_LDT || uap->op == I386_SET_LDT)
216		return (sysarch_ldt(td, uap, UIO_USERSPACE));
217	/*
218	 * XXXKIB check that the BSM generation code knows to encode
219	 * the op argument.
220	 */
221	AUDIT_ARG_CMD(uap->op);
222	switch (uap->op) {
223	case I386_GET_IOPERM:
224	case I386_SET_IOPERM:
225		if ((error = copyin(uap->parms, &iargs,
226		    sizeof(struct i386_ioperm_args))) != 0)
227			return (error);
228		break;
229	case I386_GET_XFPUSTATE:
230		if ((error = copyin(uap->parms, &i386xfpu,
231		    sizeof(struct i386_get_xfpustate))) != 0)
232			return (error);
233		a64xfpu.addr = (void *)(uintptr_t)i386xfpu.addr;
234		a64xfpu.len = i386xfpu.len;
235		break;
236	case AMD64_GET_XFPUSTATE:
237		if ((error = copyin(uap->parms, &a64xfpu,
238		    sizeof(struct amd64_get_xfpustate))) != 0)
239			return (error);
240		break;
241	default:
242		break;
243	}
244
245	switch (uap->op) {
246	case I386_GET_IOPERM:
247		error = amd64_get_ioperm(td, &iargs);
248		if (error == 0)
249			error = copyout(&iargs, uap->parms,
250			    sizeof(struct i386_ioperm_args));
251		break;
252	case I386_SET_IOPERM:
253		error = amd64_set_ioperm(td, &iargs);
254		break;
255	case I386_GET_FSBASE:
256		i386base = pcb->pcb_fsbase;
257		error = copyout(&i386base, uap->parms, sizeof(i386base));
258		break;
259	case I386_SET_FSBASE:
260		error = copyin(uap->parms, &i386base, sizeof(i386base));
261		if (!error) {
262			pcb->pcb_fsbase = i386base;
263			td->td_frame->tf_fs = _ufssel;
264			update_gdt_fsbase(td, i386base);
265		}
266		break;
267	case I386_GET_GSBASE:
268		i386base = pcb->pcb_gsbase;
269		error = copyout(&i386base, uap->parms, sizeof(i386base));
270		break;
271	case I386_SET_GSBASE:
272		error = copyin(uap->parms, &i386base, sizeof(i386base));
273		if (!error) {
274			pcb->pcb_gsbase = i386base;
275			td->td_frame->tf_gs = _ugssel;
276			update_gdt_gsbase(td, i386base);
277		}
278		break;
279	case AMD64_GET_FSBASE:
280		error = copyout(&pcb->pcb_fsbase, uap->parms, sizeof(pcb->pcb_fsbase));
281		break;
282
283	case AMD64_SET_FSBASE:
284		error = copyin(uap->parms, &a64base, sizeof(a64base));
285		if (!error) {
286			if (a64base < VM_MAXUSER_ADDRESS) {
287				pcb->pcb_fsbase = a64base;
288				set_pcb_flags(pcb, PCB_FULL_IRET);
289				td->td_frame->tf_fs = _ufssel;
290			} else
291				error = EINVAL;
292		}
293		break;
294
295	case AMD64_GET_GSBASE:
296		error = copyout(&pcb->pcb_gsbase, uap->parms, sizeof(pcb->pcb_gsbase));
297		break;
298
299	case AMD64_SET_GSBASE:
300		error = copyin(uap->parms, &a64base, sizeof(a64base));
301		if (!error) {
302			if (a64base < VM_MAXUSER_ADDRESS) {
303				pcb->pcb_gsbase = a64base;
304				set_pcb_flags(pcb, PCB_FULL_IRET);
305				td->td_frame->tf_gs = _ugssel;
306			} else
307				error = EINVAL;
308		}
309		break;
310
311	case I386_GET_XFPUSTATE:
312	case AMD64_GET_XFPUSTATE:
313		if (a64xfpu.len > cpu_max_ext_state_size -
314		    sizeof(struct savefpu))
315			return (EINVAL);
316		fpugetregs(td);
317		error = copyout((char *)(get_pcb_user_save_td(td) + 1),
318		    a64xfpu.addr, a64xfpu.len);
319		return (error);
320
321	default:
322		error = EINVAL;
323		break;
324	}
325	return (error);
326}
327
328int
329amd64_set_ioperm(td, uap)
330	struct thread *td;
331	struct i386_ioperm_args *uap;
332{
333	int i, error;
334	char *iomap;
335	struct amd64tss *tssp;
336	struct system_segment_descriptor *tss_sd;
337	u_long *addr;
338	struct pcb *pcb;
339
340	if ((error = priv_check(td, PRIV_IO)) != 0)
341		return (error);
342	if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
343		return (error);
344	if (uap->start + uap->length > IOPAGES * PAGE_SIZE * NBBY)
345		return (EINVAL);
346
347	/*
348	 * XXX
349	 * While this is restricted to root, we should probably figure out
350	 * whether any other driver is using this i/o address, as so not to
351	 * cause confusion.  This probably requires a global 'usage registry'.
352	 */
353	pcb = td->td_pcb;
354	if (pcb->pcb_tssp == NULL) {
355		tssp = (struct amd64tss *)kmem_alloc(kernel_map,
356		    ctob(IOPAGES+1));
357		if (tssp == NULL)
358			return (ENOMEM);
359		iomap = (char *)&tssp[1];
360		addr = (u_long *)iomap;
361		for (i = 0; i < (ctob(IOPAGES) + 1) / sizeof(u_long); i++)
362			*addr++ = ~0;
363		critical_enter();
364		/* Takes care of tss_rsp0. */
365		memcpy(tssp, &common_tss[PCPU_GET(cpuid)],
366		    sizeof(struct amd64tss));
367		tssp->tss_iobase = sizeof(*tssp);
368		pcb->pcb_tssp = tssp;
369		tss_sd = PCPU_GET(tss);
370		tss_sd->sd_lobase = (u_long)tssp & 0xffffff;
371		tss_sd->sd_hibase = ((u_long)tssp >> 24) & 0xfffffffffful;
372		tss_sd->sd_type = SDT_SYSTSS;
373		ltr(GSEL(GPROC0_SEL, SEL_KPL));
374		PCPU_SET(tssp, tssp);
375		critical_exit();
376	} else
377		iomap = (char *)&pcb->pcb_tssp[1];
378	for (i = uap->start; i < uap->start + uap->length; i++) {
379		if (uap->enable)
380			iomap[i >> 3] &= ~(1 << (i & 7));
381		else
382			iomap[i >> 3] |= (1 << (i & 7));
383	}
384	return (error);
385}
386
387int
388amd64_get_ioperm(td, uap)
389	struct thread *td;
390	struct i386_ioperm_args *uap;
391{
392	int i, state;
393	char *iomap;
394
395	if (uap->start >= IOPAGES * PAGE_SIZE * NBBY)
396		return (EINVAL);
397	if (td->td_pcb->pcb_tssp == NULL) {
398		uap->length = 0;
399		goto done;
400	}
401
402	iomap = (char *)&td->td_pcb->pcb_tssp[1];
403
404	i = uap->start;
405	state = (iomap[i >> 3] >> (i & 7)) & 1;
406	uap->enable = !state;
407	uap->length = 1;
408
409	for (i = uap->start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) {
410		if (state != ((iomap[i >> 3] >> (i & 7)) & 1))
411			break;
412		uap->length++;
413	}
414
415done:
416	return (0);
417}
418
419/*
420 * Update the GDT entry pointing to the LDT to point to the LDT of the
421 * current process.
422 */
423void
424set_user_ldt(struct mdproc *mdp)
425{
426
427	critical_enter();
428	*PCPU_GET(ldt) = mdp->md_ldt_sd;
429	lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
430	critical_exit();
431}
432
#if defined(notyet) && defined(SMP)
/*
 * Reload the user LDT on the current CPU, but only if the running thread
 * belongs to the address space vmsp.
 */
static void
set_user_ldt_rv(struct vmspace *vmsp)
{
	struct proc *p;

	p = curthread->td_proc;
	if (vmsp == p->p_vmspace)
		set_user_ldt(&p->p_md);
}
#endif
448
449struct proc_ldt *
450user_ldt_alloc(struct proc *p, int force)
451{
452	struct proc_ldt *pldt, *new_ldt;
453	struct mdproc *mdp;
454	struct soft_segment_descriptor sldt;
455
456	mtx_assert(&dt_lock, MA_OWNED);
457	mdp = &p->p_md;
458	if (!force && mdp->md_ldt != NULL)
459		return (mdp->md_ldt);
460	mtx_unlock(&dt_lock);
461	new_ldt = malloc(sizeof(struct proc_ldt), M_SUBPROC, M_WAITOK);
462	new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map,
463	     max_ldt_segment * sizeof(struct user_segment_descriptor));
464	if (new_ldt->ldt_base == NULL) {
465		FREE(new_ldt, M_SUBPROC);
466		mtx_lock(&dt_lock);
467		return (NULL);
468	}
469	new_ldt->ldt_refcnt = 1;
470	sldt.ssd_base = (uint64_t)new_ldt->ldt_base;
471	sldt.ssd_limit = max_ldt_segment *
472	    sizeof(struct user_segment_descriptor) - 1;
473	sldt.ssd_type = SDT_SYSLDT;
474	sldt.ssd_dpl = SEL_KPL;
475	sldt.ssd_p = 1;
476	sldt.ssd_long = 0;
477	sldt.ssd_def32 = 0;
478	sldt.ssd_gran = 0;
479	mtx_lock(&dt_lock);
480	pldt = mdp->md_ldt;
481	if (pldt != NULL && !force) {
482		kmem_free(kernel_map, (vm_offset_t)new_ldt->ldt_base,
483		    max_ldt_segment * sizeof(struct user_segment_descriptor));
484		free(new_ldt, M_SUBPROC);
485		return (pldt);
486	}
487
488	if (pldt != NULL) {
489		bcopy(pldt->ldt_base, new_ldt->ldt_base, max_ldt_segment *
490		    sizeof(struct user_segment_descriptor));
491		user_ldt_derefl(pldt);
492	}
493	ssdtosyssd(&sldt, &p->p_md.md_ldt_sd);
494	atomic_store_rel_ptr((volatile uintptr_t *)&mdp->md_ldt,
495	    (uintptr_t)new_ldt);
496	if (p == curproc)
497		set_user_ldt(mdp);
498
499	return (mdp->md_ldt);
500}
501
502void
503user_ldt_free(struct thread *td)
504{
505	struct proc *p = td->td_proc;
506	struct mdproc *mdp = &p->p_md;
507	struct proc_ldt *pldt;
508
509	mtx_assert(&dt_lock, MA_OWNED);
510	if ((pldt = mdp->md_ldt) == NULL) {
511		mtx_unlock(&dt_lock);
512		return;
513	}
514
515	mdp->md_ldt = NULL;
516	bzero(&mdp->md_ldt_sd, sizeof(mdp->md_ldt_sd));
517	if (td == curthread)
518		lldt(GSEL(GNULL_SEL, SEL_KPL));
519	user_ldt_deref(pldt);
520}
521
522static void
523user_ldt_derefl(struct proc_ldt *pldt)
524{
525
526	if (--pldt->ldt_refcnt == 0) {
527		kmem_free(kernel_map, (vm_offset_t)pldt->ldt_base,
528		    max_ldt_segment * sizeof(struct user_segment_descriptor));
529		free(pldt, M_SUBPROC);
530	}
531}
532
533void
534user_ldt_deref(struct proc_ldt *pldt)
535{
536
537	mtx_assert(&dt_lock, MA_OWNED);
538	user_ldt_derefl(pldt);
539	mtx_unlock(&dt_lock);
540}
541
542/*
543 * Note for the authors of compat layers (linux, etc): copyout() in
544 * the function below is not a problem since it presents data in
545 * arch-specific format (i.e. i386-specific in this case), not in
546 * the OS-specific one.
547 */
548int
549amd64_get_ldt(td, uap)
550	struct thread *td;
551	struct i386_ldt_args *uap;
552{
553	int error = 0;
554	struct proc_ldt *pldt;
555	int num;
556	struct user_segment_descriptor *lp;
557
558#ifdef	DEBUG
559	printf("amd64_get_ldt: start=%d num=%d descs=%p\n",
560	    uap->start, uap->num, (void *)uap->descs);
561#endif
562
563	if ((pldt = td->td_proc->p_md.md_ldt) != NULL) {
564		lp = &((struct user_segment_descriptor *)(pldt->ldt_base))
565		    [uap->start];
566		num = min(uap->num, max_ldt_segment);
567	} else
568		return (EINVAL);
569
570	if ((uap->start > (unsigned int)max_ldt_segment) ||
571	    ((unsigned int)num > (unsigned int)max_ldt_segment) ||
572	    ((unsigned int)(uap->start + num) > (unsigned int)max_ldt_segment))
573		return(EINVAL);
574
575	error = copyout(lp, uap->descs, num *
576	    sizeof(struct user_segment_descriptor));
577	if (!error)
578		td->td_retval[0] = num;
579
580	return(error);
581}
582
583int
584amd64_set_ldt(td, uap, descs)
585	struct thread *td;
586	struct i386_ldt_args *uap;
587	struct user_segment_descriptor *descs;
588{
589	int error = 0;
590	unsigned int largest_ld, i;
591	struct mdproc *mdp = &td->td_proc->p_md;
592	struct proc_ldt *pldt;
593	struct user_segment_descriptor *dp;
594	struct proc *p;
595
596#ifdef	DEBUG
597	printf("amd64_set_ldt: start=%d num=%d descs=%p\n",
598	    uap->start, uap->num, (void *)uap->descs);
599#endif
600
601	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
602	p = td->td_proc;
603	if (descs == NULL) {
604		/* Free descriptors */
605		if (uap->start == 0 && uap->num == 0)
606			uap->num = max_ldt_segment;
607		if (uap->num == 0)
608			return (EINVAL);
609		if ((pldt = mdp->md_ldt) == NULL ||
610		    uap->start >= max_ldt_segment)
611			return (0);
612		largest_ld = uap->start + uap->num;
613		if (largest_ld > max_ldt_segment)
614			largest_ld = max_ldt_segment;
615		i = largest_ld - uap->start;
616		mtx_lock(&dt_lock);
617		bzero(&((struct user_segment_descriptor *)(pldt->ldt_base))
618		    [uap->start], sizeof(struct user_segment_descriptor) * i);
619		mtx_unlock(&dt_lock);
620		return (0);
621	}
622
623	if (!(uap->start == LDT_AUTO_ALLOC && uap->num == 1)) {
624		/* verify range of descriptors to modify */
625		largest_ld = uap->start + uap->num;
626		if (uap->start >= max_ldt_segment ||
627		    largest_ld > max_ldt_segment)
628			return (EINVAL);
629	}
630
631	/* Check descriptors for access violations */
632	for (i = 0; i < uap->num; i++) {
633		dp = &descs[i];
634
635		switch (dp->sd_type) {
636		case SDT_SYSNULL:	/* system null */
637			dp->sd_p = 0;
638			break;
639		case SDT_SYS286TSS:
640		case SDT_SYSLDT:
641		case SDT_SYS286BSY:
642		case SDT_SYS286CGT:
643		case SDT_SYSTASKGT:
644		case SDT_SYS286IGT:
645		case SDT_SYS286TGT:
646		case SDT_SYSNULL2:
647		case SDT_SYSTSS:
648		case SDT_SYSNULL3:
649		case SDT_SYSBSY:
650		case SDT_SYSCGT:
651		case SDT_SYSNULL4:
652		case SDT_SYSIGT:
653		case SDT_SYSTGT:
654			/* I can't think of any reason to allow a user proc
655			 * to create a segment of these types.  They are
656			 * for OS use only.
657			 */
658			return (EACCES);
659			/*NOTREACHED*/
660
661		/* memory segment types */
662		case SDT_MEMEC:   /* memory execute only conforming */
663		case SDT_MEMEAC:  /* memory execute only accessed conforming */
664		case SDT_MEMERC:  /* memory execute read conforming */
665		case SDT_MEMERAC: /* memory execute read accessed conforming */
666			 /* Must be "present" if executable and conforming. */
667			if (dp->sd_p == 0)
668				return (EACCES);
669			break;
670		case SDT_MEMRO:   /* memory read only */
671		case SDT_MEMROA:  /* memory read only accessed */
672		case SDT_MEMRW:   /* memory read write */
673		case SDT_MEMRWA:  /* memory read write accessed */
674		case SDT_MEMROD:  /* memory read only expand dwn limit */
675		case SDT_MEMRODA: /* memory read only expand dwn lim accessed */
676		case SDT_MEMRWD:  /* memory read write expand dwn limit */
677		case SDT_MEMRWDA: /* memory read write expand dwn lim acessed */
678		case SDT_MEME:    /* memory execute only */
679		case SDT_MEMEA:   /* memory execute only accessed */
680		case SDT_MEMER:   /* memory execute read */
681		case SDT_MEMERA:  /* memory execute read accessed */
682			break;
683		default:
684			return(EINVAL);
685			/*NOTREACHED*/
686		}
687
688		/* Only user (ring-3) descriptors may be present. */
689		if ((dp->sd_p != 0) && (dp->sd_dpl != SEL_UPL))
690			return (EACCES);
691	}
692
693	if (uap->start == LDT_AUTO_ALLOC && uap->num == 1) {
694		/* Allocate a free slot */
695		mtx_lock(&dt_lock);
696		pldt = user_ldt_alloc(p, 0);
697		if (pldt == NULL) {
698			mtx_unlock(&dt_lock);
699			return (ENOMEM);
700		}
701
702		/*
703		 * start scanning a bit up to leave room for NVidia and
704		 * Wine, which still user the "Blat" method of allocation.
705		 */
706		i = 16;
707		dp = &((struct user_segment_descriptor *)(pldt->ldt_base))[i];
708		for (; i < max_ldt_segment; ++i, ++dp) {
709			if (dp->sd_type == SDT_SYSNULL)
710				break;
711		}
712		if (i >= max_ldt_segment) {
713			mtx_unlock(&dt_lock);
714			return (ENOSPC);
715		}
716		uap->start = i;
717		error = amd64_set_ldt_data(td, i, 1, descs);
718		mtx_unlock(&dt_lock);
719	} else {
720		largest_ld = uap->start + uap->num;
721		if (largest_ld > max_ldt_segment)
722			return (EINVAL);
723		mtx_lock(&dt_lock);
724		if (user_ldt_alloc(p, 0) != NULL) {
725			error = amd64_set_ldt_data(td, uap->start, uap->num,
726			    descs);
727		}
728		mtx_unlock(&dt_lock);
729	}
730	if (error == 0)
731		td->td_retval[0] = uap->start;
732	return (error);
733}
734
735int
736amd64_set_ldt_data(struct thread *td, int start, int num,
737    struct user_segment_descriptor *descs)
738{
739	struct mdproc *mdp = &td->td_proc->p_md;
740	struct proc_ldt *pldt = mdp->md_ldt;
741
742	mtx_assert(&dt_lock, MA_OWNED);
743
744	/* Fill in range */
745	bcopy(descs,
746	    &((struct user_segment_descriptor *)(pldt->ldt_base))[start],
747	    num * sizeof(struct user_segment_descriptor));
748	return (0);
749}
750