vm_machdep.c revision 16532
1139826Simp/*-
253541Sshin * Copyright (c) 1982, 1986 The Regents of the University of California.
353541Sshin * Copyright (c) 1989, 1990 William Jolitz
453541Sshin * Copyright (c) 1994 John Dyson
553541Sshin * All rights reserved.
653541Sshin *
753541Sshin * This code is derived from software contributed to Berkeley by
853541Sshin * the Systems Programming Group of the University of Utah Computer
953541Sshin * Science Department, and William Jolitz.
1053541Sshin *
1153541Sshin * Redistribution and use in source and binary forms, with or without
1253541Sshin * modification, are permitted provided that the following conditions
1353541Sshin * are met:
1453541Sshin * 1. Redistributions of source code must retain the above copyright
1553541Sshin *    notice, this list of conditions and the following disclaimer.
1653541Sshin * 2. Redistributions in binary form must reproduce the above copyright
1753541Sshin *    notice, this list of conditions and the following disclaimer in the
1853541Sshin *    documentation and/or other materials provided with the distribution.
1953541Sshin * 3. All advertising materials mentioning features or use of this software
2053541Sshin *    must display the following acknowledgement:
2153541Sshin *	This product includes software developed by the University of
2253541Sshin *	California, Berkeley and its contributors.
2353541Sshin * 4. Neither the name of the University nor the names of its contributors
2453541Sshin *    may be used to endorse or promote products derived from this software
2553541Sshin *    without specific prior written permission.
2653541Sshin *
2753541Sshin * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28174510Sobrien * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29174510Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
3053541Sshin * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
3153541Sshin * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32139826Simp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
3353541Sshin * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
3453541Sshin * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3553541Sshin * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3653541Sshin * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3753541Sshin * SUCH DAMAGE.
3853541Sshin *
3953541Sshin *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
4053541Sshin *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
4153541Sshin *	$Id: vm_machdep.c,v 1.65 1996/06/20 01:47:21 davidg Exp $
4253541Sshin */
4353541Sshin
4453541Sshin#include "npx.h"
4553541Sshin#include "opt_bounce.h"
4653541Sshin
4753541Sshin#include <sys/param.h>
4853541Sshin#include <sys/systm.h>
4953541Sshin#include <sys/proc.h>
5053541Sshin#include <sys/malloc.h>
5153541Sshin#include <sys/buf.h>
5253541Sshin#include <sys/vnode.h>
5353541Sshin#include <sys/vmmeter.h>
5453541Sshin
5553541Sshin#include <machine/clock.h>
5653541Sshin#include <machine/md_var.h>
5753541Sshin
5853541Sshin#include <vm/vm.h>
5953541Sshin#include <vm/vm_param.h>
6053541Sshin#include <vm/vm_prot.h>
6153541Sshin#include <vm/lock.h>
62174510Sobrien#include <vm/vm_kern.h>
63174510Sobrien#include <vm/vm_page.h>
64174510Sobrien#include <vm/vm_map.h>
6553541Sshin#include <vm/vm_extern.h>
6653541Sshin
6753541Sshin#include <sys/user.h>
68185751Simp
6953541Sshin#include <i386/isa/isa.h>
7053541Sshin
7153541Sshin#ifdef BOUNCE_BUFFERS
7253541Sshinstatic vm_offset_t
73185747Skmacy		vm_bounce_kva __P((int size, int waitok));
7453541Sshinstatic void	vm_bounce_kva_free __P((vm_offset_t addr, vm_offset_t size,
75120727Ssam					int now));
7653541Sshinstatic vm_offset_t
7753541Sshin		vm_bounce_page_find __P((int count));
78257176Sglebiusstatic void	vm_bounce_page_free __P((vm_offset_t pa, int count));
79194714Sbz
80185571Sbzstatic volatile int	kvasfreecnt;
8153541Sshin
8253541Sshincaddr_t		bouncememory;
8353541Sshinint		bouncepages;
8453541Sshinstatic int	bpwait;
8562587Sitojunstatic vm_offset_t	*bouncepa;
8653541Sshinstatic int		bmwait, bmfreeing;
8753541Sshin
8862587Sitojun#define BITS_IN_UNSIGNED (8*sizeof(unsigned))
89121283Sumestatic int		bounceallocarraysize;
9053541Sshinstatic unsigned	*bounceallocarray;
9153541Sshinstatic int		bouncefree;
9253541Sshin
9353541Sshin#define SIXTEENMEG (4096*4096)
9453541Sshin#define MAXBKVA 1024
9553541Sshinint		maxbkva = MAXBKVA*PAGE_SIZE;
96175162Sobrien
97193731Szec/* special list that can be used at interrupt time for eventual kva free */
98193731Szecstatic struct kvasfree {
99193731Szec	vm_offset_t addr;
10053541Sshin	vm_offset_t size;
10153541Sshin} kvaf[MAXBKVA];
10253541Sshin
10353541Sshin/*
10453541Sshin * get bounce buffer pages (count physically contiguous)
10553541Sshin * (only 1 inplemented now)
106171260Sdelphij */
10753541Sshinstatic vm_offset_t
10853541Sshinvm_bounce_page_find(count)
10953541Sshin	int count;
11053541Sshin{
11153541Sshin	int bit;
112186119Sqingli	int s,i;
11353541Sshin
11453541Sshin	if (count != 1)
11553541Sshin		panic("vm_bounce_page_find -- no support for > 1 page yet!!!");
11653541Sshin
11753541Sshin	s = splbio();
11853541Sshinretry:
11953541Sshin	for (i = 0; i < bounceallocarraysize; i++) {
12053541Sshin		if (bounceallocarray[i] != 0xffffffff) {
12153541Sshin			bit = ffs(~bounceallocarray[i]);
12253541Sshin			if (bit) {
12353541Sshin				bounceallocarray[i] |= 1 << (bit - 1) ;
12453541Sshin				bouncefree -= count;
12553541Sshin				splx(s);
12653541Sshin				return bouncepa[(i * BITS_IN_UNSIGNED + (bit - 1))];
12753541Sshin			}
12853541Sshin		}
12953541Sshin	}
13053541Sshin	bpwait = 1;
13153541Sshin	tsleep((caddr_t) &bounceallocarray, PRIBIO, "bncwai", 0);
13253541Sshin	goto retry;
13353541Sshin}
13453541Sshin
13553541Sshinstatic void
13653541Sshinvm_bounce_kva_free(addr, size, now)
13753541Sshin	vm_offset_t addr;
138274175Smelifaro	vm_offset_t size;
13953541Sshin	int now;
140274175Smelifaro{
141274175Smelifaro	int s = splbio();
142274175Smelifaro	kvaf[kvasfreecnt].addr = addr;
143274175Smelifaro	kvaf[kvasfreecnt].size = size;
144274175Smelifaro	++kvasfreecnt;
145274175Smelifaro	if( now) {
146274175Smelifaro		/*
147274175Smelifaro		 * this will do wakeups
148274175Smelifaro		 */
149274175Smelifaro		vm_bounce_kva(0,0);
150274175Smelifaro	} else {
15153541Sshin		if (bmwait) {
152186119Sqingli		/*
15353541Sshin		 * if anyone is waiting on the bounce-map, then wakeup
15453541Sshin		 */
15553541Sshin			wakeup((caddr_t) io_map);
15653541Sshin			bmwait = 0;
15753541Sshin		}
15853541Sshin	}
15953541Sshin	splx(s);
16053541Sshin}
16153541Sshin
16253541Sshin/*
16353541Sshin * free count bounce buffer pages
16453541Sshin */
16553541Sshinstatic void
166231852Sbzvm_bounce_page_free(pa, count)
167231852Sbz	vm_offset_t pa;
16853541Sshin	int count;
169186119Sqingli{
17053541Sshin	int allocindex;
17153541Sshin	int index;
17253541Sshin	int bit;
17353541Sshin
17453541Sshin	if (count != 1)
175120727Ssam		panic("vm_bounce_page_free -- no support for > 1 page yet!!!");
17653541Sshin
17753541Sshin	for(index=0;index<bouncepages;index++) {
178186119Sqingli		if( pa == bouncepa[index])
17953541Sshin			break;
18053541Sshin	}
18153541Sshin
18253541Sshin	if( index == bouncepages)
18353541Sshin		panic("vm_bounce_page_free: invalid bounce buffer");
18453541Sshin
18553541Sshin	allocindex = index / BITS_IN_UNSIGNED;
18653541Sshin	bit = index % BITS_IN_UNSIGNED;
18753541Sshin
188215701Sdim	bounceallocarray[allocindex] &= ~(1 << bit);
189195727Srwatson
190195699Srwatson	bouncefree += count;
19153541Sshin	if (bpwait) {
192286458Smelifaro		bpwait = 0;
19353541Sshin		wakeup((caddr_t) &bounceallocarray);
19453541Sshin	}
19553541Sshin}
196262763Sglebius
197262763Sglebius/*
19853541Sshin * allocate count bounce buffer kva pages
19953541Sshin */
200262763Sglebiusstatic vm_offset_t
20153541Sshinvm_bounce_kva(size, waitok)
20253541Sshin	int size;
20353541Sshin	int waitok;
204286458Smelifaro{
20553541Sshin	int i;
20653541Sshin	vm_offset_t kva = 0;
20753541Sshin	vm_offset_t off;
20853541Sshin	int s = splbio();
20953541Sshinmore:
210286458Smelifaro	if (!bmfreeing && kvasfreecnt) {
21153541Sshin		bmfreeing = 1;
212286458Smelifaro		for (i = 0; i < kvasfreecnt; i++) {
21353541Sshin			for(off=0;off<kvaf[i].size;off+=PAGE_SIZE) {
214286458Smelifaro				pmap_kremove( kvaf[i].addr + off);
215286458Smelifaro			}
216286458Smelifaro			kmem_free_wakeup(io_map, kvaf[i].addr,
217231852Sbz				kvaf[i].size);
21853541Sshin		}
219231852Sbz		kvasfreecnt = 0;
220231852Sbz		bmfreeing = 0;
221231852Sbz		if( bmwait) {
222231852Sbz			bmwait = 0;
223231852Sbz			wakeup( (caddr_t) io_map);
224286458Smelifaro		}
225231852Sbz	}
226286458Smelifaro
227231852Sbz	if( size == 0) {
228231852Sbz		splx(s);
22953541Sshin		return NULL;
230181803Sbz	}
231183550Szec
23253541Sshin	if ((kva = kmem_alloc_pageable(io_map, size)) == 0) {
23353541Sshin		if( !waitok) {
23453541Sshin			splx(s);
23553541Sshin			return NULL;
23653541Sshin		}
237231852Sbz		bmwait = 1;
238231852Sbz		tsleep((caddr_t) io_map, PRIBIO, "bmwait", 0);
239231852Sbz		goto more;
24053541Sshin	}
24153541Sshin	splx(s);
24253541Sshin	return kva;
24353541Sshin}
24453541Sshin
245178888Sjulian/*
246274118Smelifaro * same as vm_bounce_kva -- but really allocate (but takes pages as arg)
24753541Sshin */
248272361Smelifarovm_offset_t
249272361Smelifarovm_bounce_kva_alloc(count)
250272361Smelifaroint count;
25153541Sshin{
252231852Sbz	int i;
253231852Sbz	vm_offset_t kva;
254283291Sjkim	vm_offset_t pa;
255231852Sbz	if( bouncepages == 0) {
256231852Sbz		kva = (vm_offset_t) malloc(count*PAGE_SIZE, M_TEMP, M_WAITOK);
257231852Sbz		return kva;
258231852Sbz	}
259274118Smelifaro	kva = vm_bounce_kva(count*PAGE_SIZE, 1);
26053541Sshin	for(i=0;i<count;i++) {
261193731Szec		pa = vm_bounce_page_find(1);
262193731Szec		pmap_kenter(kva + i * PAGE_SIZE, pa);
263193731Szec	}
264193731Szec	return kva;
265193731Szec}
266193731Szec
267193731Szec/*
268193731Szec * same as vm_bounce_kva_free -- but really free
269193731Szec */
270193731Szecvoid
271231852Sbzvm_bounce_kva_alloc_free(kva, count)
272231852Sbz	vm_offset_t kva;
273231852Sbz	int count;
274231852Sbz{
275231852Sbz	int i;
276231852Sbz	vm_offset_t pa;
277231852Sbz	if( bouncepages == 0) {
278231852Sbz		free((caddr_t) kva, M_TEMP);
279231852Sbz		return;
280231852Sbz	}
281231852Sbz	for(i = 0; i < count; i++) {
282231852Sbz		pa = pmap_kextract(kva + i * PAGE_SIZE);
283231852Sbz		vm_bounce_page_free(pa, 1);
284231852Sbz	}
285231852Sbz	vm_bounce_kva_free(kva, count*PAGE_SIZE, 0);
286231852Sbz}
287231852Sbz
288231852Sbz/*
289231852Sbz * do the things necessary to the struct buf to implement
290231852Sbz * bounce buffers...  inserted before the disk sort
291231852Sbz */
292231852Sbzvoid
293231852Sbzvm_bounce_alloc(bp)
294231852Sbz	struct buf *bp;
295231852Sbz{
296231852Sbz	int countvmpg;
297231852Sbz	vm_offset_t vastart, vaend;
298231852Sbz	vm_offset_t vapstart, vapend;
299231852Sbz	vm_offset_t va, kva;
300231852Sbz	vm_offset_t pa;
301231852Sbz	int dobounceflag = 0;
302231852Sbz	int i;
303231852Sbz
304231852Sbz	if (bouncepages == 0)
305231852Sbz		return;
306231852Sbz
307231852Sbz	if (bp->b_flags & B_BOUNCE) {
308231852Sbz		printf("vm_bounce_alloc: called recursively???\n");
309231852Sbz		return;
310231852Sbz	}
311
312	if (bp->b_bufsize < bp->b_bcount) {
313		printf(
314		    "vm_bounce_alloc: b_bufsize(0x%lx) < b_bcount(0x%lx) !!\n",
315			bp->b_bufsize, bp->b_bcount);
316		panic("vm_bounce_alloc");
317	}
318
319/*
320 *  This is not really necessary
321 *	if( bp->b_bufsize != bp->b_bcount) {
322 *		printf("size: %d, count: %d\n", bp->b_bufsize, bp->b_bcount);
323 *	}
324 */
325
326
327	vastart = (vm_offset_t) bp->b_data;
328	vaend = (vm_offset_t) bp->b_data + bp->b_bufsize;
329
330	vapstart = trunc_page(vastart);
331	vapend = round_page(vaend);
332	countvmpg = (vapend - vapstart) / PAGE_SIZE;
333
334/*
335 * if any page is above 16MB, then go into bounce-buffer mode
336 */
337	va = vapstart;
338	for (i = 0; i < countvmpg; i++) {
339		pa = pmap_kextract(va);
340		if (pa >= SIXTEENMEG)
341			++dobounceflag;
342		if( pa == 0)
343			panic("vm_bounce_alloc: Unmapped page");
344		va += PAGE_SIZE;
345	}
346	if (dobounceflag == 0)
347		return;
348
349	if (bouncepages < dobounceflag)
350		panic("Not enough bounce buffers!!!");
351
352/*
353 * allocate a replacement kva for b_addr
354 */
355	kva = vm_bounce_kva(countvmpg*PAGE_SIZE, 1);
356#if 0
357	printf("%s: vapstart: %x, vapend: %x, countvmpg: %d, kva: %x ",
358		(bp->b_flags & B_READ) ? "read":"write",
359			vapstart, vapend, countvmpg, kva);
360#endif
361	va = vapstart;
362	for (i = 0; i < countvmpg; i++) {
363		pa = pmap_kextract(va);
364		if (pa >= SIXTEENMEG) {
365			/*
366			 * allocate a replacement page
367			 */
368			vm_offset_t bpa = vm_bounce_page_find(1);
369			pmap_kenter(kva + (PAGE_SIZE * i), bpa);
370#if 0
371			printf("r(%d): (%x,%x,%x) ", i, va, pa, bpa);
372#endif
373			/*
374			 * if we are writing, the copy the data into the page
375			 */
376			if ((bp->b_flags & B_READ) == 0) {
377				bcopy((caddr_t) va, (caddr_t) kva + (PAGE_SIZE * i), PAGE_SIZE);
378			}
379		} else {
380			/*
381			 * use original page
382			 */
383			pmap_kenter(kva + (PAGE_SIZE * i), pa);
384		}
385		va += PAGE_SIZE;
386	}
387
388/*
389 * flag the buffer as being bounced
390 */
391	bp->b_flags |= B_BOUNCE;
392/*
393 * save the original buffer kva
394 */
395	bp->b_savekva = bp->b_data;
396/*
397 * put our new kva into the buffer (offset by original offset)
398 */
399	bp->b_data = (caddr_t) (((vm_offset_t) kva) |
400				((vm_offset_t) bp->b_savekva & PAGE_MASK));
401#if 0
402	printf("b_savekva: %x, newva: %x\n", bp->b_savekva, bp->b_data);
403#endif
404	return;
405}
406
407/*
408 * hook into biodone to free bounce buffer
409 */
410void
411vm_bounce_free(bp)
412	struct buf *bp;
413{
414	int i;
415	vm_offset_t origkva, bouncekva, bouncekvaend;
416
417/*
418 * if this isn't a bounced buffer, then just return
419 */
420	if ((bp->b_flags & B_BOUNCE) == 0)
421		return;
422
423/*
424 *  This check is not necessary
425 *	if (bp->b_bufsize != bp->b_bcount) {
426 *		printf("vm_bounce_free: b_bufsize=%d, b_bcount=%d\n",
427 *			bp->b_bufsize, bp->b_bcount);
428 *	}
429 */
430
431	origkva = (vm_offset_t) bp->b_savekva;
432	bouncekva = (vm_offset_t) bp->b_data;
433/*
434	printf("free: %d ", bp->b_bufsize);
435*/
436
437/*
438 * check every page in the kva space for b_addr
439 */
440	for (i = 0; i < bp->b_bufsize; ) {
441		vm_offset_t mybouncepa;
442		vm_offset_t copycount;
443
444		copycount = round_page(bouncekva + 1) - bouncekva;
445		mybouncepa = pmap_kextract(trunc_page(bouncekva));
446
447/*
448 * if this is a bounced pa, then process as one
449 */
450		if ( mybouncepa != pmap_kextract( trunc_page( origkva))) {
451			vm_offset_t tocopy = copycount;
452			if (i + tocopy > bp->b_bufsize)
453				tocopy = bp->b_bufsize - i;
454/*
455 * if this is a read, then copy from bounce buffer into original buffer
456 */
457			if (bp->b_flags & B_READ)
458				bcopy((caddr_t) bouncekva, (caddr_t) origkva, tocopy);
459/*
460 * free the bounce allocation
461 */
462
463/*
464			printf("(kva: %x, pa: %x)", bouncekva, mybouncepa);
465*/
466			vm_bounce_page_free(mybouncepa, 1);
467		}
468
469		origkva += copycount;
470		bouncekva += copycount;
471		i += copycount;
472	}
473
474/*
475	printf("\n");
476*/
477/*
478 * add the old kva into the "to free" list
479 */
480
481	bouncekva= trunc_page((vm_offset_t) bp->b_data);
482	bouncekvaend= round_page((vm_offset_t)bp->b_data + bp->b_bufsize);
483
484/*
485	printf("freeva: %d\n", (bouncekvaend - bouncekva) / PAGE_SIZE);
486*/
487	vm_bounce_kva_free( bouncekva, (bouncekvaend - bouncekva), 0);
488	bp->b_data = bp->b_savekva;
489	bp->b_savekva = 0;
490	bp->b_flags &= ~B_BOUNCE;
491
492	return;
493}
494
495
496/*
497 * init the bounce buffer system
498 */
499void
500vm_bounce_init()
501{
502	int i;
503
504	kvasfreecnt = 0;
505
506	if (bouncepages == 0)
507		return;
508
509	bounceallocarraysize = (bouncepages + BITS_IN_UNSIGNED - 1) / BITS_IN_UNSIGNED;
510	bounceallocarray = malloc(bounceallocarraysize * sizeof(unsigned), M_TEMP, M_NOWAIT);
511
512	if (!bounceallocarray)
513		panic("Cannot allocate bounce resource array");
514
515	bouncepa = malloc(bouncepages * sizeof(vm_offset_t), M_TEMP, M_NOWAIT);
516	if (!bouncepa)
517		panic("Cannot allocate physical memory array");
518
519	for(i=0;i<bounceallocarraysize;i++) {
520		bounceallocarray[i] = 0xffffffff;
521	}
522
523	for(i=0;i<bouncepages;i++) {
524		vm_offset_t pa;
525		if( (pa = pmap_kextract((vm_offset_t) bouncememory + i * PAGE_SIZE)) >= SIXTEENMEG)
526			panic("bounce memory out of range");
527		if( pa == 0)
528			panic("bounce memory not resident");
529		bouncepa[i] = pa;
530		bounceallocarray[i/(8*sizeof(int))] &= ~(1<<(i%(8*sizeof(int))));
531	}
532	bouncefree = bouncepages;
533
534}
535#endif /* BOUNCE_BUFFERS */
536
537/*
538 * quick version of vm_fault
539 */
540void
541vm_fault_quick(v, prot)
542	caddr_t v;
543	int prot;
544{
545	if (prot & VM_PROT_WRITE)
546		subyte(v, fubyte(v));
547	else
548		fubyte(v);
549}
550
551/*
552 * Finish a fork operation, with process p2 nearly set up.
553 * Copy and update the kernel stack and pcb, making the child
554 * ready to run, and marking it so that it can return differently
555 * than the parent.  Returns 1 in the child process, 0 in the parent.
556 * We currently double-map the user area so that the stack is at the same
557 * address in each process; in the future we will probably relocate
558 * the frame pointers on the stack after copying.
559 */
560int
561cpu_fork(p1, p2)
562	register struct proc *p1, *p2;
563{
564	struct pcb *pcb2 = &p2->p_addr->u_pcb;
565	int sp, offset;
566	volatile int retval;
567
568	/*
569	 * Copy pcb and stack from proc p1 to p2.
570	 * We do this as cheaply as possible, copying only the active
571	 * part of the stack.  The stack and pcb need to agree;
572	 * this is tricky, as the final pcb is constructed by savectx,
573	 * but its frame isn't yet on the stack when the stack is copied.
574	 * This should be done differently, with a single call
575	 * that copies and updates the pcb+stack,
576	 * replacing the bcopy and savectx.
577	 */
578
579	__asm __volatile("movl %%esp,%0" : "=r" (sp));
580	offset = sp - (int)kstack;
581
582	retval = 1;		/* return 1 in child */
583	bcopy((caddr_t)kstack + offset, (caddr_t)p2->p_addr + offset,
584	    (unsigned) ctob(UPAGES) - offset);
585	p2->p_md.md_regs = p1->p_md.md_regs;
586
587	*pcb2 = p1->p_addr->u_pcb;
588	pcb2->pcb_cr3 = vtophys(p2->p_vmspace->vm_pmap.pm_pdir);
589
590	retval = 0;		/* return 0 in parent */
591	savectx(pcb2);
592	return (retval);
593}
594
595void
596cpu_exit(p)
597	register struct proc *p;
598{
599#ifdef USER_LDT
600	struct pcb *pcb;
601#endif
602
603#if NNPX > 0
604	npxexit(p);
605#endif	/* NNPX */
606#ifdef USER_LDT
607	pcb = &p->p_addr->u_pcb;
608	if (pcb->pcb_ldt != 0) {
609		if (pcb == curpcb)
610			lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
611		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
612			pcb->pcb_ldt_len * sizeof(union descriptor));
613		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
614	}
615#endif
616	cnt.v_swtch++;
617	cpu_switch(p);
618	panic("cpu_exit");
619}
620
621void
622cpu_wait(p)
623	struct proc *p;
624{
625	/* drop per-process resources */
626	pmap_qremove((vm_offset_t) p->p_addr, UPAGES);
627	kmem_free(u_map, (vm_offset_t)p->p_addr, ctob(UPAGES));
628	vmspace_free(p->p_vmspace);
629}
630
631/*
632 * Dump the machine specific header information at the start of a core dump.
633 */
634int
635cpu_coredump(p, vp, cred)
636	struct proc *p;
637	struct vnode *vp;
638	struct ucred *cred;
639{
640
641	return (vn_rdwr(UIO_WRITE, vp, (caddr_t) p->p_addr, ctob(UPAGES),
642	    (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *)NULL,
643	    p));
644}
645
646#ifdef notyet
647static void
648setredzone(pte, vaddr)
649	u_short *pte;
650	caddr_t vaddr;
651{
652/* eventually do this by setting up an expand-down stack segment
653   for ss0: selector, allowing stack access down to top of u.
654   this means though that protection violations need to be handled
655   thru a double fault exception that must do an integral task
656   switch to a known good context, within which a dump can be
657   taken. a sensible scheme might be to save the initial context
658   used by sched (that has physical memory mapped 1:1 at bottom)
659   and take the dump while still in mapped mode */
660}
661#endif
662
663/*
664 * Convert kernel VA to physical address
665 */
666u_long
667kvtop(void *addr)
668{
669	vm_offset_t va;
670
671	va = pmap_kextract((vm_offset_t)addr);
672	if (va == 0)
673		panic("kvtop: zero page frame");
674	return((int)va);
675}
676
677/*
678 * Map an IO request into kernel virtual address space.
679 *
680 * All requests are (re)mapped into kernel VA space.
681 * Notice that we use b_bufsize for the size of the buffer
682 * to be mapped.  b_bcount might be modified by the driver.
683 */
684void
685vmapbuf(bp)
686	register struct buf *bp;
687{
688	register caddr_t addr, v, kva;
689	vm_offset_t pa;
690
691	if ((bp->b_flags & B_PHYS) == 0)
692		panic("vmapbuf");
693
694	for (v = bp->b_saveaddr, addr = (caddr_t)trunc_page(bp->b_data);
695	    addr < bp->b_data + bp->b_bufsize;
696	    addr += PAGE_SIZE, v += PAGE_SIZE) {
697		/*
698		 * Do the vm_fault if needed; do the copy-on-write thing
699		 * when reading stuff off device into memory.
700		 */
701		vm_fault_quick(addr,
702			(bp->b_flags&B_READ)?(VM_PROT_READ|VM_PROT_WRITE):VM_PROT_READ);
703		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
704		if (pa == 0)
705			panic("vmapbuf: page not present");
706		vm_page_hold(PHYS_TO_VM_PAGE(pa));
707		pmap_kenter((vm_offset_t) v, pa);
708	}
709
710	kva = bp->b_saveaddr;
711	bp->b_saveaddr = bp->b_data;
712	bp->b_data = kva + (((vm_offset_t) bp->b_data) & PAGE_MASK);
713}
714
715/*
716 * Free the io map PTEs associated with this IO operation.
717 * We also invalidate the TLB entries and restore the original b_addr.
718 */
719void
720vunmapbuf(bp)
721	register struct buf *bp;
722{
723	register caddr_t addr;
724	vm_offset_t pa;
725
726	if ((bp->b_flags & B_PHYS) == 0)
727		panic("vunmapbuf");
728
729	for (addr = (caddr_t)trunc_page(bp->b_data);
730	    addr < bp->b_data + bp->b_bufsize;
731	    addr += PAGE_SIZE) {
732		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
733		pmap_kremove((vm_offset_t) addr);
734		vm_page_unhold(PHYS_TO_VM_PAGE(pa));
735	}
736
737	bp->b_data = bp->b_saveaddr;
738}
739
740/*
741 * Force reset the processor by invalidating the entire address space!
742 */
743void
744cpu_reset() {
745
746	/*
747	 * Attempt to do a CPU reset via the keyboard controller,
748	 * do not turn of the GateA20, as any machine that fails
749	 * to do the reset here would then end up in no man's land.
750	 */
751
752#ifndef BROKEN_KEYBOARD_RESET
753	outb(IO_KBD + 4, 0xFE);
754	DELAY(500000);	/* wait 0.5 sec to see if that did it */
755	printf("Keyboard reset did not work, attempting CPU shutdown\n");
756	DELAY(1000000);	/* wait 1 sec for printf to complete */
757#endif
758
759	/* force a shutdown by unmapping entire address space ! */
760	bzero((caddr_t) PTD, PAGE_SIZE);
761
762	/* "good night, sweet prince .... <THUNK!>" */
763	pmap_update();
764	/* NOTREACHED */
765	while(1);
766}
767
768/*
769 * Grow the user stack to allow for 'sp'. This version grows the stack in
770 *	chunks of SGROWSIZ.
771 */
772int
773grow(p, sp)
774	struct proc *p;
775	u_int sp;
776{
777	unsigned int nss;
778	caddr_t v;
779	struct vmspace *vm = p->p_vmspace;
780
781	if ((caddr_t)sp <= vm->vm_maxsaddr || (unsigned)sp >= (unsigned)USRSTACK)
782	    return (1);
783
784	nss = roundup(USRSTACK - (unsigned)sp, PAGE_SIZE);
785
786	if (nss > p->p_rlimit[RLIMIT_STACK].rlim_cur)
787		return (0);
788
789	if (vm->vm_ssize && roundup(vm->vm_ssize << PAGE_SHIFT,
790	    SGROWSIZ) < nss) {
791		int grow_amount;
792		/*
793		 * If necessary, grow the VM that the stack occupies
794		 * to allow for the rlimit. This allows us to not have
795		 * to allocate all of the VM up-front in execve (which
796		 * is expensive).
797		 * Grow the VM by the amount requested rounded up to
798		 * the nearest SGROWSIZ to provide for some hysteresis.
799		 */
800		grow_amount = roundup((nss - (vm->vm_ssize << PAGE_SHIFT)), SGROWSIZ);
801		v = (char *)USRSTACK - roundup(vm->vm_ssize << PAGE_SHIFT,
802		    SGROWSIZ) - grow_amount;
803		/*
804		 * If there isn't enough room to extend by SGROWSIZ, then
805		 * just extend to the maximum size
806		 */
807		if (v < vm->vm_maxsaddr) {
808			v = vm->vm_maxsaddr;
809			grow_amount = MAXSSIZ - (vm->vm_ssize << PAGE_SHIFT);
810		}
811		if ((grow_amount == 0) || (vm_map_find(&vm->vm_map, NULL, 0, (vm_offset_t *)&v,
812		    grow_amount, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)) {
813			return (0);
814		}
815		vm->vm_ssize += grow_amount >> PAGE_SHIFT;
816	}
817
818	return (1);
819}
820
821/*
822 * prototype routine to implement the pre-zeroed page mechanism
823 * this routine is called from the idle loop.
824 */
825int
826vm_page_zero_idle() {
827	vm_page_t m;
828	if ((cnt.v_free_count > cnt.v_interrupt_free_min) &&
829		(m = TAILQ_FIRST(&vm_page_queue_free))) {
830		TAILQ_REMOVE(&vm_page_queue_free, m, pageq);
831		enable_intr();
832		pmap_zero_page(VM_PAGE_TO_PHYS(m));
833		disable_intr();
834		TAILQ_INSERT_HEAD(&vm_page_queue_zero, m, pageq);
835		m->queue = PQ_ZERO;
836		++vm_page_zero_count;
837		return 1;
838	}
839	return 0;
840}
841