vm_machdep.c revision 18169
159769Sgrog/*-
259769Sgrog * Copyright (c) 1982, 1986 The Regents of the University of California.
324424Swosch * Copyright (c) 1989, 1990 William Jolitz
424424Swosch * Copyright (c) 1994 John Dyson
524424Swosch * All rights reserved.
624424Swosch *
724424Swosch * This code is derived from software contributed to Berkeley by
824424Swosch * the Systems Programming Group of the University of Utah Computer
924424Swosch * Science Department, and William Jolitz.
1024424Swosch *
1124424Swosch * Redistribution and use in source and binary forms, with or without
1224424Swosch * modification, are permitted provided that the following conditions
1324424Swosch * are met:
1424424Swosch * 1. Redistributions of source code must retain the above copyright
1542704Swosch *    notice, this list of conditions and the following disclaimer.
1642704Swosch * 2. Redistributions in binary form must reproduce the above copyright
1742704Swosch *    notice, this list of conditions and the following disclaimer in the
1824424Swosch *    documentation and/or other materials provided with the distribution.
1942704Swosch * 3. All advertising materials mentioning features or use of this software
2042704Swosch *    must display the following acknowledgement:
2142704Swosch *	This product includes software developed by the University of
2242704Swosch *	California, Berkeley and its contributors.
2342704Swosch * 4. Neither the name of the University nor the names of its contributors
2442704Swosch *    may be used to endorse or promote products derived from this software
2542704Swosch *    without specific prior written permission.
2642704Swosch *
2742704Swosch * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2842704Swosch * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2942704Swosch * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
3059769Sgrog * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
3159769Sgrog * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
3259769Sgrog * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
3359769Sgrog * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
3459769Sgrog * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3559769Sgrog * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3659769Sgrog * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3759769Sgrog * SUCH DAMAGE.
3859769Sgrog *
3924424Swosch *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
4042704Swosch *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
4124424Swosch *	$Id: vm_machdep.c,v 1.67 1996/07/12 04:11:10 bde Exp $
4242704Swosch */
4324424Swosch
4442704Swosch#include "npx.h"
4524424Swosch#include "opt_bounce.h"
4624424Swosch
4724424Swosch#include <sys/param.h>
4842704Swosch#include <sys/systm.h>
4925031Swosch#include <sys/proc.h>
5059156Swosch#include <sys/malloc.h>
5125031Swosch#include <sys/buf.h>
5225031Swosch#include <sys/vnode.h>
5324424Swosch#include <sys/vmmeter.h>
5424424Swosch
5524424Swosch#include <machine/clock.h>
5624424Swosch#include <machine/md_var.h>
5724424Swosch
5824424Swosch#include <vm/vm.h>
5924424Swosch#include <vm/vm_param.h>
6025031Swosch#include <vm/vm_prot.h>
6124424Swosch#include <vm/lock.h>
6224424Swosch#include <vm/vm_kern.h>
6325031Swosch#include <vm/vm_page.h>
6425031Swosch#include <vm/vm_map.h>
6545349Swosch#include <vm/vm_extern.h>
6625031Swosch
6745349Swosch#include <sys/user.h>
6845349Swosch
6945349Swosch#include <i386/isa/isa.h>
7045349Swosch
7145349Swosch#ifdef BOUNCE_BUFFERS
/*
 * Forward declarations of the file-local bounce buffer helpers.
 */
static vm_offset_t
		vm_bounce_kva __P((int size, int waitok));
static void	vm_bounce_kva_free __P((vm_offset_t addr, vm_offset_t size,
					int now));
static vm_offset_t
		vm_bounce_page_find __P((int count));
static void	vm_bounce_page_free __P((vm_offset_t pa, int count));

/* number of entries currently queued in kvaf[] */
static volatile int	kvasfreecnt;

/*
 * bouncememory is the kva of the first of bouncepages consecutive bounce
 * pages; both are expected to be set up before vm_bounce_init() runs
 * (the assignment is not in this file section).  bouncepages == 0
 * disables bouncing altogether.
 */
caddr_t		bouncememory;
int		bouncepages;
/* set while somebody sleeps on &bounceallocarray waiting for a free page */
static int	bpwait;
/* physical address of each bounce page, indexed by page number */
static vm_offset_t	*bouncepa;
/*
 * bmwait: set while somebody sleeps on io_map waiting for kva.
 * bmfreeing: set while vm_bounce_kva() is draining kvaf[].
 */
static int		bmwait, bmfreeing;

/* number of bits in one word of the allocation bitmap */
#define BITS_IN_UNSIGNED (8*sizeof(unsigned))
/* number of words in bounceallocarray */
static int		bounceallocarraysize;
/* allocation bitmap: a set bit means "in use or not a valid page" */
static unsigned	*bounceallocarray;
/* count of bounce pages that are currently free */
static int		bouncefree;

/* pages whose physical address is at or above this are bounced */
#define SIXTEENMEG (4096*4096)
/* number of slots in kvaf[]; also the default bounce kva size in pages */
#define MAXBKVA 1024
int		maxbkva = MAXBKVA*PAGE_SIZE;

/* special list that can be used at interrupt time for eventual kva free */
static struct kvasfree {
	vm_offset_t addr;	/* start of the kva range */
	vm_offset_t size;	/* length of the range in bytes */
} kvaf[MAXBKVA];
10246318Swosch
10366542Sitojun/*
10466542Sitojun * get bounce buffer pages (count physically contiguous)
10566542Sitojun * (only 1 inplemented now)
10646318Swosch */
10746318Swoschstatic vm_offset_t
10846318Swoschvm_bounce_page_find(count)
10946318Swosch	int count;
11046318Swosch{
11146318Swosch	int bit;
11246318Swosch	int s,i;
11366542Sitojun
11446321Swosch	if (count != 1)
11546321Swosch		panic("vm_bounce_page_find -- no support for > 1 page yet!!!");
11646318Swosch
11746318Swosch	s = splbio();
11856406Swoschretry:
11956406Swosch	for (i = 0; i < bounceallocarraysize; i++) {
12058448Swosch		if (bounceallocarray[i] != 0xffffffff) {
12158448Swosch			bit = ffs(~bounceallocarray[i]);
12258448Swosch			if (bit) {
12358448Swosch				bounceallocarray[i] |= 1 << (bit - 1) ;
12469277Sasmodai				bouncefree -= count;
12569277Sasmodai				splx(s);
12669277Sasmodai				return bouncepa[(i * BITS_IN_UNSIGNED + (bit - 1))];
12769277Sasmodai			}
12869277Sasmodai		}
12969277Sasmodai	}
13069277Sasmodai	bpwait = 1;
13169277Sasmodai	tsleep((caddr_t) &bounceallocarray, PRIBIO, "bncwai", 0);
13269277Sasmodai	goto retry;
13369277Sasmodai}
13469277Sasmodai
13569277Sasmodaistatic void
13665412Swoschvm_bounce_kva_free(addr, size, now)
13765412Swosch	vm_offset_t addr;
13865412Swosch	vm_offset_t size;
13965412Swosch	int now;
14065412Swosch{
14165412Swosch	int s = splbio();
14224424Swosch	kvaf[kvasfreecnt].addr = addr;
14324424Swosch	kvaf[kvasfreecnt].size = size;
14424424Swosch	++kvasfreecnt;
14524424Swosch	if( now) {
14624424Swosch		/*
14769277Sasmodai		 * this will do wakeups
14869277Sasmodai		 */
14924424Swosch		vm_bounce_kva(0,0);
15025031Swosch	} else {
15125031Swosch		if (bmwait) {
15225031Swosch		/*
15325031Swosch		 * if anyone is waiting on the bounce-map, then wakeup
15425031Swosch		 */
15525031Swosch			wakeup((caddr_t) io_map);
15625031Swosch			bmwait = 0;
15725031Swosch		}
15825031Swosch	}
15925031Swosch	splx(s);
16025031Swosch}
16125031Swosch
16225031Swosch/*
16325031Swosch * free count bounce buffer pages
16425031Swosch */
16538440Sjkhstatic void
16645349Swoschvm_bounce_page_free(pa, count)
16745349Swosch	vm_offset_t pa;
16842704Swosch	int count;
16925031Swosch{
17025031Swosch	int allocindex;
17124424Swosch	int index;
17259769Sgrog	int bit;
17325031Swosch
17425031Swosch	if (count != 1)
17525031Swosch		panic("vm_bounce_page_free -- no support for > 1 page yet!!!");
17625031Swosch
17759769Sgrog	for(index=0;index<bouncepages;index++) {
17825031Swosch		if( pa == bouncepa[index])
17925031Swosch			break;
18025031Swosch	}
18125031Swosch
18224424Swosch	if( index == bouncepages)
18325031Swosch		panic("vm_bounce_page_free: invalid bounce buffer");
18425031Swosch
18525031Swosch	allocindex = index / BITS_IN_UNSIGNED;
18625031Swosch	bit = index % BITS_IN_UNSIGNED;
18725031Swosch
18859769Sgrog	bounceallocarray[allocindex] &= ~(1 << bit);
18959769Sgrog
19042704Swosch	bouncefree += count;
19142704Swosch	if (bpwait) {
19242704Swosch		bpwait = 0;
19342704Swosch		wakeup((caddr_t) &bounceallocarray);
19442704Swosch	}
19542704Swosch}
19625031Swosch
/*
 * Allocate `size' bytes of bounce buffer kva from io_map, after first
 * releasing any ranges queued on kvaf[].
 *
 *	size	number of bytes wanted; 0 means "only drain kvaf[]"
 *	waitok	non-zero if the caller may sleep until kva is available
 *
 * Returns the kva of the new range, or 0 when size is 0 or when the
 * allocation failed and waitok is zero.
 */
static vm_offset_t
vm_bounce_kva(size, waitok)
	int size;
	int waitok;
{
	int i;
	vm_offset_t kva = 0;
	vm_offset_t off;
	int s = splbio();
more:
	/*
	 * Drain the deferred-free list.  bmfreeing keeps a second caller
	 * from walking the list while this one is still working on it.
	 */
	if (!bmfreeing && kvasfreecnt) {
		bmfreeing = 1;
		for (i = 0; i < kvasfreecnt; i++) {
			/* remove each page mapping, then give the kva back */
			for(off=0;off<kvaf[i].size;off+=PAGE_SIZE) {
				pmap_kremove( kvaf[i].addr + off);
			}
			kmem_free_wakeup(io_map, kvaf[i].addr,
				kvaf[i].size);
		}
		kvasfreecnt = 0;
		bmfreeing = 0;
		if( bmwait) {
			bmwait = 0;
			wakeup( (caddr_t) io_map);
		}
	}

	/* a zero-size request was only asking for the drain above */
	if( size == 0) {
		splx(s);
		return 0;
	}

	if ((kva = kmem_alloc_pageable(io_map, size)) == 0) {
		if( !waitok) {
			splx(s);
			return 0;
		}
		/* record that we are waiting, sleep on io_map, then retry */
		bmwait = 1;
		tsleep((caddr_t) io_map, PRIBIO, "bmwait", 0);
		goto more;
	}
	splx(s);
	return kva;
}
24425031Swosch
24538440Sjkh/*
24638440Sjkh * same as vm_bounce_kva -- but really allocate (but takes pages as arg)
24749392Swosch */
24857000Swoschvm_offset_t
24938440Sjkhvm_bounce_kva_alloc(count)
25038440Sjkhint count;
25169278Sasmodai{
25225031Swosch	int i;
25325031Swosch	vm_offset_t kva;
25469278Sasmodai	vm_offset_t pa;
25545349Swosch	if( bouncepages == 0) {
25669278Sasmodai		kva = (vm_offset_t) malloc(count*PAGE_SIZE, M_TEMP, M_WAITOK);
25745349Swosch		return kva;
25845349Swosch	}
25969278Sasmodai	kva = vm_bounce_kva(count*PAGE_SIZE, 1);
26069278Sasmodai	for(i=0;i<count;i++) {
26169278Sasmodai		pa = vm_bounce_page_find(1);
26269278Sasmodai		pmap_kenter(kva + i * PAGE_SIZE, pa);
26369278Sasmodai	}
26469278Sasmodai	return kva;
26557000Swosch}
26645349Swosch
26769277Sasmodai/*
26845349Swosch * same as vm_bounce_kva_free -- but really free
26966542Sitojun */
27069277Sasmodaivoid
27157000Swoschvm_bounce_kva_alloc_free(kva, count)
27245349Swosch	vm_offset_t kva;
27357000Swosch	int count;
27469277Sasmodai{
27542589Swosch	int i;
27646321Swosch	vm_offset_t pa;
27746321Swosch	if( bouncepages == 0) {
27846321Swosch		free((caddr_t) kva, M_TEMP);
27945349Swosch		return;
28045349Swosch	}
28157000Swosch	for(i = 0; i < count; i++) {
28246318Swosch		pa = pmap_kextract(kva + i * PAGE_SIZE);
28356406Swosch		vm_bounce_page_free(pa, 1);
28455389Sbillf	}
28555389Sbillf	vm_bounce_kva_free(kva, count*PAGE_SIZE, 0);
28657000Swosch}
28755389Sbillf
/*
 * do the things necessary to the struct buf to implement
 * bounce buffers...  inserted before the disk sort
 *
 * If any page behind bp->b_data lies at or above SIXTEENMEG, a fresh kva
 * range is allocated that maps a bounce page in place of every such page
 * and the original page everywhere else.  For writes the data is copied
 * into the bounce pages.  bp->b_data is then pointed at the new range,
 * the old pointer is kept in bp->b_savekva and B_BOUNCE is set.
 * Nothing is changed when bouncing is disabled, when the buffer is
 * already bounced, or when no page needs bouncing.
 */
void
vm_bounce_alloc(bp)
	struct buf *bp;
{
	int countvmpg;
	vm_offset_t vastart, vaend;
	vm_offset_t vapstart, vapend;
	vm_offset_t va, kva;
	vm_offset_t pa;
	int dobounceflag = 0;
	int i;

	if (bouncepages == 0)
		return;

	if (bp->b_flags & B_BOUNCE) {
		printf("vm_bounce_alloc: called recursively???\n");
		return;
	}

	if (bp->b_bufsize < bp->b_bcount) {
		printf(
		    "vm_bounce_alloc: b_bufsize(0x%lx) < b_bcount(0x%lx) !!\n",
			bp->b_bufsize, bp->b_bcount);
		panic("vm_bounce_alloc");
	}

/*
 *  This is not really necessary
 *	if( bp->b_bufsize != bp->b_bcount) {
 *		printf("size: %d, count: %d\n", bp->b_bufsize, bp->b_bcount);
 *	}
 */


	/* byte range of the buffer, then the page-aligned range around it */
	vastart = (vm_offset_t) bp->b_data;
	vaend = (vm_offset_t) bp->b_data + bp->b_bufsize;

	vapstart = trunc_page(vastart);
	vapend = round_page(vaend);
	countvmpg = (vapend - vapstart) / PAGE_SIZE;

/*
 * if any page is above 16MB, then go into bounce-buffer mode
 * (dobounceflag ends up holding the number of such pages)
 */
	va = vapstart;
	for (i = 0; i < countvmpg; i++) {
		pa = pmap_kextract(va);
		if (pa >= SIXTEENMEG)
			++dobounceflag;
		if( pa == 0)
			panic("vm_bounce_alloc: Unmapped page");
		va += PAGE_SIZE;
	}
	if (dobounceflag == 0)
		return;

	if (bouncepages < dobounceflag)
		panic("Not enough bounce buffers!!!");

/*
 * allocate a replacement kva for b_data (may sleep: waitok is 1)
 */
	kva = vm_bounce_kva(countvmpg*PAGE_SIZE, 1);
#if 0
	printf("%s: vapstart: %x, vapend: %x, countvmpg: %d, kva: %x ",
		(bp->b_flags & B_READ) ? "read":"write",
			vapstart, vapend, countvmpg, kva);
#endif
	va = vapstart;
	for (i = 0; i < countvmpg; i++) {
		pa = pmap_kextract(va);
		if (pa >= SIXTEENMEG) {
			/*
			 * allocate a replacement page
			 */
			vm_offset_t bpa = vm_bounce_page_find(1);
			pmap_kenter(kva + (PAGE_SIZE * i), bpa);
#if 0
			printf("r(%d): (%x,%x,%x) ", i, va, pa, bpa);
#endif
			/*
			 * if we are writing, then copy the data into the page
			 */
			if ((bp->b_flags & B_READ) == 0) {
				bcopy((caddr_t) va, (caddr_t) kva + (PAGE_SIZE * i), PAGE_SIZE);
			}
		} else {
			/*
			 * use original page
			 */
			pmap_kenter(kva + (PAGE_SIZE * i), pa);
		}
		va += PAGE_SIZE;
	}

/*
 * flag the buffer as being bounced
 */
	bp->b_flags |= B_BOUNCE;
/*
 * save the original buffer kva
 */
	bp->b_savekva = bp->b_data;
/*
 * put our new kva into the buffer (offset by original offset)
 */
	bp->b_data = (caddr_t) (((vm_offset_t) kva) |
				((vm_offset_t) bp->b_savekva & PAGE_MASK));
#if 0
	printf("b_savekva: %x, newva: %x\n", bp->b_savekva, bp->b_data);
#endif
	return;
}
406
/*
 * hook into biodone to free bounce buffer
 *
 * Undoes vm_bounce_alloc() for a buffer that has B_BOUNCE set: for every
 * page that was replaced by a bounce page, the data is copied back to
 * the original buffer if this was a read, and the bounce page is freed.
 * The bounce kva is then queued for release, bp->b_data is restored from
 * bp->b_savekva and B_BOUNCE is cleared.
 */
void
vm_bounce_free(bp)
	struct buf *bp;
{
	int i;
	vm_offset_t origkva, bouncekva, bouncekvaend;

/*
 * if this isn't a bounced buffer, then just return
 */
	if ((bp->b_flags & B_BOUNCE) == 0)
		return;

/*
 *  This check is not necessary
 *	if (bp->b_bufsize != bp->b_bcount) {
 *		printf("vm_bounce_free: b_bufsize=%d, b_bcount=%d\n",
 *			bp->b_bufsize, bp->b_bcount);
 *	}
 */

	origkva = (vm_offset_t) bp->b_savekva;
	bouncekva = (vm_offset_t) bp->b_data;
/*
	printf("free: %d ", bp->b_bufsize);
*/

/*
 * check every page in the kva space for b_data; i counts the bytes
 * of the buffer handled so far
 */
	for (i = 0; i < bp->b_bufsize; ) {
		vm_offset_t mybouncepa;
		vm_offset_t copycount;

		/* bytes from bouncekva up to the next page boundary */
		copycount = round_page(bouncekva + 1) - bouncekva;
		mybouncepa = pmap_kextract(trunc_page(bouncekva));

/*
 * if this is a bounced pa, then process as one
 * (a page that was not bounced maps the same physical page at both kvas)
 */
		if ( mybouncepa != pmap_kextract( trunc_page( origkva))) {
			vm_offset_t tocopy = copycount;
			/* do not copy past the end of the buffer */
			if (i + tocopy > bp->b_bufsize)
				tocopy = bp->b_bufsize - i;
/*
 * if this is a read, then copy from bounce buffer into original buffer
 */
			if (bp->b_flags & B_READ)
				bcopy((caddr_t) bouncekva, (caddr_t) origkva, tocopy);
/*
 * free the bounce allocation
 */

/*
			printf("(kva: %x, pa: %x)", bouncekva, mybouncepa);
*/
			vm_bounce_page_free(mybouncepa, 1);
		}

		origkva += copycount;
		bouncekva += copycount;
		i += copycount;
	}

/*
	printf("\n");
*/
/*
 * add the old kva into the "to free" list
 */

	bouncekva= trunc_page((vm_offset_t) bp->b_data);
	bouncekvaend= round_page((vm_offset_t)bp->b_data + bp->b_bufsize);

/*
	printf("freeva: %d\n", (bouncekvaend - bouncekva) / PAGE_SIZE);
*/
	/* now == 0: the kva is only queued here and released later */
	vm_bounce_kva_free( bouncekva, (bouncekvaend - bouncekva), 0);
	bp->b_data = bp->b_savekva;
	bp->b_savekva = 0;
	bp->b_flags &= ~B_BOUNCE;

	return;
}
494
495
496/*
497 * init the bounce buffer system
498 */
499void
500vm_bounce_init()
501{
502	int i;
503
504	kvasfreecnt = 0;
505
506	if (bouncepages == 0)
507		return;
508
509	bounceallocarraysize = (bouncepages + BITS_IN_UNSIGNED - 1) / BITS_IN_UNSIGNED;
510	bounceallocarray = malloc(bounceallocarraysize * sizeof(unsigned), M_TEMP, M_NOWAIT);
511
512	if (!bounceallocarray)
513		panic("Cannot allocate bounce resource array");
514
515	bouncepa = malloc(bouncepages * sizeof(vm_offset_t), M_TEMP, M_NOWAIT);
516	if (!bouncepa)
517		panic("Cannot allocate physical memory array");
518
519	for(i=0;i<bounceallocarraysize;i++) {
520		bounceallocarray[i] = 0xffffffff;
521	}
522
523	for(i=0;i<bouncepages;i++) {
524		vm_offset_t pa;
525		if( (pa = pmap_kextract((vm_offset_t) bouncememory + i * PAGE_SIZE)) >= SIXTEENMEG)
526			panic("bounce memory out of range");
527		if( pa == 0)
528			panic("bounce memory not resident");
529		bouncepa[i] = pa;
530		bounceallocarray[i/(8*sizeof(int))] &= ~(1<<(i%(8*sizeof(int))));
531	}
532	bouncefree = bouncepages;
533
534}
535#endif /* BOUNCE_BUFFERS */
536
537/*
538 * quick version of vm_fault
539 */
540void
541vm_fault_quick(v, prot)
542	caddr_t v;
543	int prot;
544{
545	if (prot & VM_PROT_WRITE)
546		subyte(v, fubyte(v));
547	else
548		fubyte(v);
549}
550
/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the kernel stack and pcb, making the child
 * ready to run, and marking it so that it can return differently
 * than the parent.  Returns 1 in the child process, 0 in the parent.
 * We currently double-map the user area so that the stack is at the same
 * address in each process; in the future we will probably relocate
 * the frame pointers on the stack after copying.
 *
 *	p1	the parent (source of the stack and pcb)
 *	p2	the child being set up
 */
int
cpu_fork(p1, p2)
	register struct proc *p1, *p2;
{
	struct pcb *pcb2 = &p2->p_addr->u_pcb;
	int sp, offset;
	/* volatile, presumably so that the value is kept on the stack */
	volatile int retval;

	/*
	 * Copy pcb and stack from proc p1 to p2.
	 * We do this as cheaply as possible, copying only the active
	 * part of the stack.  The stack and pcb need to agree;
	 * this is tricky, as the final pcb is constructed by savectx,
	 * but its frame isn't yet on the stack when the stack is copied.
	 * This should be done differently, with a single call
	 * that copies and updates the pcb+stack,
	 * replacing the bcopy and savectx.
	 */

	/* offset of the current stack pointer from the base of kstack */
	__asm __volatile("movl %%esp,%0" : "=r" (sp));
	offset = sp - (int)kstack;

	/*
	 * retval is set to 1 before the stack is copied and back to 0
	 * afterwards, so the copied stack holds 1 and the live one 0.
	 */
	retval = 1;		/* return 1 in child */
	bcopy((caddr_t)kstack + offset, (caddr_t)p2->p_addr + offset,
	    (unsigned) ctob(UPAGES) - offset);
	p2->p_md.md_regs = p1->p_md.md_regs;

	/* start from the parent's pcb, but use the child's page directory */
	*pcb2 = p1->p_addr->u_pcb;
	pcb2->pcb_cr3 = vtophys(p2->p_vmspace->vm_pmap.pm_pdir);

	retval = 0;		/* return 0 in parent */
	savectx(pcb2);
	return (retval);
}
594
595void
596cpu_exit(p)
597	register struct proc *p;
598{
599#ifdef USER_LDT
600	struct pcb *pcb;
601#endif
602
603#if NNPX > 0
604	npxexit(p);
605#endif	/* NNPX */
606#ifdef USER_LDT
607	pcb = &p->p_addr->u_pcb;
608	if (pcb->pcb_ldt != 0) {
609		if (pcb == curpcb)
610			lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
611		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
612			pcb->pcb_ldt_len * sizeof(union descriptor));
613		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
614	}
615#endif
616	cnt.v_swtch++;
617	cpu_switch(p);
618	panic("cpu_exit");
619}
620
621void
622cpu_wait(p)
623	struct proc *p;
624{
625	/* drop per-process resources */
626	pmap_qremove((vm_offset_t) p->p_addr, UPAGES);
627	kmem_free(u_map, (vm_offset_t)p->p_addr, ctob(UPAGES));
628	vmspace_free(p->p_vmspace);
629}
630
631/*
632 * Dump the machine specific header information at the start of a core dump.
633 */
634int
635cpu_coredump(p, vp, cred)
636	struct proc *p;
637	struct vnode *vp;
638	struct ucred *cred;
639{
640
641	return (vn_rdwr(UIO_WRITE, vp, (caddr_t) p->p_addr, ctob(UPAGES),
642	    (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *)NULL,
643	    p));
644}
645
646#ifdef notyet
/*
 * Placeholder for stack red zone setup.  The body is empty and the
 * function is only compiled when `notyet' is defined; neither argument
 * is used.
 */
static void
setredzone(pte, vaddr)
	u_short *pte;
	caddr_t vaddr;
{
/*
 * Eventually do this by setting up an expand-down stack segment
 * for the ss0: selector, allowing stack access down to the top of u.
 * This means, though, that protection violations need to be handled
 * through a double fault exception that must do an integral task
 * switch to a known good context, within which a dump can be
 * taken.  A sensible scheme might be to save the initial context
 * used by sched (that has physical memory mapped 1:1 at bottom)
 * and take the dump while still in mapped mode.
 */
}
661#endif
662
663/*
664 * Convert kernel VA to physical address
665 */
666u_long
667kvtop(void *addr)
668{
669	vm_offset_t va;
670
671	va = pmap_kextract((vm_offset_t)addr);
672	if (va == 0)
673		panic("kvtop: zero page frame");
674	return((int)va);
675}
676
/*
 * Map an IO request into kernel virtual address space.
 *
 * All requests are (re)mapped into kernel VA space.
 * Notice that we use b_bufsize for the size of the buffer
 * to be mapped.  b_bcount might be modified by the driver.
 *
 * On entry bp->b_saveaddr is used as the kernel va to map the pages at
 * and bp->b_data is the address of the data to be mapped.  On return
 * bp->b_saveaddr holds the original b_data, and bp->b_data points into
 * the new mapping at the same offset within the page.  Panics if
 * B_PHYS is not set or if a page is not present after being touched.
 */
void
vmapbuf(bp)
	register struct buf *bp;
{
	register caddr_t addr, v, kva;
	vm_offset_t pa;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vmapbuf");

	/* addr walks the source pages, v the kernel va they are mapped at */
	for (v = bp->b_saveaddr, addr = (caddr_t)trunc_page(bp->b_data);
	    addr < bp->b_data + bp->b_bufsize;
	    addr += PAGE_SIZE, v += PAGE_SIZE) {
		/*
		 * Do the vm_fault if needed; do the copy-on-write thing
		 * when reading stuff off device into memory.
		 */
		vm_fault_quick(addr,
			(bp->b_flags&B_READ)?(VM_PROT_READ|VM_PROT_WRITE):VM_PROT_READ);
		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
		if (pa == 0)
			panic("vmapbuf: page not present");
		/* hold the page (undone in vunmapbuf) and enter the mapping */
		vm_page_hold(PHYS_TO_VM_PAGE(pa));
		pmap_kenter((vm_offset_t) v, pa);
	}

	/* swap the pointers, keeping the offset within the first page */
	kva = bp->b_saveaddr;
	bp->b_saveaddr = bp->b_data;
	bp->b_data = kva + (((vm_offset_t) bp->b_data) & PAGE_MASK);
}
714
715/*
716 * Free the io map PTEs associated with this IO operation.
717 * We also invalidate the TLB entries and restore the original b_addr.
718 */
719void
720vunmapbuf(bp)
721	register struct buf *bp;
722{
723	register caddr_t addr;
724	vm_offset_t pa;
725
726	if ((bp->b_flags & B_PHYS) == 0)
727		panic("vunmapbuf");
728
729	for (addr = (caddr_t)trunc_page(bp->b_data);
730	    addr < bp->b_data + bp->b_bufsize;
731	    addr += PAGE_SIZE) {
732		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
733		pmap_kremove((vm_offset_t) addr);
734		vm_page_unhold(PHYS_TO_VM_PAGE(pa));
735	}
736
737	bp->b_data = bp->b_saveaddr;
738}
739
/*
 * Force reset the processor by invalidating the entire address space!
 *
 * Unless BROKEN_KEYBOARD_RESET is defined, a reset through the keyboard
 * controller is tried first.  If the machine is still running after
 * that, the page directory is zeroed and pmap_update() is called.
 * The function is not expected to return.
 */
void
cpu_reset() {

	/*
	 * Attempt to do a CPU reset via the keyboard controller,
	 * do not turn off the GateA20, as any machine that fails
	 * to do the reset here would then end up in no man's land.
	 */

#ifndef BROKEN_KEYBOARD_RESET
	outb(IO_KBD + 4, 0xFE);
	DELAY(500000);	/* wait 0.5 sec to see if that did it */
	printf("Keyboard reset did not work, attempting CPU shutdown\n");
	DELAY(1000000);	/* wait 1 sec for printf to complete */
#endif

	/* force a shutdown by unmapping entire address space ! */
	bzero((caddr_t) PTD, PAGE_SIZE);

	/* "good night, sweet prince .... <THUNK!>" */
	pmap_update();
	/* NOTREACHED */
	while(1);
}
767
/*
 * Grow the user stack to allow for 'sp'. This version grows the stack in
 *	chunks of SGROWSIZ.
 *
 *	p	process whose stack is to be grown
 *	sp	stack address that must become valid
 *
 * Returns 1 if sp is outside the stack region (nothing to do), if the
 * stack is already large enough, or if it was grown successfully.
 * Returns 0 if the required size exceeds the RLIMIT_STACK soft limit or
 * if the additional memory could not be mapped.
 */
int
grow(p, sp)
	struct proc *p;
	u_int sp;
{
	unsigned int nss;
	caddr_t v;
	struct vmspace *vm = p->p_vmspace;

	/* not an address between vm_maxsaddr and USRSTACK: not our business */
	if ((caddr_t)sp <= vm->vm_maxsaddr || (unsigned)sp >= (unsigned)USRSTACK)
	    return (1);

	/* stack size in bytes needed to reach sp, rounded up to a page */
	nss = roundup(USRSTACK - (unsigned)sp, PAGE_SIZE);

	if (nss > p->p_rlimit[RLIMIT_STACK].rlim_cur)
		return (0);

	/* vm_ssize is in pages, hence the shifts by PAGE_SHIFT */
	if (vm->vm_ssize && roundup(vm->vm_ssize << PAGE_SHIFT,
	    SGROWSIZ) < nss) {
		int grow_amount;
		/*
		 * If necessary, grow the VM that the stack occupies
		 * to allow for the rlimit. This allows us to not have
		 * to allocate all of the VM up-front in execve (which
		 * is expensive).
		 * Grow the VM by the amount requested rounded up to
		 * the nearest SGROWSIZ to provide for some hysteresis.
		 */
		grow_amount = roundup((nss - (vm->vm_ssize << PAGE_SHIFT)), SGROWSIZ);
		/* v is the new lowest address of the stack mapping */
		v = (char *)USRSTACK - roundup(vm->vm_ssize << PAGE_SHIFT,
		    SGROWSIZ) - grow_amount;
		/*
		 * If there isn't enough room to extend by SGROWSIZ, then
		 * just extend to the maximum size
		 */
		if (v < vm->vm_maxsaddr) {
			v = vm->vm_maxsaddr;
			grow_amount = MAXSSIZ - (vm->vm_ssize << PAGE_SHIFT);
		}
		if ((grow_amount == 0) || (vm_map_find(&vm->vm_map, NULL, 0, (vm_offset_t *)&v,
		    grow_amount, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)) {
			return (0);
		}
		vm->vm_ssize += grow_amount >> PAGE_SHIFT;
	}

	return (1);
}
820
/*
 * prototype routine to implement the pre-zeroed page mechanism
 * this routine is called from the idle loop.
 *
 * If the free page count is above cnt.v_interrupt_free_min and
 * vm_page_list_find() returns a page for PQ_FREE, the page is taken off
 * its queue, zeroed, and put at the head of queue PQ_ZERO + m->pc.
 * Returns 1 if a page was zeroed, 0 otherwise.
 *
 * NOTE(review): interrupts are enabled only around pmap_zero_page() and
 * disabled again afterwards, so the caller presumably enters with
 * interrupts disabled -- confirm against the idle loop.
 */
int
vm_page_zero_idle() {
	vm_page_t m;
	/* second argument for vm_page_list_find(); advanced after each page */
	static int free_rover = 0;
	if ((cnt.v_free_count > cnt.v_interrupt_free_min) &&
		(m = vm_page_list_find(PQ_FREE, free_rover))) {
		/* unlink from the current queue and fix up its count */
		--(*vm_page_queues[m->queue].lcnt);
		TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq);
		enable_intr();
		pmap_zero_page(VM_PAGE_TO_PHYS(m));
		disable_intr();
		/* requeue on the zeroed-page queue selected by m->pc */
		m->queue = PQ_ZERO + m->pc;
		++(*vm_page_queues[m->queue].lcnt);
		TAILQ_INSERT_HEAD(vm_page_queues[m->queue].pl, m, pageq);
		free_rover = (free_rover + PQ_PRIME3) & PQ_L2_MASK;
		++vm_page_zero_count;
		return 1;
	}
	return 0;
}
845