vm_machdep.c revision 31322
/*-
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * Copyright (c) 1989, 1990 William Jolitz
 * Copyright (c) 1994 John Dyson
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 *	$Id: vm_machdep.c,v 1.91 1997/11/18 09:27:23 bde Exp $
 */

#include "npx.h"
#include "opt_bounce.h"
#include "opt_vm86.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>

#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/md_var.h>
#ifdef SMP
#include <machine/smp.h>
#endif

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>

#include <sys/user.h>

#ifdef PC98
#include <pc98/pc98/pc98.h>
#else
#include <i386/isa/isa.h>
#endif

#ifdef BOUNCE_BUFFERS
static vm_offset_t
		vm_bounce_kva __P((int size, int waitok));
static void	vm_bounce_kva_free __P((vm_offset_t addr, vm_offset_t size,
					int now));
static vm_offset_t
		vm_bounce_page_find __P((int count));
static void	vm_bounce_page_free __P((vm_offset_t pa, int count));

static volatile int	kvasfreecnt;

caddr_t		bouncememory;
static int	bpwait;
static vm_offset_t	*bouncepa;
static int		bmwait, bmfreeing;

#define BITS_IN_UNSIGNED (8*sizeof(unsigned))
static int		bounceallocarraysize;
static unsigned	*bounceallocarray;
static int		bouncefree;

#if defined(PC98) && defined (EPSON_BOUNCEDMA)
#define SIXTEENMEG (3840*4096)			/* 15MB boundary */
#else
#define SIXTEENMEG (4096*4096)
#endif
#define MAXBKVA 1024
int		maxbkva = MAXBKVA*PAGE_SIZE;

/* special list that can be used at interrupt time for eventual kva free */
static struct kvasfree {
	vm_offset_t addr;
	vm_offset_t size;
} kvaf[MAXBKVA];

/*
 * get bounce buffer pages (count physically contiguous)
 * (only 1 implemented now)
 */
static vm_offset_t
vm_bounce_page_find(count)
	int count;
{
	int bit;
	int s,i;

	if (count != 1)
		panic("vm_bounce_page_find -- no support for > 1 page yet!!!");

	s = splbio();
retry:
	for (i = 0; i < bounceallocarraysize; i++) {
		if (bounceallocarray[i] != 0xffffffff) {
			bit = ffs(~bounceallocarray[i]);
			if (bit) {
				bounceallocarray[i] |= 1 << (bit - 1) ;
				bouncefree -= count;
				splx(s);
				return bouncepa[(i * BITS_IN_UNSIGNED + (bit - 1))];
			}
		}
	}
	bpwait = 1;
	tsleep((caddr_t) &bounceallocarray, PRIBIO, "bncwai", 0);
	goto retry;
}
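
/*
 * Illustrative sketch (not compiled): how the ffs()-based scan above maps
 * a clear bit in bounceallocarray[] to an index into bouncepa[].  The
 * function name and parameters below are made-up stand-ins for the real
 * globals; only the bit arithmetic is the point.
 */
#if 0
static int
example_bitmap_find(unsigned *allocarray, int arraysize)
{
	int i, bit;

	for (i = 0; i < arraysize; i++) {
		if (allocarray[i] != 0xffffffff) {	/* at least one clear bit */
			bit = ffs(~allocarray[i]);	/* 1-based index of a clear bit */
			if (bit) {
				allocarray[i] |= 1 << (bit - 1);	/* mark page busy */
				/* word i, bit (bit - 1) corresponds to page: */
				return (i * BITS_IN_UNSIGNED + (bit - 1));
			}
		}
	}
	return (-1);	/* nothing free; the real code sleeps and retries */
}
#endif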

static void
vm_bounce_kva_free(addr, size, now)
	vm_offset_t addr;
	vm_offset_t size;
	int now;
{
	int s = splbio();
	kvaf[kvasfreecnt].addr = addr;
	kvaf[kvasfreecnt].size = size;
	++kvasfreecnt;
	if( now) {
		/*
		 * this will do wakeups
		 */
		vm_bounce_kva(0,0);
	} else {
		if (bmwait) {
		/*
		 * if anyone is waiting on the bounce-map, then wakeup
		 */
			wakeup((caddr_t) io_map);
			bmwait = 0;
		}
	}
	splx(s);
}
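
/*
 * Illustrative sketch (not compiled): the deferred-free pattern used above.
 * An interrupt-time caller cannot always do the unmapping and
 * kmem_free_wakeup() itself, so the (addr, size) pair is queued in kvaf[]
 * under splbio() and the queue is drained later by vm_bounce_kva().  The
 * names mirror the real globals; the drain helper itself is hypothetical.
 */
#if 0
static void
example_drain_kvaf(void)
{
	vm_offset_t off;
	int i;

	for (i = 0; i < kvasfreecnt; i++) {
		/* unmap every page, then give the kva range back to io_map */
		for (off = 0; off < kvaf[i].size; off += PAGE_SIZE)
			pmap_kremove(kvaf[i].addr + off);
		kmem_free_wakeup(io_map, kvaf[i].addr, kvaf[i].size);
	}
	kvasfreecnt = 0;
}
#endif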

/*
 * free count bounce buffer pages
 */
static void
vm_bounce_page_free(pa, count)
	vm_offset_t pa;
	int count;
{
	int allocindex;
	int index;
	int bit;

	if (count != 1)
		panic("vm_bounce_page_free -- no support for > 1 page yet!!!");

	for(index=0;index<bouncepages;index++) {
		if( pa == bouncepa[index])
			break;
	}

	if( index == bouncepages)
		panic("vm_bounce_page_free: invalid bounce buffer");

	allocindex = index / BITS_IN_UNSIGNED;
	bit = index % BITS_IN_UNSIGNED;

	bounceallocarray[allocindex] &= ~(1 << bit);

	bouncefree += count;
	if (bpwait) {
		bpwait = 0;
		wakeup((caddr_t) &bounceallocarray);
	}
}
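
/*
 * Illustrative sketch (not compiled): the inverse of the lookup in
 * vm_bounce_page_find().  Given the index of a bounce page, clear its bit
 * in the allocation bitmap.  For example, with 32-bit words, page index 37
 * lives in word 37 / 32 == 1, bit 37 % 32 == 5.  The helper is hypothetical.
 */
#if 0
static void
example_bitmap_clear(unsigned *allocarray, int index)
{
	int allocindex = index / BITS_IN_UNSIGNED;
	int bit = index % BITS_IN_UNSIGNED;

	allocarray[allocindex] &= ~(1 << bit);
}
#endif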

/*
 * allocate count bounce buffer kva pages
 */
static vm_offset_t
vm_bounce_kva(size, waitok)
	int size;
	int waitok;
{
	int i;
	vm_offset_t kva = 0;
	vm_offset_t off;
	int s = splbio();
more:
	if (!bmfreeing && kvasfreecnt) {
		bmfreeing = 1;
		for (i = 0; i < kvasfreecnt; i++) {
			for(off=0;off<kvaf[i].size;off+=PAGE_SIZE) {
				pmap_kremove( kvaf[i].addr + off);
			}
			kmem_free_wakeup(io_map, kvaf[i].addr,
				kvaf[i].size);
		}
		kvasfreecnt = 0;
		bmfreeing = 0;
		if( bmwait) {
			bmwait = 0;
			wakeup( (caddr_t) io_map);
		}
	}

	if( size == 0) {
		splx(s);
		return 0;
	}

	if ((kva = kmem_alloc_pageable(io_map, size)) == 0) {
		if( !waitok) {
			splx(s);
			return 0;
		}
		bmwait = 1;
		tsleep((caddr_t) io_map, PRIBIO, "bmwait", 0);
		goto more;
	}
	splx(s);
	return kva;
}

/*
 * same as vm_bounce_kva -- but really allocates backing pages
 * (and takes a page count, not a byte size, as its argument)
 */
vm_offset_t
vm_bounce_kva_alloc(count)
int count;
{
	int i;
	vm_offset_t kva;
	vm_offset_t pa;
	if( bouncepages == 0) {
		kva = (vm_offset_t) malloc(count*PAGE_SIZE, M_TEMP, M_WAITOK);
		return kva;
	}
	kva = vm_bounce_kva(count*PAGE_SIZE, 1);
	for(i=0;i<count;i++) {
		pa = vm_bounce_page_find(1);
		pmap_kenter(kva + i * PAGE_SIZE, pa);
	}
	return kva;
}

/*
 * same as vm_bounce_kva_free -- but really frees the backing pages
 */
void
vm_bounce_kva_alloc_free(kva, count)
	vm_offset_t kva;
	int count;
{
	int i;
	vm_offset_t pa;
	if( bouncepages == 0) {
		free((caddr_t) kva, M_TEMP);
		return;
	}
	for(i = 0; i < count; i++) {
		pa = pmap_kextract(kva + i * PAGE_SIZE);
		vm_bounce_page_free(pa, 1);
	}
	vm_bounce_kva_free(kva, count*PAGE_SIZE, 0);
}

/*
 * do the things necessary to the struct buf to implement
 * bounce buffers...  inserted before the disk sort
 */
void
vm_bounce_alloc(bp)
	struct buf *bp;
{
	int countvmpg;
	vm_offset_t vastart, vaend;
	vm_offset_t vapstart, vapend;
	vm_offset_t va, kva;
	vm_offset_t pa;
	int dobounceflag = 0;
	int i;

	if (bouncepages == 0)
		return;

	if (bp->b_flags & B_BOUNCE) {
		printf("vm_bounce_alloc: called recursively???\n");
		return;
	}

	if (bp->b_bufsize < bp->b_bcount) {
		printf(
		    "vm_bounce_alloc: b_bufsize(0x%lx) < b_bcount(0x%lx) !!\n",
			bp->b_bufsize, bp->b_bcount);
		panic("vm_bounce_alloc");
	}

/*
 *  This is not really necessary
 *	if( bp->b_bufsize != bp->b_bcount) {
 *		printf("size: %d, count: %d\n", bp->b_bufsize, bp->b_bcount);
 *	}
 */


	vastart = (vm_offset_t) bp->b_data;
	vaend = (vm_offset_t) bp->b_data + bp->b_bufsize;

	vapstart = trunc_page(vastart);
	vapend = round_page(vaend);
	countvmpg = (vapend - vapstart) / PAGE_SIZE;

/*
 * if any page is above 16MB, then go into bounce-buffer mode
 */
	va = vapstart;
	for (i = 0; i < countvmpg; i++) {
		pa = pmap_kextract(va);
		if (pa >= SIXTEENMEG)
			++dobounceflag;
		if( pa == 0)
			panic("vm_bounce_alloc: Unmapped page");
		va += PAGE_SIZE;
	}
	if (dobounceflag == 0)
		return;

	if (bouncepages < dobounceflag)
		panic("Not enough bounce buffers!!!");

/*
 * allocate a replacement kva for b_addr
 */
	kva = vm_bounce_kva(countvmpg*PAGE_SIZE, 1);
#if 0
	printf("%s: vapstart: %x, vapend: %x, countvmpg: %d, kva: %x ",
		(bp->b_flags & B_READ) ? "read":"write",
			vapstart, vapend, countvmpg, kva);
#endif
	va = vapstart;
	for (i = 0; i < countvmpg; i++) {
		pa = pmap_kextract(va);
		if (pa >= SIXTEENMEG) {
			/*
			 * allocate a replacement page
			 */
			vm_offset_t bpa = vm_bounce_page_find(1);
			pmap_kenter(kva + (PAGE_SIZE * i), bpa);
#if 0
			printf("r(%d): (%x,%x,%x) ", i, va, pa, bpa);
#endif
			/*
			 * if we are writing, then copy the data into the page
			 */
			if ((bp->b_flags & B_READ) == 0) {
				bcopy((caddr_t) va, (caddr_t) kva + (PAGE_SIZE * i), PAGE_SIZE);
			}
		} else {
			/*
			 * use original page
			 */
			pmap_kenter(kva + (PAGE_SIZE * i), pa);
		}
		va += PAGE_SIZE;
	}

/*
 * flag the buffer as being bounced
 */
	bp->b_flags |= B_BOUNCE;
/*
 * save the original buffer kva
 */
	bp->b_savekva = bp->b_data;
/*
 * put our new kva into the buffer (offset by original offset)
 */
	bp->b_data = (caddr_t) (((vm_offset_t) kva) |
				((vm_offset_t) bp->b_savekva & PAGE_MASK));
#if 0
	printf("b_savekva: %x, newva: %x\n", bp->b_savekva, bp->b_data);
#endif
	return;
}
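
/*
 * Illustrative sketch (not compiled): why the OR at the end of
 * vm_bounce_alloc() preserves the caller's sub-page offset.  The
 * replacement kva from vm_bounce_kva() is page aligned, so OR-ing in
 * (b_savekva & PAGE_MASK) is the same as adding the original offset.
 * The addresses below are made up for the example.
 */
#if 0
static void
example_offset_preserved(void)
{
	vm_offset_t kva = 0xfe000000;		/* page aligned replacement */
	vm_offset_t oldva = 0x08048234;		/* original b_data */
	vm_offset_t newva;

	newva = kva | (oldva & PAGE_MASK);	/* == kva + 0x234 */
}
#endif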

/*
 * hook into biodone to free bounce buffer
 */
void
vm_bounce_free(bp)
	struct buf *bp;
{
	int i;
	vm_offset_t origkva, bouncekva, bouncekvaend;

/*
 * if this isn't a bounced buffer, then just return
 */
	if ((bp->b_flags & B_BOUNCE) == 0)
		return;

/*
 *  This check is not necessary
 *	if (bp->b_bufsize != bp->b_bcount) {
 *		printf("vm_bounce_free: b_bufsize=%d, b_bcount=%d\n",
 *			bp->b_bufsize, bp->b_bcount);
 *	}
 */

	origkva = (vm_offset_t) bp->b_savekva;
	bouncekva = (vm_offset_t) bp->b_data;
/*
	printf("free: %d ", bp->b_bufsize);
*/

/*
 * check every page in the kva space for b_addr
 */
	for (i = 0; i < bp->b_bufsize; ) {
		vm_offset_t mybouncepa;
		vm_offset_t copycount;

		copycount = round_page(bouncekva + 1) - bouncekva;
		mybouncepa = pmap_kextract(trunc_page(bouncekva));

/*
 * if this is a bounced pa, then process as one
 */
		if ( mybouncepa != pmap_kextract( trunc_page( origkva))) {
			vm_offset_t tocopy = copycount;
			if (i + tocopy > bp->b_bufsize)
				tocopy = bp->b_bufsize - i;
/*
 * if this is a read, then copy from bounce buffer into original buffer
 */
			if (bp->b_flags & B_READ)
				bcopy((caddr_t) bouncekva, (caddr_t) origkva, tocopy);
/*
 * free the bounce allocation
 */

/*
			printf("(kva: %x, pa: %x)", bouncekva, mybouncepa);
*/
			vm_bounce_page_free(mybouncepa, 1);
		}

		origkva += copycount;
		bouncekva += copycount;
		i += copycount;
	}

/*
	printf("\n");
*/
/*
 * add the old kva into the "to free" list
 */

	bouncekva= trunc_page((vm_offset_t) bp->b_data);
	bouncekvaend= round_page((vm_offset_t)bp->b_data + bp->b_bufsize);

/*
	printf("freeva: %d\n", (bouncekvaend - bouncekva) / PAGE_SIZE);
*/
	vm_bounce_kva_free( bouncekva, (bouncekvaend - bouncekva), 0);
	bp->b_data = bp->b_savekva;
	bp->b_savekva = 0;
	bp->b_flags &= ~B_BOUNCE;

	return;
}
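
/*
 * Illustrative sketch (not compiled): the per-iteration step used in
 * vm_bounce_free().  round_page(bouncekva + 1) - bouncekva is the number
 * of bytes from bouncekva to the end of its page, so a buffer that does
 * not start page aligned is walked in a short first chunk and then in
 * whole pages.  The starting address is an arbitrary example.
 */
#if 0
static void
example_copycount(void)
{
	vm_offset_t bouncekva = 0xfe000234;	/* not page aligned */
	vm_offset_t copycount;

	copycount = round_page(bouncekva + 1) - bouncekva;
	/* with 4K pages: 0xfe001000 - 0xfe000234 == 0xdcc bytes */
	bouncekva += copycount;			/* now page aligned */
	copycount = round_page(bouncekva + 1) - bouncekva;
	/* subsequent chunks are PAGE_SIZE (0x1000) bytes */
}
#endif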


/*
 * init the bounce buffer system
 */
void
vm_bounce_init()
{
	int i;

	kvasfreecnt = 0;

	if (bouncepages == 0)
		return;

	bounceallocarraysize = (bouncepages + BITS_IN_UNSIGNED - 1) / BITS_IN_UNSIGNED;
	bounceallocarray = malloc(bounceallocarraysize * sizeof(unsigned), M_TEMP, M_NOWAIT);

	if (!bounceallocarray)
		panic("Cannot allocate bounce resource array");

	bouncepa = malloc(bouncepages * sizeof(vm_offset_t), M_TEMP, M_NOWAIT);
	if (!bouncepa)
		panic("Cannot allocate physical memory array");

	for(i=0;i<bounceallocarraysize;i++) {
		bounceallocarray[i] = 0xffffffff;
	}

	for(i=0;i<bouncepages;i++) {
		vm_offset_t pa;
		if( (pa = pmap_kextract((vm_offset_t) bouncememory + i * PAGE_SIZE)) >= SIXTEENMEG)
			panic("bounce memory out of range");
		if( pa == 0)
			panic("bounce memory not resident");
		bouncepa[i] = pa;
		bounceallocarray[i/(8*sizeof(int))] &= ~(1<<(i%(8*sizeof(int))));
	}
	bouncefree = bouncepages;

}
#endif /* BOUNCE_BUFFERS */

/*
 * quick version of vm_fault
 */
void
vm_fault_quick(v, prot)
	caddr_t v;
	int prot;
{
	if (prot & VM_PROT_WRITE)
		subyte(v, fubyte(v));
	else
		fubyte(v);
}

/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the pcb, and set up the stack so that the child is
 * ready to run and return to user mode.
 */
void
cpu_fork(p1, p2)
	register struct proc *p1, *p2;
{
	struct pcb *pcb2 = &p2->p_addr->u_pcb;

	/* Ensure that p1's pcb is up to date. */
	if (npxproc == p1)
		npxsave(&p1->p_addr->u_pcb.pcb_savefpu);

	/* Copy p1's pcb. */
	p2->p_addr->u_pcb = p1->p_addr->u_pcb;

	/*
	 * Create a new fresh stack for the new process.
	 * Copy the trap frame for the return to user mode as if from a
	 * syscall.  This copies the user mode register values.
	 */
	p2->p_md.md_regs = (struct trapframe *)
			   ((int)p2->p_addr + UPAGES * PAGE_SIZE) - 1;
	*p2->p_md.md_regs = *p1->p_md.md_regs;

	/*
	 * Set registers for trampoline to user mode.  Leave space for the
	 * return address on stack.  These are the kernel mode register values.
	 */
	pcb2->pcb_cr3 = vtophys(p2->p_vmspace->vm_pmap.pm_pdir);
	pcb2->pcb_edi = p2->p_md.md_regs->tf_edi;
	pcb2->pcb_esi = (int)fork_return;
	pcb2->pcb_ebp = p2->p_md.md_regs->tf_ebp;
	pcb2->pcb_esp = (int)p2->p_md.md_regs - sizeof(void *);
	pcb2->pcb_ebx = (int)p2;
	pcb2->pcb_eip = (int)fork_trampoline;
	/*
	 * pcb2->pcb_ldt:	duplicated below, if necessary.
	 * pcb2->pcb_ldt_len:	cloned above.
	 * pcb2->pcb_savefpu:	cloned above.
	 * pcb2->pcb_flags:	cloned above (always 0 here?).
	 * pcb2->pcb_onfault:	cloned above (always NULL here?).
	 */

#ifdef VM86
	/*
	 * XXX don't copy the i/o pages.  this should probably be fixed.
	 */
	pcb2->pcb_ext = 0;
#endif

#ifdef USER_LDT
        /* Copy the LDT, if necessary. */
        if (pcb2->pcb_ldt != 0) {
                union descriptor *new_ldt;
                size_t len = pcb2->pcb_ldt_len * sizeof(union descriptor);

                new_ldt = (union descriptor *)kmem_alloc(kernel_map, len);
                bcopy(pcb2->pcb_ldt, new_ldt, len);
                pcb2->pcb_ldt = (caddr_t)new_ldt;
        }
#endif

	/*
	 * Now, cpu_switch() can schedule the new process.
	 * pcb_esp is loaded pointing to the cpu_switch() stack frame
	 * containing the return address when exiting cpu_switch.
	 * This will normally be to fork_trampoline(), which will have
	 * %ebx loaded with the new proc's pointer.  fork_trampoline()
	 * will set up a stack to call fork_return(p, frame); to complete
	 * the return to user-mode.
	 */
}
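
/*
 * Illustrative sketch (not compiled): the hand-off that the pcb setup in
 * cpu_fork() arranges, expressed in C.  The real fork_trampoline() is
 * assembly; this is only a rough equivalent under the assumption that
 * %esi holds the function set above (fork_return), %ebx holds its first
 * argument (the new proc), and the trap frame sits just above pcb_esp.
 * The helper and its prototype are hypothetical.
 */
#if 0
static void
example_fork_trampoline(void (*func)(void *, struct trapframe *),
			void *arg, struct trapframe *frame)
{
	/* child runs func(arg, frame), then returns to user mode via frame */
	(*func)(arg, frame);
}
#endif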

/*
 * Intercept the return address from a freshly forked process that has NOT
 * been scheduled yet.
 *
 * This is needed to make kernel threads stay in kernel mode.
 */
void
cpu_set_fork_handler(p, func, arg)
	struct proc *p;
	void (*func) __P((void *));
	void *arg;
{
	/*
	 * Note that the trap frame follows the args, so the function
	 * is really called like this:  func(arg, frame);
	 */
	p->p_addr->u_pcb.pcb_esi = (int) func;	/* function */
	p->p_addr->u_pcb.pcb_ebx = (int) arg;	/* first arg */
}
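
/*
 * Illustrative sketch (not compiled): how a caller might use
 * cpu_set_fork_handler() to keep a freshly forked process in the kernel.
 * example_kthread_main and example_start_kthread are hypothetical; the
 * point is only that the handler is installed before the child is first
 * scheduled.
 */
#if 0
static void
example_kthread_main(void *arg)
{
	for (;;) {
		/* do kernel-thread work with arg ... */
	}
}

static void
example_start_kthread(struct proc *child)
{
	/* child has been forked but not yet scheduled */
	cpu_set_fork_handler(child, example_kthread_main, NULL);
}
#endif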

void
cpu_exit(p)
	register struct proc *p;
{
#if defined(USER_LDT) || defined(VM86)
	struct pcb *pcb = &p->p_addr->u_pcb;
#endif

#if NNPX > 0
	npxexit(p);
#endif	/* NNPX */
#ifdef VM86
	if (pcb->pcb_ext != 0) {
	        /*
		 * XXX do we need to move the TSS off the allocated pages
		 * before freeing them?  (not done here)
		 */
		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ext,
		    ctob(IOPAGES + 1));
		pcb->pcb_ext = 0;
	}
#endif
#ifdef USER_LDT
	if (pcb->pcb_ldt != 0) {
		if (pcb == curpcb)
			lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
			pcb->pcb_ldt_len * sizeof(union descriptor));
		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
	}
#endif
	cnt.v_swtch++;
	cpu_switch(p);
	panic("cpu_exit");
}

void
cpu_wait(p)
	struct proc *p;
{
	/* drop per-process resources */
	pmap_dispose_proc(p);
	vmspace_free(p->p_vmspace);
}

/*
 * Dump the machine specific header information at the start of a core dump.
 */
int
cpu_coredump(p, vp, cred)
	struct proc *p;
	struct vnode *vp;
	struct ucred *cred;
{

	return (vn_rdwr(UIO_WRITE, vp, (caddr_t) p->p_addr, ctob(UPAGES),
	    (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *)NULL,
	    p));
}

#ifdef notyet
static void
setredzone(pte, vaddr)
	u_short *pte;
	caddr_t vaddr;
{
/* eventually do this by setting up an expand-down stack segment
   for ss0: selector, allowing stack access down to top of u.
   this means though that protection violations need to be handled
   thru a double fault exception that must do an integral task
   switch to a known good context, within which a dump can be
   taken. a sensible scheme might be to save the initial context
   used by sched (that has physical memory mapped 1:1 at bottom)
   and take the dump while still in mapped mode */
}
#endif

/*
 * Convert kernel VA to physical address
 */
u_long
kvtop(void *addr)
{
	vm_offset_t va;

	va = pmap_kextract((vm_offset_t)addr);
	if (va == 0)
		panic("kvtop: zero page frame");
	return((int)va);
}

/*
 * Map an IO request into kernel virtual address space.
 *
 * All requests are (re)mapped into kernel VA space.
 * Notice that we use b_bufsize for the size of the buffer
 * to be mapped.  b_bcount might be modified by the driver.
 */
void
vmapbuf(bp)
	register struct buf *bp;
{
	register caddr_t addr, v, kva;
	vm_offset_t pa;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vmapbuf");

	for (v = bp->b_saveaddr, addr = (caddr_t)trunc_page(bp->b_data);
	    addr < bp->b_data + bp->b_bufsize;
	    addr += PAGE_SIZE, v += PAGE_SIZE) {
		/*
		 * Do the vm_fault if needed; do the copy-on-write thing
		 * when reading stuff off device into memory.
		 */
		vm_fault_quick(addr,
			(bp->b_flags&B_READ)?(VM_PROT_READ|VM_PROT_WRITE):VM_PROT_READ);
		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
		if (pa == 0)
			panic("vmapbuf: page not present");
		vm_page_hold(PHYS_TO_VM_PAGE(pa));
		pmap_kenter((vm_offset_t) v, pa);
	}

	kva = bp->b_saveaddr;
	bp->b_saveaddr = bp->b_data;
	bp->b_data = kva + (((vm_offset_t) bp->b_data) & PAGE_MASK);
}
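
/*
 * Illustrative sketch (not compiled): the way a physio-style caller is
 * expected to bracket a raw transfer with vmapbuf()/vunmapbuf().  The
 * helper below is hypothetical and omits all of the real physio setup;
 * it only shows the ordering: wire and map the user pages, start the
 * transfer on the kernel mapping in b_data, then tear the mapping down.
 */
#if 0
static void
example_raw_transfer(struct buf *bp, void (*start_io)(struct buf *))
{
	bp->b_flags |= B_PHYS;		/* b_data is a user address */
	vmapbuf(bp);			/* fault, hold and kenter each page */
	(*start_io)(bp);		/* driver sees a kernel va in b_data */
	/* ... wait for the transfer to complete ... */
	vunmapbuf(bp);			/* unmap, unhold, restore b_data */
}
#endif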

/*
 * Free the io map PTEs associated with this IO operation.
 * We also invalidate the TLB entries and restore the original b_addr.
 */
void
vunmapbuf(bp)
	register struct buf *bp;
{
	register caddr_t addr;
	vm_offset_t pa;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vunmapbuf");

	for (addr = (caddr_t)trunc_page(bp->b_data);
	    addr < bp->b_data + bp->b_bufsize;
	    addr += PAGE_SIZE) {
		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
		pmap_kremove((vm_offset_t) addr);
		vm_page_unhold(PHYS_TO_VM_PAGE(pa));
	}

	bp->b_data = bp->b_saveaddr;
}

/*
 * Force reset the processor by invalidating the entire address space!
 */
void
cpu_reset()
{

#ifdef PC98
	/*
	 * Attempt to do a CPU reset via CPU reset port.
	 */
	disable_intr();
	outb(0x37, 0x0f);		/* SHUT0 = 0. */
	outb(0x37, 0x0b);		/* SHUT1 = 0. */
	outb(0xf0, 0x00);		/* Reset. */
#else
	/*
	 * Attempt to do a CPU reset via the keyboard controller,
	 * do not turn off the GateA20, as any machine that fails
	 * to do the reset here would then end up in no man's land.
	 */

#if !defined(BROKEN_KEYBOARD_RESET)
	outb(IO_KBD + 4, 0xFE);
	DELAY(500000);	/* wait 0.5 sec to see if that did it */
	printf("Keyboard reset did not work, attempting CPU shutdown\n");
	DELAY(1000000);	/* wait 1 sec for printf to complete */
#endif
#endif /* PC98 */
	/* force a shutdown by unmapping entire address space ! */
	bzero((caddr_t) PTD, PAGE_SIZE);

	/* "good night, sweet prince .... <THUNK!>" */
	invltlb();
	/* NOTREACHED */
	while(1);
}

/*
 * Grow the user stack to allow for 'sp'. This version grows the stack in
 *	chunks of SGROWSIZ.
 */
int
grow(p, sp)
	struct proc *p;
	u_int sp;
{
	unsigned int nss;
	caddr_t v;
	struct vmspace *vm = p->p_vmspace;

	if ((caddr_t)sp <= vm->vm_maxsaddr || (unsigned)sp >= (unsigned)USRSTACK)
	    return (1);

	nss = roundup(USRSTACK - (unsigned)sp, PAGE_SIZE);

	if (nss > p->p_rlimit[RLIMIT_STACK].rlim_cur)
		return (0);

	if (vm->vm_ssize && roundup(vm->vm_ssize << PAGE_SHIFT,
	    SGROWSIZ) < nss) {
		int grow_amount;
		/*
		 * If necessary, grow the VM that the stack occupies
		 * to allow for the rlimit. This allows us to not have
		 * to allocate all of the VM up-front in execve (which
		 * is expensive).
		 * Grow the VM by the amount requested rounded up to
		 * the nearest SGROWSIZ to provide for some hysteresis.
		 */
		grow_amount = roundup((nss - (vm->vm_ssize << PAGE_SHIFT)), SGROWSIZ);
		v = (char *)USRSTACK - roundup(vm->vm_ssize << PAGE_SHIFT,
		    SGROWSIZ) - grow_amount;
		/*
		 * If there isn't enough room to extend by SGROWSIZ, then
		 * just extend to the maximum size
		 */
		if (v < vm->vm_maxsaddr) {
			v = vm->vm_maxsaddr;
			grow_amount = MAXSSIZ - (vm->vm_ssize << PAGE_SHIFT);
		}
		if ((grow_amount == 0) || (vm_map_find(&vm->vm_map, NULL, 0, (vm_offset_t *)&v,
		    grow_amount, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)) {
			return (0);
		}
		vm->vm_ssize += grow_amount >> PAGE_SHIFT;
	}

	return (1);
}
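
/*
 * Illustrative sketch (not compiled): the rounding done by grow() above,
 * with made-up numbers.  Assume 4K pages, SGROWSIZ of 128K, a current
 * stack of 256K and a faulting sp that needs 300K of stack.
 */
#if 0
static void
example_grow_arithmetic(void)
{
	unsigned nss = 300 * 1024;		/* already a page multiple */
	unsigned cursize = 256 * 1024;		/* vm_ssize << PAGE_SHIFT */
	unsigned grow_amount;

	/* roundup(cursize, SGROWSIZ) == 256K < 300K, so the map must grow */
	grow_amount = roundup(nss - cursize, SGROWSIZ);	/* 44K -> 128K */
	/* the map therefore grows by a whole 128K chunk, not just 44K */
}
#endif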

/*
 * Implement the pre-zeroed page mechanism.
 * This routine is called from the idle loop.
 */
int
vm_page_zero_idle()
{
	static int free_rover;
	vm_page_t m;
	int s;

#ifdef WRONG
	if (cnt.v_free_count <= cnt.v_interrupt_free_min)
		return (0);
#endif
	/*
	 * XXX
	 * We stop zeroing pages when there are sufficient prezeroed pages.
	 * This threshold isn't really needed, except we want to
	 * bypass unneeded calls to vm_page_list_find, and the
	 * associated cache flush and latency.  The pre-zero will
	 * still be called when there are significantly more
	 * non-prezeroed pages than zeroed pages.  The threshold
	 * of half the number of reserved pages is arbitrary, but
	 * approximately the right amount.  Eventually, we should
	 * perhaps interrupt the zero operation when a process
	 * is found to be ready to run.
	 */
	if (cnt.v_free_count - vm_page_zero_count <= cnt.v_free_reserved / 2)
		return (0);
#ifdef SMP
	get_mplock();
#endif
	s = splvm();
	enable_intr();
	m = vm_page_list_find(PQ_FREE, free_rover);
	if (m != NULL) {
		--(*vm_page_queues[m->queue].lcnt);
		TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq);
		splx(s);
#ifdef SMP
		rel_mplock();
#endif
		pmap_zero_page(VM_PAGE_TO_PHYS(m));
#ifdef SMP
		get_mplock();
#endif
		(void)splvm();
		m->queue = PQ_ZERO + m->pc;
		++(*vm_page_queues[m->queue].lcnt);
		TAILQ_INSERT_HEAD(vm_page_queues[m->queue].pl, m, pageq);
		free_rover = (free_rover + PQ_PRIME3) & PQ_L2_MASK;
		++vm_page_zero_count;
	}
	splx(s);
	disable_intr();
#ifdef SMP
	rel_mplock();
#endif
	return (1);
}
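
/*
 * Illustrative sketch (not compiled): the threshold test at the top of
 * vm_page_zero_idle(), with made-up counter values.  Zeroing stops once
 * the non-prezeroed portion of the free list drops to half of the
 * reserved page count.  The helper is hypothetical.
 */
#if 0
static int
example_should_zero(int free_count, int zero_count, int free_reserved)
{
	/* e.g. free_count 1000, zero_count 980, free_reserved 60:
	 * 1000 - 980 == 20 <= 30, so the idle loop stops zeroing. */
	return (free_count - zero_count > free_reserved / 2);
}
#endif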

/*
 * Tell whether this address is in some physical memory region.
 * Currently used by the kernel coredump code in order to avoid
 * dumping the ``ISA memory hole'' which could cause indefinite hangs,
 * or other unpredictable behaviour.
 */

#include "isa.h"

int
is_physical_memory(addr)
	vm_offset_t addr;
{

#if NISA > 0
	/* The ISA ``memory hole''. */
	if (addr >= 0xa0000 && addr < 0x100000)
		return 0;
#endif

	/*
	 * stuff other tests for known memory-mapped devices (PCI?)
	 * here
	 */

	return 1;
}