vm_machdep.c revision 34840
1/*-
2 * Copyright (c) 1982, 1986 The Regents of the University of California.
3 * Copyright (c) 1989, 1990 William Jolitz
4 * Copyright (c) 1994 John Dyson
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * the Systems Programming Group of the University of Utah Computer
9 * Science Department, and William Jolitz.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 *    must display the following acknowledgement:
21 *	This product includes software developed by the University of
22 *	California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 *    may be used to endorse or promote products derived from this software
25 *    without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
40 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
41 *	$Id: vm_machdep.c,v 1.104 1998/03/17 09:10:05 kato Exp $
42 */
43
44#include "npx.h"
45#include "opt_bounce.h"
46#include "opt_user_ldt.h"
47#include "opt_vm86.h"
48#ifdef PC98
49#include "opt_pc98.h"
50#endif
51
52#include <sys/param.h>
53#include <sys/systm.h>
54#include <sys/proc.h>
55#include <sys/malloc.h>
56#include <sys/buf.h>
57#include <sys/vnode.h>
58#include <sys/vmmeter.h>
59#include <sys/kernel.h>
60#include <sys/sysctl.h>
61
62#include <machine/clock.h>
63#include <machine/cpu.h>
64#include <machine/md_var.h>
65#ifdef SMP
66#include <machine/smp.h>
67#endif
68#ifdef VM86
69#include <machine/pcb_ext.h>
70#include <machine/vm86.h>
71#endif
72
73#include <vm/vm.h>
74#include <vm/vm_param.h>
75#include <vm/vm_prot.h>
76#include <sys/lock.h>
77#include <vm/vm_kern.h>
78#include <vm/vm_page.h>
79#include <vm/vm_map.h>
80#include <vm/vm_extern.h>
81
82#include <sys/user.h>
83
84#ifdef PC98
85#include <pc98/pc98/pc98.h>
86#else
87#include <i386/isa/isa.h>
88#endif
89
90#ifdef BOUNCE_BUFFERS
91static vm_offset_t
92		vm_bounce_kva __P((int size, int waitok));
93static void	vm_bounce_kva_free __P((vm_offset_t addr, vm_offset_t size,
94					int now));
95static vm_offset_t
96		vm_bounce_page_find __P((int count));
97static void	vm_bounce_page_free __P((vm_offset_t pa, int count));
98
99static volatile int	kvasfreecnt;
100
101caddr_t		bouncememory;
102static int	bpwait;
103static vm_offset_t	*bouncepa;
104static int		bmwait, bmfreeing;
105
106#define BITS_IN_UNSIGNED (8*sizeof(unsigned))
107static int		bounceallocarraysize;
108static unsigned	*bounceallocarray;
109static int		bouncefree;
110
111#if defined(PC98) && defined (EPSON_BOUNCEDMA)
112#define SIXTEENMEG (3840*4096)			/* 15MB boundary */
113#else
114#define SIXTEENMEG (4096*4096)
115#endif
116#define MAXBKVA 1024
117int		maxbkva = MAXBKVA*PAGE_SIZE;
118
119/* special list that can be used at interrupt time for eventual kva free */
120static struct kvasfree {
121	vm_offset_t addr;
122	vm_offset_t size;
123} kvaf[MAXBKVA];
124
125/*
126 * get bounce buffer pages (count physically contiguous)
127 * (only 1 inplemented now)
128 */
129static vm_offset_t
130vm_bounce_page_find(count)
131	int count;
132{
133	int bit;
134	int s,i;
135
136	if (count != 1)
137		panic("vm_bounce_page_find -- no support for > 1 page yet!!!");
138
139	s = splbio();
140retry:
141	for (i = 0; i < bounceallocarraysize; i++) {
142		if (bounceallocarray[i] != 0xffffffff) {
143			bit = ffs(~bounceallocarray[i]);
144			if (bit) {
145				bounceallocarray[i] |= 1 << (bit - 1) ;
146				bouncefree -= count;
147				splx(s);
148				return bouncepa[(i * BITS_IN_UNSIGNED + (bit - 1))];
149			}
150		}
151	}
152	bpwait = 1;
153	tsleep((caddr_t) &bounceallocarray, PRIBIO, "bncwai", 0);
154	goto retry;
155}
156
157static void
158vm_bounce_kva_free(addr, size, now)
159	vm_offset_t addr;
160	vm_offset_t size;
161	int now;
162{
163	int s = splbio();
164	kvaf[kvasfreecnt].addr = addr;
165	kvaf[kvasfreecnt].size = size;
166	++kvasfreecnt;
167	if( now) {
168		/*
169		 * this will do wakeups
170		 */
171		vm_bounce_kva(0,0);
172	} else {
173		if (bmwait) {
174		/*
175		 * if anyone is waiting on the bounce-map, then wakeup
176		 */
177			wakeup((caddr_t) io_map);
178			bmwait = 0;
179		}
180	}
181	splx(s);
182}
183
184/*
185 * free count bounce buffer pages
186 */
187static void
188vm_bounce_page_free(pa, count)
189	vm_offset_t pa;
190	int count;
191{
192	int allocindex;
193	int index;
194	int bit;
195
196	if (count != 1)
197		panic("vm_bounce_page_free -- no support for > 1 page yet!!!");
198
199	for(index=0;index<bouncepages;index++) {
200		if( pa == bouncepa[index])
201			break;
202	}
203
204	if( index == bouncepages)
205		panic("vm_bounce_page_free: invalid bounce buffer");
206
207	allocindex = index / BITS_IN_UNSIGNED;
208	bit = index % BITS_IN_UNSIGNED;
209
210	bounceallocarray[allocindex] &= ~(1 << bit);
211
212	bouncefree += count;
213	if (bpwait) {
214		bpwait = 0;
215		wakeup((caddr_t) &bounceallocarray);
216	}
217}
218
219/*
220 * allocate count bounce buffer kva pages
221 */
222static vm_offset_t
223vm_bounce_kva(size, waitok)
224	int size;
225	int waitok;
226{
227	int i;
228	vm_offset_t kva = 0;
229	vm_offset_t off;
230	int s = splbio();
231more:
232	if (!bmfreeing && kvasfreecnt) {
233		bmfreeing = 1;
234		for (i = 0; i < kvasfreecnt; i++) {
235			for(off=0;off<kvaf[i].size;off+=PAGE_SIZE) {
236				pmap_kremove( kvaf[i].addr + off);
237			}
238			kmem_free_wakeup(io_map, kvaf[i].addr,
239				kvaf[i].size);
240		}
241		kvasfreecnt = 0;
242		bmfreeing = 0;
243		if( bmwait) {
244			bmwait = 0;
245			wakeup( (caddr_t) io_map);
246		}
247	}
248
249	if( size == 0) {
250		splx(s);
251		return 0;
252	}
253
254	if ((kva = kmem_alloc_pageable(io_map, size)) == 0) {
255		if( !waitok) {
256			splx(s);
257			return 0;
258		}
259		bmwait = 1;
260		tsleep((caddr_t) io_map, PRIBIO, "bmwait", 0);
261		goto more;
262	}
263	splx(s);
264	return kva;
265}
266
267/*
268 * same as vm_bounce_kva -- but really allocate (but takes pages as arg)
269 */
270vm_offset_t
271vm_bounce_kva_alloc(count)
272int count;
273{
274	int i;
275	vm_offset_t kva;
276	vm_offset_t pa;
277	if( bouncepages == 0) {
278		kva = (vm_offset_t) malloc(count*PAGE_SIZE, M_TEMP, M_WAITOK);
279		return kva;
280	}
281	kva = vm_bounce_kva(count*PAGE_SIZE, 1);
282	for(i=0;i<count;i++) {
283		pa = vm_bounce_page_find(1);
284		pmap_kenter(kva + i * PAGE_SIZE, pa);
285	}
286	return kva;
287}
288
289/*
290 * same as vm_bounce_kva_free -- but really free
291 */
292void
293vm_bounce_kva_alloc_free(kva, count)
294	vm_offset_t kva;
295	int count;
296{
297	int i;
298	vm_offset_t pa;
299	if( bouncepages == 0) {
300		free((caddr_t) kva, M_TEMP);
301		return;
302	}
303	for(i = 0; i < count; i++) {
304		pa = pmap_kextract(kva + i * PAGE_SIZE);
305		vm_bounce_page_free(pa, 1);
306	}
307	vm_bounce_kva_free(kva, count*PAGE_SIZE, 0);
308}
309
310/*
311 * do the things necessary to the struct buf to implement
312 * bounce buffers...  inserted before the disk sort
313 */
314void
315vm_bounce_alloc(bp)
316	struct buf *bp;
317{
318	int countvmpg;
319	vm_offset_t vastart, vaend;
320	vm_offset_t vapstart, vapend;
321	vm_offset_t va, kva;
322	vm_offset_t pa;
323	int dobounceflag = 0;
324	int i;
325
326	if (bouncepages == 0)
327		return;
328
329	if (bp->b_flags & B_BOUNCE) {
330		printf("vm_bounce_alloc: called recursively???\n");
331		return;
332	}
333
334	if (bp->b_bufsize < bp->b_bcount) {
335		printf(
336		    "vm_bounce_alloc: b_bufsize(0x%lx) < b_bcount(0x%lx) !!\n",
337			bp->b_bufsize, bp->b_bcount);
338		panic("vm_bounce_alloc");
339	}
340
341/*
342 *  This is not really necessary
343 *	if( bp->b_bufsize != bp->b_bcount) {
344 *		printf("size: %d, count: %d\n", bp->b_bufsize, bp->b_bcount);
345 *	}
346 */
347
348
349	vastart = (vm_offset_t) bp->b_data;
350	vaend = (vm_offset_t) bp->b_data + bp->b_bufsize;
351
352	vapstart = trunc_page(vastart);
353	vapend = round_page(vaend);
354	countvmpg = (vapend - vapstart) / PAGE_SIZE;
355
356/*
357 * if any page is above 16MB, then go into bounce-buffer mode
358 */
359	va = vapstart;
360	for (i = 0; i < countvmpg; i++) {
361		pa = pmap_kextract(va);
362		if (pa >= SIXTEENMEG)
363			++dobounceflag;
364		if( pa == 0)
365			panic("vm_bounce_alloc: Unmapped page");
366		va += PAGE_SIZE;
367	}
368	if (dobounceflag == 0)
369		return;
370
371	if (bouncepages < dobounceflag)
372		panic("Not enough bounce buffers!!!");
373
374/*
375 * allocate a replacement kva for b_addr
376 */
377	kva = vm_bounce_kva(countvmpg*PAGE_SIZE, 1);
378#if 0
379	printf("%s: vapstart: %x, vapend: %x, countvmpg: %d, kva: %x ",
380		(bp->b_flags & B_READ) ? "read":"write",
381			vapstart, vapend, countvmpg, kva);
382#endif
383	va = vapstart;
384	for (i = 0; i < countvmpg; i++) {
385		pa = pmap_kextract(va);
386		if (pa >= SIXTEENMEG) {
387			/*
388			 * allocate a replacement page
389			 */
390			vm_offset_t bpa = vm_bounce_page_find(1);
391			pmap_kenter(kva + (PAGE_SIZE * i), bpa);
392#if 0
393			printf("r(%d): (%x,%x,%x) ", i, va, pa, bpa);
394#endif
395			/*
396			 * if we are writing, the copy the data into the page
397			 */
398			if ((bp->b_flags & B_READ) == 0) {
399				bcopy((caddr_t) va, (caddr_t) kva + (PAGE_SIZE * i), PAGE_SIZE);
400			}
401		} else {
402			/*
403			 * use original page
404			 */
405			pmap_kenter(kva + (PAGE_SIZE * i), pa);
406		}
407		va += PAGE_SIZE;
408	}
409
410/*
411 * flag the buffer as being bounced
412 */
413	bp->b_flags |= B_BOUNCE;
414/*
415 * save the original buffer kva
416 */
417	bp->b_savekva = bp->b_data;
418/*
419 * put our new kva into the buffer (offset by original offset)
420 */
421	bp->b_data = (caddr_t) (((vm_offset_t) kva) |
422				((vm_offset_t) bp->b_savekva & PAGE_MASK));
423#if 0
424	printf("b_savekva: %x, newva: %x\n", bp->b_savekva, bp->b_data);
425#endif
426	return;
427}
428
429/*
430 * hook into biodone to free bounce buffer
431 */
432void
433vm_bounce_free(bp)
434	struct buf *bp;
435{
436	int i;
437	vm_offset_t origkva, bouncekva, bouncekvaend;
438
439/*
440 * if this isn't a bounced buffer, then just return
441 */
442	if ((bp->b_flags & B_BOUNCE) == 0)
443		return;
444
445/*
446 *  This check is not necessary
447 *	if (bp->b_bufsize != bp->b_bcount) {
448 *		printf("vm_bounce_free: b_bufsize=%d, b_bcount=%d\n",
449 *			bp->b_bufsize, bp->b_bcount);
450 *	}
451 */
452
453	origkva = (vm_offset_t) bp->b_savekva;
454	bouncekva = (vm_offset_t) bp->b_data;
455/*
456	printf("free: %d ", bp->b_bufsize);
457*/
458
459/*
460 * check every page in the kva space for b_addr
461 */
462	for (i = 0; i < bp->b_bufsize; ) {
463		vm_offset_t mybouncepa;
464		vm_offset_t copycount;
465
466		copycount = round_page(bouncekva + 1) - bouncekva;
467		mybouncepa = pmap_kextract(trunc_page(bouncekva));
468
469/*
470 * if this is a bounced pa, then process as one
471 */
472		if ( mybouncepa != pmap_kextract( trunc_page( origkva))) {
473			vm_offset_t tocopy = copycount;
474			if (i + tocopy > bp->b_bufsize)
475				tocopy = bp->b_bufsize - i;
476/*
477 * if this is a read, then copy from bounce buffer into original buffer
478 */
479			if (bp->b_flags & B_READ)
480				bcopy((caddr_t) bouncekva, (caddr_t) origkva, tocopy);
481/*
482 * free the bounce allocation
483 */
484
485/*
486			printf("(kva: %x, pa: %x)", bouncekva, mybouncepa);
487*/
488			vm_bounce_page_free(mybouncepa, 1);
489		}
490
491		origkva += copycount;
492		bouncekva += copycount;
493		i += copycount;
494	}
495
496/*
497	printf("\n");
498*/
499/*
500 * add the old kva into the "to free" list
501 */
502
503	bouncekva= trunc_page((vm_offset_t) bp->b_data);
504	bouncekvaend= round_page((vm_offset_t)bp->b_data + bp->b_bufsize);
505
506/*
507	printf("freeva: %d\n", (bouncekvaend - bouncekva) / PAGE_SIZE);
508*/
509	vm_bounce_kva_free( bouncekva, (bouncekvaend - bouncekva), 0);
510	bp->b_data = bp->b_savekva;
511	bp->b_savekva = 0;
512	bp->b_flags &= ~B_BOUNCE;
513
514	return;
515}
516
517
518/*
519 * init the bounce buffer system
520 */
521void
522vm_bounce_init()
523{
524	int i;
525
526	kvasfreecnt = 0;
527
528	if (bouncepages == 0)
529		return;
530
531	bounceallocarraysize = (bouncepages + BITS_IN_UNSIGNED - 1) / BITS_IN_UNSIGNED;
532	bounceallocarray = malloc(bounceallocarraysize * sizeof(unsigned), M_TEMP, M_NOWAIT);
533
534	if (!bounceallocarray)
535		panic("Cannot allocate bounce resource array");
536
537	bouncepa = malloc(bouncepages * sizeof(vm_offset_t), M_TEMP, M_NOWAIT);
538	if (!bouncepa)
539		panic("Cannot allocate physical memory array");
540
541	for(i=0;i<bounceallocarraysize;i++) {
542		bounceallocarray[i] = 0xffffffff;
543	}
544
545	for(i=0;i<bouncepages;i++) {
546		vm_offset_t pa;
547		if( (pa = pmap_kextract((vm_offset_t) bouncememory + i * PAGE_SIZE)) >= SIXTEENMEG) {
548			printf("vm_bounce_init: bounce memory out of range -- bounce disabled\n");
549			free(bounceallocarray, M_TEMP);
550			bounceallocarray = NULL;
551			free(bouncepa, M_TEMP);
552			bouncepa = NULL;
553			bouncepages = 0;
554			break;
555		}
556		if( pa == 0)
557			panic("bounce memory not resident");
558		bouncepa[i] = pa;
559		bounceallocarray[i/(8*sizeof(int))] &= ~(1<<(i%(8*sizeof(int))));
560	}
561	bouncefree = bouncepages;
562
563}
564#endif /* BOUNCE_BUFFERS */
565
566/*
567 * quick version of vm_fault
568 */
569void
570vm_fault_quick(v, prot)
571	caddr_t v;
572	int prot;
573{
574	if (prot & VM_PROT_WRITE)
575		subyte(v, fubyte(v));
576	else
577		fubyte(v);
578}
579
580/*
581 * Finish a fork operation, with process p2 nearly set up.
582 * Copy and update the pcb, set up the stack so that the child
583 * ready to run and return to user mode.
584 */
585void
586cpu_fork(p1, p2)
587	register struct proc *p1, *p2;
588{
589	struct pcb *pcb2 = &p2->p_addr->u_pcb;
590
591#if NNPX > 0
592	/* Ensure that p1's pcb is up to date. */
593	if (npxproc == p1)
594		npxsave(&p1->p_addr->u_pcb.pcb_savefpu);
595#endif
596
597	/* Copy p1's pcb. */
598	p2->p_addr->u_pcb = p1->p_addr->u_pcb;
599
600	/*
601	 * Create a new fresh stack for the new process.
602	 * Copy the trap frame for the return to user mode as if from a
603	 * syscall.  This copies the user mode register values.
604	 */
605	p2->p_md.md_regs = (struct trapframe *)
606#ifdef VM86
607			   ((int)p2->p_addr + UPAGES * PAGE_SIZE - 16) - 1;
608#else
609			   ((int)p2->p_addr + UPAGES * PAGE_SIZE) - 1;
610#endif /* VM86 */
611	*p2->p_md.md_regs = *p1->p_md.md_regs;
612
613	/*
614	 * Set registers for trampoline to user mode.  Leave space for the
615	 * return address on stack.  These are the kernel mode register values.
616	 */
617	pcb2->pcb_cr3 = vtophys(p2->p_vmspace->vm_pmap.pm_pdir);
618	pcb2->pcb_edi = p2->p_md.md_regs->tf_edi;
619	pcb2->pcb_esi = (int)fork_return;
620	pcb2->pcb_ebp = p2->p_md.md_regs->tf_ebp;
621	pcb2->pcb_esp = (int)p2->p_md.md_regs - sizeof(void *);
622	pcb2->pcb_ebx = (int)p2;
623	pcb2->pcb_eip = (int)fork_trampoline;
624	/*
625	 * pcb2->pcb_ldt:	duplicated below, if necessary.
626	 * pcb2->pcb_ldt_len:	cloned above.
627	 * pcb2->pcb_savefpu:	cloned above.
628	 * pcb2->pcb_flags:	cloned above (always 0 here?).
629	 * pcb2->pcb_onfault:	cloned above (always NULL here?).
630	 */
631
632#ifdef VM86
633	/*
634	 * XXX don't copy the i/o pages.  this should probably be fixed.
635	 */
636	pcb2->pcb_ext = 0;
637#endif
638
639#ifdef USER_LDT
640        /* Copy the LDT, if necessary. */
641        if (pcb2->pcb_ldt != 0) {
642                union descriptor *new_ldt;
643                size_t len = pcb2->pcb_ldt_len * sizeof(union descriptor);
644
645                new_ldt = (union descriptor *)kmem_alloc(kernel_map, len);
646                bcopy(pcb2->pcb_ldt, new_ldt, len);
647                pcb2->pcb_ldt = (caddr_t)new_ldt;
648        }
649#endif
650
651	/*
652	 * Now, cpu_switch() can schedule the new process.
653	 * pcb_esp is loaded pointing to the cpu_switch() stack frame
654	 * containing the return address when exiting cpu_switch.
655	 * This will normally be to proc_trampoline(), which will have
656	 * %ebx loaded with the new proc's pointer.  proc_trampoline()
657	 * will set up a stack to call fork_return(p, frame); to complete
658	 * the return to user-mode.
659	 */
660}
661
662/*
663 * Intercept the return address from a freshly forked process that has NOT
664 * been scheduled yet.
665 *
666 * This is needed to make kernel threads stay in kernel mode.
667 */
668void
669cpu_set_fork_handler(p, func, arg)
670	struct proc *p;
671	void (*func) __P((void *));
672	void *arg;
673{
674	/*
675	 * Note that the trap frame follows the args, so the function
676	 * is really called like this:  func(arg, frame);
677	 */
678	p->p_addr->u_pcb.pcb_esi = (int) func;	/* function */
679	p->p_addr->u_pcb.pcb_ebx = (int) arg;	/* first arg */
680}
681
682void
683cpu_exit(p)
684	register struct proc *p;
685{
686#if defined(USER_LDT) || defined(VM86)
687	struct pcb *pcb = &p->p_addr->u_pcb;
688#endif
689
690#if NNPX > 0
691	npxexit(p);
692#endif	/* NNPX */
693#ifdef VM86
694	if (pcb->pcb_ext != 0) {
695	        /*
696		 * XXX do we need to move the TSS off the allocated pages
697		 * before freeing them?  (not done here)
698		 */
699		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ext,
700		    ctob(IOPAGES + 1));
701		pcb->pcb_ext = 0;
702	}
703#endif
704#ifdef USER_LDT
705	if (pcb->pcb_ldt != 0) {
706		if (pcb == curpcb)
707			lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
708		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
709			pcb->pcb_ldt_len * sizeof(union descriptor));
710		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
711	}
712#endif
713	cnt.v_swtch++;
714	cpu_switch(p);
715	panic("cpu_exit");
716}
717
718void
719cpu_wait(p)
720	struct proc *p;
721{
722	/* drop per-process resources */
723	pmap_dispose_proc(p);
724
725	/* and clean-out the vmspace */
726	vmspace_free(p->p_vmspace);
727}
728
729/*
730 * Dump the machine specific header information at the start of a core dump.
731 */
732int
733cpu_coredump(p, vp, cred)
734	struct proc *p;
735	struct vnode *vp;
736	struct ucred *cred;
737{
738
739	return (vn_rdwr(UIO_WRITE, vp, (caddr_t) p->p_addr, ctob(UPAGES),
740	    (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *)NULL,
741	    p));
742}
743
#ifdef notyet
/*
 * Placeholder for a stack red zone; currently does nothing.
 */
static void
setredzone(pte, vaddr)
	u_short *pte;
	caddr_t vaddr;
{
	/*
	 * Eventually do this by setting up an expand-down stack segment
	 * for the ss0: selector, allowing stack access down to the top
	 * of u.  Protection violations would then have to be handled
	 * through a double fault exception that does an integral task
	 * switch to a known good context, within which a dump can be
	 * taken.  A sensible scheme might be to save the initial context
	 * used by sched (which has physical memory mapped 1:1 at the
	 * bottom) and take the dump while still in mapped mode.
	 */
}
#endif
760
761/*
762 * Convert kernel VA to physical address
763 */
764u_long
765kvtop(void *addr)
766{
767	vm_offset_t va;
768
769	va = pmap_kextract((vm_offset_t)addr);
770	if (va == 0)
771		panic("kvtop: zero page frame");
772	return((int)va);
773}
774
775/*
776 * Map an IO request into kernel virtual address space.
777 *
778 * All requests are (re)mapped into kernel VA space.
779 * Notice that we use b_bufsize for the size of the buffer
780 * to be mapped.  b_bcount might be modified by the driver.
781 */
782void
783vmapbuf(bp)
784	register struct buf *bp;
785{
786	register caddr_t addr, v, kva;
787	vm_offset_t pa;
788
789	if ((bp->b_flags & B_PHYS) == 0)
790		panic("vmapbuf");
791
792	for (v = bp->b_saveaddr, addr = (caddr_t)trunc_page(bp->b_data);
793	    addr < bp->b_data + bp->b_bufsize;
794	    addr += PAGE_SIZE, v += PAGE_SIZE) {
795		/*
796		 * Do the vm_fault if needed; do the copy-on-write thing
797		 * when reading stuff off device into memory.
798		 */
799		vm_fault_quick(addr,
800			(bp->b_flags&B_READ)?(VM_PROT_READ|VM_PROT_WRITE):VM_PROT_READ);
801		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
802		if (pa == 0)
803			panic("vmapbuf: page not present");
804		vm_page_hold(PHYS_TO_VM_PAGE(pa));
805		pmap_kenter((vm_offset_t) v, pa);
806	}
807
808	kva = bp->b_saveaddr;
809	bp->b_saveaddr = bp->b_data;
810	bp->b_data = kva + (((vm_offset_t) bp->b_data) & PAGE_MASK);
811}
812
813/*
814 * Free the io map PTEs associated with this IO operation.
815 * We also invalidate the TLB entries and restore the original b_addr.
816 */
817void
818vunmapbuf(bp)
819	register struct buf *bp;
820{
821	register caddr_t addr;
822	vm_offset_t pa;
823
824	if ((bp->b_flags & B_PHYS) == 0)
825		panic("vunmapbuf");
826
827	for (addr = (caddr_t)trunc_page(bp->b_data);
828	    addr < bp->b_data + bp->b_bufsize;
829	    addr += PAGE_SIZE) {
830		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
831		pmap_kremove((vm_offset_t) addr);
832		vm_page_unhold(PHYS_TO_VM_PAGE(pa));
833	}
834
835	bp->b_data = bp->b_saveaddr;
836}
837
838/*
839 * Force reset the processor by invalidating the entire address space!
840 */
841void
842cpu_reset()
843{
844
845#ifdef PC98
846	/*
847	 * Attempt to do a CPU reset via CPU reset port.
848	 */
849	disable_intr();
850	outb(0x37, 0x0f);		/* SHUT0 = 0. */
851	outb(0x37, 0x0b);		/* SHUT1 = 0. */
852	outb(0xf0, 0x00);		/* Reset. */
853#else
854	/*
855	 * Attempt to do a CPU reset via the keyboard controller,
856	 * do not turn of the GateA20, as any machine that fails
857	 * to do the reset here would then end up in no man's land.
858	 */
859
860#if !defined(BROKEN_KEYBOARD_RESET)
861	outb(IO_KBD + 4, 0xFE);
862	DELAY(500000);	/* wait 0.5 sec to see if that did it */
863	printf("Keyboard reset did not work, attempting CPU shutdown\n");
864	DELAY(1000000);	/* wait 1 sec for printf to complete */
865#endif
866#endif /* PC98 */
867	/* force a shutdown by unmapping entire address space ! */
868	bzero((caddr_t) PTD, PAGE_SIZE);
869
870	/* "good night, sweet prince .... <THUNK!>" */
871	invltlb();
872	/* NOTREACHED */
873	while(1);
874}
875
876/*
877 * Grow the user stack to allow for 'sp'. This version grows the stack in
878 *	chunks of SGROWSIZ.
879 */
880int
881grow(p, sp)
882	struct proc *p;
883	u_int sp;
884{
885	unsigned int nss;
886	caddr_t v;
887	struct vmspace *vm = p->p_vmspace;
888
889	if ((caddr_t)sp <= vm->vm_maxsaddr || (unsigned)sp >= (unsigned)USRSTACK)
890	    return (1);
891
892	nss = roundup(USRSTACK - (unsigned)sp, PAGE_SIZE);
893
894	if (nss > p->p_rlimit[RLIMIT_STACK].rlim_cur)
895		return (0);
896
897	if (vm->vm_ssize && roundup(vm->vm_ssize << PAGE_SHIFT,
898	    SGROWSIZ) < nss) {
899		int grow_amount;
900		/*
901		 * If necessary, grow the VM that the stack occupies
902		 * to allow for the rlimit. This allows us to not have
903		 * to allocate all of the VM up-front in execve (which
904		 * is expensive).
905		 * Grow the VM by the amount requested rounded up to
906		 * the nearest SGROWSIZ to provide for some hysteresis.
907		 */
908		grow_amount = roundup((nss - (vm->vm_ssize << PAGE_SHIFT)), SGROWSIZ);
909		v = (char *)USRSTACK - roundup(vm->vm_ssize << PAGE_SHIFT,
910		    SGROWSIZ) - grow_amount;
911		/*
912		 * If there isn't enough room to extend by SGROWSIZ, then
913		 * just extend to the maximum size
914		 */
915		if (v < vm->vm_maxsaddr) {
916			v = vm->vm_maxsaddr;
917			grow_amount = MAXSSIZ - (vm->vm_ssize << PAGE_SHIFT);
918		}
919		if ((grow_amount == 0) || (vm_map_find(&vm->vm_map, NULL, 0, (vm_offset_t *)&v,
920		    grow_amount, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)) {
921			return (0);
922		}
923		vm->vm_ssize += grow_amount >> PAGE_SHIFT;
924	}
925
926	return (1);
927}
928
929static int cnt_prezero;
930
931SYSCTL_INT(_machdep, OID_AUTO, cnt_prezero, CTLFLAG_RD, &cnt_prezero, 0, "");
932
933/*
934 * Implement the pre-zeroed page mechanism.
935 * This routine is called from the idle loop.
936 */
937int
938vm_page_zero_idle()
939{
940	static int free_rover;
941	vm_page_t m;
942	int s;
943
944	/*
945	 * XXX
946	 * We stop zeroing pages when there are sufficent prezeroed pages.
947	 * This threshold isn't really needed, except we want to
948	 * bypass unneeded calls to vm_page_list_find, and the
949	 * associated cache flush and latency.  The pre-zero will
950	 * still be called when there are significantly more
951	 * non-prezeroed pages than zeroed pages.  The threshold
952	 * of half the number of reserved pages is arbitrary, but
953	 * approximately the right amount.  Eventually, we should
954	 * perhaps interrupt the zero operation when a process
955	 * is found to be ready to run.
956	 */
957	if (cnt.v_free_count - vm_page_zero_count <= cnt.v_free_reserved / 2)
958		return (0);
959#ifdef SMP
960	if (try_mplock()) {
961#endif
962		s = splvm();
963		__asm __volatile("sti" : : : "memory");
964		m = vm_page_list_find(PQ_FREE, free_rover);
965		if (m != NULL) {
966			--(*vm_page_queues[m->queue].lcnt);
967			TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq);
968			m->queue = PQ_NONE;
969			splx(s);
970#if 0
971			rel_mplock();
972#endif
973			pmap_zero_page(VM_PAGE_TO_PHYS(m));
974#if 0
975			get_mplock();
976#endif
977			(void)splvm();
978			m->queue = PQ_ZERO + m->pc;
979			++(*vm_page_queues[m->queue].lcnt);
980			TAILQ_INSERT_HEAD(vm_page_queues[m->queue].pl, m,
981			    pageq);
982			free_rover = (free_rover + PQ_PRIME3) & PQ_L2_MASK;
983			++vm_page_zero_count;
984			++cnt_prezero;
985		}
986		splx(s);
987		__asm __volatile("cli" : : : "memory");
988#ifdef SMP
989		rel_mplock();
990#endif
991		return (1);
992#ifdef SMP
993	}
994#endif
995	return (0);
996}
997
998/*
999 * Software interrupt handler for queued VM system processing.
1000 */
1001void
1002swi_vm()
1003{
1004	if (busdma_swi_pending != 0)
1005		busdma_swi();
1006}
1007
1008/*
1009 * Tell whether this address is in some physical memory region.
1010 * Currently used by the kernel coredump code in order to avoid
1011 * dumping the ``ISA memory hole'' which could cause indefinite hangs,
1012 * or other unpredictable behaviour.
1013 */
1014
1015#include "isa.h"
1016
1017int
1018is_physical_memory(addr)
1019	vm_offset_t addr;
1020{
1021
1022#if NISA > 0
1023	/* The ISA ``memory hole''. */
1024	if (addr >= 0xa0000 && addr < 0x100000)
1025		return 0;
1026#endif
1027
1028	/*
1029	 * stuff other tests for known memory-mapped devices (PCI?)
1030	 * here
1031	 */
1032
1033	return 1;
1034}
1035