vm_machdep.c revision 33817
1/*-
2 * Copyright (c) 1982, 1986 The Regents of the University of California.
3 * Copyright (c) 1989, 1990 William Jolitz
4 * Copyright (c) 1994 John Dyson
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * the Systems Programming Group of the University of Utah Computer
9 * Science Department, and William Jolitz.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 *    must display the following acknowledgement:
21 *	This product includes software developed by the University of
22 *	California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 *    may be used to endorse or promote products derived from this software
25 *    without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
40 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
41 *	$Id: vm_machdep.c,v 1.100 1998/02/13 05:30:18 bde Exp $
42 */
43
44#include "npx.h"
45#include "opt_bounce.h"
46#include "opt_user_ldt.h"
47#include "opt_vm86.h"
48
49#include <sys/param.h>
50#include <sys/systm.h>
51#include <sys/proc.h>
52#include <sys/malloc.h>
53#include <sys/buf.h>
54#include <sys/vnode.h>
55#include <sys/vmmeter.h>
56#include <sys/kernel.h>
57#include <sys/sysctl.h>
58
59#include <machine/clock.h>
60#include <machine/cpu.h>
61#include <machine/md_var.h>
62#ifdef SMP
63#include <machine/smp.h>
64#endif
65
66#include <vm/vm.h>
67#include <vm/vm_param.h>
68#include <vm/vm_prot.h>
69#include <sys/lock.h>
70#include <vm/vm_kern.h>
71#include <vm/vm_page.h>
72#include <vm/vm_map.h>
73#include <vm/vm_extern.h>
74
75#include <sys/user.h>
76
77#ifdef PC98
78#include <pc98/pc98/pc98.h>
79#else
80#include <i386/isa/isa.h>
81#endif
82
83#ifdef BOUNCE_BUFFERS
84static vm_offset_t
85		vm_bounce_kva __P((int size, int waitok));
86static void	vm_bounce_kva_free __P((vm_offset_t addr, vm_offset_t size,
87					int now));
88static vm_offset_t
89		vm_bounce_page_find __P((int count));
90static void	vm_bounce_page_free __P((vm_offset_t pa, int count));
91
92static volatile int	kvasfreecnt;
93
94caddr_t		bouncememory;
95static int	bpwait;
96static vm_offset_t	*bouncepa;
97static int		bmwait, bmfreeing;
98
99#define BITS_IN_UNSIGNED (8*sizeof(unsigned))
100static int		bounceallocarraysize;
101static unsigned	*bounceallocarray;
102static int		bouncefree;
103
104#if defined(PC98) && defined (EPSON_BOUNCEDMA)
105#define SIXTEENMEG (3840*4096)			/* 15MB boundary */
106#else
107#define SIXTEENMEG (4096*4096)
108#endif
109#define MAXBKVA 1024
110int		maxbkva = MAXBKVA*PAGE_SIZE;
111
112/* special list that can be used at interrupt time for eventual kva free */
113static struct kvasfree {
114	vm_offset_t addr;
115	vm_offset_t size;
116} kvaf[MAXBKVA];
117
118/*
119 * get bounce buffer pages (count physically contiguous)
120 * (only 1 implemented now)
121 */
122static vm_offset_t
123vm_bounce_page_find(count)
124	int count;
125{
126	int bit;
127	int s,i;
128
129	if (count != 1)
130		panic("vm_bounce_page_find -- no support for > 1 page yet!!!");
131
132	s = splbio();
133retry:
134	for (i = 0; i < bounceallocarraysize; i++) {
135		if (bounceallocarray[i] != 0xffffffff) {
136			bit = ffs(~bounceallocarray[i]);
137			if (bit) {
138				bounceallocarray[i] |= 1 << (bit - 1) ;
139				bouncefree -= count;
140				splx(s);
141				return bouncepa[(i * BITS_IN_UNSIGNED + (bit - 1))];
142			}
143		}
144	}
145	bpwait = 1;
146	tsleep((caddr_t) &bounceallocarray, PRIBIO, "bncwai", 0);
147	goto retry;
148}
149
150static void
151vm_bounce_kva_free(addr, size, now)
152	vm_offset_t addr;
153	vm_offset_t size;
154	int now;
155{
156	int s = splbio();
157	kvaf[kvasfreecnt].addr = addr;
158	kvaf[kvasfreecnt].size = size;
159	++kvasfreecnt;
160	if( now) {
161		/*
162		 * this will do wakeups
163		 */
164		vm_bounce_kva(0,0);
165	} else {
166		if (bmwait) {
167		/*
168		 * if anyone is waiting on the bounce-map, then wakeup
169		 */
170			wakeup((caddr_t) io_map);
171			bmwait = 0;
172		}
173	}
174	splx(s);
175}
176
177/*
178 * free count bounce buffer pages
179 */
180static void
181vm_bounce_page_free(pa, count)
182	vm_offset_t pa;
183	int count;
184{
185	int allocindex;
186	int index;
187	int bit;
188
189	if (count != 1)
190		panic("vm_bounce_page_free -- no support for > 1 page yet!!!");
191
192	for(index=0;index<bouncepages;index++) {
193		if( pa == bouncepa[index])
194			break;
195	}
196
197	if( index == bouncepages)
198		panic("vm_bounce_page_free: invalid bounce buffer");
199
200	allocindex = index / BITS_IN_UNSIGNED;
201	bit = index % BITS_IN_UNSIGNED;
202
203	bounceallocarray[allocindex] &= ~(1 << bit);
204
205	bouncefree += count;
206	if (bpwait) {
207		bpwait = 0;
208		wakeup((caddr_t) &bounceallocarray);
209	}
210}
211
212/*
213 * allocate size bytes of bounce buffer kva (size == 0 just frees pending kva)
214 */
215static vm_offset_t
216vm_bounce_kva(size, waitok)
217	int size;
218	int waitok;
219{
220	int i;
221	vm_offset_t kva = 0;
222	vm_offset_t off;
223	int s = splbio();
224more:
225	if (!bmfreeing && kvasfreecnt) {
226		bmfreeing = 1;
227		for (i = 0; i < kvasfreecnt; i++) {
228			for(off=0;off<kvaf[i].size;off+=PAGE_SIZE) {
229				pmap_kremove( kvaf[i].addr + off);
230			}
231			kmem_free_wakeup(io_map, kvaf[i].addr,
232				kvaf[i].size);
233		}
234		kvasfreecnt = 0;
235		bmfreeing = 0;
236		if( bmwait) {
237			bmwait = 0;
238			wakeup( (caddr_t) io_map);
239		}
240	}
241
242	if( size == 0) {
243		splx(s);
244		return 0;
245	}
246
247	if ((kva = kmem_alloc_pageable(io_map, size)) == 0) {
248		if( !waitok) {
249			splx(s);
250			return 0;
251		}
252		bmwait = 1;
253		tsleep((caddr_t) io_map, PRIBIO, "bmwait", 0);
254		goto more;
255	}
256	splx(s);
257	return kva;
258}
259
260/*
261 * same as vm_bounce_kva -- but really allocates backing pages (takes a page count as arg)
262 */
263vm_offset_t
264vm_bounce_kva_alloc(count)
265int count;
266{
267	int i;
268	vm_offset_t kva;
269	vm_offset_t pa;
270	if( bouncepages == 0) {
271		kva = (vm_offset_t) malloc(count*PAGE_SIZE, M_TEMP, M_WAITOK);
272		return kva;
273	}
274	kva = vm_bounce_kva(count*PAGE_SIZE, 1);
275	for(i=0;i<count;i++) {
276		pa = vm_bounce_page_find(1);
277		pmap_kenter(kva + i * PAGE_SIZE, pa);
278	}
279	return kva;
280}
281
282/*
283 * same as vm_bounce_kva_free -- but really free
284 */
285void
286vm_bounce_kva_alloc_free(kva, count)
287	vm_offset_t kva;
288	int count;
289{
290	int i;
291	vm_offset_t pa;
292	if( bouncepages == 0) {
293		free((caddr_t) kva, M_TEMP);
294		return;
295	}
296	for(i = 0; i < count; i++) {
297		pa = pmap_kextract(kva + i * PAGE_SIZE);
298		vm_bounce_page_free(pa, 1);
299	}
300	vm_bounce_kva_free(kva, count*PAGE_SIZE, 0);
301}
302
303/*
304 * do the things necessary to the struct buf to implement
305 * bounce buffers...  inserted before the disk sort
306 */
307void
308vm_bounce_alloc(bp)
309	struct buf *bp;
310{
311	int countvmpg;
312	vm_offset_t vastart, vaend;
313	vm_offset_t vapstart, vapend;
314	vm_offset_t va, kva;
315	vm_offset_t pa;
316	int dobounceflag = 0;
317	int i;
318
319	if (bouncepages == 0)
320		return;
321
322	if (bp->b_flags & B_BOUNCE) {
323		printf("vm_bounce_alloc: called recursively???\n");
324		return;
325	}
326
327	if (bp->b_bufsize < bp->b_bcount) {
328		printf(
329		    "vm_bounce_alloc: b_bufsize(0x%lx) < b_bcount(0x%lx) !!\n",
330			bp->b_bufsize, bp->b_bcount);
331		panic("vm_bounce_alloc");
332	}
333
334/*
335 *  This is not really necessary
336 *	if( bp->b_bufsize != bp->b_bcount) {
337 *		printf("size: %d, count: %d\n", bp->b_bufsize, bp->b_bcount);
338 *	}
339 */
340
341
342	vastart = (vm_offset_t) bp->b_data;
343	vaend = (vm_offset_t) bp->b_data + bp->b_bufsize;
344
345	vapstart = trunc_page(vastart);
346	vapend = round_page(vaend);
347	countvmpg = (vapend - vapstart) / PAGE_SIZE;
348
349/*
350 * if any page is above 16MB, then go into bounce-buffer mode
351 */
352	va = vapstart;
353	for (i = 0; i < countvmpg; i++) {
354		pa = pmap_kextract(va);
355		if (pa >= SIXTEENMEG)
356			++dobounceflag;
357		if( pa == 0)
358			panic("vm_bounce_alloc: Unmapped page");
359		va += PAGE_SIZE;
360	}
361	if (dobounceflag == 0)
362		return;
363
364	if (bouncepages < dobounceflag)
365		panic("Not enough bounce buffers!!!");
366
367/*
368 * allocate a replacement kva for b_addr
369 */
370	kva = vm_bounce_kva(countvmpg*PAGE_SIZE, 1);
371#if 0
372	printf("%s: vapstart: %x, vapend: %x, countvmpg: %d, kva: %x ",
373		(bp->b_flags & B_READ) ? "read":"write",
374			vapstart, vapend, countvmpg, kva);
375#endif
376	va = vapstart;
377	for (i = 0; i < countvmpg; i++) {
378		pa = pmap_kextract(va);
379		if (pa >= SIXTEENMEG) {
380			/*
381			 * allocate a replacement page
382			 */
383			vm_offset_t bpa = vm_bounce_page_find(1);
384			pmap_kenter(kva + (PAGE_SIZE * i), bpa);
385#if 0
386			printf("r(%d): (%x,%x,%x) ", i, va, pa, bpa);
387#endif
388			/*
389			 * if we are writing, then copy the data into the page
390			 */
391			if ((bp->b_flags & B_READ) == 0) {
392				bcopy((caddr_t) va, (caddr_t) kva + (PAGE_SIZE * i), PAGE_SIZE);
393			}
394		} else {
395			/*
396			 * use original page
397			 */
398			pmap_kenter(kva + (PAGE_SIZE * i), pa);
399		}
400		va += PAGE_SIZE;
401	}
402
403/*
404 * flag the buffer as being bounced
405 */
406	bp->b_flags |= B_BOUNCE;
407/*
408 * save the original buffer kva
409 */
410	bp->b_savekva = bp->b_data;
411/*
412 * put our new kva into the buffer (offset by original offset)
413 */
414	bp->b_data = (caddr_t) (((vm_offset_t) kva) |
415				((vm_offset_t) bp->b_savekva & PAGE_MASK));
416#if 0
417	printf("b_savekva: %x, newva: %x\n", bp->b_savekva, bp->b_data);
418#endif
419	return;
420}
421
422/*
423 * hook into biodone to free bounce buffer
424 */
425void
426vm_bounce_free(bp)
427	struct buf *bp;
428{
429	int i;
430	vm_offset_t origkva, bouncekva, bouncekvaend;
431
432/*
433 * if this isn't a bounced buffer, then just return
434 */
435	if ((bp->b_flags & B_BOUNCE) == 0)
436		return;
437
438/*
439 *  This check is not necessary
440 *	if (bp->b_bufsize != bp->b_bcount) {
441 *		printf("vm_bounce_free: b_bufsize=%d, b_bcount=%d\n",
442 *			bp->b_bufsize, bp->b_bcount);
443 *	}
444 */
445
446	origkva = (vm_offset_t) bp->b_savekva;
447	bouncekva = (vm_offset_t) bp->b_data;
448/*
449	printf("free: %d ", bp->b_bufsize);
450*/
451
452/*
453 * check every page in the kva space for b_addr
454 */
455	for (i = 0; i < bp->b_bufsize; ) {
456		vm_offset_t mybouncepa;
457		vm_offset_t copycount;
458
459		copycount = round_page(bouncekva + 1) - bouncekva;
460		mybouncepa = pmap_kextract(trunc_page(bouncekva));
461
462/*
463 * if this is a bounced pa, then process as one
464 */
465		if ( mybouncepa != pmap_kextract( trunc_page( origkva))) {
466			vm_offset_t tocopy = copycount;
467			if (i + tocopy > bp->b_bufsize)
468				tocopy = bp->b_bufsize - i;
469/*
470 * if this is a read, then copy from bounce buffer into original buffer
471 */
472			if (bp->b_flags & B_READ)
473				bcopy((caddr_t) bouncekva, (caddr_t) origkva, tocopy);
474/*
475 * free the bounce allocation
476 */
477
478/*
479			printf("(kva: %x, pa: %x)", bouncekva, mybouncepa);
480*/
481			vm_bounce_page_free(mybouncepa, 1);
482		}
483
484		origkva += copycount;
485		bouncekva += copycount;
486		i += copycount;
487	}
488
489/*
490	printf("\n");
491*/
492/*
493 * add the old kva into the "to free" list
494 */
495
496	bouncekva= trunc_page((vm_offset_t) bp->b_data);
497	bouncekvaend= round_page((vm_offset_t)bp->b_data + bp->b_bufsize);
498
499/*
500	printf("freeva: %d\n", (bouncekvaend - bouncekva) / PAGE_SIZE);
501*/
502	vm_bounce_kva_free( bouncekva, (bouncekvaend - bouncekva), 0);
503	bp->b_data = bp->b_savekva;
504	bp->b_savekva = 0;
505	bp->b_flags &= ~B_BOUNCE;
506
507	return;
508}
509
510
511/*
512 * init the bounce buffer system
513 */
514void
515vm_bounce_init()
516{
517	int i;
518
519	kvasfreecnt = 0;
520
521	if (bouncepages == 0)
522		return;
523
524	bounceallocarraysize = (bouncepages + BITS_IN_UNSIGNED - 1) / BITS_IN_UNSIGNED;
525	bounceallocarray = malloc(bounceallocarraysize * sizeof(unsigned), M_TEMP, M_NOWAIT);
526
527	if (!bounceallocarray)
528		panic("Cannot allocate bounce resource array");
529
530	bouncepa = malloc(bouncepages * sizeof(vm_offset_t), M_TEMP, M_NOWAIT);
531	if (!bouncepa)
532		panic("Cannot allocate physical memory array");
533
534	for(i=0;i<bounceallocarraysize;i++) {
535		bounceallocarray[i] = 0xffffffff;
536	}
537
538	for(i=0;i<bouncepages;i++) {
539		vm_offset_t pa;
540		if( (pa = pmap_kextract((vm_offset_t) bouncememory + i * PAGE_SIZE)) >= SIXTEENMEG) {
541			printf("vm_bounce_init: bounce memory out of range -- bounce disabled\n");
542			free(bounceallocarray, M_TEMP);
543			bounceallocarray = NULL;
544			free(bouncepa, M_TEMP);
545			bouncepa = NULL;
546			bouncepages = 0;
547			break;
548		}
549		if( pa == 0)
550			panic("bounce memory not resident");
551		bouncepa[i] = pa;
552		bounceallocarray[i/(8*sizeof(int))] &= ~(1<<(i%(8*sizeof(int))));
553	}
554	bouncefree = bouncepages;
555
556}
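
/*
 * Rough usage sketch (illustrative only, not code from this file): a
 * driver whose DMA engine cannot reach memory above SIXTEENMEG runs each
 * buffer through the bounce machinery around its strategy/biodone path:
 *
 *	vm_bounce_alloc(bp);		-- before queueing the transfer
 *	(start the DMA using bp->b_data)
 *	vm_bounce_free(bp);		-- from the biodone path
 *
 * vm_bounce_alloc() substitutes low pages and copies write data in;
 * vm_bounce_free() copies read data back out and releases the pages.
 */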
557#endif /* BOUNCE_BUFFERS */
558
559/*
560 * quick version of vm_fault
561 */
562void
563vm_fault_quick(v, prot)
564	caddr_t v;
565	int prot;
566{
567	if (prot & VM_PROT_WRITE)
568		subyte(v, fubyte(v));
569	else
570		fubyte(v);
571}
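
/*
 * A note on the trick above (explanatory, based on the usual fubyte/subyte
 * semantics): fubyte() fetches a byte from user space and subyte() stores
 * one back, so merely reading the byte faults the page in, while rewriting
 * the same byte also forces any copy-on-write processing needed before the
 * page can be written.
 */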
572
573/*
574 * Finish a fork operation, with process p2 nearly set up.
575 * Copy and update the pcb, set up the stack so that the child
576 * is ready to run and return to user mode.
577 */
578void
579cpu_fork(p1, p2)
580	register struct proc *p1, *p2;
581{
582	struct pcb *pcb2 = &p2->p_addr->u_pcb;
583
584#if NNPX > 0
585	/* Ensure that p1's pcb is up to date. */
586	if (npxproc == p1)
587		npxsave(&p1->p_addr->u_pcb.pcb_savefpu);
588#endif
589
590	/* Copy p1's pcb. */
591	p2->p_addr->u_pcb = p1->p_addr->u_pcb;
592
593	/*
594	 * Create a new fresh stack for the new process.
595	 * Copy the trap frame for the return to user mode as if from a
596	 * syscall.  This copies the user mode register values.
597	 */
598	p2->p_md.md_regs = (struct trapframe *)
599			   ((int)p2->p_addr + UPAGES * PAGE_SIZE) - 1;
600	*p2->p_md.md_regs = *p1->p_md.md_regs;
601
602	/*
603	 * Set registers for trampoline to user mode.  Leave space for the
604	 * return address on stack.  These are the kernel mode register values.
605	 */
606	pcb2->pcb_cr3 = vtophys(p2->p_vmspace->vm_pmap.pm_pdir);
607	pcb2->pcb_edi = p2->p_md.md_regs->tf_edi;
608	pcb2->pcb_esi = (int)fork_return;
609	pcb2->pcb_ebp = p2->p_md.md_regs->tf_ebp;
610	pcb2->pcb_esp = (int)p2->p_md.md_regs - sizeof(void *);
611	pcb2->pcb_ebx = (int)p2;
612	pcb2->pcb_eip = (int)fork_trampoline;
613	/*
614	 * pcb2->pcb_ldt:	duplicated below, if necessary.
615	 * pcb2->pcb_ldt_len:	cloned above.
616	 * pcb2->pcb_savefpu:	cloned above.
617	 * pcb2->pcb_flags:	cloned above (always 0 here?).
618	 * pcb2->pcb_onfault:	cloned above (always NULL here?).
619	 */
620
621#ifdef VM86
622	/*
623	 * XXX don't copy the i/o pages.  this should probably be fixed.
624	 */
625	pcb2->pcb_ext = 0;
626#endif
627
628#ifdef USER_LDT
629        /* Copy the LDT, if necessary. */
630        if (pcb2->pcb_ldt != 0) {
631                union descriptor *new_ldt;
632                size_t len = pcb2->pcb_ldt_len * sizeof(union descriptor);
633
634                new_ldt = (union descriptor *)kmem_alloc(kernel_map, len);
635                bcopy(pcb2->pcb_ldt, new_ldt, len);
636                pcb2->pcb_ldt = (caddr_t)new_ldt;
637        }
638#endif
639
640	/*
641	 * Now, cpu_switch() can schedule the new process.
642	 * pcb_esp is loaded pointing to the cpu_switch() stack frame
643	 * containing the return address when exiting cpu_switch.
644	 * This will normally be to fork_trampoline(), which will have
645	 * %ebx loaded with the new proc's pointer.  fork_trampoline()
646	 * will set up a stack to call fork_return(p, frame); to complete
647	 * the return to user-mode.
648	 */
649}
650
651/*
652 * Intercept the return address from a freshly forked process that has NOT
653 * been scheduled yet.
654 *
655 * This is needed to make kernel threads stay in kernel mode.
656 */
657void
658cpu_set_fork_handler(p, func, arg)
659	struct proc *p;
660	void (*func) __P((void *));
661	void *arg;
662{
663	/*
664	 * Note that the trap frame follows the args, so the function
665	 * is really called like this:  func(arg, frame);
666	 */
667	p->p_addr->u_pcb.pcb_esi = (int) func;	/* function */
668	p->p_addr->u_pcb.pcb_ebx = (int) arg;	/* first arg */
669}
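
/*
 * Usage sketch (hypothetical caller, not part of this file): code that
 * creates a kernel-only process would fork a child and then point its
 * return path at a kernel function instead of user mode, e.g.
 *
 *	cpu_set_fork_handler(child, kproc_start, kproc_arg);
 *
 * where "kproc_start" and "kproc_arg" are placeholder names.  The child
 * then enters kproc_start(kproc_arg, frame) via fork_trampoline() the
 * first time it is scheduled, instead of returning to user mode.
 */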
670
671void
672cpu_exit(p)
673	register struct proc *p;
674{
675#if defined(USER_LDT) || defined(VM86)
676	struct pcb *pcb = &p->p_addr->u_pcb;
677#endif
678
679#if NNPX > 0
680	npxexit(p);
681#endif	/* NNPX */
682#ifdef VM86
683	if (pcb->pcb_ext != 0) {
684	        /*
685		 * XXX do we need to move the TSS off the allocated pages
686		 * before freeing them?  (not done here)
687		 */
688		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ext,
689		    ctob(IOPAGES + 1));
690		pcb->pcb_ext = 0;
691	}
692#endif
693#ifdef USER_LDT
694	if (pcb->pcb_ldt != 0) {
695		if (pcb == curpcb)
696			lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
697		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
698			pcb->pcb_ldt_len * sizeof(union descriptor));
699		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
700	}
701#endif
702	cnt.v_swtch++;
703	cpu_switch(p);
704	panic("cpu_exit");
705}
706
707void
708cpu_wait(p)
709	struct proc *p;
710{
711	/* drop per-process resources */
712	pmap_dispose_proc(p);
713
714	/* and clean-out the vmspace */
715	vmspace_free(p->p_vmspace);
716}
717
718/*
719 * Dump the machine specific header information at the start of a core dump.
720 */
721int
722cpu_coredump(p, vp, cred)
723	struct proc *p;
724	struct vnode *vp;
725	struct ucred *cred;
726{
727
728	return (vn_rdwr(UIO_WRITE, vp, (caddr_t) p->p_addr, ctob(UPAGES),
729	    (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *)NULL,
730	    p));
731}
732
733#ifdef notyet
734static void
735setredzone(pte, vaddr)
736	u_short *pte;
737	caddr_t vaddr;
738{
739/* eventually do this by setting up an expand-down stack segment
740   for ss0: selector, allowing stack access down to top of u.
741   this means though that protection violations need to be handled
742   thru a double fault exception that must do an integral task
743   switch to a known good context, within which a dump can be
744   taken. a sensible scheme might be to save the initial context
745   used by sched (that has physical memory mapped 1:1 at bottom)
746   and take the dump while still in mapped mode */
747}
748#endif
749
750/*
751 * Convert kernel VA to physical address
752 */
753u_long
754kvtop(void *addr)
755{
756	vm_offset_t va;
757
758	va = pmap_kextract((vm_offset_t)addr);
759	if (va == 0)
760		panic("kvtop: zero page frame");
761	return((int)va);
762}
763
764/*
765 * Map an IO request into kernel virtual address space.
766 *
767 * All requests are (re)mapped into kernel VA space.
768 * Notice that we use b_bufsize for the size of the buffer
769 * to be mapped.  b_bcount might be modified by the driver.
770 */
771void
772vmapbuf(bp)
773	register struct buf *bp;
774{
775	register caddr_t addr, v, kva;
776	vm_offset_t pa;
777
778	if ((bp->b_flags & B_PHYS) == 0)
779		panic("vmapbuf");
780
781	for (v = bp->b_saveaddr, addr = (caddr_t)trunc_page(bp->b_data);
782	    addr < bp->b_data + bp->b_bufsize;
783	    addr += PAGE_SIZE, v += PAGE_SIZE) {
784		/*
785		 * Do the vm_fault if needed; do the copy-on-write thing
786		 * when reading stuff off device into memory.
787		 */
788		vm_fault_quick(addr,
789			(bp->b_flags&B_READ)?(VM_PROT_READ|VM_PROT_WRITE):VM_PROT_READ);
790		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
791		if (pa == 0)
792			panic("vmapbuf: page not present");
793		vm_page_hold(PHYS_TO_VM_PAGE(pa));
794		pmap_kenter((vm_offset_t) v, pa);
795	}
796
797	kva = bp->b_saveaddr;
798	bp->b_saveaddr = bp->b_data;
799	bp->b_data = kva + (((vm_offset_t) bp->b_data) & PAGE_MASK);
800}
801
802/*
803 * Free the io map PTEs associated with this IO operation.
804 * We also invalidate the TLB entries and restore the original b_addr.
805 */
806void
807vunmapbuf(bp)
808	register struct buf *bp;
809{
810	register caddr_t addr;
811	vm_offset_t pa;
812
813	if ((bp->b_flags & B_PHYS) == 0)
814		panic("vunmapbuf");
815
816	for (addr = (caddr_t)trunc_page(bp->b_data);
817	    addr < bp->b_data + bp->b_bufsize;
818	    addr += PAGE_SIZE) {
819		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
820		pmap_kremove((vm_offset_t) addr);
821		vm_page_unhold(PHYS_TO_VM_PAGE(pa));
822	}
823
824	bp->b_data = bp->b_saveaddr;
825}
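
/*
 * Pairing sketch (illustrative, simplified from the raw-I/O path): the
 * caller leaves a kernel va range in bp->b_saveaddr and then brackets the
 * transfer with these two routines:
 *
 *	bp->b_flags |= B_PHYS;
 *	vmapbuf(bp);			-- hold and map the user pages
 *	(*strategy)(bp);
 *	biowait(bp);
 *	vunmapbuf(bp);			-- unmap and unhold them again
 */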
826
827/*
828 * Force reset the processor by invalidating the entire address space!
829 */
830void
831cpu_reset()
832{
833
834#ifdef PC98
835	/*
836	 * Attempt to do a CPU reset via CPU reset port.
837	 */
838	disable_intr();
839	outb(0x37, 0x0f);		/* SHUT0 = 0. */
840	outb(0x37, 0x0b);		/* SHUT1 = 0. */
841	outb(0xf0, 0x00);		/* Reset. */
842#else
843	/*
844	 * Attempt to do a CPU reset via the keyboard controller,
845	 * do not turn off the GateA20, as any machine that fails
846	 * to do the reset here would then end up in no man's land.
847	 */
848
849#if !defined(BROKEN_KEYBOARD_RESET)
850	outb(IO_KBD + 4, 0xFE);
851	DELAY(500000);	/* wait 0.5 sec to see if that did it */
852	printf("Keyboard reset did not work, attempting CPU shutdown\n");
853	DELAY(1000000);	/* wait 1 sec for printf to complete */
854#endif
855#endif /* PC98 */
856	/* force a shutdown by unmapping entire address space ! */
857	bzero((caddr_t) PTD, PAGE_SIZE);
858
859	/* "good night, sweet prince .... <THUNK!>" */
860	invltlb();
861	/* NOTREACHED */
862	while(1);
863}
864
865/*
866 * Grow the user stack to allow for 'sp'. This version grows the stack in
867 *	chunks of SGROWSIZ.
868 */
869int
870grow(p, sp)
871	struct proc *p;
872	u_int sp;
873{
874	unsigned int nss;
875	caddr_t v;
876	struct vmspace *vm = p->p_vmspace;
877
878	if ((caddr_t)sp <= vm->vm_maxsaddr || (unsigned)sp >= (unsigned)USRSTACK)
879	    return (1);
880
881	nss = roundup(USRSTACK - (unsigned)sp, PAGE_SIZE);
882
883	if (nss > p->p_rlimit[RLIMIT_STACK].rlim_cur)
884		return (0);
885
886	if (vm->vm_ssize && roundup(vm->vm_ssize << PAGE_SHIFT,
887	    SGROWSIZ) < nss) {
888		int grow_amount;
889		/*
890		 * If necessary, grow the VM that the stack occupies
891		 * to allow for the rlimit. This allows us to not have
892		 * to allocate all of the VM up-front in execve (which
893		 * is expensive).
894		 * Grow the VM by the amount requested rounded up to
895		 * the nearest SGROWSIZ to provide for some hysteresis.
896		 */
897		grow_amount = roundup((nss - (vm->vm_ssize << PAGE_SHIFT)), SGROWSIZ);
898		v = (char *)USRSTACK - roundup(vm->vm_ssize << PAGE_SHIFT,
899		    SGROWSIZ) - grow_amount;
900		/*
901		 * If there isn't enough room to extend by SGROWSIZ, then
902		 * just extend to the maximum size
903		 */
904		if (v < vm->vm_maxsaddr) {
905			v = vm->vm_maxsaddr;
906			grow_amount = MAXSSIZ - (vm->vm_ssize << PAGE_SHIFT);
907		}
908		if ((grow_amount == 0) || (vm_map_find(&vm->vm_map, NULL, 0, (vm_offset_t *)&v,
909		    grow_amount, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)) {
910			return (0);
911		}
912		vm->vm_ssize += grow_amount >> PAGE_SHIFT;
913	}
914
915	return (1);
916}
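
/*
 * Worked example (assuming PAGE_SIZE of 4K and SGROWSIZ of 128K): with
 * vm_ssize at 32 pages (128K of stack) and a fault 4K below that, nss
 * works out to 132K, grow_amount rounds up to 128K, and the stack is
 * extended to 256K in a single vm_map_find() call rather than one page
 * at a time.
 */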
917
918
919int cnt_prezero;
920
921SYSCTL_INT(_machdep, OID_AUTO, cnt_prezero, CTLFLAG_RD, &cnt_prezero, 0, "");
922
923/*
924 * Implement the pre-zeroed page mechanism.
925 * This routine is called from the idle loop.
926 */
927int
928vm_page_zero_idle()
929{
930	static int free_rover;
931	vm_page_t m;
932	int s;
933
934	/*
935	 * XXX
936	 * We stop zeroing pages when there are sufficient prezeroed pages.
937	 * This threshold isn't really needed, except we want to
938	 * bypass unneeded calls to vm_page_list_find, and the
939	 * associated cache flush and latency.  The pre-zero will
940	 * still be called when there are significantly more
941	 * non-prezeroed pages than zeroed pages.  The threshold
942	 * of half the number of reserved pages is arbitrary, but
943	 * approximately the right amount.  Eventually, we should
944	 * perhaps interrupt the zero operation when a process
945	 * is found to be ready to run.
946	 */
947	if (cnt.v_free_count - vm_page_zero_count <= cnt.v_free_reserved / 2)
948		return (0);
949#ifdef SMP
950	if (try_mplock()) {
951#endif
952		s = splvm();
953		enable_intr();
954		m = vm_page_list_find(PQ_FREE, free_rover);
955		if (m != NULL) {
956			--(*vm_page_queues[m->queue].lcnt);
957			TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq);
958			m->queue = PQ_NONE;
959			splx(s);
960#if 0
961			rel_mplock();
962#endif
963			pmap_zero_page(VM_PAGE_TO_PHYS(m));
964#if 0
965			get_mplock();
966#endif
967			(void)splvm();
968			m->queue = PQ_ZERO + m->pc;
969			++(*vm_page_queues[m->queue].lcnt);
970			TAILQ_INSERT_HEAD(vm_page_queues[m->queue].pl, m, pageq);
971			free_rover = (free_rover + PQ_PRIME3) & PQ_L2_MASK;
972			++vm_page_zero_count;
973			++cnt_prezero;
974		}
975		splx(s);
976		disable_intr();
977#ifdef SMP
978		rel_mplock();
979		return (1);
980	}
981#endif
982	return (0);
983}
984
985/*
986 * Software interrupt handler for queued VM system processing.
987 */
988void
989swi_vm()
990{
991	if (busdma_swi_pending != 0)
992		busdma_swi();
993}
994
995/*
996 * Tell whether this address is in some physical memory region.
997 * Currently used by the kernel coredump code in order to avoid
998 * dumping the ``ISA memory hole'' which could cause indefinite hangs,
999 * or other unpredictable behaviour.
1000 */
1001
1002#include "isa.h"
1003
1004int
1005is_physical_memory(addr)
1006	vm_offset_t addr;
1007{
1008
1009#if NISA > 0
1010	/* The ISA ``memory hole''. */
1011	if (addr >= 0xa0000 && addr < 0x100000)
1012		return 0;
1013#endif
1014
1015	/*
1016	 * stuff other tests for known memory-mapped devices (PCI?)
1017	 * here
1018	 */
1019
1020	return 1;
1021}
1022