vm_machdep.c revision 1298
/*-
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * Copyright (c) 1989, 1990 William Jolitz
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 *	$Id: vm_machdep.c,v 1.13 1994/03/21 09:35:10 davidg Exp $
 */

#include "npx.h"
#include "param.h"
#include "systm.h"
#include "proc.h"
#include "malloc.h"
#include "buf.h"
#include "user.h"

#include "../include/cpu.h"

#include "vm/vm.h"
#include "vm/vm_kern.h"

#ifndef NOBOUNCE

caddr_t		bouncememory;
vm_offset_t	bouncepa, bouncepaend;
int		bouncepages;
vm_map_t	bounce_map;
int		bmwait, bmfreeing;

int		bounceallocarraysize;
unsigned	*bounceallocarray;
int		bouncefree;

#define SIXTEENMEG (4096*4096)
#define MAXBKVA 512
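
/*
 * SIXTEENMEG is 4096 * 4096 = 16MB, which matches the 24-bit (16MB)
 * addressing limit of ISA DMA and bus-master devices; pages at or
 * above that physical address have to be bounced through memory below
 * the line.  MAXBKVA bounds the kernel VA set aside for the remapped
 * buffers: vm_bounce_init() below creates bounce_map as a submap of
 * MAXBKVA * NBPG bytes.
 */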

/* special list that can be used at interrupt time for eventual kva free */
struct kvasfree {
	vm_offset_t addr;
	vm_offset_t size;
} kvaf[MAXBKVA];

int		kvasfreecnt;

/*
 * get bounce buffer pages (count physically contiguous)
 * (only 1 implemented now)
 */
vm_offset_t
vm_bounce_page_find(count)
	int count;
{
	int bit;
	int s,i;

	if (count != 1)
		panic("vm_bounce_page_find -- no support for > 1 page yet!!!");

	s = splbio();
retry:
	for (i = 0; i < bounceallocarraysize; i++) {
		if (bounceallocarray[i] != 0xffffffff) {
			if (bit = ffs(~bounceallocarray[i])) {
				bounceallocarray[i] |= 1 << (bit - 1);
				bouncefree -= count;
				splx(s);
				return bouncepa + (i * 8 * sizeof(unsigned) + (bit - 1)) * NBPG;
			}
		}
	}
	tsleep((caddr_t) &bounceallocarray, PRIBIO, "bncwai", 0);
	goto retry;
}
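
/*
 * The allocator above treats bounceallocarray[] as one large bitmap,
 * one bit per bounce page.  Illustrative arithmetic only (assuming
 * 32-bit "unsigned" and NBPG = 4096): a clear bit found by ffs() in
 * word i = 2 at bit position 5 corresponds to page number
 * 2 * 32 + (5 - 1) = 68, so the routine returns bouncepa + 68 * 4096.
 * vm_bounce_page_free() below performs the inverse calculation to
 * clear that same bit.
 */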

/*
 * free count bounce buffer pages
 */
void
vm_bounce_page_free(pa, count)
	vm_offset_t pa;
	int count;
{
	int allocindex;
	int index;
	int bit;

	if (count != 1)
		panic("vm_bounce_page_free -- no support for > 1 page yet!!!\n");

	index = (pa - bouncepa) / NBPG;

	if ((index < 0) || (index >= bouncepages))
		panic("vm_bounce_page_free -- bad index\n");

	allocindex = index / (8 * sizeof(unsigned));
	bit = index % (8 * sizeof(unsigned));

	bounceallocarray[allocindex] &= ~(1 << bit);

	bouncefree += count;
	wakeup((caddr_t) &bounceallocarray);
}

/*
 * allocate count bounce buffer kva pages
 */
vm_offset_t
vm_bounce_kva(count)
	int count;
{
	int tofree;
	int i;
	int startfree;
	vm_offset_t kva;
	int s = splbio();
	startfree = 0;
more:
	if (!bmfreeing && (tofree = kvasfreecnt)) {
		bmfreeing = 1;
more1:
		for (i = startfree; i < kvasfreecnt; i++) {
			pmap_remove(kernel_pmap,
				kvaf[i].addr, kvaf[i].addr + kvaf[i].size);
			kmem_free_wakeup(bounce_map, kvaf[i].addr,
				kvaf[i].size);
		}
		if (kvasfreecnt != tofree) {
			startfree = i;
			bmfreeing = 0;
			goto more;
		}
		kvasfreecnt = 0;
		bmfreeing = 0;
	}

	if (!(kva = kmem_alloc_pageable(bounce_map, count * NBPG))) {
		bmwait = 1;
		tsleep((caddr_t) bounce_map, PRIBIO, "bmwait", 0);
		goto more;
	}

	splx(s);

	return kva;
}
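
/*
 * Note on the kvaf[] handling above: vm_bounce_free() is run from the
 * biodone() path, potentially at interrupt time, so rather than tearing
 * down mappings there it only queues each kva range in kvaf[] (see the
 * comment at its declaration).  The next caller of vm_bounce_kva(),
 * running at splbio in process context, drains that queue with
 * pmap_remove()/kmem_free_wakeup() before attempting its own
 * allocation, and sleeps on bounce_map when the submap is exhausted
 * until a later vm_bounce_free() notices bmwait and issues the wakeup.
 */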

/*
 * init the bounce buffer system
 */
void
vm_bounce_init()
{
	vm_offset_t minaddr, maxaddr;

	if (bouncepages == 0)
		return;

	bounceallocarraysize = (bouncepages + (8*sizeof(unsigned))-1) / (8 * sizeof(unsigned));
	bounceallocarray = malloc(bounceallocarraysize * sizeof(unsigned), M_TEMP, M_NOWAIT);

	if (!bounceallocarray)
		panic("Cannot allocate bounce resource array\n");

	bzero(bounceallocarray, bounceallocarraysize * sizeof(unsigned));

	bounce_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, MAXBKVA * NBPG, FALSE);

	bouncepa = pmap_extract(kernel_pmap, (vm_offset_t) bouncememory);
	bouncepaend = bouncepa + bouncepages * NBPG;
	bouncefree = bouncepages;
	kvasfreecnt = 0;
}
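
/*
 * Sizing example (illustrative numbers only): with bouncepages = 48
 * and 32-bit "unsigned", bounceallocarraysize = (48 + 31) / 32 = 2, so
 * two bitmap words (64 bits) track the 48 bounce pages, and
 * bouncepa..bouncepaend spans 48 * NBPG = 192KB of the physically
 * contiguous low memory that was set aside elsewhere in bouncememory.
 */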

/*
 * do the things necessary to the struct buf to implement
 * bounce buffers...  inserted before the disk sort
 */
void
vm_bounce_alloc(bp)
	struct buf *bp;
{
	int countvmpg;
	vm_offset_t vastart, vaend;
	vm_offset_t vapstart, vapend;
	vm_offset_t va, kva;
	vm_offset_t pa;
	int dobounceflag = 0;
	int bounceindex;
	int i;
	int s;

	if (bouncepages == 0)
		return;

	vastart = (vm_offset_t) bp->b_un.b_addr;
	vaend = (vm_offset_t) bp->b_un.b_addr + bp->b_bcount;

	vapstart = i386_trunc_page(vastart);
	vapend = i386_round_page(vaend);
	countvmpg = (vapend - vapstart) / NBPG;

/*
 * if any page is above 16MB, then go into bounce-buffer mode
 */
	va = vapstart;
	for (i = 0; i < countvmpg; i++) {
		pa = pmap_extract(kernel_pmap, va);
		if (pa >= SIXTEENMEG)
			++dobounceflag;
		va += NBPG;
	}
	if (dobounceflag == 0)
		return;

	if (bouncepages < dobounceflag)
		panic("Not enough bounce buffers!!!");

/*
 * allocate a replacement kva for b_addr
 */
	kva = vm_bounce_kva(countvmpg);
	va = vapstart;
	for (i = 0; i < countvmpg; i++) {
		pa = pmap_extract(kernel_pmap, va);
		if (pa >= SIXTEENMEG) {
			/*
			 * allocate a replacement page
			 */
			vm_offset_t bpa = vm_bounce_page_find(1);
			pmap_enter(kernel_pmap, kva + (NBPG * i), bpa, VM_PROT_DEFAULT,
				TRUE);
			/*
			 * if we are writing, then copy the data into the bounce page
			 */
			if ((bp->b_flags & B_READ) == 0)
				bcopy((caddr_t) va, (caddr_t) kva + (NBPG * i), NBPG);
		} else {
			/*
			 * use original page
			 */
			pmap_enter(kernel_pmap, kva + (NBPG * i), pa, VM_PROT_DEFAULT,
				TRUE);
		}
		va += NBPG;
	}

/*
 * flag the buffer as being bounced
 */
	bp->b_flags |= B_BOUNCE;
/*
 * save the original buffer kva
 */
	bp->b_savekva = bp->b_un.b_addr;
/*
 * put our new kva into the buffer (offset by original offset)
 */
	bp->b_un.b_addr = (caddr_t) (((vm_offset_t) kva) |
				((vm_offset_t) bp->b_savekva & (NBPG - 1)));
	return;
}
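
/*
 * Example of the address substitution above (illustrative values
 * only): if the original b_addr is 0xf0123456 and the replacement kva
 * was allocated at 0xfe800000, the new b_addr becomes
 * 0xfe800000 | 0x456 = 0xfe800456 -- the page frames under the buffer
 * change, but the offset within the first page (and therefore
 * b_bcount and the driver's view of the transfer) does not.  Pages
 * already below 16MB are simply double-mapped at the new kva rather
 * than copied.
 */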

/*
 * hook into biodone to free bounce buffer
 */
void
vm_bounce_free(bp)
	struct buf *bp;
{
	int i;
	vm_offset_t origkva, bouncekva;
	vm_offset_t vastart, vaend;
	vm_offset_t vapstart, vapend;
	int countbounce = 0;
	vm_offset_t firstbouncepa = 0;
	int firstbounceindex;
	int countvmpg;
	vm_offset_t bcount;
	int s;

/*
 * if this isn't a bounced buffer, then just return
 */
	if ((bp->b_flags & B_BOUNCE) == 0)
		return;

	origkva = (vm_offset_t) bp->b_savekva;
	bouncekva = (vm_offset_t) bp->b_un.b_addr;

	vastart = bouncekva;
	vaend = bouncekva + bp->b_bcount;
	bcount = bp->b_bcount;

	vapstart = i386_trunc_page(vastart);
	vapend = i386_round_page(vaend);

	countvmpg = (vapend - vapstart) / NBPG;

/*
 * check every page in the kva space for b_addr
 */
	for (i = 0; i < countvmpg; i++) {
		vm_offset_t mybouncepa;
		vm_offset_t copycount;

		copycount = i386_round_page(bouncekva + 1) - bouncekva;
		mybouncepa = pmap_extract(kernel_pmap, i386_trunc_page(bouncekva));

/*
 * if this is a bounced pa, then process as one
 */
		if ((mybouncepa >= bouncepa) && (mybouncepa < bouncepaend)) {
			if (copycount > bcount)
				copycount = bcount;
/*
 * if this is a read, then copy from bounce buffer into original buffer
 */
			if (bp->b_flags & B_READ)
				bcopy((caddr_t) bouncekva, (caddr_t) origkva, copycount);
/*
 * free the bounce allocation
 */
			vm_bounce_page_free(i386_trunc_page(mybouncepa), 1);
		}

		origkva += copycount;
		bouncekva += copycount;
		bcount -= copycount;
	}

/*
 * add the old kva into the "to free" list
 */
	bouncekva = i386_trunc_page((vm_offset_t) bp->b_un.b_addr);
	kvaf[kvasfreecnt].addr = bouncekva;
	kvaf[kvasfreecnt++].size = countvmpg * NBPG;
	if (bmwait) {
		/*
		 * if anyone is waiting on the bounce-map, then wakeup
		 */
		wakeup((caddr_t) bounce_map);
		bmwait = 0;
	}

	bp->b_un.b_addr = bp->b_savekva;
	bp->b_savekva = 0;
	bp->b_flags &= ~B_BOUNCE;

	return;
}

#endif /* NOBOUNCE */
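
#if 0
/*
 * Illustrative sketch only (never compiled): how a disk driver is
 * expected to use the bounce machinery.  The names fdstrategy,
 * fd_queue and fdstart are hypothetical; the real call sites live in
 * the individual drivers.  vm_bounce_alloc() is applied on the way
 * down, before the buffer is sorted onto the device queue, and
 * vm_bounce_free() is invoked from the biodone() path once the
 * transfer finishes.
 */
void
fdstrategy(bp)
	struct buf *bp;
{
	vm_bounce_alloc(bp);		/* remap any pages above 16MB */
	disksort(&fd_queue, bp);	/* then queue for the controller */
	fdstart();			/* and start the transfer */
}
#endif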

/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the kernel stack and pcb, making the child
 * ready to run, and marking it so that it can return differently
 * than the parent.  Returns 1 in the child process, 0 in the parent.
 * We currently double-map the user area so that the stack is at the same
 * address in each process; in the future we will probably relocate
 * the frame pointers on the stack after copying.
 */
int
cpu_fork(p1, p2)
	register struct proc *p1, *p2;
{
	register struct user *up = p2->p_addr;
	int foo, offset, addr, i;
	extern char kstack[];
	extern int mvesp();

	/*
	 * Copy pcb and stack from proc p1 to p2.
	 * We do this as cheaply as possible, copying only the active
	 * part of the stack.  The stack and pcb need to agree;
	 * this is tricky, as the final pcb is constructed by savectx,
	 * but its frame isn't yet on the stack when the stack is copied.
	 * swtch compensates for this when the child eventually runs.
	 * This should be done differently, with a single call
	 * that copies and updates the pcb+stack,
	 * replacing the bcopy and savectx.
	 */
	p2->p_addr->u_pcb = p1->p_addr->u_pcb;
	offset = mvesp() - (int)kstack;
	bcopy((caddr_t)kstack + offset, (caddr_t)p2->p_addr + offset,
	    (unsigned) ctob(UPAGES) - offset);
	p2->p_regs = p1->p_regs;

	/*
	 * Wire top of address space of child to its kstack.
	 * First, fault in a page of pte's to map it.
	 */
#if 0
	addr = trunc_page((u_int)vtopte(kstack));
	vm_map_pageable(&p2->p_vmspace->vm_map, addr, addr+NBPG, FALSE);
	for (i=0; i < UPAGES; i++)
		pmap_enter(&p2->p_vmspace->vm_pmap, kstack+i*NBPG,
			   pmap_extract(kernel_pmap, ((int)p2->p_addr)+i*NBPG),
			   /*
			    * The user area has to be mapped writable because
			    * it contains the kernel stack (with CR0_WP set on
			    * a 486 there is no user-read/kernel-write mode).
			    * It is protected from user mode access by the
			    * segment limits.
			    */
			   VM_PROT_READ|VM_PROT_WRITE, TRUE);
#endif
	pmap_activate(&p2->p_vmspace->vm_pmap, &up->u_pcb);

	/*
	 * Arrange for a non-local goto when the new process
	 * is started, to resume here, returning nonzero from setjmp.
	 */
	if (savectx(up, 1)) {
		/*
		 * Return 1 in child.
		 */
		return (1);
	}
	return (0);
}
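
/*
 * savectx(up, 1) behaves like setjmp here: it returns 0 immediately in
 * the parent once the child's pcb is saved, and appears to return a
 * second time, nonzero, when swtch() later resumes the child from that
 * saved context.  That is what lets cpu_fork() return 0 to the parent
 * and 1 to the child, as promised in the comment above.
 */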

#ifdef notyet
/*
 * cpu_exit is called as the last action during exit.
 *
 * We change to an inactive address space and a "safe" stack,
 * passing thru an argument to the new stack. Now, safely isolated
 * from the resources we're shedding, we release the address space
 * and any remaining machine-dependent resources, including the
 * memory for the user structure and kernel stack.
 *
 * Next, we assign a dummy context to be written over by swtch,
 * calling it to send this process off to oblivion.
 * [The nullpcb allows us to minimize cost in swtch() by not having
 * a special case].
 */
struct proc *swtch_to_inactive();
volatile void
cpu_exit(p)
	register struct proc *p;
{
	static struct pcb nullpcb;	/* pcb to overwrite on last swtch */

#if NNPX > 0
	npxexit(p);
#endif	/* NNPX */

	/* move to inactive space and stack, passing arg across */
	p = swtch_to_inactive(p);

	/* drop per-process resources */
	vmspace_free(p->p_vmspace);
	kmem_free(kernel_map, (vm_offset_t)p->p_addr, ctob(UPAGES));

	p->p_addr = (struct user *) &nullpcb;
	splclock();
	swtch();
	/* NOTREACHED */
}
#else
void
cpu_exit(p)
	register struct proc *p;
{

#if NNPX > 0
	npxexit(p);
#endif	/* NNPX */
	splclock();
	curproc = 0;
	swtch();
	/*
	 * This is to shut up the compiler, and if swtch() failed I suppose
	 * this would be a good thing.  This keeps gcc happy because panic
	 * is a volatile void function as well.
	 */
	panic("cpu_exit");
}

void
cpu_wait(p)
	struct proc *p;
{
/*	extern vm_map_t upages_map; */
	extern char kstack[];

	/* drop per-process resources */
	pmap_remove(vm_map_pmap(kernel_map), (vm_offset_t) p->p_addr,
		((vm_offset_t) p->p_addr) + ctob(UPAGES));
	kmem_free(kernel_map, (vm_offset_t)p->p_addr, ctob(UPAGES));
	vmspace_free(p->p_vmspace);
}
#endif
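
/*
 * In the live (non-"notyet") version above, cpu_exit() deliberately
 * frees nothing: the exiting process is still running on the kernel
 * stack inside its own user area, so the U pages and the vmspace can
 * only be released after the final swtch().  That cleanup is deferred
 * to cpu_wait(), which runs later from the parent's wait() path and
 * unmaps and frees the U area before dropping the vmspace.
 */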

/*
 * Set a red zone in the kernel stack after the u. area.
 */
void
setredzone(pte, vaddr)
	u_short *pte;
	caddr_t vaddr;
{
/* eventually do this by setting up an expand-down stack segment
   for ss0: selector, allowing stack access down to top of u.
   this means though that protection violations need to be handled
   thru a double fault exception that must do an integral task
   switch to a known good context, within which a dump can be
   taken. a sensible scheme might be to save the initial context
   used by sched (that has physical memory mapped 1:1 at bottom)
   and take the dump while still in mapped mode */
}

/*
 * Convert kernel VA to physical address
 */
u_long
kvtop(void *addr)
{
	vm_offset_t va;

	va = pmap_extract(kernel_pmap, (vm_offset_t)addr);
	if (va == 0)
		panic("kvtop: zero page frame");
	return((u_long)va);
}

extern vm_map_t phys_map;

/*
 * Map an IO request into kernel virtual address space.  Requests fall into
 * one of five categories:
 *
 *	B_PHYS|B_UAREA:	User u-area swap.
 *			Address is relative to start of u-area (p_addr).
 *	B_PHYS|B_PAGET:	User page table swap.
 *			Address is a kernel VA in usrpt (Usrptmap).
 *	B_PHYS|B_DIRTY:	Dirty page push.
 *			Address is a VA in proc2's address space.
 *	B_PHYS|B_PGIN:	Kernel pagein of user pages.
 *			Address is VA in user's address space.
 *	B_PHYS:		User "raw" IO request.
 *			Address is VA in user's address space.
 *
 * All requests are (re)mapped into kernel VA space via phys_map
 * (a name with only slightly more meaning than "kernelmap")
 */
void
vmapbuf(bp)
	register struct buf *bp;
{
	register int npf;
	register caddr_t addr;
	register long flags = bp->b_flags;
	struct proc *p;
	int off;
	vm_offset_t kva;
	register vm_offset_t pa;

	if ((flags & B_PHYS) == 0)
		panic("vmapbuf");
	addr = bp->b_saveaddr = bp->b_un.b_addr;
	off = (int)addr & PGOFSET;
	p = bp->b_proc;
	npf = btoc(round_page(bp->b_bcount + off));
	kva = kmem_alloc_wait(phys_map, ctob(npf));
	bp->b_un.b_addr = (caddr_t) (kva + off);
	while (npf--) {
		pa = pmap_extract(&p->p_vmspace->vm_pmap, (vm_offset_t)addr);
		if (pa == 0)
			panic("vmapbuf: null page frame");
		pmap_enter(vm_map_pmap(phys_map), kva, trunc_page(pa),
			   VM_PROT_READ|VM_PROT_WRITE, TRUE);
		addr += PAGE_SIZE;
		kva += PAGE_SIZE;
	}
}
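
/*
 * Worked example for vmapbuf() (illustrative numbers only): for a
 * physio transfer with b_bcount = 8192 starting at user address
 * 0x2345678, off = 0x678 and npf = btoc(round_page(8192 + 0x678)) = 3,
 * so three pages of phys_map kva are allocated and each backing
 * physical page is entered there; b_un.b_addr becomes the new kva plus
 * 0x678, and vunmapbuf() below releases the same three pages when the
 * I/O completes.
 */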

/*
 * Free the io map PTEs associated with this IO operation.
 * We also invalidate the TLB entries and restore the original b_addr.
 */
void
vunmapbuf(bp)
	register struct buf *bp;
{
	register int npf;
	register caddr_t addr = bp->b_un.b_addr;
	vm_offset_t kva;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vunmapbuf");
	npf = btoc(round_page(bp->b_bcount + ((int)addr & PGOFSET)));
	kva = (vm_offset_t)((int)addr & ~PGOFSET);
	kmem_free_wakeup(phys_map, kva, ctob(npf));
	bp->b_un.b_addr = bp->b_saveaddr;
	bp->b_saveaddr = NULL;
}

/*
 * Force reset the processor by invalidating the entire address space!
 */
void
cpu_reset() {

	/* force a shutdown by unmapping entire address space ! */
	bzero((caddr_t) PTD, NBPG);

	/* "good night, sweet prince .... <THUNK!>" */
	tlbflush();
	/* NOTREACHED */
	while(1);
}
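
/*
 * Why zeroing PTD resets the machine: PTD is the page directory of the
 * current address space, so clearing it and flushing the TLB leaves
 * the CPU with no valid mappings at all.  The next instruction fetch
 * page faults, the fault handler itself can no longer be reached, and
 * the resulting double/triple fault makes the processor reset --
 * hence the "<THUNK!>".
 */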

/*
 * Grow the user stack to allow for 'sp'. This version grows the stack in
 *	chunks of SGROWSIZ.
 */
int
grow(p, sp)
	struct proc *p;
	int sp;
{
	unsigned int nss;
	caddr_t v;
	struct vmspace *vm = p->p_vmspace;

	if ((caddr_t)sp <= vm->vm_maxsaddr || (unsigned)sp >= (unsigned)USRSTACK)
	    return (1);

	nss = roundup(USRSTACK - (unsigned)sp, PAGE_SIZE);

	if (nss > p->p_rlimit[RLIMIT_STACK].rlim_cur)
		return (0);

	if (vm->vm_ssize && roundup(vm->vm_ssize << PAGE_SHIFT,
	    SGROWSIZ) < nss) {
		int grow_amount;
		/*
		 * If necessary, grow the VM that the stack occupies
		 * to allow for the rlimit. This allows us to not have
		 * to allocate all of the VM up-front in execve (which
		 * is expensive).
		 * Grow the VM by the amount requested rounded up to
		 * the nearest SGROWSIZ to provide for some hysteresis.
		 */
		grow_amount = roundup((nss - (vm->vm_ssize << PAGE_SHIFT)), SGROWSIZ);
		v = (char *)USRSTACK - roundup(vm->vm_ssize << PAGE_SHIFT,
		    SGROWSIZ) - grow_amount;
		/*
		 * If there isn't enough room to extend by SGROWSIZ, then
		 * just extend to the maximum size
		 */
		if (v < vm->vm_maxsaddr) {
			v = vm->vm_maxsaddr;
			grow_amount = MAXSSIZ - (vm->vm_ssize << PAGE_SHIFT);
		}
		if (vm_allocate(&vm->vm_map, (vm_offset_t *)&v,
		    grow_amount, FALSE) != KERN_SUCCESS) {
			return (0);
		}
		vm->vm_ssize += grow_amount >> PAGE_SHIFT;
	}

	return (1);
}
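
/*
 * Worked example for grow() (illustrative numbers, assuming
 * PAGE_SIZE = 4096 and SGROWSIZ = 128KB, its customary value): with
 * vm_ssize currently 16 pages (64KB) and a fault needing nss = 200KB
 * of stack, roundup(64KB, 128KB) = 128KB < 200KB, so the stack is
 * extended; grow_amount = roundup(200KB - 64KB, 128KB) = 256KB, the
 * new region is allocated just below the already rounded-out 128KB
 * under USRSTACK, and vm_ssize becomes 16 + 64 = 80 pages.
 */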