vm_machdep.c revision 1312
/*-
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * Copyright (c) 1989, 1990 William Jolitz
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 *	$Id: vm_machdep.c,v 1.15 1994/03/24 23:12:35 davidg Exp $
 */

#include "npx.h"
#include "param.h"
#include "systm.h"
#include "proc.h"
#include "malloc.h"
#include "buf.h"
#include "user.h"

#include "../include/cpu.h"

#include "vm/vm.h"
#include "vm/vm_kern.h"

#ifndef NOBOUNCE

caddr_t		bouncememory;
vm_offset_t	bouncepa, bouncepaend;
int		bouncepages, bpwait;
vm_map_t	bounce_map;
int		bmwait, bmfreeing;

#define BITS_IN_UNSIGNED (8*sizeof(unsigned))
int		bounceallocarraysize;
unsigned	*bounceallocarray;
int		bouncefree;

#define SIXTEENMEG (4096*4096)
#define MAXBKVA 512

/* special list that can be used at interrupt time for eventual kva free */
struct kvasfree {
	vm_offset_t addr;
	vm_offset_t size;
} kvaf[MAXBKVA];

int		kvasfreecnt;

/*
 * get bounce buffer pages (count physically contiguous)
 * (only 1 implemented now)
 */
vm_offset_t
vm_bounce_page_find(count)
	int count;
{
	int bit;
	int s, i;

	if (count != 1)
		panic("vm_bounce_page_find -- no support for > 1 page yet!!!");

	s = splbio();
retry:
	for (i = 0; i < bounceallocarraysize; i++) {
		if (bounceallocarray[i] != 0xffffffff) {
			if ((bit = ffs(~bounceallocarray[i])) != 0) {
				bounceallocarray[i] |= 1 << (bit - 1);
				bouncefree -= count;
				splx(s);
				return bouncepa + (i * BITS_IN_UNSIGNED + (bit - 1)) * NBPG;
			}
		}
	}
	bpwait = 1;
	tsleep((caddr_t) &bounceallocarray, PRIBIO, "bncwai", 0);
	goto retry;
}
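
/*
 * A minimal stand-alone sketch of the first-fit bitmap scan performed by
 * vm_bounce_page_find() above.  The names (example_*, poolbase), the page
 * size, and the 32-bit word width are hypothetical, and the real function
 * sleeps and retries instead of returning failure.  Never compiled.
 */
#if 0	/* illustrative sketch only -- never compiled */
static unsigned long
example_bounce_page_find(unsigned *bitmap, int nwords, unsigned long poolbase)
{
	const unsigned long pgsz = 4096;	/* NBPG on the i386 */
	const int bits = 8 * sizeof(unsigned);	/* BITS_IN_UNSIGNED */
	int i, bit;

	for (i = 0; i < nwords; i++) {
		if (bitmap[i] == ~0U)		/* every page in this word taken */
			continue;
		for (bit = 0; bit < bits; bit++)	/* what ffs(~word) finds */
			if ((bitmap[i] & (1U << bit)) == 0)
				break;
		bitmap[i] |= 1U << bit;		/* claim the page */
		return poolbase + ((unsigned long)(i * bits + bit)) * pgsz;
	}
	return 0;	/* pool exhausted */
}
#endif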

/*
 * free count bounce buffer pages
 */
void
vm_bounce_page_free(pa, count)
	vm_offset_t pa;
	int count;
{
	int allocindex;
	int index;
	int bit;

	if (count != 1)
		panic("vm_bounce_page_free -- no support for > 1 page yet!!!");

	index = (pa - bouncepa) / NBPG;

	if ((index < 0) || (index >= bouncepages))
		panic("vm_bounce_page_free -- bad index");

	allocindex = index / BITS_IN_UNSIGNED;
	bit = index % BITS_IN_UNSIGNED;

	bounceallocarray[allocindex] &= ~(1 << bit);

	bouncefree += count;
	if (bpwait) {
		bpwait = 0;
		wakeup((caddr_t) &bounceallocarray);
	}
}
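
/*
 * A companion sketch to the one above: the inverse mapping performed by
 * vm_bounce_page_free(), from a physical address in the pool back to a
 * (word, bit) position.  Hypothetical names, 4096-byte pages, and 32-bit
 * unsigned words are assumed.  Never compiled.
 */
#if 0	/* illustrative sketch only -- never compiled */
static void
example_bounce_page_free(unsigned *bitmap, unsigned long poolbase,
	unsigned long pa)
{
	unsigned long index = (pa - poolbase) / 4096;	/* page index in pool */

	/* e.g. pa == poolbase + 37 * 4096  =>  word 1, bit 5 */
	bitmap[index / 32] &= ~(1U << (index % 32));
}
#endif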

/*
 * allocate count bounce buffer kva pages
 */
vm_offset_t
vm_bounce_kva(count)
	int count;
{
	int tofree;
	int i;
	int startfree;
	vm_offset_t kva = 0;
	int s = splbio();
	int size = count * NBPG;
	startfree = 0;
more:
	if (!bmfreeing && (tofree = kvasfreecnt)) {
		bmfreeing = 1;
more1:
		for (i = startfree; i < kvasfreecnt; i++) {
			/*
			 * if we have a kva of the right size, no sense
			 * in freeing/reallocating...
			 * might affect fragmentation short term, but
			 * as long as bounce_map is significantly larger
			 * than the maximum transfer size, I don't think
			 * that it is a problem.
			 */
			pmap_remove(kernel_pmap,
				kvaf[i].addr, kvaf[i].addr + kvaf[i].size);
			if (!kva && kvaf[i].size == size) {
				kva = kvaf[i].addr;
			} else {
				kmem_free_wakeup(bounce_map, kvaf[i].addr,
					kvaf[i].size);
			}
		}
		if (kvasfreecnt != tofree) {
			startfree = i;
			bmfreeing = 0;
			goto more;
		}
		kvasfreecnt = 0;
		bmfreeing = 0;
	}

	if (!kva && !(kva = kmem_alloc_pageable(bounce_map, size))) {
		bmwait = 1;
		tsleep((caddr_t) bounce_map, PRIBIO, "bmwait", 0);
		goto more;
	}
	splx(s);

	return kva;
}
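
/*
 * A toy model of the deferred-free scheme above: interrupt-level code
 * (vm_bounce_free(), below) only appends (addr, size) pairs to a small
 * array, and the allocator drains that array at non-interrupt time,
 * keeping one entry of exactly the requested size for itself rather than
 * freeing and immediately reallocating it.  All names are hypothetical.
 * Never compiled.
 */
#if 0	/* illustrative sketch only -- never compiled */
struct example_deferred {
	unsigned long	addr;
	unsigned long	size;
};

static unsigned long
example_drain_deferred(struct example_deferred *q, int *qcnt,
	unsigned long wanted)
{
	unsigned long reuse = 0;
	int i;

	for (i = 0; i < *qcnt; i++) {
		if (reuse == 0 && q[i].size == wanted)
			reuse = q[i].addr;	/* recycle this mapping */
		/* else: the kernel hands q[i] to kmem_free_wakeup() here */
	}
	*qcnt = 0;
	return reuse;		/* 0: caller must allocate fresh kva */
}
#endif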

/*
 * init the bounce buffer system
 */
void
vm_bounce_init()
{
	vm_offset_t minaddr, maxaddr;

	if (bouncepages == 0)
		return;

	bounceallocarraysize = (bouncepages + BITS_IN_UNSIGNED - 1) / BITS_IN_UNSIGNED;
	bounceallocarray = malloc(bounceallocarraysize * sizeof(unsigned), M_TEMP, M_NOWAIT);

	if (!bounceallocarray)
		panic("Cannot allocate bounce resource array");

	bzero(bounceallocarray, bounceallocarraysize * sizeof(unsigned));

	bounce_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, MAXBKVA * NBPG, FALSE);

	bouncepa = pmap_kextract((vm_offset_t) bouncememory);
	bouncepaend = bouncepa + bouncepages * NBPG;
	bouncefree = bouncepages;
	kvasfreecnt = 0;
}
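
/*
 * Sizing example for the bitmap set up above, assuming 32-bit words and
 * NBPG == 4096, with a hypothetical boot-time value of bouncepages = 48:
 *
 *	bounceallocarraysize = (48 + 32 - 1) / 32 = 2 words (64 bits),
 *
 * of which only the low 48 bits are ever used, while the pool itself runs
 * from bouncepa to bouncepa + 48 * 4096 bytes of physical memory.
 */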

/*
 * do the things necessary to the struct buf to implement
 * bounce buffers...  inserted before the disk sort
 */
void
vm_bounce_alloc(bp)
	struct buf *bp;
{
	int countvmpg;
	vm_offset_t vastart, vaend;
	vm_offset_t vapstart, vapend;
	vm_offset_t va, kva;
	vm_offset_t pa;
	int dobounceflag = 0;
	int bounceindex;
	int i;
	int s;

	if (bouncepages == 0)
		return;

	vastart = (vm_offset_t) bp->b_un.b_addr;
	vaend = (vm_offset_t) bp->b_un.b_addr + bp->b_bcount;

	vapstart = i386_trunc_page(vastart);
	vapend = i386_round_page(vaend);
	countvmpg = (vapend - vapstart) / NBPG;

/*
 * if any page is above 16MB, then go into bounce-buffer mode
 */
	va = vapstart;
	for (i = 0; i < countvmpg; i++) {
		pa = pmap_kextract(va);
		if (pa >= SIXTEENMEG)
			++dobounceflag;
		va += NBPG;
	}
	if (dobounceflag == 0)
		return;

	if (bouncepages < dobounceflag)
		panic("Not enough bounce buffers!!!");

/*
 * allocate a replacement kva for b_addr
 */
	kva = vm_bounce_kva(countvmpg);
	va = vapstart;
	for (i = 0; i < countvmpg; i++) {
		pa = pmap_kextract(va);
		if (pa >= SIXTEENMEG) {
			/*
			 * allocate a replacement page
			 */
			vm_offset_t bpa = vm_bounce_page_find(1);
			pmap_kenter(kva + (NBPG * i), bpa);
			/*
			 * if we are writing, then copy the data into the page
			 */
			if ((bp->b_flags & B_READ) == 0)
				bcopy((caddr_t) va, (caddr_t) kva + (NBPG * i), NBPG);
		} else {
			/*
			 * use original page
			 */
			pmap_kenter(kva + (NBPG * i), pa);
		}
		va += NBPG;
	}
	pmap_update();

/*
 * flag the buffer as being bounced
 */
	bp->b_flags |= B_BOUNCE;
/*
 * save the original buffer kva
 */
	bp->b_savekva = bp->b_un.b_addr;
/*
 * put our new kva into the buffer (offset by original offset)
 */
	bp->b_un.b_addr = (caddr_t) (((vm_offset_t) kva) |
				((vm_offset_t) bp->b_savekva & (NBPG - 1)));
	return;
}
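
/*
 * The core decision vm_bounce_alloc() makes, modeled stand-alone: walk the
 * buffer a page at a time and count pages whose physical address is at or
 * above 16MB (out of reach of ISA DMA).  The `extract' callback stands in
 * for pmap_kextract(); names and constants are hypothetical.  Never
 * compiled.
 */
#if 0	/* illustrative sketch only -- never compiled */
static int
example_count_bounce_pages(unsigned long vastart, unsigned long bytes,
	unsigned long (*extract)(unsigned long))
{
	const unsigned long pgsz = 4096;		/* NBPG */
	const unsigned long limit = 16UL * 1024 * 1024;	/* SIXTEENMEG */
	unsigned long va = vastart & ~(pgsz - 1);	/* trunc_page */
	unsigned long end = (vastart + bytes + pgsz - 1) & ~(pgsz - 1); /* round_page */
	int need = 0;

	for (; va < end; va += pgsz)
		if (extract(va) >= limit)
			need++;
	return need;	/* 0: the transfer can run in place */
}
#endif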

/*
 * hook into biodone to free bounce buffer
 */
void
vm_bounce_free(bp)
	struct buf *bp;
{
	int i;
	vm_offset_t origkva, bouncekva;
	vm_offset_t vastart, vaend;
	vm_offset_t vapstart, vapend;
	int countbounce = 0;
	vm_offset_t firstbouncepa = 0;
	int firstbounceindex;
	int countvmpg;
	vm_offset_t bcount;
	int s;

/*
 * if this isn't a bounced buffer, then just return
 */
	if ((bp->b_flags & B_BOUNCE) == 0)
		return;

	origkva = (vm_offset_t) bp->b_savekva;
	bouncekva = (vm_offset_t) bp->b_un.b_addr;

	vastart = bouncekva;
	vaend = bouncekva + bp->b_bcount;
	bcount = bp->b_bcount;

	vapstart = i386_trunc_page(vastart);
	vapend = i386_round_page(vaend);

	countvmpg = (vapend - vapstart) / NBPG;

/*
 * check every page in the kva space for b_addr
 */
	for (i = 0; i < countvmpg; i++) {
		vm_offset_t mybouncepa;
		vm_offset_t copycount;

		copycount = i386_round_page(bouncekva + 1) - bouncekva;
		mybouncepa = pmap_kextract(i386_trunc_page(bouncekva));

/*
 * if this is a bounced pa, then process as one
 */
		if ((mybouncepa >= bouncepa) && (mybouncepa < bouncepaend)) {
			if (copycount > bcount)
				copycount = bcount;
/*
 * if this is a read, then copy from bounce buffer into original buffer
 */
			if (bp->b_flags & B_READ)
				bcopy((caddr_t) bouncekva, (caddr_t) origkva, copycount);
/*
 * free the bounce allocation
 */
			vm_bounce_page_free(i386_trunc_page(mybouncepa), 1);
		}

		origkva += copycount;
		bouncekva += copycount;
		bcount -= copycount;
	}

/*
 * add the old kva into the "to free" list
 */
	bouncekva = i386_trunc_page((vm_offset_t) bp->b_un.b_addr);
	kvaf[kvasfreecnt].addr = bouncekva;
	kvaf[kvasfreecnt++].size = countvmpg * NBPG;
	if (bmwait) {
		/*
		 * if anyone is waiting on the bounce-map, then wakeup
		 */
		wakeup((caddr_t) bounce_map);
		bmwait = 0;
	}

	bp->b_un.b_addr = bp->b_savekva;
	bp->b_savekva = 0;
	bp->b_flags &= ~B_BOUNCE;

	return;
}
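
/*
 * Worked example for the copy-back loop above, assuming NBPG == 4096: if
 * the bounced kva starts at an address ending in 0xe080, the first
 * iteration computes
 *
 *	copycount = i386_round_page(0x...e080 + 1) - 0x...e080
 *		  = 0x...f000 - 0x...e080 = 0xf80 bytes,
 *
 * i.e. the remainder of that first page (clamped to bcount); every later
 * iteration starts page-aligned and covers a full page or the remaining
 * bcount, whichever is smaller.
 */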

#endif /* NOBOUNCE */

/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the kernel stack and pcb, making the child
 * ready to run, and marking it so that it can return differently
 * than the parent.  Returns 1 in the child process, 0 in the parent.
 * We currently double-map the user area so that the stack is at the same
 * address in each process; in the future we will probably relocate
 * the frame pointers on the stack after copying.
 */
int
cpu_fork(p1, p2)
	register struct proc *p1, *p2;
{
	register struct user *up = p2->p_addr;
	int foo, offset, addr, i;
	extern char kstack[];
	extern int mvesp();

	/*
	 * Copy pcb and stack from proc p1 to p2.
	 * We do this as cheaply as possible, copying only the active
	 * part of the stack.  The stack and pcb need to agree;
	 * this is tricky, as the final pcb is constructed by savectx,
	 * but its frame isn't yet on the stack when the stack is copied.
	 * swtch compensates for this when the child eventually runs.
	 * This should be done differently, with a single call
	 * that copies and updates the pcb+stack,
	 * replacing the bcopy and savectx.
	 */
	p2->p_addr->u_pcb = p1->p_addr->u_pcb;
	offset = mvesp() - (int)kstack;
	bcopy((caddr_t)kstack + offset, (caddr_t)p2->p_addr + offset,
	    (unsigned) ctob(UPAGES) - offset);
	p2->p_regs = p1->p_regs;

	/*
	 * Wire top of address space of child to its kstack.
	 * First, fault in a page of pte's to map it.
	 */
#if 0
	addr = trunc_page((u_int)vtopte(kstack));
	vm_map_pageable(&p2->p_vmspace->vm_map, addr, addr+NBPG, FALSE);
	for (i=0; i < UPAGES; i++)
		pmap_enter(&p2->p_vmspace->vm_pmap, kstack+i*NBPG,
			   pmap_extract(kernel_pmap, ((int)p2->p_addr)+i*NBPG),
			   /*
			    * The user area has to be mapped writable because
			    * it contains the kernel stack (when CR0_WP is set
			    * on a 486 there is no user-read/kernel-write
			    * mode).  It is protected from user mode access
			    * by the segment limits.
			    */
			   VM_PROT_READ|VM_PROT_WRITE, TRUE);
#endif
	pmap_activate(&p2->p_vmspace->vm_pmap, &up->u_pcb);

	/*
	 * Arrange for a non-local goto when the new process
	 * is started, to resume here, returning nonzero from setjmp.
	 */
	if (savectx(up, 1)) {
		/*
		 * Return 1 in child.
		 */
		return (1);
	}
	return (0);
}
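
/*
 * The tail of cpu_fork() is the classic "returns twice" idiom noted in the
 * comment above: savectx() returns 0 when the context is first saved (the
 * parent's path) and nonzero when that saved context is later resumed (the
 * child's path).  A stand-alone user-space analogue using setjmp(), purely
 * for illustration and never compiled:
 */
#if 0	/* illustrative sketch only -- never compiled */
#include <setjmp.h>

static jmp_buf example_ctx;

static int
example_fork_return(void)
{
	if (setjmp(example_ctx)) {
		/* resumed later via longjmp(example_ctx, 1): "child" path */
		return (1);
	}
	/* fell straight through after saving: "parent" path */
	return (0);
}
#endif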

#ifdef notyet
/*
 * cpu_exit is called as the last action during exit.
 *
 * We change to an inactive address space and a "safe" stack,
 * passing through an argument to the new stack. Now, safely isolated
 * from the resources we're shedding, we release the address space
 * and any remaining machine-dependent resources, including the
 * memory for the user structure and kernel stack.
 *
 * Next, we assign a dummy context to be written over by swtch,
 * calling it to send this process off to oblivion.
 * [The nullpcb allows us to minimize cost in swtch() by not having
 * a special case].
 */
struct proc *swtch_to_inactive();
volatile void
cpu_exit(p)
	register struct proc *p;
{
	static struct pcb nullpcb;	/* pcb to overwrite on last swtch */

#if NNPX > 0
	npxexit(p);
#endif	/* NNPX */

	/* move to inactive space and stack, passing arg across */
	p = swtch_to_inactive(p);

	/* drop per-process resources */
	vmspace_free(p->p_vmspace);
	kmem_free(kernel_map, (vm_offset_t)p->p_addr, ctob(UPAGES));

	p->p_addr = (struct user *) &nullpcb;
	splclock();
	swtch();
	/* NOTREACHED */
}
#else
void
cpu_exit(p)
	register struct proc *p;
{

#if NNPX > 0
	npxexit(p);
#endif	/* NNPX */
	splclock();
	curproc = 0;
	swtch();
	/*
	 * This is to shut up the compiler, and if swtch() failed I suppose
	 * this would be a good thing.  This keeps gcc happy because panic
	 * is a volatile void function as well.
	 */
	panic("cpu_exit");
}

void
cpu_wait(p)
	struct proc *p;
{
/*	extern vm_map_t upages_map; */
	extern char kstack[];

	/* drop per-process resources */
	pmap_remove(vm_map_pmap(kernel_map), (vm_offset_t) p->p_addr,
		((vm_offset_t) p->p_addr) + ctob(UPAGES));
	kmem_free(kernel_map, (vm_offset_t)p->p_addr, ctob(UPAGES));
	vmspace_free(p->p_vmspace);
}
#endif

/*
 * Set a red zone in the kernel stack after the u. area.
 */
void
setredzone(pte, vaddr)
	u_short *pte;
	caddr_t vaddr;
{
/* eventually do this by setting up an expand-down stack segment
   for ss0: selector, allowing stack access down to top of u.
   this means though that protection violations need to be handled
   through a double fault exception that must do an integral task
   switch to a known good context, within which a dump can be
   taken. a sensible scheme might be to save the initial context
   used by sched (that has physical memory mapped 1:1 at bottom)
   and take the dump while still in mapped mode */
}

/*
 * Convert kernel VA to physical address
 */
u_long
kvtop(void *addr)
{
	vm_offset_t va;

	va = pmap_kextract((vm_offset_t)addr);
	if (va == 0)
		panic("kvtop: zero page frame");
	return ((u_long)va);
}

extern vm_map_t phys_map;

/*
 * Map an IO request into kernel virtual address space.  Requests fall into
 * one of five categories:
 *
 *	B_PHYS|B_UAREA:	User u-area swap.
 *			Address is relative to start of u-area (p_addr).
 *	B_PHYS|B_PAGET:	User page table swap.
 *			Address is a kernel VA in usrpt (Usrptmap).
 *	B_PHYS|B_DIRTY:	Dirty page push.
 *			Address is a VA in proc2's address space.
 *	B_PHYS|B_PGIN:	Kernel pagein of user pages.
 *			Address is VA in user's address space.
 *	B_PHYS:		User "raw" IO request.
 *			Address is VA in user's address space.
 *
 * All requests are (re)mapped into kernel VA space via the useriomap
 * (a name with only slightly more meaning than "kernelmap")
 */
void
vmapbuf(bp)
	register struct buf *bp;
{
	register int npf;
	register caddr_t addr;
	register long flags = bp->b_flags;
	struct proc *p;
	int off;
	vm_offset_t kva;
	register vm_offset_t pa;

	if ((flags & B_PHYS) == 0)
		panic("vmapbuf");
	addr = bp->b_saveaddr = bp->b_un.b_addr;
	off = (int)addr & PGOFSET;
	p = bp->b_proc;
	npf = btoc(round_page(bp->b_bcount + off));
	kva = kmem_alloc_wait(phys_map, ctob(npf));
	bp->b_un.b_addr = (caddr_t) (kva + off);
	while (npf--) {
		pa = pmap_extract(&p->p_vmspace->vm_pmap, (vm_offset_t)addr);
		if (pa == 0)
			panic("vmapbuf: null page frame");
		pmap_kenter(kva, trunc_page(pa));
		addr += PAGE_SIZE;
		kva += PAGE_SIZE;
	}
	pmap_update();
}
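
/*
 * Worked example for the mapping arithmetic above, with PAGE_SIZE == 4096
 * and a hypothetical 10000-byte request whose user address ends in 0xf80:
 *
 *	off = 0xf80
 *	npf = btoc(round_page(10000 + 0xf80)) = btoc(0x4000) = 4
 *
 * so four pages of kernel va are taken from phys_map, each user page is
 * entered into the kernel pmap, and b_addr becomes the new kva plus the
 * original 0xf80 offset into the first page.
 */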

/*
 * Free the io map PTEs associated with this IO operation.
 * We also invalidate the TLB entries and restore the original b_addr.
 */
void
vunmapbuf(bp)
	register struct buf *bp;
{
	register int npf;
	register caddr_t addr = bp->b_un.b_addr;
	vm_offset_t kva;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vunmapbuf");
	npf = btoc(round_page(bp->b_bcount + ((int)addr & PGOFSET)));
	kva = (vm_offset_t)((int)addr & ~PGOFSET);
	kmem_free_wakeup(phys_map, kva, ctob(npf));
	bp->b_un.b_addr = bp->b_saveaddr;
	bp->b_saveaddr = NULL;
}

/*
 * Force reset the processor by invalidating the entire address space!
 */
void
cpu_reset()
{
	/* force a shutdown by unmapping entire address space ! */
	bzero((caddr_t) PTD, NBPG);

	/* "good night, sweet prince .... <THUNK!>" */
	tlbflush();
	/* NOTREACHED */
	while (1);
}

/*
 * Grow the user stack to allow for 'sp'. This version grows the stack in
 *	chunks of SGROWSIZ.
 */
int
grow(p, sp)
	struct proc *p;
	int sp;
{
	unsigned int nss;
	caddr_t v;
	struct vmspace *vm = p->p_vmspace;

	if ((caddr_t)sp <= vm->vm_maxsaddr || (unsigned)sp >= (unsigned)USRSTACK)
	    return (1);

	nss = roundup(USRSTACK - (unsigned)sp, PAGE_SIZE);

	if (nss > p->p_rlimit[RLIMIT_STACK].rlim_cur)
		return (0);

	if (vm->vm_ssize && roundup(vm->vm_ssize << PAGE_SHIFT,
	    SGROWSIZ) < nss) {
		int grow_amount;
		/*
		 * If necessary, grow the VM that the stack occupies
		 * to allow for the rlimit. This allows us to not have
		 * to allocate all of the VM up-front in execve (which
		 * is expensive).
		 * Grow the VM by the amount requested rounded up to
		 * the nearest SGROWSIZ to provide for some hysteresis.
		 */
		grow_amount = roundup((nss - (vm->vm_ssize << PAGE_SHIFT)), SGROWSIZ);
		v = (char *)USRSTACK - roundup(vm->vm_ssize << PAGE_SHIFT,
		    SGROWSIZ) - grow_amount;
		/*
		 * If there isn't enough room to extend by SGROWSIZ, then
		 * just extend to the maximum size
		 */
		if (v < vm->vm_maxsaddr) {
			v = vm->vm_maxsaddr;
			grow_amount = MAXSSIZ - (vm->vm_ssize << PAGE_SHIFT);
		}
		if (vm_allocate(&vm->vm_map, (vm_offset_t *)&v,
		    grow_amount, FALSE) != KERN_SUCCESS) {
			return (0);
		}
		vm->vm_ssize += grow_amount >> PAGE_SHIFT;
	}

	return (1);
}
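
/*
 * Worked example for the hysteresis above, assuming PAGE_SIZE == 4096 and
 * the usual SGROWSIZ of 128K: with vm_ssize currently 32 pages (128K) and
 * a fault that needs nss = 140K,
 *
 *	roundup(128K, 128K) = 128K < 140K, so the stack must grow, and
 *	grow_amount = roundup(140K - 128K, 128K) = 128K,
 *
 * i.e. the stack is extended a whole SGROWSIZ chunk at a time, so further
 * faults below the new limit need no additional vm_allocate() call.
 */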