vm_machdep.c revision 1307
/*-
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * Copyright (c) 1989, 1990 William Jolitz
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 *	$Id: vm_machdep.c,v 1.14 1994/03/23 09:15:06 davidg Exp $
 */

#include "npx.h"
#include "param.h"
#include "systm.h"
#include "proc.h"
#include "malloc.h"
#include "buf.h"
#include "user.h"

#include "../include/cpu.h"

#include "vm/vm.h"
#include "vm/vm_kern.h"

#ifndef NOBOUNCE

caddr_t		bouncememory;
vm_offset_t	bouncepa, bouncepaend;
int		bouncepages, bpwait;
vm_map_t	bounce_map;
int		bmwait, bmfreeing;

#define BITS_IN_UNSIGNED (8*sizeof(unsigned))
int		bounceallocarraysize;
unsigned	*bounceallocarray;
int		bouncefree;

#define SIXTEENMEG (4096*4096)
#define MAXBKVA 512

/* special list that can be used at interrupt time for eventual kva free */
struct kvasfree {
	vm_offset_t addr;
	vm_offset_t size;
} kvaf[MAXBKVA];

int		kvasfreecnt;

/*
 * get bounce buffer pages (count physically contiguous)
 * (only 1 implemented now)
 */
vm_offset_t
vm_bounce_page_find(count)
	int count;
{
	int bit;
	int s,i;

	if (count != 1)
		panic("vm_bounce_page_find -- no support for > 1 page yet!!!");

	s = splbio();
retry:
	for (i = 0; i < bounceallocarraysize; i++) {
		if (bounceallocarray[i] != 0xffffffff) {
			if (bit = ffs(~bounceallocarray[i])) {
				bounceallocarray[i] |= 1 << (bit - 1);
				bouncefree -= count;
				splx(s);
				return bouncepa + (i * BITS_IN_UNSIGNED + (bit - 1)) * NBPG;
			}
		}
	}
	bpwait = 1;
	tsleep((caddr_t) &bounceallocarray, PRIBIO, "bncwai", 0);
	goto retry;
}

/*
 * free count bounce buffer pages
 */
void
vm_bounce_page_free(pa, count)
	vm_offset_t pa;
	int count;
{
	int allocindex;
	int index;
	int bit;

	if (count != 1)
		panic("vm_bounce_page_free -- no support for > 1 page yet!!!\n");

	index = (pa - bouncepa) / NBPG;

	if ((index < 0) || (index >= bouncepages))
		panic("vm_bounce_page_free -- bad index\n");

	allocindex = index / BITS_IN_UNSIGNED;
	bit = index % BITS_IN_UNSIGNED;

	bounceallocarray[allocindex] &= ~(1 << bit);

	bouncefree += count;
	if (bpwait) {
		bpwait = 0;
		wakeup((caddr_t) &bounceallocarray);
	}
}
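
/*
 * Illustrative sketch (not part of the original file): the intended pairing
 * of vm_bounce_page_find() and vm_bounce_page_free() above.  A caller takes
 * one page below 16MB for a DMA transfer and returns it when the transfer
 * completes; the surrounding driver code is hypothetical.
 */
#if 0
	vm_offset_t pa;

	pa = vm_bounce_page_find(1);	/* may tsleep until a page is free */
	/* ... run the DMA transfer to/from pa ... */
	vm_bounce_page_free(pa, 1);	/* clears the bitmap bit, wakes waiters */
#endif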

/*
 * allocate count bounce buffer kva pages
 */
vm_offset_t
vm_bounce_kva(count)
	int count;
{
	int tofree;
	int i;
	int startfree;
	vm_offset_t kva;
	int s = splbio();
	startfree = 0;
more:
	if (!bmfreeing && (tofree = kvasfreecnt)) {
		bmfreeing = 1;
more1:
		for (i = startfree; i < kvasfreecnt; i++) {
			pmap_remove(kernel_pmap,
				kvaf[i].addr, kvaf[i].addr + kvaf[i].size);
			kmem_free_wakeup(bounce_map, kvaf[i].addr,
				kvaf[i].size);
		}
		if (kvasfreecnt != tofree) {
			startfree = i;
			bmfreeing = 0;
			goto more;
		}
		kvasfreecnt = 0;
		bmfreeing = 0;
	}

	if (!(kva = kmem_alloc_pageable(bounce_map, count * NBPG))) {
		bmwait = 1;
		tsleep((caddr_t) bounce_map, PRIBIO, "bmwait", 0);
		goto more;
	}

	splx(s);

	return kva;
}

/*
 * init the bounce buffer system
 */
void
vm_bounce_init()
{
	vm_offset_t minaddr, maxaddr;

	if (bouncepages == 0)
		return;

	bounceallocarraysize = (bouncepages + BITS_IN_UNSIGNED - 1) / BITS_IN_UNSIGNED;
	bounceallocarray = malloc(bounceallocarraysize * sizeof(unsigned), M_TEMP, M_NOWAIT);

	if (!bounceallocarray)
		panic("Cannot allocate bounce resource array\n");

	bzero(bounceallocarray, bounceallocarraysize * sizeof(unsigned));

	bounce_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, MAXBKVA * NBPG, FALSE);

	bouncepa = pmap_kextract((vm_offset_t) bouncememory);
	bouncepaend = bouncepa + bouncepages * NBPG;
	bouncefree = bouncepages;
	kvasfreecnt = 0;
}

/*
 * do the things necessary to the struct buf to implement
 * bounce buffers...  inserted before the disk sort
 */
void
vm_bounce_alloc(bp)
	struct buf *bp;
{
	int countvmpg;
	vm_offset_t vastart, vaend;
	vm_offset_t vapstart, vapend;
	vm_offset_t va, kva;
	vm_offset_t pa;
	int dobounceflag = 0;
	int bounceindex;
	int i;
	int s;

	if (bouncepages == 0)
		return;

	vastart = (vm_offset_t) bp->b_un.b_addr;
	vaend = (vm_offset_t) bp->b_un.b_addr + bp->b_bcount;

	vapstart = i386_trunc_page(vastart);
	vapend = i386_round_page(vaend);
	countvmpg = (vapend - vapstart) / NBPG;

/*
 * if any page is above 16MB, then go into bounce-buffer mode
 */
	va = vapstart;
	for (i = 0; i < countvmpg; i++) {
		pa = pmap_kextract(va);
		if (pa >= SIXTEENMEG)
			++dobounceflag;
		va += NBPG;
	}
	if (dobounceflag == 0)
		return;

	if (bouncepages < dobounceflag)
		panic("Not enough bounce buffers!!!");

/*
 * allocate a replacement kva for b_addr
 */
	kva = vm_bounce_kva(countvmpg);
	va = vapstart;
	for (i = 0; i < countvmpg; i++) {
		pa = pmap_kextract(va);
		if (pa >= SIXTEENMEG) {
			/*
			 * allocate a replacement page
			 */
			vm_offset_t bpa = vm_bounce_page_find(1);
			pmap_enter(kernel_pmap, kva + (NBPG * i), bpa, VM_PROT_DEFAULT,
				TRUE);
			/*
			 * if we are writing, then copy the data into the page
			 */
			if ((bp->b_flags & B_READ) == 0)
				bcopy((caddr_t) va, (caddr_t) kva + (NBPG * i), NBPG);
		} else {
			/*
			 * use original page
			 */
			pmap_enter(kernel_pmap, kva + (NBPG * i), pa, VM_PROT_DEFAULT,
				TRUE);
		}
		va += NBPG;
	}

/*
 * flag the buffer as being bounced
 */
	bp->b_flags |= B_BOUNCE;
/*
 * save the original buffer kva
 */
	bp->b_savekva = bp->b_un.b_addr;
/*
 * put our new kva into the buffer (offset by original offset)
 */
	bp->b_un.b_addr = (caddr_t) (((vm_offset_t) kva) |
				((vm_offset_t) bp->b_savekva & (NBPG - 1)));
	return;
}
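
/*
 * Illustrative sketch (not part of the original file): per the comment
 * above, vm_bounce_alloc() is meant to be called from a driver's strategy
 * routine before the request is sorted and started, so that b_un.b_addr
 * already points at DMA-safe memory when the controller is programmed.
 * The driver name and queueing step are hypothetical.
 */
#if 0
void
mydriver_strategy(bp)
	struct buf *bp;
{
	vm_bounce_alloc(bp);	/* no-op unless some page lies above 16MB */
	/* ... sort bp onto the unit's queue and start the transfer ... */
}
#endif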

/*
 * hook into biodone to free bounce buffer
 */
void
vm_bounce_free(bp)
	struct buf *bp;
{
	int i;
	vm_offset_t origkva, bouncekva;
	vm_offset_t vastart, vaend;
	vm_offset_t vapstart, vapend;
	int countbounce = 0;
	vm_offset_t firstbouncepa = 0;
	int firstbounceindex;
	int countvmpg;
	vm_offset_t bcount;
	int s;

/*
 * if this isn't a bounced buffer, then just return
 */
	if ((bp->b_flags & B_BOUNCE) == 0)
		return;

	origkva = (vm_offset_t) bp->b_savekva;
	bouncekva = (vm_offset_t) bp->b_un.b_addr;

	vastart = bouncekva;
	vaend = bouncekva + bp->b_bcount;
	bcount = bp->b_bcount;

	vapstart = i386_trunc_page(vastart);
	vapend = i386_round_page(vaend);

	countvmpg = (vapend - vapstart) / NBPG;

/*
 * check every page in the kva space for b_addr
 */
	for (i = 0; i < countvmpg; i++) {
		vm_offset_t mybouncepa;
		vm_offset_t copycount;

		copycount = i386_round_page(bouncekva + 1) - bouncekva;
		mybouncepa = pmap_kextract(i386_trunc_page(bouncekva));

/*
 * if this is a bounced pa, then process as one
 */
		if ((mybouncepa >= bouncepa) && (mybouncepa < bouncepaend)) {
			if (copycount > bcount)
				copycount = bcount;
/*
 * if this is a read, then copy from bounce buffer into original buffer
 */
			if (bp->b_flags & B_READ)
				bcopy((caddr_t) bouncekva, (caddr_t) origkva, copycount);
/*
 * free the bounce allocation
 */
			vm_bounce_page_free(i386_trunc_page(mybouncepa), 1);
		}

		origkva += copycount;
		bouncekva += copycount;
		bcount -= copycount;
	}

/*
 * add the old kva into the "to free" list
 */
	bouncekva = i386_trunc_page((vm_offset_t) bp->b_un.b_addr);
	kvaf[kvasfreecnt].addr = bouncekva;
	kvaf[kvasfreecnt++].size = countvmpg * NBPG;
	if (bmwait) {
		/*
		 * if anyone is waiting on the bounce-map, then wakeup
		 */
		wakeup((caddr_t) bounce_map);
		bmwait = 0;
	}

	bp->b_un.b_addr = bp->b_savekva;
	bp->b_savekva = 0;
	bp->b_flags &= ~B_BOUNCE;

	return;
}
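
/*
 * Illustrative sketch (not part of the original file): the completion-side
 * hook described above.  In the I/O-done path, vm_bounce_free() copies read
 * data back to the original buffer, releases the bounce pages, queues the
 * temporary kva for freeing, and restores b_un.b_addr.  The fragment below
 * is hypothetical; vm_bounce_free() returns immediately if B_BOUNCE is not
 * set, so it can be called unconditionally.
 */
#if 0
	/* in the buffer-completion path, before waking the buffer's owner */
	vm_bounce_free(bp);
#endif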

#endif /* NOBOUNCE */

/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the kernel stack and pcb, making the child
 * ready to run, and marking it so that it can return differently
 * than the parent.  Returns 1 in the child process, 0 in the parent.
 * We currently double-map the user area so that the stack is at the same
 * address in each process; in the future we will probably relocate
 * the frame pointers on the stack after copying.
 */
int
cpu_fork(p1, p2)
	register struct proc *p1, *p2;
{
	register struct user *up = p2->p_addr;
	int foo, offset, addr, i;
	extern char kstack[];
	extern int mvesp();

	/*
	 * Copy pcb and stack from proc p1 to p2.
	 * We do this as cheaply as possible, copying only the active
	 * part of the stack.  The stack and pcb need to agree;
	 * this is tricky, as the final pcb is constructed by savectx,
	 * but its frame isn't yet on the stack when the stack is copied.
	 * swtch compensates for this when the child eventually runs.
	 * This should be done differently, with a single call
	 * that copies and updates the pcb+stack,
	 * replacing the bcopy and savectx.
	 */
	p2->p_addr->u_pcb = p1->p_addr->u_pcb;
	offset = mvesp() - (int)kstack;
	bcopy((caddr_t)kstack + offset, (caddr_t)p2->p_addr + offset,
	    (unsigned) ctob(UPAGES) - offset);
	p2->p_regs = p1->p_regs;

	/*
	 * Wire top of address space of child to its kstack.
	 * First, fault in a page of pte's to map it.
	 */
#if 0
        addr = trunc_page((u_int)vtopte(kstack));
	vm_map_pageable(&p2->p_vmspace->vm_map, addr, addr+NBPG, FALSE);
	for (i=0; i < UPAGES; i++)
		pmap_enter(&p2->p_vmspace->vm_pmap, kstack+i*NBPG,
			   pmap_extract(kernel_pmap, ((int)p2->p_addr)+i*NBPG),
			   /*
			    * The user area has to be mapped writable because
			    * it contains the kernel stack (when CR0_WP is on,
			    * a 486 has no user-read/kernel-write
			    * mode).  It is protected from user mode access
			    * by the segment limits.
			    */
			   VM_PROT_READ|VM_PROT_WRITE, TRUE);
#endif
	pmap_activate(&p2->p_vmspace->vm_pmap, &up->u_pcb);

	/*
	 * Arrange for a non-local goto when the new process
	 * is started, to resume here, returning nonzero from setjmp.
	 */
	if (savectx(up, 1)) {
		/*
		 * Return 1 in child.
		 */
		return (1);
	}
	return (0);
}
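
/*
 * Illustrative sketch (not part of the original file): using the dual
 * return described in the comment above.  cpu_fork() returns twice, once
 * in each process; the nonzero return selects the child-only path.  The
 * child_return() name is hypothetical.
 */
#if 0
	if (cpu_fork(p1, p2)) {
		/* child: runs later, once swtch() resumes the saved context */
		child_return(p2);
	}
	/* parent: falls through and finishes the fork immediately */
#endif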

#ifdef notyet
/*
 * cpu_exit is called as the last action during exit.
 *
 * We change to an inactive address space and a "safe" stack,
 * passing thru an argument to the new stack. Now, safely isolated
 * from the resources we're shedding, we release the address space
 * and any remaining machine-dependent resources, including the
 * memory for the user structure and kernel stack.
 *
 * Next, we assign a dummy context to be written over by swtch,
 * calling it to send this process off to oblivion.
 * [The nullpcb allows us to minimize cost in swtch() by not having
 * a special case].
 */
struct proc *swtch_to_inactive();
volatile void
cpu_exit(p)
	register struct proc *p;
{
	static struct pcb nullpcb;	/* pcb to overwrite on last swtch */

#if NNPX > 0
	npxexit(p);
#endif	/* NNPX */

	/* move to inactive space and stack, passing arg across */
	p = swtch_to_inactive(p);

	/* drop per-process resources */
	vmspace_free(p->p_vmspace);
	kmem_free(kernel_map, (vm_offset_t)p->p_addr, ctob(UPAGES));

	p->p_addr = (struct user *) &nullpcb;
	splclock();
	swtch();
	/* NOTREACHED */
}
#else
void
cpu_exit(p)
	register struct proc *p;
{

#if NNPX > 0
	npxexit(p);
#endif	/* NNPX */
	splclock();
	curproc = 0;
	swtch();
	/*
	 * This is to shut up the compiler, and if swtch() failed I suppose
	 * this would be a good thing.  This keeps gcc happy because panic
	 * is a volatile void function as well.
	 */
	panic("cpu_exit");
}

void
cpu_wait(p)
	struct proc *p;
{
/*	extern vm_map_t upages_map; */
	extern char kstack[];

	/* drop per-process resources */
	pmap_remove(vm_map_pmap(kernel_map), (vm_offset_t) p->p_addr,
		((vm_offset_t) p->p_addr) + ctob(UPAGES));
	kmem_free(kernel_map, (vm_offset_t)p->p_addr, ctob(UPAGES));
	vmspace_free(p->p_vmspace);
}
#endif
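
/*
 * Illustrative sketch (not part of the original file): the division of
 * labor between the two routines above.  cpu_exit() is the dying process's
 * last action and never returns; cpu_wait() runs later, in the parent's
 * wait path, and is where the child's user structure and kernel stack are
 * finally unmapped and freed.  The calling fragments are hypothetical.
 */
#if 0
	/* in the exiting process */
	cpu_exit(p);		/* switches away for good */

	/* in the parent, once the child has been reaped */
	cpu_wait(p);		/* release the U pages and vmspace */
#endif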

/*
 * Set a red zone in the kernel stack after the u. area.
 */
void
setredzone(pte, vaddr)
	u_short *pte;
	caddr_t vaddr;
{
/* eventually do this by setting up an expand-down stack segment
   for ss0: selector, allowing stack access down to top of u.
   this means though that protection violations need to be handled
   thru a double fault exception that must do an integral task
   switch to a known good context, within which a dump can be
   taken. a sensible scheme might be to save the initial context
   used by sched (that has physical memory mapped 1:1 at bottom)
   and take the dump while still in mapped mode */
}

/*
 * Convert kernel VA to physical address
 */
u_long
kvtop(void *addr)
{
	vm_offset_t va;

	va = pmap_kextract((vm_offset_t)addr);
	if (va == 0)
		panic("kvtop: zero page frame");
	return((u_long)va);
}

extern vm_map_t phys_map;

/*
 * Map an IO request into kernel virtual address space.  Requests fall into
 * one of five categories:
 *
 *	B_PHYS|B_UAREA:	User u-area swap.
 *			Address is relative to start of u-area (p_addr).
 *	B_PHYS|B_PAGET:	User page table swap.
 *			Address is a kernel VA in usrpt (Usrptmap).
 *	B_PHYS|B_DIRTY:	Dirty page push.
 *			Address is a VA in proc2's address space.
 *	B_PHYS|B_PGIN:	Kernel pagein of user pages.
 *			Address is VA in user's address space.
 *	B_PHYS:		User "raw" IO request.
 *			Address is VA in user's address space.
 *
 * All requests are (re)mapped into kernel VA space via the useriomap
 * (a name with only slightly more meaning than "kernelmap")
 */
void
vmapbuf(bp)
	register struct buf *bp;
{
	register int npf;
	register caddr_t addr;
	register long flags = bp->b_flags;
	struct proc *p;
	int off;
	vm_offset_t kva;
	register vm_offset_t pa;

	if ((flags & B_PHYS) == 0)
		panic("vmapbuf");
	addr = bp->b_saveaddr = bp->b_un.b_addr;
	off = (int)addr & PGOFSET;
	p = bp->b_proc;
	npf = btoc(round_page(bp->b_bcount + off));
	kva = kmem_alloc_wait(phys_map, ctob(npf));
	bp->b_un.b_addr = (caddr_t) (kva + off);
	while (npf--) {
		pa = pmap_extract(&p->p_vmspace->vm_pmap, (vm_offset_t)addr);
		if (pa == 0)
			panic("vmapbuf: null page frame");
		pmap_enter(vm_map_pmap(phys_map), kva, trunc_page(pa),
			   VM_PROT_READ|VM_PROT_WRITE, TRUE);
		addr += PAGE_SIZE;
		kva += PAGE_SIZE;
	}
}

/*
 * Free the io map PTEs associated with this IO operation.
 * We also invalidate the TLB entries and restore the original b_addr.
 */
void
vunmapbuf(bp)
	register struct buf *bp;
{
	register int npf;
	register caddr_t addr = bp->b_un.b_addr;
	vm_offset_t kva;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vunmapbuf");
	npf = btoc(round_page(bp->b_bcount + ((int)addr & PGOFSET)));
	kva = (vm_offset_t)((int)addr & ~PGOFSET);
	kmem_free_wakeup(phys_map, kva, ctob(npf));
	bp->b_un.b_addr = bp->b_saveaddr;
	bp->b_saveaddr = NULL;
}
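
/*
 * Illustrative sketch (not part of the original file): the common B_PHYS
 * ("raw" IO) pattern from the comment above.  The user's pages are double
 * mapped into kernel VA for the duration of the transfer and the mapping
 * is torn down afterwards.  The buffer setup and wait logic here are
 * hypothetical.
 */
#if 0
	bp->b_flags |= B_PHYS;
	bp->b_proc = p;
	bp->b_un.b_addr = useraddr;	/* user VA of the caller's buffer */
	bp->b_bcount = len;

	vmapbuf(bp);			/* map the user pages at a kernel VA */
	(*strategy)(bp);		/* start the transfer */
	/* ... sleep until the transfer completes ... */
	vunmapbuf(bp);			/* free the kernel VA, restore b_addr */
#endif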

/*
 * Force reset the processor by invalidating the entire address space!
 */
void
cpu_reset()
{

	/* force a shutdown by unmapping entire address space ! */
	bzero((caddr_t) PTD, NBPG);

	/* "good night, sweet prince .... <THUNK!>" */
	tlbflush();
	/* NOTREACHED */
	while(1);
}

/*
 * Grow the user stack to allow for 'sp'. This version grows the stack in
 *	chunks of SGROWSIZ.
 */
int
grow(p, sp)
	struct proc *p;
	int sp;
{
	unsigned int nss;
	caddr_t v;
	struct vmspace *vm = p->p_vmspace;

	if ((caddr_t)sp <= vm->vm_maxsaddr || (unsigned)sp >= (unsigned)USRSTACK)
	    return (1);

	nss = roundup(USRSTACK - (unsigned)sp, PAGE_SIZE);

	if (nss > p->p_rlimit[RLIMIT_STACK].rlim_cur)
		return (0);

	if (vm->vm_ssize && roundup(vm->vm_ssize << PAGE_SHIFT,
	    SGROWSIZ) < nss) {
		int grow_amount;
		/*
		 * If necessary, grow the VM that the stack occupies
		 * to allow for the rlimit. This allows us to not have
		 * to allocate all of the VM up-front in execve (which
		 * is expensive).
		 * Grow the VM by the amount requested rounded up to
		 * the nearest SGROWSIZ to provide for some hysteresis.
		 */
		grow_amount = roundup((nss - (vm->vm_ssize << PAGE_SHIFT)), SGROWSIZ);
		v = (char *)USRSTACK - roundup(vm->vm_ssize << PAGE_SHIFT,
		    SGROWSIZ) - grow_amount;
		/*
		 * If there isn't enough room to extend by SGROWSIZ, then
		 * just extend to the maximum size
		 */
		if (v < vm->vm_maxsaddr) {
			v = vm->vm_maxsaddr;
			grow_amount = MAXSSIZ - (vm->vm_ssize << PAGE_SHIFT);
		}
		if (vm_allocate(&vm->vm_map, (vm_offset_t *)&v,
		    grow_amount, FALSE) != KERN_SUCCESS) {
			return (0);
		}
		vm->vm_ssize += grow_amount >> PAGE_SHIFT;
	}

	return (1);
}
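
/*
 * Illustrative sketch (not part of the original file): how a page-fault
 * handler would typically consult grow() before treating a reference just
 * below the current stack as an error, per the comment above.  The
 * fault-handler fragment and variable names are hypothetical.
 */
#if 0
	/* on a user-mode fault at stack address va (below USRSTACK) */
	if (grow(p, va)) {
		/* stack now covers va; retry the faulting access */
	} else {
		/* over the stack rlimit: deliver SIGSEGV */
	}
#endif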