vm_machdep.c revision 199135
1/*-
2 * Copyright (c) 1982, 1986 The Regents of the University of California.
3 * Copyright (c) 1989, 1990 William Jolitz
4 * Copyright (c) 1994 John Dyson
5 * Copyright (c) 2001 Jake Burkholder.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * the Systems Programming Group of the University of Utah Computer
10 * Science Department, and William Jolitz.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 *    notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 *    notice, this list of conditions and the following disclaimer in the
19 *    documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 *    must display the following acknowledgement:
22 *	This product includes software developed by the University of
23 *	California, Berkeley and its contributors.
24 * 4. Neither the name of the University nor the names of its contributors
25 *    may be used to endorse or promote products derived from this software
26 *    without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
41 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
42 *	from: FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.167 2001/07/12
43 */
44
45#include <sys/cdefs.h>
46__FBSDID("$FreeBSD: head/sys/sparc64/sparc64/vm_machdep.c 199135 2009-11-10 11:43:07Z kib $");
47
48#include "opt_pmap.h"
49
50#include <sys/param.h>
51#include <sys/systm.h>
52#include <sys/bio.h>
53#include <sys/buf.h>
54#include <sys/kernel.h>
55#include <sys/linker_set.h>
56#include <sys/malloc.h>
57#include <sys/mbuf.h>
58#include <sys/mutex.h>
59#include <sys/proc.h>
60#include <sys/sysent.h>
61#include <sys/sf_buf.h>
62#include <sys/sched.h>
63#include <sys/sysctl.h>
64#include <sys/unistd.h>
65#include <sys/vmmeter.h>
66
67#include <dev/ofw/openfirm.h>
68
69#include <vm/vm.h>
70#include <vm/vm_extern.h>
71#include <vm/pmap.h>
72#include <vm/vm_kern.h>
73#include <vm/vm_map.h>
74#include <vm/vm_page.h>
75#include <vm/vm_pageout.h>
76#include <vm/vm_param.h>
77#include <vm/uma.h>
78#include <vm/uma_int.h>
79
80#include <machine/cache.h>
81#include <machine/cpu.h>
82#include <machine/fp.h>
83#include <machine/frame.h>
84#include <machine/fsr.h>
85#include <machine/md_var.h>
86#include <machine/ofw_machdep.h>
87#include <machine/ofw_mem.h>
88#include <machine/pcb.h>
89#include <machine/tlb.h>
90#include <machine/tstate.h>
91
92#ifndef NSFBUFS
93#define	NSFBUFS		(512 + maxusers * 16)
94#endif
95
96static void	sf_buf_init(void *arg);
97SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
98
99/*
100 * Expanded sf_freelist head.  Really an SLIST_HEAD() in disguise, with the
101 * sf_freelist head with the sf_lock mutex.
102 */
103static struct {
104	SLIST_HEAD(, sf_buf) sf_head;
105	struct mtx sf_lock;
106} sf_freelist;
107
108static u_int	sf_buf_alloc_want;
109
110PMAP_STATS_VAR(uma_nsmall_alloc);
111PMAP_STATS_VAR(uma_nsmall_alloc_oc);
112PMAP_STATS_VAR(uma_nsmall_free);
113
114void
115cpu_exit(struct thread *td)
116{
117	struct proc *p;
118
119	p = td->td_proc;
120	p->p_md.md_sigtramp = NULL;
121	if (p->p_md.md_utrap != NULL) {
122		utrap_free(p->p_md.md_utrap);
123		p->p_md.md_utrap = NULL;
124	}
125}
126
/*
 * cpu_thread_exit: no machine-dependent work is performed for td here.
 */
void
cpu_thread_exit(struct thread *td)
{
	/* Intentionally empty. */
}
132
/*
 * cpu_thread_clean: no machine-dependent work is performed for td here.
 */
void
cpu_thread_clean(struct thread *td)
{
	/* Intentionally empty. */
}
138
139void
140cpu_thread_alloc(struct thread *td)
141{
142	struct pcb *pcb;
143
144	pcb = (struct pcb *)((td->td_kstack + td->td_kstack_pages * PAGE_SIZE -
145	    sizeof(struct pcb)) & ~0x3fUL);
146	pcb->pcb_nsaved = 0;
147	td->td_frame = (struct trapframe *)pcb - 1;
148	td->td_pcb = pcb;
149}
150
/*
 * cpu_thread_free: no machine-dependent work is performed for td here.
 */
void
cpu_thread_free(struct thread *td)
{
	/* Intentionally empty. */
}
156
/*
 * cpu_thread_swapin: no machine-dependent work is performed for td here.
 */
void
cpu_thread_swapin(struct thread *td)
{
	/* Intentionally empty. */
}
162
/*
 * cpu_thread_swapout: no machine-dependent work is performed for td here.
 */
void
cpu_thread_swapout(struct thread *td)
{
	/* Intentionally empty. */
}
168
169void
170cpu_set_syscall_retval(struct thread *td, int error)
171{
172
173	switch (error) {
174	case 0:
175		td->td_frame->tf_out[0] = td->td_retval[0];
176		td->td_frame->tf_out[1] = td->td_retval[1];
177		td->td_frame->tf_tstate &= ~TSTATE_XCC_C;
178		break;
179
180	case ERESTART:
181		/*
182		 * Undo the tpc advancement we have done on syscall
183		 * enter, we want to reexecute the system call.
184		 */
185		td->td_frame->tf_tpc = td->td_pcb->pcb_tpc;
186		td->td_frame->tf_tnpc -= 4;
187		break;
188
189	case EJUSTRETURN:
190		break;
191
192	default:
193		if (td->td_proc->p_sysent->sv_errsize) {
194			if (error >= td->td_proc->p_sysent->sv_errsize)
195				error = -1;	/* XXX */
196			else
197				error = td->td_proc->p_sysent->sv_errtbl[error];
198		}
199		td->td_frame->tf_out[0] = error;
200		td->td_frame->tf_tstate |= TSTATE_XCC_C;
201		break;
202	}
203}
204
205void
206cpu_set_upcall(struct thread *td, struct thread *td0)
207{
208	struct trapframe *tf;
209	struct frame *fr;
210	struct pcb *pcb;
211
212	bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe));
213
214	pcb = td->td_pcb;
215	tf = td->td_frame;
216	fr = (struct frame *)tf - 1;
217	fr->fr_local[0] = (u_long)fork_return;
218	fr->fr_local[1] = (u_long)td;
219	fr->fr_local[2] = (u_long)tf;
220	pcb->pcb_pc = (u_long)fork_trampoline - 8;
221	pcb->pcb_sp = (u_long)fr - SPOFF;
222
223	/* Setup to release the spin count in fork_exit(). */
224	td->td_md.md_spinlock_count = 1;
225	td->td_md.md_saved_pil = 0;
226}
227
228void
229cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg,
230	stack_t *stack)
231{
232	struct trapframe *tf;
233	uint64_t sp;
234
235	if (td == curthread)
236		flushw();
237	tf = td->td_frame;
238	sp = (uint64_t)stack->ss_sp + stack->ss_size;
239	tf->tf_out[0] = (uint64_t)arg;
240	tf->tf_out[6] = sp - SPOFF - sizeof(struct frame);
241	tf->tf_tpc = (uint64_t)entry;
242	tf->tf_tnpc = tf->tf_tpc + 4;
243
244	td->td_retval[0] = tf->tf_out[0];
245	td->td_retval[1] = tf->tf_out[1];
246}
247
248int
249cpu_set_user_tls(struct thread *td, void *tls_base)
250{
251
252	if (td == curthread)
253		flushw();
254	td->td_frame->tf_global[7] = (uint64_t) tls_base;
255	return (0);
256}
257
258/*
259 * Finish a fork operation, with process p2 nearly set up.
260 * Copy and update the pcb, set up the stack so that the child
261 * ready to run and return to user mode.
262 */
263void
264cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
265{
266	struct trapframe *tf;
267	struct frame *fp;
268	struct pcb *pcb1;
269	struct pcb *pcb2;
270	vm_offset_t sp;
271	int error;
272	int i;
273
274	KASSERT(td1 == curthread || td1 == &thread0,
275	    ("cpu_fork: p1 not curproc and not proc0"));
276
277	if ((flags & RFPROC) == 0)
278		return;
279
280	p2->p_md.md_sigtramp = td1->td_proc->p_md.md_sigtramp;
281	p2->p_md.md_utrap = utrap_hold(td1->td_proc->p_md.md_utrap);
282
283	/* The pcb must be aligned on a 64-byte boundary. */
284	pcb1 = td1->td_pcb;
285	pcb2 = (struct pcb *)((td2->td_kstack + td2->td_kstack_pages *
286	    PAGE_SIZE - sizeof(struct pcb)) & ~0x3fUL);
287	td2->td_pcb = pcb2;
288
289	/*
290	 * Ensure that p1's pcb is up to date.
291	 */
292	critical_enter();
293	if ((td1->td_frame->tf_fprs & FPRS_FEF) != 0)
294		savefpctx(pcb1->pcb_ufp);
295	critical_exit();
296	/* Make sure the copied windows are spilled. */
297	flushw();
298	/* Copy the pcb (this will copy the windows saved in the pcb, too). */
299	bcopy(pcb1, pcb2, sizeof(*pcb1));
300
301	/*
302	 * If we're creating a new user process and we're sharing the address
303	 * space, the parent's top most frame must be saved in the pcb.  The
304	 * child will pop the frame when it returns to user mode, and may
305	 * overwrite it with its own data causing much suffering for the
306	 * parent.  We check if its already in the pcb, and if not copy it
307	 * in.  Its unlikely that the copyin will fail, but if so there's not
308	 * much we can do.  The parent will likely crash soon anyway in that
309	 * case.
310	 */
311	if ((flags & RFMEM) != 0 && td1 != &thread0) {
312		sp = td1->td_frame->tf_sp;
313		for (i = 0; i < pcb1->pcb_nsaved; i++) {
314			if (pcb1->pcb_rwsp[i] == sp)
315				break;
316		}
317		if (i == pcb1->pcb_nsaved) {
318			error = copyin((caddr_t)sp + SPOFF, &pcb1->pcb_rw[i],
319			    sizeof(struct rwindow));
320			if (error == 0) {
321				pcb1->pcb_rwsp[i] = sp;
322				pcb1->pcb_nsaved++;
323			}
324		}
325	}
326
327	/*
328	 * Create a new fresh stack for the new process.
329	 * Copy the trap frame for the return to user mode as if from a
330	 * syscall.  This copies most of the user mode register values.
331	 */
332	tf = (struct trapframe *)pcb2 - 1;
333	bcopy(td1->td_frame, tf, sizeof(*tf));
334
335	tf->tf_out[0] = 0;			/* Child returns zero */
336	tf->tf_out[1] = 0;
337	tf->tf_tstate &= ~TSTATE_XCC_C;		/* success */
338	tf->tf_fprs = 0;
339
340	td2->td_frame = tf;
341	fp = (struct frame *)tf - 1;
342	fp->fr_local[0] = (u_long)fork_return;
343	fp->fr_local[1] = (u_long)td2;
344	fp->fr_local[2] = (u_long)tf;
345	/* Terminate stack traces at this frame. */
346	fp->fr_pc = fp->fr_fp = 0;
347	pcb2->pcb_sp = (u_long)fp - SPOFF;
348	pcb2->pcb_pc = (u_long)fork_trampoline - 8;
349
350	/* Setup to release the spin count in fork_exit(). */
351	td2->td_md.md_spinlock_count = 1;
352	td2->td_md.md_saved_pil = 0;
353
354	/*
355	 * Now, cpu_switch() can schedule the new process.
356	 */
357}
358
359void
360cpu_reset(void)
361{
362	static char bspec[64] = "";
363	phandle_t chosen;
364	static struct {
365		cell_t	name;
366		cell_t	nargs;
367		cell_t	nreturns;
368		cell_t	bootspec;
369	} args = {
370		(cell_t)"boot",
371		1,
372		0,
373		(cell_t)bspec
374	};
375
376	if ((chosen = OF_finddevice("/chosen")) != 0) {
377		if (OF_getprop(chosen, "bootpath", bspec, sizeof(bspec)) == -1)
378			bspec[0] = '\0';
379		bspec[sizeof(bspec) - 1] = '\0';
380	}
381
382	cpu_shutdown(&args);
383}
384
385/*
386 * Intercept the return address from a freshly forked process that has NOT
387 * been scheduled yet.
388 *
389 * This is needed to make kernel threads stay in kernel mode.
390 */
391void
392cpu_set_fork_handler(struct thread *td, void (*func)(void *), void *arg)
393{
394	struct frame *fp;
395	struct pcb *pcb;
396
397	pcb = td->td_pcb;
398	fp = (struct frame *)(pcb->pcb_sp + SPOFF);
399	fp->fr_local[0] = (u_long)func;
400	fp->fr_local[1] = (u_long)arg;
401}
402
403int
404is_physical_memory(vm_paddr_t addr)
405{
406	struct ofw_mem_region *mr;
407
408	for (mr = sparc64_memreg; mr < sparc64_memreg + sparc64_nmemreg; mr++)
409		if (addr >= mr->mr_start && addr < mr->mr_start + mr->mr_size)
410			return (1);
411	return (0);
412}
413
414/*
415 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
416 */
417static void
418sf_buf_init(void *arg)
419{
420	struct sf_buf *sf_bufs;
421	vm_offset_t sf_base;
422	int i;
423
424	nsfbufs = NSFBUFS;
425	TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
426
427	mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
428	SLIST_INIT(&sf_freelist.sf_head);
429	sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE);
430	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
431	    M_NOWAIT | M_ZERO);
432	for (i = 0; i < nsfbufs; i++) {
433		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
434		SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
435	}
436	sf_buf_alloc_want = 0;
437}
438
439/*
440 * Get an sf_buf from the freelist.  Will block if none are available.
441 */
442struct sf_buf *
443sf_buf_alloc(struct vm_page *m, int flags)
444{
445	struct sf_buf *sf;
446	int error;
447
448	mtx_lock(&sf_freelist.sf_lock);
449	while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
450		if (flags & SFB_NOWAIT)
451			break;
452		sf_buf_alloc_want++;
453		mbstat.sf_allocwait++;
454		error = msleep(&sf_freelist, &sf_freelist.sf_lock,
455		    (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
456		sf_buf_alloc_want--;
457
458		/*
459		 * If we got a signal, don't risk going back to sleep.
460		 */
461		if (error)
462			break;
463	}
464	if (sf != NULL) {
465		SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
466		sf->m = m;
467		nsfbufsused++;
468		nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
469		pmap_qenter(sf->kva, &sf->m, 1);
470	}
471	mtx_unlock(&sf_freelist.sf_lock);
472	return (sf);
473}
474
475/*
476 * Release resources back to the system.
477 */
478void
479sf_buf_free(struct sf_buf *sf)
480{
481
482	pmap_qremove(sf->kva, 1);
483	mtx_lock(&sf_freelist.sf_lock);
484	SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
485	nsfbufsused--;
486	if (sf_buf_alloc_want > 0)
487		wakeup_one(&sf_freelist);
488	mtx_unlock(&sf_freelist.sf_lock);
489}
490
/*
 * swi_vm: currently a no-op; the argument v is ignored.
 */
void
swi_vm(void *v)
{
	/*
	 * Nothing to do here yet - busdma bounce buffers are not yet
	 * implemented.
	 */
}
500
501void *
502uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
503{
504	static vm_pindex_t color;
505	vm_paddr_t pa;
506	vm_page_t m;
507	int pflags;
508	void *va;
509
510	PMAP_STATS_INC(uma_nsmall_alloc);
511
512	*flags = UMA_SLAB_PRIV;
513
514	if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT)
515		pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED;
516	else
517		pflags = VM_ALLOC_SYSTEM | VM_ALLOC_WIRED;
518
519	if (wait & M_ZERO)
520		pflags |= VM_ALLOC_ZERO;
521
522	for (;;) {
523		m = vm_page_alloc(NULL, color++, pflags | VM_ALLOC_NOOBJ);
524		if (m == NULL) {
525			if (wait & M_NOWAIT)
526				return (NULL);
527			else
528				VM_WAIT;
529		} else
530			break;
531	}
532
533	pa = VM_PAGE_TO_PHYS(m);
534	if (m->md.color != DCACHE_COLOR(pa)) {
535		KASSERT(m->md.colors[0] == 0 && m->md.colors[1] == 0,
536		    ("uma_small_alloc: free page still has mappings!"));
537		PMAP_STATS_INC(uma_nsmall_alloc_oc);
538		m->md.color = DCACHE_COLOR(pa);
539		dcache_page_inval(pa);
540	}
541	va = (void *)TLB_PHYS_TO_DIRECT(pa);
542	if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
543		bzero(va, PAGE_SIZE);
544	return (va);
545}
546
547void
548uma_small_free(void *mem, int size, u_int8_t flags)
549{
550	vm_page_t m;
551
552	PMAP_STATS_INC(uma_nsmall_free);
553	m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS((vm_offset_t)mem));
554	m->wire_count--;
555	vm_page_free(m);
556	atomic_subtract_int(&cnt.v_wire_count, 1);
557}
558