1/*-
2 * Copyright (c) 1982, 1986 The Regents of the University of California.
3 * Copyright (c) 1989, 1990 William Jolitz
4 * Copyright (c) 1994 John Dyson
5 * Copyright (c) 2001 Jake Burkholder.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * the Systems Programming Group of the University of Utah Computer
10 * Science Department, and William Jolitz.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 *    notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 *    notice, this list of conditions and the following disclaimer in the
19 *    documentation and/or other materials provided with the distribution.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
37 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
38 *	from: FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.167 2001/07/12
39 */
40
41#include <sys/cdefs.h>
42__FBSDID("$FreeBSD$");
43
44#include "opt_pmap.h"
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/bio.h>
49#include <sys/buf.h>
50#include <sys/kernel.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/mutex.h>
54#include <sys/proc.h>
55#include <sys/sysent.h>
56#include <sys/sf_buf.h>
57#include <sys/sched.h>
58#include <sys/sysctl.h>
59#include <sys/unistd.h>
60#include <sys/vmmeter.h>
61
62#include <dev/ofw/openfirm.h>
63
64#include <vm/vm.h>
65#include <vm/vm_extern.h>
66#include <vm/pmap.h>
67#include <vm/vm_kern.h>
68#include <vm/vm_map.h>
69#include <vm/vm_page.h>
70#include <vm/vm_pageout.h>
71#include <vm/vm_param.h>
72#include <vm/uma.h>
73#include <vm/uma_int.h>
74
75#include <machine/cache.h>
76#include <machine/cpu.h>
77#include <machine/fp.h>
78#include <machine/frame.h>
79#include <machine/fsr.h>
80#include <machine/md_var.h>
81#include <machine/ofw_machdep.h>
82#include <machine/ofw_mem.h>
83#include <machine/pcb.h>
84#include <machine/tlb.h>
85#include <machine/tstate.h>
86
/*
 * Default number of sf_bufs: 512 plus 16 per maxusers.  sf_buf_init() uses
 * this as the initial value of nsfbufs before consulting the
 * kern.ipc.nsfbufs tunable.  A definition supplied earlier (e.g. at build
 * time) takes precedence over this one.
 */
#ifndef NSFBUFS
#define	NSFBUFS		(512 + maxusers * 16)
#endif

/* sf_buf_init() is registered to run at SI_SUB_MBUF, SI_ORDER_ANY. */
static void	sf_buf_init(void *arg);
SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
93
/*
 * Expanded sf_freelist head.  Really an SLIST_HEAD() in disguise, bundled
 * with the sf_lock mutex that sf_buf_alloc() and sf_buf_free() hold while
 * manipulating the list.
 */
static struct {
	SLIST_HEAD(, sf_buf) sf_head;
	struct mtx sf_lock;
} sf_freelist;

/*
 * Count of sf_buf_alloc() callers currently sleeping for a free sf_buf;
 * sf_buf_free() only issues a wakeup when this is non-zero.
 */
static u_int	sf_buf_alloc_want;
104
/*
 * Statistics counters bumped via PMAP_STATS_INC():
 *   uma_nsmall_alloc    - every call to uma_small_alloc()
 *   uma_nsmall_alloc_oc - allocations where the page's recorded color did
 *                         not match DCACHE_COLOR() of its physical address
 *   uma_nsmall_free     - every call to uma_small_free()
 */
PMAP_STATS_VAR(uma_nsmall_alloc);
PMAP_STATS_VAR(uma_nsmall_alloc_oc);
PMAP_STATS_VAR(uma_nsmall_free);
108
109void
110cpu_exit(struct thread *td)
111{
112	struct proc *p;
113
114	p = td->td_proc;
115	p->p_md.md_sigtramp = NULL;
116	if (p->p_md.md_utrap != NULL) {
117		utrap_free(p->p_md.md_utrap);
118		p->p_md.md_utrap = NULL;
119	}
120}
121
/*
 * Machine-dependent thread exit hook.  Intentionally empty here.
 */
void
cpu_thread_exit(struct thread *td)
{

	/* Nothing to do. */
}
127
/*
 * Machine-dependent thread clean hook.  Intentionally empty here.
 */
void
cpu_thread_clean(struct thread *td)
{

	/* Nothing to do. */
}
133
134void
135cpu_thread_alloc(struct thread *td)
136{
137	struct pcb *pcb;
138
139	pcb = (struct pcb *)((td->td_kstack + td->td_kstack_pages * PAGE_SIZE -
140	    sizeof(struct pcb)) & ~0x3fUL);
141	pcb->pcb_nsaved = 0;
142	td->td_frame = (struct trapframe *)pcb - 1;
143	td->td_pcb = pcb;
144}
145
/*
 * Machine-dependent thread free hook.  Intentionally empty here.
 */
void
cpu_thread_free(struct thread *td)
{

	/* Nothing to do. */
}
151
/*
 * Machine-dependent thread swap-in hook.  Intentionally empty here.
 */
void
cpu_thread_swapin(struct thread *td)
{

	/* Nothing to do. */
}
157
/*
 * Machine-dependent thread swap-out hook.  Intentionally empty here.
 */
void
cpu_thread_swapout(struct thread *td)
{

	/* Nothing to do. */
}
163
164void
165cpu_set_syscall_retval(struct thread *td, int error)
166{
167
168	switch (error) {
169	case 0:
170		td->td_frame->tf_out[0] = td->td_retval[0];
171		td->td_frame->tf_out[1] = td->td_retval[1];
172		td->td_frame->tf_tstate &= ~TSTATE_XCC_C;
173		break;
174
175	case ERESTART:
176		/*
177		 * Undo the tpc advancement we have done on syscall
178		 * enter, we want to reexecute the system call.
179		 */
180		td->td_frame->tf_tpc = td->td_pcb->pcb_tpc;
181		td->td_frame->tf_tnpc -= 4;
182		break;
183
184	case EJUSTRETURN:
185		break;
186
187	default:
188		if (td->td_proc->p_sysent->sv_errsize) {
189			if (error >= td->td_proc->p_sysent->sv_errsize)
190				error = -1;	/* XXX */
191			else
192				error = td->td_proc->p_sysent->sv_errtbl[error];
193		}
194		td->td_frame->tf_out[0] = error;
195		td->td_frame->tf_tstate |= TSTATE_XCC_C;
196		break;
197	}
198}
199
200void
201cpu_set_upcall(struct thread *td, struct thread *td0)
202{
203	struct trapframe *tf;
204	struct frame *fr;
205	struct pcb *pcb;
206
207	bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe));
208
209	pcb = td->td_pcb;
210	tf = td->td_frame;
211	fr = (struct frame *)tf - 1;
212	fr->fr_local[0] = (u_long)fork_return;
213	fr->fr_local[1] = (u_long)td;
214	fr->fr_local[2] = (u_long)tf;
215	pcb->pcb_pc = (u_long)fork_trampoline - 8;
216	pcb->pcb_sp = (u_long)fr - SPOFF;
217
218	/* Setup to release the spin count in fork_exit(). */
219	td->td_md.md_spinlock_count = 1;
220	td->td_md.md_saved_pil = 0;
221}
222
223void
224cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg,
225    stack_t *stack)
226{
227	struct trapframe *tf;
228	uint64_t sp;
229
230	if (td == curthread)
231		flushw();
232	tf = td->td_frame;
233	sp = (uint64_t)stack->ss_sp + stack->ss_size;
234	tf->tf_out[0] = (uint64_t)arg;
235	tf->tf_out[6] = sp - SPOFF - sizeof(struct frame);
236	tf->tf_tpc = (uint64_t)entry;
237	tf->tf_tnpc = tf->tf_tpc + 4;
238
239	td->td_retval[0] = tf->tf_out[0];
240	td->td_retval[1] = tf->tf_out[1];
241}
242
243int
244cpu_set_user_tls(struct thread *td, void *tls_base)
245{
246
247	if (td == curthread)
248		flushw();
249	td->td_frame->tf_global[7] = (uint64_t)tls_base;
250	return (0);
251}
252
253/*
254 * Finish a fork operation, with process p2 nearly set up.
255 * Copy and update the pcb, set up the stack so that the child
256 * ready to run and return to user mode.
257 */
258void
259cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
260{
261	struct trapframe *tf;
262	struct frame *fp;
263	struct pcb *pcb1;
264	struct pcb *pcb2;
265	vm_offset_t sp;
266	int error;
267	int i;
268
269	KASSERT(td1 == curthread || td1 == &thread0,
270	    ("cpu_fork: p1 not curproc and not proc0"));
271
272	if ((flags & RFPROC) == 0)
273		return;
274
275	p2->p_md.md_sigtramp = td1->td_proc->p_md.md_sigtramp;
276	p2->p_md.md_utrap = utrap_hold(td1->td_proc->p_md.md_utrap);
277
278	/* The pcb must be aligned on a 64-byte boundary. */
279	pcb1 = td1->td_pcb;
280	pcb2 = (struct pcb *)((td2->td_kstack + td2->td_kstack_pages *
281	    PAGE_SIZE - sizeof(struct pcb)) & ~0x3fUL);
282	td2->td_pcb = pcb2;
283
284	/*
285	 * Ensure that p1's pcb is up to date.
286	 */
287	critical_enter();
288	if ((td1->td_frame->tf_fprs & FPRS_FEF) != 0)
289		savefpctx(pcb1->pcb_ufp);
290	critical_exit();
291	/* Make sure the copied windows are spilled. */
292	flushw();
293	/* Copy the pcb (this will copy the windows saved in the pcb, too). */
294	bcopy(pcb1, pcb2, sizeof(*pcb1));
295
296	/*
297	 * If we're creating a new user process and we're sharing the address
298	 * space, the parent's top most frame must be saved in the pcb.  The
299	 * child will pop the frame when it returns to user mode, and may
300	 * overwrite it with its own data causing much suffering for the
301	 * parent.  We check if its already in the pcb, and if not copy it
302	 * in.  Its unlikely that the copyin will fail, but if so there's not
303	 * much we can do.  The parent will likely crash soon anyway in that
304	 * case.
305	 */
306	if ((flags & RFMEM) != 0 && td1 != &thread0) {
307		sp = td1->td_frame->tf_sp;
308		for (i = 0; i < pcb1->pcb_nsaved; i++) {
309			if (pcb1->pcb_rwsp[i] == sp)
310				break;
311		}
312		if (i == pcb1->pcb_nsaved) {
313			error = copyin((caddr_t)sp + SPOFF, &pcb1->pcb_rw[i],
314			    sizeof(struct rwindow));
315			if (error == 0) {
316				pcb1->pcb_rwsp[i] = sp;
317				pcb1->pcb_nsaved++;
318			}
319		}
320	}
321
322	/*
323	 * Create a new fresh stack for the new process.
324	 * Copy the trap frame for the return to user mode as if from a
325	 * syscall.  This copies most of the user mode register values.
326	 */
327	tf = (struct trapframe *)pcb2 - 1;
328	bcopy(td1->td_frame, tf, sizeof(*tf));
329
330	tf->tf_out[0] = 0;			/* Child returns zero */
331	tf->tf_out[1] = 0;
332	tf->tf_tstate &= ~TSTATE_XCC_C;		/* success */
333	tf->tf_fprs = 0;
334
335	td2->td_frame = tf;
336	fp = (struct frame *)tf - 1;
337	fp->fr_local[0] = (u_long)fork_return;
338	fp->fr_local[1] = (u_long)td2;
339	fp->fr_local[2] = (u_long)tf;
340	/* Terminate stack traces at this frame. */
341	fp->fr_pc = fp->fr_fp = 0;
342	pcb2->pcb_sp = (u_long)fp - SPOFF;
343	pcb2->pcb_pc = (u_long)fork_trampoline - 8;
344
345	/* Setup to release the spin count in fork_exit(). */
346	td2->td_md.md_spinlock_count = 1;
347	td2->td_md.md_saved_pil = 0;
348
349	/*
350	 * Now, cpu_switch() can schedule the new process.
351	 */
352}
353
354void
355cpu_reset(void)
356{
357	static char bspec[64] = "";
358	phandle_t chosen;
359	static struct {
360		cell_t	name;
361		cell_t	nargs;
362		cell_t	nreturns;
363		cell_t	bootspec;
364	} args = {
365		(cell_t)"boot",
366		1,
367		0,
368		(cell_t)bspec
369	};
370
371	if ((chosen = OF_finddevice("/chosen")) != 0) {
372		if (OF_getprop(chosen, "bootpath", bspec, sizeof(bspec)) == -1)
373			bspec[0] = '\0';
374		bspec[sizeof(bspec) - 1] = '\0';
375	}
376
377	cpu_shutdown(&args);
378}
379
380/*
381 * Intercept the return address from a freshly forked process that has NOT
382 * been scheduled yet.
383 *
384 * This is needed to make kernel threads stay in kernel mode.
385 */
386void
387cpu_set_fork_handler(struct thread *td, void (*func)(void *), void *arg)
388{
389	struct frame *fp;
390	struct pcb *pcb;
391
392	pcb = td->td_pcb;
393	fp = (struct frame *)(pcb->pcb_sp + SPOFF);
394	fp->fr_local[0] = (u_long)func;
395	fp->fr_local[1] = (u_long)arg;
396}
397
398int
399is_physical_memory(vm_paddr_t addr)
400{
401	struct ofw_mem_region *mr;
402
403	for (mr = sparc64_memreg; mr < sparc64_memreg + sparc64_nmemreg; mr++)
404		if (addr >= mr->mr_start && addr < mr->mr_start + mr->mr_size)
405			return (1);
406	return (0);
407}
408
409/*
410 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
411 */
412static void
413sf_buf_init(void *arg)
414{
415	struct sf_buf *sf_bufs;
416	vm_offset_t sf_base;
417	int i;
418
419	nsfbufs = NSFBUFS;
420	TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
421
422	mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
423	SLIST_INIT(&sf_freelist.sf_head);
424	sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE);
425	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
426	    M_NOWAIT | M_ZERO);
427	for (i = 0; i < nsfbufs; i++) {
428		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
429		SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
430	}
431	sf_buf_alloc_want = 0;
432}
433
434/*
435 * Get an sf_buf from the freelist.  Will block if none are available.
436 */
437struct sf_buf *
438sf_buf_alloc(struct vm_page *m, int flags)
439{
440	struct sf_buf *sf;
441	int error;
442
443	mtx_lock(&sf_freelist.sf_lock);
444	while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
445		if (flags & SFB_NOWAIT)
446			break;
447		sf_buf_alloc_want++;
448		mbstat.sf_allocwait++;
449		error = msleep(&sf_freelist, &sf_freelist.sf_lock,
450		    (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
451		sf_buf_alloc_want--;
452
453		/*
454		 * If we got a signal, don't risk going back to sleep.
455		 */
456		if (error)
457			break;
458	}
459	if (sf != NULL) {
460		SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
461		sf->m = m;
462		nsfbufsused++;
463		nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
464		pmap_qenter(sf->kva, &sf->m, 1);
465	}
466	mtx_unlock(&sf_freelist.sf_lock);
467	return (sf);
468}
469
470/*
471 * Release resources back to the system.
472 */
473void
474sf_buf_free(struct sf_buf *sf)
475{
476
477	pmap_qremove(sf->kva, 1);
478	mtx_lock(&sf_freelist.sf_lock);
479	SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
480	nsfbufsused--;
481	if (sf_buf_alloc_want > 0)
482		wakeup(&sf_freelist);
483	mtx_unlock(&sf_freelist.sf_lock);
484}
485
/*
 * VM software interrupt handler.  Intentionally empty: busdma bounce
 * buffers are not implemented, so there is nothing to do here.
 */
void
swi_vm(void *v)
{

	/* Nothing to do. */
}
492
493void *
494uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
495{
496	static vm_pindex_t color;
497	vm_paddr_t pa;
498	vm_page_t m;
499	int pflags;
500	void *va;
501
502	PMAP_STATS_INC(uma_nsmall_alloc);
503
504	*flags = UMA_SLAB_PRIV;
505
506	if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT)
507		pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED;
508	else
509		pflags = VM_ALLOC_SYSTEM | VM_ALLOC_WIRED;
510
511	if (wait & M_ZERO)
512		pflags |= VM_ALLOC_ZERO;
513
514	for (;;) {
515		m = vm_page_alloc(NULL, color++, pflags | VM_ALLOC_NOOBJ);
516		if (m == NULL) {
517			if (wait & M_NOWAIT)
518				return (NULL);
519			else
520				VM_WAIT;
521		} else
522			break;
523	}
524
525	pa = VM_PAGE_TO_PHYS(m);
526	if (dcache_color_ignore == 0 && m->md.color != DCACHE_COLOR(pa)) {
527		KASSERT(m->md.colors[0] == 0 && m->md.colors[1] == 0,
528		    ("uma_small_alloc: free page %p still has mappings!", m));
529		PMAP_STATS_INC(uma_nsmall_alloc_oc);
530		m->md.color = DCACHE_COLOR(pa);
531		dcache_page_inval(pa);
532	}
533	va = (void *)TLB_PHYS_TO_DIRECT(pa);
534	if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
535		cpu_block_zero(va, PAGE_SIZE);
536	return (va);
537}
538
539void
540uma_small_free(void *mem, int size, u_int8_t flags)
541{
542	vm_page_t m;
543
544	PMAP_STATS_INC(uma_nsmall_free);
545	m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS((vm_offset_t)mem));
546	m->wire_count--;
547	vm_page_free(m);
548	atomic_subtract_int(&cnt.v_wire_count, 1);
549}
550