vm_machdep.c revision 154928
/*-
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * Copyright (c) 1989, 1990 William Jolitz
 * Copyright (c) 1994 John Dyson
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/arm/arm/vm_machdep.c 154928 2006-01-27 21:07:04Z cognet $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/socketvar.h>
#include <sys/sf_buf.h>
#include <sys/unistd.h>
#include <machine/cpu.h>
#include <machine/pcb.h>
#include <machine/sysarch.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <sys/lock.h>
#include <sys/mutex.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_param.h>
#include <vm/uma.h>
#include <vm/uma_int.h>

#ifndef NSFBUFS
#define NSFBUFS		(512 + maxusers * 16)
#endif

static void     sf_buf_init(void *arg);
SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)

LIST_HEAD(sf_head, sf_buf);

/*
 * A hash table of active sendfile(2) buffers
 */
static struct sf_head *sf_buf_active;
static u_long sf_buf_hashmask;

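/* Hash on the page's index in vm_page_array. */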
#define SF_BUF_HASH(m)  (((m) - vm_page_array) & sf_buf_hashmask)

static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
static u_int    sf_buf_alloc_want;

/*
 * A lock used to synchronize access to the hash table and free list
 */
static struct mtx sf_buf_lock;

/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the pcb, set up the stack so that the child is
 * ready to run and return to user mode.
 */
void
cpu_fork(register struct thread *td1, register struct proc *p2,
    struct thread *td2, int flags)
{
	struct pcb *pcb1, *pcb2;
	struct trapframe *tf;
	struct switchframe *sf;
	struct mdproc *mdp2;

	if ((flags & RFPROC) == 0)
		return;
	pcb1 = td1->td_pcb;
	pcb2 = (struct pcb *)(td2->td_kstack + td2->td_kstack_pages * PAGE_SIZE) - 1;
#ifdef __XSCALE__
	pmap_use_minicache(td2->td_kstack, td2->td_kstack_pages * PAGE_SIZE);
	if (td2->td_altkstack)
		pmap_use_minicache(td2->td_altkstack, td2->td_altkstack_pages *
		    PAGE_SIZE);
#endif
	td2->td_pcb = pcb2;
	bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
	mdp2 = &p2->p_md;
	bcopy(&td1->td_proc->p_md, mdp2, sizeof(*mdp2));
	pcb2->un_32.pcb32_und_sp = td2->td_kstack + USPACE_UNDEF_STACK_TOP;
	pcb2->un_32.pcb32_sp = td2->td_kstack +
	    USPACE_SVC_STACK_TOP - sizeof(*pcb2);
	pmap_activate(td2);
	td2->td_frame = tf =
	    (struct trapframe *)pcb2->un_32.pcb32_sp - 1;
	*tf = *td1->td_frame;
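	/*
	 * Build a switchframe just below the trapframe: the child starts
	 * in fork_trampoline() with fork_return and td2 in r4/r5.
	 */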
	sf = (struct switchframe *)tf - 1;
	sf->sf_r4 = (u_int)fork_return;
	sf->sf_r5 = (u_int)td2;
	sf->sf_pc = (u_int)fork_trampoline;
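	/* Make the child return 0 from fork() with the carry bit clear. */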
	tf->tf_spsr &= ~PSR_C_bit;
	tf->tf_r0 = 0;
	tf->tf_r1 = 0;
	pcb2->un_32.pcb32_sp = (u_int)sf;

	/* Setup to release sched_lock in fork_exit(). */
	td2->td_md.md_spinlock_count = 1;
	td2->td_md.md_saved_cspr = 0;
	td2->td_md.md_tp = *(uint32_t **)ARM_TP_ADDRESS;
}

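/*
 * Nothing machine-dependent needs to be done when a thread's kernel stack
 * is swapped in or out, so these hooks are empty on ARM.
 */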
void
cpu_thread_swapin(struct thread *td)
{
}

void
cpu_thread_swapout(struct thread *td)
{
}

/*
 * Detach mapped page and release resources back to the system.
 */
void
sf_buf_free(struct sf_buf *sf)
{
	mtx_lock(&sf_buf_lock);
	sf->ref_count--;
	if (sf->ref_count == 0) {
		TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
		nsfbufsused--;
		if (sf_buf_alloc_want > 0)
			wakeup_one(&sf_buf_freelist);
	}
	mtx_unlock(&sf_buf_lock);
}

/*
 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
 */
static void
sf_buf_init(void *arg)
{
	struct sf_buf *sf_bufs;
	vm_offset_t sf_base;
	int i;

	nsfbufs = NSFBUFS;
	TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);

	sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
	TAILQ_INIT(&sf_buf_freelist);
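	/*
	 * Reserve a page of KVA for each sf_buf; the backing pages are
	 * entered into the mappings on demand in sf_buf_alloc().
	 */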
	sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE);
	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
	    M_NOWAIT | M_ZERO);
	for (i = 0; i < nsfbufs; i++) {
		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
		TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
	}
	sf_buf_alloc_want = 0;
	mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
}

/*
 * Get an sf_buf from the freelist. Will block if none are available.
 */
struct sf_buf *
sf_buf_alloc(struct vm_page *m, int flags)
{
	struct sf_head *hash_list;
	struct sf_buf *sf;
	int error;

	hash_list = &sf_buf_active[SF_BUF_HASH(m)];
	mtx_lock(&sf_buf_lock);
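	/* If the page is already mapped by an sf_buf, reuse that buffer. */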
	LIST_FOREACH(sf, hash_list, list_entry) {
		if (sf->m == m) {
			sf->ref_count++;
			if (sf->ref_count == 1) {
				TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
				nsfbufsused++;
				nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
			}
			goto done;
		}
	}
	while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
		if (flags & SFB_NOWAIT)
			goto done;
		sf_buf_alloc_want++;
		mbstat.sf_allocwait++;
		error = msleep(&sf_buf_freelist, &sf_buf_lock,
		    (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
		sf_buf_alloc_want--;

		/*
		 * If we got a signal, don't risk going back to sleep.
		 */
		if (error)
			goto done;
	}
	TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
	if (sf->m != NULL)
		LIST_REMOVE(sf, list_entry);
	LIST_INSERT_HEAD(hash_list, sf, list_entry);
	sf->ref_count = 1;
	sf->m = m;
	nsfbufsused++;
	nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
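	/* Map the page at the buffer's reserved KVA. */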
	pmap_kenter(sf->kva, VM_PAGE_TO_PHYS(sf->m));
done:
	mtx_unlock(&sf_buf_lock);
	return (sf);
}

/*
 * Initialize machine state (pcb and trap frame) for a new thread about to
 * upcall.  Put enough state in the new thread's PCB to get it to go back to
 * userret(), where we can intercept it again to set the return (upcall)
 * address and stack, along with those from upcalls that are from other
 * sources such as those generated in thread_userret() itself.
 */
void
cpu_set_upcall(struct thread *td, struct thread *td0)
{
	struct trapframe *tf;
	struct switchframe *sf;

	bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe));
	bcopy(td0->td_pcb, td->td_pcb, sizeof(struct pcb));
	tf = td->td_frame;
	sf = (struct switchframe *)tf - 1;
	sf->sf_r4 = (u_int)fork_return;
	sf->sf_r5 = (u_int)td;
	sf->sf_pc = (u_int)fork_trampoline;
	tf->tf_spsr &= ~PSR_C_bit;
	tf->tf_r0 = 0;
	td->td_pcb->un_32.pcb32_sp = (u_int)sf;
	td->td_pcb->un_32.pcb32_und_sp = td->td_kstack + USPACE_UNDEF_STACK_TOP;

	/* Setup to release sched_lock in fork_exit(). */
	td->td_md.md_spinlock_count = 1;
	td->td_md.md_saved_cspr = 0;
}

/*
 * Set the machine state for performing an upcall that has to
 * be done in thread_userret() so that those upcalls generated
 * in thread_userret() itself can be done as well.
 */
void
cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg,
	stack_t *stack)
{
	struct trapframe *tf = td->td_frame;

	tf->tf_usr_sp = ((int)stack->ss_sp + stack->ss_size
	    - sizeof(struct trapframe)) & ~7;
	tf->tf_pc = (int)entry;
	tf->tf_r0 = (int)arg;
	tf->tf_spsr = PSR_USR32_MODE;
}

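/*
 * Set a thread's TLS pointer.  For the current thread the value is written
 * directly to the magic ARM_TP_ADDRESS word; otherwise it is saved in the
 * thread's md_tp so it can take effect when the thread next runs.
 */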
int
cpu_set_user_tls(struct thread *td, void *tls_base)
{

	if (td != curthread)
		td->td_md.md_tp = tls_base;
	else {
		critical_enter();
		*(void **)ARM_TP_ADDRESS = tls_base;
		critical_exit();
	}
	return (0);
}

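/* No machine-dependent state needs to be torn down when a thread exits. */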
void
cpu_thread_exit(struct thread *td)
{
}

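/*
 * Lay out the pcb and the initial trapframe at the top of a new thread's
 * kernel stack.
 */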
void
cpu_thread_setup(struct thread *td)
{
	td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_pages *
	    PAGE_SIZE) - 1;
	td->td_frame = (struct trapframe *)
	    ((u_int)td->td_kstack + USPACE_SVC_STACK_TOP - sizeof(struct pcb)) - 1;
#ifdef __XSCALE__
	pmap_use_minicache(td->td_kstack, td->td_kstack_pages * PAGE_SIZE);
#endif
}

void
cpu_thread_clean(struct thread *td)
{
}

/*
 * Intercept the return address from a freshly forked process that has NOT
 * been scheduled yet.
 *
 * This is needed to make kernel threads stay in kernel mode.
 */
void
cpu_set_fork_handler(struct thread *td, void (*func)(void *), void *arg)
{
	struct switchframe *sf;
	struct trapframe *tf;

	tf = td->td_frame;
	sf = (struct switchframe *)tf - 1;
	sf->sf_r4 = (u_int)func;
	sf->sf_r5 = (u_int)arg;
	td->td_pcb->un_32.pcb32_sp = (u_int)sf;
}

/*
 * Software interrupt handler for queued VM system processing.
 */
void
swi_vm(void *dummy)
{
}

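/* Nothing machine-dependent to do when a process exits on ARM. */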
void
cpu_exit(struct thread *td)
{
}

#ifdef ARM_USE_SMALL_ALLOC

static TAILQ_HEAD(,arm_small_page) pages_normal =
	TAILQ_HEAD_INITIALIZER(pages_normal);
static TAILQ_HEAD(,arm_small_page) pages_wt =
	TAILQ_HEAD_INITIALIZER(pages_wt);
static TAILQ_HEAD(,arm_small_page) free_pgdesc =
	TAILQ_HEAD_INITIALIZER(free_pgdesc);

extern uma_zone_t l2zone;

struct mtx smallalloc_mtx;

MALLOC_DEFINE(M_VMSMALLALLOC, "vm_small_alloc", "VM Small alloc data");

vm_offset_t alloc_curaddr;
vm_offset_t alloc_firstaddr;

extern int doverbose;

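/*
 * Hand a batch of preallocated pages (and their descriptors) to the
 * small-alloc pools; 'pagetable' selects the write-through pool used for
 * page-table pages.
 */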
void
arm_add_smallalloc_pages(void *list, void *mem, int bytes, int pagetable)
{
	struct arm_small_page *pg;

	bytes &= ~PAGE_SIZE;
	while (bytes > 0) {
		pg = (struct arm_small_page *)list;
		pg->addr = mem;
		if (pagetable)
			TAILQ_INSERT_HEAD(&pages_wt, pg, pg_list);
		else
			TAILQ_INSERT_HEAD(&pages_normal, pg, pg_list);
		list = (char *)list + sizeof(*pg);
		mem = (char *)mem + PAGE_SIZE;
		bytes -= PAGE_SIZE;
	}
}

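/*
 * Allocate 1MB of physically contiguous memory, map it with a section
 * mapping, and return page descriptors for it; fall back to kmem_malloc()
 * if contiguous memory is unavailable.
 */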
static void *
arm_uma_do_alloc(struct arm_small_page **pglist, int bytes, int pagetable,
    int flags)
{
	void *ret;
	vm_page_t page_array = NULL;

	*pglist = (void *)kmem_malloc(kmem_map, (0x100000 / PAGE_SIZE) *
	    sizeof(struct arm_small_page), flags);
	if (*pglist && alloc_curaddr < 0xf0000000) {/* XXX */
		mtx_lock(&Giant);
		page_array = vm_page_alloc_contig(0x100000 / PAGE_SIZE,
		    0, 0xffffffff, 0x100000, 0);
		mtx_unlock(&Giant);
	}
	if (page_array) {
		vm_paddr_t pa = VM_PAGE_TO_PHYS(page_array);
		mtx_lock(&smallalloc_mtx);
		ret = (void *)alloc_curaddr;
		alloc_curaddr += 0x100000;
		/* XXX: ARM_TP_ADDRESS should probably be moved elsewhere. */
		if (alloc_curaddr == ARM_TP_ADDRESS)
			alloc_curaddr += 0x100000;
		mtx_unlock(&smallalloc_mtx);
		pmap_kenter_section((vm_offset_t)ret, pa, pagetable);
	} else {
		if (*pglist)
			kmem_free(kmem_map, (vm_offset_t)*pglist,
			    (0x100000 / PAGE_SIZE) *
			    sizeof(struct arm_small_page));
		*pglist = NULL;
		ret = (void *)kmem_malloc(kmem_map, bytes, M_WAITOK);
	}
	return (ret);
}

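/*
 * UMA small-slab backend: hand out single pages carved out of the 1MB
 * section mappings maintained above, refilling the pools from
 * arm_uma_do_alloc() when they run dry.
 */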
void *
uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
	void *ret;
	struct arm_small_page *sp, *tmp;
	TAILQ_HEAD(,arm_small_page) *head;
	static int in_alloc;
	static int in_sleep;
	int should_wakeup = 0;

	*flags = UMA_SLAB_PRIV;
	/*
	 * For CPUs where we set up page tables as write back, there's no
	 * need to maintain two separate pools.
	 */
	if (zone == l2zone && pte_l1_s_cache_mode != pte_l1_s_cache_mode_pt)
		head = (void *)&pages_wt;
	else
		head = (void *)&pages_normal;

	mtx_lock(&smallalloc_mtx);
retry:
	sp = TAILQ_FIRST(head);

	if (!sp) {
		/* No more free pages, need to alloc more. */
		if (in_alloc && (wait & M_WAITOK)) {
			/* Somebody else is already doing the allocation. */
			in_sleep++;
			msleep(&in_alloc, &smallalloc_mtx, PWAIT,
			    "smallalloc", 0);
			in_sleep--;
			goto retry;
		} else if (in_alloc) {
			mtx_unlock(&smallalloc_mtx);
			return (NULL);
		}
		in_alloc = 1;
		mtx_unlock(&smallalloc_mtx);
		/* Try to alloc 1MB of contiguous memory. */
		ret = arm_uma_do_alloc(&sp, bytes, zone == l2zone ?
		    SECTION_PT : SECTION_CACHE, wait);
		mtx_lock(&smallalloc_mtx);
		in_alloc = 0;
		if (in_sleep)
			should_wakeup = 1;
		if (sp) {
			for (int i = 0; i < (0x100000 / PAGE_SIZE) - 1;
			    i++) {
				tmp = &sp[i];
				tmp->addr = (char *)ret + i * PAGE_SIZE;
				TAILQ_INSERT_HEAD(head, tmp, pg_list);
			}
			ret = (char *)ret + 0x100000 - PAGE_SIZE;
			TAILQ_INSERT_HEAD(&free_pgdesc, &sp[(0x100000 / (
			    PAGE_SIZE)) - 1], pg_list);
		} else
			*flags = UMA_SLAB_KMEM;
	} else {
		sp = TAILQ_FIRST(head);
		TAILQ_REMOVE(head, sp, pg_list);
		TAILQ_INSERT_HEAD(&free_pgdesc, sp, pg_list);
		ret = sp->addr;
	}
	if (should_wakeup)
		wakeup(&in_alloc);
	mtx_unlock(&smallalloc_mtx);
	if ((wait & M_ZERO))
		bzero(ret, bytes);
	return (ret);
}

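/*
 * Return a page to the appropriate pool, keyed by the cacheability of its
 * current mapping; slabs that came from kmem_map go back there instead.
 */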
void
uma_small_free(void *mem, int size, u_int8_t flags)
{
	pd_entry_t *pd;
	pt_entry_t *pt;

	if (flags & UMA_SLAB_KMEM)
		kmem_free(kmem_map, (vm_offset_t)mem, size);
	else {
		struct arm_small_page *sp;

		mtx_lock(&smallalloc_mtx);
		sp = TAILQ_FIRST(&free_pgdesc);
		KASSERT(sp != NULL, ("No more free page descriptor ?"));
		TAILQ_REMOVE(&free_pgdesc, sp, pg_list);
		sp->addr = mem;
		pmap_get_pde_pte(kernel_pmap, (vm_offset_t)mem, &pd, &pt);
		if ((*pd & pte_l1_s_cache_mask) == pte_l1_s_cache_mode_pt &&
		    pte_l1_s_cache_mode_pt != pte_l1_s_cache_mode)
			TAILQ_INSERT_HEAD(&pages_wt, sp, pg_list);
		else
			TAILQ_INSERT_HEAD(&pages_normal, sp, pg_list);
		mtx_unlock(&smallalloc_mtx);
	}
}

#endif