/*	$OpenBSD: uvm_glue.c,v 1.84 2022/09/10 20:35:29 miod Exp $	*/
/*	$NetBSD: uvm_glue.c,v 1.44 2001/02/06 19:54:44 eeh Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_glue.c	8.6 (Berkeley) 1/5/94
 * from: Id: uvm_glue.c,v 1.1.2.8 1998/02/07 01:16:54 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_glue.c: glue functions
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#include <sys/user.h>
#ifdef SYSVSHM
#include <sys/shm.h>
#endif
#include <sys/sched.h>

#include <uvm/uvm.h>

/*
 * uvm_kernacc: can the kernel access a region of memory
 *
 * - called from malloc [DIAGNOSTIC], and /dev/kmem driver (mem.c)
 */
boolean_t
uvm_kernacc(caddr_t addr, size_t len, int rw)
{
	boolean_t rv;
	vaddr_t saddr, eaddr;
	vm_prot_t prot = rw == B_READ ? PROT_READ : PROT_WRITE;

	saddr = trunc_page((vaddr_t)addr);
	eaddr = round_page((vaddr_t)addr + len);
	vm_map_lock_read(kernel_map);
	rv = uvm_map_checkprot(kernel_map, saddr, eaddr, prot);
	vm_map_unlock_read(kernel_map);

	return rv;
}
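
/*
 * Example of the intended use (a minimal sketch, assuming a
 * /dev/kmem-style caller; "va", "cnt" and "uio" are hypothetical):
 *
 *	if (!uvm_kernacc((caddr_t)va, cnt,
 *	    uio->uio_rw == UIO_READ ? B_READ : B_WRITE))
 *		return (EFAULT);
 *	error = uiomove((caddr_t)va, cnt, uio);
 */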

/*
 * uvm_vslock: wire user memory for I/O
 *
 * - called from sys_sysctl
 */
int
uvm_vslock(struct proc *p, caddr_t addr, size_t len, vm_prot_t access_type)
{
	struct vm_map *map = &p->p_vmspace->vm_map;
	vaddr_t start, end;

	start = trunc_page((vaddr_t)addr);
	end = round_page((vaddr_t)addr + len);
	if (end <= start)
		return (EINVAL);

	return uvm_fault_wire(map, start, end, access_type);
}

/*
 * uvm_vsunlock: unwire user memory wired by uvm_vslock()
 *
 * - called from sys_sysctl
 */
void
uvm_vsunlock(struct proc *p, caddr_t addr, size_t len)
{
	vaddr_t start, end;

	start = trunc_page((vaddr_t)addr);
	end = round_page((vaddr_t)addr + len);
	KASSERT(end > start);

	uvm_fault_unwire(&p->p_vmspace->vm_map, start, end);
}
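
/*
 * Example of the intended wire/copy/unwire pattern (a minimal sketch,
 * assuming a sysctl-style caller; "uaddr", "ulen" and "kbuf" are
 * hypothetical):
 *
 *	error = uvm_vslock(p, uaddr, ulen, PROT_READ | PROT_WRITE);
 *	if (error)
 *		return (error);
 *	error = copyout(kbuf, uaddr, ulen);
 *	uvm_vsunlock(p, uaddr, ulen);
 *
 * While the range is wired the copyout() does not have to wait for
 * pages to be brought in.
 */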

/*
 * uvm_vslock_device: wire user memory, make sure it's device reachable
 *  and bounce if necessary.
 *
 * - called from physio
 */
int
uvm_vslock_device(struct proc *p, void *addr, size_t len,
    vm_prot_t access_type, void **retp)
{
	struct vm_map *map = &p->p_vmspace->vm_map;
	struct vm_page *pg;
	struct pglist pgl;
	int npages;
	vaddr_t start, end, off;
	vaddr_t sva, va;
	vsize_t sz;
	int error, mapv, i;

	start = trunc_page((vaddr_t)addr);
	end = round_page((vaddr_t)addr + len);
	sz = end - start;
	off = (vaddr_t)addr - start;
	if (end <= start)
		return (EINVAL);

	vm_map_lock_read(map);
retry:
	mapv = map->timestamp;
	vm_map_unlock_read(map);

	if ((error = uvm_fault_wire(map, start, end, access_type)))
		return (error);

	vm_map_lock_read(map);
	if (mapv != map->timestamp)
		goto retry;

	npages = atop(sz);
	for (i = 0; i < npages; i++) {
		paddr_t pa;

		if (!pmap_extract(map->pmap, start + ptoa(i), &pa)) {
			error = EFAULT;
			goto out_unwire;
		}
		if (!PADDR_IS_DMA_REACHABLE(pa))
			break;
	}
	if (i == npages) {
		*retp = NULL;
		return (0);
	}

	va = (vaddr_t)km_alloc(sz, &kv_any, &kp_none, &kd_nowait);
	if (va == 0) {
		error = ENOMEM;
		goto out_unwire;
	}
	sva = va;

	TAILQ_INIT(&pgl);
	error = uvm_pglistalloc(npages * PAGE_SIZE, dma_constraint.ucr_low,
	    dma_constraint.ucr_high, 0, 0, &pgl, npages, UVM_PLA_WAITOK);
	if (error)
		goto out_unmap;

	while ((pg = TAILQ_FIRST(&pgl)) != NULL) {
		TAILQ_REMOVE(&pgl, pg, pageq);
		pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), PROT_READ | PROT_WRITE);
		va += PAGE_SIZE;
	}
	pmap_update(pmap_kernel());
	KASSERT(va == sva + sz);
	*retp = (void *)(sva + off);

	if ((error = copyin(addr, *retp, len)) == 0)
		return 0;

	uvm_km_pgremove_intrsafe(sva, sva + sz);
	pmap_kremove(sva, sz);
	pmap_update(pmap_kernel());
out_unmap:
	km_free((void *)sva, sz, &kv_any, &kp_none);
out_unwire:
	uvm_fault_unwire_locked(map, start, end);
	vm_map_unlock_read(map);
	return (error);
}

/*
 * uvm_vsunlock_device: unwire user memory wired by uvm_vslock_device()
 *
 * - called from physio
 */
void
uvm_vsunlock_device(struct proc *p, void *addr, size_t len, void *map)
{
	vaddr_t start, end;
	vaddr_t kva;
	vsize_t sz;

	start = trunc_page((vaddr_t)addr);
	end = round_page((vaddr_t)addr + len);
	KASSERT(end > start);
	sz = end - start;

	if (map)
		copyout(map, addr, len);

	uvm_fault_unwire_locked(&p->p_vmspace->vm_map, start, end);
	vm_map_unlock_read(&p->p_vmspace->vm_map);

	if (!map)
		return;

	kva = trunc_page((vaddr_t)map);
	uvm_km_pgremove_intrsafe(kva, kva + sz);
	pmap_kremove(kva, sz);
	pmap_update(pmap_kernel());
	/* pair with the km_alloc() done in uvm_vslock_device() */
	km_free((void *)kva, sz, &kv_any, &kp_none);
}
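
/*
 * Example of the corresponding lock/transfer/unlock sequence (a minimal
 * sketch, assuming a physio-style caller; "data", "todo", "prot" and
 * "bounce" are hypothetical):
 *
 *	void *bounce;
 *
 *	error = uvm_vslock_device(p, data, todo, prot, &bounce);
 *	if (error)
 *		return (error);
 *	... do the transfer on bounce if it is non-NULL (the DMA-reachable
 *	    copy), otherwise directly on data ...
 *	uvm_vsunlock_device(p, data, todo, bounce);
 *
 * uvm_vsunlock_device() copies the bounce buffer back to the user
 * address, unwires the range and releases the bounce pages.
 */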

/*
 * uvm_uarea_alloc: allocate the u-area for a new thread
 */
vaddr_t
uvm_uarea_alloc(void)
{
	vaddr_t uaddr;

	uaddr = uvm_km_kmemalloc_pla(kernel_map, uvm.kernel_object, USPACE,
	    USPACE_ALIGN, UVM_KMF_ZERO,
	    no_constraint.ucr_low, no_constraint.ucr_high,
	    0, 0, USPACE/PAGE_SIZE);

	return (uaddr);
}

/*
 * uvm_uarea_free: free a dead thread's stack
 *
 * - the thread passed to us is a dead thread; we
 *   are running on a different context now (the reaper).
 */
void
uvm_uarea_free(struct proc *p)
{
	uvm_km_free(kernel_map, (vaddr_t)p->p_addr, USPACE);
	p->p_addr = NULL;
}

/*
 * uvm_exit: exit a virtual address space
 */
void
uvm_exit(struct process *pr)
{
	struct vmspace *vm = pr->ps_vmspace;

	pr->ps_vmspace = NULL;
	uvmspace_free(vm);
}

/*
 * uvm_init_limits: init per-process VM limits
 *
 * - called for process 0 and then inherited by all others.
 */
void
uvm_init_limits(struct plimit *limit0)
{
	/*
	 * Set up the initial limits on process VM.  Set the maximum
	 * resident set size to be all of (reasonably) available memory.
	 * This causes any single, large process to start random page
	 * replacement once it fills memory.
	 */
	limit0->pl_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
	limit0->pl_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
	limit0->pl_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ;
	limit0->pl_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ;
	limit0->pl_rlimit[RLIMIT_RSS].rlim_cur = ptoa(uvmexp.free);
}

#ifdef __HAVE_PMAP_COLLECT

#ifdef DEBUG
int	enableswap = 1;
int	swapdebug = 0;
#define	SDB_FOLLOW	1
#define SDB_SWAPIN	2
#define SDB_SWAPOUT	4
#endif

/*
 * uvm_swapout_threads: find threads that can be swapped
 *
 * - called by the pagedaemon
 * - try and swap at least one process
 * - processes that are sleeping or stopped for maxslp or more seconds
 *   are swapped... otherwise the longest-sleeping or stopped process
 *   is swapped, otherwise the longest resident process...
 */
void
uvm_swapout_threads(void)
{
	struct process *pr;
	struct proc *p, *slpp;
	struct process *outpr;
	int outpri;
	int didswap = 0;
	extern int maxslp;
	/* XXXCDC: should move off to uvmexp. or uvm., also in uvm_meter */

#ifdef DEBUG
	if (!enableswap)
		return;
#endif

	/*
	 * outpr/outpri  : stop/sleep process whose most active thread has
	 *	the largest sleeptime < maxslp
	 */
	outpr = NULL;
	outpri = 0;
	LIST_FOREACH(pr, &allprocess, ps_list) {
		if (pr->ps_flags & (PS_SYSTEM | PS_EXITING))
			continue;

		/*
		 * slpp: the sleeping or stopped thread in pr with
		 * the smallest p_slptime
		 */
		slpp = NULL;
		TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
			switch (p->p_stat) {
			case SRUN:
			case SONPROC:
				goto next_process;

			case SSLEEP:
			case SSTOP:
				if (slpp == NULL ||
				    slpp->p_slptime < p->p_slptime)
					slpp = p;
				continue;
			}
		}

		if (slpp != NULL) {
			if (slpp->p_slptime >= maxslp) {
				pmap_collect(pr->ps_vmspace->vm_map.pmap);
				didswap++;
			} else if (slpp->p_slptime > outpri) {
				outpr = pr;
				outpri = slpp->p_slptime;
			}
		}
next_process:	;
	}

	/*
	 * If we didn't get rid of any real duds, toss out the next most
	 * likely sleeping/stopped or running candidate.  We only do this
	 * if we are real low on memory since we don't gain much by doing
	 * it.
	 */
	if (didswap == 0 && uvmexp.free <= atop(round_page(USPACE)) &&
	    outpr != NULL) {
#ifdef DEBUG
		if (swapdebug & SDB_SWAPOUT)
			printf("swapout_threads: no duds, try procpr %p\n",
			    outpr);
#endif
		pmap_collect(outpr->ps_vmspace->vm_map.pmap);
	}
}

#endif	/* __HAVE_PMAP_COLLECT */

/*
 * uvm_atopg: convert KVAs back to their page structures.
 */
struct vm_page *
uvm_atopg(vaddr_t kva)
{
	struct vm_page *pg;
	paddr_t pa;
	boolean_t rv;

	rv = pmap_extract(pmap_kernel(), kva, &pa);
	KASSERT(rv);
	pg = PHYS_TO_VM_PAGE(pa);
	KASSERT(pg != NULL);
	return (pg);
}

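/*
 * uvm_pause: yield the CPU so other threads can run, and every so
 * often briefly release and re-acquire the kernel lock.
 */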
void
uvm_pause(void)
{
	static unsigned int toggle;
	if (toggle++ > 128) {
		toggle = 0;
		KERNEL_UNLOCK();
		KERNEL_LOCK();
	}
	sched_pause(preempt);
}

#ifndef SMALL_KERNEL
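/*
 * fill_vmmap: fill in kinfo_vmentry records for the given process's
 * VM map (or for the kernel map when pr is NULL).
 */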
int
fill_vmmap(struct process *pr, struct kinfo_vmentry *kve,
    size_t *lenp)
{
	struct vm_map *map;

	if (pr != NULL)
		map = &pr->ps_vmspace->vm_map;
	else
		map = kernel_map;
	return uvm_map_fill_vmmap(map, kve, lenp);
}
#endif
