1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27/*	  All Rights Reserved  	*/
28
29
30#pragma ident	"%Z%%M%	%I%	%E% SMI"
31
32#include <sys/types.h>
33#include <sys/bitmap.h>
34#include <sys/sysmacros.h>
35#include <sys/kmem.h>
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/user.h>
39#include <sys/unistd.h>
40#include <sys/errno.h>
41#include <sys/proc.h>
42#include <sys/mman.h>
43#include <sys/tuneable.h>
44#include <sys/cmn_err.h>
45#include <sys/cred.h>
46#include <sys/vmsystm.h>
47#include <sys/debug.h>
48#include <sys/policy.h>
49
50#include <vm/as.h>
51#include <vm/seg.h>
52
/* Maps a page size in bytes to its page size code; defined below. */
static uint_t mem_getpgszc(size_t);
54
/*
 * Memory control operations
 *
 * memcntl() dispatches on cmd and operates on the address space of the
 * process that owns curthread:
 *
 *	MC_SYNC		validates the MS_* flags carried in arg and hands
 *			the range to as_ctl().
 *	MC_LOCKAS	validates the MCL_* flags carried in arg, then
 *			calls as_ctl().
 *	MC_LOCK,
 *	MC_UNLOCKAS,
 *	MC_UNLOCK	are passed to as_ctl() without further checks on
 *			arg.
 *	MC_HAT_ADVISE	copies in a memcntl_mha structure from arg and sets
 *			the preferred page size for an address range, the
 *			heap or the stack.
 *	MC_ADVISE	MADV_WILLNEED calls as_faulta() and then as_ctl();
 *			MADV_DONTNEED is issued as an as_ctl(MC_SYNC) with
 *			MS_ASYNC | MS_INVALIDATE; every other advice value
 *			goes straight to as_ctl().
 *
 * Arguments:
 *	addr, len	range to operate on.  Both must be zero for
 *			MC_LOCKAS and MC_UNLOCKAS; for every command other
 *			than those two and MC_HAT_ADVISE, addr must have no
 *			PAGEOFFSET bits set, len must be non-zero and the
 *			range must pass valid_usr_range().
 *	cmd		one of the MC_* commands above; anything else
 *			fails with EINVAL.
 *	arg		command specific: a flag word, or for MC_HAT_ADVISE
 *			a user address that is read with copyin().
 *	attr		attribute filter; must be zero for MC_HAT_ADVISE,
 *			otherwise a subset of VALID_ATTR that does not
 *			contain both SHARED and PRIVATE.
 *	mask		must be zero.
 *
 * Returns 0 on success.  On failure the error is posted with set_errno();
 * the value returned is either set_errno()'s result or the error code
 * itself, depending on the path taken.
 */
int
memcntl(caddr_t addr, size_t len, int cmd, caddr_t arg, int attr, int mask)
{
	struct as *as = ttoproc(curthread)->p_as;
	struct proc *p = ttoproc(curthread);
	size_t pgsz;
	uint_t szc, oszc, pgcmd;
	int error = 0;
	faultcode_t fc;
	uintptr_t iarg;
	STRUCT_DECL(memcntl_mha, mha);

	/* Only a zero mask is accepted, whatever the command. */
	if (mask)
		return (set_errno(EINVAL));
	if ((cmd == MC_LOCKAS) || (cmd == MC_UNLOCKAS)) {
		/* Whole-address-space commands take no range. */
		if ((addr != 0) || (len != 0)) {
			return (set_errno(EINVAL));
		}
	} else if (cmd != MC_HAT_ADVISE) {
		/*
		 * MC_HAT_ADVISE is skipped here; it validates addr and len
		 * itself once the sub-command is known.
		 */
		if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0) {
			return (set_errno(EINVAL));
		}
		/*
		 * We're only concerned with the address range
		 * here, not the protections.  The protections
		 * are only used as a "filter" in this code,
		 * they aren't set or modified here.
		 */
		if (valid_usr_range(addr, len, 0, as,
		    as->a_userlimit) != RANGE_OKAY) {
			return (set_errno(ENOMEM));
		}
	}

	if (cmd == MC_HAT_ADVISE) {
		/*
		 * No attribute filter is allowed for MC_HAT_ADVISE.  The
		 * mask test can never be true here, since a non-zero mask
		 * was already rejected at the top of the function.
		 */
		if (attr != 0 || mask != 0) {
			return (set_errno(EINVAL));
		}

	} else {
		/* attr may only contain bits that are in VALID_ATTR. */
		if ((VALID_ATTR & attr) != attr) {
			return (set_errno(EINVAL));
		}
		/* SHARED and PRIVATE may not be requested together. */
		if ((attr & SHARED) && (attr & PRIVATE)) {
			return (set_errno(EINVAL));
		}
		/*
		 * The lock and unlock commands are subject to
		 * secpolicy_lock_memory() on the caller's credentials; a
		 * non-zero result is returned as the error.
		 */
		if (((cmd == MC_LOCKAS) || (cmd == MC_LOCK) ||
		    (cmd == MC_UNLOCKAS) || (cmd == MC_UNLOCK)) &&
		    (error = secpolicy_lock_memory(CRED())) != 0)
			return (set_errno(error));
	}
	/* A non-empty attribute filter always has PROT_USER added to it. */
	if (attr) {
		attr |= PROT_USER;
	}

	switch (cmd) {
	case MC_SYNC:
		/*
		 * MS_SYNC used to be defined to be zero but is now non-zero.
		 * For binary compatibility we still accept zero
		 * (the absence of MS_ASYNC) to mean the same thing.
		 */
		iarg = (uintptr_t)arg;
		if ((iarg & ~MS_INVALIDATE) == 0)
			iarg |= MS_SYNC;

		/*
		 * Reject unknown flag bits, and reject a request that asks
		 * for both MS_SYNC and MS_ASYNC.
		 */
		if (((iarg & ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE)) != 0) ||
			((iarg & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC))) {
			error = set_errno(EINVAL);
		} else {
			error = as_ctl(as, addr, len, cmd, attr, iarg, NULL, 0);
			if (error) {
				(void) set_errno(error);
			}
		}
		return (error);
	case MC_LOCKAS:
		/*
		 * arg must name at least one of MCL_FUTURE and MCL_CURRENT
		 * and nothing else.  The as_ctl() call itself is made after
		 * the switch.
		 */
		if ((uintptr_t)arg & ~(MCL_FUTURE|MCL_CURRENT) ||
		    (uintptr_t)arg == 0) {
			return (set_errno(EINVAL));
		}
		break;
	case MC_LOCK:
	case MC_UNLOCKAS:
	case MC_UNLOCK:
		/* Nothing more to check; handled by as_ctl() after the switch. */
		break;
	case MC_HAT_ADVISE:
		/*
		 * Set preferred page size.
		 *
		 * arg is a user address; the memcntl_mha structure is set up
		 * for the caller's data model and copied in from it.
		 */
		STRUCT_INIT(mha, get_udatamodel());
		if (copyin(arg, STRUCT_BUF(mha), STRUCT_SIZE(mha))) {
			return (set_errno(EFAULT));
		}

		pgcmd = STRUCT_FGET(mha, mha_cmd);

		/*
		 * Currently only MHA_MAPSIZE_VA, MHA_MAPSIZE_STACK
		 * and MHA_MAPSIZE_BSSBRK are supported. Only one
		 * command may be specified at a time.
		 *
		 * mha_flags must also be zero.
		 */
		if ((~(MHA_MAPSIZE_VA|MHA_MAPSIZE_STACK|MHA_MAPSIZE_BSSBRK) &
		    pgcmd) || pgcmd == 0 || !ISP2(pgcmd) ||
		    STRUCT_FGET(mha, mha_flags))
			return (set_errno(EINVAL));

		pgsz = STRUCT_FGET(mha, mha_pagesize);

		/*
		 * call platform specific map_pgsz() routine to get the
		 * optimal pgsz if pgsz is 0.
		 *
		 * For stack and heap operations addr and len must be zero.
		 */
		if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK)) != 0) {
			if (addr != NULL || len != 0) {
				return (set_errno(EINVAL));
			}

			/*
			 * Disable autompss for this process unless pgsz == 0,
			 * which means the system should pick.  In the
			 * pgsz == 0 case, leave the SAUTOLPG setting alone, as
			 * we don't want to enable it when someone has
			 * disabled automatic large page selection for the
			 * whole system.
			 */
			mutex_enter(&p->p_lock);
			if (pgsz != 0) {
				p->p_flag &= ~SAUTOLPG;
			}
			mutex_exit(&p->p_lock);

			/*
			 * From here on, every return taken by a heap or stack
			 * request must call as_rangeunlock() first.
			 */
			as_rangelock(as);

			if (pgsz == 0) {
				int	type;

				if (pgcmd == MHA_MAPSIZE_BSSBRK)
					type = MAPPGSZ_HEAP;
				else
					type = MAPPGSZ_STK;

				pgsz = map_pgsz(type, p, 0, 0, 1);
			}
		} else {
			/*
			 * addr and len must be valid for range specified.
			 */
			if (valid_usr_range(addr, len, 0, as,
			    as->a_userlimit) != RANGE_OKAY) {
				return (set_errno(ENOMEM));
			}
			/*
			 * Note that we don't disable automatic large page
			 * selection for anon segments based on use of
			 * memcntl().
			 *
			 * A zero pgsz is handed to as_set_default_lpsize()
			 * and the request is complete once it returns.
			 */
			if (pgsz == 0) {
				error = as_set_default_lpsize(as, addr, len);
				if (error) {
					(void) set_errno(error);
				}
				return (error);
			}

			/*
			 * addr and len must be preferred page size aligned
			 */
			if (!IS_P2ALIGNED(addr, pgsz) ||
			    !IS_P2ALIGNED(len, pgsz)) {
				return (set_errno(EINVAL));
			}
		}

		/*
		 * Translate the page size into a size code.  (uint_t)-1 means
		 * no matching size; the range lock is held only for heap and
		 * stack requests, so only those release it here.
		 */
		szc = mem_getpgszc(pgsz);
		if (szc == (uint_t)-1) {
			if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK))
			    != 0) {
				as_rangeunlock(as);
			}
			return (set_errno(EINVAL));
		}

		/*
		 * For stack and heap operations we first need to pad
		 * out existing range (create new mappings) to the new
		 * preferred page size boundary. Also the start of the
		 * .bss for the heap or user's stack base may not be on
		 * the new preferred page size boundary. For these cases
		 * we align the base of the request on the new preferred
		 * page size.
		 */
		if (pgcmd & MHA_MAPSIZE_BSSBRK) {
			/* Heap already uses this size code: nothing to do. */
			if (szc == p->p_brkpageszc) {
				as_rangeunlock(as);
				return (0);
			}
			/*
			 * brk_internal() is only called when moving to a
			 * larger size code.
			 */
			if (szc > p->p_brkpageszc) {
				error = brk_internal(p->p_brkbase
				    + p->p_brksize, szc);
				if (error) {
					as_rangeunlock(as);
					return (set_errno(error));
				}
			}
			/*
			 * It is possible for brk_internal to silently fail to
			 * promote the heap size, so don't panic or ASSERT.
			 */
			if (!IS_P2ALIGNED(p->p_brkbase + p->p_brksize, pgsz)) {
				as_rangeunlock(as);
				return (set_errno(ENOMEM));
			}
			/*
			 * Remember the old size code so that it can be put
			 * back if as_setpagesize() fails below.
			 */
			oszc = p->p_brkpageszc;
			p->p_brkpageszc = szc;

			/*
			 * The range to convert runs from p_bssbase, rounded
			 * up to pgsz, to the current end of the heap.
			 */
			addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
			    pgsz);
			len = (p->p_brkbase + p->p_brksize) - addr;
			ASSERT(IS_P2ALIGNED(len, pgsz));
			/*
			 * Perhaps no existing pages to promote.
			 */
			if (len == 0) {
				as_rangeunlock(as);
				return (0);
			}
		}
		/*
		 * The code below, as does grow.c, assumes stacks always grow
		 * downward.
		 */
		if (pgcmd & MHA_MAPSIZE_STACK) {
			/* Stack already uses this size code: nothing to do. */
			if (szc == p->p_stkpageszc) {
				as_rangeunlock(as);
				return (0);
			}

			/*
			 * grow_internal() is only called when moving to a
			 * larger size code.
			 */
			if (szc > p->p_stkpageszc) {
				error = grow_internal(p->p_usrstack -
				    p->p_stksize, szc);
				if (error) {
					as_rangeunlock(as);
					return (set_errno(error));
				}
			}
			/*
			 * It is possible for grow_internal to silently fail to
			 * promote the stack size, so don't panic or ASSERT.
			 */
			if (!IS_P2ALIGNED(p->p_usrstack - p->p_stksize, pgsz)) {
				as_rangeunlock(as);
				return (set_errno(ENOMEM));
			}
			/*
			 * Remember the old size code so that it can be put
			 * back if as_setpagesize() fails below.
			 */
			oszc = p->p_stkpageszc;
			p->p_stkpageszc = szc;

			/*
			 * The range to convert starts at the lowest stack
			 * address and covers the stack size aligned down to
			 * a multiple of pgsz.
			 */
			addr = p->p_usrstack - p->p_stksize;
			len = P2ALIGN(p->p_stksize, pgsz);

			/*
			 * Perhaps nothing to promote.
			 */
			if (len == 0 || addr >= p->p_usrstack ||
			    (addr + len) < addr) {
				as_rangeunlock(as);
				return (0);
			}
		}
		ASSERT(IS_P2ALIGNED(addr, pgsz));
		ASSERT(IS_P2ALIGNED(len, pgsz));
		error = as_setpagesize(as, addr, len, szc, B_TRUE);

		/*
		 * On stack or heap failures restore original
		 * pg size code.
		 *
		 * oszc is only assigned in the heap and stack branches above
		 * and is only read here under the same pgcmd bits.
		 */
		if (error) {
			if ((pgcmd & MHA_MAPSIZE_BSSBRK) != 0) {
				p->p_brkpageszc = oszc;
			}
			if ((pgcmd & MHA_MAPSIZE_STACK) != 0) {
				p->p_stkpageszc = oszc;
			}
			(void) set_errno(error);
		}
		/* Drop the range lock taken for heap and stack requests. */
		if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK)) != 0) {
			as_rangeunlock(as);
		}
		return (error);
	case MC_ADVISE:
		/*
		 * For MADV_FREE only the part of len selected by PAGEMASK is
		 * kept (presumably rounding len down to whole pages -
		 * confirm against the PAGEMASK definition).
		 */
		if ((uintptr_t)arg == MADV_FREE) {
			len &= PAGEMASK;
		}
		switch ((uintptr_t)arg) {
		case MADV_WILLNEED:
			/*
			 * Map the fault code from as_faulta() to an errno:
			 * FC_OBJERR carries its own errno, FC_NOMAP becomes
			 * ENOMEM and anything else becomes EINVAL.  On
			 * success, control leaves both switches and reaches
			 * the as_ctl() call at the end of the function.
			 */
			fc = as_faulta(as, addr, len);
			if (fc) {
				if (FC_CODE(fc) == FC_OBJERR)
					error = set_errno(FC_ERRNO(fc));
				else if (FC_CODE(fc) == FC_NOMAP)
					error = set_errno(ENOMEM);
				else
					error = set_errno(EINVAL);
				return (error);
			}
			break;

		case MADV_DONTNEED:
			/*
			 * For now, don't need is turned into an as_ctl(MC_SYNC)
			 * operation flagged for async invalidate.
			 */
			error = as_ctl(as, addr, len, MC_SYNC, attr,
			    MS_ASYNC | MS_INVALIDATE, NULL, 0);
			if (error)
				(void) set_errno(error);
			return (error);

		default:
			/* All other advice values are passed to as_ctl() as is. */
			error = as_ctl(as, addr, len, cmd, attr,
			    (uintptr_t)arg, NULL, 0);
			if (error)
				(void) set_errno(error);
			return (error);
		}
		break;
	default:
		return (set_errno(EINVAL));
	}

	/*
	 * Reached by MC_LOCKAS, MC_LOCK, MC_UNLOCKAS, MC_UNLOCK and by a
	 * successful MC_ADVISE/MADV_WILLNEED.
	 */
	error = as_ctl(as, addr, len, cmd, attr, (uintptr_t)arg, NULL, 0);

	if (error)
		(void) set_errno(error);
	return (error);
}
397
398/*
399 * Return page size code for page size passed in. If
400 * matching page size not found or supported, return -1.
401 */
402static uint_t
403mem_getpgszc(size_t pgsz) {
404	return ((uint_t)page_szc_user_filtered(pgsz));
405}
406