1/*
2 *
3 * CDDL HEADER START
4 *
5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License (the "License").
7 * You may not use this file except in compliance with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#include <sys/types.h>
28#include <sys/cmn_err.h>
29#include <sys/vm.h>
30#include <sys/mman.h>
31#include <vm/vm_dep.h>
32#include <vm/seg_kmem.h>
33#include <vm/seg_kpm.h>
34#include <sys/mem_config.h>
35#include <sys/sysmacros.h>
36
37extern pgcnt_t pp_dummy_npages;
38extern pfn_t *pp_dummy_pfn;	/* Array of dummy pfns. */
39
40extern kmutex_t memseg_lists_lock;
41extern struct memseg *memseg_va_avail;
42extern struct memseg *memseg_alloc();
43
44extern page_t *ppvm_base;
45extern pgcnt_t ppvm_size;
46
47static int sun4v_memseg_debug;
48
49extern struct memseg *memseg_reuse(pgcnt_t);
50extern void remap_to_dummy(caddr_t, pgcnt_t);
51
52/*
53 * The page_t memory for incoming pages is allocated from existing memory
54 * which can create a potential situation where memory addition fails
55 * because of shortage of existing memory.  To mitigate this situation
56 * some memory is always reserved ahead of time for page_t allocation.
 * Each 4MB of reserved page_t's guarantees a 256MB (x64) addition without
 * page_t allocation.  The added 256MB of memory could theoretically
 * allow a further addition of 16GB.
60 */
61#define	RSV_SIZE	0x40000000	/* add size with rsrvd page_t's 1G */
62
63#ifdef	DEBUG
64#define	MEMSEG_DEBUG(args...) if (sun4v_memseg_debug) printf(args)
65#else
66#define	MEMSEG_DEBUG(...)
67#endif
68
69/*
70 * The page_t's for the incoming memory are allocated from
71 * existing pages.
72 */
73/*ARGSUSED*/
74int
75memseg_alloc_meta(pfn_t base, pgcnt_t npgs, void **ptp, pgcnt_t *metap)
76{
77	page_t		*pp, *opp, *epp;
78	pgcnt_t		metapgs;
79	int		i;
80	struct seg	kseg;
81	caddr_t		vaddr;
82
83	/*
84	 * Verify incoming memory is within supported DR range.
85	 */
86	if ((base + npgs) * sizeof (page_t) > ppvm_size)
87		return (KPHYSM_ENOTSUP);
88
89	opp = pp = ppvm_base + base;
90	epp = pp + npgs;
91	metapgs = btopr(npgs * sizeof (page_t));
92
93	if (!IS_P2ALIGNED((uint64_t)pp, PAGESIZE) &&
94	    page_find(&mpvp, (u_offset_t)pp)) {
95		/*
96		 * Another memseg has page_t's in the same
97		 * page which 'pp' resides.  This would happen
98		 * if PAGESIZE is not an integral multiple of
99		 * sizeof (page_t) and therefore 'pp'
100		 * does not start on a page boundry.
101		 *
102		 * Since the other memseg's pages_t's still
103		 * map valid pages, skip allocation of this page.
104		 * Advance 'pp' to the next page which should
105		 * belong only to the incoming memseg.
106		 *
107		 * If the last page_t in the current page
108		 * crosses a page boundary, this should still
109		 * work.  The first part of the page_t is
110		 * already allocated.  The second part of
111		 * the page_t will be allocated below.
112		 */
113		ASSERT(PAGESIZE % sizeof (page_t));
114		pp = (page_t *)P2ROUNDUP((uint64_t)pp, PAGESIZE);
115		metapgs--;
116	}
117
118	if (!IS_P2ALIGNED((uint64_t)epp, PAGESIZE) &&
119	    page_find(&mpvp, (u_offset_t)epp)) {
120		/*
121		 * Another memseg has page_t's in the same
122		 * page which 'epp' resides.  This would happen
123		 * if PAGESIZE is not an integral multiple of
124		 * sizeof (page_t) and therefore 'epp'
125		 * does not start on a page boundry.
126		 *
127		 * Since the other memseg's pages_t's still
128		 * map valid pages, skip allocation of this page.
129		 */
130		ASSERT(PAGESIZE % sizeof (page_t));
131		metapgs--;
132	}
133
134	ASSERT(IS_P2ALIGNED((uint64_t)pp, PAGESIZE));
135
136	/*
137	 * Back metadata space with physical pages.
138	 */
139	kseg.s_as = &kas;
140	vaddr = (caddr_t)pp;
141
142	for (i = 0; i < metapgs; i++)
143		if (page_find(&mpvp, (u_offset_t)(vaddr + i * PAGESIZE)))
144			panic("page_find(0x%p, %p)\n",
145			    (void *)&mpvp, (void *)(vaddr + i * PAGESIZE));
146
147	/*
148	 * Allocate the metadata pages; these are the pages that will
149	 * contain the page_t's for the incoming memory.
150	 */
151	if ((page_create_va(&mpvp, (u_offset_t)pp, ptob(metapgs),
152	    PG_NORELOC | PG_EXCL, &kseg, vaddr)) == NULL) {
153		MEMSEG_DEBUG("memseg_alloc_meta: can't get 0x%ld metapgs",
154		    metapgs);
155		return (KPHYSM_ERESOURCE);
156	}
157
158	ASSERT(ptp);
159	ASSERT(metap);
160
161	*ptp = (void *)opp;
162	*metap = metapgs;
163
164	return (KPHYSM_OK);
165}
166
167void
168memseg_free_meta(void *ptp, pgcnt_t metapgs)
169{
170	int i;
171	page_t *pp;
172	u_offset_t off;
173
174	if (!metapgs)
175		return;
176
177	off = (u_offset_t)ptp;
178
179	ASSERT(off);
180	ASSERT(IS_P2ALIGNED((uint64_t)off, PAGESIZE));
181
182	MEMSEG_DEBUG("memseg_free_meta: off=0x%lx metapgs=0x%lx\n",
183	    (uint64_t)off, metapgs);
184	/*
185	 * Free pages allocated during add.
186	 */
187	for (i = 0; i < metapgs; i++) {
188		pp = page_find(&mpvp, off);
189		ASSERT(pp);
190		ASSERT(pp->p_szc == 0);
191		page_io_unlock(pp);
192		page_destroy(pp, 0);
193		off += PAGESIZE;
194	}
195}
196
197pfn_t
198memseg_get_metapfn(void *ptp, pgcnt_t metapg)
199{
200	page_t *pp;
201	u_offset_t off;
202
203	off = (u_offset_t)ptp + ptob(metapg);
204
205	ASSERT(off);
206	ASSERT(IS_P2ALIGNED((uint64_t)off, PAGESIZE));
207
208	pp = page_find(&mpvp, off);
209	ASSERT(pp);
210	ASSERT(pp->p_szc == 0);
211	ASSERT(pp->p_pagenum != PFN_INVALID);
212
213	return (pp->p_pagenum);
214}
215
216/*
217 * Remap a memseg's page_t's to dummy pages.  Skip the low/high
218 * ends of the range if they are already in use.
219 */
220void
221memseg_remap_meta(struct memseg *seg)
222{
223	int i;
224	u_offset_t off;
225	page_t *pp;
226#if 0
227	page_t *epp;
228#endif
229	pgcnt_t metapgs;
230
231	metapgs = btopr(MSEG_NPAGES(seg) * sizeof (page_t));
232	ASSERT(metapgs);
233	pp = seg->pages;
234	seg->pages_end = seg->pages_base;
235#if 0
236	epp = seg->epages;
237
238	/*
239	 * This code cannot be tested as the kernel does not compile
240	 * when page_t size is changed.  It is left here as a starting
241	 * point if the unaligned page_t size needs to be supported.
242	 */
243
244	if (!IS_P2ALIGNED((uint64_t)pp, PAGESIZE) &&
245	    page_find(&mpvp, (u_offset_t)(pp - 1)) && !page_deleted(pp - 1)) {
246		/*
247		 * Another memseg has page_t's in the same
248		 * page which 'pp' resides.  This would happen
249		 * if PAGESIZE is not an integral multiple of
250		 * sizeof (page_t) and therefore 'seg->pages'
251		 * does not start on a page boundry.
252		 *
253		 * Since the other memseg's pages_t's still
254		 * map valid pages, skip remap of this page.
255		 * Advance 'pp' to the next page which should
256		 * belong only to the outgoing memseg.
257		 *
258		 * If the last page_t in the current page
259		 * crosses a page boundary, this should still
260		 * work.  The first part of the page_t is
261		 * valid since memseg_lock_delete_all() has
262		 * been called.  The second part of the page_t
263		 * will be remapped to the corresponding
264		 * dummy page below.
265		 */
266		ASSERT(PAGESIZE % sizeof (page_t));
267		pp = (page_t *)P2ROUNDUP((uint64_t)pp, PAGESIZE);
268		metapgs--;
269	}
270
271	if (!IS_P2ALIGNED((uint64_t)epp, PAGESIZE) &&
272	    page_find(&mpvp, (u_offset_t)epp) && !page_deleted(epp)) {
273		/*
274		 * Another memseg has page_t's in the same
275		 * page which 'epp' resides.  This would happen
276		 * if PAGESIZE is not an integral multiple of
277		 * sizeof (page_t) and therefore 'seg->epages'
278		 * does not start on a page boundry.
279		 *
280		 * Since the other memseg's pages_t's still
281		 * map valid pages, skip remap of this page.
282		 */
283		ASSERT(PAGESIZE % sizeof (page_t));
284		metapgs--;
285	}
286#endif
287	ASSERT(IS_P2ALIGNED((uint64_t)pp, PAGESIZE));
288
289	remap_to_dummy((caddr_t)pp, metapgs);
290
291	off = (u_offset_t)pp;
292
293	MEMSEG_DEBUG("memseg_remap_meta: off=0x%lx metapgs=0x%lx\n",
294	    (uint64_t)off, metapgs);
295	/*
296	 * Free pages allocated during add.
297	 */
298	for (i = 0; i < metapgs; i++) {
299		pp = page_find(&mpvp, off);
300		ASSERT(pp);
301		ASSERT(pp->p_szc == 0);
302		page_io_unlock(pp);
303		page_destroy(pp, 0);
304		off += PAGESIZE;
305	}
306}
307