memnode.c revision 4769:291956cbfc21
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#include <sys/systm.h>
29#include <sys/sysmacros.h>
30#include <sys/bootconf.h>
31#include <sys/atomic.h>
32#include <sys/lgrp.h>
33#include <sys/memlist.h>
34#include <sys/memnode.h>
35#include <sys/platform_module.h>
36#include <vm/vm_dep.h>
37
/*
 * Upper bound on memnode indices used by this file: the ASSERTs in the
 * slice routines and the search loop in mem_node_alloc() compare
 * against it.
 */
int	max_mem_nodes = 1;

/*
 * Per-memnode configuration, indexed by memnode number.  This file
 * reads and writes the exists, physbase and physmax fields.
 */
struct mem_node_conf mem_node_config[MAX_MEM_NODES];
/*
 * Reset to 0 by startup_build_mem_nodes() when the platform supplies no
 * plat_build_mem_nodes hook.
 * NOTE(review): presumably consumed by PFN_2_MEM_NODE(), which is
 * defined outside this file -- confirm.
 */
int mem_node_pfn_shift;
/*
 * num_memnodes should be updated atomically and must always be >=
 * the number of bits set in memnodes_mask or the algorithm may fail.
 * The code below therefore raises the count before setting a bit and
 * clears the bit before lowering the count.
 */
uint16_t num_memnodes;
/* One bit per existing memnode: bit N is set while memnode N exists. */
mnodeset_t memnodes_mask; /* assumes 8*(sizeof(mnodeset_t)) >= MAX_MEM_NODES */

/*
 * If set, mem_node_physalign should be a power of two, and
 * should reflect the minimum address alignment of each node.
 * It is a byte count; the slice routines convert it to pages
 * with btop() before aligning PFNs.
 */
uint64_t mem_node_physalign;

/*
 * Platform hooks we will need.  They are declared weak, so each
 * caller tests the symbol's address before invoking the hook.
 */

#pragma weak plat_build_mem_nodes
#pragma weak plat_slice_add
#pragma weak plat_slice_del
62
63/*
64 * Adjust the memnode config after a DR operation.
65 *
66 * It is rather tricky to do these updates since we can't
67 * protect the memnode structures with locks, so we must
68 * be mindful of the order in which updates and reads to
69 * these values can occur.
70 */
71
72void
73mem_node_add_slice(pfn_t start, pfn_t end)
74{
75	int mnode;
76	mnodeset_t newmask, oldmask;
77
78	/*
79	 * DR will pass us the first pfn that is allocatable.
80	 * We need to round down to get the real start of
81	 * the slice.
82	 */
83	if (mem_node_physalign) {
84		start &= ~(btop(mem_node_physalign) - 1);
85		end = roundup(end, btop(mem_node_physalign)) - 1;
86	}
87
88	if (&plat_slice_add)
89		plat_slice_add(start, end);
90
91	mnode = PFN_2_MEM_NODE(start);
92	ASSERT(mnode < max_mem_nodes);
93
94	if (cas32((uint32_t *)&mem_node_config[mnode].exists, 0, 1)) {
95		/*
96		 * Add slice to existing node.
97		 */
98		if (start < mem_node_config[mnode].physbase)
99			mem_node_config[mnode].physbase = start;
100		if (end > mem_node_config[mnode].physmax)
101			mem_node_config[mnode].physmax = end;
102	} else {
103		mem_node_config[mnode].physbase = start;
104		mem_node_config[mnode].physmax = end;
105		atomic_add_16(&num_memnodes, 1);
106		do {
107			oldmask = memnodes_mask;
108			newmask = memnodes_mask | (1ull << mnode);
109		} while (cas64(&memnodes_mask, oldmask, newmask) != oldmask);
110	}
111
112	/*
113	 * Inform the common lgrp framework about the new memory
114	 */
115	lgrp_config(LGRP_CONFIG_MEM_ADD, mnode, MEM_NODE_2_LGRPHAND(mnode));
116}
117
118/* ARGSUSED */
119void
120mem_node_pre_del_slice(pfn_t start, pfn_t end)
121{
122	int mnode = PFN_2_MEM_NODE(start);
123
124	ASSERT(mnode < max_mem_nodes);
125	ASSERT(mem_node_config[mnode].exists == 1);
126}
127
128/*
129 * Remove a PFN range from a memnode.  On some platforms,
130 * the memnode will be created with physbase at the first
131 * allocatable PFN, but later deleted with the MC slice
132 * base address converted to a PFN, in which case we need
133 * to assume physbase and up.
134 */
135void
136mem_node_post_del_slice(pfn_t start, pfn_t end, int cancelled)
137{
138	int mnode;
139	pgcnt_t delta_pgcnt, node_size;
140	mnodeset_t omask, nmask;
141
142	if (mem_node_physalign) {
143		start &= ~(btop(mem_node_physalign) - 1);
144		end = roundup(end, btop(mem_node_physalign)) - 1;
145	}
146	mnode = PFN_2_MEM_NODE(start);
147
148	ASSERT(mnode < max_mem_nodes);
149	ASSERT(mem_node_config[mnode].exists == 1);
150
151	if (!cancelled) {
152		delta_pgcnt = end - start;
153		node_size = mem_node_config[mnode].physmax -
154		    mem_node_config[mnode].physbase;
155
156		if (node_size > delta_pgcnt) {
157			/*
158			 * Subtract the slice from the memnode.
159			 */
160			if (start <= mem_node_config[mnode].physbase)
161				mem_node_config[mnode].physbase = end + 1;
162			ASSERT(end <= mem_node_config[mnode].physmax);
163			if (end == mem_node_config[mnode].physmax)
164				mem_node_config[mnode].physmax = start - 1;
165		} else {
166			/*
167			 * Let the common lgrp framework know this mnode is
168			 * leaving
169			 */
170			lgrp_config(LGRP_CONFIG_MEM_DEL,
171			    mnode, MEM_NODE_2_LGRPHAND(mnode));
172
173			/*
174			 * Delete the whole node.
175			 */
176			ASSERT(MNODE_PGCNT(mnode) == 0);
177			do {
178				omask = memnodes_mask;
179				nmask = omask & ~(1ull << mnode);
180			} while (cas64(&memnodes_mask, omask, nmask) != omask);
181			atomic_add_16(&num_memnodes, -1);
182			mem_node_config[mnode].exists = 0;
183		}
184
185		if (&plat_slice_del)
186			plat_slice_del(start, end);
187	}
188}
189
190void
191startup_build_mem_nodes(struct memlist *list)
192{
193	pfn_t	start, end;
194
195	/* LINTED: ASSERT will always true or false */
196	ASSERT(NBBY * sizeof (mnodeset_t) >= max_mem_nodes);
197
198	if (&plat_build_mem_nodes) {
199		plat_build_mem_nodes(list);
200	} else {
201		/*
202		 * Boot install lists are arranged <addr, len>, ...
203		 */
204		while (list) {
205			start = list->address >> PAGESHIFT;
206			if (start > physmax)
207				continue;
208			end = (list->address + list->size - 1) >> PAGESHIFT;
209			if (end > physmax)
210				end = physmax;
211			mem_node_add_slice(start, end);
212			list = list->next;
213		}
214		mem_node_physalign = 0;
215		mem_node_pfn_shift = 0;
216	}
217}
218
219/*
220 * Allocate an unassigned memnode.
221 */
222int
223mem_node_alloc()
224{
225	int mnode;
226	mnodeset_t newmask, oldmask;
227
228	/*
229	 * Find an unused memnode.  Update it atomically to prevent
230	 * a first time memnode creation race.
231	 */
232	for (mnode = 0; mnode < max_mem_nodes; mnode++)
233		if (cas32((uint32_t *)&mem_node_config[mnode].exists,
234		    0, 1) == 0)
235			break;
236
237	if (mnode >= max_mem_nodes)
238		panic("Out of free memnodes\n");
239
240	mem_node_config[mnode].physbase = (pfn_t)-1l;
241	mem_node_config[mnode].physmax = 0;
242	atomic_add_16(&num_memnodes, 1);
243	do {
244		oldmask = memnodes_mask;
245		newmask = memnodes_mask | (1ull << mnode);
246	} while (cas64(&memnodes_mask, oldmask, newmask) != oldmask);
247
248	return (mnode);
249}
250
251/*
252 * Find the intersection between a memnode and a memlist
253 * and returns the number of pages that overlap.
254 *
255 * Assumes the list is protected from DR operations by
256 * the memlist lock.
257 */
258pgcnt_t
259mem_node_memlist_pages(int mnode, struct memlist *mlist)
260{
261	pfn_t		base, end;
262	pfn_t		cur_base, cur_end;
263	pgcnt_t		npgs;
264	struct memlist	*pmem;
265
266	base = mem_node_config[mnode].physbase;
267	end = mem_node_config[mnode].physmax;
268	npgs = 0;
269
270	memlist_read_lock();
271
272	for (pmem = mlist; pmem; pmem = pmem->next) {
273		cur_base = btop(pmem->address);
274		cur_end = cur_base + btop(pmem->size) - 1;
275		if (end < cur_base || base > cur_end)
276			continue;
277		npgs = npgs + (MIN(cur_end, end) -
278		    MAX(cur_base, base)) + 1;
279	}
280
281	memlist_read_unlock();
282
283	return (npgs);
284}
285