vmm_mem.c revision 239700
1221828Sgrehan/*-
2221828Sgrehan * Copyright (c) 2011 NetApp, Inc.
3221828Sgrehan * All rights reserved.
4221828Sgrehan *
5221828Sgrehan * Redistribution and use in source and binary forms, with or without
6221828Sgrehan * modification, are permitted provided that the following conditions
7221828Sgrehan * are met:
8221828Sgrehan * 1. Redistributions of source code must retain the above copyright
9221828Sgrehan *    notice, this list of conditions and the following disclaimer.
10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright
11221828Sgrehan *    notice, this list of conditions and the following disclaimer in the
12221828Sgrehan *    documentation and/or other materials provided with the distribution.
13221828Sgrehan *
14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17221828Sgrehan * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24221828Sgrehan * SUCH DAMAGE.
25221828Sgrehan *
26221828Sgrehan * $FreeBSD$
27221828Sgrehan */
28221828Sgrehan
29221828Sgrehan#include <sys/cdefs.h>
30221828Sgrehan__FBSDID("$FreeBSD$");
31221828Sgrehan
32221828Sgrehan#include <sys/param.h>
33221828Sgrehan#include <sys/lock.h>
34221828Sgrehan#include <sys/mutex.h>
35221828Sgrehan#include <sys/linker.h>
36221828Sgrehan#include <sys/systm.h>
37221828Sgrehan#include <sys/malloc.h>
38221828Sgrehan#include <sys/kernel.h>
39221828Sgrehan
40221828Sgrehan#include <vm/vm.h>
41221828Sgrehan#include <vm/pmap.h>
42221828Sgrehan
43221828Sgrehan#include <machine/md_var.h>
44221828Sgrehan#include <machine/metadata.h>
45221828Sgrehan#include <machine/pc/bios.h>
46221828Sgrehan#include <machine/vmparam.h>
47221828Sgrehan#include <machine/pmap.h>
48221828Sgrehan
49221828Sgrehan#include "vmm_util.h"
50221828Sgrehan#include "vmm_mem.h"
51221828Sgrehan
/* malloc(9) type used for allocations made by this file. */
52221828Sgrehanstatic MALLOC_DEFINE(M_VMM_MEM, "vmm memory", "vmm memory");
53221828Sgrehan
/* Byte counts used when formatting sizes in messages. */
54221828Sgrehan#define	MB		(1024 * 1024)
55221828Sgrehan#define	GB		(1024 * MB)
56221828Sgrehan
/* Capacity of the vmm_mem_avail[] table below. */
57221828Sgrehan#define	VMM_MEM_MAXSEGS	64
58221828Sgrehan
/*
 * Table of free physical memory segments, each described by its starting
 * physical address and its length in bytes.  Only the first vmm_mem_nsegs
 * entries are in use.  vmm_mem_free() inserts entries in ascending 'base'
 * order.
 */
59221828Sgrehan/* protected by vmm_mem_mtx */
60221828Sgrehanstatic struct {
61221828Sgrehan	vm_paddr_t	base;
62221828Sgrehan	vm_size_t	length;
63221828Sgrehan} vmm_mem_avail[VMM_MEM_MAXSEGS];
64221828Sgrehan
/* Number of valid entries in vmm_mem_avail[]. */
65221828Sgrehanstatic int vmm_mem_nsegs;
/*
 * Sum of the lengths of all segments recorded by vmm_mem_steal_memory();
 * not adjusted by later allocations or frees.
 */
66239700Sgrehansize_t vmm_mem_total_bytes;
67221828Sgrehan
/* Highest end address (base + length) of any segment recorded at init. */
68221828Sgrehanstatic vm_paddr_t maxaddr;
69221828Sgrehan
/* Mutex taken around accesses to the segment table after initialization. */
70221828Sgrehanstatic struct mtx vmm_mem_mtx;
71221828Sgrehan
72221828Sgrehan/*
73221828Sgrehan * Steal any memory that was deliberately hidden from FreeBSD either by
74221828Sgrehan * the use of MAXMEM kernel config option or the hw.physmem loader tunable.
75221828Sgrehan */
76221828Sgrehanstatic int
77221828Sgrehanvmm_mem_steal_memory(void)
78221828Sgrehan{
79221828Sgrehan	int nsegs;
80221828Sgrehan	caddr_t kmdp;
81221828Sgrehan	uint32_t smapsize;
82221828Sgrehan	uint64_t base, length;
83221828Sgrehan	struct bios_smap *smapbase, *smap, *smapend;
84221828Sgrehan
85221828Sgrehan	/*
86221828Sgrehan	 * Borrowed from hammer_time() and getmemsize() in machdep.c
87221828Sgrehan	 */
88221828Sgrehan	kmdp = preload_search_by_type("elf kernel");
89221828Sgrehan	if (kmdp == NULL)
90221828Sgrehan		kmdp = preload_search_by_type("elf64 kernel");
91221828Sgrehan
92221828Sgrehan	smapbase = (struct bios_smap *)preload_search_info(kmdp,
93221828Sgrehan		MODINFO_METADATA | MODINFOMD_SMAP);
94221828Sgrehan	if (smapbase == NULL)
95221828Sgrehan		panic("No BIOS smap info from loader!");
96221828Sgrehan
97221828Sgrehan	smapsize = *((uint32_t *)smapbase - 1);
98221828Sgrehan	smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);
99221828Sgrehan
100239700Sgrehan	vmm_mem_total_bytes = 0;
101221828Sgrehan	nsegs = 0;
102221828Sgrehan	for (smap = smapbase; smap < smapend; smap++) {
103221828Sgrehan		/*
104221828Sgrehan		 * XXX
105221828Sgrehan		 * Assuming non-overlapping, monotonically increasing
106221828Sgrehan		 * memory segments.
107221828Sgrehan		 */
108221828Sgrehan		if (smap->type != SMAP_TYPE_MEMORY)
109221828Sgrehan			continue;
110221828Sgrehan		if (smap->length == 0)
111221828Sgrehan			break;
112221828Sgrehan
113221828Sgrehan		base = roundup(smap->base, NBPDR);
114221828Sgrehan		length = rounddown(smap->length, NBPDR);
115221828Sgrehan
116221828Sgrehan		/* Skip this segment if FreeBSD is using all of it. */
117221828Sgrehan		if (base + length <= ptoa(Maxmem))
118221828Sgrehan			continue;
119221828Sgrehan
120221828Sgrehan		/*
121221828Sgrehan		 * If FreeBSD is using part of this segment then adjust
122221828Sgrehan		 * 'base' and 'length' accordingly.
123221828Sgrehan		 */
124221828Sgrehan		if (base < ptoa(Maxmem)) {
125221828Sgrehan			uint64_t used;
126221828Sgrehan			used = roundup(ptoa(Maxmem), NBPDR) - base;
127221828Sgrehan			base += used;
128221828Sgrehan			length -= used;
129221828Sgrehan		}
130221828Sgrehan
131221828Sgrehan		if (length == 0)
132221828Sgrehan			continue;
133221828Sgrehan
134221828Sgrehan		vmm_mem_avail[nsegs].base = base;
135221828Sgrehan		vmm_mem_avail[nsegs].length = length;
136239700Sgrehan		vmm_mem_total_bytes += length;
137221828Sgrehan
138221828Sgrehan		if (base + length > maxaddr)
139221828Sgrehan			maxaddr = base + length;
140221828Sgrehan
141221828Sgrehan		if (0 && bootverbose) {
142221828Sgrehan			printf("vmm_mem_populate: index %d, base 0x%0lx, "
143221828Sgrehan			       "length %ld\n",
144221828Sgrehan			       nsegs, vmm_mem_avail[nsegs].base,
145221828Sgrehan			       vmm_mem_avail[nsegs].length);
146221828Sgrehan		}
147221828Sgrehan
148221828Sgrehan		nsegs++;
149221828Sgrehan		if (nsegs >= VMM_MEM_MAXSEGS) {
150221828Sgrehan			printf("vmm_mem_populate: maximum number of vmm memory "
151221828Sgrehan			       "segments reached!\n");
152221828Sgrehan			return (ENOSPC);
153221828Sgrehan		}
154221828Sgrehan	}
155221828Sgrehan
156221828Sgrehan	vmm_mem_nsegs = nsegs;
157221828Sgrehan
158221828Sgrehan	return (0);
159221828Sgrehan}
160221828Sgrehan
161221828Sgrehanstatic void
162221828Sgrehanvmm_mem_direct_map(vm_paddr_t start, vm_paddr_t end)
163221828Sgrehan{
164221828Sgrehan	vm_paddr_t addr, remaining;
165221828Sgrehan	int pdpi, pdi, superpage_size;
166221828Sgrehan	pml4_entry_t *pml4p;
167221828Sgrehan	pdp_entry_t *pdp;
168221828Sgrehan	pd_entry_t *pd;
169221828Sgrehan	uint64_t page_attr_bits;
170221828Sgrehan
171221828Sgrehan	if (end >= NBPML4)
172221828Sgrehan		panic("Cannot map memory beyond %ldGB", NBPML4 / GB);
173221828Sgrehan
174221940Sjhb	if (vmm_supports_1G_pages())
175221828Sgrehan		superpage_size = NBPDP;
176221828Sgrehan	else
177221828Sgrehan		superpage_size = NBPDR;
178221828Sgrehan
179221828Sgrehan	/*
180221828Sgrehan	 * Get the page directory pointer page that contains the direct
181221828Sgrehan	 * map address mappings.
182221828Sgrehan	 */
183221828Sgrehan	pml4p = kernel_pmap->pm_pml4;
184221828Sgrehan	pdp = (pdp_entry_t *)PHYS_TO_DMAP(pml4p[DMPML4I] & ~PAGE_MASK);
185221828Sgrehan
186221828Sgrehan	page_attr_bits = PG_RW | PG_V | PG_PS | PG_G;
187221828Sgrehan	addr = start;
188221828Sgrehan	while (addr < end) {
189221828Sgrehan		remaining = end - addr;
190221828Sgrehan		pdpi = addr / NBPDP;
191221828Sgrehan		if (superpage_size == NBPDP &&
192221828Sgrehan		    remaining >= NBPDP &&
193221828Sgrehan		    addr % NBPDP == 0) {
194221828Sgrehan			/*
195221828Sgrehan			 * If there isn't a mapping for this address then
196221828Sgrehan			 * create one but if there is one already make sure
197221828Sgrehan			 * it matches what we expect it to be.
198221828Sgrehan			 */
199221828Sgrehan			if (pdp[pdpi] == 0) {
200221828Sgrehan				pdp[pdpi] = addr | page_attr_bits;
201221828Sgrehan				if (0 && bootverbose) {
202221828Sgrehan					printf("vmm_mem_populate: mapping "
203221828Sgrehan					       "0x%lx with 1GB page at "
204221828Sgrehan					       "pdpi %d\n", addr, pdpi);
205221828Sgrehan				}
206221828Sgrehan			} else {
207221828Sgrehan				pdp_entry_t pdpe = pdp[pdpi];
208221828Sgrehan				if ((pdpe & ~PAGE_MASK) != addr ||
209221828Sgrehan				    (pdpe & page_attr_bits) != page_attr_bits) {
210221828Sgrehan					panic("An invalid mapping 0x%016lx "
211221828Sgrehan					      "already exists for 0x%016lx\n",
212221828Sgrehan					      pdpe, addr);
213221828Sgrehan				}
214221828Sgrehan			}
215221828Sgrehan			addr += NBPDP;
216221828Sgrehan		} else {
217221828Sgrehan			if (remaining < NBPDR) {
218221828Sgrehan				panic("vmm_mem_populate: remaining (%ld) must "
219221828Sgrehan				      "be greater than NBPDR (%d)\n",
220221828Sgrehan				      remaining, NBPDR);
221221828Sgrehan			}
222221828Sgrehan			if (pdp[pdpi] == 0) {
223221828Sgrehan				/*
224221828Sgrehan				 * XXX we lose this memory forever because
225221828Sgrehan				 * we do not keep track of the virtual address
226221828Sgrehan				 * that would be required to free this page.
227221828Sgrehan				 */
228221828Sgrehan				pd = malloc(PAGE_SIZE, M_VMM_MEM,
229221828Sgrehan					    M_WAITOK | M_ZERO);
230221828Sgrehan				if ((uintptr_t)pd & PAGE_MASK) {
231221828Sgrehan					panic("vmm_mem_populate: page directory"
232221828Sgrehan					      "page not aligned on %d "
233221828Sgrehan					      "boundary\n", PAGE_SIZE);
234221828Sgrehan				}
235221828Sgrehan				pdp[pdpi] = vtophys(pd);
236221828Sgrehan				pdp[pdpi] |= PG_RW | PG_V | PG_U;
237221828Sgrehan				if (0 && bootverbose) {
238221828Sgrehan					printf("Creating page directory "
239221828Sgrehan					       "at pdp index %d for 0x%016lx\n",
240221828Sgrehan					       pdpi, addr);
241221828Sgrehan				}
242221828Sgrehan			}
243221828Sgrehan			pdi = (addr % NBPDP) / NBPDR;
244221828Sgrehan			pd = (pd_entry_t *)PHYS_TO_DMAP(pdp[pdpi] & ~PAGE_MASK);
245221828Sgrehan
246221828Sgrehan			/*
247221828Sgrehan			 * Create a new mapping if one doesn't already exist
248221828Sgrehan			 * or validate it if it does.
249221828Sgrehan			 */
250221828Sgrehan			if (pd[pdi] == 0) {
251221828Sgrehan				pd[pdi] = addr | page_attr_bits;
252221828Sgrehan				if (0 && bootverbose) {
253221828Sgrehan					printf("vmm_mem_populate: mapping "
254221828Sgrehan					       "0x%lx with 2MB page at "
255221828Sgrehan					       "pdpi %d, pdi %d\n",
256221828Sgrehan					       addr, pdpi, pdi);
257221828Sgrehan				}
258221828Sgrehan			} else {
259221828Sgrehan				pd_entry_t pde = pd[pdi];
260221828Sgrehan				if ((pde & ~PAGE_MASK) != addr ||
261221828Sgrehan				    (pde & page_attr_bits) != page_attr_bits) {
262221828Sgrehan					panic("An invalid mapping 0x%016lx "
263221828Sgrehan					      "already exists for 0x%016lx\n",
264221828Sgrehan					      pde, addr);
265221828Sgrehan				}
266221828Sgrehan			}
267221828Sgrehan			addr += NBPDR;
268221828Sgrehan		}
269221828Sgrehan	}
270221828Sgrehan}
271221828Sgrehan
272221828Sgrehanstatic int
273221828Sgrehanvmm_mem_populate(void)
274221828Sgrehan{
275221828Sgrehan	int seg, error;
276221828Sgrehan	vm_paddr_t start, end;
277221828Sgrehan
278221828Sgrehan	/* populate the vmm_mem_avail[] array */
279221828Sgrehan	error = vmm_mem_steal_memory();
280221828Sgrehan	if (error)
281221828Sgrehan		return (error);
282221828Sgrehan
283221828Sgrehan	/*
284221828Sgrehan	 * Now map the memory that was hidden from FreeBSD in
285221828Sgrehan	 * the direct map VA space.
286221828Sgrehan	 */
287221828Sgrehan	for (seg = 0; seg < vmm_mem_nsegs; seg++) {
288221828Sgrehan		start = vmm_mem_avail[seg].base;
289221828Sgrehan		end = start + vmm_mem_avail[seg].length;
290221828Sgrehan		if ((start & PDRMASK) != 0 || (end & PDRMASK) != 0) {
291221828Sgrehan			panic("start (0x%016lx) and end (0x%016lx) must be "
292221828Sgrehan			      "aligned on a %dMB boundary\n",
293221828Sgrehan			      start, end, NBPDR / MB);
294221828Sgrehan		}
295221828Sgrehan		vmm_mem_direct_map(start, end);
296221828Sgrehan	}
297221828Sgrehan
298221828Sgrehan	return (0);
299221828Sgrehan}
300221828Sgrehan
301221828Sgrehanint
302221828Sgrehanvmm_mem_init(void)
303221828Sgrehan{
304221828Sgrehan	int error;
305221828Sgrehan
306221828Sgrehan	mtx_init(&vmm_mem_mtx, "vmm_mem_mtx", NULL, MTX_DEF);
307221828Sgrehan
308221828Sgrehan	error = vmm_mem_populate();
309221828Sgrehan	if (error)
310221828Sgrehan		return (error);
311221828Sgrehan
312221828Sgrehan	return (0);
313221828Sgrehan}
314221828Sgrehan
315221828Sgrehanvm_paddr_t
316221828Sgrehanvmm_mem_alloc(size_t size)
317221828Sgrehan{
318221828Sgrehan	int i;
319221828Sgrehan	vm_paddr_t addr;
320221828Sgrehan
321221828Sgrehan	if ((size & PDRMASK) != 0) {
322221828Sgrehan		panic("vmm_mem_alloc: size 0x%0lx must be "
323221828Sgrehan		      "aligned on a 0x%0x boundary\n", size, NBPDR);
324221828Sgrehan	}
325221828Sgrehan
326221828Sgrehan	addr = 0;
327221828Sgrehan
328221828Sgrehan	mtx_lock(&vmm_mem_mtx);
329221828Sgrehan	for (i = 0; i < vmm_mem_nsegs; i++) {
330221828Sgrehan		if (vmm_mem_avail[i].length >= size) {
331221828Sgrehan			addr = vmm_mem_avail[i].base;
332221828Sgrehan			vmm_mem_avail[i].base += size;
333221828Sgrehan			vmm_mem_avail[i].length -= size;
334221828Sgrehan			/* remove a zero length segment */
335221828Sgrehan			if (vmm_mem_avail[i].length == 0) {
336221828Sgrehan				memmove(&vmm_mem_avail[i],
337221828Sgrehan					&vmm_mem_avail[i + 1],
338221828Sgrehan					(vmm_mem_nsegs - (i + 1)) *
339221828Sgrehan					 sizeof(vmm_mem_avail[0]));
340221828Sgrehan				vmm_mem_nsegs--;
341221828Sgrehan			}
342221828Sgrehan			break;
343221828Sgrehan		}
344221828Sgrehan	}
345221828Sgrehan	mtx_unlock(&vmm_mem_mtx);
346221828Sgrehan
347221828Sgrehan	return (addr);
348221828Sgrehan}
349221828Sgrehan
350239700Sgrehansize_t
351239700Sgrehanvmm_mem_get_mem_total(void)
352239700Sgrehan{
353239700Sgrehan	return vmm_mem_total_bytes;
354239700Sgrehan}
355239700Sgrehan
356239700Sgrehansize_t
357239700Sgrehanvmm_mem_get_mem_free(void)
358239700Sgrehan{
359239700Sgrehan	size_t length = 0;
360239700Sgrehan	int i;
361239700Sgrehan
362239700Sgrehan	mtx_lock(&vmm_mem_mtx);
363239700Sgrehan	for (i = 0; i < vmm_mem_nsegs; i++) {
364239700Sgrehan		length += vmm_mem_avail[i].length;
365239700Sgrehan	}
366239700Sgrehan	mtx_unlock(&vmm_mem_mtx);
367239700Sgrehan
368239700Sgrehan	return(length);
369239700Sgrehan}
370239700Sgrehan
371221828Sgrehanvoid
372221828Sgrehanvmm_mem_free(vm_paddr_t base, size_t length)
373221828Sgrehan{
374221828Sgrehan	int i;
375221828Sgrehan
376221828Sgrehan	if ((base & PDRMASK) != 0 || (length & PDRMASK) != 0) {
377221828Sgrehan		panic("vmm_mem_free: base 0x%0lx and length 0x%0lx must be "
378221828Sgrehan		      "aligned on a 0x%0x boundary\n", base, length, NBPDR);
379221828Sgrehan	}
380221828Sgrehan
381221828Sgrehan	mtx_lock(&vmm_mem_mtx);
382221828Sgrehan
383221828Sgrehan	for (i = 0; i < vmm_mem_nsegs; i++) {
384221828Sgrehan		if (vmm_mem_avail[i].base > base)
385221828Sgrehan			break;
386221828Sgrehan	}
387221828Sgrehan
388221828Sgrehan	if (vmm_mem_nsegs >= VMM_MEM_MAXSEGS)
389221828Sgrehan		panic("vmm_mem_free: cannot free any more segments");
390221828Sgrehan
391221828Sgrehan	/* Create a new segment at index 'i' */
392221828Sgrehan	memmove(&vmm_mem_avail[i + 1], &vmm_mem_avail[i],
393221828Sgrehan		(vmm_mem_nsegs - i) * sizeof(vmm_mem_avail[0]));
394221828Sgrehan
395221828Sgrehan	vmm_mem_avail[i].base = base;
396221828Sgrehan	vmm_mem_avail[i].length = length;
397221828Sgrehan
398221828Sgrehan	vmm_mem_nsegs++;
399221828Sgrehan
400221828Sgrehancoalesce_some_more:
401221828Sgrehan	for (i = 0; i < vmm_mem_nsegs - 1; i++) {
402221828Sgrehan		if (vmm_mem_avail[i].base + vmm_mem_avail[i].length ==
403221828Sgrehan		    vmm_mem_avail[i + 1].base) {
404221828Sgrehan			vmm_mem_avail[i].length += vmm_mem_avail[i + 1].length;
405221828Sgrehan			memmove(&vmm_mem_avail[i + 1], &vmm_mem_avail[i + 2],
406221828Sgrehan			  (vmm_mem_nsegs - (i + 2)) * sizeof(vmm_mem_avail[0]));
407221828Sgrehan			vmm_mem_nsegs--;
408221828Sgrehan			goto coalesce_some_more;
409221828Sgrehan		}
410221828Sgrehan	}
411221828Sgrehan
412221828Sgrehan	mtx_unlock(&vmm_mem_mtx);
413221828Sgrehan}
414221828Sgrehan
415221828Sgrehanvm_paddr_t
416221828Sgrehanvmm_mem_maxaddr(void)
417221828Sgrehan{
418221828Sgrehan
419221828Sgrehan	return (maxaddr);
420221828Sgrehan}
421221828Sgrehan
422221828Sgrehanvoid
423221828Sgrehanvmm_mem_dump(void)
424221828Sgrehan{
425221828Sgrehan	int i;
426221828Sgrehan	vm_paddr_t base;
427221828Sgrehan	vm_size_t length;
428221828Sgrehan
429221828Sgrehan	mtx_lock(&vmm_mem_mtx);
430221828Sgrehan	for (i = 0; i < vmm_mem_nsegs; i++) {
431221828Sgrehan		base = vmm_mem_avail[i].base;
432221828Sgrehan		length = vmm_mem_avail[i].length;
433221828Sgrehan		printf("%-4d0x%016lx    0x%016lx\n", i, base, base + length);
434221828Sgrehan	}
435221828Sgrehan	mtx_unlock(&vmm_mem_mtx);
436221828Sgrehan}
437