1/*
2 * Handle the memory map.
3 * The functions here do the job until bootmem takes over.
4 *
5 *  Getting sanitize_e820_map() in sync with i386 version by applying change:
6 *  -  Provisions for empty E820 memory regions (reported by certain BIOSes).
7 *     Alex Achenbach <xela@slit.de>, December 2002.
8 *  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
9 *
10 */
11#include <linux/kernel.h>
12#include <linux/types.h>
13#include <linux/init.h>
14#include <linux/bootmem.h>
15#include <linux/ioport.h>
16#include <linux/string.h>
17#include <linux/kexec.h>
18#include <linux/module.h>
19#include <linux/mm.h>
20#include <linux/suspend.h>
21#include <linux/pfn.h>
22
23#include <asm/pgtable.h>
24#include <asm/page.h>
25#include <asm/e820.h>
26#include <asm/proto.h>
27#include <asm/bootsetup.h>
28#include <asm/sections.h>
29
/* The kernel's working copy of the (sanitized) BIOS E820 memory map. */
struct e820map e820;

/*
 * PFN of last memory page.
 */
unsigned long end_pfn;
EXPORT_SYMBOL(end_pfn);

/*
 * end_pfn only includes RAM, while end_pfn_map includes all e820 entries.
 * The direct mapping extends to end_pfn_map, so that we can directly access
 * apertures, ACPI and other tables without having to play with fixmaps.
 */
unsigned long end_pfn_map;

/*
 * Last pfn which the user wants to use ("mem="/"memmap=" boot options);
 * defaults to the architectural maximum.
 */
static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;

/* Kernel text and data resources, defined outside this file. */
extern struct resource code_resource, data_resource;
51
52/* Check for some hardcoded bad areas that early boot is not allowed to touch */
53static inline int bad_addr(unsigned long *addrp, unsigned long size)
54{
55	unsigned long addr = *addrp, last = addr + size;
56
57	/* various gunk below that needed for SMP startup */
58	if (addr < 0x8000) {
59		*addrp = PAGE_ALIGN(0x8000);
60		return 1;
61	}
62
63	/* direct mapping tables of the kernel */
64	if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) {
65		*addrp = PAGE_ALIGN(table_end << PAGE_SHIFT);
66		return 1;
67	}
68
69	/* initrd */
70#ifdef CONFIG_BLK_DEV_INITRD
71	if (LOADER_TYPE && INITRD_START && last >= INITRD_START &&
72	    addr < INITRD_START+INITRD_SIZE) {
73		*addrp = PAGE_ALIGN(INITRD_START + INITRD_SIZE);
74		return 1;
75	}
76#endif
77	/* kernel code */
78	if (last >= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) {
79		*addrp = PAGE_ALIGN(__pa_symbol(&_end));
80		return 1;
81	}
82
83	if (last >= ebda_addr && addr < ebda_addr + ebda_size) {
84		*addrp = PAGE_ALIGN(ebda_addr + ebda_size);
85		return 1;
86	}
87
88#ifdef CONFIG_NUMA
89	/* NUMA memory to node map */
90	if (last >= nodemap_addr && addr < nodemap_addr + nodemap_size) {
91		*addrp = nodemap_addr + nodemap_size;
92		return 1;
93	}
94#endif
95	return 0;
96}
97
98/*
99 * This function checks if any part of the range <start,end> is mapped
100 * with type.
101 */
102int
103e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
104{
105	int i;
106	for (i = 0; i < e820.nr_map; i++) {
107		struct e820entry *ei = &e820.map[i];
108		if (type && ei->type != type)
109			continue;
110		if (ei->addr >= end || ei->addr + ei->size <= start)
111			continue;
112		return 1;
113	}
114	return 0;
115}
116EXPORT_SYMBOL_GPL(e820_any_mapped);
117
118/*
119 * This function checks if the entire range <start,end> is mapped with type.
120 *
121 * Note: this function only works correct if the e820 table is sorted and
122 * not-overlapping, which is the case
123 */
124int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type)
125{
126	int i;
127	for (i = 0; i < e820.nr_map; i++) {
128		struct e820entry *ei = &e820.map[i];
129		if (type && ei->type != type)
130			continue;
131		/* is the region (part) in overlap with the current region ?*/
132		if (ei->addr >= end || ei->addr + ei->size <= start)
133			continue;
134
135		/* if the region is at the beginning of <start,end> we move
136		 * start to the end of the region since it's ok until there
137		 */
138		if (ei->addr <= start)
139			start = ei->addr + ei->size;
140		/* if start is now at or beyond end, we're done, full coverage */
141		if (start >= end)
142			return 1; /* we're done */
143	}
144	return 0;
145}
146
/*
 * Find a free area in a specific range.
 *
 * Scans the e820 RAM entries for a spot of at least @size bytes inside
 * [start, end) that bad_addr() does not veto.  Returns the address found,
 * or -1UL if nothing suitable exists.
 */
unsigned long __init find_e820_area(unsigned long start, unsigned long end, unsigned size)
{
	int i;
	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];
		unsigned long addr = ei->addr, last;
		if (ei->type != E820_RAM)
			continue;
		/* Clip the candidate to the requested window. */
		if (addr < start)
			addr = start;
		if (addr > ei->addr + ei->size)
			continue;
		/* Step past reserved gunk; bad_addr() advances addr each time. */
		while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size)
			;
		/*
		 * NOTE(review): the fit test below uses PAGE_ALIGN(addr) yet
		 * the possibly-unaligned addr is what gets returned --
		 * presumably callers align the result themselves; confirm
		 * before relying on alignment of the return value.
		 */
		last = PAGE_ALIGN(addr) + size;
		if (last > ei->addr + ei->size)
			continue;
		if (last > end)
			continue;
		return addr;
	}
	return -1UL;
}
173
/*
 * Find the highest page frame number we have available
 */
unsigned long __init e820_end_of_ram(void)
{
	/* Deliberately shadows the global end_pfn; the caller stores it. */
	unsigned long end_pfn = 0;
	end_pfn = find_max_pfn_with_active_regions();

	/* The direct mapping must reach at least as far as the last RAM page. */
	if (end_pfn > end_pfn_map)
		end_pfn_map = end_pfn;
	/* ...but never beyond the architectural maximum. */
	if (end_pfn_map > MAXMEM>>PAGE_SHIFT)
		end_pfn_map = MAXMEM>>PAGE_SHIFT;
	/* Honour a "mem=" limit from the user. */
	if (end_pfn > end_user_pfn)
		end_pfn = end_user_pfn;
	/* RAM can never extend past what we map. */
	if (end_pfn > end_pfn_map)
		end_pfn = end_pfn_map;

	printk("end_pfn_map = %lu\n", end_pfn_map);
	return end_pfn;
}
194
195/*
196 * Find the hole size in the range.
197 */
198unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
199{
200	unsigned long ram = 0;
201	int i;
202
203	for (i = 0; i < e820.nr_map; i++) {
204		struct e820entry *ei = &e820.map[i];
205		unsigned long last, addr;
206
207		if (ei->type != E820_RAM ||
208		    ei->addr+ei->size <= start ||
209		    ei->addr >= end)
210			continue;
211
212		addr = round_up(ei->addr, PAGE_SIZE);
213		if (addr < start)
214			addr = start;
215
216		last = round_down(ei->addr + ei->size, PAGE_SIZE);
217		if (last >= end)
218			last = end;
219
220		if (last > addr)
221			ram += last - addr;
222	}
223	return ((end - start) - ram);
224}
225
/*
 * Mark e820 reserved areas as busy for the resource manager.
 *
 * Allocates one struct resource per e820 entry from bootmem (these are
 * never freed; they live for the lifetime of the kernel) and inserts it
 * into the iomem resource tree.
 */
void __init e820_reserve_resources(void)
{
	int i;
	for (i = 0; i < e820.nr_map; i++) {
		struct resource *res;
		res = alloc_bootmem_low(sizeof(struct resource));
		switch (e820.map[i].type) {
		case E820_RAM:	res->name = "System RAM"; break;
		case E820_ACPI:	res->name = "ACPI Tables"; break;
		case E820_NVS:	res->name = "ACPI Non-volatile Storage"; break;
		default:	res->name = "reserved";
		}
		res->start = e820.map[i].addr;
		res->end = res->start + e820.map[i].size - 1;
		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
		request_resource(&iomem_resource, res);
		if (e820.map[i].type == E820_RAM) {
			/*
			 *  We don't know which RAM region contains kernel data,
			 *  so we try it repeatedly and let the resource manager
			 *  test it.
			 */
			request_resource(res, &code_resource);
			request_resource(res, &data_resource);
#ifdef CONFIG_KEXEC
			/* Reserve the crash kernel region inside RAM, too. */
			request_resource(res, &crashk_res);
#endif
		}
	}
}
259
/*
 * Find the ranges of physical addresses that do not correspond to
 * e820 RAM areas and mark the corresponding pages as nosave for software
 * suspend and suspend to RAM.
 *
 * This function requires the e820 map to be sorted and without any
 * overlapping entries and assumes the first e820 area to be RAM.
 */
void __init e820_mark_nosave_regions(void)
{
	int i;
	unsigned long paddr;

	/* End of the previous area, rounded down to a page boundary. */
	paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE);
	for (i = 1; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];

		/* Gap between the previous area and this one: not saveable. */
		if (paddr < ei->addr)
			register_nosave_region(PFN_DOWN(paddr),
						PFN_UP(ei->addr));

		paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
		/* Non-RAM areas themselves must not be saved either. */
		if (ei->type != E820_RAM)
			register_nosave_region(PFN_UP(ei->addr),
						PFN_DOWN(paddr));

		/* Nothing of interest beyond the last RAM page. */
		if (paddr >= (end_pfn << PAGE_SHIFT))
			break;
	}
}
290
/* Walk the e820 map and register active regions within a node */
void __init
e820_register_active_regions(int nid, unsigned long start_pfn,
							unsigned long end_pfn)
{
	int i;
	unsigned long ei_startpfn, ei_endpfn;
	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];
		/* Round inward: only whole pages inside the entry count. */
		ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
		ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE)
								>> PAGE_SHIFT;

		/* Skip map entries smaller than a page */
		if (ei_startpfn >= ei_endpfn)
			continue;

		/* Check if end_pfn_map should be updated */
		if (ei->type != E820_RAM && ei_endpfn > end_pfn_map)
			end_pfn_map = ei_endpfn;

		/* Skip if map is outside the node */
		if (ei->type != E820_RAM ||
				ei_endpfn <= start_pfn ||
				ei_startpfn >= end_pfn)
			continue;

		/* Check for overlaps: clip the entry to the node's range. */
		if (ei_startpfn < start_pfn)
			ei_startpfn = start_pfn;
		if (ei_endpfn > end_pfn)
			ei_endpfn = end_pfn;

		/* Obey end_user_pfn to save on memmap */
		if (ei_startpfn >= end_user_pfn)
			continue;
		if (ei_endpfn > end_user_pfn)
			ei_endpfn = end_user_pfn;

		add_active_range(nid, ei_startpfn, ei_endpfn);
	}
}
333
334/*
335 * Add a memory region to the kernel e820 map.
336 */
337void __init add_memory_region(unsigned long start, unsigned long size, int type)
338{
339	int x = e820.nr_map;
340
341	if (x == E820MAX) {
342		printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
343		return;
344	}
345
346	e820.map[x].addr = start;
347	e820.map[x].size = size;
348	e820.map[x].type = type;
349	e820.nr_map++;
350}
351
352void __init e820_print_map(char *who)
353{
354	int i;
355
356	for (i = 0; i < e820.nr_map; i++) {
357		printk(" %s: %016Lx - %016Lx ", who,
358			(unsigned long long) e820.map[i].addr,
359			(unsigned long long) (e820.map[i].addr + e820.map[i].size));
360		switch (e820.map[i].type) {
361		case E820_RAM:	printk("(usable)\n");
362				break;
363		case E820_RESERVED:
364				printk("(reserved)\n");
365				break;
366		case E820_ACPI:
367				printk("(ACPI data)\n");
368				break;
369		case E820_NVS:
370				printk("(ACPI NVS)\n");
371				break;
372		default:	printk("type %u\n", e820.map[i].type);
373				break;
374		}
375	}
376}
377
/*
 * Sanitize the BIOS e820 map.
 *
 * Some e820 responses include overlapping entries.  The following
 * replaces the original e820 map with a new one, removing overlaps.
 *
 * Algorithm: record a "change point" at the start and end of every
 * entry, sort them by address, then sweep left to right keeping a list
 * of which entries currently overlap; at each point the highest
 * (most restrictive) type wins.  Returns 0 on success, -1 if the map
 * has fewer than two entries or contains a wrapping address range.
 */
static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
{
	struct change_member {
		struct e820entry *pbios; /* pointer to original bios entry */
		unsigned long long addr; /* address for this change point */
	};
	/* static __initdata: keeps these large arrays off the early boot stack */
	static struct change_member change_point_list[2*E820MAX] __initdata;
	static struct change_member *change_point[2*E820MAX] __initdata;
	static struct e820entry *overlap_list[E820MAX] __initdata;
	static struct e820entry new_bios[E820MAX] __initdata;
	struct change_member *change_tmp;
	unsigned long current_type, last_type;
	unsigned long long last_addr;
	int chgidx, still_changing;
	int overlap_entries;
	int new_bios_entry;
	int old_nr, new_nr, chg_nr;
	int i;

	/*
		Visually we're performing the following (1,2,3,4 = memory types)...

		Sample memory map (w/overlaps):
		   ____22__________________
		   ______________________4_
		   ____1111________________
		   _44_____________________
		   11111111________________
		   ____________________33__
		   ___________44___________
		   __________33333_________
		   ______________22________
		   ___________________2222_
		   _________111111111______
		   _____________________11_
		   _________________4______

		Sanitized equivalent (no overlap):
		   1_______________________
		   _44_____________________
		   ___1____________________
		   ____22__________________
		   ______11________________
		   _________1______________
		   __________3_____________
		   ___________44___________
		   _____________33_________
		   _______________2________
		   ________________1_______
		   _________________4______
		   ___________________2____
		   ____________________33__
		   ______________________4_
	*/

	/* if there's only one memory region, don't bother */
	if (*pnr_map < 2)
		return -1;

	old_nr = *pnr_map;

	/* bail out if we find any unreasonable addresses in bios map */
	for (i=0; i<old_nr; i++)
		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
			return -1;

	/* create pointers for initial change-point information (for sorting) */
	for (i=0; i < 2*old_nr; i++)
		change_point[i] = &change_point_list[i];

	/* record all known change-points (starting and ending addresses),
	   omitting those that are for empty memory regions */
	chgidx = 0;
	for (i=0; i < old_nr; i++)	{
		if (biosmap[i].size != 0) {
			change_point[chgidx]->addr = biosmap[i].addr;
			change_point[chgidx++]->pbios = &biosmap[i];
			change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
			change_point[chgidx++]->pbios = &biosmap[i];
		}
	}
	chg_nr = chgidx;

	/* sort change-point list by memory addresses (low -> high);
	   simple bubble sort -- chg_nr is at most 2*E820MAX */
	still_changing = 1;
	while (still_changing)	{
		still_changing = 0;
		for (i=1; i < chg_nr; i++)  {
			/* if <current_addr> > <last_addr>, swap */
			/* or, if current=<start_addr> & last=<end_addr>, swap */
			if ((change_point[i]->addr < change_point[i-1]->addr) ||
				((change_point[i]->addr == change_point[i-1]->addr) &&
				 (change_point[i]->addr == change_point[i]->pbios->addr) &&
				 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
			   )
			{
				change_tmp = change_point[i];
				change_point[i] = change_point[i-1];
				change_point[i-1] = change_tmp;
				still_changing=1;
			}
		}
	}

	/* create a new bios memory map, removing overlaps */
	overlap_entries=0;	 /* number of entries in the overlap table */
	new_bios_entry=0;	 /* index for creating new bios map entries */
	last_type = 0;		 /* start with undefined memory type */
	last_addr = 0;		 /* start with 0 as last starting address */
	/* loop through change-points, determining affect on the new bios map */
	for (chgidx=0; chgidx < chg_nr; chgidx++)
	{
		/* keep track of all overlapping bios entries */
		if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
		{
			/* add map entry to overlap list (> 1 entry implies an overlap) */
			overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
		}
		else
		{
			/* remove entry from list (order independent, so swap with last) */
			/* the entry is guaranteed present: its start point was seen
			   before its end point, so the unconditional decrement is safe */
			for (i=0; i<overlap_entries; i++)
			{
				if (overlap_list[i] == change_point[chgidx]->pbios)
					overlap_list[i] = overlap_list[overlap_entries-1];
			}
			overlap_entries--;
		}
		/* if there are overlapping entries, decide which "type" to use */
		/* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
		current_type = 0;
		for (i=0; i<overlap_entries; i++)
			if (overlap_list[i]->type > current_type)
				current_type = overlap_list[i]->type;
		/* continue building up new bios map based on this information */
		if (current_type != last_type)	{
			if (last_type != 0)	 {
				new_bios[new_bios_entry].size =
					change_point[chgidx]->addr - last_addr;
				/* move forward only if the new size was non-zero */
				if (new_bios[new_bios_entry].size != 0)
					if (++new_bios_entry >= E820MAX)
						break; 	/* no more space left for new bios entries */
			}
			if (current_type != 0)	{
				new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
				new_bios[new_bios_entry].type = current_type;
				last_addr=change_point[chgidx]->addr;
			}
			last_type = current_type;
		}
	}
	new_nr = new_bios_entry;   /* retain count for new bios entries */

	/* copy new bios mapping into original location */
	memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
	*pnr_map = new_nr;

	return 0;
}
545
546/*
547 * Copy the BIOS e820 map into a safe place.
548 *
549 * Sanity-check it while we're at it..
550 *
551 * If we're lucky and live on a modern system, the setup code
552 * will have given us a memory map that we can use to properly
553 * set up memory.  If we aren't, we'll fake a memory map.
554 */
555static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
556{
557	/* Only one memory region (or negative)? Ignore it */
558	if (nr_map < 2)
559		return -1;
560
561	do {
562		unsigned long start = biosmap->addr;
563		unsigned long size = biosmap->size;
564		unsigned long end = start + size;
565		unsigned long type = biosmap->type;
566
567		/* Overflow in 64 bits? Ignore the memory map. */
568		if (start > end)
569			return -1;
570
571		add_memory_region(start, size, type);
572	} while (biosmap++,--nr_map);
573	return 0;
574}
575
/*
 * Print a message on the early console, then halt the boot.
 * NOTE(review): msg is used directly as the format string by both
 * early_printk() and panic(), so callers must not pass text containing
 * '%' conversions.
 */
void early_panic(char *msg)
{
	early_printk(msg);
	panic(msg);
}
581
/*
 * Sanitize and install the firmware-provided E820 map.
 *
 * E820_MAP / E820_MAP_NR come from the boot parameter block (see the
 * asm/bootsetup.h include).  The raw map is first de-overlapped, then
 * copied into the kernel's e820 structure; a map with fewer than two
 * entries is treated as invalid and the boot is aborted.
 */
void __init setup_memory_region(void)
{
	/*
	 * Try to copy the BIOS-supplied E820-map.
	 *
	 * Otherwise fake a memory map; one section from 0k->640k,
	 * the next section from 1mb->appropriate_mem_k
	 */
	sanitize_e820_map(E820_MAP, &E820_MAP_NR);
	if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0)
		early_panic("Cannot find a valid memory map");
	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
	e820_print_map("BIOS-e820");
}
596
597static int __init parse_memopt(char *p)
598{
599	if (!p)
600		return -EINVAL;
601	end_user_pfn = memparse(p, &p);
602	end_user_pfn >>= PAGE_SHIFT;
603	return 0;
604}
605early_param("mem", parse_memopt);
606
/* Set once "memmap=" handling has modified the e820 map; checked by
 * finish_e820_parsing() to decide whether to log a user-defined map. */
static int userdef __initdata;
608
609static int __init parse_memmap_opt(char *p)
610{
611	char *oldp;
612	unsigned long long start_at, mem_size;
613
614	if (!strcmp(p, "exactmap")) {
615#ifdef CONFIG_CRASH_DUMP
616		/* If we are doing a crash dump, we
617		 * still need to know the real mem
618		 * size before original memory map is
619		 * reset.
620		 */
621		e820_register_active_regions(0, 0, -1UL);
622		saved_max_pfn = e820_end_of_ram();
623		remove_all_active_ranges();
624#endif
625		end_pfn_map = 0;
626		e820.nr_map = 0;
627		userdef = 1;
628		return 0;
629	}
630
631	oldp = p;
632	mem_size = memparse(p, &p);
633	if (p == oldp)
634		return -EINVAL;
635	if (*p == '@') {
636		start_at = memparse(p+1, &p);
637		add_memory_region(start_at, mem_size, E820_RAM);
638	} else if (*p == '#') {
639		start_at = memparse(p+1, &p);
640		add_memory_region(start_at, mem_size, E820_ACPI);
641	} else if (*p == '$') {
642		start_at = memparse(p+1, &p);
643		add_memory_region(start_at, mem_size, E820_RESERVED);
644	} else {
645		end_user_pfn = (mem_size >> PAGE_SHIFT);
646	}
647	return *p == '\0' ? 0 : -EINVAL;
648}
649early_param("memmap", parse_memmap_opt);
650
/*
 * Called after early-parameter processing: if the user overrode the
 * BIOS map via "memmap=", log the resulting map for the boot record.
 */
void __init finish_e820_parsing(void)
{
	if (userdef) {
		printk(KERN_INFO "user-defined physical RAM map:\n");
		e820_print_map("user");
	}
}
658
/* Base of the PCI MMIO allocation window, computed by e820_setup_gap();
 * the initial value is only a recognizable sentinel. */
unsigned long pci_mem_start = 0xaeedbabe;
EXPORT_SYMBOL(pci_mem_start);
661
662/*
663 * Search for the biggest gap in the low 32 bits of the e820
664 * memory space.  We pass this space to PCI to assign MMIO resources
665 * for hotplug or unconfigured devices in.
666 * Hopefully the BIOS let enough space left.
667 */
668__init void e820_setup_gap(void)
669{
670	unsigned long gapstart, gapsize, round;
671	unsigned long last;
672	int i;
673	int found = 0;
674
675	last = 0x100000000ull;
676	gapstart = 0x10000000;
677	gapsize = 0x400000;
678	i = e820.nr_map;
679	while (--i >= 0) {
680		unsigned long long start = e820.map[i].addr;
681		unsigned long long end = start + e820.map[i].size;
682
683		/*
684		 * Since "last" is at most 4GB, we know we'll
685		 * fit in 32 bits if this condition is true
686		 */
687		if (last > end) {
688			unsigned long gap = last - end;
689
690			if (gap > gapsize) {
691				gapsize = gap;
692				gapstart = end;
693				found = 1;
694			}
695		}
696		if (start < last)
697			last = start;
698	}
699
700	if (!found) {
701		gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024;
702		printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit address range\n"
703		       KERN_ERR "PCI: Unassigned devices with 32bit resource registers may break!\n");
704	}
705
706	/*
707	 * See how much we want to round up: start off with
708	 * rounding to the next 1MB area.
709	 */
710	round = 0x100000;
711	while ((gapsize >> 4) > round)
712		round += round;
713	/* Fun with two's complement */
714	pci_mem_start = (gapstart + round) & -round;
715
716	printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
717		pci_mem_start, gapstart, gapsize);
718}
719