/*
 *  linux/arch/arm/mm/init.c
 *
 *  Copyright (C) 1995-2000 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/swapctl.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/blk.h>

#include <asm/segment.h>
#include <asm/mach-types.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/hardware.h>
#include <asm/setup.h>

#include <asm/mach/arch.h>
#include <asm/mach/map.h>

#ifndef CONFIG_DISCONTIGMEM
#define NR_NODES	1
#else
#define NR_NODES	4
#endif

#ifdef CONFIG_CPU_32
#define TABLE_OFFSET	(PTRS_PER_PTE)
#else
#define TABLE_OFFSET	0
#endif

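/*
 * On 32-bit CPUs each PTE table allocation presumably holds the
 * hardware table plus the Linux shadow copy that TABLE_OFFSET
 * accounts for; with 4K pages PTRS_PER_PTE is 256, so TABLE_SIZE
 * works out at (256 + 256) * 4 = 2048 bytes.
 */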
#define TABLE_SIZE	((TABLE_OFFSET + PTRS_PER_PTE) * sizeof(void *))

static unsigned long totalram_pages;
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
extern char _stext, _text, _etext, _end, __init_begin, __init_end;

/*
 * The sole use of this is to pass memory configuration
 * data from paging_init to mem_init.
 */
static struct meminfo meminfo __initdata = { 0, };

/*
 * empty_zero_page is a special page that is used for
 * zero-initialized data and COW.
 */
struct page *empty_zero_page;

#ifndef CONFIG_NO_PGT_CACHE
struct pgtable_cache_struct quicklists;

int do_check_pgt_cache(int low, int high)
{
	int freed = 0;

	if (pgtable_cache_size > high) {
		do {
			if (pgd_quicklist) {
				free_pgd_slow(get_pgd_fast());
				freed++;
			}
			if (pmd_quicklist) {
				pmd_free_slow(pmd_alloc_one_fast(NULL, 0));
				freed++;
			}
			if (pte_quicklist) {
				pte_free_slow(pte_alloc_one_fast(NULL, 0));
				freed++;
			}
		} while (pgtable_cache_size > low);
	}
	return freed;
}
#else
int do_check_pgt_cache(int low, int high)
{
	return 0;
}
#endif
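
/*
 * The low/high arguments are watermarks: nothing is freed until the
 * cache grows beyond "high", and it is then trimmed back down to
 * "low".  A typical caller might do do_check_pgt_cache(25, 50) from
 * the idle loop to keep the quicklists bounded.
 */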

/* This is currently broken
 * PG_skip is used on sparc/sparc64 architectures to "skip" certain
 * parts of the address space.
 *
 * #define PG_skip	10
 * #define PageSkip(page) (machine_is_riscpc() && test_bit(PG_skip, &(page)->flags))
 *			if (PageSkip(page)) {
 *				page = page->next_hash;
 *				if (page == NULL)
 *					break;
 *			}
 */
void show_mem(void)
{
	int free = 0, total = 0, reserved = 0;
	int shared = 0, cached = 0, slab = 0, node;

	printk("Mem-info:\n");
	show_free_areas();
	printk("Free swap:       %6dkB\n", nr_swap_pages << (PAGE_SHIFT-10));

	for (node = 0; node < numnodes; node++) {
		struct page *page, *end;

		page = NODE_MEM_MAP(node);
		end  = page + NODE_DATA(node)->node_size;

		do {
			total++;
			if (PageReserved(page))
				reserved++;
			else if (PageSwapCache(page))
				cached++;
			else if (PageSlab(page))
				slab++;
			else if (!page_count(page))
				free++;
			else
				shared += atomic_read(&page->count) - 1;
			page++;
		} while (page < end);
	}

	printk("%d pages of RAM\n", total);
	printk("%d free pages\n", free);
	printk("%d reserved pages\n", reserved);
	printk("%d slab pages\n", slab);
	printk("%d pages shared\n", shared);
	printk("%d pages swap cached\n", cached);
#ifndef CONFIG_NO_PGT_CACHE
	printk("%ld page tables cached\n", pgtable_cache_size);
#endif
	show_buffers();
}

struct node_info {
	unsigned int start;
	unsigned int end;
	int bootmap_pages;
};

#define O_PFN_DOWN(x)	((x) >> PAGE_SHIFT)
#define V_PFN_DOWN(x)	O_PFN_DOWN(__pa(x))

#define O_PFN_UP(x)	(PAGE_ALIGN(x) >> PAGE_SHIFT)
#define V_PFN_UP(x)	O_PFN_UP(__pa(x))

#define PFN_SIZE(x)	((x) >> PAGE_SHIFT)
#define PFN_RANGE(s,e)	PFN_SIZE(PAGE_ALIGN((unsigned long)(e)) - \
				(((unsigned long)(s)) & PAGE_MASK))
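
/*
 * For example, with 4K pages O_PFN_DOWN(0x8000400) is 0x8000 while
 * O_PFN_UP(0x8000400) is 0x8001: "down" counts only whole pages below
 * the address, "up" rounds up to the next page boundary.  The V_
 * variants take a virtual address and convert it via __pa() first.
 */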
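/*
 * Find a suitable start pfn for the bootmem bitmap of this node: the
 * first run of bootmap_pages free pages in this node's banks, above
 * the kernel image.
 */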
static unsigned int __init
find_bootmap_pfn(int node, struct meminfo *mi, unsigned int bootmap_pages)
{
	unsigned int start_pfn, bank, bootmap_pfn;

	start_pfn   = V_PFN_UP(&_end);
	bootmap_pfn = 0;

	for (bank = 0; bank < mi->nr_banks; bank++) {
		unsigned int start, end;

		if (mi->bank[bank].node != node)
			continue;

		start = O_PFN_UP(mi->bank[bank].start);
		end   = O_PFN_DOWN(mi->bank[bank].size +
				   mi->bank[bank].start);

		if (end < start_pfn)
			continue;

		if (start < start_pfn)
			start = start_pfn;

		if (end <= start)
			continue;

		if (end - start >= bootmap_pages) {
			bootmap_pfn = start;
			break;
		}
	}

	if (bootmap_pfn == 0)
		BUG();

	return bootmap_pfn;
}

/*
 * Scan the memory info structure and pull out:
 *  - the end of memory
 *  - the number of nodes
 *  - the pfn range of each node
 *  - the number of bootmem bitmap pages
 */
static unsigned int __init
find_memend_and_nodes(struct meminfo *mi, struct node_info *np)
{
	unsigned int i, bootmem_pages = 0, memend_pfn = 0;

	for (i = 0; i < NR_NODES; i++) {
		np[i].start = -1U;
		np[i].end = 0;
		np[i].bootmap_pages = 0;
	}

	for (i = 0; i < mi->nr_banks; i++) {
		unsigned long start, end;
		int node;

		if (mi->bank[i].size == 0) {
			/*
			 * Mark this bank with an invalid node number
			 */
			mi->bank[i].node = -1;
			continue;
		}

		node = mi->bank[i].node;

		if (node >= numnodes) {
			numnodes = node + 1;

			/*
			 * Make sure we haven't exceeded the maximum number
			 * of nodes that we have in this configuration.  If
			 * we have, we're in trouble.  (maybe we ought to
			 * limit, instead of bugging?)
			 */
			if (numnodes > NR_NODES)
				BUG();
		}

		/*
		 * Get the start and end pfns for this bank
		 */
		start = O_PFN_UP(mi->bank[i].start);
		end   = O_PFN_DOWN(mi->bank[i].start + mi->bank[i].size);

		if (np[node].start > start)
			np[node].start = start;

		if (np[node].end < end)
			np[node].end = end;

		if (memend_pfn < end)
			memend_pfn = end;
	}

	/*
	 * Calculate the number of pages we require to
	 * store the bootmem bitmaps.
	 */
	for (i = 0; i < numnodes; i++) {
		if (np[i].end == 0)
			continue;

		np[i].bootmap_pages = bootmem_bootmap_pages(np[i].end -
							    np[i].start);
		bootmem_pages += np[i].bootmap_pages;
	}

	/*
	 * This doesn't seem to be used by the Linux memory
	 * manager any more.  If we can get rid of it, we
	 * also get rid of some of the stuff above as well.
	 */
	max_low_pfn = memend_pfn - O_PFN_DOWN(PHYS_OFFSET);
	mi->end = memend_pfn << PAGE_SHIFT;

	return bootmem_pages;
}

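/*
 * Work out which node the initrd lives in, if any.  Returns -2 when
 * there is no initrd, -1 if the initrd lies outside every memory bank
 * (in which case it is disabled), otherwise the node number
 * containing it.
 */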
static int __init check_initrd(struct meminfo *mi)
{
	int initrd_node = -2;

#ifdef CONFIG_BLK_DEV_INITRD
	/*
	 * Make sure that the initrd is within a valid area of
	 * memory.
	 */
	if (initrd_start) {
		unsigned long phys_initrd_start, phys_initrd_end;
		unsigned int i;

		phys_initrd_start = __pa(initrd_start);
		phys_initrd_end   = __pa(initrd_end);

		/*
		 * Assume the worst until we find a bank that wholly
		 * contains the initrd.
		 */
		initrd_node = -1;

		for (i = 0; i < mi->nr_banks; i++) {
			unsigned long bank_end;

			bank_end = mi->bank[i].start + mi->bank[i].size;

			if (mi->bank[i].start <= phys_initrd_start &&
			    phys_initrd_end <= bank_end)
				initrd_node = mi->bank[i].node;
		}
	}

	if (initrd_node == -1) {
		printk(KERN_ERR "initrd (0x%08lx - 0x%08lx) extends beyond "
		       "physical memory - disabling initrd\n",
		       initrd_start, initrd_end);
		initrd_start = initrd_end = 0;
	}
#endif

	return initrd_node;
}

/*
 * Reserve the various regions of node 0
 */
static __init void reserve_node_zero(unsigned int bootmap_pfn, unsigned int bootmap_pages)
{
	pg_data_t *pgdat = NODE_DATA(0);

	/*
	 * Register the kernel text and data with bootmem.
	 * Note that this can only be in node 0.
	 */
	reserve_bootmem_node(pgdat, __pa(&_stext), &_end - &_stext);

#ifdef CONFIG_CPU_32
	/*
	 * Reserve the page tables.  These are already in use,
	 * and can only be in node 0.
	 */
	reserve_bootmem_node(pgdat, __pa(swapper_pg_dir),
			     PTRS_PER_PGD * sizeof(void *));
#endif
	/*
	 * And don't forget to reserve the allocator bitmap,
	 * which will be freed later.
	 */
	reserve_bootmem_node(pgdat, bootmap_pfn << PAGE_SHIFT,
			     bootmap_pages << PAGE_SHIFT);

	/*
	 * Hmm... This should go elsewhere, but we really really
	 * need to stop things allocating the low memory; we need
	 * a better implementation of GFP_DMA which does not assume
	 * that DMA-able memory starts at zero.
	 */
	if (machine_is_integrator())
		reserve_bootmem_node(pgdat, 0, __pa(swapper_pg_dir));
	/*
	 * These should likewise go elsewhere.  They pre-reserve
	 * the screen memory region at the start of main system
	 * memory.
	 */
	if (machine_is_archimedes() || machine_is_a5k())
		reserve_bootmem_node(pgdat, 0x02000000, 0x00080000);
	if (machine_is_edb7211())
		reserve_bootmem_node(pgdat, 0xc0000000, 0x00020000);
	if (machine_is_p720t())
		reserve_bootmem_node(pgdat, PHYS_OFFSET, 0x00014000);
#ifdef CONFIG_SA1111
	/*
	 * Because of the SA1111 DMA bug, we want to preserve
	 * our precious DMA-able memory...
	 */
	reserve_bootmem_node(pgdat, PHYS_OFFSET, __pa(swapper_pg_dir)-PHYS_OFFSET);
#endif
}

/*
 * Register all available RAM in this node with the bootmem allocator.
 */
static inline void free_bootmem_node_bank(int node, struct meminfo *mi)
{
	pg_data_t *pgdat = NODE_DATA(node);
	int bank;

	for (bank = 0; bank < mi->nr_banks; bank++)
		if (mi->bank[bank].node == node)
			free_bootmem_node(pgdat, mi->bank[bank].start,
					  mi->bank[bank].size);
}

/*
 * Initialise the bootmem allocator for all nodes.  This is called
 * early during the architecture specific initialisation.
 */
void __init bootmem_init(struct meminfo *mi)
{
	struct node_info node_info[NR_NODES], *np = node_info;
	unsigned int bootmap_pages, bootmap_pfn, map_pg;
	int node, initrd_node;

	bootmap_pages = find_memend_and_nodes(mi, np);
	bootmap_pfn   = find_bootmap_pfn(0, mi, bootmap_pages);
	initrd_node   = check_initrd(mi);

	map_pg = bootmap_pfn;

	/*
	 * Initialise the bootmem nodes.
	 *
	 * What we really want to do is:
	 *
	 *   unmap_all_regions_except_kernel();
	 *   for_each_node_in_reverse_order(node) {
	 *     map_node(node);
	 *     allocate_bootmem_map(node);
	 *     init_bootmem_node(node);
	 *     free_bootmem_node(node);
	 *   }
	 *
	 * but this is a 2.5-type change.  For now, we just set
	 * the nodes up in reverse order.
	 *
	 * (we could also do with rolling bootmem_init and paging_init
	 * into one generic "memory_init" type function).
	 */
	np += numnodes - 1;
	for (node = numnodes - 1; node >= 0; node--, np--) {
		/*
		 * If there are no pages in this node, ignore it.
		 * Note that node 0 must always have some pages.
		 */
		if (np->end == 0) {
			if (node == 0)
				BUG();
			continue;
		}

		/*
		 * Initialise the bootmem allocator.
		 */
		init_bootmem_node(NODE_DATA(node), map_pg, np->start, np->end);
		free_bootmem_node_bank(node, mi);
		map_pg += np->bootmap_pages;

		/*
		 * If this is node 0, we need to reserve some areas ASAP -
		 * we may use bootmem on node 0 to setup the other nodes.
		 */
		if (node == 0)
			reserve_node_zero(bootmap_pfn, bootmap_pages);
	}

#ifdef CONFIG_BLK_DEV_INITRD
	if (initrd_node >= 0)
		reserve_bootmem_node(NODE_DATA(initrd_node), __pa(initrd_start),
				     initrd_end - initrd_start);
#endif

	if (map_pg != bootmap_pfn + bootmap_pages)
		BUG();
}

/*
 * paging_init() sets up the page tables, initialises the zone memory
 * maps, and sets up the zero page, bad page and bad page tables.
 */
void __init paging_init(struct meminfo *mi, struct machine_desc *mdesc)
{
	void *zero_page;
	int node;

	memcpy(&meminfo, mi, sizeof(meminfo));

	/*
	 * allocate the zero page.  Note that we count on this going ok.
	 */
	zero_page = alloc_bootmem_low_pages(PAGE_SIZE);

	/*
	 * initialise the page tables.
	 */
	memtable_init(mi);
	if (mdesc->map_io)
		mdesc->map_io();
	flush_tlb_all();

	/*
	 * initialise the zones within each node
	 */
	for (node = 0; node < numnodes; node++) {
		unsigned long zone_size[MAX_NR_ZONES];
		unsigned long zhole_size[MAX_NR_ZONES];
		struct bootmem_data *bdata;
		pg_data_t *pgdat;
		int i;

		/*
		 * Initialise the zone size information.
		 */
		for (i = 0; i < MAX_NR_ZONES; i++) {
			zone_size[i]  = 0;
			zhole_size[i] = 0;
		}

		pgdat = NODE_DATA(node);
		bdata = pgdat->bdata;

		/*
		 * The size of this node has already been determined.
		 * If we need to do anything fancy with the allocation
		 * of this memory to the zones, now is the time to do
		 * it.
		 */
		zone_size[0] = bdata->node_low_pfn -
				(bdata->node_boot_start >> PAGE_SHIFT);
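		/*
		 * Everything lands in zone 0 for now; arch_adjust_zones()
		 * below moves pages into the other zones where the
		 * machine type requires it.
		 */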

		/*
		 * If this zone has zero size, skip it.
		 */
		if (!zone_size[0])
			continue;

		/*
		 * For each bank in this node, calculate the size of the
		 * holes.  holes = node_size - sum(bank_sizes_in_node)
		 */
		zhole_size[0] = zone_size[0];
		for (i = 0; i < mi->nr_banks; i++) {
			if (mi->bank[i].node != node)
				continue;

			zhole_size[0] -= mi->bank[i].size >> PAGE_SHIFT;
		}
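
		/*
		 * With 4K pages, for example, a node spanning pfns
		 * 0x8000-0x10000 (128MB) whose banks only cover 96MB
		 * ends up with zhole_size[0] = 0x8000 - 0x6000 =
		 * 0x2000 pages of holes.
		 */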

		/*
		 * Adjust the sizes according to any special
		 * requirements for this machine type.
		 */
		arch_adjust_zones(node, zone_size, zhole_size);

		free_area_init_node(node, pgdat, 0, zone_size,
				bdata->node_boot_start, zhole_size);
	}

	/*
	 * finish off the bad pages once
	 * the mem_map is initialised
	 */
	memzero(zero_page, PAGE_SIZE);
	empty_zero_page = virt_to_page(zero_page);
	flush_dcache_page(empty_zero_page);
}

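/*
 * Hand a region of reserved pages back to the page allocator; used
 * below to release initmem, the initrd, and (on SA1111) the DMA-able
 * region once we are done with them.
 */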
static inline void free_area(unsigned long addr, unsigned long end, char *s)
{
	unsigned int size = (end - addr) >> 10;

	for (; addr < end; addr += PAGE_SIZE) {
		struct page *page = virt_to_page(addr);
		ClearPageReserved(page);
		set_page_count(page, 1);
		free_page(addr);
		totalram_pages++;
	}

	if (size && s)
		printk("Freeing %s memory: %dK\n", s, size);
}

/*
 * mem_init() marks the free areas in the mem_map and tells us how much
 * memory is free.  This is done after various parts of the system have
 * claimed their memory after the kernel image.
 */
void __init mem_init(void)
{
	unsigned int codepages, datapages, initpages;
	int i, node;

	codepages = &_etext - &_text;
	datapages = &_end - &_etext;
	initpages = &__init_end - &__init_begin;

	high_memory = (void *)__va(meminfo.end);
	max_mapnr   = virt_to_page(high_memory) - mem_map;

	/*
	 * We may have non-contiguous memory.
	 */
	if (meminfo.nr_banks != 1)
		create_memmap_holes(&meminfo);

	/* this will put all unused low memory onto the freelists */
	for (node = 0; node < numnodes; node++) {
		pg_data_t *pgdat = NODE_DATA(node);

		if (pgdat->node_size != 0)
			totalram_pages += free_all_bootmem_node(pgdat);
	}

#ifdef CONFIG_SA1111
	/* now that our DMA memory is actually so designated, we can free it */
	free_area(PAGE_OFFSET, (unsigned long)swapper_pg_dir, NULL);
#endif

	/*
	 * Since our memory may not be contiguous, calculate the
	 * real number of pages we have in this system
	 */
	printk(KERN_INFO "Memory:");

	num_physpages = 0;
	for (i = 0; i < meminfo.nr_banks; i++) {
		num_physpages += meminfo.bank[i].size >> PAGE_SHIFT;
		printk(" %ldMB", meminfo.bank[i].size >> 20);
	}

	printk(" = %luMB total\n", num_physpages >> (20 - PAGE_SHIFT));
	printk(KERN_NOTICE "Memory: %luKB available (%dK code, "
		"%dK data, %dK init)\n",
		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
		codepages >> 10, datapages >> 10, initpages >> 10);

	if (PAGE_SIZE >= 16384 && num_physpages <= 128) {
		extern int sysctl_overcommit_memory;
		/*
		 * On a machine this small (with 16K pages, 128 pages
		 * is only 2MB) we won't get anywhere without
		 * overcommit, so turn it on by default.
		 */
		sysctl_overcommit_memory = 1;
	}
}

void free_initmem(void)
{
	if (!machine_is_integrator()) {
		free_area((unsigned long)(&__init_begin),
			  (unsigned long)(&__init_end),
			  "init");
	}
}

#ifdef CONFIG_BLK_DEV_INITRD

static int keep_initrd;

void free_initrd_mem(unsigned long start, unsigned long end)
{
	if (!keep_initrd)
		free_area(start, end, "initrd");
}

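/*
 * Booting with "keepinitrd" on the command line keeps the initrd
 * memory around instead of freeing it after use.
 */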
static int __init keepinitrd_setup(char *__unused)
{
	keep_initrd = 1;
	return 1;
}

__setup("keepinitrd", keepinitrd_setup);
#endif

void si_meminfo(struct sysinfo *val)
{
	val->totalram  = totalram_pages;
	val->sharedram = 0;
	val->freeram   = nr_free_pages();
	val->bufferram = atomic_read(&buffermem_pages);
	val->totalhigh = 0;
	val->freehigh  = 0;
	val->mem_unit  = PAGE_SIZE;
}