1/* 2 * machine_kexec.c - handle transition of Linux booting another kernel 3 * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com> 4 * 5 * This source code is licensed under the GNU General Public License, 6 * Version 2. See the file COPYING for more details. 7 */ 8 9#include <linux/mm.h> 10#include <linux/kexec.h> 11#include <linux/string.h> 12#include <linux/reboot.h> 13#include <asm/pgtable.h> 14#include <asm/tlbflush.h> 15#include <asm/mmu_context.h> 16#include <asm/io.h> 17 18#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) 19static u64 kexec_pgd[512] PAGE_ALIGNED; 20static u64 kexec_pud0[512] PAGE_ALIGNED; 21static u64 kexec_pmd0[512] PAGE_ALIGNED; 22static u64 kexec_pte0[512] PAGE_ALIGNED; 23static u64 kexec_pud1[512] PAGE_ALIGNED; 24static u64 kexec_pmd1[512] PAGE_ALIGNED; 25static u64 kexec_pte1[512] PAGE_ALIGNED; 26 27static void init_level2_page(pmd_t *level2p, unsigned long addr) 28{ 29 unsigned long end_addr; 30 31 addr &= PAGE_MASK; 32 end_addr = addr + PUD_SIZE; 33 while (addr < end_addr) { 34 set_pmd(level2p++, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); 35 addr += PMD_SIZE; 36 } 37} 38 39static int init_level3_page(struct kimage *image, pud_t *level3p, 40 unsigned long addr, unsigned long last_addr) 41{ 42 unsigned long end_addr; 43 int result; 44 45 result = 0; 46 addr &= PAGE_MASK; 47 end_addr = addr + PGDIR_SIZE; 48 while ((addr < last_addr) && (addr < end_addr)) { 49 struct page *page; 50 pmd_t *level2p; 51 52 page = kimage_alloc_control_pages(image, 0); 53 if (!page) { 54 result = -ENOMEM; 55 goto out; 56 } 57 level2p = (pmd_t *)page_address(page); 58 init_level2_page(level2p, addr); 59 set_pud(level3p++, __pud(__pa(level2p) | _KERNPG_TABLE)); 60 addr += PUD_SIZE; 61 } 62 /* clear the unused entries */ 63 while (addr < end_addr) { 64 pud_clear(level3p++); 65 addr += PUD_SIZE; 66 } 67out: 68 return result; 69} 70 71 72static int init_level4_page(struct kimage *image, pgd_t *level4p, 73 unsigned long addr, unsigned long last_addr) 74{ 75 unsigned long end_addr; 76 int result; 77 78 result = 0; 79 addr &= PAGE_MASK; 80 end_addr = addr + (PTRS_PER_PGD * PGDIR_SIZE); 81 while ((addr < last_addr) && (addr < end_addr)) { 82 struct page *page; 83 pud_t *level3p; 84 85 page = kimage_alloc_control_pages(image, 0); 86 if (!page) { 87 result = -ENOMEM; 88 goto out; 89 } 90 level3p = (pud_t *)page_address(page); 91 result = init_level3_page(image, level3p, addr, last_addr); 92 if (result) { 93 goto out; 94 } 95 set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE)); 96 addr += PGDIR_SIZE; 97 } 98 /* clear the unused entries */ 99 while (addr < end_addr) { 100 pgd_clear(level4p++); 101 addr += PGDIR_SIZE; 102 } 103out: 104 return result; 105} 106 107 108static int init_pgtable(struct kimage *image, unsigned long start_pgtable) 109{ 110 pgd_t *level4p; 111 level4p = (pgd_t *)__va(start_pgtable); 112 return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT); 113} 114 115static void set_idt(void *newidt, u16 limit) 116{ 117 struct desc_ptr curidt; 118 119 /* x86-64 supports unaliged loads & stores */ 120 curidt.size = limit; 121 curidt.address = (unsigned long)newidt; 122 123 __asm__ __volatile__ ( 124 "lidtq %0\n" 125 : : "m" (curidt) 126 ); 127}; 128 129 130static void set_gdt(void *newgdt, u16 limit) 131{ 132 struct desc_ptr curgdt; 133 134 /* x86-64 supports unaligned loads & stores */ 135 curgdt.size = limit; 136 curgdt.address = (unsigned long)newgdt; 137 138 __asm__ __volatile__ ( 139 "lgdtq %0\n" 140 : : "m" (curgdt) 141 ); 142}; 143 144static void load_segments(void) 145{ 146 __asm__ __volatile__ ( 147 "\tmovl %0,%%ds\n" 148 "\tmovl %0,%%es\n" 149 "\tmovl %0,%%ss\n" 150 "\tmovl %0,%%fs\n" 151 "\tmovl %0,%%gs\n" 152 : : "a" (__KERNEL_DS) : "memory" 153 ); 154} 155 156int machine_kexec_prepare(struct kimage *image) 157{ 158 unsigned long start_pgtable; 159 int result; 160 161 /* Calculate the offsets */ 162 start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; 163 164 /* Setup the identity mapped 64bit page table */ 165 result = init_pgtable(image, start_pgtable); 166 if (result) 167 return result; 168 169 return 0; 170} 171 172void machine_kexec_cleanup(struct kimage *image) 173{ 174 return; 175} 176 177/* 178 * Do not allocate memory (or fail in any way) in machine_kexec(). 179 * We are past the point of no return, committed to rebooting now. 180 */ 181NORET_TYPE void machine_kexec(struct kimage *image) 182{ 183 unsigned long page_list[PAGES_NR]; 184 void *control_page; 185 186 /* Interrupts aren't acceptable while we reboot */ 187 local_irq_disable(); 188 189 control_page = page_address(image->control_code_page) + PAGE_SIZE; 190 memcpy(control_page, relocate_kernel, PAGE_SIZE); 191 192 page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); 193 page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; 194 page_list[PA_PGD] = virt_to_phys(&kexec_pgd); 195 page_list[VA_PGD] = (unsigned long)kexec_pgd; 196 page_list[PA_PUD_0] = virt_to_phys(&kexec_pud0); 197 page_list[VA_PUD_0] = (unsigned long)kexec_pud0; 198 page_list[PA_PMD_0] = virt_to_phys(&kexec_pmd0); 199 page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; 200 page_list[PA_PTE_0] = virt_to_phys(&kexec_pte0); 201 page_list[VA_PTE_0] = (unsigned long)kexec_pte0; 202 page_list[PA_PUD_1] = virt_to_phys(&kexec_pud1); 203 page_list[VA_PUD_1] = (unsigned long)kexec_pud1; 204 page_list[PA_PMD_1] = virt_to_phys(&kexec_pmd1); 205 page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; 206 page_list[PA_PTE_1] = virt_to_phys(&kexec_pte1); 207 page_list[VA_PTE_1] = (unsigned long)kexec_pte1; 208 209 page_list[PA_TABLE_PAGE] = 210 (unsigned long)__pa(page_address(image->control_code_page)); 211 212 /* The segment registers are funny things, they have both a 213 * visible and an invisible part. Whenever the visible part is 214 * set to a specific selector, the invisible part is loaded 215 * with from a table in memory. At no other time is the 216 * descriptor table in memory accessed. 217 * 218 * I take advantage of this here by force loading the 219 * segments, before I zap the gdt with an invalid value. 220 */ 221 load_segments(); 222 /* The gdt & idt are now invalid. 223 * If you want to load them you must set up your own idt & gdt. 224 */ 225 set_gdt(phys_to_virt(0),0); 226 set_idt(phys_to_virt(0),0); 227 228 /* now call it */ 229 relocate_kernel((unsigned long)image->head, (unsigned long)page_list, 230 image->start); 231} 232 233/* crashkernel=size@addr specifies the location to reserve for 234 * a crash kernel. By reserving this memory we guarantee 235 * that linux never set's it up as a DMA target. 236 * Useful for holding code to do something appropriate 237 * after a kernel panic. 238 */ 239static int __init setup_crashkernel(char *arg) 240{ 241 unsigned long size, base; 242 char *p; 243 if (!arg) 244 return -EINVAL; 245 size = memparse(arg, &p); 246 if (arg == p) 247 return -EINVAL; 248 if (*p == '@') { 249 base = memparse(p+1, &p); 250 crashk_res.start = base; 251 crashk_res.end = base + size - 1; 252 } 253 return 0; 254} 255early_param("crashkernel", setup_crashkernel); 256