/* minidump_machdep.c revision 220021 */
1130803Smarcel/*- 2130803Smarcel * Copyright (c) 2006 Peter Wemm 3130803Smarcel * All rights reserved. 4130803Smarcel * 5130803Smarcel * Redistribution and use in source and binary forms, with or without 6130803Smarcel * modification, are permitted provided that the following conditions 7130803Smarcel * are met: 8130803Smarcel * 9130803Smarcel * 1. Redistributions of source code must retain the above copyright 10130803Smarcel * notice, this list of conditions and the following disclaimer. 11130803Smarcel * 2. Redistributions in binary form must reproduce the above copyright 12130803Smarcel * notice, this list of conditions and the following disclaimer in the 13130803Smarcel * documentation and/or other materials provided with the distribution. 14130803Smarcel * 15130803Smarcel * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16130803Smarcel * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17130803Smarcel * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18130803Smarcel * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19130803Smarcel * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20130803Smarcel * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21130803Smarcel * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22130803Smarcel * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23130803Smarcel * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24130803Smarcel * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25130803Smarcel */ 26130803Smarcel 27130803Smarcel#include <sys/cdefs.h> 28130803Smarcel__FBSDID("$FreeBSD: head/sys/amd64/amd64/minidump_machdep.c 220021 2011-03-26 06:21:05Z alc $"); 29130803Smarcel 30130803Smarcel#include <sys/param.h> 31130803Smarcel#include <sys/systm.h> 32130803Smarcel#include <sys/conf.h> 33130803Smarcel#include <sys/cons.h> 34130803Smarcel#include <sys/kernel.h> 35130803Smarcel#include <sys/kerneldump.h> 36130803Smarcel#include <sys/msgbuf.h> 37#include <vm/vm.h> 38#include <vm/pmap.h> 39#include <machine/atomic.h> 40#include <machine/elf.h> 41#include <machine/md_var.h> 42#include <machine/vmparam.h> 43#include <machine/minidump.h> 44 45CTASSERT(sizeof(struct kerneldumpheader) == 512); 46 47/* 48 * Don't touch the first SIZEOF_METADATA bytes on the dump device. This 49 * is to protect us from metadata and to protect metadata from us. 50 */ 51#define SIZEOF_METADATA (64*1024) 52 53#define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK) 54#define DEV_ALIGN(x) (((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1)) 55 56uint64_t *vm_page_dump; 57int vm_page_dump_size; 58 59static struct kerneldumpheader kdh; 60static off_t dumplo; 61 62/* Handle chunked writes. 
*/ 63static size_t fragsz; 64static void *dump_va; 65static size_t counter, progress; 66 67CTASSERT(sizeof(*vm_page_dump) == 8); 68 69static int 70is_dumpable(vm_paddr_t pa) 71{ 72 int i; 73 74 for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { 75 if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) 76 return (1); 77 } 78 return (0); 79} 80 81#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8) 82 83static int 84blk_flush(struct dumperinfo *di) 85{ 86 int error; 87 88 if (fragsz == 0) 89 return (0); 90 91 error = dump_write(di, dump_va, 0, dumplo, fragsz); 92 dumplo += fragsz; 93 fragsz = 0; 94 return (error); 95} 96 97static int 98blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) 99{ 100 size_t len; 101 int error, i, c; 102 u_int maxdumpsz; 103 104 maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE); 105 if (maxdumpsz == 0) /* seatbelt */ 106 maxdumpsz = PAGE_SIZE; 107 error = 0; 108 if ((sz % PAGE_SIZE) != 0) { 109 printf("size not page aligned\n"); 110 return (EINVAL); 111 } 112 if (ptr != NULL && pa != 0) { 113 printf("cant have both va and pa!\n"); 114 return (EINVAL); 115 } 116 if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) { 117 printf("address not page aligned\n"); 118 return (EINVAL); 119 } 120 if (ptr != NULL) { 121 /* If we're doing a virtual dump, flush any pre-existing pa pages */ 122 error = blk_flush(di); 123 if (error) 124 return (error); 125 } 126 while (sz) { 127 len = maxdumpsz - fragsz; 128 if (len > sz) 129 len = sz; 130 counter += len; 131 progress -= len; 132 if (counter >> 24) { 133 printf(" %ld", PG2MB(progress >> PAGE_SHIFT)); 134 counter &= (1<<24) - 1; 135 } 136 if (ptr) { 137 error = dump_write(di, ptr, 0, dumplo, len); 138 if (error) 139 return (error); 140 dumplo += len; 141 ptr += len; 142 sz -= len; 143 } else { 144 for (i = 0; i < len; i += PAGE_SIZE) 145 dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT); 146 fragsz += len; 147 pa += len; 148 sz -= len; 149 if (fragsz == 
maxdumpsz) { 150 error = blk_flush(di); 151 if (error) 152 return (error); 153 } 154 } 155 156 /* Check for user abort. */ 157 c = cncheckc(); 158 if (c == 0x03) 159 return (ECANCELED); 160 if (c != -1) 161 printf(" (CTRL-C to abort) "); 162 } 163 164 return (0); 165} 166 167/* A fake page table page, to avoid having to handle both 4K and 2M pages */ 168static pd_entry_t fakepd[NPDEPG]; 169 170void 171minidumpsys(struct dumperinfo *di) 172{ 173 uint64_t dumpsize; 174 uint32_t pmapsize; 175 vm_offset_t va; 176 int error; 177 uint64_t bits; 178 uint64_t *pdp, *pd, *pt, pa; 179 int i, j, k, n, bit; 180 int retry_count; 181 struct minidumphdr mdhdr; 182 183 retry_count = 0; 184 retry: 185 retry_count++; 186 counter = 0; 187 /* Walk page table pages, set bits in vm_page_dump */ 188 pmapsize = 0; 189 pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys); 190 for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + NKPT * NBPDR, 191 kernel_vm_end); ) { 192 /* 193 * We always write a page, even if it is zero. Each 194 * page written corresponds to 1GB of space 195 */ 196 pmapsize += PAGE_SIZE; 197 i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1); 198 if ((pdp[i] & PG_V) == 0) { 199 va += NBPDP; 200 continue; 201 } 202 203 /* 204 * 1GB page is represented as 512 2MB pages in a dump. 205 */ 206 if ((pdp[i] & PG_PS) != 0) { 207 va += NBPDP; 208 pa = pdp[i] & PG_PS_FRAME; 209 for (n = 0; n < NPDEPG * NPTEPG; n++) { 210 if (is_dumpable(pa)) 211 dump_add_page(pa); 212 pa += PAGE_SIZE; 213 } 214 continue; 215 } 216 217 pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME); 218 for (n = 0; n < NPDEPG; n++, va += NBPDR) { 219 j = (va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1); 220 221 if ((pd[j] & PG_V) == 0) 222 continue; 223 224 if ((pd[j] & PG_PS) != 0) { 225 /* This is an entire 2M page. 
*/ 226 pa = pd[j] & PG_PS_FRAME; 227 for (k = 0; k < NPTEPG; k++) { 228 if (is_dumpable(pa)) 229 dump_add_page(pa); 230 pa += PAGE_SIZE; 231 } 232 continue; 233 } 234 235 pa = pd[j] & PG_FRAME; 236 /* set bit for this PTE page */ 237 if (is_dumpable(pa)) 238 dump_add_page(pa); 239 /* and for each valid page in this 2MB block */ 240 pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME); 241 for (k = 0; k < NPTEPG; k++) { 242 if ((pt[k] & PG_V) == 0) 243 continue; 244 pa = pt[k] & PG_FRAME; 245 if (is_dumpable(pa)) 246 dump_add_page(pa); 247 } 248 } 249 } 250 251 /* Calculate dump size. */ 252 dumpsize = pmapsize; 253 dumpsize += round_page(msgbufp->msg_size); 254 dumpsize += round_page(vm_page_dump_size); 255 for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { 256 bits = vm_page_dump[i]; 257 while (bits) { 258 bit = bsfq(bits); 259 pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; 260 /* Clear out undumpable pages now if needed */ 261 if (is_dumpable(pa)) { 262 dumpsize += PAGE_SIZE; 263 } else { 264 dump_drop_page(pa); 265 } 266 bits &= ~(1ul << bit); 267 } 268 } 269 dumpsize += PAGE_SIZE; 270 271 /* Determine dump offset on device. 
*/ 272 if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) { 273 error = E2BIG; 274 goto fail; 275 } 276 dumplo = di->mediaoffset + di->mediasize - dumpsize; 277 dumplo -= sizeof(kdh) * 2; 278 progress = dumpsize; 279 280 /* Initialize mdhdr */ 281 bzero(&mdhdr, sizeof(mdhdr)); 282 strcpy(mdhdr.magic, MINIDUMP_MAGIC); 283 mdhdr.version = MINIDUMP_VERSION; 284 mdhdr.msgbufsize = msgbufp->msg_size; 285 mdhdr.bitmapsize = vm_page_dump_size; 286 mdhdr.pmapsize = pmapsize; 287 mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS; 288 mdhdr.dmapbase = DMAP_MIN_ADDRESS; 289 mdhdr.dmapend = DMAP_MAX_ADDRESS; 290 291 mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION, dumpsize, di->blocksize); 292 293 printf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576); 294 printf("Dumping %llu MB:", (long long)dumpsize >> 20); 295 296 /* Dump leader */ 297 error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); 298 if (error) 299 goto fail; 300 dumplo += sizeof(kdh); 301 302 /* Dump my header */ 303 bzero(&fakepd, sizeof(fakepd)); 304 bcopy(&mdhdr, &fakepd, sizeof(mdhdr)); 305 error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); 306 if (error) 307 goto fail; 308 309 /* Dump msgbuf up front */ 310 error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size)); 311 if (error) 312 goto fail; 313 314 /* Dump bitmap */ 315 error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size)); 316 if (error) 317 goto fail; 318 319 /* Dump kernel page directory pages */ 320 bzero(fakepd, sizeof(fakepd)); 321 pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys); 322 for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + NKPT * NBPDR, 323 kernel_vm_end); va += NBPDP) { 324 i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1); 325 326 /* We always write a page, even if it is zero */ 327 if ((pdp[i] & PG_V) == 0) { 328 error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); 329 if (error) 330 goto fail; 331 /* flush, in case we reuse fakepd in the same block */ 
332 error = blk_flush(di); 333 if (error) 334 goto fail; 335 continue; 336 } 337 338 /* 1GB page is represented as 512 2MB pages in a dump */ 339 if ((pdp[i] & PG_PS) != 0) { 340 /* PDPE and PDP have identical layout in this case */ 341 fakepd[0] = pdp[i]; 342 for (j = 1; j < NPDEPG; j++) 343 fakepd[j] = fakepd[j - 1] + NBPDR; 344 error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); 345 if (error) 346 goto fail; 347 /* flush, in case we reuse fakepd in the same block */ 348 error = blk_flush(di); 349 if (error) 350 goto fail; 351 bzero(fakepd, sizeof(fakepd)); 352 continue; 353 } 354 355 pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME); 356 error = blk_write(di, (char *)pd, 0, PAGE_SIZE); 357 if (error) 358 goto fail; 359 error = blk_flush(di); 360 if (error) 361 goto fail; 362 } 363 364 /* Dump memory chunks */ 365 /* XXX cluster it up and use blk_dump() */ 366 for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { 367 bits = vm_page_dump[i]; 368 while (bits) { 369 bit = bsfq(bits); 370 pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; 371 error = blk_write(di, 0, pa, PAGE_SIZE); 372 if (error) 373 goto fail; 374 bits &= ~(1ul << bit); 375 } 376 } 377 378 error = blk_flush(di); 379 if (error) 380 goto fail; 381 382 /* Dump trailer */ 383 error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); 384 if (error) 385 goto fail; 386 dumplo += sizeof(kdh); 387 388 /* Signal completion, signoff and exit stage left. */ 389 dump_write(di, NULL, 0, 0, 0); 390 printf("\nDump complete\n"); 391 return; 392 393 fail: 394 if (error < 0) 395 error = -error; 396 397 printf("\n"); 398 if (error == ENOSPC) { 399 printf("Dump map grown while dumping. "); 400 if (retry_count < 5) { 401 printf("Retrying...\n"); 402 goto retry; 403 } 404 printf("Dump failed.\n"); 405 } 406 else if (error == ECANCELED) 407 printf("Dump aborted\n"); 408 else if (error == E2BIG) 409 printf("Dump failed. 
Partition too small.\n"); 410 else 411 printf("** DUMP FAILED (ERROR %d) **\n", error); 412} 413 414void 415dump_add_page(vm_paddr_t pa) 416{ 417 int idx, bit; 418 419 pa >>= PAGE_SHIFT; 420 idx = pa >> 6; /* 2^6 = 64 */ 421 bit = pa & 63; 422 atomic_set_long(&vm_page_dump[idx], 1ul << bit); 423} 424 425void 426dump_drop_page(vm_paddr_t pa) 427{ 428 int idx, bit; 429 430 pa >>= PAGE_SHIFT; 431 idx = pa >> 6; /* 2^6 = 64 */ 432 bit = pa & 63; 433 atomic_clear_long(&vm_page_dump[idx], 1ul << bit); 434} 435