minidump_machdep.c revision 306365
1/*- 2 * Copyright (c) 2006 Peter Wemm 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: stable/11/sys/amd64/amd64/minidump_machdep.c 306365 2016-09-27 10:26:39Z kib $"); 29 30#include "opt_pmap.h" 31#include "opt_watchdog.h" 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/conf.h> 36#include <sys/cons.h> 37#include <sys/kernel.h> 38#include <sys/kerneldump.h> 39#include <sys/msgbuf.h> 40#include <sys/watchdog.h> 41#include <vm/vm.h> 42#include <vm/vm_param.h> 43#include <vm/vm_page.h> 44#include <vm/vm_phys.h> 45#include <vm/pmap.h> 46#include <machine/atomic.h> 47#include <machine/elf.h> 48#include <machine/md_var.h> 49#include <machine/minidump.h> 50 51CTASSERT(sizeof(struct kerneldumpheader) == 512); 52 53/* 54 * Don't touch the first SIZEOF_METADATA bytes on the dump device. This 55 * is to protect us from metadata and to protect metadata from us. 56 */ 57#define SIZEOF_METADATA (64*1024) 58 59uint64_t *vm_page_dump; 60int vm_page_dump_size; 61 62static struct kerneldumpheader kdh; 63static off_t dumplo; 64 65/* Handle chunked writes. */ 66static size_t fragsz; 67static void *dump_va; 68static size_t counter, progress, dumpsize; 69 70CTASSERT(sizeof(*vm_page_dump) == 8); 71 72static int 73is_dumpable(vm_paddr_t pa) 74{ 75 vm_page_t m; 76 int i; 77 78 if ((m = vm_phys_paddr_to_vm_page(pa)) != NULL) 79 return ((m->flags & PG_NODUMP) == 0); 80 for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { 81 if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) 82 return (1); 83 } 84 return (0); 85} 86 87#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8) 88 89static int 90blk_flush(struct dumperinfo *di) 91{ 92 int error; 93 94 if (fragsz == 0) 95 return (0); 96 97 error = dump_write(di, dump_va, 0, dumplo, fragsz); 98 dumplo += fragsz; 99 fragsz = 0; 100 return (error); 101} 102 103static struct { 104 int min_per; 105 int max_per; 106 int visited; 107} progress_track[10] = { 108 { 0, 10, 0}, 109 { 10, 20, 0}, 110 { 20, 30, 0}, 111 { 30, 40, 0}, 112 { 40, 50, 0}, 113 { 50, 60, 0}, 114 { 60, 70, 0}, 115 { 70, 80, 0}, 116 { 80, 90, 0}, 117 { 90, 100, 0} 118}; 119 120static void 121report_progress(size_t progress, size_t dumpsize) 122{ 123 int sofar, i; 124 125 sofar = 100 - ((progress * 100) / dumpsize); 126 for (i = 0; i < nitems(progress_track); i++) { 127 if (sofar < progress_track[i].min_per || 128 sofar > progress_track[i].max_per) 129 continue; 130 if (progress_track[i].visited) 131 return; 132 progress_track[i].visited = 1; 133 printf("..%d%%", sofar); 134 return; 135 } 136} 137 138static int 139blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) 140{ 141 size_t len; 142 int error, i, c; 143 u_int maxdumpsz; 144 145 maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE); 146 if (maxdumpsz == 0) /* seatbelt */ 147 maxdumpsz = PAGE_SIZE; 148 error = 0; 149 if ((sz % PAGE_SIZE) != 0) { 150 printf("size not page aligned\n"); 151 return (EINVAL); 152 } 153 if (ptr != NULL && pa != 0) { 154 printf("cant have both va and pa!\n"); 155 return (EINVAL); 156 } 157 if ((((uintptr_t)pa) % PAGE_SIZE) != 0) { 158 printf("address not page aligned %p\n", ptr); 159 return (EINVAL); 160 } 161 if (ptr != NULL) { 162 /* If we're doing a virtual dump, flush any pre-existing pa pages */ 163 error = blk_flush(di); 164 if (error) 165 return (error); 166 } 167 while (sz) { 168 len = maxdumpsz - fragsz; 169 if (len > sz) 170 len = sz; 171 counter += len; 172 progress -= len; 173 if (counter >> 24) { 174 report_progress(progress, dumpsize); 175 counter &= (1<<24) - 1; 176 } 177 178 wdog_kern_pat(WD_LASTVAL); 179 180 if (ptr) { 181 error = dump_write(di, ptr, 0, dumplo, len); 182 if (error) 183 return (error); 184 dumplo += len; 185 ptr += len; 186 sz -= len; 187 } else { 188 for (i = 0; i < len; i += PAGE_SIZE) 189 dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT); 190 fragsz += len; 191 pa += len; 192 sz -= len; 193 if (fragsz == maxdumpsz) { 194 error = blk_flush(di); 195 if (error) 196 return (error); 197 } 198 } 199 200 /* Check for user abort. */ 201 c = cncheckc(); 202 if (c == 0x03) 203 return (ECANCELED); 204 if (c != -1) 205 printf(" (CTRL-C to abort) "); 206 } 207 208 return (0); 209} 210 211/* A fake page table page, to avoid having to handle both 4K and 2M pages */ 212static pd_entry_t fakepd[NPDEPG]; 213 214int 215minidumpsys(struct dumperinfo *di) 216{ 217 uint32_t pmapsize; 218 vm_offset_t va; 219 int error; 220 uint64_t bits; 221 uint64_t *pml4, *pdp, *pd, *pt, pa; 222 size_t size; 223 int i, ii, j, k, n, bit; 224 int retry_count; 225 struct minidumphdr mdhdr; 226 227 retry_count = 0; 228 retry: 229 retry_count++; 230 counter = 0; 231 for (i = 0; i < nitems(progress_track); i++) 232 progress_track[i].visited = 0; 233 /* Walk page table pages, set bits in vm_page_dump */ 234 pmapsize = 0; 235 for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + nkpt * NBPDR, 236 kernel_vm_end); ) { 237 /* 238 * We always write a page, even if it is zero. Each 239 * page written corresponds to 1GB of space 240 */ 241 pmapsize += PAGE_SIZE; 242 ii = pmap_pml4e_index(va); 243 pml4 = (uint64_t *)PHYS_TO_DMAP(KPML4phys) + ii; 244 pdp = (uint64_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); 245 i = pmap_pdpe_index(va); 246 if ((pdp[i] & PG_V) == 0) { 247 va += NBPDP; 248 continue; 249 } 250 251 /* 252 * 1GB page is represented as 512 2MB pages in a dump. 253 */ 254 if ((pdp[i] & PG_PS) != 0) { 255 va += NBPDP; 256 pa = pdp[i] & PG_PS_FRAME; 257 for (n = 0; n < NPDEPG * NPTEPG; n++) { 258 if (is_dumpable(pa)) 259 dump_add_page(pa); 260 pa += PAGE_SIZE; 261 } 262 continue; 263 } 264 265 pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME); 266 for (n = 0; n < NPDEPG; n++, va += NBPDR) { 267 j = pmap_pde_index(va); 268 269 if ((pd[j] & PG_V) == 0) 270 continue; 271 272 if ((pd[j] & PG_PS) != 0) { 273 /* This is an entire 2M page. */ 274 pa = pd[j] & PG_PS_FRAME; 275 for (k = 0; k < NPTEPG; k++) { 276 if (is_dumpable(pa)) 277 dump_add_page(pa); 278 pa += PAGE_SIZE; 279 } 280 continue; 281 } 282 283 pa = pd[j] & PG_FRAME; 284 /* set bit for this PTE page */ 285 if (is_dumpable(pa)) 286 dump_add_page(pa); 287 /* and for each valid page in this 2MB block */ 288 pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME); 289 for (k = 0; k < NPTEPG; k++) { 290 if ((pt[k] & PG_V) == 0) 291 continue; 292 pa = pt[k] & PG_FRAME; 293 if (is_dumpable(pa)) 294 dump_add_page(pa); 295 } 296 } 297 } 298 299 /* Calculate dump size. */ 300 dumpsize = pmapsize; 301 dumpsize += round_page(msgbufp->msg_size); 302 dumpsize += round_page(vm_page_dump_size); 303 for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { 304 bits = vm_page_dump[i]; 305 while (bits) { 306 bit = bsfq(bits); 307 pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; 308 /* Clear out undumpable pages now if needed */ 309 if (is_dumpable(pa)) { 310 dumpsize += PAGE_SIZE; 311 } else { 312 dump_drop_page(pa); 313 } 314 bits &= ~(1ul << bit); 315 } 316 } 317 dumpsize += PAGE_SIZE; 318 319 /* Determine dump offset on device. */ 320 if (di->mediasize < SIZEOF_METADATA + dumpsize + di->blocksize * 2) { 321 error = E2BIG; 322 goto fail; 323 } 324 dumplo = di->mediaoffset + di->mediasize - dumpsize; 325 dumplo -= di->blocksize * 2; 326 progress = dumpsize; 327 328 /* Initialize mdhdr */ 329 bzero(&mdhdr, sizeof(mdhdr)); 330 strcpy(mdhdr.magic, MINIDUMP_MAGIC); 331 mdhdr.version = MINIDUMP_VERSION; 332 mdhdr.msgbufsize = msgbufp->msg_size; 333 mdhdr.bitmapsize = vm_page_dump_size; 334 mdhdr.pmapsize = pmapsize; 335 mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS; 336 mdhdr.dmapbase = DMAP_MIN_ADDRESS; 337 mdhdr.dmapend = DMAP_MAX_ADDRESS; 338 339 mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION, dumpsize, di->blocksize); 340 341 printf("Dumping %llu out of %ju MB:", (long long)dumpsize >> 20, 342 ptoa((uintmax_t)physmem) / 1048576); 343 344 /* Dump leader */ 345 error = dump_write_pad(di, &kdh, 0, dumplo, sizeof(kdh), &size); 346 if (error) 347 goto fail; 348 dumplo += size; 349 350 /* Dump my header */ 351 bzero(&fakepd, sizeof(fakepd)); 352 bcopy(&mdhdr, &fakepd, sizeof(mdhdr)); 353 error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); 354 if (error) 355 goto fail; 356 357 /* Dump msgbuf up front */ 358 error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size)); 359 if (error) 360 goto fail; 361 362 /* Dump bitmap */ 363 error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size)); 364 if (error) 365 goto fail; 366 367 /* Dump kernel page directory pages */ 368 bzero(fakepd, sizeof(fakepd)); 369 for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + nkpt * NBPDR, 370 kernel_vm_end); va += NBPDP) { 371 ii = pmap_pml4e_index(va); 372 pml4 = (uint64_t *)PHYS_TO_DMAP(KPML4phys) + ii; 373 pdp = (uint64_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); 374 i = pmap_pdpe_index(va); 375 376 /* We always write a page, even if it is zero */ 377 if ((pdp[i] & PG_V) == 0) { 378 error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); 379 if (error) 380 goto fail; 381 /* flush, in case we reuse fakepd in the same block */ 382 error = blk_flush(di); 383 if (error) 384 goto fail; 385 continue; 386 } 387 388 /* 1GB page is represented as 512 2MB pages in a dump */ 389 if ((pdp[i] & PG_PS) != 0) { 390 /* PDPE and PDP have identical layout in this case */ 391 fakepd[0] = pdp[i]; 392 for (j = 1; j < NPDEPG; j++) 393 fakepd[j] = fakepd[j - 1] + NBPDR; 394 error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); 395 if (error) 396 goto fail; 397 /* flush, in case we reuse fakepd in the same block */ 398 error = blk_flush(di); 399 if (error) 400 goto fail; 401 bzero(fakepd, sizeof(fakepd)); 402 continue; 403 } 404 405 pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME); 406 error = blk_write(di, (char *)pd, 0, PAGE_SIZE); 407 if (error) 408 goto fail; 409 error = blk_flush(di); 410 if (error) 411 goto fail; 412 } 413 414 /* Dump memory chunks */ 415 /* XXX cluster it up and use blk_dump() */ 416 for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { 417 bits = vm_page_dump[i]; 418 while (bits) { 419 bit = bsfq(bits); 420 pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; 421 error = blk_write(di, 0, pa, PAGE_SIZE); 422 if (error) 423 goto fail; 424 bits &= ~(1ul << bit); 425 } 426 } 427 428 error = blk_flush(di); 429 if (error) 430 goto fail; 431 432 /* Dump trailer */ 433 error = dump_write_pad(di, &kdh, 0, dumplo, sizeof(kdh), &size); 434 if (error) 435 goto fail; 436 dumplo += size; 437 438 /* Signal completion, signoff and exit stage left. */ 439 dump_write(di, NULL, 0, 0, 0); 440 printf("\nDump complete\n"); 441 return (0); 442 443 fail: 444 if (error < 0) 445 error = -error; 446 447 printf("\n"); 448 if (error == ENOSPC) { 449 printf("Dump map grown while dumping. "); 450 if (retry_count < 5) { 451 printf("Retrying...\n"); 452 goto retry; 453 } 454 printf("Dump failed.\n"); 455 } 456 else if (error == ECANCELED) 457 printf("Dump aborted\n"); 458 else if (error == E2BIG) 459 printf("Dump failed. Partition too small.\n"); 460 else 461 printf("** DUMP FAILED (ERROR %d) **\n", error); 462 return (error); 463} 464 465void 466dump_add_page(vm_paddr_t pa) 467{ 468 int idx, bit; 469 470 pa >>= PAGE_SHIFT; 471 idx = pa >> 6; /* 2^6 = 64 */ 472 bit = pa & 63; 473 atomic_set_long(&vm_page_dump[idx], 1ul << bit); 474} 475 476void 477dump_drop_page(vm_paddr_t pa) 478{ 479 int idx, bit; 480 481 pa >>= PAGE_SHIFT; 482 idx = pa >> 6; /* 2^6 = 64 */ 483 bit = pa & 63; 484 atomic_clear_long(&vm_page_dump[idx], 1ul << bit); 485} 486