1157908Speter/*- 2157908Speter * Copyright (c) 2006 Peter Wemm 3157908Speter * All rights reserved. 4157908Speter * 5157908Speter * Redistribution and use in source and binary forms, with or without 6157908Speter * modification, are permitted provided that the following conditions 7157908Speter * are met: 8157908Speter * 9157908Speter * 1. Redistributions of source code must retain the above copyright 10157908Speter * notice, this list of conditions and the following disclaimer. 11157908Speter * 2. Redistributions in binary form must reproduce the above copyright 12157908Speter * notice, this list of conditions and the following disclaimer in the 13157908Speter * documentation and/or other materials provided with the distribution. 14157908Speter * 15157908Speter * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16157908Speter * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17157908Speter * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18157908Speter * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19157908Speter * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20157908Speter * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21157908Speter * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22157908Speter * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23157908Speter * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24157908Speter * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25157908Speter */ 26157908Speter 27157908Speter#include <sys/cdefs.h> 28157908Speter__FBSDID("$FreeBSD$"); 29157908Speter 30225194Sjhb#include "opt_pmap.h" 31221173Sattilio#include "opt_watchdog.h" 32221173Sattilio 33157908Speter#include <sys/param.h> 34157908Speter#include <sys/systm.h> 35157908Speter#include <sys/conf.h> 36157908Speter#include <sys/cons.h> 37157908Speter#include <sys/kernel.h> 38157908Speter#include <sys/kerneldump.h> 39157908Speter#include <sys/msgbuf.h> 40221173Sattilio#include <sys/watchdog.h> 41157908Speter#include <vm/vm.h> 42254065Skib#include <vm/vm_param.h> 43230623Skmacy#include <vm/vm_page.h> 44243132Skib#include <vm/vm_phys.h> 45157908Speter#include <vm/pmap.h> 46157908Speter#include <machine/atomic.h> 47157908Speter#include <machine/elf.h> 48157908Speter#include <machine/md_var.h> 49157908Speter#include <machine/vmparam.h> 50157908Speter#include <machine/minidump.h> 51157908Speter 52157908SpeterCTASSERT(sizeof(struct kerneldumpheader) == 512); 53157908Speter 54157908Speter/* 55157908Speter * Don't touch the first SIZEOF_METADATA bytes on the dump device. This 56157908Speter * is to protect us from metadata and to protect metadata from us. 57157908Speter */ 58157908Speter#define SIZEOF_METADATA (64*1024) 59157908Speter 60157908Speter#define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK) 61157908Speter#define DEV_ALIGN(x) (((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1)) 62157908Speter 63157908Speteruint64_t *vm_page_dump; 64157908Speterint vm_page_dump_size; 65157908Speter 66157908Speterstatic struct kerneldumpheader kdh; 67157908Speterstatic off_t dumplo; 68157908Speter 69157908Speter/* Handle chunked writes. */ 70157908Speterstatic size_t fragsz; 71157908Speterstatic void *dump_va; 72221069Ssobomaxstatic size_t counter, progress, dumpsize; 73157908Speter 74157908SpeterCTASSERT(sizeof(*vm_page_dump) == 8); 75157908Speter 76157908Speterstatic int 77157908Speteris_dumpable(vm_paddr_t pa) 78157908Speter{ 79230623Skmacy vm_page_t m; 80157908Speter int i; 81157908Speter 82230623Skmacy if ((m = vm_phys_paddr_to_vm_page(pa)) != NULL) 83230623Skmacy return ((m->flags & PG_NODUMP) == 0); 84157908Speter for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { 85157908Speter if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) 86157908Speter return (1); 87157908Speter } 88157908Speter return (0); 89157908Speter} 90157908Speter 91157908Speter#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8) 92157908Speter 93157908Speterstatic int 94157908Speterblk_flush(struct dumperinfo *di) 95157908Speter{ 96157908Speter int error; 97157908Speter 98157908Speter if (fragsz == 0) 99157908Speter return (0); 100157908Speter 101175768Sru error = dump_write(di, dump_va, 0, dumplo, fragsz); 102157908Speter dumplo += fragsz; 103157908Speter fragsz = 0; 104157908Speter return (error); 105157908Speter} 106157908Speter 107221069Ssobomaxstatic struct { 108221069Ssobomax int min_per; 109221069Ssobomax int max_per; 110221069Ssobomax int visited; 111221069Ssobomax} progress_track[10] = { 112221069Ssobomax { 0, 10, 0}, 113221069Ssobomax { 10, 20, 0}, 114221069Ssobomax { 20, 30, 0}, 115221069Ssobomax { 30, 40, 0}, 116221069Ssobomax { 40, 50, 0}, 117221069Ssobomax { 50, 60, 0}, 118221069Ssobomax { 60, 70, 0}, 119221069Ssobomax { 70, 80, 0}, 120221069Ssobomax { 80, 90, 0}, 121221069Ssobomax { 90, 100, 0} 122221069Ssobomax}; 123221069Ssobomax 124221069Ssobomaxstatic void 125221069Ssobomaxreport_progress(size_t progress, size_t dumpsize) 126221069Ssobomax{ 127221069Ssobomax int sofar, i; 128221069Ssobomax 129221069Ssobomax sofar = 100 - ((progress * 100) / dumpsize); 130257575Skib for (i = 0; i < nitems(progress_track); i++) { 131257575Skib if (sofar < progress_track[i].min_per || 132257575Skib sofar > progress_track[i].max_per) 133221069Ssobomax continue; 134221069Ssobomax if (progress_track[i].visited) 135221069Ssobomax return; 136221069Ssobomax progress_track[i].visited = 1; 137221069Ssobomax printf("..%d%%", sofar); 138221069Ssobomax return; 139221069Ssobomax } 140221069Ssobomax} 141221069Ssobomax 142157908Speterstatic int 143157908Speterblk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) 144157908Speter{ 145157908Speter size_t len; 146157908Speter int error, i, c; 147176304Sscottl u_int maxdumpsz; 148157908Speter 149184499Skib maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE); 150176304Sscottl if (maxdumpsz == 0) /* seatbelt */ 151176304Sscottl maxdumpsz = PAGE_SIZE; 152157908Speter error = 0; 153157908Speter if ((sz % PAGE_SIZE) != 0) { 154157908Speter printf("size not page aligned\n"); 155157908Speter return (EINVAL); 156157908Speter } 157157908Speter if (ptr != NULL && pa != 0) { 158157908Speter printf("cant have both va and pa!\n"); 159157908Speter return (EINVAL); 160157908Speter } 161257575Skib if ((((uintptr_t)pa) % PAGE_SIZE) != 0) { 162257575Skib printf("address not page aligned %p\n", ptr); 163157908Speter return (EINVAL); 164157908Speter } 165157908Speter if (ptr != NULL) { 166157908Speter /* If we're doing a virtual dump, flush any pre-existing pa pages */ 167157908Speter error = blk_flush(di); 168157908Speter if (error) 169157908Speter return (error); 170157908Speter } 171157908Speter while (sz) { 172176304Sscottl len = maxdumpsz - fragsz; 173157908Speter if (len > sz) 174157908Speter len = sz; 175157908Speter counter += len; 176157908Speter progress -= len; 177157908Speter if (counter >> 24) { 178221069Ssobomax report_progress(progress, dumpsize); 179157908Speter counter &= (1<<24) - 1; 180157908Speter } 181236503Savg 182221173Sattilio wdog_kern_pat(WD_LASTVAL); 183236503Savg 184157908Speter if (ptr) { 185175768Sru error = dump_write(di, ptr, 0, dumplo, len); 186157908Speter if (error) 187157908Speter return (error); 188157908Speter dumplo += len; 189157908Speter ptr += len; 190157908Speter sz -= len; 191157908Speter } else { 192157908Speter for (i = 0; i < len; i += PAGE_SIZE) 193157908Speter dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT); 194157908Speter fragsz += len; 195157908Speter pa += len; 196157908Speter sz -= len; 197176304Sscottl if (fragsz == maxdumpsz) { 198157908Speter error = blk_flush(di); 199157908Speter if (error) 200157908Speter return (error); 201157908Speter } 202157908Speter } 203157908Speter 204157908Speter /* Check for user abort. */ 205157908Speter c = cncheckc(); 206157908Speter if (c == 0x03) 207157908Speter return (ECANCELED); 208157908Speter if (c != -1) 209157908Speter printf(" (CTRL-C to abort) "); 210157908Speter } 211157908Speter 212157908Speter return (0); 213157908Speter} 214157908Speter 215157908Speter/* A fake page table page, to avoid having to handle both 4K and 2M pages */ 216215133Savgstatic pd_entry_t fakepd[NPDEPG]; 217157908Speter 218157908Spetervoid 219157908Speterminidumpsys(struct dumperinfo *di) 220157908Speter{ 221215133Savg uint32_t pmapsize; 222157908Speter vm_offset_t va; 223157908Speter int error; 224157908Speter uint64_t bits; 225254547Sneel uint64_t *pml4, *pdp, *pd, *pt, pa; 226254547Sneel int i, ii, j, k, n, bit; 227215133Savg int retry_count; 228157908Speter struct minidumphdr mdhdr; 229157908Speter 230215133Savg retry_count = 0; 231215133Savg retry: 232215133Savg retry_count++; 233157908Speter counter = 0; 234257575Skib for (i = 0; i < nitems(progress_track); i++) 235257575Skib progress_track[i].visited = 0; 236157908Speter /* Walk page table pages, set bits in vm_page_dump */ 237215133Savg pmapsize = 0; 238246384Sneel for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + nkpt * NBPDR, 239215133Savg kernel_vm_end); ) { 240157908Speter /* 241157908Speter * We always write a page, even if it is zero. Each 242215133Savg * page written corresponds to 1GB of space 243157908Speter */ 244215133Savg pmapsize += PAGE_SIZE; 245254547Sneel ii = (va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1); 246254547Sneel pml4 = (uint64_t *)PHYS_TO_DMAP(KPML4phys) + ii; 247254547Sneel pdp = (uint64_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); 248215133Savg i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1); 249215133Savg if ((pdp[i] & PG_V) == 0) { 250215133Savg va += NBPDP; 251157908Speter continue; 252215133Savg } 253215133Savg 254215133Savg /* 255215133Savg * 1GB page is represented as 512 2MB pages in a dump. 256215133Savg */ 257215133Savg if ((pdp[i] & PG_PS) != 0) { 258215133Savg va += NBPDP; 259215133Savg pa = pdp[i] & PG_PS_FRAME; 260215133Savg for (n = 0; n < NPDEPG * NPTEPG; n++) { 261157908Speter if (is_dumpable(pa)) 262157908Speter dump_add_page(pa); 263157908Speter pa += PAGE_SIZE; 264157908Speter } 265157908Speter continue; 266157908Speter } 267215133Savg 268215133Savg pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME); 269215133Savg for (n = 0; n < NPDEPG; n++, va += NBPDR) { 270215133Savg j = (va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1); 271215133Savg 272215133Savg if ((pd[j] & PG_V) == 0) 273215133Savg continue; 274215133Savg 275215133Savg if ((pd[j] & PG_PS) != 0) { 276215133Savg /* This is an entire 2M page. */ 277215133Savg pa = pd[j] & PG_PS_FRAME; 278215133Savg for (k = 0; k < NPTEPG; k++) { 279157908Speter if (is_dumpable(pa)) 280157908Speter dump_add_page(pa); 281215133Savg pa += PAGE_SIZE; 282157908Speter } 283215133Savg continue; 284157908Speter } 285215133Savg 286215133Savg pa = pd[j] & PG_FRAME; 287215133Savg /* set bit for this PTE page */ 288215133Savg if (is_dumpable(pa)) 289215133Savg dump_add_page(pa); 290215133Savg /* and for each valid page in this 2MB block */ 291215133Savg pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME); 292215133Savg for (k = 0; k < NPTEPG; k++) { 293215133Savg if ((pt[k] & PG_V) == 0) 294215133Savg continue; 295215133Savg pa = pt[k] & PG_FRAME; 296215133Savg if (is_dumpable(pa)) 297215133Savg dump_add_page(pa); 298215133Savg } 299157908Speter } 300157908Speter } 301157908Speter 302157908Speter /* Calculate dump size. */ 303215133Savg dumpsize = pmapsize; 304157908Speter dumpsize += round_page(msgbufp->msg_size); 305157908Speter dumpsize += round_page(vm_page_dump_size); 306157908Speter for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { 307157908Speter bits = vm_page_dump[i]; 308157908Speter while (bits) { 309157908Speter bit = bsfq(bits); 310157908Speter pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; 311157908Speter /* Clear out undumpable pages now if needed */ 312157908Speter if (is_dumpable(pa)) { 313157908Speter dumpsize += PAGE_SIZE; 314157908Speter } else { 315157908Speter dump_drop_page(pa); 316157908Speter } 317157908Speter bits &= ~(1ul << bit); 318157908Speter } 319157908Speter } 320157908Speter dumpsize += PAGE_SIZE; 321157908Speter 322157908Speter /* Determine dump offset on device. */ 323157908Speter if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) { 324215133Savg error = E2BIG; 325157908Speter goto fail; 326157908Speter } 327157908Speter dumplo = di->mediaoffset + di->mediasize - dumpsize; 328157908Speter dumplo -= sizeof(kdh) * 2; 329157908Speter progress = dumpsize; 330157908Speter 331157908Speter /* Initialize mdhdr */ 332157908Speter bzero(&mdhdr, sizeof(mdhdr)); 333157908Speter strcpy(mdhdr.magic, MINIDUMP_MAGIC); 334157908Speter mdhdr.version = MINIDUMP_VERSION; 335157908Speter mdhdr.msgbufsize = msgbufp->msg_size; 336157908Speter mdhdr.bitmapsize = vm_page_dump_size; 337215133Savg mdhdr.pmapsize = pmapsize; 338179898Salc mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS; 339157908Speter mdhdr.dmapbase = DMAP_MIN_ADDRESS; 340157908Speter mdhdr.dmapend = DMAP_MAX_ADDRESS; 341157908Speter 342183527Speter mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION, dumpsize, di->blocksize); 343157908Speter 344221069Ssobomax printf("Dumping %llu out of %ju MB:", (long long)dumpsize >> 20, 345221069Ssobomax ptoa((uintmax_t)physmem) / 1048576); 346157908Speter 347157908Speter /* Dump leader */ 348175768Sru error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); 349157908Speter if (error) 350157908Speter goto fail; 351157908Speter dumplo += sizeof(kdh); 352157908Speter 353157908Speter /* Dump my header */ 354215133Savg bzero(&fakepd, sizeof(fakepd)); 355215133Savg bcopy(&mdhdr, &fakepd, sizeof(mdhdr)); 356215133Savg error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); 357157908Speter if (error) 358157908Speter goto fail; 359157908Speter 360157908Speter /* Dump msgbuf up front */ 361157908Speter error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size)); 362157908Speter if (error) 363157908Speter goto fail; 364157908Speter 365157908Speter /* Dump bitmap */ 366157908Speter error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size)); 367157908Speter if (error) 368157908Speter goto fail; 369157908Speter 370215133Savg /* Dump kernel page directory pages */ 371215133Savg bzero(fakepd, sizeof(fakepd)); 372246384Sneel for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + nkpt * NBPDR, 373215133Savg kernel_vm_end); va += NBPDP) { 374254547Sneel ii = (va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1); 375254547Sneel pml4 = (uint64_t *)PHYS_TO_DMAP(KPML4phys) + ii; 376254547Sneel pdp = (uint64_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); 377157908Speter i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1); 378215133Savg 379157908Speter /* We always write a page, even if it is zero */ 380157908Speter if ((pdp[i] & PG_V) == 0) { 381215133Savg error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); 382157908Speter if (error) 383157908Speter goto fail; 384215133Savg /* flush, in case we reuse fakepd in the same block */ 385157908Speter error = blk_flush(di); 386157908Speter if (error) 387157908Speter goto fail; 388157908Speter continue; 389157908Speter } 390215133Savg 391215133Savg /* 1GB page is represented as 512 2MB pages in a dump */ 392215133Savg if ((pdp[i] & PG_PS) != 0) { 393215133Savg /* PDPE and PDP have identical layout in this case */ 394215133Savg fakepd[0] = pdp[i]; 395215133Savg for (j = 1; j < NPDEPG; j++) 396215133Savg fakepd[j] = fakepd[j - 1] + NBPDR; 397215133Savg error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); 398157908Speter if (error) 399157908Speter goto fail; 400215133Savg /* flush, in case we reuse fakepd in the same block */ 401157908Speter error = blk_flush(di); 402157908Speter if (error) 403157908Speter goto fail; 404215133Savg bzero(fakepd, sizeof(fakepd)); 405157908Speter continue; 406157908Speter } 407215133Savg 408215133Savg pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME); 409215133Savg error = blk_write(di, (char *)pd, 0, PAGE_SIZE); 410215133Savg if (error) 411215133Savg goto fail; 412215133Savg error = blk_flush(di); 413215133Savg if (error) 414215133Savg goto fail; 415157908Speter } 416157908Speter 417157908Speter /* Dump memory chunks */ 418157908Speter /* XXX cluster it up and use blk_dump() */ 419157908Speter for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { 420157908Speter bits = vm_page_dump[i]; 421157908Speter while (bits) { 422157908Speter bit = bsfq(bits); 423157908Speter pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; 424157908Speter error = blk_write(di, 0, pa, PAGE_SIZE); 425157908Speter if (error) 426157908Speter goto fail; 427157908Speter bits &= ~(1ul << bit); 428157908Speter } 429157908Speter } 430157908Speter 431157908Speter error = blk_flush(di); 432157908Speter if (error) 433157908Speter goto fail; 434157908Speter 435157908Speter /* Dump trailer */ 436175768Sru error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); 437157908Speter if (error) 438157908Speter goto fail; 439157908Speter dumplo += sizeof(kdh); 440157908Speter 441157908Speter /* Signal completion, signoff and exit stage left. */ 442175768Sru dump_write(di, NULL, 0, 0, 0); 443157908Speter printf("\nDump complete\n"); 444157908Speter return; 445157908Speter 446157908Speter fail: 447157908Speter if (error < 0) 448157908Speter error = -error; 449157908Speter 450215133Savg printf("\n"); 451215133Savg if (error == ENOSPC) { 452215133Savg printf("Dump map grown while dumping. "); 453215133Savg if (retry_count < 5) { 454215133Savg printf("Retrying...\n"); 455215133Savg goto retry; 456215133Savg } 457215133Savg printf("Dump failed.\n"); 458215133Savg } 459215133Savg else if (error == ECANCELED) 460215133Savg printf("Dump aborted\n"); 461215133Savg else if (error == E2BIG) 462215133Savg printf("Dump failed. Partition too small.\n"); 463157908Speter else 464215133Savg printf("** DUMP FAILED (ERROR %d) **\n", error); 465157908Speter} 466157908Speter 467157908Spetervoid 468157908Speterdump_add_page(vm_paddr_t pa) 469157908Speter{ 470157908Speter int idx, bit; 471157908Speter 472157908Speter pa >>= PAGE_SHIFT; 473157908Speter idx = pa >> 6; /* 2^6 = 64 */ 474157908Speter bit = pa & 63; 475157908Speter atomic_set_long(&vm_page_dump[idx], 1ul << bit); 476157908Speter} 477157908Speter 478157908Spetervoid 479157908Speterdump_drop_page(vm_paddr_t pa) 480157908Speter{ 481157908Speter int idx, bit; 482157908Speter 483157908Speter pa >>= PAGE_SHIFT; 484157908Speter idx = pa >> 6; /* 2^6 = 64 */ 485157908Speter bit = pa & 63; 486157908Speter atomic_clear_long(&vm_page_dump[idx], 1ul << bit); 487157908Speter} 488