/* minidump_machdep.c revision 295882 */
1323124Sdes/*- 265668Skris * Copyright (c) 2006 Peter Wemm 365668Skris * All rights reserved. 465668Skris * 565668Skris * Redistribution and use in source and binary forms, with or without 665668Skris * modification, are permitted provided that the following conditions 765668Skris * are met: 865668Skris * 965668Skris * 1. Redistributions of source code must retain the above copyright 1065668Skris * notice, this list of conditions and the following disclaimer. 1165668Skris * 2. Redistributions in binary form must reproduce the above copyright 1265668Skris * notice, this list of conditions and the following disclaimer in the 1365668Skris * documentation and/or other materials provided with the distribution. 1465668Skris * 15162852Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16162852Sdes * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17162852Sdes * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18162852Sdes * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19162852Sdes * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20162852Sdes * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21162852Sdes * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22162852Sdes * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23181111Sdes * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24295367Sdes * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25295367Sdes */ 26295367Sdes 2765668Skris#include <sys/cdefs.h> 2865668Skris__FBSDID("$FreeBSD: head/sys/amd64/amd64/minidump_machdep.c 295882 2016-02-22 09:08:04Z skra $"); 29295367Sdes 3076259Sgreen#include "opt_pmap.h" 3176259Sgreen#include "opt_watchdog.h" 32323124Sdes 33295367Sdes#include <sys/param.h> 34295367Sdes#include <sys/systm.h> 3576259Sgreen#include <sys/conf.h> 3676259Sgreen#include <sys/cons.h> 37295367Sdes#include <sys/kernel.h> 38215116Sdes#include <sys/kerneldump.h> 39162852Sdes#include <sys/msgbuf.h> 40162852Sdes#include <sys/watchdog.h> 4165668Skris#include <vm/vm.h> 4265668Skris#include <vm/vm_param.h> 4365668Skris#include <vm/vm_page.h> 4465668Skris#include <vm/vm_phys.h> 4565668Skris#include <vm/pmap.h> 4665668Skris#include <machine/atomic.h> 47181111Sdes#include <machine/elf.h> 48204917Sdes#include <machine/md_var.h> 4965668Skris#include <machine/minidump.h> 5065668Skris 5165668SkrisCTASSERT(sizeof(struct kerneldumpheader) == 512); 5265668Skris 5365668Skris/* 5465668Skris * Don't touch the first SIZEOF_METADATA bytes on the dump device. This 5565668Skris * is to protect us from metadata and to protect metadata from us. 56157016Sdes */ 57157016Sdes#define SIZEOF_METADATA (64*1024) 58157016Sdes 59215116Sdes#define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK) 60215116Sdes#define DEV_ALIGN(x) (((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1)) 61215116Sdes 6276259Sgreenuint64_t *vm_page_dump; 6376259Sgreenint vm_page_dump_size; 6469587Sgreen 6569587Sgreenstatic struct kerneldumpheader kdh; 6669587Sgreenstatic off_t dumplo; 6769587Sgreen 6869587Sgreen/* Handle chunked writes. 
*/ 6969587Sgreenstatic size_t fragsz; 7069587Sgreenstatic void *dump_va; 71181111Sdesstatic size_t counter, progress, dumpsize; 72204917Sdes 7369587SgreenCTASSERT(sizeof(*vm_page_dump) == 8); 7469587Sgreen 7569587Sgreenstatic int 76255767Sdesis_dumpable(vm_paddr_t pa) 77255767Sdes{ 7869587Sgreen vm_page_t m; 79296781Sdes int i; 80296781Sdes 81296781Sdes if ((m = vm_phys_paddr_to_vm_page(pa)) != NULL) 82296781Sdes return ((m->flags & PG_NODUMP) == 0); 83157016Sdes for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { 8476259Sgreen if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) 8569587Sgreen return (1); 8669587Sgreen } 8776259Sgreen return (0); 88296781Sdes} 89296781Sdes 90296781Sdes#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8) 91296781Sdes 92296781Sdesstatic int 93296781Sdesblk_flush(struct dumperinfo *di) 94296781Sdes{ 95296781Sdes int error; 96296781Sdes 97296781Sdes if (fragsz == 0) 98296781Sdes return (0); 99296781Sdes 100296781Sdes error = dump_write(di, dump_va, 0, dumplo, fragsz); 101296781Sdes dumplo += fragsz; 102296781Sdes fragsz = 0; 103296781Sdes return (error); 104296781Sdes} 105296781Sdes 106296781Sdesstatic struct { 107296781Sdes int min_per; 108296781Sdes int max_per; 109296781Sdes int visited; 110296781Sdes} progress_track[10] = { 111296781Sdes { 0, 10, 0}, 112296781Sdes { 10, 20, 0}, 113296781Sdes { 20, 30, 0}, 114296781Sdes { 30, 40, 0}, 115296781Sdes { 40, 50, 0}, 116296781Sdes { 50, 60, 0}, 117296781Sdes { 60, 70, 0}, 11876259Sgreen { 70, 80, 0}, 11976259Sgreen { 80, 90, 0}, 12076259Sgreen { 90, 100, 0} 12165668Skris}; 12276259Sgreen 12365668Skrisstatic void 124323124Sdesreport_progress(size_t progress, size_t dumpsize) 12565668Skris{ 126296781Sdes int sofar, i; 12769587Sgreen 12869587Sgreen sofar = 100 - ((progress * 100) / dumpsize); 12969587Sgreen for (i = 0; i < nitems(progress_track); i++) { 13069587Sgreen if (sofar < progress_track[i].min_per || 13176259Sgreen sofar > progress_track[i].max_per) 13276259Sgreen continue; 
13376259Sgreen if (progress_track[i].visited) 13476259Sgreen return; 135296781Sdes progress_track[i].visited = 1; 136296781Sdes printf("..%d%%", sofar); 137204917Sdes return; 138204917Sdes } 139296781Sdes} 140296781Sdes 14165668Skrisstatic int 142296781Sdesblk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) 143296781Sdes{ 144296781Sdes size_t len; 145296781Sdes int error, i, c; 14665668Skris u_int maxdumpsz; 14765668Skris 148296781Sdes maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE); 149296781Sdes if (maxdumpsz == 0) /* seatbelt */ 150296781Sdes maxdumpsz = PAGE_SIZE; 15165668Skris error = 0; 15265668Skris if ((sz % PAGE_SIZE) != 0) { 153296781Sdes printf("size not page aligned\n"); 154296781Sdes return (EINVAL); 155296781Sdes } 15665668Skris if (ptr != NULL && pa != 0) { 15765668Skris printf("cant have both va and pa!\n"); 158296781Sdes return (EINVAL); 159296781Sdes } 160296781Sdes if ((((uintptr_t)pa) % PAGE_SIZE) != 0) { 16165668Skris printf("address not page aligned %p\n", ptr); 16265668Skris return (EINVAL); 163296781Sdes } 164296781Sdes if (ptr != NULL) { 165296781Sdes /* If we're doing a virtual dump, flush any pre-existing pa pages */ 166181111Sdes error = blk_flush(di); 167181111Sdes if (error) 168296781Sdes return (error); 169296781Sdes } 170296781Sdes while (sz) { 171296781Sdes len = maxdumpsz - fragsz; 172296781Sdes if (len > sz) 17365668Skris len = sz; 17476259Sgreen counter += len; 17576259Sgreen progress -= len; 176296781Sdes if (counter >> 24) { 17776259Sgreen report_progress(progress, dumpsize); 17865668Skris counter &= (1<<24) - 1; 17976259Sgreen } 18076259Sgreen 18165668Skris wdog_kern_pat(WD_LASTVAL); 18276259Sgreen 18376259Sgreen if (ptr) { 18465668Skris error = dump_write(di, ptr, 0, dumplo, len); 18565668Skris if (error) 18665668Skris return (error); 18776259Sgreen dumplo += len; 18865668Skris ptr += len; 18976259Sgreen sz -= len; 19065668Skris } else { 19176259Sgreen for (i = 0; i < len; i += PAGE_SIZE) 19298675Sdes 
dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT); 19376259Sgreen fragsz += len; 194255767Sdes pa += len; 19576259Sgreen sz -= len; 19676259Sgreen if (fragsz == maxdumpsz) { 19765668Skris error = blk_flush(di); 198162852Sdes if (error) 199221420Sdes return (error); 20076259Sgreen } 20165668Skris } 20265668Skris 203215116Sdes /* Check for user abort. */ 204215116Sdes c = cncheckc(); 205215116Sdes if (c == 0x03) 206296781Sdes return (ECANCELED); 207215116Sdes if (c != -1) 208215116Sdes printf(" (CTRL-C to abort) "); 209215116Sdes } 210215116Sdes 211215116Sdes return (0); 212215116Sdes} 213215116Sdes 214215116Sdes/* A fake page table page, to avoid having to handle both 4K and 2M pages */ 215215116Sdesstatic pd_entry_t fakepd[NPDEPG]; 216215116Sdes 217215116Sdesint 218215116Sdesminidumpsys(struct dumperinfo *di) 219215116Sdes{ 220215116Sdes uint32_t pmapsize; 221215116Sdes vm_offset_t va; 222215116Sdes int error; 223215116Sdes uint64_t bits; 224255767Sdes uint64_t *pml4, *pdp, *pd, *pt, pa; 225215116Sdes int i, ii, j, k, n, bit; 226215116Sdes int retry_count; 227215116Sdes struct minidumphdr mdhdr; 228215116Sdes 229215116Sdes retry_count = 0; 230215116Sdes retry: 231215116Sdes retry_count++; 232215116Sdes counter = 0; 233215116Sdes for (i = 0; i < nitems(progress_track); i++) 23465668Skris progress_track[i].visited = 0; 235295367Sdes /* Walk page table pages, set bits in vm_page_dump */ 23665668Skris pmapsize = 0; 23765668Skris for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + nkpt * NBPDR, 23876259Sgreen kernel_vm_end); ) { 23976259Sgreen /* 24076259Sgreen * We always write a page, even if it is zero. 
Each 24165668Skris * page written corresponds to 1GB of space 24276259Sgreen */ 24376259Sgreen pmapsize += PAGE_SIZE; 24465668Skris ii = (va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1); 24576259Sgreen pml4 = (uint64_t *)PHYS_TO_DMAP(KPML4phys) + ii; 24676259Sgreen pdp = (uint64_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); 24765668Skris i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1); 24865668Skris if ((pdp[i] & PG_V) == 0) { 24965668Skris va += NBPDP; 25076259Sgreen continue; 25165668Skris } 25276259Sgreen 25365668Skris /* 25476259Sgreen * 1GB page is represented as 512 2MB pages in a dump. 25598675Sdes */ 25676259Sgreen if ((pdp[i] & PG_PS) != 0) { 257255767Sdes va += NBPDP; 25876259Sgreen pa = pdp[i] & PG_PS_FRAME; 25965668Skris for (n = 0; n < NPDEPG * NPTEPG; n++) { 260162852Sdes if (is_dumpable(pa)) 26176259Sgreen dump_add_page(pa); 262295367Sdes pa += PAGE_SIZE; 263295367Sdes } 264295367Sdes continue; 265295367Sdes } 266295367Sdes 267295367Sdes pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME); 268295367Sdes for (n = 0; n < NPDEPG; n++, va += NBPDR) { 269295367Sdes j = (va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1); 270295367Sdes 271295367Sdes if ((pd[j] & PG_V) == 0) 272295367Sdes continue; 273295367Sdes 27465668Skris if ((pd[j] & PG_PS) != 0) { 27565668Skris /* This is an entire 2M page. 
*/ 27665668Skris pa = pd[j] & PG_PS_FRAME; 27776259Sgreen for (k = 0; k < NPTEPG; k++) { 278323124Sdes if (is_dumpable(pa)) 279323124Sdes dump_add_page(pa); 280323124Sdes pa += PAGE_SIZE; 28176259Sgreen } 28276259Sgreen continue; 28376259Sgreen } 28465668Skris 28576259Sgreen pa = pd[j] & PG_FRAME; 28676259Sgreen /* set bit for this PTE page */ 28765668Skris if (is_dumpable(pa)) 28876259Sgreen dump_add_page(pa); 28976259Sgreen /* and for each valid page in this 2MB block */ 29065668Skris pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME); 29165668Skris for (k = 0; k < NPTEPG; k++) { 29265668Skris if ((pt[k] & PG_V) == 0) 29376259Sgreen continue; 29465668Skris pa = pt[k] & PG_FRAME; 29576259Sgreen if (is_dumpable(pa)) 29665668Skris dump_add_page(pa); 29776259Sgreen } 29898675Sdes } 29976259Sgreen } 300255767Sdes 30176259Sgreen /* Calculate dump size. */ 30265668Skris dumpsize = pmapsize; 303162852Sdes dumpsize += round_page(msgbufp->msg_size); 30476259Sgreen dumpsize += round_page(vm_page_dump_size); 305181111Sdes for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { 306181111Sdes bits = vm_page_dump[i]; 307181111Sdes while (bits) { 308255767Sdes bit = bsfq(bits); 309181111Sdes pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; 310181111Sdes /* Clear out undumpable pages now if needed */ 311181111Sdes if (is_dumpable(pa)) { 312181111Sdes dumpsize += PAGE_SIZE; 313181111Sdes } else { 314181111Sdes dump_drop_page(pa); 315181111Sdes } 316181111Sdes bits &= ~(1ul << bit); 317181111Sdes } 318255767Sdes } 319124208Sdes dumpsize += PAGE_SIZE; 32076259Sgreen 32176259Sgreen /* Determine dump offset on device. 
*/ 32276259Sgreen if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) { 32398675Sdes error = E2BIG; 32476259Sgreen goto fail; 32576259Sgreen } 326181111Sdes dumplo = di->mediaoffset + di->mediasize - dumpsize; 32765668Skris dumplo -= sizeof(kdh) * 2; 328181111Sdes progress = dumpsize; 329181111Sdes 33065668Skris /* Initialize mdhdr */ 33176259Sgreen bzero(&mdhdr, sizeof(mdhdr)); 33276259Sgreen strcpy(mdhdr.magic, MINIDUMP_MAGIC); 333146998Sdes mdhdr.version = MINIDUMP_VERSION; 334192595Sdes mdhdr.msgbufsize = msgbufp->msg_size; 33576259Sgreen mdhdr.bitmapsize = vm_page_dump_size; 33676259Sgreen mdhdr.pmapsize = pmapsize; 33776259Sgreen mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS; 33876259Sgreen mdhdr.dmapbase = DMAP_MIN_ADDRESS; 33976259Sgreen mdhdr.dmapend = DMAP_MAX_ADDRESS; 34076259Sgreen 34176259Sgreen mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION, dumpsize, di->blocksize); 34276259Sgreen 34376259Sgreen printf("Dumping %llu out of %ju MB:", (long long)dumpsize >> 20, 34476259Sgreen ptoa((uintmax_t)physmem) / 1048576); 34576259Sgreen 34676259Sgreen /* Dump leader */ 34776259Sgreen error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); 34876259Sgreen if (error) 34976259Sgreen goto fail; 35076259Sgreen dumplo += sizeof(kdh); 35176259Sgreen 352146998Sdes /* Dump my header */ 353146998Sdes bzero(&fakepd, sizeof(fakepd)); 354255767Sdes bcopy(&mdhdr, &fakepd, sizeof(mdhdr)); 35576259Sgreen error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); 35676259Sgreen if (error) 357162852Sdes goto fail; 35876259Sgreen 359146998Sdes /* Dump msgbuf up front */ 360295367Sdes error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size)); 361146998Sdes if (error) 362146998Sdes goto fail; 363146998Sdes 364147001Sdes /* Dump bitmap */ 365146998Sdes error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size)); 36698675Sdes if (error) 367146998Sdes goto fail; 368146998Sdes 369255767Sdes /* Dump kernel page directory pages */ 
37076259Sgreen bzero(fakepd, sizeof(fakepd)); 37176259Sgreen for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + nkpt * NBPDR, 372147001Sdes kernel_vm_end); va += NBPDP) { 373240075Sdes ii = (va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1); 374146998Sdes pml4 = (uint64_t *)PHYS_TO_DMAP(KPML4phys) + ii; 375146998Sdes pdp = (uint64_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); 37698675Sdes i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1); 37792555Sdes 378255767Sdes /* We always write a page, even if it is zero */ 37976259Sgreen if ((pdp[i] & PG_V) == 0) { 38076259Sgreen error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); 381248619Sdes if (error) 38292555Sdes goto fail; 383255767Sdes /* flush, in case we reuse fakepd in the same block */ 38476259Sgreen error = blk_flush(di); 38576259Sgreen if (error) 386157016Sdes goto fail; 387157016Sdes continue; 388157016Sdes } 389157016Sdes 390157016Sdes /* 1GB page is represented as 512 2MB pages in a dump */ 391157016Sdes if ((pdp[i] & PG_PS) != 0) { 392157016Sdes /* PDPE and PDP have identical layout in this case */ 393157016Sdes fakepd[0] = pdp[i]; 394157016Sdes for (j = 1; j < NPDEPG; j++) 395157016Sdes fakepd[j] = fakepd[j - 1] + NBPDR; 396157016Sdes error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); 397157016Sdes if (error) 398157016Sdes goto fail; 399157016Sdes /* flush, in case we reuse fakepd in the same block */ 400157016Sdes error = blk_flush(di); 401157016Sdes if (error) 402255767Sdes goto fail; 403157016Sdes bzero(fakepd, sizeof(fakepd)); 404157016Sdes continue; 405157016Sdes } 406162852Sdes 407157016Sdes pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME); 408255767Sdes error = blk_write(di, (char *)pd, 0, PAGE_SIZE); 409157016Sdes if (error) 410157016Sdes goto fail; 411157016Sdes error = blk_flush(di); 412157016Sdes if (error) 413157016Sdes goto fail; 414157016Sdes } 415157016Sdes 416157016Sdes /* Dump memory chunks */ 417157016Sdes /* XXX cluster it up and use blk_dump() */ 418157016Sdes for (i = 0; i < 
vm_page_dump_size / sizeof(*vm_page_dump); i++) { 419157016Sdes bits = vm_page_dump[i]; 420157016Sdes while (bits) { 42165668Skris bit = bsfq(bits); 42265668Skris pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; 42365668Skris error = blk_write(di, 0, pa, PAGE_SIZE); 42465668Skris if (error) 42565668Skris goto fail; 42676259Sgreen bits &= ~(1ul << bit); 42765668Skris } 42876259Sgreen } 42965668Skris 43076259Sgreen error = blk_flush(di); 43165668Skris if (error) 43276259Sgreen goto fail; 43365668Skris 43465668Skris /* Dump trailer */ 43598675Sdes error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); 43665668Skris if (error) 43765668Skris goto fail; 43865668Skris dumplo += sizeof(kdh); 43965668Skris 440124208Sdes /* Signal completion, signoff and exit stage left. */ 44176259Sgreen dump_write(di, NULL, 0, 0, 0); 44298675Sdes printf("\nDump complete\n"); 44376259Sgreen return (0); 44498675Sdes 44565668Skris fail: 44665668Skris if (error < 0) 44765668Skris error = -error; 448204917Sdes 449215116Sdes printf("\n"); 450215116Sdes if (error == ENOSPC) { 451215116Sdes printf("Dump map grown while dumping. "); 452295367Sdes if (retry_count < 5) { 453215116Sdes printf("Retrying...\n"); 454215116Sdes goto retry; 455215116Sdes } 456215116Sdes printf("Dump failed.\n"); 457215116Sdes } 458215116Sdes else if (error == ECANCELED) 459215116Sdes printf("Dump aborted\n"); 460215116Sdes else if (error == E2BIG) 461204917Sdes printf("Dump failed. 
Partition too small.\n"); 462323124Sdes else 463215116Sdes printf("** DUMP FAILED (ERROR %d) **\n", error); 464215116Sdes return (error); 465255767Sdes} 466295367Sdes 467295367Sdesvoid 468204917Sdesdump_add_page(vm_paddr_t pa) 469295367Sdes{ 470295367Sdes int idx, bit; 471295367Sdes 472295367Sdes pa >>= PAGE_SHIFT; 473204917Sdes idx = pa >> 6; /* 2^6 = 64 */ 474295367Sdes bit = pa & 63; 475295367Sdes atomic_set_long(&vm_page_dump[idx], 1ul << bit); 476295367Sdes} 477295367Sdes 478295367Sdesvoid 479295367Sdesdump_drop_page(vm_paddr_t pa) 480295367Sdes{ 481204917Sdes int idx, bit; 482204917Sdes 483295367Sdes pa >>= PAGE_SHIFT; 484295367Sdes idx = pa >> 6; /* 2^6 = 64 */ 485215116Sdes bit = pa & 63; 486215116Sdes atomic_clear_long(&vm_page_dump[idx], 1ul << bit); 487215116Sdes} 488215116Sdes