minidump_machdep.c revision 193066
1256905Sray/*- 2256905Sray * Copyright (c) 2006 Peter Wemm 3256905Sray * All rights reserved. 4256905Sray * 5256905Sray * Redistribution and use in source and binary forms, with or without 6256905Sray * modification, are permitted provided that the following conditions 7256905Sray * are met: 8256905Sray * 9256905Sray * 1. Redistributions of source code must retain the above copyright 10256905Sray * notice, this list of conditions and the following disclaimer. 11256905Sray * 2. Redistributions in binary form must reproduce the above copyright 12256905Sray * notice, this list of conditions and the following disclaimer in the 13256905Sray * documentation and/or other materials provided with the distribution. 14256905Sray * 15256905Sray * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16256905Sray * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17256905Sray * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18256905Sray * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19256905Sray * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20256905Sray * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21256905Sray * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22256905Sray * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23256905Sray * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24256905Sray * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25256905Sray */ 26256905Sray 27256905Sray#include <sys/cdefs.h> 28256905Sray__FBSDID("$FreeBSD: head/sys/amd64/amd64/minidump_machdep.c 193066 2009-05-29 21:27:12Z jamie $"); 29256905Sray 30256905Sray#include <sys/param.h> 31256905Sray#include <sys/systm.h> 32256905Sray#include <sys/conf.h> 33256905Sray#include <sys/cons.h> 34256905Sray#include <sys/kernel.h> 35256905Sray#include <sys/kerneldump.h> 36256905Sray#include <sys/msgbuf.h> 37256905Sray#include <vm/vm.h> 38256905Sray#include <vm/pmap.h> 39256905Sray#include <machine/atomic.h> 40256905Sray#include <machine/elf.h> 41257438Sray#include <machine/md_var.h> 42256905Sray#include <machine/vmparam.h> 43256905Sray#include <machine/minidump.h> 44256905Sray 45257438SrayCTASSERT(sizeof(struct kerneldumpheader) == 512); 46256905Sray 47256905Sray/* 48257438Sray * Don't touch the first SIZEOF_METADATA bytes on the dump device. This 49257438Sray * is to protect us from metadata and to protect metadata from us. 50257438Sray */ 51257727Sray#define SIZEOF_METADATA (64*1024) 52256905Sray 53256905Sray#define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK) 54279752Shselasky#define DEV_ALIGN(x) (((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1)) 55279752Shselasky 56279752Shselaskyextern uint64_t KPDPphys; 57257438Sray 58257438Srayuint64_t *vm_page_dump; 59256905Srayint vm_page_dump_size; 60256905Sray 61256905Sraystatic struct kerneldumpheader kdh; 62256905Sraystatic off_t dumplo; 63256905Sray 64256905Sray/* Handle chunked writes. */ 65256905Sraystatic size_t fragsz; 66256905Sraystatic void *dump_va; 67257438Sraystatic size_t counter, progress; 68257438Sray 69257438SrayCTASSERT(sizeof(*vm_page_dump) == 8); 70257438Sray 71257438Sraystatic int 72256905Srayis_dumpable(vm_paddr_t pa) 73256905Sray{ 74256905Sray int i; 75256905Sray 76256905Sray for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { 77256905Sray if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) 78256905Sray return (1); 79256905Sray } 80256905Sray return (0); 81256905Sray} 82256905Sray 83256905Sray#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8) 84256905Sray 85256905Sraystatic int 86256905Srayblk_flush(struct dumperinfo *di) 87256905Sray{ 88256905Sray int error; 89256905Sray 90256905Sray if (fragsz == 0) 91256905Sray return (0); 92256905Sray 93256905Sray error = dump_write(di, dump_va, 0, dumplo, fragsz); 94256905Sray dumplo += fragsz; 95256905Sray fragsz = 0; 96256905Sray return (error); 97256905Sray} 98256905Sray 99256905Sraystatic int 100256905Srayblk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) 101256905Sray{ 102256905Sray size_t len; 103256905Sray int error, i, c; 104256905Sray u_int maxdumpsz; 105256905Sray 106256905Sray maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE); 107256905Sray if (maxdumpsz == 0) /* seatbelt */ 108256905Sray maxdumpsz = PAGE_SIZE; 109256905Sray error = 0; 110256905Sray if ((sz % PAGE_SIZE) != 0) { 111256905Sray printf("size not page aligned\n"); 112256905Sray return (EINVAL); 113256905Sray } 114256905Sray if (ptr != NULL && pa != 0) { 115258491Sray printf("cant have both va and pa!\n"); 116258491Sray return (EINVAL); 117256905Sray } 118258491Sray if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) { 119258491Sray printf("address not page aligned\n"); 120258491Sray return (EINVAL); 121258491Sray } 122258491Sray if (ptr != NULL) { 123258491Sray /* If we're doing a virtual dump, flush any pre-existing pa pages */ 124258491Sray error = blk_flush(di); 125258491Sray if (error) 126258491Sray return (error); 127258491Sray } 128258491Sray while (sz) { 129258491Sray len = maxdumpsz - fragsz; 130258491Sray if (len > sz) 131258491Sray len = sz; 132258491Sray counter += len; 133258491Sray progress -= len; 134258491Sray if (counter >> 24) { 135258491Sray printf(" %ld", PG2MB(progress >> PAGE_SHIFT)); 136258491Sray counter &= (1<<24) - 1; 137258491Sray } 138258491Sray if (ptr) { 139258491Sray error = dump_write(di, ptr, 0, dumplo, len); 140278846Shselasky if (error) 141278846Shselasky return (error); 142258491Sray dumplo += len; 143258491Sray ptr += len; 144258491Sray sz -= len; 145258491Sray } else { 146258491Sray for (i = 0; i < len; i += PAGE_SIZE) 147258491Sray dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT); 148258491Sray fragsz += len; 149256905Sray pa += len; 150256905Sray sz -= len; 151256905Sray if (fragsz == maxdumpsz) { 152256905Sray error = blk_flush(di); 153256905Sray if (error) 154256905Sray return (error); 155256905Sray } 156256905Sray } 157256905Sray 158256905Sray /* Check for user abort. */ 159256905Sray c = cncheckc(); 160256905Sray if (c == 0x03) 161256905Sray return (ECANCELED); 162256905Sray if (c != -1) 163256905Sray printf(" (CTRL-C to abort) "); 164256905Sray } 165256905Sray 166256905Sray return (0); 167256905Sray} 168256905Sray 169256905Sray/* A fake page table page, to avoid having to handle both 4K and 2M pages */ 170256905Sraystatic pt_entry_t fakept[NPTEPG]; 171256905Sray 172269620Snwhitehornvoid 173279752Shselaskyminidumpsys(struct dumperinfo *di) 174269620Snwhitehorn{ 175269620Snwhitehorn uint64_t dumpsize; 176279752Shselasky uint32_t ptesize; 177279752Shselasky vm_offset_t va; 178279752Shselasky int error; 179279752Shselasky uint64_t bits; 180279752Shselasky uint64_t *pdp, *pd, *pt, pa; 181307589Sgonzo int i, j, k, bit; 182307589Sgonzo struct minidumphdr mdhdr; 183256905Sray 184256905Sray counter = 0; 185256905Sray /* Walk page table pages, set bits in vm_page_dump */ 186256905Sray ptesize = 0; 187256905Sray pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys); 188256905Sray for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + NKPT * NBPDR, 189256905Sray kernel_vm_end); va += NBPDR) { 190256905Sray i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1); 191256905Sray /* 192256905Sray * We always write a page, even if it is zero. Each 193256905Sray * page written corresponds to 2MB of space 194256905Sray */ 195256905Sray ptesize += PAGE_SIZE; 196256905Sray if ((pdp[i] & PG_V) == 0) 197259777Sray continue; 198256905Sray pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME); 199256905Sray j = ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1)); 200256905Sray if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) { 201256905Sray /* This is an entire 2M page. */ 202256905Sray pa = pd[j] & PG_PS_FRAME; 203256905Sray for (k = 0; k < NPTEPG; k++) { 204256905Sray if (is_dumpable(pa)) 205256905Sray dump_add_page(pa); 206256905Sray pa += PAGE_SIZE; 207256905Sray } 208256905Sray continue; 209256905Sray } 210256905Sray if ((pd[j] & PG_V) == PG_V) { 211256905Sray /* set bit for each valid page in this 2MB block */ 212256905Sray pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME); 213256905Sray for (k = 0; k < NPTEPG; k++) { 214256905Sray if ((pt[k] & PG_V) == PG_V) { 215256905Sray pa = pt[k] & PG_FRAME; 216256905Sray if (is_dumpable(pa)) 217256905Sray dump_add_page(pa); 218256905Sray } 219256905Sray } 220256905Sray } else { 221256905Sray /* nothing, we're going to dump a null page */ 222256905Sray } 223256905Sray } 224256905Sray 225256905Sray /* Calculate dump size. */ 226256905Sray dumpsize = ptesize; 227256905Sray dumpsize += round_page(msgbufp->msg_size); 228256905Sray dumpsize += round_page(vm_page_dump_size); 229256905Sray for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { 230256905Sray bits = vm_page_dump[i]; 231256905Sray while (bits) { 232256905Sray bit = bsfq(bits); 233256905Sray pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; 234256905Sray /* Clear out undumpable pages now if needed */ 235256905Sray if (is_dumpable(pa)) { 236256905Sray dumpsize += PAGE_SIZE; 237256905Sray } else { 238256905Sray dump_drop_page(pa); 239256905Sray } 240256905Sray bits &= ~(1ul << bit); 241256905Sray } 242256905Sray } 243256905Sray dumpsize += PAGE_SIZE; 244256905Sray 245256905Sray /* Determine dump offset on device. */ 246256905Sray if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) { 247256905Sray error = ENOSPC; 248256905Sray goto fail; 249256905Sray } 250256905Sray dumplo = di->mediaoffset + di->mediasize - dumpsize; 251256905Sray dumplo -= sizeof(kdh) * 2; 252256905Sray progress = dumpsize; 253256905Sray 254256905Sray /* Initialize mdhdr */ 255256905Sray bzero(&mdhdr, sizeof(mdhdr)); 256256905Sray strcpy(mdhdr.magic, MINIDUMP_MAGIC); 257269620Snwhitehorn mdhdr.version = MINIDUMP_VERSION; 258269779Sdumbbell mdhdr.msgbufsize = msgbufp->msg_size; 259269779Sdumbbell mdhdr.bitmapsize = vm_page_dump_size; 260269779Sdumbbell mdhdr.ptesize = ptesize; 261269620Snwhitehorn mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS; 262256905Sray mdhdr.dmapbase = DMAP_MIN_ADDRESS; 263256905Sray mdhdr.dmapend = DMAP_MAX_ADDRESS; 264256905Sray 265256905Sray mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION, dumpsize, di->blocksize); 266256905Sray 267256905Sray printf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576); 268256905Sray printf("Dumping %llu MB:", (long long)dumpsize >> 20); 269256905Sray 270256905Sray /* Dump leader */ 271256905Sray error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); 272256905Sray if (error) 273256905Sray goto fail; 274279488Sdumbbell dumplo += sizeof(kdh); 275279488Sdumbbell 276256905Sray /* Dump my header */ 277256905Sray bzero(&fakept, sizeof(fakept)); 278256905Sray bcopy(&mdhdr, &fakept, sizeof(mdhdr)); 279256905Sray error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); 280256905Sray if (error) 281256905Sray goto fail; 282256905Sray 283256905Sray /* Dump msgbuf up front */ 284256905Sray error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size)); 285256905Sray if (error) 286256905Sray goto fail; 287256905Sray 288256905Sray /* Dump bitmap */ 289256905Sray error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size)); 290256905Sray if (error) 291256905Sray goto fail; 292256905Sray 293256905Sray /* Dump kernel page table pages */ 294256905Sray pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys); 295256905Sray for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + NKPT * NBPDR, 296256905Sray kernel_vm_end); va += NBPDR) { 297256905Sray i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1); 298256905Sray /* We always write a page, even if it is zero */ 299256905Sray if ((pdp[i] & PG_V) == 0) { 300256905Sray bzero(fakept, sizeof(fakept)); 301256905Sray error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); 302256905Sray if (error) 303256905Sray goto fail; 304256905Sray /* flush, in case we reuse fakept in the same block */ 305256905Sray error = blk_flush(di); 306256905Sray if (error) 307257438Sray goto fail; 308257438Sray continue; 309257438Sray } 310257438Sray pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME); 311257438Sray j = ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1)); 312257438Sray if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) { 313257438Sray /* This is a single 2M block. Generate a fake PTP */ 314257438Sray pa = pd[j] & PG_PS_FRAME; 315257438Sray for (k = 0; k < NPTEPG; k++) { 316257438Sray fakept[k] = (pa + (k * PAGE_SIZE)) | PG_V | PG_RW | PG_A | PG_M; 317257438Sray } 318257438Sray error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); 319257438Sray if (error) 320257438Sray goto fail; 321257438Sray /* flush, in case we reuse fakept in the same block */ 322257438Sray error = blk_flush(di); 323257438Sray if (error) 324257438Sray goto fail; 325257438Sray continue; 326257546Sray } 327257546Sray if ((pd[j] & PG_V) == PG_V) { 328257438Sray pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME); 329257438Sray error = blk_write(di, (char *)pt, 0, PAGE_SIZE); 330257438Sray if (error) 331257438Sray goto fail; 332257438Sray } else { 333257438Sray bzero(fakept, sizeof(fakept)); 334257438Sray error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); 335257438Sray if (error) 336257438Sray goto fail; 337257438Sray /* flush, in case we reuse fakept in the same block */ 338257438Sray error = blk_flush(di); 339257438Sray if (error) 340257438Sray goto fail; 341257438Sray } 342257438Sray } 343257438Sray 344257438Sray /* Dump memory chunks */ 345257438Sray /* XXX cluster it up and use blk_dump() */ 346257438Sray for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { 347257438Sray bits = vm_page_dump[i]; 348257438Sray while (bits) { 349257438Sray bit = bsfq(bits); 350257438Sray pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; 351257438Sray error = blk_write(di, 0, pa, PAGE_SIZE); 352257438Sray if (error) 353257438Sray goto fail; 354257438Sray bits &= ~(1ul << bit); 355257438Sray } 356257438Sray } 357257438Sray 358257438Sray error = blk_flush(di); 359257438Sray if (error) 360257438Sray goto fail; 361257438Sray 362257438Sray /* Dump trailer */ 363257438Sray error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); 364257438Sray if (error) 365257438Sray goto fail; 366257517Sray dumplo += sizeof(kdh); 367279752Shselasky 368257438Sray /* Signal completion, signoff and exit stage left. */ 369257438Sray dump_write(di, NULL, 0, 0, 0); 370 printf("\nDump complete\n"); 371 return; 372 373 fail: 374 if (error < 0) 375 error = -error; 376 377 if (error == ECANCELED) 378 printf("\nDump aborted\n"); 379 else if (error == ENOSPC) 380 printf("\nDump failed. Partition too small.\n"); 381 else 382 printf("\n** DUMP FAILED (ERROR %d) **\n", error); 383} 384 385void 386dump_add_page(vm_paddr_t pa) 387{ 388 int idx, bit; 389 390 pa >>= PAGE_SHIFT; 391 idx = pa >> 6; /* 2^6 = 64 */ 392 bit = pa & 63; 393 atomic_set_long(&vm_page_dump[idx], 1ul << bit); 394} 395 396void 397dump_drop_page(vm_paddr_t pa) 398{ 399 int idx, bit; 400 401 pa >>= PAGE_SHIFT; 402 idx = pa >> 6; /* 2^6 = 64 */ 403 bit = pa & 63; 404 atomic_clear_long(&vm_page_dump[idx], 1ul << bit); 405} 406