/* minidump_machdep.c revision 225194 */
1/*- 2 * Copyright (c) 2006 Peter Wemm 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: head/sys/amd64/amd64/minidump_machdep.c 225194 2011-08-26 17:08:22Z jhb $"); 29 30#include "opt_pmap.h" 31#include "opt_watchdog.h" 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/conf.h> 36#include <sys/cons.h> 37#include <sys/kernel.h> 38#include <sys/kerneldump.h> 39#include <sys/msgbuf.h> 40#ifdef SW_WATCHDOG 41#include <sys/watchdog.h> 42#endif 43#include <vm/vm.h> 44#include <vm/pmap.h> 45#include <machine/atomic.h> 46#include <machine/elf.h> 47#include <machine/md_var.h> 48#include <machine/vmparam.h> 49#include <machine/minidump.h> 50 51CTASSERT(sizeof(struct kerneldumpheader) == 512); 52 53/* 54 * Don't touch the first SIZEOF_METADATA bytes on the dump device. This 55 * is to protect us from metadata and to protect metadata from us. 56 */ 57#define SIZEOF_METADATA (64*1024) 58 59#define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK) 60#define DEV_ALIGN(x) (((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1)) 61 62uint64_t *vm_page_dump; 63int vm_page_dump_size; 64 65static struct kerneldumpheader kdh; 66static off_t dumplo; 67 68/* Handle chunked writes. 
*/ 69static size_t fragsz; 70static void *dump_va; 71static size_t counter, progress, dumpsize; 72 73CTASSERT(sizeof(*vm_page_dump) == 8); 74 75static int 76is_dumpable(vm_paddr_t pa) 77{ 78 int i; 79 80 for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { 81 if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) 82 return (1); 83 } 84 return (0); 85} 86 87#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8) 88 89static int 90blk_flush(struct dumperinfo *di) 91{ 92 int error; 93 94 if (fragsz == 0) 95 return (0); 96 97 error = dump_write(di, dump_va, 0, dumplo, fragsz); 98 dumplo += fragsz; 99 fragsz = 0; 100 return (error); 101} 102 103static struct { 104 int min_per; 105 int max_per; 106 int visited; 107} progress_track[10] = { 108 { 0, 10, 0}, 109 { 10, 20, 0}, 110 { 20, 30, 0}, 111 { 30, 40, 0}, 112 { 40, 50, 0}, 113 { 50, 60, 0}, 114 { 60, 70, 0}, 115 { 70, 80, 0}, 116 { 80, 90, 0}, 117 { 90, 100, 0} 118}; 119 120static void 121report_progress(size_t progress, size_t dumpsize) 122{ 123 int sofar, i; 124 125 sofar = 100 - ((progress * 100) / dumpsize); 126 for (i = 0; i < 10; i++) { 127 if (sofar < progress_track[i].min_per || sofar > progress_track[i].max_per) 128 continue; 129 if (progress_track[i].visited) 130 return; 131 progress_track[i].visited = 1; 132 printf("..%d%%", sofar); 133 return; 134 } 135} 136 137static int 138blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) 139{ 140 size_t len; 141 int error, i, c; 142 u_int maxdumpsz; 143 144 maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE); 145 if (maxdumpsz == 0) /* seatbelt */ 146 maxdumpsz = PAGE_SIZE; 147 error = 0; 148 if ((sz % PAGE_SIZE) != 0) { 149 printf("size not page aligned\n"); 150 return (EINVAL); 151 } 152 if (ptr != NULL && pa != 0) { 153 printf("cant have both va and pa!\n"); 154 return (EINVAL); 155 } 156 if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) { 157 printf("address not page aligned\n"); 158 return (EINVAL); 159 } 160 if (ptr != NULL) { 161 /* 
If we're doing a virtual dump, flush any pre-existing pa pages */ 162 error = blk_flush(di); 163 if (error) 164 return (error); 165 } 166 while (sz) { 167 len = maxdumpsz - fragsz; 168 if (len > sz) 169 len = sz; 170 counter += len; 171 progress -= len; 172 if (counter >> 24) { 173 report_progress(progress, dumpsize); 174 counter &= (1<<24) - 1; 175 } 176#ifdef SW_WATCHDOG 177 wdog_kern_pat(WD_LASTVAL); 178#endif 179 if (ptr) { 180 error = dump_write(di, ptr, 0, dumplo, len); 181 if (error) 182 return (error); 183 dumplo += len; 184 ptr += len; 185 sz -= len; 186 } else { 187 for (i = 0; i < len; i += PAGE_SIZE) 188 dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT); 189 fragsz += len; 190 pa += len; 191 sz -= len; 192 if (fragsz == maxdumpsz) { 193 error = blk_flush(di); 194 if (error) 195 return (error); 196 } 197 } 198 199 /* Check for user abort. */ 200 c = cncheckc(); 201 if (c == 0x03) 202 return (ECANCELED); 203 if (c != -1) 204 printf(" (CTRL-C to abort) "); 205 } 206 207 return (0); 208} 209 210/* A fake page table page, to avoid having to handle both 4K and 2M pages */ 211static pd_entry_t fakepd[NPDEPG]; 212 213void 214minidumpsys(struct dumperinfo *di) 215{ 216 uint32_t pmapsize; 217 vm_offset_t va; 218 int error; 219 uint64_t bits; 220 uint64_t *pdp, *pd, *pt, pa; 221 int i, j, k, n, bit; 222 int retry_count; 223 struct minidumphdr mdhdr; 224 225 retry_count = 0; 226 retry: 227 retry_count++; 228 counter = 0; 229 /* Walk page table pages, set bits in vm_page_dump */ 230 pmapsize = 0; 231 pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys); 232 for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + NKPT * NBPDR, 233 kernel_vm_end); ) { 234 /* 235 * We always write a page, even if it is zero. 
Each 236 * page written corresponds to 1GB of space 237 */ 238 pmapsize += PAGE_SIZE; 239 i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1); 240 if ((pdp[i] & PG_V) == 0) { 241 va += NBPDP; 242 continue; 243 } 244 245 /* 246 * 1GB page is represented as 512 2MB pages in a dump. 247 */ 248 if ((pdp[i] & PG_PS) != 0) { 249 va += NBPDP; 250 pa = pdp[i] & PG_PS_FRAME; 251 for (n = 0; n < NPDEPG * NPTEPG; n++) { 252 if (is_dumpable(pa)) 253 dump_add_page(pa); 254 pa += PAGE_SIZE; 255 } 256 continue; 257 } 258 259 pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME); 260 for (n = 0; n < NPDEPG; n++, va += NBPDR) { 261 j = (va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1); 262 263 if ((pd[j] & PG_V) == 0) 264 continue; 265 266 if ((pd[j] & PG_PS) != 0) { 267 /* This is an entire 2M page. */ 268 pa = pd[j] & PG_PS_FRAME; 269 for (k = 0; k < NPTEPG; k++) { 270 if (is_dumpable(pa)) 271 dump_add_page(pa); 272 pa += PAGE_SIZE; 273 } 274 continue; 275 } 276 277 pa = pd[j] & PG_FRAME; 278 /* set bit for this PTE page */ 279 if (is_dumpable(pa)) 280 dump_add_page(pa); 281 /* and for each valid page in this 2MB block */ 282 pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME); 283 for (k = 0; k < NPTEPG; k++) { 284 if ((pt[k] & PG_V) == 0) 285 continue; 286 pa = pt[k] & PG_FRAME; 287 if (is_dumpable(pa)) 288 dump_add_page(pa); 289 } 290 } 291 } 292 293 /* Calculate dump size. */ 294 dumpsize = pmapsize; 295 dumpsize += round_page(msgbufp->msg_size); 296 dumpsize += round_page(vm_page_dump_size); 297 for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { 298 bits = vm_page_dump[i]; 299 while (bits) { 300 bit = bsfq(bits); 301 pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; 302 /* Clear out undumpable pages now if needed */ 303 if (is_dumpable(pa)) { 304 dumpsize += PAGE_SIZE; 305 } else { 306 dump_drop_page(pa); 307 } 308 bits &= ~(1ul << bit); 309 } 310 } 311 dumpsize += PAGE_SIZE; 312 313 /* Determine dump offset on device. 
*/ 314 if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) { 315 error = E2BIG; 316 goto fail; 317 } 318 dumplo = di->mediaoffset + di->mediasize - dumpsize; 319 dumplo -= sizeof(kdh) * 2; 320 progress = dumpsize; 321 322 /* Initialize mdhdr */ 323 bzero(&mdhdr, sizeof(mdhdr)); 324 strcpy(mdhdr.magic, MINIDUMP_MAGIC); 325 mdhdr.version = MINIDUMP_VERSION; 326 mdhdr.msgbufsize = msgbufp->msg_size; 327 mdhdr.bitmapsize = vm_page_dump_size; 328 mdhdr.pmapsize = pmapsize; 329 mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS; 330 mdhdr.dmapbase = DMAP_MIN_ADDRESS; 331 mdhdr.dmapend = DMAP_MAX_ADDRESS; 332 333 mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION, dumpsize, di->blocksize); 334 335 printf("Dumping %llu out of %ju MB:", (long long)dumpsize >> 20, 336 ptoa((uintmax_t)physmem) / 1048576); 337 338 /* Dump leader */ 339 error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); 340 if (error) 341 goto fail; 342 dumplo += sizeof(kdh); 343 344 /* Dump my header */ 345 bzero(&fakepd, sizeof(fakepd)); 346 bcopy(&mdhdr, &fakepd, sizeof(mdhdr)); 347 error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); 348 if (error) 349 goto fail; 350 351 /* Dump msgbuf up front */ 352 error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size)); 353 if (error) 354 goto fail; 355 356 /* Dump bitmap */ 357 error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size)); 358 if (error) 359 goto fail; 360 361 /* Dump kernel page directory pages */ 362 bzero(fakepd, sizeof(fakepd)); 363 pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys); 364 for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + NKPT * NBPDR, 365 kernel_vm_end); va += NBPDP) { 366 i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1); 367 368 /* We always write a page, even if it is zero */ 369 if ((pdp[i] & PG_V) == 0) { 370 error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); 371 if (error) 372 goto fail; 373 /* flush, in case we reuse fakepd in the same block */ 374 error = blk_flush(di); 
375 if (error) 376 goto fail; 377 continue; 378 } 379 380 /* 1GB page is represented as 512 2MB pages in a dump */ 381 if ((pdp[i] & PG_PS) != 0) { 382 /* PDPE and PDP have identical layout in this case */ 383 fakepd[0] = pdp[i]; 384 for (j = 1; j < NPDEPG; j++) 385 fakepd[j] = fakepd[j - 1] + NBPDR; 386 error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); 387 if (error) 388 goto fail; 389 /* flush, in case we reuse fakepd in the same block */ 390 error = blk_flush(di); 391 if (error) 392 goto fail; 393 bzero(fakepd, sizeof(fakepd)); 394 continue; 395 } 396 397 pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME); 398 error = blk_write(di, (char *)pd, 0, PAGE_SIZE); 399 if (error) 400 goto fail; 401 error = blk_flush(di); 402 if (error) 403 goto fail; 404 } 405 406 /* Dump memory chunks */ 407 /* XXX cluster it up and use blk_dump() */ 408 for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { 409 bits = vm_page_dump[i]; 410 while (bits) { 411 bit = bsfq(bits); 412 pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; 413 error = blk_write(di, 0, pa, PAGE_SIZE); 414 if (error) 415 goto fail; 416 bits &= ~(1ul << bit); 417 } 418 } 419 420 error = blk_flush(di); 421 if (error) 422 goto fail; 423 424 /* Dump trailer */ 425 error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); 426 if (error) 427 goto fail; 428 dumplo += sizeof(kdh); 429 430 /* Signal completion, signoff and exit stage left. */ 431 dump_write(di, NULL, 0, 0, 0); 432 printf("\nDump complete\n"); 433 return; 434 435 fail: 436 if (error < 0) 437 error = -error; 438 439 printf("\n"); 440 if (error == ENOSPC) { 441 printf("Dump map grown while dumping. "); 442 if (retry_count < 5) { 443 printf("Retrying...\n"); 444 goto retry; 445 } 446 printf("Dump failed.\n"); 447 } 448 else if (error == ECANCELED) 449 printf("Dump aborted\n"); 450 else if (error == E2BIG) 451 printf("Dump failed. 
Partition too small.\n"); 452 else 453 printf("** DUMP FAILED (ERROR %d) **\n", error); 454} 455 456void 457dump_add_page(vm_paddr_t pa) 458{ 459 int idx, bit; 460 461 pa >>= PAGE_SHIFT; 462 idx = pa >> 6; /* 2^6 = 64 */ 463 bit = pa & 63; 464 atomic_set_long(&vm_page_dump[idx], 1ul << bit); 465} 466 467void 468dump_drop_page(vm_paddr_t pa) 469{ 470 int idx, bit; 471 472 pa >>= PAGE_SHIFT; 473 idx = pa >> 6; /* 2^6 = 64 */ 474 bit = pa & 63; 475 atomic_clear_long(&vm_page_dump[idx], 1ul << bit); 476} 477