minidump_machdep.c revision 221069
1/*- 2 * Copyright (c) 2006 Peter Wemm 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: head/sys/amd64/amd64/minidump_machdep.c 221069 2011-04-26 16:14:55Z sobomax $"); 29 30#include <sys/param.h> 31#include <sys/systm.h> 32#include <sys/conf.h> 33#include <sys/cons.h> 34#include <sys/kernel.h> 35#include <sys/kerneldump.h> 36#include <sys/msgbuf.h> 37#include <vm/vm.h> 38#include <vm/pmap.h> 39#include <machine/atomic.h> 40#include <machine/elf.h> 41#include <machine/md_var.h> 42#include <machine/vmparam.h> 43#include <machine/minidump.h> 44 45CTASSERT(sizeof(struct kerneldumpheader) == 512); 46 47/* 48 * Don't touch the first SIZEOF_METADATA bytes on the dump device. This 49 * is to protect us from metadata and to protect metadata from us. 50 */ 51#define SIZEOF_METADATA (64*1024) 52 53#define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK) 54#define DEV_ALIGN(x) (((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1)) 55 56uint64_t *vm_page_dump; 57int vm_page_dump_size; 58 59static struct kerneldumpheader kdh; 60static off_t dumplo; 61 62/* Handle chunked writes. */ 63static size_t fragsz; 64static void *dump_va; 65static size_t counter, progress, dumpsize; 66 67CTASSERT(sizeof(*vm_page_dump) == 8); 68 69static int 70is_dumpable(vm_paddr_t pa) 71{ 72 int i; 73 74 for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { 75 if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) 76 return (1); 77 } 78 return (0); 79} 80 81#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8) 82 83static int 84blk_flush(struct dumperinfo *di) 85{ 86 int error; 87 88 if (fragsz == 0) 89 return (0); 90 91 error = dump_write(di, dump_va, 0, dumplo, fragsz); 92 dumplo += fragsz; 93 fragsz = 0; 94 return (error); 95} 96 97static struct { 98 int min_per; 99 int max_per; 100 int visited; 101} progress_track[10] = { 102 { 0, 10, 0}, 103 { 10, 20, 0}, 104 { 20, 30, 0}, 105 { 30, 40, 0}, 106 { 40, 50, 0}, 107 { 50, 60, 0}, 108 { 60, 70, 0}, 109 { 70, 80, 0}, 110 { 80, 90, 0}, 111 { 90, 100, 0} 112}; 113 114static void 115report_progress(size_t progress, size_t dumpsize) 116{ 117 int sofar, i; 118 119 sofar = 100 - ((progress * 100) / dumpsize); 120 for (i = 0; i < 10; i++) { 121 if (sofar < progress_track[i].min_per || sofar > progress_track[i].max_per) 122 continue; 123 if (progress_track[i].visited) 124 return; 125 progress_track[i].visited = 1; 126 printf("..%d%%", sofar); 127 return; 128 } 129} 130 131static int 132blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) 133{ 134 size_t len; 135 int error, i, c; 136 u_int maxdumpsz; 137 138 maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE); 139 if (maxdumpsz == 0) /* seatbelt */ 140 maxdumpsz = PAGE_SIZE; 141 error = 0; 142 if ((sz % PAGE_SIZE) != 0) { 143 printf("size not page aligned\n"); 144 return (EINVAL); 145 } 146 if (ptr != NULL && pa != 0) { 147 printf("cant have both va and pa!\n"); 148 return (EINVAL); 149 } 150 if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) { 151 printf("address not page aligned\n"); 152 return (EINVAL); 153 } 154 if (ptr != NULL) { 155 /* If we're doing a virtual dump, flush any pre-existing pa pages */ 156 error = blk_flush(di); 157 if (error) 158 return (error); 159 } 160 while (sz) { 161 len = maxdumpsz - fragsz; 162 if (len > sz) 163 len = sz; 164 counter += len; 165 progress -= len; 166 if (counter >> 24) { 167 report_progress(progress, dumpsize); 168 counter &= (1<<24) - 1; 169 } 170 if (ptr) { 171 error = dump_write(di, ptr, 0, dumplo, len); 172 if (error) 173 return (error); 174 dumplo += len; 175 ptr += len; 176 sz -= len; 177 } else { 178 for (i = 0; i < len; i += PAGE_SIZE) 179 dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT); 180 fragsz += len; 181 pa += len; 182 sz -= len; 183 if (fragsz == maxdumpsz) { 184 error = blk_flush(di); 185 if (error) 186 return (error); 187 } 188 } 189 190 /* Check for user abort. */ 191 c = cncheckc(); 192 if (c == 0x03) 193 return (ECANCELED); 194 if (c != -1) 195 printf(" (CTRL-C to abort) "); 196 } 197 198 return (0); 199} 200 201/* A fake page table page, to avoid having to handle both 4K and 2M pages */ 202static pd_entry_t fakepd[NPDEPG]; 203 204void 205minidumpsys(struct dumperinfo *di) 206{ 207 uint32_t pmapsize; 208 vm_offset_t va; 209 int error; 210 uint64_t bits; 211 uint64_t *pdp, *pd, *pt, pa; 212 int i, j, k, n, bit; 213 int retry_count; 214 struct minidumphdr mdhdr; 215 216 retry_count = 0; 217 retry: 218 retry_count++; 219 counter = 0; 220 /* Walk page table pages, set bits in vm_page_dump */ 221 pmapsize = 0; 222 pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys); 223 for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + NKPT * NBPDR, 224 kernel_vm_end); ) { 225 /* 226 * We always write a page, even if it is zero. Each 227 * page written corresponds to 1GB of space 228 */ 229 pmapsize += PAGE_SIZE; 230 i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1); 231 if ((pdp[i] & PG_V) == 0) { 232 va += NBPDP; 233 continue; 234 } 235 236 /* 237 * 1GB page is represented as 512 2MB pages in a dump. 238 */ 239 if ((pdp[i] & PG_PS) != 0) { 240 va += NBPDP; 241 pa = pdp[i] & PG_PS_FRAME; 242 for (n = 0; n < NPDEPG * NPTEPG; n++) { 243 if (is_dumpable(pa)) 244 dump_add_page(pa); 245 pa += PAGE_SIZE; 246 } 247 continue; 248 } 249 250 pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME); 251 for (n = 0; n < NPDEPG; n++, va += NBPDR) { 252 j = (va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1); 253 254 if ((pd[j] & PG_V) == 0) 255 continue; 256 257 if ((pd[j] & PG_PS) != 0) { 258 /* This is an entire 2M page. */ 259 pa = pd[j] & PG_PS_FRAME; 260 for (k = 0; k < NPTEPG; k++) { 261 if (is_dumpable(pa)) 262 dump_add_page(pa); 263 pa += PAGE_SIZE; 264 } 265 continue; 266 } 267 268 pa = pd[j] & PG_FRAME; 269 /* set bit for this PTE page */ 270 if (is_dumpable(pa)) 271 dump_add_page(pa); 272 /* and for each valid page in this 2MB block */ 273 pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME); 274 for (k = 0; k < NPTEPG; k++) { 275 if ((pt[k] & PG_V) == 0) 276 continue; 277 pa = pt[k] & PG_FRAME; 278 if (is_dumpable(pa)) 279 dump_add_page(pa); 280 } 281 } 282 } 283 284 /* Calculate dump size. */ 285 dumpsize = pmapsize; 286 dumpsize += round_page(msgbufp->msg_size); 287 dumpsize += round_page(vm_page_dump_size); 288 for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { 289 bits = vm_page_dump[i]; 290 while (bits) { 291 bit = bsfq(bits); 292 pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; 293 /* Clear out undumpable pages now if needed */ 294 if (is_dumpable(pa)) { 295 dumpsize += PAGE_SIZE; 296 } else { 297 dump_drop_page(pa); 298 } 299 bits &= ~(1ul << bit); 300 } 301 } 302 dumpsize += PAGE_SIZE; 303 304 /* Determine dump offset on device. */ 305 if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) { 306 error = E2BIG; 307 goto fail; 308 } 309 dumplo = di->mediaoffset + di->mediasize - dumpsize; 310 dumplo -= sizeof(kdh) * 2; 311 progress = dumpsize; 312 313 /* Initialize mdhdr */ 314 bzero(&mdhdr, sizeof(mdhdr)); 315 strcpy(mdhdr.magic, MINIDUMP_MAGIC); 316 mdhdr.version = MINIDUMP_VERSION; 317 mdhdr.msgbufsize = msgbufp->msg_size; 318 mdhdr.bitmapsize = vm_page_dump_size; 319 mdhdr.pmapsize = pmapsize; 320 mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS; 321 mdhdr.dmapbase = DMAP_MIN_ADDRESS; 322 mdhdr.dmapend = DMAP_MAX_ADDRESS; 323 324 mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION, dumpsize, di->blocksize); 325 326 printf("Dumping %llu out of %ju MB:", (long long)dumpsize >> 20, 327 ptoa((uintmax_t)physmem) / 1048576); 328 329 /* Dump leader */ 330 error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); 331 if (error) 332 goto fail; 333 dumplo += sizeof(kdh); 334 335 /* Dump my header */ 336 bzero(&fakepd, sizeof(fakepd)); 337 bcopy(&mdhdr, &fakepd, sizeof(mdhdr)); 338 error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); 339 if (error) 340 goto fail; 341 342 /* Dump msgbuf up front */ 343 error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size)); 344 if (error) 345 goto fail; 346 347 /* Dump bitmap */ 348 error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size)); 349 if (error) 350 goto fail; 351 352 /* Dump kernel page directory pages */ 353 bzero(fakepd, sizeof(fakepd)); 354 pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys); 355 for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + NKPT * NBPDR, 356 kernel_vm_end); va += NBPDP) { 357 i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1); 358 359 /* We always write a page, even if it is zero */ 360 if ((pdp[i] & PG_V) == 0) { 361 error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); 362 if (error) 363 goto fail; 364 /* flush, in case we reuse fakepd in the same block */ 365 error = blk_flush(di); 366 if (error) 367 goto fail; 368 continue; 369 } 370 371 /* 1GB page is represented as 512 2MB pages in a dump */ 372 if ((pdp[i] & PG_PS) != 0) { 373 /* PDPE and PDP have identical layout in this case */ 374 fakepd[0] = pdp[i]; 375 for (j = 1; j < NPDEPG; j++) 376 fakepd[j] = fakepd[j - 1] + NBPDR; 377 error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); 378 if (error) 379 goto fail; 380 /* flush, in case we reuse fakepd in the same block */ 381 error = blk_flush(di); 382 if (error) 383 goto fail; 384 bzero(fakepd, sizeof(fakepd)); 385 continue; 386 } 387 388 pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME); 389 error = blk_write(di, (char *)pd, 0, PAGE_SIZE); 390 if (error) 391 goto fail; 392 error = blk_flush(di); 393 if (error) 394 goto fail; 395 } 396 397 /* Dump memory chunks */ 398 /* XXX cluster it up and use blk_dump() */ 399 for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { 400 bits = vm_page_dump[i]; 401 while (bits) { 402 bit = bsfq(bits); 403 pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; 404 error = blk_write(di, 0, pa, PAGE_SIZE); 405 if (error) 406 goto fail; 407 bits &= ~(1ul << bit); 408 } 409 } 410 411 error = blk_flush(di); 412 if (error) 413 goto fail; 414 415 /* Dump trailer */ 416 error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); 417 if (error) 418 goto fail; 419 dumplo += sizeof(kdh); 420 421 /* Signal completion, signoff and exit stage left. */ 422 dump_write(di, NULL, 0, 0, 0); 423 printf("\nDump complete\n"); 424 return; 425 426 fail: 427 if (error < 0) 428 error = -error; 429 430 printf("\n"); 431 if (error == ENOSPC) { 432 printf("Dump map grown while dumping. "); 433 if (retry_count < 5) { 434 printf("Retrying...\n"); 435 goto retry; 436 } 437 printf("Dump failed.\n"); 438 } 439 else if (error == ECANCELED) 440 printf("Dump aborted\n"); 441 else if (error == E2BIG) 442 printf("Dump failed. Partition too small.\n"); 443 else 444 printf("** DUMP FAILED (ERROR %d) **\n", error); 445} 446 447void 448dump_add_page(vm_paddr_t pa) 449{ 450 int idx, bit; 451 452 pa >>= PAGE_SHIFT; 453 idx = pa >> 6; /* 2^6 = 64 */ 454 bit = pa & 63; 455 atomic_set_long(&vm_page_dump[idx], 1ul << bit); 456} 457 458void 459dump_drop_page(vm_paddr_t pa) 460{ 461 int idx, bit; 462 463 pa >>= PAGE_SHIFT; 464 idx = pa >> 6; /* 2^6 = 64 */ 465 bit = pa & 63; 466 atomic_clear_long(&vm_page_dump[idx], 1ul << bit); 467} 468