1184728Sraj/*- 2184728Sraj * Copyright (c) 2008 Semihalf, Grzegorz Bernacki 3184728Sraj * All rights reserved. 4184728Sraj * 5184728Sraj * Redistribution and use in source and binary forms, with or without 6184728Sraj * modification, are permitted provided that the following conditions 7184728Sraj * are met: 8184728Sraj * 9184728Sraj * 1. Redistributions of source code must retain the above copyright 10184728Sraj * notice, this list of conditions and the following disclaimer. 11184728Sraj * 2. Redistributions in binary form must reproduce the above copyright 12184728Sraj * notice, this list of conditions and the following disclaimer in the 13184728Sraj * documentation and/or other materials provided with the distribution. 14184728Sraj * 15184728Sraj * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16184728Sraj * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17184728Sraj * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18184728Sraj * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19184728Sraj * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20184728Sraj * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21184728Sraj * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22184728Sraj * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23184728Sraj * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24184728Sraj * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25184728Sraj * 26184728Sraj * from: FreeBSD: src/sys/i386/i386/minidump_machdep.c,v 1.6 2008/08/17 23:27:27 27184728Sraj */ 28184728Sraj 29184728Sraj#include <sys/cdefs.h> 30184728Sraj__FBSDID("$FreeBSD$"); 31184728Sraj 32221173Sattilio#include "opt_watchdog.h" 33221173Sattilio 34184728Sraj#include <sys/param.h> 35184728Sraj#include <sys/systm.h> 36184728Sraj#include <sys/conf.h> 37184728Sraj#include <sys/cons.h> 38184728Sraj#include <sys/kernel.h> 39184728Sraj#include <sys/kerneldump.h> 40184728Sraj#include <sys/msgbuf.h> 41221173Sattilio#ifdef SW_WATCHDOG 42221173Sattilio#include <sys/watchdog.h> 43221173Sattilio#endif 44184728Sraj#include <vm/vm.h> 45184728Sraj#include <vm/pmap.h> 46184728Sraj#include <machine/atomic.h> 47184728Sraj#include <machine/elf.h> 48184728Sraj#include <machine/md_var.h> 49184728Sraj#include <machine/vmparam.h> 50184728Sraj#include <machine/minidump.h> 51184728Sraj#include <machine/cpufunc.h> 52184728Sraj 53184728SrajCTASSERT(sizeof(struct kerneldumpheader) == 512); 54184728Sraj 55184728Sraj/* 56184728Sraj * Don't touch the first SIZEOF_METADATA bytes on the dump device. This 57184728Sraj * is to protect us from metadata and to protect metadata from us. 58184728Sraj */ 59184728Sraj#define SIZEOF_METADATA (64*1024) 60184728Sraj 61184728Srajuint32_t *vm_page_dump; 62184728Srajint vm_page_dump_size; 63184728Sraj 64184728Srajstatic struct kerneldumpheader kdh; 65184728Srajstatic off_t dumplo; 66184728Sraj 67184728Sraj/* Handle chunked writes. */ 68184728Srajstatic size_t fragsz, offset; 69184728Srajstatic void *dump_va; 70184728Srajstatic uint64_t counter, progress; 71184728Sraj 72184728SrajCTASSERT(sizeof(*vm_page_dump) == 4); 73184728Sraj 74184728Srajstatic int 75184728Srajis_dumpable(vm_paddr_t pa) 76184728Sraj{ 77184728Sraj int i; 78184728Sraj 79184728Sraj for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { 80184728Sraj if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) 81184728Sraj return (1); 82184728Sraj } 83184728Sraj return (0); 84184728Sraj} 85184728Sraj 86184728Sraj#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8) 87184728Sraj 88184728Srajstatic int 89184728Srajblk_flush(struct dumperinfo *di) 90184728Sraj{ 91184728Sraj int error; 92184728Sraj 93184728Sraj if (fragsz == 0) 94184728Sraj return (0); 95184728Sraj 96184728Sraj error = dump_write(di, (char*)dump_va + offset, 0, dumplo, fragsz - offset); 97184728Sraj dumplo += (fragsz - offset); 98184728Sraj fragsz = 0; 99184728Sraj offset = 0; 100184728Sraj return (error); 101184728Sraj} 102184728Sraj 103184728Srajstatic int 104184728Srajblk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) 105184728Sraj{ 106184728Sraj size_t len; 107184728Sraj int error, i, c; 108184728Sraj u_int maxdumpsz; 109184728Sraj 110184728Sraj maxdumpsz = di->maxiosize; 111184728Sraj 112184728Sraj if (maxdumpsz == 0) /* seatbelt */ 113184728Sraj maxdumpsz = PAGE_SIZE; 114184728Sraj 115184728Sraj error = 0; 116184728Sraj 117184728Sraj if (ptr != NULL && pa != 0) { 118184728Sraj printf("cant have both va and pa!\n"); 119184728Sraj return (EINVAL); 120184728Sraj } 121184728Sraj 122184728Sraj if (ptr != NULL) { 123184728Sraj /* If we're doing a virtual dump, flush any pre-existing pa pages */ 124184728Sraj error = blk_flush(di); 125184728Sraj if (error) 126184728Sraj return (error); 127184728Sraj } 128184728Sraj 129184728Sraj while (sz) { 130184728Sraj if (fragsz == 0) { 131184728Sraj offset = pa & PAGE_MASK; 132184728Sraj fragsz += offset; 133184728Sraj } 134184728Sraj len = maxdumpsz - fragsz; 135184728Sraj if (len > sz) 136184728Sraj len = sz; 137184728Sraj counter += len; 138184728Sraj progress -= len; 139184728Sraj 140184728Sraj if (counter >> 22) { 141184728Sraj printf(" %lld", PG2MB(progress >> PAGE_SHIFT)); 142184728Sraj counter &= (1<<22) - 1; 143184728Sraj } 144184728Sraj 145221173Sattilio#ifdef SW_WATCHDOG 146221173Sattilio wdog_kern_pat(WD_LASTVAL); 147221173Sattilio#endif 148184728Sraj if (ptr) { 149184728Sraj error = dump_write(di, ptr, 0, dumplo, len); 150184728Sraj if (error) 151184728Sraj return (error); 152184728Sraj dumplo += len; 153184728Sraj ptr += len; 154184728Sraj sz -= len; 155184728Sraj } else { 156184728Sraj for (i = 0; i < len; i += PAGE_SIZE) 157278614Sian dump_va = pmap_kenter_temporary(pa + i, 158184728Sraj (i + fragsz) >> PAGE_SHIFT); 159184728Sraj fragsz += len; 160184728Sraj pa += len; 161184728Sraj sz -= len; 162184728Sraj if (fragsz == maxdumpsz) { 163184728Sraj error = blk_flush(di); 164184728Sraj if (error) 165184728Sraj return (error); 166184728Sraj } 167184728Sraj } 168184728Sraj 169184728Sraj /* Check for user abort. */ 170184728Sraj c = cncheckc(); 171184728Sraj if (c == 0x03) 172184728Sraj return (ECANCELED); 173184728Sraj if (c != -1) 174184728Sraj printf(" (CTRL-C to abort) "); 175184728Sraj } 176184728Sraj 177184728Sraj return (0); 178184728Sraj} 179184728Sraj 180184728Srajstatic int 181184728Srajblk_write_cont(struct dumperinfo *di, vm_paddr_t pa, size_t sz) 182184728Sraj{ 183184728Sraj int error; 184184728Sraj 185184728Sraj error = blk_write(di, 0, pa, sz); 186184728Sraj if (error) 187184728Sraj return (error); 188184728Sraj 189184728Sraj error = blk_flush(di); 190184728Sraj if (error) 191184728Sraj return (error); 192184728Sraj 193184728Sraj return (0); 194184728Sraj} 195184728Sraj 196184728Sraj/* A fake page table page, to avoid having to handle both 4K and 2M pages */ 197184728Srajstatic pt_entry_t fakept[NPTEPG]; 198184728Sraj 199184728Srajvoid 200184728Srajminidumpsys(struct dumperinfo *di) 201184728Sraj{ 202184728Sraj struct minidumphdr mdhdr; 203184728Sraj uint64_t dumpsize; 204184728Sraj uint32_t ptesize; 205184728Sraj uint32_t bits; 206184728Sraj uint32_t pa, prev_pa = 0, count = 0; 207184728Sraj vm_offset_t va; 208184728Sraj pd_entry_t *pdp; 209184728Sraj pt_entry_t *pt, *ptp; 210184728Sraj int i, k, bit, error; 211184728Sraj char *addr; 212184728Sraj 213266374Sian /* 214266374Sian * Flush caches. Note that in the SMP case this operates only on the 215266374Sian * current CPU's L1 cache. Before we reach this point, code in either 216266374Sian * the system shutdown or kernel debugger has called stop_cpus() to stop 217266374Sian * all cores other than this one. Part of the ARM handling of 218266374Sian * stop_cpus() is to call wbinv_all() on that core's local L1 cache. So 219266374Sian * by time we get to here, all that remains is to flush the L1 for the 220266374Sian * current CPU, then the L2. 221266374Sian */ 222184728Sraj cpu_idcache_wbinv_all(); 223184728Sraj cpu_l2cache_wbinv_all(); 224184728Sraj 225184728Sraj counter = 0; 226184728Sraj /* Walk page table pages, set bits in vm_page_dump */ 227184728Sraj ptesize = 0; 228184728Sraj for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) { 229184728Sraj /* 230184728Sraj * We always write a page, even if it is zero. Each 231184728Sraj * page written corresponds to 2MB of space 232184728Sraj */ 233184728Sraj ptesize += L2_TABLE_SIZE_REAL; 234184728Sraj pmap_get_pde_pte(pmap_kernel(), va, &pdp, &ptp); 235184728Sraj if (pmap_pde_v(pdp) && pmap_pde_section(pdp)) { 236184728Sraj /* This is a section mapping 1M page. */ 237184728Sraj pa = (*pdp & L1_S_ADDR_MASK) | (va & ~L1_S_ADDR_MASK); 238184728Sraj for (k = 0; k < (L1_S_SIZE / PAGE_SIZE); k++) { 239184728Sraj if (is_dumpable(pa)) 240184728Sraj dump_add_page(pa); 241184728Sraj pa += PAGE_SIZE; 242184728Sraj } 243184728Sraj continue; 244184728Sraj } 245184728Sraj if (pmap_pde_v(pdp) && pmap_pde_page(pdp)) { 246184728Sraj /* Set bit for each valid page in this 1MB block */ 247278614Sian addr = pmap_kenter_temporary(*pdp & L1_C_ADDR_MASK, 0); 248184728Sraj pt = (pt_entry_t*)(addr + 249184728Sraj (((uint32_t)*pdp & L1_C_ADDR_MASK) & PAGE_MASK)); 250184728Sraj for (k = 0; k < 256; k++) { 251184728Sraj if ((pt[k] & L2_TYPE_MASK) == L2_TYPE_L) { 252184728Sraj pa = (pt[k] & L2_L_FRAME) | 253184728Sraj (va & L2_L_OFFSET); 254184728Sraj for (i = 0; i < 16; i++) { 255184728Sraj if (is_dumpable(pa)) 256184728Sraj dump_add_page(pa); 257184728Sraj k++; 258184728Sraj pa += PAGE_SIZE; 259184728Sraj } 260184728Sraj } else if ((pt[k] & L2_TYPE_MASK) == L2_TYPE_S) { 261184728Sraj pa = (pt[k] & L2_S_FRAME) | 262184728Sraj (va & L2_S_OFFSET); 263184728Sraj if (is_dumpable(pa)) 264184728Sraj dump_add_page(pa); 265184728Sraj } 266184728Sraj } 267184728Sraj } else { 268184728Sraj /* Nothing, we're going to dump a null page */ 269184728Sraj } 270184728Sraj } 271184728Sraj 272184728Sraj /* Calculate dump size. */ 273184728Sraj dumpsize = ptesize; 274184728Sraj dumpsize += round_page(msgbufp->msg_size); 275184728Sraj dumpsize += round_page(vm_page_dump_size); 276184728Sraj 277184728Sraj for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { 278184728Sraj bits = vm_page_dump[i]; 279184728Sraj while (bits) { 280184728Sraj bit = ffs(bits) - 1; 281184728Sraj pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + 282184728Sraj bit) * PAGE_SIZE; 283184728Sraj /* Clear out undumpable pages now if needed */ 284184728Sraj if (is_dumpable(pa)) 285184728Sraj dumpsize += PAGE_SIZE; 286184728Sraj else 287184728Sraj dump_drop_page(pa); 288184728Sraj bits &= ~(1ul << bit); 289184728Sraj } 290184728Sraj } 291184728Sraj 292184728Sraj dumpsize += PAGE_SIZE; 293184728Sraj 294184728Sraj /* Determine dump offset on device. */ 295184728Sraj if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) { 296184728Sraj error = ENOSPC; 297184728Sraj goto fail; 298184728Sraj } 299184728Sraj 300184728Sraj dumplo = di->mediaoffset + di->mediasize - dumpsize; 301184728Sraj dumplo -= sizeof(kdh) * 2; 302184728Sraj progress = dumpsize; 303184728Sraj 304184728Sraj /* Initialize mdhdr */ 305184728Sraj bzero(&mdhdr, sizeof(mdhdr)); 306184728Sraj strcpy(mdhdr.magic, MINIDUMP_MAGIC); 307184728Sraj mdhdr.version = MINIDUMP_VERSION; 308184728Sraj mdhdr.msgbufsize = msgbufp->msg_size; 309184728Sraj mdhdr.bitmapsize = vm_page_dump_size; 310184728Sraj mdhdr.ptesize = ptesize; 311184728Sraj mdhdr.kernbase = KERNBASE; 312184728Sraj 313184728Sraj mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_ARM_VERSION, dumpsize, 314184728Sraj di->blocksize); 315184728Sraj 316184728Sraj printf("Physical memory: %u MB\n", ptoa((uintmax_t)physmem) / 1048576); 317184728Sraj printf("Dumping %llu MB:", (long long)dumpsize >> 20); 318184728Sraj 319184728Sraj /* Dump leader */ 320184728Sraj error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); 321184728Sraj if (error) 322184728Sraj goto fail; 323184728Sraj dumplo += sizeof(kdh); 324184728Sraj 325184728Sraj /* Dump my header */ 326184728Sraj bzero(&fakept, sizeof(fakept)); 327184728Sraj bcopy(&mdhdr, &fakept, sizeof(mdhdr)); 328184728Sraj error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); 329184728Sraj if (error) 330184728Sraj goto fail; 331184728Sraj 332184728Sraj /* Dump msgbuf up front */ 333184728Sraj error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size)); 334184728Sraj if (error) 335184728Sraj goto fail; 336184728Sraj 337184728Sraj /* Dump bitmap */ 338184728Sraj error = blk_write(di, (char *)vm_page_dump, 0, 339184728Sraj round_page(vm_page_dump_size)); 340184728Sraj if (error) 341184728Sraj goto fail; 342184728Sraj 343184728Sraj /* Dump kernel page table pages */ 344184728Sraj for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) { 345184728Sraj /* We always write a page, even if it is zero */ 346184728Sraj pmap_get_pde_pte(pmap_kernel(), va, &pdp, &ptp); 347184728Sraj 348184728Sraj if (pmap_pde_v(pdp) && pmap_pde_section(pdp)) { 349184728Sraj if (count) { 350184728Sraj error = blk_write_cont(di, prev_pa, 351184728Sraj count * L2_TABLE_SIZE_REAL); 352184728Sraj if (error) 353184728Sraj goto fail; 354184728Sraj count = 0; 355184728Sraj prev_pa = 0; 356184728Sraj } 357184728Sraj /* This is a single 2M block. Generate a fake PTP */ 358184728Sraj pa = (*pdp & L1_S_ADDR_MASK) | (va & ~L1_S_ADDR_MASK); 359184728Sraj for (k = 0; k < (L1_S_SIZE / PAGE_SIZE); k++) { 360184728Sraj fakept[k] = L2_S_PROTO | (pa + (k * PAGE_SIZE)) | 361184728Sraj L2_S_PROT(PTE_KERNEL, 362184728Sraj VM_PROT_READ | VM_PROT_WRITE); 363184728Sraj } 364184728Sraj error = blk_write(di, (char *)&fakept, 0, 365184728Sraj L2_TABLE_SIZE_REAL); 366184728Sraj if (error) 367184728Sraj goto fail; 368184728Sraj /* Flush, in case we reuse fakept in the same block */ 369184728Sraj error = blk_flush(di); 370184728Sraj if (error) 371184728Sraj goto fail; 372184728Sraj continue; 373184728Sraj } 374184728Sraj if (pmap_pde_v(pdp) && pmap_pde_page(pdp)) { 375184728Sraj pa = *pdp & L1_C_ADDR_MASK; 376184728Sraj if (!count) { 377184728Sraj prev_pa = pa; 378184728Sraj count++; 379184728Sraj } 380184728Sraj else { 381184728Sraj if (pa == (prev_pa + count * L2_TABLE_SIZE_REAL)) 382184728Sraj count++; 383184728Sraj else { 384184728Sraj error = blk_write_cont(di, prev_pa, 385184728Sraj count * L2_TABLE_SIZE_REAL); 386184728Sraj if (error) 387184728Sraj goto fail; 388184728Sraj count = 1; 389184728Sraj prev_pa = pa; 390184728Sraj } 391184728Sraj } 392184728Sraj } else { 393184728Sraj if (count) { 394184728Sraj error = blk_write_cont(di, prev_pa, 395184728Sraj count * L2_TABLE_SIZE_REAL); 396184728Sraj if (error) 397184728Sraj goto fail; 398184728Sraj count = 0; 399184728Sraj prev_pa = 0; 400184728Sraj } 401184728Sraj bzero(fakept, sizeof(fakept)); 402184728Sraj error = blk_write(di, (char *)&fakept, 0, 403184728Sraj L2_TABLE_SIZE_REAL); 404184728Sraj if (error) 405184728Sraj goto fail; 406184728Sraj /* Flush, in case we reuse fakept in the same block */ 407184728Sraj error = blk_flush(di); 408184728Sraj if (error) 409184728Sraj goto fail; 410184728Sraj } 411184728Sraj } 412184728Sraj 413184728Sraj if (count) { 414184728Sraj error = blk_write_cont(di, prev_pa, count * L2_TABLE_SIZE_REAL); 415184728Sraj if (error) 416184728Sraj goto fail; 417184728Sraj count = 0; 418184728Sraj prev_pa = 0; 419184728Sraj } 420184728Sraj 421184728Sraj /* Dump memory chunks */ 422184728Sraj for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { 423184728Sraj bits = vm_page_dump[i]; 424184728Sraj while (bits) { 425184728Sraj bit = ffs(bits) - 1; 426184728Sraj pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + 427184728Sraj bit) * PAGE_SIZE; 428184728Sraj if (!count) { 429184728Sraj prev_pa = pa; 430184728Sraj count++; 431184728Sraj } else { 432184728Sraj if (pa == (prev_pa + count * PAGE_SIZE)) 433184728Sraj count++; 434184728Sraj else { 435184728Sraj error = blk_write_cont(di, prev_pa, 436184728Sraj count * PAGE_SIZE); 437184728Sraj if (error) 438184728Sraj goto fail; 439184728Sraj count = 1; 440184728Sraj prev_pa = pa; 441184728Sraj } 442184728Sraj } 443184728Sraj bits &= ~(1ul << bit); 444184728Sraj } 445184728Sraj } 446184728Sraj if (count) { 447184728Sraj error = blk_write_cont(di, prev_pa, count * PAGE_SIZE); 448184728Sraj if (error) 449184728Sraj goto fail; 450184728Sraj count = 0; 451184728Sraj prev_pa = 0; 452184728Sraj } 453184728Sraj 454184728Sraj /* Dump trailer */ 455184728Sraj error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); 456184728Sraj if (error) 457184728Sraj goto fail; 458184728Sraj dumplo += sizeof(kdh); 459184728Sraj 460184728Sraj /* Signal completion, signoff and exit stage left. */ 461184728Sraj dump_write(di, NULL, 0, 0, 0); 462184728Sraj printf("\nDump complete\n"); 463184728Sraj return; 464184728Sraj 465184728Srajfail: 466184728Sraj if (error < 0) 467184728Sraj error = -error; 468184728Sraj 469184728Sraj if (error == ECANCELED) 470184728Sraj printf("\nDump aborted\n"); 471184728Sraj else if (error == ENOSPC) 472184728Sraj printf("\nDump failed. Partition too small.\n"); 473184728Sraj else 474184728Sraj printf("\n** DUMP FAILED (ERROR %d) **\n", error); 475184728Sraj} 476184728Sraj 477184728Srajvoid 478184728Srajdump_add_page(vm_paddr_t pa) 479184728Sraj{ 480184728Sraj int idx, bit; 481184728Sraj 482184728Sraj pa >>= PAGE_SHIFT; 483184728Sraj idx = pa >> 5; /* 2^5 = 32 */ 484184728Sraj bit = pa & 31; 485184728Sraj atomic_set_int(&vm_page_dump[idx], 1ul << bit); 486184728Sraj} 487184728Sraj 488184728Srajvoid 489184728Srajdump_drop_page(vm_paddr_t pa) 490184728Sraj{ 491184728Sraj int idx, bit; 492184728Sraj 493184728Sraj pa >>= PAGE_SHIFT; 494184728Sraj idx = pa >> 5; /* 2^5 = 32 */ 495184728Sraj bit = pa & 31; 496184728Sraj atomic_clear_int(&vm_page_dump[idx], 1ul << bit); 497184728Sraj} 498