1184728Sraj/*-
2184728Sraj * Copyright (c) 2008 Semihalf, Grzegorz Bernacki
3184728Sraj * All rights reserved.
4184728Sraj *
5184728Sraj * Redistribution and use in source and binary forms, with or without
6184728Sraj * modification, are permitted provided that the following conditions
7184728Sraj * are met:
8184728Sraj *
9184728Sraj * 1. Redistributions of source code must retain the above copyright
10184728Sraj *    notice, this list of conditions and the following disclaimer.
11184728Sraj * 2. Redistributions in binary form must reproduce the above copyright
12184728Sraj *    notice, this list of conditions and the following disclaimer in the
13184728Sraj *    documentation and/or other materials provided with the distribution.
14184728Sraj *
15184728Sraj * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16184728Sraj * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17184728Sraj * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18184728Sraj * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19184728Sraj * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20184728Sraj * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21184728Sraj * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22184728Sraj * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23184728Sraj * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24184728Sraj * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25184728Sraj *
26184728Sraj * from: FreeBSD: src/sys/i386/i386/minidump_machdep.c,v 1.6 2008/08/17 23:27:27
27184728Sraj */
28184728Sraj
29184728Sraj#include <sys/cdefs.h>
30184728Sraj__FBSDID("$FreeBSD$");
31184728Sraj
32221173Sattilio#include "opt_watchdog.h"
33221173Sattilio
34184728Sraj#include <sys/param.h>
35184728Sraj#include <sys/systm.h>
36184728Sraj#include <sys/conf.h>
37184728Sraj#include <sys/cons.h>
38184728Sraj#include <sys/kernel.h>
39184728Sraj#include <sys/kerneldump.h>
40184728Sraj#include <sys/msgbuf.h>
41221173Sattilio#ifdef SW_WATCHDOG
42221173Sattilio#include <sys/watchdog.h>
43221173Sattilio#endif
44184728Sraj#include <vm/vm.h>
45184728Sraj#include <vm/pmap.h>
46184728Sraj#include <machine/atomic.h>
47184728Sraj#include <machine/elf.h>
48184728Sraj#include <machine/md_var.h>
49184728Sraj#include <machine/vmparam.h>
50184728Sraj#include <machine/minidump.h>
51184728Sraj#include <machine/cpufunc.h>
52184728Sraj
53184728SrajCTASSERT(sizeof(struct kerneldumpheader) == 512);
54184728Sraj
55184728Sraj/*
56184728Sraj * Don't touch the first SIZEOF_METADATA bytes on the dump device. This
57184728Sraj * is to protect us from metadata and to protect metadata from us.
58184728Sraj */
59184728Sraj#define	SIZEOF_METADATA		(64*1024)
60184728Sraj
61184728Srajuint32_t *vm_page_dump;
62184728Srajint vm_page_dump_size;
63184728Sraj
64184728Srajstatic struct kerneldumpheader kdh;
65184728Srajstatic off_t dumplo;
66184728Sraj
67184728Sraj/* Handle chunked writes. */
68184728Srajstatic size_t fragsz, offset;
69184728Srajstatic void *dump_va;
70184728Srajstatic uint64_t counter, progress;
71184728Sraj
72184728SrajCTASSERT(sizeof(*vm_page_dump) == 4);
73184728Sraj
74184728Srajstatic int
75184728Srajis_dumpable(vm_paddr_t pa)
76184728Sraj{
77184728Sraj	int i;
78184728Sraj
79184728Sraj	for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) {
80184728Sraj		if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
81184728Sraj			return (1);
82184728Sraj	}
83184728Sraj	return (0);
84184728Sraj}
85184728Sraj
86184728Sraj#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8)
87184728Sraj
88184728Srajstatic int
89184728Srajblk_flush(struct dumperinfo *di)
90184728Sraj{
91184728Sraj	int error;
92184728Sraj
93184728Sraj	if (fragsz == 0)
94184728Sraj		return (0);
95184728Sraj
96184728Sraj	error = dump_write(di, (char*)dump_va + offset, 0, dumplo, fragsz - offset);
97184728Sraj	dumplo += (fragsz - offset);
98184728Sraj	fragsz = 0;
99184728Sraj	offset = 0;
100184728Sraj	return (error);
101184728Sraj}
102184728Sraj
103184728Srajstatic int
104184728Srajblk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
105184728Sraj{
106184728Sraj	size_t len;
107184728Sraj	int error, i, c;
108184728Sraj	u_int maxdumpsz;
109184728Sraj
110184728Sraj	maxdumpsz = di->maxiosize;
111184728Sraj
112184728Sraj	if (maxdumpsz == 0)	/* seatbelt */
113184728Sraj		maxdumpsz = PAGE_SIZE;
114184728Sraj
115184728Sraj	error = 0;
116184728Sraj
117184728Sraj	if (ptr != NULL && pa != 0) {
118184728Sraj		printf("cant have both va and pa!\n");
119184728Sraj		return (EINVAL);
120184728Sraj	}
121184728Sraj
122184728Sraj	if (ptr != NULL) {
123184728Sraj		/* If we're doing a virtual dump, flush any pre-existing pa pages */
124184728Sraj		error = blk_flush(di);
125184728Sraj		if (error)
126184728Sraj			return (error);
127184728Sraj	}
128184728Sraj
129184728Sraj	while (sz) {
130184728Sraj		if (fragsz == 0) {
131184728Sraj			offset = pa & PAGE_MASK;
132184728Sraj			fragsz += offset;
133184728Sraj		}
134184728Sraj		len = maxdumpsz - fragsz;
135184728Sraj		if (len > sz)
136184728Sraj			len = sz;
137184728Sraj		counter += len;
138184728Sraj		progress -= len;
139184728Sraj
140184728Sraj		if (counter >> 22) {
141184728Sraj			printf(" %lld", PG2MB(progress >> PAGE_SHIFT));
142184728Sraj			counter &= (1<<22) - 1;
143184728Sraj		}
144184728Sraj
145221173Sattilio#ifdef SW_WATCHDOG
146221173Sattilio		wdog_kern_pat(WD_LASTVAL);
147221173Sattilio#endif
148184728Sraj		if (ptr) {
149184728Sraj			error = dump_write(di, ptr, 0, dumplo, len);
150184728Sraj			if (error)
151184728Sraj				return (error);
152184728Sraj			dumplo += len;
153184728Sraj			ptr += len;
154184728Sraj			sz -= len;
155184728Sraj		} else {
156184728Sraj			for (i = 0; i < len; i += PAGE_SIZE)
157278614Sian				dump_va = pmap_kenter_temporary(pa + i,
158184728Sraj				    (i + fragsz) >> PAGE_SHIFT);
159184728Sraj			fragsz += len;
160184728Sraj			pa += len;
161184728Sraj			sz -= len;
162184728Sraj			if (fragsz == maxdumpsz) {
163184728Sraj				error = blk_flush(di);
164184728Sraj				if (error)
165184728Sraj					return (error);
166184728Sraj			}
167184728Sraj		}
168184728Sraj
169184728Sraj		/* Check for user abort. */
170184728Sraj		c = cncheckc();
171184728Sraj		if (c == 0x03)
172184728Sraj			return (ECANCELED);
173184728Sraj		if (c != -1)
174184728Sraj			printf(" (CTRL-C to abort) ");
175184728Sraj	}
176184728Sraj
177184728Sraj	return (0);
178184728Sraj}
179184728Sraj
180184728Srajstatic int
181184728Srajblk_write_cont(struct dumperinfo *di, vm_paddr_t pa, size_t sz)
182184728Sraj{
183184728Sraj	int error;
184184728Sraj
185184728Sraj	error = blk_write(di, 0, pa, sz);
186184728Sraj	if (error)
187184728Sraj		return (error);
188184728Sraj
189184728Sraj	error = blk_flush(di);
190184728Sraj	if (error)
191184728Sraj		return (error);
192184728Sraj
193184728Sraj	return (0);
194184728Sraj}
195184728Sraj
196184728Sraj/* A fake page table page, to avoid having to handle both 4K and 2M pages */
197184728Srajstatic pt_entry_t fakept[NPTEPG];
198184728Sraj
199184728Srajvoid
200184728Srajminidumpsys(struct dumperinfo *di)
201184728Sraj{
202184728Sraj	struct minidumphdr mdhdr;
203184728Sraj	uint64_t dumpsize;
204184728Sraj	uint32_t ptesize;
205184728Sraj	uint32_t bits;
206184728Sraj	uint32_t pa, prev_pa = 0, count = 0;
207184728Sraj	vm_offset_t va;
208184728Sraj	pd_entry_t *pdp;
209184728Sraj	pt_entry_t *pt, *ptp;
210184728Sraj	int i, k, bit, error;
211184728Sraj	char *addr;
212184728Sraj
213266374Sian	/*
214266374Sian	 * Flush caches.  Note that in the SMP case this operates only on the
215266374Sian	 * current CPU's L1 cache.  Before we reach this point, code in either
216266374Sian	 * the system shutdown or kernel debugger has called stop_cpus() to stop
217266374Sian	 * all cores other than this one.  Part of the ARM handling of
218266374Sian	 * stop_cpus() is to call wbinv_all() on that core's local L1 cache.  So
219266374Sian	 * by time we get to here, all that remains is to flush the L1 for the
220266374Sian	 * current CPU, then the L2.
221266374Sian	 */
222184728Sraj	cpu_idcache_wbinv_all();
223184728Sraj	cpu_l2cache_wbinv_all();
224184728Sraj
225184728Sraj	counter = 0;
226184728Sraj	/* Walk page table pages, set bits in vm_page_dump */
227184728Sraj	ptesize = 0;
228184728Sraj	for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) {
229184728Sraj		/*
230184728Sraj		 * We always write a page, even if it is zero. Each
231184728Sraj		 * page written corresponds to 2MB of space
232184728Sraj		 */
233184728Sraj		ptesize += L2_TABLE_SIZE_REAL;
234184728Sraj		pmap_get_pde_pte(pmap_kernel(), va, &pdp, &ptp);
235184728Sraj		if (pmap_pde_v(pdp) && pmap_pde_section(pdp)) {
236184728Sraj			/* This is a section mapping 1M page. */
237184728Sraj			pa = (*pdp & L1_S_ADDR_MASK) | (va & ~L1_S_ADDR_MASK);
238184728Sraj			for (k = 0; k < (L1_S_SIZE / PAGE_SIZE); k++) {
239184728Sraj				if (is_dumpable(pa))
240184728Sraj					dump_add_page(pa);
241184728Sraj				pa += PAGE_SIZE;
242184728Sraj			}
243184728Sraj			continue;
244184728Sraj		}
245184728Sraj		if (pmap_pde_v(pdp) && pmap_pde_page(pdp)) {
246184728Sraj			/* Set bit for each valid page in this 1MB block */
247278614Sian			addr = pmap_kenter_temporary(*pdp & L1_C_ADDR_MASK, 0);
248184728Sraj			pt = (pt_entry_t*)(addr +
249184728Sraj			    (((uint32_t)*pdp  & L1_C_ADDR_MASK) & PAGE_MASK));
250184728Sraj			for (k = 0; k < 256; k++) {
251184728Sraj				if ((pt[k] & L2_TYPE_MASK) == L2_TYPE_L) {
252184728Sraj					pa = (pt[k] & L2_L_FRAME) |
253184728Sraj					    (va & L2_L_OFFSET);
254184728Sraj					for (i = 0; i < 16; i++) {
255184728Sraj						if (is_dumpable(pa))
256184728Sraj							dump_add_page(pa);
257184728Sraj						k++;
258184728Sraj						pa += PAGE_SIZE;
259184728Sraj					}
260184728Sraj				} else if ((pt[k] & L2_TYPE_MASK) == L2_TYPE_S) {
261184728Sraj					pa = (pt[k] & L2_S_FRAME) |
262184728Sraj					    (va & L2_S_OFFSET);
263184728Sraj					if (is_dumpable(pa))
264184728Sraj						dump_add_page(pa);
265184728Sraj				}
266184728Sraj			}
267184728Sraj		} else {
268184728Sraj			/* Nothing, we're going to dump a null page */
269184728Sraj		}
270184728Sraj	}
271184728Sraj
272184728Sraj	/* Calculate dump size. */
273184728Sraj	dumpsize = ptesize;
274184728Sraj	dumpsize += round_page(msgbufp->msg_size);
275184728Sraj	dumpsize += round_page(vm_page_dump_size);
276184728Sraj
277184728Sraj	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
278184728Sraj		bits = vm_page_dump[i];
279184728Sraj		while (bits) {
280184728Sraj			bit = ffs(bits) - 1;
281184728Sraj			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) +
282184728Sraj			    bit) * PAGE_SIZE;
283184728Sraj			/* Clear out undumpable pages now if needed */
284184728Sraj			if (is_dumpable(pa))
285184728Sraj				dumpsize += PAGE_SIZE;
286184728Sraj			else
287184728Sraj				dump_drop_page(pa);
288184728Sraj			bits &= ~(1ul << bit);
289184728Sraj		}
290184728Sraj	}
291184728Sraj
292184728Sraj	dumpsize += PAGE_SIZE;
293184728Sraj
294184728Sraj	/* Determine dump offset on device. */
295184728Sraj	if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
296184728Sraj		error = ENOSPC;
297184728Sraj		goto fail;
298184728Sraj	}
299184728Sraj
300184728Sraj	dumplo = di->mediaoffset + di->mediasize - dumpsize;
301184728Sraj	dumplo -= sizeof(kdh) * 2;
302184728Sraj	progress = dumpsize;
303184728Sraj
304184728Sraj	/* Initialize mdhdr */
305184728Sraj	bzero(&mdhdr, sizeof(mdhdr));
306184728Sraj	strcpy(mdhdr.magic, MINIDUMP_MAGIC);
307184728Sraj	mdhdr.version = MINIDUMP_VERSION;
308184728Sraj	mdhdr.msgbufsize = msgbufp->msg_size;
309184728Sraj	mdhdr.bitmapsize = vm_page_dump_size;
310184728Sraj	mdhdr.ptesize = ptesize;
311184728Sraj	mdhdr.kernbase = KERNBASE;
312184728Sraj
313184728Sraj	mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_ARM_VERSION, dumpsize,
314184728Sraj	    di->blocksize);
315184728Sraj
316184728Sraj	printf("Physical memory: %u MB\n", ptoa((uintmax_t)physmem) / 1048576);
317184728Sraj	printf("Dumping %llu MB:", (long long)dumpsize >> 20);
318184728Sraj
319184728Sraj	/* Dump leader */
320184728Sraj	error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
321184728Sraj	if (error)
322184728Sraj		goto fail;
323184728Sraj	dumplo += sizeof(kdh);
324184728Sraj
325184728Sraj	/* Dump my header */
326184728Sraj	bzero(&fakept, sizeof(fakept));
327184728Sraj	bcopy(&mdhdr, &fakept, sizeof(mdhdr));
328184728Sraj	error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
329184728Sraj	if (error)
330184728Sraj		goto fail;
331184728Sraj
332184728Sraj	/* Dump msgbuf up front */
333184728Sraj	error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size));
334184728Sraj	if (error)
335184728Sraj		goto fail;
336184728Sraj
337184728Sraj	/* Dump bitmap */
338184728Sraj	error = blk_write(di, (char *)vm_page_dump, 0,
339184728Sraj	    round_page(vm_page_dump_size));
340184728Sraj	if (error)
341184728Sraj		goto fail;
342184728Sraj
343184728Sraj	/* Dump kernel page table pages */
344184728Sraj	for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) {
345184728Sraj		/* We always write a page, even if it is zero */
346184728Sraj		pmap_get_pde_pte(pmap_kernel(), va, &pdp, &ptp);
347184728Sraj
348184728Sraj		if (pmap_pde_v(pdp) && pmap_pde_section(pdp))  {
349184728Sraj			if (count) {
350184728Sraj				error = blk_write_cont(di, prev_pa,
351184728Sraj				    count * L2_TABLE_SIZE_REAL);
352184728Sraj				if (error)
353184728Sraj					goto fail;
354184728Sraj				count = 0;
355184728Sraj				prev_pa = 0;
356184728Sraj			}
357184728Sraj			/* This is a single 2M block. Generate a fake PTP */
358184728Sraj			pa = (*pdp & L1_S_ADDR_MASK) | (va & ~L1_S_ADDR_MASK);
359184728Sraj			for (k = 0; k < (L1_S_SIZE / PAGE_SIZE); k++) {
360184728Sraj				fakept[k] = L2_S_PROTO | (pa + (k * PAGE_SIZE)) |
361184728Sraj				    L2_S_PROT(PTE_KERNEL,
362184728Sraj				    VM_PROT_READ | VM_PROT_WRITE);
363184728Sraj			}
364184728Sraj			error = blk_write(di, (char *)&fakept, 0,
365184728Sraj			    L2_TABLE_SIZE_REAL);
366184728Sraj			if (error)
367184728Sraj				goto fail;
368184728Sraj			/* Flush, in case we reuse fakept in the same block */
369184728Sraj			error = blk_flush(di);
370184728Sraj			if (error)
371184728Sraj				goto fail;
372184728Sraj			continue;
373184728Sraj		}
374184728Sraj		if (pmap_pde_v(pdp) && pmap_pde_page(pdp)) {
375184728Sraj			pa = *pdp & L1_C_ADDR_MASK;
376184728Sraj			if (!count) {
377184728Sraj				prev_pa = pa;
378184728Sraj				count++;
379184728Sraj			}
380184728Sraj			else {
381184728Sraj				if (pa == (prev_pa + count * L2_TABLE_SIZE_REAL))
382184728Sraj					count++;
383184728Sraj				else {
384184728Sraj					error = blk_write_cont(di, prev_pa,
385184728Sraj					    count * L2_TABLE_SIZE_REAL);
386184728Sraj					if (error)
387184728Sraj						goto fail;
388184728Sraj					count = 1;
389184728Sraj					prev_pa = pa;
390184728Sraj				}
391184728Sraj			}
392184728Sraj		} else {
393184728Sraj			if (count) {
394184728Sraj				error = blk_write_cont(di, prev_pa,
395184728Sraj				    count * L2_TABLE_SIZE_REAL);
396184728Sraj				if (error)
397184728Sraj					goto fail;
398184728Sraj				count = 0;
399184728Sraj				prev_pa = 0;
400184728Sraj			}
401184728Sraj			bzero(fakept, sizeof(fakept));
402184728Sraj			error = blk_write(di, (char *)&fakept, 0,
403184728Sraj			    L2_TABLE_SIZE_REAL);
404184728Sraj			if (error)
405184728Sraj				goto fail;
406184728Sraj			/* Flush, in case we reuse fakept in the same block */
407184728Sraj			error = blk_flush(di);
408184728Sraj			if (error)
409184728Sraj				goto fail;
410184728Sraj		}
411184728Sraj	}
412184728Sraj
413184728Sraj	if (count) {
414184728Sraj		error = blk_write_cont(di, prev_pa, count * L2_TABLE_SIZE_REAL);
415184728Sraj		if (error)
416184728Sraj			goto fail;
417184728Sraj		count = 0;
418184728Sraj		prev_pa = 0;
419184728Sraj	}
420184728Sraj
421184728Sraj	/* Dump memory chunks */
422184728Sraj	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
423184728Sraj		bits = vm_page_dump[i];
424184728Sraj		while (bits) {
425184728Sraj			bit = ffs(bits) - 1;
426184728Sraj			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) +
427184728Sraj			    bit) * PAGE_SIZE;
428184728Sraj			if (!count) {
429184728Sraj				prev_pa = pa;
430184728Sraj				count++;
431184728Sraj			} else {
432184728Sraj				if (pa == (prev_pa + count * PAGE_SIZE))
433184728Sraj					count++;
434184728Sraj				else {
435184728Sraj					error = blk_write_cont(di, prev_pa,
436184728Sraj					    count * PAGE_SIZE);
437184728Sraj					if (error)
438184728Sraj						goto fail;
439184728Sraj					count = 1;
440184728Sraj					prev_pa = pa;
441184728Sraj				}
442184728Sraj			}
443184728Sraj			bits &= ~(1ul << bit);
444184728Sraj		}
445184728Sraj	}
446184728Sraj	if (count) {
447184728Sraj		error = blk_write_cont(di, prev_pa, count * PAGE_SIZE);
448184728Sraj		if (error)
449184728Sraj			goto fail;
450184728Sraj		count = 0;
451184728Sraj		prev_pa = 0;
452184728Sraj	}
453184728Sraj
454184728Sraj	/* Dump trailer */
455184728Sraj	error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
456184728Sraj	if (error)
457184728Sraj		goto fail;
458184728Sraj	dumplo += sizeof(kdh);
459184728Sraj
460184728Sraj	/* Signal completion, signoff and exit stage left. */
461184728Sraj	dump_write(di, NULL, 0, 0, 0);
462184728Sraj	printf("\nDump complete\n");
463184728Sraj	return;
464184728Sraj
465184728Srajfail:
466184728Sraj	if (error < 0)
467184728Sraj		error = -error;
468184728Sraj
469184728Sraj	if (error == ECANCELED)
470184728Sraj		printf("\nDump aborted\n");
471184728Sraj	else if (error == ENOSPC)
472184728Sraj		printf("\nDump failed. Partition too small.\n");
473184728Sraj	else
474184728Sraj		printf("\n** DUMP FAILED (ERROR %d) **\n", error);
475184728Sraj}
476184728Sraj
477184728Srajvoid
478184728Srajdump_add_page(vm_paddr_t pa)
479184728Sraj{
480184728Sraj	int idx, bit;
481184728Sraj
482184728Sraj	pa >>= PAGE_SHIFT;
483184728Sraj	idx = pa >> 5;		/* 2^5 = 32 */
484184728Sraj	bit = pa & 31;
485184728Sraj	atomic_set_int(&vm_page_dump[idx], 1ul << bit);
486184728Sraj}
487184728Sraj
488184728Srajvoid
489184728Srajdump_drop_page(vm_paddr_t pa)
490184728Sraj{
491184728Sraj	int idx, bit;
492184728Sraj
493184728Sraj	pa >>= PAGE_SHIFT;
494184728Sraj	idx = pa >> 5;		/* 2^5 = 32 */
495184728Sraj	bit = pa & 31;
496184728Sraj	atomic_clear_int(&vm_page_dump[idx], 1ul << bit);
497184728Sraj}
498