/*	$OpenBSD: subr_hibernate.c,v 1.141 2024/06/05 11:04:17 krw Exp $	*/

/*
 * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
 * Copyright (c) 2011 Mike Larkin <mlarkin@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/hibernate.h>
#include <sys/malloc.h>
#include <sys/param.h>
#include <sys/tree.h>
#include <sys/systm.h>
#include <sys/disklabel.h>
#include <sys/disk.h>
#include <sys/conf.h>
#include <sys/buf.h>
#include <sys/fcntl.h>
#include <sys/stat.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>
#include <uvm/uvm_swap.h>

#include <machine/hibernate.h>

/* Make sure the signature can fit in one block */
CTASSERT((offsetof(union hibernate_info, sec_size) + sizeof(u_int32_t)) <= DEV_BSIZE);

/*
 * Hibernate piglet layout information
 *
 * The piglet is a scratch area of memory allocated by the suspending kernel.
 * Its phys and virt addrs are recorded in the signature block. The piglet is
 * used to guarantee an unused area of memory that can be used by the resuming
 * kernel for various things. The piglet is excluded during unpack operations.
 * The piglet size is presently 4*HIBERNATE_CHUNK_SIZE (typically 4*4MB).
 *
 * Offset from piglet_base	Purpose
 * ----------------------------------------------------------------------------
 * 0				Private page for suspend I/O write functions
 * 1*PAGE_SIZE			I/O page used during hibernate suspend
 * 2*PAGE_SIZE			I/O page used during hibernate suspend
 * 3*PAGE_SIZE			copy page used during hibernate suspend
 * 4*PAGE_SIZE			final chunk ordering list (24 pages)
 * 28*PAGE_SIZE			RLE utility page
 * 29*PAGE_SIZE			preserved entropy
 * 30*PAGE_SIZE			start of hiballoc area
 * 110*PAGE_SIZE		end of hiballoc area (80 pages)
 * 366*PAGE_SIZE		end of retguard preservation region (256 pages)
 * ...				unused
 * HIBERNATE_CHUNK_SIZE		start of hibernate chunk table
 * 2*HIBERNATE_CHUNK_SIZE	bounce area for chunks being unpacked
 * 4*HIBERNATE_CHUNK_SIZE	end of piglet
 */
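
/*
 * For illustration: with 4 KB pages (PAGE_SIZE == 4096), the fixed
 * slots above land at these byte offsets from piglet_base:
 *
 *	preserved entropy:	29 * 4096 = 0x1d000
 *	hiballoc area:		30 * 4096 = 0x1e000 .. 110 * 4096 (80 pages)
 *	retguard bounce area:	110 * 4096 .. 366 * 4096 (256 pages)
 *
 * These offsets must stay in sync with hib_getentropy(),
 * hibernate_zlib_reset() and hibernate_inflate_region() below.
 */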

/* Temporary vaddr ranges used during hibernate */
vaddr_t hibernate_temp_page;
vaddr_t hibernate_copy_page;
vaddr_t hibernate_rle_page;

/* Hibernate info as read from disk during resume */
union hibernate_info disk_hib;
struct bdevsw *bdsw;

/*
 * Global copy of the pig start address. This needs to be a global as we
 * switch stacks after computing it - it can't be stored on the stack.
 */
paddr_t global_pig_start;

/*
 * Global copies of the piglet start addresses (PA/VA). We store these
 * as globals to avoid having to carry them around as parameters, as the
 * piglet is allocated early and freed late - its lifecycle extends beyond
 * that of the hibernate info union which is calculated on suspend/resume.
 */
vaddr_t global_piglet_va;
paddr_t global_piglet_pa;

/* #define HIB_DEBUG */
#ifdef HIB_DEBUG
int	hib_debug = 99;
#define DPRINTF(x...)     do { if (hib_debug) printf(x); } while (0)
#define DNPRINTF(n,x...)  do { if (hib_debug > (n)) printf(x); } while (0)
#else
#define DPRINTF(x...)
#define DNPRINTF(n,x...)
#endif

#define	ROUNDUP(_x, _y)	((((_x)+(_y)-1)/(_y))*(_y))

#ifndef NO_PROPOLICE
extern long __guard_local;
#endif /* ! NO_PROPOLICE */

/* Retguard phys address (need to skip this region during unpack) */
paddr_t retguard_start_phys, retguard_end_phys;
extern char __retguard_start, __retguard_end;

void hibernate_copy_chunk_to_piglet(paddr_t, vaddr_t, size_t);
int hibernate_calc_rle(paddr_t, paddr_t);
int hibernate_write_rle(union hibernate_info *, paddr_t, paddr_t, daddr_t *,
	size_t *);

#define MAX_RLE (HIBERNATE_CHUNK_SIZE / PAGE_SIZE)

/*
 * Hib alloc enforced alignment.
 */
#define HIB_ALIGN		8 /* bytes alignment */

/*
 * sizeof builtin operation, but with alignment constraint.
 */
#define HIB_SIZEOF(_type)	roundup(sizeof(_type), HIB_ALIGN)

struct hiballoc_entry {
	size_t			hibe_use;
	size_t			hibe_space;
	RBT_ENTRY(hiballoc_entry) hibe_entry;
};
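
/*
 * Each hiballoc_entry is immediately followed by the memory it manages,
 * giving this in-memory layout (a sketch, not a separate structure):
 *
 *	[ hiballoc_entry | hibe_use bytes in use | hibe_space bytes free ]
 *
 * hib_alloc() below carves a new entry out of the free tail of an
 * existing entry; hib_free() merges an entry back into its predecessor.
 */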

/*
 * Sort hibernate memory ranges by ascending PA
 */
void
hibernate_sort_ranges(union hibernate_info *hib_info)
{
	int i, j;
	struct hibernate_memory_range *ranges;
	paddr_t base, end;

	ranges = hib_info->ranges;

	for (i = 1; i < hib_info->nranges; i++) {
		j = i;
		while (j > 0 && ranges[j - 1].base > ranges[j].base) {
			base = ranges[j].base;
			end = ranges[j].end;
			ranges[j].base = ranges[j - 1].base;
			ranges[j].end = ranges[j - 1].end;
			ranges[j - 1].base = base;
			ranges[j - 1].end = end;
			j--;
		}
	}
}

/*
 * Compare hiballoc entries based on the address they manage.
 *
 * Since the address is fixed, relative to struct hiballoc_entry,
 * we just compare the hiballoc_entry pointers.
 */
static __inline int
hibe_cmp(const struct hiballoc_entry *l, const struct hiballoc_entry *r)
{
	vaddr_t vl = (vaddr_t)l;
	vaddr_t vr = (vaddr_t)r;

	return vl < vr ? -1 : (vl > vr);
}

RBT_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)

/*
 * Given a hiballoc entry, return the address it manages.
 */
static __inline void *
hib_entry_to_addr(struct hiballoc_entry *entry)
{
	caddr_t addr;

	addr = (caddr_t)entry;
	addr += HIB_SIZEOF(struct hiballoc_entry);
	return addr;
}

/*
 * Given an address, find the hiballoc entry that corresponds.
 */
static __inline struct hiballoc_entry*
hib_addr_to_entry(void *addr_param)
{
	caddr_t addr;

	addr = (caddr_t)addr_param;
	addr -= HIB_SIZEOF(struct hiballoc_entry);
	return (struct hiballoc_entry*)addr;
}

RBT_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp);

/*
 * Allocate memory from the arena.
 *
 * Returns NULL if no memory is available.
 */
void *
hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz)
{
	struct hiballoc_entry *entry, *new_entry;
	size_t find_sz;

	/*
	 * Enforce alignment of HIB_ALIGN bytes.
	 *
	 * Note that, because the entry is put in front of the allocation,
	 * 0-byte allocations are guaranteed a unique address.
	 */
	alloc_sz = roundup(alloc_sz, HIB_ALIGN);

	/*
	 * Find an entry with hibe_space >= find_sz.
	 *
	 * If the root node is not large enough, we switch to tree traversal.
	 * Because all entries are made at the bottom of the free space,
	 * traversal from the end has a slightly better chance of yielding
	 * a sufficiently large space.
	 */
	find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry);
	entry = RBT_ROOT(hiballoc_addr, &arena->hib_addrs);
	if (entry != NULL && entry->hibe_space < find_sz) {
		RBT_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs) {
			if (entry->hibe_space >= find_sz)
				break;
		}
	}

	/*
	 * Insufficient or too fragmented memory.
	 */
	if (entry == NULL)
		return NULL;

	/*
	 * Create new entry in allocated space.
	 */
	new_entry = (struct hiballoc_entry*)(
	    (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use);
	new_entry->hibe_space = entry->hibe_space - find_sz;
	new_entry->hibe_use = alloc_sz;

	/*
	 * Insert entry.
	 */
	if (RBT_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry) != NULL)
		panic("hib_alloc: insert failure");
	entry->hibe_space = 0;

	/* Return address managed by entry. */
	return hib_entry_to_addr(new_entry);
}

void
hib_getentropy(char **bufp, size_t *bufplen)
{
	if (!bufp || !bufplen)
		return;

	*bufp = (char *)(global_piglet_va + (29 * PAGE_SIZE));
	*bufplen = PAGE_SIZE;
}

/*
 * Free a pointer previously allocated from this arena.
 *
 * If addr is NULL, this will be silently accepted.
 */
void
hib_free(struct hiballoc_arena *arena, void *addr)
{
	struct hiballoc_entry *entry, *prev;

	if (addr == NULL)
		return;

	/*
	 * Derive entry from addr and check it is really in this arena.
	 */
	entry = hib_addr_to_entry(addr);
	if (RBT_FIND(hiballoc_addr, &arena->hib_addrs, entry) != entry)
		panic("hib_free: freed item %p not in hib arena", addr);

	/*
	 * Give the space in entry to its predecessor.
	 *
	 * If entry has no predecessor, change its used space into free space
	 * instead.
	 */
	prev = RBT_PREV(hiballoc_addr, entry);
	if (prev != NULL &&
	    (void *)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry) +
	    prev->hibe_use + prev->hibe_space) == entry) {
		/* Merge entry. */
		RBT_REMOVE(hiballoc_addr, &arena->hib_addrs, entry);
		prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry) +
		    entry->hibe_use + entry->hibe_space;
	} else {
		/* Flip used memory to free space. */
		entry->hibe_space += entry->hibe_use;
		entry->hibe_use = 0;
	}
}

/*
 * Initialize hiballoc.
 *
 * The allocator will manage memory at ptr, which is len bytes.
 */
int
hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len)
{
	struct hiballoc_entry *entry;
	caddr_t ptr;
	size_t len;

	RBT_INIT(hiballoc_addr, &arena->hib_addrs);

	/*
	 * Hib allocator enforces HIB_ALIGN alignment.
	 * Fixup ptr and len.
	 */
	ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN);
	len = p_len - ((size_t)ptr - (size_t)p_ptr);
	len &= ~((size_t)HIB_ALIGN - 1);

	/*
	 * Insufficient memory to be able to allocate and also do bookkeeping.
	 */
	if (len <= HIB_SIZEOF(struct hiballoc_entry))
		return ENOMEM;

	/*
	 * Create entry describing space.
	 */
	entry = (struct hiballoc_entry*)ptr;
	entry->hibe_use = 0;
	entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry);
	RBT_INSERT(hiballoc_addr, &arena->hib_addrs, entry);

	return 0;
}
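
/*
 * Example of typical arena use (a sketch; the zlib glue further down
 * does exactly this on the piglet's hiballoc pages):
 *
 *	struct hiballoc_arena arena;
 *	void *p;
 *
 *	if (hiballoc_init(&arena, ptr, len) == 0) {
 *		p = hib_alloc(&arena, 64);
 *		if (p != NULL)
 *			hib_free(&arena, p);
 *	}
 */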
356
357/*
358 * Zero all free memory.
359 */
360void
361uvm_pmr_zero_everything(void)
362{
363	struct uvm_pmemrange	*pmr;
364	struct vm_page		*pg;
365	int			 i;
366
367	uvm_lock_fpageq();
368	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
369		/* Zero single pages. */
370		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_DIRTY]))
371		    != NULL) {
372			uvm_pmr_remove(pmr, pg);
373			uvm_pagezero(pg);
374			atomic_setbits_int(&pg->pg_flags, PG_ZERO);
375			uvmexp.zeropages++;
376			uvm_pmr_insert(pmr, pg, 0);
377		}
378
379		/* Zero multi page ranges. */
380		while ((pg = RBT_ROOT(uvm_pmr_size,
381		    &pmr->size[UVM_PMR_MEMTYPE_DIRTY])) != NULL) {
382			pg--; /* Size tree always has second page. */
383			uvm_pmr_remove(pmr, pg);
384			for (i = 0; i < pg->fpgsz; i++) {
385				uvm_pagezero(&pg[i]);
386				atomic_setbits_int(&pg[i].pg_flags, PG_ZERO);
387				uvmexp.zeropages++;
388			}
389			uvm_pmr_insert(pmr, pg, 0);
390		}
391	}
392	uvm_unlock_fpageq();
393}
394
395/*
396 * Mark all memory as dirty.
397 *
398 * Used to inform the system that the clean memory isn't clean for some
399 * reason, for example because we just came back from hibernate.
400 */
401void
402uvm_pmr_dirty_everything(void)
403{
404	struct uvm_pmemrange	*pmr;
405	struct vm_page		*pg;
406	int			 i;
407
408	uvm_lock_fpageq();
409	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
410		/* Dirty single pages. */
411		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO]))
412		    != NULL) {
413			uvm_pmr_remove(pmr, pg);
414			atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
415			uvm_pmr_insert(pmr, pg, 0);
416		}
417
418		/* Dirty multi page ranges. */
419		while ((pg = RBT_ROOT(uvm_pmr_size,
420		    &pmr->size[UVM_PMR_MEMTYPE_ZERO])) != NULL) {
421			pg--; /* Size tree always has second page. */
422			uvm_pmr_remove(pmr, pg);
423			for (i = 0; i < pg->fpgsz; i++)
424				atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO);
425			uvm_pmr_insert(pmr, pg, 0);
426		}
427	}
428
429	uvmexp.zeropages = 0;
430	uvm_unlock_fpageq();
431}
432
433/*
434 * Allocate an area that can hold sz bytes and doesn't overlap with
435 * the piglet at piglet_pa.
436 */
437int
438uvm_pmr_alloc_pig(paddr_t *pa, psize_t sz, paddr_t piglet_pa)
439{
440	struct uvm_constraint_range pig_constraint;
441	struct kmem_pa_mode kp_pig = {
442		.kp_constraint = &pig_constraint,
443		.kp_maxseg = 1
444	};
445	vaddr_t va;
446
447	sz = round_page(sz);
448
449	pig_constraint.ucr_low = piglet_pa + 4 * HIBERNATE_CHUNK_SIZE;
450	pig_constraint.ucr_high = -1;
451
452	va = (vaddr_t)km_alloc(sz, &kv_any, &kp_pig, &kd_nowait);
453	if (va == 0) {
454		pig_constraint.ucr_low = 0;
455		pig_constraint.ucr_high = piglet_pa - 1;
456
457		va = (vaddr_t)km_alloc(sz, &kv_any, &kp_pig, &kd_nowait);
458		if (va == 0)
459			return ENOMEM;
460	}
461
462	pmap_extract(pmap_kernel(), va, pa);
463	return 0;
464}
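
/*
 * The two km_alloc() attempts above correspond to the two physical
 * windows that cannot collide with the piglet:
 *
 *	first try:	[piglet_pa + 4 * HIBERNATE_CHUNK_SIZE, -1]
 *	fallback:	[0, piglet_pa - 1]
 *
 * i.e. the pig is placed either entirely above or entirely below the
 * piglet, never overlapping it.
 */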

/*
 * Allocate a piglet area.
 *
 * This needs to be in DMA-safe memory.
 * Piglets are aligned.
 *
 * sz and align in bytes.
 *
 * The call will sleep for the pagedaemon to attempt to free memory.
 * The pagedaemon may decide it's not possible to free enough memory, causing
 * the allocation to fail.
 */
int
uvm_pmr_alloc_piglet(vaddr_t *va, paddr_t *pa, vsize_t sz, paddr_t align)
{
	struct kmem_pa_mode kp_piglet = {
		.kp_constraint = &dma_constraint,
		.kp_align = align,
		.kp_maxseg = 1
	};

	/* Ensure align is a power of 2 */
	KASSERT((align & (align - 1)) == 0);

	/*
	 * Fixup arguments: align must be at least PAGE_SIZE,
	 * sz will be converted to pagecount, since that is what
	 * pmemrange uses internally.
	 */
	if (align < PAGE_SIZE)
		kp_piglet.kp_align = PAGE_SIZE;

	sz = round_page(sz);

	*va = (vaddr_t)km_alloc(sz, &kv_any, &kp_piglet, &kd_nowait);
	if (*va == 0)
		return ENOMEM;

	pmap_extract(pmap_kernel(), *va, pa);
	return 0;
}

/*
 * Free a piglet area.
 */
void
uvm_pmr_free_piglet(vaddr_t va, vsize_t sz)
{
	/*
	 * Fix parameters.
	 */
	sz = round_page(sz);

	/*
	 * Free the physical and virtual memory.
	 */
	km_free((void *)va, sz, &kv_any, &kp_dma_contig);
}

/*
 * Physmem RLE compression support.
 *
 * Given a physical page address, return the number of pages starting at the
 * address that are free.  Clamps to the number of pages in
 * HIBERNATE_CHUNK_SIZE. Returns 0 if the page at addr is not free.
 */
int
uvm_page_rle(paddr_t addr)
{
	struct vm_page		*pg, *pg_end;
	struct vm_physseg	*vmp;
	int			 pseg_idx, off_idx;

	pseg_idx = vm_physseg_find(atop(addr), &off_idx);
	if (pseg_idx == -1)
		return 0;

	vmp = &vm_physmem[pseg_idx];
	pg = &vmp->pgs[off_idx];
	if (!(pg->pg_flags & PQ_FREE))
		return 0;

	/*
	 * Search for the first non-free page after pg.
	 * Note that the page may not be the first page in a free pmemrange,
	 * therefore pg->fpgsz cannot be used.
	 */
	for (pg_end = pg; pg_end <= vmp->lastpg &&
	    (pg_end->pg_flags & PQ_FREE) == PQ_FREE &&
	    (pg_end - pg) < HIBERNATE_CHUNK_SIZE/PAGE_SIZE; pg_end++)
		;
	return pg_end - pg;
}
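
/*
 * Example: if the pages at addr, addr + PAGE_SIZE and addr + 2 * PAGE_SIZE
 * are free but the following page is not, uvm_page_rle(addr) returns 3.
 * A free region longer than one chunk is clamped to
 * HIBERNATE_CHUNK_SIZE / PAGE_SIZE (i.e. MAX_RLE) pages.
 */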

/*
 * Fills out the hibernate_info union pointed to by hib
 * with information about this machine (swap signature block
 * offsets, number of memory ranges, kernel in use, etc)
 */
int
get_hibernate_info(union hibernate_info *hib, int suspend)
{
	struct disklabel dl;
	char err_string[128], *dl_ret;
	int part;
	SHA2_CTX ctx;
	void *fn;

#ifndef NO_PROPOLICE
	/* Save propolice guard */
	hib->guard = __guard_local;
#endif /* ! NO_PROPOLICE */

	/* Determine I/O function to use */
	hib->io_func = get_hibernate_io_function(swdevt[0].sw_dev);
	if (hib->io_func == NULL)
		return (1);

	/* Calculate hibernate device */
	hib->dev = swdevt[0].sw_dev;

	/* Read disklabel (used to calculate signature and image offsets) */
	dl_ret = disk_readlabel(&dl, hib->dev, err_string, sizeof(err_string));

	if (dl_ret) {
		printf("Hibernate error reading disklabel: %s\n", dl_ret);
		return (1);
	}

	/* Make sure we have a swap partition. */
	part = DISKPART(hib->dev);
	if (dl.d_npartitions <= part ||
	    dl.d_secsize > sizeof(union hibernate_info) ||
	    dl.d_partitions[part].p_fstype != FS_SWAP ||
	    DL_GETPSIZE(&dl.d_partitions[part]) == 0)
		return (1);

	/* Magic number */
	hib->magic = HIBERNATE_MAGIC;

	/* Calculate signature block location */
	hib->sec_size = dl.d_secsize;
	hib->sig_offset = DL_GETPSIZE(&dl.d_partitions[part]) - 1;
	hib->sig_offset = DL_SECTOBLK(&dl, hib->sig_offset);

	SHA256Init(&ctx);
	SHA256Update(&ctx, version, strlen(version));
	fn = printf;
	SHA256Update(&ctx, &fn, sizeof(fn));
	fn = malloc;
	SHA256Update(&ctx, &fn, sizeof(fn));
	fn = km_alloc;
	SHA256Update(&ctx, &fn, sizeof(fn));
	fn = strlen;
	SHA256Update(&ctx, &fn, sizeof(fn));
	SHA256Final((u_int8_t *)&hib->kern_hash, &ctx);

	if (suspend) {
		/* Grab the previously-allocated piglet addresses */
		hib->piglet_va = global_piglet_va;
		hib->piglet_pa = global_piglet_pa;
		hib->io_page = (void *)hib->piglet_va;

		/*
		 * Initialization of the hibernate IO function for drivers
		 * that need to do prep work (such as allocating memory or
		 * setting up data structures that cannot safely be done
		 * during suspend without causing side effects). There is
		 * a matching HIB_DONE call performed after the write is
		 * completed.
		 */
		if (hib->io_func(hib->dev,
		    DL_SECTOBLK(&dl, DL_GETPOFFSET(&dl.d_partitions[part])),
		    (vaddr_t)NULL,
		    DL_SECTOBLK(&dl, DL_GETPSIZE(&dl.d_partitions[part])),
		    HIB_INIT, hib->io_page))
			goto fail;

	} else {
		/*
		 * Resuming kernels use a regular private page for the driver.
		 * No need to free this I/O page as it will vanish as part of
		 * the resume.
		 */
		hib->io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
		if (!hib->io_page)
			goto fail;
	}

	if (get_hibernate_info_md(hib))
		goto fail;

	return (0);

fail:
	return (1);
}

/*
 * Allocate nitems*size bytes from the hiballoc area presently in use
 */
void *
hibernate_zlib_alloc(void *unused, int nitems, int size)
{
	struct hibernate_zlib_state *hibernate_state;

	hibernate_state =
	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;

	return hib_alloc(&hibernate_state->hiballoc_arena, nitems*size);
}

/*
 * Free the memory pointed to by addr in the hiballoc area presently in
 * use
 */
void
hibernate_zlib_free(void *unused, void *addr)
{
	struct hibernate_zlib_state *hibernate_state;

	hibernate_state =
	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;

	hib_free(&hibernate_state->hiballoc_arena, addr);
}

/*
 * Inflate next page of data from the image stream.
 * The rle parameter is modified on exit to contain the number of pages to
 * skip in the output stream (or 0 if this page was inflated into).
 *
 * Returns 0 if the stream contains additional data, or 1 if the stream is
 * finished.
 */
int
hibernate_inflate_page(int *rle)
{
	struct hibernate_zlib_state *hibernate_state;
	int i;

	hibernate_state =
	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;

	/* Set up the stream for RLE code inflate */
	hibernate_state->hib_stream.next_out = (unsigned char *)rle;
	hibernate_state->hib_stream.avail_out = sizeof(*rle);

	/* Inflate RLE code */
	i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH);
	if (i != Z_OK && i != Z_STREAM_END) {
		/*
		 * XXX - this will likely reboot/hang most machines
		 *       since the console output buffer will be unmapped,
		 *       but there's not much else we can do here.
		 */
		panic("rle inflate stream error");
	}

	if (hibernate_state->hib_stream.avail_out != 0) {
		/*
		 * XXX - this will likely reboot/hang most machines
		 *       since the console output buffer will be unmapped,
		 *       but there's not much else we can do here.
		 */
		panic("rle short inflate error");
	}

	if (*rle < 0 || *rle > 1024) {
		/*
		 * XXX - this will likely reboot/hang most machines
		 *       since the console output buffer will be unmapped,
		 *       but there's not much else we can do here.
		 */
		panic("invalid rle count");
	}

	if (i == Z_STREAM_END)
		return (1);

	if (*rle != 0)
		return (0);

	/* Set up the stream for page inflate */
	hibernate_state->hib_stream.next_out =
		(unsigned char *)HIBERNATE_INFLATE_PAGE;
	hibernate_state->hib_stream.avail_out = PAGE_SIZE;

	/* Process next block of data */
	i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH);
	if (i != Z_OK && i != Z_STREAM_END) {
		/*
		 * XXX - this will likely reboot/hang most machines
		 *       since the console output buffer will be unmapped,
		 *       but there's not much else we can do here.
		 */
		panic("inflate error");
	}

	/* We should always have extracted a full page ... */
	if (hibernate_state->hib_stream.avail_out != 0) {
		/*
		 * XXX - this will likely reboot/hang most machines
		 *       since the console output buffer will be unmapped,
		 *       but there's not much else we can do here.
		 */
		panic("incomplete page");
	}

	return (i == Z_STREAM_END);
}
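
/*
 * The image stream is therefore a sequence of (RLE word, optional page)
 * records: a nonzero RLE word means "skip that many pages of output",
 * while an RLE word of 0 is followed by one page's worth of compressed
 * data. hibernate_write_rle() and hibernate_write_chunks() below emit
 * exactly this layout at suspend time.
 */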

/*
 * Inflate size bytes from src into dest, skipping any pages in
 * [src..dest] that are special (see hibernate_inflate_skip)
 *
 * This function executes while using the resume-time stack
 * and pmap, and therefore cannot use ddb/printf/etc. Doing so
 * will likely hang or reset the machine since the console output buffer
 * will be unmapped.
 */
void
hibernate_inflate_region(union hibernate_info *hib, paddr_t dest,
    paddr_t src, size_t size)
{
	int end_stream = 0, rle, skip;
	struct hibernate_zlib_state *hibernate_state;

	hibernate_state =
	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;

	hibernate_state->hib_stream.next_in = (unsigned char *)src;
	hibernate_state->hib_stream.avail_in = size;

	do {
		/*
		 * Is this a special page? If yes, redirect the
		 * inflate output to a scratch page (eg, discard it)
		 */
		skip = hibernate_inflate_skip(hib, dest);
		if (skip == HIB_SKIP) {
			hibernate_enter_resume_mapping(
			    HIBERNATE_INFLATE_PAGE,
			    HIBERNATE_INFLATE_PAGE, 0);
		} else if (skip == HIB_MOVE) {
			/*
			 * Special case : retguard region. This gets moved
			 * temporarily into the piglet region and copied into
			 * place immediately before resume
			 */
			hibernate_enter_resume_mapping(
			    HIBERNATE_INFLATE_PAGE,
			    hib->piglet_pa + (110 * PAGE_SIZE) +
			    hib->retguard_ofs, 0);
			hib->retguard_ofs += PAGE_SIZE;
			if (hib->retguard_ofs > 255 * PAGE_SIZE) {
				/*
				 * XXX - this will likely reboot/hang most
				 *       machines since the console output
				 *       buffer will be unmapped, but there's
				 *       not much else we can do here.
				 */
				panic("retguard move error, out of space");
			}
		} else {
			hibernate_enter_resume_mapping(
			    HIBERNATE_INFLATE_PAGE, dest, 0);
		}

		hibernate_flush();
		end_stream = hibernate_inflate_page(&rle);

		if (rle == 0)
			dest += PAGE_SIZE;
		else
			dest += (rle * PAGE_SIZE);
	} while (!end_stream);
}

/*
 * deflate from src into the I/O page, up to 'remaining' bytes
 *
 * Returns number of input bytes consumed, and may reset
 * the 'remaining' parameter if not all the output space was consumed
 * (this information is needed to know how much to write to disk)
 */
size_t
hibernate_deflate(union hibernate_info *hib, paddr_t src,
    size_t *remaining)
{
	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;
	struct hibernate_zlib_state *hibernate_state;

	hibernate_state =
	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;

	/* Set up the stream for deflate */
	hibernate_state->hib_stream.next_in = (unsigned char *)src;
	hibernate_state->hib_stream.avail_in = PAGE_SIZE - (src & PAGE_MASK);
	hibernate_state->hib_stream.next_out =
		(unsigned char *)hibernate_io_page + (PAGE_SIZE - *remaining);
	hibernate_state->hib_stream.avail_out = *remaining;

	/* Process next block of data */
	if (deflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH) != Z_OK)
		panic("hibernate zlib deflate error");

	/* Update pointers and return number of bytes consumed */
	*remaining = hibernate_state->hib_stream.avail_out;
	return (PAGE_SIZE - (src & PAGE_MASK)) -
	    hibernate_state->hib_stream.avail_in;
}
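
/*
 * Note that src need not be page aligned: avail_in above is clamped to
 * the rest of src's page, so with 4 KB pages and (src & PAGE_MASK) ==
 * 0x800, at most 0x800 bytes are consumed. Callers advance src by the
 * returned count and call hibernate_deflate() again as needed.
 */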

/*
 * Write the hibernation information specified in hiber_info
 * to the location in swap previously calculated (last block of
 * swap), called the "signature block".
 */
int
hibernate_write_signature(union hibernate_info *hib)
{
	memset(&disk_hib, 0, hib->sec_size);
	memcpy(&disk_hib, hib, DEV_BSIZE);

	/* Write hibernate info to disk */
	return (hib->io_func(hib->dev, hib->sig_offset,
	    (vaddr_t)&disk_hib, hib->sec_size, HIB_W,
	    hib->io_page));
}

/*
 * Write the memory chunk table to the area in swap immediately
 * preceding the signature block. The chunk table is stored
 * in the piglet when this function is called.  Returns errno.
 */
int
hibernate_write_chunktable(union hibernate_info *hib)
{
	vaddr_t hibernate_chunk_table_start;
	size_t hibernate_chunk_table_size;
	int i, err;

	hibernate_chunk_table_size = HIBERNATE_CHUNK_TABLE_SIZE;

	hibernate_chunk_table_start = hib->piglet_va +
	    HIBERNATE_CHUNK_SIZE;

	/* Write chunk table */
	for (i = 0; i < hibernate_chunk_table_size; i += MAXPHYS) {
		if ((err = hib->io_func(hib->dev,
		    hib->chunktable_offset + (i/DEV_BSIZE),
		    (vaddr_t)(hibernate_chunk_table_start + i),
		    MAXPHYS, HIB_W, hib->io_page))) {
			DPRINTF("chunktable write error: %d\n", err);
			return (err);
		}
	}

	return (0);
}

/*
 * Write an empty hiber_info to the swap signature block, which is
 * guaranteed to not match any valid hib.
 */
int
hibernate_clear_signature(union hibernate_info *hib)
{
	uint8_t buf[DEV_BSIZE];

	/* Zero out a blank hiber_info */
	memcpy(&buf, &disk_hib, sizeof(buf));
	memset(&disk_hib, 0, hib->sec_size);

	/* Write (zeroed) hibernate info to disk */
	DPRINTF("clearing hibernate signature block location: %lld\n",
		hib->sig_offset);
	if (hibernate_block_io(hib,
	    hib->sig_offset,
	    hib->sec_size, (vaddr_t)&disk_hib, 1))
		printf("Warning: could not clear hibernate signature\n");

	memcpy(&disk_hib, buf, sizeof(buf));
	return (0);
}

/*
 * Compare two hibernate_infos to determine if they are the same (eg,
 * we should be performing a hibernate resume on this machine).
 * Not all fields are checked - just enough to verify that the machine
 * has the same memory configuration and kernel as the one that
 * wrote the signature previously.
 */
int
hibernate_compare_signature(union hibernate_info *mine,
    union hibernate_info *disk)
{
	u_int i;

	if (mine->nranges != disk->nranges) {
		printf("unhibernate failed: memory layout changed\n");
		return (1);
	}

	if (bcmp(mine->kern_hash, disk->kern_hash, SHA256_DIGEST_LENGTH) != 0) {
		printf("unhibernate failed: original kernel changed\n");
		return (1);
	}

	for (i = 0; i < mine->nranges; i++) {
		if ((mine->ranges[i].base != disk->ranges[i].base) ||
		    (mine->ranges[i].end != disk->ranges[i].end)) {
			DPRINTF("hib range %d mismatch [%p-%p != %p-%p]\n",
				i,
				(void *)mine->ranges[i].base,
				(void *)mine->ranges[i].end,
				(void *)disk->ranges[i].base,
				(void *)disk->ranges[i].end);
			printf("unhibernate failed: memory size changed\n");
			return (1);
		}
	}

	return (0);
}

/*
 * Transfers xfer_size bytes between the hibernate device specified in
 * hib_info at offset blkctr and the vaddr specified at dest.
 *
 * Separate offsets and pages are used to handle misaligned reads (reads
 * that span a page boundary).
 *
 * blkctr specifies a relative offset (relative to the start of swap),
 * not an absolute disk offset
 *
 */
int
hibernate_block_io(union hibernate_info *hib, daddr_t blkctr,
    size_t xfer_size, vaddr_t dest, int iswrite)
{
	struct buf *bp;
	int error;

	bp = geteblk(xfer_size);
	if (iswrite)
		bcopy((caddr_t)dest, bp->b_data, xfer_size);

	bp->b_bcount = xfer_size;
	bp->b_blkno = blkctr;
	CLR(bp->b_flags, B_READ | B_WRITE | B_DONE);
	SET(bp->b_flags, B_BUSY | (iswrite ? B_WRITE : B_READ) | B_RAW);
	bp->b_dev = hib->dev;
	(*bdsw->d_strategy)(bp);

	error = biowait(bp);
	if (error) {
		printf("hib block_io biowait error %d blk %lld size %zu\n",
			error, (long long)blkctr, xfer_size);
	} else if (!iswrite)
		bcopy(bp->b_data, (caddr_t)dest, xfer_size);

	bp->b_flags |= B_INVAL;
	brelse(bp);

	return (error != 0);
}
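
/*
 * Example (as done during resume below): synchronously read the
 * signature block into disk_hib:
 *
 *	hibernate_block_io(hib, hib->sig_offset, hib->sec_size,
 *	    (vaddr_t)&disk_hib, 0);
 */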

/*
 * Preserve one page worth of random data, generated from the resuming
 * kernel's arc4random. After resume, this preserved entropy can be used
 * to further improve the un-hibernated machine's entropy pool. This
 * random data is stored in the piglet, which is preserved across the
 * unpack operation, and is restored later in the resume process (see
 * hib_getentropy)
 */
void
hibernate_preserve_entropy(union hibernate_info *hib)
{
	void *entropy;

	entropy = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait);

	if (!entropy)
		return;

	pmap_activate(curproc);
	pmap_kenter_pa((vaddr_t)entropy,
	    (paddr_t)(hib->piglet_pa + (29 * PAGE_SIZE)),
	    PROT_READ | PROT_WRITE);

	arc4random_buf((void *)entropy, PAGE_SIZE);
	pmap_kremove((vaddr_t)entropy, PAGE_SIZE);
	km_free(entropy, PAGE_SIZE, &kv_any, &kp_none);
}

#ifndef NO_PROPOLICE
vaddr_t
hibernate_unprotect_ssp(void)
{
	struct kmem_dyn_mode kd_avoidalias;
	vaddr_t va = trunc_page((vaddr_t)&__guard_local);
	paddr_t pa;

	pmap_extract(pmap_kernel(), va, &pa);

	memset(&kd_avoidalias, 0, sizeof kd_avoidalias);
	kd_avoidalias.kd_prefer = pa;
	kd_avoidalias.kd_waitok = 1;
	va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_avoidalias);
	if (!va)
		panic("hibernate_unprotect_ssp");

	pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);
	pmap_update(pmap_kernel());

	return va;
}

void
hibernate_reprotect_ssp(vaddr_t va)
{
	pmap_kremove(va, PAGE_SIZE);
	km_free((void *)va, PAGE_SIZE, &kv_any, &kp_none);
}
#endif /* NO_PROPOLICE */

/*
 * Reads the signature block from swap, checks against the current machine's
 * information. If the information matches, perform a resume by reading the
 * saved image into the pig area, and unpacking.
 *
 * Must be called with interrupts enabled.
 */
void
hibernate_resume(void)
{
	uint8_t buf[DEV_BSIZE];
	union hibernate_info *hib = (union hibernate_info *)&buf;
	int s;
#ifndef NO_PROPOLICE
	vsize_t off = (vaddr_t)&__guard_local -
	    trunc_page((vaddr_t)&__guard_local);
	vaddr_t guard_va;
#endif

	/* Get current running machine's hibernate info */
	memset(buf, 0, sizeof(buf));
	if (get_hibernate_info(hib, 0)) {
		DPRINTF("couldn't retrieve machine's hibernate info\n");
		return;
	}

	/* Read hibernate info from disk */
	s = splbio();

	bdsw = &bdevsw[major(hib->dev)];
	if ((*bdsw->d_open)(hib->dev, FREAD, S_IFCHR, curproc)) {
		printf("hibernate_resume device open failed\n");
		splx(s);
		return;
	}

	DPRINTF("reading hibernate signature block location: %lld\n",
		hib->sig_offset);

	if (hibernate_block_io(hib,
	    hib->sig_offset,
	    hib->sec_size, (vaddr_t)&disk_hib, 0)) {
		DPRINTF("error in hibernate read\n");
		goto fail;
	}

	/* Check magic number */
	if (disk_hib.magic != HIBERNATE_MAGIC) {
		DPRINTF("wrong magic number in hibernate signature: %x\n",
			disk_hib.magic);
		goto fail;
	}

	/*
	 * We (possibly) found a hibernate signature. Clear signature first,
	 * to prevent accidental resume or endless resume cycles later.
	 */
	if (hibernate_clear_signature(hib)) {
		DPRINTF("error clearing hibernate signature block\n");
		goto fail;
	}

	/*
	 * If on-disk and in-memory hibernate signatures match,
	 * this means we should do a resume from hibernate.
	 */
	if (hibernate_compare_signature(hib, &disk_hib)) {
		DPRINTF("mismatched hibernate signature block\n");
		goto fail;
	}
	disk_hib.dev = hib->dev;

#ifdef MULTIPROCESSOR
	/* XXX - if we fail later, we may need to rehatch APs on some archs */
	DPRINTF("hibernate: quiescing APs\n");
	hibernate_quiesce_cpus();
#endif /* MULTIPROCESSOR */

	/* Read the image from disk into the image (pig) area */
	if (hibernate_read_image(&disk_hib))
		goto fail;
	if ((*bdsw->d_close)(hib->dev, 0, S_IFCHR, curproc))
		printf("hibernate_resume device close failed\n");
	bdsw = NULL;

	DPRINTF("hibernate: quiescing devices\n");
	if (config_suspend_all(DVACT_QUIESCE) != 0)
		goto fail;

#ifndef NO_PROPOLICE
	guard_va = hibernate_unprotect_ssp();
#endif /* NO_PROPOLICE */

	(void) splhigh();
	hibernate_disable_intr_machdep();
	cold = 2;

	DPRINTF("hibernate: suspending devices\n");
	if (config_suspend_all(DVACT_SUSPEND) != 0) {
		cold = 0;
		hibernate_enable_intr_machdep();
#ifndef NO_PROPOLICE
		hibernate_reprotect_ssp(guard_va);
#endif /* ! NO_PROPOLICE */
		goto fail;
	}

	pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_start,
	    &retguard_start_phys);
	pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_end,
	    &retguard_end_phys);

	hibernate_preserve_entropy(&disk_hib);

	printf("Unpacking image...\n");

	/* Switch stacks */
	DPRINTF("hibernate: switching stacks\n");
	hibernate_switch_stack_machdep();

#ifndef NO_PROPOLICE
	/* Start using suspended kernel's propolice guard */
	*(long *)(guard_va + off) = disk_hib.guard;
	hibernate_reprotect_ssp(guard_va);
#endif /* ! NO_PROPOLICE */

	/* Unpack and resume */
	hibernate_unpack_image(&disk_hib);

fail:
	if (!bdsw)
		printf("\nUnable to resume hibernated image\n");
	else if ((*bdsw->d_close)(hib->dev, 0, S_IFCHR, curproc))
		printf("hibernate_resume device close failed\n");
	splx(s);
}

/*
 * Unpack image from pig area to original location by looping through the
 * list of output chunks in the order they should be restored (fchunks).
 *
 * Note that due to the stack smash protector and the fact that we have
 * switched stacks, it is not permitted to return from this function.
 */
void
hibernate_unpack_image(union hibernate_info *hib)
{
	uint8_t buf[DEV_BSIZE];
	struct hibernate_disk_chunk *chunks;
	union hibernate_info *local_hib = (union hibernate_info *)&buf;
	paddr_t image_cur = global_pig_start;
	short i, *fchunks;
	char *pva;

	/* Piglet will be identity mapped (VA == PA) */
	pva = (char *)hib->piglet_pa;

	fchunks = (short *)(pva + (4 * PAGE_SIZE));

	chunks = (struct hibernate_disk_chunk *)(pva + HIBERNATE_CHUNK_SIZE);

	/* Can't use hiber_info that's passed in after this point */
	memcpy(buf, hib, sizeof(buf));
	local_hib->retguard_ofs = 0;

	/* VA == PA */
	local_hib->piglet_va = local_hib->piglet_pa;

	/*
	 * Point of no return. Once we pass this point, only kernel code can
	 * be accessed. No global variables or other kernel data structures
	 * are guaranteed to be coherent after unpack starts.
	 *
	 * The image is now in high memory (pig area), we unpack from the pig
	 * to the correct location in memory. We'll eventually end up copying
	 * on top of ourself, but we are assured the kernel code here is the
	 * same between the hibernated and resuming kernel, and we are running
	 * on our own stack, so the overwrite is ok.
	 */
	DPRINTF("hibernate: activating alt. pagetable and starting unpack\n");
	hibernate_activate_resume_pt_machdep();

	for (i = 0; i < local_hib->chunk_ctr; i++) {
		/* Reset zlib for inflate */
		if (hibernate_zlib_reset(local_hib, 0) != Z_OK)
			panic("hibernate failed to reset zlib for inflate");

		hibernate_process_chunk(local_hib, &chunks[fchunks[i]],
		    image_cur);

		image_cur += chunks[fchunks[i]].compressed_size;
	}

	/*
	 * Resume the loaded kernel by jumping to the MD resume vector.
	 * We won't be returning from this call. We pass the location of
	 * the retguard save area so the MD code can replace it before
	 * resuming. See the piglet layout at the top of this file for
	 * more information on the layout of the piglet area.
	 *
	 * We use 'global_piglet_va' here since by the time we are at
	 * this point, we have already unpacked the image, and we want
	 * the suspended kernel's view of what the piglet was, before
	 * suspend occurred (since we will need to use that in the retguard
	 * copy code in hibernate_resume_machdep.)
	 */
	hibernate_resume_machdep(global_piglet_va + (110 * PAGE_SIZE));
}

/*
 * Bounce a compressed image chunk to the piglet, entering mappings for the
 * copied pages as needed
 */
void
hibernate_copy_chunk_to_piglet(paddr_t img_cur, vaddr_t piglet, size_t size)
{
	size_t ct, ofs;
	paddr_t src = img_cur;
	vaddr_t dest = piglet;

	/* Copy first partial page */
	ct = (PAGE_SIZE) - (src & PAGE_MASK);
	ofs = (src & PAGE_MASK);

	if (ct < PAGE_SIZE) {
		hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE,
			(src - ofs), 0);
		hibernate_flush();
		bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE + ofs), (caddr_t)dest, ct);
		src += ct;
		dest += ct;
	}

	/* Copy remaining pages */
	while (src < size + img_cur) {
		hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE, src, 0);
		hibernate_flush();
		ct = PAGE_SIZE;
		bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE), (caddr_t)dest, ct);
		hibernate_flush();
		src += ct;
		dest += ct;
	}
}

/*
 * Process a chunk by bouncing it to the piglet, followed by unpacking
 */
void
hibernate_process_chunk(union hibernate_info *hib,
    struct hibernate_disk_chunk *chunk, paddr_t img_cur)
{
	char *pva = (char *)hib->piglet_va;

	hibernate_copy_chunk_to_piglet(img_cur,
	 (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)), chunk->compressed_size);
	hibernate_inflate_region(hib, chunk->base,
	    (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)),
	    chunk->compressed_size);
}

/*
 * Calculate RLE component for 'inaddr'. Clamps to max RLE pages between
 * inaddr and range_end.
 */
int
hibernate_calc_rle(paddr_t inaddr, paddr_t range_end)
{
	int rle;

	rle = uvm_page_rle(inaddr);
	KASSERT(rle >= 0 && rle <= MAX_RLE);

	/* Clamp RLE to range end */
	if (rle > 0 && inaddr + (rle * PAGE_SIZE) > range_end)
		rle = (range_end - inaddr) / PAGE_SIZE;

	return (rle);
}
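
/*
 * Example: with uvm_page_rle(inaddr) == 8 but only 3 pages left before
 * range_end, the clamp above yields rle == 3, so callers never encode
 * free pages that lie beyond the current range.
 */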

/*
 * Write the RLE byte for page at 'inaddr' to the output stream.
 * Returns the number of pages to be skipped at 'inaddr'.
 */
int
hibernate_write_rle(union hibernate_info *hib, paddr_t inaddr,
	paddr_t range_end, daddr_t *blkctr,
	size_t *out_remaining)
{
	int rle, err, *rleloc;
	struct hibernate_zlib_state *hibernate_state;
	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;

	hibernate_state =
	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;

	rle = hibernate_calc_rle(inaddr, range_end);

	rleloc = (int *)hibernate_rle_page + MAX_RLE - 1;
	*rleloc = rle;

	/* Deflate the RLE byte into the stream */
	hibernate_deflate(hib, (paddr_t)rleloc, out_remaining);

	/* Did we fill the output page? If so, flush to disk */
	if (*out_remaining == 0) {
		if ((err = hib->io_func(hib->dev, *blkctr + hib->image_offset,
			(vaddr_t)hibernate_io_page, PAGE_SIZE, HIB_W,
			hib->io_page))) {
				DPRINTF("hib write error %d\n", err);
				return (err);
		}

		*blkctr += PAGE_SIZE / DEV_BSIZE;
		*out_remaining = PAGE_SIZE;

		/* If we didn't deflate the entire RLE byte, finish it now */
		if (hibernate_state->hib_stream.avail_in != 0)
			hibernate_deflate(hib,
				(vaddr_t)hibernate_state->hib_stream.next_in,
				out_remaining);
	}

	return (rle);
}

/*
 * Write a compressed version of this machine's memory to disk, at the
 * precalculated swap offset:
 *
 * end of swap - signature block size - chunk table size - memory size
 *
 * The function begins by looping through each phys mem range, cutting each
 * one into MD sized chunks. These chunks are then compressed individually
 * and written out to disk, in phys mem order. Some chunks might compress
 * more than others, and for this reason, each chunk's size is recorded
 * in the chunk table, which is written to disk after the image has
 * properly been compressed and written (in hibernate_write_chunktable).
 *
 * When this function is called, the machine is nearly suspended - most
 * devices are quiesced/suspended, interrupts are off, and cold has
 * been set. This means that there can be no side effects once the
 * write has started, and the write function itself can also have no
 * side effects. This also means no printfs are permitted (since printf
 * has side effects.)
 *
 * Return values :
 *
 * 0      - success
 * EIO    - I/O error occurred writing the chunks
 * EINVAL - Failed to write a complete range
 * ENOMEM - Memory allocation failure during preparation of the zlib arena
 */
int
hibernate_write_chunks(union hibernate_info *hib)
{
	paddr_t range_base, range_end, inaddr, temp_inaddr;
	size_t out_remaining, used;
	struct hibernate_disk_chunk *chunks;
	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;
	daddr_t blkctr = 0;
	int i, rle, err;
	struct hibernate_zlib_state *hibernate_state;

	hibernate_state =
	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;

	hib->chunk_ctr = 0;

	/*
	 * Map the utility VAs to the piglet. See the piglet map at the
	 * top of this file for piglet layout information.
	 */
	hibernate_copy_page = hib->piglet_va + 3 * PAGE_SIZE;
	hibernate_rle_page = hib->piglet_va + 28 * PAGE_SIZE;

	chunks = (struct hibernate_disk_chunk *)(hib->piglet_va +
	    HIBERNATE_CHUNK_SIZE);

	/* Calculate the chunk regions */
	for (i = 0; i < hib->nranges; i++) {
		range_base = hib->ranges[i].base;
		range_end = hib->ranges[i].end;

		inaddr = range_base;

		while (inaddr < range_end) {
			chunks[hib->chunk_ctr].base = inaddr;
			if (inaddr + HIBERNATE_CHUNK_SIZE < range_end)
				chunks[hib->chunk_ctr].end = inaddr +
				    HIBERNATE_CHUNK_SIZE;
			else
				chunks[hib->chunk_ctr].end = range_end;

			inaddr += HIBERNATE_CHUNK_SIZE;
			hib->chunk_ctr++;
		}
	}

	uvm_pmr_dirty_everything();
	uvm_pmr_zero_everything();

	/* Compress and write the chunks in the chunktable */
	for (i = 0; i < hib->chunk_ctr; i++) {
		range_base = chunks[i].base;
		range_end = chunks[i].end;

		chunks[i].offset = blkctr + hib->image_offset;

		/* Reset zlib for deflate */
		if (hibernate_zlib_reset(hib, 1) != Z_OK) {
			DPRINTF("hibernate_zlib_reset failed for deflate\n");
			return (ENOMEM);
		}

		inaddr = range_base;

		/*
		 * For each range, loop through its phys mem region
		 * and write out the chunks (the last chunk might be
		 * smaller than the chunk size).
		 */
		while (inaddr < range_end) {
			out_remaining = PAGE_SIZE;
			while (out_remaining > 0 && inaddr < range_end) {
				/*
				 * Adjust for regions that are not evenly
				 * divisible by PAGE_SIZE or overflowed
				 * pages from the previous iteration.
				 */
				temp_inaddr = (inaddr & PAGE_MASK) +
				    hibernate_copy_page;

				/* Deflate from temp_inaddr to IO page */
				if (inaddr != range_end) {
					if (inaddr % PAGE_SIZE == 0) {
						rle = hibernate_write_rle(hib,
							inaddr,
							range_end,
							&blkctr,
							&out_remaining);
					}

					if (rle == 0) {
						pmap_kenter_pa(hibernate_temp_page,
							inaddr & PMAP_PA_MASK,
							PROT_READ);

						bcopy((caddr_t)hibernate_temp_page,
							(caddr_t)hibernate_copy_page,
							PAGE_SIZE);
						inaddr += hibernate_deflate(hib,
							temp_inaddr,
							&out_remaining);
					} else {
						inaddr += rle * PAGE_SIZE;
						if (inaddr > range_end)
							inaddr = range_end;
					}

				}

				if (out_remaining == 0) {
					/* Filled up the page */
					if ((err = hib->io_func(hib->dev,
					    blkctr + hib->image_offset,
					    (vaddr_t)hibernate_io_page,
					    PAGE_SIZE, HIB_W, hib->io_page))) {
						DPRINTF("hib write error %d\n",
						    err);
						return (err);
					}
					blkctr += PAGE_SIZE / DEV_BSIZE;
				}
			}
		}

		if (inaddr != range_end) {
			DPRINTF("deflate range ended prematurely\n");
			return (EINVAL);
		}

		/*
		 * End of range. Round up to next secsize bytes
		 * after finishing compress
		 */
		if (out_remaining == 0)
			out_remaining = PAGE_SIZE;

		/* Finish compress */
		hibernate_state->hib_stream.next_in = (unsigned char *)inaddr;
		hibernate_state->hib_stream.avail_in = 0;
		hibernate_state->hib_stream.next_out =
		    (unsigned char *)hibernate_io_page +
			(PAGE_SIZE - out_remaining);

		/* We have an extra output page available for finalize */
		hibernate_state->hib_stream.avail_out =
			out_remaining + PAGE_SIZE;

		if ((err = deflate(&hibernate_state->hib_stream, Z_FINISH)) !=
		    Z_STREAM_END) {
			DPRINTF("deflate error in output stream: %d\n", err);
			return (err);
		}

		out_remaining = hibernate_state->hib_stream.avail_out;

		/* Round up to next sector if needed */
		used = ROUNDUP(2 * PAGE_SIZE - out_remaining, hib->sec_size);

		/* Write final block(s) for this chunk */
		if ((err = hib->io_func(hib->dev, blkctr + hib->image_offset,
		    (vaddr_t)hibernate_io_page, used,
		    HIB_W, hib->io_page))) {
			DPRINTF("hib final write error %d\n", err);
			return (err);
		}

		blkctr += used / DEV_BSIZE;

		chunks[i].compressed_size = (blkctr + hib->image_offset -
		    chunks[i].offset) * DEV_BSIZE;
	}

	hib->chunktable_offset = hib->image_offset + blkctr;
	return (0);
}
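
/*
 * On disk, each chunk is thus one zlib stream of (RLE word, page)
 * records, finished with Z_FINISH and rounded up to a multiple of
 * hib->sec_size. The resulting size is recorded in
 * chunks[i].compressed_size and persisted by hibernate_write_chunktable().
 */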

/*
 * Reset the zlib stream state and allocate a new hiballoc area for either
 * inflate or deflate. This function is called once for each hibernate chunk.
 * Calling hiballoc_init multiple times is acceptable since the memory it is
 * provided is unmanaged memory (stolen). We use the memory provided to us
 * by the piglet allocated via the supplied hib.
 */
int
hibernate_zlib_reset(union hibernate_info *hib, int deflate)
{
	vaddr_t hibernate_zlib_start;
	size_t hibernate_zlib_size;
	char *pva = (char *)hib->piglet_va;
	struct hibernate_zlib_state *hibernate_state;

	hibernate_state =
	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;

	if (!deflate)
		pva = (char *)((paddr_t)pva & (PIGLET_PAGE_MASK));

	/*
	 * See piglet layout information at the start of this file for
	 * information on the zlib page assignments.
	 */
	hibernate_zlib_start = (vaddr_t)(pva + (30 * PAGE_SIZE));
	hibernate_zlib_size = 80 * PAGE_SIZE;

	memset((void *)hibernate_zlib_start, 0, hibernate_zlib_size);
	memset(hibernate_state, 0, PAGE_SIZE);

	/* Set up stream structure */
	hibernate_state->hib_stream.zalloc = (alloc_func)hibernate_zlib_alloc;
	hibernate_state->hib_stream.zfree = (free_func)hibernate_zlib_free;

	/* Initialize the hiballoc arena for zlib allocs/frees */
	hiballoc_init(&hibernate_state->hiballoc_arena,
	    (caddr_t)hibernate_zlib_start, hibernate_zlib_size);

	if (deflate) {
		return deflateInit(&hibernate_state->hib_stream,
		    Z_BEST_SPEED);
	} else
		return inflateInit(&hibernate_state->hib_stream);
}

/*
 * Reads the hibernated memory image from disk, whose location and
 * size are recorded in hib. Begin by reading the persisted
 * chunk table, which records the original chunk placement location
 * and compressed size for each. Next, allocate a pig region of
 * sufficient size to hold the compressed image. Next, read the
 * chunks into the pig area (calling hibernate_read_chunks to do this),
 * and finally, if all of the above succeeds, clear the hibernate signature.
 * The function will then return to hibernate_resume, which will proceed
 * to unpack the pig image to the correct place in memory.
 */
int
hibernate_read_image(union hibernate_info *hib)
{
	size_t compressed_size, disk_size, chunktable_size, pig_sz;
	paddr_t image_start, image_end, pig_start, pig_end;
	struct hibernate_disk_chunk *chunks;
	daddr_t blkctr;
	vaddr_t chunktable = (vaddr_t)NULL;
	paddr_t piglet_chunktable = hib->piglet_pa +
	    HIBERNATE_CHUNK_SIZE;
	int i, status;

	status = 0;
	pmap_activate(curproc);

	/* Calculate total chunk table size in disk blocks */
	chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / DEV_BSIZE;

	blkctr = hib->chunktable_offset;

	chunktable = (vaddr_t)km_alloc(HIBERNATE_CHUNK_TABLE_SIZE, &kv_any,
	    &kp_none, &kd_nowait);

	if (!chunktable)
		return (1);

	/* Map chunktable pages */
	for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE; i += PAGE_SIZE)
		pmap_kenter_pa(chunktable + i, piglet_chunktable + i,
		    PROT_READ | PROT_WRITE);
	pmap_update(pmap_kernel());

	/* Read the chunktable from disk into the piglet chunktable */
	for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE;
	    i += MAXPHYS, blkctr += MAXPHYS/DEV_BSIZE)
		hibernate_block_io(hib, blkctr, MAXPHYS,
		    chunktable + i, 0);

	blkctr = hib->image_offset;
	compressed_size = 0;

	chunks = (struct hibernate_disk_chunk *)chunktable;

	for (i = 0; i < hib->chunk_ctr; i++)
		compressed_size += chunks[i].compressed_size;

	disk_size = compressed_size;

	printf("unhibernating @ block %lld length %luMB\n",
	    hib->sig_offset - chunktable_size,
	    compressed_size / (1024 * 1024));

	/* Allocate the pig area */
	pig_sz = compressed_size + HIBERNATE_CHUNK_SIZE;
	if (uvm_pmr_alloc_pig(&pig_start, pig_sz, hib->piglet_pa) == ENOMEM) {
		status = 1;
		goto unmap;
	}

	pig_end = pig_start + pig_sz;

	/* Calculate image extents. Pig image must end on a chunk boundary. */
	image_end = pig_end & ~(HIBERNATE_CHUNK_SIZE - 1);
	image_start = image_end - disk_size;

	hibernate_read_chunks(hib, image_start, image_end, disk_size,
	    chunks);

	/* Prepare the resume time pmap/page table */
	hibernate_populate_resume_pt(hib, image_start, image_end);

unmap:
	/* Unmap chunktable pages */
	pmap_kremove(chunktable, HIBERNATE_CHUNK_TABLE_SIZE);
	pmap_update(pmap_kernel());

	return (status);
}

/*
 * Read the hibernated memory chunks from disk (chunk information at this
 * point is stored in the piglet) into the pig area specified by
 * [pig_start .. pig_end]. Order the chunks so that the final chunk is the
 * only chunk with overlap possibilities.
 */
int
hibernate_read_chunks(union hibernate_info *hib, paddr_t pig_start,
    paddr_t pig_end, size_t image_compr_size,
    struct hibernate_disk_chunk *chunks)
{
	paddr_t img_cur, piglet_base;
	daddr_t blkctr;
	size_t processed, compressed_size, read_size;
	int nchunks, nfchunks, num_io_pages;
	vaddr_t tempva, hibernate_fchunk_area;
	short *fchunks, i, j;

	tempva = (vaddr_t)NULL;
	hibernate_fchunk_area = (vaddr_t)NULL;
	nfchunks = 0;
	piglet_base = hib->piglet_pa;
	global_pig_start = pig_start;

	/*
	 * These mappings go into the resuming kernel's page table, and are
	 * used only during image read. They disappear from existence
	 * when the suspended kernel is unpacked on top of us.
	 */
	tempva = (vaddr_t)km_alloc(MAXPHYS + PAGE_SIZE, &kv_any, &kp_none,
		&kd_nowait);
	if (!tempva)
		return (1);
	hibernate_fchunk_area = (vaddr_t)km_alloc(24 * PAGE_SIZE, &kv_any,
	    &kp_none, &kd_nowait);
	if (!hibernate_fchunk_area)
		return (1);

	/* Final output chunk ordering VA */
	fchunks = (short *)hibernate_fchunk_area;

	/* Map the chunk ordering region */
	for (i = 0; i < 24; i++)
		pmap_kenter_pa(hibernate_fchunk_area + (i * PAGE_SIZE),
			piglet_base + ((4 + i) * PAGE_SIZE),
			PROT_READ | PROT_WRITE);
	pmap_update(pmap_kernel());

	nchunks = hib->chunk_ctr;

	/* Initially start all chunks as unplaced */
	for (i = 0; i < nchunks; i++)
		chunks[i].flags = 0;

	/*
	 * Search the list for chunks that are outside the pig area. These
	 * can be placed first in the final output list.
	 */
	for (i = 0; i < nchunks; i++) {
		if (chunks[i].end <= pig_start || chunks[i].base >= pig_end) {
			fchunks[nfchunks] = i;
			nfchunks++;
			chunks[i].flags |= HIBERNATE_CHUNK_PLACED;
		}
	}

	/*
	 * Walk the ordering, place the chunks in ascending memory order.
	 */
	for (i = 0; i < nchunks; i++) {
		if (chunks[i].flags != HIBERNATE_CHUNK_PLACED) {
			fchunks[nfchunks] = i;
			nfchunks++;
			chunks[i].flags = HIBERNATE_CHUNK_PLACED;
		}
	}

	img_cur = pig_start;

	for (i = 0; i < nfchunks; i++) {
		blkctr = chunks[fchunks[i]].offset;
		processed = 0;
		compressed_size = chunks[fchunks[i]].compressed_size;

		while (processed < compressed_size) {
			if (compressed_size - processed >= MAXPHYS)
				read_size = MAXPHYS;
			else
				read_size = compressed_size - processed;

			/*
			 * We're reading read_size bytes, offset from the
			 * start of a page by img_cur % PAGE_SIZE, so the
			 * end will be read_size + (img_cur % PAGE_SIZE)
			 * from the start of the first page.  Round that
			 * up to the next page size.
			 */
			num_io_pages = (read_size + (img_cur % PAGE_SIZE)
				+ PAGE_SIZE - 1) / PAGE_SIZE;

			KASSERT(num_io_pages <= MAXPHYS/PAGE_SIZE + 1);

			/* Map pages for this read */
			for (j = 0; j < num_io_pages; j++)
				pmap_kenter_pa(tempva + j * PAGE_SIZE,
				    img_cur + j * PAGE_SIZE,
				    PROT_READ | PROT_WRITE);

			pmap_update(pmap_kernel());

			hibernate_block_io(hib, blkctr, read_size,
			    tempva + (img_cur & PAGE_MASK), 0);

			blkctr += (read_size / DEV_BSIZE);

			pmap_kremove(tempva, num_io_pages * PAGE_SIZE);
			pmap_update(pmap_kernel());

			processed += read_size;
			img_cur += read_size;
		}
	}

	pmap_kremove(hibernate_fchunk_area, 24 * PAGE_SIZE);
	pmap_update(pmap_kernel());

	return (0);
}

/*
 * Hibernating a machine comprises the following operations:
 *  1. Calculating this machine's hibernate_info information
 *  2. Allocating a piglet and saving the piglet's physaddr
 *  3. Calculating the memory chunks
 *  4. Writing the compressed chunks to disk
 *  5. Writing the chunk table
 *  6. Writing the signature block (hibernate_info)
 *
 * On most architectures, the function calling hibernate_suspend would
 * then power off the machine using some MD-specific implementation.
 */
int
hibernate_suspend(void)
{
	uint8_t buf[DEV_BSIZE];
	union hibernate_info *hib = (union hibernate_info *)&buf;
	u_long start, end;

	/*
	 * Calculate memory ranges, swap offsets, etc.
	 * This also allocates a piglet whose physaddr is stored in
	 * hib->piglet_pa and vaddr stored in hib->piglet_va
	 */
	if (get_hibernate_info(hib, 1)) {
		DPRINTF("failed to obtain hibernate info\n");
		return (1);
	}

	/* Find a page-addressed region in swap [start,end] */
	if (uvm_hibswap(hib->dev, &start, &end)) {
		printf("hibernate: cannot find any swap\n");
		return (1);
	}

	if (end - start < 1000) {
		printf("hibernate: insufficient swap (%lu is too small)\n",
			end - start + 1);
		return (1);
	}

	pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_start,
	    &retguard_start_phys);
	pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_end,
	    &retguard_end_phys);

	/* Calculate block offsets in swap */
	hib->image_offset = ctod(start);

	DPRINTF("hibernate @ block %lld max-length %lu blocks\n",
	    hib->image_offset, ctod(end) - ctod(start) + 1);

	pmap_activate(curproc);
	DPRINTF("hibernate: writing chunks\n");
	if (hibernate_write_chunks(hib)) {
		DPRINTF("hibernate_write_chunks failed\n");
		return (1);
	}

	DPRINTF("hibernate: writing chunktable\n");
	if (hibernate_write_chunktable(hib)) {
		DPRINTF("hibernate_write_chunktable failed\n");
		return (1);
	}

	DPRINTF("hibernate: writing signature\n");
	if (hibernate_write_signature(hib)) {
		DPRINTF("hibernate_write_signature failed\n");
		return (1);
	}

	/* Allow the disk to settle */
	delay(500000);

	/*
	 * Give the device-specific I/O function a notification that we're
	 * done, and that it can clean up or shutdown as needed.
	 */
	hib->io_func(hib->dev, 0, (vaddr_t)NULL, 0, HIB_DONE, hib->io_page);
	return (0);
}
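
/*
 * A sketch of the overall call sequence an MD suspend path would use
 * (names as defined in this file; the actual entry point is
 * architecture specific):
 *
 *	if (hibernate_alloc() == 0) {
 *		if (hibernate_suspend() == 0)
 *			... power off via MD code ...
 *		hibernate_free();
 *	}
 */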

int
hibernate_alloc(void)
{
	KASSERT(global_piglet_va == 0);
	KASSERT(hibernate_temp_page == 0);

	pmap_activate(curproc);
	pmap_kenter_pa(HIBERNATE_HIBALLOC_PAGE, HIBERNATE_HIBALLOC_PAGE,
	    PROT_READ | PROT_WRITE);

	/* Allocate a piglet, store its addresses in the supplied globals */
	if (uvm_pmr_alloc_piglet(&global_piglet_va, &global_piglet_pa,
	    HIBERNATE_CHUNK_SIZE * 4, HIBERNATE_CHUNK_SIZE))
		goto unmap;

	/*
	 * Allocate VA for the temp page.
	 *
	 * This will become part of the suspended kernel and will
	 * be freed in hibernate_free, upon resume (or hibernate
	 * failure)
	 */
	hibernate_temp_page = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any,
	    &kp_none, &kd_nowait);
	if (!hibernate_temp_page) {
		uvm_pmr_free_piglet(global_piglet_va, 4 * HIBERNATE_CHUNK_SIZE);
		global_piglet_va = 0;
		goto unmap;
	}
	return (0);
unmap:
	pmap_kremove(HIBERNATE_HIBALLOC_PAGE, PAGE_SIZE);
	pmap_update(pmap_kernel());
	return (ENOMEM);
}

/*
 * Free items allocated by hibernate_alloc()
 */
void
hibernate_free(void)
{
	pmap_activate(curproc);

	if (global_piglet_va)
		uvm_pmr_free_piglet(global_piglet_va,
		    4 * HIBERNATE_CHUNK_SIZE);

	if (hibernate_temp_page) {
		pmap_kremove(hibernate_temp_page, PAGE_SIZE);
		km_free((void *)hibernate_temp_page, PAGE_SIZE,
		    &kv_any, &kp_none);
	}

	global_piglet_va = 0;
	hibernate_temp_page = 0;
	pmap_kremove(HIBERNATE_HIBALLOC_PAGE, PAGE_SIZE);
	pmap_update(pmap_kernel());
}