/*
 * Common functions related to playlist handling and the management of
 * the BootCache module.
 */


/* XXX trim includes */
#include <sys/param.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/attr.h>
#include <sys/mount.h>
#include <sys/xattr.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>


#include "BootCache.h"

/*
 * Return a user-readable string for a given uuid.
 *
 * Returns a pointer to a static buffer, which is racy, so this should
 * only be used for debugging purposes.
 */
static inline const char* uuid_string(uuid_t uuid)
{
	/* Racy, but used for debug output so who cares */
	static uuid_string_t uuidString;
	uuid_unparse(uuid, uuidString);
	return uuidString;
}

void BC_free_playlist(struct BC_playlist *pc) {
	if (pc) {
		if (pc->p_mounts)
			free(pc->p_mounts);
		if (pc->p_entries)
			free(pc->p_entries);
		free(pc);
	}
}

void BC_free_history(struct BC_history *hc) {
	if (hc) {
		if (hc->h_mounts)
			free(hc->h_mounts);
		if (hc->h_entries)
			free(hc->h_entries);
		free(hc);
	}
}

/*
 * Read the named playlist from disk into an allocated buffer.
 */
int
BC_read_playlist(const char *pfname, struct BC_playlist **ppc)
{
	struct BC_playlist *pc;
	struct BC_playlist_header ph;
	int error, fd;

	fd = -1;
	pc = NULL;
	*ppc = NULL;

	if (pfname == NULL) {
		warnx("No playlist path provided");
		error = EINVAL;
		goto out;
	}
	if ((fd = open(pfname, O_RDONLY)) == -1) {
		warnx("Unable to open %s: %d %s", pfname, errno, strerror(errno));
		error = errno;
		goto out;
	}
	if (read(fd, &ph, sizeof(ph)) != sizeof(ph)) {
		warnx("could not read header from %s", pfname);
		error = EINVAL;
		goto out;
	}
	if (ph.ph_magic != PH_MAGIC) {
		warnx("bad playlist magic");
		error = EINVAL;
		goto out;
	}

	if ((pc = calloc(1, sizeof(*pc))) == NULL) {
		warnx("Unable to allocate playlist structure");
		error = errno;
		goto out;
	}

	pc->p_nmounts  = ph.ph_nmounts;
	pc->p_nentries = ph.ph_nentries;

	if ((pc->p_mounts = malloc(sizeof(*pc->p_mounts) * pc->p_nmounts)) == NULL) {
		warnx("could not allocate memory for playlist mounts");
		error = errno;
		goto out;
	}
	if (read(fd, pc->p_mounts, sizeof(*pc->p_mounts) * pc->p_nmounts) != (sizeof(*pc->p_mounts) * pc->p_nmounts)) {
		warnx("could not read playlist mounts");
		error = EINVAL;
		goto out;
	}

	if ((pc->p_entries = malloc(sizeof(*pc->p_entries) * pc->p_nentries)) == NULL) {
		warnx("could not allocate memory for playlist data");
		error = errno;
		goto out;
	}
	if (read(fd, pc->p_entries, (sizeof(*pc->p_entries) * pc->p_nentries)) != (sizeof(*pc->p_entries) * pc->p_nentries)) {
		warnx("could not read playlist data");
		error = EINVAL;
		goto out;
	}

	if ((error = BC_verify_playlist(pc)) != 0) {
		goto out;
	}

	*ppc = pc;
out:
	if (fd != -1)
		close(fd);
	if (error) {
		PC_FREE_ZERO(pc);
	}
	return(error);
}
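
/*
 * Example usage (illustrative sketch, not part of the original source;
 * the playlist path is hypothetical):
 *
 *	struct BC_playlist *pc = NULL;
 *	if (BC_read_playlist("/var/db/BootCache.playlist", &pc) == 0) {
 *		printf("%d mounts, %d entries\n", pc->p_nmounts, pc->p_nentries);
 *		PC_FREE_ZERO(pc);
 *	}
 */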

/*
 * Write the playlist to the named file, securely.
 */
int
BC_write_playlist(const char *pfname, const struct BC_playlist *pc)
{
	struct BC_playlist_header ph;
	char *tfname;
	int error, fd;

	tfname = NULL;
	fd = -1;

	if ((error = BC_verify_playlist(pc)) != 0) {
		goto out;
	}

	/*
	 * Prepare the output file.
	 *
	 * Create a secure temporary file and write an invalid header.
	 */
	size_t tpathlen = strlen(pfname) + 8;	/* room for ".XXXXXX" and the NUL */
	if (tpathlen > MAXPATHLEN) {
		warnx("playlist filename too long");
		error = ENAMETOOLONG;
		goto out;
	}
	if ((tfname = malloc(tpathlen)) == NULL) {
		warnx("could not allocate %lu bytes for playlist filename", tpathlen);
		error = errno;
		goto out;
	}
	snprintf(tfname, tpathlen, "%s.XXXXXX", pfname);
	if ((fd = mkstemp(tfname)) < 0) {
		warnx("could not create temporary playlist file");
		error = errno;
		goto out;
	}
	ph.ph_magic    = 0;
	ph.ph_nmounts  = 0;
	ph.ph_nentries = 0;
	if (write(fd, &ph, sizeof(ph)) != sizeof(ph)) {
		warnx("could not write initial header to temporary playlist file");
		error = errno;
		goto out;
	}

	/*
	 * Write the playlist mounts.
	 */
	if (write(fd, pc->p_mounts, pc->p_nmounts * sizeof(*pc->p_mounts)) != (pc->p_nmounts * sizeof(*pc->p_mounts))) {
		warnx("could not write mounts to temporary playlist file: %d %s", errno, strerror(errno));
		error = errno;
		goto out;
	}

	/*
	 * Write the playlist entries.
	 */
	if (write(fd, pc->p_entries, pc->p_nentries * sizeof(*pc->p_entries)) != (pc->p_nentries * sizeof(*pc->p_entries))) {
		warnx("could not write entries to temporary playlist file");
		error = errno;
		goto out;
	}

	/*
	 * Write an updated (valid) header to the playlist file.
	 */
	ph.ph_magic    = PH_MAGIC;
	ph.ph_nmounts  = pc->p_nmounts;
	ph.ph_nentries = pc->p_nentries;
	if (lseek(fd, 0, SEEK_SET) != 0) {
		warnx("could not seek on temporary playlist file");
		error = errno;
		goto out;
	}
	if (write(fd, &ph, sizeof(ph)) != sizeof(ph)) {
		warnx("could not write header to temporary playlist file");
		error = errno;
		goto out;
	}
	close(fd);
	fd = -1;

	/*
	 * Rename the temporary playlist file over the original.
	 */
	if (rename(tfname, pfname) != 0) {
		warnx("could not save playlist file %s->%s", tfname, pfname);
		error = errno;
		goto out;
	}
	/* free here so the error path below doesn't unlink a path we no longer own */
	free(tfname);
	tfname = NULL;

out:
	if (tfname != NULL) {
		unlink(tfname);
		free(tfname);
	}
	if (fd != -1)
		close(fd);
	return(error);
}
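
/*
 * Example usage (illustrative sketch, not part of the original source;
 * the path and the batch renumbering are hypothetical):
 *
 *	struct BC_playlist *pc = NULL;
 *	if (BC_read_playlist("/var/db/BootCache.playlist", &pc) == 0) {
 *		int i;
 *		for (i = 0; i < pc->p_nentries; i++)
 *			pc->p_entries[i].pe_batch = 0;	// collapse to one batch
 *		if (BC_write_playlist("/var/db/BootCache.playlist", pc) != 0)
 *			warnx("rewrite failed");
 *		PC_FREE_ZERO(pc);
 *	}
 */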

/*
 * Merge two playlists 'a' and 'b' into 'a's buffer.
 *
 * Does not sort or coalesce the lists.  Robust in the case
 * where either playlist's buffers are NULL or lengths are zero.
 */
int
BC_merge_playlists(struct BC_playlist* pa, const struct BC_playlist* pb)
{
	int mount_idx_b, mount_idx_a, entry_idx;

	if ((pa->p_mounts = reallocf(pa->p_mounts, sizeof(*pa->p_mounts) * (pa->p_nmounts + pb->p_nmounts))) == NULL)
		return(ENOMEM);

	/* merge the mount list */
	for (mount_idx_b = 0; mount_idx_b < pb->p_nmounts; mount_idx_b++) {
		for (mount_idx_a = 0; mount_idx_a < pa->p_nmounts; mount_idx_a++) {
			if (0 == uuid_compare(pa->p_mounts[mount_idx_a].pm_uuid, pb->p_mounts[mount_idx_b].pm_uuid)) {
				break;
			}
		}

		if (mount_idx_a == pa->p_nmounts) {
			/* no match found; append a new mount */
			pa->p_nmounts++;

			pa->p_mounts[mount_idx_a].pm_nentries = 0;
			uuid_copy(pa->p_mounts[mount_idx_a].pm_uuid, pb->p_mounts[mount_idx_b].pm_uuid);
		}

		pa->p_mounts[mount_idx_a].pm_nentries += pb->p_mounts[mount_idx_b].pm_nentries;
	}

	if ((pa->p_entries = reallocf(pa->p_entries, sizeof(*pa->p_entries) * (pa->p_nentries + pb->p_nentries))) == NULL)
		return(ENOMEM);
	memcpy(pa->p_entries + pa->p_nentries, pb->p_entries, pb->p_nentries * sizeof(*pb->p_entries));

	/* fix up mount indexes for the new entries */
	for (entry_idx = pa->p_nentries; entry_idx < (pa->p_nentries + pb->p_nentries); entry_idx++) {
		mount_idx_b = pa->p_entries[entry_idx].pe_mount_idx;
		for (mount_idx_a = 0; mount_idx_a < pa->p_nmounts; mount_idx_a++) {
			if (0 == uuid_compare(pa->p_mounts[mount_idx_a].pm_uuid, pb->p_mounts[mount_idx_b].pm_uuid)) {
				pa->p_entries[entry_idx].pe_mount_idx = mount_idx_a;
				break;
			}
		}
		if (mount_idx_a == pa->p_nmounts) {
			warnx("Mounts not merged properly for entry %d (mount %d)", entry_idx, mount_idx_b);
			return(1);
		}
	}

	pa->p_nentries += pb->p_nentries;

#ifdef BOOTCACHE_ENTRIES_SORTED_BY_DISK_OFFSET
	BC_sort_playlist(pa);
#endif
	BC_coalesce_playlist(pa);

	return BC_verify_playlist(pa);
}
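
/*
 * Example usage (illustrative sketch, not part of the original source;
 * all three playlist paths are hypothetical):
 *
 *	struct BC_playlist *pa = NULL, *pb = NULL;
 *	if (BC_read_playlist("/tmp/a.playlist", &pa) == 0 &&
 *	    BC_read_playlist("/tmp/b.playlist", &pb) == 0) {
 *		if (BC_merge_playlists(pa, pb) == 0)
 *			BC_write_playlist("/tmp/merged.playlist", pa);
 *	}
 *	PC_FREE_ZERO(pa);
 *	PC_FREE_ZERO(pb);
 */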

/*
 * Sort a playlist.
 */
static int
compare_playlist_entry(const void *vfirst, const void *vsecond)
{
	const struct BC_playlist_entry	*first, *second;

	first = (const struct BC_playlist_entry *)vfirst;
	second = (const struct BC_playlist_entry *)vsecond;

	// Sort by mount first, then by logical block address
	int mount_comparison = first->pe_mount_idx - second->pe_mount_idx;
	if (mount_comparison != 0)
		return((mount_comparison < 0) ? -1 : 1);

	if (first->pe_offset == second->pe_offset)
		return(0);
	return((first->pe_offset < second->pe_offset) ? -1 : 1);
}

void
BC_sort_playlist(struct BC_playlist *pc)
{
	if ((pc == NULL) || (pc->p_mounts == NULL) || (pc->p_entries == NULL) || (pc->p_nmounts == 0) || (pc->p_nentries == 0))
		return;

	qsort((void *)pc->p_entries, pc->p_nentries, sizeof(*pc->p_entries), compare_playlist_entry);
}

/*
 * Coalesce a sorted playlist into the smallest set of contiguous
 * extents.  Sets the new size of the playlist and reallocs the buffer.
 */
int
BC_coalesce_playlist(struct BC_playlist *pc)
{
	struct BC_playlist_entry *pe, *dpe;
	int i, oentries, mount_idx;

	/*
	 * Scan the sorted list and emit coalesced playlist entries.
	 */
	pe = pc->p_entries;
	oentries = 0;
	dpe = pe;

	/* clear the number of entries per mount, to be recalculated below */
	for (mount_idx = 0; mount_idx < pc->p_nmounts; mount_idx++)
		pc->p_mounts[mount_idx].pm_nentries = 0;

	while (pe < pc->p_entries + pc->p_nentries) {
		/* entry is the first in a possible set */

		if (pe->pe_length > 0) { /* skip entries we've marked as empty */
			/* scan following entries to see if they can be coalesced */
			for (i = 1; (pe + i) < (pc->p_entries + pc->p_nentries); i++) {

				/* not the same mount, ignore */
				if (pe->pe_mount_idx != (pe + i)->pe_mount_idx)
					continue;

				/* entry is not inside or adjacent to the preceding one */
				if ((pe + i)->pe_offset > (pe->pe_offset + pe->pe_length))
					break;

#define MAX_MERGE_SIZE (8*1024*1024)

				/*
				 * Merge the two extents together if:
				 *  - both are low priority, or
				 *  - neither is low priority and they are in the same batch, or
				 *  - combined, they grow the earlier extent by at most 8MB, or
				 *  - we would otherwise be subtracting out the middle of the
				 *    first extent, which would require a new extent to keep
				 *    its top half, a case we can't handle.
				 */
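				/*
				 * Worked example (illustrative, not from the original source):
				 * with extents A = [0, 4KB) and B = [2KB, 8KB) on the same
				 * mount, in the same batch, and neither low priority, B begins
				 * inside A, so the test below merges them: A grows to [0, 8KB)
				 * and B is marked empty (pe_length = 0) to be dropped later.
				 */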
				if (((pe->pe_flags & BC_PE_LOWPRIORITY) && ((pe + i)->pe_flags & BC_PE_LOWPRIORITY)) ||
					(!(pe->pe_flags & BC_PE_LOWPRIORITY) && !((pe + i)->pe_flags & BC_PE_LOWPRIORITY) && pe->pe_batch == (pe + i)->pe_batch) ||
					(pe->pe_batch <= (pe + i)->pe_batch && (int64_t)((pe + i)->pe_offset + (pe + i)->pe_length) - (pe->pe_offset + pe->pe_length) <= MAX_MERGE_SIZE) ||
					(pe->pe_batch > (pe + i)->pe_batch && (int64_t)(pe + i)->pe_offset - pe->pe_offset <= MAX_MERGE_SIZE) ||
					((pe->pe_offset + pe->pe_length) > ((pe + i)->pe_offset + (pe + i)->pe_length) && (pe->pe_batch > (pe + i)->pe_batch || pe->pe_flags & BC_PE_LOWPRIORITY))
					) {
					/* merge the two extents together */

					/* adjust length if required */
					pe->pe_length = MAX((pe->pe_offset + pe->pe_length), ((pe + i)->pe_offset + (pe + i)->pe_length)) - pe->pe_offset;
					pe->pe_batch = MIN(pe->pe_batch, (pe + i)->pe_batch);
					if ( !((pe + i)->pe_flags & BC_PE_LOWPRIORITY)) {
						/* If any are high priority, the merged extent is high priority */
						pe->pe_flags &= (~BC_PE_LOWPRIORITY);
					}
					if ((pe + i)->pe_flags & BC_PE_SHARED) {
						/* If any are shared, the merged extent is shared */
						pe->pe_flags |= BC_PE_SHARED;
					}
					(pe + i)->pe_length = 0; /* mark this entry as empty */
				} else {
					/* subtract the intersection from the later extent */

					if (pe->pe_batch < (pe + i)->pe_batch ||
						(pe + i)->pe_flags & BC_PE_LOWPRIORITY) {
						/* subtract intersection from pe+i */

						u_int64_t endpoint = MAX((pe->pe_offset + pe->pe_length), ((pe + i)->pe_offset + (pe + i)->pe_length));
						(pe + i)->pe_offset = pe->pe_offset + pe->pe_length;
						(pe + i)->pe_length = endpoint - (pe + i)->pe_offset;
					} else {
						/* subtract intersection from pe */
						/* We made sure above that ((pe + i)->pe_offset + (pe + i)->pe_length) is greater than (pe->pe_offset + pe->pe_length) */
						pe->pe_length = (pe + i)->pe_offset - pe->pe_offset;
					}
				}

			}

			if (pe->pe_length > 0) {
				/* save entry */
				*(dpe++) = *pe;
				oentries++;
				pc->p_mounts[pe->pe_mount_idx].pm_nentries++;
			}
		}
		pe++;
	}

	/*
	 * Shrink the allocation if possible.  If realloc fails, handle it
	 * gracefully.
	 */
	pc->p_nentries = oentries;
	pe = pc->p_entries;
	pc->p_entries = realloc(pc->p_entries, sizeof(*pc->p_entries) * pc->p_nentries);
	if (pc->p_entries == NULL)
		pc->p_entries = pe;
	return(0);
}
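
/*
 * Example usage (illustrative sketch, not part of the original source):
 * coalescing assumes the entries are already sorted, so a typical cleanup
 * pass over a hand-edited playlist looks like:
 *
 *	BC_sort_playlist(pc);
 *	BC_coalesce_playlist(pc);
 *	if (BC_verify_playlist(pc) != 0)
 *		warnx("playlist inconsistent after coalescing");
 */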

/*
 * Verify the consistency of a playlist.
 *
 * Returns 0 if the playlist is consistent, non-0 otherwise.
 */
int
BC_verify_playlist(const struct BC_playlist *pc)
{
	int i, error = 0;
	int* mount_counts = NULL;

	if (pc == NULL) {
		warnx("playlist is null");
		error = EINVAL;
		goto out;
	}
	if ((pc->p_mounts == NULL) && (pc->p_nmounts > 0)) {
		warnx("mounts is null");
		error = EINVAL;
		goto out;
	}
	if ((pc->p_entries == NULL) && (pc->p_nentries > 0)) {
		warnx("entries is null");
		error = EINVAL;
		goto out;
	}
	if (pc->p_nentries > 0 && pc->p_nmounts == 0) {
		warnx("entries, but no mounts");
		error = EINVAL;
		goto out;
	}

	/* A playlist with nothing in it is technically valid */
	if (pc->p_nmounts == 0) {
		error = 0;
		goto out;
	}

	if ((mount_counts = calloc(pc->p_nmounts, sizeof(int))) == NULL) {
		warnx("could not allocate memory for mount counts");
		error = errno;
		goto out;
	}

	for (i = 0; i < pc->p_nentries; i++) {
		if (pc->p_entries[i].pe_mount_idx >= pc->p_nmounts) {
			warnx("entry %d referenced unknown mount %d", i, pc->p_entries[i].pe_mount_idx);
			error = EINVAL;
			goto out;
		}
		mount_counts[pc->p_entries[i].pe_mount_idx]++;

		if (pc->p_entries[i].pe_offset + pc->p_entries[i].pe_length <= pc->p_entries[i].pe_offset) {
			warnx("entry %d has a bad range %lld,%lld", i, pc->p_entries[i].pe_offset, pc->p_entries[i].pe_length);
			error = EINVAL;
			goto out;
		}
	}

	for (i = 0; i < pc->p_nmounts; i++) {
		if (pc->p_mounts[i].pm_nentries == 0) {
			warnx("mount %d has 0 entries", i);
			error = EINVAL;
			goto out;
		}
		if (mount_counts[i] != pc->p_mounts[i].pm_nentries) {
			warnx("%d entries exist for mount %d, vs %d claimed", mount_counts[i], i, pc->p_mounts[i].pm_nentries);
			error = EINVAL;
			goto out;
		}
	}

out:
	if (mount_counts)
		free(mount_counts);

	return (error);
}

/*
 * Fetch cache statistics.
 */
int
BC_fetch_statistics(struct BC_statistics **pss)
{
	struct BC_command bc;
	static struct BC_statistics ss;
	int error;

	bc.bc_magic = BC_MAGIC;
	bc.bc_opcode = BC_OP_STATS;
	bc.bc_data1 = (uintptr_t) &ss;
	bc.bc_data1_size = (unsigned int) sizeof(ss);
	error = sysctlbyname(BC_SYSCTL, NULL, NULL, &bc, sizeof(bc));
	if (error != 0) {
		return(errno);
	}
	*pss = &ss;
	return(0);
}
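
/*
 * Example usage (illustrative sketch, not part of the original source).
 * Note that *pss points at a static buffer, so it must not be freed:
 *
 *	struct BC_statistics *ss = NULL;
 *	if (BC_fetch_statistics(&ss) == 0 && ss != NULL)
 *		BC_print_statistics(NULL, ss);	// NULL prints to stdout
 */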

/*
 * Convert a list of history entries into a smaller list of
 * playlist entries.
 *
 * Detects the presence of a prefetch tag and marks playlist entries prior
 * to the tag as requiring prefetch.
 *
 * Returns the playlist in an allocated buffer.
 */
int
BC_convert_history(const struct BC_history *hc, struct BC_playlist **ppc)
{
	struct BC_playlist *pc = NULL;
	struct BC_playlist_mount *pm;
	struct BC_playlist_entry *pe;
	struct BC_history_mount *hm;
	struct BC_history_entry *he;
	int error, mount_idx, curbatch;

	*ppc = NULL;

	if (hc == NULL) {
		error = EINVAL;
		goto out;
	}

	if ((pc = calloc(1, sizeof(*pc))) == NULL) {
		error = errno;
		goto out;
	}

	/* We won't convert all the history mounts into playlist mounts (some have no uuid) */
	if ((pc->p_mounts = malloc(sizeof(*pc->p_mounts) * hc->h_nmounts)) == NULL) {
		error = errno;
		goto out;
	}
	pc->p_nmounts = 0;
	for (hm = hc->h_mounts; hm < (hc->h_mounts + hc->h_nmounts); hm++) {
		if (! uuid_is_null(hm->hm_uuid)) {
			pm = pc->p_mounts + pc->p_nmounts;
			uuid_copy(pm->pm_uuid, hm->hm_uuid);
			pm->pm_nentries = 0;
			pc->p_nmounts++;
		}
	}

	/* We won't convert all the history entries into playlist entries (writes, tags) */
	if ((pc->p_entries = malloc(sizeof(*pc->p_entries) * hc->h_nentries)) == NULL) {
		error = errno;
		goto out;
	}
	/* scan history and convert */
	curbatch = 0;
	pc->p_nentries = 0;
	for (he = hc->h_entries; he < (hc->h_entries + hc->h_nentries); he++) {

		/* if we find a tag, mark the next batch */
		if (he->he_flags & BC_HE_TAG) {
			if (curbatch < BC_MAXBATCHES) {
				curbatch++;
			}
			continue;
		}

		/* if we find a writethrough, discard it (debugging use only) */
		if (he->he_flags & BC_HE_WRITE)
			continue;

		if (he->he_length == 0)
			continue;

		if (he->he_mount_idx >= hc->h_nmounts) {
			warnx("History entry %ld referenced mount index %d, but there are only %d mounts", (long)(he - hc->h_entries), he->he_mount_idx, hc->h_nmounts);
			error = EINVAL;
			goto out;
		}

		hm = hc->h_mounts + he->he_mount_idx;
		pe = pc->p_entries + pc->p_nentries;

		/* convert history entry across */
		for (mount_idx = 0; mount_idx < pc->p_nmounts; mount_idx++) {
			if (0 == uuid_compare(pc->p_mounts[mount_idx].pm_uuid, hm->hm_uuid)) {
				break;
			}
		}
		if (mount_idx >= pc->p_nmounts) {
			/* Entry matched a mount we filtered out */
			continue;
		}

		pm = pc->p_mounts + mount_idx;

		pe->pe_mount_idx = mount_idx;
		pe->pe_offset = he->he_offset;
		pe->pe_length = he->he_length;
		pe->pe_batch = curbatch;
		pe->pe_flags = 0;

		if (pm->pm_nentries >= hm->hm_nentries) {
			warnx("Bad playlist: more entries existed than mount %s claimed (%d)", uuid_string(pm->pm_uuid), hm->hm_nentries);
			error = EINVAL;
			goto out;
		}

		pm->pm_nentries++;
		pc->p_nentries++;
	}

	/* Remove any mounts with no entries */
	for (mount_idx = 0; mount_idx < pc->p_nmounts; mount_idx++) {
		pm = pc->p_mounts + mount_idx;
		if (pm->pm_nentries == 0) {
			pc->p_nmounts--;
			if (mount_idx < pc->p_nmounts) {
				/* the source and destination overlap, so use memmove */
				memmove(pm, pm + 1, sizeof(*pm) * (pc->p_nmounts - mount_idx));
				for (pe = pc->p_entries; pe < (pc->p_entries + pc->p_nentries); pe++)
					if (pe->pe_mount_idx > mount_idx)
						pe->pe_mount_idx--;
				mount_idx--;
			}
		}
	}

	if ((error = BC_verify_playlist(pc)) != 0) {
		goto out;
	}

	*ppc = pc;

out:
	if (error) {
		PC_FREE_ZERO(pc);
	}
	return(error);
}
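
/*
 * Example usage (illustrative sketch, not part of the original source;
 * the output path is hypothetical): record a boot, then persist it.
 *
 *	struct BC_history *hc = NULL;
 *	struct BC_playlist *pc = NULL;
 *	if (BC_stop(&hc) == 0) {
 *		if (BC_convert_history(hc, &pc) == 0)
 *			BC_write_playlist("/tmp/recorded.playlist", pc);
 *		PC_FREE_ZERO(pc);
 *		HC_FREE_ZERO(hc);
 *	}
 */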

/*
 * Start the cache, feeding it the playlist if provided.
 */
int
BC_start(struct BC_playlist *pc)
{
	struct BC_command bc;

	bc.bc_magic = BC_MAGIC;
	bc.bc_opcode = BC_OP_START;
	if (pc) {
		bc.bc_data1      = (uintptr_t)     pc->p_mounts;
		bc.bc_data1_size = (unsigned int) (pc->p_nmounts * sizeof(*pc->p_mounts));
		bc.bc_data2      = (uintptr_t)     pc->p_entries;
		bc.bc_data2_size = (unsigned int) (pc->p_nentries * sizeof(*pc->p_entries));
	} else {
		bc.bc_data1      = 0;
		bc.bc_data1_size = 0;
		bc.bc_data2      = 0;
		bc.bc_data2_size = 0;
	}
	return(sysctlbyname(BC_SYSCTL, NULL, NULL, &bc, sizeof(bc)) ? errno : 0);
}
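
/*
 * Example usage (illustrative sketch, not part of the original source;
 * the playlist path is hypothetical):
 *
 *	struct BC_playlist *pc = NULL;
 *	if (BC_read_playlist("/var/db/BootCache.playlist", &pc) == 0) {
 *		if (BC_start(pc) != 0)
 *			warnx("could not start cache: %d %s", errno, strerror(errno));
 *		PC_FREE_ZERO(pc);
 *	}
 */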

int
BC_stop(struct BC_history **phc)
{
	struct BC_command bc;
	struct BC_history *hc;
	int error;
	size_t nsize;

	/*
	 * Stop the cache and get the history buffer size.
	 */
	bc.bc_magic = BC_MAGIC;
	bc.bc_opcode = BC_OP_STOP;
	nsize = sizeof(bc);
	error = sysctlbyname(BC_SYSCTL, &bc, &nsize, &bc, nsize);
	if (error != 0) {
		/* if the cache was not running, not really an error */
		if (errno != ENXIO)
			warnx("could not stop cache: %d %s", errno, strerror(errno));
		return(errno);
	}
	if (nsize != sizeof(bc)) {
		warnx("control structure wrong size, version mismatch?");
		return(EINVAL);
	}

	/*
	 * Fetch and clear the history buffer.
	 */
	bc.bc_opcode = BC_OP_HISTORY;
	if ((hc = calloc(1, sizeof(*hc))) == NULL) {
		warnx("could not allocate history struct memory");
		return(ENOMEM);
	}
	if (bc.bc_data1_size == 0 || bc.bc_data2_size == 0) {
		bc.bc_data1      = 0;
		bc.bc_data2      = 0;
		bc.bc_data1_size = 0;
		bc.bc_data2_size = 0;
	} else {
		if ((hc->h_mounts = malloc(bc.bc_data1_size)) == NULL) {
			warnx("could not allocate history mounts memory");
			HC_FREE_ZERO(hc);
			return(ENOMEM);
		}
		if ((hc->h_entries = malloc(bc.bc_data2_size)) == NULL) {
			warnx("could not allocate history entries memory");
			HC_FREE_ZERO(hc);
			return(ENOMEM);
		}
		hc->h_nmounts  = bc.bc_data1_size / sizeof(struct BC_history_mount);
		hc->h_nentries = bc.bc_data2_size / sizeof(struct BC_history_entry);

		bc.bc_data1 = (uintptr_t) hc->h_mounts;
		bc.bc_data2 = (uintptr_t) hc->h_entries;
	}
	error = sysctlbyname(BC_SYSCTL, NULL, NULL, &bc, sizeof(bc));
	if (error != 0) {
		warnx("could not fetch %u and %u bytes of history: %d %s", bc.bc_data1_size, bc.bc_data2_size, errno, strerror(errno));
		HC_FREE_ZERO(hc);
		return(errno);
	}

	if (hc->h_mounts || hc->h_entries) {
		hc->h_nmounts  = bc.bc_data1_size / sizeof(struct BC_history_mount);
		hc->h_nentries = bc.bc_data2_size / sizeof(struct BC_history_entry);
	}

	*phc = hc;
	return(0);
}
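
/*
 * Example usage (illustrative sketch, not part of the original source):
 * dump the recorded history for inspection.
 *
 *	struct BC_history *hc = NULL;
 *	if (BC_stop(&hc) == 0) {
 *		BC_print_history(NULL, hc);	// NULL prints to stdout
 *		HC_FREE_ZERO(hc);
 *	}
 */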

/*
 * Create a bootcache playlist for the given file's disk blocks.
 * If an error occurs, as much of the playlist as could be calculated is returned.
 * The caller is responsible for freeing the returned BC_playlist, if non-NULL.
 */
int
BC_playlist_for_file(int fd, struct BC_playlist** ppc) {
	return BC_playlist_for_filename(fd, NULL, 0, ppc);
}

/*
 * Create a bootcache playlist for the given file's disk blocks.
 * If an error occurs, as much of the playlist as could be calculated is returned.
 * The caller is responsible for freeing the returned BC_playlist, if non-NULL.
 *
 * Compressed files are handled appropriately if the filename is provided.
 */
int
BC_playlist_for_filename(int fd, const char *fname, off_t maxsize, struct BC_playlist** ppc) {
	struct BC_playlist* pc = NULL;
	int error = 0;
	int compressed_fd = -1;
	off_t filesize = 0;

	*ppc = NULL;

	struct stat fd_stats;
	if (0 != fstat(fd, &fd_stats)) {
		warnx("Unable to stat %d: %d %s", fd, errno, strerror(errno));
		error = errno;
		goto out;
	}

	filesize = fd_stats.st_size;

	if (fd_stats.st_flags & UF_COMPRESSED) {
		if (fname) {
			// If the file is compressed, figure out the size of the (hidden)
			// resource fork, if present
			ssize_t sz = getxattr(fname, XATTR_RESOURCEFORK_NAME, NULL, 0, 0,
								  XATTR_SHOWCOMPRESSION);
			if (sz > 0) {
				filesize = sz;

				// We want to do the fcntl on the resource fork, not the data fork
				char rsrcpath[PATH_MAX];
				snprintf(rsrcpath, sizeof(rsrcpath), "%s/..namedfork/rsrc", fname);
				compressed_fd = open(rsrcpath, O_RDONLY);
				if (compressed_fd < 0) {
					warnx("Unable to open compressed resource fork %s", rsrcpath);
					error = errno;
					goto out;
				}
				fd = compressed_fd;
			} else {
				/* This is a compressed file, but it has no resource fork. Nothing to do */
				error = EINVAL;
				goto out;
			}
		} else {
			/* This is a compressed file, but we weren't provided a filename, so we can't grab the resource fork */
			error = EINVAL;
			goto out;
		}
	}

	struct {
		uint32_t size;
		uuid_t   uuid;
	} attrBuf = {0, {0}};

	// Get the UUID of the mount where the file lives
	struct statfs statfs_buf;
	if (0 != fstatfs(fd, &statfs_buf)) {
		warnx("Unable to fstatfs %d: %d %s", fd, errno, strerror(errno));
		// Assume it's on the root volume
		uuid_clear(attrBuf.uuid);
		// Round up to the block size
		filesize = (((filesize + (512 - 1)) / 512) * 512);
	} else {
		// Round up to the block size
		filesize = (((filesize + (statfs_buf.f_bsize - 1)) / statfs_buf.f_bsize) * statfs_buf.f_bsize);

		struct attrlist list = {
			.bitmapcount = ATTR_BIT_MAP_COUNT,
			.volattr = ATTR_VOL_INFO | ATTR_VOL_UUID,
		};

		if (0 != getattrlist(statfs_buf.f_mntonname,  &list, &attrBuf, sizeof(attrBuf), 0)) {
			warnx("Unable to determine uuid for volume %s", statfs_buf.f_mntonname);
			// Assume it's on the root volume
			uuid_clear(attrBuf.uuid);
		}
	}

	if (maxsize != 0 && maxsize < filesize) {
		filesize = maxsize;
	}

	// Allocate the playlist and initialize it with the given mount
	pc = calloc(1, sizeof(*pc));
	if (!pc) {
		warnx("Unable to allocate playlist structure");
		error = errno;
		goto out;
	}
	pc->p_nmounts = 1;
	pc->p_mounts = malloc(sizeof(*pc->p_mounts));
	if (!pc->p_mounts) {
		warnx("could not allocate memory for playlist mounts");
		error = errno;
		goto out;
	}
	pc->p_mounts[0].pm_nentries = 0;
	uuid_copy(pc->p_mounts[0].pm_uuid, attrBuf.uuid);
	pc->p_nentries = 0;
	pc->p_entries = NULL;

	off_t ra_off;
	for (ra_off = 0; ra_off < filesize; ) {
		off_t remaining = (filesize - ra_off);
		struct log2phys block_range = {
			.l2p_flags       = 0,
			.l2p_devoffset   = ra_off,    // As an IN parameter to F_LOG2PHYS_EXT, this is the offset into the file
			.l2p_contigbytes = remaining, // As an IN parameter to F_LOG2PHYS_EXT, this is the number of bytes to be queried
		};

		int ret = fcntl(fd, F_LOG2PHYS_EXT, &block_range);
		if (ret != 0) {
			//RLOG(NOTICE, "fcntl(%d, F_LOG2PHYS_EXT, &{.offset: %"PRIdoff", .bytes: %"PRIdoff"}) => %d (errno: %d %s)", fd, block_range.l2p_devoffset, block_range.l2p_contigbytes, ret, errno, strerror(errno));
			break;
		}

		// block_range.l2p_devoffset;   as an OUT parameter from F_LOG2PHYS_EXT, this is the offset in bytes on the disk
		// block_range.l2p_contigbytes; as an OUT parameter from F_LOG2PHYS_EXT, this is the number of bytes in the range

		if (remaining < block_range.l2p_contigbytes) {
			warnx("Invalid size returned for %d from disk (%lld bytes requested, %lld bytes returned)", fd, remaining, block_range.l2p_contigbytes);
			break;
		}

		//RLOG(DEBUG, "%"PRIdoff":%"PRIdoff" is %"PRIdoff":%"PRIdoff" on disk\n", ra_off, remaining, block_range.l2p_devoffset, block_range.l2p_contigbytes);

		if (block_range.l2p_contigbytes <= 0) {
			//RLOG(INFO, "%"PRIdoff":%"PRIdoff" returned %"PRIdoff":%"PRIdoff"\n", ra_off, remaining, block_range.l2p_devoffset, block_range.l2p_contigbytes);
			break;
		}

		if (block_range.l2p_devoffset < 0) {
			warnx("Invalid block range return for %d from disk (%lld:%lld returned %lld:%lld)\n", fd, ra_off, remaining, block_range.l2p_devoffset, block_range.l2p_contigbytes);
			break;
		}

		ra_off += block_range.l2p_contigbytes;

		// Add this range to our array
		// TODO: reduce the number of realloc calls
		pc->p_entries = reallocf(pc->p_entries, (pc->p_nentries + 1) * sizeof(*pc->p_entries));
		if (!pc->p_entries) {
			warnx("could not allocate memory for playlist data");
			error = errno;
			goto out;
		}
		pc->p_entries[pc->p_nentries].pe_offset    = block_range.l2p_devoffset;
		pc->p_entries[pc->p_nentries].pe_length    = block_range.l2p_contigbytes;
		pc->p_entries[pc->p_nentries].pe_batch     = 0;
		pc->p_entries[pc->p_nentries].pe_flags     = 0;
		pc->p_entries[pc->p_nentries].pe_mount_idx = 0;
		pc->p_mounts[0].pm_nentries++;
		pc->p_nentries++;
	}

	if (pc->p_nentries == 0) {
		error = ENOENT;
		goto out;
	}

	if ((error = BC_verify_playlist(pc)) != 0) {
		goto out;
	}

	*ppc = pc;
	error = 0;
out:
	if (compressed_fd >= 0) {
		close(compressed_fd);
	}
	if (error != 0) {
		PC_FREE_ZERO(pc);
	}
	return (error);
}
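
/*
 * Example usage (illustrative sketch, not part of the original source;
 * the target path is hypothetical): build a playlist covering the first
 * 1MB of a file.
 *
 *	int fd = open("/System/Library/Caches/example.dat", O_RDONLY);
 *	if (fd >= 0) {
 *		struct BC_playlist *pc = NULL;
 *		if (BC_playlist_for_filename(fd, "/System/Library/Caches/example.dat",
 *				1024 * 1024, &pc) == 0) {
 *			// ... use or merge pc here ...
 *			PC_FREE_ZERO(pc);
 *		}
 *		close(fd);
 *	}
 */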

/*
 * Notify the kext that a new mount has appeared.
 */
int
BC_notify_mount(void)
{
	struct BC_command bc;
	int error;

	bc.bc_magic = BC_MAGIC;
	bc.bc_opcode = BC_OP_MOUNT;
	error = sysctlbyname(BC_SYSCTL, NULL, NULL, &bc, sizeof(bc));
	if (error != 0) {
		/* if the cache was not running, not really an error */
		if (errno != ENXIO)
			warnx("could not notify cache of new mount: %d %s", errno, strerror(errno));
		return(errno);
	}

	return(0);
}

/*
 * Check if the boot cache is supported on this device.
 * Returns 0 if it is supported, non-0 if it isn't.
 */
int
BC_test(void)
{
	struct BC_command bc;
	bc.bc_magic = BC_MAGIC;
	bc.bc_opcode = BC_OP_TEST;
	return(sysctlbyname(BC_SYSCTL, NULL, NULL, &bc, sizeof(bc)) ? errno : 0);
}

/*
 * Jettison the cache.
 */
int
BC_jettison(void)
{
	struct BC_command bc;
	int error;

	bc.bc_magic = BC_MAGIC;
	bc.bc_opcode = BC_OP_JETTISON;
	error = sysctlbyname(BC_SYSCTL, NULL, NULL, &bc, sizeof(bc));
	if (error != 0) {
		/* if the cache was not running, not really an error */
		if (errno != ENXIO)
			warnx("could not jettison cache: %d %s", errno, strerror(errno));
		return(errno);
	}

	return(0);
}

int
BC_print_statistics(char *fname, struct BC_statistics *ss)
{
	FILE *fp;
	uint disk_msecs, disk_bytes, d, b, m, bytes_remaining, other;

	if (ss == NULL)
		return(0);

	errno = 0;
	if (fname != NULL) {
		fp = fopen(fname, "w");
	} else {
		fp = stdout;
	}
	if (fp == NULL)
		return(errno);

	/* readahead */
fprintf(fp, "initiated reads            %u\n", ss->ss_initiated_reads);
fprintf(fp, "blocks read                %u\n", ss->ss_read_blocks);
fprintf(fp, "bytes read                 %u\n", ss->ss_read_bytes);
	if (ss->ss_read_bytes > 0) {
		bytes_remaining = ss->ss_read_bytes - ss->ss_hit_bytes - ss->ss_read_errors_bytes - ss->ss_write_discards - ss->ss_read_discards - ss->ss_stolen_bytes - ss->ss_spurious_bytes - ss->ss_hit_bytes_afterhistory - ss->ss_lost_bytes_afterhistory - ss->ss_bypass_nocache_discards;
fprintf(fp, " bytes used                %-10u  %3.0f%%\n", ss->ss_hit_bytes, ((float)ss->ss_hit_bytes / ss->ss_read_bytes) * 100);
fprintf(fp, " bytes remaining           %-10u  %3.0f%%\n", bytes_remaining, ((float)bytes_remaining / ss->ss_read_bytes) * 100);
fprintf(fp, " nonshared bytes read      %-10u  %3.0f%%\n", (ss->ss_read_bytes - ss->ss_shared_bytes), ((float)(ss->ss_read_bytes - ss->ss_shared_bytes) / ss->ss_read_bytes) * 100);
		if ((ss->ss_read_bytes - ss->ss_shared_bytes) > 0) {
fprintf(fp, "  nonshared bytes used     %-10u  %3.0f%%\n", (ss->ss_hit_bytes - ss->ss_hit_shared_bytes), ((float)(ss->ss_hit_bytes - ss->ss_hit_shared_bytes) / (ss->ss_read_bytes - ss->ss_shared_bytes)) * 100);
		}
fprintf(fp, " shared cache bytes read   %-10u  %3.0f%%\n", ss->ss_shared_bytes, ((float)ss->ss_shared_bytes / ss->ss_read_bytes) * 100);
		if (ss->ss_shared_bytes > 0) {
fprintf(fp, "  shared cache bytes used  %-10u  %3.0f%%\n", ss->ss_hit_shared_bytes, ((float)ss->ss_hit_shared_bytes / ss->ss_shared_bytes) * 100);
		}
fprintf(fp, " low priority bytes        %-10u  %3.0f%%\n", ss->ss_read_bytes_lowpri, ((float)ss->ss_read_bytes_lowpri / ss->ss_read_bytes) * 100);
fprintf(fp, " bytes failed to read      %-10u  %3.0f%%\n", ss->ss_read_errors_bytes, ((float)ss->ss_read_errors_bytes / ss->ss_read_bytes) * 100);
		if (ss->ss_bypass_nocache_discards > 0) {
fprintf(fp, " bytes discarded by noncac %-10u  %3.0f%%\n", ss->ss_bypass_nocache_discards, ((float)ss->ss_bypass_nocache_discards / ss->ss_read_bytes) * 100);
		}
fprintf(fp, " bytes discarded by write  %-10u  %3.0f%%\n", ss->ss_write_discards, ((float)ss->ss_write_discards / ss->ss_read_bytes) * 100);
fprintf(fp, " bytes discarded by read   %-10u  %3.0f%%\n", ss->ss_read_discards, ((float)ss->ss_read_discards / ss->ss_read_bytes) * 100);
fprintf(fp, " bytes discarded by error  %-10u  %3.0f%%\n", ss->ss_error_discards, ((float)ss->ss_error_discards / ss->ss_read_bytes) * 100);
fprintf(fp, " bytes stolen from cache   %-10u  %3.0f%%\n", ss->ss_stolen_bytes, ((float)ss->ss_stolen_bytes / ss->ss_read_bytes) * 100);
fprintf(fp, " bytes used after boot     %-10u  %3.0f%%\n", ss->ss_hit_bytes_afterhistory, ((float)ss->ss_hit_bytes_afterhistory / ss->ss_read_bytes) * 100);
fprintf(fp, " bytes lost after boot     %-10u  %3.0f%%\n", ss->ss_lost_bytes_afterhistory, ((float)ss->ss_lost_bytes_afterhistory / ss->ss_read_bytes) * 100);
		if (bytes_remaining == 0 || 0 != ss->ss_spurious_bytes) {
fprintf(fp, " bytes wasted              %-10u  %3.0f%%\n", ss->ss_spurious_bytes, ((float)ss->ss_spurious_bytes / ss->ss_read_bytes) * 100);
		}
	}
fprintf(fp, "bytes requested            %u\n", ss->ss_requested_bytes);
	if (ss->ss_requested_bytes > 0) {
fprintf(fp, " bytes hit                 %-10u  %3.0f%%\n", ss->ss_hit_bytes, ((float)ss->ss_hit_bytes / ss->ss_requested_bytes) * 100);
		for (m = 0; m < STAT_MOUNTMAX; m++) {
			if (ss->ss_requested_bytes_m[m] > 0 && ss->ss_requested_bytes_m[m] != ss->ss_requested_bytes) {
fprintf(fp, "  mount %d bytes hit        %-10u  %3.0f%% of %u\n", m, ss->ss_hit_bytes_m[m], ((float)ss->ss_hit_bytes_m[m] / ss->ss_requested_bytes_m[m]) * 100, ss->ss_requested_bytes_m[m]);
			}
		}
fprintf(fp, " noncached bytes requested %-10u  %3.0f%%\n", ss->ss_bypass_nocache_bytes + ss->ss_hit_nocache_bytes, ((float)(ss->ss_bypass_nocache_bytes + ss->ss_hit_nocache_bytes) / ss->ss_requested_bytes) * 100);
		if (ss->ss_bypass_nocache_bytes + ss->ss_hit_nocache_bytes > 0) {
fprintf(fp, "  noncached bytes hit      %-10u  %3.0f%%\n", ss->ss_hit_nocache_bytes, ((float)ss->ss_hit_nocache_bytes / (ss->ss_bypass_nocache_bytes + ss->ss_hit_nocache_bytes)) * 100);
fprintf(fp, "  noncached bytes missed   %-10u  %3.0f%%\n", ss->ss_bypass_nocache_bytes, ((float)ss->ss_bypass_nocache_bytes / (ss->ss_bypass_nocache_bytes + ss->ss_hit_nocache_bytes)) * 100);
		}
	}

	if (ss->ss_initiated_reads > 0 && ss->ss_read_errors > 0) {
fprintf(fp, " read errors               %-10u  %3.0f%%\n", ss->ss_read_errors, ((float)ss->ss_read_errors / ss->ss_initiated_reads) * 100);
	}
	if (ss->ss_cache_time > 0 || ss->ss_read_bytes == 0) {
fprintf(fp, "cache active time          %d.%03ds\n", ss->ss_cache_time / 1000, ss->ss_cache_time % 1000);
	} else {
fprintf(fp, "cache active time          (still active)\n");
	}

	/* inbound strategy */
fprintf(fp, "\n");
fprintf(fp, "total strategy calls       %u\n", ss->ss_strategy_calls);
	if (ss->ss_strategy_calls > 0) {
fprintf(fp, " filled calls              %-10u  %3.0f%%\n", ss->ss_strategy_calls - ss->ss_strategy_bypassed, ((float)(ss->ss_strategy_calls - ss->ss_strategy_bypassed) / ss->ss_strategy_calls) * 100);
fprintf(fp, " bypassed calls            %-10u  %3.0f%%\n", ss->ss_strategy_bypassed, ((float)ss->ss_strategy_bypassed / ss->ss_strategy_calls) * 100);
		if (ss->ss_strategy_bypassed > 0) {
			other = ss->ss_strategy_bypassed - ss->ss_strategy_nonread - ss->ss_strategy_noncached_mount - ss->ss_strategy_nonblocksize - (ss->ss_extent_lookups - ss->ss_extent_hits) - ss->ss_strategy_bypass_duringio_unfilled - ss->ss_strategy_unfilled_lowpri;
fprintf(fp, "  nonread calls            %-10u  %3.0f%%\n", ss->ss_strategy_nonread, ((float)ss->ss_strategy_nonread / ss->ss_strategy_bypassed) * 100);
fprintf(fp, "  noncached mount calls    %-10u  %3.0f%%\n", ss->ss_strategy_noncached_mount, ((float)ss->ss_strategy_noncached_mount / ss->ss_strategy_bypassed) * 100);
			if (ss->ss_strategy_nonblocksize > 0) {
fprintf(fp, "  nonblocksize calls       %-10u  %3.0f%%\n", ss->ss_strategy_nonblocksize, ((float)ss->ss_strategy_nonblocksize / ss->ss_strategy_bypassed) * 100);
			}
fprintf(fp, "  cache misses             %-10u  %3.0f%%\n", ss->ss_extent_lookups - ss->ss_extent_hits, ((float)(ss->ss_extent_lookups - ss->ss_extent_hits) / ss->ss_strategy_bypassed) * 100);
fprintf(fp, "  cache hit failures       %-10u  %3.0f%%\n", other, ((float)other / ss->ss_strategy_bypassed) * 100);
			if (ss->ss_strategy_bypass_duringio_unfilled > 0) {
fprintf(fp, "  unfilled extent          %-10u  %3.0f%%\n", ss->ss_strategy_bypass_duringio_unfilled, ((float)ss->ss_strategy_bypass_duringio_unfilled / ss->ss_strategy_bypassed) * 100);
			}
			if (ss->ss_strategy_unfilled_lowpri > 0) {
fprintf(fp, "  unfilled lowpri extent   %-10u  %3.0f%%\n", ss->ss_strategy_unfilled_lowpri, ((float)ss->ss_strategy_unfilled_lowpri / ss->ss_strategy_bypassed) * 100);
			}
		}
fprintf(fp, " noncached calls           %-10u  %3.0f%%\n", ss->ss_strategy_bypass_nocache + ss->ss_strategy_hit_nocache, ((float)(ss->ss_strategy_bypass_nocache + ss->ss_strategy_hit_nocache) / ss->ss_strategy_calls) * 100);
		if (ss->ss_strategy_bypass_nocache + ss->ss_strategy_hit_nocache > 0) {
fprintf(fp, "  noncached calls filled   %-10u  %3.0f%%\n", ss->ss_strategy_hit_nocache, ((float)ss->ss_strategy_hit_nocache / (ss->ss_strategy_bypass_nocache + ss->ss_strategy_hit_nocache)) * 100);
fprintf(fp, "  noncached calls bypassed %-10u  %3.0f%%\n", ss->ss_strategy_bypass_nocache, ((float)ss->ss_strategy_bypass_nocache / (ss->ss_strategy_bypass_nocache + ss->ss_strategy_hit_nocache)) * 100);
		}
fprintf(fp, " throttled calls           %-10u  %3.0f%%\n", ss->ss_strategy_throttled, ((float)ss->ss_strategy_throttled / ss->ss_strategy_calls) * 100);
fprintf(fp, " calls during readahead    %-10u  %3.0f%%\n", ss->ss_strategy_duringio, ((float)ss->ss_strategy_duringio / ss->ss_strategy_calls) * 100);
		if (ss->ss_strategy_duringio > 0) {
fprintf(fp, "  filled during readahead  %-10u  %3.0f%%\n", ss->ss_strategy_duringio - ss->ss_strategy_bypass_duringio, ((float)(ss->ss_strategy_duringio - ss->ss_strategy_bypass_duringio) / ss->ss_strategy_duringio) * 100);
fprintf(fp, "  bypassed during readahea %-10u  %3.0f%%\n", ss->ss_strategy_bypass_duringio, ((float)ss->ss_strategy_bypass_duringio / ss->ss_strategy_duringio) * 100);
			if (ss->ss_strategy_bypass_duringio > 0) {
				other = ss->ss_strategy_bypass_duringio - ss->ss_strategy_bypass_duringio_rootdisk_nonread - ss->ss_strategy_bypass_duringio_rootdisk_read - ss->ss_strategy_bypass_duringio_rootdisk_failure - ss->ss_strategy_bypass_duringio_unfilled - ss->ss_strategy_bypass_duringio_nocache;
				if (ss->ss_strategy_bypass_duringio_unfilled > 0) {
fprintf(fp, "   unfilled extent         %-10u  %3.0f%%\n", ss->ss_strategy_bypass_duringio_unfilled, ((float)ss->ss_strategy_bypass_duringio_unfilled / ss->ss_strategy_bypass_duringio) * 100);
				}
				if (ss->ss_strategy_bypass_duringio_nocache > 0) {
fprintf(fp, "   noncached IO            %-10u  %3.0f%%\n", ss->ss_strategy_bypass_duringio_nocache, ((float)ss->ss_strategy_bypass_duringio_nocache / ss->ss_strategy_bypass_duringio) * 100);
				}
fprintf(fp, "   root disk nonread       %-10u  %3.0f%%\n", ss->ss_strategy_bypass_duringio_rootdisk_nonread, ((float)ss->ss_strategy_bypass_duringio_rootdisk_nonread / ss->ss_strategy_bypass_duringio) * 100);
fprintf(fp, "   root disk cache miss    %-10u  %3.0f%%\n", ss->ss_strategy_bypass_duringio_rootdisk_read, ((float)ss->ss_strategy_bypass_duringio_rootdisk_read / ss->ss_strategy_bypass_duringio) * 100);
fprintf(fp, "   root disk hit failure   %-10u  %3.0f%%\n", ss->ss_strategy_bypass_duringio_rootdisk_failure, ((float)ss->ss_strategy_bypass_duringio_rootdisk_failure / ss->ss_strategy_bypass_duringio) * 100);
fprintf(fp, "   non-root                %-10u  %3.0f%%\n", other, ((float)other / ss->ss_strategy_bypass_duringio) * 100);
fprintf(fp, "   forced throttled        %-10u  %3.0f%%\n", ss->ss_strategy_forced_throttled, ((float)ss->ss_strategy_forced_throttled / ss->ss_strategy_bypass_duringio) * 100);
			}
fprintf(fp, "  extent hits during reada %-10u  %3.0f%%\n", ss->ss_hit_duringio, ((float)ss->ss_hit_duringio / ss->ss_strategy_duringio) * 100);
			if (ss->ss_hit_duringio > 0) {
fprintf(fp, "   blocked during readahea %-10u  %3.1f%%\n", ss->ss_strategy_blocked, ((float)ss->ss_strategy_blocked / ss->ss_hit_duringio) * 100);
fprintf(fp, "   block calls timed out   %-10u  %3.1f%%\n", ss->ss_strategy_timedout, ((float)ss->ss_strategy_timedout / ss->ss_hit_duringio) * 100);
fprintf(fp, "   longest blocked call    %ums\n", ss->ss_strategy_time_longest_blocked);
fprintf(fp, "   total blocked duration  %ums\n", ss->ss_strategy_time_blocked);
			}
		}
	}
	if (ss->ss_strategy_unknown > 0) {
fprintf(fp, "unknown strategy calls     %u\n", ss->ss_strategy_unknown);
fprintf(fp, "unknown strategy bytes     %u\n", ss->ss_strategy_unknown_bytes);
	}

	if (ss->ss_read_bytes - ss->ss_read_bytes_lowpri > 0) {
fprintf(fp, "\n");
fprintf(fp, "total readahead threads    %u\n", ss->ss_readahead_threads);
		for (d = 0; d < STAT_DISKMAX; d++) {
			disk_bytes = 0;
			disk_msecs = 0;

			for (b = 0; b < STAT_BATCHMAX; b++) {
				disk_bytes += ss->ss_batch_bytes[d][b];
				disk_msecs += ss->ss_batch_time[d][b];
			}
			if (0 == disk_bytes) continue; /* no reads for this disk */

			if (disk_msecs > 0) {
fprintf(fp, "Disk %d reader rate:        %ukB/s, %utps\n",
						d,
						(u_int)(((unsigned long long)disk_bytes * 1000) / (disk_msecs * 1024)),
						(ss->ss_disk_initiated_reads[d] * 1000) / disk_msecs);
			}

fprintf(fp, "Disk %d time                %d.%03ds\n",
					d,
					disk_msecs / 1000,
					disk_msecs % 1000);
fprintf(fp, "Disk %d bytes read:         %u\n", d, disk_bytes);

			for (b = 0; b < STAT_BATCHMAX; b++) {
				if (ss->ss_batch_time[d][b] > 0) {
fprintf(fp, "  batch %d time             %d.%03ds\n",
							b,
							ss->ss_batch_time[d][b] / 1000,
							ss->ss_batch_time[d][b] % 1000);
				}
			}

			for (b = 0; b < STAT_BATCHMAX; b++) {
				if (ss->ss_batch_bytes[d][b] > 0) {
fprintf(fp, "  batch %d bytes read:      %-10u", b, ss->ss_batch_bytes[d][b]);
					if (ss->ss_batch_late_bytes[d][b] > 0) {
fprintf(fp, " (%u late)", ss->ss_batch_late_bytes[d][b]);
					}
fprintf(fp, "\n");
				}
			}
		}
	}

	if (ss->ss_read_bytes_lowpri > 0) {
		for (d = 0; d < STAT_DISKMAX; d++) {
			if (0 == ss->ss_batch_bytes_lowpri[d]) continue; /* no reads for this disk */

			if (ss->ss_batch_time_lowpri[d] > 0) {
fprintf(fp, "Disk %d lowpri reader rate: %ukB/s, %u.%utps\n",
						d,
						(u_int)(((unsigned long long)ss->ss_batch_bytes_lowpri[d] * 1000) / (ss->ss_batch_time_lowpri[d] * 1024)),
						(ss->ss_disk_initiated_reads_lowpri[d] * 1000) / ss->ss_batch_time_lowpri[d], (10 * (ss->ss_disk_initiated_reads_lowpri[d] * 1000) / ss->ss_batch_time_lowpri[d]) % 10);
			}

fprintf(fp, "Disk %d lowpri bytes read:  %u\n", d, ss->ss_batch_bytes_lowpri[d]);
fprintf(fp, "Disk %d lowpri time:        %d.%03ds\n",
					d,
					ss->ss_batch_time_lowpri[d] / 1000,
					ss->ss_batch_time_lowpri[d] % 1000);
		}
	}

	/* extents */
fprintf(fp, "\n");
fprintf(fp, "mounts in cache            %u\n", ss->ss_total_mounts);
fprintf(fp, "extents in cache           %u\n", ss->ss_total_extents);
	if (ss->ss_extents_clipped > 0 && ss->ss_total_extents > 0) {
fprintf(fp, " extents clipped           %-10u  %3.0f%%\n", ss->ss_extents_clipped, ((float)ss->ss_extents_clipped / ss->ss_total_extents) * 100);
	}
fprintf(fp, "extent lookups             %u\n", ss->ss_extent_lookups);
	if (ss->ss_extent_lookups > 0) {
fprintf(fp, "extent hits                %-10u  %3.0f%%\n", ss->ss_extent_hits, ((float)ss->ss_extent_hits / ss->ss_extent_lookups) * 100);
		if (ss->ss_extent_hits > 0) {
fprintf(fp, " multiple extent hits      %-10u  %3.0f%%\n", ss->ss_hit_multiple, ((float)ss->ss_hit_multiple / ss->ss_extent_hits) * 100);
fprintf(fp, " hits aborted              %-10u  %3.0f%%\n", ss->ss_hit_aborted, ((float)ss->ss_hit_aborted / ss->ss_extent_hits) * 100);
fprintf(fp, " hits missing blocks       %-10u  %3.0f%%\n", ss->ss_hit_blkmissing, ((float)ss->ss_hit_blkmissing / ss->ss_extent_hits) * 100);
fprintf(fp, " hits stolen               %-10u  %3.0f%%\n", ss->ss_hit_stolen, ((float)ss->ss_hit_stolen / ss->ss_extent_hits) * 100);
fprintf(fp, " hits failed               %-10u  %3.0f%%\n", ss->ss_hit_failure, ((float)ss->ss_hit_failure / ss->ss_extent_hits) * 100);
		}
	}

	/* history */
fprintf(fp, "\n");
fprintf(fp, "preload time               %d.%03ds\n", ss->ss_preload_time / 1000, ss->ss_preload_time % 1000);
	if (ss->ss_history_time > 0 || ss->ss_history_entries == 0) {
fprintf(fp, "history active time        %d.%03ds\n", ss->ss_history_time / 1000, ss->ss_history_time % 1000);
		if (ss->ss_history_time > 0) {
fprintf(fp, "read/write IO rate         %u/%utps\n", (ss->ss_history_reads * 1000) / ss->ss_history_time, (ss->ss_history_writes * 1000) / ss->ss_history_time);
		}
	} else {
fprintf(fp, "history active time        (still active)\n");
	}
fprintf(fp, "history entries            %u\n", ss->ss_history_entries);
fprintf(fp, "history bytes              %u\n", ss->ss_history_bytes);
fprintf(fp, "history mounts             %u\n", ss->ss_history_mounts);
fprintf(fp, "unidentifiable mounts      %u\n", ss->ss_history_mount_no_uuid);
	if (ss->ss_history_unknown > 0) {
fprintf(fp, "history unknown calls      %u\n", ss->ss_history_unknown);
fprintf(fp, "history unknown bytes      %u\n", ss->ss_history_unknown_bytes);
	}
fprintf(fp, "number history recordings  %u\n", ss->ss_history_num_recordings);

	/* flags */
fprintf(fp, "\n");
fprintf(fp, "current flags              0x%x\n", ss->ss_cache_flags);

	if (fp == stdout) {
		fflush(fp);
	} else {
		fclose(fp);
	}
	return(0);
}

int
BC_print_history(char *fname, struct BC_history *hc)
{
	FILE *fp;
	int i;

	errno = 0;
	if (fname != NULL) {
		fp = fopen(fname, "w");
	} else {
		fp = stdout;
	}
	if (fp == NULL)
		return(errno);

	for (i = 0; i < hc->h_nentries; i++) {
		fprintf(fp, "%s %-12llu %-8llu %5u%s%s\n",
				uuid_string(hc->h_mounts[hc->h_entries[i].he_mount_idx].hm_uuid),
				hc->h_entries[i].he_offset, hc->h_entries[i].he_length,
				hc->h_entries[i].he_pid,
				hc->h_entries[i].he_flags & BC_HE_HIT    ? " hit"    :
				hc->h_entries[i].he_flags & BC_HE_WRITE  ? " write"  :
				hc->h_entries[i].he_flags & BC_HE_TAG    ? " tag"    : " miss",
				hc->h_entries[i].he_flags & BC_HE_SHARED ? " shared" : "");
	}

	if (fp == stdout) {
		fflush(fp);
	} else {
		fclose(fp);
	}
	return(0);
}

int
BC_tag_history(void)
{
	struct BC_command bc;
	int error;

	bc.bc_magic = BC_MAGIC;
	bc.bc_opcode = BC_OP_TAG;
	error = sysctlbyname(BC_SYSCTL, NULL, NULL, &bc, sizeof(bc));
	if (error != 0) {
		warnx("could not insert prefetch tag: %d %s", errno, strerror(errno));
		return(ENOENT);
	}
	return(0);
}

/*
 * Unload the BootCache kext.
 */
int
BC_unload(void)
{
#if 1
	return(0);
#else
	pid_t	child;
	char	*argv[4];
	int	result;

	child = fork();
	switch (child) {
	case -1:
		/* fork failed, bail with error */
		return(errno);
	case 0:
		/* we are the child, do our work */
		argv[0] = BC_KEXTUNLOAD;
		argv[1] = "-b";
		argv[2] = BC_BUNDLE_ID;
		argv[3] = NULL;
		result = execve(BC_KEXTUNLOAD, argv, NULL);
		exit((result != 0) ? 1 : 0);
	default:
		/* we are the parent, wait for the child */
		waitpid(child, &result, 0);
		break;
	}
	/* EBUSY is not a good error */
	return((result != 0) ? EBUSY : 0);
#endif
}