1/*
2 * Copyright (c) 2003-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <sys/param.h>
30#include <sys/systm.h>
31#include <sys/fcntl.h>
32#include <sys/kernel.h>
33#include <sys/malloc.h>
34#include <sys/ubc.h>
35#include <sys/ubc_internal.h>
36#include <sys/vnode.h>
37#include <sys/vnode_internal.h>
38#include <sys/kauth.h>
39
40#include <hfs/hfs.h>
41#include <hfs/hfs_endian.h>
42#include <hfs/hfs_format.h>
43#include <hfs/hfs_mount.h>
44#include <hfs/hfs_hotfiles.h>
45
46#include "hfscommon/headers/BTreeScanner.h"
47
48
49#define HFC_DEBUG  0
50#define HFC_VERBOSE 0
51
52
53/*
54 * Minimum post Tiger base time.
55 * Thu Mar 31 17:00:00 2005
56 */
57#define HFC_MIN_BASE_TIME   0x424c8f00L
58
59/*
60 * Hot File List (runtime).
61 */
62typedef struct hotfileinfo {
63	u_int32_t  hf_fileid;
64	u_int32_t  hf_temperature;
65	u_int32_t  hf_blocks;
66} hotfileinfo_t;
67
68typedef struct hotfilelist {
69	u_int32_t     hfl_magic;
70	u_int32_t     hfl_version;
71	time_t        hfl_duration;    /* duration of sample period */
72	int           hfl_count;       /* count of hot files recorded */
73	int           hfl_next;        /* next file to move */
74	int           hfl_totalblocks; /* total hot file blocks */
75	int           hfl_reclaimblks; /* blocks to reclaim in HFV */
76	u_int32_t     hfl_spare[2];
77	hotfileinfo_t hfl_hotfile[1];  /* array of hot files */
78} hotfilelist_t;
79
80
81/*
82 * Hot File Entry (runtime).
83 */
84typedef struct hotfile_entry {
85	struct  hotfile_entry  *left;
86	struct  hotfile_entry  *right;
87	u_int32_t  fileid;
88	u_int32_t  temperature;
89	u_int32_t  blocks;
90} hotfile_entry_t;
91
92/*
93 * Hot File Recording Data (runtime).
94 */
95typedef struct hotfile_data {
96	struct hfsmount *hfsmp;
97	long             refcount;
98	int		 activefiles;  /* active number of hot files */
99	u_int32_t	 threshold;
100	u_int32_t	 maxblocks;
101	hotfile_entry_t	*rootentry;
102	hotfile_entry_t	*freelist;
103	hotfile_entry_t	*coldest;
104	hotfile_entry_t	 entries[1];
105} hotfile_data_t;
106
107static int  hfs_recording_start (struct hfsmount *);
108static int  hfs_recording_stop (struct hfsmount *);
109
110
111/*
112 * Hot File Data recording functions (in-memory binary tree).
113 */
114static void              hf_insert (hotfile_data_t *, hotfile_entry_t *);
115static void              hf_delete (hotfile_data_t *, u_int32_t, u_int32_t);
116static hotfile_entry_t * hf_coldest (hotfile_data_t *);
117static hotfile_entry_t * hf_getnewentry (hotfile_data_t *);
118static void              hf_getsortedlist (hotfile_data_t *, hotfilelist_t *);
119
120#if HFC_DEBUG
121static hotfile_entry_t * hf_lookup (hotfile_data_t *, u_int32_t, u_int32_t);
122static void  hf_maxdepth(hotfile_entry_t *, int, int *);
123static void  hf_printtree (hotfile_entry_t *);
124#endif
125
126/*
127 * Hot File misc support functions.
128 */
129static int  hotfiles_collect (struct hfsmount *);
130static int  hotfiles_age (struct hfsmount *);
131static int  hotfiles_adopt (struct hfsmount *);
132static int  hotfiles_evict (struct hfsmount *, vfs_context_t);
133static int  hotfiles_refine (struct hfsmount *);
134static int  hotextents(struct hfsmount *, HFSPlusExtentDescriptor *);
135static int  hfs_addhotfile_internal(struct vnode *);
136
137
138/*
139 * Hot File Cluster B-tree (on disk) functions.
140 */
141static int  hfc_btree_create (struct hfsmount *, unsigned int, unsigned int);
142static int  hfc_btree_open (struct hfsmount *, struct vnode **);
143static int  hfc_btree_close (struct hfsmount *, struct vnode *);
144static int  hfc_comparekeys (HotFileKey *, HotFileKey *);
145
146
147char hfc_tag[] = "CLUSTERED HOT FILES B-TREE     ";
148
149
150/*
151 *========================================================================
152 *                       HOT FILE INTERFACE ROUTINES
153 *========================================================================
154 */
155
156/*
157 * Start recording the hotest files on a file system.
158 *
159 * Requires that the hfc_mutex be held.
160 */
161static int
162hfs_recording_start(struct hfsmount *hfsmp)
163{
164	hotfile_data_t *hotdata;
165	struct timeval tv;
166	int maxentries;
167	size_t size;
168	int i;
169	int error;
170
171	if ((hfsmp->hfs_flags & HFS_READ_ONLY) ||
172	    (hfsmp->jnl == NULL) ||
173	    (hfsmp->hfs_flags & HFS_METADATA_ZONE) == 0) {
174		return (EPERM);
175	}
176	if (HFSTOVCB(hfsmp)->freeBlocks < (2 * (u_int32_t)hfsmp->hfs_hotfile_maxblks)) {
177		return (ENOSPC);
178	}
179	if (hfsmp->hfc_stage != HFC_IDLE) {
180		return (EBUSY);
181	}
182	hfsmp->hfc_stage = HFC_BUSY;
183
184	/*
185	 * Dump previous recording data.
186	 */
187	if (hfsmp->hfc_recdata) {
188		void * tmp;
189
190		tmp = hfsmp->hfc_recdata;
191		hfsmp->hfc_recdata = NULL;
192		FREE(tmp, M_TEMP);
193	}
194
195	microtime(&tv);  /* Times are base on GMT time. */
196
197	/*
198	 * On first startup check for suspended recording.
199	 */
200	if (hfsmp->hfc_timebase == 0 &&
201	    hfc_btree_open(hfsmp, &hfsmp->hfc_filevp) == 0) {
202		HotFilesInfo hotfileinfo;
203
204		if ((BTGetUserData(VTOF(hfsmp->hfc_filevp), &hotfileinfo,
205		                   sizeof(hotfileinfo)) == 0) &&
206		    (SWAP_BE32 (hotfileinfo.magic) == HFC_MAGIC) &&
207		    (SWAP_BE32 (hotfileinfo.timeleft) > 0) &&
208		    (SWAP_BE32 (hotfileinfo.timebase) > 0)) {
209			hfsmp->hfc_maxfiles = SWAP_BE32 (hotfileinfo.maxfilecnt);
210			hfsmp->hfc_timebase = SWAP_BE32 (hotfileinfo.timebase);
211			hfsmp->hfc_timeout = SWAP_BE32 (hotfileinfo.timeleft) + tv.tv_sec ;
212			/* Fix up any bogus timebase values. */
213			if (hfsmp->hfc_timebase < HFC_MIN_BASE_TIME) {
214				hfsmp->hfc_timebase = hfsmp->hfc_timeout - HFC_DEFAULT_DURATION;
215			}
216#if HFC_VERBOSE
217			printf("hfs: Resume recording hot files on %s (%d secs left)\n",
218				hfsmp->vcbVN, SWAP_BE32 (hotfileinfo.timeleft));
219#endif
220		} else {
221			hfsmp->hfc_maxfiles = HFC_DEFAULT_FILE_COUNT;
222			hfsmp->hfc_timebase = tv.tv_sec + 1;
223			hfsmp->hfc_timeout = hfsmp->hfc_timebase + HFC_DEFAULT_DURATION;
224		}
225		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
226		hfsmp->hfc_filevp = NULL;
227	} else {
228		struct cat_attr cattr;
229		u_int32_t cnid;
230
231		/*
232		 * Make sure a btree file exists.
233		 */
234		cnid = GetFileInfo(HFSTOVCB(hfsmp), kRootDirID, HFC_FILENAME, &cattr, NULL);
235		if ((cnid == 0) &&
236		    !S_ISREG(cattr.ca_mode) &&
237		    (error = hfc_btree_create(hfsmp, HFSTOVCB(hfsmp)->blockSize, HFC_DEFAULT_FILE_COUNT))) {
238			hfsmp->hfc_stage = HFC_IDLE;
239			wakeup((caddr_t)&hfsmp->hfc_stage);
240			return (error);
241		}
242#if HFC_VERBOSE
243		printf("hfs: begin recording hot files on %s\n", hfsmp->vcbVN);
244#endif
245		hfsmp->hfc_maxfiles = HFC_DEFAULT_FILE_COUNT;
246		hfsmp->hfc_timeout = tv.tv_sec + HFC_DEFAULT_DURATION;
247
248		/* Reset time base.  */
249		if (hfsmp->hfc_timebase == 0) {
250			hfsmp->hfc_timebase = tv.tv_sec + 1;
251		} else {
252			time_t cumulativebase;
253
254			cumulativebase = hfsmp->hfc_timeout - (HFC_CUMULATIVE_CYCLES * HFC_DEFAULT_DURATION);
255			hfsmp->hfc_timebase = MAX(hfsmp->hfc_timebase, cumulativebase);
256		}
257	}
258
259	if ((hfsmp->hfc_maxfiles == 0) ||
260	    (hfsmp->hfc_maxfiles > HFC_MAXIMUM_FILE_COUNT)) {
261		hfsmp->hfc_maxfiles = HFC_DEFAULT_FILE_COUNT;
262	}
263	maxentries = hfsmp->hfc_maxfiles;
264
265	size = sizeof(hotfile_data_t) + (maxentries * sizeof(hotfile_entry_t));
266	MALLOC(hotdata, hotfile_data_t *, size, M_TEMP, M_WAITOK);
267	if (hotdata == NULL) {
268		hfsmp->hfc_recdata = NULL;
269		hfsmp->hfc_stage = HFC_IDLE;
270		wakeup((caddr_t)&hfsmp->hfc_stage);
271		return(ENOMEM);
272	}
273
274	bzero(hotdata, size);
275
276	for (i = 1; i < maxentries ; i++)
277		hotdata->entries[i-1].right = &hotdata->entries[i];
278
279	hotdata->freelist = &hotdata->entries[0];
280	/*
281	 * Establish minimum temperature and maximum file size.
282	 */
283	hotdata->threshold = HFC_MINIMUM_TEMPERATURE;
284	hotdata->maxblocks = HFC_MAXIMUM_FILESIZE / HFSTOVCB(hfsmp)->blockSize;
285	hotdata->hfsmp = hfsmp;
286
287	hfsmp->hfc_recdata = hotdata;
288	hfsmp->hfc_stage = HFC_RECORDING;
289	wakeup((caddr_t)&hfsmp->hfc_stage);
290	return (0);
291}
292
293/*
294 * Stop recording the hotest files on a file system.
295 *
296 * Requires that the hfc_mutex be held.
297 */
298static int
299hfs_recording_stop(struct hfsmount *hfsmp)
300{
301	hotfile_data_t *hotdata;
302	hotfilelist_t  *listp;
303	struct timeval tv;
304	size_t  size;
305	enum hfc_stage newstage = HFC_IDLE;
306	int  error;
307
308	if (hfsmp->hfc_stage != HFC_RECORDING)
309		return (EPERM);
310
311	hfsmp->hfc_stage = HFC_BUSY;
312
313	hotfiles_collect(hfsmp);
314
315
316	/*
317	 * Convert hot file data into a simple file id list....
318	 *
319	 * then dump the sample data
320	 */
321#if HFC_VERBOSE
322	printf("hfs: end of hot file recording on %s\n", hfsmp->vcbVN);
323#endif
324	hotdata = (hotfile_data_t *)hfsmp->hfc_recdata;
325	if (hotdata == NULL)
326		return (0);
327	hfsmp->hfc_recdata = NULL;
328	hfsmp->hfc_stage = HFC_EVALUATION;
329	wakeup((caddr_t)&hfsmp->hfc_stage);
330
331#if HFC_VERBOSE
332	printf("hfs:   curentries: %d\n", hotdata->activefiles);
333#endif
334	/*
335	 * If no hot files recorded then we're done.
336	 */
337	if (hotdata->rootentry == NULL) {
338		error = 0;
339		goto out;
340	}
341
342	/* Open the B-tree file for writing... */
343	if (hfsmp->hfc_filevp)
344		panic("hfs_recording_stop: hfc_filevp exists (vp = %p)", hfsmp->hfc_filevp);
345
346	error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp);
347	if (error) {
348		goto out;
349	}
350
351	/*
352	 * Age the previous set of clustered hot files.
353	 */
354	error = hotfiles_age(hfsmp);
355	if (error) {
356		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
357		hfsmp->hfc_filevp = NULL;
358		goto out;
359	}
360
361	/*
362	 * Create a sorted list of hotest files.
363	 */
364	size = sizeof(hotfilelist_t);
365	size += sizeof(hotfileinfo_t) * (hotdata->activefiles - 1);
366	MALLOC(listp, hotfilelist_t *, size, M_TEMP, M_WAITOK);
367	if (listp == NULL) {
368		error = ENOMEM;
369		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
370		hfsmp->hfc_filevp = NULL;
371		goto out;
372	}
373
374	bzero(listp, size);
375
376	hf_getsortedlist(hotdata, listp);	/* NOTE: destroys hot file tree! */
377	microtime(&tv);
378	listp->hfl_duration = tv.tv_sec - hfsmp->hfc_timebase;
379	hfsmp->hfc_recdata = listp;
380
381	/*
382	 * Account for duplicates.
383	 */
384	error = hotfiles_refine(hfsmp);
385	if (error) {
386		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
387		hfsmp->hfc_filevp = NULL;
388		goto out;
389	}
390
391	/*
392	 * Compute the amount of space to reclaim...
393	 */
394	if (listp->hfl_totalblocks > hfsmp->hfs_hotfile_freeblks) {
395		listp->hfl_reclaimblks =
396			MIN(listp->hfl_totalblocks, hfsmp->hfs_hotfile_maxblks) -
397			hfsmp->hfs_hotfile_freeblks;
398#if HFC_VERBOSE
399		printf("hfs_recording_stop: need to reclaim %d blocks\n", listp->hfl_reclaimblks);
400#endif
401		if (listp->hfl_reclaimblks)
402			newstage = HFC_EVICTION;
403		else
404			newstage = HFC_ADOPTION;
405	} else {
406		newstage = HFC_ADOPTION;
407	}
408
409	if (newstage == HFC_ADOPTION && listp->hfl_totalblocks == 0) {
410		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
411		hfsmp->hfc_filevp = NULL;
412		newstage = HFC_IDLE;
413	}
414out:
415#if HFC_VERBOSE
416	if (newstage == HFC_EVICTION)
417		printf("hfs: evicting coldest files\n");
418	else if (newstage == HFC_ADOPTION)
419		printf("hfs: adopting hotest files\n");
420#endif
421	FREE(hotdata, M_TEMP);
422
423	hfsmp->hfc_stage = newstage;
424	wakeup((caddr_t)&hfsmp->hfc_stage);
425	return (error);
426}
427
428/*
429 * Suspend recording the hotest files on a file system.
430 */
431int
432hfs_recording_suspend(struct hfsmount *hfsmp)
433{
434	HotFilesInfo hotfileinfo;
435	hotfile_data_t *hotdata = NULL;
436	struct timeval tv;
437	int  error;
438
439	if (hfsmp->hfc_stage == HFC_DISABLED)
440		return (0);
441
442	lck_mtx_lock(&hfsmp->hfc_mutex);
443
444	/*
445	 * XXX NOTE
446	 * A suspend can occur during eval/evict/adopt stage.
447	 * In that case we would need to write out info and
448	 * flush our HFBT vnode. Currently we just bail.
449	 */
450
451	hotdata = (hotfile_data_t *)hfsmp->hfc_recdata;
452	if (hotdata == NULL || hfsmp->hfc_stage != HFC_RECORDING) {
453		error = 0;
454		goto out;
455	}
456	hfsmp->hfc_stage = HFC_BUSY;
457
458#if HFC_VERBOSE
459	printf("hfs: suspend hot file recording on %s\n", hfsmp->vcbVN);
460#endif
461	error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp);
462	if (error) {
463		printf("hfs_recording_suspend: err %d opening btree\n", error);
464		goto out;
465	}
466
467	if (hfs_start_transaction(hfsmp) != 0) {
468	    error = EINVAL;
469	    goto out;
470	}
471	if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) {
472		error = EPERM;
473		goto end_transaction;
474	}
475
476	microtime(&tv);
477	hotfileinfo.magic       = SWAP_BE32 (HFC_MAGIC);
478	hotfileinfo.version     = SWAP_BE32 (HFC_VERSION);
479	hotfileinfo.duration    = SWAP_BE32 (HFC_DEFAULT_DURATION);
480	hotfileinfo.timebase    = SWAP_BE32 (hfsmp->hfc_timebase);
481	hotfileinfo.timeleft    = SWAP_BE32 (hfsmp->hfc_timeout - tv.tv_sec);
482	hotfileinfo.threshold   = SWAP_BE32 (hotdata->threshold);
483	hotfileinfo.maxfileblks = SWAP_BE32 (hotdata->maxblocks);
484	hotfileinfo.maxfilecnt  = SWAP_BE32 (HFC_DEFAULT_FILE_COUNT);
485	strlcpy((char *)hotfileinfo.tag, hfc_tag, sizeof hotfileinfo.tag);
486	(void) BTSetUserData(VTOF(hfsmp->hfc_filevp), &hotfileinfo, sizeof(hotfileinfo));
487
488	hfs_unlock(VTOC(hfsmp->hfc_filevp));
489
490end_transaction:
491	hfs_end_transaction(hfsmp);
492
493out:
494	if (hfsmp->hfc_filevp) {
495		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
496		hfsmp->hfc_filevp = NULL;
497	}
498	if (hotdata) {
499		FREE(hotdata, M_TEMP);
500		hfsmp->hfc_recdata = NULL;
501	}
502	hfsmp->hfc_stage = HFC_DISABLED;
503	wakeup((caddr_t)&hfsmp->hfc_stage);
504
505	lck_mtx_unlock(&hfsmp->hfc_mutex);
506	return (error);
507}
508
509
510/*
511 *
512 */
513int
514hfs_recording_init(struct hfsmount *hfsmp)
515{
516	CatalogKey * keyp;
517	CatalogRecord * datap;
518	u_int32_t  dataSize;
519	HFSPlusCatalogFile *filep;
520	BTScanState scanstate;
521	BTreeIterator * iterator = NULL;
522	FSBufferDescriptor  record;
523	HotFileKey * key;
524	filefork_t * filefork;
525	u_int32_t  data;
526	struct cat_attr cattr;
527	u_int32_t  cnid;
528	int error = 0;
529
530	int inserted = 0;  /* debug variables */
531	int filecount = 0;
532
533	/*
534	 * For now, only the boot volume is supported.
535	 */
536	if ((vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) == 0) {
537		hfsmp->hfc_stage = HFC_DISABLED;
538		return (EPERM);
539	}
540
541	/*
542	 * Tracking of hot files requires up-to-date access times.
543	 * So if access time updates are disabled, then we disable
544	 * hot files, too.
545	 */
546	if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_NOATIME) {
547		hfsmp->hfc_stage = HFC_DISABLED;
548		return EPERM;
549	}
550
551	/*
552	 * If the Hot File btree exists then metadata zone is ready.
553	 */
554	cnid = GetFileInfo(HFSTOVCB(hfsmp), kRootDirID, HFC_FILENAME, &cattr, NULL);
555	if (cnid != 0 && S_ISREG(cattr.ca_mode)) {
556		if (hfsmp->hfc_stage == HFC_DISABLED)
557			hfsmp->hfc_stage = HFC_IDLE;
558		return (0);
559	}
560
561	if (hfs_start_transaction(hfsmp) != 0) {
562		return EINVAL;
563	}
564
565	error = hfc_btree_create(hfsmp, HFSTOVCB(hfsmp)->blockSize, HFC_DEFAULT_FILE_COUNT);
566	if (error) {
567#if HFC_VERBOSE
568		printf("hfs: Error %d creating hot file b-tree on %s \n", error, hfsmp->vcbVN);
569#endif
570		goto out2;
571	}
572	/*
573	 * Open the Hot File B-tree file for writing.
574	 */
575	if (hfsmp->hfc_filevp)
576		panic("hfs_recording_init: hfc_filevp exists (vp = %p)", hfsmp->hfc_filevp);
577	error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp);
578	if (error) {
579#if HFC_VERBOSE
580		printf("hfs: Error %d opening hot file b-tree on %s \n", error, hfsmp->vcbVN);
581#endif
582		goto out2;
583	}
584	MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
585	if (iterator == NULL) {
586		error = ENOMEM;
587		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
588		hfsmp->hfc_filevp = NULL;
589		goto out2;
590	}
591	bzero(iterator, sizeof(*iterator));
592	key = (HotFileKey*) &iterator->key;
593	key->keyLength = HFC_KEYLENGTH;
594
595	record.bufferAddress = &data;
596	record.itemSize = sizeof(u_int32_t);
597	record.itemCount = 1;
598#if HFC_VERBOSE
599	printf("hfs: Evaluating space for \"%s\" metadata zone...\n", HFSTOVCB(hfsmp)->vcbVN);
600#endif
601	/*
602	 * Get ready to scan the Catalog file.
603	 */
604	error = BTScanInitialize(VTOF(HFSTOVCB(hfsmp)->catalogRefNum), 0, 0, 0,
605	                       kCatSearchBufferSize, &scanstate);
606	if (error) {
607		printf("hfs_recording_init: err %d BTScanInit\n", error);
608		goto out2;
609	}
610
611	/*
612	 * The writes to Hot File B-tree file are journaled.
613	 */
614	if (hfs_start_transaction(hfsmp) != 0) {
615	    error = EINVAL;
616	    goto out1;
617	}
618	if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) {
619		error = EPERM;
620		goto out0;
621	}
622	filefork = VTOF(hfsmp->hfc_filevp);
623
624	/*
625	 * Visit all the catalog btree leaf records.
626	 */
627	for (;;) {
628		error = BTScanNextRecord(&scanstate, 0, (void **)&keyp, (void **)&datap, &dataSize);
629		if (error) {
630			if (error == btNotFound)
631				error = 0;
632			else
633				printf("hfs_recording_init: err %d BTScanNext\n", error);
634			break;
635		}
636		if ((datap->recordType != kHFSPlusFileRecord) ||
637		    (dataSize != sizeof(HFSPlusCatalogFile))) {
638			continue;
639		}
640		filep = (HFSPlusCatalogFile *)datap;
641		filecount++;
642		if (filep->dataFork.totalBlocks == 0) {
643			continue;
644		}
645		/*
646		 * Any file that has blocks inside the hot file
647		 * space is recorded for later eviction.
648		 *
649		 * For now, resource forks are ignored.
650		 */
651		if (!hotextents(hfsmp, &filep->dataFork.extents[0])) {
652			continue;
653		}
654		cnid = filep->fileID;
655
656		/* Skip over journal files. */
657		if (cnid == hfsmp->hfs_jnlfileid || cnid == hfsmp->hfs_jnlinfoblkid) {
658			continue;
659		}
660		/*
661		 * XXX - need to skip quota files as well.
662		 */
663
664		/* Insert a hot file entry. */
665		key->keyLength   = HFC_KEYLENGTH;
666		key->temperature = HFC_MINIMUM_TEMPERATURE;
667		key->fileID      = cnid;
668		key->forkType    = 0;
669		data = 0x3f3f3f3f;
670		error = BTInsertRecord(filefork, iterator, &record, record.itemSize);
671		if (error) {
672			printf("hfs_recording_init: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID);
673			error = MacToVFSError(error);
674			break;
675		}
676
677		/* Insert the corresponding thread record. */
678		key->keyLength = HFC_KEYLENGTH;
679		key->temperature = HFC_LOOKUPTAG;
680		key->fileID = cnid;
681		key->forkType = 0;
682		data = HFC_MINIMUM_TEMPERATURE;
683		error = BTInsertRecord(filefork, iterator, &record, record.itemSize);
684		if (error) {
685			printf("hfs_recording_init: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID);
686			error = MacToVFSError(error);
687			break;
688		}
689		inserted++;
690	}
691	(void) BTFlushPath(filefork);
692	hfs_unlock(VTOC(hfsmp->hfc_filevp));
693
694out0:
695	hfs_end_transaction(hfsmp);
696#if HFC_VERBOSE
697	printf("hfs: %d files identified out of %d\n", inserted, filecount);
698#endif
699
700out1:
701	(void) BTScanTerminate(&scanstate, &data, &data, &data);
702out2:
703	hfs_end_transaction(hfsmp);
704	if (iterator)
705		FREE(iterator, M_TEMP);
706	if (hfsmp->hfc_filevp) {
707		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
708		hfsmp->hfc_filevp = NULL;
709	}
710	if (error == 0)
711		hfsmp->hfc_stage = HFC_IDLE;
712
713	return (error);
714}
715
716/*
717 * Use sync to perform ocassional background work.
718 */
719int
720hfs_hotfilesync(struct hfsmount *hfsmp, vfs_context_t ctx)
721{
722	if (hfsmp->hfc_stage) {
723		struct timeval tv;
724
725		lck_mtx_lock(&hfsmp->hfc_mutex);
726
727		switch (hfsmp->hfc_stage) {
728		case HFC_IDLE:
729			(void) hfs_recording_start(hfsmp);
730			break;
731
732		case HFC_RECORDING:
733			microtime(&tv);
734			if (tv.tv_sec > hfsmp->hfc_timeout)
735				(void) hfs_recording_stop(hfsmp);
736			break;
737
738		case HFC_EVICTION:
739			(void) hotfiles_evict(hfsmp, ctx);
740			break;
741
742		case HFC_ADOPTION:
743			(void) hotfiles_adopt(hfsmp);
744			break;
745		default:
746			break;
747		}
748
749		lck_mtx_unlock(&hfsmp->hfc_mutex);
750	}
751	return (0);
752}
753
754/*
755 * Add a hot file to the recording list.
756 *
757 * This can happen when a hot file gets reclaimed or at the
758 * end of the recording period for any active hot file.
759 *
760 * NOTE: Since both the data and resource fork can  be hot,
761 * there can be two entries for the same file id.
762 *
763 * Note: the cnode is locked on entry.
764 */
765int
766hfs_addhotfile(struct vnode *vp)
767{
768	hfsmount_t *hfsmp;
769	int error;
770
771	hfsmp = VTOHFS(vp);
772	if (hfsmp->hfc_stage != HFC_RECORDING)
773		return (0);
774
775	lck_mtx_lock(&hfsmp->hfc_mutex);
776	error = hfs_addhotfile_internal(vp);
777	lck_mtx_unlock(&hfsmp->hfc_mutex);
778	return (error);
779}
780
781static int
782hfs_addhotfile_internal(struct vnode *vp)
783{
784	hotfile_data_t *hotdata;
785	hotfile_entry_t *entry;
786	hfsmount_t *hfsmp;
787	cnode_t *cp;
788	filefork_t *ffp;
789	u_int32_t temperature;
790
791	hfsmp = VTOHFS(vp);
792	if (hfsmp->hfc_stage != HFC_RECORDING)
793		return (0);
794
795	/*
796	 * Only regular files are eligible for hotfiles addition.
797	 *
798	 * Symlinks were previously added to the list and may exist in
799	 * extant hotfiles regions, but no new ones will be added, and no
800	 * symlinks will now be relocated/evicted from the hotfiles region.
801	 */
802	if (!vnode_isreg(vp) || vnode_issystem(vp)) {
803		return (0);
804	}
805
806	/* Skip resource forks for now. */
807	if (VNODE_IS_RSRC(vp)) {
808		return (0);
809	}
810	if ((hotdata = (hotfile_data_t *)hfsmp->hfc_recdata) == NULL) {
811		return (0);
812	}
813	ffp = VTOF(vp);
814	cp = VTOC(vp);
815
816	if ((ffp->ff_bytesread == 0) ||
817	    (ffp->ff_blocks == 0) ||
818	    (ffp->ff_size == 0) ||
819	    (ffp->ff_blocks > hotdata->maxblocks) ||
820	    (cp->c_flag & (C_DELETED | C_NOEXISTS)) ||
821	    (cp->c_bsdflags & UF_NODUMP) ||
822	    (cp->c_atime < hfsmp->hfc_timebase)) {
823		return (0);
824	}
825
826	temperature = ffp->ff_bytesread / ffp->ff_size;
827	if (temperature < hotdata->threshold) {
828		return (0);
829	}
830	/*
831	 * If there is room or this file is hotter than
832	 * the coldest one then add it to the list.
833	 *
834	 */
835	if ((hotdata->activefiles < hfsmp->hfc_maxfiles) ||
836	    (hotdata->coldest == NULL) ||
837	    (temperature > hotdata->coldest->temperature)) {
838		++hotdata->refcount;
839		entry = hf_getnewentry(hotdata);
840		entry->temperature = temperature;
841		entry->fileid = cp->c_fileid;
842		entry->blocks = ffp->ff_blocks;
843		hf_insert(hotdata, entry);
844		--hotdata->refcount;
845	}
846
847	return (0);
848}
849
850/*
851 * Remove a hot file from the recording list.
852 *
853 * This can happen when a hot file becomes
854 * an active vnode (active hot files are
855 * not kept in the recording list until the
856 * end of the recording period).
857 *
858 * Note: the cnode is locked on entry.
859 */
860int
861hfs_removehotfile(struct vnode *vp)
862{
863	hotfile_data_t *hotdata;
864	hfsmount_t *hfsmp;
865	cnode_t *cp;
866	filefork_t *ffp;
867	u_int32_t temperature;
868
869	hfsmp = VTOHFS(vp);
870	if (hfsmp->hfc_stage != HFC_RECORDING)
871		return (0);
872
873	if ((!vnode_isreg(vp)) || vnode_issystem(vp)) {
874		return (0);
875	}
876
877	ffp = VTOF(vp);
878	cp = VTOC(vp);
879
880	if ((ffp->ff_bytesread == 0) || (ffp->ff_blocks == 0) ||
881	    (ffp->ff_size == 0) || (cp->c_atime < hfsmp->hfc_timebase)) {
882		return (0);
883	}
884
885	lck_mtx_lock(&hfsmp->hfc_mutex);
886	if (hfsmp->hfc_stage != HFC_RECORDING)
887		goto out;
888	if ((hotdata = (hotfile_data_t *)hfsmp->hfc_recdata) == NULL)
889		goto out;
890
891	temperature = ffp->ff_bytesread / ffp->ff_size;
892	if (temperature < hotdata->threshold)
893		goto out;
894
895	if (hotdata->coldest && (temperature >= hotdata->coldest->temperature)) {
896		++hotdata->refcount;
897		hf_delete(hotdata, VTOC(vp)->c_fileid, temperature);
898		--hotdata->refcount;
899	}
900out:
901	lck_mtx_unlock(&hfsmp->hfc_mutex);
902	return (0);
903}
904
905
906/*
907 *========================================================================
908 *                     HOT FILE MAINTENANCE ROUTINES
909 *========================================================================
910 */
911
912static int
913hotfiles_collect_callback(struct vnode *vp, __unused void *cargs)
914{
915        if ((vnode_isreg(vp)) && !vnode_issystem(vp))
916	        (void) hfs_addhotfile_internal(vp);
917
918	return (VNODE_RETURNED);
919}
920
921/*
922 * Add all active hot files to the recording list.
923 */
924static int
925hotfiles_collect(struct hfsmount *hfsmp)
926{
927	struct mount *mp = HFSTOVFS(hfsmp);
928
929	if (vfs_busy(mp, LK_NOWAIT))
930		return (0);
931
932	/*
933	 * hotfiles_collect_callback will be called for each vnode
934	 * hung off of this mount point
935	 * the vnode will be
936	 * properly referenced and unreferenced around the callback
937	 */
938	vnode_iterate(mp, 0, hotfiles_collect_callback, (void *)NULL);
939
940	vfs_unbusy(mp);
941
942	return (0);
943}
944
945
946/*
947 * Update the data of a btree record
948 * This is called from within BTUpdateRecord.
949 */
950static int
951update_callback(const HotFileKey *key, u_int32_t *data, u_int32_t *state)
952{
953	if (key->temperature == HFC_LOOKUPTAG)
954		*data = *state;
955	return (0);
956}
957
958/*
959 * Identify files already in hot area.
960 */
961static int
962hotfiles_refine(struct hfsmount *hfsmp)
963{
964	BTreeIterator * iterator = NULL;
965	struct mount *mp;
966	filefork_t * filefork;
967	hotfilelist_t  *listp;
968	FSBufferDescriptor  record;
969	HotFileKey * key;
970	u_int32_t  data;
971	int  i;
972	int  error = 0;
973
974
975	if ((listp = (hotfilelist_t  *)hfsmp->hfc_recdata) == NULL)
976		return (0);
977
978	mp = HFSTOVFS(hfsmp);
979
980	MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
981	if (iterator == NULL) {
982		error = ENOMEM;
983		goto out;
984	}
985	bzero(iterator, sizeof(*iterator));
986	key = (HotFileKey*) &iterator->key;
987
988	record.bufferAddress = &data;
989	record.itemSize = sizeof(u_int32_t);
990	record.itemCount = 1;
991
992	if (hfs_start_transaction(hfsmp) != 0) {
993	    error = EINVAL;
994	    goto out;
995	}
996	if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) {
997		error = EPERM;
998		goto out1;
999	}
1000	filefork = VTOF(hfsmp->hfc_filevp);
1001
1002	for (i = 0; i < listp->hfl_count; ++i) {
1003		/*
1004		 * Check if entry (thread) is already in hot area.
1005		 */
1006		key->keyLength = HFC_KEYLENGTH;
1007		key->temperature = HFC_LOOKUPTAG;
1008		key->fileID = listp->hfl_hotfile[i].hf_fileid;
1009		key->forkType = 0;
1010		(void) BTInvalidateHint(iterator);
1011		if (BTSearchRecord(filefork, iterator, &record, NULL, iterator) != 0) {
1012			continue;  /* not in hot area, so skip */
1013		}
1014
1015		/*
1016		 * Update thread entry with latest temperature.
1017		 */
1018		error = BTUpdateRecord(filefork, iterator,
1019				(IterateCallBackProcPtr)update_callback,
1020				&listp->hfl_hotfile[i].hf_temperature);
1021		if (error) {
1022			printf("hfs: hotfiles_refine: BTUpdateRecord failed %d (file %d)\n", error, key->fileID);
1023			error = MacToVFSError(error);
1024		//	break;
1025		}
1026		/*
1027		 * Re-key entry with latest temperature.
1028		 */
1029		key->keyLength = HFC_KEYLENGTH;
1030		key->temperature = data;
1031		key->fileID = listp->hfl_hotfile[i].hf_fileid;
1032		key->forkType = 0;
1033		/* Pick up record data. */
1034		(void) BTInvalidateHint(iterator);
1035		(void) BTSearchRecord(filefork, iterator, &record, NULL, iterator);
1036		error = BTDeleteRecord(filefork, iterator);
1037		if (error) {
1038			printf("hfs: hotfiles_refine: BTDeleteRecord failed %d (file %d)\n", error, key->fileID);
1039			error = MacToVFSError(error);
1040			break;
1041		}
1042		key->keyLength = HFC_KEYLENGTH;
1043		key->temperature = listp->hfl_hotfile[i].hf_temperature;
1044		key->fileID = listp->hfl_hotfile[i].hf_fileid;
1045		key->forkType = 0;
1046		error = BTInsertRecord(filefork, iterator, &record, record.itemSize);
1047		if (error) {
1048			printf("hfs: hotfiles_refine: BTInsertRecord failed %d (file %d)\n", error, key->fileID);
1049			error = MacToVFSError(error);
1050			break;
1051		}
1052
1053		/*
1054		 * Invalidate this entry in the list.
1055		 */
1056		listp->hfl_hotfile[i].hf_temperature = 0;
1057		listp->hfl_totalblocks -= listp->hfl_hotfile[i].hf_blocks;
1058
1059	} /* end for */
1060
1061	(void) BTFlushPath(filefork);
1062	hfs_unlock(VTOC(hfsmp->hfc_filevp));
1063
1064out1:
1065	hfs_end_transaction(hfsmp);
1066out:
1067	if (iterator)
1068		FREE(iterator, M_TEMP);
1069	return (error);
1070}
1071
1072/*
1073 * Move new hot files into hot area.
1074 *
1075 * Requires that the hfc_mutex be held.
1076 */
1077static int
1078hotfiles_adopt(struct hfsmount *hfsmp)
1079{
1080	BTreeIterator * iterator = NULL;
1081	struct vnode *vp;
1082	filefork_t * filefork;
1083	hotfilelist_t  *listp;
1084	FSBufferDescriptor  record;
1085	HotFileKey * key;
1086	u_int32_t  data;
1087	enum hfc_stage stage;
1088	int  fileblocks;
1089	int  blksmoved;
1090	int  i;
1091	int  last;
1092	int  error = 0;
1093	int  startedtrans = 0;
1094
1095	if ((listp = (hotfilelist_t  *)hfsmp->hfc_recdata) == NULL)
1096		return (0);
1097
1098	if (hfsmp->hfc_stage != HFC_ADOPTION) {
1099		return (EBUSY);
1100	}
1101	if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) {
1102		return (EPERM);
1103	}
1104
1105	MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
1106	if (iterator == NULL) {
1107		hfs_unlock(VTOC(hfsmp->hfc_filevp));
1108		return (ENOMEM);
1109	}
1110
1111	stage = hfsmp->hfc_stage;
1112	hfsmp->hfc_stage = HFC_BUSY;
1113
1114	blksmoved = 0;
1115	last = listp->hfl_next + HFC_FILESPERSYNC;
1116	if (last > listp->hfl_count)
1117		last = listp->hfl_count;
1118
1119	bzero(iterator, sizeof(*iterator));
1120	key = (HotFileKey*) &iterator->key;
1121	key->keyLength = HFC_KEYLENGTH;
1122
1123	record.bufferAddress = &data;
1124	record.itemSize = sizeof(u_int32_t);
1125	record.itemCount = 1;
1126
1127	filefork = VTOF(hfsmp->hfc_filevp);
1128
1129	for (i = listp->hfl_next; (i < last) && (blksmoved < HFC_BLKSPERSYNC); ++i) {
1130		/*
1131		 * Skip invalid entries (already in hot area).
1132		 */
1133		if (listp->hfl_hotfile[i].hf_temperature == 0) {
1134				listp->hfl_next++;
1135				continue;
1136		}
1137		/*
1138		 * Acquire a vnode for this file.
1139		 */
1140		error = hfs_vget(hfsmp, listp->hfl_hotfile[i].hf_fileid, &vp, 0, 0);
1141		if (error) {
1142			if (error == ENOENT) {
1143				error = 0;
1144				listp->hfl_next++;
1145				continue;  /* stale entry, go to next */
1146			}
1147			break;
1148		}
1149		if (!vnode_isreg(vp)) {
1150			/* Symlinks are ineligible for adoption into the hotfile zone.  */
1151			printf("hfs: hotfiles_adopt: huh, not a file %d (%d)\n", listp->hfl_hotfile[i].hf_fileid, VTOC(vp)->c_cnid);
1152			hfs_unlock(VTOC(vp));
1153			vnode_put(vp);
1154			listp->hfl_hotfile[i].hf_temperature = 0;
1155			listp->hfl_next++;
1156			continue;  /* stale entry, go to next */
1157		}
1158		if (hotextents(hfsmp, &VTOF(vp)->ff_extents[0])) {
1159			hfs_unlock(VTOC(vp));
1160			vnode_put(vp);
1161			listp->hfl_hotfile[i].hf_temperature = 0;
1162			listp->hfl_next++;
1163			listp->hfl_totalblocks -= listp->hfl_hotfile[i].hf_blocks;
1164			continue;  /* stale entry, go to next */
1165		}
1166		fileblocks = VTOF(vp)->ff_blocks;
1167		if (fileblocks > hfsmp->hfs_hotfile_freeblks) {
1168			hfs_unlock(VTOC(vp));
1169			vnode_put(vp);
1170			listp->hfl_next++;
1171			listp->hfl_totalblocks -= fileblocks;
1172			continue;  /* entry too big, go to next */
1173		}
1174
1175		if ((blksmoved > 0) &&
1176		    (blksmoved + fileblocks) > HFC_BLKSPERSYNC) {
1177			hfs_unlock(VTOC(vp));
1178			vnode_put(vp);
1179			break;  /* adopt this entry the next time around */
1180		}
1181		if (VTOC(vp)->c_desc.cd_nameptr)
1182			data = *(const u_int32_t *)(VTOC(vp)->c_desc.cd_nameptr);
1183		else
1184			data = 0x3f3f3f3f;
1185
1186		error = hfs_relocate(vp, hfsmp->hfs_hotfile_start, kauth_cred_get(), current_proc());
1187		hfs_unlock(VTOC(vp));
1188		vnode_put(vp);
1189		if (error) {
1190			/* Move on to next item. */
1191			listp->hfl_next++;
1192			continue;
1193		}
1194		/* Keep hot file free space current. */
1195		hfsmp->hfs_hotfile_freeblks -= fileblocks;
1196		listp->hfl_totalblocks -= fileblocks;
1197
1198		/* Insert hot file entry */
1199		key->keyLength   = HFC_KEYLENGTH;
1200		key->temperature = listp->hfl_hotfile[i].hf_temperature;
1201		key->fileID      = listp->hfl_hotfile[i].hf_fileid;
1202		key->forkType    = 0;
1203
1204		/* Start a new transaction before calling BTree code. */
1205		if (hfs_start_transaction(hfsmp) != 0) {
1206		    error = EINVAL;
1207		    break;
1208		}
1209		startedtrans = 1;
1210
1211		error = BTInsertRecord(filefork, iterator, &record, record.itemSize);
1212		if (error) {
1213			printf("hfs: hotfiles_adopt: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID);
1214			error = MacToVFSError(error);
1215			stage = HFC_IDLE;
1216			break;
1217		}
1218
1219		/* Insert thread record */
1220		key->keyLength = HFC_KEYLENGTH;
1221		key->temperature = HFC_LOOKUPTAG;
1222		key->fileID = listp->hfl_hotfile[i].hf_fileid;
1223		key->forkType = 0;
1224		data = listp->hfl_hotfile[i].hf_temperature;
1225		error = BTInsertRecord(filefork, iterator, &record, record.itemSize);
1226		if (error) {
1227			printf("hfs: hotfiles_adopt: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID);
1228			error = MacToVFSError(error);
1229			stage = HFC_IDLE;
1230			break;
1231		}
1232		(void) BTFlushPath(filefork);
1233
1234		/* Transaction complete. */
1235		if (startedtrans) {
1236		    hfs_end_transaction(hfsmp);
1237		    startedtrans = 0;
1238		}
1239
1240		blksmoved += fileblocks;
1241		listp->hfl_next++;
1242		if (listp->hfl_next >= listp->hfl_count) {
1243			break;
1244		}
1245		if (hfsmp->hfs_hotfile_freeblks <= 0) {
1246#if HFC_VERBOSE
1247			printf("hfs: hotfiles_adopt: free space exhausted (%d)\n", hfsmp->hfs_hotfile_freeblks);
1248#endif
1249			break;
1250		}
1251	} /* end for */
1252
1253#if HFC_VERBOSE
1254	printf("hfs: hotfiles_adopt: [%d] adopted %d blocks (%d left)\n", listp->hfl_next, blksmoved, listp->hfl_totalblocks);
1255#endif
1256	/* Finish any outstanding transactions. */
1257	if (startedtrans) {
1258		(void) BTFlushPath(filefork);
1259		hfs_end_transaction(hfsmp);
1260		startedtrans = 0;
1261	}
1262	hfs_unlock(VTOC(hfsmp->hfc_filevp));
1263
1264	if ((listp->hfl_next >= listp->hfl_count) || (hfsmp->hfs_hotfile_freeblks <= 0)) {
1265#if HFC_VERBOSE
1266		printf("hfs: hotfiles_adopt: all done relocating %d files\n", listp->hfl_count);
1267		printf("hfs: hotfiles_adopt: %d blocks free in hot file band\n", hfsmp->hfs_hotfile_freeblks);
1268#endif
1269		stage = HFC_IDLE;
1270	}
1271	FREE(iterator, M_TEMP);
1272
1273	if (stage != HFC_ADOPTION && hfsmp->hfc_filevp) {
1274		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
1275		hfsmp->hfc_filevp = NULL;
1276	}
1277	hfsmp->hfc_stage = stage;
1278	wakeup((caddr_t)&hfsmp->hfc_stage);
1279	return (error);
1280}
1281
1282/*
1283 * Reclaim space by evicting the coldest files.
1284 *
1285 * Requires that the hfc_mutex be held.
1286 */
1287static int
1288hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx)
1289{
1290	BTreeIterator * iterator = NULL;
1291	struct vnode *vp;
1292	HotFileKey * key;
1293	filefork_t * filefork;
1294	hotfilelist_t  *listp;
1295	enum hfc_stage stage;
1296	u_int32_t savedtemp;
1297	int  blksmoved;
1298	int  filesmoved;
1299	int  fileblocks;
1300	int  error = 0;
1301	int  startedtrans = 0;
1302	int  bt_op;
1303
1304	if (hfsmp->hfc_stage != HFC_EVICTION) {
1305		return (EBUSY);
1306	}
1307
1308	if ((listp = (hotfilelist_t  *)hfsmp->hfc_recdata) == NULL)
1309		return (0);
1310
1311	if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) {
1312		return (EPERM);
1313	}
1314
1315	MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
1316	if (iterator == NULL) {
1317		hfs_unlock(VTOC(hfsmp->hfc_filevp));
1318		return (ENOMEM);
1319	}
1320
1321	stage = hfsmp->hfc_stage;
1322	hfsmp->hfc_stage = HFC_BUSY;
1323
1324	filesmoved = blksmoved = 0;
1325	bt_op = kBTreeFirstRecord;
1326
1327	bzero(iterator, sizeof(*iterator));
1328	key = (HotFileKey*) &iterator->key;
1329
1330	filefork = VTOF(hfsmp->hfc_filevp);
1331
1332	while (listp->hfl_reclaimblks > 0 &&
1333	       blksmoved < HFC_BLKSPERSYNC &&
1334	       filesmoved < HFC_FILESPERSYNC) {
1335
1336		/*
1337		 * Obtain the first record (ie the coldest one).
1338		 */
1339		if (BTIterateRecord(filefork, bt_op, iterator, NULL, NULL) != 0) {
1340#if HFC_VERBOSE
1341			printf("hfs: hotfiles_evict: no more records\n");
1342#endif
1343			error = 0;
1344			stage = HFC_ADOPTION;
1345			break;
1346		}
1347		if (key->keyLength != HFC_KEYLENGTH) {
1348			printf("hfs: hotfiles_evict: invalid key length %d\n", key->keyLength);
1349			error = EFTYPE;
1350			break;
1351		}
1352		if (key->temperature == HFC_LOOKUPTAG) {
1353#if HFC_VERBOSE
1354			printf("hfs: hotfiles_evict: ran into thread records\n");
1355#endif
1356			error = 0;
1357			stage = HFC_ADOPTION;
1358			break;
1359		}
1360		/*
1361		 * Aquire the vnode for this file.
1362		 */
1363		error = hfs_vget(hfsmp, key->fileID, &vp, 0, 0);
1364		if (error) {
1365			if (error == ENOENT) {
1366				goto delete;  /* stale entry, go to next */
1367			} else {
1368				printf("hfs: hotfiles_evict: err %d getting file %d\n",
1369				       error, key->fileID);
1370			}
1371			break;
1372		}
1373
1374		/*
1375		 * Symlinks that may have been inserted into the hotfile zone during a previous OS are now stuck
1376		 * here.  We do not want to move them.
1377		 */
1378		if (!vnode_isreg(vp)) {
1379			printf("hfs: hotfiles_evict: huh, not a file %d\n", key->fileID);
1380			hfs_unlock(VTOC(vp));
1381			vnode_put(vp);
1382			goto delete;  /* invalid entry, go to next */
1383		}
1384
1385		fileblocks = VTOF(vp)->ff_blocks;
1386		if ((blksmoved > 0) &&
1387		    (blksmoved + fileblocks) > HFC_BLKSPERSYNC) {
1388			hfs_unlock(VTOC(vp));
1389			vnode_put(vp);
1390			break;
1391		}
1392		/*
1393		 * Make sure file is in the hot area.
1394		 */
1395		if (!hotextents(hfsmp, &VTOF(vp)->ff_extents[0])) {
1396#if HFC_VERBOSE
1397			printf("hfs: hotfiles_evict: file %d isn't hot!\n", key->fileID);
1398#endif
1399			hfs_unlock(VTOC(vp));
1400			vnode_put(vp);
1401			goto delete;  /* stale entry, go to next */
1402		}
1403
1404		/*
1405		 * Relocate file out of hot area.
1406		 */
1407		error = hfs_relocate(vp, HFSTOVCB(hfsmp)->nextAllocation, vfs_context_ucred(ctx), vfs_context_proc(ctx));
1408		if (error) {
1409			printf("hfs: hotfiles_evict: err %d relocating file %d\n", error, key->fileID);
1410			hfs_unlock(VTOC(vp));
1411			vnode_put(vp);
1412			bt_op = kBTreeNextRecord;
1413			goto next;  /* go to next */
1414		}
1415
1416		//
1417		// We do not believe that this call to hfs_fsync() is
1418		// necessary and it causes a journal transaction
1419		// deadlock so we are removing it.
1420		//
1421		// (void) hfs_fsync(vp, MNT_WAIT, 0, p);
1422
1423		hfs_unlock(VTOC(vp));
1424		vnode_put(vp);
1425
1426		hfsmp->hfs_hotfile_freeblks += fileblocks;
1427		listp->hfl_reclaimblks -= fileblocks;
1428		if (listp->hfl_reclaimblks < 0)
1429			listp->hfl_reclaimblks = 0;
1430		blksmoved += fileblocks;
1431		filesmoved++;
1432delete:
1433		/* Start a new transaction before calling BTree code. */
1434		if (hfs_start_transaction(hfsmp) != 0) {
1435		    error = EINVAL;
1436		    break;
1437		}
1438		startedtrans = 1;
1439
1440		error = BTDeleteRecord(filefork, iterator);
1441		if (error) {
1442			error = MacToVFSError(error);
1443			break;
1444		}
1445		savedtemp = key->temperature;
1446		key->temperature = HFC_LOOKUPTAG;
1447		error = BTDeleteRecord(filefork, iterator);
1448		if (error) {
1449			error = MacToVFSError(error);
1450			break;
1451		}
1452		key->temperature = savedtemp;
1453next:
1454		(void) BTFlushPath(filefork);
1455
1456		/* Transaction complete. */
1457		if (startedtrans) {
1458			hfs_end_transaction(hfsmp);
1459			startedtrans = 0;
1460		}
1461
1462	} /* end while */
1463
1464#if HFC_VERBOSE
1465	printf("hfs: hotfiles_evict: moved %d files (%d blks, %d to go)\n", filesmoved, blksmoved, listp->hfl_reclaimblks);
1466#endif
1467	/* Finish any outstanding transactions. */
1468	if (startedtrans) {
1469		(void) BTFlushPath(filefork);
1470		hfs_end_transaction(hfsmp);
1471		startedtrans = 0;
1472	}
1473	hfs_unlock(VTOC(hfsmp->hfc_filevp));
1474
1475	/*
1476	 * Move to next stage when finished.
1477	 */
1478	if (listp->hfl_reclaimblks <= 0) {
1479		stage = HFC_ADOPTION;
1480#if HFC_VERBOSE
1481		printf("hfs: hotfiles_evict: %d blocks free in hot file band\n", hfsmp->hfs_hotfile_freeblks);
1482#endif
1483	}
1484	FREE(iterator, M_TEMP);
1485	hfsmp->hfc_stage = stage;
1486	wakeup((caddr_t)&hfsmp->hfc_stage);
1487	return (error);
1488}
1489
1490/*
1491 * Age the existing records in the hot files b-tree.
1492 */
1493static int
1494hotfiles_age(struct hfsmount *hfsmp)
1495{
1496	BTreeInfoRec  btinfo;
1497	BTreeIterator * iterator = NULL;
1498	BTreeIterator * prev_iterator;
1499	FSBufferDescriptor  record;
1500	FSBufferDescriptor  prev_record;
1501	HotFileKey * key;
1502	HotFileKey * prev_key;
1503	filefork_t * filefork;
1504	u_int32_t  data;
1505	u_int32_t  prev_data;
1506	u_int32_t  newtemp;
1507	int  error;
1508	int  i;
1509	int  numrecs;
1510	int  aged = 0;
1511	u_int16_t  reclen;
1512
1513
1514	MALLOC(iterator, BTreeIterator *, 2 * sizeof(*iterator), M_TEMP, M_WAITOK);
1515	if (iterator == NULL) {
1516		error = ENOMEM;
1517		goto out2;
1518	}
1519	bzero(iterator, 2 * sizeof(*iterator));
1520	key = (HotFileKey*) &iterator->key;
1521
1522	prev_iterator = &iterator[1];
1523	prev_key = (HotFileKey*) &prev_iterator->key;
1524
1525	record.bufferAddress = &data;
1526	record.itemSize = sizeof(data);
1527	record.itemCount = 1;
1528	prev_record.bufferAddress = &prev_data;
1529	prev_record.itemSize = sizeof(prev_data);
1530	prev_record.itemCount = 1;
1531
1532	/*
1533	 * Capture b-tree changes inside a transaction
1534	 */
1535	if (hfs_start_transaction(hfsmp) != 0) {
1536	    error = EINVAL;
1537	    goto out2;
1538	}
1539	if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) {
1540		error = EPERM;
1541		goto out1;
1542	}
1543	filefork = VTOF(hfsmp->hfc_filevp);
1544
1545	error = BTGetInformation(filefork, 0, &btinfo);
1546	if (error) {
1547		error = MacToVFSError(error);
1548		goto out;
1549	}
1550	if (btinfo.numRecords < 2) {
1551		error = 0;
1552		goto out;
1553	}
1554
1555	/* Only want 1st half of leaf records */
1556	numrecs = (btinfo.numRecords /= 2) - 1;
1557
1558	error = BTIterateRecord(filefork, kBTreeFirstRecord, iterator, &record, &reclen);
1559	if (error) {
1560		printf("hfs_agehotfiles: BTIterateRecord: %d\n", error);
1561		error = MacToVFSError(error);
1562		goto out;
1563	}
1564	bcopy(iterator, prev_iterator, sizeof(BTreeIterator));
1565	prev_data = data;
1566
1567	for (i = 0; i < numrecs; ++i) {
1568		error = BTIterateRecord(filefork, kBTreeNextRecord, iterator, &record, &reclen);
1569		if (error == 0) {
1570			if (key->temperature < prev_key->temperature) {
1571				printf("hfs_agehotfiles: out of order keys!\n");
1572				error = EFTYPE;
1573				break;
1574			}
1575			if (reclen != sizeof(data)) {
1576				printf("hfs_agehotfiles: invalid record length %d\n", reclen);
1577				error = EFTYPE;
1578				break;
1579			}
1580			if (key->keyLength != HFC_KEYLENGTH) {
1581				printf("hfs_agehotfiles: invalid key length %d\n", key->keyLength);
1582				error = EFTYPE;
1583				break;
1584			}
1585		} else if ((error == fsBTEndOfIterationErr || error == fsBTRecordNotFoundErr) &&
1586		    (i == (numrecs - 1))) {
1587			error = 0;
1588		} else if (error) {
1589			printf("hfs_agehotfiles: %d of %d BTIterateRecord: %d\n", i, numrecs, error);
1590			error = MacToVFSError(error);
1591			break;
1592		}
1593		if (prev_key->temperature == HFC_LOOKUPTAG) {
1594#if HFC_VERBOSE
1595			printf("hfs_agehotfiles: ran into thread record\n");
1596#endif
1597			error = 0;
1598			break;
1599		}
1600		error = BTDeleteRecord(filefork, prev_iterator);
1601		if (error) {
1602			printf("hfs_agehotfiles: BTDeleteRecord failed %d (file %d)\n", error, prev_key->fileID);
1603			error = MacToVFSError(error);
1604			break;
1605		}
1606
1607		/* Age by halving the temperature (floor = 4) */
1608		newtemp = MAX(prev_key->temperature >> 1, 4);
1609		prev_key->temperature = newtemp;
1610
1611		error = BTInsertRecord(filefork, prev_iterator, &prev_record, prev_record.itemSize);
1612		if (error) {
1613			printf("hfs_agehotfiles: BTInsertRecord failed %d (file %d)\n", error, prev_key->fileID);
1614			error = MacToVFSError(error);
1615			break;
1616		}
1617		++aged;
1618		/*
1619		 * Update thread entry with latest temperature.
1620		 */
1621		prev_key->temperature = HFC_LOOKUPTAG;
1622		error = BTUpdateRecord(filefork, prev_iterator,
1623				(IterateCallBackProcPtr)update_callback,
1624				&newtemp);
1625		if (error) {
1626			printf("hfs_agehotfiles: %d of %d BTUpdateRecord failed %d (file %d, %d)\n",
1627				i, numrecs, error, prev_key->fileID, newtemp);
1628			error = MacToVFSError(error);
1629		//	break;
1630		}
1631
1632		bcopy(iterator, prev_iterator, sizeof(BTreeIterator));
1633		prev_data = data;
1634
1635	} /* end for */
1636
1637#if HFC_VERBOSE
1638	if (error == 0)
1639		printf("hfs_agehotfiles: aged %d records out of %d\n", aged, btinfo.numRecords);
1640#endif
1641	(void) BTFlushPath(filefork);
1642out:
1643	hfs_unlock(VTOC(hfsmp->hfc_filevp));
1644out1:
1645	hfs_end_transaction(hfsmp);
1646out2:
1647	if (iterator)
1648		FREE(iterator, M_TEMP);
1649	return (error);
1650}
1651
1652/*
1653 * Return true if any blocks (or all blocks if all is true)
1654 * are contained in the hot file region.
1655 */
1656static int
1657hotextents(struct hfsmount *hfsmp, HFSPlusExtentDescriptor * extents)
1658{
1659	u_int32_t  b1, b2;
1660	int  i;
1661	int  inside = 0;
1662
1663	for (i = 0; i < kHFSPlusExtentDensity; ++i) {
1664		b1 = extents[i].startBlock;
1665		if (b1 == 0)
1666			break;
1667		b2 = b1 + extents[i].blockCount - 1;
1668		if ((b1 >= hfsmp->hfs_hotfile_start &&
1669		     b2 <= hfsmp->hfs_hotfile_end) ||
1670		    (b1 < hfsmp->hfs_hotfile_end &&
1671		     b2 > hfsmp->hfs_hotfile_end)) {
1672			inside = 1;
1673			break;
1674		}
1675	}
1676	return (inside);
1677}
1678
1679
1680/*
1681 *========================================================================
1682 *                       HOT FILE B-TREE ROUTINES
1683 *========================================================================
1684 */
1685
1686/*
1687 * Open the hot files b-tree for writing.
1688 *
1689 * On successful exit the vnode has a reference but not an iocount.
1690 */
1691static int
1692hfc_btree_open(struct hfsmount *hfsmp, struct vnode **vpp)
1693{
1694	proc_t p;
1695	struct vnode *vp;
1696	struct cat_desc  cdesc;
1697	struct cat_attr  cattr;
1698	struct cat_fork  cfork;
1699	static char filename[] = HFC_FILENAME;
1700	int  error;
1701	int  retry = 0;
1702	int lockflags;
1703	int newvnode_flags = 0;
1704
1705	*vpp = NULL;
1706	p = current_proc();
1707
1708	bzero(&cdesc, sizeof(cdesc));
1709	cdesc.cd_parentcnid = kRootDirID;
1710	cdesc.cd_nameptr = (const u_int8_t *)filename;
1711	cdesc.cd_namelen = strlen(filename);
1712
1713	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1714
1715	error = cat_lookup(hfsmp, &cdesc, 0, 0, &cdesc, &cattr, &cfork, NULL);
1716
1717	hfs_systemfile_unlock(hfsmp, lockflags);
1718
1719	if (error) {
1720		printf("hfs: hfc_btree_open: cat_lookup error %d\n", error);
1721		return (error);
1722	}
1723again:
1724	cdesc.cd_flags |= CD_ISMETA;
1725	error = hfs_getnewvnode(hfsmp, NULL, NULL, &cdesc, 0, &cattr,
1726							&cfork, &vp, &newvnode_flags);
1727	if (error) {
1728		printf("hfs: hfc_btree_open: hfs_getnewvnode error %d\n", error);
1729		cat_releasedesc(&cdesc);
1730		return (error);
1731	}
1732	if (!vnode_issystem(vp)) {
1733#if HFC_VERBOSE
1734		printf("hfs: hfc_btree_open: file has UBC, try again\n");
1735#endif
1736		hfs_unlock(VTOC(vp));
1737		vnode_recycle(vp);
1738		vnode_put(vp);
1739		if (retry++ == 0)
1740			goto again;
1741		else
1742			return (EBUSY);
1743	}
1744
1745	/* Open the B-tree file for writing... */
1746	error = BTOpenPath(VTOF(vp), (KeyCompareProcPtr) hfc_comparekeys);
1747	if (error) {
1748		printf("hfs: hfc_btree_open: BTOpenPath error %d\n", error);
1749		error = MacToVFSError(error);
1750	}
1751
1752	hfs_unlock(VTOC(vp));
1753	if (error == 0) {
1754		*vpp = vp;
1755		vnode_ref(vp);  /* keep a reference while its open */
1756	}
1757	vnode_put(vp);
1758
1759	if (!vnode_issystem(vp))
1760		panic("hfs: hfc_btree_open: not a system file (vp = %p)", vp);
1761
1762	return (error);
1763}
1764
1765/*
1766 * Close the hot files b-tree.
1767 *
1768 * On entry the vnode has a reference.
1769 */
1770static int
1771hfc_btree_close(struct hfsmount *hfsmp, struct vnode *vp)
1772{
1773	proc_t p = current_proc();
1774	int  error = 0;
1775
1776
1777	if (hfsmp->jnl) {
1778	    hfs_journal_flush(hfsmp, FALSE);
1779	}
1780
1781	if (vnode_get(vp) == 0) {
1782		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
1783		if (error == 0) {
1784			(void) hfs_fsync(vp, MNT_WAIT, 0, p);
1785			error = BTClosePath(VTOF(vp));
1786			hfs_unlock(VTOC(vp));
1787		}
1788		vnode_rele(vp);
1789		vnode_recycle(vp);
1790		vnode_put(vp);
1791	}
1792
1793	return (error);
1794}
1795
1796/*
1797 *  Create a hot files btree file.
1798 *
1799 */
1800static int
1801hfc_btree_create(struct hfsmount *hfsmp, unsigned int nodesize, unsigned int entries)
1802{
1803	struct vnode *dvp = NULL;
1804	struct vnode *vp = NULL;
1805	struct cnode *cp = NULL;
1806	vfs_context_t ctx = vfs_context_current();
1807	struct vnode_attr va;
1808	struct componentname cname;
1809	static char filename[] = HFC_FILENAME;
1810	int  error;
1811
1812	if (hfsmp->hfc_filevp)
1813		panic("hfs: hfc_btree_create: hfc_filevp exists (vp = %p)", hfsmp->hfc_filevp);
1814
1815	error = VFS_ROOT(HFSTOVFS(hfsmp), &dvp, ctx);
1816	if (error) {
1817		return (error);
1818	}
1819	cname.cn_nameiop = CREATE;
1820	cname.cn_flags = ISLASTCN;
1821	cname.cn_context = ctx;
1822	cname.cn_pnbuf = filename;
1823	cname.cn_pnlen = sizeof(filename);
1824	cname.cn_nameptr = filename;
1825	cname.cn_namelen = strlen(filename);
1826	cname.cn_hash = 0;
1827	cname.cn_consume = 0;
1828
1829	VATTR_INIT(&va);
1830	VATTR_SET(&va, va_type, VREG);
1831	VATTR_SET(&va, va_mode, S_IFREG | S_IRUSR | S_IWUSR);
1832	VATTR_SET(&va, va_uid, 0);
1833	VATTR_SET(&va, va_gid, 0);
1834
1835	if (hfs_start_transaction(hfsmp) != 0) {
1836	    error = EINVAL;
1837	    goto out;
1838	}
1839
1840	/* call ourselves directly, ignore the higher-level VFS file creation code */
1841	error = VNOP_CREATE(dvp, &vp, &cname, &va, ctx);
1842	if (error) {
1843		printf("hfs: error %d creating HFBT on %s\n", error, HFSTOVCB(hfsmp)->vcbVN);
1844		goto out;
1845	}
1846	if (dvp) {
1847		vnode_put(dvp);
1848		dvp = NULL;
1849	}
1850	if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
1851		goto out;
1852	}
1853	cp = VTOC(vp);
1854
1855	/* Don't use non-regular files or files with links. */
1856	if (!vnode_isreg(vp) || cp->c_linkcount != 1) {
1857		error = EFTYPE;
1858		goto out;
1859	}
1860
1861	printf("hfs: created HFBT on %s\n", HFSTOVCB(hfsmp)->vcbVN);
1862
1863	if (VTOF(vp)->ff_size < nodesize) {
1864		caddr_t  buffer;
1865		u_int16_t *index;
1866		u_int16_t  offset;
1867		BTNodeDescriptor  *ndp;
1868		BTHeaderRec  *bthp;
1869		HotFilesInfo *hotfileinfo;
1870		int  nodecnt;
1871		int  filesize;
1872		int  entirespernode;
1873
1874		/*
1875		 * Mark it invisible (truncate will pull these changes).
1876		 */
1877		((FndrFileInfo *)&cp->c_finderinfo[0])->fdFlags |=
1878			SWAP_BE16 (kIsInvisible + kNameLocked);
1879
1880		if (kmem_alloc(kernel_map, (vm_offset_t *)&buffer, nodesize)) {
1881			error = ENOMEM;
1882			goto out;
1883		}
1884		bzero(buffer, nodesize);
1885		index = (u_int16_t *)buffer;
1886
1887		entirespernode = (nodesize - sizeof(BTNodeDescriptor) - 2) /
1888				 (sizeof(HotFileKey) + 6);
1889		nodecnt = 2 + howmany(entries * 2, entirespernode);
1890		nodecnt = roundup(nodecnt, 8);
1891		filesize = nodecnt * nodesize;
1892
1893		/* FILL IN THE NODE DESCRIPTOR:  */
1894		ndp = (BTNodeDescriptor *)buffer;
1895		ndp->kind = kBTHeaderNode;
1896		ndp->numRecords = SWAP_BE16 (3);
1897		offset = sizeof(BTNodeDescriptor);
1898		index[(nodesize / 2) - 1] = SWAP_BE16 (offset);
1899
1900		/* FILL IN THE HEADER RECORD:  */
1901		bthp = (BTHeaderRec *)((u_int8_t *)buffer + offset);
1902		bthp->nodeSize     = SWAP_BE16 (nodesize);
1903		bthp->totalNodes   = SWAP_BE32 (filesize / nodesize);
1904		bthp->freeNodes    = SWAP_BE32 (nodecnt - 1);
1905		bthp->clumpSize    = SWAP_BE32 (filesize);
1906		bthp->btreeType    = kUserBTreeType; /* non-metadata */
1907		bthp->attributes  |= SWAP_BE32 (kBTBigKeysMask);
1908		bthp->maxKeyLength = SWAP_BE16 (HFC_KEYLENGTH);
1909		offset += sizeof(BTHeaderRec);
1910		index[(nodesize / 2) - 2] = SWAP_BE16 (offset);
1911
1912		/* FILL IN THE USER RECORD:  */
1913		hotfileinfo = (HotFilesInfo *)((u_int8_t *)buffer + offset);
1914		hotfileinfo->magic       = SWAP_BE32 (HFC_MAGIC);
1915		hotfileinfo->version     = SWAP_BE32 (HFC_VERSION);
1916		hotfileinfo->duration    = SWAP_BE32 (HFC_DEFAULT_DURATION);
1917		hotfileinfo->timebase    = 0;
1918		hotfileinfo->timeleft    = 0;
1919		hotfileinfo->threshold   = SWAP_BE32 (HFC_MINIMUM_TEMPERATURE);
1920		hotfileinfo->maxfileblks = SWAP_BE32 (HFC_MAXIMUM_FILESIZE / HFSTOVCB(hfsmp)->blockSize);
1921		hotfileinfo->maxfilecnt  = SWAP_BE32 (HFC_DEFAULT_FILE_COUNT);
1922		strlcpy((char *)hotfileinfo->tag, hfc_tag,
1923			sizeof hotfileinfo->tag);
1924		offset += kBTreeHeaderUserBytes;
1925		index[(nodesize / 2) - 3] = SWAP_BE16 (offset);
1926
1927		/* FILL IN THE MAP RECORD (only one node in use). */
1928		*((u_int8_t *)buffer + offset) = 0x80;
1929		offset += nodesize - sizeof(BTNodeDescriptor) - sizeof(BTHeaderRec)
1930				   - kBTreeHeaderUserBytes - (4 * sizeof(int16_t));
1931		index[(nodesize / 2) - 4] = SWAP_BE16 (offset);
1932
1933		vnode_setnoflush(vp);
1934		error = hfs_truncate(vp, (off_t)filesize, IO_NDELAY, 0, 0, ctx);
1935		if (error) {
1936			printf("hfs: error %d growing HFBT on %s\n", error, HFSTOVCB(hfsmp)->vcbVN);
1937			goto out;
1938		}
1939		cp->c_flag |= C_ZFWANTSYNC;
1940		cp->c_zftimeout = 1;
1941
1942		if (error == 0) {
1943			struct vnop_write_args args;
1944			uio_t auio;
1945
1946			auio = uio_create(1, 0, UIO_SYSSPACE, UIO_WRITE);
1947			uio_addiov(auio, (uintptr_t)buffer, nodesize);
1948
1949			args.a_desc = &vnop_write_desc;
1950			args.a_vp = vp;
1951			args.a_uio = auio;
1952			args.a_ioflag = 0;
1953			args.a_context = ctx;
1954
1955			hfs_unlock(cp);
1956			cp = NULL;
1957
1958			error = hfs_vnop_write(&args);
1959			if (error)
1960				printf("hfs: error %d writing HFBT on %s\n", error, HFSTOVCB(hfsmp)->vcbVN);
1961
1962			uio_free(auio);
1963		}
1964		kmem_free(kernel_map, (vm_offset_t)buffer, nodesize);
1965	}
1966out:
1967	hfs_end_transaction(hfsmp);
1968	if (dvp) {
1969		vnode_put(dvp);
1970	}
1971	if (vp) {
1972		if (cp)
1973			hfs_unlock(cp);
1974		vnode_recycle(vp);
1975		vnode_put(vp);
1976	}
1977	return (error);
1978}
1979
1980/*
1981 * Compare two hot file b-tree keys.
1982 *
1983 * Result:   +n  search key > trial key
1984 *            0  search key = trial key
1985 *           -n  search key < trial key
1986 */
1987static int
1988hfc_comparekeys(HotFileKey *searchKey, HotFileKey *trialKey)
1989{
1990	/*
1991	 * Compared temperatures first.
1992	 */
1993	if (searchKey->temperature == trialKey->temperature) {
1994		/*
1995		 * Temperatures are equal so compare file ids.
1996		 */
1997		if (searchKey->fileID == trialKey->fileID) {
1998			/*
1999			 * File ids are equal so compare fork types.
2000			 */
2001			if (searchKey->forkType == trialKey->forkType) {
2002				return (0);
2003			} else if (searchKey->forkType > trialKey->forkType) {
2004				return (1);
2005			}
2006		} else if (searchKey->fileID > trialKey->fileID) {
2007			return (1);
2008		}
2009	} else if (searchKey->temperature > trialKey->temperature) {
2010		return (1);
2011	}
2012
2013	return (-1);
2014}
2015
2016
2017/*
2018 *========================================================================
2019 *               HOT FILE DATA COLLECTING ROUTINES
2020 *========================================================================
2021 */
2022
2023/*
2024 * Lookup a hot file entry in the tree.
2025 */
2026#if HFC_DEBUG
2027static hotfile_entry_t *
2028hf_lookup(hotfile_data_t *hotdata, u_int32_t fileid, u_int32_t temperature)
2029{
2030	hotfile_entry_t *entry = hotdata->rootentry;
2031
2032	while (entry &&
2033	       entry->temperature != temperature &&
2034	       entry->fileid != fileid) {
2035
2036		if (temperature > entry->temperature)
2037			entry = entry->right;
2038		else if (temperature < entry->temperature)
2039			entry = entry->left;
2040		else if (fileid > entry->fileid)
2041			entry = entry->right;
2042		else
2043			entry = entry->left;
2044	}
2045	return (entry);
2046}
2047#endif
2048
2049/*
2050 * Insert a hot file entry into the tree.
2051 */
2052static void
2053hf_insert(hotfile_data_t *hotdata, hotfile_entry_t *newentry)
2054{
2055	hotfile_entry_t *entry = hotdata->rootentry;
2056	u_int32_t fileid = newentry->fileid;
2057	u_int32_t temperature = newentry->temperature;
2058
2059	if (entry == NULL) {
2060		hotdata->rootentry = newentry;
2061		hotdata->coldest = newentry;
2062		hotdata->activefiles++;
2063		return;
2064	}
2065
2066	while (entry) {
2067		if (temperature > entry->temperature) {
2068			if (entry->right)
2069				entry = entry->right;
2070			else {
2071				entry->right = newentry;
2072				break;
2073			}
2074		} else if (temperature < entry->temperature) {
2075			if (entry->left)
2076				entry = entry->left;
2077			else {
2078			    	entry->left = newentry;
2079				break;
2080			}
2081		} else if (fileid > entry->fileid) {
2082			if (entry->right)
2083				entry = entry->right;
2084			else {
2085	       			if (entry->fileid != fileid)
2086					entry->right = newentry;
2087				break;
2088			}
2089		} else {
2090			if (entry->left)
2091				entry = entry->left;
2092			else {
2093	       			if (entry->fileid != fileid)
2094			    		entry->left = newentry;
2095				break;
2096			}
2097		}
2098	}
2099
2100	hotdata->activefiles++;
2101}
2102
2103/*
2104 * Find the coldest entry in the tree.
2105 */
2106static hotfile_entry_t *
2107hf_coldest(hotfile_data_t *hotdata)
2108{
2109	hotfile_entry_t *entry = hotdata->rootentry;
2110
2111	if (entry) {
2112		while (entry->left)
2113			entry = entry->left;
2114	}
2115	return (entry);
2116}
2117
2118/*
2119 * Find the hottest entry in the tree.
2120 */
2121static hotfile_entry_t *
2122hf_hottest(hotfile_data_t *hotdata)
2123{
2124	hotfile_entry_t *entry = hotdata->rootentry;
2125
2126	if (entry) {
2127		while (entry->right)
2128			entry = entry->right;
2129	}
2130	return (entry);
2131}
2132
2133/*
2134 * Delete a hot file entry from the tree.
2135 */
2136static void
2137hf_delete(hotfile_data_t *hotdata, u_int32_t fileid, u_int32_t temperature)
2138{
2139	hotfile_entry_t *entry, *parent, *next;
2140
2141	parent = NULL;
2142	entry = hotdata->rootentry;
2143
2144	while (entry &&
2145	       entry->temperature != temperature &&
2146	       entry->fileid != fileid) {
2147
2148		parent = entry;
2149		if (temperature > entry->temperature)
2150			entry = entry->right;
2151		else if (temperature < entry->temperature)
2152			entry = entry->left;
2153		else if (fileid > entry->fileid)
2154			entry = entry->right;
2155		else
2156			entry = entry->left;
2157	}
2158
2159	if (entry) {
2160		/*
2161		 * Reorginize the sub-trees spanning from our entry.
2162		 */
2163		if ((next = entry->right)) {
2164			hotfile_entry_t *pnextl, *psub;
2165			/*
2166			 * Tree pruning: take the left branch of the
2167			 * current entry and place it at the lowest
2168			 * left branch of the current right branch
2169			 */
2170			psub = next;
2171
2172			/* Walk the Right/Left sub tree from current entry */
2173			while ((pnextl = psub->left))
2174				psub = pnextl;
2175
2176			/* Plug the old left tree to the new ->Right leftmost entry */
2177			psub->left = entry->left;
2178
2179		} else /* only left sub-tree, simple case */ {
2180			next = entry->left;
2181		}
2182		/*
2183		 * Now, plug the current entry sub tree to
2184		 * the good pointer of our parent entry.
2185		 */
2186		if (parent == NULL)
2187			hotdata->rootentry = next;
2188		else if (parent->left == entry)
2189			parent->left = next;
2190		else
2191			parent->right = next;
2192
2193		/* Place entry back on the free-list */
2194		entry->left = 0;
2195		entry->fileid = 0;
2196		entry->temperature = 0;
2197
2198		entry->right = hotdata->freelist;
2199		hotdata->freelist = entry;
2200		hotdata->activefiles--;
2201
2202		if (hotdata->coldest == entry || hotdata->coldest == NULL) {
2203			hotdata->coldest = hf_coldest(hotdata);
2204		}
2205
2206	}
2207}
2208
2209/*
2210 * Get a free hot file entry.
2211 */
2212static hotfile_entry_t *
2213hf_getnewentry(hotfile_data_t *hotdata)
2214{
2215	hotfile_entry_t * entry;
2216
2217	/*
2218	 * When the free list is empty then steal the coldest one
2219	 */
2220	if (hotdata->freelist == NULL) {
2221		entry = hf_coldest(hotdata);
2222		hf_delete(hotdata, entry->fileid, entry->temperature);
2223	}
2224	entry = hotdata->freelist;
2225	hotdata->freelist = entry->right;
2226	entry->right = 0;
2227
2228	return (entry);
2229}
2230
2231
2232/*
2233 * Generate a sorted list of hot files (hottest to coldest).
2234 *
2235 * As a side effect, every node in the hot file tree will be
2236 * deleted (moved to the free list).
2237 */
2238static void
2239hf_getsortedlist(hotfile_data_t * hotdata, hotfilelist_t *sortedlist)
2240{
2241	int i = 0;
2242	hotfile_entry_t *entry;
2243
2244	while ((entry = hf_hottest(hotdata)) != NULL) {
2245		sortedlist->hfl_hotfile[i].hf_fileid = entry->fileid;
2246		sortedlist->hfl_hotfile[i].hf_temperature = entry->temperature;
2247		sortedlist->hfl_hotfile[i].hf_blocks = entry->blocks;
2248		sortedlist->hfl_totalblocks += entry->blocks;
2249		++i;
2250
2251		hf_delete(hotdata, entry->fileid, entry->temperature);
2252	}
2253
2254	sortedlist->hfl_count = i;
2255
2256#if HFC_VERBOSE
2257	printf("hfs: hf_getsortedlist returned %d entries\n", i);
2258#endif
2259}
2260
2261
2262#if HFC_DEBUG
2263static void
2264hf_maxdepth(hotfile_entry_t * root, int depth, int *maxdepth)
2265{
2266	if (root) {
2267		depth++;
2268		if (depth > *maxdepth)
2269			*maxdepth = depth;
2270		hf_maxdepth(root->left, depth, maxdepth);
2271		hf_maxdepth(root->right, depth, maxdepth);
2272	}
2273}
2274
2275static void
2276hf_printtree(hotfile_entry_t * root)
2277{
2278	if (root) {
2279		hf_printtree(root->left);
2280		printf("hfs: temperature: % 8d, fileid %d\n", root->temperature, root->fileid);
2281		hf_printtree(root->right);
2282	}
2283}
2284#endif
2285