1/*
2 * Copyright (c) 2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28#if !HFS_COMPRESSION
29/* we need these symbols even though compression is turned off */
30char register_decmpfs_decompressor;
31char unregister_decmpfs_decompressor;
32#else /* HFS_COMPRESSION */
33#include <sys/kernel.h>
34#include <sys/vnode_internal.h>
35#include <sys/file_internal.h>
36#include <sys/stat.h>
37#include <sys/fcntl.h>
38#include <sys/xattr.h>
39#include <sys/namei.h>
40#include <sys/user.h>
41#include <sys/mount_internal.h>
42#include <sys/ubc.h>
43#include <sys/decmpfs.h>
44#include <sys/uio_internal.h>
45#include <libkern/OSByteOrder.h>
46
47#pragma mark --- debugging ---
48
49#define COMPRESSION_DEBUG 0
50#define COMPRESSION_DEBUG_VERBOSE 0
51#define MALLOC_DEBUG 0
52
/*
 * Return a pointer to the last component of `path`, i.e. the text that
 * follows the final '/' character.  The result points into `path` itself;
 * nothing is allocated.  A path with no '/' is returned unchanged, a path
 * ending in '/' yields the empty string, and NULL yields NULL.
 */
static const char *
baseName(const char *path)
{
    const char *component;
    const char *cursor;

    if (path == NULL) {
        return NULL;
    }

    component = path;
    for (cursor = path; *cursor != '\0'; cursor++) {
        if (*cursor == '/') {
            /* the component starts right after the most recent slash */
            component = cursor + 1;
        }
    }
    return component;
}
66
/* always-on log: printf prefixed with the base name of the source file, the line number and the function name */
#define ErrorLog(x, args...) printf("%s:%d:%s: " x, baseName(__FILE__), __LINE__, __FUNCTION__, ## args)

/* DebugLog is an alias for ErrorLog when COMPRESSION_DEBUG is non-zero, otherwise it expands to an empty statement */
#if COMPRESSION_DEBUG
#define DebugLog ErrorLog
#else
#define DebugLog(x...) do { } while(0)
#endif

/* VerboseLog is an alias for ErrorLog when COMPRESSION_DEBUG_VERBOSE is non-zero, otherwise it expands to an empty statement */
#if COMPRESSION_DEBUG_VERBOSE
#define VerboseLog ErrorLog
#else
#define VerboseLog(x...) do { } while(0)
#endif
80
81#if MALLOC_DEBUG
82
/* running sum of the payload sizes handed out by _malloc and not yet returned through _free */
static SInt32 totalAlloc;

/*
 * Bookkeeping record used by the debug allocator.  _malloc writes one copy
 * directly in front of the caller's bytes and a second copy directly behind
 * them; _free compares the two to detect overwrites.
 */
typedef struct {
    uint32_t allocSz;   /* total size requested from MALLOC: payload plus both records */
    uint32_t magic;     /* 0xdadadada once _malloc has initialised the record */
    const char *file;   /* file name passed in by the MALLOC/FREE macro (__FILE__) */
    int line;           /* line number passed in by the MALLOC/FREE macro (__LINE__) */
} allocated;
91
92static void *
93_malloc(uint32_t sz, __unused int type, __unused int flags, const char *file, int line)
94{
95    uint32_t allocSz = sz + 2 * sizeof(allocated);
96
97    allocated *alloc = NULL;
98    MALLOC(alloc, allocated *, allocSz, type, flags);
99    if (!alloc) {
100        ErrorLog("malloc failed\n");
101        return NULL;
102    }
103
104    char *ret = (char*)&alloc[1];
105    allocated *alloc2 = (allocated*)(ret + sz);
106
107    alloc->allocSz = allocSz;
108    alloc->magic = 0xdadadada;
109    alloc->file = file;
110    alloc->line = line;
111
112    *alloc2 = *alloc;
113
114    int s = OSAddAtomic(sz, &totalAlloc);
115    ErrorLog("malloc(%d) -> %p, total allocations %d\n", sz, ret, s + sz);
116
117    return ret;
118}
119
120static void
121_free(char *ret, __unused int type, const char *file, int line)
122{
123    if (!ret) {
124        ErrorLog("freeing null\n");
125        return;
126    }
127    allocated *alloc = (allocated*)ret;
128    alloc--;
129    uint32_t sz = alloc->allocSz - 2 * sizeof(allocated);
130    allocated *alloc2 = (allocated*)(ret + sz);
131
132    if (alloc->magic != 0xdadadada) {
133        panic("freeing bad pointer");
134    }
135
136    if (memcmp(alloc, alloc2, sizeof(*alloc)) != 0) {
137        panic("clobbered data");
138    }
139
140    memset(ret, 0xce, sz);
141    alloc2->file = file;
142    alloc2->line = line;
143    FREE(alloc, type);
144    int s = OSAddAtomic(-sz, &totalAlloc);
145    ErrorLog("free(%p,%d) -> total allocations %d\n", ret, sz, s - sz);
146}
147
/*
 * From this point on, route MALLOC and FREE through the debug allocator
 * above, recording the call site's file and line.  _malloc and _free
 * themselves are defined before these redefinitions, so they still use the
 * original MALLOC and FREE.
 */
#undef MALLOC
#undef FREE
#define	MALLOC(space, cast, size, type, flags) (space) = (cast)_malloc(size, type, flags, __FILE__, __LINE__)
#define FREE(addr, type) _free((void *)addr, type, __FILE__, __LINE__)
152
153#endif /* MALLOC_DEBUG */
154
155#pragma mark --- globals ---
156
/* lock group handed to lck_rw_init/lck_rw_destroy for the per-cnode compressed_data_lock */
static lck_grp_t *decmpfs_lockgrp;

static decmpfs_registration * decompressors[CMP_MAX]; /* the registered compressors */
/* taken shared by lookups and exclusive by register/unregister_decmpfs_decompressor */
static lck_rw_t * decompressorsLock;
static int decompress_channel; /* channel used by decompress_file to wake up waiters */
/* mutex passed to msleep() on decompress_channel in wait_for_decompress */
static lck_mtx_t *decompress_channel_mtx;

/* context decmpfs passes to its own vnode calls; decmpfs_hides_rsrc/xattr never hide anything from it */
vfs_context_t decmpfs_ctx;
165
166#pragma mark --- decmp_get_func ---
167
168#define offsetof_func(func) ((uintptr_t)(&(((decmpfs_registration*)NULL)->func)))
169
170static void *
171_func_from_offset(uint32_t type, uintptr_t offset)
172{
173    /* get the function at the given offset in the registration for the given type */
174    decmpfs_registration *reg = decompressors[type];
175    char *regChar = (char*)reg;
176    char *func = &regChar[offset];
177    void **funcPtr = (void**)func;
178
179    switch (reg->decmpfs_registration) {
180        case DECMPFS_REGISTRATION_VERSION_V1:
181            if (offset > offsetof_func(free_data))
182                return NULL;
183            break;
184        case DECMPFS_REGISTRATION_VERSION_V3:
185            if (offset > offsetof_func(get_flags))
186                return NULL;
187            break;
188        default:
189            return NULL;
190    }
191
192    return funcPtr[0];
193}
194
195extern void IOServicePublishResource( const char * property, boolean_t value );
196extern boolean_t IOServiceWaitForMatchingResource( const char * property, uint64_t timeout );
197extern boolean_t IOCatalogueMatchingDriversPresent( const char * property );
198
/*
 * Look up the function stored at `offset` in the registration for
 * compression type `type`.
 *
 * If the type is not registered yet but IOCatalogueMatchingDriversPresent
 * reports a driver advertising it, this waits for the corresponding
 * resource to be published, temporarily dropping the shared
 * decompressorsLock around each wait and doubling the timeout after every
 * unsuccessful attempt.
 *
 * Returns the function pointer, or NULL if the type is out of range, is not
 * (and does not become) registered, or the registration has no such member.
 */
static void *
_decmp_get_func(uint32_t type, uintptr_t offset)
{
	/*
	 this function should be called while holding a shared lock to decompressorsLock,
	 and will return with the lock held
	 */

	if (type >= CMP_MAX)
		return NULL;

	if (decompressors[type] != NULL) {
		// the compressor has already registered but the function might be null
		return _func_from_offset(type, offset);
	}

    // does IOKit know about a kext that is supposed to provide this type?
    char providesName[80];
    snprintf(providesName, sizeof(providesName), "com.apple.AppleFSCompression.providesType%u", type);
    if (IOCatalogueMatchingDriversPresent(providesName)) {
        // there is a kext that says it will register for this type, so let's wait for it
        char resourceName[80];
        uint64_t delay = 10000000ULL; // 10 milliseconds.
        snprintf(resourceName, sizeof(resourceName), "com.apple.AppleFSCompression.Type%u", type);
        printf("waiting for %s\n", resourceName);
        while(decompressors[type] == NULL) {
            lck_rw_unlock_shared(decompressorsLock); // we have to unlock to allow the kext to register
            if (IOServiceWaitForMatchingResource(resourceName, delay)) {
                // the resource was published; retake the lock before leaving the loop
                lck_rw_lock_shared(decompressorsLock);
                break;
            }
            if (!IOCatalogueMatchingDriversPresent(providesName)) {
                // the driver that advertised this type has gone away, so stop waiting
                printf("the kext with %s is no longer present\n", providesName);
                lck_rw_lock_shared(decompressorsLock);
                break;
            }
            printf("still waiting for %s\n", resourceName);
            delay *= 2;
            // retake the lock so the loop condition is evaluated with it held
            lck_rw_lock_shared(decompressorsLock);
        }
        // IOKit says the kext is loaded, so it should be registered too!
        if (decompressors[type] == NULL) {
            ErrorLog("we found %s, but the type still isn't registered\n", providesName);
            return NULL;
        }
        // it's now registered, so let's return the function
        return _func_from_offset(type, offset);
    }

	// the compressor hasn't registered, so it never will unless someone manually kextloads it
	ErrorLog("tried to access a compressed file of unregistered type %d\n", type);
	return NULL;
}
253
/*
 * typed wrapper around _decmp_get_func: looks up member `func` of the
 * registration for `type` and casts the result to that member's own type.
 * Same locking requirement as _decmp_get_func (shared decompressorsLock held).
 */
#define decmp_get_func(type, func) ((typeof(((decmpfs_registration*)NULL)->func))_decmp_get_func(type, offsetof_func(func)))
255
256#pragma mark --- utilities ---
257
258#if COMPRESSION_DEBUG
259static char*
260vnpath(vnode_t vp, char *path, int len)
261{
262    int origlen = len;
263    path[0] = 0;
264    vn_getpath(vp, path, &len);
265    path[origlen - 1] = 0;
266    return path;
267}
268
269static int
270vnsize(vnode_t vp, uint64_t *size)
271{
272    struct vnode_attr va;
273    VATTR_INIT(&va);
274    VATTR_WANTED(&va, va_data_size);
275	int error = vnode_getattr(vp, &va, decmpfs_ctx);
276    if (error != 0) {
277        ErrorLog("vnode_getattr err %d\n", error);
278        return error;
279    }
280    *size = va.va_data_size;
281    return 0;
282}
283#endif /* COMPRESSION_DEBUG */
284
285#pragma mark --- cnode routines ---
286
287void
288decmpfs_cnode_init(decmpfs_cnode *cp)
289{
290    memset(cp, 0, sizeof(*cp));
291	lck_rw_init(&cp->compressed_data_lock, decmpfs_lockgrp, NULL);
292}
293
/*
 * Tear down a decmpfs_cnode set up by decmpfs_cnode_init by destroying its
 * compressed_data_lock.  The cnode's memory itself is not freed here.
 */
void
decmpfs_cnode_destroy(decmpfs_cnode *cp)
{
	lck_rw_destroy(&cp->compressed_data_lock, decmpfs_lockgrp);
}
299
300boolean_t
301decmpfs_trylock_compressed_data(decmpfs_cnode *cp, int exclusive)
302{
303	void *thread = current_thread();
304	boolean_t retval = FALSE;
305
306	if (cp->lockowner == thread) {
307		/* this thread is already holding an exclusive lock, so bump the count */
308		cp->lockcount++;
309		retval = TRUE;
310	} else if (exclusive) {
311		if ((retval = lck_rw_try_lock_exclusive(&cp->compressed_data_lock))) {
312			cp->lockowner = thread;
313			cp->lockcount = 1;
314		}
315	} else {
316		if ((retval = lck_rw_try_lock_shared(&cp->compressed_data_lock))) {
317			cp->lockowner = (void *)-1;
318		}
319	}
320	return retval;
321}
322
323void
324decmpfs_lock_compressed_data(decmpfs_cnode *cp, int exclusive)
325{
326	void *thread = current_thread();
327
328	if (cp->lockowner == thread) {
329		/* this thread is already holding an exclusive lock, so bump the count */
330		cp->lockcount++;
331	} else if (exclusive) {
332		lck_rw_lock_exclusive(&cp->compressed_data_lock);
333		cp->lockowner = thread;
334		cp->lockcount = 1;
335	} else {
336		lck_rw_lock_shared(&cp->compressed_data_lock);
337		cp->lockowner = (void *)-1;
338	}
339}
340
341void
342decmpfs_unlock_compressed_data(decmpfs_cnode *cp, __unused int exclusive)
343{
344	void *thread = current_thread();
345
346	if (cp->lockowner == thread) {
347		/* this thread is holding an exclusive lock, so decrement the count */
348		if ((--cp->lockcount) > 0) {
349			/* the caller still has outstanding locks, so we're done */
350			return;
351		}
352		cp->lockowner = NULL;
353	}
354
355	lck_rw_done(&cp->compressed_data_lock);
356}
357
/* Return the compression state cached in the cnode (cmp_state); no lock is taken. */
uint32_t
decmpfs_cnode_get_vnode_state(decmpfs_cnode *cp)
{
    return cp->cmp_state;
}
363
364void
365decmpfs_cnode_set_vnode_state(decmpfs_cnode *cp, uint32_t state, int skiplock)
366{
367	if (!skiplock) decmpfs_lock_compressed_data(cp, 1);
368	cp->cmp_state = state;
369    if (state == FILE_TYPE_UNKNOWN) {
370        /* clear out the compression type too */
371        cp->cmp_type = 0;
372    }
373	if (!skiplock) decmpfs_unlock_compressed_data(cp, 1);
374}
375
376static void
377decmpfs_cnode_set_vnode_cmp_type(decmpfs_cnode *cp, uint32_t cmp_type, int skiplock)
378{
379    if (!skiplock) decmpfs_lock_compressed_data(cp, 1);
380    cp->cmp_type = cmp_type;
381    if (!skiplock) decmpfs_unlock_compressed_data(cp, 1);
382}
383
384static void
385decmpfs_cnode_set_vnode_minimal_xattr(decmpfs_cnode *cp, int minimal_xattr, int skiplock)
386{
387    if (!skiplock) decmpfs_lock_compressed_data(cp, 1);
388    cp->cmp_minimal_xattr = minimal_xattr;
389    if (!skiplock) decmpfs_unlock_compressed_data(cp, 1);
390}
391
/* Return the uncompressed file size cached in the cnode (uncompressed_size); no lock is taken. */
uint64_t
decmpfs_cnode_get_vnode_cached_size(decmpfs_cnode *cp)
{
    return cp->uncompressed_size;
}
397
398static void
399decmpfs_cnode_set_vnode_cached_size(decmpfs_cnode *cp, uint64_t size)
400{
401    while(1) {
402        uint64_t old = cp->uncompressed_size;
403        if (OSCompareAndSwap64(old, size, (UInt64*)&cp->uncompressed_size)) {
404            return;
405        } else {
406            /* failed to write our value, so loop */
407        }
408    }
409}
410
/* Return the decompression flags cached in the cnode (decompression_flags); no lock is taken. */
static uint64_t
decmpfs_cnode_get_decompression_flags(decmpfs_cnode *cp)
{
    return cp->decompression_flags;
}
416
417static void
418decmpfs_cnode_set_decompression_flags(decmpfs_cnode *cp, uint64_t flags)
419{
420    while(1) {
421        uint64_t old = cp->decompression_flags;
422        if (OSCompareAndSwap64(old, flags, (UInt64*)&cp->decompression_flags)) {
423            return;
424        } else {
425            /* failed to write our value, so loop */
426        }
427    }
428}
429
430#pragma mark --- decmpfs state routines ---
431
/*
 * Build a decmpfs_header for vp, either from values cached in cp or by
 * reading the DECMPFS_XATTR_NAME xattr.
 *
 * vp            - vnode whose compression xattr is wanted
 * cp            - optional cnode; may be NULL, in which case the xattr is always read
 * hdrOut        - receives a buffer allocated with MALLOC(M_TEMP) that the caller
 *                 must FREE, or NULL on failure
 * returnInvalid - if non-zero, an out-of-range compression type yields ERANGE
 *                 together with a valid *hdrOut instead of EINVAL
 *
 * Returns 0 on success, ERANGE as described above, ENOMEM if allocation
 * fails, EINVAL for a bad size, magic, or type, or the error from vn_getxattr.
 */
static int
decmpfs_fetch_compressed_header(vnode_t vp, decmpfs_cnode *cp, decmpfs_header **hdrOut, int returnInvalid)
{
    /*
     fetches vp's compression xattr, converting it into a decmpfs_header; returns 0 or errno
     if returnInvalid == 1, returns the header even if the type was invalid (out of range),
     and return ERANGE in that case
     */

	size_t read_size             = 0;
	size_t attr_size             = 0;
    uio_t attr_uio               = NULL;
    int err                      = 0;
    char *data                   = NULL;
    decmpfs_header *hdr = NULL;
	char uio_buf[ UIO_SIZEOF(1) ];

    if ((cp != NULL) &&
        (cp->cmp_type != 0) &&
        (cp->cmp_minimal_xattr != 0)) {
        /* this file's xattr didn't have any extra data when we fetched it, so we can synthesize a header from the data in the cnode */

        MALLOC(data, char *, sizeof(decmpfs_header), M_TEMP, M_WAITOK);
        if (!data) {
            err = ENOMEM;
            goto out;
        }
        hdr = (decmpfs_header*)data;
        hdr->attr_size = sizeof(decmpfs_disk_header);
        hdr->compression_magic = DECMPFS_MAGIC;
        hdr->compression_type  = cp->cmp_type;
        hdr->uncompressed_size = decmpfs_cnode_get_vnode_cached_size(cp);
    } else {
        /* figure out how big the xattr is on disk */
        err = vn_getxattr(vp, DECMPFS_XATTR_NAME, NULL, &attr_size, XATTR_NOSECURITY, decmpfs_ctx);
        if (err != 0)
            goto out;

        /* reject xattrs too small to hold the on-disk header or larger than the supported maximum */
        if (attr_size < sizeof(decmpfs_disk_header) || attr_size > MAX_DECMPFS_XATTR_SIZE) {
            err = EINVAL;
            goto out;
        }

        /* allocation includes space for the extra attr_size field of a compressed_header */
        MALLOC(data, char *, attr_size + sizeof(hdr->attr_size), M_TEMP, M_WAITOK);
        if (!data) {
            err = ENOMEM;
            goto out;
        }

        /* read the xattr into our buffer, skipping over the attr_size field at the beginning */
        attr_uio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf));
        uio_addiov(attr_uio, CAST_USER_ADDR_T(data + sizeof(hdr->attr_size)), attr_size);

        err = vn_getxattr(vp, DECMPFS_XATTR_NAME, attr_uio, &read_size, XATTR_NOSECURITY, decmpfs_ctx);
        if (err != 0)
            goto out;
        /* the xattr changed size between the two calls, so don't trust what we read */
        if (read_size != attr_size) {
            err = EINVAL;
            goto out;
        }
        hdr = (decmpfs_header*)data;
        hdr->attr_size = attr_size;
        /* swap the fields to native endian */
        hdr->compression_magic = OSSwapLittleToHostInt32(hdr->compression_magic);
        hdr->compression_type  = OSSwapLittleToHostInt32(hdr->compression_type);
        hdr->uncompressed_size = OSSwapLittleToHostInt64(hdr->uncompressed_size);
    }

    if (hdr->compression_magic != DECMPFS_MAGIC) {
        ErrorLog("invalid compression_magic 0x%08x, should be 0x%08x\n", hdr->compression_magic, DECMPFS_MAGIC);
        err = EINVAL;
		goto out;
    }

    if (hdr->compression_type >= CMP_MAX) {
        if (returnInvalid) {
            /* return the header even though the type is out of range */
            err = ERANGE;
        } else {
            ErrorLog("compression_type %d out of range\n", hdr->compression_type);
            err = EINVAL;
        }
		goto out;
    }

out:
    /* ERANGE is the one error for which the header is still handed back to the caller */
    if (err && (err != ERANGE)) {
        DebugLog("err %d\n", err);
        if (data) FREE(data, M_TEMP);
        *hdrOut = NULL;
    } else {
        *hdrOut = hdr;
    }
    return err;
}
528
529static int
530decmpfs_fast_get_state(decmpfs_cnode *cp)
531{
532    /*
533     return the cached state
534     this should *only* be called when we know that decmpfs_file_is_compressed has already been called,
535     because this implies that the cached state is valid
536     */
537    int cmp_state = decmpfs_cnode_get_vnode_state(cp);
538
539    switch(cmp_state) {
540        case FILE_IS_NOT_COMPRESSED:
541        case FILE_IS_COMPRESSED:
542        case FILE_IS_CONVERTING:
543            return cmp_state;
544        case FILE_TYPE_UNKNOWN:
545            /*
546             we should only get here if decmpfs_file_is_compressed was not called earlier on this vnode,
547             which should not be possible
548             */
549            ErrorLog("decmpfs_fast_get_state called on unknown file\n");
550            return FILE_IS_NOT_COMPRESSED;
551        default:
552            /* */
553            ErrorLog("unknown cmp_state %d\n", cmp_state);
554            return FILE_IS_NOT_COMPRESSED;
555    }
556}
557
558static int
559decmpfs_fast_file_is_compressed(decmpfs_cnode *cp)
560{
561    int cmp_state = decmpfs_cnode_get_vnode_state(cp);
562
563    switch(cmp_state) {
564        case FILE_IS_NOT_COMPRESSED:
565			return 0;
566        case FILE_IS_COMPRESSED:
567        case FILE_IS_CONVERTING:
568            return 1;
569        case FILE_TYPE_UNKNOWN:
570            /*
571             we should only get here if decmpfs_file_is_compressed was not called earlier on this vnode,
572             which should not be possible
573             */
574            ErrorLog("decmpfs_fast_get_state called on unknown file\n");
575            return 0;
576        default:
577            /* */
578            ErrorLog("unknown cmp_state %d\n", cmp_state);
579            return 0;
580    }
581}
582
583errno_t
584decmpfs_validate_compressed_file(vnode_t vp, decmpfs_cnode *cp)
585{
586    /* give a compressor a chance to indicate that a compressed file is invalid */
587
588    decmpfs_header *hdr = NULL;
589    errno_t err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0);
590    if (err) {
591        /* we couldn't get the header */
592        if (decmpfs_fast_get_state(cp) == FILE_IS_NOT_COMPRESSED) {
593            /* the file is no longer compressed, so return success */
594            err = 0;
595        }
596        goto out;
597    }
598
599    lck_rw_lock_shared(decompressorsLock);
600    decmpfs_validate_compressed_file_func validate = decmp_get_func(hdr->compression_type, validate);
601    if (validate) {    /* make sure this validation function is valid */
602        /* is the data okay? */
603		err = validate(vp, decmpfs_ctx, hdr);
604    } else if (decmp_get_func(hdr->compression_type, fetch) == NULL) {
605        /* the type isn't registered */
606        err = EIO;
607    } else {
608        /* no validate registered, so nothing to do */
609        err = 0;
610    }
611    lck_rw_unlock_shared(decompressorsLock);
612out:
613    if (hdr) FREE(hdr, M_TEMP);
614#if COMPRESSION_DEBUG
615    if (err) {
616        DebugLog("decmpfs_validate_compressed_file ret %d, vp->v_flag %d\n", err, vp->v_flag);
617    }
618#endif
619    return err;
620}
621
/*
 * Determine whether vp is a compressed file, caching the answer in cp.
 *
 * Returns 1 if the file is (or is being converted from) compressed and 0
 * otherwise.  Named streams always return 0 without touching the cache.
 * On the first call for a cnode (cached state FILE_TYPE_UNKNOWN) the file's
 * flags and compression xattr are inspected and the cnode's state, type,
 * cached size, minimal-xattr flag and decompression flags are filled in.
 */
int
decmpfs_file_is_compressed(vnode_t vp, decmpfs_cnode *cp)
{
    /*
     determines whether vp points to a compressed file

     to speed up this operation, we cache the result in the cnode, and do as little as possible
     in the case where the cnode already has a valid cached state

     */

    int ret = 0;
	int error = 0;
	uint32_t cmp_state;
	struct vnode_attr va_fetch;
    decmpfs_header *hdr = NULL;
    mount_t mp = NULL;
	int cnode_locked = 0;
    int saveInvalid = 0; // save the header data even though the type was out of range
    uint64_t decompression_flags = 0;

    if (vnode_isnamedstream(vp)) {
        /*
         named streams can't be compressed
         since named streams of the same file share the same cnode,
         we don't want to get/set the state in the cnode, just return 0
         */
        return 0;
    }

    /* examine the cached state in this cnode */
    cmp_state = decmpfs_cnode_get_vnode_state(cp);
    switch(cmp_state) {
        case FILE_IS_NOT_COMPRESSED:
			return 0;
        case FILE_IS_COMPRESSED:
			return 1;
        case FILE_IS_CONVERTING:
            /* treat the file as compressed, because this gives us a way to block future reads until decompression is done */
            return 1;
        case FILE_TYPE_UNKNOWN:
            /* the first time we encountered this vnode, so we need to check it out */
            break;
        default:
            /* unknown state, assume file is not compressed */
            ErrorLog("unknown cmp_state %d\n", cmp_state);
            return 0;
    }

    if (!vnode_isreg(vp)) {
        /* only regular files can be compressed */
        ret = FILE_IS_NOT_COMPRESSED;
        goto done;
    }

    mp = vnode_mount(vp);
    if (mp == NULL) {
        /*
         this should only be true before we mount the root filesystem
         we short-cut this return to avoid the call to getattr below, which
         will fail before root is mounted
         */
        ret = FILE_IS_NOT_COMPRESSED;
        goto done;
    }
    if ((mp->mnt_flag & MNT_LOCAL) == 0) {
        /* compression only supported on local filesystems */
        ret = FILE_IS_NOT_COMPRESSED;
        goto done;
    }

	/* lock our cnode data so that another caller doesn't change the state under us */
	decmpfs_lock_compressed_data(cp, 1);
	cnode_locked = 1;

	VATTR_INIT(&va_fetch);
	VATTR_WANTED(&va_fetch, va_flags);
	error = vnode_getattr(vp, &va_fetch, decmpfs_ctx);
	if (error) {
        /* failed to get the bsd flags so the file is not compressed */
        ret = FILE_IS_NOT_COMPRESSED;
        goto done;
    }
	if (va_fetch.va_flags & UF_COMPRESSED) {
		/* UF_COMPRESSED is on, make sure the file has the DECMPFS_XATTR_NAME xattr */
        error = decmpfs_fetch_compressed_header(vp, cp, &hdr, 1);
        if ((hdr != NULL) && (error == ERANGE)) {
            /* out-of-range type: the file is treated as not compressed, but its header data is still cached below */
            saveInvalid = 1;
        }
        if (error) {
            /* failed to get the xattr so the file is not compressed */
            ret = FILE_IS_NOT_COMPRESSED;
            goto done;
        }
        /* we got the xattr, so the file is compressed */
        ret = FILE_IS_COMPRESSED;
        goto done;
	}
    /* UF_COMPRESSED isn't on, so the file isn't compressed */
    ret = FILE_IS_NOT_COMPRESSED;

done:
    if (((ret == FILE_IS_COMPRESSED) || saveInvalid) && hdr) {
		/*
		 cache the uncompressed size away in the cnode
		 */

		if (!cnode_locked) {
			/*
			 we should never get here since the only place ret is set to FILE_IS_COMPRESSED
			 is after the call to decmpfs_lock_compressed_data above
			 */
			decmpfs_lock_compressed_data(cp, 1);
			cnode_locked = 1;
		}

        decmpfs_cnode_set_vnode_cached_size(cp, hdr->uncompressed_size);
		decmpfs_cnode_set_vnode_state(cp, ret, 1);
        decmpfs_cnode_set_vnode_cmp_type(cp, hdr->compression_type, 1);
        /* remember if the xattr's size was equal to the minimal xattr */
        if (hdr->attr_size == sizeof(decmpfs_disk_header)) {
            decmpfs_cnode_set_vnode_minimal_xattr(cp, 1, 1);
        }
        if (ret == FILE_IS_COMPRESSED) {
            /* update the ubc's size for this file */
            ubc_setsize(vp, hdr->uncompressed_size);

            /* update the decompression flags in the decmpfs cnode */
            lck_rw_lock_shared(decompressorsLock);
            decmpfs_get_decompression_flags_func get_flags = decmp_get_func(hdr->compression_type, get_flags);
            if (get_flags) {
                decompression_flags = get_flags(vp, decmpfs_ctx, hdr);
            }
            lck_rw_unlock_shared(decompressorsLock);
            /* stays 0 when the compressor has no get_flags function */
            decmpfs_cnode_set_decompression_flags(cp, decompression_flags);
        }
	} else {
		/* we might have already taken the lock above; if so, skip taking it again by passing cnode_locked as the skiplock parameter */
		decmpfs_cnode_set_vnode_state(cp, ret, cnode_locked);
	}

	if (cnode_locked) decmpfs_unlock_compressed_data(cp, 1);

    if (hdr) FREE(hdr, M_TEMP);

	/* translate the state into the 0/1 answer callers expect */
	switch(ret) {
        case FILE_IS_NOT_COMPRESSED:
			return 0;
        case FILE_IS_COMPRESSED:
        case FILE_IS_CONVERTING:
			return 1;
        default:
            /* unknown state, assume file is not compressed */
            ErrorLog("unknown ret %d\n", ret);
            return 0;
    }
}
779
780int
781decmpfs_update_attributes(vnode_t vp, struct vnode_attr *vap)
782{
783    int error = 0;
784
785    if (VATTR_IS_ACTIVE(vap, va_flags)) {
786        /* the BSD flags are being updated */
787        if (vap->va_flags & UF_COMPRESSED) {
788            /* the compressed bit is being set, did it change? */
789            struct vnode_attr va_fetch;
790            int old_flags = 0;
791            VATTR_INIT(&va_fetch);
792            VATTR_WANTED(&va_fetch, va_flags);
793			error = vnode_getattr(vp, &va_fetch, decmpfs_ctx);
794            if (error)
795                return error;
796
797            old_flags = va_fetch.va_flags;
798
799            if (!(old_flags & UF_COMPRESSED)) {
800                /*
801                 * Compression bit was turned on, make sure the file has the DECMPFS_XATTR_NAME attribute.
802                 * This precludes anyone from using the UF_COMPRESSED bit for anything else, and it enforces
803                 * an order of operation -- you must first do the setxattr and then the chflags.
804                 */
805
806				if (VATTR_IS_ACTIVE(vap, va_data_size)) {
807					/*
808					 * don't allow the caller to set the BSD flag and the size in the same call
809					 * since this doesn't really make sense
810					 */
811					vap->va_flags &= ~UF_COMPRESSED;
812					return 0;
813				}
814
815                decmpfs_header *hdr = NULL;
816                error = decmpfs_fetch_compressed_header(vp, NULL, &hdr, 1);
817                if (error == 0) {
818                    /*
819                     allow the flag to be set since the decmpfs attribute is present
820                     in that case, we also want to truncate the data fork of the file
821                     */
822                    VATTR_SET_ACTIVE(vap, va_data_size);
823                    vap->va_data_size = 0;
824                } else if (error == ERANGE) {
825                    /* the file had a decmpfs attribute but the type was out of range, so don't muck with the file's data size */
826                } else {
827                    /* no DECMPFS_XATTR_NAME attribute, so deny the update */
828					vap->va_flags &= ~UF_COMPRESSED;
829                }
830                if (hdr) FREE(hdr, M_TEMP);
831            }
832        }
833    }
834
835    return 0;
836}
837
838static int
839wait_for_decompress(decmpfs_cnode *cp)
840{
841    int state;
842    lck_mtx_lock(decompress_channel_mtx);
843    do {
844        state = decmpfs_fast_get_state(cp);
845        if (state != FILE_IS_CONVERTING) {
846            /* file is not decompressing */
847            lck_mtx_unlock(decompress_channel_mtx);
848            return state;
849        }
850        msleep((caddr_t)&decompress_channel, decompress_channel_mtx, PINOD, "wait_for_decompress", NULL);
851    } while(1);
852}
853
854#pragma mark --- decmpfs hide query routines ---
855
856int
857decmpfs_hides_rsrc(vfs_context_t ctx, decmpfs_cnode *cp)
858{
859	/*
860	 WARNING!!!
861	 callers may (and do) pass NULL for ctx, so we should only use it
862	 for this equality comparison
863
864	 This routine should only be called after a file has already been through decmpfs_file_is_compressed
865	 */
866
867	if (ctx == decmpfs_ctx)
868		return 0;
869
870	if (!decmpfs_fast_file_is_compressed(cp))
871		return 0;
872
873	/* all compressed files hide their resource fork */
874	return 1;
875}
876
877int
878decmpfs_hides_xattr(vfs_context_t ctx, decmpfs_cnode *cp, const char *xattr)
879{
880	/*
881	 WARNING!!!
882	 callers may (and do) pass NULL for ctx, so we should only use it
883	 for this equality comparison
884
885	 This routine should only be called after a file has already been through decmpfs_file_is_compressed
886	 */
887
888	if (ctx == decmpfs_ctx)
889		return 0;
890	if (strncmp(xattr, XATTR_RESOURCEFORK_NAME, 22) == 0)
891		return decmpfs_hides_rsrc(ctx, cp);
892	if (!decmpfs_fast_file_is_compressed(cp))
893    /* file is not compressed, so don't hide this xattr */
894		return 0;
895	if (strncmp(xattr, DECMPFS_XATTR_NAME, 11) == 0)
896    /* it's our xattr, so hide it */
897		return 1;
898	/* don't hide this xattr */
899	return 0;
900}
901
902#pragma mark --- registration/validation routines ---
903
904static inline int registration_valid(decmpfs_registration *registration)
905{
906    return registration && ((registration->decmpfs_registration == DECMPFS_REGISTRATION_VERSION_V1) || (registration->decmpfs_registration == DECMPFS_REGISTRATION_VERSION_V3));
907}
908
909errno_t
910register_decmpfs_decompressor(uint32_t compression_type, decmpfs_registration *registration)
911{
912    /* called by kexts to register decompressors */
913
914    errno_t ret = 0;
915    int locked = 0;
916    char resourceName[80];
917
918    if ((compression_type >= CMP_MAX) || !registration_valid(registration)) {
919        ret = EINVAL;
920        goto out;
921    }
922
923    lck_rw_lock_exclusive(decompressorsLock); locked = 1;
924
925    /* make sure the registration for this type is zero */
926	if (decompressors[compression_type] != NULL) {
927		ret = EEXIST;
928		goto out;
929	}
930    decompressors[compression_type] = registration;
931    snprintf(resourceName, sizeof(resourceName), "com.apple.AppleFSCompression.Type%u", compression_type);
932    IOServicePublishResource(resourceName, TRUE);
933
934out:
935    if (locked) lck_rw_unlock_exclusive(decompressorsLock);
936    return ret;
937}
938
939errno_t
940unregister_decmpfs_decompressor(uint32_t compression_type, decmpfs_registration *registration)
941{
942    /* called by kexts to unregister decompressors */
943
944    errno_t ret = 0;
945    int locked = 0;
946    char resourceName[80];
947
948    if ((compression_type >= CMP_MAX) || !registration_valid(registration)) {
949        ret = EINVAL;
950        goto out;
951    }
952
953    lck_rw_lock_exclusive(decompressorsLock); locked = 1;
954    if (decompressors[compression_type] != registration) {
955        ret = EEXIST;
956        goto out;
957    }
958    decompressors[compression_type] = NULL;
959    snprintf(resourceName, sizeof(resourceName), "com.apple.AppleFSCompression.Type%u", compression_type);
960    IOServicePublishResource(resourceName, FALSE);
961
962out:
963    if (locked) lck_rw_unlock_exclusive(decompressorsLock);
964    return ret;
965}
966
967static int
968compression_type_valid(decmpfs_header *hdr)
969{
970    /* fast pre-check to determine if the given compressor has checked in */
971    int ret = 0;
972
973    /* every compressor must have at least a fetch function */
974    lck_rw_lock_shared(decompressorsLock);
975    if (decmp_get_func(hdr->compression_type, fetch) != NULL) {
976        ret = 1;
977    }
978    lck_rw_unlock_shared(decompressorsLock);
979
980    return ret;
981}
982
983#pragma mark --- compression/decompression routines ---
984
985static int
986decmpfs_fetch_uncompressed_data(vnode_t vp, decmpfs_cnode *cp, decmpfs_header *hdr, off_t offset, user_ssize_t size, int nvec, decmpfs_vector *vec, uint64_t *bytes_read)
987{
988    /* get the uncompressed bytes for the specified region of vp by calling out to the registered compressor */
989
990    int err          = 0;
991
992    *bytes_read = 0;
993
994    if ((uint64_t)offset >= hdr->uncompressed_size) {
995        /* reading past end of file; nothing to do */
996        err = 0;
997        goto out;
998    }
999    if (offset < 0) {
1000        /* tried to read from before start of file */
1001        err = EINVAL;
1002        goto out;
1003    }
1004    if ((uint64_t)(offset + size) > hdr->uncompressed_size) {
1005        /* adjust size so we don't read past the end of the file */
1006		size = hdr->uncompressed_size - offset;
1007	}
1008    if (size == 0) {
1009        /* nothing to read */
1010        err = 0;
1011        goto out;
1012    }
1013
1014    lck_rw_lock_shared(decompressorsLock);
1015    decmpfs_fetch_uncompressed_data_func fetch = decmp_get_func(hdr->compression_type, fetch);
1016    if (fetch) {
1017		err = fetch(vp, decmpfs_ctx, hdr, offset, size, nvec, vec, bytes_read);
1018		lck_rw_unlock_shared(decompressorsLock);
1019        if (err == 0) {
1020            uint64_t decompression_flags = decmpfs_cnode_get_decompression_flags(cp);
1021            if (decompression_flags & DECMPFS_FLAGS_FORCE_FLUSH_ON_DECOMPRESS) {
1022#if	!defined(__i386__) && !defined(__x86_64__)
1023                int i;
1024                for (i = 0; i < nvec; i++) {
1025                    flush_dcache64((addr64_t)(uintptr_t)vec[i].buf, vec[i].size, FALSE);
1026                }
1027#endif
1028            }
1029        }
1030    } else {
1031        err = ENOTSUP;
1032        lck_rw_unlock_shared(decompressorsLock);
1033    }
1034
1035out:
1036    return err;
1037}
1038
1039static kern_return_t
1040commit_upl(upl_t upl, upl_offset_t pl_offset, size_t uplSize, int flags, int abort)
1041{
1042    kern_return_t kr = 0;
1043
1044    /* commit the upl pages */
1045    if (abort) {
1046        VerboseLog("aborting upl, flags 0x%08x\n", flags);
1047		kr = ubc_upl_abort_range(upl, pl_offset, uplSize, flags);
1048        if (kr != KERN_SUCCESS)
1049            ErrorLog("ubc_upl_commit_range error %d\n", (int)kr);
1050    } else {
1051        VerboseLog("committing upl, flags 0x%08x\n", flags | UPL_COMMIT_CLEAR_DIRTY);
1052		kr = ubc_upl_commit_range(upl, pl_offset, uplSize, flags | UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_WRITTEN_BY_KERNEL);
1053        if (kr != KERN_SUCCESS)
1054            ErrorLog("ubc_upl_commit_range error %d\n", (int)kr);
1055    }
1056    return kr;
1057}
1058
1059errno_t
1060decmpfs_pagein_compressed(struct vnop_pagein_args *ap, int *is_compressed, decmpfs_cnode *cp)
1061{
1062    /* handles a page-in request from vfs for a compressed file */
1063
1064    int err                      = 0;
1065    struct vnode *vp             = ap->a_vp;
1066    upl_t pl                     = ap->a_pl;
1067	upl_offset_t pl_offset       = ap->a_pl_offset;
1068    off_t f_offset               = ap->a_f_offset;
1069    size_t size                  = ap->a_size;
1070	int flags                    = ap->a_flags;
1071    off_t uplPos                 = 0;
1072    user_ssize_t uplSize         = 0;
1073	void *data                   = NULL;
1074    decmpfs_header *hdr = NULL;
1075    int abort_pagein             = 0;
1076    uint64_t cachedSize          = 0;
1077	int cmpdata_locked           = 0;
1078
1079    if(!decmpfs_trylock_compressed_data(cp, 0)) {
1080	    return EAGAIN;
1081    }
1082    cmpdata_locked = 1;
1083
1084
1085	if (flags & ~(UPL_IOSYNC | UPL_NOCOMMIT | UPL_NORDAHEAD)) {
1086		DebugLog("pagein: unknown flags 0x%08x\n", (flags & ~(UPL_IOSYNC | UPL_NOCOMMIT | UPL_NORDAHEAD)));
1087	}
1088
1089    err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0);
1090    if (err != 0) {
1091        goto out;
1092    }
1093
1094    cachedSize = hdr->uncompressed_size;
1095
1096    if (!compression_type_valid(hdr)) {
1097        /* compressor not registered */
1098        err = ENOTSUP;
1099        goto out;
1100    }
1101
1102    /* map the upl so we can fetch into it */
1103	kern_return_t kr = ubc_upl_map(pl, (vm_offset_t*)&data);
1104	if ((kr != KERN_SUCCESS) || (data == NULL)) {
1105		goto out;
1106	}
1107
1108    uplPos = f_offset;
1109    uplSize = size;
1110
1111    /* clip the size to the size of the file */
1112    if ((uint64_t)uplPos + uplSize > cachedSize) {
1113        /* truncate the read to the size of the file */
1114        uplSize = cachedSize - uplPos;
1115    }
1116
1117    /* do the fetch */
1118    decmpfs_vector vec;
1119
1120decompress:
1121    /* the mapped data pointer points to the first page of the page list, so we want to start filling in at an offset of pl_offset */
1122    vec.buf = (char*)data + pl_offset;
1123    vec.size = size;
1124
1125    uint64_t did_read = 0;
1126	if (decmpfs_fast_get_state(cp) == FILE_IS_CONVERTING) {
1127		ErrorLog("unexpected pagein during decompress\n");
1128		/*
1129		 if the file is converting, this must be a recursive call to pagein from underneath a call to decmpfs_decompress_file;
1130		 pretend that it succeeded but don't do anything since we're just going to write over the pages anyway
1131		 */
1132		err = 0;
1133		did_read = 0;
1134	} else {
1135        err = decmpfs_fetch_uncompressed_data(vp, cp, hdr, uplPos, uplSize, 1, &vec, &did_read);
1136	}
1137    if (err) {
1138        DebugLog("decmpfs_fetch_uncompressed_data err %d\n", err);
1139        int cmp_state = decmpfs_fast_get_state(cp);
1140        if (cmp_state == FILE_IS_CONVERTING) {
1141            DebugLog("cmp_state == FILE_IS_CONVERTING\n");
1142            cmp_state = wait_for_decompress(cp);
1143            if (cmp_state == FILE_IS_COMPRESSED) {
1144                DebugLog("cmp_state == FILE_IS_COMPRESSED\n");
1145                /* a decompress was attempted but it failed, let's try calling fetch again */
1146                goto decompress;
1147            }
1148        }
1149        if (cmp_state == FILE_IS_NOT_COMPRESSED) {
1150            DebugLog("cmp_state == FILE_IS_NOT_COMPRESSED\n");
1151            /* the file was decompressed after we started reading it */
1152            abort_pagein = 1;   /* we're not going to commit our data */
1153            *is_compressed = 0; /* instruct caller to fall back to its normal path */
1154        }
1155    }
1156
1157    /* zero out whatever we didn't read, and zero out the end of the last page(s) */
1158    uint64_t total_size = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
1159    if (did_read < total_size) {
1160        memset((char*)vec.buf + did_read, 0, total_size - did_read);
1161    }
1162
1163	kr = ubc_upl_unmap(pl); data = NULL; /* make sure to set data to NULL so we don't try to unmap again below */
1164    if (kr != KERN_SUCCESS)
1165        ErrorLog("ubc_upl_unmap error %d\n", (int)kr);
1166    else {
1167        if (!abort_pagein) {
1168            /* commit our pages */
1169			kr = commit_upl(pl, pl_offset, total_size, UPL_COMMIT_FREE_ON_EMPTY, 0);
1170        }
1171    }
1172
1173out:
1174	if (data) ubc_upl_unmap(pl);
1175    if (hdr) FREE(hdr, M_TEMP);
1176	if (cmpdata_locked) decmpfs_unlock_compressed_data(cp, 0);
1177    if (err)
1178        ErrorLog("err %d\n", err);
1179
1180	return err;
1181}
1182
1183errno_t
1184decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_cnode *cp)
1185{
1186    /* handles a read request from vfs for a compressed file */
1187
1188    uio_t uio                    = ap->a_uio;
1189    vnode_t vp                   = ap->a_vp;
1190    int err                      = 0;
1191    int countInt                 = 0;
1192    off_t uplPos                 = 0;
1193    user_ssize_t uplSize         = 0;
1194    user_ssize_t uplRemaining    = 0;
1195    off_t curUplPos              = 0;
1196    user_ssize_t curUplSize      = 0;
1197    kern_return_t kr             = KERN_SUCCESS;
1198    int abort_read               = 0;
1199    void *data                   = NULL;
1200    uint64_t did_read            = 0;
1201    upl_t upl                    = NULL;
1202    upl_page_info_t *pli         = NULL;
1203    decmpfs_header *hdr          = NULL;
1204    uint64_t cachedSize          = 0;
1205    off_t uioPos                 = 0;
1206    user_ssize_t uioRemaining    = 0;
1207	int cmpdata_locked           = 0;
1208
1209	decmpfs_lock_compressed_data(cp, 0); cmpdata_locked = 1;
1210
1211    uplPos = uio_offset(uio);
1212    uplSize = uio_resid(uio);
1213    VerboseLog("uplPos %lld uplSize %lld\n", uplPos, uplSize);
1214
1215    cachedSize = decmpfs_cnode_get_vnode_cached_size(cp);
1216
1217    if ((uint64_t)uplPos + uplSize > cachedSize) {
1218        /* truncate the read to the size of the file */
1219        uplSize = cachedSize - uplPos;
1220    }
1221
1222    /* give the cluster layer a chance to fill in whatever it already has */
1223    countInt = (uplSize > INT_MAX) ? INT_MAX : uplSize;
1224    err = cluster_copy_ubc_data(vp, uio, &countInt, 0);
1225    if (err != 0)
1226        goto out;
1227
1228    /* figure out what's left */
1229    uioPos = uio_offset(uio);
1230    uioRemaining = uio_resid(uio);
1231    if ((uint64_t)uioPos + uioRemaining > cachedSize) {
1232        /* truncate the read to the size of the file */
1233        uioRemaining = cachedSize - uioPos;
1234    }
1235
1236    if (uioRemaining <= 0) {
1237        /* nothing left */
1238        goto out;
1239    }
1240
1241    err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0);
1242    if (err != 0) {
1243        goto out;
1244    }
1245    if (!compression_type_valid(hdr)) {
1246        err = ENOTSUP;
1247        goto out;
1248    }
1249
1250    uplPos = uioPos;
1251    uplSize = uioRemaining;
1252#if COMPRESSION_DEBUG
1253    char path[PATH_MAX];
1254    DebugLog("%s: uplPos %lld uplSize %lld\n", vnpath(vp, path, sizeof(path)), (uint64_t)uplPos, (uint64_t)uplSize);
1255#endif
1256
1257    lck_rw_lock_shared(decompressorsLock);
1258    decmpfs_adjust_fetch_region_func adjust_fetch = decmp_get_func(hdr->compression_type, adjust_fetch);
1259    if (adjust_fetch) {
1260        /* give the compressor a chance to adjust the portion of the file that we read */
1261		adjust_fetch(vp, decmpfs_ctx, hdr, &uplPos, &uplSize);
1262        VerboseLog("adjusted uplPos %lld uplSize %lld\n", (uint64_t)uplPos, (uint64_t)uplSize);
1263    }
1264    lck_rw_unlock_shared(decompressorsLock);
1265
1266    /* clip the adjusted size to the size of the file */
1267    if ((uint64_t)uplPos + uplSize > cachedSize) {
1268        /* truncate the read to the size of the file */
1269        uplSize = cachedSize - uplPos;
1270    }
1271
1272    if (uplSize <= 0) {
1273        /* nothing left */
1274        goto out;
1275    }
1276
1277    /*
1278     since we're going to create a upl for the given region of the file,
1279     make sure we're on page boundaries
1280     */
1281
1282    if (uplPos & (PAGE_SIZE - 1)) {
1283        /* round position down to page boundary */
1284        uplSize += (uplPos & (PAGE_SIZE - 1));
1285        uplPos &= ~(PAGE_SIZE - 1);
1286    }
1287    /* round size up to page multiple */
1288    uplSize = (uplSize + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
1289
1290    VerboseLog("new uplPos %lld uplSize %lld\n", (uint64_t)uplPos, (uint64_t)uplSize);
1291
1292    uplRemaining = uplSize;
1293    curUplPos = uplPos;
1294    curUplSize = 0;
1295
1296    while(uplRemaining > 0) {
1297        /* start after the last upl */
1298        curUplPos += curUplSize;
1299
1300        /* clip to max upl size */
1301        curUplSize = uplRemaining;
1302        if (curUplSize > MAX_UPL_SIZE * PAGE_SIZE) {
1303            curUplSize = MAX_UPL_SIZE * PAGE_SIZE;
1304        }
1305
1306        /* create the upl */
1307        kr = ubc_create_upl(vp, curUplPos, curUplSize, &upl, &pli, UPL_SET_LITE);
1308        if (kr != KERN_SUCCESS) {
1309            ErrorLog("ubc_create_upl error %d\n", (int)kr);
1310            err = EINVAL;
1311            goto out;
1312        }
1313        VerboseLog("curUplPos %lld curUplSize %lld\n", (uint64_t)curUplPos, (uint64_t)curUplSize);
1314
1315        /* map the upl */
1316        kr = ubc_upl_map(upl, (vm_offset_t*)&data);
1317        if (kr != KERN_SUCCESS) {
1318            ErrorLog("ubc_upl_map error %d\n", (int)kr);
1319            err = EINVAL;
1320            goto out;
1321        }
1322
1323        /* make sure the map succeeded */
1324        if (!data) {
1325            ErrorLog("ubc_upl_map mapped null\n");
1326            err = EINVAL;
1327            goto out;
1328        }
1329
1330        /* fetch uncompressed data into the mapped upl */
1331        decmpfs_vector vec;
1332    decompress:
1333        vec = (decmpfs_vector){ .buf = data, .size = curUplSize };
1334        err = decmpfs_fetch_uncompressed_data(vp, cp, hdr, curUplPos, curUplSize, 1, &vec, &did_read);
1335        if (err) {
1336            ErrorLog("decmpfs_fetch_uncompressed_data err %d\n", err);
1337
1338            /* maybe the file is converting to decompressed */
1339            int cmp_state = decmpfs_fast_get_state(cp);
1340            if (cmp_state == FILE_IS_CONVERTING) {
1341                ErrorLog("cmp_state == FILE_IS_CONVERTING\n");
1342                cmp_state = wait_for_decompress(cp);
1343                if (cmp_state == FILE_IS_COMPRESSED) {
1344                    ErrorLog("cmp_state == FILE_IS_COMPRESSED\n");
1345                    /* a decompress was attempted but it failed, let's try fetching again */
1346                    goto decompress;
1347                }
1348            }
1349            if (cmp_state == FILE_IS_NOT_COMPRESSED) {
1350                ErrorLog("cmp_state == FILE_IS_NOT_COMPRESSED\n");
1351                /* the file was decompressed after we started reading it */
1352                abort_read = 1;     /* we're not going to commit our data */
1353                *is_compressed = 0; /* instruct caller to fall back to its normal path */
1354            }
1355            kr = KERN_FAILURE;
1356            did_read = 0;
1357        }
1358        /* zero out the remainder of the last page */
1359        memset((char*)data + did_read, 0, curUplSize - did_read);
1360        kr = ubc_upl_unmap(upl);
1361        if (kr == KERN_SUCCESS) {
1362            if (abort_read) {
1363				kr = commit_upl(upl, 0, curUplSize, UPL_ABORT_FREE_ON_EMPTY, 1);
1364            } else {
1365                VerboseLog("uioPos %lld uioRemaining %lld\n", (uint64_t)uioPos, (uint64_t)uioRemaining);
1366                if (uioRemaining) {
1367                    off_t uplOff = uioPos - curUplPos;
1368                    if (uplOff < 0) {
1369                        ErrorLog("uplOff %lld should never be negative\n", (int64_t)uplOff);
1370                        err = EINVAL;
1371                    } else {
1372                        off_t count = curUplPos + curUplSize - uioPos;
1373                        if (count < 0) {
1374                            /* this upl is entirely before the uio */
1375                        } else {
1376                            if (count > uioRemaining)
1377                                count = uioRemaining;
1378                            int io_resid = count;
1379                            err = cluster_copy_upl_data(uio, upl, uplOff, &io_resid);
1380                            int copied = count - io_resid;
1381                            VerboseLog("uplOff %lld count %lld copied %lld\n", (uint64_t)uplOff, (uint64_t)count, (uint64_t)copied);
1382                            if (err) {
1383                                ErrorLog("cluster_copy_upl_data err %d\n", err);
1384                            }
1385                            uioPos += copied;
1386                            uioRemaining -= copied;
1387                        }
1388                    }
1389                }
1390				kr = commit_upl(upl, 0, curUplSize, UPL_COMMIT_FREE_ON_EMPTY | UPL_COMMIT_INACTIVATE, 0);
1391                if (err) {
1392                    goto out;
1393                }
1394            }
1395        } else {
1396            ErrorLog("ubc_upl_unmap error %d\n", (int)kr);
1397        }
1398
1399        uplRemaining -= curUplSize;
1400    }
1401
1402out:
1403    if (hdr) FREE(hdr, M_TEMP);
1404	if (cmpdata_locked) decmpfs_unlock_compressed_data(cp, 0);
1405    if (err) {/* something went wrong */
1406        ErrorLog("err %d\n", err);
1407        return err;
1408    }
1409
1410#if COMPRESSION_DEBUG
1411    uplSize = uio_resid(uio);
1412    if (uplSize)
1413        VerboseLog("still %lld bytes to copy\n", uplSize);
1414#endif
1415    return 0;
1416}
1417
1418int
1419decmpfs_free_compressed_data(vnode_t vp, decmpfs_cnode *cp)
1420{
1421    /*
1422     call out to the decompressor to free remove any data associated with this compressed file
1423     then delete the file's compression xattr
1424     */
1425
1426    decmpfs_header *hdr = NULL;
1427    int err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0);
1428    if (err) {
1429        ErrorLog("decmpfs_fetch_compressed_header err %d\n", err);
1430    } else {
1431        lck_rw_lock_shared(decompressorsLock);
1432        decmpfs_free_compressed_data_func free_data = decmp_get_func(hdr->compression_type, free_data);
1433        if (free_data) {
1434			err = free_data(vp, decmpfs_ctx, hdr);
1435        } else {
1436            /* nothing to do, so no error */
1437            err = 0;
1438        }
1439        lck_rw_unlock_shared(decompressorsLock);
1440
1441        if (err != 0) {
1442            ErrorLog("decompressor err %d\n", err);
1443        }
1444    }
1445
1446    /* delete the xattr */
1447	err = vn_removexattr(vp, DECMPFS_XATTR_NAME, 0, decmpfs_ctx);
1448    if (err != 0) {
1449        goto out;
1450    }
1451
1452out:
1453    if (hdr) FREE(hdr, M_TEMP);
1454    return err;
1455}
1456
1457#pragma mark --- file conversion routines ---
1458
1459static int
1460unset_compressed_flag(vnode_t vp)
1461{
1462    int err = 0;
1463    struct vnode_attr va;
1464    int new_bsdflags = 0;
1465
1466    VATTR_INIT(&va);
1467    VATTR_WANTED(&va, va_flags);
1468	err = vnode_getattr(vp, &va, decmpfs_ctx);
1469
1470    if (err != 0) {
1471        ErrorLog("vnode_getattr err %d\n", err);
1472    } else {
1473        new_bsdflags = va.va_flags & ~UF_COMPRESSED;
1474
1475        VATTR_INIT(&va);
1476        VATTR_SET(&va, va_flags, new_bsdflags);
1477		err = vnode_setattr(vp, &va, decmpfs_ctx);
1478        if (err != 0) {
1479            ErrorLog("vnode_setattr err %d\n", err);
1480        }
1481    }
1482    return err;
1483}
1484
/*
 * Convert a compressed file into an uncompressed one by fetching its
 * uncompressed bytes and writing them into the file with VNOP_WRITE.
 *
 * vp            - vnode of the file to convert
 * cp            - decmpfs state for the vnode
 * toSize        - number of bytes to write out: -1 means the header's
 *                 uncompressed size, 0 skips the copy entirely, and a value
 *                 larger than the uncompressed size is clamped to it
 * truncate_okay - when non-zero, a failed VNOP_WRITE is followed by
 *                 vnode_setsize(vp, 0, ...) to discard what was written
 * skiplock      - when non-zero, the compressed-data lock is not taken here
 *                 (presumably the caller already holds it - confirm)
 *
 * Returns 0 on success, or when the file is already not compressed;
 * otherwise an errno value. On any failure the vnode state is set to
 * FILE_TYPE_UNKNOWN; on success it is set to FILE_IS_NOT_COMPRESSED. In both
 * cases anyone sleeping on decompress_channel is woken.
 */
int
decmpfs_decompress_file(vnode_t vp, decmpfs_cnode *cp, off_t toSize, int truncate_okay, int skiplock)
{
	/* convert a compressed file to an uncompressed file */

	int err                      = 0;
	char *data                   = NULL;
	uio_t uio_w                  = 0;
	off_t offset                 = 0;
	uint32_t old_state           = 0;
	uint32_t new_state           = 0;
	int update_file_state        = 0;
	int allocSize                = 0;
	decmpfs_header *hdr = NULL;
	int cmpdata_locked           = 0;
	off_t remaining              = 0;
	uint64_t uncompressed_size   = 0;

	if (!skiplock) {
		/* take the compressed-data lock in exclusive mode (second argument 1) */
		decmpfs_lock_compressed_data(cp, 1); cmpdata_locked = 1;
	}

decompress:
	old_state = decmpfs_fast_get_state(cp);

	switch(old_state) {
		case FILE_IS_NOT_COMPRESSED:
		{
			/* someone else decompressed the file */
			err = 0;
			goto out;
		}

		case FILE_TYPE_UNKNOWN:
		{
			/* the file is in an unknown state, so update the state and retry */
			(void)decmpfs_file_is_compressed(vp, cp);

			/*
			 try again
			 NOTE(review): this retries for as long as the state stays FILE_TYPE_UNKNOWN;
			 confirm that decmpfs_file_is_compressed always moves the state off unknown
			 */
			goto decompress;
		}

		case FILE_IS_COMPRESSED:
		{
			/* the file is compressed, so decompress it */
			break;
		}

		default:
		{
			/*
			 this shouldn't happen since multiple calls to decmpfs_decompress_file lock each other out,
			 and when decmpfs_decompress_file returns, the state should always be set back to
			 FILE_IS_NOT_COMPRESSED or FILE_TYPE_UNKNOWN
			 */
			err = EINVAL;
			goto out;
		}
	}

    err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0);
	if (err != 0) {
		goto out;
	}

	/* remembered so the failure path at out: can restore the ubc size */
	uncompressed_size = hdr->uncompressed_size;
	if (toSize == -1)
		toSize = hdr->uncompressed_size;

	if (toSize == 0) {
		/* special case truncating the file to zero bytes */
		goto nodecmp;
	} else if ((uint64_t)toSize > hdr->uncompressed_size) {
		/* the caller is trying to grow the file, so we should decompress all the data */
		toSize = hdr->uncompressed_size;
	}

	/* copy through a bounce buffer of at most 64KB */
	allocSize = MIN(64*1024, toSize);
	MALLOC(data, char *, allocSize, M_TEMP, M_WAITOK);
	if (!data) {
		err = ENOMEM;
		goto out;
	}

	uio_w = uio_create(1, 0LL, UIO_SYSSPACE, UIO_WRITE);
	if (!uio_w) {
		err = ENOMEM;
		goto out;
	}
	/* mark the uio as a write on behalf of a compressed file */
	uio_w->uio_flags |= UIO_FLAGS_IS_COMPRESSED_FILE;

	remaining = toSize;

	/* tell the buffer cache that this is an empty file */
	ubc_setsize(vp, 0);

	/* if we got here, we need to decompress the file */
	decmpfs_cnode_set_vnode_state(cp, FILE_IS_CONVERTING, 1);

	while(remaining > 0) {
		/* loop decompressing data from the file and writing it into the data fork */

		uint64_t bytes_read = 0;
		decmpfs_vector vec = { .buf = data, .size = MIN(allocSize, remaining) };
		err = decmpfs_fetch_uncompressed_data(vp, cp, hdr, offset, vec.size, 1, &vec, &bytes_read);
		if (err != 0) {
			/* a fetch failure goes straight to out:, bypassing the truncate handling below */
			ErrorLog("decmpfs_fetch_uncompressed_data err %d\n", err);
			goto out;
		}

		if (bytes_read == 0) {
			/* we're done reading data */
			break;
		}

		/* point the write uio at the bytes just fetched, at the same file offset */
		uio_reset(uio_w, offset, UIO_SYSSPACE, UIO_WRITE);
		err = uio_addiov(uio_w, CAST_USER_ADDR_T(data), bytes_read);
		if (err != 0) {
			ErrorLog("uio_addiov err %d\n", err);
			err = ENOMEM;
			goto out;
		}

		err = VNOP_WRITE(vp, uio_w, 0, decmpfs_ctx);
		if (err != 0) {
			/* if the write failed, truncate the file to zero bytes */
			ErrorLog("VNOP_WRITE err %d\n", err);
			break;
		}
		offset += bytes_read;
		remaining -= bytes_read;
	}

	if (err == 0) {
		/* the loop can end early when a fetch returns zero bytes, so verify everything was written */
		if (offset != toSize) {
			ErrorLog("file decompressed to %lld instead of %lld\n", offset, toSize);
			err = EINVAL;
			goto out;
		}
	}

	if (err == 0) {
		/* sync the data and metadata */
		err = VNOP_FSYNC(vp, MNT_WAIT, decmpfs_ctx);
		if (err != 0) {
			/* NOTE(review): this jumps directly to out:, so the truncate below is not applied to fsync failures */
			ErrorLog("VNOP_FSYNC err %d\n", err);
			goto out;
		}
	}

	if (err != 0) {
		/*
		 write, setattr, or fsync failed
		 (as written, only a failed VNOP_WRITE reaches this point; every other failure above jumps to out:)
		 */
		ErrorLog("aborting decompress, err %d\n", err);
		if (truncate_okay) {
			/* truncate anything we might have written */
			int error = vnode_setsize(vp, 0, 0, decmpfs_ctx);
			/* logged unconditionally, even when vnode_setsize returned 0 */
			ErrorLog("vnode_setsize err %d\n", error);
		}
		goto out;
	}

nodecmp:
	/* if we're truncating the file to zero bytes, we'll skip ahead to here */

	/* unset the compressed flag (the return value is deliberately ignored; see below) */
	unset_compressed_flag(vp);

	/* free the compressed data associated with this file */
	err = decmpfs_free_compressed_data(vp, cp);
	if (err != 0) {
		ErrorLog("decmpfs_free_compressed_data err %d\n", err);
	}

	/*
	 even if free_compressed_data or vnode_getattr/vnode_setattr failed, return success
	 since we succeeded in writing all of the file data to the data fork
	 */
	err = 0;

	/* if we got this far, the file was successfully decompressed */
	update_file_state = 1;
	new_state = FILE_IS_NOT_COMPRESSED;

#if COMPRESSION_DEBUG
	{
		uint64_t filesize = 0;
		vnsize(vp, &filesize);
		DebugLog("new file size %lld\n", filesize);
	}
#endif

out:
	/* common exit: release the temporary resources before touching the state */
	if (hdr) FREE(hdr, M_TEMP);
	if (data) FREE(data, M_TEMP);
	if (uio_w) uio_free(uio_w);

	if (err != 0) {
		/* if there was a failure, reset compression flags to unknown and clear the buffer cache data */
		update_file_state = 1;
		new_state = FILE_TYPE_UNKNOWN;
		if (uncompressed_size) {
			/* set the ubc size to zero and then back to the uncompressed size */
			ubc_setsize(vp, 0);
			ubc_setsize(vp, uncompressed_size);
        }
	}

	if (update_file_state) {
		/* change the state and issue the wakeup while holding the channel mutex */
		lck_mtx_lock(decompress_channel_mtx);
		decmpfs_cnode_set_vnode_state(cp, new_state, 1);
		wakeup((caddr_t)&decompress_channel); /* wake up anyone who might have been waiting for decompression */
		lck_mtx_unlock(decompress_channel_mtx);
	}

	if (cmpdata_locked) decmpfs_unlock_compressed_data(cp, 1);

	return err;
}
1702
1703#pragma mark --- Type1 compressor ---
1704
1705/*
1706 The "Type1" compressor stores the data fork directly in the compression xattr
1707 */
1708
1709static int
1710decmpfs_validate_compressed_file_Type1(__unused vnode_t vp, __unused vfs_context_t ctx, decmpfs_header *hdr)
1711{
1712    int err          = 0;
1713
1714    if (hdr->uncompressed_size + sizeof(decmpfs_disk_header) != (uint64_t)hdr->attr_size) {
1715        err = EINVAL;
1716        goto out;
1717    }
1718out:
1719    return err;
1720}
1721
1722static int
1723decmpfs_fetch_uncompressed_data_Type1(__unused vnode_t vp, __unused vfs_context_t ctx, decmpfs_header *hdr, off_t offset, user_ssize_t size, int nvec, decmpfs_vector *vec, uint64_t *bytes_read)
1724{
1725    int err          = 0;
1726    int i;
1727    user_ssize_t remaining;
1728
1729    if (hdr->uncompressed_size + sizeof(decmpfs_disk_header) != (uint64_t)hdr->attr_size) {
1730        err = EINVAL;
1731        goto out;
1732    }
1733
1734#if COMPRESSION_DEBUG
1735    static int dummy = 0; // prevent syslog from coalescing printfs
1736    char path[PATH_MAX];
1737    DebugLog("%s: %d memcpy %lld at %lld\n", vnpath(vp, path, sizeof(path)), dummy++, size, (uint64_t)offset);
1738#endif
1739
1740    remaining = size;
1741    for (i = 0; (i < nvec) && (remaining > 0); i++) {
1742        user_ssize_t curCopy = vec[i].size;
1743        if (curCopy > remaining)
1744            curCopy = remaining;
1745        memcpy(vec[i].buf, hdr->attr_bytes + offset, curCopy);
1746        offset += curCopy;
1747        remaining -= curCopy;
1748    }
1749
1750    if ((bytes_read) && (err == 0))
1751        *bytes_read = (size - remaining);
1752
1753out:
1754    return err;
1755}
1756
1757static decmpfs_registration Type1Reg =
1758{
1759    .decmpfs_registration = DECMPFS_REGISTRATION_VERSION,
1760    .validate          = decmpfs_validate_compressed_file_Type1,
1761    .adjust_fetch      = NULL, /* no adjust necessary */
1762    .fetch             = decmpfs_fetch_uncompressed_data_Type1,
1763    .free_data         = NULL, /* no free necessary */
1764    .get_flags         = NULL  /* no flags */
1765};
1766
1767#pragma mark --- decmpfs initialization ---
1768
1769void decmpfs_init()
1770{
1771    static int done = 0;
1772    if (done) return;
1773
1774	decmpfs_ctx = vfs_context_create(vfs_context_kernel());
1775
1776    lck_grp_attr_t *attr = lck_grp_attr_alloc_init();
1777    decmpfs_lockgrp = lck_grp_alloc_init("VFSCOMP",  attr);
1778    decompressorsLock = lck_rw_alloc_init(decmpfs_lockgrp, NULL);
1779    decompress_channel_mtx = lck_mtx_alloc_init(decmpfs_lockgrp, NULL);
1780
1781    register_decmpfs_decompressor(CMP_Type1, &Type1Reg);
1782
1783    done = 1;
1784}
1785#endif /* HFS_COMPRESSION */
1786