/*
 * Copyright (c) 1999-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 *	File:	ubc_subr.c
 *	Author:	Umesh Vaishampayan [umeshv@apple.com]
 *		05-Aug-1999	umeshv	Created.
 *
 *	Functions related to Unified Buffer cache.
 *
 * Callers of UBC functions MUST hold a valid reference on the vnode.
 *
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mman.h>
#include <sys/mount_internal.h>
#include <sys/vnode_internal.h>
#include <sys/ubc_internal.h>
#include <sys/ucred.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/buf.h>
#include <sys/user.h>
#include <sys/codesign.h>

#include <mach/mach_types.h>
#include <mach/memory_object_types.h>
#include <mach/memory_object_control.h>
#include <mach/vm_map.h>
#include <mach/upl.h>

#include <kern/kern_types.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>
#include <kern/thread.h>
#include <vm/vm_kern.h>
#include <vm/vm_protos.h> /* last */

#include <libkern/crypto/sha1.h>

#include <security/mac_framework.h>

/* XXX These should be in a BSD accessible Mach header, but aren't. */
extern kern_return_t memory_object_pages_resident(memory_object_control_t,
							boolean_t *);
extern kern_return_t	memory_object_signed(memory_object_control_t control,
					     boolean_t is_signed);
extern void Debugger(const char *message);


/* XXX no one uses this interface! */
kern_return_t ubc_page_op_with_control(
	memory_object_control_t	 control,
	off_t			 f_offset,
	int			 ops,
	ppnum_t			 *phys_entryp,
	int			 *flagsp);


#if DIAGNOSTIC
#if defined(assert)
#undef assert
#endif
#define assert(cond)    \
    ((void) ((cond) ? 0 : panic("Assert failed: %s", # cond)))
#else
#include <kern/assert.h>
#endif /* DIAGNOSTIC */

static int ubc_info_init_internal(struct vnode *vp, int withfsize, off_t filesize);
static int ubc_umcallback(vnode_t, void *);
static int ubc_msync_internal(vnode_t, off_t, off_t, off_t *, int, int *);
static void ubc_cs_free(struct ubc_info *uip);

struct zone	*ubc_info_zone;


/*
 * CODESIGNING
 * Routines to navigate code signing data structures in the kernel...
 */
static boolean_t
cs_valid_range(
	const void *start,
	const void *end,
	const void *lower_bound,
	const void *upper_bound)
{
	if (upper_bound < lower_bound ||
	    end < start) {
		return FALSE;
	}

	if (start < lower_bound ||
	    end > upper_bound) {
		return FALSE;
	}

	return TRUE;
}

/*
 * Magic numbers used by Code Signing
 */
enum {
	CSMAGIC_REQUIREMENT = 0xfade0c00,		/* single Requirement blob */
	CSMAGIC_REQUIREMENTS = 0xfade0c01,		/* Requirements vector (internal requirements) */
	CSMAGIC_CODEDIRECTORY = 0xfade0c02,		/* CodeDirectory blob */
	CSMAGIC_EMBEDDED_SIGNATURE = 0xfade0cc0,	/* embedded form of signature data */
	CSMAGIC_EMBEDDED_SIGNATURE_OLD = 0xfade0b02,	/* XXX */
	CSMAGIC_DETACHED_SIGNATURE = 0xfade0cc1,	/* multi-arch collection of embedded signatures */

	CSSLOT_CODEDIRECTORY = 0,			/* slot index for CodeDirectory */
};


/*
 * Structure of an embedded-signature SuperBlob
 */
typedef struct __BlobIndex {
	uint32_t type;					/* type of entry */
	uint32_t offset;				/* offset of entry */
} CS_BlobIndex;

typedef struct __SuperBlob {
	uint32_t magic;					/* magic number */
	uint32_t length;				/* total length of SuperBlob */
	uint32_t count;					/* number of index entries following */
	CS_BlobIndex index[];				/* (count) entries */
	/* followed by Blobs in no particular order as indicated by offsets in index */
} CS_SuperBlob;


/*
 * C form of a CodeDirectory.
 */
typedef struct __CodeDirectory {
	uint32_t magic;					/* magic number (CSMAGIC_CODEDIRECTORY) */
	uint32_t length;				/* total length of CodeDirectory blob */
	uint32_t version;				/* compatibility version */
	uint32_t flags;					/* setup and mode flags */
	uint32_t hashOffset;				/* offset of hash slot element at index zero */
	uint32_t identOffset;				/* offset of identifier string */
	uint32_t nSpecialSlots;				/* number of special hash slots */
	uint32_t nCodeSlots;				/* number of ordinary (code) hash slots */
	uint32_t codeLimit;				/* limit to main image signature range */
	uint8_t hashSize;				/* size of each hash in bytes */
	uint8_t hashType;				/* type of hash (cdHashType* constants) */
	uint8_t spare1;					/* unused (must be zero) */
	uint8_t	pageSize;				/* log2(page size in bytes); 0 => infinite */
	uint32_t spare2;				/* unused (must be zero) */
	/* followed by dynamic content as located by offset fields above */
} CS_CodeDirectory;


/*
 * Locate the CodeDirectory from an embedded signature blob
 */
static const
CS_CodeDirectory *findCodeDirectory(
	const CS_SuperBlob *embedded,
	char *lower_bound,
	char *upper_bound)
{
	const CS_CodeDirectory *cd = NULL;

	if (embedded &&
	    cs_valid_range(embedded, embedded + 1, lower_bound, upper_bound) &&
	    ntohl(embedded->magic) == CSMAGIC_EMBEDDED_SIGNATURE) {
		const CS_BlobIndex *limit;
		const CS_BlobIndex *p;

		limit = &embedded->index[ntohl(embedded->count)];
		if (!cs_valid_range(&embedded->index[0], limit,
				    lower_bound, upper_bound)) {
			return NULL;
		}
		for (p = embedded->index; p < limit; ++p) {
			if (ntohl(p->type) == CSSLOT_CODEDIRECTORY) {
				const unsigned char *base;

				base = (const unsigned char *)embedded;
				cd = (const CS_CodeDirectory *)(base + ntohl(p->offset));
				break;
			}
		}
	} else {
		/*
		 * Detached signatures come as a bare CS_CodeDirectory,
		 * without a blob.
		 */
		cd = (const CS_CodeDirectory *) embedded;
	}
	if (cd &&
	    cs_valid_range(cd, cd + 1, lower_bound, upper_bound) &&
	    cs_valid_range(cd, (const char *) cd + ntohl(cd->length),
			   lower_bound, upper_bound) &&
	    cs_valid_range(cd, (const char *) cd + ntohl(cd->hashOffset),
			   lower_bound, upper_bound) &&
	    cs_valid_range(cd, (const char *) cd +
			   ntohl(cd->hashOffset) +
			   (ntohl(cd->nCodeSlots) * SHA1_RESULTLEN),
			   lower_bound, upper_bound) &&

	    ntohl(cd->magic) == CSMAGIC_CODEDIRECTORY) {
		return cd;
	}

	/* not found or not a valid code directory */
	return NULL;
}


/*
 * Locating a page hash
 */
static const unsigned char *
hashes(
	const CS_CodeDirectory *cd,
	unsigned page,
	char *lower_bound,
	char *upper_bound)
{
	const unsigned char *base, *top, *hash;
	uint32_t nCodeSlots;

	assert(cs_valid_range(cd, cd + 1, lower_bound, upper_bound));

	base = (const unsigned char *)cd + ntohl(cd->hashOffset);
	nCodeSlots = ntohl(cd->nCodeSlots);
	top = base + nCodeSlots * SHA1_RESULTLEN;
	if (!cs_valid_range(base, top,
			    lower_bound, upper_bound) ||
	    page >= nCodeSlots) {
		return NULL;
	}
	assert(page < nCodeSlots);

	hash = base + page * SHA1_RESULTLEN;
	if (!cs_valid_range(hash, hash + SHA1_RESULTLEN,
			    lower_bound, upper_bound)) {
		hash = NULL;
	}

	return hash;
}
/*
 * CODESIGNING
 * End of routines to navigate code signing data structures in the kernel.
 */
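
/*
 * Taken together, findCodeDirectory() and hashes() are the building
 * blocks for page-level signature checks.  As an illustrative sketch
 * only (this helper is hypothetical, not part of this file), a verifier
 * holding a signature blob already known to lie within [lower_bound,
 * upper_bound) could validate one page like so:
 *
 *	static boolean_t
 *	cs_verify_page_sketch(const CS_SuperBlob *blob,
 *	    char *lower_bound, char *upper_bound,
 *	    unsigned page, const void *data, size_t size)
 *	{
 *		const CS_CodeDirectory *cd;
 *		const unsigned char *expected;
 *		unsigned char actual[SHA1_RESULTLEN];
 *		SHA1_CTX sha1ctxt;
 *
 *		cd = findCodeDirectory(blob, lower_bound, upper_bound);
 *		if (cd == NULL)
 *			return FALSE;
 *		expected = hashes(cd, page, lower_bound, upper_bound);
 *		if (expected == NULL)
 *			return FALSE;
 *		SHA1Init(&sha1ctxt);
 *		SHA1Update(&sha1ctxt, data, size);
 *		SHA1Final(actual, &sha1ctxt);
 *		return (bcmp(expected, actual, SHA1_RESULTLEN) == 0);
 *	}
 */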


/*
 * ubc_init
 *
 * Initialization of the zone for Unified Buffer Cache.
 *
 * Parameters:	(void)
 *
 * Returns:	(void)
 *
 * Implicit returns:
 *		ubc_info_zone(global)	initialized for subsequent allocations
 */
__private_extern__ void
ubc_init(void)
{
	vm_size_t	i;

	i = (vm_size_t) sizeof (struct ubc_info);

	ubc_info_zone = zinit(i, 10000*i, 8192, "ubc_info zone");
}


/*
 * ubc_info_init
 *
 * Allocate and attach an empty ubc_info structure to a vnode
 *
 * Parameters:	vp			Pointer to the vnode
 *
 * Returns:	0			Success
 *	vnode_size:ENOMEM		Not enough space
 *	vnode_size:???			Other error from vnode_getattr
 *
 */
int
ubc_info_init(struct vnode *vp)
{
	return(ubc_info_init_internal(vp, 0, 0));
}


/*
 * ubc_info_init_withsize
 *
 * Allocate and attach a sized ubc_info structure to a vnode
 *
 * Parameters:	vp			Pointer to the vnode
 *		filesize		The size of the file
 *
 * Returns:	0			Success
 *	vnode_size:ENOMEM		Not enough space
 *	vnode_size:???			Other error from vnode_getattr
 */
int
ubc_info_init_withsize(struct vnode *vp, off_t filesize)
{
	return(ubc_info_init_internal(vp, 1, filesize));
}


/*
 * ubc_info_init_internal
 *
 * Allocate and attach a ubc_info structure to a vnode
 *
 * Parameters:	vp			Pointer to the vnode
 *		withfsize{0,1}		Zero if the size should be obtained
 *					from the vnode; otherwise, use filesize
 *		filesize		The size of the file, if withfsize == 1
 *
 * Returns:	0			Success
 *	vnode_size:ENOMEM		Not enough space
 *	vnode_size:???			Other error from vnode_getattr
 *
 * Notes:	We call a blocking zalloc(), and the zone was created as an
 *		expandable and collectable zone, so if no memory is available,
 *		it is possible for zalloc() to block indefinitely.  zalloc()
 *		may also panic if the zone of zones is exhausted, since it's
 *		NOT expandable.
 *
 *		We unconditionally call vnode_pager_setup(), even if this is
 *		a reuse of a ubc_info; in that case, we should probably assert
 *		that it does not already have a pager association, but do not.
 *
 *		Since memory_object_create_named() can only fail from receiving
 *		an invalid pager argument, the explicit check and panic is
 *		merely precautionary.
 */
static int
ubc_info_init_internal(vnode_t vp, int withfsize, off_t filesize)
{
	register struct ubc_info	*uip;
	void *	pager;
	int error = 0;
	kern_return_t kret;
	memory_object_control_t control;

	uip = vp->v_ubcinfo;

	/*
	 * If there is not already a ubc_info attached to the vnode, we
	 * attach one; otherwise, we will reuse the one that's there.
	 */
	if (uip == UBC_INFO_NULL) {

		uip = (struct ubc_info *) zalloc(ubc_info_zone);
		bzero((char *)uip, sizeof(struct ubc_info));

		uip->ui_vnode = vp;
		uip->ui_flags = UI_INITED;
		uip->ui_ucred = NOCRED;
	}
	assert(uip->ui_flags != UI_NONE);
	assert(uip->ui_vnode == vp);

	/* now set this ubc_info in the vnode */
	vp->v_ubcinfo = uip;

	/*
	 * Allocate a pager object for this vnode
	 *
	 * XXX The value of the pager parameter is currently ignored.
	 * XXX Presumably, this API changed to avoid the race between
	 * XXX setting the pager and the UI_HASPAGER flag.
	 */
	pager = (void *)vnode_pager_setup(vp, uip->ui_pager);
	assert(pager);

	/*
	 * Explicitly set the pager into the ubc_info, after setting the
	 * UI_HASPAGER flag.
	 */
	SET(uip->ui_flags, UI_HASPAGER);
	uip->ui_pager = pager;

	/*
	 * Note: We cannot use VNOP_GETATTR() to get an accurate
	 * value of ui_size because this may be an NFS vnode, and
	 * nfs_getattr() can call vinvalbuf(); if this happens,
	 * ubc_info is not set up to deal with that event.
	 * So use bogus size.
	 */

	/*
	 * Create a vnode - vm_object association.
	 * memory_object_create_named() creates a "named" reference on the
	 * memory object; we hold this reference as long as the vnode is
	 * "alive."  Since memory_object_create_named() took its own reference
	 * on the vnode pager we passed it, we can drop the reference
	 * vnode_pager_setup() returned here.
	 */
	kret = memory_object_create_named(pager,
		(memory_object_size_t)uip->ui_size, &control);
	vnode_pager_deallocate(pager);
	if (kret != KERN_SUCCESS)
		panic("ubc_info_init: memory_object_create_named returned %d", kret);

	assert(control);
	uip->ui_control = control;	/* cache the value of the mo control */
	SET(uip->ui_flags, UI_HASOBJREF);	/* with a named reference */

	if (withfsize == 0) {
		/* initialize the size */
		error = vnode_size(vp, &uip->ui_size, vfs_context_current());
		if (error)
			uip->ui_size = 0;
	} else {
		uip->ui_size = filesize;
	}
	vp->v_lflag |= VNAMED_UBC;	/* vnode has a named ubc reference */

	return (error);
}
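
/*
 * For illustration only: a filesystem that already knows the file size
 * at vnode-creation time would normally prefer ubc_info_init_withsize()
 * to ubc_info_init(), avoiding the vnode_size() call made above.  A
 * hypothetical caller (names are illustrative) might do:
 *
 *	error = ubc_info_init_withsize(vp, (off_t)known_filesize);
 *	if (error) {
 *		vnode_put(vp);
 *		return (error);
 *	}
 */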


/*
 * ubc_info_free
 *
 * Free a ubc_info structure
 *
 * Parameters:	uip			A pointer to the ubc_info to free
 *
 * Returns:	(void)
 *
 * Notes:	If there is a credential that has subsequently been associated
 *		with the ubc_info via a call to ubc_setcred(), the reference
 *		to the credential is dropped.
 *
 *		It's actually impossible for a ubc_info.ui_control to take the
 *		value MEMORY_OBJECT_CONTROL_NULL.
 */
static void
ubc_info_free(struct ubc_info *uip)
{
	if (IS_VALID_CRED(uip->ui_ucred)) {
		kauth_cred_unref(&uip->ui_ucred);
	}

	if (uip->ui_control != MEMORY_OBJECT_CONTROL_NULL)
		memory_object_control_deallocate(uip->ui_control);

	cluster_release(uip);
	ubc_cs_free(uip);

	zfree(ubc_info_zone, uip);
	return;
}


void
ubc_info_deallocate(struct ubc_info *uip)
{
	ubc_info_free(uip);
}


/*
 * ubc_setsize
 *
 * Tell the VM that the size of the file represented by the vnode has
 * changed
 *
 * Parameters:	vp			The vp whose backing file size is
 *					being changed
 *		nsize			The new size of the backing file
 *
 * Returns:	1			Success
 *		0			Failure
 *
 * Notes:	This function will indicate failure if the new size that's
 *		being attempted to be set is negative.
 *
 *		This function will fail if there is no ubc_info currently
 *		associated with the vnode.
 *
 *		This function will indicate success if the new size is the
 *		same or larger than the old size (in this case, the remainder
 *		of the file will require modification or use of an existing upl
 *		to access successfully).
 *
 *		This function will fail if the new file size is smaller, and
 *		the memory region being invalidated was unable to actually be
 *		invalidated and/or the last page could not be flushed, if the
 *		new size is not aligned to a page boundary.  This is usually
 *		indicative of an I/O error.
 */
int
ubc_setsize(struct vnode *vp, off_t nsize)
{
	off_t osize;	/* ui_size before change */
	off_t lastpg, olastpgend, lastoff;
	struct ubc_info *uip;
	memory_object_control_t control;
	kern_return_t kret = KERN_SUCCESS;

	if (nsize < (off_t)0)
		return (0);

	if (!UBCINFOEXISTS(vp))
		return (0);

	uip = vp->v_ubcinfo;
	osize = uip->ui_size;
	/*
	 * Update the size before flushing the VM
	 */
	uip->ui_size = nsize;

	if (nsize >= osize)	/* Nothing more to do */
		return (1);	/* return success */

	/*
	 * When the file shrinks, invalidate the pages beyond the
	 * new size. Also get rid of garbage beyond nsize on the
	 * last page. The ui_size already has the nsize, so any
	 * subsequent page-in will zero-fill the tail properly
	 */
	lastpg = trunc_page_64(nsize);
	olastpgend = round_page_64(osize);
	control = uip->ui_control;
	assert(control);
	lastoff = (nsize & PAGE_MASK_64);

	if (lastoff) {
		upl_t		upl;
		upl_page_info_t	*pl;

		/*
		 * new EOF ends up in the middle of a page
		 * zero the tail of this page if it's currently
		 * present in the cache
		 */
		kret = ubc_create_upl(vp, lastpg, PAGE_SIZE, &upl, &pl, UPL_SET_LITE);

		if (kret != KERN_SUCCESS)
			panic("ubc_setsize: ubc_create_upl (error = %d)\n", kret);

		if (upl_valid_page(pl, 0))
			cluster_zero(upl, (uint32_t)lastoff, PAGE_SIZE - (uint32_t)lastoff, NULL);

		ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);

		lastpg += PAGE_SIZE_64;
	}
	if (olastpgend > lastpg) {
		/*
		 * invalidate the pages beyond the new EOF page
		 */
		kret = memory_object_lock_request(control,
						  (memory_object_offset_t)lastpg,
						  (memory_object_size_t)(olastpgend - lastpg), NULL, NULL,
						  MEMORY_OBJECT_RETURN_NONE, MEMORY_OBJECT_DATA_FLUSH,
						  VM_PROT_NO_CHANGE);
		if (kret != KERN_SUCCESS)
			printf("ubc_setsize: invalidate failed (error = %d)\n", kret);
	}
	return ((kret == KERN_SUCCESS) ? 1 : 0);
}
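
/*
 * Note that ubc_setsize() returns 1 on success and 0 on failure, the
 * inverse of an errno-style return.  A hypothetical truncate path
 * (illustrative sketch only) would map the failure back into an errno:
 *
 *	if (ubc_setsize(vp, new_size) == 0)
 *		return (EIO);
 */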


/*
 * ubc_getsize
 *
 * Get the size of the file associated with the specified vnode
 *
 * Parameters:	vp			The vnode whose size is of interest
 *
 * Returns:	0			There is no ubc_info associated with
 *					this vnode, or the size is zero
 *		!0			The size of the file
 *
 * Notes:	Using this routine, it is not possible for a caller to
 *		successfully distinguish between a vnode associated with a
 *		zero length file and a vnode with no associated ubc_info.  The
 *		caller therefore needs to not care, or needs to ensure that
 *		they have previously successfully called ubc_info_init() or
 *		ubc_info_init_withsize().
 */
off_t
ubc_getsize(struct vnode *vp)
{
	/*
	 * Callers depend on the side effect of this returning 0 when there
	 * is no ubc_info, as they also call this for directories.
	 */
	if (!UBCINFOEXISTS(vp))
		return ((off_t)0);
	return (vp->v_ubcinfo->ui_size);
}


/*
 * ubc_umount
 *
 * Call ubc_sync_range(vp, 0, EOF, UBC_PUSHALL) on all the vnodes for this
 * mount point
 *
 * Parameters:	mp			The mount point
 *
 * Returns:	0			Success
 *
 * Notes:	There is no failure indication for this function.
 *
 *		This function is used in the unmount path; since it may block
 *		I/O indefinitely, it should not be used in the forced unmount
 *		path, since a device unavailability could also block that
 *		indefinitely.
 *
 *		Because there is no device ejection interlock on USB, FireWire,
 *		or similar devices, it's possible that an ejection that begins
 *		subsequent to the vnode_iterate() completing, either on one of
 *		those devices, or a network mount for which the server quits
 *		responding, etc., may cause the caller to block indefinitely.
 */
__private_extern__ int
ubc_umount(struct mount *mp)
{
	vnode_iterate(mp, 0, ubc_umcallback, 0);
	return(0);
}


/*
 * ubc_umcallback
 *
 * Used by ubc_umount() as an internal implementation detail; see ubc_umount()
 * and vnode_iterate() for details of implementation.
 */
static int
ubc_umcallback(vnode_t vp, __unused void * args)
{

	if (UBCINFOEXISTS(vp)) {

		(void) ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL, UBC_PUSHALL);
	}
	return (VNODE_RETURNED);
}


/*
 * ubc_getcred
 *
 * Get the credentials currently active for the ubc_info associated with the
 * vnode.
 *
 * Parameters:	vp			The vnode whose ubc_info credentials
 *					are to be retrieved
 *
 * Returns:	!NOCRED			The credentials
 *		NOCRED			If there is no ubc_info for the vnode,
 *					or if there is one, but it has not had
 *					any credentials associated with it via
 *					a call to ubc_setcred()
 */
kauth_cred_t
ubc_getcred(struct vnode *vp)
{
	if (UBCINFOEXISTS(vp))
		return (vp->v_ubcinfo->ui_ucred);

	return (NOCRED);
}


/*
 * ubc_setthreadcred
 *
 * If they are not already set, set the credentials of the ubc_info structure
 * associated with the vnode to those of the supplied thread; otherwise leave
 * them alone.
 *
 * Parameters:	vp			The vnode whose ubc_info creds are to
 *					be set
 *		p			The process whose credentials are to
 *					be used, if not running on an assumed
 *					credential
 *		thread			The thread whose credentials are to
 *					be used
 *
 * Returns:	1			This vnode has no associated ubc_info
 *		0			Success
 *
 * Notes:	This function takes a proc parameter to account for bootstrap
 *		issues where a task or thread may call this routine, either
 *		before credentials have been initialized by bsd_init(), or if
 *		there is no BSD info associated with a Mach thread yet.  This
 *		is known to happen in both the initial swap and memory mapping
 *		calls.
 *
 *		This function is generally used only in the following cases:
 *
 *		o	a memory mapped file via the mmap() system call
 *		o	a memory mapped file via the deprecated map_fd() call
 *		o	a swap store backing file
 *		o	subsequent to a successful write via vn_write()
 *
 *		The information is then used by the NFS client in order to
 *		cons up a wire message in either the page-in or page-out path.
 *
 *		There are two potential problems with the use of this API:
 *
 *		o	Because the write path only sets it on a successful
 *			write, there is a race window between setting the
 *			credential and its use to evict the pages to the
 *			remote file server
 *
 *		o	Because a page-in may occur prior to a write, the
 *			credential may not be set at this time, if the page-in
 *			is not the result of a mapping established via mmap()
 *			or map_fd().
 *
 *		In both these cases, this will be triggered from the paging
 *		path, which will instead use the credential of the current
 *		process, which in this case is either the dynamic_pager or
 *		the kernel task, both of which utilize "root" credentials.
 *
 *		This may potentially permit operations to occur which should
 *		be denied, or it may cause to be denied operations which
 *		should be permitted, depending on the configuration of the NFS
 *		server.
 */
int
ubc_setthreadcred(struct vnode *vp, proc_t p, thread_t thread)
{
	struct ubc_info *uip;
	kauth_cred_t credp;
	struct uthread	*uthread = get_bsdthread_info(thread);

	if (!UBCINFOEXISTS(vp))
		return (1);

	vnode_lock(vp);

	uip = vp->v_ubcinfo;
	credp = uip->ui_ucred;

	if (!IS_VALID_CRED(credp)) {
		/* use per-thread cred, if assumed identity, else proc cred */
		if (uthread == NULL || (uthread->uu_flag & UT_SETUID) == 0) {
			uip->ui_ucred = kauth_cred_proc_ref(p);
		} else {
			uip->ui_ucred = uthread->uu_ucred;
			kauth_cred_ref(uip->ui_ucred);
		}
	}
	vnode_unlock(vp);

	return (0);
}
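
/*
 * As an example of the "subsequent to a successful write" case listed
 * above, a write path would typically tag the vnode with the writer's
 * credentials after the I/O succeeds, roughly (illustrative sketch):
 *
 *	if (error == 0)
 *		(void) ubc_setthreadcred(vp, vfs_context_proc(ctx),
 *		    current_thread());
 */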


/*
 * ubc_setcred
 *
 * If they are not already set, set the credentials of the ubc_info structure
 * associated with the vnode to those of the process; otherwise leave them
 * alone.
 *
 * Parameters:	vp			The vnode whose ubc_info creds are to
 *					be set
 *		p			The process whose credentials are to
 *					be used
 *
 * Returns:	0			This vnode has no associated ubc_info
 *		1			Success
 *
 * Notes:	The return values for this function are inverted from nearly
 *		all other uses in the kernel.
 *
 *		See also ubc_setthreadcred(), above.
 *
 *		This function is considered deprecated, and generally should
 *		not be used, as it is incompatible with per-thread credentials;
 *		it exists for legacy KPI reasons.
 *
 * DEPRECATION:	ubc_setcred() is being deprecated. Please use
 *		ubc_setthreadcred() instead.
 */
int
ubc_setcred(struct vnode *vp, proc_t p)
{
	struct ubc_info *uip;
	kauth_cred_t credp;

	/* If there is no ubc_info, deny the operation */
	if ( !UBCINFOEXISTS(vp))
		return (0);

	/*
	 * Check to see if there is already a credential reference in the
	 * ubc_info; if there is not, take one on the supplied credential.
	 */
	vnode_lock(vp);
	uip = vp->v_ubcinfo;
	credp = uip->ui_ucred;
	if (!IS_VALID_CRED(credp)) {
		uip->ui_ucred = kauth_cred_proc_ref(p);
	}
	vnode_unlock(vp);

	return (1);
}
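
/*
 * Because of the inverted return convention documented above, legacy
 * callers must test against 0 for failure rather than for success,
 * e.g. (illustrative sketch; the errno chosen here is arbitrary):
 *
 *	if (ubc_setcred(vp, p) == 0)
 *		error = ENOENT;
 */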


/*
 * ubc_getpager
 *
 * Get the pager associated with the ubc_info associated with the vnode.
 *
 * Parameters:	vp			The vnode to obtain the pager from
 *
 * Returns:	!VNODE_PAGER_NULL	The memory_object_t for the pager
 *		VNODE_PAGER_NULL	There is no ubc_info for this vnode
 *
 * Notes:	For each vnode that has a ubc_info associated with it, that
 *		ubc_info SHALL have a pager associated with it, so in the
 *		normal case, it's impossible to return VNODE_PAGER_NULL for
 *		a vnode with an associated ubc_info.
 */
__private_extern__ memory_object_t
ubc_getpager(struct vnode *vp)
{
	if (UBCINFOEXISTS(vp))
		return (vp->v_ubcinfo->ui_pager);

	return (0);
}


/*
 * ubc_getobject
 *
 * Get the memory object control associated with the ubc_info associated with
 * the vnode
 *
 * Parameters:	vp			The vnode to obtain the memory object
 *					from
 *		flags			DEPRECATED
 *
 * Returns:	!MEMORY_OBJECT_CONTROL_NULL
 *		MEMORY_OBJECT_CONTROL_NULL
 *
 * Notes:	Historically, if the flags were not "do not reactivate", this
 *		function would look up the memory object using the pager if
 *		it did not exist (this could be the case if the vnode had
 *		been previously reactivated).  The flags would also permit a
 *		hold to be requested, which would have created an object
 *		reference, if one had not already existed.  This usage is
 *		deprecated, as it would permit a race between finding and
 *		taking the reference vs. a single reference being dropped in
 *		another thread.
 */
memory_object_control_t
ubc_getobject(struct vnode *vp, __unused int flags)
{
	if (UBCINFOEXISTS(vp))
		return ((vp->v_ubcinfo->ui_control));

	return (MEMORY_OBJECT_CONTROL_NULL);
}


/*
 * ubc_blktooff
 *
 * Convert a given block number to a memory backing object (file) offset for a
 * given vnode
 *
 * Parameters:	vp			The vnode in which the block is located
 *		blkno			The block number to convert
 *
 * Returns:	!-1			The offset into the backing object
 *		-1			There is no ubc_info associated with
 *					the vnode
 *		-1			An error occurred in the underlying VFS
 *					while translating the block to an
 *					offset; the most likely cause is that
 *					the caller specified a block past the
 *					end of the file, but this could also be
 *					any other error from VNOP_BLKTOOFF().
 *
 * Note:	Representing the error in band loses some information, but does
 *		not occlude a valid offset, since an off_t of -1 is normally
 *		used to represent EOF.  If we had a more reliable constant in
 *		our header files for it (i.e. explicitly cast to an off_t), we
 *		would use it here instead.
 */
off_t
ubc_blktooff(vnode_t vp, daddr64_t blkno)
{
	off_t file_offset = -1;
	int error;

	if (UBCINFOEXISTS(vp)) {
		error = VNOP_BLKTOOFF(vp, blkno, &file_offset);
		if (error)
			file_offset = -1;
	}

	return (file_offset);
}


/*
 * ubc_offtoblk
 *
 * Convert a given offset in a memory backing object into a block number for a
 * given vnode
 *
 * Parameters:	vp			The vnode in which the offset is
 *					located
 *		offset			The offset into the backing object
 *
 * Returns:	!-1			The returned block number
 *		-1			There is no ubc_info associated with
 *					the vnode
 *		-1			An error occurred in the underlying VFS
 *					while translating the offset to a
 *					block; the most likely cause is that
 *					the caller specified an offset past the
 *					end of the file, but this could also be
 *					any other error from VNOP_OFFTOBLK().
 *
 * Note:	Representing the error in band loses some information, but does
 *		not occlude a valid block number, since block numbers exceed
 *		the valid range for offsets, due to their relative sizes.  If
 *		we had a more reliable constant than -1 in our header files
 *		for it (i.e. explicitly cast to a daddr64_t), we would use it
 *		here instead.
 */
daddr64_t
ubc_offtoblk(vnode_t vp, off_t offset)
{
	daddr64_t blkno = -1;
	int error = 0;

	if (UBCINFOEXISTS(vp)) {
		error = VNOP_OFFTOBLK(vp, offset, &blkno);
		if (error)
			blkno = -1;
	}

	return (blkno);
}
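
/*
 * The two translations above are inverses, modulo the in-band -1 error
 * value, so a caller converting an offset to a block and back must
 * check each step; roughly (illustrative sketch, arbitrary errno):
 *
 *	daddr64_t blkno;
 *	off_t blk_offset;
 *
 *	if ((blkno = ubc_offtoblk(vp, f_offset)) == -1)
 *		return (ERANGE);
 *	if ((blk_offset = ubc_blktooff(vp, blkno)) == -1)
 *		return (ERANGE);
 */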


/*
 * ubc_pages_resident
 *
 * Determine whether or not a given vnode has pages resident via the memory
 * object control associated with the ubc_info associated with the vnode
 *
 * Parameters:	vp			The vnode we want to know about
 *
 * Returns:	1			Yes
 *		0			No
 */
int
ubc_pages_resident(vnode_t vp)
{
	kern_return_t		kret;
	boolean_t		has_pages_resident;

	if (!UBCINFOEXISTS(vp))
		return (0);

	/*
	 * The following call may fail if an invalid ui_control is specified,
	 * or if there is no VM object associated with the control object.  In
	 * either case, reacting to it as if there were no pages resident will
	 * result in correct behavior.
	 */
	kret = memory_object_pages_resident(vp->v_ubcinfo->ui_control, &has_pages_resident);

	if (kret != KERN_SUCCESS)
		return (0);

	if (has_pages_resident == TRUE)
		return (1);

	return (0);
}


/*
 * ubc_sync_range
 *
 * Clean and/or invalidate a range in the memory object that backs this vnode
 *
 * Parameters:	vp			The vnode whose associated ubc_info's
 *					associated memory object is to have a
 *					range invalidated within it
 *		beg_off			The start of the range, as an offset
 *		end_off			The end of the range, as an offset
 *		flags			See ubc_msync_internal()
 *
 * Returns:	1			Success
 *		0			Failure
 *
 * Notes:	see ubc_msync_internal() for more detailed information.
 *
 * DEPRECATED:	This interface is obsolete due to a failure to return error
 *		information needed in order to correct failures.  The currently
 *		recommended interface is ubc_msync().
 */
int
ubc_sync_range(vnode_t vp, off_t beg_off, off_t end_off, int flags)
{
	return (ubc_msync_internal(vp, beg_off, end_off, NULL, flags, NULL));
}


/*
 * ubc_msync
 *
 * Clean and/or invalidate a range in the memory object that backs this vnode
 *
 * Parameters:	vp			The vnode whose associated ubc_info's
 *					associated memory object is to have a
 *					range invalidated within it
 *		beg_off			The start of the range, as an offset
 *		end_off			The end of the range, as an offset
 *		resid_off		The address of an off_t supplied by the
 *					caller; may be set to NULL to ignore
 *		flags			See ubc_msync_internal()
 *
 * Returns:	0			Success
 *		!0			Failure; an errno is returned
 *
 * Implicit Returns:
 *		*resid_off, modified	If non-NULL, the contents are ALWAYS
 *					modified; they are initialized to the
 *					beg_off, and in case of an I/O error,
 *					the difference between beg_off and the
 *					current value will reflect what was
 *					able to be written before the error
 *					occurred.  If no error is returned, the
 *					value of the resid_off is undefined; do
 *					NOT use it in place of end_off if you
 *					intend to increment from the end of the
 *					last call and call iteratively.
 *
 * Notes:	see ubc_msync_internal() for more detailed information.
 *
 */
errno_t
ubc_msync(vnode_t vp, off_t beg_off, off_t end_off, off_t *resid_off, int flags)
{
	int retval;
	int io_errno = 0;

	if (resid_off)
		*resid_off = beg_off;

	retval = ubc_msync_internal(vp, beg_off, end_off, resid_off, flags, &io_errno);

	if (retval == 0 && io_errno == 0)
		return (EINVAL);
	return (io_errno);
}
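
/*
 * A typical fsync-style caller pushes every dirty page in the file and
 * waits for the I/O to complete, for example (illustrative sketch):
 *
 *	errno_t error;
 *
 *	error = ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL,
 *	    UBC_PUSHDIRTY | UBC_SYNC);
 */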


/*
 * Clean and/or invalidate a range in the memory object that backs this vnode
 *
 * Parameters:	vp			The vnode whose associated ubc_info's
 *					associated memory object is to have a
 *					range invalidated within it
 *		beg_off			The start of the range, as an offset
 *		end_off			The end of the range, as an offset
 *		resid_off		The address of an off_t supplied by the
 *					caller; may be set to NULL to ignore
 *		flags			MUST contain at least one of the flags
 *					UBC_INVALIDATE, UBC_PUSHDIRTY, or
 *					UBC_PUSHALL; if UBC_PUSHDIRTY is used,
 *					UBC_SYNC may also be specified to cause
 *					this function to block until the
 *					operation is complete.  The behavior
 *					of UBC_SYNC is otherwise undefined.
 *		io_errno		The address of an int to contain the
 *					errno from a failed I/O operation, if
 *					one occurs; may be set to NULL to
 *					ignore
 *
 * Returns:	1			Success
 *		0			Failure
 *
 * Implicit Returns:
 *		*resid_off, modified	The contents of this offset MAY be
 *					modified; in case of an I/O error, the
 *					difference between beg_off and the
 *					current value will reflect what was
 *					able to be written before the error
 *					occurred.
 *		*io_errno, modified	The contents of this offset are set to
 *					an errno, if an error occurs; if the
 *					caller supplies an io_errno parameter,
 *					they should be careful to initialize it
 *					to 0 before calling this function to
 *					enable them to distinguish an error
 *					with a valid *resid_off from an invalid
 *					one, and to avoid potentially falsely
 *					reporting an error, depending on use.
 *
 * Notes:	If there is no ubc_info associated with the vnode supplied,
 *		this function immediately returns success.
 *
 *		If the value of end_off is less than or equal to beg_off, this
 *		function immediately returns success; that is, end_off is NOT
 *		inclusive.
 *
 *		IMPORTANT: one of the flags UBC_INVALIDATE, UBC_PUSHDIRTY, or
 *		UBC_PUSHALL MUST be specified; that is, it is NOT possible to
 *		attempt to block on in-progress I/O by calling this function
 *		with UBC_PUSHDIRTY, and then later call it with just UBC_SYNC
 *		in order to block pending on the I/O already in progress.
 *
 *		The start offset is truncated to the page boundary and the
 *		size is adjusted to include the last page in the range; that
 *		is, end_off on exactly a page boundary will not change if it
 *		is rounded, and the range of bytes written will be from the
 *		truncated beg_off to the rounded (end_off - 1).
 */
static int
ubc_msync_internal(vnode_t vp, off_t beg_off, off_t end_off, off_t *resid_off, int flags, int *io_errno)
{
	memory_object_size_t	tsize;
	kern_return_t		kret;
	int request_flags = 0;
	int flush_flags   = MEMORY_OBJECT_RETURN_NONE;

	if ( !UBCINFOEXISTS(vp))
		return (0);
	if ((flags & (UBC_INVALIDATE | UBC_PUSHDIRTY | UBC_PUSHALL)) == 0)
		return (0);
	if (end_off <= beg_off)
		return (1);

	if (flags & UBC_INVALIDATE)
		/*
		 * discard the resident pages
		 */
		request_flags = (MEMORY_OBJECT_DATA_FLUSH | MEMORY_OBJECT_DATA_NO_CHANGE);

	if (flags & UBC_SYNC)
		/*
		 * wait for all the I/O to complete before returning
		 */
		request_flags |= MEMORY_OBJECT_IO_SYNC;

	if (flags & UBC_PUSHDIRTY)
		/*
		 * we only return the dirty pages in the range
		 */
		flush_flags = MEMORY_OBJECT_RETURN_DIRTY;

	if (flags & UBC_PUSHALL)
		/*
		 * then return all the interesting pages in the range (both
		 * dirty and precious) to the pager
		 */
		flush_flags = MEMORY_OBJECT_RETURN_ALL;

	beg_off = trunc_page_64(beg_off);
	end_off = round_page_64(end_off);
	tsize   = (memory_object_size_t)end_off - beg_off;

	/* flush and/or invalidate pages in the range requested */
	kret = memory_object_lock_request(vp->v_ubcinfo->ui_control,
					  beg_off, tsize,
					  (memory_object_offset_t *)resid_off,
					  io_errno, flush_flags, request_flags,
					  VM_PROT_NO_CHANGE);

	return ((kret == KERN_SUCCESS) ? 1 : 0);
}


/*
 * ubc_map
 *
 * Explicitly map a vnode that has an associated ubc_info, and add a reference
 * to it for the ubc system, if there isn't one already, so it will not be
 * recycled while it's in use, and set flags on the ubc_info to indicate that
 * we have done this
 *
 * Parameters:	vp			The vnode to map
 *		flags			The mapping flags for the vnode; this
 *					will be a combination of one or more of
 *					PROT_READ, PROT_WRITE, and PROT_EXEC
 *
 * Returns:	0			Success
 *		EPERM			Permission was denied
 *
 * Notes:	An I/O reference on the vnode must already be held on entry
 *
 *		If there is no ubc_info associated with the vnode, this function
 *		will return success.
 *
 *		If a permission error occurs, this function will return
 *		failure; all other failures will cause this function to return
 *		success.
 *
 *		IMPORTANT: This is an internal use function, and its symbols
 *		are not exported, hence its error checking is not very robust.
 *		It is primarily used by:
 *
 *		o	mmap(), when mapping a file
 *		o	The deprecated map_fd() interface, when mapping a file
 *		o	When mapping a shared file (a shared library in the
 *			shared segment region)
 *		o	When loading a program image during the exec process
 *
 *		...all of these uses ignore the return code, and any fault that
 *		results later because of a failure is handled in the fix-up path
 *		of the fault handler.  The interface exists primarily as a
 *		performance hint.
 *
 *		Given that third party implementations of the types of
 *		interfaces that would use this function, such as alternative
 *		executable formats, etc., are unsupported, this function is not
 *		exported for general use.
 *
 *		The extra reference is held until the VM system unmaps the
 *		vnode from its own context to maintain a vnode reference in
 *		cases like open()/mmap()/close(), which leave the backing
 *		object referenced by a mapped memory region in a process
 *		address space.
 */
__private_extern__ int
ubc_map(vnode_t vp, int flags)
{
	struct ubc_info *uip;
	int error = 0;
	int need_ref = 0;
	int need_wakeup = 0;

	if (UBCINFOEXISTS(vp)) {

		vnode_lock(vp);
		uip = vp->v_ubcinfo;

		while (ISSET(uip->ui_flags, UI_MAPBUSY)) {
			SET(uip->ui_flags, UI_MAPWAITING);
			(void) msleep(&uip->ui_flags, &vp->v_lock,
				      PRIBIO, "ubc_map", NULL);
		}
		SET(uip->ui_flags, UI_MAPBUSY);
		vnode_unlock(vp);

		error = VNOP_MMAP(vp, flags, vfs_context_current());

		if (error != EPERM)
			error = 0;

		vnode_lock_spin(vp);

		if (error == 0) {
			if ( !ISSET(uip->ui_flags, UI_ISMAPPED))
				need_ref = 1;
			SET(uip->ui_flags, (UI_WASMAPPED | UI_ISMAPPED));
		}
		CLR(uip->ui_flags, UI_MAPBUSY);

		if (ISSET(uip->ui_flags, UI_MAPWAITING)) {
			CLR(uip->ui_flags, UI_MAPWAITING);
			need_wakeup = 1;
		}
		vnode_unlock(vp);

		if (need_wakeup)
			wakeup(&uip->ui_flags);

		if (need_ref)
			vnode_ref(vp);
	}
	return (error);
}
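
/*
 * As noted above, callers treat ubc_map() as a performance hint and
 * ignore its return value; an mmap()-style caller mapping a file for
 * read and execute access would do roughly (illustrative sketch):
 *
 *	(void) ubc_map(vp, PROT_READ | PROT_EXEC);
 */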


/*
 * ubc_destroy_named
 *
 * Destroy the named memory object associated with the ubc_info control object
 * associated with the designated vnode, if there is a ubc_info associated
 * with the vnode, and a control object is associated with it
 *
 * Parameters:	vp			The designated vnode
 *
 * Returns:	(void)
 *
 * Notes:	This function is called on vnode termination for all vnodes,
 *		and must therefore not assume that there is a ubc_info that is
 *		associated with the vnode, nor that there is a control object
 *		associated with the ubc_info.
 *
 *		If all the conditions necessary are present, this function
 *		calls memory_object_destroy(), which will in turn end up
 *		calling ubc_unmap() to release any vnode references that were
 *		established via ubc_map().
 *
 *		IMPORTANT: This is an internal use function that is used
 *		exclusively by the internal use function vclean().
 */
__private_extern__ void
ubc_destroy_named(vnode_t vp)
{
	memory_object_control_t control;
	struct ubc_info *uip;
	kern_return_t kret;

	if (UBCINFOEXISTS(vp)) {
		uip = vp->v_ubcinfo;

		/* Terminate the memory object */
		control = ubc_getobject(vp, UBC_HOLDOBJECT);
		if (control != MEMORY_OBJECT_CONTROL_NULL) {
			kret = memory_object_destroy(control, 0);
			if (kret != KERN_SUCCESS)
				panic("ubc_destroy_named: memory_object_destroy failed");
		}
	}
}


/*
 * ubc_isinuse
 *
 * Determine whether or not a vnode is currently in use by ubc at a level in
 * excess of the requested busycount
 *
 * Parameters:	vp			The vnode to check
 *		busycount		The threshold busy count, used to bias
 *					the count usually already held by the
 *					caller to avoid races
 *
 * Returns:	1			The vnode is in use over the threshold
 *		0			The vnode is not in use over the
 *					threshold
 *
 * Notes:	Because the vnode is only held locked while actually asking
 *		the use count, this function only represents a snapshot of the
 *		current state of the vnode.  If more accurate information is
 *		required, an additional busycount should be held by the caller
 *		and a non-zero busycount used.
 *
 *		If there is no ubc_info associated with the vnode, this
 *		function will report that the vnode is not in use by ubc.
 */
int
ubc_isinuse(struct vnode *vp, int busycount)
{
	if ( !UBCINFOEXISTS(vp))
		return (0);
	return(ubc_isinuse_locked(vp, busycount, 0));
}


/*
 * ubc_isinuse_locked
 *
 * Determine whether or not a vnode is currently in use by ubc at a level in
 * excess of the requested busycount
 *
 * Parameters:	vp			The vnode to check
 *		busycount		The threshold busy count, used to bias
 *					the count usually already held by the
 *					caller to avoid races
 *		locked			True if the vnode is already locked by
 *					the caller
 *
 * Returns:	1			The vnode is in use over the threshold
 *		0			The vnode is not in use over the
 *					threshold
 *
 * Notes:	If the vnode is not locked on entry, it is locked while
 *		actually asking the use count.  If this is the case, this
 *		function only represents a snapshot of the current state of
 *		the vnode.  If more accurate information is required, the
 *		vnode lock should be held by the caller, otherwise an
 *		additional busycount should be held by the caller and a
 *		non-zero busycount used.
 *
 *		If there is no ubc_info associated with the vnode, this
 *		function will report that the vnode is not in use by ubc.
 */
int
ubc_isinuse_locked(struct vnode *vp, int busycount, int locked)
{
	int retval = 0;

	if (!locked)
		vnode_lock(vp);

	if ((vp->v_usecount - vp->v_kusecount) > busycount)
		retval = 1;

	if (!locked)
		vnode_unlock(vp);
	return (retval);
}
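
/*
 * A typical use of the busycount bias: a caller that itself holds one
 * use count on the vnode asks whether anyone else is using it before
 * allowing an exclusive-access operation (illustrative sketch):
 *
 *	if (ubc_isinuse(vp, 1))
 *		return (EBUSY);
 */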


/*
 * ubc_unmap
 *
 * Reverse the effects of a ubc_map() call for a given vnode
 *
 * Parameters:	vp			vnode to unmap from ubc
 *
 * Returns:	(void)
 *
 * Notes:	This is an internal use function used by vnode_pager_unmap().
 *		It will attempt to obtain a reference on the supplied vnode,
 *		and if it can do so, and there is an associated ubc_info, and
 *		the flags indicate that it was mapped via ubc_map(), then the
 *		flag is cleared, the mapping removed, and the reference taken
 *		by ubc_map() is released.
 *
 *		IMPORTANT: This MUST only be called by the VM
 *		to prevent race conditions.
 */
__private_extern__ void
ubc_unmap(struct vnode *vp)
{
	struct ubc_info *uip;
	int	need_rele = 0;
	int	need_wakeup = 0;

	if (vnode_getwithref(vp))
		return;

	if (UBCINFOEXISTS(vp)) {
		vnode_lock(vp);
		uip = vp->v_ubcinfo;

		while (ISSET(uip->ui_flags, UI_MAPBUSY)) {
			SET(uip->ui_flags, UI_MAPWAITING);
			(void) msleep(&uip->ui_flags, &vp->v_lock,
				      PRIBIO, "ubc_unmap", NULL);
		}
		SET(uip->ui_flags, UI_MAPBUSY);

		if (ISSET(uip->ui_flags, UI_ISMAPPED)) {
			CLR(uip->ui_flags, UI_ISMAPPED);
			need_rele = 1;
		}
		vnode_unlock(vp);

		if (need_rele) {
			(void) VNOP_MNOMAP(vp, vfs_context_current());
			vnode_rele(vp);
		}

		vnode_lock_spin(vp);

		CLR(uip->ui_flags, UI_MAPBUSY);
		if (ISSET(uip->ui_flags, UI_MAPWAITING)) {
			CLR(uip->ui_flags, UI_MAPWAITING);
			need_wakeup = 1;
		}
		vnode_unlock(vp);

		if (need_wakeup)
			wakeup(&uip->ui_flags);

	}
	/*
	 * the drop of the vnode ref will cleanup
	 */
	vnode_put(vp);
}


/*
 * ubc_page_op
 *
 * Manipulate individual page state for a vnode with an associated ubc_info
 * with an associated memory object control.
 *
 * Parameters:	vp			The vnode backing the page
 *		f_offset		A file offset interior to the page
 *		ops			The operations to perform, as a bitmap
 *					(see below for more information)
 *		phys_entryp		The address of a ppnum_t; may be NULL
 *					to ignore
 *		flagsp			A pointer to an int to contain flags;
 *					may be NULL to ignore
 *
 * Returns:	KERN_SUCCESS		Success
 *		KERN_INVALID_ARGUMENT	If the memory object control has no VM
 *					object associated
 *		KERN_INVALID_OBJECT	If UPL_POP_PHYSICAL and the object is
 *					not physically contiguous
 *		KERN_INVALID_OBJECT	If !UPL_POP_PHYSICAL and the object is
 *					physically contiguous
 *		KERN_FAILURE		If the page cannot be looked up
 *
 * Implicit Returns:
 *		*phys_entryp (modified)	If phys_entryp is non-NULL and
 *					UPL_POP_PHYSICAL
 *		*flagsp (modified)	If flagsp is non-NULL and there was
 *					!UPL_POP_PHYSICAL and a KERN_SUCCESS
 *
 * Notes:	For object boundaries, it is considerably more efficient to
 *		ensure that f_offset is in fact on a page boundary, as this
 *		will avoid internal use of the hash table to identify the
 *		page, and would therefore skip a number of early optimizations.
 *		Since this is a page operation anyway, the caller should try
 *		to pass only a page aligned offset because of this.
 *
 *		*flagsp may be modified even if this function fails.  If it is
 *		modified, it will contain the condition of the page before the
 *		requested operation was attempted; these will only include the
 *		bitmap flags, and not the UPL_POP_PHYSICAL, UPL_POP_DUMP,
 *		UPL_POP_SET, or UPL_POP_CLR bits.
 *
 *		The flags field may contain a specific operation, such as
 *		UPL_POP_PHYSICAL or UPL_POP_DUMP:
 *
 *		o	UPL_POP_PHYSICAL	Fail if not contiguous; if
 *						*phys_entryp and successful, set
 *						*phys_entryp
 *		o	UPL_POP_DUMP		Dump the specified page
 *
 *		Otherwise, it is treated as a bitmap of one or more page
 *		operations to perform on the final memory object; allowable
 *		bit values are:
 *
 *		o	UPL_POP_DIRTY		The page is dirty
 *		o	UPL_POP_PAGEOUT		The page is paged out
 *		o	UPL_POP_PRECIOUS	The page is precious
 *		o	UPL_POP_ABSENT		The page is absent
 *		o	UPL_POP_BUSY		The page is busy
 *
 *		If the page status is only being queried and not modified, then
 *		no other bits should be specified.  However, if it is being
 *		modified, exactly ONE of the following bits should be set:
 *
 *		o	UPL_POP_SET		Set the current bitmap bits
 *		o	UPL_POP_CLR		Clear the current bitmap bits
 *
 *		Thus to effect a combination of setting and clearing, it may be
 *		necessary to call this function twice.  If this is done, the
 *		set should be used before the clear, since clearing may trigger
 *		a wakeup on the destination page, and if the page is backed by
 *		an encrypted swap file, setting will trigger the decryption
 *		needed before the wakeup occurs.
 */
kern_return_t
ubc_page_op(
	struct vnode	*vp,
	off_t		f_offset,
	int		ops,
	ppnum_t		*phys_entryp,
	int		*flagsp)
{
	memory_object_control_t		control;

	control = ubc_getobject(vp, UBC_FLAGS_NONE);
	if (control == MEMORY_OBJECT_CONTROL_NULL)
		return KERN_INVALID_ARGUMENT;

	return (memory_object_page_op(control,
				      (memory_object_offset_t)f_offset,
				      ops,
				      phys_entryp,
				      flagsp));
}
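
/*
 * For example, assuming a pure query (no operation bits set) simply
 * reports the page's current condition, a caller could check whether
 * the resident page containing f_offset is dirty (illustrative sketch):
 *
 *	int pg_flags = 0;
 *	boolean_t dirty = FALSE;
 *
 *	if (ubc_page_op(vp, f_offset, 0, NULL, &pg_flags) == KERN_SUCCESS)
 *		dirty = (pg_flags & UPL_POP_DIRTY) ? TRUE : FALSE;
 */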


/*
 * ubc_range_op
 *
 * Manipulate page state for a range of memory for a vnode with an associated
 * ubc_info with an associated memory object control, when page level state is
 * not required to be returned from the call (i.e. there are no phys_entryp or
 * flagsp parameters to this call, and it takes a range which may contain
 * multiple pages, rather than an offset interior to a single page).
 *
 * Parameters:	vp			The vnode backing the page
 *		f_offset_beg		A file offset interior to the start page
 *		f_offset_end		A file offset interior to the end page
 *		ops			The operations to perform, as a bitmap
 *					(see below for more information)
 *		range			The address of an int; may be NULL to
 *					ignore
 *
 * Returns:	KERN_SUCCESS		Success
 *		KERN_INVALID_ARGUMENT	If the memory object control has no VM
 *					object associated
 *		KERN_INVALID_OBJECT	If the object is physically contiguous
 *
 * Implicit Returns:
 *		*range (modified)	If range is non-NULL, its contents will
 *					be modified to contain the number of
 *					bytes successfully operated upon.
 *
 * Notes:	IMPORTANT: This function cannot be used on a range that
 *		consists of physically contiguous pages.
 *
 *		For object boundaries, it is considerably more efficient to
 *		ensure that f_offset_beg and f_offset_end are in fact on page
 *		boundaries, as this will avoid internal use of the hash table
 *		to identify the page, and would therefore skip a number of
 *		early optimizations.  Since this is an operation on a set of
 *		pages anyway, the caller should try to pass only page-aligned
 *		offsets because of this.
 *
 *		*range will be modified only if this function succeeds.
 *
 *		The flags field MUST contain a specific operation; allowable
 *		values are:
 *
 *		o	UPL_ROP_ABSENT	Returns the extent of the range
 *					presented which is absent, starting
 *					with the start address presented
 *
 *		o	UPL_ROP_PRESENT	Returns the extent of the range
 *					presented which is present (resident),
 *					starting with the start address
 *					presented
 *		o	UPL_ROP_DUMP	Dump the pages which are found in the
 *					target object for the target range.
 *
 *		IMPORTANT: For UPL_ROP_ABSENT and UPL_ROP_PRESENT; if there are
 *		multiple regions in the range, only the first matching region
 *		is returned.
 */
kern_return_t
ubc_range_op(
	struct vnode	*vp,
	off_t		f_offset_beg,
	off_t		f_offset_end,
	int		ops,
	int		*range)
{
	memory_object_control_t		control;

	control = ubc_getobject(vp, UBC_FLAGS_NONE);
	if (control == MEMORY_OBJECT_CONTROL_NULL)
		return KERN_INVALID_ARGUMENT;

	return (memory_object_range_op(control,
				      (memory_object_offset_t)f_offset_beg,
				      (memory_object_offset_t)f_offset_end,
				      ops,
				      range));
}
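
/*
 * For example, to learn how many bytes starting at f_offset are
 * resident within [f_offset, f_offset + len) (illustrative sketch):
 *
 *	int resident_bytes = 0;
 *
 *	if (ubc_range_op(vp, f_offset, f_offset + len,
 *	    UPL_ROP_PRESENT, &resident_bytes) != KERN_SUCCESS)
 *		resident_bytes = 0;
 */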


/*
 * ubc_create_upl
 *
 * Given a vnode, cause the population of a portion of the vm_object; based on
 * the nature of the request, the pages returned may contain valid data, or
 * they may be uninitialized.
 *
 * Parameters:	vp			The vnode from which to create the upl
 *		f_offset		The start offset into the backing store
 *					represented by the vnode
 *		bufsize			The size of the upl to create
 *		uplp			Pointer to the upl_t to receive the
 *					created upl; MUST NOT be NULL
 *		plp			Pointer to receive the internal page
 *					list for the created upl; MAY be NULL
 *					to ignore
 *
 * Returns:	KERN_SUCCESS		The requested upl has been created
 *		KERN_INVALID_ARGUMENT	The bufsize argument is not an even
 *					multiple of the page size
 *		KERN_INVALID_ARGUMENT	There is no ubc_info associated with
 *					the vnode, or there is no memory object
 *					control associated with the ubc_info
 *	memory_object_upl_request:KERN_INVALID_VALUE
 *					The supplied upl_flags argument is
 *					invalid
 * Implicit Returns:
 *		*uplp (modified)
 *		*plp (modified)		If non-NULL, the value of *plp will be
 *					modified to point to the internal page
 *					list; this modification may occur even
 *					if this function is unsuccessful, in
 *					which case the contents may be invalid
 *
 * Note:	If successful, the returned *uplp MUST subsequently be freed
 *		via a call to ubc_upl_commit(), ubc_upl_commit_range(),
 *		ubc_upl_abort(), or ubc_upl_abort_range().
 */
kern_return_t
ubc_create_upl(
	struct vnode	*vp,
	off_t 		f_offset,
	long		bufsize,
	upl_t		*uplp,
	upl_page_info_t	**plp,
	int		uplflags)
{
	memory_object_control_t		control;
	mach_msg_type_number_t		count;
	int                             ubcflags;
	kern_return_t			kr;

	if (bufsize & 0xfff)		/* bufsize must be a multiple of the (4K) page size */
		return KERN_INVALID_ARGUMENT;

	if (uplflags & UPL_FOR_PAGEOUT) {
		uplflags &= ~UPL_FOR_PAGEOUT;
		ubcflags = UBC_FOR_PAGEOUT;
	} else
		ubcflags = UBC_FLAGS_NONE;

	control = ubc_getobject(vp, ubcflags);
	if (control == MEMORY_OBJECT_CONTROL_NULL)
		return KERN_INVALID_ARGUMENT;

	if (uplflags & UPL_WILL_BE_DUMPED) {
		uplflags &= ~UPL_WILL_BE_DUMPED;
		uplflags |= (UPL_NO_SYNC|UPL_SET_INTERNAL);
	} else
		uplflags |= (UPL_NO_SYNC|UPL_CLEAN_IN_PLACE|UPL_SET_INTERNAL);
	count = 0;

	kr = memory_object_upl_request(control, f_offset, bufsize, uplp, NULL, &count, uplflags);
	if (plp != NULL)
		*plp = UPL_GET_INTERNAL_PAGE_LIST(*uplp);
	return kr;
}
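
/*
 * Illustrative sketch of the typical upl lifecycle: create, optionally
 * map into the kernel, then commit or abort exactly once.  The flags
 * value 0 (a minimal request) and the helper "fill_pages()" are
 * hypothetical stand-ins for a real request and real I/O:
 *
 *	upl_t		upl;
 *	upl_page_info_t	*pl;
 *	vm_offset_t	kva;
 *
 *	if (ubc_create_upl(vp, f_offset, PAGE_SIZE, &upl, &pl, 0)
 *	    != KERN_SUCCESS)
 *		return;
 *	if (ubc_upl_map(upl, &kva) != KERN_SUCCESS) {
 *		(void) ubc_upl_abort(upl, UPL_ABORT_ERROR);
 *		return;
 *	}
 *	fill_pages((void *) kva, PAGE_SIZE);
 *	(void) ubc_upl_unmap(upl);
 *	(void) ubc_upl_commit(upl);	// also deallocates the upl
 */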


/*
 * ubc_upl_maxbufsize
 *
 * Return the maximum bufsize that ubc_create_upl( ) will accept.
 *
 * Parameters:	none
 *
 * Returns:	the maximum buffer size (in bytes) that ubc_create_upl( )
 *		will accept.
 */
upl_size_t
ubc_upl_maxbufsize(
	void)
{
	return(MAX_UPL_SIZE * PAGE_SIZE);
}
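
/*
 * Illustrative sketch: clamping a hypothetical transfer request
 * ("request_size") to what ubc_create_upl() will accept:
 *
 *	upl_size_t io_size = MIN(request_size, ubc_upl_maxbufsize());
 */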

/*
 * ubc_upl_map
 *
 * Map the page list associated with the supplied upl into the kernel virtual
 * address space at the virtual address indicated by the dst_addr argument;
 * the entire upl is mapped
 *
 * Parameters:	upl			The upl to map
 *		dst_addr		The address at which to map the upl
 *
 * Returns:	KERN_SUCCESS		The upl has been mapped
 *		KERN_INVALID_ARGUMENT	The upl is UPL_NULL
 *		KERN_FAILURE		The upl is already mapped
 *	vm_map_enter:KERN_INVALID_ARGUMENT
 *					A failure code from vm_map_enter() due
 *					to an invalid argument
 */
kern_return_t
ubc_upl_map(
	upl_t		upl,
	vm_offset_t	*dst_addr)
{
	return (vm_upl_map(kernel_map, upl, dst_addr));
}


/*
 * ubc_upl_unmap
 *
 * Unmap the page list associated with the supplied upl from the kernel virtual
 * address space; the entire upl is unmapped.
 *
 * Parameters:	upl			The upl to unmap
 *
 * Returns:	KERN_SUCCESS		The upl has been unmapped
 *		KERN_FAILURE		The upl is not currently mapped
 *		KERN_INVALID_ARGUMENT	If the upl is UPL_NULL
 */
kern_return_t
ubc_upl_unmap(
	upl_t	upl)
{
	return(vm_upl_unmap(kernel_map, upl));
}


/*
 * ubc_upl_commit
 *
 * Commit the contents of the upl to the backing store
 *
 * Parameters:	upl			The upl to commit
 *
 * Returns:	KERN_SUCCESS		The upl has been committed
 *		KERN_INVALID_ARGUMENT	The supplied upl was UPL_NULL
 *		KERN_FAILURE		The supplied upl does not represent
 *					device memory, and the offset plus the
 *					size would exceed the actual size of
 *					the upl
 *
 * Notes:	In practice, the only return value for this function should be
 *		KERN_SUCCESS, unless there has been data structure corruption;
 *		since the upl is deallocated regardless of success or failure,
 *		there's really nothing to do about this other than panic.
 *
 *		IMPORTANT: Use of this function should not be mixed with use of
 *		ubc_upl_commit_range(), due to the unconditional deallocation
 *		by this function.
 */
kern_return_t
ubc_upl_commit(
	upl_t 			upl)
{
	upl_page_info_t	*pl;
	kern_return_t 	kr;

	pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
	kr = upl_commit(upl, pl, MAX_UPL_SIZE);
	upl_deallocate(upl);
	return kr;
}


/*
 * ubc_upl_commit_range
 *
 * Commit the contents of the specified range of the upl to the backing store
 *
 * Parameters:	upl			The upl to commit
 *		offset			The offset into the upl
 *		size			The size of the region to be committed,
 *					starting at the specified offset
 *		flags			commit type (see below)
 *
 * Returns:	KERN_SUCCESS		The range has been committed
 *		KERN_INVALID_ARGUMENT	The supplied upl was UPL_NULL
 *		KERN_FAILURE		The supplied upl does not represent
 *					device memory, and the offset plus the
 *					size would exceed the actual size of
 *					the upl
 *
 * Notes:	IMPORTANT: If the commit is successful, and the object is now
 *		empty, the upl will be deallocated.  Since the caller cannot
 *		check that this is the case, the UPL_COMMIT_FREE_ON_EMPTY flag
 *		should generally only be used when the offset is 0 and the size
 *		is equal to the upl size.
 *
 *		The flags argument is a bitmap of flags on the range of pages
 *		in the upl to be committed; allowable flags are:
 *
 *		o	UPL_COMMIT_FREE_ON_EMPTY	Free the upl when it is
 *							both empty and has been
 *							successfully committed
 *		o	UPL_COMMIT_CLEAR_DIRTY		Clear each page's dirty
 *							bit; will prevent a
 *							later pageout
 *		o	UPL_COMMIT_SET_DIRTY		Set each page's dirty
 *							bit; will cause a later
 *							pageout
 *		o	UPL_COMMIT_INACTIVATE		Clear each page's
 *							reference bit; the page
 *							will not be accessed
 *		o	UPL_COMMIT_ALLOW_ACCESS		Unbusy each page; pages
 *							become busy when an
 *							IOMemoryDescriptor is
 *							mapped or redirected,
 *							and we have to wait for
 *							an IOKit driver
 *
 *		The flag UPL_COMMIT_NOTIFY_EMPTY is used internally, and should
 *		not be specified by the caller.
 *
 *		The UPL_COMMIT_CLEAR_DIRTY and UPL_COMMIT_SET_DIRTY flags are
 *		mutually exclusive, and should not be combined.
 */
kern_return_t
ubc_upl_commit_range(
	upl_t 			upl,
	vm_offset_t		offset,
	vm_size_t		size,
	int			flags)
{
	upl_page_info_t	*pl;
	boolean_t		empty;
	kern_return_t 	kr;

	if (flags & UPL_COMMIT_FREE_ON_EMPTY)
		flags |= UPL_COMMIT_NOTIFY_EMPTY;

	if (flags & UPL_COMMIT_KERNEL_ONLY_FLAGS) {
		return KERN_INVALID_ARGUMENT;
	}

	pl = UPL_GET_INTERNAL_PAGE_LIST(upl);

	kr = upl_commit_range(upl, offset, size, flags,
			      pl, MAX_UPL_SIZE, &empty);

	if ((flags & UPL_COMMIT_FREE_ON_EMPTY) && empty)
		upl_deallocate(upl);

	return kr;
}
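
/*
 * Illustrative sketch: committing an entire upl of "isize" bytes
 * (hypothetical) and letting the commit free it once empty:
 *
 *	(void) ubc_upl_commit_range(upl, 0, isize,
 *	    UPL_COMMIT_FREE_ON_EMPTY | UPL_COMMIT_CLEAR_DIRTY);
 */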


/*
 * ubc_upl_abort_range
 *
 * Abort the contents of the specified range of the specified upl
 *
 * Parameters:	upl			The upl to abort
 *		offset			The offset into the upl
 *		size			The size of the region to be aborted,
 *					starting at the specified offset
 *		abort_flags		abort type (see below)
 *
 * Returns:	KERN_SUCCESS		The range has been aborted
 *		KERN_INVALID_ARGUMENT	The supplied upl was UPL_NULL
 *		KERN_FAILURE		The supplied upl does not represent
 *					device memory, and the offset plus the
 *					size would exceed the actual size of
 *					the upl
 *
 * Notes:	IMPORTANT: If the abort is successful, and the object is now
 *		empty, the upl will be deallocated.  Since the caller cannot
 *		check that this is the case, the UPL_ABORT_FREE_ON_EMPTY flag
 *		should generally only be used when the offset is 0 and the size
 *		is equal to the upl size.
 *
 *		The abort_flags argument is a bitmap of flags on the range of
 *		pages in the upl to be aborted; allowable flags are:
 *
 *		o	UPL_ABORT_FREE_ON_EMPTY	Free the upl when it is both
 *						empty and has been successfully
 *						aborted
 *		o	UPL_ABORT_RESTART	The operation must be restarted
 *		o	UPL_ABORT_UNAVAILABLE	The pages are unavailable
 *		o	UPL_ABORT_ERROR		An I/O error occurred
 *		o	UPL_ABORT_DUMP_PAGES	Just free the pages
 *		o	UPL_ABORT_NOTIFY_EMPTY	RESERVED
 *		o	UPL_ABORT_ALLOW_ACCESS	RESERVED
 *
 *		UPL_ABORT_NOTIFY_EMPTY is an internal-use flag and should
 *		not be specified by the caller.  It is intended to fulfill the
 *		same role as UPL_COMMIT_NOTIFY_EMPTY does in the function
 *		ubc_upl_commit_range(), but is never referenced internally.
 *
 *		UPL_ABORT_ALLOW_ACCESS is defined, but neither set nor
 *		referenced; do not use it.
 */
kern_return_t
ubc_upl_abort_range(
	upl_t			upl,
	vm_offset_t		offset,
	vm_size_t		size,
	int			abort_flags)
{
	kern_return_t 	kr;
	boolean_t		empty = FALSE;

	if (abort_flags & UPL_ABORT_FREE_ON_EMPTY)
		abort_flags |= UPL_ABORT_NOTIFY_EMPTY;

	kr = upl_abort_range(upl, offset, size, abort_flags, &empty);

	if ((abort_flags & UPL_ABORT_FREE_ON_EMPTY) && empty)
		upl_deallocate(upl);

	return kr;
}
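
/*
 * Illustrative sketch: the usual error path after a failed read of
 * "isize" bytes (hypothetical): discard the partially filled pages and
 * free the upl once it is empty:
 *
 *	(void) ubc_upl_abort_range(upl, 0, isize,
 *	    UPL_ABORT_ERROR | UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
 */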


/*
 * ubc_upl_abort
 *
 * Abort the contents of the specified upl
 *
 * Parameters:	upl			The upl to abort
 *		abort_type		abort type (see below)
 *
 * Returns:	KERN_SUCCESS		The upl has been aborted
 *		KERN_INVALID_ARGUMENT	The supplied upl was UPL_NULL
 *		KERN_FAILURE		The supplied upl does not represent
 *					device memory, and the offset plus the
 *					size would exceed the actual size of
 *					the upl
 *
 * Notes:	IMPORTANT: If the abort is successful, and the object is now
 *		empty, the upl will be deallocated.  Since the caller cannot
 *		check that this is the case, the UPL_ABORT_FREE_ON_EMPTY flag
 *		should generally only be used when the offset is 0 and the size
 *		is equal to the upl size.
 *
 *		The abort_type argument is a bitmap of flags on the range of
 *		pages in the upl to be aborted; allowable flags are:
 *
 *		o	UPL_ABORT_FREE_ON_EMPTY	Free the upl when it is both
 *						empty and has been successfully
 *						aborted
 *		o	UPL_ABORT_RESTART	The operation must be restarted
 *		o	UPL_ABORT_UNAVAILABLE	The pages are unavailable
 *		o	UPL_ABORT_ERROR		An I/O error occurred
 *		o	UPL_ABORT_DUMP_PAGES	Just free the pages
 *		o	UPL_ABORT_NOTIFY_EMPTY	RESERVED
 *		o	UPL_ABORT_ALLOW_ACCESS	RESERVED
 *
 *		UPL_ABORT_NOTIFY_EMPTY is an internal-use flag and should
 *		not be specified by the caller.  It is intended to fulfill the
 *		same role as UPL_COMMIT_NOTIFY_EMPTY does in the function
 *		ubc_upl_commit_range(), but is never referenced internally.
 *
 *		UPL_ABORT_ALLOW_ACCESS is defined, but neither set nor
 *		referenced; do not use it.
 */
kern_return_t
ubc_upl_abort(
	upl_t			upl,
	int			abort_type)
{
	kern_return_t	kr;

	kr = upl_abort(upl, abort_type);
	upl_deallocate(upl);
	return kr;
}


/*
 * ubc_upl_pageinfo
 *
 * Retrieve the internal page list for the specified upl
 *
 * Parameters:	upl			The upl to obtain the page list from
 *
 * Returns:	!NULL			The (upl_page_info_t *) for the page
 *					list internal to the upl
 *		NULL			Error/no page list associated
 *
 * Notes:	IMPORTANT: This function is only valid on internal objects
 *		where the list request was made with the UPL_INTERNAL flag.
 *
 *		This function is a utility helper; some callers may not have
 *		direct access to the header defining the macro, due to
 *		abstraction layering constraints.
 */
upl_page_info_t *
ubc_upl_pageinfo(
	upl_t			upl)
{
	return (UPL_GET_INTERNAL_PAGE_LIST(upl));
}


int
UBCINFOEXISTS(struct vnode * vp)
{
	return((vp) && ((vp)->v_type == VREG) && ((vp)->v_ubcinfo != UBC_INFO_NULL));
}


/*
 * CODE SIGNING
 */
#define CS_BLOB_PAGEABLE 0
static volatile SInt32 cs_blob_size = 0;
static volatile SInt32 cs_blob_count = 0;
static SInt32 cs_blob_size_peak = 0;
static UInt32 cs_blob_size_max = 0;
static SInt32 cs_blob_count_peak = 0;
extern int cs_debug;

int cs_validation = 1;

SYSCTL_INT(_vm, OID_AUTO, cs_validation, CTLFLAG_RW, &cs_validation, 0, "Do validate code signatures");
SYSCTL_INT(_vm, OID_AUTO, cs_blob_count, CTLFLAG_RD, &cs_blob_count, 0, "Current number of code signature blobs");
SYSCTL_INT(_vm, OID_AUTO, cs_blob_size, CTLFLAG_RD, &cs_blob_size, 0, "Current size of all code signature blobs");
SYSCTL_INT(_vm, OID_AUTO, cs_blob_count_peak, CTLFLAG_RD, &cs_blob_count_peak, 0, "Peak number of code signature blobs");
SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_peak, CTLFLAG_RD, &cs_blob_size_peak, 0, "Peak size of code signature blobs");
SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_max, CTLFLAG_RD, &cs_blob_size_max, 0, "Size of biggest code signature blob");

kern_return_t
ubc_cs_blob_allocate(
	vm_offset_t	*blob_addr_p,
	vm_size_t	*blob_size_p)
{
	kern_return_t	kr;

#if CS_BLOB_PAGEABLE
	*blob_size_p = round_page(*blob_size_p);
	kr = kmem_alloc(kernel_map, blob_addr_p, *blob_size_p);
#else	/* CS_BLOB_PAGEABLE */
	*blob_addr_p = (vm_offset_t) kalloc(*blob_size_p);
	if (*blob_addr_p == 0) {
		kr = KERN_NO_SPACE;
	} else {
		kr = KERN_SUCCESS;
	}
#endif	/* CS_BLOB_PAGEABLE */
	return kr;
}
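
/*
 * Illustrative sketch: ubc_cs_blob_allocate() and
 * ubc_cs_blob_deallocate() must be used as a pair, and the size the
 * allocator hands back (it may be rounded up when CS_BLOB_PAGEABLE is
 * set) must be the size passed when freeing.  "sig_size" is a
 * hypothetical input:
 *
 *	vm_offset_t	addr;
 *	vm_size_t	size = sig_size;
 *
 *	if (ubc_cs_blob_allocate(&addr, &size) != KERN_SUCCESS)
 *		return ENOMEM;
 *	// ... copy the signature into (void *) addr ...
 *	ubc_cs_blob_deallocate(addr, size);
 */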

void
ubc_cs_blob_deallocate(
	vm_offset_t	blob_addr,
	vm_size_t	blob_size)
{
#if CS_BLOB_PAGEABLE
	kmem_free(kernel_map, blob_addr, blob_size);
#else	/* CS_BLOB_PAGEABLE */
	kfree((void *) blob_addr, blob_size);
#endif	/* CS_BLOB_PAGEABLE */
}

int
ubc_cs_blob_add(
	struct vnode	*vp,
	cpu_type_t	cputype,
	off_t		base_offset,
	vm_address_t	addr,
	vm_size_t	size)
{
	kern_return_t		kr;
	struct ubc_info		*uip;
	struct cs_blob		*blob, *oblob;
	int			error;
	ipc_port_t		blob_handle;
	memory_object_size_t	blob_size;
	const CS_CodeDirectory *cd;
	off_t			blob_start_offset, blob_end_offset;
	SHA1_CTX		sha1ctxt;

	blob_handle = IPC_PORT_NULL;

	blob = (struct cs_blob *) kalloc(sizeof (struct cs_blob));
	if (blob == NULL) {
		return ENOMEM;
	}

#if CS_BLOB_PAGEABLE
	/* get a memory entry on the blob */
	blob_size = (memory_object_size_t) size;
	kr = mach_make_memory_entry_64(kernel_map,
				       &blob_size,
				       addr,
				       VM_PROT_READ,
				       &blob_handle,
				       IPC_PORT_NULL);
	if (kr != KERN_SUCCESS) {
		error = ENOMEM;
		goto out;
	}
	if (memory_object_round_page(blob_size) !=
	    (memory_object_size_t) round_page(size)) {
		printf("ubc_cs_blob_add: size mismatch 0x%llx 0x%x !?\n",
		       blob_size, size);
		panic("XXX FBDP size mismatch 0x%llx 0x%x\n", blob_size, size);
		error = EINVAL;
		goto out;
	}
#else
	blob_size = (memory_object_size_t) size;
	blob_handle = IPC_PORT_NULL;
#endif

	/* fill in the new blob */
	blob->csb_cpu_type = cputype;
	blob->csb_base_offset = base_offset;
	blob->csb_mem_size = size;
	blob->csb_mem_offset = 0;
	blob->csb_mem_handle = blob_handle;
	blob->csb_mem_kaddr = addr;

	/*
	 * Validate the blob's contents
	 */
	cd = findCodeDirectory(
		(const CS_SuperBlob *) addr,
		(char *) addr,
		(char *) addr + blob->csb_mem_size);
	if (cd == NULL) {
		/* no code directory => useless blob ! */
		blob->csb_flags = 0;
		blob->csb_start_offset = 0;
		blob->csb_end_offset = 0;
	} else {
		const unsigned char *sha1_base;
		int sha1_size;

		blob->csb_flags = ntohl(cd->flags) | CS_VALID;
		blob->csb_end_offset = round_page(ntohl(cd->codeLimit));
		blob->csb_start_offset = (blob->csb_end_offset -
					  (ntohl(cd->nCodeSlots) * PAGE_SIZE));
		/* compute the blob's SHA1 hash */
		sha1_base = (const unsigned char *) cd;
		sha1_size = ntohl(cd->length);
		SHA1Init(&sha1ctxt);
		SHA1Update(&sha1ctxt, sha1_base, sha1_size);
		SHA1Final(blob->csb_sha1, &sha1ctxt);
	}

	/*
	 * Let policy module check whether the blob's signature is accepted.
	 */
#if CONFIG_MACF
	error = mac_vnode_check_signature(vp, blob->csb_sha1, (void *) addr, size);
	if (error)
		goto out;
#endif

	/*
	 * Validate the blob's coverage
	 */
	blob_start_offset = blob->csb_base_offset + blob->csb_start_offset;
	blob_end_offset = blob->csb_base_offset + blob->csb_end_offset;

	if (blob_start_offset >= blob_end_offset ||
	    blob_start_offset < 0 ||
	    blob_end_offset <= 0) {
		/* reject empty or backwards blob */
		error = EINVAL;
		goto out;
	}

	vnode_lock(vp);
	if (!UBCINFOEXISTS(vp)) {
		vnode_unlock(vp);
		error = ENOENT;
		goto out;
	}
	uip = vp->v_ubcinfo;

	/* check if this new blob overlaps with an existing blob */
	for (oblob = uip->cs_blobs;
	     oblob != NULL;
	     oblob = oblob->csb_next) {
		off_t oblob_start_offset, oblob_end_offset;

		oblob_start_offset = (oblob->csb_base_offset +
				      oblob->csb_start_offset);
		oblob_end_offset = (oblob->csb_base_offset +
				    oblob->csb_end_offset);
		if (blob_start_offset >= oblob_end_offset ||
		    blob_end_offset <= oblob_start_offset) {
			/* no conflict with this existing blob */
		} else {
			/* conflict ! */
			if (blob_start_offset == oblob_start_offset &&
			    blob_end_offset == oblob_end_offset &&
			    blob->csb_mem_size == oblob->csb_mem_size &&
			    blob->csb_flags == oblob->csb_flags &&
			    (blob->csb_cpu_type == CPU_TYPE_ANY ||
			     oblob->csb_cpu_type == CPU_TYPE_ANY ||
			     blob->csb_cpu_type == oblob->csb_cpu_type) &&
			    !bcmp(blob->csb_sha1,
				  oblob->csb_sha1,
				  SHA1_RESULTLEN)) {
				/*
				 * We already have this blob:
				 * we'll return success but
				 * throw away the new blob.
				 */
				if (oblob->csb_cpu_type == CPU_TYPE_ANY) {
					/*
					 * The old blob matches this one
					 * but doesn't have any CPU type.
					 * Update it with whatever the caller
					 * provided this time.
					 */
					oblob->csb_cpu_type = cputype;
				}
				vnode_unlock(vp);
				error = EAGAIN;
				goto out;
			} else {
				/* different blob: reject the new one */
				vnode_unlock(vp);
				error = EALREADY;
				goto out;
			}
		}
	}

	/* mark this vnode's VM object as having "signed pages" */
	kr = memory_object_signed(uip->ui_control, TRUE);
	if (kr != KERN_SUCCESS) {
		vnode_unlock(vp);
		error = ENOENT;
		goto out;
	}

	/*
	 * Add this blob to the list of blobs for this vnode.
	 * We always add at the front of the list and we never remove a
	 * blob from the list, so ubc_cs_get_blobs() can return whatever
	 * the top of the list was and that list will remain valid
	 * while we validate a page, even after we release the vnode's lock.
	 */
	blob->csb_next = uip->cs_blobs;
	uip->cs_blobs = blob;

	OSAddAtomic(+1, &cs_blob_count);
	if (cs_blob_count > cs_blob_count_peak) {
		cs_blob_count_peak = cs_blob_count; /* XXX atomic ? */
	}
	OSAddAtomic(+blob->csb_mem_size, &cs_blob_size);
	if (cs_blob_size > cs_blob_size_peak) {
		cs_blob_size_peak = cs_blob_size; /* XXX atomic ? */
	}
	if (blob->csb_mem_size > cs_blob_size_max) {
		cs_blob_size_max = blob->csb_mem_size;
	}

	if (cs_debug) {
		proc_t p;

		p = current_proc();
		printf("CODE SIGNING: proc %d(%s) "
		       "loaded %s signatures for file (%s) "
		       "range 0x%llx:0x%llx flags 0x%x\n",
		       p->p_pid, p->p_comm,
		       blob->csb_cpu_type == -1 ? "detached" : "embedded",
		       vnode_name(vp),
		       blob->csb_base_offset + blob->csb_start_offset,
		       blob->csb_base_offset + blob->csb_end_offset,
		       blob->csb_flags);
	}

	vnode_unlock(vp);

	error = 0;	/* success ! */

out:
	if (error) {
		/* we failed; release what we allocated */
		if (blob) {
			kfree(blob, sizeof (*blob));
			blob = NULL;
		}
		if (blob_handle != IPC_PORT_NULL) {
			mach_memory_entry_port_release(blob_handle);
			blob_handle = IPC_PORT_NULL;
		}
	}

	if (error == EAGAIN) {
		/*
		 * See above:  error is EAGAIN if we were asked
		 * to add an existing blob again.  We cleaned up the new
		 * blob and we want to return success.
		 */
		error = 0;
		/*
		 * Since we're not failing, consume the data we received.
		 */
		ubc_cs_blob_deallocate(addr, size);
	}

	return error;
}


struct cs_blob *
ubc_cs_blob_get(
	struct vnode	*vp,
	cpu_type_t	cputype,
	off_t		offset)
{
	struct ubc_info	*uip;
	struct cs_blob	*blob;
	off_t offset_in_blob;

	vnode_lock_spin(vp);

	if (!UBCINFOEXISTS(vp)) {
		blob = NULL;
		goto out;
	}

	uip = vp->v_ubcinfo;
	for (blob = uip->cs_blobs;
	     blob != NULL;
	     blob = blob->csb_next) {
		if (cputype != -1 && blob->csb_cpu_type == cputype) {
			break;
		}
		if (offset != -1) {
			offset_in_blob = offset - blob->csb_base_offset;
			if (offset_in_blob >= blob->csb_start_offset &&
			    offset_in_blob < blob->csb_end_offset) {
				/* our offset is covered by this blob */
				break;
			}
		}
	}

out:
	vnode_unlock(vp);

	return blob;
}
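
/*
 * Illustrative sketch: looking up whichever blob covers a given file
 * offset, passing -1 as the cputype so that only the offset check in
 * the loop above applies:
 *
 *	struct cs_blob *blob;
 *
 *	blob = ubc_cs_blob_get(vp, -1, file_offset);
 *	if (blob == NULL) {
 *		// no signature covers file_offset
 *	}
 */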

static void
ubc_cs_free(
	struct ubc_info	*uip)
{
	struct cs_blob	*blob, *next_blob;

	for (blob = uip->cs_blobs;
	     blob != NULL;
	     blob = next_blob) {
		next_blob = blob->csb_next;
		if (blob->csb_mem_kaddr != 0) {
			ubc_cs_blob_deallocate(blob->csb_mem_kaddr,
					       blob->csb_mem_size);
			blob->csb_mem_kaddr = 0;
		}
		if (blob->csb_mem_handle != IPC_PORT_NULL) {
			mach_memory_entry_port_release(blob->csb_mem_handle);
		}
		blob->csb_mem_handle = IPC_PORT_NULL;
		OSAddAtomic(-1, &cs_blob_count);
		OSAddAtomic(-blob->csb_mem_size, &cs_blob_size);
		kfree(blob, sizeof (*blob));
	}
	uip->cs_blobs = NULL;
}

struct cs_blob *
ubc_get_cs_blobs(
	struct vnode	*vp)
{
	struct ubc_info	*uip;
	struct cs_blob	*blobs;

	vnode_lock_spin(vp);

	if (!UBCINFOEXISTS(vp)) {
		blobs = NULL;
		goto out;
	}

	uip = vp->v_ubcinfo;
	blobs = uip->cs_blobs;

out:
	vnode_unlock(vp);

	return blobs;
}

unsigned long cs_validate_page_no_hash = 0;
unsigned long cs_validate_page_bad_hash = 0;
boolean_t
cs_validate_page(
	void			*_blobs,
	memory_object_offset_t	page_offset,
	const void		*data,
	boolean_t		*tainted)
{
	SHA1_CTX		sha1ctxt;
	unsigned char		actual_hash[SHA1_RESULTLEN];
	unsigned char		expected_hash[SHA1_RESULTLEN];
	boolean_t		found_hash;
	struct cs_blob		*blobs, *blob;
	const CS_CodeDirectory	*cd;
	const CS_SuperBlob	*embedded;
	off_t			start_offset, end_offset;
	const unsigned char	*hash;
	boolean_t		validated;
	off_t			offset;	/* page offset in the file */
	size_t			size;
	off_t			codeLimit = 0;
	char			*lower_bound, *upper_bound;
	vm_offset_t		kaddr, blob_addr;
	vm_size_t		ksize;
	kern_return_t		kr;

	offset = page_offset;

	/* retrieve the expected hash */
	found_hash = FALSE;
	blobs = (struct cs_blob *) _blobs;

	for (blob = blobs;
	     blob != NULL;
	     blob = blob->csb_next) {
		offset = page_offset - blob->csb_base_offset;
		if (offset < blob->csb_start_offset ||
		    offset >= blob->csb_end_offset) {
			/* our page is not covered by this blob */
			continue;
		}

		/* map the blob in the kernel address space */
		kaddr = blob->csb_mem_kaddr;
		if (kaddr == 0) {
			ksize = (vm_size_t) (blob->csb_mem_size +
					     blob->csb_mem_offset);
			kr = vm_map(kernel_map,
				    &kaddr,
				    ksize,
				    0,
				    VM_FLAGS_ANYWHERE,
				    blob->csb_mem_handle,
				    0,
				    TRUE,
				    VM_PROT_READ,
				    VM_PROT_READ,
				    VM_INHERIT_NONE);
			if (kr != KERN_SUCCESS) {
				/* XXX FBDP what to do !? */
				printf("cs_validate_page: failed to map blob, "
				       "size=0x%lx kr=0x%x\n",
				       (unsigned long) blob->csb_mem_size, kr);
				break;
			}
		}
		blob_addr = kaddr + blob->csb_mem_offset;

		lower_bound = CAST_DOWN(char *, blob_addr);
		upper_bound = lower_bound + blob->csb_mem_size;

		embedded = (const CS_SuperBlob *) blob_addr;
		cd = findCodeDirectory(embedded, lower_bound, upper_bound);
		if (cd != NULL) {
			if (cd->pageSize != PAGE_SHIFT ||
			    cd->hashType != 0x1 ||
			    cd->hashSize != SHA1_RESULTLEN) {
				/* bogus blob ? */
				continue;
			}

			end_offset = round_page(ntohl(cd->codeLimit));
			start_offset = end_offset - (ntohl(cd->nCodeSlots) * PAGE_SIZE);
			offset = page_offset - blob->csb_base_offset;
			if (offset < start_offset ||
			    offset >= end_offset) {
				/* our page is not covered by this blob */
				continue;
			}

			codeLimit = ntohl(cd->codeLimit);
			hash = hashes(cd, atop(offset),
				      lower_bound, upper_bound);
			if (hash != NULL) {
				bcopy(hash, expected_hash,
				      sizeof (expected_hash));
				found_hash = TRUE;
			}

			break;
		}
	}

	if (found_hash == FALSE) {
		/*
		 * We can't verify this page because there is no signature
		 * for it (yet).  It's possible that this part of the object
		 * is not signed, or that signatures for that part have not
		 * been loaded yet.
		 * Report that the page has not been validated and let the
		 * caller decide if it wants to accept it or not.
		 */
		cs_validate_page_no_hash++;
		if (cs_debug > 1) {
			printf("CODE SIGNING: cs_validate_page: "
			       "off 0x%llx: no hash to validate !?\n",
			       page_offset);
		}
		validated = FALSE;
		*tainted = FALSE;
	} else {
		const uint32_t *asha1, *esha1;

		size = PAGE_SIZE;
		if (offset + size > codeLimit) {
			/* partial page at end of segment */
			assert(offset < codeLimit);
			size = codeLimit & PAGE_MASK;
		}
		/* compute the actual page's SHA1 hash */
		SHA1Init(&sha1ctxt);
		SHA1UpdateUsePhysicalAddress(&sha1ctxt, data, size);
		SHA1Final(actual_hash, &sha1ctxt);

		asha1 = (const uint32_t *) actual_hash;
		esha1 = (const uint32_t *) expected_hash;

		if (bcmp(expected_hash, actual_hash, SHA1_RESULTLEN) != 0) {
			if (cs_debug) {
				printf("CODE SIGNING: cs_validate_page: "
				       "off 0x%llx size 0x%lx: "
				       "actual [0x%x 0x%x 0x%x 0x%x 0x%x] != "
				       "expected [0x%x 0x%x 0x%x 0x%x 0x%x]\n",
				       page_offset, size,
				       asha1[0], asha1[1], asha1[2],
				       asha1[3], asha1[4],
				       esha1[0], esha1[1], esha1[2],
				       esha1[3], esha1[4]);
			}
			cs_validate_page_bad_hash++;
			*tainted = TRUE;
		} else {
			if (cs_debug > 1) {
				printf("CODE SIGNING: cs_validate_page: "
				       "off 0x%llx size 0x%lx: SHA1 OK\n",
				       page_offset, size);
			}
			*tainted = FALSE;
		}
		validated = TRUE;
	}

	return validated;
}

int
ubc_cs_getcdhash(
	vnode_t		vp,
	off_t		offset,
	unsigned char	*cdhash)
{
	struct cs_blob *blobs, *blob;
	off_t rel_offset;

	blobs = ubc_get_cs_blobs(vp);
	for (blob = blobs;
	     blob != NULL;
	     blob = blob->csb_next) {
		/* compute offset relative to this blob */
		rel_offset = offset - blob->csb_base_offset;
		if (rel_offset >= blob->csb_start_offset &&
		    rel_offset < blob->csb_end_offset) {
			/* this blob does cover our "offset" ! */
			break;
		}
	}

	if (blob == NULL) {
		/* we didn't find a blob covering "offset" */
		return EBADEXEC; /* XXX any better error ? */
	}

	/* get the SHA1 hash of that blob */
	bcopy(blob->csb_sha1, cdhash, sizeof (blob->csb_sha1));

	return 0;
}
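
/*
 * Illustrative sketch: fetching the hash that identifies the signature
 * covering a given offset.  The destination buffer must hold at least
 * SHA1_RESULTLEN bytes, since csb_sha1 is copied in full:
 *
 *	unsigned char cdhash[SHA1_RESULTLEN];
 *
 *	if (ubc_cs_getcdhash(vp, offset, cdhash) == 0) {
 *		// cdhash now holds the SHA1 hash of the covering blob
 *	}
 */
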