1/*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1998 Apple Computer, Inc.  All rights reserved.
29 *
30 *	File:	bsd/kern/kern_symfile.c
31 *
32 * HISTORY
33 */
34
35#include <mach/vm_param.h>
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/signalvar.h>
40#include <sys/resourcevar.h>
41#include <sys/namei.h>
42#include <sys/vnode_internal.h>
43#include <sys/proc_internal.h>
44#include <sys/kauth.h>
45#include <sys/timeb.h>
46#include <sys/times.h>
47#include <sys/acct.h>
48#include <sys/file_internal.h>
49#include <sys/uio.h>
50#include <sys/kernel.h>
51#include <sys/stat.h>
52#include <sys/disk.h>
53#include <sys/conf.h>
54
55#include <mach-o/loader.h>
56#include <mach-o/nlist.h>
57
58#include <kern/kalloc.h>
59#include <vm/vm_kern.h>
60#include <pexpert/pexpert.h>
61#include <IOKit/IOHibernatePrivate.h>
62
63/* This function is called from kern_sysctl in the current process context;
64 * it is exported with the System6.0.exports, but this appears to be a legacy
65 * export, as there are no internal consumers.
66 */
67int
68get_kernel_symfile(__unused proc_t p, __unused char const **symfile);
69int
70get_kernel_symfile(__unused proc_t p, __unused char const **symfile)
71{
72    return KERN_FAILURE;
73}
74
75struct kern_direct_file_io_ref_t
76{
77    vfs_context_t  ctx;
78    struct vnode * vp;
79    dev_t          device;
80    uint32_t	   blksize;
81    off_t          filelength;
82    char           pinned;
83};
84
85
86static int file_ioctl(void * p1, void * p2, u_long theIoctl, caddr_t result)
87{
88    dev_t device = *(dev_t*) p1;
89
90    return ((*bdevsw[major(device)].d_ioctl)
91		    (device, theIoctl, result, S_IFBLK, p2));
92}
93
94static int device_ioctl(void * p1, __unused void * p2, u_long theIoctl, caddr_t result)
95{
96    return (VNOP_IOCTL(p1, theIoctl, result, 0, p2));
97}
98
99static int
100kern_ioctl_file_extents(struct kern_direct_file_io_ref_t * ref, u_long theIoctl, off_t offset, off_t end)
101{
102    int error;
103    int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result);
104    void * p1;
105    void * p2;
106    uint64_t    fileblk;
107    size_t      filechunk;
108    dk_extent_t  extent;
109    dk_unmap_t   unmap;
110    _dk_cs_pin_t pin;
111
112    bzero(&extent, sizeof(dk_extent_t));
113    bzero(&unmap, sizeof(dk_unmap_t));
114    bzero(&pin, sizeof(pin));
115    if (ref->vp->v_type == VREG)
116    {
117	 p1 = &ref->device;
118	 p2 = kernproc;
119	 do_ioctl = &file_ioctl;
120    }
121    else
122    {
123	/* Partition. */
124	p1 = ref->vp;
125	p2 = ref->ctx;
126	do_ioctl = &device_ioctl;
127    }
128    while (offset < end)
129    {
130        if (ref->vp->v_type == VREG)
131        {
132            daddr64_t blkno;
133	    filechunk = 1*1024*1024*1024;
134	    if (filechunk > (size_t)(end - offset))
135	    filechunk = (size_t)(end - offset);
136            error = VNOP_BLOCKMAP(ref->vp, offset, filechunk, &blkno,
137								  &filechunk, NULL, VNODE_WRITE, NULL);
138			if (error) break;
139            fileblk = blkno * ref->blksize;
140        }
141        else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
142        {
143            fileblk = offset;
144            filechunk = ref->filelength;
145        }
146
147	if (DKIOCUNMAP == theIoctl)
148	{
149	    extent.offset = fileblk;
150	    extent.length = filechunk;
151	    unmap.extents = &extent;
152	    unmap.extentsCount = 1;
153	    error = do_ioctl(p1, p2, theIoctl, (caddr_t)&unmap);
154// 	    printf("DKIOCUNMAP(%d) 0x%qx, 0x%qx\n", error, extent.offset, extent.length);
155	}
156	else if (_DKIOCCSPINEXTENT == theIoctl)
157	{
158	    pin.cp_extent.offset = fileblk;
159	    pin.cp_extent.length = filechunk;
160	    pin.cp_flags = _DKIOCCSPINFORHIBERNATION;
161	    error = do_ioctl(p1, p2, theIoctl, (caddr_t)&pin);
162	    if (error && (ENOTTY != error))
163	    {
164		printf("_DKIOCCSPINEXTENT(%d) 0x%qx, 0x%qx\n",
165			error, pin.cp_extent.offset, pin.cp_extent.length);
166	    }
167	}
168	else error = EINVAL;
169
170	if (error) break;
171        offset += filechunk;
172    }
173    return (error);
174}
175
176
177struct kern_direct_file_io_ref_t *
178kern_open_file_for_direct_io(const char * name,
179                 boolean_t create_file,
180			     kern_get_file_extents_callback_t callback,
181			     void * callback_ref,
182                             off_t set_file_size,
183                             off_t write_file_offset,
184                             caddr_t write_file_addr,
185                             vm_size_t write_file_len,
186			     dev_t * partition_device_result,
187			     dev_t * image_device_result,
188                             uint64_t * partitionbase_result,
189                             uint64_t * maxiocount_result,
190                             uint32_t * oflags)
191{
192    struct kern_direct_file_io_ref_t * ref;
193
194    proc_t			p;
195    struct vnode_attr		va;
196    int				error;
197    off_t			f_offset;
198    uint64_t                    fileblk;
199    size_t                      filechunk;
200    uint64_t                    physoffset;
201    dev_t			device;
202    dev_t			target = 0;
203    int			        isssd = 0;
204    uint32_t                    flags = 0;
205    uint32_t			blksize;
206    off_t 			maxiocount, count, segcount;
207    boolean_t                   locked = FALSE;
208
209    int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result);
210    void * p1 = NULL;
211    void * p2 = NULL;
212
213    error = EFAULT;
214
215    ref = (struct kern_direct_file_io_ref_t *) kalloc(sizeof(struct kern_direct_file_io_ref_t));
216    if (!ref)
217    {
218	error = EFAULT;
219    	goto out;
220    }
221
222    bzero(ref, sizeof(*ref));
223    p = kernproc;
224    ref->ctx = vfs_context_create(vfs_context_current());
225
226    if ((error = vnode_open(name, (create_file) ? (O_CREAT | FWRITE) : FWRITE,
227                            (0), 0, &ref->vp, ref->ctx)))
228        goto out;
229
230    if (ref->vp->v_type == VREG)
231    {
232        vnode_lock_spin(ref->vp);
233        SET(ref->vp->v_flag, VSWAP);
234        vnode_unlock(ref->vp);
235    }
236
237    if (write_file_addr && write_file_len)
238    {
239	if ((error = kern_write_file(ref, write_file_offset, write_file_addr, write_file_len, 0)))
240	    goto out;
241    }
242
243    VATTR_INIT(&va);
244    VATTR_WANTED(&va, va_rdev);
245    VATTR_WANTED(&va, va_fsid);
246    VATTR_WANTED(&va, va_data_size);
247    VATTR_WANTED(&va, va_data_alloc);
248    VATTR_WANTED(&va, va_nlink);
249    error = EFAULT;
250    if (vnode_getattr(ref->vp, &va, ref->ctx))
251    	goto out;
252
253    kprintf("vp va_rdev major %d minor %d\n", major(va.va_rdev), minor(va.va_rdev));
254    kprintf("vp va_fsid major %d minor %d\n", major(va.va_fsid), minor(va.va_fsid));
255    kprintf("vp size %qd alloc %qd\n", va.va_data_size, va.va_data_alloc);
256
257    if (ref->vp->v_type == VREG)
258    {
259		/* Don't dump files with links. */
260		if (va.va_nlink != 1)
261			goto out;
262
263        device = va.va_fsid;
264        ref->filelength = va.va_data_size;
265
266        p1 = &device;
267        p2 = p;
268        do_ioctl = &file_ioctl;
269
270		if (set_file_size)
271	    {
272			error = vnode_setsize(ref->vp, set_file_size,
273								  IO_NOZEROFILL | IO_NOAUTH, ref->ctx);
274			if (error)
275				goto out;
276			ref->filelength = set_file_size;
277		}
278    }
279    else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
280    {
281	/* Partition. */
282        device = va.va_rdev;
283
284        p1 = ref->vp;
285        p2 = ref->ctx;
286        do_ioctl = &device_ioctl;
287    }
288    else
289    {
290	/* Don't dump to non-regular files. */
291	error = EFAULT;
292        goto out;
293    }
294    ref->device = device;
295
296    // get block size
297
298    error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &ref->blksize);
299    if (error)
300        goto out;
301
302    if (ref->vp->v_type != VREG)
303    {
304        error = do_ioctl(p1, p2, DKIOCGETBLOCKCOUNT, (caddr_t) &fileblk);
305        if (error)
306            goto out;
307	ref->filelength = fileblk * ref->blksize;
308    }
309
310    // pin logical extents
311
312    error = kern_ioctl_file_extents(ref, _DKIOCCSPINEXTENT, 0, ref->filelength);
313    if (error && (ENOTTY != error)) goto out;
314    ref->pinned = (error == 0);
315
316    // generate the block list
317
318    error = do_ioctl(p1, p2, DKIOCLOCKPHYSICALEXTENTS, NULL);
319    if (error)
320        goto out;
321    locked = TRUE;
322
323    f_offset = 0;
324    while (f_offset < ref->filelength)
325    {
326        if (ref->vp->v_type == VREG)
327        {
328            filechunk = 1*1024*1024*1024;
329            daddr64_t blkno;
330
331            error = VNOP_BLOCKMAP(ref->vp, f_offset, filechunk, &blkno,
332								  &filechunk, NULL, VNODE_WRITE, NULL);
333            if (error)
334                goto out;
335
336            fileblk = blkno * ref->blksize;
337        }
338        else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
339        {
340            fileblk = f_offset;
341            filechunk = f_offset ? 0 : ref->filelength;
342        }
343
344        physoffset = 0;
345        while (physoffset < filechunk)
346        {
347            dk_physical_extent_t getphysreq;
348            bzero(&getphysreq, sizeof(getphysreq));
349
350            getphysreq.offset = fileblk + physoffset;
351            getphysreq.length = (filechunk - physoffset);
352            error = do_ioctl(p1, p2, DKIOCGETPHYSICALEXTENT, (caddr_t) &getphysreq);
353            if (error)
354                goto out;
355            if (!target)
356            {
357                target = getphysreq.dev;
358            }
359            else if (target != getphysreq.dev)
360            {
361                error = ENOTSUP;
362                goto out;
363            }
364#if HIBFRAGMENT
365	    uint64_t rev;
366	    for (rev = 4096; rev <= getphysreq.length; rev += 4096)
367	    {
368		callback(callback_ref, getphysreq.offset + getphysreq.length - rev, 4096);
369	    }
370#else
371            callback(callback_ref, getphysreq.offset, getphysreq.length);
372#endif
373            physoffset += getphysreq.length;
374        }
375        f_offset += filechunk;
376    }
377    callback(callback_ref, 0ULL, 0ULL);
378
379    if (ref->vp->v_type == VREG)
380        p1 = &target;
381
382    // get partition base
383
384    if (partitionbase_result)
385    {
386        error = do_ioctl(p1, p2, DKIOCGETBASE, (caddr_t) partitionbase_result);
387        if (error)
388            goto out;
389    }
390
391    // get block size & constraints
392
393    error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &blksize);
394    if (error)
395        goto out;
396
397    maxiocount = 1*1024*1024*1024;
398
399    error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTREAD, (caddr_t) &count);
400    if (error)
401        count = 0;
402    count *= blksize;
403    if (count && (count < maxiocount))
404        maxiocount = count;
405
406    error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTWRITE, (caddr_t) &count);
407    if (error)
408        count = 0;
409    count *= blksize;
410    if (count && (count < maxiocount))
411        maxiocount = count;
412
413    error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTREAD, (caddr_t) &count);
414    if (error)
415        count = 0;
416    if (count && (count < maxiocount))
417        maxiocount = count;
418
419    error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTWRITE, (caddr_t) &count);
420    if (error)
421        count = 0;
422    if (count && (count < maxiocount))
423        maxiocount = count;
424
425    error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTREAD, (caddr_t) &count);
426    if (!error)
427	error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTCOUNTREAD, (caddr_t) &segcount);
428    if (error)
429        count = segcount = 0;
430    count *= segcount;
431    if (count && (count < maxiocount))
432        maxiocount = count;
433
434    error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTWRITE, (caddr_t) &count);
435    if (!error)
436	error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTCOUNTWRITE, (caddr_t) &segcount);
437    if (error)
438        count = segcount = 0;
439    count *= segcount;
440    if (count && (count < maxiocount))
441        maxiocount = count;
442
443    kprintf("max io 0x%qx bytes\n", maxiocount);
444    if (maxiocount_result)
445        *maxiocount_result = maxiocount;
446
447    error = do_ioctl(p1, p2, DKIOCISSOLIDSTATE, (caddr_t)&isssd);
448    if (!error && isssd)
449        flags |= kIOHibernateOptionSSD;
450
451    if (partition_device_result)
452        *partition_device_result = device;
453    if (image_device_result)
454        *image_device_result = target;
455    if (oflags)
456        *oflags = flags;
457
458out:
459    kprintf("kern_open_file_for_direct_io(%d)\n", error);
460
461    if (error && locked)
462    {
463        p1 = &device;
464        (void) do_ioctl(p1, p2, DKIOCUNLOCKPHYSICALEXTENTS, NULL);
465    }
466
467    if (error && ref)
468    {
469    if (ref->pinned)
470    {
471        _dk_cs_pin_t pin;
472        bzero(&pin, sizeof(pin));
473
474	    pin.cp_flags = _DKIOCCSPINDISCARDBLACKLIST;
475        p1 = &device;
476        (void) do_ioctl(p1, p2, _DKIOCCSUNPINEXTENT, (caddr_t)&pin);
477    }
478	if (ref->vp)
479	{
480	    vnode_close(ref->vp, FWRITE, ref->ctx);
481	    ref->vp = NULLVP;
482	}
483	vfs_context_rele(ref->ctx);
484	kfree(ref, sizeof(struct kern_direct_file_io_ref_t));
485	ref = NULL;
486    }
487
488    return(ref);
489}
490
491int
492kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t addr, vm_size_t len, int ioflag)
493{
494    return (vn_rdwr(UIO_WRITE, ref->vp,
495			addr, len, offset,
496			UIO_SYSSPACE, ioflag|IO_SYNC|IO_NODELOCKED|IO_UNIT,
497                        vfs_context_ucred(ref->ctx), (int *) 0,
498			vfs_context_proc(ref->ctx)));
499}
500
501
502void
503kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref,
504			      off_t write_offset, caddr_t addr, vm_size_t write_length,
505			      off_t discard_offset, off_t discard_end)
506{
507    int error;
508    _dk_cs_pin_t pin;
509    kprintf("kern_close_file_for_direct_io\n");
510
511    if (!ref) return;
512
513    if (ref->vp)
514    {
515        int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result);
516        void * p1;
517        void * p2;
518
519        if (ref->vp->v_type == VREG)
520        {
521            p1 = &ref->device;
522            p2 = kernproc;
523            do_ioctl = &file_ioctl;
524        }
525        else
526        {
527            /* Partition. */
528            p1 = ref->vp;
529            p2 = ref->ctx;
530            do_ioctl = &device_ioctl;
531        }
532        (void) do_ioctl(p1, p2, DKIOCUNLOCKPHYSICALEXTENTS, NULL);
533
534        if (ref->pinned)
535        {
536            bzero(&pin, sizeof(pin));
537            pin.cp_flags = _DKIOCCSPINDISCARDBLACKLIST;
538            (void) do_ioctl(p1, p2, _DKIOCCSUNPINEXTENT, (caddr_t)&pin);
539        }
540
541
542        if (discard_offset && discard_end && !ref->pinned)
543        {
544            (void) kern_ioctl_file_extents(ref, DKIOCUNMAP, discard_offset, discard_end);
545        }
546        if (addr && write_length)
547        {
548            (void) kern_write_file(ref, write_offset, addr, write_length, 0);
549        }
550
551        error = vnode_close(ref->vp, FWRITE, ref->ctx);
552
553        ref->vp = NULLVP;
554        kprintf("vnode_close(%d)\n", error);
555    }
556    vfs_context_rele(ref->ctx);
557    ref->ctx = NULL;
558    kfree(ref, sizeof(struct kern_direct_file_io_ref_t));
559}
560