1/*	$OpenBSD: spec_vnops.c,v 1.112 2024/02/03 18:51:58 beck Exp $	*/
2/*	$NetBSD: spec_vnops.c,v 1.29 1996/04/22 01:42:38 christos Exp $	*/
3
4/*
5 * Copyright (c) 1989, 1993
6 *	The Regents of the University of California.  All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	@(#)spec_vnops.c	8.8 (Berkeley) 11/21/94
33 */
34
35#include <sys/param.h>
36#include <sys/proc.h>
37#include <sys/systm.h>
38#include <sys/conf.h>
39#include <sys/buf.h>
40#include <sys/mount.h>
41#include <sys/vnode.h>
42#include <sys/lock.h>
43#include <sys/stat.h>
44#include <sys/errno.h>
45#include <sys/fcntl.h>
46#include <sys/disklabel.h>
47#include <sys/lockf.h>
48#include <sys/dkio.h>
49#include <sys/malloc.h>
50#include <sys/specdev.h>
51#include <sys/unistd.h>
52
/*
 * Shorthand: per-device "last read block", stored in the vnode's
 * specinfo and used by spec_read() to detect sequential access for
 * read-ahead.
 */
#define v_lastr v_specinfo->si_lastr

int	spec_open_clone(struct vop_open_args *);

/* Hash chains of special vnodes (see <sys/specdev.h> for the hashing). */
struct vnodechain speclisth[SPECHSZ];
58
/*
 * Vnode operations vector for special files (block and character
 * device vnodes).  Namespace operations that make no sense on a
 * device node are wired to vop_generic_badop; lock, unlock, islocked
 * and reclaim are nullop here.
 */
const struct vops spec_vops = {
	.vop_lookup	= vop_generic_lookup,
	.vop_create	= vop_generic_badop,
	.vop_mknod	= vop_generic_badop,
	.vop_open	= spec_open,
	.vop_close	= spec_close,
	.vop_access	= spec_access,
	.vop_getattr	= spec_getattr,
	.vop_setattr	= spec_setattr,
	.vop_read	= spec_read,
	.vop_write	= spec_write,
	.vop_ioctl	= spec_ioctl,
	.vop_kqfilter	= spec_kqfilter,
	.vop_revoke	= vop_generic_revoke,
	.vop_fsync	= spec_fsync,
	.vop_remove	= vop_generic_badop,
	.vop_link	= vop_generic_badop,
	.vop_rename	= vop_generic_badop,
	.vop_mkdir	= vop_generic_badop,
	.vop_rmdir	= vop_generic_badop,
	.vop_symlink	= vop_generic_badop,
	.vop_readdir	= vop_generic_badop,
	.vop_readlink	= vop_generic_badop,
	.vop_abortop	= vop_generic_badop,
	.vop_inactive	= spec_inactive,
	.vop_reclaim	= nullop,
	.vop_lock	= nullop,
	.vop_unlock	= nullop,
	.vop_islocked	= nullop,
	.vop_bmap	= vop_generic_bmap,
	.vop_strategy	= spec_strategy,
	.vop_print	= spec_print,
	.vop_pathconf	= spec_pathconf,
	.vop_advlock	= spec_advlock,
	.vop_bwrite	= vop_generic_bwrite,
};
95
/*
 * Open a special file.
 *
 * Enforces MNT_NODEV and the securelevel restrictions, then hands the
 * open to the driver's d_open routine.  For character devices the
 * vnode lock is dropped around d_open and reacquired afterwards.
 */
int
spec_open(void *v)
{
	struct vop_open_args *ap = v;
	struct proc *p = ap->a_p;
	struct vnode *vp = ap->a_vp;
	struct vnode *bvp;
	dev_t bdev;
	dev_t dev = (dev_t)vp->v_rdev;
	int maj = major(dev);
	int error;

	/*
	 * Don't allow open if fs is mounted -nodev.
	 */
	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
		return (ENXIO);

	switch (vp->v_type) {

	case VCHR:
		/* Reject major numbers with no driver behind them. */
		if ((u_int)maj >= nchrdev)
			return (ENXIO);
		if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
			/*
			 * When running in very secure mode, do not allow
			 * opens for writing of any disk character devices.
			 */
			if (securelevel >= 2 && cdevsw[maj].d_type == D_DISK)
				return (EPERM);
			/*
			 * When running in secure mode, do not allow opens
			 * for writing of /dev/mem, /dev/kmem, or character
			 * devices whose corresponding block devices are
			 * currently mounted.
			 */
			if (securelevel >= 1) {
				if ((bdev = chrtoblk(dev)) != NODEV &&
				    vfinddev(bdev, VBLK, &bvp) &&
				    bvp->v_usecount > 0 &&
				    (error = vfs_mountedon(bvp)))
					return (error);
				if (iskmemdev(dev))
					return (EPERM);
			}
		}
		if (cdevsw[maj].d_type == D_TTY)
			vp->v_flag |= VISTTY;
		/* Cloning devices get a fresh minor number per open. */
		if (cdevsw[maj].d_flags & D_CLONE)
			return (spec_open_clone(ap));
		/* Drop the vnode lock across d_open (it may sleep). */
		VOP_UNLOCK(vp);
		error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, p);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		return (error);

	case VBLK:
		if ((u_int)maj >= nblkdev)
			return (ENXIO);
		/*
		 * When running in very secure mode, do not allow
		 * opens for writing of any disk block devices.
		 */
		if (securelevel >= 2 && ap->a_cred != FSCRED &&
		    (ap->a_mode & FWRITE) && bdevsw[maj].d_type == D_DISK)
			return (EPERM);
		/*
		 * Do not allow opens of block devices that are
		 * currently mounted.
		 */
		if ((error = vfs_mountedon(vp)) != 0)
			return (error);
		return ((*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, p));
	/* All other vnode types: nothing for the device layer to do. */
	case VNON:
	case VLNK:
	case VDIR:
	case VREG:
	case VBAD:
	case VFIFO:
	case VSOCK:
		break;
	}
	return (0);
}
182
/*
 * Vnode op for read.
 *
 * Character devices are read directly through the driver's d_read
 * (with the vnode lock dropped).  Block devices are read through the
 * buffer cache in filesystem-fragment-sized chunks, with one block of
 * read-ahead when the access pattern looks sequential.
 */
int
spec_read(void *v)
{
	struct vop_read_args *ap = v;
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	struct buf *bp;
	daddr_t bn, nextbn, bscale;
	int bsize;
	struct partinfo dpart;
	size_t n;
	int on, majordev;
	int (*ioctl)(dev_t, u_long, caddr_t, int, struct proc *);
	int error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("spec_read mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("spec_read proc");
#endif
	if (uio->uio_resid == 0)
		return (0);

	switch (vp->v_type) {

	case VCHR:
		/* Unlock across the driver read; d_read may sleep. */
		VOP_UNLOCK(vp);
		error = (*cdevsw[major(vp->v_rdev)].d_read)
			(vp->v_rdev, uio, ap->a_ioflag);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		return (error);

	case VBLK:
		if (uio->uio_offset < 0)
			return (EINVAL);
		/*
		 * Default transfer size; replaced below by the FFS
		 * block size (frag * fsize) if the disklabel for this
		 * partition can be fetched and describes an FFS.
		 */
		bsize = BLKDEV_IOSIZE;
		if ((majordev = major(vp->v_rdev)) < nblkdev &&
		    (ioctl = bdevsw[majordev].d_ioctl) != NULL &&
		    (*ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0) {
			u_int32_t frag =
			    DISKLABELV1_FFS_FRAG(dpart.part->p_fragblock);
			u_int32_t fsize =
			    DISKLABELV1_FFS_FSIZE(dpart.part->p_fragblock);
			if (dpart.part->p_fstype == FS_BSDFFS && frag != 0 &&
			    fsize != 0)
				bsize = frag * fsize;
		}
		/* bsize expressed in DEV_BSIZE sectors, for aligning bn. */
		bscale = btodb(bsize);
		do {
			bn = btodb(uio->uio_offset) & ~(bscale - 1);
			on = uio->uio_offset % bsize;
			n = ulmin((bsize - on), uio->uio_resid);
			/*
			 * If the previous read ended right before bn,
			 * assume sequential access and read ahead one
			 * block.
			 */
			if (vp->v_lastr + bscale == bn) {
				nextbn = bn + bscale;
				error = breadn(vp, bn, bsize, &nextbn, &bsize,
				    1, &bp);
			} else
				error = bread(vp, bn, bsize, &bp);
			vp->v_lastr = bn;
			/* Trim n to what the buffer actually contains. */
			n = ulmin(n, bsize - bp->b_resid);
			if (error) {
				brelse(bp);
				return (error);
			}
			error = uiomove((char *)bp->b_data + on, n, uio);
			brelse(bp);
		} while (error == 0 && uio->uio_resid > 0 && n != 0);
		return (error);

	default:
		panic("spec_read type");
	}
	/* NOTREACHED */
}
262
263int
264spec_inactive(void *v)
265{
266	struct vop_inactive_args *ap = v;
267
268	VOP_UNLOCK(ap->a_vp);
269	return (0);
270}
271
/*
 * Vnode op for write.
 *
 * Character devices are written directly through the driver's d_write
 * (with the vnode lock dropped).  Block devices are written through
 * the buffer cache: each affected block is read in first so that a
 * partial overwrite preserves the untouched bytes.
 */
int
spec_write(void *v)
{
	struct vop_write_args *ap = v;
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	struct buf *bp;
	daddr_t bn, bscale;
	int bsize;
	struct partinfo dpart;
	size_t n;
	int on, majordev;
	int (*ioctl)(dev_t, u_long, caddr_t, int, struct proc *);
	int error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("spec_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("spec_write proc");
#endif

	switch (vp->v_type) {

	case VCHR:
		/* Unlock across the driver write; d_write may sleep. */
		VOP_UNLOCK(vp);
		error = (*cdevsw[major(vp->v_rdev)].d_write)
			(vp->v_rdev, uio, ap->a_ioflag);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		return (error);

	case VBLK:
		if (uio->uio_resid == 0)
			return (0);
		if (uio->uio_offset < 0)
			return (EINVAL);
		/*
		 * Default transfer size; replaced below by the FFS
		 * block size (frag * fsize) if the disklabel for this
		 * partition can be fetched and describes an FFS.
		 */
		bsize = BLKDEV_IOSIZE;
		if ((majordev = major(vp->v_rdev)) < nblkdev &&
		    (ioctl = bdevsw[majordev].d_ioctl) != NULL &&
		    (*ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0) {
			u_int32_t frag =
			    DISKLABELV1_FFS_FRAG(dpart.part->p_fragblock);
			u_int32_t fsize =
			    DISKLABELV1_FFS_FSIZE(dpart.part->p_fragblock);
			if (dpart.part->p_fstype == FS_BSDFFS && frag != 0 &&
			    fsize != 0)
				bsize = frag * fsize;
		}
		/* bsize expressed in DEV_BSIZE sectors, for aligning bn. */
		bscale = btodb(bsize);
		do {
			bn = btodb(uio->uio_offset) & ~(bscale - 1);
			on = uio->uio_offset % bsize;
			n = ulmin((bsize - on), uio->uio_resid);
			/* Read-modify-write: fetch the block first. */
			error = bread(vp, bn, bsize, &bp);
			n = ulmin(n, bsize - bp->b_resid);
			if (error) {
				brelse(bp);
				return (error);
			}
			error = uiomove((char *)bp->b_data + on, n, uio);
			/*
			 * Whole blocks are pushed out asynchronously at
			 * once; partial blocks are delayed-written in the
			 * hope of absorbing further writes.
			 */
			if (n + on == bsize)
				bawrite(bp);
			else
				bdwrite(bp);
		} while (error == 0 && uio->uio_resid > 0 && n != 0);
		return (error);

	default:
		panic("spec_write type");
	}
	/* NOTREACHED */
}
348
349/*
350 * Device ioctl operation.
351 */
352int
353spec_ioctl(void *v)
354{
355	struct vop_ioctl_args *ap = v;
356	dev_t dev = ap->a_vp->v_rdev;
357	int maj = major(dev);
358
359	switch (ap->a_vp->v_type) {
360
361	case VCHR:
362		return ((*cdevsw[maj].d_ioctl)(dev, ap->a_command, ap->a_data,
363		    ap->a_fflag, ap->a_p));
364
365	case VBLK:
366		return ((*bdevsw[maj].d_ioctl)(dev, ap->a_command, ap->a_data,
367		    ap->a_fflag, ap->a_p));
368
369	default:
370		panic("spec_ioctl");
371		/* NOTREACHED */
372	}
373}
374
375int
376spec_kqfilter(void *v)
377{
378	struct vop_kqfilter_args *ap = v;
379	dev_t dev;
380
381	dev = ap->a_vp->v_rdev;
382
383	switch (ap->a_vp->v_type) {
384	default:
385		if (ap->a_kn->kn_flags & (__EV_POLL | __EV_SELECT))
386			return seltrue_kqfilter(dev, ap->a_kn);
387		break;
388	case VCHR:
389		if (cdevsw[major(dev)].d_kqfilter)
390			return (*cdevsw[major(dev)].d_kqfilter)(dev, ap->a_kn);
391	}
392	return (EOPNOTSUPP);
393}
394
/*
 * Synch buffers associated with a block device.
 *
 * Walks the vnode's dirty-buffer list at splbio, starting an async
 * write for each flushable buffer, and (for MNT_WAIT) waits for the
 * I/O to drain.
 */
int
spec_fsync(void *v)
{
	struct vop_fsync_args *ap = v;
	struct vnode *vp = ap->a_vp;
	struct buf *bp;
	struct buf *nbp;
	int s;

	/* Character devices have no buffer cache state to flush. */
	if (vp->v_type == VCHR)
		return (0);
	/*
	 * Flush all dirty buffers associated with a block device.
	 */
loop:
	s = splbio();
	LIST_FOREACH_SAFE(bp, &vp->v_dirtyblkhd, b_vnbufs, nbp) {
		/* Skip buffers someone else is already working on. */
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("spec_fsync: not dirty");
		bremfree(bp);
		buf_acquire(bp);
		splx(s);
		/*
		 * Start the write, then rescan from the top: bawrite
		 * can sleep, so the dirty list may have changed.
		 */
		bawrite(bp);
		goto loop;
	}
	if (ap->a_waitfor == MNT_WAIT) {
		/* Wait for all outstanding I/O on the vnode to finish. */
		vwaitforio (vp, 0, "spec_fsync", INFSLP);

#ifdef DIAGNOSTIC
		/* New dirty buffers may have appeared while sleeping. */
		if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
			splx(s);
			vprint("spec_fsync: dirty", vp);
			goto loop;
		}
#endif
	}
	splx(s);
	return (0);
}
439
440int
441spec_strategy(void *v)
442{
443	struct vop_strategy_args *ap = v;
444	struct buf *bp = ap->a_bp;
445	int maj = major(bp->b_dev);
446
447	(*bdevsw[maj].d_strategy)(bp);
448	return (0);
449}
450
/*
 * Device close routine.
 *
 * Devices are only truly closed on last reference, unless the vnode
 * is being forcibly revoked (VXLOCK set by vclean()); in the normal
 * multi-reference case we return early without calling the driver.
 * The vnode lock, if held, is dropped around the driver's d_close.
 */
int
spec_close(void *v)
{
	struct vop_close_args *ap = v;
	struct proc *p = ap->a_p;
	struct vnode *vp = ap->a_vp;
	dev_t dev = vp->v_rdev;
	int (*devclose)(dev_t, int, int, struct proc *);
	int mode, relock, xlocked, error;
	int clone = 0;

	/* Snapshot VXLOCK: set when vclean() is revoking this vnode. */
	mtx_enter(&vnode_mtx);
	xlocked = (vp->v_lflag & VXLOCK);
	mtx_leave(&vnode_mtx);

	switch (vp->v_type) {
	case VCHR:
		/*
		 * Hack: a tty device that is a controlling terminal
		 * has a reference from the session structure.
		 * We cannot easily tell that a character device is
		 * a controlling terminal, unless it is the closing
		 * process' controlling terminal.  In that case,
		 * if the reference count is 2 (this last descriptor
		 * plus the session), release the reference from the session.
		 */
		if (vcount(vp) == 2 && p != NULL && p->p_p->ps_pgrp &&
		    vp == p->p_p->ps_pgrp->pg_session->s_ttyvp) {
			vrele(vp);
			p->p_p->ps_pgrp->pg_session->s_ttyvp = NULL;
		}
		if (cdevsw[major(dev)].d_flags & D_CLONE) {
			clone = 1;
		} else {
			/*
			 * If the vnode is locked, then we are in the midst
			 * of forcibly closing the device, otherwise we only
			 * close on last reference.
			 */
			if (vcount(vp) > 1 && !xlocked)
				return (0);
		}
		devclose = cdevsw[major(dev)].d_close;
		mode = S_IFCHR;
		break;

	case VBLK:
		/*
		 * On last close of a block device (that isn't mounted)
		 * we must invalidate any in core blocks, so that
		 * we can, for instance, change floppy disks. In order to do
		 * that, we must lock the vnode. If we are coming from
		 * vclean(), the vnode is already locked.
		 */
		if (!xlocked)
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		error = vinvalbuf(vp, V_SAVE, ap->a_cred, p, 0, INFSLP);
		if (!xlocked)
			VOP_UNLOCK(vp);
		if (error)
			return (error);
		/*
		 * We do not want to really close the device if it
		 * is still in use unless we are trying to close it
		 * forcibly. Since every use (buffer, vnode, swap, cmap)
		 * holds a reference to the vnode, and because we mark
		 * any other vnodes that alias this device, when the
		 * sum of the reference counts on all the aliased
		 * vnodes descends to one, we are on last close.
		 */
		if (vcount(vp) > 1 && !xlocked)
			return (0);
		devclose = bdevsw[major(dev)].d_close;
		mode = S_IFBLK;
		break;

	default:
		panic("spec_close: not special");
	}

	/* release lock if held and this isn't coming from vclean() */
	relock = VOP_ISLOCKED(vp) && !xlocked;
	if (relock)
		VOP_UNLOCK(vp);
	error = (*devclose)(dev, ap->a_fflag, mode, p);
	if (relock)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	/*
	 * For clone devices: free the clone unit in the parent's bitmap
	 * and drop the reference the clone held on the parent.
	 */
	if (error == 0 && clone) {
		struct vnode *pvp;

		pvp = vp->v_specparent; /* get parent device */
		clrbit(pvp->v_specbitmap, minor(dev) >> CLONE_SHIFT);
		vrele(pvp);
	}

	return (error);
}
552
553int
554spec_getattr(void *v)
555{
556	struct vop_getattr_args	*ap = v;
557	struct vnode		*vp = ap->a_vp;
558	int			 error;
559
560	if (!(vp->v_flag & VCLONE))
561		return (EBADF);
562
563	vn_lock(vp->v_specparent, LK_EXCLUSIVE|LK_RETRY);
564	error = VOP_GETATTR(vp->v_specparent, ap->a_vap, ap->a_cred, ap->a_p);
565	VOP_UNLOCK(vp->v_specparent);
566
567	return (error);
568}
569
570int
571spec_setattr(void *v)
572{
573	struct vop_getattr_args	*ap = v;
574	struct proc		*p = ap->a_p;
575	struct vnode		*vp = ap->a_vp;
576	int			 error;
577
578	if (!(vp->v_flag & VCLONE))
579		return (EBADF);
580
581	vn_lock(vp->v_specparent, LK_EXCLUSIVE|LK_RETRY);
582	error = VOP_SETATTR(vp->v_specparent, ap->a_vap, ap->a_cred, p);
583	VOP_UNLOCK(vp->v_specparent);
584
585	return (error);
586}
587
588int
589spec_access(void *v)
590{
591	struct vop_access_args	*ap = v;
592	struct vnode		*vp = ap->a_vp;
593	int			 error;
594
595	if (!(vp->v_flag & VCLONE))
596		return (EBADF);
597
598	vn_lock(vp->v_specparent, LK_EXCLUSIVE|LK_RETRY);
599	error = VOP_ACCESS(vp->v_specparent, ap->a_mode, ap->a_cred, ap->a_p);
600	VOP_UNLOCK(vp->v_specparent);
601
602	return (error);
603}
604
605/*
606 * Print out the contents of a special device vnode.
607 */
608int
609spec_print(void *v)
610{
611	struct vop_print_args *ap = v;
612
613	printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev),
614		minor(ap->a_vp->v_rdev));
615	return 0;
616}
617
618/*
619 * Return POSIX pathconf information applicable to special devices.
620 */
621int
622spec_pathconf(void *v)
623{
624	struct vop_pathconf_args *ap = v;
625	int error = 0;
626
627	switch (ap->a_name) {
628	case _PC_LINK_MAX:
629		*ap->a_retval = LINK_MAX;
630		break;
631	case _PC_MAX_CANON:
632		*ap->a_retval = MAX_CANON;
633		break;
634	case _PC_MAX_INPUT:
635		*ap->a_retval = MAX_INPUT;
636		break;
637	case _PC_CHOWN_RESTRICTED:
638		*ap->a_retval = 1;
639		break;
640	case _PC_VDISABLE:
641		*ap->a_retval = _POSIX_VDISABLE;
642		break;
643	case _PC_TIMESTAMP_RESOLUTION:
644		*ap->a_retval = 1;
645		break;
646	default:
647		error = EINVAL;
648		break;
649	}
650
651	return (error);
652}
653
654/*
655 * Special device advisory byte-level locks.
656 */
657int
658spec_advlock(void *v)
659{
660	struct vop_advlock_args *ap = v;
661	struct vnode *vp = ap->a_vp;
662
663	return (lf_advlock(&vp->v_speclockf, (off_t)0, ap->a_id,
664		ap->a_op, ap->a_fl, ap->a_flags));
665}
666
667/*
668 * Copyright (c) 2006 Pedro Martelletto <pedro@ambientworks.net>
669 * Copyright (c) 2006 Thordur Bjornsson <thib@openbsd.org>
670 *
671 * Permission to use, copy, modify, and distribute this software for any
672 * purpose with or without fee is hereby granted, provided that the above
673 * copyright notice and this permission notice appear in all copies.
674 *
675 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
676 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
677 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
678 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
679 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
680 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
681 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
682 */
683
/*
 * Debug printf for the clone-device code below; compiles away to
 * nothing unless the kernel is built with CLONE_DEBUG.
 */
#ifdef	CLONE_DEBUG
#define	DNPRINTF(m...)	do { printf(m);  } while (0)
#else
#define	DNPRINTF(m...)	/* nothing */
#endif
689
/*
 * Open a cloning character device: allocate a free clone unit in the
 * parent vnode's bitmap, create a vnode for the derived minor number
 * and open it through the driver.  On success the parent is marked
 * VCLONED and its v_data points at a cloneinfo tying parent and clone
 * together.  Called from spec_open() with the parent vnode locked.
 */
int
spec_open_clone(struct vop_open_args *ap)
{
	struct vnode *cvp, *vp = ap->a_vp;
	struct cloneinfo *cip;
	int error, i;

	DNPRINTF("cloning vnode\n");

	/* Only the base minor of a clone device may be opened. */
	if (minor(vp->v_rdev) >= (1 << CLONE_SHIFT))
		return (ENXIO);

	/* Find a free clone unit; unit 0 is never handed out. */
	for (i = 1; i < CLONE_MAPSZ * NBBY; i++)
		if (isclr(vp->v_specbitmap, i)) {
			setbit(vp->v_specbitmap, i);
			break;
		}

	if (i == CLONE_MAPSZ * NBBY)
		return (EBUSY); /* too many open instances */

	/*
	 * The clone's minor encodes the unit in the high bits and the
	 * original minor in the low CLONE_SHIFT bits.
	 */
	error = cdevvp(makedev(major(vp->v_rdev),
	    (i << CLONE_SHIFT) | minor(vp->v_rdev)), &cvp);
	if (error) {
		clrbit(vp->v_specbitmap, i);
		return (error); /* out of vnodes */
	}

	/* Drop the parent's lock across the driver open; it may sleep. */
	VOP_UNLOCK(vp);

	error = cdevsw[major(vp->v_rdev)].d_open(cvp->v_rdev, ap->a_mode,
	    S_IFCHR, ap->a_p);

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	if (error) {
		/* Undo: release the clone vnode and free its unit bit. */
		vput(cvp);
		clrbit(vp->v_specbitmap, i);
		return (error); /* device open failed */
	}

	cvp->v_flag |= VCLONE;

	/* Stash the parent's original v_data; link parent and clone. */
	cip = malloc(sizeof(struct cloneinfo), M_TEMP, M_WAITOK);
	cip->ci_data = vp->v_data;
	cip->ci_vp = cvp;

	cvp->v_specparent = vp;
	vp->v_flag |= VCLONED;
	vp->v_data = cip;

	DNPRINTF("clone of vnode %p is vnode %p\n", vp, cvp);

	return (0); /* device cloned */
}
745