driver.c revision 7656:2621e50fdf4a
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#include <sys/types.h>
27#include <sys/t_lock.h>
28#include <sys/param.h>
29#include <sys/conf.h>
30#include <sys/systm.h>
31#include <sys/sysmacros.h>
32#include <sys/buf.h>
33#include <sys/cred.h>
34#include <sys/user.h>
35#include <sys/stat.h>
36#include <sys/uio.h>
37#include <sys/vnode.h>
38#include <sys/fs/snode.h>
39#include <sys/open.h>
40#include <sys/kmem.h>
41#include <sys/file.h>
42#include <sys/debug.h>
43#include <sys/tnf_probe.h>
44
45/* Don't #include <sys/ddi.h> - it #undef's getmajor() */
46
47#include <sys/sunddi.h>
48#include <sys/sunndi.h>
49#include <sys/sunpm.h>
50#include <sys/ddi_impldefs.h>
51#include <sys/ndi_impldefs.h>
52#include <sys/esunddi.h>
53#include <sys/autoconf.h>
54#include <sys/modctl.h>
55#include <sys/epm.h>
56#include <sys/dacf.h>
57#include <sys/sunmdi.h>
58#include <sys/instance.h>
59#include <sys/sdt.h>
60
61static void i_attach_ctlop(dev_info_t *, ddi_attach_cmd_t, ddi_pre_post_t, int);
62static void i_detach_ctlop(dev_info_t *, ddi_detach_cmd_t, ddi_pre_post_t, int);
63
64/* decide what to do when a double dev_lclose is detected */
65#ifdef	DEBUG
66int		dev_lclose_ce = CE_PANIC;
67#else	/* DEBUG */
68int		dev_lclose_ce = CE_WARN;
69#endif	/* DEBUG */
70
71/*
72 * Configuration-related entry points for nexus and leaf drivers
73 */
74int
75devi_identify(dev_info_t *devi)
76{
77	struct dev_ops *ops;
78	int (*fn)(dev_info_t *);
79
80	if ((ops = ddi_get_driver(devi)) == NULL ||
81	    (fn = ops->devo_identify) == NULL)
82		return (-1);
83
84	return ((*fn)(devi));
85}
86
87int
88devi_probe(dev_info_t *devi)
89{
90	int rv, probe_failed;
91	pm_ppm_cookie_t ppm_cookie;
92	struct dev_ops *ops;
93	int (*fn)(dev_info_t *);
94
95	ops = ddi_get_driver(devi);
96	ASSERT(ops);
97
98	pm_pre_probe(devi, &ppm_cookie);
99
100	/*
101	 * probe(9E) in 2.0 implies that you can get
102	 * away with not writing one of these .. so we
103	 * pretend we're 'nulldev' if we don't find one (sigh).
104	 */
105	if ((fn = ops->devo_probe) == NULL)
106		rv = DDI_PROBE_DONTCARE;
107	else
108		rv = (*fn)(devi);
109
110	switch (rv) {
111	case DDI_PROBE_DONTCARE:
112	case DDI_PROBE_SUCCESS:
113		probe_failed = 0;
114		break;
115	default:
116		probe_failed = 1;
117		break;
118	}
119	pm_post_probe(&ppm_cookie, rv, probe_failed);
120
121	return (rv);
122}
123
124
125/*
126 * devi_attach()
127 * 	attach a device instance to the system if the driver supplies an
128 * 	attach(9E) entrypoint.
129 */
130int
131devi_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
132{
133	struct dev_ops *ops;
134	int error;
135	int (*fn)(dev_info_t *, ddi_attach_cmd_t);
136	pm_ppm_cookie_t pc;
137
138	if ((error = mdi_pre_attach(devi, cmd)) != DDI_SUCCESS) {
139		return (error);
140	}
141
142	pm_pre_attach(devi, &pc, cmd);
143
144	if ((cmd == DDI_RESUME || cmd == DDI_PM_RESUME) &&
145	    e_ddi_parental_suspend_resume(devi)) {
146		error = e_ddi_resume(devi, cmd);
147		goto done;
148	}
149	ops = ddi_get_driver(devi);
150	ASSERT(ops);
151	if ((fn = ops->devo_attach) == NULL) {
152		error = DDI_FAILURE;
153		goto done;
154	}
155
156	/*
157	 * Call the driver's attach(9e) entrypoint
158	 */
159	i_attach_ctlop(devi, cmd, DDI_PRE, 0);
160	error = (*fn)(devi, cmd);
161	i_attach_ctlop(devi, cmd, DDI_POST, error);
162
163done:
164	pm_post_attach(&pc, error);
165	mdi_post_attach(devi, cmd, error);
166
167	return (error);
168}
169
170/*
171 * devi_detach()
172 * 	detach a device instance from the system if the driver supplies a
173 * 	detach(9E) entrypoint.
174 */
175int
176devi_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
177{
178	struct dev_ops *ops;
179	int error;
180	int (*fn)(dev_info_t *, ddi_detach_cmd_t);
181	pm_ppm_cookie_t pc;
182
183	ASSERT(cmd == DDI_SUSPEND || cmd == DDI_PM_SUSPEND ||
184	    cmd == DDI_DETACH);
185
186	if ((cmd == DDI_SUSPEND || cmd == DDI_PM_SUSPEND) &&
187	    e_ddi_parental_suspend_resume(devi)) {
188		return (e_ddi_suspend(devi, cmd));
189	}
190	ops = ddi_get_driver(devi);
191	ASSERT(ops);
192	if ((fn = ops->devo_detach) == NULL)
193		return (DDI_FAILURE);
194
195	if ((error = mdi_pre_detach(devi, cmd)) != DDI_SUCCESS) {
196		return (error);
197	}
198	i_detach_ctlop(devi, cmd, DDI_PRE, 0);
199	pm_pre_detach(devi, cmd, &pc);
200
201	/*
202	 * Call the driver's detach routine
203	 */
204	error = (*fn)(devi, cmd);
205
206	pm_post_detach(&pc, error);
207	i_detach_ctlop(devi, cmd, DDI_POST, error);
208	mdi_post_detach(devi, cmd, error);
209
210	return (error);
211}
212
213static void
214i_attach_ctlop(dev_info_t *devi, ddi_attach_cmd_t cmd, ddi_pre_post_t w,
215    int ret)
216{
217	int error;
218	struct attachspec as;
219	dev_info_t *pdip = ddi_get_parent(devi);
220
221	as.cmd = cmd;
222	as.when = w;
223	as.pdip = pdip;
224	as.result = ret;
225	(void) ddi_ctlops(devi, devi, DDI_CTLOPS_ATTACH, &as, &error);
226}
227
228static void
229i_detach_ctlop(dev_info_t *devi, ddi_detach_cmd_t cmd, ddi_pre_post_t w,
230    int ret)
231{
232	int error;
233	struct detachspec ds;
234	dev_info_t *pdip = ddi_get_parent(devi);
235
236	ds.cmd = cmd;
237	ds.when = w;
238	ds.pdip = pdip;
239	ds.result = ret;
240	(void) ddi_ctlops(devi, devi, DDI_CTLOPS_DETACH, &ds, &error);
241}
242
243/*
244 * This entry point not defined by Solaris 2.0 DDI/DKI, so
245 * its inclusion here is somewhat moot.
246 */
247int
248devi_reset(dev_info_t *devi, ddi_reset_cmd_t cmd)
249{
250	struct dev_ops *ops;
251	int (*fn)(dev_info_t *, ddi_reset_cmd_t);
252
253	if ((ops = ddi_get_driver(devi)) == NULL ||
254	    (fn = ops->devo_reset) == NULL)
255		return (DDI_FAILURE);
256
257	return ((*fn)(devi, cmd));
258}
259
260int
261devi_quiesce(dev_info_t *devi)
262{
263	struct dev_ops *ops;
264	int (*fn)(dev_info_t *);
265
266	if (((ops = ddi_get_driver(devi)) == NULL) ||
267	    (ops->devo_rev < 4) || ((fn = ops->devo_quiesce) == NULL))
268		return (DDI_FAILURE);
269
270	return ((*fn)(devi));
271}
272
273/*
274 * Leaf driver entry points. The following [cb]dev_* functions are *not* part
275 * of the DDI, please use functions defined in <sys/sunldi.h> and driver_lyr.c.
276 */
277int
278dev_open(dev_t *devp, int flag, int type, struct cred *cred)
279{
280	struct cb_ops   *cb;
281
282	cb = devopsp[getmajor(*devp)]->devo_cb_ops;
283	return ((*cb->cb_open)(devp, flag, type, cred));
284}
285
286int
287dev_close(dev_t dev, int flag, int type, struct cred *cred)
288{
289	struct cb_ops   *cb;
290
291	cb = (devopsp[getmajor(dev)])->devo_cb_ops;
292	return ((*cb->cb_close)(dev, flag, type, cred));
293}
294
295/*
296 * New Leaf driver open entry point.  We make a vnode and go through specfs
297 * in order to obtain open close exclusions guarantees.  Note that we drop
298 * OTYP_LYR if it was specified - we are going through specfs and it provides
299 * last close semantics (FKLYR is provided to open(9E)).  Also, since
300 * spec_open will drive attach via e_ddi_hold_devi_by_dev for a makespecvp
301 * vnode with no SDIP_SET on the common snode, the dev_lopen caller no longer
302 * needs to call ddi_hold_installed_driver.
303 */
304int
305dev_lopen(dev_t *devp, int flag, int otype, struct cred *cred)
306{
307	struct vnode	*vp;
308	int		error;
309	struct vnode	*cvp;
310
311	vp = makespecvp(*devp, (otype == OTYP_BLK) ? VBLK : VCHR);
312	error = VOP_OPEN(&vp, flag | FKLYR, cred, NULL);
313	if (error == 0) {
314		/* Pick up the (possibly) new dev_t value. */
315		*devp = vp->v_rdev;
316
317		/*
318		 * Place extra hold on the common vnode, which contains the
319		 * open count, so that it is not destroyed by the VN_RELE of
320		 * the shadow makespecvp vnode below.
321		 */
322		cvp = STOV(VTOCS(vp));
323		VN_HOLD(cvp);
324	}
325
326	/* release the shadow makespecvp vnode. */
327	VN_RELE(vp);
328	return (error);
329}
330
331/*
332 * Leaf driver close entry point.  We make a vnode and go through specfs in
333 * order to obtain open close exclusions guarantees.  Note that we drop
334 * OTYP_LYR if it was specified - we are going through specfs and it provides
335 * last close semantics (FLKYR is provided to close(9E)).
336 */
337int
338dev_lclose(dev_t dev, int flag, int otype, struct cred *cred)
339{
340	struct vnode	*vp;
341	int		error;
342	struct vnode	*cvp;
343	char		*funcname;
344	ulong_t		offset;
345
346	vp = makespecvp(dev, (otype == OTYP_BLK) ? VBLK : VCHR);
347	error = VOP_CLOSE(vp, flag | FKLYR, 1, (offset_t)0, cred, NULL);
348
349	/*
350	 * Release the extra dev_lopen hold on the common vnode. We inline a
351	 * VN_RELE(cvp) call so that we can detect more dev_lclose calls than
352	 * dev_lopen calls without panic. See vn_rele.  If our inline of
353	 * vn_rele called VOP_INACTIVE(cvp, CRED(), ...) we would panic on the
354	 * "release the makespecvp vnode" VN_RELE(vp) that follows  - so
355	 * instead we diagnose this situation.  Note that the driver has
356	 * still seen a double close(9E), but that would have occurred with
357	 * the old dev_close implementation too.
358	 */
359	cvp = STOV(VTOCS(vp));
360	mutex_enter(&cvp->v_lock);
361	switch (cvp->v_count) {
362	default:
363		cvp->v_count--;
364		break;
365
366	case 0:
367		VTOS(vp)->s_commonvp = NULL;	/* avoid panic */
368		/*FALLTHROUGH*/
369	case 1:
370		/*
371		 * The following message indicates a serious problem in the
372		 * identified driver, the driver should be fixed. If obtaining
373		 * a panic dump is needed to diagnose the driver problem then
374		 * adding "set dev_lclose_ce=3" to /etc/system will cause a
375		 * panic when this occurs.
376		 */
377		funcname = modgetsymname((uintptr_t)caller(), &offset);
378		cmn_err(dev_lclose_ce, "dev_lclose: extra close of dev_t 0x%lx "
379		    "from %s`%s()", dev, mod_containing_pc(caller()),
380		    funcname ? funcname : "unknown...");
381		break;
382	}
383	mutex_exit(&cvp->v_lock);
384
385	/* release the makespecvp vnode. */
386	VN_RELE(vp);
387	return (error);
388}
389
390/*
391 * Returns -1 or the instance number of the given dev_t as
392 * interpreted by the device driver.  The code may load the driver
393 * but it does not attach any instances.
394 *
395 * Instance is supposed to be a int but drivers have assumed that
396 * the pointer was a pointer to "void *" instead of a pointer to
397 * "int *" so we now explicitly pass a pointer to "void *" and then
398 * cast the result to an int when returning the value.
399 */
400int
401dev_to_instance(dev_t dev)
402{
403	major_t		major = getmajor(dev);
404	struct dev_ops	*ops;
405	void		*vinstance;
406	int		error;
407
408	/* verify that the major number is reasonable and driver is loaded */
409	if ((major >= devcnt) ||
410	    ((ops = mod_hold_dev_by_major(major)) == NULL))
411		return (-1);
412	ASSERT(CB_DRV_INSTALLED(ops));
413
414	/* verify that it supports the getinfo(9E) entry point */
415	if (ops->devo_getinfo == NULL) {
416		mod_rele_dev_by_major(major);
417		return (-1);
418	}
419
420	/* ask the driver to extract the instance number from the devt */
421	error = (*ops->devo_getinfo)(NULL, DDI_INFO_DEVT2INSTANCE,
422	    (void *)dev, &vinstance);
423
424	/* release the driver */
425	mod_rele_dev_by_major(major);
426
427	if (error != DDI_SUCCESS)
428		return (-1);
429
430	return ((int)(uintptr_t)vinstance);
431}
432
433static void
434bdev_strategy_tnf_probe(struct buf *bp)
435{
436	/* Kernel probe */
437	TNF_PROBE_5(strategy, "io blockio", /* CSTYLED */,
438	    tnf_device, device, bp->b_edev,
439	    tnf_diskaddr, block, bp->b_lblkno,
440	    tnf_size, size, bp->b_bcount,
441	    tnf_opaque, buf, bp,
442	    tnf_bioflags, flags, bp->b_flags);
443}
444
445int
446bdev_strategy(struct buf *bp)
447{
448	struct dev_ops *ops;
449
450	ops = devopsp[getmajor(bp->b_edev)];
451
452	/*
453	 * Before we hit the io:::start probe, we need to fill in the b_dip
454	 * field of the buf structure.  This should be -- for the most part --
455	 * incredibly cheap.  If you're in this code looking to bum cycles,
456	 * there is almost certainly bigger game further down the I/O path...
457	 */
458	(void) ops->devo_getinfo(NULL, DDI_INFO_DEVT2DEVINFO,
459	    (void *)bp->b_edev, (void **)&bp->b_dip);
460
461	DTRACE_IO1(start, struct buf *, bp);
462	bp->b_flags |= B_STARTED;
463
464	/*
465	 * Call the TNF probe here instead of the inline code
466	 * to force our compiler to use the tail call optimization.
467	 */
468	bdev_strategy_tnf_probe(bp);
469
470	return (ops->devo_cb_ops->cb_strategy(bp));
471}
472
473int
474bdev_print(dev_t dev, caddr_t str)
475{
476	struct cb_ops	*cb;
477
478	cb = devopsp[getmajor(dev)]->devo_cb_ops;
479	return ((*cb->cb_print)(dev, str));
480}
481
482/*
483 * Return number of DEV_BSIZE byte blocks.
484 */
485int
486bdev_size(dev_t dev)
487{
488	uint_t		nblocks;
489	uint_t		blksize;
490
491	if ((nblocks = e_ddi_getprop(dev, VBLK, "nblocks",
492	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, -1)) == -1)
493		return (-1);
494
495	/* Get blksize, default to DEV_BSIZE */
496	if ((blksize = e_ddi_getprop(dev, VBLK, "blksize",
497	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, -1)) == -1)
498		blksize = e_ddi_getprop(DDI_DEV_T_ANY, VBLK, "device-blksize",
499		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, DEV_BSIZE);
500
501	if (blksize >= DEV_BSIZE)
502		return (nblocks * (blksize / DEV_BSIZE));
503	else
504		return (nblocks / (DEV_BSIZE / blksize));
505}
506
507/*
508 * Same for 64-bit Nblocks property
509 */
510uint64_t
511bdev_Size(dev_t dev)
512{
513	uint64_t	nblocks;
514	uint_t		blksize;
515
516	if ((nblocks = e_ddi_getprop_int64(dev, VBLK, "Nblocks",
517	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, -1)) == -1)
518		return (-1);
519
520	/* Get blksize, default to DEV_BSIZE */
521	if ((blksize = e_ddi_getprop(dev, VBLK, "blksize",
522	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, -1)) == -1)
523		blksize = e_ddi_getprop(DDI_DEV_T_ANY, VBLK, "device-blksize",
524		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, DEV_BSIZE);
525
526	if (blksize >= DEV_BSIZE)
527		return (nblocks * (blksize / DEV_BSIZE));
528	else
529		return (nblocks / (DEV_BSIZE / blksize));
530}
531
532int
533bdev_dump(dev_t dev, caddr_t addr, daddr_t blkno, int blkcnt)
534{
535	struct cb_ops	*cb;
536
537	cb = devopsp[getmajor(dev)]->devo_cb_ops;
538	return ((*cb->cb_dump)(dev, addr, blkno, blkcnt));
539}
540
541int
542cdev_read(dev_t dev, struct uio *uiop, struct cred *cred)
543{
544	struct cb_ops	*cb;
545
546	cb = devopsp[getmajor(dev)]->devo_cb_ops;
547	return ((*cb->cb_read)(dev, uiop, cred));
548}
549
550int
551cdev_write(dev_t dev, struct uio *uiop, struct cred *cred)
552{
553	struct cb_ops	*cb;
554
555	cb = devopsp[getmajor(dev)]->devo_cb_ops;
556	return ((*cb->cb_write)(dev, uiop, cred));
557}
558
559int
560cdev_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, struct cred *cred,
561    int *rvalp)
562{
563	struct cb_ops	*cb;
564
565	cb = devopsp[getmajor(dev)]->devo_cb_ops;
566	return ((*cb->cb_ioctl)(dev, cmd, arg, mode, cred, rvalp));
567}
568
569int
570cdev_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
571	size_t *maplen, uint_t mode)
572{
573	struct cb_ops	*cb;
574
575	cb = devopsp[getmajor(dev)]->devo_cb_ops;
576	return ((*cb->cb_devmap)(dev, dhp, off, len, maplen, mode));
577}
578
/*
 * Call the caller-supplied mapping function with (dev, off, prot) and
 * return its result unchanged.  mapfunc must not be NULL.
 */
int
cdev_mmap(int (*mapfunc)(dev_t, off_t, int), dev_t dev, off_t off, int prot)
{
	int rv;

	rv = mapfunc(dev, off, prot);
	return (rv);
}
584
585int
586cdev_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
587	    uint_t prot, uint_t maxprot, uint_t flags, cred_t *credp)
588{
589	struct cb_ops	*cb;
590
591	cb = devopsp[getmajor(dev)]->devo_cb_ops;
592	return ((*cb->cb_segmap)(dev, off, as, addrp,
593	    len, prot, maxprot, flags, credp));
594}
595
596int
597cdev_poll(dev_t dev, short events, int anyyet, short *reventsp,
598	struct pollhead **pollhdrp)
599{
600	struct cb_ops	*cb;
601
602	cb = devopsp[getmajor(dev)]->devo_cb_ops;
603	return ((*cb->cb_chpoll)(dev, events, anyyet, reventsp, pollhdrp));
604}
605
606/*
607 * A 'size' property can be provided by a VCHR device.
608 *
609 * Since it's defined as zero for STREAMS devices, so we avoid the
610 * overhead of looking it up.  Note also that we don't force an
611 * unused driver into memory simply to ask about it's size.  We also
612 * don't bother to ask it its size unless it's already been attached
613 * (the attach routine is the earliest place the property will be created)
614 *
615 * XXX	In an ideal world, we'd call this at VOP_GETATTR() time.
616 */
617int
618cdev_size(dev_t dev)
619{
620	major_t maj;
621	struct devnames *dnp;
622
623	if ((maj = getmajor(dev)) >= devcnt)
624		return (0);
625
626	dnp = &(devnamesp[maj]);
627	LOCK_DEV_OPS(&dnp->dn_lock);
628	if (devopsp[maj] && devopsp[maj]->devo_cb_ops &&
629	    !devopsp[maj]->devo_cb_ops->cb_str) {
630		UNLOCK_DEV_OPS(&dnp->dn_lock);
631		return (e_ddi_getprop(dev, VCHR, "size",
632		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, 0));
633	}
634	UNLOCK_DEV_OPS(&dnp->dn_lock);
635	return (0);
636}
637
638/*
639 * same for 64-bit Size property
640 */
641uint64_t
642cdev_Size(dev_t dev)
643{
644	major_t maj;
645	struct devnames *dnp;
646
647	if ((maj = getmajor(dev)) >= devcnt)
648		return (0);
649
650	dnp = &(devnamesp[maj]);
651	LOCK_DEV_OPS(&dnp->dn_lock);
652	if (devopsp[maj] && devopsp[maj]->devo_cb_ops &&
653	    !devopsp[maj]->devo_cb_ops->cb_str) {
654		UNLOCK_DEV_OPS(&dnp->dn_lock);
655		return (e_ddi_getprop_int64(dev, VCHR, "Size",
656		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, 0));
657	}
658	UNLOCK_DEV_OPS(&dnp->dn_lock);
659	return (0);
660}
661
662/*
663 * XXX	This routine is poorly named, because block devices can and do
664 *	have properties (see bdev_size() above).
665 *
666 * XXX	fix the comment in devops.h that claims that cb_prop_op
667 *	is character-only.
668 */
669int
670cdev_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
671    char *name, caddr_t valuep, int *lengthp)
672{
673	struct cb_ops	*cb;
674
675	if ((cb = devopsp[DEVI(dip)->devi_major]->devo_cb_ops) == NULL)
676		return (DDI_PROP_NOT_FOUND);
677
678	return ((*cb->cb_prop_op)(dev, dip, prop_op, mod_flags,
679	    name, valuep, lengthp));
680}
681