drcompat.c revision 7862:f8b6a07acfd6
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*
27 * Standard module for handling DLPI Style 2 attach/detach
28 */
29
30#include <sys/types.h>
31#include <sys/conf.h>
32#include <sys/modctl.h>
33#include <sys/cmn_err.h>
34#include <sys/sunddi.h>
35#include <sys/esunddi.h>
36#include <sys/strsubr.h>
37#include <sys/ddi.h>
38#include <sys/dlpi.h>
39#include <sys/strsun.h>
40#include <sys/policy.h>
41
/* Forward declaration; the streamtab is defined after the qinit structs. */
static struct streamtab drstab;

/* STREAMS module switch entry: module name, stream table, MT-safe flag. */
static struct fmodsw fsw = {
	DRMODNAME,
	&drstab,
	D_MP
};
49
50
51/*
52 * Module linkage information for the kernel.
53 */
54
55static struct modlstrmod modlstrmod = {
56	&mod_strmodops, "dr compatibility for DLPI style 2 drivers", &fsw
57};
58
59
60static struct modlinkage modlinkage = {
61	MODREV_1, &modlstrmod, NULL
62};
63
64
65int
66_init(void)
67{
68	return (mod_install(&modlinkage));
69}
70
71int
72_fini(void)
73{
74	return (mod_remove(&modlinkage));
75}
76
77int
78_info(struct modinfo *modinfop)
79{
80	return (mod_info(&modlinkage, modinfop));
81}
82
83
/* STREAMS entry points implemented by this module. */
static int	dropen(queue_t *, dev_t *, int, int, cred_t *);
static int	drclose(queue_t *, int, cred_t *);
static int	drrput(queue_t *, mblk_t *);
static int	drwput(queue_t *, mblk_t *);

/* module_info: id, name, min packet size, max packet size, hi/lo water. */
static struct module_info drinfo = {
	0,
	DRMODNAME,
	0,
	INFPSZ,
	1,
	0
};
97
/* Read-side queue init: drrput intercepts acks coming up from the driver. */
static struct qinit drrinit = {
	(int (*)())drrput,
	NULL,
	dropen,
	drclose,
	NULL,
	&drinfo
};

/* Write-side queue init: drwput intercepts attach/detach requests. */
static struct qinit drwinit = {
	(int (*)())drwput,
	NULL,
	NULL,
	NULL,
	NULL,
	&drinfo
};

static struct streamtab drstab = {
	&drrinit,
	&drwinit,
	NULL,
	NULL
};
122
123/*
124 * This module is pushed directly on top of the bottom driver
125 * in a DLPI style-2 stream by stropen(). It intercepts
126 * DL_ATTACH_REQ/DL_DETACH_REQ messages on the write side
127 * and acks on the read side, calls qassociate where needed.
128 * The primary purpose is to workaround a DR race condition
129 * affecting non-DDI compliant DLPI style 2 drivers, which may
130 * cause the system to panic.
131 *
132 * The following action is taken:
133 * Write side (drwput):
134 *	attach request:	hold driver instance assuming ppa == instance.
135 *		This way, the instance cannot be detached while the
136 *		driver is processing DL_ATTACH_REQ.
137 *
138 *		On a successful hold, store the dip in a ring buffer
 *		to be processed later by the read side.
140 *		If hold fails (most likely ppa != instance), we store
141 *		NULL in the ring buffer and read side won't take
142 *		any action on ack.
143 *
144 * Read side (drrput):
145 *	attach success: if (dip held on write side) associate queue with dip
146 *	attach failure:	if (dip held on write side) release hold on dip
147 *	detach success: associate queue with NULL
148 *	detach failure:	do nothing
149 *
150 * The module assumes that incoming DL_ATTACH_REQ/DL_DETACH_REQ
151 * messages are ordered (non-concurrent) and the bottom
152 * driver processes them and sends acknowledgements in the same
153 * order. This assumption is reasonable because concurrent
154 * association results in non-deterministic queue behavior.
155 * The module is coded carefully such that unordered messages
156 * do not result in a system panic.
157 *
158 * The module handles multiple outstanding messages queued
159 * in the bottom driver. Messages processed on the write side
160 * but not yet arrived at read side are placed in the ring buffer
161 * dr_dip[], between dr_nfirst and dr_nlast. The write side is
162 * producer and the read side is the consumer. The buffer is full
163 * when dr_nfirst == dr_nlast.
164 *
165 * The current size of the ring buffer is 64 (MAX_DLREQS) per stream.
166 * During normal testing, we have not seen outstanding messages
167 * above 10.
168 */
169
170#define	MAX_DLREQS	64
171#define	INCR(x)		{(x)++; if ((x) >= MAX_DLREQS) (x) = 0; }
172
173struct drstate {
174	kmutex_t dr_lock;
175	major_t dr_major;
176	int dr_nfirst;
177	int dr_nlast;
178	dev_info_t *dr_dip[MAX_DLREQS];
179};
180
181/* ARGSUSED1 */
182static int
183dropen(queue_t *q, dev_t *devp, int oflag, int sflag, cred_t *crp)
184{
185	struct drstate *dsp;
186
187	if (sflag != MODOPEN) {	/* must be a pushed module */
188		return (EINVAL);
189	}
190
191	if (secpolicy_net_rawaccess(crp) != 0) {
192		return (EPERM);
193	}
194
195	if (q->q_ptr != NULL) {
196		return (0);	/* already open */
197	}
198
199	dsp = kmem_zalloc(sizeof (*dsp), KM_SLEEP);
200	dsp->dr_major = getmajor(*devp);
201	mutex_init(&dsp->dr_lock, NULL, MUTEX_DEFAULT, NULL);
202	q->q_ptr = OTHERQ(q)->q_ptr = dsp;
203	qprocson(q);
204	ddi_assoc_queue_with_devi(q, NULL);
205	return (0);
206}
207
208/* ARGSUSED1 */
209static int
210drclose(queue_t *q, int cflag, cred_t *crp)
211{
212	struct drstate *dsp = q->q_ptr;
213
214	ASSERT(dsp);
215	ddi_assoc_queue_with_devi(q, NULL);
216	qprocsoff(q);
217
218	mutex_destroy(&dsp->dr_lock);
219	kmem_free(dsp, sizeof (*dsp));
220	q->q_ptr = NULL;
221
222	return (0);
223}
224
/*
 * Read-side put: consumer of the dr_dip[] ring buffer.  Watches for the
 * bottom driver's acks to DL_ATTACH_REQ/DL_DETACH_REQ and performs the
 * queue/devinfo (dis)association deferred from drwput().  All other
 * messages pass through untouched.
 */
static int
drrput(queue_t *q, mblk_t *mp)
{
	struct drstate *dsp;
	union DL_primitives *dlp;
	dev_info_t *dip;

	switch (DB_TYPE(mp)) {
	case M_PROTO:
	case M_PCPROTO:
		break;
	default:
		putnext(q, mp);
		return (0);
	}

	/* make sure size is sufficient for dl_primitive */
	if (MBLKL(mp) < sizeof (t_uscalar_t)) {
		putnext(q, mp);
		return (0);
	}

	dlp = (union DL_primitives *)mp->b_rptr;
	switch (dlp->dl_primitive) {
	case DL_OK_ACK: {
		/* check for proper size, let upper layer deal with error */
		if (MBLKL(mp) < DL_OK_ACK_SIZE) {
			putnext(q, mp);
			return (0);
		}

		dsp = q->q_ptr;
		switch (dlp->ok_ack.dl_correct_primitive) {
		case DL_ATTACH_REQ:
			/*
			 * ddi_assoc_queue_with_devi() will hold dip,
			 * so release after association.
			 *
			 * dip is NULL means we didn't hold dip on the
			 * write side (unlikely, but possible), so we
			 * do nothing.
			 */
			mutex_enter(&dsp->dr_lock);
			dip = dsp->dr_dip[dsp->dr_nlast];
			dsp->dr_dip[dsp->dr_nlast] = NULL;
			INCR(dsp->dr_nlast);
			mutex_exit(&dsp->dr_lock);
			if (dip) {
				ddi_assoc_queue_with_devi(q, dip);
				ddi_release_devi(dip);
			}
			break;

		case DL_DETACH_REQ:
			/* Successful detach: break the association. */
			ddi_assoc_queue_with_devi(q, NULL);
			break;
		default:
			break;
		}
		break;
	}
	case DL_ERROR_ACK:
		if (dlp->error_ack.dl_error_primitive != DL_ATTACH_REQ)
			break;

		/* Failed attach: consume the ring slot for this request. */
		dsp = q->q_ptr;
		mutex_enter(&dsp->dr_lock);
		dip = dsp->dr_dip[dsp->dr_nlast];
		dsp->dr_dip[dsp->dr_nlast] = NULL;
		INCR(dsp->dr_nlast);
		mutex_exit(&dsp->dr_lock);
		/*
		 * Release dip on attach failure
		 */
		if (dip) {
			ddi_release_devi(dip);
		}
		break;
	default:
		break;
	}

	putnext(q, mp);
	return (0);
}
309
310/*
311 * Detect dl attach, hold the dip to prevent it from detaching
312 */
313static int
314drwput(queue_t *q, mblk_t *mp)
315{
316	struct drstate *dsp;
317	union DL_primitives *dlp;
318	dev_info_t *dip;
319
320	switch (DB_TYPE(mp)) {
321	case M_PROTO:
322	case M_PCPROTO:
323		break;
324	default:
325		putnext(q, mp);
326		return (0);
327	}
328
329	/* make sure size is sufficient for dl_primitive */
330	if (MBLKL(mp) < sizeof (t_uscalar_t)) {
331		putnext(q, mp);
332		return (0);
333	}
334
335	dlp = (union DL_primitives *)mp->b_rptr;
336	switch (dlp->dl_primitive) {
337	case DL_ATTACH_REQ:
338		/*
339		 * Check for proper size of the message.
340		 *
341		 * If size is correct, get the ppa and attempt to
342		 * hold the device assuming ppa is instance.
343		 *
344		 * If size is wrong, we can't get the ppa, but
345		 * still increment dr_nfirst because the read side
346		 * will get a error ack on DL_ATTACH_REQ.
347		 */
348		dip = NULL;
349		dsp = q->q_ptr;
350		if (MBLKL(mp) >= DL_OK_ACK_SIZE) {
351			dip = ddi_hold_devi_by_instance(dsp->dr_major,
352			    dlp->attach_req.dl_ppa, E_DDI_HOLD_DEVI_NOATTACH);
353		}
354
355		mutex_enter(&dsp->dr_lock);
356		dsp->dr_dip[dsp->dr_nfirst] = dip;
357		INCR(dsp->dr_nfirst);
358		/*
359		 * Check if ring buffer is full. If so, assert in debug
360		 * kernel and produce a warning in non-debug kernel.
361		 */
362		ASSERT(dsp->dr_nfirst != dsp->dr_nlast);
363		if (dsp->dr_nfirst == dsp->dr_nlast) {
364			cmn_err(CE_WARN, "drcompat: internal buffer full");
365		}
366		mutex_exit(&dsp->dr_lock);
367		break;
368	default:
369		break;
370	}
371
372	putnext(q, mp);
373	return (0);
374}
375