/*-
 * Copyright (c) 2012 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Edward Tomasz Napierala under sponsorship
 * from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * iSCSI Common Layer.  It's used by both the initiator and target to send
 * and receive iSCSI PDUs.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/dev/iscsi/icl.c 307378 2016-10-15 17:39:40Z mav $");

#include <sys/param.h>
#include <sys/capsicum.h>
41#include <sys/condvar.h>
42#include <sys/conf.h>
43#include <sys/file.h>
44#include <sys/kernel.h>
45#include <sys/kthread.h>
46#include <sys/lock.h>
47#include <sys/mbuf.h>
48#include <sys/mutex.h>
49#include <sys/module.h>
50#include <sys/protosw.h>
51#include <sys/socket.h>
52#include <sys/socketvar.h>
53#include <sys/sysctl.h>
54#include <sys/systm.h>
55#include <sys/sx.h>
56#include <sys/uio.h>
57#include <vm/uma.h>
58#include <netinet/in.h>
59#include <netinet/tcp.h>
60
61#include <dev/iscsi/icl.h>
62#include <dev/iscsi/iscsi_proto.h>
63
/* Tunables/sysctls under kern.icl controlling diagnostics and buffering. */
SYSCTL_NODE(_kern, OID_AUTO, icl, CTLFLAG_RD, 0, "iSCSI Common Layer");
/* 0 = quiet, 1 = warnings (ICL_WARN), >1 = also debug chatter (ICL_DEBUG). */
static int debug = 1;
TUNABLE_INT("kern.icl.debug", &debug);
SYSCTL_INT(_kern_icl, OID_AUTO, debug, CTLFLAG_RWTUN,
    &debug, 0, "Enable debug messages");
/* When non-zero, icl_conn_send_pdus() merges queued PDUs into one sosend(). */
static int coalesce = 1;
TUNABLE_INT("kern.icl.coalesce", &coalesce);
SYSCTL_INT(_kern_icl, OID_AUTO, coalesce, CTLFLAG_RWTUN,
    &coalesce, 0, "Try to coalesce PDUs before sending");
/* Cap on how much of a large data segment must be buffered before reading. */
static int partial_receive_len = 128 * 1024;
TUNABLE_INT("kern.icl.partial_receive_len", &partial_receive_len);
SYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN,
    &partial_receive_len, 0, "Minimum read size for partially received "
    "data segment");
/* Socket buffer reservations; raised to fit a full PDU in icl_conn_start(). */
static int sendspace = 1048576;
TUNABLE_INT("kern.icl.sendspace", &sendspace);
SYSCTL_INT(_kern_icl, OID_AUTO, sendspace, CTLFLAG_RWTUN,
    &sendspace, 0, "Default send socket buffer size");
static int recvspace = 1048576;
TUNABLE_INT("kern.icl.recvspace", &recvspace);
SYSCTL_INT(_kern_icl, OID_AUTO, recvspace, CTLFLAG_RWTUN,
    &recvspace, 0, "Default receive socket buffer size");

/* UMA zones backing icl_conn and icl_pdu allocations. */
static uma_zone_t icl_conn_zone;
static uma_zone_t icl_pdu_zone;

/* Live connection count: acquired in icl_conn_new(), released in icl_conn_free(). */
static volatile u_int	icl_ncons;
91
/* Debug-level message; printed only when kern.icl.debug > 1. */
#define	ICL_DEBUG(X, ...)						\
	do {								\
		if (debug > 1)						\
			printf("%s: " X "\n", __func__, ## __VA_ARGS__);\
	} while (0)

/* Warning message; printed when kern.icl.debug > 0 (the default). */
#define	ICL_WARN(X, ...)						\
	do {								\
		if (debug > 0) {					\
			printf("WARNING: %s: " X "\n",			\
			    __func__, ## __VA_ARGS__);			\
		}							\
	} while (0)

/* Connection lock helpers; ic_lock is supplied by the caller of icl_conn_new(). */
#define ICL_CONN_LOCK(X)		mtx_lock(X->ic_lock)
#define ICL_CONN_UNLOCK(X)		mtx_unlock(X->ic_lock)
#define ICL_CONN_LOCK_ASSERT(X)		mtx_assert(X->ic_lock, MA_OWNED)
#define ICL_CONN_LOCK_ASSERT_NOT(X)	mtx_assert(X->ic_lock, MA_NOTOWNED)

/* Singly-linked tail queue of PDUs; used for ic_to_send and send-thread local queues. */
STAILQ_HEAD(icl_pdu_stailq, icl_pdu);
112
113static void
114icl_conn_fail(struct icl_conn *ic)
115{
116	if (ic->ic_socket == NULL)
117		return;
118
119	/*
120	 * XXX
121	 */
122	ic->ic_socket->so_error = EDOOFUS;
123	(ic->ic_error)(ic);
124}
125
126static struct mbuf *
127icl_conn_receive(struct icl_conn *ic, size_t len)
128{
129	struct uio uio;
130	struct socket *so;
131	struct mbuf *m;
132	int error, flags;
133
134	so = ic->ic_socket;
135
136	memset(&uio, 0, sizeof(uio));
137	uio.uio_resid = len;
138
139	flags = MSG_DONTWAIT;
140	error = soreceive(so, NULL, &uio, &m, NULL, &flags);
141	if (error != 0) {
142		ICL_DEBUG("soreceive error %d", error);
143		return (NULL);
144	}
145	if (uio.uio_resid != 0) {
146		m_freem(m);
147		ICL_DEBUG("short read");
148		return (NULL);
149	}
150
151	return (m);
152}
153
154static struct icl_pdu *
155icl_pdu_new_empty(struct icl_conn *ic, int flags)
156{
157	struct icl_pdu *ip;
158
159#ifdef DIAGNOSTIC
160	refcount_acquire(&ic->ic_outstanding_pdus);
161#endif
162	ip = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
163	if (ip == NULL) {
164		ICL_WARN("failed to allocate %zd bytes", sizeof(*ip));
165#ifdef DIAGNOSTIC
166		refcount_release(&ic->ic_outstanding_pdus);
167#endif
168		return (NULL);
169	}
170
171	ip->ip_conn = ic;
172
173	return (ip);
174}
175
176void
177icl_pdu_free(struct icl_pdu *ip)
178{
179	struct icl_conn *ic;
180
181	ic = ip->ip_conn;
182
183	m_freem(ip->ip_bhs_mbuf);
184	m_freem(ip->ip_ahs_mbuf);
185	m_freem(ip->ip_data_mbuf);
186	uma_zfree(icl_pdu_zone, ip);
187#ifdef DIAGNOSTIC
188	refcount_release(&ic->ic_outstanding_pdus);
189#endif
190}
191
192/*
193 * Allocate icl_pdu with empty BHS to fill up by the caller.
194 */
195struct icl_pdu *
196icl_pdu_new(struct icl_conn *ic, int flags)
197{
198	struct icl_pdu *ip;
199
200	ip = icl_pdu_new_empty(ic, flags);
201	if (ip == NULL)
202		return (NULL);
203
204	ip->ip_bhs_mbuf = m_getm2(NULL, sizeof(struct iscsi_bhs),
205	    flags, MT_DATA, M_PKTHDR);
206	if (ip->ip_bhs_mbuf == NULL) {
207		ICL_WARN("failed to allocate %zd bytes", sizeof(*ip));
208		icl_pdu_free(ip);
209		return (NULL);
210	}
211	ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *);
212	memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs));
213	ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs);
214
215	return (ip);
216}
217
218static int
219icl_pdu_ahs_length(const struct icl_pdu *request)
220{
221
222	return (request->ip_bhs->bhs_total_ahs_len * 4);
223}
224
225size_t
226icl_pdu_data_segment_length(const struct icl_pdu *request)
227{
228	uint32_t len = 0;
229
230	len += request->ip_bhs->bhs_data_segment_len[0];
231	len <<= 8;
232	len += request->ip_bhs->bhs_data_segment_len[1];
233	len <<= 8;
234	len += request->ip_bhs->bhs_data_segment_len[2];
235
236	return (len);
237}
238
239static void
240icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len)
241{
242
243	response->ip_bhs->bhs_data_segment_len[2] = len;
244	response->ip_bhs->bhs_data_segment_len[1] = len >> 8;
245	response->ip_bhs->bhs_data_segment_len[0] = len >> 16;
246}
247
248static size_t
249icl_pdu_padding(const struct icl_pdu *ip)
250{
251
252	if ((ip->ip_data_len % 4) != 0)
253		return (4 - (ip->ip_data_len % 4));
254
255	return (0);
256}
257
258static size_t
259icl_pdu_size(const struct icl_pdu *response)
260{
261	size_t len;
262
263	KASSERT(response->ip_ahs_len == 0, ("responding with AHS"));
264
265	len = sizeof(struct iscsi_bhs) + response->ip_data_len +
266	    icl_pdu_padding(response);
267	if (response->ip_conn->ic_header_crc32c)
268		len += ISCSI_HEADER_DIGEST_SIZE;
269	if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c)
270		len += ISCSI_DATA_DIGEST_SIZE;
271
272	return (len);
273}
274
275static int
276icl_pdu_receive_bhs(struct icl_pdu *request, size_t *availablep)
277{
278	struct mbuf *m;
279
280	m = icl_conn_receive(request->ip_conn, sizeof(struct iscsi_bhs));
281	if (m == NULL) {
282		ICL_DEBUG("failed to receive BHS");
283		return (-1);
284	}
285
286	request->ip_bhs_mbuf = m_pullup(m, sizeof(struct iscsi_bhs));
287	if (request->ip_bhs_mbuf == NULL) {
288		ICL_WARN("m_pullup failed");
289		return (-1);
290	}
291	request->ip_bhs = mtod(request->ip_bhs_mbuf, struct iscsi_bhs *);
292
293	/*
294	 * XXX: For architectures with strict alignment requirements
295	 * 	we may need to allocate ip_bhs and copy the data into it.
296	 * 	For some reason, though, not doing this doesn't seem
297	 * 	to cause problems; tested on sparc64.
298	 */
299
300	*availablep -= sizeof(struct iscsi_bhs);
301	return (0);
302}
303
304static int
305icl_pdu_receive_ahs(struct icl_pdu *request, size_t *availablep)
306{
307
308	request->ip_ahs_len = icl_pdu_ahs_length(request);
309	if (request->ip_ahs_len == 0)
310		return (0);
311
312	request->ip_ahs_mbuf = icl_conn_receive(request->ip_conn,
313	    request->ip_ahs_len);
314	if (request->ip_ahs_mbuf == NULL) {
315		ICL_DEBUG("failed to receive AHS");
316		return (-1);
317	}
318
319	*availablep -= request->ip_ahs_len;
320	return (0);
321}
322
323static uint32_t
324icl_mbuf_to_crc32c(const struct mbuf *m0)
325{
326	uint32_t digest = 0xffffffff;
327	const struct mbuf *m;
328
329	for (m = m0; m != NULL; m = m->m_next)
330		digest = calculate_crc32c(digest,
331		    mtod(m, const void *), m->m_len);
332
333	digest = digest ^ 0xffffffff;
334
335	return (digest);
336}
337
338static int
339icl_pdu_check_header_digest(struct icl_pdu *request, size_t *availablep)
340{
341	struct mbuf *m;
342	uint32_t received_digest, valid_digest;
343
344	if (request->ip_conn->ic_header_crc32c == false)
345		return (0);
346
347	m = icl_conn_receive(request->ip_conn, ISCSI_HEADER_DIGEST_SIZE);
348	if (m == NULL) {
349		ICL_DEBUG("failed to receive header digest");
350		return (-1);
351	}
352
353	CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE);
354	m_copydata(m, 0, ISCSI_HEADER_DIGEST_SIZE, (void *)&received_digest);
355	m_freem(m);
356
357	*availablep -= ISCSI_HEADER_DIGEST_SIZE;
358
359	/*
360	 * XXX: Handle AHS.
361	 */
362	valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
363	if (received_digest != valid_digest) {
364		ICL_WARN("header digest check failed; got 0x%x, "
365		    "should be 0x%x", received_digest, valid_digest);
366		return (-1);
367	}
368
369	return (0);
370}
371
372/*
373 * Return the number of bytes that should be waiting in the receive socket
374 * before icl_pdu_receive_data_segment() gets called.
375 */
376static size_t
377icl_pdu_data_segment_receive_len(const struct icl_pdu *request)
378{
379	size_t len;
380
381	len = icl_pdu_data_segment_length(request);
382	if (len == 0)
383		return (0);
384
385	/*
386	 * Account for the parts of data segment already read from
387	 * the socket buffer.
388	 */
389	KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
390	len -= request->ip_data_len;
391
392	/*
393	 * Don't always wait for the full data segment to be delivered
394	 * to the socket; this might badly affect performance due to
395	 * TCP window scaling.
396	 */
397	if (len > partial_receive_len) {
398#if 0
399		ICL_DEBUG("need %zd bytes of data, limiting to %zd",
400		    len, partial_receive_len));
401#endif
402		len = partial_receive_len;
403
404		return (len);
405	}
406
407	/*
408	 * Account for padding.  Note that due to the way code is written,
409	 * the icl_pdu_receive_data_segment() must always receive padding
410	 * along with the last part of data segment, because it would be
411	 * impossible to tell whether we've already received the full data
412	 * segment including padding, or without it.
413	 */
414	if ((len % 4) != 0)
415		len += 4 - (len % 4);
416
417#if 0
418	ICL_DEBUG("need %zd bytes of data", len));
419#endif
420
421	return (len);
422}
423
/*
 * Receive (part of) the PDU's data segment, including trailing padding
 * when the final part arrives.  Sets *more_neededp when the socket
 * buffer ran dry before the whole segment (plus padding) was read, and
 * updates ic_receive_len for the next call.  Returns 0 on success,
 * -1 on receive failure.
 */
static int
icl_pdu_receive_data_segment(struct icl_pdu *request,
    size_t *availablep, bool *more_neededp)
{
	struct icl_conn *ic;
	size_t len, padding = 0;
	struct mbuf *m;

	ic = request->ip_conn;

	*more_neededp = false;
	ic->ic_receive_len = 0;

	len = icl_pdu_data_segment_length(request);
	if (len == 0)
		return (0);

	if ((len % 4) != 0)
		padding = 4 - (len % 4);

	/*
	 * Account for already received parts of data segment.
	 */
	KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
	len -= request->ip_data_len;

	if (len + padding > *availablep) {
		/*
		 * Not enough data in the socket buffer.  Receive as much
		 * as we can.  Don't receive padding, since, obviously, it's
		 * not the end of data segment yet.
		 */
#if 0
		ICL_DEBUG("limited from %zd to %zd",
		    len + padding, *availablep - padding));
#endif
		len = *availablep - padding;
		*more_neededp = true;
		padding = 0;
	}

	/*
	 * Must not try to receive padding without at least one byte
	 * of actual data segment.
	 */
	if (len > 0) {
		m = icl_conn_receive(request->ip_conn, len + padding);
		if (m == NULL) {
			ICL_DEBUG("failed to receive data segment");
			return (-1);
		}

		/* Append to whatever was received on previous calls. */
		if (request->ip_data_mbuf == NULL)
			request->ip_data_mbuf = m;
		else
			m_cat(request->ip_data_mbuf, m);

		/* ip_data_len counts payload only; padding stays uncounted. */
		request->ip_data_len += len;
		*availablep -= len + padding;
	} else
		ICL_DEBUG("len 0");

	if (*more_neededp)
		ic->ic_receive_len =
		    icl_pdu_data_segment_receive_len(request);

	return (0);
}
492
493static int
494icl_pdu_check_data_digest(struct icl_pdu *request, size_t *availablep)
495{
496	struct mbuf *m;
497	uint32_t received_digest, valid_digest;
498
499	if (request->ip_conn->ic_data_crc32c == false)
500		return (0);
501
502	if (request->ip_data_len == 0)
503		return (0);
504
505	m = icl_conn_receive(request->ip_conn, ISCSI_DATA_DIGEST_SIZE);
506	if (m == NULL) {
507		ICL_DEBUG("failed to receive data digest");
508		return (-1);
509	}
510
511	CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE);
512	m_copydata(m, 0, ISCSI_DATA_DIGEST_SIZE, (void *)&received_digest);
513	m_freem(m);
514
515	*availablep -= ISCSI_DATA_DIGEST_SIZE;
516
517	/*
518	 * Note that ip_data_mbuf also contains padding; since digest
519	 * calculation is supposed to include that, we iterate over
520	 * the entire ip_data_mbuf chain, not just ip_data_len bytes of it.
521	 */
522	valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
523	if (received_digest != valid_digest) {
524		ICL_WARN("data digest check failed; got 0x%x, "
525		    "should be 0x%x", received_digest, valid_digest);
526		return (-1);
527	}
528
529	return (0);
530}
531
532/*
533 * Somewhat contrary to the name, this attempts to receive only one
534 * "part" of PDU at a time; call it repeatedly until it returns non-NULL.
535 */
536static struct icl_pdu *
537icl_conn_receive_pdu(struct icl_conn *ic, size_t *availablep)
538{
539	struct icl_pdu *request;
540	struct socket *so;
541	size_t len;
542	int error;
543	bool more_needed;
544
545	so = ic->ic_socket;
546
547	if (ic->ic_receive_state == ICL_CONN_STATE_BHS) {
548		KASSERT(ic->ic_receive_pdu == NULL,
549		    ("ic->ic_receive_pdu != NULL"));
550		request = icl_pdu_new_empty(ic, M_NOWAIT);
551		if (request == NULL) {
552			ICL_DEBUG("failed to allocate PDU; "
553			    "dropping connection");
554			icl_conn_fail(ic);
555			return (NULL);
556		}
557		ic->ic_receive_pdu = request;
558	} else {
559		KASSERT(ic->ic_receive_pdu != NULL,
560		    ("ic->ic_receive_pdu == NULL"));
561		request = ic->ic_receive_pdu;
562	}
563
564	if (*availablep < ic->ic_receive_len) {
565#if 0
566		ICL_DEBUG("not enough data; need %zd, "
567		    "have %zd", ic->ic_receive_len, *availablep);
568#endif
569		return (NULL);
570	}
571
572	switch (ic->ic_receive_state) {
573	case ICL_CONN_STATE_BHS:
574		//ICL_DEBUG("receiving BHS");
575		error = icl_pdu_receive_bhs(request, availablep);
576		if (error != 0) {
577			ICL_DEBUG("failed to receive BHS; "
578			    "dropping connection");
579			break;
580		}
581
582		/*
583		 * We don't enforce any limit for AHS length;
584		 * its length is stored in 8 bit field.
585		 */
586
587		len = icl_pdu_data_segment_length(request);
588		if (len > ic->ic_max_data_segment_length) {
589			ICL_WARN("received data segment "
590			    "length %zd is larger than negotiated "
591			    "MaxDataSegmentLength %zd; "
592			    "dropping connection",
593			    len, ic->ic_max_data_segment_length);
594			error = EINVAL;
595			break;
596		}
597
598		ic->ic_receive_state = ICL_CONN_STATE_AHS;
599		ic->ic_receive_len = icl_pdu_ahs_length(request);
600		break;
601
602	case ICL_CONN_STATE_AHS:
603		//ICL_DEBUG("receiving AHS");
604		error = icl_pdu_receive_ahs(request, availablep);
605		if (error != 0) {
606			ICL_DEBUG("failed to receive AHS; "
607			    "dropping connection");
608			break;
609		}
610		ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST;
611		if (ic->ic_header_crc32c == false)
612			ic->ic_receive_len = 0;
613		else
614			ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE;
615		break;
616
617	case ICL_CONN_STATE_HEADER_DIGEST:
618		//ICL_DEBUG("receiving header digest");
619		error = icl_pdu_check_header_digest(request, availablep);
620		if (error != 0) {
621			ICL_DEBUG("header digest failed; "
622			    "dropping connection");
623			break;
624		}
625
626		ic->ic_receive_state = ICL_CONN_STATE_DATA;
627		ic->ic_receive_len =
628		    icl_pdu_data_segment_receive_len(request);
629		break;
630
631	case ICL_CONN_STATE_DATA:
632		//ICL_DEBUG("receiving data segment");
633		error = icl_pdu_receive_data_segment(request, availablep,
634		    &more_needed);
635		if (error != 0) {
636			ICL_DEBUG("failed to receive data segment;"
637			    "dropping connection");
638			break;
639		}
640
641		if (more_needed)
642			break;
643
644		ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST;
645		if (request->ip_data_len == 0 || ic->ic_data_crc32c == false)
646			ic->ic_receive_len = 0;
647		else
648			ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE;
649		break;
650
651	case ICL_CONN_STATE_DATA_DIGEST:
652		//ICL_DEBUG("receiving data digest");
653		error = icl_pdu_check_data_digest(request, availablep);
654		if (error != 0) {
655			ICL_DEBUG("data digest failed; "
656			    "dropping connection");
657			break;
658		}
659
660		/*
661		 * We've received complete PDU; reset the receive state machine
662		 * and return the PDU.
663		 */
664		ic->ic_receive_state = ICL_CONN_STATE_BHS;
665		ic->ic_receive_len = sizeof(struct iscsi_bhs);
666		ic->ic_receive_pdu = NULL;
667		return (request);
668
669	default:
670		panic("invalid ic_receive_state %d\n", ic->ic_receive_state);
671	}
672
673	if (error != 0) {
674		/*
675		 * Don't free the PDU; it's pointed to by ic->ic_receive_pdu
676		 * and will get freed in icl_conn_close().
677		 */
678		icl_conn_fail(ic);
679	}
680
681	return (NULL);
682}
683
/*
 * Consume as many complete PDUs as possible from "available" buffered
 * bytes, handing each finished PDU to the ic_receive callback.  Stops
 * when fewer than ic_receive_len bytes remain, on socket error, or on
 * disconnect.
 */
static void
icl_conn_receive_pdus(struct icl_conn *ic, size_t available)
{
	struct icl_pdu *response;
	struct socket *so;

	so = ic->ic_socket;

	/*
	 * This can never happen; we're careful to only mess with ic->ic_socket
	 * pointer when the send/receive threads are not running.
	 */
	KASSERT(so != NULL, ("NULL socket"));

	for (;;) {
		if (ic->ic_disconnecting)
			return;

		if (so->so_error != 0) {
			ICL_DEBUG("connection error %d; "
			    "dropping connection", so->so_error);
			icl_conn_fail(ic);
			return;
		}

		/*
		 * Loop until we have a complete PDU or there is not enough
		 * data in the socket buffer.
		 */
		if (available < ic->ic_receive_len) {
#if 0
			ICL_DEBUG("not enough data; have %zd, "
			    "need %zd", available,
			    ic->ic_receive_len);
#endif
			return;
		}

		/* NULL means "no complete PDU yet"; go around. */
		response = icl_conn_receive_pdu(ic, &available);
		if (response == NULL)
			continue;

		/* AHS handling is not implemented; drop such PDUs. */
		if (response->ip_ahs_len > 0) {
			ICL_WARN("received PDU with unsupported "
			    "AHS; opcode 0x%x; dropping connection",
			    response->ip_bhs->bhs_opcode);
			icl_pdu_free(response);
			icl_conn_fail(ic);
			return;
		}

		(ic->ic_receive)(response);
	}
}
738
/*
 * Receive thread: sleep until the socket buffer holds at least
 * ic_receive_len bytes, then let icl_conn_receive_pdus() consume them.
 * Exits when ic_disconnecting is set, clearing ic_receive_running and
 * signalling ic_send_cv so the teardown path can observe the exit.
 */
static void
icl_receive_thread(void *arg)
{
	struct icl_conn *ic;
	size_t available;
	struct socket *so;

	ic = arg;
	so = ic->ic_socket;

	for (;;) {
		if (ic->ic_disconnecting) {
			//ICL_DEBUG("terminating");
			break;
		}

		/*
		 * Set the low watermark, to be checked by
		 * soreadable() in icl_soupcall_receive()
		 * to avoid unneccessary wakeups until there
		 * is enough data received to read the PDU.
		 */
		SOCKBUF_LOCK(&so->so_rcv);
		available = so->so_rcv.sb_cc;
		if (available < ic->ic_receive_len) {
			so->so_rcv.sb_lowat = ic->ic_receive_len;
			/* Sleeps on the sockbuf mutex; woken by the upcall. */
			cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx);
		} else
			so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1;
		SOCKBUF_UNLOCK(&so->so_rcv);

		icl_conn_receive_pdus(ic, available);
	}

	ICL_CONN_LOCK(ic);
	ic->ic_receive_running = false;
	cv_signal(&ic->ic_send_cv);
	ICL_CONN_UNLOCK(ic);
	kthread_exit();
}
779
780static int
781icl_soupcall_receive(struct socket *so, void *arg, int waitflag)
782{
783	struct icl_conn *ic;
784
785	if (!soreadable(so))
786		return (SU_OK);
787
788	ic = arg;
789	cv_signal(&ic->ic_receive_cv);
790	return (SU_OK);
791}
792
/*
 * Prepare a PDU for transmission: write the data segment length into
 * the BHS, append header digest, padding and data digest as negotiated,
 * then concatenate the data chain onto the BHS chain.  Returns 0 on
 * success, 1 if an m_append() failed.
 */
static int
icl_pdu_finalize(struct icl_pdu *request)
{
	size_t padding, pdu_len;
	uint32_t digest, zero = 0;
	int ok;
	struct icl_conn *ic;

	ic = request->ip_conn;

	icl_pdu_set_data_segment_length(request, request->ip_data_len);

	/*
	 * Total wire size; icl_pdu_size() already accounts for padding
	 * and digests, so it must be computed before they are appended.
	 */
	pdu_len = icl_pdu_size(request);

	if (ic->ic_header_crc32c) {
		digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
		ok = m_append(request->ip_bhs_mbuf, sizeof(digest),
		    (void *)&digest);
		if (ok != 1) {
			ICL_WARN("failed to append header digest");
			return (1);
		}
	}

	if (request->ip_data_len != 0) {
		/* Pad the data segment to a multiple of four bytes. */
		padding = icl_pdu_padding(request);
		if (padding > 0) {
			ok = m_append(request->ip_data_mbuf, padding,
			    (void *)&zero);
			if (ok != 1) {
				ICL_WARN("failed to append padding");
				return (1);
			}
		}

		if (ic->ic_data_crc32c) {
			/* Data digest covers the data segment plus padding. */
			digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);

			ok = m_append(request->ip_data_mbuf, sizeof(digest),
			    (void *)&digest);
			if (ok != 1) {
				ICL_WARN("failed to append data digest");
				return (1);
			}
		}

		/* Ownership of the data chain moves to the BHS mbuf. */
		m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf);
		request->ip_data_mbuf = NULL;
	}

	request->ip_bhs_mbuf->m_pkthdr.len = pdu_len;

	return (0);
}
847
848static void
849icl_conn_send_pdus(struct icl_conn *ic, struct icl_pdu_stailq *queue)
850{
851	struct icl_pdu *request, *request2;
852	struct socket *so;
853	size_t available, size, size2;
854	int coalesced, error;
855
856	ICL_CONN_LOCK_ASSERT_NOT(ic);
857
858	so = ic->ic_socket;
859
860	SOCKBUF_LOCK(&so->so_snd);
861	/*
862	 * Check how much space do we have for transmit.  We can't just
863	 * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE,
864	 * as it always frees the mbuf chain passed to it, even in case
865	 * of error.
866	 */
867	available = sbspace(&so->so_snd);
868
869	/*
870	 * Notify the socket upcall that we don't need wakeups
871	 * for the time being.
872	 */
873	so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1;
874	SOCKBUF_UNLOCK(&so->so_snd);
875
876	while (!STAILQ_EMPTY(queue)) {
877		request = STAILQ_FIRST(queue);
878		size = icl_pdu_size(request);
879		if (available < size) {
880
881			/*
882			 * Set the low watermark, to be checked by
883			 * sowriteable() in icl_soupcall_send()
884			 * to avoid unneccessary wakeups until there
885			 * is enough space for the PDU to fit.
886			 */
887			SOCKBUF_LOCK(&so->so_snd);
888			available = sbspace(&so->so_snd);
889			if (available < size) {
890#if 1
891				ICL_DEBUG("no space to send; "
892				    "have %zd, need %zd",
893				    available, size);
894#endif
895				so->so_snd.sb_lowat = size;
896				SOCKBUF_UNLOCK(&so->so_snd);
897				return;
898			}
899			SOCKBUF_UNLOCK(&so->so_snd);
900		}
901		STAILQ_REMOVE_HEAD(queue, ip_next);
902		error = icl_pdu_finalize(request);
903		if (error != 0) {
904			ICL_DEBUG("failed to finalize PDU; "
905			    "dropping connection");
906			icl_conn_fail(ic);
907			icl_pdu_free(request);
908			return;
909		}
910		if (coalesce) {
911			coalesced = 1;
912			for (;;) {
913				request2 = STAILQ_FIRST(queue);
914				if (request2 == NULL)
915					break;
916				size2 = icl_pdu_size(request2);
917				if (available < size + size2)
918					break;
919				STAILQ_REMOVE_HEAD(queue, ip_next);
920				error = icl_pdu_finalize(request2);
921				if (error != 0) {
922					ICL_DEBUG("failed to finalize PDU; "
923					    "dropping connection");
924					icl_conn_fail(ic);
925					icl_pdu_free(request);
926					icl_pdu_free(request2);
927					return;
928				}
929				m_cat(request->ip_bhs_mbuf, request2->ip_bhs_mbuf);
930				request2->ip_bhs_mbuf = NULL;
931				request->ip_bhs_mbuf->m_pkthdr.len += size2;
932				size += size2;
933				STAILQ_REMOVE_AFTER(queue, request, ip_next);
934				icl_pdu_free(request2);
935				coalesced++;
936			}
937#if 0
938			if (coalesced > 1) {
939				ICL_DEBUG("coalesced %d PDUs into %zd bytes",
940				    coalesced, size);
941			}
942#endif
943		}
944		available -= size;
945		error = sosend(so, NULL, NULL, request->ip_bhs_mbuf,
946		    NULL, MSG_DONTWAIT, curthread);
947		request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */
948		if (error != 0) {
949			ICL_DEBUG("failed to send PDU, error %d; "
950			    "dropping connection", error);
951			icl_conn_fail(ic);
952			icl_pdu_free(request);
953			return;
954		}
955		icl_pdu_free(request);
956	}
957}
958
/*
 * Send thread: drains ic_to_send into a private local queue (so
 * icl_conn_send_pdus() can run without the connection lock held) and
 * pushes PDUs to the socket.  Sleeps on ic_send_cv between bursts;
 * exits when ic_disconnecting is set, returning any unsent PDUs to
 * ic_to_send for cleanup.
 */
static void
icl_send_thread(void *arg)
{
	struct icl_conn *ic;
	struct icl_pdu_stailq queue;

	ic = arg;

	STAILQ_INIT(&queue);

	ICL_CONN_LOCK(ic);
	for (;;) {
		for (;;) {
			/*
			 * If the local queue is empty, populate it from
			 * the main one.  This way the icl_conn_send_pdus()
			 * can go through all the queued PDUs without holding
			 * any locks.
			 */
			if (STAILQ_EMPTY(&queue))
				STAILQ_SWAP(&ic->ic_to_send, &queue, icl_pdu);

			ic->ic_check_send_space = false;
			ICL_CONN_UNLOCK(ic);
			icl_conn_send_pdus(ic, &queue);
			ICL_CONN_LOCK(ic);

			/*
			 * The icl_soupcall_send() was called since the last
			 * call to sbspace(); go around;
			 */
			if (ic->ic_check_send_space)
				continue;

			/*
			 * Local queue is empty, but we still have PDUs
			 * in the main one; go around.
			 */
			if (STAILQ_EMPTY(&queue) &&
			    !STAILQ_EMPTY(&ic->ic_to_send))
				continue;

			/*
			 * There might be some stuff in the local queue,
			 * which didn't get sent due to not having enough send
			 * space.  Wait for socket upcall.
			 */
			break;
		}

		if (ic->ic_disconnecting) {
			//ICL_DEBUG("terminating");
			break;
		}

		/* Woken by icl_pdu_queue() or icl_soupcall_send(). */
		cv_wait(&ic->ic_send_cv, ic->ic_lock);
	}

	/*
	 * We're exiting; move PDUs back to the main queue, so they can
	 * get freed properly.  At this point ordering doesn't matter.
	 */
	STAILQ_CONCAT(&ic->ic_to_send, &queue);

	ic->ic_send_running = false;
	cv_signal(&ic->ic_send_cv);
	ICL_CONN_UNLOCK(ic);
	kthread_exit();
}
1028
1029static int
1030icl_soupcall_send(struct socket *so, void *arg, int waitflag)
1031{
1032	struct icl_conn *ic;
1033
1034	if (!sowriteable(so))
1035		return (SU_OK);
1036
1037	ic = arg;
1038
1039	ICL_CONN_LOCK(ic);
1040	ic->ic_check_send_space = true;
1041	ICL_CONN_UNLOCK(ic);
1042
1043	cv_signal(&ic->ic_send_cv);
1044
1045	return (SU_OK);
1046}
1047
1048int
1049icl_pdu_append_data(struct icl_pdu *request, const void *addr, size_t len,
1050    int flags)
1051{
1052	struct mbuf *mb, *newmb;
1053	size_t copylen, off = 0;
1054
1055	KASSERT(len > 0, ("len == 0"));
1056
1057	newmb = m_getm2(NULL, len, flags, MT_DATA, M_PKTHDR);
1058	if (newmb == NULL) {
1059		ICL_WARN("failed to allocate mbuf for %zd bytes", len);
1060		return (ENOMEM);
1061	}
1062
1063	for (mb = newmb; mb != NULL; mb = mb->m_next) {
1064		copylen = min(M_TRAILINGSPACE(mb), len - off);
1065		memcpy(mtod(mb, char *), (const char *)addr + off, copylen);
1066		mb->m_len = copylen;
1067		off += copylen;
1068	}
1069	KASSERT(off == len, ("%s: off != len", __func__));
1070
1071	if (request->ip_data_mbuf == NULL) {
1072		request->ip_data_mbuf = newmb;
1073		request->ip_data_len = len;
1074	} else {
1075		m_cat(request->ip_data_mbuf, newmb);
1076		request->ip_data_len += len;
1077	}
1078
1079	return (0);
1080}
1081
/*
 * Copy "len" bytes starting at offset "off" from the PDU's received
 * data segment into "addr".
 */
void
icl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len)
{

	m_copydata(ip->ip_data_mbuf, off, len, addr);
}
1088
1089void
1090icl_pdu_queue(struct icl_pdu *ip)
1091{
1092	struct icl_conn *ic;
1093
1094	ic = ip->ip_conn;
1095
1096	ICL_CONN_LOCK_ASSERT(ic);
1097
1098	if (ic->ic_disconnecting || ic->ic_socket == NULL) {
1099		ICL_DEBUG("icl_pdu_queue on closed connection");
1100		icl_pdu_free(ip);
1101		return;
1102	}
1103
1104	if (!STAILQ_EMPTY(&ic->ic_to_send)) {
1105		STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
1106		/*
1107		 * If the queue is not empty, someone else had already
1108		 * signaled the send thread; no need to do that again,
1109		 * just return.
1110		 */
1111		return;
1112	}
1113
1114	STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
1115	cv_signal(&ic->ic_send_cv);
1116}
1117
1118struct icl_conn *
1119icl_conn_new(const char *name, struct mtx *lock)
1120{
1121	struct icl_conn *ic;
1122
1123	refcount_acquire(&icl_ncons);
1124
1125	ic = uma_zalloc(icl_conn_zone, M_WAITOK | M_ZERO);
1126
1127	STAILQ_INIT(&ic->ic_to_send);
1128	ic->ic_lock = lock;
1129	cv_init(&ic->ic_send_cv, "icl_tx");
1130	cv_init(&ic->ic_receive_cv, "icl_rx");
1131#ifdef DIAGNOSTIC
1132	refcount_init(&ic->ic_outstanding_pdus, 0);
1133#endif
1134	ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH;
1135	ic->ic_name = name;
1136
1137	return (ic);
1138}
1139
/*
 * Destroy a connection previously allocated with icl_conn_new(),
 * releasing its condition variables and the global connection count.
 */
void
icl_conn_free(struct icl_conn *ic)
{

	cv_destroy(&ic->ic_send_cv);
	cv_destroy(&ic->ic_receive_cv);
	uma_zfree(icl_conn_zone, ic);
	refcount_release(&icl_ncons);
}
1149
/*
 * Prepare the freshly attached socket (ic->ic_socket) for PDU traffic:
 * size the socket buffers, disable Nagle, register socket upcalls, and
 * start the per-connection transmit and receive kernel threads.
 *
 * Returns 0 on success or an errno value; on any failure after the
 * socket checks, the connection is torn down via icl_conn_close().
 */
static int
icl_conn_start(struct icl_conn *ic)
{
	size_t minspace;
	struct sockopt opt;
	int error, one = 1;

	ICL_CONN_LOCK(ic);

	/*
	 * XXX: Ugly hack.
	 */
	if (ic->ic_socket == NULL) {
		ICL_CONN_UNLOCK(ic);
		return (EINVAL);
	}

	/* Receive state machine starts by expecting a Basic Header Segment. */
	ic->ic_receive_state = ICL_CONN_STATE_BHS;
	ic->ic_receive_len = sizeof(struct iscsi_bhs);
	ic->ic_disconnecting = false;

	ICL_CONN_UNLOCK(ic);

	/*
	 * For sendspace, this is required because the current code cannot
	 * send a PDU in pieces; thus, the minimum buffer size is equal
	 * to the maximum PDU size.  "+4" is to account for possible padding.
	 *
	 * What we should actually do here is to use autoscaling, but set
	 * some minimal buffer size to "minspace".  I don't know a way to do
	 * that, though.
	 */
	minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length +
	    ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4;
	if (sendspace < minspace) {
		ICL_WARN("kern.icl.sendspace too low; must be at least %zd",
		    minspace);
		sendspace = minspace;
	}
	if (recvspace < minspace) {
		ICL_WARN("kern.icl.recvspace too low; must be at least %zd",
		    minspace);
		recvspace = minspace;
	}

	error = soreserve(ic->ic_socket, sendspace, recvspace);
	if (error != 0) {
		ICL_WARN("soreserve failed with error %d", error);
		icl_conn_close(ic);
		return (error);
	}
	ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE;
	ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE;

	/*
	 * Disable Nagle.
	 */
	bzero(&opt, sizeof(opt));
	opt.sopt_dir = SOPT_SET;
	opt.sopt_level = IPPROTO_TCP;
	opt.sopt_name = TCP_NODELAY;
	opt.sopt_val = &one;
	opt.sopt_valsize = sizeof(one);
	error = sosetopt(ic->ic_socket, &opt);
	if (error != 0) {
		ICL_WARN("disabling TCP_NODELAY failed with error %d", error);
		icl_conn_close(ic);
		return (error);
	}

	/*
	 * Register socket upcall, to get notified about incoming PDUs
	 * and free space to send outgoing ones.
	 */
	SOCKBUF_LOCK(&ic->ic_socket->so_snd);
	soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic);
	SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
	SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
	soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic);
	SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);

	/*
	 * Start threads.  The "running" flags are set before kthread_add()
	 * so that icl_conn_close() knows it must wait for them to exit.
	 */
	ICL_CONN_LOCK(ic);
	ic->ic_send_running = ic->ic_receive_running = true;
	ICL_CONN_UNLOCK(ic);
	error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx",
	    ic->ic_name);
	if (error != 0) {
		ICL_WARN("kthread_add(9) failed with error %d", error);
		ICL_CONN_LOCK(ic);
		/* Neither thread was created, so clear both flags. */
		ic->ic_send_running = ic->ic_receive_running = false;
		cv_signal(&ic->ic_send_cv);
		ICL_CONN_UNLOCK(ic);
		icl_conn_close(ic);
		return (error);
	}
	error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx",
	    ic->ic_name);
	if (error != 0) {
		ICL_WARN("kthread_add(9) failed with error %d", error);
		ICL_CONN_LOCK(ic);
		/*
		 * The send thread is already running; clear only the
		 * receive flag and let icl_conn_close() below shut the
		 * send thread down.
		 */
		ic->ic_receive_running = false;
		cv_signal(&ic->ic_send_cv);
		ICL_CONN_UNLOCK(ic);
		icl_conn_close(ic);
		return (error);
	}

	return (0);
}
1262
1263int
1264icl_conn_handoff(struct icl_conn *ic, int fd)
1265{
1266	struct file *fp;
1267	struct socket *so;
1268	cap_rights_t rights;
1269	int error;
1270
1271	ICL_CONN_LOCK_ASSERT_NOT(ic);
1272
1273	/*
1274	 * Steal the socket from userland.
1275	 */
1276	error = fget(curthread, fd,
1277	    cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp);
1278	if (error != 0)
1279		return (error);
1280	if (fp->f_type != DTYPE_SOCKET) {
1281		fdrop(fp, curthread);
1282		return (EINVAL);
1283	}
1284	so = fp->f_data;
1285	if (so->so_type != SOCK_STREAM) {
1286		fdrop(fp, curthread);
1287		return (EINVAL);
1288	}
1289
1290	ICL_CONN_LOCK(ic);
1291
1292	if (ic->ic_socket != NULL) {
1293		ICL_CONN_UNLOCK(ic);
1294		fdrop(fp, curthread);
1295		return (EBUSY);
1296	}
1297
1298	ic->ic_socket = fp->f_data;
1299	fp->f_ops = &badfileops;
1300	fp->f_data = NULL;
1301	fdrop(fp, curthread);
1302	ICL_CONN_UNLOCK(ic);
1303
1304	error = icl_conn_start(ic);
1305
1306	return (error);
1307}
1308
/*
 * Shut a connection down: stop the transmit/receive threads, detach
 * and close the socket, and free any partially received PDU as well
 * as PDUs still waiting on the send queue.  May be called from
 * multiple threads; only the caller that finds ic_socket non-NULL
 * performs the actual teardown, the others return early.
 */
void
icl_conn_close(struct icl_conn *ic)
{
	struct icl_pdu *pdu;
	struct socket *so;

	ICL_CONN_LOCK(ic);

	/*
	 * Wake up the threads, so they can properly terminate.
	 * NOTE(review): the cv_wait() below sleeps on ic_send_cv;
	 * presumably the exiting threads signal it and clear their
	 * "running" flags -- confirm in the thread bodies.
	 */
	ic->ic_disconnecting = true;
	while (ic->ic_receive_running || ic->ic_send_running) {
		cv_signal(&ic->ic_receive_cv);
		cv_signal(&ic->ic_send_cv);
		cv_wait(&ic->ic_send_cv, ic->ic_lock);
	}

	/* Some other thread could close the connection same time. */
	so = ic->ic_socket;
	if (so == NULL) {
		ICL_CONN_UNLOCK(ic);
		return;
	}
	ic->ic_socket = NULL;

	/*
	 * Deregister socket upcalls.  The connection lock is dropped
	 * around the sockbuf work; ic_socket is already NULL, so other
	 * closers bail out above.
	 */
	ICL_CONN_UNLOCK(ic);
	SOCKBUF_LOCK(&so->so_snd);
	if (so->so_snd.sb_upcall != NULL)
		soupcall_clear(so, SO_SND);
	SOCKBUF_UNLOCK(&so->so_snd);
	SOCKBUF_LOCK(&so->so_rcv);
	if (so->so_rcv.sb_upcall != NULL)
		soupcall_clear(so, SO_RCV);
	SOCKBUF_UNLOCK(&so->so_rcv);
	soclose(so);
	ICL_CONN_LOCK(ic);

	if (ic->ic_receive_pdu != NULL) {
		//ICL_DEBUG("freeing partially received PDU");
		icl_pdu_free(ic->ic_receive_pdu);
		ic->ic_receive_pdu = NULL;
	}

	/*
	 * Remove any outstanding PDUs from the send queue.
	 */
	while (!STAILQ_EMPTY(&ic->ic_to_send)) {
		pdu = STAILQ_FIRST(&ic->ic_to_send);
		STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next);
		icl_pdu_free(pdu);
	}

	KASSERT(STAILQ_EMPTY(&ic->ic_to_send),
	    ("destroying session with non-empty send queue"));
#ifdef DIAGNOSTIC
	KASSERT(ic->ic_outstanding_pdus == 0,
	    ("destroying session with %d outstanding PDUs",
	     ic->ic_outstanding_pdus));
#endif
	ICL_CONN_UNLOCK(ic);
}
1374
1375bool
1376icl_conn_connected(struct icl_conn *ic)
1377{
1378	ICL_CONN_LOCK_ASSERT_NOT(ic);
1379
1380	ICL_CONN_LOCK(ic);
1381	if (ic->ic_socket == NULL) {
1382		ICL_CONN_UNLOCK(ic);
1383		return (false);
1384	}
1385	if (ic->ic_socket->so_error != 0) {
1386		ICL_CONN_UNLOCK(ic);
1387		return (false);
1388	}
1389	ICL_CONN_UNLOCK(ic);
1390	return (true);
1391}
1392
#ifdef ICL_KERNEL_PROXY
/*
 * Attach an already-held kernel socket to the connection and start it.
 * In-kernel counterpart of icl_conn_handoff(); no file descriptor is
 * involved.
 */
int
icl_conn_handoff_sock(struct icl_conn *ic, struct socket *so)
{

	ICL_CONN_LOCK_ASSERT_NOT(ic);

	/* Only stream sockets can carry iSCSI PDUs. */
	if (so->so_type != SOCK_STREAM)
		return (EINVAL);

	ICL_CONN_LOCK(ic);
	if (ic->ic_socket != NULL) {
		ICL_CONN_UNLOCK(ic);
		return (EBUSY);
	}
	ic->ic_socket = so;
	ICL_CONN_UNLOCK(ic);

	return (icl_conn_start(ic));
}
#endif /* ICL_KERNEL_PROXY */
1417
/*
 * Module unload handler: refuse to unload while any connection still
 * exists, otherwise destroy the UMA zones created by icl_load().
 */
static int
icl_unload(void)
{

	/*
	 * NOTE(review): icl_ncons is read here without synchronization;
	 * presumably a concurrent icl_conn_new() during unload is
	 * prevented elsewhere -- confirm.
	 */
	if (icl_ncons != 0)
		return (EBUSY);

	uma_zdestroy(icl_conn_zone);
	uma_zdestroy(icl_pdu_zone);

	return (0);
}
1430
/*
 * Module load handler: create the UMA zones backing connection and PDU
 * allocations and zero the global connection counter.
 */
static void
icl_load(void)
{

	icl_conn_zone = uma_zcreate("icl_conn",
	    sizeof(struct icl_conn), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	icl_pdu_zone = uma_zcreate("icl_pdu",
	    sizeof(struct icl_pdu), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);

	refcount_init(&icl_ncons, 0);
}
1444
1445static int
1446icl_modevent(module_t mod, int what, void *arg)
1447{
1448
1449	switch (what) {
1450	case MOD_LOAD:
1451		icl_load();
1452		return (0);
1453	case MOD_UNLOAD:
1454		return (icl_unload());
1455	default:
1456		return (EINVAL);
1457	}
1458}
1459
1460moduledata_t icl_data = {
1461	"icl",
1462	icl_modevent,
1463	0
1464};
1465
1466DECLARE_MODULE(icl, icl_data, SI_SUB_DRIVERS, SI_ORDER_FIRST);
1467MODULE_VERSION(icl, 1);
1468