/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2012 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Edward Tomasz Napierala under sponsorship
 * from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * Software implementation of iSCSI Common Layer kobj(9) interface.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/dev/iscsi/icl_soft.c 361736 2020-06-02 20:42:45Z mav $");

#include <sys/param.h>
#include <sys/capsicum.h>
#include <sys/condvar.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/module.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/sx.h>
#include <sys/uio.h>
#include <vm/uma.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

#include <dev/iscsi/icl.h>
#include <dev/iscsi/iscsi_proto.h>
#include <icl_conn_if.h>

static int coalesce = 1;
SYSCTL_INT(_kern_icl, OID_AUTO, coalesce, CTLFLAG_RWTUN,
    &coalesce, 0, "Try to coalesce PDUs before sending");
static int partial_receive_len = 128 * 1024;
SYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN,
    &partial_receive_len, 0, "Minimum read size for partially received "
    "data segment");
static int sendspace = 1048576;
SYSCTL_INT(_kern_icl, OID_AUTO, sendspace, CTLFLAG_RWTUN,
    &sendspace, 0, "Default send socket buffer size");
static int recvspace = 1048576;
SYSCTL_INT(_kern_icl, OID_AUTO, recvspace, CTLFLAG_RWTUN,
    &recvspace, 0, "Default receive socket buffer size");

static MALLOC_DEFINE(M_ICL_SOFT, "icl_soft", "iSCSI software backend");
static uma_zone_t icl_pdu_zone;

static volatile u_int	icl_ncons;

#define ICL_CONN_LOCK(X)		mtx_lock(X->ic_lock)
#define ICL_CONN_UNLOCK(X)		mtx_unlock(X->ic_lock)
#define ICL_CONN_LOCK_ASSERT(X)		mtx_assert(X->ic_lock, MA_OWNED)
#define ICL_CONN_LOCK_ASSERT_NOT(X)	mtx_assert(X->ic_lock, MA_NOTOWNED)

STAILQ_HEAD(icl_pdu_stailq, icl_pdu);

static icl_conn_new_pdu_t	icl_soft_conn_new_pdu;
static icl_conn_pdu_free_t	icl_soft_conn_pdu_free;
static icl_conn_pdu_data_segment_length_t
				    icl_soft_conn_pdu_data_segment_length;
static icl_conn_pdu_append_data_t	icl_soft_conn_pdu_append_data;
static icl_conn_pdu_get_data_t	icl_soft_conn_pdu_get_data;
static icl_conn_pdu_queue_t	icl_soft_conn_pdu_queue;
static icl_conn_handoff_t	icl_soft_conn_handoff;
static icl_conn_free_t		icl_soft_conn_free;
static icl_conn_close_t		icl_soft_conn_close;
static icl_conn_task_setup_t	icl_soft_conn_task_setup;
static icl_conn_task_done_t	icl_soft_conn_task_done;
static icl_conn_transfer_setup_t	icl_soft_conn_transfer_setup;
static icl_conn_transfer_done_t	icl_soft_conn_transfer_done;
#ifdef ICL_KERNEL_PROXY
static icl_conn_connect_t	icl_soft_conn_connect;
#endif

static kobj_method_t icl_soft_methods[] = {
	KOBJMETHOD(icl_conn_new_pdu, icl_soft_conn_new_pdu),
	KOBJMETHOD(icl_conn_pdu_free, icl_soft_conn_pdu_free),
	KOBJMETHOD(icl_conn_pdu_data_segment_length,
	    icl_soft_conn_pdu_data_segment_length),
	KOBJMETHOD(icl_conn_pdu_append_data, icl_soft_conn_pdu_append_data),
	KOBJMETHOD(icl_conn_pdu_get_data, icl_soft_conn_pdu_get_data),
	KOBJMETHOD(icl_conn_pdu_queue, icl_soft_conn_pdu_queue),
	KOBJMETHOD(icl_conn_handoff, icl_soft_conn_handoff),
	KOBJMETHOD(icl_conn_free, icl_soft_conn_free),
	KOBJMETHOD(icl_conn_close, icl_soft_conn_close),
	KOBJMETHOD(icl_conn_task_setup, icl_soft_conn_task_setup),
	KOBJMETHOD(icl_conn_task_done, icl_soft_conn_task_done),
	KOBJMETHOD(icl_conn_transfer_setup, icl_soft_conn_transfer_setup),
	KOBJMETHOD(icl_conn_transfer_done, icl_soft_conn_transfer_done),
#ifdef ICL_KERNEL_PROXY
	KOBJMETHOD(icl_conn_connect, icl_soft_conn_connect),
#endif
	{ 0, 0 }
};

DEFINE_CLASS(icl_soft, icl_soft_methods, sizeof(struct icl_conn));

static void
icl_conn_fail(struct icl_conn *ic)
{
	if (ic->ic_socket == NULL)
		return;

	/*
	 * XXX
	 */
	ic->ic_socket->so_error = EDOOFUS;
	(ic->ic_error)(ic);
}

static struct mbuf *
icl_conn_receive(struct icl_conn *ic, size_t len)
{
	struct uio uio;
	struct socket *so;
	struct mbuf *m;
	int error, flags;

	so = ic->ic_socket;

	memset(&uio, 0, sizeof(uio));
	uio.uio_resid = len;

	flags = MSG_DONTWAIT;
	error = soreceive(so, NULL, &uio, &m, NULL, &flags);
	if (error != 0) {
		ICL_DEBUG("soreceive error %d", error);
		return (NULL);
	}
	if (uio.uio_resid != 0) {
		m_freem(m);
		ICL_DEBUG("short read");
		return (NULL);
	}

	return (m);
}

static int
icl_conn_receive_buf(struct icl_conn *ic, void *buf, size_t len)
{
	struct iovec iov[1];
	struct uio uio;
	struct socket *so;
	int error, flags;

	so = ic->ic_socket;

	memset(&uio, 0, sizeof(uio));
	iov[0].iov_base = buf;
	iov[0].iov_len = len;
	uio.uio_iov = iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = 0;
	uio.uio_resid = len;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;

	flags = MSG_DONTWAIT;
	error = soreceive(so, NULL, &uio, NULL, NULL, &flags);
	if (error != 0) {
		ICL_DEBUG("soreceive error %d", error);
		return (-1);
	}
	if (uio.uio_resid != 0) {
		ICL_DEBUG("short read");
		return (-1);
	}

	return (0);
}

static void
icl_soft_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{

	m_freem(ip->ip_bhs_mbuf);
	m_freem(ip->ip_ahs_mbuf);
	m_freem(ip->ip_data_mbuf);
	uma_zfree(icl_pdu_zone, ip);
#ifdef DIAGNOSTIC
	refcount_release(&ic->ic_outstanding_pdus);
#endif
}

/*
 * Allocate an icl_pdu with an empty BHS, to be filled in by the caller.
 */
struct icl_pdu *
icl_soft_conn_new_pdu(struct icl_conn *ic, int flags)
{
	struct icl_pdu *ip;

#ifdef DIAGNOSTIC
	refcount_acquire(&ic->ic_outstanding_pdus);
#endif
	ip = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
	if (ip == NULL) {
		ICL_WARN("failed to allocate %zd bytes", sizeof(*ip));
#ifdef DIAGNOSTIC
		refcount_release(&ic->ic_outstanding_pdus);
#endif
		return (NULL);
	}
	ip->ip_conn = ic;

	CTASSERT(sizeof(struct iscsi_bhs) <= MHLEN);
	ip->ip_bhs_mbuf = m_gethdr(flags, MT_DATA);
	if (ip->ip_bhs_mbuf == NULL) {
		ICL_WARN("failed to allocate BHS mbuf");
		icl_soft_conn_pdu_free(ic, ip);
		return (NULL);
	}
	ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *);
	memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs));
	ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs);

	return (ip);
}

static int
icl_pdu_ahs_length(const struct icl_pdu *request)
{

	return (request->ip_bhs->bhs_total_ahs_len * 4);
}

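/*
 * The BHS DataSegmentLength is a 24-bit, big-endian field, kept here as
 * three separate bytes; the helpers below assemble and split it one byte
 * at a time.  For example, an 8192-byte data segment is encoded as
 * 0x00 0x20 0x00.
 */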
static size_t
icl_pdu_data_segment_length(const struct icl_pdu *request)
{
	uint32_t len = 0;

	len += request->ip_bhs->bhs_data_segment_len[0];
	len <<= 8;
	len += request->ip_bhs->bhs_data_segment_len[1];
	len <<= 8;
	len += request->ip_bhs->bhs_data_segment_len[2];

	return (len);
}

size_t
icl_soft_conn_pdu_data_segment_length(struct icl_conn *ic,
    const struct icl_pdu *request)
{

	return (icl_pdu_data_segment_length(request));
}

static void
icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len)
{

	response->ip_bhs->bhs_data_segment_len[2] = len;
	response->ip_bhs->bhs_data_segment_len[1] = len >> 8;
	response->ip_bhs->bhs_data_segment_len[0] = len >> 16;
}

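/*
 * iSCSI pads each data segment to a 4-byte boundary; return the number of
 * pad bytes (0-3) needed after ip_data_len bytes of data, e.g. 3 for a
 * 5-byte data segment.
 */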
static size_t
icl_pdu_padding(const struct icl_pdu *ip)
{

	if ((ip->ip_data_len % 4) != 0)
		return (4 - (ip->ip_data_len % 4));

	return (0);
}

static size_t
icl_pdu_size(const struct icl_pdu *response)
{
	size_t len;

	KASSERT(response->ip_ahs_len == 0, ("responding with AHS"));

	len = sizeof(struct iscsi_bhs) + response->ip_data_len +
	    icl_pdu_padding(response);
	if (response->ip_conn->ic_header_crc32c)
		len += ISCSI_HEADER_DIGEST_SIZE;
	if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c)
		len += ISCSI_DATA_DIGEST_SIZE;

	return (len);
}

static int
icl_pdu_receive_bhs(struct icl_pdu *request, size_t *availablep)
{

	if (icl_conn_receive_buf(request->ip_conn,
	    request->ip_bhs, sizeof(struct iscsi_bhs))) {
		ICL_DEBUG("failed to receive BHS");
		return (-1);
	}

	*availablep -= sizeof(struct iscsi_bhs);
	return (0);
}

static int
icl_pdu_receive_ahs(struct icl_pdu *request, size_t *availablep)
{

	request->ip_ahs_len = icl_pdu_ahs_length(request);
	if (request->ip_ahs_len == 0)
		return (0);

	request->ip_ahs_mbuf = icl_conn_receive(request->ip_conn,
	    request->ip_ahs_len);
	if (request->ip_ahs_mbuf == NULL) {
		ICL_DEBUG("failed to receive AHS");
		return (-1);
	}

	*availablep -= request->ip_ahs_len;
	return (0);
}

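/*
 * Compute the iSCSI CRC32C digest over an entire mbuf chain.  The digest
 * is seeded with all ones and inverted at the end, as CRC32C requires.
 */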
static uint32_t
icl_mbuf_to_crc32c(const struct mbuf *m0)
{
	uint32_t digest = 0xffffffff;
	const struct mbuf *m;

	for (m = m0; m != NULL; m = m->m_next)
		digest = calculate_crc32c(digest,
		    mtod(m, const void *), m->m_len);

	digest = digest ^ 0xffffffff;

	return (digest);
}

static int
icl_pdu_check_header_digest(struct icl_pdu *request, size_t *availablep)
{
	uint32_t received_digest, valid_digest;

	if (request->ip_conn->ic_header_crc32c == false)
		return (0);

	CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE);
	if (icl_conn_receive_buf(request->ip_conn,
	    &received_digest, ISCSI_HEADER_DIGEST_SIZE)) {
		ICL_DEBUG("failed to receive header digest");
		return (-1);
	}
	*availablep -= ISCSI_HEADER_DIGEST_SIZE;

	/* Temporarily attach AHS to BHS to calculate header digest. */
	request->ip_bhs_mbuf->m_next = request->ip_ahs_mbuf;
	valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
	request->ip_bhs_mbuf->m_next = NULL;
	if (received_digest != valid_digest) {
		ICL_WARN("header digest check failed; got 0x%x, "
		    "should be 0x%x", received_digest, valid_digest);
		return (-1);
	}

	return (0);
}

/*
 * Return the number of bytes that should be waiting in the receive socket
 * before icl_pdu_receive_data_segment() gets called.
 */
static size_t
icl_pdu_data_segment_receive_len(const struct icl_pdu *request)
{
	size_t len;

	len = icl_pdu_data_segment_length(request);
	if (len == 0)
		return (0);

	/*
	 * Account for the parts of data segment already read from
	 * the socket buffer.
	 */
	KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
	len -= request->ip_data_len;

	/*
	 * Don't always wait for the full data segment to be delivered
	 * to the socket; this might badly affect performance due to
	 * TCP window scaling.
	 */
	if (len > partial_receive_len) {
#if 0
		ICL_DEBUG("need %zd bytes of data, limiting to %zd",
		    len, partial_receive_len);
#endif
		len = partial_receive_len;

		return (len);
	}

	/*
	 * Account for padding.  Note that due to the way code is written,
	 * the icl_pdu_receive_data_segment() must always receive padding
	 * along with the last part of data segment, because it would be
	 * impossible to tell whether we've already received the full data
	 * segment including padding, or without it.
	 */
	if ((len % 4) != 0)
		len += 4 - (len % 4);

#if 0
	ICL_DEBUG("need %zd bytes of data", len);
#endif

	return (len);
}

static int
icl_pdu_receive_data_segment(struct icl_pdu *request,
    size_t *availablep, bool *more_neededp)
{
	struct icl_conn *ic;
	size_t len, padding = 0;
	struct mbuf *m;

	ic = request->ip_conn;

	*more_neededp = false;
	ic->ic_receive_len = 0;

	len = icl_pdu_data_segment_length(request);
	if (len == 0)
		return (0);

	if ((len % 4) != 0)
		padding = 4 - (len % 4);

	/*
	 * Account for already received parts of data segment.
	 */
	KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
	len -= request->ip_data_len;

	if (len + padding > *availablep) {
		/*
		 * Not enough data in the socket buffer.  Receive as much
		 * as we can.  Don't receive padding, since, obviously, it's
		 * not the end of data segment yet.
		 */
#if 0
		ICL_DEBUG("limited from %zd to %zd",
		    len + padding, *availablep - padding);
#endif
		len = *availablep - padding;
		*more_neededp = true;
		padding = 0;
	}

	/*
	 * Must not try to receive padding without at least one byte
	 * of actual data segment.
	 */
	if (len > 0) {
		m = icl_conn_receive(request->ip_conn, len + padding);
		if (m == NULL) {
			ICL_DEBUG("failed to receive data segment");
			return (-1);
		}

		if (request->ip_data_mbuf == NULL)
			request->ip_data_mbuf = m;
		else
			m_cat(request->ip_data_mbuf, m);

		request->ip_data_len += len;
		*availablep -= len + padding;
	} else
		ICL_DEBUG("len 0");

	if (*more_neededp)
		ic->ic_receive_len =
		    icl_pdu_data_segment_receive_len(request);

	return (0);
}

static int
icl_pdu_check_data_digest(struct icl_pdu *request, size_t *availablep)
{
	uint32_t received_digest, valid_digest;

	if (request->ip_conn->ic_data_crc32c == false)
		return (0);

	if (request->ip_data_len == 0)
		return (0);

	CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE);
	if (icl_conn_receive_buf(request->ip_conn,
	    &received_digest, ISCSI_DATA_DIGEST_SIZE)) {
		ICL_DEBUG("failed to receive data digest");
		return (-1);
	}
	*availablep -= ISCSI_DATA_DIGEST_SIZE;

	/*
	 * Note that ip_data_mbuf also contains padding; since digest
	 * calculation is supposed to include that, we iterate over
	 * the entire ip_data_mbuf chain, not just ip_data_len bytes of it.
	 */
	valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
	if (received_digest != valid_digest) {
		ICL_WARN("data digest check failed; got 0x%x, "
		    "should be 0x%x", received_digest, valid_digest);
		return (-1);
	}

	return (0);
}

/*
 * Somewhat contrary to the name, this attempts to receive only one
 * "part" of PDU at a time; call it repeatedly until it returns non-NULL.
 */
static struct icl_pdu *
icl_conn_receive_pdu(struct icl_conn *ic, size_t *availablep)
{
	struct icl_pdu *request;
	struct socket *so;
	size_t len;
	int error;
	bool more_needed;

	so = ic->ic_socket;

	if (ic->ic_receive_state == ICL_CONN_STATE_BHS) {
		KASSERT(ic->ic_receive_pdu == NULL,
		    ("ic->ic_receive_pdu != NULL"));
		request = icl_soft_conn_new_pdu(ic, M_NOWAIT);
		if (request == NULL) {
			ICL_DEBUG("failed to allocate PDU; "
			    "dropping connection");
			icl_conn_fail(ic);
			return (NULL);
		}
		ic->ic_receive_pdu = request;
	} else {
		KASSERT(ic->ic_receive_pdu != NULL,
		    ("ic->ic_receive_pdu == NULL"));
		request = ic->ic_receive_pdu;
	}

	if (*availablep < ic->ic_receive_len) {
#if 0
		ICL_DEBUG("not enough data; need %zd, "
		    "have %zd", ic->ic_receive_len, *availablep);
#endif
		return (NULL);
	}

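	/*
	 * Receive state machine: BHS -> AHS -> HEADER_DIGEST -> DATA ->
	 * DATA_DIGEST, then back to BHS for the next PDU.  Each state sets
	 * ic_receive_len to the number of bytes the next state expects.
	 */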
	switch (ic->ic_receive_state) {
	case ICL_CONN_STATE_BHS:
		//ICL_DEBUG("receiving BHS");
		error = icl_pdu_receive_bhs(request, availablep);
		if (error != 0) {
			ICL_DEBUG("failed to receive BHS; "
			    "dropping connection");
			break;
		}

		/*
		 * We don't enforce any limit for AHS length;
		 * its length is stored in an 8-bit field.
		 */

		len = icl_pdu_data_segment_length(request);
		if (len > ic->ic_max_data_segment_length) {
			ICL_WARN("received data segment "
			    "length %zd is larger than negotiated "
			    "MaxDataSegmentLength %zd; "
			    "dropping connection",
			    len, ic->ic_max_data_segment_length);
			error = EINVAL;
			break;
		}

		ic->ic_receive_state = ICL_CONN_STATE_AHS;
		ic->ic_receive_len = icl_pdu_ahs_length(request);
		break;

	case ICL_CONN_STATE_AHS:
		//ICL_DEBUG("receiving AHS");
		error = icl_pdu_receive_ahs(request, availablep);
		if (error != 0) {
			ICL_DEBUG("failed to receive AHS; "
			    "dropping connection");
			break;
		}
		ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST;
		if (ic->ic_header_crc32c == false)
			ic->ic_receive_len = 0;
		else
			ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE;
		break;

	case ICL_CONN_STATE_HEADER_DIGEST:
		//ICL_DEBUG("receiving header digest");
		error = icl_pdu_check_header_digest(request, availablep);
		if (error != 0) {
			ICL_DEBUG("header digest failed; "
			    "dropping connection");
			break;
		}

		ic->ic_receive_state = ICL_CONN_STATE_DATA;
		ic->ic_receive_len =
		    icl_pdu_data_segment_receive_len(request);
		break;

	case ICL_CONN_STATE_DATA:
		//ICL_DEBUG("receiving data segment");
		error = icl_pdu_receive_data_segment(request, availablep,
		    &more_needed);
		if (error != 0) {
			ICL_DEBUG("failed to receive data segment;"
			    "dropping connection");
			break;
		}

		if (more_needed)
			break;

		ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST;
		if (request->ip_data_len == 0 || ic->ic_data_crc32c == false)
			ic->ic_receive_len = 0;
		else
			ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE;
		break;

	case ICL_CONN_STATE_DATA_DIGEST:
		//ICL_DEBUG("receiving data digest");
		error = icl_pdu_check_data_digest(request, availablep);
		if (error != 0) {
			ICL_DEBUG("data digest failed; "
			    "dropping connection");
			break;
		}

		/*
		 * We've received complete PDU; reset the receive state machine
		 * and return the PDU.
		 */
		ic->ic_receive_state = ICL_CONN_STATE_BHS;
		ic->ic_receive_len = sizeof(struct iscsi_bhs);
		ic->ic_receive_pdu = NULL;
		return (request);

	default:
		panic("invalid ic_receive_state %d\n", ic->ic_receive_state);
	}

	if (error != 0) {
		/*
		 * Don't free the PDU; it's pointed to by ic->ic_receive_pdu
		 * and will get freed in icl_soft_conn_close().
		 */
		icl_conn_fail(ic);
	}

	return (NULL);
}

static void
icl_conn_receive_pdus(struct icl_conn *ic, size_t available)
{
	struct icl_pdu *response;
	struct socket *so;

	so = ic->ic_socket;

	/*
	 * This can never happen; we're careful to only mess with ic->ic_socket
	 * pointer when the send/receive threads are not running.
	 */
	KASSERT(so != NULL, ("NULL socket"));

	for (;;) {
		if (ic->ic_disconnecting)
			return;

		if (so->so_error != 0) {
			ICL_DEBUG("connection error %d; "
			    "dropping connection", so->so_error);
			icl_conn_fail(ic);
			return;
		}

		/*
		 * Loop until we have a complete PDU or there is not enough
		 * data in the socket buffer.
		 */
		if (available < ic->ic_receive_len) {
#if 0
			ICL_DEBUG("not enough data; have %zd, "
			    "need %zd", available,
			    ic->ic_receive_len);
#endif
			return;
		}

		response = icl_conn_receive_pdu(ic, &available);
		if (response == NULL)
			continue;

		if (response->ip_ahs_len > 0) {
			ICL_WARN("received PDU with unsupported "
			    "AHS; opcode 0x%x; dropping connection",
			    response->ip_bhs->bhs_opcode);
			icl_soft_conn_pdu_free(ic, response);
			icl_conn_fail(ic);
			return;
		}

		(ic->ic_receive)(response);
	}
}

static void
icl_receive_thread(void *arg)
{
	struct icl_conn *ic;
	size_t available;
	struct socket *so;

	ic = arg;
	so = ic->ic_socket;

	for (;;) {
		if (ic->ic_disconnecting) {
			//ICL_DEBUG("terminating");
			break;
		}

		/*
		 * Set the low watermark, to be checked by
		 * soreadable() in icl_soupcall_receive()
		 * to avoid unnecessary wakeups until there
		 * is enough data received to read the PDU.
		 */
		SOCKBUF_LOCK(&so->so_rcv);
		available = sbavail(&so->so_rcv);
		if (available < ic->ic_receive_len) {
			so->so_rcv.sb_lowat = ic->ic_receive_len;
			cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx);
		} else
			so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1;
		SOCKBUF_UNLOCK(&so->so_rcv);

		icl_conn_receive_pdus(ic, available);
	}

	ICL_CONN_LOCK(ic);
	ic->ic_receive_running = false;
	cv_signal(&ic->ic_send_cv);
	ICL_CONN_UNLOCK(ic);
	kthread_exit();
}

static int
icl_soupcall_receive(struct socket *so, void *arg, int waitflag)
{
	struct icl_conn *ic;

	if (!soreadable(so))
		return (SU_OK);

	ic = arg;
	cv_signal(&ic->ic_receive_cv);
	return (SU_OK);
}

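/*
 * Prepare a queued PDU for transmission: store the final DataSegmentLength
 * in the BHS, append the header digest, padding and data digest as
 * negotiated for this connection, and chain the data mbufs onto the BHS
 * mbuf so the whole PDU can be handed to sosend() as a single chain.
 */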
static int
icl_pdu_finalize(struct icl_pdu *request)
{
	size_t padding, pdu_len;
	uint32_t digest, zero = 0;
	int ok;
	struct icl_conn *ic;

	ic = request->ip_conn;

	icl_pdu_set_data_segment_length(request, request->ip_data_len);

	pdu_len = icl_pdu_size(request);

	if (ic->ic_header_crc32c) {
		digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
		ok = m_append(request->ip_bhs_mbuf, sizeof(digest),
		    (void *)&digest);
		if (ok != 1) {
			ICL_WARN("failed to append header digest");
			return (1);
		}
	}

	if (request->ip_data_len != 0) {
		padding = icl_pdu_padding(request);
		if (padding > 0) {
			ok = m_append(request->ip_data_mbuf, padding,
			    (void *)&zero);
			if (ok != 1) {
				ICL_WARN("failed to append padding");
				return (1);
			}
		}

		if (ic->ic_data_crc32c) {
			digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);

			ok = m_append(request->ip_data_mbuf, sizeof(digest),
			    (void *)&digest);
			if (ok != 1) {
				ICL_WARN("failed to append data digest");
				return (1);
			}
		}

		m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf);
		request->ip_data_mbuf = NULL;
	}

	request->ip_bhs_mbuf->m_pkthdr.len = pdu_len;

	return (0);
}

static void
icl_conn_send_pdus(struct icl_conn *ic, struct icl_pdu_stailq *queue)
{
	struct icl_pdu *request, *request2;
	struct socket *so;
	long available, size, size2;
	int coalesced, error;

	ICL_CONN_LOCK_ASSERT_NOT(ic);

	so = ic->ic_socket;

	SOCKBUF_LOCK(&so->so_snd);
	/*
	 * Check how much space we have for transmit.  We can't just
	 * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE,
	 * as it always frees the mbuf chain passed to it, even in case
	 * of error.
	 */
	available = sbspace(&so->so_snd);

	/*
	 * Notify the socket upcall that we don't need wakeups
	 * for the time being.
	 */
	so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1;
	SOCKBUF_UNLOCK(&so->so_snd);

	while (!STAILQ_EMPTY(queue)) {
		request = STAILQ_FIRST(queue);
		size = icl_pdu_size(request);
		if (available < size) {

			/*
			 * Set the low watermark, to be checked by
			 * sowriteable() in icl_soupcall_send()
			 * to avoid unnecessary wakeups until there
			 * is enough space for the PDU to fit.
			 */
			SOCKBUF_LOCK(&so->so_snd);
			available = sbspace(&so->so_snd);
			if (available < size) {
#if 1
				ICL_DEBUG("no space to send; "
				    "have %ld, need %ld",
				    available, size);
#endif
				so->so_snd.sb_lowat = max(size,
				    so->so_snd.sb_hiwat / 8);
				SOCKBUF_UNLOCK(&so->so_snd);
				return;
			}
			SOCKBUF_UNLOCK(&so->so_snd);
		}
		STAILQ_REMOVE_HEAD(queue, ip_next);
		error = icl_pdu_finalize(request);
		if (error != 0) {
			ICL_DEBUG("failed to finalize PDU; "
			    "dropping connection");
			icl_soft_conn_pdu_free(ic, request);
			icl_conn_fail(ic);
			return;
		}
		if (coalesce) {
			coalesced = 1;
			for (;;) {
				request2 = STAILQ_FIRST(queue);
				if (request2 == NULL)
					break;
				size2 = icl_pdu_size(request2);
				if (available < size + size2)
					break;
				STAILQ_REMOVE_HEAD(queue, ip_next);
				error = icl_pdu_finalize(request2);
				if (error != 0) {
					ICL_DEBUG("failed to finalize PDU; "
					    "dropping connection");
					icl_soft_conn_pdu_free(ic, request);
					icl_soft_conn_pdu_free(ic, request2);
					icl_conn_fail(ic);
					return;
				}
				m_cat(request->ip_bhs_mbuf, request2->ip_bhs_mbuf);
				request2->ip_bhs_mbuf = NULL;
				request->ip_bhs_mbuf->m_pkthdr.len += size2;
				size += size2;
				icl_soft_conn_pdu_free(ic, request2);
				coalesced++;
			}
#if 0
			if (coalesced > 1) {
				ICL_DEBUG("coalesced %d PDUs into %ld bytes",
				    coalesced, size);
			}
#endif
		}
		available -= size;
		error = sosend(so, NULL, NULL, request->ip_bhs_mbuf,
		    NULL, MSG_DONTWAIT, curthread);
		request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */
		if (error != 0) {
			ICL_DEBUG("failed to send PDU, error %d; "
			    "dropping connection", error);
			icl_soft_conn_pdu_free(ic, request);
			icl_conn_fail(ic);
			return;
		}
		icl_soft_conn_pdu_free(ic, request);
	}
}

static void
icl_send_thread(void *arg)
{
	struct icl_conn *ic;
	struct icl_pdu_stailq queue;

	ic = arg;

	STAILQ_INIT(&queue);

	ICL_CONN_LOCK(ic);
	for (;;) {
		for (;;) {
			/*
			 * If the local queue is empty, populate it from
			 * the main one.  This way the icl_conn_send_pdus()
			 * can go through all the queued PDUs without holding
			 * any locks.
			 */
			if (STAILQ_EMPTY(&queue))
				STAILQ_SWAP(&ic->ic_to_send, &queue, icl_pdu);

			ic->ic_check_send_space = false;
			ICL_CONN_UNLOCK(ic);
			icl_conn_send_pdus(ic, &queue);
			ICL_CONN_LOCK(ic);

			/*
			 * The icl_soupcall_send() was called since the last
			 * call to sbspace(); go around;
			 */
			if (ic->ic_check_send_space)
				continue;

			/*
			 * Local queue is empty, but we still have PDUs
			 * in the main one; go around.
			 */
			if (STAILQ_EMPTY(&queue) &&
			    !STAILQ_EMPTY(&ic->ic_to_send))
				continue;

			/*
			 * There might be some stuff in the local queue,
			 * which didn't get sent due to not having enough send
			 * space.  Wait for socket upcall.
			 */
			break;
		}

		if (ic->ic_disconnecting) {
			//ICL_DEBUG("terminating");
			break;
		}

		cv_wait(&ic->ic_send_cv, ic->ic_lock);
	}

	/*
	 * We're exiting; move PDUs back to the main queue, so they can
	 * get freed properly.  At this point ordering doesn't matter.
	 */
	STAILQ_CONCAT(&ic->ic_to_send, &queue);

	ic->ic_send_running = false;
	cv_signal(&ic->ic_send_cv);
	ICL_CONN_UNLOCK(ic);
	kthread_exit();
}

static int
icl_soupcall_send(struct socket *so, void *arg, int waitflag)
{
	struct icl_conn *ic;

	if (!sowriteable(so))
		return (SU_OK);

	ic = arg;

	ICL_CONN_LOCK(ic);
	ic->ic_check_send_space = true;
	ICL_CONN_UNLOCK(ic);

	cv_signal(&ic->ic_send_cv);

	return (SU_OK);
}

static int
icl_soft_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
    const void *addr, size_t len, int flags)
{
	struct mbuf *mb, *newmb;
	size_t copylen, off = 0;

	KASSERT(len > 0, ("len == 0"));

	newmb = m_getm2(NULL, len, flags, MT_DATA, 0);
	if (newmb == NULL) {
		ICL_WARN("failed to allocate mbuf for %zd bytes", len);
		return (ENOMEM);
	}

	for (mb = newmb; mb != NULL; mb = mb->m_next) {
		copylen = min(M_TRAILINGSPACE(mb), len - off);
		memcpy(mtod(mb, char *), (const char *)addr + off, copylen);
		mb->m_len = copylen;
		off += copylen;
	}
	KASSERT(off == len, ("%s: off != len", __func__));

	if (request->ip_data_mbuf == NULL) {
		request->ip_data_mbuf = newmb;
		request->ip_data_len = len;
	} else {
		m_cat(request->ip_data_mbuf, newmb);
		request->ip_data_len += len;
	}

	return (0);
}

void
icl_soft_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
    size_t off, void *addr, size_t len)
{

	m_copydata(ip->ip_data_mbuf, off, len, addr);
}

static void
icl_pdu_queue(struct icl_pdu *ip)
{
	struct icl_conn *ic;

	ic = ip->ip_conn;

	ICL_CONN_LOCK_ASSERT(ic);

	if (ic->ic_disconnecting || ic->ic_socket == NULL) {
		ICL_DEBUG("icl_pdu_queue on closed connection");
		icl_soft_conn_pdu_free(ic, ip);
		return;
	}

	if (!STAILQ_EMPTY(&ic->ic_to_send)) {
		STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
		/*
		 * If the queue is not empty, someone else had already
		 * signaled the send thread; no need to do that again,
		 * just return.
		 */
		return;
	}

	STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
	cv_signal(&ic->ic_send_cv);
}

void
icl_soft_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
{

	icl_pdu_queue(ip);
}

static struct icl_conn *
icl_soft_new_conn(const char *name, struct mtx *lock)
{
	struct icl_conn *ic;

	refcount_acquire(&icl_ncons);

	ic = (struct icl_conn *)kobj_create(&icl_soft_class, M_ICL_SOFT, M_WAITOK | M_ZERO);

	STAILQ_INIT(&ic->ic_to_send);
	ic->ic_lock = lock;
	cv_init(&ic->ic_send_cv, "icl_tx");
	cv_init(&ic->ic_receive_cv, "icl_rx");
#ifdef DIAGNOSTIC
	refcount_init(&ic->ic_outstanding_pdus, 0);
#endif
	ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH;
	ic->ic_name = name;
	ic->ic_offload = "None";
	ic->ic_unmapped = false;

	return (ic);
}

void
icl_soft_conn_free(struct icl_conn *ic)
{

#ifdef DIAGNOSTIC
	KASSERT(ic->ic_outstanding_pdus == 0,
	    ("destroying session with %d outstanding PDUs",
	     ic->ic_outstanding_pdus));
#endif
	cv_destroy(&ic->ic_send_cv);
	cv_destroy(&ic->ic_receive_cv);
	kobj_delete((struct kobj *)ic, M_ICL_SOFT);
	refcount_release(&icl_ncons);
}

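/*
 * Prepare the socket for Full Feature phase: size the socket buffers so a
 * maximum-sized PDU always fits, disable Nagle, register the send and
 * receive upcalls, and start the per-connection transmit and receive
 * kernel threads.
 */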
static int
icl_conn_start(struct icl_conn *ic)
{
	size_t minspace;
	struct sockopt opt;
	int error, one = 1;

	ICL_CONN_LOCK(ic);

	/*
	 * XXX: Ugly hack.
	 */
	if (ic->ic_socket == NULL) {
		ICL_CONN_UNLOCK(ic);
		return (EINVAL);
	}

	ic->ic_receive_state = ICL_CONN_STATE_BHS;
	ic->ic_receive_len = sizeof(struct iscsi_bhs);
	ic->ic_disconnecting = false;

	ICL_CONN_UNLOCK(ic);

	/*
	 * For sendspace, this is required because the current code cannot
	 * send a PDU in pieces; thus, the minimum buffer size is equal
	 * to the maximum PDU size.  "+4" is to account for possible padding.
	 *
	 * What we should actually do here is to use autoscaling, but set
	 * some minimal buffer size to "minspace".  I don't know a way to do
	 * that, though.
	 */
	minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length +
	    ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4;
	if (sendspace < minspace) {
		ICL_WARN("kern.icl.sendspace too low; must be at least %zd",
		    minspace);
		sendspace = minspace;
	}
	if (recvspace < minspace) {
		ICL_WARN("kern.icl.recvspace too low; must be at least %zd",
		    minspace);
		recvspace = minspace;
	}

	error = soreserve(ic->ic_socket, sendspace, recvspace);
	if (error != 0) {
		ICL_WARN("soreserve failed with error %d", error);
		icl_soft_conn_close(ic);
		return (error);
	}
	ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE;
	ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE;

	/*
	 * Disable Nagle.
	 */
	bzero(&opt, sizeof(opt));
	opt.sopt_dir = SOPT_SET;
	opt.sopt_level = IPPROTO_TCP;
	opt.sopt_name = TCP_NODELAY;
	opt.sopt_val = &one;
	opt.sopt_valsize = sizeof(one);
	error = sosetopt(ic->ic_socket, &opt);
	if (error != 0) {
		ICL_WARN("disabling TCP_NODELAY failed with error %d", error);
		icl_soft_conn_close(ic);
		return (error);
	}

	/*
	 * Register socket upcall, to get notified about incoming PDUs
	 * and free space to send outgoing ones.
	 */
	SOCKBUF_LOCK(&ic->ic_socket->so_snd);
	soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic);
	SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
	SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
	soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic);
	SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);

	/*
	 * Start threads.
	 */
	ICL_CONN_LOCK(ic);
	ic->ic_send_running = ic->ic_receive_running = true;
	ICL_CONN_UNLOCK(ic);
	error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx",
	    ic->ic_name);
	if (error != 0) {
		ICL_WARN("kthread_add(9) failed with error %d", error);
		ICL_CONN_LOCK(ic);
		ic->ic_send_running = ic->ic_receive_running = false;
		cv_signal(&ic->ic_send_cv);
		ICL_CONN_UNLOCK(ic);
		icl_soft_conn_close(ic);
		return (error);
	}
	error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx",
	    ic->ic_name);
	if (error != 0) {
		ICL_WARN("kthread_add(9) failed with error %d", error);
		ICL_CONN_LOCK(ic);
		ic->ic_receive_running = false;
		cv_signal(&ic->ic_send_cv);
		ICL_CONN_UNLOCK(ic);
		icl_soft_conn_close(ic);
		return (error);
	}

	return (0);
}

int
icl_soft_conn_handoff(struct icl_conn *ic, int fd)
{
	struct file *fp;
	struct socket *so;
	cap_rights_t rights;
	int error;

	ICL_CONN_LOCK_ASSERT_NOT(ic);

#ifdef ICL_KERNEL_PROXY
	/*
	 * We're transitioning to Full Feature phase, and we don't
	 * really care.
	 */
	if (fd == 0) {
		ICL_CONN_LOCK(ic);
		if (ic->ic_socket == NULL) {
			ICL_CONN_UNLOCK(ic);
			ICL_WARN("proxy handoff without connect");
			return (EINVAL);
		}
		ICL_CONN_UNLOCK(ic);
		return (0);
	}
#endif

	/*
	 * Steal the socket from userland.
	 */
	error = fget(curthread, fd,
	    cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp);
	if (error != 0)
		return (error);
	if (fp->f_type != DTYPE_SOCKET) {
		fdrop(fp, curthread);
		return (EINVAL);
	}
	so = fp->f_data;
	if (so->so_type != SOCK_STREAM) {
		fdrop(fp, curthread);
		return (EINVAL);
	}

	ICL_CONN_LOCK(ic);

	if (ic->ic_socket != NULL) {
		ICL_CONN_UNLOCK(ic);
		fdrop(fp, curthread);
		return (EBUSY);
	}

	ic->ic_socket = fp->f_data;
	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	fdrop(fp, curthread);
	ICL_CONN_UNLOCK(ic);

	error = icl_conn_start(ic);

	return (error);
}

void
icl_soft_conn_close(struct icl_conn *ic)
{
	struct icl_pdu *pdu;
	struct socket *so;

	ICL_CONN_LOCK(ic);

	/*
	 * Wake up the threads, so they can properly terminate.
	 */
	ic->ic_disconnecting = true;
	while (ic->ic_receive_running || ic->ic_send_running) {
		cv_signal(&ic->ic_receive_cv);
		cv_signal(&ic->ic_send_cv);
		cv_wait(&ic->ic_send_cv, ic->ic_lock);
	}

	/* Some other thread could close the connection at the same time. */
	so = ic->ic_socket;
	if (so == NULL) {
		ICL_CONN_UNLOCK(ic);
		return;
	}
	ic->ic_socket = NULL;

	/*
	 * Deregister socket upcalls.
	 */
	ICL_CONN_UNLOCK(ic);
	SOCKBUF_LOCK(&so->so_snd);
	if (so->so_snd.sb_upcall != NULL)
		soupcall_clear(so, SO_SND);
	SOCKBUF_UNLOCK(&so->so_snd);
	SOCKBUF_LOCK(&so->so_rcv);
	if (so->so_rcv.sb_upcall != NULL)
		soupcall_clear(so, SO_RCV);
	SOCKBUF_UNLOCK(&so->so_rcv);
	soclose(so);
	ICL_CONN_LOCK(ic);

	if (ic->ic_receive_pdu != NULL) {
		//ICL_DEBUG("freeing partially received PDU");
		icl_soft_conn_pdu_free(ic, ic->ic_receive_pdu);
		ic->ic_receive_pdu = NULL;
	}

	/*
	 * Remove any outstanding PDUs from the send queue.
	 */
	while (!STAILQ_EMPTY(&ic->ic_to_send)) {
		pdu = STAILQ_FIRST(&ic->ic_to_send);
		STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next);
		icl_soft_conn_pdu_free(ic, pdu);
	}

	KASSERT(STAILQ_EMPTY(&ic->ic_to_send),
	    ("destroying session with non-empty send queue"));
	ICL_CONN_UNLOCK(ic);
}

int
icl_soft_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
    struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp)
{

	return (0);
}

void
icl_soft_conn_task_done(struct icl_conn *ic, void *prv)
{
}

int
icl_soft_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io,
    uint32_t *transfer_tag, void **prvp)
{

	return (0);
}

void
icl_soft_conn_transfer_done(struct icl_conn *ic, void *prv)
{
}

static int
icl_soft_limits(size_t *limitp)
{

	*limitp = 128 * 1024;

	return (0);
}

#ifdef ICL_KERNEL_PROXY
int
icl_soft_conn_connect(struct icl_conn *ic, int domain, int socktype,
    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
{

	return (icl_soft_proxy_connect(ic, domain, socktype, protocol,
	    from_sa, to_sa));
}

int
icl_soft_handoff_sock(struct icl_conn *ic, struct socket *so)
{
	int error;

	ICL_CONN_LOCK_ASSERT_NOT(ic);

	if (so->so_type != SOCK_STREAM)
		return (EINVAL);

	ICL_CONN_LOCK(ic);
	if (ic->ic_socket != NULL) {
		ICL_CONN_UNLOCK(ic);
		return (EBUSY);
	}
	ic->ic_socket = so;
	ICL_CONN_UNLOCK(ic);

	error = icl_conn_start(ic);

	return (error);
}
#endif /* ICL_KERNEL_PROXY */

static int
icl_soft_load(void)
{
	int error;

	icl_pdu_zone = uma_zcreate("icl_pdu",
	    sizeof(struct icl_pdu), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	refcount_init(&icl_ncons, 0);

	/*
	 * The reason we call this "none" is that to the user,
	 * it's known as "offload driver"; "offload driver: soft"
	 * doesn't make much sense.
	 */
	error = icl_register("none", false, 0,
	    icl_soft_limits, icl_soft_new_conn);
	KASSERT(error == 0, ("failed to register"));

#if defined(ICL_KERNEL_PROXY) && 0
	/*
	 * Debugging aid for kernel proxy functionality.
	 */
	error = icl_register("proxytest", true, 0,
	    icl_soft_limits, icl_soft_new_conn);
	KASSERT(error == 0, ("failed to register"));
#endif

	return (error);
}

static int
icl_soft_unload(void)
{

	if (icl_ncons != 0)
		return (EBUSY);

	icl_unregister("none", false);
#if defined(ICL_KERNEL_PROXY) && 0
	icl_unregister("proxytest", true);
#endif

	uma_zdestroy(icl_pdu_zone);

	return (0);
}

static int
icl_soft_modevent(module_t mod, int what, void *arg)
{

	switch (what) {
	case MOD_LOAD:
		return (icl_soft_load());
	case MOD_UNLOAD:
		return (icl_soft_unload());
	default:
		return (EINVAL);
	}
}

moduledata_t icl_soft_data = {
	"icl_soft",
	icl_soft_modevent,
	0
};

DECLARE_MODULE(icl_soft, icl_soft_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
MODULE_DEPEND(icl_soft, icl, 1, 1, 1);
MODULE_VERSION(icl_soft, 1);