1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#include <sys/conf.h>
27#include <sys/stat.h>
28#include <sys/file.h>
29#include <sys/ddi.h>
30#include <sys/sunddi.h>
31#include <sys/modctl.h>
32#include <sys/priv.h>
33#include <sys/cpuvar.h>
34#include <sys/socket.h>
35#include <sys/strsubr.h>
36#include <sys/sysmacros.h>
37#include <sys/sdt.h>
38#include <netinet/tcp.h>
39#include <inet/tcp.h>
40#include <sys/socketvar.h>
41#include <sys/pathname.h>
42#include <sys/fs/snode.h>
43#include <sys/fs/dv_node.h>
44#include <sys/vnode.h>
45#include <netinet/in.h>
46#include <net/if.h>
47#include <sys/sockio.h>
48#include <sys/ksocket.h>
49#include <sys/filio.h>		/* FIONBIO */
50#include <sys/iscsi_protocol.h>
51#include <sys/idm/idm.h>
52#include <sys/idm/idm_so.h>
53#include <sys/idm/idm_text.h>
54
/*
 * Pause, in seconds, between connect attempts while a non-blocking TCP
 * connect is still in progress.  The value is converted with SEC_TO_TICK()
 * before being handed to delay().
 */
#define	IN_PROGRESS_DELAY	1
56
/*
 * File-local IPv6 "any" address, built from IN6ADDR_ANY_INIT.
 * in6addr_any is currently all zeroes, but use the macro in case this
 * ever changes.
 */
static const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
62
63static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
64static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
65static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
66
67static idm_status_t idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so);
68static void idm_so_conn_destroy_common(idm_conn_t *ic);
69static void idm_so_conn_connect_common(idm_conn_t *ic);
70
71static void idm_set_ini_preconnect_options(idm_so_conn_t *sc,
72    boolean_t boot_conn);
73static void idm_set_ini_postconnect_options(idm_so_conn_t *sc);
74static void idm_set_tgt_connect_options(ksocket_t so);
75static idm_status_t idm_i_so_tx(idm_pdu_t *pdu);
76
77static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu);
78static void idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt,
79    idm_buf_t *idb, uint32_t offset, uint32_t length);
80static void idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb);
81static idm_status_t idm_so_send_buf_region(idm_task_t *idt,
82    idm_buf_t *idb, uint32_t buf_region_offset, uint32_t buf_region_length);
83
84static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb,
85    uint32_t ro, uint32_t dlength);
86
87static idm_status_t idm_so_handle_digest(idm_conn_t *it,
88    nvpair_t *digest_choice, const idm_kv_xlate_t *ikvx);
89
90static void idm_so_socket_set_nonblock(struct sonode *node);
91static void idm_so_socket_set_block(struct sonode *node);
92
93/*
94 * Transport ops prototypes
95 */
96static void idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu);
97static idm_status_t idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb);
98static idm_status_t idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb);
99static void idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu);
100static void idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu);
101static void idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu);
102static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt);
103static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it,
104    nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
105static void idm_so_notice_key_values(idm_conn_t *it,
106    nvlist_t *negotiated_nvl);
107static kv_status_t idm_so_declare_key_values(idm_conn_t *it,
108    nvlist_t *config_nvl, nvlist_t *outgoing_nvl);
109static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic,
110    idm_transport_caps_t *caps);
111static idm_status_t idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen);
112static void idm_so_buf_free(idm_buf_t *idb);
113static idm_status_t idm_so_buf_setup(idm_buf_t *idb);
114static void idm_so_buf_teardown(idm_buf_t *idb);
115static idm_status_t idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is);
116static void idm_so_tgt_svc_destroy(idm_svc_t *is);
117static idm_status_t idm_so_tgt_svc_online(idm_svc_t *is);
118static void idm_so_tgt_svc_offline(idm_svc_t *is);
119static void idm_so_tgt_conn_destroy(idm_conn_t *ic);
120static idm_status_t idm_so_tgt_conn_connect(idm_conn_t *ic);
121static void idm_so_conn_disconnect(idm_conn_t *ic);
122static idm_status_t idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic);
123static void idm_so_ini_conn_destroy(idm_conn_t *ic);
124static idm_status_t idm_so_ini_conn_connect(idm_conn_t *ic);
125
/*
 * IDM Native Sockets transport operations
 *
 * Operations vector for the sockets transport; idm_so_init() stores its
 * address in the transport's it_ops field.  The initializer is positional,
 * so each entry is tagged with the name of the slot it fills.  Slots set
 * to NULL have no sockets-specific implementation in this table.  The
 * target and initiator disconnect slots share idm_so_conn_disconnect().
 */
static
idm_transport_ops_t idm_so_transport_ops = {
	idm_so_tx,			/* it_tx_pdu */
	idm_so_buf_tx_to_ini,		/* it_buf_tx_to_ini */
	idm_so_buf_rx_from_ini,		/* it_buf_rx_from_ini */
	idm_so_rx_datain,		/* it_rx_datain */
	idm_so_rx_rtt,			/* it_rx_rtt */
	idm_so_rx_dataout,		/* it_rx_dataout */
	NULL,				/* it_alloc_conn_rsrc */
	NULL,				/* it_free_conn_rsrc */
	NULL,				/* it_tgt_enable_datamover */
	NULL,				/* it_ini_enable_datamover */
	NULL,				/* it_conn_terminate */
	idm_so_free_task_rsrc,		/* it_free_task_rsrc */
	idm_so_negotiate_key_values,	/* it_negotiate_key_values */
	idm_so_notice_key_values,	/* it_notice_key_values */
	idm_so_conn_is_capable,		/* it_conn_is_capable */
	idm_so_buf_alloc,		/* it_buf_alloc */
	idm_so_buf_free,		/* it_buf_free */
	idm_so_buf_setup,		/* it_buf_setup */
	idm_so_buf_teardown,		/* it_buf_teardown */
	idm_so_tgt_svc_create,		/* it_tgt_svc_create */
	idm_so_tgt_svc_destroy,		/* it_tgt_svc_destroy */
	idm_so_tgt_svc_online,		/* it_tgt_svc_online */
	idm_so_tgt_svc_offline,		/* it_tgt_svc_offline */
	idm_so_tgt_conn_destroy,	/* it_tgt_conn_destroy */
	idm_so_tgt_conn_connect,	/* it_tgt_conn_connect */
	idm_so_conn_disconnect,		/* it_tgt_conn_disconnect */
	idm_so_ini_conn_create,		/* it_ini_conn_create */
	idm_so_ini_conn_destroy,	/* it_ini_conn_destroy */
	idm_so_ini_conn_connect,	/* it_ini_conn_connect */
	idm_so_conn_disconnect,		/* it_ini_conn_disconnect */
	idm_so_declare_key_values	/* it_declare_key_values */
};
163
/*
 * Global mutex, initialized in idm_so_init() and destroyed in idm_so_fini().
 * NOTE(review): presumably serializes timed socket operations; its users
 * are not visible in this part of the file -- confirm.
 */
kmutex_t	idm_so_timed_socket_mutex;
165/*
166 * idm_so_init()
167 * Sockets transport initialization
168 */
169void
170idm_so_init(idm_transport_t *it)
171{
172	/* Cache for IDM Data and R2T Transmit PDU's */
173	idm.idm_sotx_pdu_cache = kmem_cache_create("idm_tx_pdu_cache",
174	    sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8,
175	    &idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
176
177	/* Cache for IDM Receive PDU's */
178	idm.idm_sorx_pdu_cache = kmem_cache_create("idm_rx_pdu_cache",
179	    sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8,
180	    &idm_sorx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
181
182	/* 128k buffer cache */
183	idm.idm_so_128k_buf_cache = kmem_cache_create("idm_128k_buf_cache",
184	    IDM_SO_BUF_CACHE_UB, 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);
185
186	/* Set the sockets transport ops */
187	it->it_ops = &idm_so_transport_ops;
188
189	mutex_init(&idm_so_timed_socket_mutex, NULL, MUTEX_DEFAULT, NULL);
190
191}
192
/*
 * idm_so_fini()
 * Sockets transport teardown.
 *
 * Destroys the three kmem caches and the global mutex that idm_so_init()
 * set up.  Takes no arguments and returns nothing.
 */
void
idm_so_fini(void)
{
	kmem_cache_destroy(idm.idm_so_128k_buf_cache);
	kmem_cache_destroy(idm.idm_sotx_pdu_cache);
	kmem_cache_destroy(idm.idm_sorx_pdu_cache);
	mutex_destroy(&idm_so_timed_socket_mutex);
}
205
206ksocket_t
207idm_socreate(int domain, int type, int protocol)
208{
209	ksocket_t ks;
210
211	if (!ksocket_socket(&ks, domain, type, protocol, KSOCKET_NOSLEEP,
212	    CRED())) {
213		return (ks);
214	} else {
215		return (NULL);
216	}
217}
218
/*
 * idm_soshutdown will disconnect the socket and prevent subsequent PDU
 * reception and transmission.  The sonode still exists but its state
 * gets modified to indicate it is no longer connected.  Calls to
 * idm_sorecv/idm_iov_sorecv will return, so idm_soshutdown can be used
 * to regain control of a thread stuck in idm_sorecv.
 *
 * Both directions are shut down (SHUT_RDWR); the result of
 * ksocket_shutdown() is deliberately ignored.
 */
void
idm_soshutdown(ksocket_t so)
{
	(void) ksocket_shutdown(so, SHUT_RDWR, CRED());
}
231
/*
 * idm_sodestroy releases all resources associated with a socket previously
 * created with idm_socreate.  The socket must be shut down using
 * idm_soshutdown before the socket is destroyed with idm_sodestroy,
 * otherwise undefined behavior will result.
 *
 * The result of ksocket_close() is deliberately ignored.
 */
void
idm_sodestroy(ksocket_t ks)
{
	(void) ksocket_close(ks, CRED());
}
243
244/*
245 * Function to compare two addresses in sockaddr_storage format
246 */
247
248int
249idm_ss_compare(const struct sockaddr_storage *cmp_ss1,
250    const struct sockaddr_storage *cmp_ss2,
251    boolean_t v4_mapped_as_v4,
252    boolean_t compare_ports)
253{
254	struct sockaddr_storage			mapped_v4_ss1, mapped_v4_ss2;
255	const struct sockaddr_storage		*ss1, *ss2;
256	struct in_addr				*in1, *in2;
257	struct in6_addr				*in61, *in62;
258	int i;
259
260	/*
261	 * Normalize V4-mapped IPv6 addresses into V4 format if
262	 * v4_mapped_as_v4 is B_TRUE.
263	 */
264	ss1 = cmp_ss1;
265	ss2 = cmp_ss2;
266	if (v4_mapped_as_v4 && (ss1->ss_family == AF_INET6)) {
267		in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
268		if (IN6_IS_ADDR_V4MAPPED(in61)) {
269			bzero(&mapped_v4_ss1, sizeof (mapped_v4_ss1));
270			mapped_v4_ss1.ss_family = AF_INET;
271			((struct sockaddr_in *)&mapped_v4_ss1)->sin_port =
272			    ((struct sockaddr_in *)ss1)->sin_port;
273			IN6_V4MAPPED_TO_INADDR(in61,
274			    &((struct sockaddr_in *)&mapped_v4_ss1)->sin_addr);
275			ss1 = &mapped_v4_ss1;
276		}
277	}
278	ss2 = cmp_ss2;
279	if (v4_mapped_as_v4 && (ss2->ss_family == AF_INET6)) {
280		in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
281		if (IN6_IS_ADDR_V4MAPPED(in62)) {
282			bzero(&mapped_v4_ss2, sizeof (mapped_v4_ss2));
283			mapped_v4_ss2.ss_family = AF_INET;
284			((struct sockaddr_in *)&mapped_v4_ss2)->sin_port =
285			    ((struct sockaddr_in *)ss2)->sin_port;
286			IN6_V4MAPPED_TO_INADDR(in62,
287			    &((struct sockaddr_in *)&mapped_v4_ss2)->sin_addr);
288			ss2 = &mapped_v4_ss2;
289		}
290	}
291
292	/*
293	 * Compare ports, then address family, then ip address
294	 */
295	if (compare_ports &&
296	    (((struct sockaddr_in *)ss1)->sin_port !=
297	    ((struct sockaddr_in *)ss2)->sin_port)) {
298		if (((struct sockaddr_in *)ss1)->sin_port >
299		    ((struct sockaddr_in *)ss2)->sin_port)
300			return (1);
301		else
302			return (-1);
303	}
304
305	/*
306	 * ports are the same
307	 */
308	if (ss1->ss_family != ss2->ss_family) {
309		if (ss1->ss_family == AF_INET)
310			return (1);
311		else
312			return (-1);
313	}
314
315	/*
316	 * address families are the same
317	 */
318	if (ss1->ss_family == AF_INET) {
319		in1 = &((struct sockaddr_in *)ss1)->sin_addr;
320		in2 = &((struct sockaddr_in *)ss2)->sin_addr;
321
322		if (in1->s_addr > in2->s_addr)
323			return (1);
324		else if (in1->s_addr < in2->s_addr)
325			return (-1);
326		else
327			return (0);
328	} else if (ss1->ss_family == AF_INET6) {
329		in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
330		in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
331
332		for (i = 0; i < 4; i++) {
333			if (in61->s6_addr32[i] > in62->s6_addr32[i])
334				return (1);
335			else if (in61->s6_addr32[i] < in62->s6_addr32[i])
336				return (-1);
337		}
338		return (0);
339	}
340
341	return (1);
342}
343
344/*
345 * IP address filter functions to flag addresses that should not
346 * go out to initiators through discovery.
347 */
348static boolean_t
349idm_v4_addr_okay(struct in_addr *in_addr)
350{
351	in_addr_t addr = ntohl(in_addr->s_addr);
352
353	if ((INADDR_NONE == addr) ||
354	    (IN_MULTICAST(addr)) ||
355	    ((addr >> IN_CLASSA_NSHIFT) == 0) ||
356	    ((addr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
357		return (B_FALSE);
358	}
359	return (B_TRUE);
360}
361
362static boolean_t
363idm_v6_addr_okay(struct in6_addr *addr6)
364{
365
366	if ((IN6_IS_ADDR_UNSPECIFIED(addr6)) ||
367	    (IN6_IS_ADDR_LOOPBACK(addr6)) ||
368	    (IN6_IS_ADDR_MULTICAST(addr6)) ||
369	    (IN6_IS_ADDR_V4MAPPED(addr6)) ||
370	    (IN6_IS_ADDR_V4COMPAT(addr6)) ||
371	    (IN6_IS_ADDR_LINKLOCAL(addr6))) {
372		return (B_FALSE);
373	}
374	return (B_TRUE);
375}
376
/*
 * idm_get_ipaddr will retrieve a list of IP Addresses which the host is
 * configured with by sending down a sequence of kernel ioctl to IP STREAMS.
 *
 * On return *ipaddr_p points at a kmem-allocated idm_addr_list_t, or is
 * NULL when no usable address was found or an error occurred.  The return
 * value is the size in bytes of that allocation (0 when *ipaddr_p is NULL).
 * The allocation is sized for every interface reported by SIOCGLIFCONF,
 * while al_out_cnt records how many entries were actually filled in.
 * NOTE(review): the caller presumably frees the list with
 * kmem_free(list, returned size) -- confirm against callers.
 */
int
idm_get_ipaddr(idm_addr_list_t **ipaddr_p)
{
	ksocket_t 		so4, so6;
	struct lifnum		lifn;
	struct lifconf		lifc;
	struct lifreq		*lp;
	int			rval;
	int			numifs;
	int			bufsize;
	void			*buf;
	int			i, j, n, rc;
	struct sockaddr_storage	ss;
	struct sockaddr_in	*sin;
	struct sockaddr_in6	*sin6;
	idm_addr_t		*ip;
	idm_addr_list_t		*ipaddr = NULL;
	int			size_ipaddr;

	*ipaddr_p = NULL;
	size_ipaddr = 0;
	buf = NULL;

	/* create an ipv4 and ipv6 UDP socket */
	if ((so6 = idm_socreate(PF_INET6, SOCK_DGRAM, 0)) == NULL)
		return (0);
	if ((so4 = idm_socreate(PF_INET, SOCK_DGRAM, 0)) == NULL) {
		idm_sodestroy(so6);
		return (0);
	}


retry_count:
	/* snapshot the current number of interfaces */
	lifn.lifn_family = PF_UNSPEC;
	lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
	lifn.lifn_count = 0;
	/* use so6 for ioctls with unspecified families by default */
	if (ksocket_ioctl(so6, SIOCGLIFNUM, (intptr_t)&lifn, &rval, CRED())
	    != 0) {
		goto cleanup;
	}

	numifs = lifn.lifn_count;
	if (numifs <= 0) {
		goto cleanup;
	}

	/* allocate extra room in case more interfaces appear */
	numifs += 10;

	/* get the interface names and ip addresses */
	bufsize = numifs * sizeof (struct lifreq);
	buf = kmem_alloc(bufsize, KM_SLEEP);

	lifc.lifc_family = AF_UNSPEC;
	lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
	lifc.lifc_len = bufsize;
	lifc.lifc_buf = buf;
	rc = ksocket_ioctl(so6, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED());
	if (rc != 0) {
		/* buf is released at the cleanup label */
		goto cleanup;
	}
	/*
	 * if our extra room is used up, try again: a completely full buffer
	 * may mean interfaces were added after the count was taken
	 */
	if (bufsize <= lifc.lifc_len) {
		kmem_free(buf, bufsize);
		buf = NULL;
		goto retry_count;
	}
	/* calc actual number of ifconfs */
	n = lifc.lifc_len / sizeof (struct lifreq);

	/*
	 * Allocate the result list.  The (n - 1) suggests idm_addr_list_t
	 * already embeds one idm_addr_t -- NOTE(review): confirm against
	 * the structure definition.
	 */
	if (n > 0) {
		size_ipaddr = sizeof (idm_addr_list_t) +
		    (n - 1) * sizeof (idm_addr_t);
		ipaddr = kmem_zalloc(size_ipaddr, KM_SLEEP);
	} else {
		goto cleanup;
	}

	/*
	 * Examine the array of interfaces and filter uninteresting ones.
	 * i walks the lifreq array, j counts the addresses kept so far.
	 */
	for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) {

		/*
		 * Copy the address as the SIOCGLIFFLAGS ioctl is destructive
		 */
		ss = lp->lifr_addr;
		/*
		 * fetch the flags using the socket of the correct family
		 */
		switch (ss.ss_family) {
		case AF_INET:
			rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)lp,
			    &rval, CRED());
			break;
		case AF_INET6:
			rc = ksocket_ioctl(so6, SIOCGLIFFLAGS, (intptr_t)lp,
			    &rval, CRED());
			break;
		default:
			/* neither IPv4 nor IPv6: ignore this entry */
			continue;
		}
		if (rc == 0) {
			/*
			 * If we got the flags, skip uninteresting
			 * interfaces based on flags.  When the ioctl fails
			 * the address is still considered below.
			 */
			if ((lp->lifr_flags & IFF_UP) != IFF_UP)
				continue;
			if (lp->lifr_flags &
			    (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
				continue;
		}

		/*
		 * save ip address; slot j is only consumed (j++) when the
		 * address passes the per-family filter
		 */
		ip = &ipaddr->al_addrs[j];
		switch (ss.ss_family) {
		case AF_INET:
			sin = (struct sockaddr_in *)&ss;
			if (!idm_v4_addr_okay(&sin->sin_addr))
				continue;
			ip->a_addr.i_addr.in4 = sin->sin_addr;
			ip->a_addr.i_insize = sizeof (struct in_addr);
			break;
		case AF_INET6:
			sin6 = (struct sockaddr_in6 *)&ss;
			if (!idm_v6_addr_okay(&sin6->sin6_addr))
				continue;
			ip->a_addr.i_addr.in6 = sin6->sin6_addr;
			ip->a_addr.i_insize = sizeof (struct in6_addr);
			break;
		default:
			continue;
		}
		j++;
	}

	if (j == 0) {
		/* no valid ifaddr: hand back NULL and a size of 0 */
		kmem_free(ipaddr, size_ipaddr);
		size_ipaddr = 0;
		ipaddr = NULL;
	} else {
		ipaddr->al_out_cnt = j;
	}


cleanup:
	/* common exit: both sockets exist on every path that reaches here */
	idm_sodestroy(so6);
	idm_sodestroy(so4);

	if (buf != NULL)
		kmem_free(buf, bufsize);

	*ipaddr_p = ipaddr;
	return (size_ipaddr);
}
541
542int
543idm_sorecv(ksocket_t so, void *msg, size_t len)
544{
545	iovec_t iov;
546
547	ASSERT(so != NULL);
548	ASSERT(len != 0);
549
550	/*
551	 * Fill in iovec and receive data
552	 */
553	iov.iov_base = msg;
554	iov.iov_len = len;
555
556	return (idm_iov_sorecv(so, &iov, 1, len));
557}
558
559/*
560 * idm_sosendto - Sends a buffered data on a non-connected socket.
561 *
562 * This function puts the data provided on the wire by calling sosendmsg.
563 * It will return only when all the data has been sent or if an error
564 * occurs.
565 *
566 * Returns 0 for success, the socket errno value if sosendmsg fails, and
567 * -1 if sosendmsg returns success but uio_resid != 0
568 */
569int
570idm_sosendto(ksocket_t so, void *buff, size_t len,
571    struct sockaddr *name, socklen_t namelen)
572{
573	struct msghdr		msg;
574	struct iovec		iov[1];
575	int			error;
576	size_t			sent = 0;
577
578	iov[0].iov_base	= buff;
579	iov[0].iov_len	= len;
580
581	/* Initialization of the message header. */
582	bzero(&msg, sizeof (msg));
583	msg.msg_iov	= iov;
584	msg.msg_iovlen	= 1;
585	msg.msg_name	= name;
586	msg.msg_namelen	= namelen;
587
588	if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) == 0) {
589		/* Data sent */
590		if (sent == len) {
591			/* All data sent.  Success. */
592			return (0);
593		} else {
594			/* Not all data was sent.  Failure */
595			return (-1);
596		}
597	}
598
599	/* Send failed */
600	return (error);
601}
602
603/*
604 * idm_iov_sosend - Sends an iovec on a connection.
605 *
606 * This function puts the data provided on the wire by calling sosendmsg.
607 * It will return only when all the data has been sent or if an error
608 * occurs.
609 *
610 * Returns 0 for success, the socket errno value if sosendmsg fails, and
611 * -1 if sosendmsg returns success but uio_resid != 0
612 */
613int
614idm_iov_sosend(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
615{
616	struct msghdr		msg;
617	int			error;
618	size_t 			sent = 0;
619
620	ASSERT(iop != NULL);
621
622	/* Initialization of the message header. */
623	bzero(&msg, sizeof (msg));
624	msg.msg_iov	= iop;
625	msg.msg_iovlen	= iovlen;
626
627	if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED()))
628	    == 0) {
629		/* Data sent */
630		if (sent == total_len) {
631			/* All data sent.  Success. */
632			return (0);
633		} else {
634			/* Not all data was sent.  Failure */
635			return (-1);
636		}
637	}
638
639	/* Send failed */
640	return (error);
641}
642
643/*
644 * idm_iov_sorecv - Receives an iovec from a connection
645 *
646 * This function gets the data asked for from the socket.  It will return
647 * only when all the requested data has been retrieved or if an error
648 * occurs.
649 *
650 * Returns 0 for success, the socket errno value if sorecvmsg fails, and
651 * -1 if sorecvmsg returns success but uio_resid != 0
652 */
653int
654idm_iov_sorecv(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
655{
656	struct msghdr		msg;
657	int			error;
658	size_t			recv;
659	int 			flags;
660
661	ASSERT(iop != NULL);
662
663	/* Initialization of the message header. */
664	bzero(&msg, sizeof (msg));
665	msg.msg_iov	= iop;
666	msg.msg_iovlen	= iovlen;
667	flags		= MSG_WAITALL;
668
669	if ((error = ksocket_recvmsg(so, &msg, flags, &recv, CRED()))
670	    == 0) {
671		/* Received data */
672		if (recv == total_len) {
673			/* All requested data received.  Success */
674			return (0);
675		} else {
676			/*
677			 * Not all data was received.  The connection has
678			 * probably failed.
679			 */
680			return (-1);
681		}
682	}
683
684	/* Receive failed */
685	return (error);
686}
687
688static void
689idm_set_ini_preconnect_options(idm_so_conn_t *sc, boolean_t boot_conn)
690{
691	int	conn_abort = 10000;
692	int	conn_notify = 2000;
693	int	abort = 30000;
694
695	/* Pre-connect socket options */
696	(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
697	    TCP_CONN_NOTIFY_THRESHOLD, (char *)&conn_notify, sizeof (int),
698	    CRED());
699	if (boot_conn == B_FALSE) {
700		(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
701		    TCP_CONN_ABORT_THRESHOLD, (char *)&conn_abort, sizeof (int),
702		    CRED());
703		(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
704		    TCP_ABORT_THRESHOLD,
705		    (char *)&abort, sizeof (int), CRED());
706	}
707}
708
709static void
710idm_set_ini_postconnect_options(idm_so_conn_t *sc)
711{
712	int32_t		rcvbuf = IDM_RCVBUF_SIZE;
713	int32_t		sndbuf = IDM_SNDBUF_SIZE;
714	const int	on = 1;
715
716	/* Set postconnect options */
717	(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, TCP_NODELAY,
718	    (char *)&on, sizeof (int), CRED());
719	(void) ksocket_setsockopt(sc->ic_so, SOL_SOCKET, SO_RCVBUF,
720	    (char *)&rcvbuf, sizeof (int), CRED());
721	(void) ksocket_setsockopt(sc->ic_so, SOL_SOCKET, SO_SNDBUF,
722	    (char *)&sndbuf, sizeof (int), CRED());
723}
724
725static void
726idm_set_tgt_connect_options(ksocket_t ks)
727{
728	int32_t		rcvbuf = IDM_RCVBUF_SIZE;
729	int32_t		sndbuf = IDM_SNDBUF_SIZE;
730	const int	on = 1;
731
732	/* Set connect options */
733	(void) ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVBUF,
734	    (char *)&rcvbuf, sizeof (int), CRED());
735	(void) ksocket_setsockopt(ks, SOL_SOCKET, SO_SNDBUF,
736	    (char *)&sndbuf, sizeof (int), CRED());
737	(void) ksocket_setsockopt(ks, IPPROTO_TCP, TCP_NODELAY,
738	    (char *)&on, sizeof (on), CRED());
739}
740
741static uint32_t
742n2h24(const uchar_t *ptr)
743{
744	return ((ptr[0] << 16) | (ptr[1] << 8) | ptr[2]);
745}
746
747
/*
 * idm_sorecvhdr()
 * Read one PDU header from the connection's socket into 'pdu'.
 *
 * Reads the basic header segment (BHS) into pdu->isp_hdr, derives
 * pdu->isp_hdrlen and pdu->isp_datalen from it, then reads any additional
 * header segments (AHS) and, when header digests are enabled on the
 * connection, the header digest, which is verified with idm_crc32c().
 *
 * Returns 0 on success, IDM_STATUS_FAIL on a receive error or when a
 * target connection sees a data segment longer than max_recv_dataseglen,
 * and IDM_STATUS_HEADER_DIGEST on a digest mismatch.
 */
static idm_status_t
idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu)
{
	iscsi_hdr_t	*bhs;
	uint32_t	hdr_digest_crc;
	uint32_t	crc_calculated;
	void		*new_hdr;
	int		ahslen = 0;
	int		total_len = 0;
	int		iovlen = 0;
	struct iovec	iov[2];
	idm_so_conn_t	*so_conn;
	int		rc;

	so_conn = ic->ic_transport_private;

	/*
	 * Read BHS.  bhs keeps pointing at this original buffer even if
	 * pdu->isp_hdr is replaced further down.
	 */
	bhs = pdu->isp_hdr;
	rc = idm_sorecv(so_conn->ic_so, pdu->isp_hdr, sizeof (iscsi_hdr_t));
	if (rc != IDM_STATUS_SUCCESS) {
		return (IDM_STATUS_FAIL);
	}

	/*
	 * Check actual AHS length against the amount available in the buffer.
	 * hlength is scaled by sizeof (uint32_t) to get the AHS size in bytes.
	 */
	pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
	    (bhs->hlength * sizeof (uint32_t));
	pdu->isp_datalen = n2h24(bhs->dlength);
	/* Only target connections enforce the negotiated receive limit here */
	if (ic->ic_conn_type == CONN_TYPE_TGT &&
	    pdu->isp_datalen > ic->ic_conn_params.max_recv_dataseglen) {
		IDM_CONN_LOG(CE_WARN,
		    "idm_sorecvhdr: exceeded the max data segment length");
		return (IDM_STATUS_FAIL);
	}
	/*
	 * NOTE(review): hlength is a count of 4-byte words above; confirm
	 * that IDM_SORX_CACHE_AHSLEN is expressed in the same unit.
	 */
	if (bhs->hlength > IDM_SORX_CACHE_AHSLEN) {
		/* Allocate a new header segment and change the callback */
		new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP);
		bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t));
		pdu->isp_hdr = new_hdr;
		pdu->isp_flags |= IDM_PDU_ADDL_HDR;

		/*
		 * This callback will restore the expected values after
		 * the RX PDU has been processed.
		 */
		pdu->isp_callback = idm_sorx_addl_pdu_cb;
	}

	/*
	 * Setup receipt of additional header and header digest (if enabled).
	 * Up to two iovec entries are used: AHS first, then the digest.
	 */
	if (bhs->hlength > 0) {
		/* AHS lands immediately after the BHS in the header buffer */
		iov[iovlen].iov_base = (caddr_t)(pdu->isp_hdr + 1);
		ahslen = pdu->isp_hdrlen - sizeof (iscsi_hdr_t);
		iov[iovlen].iov_len = ahslen;
		total_len += iov[iovlen].iov_len;
		iovlen++;
	}

	if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
		/* The digest is received into a local, not into the PDU */
		iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
		iov[iovlen].iov_len = sizeof (hdr_digest_crc);
		total_len += iov[iovlen].iov_len;
		iovlen++;
	}

	/* Nothing further to read when there is neither AHS nor digest */
	if ((iovlen != 0) &&
	    (idm_iov_sorecv(so_conn->ic_so, &iov[0], iovlen,
	    total_len) != 0)) {
		return (IDM_STATUS_FAIL);
	}

	/*
	 * Validate header digest if enabled.  The CRC covers the BHS plus
	 * any AHS; the received value is compared as-is, with no byte swap.
	 */
	if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
		crc_calculated = idm_crc32c(pdu->isp_hdr,
		    sizeof (iscsi_hdr_t) + ahslen);
		if (crc_calculated != hdr_digest_crc) {
			/* Invalid Header Digest */
			return (IDM_STATUS_HEADER_DIGEST);
		}
	}

	return (0);
}
837
838/*
839 * idm_so_ini_conn_create()
840 * Allocate the sockets transport connection resources.
841 */
842static idm_status_t
843idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
844{
845	ksocket_t	so;
846	idm_so_conn_t	*so_conn;
847	idm_status_t	idmrc;
848
849	so = idm_socreate(cr->cr_domain, cr->cr_type,
850	    cr->cr_protocol);
851	if (so == NULL) {
852		return (IDM_STATUS_FAIL);
853	}
854
855	/* Bind the socket if configured to do so */
856	if (cr->cr_bound) {
857		if (ksocket_bind(so, &cr->cr_bound_addr.sin,
858		    SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), CRED()) != 0) {
859			idm_sodestroy(so);
860			return (IDM_STATUS_FAIL);
861		}
862	}
863
864	idmrc = idm_so_conn_create_common(ic, so);
865	if (idmrc != IDM_STATUS_SUCCESS) {
866		idm_soshutdown(so);
867		idm_sodestroy(so);
868		return (IDM_STATUS_FAIL);
869	}
870
871	so_conn = ic->ic_transport_private;
872	/* Set up socket options */
873	idm_set_ini_preconnect_options(so_conn, cr->cr_boot_conn);
874
875	return (IDM_STATUS_SUCCESS);
876}
877
/*
 * idm_so_ini_conn_destroy()
 * Tear down the sockets transport connection resources.
 *
 * Initiator-side entry point; all of the work is done by
 * idm_so_conn_destroy_common().
 */
static void
idm_so_ini_conn_destroy(idm_conn_t *ic)
{
	idm_so_conn_destroy_common(ic);
}
887
/*
 * idm_so_ini_conn_connect()
 * Establish the connection referred to by the handle previously allocated via
 * idm_so_ini_conn_create().
 *
 * With ic_conn_params.nonblock_socket set, the socket is switched to
 * non-blocking mode and ksocket_connect() is retried until it succeeds,
 * fails hard (ETIMEDOUT, ECONNREFUSED, ECONNRESET), or the current lbolt
 * exceeds conn_login_max.  Blocking mode is restored only on success.
 * Otherwise a single blocking ksocket_connect() is issued.
 *
 * Returns IDM_STATUS_SUCCESS once connected, with the connection threads
 * started and the post-connect options applied.  On failure the socket is
 * shut down and IDM_STATUS_FAIL is returned.
 */
static idm_status_t
idm_so_ini_conn_connect(idm_conn_t *ic)
{
	idm_so_conn_t	*so_conn;
	struct sonode	*node = NULL;
	int 		rc;
	clock_t		lbolt, conn_login_max, conn_login_interval;
	boolean_t	nonblock;

	so_conn = ic->ic_transport_private;
	nonblock = ic->ic_conn_params.nonblock_socket;
	/* compared directly against ddi_get_lbolt() below */
	conn_login_max = ic->ic_conn_params.conn_login_max;
	/* deadline, in lbolt ticks, for the current retry interval */
	conn_login_interval = ddi_get_lbolt() +
	    SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);

	if (nonblock == B_TRUE) {
		node = ((struct sonode *)(so_conn->ic_so));
		/* Switch the socket to non-blocking mode */
		idm_so_socket_set_nonblock(node);
		do {
			rc = ksocket_connect(so_conn->ic_so,
			    &ic->ic_ini_dst_addr.sin,
			    (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)),
			    CRED());
			if (rc == 0 || rc == EISCONN) {
				/* connected now, or already connected */
				rc = IDM_STATUS_SUCCESS;
				break;
			}
			if ((rc == ETIMEDOUT) || (rc == ECONNREFUSED) ||
			    (rc == ECONNRESET)) {
				/* hard failure: timed out, refused or reset */
				break;
			}
			lbolt = ddi_get_lbolt();
			if (lbolt > conn_login_max) {
				/*
				 * Connection retry timeout,
				 * failed connect to target.
				 */
				break;
			}
			if (lbolt < conn_login_interval) {
				if ((rc == EINPROGRESS) || (rc == EALREADY)) {
					/*
					 * TCP connect still in progress:
					 * poll again after a short pause,
					 * keeping the current deadline.
					 */
					delay(SEC_TO_TICK(IN_PROGRESS_DELAY));
					continue;
				} else {
					/* sleep out the rest of the interval */
					delay(conn_login_interval - lbolt);
				}
			}
			/* start a new retry interval */
			conn_login_interval = ddi_get_lbolt() +
			    SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
		} while (rc != 0);
		/* restore blocking mode now that the connect has succeeded */
		if (rc == IDM_STATUS_SUCCESS) {
			idm_so_socket_set_block(node);
		}
	} else {
		rc = ksocket_connect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin,
		    (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), CRED());
	}

	if (rc != 0) {
		idm_soshutdown(so_conn->ic_so);
		return (IDM_STATUS_FAIL);
	}

	idm_so_conn_connect_common(ic);

	idm_set_ini_postconnect_options(so_conn);

	return (IDM_STATUS_SUCCESS);
}
967
968idm_status_t
969idm_so_tgt_conn_create(idm_conn_t *ic, ksocket_t new_so)
970{
971	idm_status_t	idmrc;
972
973	idmrc = idm_so_conn_create_common(ic, new_so);
974
975	return (idmrc);
976}
977
/*
 * idm_so_tgt_conn_destroy()
 * Target-side teardown of the sockets transport connection resources;
 * all of the work is done by idm_so_conn_destroy_common().
 */
static void
idm_so_tgt_conn_destroy(idm_conn_t *ic)
{
	idm_so_conn_destroy_common(ic);
}
983
/*
 * idm_so_tgt_conn_connect()
 * Establish the connection in ic, passed from idm_tgt_conn_finish(), which
 * is invoked from the SM as a result of an inbound connection request.
 *
 * No connect call is made here; only the common post-connect setup
 * (idm_so_conn_connect_common()) is run.  Always returns
 * IDM_STATUS_SUCCESS.
 */
static idm_status_t
idm_so_tgt_conn_connect(idm_conn_t *ic)
{
	idm_so_conn_connect_common(ic);

	return (IDM_STATUS_SUCCESS);
}
996
997static idm_status_t
998idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so)
999{
1000	idm_so_conn_t	*so_conn;
1001
1002	so_conn = kmem_zalloc(sizeof (idm_so_conn_t), KM_SLEEP);
1003	so_conn->ic_so = new_so;
1004
1005	ic->ic_transport_private = so_conn;
1006	ic->ic_transport_hdrlen = 0;
1007
1008	/* Set the scoreboarding flag on this connection */
1009	ic->ic_conn_flags |= IDM_CONN_USE_SCOREBOARD;
1010	ic->ic_conn_params.max_recv_dataseglen =
1011	    ISCSI_DEFAULT_MAX_RECV_SEG_LEN;
1012	ic->ic_conn_params.max_xmit_dataseglen =
1013	    ISCSI_DEFAULT_MAX_XMIT_SEG_LEN;
1014
1015	/*
1016	 * Initialize tx thread mutex and list
1017	 */
1018	mutex_init(&so_conn->ic_tx_mutex, NULL, MUTEX_DEFAULT, NULL);
1019	cv_init(&so_conn->ic_tx_cv, NULL, CV_DEFAULT, NULL);
1020	list_create(&so_conn->ic_tx_list, sizeof (idm_pdu_t),
1021	    offsetof(idm_pdu_t, idm_tx_link));
1022
1023	return (IDM_STATUS_SUCCESS);
1024}
1025
1026static void
1027idm_so_conn_destroy_common(idm_conn_t *ic)
1028{
1029	idm_so_conn_t	*so_conn = ic->ic_transport_private;
1030
1031	ic->ic_transport_private = NULL;
1032	idm_sodestroy(so_conn->ic_so);
1033	list_destroy(&so_conn->ic_tx_list);
1034	mutex_destroy(&so_conn->ic_tx_mutex);
1035	cv_destroy(&so_conn->ic_tx_cv);
1036
1037	kmem_free(so_conn, sizeof (idm_so_conn_t));
1038}
1039
1040static void
1041idm_so_conn_connect_common(idm_conn_t *ic)
1042{
1043	idm_so_conn_t	*so_conn;
1044	struct sockaddr_in6	t_addr;
1045	socklen_t	t_addrlen = 0;
1046
1047	so_conn = ic->ic_transport_private;
1048	bzero(&t_addr, sizeof (struct sockaddr_in6));
1049	t_addrlen = sizeof (struct sockaddr_in6);
1050
1051	/* Set the local and remote addresses in the idm conn handle */
1052	(void) ksocket_getsockname(so_conn->ic_so, (struct sockaddr *)&t_addr,
1053	    &t_addrlen, CRED());
1054	bcopy(&t_addr, &ic->ic_laddr, t_addrlen);
1055	(void) ksocket_getpeername(so_conn->ic_so, (struct sockaddr *)&t_addr,
1056	    &t_addrlen, CRED());
1057	bcopy(&t_addr, &ic->ic_raddr, t_addrlen);
1058
1059	mutex_enter(&ic->ic_mutex);
1060	so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0,
1061	    &p0, TS_RUN, minclsyspri);
1062	so_conn->ic_rx_thread = thread_create(NULL, 0, idm_sorx_thread, ic, 0,
1063	    &p0, TS_RUN, minclsyspri);
1064
1065	while (so_conn->ic_rx_thread_did == 0 ||
1066	    so_conn->ic_tx_thread_did == 0)
1067		cv_wait(&ic->ic_cv, &ic->ic_mutex);
1068	mutex_exit(&ic->ic_mutex);
1069}
1070
/*
 * idm_so_conn_disconnect()
 * Shutdown the socket connection and stop the thread
 *
 * Clears both thread "running" flags under ic_mutex, signals the TX
 * condition variable, shuts the socket down, and then joins the TX and RX
 * threads.  The function therefore does not return until both threads
 * have exited.
 */
static void
idm_so_conn_disconnect(idm_conn_t *ic)
{
	idm_so_conn_t	*so_conn;

	so_conn = ic->ic_transport_private;

	mutex_enter(&ic->ic_mutex);
	so_conn->ic_rx_thread_running = B_FALSE;
	so_conn->ic_tx_thread_running = B_FALSE;
	/*
	 * We need to wakeup the TX thread.  ic_tx_mutex is taken while
	 * ic_mutex is still held (lock order: ic_mutex, then ic_tx_mutex).
	 */
	mutex_enter(&so_conn->ic_tx_mutex);
	cv_signal(&so_conn->ic_tx_cv);
	mutex_exit(&so_conn->ic_tx_mutex);
	mutex_exit(&ic->ic_mutex);

	/* This should wakeup the RX thread if it is sleeping */
	idm_soshutdown(so_conn->ic_so);

	/* Wait for both threads to finish */
	thread_join(so_conn->ic_tx_thread_did);
	thread_join(so_conn->ic_rx_thread_did);
}
1097
1098/*
1099 * idm_so_tgt_svc_create()
1100 * Establish a service on an IP address and port.  idm_svc_req_t contains
1101 * the service parameters.
1102 */
1103/*ARGSUSED*/
1104static idm_status_t
1105idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is)
1106{
1107	idm_so_svc_t		*so_svc;
1108
1109	so_svc = kmem_zalloc(sizeof (idm_so_svc_t), KM_SLEEP);
1110
1111	/* Set the new sockets service in svc handle */
1112	is->is_so_svc = (void *)so_svc;
1113
1114	return (IDM_STATUS_SUCCESS);
1115}
1116
1117/*
1118 * idm_so_tgt_svc_destroy()
1119 * Teardown sockets resources allocated in idm_so_tgt_svc_create()
1120 */
1121static void
1122idm_so_tgt_svc_destroy(idm_svc_t *is)
1123{
1124	/* the socket will have been torn down; free the service */
1125	kmem_free(is->is_so_svc, sizeof (idm_so_svc_t));
1126}
1127
1128/*
1129 * idm_so_tgt_svc_online()
1130 * Launch a watch thread on the svc allocated in idm_so_tgt_svc_create()
1131 */
1132
1133static idm_status_t
1134idm_so_tgt_svc_online(idm_svc_t *is)
1135{
1136	idm_so_svc_t		*so_svc;
1137	idm_svc_req_t		*sr = &is->is_svc_req;
1138	struct sockaddr_in6	sin6_ip;
1139	const uint32_t		on = 1;
1140	const uint32_t		off = 0;
1141
1142	mutex_enter(&is->is_mutex);
1143	so_svc = (idm_so_svc_t *)is->is_so_svc;
1144
1145	/*
1146	 * Try creating an IPv6 socket first
1147	 */
1148	if ((so_svc->is_so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL) {
1149		mutex_exit(&is->is_mutex);
1150		return (IDM_STATUS_FAIL);
1151	} else {
1152		bzero(&sin6_ip, sizeof (sin6_ip));
1153		sin6_ip.sin6_family = AF_INET6;
1154		sin6_ip.sin6_port = htons(sr->sr_port);
1155		sin6_ip.sin6_addr = in6addr_any;
1156
1157		(void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1158		    SO_REUSEADDR, (char *)&on, sizeof (on), CRED());
1159		/*
1160		 * Turn off SO_MAC_EXEMPT so future sobinds succeed
1161		 */
1162		(void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1163		    SO_MAC_EXEMPT, (char *)&off, sizeof (off), CRED());
1164
1165		if (ksocket_bind(so_svc->is_so, (struct sockaddr *)&sin6_ip,
1166		    sizeof (sin6_ip), CRED()) != 0) {
1167			mutex_exit(&is->is_mutex);
1168			idm_sodestroy(so_svc->is_so);
1169			return (IDM_STATUS_FAIL);
1170		}
1171	}
1172
1173	idm_set_tgt_connect_options(so_svc->is_so);
1174
1175	if (ksocket_listen(so_svc->is_so, 5, CRED()) != 0) {
1176		mutex_exit(&is->is_mutex);
1177		idm_soshutdown(so_svc->is_so);
1178		idm_sodestroy(so_svc->is_so);
1179		return (IDM_STATUS_FAIL);
1180	}
1181
1182	/* Launch a watch thread */
1183	so_svc->is_thread = thread_create(NULL, 0, idm_so_svc_port_watcher,
1184	    is, 0, &p0, TS_RUN, minclsyspri);
1185
1186	if (so_svc->is_thread == NULL) {
1187		/* Failure to launch; teardown the socket */
1188		mutex_exit(&is->is_mutex);
1189		idm_soshutdown(so_svc->is_so);
1190		idm_sodestroy(so_svc->is_so);
1191		return (IDM_STATUS_FAIL);
1192	}
1193	ksocket_hold(so_svc->is_so);
1194	/* Wait for the port watcher thread to start */
1195	while (!so_svc->is_thread_running)
1196		cv_wait(&is->is_cv, &is->is_mutex);
1197	mutex_exit(&is->is_mutex);
1198
1199	return (IDM_STATUS_SUCCESS);
1200}
1201
1202/*
1203 * idm_so_tgt_svc_offline
1204 *
1205 * Stop listening on the IP address and port identified by idm_svc_t.
1206 */
1207static void
1208idm_so_tgt_svc_offline(idm_svc_t *is)
1209{
1210	idm_so_svc_t		*so_svc;
1211	mutex_enter(&is->is_mutex);
1212	so_svc = (idm_so_svc_t *)is->is_so_svc;
1213	so_svc->is_thread_running = B_FALSE;
1214	mutex_exit(&is->is_mutex);
1215
1216	/*
1217	 * Teardown socket
1218	 */
1219	idm_sodestroy(so_svc->is_so);
1220
1221	/*
1222	 * Now we expect the port watcher thread to terminate
1223	 */
1224	thread_join(so_svc->is_thread_did);
1225}
1226
1227/*
1228 * Watch thread for target service connection establishment.
1229 */
1230void
1231idm_so_svc_port_watcher(void *arg)
1232{
1233	idm_svc_t		*svc = arg;
1234	ksocket_t		new_so;
1235	idm_conn_t		*ic;
1236	idm_status_t		idmrc;
1237	idm_so_svc_t		*so_svc;
1238	int			rc;
1239	const uint32_t		off = 0;
1240	struct sockaddr_in6 	t_addr;
1241	socklen_t		t_addrlen;
1242
1243	bzero(&t_addr, sizeof (struct sockaddr_in6));
1244	t_addrlen = sizeof (struct sockaddr_in6);
1245	mutex_enter(&svc->is_mutex);
1246
1247	so_svc = svc->is_so_svc;
1248	so_svc->is_thread_running = B_TRUE;
1249	so_svc->is_thread_did = so_svc->is_thread->t_did;
1250
1251	cv_signal(&svc->is_cv);
1252
1253	IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc,
1254	    svc->is_svc_req.sr_port);
1255
1256	while (so_svc->is_thread_running) {
1257		mutex_exit(&svc->is_mutex);
1258
1259		if ((rc = ksocket_accept(so_svc->is_so,
1260		    (struct sockaddr *)&t_addr, &t_addrlen,
1261		    &new_so, CRED())) != 0) {
1262			mutex_enter(&svc->is_mutex);
1263			if (rc == ECONNABORTED)
1264				continue;
1265			/* Connection problem */
1266			break;
1267		}
1268		/*
1269		 * Turn off SO_MAC_EXEMPT so future sobinds succeed
1270		 */
1271		(void) ksocket_setsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT,
1272		    (char *)&off, sizeof (off), CRED());
1273
1274		idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS,
1275		    &ic);
1276		if (idmrc != IDM_STATUS_SUCCESS) {
1277			/* Drop connection */
1278			idm_soshutdown(new_so);
1279			idm_sodestroy(new_so);
1280			mutex_enter(&svc->is_mutex);
1281			continue;
1282		}
1283
1284		idmrc = idm_so_tgt_conn_create(ic, new_so);
1285		if (idmrc != IDM_STATUS_SUCCESS) {
1286			idm_svc_conn_destroy(ic);
1287			idm_soshutdown(new_so);
1288			idm_sodestroy(new_so);
1289			mutex_enter(&svc->is_mutex);
1290			continue;
1291		}
1292
1293		/*
1294		 * Kick the state machine.  At CS_S3_XPT_UP the state machine
1295		 * will notify the client (target) about the new connection.
1296		 */
1297		idm_conn_event(ic, CE_CONNECT_ACCEPT, NULL);
1298
1299		mutex_enter(&svc->is_mutex);
1300	}
1301	ksocket_rele(so_svc->is_so);
1302	so_svc->is_thread_running = B_FALSE;
1303	mutex_exit(&svc->is_mutex);
1304
1305	IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc,
1306	    svc->is_svc_req.sr_port);
1307
1308	thread_exit();
1309}
1310
1311/*
1312 * idm_so_free_task_rsrc() stops any ongoing processing of the task and
1313 * frees resources associated with the task.
1314 *
1315 * It's not clear that this should return idm_status_t.  What do we do
1316 * if it fails?
1317 */
1318static idm_status_t
1319idm_so_free_task_rsrc(idm_task_t *idt)
1320{
1321	idm_buf_t	*idb, *next_idb;
1322
1323	/*
1324	 * There is nothing to cleanup on initiator connections
1325	 */
1326	if (IDM_CONN_ISINI(idt->idt_ic))
1327		return (IDM_STATUS_SUCCESS);
1328
1329	/*
1330	 * If this is a target connection, call idm_buf_rx_from_ini_done for
1331	 * any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE.
1332	 *
1333	 * In addition, remove any buffers associated with this task from
1334	 * the ic_tx_list.  We'll do this by walking the idt_inbufv list, but
1335	 * items don't actually get removed from that list (and completion
1336	 * routines called) until idm_task_cleanup.
1337	 */
1338	mutex_enter(&idt->idt_mutex);
1339
1340	for (idb = list_head(&idt->idt_outbufv); idb != NULL; idb = next_idb) {
1341		next_idb = list_next(&idt->idt_outbufv, idb);
1342		if (idb->idb_in_transport) {
1343			/*
1344			 * idm_buf_rx_from_ini_done releases idt->idt_mutex
1345			 */
1346			DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1347			    uintptr_t, idb->idb_buf,
1348			    uint32_t, idb->idb_bufoffset,
1349			    uint64_t, 0, uint32_t, 0, uint32_t, 0,
1350			    uint32_t, idb->idb_xfer_len,
1351			    int, XFER_BUF_RX_FROM_INI);
1352			idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED);
1353			mutex_enter(&idt->idt_mutex);
1354		}
1355	}
1356
1357	for (idb = list_head(&idt->idt_inbufv); idb != NULL; idb = next_idb) {
1358		next_idb = list_next(&idt->idt_inbufv, idb);
1359		/*
1360		 * We want to remove these items from the tx_list as well,
1361		 * but knowing it's in the idt_inbufv list is not a guarantee
1362		 * that it's in the tx_list.  If it's on the tx list then
1363		 * let idm_sotx_thread() clean it up.
1364		 */
1365		if (idb->idb_in_transport && !idb->idb_tx_thread) {
1366			/*
1367			 * idm_buf_tx_to_ini_done releases idt->idt_mutex
1368			 */
1369			DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1370			    uintptr_t, idb->idb_buf,
1371			    uint32_t, idb->idb_bufoffset,
1372			    uint64_t, 0, uint32_t, 0, uint32_t, 0,
1373			    uint32_t, idb->idb_xfer_len,
1374			    int, XFER_BUF_TX_TO_INI);
1375			idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
1376			mutex_enter(&idt->idt_mutex);
1377		}
1378	}
1379
1380	mutex_exit(&idt->idt_mutex);
1381
1382	return (IDM_STATUS_SUCCESS);
1383}
1384
1385/*
1386 * idm_so_negotiate_key_values() validates the key values for this connection
1387 */
1388/* ARGSUSED */
1389static kv_status_t
1390idm_so_negotiate_key_values(idm_conn_t *it, nvlist_t *request_nvl,
1391    nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
1392{
1393	/* All parameters are negotiated at the iscsit level */
1394	return (KV_HANDLED);
1395}
1396
1397/*
1398 * idm_so_notice_key_values() activates the negotiated key values for
1399 * this connection.
1400 */
1401static void
1402idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl)
1403{
1404	char			*nvp_name;
1405	nvpair_t		*nvp;
1406	nvpair_t		*next_nvp;
1407	int			nvrc;
1408	idm_status_t		idm_status;
1409	const idm_kv_xlate_t	*ikvx;
1410	uint64_t		num_val;
1411
1412	for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL);
1413	    nvp != NULL; nvp = next_nvp) {
1414		next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp);
1415		nvp_name = nvpair_name(nvp);
1416
1417		ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1418		switch (ikvx->ik_key_id) {
1419		case KI_HEADER_DIGEST:
1420		case KI_DATA_DIGEST:
1421			idm_status = idm_so_handle_digest(it, nvp, ikvx);
1422			ASSERT(idm_status == 0);
1423
1424			/* Remove processed item from negotiated_nvl list */
1425			nvrc = nvlist_remove_all(
1426			    negotiated_nvl, ikvx->ik_key_name);
1427			ASSERT(nvrc == 0);
1428			break;
1429		case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1430			/*
1431			 * Just pass the value down to idm layer.
1432			 * No need to remove it from negotiated_nvl list here.
1433			 */
1434			nvrc = nvpair_value_uint64(nvp, &num_val);
1435			ASSERT(nvrc == 0);
1436			it->ic_conn_params.max_xmit_dataseglen =
1437			    (uint32_t)num_val;
1438			break;
1439		default:
1440			break;
1441		}
1442	}
1443}
1444
1445/*
1446 * idm_so_declare_key_values() declares the key values for this connection
1447 */
1448/* ARGSUSED */
1449static kv_status_t
1450idm_so_declare_key_values(idm_conn_t *it, nvlist_t *config_nvl,
1451    nvlist_t *outgoing_nvl)
1452{
1453	char			*nvp_name;
1454	nvpair_t		*nvp;
1455	nvpair_t		*next_nvp;
1456	kv_status_t		kvrc;
1457	int			nvrc = 0;
1458	const idm_kv_xlate_t	*ikvx;
1459	uint64_t		num_val;
1460
1461	for (nvp = nvlist_next_nvpair(config_nvl, NULL);
1462	    nvp != NULL && nvrc == 0; nvp = next_nvp) {
1463		next_nvp = nvlist_next_nvpair(config_nvl, nvp);
1464		nvp_name = nvpair_name(nvp);
1465
1466		ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1467		switch (ikvx->ik_key_id) {
1468		case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1469			if ((nvrc = nvpair_value_uint64(nvp, &num_val)) != 0) {
1470				break;
1471			}
1472			if (outgoing_nvl &&
1473			    (nvrc = nvlist_add_uint64(outgoing_nvl,
1474			    nvp_name, num_val)) != 0) {
1475				break;
1476			}
1477			it->ic_conn_params.max_recv_dataseglen =
1478			    (uint32_t)num_val;
1479			break;
1480		default:
1481			break;
1482		}
1483	}
1484	kvrc = idm_nvstat_to_kvstat(nvrc);
1485	return (kvrc);
1486}
1487
1488static idm_status_t
1489idm_so_handle_digest(idm_conn_t *it, nvpair_t *digest_choice,
1490    const idm_kv_xlate_t *ikvx)
1491{
1492	int			nvrc;
1493	char			*digest_choice_string;
1494
1495	nvrc = nvpair_value_string(digest_choice,
1496	    &digest_choice_string);
1497	ASSERT(nvrc == 0);
1498	if (strcasecmp(digest_choice_string, "crc32c") == 0) {
1499		switch (ikvx->ik_key_id) {
1500		case KI_HEADER_DIGEST:
1501			it->ic_conn_flags |= IDM_CONN_HEADER_DIGEST;
1502			break;
1503		case KI_DATA_DIGEST:
1504			it->ic_conn_flags |= IDM_CONN_DATA_DIGEST;
1505			break;
1506		default:
1507			ASSERT(0);
1508			break;
1509		}
1510	} else if (strcasecmp(digest_choice_string, "none") == 0) {
1511		switch (ikvx->ik_key_id) {
1512		case KI_HEADER_DIGEST:
1513			it->ic_conn_flags &= ~IDM_CONN_HEADER_DIGEST;
1514			break;
1515		case KI_DATA_DIGEST:
1516			it->ic_conn_flags &= ~IDM_CONN_DATA_DIGEST;
1517			break;
1518		default:
1519			ASSERT(0);
1520			break;
1521		}
1522	} else {
1523		ASSERT(0);
1524	}
1525
1526	return (IDM_STATUS_SUCCESS);
1527}
1528
1529
1530/*
1531 * idm_so_conn_is_capable() verifies that the passed connection is provided
1532 * for by the sockets interface.
1533 */
1534/* ARGSUSED */
1535static boolean_t
1536idm_so_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps)
1537{
1538	return (B_TRUE);
1539}
1540
1541/*
1542 * idm_so_rx_datain() validates the Data Sequence number of the PDU. The
1543 * idm_sorecv_scsidata() function invoked earlier actually reads the data
1544 * off the socket into the appropriate buffers.
1545 */
1546static void
1547idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu)
1548{
1549	iscsi_data_hdr_t	*bhs;
1550	idm_task_t		*idt;
1551	idm_buf_t		*idb;
1552	uint32_t		datasn;
1553	size_t			offset;
1554	iscsi_hdr_t		*ihp = (iscsi_hdr_t *)pdu->isp_hdr;
1555	iscsi_data_rsp_hdr_t    *idrhp = (iscsi_data_rsp_hdr_t *)ihp;
1556
1557	ASSERT(ic != NULL);
1558	ASSERT(pdu != NULL);
1559
1560	bhs	= (iscsi_data_hdr_t *)pdu->isp_hdr;
1561	datasn	= ntohl(bhs->datasn);
1562	offset	= ntohl(bhs->offset);
1563
1564	ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA_RSP);
1565
1566	/*
1567	 * Look up the task corresponding to the initiator task tag
1568	 * to get the buffers affiliated with the task.
1569	 */
1570	idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1571	if (idt == NULL) {
1572		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task");
1573		idm_pdu_rx_protocol_error(ic, pdu);
1574		return;
1575	}
1576
1577	idb = pdu->isp_sorx_buf;
1578	if (idb == NULL) {
1579		IDM_CONN_LOG(CE_WARN,
1580		    "idm_so_rx_datain: failed to find buffer");
1581		idm_task_rele(idt);
1582		idm_pdu_rx_protocol_error(ic, pdu);
1583		return;
1584	}
1585
1586	/*
1587	 * DataSN values should be sequential and should not have any gaps or
1588	 * repetitions. Check the DataSN with the one stored in the task.
1589	 */
1590	if (datasn == idt->idt_exp_datasn) {
1591		idt->idt_exp_datasn++; /* keep track of DataSN received */
1592	} else {
1593		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order");
1594		idm_task_rele(idt);
1595		idm_pdu_rx_protocol_error(ic, pdu);
1596		return;
1597	}
1598
1599	/*
1600	 * PDUs in a sequence should be in continuously increasing
1601	 * address offset
1602	 */
1603	if (offset != idb->idb_exp_offset) {
1604		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset");
1605		idm_task_rele(idt);
1606		idm_pdu_rx_protocol_error(ic, pdu);
1607		return;
1608	}
1609	/* Expected next relative buffer offset */
1610	idb->idb_exp_offset += n2h24(bhs->dlength);
1611	idt->idt_rx_bytes += n2h24(bhs->dlength);
1612
1613	idm_task_rele(idt);
1614
1615	/*
1616	 * For now call scsi_rsp which will process the data rsp
1617	 * Revisit, need to provide an explicit client entry point for
1618	 * phase collapse completions.
1619	 */
1620	if (((ihp->opcode & ISCSI_OPCODE_MASK) == ISCSI_OP_SCSI_DATA_RSP) &&
1621	    (idrhp->flags & ISCSI_FLAG_DATA_STATUS)) {
1622		(*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu);
1623	}
1624
1625	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1626}
1627
1628/*
1629 * The idm_so_rx_dataout() function is used by the iSCSI target to read
1630 * data from the Data-Out PDU sent by the iSCSI initiator.
1631 *
1632 * This function gets the Initiator Task Tag from the PDU BHS and looks up the
1633 * task to get the buffers associated with the PDU. A PDU might span buffers.
1634 * The data is then read into the respective buffer.
1635 */
1636static void
1637idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu)
1638{
1639
1640	iscsi_data_hdr_t	*bhs;
1641	idm_task_t		*idt;
1642	idm_buf_t		*idb;
1643	size_t			offset;
1644
1645	ASSERT(ic != NULL);
1646	ASSERT(pdu != NULL);
1647
1648	bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1649	offset = ntohl(bhs->offset);
1650	ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA);
1651
1652	/*
1653	 * Look up the task corresponding to the initiator task tag
1654	 * to get the buffers affiliated with the task.
1655	 */
1656	idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1657	if (idt == NULL) {
1658		IDM_CONN_LOG(CE_WARN,
1659		    "idm_so_rx_dataout: failed to find task");
1660		idm_pdu_rx_protocol_error(ic, pdu);
1661		return;
1662	}
1663
1664	idb = pdu->isp_sorx_buf;
1665	if (idb == NULL) {
1666		IDM_CONN_LOG(CE_WARN,
1667		    "idm_so_rx_dataout: failed to find buffer");
1668		idm_task_rele(idt);
1669		idm_pdu_rx_protocol_error(ic, pdu);
1670		return;
1671	}
1672
1673	/* Keep track of data transferred - check data offsets */
1674	if (offset != idb->idb_exp_offset) {
1675		IDM_CONN_LOG(CE_NOTE, "idm_so_rx_dataout: offset out of seq: "
1676		    "%ld, %d", offset, idb->idb_exp_offset);
1677		idm_task_rele(idt);
1678		idm_pdu_rx_protocol_error(ic, pdu);
1679		return;
1680	}
1681	/* Expected next relative offset */
1682	idb->idb_exp_offset += ntoh24(bhs->dlength);
1683	idt->idt_rx_bytes += n2h24(bhs->dlength);
1684
1685	/*
1686	 * Call the buffer callback when the transfer is complete
1687	 *
1688	 * The connection state machine should only abort tasks after
1689	 * shutting down the connection so we are assured that there
1690	 * won't be a simultaneous attempt to abort this task at the
1691	 * same time as we are processing this PDU (due to a connection
1692	 * state change).
1693	 */
1694	if (bhs->flags & ISCSI_FLAG_FINAL) {
1695		/*
1696		 * We only want to call idm_buf_rx_from_ini_done once
1697		 * per transfer.  It's possible that this task has
1698		 * already been aborted in which case
1699		 * idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done
1700		 * for each buffer with idb_in_transport==B_TRUE.  To
1701		 * close this window and ensure that this doesn't happen,
1702		 * we'll clear idb->idb_in_transport now while holding
1703		 * the task mutex.   This is only really an issue for
1704		 * SCSI task abort -- if tasks were being aborted because
1705		 * of a connection state change the state machine would
1706		 * have already stopped the receive thread.
1707		 */
1708		mutex_enter(&idt->idt_mutex);
1709
1710		/*
1711		 * Release the task hold here (obtained in idm_task_find)
1712		 * because the task may complete synchronously during
1713		 * idm_buf_rx_from_ini_done.  Since we still have an active
1714		 * buffer we know there is at least one additional hold on idt.
1715		 */
1716		idm_task_rele(idt);
1717
1718		/*
1719		 * idm_buf_rx_from_ini_done releases idt->idt_mutex
1720		 */
1721		DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1722		    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
1723		    uint64_t, 0, uint32_t, 0, uint32_t, 0,
1724		    uint32_t, idb->idb_xfer_len,
1725		    int, XFER_BUF_RX_FROM_INI);
1726		idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_SUCCESS);
1727		idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1728		return;
1729	}
1730
1731	idm_task_rele(idt);
1732	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1733}
1734
1735/*
1736 * The idm_so_rx_rtt() function is used by the iSCSI initiator to handle
1737 * the R2T PDU sent by the iSCSI target indicating that it is ready to
1738 * accept data. This gets the Initiator Task Tag (itt) from the PDU BHS
1739 * and looks up the task in the task tree using the itt to get the output
1740 * buffers associated the task. The R2T PDU contains the offset of the
1741 * requested data and the data length. This function then constructs a
1742 * sequence of iSCSI PDUs and outputs the requested data. Each Data-Out
1743 * PDU is associated with the R2T by the Target Transfer Tag  (ttt).
1744 */
1745
1746static void
1747idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu)
1748{
1749	idm_task_t		*idt;
1750	idm_buf_t		*idb;
1751	iscsi_rtt_hdr_t		*rtt_hdr;
1752	uint32_t		data_offset;
1753	uint32_t		data_length;
1754
1755	ASSERT(ic != NULL);
1756	ASSERT(pdu != NULL);
1757
1758	rtt_hdr	= (iscsi_rtt_hdr_t *)pdu->isp_hdr;
1759	data_offset = ntohl(rtt_hdr->data_offset);
1760	data_length = ntohl(rtt_hdr->data_length);
1761	idt	= idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt);
1762
1763	if (idt == NULL) {
1764		IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find task");
1765		idm_pdu_rx_protocol_error(ic, pdu);
1766		return;
1767	}
1768
1769	/* Find the buffer bound to the task by the iSCSI initiator */
1770	mutex_enter(&idt->idt_mutex);
1771	idb = idm_buf_find(&idt->idt_outbufv, data_offset);
1772	if (idb == NULL) {
1773		mutex_exit(&idt->idt_mutex);
1774		idm_task_rele(idt);
1775		IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find buffer");
1776		idm_pdu_rx_protocol_error(ic, pdu);
1777		return;
1778	}
1779
1780	/* return buffer contains this data */
1781	if (data_offset + data_length > idb->idb_buflen) {
1782		/* Overflow */
1783		mutex_exit(&idt->idt_mutex);
1784		idm_task_rele(idt);
1785		IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: read from outside "
1786		    "buffer");
1787		idm_pdu_rx_protocol_error(ic, pdu);
1788		return;
1789	}
1790
1791	idt->idt_r2t_ttt = rtt_hdr->ttt;
1792	idt->idt_exp_datasn = 0;
1793
1794	idm_so_send_rtt_data(ic, idt, idb, data_offset,
1795	    ntohl(rtt_hdr->data_length));
1796	/*
1797	 * the idt_mutex is released in idm_so_send_rtt_data
1798	 */
1799
1800	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1801	idm_task_rele(idt);
1802
1803}
1804
1805idm_status_t
1806idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu)
1807{
1808	uint8_t		pad[ISCSI_PAD_WORD_LEN];
1809	int		pad_len;
1810	uint32_t	data_digest_crc;
1811	uint32_t	crc_calculated;
1812	int		total_len;
1813	idm_so_conn_t	*so_conn;
1814
1815	so_conn = ic->ic_transport_private;
1816
1817	pad_len = ((ISCSI_PAD_WORD_LEN -
1818	    (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
1819	    (ISCSI_PAD_WORD_LEN - 1));
1820
1821	ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */
1822
1823	total_len = pdu->isp_datalen;
1824
1825	if (pad_len) {
1826		pdu->isp_iov[pdu->isp_iovlen].iov_base	= (char *)&pad;
1827		pdu->isp_iov[pdu->isp_iovlen].iov_len	= pad_len;
1828		total_len		+= pad_len;
1829		pdu->isp_iovlen++;
1830	}
1831
1832	/* setup data digest */
1833	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1834		pdu->isp_iov[pdu->isp_iovlen].iov_base =
1835		    (char *)&data_digest_crc;
1836		pdu->isp_iov[pdu->isp_iovlen].iov_len =
1837		    sizeof (data_digest_crc);
1838		total_len		+= sizeof (data_digest_crc);
1839		pdu->isp_iovlen++;
1840	}
1841
1842	pdu->isp_data = (uint8_t *)(uintptr_t)pdu->isp_iov[0].iov_base;
1843
1844	if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0],
1845	    pdu->isp_iovlen, total_len) != 0) {
1846		return (IDM_STATUS_IO);
1847	}
1848
1849	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1850		crc_calculated = idm_crc32c(pdu->isp_data,
1851		    pdu->isp_datalen);
1852		if (pad_len) {
1853			crc_calculated = idm_crc32c_continued((char *)&pad,
1854			    pad_len, crc_calculated);
1855		}
1856		if (crc_calculated != data_digest_crc) {
1857			IDM_CONN_LOG(CE_WARN,
1858			    "idm_sorecvdata: "
1859			    "CRC error: actual 0x%x, calc 0x%x",
1860			    data_digest_crc, crc_calculated);
1861
1862			/* Invalid Data Digest */
1863			return (IDM_STATUS_DATA_DIGEST);
1864		}
1865	}
1866
1867	return (IDM_STATUS_SUCCESS);
1868}
1869
1870/*
1871 * idm_sorecv_scsidata() is used to receive scsi data from the socket. The
1872 * Data-type PDU header must be read into the idm_pdu_t structure prior to
1873 * calling this function.
1874 */
1875idm_status_t
1876idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1877{
1878	iscsi_data_hdr_t	*bhs;
1879	idm_task_t		*task;
1880	uint32_t		offset;
1881	uint8_t			opcode;
1882	uint32_t		dlength;
1883	list_t			*buflst;
1884	uint32_t		xfer_bytes;
1885	idm_status_t		status;
1886
1887	ASSERT(ic != NULL);
1888	ASSERT(pdu != NULL);
1889
1890	bhs	= (iscsi_data_hdr_t *)pdu->isp_hdr;
1891
1892	offset	= ntohl(bhs->offset);
1893	opcode	= bhs->opcode;
1894	dlength = n2h24(bhs->dlength);
1895
1896	ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) ||
1897	    (opcode == ISCSI_OP_SCSI_DATA));
1898
1899	/*
1900	 * Successful lookup implicitly gets a "hold" on the task.  This
1901	 * hold must be released before leaving this function.  At one
1902	 * point we were caching this task context and retaining the hold
1903	 * but it turned out to be very difficult to release the hold properly.
1904	 * The task can be aborted and the connection shutdown between this
1905	 * call and the subsequent expected call to idm_so_rx_datain/
1906	 * idm_so_rx_dataout (in which case those functions are not called).
1907	 * Releasing the hold in the PDU callback doesn't work well either
1908	 * because the whole task may be completed by then at which point
1909	 * it is too late to release the hold -- for better or worse this
1910	 * code doesn't wait on the refcnts during normal operation.
1911	 * idm_task_find() is very fast and it is not a huge burden if we
1912	 * have to do it twice.
1913	 */
1914	task = idm_task_find(ic, bhs->itt, bhs->ttt);
1915	if (task == NULL) {
1916		IDM_CONN_LOG(CE_WARN,
1917		    "idm_sorecv_scsidata: could not find task");
1918		return (IDM_STATUS_FAIL);
1919	}
1920
1921	mutex_enter(&task->idt_mutex);
1922	buflst	= (opcode == ISCSI_OP_SCSI_DATA_RSP) ?
1923	    &task->idt_inbufv : &task->idt_outbufv;
1924	pdu->isp_sorx_buf = idm_buf_find(buflst, offset);
1925	mutex_exit(&task->idt_mutex);
1926
1927	if (pdu->isp_sorx_buf == NULL) {
1928		idm_task_rele(task);
1929		IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find "
1930		    "buffer for offset %x opcode=%x",
1931		    offset, opcode);
1932		return (IDM_STATUS_FAIL);
1933	}
1934
1935	xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength);
1936	ASSERT(xfer_bytes != 0);
1937	if (xfer_bytes != dlength) {
1938		idm_task_rele(task);
1939		/*
1940		 * Buffer overflow, connection error.  The PDU data is still
1941		 * sitting in the socket so we can't use the connection
1942		 * again until that data is drained.
1943		 */
1944		return (IDM_STATUS_FAIL);
1945	}
1946
1947	status = idm_sorecvdata(ic, pdu);
1948
1949	idm_task_rele(task);
1950
1951	return (status);
1952}
1953
1954static uint32_t
1955idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, uint32_t ro, uint32_t dlength)
1956{
1957	uint32_t	buf_ro = ro - idb->idb_bufoffset;
1958	uint32_t	xfer_len = min(dlength, idb->idb_buflen - buf_ro);
1959
1960	ASSERT(ro >= idb->idb_bufoffset);
1961
1962	pdu->isp_iov[pdu->isp_iovlen].iov_base	=
1963	    (caddr_t)idb->idb_buf + buf_ro;
1964	pdu->isp_iov[pdu->isp_iovlen].iov_len	= xfer_len;
1965	pdu->isp_iovlen++;
1966
1967	return (xfer_len);
1968}
1969
1970int
1971idm_sorecv_nonscsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1972{
1973	pdu->isp_data = kmem_alloc(pdu->isp_datalen, KM_SLEEP);
1974	ASSERT(pdu->isp_data != NULL);
1975
1976	pdu->isp_databuflen = pdu->isp_datalen;
1977	pdu->isp_iov[0].iov_base = (caddr_t)pdu->isp_data;
1978	pdu->isp_iov[0].iov_len = pdu->isp_datalen;
1979	pdu->isp_iovlen = 1;
1980	/*
1981	 * Since we are associating a new data buffer with this received
1982	 * PDU we need to set a specific callback to free the data
1983	 * after the PDU is processed.
1984	 */
1985	pdu->isp_flags |= IDM_PDU_ADDL_DATA;
1986	pdu->isp_callback = idm_sorx_addl_pdu_cb;
1987
1988	return (idm_sorecvdata(ic, pdu));
1989}
1990
/*
 * idm_sorx_thread()
 * Receive thread for a sockets connection; arg is the idm_conn_t.
 *
 * The thread holds the connection for its whole lifetime.  After
 * publishing its thread ID and running state under ic_mutex (and
 * signalling ic_cv), it loops while ic_rx_thread_running is set:
 * allocate a PDU from the RX cache, read the header, read any data
 * segment, and pass the PDU to idm_pdu_rx().
 *
 * ic_mutex is held whenever the loop condition is evaluated and dropped
 * at the top of each iteration, so every path back to the loop test
 * (including the "continue" paths) re-acquires it first.
 *
 * If a receive fails while the run flag is still set, the thread clears
 * the flag, and after leaving the loop stops the TX thread and posts
 * CE_TRANSPORT_FAIL to the connection state machine.
 */
void
idm_sorx_thread(void *arg)
{
	boolean_t	conn_failure = B_FALSE;
	idm_conn_t	*ic = (idm_conn_t *)arg;
	idm_so_conn_t	*so_conn;
	idm_pdu_t	*pdu;
	idm_status_t	rc;

	idm_conn_hold(ic);

	mutex_enter(&ic->ic_mutex);

	so_conn = ic->ic_transport_private;
	so_conn->ic_rx_thread_running = B_TRUE;
	so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did;
	/* Wake the thread waiting in idm_so_conn_connect_common() */
	cv_signal(&ic->ic_cv);

	while (so_conn->ic_rx_thread_running) {
		mutex_exit(&ic->ic_mutex);

		/*
		 * Get PDU with default header size (large enough for
		 * BHS plus any anticipated AHS).  PDU from
		 * the cache will have all values set correctly
		 * for sockets RX including callback.
		 */
		pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP);
		pdu->isp_ic = ic;
		pdu->isp_flags = 0;
		pdu->isp_transport_hdrlen = 0;

		if ((rc = idm_sorecvhdr(ic, pdu)) != 0) {
			/*
			 * Call idm_pdu_complete so that we call the callback
			 * and ensure any memory allocated in idm_sorecvhdr
			 * gets freed up.
			 */
			idm_pdu_complete(pdu, IDM_STATUS_FAIL);

			/*
			 * If ic_rx_thread_running is still set then
			 * this is some kind of connection problem
			 * on the socket.  In this case we want to
			 * generate an event.  Otherwise some other
			 * thread closed the socket due to another
			 * issue in which case we don't need to
			 * generate an event.
			 */
			mutex_enter(&ic->ic_mutex);
			if (so_conn->ic_rx_thread_running) {
				conn_failure = B_TRUE;
				so_conn->ic_rx_thread_running = B_FALSE;
			}

			/* The run flag is now clear, so the loop test exits */
			continue;
		}

		/*
		 * Header has been read and validated.  Now we need
		 * to read the PDU data payload (if present).  SCSI data
		 * need to be transferred from the socket directly into
		 * the associated transfer buffer for the SCSI task.
		 */
		if (pdu->isp_datalen != 0) {
			if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) ||
			    (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) {
				rc = idm_sorecv_scsidata(ic, pdu);
				/*
				 * All SCSI errors are fatal to the
				 * connection right now since we have no
				 * place to put the data.  What we need
				 * is some kind of sink to dispose of unwanted
				 * SCSI data.  For example an invalid task tag
				 * should not kill the connection (although
				 * we may want to drop the connection).
				 */
			} else {
				/*
				 * Not data PDUs so allocate a buffer for the
				 * data segment and read the remaining data.
				 */
				rc = idm_sorecv_nonscsidata(ic, pdu);
			}
			if (rc != 0) {
				/*
				 * Call idm_pdu_complete so that we call the
				 * callback and ensure any memory allocated
				 * in idm_sorecvhdr gets freed up.
				 */
				idm_pdu_complete(pdu, IDM_STATUS_FAIL);

				/*
				 * If ic_rx_thread_running is still set then
				 * this is some kind of connection problem
				 * on the socket.  In this case we want to
				 * generate an event.  Otherwise some other
				 * thread closed the socket due to another
				 * issue in which case we don't need to
				 * generate an event.
				 */
				mutex_enter(&ic->ic_mutex);
				if (so_conn->ic_rx_thread_running) {
					conn_failure = B_TRUE;
					so_conn->ic_rx_thread_running = B_FALSE;
				}
				continue;
			}
		}

		/*
		 * Process RX PDU
		 */
		idm_pdu_rx(ic, pdu);

		/* Re-take ic_mutex before the loop condition is re-tested */
		mutex_enter(&ic->ic_mutex);
	}

	mutex_exit(&ic->ic_mutex);

	/*
	 * If we dropped out of the RX processing loop because of
	 * a socket problem or other connection failure (including
	 * digest errors) then we need to generate a state machine
	 * event to shut the connection down.
	 * If the state machine is already in, for example, INIT_ERROR, this
	 * event will get dropped, and the TX thread will never be notified
	 * to shut down.  To be safe, we'll just notify it here.
	 */
	if (conn_failure) {
		/*
		 * NOTE(review): ic_tx_thread_running is tested and cleared
		 * here without holding ic_mutex or ic_tx_mutex -- confirm
		 * this is intentional.
		 */
		if (so_conn->ic_tx_thread_running) {
			so_conn->ic_tx_thread_running = B_FALSE;
			mutex_enter(&so_conn->ic_tx_mutex);
			cv_signal(&so_conn->ic_tx_cv);
			mutex_exit(&so_conn->ic_tx_mutex);
		}

		/* rc still holds the status of the receive that failed */
		idm_conn_event(ic, CE_TRANSPORT_FAIL, rc);
	}

	/* Drop the hold taken at thread start */
	idm_conn_rele(ic);

	thread_exit();
}
2135
2136/*
2137 * idm_so_tx
2138 *
2139 * This is the implementation of idm_transport_ops_t's it_tx_pdu entry
2140 * point.  By definition, it is supposed to be fast.  So, simply queue
2141 * the entry and return.  The real work is done by idm_i_so_tx() via
2142 * idm_sotx_thread().
2143 */
2144
2145static void
2146idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu)
2147{
2148	idm_so_conn_t *so_conn = ic->ic_transport_private;
2149
2150	ASSERT(pdu->isp_ic == ic);
2151	mutex_enter(&so_conn->ic_tx_mutex);
2152
2153	if (!so_conn->ic_tx_thread_running) {
2154		mutex_exit(&so_conn->ic_tx_mutex);
2155		idm_pdu_complete(pdu, IDM_STATUS_ABORTED);
2156		return;
2157	}
2158
2159	list_insert_tail(&so_conn->ic_tx_list, (void *)pdu);
2160	cv_signal(&so_conn->ic_tx_cv);
2161	mutex_exit(&so_conn->ic_tx_mutex);
2162}
2163
2164static idm_status_t
2165idm_i_so_tx(idm_pdu_t *pdu)
2166{
2167	idm_conn_t	*ic = pdu->isp_ic;
2168	idm_status_t	status = IDM_STATUS_SUCCESS;
2169	uint8_t		pad[ISCSI_PAD_WORD_LEN];
2170	int		pad_len;
2171	uint32_t	hdr_digest_crc;
2172	uint32_t	data_digest_crc = 0;
2173	int		total_len = 0;
2174	int		iovlen = 0;
2175	struct iovec	iov[6];
2176	idm_so_conn_t	*so_conn;
2177
2178	so_conn = ic->ic_transport_private;
2179
2180	/* Setup BHS */
2181	iov[iovlen].iov_base	= (caddr_t)pdu->isp_hdr;
2182	iov[iovlen].iov_len	= pdu->isp_hdrlen;
2183	total_len		+= iov[iovlen].iov_len;
2184	iovlen++;
2185
2186	/* Setup header digest */
2187	if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2188	    (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) {
2189		hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen);
2190
2191		iov[iovlen].iov_base	= (caddr_t)&hdr_digest_crc;
2192		iov[iovlen].iov_len	= sizeof (hdr_digest_crc);
2193		total_len		+= iov[iovlen].iov_len;
2194		iovlen++;
2195	}
2196
2197	/* Setup the data */
2198	if (pdu->isp_datalen) {
2199		idm_task_t		*idt;
2200		idm_buf_t		*idb;
2201		iscsi_data_hdr_t	*ihp;
2202		ihp = (iscsi_data_hdr_t *)pdu->isp_hdr;
2203		/* Write of immediate data */
2204		if (ic->ic_ffp &&
2205		    (ihp->opcode == ISCSI_OP_SCSI_CMD ||
2206		    ihp->opcode == ISCSI_OP_SCSI_DATA)) {
2207			idt = idm_task_find(ic, ihp->itt, ihp->ttt);
2208			if (idt) {
2209				mutex_enter(&idt->idt_mutex);
2210				idb = idm_buf_find(&idt->idt_outbufv, 0);
2211				mutex_exit(&idt->idt_mutex);
2212				/*
2213				 * If the initiator call to idm_buf_alloc
2214				 * failed then we can get to this point
2215				 * without a bound buffer.  The associated
2216				 * connection failure will clean things up
2217				 * later.  It would be nice to come up with
2218				 * a cleaner way to handle this.  In
2219				 * particular it seems absurd to look up
2220				 * the task and the buffer just to update
2221				 * this counter.
2222				 */
2223				if (idb)
2224					idb->idb_xfer_len += pdu->isp_datalen;
2225				idm_task_rele(idt);
2226			}
2227		}
2228
2229		iov[iovlen].iov_base = (caddr_t)pdu->isp_data;
2230		iov[iovlen].iov_len  = pdu->isp_datalen;
2231		total_len += iov[iovlen].iov_len;
2232		iovlen++;
2233	}
2234
2235	/* Setup the data pad if necessary */
2236	pad_len = ((ISCSI_PAD_WORD_LEN -
2237	    (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
2238	    (ISCSI_PAD_WORD_LEN - 1));
2239
2240	if (pad_len) {
2241		bzero(pad, sizeof (pad));
2242		iov[iovlen].iov_base = (void *)&pad;
2243		iov[iovlen].iov_len  = pad_len;
2244		total_len		+= iov[iovlen].iov_len;
2245		iovlen++;
2246	}
2247
2248	/*
2249	 * Setup the data digest if enabled.  Data-digest is not sent
2250	 * for login-phase PDUs.
2251	 */
2252	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) &&
2253	    ((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2254	    (pdu->isp_datalen || pad_len)) {
2255		/*
2256		 * RFC3720/10.2.3: A zero-length Data Segment also
2257		 * implies a zero-length data digest.
2258		 */
2259		if (pdu->isp_datalen) {
2260			data_digest_crc = idm_crc32c(pdu->isp_data,
2261			    pdu->isp_datalen);
2262		}
2263		if (pad_len) {
2264			data_digest_crc = idm_crc32c_continued(&pad,
2265			    pad_len, data_digest_crc);
2266		}
2267
2268		iov[iovlen].iov_base	= (caddr_t)&data_digest_crc;
2269		iov[iovlen].iov_len	= sizeof (data_digest_crc);
2270		total_len		+= iov[iovlen].iov_len;
2271		iovlen++;
2272	}
2273
2274	/* Transmit the PDU */
2275	if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen,
2276	    total_len) != 0) {
2277		/* Set error status */
2278		IDM_CONN_LOG(CE_WARN,
2279		    "idm_so_tx: failed to transmit the PDU, so: %p ic: %p "
2280		    "data: %p", (void *) so_conn->ic_so, (void *) ic,
2281		    (void *) pdu->isp_data);
2282		status = IDM_STATUS_IO;
2283	}
2284
2285	/*
2286	 * Success does not mean that the PDU actually reached the
2287	 * remote node since it could get dropped along the way.
2288	 */
2289	idm_pdu_complete(pdu, status);
2290
2291	return (status);
2292}
2293
2294/*
2295 * The idm_so_buf_tx_to_ini() is used by the target iSCSI layer to transmit the
2296 * Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength,
2297 * the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN.
2298 * A target can invoke this function multiple times for a single read command
2299 * (identified by the same ITT) to split the input into several sequences.
2300 *
2301 * DataSN starts with 0 for the first data PDU of an input command and advances
2302 * by 1 for each subsequent data PDU. Each sequence will have its own F bit,
2303 * which is set to 1 for the last data PDU of a sequence.
2304 * If the initiator supports phase collapse, the status bit must be set along
2305 * with the F bit to indicate that the status is shipped together with the last
2306 * Data-In PDU.
2307 *
2308 * The data PDUs within a sequence will be sent in order with the buffer offset
2309 * in increasing order. i.e. initiator and target must have negotiated the
2310 * "DataPDUInOrder" to "Yes". The order between sequences is not enforced.
2311 *
2312 * Caller holds idt->idt_mutex
2313 */
2314static idm_status_t
2315idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb)
2316{
2317	idm_so_conn_t	*so_conn = idb->idb_ic->ic_transport_private;
2318	idm_pdu_t	tmppdu;
2319
2320	ASSERT(mutex_owned(&idt->idt_mutex));
2321
2322	/*
2323	 * Put the idm_buf_t on the tx queue.  It will be transmitted by
2324	 * idm_sotx_thread.
2325	 */
2326	mutex_enter(&so_conn->ic_tx_mutex);
2327
2328	DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2329	    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2330	    uint64_t, 0, uint32_t, 0, uint32_t, 0,
2331	    uint32_t, idb->idb_xfer_len, int, XFER_BUF_TX_TO_INI);
2332
2333	if (!so_conn->ic_tx_thread_running) {
2334		mutex_exit(&so_conn->ic_tx_mutex);
2335		/*
2336		 * Don't release idt->idt_mutex since we're supposed to hold
2337		 * in when calling idm_buf_tx_to_ini_done
2338		 */
2339		DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
2340		    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2341		    uint64_t, 0, uint32_t, 0, uint32_t, 0,
2342		    uint32_t, idb->idb_xfer_len,
2343		    int, XFER_BUF_TX_TO_INI);
2344		idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
2345		return (IDM_STATUS_FAIL);
2346	}
2347
2348	/*
2349	 * Build a template for the data PDU headers we will use so that
2350	 * the SN values will stay consistent with other PDU's we are
2351	 * transmitting like R2T and SCSI status.
2352	 */
2353	bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2354	tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl;
2355	(*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2356	    ISCSI_OP_SCSI_DATA_RSP);
2357	idb->idb_tx_thread = B_TRUE;
2358	list_insert_tail(&so_conn->ic_tx_list, (void *)idb);
2359	cv_signal(&so_conn->ic_tx_cv);
2360	mutex_exit(&so_conn->ic_tx_mutex);
2361	mutex_exit(&idt->idt_mutex);
2362
2363	/*
2364	 * Returning success here indicates the transfer was successfully
2365	 * dispatched -- it does not mean that the transfer completed
2366	 * successfully.
2367	 */
2368	return (IDM_STATUS_SUCCESS);
2369}
2370
2371/*
2372 * The idm_so_buf_rx_from_ini() is used by the target iSCSI layer to specify the
2373 * data blocks it is ready to receive from the initiator in response to a WRITE
2374 * SCSI command. The target iSCSI layer passes the information about the desired
2375 * data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer
2376 * offset and datalen are passed via the 'idb' argument.
2377 *
2378 * Scope for Prototype build:
2379 * R2Ts are required for any Data-Out PDU, i.e. initiator and target must have
2380 * negotiated the "InitialR2T" to "Yes".
2381 *
2382 * Caller holds idt->idt_mutex
2383 */
2384static idm_status_t
2385idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb)
2386{
2387	idm_pdu_t		*pdu;
2388	iscsi_rtt_hdr_t		*rtt;
2389
2390	ASSERT(mutex_owned(&idt->idt_mutex));
2391
2392	DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2393	    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2394	    uint64_t, 0, uint32_t, 0, uint32_t, 0,
2395	    uint32_t, idb->idb_xfer_len, int, XFER_BUF_RX_FROM_INI);
2396
2397	pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2398	pdu->isp_ic = idt->idt_ic;
2399	pdu->isp_flags = IDM_PDU_SET_STATSN;
2400	bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t));
2401
2402	/* iSCSI layer fills the TTT, ITT, ExpCmdSN, MaxCmdSN */
2403	(*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP);
2404
2405	/* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */
2406	rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr);
2407
2408	rtt->opcode		= ISCSI_OP_RTT_RSP;
2409	rtt->flags		= ISCSI_FLAG_FINAL;
2410	rtt->data_offset	= htonl(idb->idb_bufoffset);
2411	rtt->data_length	= htonl(idb->idb_xfer_len);
2412	rtt->rttsn		= htonl(idt->idt_exp_rttsn++);
2413
2414	/* Keep track of buffer offsets */
2415	idb->idb_exp_offset	= idb->idb_bufoffset;
2416	mutex_exit(&idt->idt_mutex);
2417
2418	/*
2419	 * Transmit the PDU.
2420	 */
2421	idm_pdu_tx(pdu);
2422
2423	return (IDM_STATUS_SUCCESS);
2424}
2425
2426static idm_status_t
2427idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen)
2428{
2429	if ((buflen > IDM_SO_BUF_CACHE_LB) && (buflen <= IDM_SO_BUF_CACHE_UB)) {
2430		idb->idb_buf = kmem_cache_alloc(idm.idm_so_128k_buf_cache,
2431		    KM_NOSLEEP);
2432		idb->idb_buf_private = idm.idm_so_128k_buf_cache;
2433	} else {
2434		idb->idb_buf = kmem_alloc(buflen, KM_NOSLEEP);
2435		idb->idb_buf_private = NULL;
2436	}
2437
2438	if (idb->idb_buf == NULL) {
2439		IDM_CONN_LOG(CE_NOTE,
2440		    "idm_so_buf_alloc: failed buffer allocation");
2441		return (IDM_STATUS_FAIL);
2442	}
2443
2444	return (IDM_STATUS_SUCCESS);
2445}
2446
2447/* ARGSUSED */
2448static idm_status_t
2449idm_so_buf_setup(idm_buf_t *idb)
2450{
2451	/* Ensure bufalloc'd flag is unset */
2452	idb->idb_bufalloc = B_FALSE;
2453
2454	return (IDM_STATUS_SUCCESS);
2455}
2456
2457/* ARGSUSED */
2458static void
2459idm_so_buf_teardown(idm_buf_t *idb)
2460{
2461	/* nothing to do here */
2462}
2463
2464static void
2465idm_so_buf_free(idm_buf_t *idb)
2466{
2467	if (idb->idb_buf_private == NULL) {
2468		kmem_free(idb->idb_buf, idb->idb_buflen);
2469	} else {
2470		kmem_cache_free(idb->idb_buf_private, idb->idb_buf);
2471	}
2472}
2473
2474static void
2475idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, idm_buf_t *idb,
2476    uint32_t offset, uint32_t length)
2477{
2478	idm_so_conn_t	*so_conn = ic->ic_transport_private;
2479	idm_pdu_t	tmppdu;
2480	idm_buf_t	*rtt_buf;
2481
2482	ASSERT(mutex_owned(&idt->idt_mutex));
2483
2484	/*
2485	 * Allocate a buffer to represent the RTT transfer.  We could further
2486	 * optimize this by allocating the buffers internally from an rtt
2487	 * specific buffer cache since this is socket-specific code but for
2488	 * now we will keep it simple.
2489	 */
2490	rtt_buf = idm_buf_alloc(ic, (uint8_t *)idb->idb_buf + offset, length);
2491	if (rtt_buf == NULL) {
2492		/*
2493		 * If we're in FFP then the failure was likely a resource
2494		 * allocation issue and we should close the connection by
2495		 * sending a CE_TRANSPORT_FAIL event.
2496		 *
2497		 * If we're not in FFP then idm_buf_alloc will always
2498		 * fail and the state is transitioning to "complete" anyway
2499		 * so we won't bother to send an event.
2500		 */
2501		mutex_enter(&ic->ic_state_mutex);
2502		if (ic->ic_ffp)
2503			idm_conn_event_locked(ic, CE_TRANSPORT_FAIL,
2504			    NULL, CT_NONE);
2505		mutex_exit(&ic->ic_state_mutex);
2506		mutex_exit(&idt->idt_mutex);
2507		return;
2508	}
2509
2510	rtt_buf->idb_buf_cb = NULL;
2511	rtt_buf->idb_cb_arg = NULL;
2512	rtt_buf->idb_bufoffset = offset;
2513	rtt_buf->idb_xfer_len = length;
2514	rtt_buf->idb_ic = idt->idt_ic;
2515	rtt_buf->idb_task_binding = idt;
2516
2517	/*
2518	 * The new buffer (if any) represents an additional
2519	 * reference on the task
2520	 */
2521	idm_task_hold(idt);
2522	mutex_exit(&idt->idt_mutex);
2523
2524	/*
2525	 * Put the idm_buf_t on the tx queue.  It will be transmitted by
2526	 * idm_sotx_thread.
2527	 */
2528	mutex_enter(&so_conn->ic_tx_mutex);
2529
2530	if (!so_conn->ic_tx_thread_running) {
2531		idm_buf_free(rtt_buf);
2532		mutex_exit(&so_conn->ic_tx_mutex);
2533		idm_task_rele(idt);
2534		return;
2535	}
2536
2537	/*
2538	 * Build a template for the data PDU headers we will use so that
2539	 * the SN values will stay consistent with other PDU's we are
2540	 * transmitting like R2T and SCSI status.
2541	 */
2542	bzero(&rtt_buf->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2543	tmppdu.isp_hdr = &rtt_buf->idb_data_hdr_tmpl;
2544	(*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2545	    ISCSI_OP_SCSI_DATA);
2546	rtt_buf->idb_tx_thread = B_TRUE;
2547	rtt_buf->idb_in_transport = B_TRUE;
2548	list_insert_tail(&so_conn->ic_tx_list, (void *)rtt_buf);
2549	cv_signal(&so_conn->ic_tx_cv);
2550	mutex_exit(&so_conn->ic_tx_mutex);
2551}
2552
2553static void
2554idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb)
2555{
2556	/*
2557	 * Don't worry about status -- we assume any error handling
2558	 * is performed by the caller (idm_sotx_thread).
2559	 */
2560	idb->idb_in_transport = B_FALSE;
2561	idm_task_rele(idt);
2562	idm_buf_free(idb);
2563}
2564
2565static idm_status_t
2566idm_so_send_buf_region(idm_task_t *idt, idm_buf_t *idb,
2567    uint32_t buf_region_offset, uint32_t buf_region_length)
2568{
2569	idm_conn_t		*ic;
2570	uint32_t		max_dataseglen;
2571	size_t			remainder, chunk;
2572	uint32_t		data_offset = buf_region_offset;
2573	iscsi_data_hdr_t	*bhs;
2574	idm_pdu_t		*pdu;
2575	idm_status_t		tx_status;
2576
2577	ASSERT(mutex_owned(&idt->idt_mutex));
2578
2579	ic = idt->idt_ic;
2580
2581	max_dataseglen = ic->ic_conn_params.max_xmit_dataseglen;
2582	remainder = buf_region_length;
2583
2584	while (remainder) {
2585		if (idt->idt_state != TASK_ACTIVE) {
2586			ASSERT((idt->idt_state != TASK_IDLE) &&
2587			    (idt->idt_state != TASK_COMPLETE));
2588			return (IDM_STATUS_ABORTED);
2589		}
2590
2591		/* check to see if we need to chunk the data */
2592		if (remainder > max_dataseglen) {
2593			chunk = max_dataseglen;
2594		} else {
2595			chunk = remainder;
2596		}
2597
2598		/* Data PDU headers will always be sizeof (iscsi_hdr_t) */
2599		pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2600		pdu->isp_ic = ic;
2601		pdu->isp_flags = 0;	/* initialize isp_flags */
2602
2603		/*
2604		 * We've already built a build a header template
2605		 * to use during the transfer.  Use this template so that
2606		 * the SN values stay consistent with any unrelated PDU's
2607		 * being transmitted.
2608		 */
2609		bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr,
2610		    sizeof (iscsi_hdr_t));
2611
2612		/*
2613		 * Set DataSN, data offset, and flags in BHS
2614		 * For the prototype build, A = 0, S = 0, U = 0
2615		 */
2616		bhs = (iscsi_data_hdr_t *)(pdu->isp_hdr);
2617
2618		bhs->datasn		= htonl(idt->idt_exp_datasn++);
2619
2620		hton24(bhs->dlength, chunk);
2621		bhs->offset = htonl(idb->idb_bufoffset + data_offset);
2622
2623		/* setup data */
2624		pdu->isp_data	=  (uint8_t *)idb->idb_buf + data_offset;
2625		pdu->isp_datalen = (uint_t)chunk;
2626
2627		if (chunk == remainder) {
2628			bhs->flags = ISCSI_FLAG_FINAL; /* F bit set to 1 */
2629			/* Piggyback the status with the last data PDU */
2630			if (idt->idt_flags & IDM_TASK_PHASECOLLAPSE_REQ) {
2631				pdu->isp_flags |= IDM_PDU_SET_STATSN |
2632				    IDM_PDU_ADVANCE_STATSN;
2633				(*idt->idt_ic->ic_conn_ops.icb_update_statsn)
2634				    (idt, pdu);
2635				idt->idt_flags |=
2636				    IDM_TASK_PHASECOLLAPSE_SUCCESS;
2637
2638			}
2639		}
2640
2641		remainder	-= chunk;
2642		data_offset	+= chunk;
2643
2644		/* Instrument the data-send DTrace probe. */
2645		if (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) {
2646			DTRACE_ISCSI_2(data__send,
2647			    idm_conn_t *, idt->idt_ic,
2648			    iscsi_data_rsp_hdr_t *,
2649			    (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
2650		}
2651
2652		/*
2653		 * Now that we're done working with idt_exp_datasn,
2654		 * idt->idt_state and idb->idb_bufoffset we can release
2655		 * the task lock -- don't want to hold it across the
2656		 * call to idm_i_so_tx since we could block.
2657		 */
2658		mutex_exit(&idt->idt_mutex);
2659
2660		/*
2661		 * Transmit the PDU.  Call the internal routine directly
2662		 * as there is already implicit ordering.
2663		 */
2664		if ((tx_status = idm_i_so_tx(pdu)) != IDM_STATUS_SUCCESS) {
2665			mutex_enter(&idt->idt_mutex);
2666			return (tx_status);
2667		}
2668
2669		mutex_enter(&idt->idt_mutex);
2670		idt->idt_tx_bytes += chunk;
2671	}
2672
2673	return (IDM_STATUS_SUCCESS);
2674}
2675
2676/*
2677 * TX PDU cache
2678 */
2679/* ARGSUSED */
2680int
2681idm_sotx_pdu_constructor(void *hdl, void *arg, int flags)
2682{
2683	idm_pdu_t	*pdu = hdl;
2684
2685	bzero(pdu, sizeof (idm_pdu_t));
2686	pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2687	pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2688	pdu->isp_callback = idm_sotx_cache_pdu_cb;
2689	pdu->isp_magic = IDM_PDU_MAGIC;
2690	bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t));
2691
2692	return (0);
2693}
2694
2695/* ARGSUSED */
2696void
2697idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2698{
2699	/* reset values between use */
2700	pdu->isp_datalen = 0;
2701
2702	kmem_cache_free(idm.idm_sotx_pdu_cache, pdu);
2703}
2704
2705/*
2706 * RX PDU cache
2707 */
2708/* ARGSUSED */
2709int
2710idm_sorx_pdu_constructor(void *hdl, void *arg, int flags)
2711{
2712	idm_pdu_t	*pdu = hdl;
2713
2714	bzero(pdu, sizeof (idm_pdu_t));
2715	pdu->isp_magic = IDM_PDU_MAGIC;
2716	pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2717	pdu->isp_callback = idm_sorx_cache_pdu_cb;
2718
2719	return (0);
2720}
2721
2722/* ARGSUSED */
2723static void
2724idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2725{
2726	pdu->isp_iovlen = 0;
2727	pdu->isp_sorx_buf = 0;
2728	kmem_cache_free(idm.idm_sorx_pdu_cache, pdu);
2729}
2730
2731static void
2732idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2733{
2734	/*
2735	 * We had to modify our cached RX PDU with a longer header buffer
2736	 * and/or a longer data buffer.  Release the new buffers and fix
2737	 * the fields back to what we would expect for a cached RX PDU.
2738	 */
2739	if (pdu->isp_flags & IDM_PDU_ADDL_HDR) {
2740		kmem_free(pdu->isp_hdr, pdu->isp_hdrlen);
2741	}
2742	if (pdu->isp_flags & IDM_PDU_ADDL_DATA) {
2743		kmem_free(pdu->isp_data, pdu->isp_datalen);
2744	}
2745	pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1);
2746	pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2747	pdu->isp_data = NULL;
2748	pdu->isp_datalen = 0;
2749	pdu->isp_sorx_buf = 0;
2750	pdu->isp_callback = idm_sorx_cache_pdu_cb;
2751	idm_sorx_cache_pdu_cb(pdu, status);
2752}
2753
2754/*
2755 * This thread is only active when I/O is queued for transmit
2756 * because the socket is busy.
2757 */
2758void
2759idm_sotx_thread(void *arg)
2760{
2761	idm_conn_t	*ic = arg;
2762	idm_tx_obj_t	*object, *next;
2763	idm_so_conn_t	*so_conn;
2764	idm_status_t	status = IDM_STATUS_SUCCESS;
2765
2766	idm_conn_hold(ic);
2767
2768	mutex_enter(&ic->ic_mutex);
2769	so_conn = ic->ic_transport_private;
2770	so_conn->ic_tx_thread_running = B_TRUE;
2771	so_conn->ic_tx_thread_did = so_conn->ic_tx_thread->t_did;
2772	cv_signal(&ic->ic_cv);
2773	mutex_exit(&ic->ic_mutex);
2774
2775	mutex_enter(&so_conn->ic_tx_mutex);
2776
2777	while (so_conn->ic_tx_thread_running) {
2778		while (list_is_empty(&so_conn->ic_tx_list)) {
2779			DTRACE_PROBE1(soconn__tx__sleep, idm_conn_t *, ic);
2780			cv_wait(&so_conn->ic_tx_cv, &so_conn->ic_tx_mutex);
2781			DTRACE_PROBE1(soconn__tx__wakeup, idm_conn_t *, ic);
2782
2783			if (!so_conn->ic_tx_thread_running) {
2784				goto tx_bail;
2785			}
2786		}
2787
2788		object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2789		list_remove(&so_conn->ic_tx_list, object);
2790		mutex_exit(&so_conn->ic_tx_mutex);
2791
2792		switch (object->idm_tx_obj_magic) {
2793		case IDM_PDU_MAGIC: {
2794			idm_pdu_t *pdu = (idm_pdu_t *)object;
2795			DTRACE_PROBE2(soconn__tx__pdu, idm_conn_t *, ic,
2796			    idm_pdu_t *, (idm_pdu_t *)object);
2797
2798			if (pdu->isp_flags & IDM_PDU_SET_STATSN) {
2799				/* No IDM task */
2800				(ic->ic_conn_ops.icb_update_statsn)(NULL, pdu);
2801			}
2802			status = idm_i_so_tx((idm_pdu_t *)object);
2803			break;
2804		}
2805		case IDM_BUF_MAGIC: {
2806			idm_buf_t *idb = (idm_buf_t *)object;
2807			idm_task_t *idt = idb->idb_task_binding;
2808
2809			DTRACE_PROBE2(soconn__tx__buf, idm_conn_t *, ic,
2810			    idm_buf_t *, idb);
2811
2812			mutex_enter(&idt->idt_mutex);
2813			status = idm_so_send_buf_region(idt,
2814			    idb, 0, idb->idb_xfer_len);
2815
2816			/*
2817			 * TX thread owns the buffer so we expect it to
2818			 * be "in transport"
2819			 */
2820			ASSERT(idb->idb_in_transport);
2821			if (IDM_CONN_ISTGT(ic)) {
2822				/*
2823				 * idm_buf_tx_to_ini_done releases
2824				 * idt->idt_mutex
2825				 */
2826				DTRACE_ISCSI_8(xfer__done,
2827				    idm_conn_t *, idt->idt_ic,
2828				    uintptr_t, idb->idb_buf,
2829				    uint32_t, idb->idb_bufoffset,
2830				    uint64_t, 0, uint32_t, 0, uint32_t, 0,
2831				    uint32_t, idb->idb_xfer_len,
2832				    int, XFER_BUF_TX_TO_INI);
2833				idm_buf_tx_to_ini_done(idt, idb, status);
2834			} else {
2835				idm_so_send_rtt_data_done(idt, idb);
2836				mutex_exit(&idt->idt_mutex);
2837			}
2838			break;
2839		}
2840
2841		default:
2842			IDM_CONN_LOG(CE_WARN, "idm_sotx_thread: Unknown magic "
2843			    "(0x%08x)", object->idm_tx_obj_magic);
2844			status = IDM_STATUS_FAIL;
2845		}
2846
2847		mutex_enter(&so_conn->ic_tx_mutex);
2848
2849		if (status != IDM_STATUS_SUCCESS) {
2850			so_conn->ic_tx_thread_running = B_FALSE;
2851			idm_conn_event(ic, CE_TRANSPORT_FAIL, status);
2852		}
2853	}
2854
2855	/*
2856	 * Before we leave, we need to abort every item remaining in the
2857	 * TX list.
2858	 */
2859
2860tx_bail:
2861	object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2862
2863	while (object != NULL) {
2864		next = list_next(&so_conn->ic_tx_list, object);
2865
2866		list_remove(&so_conn->ic_tx_list, object);
2867		switch (object->idm_tx_obj_magic) {
2868		case IDM_PDU_MAGIC:
2869			idm_pdu_complete((idm_pdu_t *)object,
2870			    IDM_STATUS_ABORTED);
2871			break;
2872
2873		case IDM_BUF_MAGIC: {
2874			idm_buf_t *idb = (idm_buf_t *)object;
2875			idm_task_t *idt = idb->idb_task_binding;
2876			mutex_exit(&so_conn->ic_tx_mutex);
2877			mutex_enter(&idt->idt_mutex);
2878			/*
2879			 * TX thread owns the buffer so we expect it to
2880			 * be "in transport"
2881			 */
2882			ASSERT(idb->idb_in_transport);
2883			if (IDM_CONN_ISTGT(ic)) {
2884				/*
2885				 * idm_buf_tx_to_ini_done releases
2886				 * idt->idt_mutex
2887				 */
2888				DTRACE_ISCSI_8(xfer__done,
2889				    idm_conn_t *, idt->idt_ic,
2890				    uintptr_t, idb->idb_buf,
2891				    uint32_t, idb->idb_bufoffset,
2892				    uint64_t, 0, uint32_t, 0, uint32_t, 0,
2893				    uint32_t, idb->idb_xfer_len,
2894				    int, XFER_BUF_TX_TO_INI);
2895				idm_buf_tx_to_ini_done(idt, idb,
2896				    IDM_STATUS_ABORTED);
2897			} else {
2898				idm_so_send_rtt_data_done(idt, idb);
2899				mutex_exit(&idt->idt_mutex);
2900			}
2901			mutex_enter(&so_conn->ic_tx_mutex);
2902			break;
2903		}
2904		default:
2905			IDM_CONN_LOG(CE_WARN,
2906			    "idm_sotx_thread: Unexpected magic "
2907			    "(0x%08x)", object->idm_tx_obj_magic);
2908		}
2909
2910		object = next;
2911	}
2912
2913	mutex_exit(&so_conn->ic_tx_mutex);
2914	idm_conn_rele(ic);
2915	thread_exit();
2916	/*NOTREACHED*/
2917}
2918
2919static void
2920idm_so_socket_set_nonblock(struct sonode *node)
2921{
2922	(void) VOP_SETFL(node->so_vnode, node->so_flag,
2923	    (node->so_state | FNONBLOCK), CRED(), NULL);
2924}
2925
2926static void
2927idm_so_socket_set_block(struct sonode *node)
2928{
2929	(void) VOP_SETFL(node->so_vnode, node->so_flag,
2930	    (node->so_state & (~FNONBLOCK)), CRED(), NULL);
2931}
2932
2933
2934/*
2935 * Called by kernel sockets when the connection has been accepted or
2936 * rejected. In early volo, a "disconnect" callback was sent instead of
2937 * "connectfailed", so we check for both.
2938 */
2939/* ARGSUSED */
2940void
2941idm_so_timed_socket_connect_cb(ksocket_t ks,
2942    ksocket_callback_event_t ev, void *arg, uintptr_t info)
2943{
2944	idm_so_timed_socket_t	*itp = arg;
2945	ASSERT(itp != NULL);
2946	ASSERT(ev == KSOCKET_EV_CONNECTED ||
2947	    ev == KSOCKET_EV_CONNECTFAILED ||
2948	    ev == KSOCKET_EV_DISCONNECTED);
2949
2950	mutex_enter(&idm_so_timed_socket_mutex);
2951	itp->it_callback_called = B_TRUE;
2952	if (ev == KSOCKET_EV_CONNECTED) {
2953		itp->it_socket_error_code = 0;
2954	} else {
2955		/* Make sure the error code is non-zero on error */
2956		if (info == 0)
2957			info = ECONNRESET;
2958		itp->it_socket_error_code = (int)info;
2959	}
2960	cv_signal(&itp->it_cv);
2961	mutex_exit(&idm_so_timed_socket_mutex);
2962}
2963
2964int
2965idm_so_timed_socket_connect(ksocket_t ks,
2966    struct sockaddr_storage *sa, int sa_sz, int login_max_usec)
2967{
2968	clock_t			conn_login_max;
2969	int			rc, nonblocking, rval;
2970	idm_so_timed_socket_t	it;
2971	ksocket_callbacks_t	ks_cb;
2972
2973	conn_login_max = ddi_get_lbolt() + drv_usectohz(login_max_usec);
2974
2975	/*
2976	 * Set to non-block socket mode, with callback on connect
2977	 * Early volo used "disconnected" instead of "connectfailed",
2978	 * so set callback to look for both.
2979	 */
2980	bzero(&it, sizeof (it));
2981	ks_cb.ksock_cb_flags = KSOCKET_CB_CONNECTED |
2982	    KSOCKET_CB_CONNECTFAILED | KSOCKET_CB_DISCONNECTED;
2983	ks_cb.ksock_cb_connected = idm_so_timed_socket_connect_cb;
2984	ks_cb.ksock_cb_connectfailed = idm_so_timed_socket_connect_cb;
2985	ks_cb.ksock_cb_disconnected = idm_so_timed_socket_connect_cb;
2986	cv_init(&it.it_cv, NULL, CV_DEFAULT, NULL);
2987	rc = ksocket_setcallbacks(ks, &ks_cb, &it, CRED());
2988	if (rc != 0)
2989		return (rc);
2990
2991	/* Set to non-blocking mode */
2992	nonblocking = 1;
2993	rc = ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
2994	    CRED());
2995	if (rc != 0)
2996		goto cleanup;
2997
2998	bzero(&it, sizeof (it));
2999	for (;;) {
3000		/*
3001		 * Warning -- in a loopback scenario, the call to
3002		 * the connect_cb can occur inside the call to
3003		 * ksocket_connect. Do not hold the mutex around the
3004		 * call to ksocket_connect.
3005		 */
3006		rc = ksocket_connect(ks, (struct sockaddr *)sa, sa_sz, CRED());
3007		if (rc == 0 || rc == EISCONN) {
3008			/* socket success or already success */
3009			rc = 0;
3010			break;
3011		}
3012		if ((rc != EINPROGRESS) && (rc != EALREADY)) {
3013			break;
3014		}
3015
3016		/* TCP connect still in progress. See if out of time. */
3017		if (ddi_get_lbolt() > conn_login_max) {
3018			/*
3019			 * Connection retry timeout,
3020			 * failed connect to target.
3021			 */
3022			rc = ETIMEDOUT;
3023			break;
3024		}
3025
3026		/*
3027		 * TCP connect still in progress.  Sleep until callback.
3028		 * Do NOT go to sleep if the callback already occurred!
3029		 */
3030		mutex_enter(&idm_so_timed_socket_mutex);
3031		if (!it.it_callback_called) {
3032			(void) cv_timedwait(&it.it_cv,
3033			    &idm_so_timed_socket_mutex, conn_login_max);
3034		}
3035		if (it.it_callback_called) {
3036			rc = it.it_socket_error_code;
3037			mutex_exit(&idm_so_timed_socket_mutex);
3038			break;
3039		}
3040		/* If timer expires, go call ksocket_connect one last time. */
3041		mutex_exit(&idm_so_timed_socket_mutex);
3042	}
3043
3044	/* resume blocking mode */
3045	nonblocking = 0;
3046	(void) ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
3047	    CRED());
3048cleanup:
3049	(void) ksocket_setcallbacks(ks, NULL, NULL, CRED());
3050	cv_destroy(&it.it_cv);
3051	if (rc != 0) {
3052		idm_soshutdown(ks);
3053	}
3054	return (rc);
3055}
3056
3057
3058void
3059idm_addr_to_sa(idm_addr_t *dportal, struct sockaddr_storage *sa)
3060{
3061	int			dp_addr_size;
3062	struct sockaddr_in	*sin;
3063	struct sockaddr_in6	*sin6;
3064
3065	/* Build sockaddr_storage for this portal (idm_addr_t) */
3066	bzero(sa, sizeof (*sa));
3067	dp_addr_size = dportal->a_addr.i_insize;
3068	if (dp_addr_size == sizeof (struct in_addr)) {
3069		/* IPv4 */
3070		sa->ss_family = AF_INET;
3071		sin = (struct sockaddr_in *)sa;
3072		sin->sin_port = htons(dportal->a_port);
3073		bcopy(&dportal->a_addr.i_addr.in4,
3074		    &sin->sin_addr, sizeof (struct in_addr));
3075	} else if (dp_addr_size == sizeof (struct in6_addr)) {
3076		/* IPv6 */
3077		sa->ss_family = AF_INET6;
3078		sin6 = (struct sockaddr_in6 *)sa;
3079		sin6->sin6_port = htons(dportal->a_port);
3080		bcopy(&dportal->a_addr.i_addr.in6,
3081		    &sin6->sin6_addr, sizeof (struct in6_addr));
3082	} else {
3083		ASSERT(0);
3084	}
3085}
3086
3087
3088/*
3089 * return a human-readable form of a sockaddr_storage, in the form
3090 * [ip-address]:port.  This is used in calls to logging functions.
3091 * If several calls to idm_sa_ntop are made within the same invocation
3092 * of a logging function, then each one needs its own buf.
3093 */
3094const char *
3095idm_sa_ntop(const struct sockaddr_storage *sa,
3096    char *buf, size_t size)
3097{
3098	static const char bogus_ip[] = "[0].-1";
3099	char tmp[INET6_ADDRSTRLEN];
3100
3101	switch (sa->ss_family) {
3102	case AF_INET6:
3103		{
3104			const struct sockaddr_in6 *in6 =
3105			    (const struct sockaddr_in6 *) sa;
3106
3107			if (inet_ntop(in6->sin6_family,
3108			    &in6->sin6_addr, tmp, sizeof (tmp)) == NULL) {
3109				goto err;
3110			}
3111			if (strlen(tmp) + sizeof ("[].65535") > size) {
3112				goto err;
3113			}
3114			/* struct sockaddr_storage gets port info from v4 loc */
3115			(void) snprintf(buf, size, "[%s].%u", tmp,
3116			    ntohs(in6->sin6_port));
3117			return (buf);
3118		}
3119	case AF_INET:
3120		{
3121			const struct sockaddr_in *in =
3122			    (const struct sockaddr_in *) sa;
3123
3124			if (inet_ntop(in->sin_family, &in->sin_addr,
3125			    tmp, sizeof (tmp)) == NULL) {
3126				goto err;
3127			}
3128			if (strlen(tmp) + sizeof ("[].65535") > size) {
3129				goto err;
3130			}
3131			(void) snprintf(buf, size,  "[%s].%u", tmp,
3132			    ntohs(in->sin_port));
3133			return (buf);
3134		}
3135	default:
3136		break;
3137	}
3138err:
3139	(void) snprintf(buf, size, "%s", bogus_ip);
3140	return (buf);
3141}
3142