iscsi.c revision 273307
1/*-
2 * Copyright (c) 2012 The FreeBSD Foundation
3 * All rights reserved.
4 *
5 * This software was developed by Edward Tomasz Napierala under sponsorship
6 * from the FreeBSD Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD: stable/10/sys/dev/iscsi/iscsi.c 273307 2014-10-20 07:28:18Z mav $");
33
34#include <sys/param.h>
35#include <sys/condvar.h>
36#include <sys/conf.h>
37#include <sys/eventhandler.h>
38#include <sys/file.h>
39#include <sys/kernel.h>
40#include <sys/kthread.h>
41#include <sys/lock.h>
42#include <sys/malloc.h>
43#include <sys/mutex.h>
44#include <sys/module.h>
45#include <sys/sysctl.h>
46#include <sys/systm.h>
47#include <sys/sx.h>
48#include <vm/uma.h>
49
50#include <cam/cam.h>
51#include <cam/cam_ccb.h>
52#include <cam/cam_xpt.h>
53#include <cam/cam_debug.h>
54#include <cam/cam_sim.h>
55#include <cam/cam_xpt_sim.h>
56#include <cam/cam_xpt_periph.h>
57#include <cam/cam_periph.h>
58#include <cam/scsi/scsi_all.h>
59#include <cam/scsi/scsi_message.h>
60
61#include <dev/iscsi/icl.h>
62#include <dev/iscsi/iscsi_ioctl.h>
63#include <dev/iscsi/iscsi_proto.h>
64#include <dev/iscsi/iscsi.h>
65
66#ifdef ICL_KERNEL_PROXY
67#include <sys/socketvar.h>
68#endif
69
70#ifdef ICL_KERNEL_PROXY
71FEATURE(iscsi_kernel_proxy, "iSCSI initiator built with ICL_KERNEL_PROXY");
72#endif
73
74/*
75 * XXX: This is global so the iscsi_unload() can access it.
76 * 	Think about how to do this properly.
77 */
78static struct iscsi_softc	*sc;
79
80SYSCTL_NODE(_kern, OID_AUTO, iscsi, CTLFLAG_RD, 0, "iSCSI initiator");
81static int debug = 1;
82TUNABLE_INT("kern.iscsi.debug", &debug);
83SYSCTL_INT(_kern_iscsi, OID_AUTO, debug, CTLFLAG_RWTUN,
84    &debug, 0, "Enable debug messages");
85static int ping_timeout = 5;
86TUNABLE_INT("kern.iscsi.ping_timeout", &ping_timeout);
87SYSCTL_INT(_kern_iscsi, OID_AUTO, ping_timeout, CTLFLAG_RWTUN, &ping_timeout,
88    0, "Timeout for ping (NOP-Out) requests, in seconds");
89static int iscsid_timeout = 60;
90TUNABLE_INT("kern.iscsi.iscsid_timeout", &iscsid_timeout);
91SYSCTL_INT(_kern_iscsi, OID_AUTO, iscsid_timeout, CTLFLAG_RWTUN, &iscsid_timeout,
92    0, "Time to wait for iscsid(8) to handle reconnection, in seconds");
93static int login_timeout = 60;
94TUNABLE_INT("kern.iscsi.login_timeout", &login_timeout);
95SYSCTL_INT(_kern_iscsi, OID_AUTO, login_timeout, CTLFLAG_RWTUN, &login_timeout,
96    0, "Time to wait for iscsid(8) to finish Login Phase, in seconds");
97static int maxtags = 255;
98TUNABLE_INT("kern.iscsi.maxtags", &maxtags);
99SYSCTL_INT(_kern_iscsi, OID_AUTO, maxtags, CTLFLAG_RWTUN, &maxtags,
100    0, "Max number of IO requests queued");
101static int fail_on_disconnection = 0;
102TUNABLE_INT("kern.iscsi.fail_on_disconnection", &fail_on_disconnection);
103SYSCTL_INT(_kern_iscsi, OID_AUTO, fail_on_disconnection, CTLFLAG_RWTUN,
104    &fail_on_disconnection, 0, "Destroy CAM SIM on connection failure");
105
106static MALLOC_DEFINE(M_ISCSI, "iSCSI", "iSCSI initiator");
107static uma_zone_t iscsi_outstanding_zone;
108
/*
 * Map an ICL connection (or a PDU that belongs to one) back to the iSCSI
 * session stored in the connection's ic_prv0 field.  The arguments are
 * parenthesized so that any pointer-valued expression can be passed.
 */
#define	CONN_SESSION(X)	((struct iscsi_session *)(X)->ic_prv0)
#define	PDU_SESSION(X)	(CONN_SESSION((X)->ip_conn))

/*
 * Logging helpers.  X must be a string literal holding a printf(9)
 * format; it gets concatenated with the prefix and a trailing newline.
 * Debug messages are printed when "debug" is greater than 1, warnings
 * when it is greater than 0.
 */
#define	ISCSI_DEBUG(X, ...)						\
	do {								\
		if (debug > 1) 						\
			printf("%s: " X "\n", __func__, ## __VA_ARGS__);\
	} while (0)

#define	ISCSI_WARN(X, ...)						\
	do {								\
		if (debug > 0) {					\
			printf("WARNING: %s: " X "\n",			\
			    __func__, ## __VA_ARGS__);			\
		}							\
	} while (0)

/*
 * Same as above, but prefixed with the target address and target name
 * taken from the configuration of session S.
 */
#define	ISCSI_SESSION_DEBUG(S, X, ...)					\
	do {								\
		if (debug > 1) {					\
			printf("%s: %s (%s): " X "\n",			\
			    __func__, (S)->is_conf.isc_target_addr,	\
			    (S)->is_conf.isc_target, ## __VA_ARGS__);	\
		}							\
	} while (0)

#define	ISCSI_SESSION_WARN(S, X, ...)					\
	do {								\
		if (debug > 0) {					\
			printf("WARNING: %s (%s): " X "\n",		\
			    (S)->is_conf.isc_target_addr,		\
			    (S)->is_conf.isc_target, ## __VA_ARGS__);	\
		}							\
	} while (0)

/* Per-session mutex helpers; X is a pointer to struct iscsi_session. */
#define ISCSI_SESSION_LOCK(X)		mtx_lock(&(X)->is_lock)
#define ISCSI_SESSION_UNLOCK(X)		mtx_unlock(&(X)->is_lock)
#define ISCSI_SESSION_LOCK_ASSERT(X)	mtx_assert(&(X)->is_lock, MA_OWNED)
147
148static int	iscsi_ioctl(struct cdev *dev, u_long cmd, caddr_t arg,
149		    int mode, struct thread *td);
150
151static struct cdevsw iscsi_cdevsw = {
152     .d_version = D_VERSION,
153     .d_ioctl   = iscsi_ioctl,
154     .d_name    = "iscsi",
155};
156
157static void	iscsi_pdu_queue_locked(struct icl_pdu *request);
158static void	iscsi_pdu_queue(struct icl_pdu *request);
159static void	iscsi_pdu_update_statsn(const struct icl_pdu *response);
160static void	iscsi_pdu_handle_nop_in(struct icl_pdu *response);
161static void	iscsi_pdu_handle_scsi_response(struct icl_pdu *response);
162static void	iscsi_pdu_handle_task_response(struct icl_pdu *response);
163static void	iscsi_pdu_handle_data_in(struct icl_pdu *response);
164static void	iscsi_pdu_handle_logout_response(struct icl_pdu *response);
165static void	iscsi_pdu_handle_r2t(struct icl_pdu *response);
166static void	iscsi_pdu_handle_async_message(struct icl_pdu *response);
167static void	iscsi_pdu_handle_reject(struct icl_pdu *response);
168static void	iscsi_session_reconnect(struct iscsi_session *is);
169static void	iscsi_session_terminate(struct iscsi_session *is);
170static void	iscsi_action(struct cam_sim *sim, union ccb *ccb);
171static void	iscsi_poll(struct cam_sim *sim);
172static struct iscsi_outstanding	*iscsi_outstanding_find(struct iscsi_session *is,
173		    uint32_t initiator_task_tag);
174static struct iscsi_outstanding	*iscsi_outstanding_add(struct iscsi_session *is,
175		    uint32_t initiator_task_tag, union ccb *ccb);
176static void	iscsi_outstanding_remove(struct iscsi_session *is,
177		    struct iscsi_outstanding *io);
178
179static bool
180iscsi_pdu_prepare(struct icl_pdu *request)
181{
182	struct iscsi_session *is;
183	struct iscsi_bhs_scsi_command *bhssc;
184
185	is = PDU_SESSION(request);
186
187	ISCSI_SESSION_LOCK_ASSERT(is);
188
189	/*
190	 * We're only using fields common for all the request
191	 * (initiator -> target) PDUs.
192	 */
193	bhssc = (struct iscsi_bhs_scsi_command *)request->ip_bhs;
194
195	/*
196	 * Data-Out PDU does not contain CmdSN.
197	 */
198	if (bhssc->bhssc_opcode != ISCSI_BHS_OPCODE_SCSI_DATA_OUT) {
199		if (is->is_cmdsn > is->is_maxcmdsn &&
200		    (bhssc->bhssc_opcode & ISCSI_BHS_OPCODE_IMMEDIATE) == 0) {
201			/*
202			 * Current MaxCmdSN prevents us from sending any more
203			 * SCSI Command PDUs to the target; postpone the PDU.
204			 * It will get resent by either iscsi_pdu_queue(),
205			 * or by maintenance thread.
206			 */
207#if 0
208			ISCSI_SESSION_DEBUG(is, "postponing send, CmdSN %d, ExpCmdSN %d, MaxCmdSN %d, opcode 0x%x",
209			    is->is_cmdsn, is->is_expcmdsn, is->is_maxcmdsn, bhssc->bhssc_opcode);
210#endif
211			return (true);
212		}
213		bhssc->bhssc_cmdsn = htonl(is->is_cmdsn);
214		if ((bhssc->bhssc_opcode & ISCSI_BHS_OPCODE_IMMEDIATE) == 0)
215			is->is_cmdsn++;
216	}
217	bhssc->bhssc_expstatsn = htonl(is->is_statsn + 1);
218
219	return (false);
220}
221
222static void
223iscsi_session_send_postponed(struct iscsi_session *is)
224{
225	struct icl_pdu *request;
226	bool postpone;
227
228	ISCSI_SESSION_LOCK_ASSERT(is);
229
230	while (!STAILQ_EMPTY(&is->is_postponed)) {
231		request = STAILQ_FIRST(&is->is_postponed);
232		postpone = iscsi_pdu_prepare(request);
233		if (postpone)
234			break;
235		STAILQ_REMOVE_HEAD(&is->is_postponed, ip_next);
236		icl_pdu_queue(request);
237	}
238}
239
240static void
241iscsi_pdu_queue_locked(struct icl_pdu *request)
242{
243	struct iscsi_session *is;
244	bool postpone;
245
246	is = PDU_SESSION(request);
247	ISCSI_SESSION_LOCK_ASSERT(is);
248	iscsi_session_send_postponed(is);
249	postpone = iscsi_pdu_prepare(request);
250	if (postpone) {
251		STAILQ_INSERT_TAIL(&is->is_postponed, request, ip_next);
252		return;
253	}
254	icl_pdu_queue(request);
255}
256
/*
 * Variant of iscsi_pdu_queue_locked() for callers that do not hold the
 * session lock: takes it, queues the PDU and drops it again.
 */
static void
iscsi_pdu_queue(struct icl_pdu *request)
{
	struct iscsi_session *session = PDU_SESSION(request);

	ISCSI_SESSION_LOCK(session);
	iscsi_pdu_queue_locked(request);
	ISCSI_SESSION_UNLOCK(session);
}
267
268static void
269iscsi_session_logout(struct iscsi_session *is)
270{
271	struct icl_pdu *request;
272	struct iscsi_bhs_logout_request *bhslr;
273
274	request = icl_pdu_new_bhs(is->is_conn, M_NOWAIT);
275	if (request == NULL)
276		return;
277
278	bhslr = (struct iscsi_bhs_logout_request *)request->ip_bhs;
279	bhslr->bhslr_opcode = ISCSI_BHS_OPCODE_LOGOUT_REQUEST;
280	bhslr->bhslr_reason = BHSLR_REASON_CLOSE_SESSION;
281	iscsi_pdu_queue_locked(request);
282}
283
284static void
285iscsi_session_terminate_task(struct iscsi_session *is,
286    struct iscsi_outstanding *io, bool requeue)
287{
288
289	if (io->io_ccb != NULL) {
290		io->io_ccb->ccb_h.status &= ~(CAM_SIM_QUEUED | CAM_STATUS_MASK);
291		if (requeue)
292			io->io_ccb->ccb_h.status |= CAM_REQUEUE_REQ;
293		else
294			io->io_ccb->ccb_h.status |= CAM_REQ_ABORTED;
295		if ((io->io_ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
296			io->io_ccb->ccb_h.status |= CAM_DEV_QFRZN;
297			xpt_freeze_devq(io->io_ccb->ccb_h.path, 1);
298			ISCSI_SESSION_DEBUG(is, "freezing devq");
299		}
300		xpt_done(io->io_ccb);
301	}
302	iscsi_outstanding_remove(is, io);
303}
304
305static void
306iscsi_session_terminate_tasks(struct iscsi_session *is, bool requeue)
307{
308	struct iscsi_outstanding *io, *tmp;
309
310	ISCSI_SESSION_LOCK_ASSERT(is);
311
312	TAILQ_FOREACH_SAFE(io, &is->is_outstanding, io_next, tmp) {
313		iscsi_session_terminate_task(is, io, requeue);
314	}
315}
316
317static void
318iscsi_session_cleanup(struct iscsi_session *is, bool destroy_sim)
319{
320	struct icl_pdu *pdu;
321
322	ISCSI_SESSION_LOCK_ASSERT(is);
323
324	/*
325	 * Don't queue any new PDUs.
326	 */
327	if (is->is_sim != NULL && is->is_simq_frozen == false) {
328		ISCSI_SESSION_DEBUG(is, "freezing");
329		xpt_freeze_simq(is->is_sim, 1);
330		is->is_simq_frozen = true;
331	}
332
333	/*
334	 * Remove postponed PDUs.
335	 */
336	while (!STAILQ_EMPTY(&is->is_postponed)) {
337		pdu = STAILQ_FIRST(&is->is_postponed);
338		STAILQ_REMOVE_HEAD(&is->is_postponed, ip_next);
339		icl_pdu_free(pdu);
340	}
341
342	if (destroy_sim == false) {
343		/*
344		 * Terminate SCSI tasks, asking CAM to requeue them.
345		 */
346		iscsi_session_terminate_tasks(is, true);
347		return;
348	}
349
350	iscsi_session_terminate_tasks(is, false);
351
352	if (is->is_sim == NULL)
353		return;
354
355	ISCSI_SESSION_DEBUG(is, "deregistering SIM");
356	xpt_async(AC_LOST_DEVICE, is->is_path, NULL);
357
358	if (is->is_simq_frozen) {
359		xpt_release_simq(is->is_sim, 1);
360		is->is_simq_frozen = false;
361	}
362
363	xpt_free_path(is->is_path);
364	is->is_path = NULL;
365	xpt_bus_deregister(cam_sim_path(is->is_sim));
366	cam_sim_free(is->is_sim, TRUE /*free_devq*/);
367	is->is_sim = NULL;
368	is->is_devq = NULL;
369}
370
371static void
372iscsi_maintenance_thread_reconnect(struct iscsi_session *is)
373{
374
375	icl_conn_shutdown(is->is_conn);
376	icl_conn_close(is->is_conn);
377
378	ISCSI_SESSION_LOCK(is);
379
380	is->is_connected = false;
381	is->is_reconnecting = false;
382	is->is_login_phase = false;
383
384#ifdef ICL_KERNEL_PROXY
385	if (is->is_login_pdu != NULL) {
386		icl_pdu_free(is->is_login_pdu);
387		is->is_login_pdu = NULL;
388	}
389	cv_signal(&is->is_login_cv);
390#endif
391
392	if (fail_on_disconnection) {
393		ISCSI_SESSION_DEBUG(is, "connection failed, destroying devices");
394		iscsi_session_cleanup(is, true);
395	} else {
396		iscsi_session_cleanup(is, false);
397	}
398
399	KASSERT(TAILQ_EMPTY(&is->is_outstanding),
400	    ("destroying session with active tasks"));
401	KASSERT(STAILQ_EMPTY(&is->is_postponed),
402	    ("destroying session with postponed PDUs"));
403
404	/*
405	 * Request immediate reconnection from iscsid(8).
406	 */
407	//ISCSI_SESSION_DEBUG(is, "waking up iscsid(8)");
408	is->is_waiting_for_iscsid = true;
409	strlcpy(is->is_reason, "Waiting for iscsid(8)", sizeof(is->is_reason));
410	is->is_timeout = 0;
411	ISCSI_SESSION_UNLOCK(is);
412	cv_signal(&is->is_softc->sc_cv);
413}
414
415static void
416iscsi_maintenance_thread_terminate(struct iscsi_session *is)
417{
418	struct iscsi_softc *sc;
419
420	sc = is->is_softc;
421	sx_xlock(&sc->sc_lock);
422	TAILQ_REMOVE(&sc->sc_sessions, is, is_next);
423	sx_xunlock(&sc->sc_lock);
424
425	icl_conn_close(is->is_conn);
426
427	ISCSI_SESSION_LOCK(is);
428
429	KASSERT(is->is_terminating, ("is_terminating == false"));
430
431#ifdef ICL_KERNEL_PROXY
432	if (is->is_login_pdu != NULL) {
433		icl_pdu_free(is->is_login_pdu);
434		is->is_login_pdu = NULL;
435	}
436	cv_signal(&is->is_login_cv);
437#endif
438
439	callout_drain(&is->is_callout);
440
441	iscsi_session_cleanup(is, true);
442
443	KASSERT(TAILQ_EMPTY(&is->is_outstanding),
444	    ("destroying session with active tasks"));
445	KASSERT(STAILQ_EMPTY(&is->is_postponed),
446	    ("destroying session with postponed PDUs"));
447
448	ISCSI_SESSION_UNLOCK(is);
449
450	icl_conn_free(is->is_conn);
451	mtx_destroy(&is->is_lock);
452	cv_destroy(&is->is_maintenance_cv);
453#ifdef ICL_KERNEL_PROXY
454	cv_destroy(&is->is_login_cv);
455#endif
456	ISCSI_SESSION_DEBUG(is, "terminated");
457	free(is, M_ISCSI);
458
459	/*
460	 * The iscsi_unload() routine might be waiting.
461	 */
462	cv_signal(&sc->sc_cv);
463}
464
465static void
466iscsi_maintenance_thread(void *arg)
467{
468	struct iscsi_session *is;
469
470	is = arg;
471
472	for (;;) {
473		ISCSI_SESSION_LOCK(is);
474		if (is->is_reconnecting == false &&
475		    is->is_terminating == false &&
476		    STAILQ_EMPTY(&is->is_postponed))
477			cv_wait(&is->is_maintenance_cv, &is->is_lock);
478
479		if (is->is_reconnecting) {
480			ISCSI_SESSION_UNLOCK(is);
481			iscsi_maintenance_thread_reconnect(is);
482			continue;
483		}
484
485		if (is->is_terminating) {
486			ISCSI_SESSION_UNLOCK(is);
487			iscsi_maintenance_thread_terminate(is);
488			kthread_exit();
489			return;
490		}
491
492		iscsi_session_send_postponed(is);
493		ISCSI_SESSION_UNLOCK(is);
494	}
495}
496
497static void
498iscsi_session_reconnect(struct iscsi_session *is)
499{
500
501	/*
502	 * XXX: We can't use locking here, because
503	 * 	it's being called from various contexts.
504	 * 	Hope it doesn't break anything.
505	 */
506	if (is->is_reconnecting)
507		return;
508
509	is->is_reconnecting = true;
510	cv_signal(&is->is_maintenance_cv);
511}
512
513static void
514iscsi_session_terminate(struct iscsi_session *is)
515{
516	if (is->is_terminating)
517		return;
518
519	is->is_terminating = true;
520
521#if 0
522	iscsi_session_logout(is);
523#endif
524	cv_signal(&is->is_maintenance_cv);
525}
526
527static void
528iscsi_callout(void *context)
529{
530	struct icl_pdu *request;
531	struct iscsi_bhs_nop_out *bhsno;
532	struct iscsi_session *is;
533	bool reconnect_needed = false;
534
535	is = context;
536
537	if (is->is_terminating)
538		return;
539
540	callout_schedule(&is->is_callout, 1 * hz);
541
542	ISCSI_SESSION_LOCK(is);
543	is->is_timeout++;
544
545	if (is->is_waiting_for_iscsid) {
546		if (iscsid_timeout > 0 && is->is_timeout > iscsid_timeout) {
547			ISCSI_SESSION_WARN(is, "timed out waiting for iscsid(8) "
548			    "for %d seconds; reconnecting",
549			    is->is_timeout);
550			reconnect_needed = true;
551		}
552		goto out;
553	}
554
555	if (is->is_login_phase) {
556		if (login_timeout > 0 && is->is_timeout > login_timeout) {
557			ISCSI_SESSION_WARN(is, "login timed out after %d seconds; "
558			    "reconnecting", is->is_timeout);
559			reconnect_needed = true;
560		}
561		goto out;
562	}
563
564	if (ping_timeout <= 0) {
565		/*
566		 * Pings are disabled.  Don't send NOP-Out in this case.
567		 * Reset the timeout, to avoid triggering reconnection,
568		 * should the user decide to reenable them.
569		 */
570		is->is_timeout = 0;
571		goto out;
572	}
573
574	if (is->is_timeout >= ping_timeout) {
575		ISCSI_SESSION_WARN(is, "no ping reply (NOP-In) after %d seconds; "
576		    "reconnecting", ping_timeout);
577		reconnect_needed = true;
578		goto out;
579	}
580
581	ISCSI_SESSION_UNLOCK(is);
582
583	/*
584	 * If the ping was reset less than one second ago - which means
585	 * that we've received some PDU during the last second - assume
586	 * the traffic flows correctly and don't bother sending a NOP-Out.
587	 *
588	 * (It's 2 - one for one second, and one for incrementing is_timeout
589	 * earlier in this routine.)
590	 */
591	if (is->is_timeout < 2)
592		return;
593
594	request = icl_pdu_new_bhs(is->is_conn, M_NOWAIT);
595	if (request == NULL) {
596		ISCSI_SESSION_WARN(is, "failed to allocate PDU");
597		return;
598	}
599	bhsno = (struct iscsi_bhs_nop_out *)request->ip_bhs;
600	bhsno->bhsno_opcode = ISCSI_BHS_OPCODE_NOP_OUT |
601	    ISCSI_BHS_OPCODE_IMMEDIATE;
602	bhsno->bhsno_flags = 0x80;
603	bhsno->bhsno_target_transfer_tag = 0xffffffff;
604	iscsi_pdu_queue(request);
605	return;
606
607out:
608	ISCSI_SESSION_UNLOCK(is);
609
610	if (reconnect_needed)
611		iscsi_session_reconnect(is);
612}
613
614static void
615iscsi_pdu_update_statsn(const struct icl_pdu *response)
616{
617	const struct iscsi_bhs_data_in *bhsdi;
618	struct iscsi_session *is;
619	uint32_t expcmdsn, maxcmdsn;
620
621	is = PDU_SESSION(response);
622
623	ISCSI_SESSION_LOCK_ASSERT(is);
624
625	/*
626	 * We're only using fields common for all the response
627	 * (target -> initiator) PDUs.
628	 */
629	bhsdi = (const struct iscsi_bhs_data_in *)response->ip_bhs;
630	/*
631	 * Ok, I lied.  In case of Data-In, "The fields StatSN, Status,
632	 * and Residual Count only have meaningful content if the S bit
633	 * is set to 1", so we also need to check the bit specific for
634	 * Data-In PDU.
635	 */
636	if (bhsdi->bhsdi_opcode != ISCSI_BHS_OPCODE_SCSI_DATA_IN ||
637	    (bhsdi->bhsdi_flags & BHSDI_FLAGS_S) != 0) {
638		if (ntohl(bhsdi->bhsdi_statsn) < is->is_statsn) {
639			ISCSI_SESSION_WARN(is,
640			    "PDU StatSN %d >= session StatSN %d, opcode 0x%x",
641			    is->is_statsn, ntohl(bhsdi->bhsdi_statsn),
642			    bhsdi->bhsdi_opcode);
643		}
644		is->is_statsn = ntohl(bhsdi->bhsdi_statsn);
645	}
646
647	expcmdsn = ntohl(bhsdi->bhsdi_expcmdsn);
648	maxcmdsn = ntohl(bhsdi->bhsdi_maxcmdsn);
649
650	/*
651	 * XXX: Compare using Serial Arithmetic Sense.
652	 */
653	if (maxcmdsn + 1 < expcmdsn) {
654		ISCSI_SESSION_DEBUG(is, "PDU MaxCmdSN %d + 1 < PDU ExpCmdSN %d; ignoring",
655		    maxcmdsn, expcmdsn);
656	} else {
657		if (maxcmdsn > is->is_maxcmdsn) {
658			is->is_maxcmdsn = maxcmdsn;
659
660			/*
661			 * Command window increased; kick the maintanance thread
662			 * to send out postponed commands.
663			 */
664			if (!STAILQ_EMPTY(&is->is_postponed))
665				cv_signal(&is->is_maintenance_cv);
666		} else if (maxcmdsn < is->is_maxcmdsn) {
667			ISCSI_SESSION_DEBUG(is, "PDU MaxCmdSN %d < session MaxCmdSN %d; ignoring",
668			    maxcmdsn, is->is_maxcmdsn);
669		}
670
671		if (expcmdsn > is->is_expcmdsn) {
672			is->is_expcmdsn = expcmdsn;
673		} else if (expcmdsn < is->is_expcmdsn) {
674			ISCSI_SESSION_DEBUG(is, "PDU ExpCmdSN %d < session ExpCmdSN %d; ignoring",
675			    expcmdsn, is->is_expcmdsn);
676		}
677	}
678
679	/*
680	 * Every incoming PDU - not just NOP-In - resets the ping timer.
681	 * The purpose of the timeout is to reset the connection when it stalls;
682	 * we don't want this to happen when NOP-In or NOP-Out ends up delayed
683	 * in some queue.
684	 */
685	is->is_timeout = 0;
686}
687
688static void
689iscsi_receive_callback(struct icl_pdu *response)
690{
691	struct iscsi_session *is;
692
693	is = PDU_SESSION(response);
694
695	ISCSI_SESSION_LOCK(is);
696
697#ifdef ICL_KERNEL_PROXY
698	if (is->is_login_phase) {
699		if (is->is_login_pdu == NULL)
700			is->is_login_pdu = response;
701		else
702			icl_pdu_free(response);
703		ISCSI_SESSION_UNLOCK(is);
704		cv_signal(&is->is_login_cv);
705		return;
706	}
707#endif
708
709	iscsi_pdu_update_statsn(response);
710
711	/*
712	 * The handling routine is responsible for freeing the PDU
713	 * when it's no longer needed.
714	 */
715	switch (response->ip_bhs->bhs_opcode) {
716	case ISCSI_BHS_OPCODE_NOP_IN:
717		iscsi_pdu_handle_nop_in(response);
718		break;
719	case ISCSI_BHS_OPCODE_SCSI_RESPONSE:
720		iscsi_pdu_handle_scsi_response(response);
721		break;
722	case ISCSI_BHS_OPCODE_TASK_RESPONSE:
723		iscsi_pdu_handle_task_response(response);
724		break;
725	case ISCSI_BHS_OPCODE_SCSI_DATA_IN:
726		iscsi_pdu_handle_data_in(response);
727		break;
728	case ISCSI_BHS_OPCODE_LOGOUT_RESPONSE:
729		iscsi_pdu_handle_logout_response(response);
730		break;
731	case ISCSI_BHS_OPCODE_R2T:
732		iscsi_pdu_handle_r2t(response);
733		break;
734	case ISCSI_BHS_OPCODE_ASYNC_MESSAGE:
735		iscsi_pdu_handle_async_message(response);
736		break;
737	case ISCSI_BHS_OPCODE_REJECT:
738		iscsi_pdu_handle_reject(response);
739		break;
740	default:
741		ISCSI_SESSION_WARN(is, "received PDU with unsupported "
742		    "opcode 0x%x; reconnecting",
743		    response->ip_bhs->bhs_opcode);
744		iscsi_session_reconnect(is);
745		icl_pdu_free(response);
746	}
747
748	ISCSI_SESSION_UNLOCK(is);
749}
750
/*
 * Handler for errors on ICL connection "ic": log a warning and request
 * a reconnection of the session owning the connection.
 */
static void
iscsi_error_callback(struct icl_conn *ic)
{
	struct iscsi_session *session = CONN_SESSION(ic);

	ISCSI_SESSION_WARN(session, "connection error; reconnecting");
	iscsi_session_reconnect(session);
}
761
762static void
763iscsi_pdu_handle_nop_in(struct icl_pdu *response)
764{
765	struct iscsi_session *is;
766	struct iscsi_bhs_nop_out *bhsno;
767	struct iscsi_bhs_nop_in *bhsni;
768	struct icl_pdu *request;
769	void *data = NULL;
770	size_t datasize;
771	int error;
772
773	is = PDU_SESSION(response);
774	bhsni = (struct iscsi_bhs_nop_in *)response->ip_bhs;
775
776	if (bhsni->bhsni_target_transfer_tag == 0xffffffff) {
777		/*
778		 * Nothing to do; iscsi_pdu_update_statsn() already
779		 * zeroed the timeout.
780		 */
781		icl_pdu_free(response);
782		return;
783	}
784
785	datasize = icl_pdu_data_segment_length(response);
786	if (datasize > 0) {
787		data = malloc(datasize, M_ISCSI, M_NOWAIT | M_ZERO);
788		if (data == NULL) {
789			ISCSI_SESSION_WARN(is, "failed to allocate memory; "
790			    "reconnecting");
791			icl_pdu_free(response);
792			iscsi_session_reconnect(is);
793			return;
794		}
795		icl_pdu_get_data(response, 0, data, datasize);
796	}
797
798	request = icl_pdu_new_bhs(response->ip_conn, M_NOWAIT);
799	if (request == NULL) {
800		ISCSI_SESSION_WARN(is, "failed to allocate memory; "
801		    "reconnecting");
802		free(data, M_ISCSI);
803		icl_pdu_free(response);
804		iscsi_session_reconnect(is);
805		return;
806	}
807	bhsno = (struct iscsi_bhs_nop_out *)request->ip_bhs;
808	bhsno->bhsno_opcode = ISCSI_BHS_OPCODE_NOP_OUT |
809	    ISCSI_BHS_OPCODE_IMMEDIATE;
810	bhsno->bhsno_flags = 0x80;
811	bhsno->bhsno_initiator_task_tag = 0xffffffff;
812	bhsno->bhsno_target_transfer_tag = bhsni->bhsni_target_transfer_tag;
813	if (datasize > 0) {
814		error = icl_pdu_append_data(request, data, datasize, M_NOWAIT);
815		if (error != 0) {
816			ISCSI_SESSION_WARN(is, "failed to allocate memory; "
817			    "reconnecting");
818			free(data, M_ISCSI);
819			icl_pdu_free(request);
820			icl_pdu_free(response);
821			iscsi_session_reconnect(is);
822			return;
823		}
824		free(data, M_ISCSI);
825	}
826
827	icl_pdu_free(response);
828	iscsi_pdu_queue_locked(request);
829}
830
831static void
832iscsi_pdu_handle_scsi_response(struct icl_pdu *response)
833{
834	struct iscsi_bhs_scsi_response *bhssr;
835	struct iscsi_outstanding *io;
836	struct iscsi_session *is;
837	struct ccb_scsiio *csio;
838	size_t data_segment_len;
839	uint16_t sense_len;
840
841	is = PDU_SESSION(response);
842
843	bhssr = (struct iscsi_bhs_scsi_response *)response->ip_bhs;
844	io = iscsi_outstanding_find(is, bhssr->bhssr_initiator_task_tag);
845	if (io == NULL || io->io_ccb == NULL) {
846		ISCSI_SESSION_WARN(is, "bad itt 0x%x", bhssr->bhssr_initiator_task_tag);
847		icl_pdu_free(response);
848		iscsi_session_reconnect(is);
849		return;
850	}
851
852	if (bhssr->bhssr_response != BHSSR_RESPONSE_COMMAND_COMPLETED) {
853		ISCSI_SESSION_WARN(is, "service response 0x%x", bhssr->bhssr_response);
854 		if ((io->io_ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
855 			xpt_freeze_devq(io->io_ccb->ccb_h.path, 1);
856			ISCSI_SESSION_DEBUG(is, "freezing devq");
857		}
858 		io->io_ccb->ccb_h.status = CAM_REQ_CMP_ERR | CAM_DEV_QFRZN;
859	} else if (bhssr->bhssr_status == 0) {
860		io->io_ccb->ccb_h.status = CAM_REQ_CMP;
861	} else {
862 		if ((io->io_ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
863 			xpt_freeze_devq(io->io_ccb->ccb_h.path, 1);
864			ISCSI_SESSION_DEBUG(is, "freezing devq");
865		}
866 		io->io_ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR | CAM_DEV_QFRZN;
867		io->io_ccb->csio.scsi_status = bhssr->bhssr_status;
868	}
869
870	if (bhssr->bhssr_flags & BHSSR_FLAGS_RESIDUAL_OVERFLOW) {
871		ISCSI_SESSION_WARN(is, "target indicated residual overflow");
872		icl_pdu_free(response);
873		iscsi_session_reconnect(is);
874		return;
875	}
876
877	csio = &io->io_ccb->csio;
878
879	data_segment_len = icl_pdu_data_segment_length(response);
880	if (data_segment_len > 0) {
881		if (data_segment_len < sizeof(sense_len)) {
882			ISCSI_SESSION_WARN(is, "truncated data segment (%zd bytes)",
883			    data_segment_len);
884			if ((io->io_ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
885				xpt_freeze_devq(io->io_ccb->ccb_h.path, 1);
886				ISCSI_SESSION_DEBUG(is, "freezing devq");
887			}
888			io->io_ccb->ccb_h.status = CAM_REQ_CMP_ERR | CAM_DEV_QFRZN;
889			goto out;
890		}
891		icl_pdu_get_data(response, 0, &sense_len, sizeof(sense_len));
892		sense_len = ntohs(sense_len);
893#if 0
894		ISCSI_SESSION_DEBUG(is, "sense_len %d, data len %zd",
895		    sense_len, data_segment_len);
896#endif
897		if (sizeof(sense_len) + sense_len > data_segment_len) {
898			ISCSI_SESSION_WARN(is, "truncated data segment "
899			    "(%zd bytes, should be %zd)",
900			    data_segment_len, sizeof(sense_len) + sense_len);
901			if ((io->io_ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
902				xpt_freeze_devq(io->io_ccb->ccb_h.path, 1);
903				ISCSI_SESSION_DEBUG(is, "freezing devq");
904			}
905			io->io_ccb->ccb_h.status = CAM_REQ_CMP_ERR | CAM_DEV_QFRZN;
906			goto out;
907		} else if (sizeof(sense_len) + sense_len < data_segment_len)
908			ISCSI_SESSION_WARN(is, "oversize data segment "
909			    "(%zd bytes, should be %zd)",
910			    data_segment_len, sizeof(sense_len) + sense_len);
911		if (sense_len > csio->sense_len) {
912			ISCSI_SESSION_DEBUG(is, "truncating sense from %d to %d",
913			    sense_len, csio->sense_len);
914			sense_len = csio->sense_len;
915		}
916		icl_pdu_get_data(response, sizeof(sense_len), &csio->sense_data, sense_len);
917		csio->sense_resid = csio->sense_len - sense_len;
918		io->io_ccb->ccb_h.status |= CAM_AUTOSNS_VALID;
919	}
920
921out:
922	if (bhssr->bhssr_flags & BHSSR_FLAGS_RESIDUAL_UNDERFLOW)
923		csio->resid = ntohl(bhssr->bhssr_residual_count);
924
925	if ((csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) {
926		KASSERT(io->io_received <= csio->dxfer_len,
927		    ("io->io_received > csio->dxfer_len"));
928		if (io->io_received < csio->dxfer_len) {
929			if (csio->resid != csio->dxfer_len - io->io_received) {
930				ISCSI_SESSION_WARN(is, "underflow mismatch: "
931				    "target indicates %d, we calculated %zd",
932				    csio->resid,
933				    csio->dxfer_len - io->io_received);
934			}
935			csio->resid = csio->dxfer_len - io->io_received;
936		}
937	}
938
939	xpt_done(io->io_ccb);
940	iscsi_outstanding_remove(is, io);
941	icl_pdu_free(response);
942}
943
944static void
945iscsi_pdu_handle_task_response(struct icl_pdu *response)
946{
947	struct iscsi_bhs_task_management_response *bhstmr;
948	struct iscsi_outstanding *io, *aio;
949	struct iscsi_session *is;
950
951	is = PDU_SESSION(response);
952
953	bhstmr = (struct iscsi_bhs_task_management_response *)response->ip_bhs;
954	io = iscsi_outstanding_find(is, bhstmr->bhstmr_initiator_task_tag);
955	if (io == NULL || io->io_ccb != NULL) {
956		ISCSI_SESSION_WARN(is, "bad itt 0x%x",
957		    bhstmr->bhstmr_initiator_task_tag);
958		icl_pdu_free(response);
959		iscsi_session_reconnect(is);
960		return;
961	}
962
963	if (bhstmr->bhstmr_response != BHSTMR_RESPONSE_FUNCTION_COMPLETE) {
964		ISCSI_SESSION_WARN(is, "task response 0x%x",
965		    bhstmr->bhstmr_response);
966	} else {
967		aio = iscsi_outstanding_find(is, io->io_datasn);
968		if (aio != NULL && aio->io_ccb != NULL)
969			iscsi_session_terminate_task(is, aio, false);
970	}
971
972	iscsi_outstanding_remove(is, io);
973	icl_pdu_free(response);
974}
975
976static void
977iscsi_pdu_handle_data_in(struct icl_pdu *response)
978{
979	struct iscsi_bhs_data_in *bhsdi;
980	struct iscsi_outstanding *io;
981	struct iscsi_session *is;
982	struct ccb_scsiio *csio;
983	size_t data_segment_len;
984
985	is = PDU_SESSION(response);
986	bhsdi = (struct iscsi_bhs_data_in *)response->ip_bhs;
987	io = iscsi_outstanding_find(is, bhsdi->bhsdi_initiator_task_tag);
988	if (io == NULL || io->io_ccb == NULL) {
989		ISCSI_SESSION_WARN(is, "bad itt 0x%x", bhsdi->bhsdi_initiator_task_tag);
990		icl_pdu_free(response);
991		iscsi_session_reconnect(is);
992		return;
993	}
994
995	data_segment_len = icl_pdu_data_segment_length(response);
996	if (data_segment_len == 0) {
997		/*
998		 * "The sending of 0 length data segments should be avoided,
999		 * but initiators and targets MUST be able to properly receive
1000		 * 0 length data segments."
1001		 */
1002		icl_pdu_free(response);
1003		return;
1004	}
1005
1006	/*
1007	 * We need to track this for security reasons - without it, malicious target
1008	 * could respond to SCSI READ without sending Data-In PDUs, which would result
1009	 * in read operation on the initiator side returning random kernel data.
1010	 */
1011	if (ntohl(bhsdi->bhsdi_buffer_offset) != io->io_received) {
1012		ISCSI_SESSION_WARN(is, "data out of order; expected offset %zd, got %zd",
1013		    io->io_received, (size_t)ntohl(bhsdi->bhsdi_buffer_offset));
1014		icl_pdu_free(response);
1015		iscsi_session_reconnect(is);
1016		return;
1017	}
1018
1019	csio = &io->io_ccb->csio;
1020
1021	if (io->io_received + data_segment_len > csio->dxfer_len) {
1022		ISCSI_SESSION_WARN(is, "oversize data segment (%zd bytes "
1023		    "at offset %zd, buffer is %d)",
1024		    data_segment_len, io->io_received, csio->dxfer_len);
1025		icl_pdu_free(response);
1026		iscsi_session_reconnect(is);
1027		return;
1028	}
1029
1030	icl_pdu_get_data(response, 0, csio->data_ptr + io->io_received, data_segment_len);
1031	io->io_received += data_segment_len;
1032
1033	/*
1034	 * XXX: Check DataSN.
1035	 * XXX: Check F.
1036	 */
1037	if ((bhsdi->bhsdi_flags & BHSDI_FLAGS_S) == 0) {
1038		/*
1039		 * Nothing more to do.
1040		 */
1041		icl_pdu_free(response);
1042		return;
1043	}
1044
1045	//ISCSI_SESSION_DEBUG(is, "got S flag; status 0x%x", bhsdi->bhsdi_status);
1046	if (bhsdi->bhsdi_status == 0) {
1047		io->io_ccb->ccb_h.status = CAM_REQ_CMP;
1048	} else {
1049		if ((io->io_ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
1050			xpt_freeze_devq(io->io_ccb->ccb_h.path, 1);
1051			ISCSI_SESSION_DEBUG(is, "freezing devq");
1052		}
1053		io->io_ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR | CAM_DEV_QFRZN;
1054		csio->scsi_status = bhsdi->bhsdi_status;
1055	}
1056
1057	if ((csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) {
1058		KASSERT(io->io_received <= csio->dxfer_len,
1059		    ("io->io_received > csio->dxfer_len"));
1060		if (io->io_received < csio->dxfer_len) {
1061			csio->resid = ntohl(bhsdi->bhsdi_residual_count);
1062			if (csio->resid != csio->dxfer_len - io->io_received) {
1063				ISCSI_SESSION_WARN(is, "underflow mismatch: "
1064				    "target indicates %d, we calculated %zd",
1065				    csio->resid,
1066				    csio->dxfer_len - io->io_received);
1067			}
1068			csio->resid = csio->dxfer_len - io->io_received;
1069		}
1070	}
1071
1072	xpt_done(io->io_ccb);
1073	iscsi_outstanding_remove(is, io);
1074	icl_pdu_free(response);
1075}
1076
/*
 * Handle a Logout Response PDU: log it at debug level and free it.
 */
static void
iscsi_pdu_handle_logout_response(struct icl_pdu *response)
{
	struct iscsi_session *is;

	is = PDU_SESSION(response);
	ISCSI_SESSION_DEBUG(is, "logout response");
	icl_pdu_free(response);
}
1084
1085static void
1086iscsi_pdu_handle_r2t(struct icl_pdu *response)
1087{
1088	struct icl_pdu *request;
1089	struct iscsi_session *is;
1090	struct iscsi_bhs_r2t *bhsr2t;
1091	struct iscsi_bhs_data_out *bhsdo;
1092	struct iscsi_outstanding *io;
1093	struct ccb_scsiio *csio;
1094	size_t off, len, total_len;
1095	int error;
1096
1097	is = PDU_SESSION(response);
1098
1099	bhsr2t = (struct iscsi_bhs_r2t *)response->ip_bhs;
1100	io = iscsi_outstanding_find(is, bhsr2t->bhsr2t_initiator_task_tag);
1101	if (io == NULL || io->io_ccb == NULL) {
1102		ISCSI_SESSION_WARN(is, "bad itt 0x%x; reconnecting",
1103		    bhsr2t->bhsr2t_initiator_task_tag);
1104		icl_pdu_free(response);
1105		iscsi_session_reconnect(is);
1106		return;
1107	}
1108
1109	csio = &io->io_ccb->csio;
1110
1111	if ((csio->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_OUT) {
1112		ISCSI_SESSION_WARN(is, "received R2T for read command; reconnecting");
1113		icl_pdu_free(response);
1114		iscsi_session_reconnect(is);
1115		return;
1116	}
1117
1118	/*
1119	 * XXX: Verify R2TSN.
1120	 */
1121
1122	io->io_datasn = 0;
1123
1124	off = ntohl(bhsr2t->bhsr2t_buffer_offset);
1125	if (off > csio->dxfer_len) {
1126		ISCSI_SESSION_WARN(is, "target requested invalid offset "
1127		    "%zd, buffer is is %d; reconnecting", off, csio->dxfer_len);
1128		icl_pdu_free(response);
1129		iscsi_session_reconnect(is);
1130		return;
1131	}
1132
1133	total_len = ntohl(bhsr2t->bhsr2t_desired_data_transfer_length);
1134	if (total_len == 0 || total_len > csio->dxfer_len) {
1135		ISCSI_SESSION_WARN(is, "target requested invalid length "
1136		    "%zd, buffer is %d; reconnecting", total_len, csio->dxfer_len);
1137		icl_pdu_free(response);
1138		iscsi_session_reconnect(is);
1139		return;
1140	}
1141
1142	//ISCSI_SESSION_DEBUG(is, "r2t; off %zd, len %zd", off, total_len);
1143
1144	for (;;) {
1145		len = total_len;
1146
1147		if (len > is->is_max_data_segment_length)
1148			len = is->is_max_data_segment_length;
1149
1150		if (off + len > csio->dxfer_len) {
1151			ISCSI_SESSION_WARN(is, "target requested invalid "
1152			    "length/offset %zd, buffer is %d; reconnecting",
1153			    off + len, csio->dxfer_len);
1154			icl_pdu_free(response);
1155			iscsi_session_reconnect(is);
1156			return;
1157		}
1158
1159		request = icl_pdu_new_bhs(response->ip_conn, M_NOWAIT);
1160		if (request == NULL) {
1161			icl_pdu_free(response);
1162			iscsi_session_reconnect(is);
1163			return;
1164		}
1165
1166		bhsdo = (struct iscsi_bhs_data_out *)request->ip_bhs;
1167		bhsdo->bhsdo_opcode = ISCSI_BHS_OPCODE_SCSI_DATA_OUT;
1168		bhsdo->bhsdo_lun = bhsr2t->bhsr2t_lun;
1169		bhsdo->bhsdo_initiator_task_tag =
1170		    bhsr2t->bhsr2t_initiator_task_tag;
1171		bhsdo->bhsdo_target_transfer_tag =
1172		    bhsr2t->bhsr2t_target_transfer_tag;
1173		bhsdo->bhsdo_datasn = htonl(io->io_datasn++);
1174		bhsdo->bhsdo_buffer_offset = htonl(off);
1175		error = icl_pdu_append_data(request, csio->data_ptr + off, len,
1176		    M_NOWAIT);
1177		if (error != 0) {
1178			ISCSI_SESSION_WARN(is, "failed to allocate memory; "
1179			    "reconnecting");
1180			icl_pdu_free(request);
1181			icl_pdu_free(response);
1182			iscsi_session_reconnect(is);
1183			return;
1184		}
1185
1186		off += len;
1187		total_len -= len;
1188
1189		if (total_len == 0) {
1190			bhsdo->bhsdo_flags |= BHSDO_FLAGS_F;
1191			//ISCSI_SESSION_DEBUG(is, "setting F, off %zd", off);
1192		} else {
1193			//ISCSI_SESSION_DEBUG(is, "not finished, off %zd", off);
1194		}
1195
1196		iscsi_pdu_queue_locked(request);
1197
1198		if (total_len == 0)
1199			break;
1200	}
1201
1202	icl_pdu_free(response);
1203}
1204
1205static void
1206iscsi_pdu_handle_async_message(struct icl_pdu *response)
1207{
1208	struct iscsi_bhs_asynchronous_message *bhsam;
1209	struct iscsi_session *is;
1210
1211	is = PDU_SESSION(response);
1212	bhsam = (struct iscsi_bhs_asynchronous_message *)response->ip_bhs;
1213	switch (bhsam->bhsam_async_event) {
1214	case BHSAM_EVENT_TARGET_REQUESTS_LOGOUT:
1215		ISCSI_SESSION_WARN(is, "target requests logout; removing session");
1216		iscsi_session_logout(is);
1217		iscsi_session_terminate(is);
1218		break;
1219	case BHSAM_EVENT_TARGET_TERMINATES_CONNECTION:
1220		ISCSI_SESSION_WARN(is, "target indicates it will drop drop the connection");
1221		break;
1222	case BHSAM_EVENT_TARGET_TERMINATES_SESSION:
1223		ISCSI_SESSION_WARN(is, "target indicates it will drop drop the session");
1224		break;
1225	default:
1226		/*
1227		 * XXX: Technically, we're obligated to also handle
1228		 * 	parameter renegotiation.
1229		 */
1230		ISCSI_SESSION_WARN(is, "ignoring AsyncEvent %d", bhsam->bhsam_async_event);
1231		break;
1232	}
1233
1234	icl_pdu_free(response);
1235}
1236
1237static void
1238iscsi_pdu_handle_reject(struct icl_pdu *response)
1239{
1240	struct iscsi_bhs_reject *bhsr;
1241	struct iscsi_session *is;
1242
1243	is = PDU_SESSION(response);
1244	bhsr = (struct iscsi_bhs_reject *)response->ip_bhs;
1245	ISCSI_SESSION_WARN(is, "received Reject PDU, reason 0x%x; protocol error?",
1246	    bhsr->bhsr_reason);
1247
1248	icl_pdu_free(response);
1249}
1250
1251static int
1252iscsi_ioctl_daemon_wait(struct iscsi_softc *sc,
1253    struct iscsi_daemon_request *request)
1254{
1255	struct iscsi_session *is;
1256	int error;
1257
1258	sx_slock(&sc->sc_lock);
1259	for (;;) {
1260		TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
1261			ISCSI_SESSION_LOCK(is);
1262			if (is->is_waiting_for_iscsid)
1263				break;
1264			ISCSI_SESSION_UNLOCK(is);
1265		}
1266
1267		if (is == NULL) {
1268			/*
1269			 * No session requires attention from iscsid(8); wait.
1270			 */
1271			error = cv_wait_sig(&sc->sc_cv, &sc->sc_lock);
1272			if (error != 0) {
1273				sx_sunlock(&sc->sc_lock);
1274				return (error);
1275			}
1276			continue;
1277		}
1278
1279		is->is_waiting_for_iscsid = false;
1280		is->is_login_phase = true;
1281		is->is_reason[0] = '\0';
1282		ISCSI_SESSION_UNLOCK(is);
1283
1284		request->idr_session_id = is->is_id;
1285		memcpy(&request->idr_isid, &is->is_isid,
1286		    sizeof(request->idr_isid));
1287		request->idr_tsih = 0;	/* New or reinstated session. */
1288		memcpy(&request->idr_conf, &is->is_conf,
1289		    sizeof(request->idr_conf));
1290
1291		sx_sunlock(&sc->sc_lock);
1292		return (0);
1293	}
1294}
1295
1296static int
1297iscsi_ioctl_daemon_handoff(struct iscsi_softc *sc,
1298    struct iscsi_daemon_handoff *handoff)
1299{
1300	struct iscsi_session *is;
1301	int error;
1302
1303	sx_slock(&sc->sc_lock);
1304
1305	/*
1306	 * Find the session to hand off socket to.
1307	 */
1308	TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
1309		if (is->is_id == handoff->idh_session_id)
1310			break;
1311	}
1312	if (is == NULL) {
1313		sx_sunlock(&sc->sc_lock);
1314		return (ESRCH);
1315	}
1316	ISCSI_SESSION_LOCK(is);
1317	if (is->is_conf.isc_discovery || is->is_terminating) {
1318		ISCSI_SESSION_UNLOCK(is);
1319		sx_sunlock(&sc->sc_lock);
1320		return (EINVAL);
1321	}
1322	if (is->is_connected) {
1323		/*
1324		 * This might have happened because another iscsid(8)
1325		 * instance handed off the connection in the meantime.
1326		 * Just return.
1327		 */
1328		ISCSI_SESSION_WARN(is, "handoff on already connected "
1329		    "session");
1330		ISCSI_SESSION_UNLOCK(is);
1331		sx_sunlock(&sc->sc_lock);
1332		return (EBUSY);
1333	}
1334
1335	strlcpy(is->is_target_alias, handoff->idh_target_alias,
1336	    sizeof(is->is_target_alias));
1337	is->is_tsih = handoff->idh_tsih;
1338	is->is_statsn = handoff->idh_statsn;
1339	is->is_initial_r2t = handoff->idh_initial_r2t;
1340	is->is_immediate_data = handoff->idh_immediate_data;
1341	is->is_max_data_segment_length = handoff->idh_max_data_segment_length;
1342	is->is_max_burst_length = handoff->idh_max_burst_length;
1343	is->is_first_burst_length = handoff->idh_first_burst_length;
1344
1345	if (handoff->idh_header_digest == ISCSI_DIGEST_CRC32C)
1346		is->is_conn->ic_header_crc32c = true;
1347	else
1348		is->is_conn->ic_header_crc32c = false;
1349	if (handoff->idh_data_digest == ISCSI_DIGEST_CRC32C)
1350		is->is_conn->ic_data_crc32c = true;
1351	else
1352		is->is_conn->ic_data_crc32c = false;
1353
1354	is->is_cmdsn = 0;
1355	is->is_expcmdsn = 0;
1356	is->is_maxcmdsn = 0;
1357	is->is_waiting_for_iscsid = false;
1358	is->is_login_phase = false;
1359	is->is_timeout = 0;
1360	is->is_connected = true;
1361	is->is_reason[0] = '\0';
1362
1363	ISCSI_SESSION_UNLOCK(is);
1364
1365#ifdef ICL_KERNEL_PROXY
1366	if (handoff->idh_socket != 0) {
1367#endif
1368		/*
1369		 * Handoff without using ICL proxy.
1370		 */
1371		error = icl_conn_handoff(is->is_conn, handoff->idh_socket);
1372		if (error != 0) {
1373			sx_sunlock(&sc->sc_lock);
1374			iscsi_session_terminate(is);
1375			return (error);
1376		}
1377#ifdef ICL_KERNEL_PROXY
1378	}
1379#endif
1380
1381	sx_sunlock(&sc->sc_lock);
1382
1383	if (is->is_sim != NULL) {
1384		/*
1385		 * When reconnecting, there already is SIM allocated for the session.
1386		 */
1387		KASSERT(is->is_simq_frozen, ("reconnect without frozen simq"));
1388		ISCSI_SESSION_LOCK(is);
1389		ISCSI_SESSION_DEBUG(is, "releasing");
1390		xpt_release_simq(is->is_sim, 1);
1391		is->is_simq_frozen = false;
1392		ISCSI_SESSION_UNLOCK(is);
1393
1394	} else {
1395		ISCSI_SESSION_LOCK(is);
1396		is->is_devq = cam_simq_alloc(maxtags);
1397		if (is->is_devq == NULL) {
1398			ISCSI_SESSION_WARN(is, "failed to allocate simq");
1399			iscsi_session_terminate(is);
1400			return (ENOMEM);
1401		}
1402
1403		is->is_sim = cam_sim_alloc(iscsi_action, iscsi_poll, "iscsi",
1404		    is, is->is_id /* unit */, &is->is_lock,
1405		    1, maxtags, is->is_devq);
1406		if (is->is_sim == NULL) {
1407			ISCSI_SESSION_UNLOCK(is);
1408			ISCSI_SESSION_WARN(is, "failed to allocate SIM");
1409			cam_simq_free(is->is_devq);
1410			iscsi_session_terminate(is);
1411			return (ENOMEM);
1412		}
1413
1414		error = xpt_bus_register(is->is_sim, NULL, 0);
1415		if (error != 0) {
1416			ISCSI_SESSION_UNLOCK(is);
1417			ISCSI_SESSION_WARN(is, "failed to register bus");
1418			iscsi_session_terminate(is);
1419			return (ENOMEM);
1420		}
1421
1422		error = xpt_create_path(&is->is_path, /*periph*/NULL,
1423		    cam_sim_path(is->is_sim), CAM_TARGET_WILDCARD,
1424		    CAM_LUN_WILDCARD);
1425		if (error != CAM_REQ_CMP) {
1426			ISCSI_SESSION_UNLOCK(is);
1427			ISCSI_SESSION_WARN(is, "failed to create path");
1428			iscsi_session_terminate(is);
1429			return (ENOMEM);
1430		}
1431		ISCSI_SESSION_UNLOCK(is);
1432	}
1433
1434	return (0);
1435}
1436
1437static int
1438iscsi_ioctl_daemon_fail(struct iscsi_softc *sc,
1439    struct iscsi_daemon_fail *fail)
1440{
1441	struct iscsi_session *is;
1442
1443	sx_slock(&sc->sc_lock);
1444
1445	TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
1446		if (is->is_id == fail->idf_session_id)
1447			break;
1448	}
1449	if (is == NULL) {
1450		sx_sunlock(&sc->sc_lock);
1451		return (ESRCH);
1452	}
1453	ISCSI_SESSION_LOCK(is);
1454	ISCSI_SESSION_DEBUG(is, "iscsid(8) failed: %s",
1455	    fail->idf_reason);
1456	strlcpy(is->is_reason, fail->idf_reason, sizeof(is->is_reason));
1457	//is->is_waiting_for_iscsid = false;
1458	//is->is_login_phase = true;
1459	//iscsi_session_reconnect(is);
1460	ISCSI_SESSION_UNLOCK(is);
1461	sx_sunlock(&sc->sc_lock);
1462
1463	return (0);
1464}
1465
1466#ifdef ICL_KERNEL_PROXY
1467static int
1468iscsi_ioctl_daemon_connect(struct iscsi_softc *sc,
1469    struct iscsi_daemon_connect *idc)
1470{
1471	struct iscsi_session *is;
1472	struct sockaddr *from_sa, *to_sa;
1473	int error;
1474
1475	sx_slock(&sc->sc_lock);
1476	TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
1477		if (is->is_id == idc->idc_session_id)
1478			break;
1479	}
1480	if (is == NULL) {
1481		sx_sunlock(&sc->sc_lock);
1482		return (ESRCH);
1483	}
1484	sx_sunlock(&sc->sc_lock);
1485
1486	if (idc->idc_from_addrlen > 0) {
1487		error = getsockaddr(&from_sa, (void *)idc->idc_from_addr, idc->idc_from_addrlen);
1488		if (error != 0) {
1489			ISCSI_SESSION_WARN(is,
1490			    "getsockaddr failed with error %d", error);
1491			return (error);
1492		}
1493	} else {
1494		from_sa = NULL;
1495	}
1496	error = getsockaddr(&to_sa, (void *)idc->idc_to_addr, idc->idc_to_addrlen);
1497	if (error != 0) {
1498		ISCSI_SESSION_WARN(is, "getsockaddr failed with error %d",
1499		    error);
1500		free(from_sa, M_SONAME);
1501		return (error);
1502	}
1503
1504	ISCSI_SESSION_LOCK(is);
1505	is->is_waiting_for_iscsid = false;
1506	is->is_login_phase = true;
1507	is->is_timeout = 0;
1508	ISCSI_SESSION_UNLOCK(is);
1509
1510	error = icl_conn_connect(is->is_conn, idc->idc_iser, idc->idc_domain,
1511	    idc->idc_socktype, idc->idc_protocol, from_sa, to_sa);
1512	free(from_sa, M_SONAME);
1513	free(to_sa, M_SONAME);
1514
1515	/*
1516	 * Digests are always disabled during login phase.
1517	 */
1518	is->is_conn->ic_header_crc32c = false;
1519	is->is_conn->ic_data_crc32c = false;
1520
1521	return (error);
1522}
1523
1524static int
1525iscsi_ioctl_daemon_send(struct iscsi_softc *sc,
1526    struct iscsi_daemon_send *ids)
1527{
1528	struct iscsi_session *is;
1529	struct icl_pdu *ip;
1530	size_t datalen;
1531	void *data;
1532	int error;
1533
1534	sx_slock(&sc->sc_lock);
1535	TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
1536		if (is->is_id == ids->ids_session_id)
1537			break;
1538	}
1539	if (is == NULL) {
1540		sx_sunlock(&sc->sc_lock);
1541		return (ESRCH);
1542	}
1543	sx_sunlock(&sc->sc_lock);
1544
1545	if (is->is_login_phase == false)
1546		return (EBUSY);
1547
1548	if (is->is_terminating || is->is_reconnecting)
1549		return (EIO);
1550
1551	datalen = ids->ids_data_segment_len;
1552	if (datalen > ISCSI_MAX_DATA_SEGMENT_LENGTH)
1553		return (EINVAL);
1554	if (datalen > 0) {
1555		data = malloc(datalen, M_ISCSI, M_WAITOK);
1556		error = copyin(ids->ids_data_segment, data, datalen);
1557		if (error != 0) {
1558			free(data, M_ISCSI);
1559			return (error);
1560		}
1561	}
1562
1563	ip = icl_pdu_new_bhs(is->is_conn, M_WAITOK);
1564	memcpy(ip->ip_bhs, ids->ids_bhs, sizeof(*ip->ip_bhs));
1565	if (datalen > 0) {
1566		error = icl_pdu_append_data(ip, data, datalen, M_WAITOK);
1567		KASSERT(error == 0, ("icl_pdu_append_data(..., M_WAITOK) failed"));
1568		free(data, M_ISCSI);
1569	}
1570	icl_pdu_queue(ip);
1571
1572	return (0);
1573}
1574
1575static int
1576iscsi_ioctl_daemon_receive(struct iscsi_softc *sc,
1577    struct iscsi_daemon_receive *idr)
1578{
1579	struct iscsi_session *is;
1580	struct icl_pdu *ip;
1581	void *data;
1582
1583	sx_slock(&sc->sc_lock);
1584	TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
1585		if (is->is_id == idr->idr_session_id)
1586			break;
1587	}
1588	if (is == NULL) {
1589		sx_sunlock(&sc->sc_lock);
1590		return (ESRCH);
1591	}
1592	sx_sunlock(&sc->sc_lock);
1593
1594	if (is->is_login_phase == false)
1595		return (EBUSY);
1596
1597	ISCSI_SESSION_LOCK(is);
1598	while (is->is_login_pdu == NULL &&
1599	    is->is_terminating == false &&
1600	    is->is_reconnecting == false)
1601		cv_wait(&is->is_login_cv, &is->is_lock);
1602	if (is->is_terminating || is->is_reconnecting) {
1603		ISCSI_SESSION_UNLOCK(is);
1604		return (EIO);
1605	}
1606	ip = is->is_login_pdu;
1607	is->is_login_pdu = NULL;
1608	ISCSI_SESSION_UNLOCK(is);
1609
1610	if (ip->ip_data_len > idr->idr_data_segment_len) {
1611		icl_pdu_free(ip);
1612		return (EMSGSIZE);
1613	}
1614
1615	copyout(ip->ip_bhs, idr->idr_bhs, sizeof(*ip->ip_bhs));
1616	if (ip->ip_data_len > 0) {
1617		data = malloc(ip->ip_data_len, M_ISCSI, M_WAITOK);
1618		icl_pdu_get_data(ip, 0, data, ip->ip_data_len);
1619		copyout(data, idr->idr_data_segment, ip->ip_data_len);
1620		free(data, M_ISCSI);
1621	}
1622
1623	icl_pdu_free(ip);
1624
1625	return (0);
1626}
1627#endif /* ICL_KERNEL_PROXY */
1628
1629static void
1630iscsi_sanitize_session_conf(struct iscsi_session_conf *isc)
1631{
1632	/*
1633	 * Just make sure all the fields are null-terminated.
1634	 *
1635	 * XXX: This is not particularly secure.  We should
1636	 * 	create our own conf and then copy in relevant
1637	 * 	fields.
1638	 */
1639	isc->isc_initiator[ISCSI_NAME_LEN - 1] = '\0';
1640	isc->isc_initiator_addr[ISCSI_ADDR_LEN - 1] = '\0';
1641	isc->isc_initiator_alias[ISCSI_ALIAS_LEN - 1] = '\0';
1642	isc->isc_target[ISCSI_NAME_LEN - 1] = '\0';
1643	isc->isc_target_addr[ISCSI_ADDR_LEN - 1] = '\0';
1644	isc->isc_user[ISCSI_NAME_LEN - 1] = '\0';
1645	isc->isc_secret[ISCSI_SECRET_LEN - 1] = '\0';
1646	isc->isc_mutual_user[ISCSI_NAME_LEN - 1] = '\0';
1647	isc->isc_mutual_secret[ISCSI_SECRET_LEN - 1] = '\0';
1648}
1649
1650static bool
1651iscsi_valid_session_conf(const struct iscsi_session_conf *isc)
1652{
1653
1654	if (isc->isc_initiator[0] == '\0') {
1655		ISCSI_DEBUG("empty isc_initiator");
1656		return (false);
1657	}
1658
1659	if (isc->isc_target_addr[0] == '\0') {
1660		ISCSI_DEBUG("empty isc_target_addr");
1661		return (false);
1662	}
1663
1664	if (isc->isc_discovery != 0 && isc->isc_target[0] != 0) {
1665		ISCSI_DEBUG("non-empty isc_target for discovery session");
1666		return (false);
1667	}
1668
1669	if (isc->isc_discovery == 0 && isc->isc_target[0] == 0) {
1670		ISCSI_DEBUG("empty isc_target for non-discovery session");
1671		return (false);
1672	}
1673
1674	return (true);
1675}
1676
1677static int
1678iscsi_ioctl_session_add(struct iscsi_softc *sc, struct iscsi_session_add *isa)
1679{
1680	struct iscsi_session *is;
1681	const struct iscsi_session *is2;
1682	int error;
1683
1684	iscsi_sanitize_session_conf(&isa->isa_conf);
1685	if (iscsi_valid_session_conf(&isa->isa_conf) == false)
1686		return (EINVAL);
1687
1688	is = malloc(sizeof(*is), M_ISCSI, M_ZERO | M_WAITOK);
1689	memcpy(&is->is_conf, &isa->isa_conf, sizeof(is->is_conf));
1690
1691	sx_xlock(&sc->sc_lock);
1692
1693	/*
1694	 * Prevent duplicates.
1695	 */
1696	TAILQ_FOREACH(is2, &sc->sc_sessions, is_next) {
1697		if (!!is->is_conf.isc_discovery !=
1698		    !!is2->is_conf.isc_discovery)
1699			continue;
1700
1701		if (strcmp(is->is_conf.isc_target_addr,
1702		    is2->is_conf.isc_target_addr) != 0)
1703			continue;
1704
1705		if (is->is_conf.isc_discovery == 0 &&
1706		    strcmp(is->is_conf.isc_target,
1707		    is2->is_conf.isc_target) != 0)
1708			continue;
1709
1710		sx_xunlock(&sc->sc_lock);
1711		free(is, M_ISCSI);
1712		return (EBUSY);
1713	}
1714
1715	is->is_conn = icl_conn_new("iscsi", &is->is_lock);
1716	is->is_conn->ic_receive = iscsi_receive_callback;
1717	is->is_conn->ic_error = iscsi_error_callback;
1718	is->is_conn->ic_prv0 = is;
1719	TAILQ_INIT(&is->is_outstanding);
1720	STAILQ_INIT(&is->is_postponed);
1721	mtx_init(&is->is_lock, "iscsi_lock", NULL, MTX_DEF);
1722	cv_init(&is->is_maintenance_cv, "iscsi_mt");
1723#ifdef ICL_KERNEL_PROXY
1724	cv_init(&is->is_login_cv, "iscsi_login");
1725#endif
1726
1727	is->is_softc = sc;
1728	sc->sc_last_session_id++;
1729	is->is_id = sc->sc_last_session_id;
1730	is->is_isid[0] = 0x80; /* RFC 3720, 10.12.5: 10b, "Random" ISID. */
1731	arc4rand(&is->is_isid[1], 5, 0);
1732	is->is_tsih = 0;
1733	callout_init(&is->is_callout, 1);
1734	callout_reset(&is->is_callout, 1 * hz, iscsi_callout, is);
1735	TAILQ_INSERT_TAIL(&sc->sc_sessions, is, is_next);
1736
1737	error = kthread_add(iscsi_maintenance_thread, is, NULL, NULL, 0, 0, "iscsimt");
1738	if (error != 0) {
1739		ISCSI_SESSION_WARN(is, "kthread_add(9) failed with error %d", error);
1740		return (error);
1741	}
1742
1743	/*
1744	 * Trigger immediate reconnection.
1745	 */
1746	ISCSI_SESSION_LOCK(is);
1747	is->is_waiting_for_iscsid = true;
1748	strlcpy(is->is_reason, "Waiting for iscsid(8)", sizeof(is->is_reason));
1749	ISCSI_SESSION_UNLOCK(is);
1750	cv_signal(&sc->sc_cv);
1751
1752	sx_xunlock(&sc->sc_lock);
1753
1754	return (0);
1755}
1756
1757static bool
1758iscsi_session_conf_matches(unsigned int id1, const struct iscsi_session_conf *c1,
1759    unsigned int id2, const struct iscsi_session_conf *c2)
1760{
1761	if (id2 == 0 && c2->isc_target[0] == '\0' &&
1762	    c2->isc_target_addr[0] == '\0')
1763		return (true);
1764	if (id2 != 0 && id2 == id1)
1765		return (true);
1766	if (c2->isc_target[0] != '\0' &&
1767	    strcmp(c1->isc_target, c2->isc_target) == 0)
1768		return (true);
1769	if (c2->isc_target_addr[0] != '\0' &&
1770	    strcmp(c1->isc_target_addr, c2->isc_target_addr) == 0)
1771		return (true);
1772	return (false);
1773}
1774
1775static int
1776iscsi_ioctl_session_remove(struct iscsi_softc *sc,
1777    struct iscsi_session_remove *isr)
1778{
1779	struct iscsi_session *is, *tmp;
1780	bool found = false;
1781
1782	iscsi_sanitize_session_conf(&isr->isr_conf);
1783
1784	sx_xlock(&sc->sc_lock);
1785	TAILQ_FOREACH_SAFE(is, &sc->sc_sessions, is_next, tmp) {
1786		ISCSI_SESSION_LOCK(is);
1787		if (iscsi_session_conf_matches(is->is_id, &is->is_conf,
1788		    isr->isr_session_id, &isr->isr_conf)) {
1789			found = true;
1790			iscsi_session_logout(is);
1791			iscsi_session_terminate(is);
1792		}
1793		ISCSI_SESSION_UNLOCK(is);
1794	}
1795	sx_xunlock(&sc->sc_lock);
1796
1797	if (!found)
1798		return (ESRCH);
1799
1800	return (0);
1801}
1802
1803static int
1804iscsi_ioctl_session_list(struct iscsi_softc *sc, struct iscsi_session_list *isl)
1805{
1806	int error;
1807	unsigned int i = 0;
1808	struct iscsi_session *is;
1809	struct iscsi_session_state iss;
1810
1811	sx_slock(&sc->sc_lock);
1812	TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
1813		if (i >= isl->isl_nentries) {
1814			sx_sunlock(&sc->sc_lock);
1815			return (EMSGSIZE);
1816		}
1817		memset(&iss, 0, sizeof(iss));
1818		memcpy(&iss.iss_conf, &is->is_conf, sizeof(iss.iss_conf));
1819		iss.iss_id = is->is_id;
1820		strlcpy(iss.iss_target_alias, is->is_target_alias, sizeof(iss.iss_target_alias));
1821		strlcpy(iss.iss_reason, is->is_reason, sizeof(iss.iss_reason));
1822
1823		if (is->is_conn->ic_header_crc32c)
1824			iss.iss_header_digest = ISCSI_DIGEST_CRC32C;
1825		else
1826			iss.iss_header_digest = ISCSI_DIGEST_NONE;
1827
1828		if (is->is_conn->ic_data_crc32c)
1829			iss.iss_data_digest = ISCSI_DIGEST_CRC32C;
1830		else
1831			iss.iss_data_digest = ISCSI_DIGEST_NONE;
1832
1833		iss.iss_max_data_segment_length = is->is_max_data_segment_length;
1834		iss.iss_immediate_data = is->is_immediate_data;
1835		iss.iss_connected = is->is_connected;
1836
1837		error = copyout(&iss, isl->isl_pstates + i, sizeof(iss));
1838		if (error != 0) {
1839			sx_sunlock(&sc->sc_lock);
1840			return (error);
1841		}
1842		i++;
1843	}
1844	sx_sunlock(&sc->sc_lock);
1845
1846	isl->isl_nentries = i;
1847
1848	return (0);
1849}
1850
1851static int
1852iscsi_ioctl_session_modify(struct iscsi_softc *sc,
1853    struct iscsi_session_modify *ism)
1854{
1855	struct iscsi_session *is;
1856
1857	iscsi_sanitize_session_conf(&ism->ism_conf);
1858	if (iscsi_valid_session_conf(&ism->ism_conf) == false)
1859		return (EINVAL);
1860
1861	sx_xlock(&sc->sc_lock);
1862	TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
1863		ISCSI_SESSION_LOCK(is);
1864		if (is->is_id == ism->ism_session_id)
1865			break;
1866		ISCSI_SESSION_UNLOCK(is);
1867	}
1868	if (is == NULL) {
1869		sx_xunlock(&sc->sc_lock);
1870		return (ESRCH);
1871	}
1872	sx_xunlock(&sc->sc_lock);
1873
1874	memcpy(&is->is_conf, &ism->ism_conf, sizeof(is->is_conf));
1875	ISCSI_SESSION_UNLOCK(is);
1876
1877	iscsi_session_reconnect(is);
1878
1879	return (0);
1880}
1881
1882static int
1883iscsi_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int mode,
1884    struct thread *td)
1885{
1886	struct iscsi_softc *sc;
1887
1888	sc = dev->si_drv1;
1889
1890	switch (cmd) {
1891	case ISCSIDWAIT:
1892		return (iscsi_ioctl_daemon_wait(sc,
1893		    (struct iscsi_daemon_request *)arg));
1894	case ISCSIDHANDOFF:
1895		return (iscsi_ioctl_daemon_handoff(sc,
1896		    (struct iscsi_daemon_handoff *)arg));
1897	case ISCSIDFAIL:
1898		return (iscsi_ioctl_daemon_fail(sc,
1899		    (struct iscsi_daemon_fail *)arg));
1900#ifdef ICL_KERNEL_PROXY
1901	case ISCSIDCONNECT:
1902		return (iscsi_ioctl_daemon_connect(sc,
1903		    (struct iscsi_daemon_connect *)arg));
1904	case ISCSIDSEND:
1905		return (iscsi_ioctl_daemon_send(sc,
1906		    (struct iscsi_daemon_send *)arg));
1907	case ISCSIDRECEIVE:
1908		return (iscsi_ioctl_daemon_receive(sc,
1909		    (struct iscsi_daemon_receive *)arg));
1910#endif /* ICL_KERNEL_PROXY */
1911	case ISCSISADD:
1912		return (iscsi_ioctl_session_add(sc,
1913		    (struct iscsi_session_add *)arg));
1914	case ISCSISREMOVE:
1915		return (iscsi_ioctl_session_remove(sc,
1916		    (struct iscsi_session_remove *)arg));
1917	case ISCSISLIST:
1918		return (iscsi_ioctl_session_list(sc,
1919		    (struct iscsi_session_list *)arg));
1920	case ISCSISMODIFY:
1921		return (iscsi_ioctl_session_modify(sc,
1922		    (struct iscsi_session_modify *)arg));
1923	default:
1924		return (EINVAL);
1925	}
1926}
1927
/*
 * Encode a LUN number into the 8-byte LUN field of an iSCSI header.
 *
 * The encoding is built byte by byte and then copied into the returned
 * uint64_t, so the result holds the bytes in wire order regardless of
 * host endianness:
 *   lun < 256     peripheral device addressing, LUN in byte 1;
 *   lun < 16384   flat space addressing, 0x40 plus 14 bits of LUN;
 *   otherwise     extended flat space addressing, 0xd2 followed by the
 *                 low 24 bits of the LUN.
 */
static uint64_t
iscsi_encode_lun(uint32_t lun)
{
	uint8_t bytes[8] = { 0 };
	uint64_t packed;

	if (lun >= 16384) {
		/* Extended flat space addressing. */
		bytes[0] = 0xd2;
		bytes[1] = lun >> 16;
		bytes[2] = lun >> 8;
		bytes[3] = lun;
	} else if (lun >= 256) {
		/* Flat space addressing. */
		bytes[0] = 0x40 | ((lun >> 8) & 0x3f);
		bytes[1] = lun & 0xff;
	} else {
		/* Peripheral device addressing. */
		bytes[1] = lun;
	}

	memcpy(&packed, bytes, sizeof(packed));
	return (packed);
}
1961
1962static struct iscsi_outstanding *
1963iscsi_outstanding_find(struct iscsi_session *is, uint32_t initiator_task_tag)
1964{
1965	struct iscsi_outstanding *io;
1966
1967	ISCSI_SESSION_LOCK_ASSERT(is);
1968
1969	TAILQ_FOREACH(io, &is->is_outstanding, io_next) {
1970		if (io->io_initiator_task_tag == initiator_task_tag)
1971			return (io);
1972	}
1973	return (NULL);
1974}
1975
1976static struct iscsi_outstanding *
1977iscsi_outstanding_find_ccb(struct iscsi_session *is, union ccb *ccb)
1978{
1979	struct iscsi_outstanding *io;
1980
1981	ISCSI_SESSION_LOCK_ASSERT(is);
1982
1983	TAILQ_FOREACH(io, &is->is_outstanding, io_next) {
1984		if (io->io_ccb == ccb)
1985			return (io);
1986	}
1987	return (NULL);
1988}
1989
1990static struct iscsi_outstanding *
1991iscsi_outstanding_add(struct iscsi_session *is,
1992    uint32_t initiator_task_tag, union ccb *ccb)
1993{
1994	struct iscsi_outstanding *io;
1995
1996	ISCSI_SESSION_LOCK_ASSERT(is);
1997
1998	KASSERT(iscsi_outstanding_find(is, initiator_task_tag) == NULL,
1999	    ("initiator_task_tag 0x%x already added", initiator_task_tag));
2000
2001	io = uma_zalloc(iscsi_outstanding_zone, M_NOWAIT | M_ZERO);
2002	if (io == NULL) {
2003		ISCSI_SESSION_WARN(is, "failed to allocate %zd bytes", sizeof(*io));
2004		return (NULL);
2005	}
2006	io->io_initiator_task_tag = initiator_task_tag;
2007	io->io_ccb = ccb;
2008	TAILQ_INSERT_TAIL(&is->is_outstanding, io, io_next);
2009	return (io);
2010}
2011
2012static void
2013iscsi_outstanding_remove(struct iscsi_session *is, struct iscsi_outstanding *io)
2014{
2015
2016	ISCSI_SESSION_LOCK_ASSERT(is);
2017
2018	TAILQ_REMOVE(&is->is_outstanding, io, io_next);
2019	uma_zfree(iscsi_outstanding_zone, io);
2020}
2021
2022static void
2023iscsi_action_abort(struct iscsi_session *is, union ccb *ccb)
2024{
2025	struct icl_pdu *request;
2026	struct iscsi_bhs_task_management_request *bhstmr;
2027	struct ccb_abort *cab = &ccb->cab;
2028	struct iscsi_outstanding *io, *aio;
2029
2030	ISCSI_SESSION_LOCK_ASSERT(is);
2031
2032#if 0
2033	KASSERT(is->is_login_phase == false, ("%s called during Login Phase", __func__));
2034#else
2035	if (is->is_login_phase) {
2036		ccb->ccb_h.status = CAM_REQ_ABORTED;
2037		xpt_done(ccb);
2038		return;
2039	}
2040#endif
2041
2042	aio = iscsi_outstanding_find_ccb(is, cab->abort_ccb);
2043	if (aio == NULL) {
2044		ccb->ccb_h.status = CAM_REQ_CMP;
2045		xpt_done(ccb);
2046		return;
2047	}
2048
2049	request = icl_pdu_new_bhs(is->is_conn, M_NOWAIT);
2050	if (request == NULL) {
2051		ccb->ccb_h.status = CAM_RESRC_UNAVAIL;
2052		xpt_done(ccb);
2053		return;
2054	}
2055
2056	bhstmr = (struct iscsi_bhs_task_management_request *)request->ip_bhs;
2057	bhstmr->bhstmr_opcode = ISCSI_BHS_OPCODE_TASK_REQUEST;
2058	bhstmr->bhstmr_function = 0x80 | BHSTMR_FUNCTION_ABORT_TASK;
2059
2060	bhstmr->bhstmr_lun = iscsi_encode_lun(ccb->ccb_h.target_lun);
2061	bhstmr->bhstmr_initiator_task_tag = is->is_initiator_task_tag;
2062	is->is_initiator_task_tag++;
2063	bhstmr->bhstmr_referenced_task_tag = aio->io_initiator_task_tag;
2064
2065	io = iscsi_outstanding_add(is, bhstmr->bhstmr_initiator_task_tag, NULL);
2066	if (io == NULL) {
2067		icl_pdu_free(request);
2068		ccb->ccb_h.status = CAM_RESRC_UNAVAIL;
2069		xpt_done(ccb);
2070		return;
2071	}
2072	io->io_datasn = aio->io_initiator_task_tag;
2073	iscsi_pdu_queue_locked(request);
2074}
2075
2076static void
2077iscsi_action_scsiio(struct iscsi_session *is, union ccb *ccb)
2078{
2079	struct icl_pdu *request;
2080	struct iscsi_bhs_scsi_command *bhssc;
2081	struct ccb_scsiio *csio;
2082	struct iscsi_outstanding *io;
2083	size_t len;
2084	int error;
2085
2086	ISCSI_SESSION_LOCK_ASSERT(is);
2087
2088#if 0
2089	KASSERT(is->is_login_phase == false, ("%s called during Login Phase", __func__));
2090#else
2091	if (is->is_login_phase) {
2092		ISCSI_SESSION_DEBUG(is, "called during login phase");
2093		if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
2094			xpt_freeze_devq(ccb->ccb_h.path, 1);
2095			ISCSI_SESSION_DEBUG(is, "freezing devq");
2096		}
2097		ccb->ccb_h.status = CAM_REQ_ABORTED | CAM_DEV_QFRZN;
2098		xpt_done(ccb);
2099		return;
2100	}
2101#endif
2102
2103	request = icl_pdu_new_bhs(is->is_conn, M_NOWAIT);
2104	if (request == NULL) {
2105		if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
2106			xpt_freeze_devq(ccb->ccb_h.path, 1);
2107			ISCSI_SESSION_DEBUG(is, "freezing devq");
2108		}
2109		ccb->ccb_h.status = CAM_RESRC_UNAVAIL | CAM_DEV_QFRZN;
2110		xpt_done(ccb);
2111		return;
2112	}
2113
2114	csio = &ccb->csio;
2115	bhssc = (struct iscsi_bhs_scsi_command *)request->ip_bhs;
2116	bhssc->bhssc_opcode = ISCSI_BHS_OPCODE_SCSI_COMMAND;
2117	bhssc->bhssc_flags |= BHSSC_FLAGS_F;
2118	switch (csio->ccb_h.flags & CAM_DIR_MASK) {
2119	case CAM_DIR_IN:
2120		bhssc->bhssc_flags |= BHSSC_FLAGS_R;
2121		break;
2122	case CAM_DIR_OUT:
2123		bhssc->bhssc_flags |= BHSSC_FLAGS_W;
2124		break;
2125	}
2126
2127	if ((ccb->ccb_h.flags & CAM_TAG_ACTION_VALID) != 0) {
2128		switch (csio->tag_action) {
2129		case MSG_HEAD_OF_Q_TAG:
2130			bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_HOQ;
2131			break;
2132		case MSG_ORDERED_Q_TAG:
2133			bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_ORDERED;
2134			break;
2135		case MSG_ACA_TASK:
2136			bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_ACA;
2137			break;
2138		case MSG_SIMPLE_Q_TAG:
2139		default:
2140			bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_SIMPLE;
2141			break;
2142		}
2143	} else
2144		bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_UNTAGGED;
2145
2146	bhssc->bhssc_lun = iscsi_encode_lun(csio->ccb_h.target_lun);
2147	bhssc->bhssc_initiator_task_tag = is->is_initiator_task_tag;
2148	is->is_initiator_task_tag++;
2149	bhssc->bhssc_expected_data_transfer_length = htonl(csio->dxfer_len);
2150	KASSERT(csio->cdb_len <= sizeof(bhssc->bhssc_cdb),
2151	    ("unsupported CDB size %zd", (size_t)csio->cdb_len));
2152
2153	if (csio->ccb_h.flags & CAM_CDB_POINTER)
2154		memcpy(&bhssc->bhssc_cdb, csio->cdb_io.cdb_ptr, csio->cdb_len);
2155	else
2156		memcpy(&bhssc->bhssc_cdb, csio->cdb_io.cdb_bytes, csio->cdb_len);
2157
2158	io = iscsi_outstanding_add(is, bhssc->bhssc_initiator_task_tag, ccb);
2159	if (io == NULL) {
2160		icl_pdu_free(request);
2161		if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
2162			xpt_freeze_devq(ccb->ccb_h.path, 1);
2163			ISCSI_SESSION_DEBUG(is, "freezing devq");
2164		}
2165		ccb->ccb_h.status = CAM_RESRC_UNAVAIL | CAM_DEV_QFRZN;
2166		xpt_done(ccb);
2167		return;
2168	}
2169
2170	if (is->is_immediate_data &&
2171	    (csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_OUT) {
2172		len = csio->dxfer_len;
2173		//ISCSI_SESSION_DEBUG(is, "adding %zd of immediate data", len);
2174		if (len > is->is_first_burst_length) {
2175			ISCSI_SESSION_DEBUG(is, "len %zd -> %zd", len, is->is_first_burst_length);
2176			len = is->is_first_burst_length;
2177		}
2178
2179		error = icl_pdu_append_data(request, csio->data_ptr, len, M_NOWAIT);
2180		if (error != 0) {
2181			icl_pdu_free(request);
2182			if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
2183				xpt_freeze_devq(ccb->ccb_h.path, 1);
2184				ISCSI_SESSION_DEBUG(is, "freezing devq");
2185			}
2186			ccb->ccb_h.status = CAM_RESRC_UNAVAIL | CAM_DEV_QFRZN;
2187			xpt_done(ccb);
2188			return;
2189		}
2190	}
2191	iscsi_pdu_queue_locked(request);
2192}
2193
2194static void
2195iscsi_action(struct cam_sim *sim, union ccb *ccb)
2196{
2197	struct iscsi_session *is;
2198
2199	is = cam_sim_softc(sim);
2200
2201	ISCSI_SESSION_LOCK_ASSERT(is);
2202
2203	if (is->is_terminating ||
2204	    (is->is_connected == false && fail_on_disconnection)) {
2205		ccb->ccb_h.status = CAM_DEV_NOT_THERE;
2206		xpt_done(ccb);
2207		return;
2208	}
2209
2210	switch (ccb->ccb_h.func_code) {
2211	case XPT_PATH_INQ:
2212	{
2213		struct ccb_pathinq *cpi = &ccb->cpi;
2214
2215		cpi->version_num = 1;
2216		cpi->hba_inquiry = PI_TAG_ABLE;
2217		cpi->target_sprt = 0;
2218		//cpi->hba_misc = PIM_NOBUSRESET;
2219		cpi->hba_misc = 0;
2220		cpi->hba_eng_cnt = 0;
2221		cpi->max_target = 0;
2222		cpi->max_lun = 255;
2223		//cpi->initiator_id = 0; /* XXX */
2224		cpi->initiator_id = 64; /* XXX */
2225		strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
2226		strlcpy(cpi->hba_vid, "iSCSI", HBA_IDLEN);
2227		strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
2228		cpi->unit_number = cam_sim_unit(sim);
2229		cpi->bus_id = cam_sim_bus(sim);
2230		cpi->base_transfer_speed = 150000; /* XXX */
2231		cpi->transport = XPORT_ISCSI;
2232		cpi->transport_version = 0;
2233		cpi->protocol = PROTO_SCSI;
2234		cpi->protocol_version = SCSI_REV_SPC3;
2235		cpi->maxio = MAXPHYS;
2236		cpi->ccb_h.status = CAM_REQ_CMP;
2237		break;
2238	}
2239	case XPT_GET_TRAN_SETTINGS:
2240	{
2241		struct ccb_trans_settings	*cts;
2242		struct ccb_trans_settings_scsi	*scsi;
2243
2244		cts = &ccb->cts;
2245		scsi = &cts->proto_specific.scsi;
2246
2247		cts->protocol = PROTO_SCSI;
2248		cts->protocol_version = SCSI_REV_SPC3;
2249		cts->transport = XPORT_ISCSI;
2250		cts->transport_version = 0;
2251		scsi->valid = CTS_SCSI_VALID_TQ;
2252		scsi->flags = CTS_SCSI_FLAGS_TAG_ENB;
2253		cts->ccb_h.status = CAM_REQ_CMP;
2254		break;
2255	}
2256	case XPT_CALC_GEOMETRY:
2257		cam_calc_geometry(&ccb->ccg, /*extended*/1);
2258		ccb->ccb_h.status = CAM_REQ_CMP;
2259		break;
2260#if 0
2261	/*
2262	 * XXX: What's the point?
2263	 */
2264	case XPT_RESET_BUS:
2265	case XPT_TERM_IO:
2266		ISCSI_SESSION_DEBUG(is, "faking success for reset, abort, or term_io");
2267		ccb->ccb_h.status = CAM_REQ_CMP;
2268		break;
2269#endif
2270	case XPT_ABORT:
2271		iscsi_action_abort(is, ccb);
2272		return;
2273	case XPT_SCSI_IO:
2274		iscsi_action_scsiio(is, ccb);
2275		return;
2276	default:
2277#if 0
2278		ISCSI_SESSION_DEBUG(is, "got unsupported code 0x%x", ccb->ccb_h.func_code);
2279#endif
2280		ccb->ccb_h.status = CAM_FUNC_NOTAVAIL;
2281		break;
2282	}
2283	xpt_done(ccb);
2284}
2285
/*
 * Polling hook taking a SIM (presumably registered as the SIM's poll
 * callback -- confirm at the cam_sim_alloc() call site).  This driver does
 * not expect to be polled, so reaching this function trips an assertion.
 */
static void
iscsi_poll(struct cam_sim *sim)
{

	KASSERT(false, ("%s: you're not supposed to be here", __func__));
}
2292
2293static void
2294iscsi_shutdown(struct iscsi_softc *sc)
2295{
2296	struct iscsi_session *is;
2297
2298	ISCSI_DEBUG("removing all sessions due to shutdown");
2299
2300	sx_slock(&sc->sc_lock);
2301	TAILQ_FOREACH(is, &sc->sc_sessions, is_next)
2302		iscsi_session_terminate(is);
2303	sx_sunlock(&sc->sc_lock);
2304}
2305
2306static int
2307iscsi_load(void)
2308{
2309	int error;
2310
2311	sc = malloc(sizeof(*sc), M_ISCSI, M_ZERO | M_WAITOK);
2312	sx_init(&sc->sc_lock, "iscsi");
2313	TAILQ_INIT(&sc->sc_sessions);
2314	cv_init(&sc->sc_cv, "iscsi_cv");
2315
2316	iscsi_outstanding_zone = uma_zcreate("iscsi_outstanding",
2317	    sizeof(struct iscsi_outstanding), NULL, NULL, NULL, NULL,
2318	    UMA_ALIGN_PTR, 0);
2319
2320	error = make_dev_p(MAKEDEV_CHECKNAME, &sc->sc_cdev, &iscsi_cdevsw,
2321	    NULL, UID_ROOT, GID_WHEEL, 0600, "iscsi");
2322	if (error != 0) {
2323		ISCSI_WARN("failed to create device node, error %d", error);
2324		return (error);
2325	}
2326	sc->sc_cdev->si_drv1 = sc;
2327
2328	/*
2329	 * Note that this needs to get run before dashutdown().  Otherwise,
2330	 * when rebooting with iSCSI session with outstanding requests,
2331	 * but disconnected, dashutdown() will hang on cam_periph_runccb().
2332	 */
2333	sc->sc_shutdown_eh = EVENTHANDLER_REGISTER(shutdown_post_sync,
2334	    iscsi_shutdown, sc, SHUTDOWN_PRI_FIRST);
2335
2336	return (0);
2337}
2338
2339static int
2340iscsi_unload(void)
2341{
2342	struct iscsi_session *is, *tmp;
2343
2344	if (sc->sc_cdev != NULL) {
2345		ISCSI_DEBUG("removing device node");
2346		destroy_dev(sc->sc_cdev);
2347		ISCSI_DEBUG("device node removed");
2348	}
2349
2350	if (sc->sc_shutdown_eh != NULL)
2351		EVENTHANDLER_DEREGISTER(shutdown_post_sync, sc->sc_shutdown_eh);
2352
2353	sx_slock(&sc->sc_lock);
2354	TAILQ_FOREACH_SAFE(is, &sc->sc_sessions, is_next, tmp)
2355		iscsi_session_terminate(is);
2356	while(!TAILQ_EMPTY(&sc->sc_sessions)) {
2357		ISCSI_DEBUG("waiting for sessions to terminate");
2358		cv_wait(&sc->sc_cv, &sc->sc_lock);
2359	}
2360	ISCSI_DEBUG("all sessions terminated");
2361	sx_sunlock(&sc->sc_lock);
2362
2363	uma_zdestroy(iscsi_outstanding_zone);
2364	sx_destroy(&sc->sc_lock);
2365	cv_destroy(&sc->sc_cv);
2366	free(sc, M_ISCSI);
2367	return (0);
2368}
2369
2370static int
2371iscsi_quiesce(void)
2372{
2373	sx_slock(&sc->sc_lock);
2374	if (!TAILQ_EMPTY(&sc->sc_sessions)) {
2375		sx_sunlock(&sc->sc_lock);
2376		return (EBUSY);
2377	}
2378	sx_sunlock(&sc->sc_lock);
2379	return (0);
2380}
2381
2382static int
2383iscsi_modevent(module_t mod, int what, void *arg)
2384{
2385	int error;
2386
2387	switch (what) {
2388	case MOD_LOAD:
2389		error = iscsi_load();
2390		break;
2391	case MOD_UNLOAD:
2392		error = iscsi_unload();
2393		break;
2394	case MOD_QUIESCE:
2395		error = iscsi_quiesce();
2396		break;
2397	default:
2398		error = EINVAL;
2399		break;
2400	}
2401	return (error);
2402}
2403
2404moduledata_t iscsi_data = {
2405	"iscsi",
2406	iscsi_modevent,
2407	0
2408};
2409
2410DECLARE_MODULE(iscsi, iscsi_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
2411MODULE_DEPEND(iscsi, cam, 1, 1, 1);
2412MODULE_DEPEND(iscsi, icl, 1, 1, 1);
2413