srpt_ch.c revision 9881:741c9e4e094c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * RDMA channel interface for Solaris SCSI RDMA Protocol Target (SRP)
 * transport port provider module for the COMSTAR framework.
 */

#include <sys/cpuvar.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/sysmacros.h>
#include <sys/sdt.h>
#include <sys/taskq.h>
#include <sys/scsi/scsi.h>
#include <sys/ib/ibtl/ibti.h>

#include <stmf.h>
#include <stmf_ioctl.h>
#include <portif.h>

#include "srp.h"
#include "srpt_impl.h"
#include "srpt_ioc.h"
#include "srpt_stp.h"
#include "srpt_ch.h"

extern srpt_ctxt_t *srpt_ctxt;
extern uint16_t srpt_send_msg_depth;
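
/*
 * Note: srpt_ctxt (the global srpt driver context) and srpt_send_msg_depth
 * (the configured send-message queue depth, used below to size the CQs and
 * the send work queue) are defined elsewhere in the srpt module; this file
 * only consumes them.
 */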

/*
 * Prototypes.
 */
static void srpt_ch_scq_hdlr(ibt_cq_hdl_t cq_hdl, void *arg);
static void srpt_ch_rcq_hdlr(ibt_cq_hdl_t cq_hdl, void *arg);
static void srpt_ch_process_iu(srpt_channel_t *ch, srpt_iu_t *iu);

/*
 * srpt_ch_alloc()
 */
srpt_channel_t *
srpt_ch_alloc(srpt_target_port_t *tgt, uint8_t port)
{
	ibt_status_t			status;
	srpt_channel_t			*ch;
	ibt_cq_attr_t			cq_attr;
	ibt_rc_chan_alloc_args_t	ch_args;
	uint32_t			cq_real_size;
	srpt_ioc_t			*ioc;

	ASSERT(tgt != NULL);
	ioc = tgt->tp_ioc;
	ASSERT(ioc != NULL);

	ch = kmem_zalloc(sizeof (*ch), KM_SLEEP);
	rw_init(&ch->ch_rwlock, NULL, RW_DRIVER, NULL);
	mutex_init(&ch->ch_reflock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ch->ch_cv_complete, NULL, CV_DRIVER, NULL);
	ch->ch_refcnt	= 1;
	ch->ch_cv_waiters = 0;

	ch->ch_state  = SRPT_CHANNEL_CONNECTING;
	ch->ch_tgt    = tgt;
	ch->ch_req_lim_delta = 0;
	ch->ch_ti_iu_len = 0;

	cq_attr.cq_size	 = srpt_send_msg_depth * 2;
	cq_attr.cq_sched = 0;
	cq_attr.cq_flags = IBT_CQ_NO_FLAGS;

	status = ibt_alloc_cq(ioc->ioc_ibt_hdl, &cq_attr, &ch->ch_scq_hdl,
	    &cq_real_size);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ch_alloc, send CQ alloc error (%d)",
		    status);
		goto scq_alloc_err;
	}

	cq_attr.cq_size	 = srpt_send_msg_depth + 1;
	cq_attr.cq_sched = 0;
	cq_attr.cq_flags = IBT_CQ_NO_FLAGS;

	status = ibt_alloc_cq(ioc->ioc_ibt_hdl, &cq_attr, &ch->ch_rcq_hdl,
	    &cq_real_size);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_alloc, receive CQ alloc error (%d)",
		    status);
		goto rcq_alloc_err;
	}

	ibt_set_cq_handler(ch->ch_scq_hdl, srpt_ch_scq_hdlr, ch);
	ibt_set_cq_handler(ch->ch_rcq_hdl, srpt_ch_rcq_hdlr, ch);
	ibt_enable_cq_notify(ch->ch_scq_hdl, IBT_NEXT_COMPLETION);
	ibt_enable_cq_notify(ch->ch_rcq_hdl, IBT_NEXT_COMPLETION);
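
	/*
	 * Note: the handlers re-arm and then re-poll their CQ before
	 * returning (see srpt_ch_scq_hdlr() and srpt_ch_rcq_hdlr()), which
	 * closes the race between draining the CQ and a completion that
	 * arrives after the final poll but before the CQ is re-armed.
	 */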

	ch_args.rc_flags   = IBT_WR_SIGNALED;

	/* Make certain the initiator cannot read/write our memory */
	ch_args.rc_control = 0;

	ch_args.rc_hca_port_num = port;

	/*
	 * Any SRP IU can result in a number of STMF data buffer transfers
	 * and those transfers themselves could span multiple initiator
	 * buffers.  Therefore, the number of send WQE's actually required
	 * can vary.  Here we assume that on average an I/O will require
	 * no more than SRPT_MAX_OUT_IO_PER_CMD send WQE's.  In practice
	 * this will prevent send work queue overrun, but we will also
	 * inform STMF to throttle I/O should the work queue become full.
	 *
	 * If the HCA tells us the max outstanding WRs for a channel is
	 * lower than our default, use the HCA value.
	 */
	ch_args.rc_sizes.cs_sq = min(ioc->ioc_attr.hca_max_chan_sz,
	    (srpt_send_msg_depth * SRPT_MAX_OUT_IO_PER_CMD));
	ch_args.rc_sizes.cs_rq = 0;
	ch_args.rc_sizes.cs_sq_sgl = 2;
	ch_args.rc_sizes.cs_rq_sgl = 0;
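
	/*
	 * Illustration (hypothetical numbers): if srpt_send_msg_depth were
	 * 64 and SRPT_MAX_OUT_IO_PER_CMD were 16, we would request a send
	 * queue of min(hca_max_chan_sz, 1024) entries.  cs_rq is 0 because
	 * receives are posted to the IOC-wide SRQ (rc_srq below) rather
	 * than a per-channel receive queue.
	 */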

	ch_args.rc_scq = ch->ch_scq_hdl;
	ch_args.rc_rcq = ch->ch_rcq_hdl;
	ch_args.rc_pd  = ioc->ioc_pd_hdl;
	ch_args.rc_clone_chan = NULL;
	ch_args.rc_srq = ioc->ioc_srq_hdl;

	status = ibt_alloc_rc_channel(ioc->ioc_ibt_hdl, IBT_ACHAN_USES_SRQ,
	    &ch_args, &ch->ch_chan_hdl, &ch->ch_sizes);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_alloc, IBT channel alloc error (%d)",
		    status);
		goto qp_alloc_err;
	}

	/*
	 * Create pool of send WQE entries to map send wqe work IDs
	 * to various types (specifically in error cases where OP
	 * is not known).
	 */
	ch->ch_num_swqe = ch->ch_sizes.cs_sq;
	SRPT_DPRINTF_L2("ch_alloc, number of SWQEs = %u", ch->ch_num_swqe);
	ch->ch_swqe = kmem_zalloc(sizeof (srpt_swqe_t) * ch->ch_num_swqe,
	    KM_SLEEP);
	if (ch->ch_swqe == NULL) {
		SRPT_DPRINTF_L2("ch_alloc, SWQE alloc error");
		ibt_free_channel(ch->ch_chan_hdl);
		goto qp_alloc_err;
	}
	mutex_init(&ch->ch_swqe_lock, NULL, MUTEX_DRIVER, NULL);
	ch->ch_head = 1;
	for (ch->ch_tail = 1; ch->ch_tail < ch->ch_num_swqe - 1;
	    ch->ch_tail++) {
		ch->ch_swqe[ch->ch_tail].sw_next = ch->ch_tail + 1;
	}
	ch->ch_swqe[ch->ch_tail].sw_next = 0;
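
	/*
	 * The SWQE array is managed as a singly linked free list threaded
	 * through sw_next, with ch_head naming the next free slot and
	 * ch_tail the last one.  Slot 0 is deliberately never handed out:
	 * a work request ID of 0 is reserved to mark unsignaled
	 * intermediate RDMA posts (see srpt_ch_scq_hdlr()), and a return
	 * of 0 from srpt_ch_alloc_swqe_wrid() means the list is exhausted.
	 */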

	ibt_set_chan_private(ch->ch_chan_hdl, ch);
	return (ch);

qp_alloc_err:
	ibt_free_cq(ch->ch_rcq_hdl);

rcq_alloc_err:
	ibt_free_cq(ch->ch_scq_hdl);

scq_alloc_err:
	cv_destroy(&ch->ch_cv_complete);
	mutex_destroy(&ch->ch_reflock);
	rw_destroy(&ch->ch_rwlock);
	kmem_free(ch, sizeof (*ch));

	return (NULL);
}
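
/*
 * Channel reference counting, in brief: srpt_ch_alloc() returns a channel
 * with one reference held.  Each IU handed to STMF, and each CQ handler
 * invocation, takes a temporary reference via srpt_ch_add_ref().  When the
 * last reference is dropped through srpt_ch_release_ref(), the IB channel,
 * the CQs, the SWQE pool and the channel memory itself are all freed.
 */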

/*
 * srpt_ch_add_ref()
 */
void
srpt_ch_add_ref(srpt_channel_t *ch)
{
	mutex_enter(&ch->ch_reflock);
	ch->ch_refcnt++;
	SRPT_DPRINTF_L4("ch_add_ref, ch (%p), refcnt (%d)",
	    (void *)ch, ch->ch_refcnt);
	ASSERT(ch->ch_refcnt != 0);
	mutex_exit(&ch->ch_reflock);
}

/*
 * srpt_ch_release_ref()
 *
 * A non-zero value for wait causes the thread to block until all
 * references to the channel are released.
 */
void
srpt_ch_release_ref(srpt_channel_t *ch, uint_t wait)
{
	mutex_enter(&ch->ch_reflock);

	SRPT_DPRINTF_L4("ch_release_ref, ch (%p), refcnt (%d), wait (%d)",
	    (void *)ch, ch->ch_refcnt, wait);

	ASSERT(ch->ch_refcnt != 0);

	ch->ch_refcnt--;

	if (ch->ch_refcnt != 0) {
		if (wait) {
			ch->ch_cv_waiters++;
			while (ch->ch_refcnt != 0) {
				cv_wait(&ch->ch_cv_complete, &ch->ch_reflock);
			}
			ch->ch_cv_waiters--;
		} else {
			mutex_exit(&ch->ch_reflock);
			return;
		}
	}

	/*
	 * Last thread out frees the IB resources, locks/conditions and memory
	 */
	if (ch->ch_cv_waiters > 0) {
		/* we're not last, wake someone else up */
		cv_signal(&ch->ch_cv_complete);
		mutex_exit(&ch->ch_reflock);
		return;
	}

	SRPT_DPRINTF_L3("ch_release_ref - release resources");
	if (ch->ch_chan_hdl) {
		SRPT_DPRINTF_L3("ch_release_ref - free channel");
		ibt_free_channel(ch->ch_chan_hdl);
	}

	if (ch->ch_scq_hdl) {
		ibt_free_cq(ch->ch_scq_hdl);
	}

	if (ch->ch_rcq_hdl) {
		ibt_free_cq(ch->ch_rcq_hdl);
	}

	/*
	 * There should be no IU's associated with this
	 * channel on the SCSI session.
	 */
	if (ch->ch_session != NULL) {
		ASSERT(list_is_empty(&ch->ch_session->ss_task_list));

		/*
		 * Currently we only have one channel per session; we will
		 * need to release a reference when support is added
		 * for multi-channel target login.
		 */
		srpt_stp_free_session(ch->ch_session);
		ch->ch_session = NULL;
	}

	kmem_free(ch->ch_swqe, sizeof (srpt_swqe_t) * ch->ch_num_swqe);
	mutex_destroy(&ch->ch_swqe_lock);
	mutex_exit(&ch->ch_reflock);
	mutex_destroy(&ch->ch_reflock);
	rw_destroy(&ch->ch_rwlock);
	kmem_free(ch, sizeof (srpt_channel_t));
}

/*
 * srpt_ch_disconnect()
 */
void
srpt_ch_disconnect(srpt_channel_t *ch)
{
	ibt_status_t		status;

	SRPT_DPRINTF_L3("ch_disconnect, invoked for ch (%p)",
	    (void *)ch);

	rw_enter(&ch->ch_rwlock, RW_WRITER);

	/*
	 * If we are already in the process of disconnecting then
	 * nothing need be done; CM will call back into us when done.
	 */
	if (ch->ch_state == SRPT_CHANNEL_DISCONNECTING) {
		SRPT_DPRINTF_L2("ch_disconnect, called when"
		    " disconnect in progress");
		rw_exit(&ch->ch_rwlock);
		return;
	}
	ch->ch_state = SRPT_CHANNEL_DISCONNECTING;
	rw_exit(&ch->ch_rwlock);

	/*
	 * Initiate the sending of the CM DREQ message; the private data
	 * should be the SRP Target logout IU.  We don't really care about
	 * the remote CM DREP message returned.  We issue this in an
	 * asynchronous manner and will clean up when called back by CM.
	 */
	status = ibt_close_rc_channel(ch->ch_chan_hdl, IBT_NONBLOCKING,
	    NULL, 0, NULL, NULL, 0);

	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_disconnect, close RC channel"
		    " err(%d)", status);
	}
}
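
/*
 * Once the close has been initiated, any work requests still outstanding
 * on the QP complete with IBT_WC_WR_FLUSHED_ERR; the CQ handlers and
 * srpt_ch_cleanup() below recover the associated IU resources.
 */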

/*
 * srpt_ch_cleanup()
 */
void
srpt_ch_cleanup(srpt_channel_t *ch)
{
	srpt_iu_t		*iu;
	srpt_iu_t		*next;
	ibt_wc_t		wc;
	srpt_target_port_t	*tgt;
	srpt_channel_t		*tgt_ch;
	scsi_task_t		*iutask;

	SRPT_DPRINTF_L3("ch_cleanup, invoked for ch(%p), state(%d)",
	    (void *)ch, ch->ch_state);

	/* add a ref for the channel until we're done */
	srpt_ch_add_ref(ch);

	tgt = ch->ch_tgt;
	ASSERT(tgt != NULL);

	/*
	 * Make certain the channel is in the target port's list of
	 * known channels and remove it (releasing the target
	 * port's reference to the channel).
	 */
	mutex_enter(&tgt->tp_ch_list_lock);
	tgt_ch = list_head(&tgt->tp_ch_list);
	while (tgt_ch != NULL) {
		if (tgt_ch == ch) {
			list_remove(&tgt->tp_ch_list, tgt_ch);
			srpt_ch_release_ref(tgt_ch, 0);
			break;
		}
		tgt_ch = list_next(&tgt->tp_ch_list, tgt_ch);
	}
	mutex_exit(&tgt->tp_ch_list_lock);

	if (tgt_ch == NULL) {
		SRPT_DPRINTF_L2("ch_cleanup, target channel no"
		    " longer known to target");
		srpt_ch_release_ref(ch, 0);
		return;
	}

	rw_enter(&ch->ch_rwlock, RW_WRITER);
	ch->ch_state = SRPT_CHANNEL_DISCONNECTING;
	rw_exit(&ch->ch_rwlock);

	/*
	 * Generally the IB CQ's will have been drained prior to
	 * getting to this call, but we check here to make certain.
	 */
	if (ch->ch_scq_hdl) {
		SRPT_DPRINTF_L4("ch_cleanup, start drain (%d)",
		    ch->ch_swqe_posted);
		while ((int)ch->ch_swqe_posted > 0) {
			delay(drv_usectohz(1000));
		}
		ibt_set_cq_handler(ch->ch_scq_hdl, NULL, NULL);
	}

	if (ch->ch_rcq_hdl) {
		ibt_set_cq_handler(ch->ch_rcq_hdl, NULL, NULL);

		while (ibt_poll_cq(ch->ch_rcq_hdl, &wc, 1, NULL) ==
		    IBT_SUCCESS) {
			iu = (srpt_iu_t *)(uintptr_t)wc.wc_id;
			SRPT_DPRINTF_L4("ch_cleanup, recovering"
			    " outstanding RX iu(%p)", (void *)iu);
			mutex_enter(&iu->iu_lock);
			srpt_ioc_repost_recv_iu(iu->iu_ioc, iu);
			/*
			 * Channel reference has not yet been added for this
			 * IU, so do not decrement.
			 */
			mutex_exit(&iu->iu_lock);
		}
	}

	/*
	 * Go through the list of outstanding IUs for the channel's SCSI
	 * session and for each either abort or complete an abort.
	 */
	rw_enter(&ch->ch_rwlock, RW_READER);
	if (ch->ch_session != NULL) {
		rw_enter(&ch->ch_session->ss_rwlock, RW_READER);
		iu = list_head(&ch->ch_session->ss_task_list);
		while (iu != NULL) {
			next = list_next(&ch->ch_session->ss_task_list, iu);

			mutex_enter(&iu->iu_lock);
			if (ch == iu->iu_ch) {
				if (iu->iu_stmf_task == NULL) {
					cmn_err(CE_NOTE,
					    "ch_cleanup, NULL stmf task");
					ASSERT(0);
				}
				iutask = iu->iu_stmf_task;
			} else {
				iutask = NULL;
			}
			mutex_exit(&iu->iu_lock);

			if (iutask != NULL) {
				SRPT_DPRINTF_L4("ch_cleanup, aborting "
				    "task(%p)", (void *)iutask);
				stmf_abort(STMF_QUEUE_TASK_ABORT, iutask,
				    STMF_ABORTED, NULL);
			}
			iu = next;
		}
		rw_exit(&ch->ch_session->ss_rwlock);
	}
	rw_exit(&ch->ch_rwlock);

	srpt_ch_release_ref(ch, 0);
}

/*
 * srpt_ch_rsp_comp()
 *
 * Process a completion for an IB SEND message.  A SEND completion
 * is for an SRP response packet sent back to the initiator.  It
 * will not have a STMF SCSI task associated with it if it was
 * sent for a rejected IU, or was a task management abort response.
 */
static void
srpt_ch_rsp_comp(srpt_channel_t *ch, srpt_iu_t *iu,
	ibt_wc_status_t wc_status)
{
	ASSERT(iu->iu_ch == ch);

	/*
	 * If the work completion indicates failure, decrement the
	 * send posted count.  If it is a flush error, we are
	 * done; for all other errors start a channel disconnect.
	 */
	if (wc_status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_rsp_comp, WC status err(%d)",
		    wc_status);
		atomic_dec_32(&iu->iu_sq_posted_cnt);

		if (wc_status != IBT_WC_WR_FLUSHED_ERR) {
			srpt_ch_disconnect(ch);
		}

		mutex_enter(&iu->iu_lock);
		if (iu->iu_stmf_task == NULL) {
			srpt_ioc_repost_recv_iu(iu->iu_ioc, iu);
			mutex_exit(&iu->iu_lock);
			srpt_ch_release_ref(ch, 0);
		} else {
			/* cleanup handled in task_free */
			mutex_exit(&iu->iu_lock);
		}
		return;
	}

	/*
	 * If the IU response completion is not associated
	 * with a SCSI task, release the IU to return the resource
	 * and the reference to the channel it holds.
	 */
	mutex_enter(&iu->iu_lock);
	atomic_dec_32(&iu->iu_sq_posted_cnt);

	if (iu->iu_stmf_task == NULL) {
		srpt_ioc_repost_recv_iu(iu->iu_ioc, iu);
		mutex_exit(&iu->iu_lock);
		srpt_ch_release_ref(ch, 0);
		return;
	}

	/*
	 * If STMF has requested the IU task be aborted, then notify STMF
	 * the command is now aborted.
	 */
	if ((iu->iu_flags & SRPT_IU_STMF_ABORTING) != 0) {
		scsi_task_t	*abort_task = iu->iu_stmf_task;

		mutex_exit(&iu->iu_lock);
		stmf_abort(STMF_REQUEUE_TASK_ABORT_LPORT, abort_task,
		    STMF_ABORTED, NULL);
		return;
	}

	/*
	 * We should not get a SEND completion where the task has already
	 * completed aborting and STMF has been informed.
	 */
	ASSERT((iu->iu_flags & SRPT_IU_ABORTED) == 0);

	/*
	 * Successful status response completion for SCSI task.
	 * Let STMF know we are done.
	 */
	mutex_exit(&iu->iu_lock);

	stmf_send_status_done(iu->iu_stmf_task, STMF_SUCCESS,
	    STMF_IOF_LPORT_DONE);
}

/*
 * srpt_ch_data_comp()
 *
 * Process an IB completion for an RDMA operation.  This completion
 * should be associated with the last RDMA operation for any
 * data buffer transfer.
 */
static void
srpt_ch_data_comp(srpt_channel_t *ch, stmf_data_buf_t *stmf_dbuf,
	ibt_wc_status_t wc_status)
{
	srpt_ds_dbuf_t		*dbuf;
	srpt_iu_t		*iu;
	stmf_status_t		status;

	ASSERT(stmf_dbuf != NULL);

	dbuf = (srpt_ds_dbuf_t *)stmf_dbuf->db_port_private;

	ASSERT(dbuf != NULL);

	iu = dbuf->db_iu;

	ASSERT(iu != NULL);
	ASSERT(iu->iu_ch == ch);

	/*
	 * If the work completion indicates a non-flush failure, then
	 * start a channel disconnect (asynchronous) and release
	 * the reference to the IU.  The task will be cleaned
	 * up with STMF during channel shutdown processing.
	 */
	if (wc_status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_data_comp, WC status err(%d)",
		    wc_status);
		if (wc_status != IBT_WC_WR_FLUSHED_ERR) {
			srpt_ch_disconnect(ch);
		}
		atomic_dec_32(&iu->iu_sq_posted_cnt);
		return;
	}

	/*
	 * If STMF has requested this task be aborted, then if this is the
	 * last I/O operation outstanding, notify STMF the task has been
	 * aborted and ignore the completion.
	 */
	mutex_enter(&iu->iu_lock);
	atomic_dec_32(&iu->iu_sq_posted_cnt);

	if ((iu->iu_flags & SRPT_IU_STMF_ABORTING) != 0) {
		scsi_task_t	*abort_task = iu->iu_stmf_task;

		mutex_exit(&iu->iu_lock);
		stmf_abort(STMF_REQUEUE_TASK_ABORT_LPORT, abort_task,
		    STMF_ABORTED, NULL);
		return;
	}

	/*
	 * We should not get an RDMA completion where the task has already
	 * completed aborting and STMF has been informed.
	 */
	ASSERT((iu->iu_flags & SRPT_IU_ABORTED) == 0);

	/*
	 * Good completion for the last RDMA op associated with a data
	 * buffer I/O; if requested, initiate status, otherwise let STMF
	 * know we are done.
	 */
	stmf_dbuf->db_xfer_status = STMF_SUCCESS;
	mutex_exit(&iu->iu_lock);

	DTRACE_SRP_8(xfer__done, srpt_channel_t, ch,
	    ibt_wr_ds_t, &(dbuf->db_sge), srpt_iu_t, iu,
	    ibt_send_wr_t, 0, uint32_t, stmf_dbuf->db_data_size,
	    uint32_t, 0, uint32_t, 0,
	    uint32_t, (stmf_dbuf->db_flags & DB_DIRECTION_TO_RPORT) ? 1 : 0);

	if ((stmf_dbuf->db_flags & DB_SEND_STATUS_GOOD) != 0) {
		status = srpt_stp_send_status(dbuf->db_iu->iu_stmf_task, 0);
		if (status == STMF_SUCCESS) {
			return;
		}
		stmf_dbuf->db_xfer_status = STMF_FAILURE;
	}
	stmf_data_xfer_done(dbuf->db_iu->iu_stmf_task, stmf_dbuf, 0);
}
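
/*
 * Both CQ handlers below use the same drain pattern to avoid losing a
 * completion that races with re-arming the CQ.  A sketch of the idea:
 *
 *	for (;;) {
 *		if (ibt_poll_cq(...) == IBT_CQ_EMPTY) {
 *			if (!rearmed) {
 *				ibt_enable_cq_notify(...);
 *				rearmed = 1;
 *				continue;	(poll one more time)
 *			}
 *			break;			(truly drained)
 *		}
 *		... process the returned work completions ...
 *	}
 */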

/*
 * srpt_ch_scq_hdlr()
 */
static void
srpt_ch_scq_hdlr(ibt_cq_hdl_t cq_hdl, void *arg)
{
	ibt_status_t		status;
	srpt_channel_t		*ch = arg;
	ibt_wc_t		wc[SRPT_SEND_WC_POLL_SIZE];
	ibt_wc_t		*wcp;
	int			i;
	uint32_t		cq_rearmed = 0;
	uint32_t		entries;
	srpt_swqe_t		*swqe;

	ASSERT(ch != NULL);

	/* Reference channel for the duration of this call */
	srpt_ch_add_ref(ch);

	for (;;) {
		status = ibt_poll_cq(cq_hdl, &wc[0], SRPT_SEND_WC_POLL_SIZE,
		    &entries);
		if (status == IBT_CQ_EMPTY) {
			/*
			 * CQ drained; if we have not rearmed the CQ
			 * do so and poll to eliminate the race, otherwise
			 * we are done.
			 */
			if (cq_rearmed == 0) {
				ibt_enable_cq_notify(ch->ch_scq_hdl,
				    IBT_NEXT_COMPLETION);
				cq_rearmed = 1;
				continue;
			} else {
				break;
			}
		} else if (status != IBT_SUCCESS) {
			/*
			 * This error should not happen; it indicates
			 * something abnormal has gone wrong and represents
			 * either a hardware or programming logic error.
			 */
			SRPT_DPRINTF_L2("ch_scq_hdlr, unexpected CQ err(%d)",
			    status);
			srpt_ch_disconnect(ch);
			break;
		}

		for (wcp = wc, i = 0; i < entries; i++, wcp++) {

			/*
			 * A zero work ID indicates this CQE is associated
			 * with an intermediate post of an RDMA data transfer
			 * operation.  Since intermediate data requests are
			 * unsignaled, we should only get these if there was
			 * an error.  No action is required.
			 */
			if (wcp->wc_id == 0) {
				continue;
			}
			swqe = ch->ch_swqe + wcp->wc_id;

			switch (swqe->sw_type) {
			case SRPT_SWQE_TYPE_RESP:
				srpt_ch_rsp_comp(ch, (srpt_iu_t *)
				    swqe->sw_addr, wcp->wc_status);
				break;

			case SRPT_SWQE_TYPE_DATA:
				srpt_ch_data_comp(ch, (stmf_data_buf_t *)
				    swqe->sw_addr, wcp->wc_status);
				break;

			default:
				SRPT_DPRINTF_L2("ch_scq_hdlr, bad type(%d)",
				    swqe->sw_type);
				ASSERT(0);
			}

			srpt_ch_free_swqe_wrid(ch, wcp->wc_id);
		}
	}

	srpt_ch_release_ref(ch, 0);
}

/*
 * srpt_ch_rcq_hdlr()
 */
static void
srpt_ch_rcq_hdlr(ibt_cq_hdl_t cq_hdl, void *arg)
{
	ibt_status_t		status;
	srpt_channel_t		*ch = arg;
	ibt_wc_t		wc[SRPT_RECV_WC_POLL_SIZE];
	ibt_wc_t		*wcp;
	int			i;
	uint32_t		entries;
	srpt_iu_t		*iu;
	uint_t			cq_rearmed = 0;

	/*
	 * The channel object will exist while the CQ handler call-back
	 * is installed.
	 */
	ASSERT(ch != NULL);
	srpt_ch_add_ref(ch);

	/*
	 * If we know a channel disconnect has started, do nothing
	 * and let the channel cleanup code recover resources from the CQ.
	 * We are not concerned about races with the state transition
	 * since the code will do the correct thing either way. This
	 * is simply to circumvent rearming the CQ, and it will
	 * catch the state next time.
	 */
	rw_enter(&ch->ch_rwlock, RW_READER);
	if (ch->ch_state == SRPT_CHANNEL_DISCONNECTING) {
		SRPT_DPRINTF_L2("ch_rcq_hdlr, channel disconnecting");
		rw_exit(&ch->ch_rwlock);
		srpt_ch_release_ref(ch, 0);
		return;
	}
	rw_exit(&ch->ch_rwlock);

	for (;;) {
		status = ibt_poll_cq(cq_hdl, &wc[0], SRPT_RECV_WC_POLL_SIZE,
		    &entries);
		if (status == IBT_CQ_EMPTY) {
			/*
			 * OK, empty; if we have not rearmed the CQ
			 * do so, and poll to eliminate the race, otherwise
			 * we are done.
			 */
			if (cq_rearmed == 0) {
				ibt_enable_cq_notify(ch->ch_rcq_hdl,
				    IBT_NEXT_COMPLETION);
				cq_rearmed = 1;
				continue;
			} else {
				break;
			}
		} else if (status != IBT_SUCCESS) {
			/*
			 * This error should not happen; it indicates
			 * something abnormal has gone wrong and represents
			 * either a hardware or programming logic error.
			 */
			SRPT_DPRINTF_L2("ch_rcq_hdlr, unexpected CQ err(%d)",
			    status);
			srpt_ch_disconnect(ch);
			break;
		}

		for (wcp = wc, i = 0; i < entries; i++, wcp++) {

			/*
			 *  Check wc_status before proceeding.  If the
			 *  status indicates a channel problem, stop processing.
			 */
			if (wcp->wc_status != IBT_WC_SUCCESS) {
				if (wcp->wc_status == IBT_WC_WR_FLUSHED_ERR) {
					SRPT_DPRINTF_L2(
					    "ch_rcq, unexpected"
					    " wc_status err(%d)",
					    wcp->wc_status);
					srpt_ch_disconnect(ch);
					/* XXX - verify not leaking IUs */
					goto done;
				} else {
					/* skip IUs with errors */
					SRPT_DPRINTF_L2(
					    "ch_rcq, ERROR comp(%d)",
					    wcp->wc_status);
					/* XXX - verify not leaking IUs */
					continue;
				}
			}

			iu = (srpt_iu_t *)(uintptr_t)wcp->wc_id;
			ASSERT(iu != NULL);

			/*
			 * Process the IU.
			 */
			ASSERT(wcp->wc_type == IBT_WRC_RECV);
			srpt_ch_process_iu(ch, iu);
		}
	}

done:
	srpt_ch_release_ref(ch, 0);
}

/*
 * srpt_ch_srp_cmd()
 */
static int
srpt_ch_srp_cmd(srpt_channel_t *ch, srpt_iu_t *iu)
{
	srp_cmd_req_t		*cmd = (srp_cmd_req_t *)iu->iu_buf;
	srp_indirect_desc_t	*i_desc;
	uint_t			i_di_cnt;
	uint_t			i_do_cnt;
	uint8_t			do_fmt;
	uint8_t			di_fmt;
	uint32_t		*cur_desc_off;
	int			i;
	ibt_status_t		status;
	uint8_t			addlen;

	DTRACE_SRP_2(task__command, srpt_channel_t, ch, srp_cmd_req_t, cmd);
	iu->iu_ch  = ch;
	iu->iu_tag = cmd->cr_tag;

	/*
	 * The SRP specification and SAM require support for bi-directional
	 * data transfer, so we create a single buffer descriptor list in
	 * the IU buffer that covers the data-in and data-out buffers.
	 * In practice we will just see unidirectional transfers with either
	 * data-in or data-out descriptors.  If we were to take that as
	 * fact, we could reduce overhead slightly.
	 */

	/*
	 * The additional length is a 6-bit number in 4-byte words, so
	 * multiply by 4 to get bytes.
	 */
	addlen = cmd->cr_add_cdb_len & 0x3f;	/* mask off 6 bits */

	cur_desc_off = (uint32_t *)(void *)&cmd->cr_add_data;
	cur_desc_off += addlen;			/* 32-bit arithmetic */
	iu->iu_num_rdescs = 0;
	iu->iu_rdescs = (srp_direct_desc_t *)(void *)cur_desc_off;
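
	/*
	 * Worked example (hypothetical values): a cr_add_cdb_len of 4
	 * means 16 bytes of additional CDB data, so cur_desc_off advances
	 * 4 uint32_t words past cr_add_data; the data descriptors begin
	 * immediately after any additional CDB bytes.
	 */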

	/*
	 * Examine buffer description for Data In (i.e. data flows
	 * to the initiator).
	 */
	i_do_cnt = i_di_cnt = 0;
	di_fmt = cmd->cr_buf_fmt >> 4;
	if (di_fmt == SRP_DATA_DESC_DIRECT) {
		iu->iu_num_rdescs = 1;
		cur_desc_off = (uint32_t *)(void *)&iu->iu_rdescs[1];
	} else if (di_fmt == SRP_DATA_DESC_INDIRECT) {
		i_desc = (srp_indirect_desc_t *)iu->iu_rdescs;
		i_di_cnt  = b2h32(i_desc->id_table.dd_len) /
		    sizeof (srp_direct_desc_t);

		/*
		 * Some initiators like OFED occasionally use the wrong counts,
		 * so check total to allow for this.  NOTE: we do not support
		 * reading of the descriptor table from the initiator, so if
		 * not all descriptors are in the IU we drop the task.
		 */
		if (i_di_cnt > (cmd->cr_dicnt + cmd->cr_docnt)) {
			SRPT_DPRINTF_L2("ch_srp_cmd, remote RDMA of"
			    " descriptors not supported");
			SRPT_DPRINTF_L2("ch_srp_cmd, sizeof entry (%d),"
			    " i_di_cnt(%d), cr_dicnt(%d)",
			    (uint_t)sizeof (srp_direct_desc_t),
			    i_di_cnt, cmd->cr_dicnt);
			iu->iu_rdescs = NULL;
			return (1);
		}
		bcopy(&i_desc->id_desc[0], iu->iu_rdescs,
		    sizeof (srp_direct_desc_t) * i_di_cnt);
		iu->iu_num_rdescs += i_di_cnt;
		cur_desc_off = (uint32_t *)(void *)&i_desc->id_desc[i_di_cnt];
	}

	/*
	 * Examine buffer description for Data Out (i.e. data flows
	 * from the initiator).
	 */
	do_fmt = cmd->cr_buf_fmt & 0x0F;
	if (do_fmt == SRP_DATA_DESC_DIRECT) {
		if (di_fmt == SRP_DATA_DESC_DIRECT) {
			bcopy(cur_desc_off, &iu->iu_rdescs[iu->iu_num_rdescs],
			    sizeof (srp_direct_desc_t));
		}
		iu->iu_num_rdescs++;
	} else if (do_fmt == SRP_DATA_DESC_INDIRECT) {
		i_desc = (srp_indirect_desc_t *)cur_desc_off;
		i_do_cnt  = b2h32(i_desc->id_table.dd_len) /
		    sizeof (srp_direct_desc_t);

		/*
		 * Some initiators like OFED occasionally use the wrong counts,
		 * so check total to allow for this.  NOTE: we do not support
		 * reading of the descriptor table from the initiator, so if
		 * not all descriptors are in the IU we drop the task.
		 */
		if ((i_di_cnt + i_do_cnt) > (cmd->cr_dicnt + cmd->cr_docnt)) {
			SRPT_DPRINTF_L2("ch_srp_cmd, remote RDMA of"
			    " descriptors not supported");
			SRPT_DPRINTF_L2("ch_srp_cmd, sizeof entry (%d),"
			    " i_do_cnt(%d), cr_docnt(%d)",
			    (uint_t)sizeof (srp_direct_desc_t),
			    i_do_cnt, cmd->cr_docnt);
			iu->iu_rdescs = NULL;
			return (1);
		}
		bcopy(&i_desc->id_desc[0], &iu->iu_rdescs[iu->iu_num_rdescs],
		    sizeof (srp_direct_desc_t) * i_do_cnt);
		iu->iu_num_rdescs += i_do_cnt;
	}
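
	/*
	 * At this point iu_rdescs holds a single flat array of direct
	 * descriptors: any data-in descriptors first, followed by any
	 * data-out descriptors, with iu_num_rdescs counting both.
	 */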

	iu->iu_tot_xfer_len = 0;
	for (i = 0; i < iu->iu_num_rdescs; i++) {
		iu->iu_rdescs[i].dd_vaddr = b2h64(iu->iu_rdescs[i].dd_vaddr);
		iu->iu_rdescs[i].dd_hdl   = b2h32(iu->iu_rdescs[i].dd_hdl);
		iu->iu_rdescs[i].dd_len   = b2h32(iu->iu_rdescs[i].dd_len);
		iu->iu_tot_xfer_len += iu->iu_rdescs[i].dd_len;
	}

#ifdef DEBUG
	if (srpt_errlevel >= SRPT_LOG_L4) {
		SRPT_DPRINTF_L4("ch_srp_cmd, iu->iu_tot_xfer_len (%d)",
		    iu->iu_tot_xfer_len);
		for (i = 0; i < iu->iu_num_rdescs; i++) {
			SRPT_DPRINTF_L4("ch_srp_cmd, rdescs[%d].dd_vaddr"
			    " (0x%08llx)",
			    i, (u_longlong_t)iu->iu_rdescs[i].dd_vaddr);
			SRPT_DPRINTF_L4("ch_srp_cmd, rdescs[%d].dd_hdl"
			    " (0x%08x)", i, iu->iu_rdescs[i].dd_hdl);
			SRPT_DPRINTF_L4("ch_srp_cmd, rdescs[%d].dd_len (%d)",
			    i, iu->iu_rdescs[i].dd_len);
		}
		SRPT_DPRINTF_L4("ch_srp_cmd, LUN (0x%08lx)",
		    (unsigned long int) *((uint64_t *)(void *) cmd->cr_lun));
	}
#endif
	rw_enter(&ch->ch_rwlock, RW_READER);

	if (ch->ch_state == SRPT_CHANNEL_DISCONNECTING) {
		/*
		 * The channel has begun disconnecting, so ignore the
		 * command, returning the IU resources.
		 */
		rw_exit(&ch->ch_rwlock);
		return (1);
	}

	/*
	 * Once a SCSI task is allocated and assigned to the IU, it
	 * owns those IU resources, which will be held until STMF
	 * is notified the task is done (from an lport perspective).
	 */
	iu->iu_stmf_task = stmf_task_alloc(ch->ch_tgt->tp_lport,
	    ch->ch_session->ss_ss, cmd->cr_lun,
	    SRP_CDB_SIZE + (addlen * 4), 0);
	if (iu->iu_stmf_task == NULL) {
		/*
		 * Could not allocate; return status to the initiator
		 * indicating that we are temporarily unable to process
		 * commands.  If unable to send, immediately return the IU
		 * resource.
		 */
		SRPT_DPRINTF_L2("ch_srp_cmd, SCSI task allocation failure");
		rw_exit(&ch->ch_rwlock);
		mutex_enter(&iu->iu_lock);
		status = srpt_stp_send_response(iu, STATUS_BUSY, 0, 0, 0,
		    NULL, SRPT_NO_FENCE_SEND);
		mutex_exit(&iu->iu_lock);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L2("ch_srp_cmd, error(%d) posting error"
			    " response", status);
			return (1);
		} else {
			return (0);
		}
	}

	iu->iu_stmf_task->task_port_private = iu;
	iu->iu_stmf_task->task_flags = 0;

	if (di_fmt != 0) {
		iu->iu_stmf_task->task_flags |= TF_WRITE_DATA;
	}
	if (do_fmt != 0) {
		iu->iu_stmf_task->task_flags |= TF_READ_DATA;
	}
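
	/*
	 * Note the direction mapping: a data-in descriptor means the
	 * target will write data to the initiator (TF_WRITE_DATA, e.g. a
	 * SCSI READ), while a data-out descriptor means the target will
	 * read data from the initiator (TF_READ_DATA, e.g. a SCSI WRITE).
	 */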

	switch (cmd->cr_task_attr) {
	case SRP_TSK_ATTR_QTYPE_SIMPLE:
		iu->iu_stmf_task->task_flags |=	TF_ATTR_SIMPLE_QUEUE;
		break;

	case SRP_TSK_ATTR_QTYPE_HEAD_OF_Q:
		iu->iu_stmf_task->task_flags |=	TF_ATTR_HEAD_OF_QUEUE;
		break;

	case SRP_TSK_ATTR_QTYPE_ORDERED:
		iu->iu_stmf_task->task_flags |=	TF_ATTR_ORDERED_QUEUE;
		break;

	case SRP_TSK_ATTR_QTYPE_ACA_Q_TAG:
		iu->iu_stmf_task->task_flags |=	TF_ATTR_ACA;
		break;

	default:
		SRPT_DPRINTF_L2("ch_srp_cmd, reserved task attr (%d)",
		    cmd->cr_task_attr);
		iu->iu_stmf_task->task_flags |=	TF_ATTR_ORDERED_QUEUE;
		break;
	}
	iu->iu_stmf_task->task_additional_flags = 0;
	iu->iu_stmf_task->task_priority		= 0;
	iu->iu_stmf_task->task_mgmt_function    = TM_NONE;
	iu->iu_stmf_task->task_max_nbufs	= STMF_BUFS_MAX;
	iu->iu_stmf_task->task_expected_xfer_length = iu->iu_tot_xfer_len;
	iu->iu_stmf_task->task_csn_size		= 0;

	bcopy(cmd->cr_cdb, iu->iu_stmf_task->task_cdb,
	    SRP_CDB_SIZE);
	if (addlen != 0) {
		bcopy(&cmd->cr_add_data,
		    iu->iu_stmf_task->task_cdb + SRP_CDB_SIZE,
		    addlen * 4);
	}

	/*
	 * Add the IU/task to the session and post to STMF.  The task will
	 * remain in the session's list until STMF is informed by SRP that
	 * it is done with the task.
	 */
	DTRACE_SRP_3(scsi__command, srpt_channel_t, iu->iu_ch,
	    scsi_task_t, iu->iu_stmf_task, srp_cmd_req_t, cmd);
	srpt_stp_add_task(ch->ch_session, iu);

	SRPT_DPRINTF_L3("ch_srp_cmd, new task (%p) posted",
	    (void *)iu->iu_stmf_task);
	stmf_post_task(iu->iu_stmf_task, NULL);
	rw_exit(&ch->ch_rwlock);

	return (0);
}

/*
 * srpt_ch_task_mgmt_abort()
 *
 * Returns IBT_SUCCESS if we successfully sent the management response;
 * any other value indicates failure and the IU should be reposted.
 */
static ibt_status_t
srpt_ch_task_mgmt_abort(srpt_channel_t *ch, srpt_iu_t *iu,
	uint64_t tag_to_abort)
{
	srpt_session_t	*session = ch->ch_session;
	srpt_iu_t	*ss_iu;
	ibt_status_t	status;

	/*
	 * Locate the associated task (tag_to_abort) in the
	 * session's active task list.
	 */
	rw_enter(&session->ss_rwlock, RW_READER);
	ss_iu = list_head(&session->ss_task_list);
	while (ss_iu != NULL) {
		mutex_enter(&ss_iu->iu_lock);
		if (tag_to_abort == ss_iu->iu_tag) {
			mutex_exit(&ss_iu->iu_lock);
			break;
		}
		mutex_exit(&ss_iu->iu_lock);
		ss_iu = list_next(&session->ss_task_list, ss_iu);
	}
	rw_exit(&session->ss_rwlock);

	/*
	 * Take appropriate action based on the state of the task
	 * to be aborted:
	 * 1) No longer exists - do nothing.
	 * 2) Previously aborted or status queued - do nothing.
	 * 3) Otherwise - initiate abort.
	 */
	if (ss_iu == NULL) {
		goto send_mgmt_resp;
	}

	mutex_enter(&ss_iu->iu_lock);
	if ((ss_iu->iu_flags & (SRPT_IU_STMF_ABORTING |
	    SRPT_IU_ABORTED | SRPT_IU_RESP_SENT)) != 0) {
		mutex_exit(&ss_iu->iu_lock);
		goto send_mgmt_resp;
	}

	/*
	 * Set the aborting flag and notify STMF of the abort request.  No
	 * additional I/O will be queued for this IU.
	 */
	SRPT_DPRINTF_L3("ch_task_mgmt_abort, task found");
	ss_iu->iu_flags |= SRPT_IU_SRP_ABORTING;
	mutex_exit(&ss_iu->iu_lock);
	stmf_abort(STMF_QUEUE_TASK_ABORT,
	    ss_iu->iu_stmf_task, STMF_ABORTED, NULL);

send_mgmt_resp:
	mutex_enter(&iu->iu_lock);
	status = srpt_stp_send_mgmt_response(iu, SRP_TM_SUCCESS,
	    SRPT_FENCE_SEND);
	mutex_exit(&iu->iu_lock);

	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_task_mgmt_abort, err(%d)"
		    " posting abort response", status);
	}

	return (status);
}

/*
 * srpt_ch_srp_task_mgmt()
 */
static int
srpt_ch_srp_task_mgmt(srpt_channel_t *ch, srpt_iu_t *iu)
{
	srp_tsk_mgmt_t		*tsk = (srp_tsk_mgmt_t *)iu->iu_buf;
	uint8_t			tm_fn;
	ibt_status_t		status;

	SRPT_DPRINTF_L3("ch_srp_task_mgmt, SRP TASK MGMT func(%d)",
	    tsk->tm_function);

	/*
	 * Both the tag and lun fields have the same corresponding offsets
	 * in the srp_tsk_mgmt_t and srp_cmd_req_t structures, so the
	 * cast allows us to use the same dtrace translator.
	 */
	DTRACE_SRP_2(task__command, srpt_channel_t, ch,
	    srp_cmd_req_t, (srp_cmd_req_t *)tsk);

	iu->iu_ch  = ch;
	iu->iu_tag = tsk->tm_tag;

	/*
	 * Task management aborts are processed directly by the SRP driver;
	 * all other task management requests are handed off to STMF.
	 */
	switch (tsk->tm_function) {
	case SRP_TSK_MGMT_ABORT_TASK:
		/*
		 * Initiate SCSI transport protocol specific task abort
		 * logic.
		 */
		status = srpt_ch_task_mgmt_abort(ch, iu, tsk->tm_task_tag);
		if (status != IBT_SUCCESS) {
			/* repost this IU */
			return (1);
		} else {
			return (0);
		}

	case SRP_TSK_MGMT_ABORT_TASK_SET:
		tm_fn = TM_ABORT_TASK_SET;
		break;

	case SRP_TSK_MGMT_CLEAR_TASK_SET:
		tm_fn = TM_CLEAR_TASK_SET;
		break;

	case SRP_TSK_MGMT_LUN_RESET:
		tm_fn = TM_LUN_RESET;
		break;

	case SRP_TSK_MGMT_CLEAR_ACA:
		tm_fn = TM_CLEAR_ACA;
		break;

	default:
		/*
		 * SRP does not support the requested task management
		 * function; return a not supported status in the response.
		 */
		SRPT_DPRINTF_L2("ch_srp_task_mgmt, SRP task mgmt fn(%d)"
		    " not supported", tsk->tm_function);
		mutex_enter(&iu->iu_lock);
		status = srpt_stp_send_mgmt_response(iu,
		    SRP_TM_NOT_SUPPORTED, SRPT_NO_FENCE_SEND);
		mutex_exit(&iu->iu_lock);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L2("ch_srp_task_mgmt, err(%d) posting"
			    " response", status);
			return (1);
		}
		return (0);
	}

	rw_enter(&ch->ch_rwlock, RW_READER);
	if (ch->ch_state == SRPT_CHANNEL_DISCONNECTING) {
		/*
		 * The channel has begun disconnecting, so ignore the
		 * command, returning the IU resources.
		 */
		rw_exit(&ch->ch_rwlock);
		return (1);
	}

	/*
	 * Once a SCSI mgmt task is allocated and assigned to the IU, it
	 * owns those IU resources, which will be held until we inform
	 * STMF that we are done with the task (from an lport's perspective).
	 */
	iu->iu_stmf_task = stmf_task_alloc(ch->ch_tgt->tp_lport,
	    ch->ch_session->ss_ss, tsk->tm_lun, 0, STMF_TASK_EXT_NONE);
	if (iu->iu_stmf_task == NULL) {
		/*
		 * Could not allocate; return status to the initiator
		 * indicating that we are temporarily unable to process
		 * commands.  If unable to send, immediately return the IU
		 * resource.
		 */
		SRPT_DPRINTF_L2("ch_srp_task_mgmt, SCSI task allocation"
		    " failure");
		rw_exit(&ch->ch_rwlock);
		mutex_enter(&iu->iu_lock);
		status = srpt_stp_send_response(iu, STATUS_BUSY, 0, 0, 0,
		    NULL, SRPT_NO_FENCE_SEND);
		mutex_exit(&iu->iu_lock);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L2("ch_srp_task_mgmt, err(%d) posting"
			    " busy response", status);
			/* repost the IU */
			return (1);
		}
		return (0);
	}

	iu->iu_stmf_task->task_port_private = iu;
	iu->iu_stmf_task->task_flags = 0;
	iu->iu_stmf_task->task_additional_flags =
	    TASK_AF_NO_EXPECTED_XFER_LENGTH;
	iu->iu_stmf_task->task_priority = 0;
	iu->iu_stmf_task->task_mgmt_function = tm_fn;
	iu->iu_stmf_task->task_max_nbufs = STMF_BUFS_MAX;
	iu->iu_stmf_task->task_expected_xfer_length = 0;
	iu->iu_stmf_task->task_csn_size = 0;

	/*
	 * Add the IU/task to the session and post to STMF.  The task will
	 * remain in the session's list until STMF is informed by SRP that
	 * it is done with the task.
	 */
	srpt_stp_add_task(ch->ch_session, iu);

	SRPT_DPRINTF_L3("ch_srp_task_mgmt, new mgmt task(%p) posted",
	    (void *)iu->iu_stmf_task);
	stmf_post_task(iu->iu_stmf_task, NULL);
	rw_exit(&ch->ch_rwlock);

	return (0);
}

/*
 * srpt_ch_process_iu()
 */
static void
srpt_ch_process_iu(srpt_channel_t *ch, srpt_iu_t *iu)
{
	srpt_iu_data_t	*iud;
	int		status = 1;

	/*
	 * The IU adds a reference to the channel which will represent
	 * a reference by STMF.  If for whatever reason the IU
	 * is not handed off to STMF, then this reference will be
	 * released.  Otherwise, the reference will be released when
	 * SRP informs STMF that the associated SCSI task is done.
	 */
	srpt_ch_add_ref(ch);

	/*
	 * Validate the login RC channel state.  It is normally active; if
	 * not active, then we need to handle a possible race between the
	 * receipt of an implied RTU and CM calling back to notify of the
	 * state transition.
	 */
	rw_enter(&ch->ch_rwlock, RW_READER);
	if (ch->ch_state == SRPT_CHANNEL_DISCONNECTING) {
		rw_exit(&ch->ch_rwlock);
		goto repost_iu;
	}
	rw_exit(&ch->ch_rwlock);

	iud = iu->iu_buf;

	switch (iud->rx_iu.srp_op) {
	case SRP_IU_CMD:
		status = srpt_ch_srp_cmd(ch, iu);
		break;

	case SRP_IU_TASK_MGMT:
		status = srpt_ch_srp_task_mgmt(ch, iu);
		return;

	case SRP_IU_I_LOGOUT:
		SRPT_DPRINTF_L3("ch_process_iu, SRP INITIATOR LOGOUT");
		/*
		 * Initiators should logout by issuing a CM disconnect
		 * request (DREQ) with the logout IU in the private data;
		 * however, some initiators have been known to send the
		 * IU in-band.  If this happens, just initiate the logout.
		 * Note that we do not return a response, as per the
		 * specification.
		 */
		srpt_stp_logout(ch);
		break;

	case SRP_IU_AER_RSP:
	case SRP_IU_CRED_RSP:
	default:
		/*
		 * We don't send asynchronous events or ask for credit
		 * adjustments, so nothing need be done.  Log that we got
		 * an unexpected IU, then just repost the IU to the SRQ.
		 */
		SRPT_DPRINTF_L2("ch_process_iu, invalid IU from initiator,"
		    " IU opcode(%d)", iud->rx_iu.srp_op);
		break;
	}

	if (status == 0) {
		return;
	}

repost_iu:
	SRPT_DPRINTF_L4("process_iu:  reposting iu %p", (void *)iu);
	mutex_enter(&iu->iu_lock);
	srpt_ioc_repost_recv_iu(iu->iu_ioc, iu);
	mutex_exit(&iu->iu_lock);
	srpt_ch_release_ref(ch, 0);
}

/*
 * srpt_ch_post_send()
 */
ibt_status_t
srpt_ch_post_send(srpt_channel_t *ch, srpt_iu_t *iu, uint32_t len,
	uint_t fence)
{
	ibt_status_t		status;
	ibt_send_wr_t		wr;
	ibt_wr_ds_t		ds;
	uint_t			posted;

	ASSERT(ch != NULL);
	ASSERT(iu != NULL);
	ASSERT(mutex_owned(&iu->iu_lock));

	rw_enter(&ch->ch_rwlock, RW_READER);
	if (ch->ch_state == SRPT_CHANNEL_DISCONNECTING) {
		rw_exit(&ch->ch_rwlock);
		SRPT_DPRINTF_L2("ch_post_send, bad ch state (%d)",
		    ch->ch_state);
		return (IBT_FAILURE);
	}
	rw_exit(&ch->ch_rwlock);

	wr.wr_id = srpt_ch_alloc_swqe_wrid(ch, SRPT_SWQE_TYPE_RESP,
	    (void *)iu);
	if (wr.wr_id == 0) {
		SRPT_DPRINTF_L2("ch_post_send, queue full");
		return (IBT_FAILURE);
	}

	atomic_inc_32(&iu->iu_sq_posted_cnt);

	wr.wr_flags = IBT_WR_SEND_SIGNAL;
	if (fence == SRPT_FENCE_SEND) {
		wr.wr_flags |= IBT_WR_SEND_FENCE;
	}
	wr.wr_opcode = IBT_WRC_SEND;
	wr.wr_trans  = IBT_RC_SRV;
	wr.wr_nds = 1;
	wr.wr_sgl = &ds;

	ds.ds_va = iu->iu_sge.ds_va;
	ds.ds_key = iu->iu_sge.ds_key;
	ds.ds_len = len;

	SRPT_DPRINTF_L4("ch_post_send, posting SRP response to channel"
	    " ds.ds_va (0x%16llx), ds.ds_key (0x%08x),"
	    " ds.ds_len (%d)",
	    (u_longlong_t)ds.ds_va, ds.ds_key, ds.ds_len);

	status = ibt_post_send(ch->ch_chan_hdl, &wr, 1, &posted);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_post_send, post_send failed (%d)",
		    status);
		atomic_dec_32(&iu->iu_sq_posted_cnt);
		srpt_ch_free_swqe_wrid(ch, wr.wr_id);
		return (status);
	}

	return (IBT_SUCCESS);
}

/*
 * srpt_ch_alloc_swqe_wrid()
 */
ibt_wrid_t
srpt_ch_alloc_swqe_wrid(srpt_channel_t *ch,
	srpt_swqe_type_t wqe_type, void *addr)
{
	ibt_wrid_t	wrid;

	mutex_enter(&ch->ch_swqe_lock);
	if (ch->ch_head == ch->ch_tail) {
		mutex_exit(&ch->ch_swqe_lock);
		return ((ibt_wrid_t)0);
	}
	wrid = (ibt_wrid_t)ch->ch_head;
	ch->ch_swqe[ch->ch_head].sw_type = wqe_type;
	ch->ch_swqe[ch->ch_head].sw_addr = addr;
	ch->ch_head = ch->ch_swqe[ch->ch_head].sw_next;
	ch->ch_swqe_posted++;
	mutex_exit(&ch->ch_swqe_lock);
	return (wrid);
}

/*
 * srpt_ch_free_swqe_wrid()
 */
void
srpt_ch_free_swqe_wrid(srpt_channel_t *ch, ibt_wrid_t id)
{
	mutex_enter(&ch->ch_swqe_lock);
	ch->ch_swqe[ch->ch_tail].sw_next = id;
	ch->ch_tail = (uint32_t)id;
	ch->ch_swqe_posted--;
	mutex_exit(&ch->ch_swqe_lock);
}
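
/*
 * Typical work-request ID flow through this pool: srpt_ch_post_send()
 * (and, presumably, the RDMA data path elsewhere in the module, which
 * posts SRPT_SWQE_TYPE_DATA entries) allocates a slot with
 * srpt_ch_alloc_swqe_wrid() and posts it as wr.wr_id; when the send CQ
 * completion arrives, srpt_ch_scq_hdlr() uses the ID to recover the SWQE
 * type and address, then returns the slot via srpt_ch_free_swqe_wrid().
 */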