/* $FreeBSD$ */
/*-
 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "icl_iser.h"

static MALLOC_DEFINE(M_ISER_INITIATOR, "iser_initiator", "iser initiator backend");

/* Register user buffer memory and initialize passive rdma
 *  dto descriptor. Data size is stored in
 *  task->data[ISER_DIR_IN].data_len; protection size
 *  is stored in task->prot[ISER_DIR_IN].data_len.
 */
static int
iser_prepare_read_cmd(struct icl_iser_pdu *iser_pdu)
{
	struct iser_hdr *hdr = &iser_pdu->desc.iser_header;
	struct iser_data_buf *buf_in = &iser_pdu->data[ISER_DIR_IN];
	struct iser_mem_reg *mem_reg;
	int err;

	err = iser_dma_map_task_data(iser_pdu,
				     buf_in,
				     ISER_DIR_IN,
				     DMA_FROM_DEVICE);
	if (err)
		return (err);

	err = iser_reg_rdma_mem(iser_pdu, ISER_DIR_IN);
	if (err) {
		ISER_ERR("Failed to set up Data-IN RDMA");
		return (err);
	}

	mem_reg = &iser_pdu->rdma_reg[ISER_DIR_IN];

	hdr->flags    |= ISER_RSV;
	hdr->read_stag = cpu_to_be32(mem_reg->rkey);
	hdr->read_va   = cpu_to_be64(mem_reg->sge.addr);

	return (0);
}

/* Register user buffer memory and initialize passive rdma
 *  dto descriptor. Data size is stored in
 *  task->data[ISER_DIR_OUT].data_len; protection size
 *  is stored in task->prot[ISER_DIR_OUT].data_len.
 */
static int
iser_prepare_write_cmd(struct icl_iser_pdu *iser_pdu)
{
	struct iser_hdr *hdr = &iser_pdu->desc.iser_header;
	struct iser_data_buf *buf_out = &iser_pdu->data[ISER_DIR_OUT];
	struct iser_mem_reg *mem_reg;
	int err;

	err = iser_dma_map_task_data(iser_pdu,
				     buf_out,
				     ISER_DIR_OUT,
				     DMA_TO_DEVICE);
	if (err)
		return (err);

	err = iser_reg_rdma_mem(iser_pdu, ISER_DIR_OUT);
	if (err) {
		ISER_ERR("Failed to set up Data-OUT RDMA");
		return (err);
	}

	mem_reg = &iser_pdu->rdma_reg[ISER_DIR_OUT];

	hdr->flags     |= ISER_WSV;
	hdr->write_stag = cpu_to_be32(mem_reg->rkey);
	hdr->write_va   = cpu_to_be64(mem_reg->sge.addr);

	return (0);
}

/* Creates a new tx descriptor and sets up its registered header buffer */
void
iser_create_send_desc(struct iser_conn *iser_conn,
		      struct iser_tx_desc *tx_desc)
{
	struct iser_device *device = iser_conn->ib_conn.device;

	ib_dma_sync_single_for_cpu(device->ib_device,
		tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);

	memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
	tx_desc->iser_header.flags = ISER_VER;

	tx_desc->num_sge = 1;

	if (tx_desc->tx_sg[0].lkey != device->mr->lkey) {
		tx_desc->tx_sg[0].lkey = device->mr->lkey;
		ISER_DBG("sdesc %p lkey mismatch, fixing", tx_desc);
	}
}

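/* Unmaps and frees the shared login request/response buffer */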
void
iser_free_login_buf(struct iser_conn *iser_conn)
{
	struct iser_device *device = iser_conn->ib_conn.device;

	if (!iser_conn->login_buf)
		return;

	if (iser_conn->login_req_dma)
		ib_dma_unmap_single(device->ib_device,
				    iser_conn->login_req_dma,
				    ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);

	if (iser_conn->login_resp_dma)
		ib_dma_unmap_single(device->ib_device,
				    iser_conn->login_resp_dma,
				    ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);

	free(iser_conn->login_buf, M_ISER_INITIATOR);

	/* make sure we never redo any unmapping */
	iser_conn->login_req_dma = 0;
	iser_conn->login_resp_dma = 0;
	iser_conn->login_buf = NULL;
}

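/* Allocates one buffer for the login request and response and DMA maps both halves */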
int
iser_alloc_login_buf(struct iser_conn *iser_conn)
{
	struct iser_device *device = iser_conn->ib_conn.device;
	int req_err, resp_err;

	BUG_ON(device == NULL);

	iser_conn->login_buf = malloc(ISCSI_DEF_MAX_RECV_SEG_LEN + ISER_RX_LOGIN_SIZE,
				      M_ISER_INITIATOR, M_WAITOK | M_ZERO);

	if (!iser_conn->login_buf)
		goto out_err;

	iser_conn->login_req_buf  = iser_conn->login_buf;
	iser_conn->login_resp_buf = iser_conn->login_buf +
				    ISCSI_DEF_MAX_RECV_SEG_LEN;

	iser_conn->login_req_dma = ib_dma_map_single(device->ib_device,
						     iser_conn->login_req_buf,
						     ISCSI_DEF_MAX_RECV_SEG_LEN,
						     DMA_TO_DEVICE);

	iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device,
						      iser_conn->login_resp_buf,
						      ISER_RX_LOGIN_SIZE,
						      DMA_FROM_DEVICE);

	req_err  = ib_dma_mapping_error(device->ib_device,
					iser_conn->login_req_dma);
	resp_err = ib_dma_mapping_error(device->ib_device,
					iser_conn->login_resp_dma);

	if (req_err || resp_err) {
		if (req_err)
			iser_conn->login_req_dma = 0;
		if (resp_err)
			iser_conn->login_resp_dma = 0;
		goto free_login_buf;
	}

	return (0);

free_login_buf:
	iser_free_login_buf(iser_conn);

out_err:
	ISER_DBG("unable to alloc or map login buf");
	return (ENOMEM);
}

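/*
 * Allocates the fast registration pool and cmds_max rx descriptors,
 * DMA maps each descriptor and initializes its receive SGE.
 */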
int
iser_alloc_rx_descriptors(struct iser_conn *iser_conn, int cmds_max)
{
	int i, j;
	u64 dma_addr;
	struct iser_rx_desc *rx_desc;
	struct ib_sge       *rx_sg;
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;

	iser_conn->qp_max_recv_dtos = cmds_max;
	iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2;

	if (iser_create_fastreg_pool(ib_conn, cmds_max))
		goto create_rdma_reg_res_failed;

	iser_conn->num_rx_descs = cmds_max;
	iser_conn->rx_descs = malloc(iser_conn->num_rx_descs *
				sizeof(struct iser_rx_desc), M_ISER_INITIATOR,
				M_WAITOK | M_ZERO);
	if (!iser_conn->rx_descs)
		goto rx_desc_alloc_fail;

	rx_desc = iser_conn->rx_descs;

	for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++) {
		dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
					ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(device->ib_device, dma_addr))
			goto rx_desc_dma_map_failed;

		rx_desc->dma_addr = dma_addr;

		rx_sg = &rx_desc->rx_sg;
		rx_sg->addr   = rx_desc->dma_addr;
		rx_sg->length = ISER_RX_PAYLOAD_SIZE;
		rx_sg->lkey   = device->mr->lkey;
	}

	iser_conn->rx_desc_head = 0;

	return (0);

rx_desc_dma_map_failed:
	rx_desc = iser_conn->rx_descs;
	for (j = 0; j < i; j++, rx_desc++)
		ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
				    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
	free(iser_conn->rx_descs, M_ISER_INITIATOR);
	iser_conn->rx_descs = NULL;
rx_desc_alloc_fail:
	iser_free_fastreg_pool(ib_conn);
create_rdma_reg_res_failed:
	ISER_ERR("failed allocating rx descriptors / data buffers");

	return (ENOMEM);
}

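/* Releases the fast registration pool and unmaps and frees all rx descriptors */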
void
iser_free_rx_descriptors(struct iser_conn *iser_conn)
{
	int i;
	struct iser_rx_desc *rx_desc;
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;

	iser_free_fastreg_pool(ib_conn);

	rx_desc = iser_conn->rx_descs;
	for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++)
		ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
				    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);

	free(iser_conn->rx_descs, M_ISER_INITIATOR);

	/* make sure we never redo any unmapping */
	iser_conn->rx_descs = NULL;
}

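/* Builds a scatterlist over a contiguous KVA buffer, splitting it at page boundaries */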
static void
iser_buf_to_sg(void *buf, struct iser_data_buf *data_buf)
{
	struct scatterlist *sg;
	int i;
	size_t len, tlen;
	int offset;

	tlen = data_buf->data_len;

	for (i = 0; 0 < tlen; i++, tlen -= len) {
		sg = &data_buf->sgl[i];
		offset = ((uintptr_t)buf) & ~PAGE_MASK;
		len = min(PAGE_SIZE - offset, tlen);
		sg_set_buf(sg, buf, len);
		buf = (void *)(((u64)buf) + (u64)len);
	}

	data_buf->size = i;
	sg_mark_end(sg);
}

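/* Builds a scatterlist from the pages of an unmapped bio */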
static void
iser_bio_to_sg(struct bio *bp, struct iser_data_buf *data_buf)
{
	struct scatterlist *sg;
	int i;
	size_t len, tlen;
	int offset;

	tlen = bp->bio_bcount;
	offset = bp->bio_ma_offset;

	for (i = 0; 0 < tlen; i++, tlen -= len) {
		sg = &data_buf->sgl[i];
		len = min(PAGE_SIZE - offset, tlen);
		sg_set_page(sg, bp->bio_ma[i], len, offset);
		offset = 0;
	}

	data_buf->size = i;
	sg_mark_end(sg);
}

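/* Converts the CAM data buffer of a SCSI I/O CCB into a scatterlist */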
static int
iser_csio_to_sg(struct ccb_scsiio *csio, struct iser_data_buf *data_buf)
{
	struct ccb_hdr *ccbh;
	int err = 0;

	ccbh = &csio->ccb_h;
	switch ((ccbh->flags & CAM_DATA_MASK)) {
		case CAM_DATA_BIO:
			iser_bio_to_sg((struct bio *) csio->data_ptr, data_buf);
			break;
		case CAM_DATA_VADDR:
			/*
			 * Support KVA buffers for various SCSI commands such as:
			 *  - REPORT_LUNS
			 *  - MODE_SENSE_6
			 *  - INQUIRY
			 *  - SERVICE_ACTION_IN.
			 * The data for these commands is always mapped into KVA.
			 */
			iser_buf_to_sg(csio->data_ptr, data_buf);
			break;
		default:
			ISER_ERR("flags 0x%X unimplemented", ccbh->flags);
			err = EINVAL;
	}
	return (err);
}

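/* Returns true when the send should request a signaled completion */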
static inline bool
iser_signal_comp(u8 sig_count)
{
	return ((sig_count % ISER_SIGNAL_CMD_COUNT) == 0);
}

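/*
 * Sends a SCSI command PDU: builds the scatterlist for the data transfer,
 * registers it for RDMA in the direction indicated by the command flags
 * and posts the send descriptor.
 */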
int
iser_send_command(struct iser_conn *iser_conn,
		  struct icl_iser_pdu *iser_pdu)
{
	struct iser_data_buf *data_buf;
	struct iser_tx_desc *tx_desc = &iser_pdu->desc;
	struct iscsi_bhs_scsi_command *hdr = (struct iscsi_bhs_scsi_command *) &(iser_pdu->desc.iscsi_header);
	struct ccb_scsiio *csio = iser_pdu->csio;
	int err = 0;
	u8 sig_count = ++iser_conn->ib_conn.sig_count;

	/* build the tx desc regd header and add it to the tx desc dto */
	tx_desc->type = ISCSI_TX_SCSI_COMMAND;
	iser_create_send_desc(iser_conn, tx_desc);

	if (hdr->bhssc_flags & BHSSC_FLAGS_R) {
		data_buf = &iser_pdu->data[ISER_DIR_IN];
	} else {
		data_buf = &iser_pdu->data[ISER_DIR_OUT];
	}

	data_buf->sg = csio->data_ptr;
	data_buf->data_len = csio->dxfer_len;

	if (likely(csio->dxfer_len)) {
		err = iser_csio_to_sg(csio, data_buf);
		if (unlikely(err))
			goto send_command_error;
	}

	if (hdr->bhssc_flags & BHSSC_FLAGS_R) {
		err = iser_prepare_read_cmd(iser_pdu);
		if (err)
			goto send_command_error;
	} else if (hdr->bhssc_flags & BHSSC_FLAGS_W) {
		err = iser_prepare_write_cmd(iser_pdu);
		if (err)
			goto send_command_error;
	}

	err = iser_post_send(&iser_conn->ib_conn, tx_desc,
			     iser_signal_comp(sig_count));
	if (!err)
		return (0);

send_command_error:
	ISER_ERR("iser_conn %p itt %u len %u err %d", iser_conn,
			hdr->bhssc_initiator_task_tag,
			hdr->bhssc_expected_data_transfer_length,
			err);
	return (err);
}

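/*
 * Sends a control-type PDU; PDU data is sent from the login request buffer
 * as a second SGE, and a login receive is posted until the connection
 * handoff completes.
 */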
int
iser_send_control(struct iser_conn *iser_conn,
		  struct icl_iser_pdu *iser_pdu)
{
	struct iser_tx_desc *mdesc;
	struct iser_device *device;
	size_t datalen = iser_pdu->icl_pdu.ip_data_len;
	int err;

	mdesc = &iser_pdu->desc;

	/* build the tx desc regd header and add it to the tx desc dto */
	mdesc->type = ISCSI_TX_CONTROL;
	iser_create_send_desc(iser_conn, mdesc);

	device = iser_conn->ib_conn.device;

	if (datalen > 0) {
		struct ib_sge *tx_dsg = &mdesc->tx_sg[1];
		ib_dma_sync_single_for_cpu(device->ib_device,
				iser_conn->login_req_dma, datalen,
				DMA_TO_DEVICE);

		ib_dma_sync_single_for_device(device->ib_device,
			iser_conn->login_req_dma, datalen,
			DMA_TO_DEVICE);

		tx_dsg->addr    = iser_conn->login_req_dma;
		tx_dsg->length  = datalen;
		tx_dsg->lkey    = device->mr->lkey;
		mdesc->num_sge = 2;
	}

	/* During login and for discovery sessions we re-use the login buffer */
	if (!iser_conn->handoff_done) {
		err = iser_post_recvl(iser_conn);
		if (err)
			goto send_control_error;
	}

	err = iser_post_send(&iser_conn->ib_conn, mdesc, true);
	if (!err)
		return (0);

send_control_error:
	ISER_ERR("conn %p failed err %d", iser_conn, err);

	return (err);
}

/**
 * iser_rcv_completion - recv DTO completion
 */
void
iser_rcv_completion(struct iser_rx_desc *rx_desc,
		    unsigned long rx_xfer_len,
		    struct ib_conn *ib_conn)
{
	struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
						   ib_conn);
	struct icl_conn *ic = &iser_conn->icl_conn;
	struct icl_pdu *response;
	struct iscsi_bhs *hdr;
	u64 rx_dma;
	int rx_buflen;
	int outstanding, count, err;

	/* differentiate between login and all other PDUs */
	if ((char *)rx_desc == iser_conn->login_resp_buf) {
		rx_dma = iser_conn->login_resp_dma;
		rx_buflen = ISER_RX_LOGIN_SIZE;
	} else {
		rx_dma = rx_desc->dma_addr;
		rx_buflen = ISER_RX_PAYLOAD_SIZE;
	}

	ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
				   rx_buflen, DMA_FROM_DEVICE);

	hdr = &rx_desc->iscsi_header;

	response = iser_new_pdu(ic, M_NOWAIT);
	response->ip_bhs = hdr;
	response->ip_data_len = rx_xfer_len - ISER_HEADERS_LEN;

	/*
	 * In case we got data in the receive buffer, assign the ip_data_mbuf
	 * to the rx_buffer - later we'll copy it to upper layer buffers
	 */
	if (response->ip_data_len)
		response->ip_data_mbuf = (struct mbuf *)(rx_desc->data);

	ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
				      rx_buflen, DMA_FROM_DEVICE);

	/*
	 * Decrementing conn->post_recv_buf_count only --after-- freeing the
	 * task eliminates the need to worry about tasks which are completed in
	 * parallel to the execution of iser_conn_term. So the code that waits
	 * for the posted rx bufs refcount to become zero handles everything.
	 */
	ib_conn->post_recv_buf_count--;

	if (rx_dma == iser_conn->login_resp_dma)
		goto receive;

	outstanding = ib_conn->post_recv_buf_count;
	if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
		count = min(iser_conn->qp_max_recv_dtos - outstanding,
			    iser_conn->min_posted_rx);
		err = iser_post_recvm(iser_conn, count);
		if (err)
			ISER_ERR("posting %d rx bufs err %d", count, err);
	}

receive:
	(ic->ic_receive)(response);
}

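/* Send completion handler: frees the PDU of a completed control-type send */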
void
iser_snd_completion(struct iser_tx_desc *tx_desc,
		    struct ib_conn *ib_conn)
{
	struct icl_iser_pdu *iser_pdu = container_of(tx_desc, struct icl_iser_pdu, desc);
	struct iser_conn *iser_conn = iser_pdu->iser_conn;

	if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL)
		iser_pdu_free(&iser_conn->icl_conn, &iser_pdu->icl_pdu);
}