/* $FreeBSD: stable/11/sys/dev/iser/icl_iser.c 331769 2018-03-30 18:06:29Z hselasky $ */
/*-
 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "icl_iser.h"

SYSCTL_NODE(_kern, OID_AUTO, iser, CTLFLAG_RW, 0, "iSER module");
int iser_debug = 0;
SYSCTL_INT(_kern_iser, OID_AUTO, debug, CTLFLAG_RWTUN,
    &iser_debug, 0, "Enable iser debug messages");

static MALLOC_DEFINE(M_ICL_ISER, "icl_iser", "iSCSI iser backend");
static uma_zone_t icl_pdu_zone;

static volatile u_int	icl_iser_ncons;
struct iser_global ig;

static void iser_conn_release(struct icl_conn *ic);

static icl_conn_new_pdu_t	iser_conn_new_pdu;
static icl_conn_pdu_free_t	iser_conn_pdu_free;
static icl_conn_pdu_data_segment_length_t iser_conn_pdu_data_segment_length;
static icl_conn_pdu_append_data_t	iser_conn_pdu_append_data;
static icl_conn_pdu_queue_t	iser_conn_pdu_queue;
static icl_conn_handoff_t	iser_conn_handoff;
static icl_conn_free_t		iser_conn_free;
static icl_conn_close_t		iser_conn_close;
static icl_conn_connect_t	iser_conn_connect;
static icl_conn_task_setup_t	iser_conn_task_setup;
static icl_conn_task_done_t	iser_conn_task_done;
static icl_conn_pdu_get_data_t	iser_conn_pdu_get_data;

static kobj_method_t icl_iser_methods[] = {
	KOBJMETHOD(icl_conn_new_pdu, iser_conn_new_pdu),
	KOBJMETHOD(icl_conn_pdu_free, iser_conn_pdu_free),
	KOBJMETHOD(icl_conn_pdu_data_segment_length, iser_conn_pdu_data_segment_length),
	KOBJMETHOD(icl_conn_pdu_append_data, iser_conn_pdu_append_data),
	KOBJMETHOD(icl_conn_pdu_queue, iser_conn_pdu_queue),
	KOBJMETHOD(icl_conn_handoff, iser_conn_handoff),
	KOBJMETHOD(icl_conn_free, iser_conn_free),
	KOBJMETHOD(icl_conn_close, iser_conn_close),
	KOBJMETHOD(icl_conn_connect, iser_conn_connect),
	KOBJMETHOD(icl_conn_task_setup, iser_conn_task_setup),
	KOBJMETHOD(icl_conn_task_done, iser_conn_task_done),
	KOBJMETHOD(icl_conn_pdu_get_data, iser_conn_pdu_get_data),
	{ 0, 0 }
};

DEFINE_CLASS(icl_iser, icl_iser_methods, sizeof(struct iser_conn));
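
/*
 * The ICL core never calls the iser_conn_* functions above directly;
 * it dispatches through the kobj(9) interface these KOBJMETHOD entries
 * implement.  As an illustrative sketch only (the uppercase wrapper
 * names follow the usual kobj convention and are an assumption here,
 * not quoted from the ICL headers), a consumer holding a generic
 * struct icl_conn would do:
 *
 *	struct icl_pdu *ip = ICL_CONN_NEW_PDU(ic, M_NOWAIT);
 *	...
 *	ICL_CONN_PDU_QUEUE(ic, ip);
 *
 * which resolves to iser_conn_new_pdu()/iser_conn_pdu_queue() for
 * connections created by this class.
 */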

/**
 * iser_initialize_headers() - Initialize task headers
 * @pdu:        iser pdu
 * @iser_conn:  iser connection
 *
 * Notes:
 * This routine may race with the iser teardown flow during SCSI
 * error-handling TMFs.  For TMFs, the caller should therefore hold
 * the state mutex to avoid dereferencing an IB device that the
 * racing teardown sequence may already have terminated.
 */
int
iser_initialize_headers(struct icl_iser_pdu *pdu, struct iser_conn *iser_conn)
{
	struct iser_tx_desc *tx_desc = &pdu->desc;
	struct iser_device *device = iser_conn->ib_conn.device;
	u64 dma_addr;
	int ret = 0;

	dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
				ISER_HEADERS_LEN, DMA_TO_DEVICE);
	if (ib_dma_mapping_error(device->ib_device, dma_addr)) {
		ret = -ENOMEM;
		goto out;
	}

	tx_desc->mapped = true;
	tx_desc->dma_addr = dma_addr;
	tx_desc->tx_sg[0].addr   = tx_desc->dma_addr;
	tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
	tx_desc->tx_sg[0].lkey   = device->mr->lkey;

out:
	return (ret);
}
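
/*
 * The header mapping established above stays in place for the lifetime
 * of the task and is released with ib_dma_unmap_single() in
 * iser_conn_task_done() below.  A minimal usage sketch (illustrative
 * only, error handling omitted):
 *
 *	struct icl_pdu *ip = iser_new_pdu(ic, M_NOWAIT);
 *	if (ip != NULL &&
 *	    iser_initialize_headers(icl_to_iser_pdu(ip), iser_conn) == 0)
 *		iser_send_control(iser_conn, icl_to_iser_pdu(ip));
 */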

int
iser_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
			  const void *addr, size_t len, int flags)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	uint8_t opcode = request->ip_bhs->bhs_opcode & ISCSI_OPCODE_MASK;

	/*
	 * Compare the masked opcode for equality; bitwise-ANDing the raw
	 * opcode byte against the opcode values would also match
	 * unrelated opcodes that happen to share bits with them.
	 */
	if (opcode == ISCSI_BHS_OPCODE_LOGIN_REQUEST ||
	    opcode == ISCSI_BHS_OPCODE_TEXT_REQUEST) {
		ISER_DBG("copy to login buff");
		memcpy(iser_conn->login_req_buf, addr, len);
		request->ip_data_len = len;
	}

	return (0);
}

void
iser_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
		       size_t off, void *addr, size_t len)
{
	/*
	 * If we have received data, copy it to the upper layer buffer.
	 * In this backend ip_data_mbuf points at a flat receive buffer
	 * rather than a real mbuf chain, so offset it byte-wise; adding
	 * 'off' to the struct mbuf pointer directly would scale the
	 * offset by sizeof(struct mbuf).
	 */
	if (ip->ip_data_mbuf != NULL)
		memcpy(addr, (const char *)ip->ip_data_mbuf + off, len);
}

/*
 * Allocate an icl_pdu with an empty BHS to be filled in by the caller.
 */
struct icl_pdu *
iser_new_pdu(struct icl_conn *ic, int flags)
{
	struct icl_iser_pdu *iser_pdu;
	struct icl_pdu *ip;
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	iser_pdu = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
	if (iser_pdu == NULL) {
		ISER_WARN("failed to allocate %zd bytes", sizeof(*iser_pdu));
		return (NULL);
	}

	iser_pdu->iser_conn = iser_conn;
	ip = &iser_pdu->icl_pdu;
	ip->ip_conn = ic;
	ip->ip_bhs = &iser_pdu->desc.iscsi_header;

	return (ip);
}

struct icl_pdu *
iser_conn_new_pdu(struct icl_conn *ic, int flags)
{
	return (iser_new_pdu(ic, flags));
}

void
iser_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);

	uma_zfree(icl_pdu_zone, iser_pdu);
}

size_t
iser_conn_pdu_data_segment_length(struct icl_conn *ic,
				  const struct icl_pdu *request)
{
	uint32_t len = 0;

	/* DataSegmentLength is a 24-bit big-endian field in the BHS. */
	len += request->ip_bhs->bhs_data_segment_len[0];
	len <<= 8;
	len += request->ip_bhs->bhs_data_segment_len[1];
	len <<= 8;
	len += request->ip_bhs->bhs_data_segment_len[2];

	return (len);
}
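
/*
 * Worked example for the decode above: DataSegmentLength bytes
 * { 0x00, 0x10, 0x00 } yield (0x00 << 16) | (0x10 << 8) | 0x00 = 4096.
 * The matching encode (a sketch for illustration; this backend fills
 * the BHS elsewhere) would be:
 *
 *	bhs->bhs_data_segment_len[0] = len >> 16;
 *	bhs->bhs_data_segment_len[1] = len >> 8;
 *	bhs->bhs_data_segment_len[2] = len;
 */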

void
iser_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
	iser_pdu_free(ic, ip);
}

static bool
is_control_opcode(uint8_t opcode)
{
	bool is_control = false;

	switch (opcode & ISCSI_OPCODE_MASK) {
	case ISCSI_BHS_OPCODE_NOP_OUT:
	case ISCSI_BHS_OPCODE_LOGIN_REQUEST:
	case ISCSI_BHS_OPCODE_LOGOUT_REQUEST:
	case ISCSI_BHS_OPCODE_TEXT_REQUEST:
		is_control = true;
		break;
	case ISCSI_BHS_OPCODE_SCSI_COMMAND:
		is_control = false;
		break;
	default:
		ISER_ERR("unknown opcode %d", opcode);
	}

	return (is_control);
}

void
iser_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
	int ret;

	if (iser_conn->state != ISER_CONN_UP)
		return;

	ret = iser_initialize_headers(iser_pdu, iser_conn);
	if (ret) {
		ISER_ERR("Failed to map TX descriptor pdu %p", iser_pdu);
		return;
	}

	if (is_control_opcode(ip->ip_bhs->bhs_opcode)) {
		ret = iser_send_control(iser_conn, iser_pdu);
		if (unlikely(ret))
			ISER_ERR("Failed to send control pdu %p", iser_pdu);
	} else {
		ret = iser_send_command(iser_conn, iser_pdu);
		if (unlikely(ret))
			ISER_ERR("Failed to send command pdu %p", iser_pdu);
	}
}

static struct icl_conn *
iser_new_conn(const char *name, struct mtx *lock)
{
	struct iser_conn *iser_conn;
	struct icl_conn *ic;

	refcount_acquire(&icl_iser_ncons);

	iser_conn = (struct iser_conn *)kobj_create(&icl_iser_class,
	    M_ICL_ISER, M_WAITOK | M_ZERO);
	if (!iser_conn) {
		ISER_ERR("failed to allocate iser conn");
		refcount_release(&icl_iser_ncons);
		return (NULL);
	}

	cv_init(&iser_conn->up_cv, "iser_cv");
	sx_init(&iser_conn->state_mutex, "iser_conn_state_mutex");
	mtx_init(&iser_conn->ib_conn.beacon.flush_lock, "iser_flush_lock",
	    NULL, MTX_DEF);
	cv_init(&iser_conn->ib_conn.beacon.flush_cv, "flush_cv");
	mtx_init(&iser_conn->ib_conn.lock, "iser_lock", NULL, MTX_DEF);

	ic = &iser_conn->icl_conn;
	ic->ic_lock = lock;
	ic->ic_name = name;
	ic->ic_offload = strdup("iser", M_TEMP);
	ic->ic_iser = true;
	ic->ic_unmapped = true;

	return (ic);
}

void
iser_conn_free(struct icl_conn *ic)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	iser_conn_release(ic);
	mtx_destroy(&iser_conn->ib_conn.lock);
	cv_destroy(&iser_conn->ib_conn.beacon.flush_cv);
	mtx_destroy(&iser_conn->ib_conn.beacon.flush_lock);
	sx_destroy(&iser_conn->state_mutex);
	cv_destroy(&iser_conn->up_cv);
	kobj_delete((struct kobj *)iser_conn, M_ICL_ISER);
	refcount_release(&icl_iser_ncons);
}

int
iser_conn_handoff(struct icl_conn *ic, int fd)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	int error = 0;

	sx_xlock(&iser_conn->state_mutex);
	if (iser_conn->state != ISER_CONN_UP) {
		error = EINVAL;
		ISER_ERR("iser_conn %p state is %d, teardown started",
			 iser_conn, iser_conn->state);
		goto out;
	}

	error = iser_alloc_rx_descriptors(iser_conn, ic->ic_maxtags);
	if (error)
		goto out;

	error = iser_post_recvm(iser_conn, iser_conn->min_posted_rx);
	if (error)
		goto post_error;

	iser_conn->handoff_done = true;

	sx_xunlock(&iser_conn->state_mutex);
	return (error);

post_error:
	iser_free_rx_descriptors(iser_conn);
out:
	sx_xunlock(&iser_conn->state_mutex);
	return (error);
}

/**
 * Frees all connection objects.
 */
static void
iser_conn_release(struct icl_conn *ic)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_conn *curr, *tmp;

	mtx_lock(&ig.connlist_mutex);
	/*
	 * Search for the iser connection in the global list.  It may not
	 * be there if connection establishment failed.
	 */
	list_for_each_entry_safe(curr, tmp, &ig.connlist, conn_list) {
		if (iser_conn == curr) {
			ISER_WARN("found iser_conn %p", iser_conn);
			list_del(&iser_conn->conn_list);
		}
	}
	mtx_unlock(&ig.connlist_mutex);

	/*
	 * In case we are reconnecting or removing the session, we need to
	 * release the IB resources (which is safe to do more than once).
	 */
	sx_xlock(&iser_conn->state_mutex);
	iser_free_ib_conn_res(iser_conn, true);
	sx_xunlock(&iser_conn->state_mutex);

	if (ib_conn->cma_id != NULL) {
		rdma_destroy_id(ib_conn->cma_id);
		ib_conn->cma_id = NULL;
	}
}

void
iser_conn_close(struct icl_conn *ic)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	ISER_INFO("closing conn %p", iser_conn);

	sx_xlock(&iser_conn->state_mutex);
	/*
	 * If the iser connection is waiting on the condition variable
	 * (state PENDING) and we try to close it before connection
	 * establishment, we need to signal it so that connection release
	 * can proceed properly.
	 */
	if (!iser_conn_terminate(iser_conn) &&
	    iser_conn->state == ISER_CONN_PENDING)
		cv_signal(&iser_conn->up_cv);
	sx_xunlock(&iser_conn->state_mutex);
}

int
iser_conn_connect(struct icl_conn *ic, int domain, int socktype,
		int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	int err = 0;

	iser_conn_release(ic);

	sx_xlock(&iser_conn->state_mutex);
	/* The device is known only after address resolution. */
	ib_conn->device = NULL;
	iser_conn->handoff_done = false;

	iser_conn->state = ISER_CONN_PENDING;

	ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler,
			(void *)iser_conn, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(ib_conn->cma_id)) {
		err = -PTR_ERR(ib_conn->cma_id);
		ISER_ERR("rdma_create_id failed: %d", err);
		goto id_failure;
	}

	err = rdma_resolve_addr(ib_conn->cma_id, from_sa, to_sa, 1000);
	if (err) {
		ISER_ERR("rdma_resolve_addr failed: %d", err);
		if (err < 0)
			err = -err;
		goto addr_failure;
	}

	ISER_DBG("before cv_wait: %p", iser_conn);
	cv_wait(&iser_conn->up_cv, &iser_conn->state_mutex);
	ISER_DBG("after cv_wait: %p", iser_conn);

	if (iser_conn->state != ISER_CONN_UP) {
		err = EIO;
		goto addr_failure;
	}

	err = iser_alloc_login_buf(iser_conn);
	if (err)
		goto addr_failure;
	sx_xunlock(&iser_conn->state_mutex);

	mtx_lock(&ig.connlist_mutex);
	list_add(&iser_conn->conn_list, &ig.connlist);
	mtx_unlock(&ig.connlist_mutex);

	return (0);

id_failure:
	ib_conn->cma_id = NULL;
addr_failure:
	sx_xunlock(&iser_conn->state_mutex);
	return (err);
}
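
/*
 * Connection establishment above is event driven: rdma_resolve_addr()
 * starts the RDMA CM state machine, and iser_cma_handler() (the
 * callback registered with rdma_create_id()) advances it through the
 * address/route resolution and establishment events until the
 * connection reaches ISER_CONN_UP and up_cv is signalled, which is
 * what the cv_wait() in iser_conn_connect() blocks on.
 */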

int
iser_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
		     struct ccb_scsiio *csio,
		     uint32_t *task_tagp, void **prvp)
{
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);

	*prvp = ip;
	iser_pdu->csio = csio;

	return (0);
}

void
iser_conn_task_done(struct icl_conn *ic, void *prv)
{
	struct icl_pdu *ip = prv;
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
	struct iser_device *device = iser_pdu->iser_conn->ib_conn.device;
	struct iser_tx_desc *tx_desc = &iser_pdu->desc;

	if (iser_pdu->dir[ISER_DIR_IN]) {
		iser_unreg_rdma_mem(iser_pdu, ISER_DIR_IN);
		iser_dma_unmap_task_data(iser_pdu,
					 &iser_pdu->data[ISER_DIR_IN],
					 DMA_FROM_DEVICE);
	}

	if (iser_pdu->dir[ISER_DIR_OUT]) {
		iser_unreg_rdma_mem(iser_pdu, ISER_DIR_OUT);
		iser_dma_unmap_task_data(iser_pdu,
					 &iser_pdu->data[ISER_DIR_OUT],
					 DMA_TO_DEVICE);
	}

	if (likely(tx_desc->mapped)) {
		ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
				    ISER_HEADERS_LEN, DMA_TO_DEVICE);
		tx_desc->mapped = false;
	}

	iser_pdu_free(ic, ip);
}

static int
iser_limits(size_t *limitp)
{
	*limitp = 128 * 1024;

	return (0);
}
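
/*
 * 128 * 1024 = 131072 bytes.  This is the data segment limit the iser
 * backend reports through icl_register() below; how the ICL consumer
 * applies that bound during login negotiation is up to the ICL layer
 * and is not spelled out here.
 */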

static int
icl_iser_load(void)
{
	int error;

	ISER_DBG("Starting iSER datamover...");

	icl_pdu_zone = uma_zcreate("icl_iser_pdu", sizeof(struct icl_iser_pdu),
				   NULL, NULL, NULL, NULL,
				   UMA_ALIGN_PTR, 0);
	if (icl_pdu_zone == NULL) {
		ISER_ERR("failed to create PDU zone");
		return (ENOMEM);
	}

	refcount_init(&icl_iser_ncons, 0);

	memset(&ig, 0, sizeof(struct iser_global));

	/* Device init is called only after the first address resolution. */
	sx_init(&ig.device_list_mutex,  "global_device_lock");
	INIT_LIST_HEAD(&ig.device_list);
	mtx_init(&ig.connlist_mutex, "iser_global_conn_lock", NULL, MTX_DEF);
	INIT_LIST_HEAD(&ig.connlist);
	sx_init(&ig.close_conns_mutex,  "global_close_conns_lock");

	/* Register last, once the global state is ready for use. */
	error = icl_register("iser", true, 0, iser_limits, iser_new_conn);
	KASSERT(error == 0, ("failed to register iser"));

	return (error);
}

static int
icl_iser_unload(void)
{
	ISER_DBG("Removing iSER datamover...");

	if (icl_iser_ncons != 0)
		return (EBUSY);

	/* Unregister first so that no new connections can show up. */
	icl_unregister("iser", true);

	sx_destroy(&ig.close_conns_mutex);
	mtx_destroy(&ig.connlist_mutex);
	sx_destroy(&ig.device_list_mutex);

	uma_zdestroy(icl_pdu_zone);

	return (0);
}

static int
icl_iser_modevent(module_t mod, int what, void *arg)
{
	switch (what) {
	case MOD_LOAD:
		return (icl_iser_load());
	case MOD_UNLOAD:
		return (icl_iser_unload());
	default:
		return (EINVAL);
	}
}

moduledata_t icl_iser_data = {
	.name = "icl_iser",
	.evhand = icl_iser_modevent,
	.priv = 0
};

DECLARE_MODULE(icl_iser, icl_iser_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
MODULE_DEPEND(icl_iser, icl, 1, 1, 1);
MODULE_DEPEND(icl_iser, ibcore, 1, 1, 1);
MODULE_DEPEND(icl_iser, linuxkpi, 1, 1, 1);
MODULE_VERSION(icl_iser, 1);