// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * iSCSI Initiator over TCP/IP Data-Path
 *
 * Copyright (C) 2004 Dmitry Yusupov
 * Copyright (C) 2004 Alex Aizman
 * Copyright (C) 2005 - 2006 Mike Christie
 * Copyright (C) 2006 Red Hat, Inc.  All rights reserved.
 * maintained by open-iscsi@googlegroups.com
 *
 * See the file COPYING included with this distribution for more details.
 *
 * Credits:
 *	Christoph Hellwig
 *	FUJITA Tomonori
 *	Arne Redlich
 *	Zhenyu Wang
 */

#include <crypto/hash.h>
#include <linux/types.h>
#include <linux/inet.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/file.h>
#include <linux/blkdev.h>
#include <linux/delay.h>
#include <linux/kfifo.h>
#include <linux/scatterlist.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <net/tcp.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi.h>
#include <scsi/scsi_transport_iscsi.h>
#include <trace/events/iscsi.h>
#include <trace/events/sock.h>

#include "iscsi_tcp.h"

MODULE_AUTHOR("Mike Christie <michaelc@cs.wisc.edu>, "
	      "Dmitry Yusupov <dmitry_yus@yahoo.com>, "
	      "Alex Aizman <itn780@yahoo.com>");
MODULE_DESCRIPTION("iSCSI/TCP data-path");
MODULE_LICENSE("GPL");

static struct scsi_transport_template *iscsi_sw_tcp_scsi_transport;
static const struct scsi_host_template iscsi_sw_tcp_sht;
static struct iscsi_transport iscsi_sw_tcp_transport;

static unsigned int iscsi_max_lun = ~0;
module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);

static bool iscsi_recv_from_iscsi_q;
module_param_named(recv_from_iscsi_q, iscsi_recv_from_iscsi_q, bool, 0644);
MODULE_PARM_DESC(recv_from_iscsi_q, "Set to true to read iSCSI data/headers from the iscsi_q workqueue. The default is false which will perform reads from the network softirq context.");

static int iscsi_sw_tcp_dbg;
module_param_named(debug_iscsi_tcp, iscsi_sw_tcp_dbg, int,
		   S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(debug_iscsi_tcp, "Turn on debugging for the iscsi_tcp module. "
		 "Set to 1 to turn on and zero to turn off. The default is off.");

#define ISCSI_SW_TCP_DBG(_conn, dbg_fmt, arg...)		\
	do {							\
		if (iscsi_sw_tcp_dbg)				\
			iscsi_conn_printk(KERN_INFO, _conn,	\
					     "%s " dbg_fmt,	\
					     __func__, ##arg);	\
		iscsi_dbg_trace(trace_iscsi_dbg_sw_tcp,		\
				&(_conn)->cls_conn->dev,	\
				"%s " dbg_fmt, __func__, ##arg);\
	} while (0)


/**
 * iscsi_sw_tcp_recv - TCP receive in sendfile fashion
 * @rd_desc: read descriptor
 * @skb: socket buffer
 * @offset: offset in skb
 * @len: skb->len - offset
 */
static int iscsi_sw_tcp_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
			     unsigned int offset, size_t len)
{
	struct iscsi_conn *conn = rd_desc->arg.data;
	unsigned int consumed, total_consumed = 0;
	int status;

	ISCSI_SW_TCP_DBG(conn, "in %d bytes\n", skb->len - offset);

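	/*
	 * Feed the skb to the PDU state machine in chunks until nothing
	 * more is consumed or the whole skb has been processed.
	 */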
	do {
		status = 0;
		consumed = iscsi_tcp_recv_skb(conn, skb, offset, 0, &status);
		offset += consumed;
		total_consumed += consumed;
	} while (consumed != 0 && status != ISCSI_TCP_SKB_DONE);

	ISCSI_SW_TCP_DBG(conn, "read %d bytes status %d\n",
			 skb->len - offset, status);
	return total_consumed;
}

/**
 * iscsi_sw_sk_state_check - check socket state
 * @sk: socket
 *
 * If the socket is in CLOSE or CLOSE_WAIT we should
 * not close the connection if there is still some
 * data pending.
 *
 * Must be called with sk_callback_lock.
 */
static inline int iscsi_sw_sk_state_check(struct sock *sk)
{
	struct iscsi_conn *conn = sk->sk_user_data;

	if ((sk->sk_state == TCP_CLOSE_WAIT || sk->sk_state == TCP_CLOSE) &&
	    (conn->session->state != ISCSI_STATE_LOGGING_OUT) &&
	    !atomic_read(&sk->sk_rmem_alloc)) {
		ISCSI_SW_TCP_DBG(conn, "TCP_CLOSE|TCP_CLOSE_WAIT\n");
		iscsi_conn_failure(conn, ISCSI_ERR_TCP_CONN_CLOSE);
		return -ECONNRESET;
	}
	return 0;
}

static void iscsi_sw_tcp_recv_data(struct iscsi_conn *conn)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	struct sock *sk = tcp_sw_conn->sock->sk;
	read_descriptor_t rd_desc;

	/*
	 * Use rd_desc to pass 'conn' to iscsi_sw_tcp_recv.
	 * We set count to 1 because we want the network layer to
	 * hand us all the skbs that are available. iscsi_sw_tcp_recv
	 * handles pdus that cross buffers or pdus that still need data.
	 */
	rd_desc.arg.data = conn;
	rd_desc.count = 1;

	tcp_read_sock(sk, &rd_desc, iscsi_sw_tcp_recv);

	/* If we had to (atomically) map a highmem page,
	 * unmap it now. */
	iscsi_tcp_segment_unmap(&tcp_conn->in.segment);

	iscsi_sw_sk_state_check(sk);
}

static void iscsi_sw_tcp_recv_data_work(struct work_struct *work)
{
	struct iscsi_conn *conn = container_of(work, struct iscsi_conn,
					       recvwork);
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	struct sock *sk = tcp_sw_conn->sock->sk;

	lock_sock(sk);
	iscsi_sw_tcp_recv_data(conn);
	release_sock(sk);
}

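/*
 * sk->sk_data_ready callback, invoked from the network softirq when new
 * data arrives. Depending on the recv_from_iscsi_q module parameter the
 * data is either read here directly or deferred to the iscsi_q workqueue.
 */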
static void iscsi_sw_tcp_data_ready(struct sock *sk)
{
	struct iscsi_sw_tcp_conn *tcp_sw_conn;
	struct iscsi_tcp_conn *tcp_conn;
	struct iscsi_conn *conn;

	trace_sk_data_ready(sk);

	read_lock_bh(&sk->sk_callback_lock);
	conn = sk->sk_user_data;
	if (!conn) {
		read_unlock_bh(&sk->sk_callback_lock);
		return;
	}
	tcp_conn = conn->dd_data;
	tcp_sw_conn = tcp_conn->dd_data;

	if (tcp_sw_conn->queue_recv)
		iscsi_conn_queue_recv(conn);
	else
		iscsi_sw_tcp_recv_data(conn);
	read_unlock_bh(&sk->sk_callback_lock);
}

static void iscsi_sw_tcp_state_change(struct sock *sk)
{
	struct iscsi_tcp_conn *tcp_conn;
	struct iscsi_sw_tcp_conn *tcp_sw_conn;
	struct iscsi_conn *conn;
	void (*old_state_change)(struct sock *);

	read_lock_bh(&sk->sk_callback_lock);
	conn = sk->sk_user_data;
	if (!conn) {
		read_unlock_bh(&sk->sk_callback_lock);
		return;
	}

	iscsi_sw_sk_state_check(sk);

	tcp_conn = conn->dd_data;
	tcp_sw_conn = tcp_conn->dd_data;
	old_state_change = tcp_sw_conn->old_state_change;

	read_unlock_bh(&sk->sk_callback_lock);

	old_state_change(sk);
}

/**
 * iscsi_sw_tcp_write_space - Called when more output buffer space is available
 * @sk: socket space is available for
 **/
static void iscsi_sw_tcp_write_space(struct sock *sk)
{
	struct iscsi_conn *conn;
	struct iscsi_tcp_conn *tcp_conn;
	struct iscsi_sw_tcp_conn *tcp_sw_conn;
	void (*old_write_space)(struct sock *);

	read_lock_bh(&sk->sk_callback_lock);
	conn = sk->sk_user_data;
	if (!conn) {
		read_unlock_bh(&sk->sk_callback_lock);
		return;
	}

	tcp_conn = conn->dd_data;
	tcp_sw_conn = tcp_conn->dd_data;
	old_write_space = tcp_sw_conn->old_write_space;
	read_unlock_bh(&sk->sk_callback_lock);

	old_write_space(sk);

	ISCSI_SW_TCP_DBG(conn, "iscsi_write_space\n");
	iscsi_conn_queue_xmit(conn);
}

static void iscsi_sw_tcp_conn_set_callbacks(struct iscsi_conn *conn)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	struct sock *sk = tcp_sw_conn->sock->sk;

	/* assign new callbacks */
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data = conn;
	tcp_sw_conn->old_data_ready = sk->sk_data_ready;
	tcp_sw_conn->old_state_change = sk->sk_state_change;
	tcp_sw_conn->old_write_space = sk->sk_write_space;
	sk->sk_data_ready = iscsi_sw_tcp_data_ready;
	sk->sk_state_change = iscsi_sw_tcp_state_change;
	sk->sk_write_space = iscsi_sw_tcp_write_space;
	write_unlock_bh(&sk->sk_callback_lock);
}

static void
iscsi_sw_tcp_conn_restore_callbacks(struct iscsi_conn *conn)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	struct sock *sk = tcp_sw_conn->sock->sk;

	/* restore socket callbacks, see also: iscsi_conn_set_callbacks() */
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data    = NULL;
	sk->sk_data_ready   = tcp_sw_conn->old_data_ready;
	sk->sk_state_change = tcp_sw_conn->old_state_change;
	sk->sk_write_space  = tcp_sw_conn->old_write_space;
	sk->sk_no_check_tx = 0;
	write_unlock_bh(&sk->sk_callback_lock);
}

/**
 * iscsi_sw_tcp_xmit_segment - transmit segment
 * @tcp_conn: the iSCSI TCP connection
 * @segment: the buffer to transmit
 *
 * This function transmits as much of the buffer as
 * the network layer will accept, and returns the number of
 * bytes transmitted.
 *
 * If CRC hashing is enabled, the function will compute the
 * hash as it goes. When the entire segment has been transmitted,
 * it will retrieve the hash value and send it as well.
 */
static int iscsi_sw_tcp_xmit_segment(struct iscsi_tcp_conn *tcp_conn,
				     struct iscsi_segment *segment)
{
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	struct socket *sk = tcp_sw_conn->sock;
	unsigned int copied = 0;
	int r = 0;

	while (!iscsi_tcp_segment_done(tcp_conn, segment, 0, r)) {
		struct scatterlist *sg;
		struct msghdr msg = {};
		struct bio_vec bv;
		unsigned int offset, copy;

		r = 0;
		offset = segment->copied;
		copy = segment->size - offset;

		if (segment->total_copied + segment->size < segment->total_size)
			msg.msg_flags |= MSG_MORE;

		if (tcp_sw_conn->queue_recv)
			msg.msg_flags |= MSG_DONTWAIT;

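		/*
		 * Scatterlist pages are handed to the socket zero-copy via
		 * MSG_SPLICE_PAGES when no data digest has to be computed
		 * over them; otherwise the data is copied by sendmsg.
		 */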
		if (!segment->data) {
			if (!tcp_conn->iscsi_conn->datadgst_en)
				msg.msg_flags |= MSG_SPLICE_PAGES;
			sg = segment->sg;
			offset += segment->sg_offset + sg->offset;
			bvec_set_page(&bv, sg_page(sg), copy, offset);
		} else {
			bvec_set_virt(&bv, segment->data + offset, copy);
		}
		iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bv, 1, copy);

		r = sock_sendmsg(sk, &msg);
		if (r < 0) {
			iscsi_tcp_segment_unmap(segment);
			return r;
		}
		copied += r;
	}
	return copied;
}

/**
 * iscsi_sw_tcp_xmit - TCP transmit
 * @conn: iscsi connection
 **/
static int iscsi_sw_tcp_xmit(struct iscsi_conn *conn)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	struct iscsi_segment *segment = &tcp_sw_conn->out.segment;
	unsigned int consumed = 0;
	int rc = 0;

	while (1) {
		rc = iscsi_sw_tcp_xmit_segment(tcp_conn, segment);
		/*
		 * We may not have been able to send data because the conn
		 * is getting stopped. libiscsi will know so propagate err
		 * for it to do the right thing.
		 */
		if (rc == -EAGAIN)
			return rc;
		else if (rc < 0) {
			rc = ISCSI_ERR_XMIT_FAILED;
			goto error;
		} else if (rc == 0)
			break;

		consumed += rc;

		if (segment->total_copied >= segment->total_size) {
			if (segment->done != NULL) {
				rc = segment->done(tcp_conn, segment);
				if (rc != 0)
					goto error;
			}
		}
	}

	ISCSI_SW_TCP_DBG(conn, "xmit %d bytes\n", consumed);

	conn->txdata_octets += consumed;
	return consumed;

error:
	/* Transmit error. We could initiate error recovery
	 * here. */
	ISCSI_SW_TCP_DBG(conn, "Error sending PDU, errno=%d\n", rc);
	iscsi_conn_failure(conn, rc);
	return -EIO;
}

/**
 * iscsi_sw_tcp_xmit_qlen - check whether data is still queued for xmit
 * @conn: iscsi connection
 *
 * Returns zero once the current out segment has been fully transmitted,
 * nonzero while data remains queued.
 */
static inline int iscsi_sw_tcp_xmit_qlen(struct iscsi_conn *conn)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	struct iscsi_segment *segment = &tcp_sw_conn->out.segment;

	return segment->total_copied - segment->total_size;
}

static int iscsi_sw_tcp_pdu_xmit(struct iscsi_task *task)
{
	struct iscsi_conn *conn = task->conn;
	unsigned int noreclaim_flag;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	int rc = 0;

	if (!tcp_sw_conn->sock) {
		iscsi_conn_printk(KERN_ERR, conn,
				  "Transport not bound to socket!\n");
		return -EINVAL;
	}

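	/*
	 * Transmits may run on behalf of memory reclaim (e.g. swap or dirty
	 * page writeback over iSCSI), so run with PF_MEMALLOC to avoid
	 * recursing into direct reclaim while sending.
	 */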
	noreclaim_flag = memalloc_noreclaim_save();

	while (iscsi_sw_tcp_xmit_qlen(conn)) {
		rc = iscsi_sw_tcp_xmit(conn);
		if (rc == 0) {
			rc = -EAGAIN;
			break;
		}
		if (rc < 0)
			break;
		rc = 0;
	}

	memalloc_noreclaim_restore(noreclaim_flag);
	return rc;
}

/*
 * This is called when we're done sending the header.
 * Simply copy the data_segment to the send segment, and return.
 */
static int iscsi_sw_tcp_send_hdr_done(struct iscsi_tcp_conn *tcp_conn,
				      struct iscsi_segment *segment)
{
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;

	tcp_sw_conn->out.segment = tcp_sw_conn->out.data_segment;
	ISCSI_SW_TCP_DBG(tcp_conn->iscsi_conn,
			 "Header done. Next segment size %u total_size %u\n",
			 tcp_sw_conn->out.segment.size,
			 tcp_sw_conn->out.segment.total_size);
	return 0;
}

static void iscsi_sw_tcp_send_hdr_prep(struct iscsi_conn *conn, void *hdr,
				       size_t hdrlen)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;

	ISCSI_SW_TCP_DBG(conn, "%s\n", conn->hdrdgst_en ?
			 "digest enabled" : "digest disabled");

	/* Clear the data segment - needs to be filled in by the
	 * caller using iscsi_tcp_send_data_prep() */
	memset(&tcp_sw_conn->out.data_segment, 0,
	       sizeof(struct iscsi_segment));

	/* If header digest is enabled, compute the CRC and
	 * place the digest into the same buffer. We make
	 * sure that both iscsi_tcp_task and mtask have
	 * sufficient room.
	 */
	if (conn->hdrdgst_en) {
		iscsi_tcp_dgst_header(tcp_sw_conn->tx_hash, hdr, hdrlen,
				      hdr + hdrlen);
		hdrlen += ISCSI_DIGEST_SIZE;
	}

	/* Remember header pointer for later, when we need
	 * to decide whether there's a payload to go along
	 * with the header. */
	tcp_sw_conn->out.hdr = hdr;

	iscsi_segment_init_linear(&tcp_sw_conn->out.segment, hdr, hdrlen,
				  iscsi_sw_tcp_send_hdr_done, NULL);
}

/*
 * Prepare the send buffer for the payload data.
 * Padding and checksumming will all be taken care
 * of by the iscsi_segment routines.
 */
static int
iscsi_sw_tcp_send_data_prep(struct iscsi_conn *conn, struct scatterlist *sg,
			    unsigned int count, unsigned int offset,
			    unsigned int len)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	struct ahash_request *tx_hash = NULL;
	unsigned int hdr_spec_len;

	ISCSI_SW_TCP_DBG(conn, "offset=%d, datalen=%d %s\n", offset, len,
			 conn->datadgst_en ?
			 "digest enabled" : "digest disabled");

	/* Make sure the datalen matches what the caller
	   said he would send. */
	hdr_spec_len = ntoh24(tcp_sw_conn->out.hdr->dlength);
	WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len));

	if (conn->datadgst_en)
		tx_hash = tcp_sw_conn->tx_hash;

	return iscsi_segment_seek_sg(&tcp_sw_conn->out.data_segment,
				     sg, count, offset, len,
				     NULL, tx_hash);
}

static void
iscsi_sw_tcp_send_linear_data_prep(struct iscsi_conn *conn, void *data,
				   size_t len)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	struct ahash_request *tx_hash = NULL;
	unsigned int hdr_spec_len;

	ISCSI_SW_TCP_DBG(conn, "datalen=%zd %s\n", len, conn->datadgst_en ?
			 "digest enabled" : "digest disabled");

	/* Make sure the datalen matches what the caller
	   said he would send. */
	hdr_spec_len = ntoh24(tcp_sw_conn->out.hdr->dlength);
	WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len));

	if (conn->datadgst_en)
		tx_hash = tcp_sw_conn->tx_hash;

	iscsi_segment_init_linear(&tcp_sw_conn->out.data_segment,
				data, len, NULL, tx_hash);
}

static int iscsi_sw_tcp_pdu_init(struct iscsi_task *task,
				 unsigned int offset, unsigned int count)
{
	struct iscsi_conn *conn = task->conn;
	int err = 0;

	iscsi_sw_tcp_send_hdr_prep(conn, task->hdr, task->hdr_len);

	if (!count)
		return 0;

	if (!task->sc)
		iscsi_sw_tcp_send_linear_data_prep(conn, task->data, count);
	else {
		struct scsi_data_buffer *sdb = &task->sc->sdb;

		err = iscsi_sw_tcp_send_data_prep(conn, sdb->table.sgl,
						  sdb->table.nents, offset,
						  count);
	}

	if (err) {
		/* got invalid offset/len */
		return -EIO;
	}
	return 0;
}

static int iscsi_sw_tcp_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
{
	struct iscsi_tcp_task *tcp_task = task->dd_data;

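	/* The PDU header buffer lives directly after the iscsi_tcp_task in dd_data. */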
	task->hdr = task->dd_data + sizeof(*tcp_task);
	task->hdr_max = sizeof(struct iscsi_sw_tcp_hdrbuf) - ISCSI_DIGEST_SIZE;
	return 0;
}

static struct iscsi_cls_conn *
iscsi_sw_tcp_conn_create(struct iscsi_cls_session *cls_session,
			 uint32_t conn_idx)
{
	struct iscsi_conn *conn;
	struct iscsi_cls_conn *cls_conn;
	struct iscsi_tcp_conn *tcp_conn;
	struct iscsi_sw_tcp_conn *tcp_sw_conn;
	struct crypto_ahash *tfm;

	cls_conn = iscsi_tcp_conn_setup(cls_session, sizeof(*tcp_sw_conn),
					conn_idx);
	if (!cls_conn)
		return NULL;
	conn = cls_conn->dd_data;
	tcp_conn = conn->dd_data;
	tcp_sw_conn = tcp_conn->dd_data;
	INIT_WORK(&conn->recvwork, iscsi_sw_tcp_recv_data_work);
	tcp_sw_conn->queue_recv = iscsi_recv_from_iscsi_q;

	mutex_init(&tcp_sw_conn->sock_lock);

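	/* A single crc32c transform backs both the tx and rx digest requests. */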
	tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		goto free_conn;

	tcp_sw_conn->tx_hash = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!tcp_sw_conn->tx_hash)
		goto free_tfm;
	ahash_request_set_callback(tcp_sw_conn->tx_hash, 0, NULL, NULL);

	tcp_sw_conn->rx_hash = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!tcp_sw_conn->rx_hash)
		goto free_tx_hash;
	ahash_request_set_callback(tcp_sw_conn->rx_hash, 0, NULL, NULL);

	tcp_conn->rx_hash = tcp_sw_conn->rx_hash;

	return cls_conn;

free_tx_hash:
	ahash_request_free(tcp_sw_conn->tx_hash);
free_tfm:
	crypto_free_ahash(tfm);
free_conn:
	iscsi_conn_printk(KERN_ERR, conn,
			  "Could not create connection due to crc32c "
			  "loading error. Make sure the crc32c "
			  "module is built as a module or into the "
			  "kernel\n");
	iscsi_tcp_conn_teardown(cls_conn);
	return NULL;
}

static void iscsi_sw_tcp_release_conn(struct iscsi_conn *conn)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	struct socket *sock = tcp_sw_conn->sock;

	/*
	 * The iscsi transport class will make sure we are not called in
	 * parallel with start, stop, bind and destroys. However, this can be
	 * called twice if userspace does a stop then a destroy.
	 */
	if (!sock)
		return;

	/*
	 * Make sure we start socket shutdown now in case userspace is up
	 * but delayed in releasing the socket.
	 */
	kernel_sock_shutdown(sock, SHUT_RDWR);

	sock_hold(sock->sk);
	iscsi_sw_tcp_conn_restore_callbacks(conn);
	sock_put(sock->sk);

	iscsi_suspend_rx(conn);

	mutex_lock(&tcp_sw_conn->sock_lock);
	tcp_sw_conn->sock = NULL;
	mutex_unlock(&tcp_sw_conn->sock_lock);
	sockfd_put(sock);
}

static void iscsi_sw_tcp_conn_destroy(struct iscsi_cls_conn *cls_conn)
{
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;

	iscsi_sw_tcp_release_conn(conn);

	ahash_request_free(tcp_sw_conn->rx_hash);
	if (tcp_sw_conn->tx_hash) {
		struct crypto_ahash *tfm;

		tfm = crypto_ahash_reqtfm(tcp_sw_conn->tx_hash);
		ahash_request_free(tcp_sw_conn->tx_hash);
		crypto_free_ahash(tfm);
	}

	iscsi_tcp_conn_teardown(cls_conn);
}

static void iscsi_sw_tcp_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
{
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	struct socket *sock = tcp_sw_conn->sock;

	/* userspace may have goofed up and not bound us */
	if (!sock)
		return;

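	/* Error out the socket so anyone sleeping on it wakes up and bails. */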
	sock->sk->sk_err = EIO;
	wake_up_interruptible(sk_sleep(sock->sk));

	/* stop xmit side */
	iscsi_suspend_tx(conn);

	/* stop recv side and release socket */
	iscsi_sw_tcp_release_conn(conn);

	iscsi_conn_stop(cls_conn, flag);
}

static int
iscsi_sw_tcp_conn_bind(struct iscsi_cls_session *cls_session,
		       struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
		       int is_leading)
{
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	struct sock *sk;
	struct socket *sock;
	int err;

	/* lookup for existing socket */
	sock = sockfd_lookup((int)transport_eph, &err);
	if (!sock) {
		iscsi_conn_printk(KERN_ERR, conn,
				  "sockfd_lookup failed %d\n", err);
		return -EEXIST;
	}

	err = -EINVAL;
	if (!sk_is_tcp(sock->sk))
		goto free_socket;

	err = iscsi_conn_bind(cls_session, cls_conn, is_leading);
	if (err)
		goto free_socket;

	mutex_lock(&tcp_sw_conn->sock_lock);
	/* bind iSCSI connection and socket */
	tcp_sw_conn->sock = sock;
	mutex_unlock(&tcp_sw_conn->sock_lock);

	/* setup Socket parameters */
	sk = sock->sk;
	sk->sk_reuse = SK_CAN_REUSE;
	sk->sk_sndtimeo = 15 * HZ; /* FIXME: make it configurable */
	sk->sk_allocation = GFP_ATOMIC;
	sk->sk_use_task_frag = false;
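	/* Let the socket use memory reserves so it can make progress under
	 * memory pressure (e.g. swap over iSCSI). */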
	sk_set_memalloc(sk);
	sock_no_linger(sk);

	iscsi_sw_tcp_conn_set_callbacks(conn);
	/*
	 * set receive state machine into initial state
	 */
	iscsi_tcp_hdr_recv_prep(tcp_conn);
	return 0;

free_socket:
	sockfd_put(sock);
	return err;
}

static int iscsi_sw_tcp_conn_set_param(struct iscsi_cls_conn *cls_conn,
				       enum iscsi_param param, char *buf,
				       int buflen)
{
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;

	switch(param) {
	case ISCSI_PARAM_HDRDGST_EN:
		iscsi_set_param(cls_conn, param, buf, buflen);
		break;
	case ISCSI_PARAM_DATADGST_EN:
		mutex_lock(&tcp_sw_conn->sock_lock);
		if (!tcp_sw_conn->sock) {
			mutex_unlock(&tcp_sw_conn->sock_lock);
			return -ENOTCONN;
		}
		iscsi_set_param(cls_conn, param, buf, buflen);
		mutex_unlock(&tcp_sw_conn->sock_lock);
		break;
	case ISCSI_PARAM_MAX_R2T:
		return iscsi_tcp_set_max_r2t(conn, buf);
	default:
		return iscsi_set_param(cls_conn, param, buf, buflen);
	}

	return 0;
}

static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
				       enum iscsi_param param, char *buf)
{
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_sw_tcp_conn *tcp_sw_conn;
	struct iscsi_tcp_conn *tcp_conn;
	struct sockaddr_in6 addr;
	struct socket *sock;
	int rc;

	switch(param) {
	case ISCSI_PARAM_CONN_PORT:
	case ISCSI_PARAM_CONN_ADDRESS:
	case ISCSI_PARAM_LOCAL_PORT:
		spin_lock_bh(&conn->session->frwd_lock);
		if (!conn->session->leadconn) {
			spin_unlock_bh(&conn->session->frwd_lock);
			return -ENOTCONN;
		}
		/*
		 * The conn has been setup and bound, so just grab a ref
		 * in case a destroy runs while we are in the net layer.
		 */
		iscsi_get_conn(conn->cls_conn);
		spin_unlock_bh(&conn->session->frwd_lock);

		tcp_conn = conn->dd_data;
		tcp_sw_conn = tcp_conn->dd_data;

		mutex_lock(&tcp_sw_conn->sock_lock);
		sock = tcp_sw_conn->sock;
		if (!sock) {
			rc = -ENOTCONN;
			goto sock_unlock;
		}

		if (param == ISCSI_PARAM_LOCAL_PORT)
			rc = kernel_getsockname(sock,
						(struct sockaddr *)&addr);
		else
			rc = kernel_getpeername(sock,
						(struct sockaddr *)&addr);
sock_unlock:
		mutex_unlock(&tcp_sw_conn->sock_lock);
		iscsi_put_conn(conn->cls_conn);
		if (rc < 0)
			return rc;

		return iscsi_conn_get_addr_param((struct sockaddr_storage *)
						 &addr, param, buf);
	default:
		return iscsi_conn_get_param(cls_conn, param, buf);
	}

	return 0;
}

static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost,
				       enum iscsi_host_param param, char *buf)
{
	struct iscsi_sw_tcp_host *tcp_sw_host = iscsi_host_priv(shost);
	struct iscsi_session *session;
	struct iscsi_conn *conn;
	struct iscsi_tcp_conn *tcp_conn;
	struct iscsi_sw_tcp_conn *tcp_sw_conn;
	struct sockaddr_in6 addr;
	struct socket *sock;
	int rc;

	switch (param) {
	case ISCSI_HOST_PARAM_IPADDRESS:
		session = tcp_sw_host->session;
		if (!session)
			return -ENOTCONN;

		spin_lock_bh(&session->frwd_lock);
		conn = session->leadconn;
		if (!conn) {
			spin_unlock_bh(&session->frwd_lock);
			return -ENOTCONN;
		}
		tcp_conn = conn->dd_data;
		tcp_sw_conn = tcp_conn->dd_data;
		/*
		 * The conn has been setup and bound, so just grab a ref
		 * in case a destroy runs while we are in the net layer.
		 */
		iscsi_get_conn(conn->cls_conn);
		spin_unlock_bh(&session->frwd_lock);

		mutex_lock(&tcp_sw_conn->sock_lock);
		sock = tcp_sw_conn->sock;
		if (!sock)
			rc = -ENOTCONN;
		else
			rc = kernel_getsockname(sock, (struct sockaddr *)&addr);
		mutex_unlock(&tcp_sw_conn->sock_lock);
		iscsi_put_conn(conn->cls_conn);
		if (rc < 0)
			return rc;

		return iscsi_conn_get_addr_param((struct sockaddr_storage *)
						 &addr,
						 (enum iscsi_param)param, buf);
	default:
		return iscsi_host_get_param(shost, param, buf);
	}

	return 0;
}

static void
iscsi_sw_tcp_conn_get_stats(struct iscsi_cls_conn *cls_conn,
			    struct iscsi_stats *stats)
{
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;

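	/* Three driver-specific counters on top of the generic iSCSI stats. */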
	stats->custom_length = 3;
	strcpy(stats->custom[0].desc, "tx_sendpage_failures");
	stats->custom[0].value = tcp_sw_conn->sendpage_failures_cnt;
	strcpy(stats->custom[1].desc, "rx_discontiguous_hdr");
	stats->custom[1].value = tcp_sw_conn->discontiguous_hdr_cnt;
	strcpy(stats->custom[2].desc, "eh_abort_cnt");
	stats->custom[2].value = conn->eh_abort_cnt;

	iscsi_tcp_conn_get_stats(cls_conn, stats);
}

static struct iscsi_cls_session *
iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max,
			    uint16_t qdepth, uint32_t initial_cmdsn)
{
	struct iscsi_cls_session *cls_session;
	struct iscsi_session *session;
	struct iscsi_sw_tcp_host *tcp_sw_host;
	struct Scsi_Host *shost;
	int rc;

	if (ep) {
		printk(KERN_ERR "iscsi_tcp: invalid ep %p.\n", ep);
		return NULL;
	}

	shost = iscsi_host_alloc(&iscsi_sw_tcp_sht,
				 sizeof(struct iscsi_sw_tcp_host), 1);
	if (!shost)
		return NULL;
	shost->transportt = iscsi_sw_tcp_scsi_transport;
	shost->cmd_per_lun = qdepth;
	shost->max_lun = iscsi_max_lun;
	shost->max_id = 0;
	shost->max_channel = 0;
	shost->max_cmd_len = SCSI_MAX_VARLEN_CDB_SIZE;

	rc = iscsi_host_get_max_scsi_cmds(shost, cmds_max);
	if (rc < 0)
		goto free_host;
	shost->can_queue = rc;

	if (iscsi_host_add(shost, NULL))
		goto free_host;

	cls_session = iscsi_session_setup(&iscsi_sw_tcp_transport, shost,
					  cmds_max, 0,
					  sizeof(struct iscsi_tcp_task) +
					  sizeof(struct iscsi_sw_tcp_hdrbuf),
					  initial_cmdsn, 0);
	if (!cls_session)
		goto remove_host;
	session = cls_session->dd_data;

	if (iscsi_tcp_r2tpool_alloc(session))
		goto remove_session;

	/* We are now fully setup so expose the session to sysfs. */
	tcp_sw_host = iscsi_host_priv(shost);
	tcp_sw_host->session = session;
	return cls_session;

remove_session:
	iscsi_session_teardown(cls_session);
remove_host:
	iscsi_host_remove(shost, false);
free_host:
	iscsi_host_free(shost);
	return NULL;
}

static void iscsi_sw_tcp_session_destroy(struct iscsi_cls_session *cls_session)
{
	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
	struct iscsi_session *session = cls_session->dd_data;

	if (WARN_ON_ONCE(session->leadconn))
		return;

	iscsi_session_remove(cls_session);
	/*
	 * Our get_host_param needs to access the session, so remove the
	 * host from sysfs before freeing the session to make sure userspace
	 * is no longer accessing the callout.
	 */
	iscsi_host_remove(shost, false);

	iscsi_tcp_r2tpool_free(cls_session->dd_data);

	iscsi_session_free(cls_session);
	iscsi_host_free(shost);
}

static umode_t iscsi_sw_tcp_attr_is_visible(int param_type, int param)
{
	switch (param_type) {
	case ISCSI_HOST_PARAM:
		switch (param) {
		case ISCSI_HOST_PARAM_NETDEV_NAME:
		case ISCSI_HOST_PARAM_HWADDRESS:
		case ISCSI_HOST_PARAM_IPADDRESS:
		case ISCSI_HOST_PARAM_INITIATOR_NAME:
			return S_IRUGO;
		default:
			return 0;
		}
	case ISCSI_PARAM:
		switch (param) {
		case ISCSI_PARAM_MAX_RECV_DLENGTH:
		case ISCSI_PARAM_MAX_XMIT_DLENGTH:
		case ISCSI_PARAM_HDRDGST_EN:
		case ISCSI_PARAM_DATADGST_EN:
		case ISCSI_PARAM_CONN_ADDRESS:
		case ISCSI_PARAM_CONN_PORT:
		case ISCSI_PARAM_LOCAL_PORT:
		case ISCSI_PARAM_EXP_STATSN:
		case ISCSI_PARAM_PERSISTENT_ADDRESS:
		case ISCSI_PARAM_PERSISTENT_PORT:
		case ISCSI_PARAM_PING_TMO:
		case ISCSI_PARAM_RECV_TMO:
		case ISCSI_PARAM_INITIAL_R2T_EN:
		case ISCSI_PARAM_MAX_R2T:
		case ISCSI_PARAM_IMM_DATA_EN:
		case ISCSI_PARAM_FIRST_BURST:
		case ISCSI_PARAM_MAX_BURST:
		case ISCSI_PARAM_PDU_INORDER_EN:
		case ISCSI_PARAM_DATASEQ_INORDER_EN:
		case ISCSI_PARAM_ERL:
		case ISCSI_PARAM_TARGET_NAME:
		case ISCSI_PARAM_TPGT:
		case ISCSI_PARAM_USERNAME:
		case ISCSI_PARAM_PASSWORD:
		case ISCSI_PARAM_USERNAME_IN:
		case ISCSI_PARAM_PASSWORD_IN:
		case ISCSI_PARAM_FAST_ABORT:
		case ISCSI_PARAM_ABORT_TMO:
		case ISCSI_PARAM_LU_RESET_TMO:
		case ISCSI_PARAM_TGT_RESET_TMO:
		case ISCSI_PARAM_IFACE_NAME:
		case ISCSI_PARAM_INITIATOR_NAME:
			return S_IRUGO;
		default:
			return 0;
		}
	}

	return 0;
}

static int iscsi_sw_tcp_slave_configure(struct scsi_device *sdev)
{
	struct iscsi_sw_tcp_host *tcp_sw_host = iscsi_host_priv(sdev->host);
	struct iscsi_session *session = tcp_sw_host->session;
	struct iscsi_conn *conn = session->leadconn;

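	/*
	 * With data digests enabled the payload pages must not change while
	 * a PDU is in flight, so require stable writes from the block layer.
	 */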
	if (conn->datadgst_en)
		blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES,
				   sdev->request_queue);
	blk_queue_dma_alignment(sdev->request_queue, 0);
	return 0;
}

static const struct scsi_host_template iscsi_sw_tcp_sht = {
	.module			= THIS_MODULE,
	.name			= "iSCSI Initiator over TCP/IP",
	.queuecommand           = iscsi_queuecommand,
	.change_queue_depth	= scsi_change_queue_depth,
	.can_queue		= ISCSI_TOTAL_CMDS_MAX,
	.sg_tablesize		= 4096,
	.max_sectors		= 0xFFFF,
	.cmd_per_lun		= ISCSI_DEF_CMD_PER_LUN,
	.eh_timed_out		= iscsi_eh_cmd_timed_out,
	.eh_abort_handler       = iscsi_eh_abort,
	.eh_device_reset_handler= iscsi_eh_device_reset,
	.eh_target_reset_handler = iscsi_eh_recover_target,
	.dma_boundary		= PAGE_SIZE - 1,
	.slave_configure        = iscsi_sw_tcp_slave_configure,
	.proc_name		= "iscsi_tcp",
	.this_id		= -1,
	.track_queue_depth	= 1,
	.cmd_size		= sizeof(struct iscsi_cmd),
};

static struct iscsi_transport iscsi_sw_tcp_transport = {
	.owner			= THIS_MODULE,
	.name			= "tcp",
	.caps			= CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_HDRDGST
				  | CAP_DATADGST,
	/* session management */
	.create_session		= iscsi_sw_tcp_session_create,
	.destroy_session	= iscsi_sw_tcp_session_destroy,
	/* connection management */
	.create_conn		= iscsi_sw_tcp_conn_create,
	.bind_conn		= iscsi_sw_tcp_conn_bind,
	.destroy_conn		= iscsi_sw_tcp_conn_destroy,
	.attr_is_visible	= iscsi_sw_tcp_attr_is_visible,
	.set_param		= iscsi_sw_tcp_conn_set_param,
	.get_conn_param		= iscsi_sw_tcp_conn_get_param,
	.get_session_param	= iscsi_session_get_param,
	.start_conn		= iscsi_conn_start,
	.stop_conn		= iscsi_sw_tcp_conn_stop,
	/* iscsi host params */
	.get_host_param		= iscsi_sw_tcp_host_get_param,
	.set_host_param		= iscsi_host_set_param,
	/* IO */
	.send_pdu		= iscsi_conn_send_pdu,
	.get_stats		= iscsi_sw_tcp_conn_get_stats,
	/* iscsi task/cmd helpers */
	.init_task		= iscsi_tcp_task_init,
	.xmit_task		= iscsi_tcp_task_xmit,
	.cleanup_task		= iscsi_tcp_cleanup_task,
	/* low level pdu helpers */
	.xmit_pdu		= iscsi_sw_tcp_pdu_xmit,
	.init_pdu		= iscsi_sw_tcp_pdu_init,
	.alloc_pdu		= iscsi_sw_tcp_pdu_alloc,
	/* recovery */
	.session_recovery_timedout = iscsi_session_recovery_timedout,
};

static int __init iscsi_sw_tcp_init(void)
{
	if (iscsi_max_lun < 1) {
		printk(KERN_ERR "iscsi_tcp: Invalid max_lun value of %u\n",
		       iscsi_max_lun);
		return -EINVAL;
	}

	iscsi_sw_tcp_scsi_transport = iscsi_register_transport(
						&iscsi_sw_tcp_transport);
	if (!iscsi_sw_tcp_scsi_transport)
		return -ENODEV;

	return 0;
}

static void __exit iscsi_sw_tcp_exit(void)
{
	iscsi_unregister_transport(&iscsi_sw_tcp_transport);
}

module_init(iscsi_sw_tcp_init);
module_exit(iscsi_sw_tcp_exit);