1/*
2 * iSCSI Initiator over TCP/IP Data-Path
3 *
4 * Copyright (C) 2004 Dmitry Yusupov
5 * Copyright (C) 2004 Alex Aizman
6 * Copyright (C) 2005 - 2006 Mike Christie
7 * Copyright (C) 2006 Red Hat, Inc.  All rights reserved.
8 * maintained by open-iscsi@googlegroups.com
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published
12 * by the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * See the file COPYING included with this distribution for more details.
21 *
22 * Credits:
23 *	Christoph Hellwig
24 *	FUJITA Tomonori
25 *	Arne Redlich
26 *	Zhenyu Wang
27 */
28
29#include <linux/types.h>
30#include <linux/list.h>
31#include <linux/inet.h>
32#include <linux/blkdev.h>
33#include <linux/crypto.h>
34#include <linux/delay.h>
35#include <linux/kfifo.h>
36#include <linux/scatterlist.h>
37#include <linux/mutex.h>
38#include <net/tcp.h>
39#include <scsi/scsi_cmnd.h>
40#include <scsi/scsi_host.h>
41#include <scsi/scsi.h>
42#include <scsi/scsi_transport_iscsi.h>
43
44#include "iscsi_tcp.h"
45
46MODULE_AUTHOR("Dmitry Yusupov <dmitry_yus@yahoo.com>, "
47	      "Alex Aizman <itn780@yahoo.com>");
48MODULE_DESCRIPTION("iSCSI/TCP data-path");
49MODULE_LICENSE("GPL");
50/* #define DEBUG_TCP */
51#define DEBUG_ASSERT
52
53#ifdef DEBUG_TCP
54#define debug_tcp(fmt...) printk(KERN_INFO "tcp: " fmt)
55#else
56#define debug_tcp(fmt...)
57#endif
58
59#ifndef DEBUG_ASSERT
60#ifdef BUG_ON
61#undef BUG_ON
62#endif
63#define BUG_ON(expr)
64#endif
65
66static unsigned int iscsi_max_lun = 512;
67module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);
68
69static inline void
70iscsi_buf_init_iov(struct iscsi_buf *ibuf, char *vbuf, int size)
71{
72	ibuf->sg.page = virt_to_page(vbuf);
73	ibuf->sg.offset = offset_in_page(vbuf);
74	ibuf->sg.length = size;
75	ibuf->sent = 0;
76	ibuf->use_sendmsg = 1;
77}
78
79static inline void
80iscsi_buf_init_sg(struct iscsi_buf *ibuf, struct scatterlist *sg)
81{
82	ibuf->sg.page = sg->page;
83	ibuf->sg.offset = sg->offset;
84	ibuf->sg.length = sg->length;
85	/*
86	 * Fastpath: sg element fits into single page
87	 */
88	if (sg->length + sg->offset <= PAGE_SIZE && !PageSlab(sg->page))
89		ibuf->use_sendmsg = 0;
90	else
91		ibuf->use_sendmsg = 1;
92	ibuf->sent = 0;
93}
94
95static inline int
96iscsi_buf_left(struct iscsi_buf *ibuf)
97{
98	int rc;
99
100	rc = ibuf->sg.length - ibuf->sent;
101	BUG_ON(rc < 0);
102	return rc;
103}
104
105static inline void
106iscsi_hdr_digest(struct iscsi_conn *conn, struct iscsi_buf *buf,
107		 u8* crc)
108{
109	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
110
111	crypto_hash_digest(&tcp_conn->tx_hash, &buf->sg, buf->sg.length, crc);
112	buf->sg.length = tcp_conn->hdr_size;
113}
114
115static inline int
116iscsi_hdr_extract(struct iscsi_tcp_conn *tcp_conn)
117{
118	struct sk_buff *skb = tcp_conn->in.skb;
119
120	tcp_conn->in.zero_copy_hdr = 0;
121
122	if (tcp_conn->in.copy >= tcp_conn->hdr_size &&
123	    tcp_conn->in_progress == IN_PROGRESS_WAIT_HEADER) {
124		/*
125		 * Zero-copy PDU Header: using connection context
126		 * to store header pointer.
127		 */
128		if (skb_shinfo(skb)->frag_list == NULL &&
129		    !skb_shinfo(skb)->nr_frags) {
130			tcp_conn->in.hdr = (struct iscsi_hdr *)
131				((char*)skb->data + tcp_conn->in.offset);
132			tcp_conn->in.zero_copy_hdr = 1;
133		} else {
134			/* ignoring return code since we checked
135			 * in.copy before */
136			skb_copy_bits(skb, tcp_conn->in.offset,
137				&tcp_conn->hdr, tcp_conn->hdr_size);
138			tcp_conn->in.hdr = &tcp_conn->hdr;
139		}
140		tcp_conn->in.offset += tcp_conn->hdr_size;
141		tcp_conn->in.copy -= tcp_conn->hdr_size;
142	} else {
143		int hdr_remains;
144		int copylen;
145
146		/*
147		 * PDU header scattered across SKB's,
148		 * copying it... This'll happen quite rarely.
149		 */
150
151		if (tcp_conn->in_progress == IN_PROGRESS_WAIT_HEADER)
152			tcp_conn->in.hdr_offset = 0;
153
154		hdr_remains = tcp_conn->hdr_size - tcp_conn->in.hdr_offset;
155		BUG_ON(hdr_remains <= 0);
156
157		copylen = min(tcp_conn->in.copy, hdr_remains);
158		skb_copy_bits(skb, tcp_conn->in.offset,
159			(char*)&tcp_conn->hdr + tcp_conn->in.hdr_offset,
160			copylen);
161
162		debug_tcp("PDU gather offset %d bytes %d in.offset %d "
163		       "in.copy %d\n", tcp_conn->in.hdr_offset, copylen,
164		       tcp_conn->in.offset, tcp_conn->in.copy);
165
166		tcp_conn->in.offset += copylen;
167		tcp_conn->in.copy -= copylen;
168		if (copylen < hdr_remains)  {
169			tcp_conn->in_progress = IN_PROGRESS_HEADER_GATHER;
170			tcp_conn->in.hdr_offset += copylen;
171		        return -EAGAIN;
172		}
173		tcp_conn->in.hdr = &tcp_conn->hdr;
174		tcp_conn->discontiguous_hdr_cnt++;
175	        tcp_conn->in_progress = IN_PROGRESS_WAIT_HEADER;
176	}
177
178	return 0;
179}
180
181/*
182 * must be called with session lock
183 */
184static void
185iscsi_tcp_cleanup_ctask(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
186{
187	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
188	struct iscsi_r2t_info *r2t;
189	struct scsi_cmnd *sc;
190
191	/* flush ctask's r2t queues */
192	while (__kfifo_get(tcp_ctask->r2tqueue, (void*)&r2t, sizeof(void*))) {
193		__kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t,
194			    sizeof(void*));
195		debug_scsi("iscsi_tcp_cleanup_ctask pending r2t dropped\n");
196	}
197
198	sc = ctask->sc;
199	if (unlikely(!sc))
200		return;
201
202	tcp_ctask->xmstate = XMSTATE_IDLE;
203	tcp_ctask->r2t = NULL;
204}
205
206/**
207 * iscsi_data_rsp - SCSI Data-In Response processing
208 * @conn: iscsi connection
209 * @ctask: scsi command task
210 **/
211static int
212iscsi_data_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
213{
214	int rc;
215	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
216	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
217	struct iscsi_data_rsp *rhdr = (struct iscsi_data_rsp *)tcp_conn->in.hdr;
218	struct iscsi_session *session = conn->session;
219	int datasn = be32_to_cpu(rhdr->datasn);
220
221	rc = iscsi_check_assign_cmdsn(session, (struct iscsi_nopin*)rhdr);
222	if (rc)
223		return rc;
224	/*
225	 * setup Data-In byte counter (gets decremented..)
226	 */
227	ctask->data_count = tcp_conn->in.datalen;
228
229	if (tcp_conn->in.datalen == 0)
230		return 0;
231
232	if (ctask->datasn != datasn)
233		return ISCSI_ERR_DATASN;
234
235	ctask->datasn++;
236
237	tcp_ctask->data_offset = be32_to_cpu(rhdr->offset);
238	if (tcp_ctask->data_offset + tcp_conn->in.datalen > ctask->total_length)
239		return ISCSI_ERR_DATA_OFFSET;
240
241	if (rhdr->flags & ISCSI_FLAG_DATA_STATUS) {
242		struct scsi_cmnd *sc = ctask->sc;
243
244		conn->exp_statsn = be32_to_cpu(rhdr->statsn) + 1;
245		if (rhdr->flags & ISCSI_FLAG_DATA_UNDERFLOW) {
246			int res_count = be32_to_cpu(rhdr->residual_count);
247
248			if (res_count > 0 &&
249			    res_count <= sc->request_bufflen) {
250				sc->resid = res_count;
251				sc->result = (DID_OK << 16) | rhdr->cmd_status;
252			} else
253				sc->result = (DID_BAD_TARGET << 16) |
254					rhdr->cmd_status;
255		} else if (rhdr->flags & ISCSI_FLAG_DATA_OVERFLOW) {
256			sc->resid = be32_to_cpu(rhdr->residual_count);
257			sc->result = (DID_OK << 16) | rhdr->cmd_status;
258		} else
259			sc->result = (DID_OK << 16) | rhdr->cmd_status;
260	}
261
262	conn->datain_pdus_cnt++;
263	return 0;
264}
265
266/**
267 * iscsi_solicit_data_init - initialize first Data-Out
268 * @conn: iscsi connection
269 * @ctask: scsi command task
270 * @r2t: R2T info
271 *
272 * Notes:
273 *	Initialize first Data-Out within this R2T sequence and finds
274 *	proper data_offset within this SCSI command.
275 *
276 *	This function is called with connection lock taken.
277 **/
278static void
279iscsi_solicit_data_init(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
280			struct iscsi_r2t_info *r2t)
281{
282	struct iscsi_data *hdr;
283	struct scsi_cmnd *sc = ctask->sc;
284
285	hdr = &r2t->dtask.hdr;
286	memset(hdr, 0, sizeof(struct iscsi_data));
287	hdr->ttt = r2t->ttt;
288	hdr->datasn = cpu_to_be32(r2t->solicit_datasn);
289	r2t->solicit_datasn++;
290	hdr->opcode = ISCSI_OP_SCSI_DATA_OUT;
291	memcpy(hdr->lun, ctask->hdr->lun, sizeof(hdr->lun));
292	hdr->itt = ctask->hdr->itt;
293	hdr->exp_statsn = r2t->exp_statsn;
294	hdr->offset = cpu_to_be32(r2t->data_offset);
295	if (r2t->data_length > conn->max_xmit_dlength) {
296		hton24(hdr->dlength, conn->max_xmit_dlength);
297		r2t->data_count = conn->max_xmit_dlength;
298		hdr->flags = 0;
299	} else {
300		hton24(hdr->dlength, r2t->data_length);
301		r2t->data_count = r2t->data_length;
302		hdr->flags = ISCSI_FLAG_CMD_FINAL;
303	}
304	conn->dataout_pdus_cnt++;
305
306	r2t->sent = 0;
307
308	iscsi_buf_init_iov(&r2t->headbuf, (char*)hdr,
309			   sizeof(struct iscsi_hdr));
310
311	if (sc->use_sg) {
312		int i, sg_count = 0;
313		struct scatterlist *sg = sc->request_buffer;
314
315		r2t->sg = NULL;
316		for (i = 0; i < sc->use_sg; i++, sg += 1) {
317			if (sg_count + sg->length > r2t->data_offset) {
318				int page_offset;
319
320				/* sg page found! */
321
322				/* offset within this page */
323				page_offset = r2t->data_offset - sg_count;
324
325				/* fill in this buffer */
326				iscsi_buf_init_sg(&r2t->sendbuf, sg);
327				r2t->sendbuf.sg.offset += page_offset;
328				r2t->sendbuf.sg.length -= page_offset;
329
330				/* xmit logic will continue with next one */
331				r2t->sg = sg + 1;
332				break;
333			}
334			sg_count += sg->length;
335		}
336		BUG_ON(r2t->sg == NULL);
337	} else {
338		iscsi_buf_init_iov(&r2t->sendbuf,
339			    (char*)sc->request_buffer + r2t->data_offset,
340			    r2t->data_count);
341		r2t->sg = NULL;
342	}
343}
344
345/**
346 * iscsi_r2t_rsp - iSCSI R2T Response processing
347 * @conn: iscsi connection
348 * @ctask: scsi command task
349 **/
350static int
351iscsi_r2t_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
352{
353	struct iscsi_r2t_info *r2t;
354	struct iscsi_session *session = conn->session;
355	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
356	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
357	struct iscsi_r2t_rsp *rhdr = (struct iscsi_r2t_rsp *)tcp_conn->in.hdr;
358	int r2tsn = be32_to_cpu(rhdr->r2tsn);
359	int rc;
360
361	if (tcp_conn->in.datalen) {
362		printk(KERN_ERR "iscsi_tcp: invalid R2t with datalen %d\n",
363		       tcp_conn->in.datalen);
364		return ISCSI_ERR_DATALEN;
365	}
366
367	if (tcp_ctask->exp_r2tsn && tcp_ctask->exp_r2tsn != r2tsn)
368		return ISCSI_ERR_R2TSN;
369
370	rc = iscsi_check_assign_cmdsn(session, (struct iscsi_nopin*)rhdr);
371	if (rc)
372		return rc;
373
374
375	/* fill-in new R2T associated with the task */
376	spin_lock(&session->lock);
377	if (!ctask->sc || ctask->mtask ||
378	     session->state != ISCSI_STATE_LOGGED_IN) {
379		printk(KERN_INFO "iscsi_tcp: dropping R2T itt %d in "
380		       "recovery...\n", ctask->itt);
381		spin_unlock(&session->lock);
382		return 0;
383	}
384
385	rc = __kfifo_get(tcp_ctask->r2tpool.queue, (void*)&r2t, sizeof(void*));
386	BUG_ON(!rc);
387
388	r2t->exp_statsn = rhdr->statsn;
389	r2t->data_length = be32_to_cpu(rhdr->data_length);
390	if (r2t->data_length == 0) {
391		printk(KERN_ERR "iscsi_tcp: invalid R2T with zero data len\n");
392		spin_unlock(&session->lock);
393		return ISCSI_ERR_DATALEN;
394	}
395
396	if (r2t->data_length > session->max_burst)
397		debug_scsi("invalid R2T with data len %u and max burst %u."
398			   "Attempting to execute request.\n",
399			    r2t->data_length, session->max_burst);
400
401	r2t->data_offset = be32_to_cpu(rhdr->data_offset);
402	if (r2t->data_offset + r2t->data_length > ctask->total_length) {
403		spin_unlock(&session->lock);
404		printk(KERN_ERR "iscsi_tcp: invalid R2T with data len %u at "
405		       "offset %u and total length %d\n", r2t->data_length,
406		       r2t->data_offset, ctask->total_length);
407		return ISCSI_ERR_DATALEN;
408	}
409
410	r2t->ttt = rhdr->ttt; /* no flip */
411	r2t->solicit_datasn = 0;
412
413	iscsi_solicit_data_init(conn, ctask, r2t);
414
415	tcp_ctask->exp_r2tsn = r2tsn + 1;
416	__kfifo_put(tcp_ctask->r2tqueue, (void*)&r2t, sizeof(void*));
417	tcp_ctask->xmstate |= XMSTATE_SOL_HDR;
418	list_move_tail(&ctask->running, &conn->xmitqueue);
419
420	scsi_queue_work(session->host, &conn->xmitwork);
421	conn->r2t_pdus_cnt++;
422	spin_unlock(&session->lock);
423
424	return 0;
425}
426
427static int
428iscsi_tcp_hdr_recv(struct iscsi_conn *conn)
429{
430	int rc = 0, opcode, ahslen;
431	struct iscsi_hdr *hdr;
432	struct iscsi_session *session = conn->session;
433	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
434	uint32_t cdgst, rdgst = 0, itt;
435
436	hdr = tcp_conn->in.hdr;
437
438	/* verify PDU length */
439	tcp_conn->in.datalen = ntoh24(hdr->dlength);
440	if (tcp_conn->in.datalen > conn->max_recv_dlength) {
441		printk(KERN_ERR "iscsi_tcp: datalen %d > %d\n",
442		       tcp_conn->in.datalen, conn->max_recv_dlength);
443		return ISCSI_ERR_DATALEN;
444	}
445	tcp_conn->data_copied = 0;
446
447	/* read AHS */
448	ahslen = hdr->hlength << 2;
449	tcp_conn->in.offset += ahslen;
450	tcp_conn->in.copy -= ahslen;
451	if (tcp_conn->in.copy < 0) {
452		printk(KERN_ERR "iscsi_tcp: can't handle AHS with length "
453		       "%d bytes\n", ahslen);
454		return ISCSI_ERR_AHSLEN;
455	}
456
457	/* calculate read padding */
458	tcp_conn->in.padding = tcp_conn->in.datalen & (ISCSI_PAD_LEN-1);
459	if (tcp_conn->in.padding) {
460		tcp_conn->in.padding = ISCSI_PAD_LEN - tcp_conn->in.padding;
461		debug_scsi("read padding %d bytes\n", tcp_conn->in.padding);
462	}
463
464	if (conn->hdrdgst_en) {
465		struct scatterlist sg;
466
467		sg_init_one(&sg, (u8 *)hdr,
468			    sizeof(struct iscsi_hdr) + ahslen);
469		crypto_hash_digest(&tcp_conn->rx_hash, &sg, sg.length,
470				   (u8 *)&cdgst);
471		rdgst = *(uint32_t*)((char*)hdr + sizeof(struct iscsi_hdr) +
472				     ahslen);
473		if (cdgst != rdgst) {
474			printk(KERN_ERR "iscsi_tcp: hdrdgst error "
475			       "recv 0x%x calc 0x%x\n", rdgst, cdgst);
476			return ISCSI_ERR_HDR_DGST;
477		}
478	}
479
480	opcode = hdr->opcode & ISCSI_OPCODE_MASK;
481	/* verify itt (itt encoding: age+cid+itt) */
482	rc = iscsi_verify_itt(conn, hdr, &itt);
483	if (rc == ISCSI_ERR_NO_SCSI_CMD) {
484		tcp_conn->in.datalen = 0; /* force drop */
485		return 0;
486	} else if (rc)
487		return rc;
488
489	debug_tcp("opcode 0x%x offset %d copy %d ahslen %d datalen %d\n",
490		  opcode, tcp_conn->in.offset, tcp_conn->in.copy,
491		  ahslen, tcp_conn->in.datalen);
492
493	switch(opcode) {
494	case ISCSI_OP_SCSI_DATA_IN:
495		tcp_conn->in.ctask = session->cmds[itt];
496		rc = iscsi_data_rsp(conn, tcp_conn->in.ctask);
497		if (rc)
498			return rc;
499		/* fall through */
500	case ISCSI_OP_SCSI_CMD_RSP:
501		tcp_conn->in.ctask = session->cmds[itt];
502		if (tcp_conn->in.datalen)
503			goto copy_hdr;
504
505		spin_lock(&session->lock);
506		rc = __iscsi_complete_pdu(conn, hdr, NULL, 0);
507		spin_unlock(&session->lock);
508		break;
509	case ISCSI_OP_R2T:
510		tcp_conn->in.ctask = session->cmds[itt];
511		if (ahslen)
512			rc = ISCSI_ERR_AHSLEN;
513		else if (tcp_conn->in.ctask->sc->sc_data_direction ==
514								DMA_TO_DEVICE)
515			rc = iscsi_r2t_rsp(conn, tcp_conn->in.ctask);
516		else
517			rc = ISCSI_ERR_PROTO;
518		break;
519	case ISCSI_OP_LOGIN_RSP:
520	case ISCSI_OP_TEXT_RSP:
521	case ISCSI_OP_REJECT:
522	case ISCSI_OP_ASYNC_EVENT:
523		/*
524		 * It is possible that we could get a PDU with a buffer larger
525		 * than 8K, but there are no targets that currently do this.
526		 * For now we fail until we find a vendor that needs it
527		 */
528		if (ISCSI_DEF_MAX_RECV_SEG_LEN <
529		    tcp_conn->in.datalen) {
530			printk(KERN_ERR "iscsi_tcp: received buffer of len %u "
531			      "but conn buffer is only %u (opcode %0x)\n",
532			      tcp_conn->in.datalen,
533			      ISCSI_DEF_MAX_RECV_SEG_LEN, opcode);
534			rc = ISCSI_ERR_PROTO;
535			break;
536		}
537
538		if (tcp_conn->in.datalen)
539			goto copy_hdr;
540	/* fall through */
541	case ISCSI_OP_LOGOUT_RSP:
542	case ISCSI_OP_NOOP_IN:
543	case ISCSI_OP_SCSI_TMFUNC_RSP:
544		rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
545		break;
546	default:
547		rc = ISCSI_ERR_BAD_OPCODE;
548		break;
549	}
550
551	return rc;
552
553copy_hdr:
554	/*
555	 * if we did zero copy for the header but we will need multiple
556	 * skbs to complete the command then we have to copy the header
557	 * for later use
558	 */
559	if (tcp_conn->in.zero_copy_hdr && tcp_conn->in.copy <=
560	   (tcp_conn->in.datalen + tcp_conn->in.padding +
561	    (conn->datadgst_en ? 4 : 0))) {
562		debug_tcp("Copying header for later use. in.copy %d in.datalen"
563			  " %d\n", tcp_conn->in.copy, tcp_conn->in.datalen);
564		memcpy(&tcp_conn->hdr, tcp_conn->in.hdr,
565		       sizeof(struct iscsi_hdr));
566		tcp_conn->in.hdr = &tcp_conn->hdr;
567		tcp_conn->in.zero_copy_hdr = 0;
568	}
569	return 0;
570}
571
572/**
573 * iscsi_ctask_copy - copy skb bits to the destanation cmd task
574 * @conn: iscsi tcp connection
575 * @ctask: scsi command task
576 * @buf: buffer to copy to
577 * @buf_size: size of buffer
578 * @offset: offset within the buffer
579 *
580 * Notes:
581 *	The function calls skb_copy_bits() and updates per-connection and
582 *	per-cmd byte counters.
583 *
584 *	Read counters (in bytes):
585 *
586 *	conn->in.offset		offset within in progress SKB
587 *	conn->in.copy		left to copy from in progress SKB
588 *				including padding
589 *	conn->in.copied		copied already from in progress SKB
590 *	conn->data_copied	copied already from in progress buffer
591 *	ctask->sent		total bytes sent up to the MidLayer
592 *	ctask->data_count	left to copy from in progress Data-In
593 *	buf_left		left to copy from in progress buffer
594 **/
595static inline int
596iscsi_ctask_copy(struct iscsi_tcp_conn *tcp_conn, struct iscsi_cmd_task *ctask,
597		void *buf, int buf_size, int offset)
598{
599	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
600	int buf_left = buf_size - (tcp_conn->data_copied + offset);
601	int size = min(tcp_conn->in.copy, buf_left);
602	int rc;
603
604	size = min(size, ctask->data_count);
605
606	debug_tcp("ctask_copy %d bytes at offset %d copied %d\n",
607	       size, tcp_conn->in.offset, tcp_conn->in.copied);
608
609	BUG_ON(size <= 0);
610	BUG_ON(tcp_ctask->sent + size > ctask->total_length);
611
612	rc = skb_copy_bits(tcp_conn->in.skb, tcp_conn->in.offset,
613			   (char*)buf + (offset + tcp_conn->data_copied), size);
614	/* must fit into skb->len */
615	BUG_ON(rc);
616
617	tcp_conn->in.offset += size;
618	tcp_conn->in.copy -= size;
619	tcp_conn->in.copied += size;
620	tcp_conn->data_copied += size;
621	tcp_ctask->sent += size;
622	ctask->data_count -= size;
623
624	BUG_ON(tcp_conn->in.copy < 0);
625	BUG_ON(ctask->data_count < 0);
626
627	if (buf_size != (tcp_conn->data_copied + offset)) {
628		if (!ctask->data_count) {
629			BUG_ON(buf_size - tcp_conn->data_copied < 0);
630			/* done with this PDU */
631			return buf_size - tcp_conn->data_copied;
632		}
633		return -EAGAIN;
634	}
635
636	/* done with this buffer or with both - PDU and buffer */
637	tcp_conn->data_copied = 0;
638	return 0;
639}
640
641/**
642 * iscsi_tcp_copy - copy skb bits to the destanation buffer
643 * @conn: iscsi tcp connection
644 *
645 * Notes:
646 *	The function calls skb_copy_bits() and updates per-connection
647 *	byte counters.
648 **/
649static inline int
650iscsi_tcp_copy(struct iscsi_conn *conn, int buf_size)
651{
652	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
653	int buf_left = buf_size - tcp_conn->data_copied;
654	int size = min(tcp_conn->in.copy, buf_left);
655	int rc;
656
657	debug_tcp("tcp_copy %d bytes at offset %d copied %d\n",
658	       size, tcp_conn->in.offset, tcp_conn->data_copied);
659	BUG_ON(size <= 0);
660
661	rc = skb_copy_bits(tcp_conn->in.skb, tcp_conn->in.offset,
662			   (char*)conn->data + tcp_conn->data_copied, size);
663	BUG_ON(rc);
664
665	tcp_conn->in.offset += size;
666	tcp_conn->in.copy -= size;
667	tcp_conn->in.copied += size;
668	tcp_conn->data_copied += size;
669
670	if (buf_size != tcp_conn->data_copied)
671		return -EAGAIN;
672
673	return 0;
674}
675
676static inline void
677partial_sg_digest_update(struct hash_desc *desc, struct scatterlist *sg,
678			 int offset, int length)
679{
680	struct scatterlist temp;
681
682	memcpy(&temp, sg, sizeof(struct scatterlist));
683	temp.offset = offset;
684	temp.length = length;
685	crypto_hash_update(desc, &temp, length);
686}
687
688static void
689iscsi_recv_digest_update(struct iscsi_tcp_conn *tcp_conn, char* buf, int len)
690{
691	struct scatterlist tmp;
692
693	sg_init_one(&tmp, buf, len);
694	crypto_hash_update(&tcp_conn->rx_hash, &tmp, len);
695}
696
697static int iscsi_scsi_data_in(struct iscsi_conn *conn)
698{
699	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
700	struct iscsi_cmd_task *ctask = tcp_conn->in.ctask;
701	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
702	struct scsi_cmnd *sc = ctask->sc;
703	struct scatterlist *sg;
704	int i, offset, rc = 0;
705
706	BUG_ON((void*)ctask != sc->SCp.ptr);
707
708	/*
709	 * copying Data-In into the Scsi_Cmnd
710	 */
711	if (!sc->use_sg) {
712		i = ctask->data_count;
713		rc = iscsi_ctask_copy(tcp_conn, ctask, sc->request_buffer,
714				      sc->request_bufflen,
715				      tcp_ctask->data_offset);
716		if (rc == -EAGAIN)
717			return rc;
718		if (conn->datadgst_en)
719			iscsi_recv_digest_update(tcp_conn, sc->request_buffer,
720						 i);
721		rc = 0;
722		goto done;
723	}
724
725	offset = tcp_ctask->data_offset;
726	sg = sc->request_buffer;
727
728	if (tcp_ctask->data_offset)
729		for (i = 0; i < tcp_ctask->sg_count; i++)
730			offset -= sg[i].length;
731	/* we've passed through partial sg*/
732	if (offset < 0)
733		offset = 0;
734
735	for (i = tcp_ctask->sg_count; i < sc->use_sg; i++) {
736		char *dest;
737
738		dest = kmap_atomic(sg[i].page, KM_SOFTIRQ0);
739		rc = iscsi_ctask_copy(tcp_conn, ctask, dest + sg[i].offset,
740				      sg[i].length, offset);
741		kunmap_atomic(dest, KM_SOFTIRQ0);
742		if (rc == -EAGAIN)
743			/* continue with the next SKB/PDU */
744			return rc;
745		if (!rc) {
746			if (conn->datadgst_en) {
747				if (!offset)
748					crypto_hash_update(
749							&tcp_conn->rx_hash,
750							&sg[i], sg[i].length);
751				else
752					partial_sg_digest_update(
753							&tcp_conn->rx_hash,
754							&sg[i],
755							sg[i].offset + offset,
756							sg[i].length - offset);
757			}
758			offset = 0;
759			tcp_ctask->sg_count++;
760		}
761
762		if (!ctask->data_count) {
763			if (rc && conn->datadgst_en)
764				/*
765				 * data-in is complete, but buffer not...
766				 */
767				partial_sg_digest_update(&tcp_conn->rx_hash,
768							 &sg[i],
769							 sg[i].offset,
770							 sg[i].length-rc);
771			rc = 0;
772			break;
773		}
774
775		if (!tcp_conn->in.copy)
776			return -EAGAIN;
777	}
778	BUG_ON(ctask->data_count);
779
780done:
781	/* check for non-exceptional status */
782	if (tcp_conn->in.hdr->flags & ISCSI_FLAG_DATA_STATUS) {
783		debug_scsi("done [sc %lx res %d itt 0x%x flags 0x%x]\n",
784			   (long)sc, sc->result, ctask->itt,
785			   tcp_conn->in.hdr->flags);
786		spin_lock(&conn->session->lock);
787		__iscsi_complete_pdu(conn, tcp_conn->in.hdr, NULL, 0);
788		spin_unlock(&conn->session->lock);
789	}
790
791	return rc;
792}
793
794static int
795iscsi_data_recv(struct iscsi_conn *conn)
796{
797	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
798	int rc = 0, opcode;
799
800	opcode = tcp_conn->in.hdr->opcode & ISCSI_OPCODE_MASK;
801	switch (opcode) {
802	case ISCSI_OP_SCSI_DATA_IN:
803		rc = iscsi_scsi_data_in(conn);
804		break;
805	case ISCSI_OP_SCSI_CMD_RSP:
806	case ISCSI_OP_TEXT_RSP:
807	case ISCSI_OP_LOGIN_RSP:
808	case ISCSI_OP_ASYNC_EVENT:
809	case ISCSI_OP_REJECT:
810		/*
811		 * Collect data segment to the connection's data
812		 * placeholder
813		 */
814		if (iscsi_tcp_copy(conn, tcp_conn->in.datalen)) {
815			rc = -EAGAIN;
816			goto exit;
817		}
818
819		rc = iscsi_complete_pdu(conn, tcp_conn->in.hdr, conn->data,
820					tcp_conn->in.datalen);
821		if (!rc && conn->datadgst_en && opcode != ISCSI_OP_LOGIN_RSP)
822			iscsi_recv_digest_update(tcp_conn, conn->data,
823			  			tcp_conn->in.datalen);
824		break;
825	default:
826		BUG_ON(1);
827	}
828exit:
829	return rc;
830}
831
832/**
833 * iscsi_tcp_data_recv - TCP receive in sendfile fashion
834 * @rd_desc: read descriptor
835 * @skb: socket buffer
836 * @offset: offset in skb
837 * @len: skb->len - offset
838 **/
839static int
840iscsi_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
841		unsigned int offset, size_t len)
842{
843	int rc;
844	struct iscsi_conn *conn = rd_desc->arg.data;
845	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
846	int processed;
847	char pad[ISCSI_PAD_LEN];
848	struct scatterlist sg;
849
850	/*
851	 * Save current SKB and its offset in the corresponding
852	 * connection context.
853	 */
854	tcp_conn->in.copy = skb->len - offset;
855	tcp_conn->in.offset = offset;
856	tcp_conn->in.skb = skb;
857	tcp_conn->in.len = tcp_conn->in.copy;
858	BUG_ON(tcp_conn->in.copy <= 0);
859	debug_tcp("in %d bytes\n", tcp_conn->in.copy);
860
861more:
862	tcp_conn->in.copied = 0;
863	rc = 0;
864
865	if (unlikely(conn->suspend_rx)) {
866		debug_tcp("conn %d Rx suspended!\n", conn->id);
867		return 0;
868	}
869
870	if (tcp_conn->in_progress == IN_PROGRESS_WAIT_HEADER ||
871	    tcp_conn->in_progress == IN_PROGRESS_HEADER_GATHER) {
872		rc = iscsi_hdr_extract(tcp_conn);
873		if (rc) {
874		       if (rc == -EAGAIN)
875				goto nomore;
876		       else {
877				iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
878				return 0;
879		       }
880		}
881
882		/*
883		 * Verify and process incoming PDU header.
884		 */
885		rc = iscsi_tcp_hdr_recv(conn);
886		if (!rc && tcp_conn->in.datalen) {
887			if (conn->datadgst_en)
888				crypto_hash_init(&tcp_conn->rx_hash);
889			tcp_conn->in_progress = IN_PROGRESS_DATA_RECV;
890		} else if (rc) {
891			iscsi_conn_failure(conn, rc);
892			return 0;
893		}
894	}
895
896	if (tcp_conn->in_progress == IN_PROGRESS_DDIGEST_RECV) {
897		uint32_t recv_digest;
898
899		debug_tcp("extra data_recv offset %d copy %d\n",
900			  tcp_conn->in.offset, tcp_conn->in.copy);
901		rc = iscsi_tcp_copy(conn, sizeof(uint32_t));
902		if (rc) {
903			if (rc == -EAGAIN)
904				goto again;
905			iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
906			return 0;
907		}
908
909		memcpy(&recv_digest, conn->data, sizeof(uint32_t));
910		if (recv_digest != tcp_conn->in.datadgst) {
911			debug_tcp("iscsi_tcp: data digest error!"
912				  "0x%x != 0x%x\n", recv_digest,
913				  tcp_conn->in.datadgst);
914			iscsi_conn_failure(conn, ISCSI_ERR_DATA_DGST);
915			return 0;
916		} else {
917			debug_tcp("iscsi_tcp: data digest match!"
918				  "0x%x == 0x%x\n", recv_digest,
919				  tcp_conn->in.datadgst);
920			tcp_conn->in_progress = IN_PROGRESS_WAIT_HEADER;
921		}
922	}
923
924	if (tcp_conn->in_progress == IN_PROGRESS_DATA_RECV &&
925	   tcp_conn->in.copy) {
926
927		debug_tcp("data_recv offset %d copy %d\n",
928		       tcp_conn->in.offset, tcp_conn->in.copy);
929
930		rc = iscsi_data_recv(conn);
931		if (rc) {
932			if (rc == -EAGAIN)
933				goto again;
934			iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
935			return 0;
936		}
937		tcp_conn->in.copy -= tcp_conn->in.padding;
938		tcp_conn->in.offset += tcp_conn->in.padding;
939		if (conn->datadgst_en) {
940			if (tcp_conn->in.padding) {
941				debug_tcp("padding -> %d\n",
942					  tcp_conn->in.padding);
943				memset(pad, 0, tcp_conn->in.padding);
944				sg_init_one(&sg, pad, tcp_conn->in.padding);
945				crypto_hash_update(&tcp_conn->rx_hash,
946						   &sg, sg.length);
947			}
948			crypto_hash_final(&tcp_conn->rx_hash,
949					  (u8 *) &tcp_conn->in.datadgst);
950			debug_tcp("rx digest 0x%x\n", tcp_conn->in.datadgst);
951			tcp_conn->in_progress = IN_PROGRESS_DDIGEST_RECV;
952			tcp_conn->data_copied = 0;
953		} else
954			tcp_conn->in_progress = IN_PROGRESS_WAIT_HEADER;
955	}
956
957	debug_tcp("f, processed %d from out of %d padding %d\n",
958	       tcp_conn->in.offset - offset, (int)len, tcp_conn->in.padding);
959	BUG_ON(tcp_conn->in.offset - offset > len);
960
961	if (tcp_conn->in.offset - offset != len) {
962		debug_tcp("continue to process %d bytes\n",
963		       (int)len - (tcp_conn->in.offset - offset));
964		goto more;
965	}
966
967nomore:
968	processed = tcp_conn->in.offset - offset;
969	BUG_ON(processed == 0);
970	return processed;
971
972again:
973	processed = tcp_conn->in.offset - offset;
974	debug_tcp("c, processed %d from out of %d rd_desc_cnt %d\n",
975	          processed, (int)len, (int)rd_desc->count);
976	BUG_ON(processed == 0);
977	BUG_ON(processed > len);
978
979	conn->rxdata_octets += processed;
980	return processed;
981}
982
983static void
984iscsi_tcp_data_ready(struct sock *sk, int flag)
985{
986	struct iscsi_conn *conn = sk->sk_user_data;
987	read_descriptor_t rd_desc;
988
989	read_lock(&sk->sk_callback_lock);
990
991	/*
992	 * Use rd_desc to pass 'conn' to iscsi_tcp_data_recv.
993	 * We set count to 1 because we want the network layer to
994	 * hand us all the skbs that are available. iscsi_tcp_data_recv
995	 * handled pdus that cross buffers or pdus that still need data.
996	 */
997	rd_desc.arg.data = conn;
998	rd_desc.count = 1;
999	tcp_read_sock(sk, &rd_desc, iscsi_tcp_data_recv);
1000
1001	read_unlock(&sk->sk_callback_lock);
1002}
1003
1004static void
1005iscsi_tcp_state_change(struct sock *sk)
1006{
1007	struct iscsi_tcp_conn *tcp_conn;
1008	struct iscsi_conn *conn;
1009	struct iscsi_session *session;
1010	void (*old_state_change)(struct sock *);
1011
1012	read_lock(&sk->sk_callback_lock);
1013
1014	conn = (struct iscsi_conn*)sk->sk_user_data;
1015	session = conn->session;
1016
1017	if ((sk->sk_state == TCP_CLOSE_WAIT ||
1018	     sk->sk_state == TCP_CLOSE) &&
1019	    !atomic_read(&sk->sk_rmem_alloc)) {
1020		debug_tcp("iscsi_tcp_state_change: TCP_CLOSE|TCP_CLOSE_WAIT\n");
1021		iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
1022	}
1023
1024	tcp_conn = conn->dd_data;
1025	old_state_change = tcp_conn->old_state_change;
1026
1027	read_unlock(&sk->sk_callback_lock);
1028
1029	old_state_change(sk);
1030}
1031
1032/**
1033 * iscsi_write_space - Called when more output buffer space is available
1034 * @sk: socket space is available for
1035 **/
1036static void
1037iscsi_write_space(struct sock *sk)
1038{
1039	struct iscsi_conn *conn = (struct iscsi_conn*)sk->sk_user_data;
1040	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1041
1042	tcp_conn->old_write_space(sk);
1043	debug_tcp("iscsi_write_space: cid %d\n", conn->id);
1044	scsi_queue_work(conn->session->host, &conn->xmitwork);
1045}
1046
1047static void
1048iscsi_conn_set_callbacks(struct iscsi_conn *conn)
1049{
1050	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1051	struct sock *sk = tcp_conn->sock->sk;
1052
1053	/* assign new callbacks */
1054	write_lock_bh(&sk->sk_callback_lock);
1055	sk->sk_user_data = conn;
1056	tcp_conn->old_data_ready = sk->sk_data_ready;
1057	tcp_conn->old_state_change = sk->sk_state_change;
1058	tcp_conn->old_write_space = sk->sk_write_space;
1059	sk->sk_data_ready = iscsi_tcp_data_ready;
1060	sk->sk_state_change = iscsi_tcp_state_change;
1061	sk->sk_write_space = iscsi_write_space;
1062	write_unlock_bh(&sk->sk_callback_lock);
1063}
1064
1065static void
1066iscsi_conn_restore_callbacks(struct iscsi_tcp_conn *tcp_conn)
1067{
1068	struct sock *sk = tcp_conn->sock->sk;
1069
1070	/* restore socket callbacks, see also: iscsi_conn_set_callbacks() */
1071	write_lock_bh(&sk->sk_callback_lock);
1072	sk->sk_user_data    = NULL;
1073	sk->sk_data_ready   = tcp_conn->old_data_ready;
1074	sk->sk_state_change = tcp_conn->old_state_change;
1075	sk->sk_write_space  = tcp_conn->old_write_space;
1076	sk->sk_no_check	 = 0;
1077	write_unlock_bh(&sk->sk_callback_lock);
1078}
1079
1080/**
1081 * iscsi_send - generic send routine
1082 * @sk: kernel's socket
1083 * @buf: buffer to write from
1084 * @size: actual size to write
1085 * @flags: socket's flags
1086 */
1087static inline int
1088iscsi_send(struct iscsi_conn *conn, struct iscsi_buf *buf, int size, int flags)
1089{
1090	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1091	struct socket *sk = tcp_conn->sock;
1092	int offset = buf->sg.offset + buf->sent, res;
1093
1094	/*
1095	 * if we got use_sg=0 or are sending something we kmallocd
1096	 * then we did not have to do kmap (kmap returns page_address)
1097	 *
1098	 * if we got use_sg > 0, but had to drop down, we do not
1099	 * set clustering so this should only happen for that
1100	 * slab case.
1101	 */
1102	if (buf->use_sendmsg)
1103		res = sock_no_sendpage(sk, buf->sg.page, offset, size, flags);
1104	else
1105		res = tcp_conn->sendpage(sk, buf->sg.page, offset, size, flags);
1106
1107	if (res >= 0) {
1108		conn->txdata_octets += res;
1109		buf->sent += res;
1110		return res;
1111	}
1112
1113	tcp_conn->sendpage_failures_cnt++;
1114	if (res == -EAGAIN)
1115		res = -ENOBUFS;
1116	else
1117		iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
1118	return res;
1119}
1120
1121/**
1122 * iscsi_sendhdr - send PDU Header via tcp_sendpage()
1123 * @conn: iscsi connection
1124 * @buf: buffer to write from
1125 * @datalen: lenght of data to be sent after the header
1126 *
1127 * Notes:
1128 *	(Tx, Fast Path)
1129 **/
1130static inline int
1131iscsi_sendhdr(struct iscsi_conn *conn, struct iscsi_buf *buf, int datalen)
1132{
1133	int flags = 0; /* MSG_DONTWAIT; */
1134	int res, size;
1135
1136	size = buf->sg.length - buf->sent;
1137	BUG_ON(buf->sent + size > buf->sg.length);
1138	if (buf->sent + size != buf->sg.length || datalen)
1139		flags |= MSG_MORE;
1140
1141	res = iscsi_send(conn, buf, size, flags);
1142	debug_tcp("sendhdr %d bytes, sent %d res %d\n", size, buf->sent, res);
1143	if (res >= 0) {
1144		if (size != res)
1145			return -EAGAIN;
1146		return 0;
1147	}
1148
1149	return res;
1150}
1151
1152/**
1153 * iscsi_sendpage - send one page of iSCSI Data-Out.
1154 * @conn: iscsi connection
1155 * @buf: buffer to write from
1156 * @count: remaining data
1157 * @sent: number of bytes sent
1158 *
1159 * Notes:
1160 *	(Tx, Fast Path)
1161 **/
1162static inline int
1163iscsi_sendpage(struct iscsi_conn *conn, struct iscsi_buf *buf,
1164	       int *count, int *sent)
1165{
1166	int flags = 0; /* MSG_DONTWAIT; */
1167	int res, size;
1168
1169	size = buf->sg.length - buf->sent;
1170	BUG_ON(buf->sent + size > buf->sg.length);
1171	if (size > *count)
1172		size = *count;
1173	if (buf->sent + size != buf->sg.length || *count != size)
1174		flags |= MSG_MORE;
1175
1176	res = iscsi_send(conn, buf, size, flags);
1177	debug_tcp("sendpage: %d bytes, sent %d left %d sent %d res %d\n",
1178		  size, buf->sent, *count, *sent, res);
1179	if (res >= 0) {
1180		*count -= res;
1181		*sent += res;
1182		if (size != res)
1183			return -EAGAIN;
1184		return 0;
1185	}
1186
1187	return res;
1188}
1189
1190static inline void
1191iscsi_data_digest_init(struct iscsi_tcp_conn *tcp_conn,
1192		      struct iscsi_tcp_cmd_task *tcp_ctask)
1193{
1194	crypto_hash_init(&tcp_conn->tx_hash);
1195	tcp_ctask->digest_count = 4;
1196}
1197
1198/**
1199 * iscsi_solicit_data_cont - initialize next Data-Out
1200 * @conn: iscsi connection
1201 * @ctask: scsi command task
1202 * @r2t: R2T info
1203 * @left: bytes left to transfer
1204 *
1205 * Notes:
1206 *	Initialize next Data-Out within this R2T sequence and continue
1207 *	to process next Scatter-Gather element(if any) of this SCSI command.
1208 *
1209 *	Called under connection lock.
1210 **/
1211static void
1212iscsi_solicit_data_cont(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
1213			struct iscsi_r2t_info *r2t, int left)
1214{
1215	struct iscsi_data *hdr;
1216	struct scsi_cmnd *sc = ctask->sc;
1217	int new_offset;
1218
1219	hdr = &r2t->dtask.hdr;
1220	memset(hdr, 0, sizeof(struct iscsi_data));
1221	hdr->ttt = r2t->ttt;
1222	hdr->datasn = cpu_to_be32(r2t->solicit_datasn);
1223	r2t->solicit_datasn++;
1224	hdr->opcode = ISCSI_OP_SCSI_DATA_OUT;
1225	memcpy(hdr->lun, ctask->hdr->lun, sizeof(hdr->lun));
1226	hdr->itt = ctask->hdr->itt;
1227	hdr->exp_statsn = r2t->exp_statsn;
1228	new_offset = r2t->data_offset + r2t->sent;
1229	hdr->offset = cpu_to_be32(new_offset);
1230	if (left > conn->max_xmit_dlength) {
1231		hton24(hdr->dlength, conn->max_xmit_dlength);
1232		r2t->data_count = conn->max_xmit_dlength;
1233	} else {
1234		hton24(hdr->dlength, left);
1235		r2t->data_count = left;
1236		hdr->flags = ISCSI_FLAG_CMD_FINAL;
1237	}
1238	conn->dataout_pdus_cnt++;
1239
1240	iscsi_buf_init_iov(&r2t->headbuf, (char*)hdr,
1241			   sizeof(struct iscsi_hdr));
1242
1243	if (iscsi_buf_left(&r2t->sendbuf))
1244		return;
1245
1246	if (sc->use_sg) {
1247		iscsi_buf_init_sg(&r2t->sendbuf, r2t->sg);
1248		r2t->sg += 1;
1249	} else {
1250		iscsi_buf_init_iov(&r2t->sendbuf,
1251			    (char*)sc->request_buffer + new_offset,
1252			    r2t->data_count);
1253		r2t->sg = NULL;
1254	}
1255}
1256
1257static void iscsi_set_padding(struct iscsi_tcp_cmd_task *tcp_ctask,
1258			      unsigned long len)
1259{
1260	tcp_ctask->pad_count = len & (ISCSI_PAD_LEN - 1);
1261	if (!tcp_ctask->pad_count)
1262		return;
1263
1264	tcp_ctask->pad_count = ISCSI_PAD_LEN - tcp_ctask->pad_count;
1265	debug_scsi("write padding %d bytes\n", tcp_ctask->pad_count);
1266	tcp_ctask->xmstate |= XMSTATE_W_PAD;
1267}
1268
1269/**
1270 * iscsi_tcp_cmd_init - Initialize iSCSI SCSI_READ or SCSI_WRITE commands
1271 * @conn: iscsi connection
1272 * @ctask: scsi command task
1273 * @sc: scsi command
1274 **/
1275static void
1276iscsi_tcp_cmd_init(struct iscsi_cmd_task *ctask)
1277{
1278	struct scsi_cmnd *sc = ctask->sc;
1279	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
1280
1281	BUG_ON(__kfifo_len(tcp_ctask->r2tqueue));
1282
1283	tcp_ctask->sent = 0;
1284	tcp_ctask->sg_count = 0;
1285
1286	if (sc->sc_data_direction == DMA_TO_DEVICE) {
1287		tcp_ctask->xmstate = XMSTATE_W_HDR;
1288		tcp_ctask->exp_r2tsn = 0;
1289		BUG_ON(ctask->total_length == 0);
1290
1291		if (sc->use_sg) {
1292			struct scatterlist *sg = sc->request_buffer;
1293
1294			iscsi_buf_init_sg(&tcp_ctask->sendbuf, sg);
1295			tcp_ctask->sg = sg + 1;
1296			tcp_ctask->bad_sg = sg + sc->use_sg;
1297		} else {
1298			iscsi_buf_init_iov(&tcp_ctask->sendbuf,
1299					   sc->request_buffer,
1300					   sc->request_bufflen);
1301			tcp_ctask->sg = NULL;
1302			tcp_ctask->bad_sg = NULL;
1303		}
1304		debug_scsi("cmd [itt 0x%x total %d imm_data %d "
1305			   "unsol count %d, unsol offset %d]\n",
1306			   ctask->itt, ctask->total_length, ctask->imm_count,
1307			   ctask->unsol_count, ctask->unsol_offset);
1308	} else
1309		tcp_ctask->xmstate = XMSTATE_R_HDR;
1310
1311	iscsi_buf_init_iov(&tcp_ctask->headbuf, (char*)ctask->hdr,
1312			    sizeof(struct iscsi_hdr));
1313}
1314
1315/**
1316 * iscsi_tcp_mtask_xmit - xmit management(immediate) task
1317 * @conn: iscsi connection
1318 * @mtask: task management task
1319 *
1320 * Notes:
1321 *	The function can return -EAGAIN in which case caller must
1322 *	call it again later, or recover. '0' return code means successful
1323 *	xmit.
1324 *
1325 *	Management xmit state machine consists of two states:
1326 *		IN_PROGRESS_IMM_HEAD - PDU Header xmit in progress
1327 *		IN_PROGRESS_IMM_DATA - PDU Data xmit in progress
1328 **/
1329static int
1330iscsi_tcp_mtask_xmit(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask)
1331{
1332	struct iscsi_tcp_mgmt_task *tcp_mtask = mtask->dd_data;
1333	int rc;
1334
1335	debug_scsi("mtask deq [cid %d state %x itt 0x%x]\n",
1336		conn->id, tcp_mtask->xmstate, mtask->itt);
1337
1338	if (tcp_mtask->xmstate & XMSTATE_IMM_HDR) {
1339		tcp_mtask->xmstate &= ~XMSTATE_IMM_HDR;
1340		if (mtask->data_count)
1341			tcp_mtask->xmstate |= XMSTATE_IMM_DATA;
1342		if (conn->c_stage != ISCSI_CONN_INITIAL_STAGE &&
1343		    conn->stop_stage != STOP_CONN_RECOVER &&
1344		    conn->hdrdgst_en)
1345			iscsi_hdr_digest(conn, &tcp_mtask->headbuf,
1346					(u8*)tcp_mtask->hdrext);
1347		rc = iscsi_sendhdr(conn, &tcp_mtask->headbuf,
1348				   mtask->data_count);
1349		if (rc) {
1350			tcp_mtask->xmstate |= XMSTATE_IMM_HDR;
1351			if (mtask->data_count)
1352				tcp_mtask->xmstate &= ~XMSTATE_IMM_DATA;
1353			return rc;
1354		}
1355	}
1356
1357	if (tcp_mtask->xmstate & XMSTATE_IMM_DATA) {
1358		BUG_ON(!mtask->data_count);
1359		tcp_mtask->xmstate &= ~XMSTATE_IMM_DATA;
1360		do {
1361			int rc;
1362
1363			rc = iscsi_sendpage(conn, &tcp_mtask->sendbuf,
1364					&mtask->data_count, &tcp_mtask->sent);
1365			if (rc) {
1366				tcp_mtask->xmstate |= XMSTATE_IMM_DATA;
1367				return rc;
1368			}
1369		} while (mtask->data_count);
1370	}
1371
1372	BUG_ON(tcp_mtask->xmstate != XMSTATE_IDLE);
1373	if (mtask->hdr->itt == RESERVED_ITT) {
1374		struct iscsi_session *session = conn->session;
1375
1376		spin_lock_bh(&session->lock);
1377		list_del(&conn->mtask->running);
1378		__kfifo_put(session->mgmtpool.queue, (void*)&conn->mtask,
1379			    sizeof(void*));
1380		spin_unlock_bh(&session->lock);
1381	}
1382	return 0;
1383}
1384
1385static inline int
1386iscsi_send_read_hdr(struct iscsi_conn *conn,
1387		    struct iscsi_tcp_cmd_task *tcp_ctask)
1388{
1389	int rc;
1390
1391	tcp_ctask->xmstate &= ~XMSTATE_R_HDR;
1392	if (conn->hdrdgst_en)
1393		iscsi_hdr_digest(conn, &tcp_ctask->headbuf,
1394				 (u8*)tcp_ctask->hdrext);
1395	rc = iscsi_sendhdr(conn, &tcp_ctask->headbuf, 0);
1396	if (!rc) {
1397		BUG_ON(tcp_ctask->xmstate != XMSTATE_IDLE);
1398		return 0; /* wait for Data-In */
1399	}
1400	tcp_ctask->xmstate |= XMSTATE_R_HDR;
1401	return rc;
1402}
1403
1404static inline int
1405iscsi_send_write_hdr(struct iscsi_conn *conn,
1406		     struct iscsi_cmd_task *ctask)
1407{
1408	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
1409	int rc;
1410
1411	tcp_ctask->xmstate &= ~XMSTATE_W_HDR;
1412	if (conn->hdrdgst_en)
1413		iscsi_hdr_digest(conn, &tcp_ctask->headbuf,
1414				 (u8*)tcp_ctask->hdrext);
1415	rc = iscsi_sendhdr(conn, &tcp_ctask->headbuf, ctask->imm_count);
1416	if (rc) {
1417		tcp_ctask->xmstate |= XMSTATE_W_HDR;
1418		return rc;
1419	}
1420
1421	if (ctask->imm_count) {
1422		tcp_ctask->xmstate |= XMSTATE_IMM_DATA;
1423		iscsi_set_padding(tcp_ctask, ctask->imm_count);
1424
1425		if (ctask->conn->datadgst_en) {
1426			iscsi_data_digest_init(ctask->conn->dd_data, tcp_ctask);
1427			tcp_ctask->immdigest = 0;
1428		}
1429	}
1430
1431	if (ctask->unsol_count)
1432		tcp_ctask->xmstate |= XMSTATE_UNS_HDR | XMSTATE_UNS_INIT;
1433	return 0;
1434}
1435
1436static int
1437iscsi_send_padding(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
1438{
1439	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
1440	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1441	int sent = 0, rc;
1442
1443	if (tcp_ctask->xmstate & XMSTATE_W_PAD) {
1444		iscsi_buf_init_iov(&tcp_ctask->sendbuf, (char*)&tcp_ctask->pad,
1445				   tcp_ctask->pad_count);
1446		if (conn->datadgst_en)
1447			crypto_hash_update(&tcp_conn->tx_hash,
1448					   &tcp_ctask->sendbuf.sg,
1449					   tcp_ctask->sendbuf.sg.length);
1450	} else if (!(tcp_ctask->xmstate & XMSTATE_W_RESEND_PAD))
1451		return 0;
1452
1453	tcp_ctask->xmstate &= ~XMSTATE_W_PAD;
1454	tcp_ctask->xmstate &= ~XMSTATE_W_RESEND_PAD;
1455	debug_scsi("sending %d pad bytes for itt 0x%x\n",
1456		   tcp_ctask->pad_count, ctask->itt);
1457	rc = iscsi_sendpage(conn, &tcp_ctask->sendbuf, &tcp_ctask->pad_count,
1458			   &sent);
1459	if (rc) {
1460		debug_scsi("padding send failed %d\n", rc);
1461		tcp_ctask->xmstate |= XMSTATE_W_RESEND_PAD;
1462	}
1463	return rc;
1464}
1465
1466static int
1467iscsi_send_digest(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
1468			struct iscsi_buf *buf, uint32_t *digest)
1469{
1470	struct iscsi_tcp_cmd_task *tcp_ctask;
1471	struct iscsi_tcp_conn *tcp_conn;
1472	int rc, sent = 0;
1473
1474	if (!conn->datadgst_en)
1475		return 0;
1476
1477	tcp_ctask = ctask->dd_data;
1478	tcp_conn = conn->dd_data;
1479
1480	if (!(tcp_ctask->xmstate & XMSTATE_W_RESEND_DATA_DIGEST)) {
1481		crypto_hash_final(&tcp_conn->tx_hash, (u8*)digest);
1482		iscsi_buf_init_iov(buf, (char*)digest, 4);
1483	}
1484	tcp_ctask->xmstate &= ~XMSTATE_W_RESEND_DATA_DIGEST;
1485
1486	rc = iscsi_sendpage(conn, buf, &tcp_ctask->digest_count, &sent);
1487	if (!rc)
1488		debug_scsi("sent digest 0x%x for itt 0x%x\n", *digest,
1489			  ctask->itt);
1490	else {
1491		debug_scsi("sending digest 0x%x failed for itt 0x%x!\n",
1492			  *digest, ctask->itt);
1493		tcp_ctask->xmstate |= XMSTATE_W_RESEND_DATA_DIGEST;
1494	}
1495	return rc;
1496}
1497
1498static int
1499iscsi_send_data(struct iscsi_cmd_task *ctask, struct iscsi_buf *sendbuf,
1500		struct scatterlist **sg, int *sent, int *count,
1501		struct iscsi_buf *digestbuf, uint32_t *digest)
1502{
1503	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
1504	struct iscsi_conn *conn = ctask->conn;
1505	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1506	int rc, buf_sent, offset;
1507
1508	while (*count) {
1509		buf_sent = 0;
1510		offset = sendbuf->sent;
1511
1512		rc = iscsi_sendpage(conn, sendbuf, count, &buf_sent);
1513		*sent = *sent + buf_sent;
1514		if (buf_sent && conn->datadgst_en)
1515			partial_sg_digest_update(&tcp_conn->tx_hash,
1516				&sendbuf->sg, sendbuf->sg.offset + offset,
1517				buf_sent);
1518		if (!iscsi_buf_left(sendbuf) && *sg != tcp_ctask->bad_sg) {
1519			iscsi_buf_init_sg(sendbuf, *sg);
1520			*sg = *sg + 1;
1521		}
1522
1523		if (rc)
1524			return rc;
1525	}
1526
1527	rc = iscsi_send_padding(conn, ctask);
1528	if (rc)
1529		return rc;
1530
1531	return iscsi_send_digest(conn, ctask, digestbuf, digest);
1532}
1533
1534static int
1535iscsi_send_unsol_hdr(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
1536{
1537	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
1538	struct iscsi_data_task *dtask;
1539	int rc;
1540
1541	tcp_ctask->xmstate |= XMSTATE_UNS_DATA;
1542	if (tcp_ctask->xmstate & XMSTATE_UNS_INIT) {
1543		dtask = &tcp_ctask->unsol_dtask;
1544
1545		iscsi_prep_unsolicit_data_pdu(ctask, &dtask->hdr);
1546		iscsi_buf_init_iov(&tcp_ctask->headbuf, (char*)&dtask->hdr,
1547				   sizeof(struct iscsi_hdr));
1548		if (conn->hdrdgst_en)
1549			iscsi_hdr_digest(conn, &tcp_ctask->headbuf,
1550					(u8*)dtask->hdrext);
1551
1552		tcp_ctask->xmstate &= ~XMSTATE_UNS_INIT;
1553		iscsi_set_padding(tcp_ctask, ctask->data_count);
1554	}
1555
1556	rc = iscsi_sendhdr(conn, &tcp_ctask->headbuf, ctask->data_count);
1557	if (rc) {
1558		tcp_ctask->xmstate &= ~XMSTATE_UNS_DATA;
1559		tcp_ctask->xmstate |= XMSTATE_UNS_HDR;
1560		return rc;
1561	}
1562
1563	if (conn->datadgst_en) {
1564		dtask = &tcp_ctask->unsol_dtask;
1565		iscsi_data_digest_init(ctask->conn->dd_data, tcp_ctask);
1566		dtask->digest = 0;
1567	}
1568
1569	debug_scsi("uns dout [itt 0x%x dlen %d sent %d]\n",
1570		   ctask->itt, ctask->unsol_count, tcp_ctask->sent);
1571	return 0;
1572}
1573
/*
 * Transmit the unsolicited Data-Out PDUs of a write command.
 *
 * Driven by two xmit state bits: XMSTATE_UNS_HDR (a Data-Out header is
 * pending) and XMSTATE_UNS_DATA (the data of the current Data-Out is
 * pending). Returns 0 when neither is set any more, or the first
 * non-zero result from iscsi_send_unsol_hdr()/iscsi_send_data(), with
 * the state bits left such that a later call resumes.
 *
 * Note that the send_hdr label sits inside the first conditional block;
 * the goto further down jumps back into it to start the next PDU.
 */
static int
iscsi_send_unsol_pdu(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
{
	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
	int rc;

	if (tcp_ctask->xmstate & XMSTATE_UNS_HDR) {
		BUG_ON(!ctask->unsol_count);
		tcp_ctask->xmstate &= ~XMSTATE_UNS_HDR;
send_hdr:
		/* sets XMSTATE_UNS_DATA; on failure puts XMSTATE_UNS_HDR back */
		rc = iscsi_send_unsol_hdr(conn, ctask);
		if (rc)
			return rc;
	}

	if (tcp_ctask->xmstate & XMSTATE_UNS_DATA) {
		struct iscsi_data_task *dtask = &tcp_ctask->unsol_dtask;
		/* remember the byte count so the progress can be derived below */
		int start = tcp_ctask->sent;

		rc = iscsi_send_data(ctask, &tcp_ctask->sendbuf, &tcp_ctask->sg,
				     &tcp_ctask->sent, &ctask->data_count,
				     &dtask->digestbuf, &dtask->digest);
		/* account for whatever went out, even if the send was partial */
		ctask->unsol_count -= tcp_ctask->sent - start;
		if (rc)
			return rc;
		tcp_ctask->xmstate &= ~XMSTATE_UNS_DATA;
		/*
		 * Done with the Data-Out. Next, check if we need
		 * to send another unsolicited Data-Out.
		 */
		if (ctask->unsol_count) {
			debug_scsi("sending more uns\n");
			/* have iscsi_send_unsol_hdr() build a fresh header */
			tcp_ctask->xmstate |= XMSTATE_UNS_INIT;
			goto send_hdr;
		}
	}
	return 0;
}
1612
/*
 * Transmit solicited Data-Out PDUs, i.e. data requested by R2Ts.
 *
 * Driven by XMSTATE_SOL_HDR (a Data-Out header is pending) and
 * XMSTATE_SOL_DATA (the data of the current Data-Out is pending).
 * Works on tcp_ctask->r2t, taking the next R2T from the task's
 * r2tqueue when none is current. Returns 0 when there is nothing left
 * to send, or the first non-zero result from iscsi_sendhdr()/
 * iscsi_send_data(), with the state bits left such that a later call
 * resumes.
 *
 * Note that the send_hdr label sits inside the first conditional block;
 * the gotos in the data phase jump back into it.
 */
static int iscsi_send_sol_pdu(struct iscsi_conn *conn,
			      struct iscsi_cmd_task *ctask)
{
	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
	struct iscsi_session *session = conn->session;
	struct iscsi_r2t_info *r2t;
	struct iscsi_data_task *dtask;
	int left, rc;

	if (tcp_ctask->xmstate & XMSTATE_SOL_HDR) {
		tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR;
		tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
		if (!tcp_ctask->r2t) {
			/* no R2T in progress: pick up the next queued one */
			spin_lock_bh(&session->lock);
			__kfifo_get(tcp_ctask->r2tqueue, (void*)&tcp_ctask->r2t,
				    sizeof(void*));
			spin_unlock_bh(&session->lock);
		}
send_hdr:
		r2t = tcp_ctask->r2t;
		dtask = &r2t->dtask;

		if (conn->hdrdgst_en)
			iscsi_hdr_digest(conn, &r2t->headbuf,
					(u8*)dtask->hdrext);
		rc = iscsi_sendhdr(conn, &r2t->headbuf, r2t->data_count);
		if (rc) {
			/* header not (fully) sent: go back to the header state */
			tcp_ctask->xmstate &= ~XMSTATE_SOL_DATA;
			tcp_ctask->xmstate |= XMSTATE_SOL_HDR;
			return rc;
		}

		if (conn->datadgst_en) {
			iscsi_data_digest_init(conn->dd_data, tcp_ctask);
			dtask->digest = 0;
		}

		iscsi_set_padding(tcp_ctask, r2t->data_count);
		debug_scsi("sol dout [dsn %d itt 0x%x dlen %d sent %d]\n",
			r2t->solicit_datasn - 1, ctask->itt, r2t->data_count,
			r2t->sent);
	}

	if (tcp_ctask->xmstate & XMSTATE_SOL_DATA) {
		r2t = tcp_ctask->r2t;
		dtask = &r2t->dtask;

		rc = iscsi_send_data(ctask, &r2t->sendbuf, &r2t->sg,
				     &r2t->sent, &r2t->data_count,
				     &dtask->digestbuf, &dtask->digest);
		if (rc)
			return rc;
		tcp_ctask->xmstate &= ~XMSTATE_SOL_DATA;

		/*
		 * Done with this Data-Out. Next, check if we have
		 * to send another Data-Out for this R2T.
		 */
		BUG_ON(r2t->data_length - r2t->sent < 0);
		left = r2t->data_length - r2t->sent;
		if (left) {
			/* build the next Data-Out header for the same R2T */
			iscsi_solicit_data_cont(conn, ctask, r2t, left);
			tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
			tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR;
			goto send_hdr;
		}

		/*
		 * Done with this R2T. Check if there are more
		 * outstanding R2Ts ready to be processed.
		 */
		spin_lock_bh(&session->lock);
		tcp_ctask->r2t = NULL;
		/* hand the finished R2T back to the task's pool */
		__kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t,
			    sizeof(void*));
		if (__kfifo_get(tcp_ctask->r2tqueue, (void*)&r2t,
				sizeof(void*))) {
			tcp_ctask->r2t = r2t;
			tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
			tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR;
			spin_unlock_bh(&session->lock);
			goto send_hdr;
		}
		spin_unlock_bh(&session->lock);
	}
	return 0;
}
1700
1701static int
1702iscsi_tcp_ctask_xmit(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
1703{
1704	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
1705	int rc = 0;
1706
1707	debug_scsi("ctask deq [cid %d xmstate %x itt 0x%x]\n",
1708		conn->id, tcp_ctask->xmstate, ctask->itt);
1709
1710	/*
1711	 * serialize with TMF AbortTask
1712	 */
1713	if (ctask->mtask)
1714		return rc;
1715
1716	if (tcp_ctask->xmstate & XMSTATE_R_HDR)
1717		return iscsi_send_read_hdr(conn, tcp_ctask);
1718
1719	if (tcp_ctask->xmstate & XMSTATE_W_HDR) {
1720		rc = iscsi_send_write_hdr(conn, ctask);
1721		if (rc)
1722			return rc;
1723	}
1724
1725	if (tcp_ctask->xmstate & XMSTATE_IMM_DATA) {
1726		rc = iscsi_send_data(ctask, &tcp_ctask->sendbuf, &tcp_ctask->sg,
1727				     &tcp_ctask->sent, &ctask->imm_count,
1728				     &tcp_ctask->immbuf, &tcp_ctask->immdigest);
1729		if (rc)
1730			return rc;
1731		tcp_ctask->xmstate &= ~XMSTATE_IMM_DATA;
1732	}
1733
1734	rc = iscsi_send_unsol_pdu(conn, ctask);
1735	if (rc)
1736		return rc;
1737
1738	rc = iscsi_send_sol_pdu(conn, ctask);
1739	if (rc)
1740		return rc;
1741
1742	return rc;
1743}
1744
1745static struct iscsi_cls_conn *
1746iscsi_tcp_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
1747{
1748	struct iscsi_conn *conn;
1749	struct iscsi_cls_conn *cls_conn;
1750	struct iscsi_tcp_conn *tcp_conn;
1751
1752	cls_conn = iscsi_conn_setup(cls_session, conn_idx);
1753	if (!cls_conn)
1754		return NULL;
1755	conn = cls_conn->dd_data;
1756	/*
1757	 * due to strange issues with iser these are not set
1758	 * in iscsi_conn_setup
1759	 */
1760	conn->max_recv_dlength = ISCSI_DEF_MAX_RECV_SEG_LEN;
1761
1762	tcp_conn = kzalloc(sizeof(*tcp_conn), GFP_KERNEL);
1763	if (!tcp_conn)
1764		goto tcp_conn_alloc_fail;
1765
1766	conn->dd_data = tcp_conn;
1767	tcp_conn->iscsi_conn = conn;
1768	tcp_conn->in_progress = IN_PROGRESS_WAIT_HEADER;
1769	/* initial operational parameters */
1770	tcp_conn->hdr_size = sizeof(struct iscsi_hdr);
1771
1772	tcp_conn->tx_hash.tfm = crypto_alloc_hash("crc32c", 0,
1773						  CRYPTO_ALG_ASYNC);
1774	tcp_conn->tx_hash.flags = 0;
1775	if (IS_ERR(tcp_conn->tx_hash.tfm)) {
1776		printk(KERN_ERR "Could not create connection due to crc32c "
1777		       "loading error %ld. Make sure the crc32c module is "
1778		       "built as a module or into the kernel\n",
1779			PTR_ERR(tcp_conn->tx_hash.tfm));
1780		goto free_tcp_conn;
1781	}
1782
1783	tcp_conn->rx_hash.tfm = crypto_alloc_hash("crc32c", 0,
1784						  CRYPTO_ALG_ASYNC);
1785	tcp_conn->rx_hash.flags = 0;
1786	if (IS_ERR(tcp_conn->rx_hash.tfm)) {
1787		printk(KERN_ERR "Could not create connection due to crc32c "
1788		       "loading error %ld. Make sure the crc32c module is "
1789		       "built as a module or into the kernel\n",
1790			PTR_ERR(tcp_conn->rx_hash.tfm));
1791		goto free_tx_tfm;
1792	}
1793
1794	return cls_conn;
1795
1796free_tx_tfm:
1797	crypto_free_hash(tcp_conn->tx_hash.tfm);
1798free_tcp_conn:
1799	kfree(tcp_conn);
1800tcp_conn_alloc_fail:
1801	iscsi_conn_teardown(cls_conn);
1802	return NULL;
1803}
1804
1805static void
1806iscsi_tcp_release_conn(struct iscsi_conn *conn)
1807{
1808	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1809
1810	if (!tcp_conn->sock)
1811		return;
1812
1813	sock_hold(tcp_conn->sock->sk);
1814	iscsi_conn_restore_callbacks(tcp_conn);
1815	sock_put(tcp_conn->sock->sk);
1816
1817	sock_release(tcp_conn->sock);
1818	tcp_conn->sock = NULL;
1819	conn->recv_lock = NULL;
1820}
1821
1822static void
1823iscsi_tcp_conn_destroy(struct iscsi_cls_conn *cls_conn)
1824{
1825	struct iscsi_conn *conn = cls_conn->dd_data;
1826	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1827
1828	iscsi_tcp_release_conn(conn);
1829	iscsi_conn_teardown(cls_conn);
1830
1831	if (tcp_conn->tx_hash.tfm)
1832		crypto_free_hash(tcp_conn->tx_hash.tfm);
1833	if (tcp_conn->rx_hash.tfm)
1834		crypto_free_hash(tcp_conn->rx_hash.tfm);
1835
1836	kfree(tcp_conn);
1837}
1838
1839static void
1840iscsi_tcp_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
1841{
1842	struct iscsi_conn *conn = cls_conn->dd_data;
1843	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1844
1845	iscsi_conn_stop(cls_conn, flag);
1846	iscsi_tcp_release_conn(conn);
1847	tcp_conn->hdr_size = sizeof(struct iscsi_hdr);
1848}
1849
1850static int
1851iscsi_tcp_conn_bind(struct iscsi_cls_session *cls_session,
1852		    struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
1853		    int is_leading)
1854{
1855	struct iscsi_conn *conn = cls_conn->dd_data;
1856	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1857	struct sock *sk;
1858	struct socket *sock;
1859	int err;
1860
1861	/* lookup for existing socket */
1862	sock = sockfd_lookup((int)transport_eph, &err);
1863	if (!sock) {
1864		printk(KERN_ERR "iscsi_tcp: sockfd_lookup failed %d\n", err);
1865		return -EEXIST;
1866	}
1867
1868	err = iscsi_conn_bind(cls_session, cls_conn, is_leading);
1869	if (err)
1870		return err;
1871
1872	/* bind iSCSI connection and socket */
1873	tcp_conn->sock = sock;
1874
1875	/* setup Socket parameters */
1876	sk = sock->sk;
1877	sk->sk_reuse = 1;
1878	sk->sk_sndtimeo = 15 * HZ;
1879	sk->sk_allocation = GFP_ATOMIC;
1880
1881
1882	/*
1883	 * Intercept TCP callbacks for sendfile like receive
1884	 * processing.
1885	 */
1886	conn->recv_lock = &sk->sk_callback_lock;
1887	iscsi_conn_set_callbacks(conn);
1888	tcp_conn->sendpage = tcp_conn->sock->ops->sendpage;
1889	/*
1890	 * set receive state machine into initial state
1891	 */
1892	tcp_conn->in_progress = IN_PROGRESS_WAIT_HEADER;
1893
1894	return 0;
1895}
1896
1897/* called with host lock */
1898static void
1899iscsi_tcp_mgmt_init(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask,
1900		    char *data, uint32_t data_size)
1901{
1902	struct iscsi_tcp_mgmt_task *tcp_mtask = mtask->dd_data;
1903
1904	iscsi_buf_init_iov(&tcp_mtask->headbuf, (char*)mtask->hdr,
1905			   sizeof(struct iscsi_hdr));
1906	tcp_mtask->xmstate = XMSTATE_IMM_HDR;
1907	tcp_mtask->sent = 0;
1908
1909	if (mtask->data_count)
1910		iscsi_buf_init_iov(&tcp_mtask->sendbuf, (char*)mtask->data,
1911				    mtask->data_count);
1912}
1913
1914static int
1915iscsi_r2tpool_alloc(struct iscsi_session *session)
1916{
1917	int i;
1918	int cmd_i;
1919
1920	/*
1921	 * initialize per-task: R2T pool and xmit queue
1922	 */
1923	for (cmd_i = 0; cmd_i < session->cmds_max; cmd_i++) {
1924	        struct iscsi_cmd_task *ctask = session->cmds[cmd_i];
1925		struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
1926
1927		/*
1928		 * pre-allocated x4 as much r2ts to handle race when
1929		 * target acks DataOut faster than we data_xmit() queues
1930		 * could replenish r2tqueue.
1931		 */
1932
1933		/* R2T pool */
1934		if (iscsi_pool_init(&tcp_ctask->r2tpool, session->max_r2t * 4,
1935				    (void***)&tcp_ctask->r2ts,
1936				    sizeof(struct iscsi_r2t_info))) {
1937			goto r2t_alloc_fail;
1938		}
1939
1940		/* R2T xmit queue */
1941		tcp_ctask->r2tqueue = kfifo_alloc(
1942		      session->max_r2t * 4 * sizeof(void*), GFP_KERNEL, NULL);
1943		if (tcp_ctask->r2tqueue == ERR_PTR(-ENOMEM)) {
1944			iscsi_pool_free(&tcp_ctask->r2tpool,
1945					(void**)tcp_ctask->r2ts);
1946			goto r2t_alloc_fail;
1947		}
1948	}
1949
1950	return 0;
1951
1952r2t_alloc_fail:
1953	for (i = 0; i < cmd_i; i++) {
1954		struct iscsi_cmd_task *ctask = session->cmds[i];
1955		struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
1956
1957		kfifo_free(tcp_ctask->r2tqueue);
1958		iscsi_pool_free(&tcp_ctask->r2tpool,
1959				(void**)tcp_ctask->r2ts);
1960	}
1961	return -ENOMEM;
1962}
1963
1964static void
1965iscsi_r2tpool_free(struct iscsi_session *session)
1966{
1967	int i;
1968
1969	for (i = 0; i < session->cmds_max; i++) {
1970		struct iscsi_cmd_task *ctask = session->cmds[i];
1971		struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
1972
1973		kfifo_free(tcp_ctask->r2tqueue);
1974		iscsi_pool_free(&tcp_ctask->r2tpool,
1975				(void**)tcp_ctask->r2ts);
1976	}
1977}
1978
1979static int
1980iscsi_conn_set_param(struct iscsi_cls_conn *cls_conn, enum iscsi_param param,
1981		     char *buf, int buflen)
1982{
1983	struct iscsi_conn *conn = cls_conn->dd_data;
1984	struct iscsi_session *session = conn->session;
1985	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1986	int value;
1987
1988	switch(param) {
1989	case ISCSI_PARAM_HDRDGST_EN:
1990		iscsi_set_param(cls_conn, param, buf, buflen);
1991		tcp_conn->hdr_size = sizeof(struct iscsi_hdr);
1992		if (conn->hdrdgst_en)
1993			tcp_conn->hdr_size += sizeof(__u32);
1994		break;
1995	case ISCSI_PARAM_DATADGST_EN:
1996		iscsi_set_param(cls_conn, param, buf, buflen);
1997		tcp_conn->sendpage = conn->datadgst_en ?
1998			sock_no_sendpage : tcp_conn->sock->ops->sendpage;
1999		break;
2000	case ISCSI_PARAM_MAX_R2T:
2001		sscanf(buf, "%d", &value);
2002		if (session->max_r2t == roundup_pow_of_two(value))
2003			break;
2004		iscsi_r2tpool_free(session);
2005		iscsi_set_param(cls_conn, param, buf, buflen);
2006		if (session->max_r2t & (session->max_r2t - 1))
2007			session->max_r2t = roundup_pow_of_two(session->max_r2t);
2008		if (iscsi_r2tpool_alloc(session))
2009			return -ENOMEM;
2010		break;
2011	default:
2012		return iscsi_set_param(cls_conn, param, buf, buflen);
2013	}
2014
2015	return 0;
2016}
2017
2018static int
2019iscsi_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
2020			 enum iscsi_param param, char *buf)
2021{
2022	struct iscsi_conn *conn = cls_conn->dd_data;
2023	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
2024	struct inet_sock *inet;
2025	struct ipv6_pinfo *np;
2026	struct sock *sk;
2027	int len;
2028
2029	switch(param) {
2030	case ISCSI_PARAM_CONN_PORT:
2031		mutex_lock(&conn->xmitmutex);
2032		if (!tcp_conn->sock) {
2033			mutex_unlock(&conn->xmitmutex);
2034			return -EINVAL;
2035		}
2036
2037		inet = inet_sk(tcp_conn->sock->sk);
2038		len = sprintf(buf, "%hu\n", be16_to_cpu(inet->dport));
2039		mutex_unlock(&conn->xmitmutex);
2040		break;
2041	case ISCSI_PARAM_CONN_ADDRESS:
2042		mutex_lock(&conn->xmitmutex);
2043		if (!tcp_conn->sock) {
2044			mutex_unlock(&conn->xmitmutex);
2045			return -EINVAL;
2046		}
2047
2048		sk = tcp_conn->sock->sk;
2049		if (sk->sk_family == PF_INET) {
2050			inet = inet_sk(sk);
2051			len = sprintf(buf, NIPQUAD_FMT "\n",
2052				      NIPQUAD(inet->daddr));
2053		} else {
2054			np = inet6_sk(sk);
2055			len = sprintf(buf, NIP6_FMT "\n", NIP6(np->daddr));
2056		}
2057		mutex_unlock(&conn->xmitmutex);
2058		break;
2059	default:
2060		return iscsi_conn_get_param(cls_conn, param, buf);
2061	}
2062
2063	return len;
2064}
2065
2066static void
2067iscsi_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *stats)
2068{
2069	struct iscsi_conn *conn = cls_conn->dd_data;
2070	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
2071
2072	stats->txdata_octets = conn->txdata_octets;
2073	stats->rxdata_octets = conn->rxdata_octets;
2074	stats->scsicmd_pdus = conn->scsicmd_pdus_cnt;
2075	stats->dataout_pdus = conn->dataout_pdus_cnt;
2076	stats->scsirsp_pdus = conn->scsirsp_pdus_cnt;
2077	stats->datain_pdus = conn->datain_pdus_cnt;
2078	stats->r2t_pdus = conn->r2t_pdus_cnt;
2079	stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
2080	stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
2081	stats->custom_length = 3;
2082	strcpy(stats->custom[0].desc, "tx_sendpage_failures");
2083	stats->custom[0].value = tcp_conn->sendpage_failures_cnt;
2084	strcpy(stats->custom[1].desc, "rx_discontiguous_hdr");
2085	stats->custom[1].value = tcp_conn->discontiguous_hdr_cnt;
2086	strcpy(stats->custom[2].desc, "eh_abort_cnt");
2087	stats->custom[2].value = conn->eh_abort_cnt;
2088}
2089
2090static struct iscsi_cls_session *
2091iscsi_tcp_session_create(struct iscsi_transport *iscsit,
2092			 struct scsi_transport_template *scsit,
2093			 uint32_t initial_cmdsn, uint32_t *hostno)
2094{
2095	struct iscsi_cls_session *cls_session;
2096	struct iscsi_session *session;
2097	uint32_t hn;
2098	int cmd_i;
2099
2100	cls_session = iscsi_session_setup(iscsit, scsit,
2101					 sizeof(struct iscsi_tcp_cmd_task),
2102					 sizeof(struct iscsi_tcp_mgmt_task),
2103					 initial_cmdsn, &hn);
2104	if (!cls_session)
2105		return NULL;
2106	*hostno = hn;
2107
2108	session = class_to_transport_session(cls_session);
2109	for (cmd_i = 0; cmd_i < session->cmds_max; cmd_i++) {
2110		struct iscsi_cmd_task *ctask = session->cmds[cmd_i];
2111		struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
2112
2113		ctask->hdr = &tcp_ctask->hdr;
2114	}
2115
2116	for (cmd_i = 0; cmd_i < session->mgmtpool_max; cmd_i++) {
2117		struct iscsi_mgmt_task *mtask = session->mgmt_cmds[cmd_i];
2118		struct iscsi_tcp_mgmt_task *tcp_mtask = mtask->dd_data;
2119
2120		mtask->hdr = &tcp_mtask->hdr;
2121	}
2122
2123	if (iscsi_r2tpool_alloc(class_to_transport_session(cls_session)))
2124		goto r2tpool_alloc_fail;
2125
2126	return cls_session;
2127
2128r2tpool_alloc_fail:
2129	iscsi_session_teardown(cls_session);
2130	return NULL;
2131}
2132
/*
 * Destroy an iSCSI/TCP session: free the per-task R2T pools, then tear
 * down the generic session.
 */
static void iscsi_tcp_session_destroy(struct iscsi_cls_session *cls_session)
{
	struct iscsi_session *session = class_to_transport_session(cls_session);

	iscsi_r2tpool_free(session);
	iscsi_session_teardown(cls_session);
}
2138
2139static struct scsi_host_template iscsi_sht = {
2140	.name			= "iSCSI Initiator over TCP/IP",
2141	.queuecommand           = iscsi_queuecommand,
2142	.change_queue_depth	= iscsi_change_queue_depth,
2143	.can_queue		= ISCSI_XMIT_CMDS_MAX - 1,
2144	.sg_tablesize		= ISCSI_SG_TABLESIZE,
2145	.max_sectors		= 0xFFFF,
2146	.cmd_per_lun		= ISCSI_DEF_CMD_PER_LUN,
2147	.eh_abort_handler       = iscsi_eh_abort,
2148	.eh_host_reset_handler	= iscsi_eh_host_reset,
2149	.use_clustering         = DISABLE_CLUSTERING,
2150	.proc_name		= "iscsi_tcp",
2151	.this_id		= -1,
2152};
2153
2154static struct iscsi_transport iscsi_tcp_transport = {
2155	.owner			= THIS_MODULE,
2156	.name			= "tcp",
2157	.caps			= CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_HDRDGST
2158				  | CAP_DATADGST,
2159	.param_mask		= ISCSI_MAX_RECV_DLENGTH |
2160				  ISCSI_MAX_XMIT_DLENGTH |
2161				  ISCSI_HDRDGST_EN |
2162				  ISCSI_DATADGST_EN |
2163				  ISCSI_INITIAL_R2T_EN |
2164				  ISCSI_MAX_R2T |
2165				  ISCSI_IMM_DATA_EN |
2166				  ISCSI_FIRST_BURST |
2167				  ISCSI_MAX_BURST |
2168				  ISCSI_PDU_INORDER_EN |
2169				  ISCSI_DATASEQ_INORDER_EN |
2170				  ISCSI_ERL |
2171				  ISCSI_CONN_PORT |
2172				  ISCSI_CONN_ADDRESS |
2173				  ISCSI_EXP_STATSN |
2174				  ISCSI_PERSISTENT_PORT |
2175				  ISCSI_PERSISTENT_ADDRESS |
2176				  ISCSI_TARGET_NAME |
2177				  ISCSI_TPGT,
2178	.host_template		= &iscsi_sht,
2179	.conndata_size		= sizeof(struct iscsi_conn),
2180	.max_conn		= 1,
2181	.max_cmd_len		= ISCSI_TCP_MAX_CMD_LEN,
2182	/* session management */
2183	.create_session		= iscsi_tcp_session_create,
2184	.destroy_session	= iscsi_tcp_session_destroy,
2185	/* connection management */
2186	.create_conn		= iscsi_tcp_conn_create,
2187	.bind_conn		= iscsi_tcp_conn_bind,
2188	.destroy_conn		= iscsi_tcp_conn_destroy,
2189	.set_param		= iscsi_conn_set_param,
2190	.get_conn_param		= iscsi_tcp_conn_get_param,
2191	.get_session_param	= iscsi_session_get_param,
2192	.start_conn		= iscsi_conn_start,
2193	.stop_conn		= iscsi_tcp_conn_stop,
2194	/* IO */
2195	.send_pdu		= iscsi_conn_send_pdu,
2196	.get_stats		= iscsi_conn_get_stats,
2197	.init_cmd_task		= iscsi_tcp_cmd_init,
2198	.init_mgmt_task		= iscsi_tcp_mgmt_init,
2199	.xmit_cmd_task		= iscsi_tcp_ctask_xmit,
2200	.xmit_mgmt_task		= iscsi_tcp_mtask_xmit,
2201	.cleanup_cmd_task	= iscsi_tcp_cleanup_ctask,
2202	/* recovery */
2203	.session_recovery_timedout = iscsi_session_recovery_timedout,
2204};
2205
2206static int __init
2207iscsi_tcp_init(void)
2208{
2209	if (iscsi_max_lun < 1) {
2210		printk(KERN_ERR "iscsi_tcp: Invalid max_lun value of %u\n",
2211		       iscsi_max_lun);
2212		return -EINVAL;
2213	}
2214	iscsi_tcp_transport.max_lun = iscsi_max_lun;
2215
2216	if (!iscsi_register_transport(&iscsi_tcp_transport))
2217		return -ENODEV;
2218
2219	return 0;
2220}
2221
2222static void __exit
2223iscsi_tcp_exit(void)
2224{
2225	iscsi_unregister_transport(&iscsi_tcp_transport);
2226}
2227
/* hook the transport registration/unregistration into module load/unload */
module_init(iscsi_tcp_init);
module_exit(iscsi_tcp_exit);
2230