isc_soc.c revision 194990
/*-
 * Copyright (c) 2005-2008 Daniel Braniss <danny@cs.huji.ac.il>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
/*
 | iSCSI
 | $Id: isc_soc.c,v 1.26 2007/05/19 06:09:01 danny Exp danny $
 */
31
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/iscsi/initiator/isc_soc.c 194990 2009-06-25 18:46:30Z kib $");

#include "opt_iscsi_initiator.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/ctype.h>
#include <sys/errno.h>
#include <sys/sysctl.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/socketvar.h>
#include <sys/socket.h>
#include <sys/protosw.h>
#include <sys/proc.h>
#include <sys/ioccom.h>
#include <sys/queue.h>
#include <sys/kthread.h>
#include <sys/syslog.h>
#include <sys/mbuf.h>
#include <sys/user.h>

#include <cam/cam.h>
#include <cam/cam_ccb.h>

#include <dev/iscsi/initiator/iscsi.h>
#include <dev/iscsi/initiator/iscsivar.h>
63
64#ifndef NO_USE_MBUF
65#define USE_MBUF
66#endif
67
68#ifdef USE_MBUF
69
70static int ou_refcnt = 0;
71
72/*
73 | function for freeing external storage for mbuf
74 */
75static void
76ext_free(void *a, void *b)
77{
78     pduq_t *pq = b;
79
80     if(pq->buf != NULL) {
81	  debug(3, "ou_refcnt=%d a=%p b=%p", ou_refcnt, a, pq->buf);
82	  free(pq->buf, M_ISCSI);
83	  pq->buf = NULL;
84     }
85}
86
87int
88isc_sendPDU(isc_session_t *sp, pduq_t *pq)
89{
90     struct mbuf *mh, **mp;
91     pdu_t		*pp = &pq->pdu;
92     int		len, error;
93
94     debug_called(8);
95     /*
96      | mbuf for the iSCSI header
97      */
98     MGETHDR(mh, M_TRYWAIT, MT_DATA);
99     mh->m_len = mh->m_pkthdr.len = sizeof(union ipdu_u);
100     mh->m_pkthdr.rcvif = NULL;
101     MH_ALIGN(mh, sizeof(union ipdu_u));
102     bcopy(&pp->ipdu, mh->m_data, sizeof(union ipdu_u));
103     mh->m_next = NULL;
104
105     if(sp->hdrDigest)
106	  pq->pdu.hdr_dig = sp->hdrDigest(&pp->ipdu, sizeof(union ipdu_u), 0);
107     if(pp->ahs_len) {
108          /*
109	   | Add any AHS to the iSCSI hdr mbuf
110           |  XXX Assert: (mh->m_pkthdr.len + pp->ahs_len) < MHLEN
111	   */
112          bcopy(pp->ahs, (mh->m_data + mh->m_len), pp->ahs_len);
113          mh->m_len += pp->ahs_len;
114          mh->m_pkthdr.len += pp->ahs_len;
115
116	  if(sp->hdrDigest)
117	       pq->pdu.hdr_dig = sp->hdrDigest(&pp->ahs, pp->ahs_len, pq->pdu.hdr_dig);
118     }
119     if(sp->hdrDigest) {
120	  debug(2, "hdr_dig=%x", pq->pdu.hdr_dig);
121          /*
122	   | Add header digest to the iSCSI hdr mbuf
123	   | XXX Assert: (mh->m_pkthdr.len + 4) < MHLEN
124	   */
125          bcopy(&pp->hdr_dig, (mh->m_data + mh->m_len), sizeof(int));
126          mh->m_len += sizeof(int);
127          mh->m_pkthdr.len += sizeof(int);
128     }
129     mp = &mh->m_next;
130     if(pq->pdu.ds) {
131          struct mbuf   *md;
132          int           off = 0;
133
134          len = pp->ds_len;
135	  while(len & 03) // the specs say it must be int alligned
136	       len++;
137          while(len > 0) {
138                int       l;
139
140	       MGET(md, M_TRYWAIT, MT_DATA);
141	       md->m_ext.ref_cnt = &ou_refcnt;
142                l = min(MCLBYTES, len);
143	       debug(5, "setting ext_free(arg=%p len/l=%d/%d)", pq->buf, len, l);
144	       MEXTADD(md, pp->ds + off, l, ext_free, pp->ds + off, pq, 0, EXT_EXTREF);
145                md->m_len = l;
146                md->m_next = NULL;
147                mh->m_pkthdr.len += l;
148                *mp = md;
149                mp = &md->m_next;
150                len -= l;
151                off += l;
152          }
153     }
154     if(sp->dataDigest) {
155          struct mbuf   *me;
156
157	  pp->ds_dig = sp->dataDigest(pp->ds, pp->ds_len, 0);
158
159          MGET(me, M_TRYWAIT, MT_DATA);
160          me->m_len = sizeof(int);
161          MH_ALIGN(mh, sizeof(int));
162          bcopy(&pp->ds_dig, me->m_data, sizeof(int));
163          me->m_next = NULL;
164          mh->m_pkthdr.len += sizeof(int);
165          *mp = me;
166     }
167     if((error = sosend(sp->soc, NULL, NULL, mh, 0, 0, sp->td)) != 0) {
168	  sdebug(3, "error=%d", error);
169	  return error;
170     }
171     sp->stats.nsent++;
172     getbintime(&sp->stats.t_sent);
173     return 0;
174}
175#else /* NO_USE_MBUF */
176int
177isc_sendPDU(isc_session_t *sp, pduq_t *pq)
178{
179     struct uio *uio = &pq->uio;
180     struct iovec *iv;
181     pdu_t	*pp = &pq->pdu;
182     int	len, error;
183
184     debug_called(8);
185
186     bzero(uio, sizeof(struct uio));
187     uio->uio_rw = UIO_WRITE;
188     uio->uio_segflg = UIO_SYSSPACE;
189     uio->uio_td = sp->td;
190     uio->uio_iov = iv = pq->iov;
191
192     iv->iov_base = &pp->ipdu;
193     iv->iov_len = sizeof(union ipdu_u);
194     uio->uio_resid = pq->len;
195     iv++;
196     if(sp->hdrDigest)
197	  pq->pdu.hdr_dig = sp->hdrDigest(&pp->ipdu, sizeof(union ipdu_u), 0);
198     if(pp->ahs_len) {
199	  iv->iov_base = pp->ahs;
200	  iv->iov_len = pp->ahs_len;
201	  iv++;
202
203	  if(sp->hdrDigest)
204	       pq->pdu.hdr_dig = sp->hdrDigest(&pp->ahs, pp->ahs_len, pq->pdu.hdr_dig);
205     }
206     if(sp->hdrDigest) {
207	  debug(2, "hdr_dig=%x", pq->pdu.hdr_dig);
208	  iv->iov_base = &pp->hdr_dig;
209	  iv->iov_len = sizeof(int);
210	  iv++;
211     }
212     if(pq->pdu.ds) {
213	  iv->iov_base = pp->ds;
214	  iv->iov_len = pp->ds_len;
215	  while(iv->iov_len & 03) // the specs say it must be int alligned
216	       iv->iov_len++;
217	  iv++;
218     }
219     if(sp->dataDigest) {
220	  pp->ds_dig = sp->dataDigest(pp->ds, pp->ds_len, 0);
221	  iv->iov_base = &pp->ds_dig;
222	  iv->iov_len = sizeof(int);
223	  iv++;
224     }
225     uio->uio_iovcnt	= iv - pq->iov;
226     sdebug(5, "opcode=%x iovcnt=%d uio_resid=%d itt=%x",
227	    pp->ipdu.bhs.opcode, uio->uio_iovcnt, uio->uio_resid,
228	    ntohl(pp->ipdu.bhs.itt));
229     sdebug(5, "sp=%p sp->soc=%p uio=%p sp->td=%p",
230	    sp, sp->soc, uio, sp->td);
231     do {
232	  len = uio->uio_resid;
233	  error = sosend(sp->soc, NULL, uio, 0, 0, 0, sp->td);
234	  if(uio->uio_resid == 0 || error || len == uio->uio_resid) {
235	       if(uio->uio_resid) {
236		    sdebug(2, "uio->uio_resid=%d uio->uio_iovcnt=%d error=%d len=%d",
237			   uio->uio_resid, uio->uio_iovcnt, error, len);
238		    if(error == 0)
239			 error = EAGAIN; // 35
240	       }
241	       break;
242	  }
243	  /*
244	   | XXX: untested code
245	   */
246	  sdebug(1, "uio->uio_resid=%d uio->uio_iovcnt=%d",
247		uio->uio_resid, uio->uio_iovcnt);
248	  iv = uio->uio_iov;
249	  len -= uio->uio_resid;
250	  while(uio->uio_iovcnt > 0) {
251	       if(iv->iov_len > len) {
252		    caddr_t	bp = (caddr_t)iv->iov_base;
253
254		    iv->iov_len -= len;
255		    iv->iov_base = (void *)&bp[len];
256		    break;
257	       }
258	       len -= iv->iov_len;
259	       uio->uio_iovcnt--;
260	       uio->uio_iov++;
261	       iv++;
262	  }
263     } while(uio->uio_resid);
264
265     if(error == 0) {
266	  sp->stats.nsent++;
267	  getbintime(&sp->stats.t_sent);
268
269     }
270
271     return error;
272}
273#endif /* USE_MBUF */
274
275/*
276 | wait till a PDU header is received
277 | from the socket.
278 */
279/*
280   The format of the BHS is:
281
282   Byte/     0       |       1       |       2       |       3       |
283      /              |               |               |               |
284     |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|
285     +---------------+---------------+---------------+---------------+
286    0|.|I| Opcode    |F|  Opcode-specific fields                     |
287     +---------------+---------------+---------------+---------------+
288    4|TotalAHSLength | DataSegmentLength                             |
289     +---------------+---------------+---------------+---------------+
290    8| LUN or Opcode-specific fields                                 |
291     +                                                               +
292   12|                                                               |
293     +---------------+---------------+---------------+---------------+
294   16| Initiator Task Tag                                            |
295     +---------------+---------------+---------------+---------------+
296   20/ Opcode-specific fields                                        /
297    +/                                                               /
298     +---------------+---------------+---------------+---------------+
299   48
300 */
301static __inline int
302so_getbhs(isc_session_t *sp)
303{
304     bhs_t *bhs		= &sp->bhs;
305     struct uio		*uio = &sp->uio;
306     struct iovec	*iov = &sp->iov;
307     int		error, flags;
308
309     debug_called(8);
310
311     iov->iov_base	= bhs;
312     iov->iov_len	= sizeof(bhs_t);
313
314     uio->uio_iov	= iov;
315     uio->uio_iovcnt	= 1;
316     uio->uio_rw	= UIO_READ;
317     uio->uio_segflg	= UIO_SYSSPACE;
318     uio->uio_td	= curthread; // why ...
319     uio->uio_resid	= sizeof(bhs_t);
320
321     flags = MSG_WAITALL;
322     error = soreceive(sp->soc, NULL, uio, 0, 0, &flags);
323
324     if(error)
325	  debug(2, "error=%d so_error=%d uio->uio_resid=%zd iov.iov_len=%zd",
326		error,
327		sp->soc->so_error, uio->uio_resid, iov->iov_len);
328     if(!error && (uio->uio_resid > 0)) {
329	  error = EPIPE; // was EAGAIN
330	  debug(2, "error=%d so_error=%d uio->uio_resid=%zd iov.iov_len=%zd so_state=%x",
331		error,
332		sp->soc->so_error, uio->uio_resid, iov->iov_len, sp->soc->so_state);
333     }
334
335     return error;
336}
337
338/*
339 | so_recv gets called when there is at least
340 | an iSCSI header in the queue
341 */
342static int
343so_recv(isc_session_t *sp, pduq_t *pq)
344{
345     struct socket	*so = sp->soc;
346     sn_t		*sn = &sp->sn;
347     struct uio		*uio = &pq->uio;
348     pdu_t		*pp;
349     int		error;
350     size_t		n, len;
351     bhs_t		*bhs;
352     u_int		max, exp;
353
354     debug_called(8);
355     /*
356      | now calculate how much data should be in the buffer
357      | NOTE: digest is not verified/calculated - yet
358      */
359     pp = &pq->pdu;
360     bhs = &pp->ipdu.bhs;
361
362     len = 0;
363     if(bhs->AHSLength) {
364	  pp->ahs_len = bhs->AHSLength * 4;
365	  len += pp->ahs_len;
366     }
367     if(sp->hdrDigest)
368	  len += 4;
369     if(bhs->DSLength) {
370	  n = bhs->DSLength;
371#if BYTE_ORDER == LITTLE_ENDIAN
372	  pp->ds_len = ((n & 0x00ff0000) >> 16)
373	       | (n & 0x0000ff00)
374	       | ((n & 0x000000ff) << 16);
375#else
376	  pp->ds_len = n;
377#endif
378	  len += pp->ds_len;
379	  while(len & 03)
380	       len++;
381	  if(sp->dataDigest)
382	       len += 4;
383     }
384
385     if((sp->opt.maxRecvDataSegmentLength > 0) && (len > sp->opt.maxRecvDataSegmentLength)) {
386#if 0
387	  xdebug("impossible PDU length(%d) opt.maxRecvDataSegmentLength=%d",
388		 len, sp->opt.maxRecvDataSegmentLength);
389	  // deep trouble here, probably all we can do is
390	  // force a disconnect, XXX: check RFC ...
391	  log(LOG_ERR,
392	      "so_recv: impossible PDU length(%ld) from iSCSI %s/%s\n",
393	      len, sp->opt.targetAddress, sp->opt.targetName);
394#endif
395	  /*
396	   | XXX: this will really screwup the stream.
397	   | should clear up the buffer till a valid header
398	   | is found, or just close connection ...
399	   | should read the RFC.
400	   */
401	  error = E2BIG;
402	  goto out;
403     }
404     if(len) {
405	  int	flags = MSG_WAITALL;
406	  struct mbuf **mp;
407
408	  mp = &pq->mp;
409
410	  uio->uio_resid = len;
411	  uio->uio_td = curthread; // why ...
412	  if(sp->douio) {
413	       // it's more efficient to use mbufs -- why?
414	       if(bhs->opcode == ISCSI_READ_DATA) {
415		    pduq_t	*opq;
416
417		    opq = i_search_hld(sp, pq->pdu.ipdu.bhs.itt, 1);
418		    if(opq != NULL) {
419			 union ccb *ccb 		= opq->ccb;
420			 struct ccb_scsiio *csio	= &ccb->csio;
421			 pdu_t *opp			= &opq->pdu;
422			 scsi_req_t *cmd		= &opp->ipdu.scsi_req;
423			 data_in_t *rcmd		= &pq->pdu.ipdu.data_in;
424			 bhs_t *bhp			= &opp->ipdu.bhs;
425			 int	r;
426
427			 if(bhp->opcode == ISCSI_SCSI_CMD
428			    && cmd->R
429			    && (ntohl(cmd->edtlen) >= pq->pdu.ds_len)) {
430			      struct iovec *iov = pq->iov;
431			      iov->iov_base = csio->data_ptr + ntohl(rcmd->bo);
432			      iov->iov_len = pq->pdu.ds_len;
433
434			      uio->uio_rw = UIO_READ;
435			      uio->uio_segflg = UIO_SYSSPACE;
436			      uio->uio_iov = iov;
437			      uio->uio_iovcnt = 1;
438			      if(len > pq->pdu.ds_len) {
439				   pq->iov[1].iov_base = &r;
440				   pq->iov[1].iov_len = len - pq->pdu.ds_len;
441				   uio->uio_iovcnt++;
442			      }
443			      mp = NULL;
444
445			      sdebug(4, "uio_resid=0x%zx itt=0x%x bp=%p bo=%x len=%x/%x",
446				     uio->uio_resid,
447				     ntohl(pq->pdu.ipdu.bhs.itt),
448				     csio->data_ptr, ntohl(rcmd->bo), ntohl(cmd->edtlen), pq->pdu.ds_len);
449			 }
450		    }
451	       }
452	  }
453	  error = soreceive(so, NULL, uio, mp, NULL, &flags);
454	  //if(error == EAGAIN)
455	  // XXX: this needs work! it hangs iscontrol
456	  if(error || uio->uio_resid)
457	       goto out;
458     }
459     pq->len += len;
460     sdebug(6, "len=%d] opcode=0x%x ahs_len=0x%x ds_len=0x%x",
461	    pq->len, bhs->opcode, pp->ahs_len, pp->ds_len);
462
463     max = ntohl(bhs->MaxCmdSN);
464     exp = ntohl(bhs->ExpStSN);
465
466     if(max < exp - 1 &&
467	max > exp - _MAXINCR) {
468	  sdebug(2,  "bad cmd window size");
469	  error = EIO; // XXX: for now;
470	  goto out; // error
471     }
472
473     if(SNA_GT(max, sn->maxCmd))
474	  sn->maxCmd = max;
475
476     if(SNA_GT(exp, sn->expCmd))
477	  sn->expCmd = exp;
478
479     sp->cws = sn->maxCmd - sn->expCmd + 1;
480
481     return 0;
482
483 out:
484     // XXX: need some work here
485     xdebug("have a problem, error=%d", error);
486     pdu_free(sp->isc, pq);
487     if(!error && uio->uio_resid > 0)
488	  error = EPIPE;
489     return error;
490}
491
492/*
493 | wait for something to arrive.
494 | and if the pdu is without errors, process it.
495 */
496static int
497so_input(isc_session_t *sp)
498{
499     pduq_t		*pq;
500     int		error;
501
502     debug_called(8);
503     /*
504      | first read in the iSCSI header
505      */
506     error = so_getbhs(sp);
507     if(error == 0) {
508	  /*
509	   | now read the rest.
510	   */
511	  pq = pdu_alloc(sp->isc, M_NOWAIT);
512	  if(pq == NULL) { // XXX: might cause a deadlock ...
513	       debug(3, "out of pdus, wait");
514	       pq = pdu_alloc(sp->isc, M_NOWAIT);  // OK to WAIT
515	  }
516	  pq->pdu.ipdu.bhs = sp->bhs;
517	  pq->len = sizeof(bhs_t);	// so far only the header was read
518	  error = so_recv(sp, pq);
519	  if(error != 0) {
520	       error += 0x800; // XXX: just to see the error.
521	       // terminal error
522	       // XXX: close connection and exit
523	  }
524	  else {
525	       sp->stats.nrecv++;
526	       getbintime(&sp->stats.t_recv);
527	       ism_recv(sp, pq);
528	  }
529     }
530     return error;
531}
532
533/*
534 | one per active (connected) session.
535 | this thread is responsible for reading
536 | in packets from the target.
537 */
538static void
539isc_soc(void *vp)
540{
541     isc_session_t	*sp = (isc_session_t *)vp;
542     struct socket	*so = sp->soc;
543     int		error;
544
545     debug_called(8);
546
547     sp->flags |= ISC_CON_RUNNING;
548     if(sp->cam_path)
549	  ic_release(sp);
550
551     error = 0;
552     while((sp->flags & (ISC_CON_RUN | ISC_LINK_UP)) == (ISC_CON_RUN | ISC_LINK_UP)) {
553	  // XXX: hunting ...
554	  if(sp->soc == NULL || !(so->so_state & SS_ISCONNECTED)) {
555	       debug(2, "sp->soc=%p", sp->soc);
556	       break;
557	  }
558	  error = so_input(sp);
559	  if(error == 0) {
560	       mtx_lock(&sp->io_mtx);
561	       if(sp->flags & ISC_OWAITING) {
562	       wakeup(&sp->flags);
563	       }
564	       mtx_unlock(&sp->io_mtx);
565	  } else if(error == EPIPE) {
566	       break;
567	  }
568	  else if(error == EAGAIN) {
569	       if(so->so_state & SS_ISCONNECTED)
570		    // there seems to be a problem in 6.0 ...
571		    tsleep(sp, PRIBIO, "isc_soc", 2*hz);
572	  }
573     }
574     sdebug(2, "terminated, flags=%x so_count=%d so_state=%x error=%d proc=%p",
575	    sp->flags, so->so_count, so->so_state, error, sp->proc);
576     if((sp->proc != NULL) && sp->signal) {
577	  PROC_LOCK(sp->proc);
578	  psignal(sp->proc, sp->signal);
579	  PROC_UNLOCK(sp->proc);
580	  sp->flags |= ISC_SIGNALED;
581	  sdebug(2, "pid=%d signaled(%d)", sp->proc->p_pid, sp->signal);
582     }
583     else {
584	  // we have to do something ourselves
585	  // like closing this session ...
586     }
587     /*
588      | we've been terminated
589      */
590     // do we need this mutex ...?
591     mtx_lock(&sp->io_mtx);
592     sp->flags &= ~(ISC_CON_RUNNING | ISC_LINK_UP);
593     wakeup(&sp->soc);
594     mtx_unlock(&sp->io_mtx);
595
596     sdebug(2, "dropped ISC_CON_RUNNING");
597
598     kproc_exit(0);
599}
600
601void
602isc_stop_receiver(isc_session_t *sp)
603{
604     int	n;
605
606     debug_called(8);
607     sdebug(3, "sp=%p sp->soc=%p", sp, sp? sp->soc: 0);
608     mtx_lock(&sp->io_mtx);
609     sp->flags &= ~ISC_LINK_UP;
610     msleep(&sp->soc, &sp->io_mtx, PRIBIO|PDROP, "isc_stpc", 5*hz);
611
612     soshutdown(sp->soc, SHUT_RD);
613
614     mtx_lock(&sp->io_mtx);
615     sdebug(3, "soshutdown");
616     sp->flags &= ~ISC_CON_RUN;
617     n = 2;
618     while(n-- && (sp->flags & ISC_CON_RUNNING)) {
619	  sdebug(3, "waiting n=%d... flags=%x", n, sp->flags);
620	  msleep(&sp->soc, &sp->io_mtx, PRIBIO, "isc_stpc", 5*hz);
621     }
622     mtx_unlock(&sp->io_mtx);
623
624
625     if(sp->fp != NULL)
626	  fdrop(sp->fp, sp->td);
627     fputsock(sp->soc);
628     sp->soc = NULL;
629     sp->fp = NULL;
630
631     sdebug(3, "done");
632}
633
634void
635isc_start_receiver(isc_session_t *sp)
636{
637     debug_called(8);
638
639     sp->flags |= ISC_CON_RUN | ISC_LINK_UP;
640
641     kproc_create(isc_soc, sp, &sp->soc_proc, 0, 0, "iscsi%d", sp->sid);
642}
643