isc_soc.c revision 194990
1/*- 2 * Copyright (c) 2005-2008 Daniel Braniss <danny@cs.huji.ac.il> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 */ 27/* 28 | iSCSI 29 | $Id: isc_soc.c,v 1.26 2007/05/19 06:09:01 danny Exp danny $ 30 */ 31 32#include <sys/cdefs.h> 33__FBSDID("$FreeBSD: head/sys/dev/iscsi/initiator/isc_soc.c 194990 2009-06-25 18:46:30Z kib $"); 34 35#include "opt_iscsi_initiator.h" 36 37#include <sys/param.h> 38#include <sys/kernel.h> 39#include <sys/conf.h> 40#include <sys/systm.h> 41#include <sys/malloc.h> 42#include <sys/ctype.h> 43#include <sys/errno.h> 44#include <sys/sysctl.h> 45#include <sys/file.h> 46#include <sys/uio.h> 47#include <sys/socketvar.h> 48#include <sys/socket.h> 49#include <sys/protosw.h> 50#include <sys/proc.h> 51#include <sys/ioccom.h> 52#include <sys/queue.h> 53#include <sys/kthread.h> 54#include <sys/syslog.h> 55#include <sys/mbuf.h> 56#include <sys/user.h> 57 58#include <cam/cam.h> 59#include <cam/cam_ccb.h> 60 61#include <dev/iscsi/initiator/iscsi.h> 62#include <dev/iscsi/initiator/iscsivar.h> 63 64#ifndef NO_USE_MBUF 65#define USE_MBUF 66#endif 67 68#ifdef USE_MBUF 69 70static int ou_refcnt = 0; 71 72/* 73 | function for freeing external storage for mbuf 74 */ 75static void 76ext_free(void *a, void *b) 77{ 78 pduq_t *pq = b; 79 80 if(pq->buf != NULL) { 81 debug(3, "ou_refcnt=%d a=%p b=%p", ou_refcnt, a, pq->buf); 82 free(pq->buf, M_ISCSI); 83 pq->buf = NULL; 84 } 85} 86 87int 88isc_sendPDU(isc_session_t *sp, pduq_t *pq) 89{ 90 struct mbuf *mh, **mp; 91 pdu_t *pp = &pq->pdu; 92 int len, error; 93 94 debug_called(8); 95 /* 96 | mbuf for the iSCSI header 97 */ 98 MGETHDR(mh, M_TRYWAIT, MT_DATA); 99 mh->m_len = mh->m_pkthdr.len = sizeof(union ipdu_u); 100 mh->m_pkthdr.rcvif = NULL; 101 MH_ALIGN(mh, sizeof(union ipdu_u)); 102 bcopy(&pp->ipdu, mh->m_data, sizeof(union ipdu_u)); 103 mh->m_next = NULL; 104 105 if(sp->hdrDigest) 106 pq->pdu.hdr_dig = sp->hdrDigest(&pp->ipdu, sizeof(union ipdu_u), 0); 107 if(pp->ahs_len) { 108 /* 109 | Add any AHS to the iSCSI hdr mbuf 110 | XXX Assert: (mh->m_pkthdr.len + pp->ahs_len) < MHLEN 111 */ 112 bcopy(pp->ahs, (mh->m_data + mh->m_len), pp->ahs_len); 113 mh->m_len += pp->ahs_len; 114 mh->m_pkthdr.len += pp->ahs_len; 115 116 if(sp->hdrDigest) 117 pq->pdu.hdr_dig = sp->hdrDigest(&pp->ahs, pp->ahs_len, pq->pdu.hdr_dig); 118 } 119 if(sp->hdrDigest) { 120 debug(2, "hdr_dig=%x", pq->pdu.hdr_dig); 121 /* 122 | Add header digest to the iSCSI hdr mbuf 123 | XXX Assert: (mh->m_pkthdr.len + 4) < MHLEN 124 */ 125 bcopy(&pp->hdr_dig, (mh->m_data + mh->m_len), sizeof(int)); 126 mh->m_len += sizeof(int); 127 mh->m_pkthdr.len += sizeof(int); 128 } 129 mp = &mh->m_next; 130 if(pq->pdu.ds) { 131 struct mbuf *md; 132 int off = 0; 133 134 len = pp->ds_len; 135 while(len & 03) // the specs say it must be int alligned 136 len++; 137 while(len > 0) { 138 int l; 139 140 MGET(md, M_TRYWAIT, MT_DATA); 141 md->m_ext.ref_cnt = &ou_refcnt; 142 l = min(MCLBYTES, len); 143 debug(5, "setting ext_free(arg=%p len/l=%d/%d)", pq->buf, len, l); 144 MEXTADD(md, pp->ds + off, l, ext_free, pp->ds + off, pq, 0, EXT_EXTREF); 145 md->m_len = l; 146 md->m_next = NULL; 147 mh->m_pkthdr.len += l; 148 *mp = md; 149 mp = &md->m_next; 150 len -= l; 151 off += l; 152 } 153 } 154 if(sp->dataDigest) { 155 struct mbuf *me; 156 157 pp->ds_dig = sp->dataDigest(pp->ds, pp->ds_len, 0); 158 159 MGET(me, M_TRYWAIT, MT_DATA); 160 me->m_len = sizeof(int); 161 MH_ALIGN(mh, sizeof(int)); 162 bcopy(&pp->ds_dig, me->m_data, sizeof(int)); 163 me->m_next = NULL; 164 mh->m_pkthdr.len += sizeof(int); 165 *mp = me; 166 } 167 if((error = sosend(sp->soc, NULL, NULL, mh, 0, 0, sp->td)) != 0) { 168 sdebug(3, "error=%d", error); 169 return error; 170 } 171 sp->stats.nsent++; 172 getbintime(&sp->stats.t_sent); 173 return 0; 174} 175#else /* NO_USE_MBUF */ 176int 177isc_sendPDU(isc_session_t *sp, pduq_t *pq) 178{ 179 struct uio *uio = &pq->uio; 180 struct iovec *iv; 181 pdu_t *pp = &pq->pdu; 182 int len, error; 183 184 debug_called(8); 185 186 bzero(uio, sizeof(struct uio)); 187 uio->uio_rw = UIO_WRITE; 188 uio->uio_segflg = UIO_SYSSPACE; 189 uio->uio_td = sp->td; 190 uio->uio_iov = iv = pq->iov; 191 192 iv->iov_base = &pp->ipdu; 193 iv->iov_len = sizeof(union ipdu_u); 194 uio->uio_resid = pq->len; 195 iv++; 196 if(sp->hdrDigest) 197 pq->pdu.hdr_dig = sp->hdrDigest(&pp->ipdu, sizeof(union ipdu_u), 0); 198 if(pp->ahs_len) { 199 iv->iov_base = pp->ahs; 200 iv->iov_len = pp->ahs_len; 201 iv++; 202 203 if(sp->hdrDigest) 204 pq->pdu.hdr_dig = sp->hdrDigest(&pp->ahs, pp->ahs_len, pq->pdu.hdr_dig); 205 } 206 if(sp->hdrDigest) { 207 debug(2, "hdr_dig=%x", pq->pdu.hdr_dig); 208 iv->iov_base = &pp->hdr_dig; 209 iv->iov_len = sizeof(int); 210 iv++; 211 } 212 if(pq->pdu.ds) { 213 iv->iov_base = pp->ds; 214 iv->iov_len = pp->ds_len; 215 while(iv->iov_len & 03) // the specs say it must be int alligned 216 iv->iov_len++; 217 iv++; 218 } 219 if(sp->dataDigest) { 220 pp->ds_dig = sp->dataDigest(pp->ds, pp->ds_len, 0); 221 iv->iov_base = &pp->ds_dig; 222 iv->iov_len = sizeof(int); 223 iv++; 224 } 225 uio->uio_iovcnt = iv - pq->iov; 226 sdebug(5, "opcode=%x iovcnt=%d uio_resid=%d itt=%x", 227 pp->ipdu.bhs.opcode, uio->uio_iovcnt, uio->uio_resid, 228 ntohl(pp->ipdu.bhs.itt)); 229 sdebug(5, "sp=%p sp->soc=%p uio=%p sp->td=%p", 230 sp, sp->soc, uio, sp->td); 231 do { 232 len = uio->uio_resid; 233 error = sosend(sp->soc, NULL, uio, 0, 0, 0, sp->td); 234 if(uio->uio_resid == 0 || error || len == uio->uio_resid) { 235 if(uio->uio_resid) { 236 sdebug(2, "uio->uio_resid=%d uio->uio_iovcnt=%d error=%d len=%d", 237 uio->uio_resid, uio->uio_iovcnt, error, len); 238 if(error == 0) 239 error = EAGAIN; // 35 240 } 241 break; 242 } 243 /* 244 | XXX: untested code 245 */ 246 sdebug(1, "uio->uio_resid=%d uio->uio_iovcnt=%d", 247 uio->uio_resid, uio->uio_iovcnt); 248 iv = uio->uio_iov; 249 len -= uio->uio_resid; 250 while(uio->uio_iovcnt > 0) { 251 if(iv->iov_len > len) { 252 caddr_t bp = (caddr_t)iv->iov_base; 253 254 iv->iov_len -= len; 255 iv->iov_base = (void *)&bp[len]; 256 break; 257 } 258 len -= iv->iov_len; 259 uio->uio_iovcnt--; 260 uio->uio_iov++; 261 iv++; 262 } 263 } while(uio->uio_resid); 264 265 if(error == 0) { 266 sp->stats.nsent++; 267 getbintime(&sp->stats.t_sent); 268 269 } 270 271 return error; 272} 273#endif /* USE_MBUF */ 274 275/* 276 | wait till a PDU header is received 277 | from the socket. 278 */ 279/* 280 The format of the BHS is: 281 282 Byte/ 0 | 1 | 2 | 3 | 283 / | | | | 284 |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| 285 +---------------+---------------+---------------+---------------+ 286 0|.|I| Opcode |F| Opcode-specific fields | 287 +---------------+---------------+---------------+---------------+ 288 4|TotalAHSLength | DataSegmentLength | 289 +---------------+---------------+---------------+---------------+ 290 8| LUN or Opcode-specific fields | 291 + + 292 12| | 293 +---------------+---------------+---------------+---------------+ 294 16| Initiator Task Tag | 295 +---------------+---------------+---------------+---------------+ 296 20/ Opcode-specific fields / 297 +/ / 298 +---------------+---------------+---------------+---------------+ 299 48 300 */ 301static __inline int 302so_getbhs(isc_session_t *sp) 303{ 304 bhs_t *bhs = &sp->bhs; 305 struct uio *uio = &sp->uio; 306 struct iovec *iov = &sp->iov; 307 int error, flags; 308 309 debug_called(8); 310 311 iov->iov_base = bhs; 312 iov->iov_len = sizeof(bhs_t); 313 314 uio->uio_iov = iov; 315 uio->uio_iovcnt = 1; 316 uio->uio_rw = UIO_READ; 317 uio->uio_segflg = UIO_SYSSPACE; 318 uio->uio_td = curthread; // why ... 319 uio->uio_resid = sizeof(bhs_t); 320 321 flags = MSG_WAITALL; 322 error = soreceive(sp->soc, NULL, uio, 0, 0, &flags); 323 324 if(error) 325 debug(2, "error=%d so_error=%d uio->uio_resid=%zd iov.iov_len=%zd", 326 error, 327 sp->soc->so_error, uio->uio_resid, iov->iov_len); 328 if(!error && (uio->uio_resid > 0)) { 329 error = EPIPE; // was EAGAIN 330 debug(2, "error=%d so_error=%d uio->uio_resid=%zd iov.iov_len=%zd so_state=%x", 331 error, 332 sp->soc->so_error, uio->uio_resid, iov->iov_len, sp->soc->so_state); 333 } 334 335 return error; 336} 337 338/* 339 | so_recv gets called when there is at least 340 | an iSCSI header in the queue 341 */ 342static int 343so_recv(isc_session_t *sp, pduq_t *pq) 344{ 345 struct socket *so = sp->soc; 346 sn_t *sn = &sp->sn; 347 struct uio *uio = &pq->uio; 348 pdu_t *pp; 349 int error; 350 size_t n, len; 351 bhs_t *bhs; 352 u_int max, exp; 353 354 debug_called(8); 355 /* 356 | now calculate how much data should be in the buffer 357 | NOTE: digest is not verified/calculated - yet 358 */ 359 pp = &pq->pdu; 360 bhs = &pp->ipdu.bhs; 361 362 len = 0; 363 if(bhs->AHSLength) { 364 pp->ahs_len = bhs->AHSLength * 4; 365 len += pp->ahs_len; 366 } 367 if(sp->hdrDigest) 368 len += 4; 369 if(bhs->DSLength) { 370 n = bhs->DSLength; 371#if BYTE_ORDER == LITTLE_ENDIAN 372 pp->ds_len = ((n & 0x00ff0000) >> 16) 373 | (n & 0x0000ff00) 374 | ((n & 0x000000ff) << 16); 375#else 376 pp->ds_len = n; 377#endif 378 len += pp->ds_len; 379 while(len & 03) 380 len++; 381 if(sp->dataDigest) 382 len += 4; 383 } 384 385 if((sp->opt.maxRecvDataSegmentLength > 0) && (len > sp->opt.maxRecvDataSegmentLength)) { 386#if 0 387 xdebug("impossible PDU length(%d) opt.maxRecvDataSegmentLength=%d", 388 len, sp->opt.maxRecvDataSegmentLength); 389 // deep trouble here, probably all we can do is 390 // force a disconnect, XXX: check RFC ... 391 log(LOG_ERR, 392 "so_recv: impossible PDU length(%ld) from iSCSI %s/%s\n", 393 len, sp->opt.targetAddress, sp->opt.targetName); 394#endif 395 /* 396 | XXX: this will really screwup the stream. 397 | should clear up the buffer till a valid header 398 | is found, or just close connection ... 399 | should read the RFC. 400 */ 401 error = E2BIG; 402 goto out; 403 } 404 if(len) { 405 int flags = MSG_WAITALL; 406 struct mbuf **mp; 407 408 mp = &pq->mp; 409 410 uio->uio_resid = len; 411 uio->uio_td = curthread; // why ... 412 if(sp->douio) { 413 // it's more efficient to use mbufs -- why? 414 if(bhs->opcode == ISCSI_READ_DATA) { 415 pduq_t *opq; 416 417 opq = i_search_hld(sp, pq->pdu.ipdu.bhs.itt, 1); 418 if(opq != NULL) { 419 union ccb *ccb = opq->ccb; 420 struct ccb_scsiio *csio = &ccb->csio; 421 pdu_t *opp = &opq->pdu; 422 scsi_req_t *cmd = &opp->ipdu.scsi_req; 423 data_in_t *rcmd = &pq->pdu.ipdu.data_in; 424 bhs_t *bhp = &opp->ipdu.bhs; 425 int r; 426 427 if(bhp->opcode == ISCSI_SCSI_CMD 428 && cmd->R 429 && (ntohl(cmd->edtlen) >= pq->pdu.ds_len)) { 430 struct iovec *iov = pq->iov; 431 iov->iov_base = csio->data_ptr + ntohl(rcmd->bo); 432 iov->iov_len = pq->pdu.ds_len; 433 434 uio->uio_rw = UIO_READ; 435 uio->uio_segflg = UIO_SYSSPACE; 436 uio->uio_iov = iov; 437 uio->uio_iovcnt = 1; 438 if(len > pq->pdu.ds_len) { 439 pq->iov[1].iov_base = &r; 440 pq->iov[1].iov_len = len - pq->pdu.ds_len; 441 uio->uio_iovcnt++; 442 } 443 mp = NULL; 444 445 sdebug(4, "uio_resid=0x%zx itt=0x%x bp=%p bo=%x len=%x/%x", 446 uio->uio_resid, 447 ntohl(pq->pdu.ipdu.bhs.itt), 448 csio->data_ptr, ntohl(rcmd->bo), ntohl(cmd->edtlen), pq->pdu.ds_len); 449 } 450 } 451 } 452 } 453 error = soreceive(so, NULL, uio, mp, NULL, &flags); 454 //if(error == EAGAIN) 455 // XXX: this needs work! it hangs iscontrol 456 if(error || uio->uio_resid) 457 goto out; 458 } 459 pq->len += len; 460 sdebug(6, "len=%d] opcode=0x%x ahs_len=0x%x ds_len=0x%x", 461 pq->len, bhs->opcode, pp->ahs_len, pp->ds_len); 462 463 max = ntohl(bhs->MaxCmdSN); 464 exp = ntohl(bhs->ExpStSN); 465 466 if(max < exp - 1 && 467 max > exp - _MAXINCR) { 468 sdebug(2, "bad cmd window size"); 469 error = EIO; // XXX: for now; 470 goto out; // error 471 } 472 473 if(SNA_GT(max, sn->maxCmd)) 474 sn->maxCmd = max; 475 476 if(SNA_GT(exp, sn->expCmd)) 477 sn->expCmd = exp; 478 479 sp->cws = sn->maxCmd - sn->expCmd + 1; 480 481 return 0; 482 483 out: 484 // XXX: need some work here 485 xdebug("have a problem, error=%d", error); 486 pdu_free(sp->isc, pq); 487 if(!error && uio->uio_resid > 0) 488 error = EPIPE; 489 return error; 490} 491 492/* 493 | wait for something to arrive. 494 | and if the pdu is without errors, process it. 495 */ 496static int 497so_input(isc_session_t *sp) 498{ 499 pduq_t *pq; 500 int error; 501 502 debug_called(8); 503 /* 504 | first read in the iSCSI header 505 */ 506 error = so_getbhs(sp); 507 if(error == 0) { 508 /* 509 | now read the rest. 510 */ 511 pq = pdu_alloc(sp->isc, M_NOWAIT); 512 if(pq == NULL) { // XXX: might cause a deadlock ... 513 debug(3, "out of pdus, wait"); 514 pq = pdu_alloc(sp->isc, M_NOWAIT); // OK to WAIT 515 } 516 pq->pdu.ipdu.bhs = sp->bhs; 517 pq->len = sizeof(bhs_t); // so far only the header was read 518 error = so_recv(sp, pq); 519 if(error != 0) { 520 error += 0x800; // XXX: just to see the error. 521 // terminal error 522 // XXX: close connection and exit 523 } 524 else { 525 sp->stats.nrecv++; 526 getbintime(&sp->stats.t_recv); 527 ism_recv(sp, pq); 528 } 529 } 530 return error; 531} 532 533/* 534 | one per active (connected) session. 535 | this thread is responsible for reading 536 | in packets from the target. 537 */ 538static void 539isc_soc(void *vp) 540{ 541 isc_session_t *sp = (isc_session_t *)vp; 542 struct socket *so = sp->soc; 543 int error; 544 545 debug_called(8); 546 547 sp->flags |= ISC_CON_RUNNING; 548 if(sp->cam_path) 549 ic_release(sp); 550 551 error = 0; 552 while((sp->flags & (ISC_CON_RUN | ISC_LINK_UP)) == (ISC_CON_RUN | ISC_LINK_UP)) { 553 // XXX: hunting ... 554 if(sp->soc == NULL || !(so->so_state & SS_ISCONNECTED)) { 555 debug(2, "sp->soc=%p", sp->soc); 556 break; 557 } 558 error = so_input(sp); 559 if(error == 0) { 560 mtx_lock(&sp->io_mtx); 561 if(sp->flags & ISC_OWAITING) { 562 wakeup(&sp->flags); 563 } 564 mtx_unlock(&sp->io_mtx); 565 } else if(error == EPIPE) { 566 break; 567 } 568 else if(error == EAGAIN) { 569 if(so->so_state & SS_ISCONNECTED) 570 // there seems to be a problem in 6.0 ... 571 tsleep(sp, PRIBIO, "isc_soc", 2*hz); 572 } 573 } 574 sdebug(2, "terminated, flags=%x so_count=%d so_state=%x error=%d proc=%p", 575 sp->flags, so->so_count, so->so_state, error, sp->proc); 576 if((sp->proc != NULL) && sp->signal) { 577 PROC_LOCK(sp->proc); 578 psignal(sp->proc, sp->signal); 579 PROC_UNLOCK(sp->proc); 580 sp->flags |= ISC_SIGNALED; 581 sdebug(2, "pid=%d signaled(%d)", sp->proc->p_pid, sp->signal); 582 } 583 else { 584 // we have to do something ourselves 585 // like closing this session ... 586 } 587 /* 588 | we've been terminated 589 */ 590 // do we need this mutex ...? 591 mtx_lock(&sp->io_mtx); 592 sp->flags &= ~(ISC_CON_RUNNING | ISC_LINK_UP); 593 wakeup(&sp->soc); 594 mtx_unlock(&sp->io_mtx); 595 596 sdebug(2, "dropped ISC_CON_RUNNING"); 597 598 kproc_exit(0); 599} 600 601void 602isc_stop_receiver(isc_session_t *sp) 603{ 604 int n; 605 606 debug_called(8); 607 sdebug(3, "sp=%p sp->soc=%p", sp, sp? sp->soc: 0); 608 mtx_lock(&sp->io_mtx); 609 sp->flags &= ~ISC_LINK_UP; 610 msleep(&sp->soc, &sp->io_mtx, PRIBIO|PDROP, "isc_stpc", 5*hz); 611 612 soshutdown(sp->soc, SHUT_RD); 613 614 mtx_lock(&sp->io_mtx); 615 sdebug(3, "soshutdown"); 616 sp->flags &= ~ISC_CON_RUN; 617 n = 2; 618 while(n-- && (sp->flags & ISC_CON_RUNNING)) { 619 sdebug(3, "waiting n=%d... flags=%x", n, sp->flags); 620 msleep(&sp->soc, &sp->io_mtx, PRIBIO, "isc_stpc", 5*hz); 621 } 622 mtx_unlock(&sp->io_mtx); 623 624 625 if(sp->fp != NULL) 626 fdrop(sp->fp, sp->td); 627 fputsock(sp->soc); 628 sp->soc = NULL; 629 sp->fp = NULL; 630 631 sdebug(3, "done"); 632} 633 634void 635isc_start_receiver(isc_session_t *sp) 636{ 637 debug_called(8); 638 639 sp->flags |= ISC_CON_RUN | ISC_LINK_UP; 640 641 kproc_create(isc_soc, sp, &sp->soc_proc, 0, 0, "iscsi%d", sp->sid); 642} 643