1292740Snp/*- 2292740Snp * Copyright (c) 2012 The FreeBSD Foundation 3292740Snp * Copyright (c) 2015 Chelsio Communications, Inc. 4292740Snp * All rights reserved. 5292740Snp * 6292740Snp * This software was developed by Edward Tomasz Napierala under sponsorship 7292740Snp * from the FreeBSD Foundation. 8292740Snp * 9292740Snp * Redistribution and use in source and binary forms, with or without 10292740Snp * modification, are permitted provided that the following conditions 11292740Snp * are met: 12292740Snp * 1. Redistributions of source code must retain the above copyright 13292740Snp * notice, this list of conditions and the following disclaimer. 14292740Snp * 2. Redistributions in binary form must reproduce the above copyright 15292740Snp * notice, this list of conditions and the following disclaimer in the 16292740Snp * documentation and/or other materials provided with the distribution. 17292740Snp * 18292740Snp * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19292740Snp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20292740Snp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21292740Snp * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22292740Snp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23292740Snp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24292740Snp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25292740Snp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26292740Snp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27292740Snp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28292740Snp * SUCH DAMAGE. 29292740Snp * 30292740Snp */ 31292740Snp 32292740Snp/* 33292740Snp * cxgbei implementation of iSCSI Common Layer kobj(9) interface. 34292740Snp */ 35292740Snp 36292740Snp#include <sys/cdefs.h> 37292740Snp__FBSDID("$FreeBSD: stable/11/sys/dev/cxgbe/cxgbei/icl_cxgbei.c 345664 2019-03-28 23:43:38Z jhb $"); 38292740Snp 39292740Snp#include "opt_inet.h" 40292740Snp#include "opt_inet6.h" 41292740Snp 42292740Snp#ifdef TCP_OFFLOAD 43292740Snp#include <sys/param.h> 44292740Snp#include <sys/capsicum.h> 45292740Snp#include <sys/condvar.h> 46292740Snp#include <sys/conf.h> 47292740Snp#include <sys/file.h> 48292740Snp#include <sys/kernel.h> 49292740Snp#include <sys/kthread.h> 50292740Snp#include <sys/lock.h> 51292740Snp#include <sys/mbuf.h> 52292740Snp#include <sys/mutex.h> 53292740Snp#include <sys/module.h> 54292740Snp#include <sys/protosw.h> 55292740Snp#include <sys/socket.h> 56292740Snp#include <sys/socketvar.h> 57292740Snp#include <sys/sysctl.h> 58292740Snp#include <sys/systm.h> 59292740Snp#include <sys/sx.h> 60292740Snp#include <sys/uio.h> 61292740Snp#include <machine/bus.h> 62292740Snp#include <vm/uma.h> 63292740Snp#include <netinet/in.h> 64292740Snp#include <netinet/in_pcb.h> 65292740Snp#include <netinet/tcp.h> 66292740Snp#include <netinet/tcp_var.h> 67292740Snp#include <netinet/toecore.h> 68292740Snp 69292740Snp#include <dev/iscsi/icl.h> 70292740Snp#include <dev/iscsi/iscsi_proto.h> 71292740Snp#include <icl_conn_if.h> 72292740Snp 73292740Snp#include "common/common.h" 74302339Snp#include "common/t4_tcb.h" 75292740Snp#include "tom/t4_tom.h" 76292740Snp#include "cxgbei.h" 77292740Snp 78292740SnpSYSCTL_NODE(_kern_icl, OID_AUTO, cxgbei, CTLFLAG_RD, 0, "Chelsio iSCSI offload"); 79292740Snpstatic int coalesce = 1; 80292740SnpSYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, coalesce, CTLFLAG_RWTUN, 81292740Snp &coalesce, 0, "Try to coalesce PDUs before sending"); 82292740Snpstatic int partial_receive_len = 128 * 1024; 83292740SnpSYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN, 84292740Snp &partial_receive_len, 0, "Minimum read size for partially received " 85292740Snp "data segment"); 86292740Snpstatic int sendspace = 1048576; 87292740SnpSYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, sendspace, CTLFLAG_RWTUN, 88292740Snp &sendspace, 0, "Default send socket buffer size"); 89292740Snpstatic int recvspace = 1048576; 90292740SnpSYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, recvspace, CTLFLAG_RWTUN, 91292740Snp &recvspace, 0, "Default receive socket buffer size"); 92292740Snp 93292740Snpstatic uma_zone_t icl_transfer_zone; 94292740Snp 95292740Snpstatic volatile u_int icl_cxgbei_ncons; 96292740Snp 97292740Snp#define ICL_CONN_LOCK(X) mtx_lock(X->ic_lock) 98292740Snp#define ICL_CONN_UNLOCK(X) mtx_unlock(X->ic_lock) 99292740Snp#define ICL_CONN_LOCK_ASSERT(X) mtx_assert(X->ic_lock, MA_OWNED) 100292740Snp#define ICL_CONN_LOCK_ASSERT_NOT(X) mtx_assert(X->ic_lock, MA_NOTOWNED) 101292740Snp 102292740Snpstruct icl_pdu *icl_cxgbei_new_pdu(int); 103292740Snpvoid icl_cxgbei_new_pdu_set_conn(struct icl_pdu *, struct icl_conn *); 104292740Snp 105292740Snpstatic icl_conn_new_pdu_t icl_cxgbei_conn_new_pdu; 106292740Snpicl_conn_pdu_free_t icl_cxgbei_conn_pdu_free; 107292740Snpstatic icl_conn_pdu_data_segment_length_t 108292740Snp icl_cxgbei_conn_pdu_data_segment_length; 109292740Snpstatic icl_conn_pdu_append_data_t icl_cxgbei_conn_pdu_append_data; 110292740Snpstatic icl_conn_pdu_get_data_t icl_cxgbei_conn_pdu_get_data; 111292740Snpstatic icl_conn_pdu_queue_t icl_cxgbei_conn_pdu_queue; 112292740Snpstatic icl_conn_handoff_t icl_cxgbei_conn_handoff; 113292740Snpstatic icl_conn_free_t icl_cxgbei_conn_free; 114292740Snpstatic icl_conn_close_t icl_cxgbei_conn_close; 115292740Snpstatic icl_conn_task_setup_t icl_cxgbei_conn_task_setup; 116292740Snpstatic icl_conn_task_done_t icl_cxgbei_conn_task_done; 117292740Snpstatic icl_conn_transfer_setup_t icl_cxgbei_conn_transfer_setup; 118292740Snpstatic icl_conn_transfer_done_t icl_cxgbei_conn_transfer_done; 119292740Snp 120292740Snpstatic kobj_method_t icl_cxgbei_methods[] = { 121292740Snp KOBJMETHOD(icl_conn_new_pdu, icl_cxgbei_conn_new_pdu), 122292740Snp KOBJMETHOD(icl_conn_pdu_free, icl_cxgbei_conn_pdu_free), 123292740Snp KOBJMETHOD(icl_conn_pdu_data_segment_length, 124292740Snp icl_cxgbei_conn_pdu_data_segment_length), 125292740Snp KOBJMETHOD(icl_conn_pdu_append_data, icl_cxgbei_conn_pdu_append_data), 126292740Snp KOBJMETHOD(icl_conn_pdu_get_data, icl_cxgbei_conn_pdu_get_data), 127292740Snp KOBJMETHOD(icl_conn_pdu_queue, icl_cxgbei_conn_pdu_queue), 128292740Snp KOBJMETHOD(icl_conn_handoff, icl_cxgbei_conn_handoff), 129292740Snp KOBJMETHOD(icl_conn_free, icl_cxgbei_conn_free), 130292740Snp KOBJMETHOD(icl_conn_close, icl_cxgbei_conn_close), 131292740Snp KOBJMETHOD(icl_conn_task_setup, icl_cxgbei_conn_task_setup), 132292740Snp KOBJMETHOD(icl_conn_task_done, icl_cxgbei_conn_task_done), 133292740Snp KOBJMETHOD(icl_conn_transfer_setup, icl_cxgbei_conn_transfer_setup), 134292740Snp KOBJMETHOD(icl_conn_transfer_done, icl_cxgbei_conn_transfer_done), 135292740Snp { 0, 0 } 136292740Snp}; 137292740Snp 138292740SnpDEFINE_CLASS(icl_cxgbei, icl_cxgbei_methods, sizeof(struct icl_cxgbei_conn)); 139292740Snp 140292740Snp#if 0 141292740Snp/* 142292740Snp * Subtract another 256 for AHS from MAX_DSL if AHS could be used. 143292740Snp */ 144292740Snp#define CXGBEI_MAX_PDU 16224 145292740Snp#define CXGBEI_MAX_DSL (CXGBEI_MAX_PDU - sizeof(struct iscsi_bhs) - 8) 146292740Snp#endif 147292740Snp#define CXGBEI_MAX_DSL 8192 148292740Snp#define CXGBEI_MAX_PDU (CXGBEI_MAX_DSL + sizeof(struct iscsi_bhs) + 8) 149292740Snp 150292740Snpvoid 151292740Snpicl_cxgbei_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip) 152292740Snp{ 153292740Snp#ifdef INVARIANTS 154292740Snp struct icl_cxgbei_pdu *icp = ip_to_icp(ip); 155292740Snp#endif 156292740Snp 157292740Snp MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE); 158292740Snp MPASS(ic == ip->ip_conn); 159292740Snp MPASS(ip->ip_bhs_mbuf != NULL); 160292740Snp 161292740Snp m_freem(ip->ip_ahs_mbuf); 162292740Snp m_freem(ip->ip_data_mbuf); 163292740Snp m_freem(ip->ip_bhs_mbuf); /* storage for icl_cxgbei_pdu itself */ 164292740Snp 165292740Snp#ifdef DIAGNOSTIC 166292740Snp if (__predict_true(ic != NULL)) 167292740Snp refcount_release(&ic->ic_outstanding_pdus); 168292740Snp#endif 169292740Snp} 170292740Snp 171292740Snpstruct icl_pdu * 172292740Snpicl_cxgbei_new_pdu(int flags) 173292740Snp{ 174292740Snp struct icl_cxgbei_pdu *icp; 175292740Snp struct icl_pdu *ip; 176292740Snp struct mbuf *m; 177292740Snp uintptr_t a; 178292740Snp 179292740Snp m = m_gethdr(flags, MT_DATA); 180292740Snp if (__predict_false(m == NULL)) 181292740Snp return (NULL); 182292740Snp 183292740Snp a = roundup2(mtod(m, uintptr_t), _Alignof(struct icl_cxgbei_pdu)); 184292740Snp icp = (struct icl_cxgbei_pdu *)a; 185292740Snp bzero(icp, sizeof(*icp)); 186292740Snp 187292740Snp icp->icp_signature = CXGBEI_PDU_SIGNATURE; 188292740Snp ip = &icp->ip; 189292740Snp ip->ip_bhs_mbuf = m; 190292740Snp 191292740Snp a = roundup2((uintptr_t)(icp + 1), _Alignof(struct iscsi_bhs *)); 192292740Snp ip->ip_bhs = (struct iscsi_bhs *)a; 193292740Snp#ifdef INVARIANTS 194292740Snp /* Everything must fit entirely in the mbuf. */ 195292740Snp a = (uintptr_t)(ip->ip_bhs + 1); 196292740Snp MPASS(a <= (uintptr_t)m + MSIZE); 197292740Snp#endif 198292740Snp bzero(ip->ip_bhs, sizeof(*ip->ip_bhs)); 199292740Snp 200292740Snp m->m_data = (void *)ip->ip_bhs; 201292740Snp m->m_len = sizeof(struct iscsi_bhs); 202292740Snp m->m_pkthdr.len = m->m_len; 203292740Snp 204292740Snp return (ip); 205292740Snp} 206292740Snp 207292740Snpvoid 208292740Snpicl_cxgbei_new_pdu_set_conn(struct icl_pdu *ip, struct icl_conn *ic) 209292740Snp{ 210292740Snp 211292740Snp ip->ip_conn = ic; 212292740Snp#ifdef DIAGNOSTIC 213292740Snp refcount_acquire(&ic->ic_outstanding_pdus); 214292740Snp#endif 215292740Snp} 216292740Snp 217292740Snp/* 218292740Snp * Allocate icl_pdu with empty BHS to fill up by the caller. 219292740Snp */ 220292740Snpstatic struct icl_pdu * 221292740Snpicl_cxgbei_conn_new_pdu(struct icl_conn *ic, int flags) 222292740Snp{ 223292740Snp struct icl_pdu *ip; 224292740Snp 225292740Snp ip = icl_cxgbei_new_pdu(flags); 226292740Snp if (__predict_false(ip == NULL)) 227292740Snp return (NULL); 228292740Snp icl_cxgbei_new_pdu_set_conn(ip, ic); 229292740Snp 230292740Snp return (ip); 231292740Snp} 232292740Snp 233292740Snpstatic size_t 234292740Snpicl_pdu_data_segment_length(const struct icl_pdu *request) 235292740Snp{ 236292740Snp uint32_t len = 0; 237292740Snp 238292740Snp len += request->ip_bhs->bhs_data_segment_len[0]; 239292740Snp len <<= 8; 240292740Snp len += request->ip_bhs->bhs_data_segment_len[1]; 241292740Snp len <<= 8; 242292740Snp len += request->ip_bhs->bhs_data_segment_len[2]; 243292740Snp 244292740Snp return (len); 245292740Snp} 246292740Snp 247292740Snpsize_t 248292740Snpicl_cxgbei_conn_pdu_data_segment_length(struct icl_conn *ic, 249292740Snp const struct icl_pdu *request) 250292740Snp{ 251292740Snp 252292740Snp return (icl_pdu_data_segment_length(request)); 253292740Snp} 254292740Snp 255292740Snpstatic uint32_t 256292740Snpicl_conn_build_tasktag(struct icl_conn *ic, uint32_t tag) 257292740Snp{ 258292740Snp return tag; 259292740Snp} 260292740Snp 261292740Snpstatic struct mbuf * 262292740Snpfinalize_pdu(struct icl_cxgbei_conn *icc, struct icl_cxgbei_pdu *icp) 263292740Snp{ 264292740Snp struct icl_pdu *ip = &icp->ip; 265292740Snp uint8_t ulp_submode, padding; 266292740Snp struct mbuf *m, *last; 267292740Snp struct iscsi_bhs *bhs; 268292740Snp 269292740Snp /* 270292740Snp * Fix up the data segment mbuf first. 271292740Snp */ 272292740Snp m = ip->ip_data_mbuf; 273292740Snp ulp_submode = icc->ulp_submode; 274292740Snp if (m) { 275292740Snp last = m_last(m); 276292740Snp 277292740Snp /* 278292740Snp * Round up the data segment to a 4B boundary. Pad with 0 if 279292740Snp * necessary. There will definitely be room in the mbuf. 280292740Snp */ 281292740Snp padding = roundup2(ip->ip_data_len, 4) - ip->ip_data_len; 282292740Snp if (padding) { 283292740Snp bzero(mtod(last, uint8_t *) + last->m_len, padding); 284292740Snp last->m_len += padding; 285292740Snp } 286292740Snp } else { 287292740Snp MPASS(ip->ip_data_len == 0); 288292740Snp ulp_submode &= ~ULP_CRC_DATA; 289292740Snp padding = 0; 290292740Snp } 291292740Snp 292292740Snp /* 293292740Snp * Now the header mbuf that has the BHS. 294292740Snp */ 295292740Snp m = ip->ip_bhs_mbuf; 296292740Snp MPASS(m->m_pkthdr.len == sizeof(struct iscsi_bhs)); 297292740Snp MPASS(m->m_len == sizeof(struct iscsi_bhs)); 298292740Snp 299292740Snp bhs = ip->ip_bhs; 300292740Snp bhs->bhs_data_segment_len[2] = ip->ip_data_len; 301292740Snp bhs->bhs_data_segment_len[1] = ip->ip_data_len >> 8; 302292740Snp bhs->bhs_data_segment_len[0] = ip->ip_data_len >> 16; 303292740Snp 304292740Snp /* "Convert" PDU to mbuf chain. Do not use icp/ip after this. */ 305292740Snp m->m_pkthdr.len = sizeof(struct iscsi_bhs) + ip->ip_data_len + padding; 306292740Snp m->m_next = ip->ip_data_mbuf; 307292740Snp set_mbuf_ulp_submode(m, ulp_submode); 308292740Snp#ifdef INVARIANTS 309292740Snp bzero(icp, sizeof(*icp)); 310292740Snp#endif 311292740Snp#ifdef DIAGNOSTIC 312292740Snp refcount_release(&icc->ic.ic_outstanding_pdus); 313292740Snp#endif 314292740Snp 315292740Snp return (m); 316292740Snp} 317292740Snp 318292740Snpint 319292740Snpicl_cxgbei_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *ip, 320292740Snp const void *addr, size_t len, int flags) 321292740Snp{ 322292740Snp struct mbuf *m; 323292740Snp#ifdef INVARIANTS 324292740Snp struct icl_cxgbei_pdu *icp = ip_to_icp(ip); 325292740Snp#endif 326292740Snp 327292740Snp MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE); 328292740Snp MPASS(ic == ip->ip_conn); 329292740Snp KASSERT(len > 0, ("%s: len is %jd", __func__, (intmax_t)len)); 330292740Snp 331292740Snp m = ip->ip_data_mbuf; 332292740Snp if (m == NULL) { 333292740Snp m = m_getjcl(M_NOWAIT, MT_DATA, 0, MJUM16BYTES); 334292740Snp if (__predict_false(m == NULL)) 335292740Snp return (ENOMEM); 336292740Snp 337292740Snp ip->ip_data_mbuf = m; 338292740Snp } 339292740Snp 340292740Snp if (__predict_true(m_append(m, len, addr) != 0)) { 341292740Snp ip->ip_data_len += len; 342292740Snp MPASS(ip->ip_data_len <= CXGBEI_MAX_DSL); 343292740Snp return (0); 344292740Snp } else { 345292740Snp if (flags & M_WAITOK) { 346292740Snp CXGBE_UNIMPLEMENTED("fail safe append"); 347292740Snp } 348292740Snp ip->ip_data_len = m_length(m, NULL); 349292740Snp return (1); 350292740Snp } 351292740Snp} 352292740Snp 353292740Snpvoid 354292740Snpicl_cxgbei_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip, 355292740Snp size_t off, void *addr, size_t len) 356292740Snp{ 357292740Snp struct icl_cxgbei_pdu *icp = ip_to_icp(ip); 358292740Snp 359292740Snp if (icp->pdu_flags & SBUF_ULP_FLAG_DATA_DDPED) 360292740Snp return; /* data is DDP'ed, no need to copy */ 361292740Snp m_copydata(ip->ip_data_mbuf, off, len, addr); 362292740Snp} 363292740Snp 364292740Snpvoid 365292740Snpicl_cxgbei_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip) 366292740Snp{ 367292740Snp struct icl_cxgbei_conn *icc = ic_to_icc(ic); 368292740Snp struct icl_cxgbei_pdu *icp = ip_to_icp(ip); 369292740Snp struct socket *so = ic->ic_socket; 370292740Snp struct toepcb *toep = icc->toep; 371292740Snp struct inpcb *inp; 372292740Snp struct mbuf *m; 373292740Snp 374292740Snp MPASS(ic == ip->ip_conn); 375292740Snp MPASS(ip->ip_bhs_mbuf != NULL); 376292740Snp /* The kernel doesn't generate PDUs with AHS. */ 377292740Snp MPASS(ip->ip_ahs_mbuf == NULL && ip->ip_ahs_len == 0); 378292740Snp 379292740Snp ICL_CONN_LOCK_ASSERT(ic); 380292740Snp /* NOTE: sowriteable without so_snd lock is a mostly harmless race. */ 381292740Snp if (ic->ic_disconnecting || so == NULL || !sowriteable(so)) { 382292740Snp icl_cxgbei_conn_pdu_free(ic, ip); 383292740Snp return; 384292740Snp } 385292740Snp 386292740Snp m = finalize_pdu(icc, icp); 387292740Snp M_ASSERTPKTHDR(m); 388292740Snp MPASS((m->m_pkthdr.len & 3) == 0); 389292740Snp MPASS(m->m_pkthdr.len + 8 <= CXGBEI_MAX_PDU); 390292740Snp 391292740Snp /* 392292740Snp * Do not get inp from toep->inp as the toepcb might have detached 393292740Snp * already. 394292740Snp */ 395292740Snp inp = sotoinpcb(so); 396292740Snp INP_WLOCK(inp); 397292740Snp if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) || 398292740Snp __predict_false((toep->flags & TPF_ATTACHED) == 0)) 399292740Snp m_freem(m); 400292740Snp else { 401292740Snp mbufq_enqueue(&toep->ulp_pduq, m); 402292740Snp t4_push_pdus(icc->sc, toep, 0); 403292740Snp } 404292740Snp INP_WUNLOCK(inp); 405292740Snp} 406292740Snp 407292740Snpstatic struct icl_conn * 408292740Snpicl_cxgbei_new_conn(const char *name, struct mtx *lock) 409292740Snp{ 410292740Snp struct icl_cxgbei_conn *icc; 411292740Snp struct icl_conn *ic; 412292740Snp 413292740Snp refcount_acquire(&icl_cxgbei_ncons); 414292740Snp 415292740Snp icc = (struct icl_cxgbei_conn *)kobj_create(&icl_cxgbei_class, M_CXGBE, 416292740Snp M_WAITOK | M_ZERO); 417292740Snp icc->icc_signature = CXGBEI_CONN_SIGNATURE; 418292740Snp STAILQ_INIT(&icc->rcvd_pdus); 419292740Snp 420292740Snp ic = &icc->ic; 421292740Snp ic->ic_lock = lock; 422292740Snp 423292740Snp /* XXXNP: review. Most of these icl_conn fields aren't really used */ 424292740Snp STAILQ_INIT(&ic->ic_to_send); 425292740Snp cv_init(&ic->ic_send_cv, "icl_cxgbei_tx"); 426292740Snp cv_init(&ic->ic_receive_cv, "icl_cxgbei_rx"); 427292740Snp#ifdef DIAGNOSTIC 428292740Snp refcount_init(&ic->ic_outstanding_pdus, 0); 429292740Snp#endif 430292740Snp ic->ic_max_data_segment_length = CXGBEI_MAX_DSL; 431292740Snp ic->ic_name = name; 432292740Snp ic->ic_offload = "cxgbei"; 433300369Strasz ic->ic_unmapped = false; 434292740Snp 435292740Snp CTR2(KTR_CXGBE, "%s: icc %p", __func__, icc); 436292740Snp 437292740Snp return (ic); 438292740Snp} 439292740Snp 440292740Snpvoid 441292740Snpicl_cxgbei_conn_free(struct icl_conn *ic) 442292740Snp{ 443292740Snp struct icl_cxgbei_conn *icc = ic_to_icc(ic); 444292740Snp 445292740Snp MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE); 446292740Snp 447292740Snp CTR2(KTR_CXGBE, "%s: icc %p", __func__, icc); 448292740Snp 449292740Snp cv_destroy(&ic->ic_send_cv); 450292740Snp cv_destroy(&ic->ic_receive_cv); 451292740Snp 452292740Snp kobj_delete((struct kobj *)icc, M_CXGBE); 453292740Snp refcount_release(&icl_cxgbei_ncons); 454292740Snp} 455292740Snp 456292740Snpstatic int 457292740Snpicl_cxgbei_setsockopt(struct icl_conn *ic, struct socket *so) 458292740Snp{ 459292740Snp size_t minspace; 460292740Snp struct sockopt opt; 461292740Snp int error, one = 1; 462292740Snp 463292740Snp /* 464292740Snp * For sendspace, this is required because the current code cannot 465292740Snp * send a PDU in pieces; thus, the minimum buffer size is equal 466292740Snp * to the maximum PDU size. "+4" is to account for possible padding. 467292740Snp * 468292740Snp * What we should actually do here is to use autoscaling, but set 469292740Snp * some minimal buffer size to "minspace". I don't know a way to do 470292740Snp * that, though. 471292740Snp */ 472292740Snp minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length + 473292740Snp ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; 474292740Snp if (sendspace < minspace) 475292740Snp sendspace = minspace; 476292740Snp if (recvspace < minspace) 477292740Snp recvspace = minspace; 478292740Snp 479292740Snp error = soreserve(so, sendspace, recvspace); 480292740Snp if (error != 0) { 481292740Snp icl_cxgbei_conn_close(ic); 482292740Snp return (error); 483292740Snp } 484292740Snp SOCKBUF_LOCK(&so->so_snd); 485292740Snp so->so_snd.sb_flags |= SB_AUTOSIZE; 486292740Snp SOCKBUF_UNLOCK(&so->so_snd); 487292740Snp SOCKBUF_LOCK(&so->so_rcv); 488292740Snp so->so_rcv.sb_flags |= SB_AUTOSIZE; 489292740Snp SOCKBUF_UNLOCK(&so->so_rcv); 490292740Snp 491292740Snp /* 492292740Snp * Disable Nagle. 493292740Snp */ 494292740Snp bzero(&opt, sizeof(opt)); 495292740Snp opt.sopt_dir = SOPT_SET; 496292740Snp opt.sopt_level = IPPROTO_TCP; 497292740Snp opt.sopt_name = TCP_NODELAY; 498292740Snp opt.sopt_val = &one; 499292740Snp opt.sopt_valsize = sizeof(one); 500292740Snp error = sosetopt(so, &opt); 501292740Snp if (error != 0) { 502292740Snp icl_cxgbei_conn_close(ic); 503292740Snp return (error); 504292740Snp } 505292740Snp 506292740Snp return (0); 507292740Snp} 508292740Snp 509292740Snp/* 510292740Snp * Request/response structure used to find out the adapter offloading a socket. 511292740Snp */ 512292740Snpstruct find_ofld_adapter_rr { 513292740Snp struct socket *so; 514292740Snp struct adapter *sc; /* result */ 515292740Snp}; 516292740Snp 517292740Snpstatic void 518292740Snpfind_offload_adapter(struct adapter *sc, void *arg) 519292740Snp{ 520292740Snp struct find_ofld_adapter_rr *fa = arg; 521292740Snp struct socket *so = fa->so; 522292740Snp struct tom_data *td = sc->tom_softc; 523292740Snp struct tcpcb *tp; 524292740Snp struct inpcb *inp; 525292740Snp 526292740Snp /* Non-TCP were filtered out earlier. */ 527292740Snp MPASS(so->so_proto->pr_protocol == IPPROTO_TCP); 528292740Snp 529292740Snp if (fa->sc != NULL) 530292740Snp return; /* Found already. */ 531292740Snp 532292740Snp if (td == NULL) 533292740Snp return; /* TOE not enabled on this adapter. */ 534292740Snp 535292740Snp inp = sotoinpcb(so); 536292740Snp INP_WLOCK(inp); 537292740Snp if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) { 538292740Snp tp = intotcpcb(inp); 539292740Snp if (tp->t_flags & TF_TOE && tp->tod == &td->tod) 540292740Snp fa->sc = sc; /* Found. */ 541292740Snp } 542292740Snp INP_WUNLOCK(inp); 543292740Snp} 544292740Snp 545292740Snp/* XXXNP: move this to t4_tom. */ 546292740Snpstatic void 547292740Snpsend_iscsi_flowc_wr(struct adapter *sc, struct toepcb *toep, int maxlen) 548292740Snp{ 549292740Snp struct wrqe *wr; 550292740Snp struct fw_flowc_wr *flowc; 551292740Snp const u_int nparams = 1; 552292740Snp u_int flowclen; 553292740Snp struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; 554292740Snp 555292740Snp flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval); 556292740Snp 557292740Snp wr = alloc_wrqe(roundup2(flowclen, 16), toep->ofld_txq); 558292740Snp if (wr == NULL) { 559292740Snp /* XXX */ 560292740Snp panic("%s: allocation failure.", __func__); 561292740Snp } 562292740Snp flowc = wrtod(wr); 563292740Snp memset(flowc, 0, wr->wr_len); 564292740Snp 565292740Snp flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | 566292740Snp V_FW_FLOWC_WR_NPARAMS(nparams)); 567292740Snp flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) | 568292740Snp V_FW_WR_FLOWID(toep->tid)); 569292740Snp 570292740Snp flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_TXDATAPLEN_MAX; 571292740Snp flowc->mnemval[0].val = htobe32(maxlen); 572292740Snp 573292740Snp txsd->tx_credits = howmany(flowclen, 16); 574292740Snp txsd->plen = 0; 575292740Snp KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0, 576292740Snp ("%s: not enough credits (%d)", __func__, toep->tx_credits)); 577292740Snp toep->tx_credits -= txsd->tx_credits; 578292740Snp if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) 579292740Snp toep->txsd_pidx = 0; 580292740Snp toep->txsd_avail--; 581292740Snp 582292740Snp t4_wrq_tx(sc, wr); 583292740Snp} 584292740Snp 585292740Snpstatic void 586292740Snpset_ulp_mode_iscsi(struct adapter *sc, struct toepcb *toep, int hcrc, int dcrc) 587292740Snp{ 588302339Snp uint64_t val = ULP_MODE_ISCSI; 589292740Snp 590292740Snp if (hcrc) 591302339Snp val |= ULP_CRC_HEADER << 4; 592292740Snp if (dcrc) 593302339Snp val |= ULP_CRC_DATA << 4; 594292740Snp 595292740Snp CTR4(KTR_CXGBE, "%s: tid %u, ULP_MODE_ISCSI, CRC hdr=%d data=%d", 596292740Snp __func__, toep->tid, hcrc, dcrc); 597292740Snp 598345664Sjhb t4_set_tcb_field(sc, toep->ctrlq, toep, W_TCB_ULP_TYPE, 599302339Snp V_TCB_ULP_TYPE(M_TCB_ULP_TYPE) | V_TCB_ULP_RAW(M_TCB_ULP_RAW), val, 600345664Sjhb 0, 0); 601292740Snp} 602292740Snp 603292740Snp/* 604292740Snp * XXXNP: Who is responsible for cleaning up the socket if this returns with an 605292740Snp * error? Review all error paths. 606292740Snp * 607292740Snp * XXXNP: What happens to the socket's fd reference if the operation is 608292740Snp * successful, and how does that affect the socket's life cycle? 609292740Snp */ 610292740Snpint 611292740Snpicl_cxgbei_conn_handoff(struct icl_conn *ic, int fd) 612292740Snp{ 613292740Snp struct icl_cxgbei_conn *icc = ic_to_icc(ic); 614292740Snp struct find_ofld_adapter_rr fa; 615292740Snp struct file *fp; 616292740Snp struct socket *so; 617292740Snp struct inpcb *inp; 618292740Snp struct tcpcb *tp; 619292740Snp struct toepcb *toep; 620292740Snp cap_rights_t rights; 621292740Snp int error; 622292740Snp 623292740Snp MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE); 624292740Snp ICL_CONN_LOCK_ASSERT_NOT(ic); 625292740Snp 626292740Snp /* 627292740Snp * Steal the socket from userland. 628292740Snp */ 629292740Snp error = fget(curthread, fd, 630292740Snp cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp); 631292740Snp if (error != 0) 632292740Snp return (error); 633292740Snp if (fp->f_type != DTYPE_SOCKET) { 634292740Snp fdrop(fp, curthread); 635292740Snp return (EINVAL); 636292740Snp } 637292740Snp so = fp->f_data; 638292740Snp if (so->so_type != SOCK_STREAM || 639292740Snp so->so_proto->pr_protocol != IPPROTO_TCP) { 640292740Snp fdrop(fp, curthread); 641292740Snp return (EINVAL); 642292740Snp } 643292740Snp 644292740Snp ICL_CONN_LOCK(ic); 645292740Snp if (ic->ic_socket != NULL) { 646292740Snp ICL_CONN_UNLOCK(ic); 647292740Snp fdrop(fp, curthread); 648292740Snp return (EBUSY); 649292740Snp } 650292740Snp ic->ic_disconnecting = false; 651292740Snp ic->ic_socket = so; 652292740Snp fp->f_ops = &badfileops; 653292740Snp fp->f_data = NULL; 654292740Snp fdrop(fp, curthread); 655292740Snp ICL_CONN_UNLOCK(ic); 656292740Snp 657292740Snp /* Find the adapter offloading this socket. */ 658292740Snp fa.sc = NULL; 659292740Snp fa.so = so; 660292740Snp t4_iterate(find_offload_adapter, &fa); 661292740Snp if (fa.sc == NULL) 662292740Snp return (EINVAL); 663292740Snp icc->sc = fa.sc; 664292740Snp 665292740Snp error = icl_cxgbei_setsockopt(ic, so); 666292740Snp if (error) 667292740Snp return (error); 668292740Snp 669292740Snp inp = sotoinpcb(so); 670292740Snp INP_WLOCK(inp); 671292740Snp tp = intotcpcb(inp); 672292740Snp if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) 673292740Snp error = EBUSY; 674292740Snp else { 675292740Snp /* 676292740Snp * socket could not have been "unoffloaded" if here. 677292740Snp */ 678292740Snp MPASS(tp->t_flags & TF_TOE); 679292740Snp MPASS(tp->tod != NULL); 680292740Snp MPASS(tp->t_toe != NULL); 681292740Snp toep = tp->t_toe; 682292740Snp MPASS(toep->vi->pi->adapter == icc->sc); 683292740Snp icc->toep = toep; 684292740Snp icc->cwt = cxgbei_select_worker_thread(icc); 685292740Snp icc->ulp_submode = 0; 686292740Snp if (ic->ic_header_crc32c) 687292740Snp icc->ulp_submode |= ULP_CRC_HEADER; 688292740Snp if (ic->ic_data_crc32c) 689292740Snp icc->ulp_submode |= ULP_CRC_DATA; 690292740Snp so->so_options |= SO_NO_DDP; 691292740Snp toep->ulp_mode = ULP_MODE_ISCSI; 692292740Snp toep->ulpcb = icc; 693292740Snp 694292740Snp send_iscsi_flowc_wr(icc->sc, toep, CXGBEI_MAX_PDU); 695292740Snp set_ulp_mode_iscsi(icc->sc, toep, ic->ic_header_crc32c, 696292740Snp ic->ic_data_crc32c); 697292740Snp error = 0; 698292740Snp } 699292740Snp INP_WUNLOCK(inp); 700292740Snp 701292740Snp return (error); 702292740Snp} 703292740Snp 704292740Snpvoid 705292740Snpicl_cxgbei_conn_close(struct icl_conn *ic) 706292740Snp{ 707292740Snp struct icl_cxgbei_conn *icc = ic_to_icc(ic); 708292740Snp struct icl_pdu *ip; 709292740Snp struct socket *so; 710292740Snp struct sockbuf *sb; 711292740Snp struct inpcb *inp; 712292740Snp struct toepcb *toep = icc->toep; 713292740Snp 714292740Snp MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE); 715292740Snp ICL_CONN_LOCK_ASSERT_NOT(ic); 716292740Snp 717292740Snp ICL_CONN_LOCK(ic); 718292740Snp so = ic->ic_socket; 719292740Snp if (ic->ic_disconnecting || so == NULL) { 720292740Snp CTR4(KTR_CXGBE, "%s: icc %p (disconnecting = %d), so %p", 721292740Snp __func__, icc, ic->ic_disconnecting, so); 722292740Snp ICL_CONN_UNLOCK(ic); 723292740Snp return; 724292740Snp } 725292740Snp ic->ic_disconnecting = true; 726292740Snp 727292740Snp /* These are unused in this driver right now. */ 728292740Snp MPASS(STAILQ_EMPTY(&ic->ic_to_send)); 729292740Snp MPASS(ic->ic_receive_pdu == NULL); 730292740Snp 731292740Snp#ifdef DIAGNOSTIC 732292740Snp KASSERT(ic->ic_outstanding_pdus == 0, 733292740Snp ("destroying session with %d outstanding PDUs", 734292740Snp ic->ic_outstanding_pdus)); 735292740Snp#endif 736292740Snp ICL_CONN_UNLOCK(ic); 737292740Snp 738292740Snp CTR3(KTR_CXGBE, "%s: tid %d, icc %p", __func__, toep ? toep->tid : -1, 739292740Snp icc); 740292740Snp inp = sotoinpcb(so); 741292740Snp sb = &so->so_rcv; 742292740Snp INP_WLOCK(inp); 743292740Snp if (toep != NULL) { /* NULL if connection was never offloaded. */ 744292740Snp toep->ulpcb = NULL; 745292740Snp mbufq_drain(&toep->ulp_pduq); 746292740Snp SOCKBUF_LOCK(sb); 747292740Snp if (icc->rx_flags & RXF_ACTIVE) { 748292740Snp volatile u_int *p = &icc->rx_flags; 749292740Snp 750292740Snp SOCKBUF_UNLOCK(sb); 751292740Snp INP_WUNLOCK(inp); 752292740Snp 753292740Snp while (*p & RXF_ACTIVE) 754292740Snp pause("conclo", 1); 755292740Snp 756292740Snp INP_WLOCK(inp); 757292740Snp SOCKBUF_LOCK(sb); 758292740Snp } 759292740Snp 760292740Snp while (!STAILQ_EMPTY(&icc->rcvd_pdus)) { 761292740Snp ip = STAILQ_FIRST(&icc->rcvd_pdus); 762292740Snp STAILQ_REMOVE_HEAD(&icc->rcvd_pdus, ip_next); 763292740Snp icl_cxgbei_conn_pdu_free(ic, ip); 764292740Snp } 765292740Snp SOCKBUF_UNLOCK(sb); 766292740Snp } 767292740Snp INP_WUNLOCK(inp); 768292740Snp 769292740Snp ICL_CONN_LOCK(ic); 770292740Snp ic->ic_socket = NULL; 771292740Snp ICL_CONN_UNLOCK(ic); 772292740Snp 773292740Snp /* 774292740Snp * XXXNP: we should send RST instead of FIN when PDUs held in various 775292740Snp * queues were purged instead of delivered reliably but soabort isn't 776292740Snp * really general purpose and wouldn't do the right thing here. 777292740Snp */ 778292740Snp soclose(so); 779292740Snp} 780292740Snp 781292740Snpint 782300040Straszicl_cxgbei_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip, 783300040Strasz struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp) 784292740Snp{ 785292740Snp void *prv; 786292740Snp 787292740Snp *task_tagp = icl_conn_build_tasktag(ic, *task_tagp); 788292740Snp 789292740Snp prv = uma_zalloc(icl_transfer_zone, M_NOWAIT | M_ZERO); 790292740Snp if (prv == NULL) 791292740Snp return (ENOMEM); 792292740Snp 793292740Snp *prvp = prv; 794292740Snp 795292740Snp cxgbei_conn_task_reserve_itt(ic, prvp, csio, task_tagp); 796292740Snp 797292740Snp return (0); 798292740Snp} 799292740Snp 800292740Snpvoid 801292740Snpicl_cxgbei_conn_task_done(struct icl_conn *ic, void *prv) 802292740Snp{ 803292740Snp 804292740Snp cxgbei_cleanup_task(ic, prv); 805292740Snp uma_zfree(icl_transfer_zone, prv); 806292740Snp} 807292740Snp 808292740Snpint 809292740Snpicl_cxgbei_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io, 810292740Snp uint32_t *transfer_tag, void **prvp) 811292740Snp{ 812292740Snp void *prv; 813292740Snp 814292740Snp *transfer_tag = icl_conn_build_tasktag(ic, *transfer_tag); 815292740Snp 816292740Snp prv = uma_zalloc(icl_transfer_zone, M_NOWAIT | M_ZERO); 817292740Snp if (prv == NULL) 818292740Snp return (ENOMEM); 819292740Snp 820292740Snp *prvp = prv; 821292740Snp 822292740Snp cxgbei_conn_transfer_reserve_ttt(ic, prvp, io, transfer_tag); 823292740Snp 824292740Snp return (0); 825292740Snp} 826292740Snp 827292740Snpvoid 828292740Snpicl_cxgbei_conn_transfer_done(struct icl_conn *ic, void *prv) 829292740Snp{ 830292740Snp cxgbei_cleanup_task(ic, prv); 831292740Snp uma_zfree(icl_transfer_zone, prv); 832292740Snp} 833292740Snp 834292740Snpstatic int 835292740Snpicl_cxgbei_limits(size_t *limitp) 836292740Snp{ 837292740Snp 838292740Snp *limitp = CXGBEI_MAX_DSL; 839292740Snp 840292740Snp return (0); 841292740Snp} 842292740Snp 843292740Snpstatic int 844292740Snpicl_cxgbei_load(void) 845292740Snp{ 846292740Snp int error; 847292740Snp 848292740Snp icl_transfer_zone = uma_zcreate("icl_transfer", 849292740Snp 16 * 1024, NULL, NULL, NULL, NULL, 850292740Snp UMA_ALIGN_PTR, 0); 851292740Snp 852292740Snp refcount_init(&icl_cxgbei_ncons, 0); 853292740Snp 854301119Strasz error = icl_register("cxgbei", false, -100, icl_cxgbei_limits, 855292740Snp icl_cxgbei_new_conn); 856292740Snp KASSERT(error == 0, ("failed to register")); 857292740Snp 858292740Snp return (error); 859292740Snp} 860292740Snp 861292740Snpstatic int 862292740Snpicl_cxgbei_unload(void) 863292740Snp{ 864292740Snp 865292740Snp if (icl_cxgbei_ncons != 0) 866292740Snp return (EBUSY); 867292740Snp 868300592Strasz icl_unregister("cxgbei", false); 869292740Snp 870292740Snp uma_zdestroy(icl_transfer_zone); 871292740Snp 872292740Snp return (0); 873292740Snp} 874292740Snp 875292740Snpstatic int 876292740Snpicl_cxgbei_modevent(module_t mod, int what, void *arg) 877292740Snp{ 878292740Snp 879292740Snp switch (what) { 880292740Snp case MOD_LOAD: 881292740Snp return (icl_cxgbei_load()); 882292740Snp case MOD_UNLOAD: 883292740Snp return (icl_cxgbei_unload()); 884292740Snp default: 885292740Snp return (EINVAL); 886292740Snp } 887292740Snp} 888292740Snp 889292740Snpmoduledata_t icl_cxgbei_data = { 890292740Snp "icl_cxgbei", 891292740Snp icl_cxgbei_modevent, 892292740Snp 0 893292740Snp}; 894292740Snp 895292740SnpDECLARE_MODULE(icl_cxgbei, icl_cxgbei_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); 896292740SnpMODULE_DEPEND(icl_cxgbei, icl, 1, 1, 1); 897292740SnpMODULE_VERSION(icl_cxgbei, 1); 898292740Snp#endif 899