t4_tom.c revision 245935
1237263Snp/*- 2237263Snp * Copyright (c) 2012 Chelsio Communications, Inc. 3237263Snp * All rights reserved. 4237263Snp * Written by: Navdeep Parhar <np@FreeBSD.org> 5237263Snp * 6237263Snp * Redistribution and use in source and binary forms, with or without 7237263Snp * modification, are permitted provided that the following conditions 8237263Snp * are met: 9237263Snp * 1. Redistributions of source code must retain the above copyright 10237263Snp * notice, this list of conditions and the following disclaimer. 11237263Snp * 2. Redistributions in binary form must reproduce the above copyright 12237263Snp * notice, this list of conditions and the following disclaimer in the 13237263Snp * documentation and/or other materials provided with the distribution. 14237263Snp * 15237263Snp * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16237263Snp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17237263Snp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18237263Snp * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19237263Snp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20237263Snp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21237263Snp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22237263Snp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23237263Snp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24237263Snp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25237263Snp * SUCH DAMAGE. 26237263Snp */ 27237263Snp 28237263Snp#include <sys/cdefs.h> 29237263Snp__FBSDID("$FreeBSD: head/sys/dev/cxgbe/tom/t4_tom.c 245935 2013-01-26 03:01:51Z np $"); 30237263Snp 31237263Snp#include "opt_inet.h" 32245441Snp#include "opt_inet6.h" 33237263Snp 34237263Snp#include <sys/param.h> 35237263Snp#include <sys/types.h> 36237263Snp#include <sys/systm.h> 37237263Snp#include <sys/kernel.h> 38237263Snp#include <sys/ktr.h> 39237263Snp#include <sys/module.h> 40237263Snp#include <sys/protosw.h> 41237263Snp#include <sys/domain.h> 42237263Snp#include <sys/socket.h> 43237263Snp#include <sys/socketvar.h> 44245448Snp#include <net/if.h> 45237263Snp#include <netinet/in.h> 46237263Snp#include <netinet/in_pcb.h> 47245448Snp#include <netinet/in_var.h> 48237263Snp#include <netinet/ip.h> 49245441Snp#include <netinet/ip6.h> 50237263Snp#include <netinet/tcp_var.h> 51245448Snp#include <netinet6/scope6_var.h> 52237263Snp#define TCPSTATES 53237263Snp#include <netinet/tcp_fsm.h> 54237263Snp#include <netinet/toecore.h> 55237263Snp 56237263Snp#ifdef TCP_OFFLOAD 57237263Snp#include "common/common.h" 58237263Snp#include "common/t4_msg.h" 59237263Snp#include "common/t4_regs.h" 60237263Snp#include "tom/t4_tom_l2t.h" 61237263Snp#include "tom/t4_tom.h" 62237263Snp 63239344Snpstatic struct protosw ddp_protosw; 64239344Snpstatic struct pr_usrreqs ddp_usrreqs; 65239344Snp 66245441Snpstatic struct protosw ddp6_protosw; 67245441Snpstatic struct pr_usrreqs ddp6_usrreqs; 68245441Snp 69237263Snp/* Module ops */ 70237263Snpstatic int t4_tom_mod_load(void); 71237263Snpstatic int t4_tom_mod_unload(void); 72237263Snpstatic int t4_tom_modevent(module_t, int, void *); 73237263Snp 74237263Snp/* ULD ops and helpers */ 75237263Snpstatic int t4_tom_activate(struct adapter *); 76237263Snpstatic int t4_tom_deactivate(struct adapter *); 77237263Snp 78237263Snpstatic struct uld_info tom_uld_info = { 79237263Snp .uld_id = ULD_TOM, 80237263Snp .activate = t4_tom_activate, 81237263Snp .deactivate = t4_tom_deactivate, 82237263Snp}; 83237263Snp 84237263Snpstatic void queue_tid_release(struct adapter *, int); 85237263Snpstatic void release_offload_resources(struct toepcb *); 86237263Snpstatic int alloc_tid_tabs(struct tid_info *); 87237263Snpstatic void free_tid_tabs(struct tid_info *); 88245448Snpstatic int add_lip(struct adapter *, struct in6_addr *); 89245448Snpstatic int delete_lip(struct adapter *, struct in6_addr *); 90245448Snpstatic struct clip_entry *search_lip(struct tom_data *, struct in6_addr *); 91245448Snpstatic void init_clip_table(struct adapter *, struct tom_data *); 92245448Snpstatic void destroy_clip_table(struct adapter *, struct tom_data *); 93237263Snpstatic void free_tom_data(struct adapter *, struct tom_data *); 94237263Snp 95237263Snpstruct toepcb * 96237263Snpalloc_toepcb(struct port_info *pi, int txqid, int rxqid, int flags) 97237263Snp{ 98237263Snp struct adapter *sc = pi->adapter; 99237263Snp struct toepcb *toep; 100237263Snp int tx_credits, txsd_total, len; 101237263Snp 102237263Snp /* 103237263Snp * The firmware counts tx work request credits in units of 16 bytes 104237263Snp * each. Reserve room for an ABORT_REQ so the driver never has to worry 105237263Snp * about tx credits if it wants to abort a connection. 106237263Snp */ 107237263Snp tx_credits = sc->params.ofldq_wr_cred; 108237263Snp tx_credits -= howmany(sizeof(struct cpl_abort_req), 16); 109237263Snp 110237263Snp /* 111237263Snp * Shortest possible tx work request is a fw_ofld_tx_data_wr + 1 byte 112237263Snp * immediate payload, and firmware counts tx work request credits in 113237263Snp * units of 16 byte. Calculate the maximum work requests possible. 114237263Snp */ 115237263Snp txsd_total = tx_credits / 116237263Snp howmany((sizeof(struct fw_ofld_tx_data_wr) + 1), 16); 117237263Snp 118237263Snp if (txqid < 0) 119237263Snp txqid = (arc4random() % pi->nofldtxq) + pi->first_ofld_txq; 120237263Snp KASSERT(txqid >= pi->first_ofld_txq && 121237263Snp txqid < pi->first_ofld_txq + pi->nofldtxq, 122237263Snp ("%s: txqid %d for port %p (first %d, n %d)", __func__, txqid, pi, 123237263Snp pi->first_ofld_txq, pi->nofldtxq)); 124237263Snp 125237263Snp if (rxqid < 0) 126237263Snp rxqid = (arc4random() % pi->nofldrxq) + pi->first_ofld_rxq; 127237263Snp KASSERT(rxqid >= pi->first_ofld_rxq && 128237263Snp rxqid < pi->first_ofld_rxq + pi->nofldrxq, 129237263Snp ("%s: rxqid %d for port %p (first %d, n %d)", __func__, rxqid, pi, 130237263Snp pi->first_ofld_rxq, pi->nofldrxq)); 131237263Snp 132237263Snp len = offsetof(struct toepcb, txsd) + 133237263Snp txsd_total * sizeof(struct ofld_tx_sdesc); 134237263Snp 135237263Snp toep = malloc(len, M_CXGBE, M_ZERO | flags); 136237263Snp if (toep == NULL) 137237263Snp return (NULL); 138237263Snp 139237263Snp toep->td = sc->tom_softc; 140237263Snp toep->port = pi; 141237263Snp toep->tx_credits = tx_credits; 142237263Snp toep->ofld_txq = &sc->sge.ofld_txq[txqid]; 143237263Snp toep->ofld_rxq = &sc->sge.ofld_rxq[rxqid]; 144237263Snp toep->ctrlq = &sc->sge.ctrlq[pi->port_id]; 145237263Snp toep->txsd_total = txsd_total; 146237263Snp toep->txsd_avail = txsd_total; 147237263Snp toep->txsd_pidx = 0; 148237263Snp toep->txsd_cidx = 0; 149237263Snp 150237263Snp return (toep); 151237263Snp} 152237263Snp 153237263Snpvoid 154237263Snpfree_toepcb(struct toepcb *toep) 155237263Snp{ 156237263Snp 157239514Snp KASSERT(!(toep->flags & TPF_ATTACHED), 158237263Snp ("%s: attached to an inpcb", __func__)); 159239514Snp KASSERT(!(toep->flags & TPF_CPL_PENDING), 160237263Snp ("%s: CPL pending", __func__)); 161237263Snp 162237263Snp free(toep, M_CXGBE); 163237263Snp} 164237263Snp 165237263Snp/* 166237263Snp * Set up the socket for TCP offload. 167237263Snp */ 168237263Snpvoid 169237263Snpoffload_socket(struct socket *so, struct toepcb *toep) 170237263Snp{ 171237263Snp struct tom_data *td = toep->td; 172237263Snp struct inpcb *inp = sotoinpcb(so); 173237263Snp struct tcpcb *tp = intotcpcb(inp); 174237263Snp struct sockbuf *sb; 175237263Snp 176237263Snp INP_WLOCK_ASSERT(inp); 177237263Snp 178237263Snp /* Update socket */ 179237263Snp sb = &so->so_snd; 180237263Snp SOCKBUF_LOCK(sb); 181237263Snp sb->sb_flags |= SB_NOCOALESCE; 182237263Snp SOCKBUF_UNLOCK(sb); 183237263Snp sb = &so->so_rcv; 184237263Snp SOCKBUF_LOCK(sb); 185237263Snp sb->sb_flags |= SB_NOCOALESCE; 186245441Snp if (toep->ulp_mode == ULP_MODE_TCPDDP) { 187245441Snp if (inp->inp_vflag & INP_IPV6) 188245441Snp so->so_proto = &ddp6_protosw; 189245441Snp else 190245441Snp so->so_proto = &ddp_protosw; 191245441Snp } 192237263Snp SOCKBUF_UNLOCK(sb); 193237263Snp 194237263Snp /* Update TCP PCB */ 195237263Snp tp->tod = &td->tod; 196237263Snp tp->t_toe = toep; 197237263Snp tp->t_flags |= TF_TOE; 198237263Snp 199237263Snp /* Install an extra hold on inp */ 200237263Snp toep->inp = inp; 201239514Snp toep->flags |= TPF_ATTACHED; 202237263Snp in_pcbref(inp); 203237263Snp 204237263Snp /* Add the TOE PCB to the active list */ 205237263Snp mtx_lock(&td->toep_list_lock); 206237263Snp TAILQ_INSERT_HEAD(&td->toep_list, toep, link); 207237263Snp mtx_unlock(&td->toep_list_lock); 208237263Snp} 209237263Snp 210237263Snp/* This is _not_ the normal way to "unoffload" a socket. */ 211237263Snpvoid 212237263Snpundo_offload_socket(struct socket *so) 213237263Snp{ 214237263Snp struct inpcb *inp = sotoinpcb(so); 215237263Snp struct tcpcb *tp = intotcpcb(inp); 216237263Snp struct toepcb *toep = tp->t_toe; 217237263Snp struct tom_data *td = toep->td; 218237263Snp struct sockbuf *sb; 219237263Snp 220237263Snp INP_WLOCK_ASSERT(inp); 221237263Snp 222237263Snp sb = &so->so_snd; 223237263Snp SOCKBUF_LOCK(sb); 224237263Snp sb->sb_flags &= ~SB_NOCOALESCE; 225237263Snp SOCKBUF_UNLOCK(sb); 226237263Snp sb = &so->so_rcv; 227237263Snp SOCKBUF_LOCK(sb); 228237263Snp sb->sb_flags &= ~SB_NOCOALESCE; 229237263Snp SOCKBUF_UNLOCK(sb); 230237263Snp 231237263Snp tp->tod = NULL; 232237263Snp tp->t_toe = NULL; 233237263Snp tp->t_flags &= ~TF_TOE; 234237263Snp 235237263Snp toep->inp = NULL; 236239514Snp toep->flags &= ~TPF_ATTACHED; 237237263Snp if (in_pcbrele_wlocked(inp)) 238237263Snp panic("%s: inp freed.", __func__); 239237263Snp 240237263Snp mtx_lock(&td->toep_list_lock); 241237263Snp TAILQ_REMOVE(&td->toep_list, toep, link); 242237263Snp mtx_unlock(&td->toep_list_lock); 243237263Snp} 244237263Snp 245237263Snpstatic void 246237263Snprelease_offload_resources(struct toepcb *toep) 247237263Snp{ 248237263Snp struct tom_data *td = toep->td; 249237263Snp struct adapter *sc = td_adapter(td); 250237263Snp int tid = toep->tid; 251237263Snp 252239514Snp KASSERT(!(toep->flags & TPF_CPL_PENDING), 253237263Snp ("%s: %p has CPL pending.", __func__, toep)); 254239514Snp KASSERT(!(toep->flags & TPF_ATTACHED), 255237263Snp ("%s: %p is still attached.", __func__, toep)); 256237263Snp 257245448Snp CTR5(KTR_CXGBE, "%s: toep %p (tid %d, l2te %p, ce %p)", 258245448Snp __func__, toep, tid, toep->l2te, toep->ce); 259237263Snp 260239344Snp if (toep->ulp_mode == ULP_MODE_TCPDDP) 261239344Snp release_ddp_resources(toep); 262239344Snp 263237263Snp if (toep->l2te) 264237263Snp t4_l2t_release(toep->l2te); 265237263Snp 266237263Snp if (tid >= 0) { 267237263Snp remove_tid(sc, tid); 268237263Snp release_tid(sc, tid, toep->ctrlq); 269237263Snp } 270237263Snp 271245448Snp if (toep->ce) 272245448Snp release_lip(td, toep->ce); 273245448Snp 274237263Snp mtx_lock(&td->toep_list_lock); 275237263Snp TAILQ_REMOVE(&td->toep_list, toep, link); 276237263Snp mtx_unlock(&td->toep_list_lock); 277237263Snp 278237263Snp free_toepcb(toep); 279237263Snp} 280237263Snp 281237263Snp/* 282237263Snp * The kernel is done with the TCP PCB and this is our opportunity to unhook the 283237263Snp * toepcb hanging off of it. If the TOE driver is also done with the toepcb (no 284237263Snp * pending CPL) then it is time to release all resources tied to the toepcb. 285237263Snp * 286237263Snp * Also gets called when an offloaded active open fails and the TOM wants the 287237263Snp * kernel to take the TCP PCB back. 288237263Snp */ 289237263Snpstatic void 290237263Snpt4_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp) 291237263Snp{ 292237263Snp#if defined(KTR) || defined(INVARIANTS) 293237263Snp struct inpcb *inp = tp->t_inpcb; 294237263Snp#endif 295237263Snp struct toepcb *toep = tp->t_toe; 296237263Snp 297237263Snp INP_WLOCK_ASSERT(inp); 298237263Snp 299237263Snp KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); 300239514Snp KASSERT(toep->flags & TPF_ATTACHED, 301237263Snp ("%s: not attached", __func__)); 302237263Snp 303237263Snp#ifdef KTR 304237263Snp if (tp->t_state == TCPS_SYN_SENT) { 305237263Snp CTR6(KTR_CXGBE, "%s: atid %d, toep %p (0x%x), inp %p (0x%x)", 306237263Snp __func__, toep->tid, toep, toep->flags, inp, 307237263Snp inp->inp_flags); 308237263Snp } else { 309237263Snp CTR6(KTR_CXGBE, 310237263Snp "t4_pcb_detach: tid %d (%s), toep %p (0x%x), inp %p (0x%x)", 311237263Snp toep->tid, tcpstates[tp->t_state], toep, toep->flags, inp, 312237263Snp inp->inp_flags); 313237263Snp } 314237263Snp#endif 315237263Snp 316237263Snp tp->t_toe = NULL; 317237263Snp tp->t_flags &= ~TF_TOE; 318239514Snp toep->flags &= ~TPF_ATTACHED; 319237263Snp 320239514Snp if (!(toep->flags & TPF_CPL_PENDING)) 321237263Snp release_offload_resources(toep); 322237263Snp} 323237263Snp 324237263Snp/* 325237263Snp * The TOE driver will not receive any more CPLs for the tid associated with the 326237263Snp * toepcb; release the hold on the inpcb. 327237263Snp */ 328237263Snpvoid 329237263Snpfinal_cpl_received(struct toepcb *toep) 330237263Snp{ 331237263Snp struct inpcb *inp = toep->inp; 332237263Snp 333237263Snp KASSERT(inp != NULL, ("%s: inp is NULL", __func__)); 334237263Snp INP_WLOCK_ASSERT(inp); 335239514Snp KASSERT(toep->flags & TPF_CPL_PENDING, 336237263Snp ("%s: CPL not pending already?", __func__)); 337237263Snp 338237263Snp CTR6(KTR_CXGBE, "%s: tid %d, toep %p (0x%x), inp %p (0x%x)", 339237263Snp __func__, toep->tid, toep, toep->flags, inp, inp->inp_flags); 340237263Snp 341237263Snp toep->inp = NULL; 342239514Snp toep->flags &= ~TPF_CPL_PENDING; 343237263Snp 344239514Snp if (!(toep->flags & TPF_ATTACHED)) 345237263Snp release_offload_resources(toep); 346237263Snp 347237263Snp if (!in_pcbrele_wlocked(inp)) 348237263Snp INP_WUNLOCK(inp); 349237263Snp} 350237263Snp 351237263Snpvoid 352237263Snpinsert_tid(struct adapter *sc, int tid, void *ctx) 353237263Snp{ 354237263Snp struct tid_info *t = &sc->tids; 355237263Snp 356237263Snp t->tid_tab[tid] = ctx; 357237263Snp atomic_add_int(&t->tids_in_use, 1); 358237263Snp} 359237263Snp 360237263Snpvoid * 361237263Snplookup_tid(struct adapter *sc, int tid) 362237263Snp{ 363237263Snp struct tid_info *t = &sc->tids; 364237263Snp 365237263Snp return (t->tid_tab[tid]); 366237263Snp} 367237263Snp 368237263Snpvoid 369237263Snpupdate_tid(struct adapter *sc, int tid, void *ctx) 370237263Snp{ 371237263Snp struct tid_info *t = &sc->tids; 372237263Snp 373237263Snp t->tid_tab[tid] = ctx; 374237263Snp} 375237263Snp 376237263Snpvoid 377237263Snpremove_tid(struct adapter *sc, int tid) 378237263Snp{ 379237263Snp struct tid_info *t = &sc->tids; 380237263Snp 381237263Snp t->tid_tab[tid] = NULL; 382237263Snp atomic_subtract_int(&t->tids_in_use, 1); 383237263Snp} 384237263Snp 385237263Snpvoid 386237263Snprelease_tid(struct adapter *sc, int tid, struct sge_wrq *ctrlq) 387237263Snp{ 388237263Snp struct wrqe *wr; 389237263Snp struct cpl_tid_release *req; 390237263Snp 391237263Snp wr = alloc_wrqe(sizeof(*req), ctrlq); 392237263Snp if (wr == NULL) { 393237263Snp queue_tid_release(sc, tid); /* defer */ 394237263Snp return; 395237263Snp } 396237263Snp req = wrtod(wr); 397237263Snp 398237263Snp INIT_TP_WR_MIT_CPL(req, CPL_TID_RELEASE, tid); 399237263Snp 400237263Snp t4_wrq_tx(sc, wr); 401237263Snp} 402237263Snp 403237263Snpstatic void 404237263Snpqueue_tid_release(struct adapter *sc, int tid) 405237263Snp{ 406237263Snp 407237263Snp CXGBE_UNIMPLEMENTED("deferred tid release"); 408237263Snp} 409237263Snp 410237263Snp/* 411237263Snp * What mtu_idx to use, given a 4-tuple and/or an MSS cap 412237263Snp */ 413237263Snpint 414237263Snpfind_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc, int pmss) 415237263Snp{ 416237263Snp unsigned short *mtus = &sc->params.mtus[0]; 417245441Snp int i, mss, n; 418237263Snp 419237263Snp KASSERT(inc != NULL || pmss > 0, 420237263Snp ("%s: at least one of inc/pmss must be specified", __func__)); 421237263Snp 422237263Snp mss = inc ? tcp_mssopt(inc) : pmss; 423237263Snp if (pmss > 0 && mss > pmss) 424237263Snp mss = pmss; 425237263Snp 426245441Snp if (inc->inc_flags & INC_ISIPV6) 427245441Snp n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); 428245441Snp else 429245441Snp n = sizeof(struct ip) + sizeof(struct tcphdr); 430237263Snp 431245441Snp for (i = 0; i < NMTUS - 1 && mtus[i + 1] <= mss + n; i++) 432245441Snp continue; 433245441Snp 434237263Snp return (i); 435237263Snp} 436237263Snp 437237263Snp/* 438237263Snp * Determine the receive window size for a socket. 439237263Snp */ 440237263Snpu_long 441237263Snpselect_rcv_wnd(struct socket *so) 442237263Snp{ 443237263Snp unsigned long wnd; 444237263Snp 445237263Snp SOCKBUF_LOCK_ASSERT(&so->so_rcv); 446237263Snp 447237263Snp wnd = sbspace(&so->so_rcv); 448237263Snp if (wnd < MIN_RCV_WND) 449237263Snp wnd = MIN_RCV_WND; 450237263Snp 451237263Snp return min(wnd, MAX_RCV_WND); 452237263Snp} 453237263Snp 454237263Snpint 455237263Snpselect_rcv_wscale(void) 456237263Snp{ 457237263Snp int wscale = 0; 458237263Snp unsigned long space = sb_max; 459237263Snp 460237263Snp if (space > MAX_RCV_WND) 461237263Snp space = MAX_RCV_WND; 462237263Snp 463237263Snp while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < space) 464237263Snp wscale++; 465237263Snp 466237263Snp return (wscale); 467237263Snp} 468237263Snp 469237263Snpextern int always_keepalive; 470237263Snp#define VIID_SMACIDX(v) (((unsigned int)(v) & 0x7f) << 1) 471237263Snp 472237263Snp/* 473237263Snp * socket so could be a listening socket too. 474237263Snp */ 475237263Snpuint64_t 476237263Snpcalc_opt0(struct socket *so, struct port_info *pi, struct l2t_entry *e, 477237263Snp int mtu_idx, int rscale, int rx_credits, int ulp_mode) 478237263Snp{ 479237263Snp uint64_t opt0; 480237263Snp 481237263Snp KASSERT(rx_credits <= M_RCV_BUFSIZ, 482237263Snp ("%s: rcv_bufsiz too high", __func__)); 483237263Snp 484237263Snp opt0 = F_TCAM_BYPASS | V_WND_SCALE(rscale) | V_MSS_IDX(mtu_idx) | 485237263Snp V_ULP_MODE(ulp_mode) | V_RCV_BUFSIZ(rx_credits); 486237263Snp 487237263Snp if (so != NULL) { 488237263Snp struct inpcb *inp = sotoinpcb(so); 489237263Snp struct tcpcb *tp = intotcpcb(inp); 490237263Snp int keepalive = always_keepalive || 491237263Snp so_options_get(so) & SO_KEEPALIVE; 492237263Snp 493237263Snp opt0 |= V_NAGLE((tp->t_flags & TF_NODELAY) == 0); 494237263Snp opt0 |= V_KEEP_ALIVE(keepalive != 0); 495237263Snp } 496237263Snp 497237263Snp if (e != NULL) 498237263Snp opt0 |= V_L2T_IDX(e->idx); 499237263Snp 500237263Snp if (pi != NULL) { 501237263Snp opt0 |= V_SMAC_SEL(VIID_SMACIDX(pi->viid)); 502237263Snp opt0 |= V_TX_CHAN(pi->tx_chan); 503237263Snp } 504237263Snp 505237263Snp return htobe64(opt0); 506237263Snp} 507237263Snp 508237263Snp#define FILTER_SEL_WIDTH_P_FC (3 + 1) 509237263Snp#define FILTER_SEL_WIDTH_VIN_P_FC (6 + 7 + FILTER_SEL_WIDTH_P_FC) 510237263Snp#define FILTER_SEL_WIDTH_TAG_P_FC (3 + FILTER_SEL_WIDTH_VIN_P_FC) 511237263Snp#define FILTER_SEL_WIDTH_VLD_TAG_P_FC (1 + FILTER_SEL_WIDTH_TAG_P_FC) 512237263Snp#define VLAN_NONE 0xfff 513237263Snp#define FILTER_SEL_VLAN_NONE 0xffff 514237263Snp 515237263Snpuint32_t 516237263Snpselect_ntuple(struct port_info *pi, struct l2t_entry *e, uint32_t filter_mode) 517237263Snp{ 518237263Snp uint16_t viid = pi->viid; 519237263Snp uint32_t ntuple = 0; 520237263Snp 521237263Snp if (filter_mode == HW_TPL_FR_MT_PR_IV_P_FC) { 522237263Snp if (e->vlan == VLAN_NONE) 523237263Snp ntuple |= FILTER_SEL_VLAN_NONE << FILTER_SEL_WIDTH_P_FC; 524237263Snp else { 525237263Snp ntuple |= e->vlan << FILTER_SEL_WIDTH_P_FC; 526237263Snp ntuple |= 1 << FILTER_SEL_WIDTH_VLD_TAG_P_FC; 527237263Snp } 528237263Snp ntuple |= e->lport << S_PORT; 529237263Snp ntuple |= IPPROTO_TCP << FILTER_SEL_WIDTH_VLD_TAG_P_FC; 530237263Snp } else if (filter_mode == HW_TPL_FR_MT_PR_OV_P_FC) { 531237263Snp ntuple |= G_FW_VIID_VIN(viid) << FILTER_SEL_WIDTH_P_FC; 532237263Snp ntuple |= G_FW_VIID_PFN(viid) << FILTER_SEL_WIDTH_VIN_P_FC; 533237263Snp ntuple |= G_FW_VIID_VIVLD(viid) << FILTER_SEL_WIDTH_TAG_P_FC; 534237263Snp ntuple |= e->lport << S_PORT; 535237263Snp ntuple |= IPPROTO_TCP << FILTER_SEL_WIDTH_VLD_TAG_P_FC; 536237263Snp } 537237263Snp 538237263Snp return (htobe32(ntuple)); 539237263Snp} 540237263Snp 541245441Snpvoid 542245441Snpset_tcpddp_ulp_mode(struct toepcb *toep) 543245441Snp{ 544245441Snp 545245441Snp toep->ulp_mode = ULP_MODE_TCPDDP; 546245441Snp toep->ddp_flags = DDP_OK; 547245441Snp toep->ddp_score = DDP_LOW_SCORE; 548245441Snp} 549245441Snp 550245935Snpint 551245935Snpnegative_advice(int status) 552245935Snp{ 553245935Snp 554245935Snp return (status == CPL_ERR_RTX_NEG_ADVICE || 555245935Snp status == CPL_ERR_PERSIST_NEG_ADVICE || 556245935Snp status == CPL_ERR_KEEPALV_NEG_ADVICE); 557245935Snp} 558245935Snp 559237263Snpstatic int 560237263Snpalloc_tid_tabs(struct tid_info *t) 561237263Snp{ 562237263Snp size_t size; 563237263Snp unsigned int i; 564237263Snp 565237263Snp size = t->ntids * sizeof(*t->tid_tab) + 566237263Snp t->natids * sizeof(*t->atid_tab) + 567237263Snp t->nstids * sizeof(*t->stid_tab); 568237263Snp 569237263Snp t->tid_tab = malloc(size, M_CXGBE, M_ZERO | M_NOWAIT); 570237263Snp if (t->tid_tab == NULL) 571237263Snp return (ENOMEM); 572237263Snp 573237263Snp mtx_init(&t->atid_lock, "atid lock", NULL, MTX_DEF); 574237263Snp t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids]; 575237263Snp t->afree = t->atid_tab; 576237263Snp t->atids_in_use = 0; 577237263Snp for (i = 1; i < t->natids; i++) 578237263Snp t->atid_tab[i - 1].next = &t->atid_tab[i]; 579237263Snp t->atid_tab[t->natids - 1].next = NULL; 580237263Snp 581237263Snp mtx_init(&t->stid_lock, "stid lock", NULL, MTX_DEF); 582245276Snp t->stid_tab = (struct listen_ctx **)&t->atid_tab[t->natids]; 583237263Snp t->stids_in_use = 0; 584245276Snp TAILQ_INIT(&t->stids); 585245276Snp t->nstids_free_head = t->nstids; 586237263Snp 587237263Snp atomic_store_rel_int(&t->tids_in_use, 0); 588237263Snp 589237263Snp return (0); 590237263Snp} 591237263Snp 592237263Snpstatic void 593237263Snpfree_tid_tabs(struct tid_info *t) 594237263Snp{ 595237263Snp KASSERT(t->tids_in_use == 0, 596237263Snp ("%s: %d tids still in use.", __func__, t->tids_in_use)); 597237263Snp KASSERT(t->atids_in_use == 0, 598237263Snp ("%s: %d atids still in use.", __func__, t->atids_in_use)); 599237263Snp KASSERT(t->stids_in_use == 0, 600237263Snp ("%s: %d tids still in use.", __func__, t->stids_in_use)); 601237263Snp 602237263Snp free(t->tid_tab, M_CXGBE); 603237263Snp t->tid_tab = NULL; 604237263Snp 605237263Snp if (mtx_initialized(&t->atid_lock)) 606237263Snp mtx_destroy(&t->atid_lock); 607237263Snp if (mtx_initialized(&t->stid_lock)) 608237263Snp mtx_destroy(&t->stid_lock); 609237263Snp} 610237263Snp 611245448Snpstatic int 612245448Snpadd_lip(struct adapter *sc, struct in6_addr *lip) 613245448Snp{ 614245448Snp struct fw_clip_cmd c; 615245448Snp 616245448Snp ASSERT_SYNCHRONIZED_OP(sc); 617245448Snp /* mtx_assert(&td->clip_table_lock, MA_OWNED); */ 618245448Snp 619245448Snp memset(&c, 0, sizeof(c)); 620245448Snp c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST | 621245448Snp F_FW_CMD_WRITE); 622245448Snp c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_ALLOC | FW_LEN16(c)); 623245448Snp c.ip_hi = *(uint64_t *)&lip->s6_addr[0]; 624245448Snp c.ip_lo = *(uint64_t *)&lip->s6_addr[8]; 625245448Snp 626245448Snp return (t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c)); 627245448Snp} 628245448Snp 629245448Snpstatic int 630245448Snpdelete_lip(struct adapter *sc, struct in6_addr *lip) 631245448Snp{ 632245448Snp struct fw_clip_cmd c; 633245448Snp 634245448Snp ASSERT_SYNCHRONIZED_OP(sc); 635245448Snp /* mtx_assert(&td->clip_table_lock, MA_OWNED); */ 636245448Snp 637245448Snp memset(&c, 0, sizeof(c)); 638245448Snp c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST | 639245448Snp F_FW_CMD_READ); 640245448Snp c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_FREE | FW_LEN16(c)); 641245448Snp c.ip_hi = *(uint64_t *)&lip->s6_addr[0]; 642245448Snp c.ip_lo = *(uint64_t *)&lip->s6_addr[8]; 643245448Snp 644245448Snp return (t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c)); 645245448Snp} 646245448Snp 647245448Snpstatic struct clip_entry * 648245448Snpsearch_lip(struct tom_data *td, struct in6_addr *lip) 649245448Snp{ 650245448Snp struct clip_entry *ce; 651245448Snp 652245448Snp mtx_assert(&td->clip_table_lock, MA_OWNED); 653245448Snp 654245448Snp TAILQ_FOREACH(ce, &td->clip_table, link) { 655245448Snp if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) 656245448Snp return (ce); 657245448Snp } 658245448Snp 659245448Snp return (NULL); 660245448Snp} 661245448Snp 662245448Snpstruct clip_entry * 663245448Snphold_lip(struct tom_data *td, struct in6_addr *lip) 664245448Snp{ 665245448Snp struct clip_entry *ce; 666245448Snp 667245448Snp mtx_lock(&td->clip_table_lock); 668245448Snp ce = search_lip(td, lip); 669245448Snp if (ce != NULL) 670245448Snp ce->refcount++; 671245448Snp mtx_unlock(&td->clip_table_lock); 672245448Snp 673245448Snp return (ce); 674245448Snp} 675245448Snp 676245448Snpvoid 677245448Snprelease_lip(struct tom_data *td, struct clip_entry *ce) 678245448Snp{ 679245448Snp 680245448Snp mtx_lock(&td->clip_table_lock); 681245448Snp KASSERT(search_lip(td, &ce->lip) == ce, 682245448Snp ("%s: CLIP entry %p p not in CLIP table.", __func__, ce)); 683245448Snp KASSERT(ce->refcount > 0, 684245448Snp ("%s: CLIP entry %p has refcount 0", __func__, ce)); 685245448Snp --ce->refcount; 686245448Snp mtx_unlock(&td->clip_table_lock); 687245448Snp} 688245448Snp 689237263Snpstatic void 690245448Snpinit_clip_table(struct adapter *sc, struct tom_data *td) 691245448Snp{ 692245448Snp struct in6_ifaddr *ia; 693245448Snp struct in6_addr *lip, tlip; 694245448Snp struct clip_entry *ce; 695245448Snp 696245448Snp ASSERT_SYNCHRONIZED_OP(sc); 697245448Snp 698245448Snp mtx_init(&td->clip_table_lock, "CLIP table lock", NULL, MTX_DEF); 699245448Snp TAILQ_INIT(&td->clip_table); 700245448Snp 701245448Snp IN6_IFADDR_RLOCK(); 702245448Snp TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { 703245448Snp lip = &ia->ia_addr.sin6_addr; 704245448Snp 705245448Snp KASSERT(!IN6_IS_ADDR_MULTICAST(lip), 706245448Snp ("%s: mcast address in in6_ifaddr list", __func__)); 707245448Snp 708245448Snp if (IN6_IS_ADDR_LOOPBACK(lip)) 709245448Snp continue; 710245448Snp if (IN6_IS_SCOPE_EMBED(lip)) { 711245448Snp /* Remove the embedded scope */ 712245448Snp tlip = *lip; 713245448Snp lip = &tlip; 714245448Snp in6_clearscope(lip); 715245448Snp } 716245448Snp /* 717245448Snp * XXX: how to weed out the link local address for the loopback 718245448Snp * interface? It's fe80::1 usually (always?). 719245448Snp */ 720245448Snp 721245448Snp mtx_lock(&td->clip_table_lock); 722245448Snp if (search_lip(td, lip) == NULL) { 723245448Snp ce = malloc(sizeof(*ce), M_CXGBE, M_NOWAIT); 724245448Snp memcpy(&ce->lip, lip, sizeof(ce->lip)); 725245448Snp ce->refcount = 0; 726245448Snp if (add_lip(sc, lip) == 0) 727245448Snp TAILQ_INSERT_TAIL(&td->clip_table, ce, link); 728245448Snp else 729245448Snp free(ce, M_CXGBE); 730245448Snp } 731245448Snp mtx_unlock(&td->clip_table_lock); 732245448Snp } 733245448Snp IN6_IFADDR_RUNLOCK(); 734245448Snp} 735245448Snp 736245448Snpstatic void 737245448Snpdestroy_clip_table(struct adapter *sc, struct tom_data *td) 738245448Snp{ 739245448Snp struct clip_entry *ce, *ce_temp; 740245448Snp 741245448Snp if (mtx_initialized(&td->clip_table_lock)) { 742245448Snp mtx_lock(&td->clip_table_lock); 743245448Snp TAILQ_FOREACH_SAFE(ce, &td->clip_table, link, ce_temp) { 744245448Snp KASSERT(ce->refcount == 0, 745245448Snp ("%s: CLIP entry %p still in use (%d)", __func__, 746245448Snp ce, ce->refcount)); 747245448Snp TAILQ_REMOVE(&td->clip_table, ce, link); 748245448Snp delete_lip(sc, &ce->lip); 749245448Snp free(ce, M_CXGBE); 750245448Snp } 751245448Snp mtx_unlock(&td->clip_table_lock); 752245448Snp mtx_destroy(&td->clip_table_lock); 753245448Snp } 754245448Snp} 755245448Snp 756245448Snpstatic void 757237263Snpfree_tom_data(struct adapter *sc, struct tom_data *td) 758237263Snp{ 759245448Snp 760245448Snp ASSERT_SYNCHRONIZED_OP(sc); 761245448Snp 762237263Snp KASSERT(TAILQ_EMPTY(&td->toep_list), 763237263Snp ("%s: TOE PCB list is not empty.", __func__)); 764237263Snp KASSERT(td->lctx_count == 0, 765237263Snp ("%s: lctx hash table is not empty.", __func__)); 766237263Snp 767237263Snp t4_uninit_l2t_cpl_handlers(sc); 768239344Snp t4_uninit_cpl_io_handlers(sc); 769239344Snp t4_uninit_ddp(sc, td); 770245448Snp destroy_clip_table(sc, td); 771237263Snp 772237263Snp if (td->listen_mask != 0) 773237263Snp hashdestroy(td->listen_hash, M_CXGBE, td->listen_mask); 774237263Snp 775237263Snp if (mtx_initialized(&td->lctx_hash_lock)) 776237263Snp mtx_destroy(&td->lctx_hash_lock); 777237263Snp if (mtx_initialized(&td->toep_list_lock)) 778237263Snp mtx_destroy(&td->toep_list_lock); 779237263Snp 780237263Snp free_tid_tabs(&sc->tids); 781237263Snp free(td, M_CXGBE); 782237263Snp} 783237263Snp 784237263Snp/* 785237263Snp * Ground control to Major TOM 786237263Snp * Commencing countdown, engines on 787237263Snp */ 788237263Snpstatic int 789237263Snpt4_tom_activate(struct adapter *sc) 790237263Snp{ 791237263Snp struct tom_data *td; 792237263Snp struct toedev *tod; 793237263Snp int i, rc; 794237263Snp 795245274Snp ASSERT_SYNCHRONIZED_OP(sc); 796237263Snp 797237263Snp /* per-adapter softc for TOM */ 798237263Snp td = malloc(sizeof(*td), M_CXGBE, M_ZERO | M_NOWAIT); 799237263Snp if (td == NULL) 800237263Snp return (ENOMEM); 801237263Snp 802237263Snp /* List of TOE PCBs and associated lock */ 803237263Snp mtx_init(&td->toep_list_lock, "PCB list lock", NULL, MTX_DEF); 804237263Snp TAILQ_INIT(&td->toep_list); 805237263Snp 806237263Snp /* Listen context */ 807237263Snp mtx_init(&td->lctx_hash_lock, "lctx hash lock", NULL, MTX_DEF); 808237263Snp td->listen_hash = hashinit_flags(LISTEN_HASH_SIZE, M_CXGBE, 809237263Snp &td->listen_mask, HASH_NOWAIT); 810237263Snp 811237263Snp /* TID tables */ 812237263Snp rc = alloc_tid_tabs(&sc->tids); 813237263Snp if (rc != 0) 814237263Snp goto done; 815237263Snp 816245448Snp /* DDP page pods and CPL handlers */ 817239344Snp t4_init_ddp(sc, td); 818239344Snp 819245448Snp /* CLIP table for IPv6 offload */ 820245448Snp init_clip_table(sc, td); 821245448Snp 822237263Snp /* CPL handlers */ 823237263Snp t4_init_connect_cpl_handlers(sc); 824237263Snp t4_init_l2t_cpl_handlers(sc); 825237263Snp t4_init_listen_cpl_handlers(sc); 826237263Snp t4_init_cpl_io_handlers(sc); 827237263Snp 828237263Snp /* toedev ops */ 829237263Snp tod = &td->tod; 830237263Snp init_toedev(tod); 831237263Snp tod->tod_softc = sc; 832237263Snp tod->tod_connect = t4_connect; 833237263Snp tod->tod_listen_start = t4_listen_start; 834237263Snp tod->tod_listen_stop = t4_listen_stop; 835237263Snp tod->tod_rcvd = t4_rcvd; 836237263Snp tod->tod_output = t4_tod_output; 837237263Snp tod->tod_send_rst = t4_send_rst; 838237263Snp tod->tod_send_fin = t4_send_fin; 839237263Snp tod->tod_pcb_detach = t4_pcb_detach; 840237263Snp tod->tod_l2_update = t4_l2_update; 841237263Snp tod->tod_syncache_added = t4_syncache_added; 842237263Snp tod->tod_syncache_removed = t4_syncache_removed; 843237263Snp tod->tod_syncache_respond = t4_syncache_respond; 844237263Snp tod->tod_offload_socket = t4_offload_socket; 845237263Snp 846237263Snp for_each_port(sc, i) 847237263Snp TOEDEV(sc->port[i]->ifp) = &td->tod; 848237263Snp 849237263Snp sc->tom_softc = td; 850237263Snp sc->flags |= TOM_INIT_DONE; 851237263Snp register_toedev(sc->tom_softc); 852237263Snp 853237263Snpdone: 854237263Snp if (rc != 0) 855237263Snp free_tom_data(sc, td); 856237263Snp return (rc); 857237263Snp} 858237263Snp 859237263Snpstatic int 860237263Snpt4_tom_deactivate(struct adapter *sc) 861237263Snp{ 862237263Snp int rc = 0; 863237263Snp struct tom_data *td = sc->tom_softc; 864237263Snp 865245274Snp ASSERT_SYNCHRONIZED_OP(sc); 866237263Snp 867237263Snp if (td == NULL) 868237263Snp return (0); /* XXX. KASSERT? */ 869237263Snp 870237263Snp if (sc->offload_map != 0) 871237263Snp return (EBUSY); /* at least one port has IFCAP_TOE enabled */ 872237263Snp 873237263Snp mtx_lock(&td->toep_list_lock); 874237263Snp if (!TAILQ_EMPTY(&td->toep_list)) 875237263Snp rc = EBUSY; 876237263Snp mtx_unlock(&td->toep_list_lock); 877237263Snp 878237263Snp mtx_lock(&td->lctx_hash_lock); 879237263Snp if (td->lctx_count > 0) 880237263Snp rc = EBUSY; 881237263Snp mtx_unlock(&td->lctx_hash_lock); 882237263Snp 883237263Snp if (rc == 0) { 884237263Snp unregister_toedev(sc->tom_softc); 885237263Snp free_tom_data(sc, td); 886237263Snp sc->tom_softc = NULL; 887237263Snp sc->flags &= ~TOM_INIT_DONE; 888237263Snp } 889237263Snp 890237263Snp return (rc); 891237263Snp} 892237263Snp 893237263Snpstatic int 894237263Snpt4_tom_mod_load(void) 895237263Snp{ 896237263Snp int rc; 897245441Snp struct protosw *tcp_protosw, *tcp6_protosw; 898237263Snp 899239344Snp tcp_protosw = pffindproto(PF_INET, IPPROTO_TCP, SOCK_STREAM); 900239344Snp if (tcp_protosw == NULL) 901239344Snp return (ENOPROTOOPT); 902239344Snp bcopy(tcp_protosw, &ddp_protosw, sizeof(ddp_protosw)); 903239344Snp bcopy(tcp_protosw->pr_usrreqs, &ddp_usrreqs, sizeof(ddp_usrreqs)); 904239344Snp ddp_usrreqs.pru_soreceive = t4_soreceive_ddp; 905239344Snp ddp_protosw.pr_usrreqs = &ddp_usrreqs; 906239344Snp 907245441Snp tcp6_protosw = pffindproto(PF_INET6, IPPROTO_TCP, SOCK_STREAM); 908245441Snp if (tcp6_protosw == NULL) 909245441Snp return (ENOPROTOOPT); 910245441Snp bcopy(tcp6_protosw, &ddp6_protosw, sizeof(ddp6_protosw)); 911245441Snp bcopy(tcp6_protosw->pr_usrreqs, &ddp6_usrreqs, sizeof(ddp6_usrreqs)); 912245441Snp ddp6_usrreqs.pru_soreceive = t4_soreceive_ddp; 913245441Snp ddp6_protosw.pr_usrreqs = &ddp6_usrreqs; 914245441Snp 915237263Snp rc = t4_register_uld(&tom_uld_info); 916237263Snp if (rc != 0) 917237263Snp t4_tom_mod_unload(); 918237263Snp 919237263Snp return (rc); 920237263Snp} 921237263Snp 922237263Snpstatic void 923237263Snptom_uninit(struct adapter *sc, void *arg __unused) 924237263Snp{ 925245274Snp if (begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4tomun")) 926245274Snp return; 927245274Snp 928237263Snp /* Try to free resources (works only if no port has IFCAP_TOE) */ 929237263Snp if (sc->flags & TOM_INIT_DONE) 930237263Snp t4_deactivate_uld(sc, ULD_TOM); 931245274Snp 932245274Snp end_synchronized_op(sc, LOCK_HELD); 933237263Snp} 934237263Snp 935237263Snpstatic int 936237263Snpt4_tom_mod_unload(void) 937237263Snp{ 938237263Snp t4_iterate(tom_uninit, NULL); 939237263Snp 940237263Snp if (t4_unregister_uld(&tom_uld_info) == EBUSY) 941237263Snp return (EBUSY); 942237263Snp 943237263Snp return (0); 944237263Snp} 945237263Snp#endif /* TCP_OFFLOAD */ 946237263Snp 947237263Snpstatic int 948237263Snpt4_tom_modevent(module_t mod, int cmd, void *arg) 949237263Snp{ 950237263Snp int rc = 0; 951237263Snp 952237263Snp#ifdef TCP_OFFLOAD 953237263Snp switch (cmd) { 954237263Snp case MOD_LOAD: 955237263Snp rc = t4_tom_mod_load(); 956237263Snp break; 957237263Snp 958237263Snp case MOD_UNLOAD: 959237263Snp rc = t4_tom_mod_unload(); 960237263Snp break; 961237263Snp 962237263Snp default: 963237263Snp rc = EINVAL; 964237263Snp } 965237263Snp#else 966237263Snp printf("t4_tom: compiled without TCP_OFFLOAD support.\n"); 967237263Snp rc = EOPNOTSUPP; 968237263Snp#endif 969237263Snp return (rc); 970237263Snp} 971237263Snp 972237263Snpstatic moduledata_t t4_tom_moddata= { 973237263Snp "t4_tom", 974237263Snp t4_tom_modevent, 975241394Skevlo 0 976237263Snp}; 977237263Snp 978237263SnpMODULE_VERSION(t4_tom, 1); 979237263SnpMODULE_DEPEND(t4_tom, toecore, 1, 1, 1); 980237263SnpMODULE_DEPEND(t4_tom, t4nex, 1, 1, 1); 981237263SnpDECLARE_MODULE(t4_tom, t4_tom_moddata, SI_SUB_EXEC, SI_ORDER_ANY); 982