t4_tom.c revision 346849
1/*- 2 * Copyright (c) 2012 Chelsio Communications, Inc. 3 * All rights reserved. 4 * Written by: Navdeep Parhar <np@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
26 */ 27 28#include <sys/cdefs.h> 29__FBSDID("$FreeBSD: stable/11/sys/dev/cxgbe/tom/t4_tom.c 346849 2019-04-28 18:36:54Z np $"); 30 31#include "opt_inet.h" 32#include "opt_inet6.h" 33 34#include <sys/param.h> 35#include <sys/types.h> 36#include <sys/systm.h> 37#include <sys/kernel.h> 38#include <sys/ktr.h> 39#include <sys/lock.h> 40#include <sys/limits.h> 41#include <sys/module.h> 42#include <sys/protosw.h> 43#include <sys/domain.h> 44#include <sys/refcount.h> 45#include <sys/rmlock.h> 46#include <sys/socket.h> 47#include <sys/socketvar.h> 48#include <sys/taskqueue.h> 49#include <net/if.h> 50#include <net/if_var.h> 51#include <net/if_types.h> 52#include <net/if_vlan_var.h> 53#include <netinet/in.h> 54#include <netinet/in_pcb.h> 55#include <netinet/in_var.h> 56#include <netinet/ip.h> 57#include <netinet/ip6.h> 58#include <netinet6/scope6_var.h> 59#define TCPSTATES 60#include <netinet/tcp_fsm.h> 61#include <netinet/tcp_timer.h> 62#include <netinet/tcp_var.h> 63#include <netinet/toecore.h> 64 65#ifdef TCP_OFFLOAD 66#include "common/common.h" 67#include "common/t4_msg.h" 68#include "common/t4_regs.h" 69#include "common/t4_regs_values.h" 70#include "common/t4_tcb.h" 71#include "tom/t4_tom_l2t.h" 72#include "tom/t4_tom.h" 73#include "tom/t4_tls.h" 74 75static struct protosw toe_protosw; 76static struct pr_usrreqs toe_usrreqs; 77 78static struct protosw toe6_protosw; 79static struct pr_usrreqs toe6_usrreqs; 80 81/* Module ops */ 82static int t4_tom_mod_load(void); 83static int t4_tom_mod_unload(void); 84static int t4_tom_modevent(module_t, int, void *); 85 86/* ULD ops and helpers */ 87static int t4_tom_activate(struct adapter *); 88static int t4_tom_deactivate(struct adapter *); 89 90static struct uld_info tom_uld_info = { 91 .uld_id = ULD_TOM, 92 .activate = t4_tom_activate, 93 .deactivate = t4_tom_deactivate, 94}; 95 96static void queue_tid_release(struct adapter *, int); 97static void release_offload_resources(struct toepcb *); 98static int alloc_tid_tabs(struct 
tid_info *);
static void free_tid_tabs(struct tid_info *);
static int add_lip(struct adapter *, struct in6_addr *);
static int delete_lip(struct adapter *, struct in6_addr *);
static struct clip_entry *search_lip(struct tom_data *, struct in6_addr *);
static void init_clip_table(struct adapter *, struct tom_data *);
static void update_clip(struct adapter *, void *);
static void t4_clip_task(void *, int);
static void update_clip_table(struct adapter *, struct tom_data *);
static void destroy_clip_table(struct adapter *, struct tom_data *);
static void free_tom_data(struct adapter *, struct tom_data *);
static void reclaim_wr_resources(void *, int);

/* Generation count bumped on IPv6 ifaddr changes; compared in update_clip_table. */
static int in6_ifaddr_gen;
static eventhandler_tag ifaddr_evhandler;
static struct timeout_task clip_task;

/*
 * Allocate and initialize a TOE PCB for the given virtual interface and
 * offload tx/rx queue pair.  'flags' is passed straight to malloc(9)
 * (M_NOWAIT or M_WAITOK), so NULL can be returned with M_NOWAIT.
 * The returned toepcb starts with a single reference (see free_toepcb).
 */
struct toepcb *
alloc_toepcb(struct vi_info *vi, int txqid, int rxqid, int flags)
{
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	struct toepcb *toep;
	int tx_credits, txsd_total, len;

	/*
	 * The firmware counts tx work request credits in units of 16 bytes
	 * each.  Reserve room for an ABORT_REQ so the driver never has to worry
	 * about tx credits if it wants to abort a connection.
	 */
	tx_credits = sc->params.ofldq_wr_cred;
	tx_credits -= howmany(sizeof(struct cpl_abort_req), 16);

	/*
	 * Shortest possible tx work request is a fw_ofld_tx_data_wr + 1 byte
	 * immediate payload, and firmware counts tx work request credits in
	 * units of 16 byte.  Calculate the maximum work requests possible.
	 */
	txsd_total = tx_credits /
	    howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16);

	KASSERT(txqid >= vi->first_ofld_txq &&
	    txqid < vi->first_ofld_txq + vi->nofldtxq,
	    ("%s: txqid %d for vi %p (first %d, n %d)", __func__, txqid, vi,
	    vi->first_ofld_txq, vi->nofldtxq));

	KASSERT(rxqid >= vi->first_ofld_rxq &&
	    rxqid < vi->first_ofld_rxq + vi->nofldrxq,
	    ("%s: rxqid %d for vi %p (first %d, n %d)", __func__, rxqid, vi,
	    vi->first_ofld_rxq, vi->nofldrxq));

	/* The tx sw descriptors are a flexible-array-style tail of the toepcb. */
	len = offsetof(struct toepcb, txsd) +
	    txsd_total * sizeof(struct ofld_tx_sdesc);

	toep = malloc(len, M_CXGBE, M_ZERO | flags);
	if (toep == NULL)
		return (NULL);

	refcount_init(&toep->refcount, 1);
	toep->td = sc->tom_softc;
	toep->vi = vi;
	toep->tc_idx = -1;	/* no traffic class assigned yet */
	toep->tx_total = tx_credits;
	toep->tx_credits = tx_credits;
	toep->ofld_txq = &sc->sge.ofld_txq[txqid];
	toep->ofld_rxq = &sc->sge.ofld_rxq[rxqid];
	toep->ctrlq = &sc->sge.ctrlq[pi->port_id];
	mbufq_init(&toep->ulp_pduq, INT_MAX);
	mbufq_init(&toep->ulp_pdu_reclaimq, INT_MAX);
	toep->txsd_total = txsd_total;
	toep->txsd_avail = txsd_total;
	toep->txsd_pidx = 0;
	toep->txsd_cidx = 0;
	aiotx_init_toep(toep);

	return (toep);
}

/* Take an additional reference on a toepcb. */
struct toepcb *
hold_toepcb(struct toepcb *toep)
{

	refcount_acquire(&toep->refcount);
	return (toep);
}

/*
 * Drop a reference on a toepcb; frees it when the last reference goes away.
 * Must not be called while the toepcb is still attached to an inpcb or has
 * CPLs outstanding.
 */
void
free_toepcb(struct toepcb *toep)
{

	/* refcount_release returns non-zero only for the last reference. */
	if (refcount_release(&toep->refcount) == 0)
		return;

	KASSERT(!(toep->flags & TPF_ATTACHED),
	    ("%s: attached to an inpcb", __func__));
	KASSERT(!(toep->flags & TPF_CPL_PENDING),
	    ("%s: CPL pending", __func__));

	if (toep->ulp_mode == ULP_MODE_TCPDDP)
		ddp_uninit_toep(toep);
	tls_uninit_toep(toep);
	free(toep, M_CXGBE);
}

/*
 * Set up the socket for TCP offload.
 */
void
offload_socket(struct socket *so, struct toepcb *toep)
{
	struct tom_data *td = toep->td;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp = intotcpcb(inp);
	struct sockbuf *sb;

	INP_WLOCK_ASSERT(inp);

	/*
	 * Update socket: SB_NOCOALESCE on both buffers, and switch so_proto to
	 * the TOE protosw so TOE-aware pr_usrreqs are used from here on.
	 */
	sb = &so->so_snd;
	SOCKBUF_LOCK(sb);
	sb->sb_flags |= SB_NOCOALESCE;
	SOCKBUF_UNLOCK(sb);
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);
	sb->sb_flags |= SB_NOCOALESCE;
	if (inp->inp_vflag & INP_IPV6)
		so->so_proto = &toe6_protosw;
	else
		so->so_proto = &toe_protosw;
	SOCKBUF_UNLOCK(sb);

	/* Update TCP PCB */
	tp->tod = &td->tod;
	tp->t_toe = toep;
	tp->t_flags |= TF_TOE;

	/*
	 * Install an extra hold on inp.  Released by final_cpl_received /
	 * undo_offload_socket.
	 */
	toep->inp = inp;
	toep->flags |= TPF_ATTACHED;
	in_pcbref(inp);

	/* Add the TOE PCB to the active list */
	mtx_lock(&td->toep_list_lock);
	TAILQ_INSERT_HEAD(&td->toep_list, toep, link);
	mtx_unlock(&td->toep_list_lock);
}

/* This is _not_ the normal way to "unoffload" a socket.
 */
void
undo_offload_socket(struct socket *so)
{
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp = intotcpcb(inp);
	struct toepcb *toep = tp->t_toe;
	struct tom_data *td = toep->td;
	struct sockbuf *sb;

	INP_WLOCK_ASSERT(inp);

	/* Reverse everything offload_socket did, in the same order. */
	sb = &so->so_snd;
	SOCKBUF_LOCK(sb);
	sb->sb_flags &= ~SB_NOCOALESCE;
	SOCKBUF_UNLOCK(sb);
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);
	sb->sb_flags &= ~SB_NOCOALESCE;
	SOCKBUF_UNLOCK(sb);

	tp->tod = NULL;
	tp->t_toe = NULL;
	tp->t_flags &= ~TF_TOE;

	toep->inp = NULL;
	toep->flags &= ~TPF_ATTACHED;
	/*
	 * Drop the hold installed by offload_socket.  It cannot be the last
	 * reference because the caller still holds the inp locked.
	 */
	if (in_pcbrele_wlocked(inp))
		panic("%s: inp freed.", __func__);

	mtx_lock(&td->toep_list_lock);
	TAILQ_REMOVE(&td->toep_list, toep, link);
	mtx_unlock(&td->toep_list_lock);
}

/*
 * Release all hardware/driver resources tied to a toepcb (L2 table entry,
 * tid, CLIP entry, traffic class) and drop the list reference.  Requires
 * that no CPLs are pending and the toepcb is detached from its inpcb.
 */
static void
release_offload_resources(struct toepcb *toep)
{
	struct tom_data *td = toep->td;
	struct adapter *sc = td_adapter(td);
	int tid = toep->tid;

	KASSERT(!(toep->flags & TPF_CPL_PENDING),
	    ("%s: %p has CPL pending.", __func__, toep));
	KASSERT(!(toep->flags & TPF_ATTACHED),
	    ("%s: %p is still attached.", __func__, toep));

	CTR5(KTR_CXGBE, "%s: toep %p (tid %d, l2te %p, ce %p)",
	    __func__, toep, tid, toep->l2te, toep->ce);

	/*
	 * These queues should have been emptied at approximately the same time
	 * that a normal connection's socket's so_snd would have been purged or
	 * drained.  Do _not_ clean up here.
	 */
	MPASS(mbufq_len(&toep->ulp_pduq) == 0);
	MPASS(mbufq_len(&toep->ulp_pdu_reclaimq) == 0);
#ifdef INVARIANTS
	if (toep->ulp_mode == ULP_MODE_TCPDDP)
		ddp_assert_empty(toep);
#endif

	if (toep->l2te)
		t4_l2t_release(toep->l2te);

	if (tid >= 0) {
		/* An IPv6 tid (with a CLIP entry) consumes 2 tid slots. */
		remove_tid(sc, tid, toep->ce ? 2 : 1);
		release_tid(sc, tid, toep->ctrlq);
	}

	if (toep->ce)
		release_lip(td, toep->ce);

#ifdef RATELIMIT
	if (toep->tc_idx != -1)
		t4_release_cl_rl_kbps(sc, toep->vi->pi->port_id, toep->tc_idx);
#endif
	mtx_lock(&td->toep_list_lock);
	TAILQ_REMOVE(&td->toep_list, toep, link);
	mtx_unlock(&td->toep_list_lock);

	free_toepcb(toep);
}

/*
 * The kernel is done with the TCP PCB and this is our opportunity to unhook the
 * toepcb hanging off of it.  If the TOE driver is also done with the toepcb (no
 * pending CPL) then it is time to release all resources tied to the toepcb.
 *
 * Also gets called when an offloaded active open fails and the TOM wants the
 * kernel to take the TCP PCB back.
 */
static void
t4_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp)
{
#if defined(KTR) || defined(INVARIANTS)
	struct inpcb *inp = tp->t_inpcb;
#endif
	struct toepcb *toep = tp->t_toe;

	INP_WLOCK_ASSERT(inp);

	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
	KASSERT(toep->flags & TPF_ATTACHED,
	    ("%s: not attached", __func__));

#ifdef KTR
	if (tp->t_state == TCPS_SYN_SENT) {
		CTR6(KTR_CXGBE, "%s: atid %d, toep %p (0x%x), inp %p (0x%x)",
		    __func__, toep->tid, toep, toep->flags, inp,
		    inp->inp_flags);
	} else {
		CTR6(KTR_CXGBE,
		    "t4_pcb_detach: tid %d (%s), toep %p (0x%x), inp %p (0x%x)",
		    toep->tid, tcpstates[tp->t_state], toep, toep->flags, inp,
		    inp->inp_flags);
	}
#endif

	tp->t_toe = NULL;
	tp->t_flags &= ~TF_TOE;
	toep->flags &= ~TPF_ATTACHED;

	/* If the driver still expects CPLs, final_cpl_received will clean up. */
	if (!(toep->flags & TPF_CPL_PENDING))
		release_offload_resources(toep);
}

/*
 * setsockopt handler.
 */
static void
t4_ctloutput(struct toedev *tod, struct tcpcb *tp, int dir, int name)
{
	struct adapter *sc = tod->tod_softc;
	struct toepcb *toep = tp->t_toe;

	/* Only SOPT_SET needs to be pushed to the hardware. */
	if (dir == SOPT_GET)
		return;

	CTR4(KTR_CXGBE, "%s: tp %p, dir %u, name %u", __func__, tp, dir, name);

	switch (name) {
	case TCP_NODELAY:
		if (tp->t_state != TCPS_ESTABLISHED)
			break;
		/* Mirror the kernel's Nagle setting into the hardware TCB. */
		t4_set_tcb_field(sc, toep->ctrlq, toep, W_TCB_T_FLAGS,
		    V_TF_NAGLE(1), V_TF_NAGLE(tp->t_flags & TF_NODELAY ? 0 : 1),
		    0, 0);
		break;
	default:
		break;
	}
}

/*
 * Return a single bit of a (byte-reversed) hardware TCB image.  Bit 'bit'
 * of the TCB lives in byte 127 - bit/8 after the reversal done by the
 * caller (t4_tcp_info); the TCB is 128 bytes.
 */
static inline int
get_tcb_bit(u_char *tcb, int bit)
{
	int ix, shift;

	ix = 127 - (bit >> 3);
	shift = bit & 0x7;

	return ((tcb[ix] >> shift) & 1);
}

/* Extract TCB bits [hi:lo] (inclusive, hi >= lo) as an integer, MSB first. */
static inline uint64_t
get_tcb_bits(u_char *tcb, int hi, int lo)
{
	uint64_t rc = 0;

	while (hi >= lo) {
		rc = (rc << 1) | get_tcb_bit(tcb, hi);
		--hi;
	}

	return (rc);
}

/*
 * Called by the kernel to allow the TOE driver to "refine" values filled up in
 * the tcp_info for an offloaded connection.
 */
static void
t4_tcp_info(struct toedev *tod, struct tcpcb *tp, struct tcp_info *ti)
{
	int i, j, k, rc;
	struct adapter *sc = tod->tod_softc;
	struct toepcb *toep = tp->t_toe;
	uint32_t addr, v;
	uint32_t buf[TCB_SIZE / sizeof(uint32_t)];
	u_char *tcb, tmp;

	INP_WLOCK_ASSERT(tp->t_inpcb);
	MPASS(ti != NULL);

	/* Read this connection's raw TCB out of adapter memory. */
	addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE) + toep->tid * TCB_SIZE;
	rc = read_via_memwin(sc, 2, addr, &buf[0], TCB_SIZE);
	if (rc != 0)
		return;		/* leave the kernel-provided values as-is */

	/*
	 * Reverse the order of the 16-byte rows of the TCB so that
	 * get_tcb_bit's 127 - bit/8 indexing works on it.
	 */
	tcb = (u_char *)&buf[0];
	for (i = 0, j = TCB_SIZE - 16; i < j; i += 16, j -= 16) {
		for (k = 0; k < 16; k++) {
			tmp = tcb[i + k];
			tcb[i + k] = tcb[j + k];
			tcb[j + k] = tmp;
		}
	}

	/*
	 * Bit ranges below are hardware TCB field positions (see t4_tcb.h);
	 * NOTE(review): offsets assumed correct for this chip family — they
	 * are not derivable from this file alone.
	 */
	ti->tcpi_state = get_tcb_bits(tcb, 115, 112);

	/* RTT/RTTVAR are kept in hardware core-clock ticks. */
	v = get_tcb_bits(tcb, 271, 256);
	ti->tcpi_rtt = tcp_ticks_to_us(sc, v);

	v = get_tcb_bits(tcb, 287, 272);
	ti->tcpi_rttvar = tcp_ticks_to_us(sc, v);

	ti->tcpi_snd_ssthresh = get_tcb_bits(tcb, 487, 460);
	ti->tcpi_snd_cwnd = get_tcb_bits(tcb, 459, 432);
	ti->tcpi_rcv_nxt = get_tcb_bits(tcb, 553, 522);

	/* snd_nxt = snd_max - (bytes not yet transmitted). */
	ti->tcpi_snd_nxt = get_tcb_bits(tcb, 319, 288) -
	    get_tcb_bits(tcb, 375, 348);

	/* Receive window being advertised by us. */
	ti->tcpi_rcv_space = get_tcb_bits(tcb, 581, 554);

	/* Send window ceiling. */
	v = get_tcb_bits(tcb, 159, 144) << get_tcb_bits(tcb, 131, 128);
	ti->tcpi_snd_wnd = min(v, ti->tcpi_snd_cwnd);
}

/*
 * The TOE driver will not receive any more CPLs for the tid associated with the
 * toepcb; release the hold on the inpcb.
 */
void
final_cpl_received(struct toepcb *toep)
{
	struct inpcb *inp = toep->inp;

	KASSERT(inp != NULL, ("%s: inp is NULL", __func__));
	INP_WLOCK_ASSERT(inp);
	KASSERT(toep->flags & TPF_CPL_PENDING,
	    ("%s: CPL not pending already?", __func__));

	CTR6(KTR_CXGBE, "%s: tid %d, toep %p (0x%x), inp %p (0x%x)",
	    __func__, toep->tid, toep, toep->flags, inp, inp->inp_flags);

	if (toep->ulp_mode == ULP_MODE_TCPDDP)
		release_ddp_resources(toep);
	toep->inp = NULL;
	toep->flags &= ~TPF_CPL_PENDING;
	mbufq_drain(&toep->ulp_pdu_reclaimq);

	/* If the kernel already detached (t4_pcb_detach), finish the teardown. */
	if (!(toep->flags & TPF_ATTACHED))
		release_offload_resources(toep);

	/* Drop the hold taken in offload_socket; unlock unless it was last. */
	if (!in_pcbrele_wlocked(inp))
		INP_WUNLOCK(inp);
}

/*
 * Record 'ctx' for 'tid'.  'ntids' is the number of tid slots consumed
 * (2 for IPv6 connections, 1 otherwise — see release_offload_resources).
 */
void
insert_tid(struct adapter *sc, int tid, void *ctx, int ntids)
{
	struct tid_info *t = &sc->tids;

	t->tid_tab[tid] = ctx;
	atomic_add_int(&t->tids_in_use, ntids);
}

/* Return the context registered for 'tid' (NULL if none). */
void *
lookup_tid(struct adapter *sc, int tid)
{
	struct tid_info *t = &sc->tids;

	return (t->tid_tab[tid]);
}

/* Replace the context registered for 'tid' without touching the use count. */
void
update_tid(struct adapter *sc, int tid, void *ctx)
{
	struct tid_info *t = &sc->tids;

	t->tid_tab[tid] = ctx;
}

/* Clear the context for 'tid' and return its slots to the pool accounting. */
void
remove_tid(struct adapter *sc, int tid, int ntids)
{
	struct tid_info *t = &sc->tids;

	t->tid_tab[tid] = NULL;
	atomic_subtract_int(&t->tids_in_use, ntids);
}

/*
 * Tell the firmware the tid is no longer in use by sending a CPL_TID_RELEASE
 * on the given control queue.
 */
void
release_tid(struct adapter *sc, int tid, struct sge_wrq *ctrlq)
{
	struct wrqe *wr;
	struct cpl_tid_release *req;

	wr = alloc_wrqe(sizeof(*req), ctrlq);
	if (wr == NULL) {
		queue_tid_release(sc, tid);	/* defer */
		return;
	}
	req = wrtod(wr);

	INIT_TP_WR_MIT_CPL(req, CPL_TID_RELEASE, tid);

	t4_wrq_tx(sc, wr);
}

/* Deferred tid release is not implemented; alloc_wrqe failure panics here. */
static void
queue_tid_release(struct adapter *sc, int tid)
{

	CXGBE_UNIMPLEMENTED("deferred tid release");
}

/*
 * What mtu_idx to use, given a 4-tuple.
 * Note that both s->mss and tcp_mssopt
 * have the MSS that we should advertise in our SYN.  Advertised MSS doesn't
 * account for any TCP options so the effective MSS (only payload, no headers or
 * options) could be different.  We fill up tp->t_maxseg with the effective MSS
 * at the end of the 3-way handshake.
 */
int
find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc,
    struct offload_settings *s)
{
	unsigned short *mtus = &sc->params.mtus[0];
	int i, mss, mtu;

	MPASS(inc != NULL);

	/* Policy-supplied MSS (s->mss > 0) overrides the kernel's estimate. */
	mss = s->mss > 0 ? s->mss : tcp_mssopt(inc);
	if (inc->inc_flags & INC_ISIPV6)
		mtu = mss + sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
	else
		mtu = mss + sizeof(struct ip) + sizeof(struct tcphdr);

	/* Pick the largest entry in the (sorted) MTU table that fits. */
	for (i = 0; i < NMTUS - 1 && mtus[i + 1] <= mtu; i++)
		continue;

	return (i);
}

/*
 * Determine the receive window size for a socket, clamped to
 * [MIN_RCV_WND, MAX_RCV_WND].
 */
u_long
select_rcv_wnd(struct socket *so)
{
	unsigned long wnd;

	SOCKBUF_LOCK_ASSERT(&so->so_rcv);

	wnd = sbspace(&so->so_rcv);
	if (wnd < MIN_RCV_WND)
		wnd = MIN_RCV_WND;

	return min(wnd, MAX_RCV_WND);
}

/*
 * Smallest window scale that lets TCP_MAXWIN << wscale cover the system's
 * maximum socket buffer (capped at MAX_RCV_WND).
 */
int
select_rcv_wscale(void)
{
	int wscale = 0;
	unsigned long space = sb_max;

	if (space > MAX_RCV_WND)
		space = MAX_RCV_WND;

	while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < space)
		wscale++;

	return (wscale);
}

/*
 * socket so could be a listening socket too.
 */
uint64_t
calc_opt0(struct socket *so, struct vi_info *vi, struct l2t_entry *e,
    int mtu_idx, int rscale, int rx_credits, int ulp_mode,
    struct offload_settings *s)
{
	int keepalive;
	uint64_t opt0;

	MPASS(so != NULL);
	MPASS(vi != NULL);
	KASSERT(rx_credits <= M_RCV_BUFSIZ,
	    ("%s: rcv_bufsiz too high", __func__));

	/* Assemble the fixed fields of the hardware's opt0 word. */
	opt0 = F_TCAM_BYPASS | V_WND_SCALE(rscale) | V_MSS_IDX(mtu_idx) |
	    V_ULP_MODE(ulp_mode) | V_RCV_BUFSIZ(rx_credits) |
	    V_L2T_IDX(e->idx) | V_SMAC_SEL(vi->smt_idx) |
	    V_TX_CHAN(vi->pi->tx_chan);

	keepalive = tcp_always_keepalive || so_options_get(so) & SO_KEEPALIVE;
	opt0 |= V_KEEP_ALIVE(keepalive != 0);

	/* s->nagle < 0 means "no policy opinion": follow the socket's setting. */
	if (s->nagle < 0) {
		struct inpcb *inp = sotoinpcb(so);
		struct tcpcb *tp = intotcpcb(inp);

		opt0 |= V_NAGLE((tp->t_flags & TF_NODELAY) == 0);
	} else
		opt0 |= V_NAGLE(s->nagle != 0);

	/* opt0 goes on the wire; convert to big-endian here. */
	return htobe64(opt0);
}

/*
 * Build the compressed filter tuple for a connection through l2t entry 'e'
 * on virtual interface 'vi'.  Which fields exist (and where) depends on the
 * adapter's TP configuration (the *_shift values; -1 means absent).
 */
uint64_t
select_ntuple(struct vi_info *vi, struct l2t_entry *e)
{
	struct adapter *sc = vi->pi->adapter;
	struct tp_params *tp = &sc->params.tp;
	uint16_t viid = vi->viid;
	uint64_t ntuple = 0;

	/*
	 * Initialize each of the fields which we care about which are present
	 * in the Compressed Filter Tuple.
	 */
	if (tp->vlan_shift >= 0 && e->vlan != CPL_L2T_VLAN_NONE)
		ntuple |= (uint64_t)(F_FT_VLAN_VLD | e->vlan) << tp->vlan_shift;

	if (tp->port_shift >= 0)
		ntuple |= (uint64_t)e->lport << tp->port_shift;

	if (tp->protocol_shift >= 0)
		ntuple |= (uint64_t)IPPROTO_TCP << tp->protocol_shift;

	if (tp->vnic_shift >= 0) {
		uint32_t vf = G_FW_VIID_VIN(viid);
		uint32_t pf = G_FW_VIID_PFN(viid);
		uint32_t vld = G_FW_VIID_VIVLD(viid);

		ntuple |= (uint64_t)(V_FT_VNID_ID_VF(vf) | V_FT_VNID_ID_PF(pf) |
		    V_FT_VNID_ID_VLD(vld)) << tp->vnic_shift;
	}

	/* T4 uses a 32-bit tuple; T5+ use the wider FILTER_TUPLE encoding. */
	if (is_t4(sc))
		return (htobe32((uint32_t)ntuple));
	else
		return (htobe64(V_FILTER_TUPLE(ntuple)));
}

/*
 * True if either port of the connection is in the administrator-configured
 * list of TLS rx ports.
 */
static int
is_tls_sock(struct socket *so, struct adapter *sc)
{
	struct inpcb *inp = sotoinpcb(so);
	int i, rc;

	/* XXX: Eventually add a SO_WANT_TLS socket option perhaps? */
	rc = 0;
	ADAPTER_LOCK(sc);
	for (i = 0; i < sc->tt.num_tls_rx_ports; i++) {
		if (inp->inp_lport == htons(sc->tt.tls_rx_ports[i]) ||
		    inp->inp_fport == htons(sc->tt.tls_rx_ports[i])) {
			rc = 1;
			break;
		}
	}
	ADAPTER_UNLOCK(sc);
	return (rc);
}

/*
 * Pick the ULP mode for a new connection: TLS if offloadable and requested
 * (explicitly by policy, or implicitly by port), else DDP, else plain TCP.
 * Policy values: > 0 force on, 0 force off, < 0 defer to global config.
 */
int
select_ulp_mode(struct socket *so, struct adapter *sc,
    struct offload_settings *s)
{

	if (can_tls_offload(sc) &&
	    (s->tls > 0 || (s->tls < 0 && is_tls_sock(so, sc))))
		return (ULP_MODE_TLS);
	else if (s->ddp > 0 ||
	    (s->ddp < 0 && sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0))
		return (ULP_MODE_TCPDDP);
	else
		return (ULP_MODE_NONE);
}

/* Record the ULP mode on the toepcb and do mode-specific initialization. */
void
set_ulp_mode(struct toepcb *toep, int ulp_mode)
{

	CTR4(KTR_CXGBE, "%s: toep %p (tid %d) ulp_mode %d",
	    __func__, toep, toep->tid, ulp_mode);
	toep->ulp_mode = ulp_mode;
	tls_init_toep(toep);
	if (toep->ulp_mode == ULP_MODE_TCPDDP)
		ddp_init_toep(toep);
}

/*
 * True if the CPL status is "negative advice" (transient trouble, not a
 * hard connection failure).
 */
int
negative_advice(int status)
{

	return (status == CPL_ERR_RTX_NEG_ADVICE ||
	    status == CPL_ERR_PERSIST_NEG_ADVICE ||
	    status == CPL_ERR_KEEPALV_NEG_ADVICE);
}

/* Allocate the connection tid -> context table.  'flags' are malloc(9) flags. */
static int
alloc_tid_tab(struct tid_info *t, int flags)
{

	MPASS(t->ntids > 0);
	MPASS(t->tid_tab == NULL);

	t->tid_tab = malloc(t->ntids * sizeof(*t->tid_tab), M_CXGBE,
	    M_ZERO | flags);
	if (t->tid_tab == NULL)
		return (ENOMEM);
	atomic_store_rel_int(&t->tids_in_use, 0);

	return (0);
}

/* Free the tid table; all tids must have been released first. */
static void
free_tid_tab(struct tid_info *t)
{

	KASSERT(t->tids_in_use == 0,
	    ("%s: %d tids still in use.", __func__, t->tids_in_use));

	free(t->tid_tab, M_CXGBE);
	t->tid_tab = NULL;
}

/* Allocate the server tid (listener) table and its lock/freelist state. */
static int
alloc_stid_tab(struct tid_info *t, int flags)
{

	MPASS(t->nstids > 0);
	MPASS(t->stid_tab == NULL);

	t->stid_tab = malloc(t->nstids * sizeof(*t->stid_tab), M_CXGBE,
	    M_ZERO | flags);
	if (t->stid_tab == NULL)
		return (ENOMEM);
	mtx_init(&t->stid_lock, "stid lock", NULL, MTX_DEF);
	t->stids_in_use = 0;
	TAILQ_INIT(&t->stids);
	t->nstids_free_head = t->nstids;

	return (0);
}

/* Free the stid table; all stids must have been released first. */
static void
free_stid_tab(struct tid_info *t)
{

	KASSERT(t->stids_in_use == 0,
	    ("%s: %d tids still in use.", __func__, t->stids_in_use));

	if (mtx_initialized(&t->stid_lock))
		mtx_destroy(&t->stid_lock);
	free(t->stid_tab, M_CXGBE);
	t->stid_tab = NULL;
}

/* Free all three tid tables (tid, atid, stid).  Safe on partially set up state. */
static void
free_tid_tabs(struct tid_info *t)
{

	free_tid_tab(t);
	free_atid_tab(t);
	free_stid_tab(t);
}

/* Allocate all three tid tables, unwinding completely on any failure. */
static int
alloc_tid_tabs(struct tid_info *t)
{
	int rc;

	rc = alloc_tid_tab(t, M_NOWAIT);
	if (rc != 0)
		goto failed;

	rc = alloc_atid_tab(t, M_NOWAIT);
	if (rc != 0)
		goto failed;

	rc = alloc_stid_tab(t, M_NOWAIT);
	if (rc != 0)
		goto failed;

	return (0);
failed:
	free_tid_tabs(t);
	return (rc);
}

/*
 * Add an IPv6 local address to the adapter's CLIP (compressed local IP)
 * table via a firmware CLIP command.  Returns 0 or a positive errno.
 */
static int
add_lip(struct adapter *sc, struct in6_addr *lip)
{
	struct
fw_clip_cmd c;

	ASSERT_SYNCHRONIZED_OP(sc);
	/* mtx_assert(&td->clip_table_lock, MA_OWNED); */

	memset(&c, 0, sizeof(c));
	c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST |
	    F_FW_CMD_WRITE);
	c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_ALLOC | FW_LEN16(c));
	/* s6_addr is 16 bytes; ship it as two 64-bit halves. */
	c.ip_hi = *(uint64_t *)&lip->s6_addr[0];
	c.ip_lo = *(uint64_t *)&lip->s6_addr[8];

	/* t4_wr_mbox_ns returns a negative error; flip to positive errno. */
	return (-t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c));
}

/*
 * Remove an IPv6 local address from the adapter's CLIP table.
 * Returns 0 or a positive errno.
 */
static int
delete_lip(struct adapter *sc, struct in6_addr *lip)
{
	struct fw_clip_cmd c;

	ASSERT_SYNCHRONIZED_OP(sc);
	/* mtx_assert(&td->clip_table_lock, MA_OWNED); */

	memset(&c, 0, sizeof(c));
	c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST |
	    F_FW_CMD_READ);
	c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_FREE | FW_LEN16(c));
	c.ip_hi = *(uint64_t *)&lip->s6_addr[0];
	c.ip_lo = *(uint64_t *)&lip->s6_addr[8];

	return (-t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c));
}

/* Linear search of the software CLIP table; caller holds clip_table_lock. */
static struct clip_entry *
search_lip(struct tom_data *td, struct in6_addr *lip)
{
	struct clip_entry *ce;

	mtx_assert(&td->clip_table_lock, MA_OWNED);

	TAILQ_FOREACH(ce, &td->clip_table, link) {
		if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip))
			return (ce);
	}

	return (NULL);
}

/*
 * Take a reference on the CLIP entry for 'lip'.  If 'ce' is non-NULL the
 * lookup is skipped and that entry is used directly.  Returns the entry,
 * or NULL if the address is not in the table.
 */
struct clip_entry *
hold_lip(struct tom_data *td, struct in6_addr *lip, struct clip_entry *ce)
{

	mtx_lock(&td->clip_table_lock);
	if (ce == NULL)
		ce = search_lip(td, lip);
	if (ce != NULL)
		ce->refcount++;
	mtx_unlock(&td->clip_table_lock);

	return (ce);
}

/* Drop a reference taken by hold_lip.  The entry itself is not freed here. */
void
release_lip(struct tom_data *td, struct clip_entry *ce)
{

	mtx_lock(&td->clip_table_lock);
	KASSERT(search_lip(td, &ce->lip) == ce,
	    ("%s: CLIP entry %p p not in CLIP table.", __func__, ce));
	KASSERT(ce->refcount > 0,
	    ("%s: CLIP entry %p has refcount 0", __func__, ce));
	--ce->refcount;
	mtx_unlock(&td->clip_table_lock);
920} 921 922static void 923init_clip_table(struct adapter *sc, struct tom_data *td) 924{ 925 926 ASSERT_SYNCHRONIZED_OP(sc); 927 928 mtx_init(&td->clip_table_lock, "CLIP table lock", NULL, MTX_DEF); 929 TAILQ_INIT(&td->clip_table); 930 td->clip_gen = -1; 931 932 update_clip_table(sc, td); 933} 934 935static void 936update_clip(struct adapter *sc, void *arg __unused) 937{ 938 939 if (begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4tomuc")) 940 return; 941 942 if (uld_active(sc, ULD_TOM)) 943 update_clip_table(sc, sc->tom_softc); 944 945 end_synchronized_op(sc, LOCK_HELD); 946} 947 948static void 949t4_clip_task(void *arg, int count) 950{ 951 952 t4_iterate(update_clip, NULL); 953} 954 955static void 956update_clip_table(struct adapter *sc, struct tom_data *td) 957{ 958 struct rm_priotracker in6_ifa_tracker; 959 struct in6_ifaddr *ia; 960 struct in6_addr *lip, tlip; 961 struct clip_head stale; 962 struct clip_entry *ce, *ce_temp; 963 struct vi_info *vi; 964 int rc, gen, i, j; 965 uintptr_t last_vnet; 966 967 ASSERT_SYNCHRONIZED_OP(sc); 968 969 IN6_IFADDR_RLOCK(&in6_ifa_tracker); 970 mtx_lock(&td->clip_table_lock); 971 972 gen = atomic_load_acq_int(&in6_ifaddr_gen); 973 if (gen == td->clip_gen) 974 goto done; 975 976 TAILQ_INIT(&stale); 977 TAILQ_CONCAT(&stale, &td->clip_table, link); 978 979 /* 980 * last_vnet optimizes the common cases where all if_vnet = NULL (no 981 * VIMAGE) or all if_vnet = vnet0. 
982 */ 983 last_vnet = (uintptr_t)(-1); 984 for_each_port(sc, i) 985 for_each_vi(sc->port[i], j, vi) { 986 if (last_vnet == (uintptr_t)vi->ifp->if_vnet) 987 continue; 988 989 /* XXX: races with if_vmove */ 990 CURVNET_SET(vi->ifp->if_vnet); 991 TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { 992 lip = &ia->ia_addr.sin6_addr; 993 994 KASSERT(!IN6_IS_ADDR_MULTICAST(lip), 995 ("%s: mcast address in in6_ifaddr list", __func__)); 996 997 if (IN6_IS_ADDR_LOOPBACK(lip)) 998 continue; 999 if (IN6_IS_SCOPE_EMBED(lip)) { 1000 /* Remove the embedded scope */ 1001 tlip = *lip; 1002 lip = &tlip; 1003 in6_clearscope(lip); 1004 } 1005 /* 1006 * XXX: how to weed out the link local address for the 1007 * loopback interface? It's fe80::1 usually (always?). 1008 */ 1009 1010 /* 1011 * If it's in the main list then we already know it's 1012 * not stale. 1013 */ 1014 TAILQ_FOREACH(ce, &td->clip_table, link) { 1015 if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) 1016 goto next; 1017 } 1018 1019 /* 1020 * If it's in the stale list we should move it to the 1021 * main list. 
1022 */ 1023 TAILQ_FOREACH(ce, &stale, link) { 1024 if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) { 1025 TAILQ_REMOVE(&stale, ce, link); 1026 TAILQ_INSERT_TAIL(&td->clip_table, ce, 1027 link); 1028 goto next; 1029 } 1030 } 1031 1032 /* A new IP6 address; add it to the CLIP table */ 1033 ce = malloc(sizeof(*ce), M_CXGBE, M_NOWAIT); 1034 memcpy(&ce->lip, lip, sizeof(ce->lip)); 1035 ce->refcount = 0; 1036 rc = add_lip(sc, lip); 1037 if (rc == 0) 1038 TAILQ_INSERT_TAIL(&td->clip_table, ce, link); 1039 else { 1040 char ip[INET6_ADDRSTRLEN]; 1041 1042 inet_ntop(AF_INET6, &ce->lip, &ip[0], 1043 sizeof(ip)); 1044 log(LOG_ERR, "%s: could not add %s (%d)\n", 1045 __func__, ip, rc); 1046 free(ce, M_CXGBE); 1047 } 1048next: 1049 continue; 1050 } 1051 CURVNET_RESTORE(); 1052 last_vnet = (uintptr_t)vi->ifp->if_vnet; 1053 } 1054 1055 /* 1056 * Remove stale addresses (those no longer in V_in6_ifaddrhead) that are 1057 * no longer referenced by the driver. 1058 */ 1059 TAILQ_FOREACH_SAFE(ce, &stale, link, ce_temp) { 1060 if (ce->refcount == 0) { 1061 rc = delete_lip(sc, &ce->lip); 1062 if (rc == 0) { 1063 TAILQ_REMOVE(&stale, ce, link); 1064 free(ce, M_CXGBE); 1065 } else { 1066 char ip[INET6_ADDRSTRLEN]; 1067 1068 inet_ntop(AF_INET6, &ce->lip, &ip[0], 1069 sizeof(ip)); 1070 log(LOG_ERR, "%s: could not delete %s (%d)\n", 1071 __func__, ip, rc); 1072 } 1073 } 1074 } 1075 /* The ones that are still referenced need to stay in the CLIP table */ 1076 TAILQ_CONCAT(&td->clip_table, &stale, link); 1077 1078 td->clip_gen = gen; 1079done: 1080 mtx_unlock(&td->clip_table_lock); 1081 IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); 1082} 1083 1084static void 1085destroy_clip_table(struct adapter *sc, struct tom_data *td) 1086{ 1087 struct clip_entry *ce, *ce_temp; 1088 1089 if (mtx_initialized(&td->clip_table_lock)) { 1090 mtx_lock(&td->clip_table_lock); 1091 TAILQ_FOREACH_SAFE(ce, &td->clip_table, link, ce_temp) { 1092 KASSERT(ce->refcount == 0, 1093 ("%s: CLIP entry %p still in use (%d)", __func__, 1094 ce, 
			    ce->refcount));
			TAILQ_REMOVE(&td->clip_table, ce, link);
			delete_lip(sc, &ce->lip);
			free(ce, M_CXGBE);
		}
		mtx_unlock(&td->clip_table_lock);
		mtx_destroy(&td->clip_table_lock);
	}
}

/*
 * Free all TOM software state for an adapter.  Called with no connections
 * or listeners remaining (asserted below).
 */
static void
free_tom_data(struct adapter *sc, struct tom_data *td)
{

	ASSERT_SYNCHRONIZED_OP(sc);

	KASSERT(TAILQ_EMPTY(&td->toep_list),
	    ("%s: TOE PCB list is not empty.", __func__));
	KASSERT(td->lctx_count == 0,
	    ("%s: lctx hash table is not empty.", __func__));

	t4_free_ppod_region(&td->pr);
	destroy_clip_table(sc, td);

	if (td->listen_mask != 0)
		hashdestroy(td->listen_hash, M_CXGBE, td->listen_mask);

	if (mtx_initialized(&td->unsent_wr_lock))
		mtx_destroy(&td->unsent_wr_lock);
	if (mtx_initialized(&td->lctx_hash_lock))
		mtx_destroy(&td->lctx_hash_lock);
	if (mtx_initialized(&td->toep_list_lock))
		mtx_destroy(&td->toep_list_lock);

	free_tid_tabs(&sc->tids);
	free(td, M_CXGBE);
}

/*
 * Synthesize an Ethernet+IP+TCP packet for the offload policy's BPF filter.
 * vtag == 0xffff means "no VLAN tag".  For OPEN_TYPE_LISTEN the destination
 * address/port mirror the source (there is no peer yet).  Returns a
 * malloc(9)ed buffer (caller frees, M_CXGBE) or NULL on allocation failure;
 * *pktlen and *buflen are set to the packet length.
 */
static char *
prepare_pkt(int open_type, uint16_t vtag, struct inpcb *inp, int *pktlen,
    int *buflen)
{
	char *pkt;
	struct tcphdr *th;
	int ipv6, len;
	const int maxlen =
	    max(sizeof(struct ether_header), sizeof(struct ether_vlan_header)) +
	    max(sizeof(struct ip), sizeof(struct ip6_hdr)) +
	    sizeof(struct tcphdr);

	MPASS(open_type == OPEN_TYPE_ACTIVE || open_type == OPEN_TYPE_LISTEN);

	pkt = malloc(maxlen, M_CXGBE, M_ZERO | M_NOWAIT);
	if (pkt == NULL)
		return (NULL);

	ipv6 = inp->inp_vflag & INP_IPV6;
	len = 0;

	if (vtag == 0xffff) {
		struct ether_header *eh = (void *)pkt;

		if (ipv6)
			eh->ether_type = htons(ETHERTYPE_IPV6);
		else
			eh->ether_type = htons(ETHERTYPE_IP);

		len += sizeof(*eh);
	} else {
		struct ether_vlan_header *evh = (void *)pkt;

		evh->evl_encap_proto = htons(ETHERTYPE_VLAN);
		evh->evl_tag =
htons(vtag); 1167 if (ipv6) 1168 evh->evl_proto = htons(ETHERTYPE_IPV6); 1169 else 1170 evh->evl_proto = htons(ETHERTYPE_IP); 1171 1172 len += sizeof(*evh); 1173 } 1174 1175 if (ipv6) { 1176 struct ip6_hdr *ip6 = (void *)&pkt[len]; 1177 1178 ip6->ip6_vfc = IPV6_VERSION; 1179 ip6->ip6_plen = htons(sizeof(struct tcphdr)); 1180 ip6->ip6_nxt = IPPROTO_TCP; 1181 if (open_type == OPEN_TYPE_ACTIVE) { 1182 ip6->ip6_src = inp->in6p_laddr; 1183 ip6->ip6_dst = inp->in6p_faddr; 1184 } else if (open_type == OPEN_TYPE_LISTEN) { 1185 ip6->ip6_src = inp->in6p_laddr; 1186 ip6->ip6_dst = ip6->ip6_src; 1187 } 1188 1189 len += sizeof(*ip6); 1190 } else { 1191 struct ip *ip = (void *)&pkt[len]; 1192 1193 ip->ip_v = IPVERSION; 1194 ip->ip_hl = sizeof(*ip) >> 2; 1195 ip->ip_tos = inp->inp_ip_tos; 1196 ip->ip_len = htons(sizeof(struct ip) + sizeof(struct tcphdr)); 1197 ip->ip_ttl = inp->inp_ip_ttl; 1198 ip->ip_p = IPPROTO_TCP; 1199 if (open_type == OPEN_TYPE_ACTIVE) { 1200 ip->ip_src = inp->inp_laddr; 1201 ip->ip_dst = inp->inp_faddr; 1202 } else if (open_type == OPEN_TYPE_LISTEN) { 1203 ip->ip_src = inp->inp_laddr; 1204 ip->ip_dst = ip->ip_src; 1205 } 1206 1207 len += sizeof(*ip); 1208 } 1209 1210 th = (void *)&pkt[len]; 1211 if (open_type == OPEN_TYPE_ACTIVE) { 1212 th->th_sport = inp->inp_lport; /* network byte order already */ 1213 th->th_dport = inp->inp_fport; /* ditto */ 1214 } else if (open_type == OPEN_TYPE_LISTEN) { 1215 th->th_sport = inp->inp_lport; /* network byte order already */ 1216 th->th_dport = th->th_sport; 1217 } 1218 len += sizeof(th); 1219 1220 *pktlen = *buflen = len; 1221 return (pkt); 1222} 1223 1224const struct offload_settings * 1225lookup_offload_policy(struct adapter *sc, int open_type, struct mbuf *m, 1226 uint16_t vtag, struct inpcb *inp) 1227{ 1228 const struct t4_offload_policy *op; 1229 char *pkt; 1230 struct offload_rule *r; 1231 int i, matched, pktlen, buflen; 1232 static const struct offload_settings allow_offloading_settings = { 1233 .offload = 1, 
1234 .rx_coalesce = -1, 1235 .cong_algo = -1, 1236 .sched_class = -1, 1237 .tstamp = -1, 1238 .sack = -1, 1239 .nagle = -1, 1240 .ecn = -1, 1241 .ddp = -1, 1242 .tls = -1, 1243 .txq = -1, 1244 .rxq = -1, 1245 .mss = -1, 1246 }; 1247 static const struct offload_settings disallow_offloading_settings = { 1248 .offload = 0, 1249 /* rest is irrelevant when offload is off. */ 1250 }; 1251 1252 rw_assert(&sc->policy_lock, RA_LOCKED); 1253 1254 /* 1255 * If there's no Connection Offloading Policy attached to the device 1256 * then we need to return a default static policy. If 1257 * "cop_managed_offloading" is true, then we need to disallow 1258 * offloading until a COP is attached to the device. Otherwise we 1259 * allow offloading ... 1260 */ 1261 op = sc->policy; 1262 if (op == NULL) { 1263 if (sc->tt.cop_managed_offloading) 1264 return (&disallow_offloading_settings); 1265 else 1266 return (&allow_offloading_settings); 1267 } 1268 1269 switch (open_type) { 1270 case OPEN_TYPE_ACTIVE: 1271 case OPEN_TYPE_LISTEN: 1272 pkt = prepare_pkt(open_type, vtag, inp, &pktlen, &buflen); 1273 break; 1274 case OPEN_TYPE_PASSIVE: 1275 MPASS(m != NULL); 1276 pkt = mtod(m, char *); 1277 MPASS(*pkt == CPL_PASS_ACCEPT_REQ); 1278 pkt += sizeof(struct cpl_pass_accept_req); 1279 pktlen = m->m_pkthdr.len - sizeof(struct cpl_pass_accept_req); 1280 buflen = m->m_len - sizeof(struct cpl_pass_accept_req); 1281 break; 1282 default: 1283 MPASS(0); 1284 return (&disallow_offloading_settings); 1285 } 1286 1287 if (pkt == NULL || pktlen == 0 || buflen == 0) 1288 return (&disallow_offloading_settings); 1289 1290 r = &op->rule[0]; 1291 for (i = 0; i < op->nrules; i++, r++) { 1292 if (r->open_type != open_type && 1293 r->open_type != OPEN_TYPE_DONTCARE) { 1294 continue; 1295 } 1296 matched = bpf_filter(r->bpf_prog.bf_insns, pkt, pktlen, buflen); 1297 if (matched) 1298 break; 1299 } 1300 1301 if (open_type == OPEN_TYPE_ACTIVE || open_type == OPEN_TYPE_LISTEN) 1302 free(pkt, M_CXGBE); 1303 1304 return 
(matched ? &r->settings : &disallow_offloading_settings); 1305} 1306 1307static void 1308reclaim_wr_resources(void *arg, int count) 1309{ 1310 struct tom_data *td = arg; 1311 STAILQ_HEAD(, wrqe) twr_list = STAILQ_HEAD_INITIALIZER(twr_list); 1312 struct cpl_act_open_req *cpl; 1313 u_int opcode, atid; 1314 struct wrqe *wr; 1315 struct adapter *sc; 1316 1317 mtx_lock(&td->unsent_wr_lock); 1318 STAILQ_SWAP(&td->unsent_wr_list, &twr_list, wrqe); 1319 mtx_unlock(&td->unsent_wr_lock); 1320 1321 while ((wr = STAILQ_FIRST(&twr_list)) != NULL) { 1322 STAILQ_REMOVE_HEAD(&twr_list, link); 1323 1324 cpl = wrtod(wr); 1325 opcode = GET_OPCODE(cpl); 1326 1327 switch (opcode) { 1328 case CPL_ACT_OPEN_REQ: 1329 case CPL_ACT_OPEN_REQ6: 1330 atid = G_TID_TID(be32toh(OPCODE_TID(cpl))); 1331 sc = td_adapter(td); 1332 1333 CTR2(KTR_CXGBE, "%s: atid %u ", __func__, atid); 1334 act_open_failure_cleanup(sc, atid, EHOSTUNREACH); 1335 free(wr, M_CXGBE); 1336 break; 1337 default: 1338 log(LOG_ERR, "%s: leaked work request %p, wr_len %d, " 1339 "opcode %x\n", __func__, wr, wr->wr_len, opcode); 1340 /* WR not freed here; go look at it with a debugger. 
*/ 1341 } 1342 } 1343} 1344 1345/* 1346 * Ground control to Major TOM 1347 * Commencing countdown, engines on 1348 */ 1349static int 1350t4_tom_activate(struct adapter *sc) 1351{ 1352 struct tom_data *td; 1353 struct toedev *tod; 1354 struct vi_info *vi; 1355 struct sge_ofld_rxq *ofld_rxq; 1356 int i, j, rc, v; 1357 1358 ASSERT_SYNCHRONIZED_OP(sc); 1359 1360 /* per-adapter softc for TOM */ 1361 td = malloc(sizeof(*td), M_CXGBE, M_ZERO | M_NOWAIT); 1362 if (td == NULL) 1363 return (ENOMEM); 1364 1365 /* List of TOE PCBs and associated lock */ 1366 mtx_init(&td->toep_list_lock, "PCB list lock", NULL, MTX_DEF); 1367 TAILQ_INIT(&td->toep_list); 1368 1369 /* Listen context */ 1370 mtx_init(&td->lctx_hash_lock, "lctx hash lock", NULL, MTX_DEF); 1371 td->listen_hash = hashinit_flags(LISTEN_HASH_SIZE, M_CXGBE, 1372 &td->listen_mask, HASH_NOWAIT); 1373 1374 /* List of WRs for which L2 resolution failed */ 1375 mtx_init(&td->unsent_wr_lock, "Unsent WR list lock", NULL, MTX_DEF); 1376 STAILQ_INIT(&td->unsent_wr_list); 1377 TASK_INIT(&td->reclaim_wr_resources, 0, reclaim_wr_resources, td); 1378 1379 /* TID tables */ 1380 rc = alloc_tid_tabs(&sc->tids); 1381 if (rc != 0) 1382 goto done; 1383 1384 rc = t4_init_ppod_region(&td->pr, &sc->vres.ddp, 1385 t4_read_reg(sc, A_ULP_RX_TDDP_PSZ), "TDDP page pods"); 1386 if (rc != 0) 1387 goto done; 1388 t4_set_reg_field(sc, A_ULP_RX_TDDP_TAGMASK, 1389 V_TDDPTAGMASK(M_TDDPTAGMASK), td->pr.pr_tag_mask); 1390 1391 /* CLIP table for IPv6 offload */ 1392 init_clip_table(sc, td); 1393 1394 /* toedev ops */ 1395 tod = &td->tod; 1396 init_toedev(tod); 1397 tod->tod_softc = sc; 1398 tod->tod_connect = t4_connect; 1399 tod->tod_listen_start = t4_listen_start; 1400 tod->tod_listen_stop = t4_listen_stop; 1401 tod->tod_rcvd = t4_rcvd; 1402 tod->tod_output = t4_tod_output; 1403 tod->tod_send_rst = t4_send_rst; 1404 tod->tod_send_fin = t4_send_fin; 1405 tod->tod_pcb_detach = t4_pcb_detach; 1406 tod->tod_l2_update = t4_l2_update; 1407 
tod->tod_syncache_added = t4_syncache_added; 1408 tod->tod_syncache_removed = t4_syncache_removed; 1409 tod->tod_syncache_respond = t4_syncache_respond; 1410 tod->tod_offload_socket = t4_offload_socket; 1411 tod->tod_ctloutput = t4_ctloutput; 1412#if 0 1413 tod->tod_tcp_info = t4_tcp_info; 1414#else 1415 (void)&t4_tcp_info; 1416#endif 1417 1418 for_each_port(sc, i) { 1419 for_each_vi(sc->port[i], v, vi) { 1420 TOEDEV(vi->ifp) = &td->tod; 1421 for_each_ofld_rxq(vi, j, ofld_rxq) { 1422 ofld_rxq->iq.set_tcb_rpl = do_set_tcb_rpl; 1423 ofld_rxq->iq.l2t_write_rpl = do_l2t_write_rpl2; 1424 } 1425 } 1426 } 1427 1428 sc->tom_softc = td; 1429 register_toedev(sc->tom_softc); 1430 1431done: 1432 if (rc != 0) 1433 free_tom_data(sc, td); 1434 return (rc); 1435} 1436 1437static int 1438t4_tom_deactivate(struct adapter *sc) 1439{ 1440 int rc = 0; 1441 struct tom_data *td = sc->tom_softc; 1442 1443 ASSERT_SYNCHRONIZED_OP(sc); 1444 1445 if (td == NULL) 1446 return (0); /* XXX. KASSERT? */ 1447 1448 if (sc->offload_map != 0) 1449 return (EBUSY); /* at least one port has IFCAP_TOE enabled */ 1450 1451 if (uld_active(sc, ULD_IWARP) || uld_active(sc, ULD_ISCSI)) 1452 return (EBUSY); /* both iWARP and iSCSI rely on the TOE. 
*/ 1453 1454 mtx_lock(&td->toep_list_lock); 1455 if (!TAILQ_EMPTY(&td->toep_list)) 1456 rc = EBUSY; 1457 mtx_unlock(&td->toep_list_lock); 1458 1459 mtx_lock(&td->lctx_hash_lock); 1460 if (td->lctx_count > 0) 1461 rc = EBUSY; 1462 mtx_unlock(&td->lctx_hash_lock); 1463 1464 taskqueue_drain(taskqueue_thread, &td->reclaim_wr_resources); 1465 mtx_lock(&td->unsent_wr_lock); 1466 if (!STAILQ_EMPTY(&td->unsent_wr_list)) 1467 rc = EBUSY; 1468 mtx_unlock(&td->unsent_wr_lock); 1469 1470 if (rc == 0) { 1471 unregister_toedev(sc->tom_softc); 1472 free_tom_data(sc, td); 1473 sc->tom_softc = NULL; 1474 } 1475 1476 return (rc); 1477} 1478 1479static void 1480t4_tom_ifaddr_event(void *arg __unused, struct ifnet *ifp) 1481{ 1482 1483 atomic_add_rel_int(&in6_ifaddr_gen, 1); 1484 taskqueue_enqueue_timeout(taskqueue_thread, &clip_task, -hz / 4); 1485} 1486 1487static int 1488t4_aio_queue_tom(struct socket *so, struct kaiocb *job) 1489{ 1490 struct tcpcb *tp = so_sototcpcb(so); 1491 struct toepcb *toep = tp->t_toe; 1492 int error; 1493 1494 if (toep->ulp_mode == ULP_MODE_TCPDDP) { 1495 error = t4_aio_queue_ddp(so, job); 1496 if (error != EOPNOTSUPP) 1497 return (error); 1498 } 1499 1500 return (t4_aio_queue_aiotx(so, job)); 1501} 1502 1503static int 1504t4_ctloutput_tom(struct socket *so, struct sockopt *sopt) 1505{ 1506 1507 if (sopt->sopt_level != IPPROTO_TCP) 1508 return (tcp_ctloutput(so, sopt)); 1509 1510 switch (sopt->sopt_name) { 1511 case TCP_TLSOM_SET_TLS_CONTEXT: 1512 case TCP_TLSOM_GET_TLS_TOM: 1513 case TCP_TLSOM_CLR_TLS_TOM: 1514 case TCP_TLSOM_CLR_QUIES: 1515 return (t4_ctloutput_tls(so, sopt)); 1516 default: 1517 return (tcp_ctloutput(so, sopt)); 1518 } 1519} 1520 1521static int 1522t4_tom_mod_load(void) 1523{ 1524 struct protosw *tcp_protosw, *tcp6_protosw; 1525 1526 /* CPL handlers */ 1527 t4_init_connect_cpl_handlers(); 1528 t4_init_listen_cpl_handlers(); 1529 t4_init_cpl_io_handlers(); 1530 1531 t4_ddp_mod_load(); 1532 t4_tls_mod_load(); 1533 1534 tcp_protosw = 
pffindproto(PF_INET, IPPROTO_TCP, SOCK_STREAM); 1535 if (tcp_protosw == NULL) 1536 return (ENOPROTOOPT); 1537 bcopy(tcp_protosw, &toe_protosw, sizeof(toe_protosw)); 1538 bcopy(tcp_protosw->pr_usrreqs, &toe_usrreqs, sizeof(toe_usrreqs)); 1539 toe_usrreqs.pru_aio_queue = t4_aio_queue_tom; 1540 toe_protosw.pr_ctloutput = t4_ctloutput_tom; 1541 toe_protosw.pr_usrreqs = &toe_usrreqs; 1542 1543 tcp6_protosw = pffindproto(PF_INET6, IPPROTO_TCP, SOCK_STREAM); 1544 if (tcp6_protosw == NULL) 1545 return (ENOPROTOOPT); 1546 bcopy(tcp6_protosw, &toe6_protosw, sizeof(toe6_protosw)); 1547 bcopy(tcp6_protosw->pr_usrreqs, &toe6_usrreqs, sizeof(toe6_usrreqs)); 1548 toe6_usrreqs.pru_aio_queue = t4_aio_queue_tom; 1549 toe6_protosw.pr_ctloutput = t4_ctloutput_tom; 1550 toe6_protosw.pr_usrreqs = &toe6_usrreqs; 1551 1552 TIMEOUT_TASK_INIT(taskqueue_thread, &clip_task, 0, t4_clip_task, NULL); 1553 ifaddr_evhandler = EVENTHANDLER_REGISTER(ifaddr_event, 1554 t4_tom_ifaddr_event, NULL, EVENTHANDLER_PRI_ANY); 1555 1556 return (t4_register_uld(&tom_uld_info)); 1557} 1558 1559static void 1560tom_uninit(struct adapter *sc, void *arg __unused) 1561{ 1562 if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4tomun")) 1563 return; 1564 1565 /* Try to free resources (works only if no port has IFCAP_TOE) */ 1566 if (uld_active(sc, ULD_TOM)) 1567 t4_deactivate_uld(sc, ULD_TOM); 1568 1569 end_synchronized_op(sc, 0); 1570} 1571 1572static int 1573t4_tom_mod_unload(void) 1574{ 1575 t4_iterate(tom_uninit, NULL); 1576 1577 if (t4_unregister_uld(&tom_uld_info) == EBUSY) 1578 return (EBUSY); 1579 1580 if (ifaddr_evhandler) { 1581 EVENTHANDLER_DEREGISTER(ifaddr_event, ifaddr_evhandler); 1582 taskqueue_cancel_timeout(taskqueue_thread, &clip_task, NULL); 1583 } 1584 1585 t4_tls_mod_unload(); 1586 t4_ddp_mod_unload(); 1587 1588 t4_uninit_connect_cpl_handlers(); 1589 t4_uninit_listen_cpl_handlers(); 1590 t4_uninit_cpl_io_handlers(); 1591 1592 return (0); 1593} 1594#endif /* TCP_OFFLOAD */ 1595 1596static 
int 1597t4_tom_modevent(module_t mod, int cmd, void *arg) 1598{ 1599 int rc = 0; 1600 1601#ifdef TCP_OFFLOAD 1602 switch (cmd) { 1603 case MOD_LOAD: 1604 rc = t4_tom_mod_load(); 1605 break; 1606 1607 case MOD_UNLOAD: 1608 rc = t4_tom_mod_unload(); 1609 break; 1610 1611 default: 1612 rc = EINVAL; 1613 } 1614#else 1615 printf("t4_tom: compiled without TCP_OFFLOAD support.\n"); 1616 rc = EOPNOTSUPP; 1617#endif 1618 return (rc); 1619} 1620 1621static moduledata_t t4_tom_moddata= { 1622 "t4_tom", 1623 t4_tom_modevent, 1624 0 1625}; 1626 1627MODULE_VERSION(t4_tom, 1); 1628MODULE_DEPEND(t4_tom, toecore, 1, 1, 1); 1629MODULE_DEPEND(t4_tom, t4nex, 1, 1, 1); 1630DECLARE_MODULE(t4_tom, t4_tom_moddata, SI_SUB_EXEC, SI_ORDER_ANY); 1631