1/*- 2 * Copyright (c) 2012 Chelsio Communications, Inc. 3 * All rights reserved. 4 * Written by: Navdeep Parhar <np@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28#include <sys/cdefs.h>
| 1/*- 2 * Copyright (c) 2012 Chelsio Communications, Inc. 3 * All rights reserved. 4 * Written by: Navdeep Parhar <np@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28#include <sys/cdefs.h>
|
29__FBSDID("$FreeBSD: head/sys/dev/cxgbe/tom/t4_listen.c 245937 2013-01-26 03:23:28Z np $");
| 29__FBSDID("$FreeBSD: head/sys/dev/cxgbe/tom/t4_listen.c 248925 2013-03-30 02:26:20Z np $");
|
30 31#include "opt_inet.h" 32#include "opt_inet6.h" 33 34#ifdef TCP_OFFLOAD 35#include <sys/param.h> 36#include <sys/types.h> 37#include <sys/kernel.h> 38#include <sys/ktr.h> 39#include <sys/module.h> 40#include <sys/protosw.h> 41#include <sys/refcount.h> 42#include <sys/domain.h> 43#include <sys/fnv_hash.h> 44#include <sys/socket.h> 45#include <sys/socketvar.h> 46#include <net/ethernet.h> 47#include <net/if.h> 48#include <net/if_types.h> 49#include <net/if_vlan_var.h> 50#include <net/route.h> 51#include <netinet/in.h> 52#include <netinet/in_pcb.h> 53#include <netinet/ip.h> 54#include <netinet/ip6.h> 55#include <netinet6/scope6_var.h> 56#include <netinet/tcp_timer.h> 57#include <netinet/tcp_var.h> 58#define TCPSTATES 59#include <netinet/tcp_fsm.h> 60#include <netinet/toecore.h> 61 62#include "common/common.h" 63#include "common/t4_msg.h" 64#include "common/t4_regs.h" 65#include "tom/t4_tom_l2t.h" 66#include "tom/t4_tom.h" 67 68/* stid services */ 69static int alloc_stid(struct adapter *, struct listen_ctx *, int); 70static struct listen_ctx *lookup_stid(struct adapter *, int); 71static void free_stid(struct adapter *, struct listen_ctx *); 72 73/* lctx services */ 74static struct listen_ctx *alloc_lctx(struct adapter *, struct inpcb *, 75 struct port_info *); 76static int free_lctx(struct adapter *, struct listen_ctx *); 77static void hold_lctx(struct listen_ctx *); 78static void listen_hash_add(struct adapter *, struct listen_ctx *); 79static struct listen_ctx *listen_hash_find(struct adapter *, struct inpcb *); 80static struct listen_ctx *listen_hash_del(struct adapter *, struct inpcb *); 81static struct inpcb *release_lctx(struct adapter *, struct listen_ctx *); 82 83static inline void save_qids_in_mbuf(struct mbuf *, struct port_info *); 84static inline void get_qids_from_mbuf(struct mbuf *m, int *, int *); 85static void send_reset_synqe(struct toedev *, struct synq_entry *); 86 87static int 88alloc_stid(struct adapter *sc, struct listen_ctx *lctx, int isipv6) 
89{ 90 struct tid_info *t = &sc->tids; 91 u_int stid, n, f, mask; 92 struct stid_region *sr = &lctx->stid_region; 93 94 /* 95 * An IPv6 server needs 2 naturally aligned stids (1 stid = 4 cells) in 96 * the TCAM. The start of the stid region is properly aligned (the chip 97 * requires each region to be 128-cell aligned). 98 */ 99 n = isipv6 ? 2 : 1; 100 mask = n - 1; 101 KASSERT((t->stid_base & mask) == 0 && (t->nstids & mask) == 0, 102 ("%s: stid region (%u, %u) not properly aligned. n = %u", 103 __func__, t->stid_base, t->nstids, n)); 104 105 mtx_lock(&t->stid_lock); 106 if (n > t->nstids - t->stids_in_use) { 107 mtx_unlock(&t->stid_lock); 108 return (-1); 109 } 110 111 if (t->nstids_free_head >= n) { 112 /* 113 * This allocation will definitely succeed because the region 114 * starts at a good alignment and we just checked we have enough 115 * stids free. 116 */ 117 f = t->nstids_free_head & mask; 118 t->nstids_free_head -= n + f; 119 stid = t->nstids_free_head; 120 TAILQ_INSERT_HEAD(&t->stids, sr, link); 121 } else { 122 struct stid_region *s; 123 124 stid = t->nstids_free_head; 125 TAILQ_FOREACH(s, &t->stids, link) { 126 stid += s->used + s->free; 127 f = stid & mask; 128 if (n <= s->free - f) { 129 stid -= n + f; 130 s->free -= n + f; 131 TAILQ_INSERT_AFTER(&t->stids, s, sr, link); 132 goto allocated; 133 } 134 } 135 136 if (__predict_false(stid != t->nstids)) { 137 panic("%s: stids TAILQ (%p) corrupt." 
138 " At %d instead of %d at the end of the queue.", 139 __func__, &t->stids, stid, t->nstids); 140 } 141 142 mtx_unlock(&t->stid_lock); 143 return (-1); 144 } 145 146allocated: 147 sr->used = n; 148 sr->free = f; 149 t->stids_in_use += n; 150 t->stid_tab[stid] = lctx; 151 mtx_unlock(&t->stid_lock); 152 153 KASSERT(((stid + t->stid_base) & mask) == 0, 154 ("%s: EDOOFUS.", __func__)); 155 return (stid + t->stid_base); 156} 157 158static struct listen_ctx * 159lookup_stid(struct adapter *sc, int stid) 160{ 161 struct tid_info *t = &sc->tids; 162 163 return (t->stid_tab[stid - t->stid_base]); 164} 165 166static void 167free_stid(struct adapter *sc, struct listen_ctx *lctx) 168{ 169 struct tid_info *t = &sc->tids; 170 struct stid_region *sr = &lctx->stid_region; 171 struct stid_region *s; 172 173 KASSERT(sr->used > 0, ("%s: nonsense free (%d)", __func__, sr->used)); 174 175 mtx_lock(&t->stid_lock); 176 s = TAILQ_PREV(sr, stid_head, link); 177 if (s != NULL) 178 s->free += sr->used + sr->free; 179 else 180 t->nstids_free_head += sr->used + sr->free; 181 KASSERT(t->stids_in_use >= sr->used, 182 ("%s: stids_in_use (%u) < stids being freed (%u)", __func__, 183 t->stids_in_use, sr->used)); 184 t->stids_in_use -= sr->used; 185 TAILQ_REMOVE(&t->stids, sr, link); 186 mtx_unlock(&t->stid_lock); 187} 188 189static struct listen_ctx * 190alloc_lctx(struct adapter *sc, struct inpcb *inp, struct port_info *pi) 191{ 192 struct listen_ctx *lctx; 193 194 INP_WLOCK_ASSERT(inp); 195 196 lctx = malloc(sizeof(struct listen_ctx), M_CXGBE, M_NOWAIT | M_ZERO); 197 if (lctx == NULL) 198 return (NULL); 199 200 lctx->stid = alloc_stid(sc, lctx, inp->inp_vflag & INP_IPV6); 201 if (lctx->stid < 0) { 202 free(lctx, M_CXGBE); 203 return (NULL); 204 } 205 206 lctx->ctrlq = &sc->sge.ctrlq[pi->port_id]; 207 lctx->ofld_rxq = &sc->sge.ofld_rxq[pi->first_ofld_rxq]; 208 refcount_init(&lctx->refcount, 1); 209 TAILQ_INIT(&lctx->synq); 210 211 lctx->inp = inp; 212 in_pcbref(inp); 213 214 return (lctx); 215} 
216 217/* Don't call this directly, use release_lctx instead */ 218static int 219free_lctx(struct adapter *sc, struct listen_ctx *lctx) 220{ 221 struct inpcb *inp = lctx->inp; 222 223 INP_WLOCK_ASSERT(inp); 224 KASSERT(lctx->refcount == 0, 225 ("%s: refcount %d", __func__, lctx->refcount)); 226 KASSERT(TAILQ_EMPTY(&lctx->synq), 227 ("%s: synq not empty.", __func__)); 228 KASSERT(lctx->stid >= 0, ("%s: bad stid %d.", __func__, lctx->stid)); 229 230 CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, inp %p", 231 __func__, lctx->stid, lctx, lctx->inp); 232 233 free_stid(sc, lctx); 234 free(lctx, M_CXGBE); 235 236 return (in_pcbrele_wlocked(inp)); 237} 238 239static void 240hold_lctx(struct listen_ctx *lctx) 241{ 242 243 refcount_acquire(&lctx->refcount); 244} 245 246static inline uint32_t 247listen_hashfn(void *key, u_long mask) 248{ 249 250 return (fnv_32_buf(&key, sizeof(key), FNV1_32_INIT) & mask); 251} 252 253/* 254 * Add a listen_ctx entry to the listen hash table. 255 */ 256static void 257listen_hash_add(struct adapter *sc, struct listen_ctx *lctx) 258{ 259 struct tom_data *td = sc->tom_softc; 260 int bucket = listen_hashfn(lctx->inp, td->listen_mask); 261 262 mtx_lock(&td->lctx_hash_lock); 263 LIST_INSERT_HEAD(&td->listen_hash[bucket], lctx, link); 264 td->lctx_count++; 265 mtx_unlock(&td->lctx_hash_lock); 266} 267 268/* 269 * Look for the listening socket's context entry in the hash and return it. 270 */ 271static struct listen_ctx * 272listen_hash_find(struct adapter *sc, struct inpcb *inp) 273{ 274 struct tom_data *td = sc->tom_softc; 275 int bucket = listen_hashfn(inp, td->listen_mask); 276 struct listen_ctx *lctx; 277 278 mtx_lock(&td->lctx_hash_lock); 279 LIST_FOREACH(lctx, &td->listen_hash[bucket], link) { 280 if (lctx->inp == inp) 281 break; 282 } 283 mtx_unlock(&td->lctx_hash_lock); 284 285 return (lctx); 286} 287 288/* 289 * Removes the listen_ctx structure for inp from the hash and returns it. 
290 */ 291static struct listen_ctx * 292listen_hash_del(struct adapter *sc, struct inpcb *inp) 293{ 294 struct tom_data *td = sc->tom_softc; 295 int bucket = listen_hashfn(inp, td->listen_mask); 296 struct listen_ctx *lctx, *l; 297 298 mtx_lock(&td->lctx_hash_lock); 299 LIST_FOREACH_SAFE(lctx, &td->listen_hash[bucket], link, l) { 300 if (lctx->inp == inp) { 301 LIST_REMOVE(lctx, link); 302 td->lctx_count--; 303 break; 304 } 305 } 306 mtx_unlock(&td->lctx_hash_lock); 307 308 return (lctx); 309} 310 311/* 312 * Releases a hold on the lctx. Must be called with the listening socket's inp 313 * locked. The inp may be freed by this function and it returns NULL to 314 * indicate this. 315 */ 316static struct inpcb * 317release_lctx(struct adapter *sc, struct listen_ctx *lctx) 318{ 319 struct inpcb *inp = lctx->inp; 320 int inp_freed = 0; 321 322 INP_WLOCK_ASSERT(inp); 323 if (refcount_release(&lctx->refcount)) 324 inp_freed = free_lctx(sc, lctx); 325 326 return (inp_freed ? NULL : inp); 327} 328 329static void 330send_reset_synqe(struct toedev *tod, struct synq_entry *synqe) 331{ 332 struct adapter *sc = tod->tod_softc; 333 struct mbuf *m = synqe->syn; 334 struct ifnet *ifp = m->m_pkthdr.rcvif; 335 struct port_info *pi = ifp->if_softc; 336 struct l2t_entry *e = &sc->l2t->l2tab[synqe->l2e_idx]; 337 struct wrqe *wr; 338 struct fw_flowc_wr *flowc; 339 struct cpl_abort_req *req; 340 int txqid, rxqid, flowclen; 341 struct sge_wrq *ofld_txq; 342 struct sge_ofld_rxq *ofld_rxq; 343 const int nparams = 6; 344 unsigned int pfvf = G_FW_VIID_PFN(pi->viid) << S_FW_VIID_PFN; 345 346 INP_WLOCK_ASSERT(synqe->lctx->inp); 347 348 CTR5(KTR_CXGBE, "%s: synqe %p (0x%x), tid %d%s", 349 __func__, synqe, synqe->flags, synqe->tid, 350 synqe->flags & TPF_ABORT_SHUTDOWN ? 
351 " (abort already in progress)" : ""); 352 if (synqe->flags & TPF_ABORT_SHUTDOWN) 353 return; /* abort already in progress */ 354 synqe->flags |= TPF_ABORT_SHUTDOWN; 355 356 get_qids_from_mbuf(m, &txqid, &rxqid); 357 ofld_txq = &sc->sge.ofld_txq[txqid]; 358 ofld_rxq = &sc->sge.ofld_rxq[rxqid]; 359 360 /* The wrqe will have two WRs - a flowc followed by an abort_req */ 361 flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval); 362
| 30 31#include "opt_inet.h" 32#include "opt_inet6.h" 33 34#ifdef TCP_OFFLOAD 35#include <sys/param.h> 36#include <sys/types.h> 37#include <sys/kernel.h> 38#include <sys/ktr.h> 39#include <sys/module.h> 40#include <sys/protosw.h> 41#include <sys/refcount.h> 42#include <sys/domain.h> 43#include <sys/fnv_hash.h> 44#include <sys/socket.h> 45#include <sys/socketvar.h> 46#include <net/ethernet.h> 47#include <net/if.h> 48#include <net/if_types.h> 49#include <net/if_vlan_var.h> 50#include <net/route.h> 51#include <netinet/in.h> 52#include <netinet/in_pcb.h> 53#include <netinet/ip.h> 54#include <netinet/ip6.h> 55#include <netinet6/scope6_var.h> 56#include <netinet/tcp_timer.h> 57#include <netinet/tcp_var.h> 58#define TCPSTATES 59#include <netinet/tcp_fsm.h> 60#include <netinet/toecore.h> 61 62#include "common/common.h" 63#include "common/t4_msg.h" 64#include "common/t4_regs.h" 65#include "tom/t4_tom_l2t.h" 66#include "tom/t4_tom.h" 67 68/* stid services */ 69static int alloc_stid(struct adapter *, struct listen_ctx *, int); 70static struct listen_ctx *lookup_stid(struct adapter *, int); 71static void free_stid(struct adapter *, struct listen_ctx *); 72 73/* lctx services */ 74static struct listen_ctx *alloc_lctx(struct adapter *, struct inpcb *, 75 struct port_info *); 76static int free_lctx(struct adapter *, struct listen_ctx *); 77static void hold_lctx(struct listen_ctx *); 78static void listen_hash_add(struct adapter *, struct listen_ctx *); 79static struct listen_ctx *listen_hash_find(struct adapter *, struct inpcb *); 80static struct listen_ctx *listen_hash_del(struct adapter *, struct inpcb *); 81static struct inpcb *release_lctx(struct adapter *, struct listen_ctx *); 82 83static inline void save_qids_in_mbuf(struct mbuf *, struct port_info *); 84static inline void get_qids_from_mbuf(struct mbuf *m, int *, int *); 85static void send_reset_synqe(struct toedev *, struct synq_entry *); 86 87static int 88alloc_stid(struct adapter *sc, struct listen_ctx *lctx, int 
isipv6) 89{ 90 struct tid_info *t = &sc->tids; 91 u_int stid, n, f, mask; 92 struct stid_region *sr = &lctx->stid_region; 93 94 /* 95 * An IPv6 server needs 2 naturally aligned stids (1 stid = 4 cells) in 96 * the TCAM. The start of the stid region is properly aligned (the chip 97 * requires each region to be 128-cell aligned). 98 */ 99 n = isipv6 ? 2 : 1; 100 mask = n - 1; 101 KASSERT((t->stid_base & mask) == 0 && (t->nstids & mask) == 0, 102 ("%s: stid region (%u, %u) not properly aligned. n = %u", 103 __func__, t->stid_base, t->nstids, n)); 104 105 mtx_lock(&t->stid_lock); 106 if (n > t->nstids - t->stids_in_use) { 107 mtx_unlock(&t->stid_lock); 108 return (-1); 109 } 110 111 if (t->nstids_free_head >= n) { 112 /* 113 * This allocation will definitely succeed because the region 114 * starts at a good alignment and we just checked we have enough 115 * stids free. 116 */ 117 f = t->nstids_free_head & mask; 118 t->nstids_free_head -= n + f; 119 stid = t->nstids_free_head; 120 TAILQ_INSERT_HEAD(&t->stids, sr, link); 121 } else { 122 struct stid_region *s; 123 124 stid = t->nstids_free_head; 125 TAILQ_FOREACH(s, &t->stids, link) { 126 stid += s->used + s->free; 127 f = stid & mask; 128 if (n <= s->free - f) { 129 stid -= n + f; 130 s->free -= n + f; 131 TAILQ_INSERT_AFTER(&t->stids, s, sr, link); 132 goto allocated; 133 } 134 } 135 136 if (__predict_false(stid != t->nstids)) { 137 panic("%s: stids TAILQ (%p) corrupt." 
138 " At %d instead of %d at the end of the queue.", 139 __func__, &t->stids, stid, t->nstids); 140 } 141 142 mtx_unlock(&t->stid_lock); 143 return (-1); 144 } 145 146allocated: 147 sr->used = n; 148 sr->free = f; 149 t->stids_in_use += n; 150 t->stid_tab[stid] = lctx; 151 mtx_unlock(&t->stid_lock); 152 153 KASSERT(((stid + t->stid_base) & mask) == 0, 154 ("%s: EDOOFUS.", __func__)); 155 return (stid + t->stid_base); 156} 157 158static struct listen_ctx * 159lookup_stid(struct adapter *sc, int stid) 160{ 161 struct tid_info *t = &sc->tids; 162 163 return (t->stid_tab[stid - t->stid_base]); 164} 165 166static void 167free_stid(struct adapter *sc, struct listen_ctx *lctx) 168{ 169 struct tid_info *t = &sc->tids; 170 struct stid_region *sr = &lctx->stid_region; 171 struct stid_region *s; 172 173 KASSERT(sr->used > 0, ("%s: nonsense free (%d)", __func__, sr->used)); 174 175 mtx_lock(&t->stid_lock); 176 s = TAILQ_PREV(sr, stid_head, link); 177 if (s != NULL) 178 s->free += sr->used + sr->free; 179 else 180 t->nstids_free_head += sr->used + sr->free; 181 KASSERT(t->stids_in_use >= sr->used, 182 ("%s: stids_in_use (%u) < stids being freed (%u)", __func__, 183 t->stids_in_use, sr->used)); 184 t->stids_in_use -= sr->used; 185 TAILQ_REMOVE(&t->stids, sr, link); 186 mtx_unlock(&t->stid_lock); 187} 188 189static struct listen_ctx * 190alloc_lctx(struct adapter *sc, struct inpcb *inp, struct port_info *pi) 191{ 192 struct listen_ctx *lctx; 193 194 INP_WLOCK_ASSERT(inp); 195 196 lctx = malloc(sizeof(struct listen_ctx), M_CXGBE, M_NOWAIT | M_ZERO); 197 if (lctx == NULL) 198 return (NULL); 199 200 lctx->stid = alloc_stid(sc, lctx, inp->inp_vflag & INP_IPV6); 201 if (lctx->stid < 0) { 202 free(lctx, M_CXGBE); 203 return (NULL); 204 } 205 206 lctx->ctrlq = &sc->sge.ctrlq[pi->port_id]; 207 lctx->ofld_rxq = &sc->sge.ofld_rxq[pi->first_ofld_rxq]; 208 refcount_init(&lctx->refcount, 1); 209 TAILQ_INIT(&lctx->synq); 210 211 lctx->inp = inp; 212 in_pcbref(inp); 213 214 return (lctx); 215} 
216 217/* Don't call this directly, use release_lctx instead */ 218static int 219free_lctx(struct adapter *sc, struct listen_ctx *lctx) 220{ 221 struct inpcb *inp = lctx->inp; 222 223 INP_WLOCK_ASSERT(inp); 224 KASSERT(lctx->refcount == 0, 225 ("%s: refcount %d", __func__, lctx->refcount)); 226 KASSERT(TAILQ_EMPTY(&lctx->synq), 227 ("%s: synq not empty.", __func__)); 228 KASSERT(lctx->stid >= 0, ("%s: bad stid %d.", __func__, lctx->stid)); 229 230 CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, inp %p", 231 __func__, lctx->stid, lctx, lctx->inp); 232 233 free_stid(sc, lctx); 234 free(lctx, M_CXGBE); 235 236 return (in_pcbrele_wlocked(inp)); 237} 238 239static void 240hold_lctx(struct listen_ctx *lctx) 241{ 242 243 refcount_acquire(&lctx->refcount); 244} 245 246static inline uint32_t 247listen_hashfn(void *key, u_long mask) 248{ 249 250 return (fnv_32_buf(&key, sizeof(key), FNV1_32_INIT) & mask); 251} 252 253/* 254 * Add a listen_ctx entry to the listen hash table. 255 */ 256static void 257listen_hash_add(struct adapter *sc, struct listen_ctx *lctx) 258{ 259 struct tom_data *td = sc->tom_softc; 260 int bucket = listen_hashfn(lctx->inp, td->listen_mask); 261 262 mtx_lock(&td->lctx_hash_lock); 263 LIST_INSERT_HEAD(&td->listen_hash[bucket], lctx, link); 264 td->lctx_count++; 265 mtx_unlock(&td->lctx_hash_lock); 266} 267 268/* 269 * Look for the listening socket's context entry in the hash and return it. 270 */ 271static struct listen_ctx * 272listen_hash_find(struct adapter *sc, struct inpcb *inp) 273{ 274 struct tom_data *td = sc->tom_softc; 275 int bucket = listen_hashfn(inp, td->listen_mask); 276 struct listen_ctx *lctx; 277 278 mtx_lock(&td->lctx_hash_lock); 279 LIST_FOREACH(lctx, &td->listen_hash[bucket], link) { 280 if (lctx->inp == inp) 281 break; 282 } 283 mtx_unlock(&td->lctx_hash_lock); 284 285 return (lctx); 286} 287 288/* 289 * Removes the listen_ctx structure for inp from the hash and returns it. 
290 */ 291static struct listen_ctx * 292listen_hash_del(struct adapter *sc, struct inpcb *inp) 293{ 294 struct tom_data *td = sc->tom_softc; 295 int bucket = listen_hashfn(inp, td->listen_mask); 296 struct listen_ctx *lctx, *l; 297 298 mtx_lock(&td->lctx_hash_lock); 299 LIST_FOREACH_SAFE(lctx, &td->listen_hash[bucket], link, l) { 300 if (lctx->inp == inp) { 301 LIST_REMOVE(lctx, link); 302 td->lctx_count--; 303 break; 304 } 305 } 306 mtx_unlock(&td->lctx_hash_lock); 307 308 return (lctx); 309} 310 311/* 312 * Releases a hold on the lctx. Must be called with the listening socket's inp 313 * locked. The inp may be freed by this function and it returns NULL to 314 * indicate this. 315 */ 316static struct inpcb * 317release_lctx(struct adapter *sc, struct listen_ctx *lctx) 318{ 319 struct inpcb *inp = lctx->inp; 320 int inp_freed = 0; 321 322 INP_WLOCK_ASSERT(inp); 323 if (refcount_release(&lctx->refcount)) 324 inp_freed = free_lctx(sc, lctx); 325 326 return (inp_freed ? NULL : inp); 327} 328 329static void 330send_reset_synqe(struct toedev *tod, struct synq_entry *synqe) 331{ 332 struct adapter *sc = tod->tod_softc; 333 struct mbuf *m = synqe->syn; 334 struct ifnet *ifp = m->m_pkthdr.rcvif; 335 struct port_info *pi = ifp->if_softc; 336 struct l2t_entry *e = &sc->l2t->l2tab[synqe->l2e_idx]; 337 struct wrqe *wr; 338 struct fw_flowc_wr *flowc; 339 struct cpl_abort_req *req; 340 int txqid, rxqid, flowclen; 341 struct sge_wrq *ofld_txq; 342 struct sge_ofld_rxq *ofld_rxq; 343 const int nparams = 6; 344 unsigned int pfvf = G_FW_VIID_PFN(pi->viid) << S_FW_VIID_PFN; 345 346 INP_WLOCK_ASSERT(synqe->lctx->inp); 347 348 CTR5(KTR_CXGBE, "%s: synqe %p (0x%x), tid %d%s", 349 __func__, synqe, synqe->flags, synqe->tid, 350 synqe->flags & TPF_ABORT_SHUTDOWN ? 
351 " (abort already in progress)" : ""); 352 if (synqe->flags & TPF_ABORT_SHUTDOWN) 353 return; /* abort already in progress */ 354 synqe->flags |= TPF_ABORT_SHUTDOWN; 355 356 get_qids_from_mbuf(m, &txqid, &rxqid); 357 ofld_txq = &sc->sge.ofld_txq[txqid]; 358 ofld_rxq = &sc->sge.ofld_rxq[rxqid]; 359 360 /* The wrqe will have two WRs - a flowc followed by an abort_req */ 361 flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval); 362
|
363 wr = alloc_wrqe(roundup(flowclen, EQ_ESIZE) + sizeof(*req), ofld_txq);
| 363 wr = alloc_wrqe(roundup2(flowclen, EQ_ESIZE) + sizeof(*req), ofld_txq);
|
364 if (wr == NULL) { 365 /* XXX */ 366 panic("%s: allocation failure.", __func__); 367 } 368 flowc = wrtod(wr);
| 364 if (wr == NULL) { 365 /* XXX */ 366 panic("%s: allocation failure.", __func__); 367 } 368 flowc = wrtod(wr);
|
369 req = (void *)((caddr_t)flowc + roundup(flowclen, EQ_ESIZE));
| 369 req = (void *)((caddr_t)flowc + roundup2(flowclen, EQ_ESIZE));
|
370 371 /* First the flowc ... */ 372 memset(flowc, 0, wr->wr_len); 373 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | 374 V_FW_FLOWC_WR_NPARAMS(nparams)); 375 flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) | 376 V_FW_WR_FLOWID(synqe->tid)); 377 flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; 378 flowc->mnemval[0].val = htobe32(pfvf); 379 flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; 380 flowc->mnemval[1].val = htobe32(pi->tx_chan); 381 flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; 382 flowc->mnemval[2].val = htobe32(pi->tx_chan); 383 flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID; 384 flowc->mnemval[3].val = htobe32(ofld_rxq->iq.abs_id); 385 flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF; 386 flowc->mnemval[4].val = htobe32(512); 387 flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS; 388 flowc->mnemval[5].val = htobe32(512); 389 synqe->flags |= TPF_FLOWC_WR_SENT; 390 391 /* ... then ABORT request */ 392 INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, synqe->tid); 393 req->rsvd0 = 0; /* don't have a snd_nxt */ 394 req->rsvd1 = 1; /* no data sent yet */ 395 req->cmd = CPL_ABORT_SEND_RST; 396 397 t4_l2t_send(sc, wr, e); 398} 399 400static int 401create_server(struct adapter *sc, struct listen_ctx *lctx) 402{ 403 struct wrqe *wr; 404 struct cpl_pass_open_req *req; 405 struct inpcb *inp = lctx->inp; 406 407 wr = alloc_wrqe(sizeof(*req), lctx->ctrlq); 408 if (wr == NULL) { 409 log(LOG_ERR, "%s: allocation failure", __func__); 410 return (ENOMEM); 411 } 412 req = wrtod(wr); 413 414 INIT_TP_WR(req, 0); 415 OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, lctx->stid)); 416 req->local_port = inp->inp_lport; 417 req->peer_port = 0; 418 req->local_ip = inp->inp_laddr.s_addr; 419 req->peer_ip = 0; 420 req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan)); 421 req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) | 422 F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id)); 423 424 t4_wrq_tx(sc, wr); 425 return (0); 426} 427 
428static int 429create_server6(struct adapter *sc, struct listen_ctx *lctx) 430{ 431 struct wrqe *wr; 432 struct cpl_pass_open_req6 *req; 433 struct inpcb *inp = lctx->inp; 434 435 wr = alloc_wrqe(sizeof(*req), lctx->ctrlq); 436 if (wr == NULL) { 437 log(LOG_ERR, "%s: allocation failure", __func__); 438 return (ENOMEM); 439 } 440 req = wrtod(wr); 441 442 INIT_TP_WR(req, 0); 443 OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, lctx->stid)); 444 req->local_port = inp->inp_lport; 445 req->peer_port = 0; 446 req->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0]; 447 req->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8]; 448 req->peer_ip_hi = 0; 449 req->peer_ip_lo = 0; 450 req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan)); 451 req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) | 452 F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id)); 453 454 t4_wrq_tx(sc, wr); 455 return (0); 456} 457 458static int 459destroy_server(struct adapter *sc, struct listen_ctx *lctx) 460{ 461 struct wrqe *wr; 462 struct cpl_close_listsvr_req *req; 463 464 wr = alloc_wrqe(sizeof(*req), lctx->ctrlq); 465 if (wr == NULL) { 466 /* XXX */ 467 panic("%s: allocation failure.", __func__); 468 } 469 req = wrtod(wr); 470 471 INIT_TP_WR(req, 0); 472 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, 473 lctx->stid)); 474 req->reply_ctrl = htobe16(lctx->ofld_rxq->iq.abs_id); 475 req->rsvd = htobe16(0); 476 477 t4_wrq_tx(sc, wr); 478 return (0); 479} 480 481/* 482 * Start a listening server by sending a passive open request to HW. 483 * 484 * Can't take adapter lock here and access to sc->flags, sc->open_device_map, 485 * sc->offload_map, if_capenable are all race prone. 
486 */ 487int 488t4_listen_start(struct toedev *tod, struct tcpcb *tp) 489{ 490 struct adapter *sc = tod->tod_softc; 491 struct port_info *pi; 492 struct inpcb *inp = tp->t_inpcb; 493 struct listen_ctx *lctx; 494 int i, rc; 495 496 INP_WLOCK_ASSERT(inp); 497 498#if 0 499 ADAPTER_LOCK(sc); 500 if (IS_BUSY(sc)) { 501 log(LOG_ERR, "%s: listen request ignored, %s is busy", 502 __func__, device_get_nameunit(sc->dev)); 503 goto done; 504 } 505 506 KASSERT(sc->flags & TOM_INIT_DONE, 507 ("%s: TOM not initialized", __func__)); 508#endif 509 510 if ((sc->open_device_map & sc->offload_map) == 0) 511 goto done; /* no port that's UP with IFCAP_TOE enabled */ 512 513 /* 514 * Find a running port with IFCAP_TOE (4 or 6). We'll use the first 515 * such port's queues to send the passive open and receive the reply to 516 * it. 517 * 518 * XXX: need a way to mark a port in use by offload. if_cxgbe should 519 * then reject any attempt to bring down such a port (and maybe reject 520 * attempts to disable IFCAP_TOE on that port too?). 
521 */ 522 for_each_port(sc, i) { 523 if (isset(&sc->open_device_map, i) && 524 sc->port[i]->ifp->if_capenable & IFCAP_TOE) 525 break; 526 } 527 KASSERT(i < sc->params.nports, 528 ("%s: no running port with TOE capability enabled.", __func__)); 529 pi = sc->port[i]; 530 531 if (listen_hash_find(sc, inp) != NULL) 532 goto done; /* already setup */ 533 534 lctx = alloc_lctx(sc, inp, pi); 535 if (lctx == NULL) { 536 log(LOG_ERR, 537 "%s: listen request ignored, %s couldn't allocate lctx\n", 538 __func__, device_get_nameunit(sc->dev)); 539 goto done; 540 } 541 listen_hash_add(sc, lctx); 542 543 CTR6(KTR_CXGBE, "%s: stid %u (%s), lctx %p, inp %p vflag 0x%x", 544 __func__, lctx->stid, tcpstates[tp->t_state], lctx, inp, 545 inp->inp_vflag); 546 547 if (inp->inp_vflag & INP_IPV6) 548 rc = create_server6(sc, lctx); 549 else 550 rc = create_server(sc, lctx); 551 if (rc != 0) { 552 log(LOG_ERR, "%s: %s failed to create hw listener: %d.\n", 553 __func__, device_get_nameunit(sc->dev), rc); 554 (void) listen_hash_del(sc, inp); 555 inp = release_lctx(sc, lctx); 556 /* can't be freed, host stack has a reference */ 557 KASSERT(inp != NULL, ("%s: inp freed", __func__)); 558 goto done; 559 } 560 lctx->flags |= LCTX_RPL_PENDING; 561done: 562#if 0 563 ADAPTER_UNLOCK(sc); 564#endif 565 return (0); 566} 567 568int 569t4_listen_stop(struct toedev *tod, struct tcpcb *tp) 570{ 571 struct listen_ctx *lctx; 572 struct adapter *sc = tod->tod_softc; 573 struct inpcb *inp = tp->t_inpcb; 574 struct synq_entry *synqe; 575 576 INP_WLOCK_ASSERT(inp); 577 578 lctx = listen_hash_del(sc, inp); 579 if (lctx == NULL) 580 return (ENOENT); /* no hardware listener for this inp */ 581 582 CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, flags %x", __func__, lctx->stid, 583 lctx, lctx->flags); 584 585 /* 586 * If the reply to the PASS_OPEN is still pending we'll wait for it to 587 * arrive and clean up when it does. 
588 */ 589 if (lctx->flags & LCTX_RPL_PENDING) { 590 KASSERT(TAILQ_EMPTY(&lctx->synq), 591 ("%s: synq not empty.", __func__)); 592 return (EINPROGRESS); 593 } 594 595 /* 596 * The host stack will abort all the connections on the listening 597 * socket's so_comp. It doesn't know about the connections on the synq 598 * so we need to take care of those. 599 */ 600 TAILQ_FOREACH(synqe, &lctx->synq, link) { 601 if (synqe->flags & TPF_SYNQE_HAS_L2TE) 602 send_reset_synqe(tod, synqe); 603 } 604 605 destroy_server(sc, lctx); 606 return (0); 607} 608 609static inline void 610hold_synqe(struct synq_entry *synqe) 611{ 612 613 refcount_acquire(&synqe->refcnt); 614} 615 616static inline void 617release_synqe(struct synq_entry *synqe) 618{ 619 620 if (refcount_release(&synqe->refcnt)) { 621 int needfree = synqe->flags & TPF_SYNQE_NEEDFREE; 622 623 m_freem(synqe->syn); 624 if (needfree) 625 free(synqe, M_CXGBE); 626 } 627} 628 629void 630t4_syncache_added(struct toedev *tod __unused, void *arg) 631{ 632 struct synq_entry *synqe = arg; 633 634 hold_synqe(synqe); 635} 636 637void 638t4_syncache_removed(struct toedev *tod __unused, void *arg) 639{ 640 struct synq_entry *synqe = arg; 641 642 release_synqe(synqe); 643} 644 645/* XXX */ 646extern void tcp_dooptions(struct tcpopt *, u_char *, int, int); 647 648int 649t4_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m) 650{ 651 struct adapter *sc = tod->tod_softc; 652 struct synq_entry *synqe = arg; 653 struct wrqe *wr; 654 struct l2t_entry *e; 655 struct tcpopt to; 656 struct ip *ip = mtod(m, struct ip *); 657 struct tcphdr *th; 658 659 wr = (struct wrqe *)atomic_readandclear_ptr(&synqe->wr); 660 if (wr == NULL) { 661 m_freem(m); 662 return (EALREADY); 663 } 664 665 if (ip->ip_v == IPVERSION) 666 th = (void *)(ip + 1); 667 else 668 th = (void *)((struct ip6_hdr *)ip + 1); 669 bzero(&to, sizeof(to)); 670 tcp_dooptions(&to, (void *)(th + 1), (th->th_off << 2) - sizeof(*th), 671 TO_SYN); 672 673 /* save these for later */ 
674 synqe->iss = be32toh(th->th_seq); 675 synqe->ts = to.to_tsval; 676 677 e = &sc->l2t->l2tab[synqe->l2e_idx]; 678 t4_l2t_send(sc, wr, e); 679 680 m_freem(m); /* don't need this any more */ 681 return (0); 682} 683 684static int 685do_pass_open_rpl(struct sge_iq *iq, const struct rss_header *rss, 686 struct mbuf *m) 687{ 688 struct adapter *sc = iq->adapter; 689 const struct cpl_pass_open_rpl *cpl = (const void *)(rss + 1); 690 int stid = GET_TID(cpl); 691 unsigned int status = cpl->status; 692 struct listen_ctx *lctx = lookup_stid(sc, stid); 693 struct inpcb *inp = lctx->inp; 694#ifdef INVARIANTS 695 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 696#endif 697 698 KASSERT(opcode == CPL_PASS_OPEN_RPL, 699 ("%s: unexpected opcode 0x%x", __func__, opcode)); 700 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 701 KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__)); 702 703 INP_WLOCK(inp); 704 705 CTR4(KTR_CXGBE, "%s: stid %d, status %u, flags 0x%x", 706 __func__, stid, status, lctx->flags); 707 708 lctx->flags &= ~LCTX_RPL_PENDING; 709 710 if (status != CPL_ERR_NONE) 711 log(LOG_ERR, "listener (stid %u) failed: %d\n", stid, status); 712 713#ifdef INVARIANTS 714 /* 715 * If the inp has been dropped (listening socket closed) then 716 * listen_stop must have run and taken the inp out of the hash. 717 */ 718 if (inp->inp_flags & INP_DROPPED) { 719 KASSERT(listen_hash_del(sc, inp) == NULL, 720 ("%s: inp %p still in listen hash", __func__, inp)); 721 } 722#endif 723 724 if (inp->inp_flags & INP_DROPPED && status != CPL_ERR_NONE) { 725 if (release_lctx(sc, lctx) != NULL) 726 INP_WUNLOCK(inp); 727 return (status); 728 } 729 730 /* 731 * Listening socket stopped listening earlier and now the chip tells us 732 * it has started the hardware listener. Stop it; the lctx will be 733 * released in do_close_server_rpl. 
734 */ 735 if (inp->inp_flags & INP_DROPPED) { 736 destroy_server(sc, lctx); 737 INP_WUNLOCK(inp); 738 return (status); 739 } 740 741 /* 742 * Failed to start hardware listener. Take inp out of the hash and 743 * release our reference on it. An error message has been logged 744 * already. 745 */ 746 if (status != CPL_ERR_NONE) { 747 listen_hash_del(sc, inp); 748 if (release_lctx(sc, lctx) != NULL) 749 INP_WUNLOCK(inp); 750 return (status); 751 } 752 753 /* hardware listener open for business */ 754 755 INP_WUNLOCK(inp); 756 return (status); 757} 758 759static int 760do_close_server_rpl(struct sge_iq *iq, const struct rss_header *rss, 761 struct mbuf *m) 762{ 763 struct adapter *sc = iq->adapter; 764 const struct cpl_close_listsvr_rpl *cpl = (const void *)(rss + 1); 765 int stid = GET_TID(cpl); 766 unsigned int status = cpl->status; 767 struct listen_ctx *lctx = lookup_stid(sc, stid); 768 struct inpcb *inp = lctx->inp; 769#ifdef INVARIANTS 770 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 771#endif 772 773 KASSERT(opcode == CPL_CLOSE_LISTSRV_RPL, 774 ("%s: unexpected opcode 0x%x", __func__, opcode)); 775 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 776 KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__)); 777 778 CTR3(KTR_CXGBE, "%s: stid %u, status %u", __func__, stid, status); 779 780 if (status != CPL_ERR_NONE) { 781 log(LOG_ERR, "%s: failed (%u) to close listener for stid %u\n", 782 __func__, status, stid); 783 return (status); 784 } 785 786 INP_WLOCK(inp); 787 inp = release_lctx(sc, lctx); 788 if (inp != NULL) 789 INP_WUNLOCK(inp); 790 791 return (status); 792} 793 794static void 795done_with_synqe(struct adapter *sc, struct synq_entry *synqe) 796{ 797 struct listen_ctx *lctx = synqe->lctx; 798 struct inpcb *inp = lctx->inp; 799 struct port_info *pi = synqe->syn->m_pkthdr.rcvif->if_softc; 800 struct l2t_entry *e = &sc->l2t->l2tab[synqe->l2e_idx]; 801 802 INP_WLOCK_ASSERT(inp); 803 804 TAILQ_REMOVE(&lctx->synq, 
synqe, link); 805 inp = release_lctx(sc, lctx); 806 if (inp) 807 INP_WUNLOCK(inp); 808 remove_tid(sc, synqe->tid); 809 release_tid(sc, synqe->tid, &sc->sge.ctrlq[pi->port_id]); 810 t4_l2t_release(e); 811 release_synqe(synqe); /* removed from synq list */ 812} 813 814int 815do_abort_req_synqe(struct sge_iq *iq, const struct rss_header *rss, 816 struct mbuf *m) 817{ 818 struct adapter *sc = iq->adapter; 819 const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1); 820 unsigned int tid = GET_TID(cpl); 821 struct synq_entry *synqe = lookup_tid(sc, tid); 822 struct listen_ctx *lctx = synqe->lctx; 823 struct inpcb *inp = lctx->inp; 824 int txqid; 825 struct sge_wrq *ofld_txq; 826#ifdef INVARIANTS 827 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 828#endif 829 830 KASSERT(opcode == CPL_ABORT_REQ_RSS, 831 ("%s: unexpected opcode 0x%x", __func__, opcode)); 832 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 833 KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__)); 834 835 CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d", 836 __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status); 837 838 if (negative_advice(cpl->status)) 839 return (0); /* Ignore negative advice */ 840 841 INP_WLOCK(inp); 842 843 get_qids_from_mbuf(synqe->syn, &txqid, NULL); 844 ofld_txq = &sc->sge.ofld_txq[txqid]; 845 846 /* 847 * If we'd initiated an abort earlier the reply to it is responsible for 848 * cleaning up resources. Otherwise we tear everything down right here 849 * right now. We owe the T4 a CPL_ABORT_RPL no matter what. 
850 */ 851 if (synqe->flags & TPF_ABORT_SHUTDOWN) { 852 INP_WUNLOCK(inp); 853 goto done; 854 } 855 856 done_with_synqe(sc, synqe); 857 /* inp lock released by done_with_synqe */ 858done: 859 send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST); 860 return (0); 861} 862 863int 864do_abort_rpl_synqe(struct sge_iq *iq, const struct rss_header *rss, 865 struct mbuf *m) 866{ 867 struct adapter *sc = iq->adapter; 868 const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1); 869 unsigned int tid = GET_TID(cpl); 870 struct synq_entry *synqe = lookup_tid(sc, tid); 871 struct listen_ctx *lctx = synqe->lctx; 872 struct inpcb *inp = lctx->inp; 873#ifdef INVARIANTS 874 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 875#endif 876 877 KASSERT(opcode == CPL_ABORT_RPL_RSS, 878 ("%s: unexpected opcode 0x%x", __func__, opcode)); 879 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 880 KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__)); 881 882 CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d", 883 __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status); 884 885 INP_WLOCK(inp); 886 KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, 887 ("%s: wasn't expecting abort reply for synqe %p (0x%x)", 888 __func__, synqe, synqe->flags)); 889 890 done_with_synqe(sc, synqe); 891 /* inp lock released by done_with_synqe */ 892 893 return (0); 894} 895 896void 897t4_offload_socket(struct toedev *tod, void *arg, struct socket *so) 898{ 899 struct adapter *sc = tod->tod_softc; 900 struct synq_entry *synqe = arg; 901#ifdef INVARIANTS 902 struct inpcb *inp = sotoinpcb(so); 903#endif 904 struct cpl_pass_establish *cpl = mtod(synqe->syn, void *); 905 struct toepcb *toep = *(struct toepcb **)(cpl + 1); 906 907 INP_INFO_LOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */ 908 INP_WLOCK_ASSERT(inp); 909 KASSERT(synqe->flags & TPF_SYNQE, 910 ("%s: %p not a synq_entry?", __func__, arg)); 911 912 offload_socket(so, toep); 913 
make_established(toep, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt); 914 toep->flags |= TPF_CPL_PENDING; 915 update_tid(sc, synqe->tid, toep); 916 synqe->flags |= TPF_SYNQE_EXPANDED; 917} 918 919static inline void 920save_qids_in_mbuf(struct mbuf *m, struct port_info *pi) 921{ 922 uint32_t txqid, rxqid; 923 924 txqid = (arc4random() % pi->nofldtxq) + pi->first_ofld_txq; 925 rxqid = (arc4random() % pi->nofldrxq) + pi->first_ofld_rxq; 926 927 m->m_pkthdr.flowid = (txqid << 16) | (rxqid & 0xffff); 928} 929 930static inline void 931get_qids_from_mbuf(struct mbuf *m, int *txqid, int *rxqid) 932{ 933 934 if (txqid) 935 *txqid = m->m_pkthdr.flowid >> 16; 936 if (rxqid) 937 *rxqid = m->m_pkthdr.flowid & 0xffff; 938} 939 940/* 941 * Use the trailing space in the mbuf in which the PASS_ACCEPT_REQ arrived to 942 * store some state temporarily. 943 */ 944static struct synq_entry * 945mbuf_to_synqe(struct mbuf *m) 946{
| 370 371 /* First the flowc ... */ 372 memset(flowc, 0, wr->wr_len); 373 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | 374 V_FW_FLOWC_WR_NPARAMS(nparams)); 375 flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) | 376 V_FW_WR_FLOWID(synqe->tid)); 377 flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; 378 flowc->mnemval[0].val = htobe32(pfvf); 379 flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; 380 flowc->mnemval[1].val = htobe32(pi->tx_chan); 381 flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; 382 flowc->mnemval[2].val = htobe32(pi->tx_chan); 383 flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID; 384 flowc->mnemval[3].val = htobe32(ofld_rxq->iq.abs_id); 385 flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF; 386 flowc->mnemval[4].val = htobe32(512); 387 flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS; 388 flowc->mnemval[5].val = htobe32(512); 389 synqe->flags |= TPF_FLOWC_WR_SENT; 390 391 /* ... then ABORT request */ 392 INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, synqe->tid); 393 req->rsvd0 = 0; /* don't have a snd_nxt */ 394 req->rsvd1 = 1; /* no data sent yet */ 395 req->cmd = CPL_ABORT_SEND_RST; 396 397 t4_l2t_send(sc, wr, e); 398} 399 400static int 401create_server(struct adapter *sc, struct listen_ctx *lctx) 402{ 403 struct wrqe *wr; 404 struct cpl_pass_open_req *req; 405 struct inpcb *inp = lctx->inp; 406 407 wr = alloc_wrqe(sizeof(*req), lctx->ctrlq); 408 if (wr == NULL) { 409 log(LOG_ERR, "%s: allocation failure", __func__); 410 return (ENOMEM); 411 } 412 req = wrtod(wr); 413 414 INIT_TP_WR(req, 0); 415 OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, lctx->stid)); 416 req->local_port = inp->inp_lport; 417 req->peer_port = 0; 418 req->local_ip = inp->inp_laddr.s_addr; 419 req->peer_ip = 0; 420 req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan)); 421 req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) | 422 F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id)); 423 424 t4_wrq_tx(sc, wr); 425 return (0); 426} 427 
428static int 429create_server6(struct adapter *sc, struct listen_ctx *lctx) 430{ 431 struct wrqe *wr; 432 struct cpl_pass_open_req6 *req; 433 struct inpcb *inp = lctx->inp; 434 435 wr = alloc_wrqe(sizeof(*req), lctx->ctrlq); 436 if (wr == NULL) { 437 log(LOG_ERR, "%s: allocation failure", __func__); 438 return (ENOMEM); 439 } 440 req = wrtod(wr); 441 442 INIT_TP_WR(req, 0); 443 OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, lctx->stid)); 444 req->local_port = inp->inp_lport; 445 req->peer_port = 0; 446 req->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0]; 447 req->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8]; 448 req->peer_ip_hi = 0; 449 req->peer_ip_lo = 0; 450 req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan)); 451 req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) | 452 F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id)); 453 454 t4_wrq_tx(sc, wr); 455 return (0); 456} 457 458static int 459destroy_server(struct adapter *sc, struct listen_ctx *lctx) 460{ 461 struct wrqe *wr; 462 struct cpl_close_listsvr_req *req; 463 464 wr = alloc_wrqe(sizeof(*req), lctx->ctrlq); 465 if (wr == NULL) { 466 /* XXX */ 467 panic("%s: allocation failure.", __func__); 468 } 469 req = wrtod(wr); 470 471 INIT_TP_WR(req, 0); 472 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, 473 lctx->stid)); 474 req->reply_ctrl = htobe16(lctx->ofld_rxq->iq.abs_id); 475 req->rsvd = htobe16(0); 476 477 t4_wrq_tx(sc, wr); 478 return (0); 479} 480 481/* 482 * Start a listening server by sending a passive open request to HW. 483 * 484 * Can't take adapter lock here and access to sc->flags, sc->open_device_map, 485 * sc->offload_map, if_capenable are all race prone. 
486 */ 487int 488t4_listen_start(struct toedev *tod, struct tcpcb *tp) 489{ 490 struct adapter *sc = tod->tod_softc; 491 struct port_info *pi; 492 struct inpcb *inp = tp->t_inpcb; 493 struct listen_ctx *lctx; 494 int i, rc; 495 496 INP_WLOCK_ASSERT(inp); 497 498#if 0 499 ADAPTER_LOCK(sc); 500 if (IS_BUSY(sc)) { 501 log(LOG_ERR, "%s: listen request ignored, %s is busy", 502 __func__, device_get_nameunit(sc->dev)); 503 goto done; 504 } 505 506 KASSERT(sc->flags & TOM_INIT_DONE, 507 ("%s: TOM not initialized", __func__)); 508#endif 509 510 if ((sc->open_device_map & sc->offload_map) == 0) 511 goto done; /* no port that's UP with IFCAP_TOE enabled */ 512 513 /* 514 * Find a running port with IFCAP_TOE (4 or 6). We'll use the first 515 * such port's queues to send the passive open and receive the reply to 516 * it. 517 * 518 * XXX: need a way to mark a port in use by offload. if_cxgbe should 519 * then reject any attempt to bring down such a port (and maybe reject 520 * attempts to disable IFCAP_TOE on that port too?). 
521 */ 522 for_each_port(sc, i) { 523 if (isset(&sc->open_device_map, i) && 524 sc->port[i]->ifp->if_capenable & IFCAP_TOE) 525 break; 526 } 527 KASSERT(i < sc->params.nports, 528 ("%s: no running port with TOE capability enabled.", __func__)); 529 pi = sc->port[i]; 530 531 if (listen_hash_find(sc, inp) != NULL) 532 goto done; /* already setup */ 533 534 lctx = alloc_lctx(sc, inp, pi); 535 if (lctx == NULL) { 536 log(LOG_ERR, 537 "%s: listen request ignored, %s couldn't allocate lctx\n", 538 __func__, device_get_nameunit(sc->dev)); 539 goto done; 540 } 541 listen_hash_add(sc, lctx); 542 543 CTR6(KTR_CXGBE, "%s: stid %u (%s), lctx %p, inp %p vflag 0x%x", 544 __func__, lctx->stid, tcpstates[tp->t_state], lctx, inp, 545 inp->inp_vflag); 546 547 if (inp->inp_vflag & INP_IPV6) 548 rc = create_server6(sc, lctx); 549 else 550 rc = create_server(sc, lctx); 551 if (rc != 0) { 552 log(LOG_ERR, "%s: %s failed to create hw listener: %d.\n", 553 __func__, device_get_nameunit(sc->dev), rc); 554 (void) listen_hash_del(sc, inp); 555 inp = release_lctx(sc, lctx); 556 /* can't be freed, host stack has a reference */ 557 KASSERT(inp != NULL, ("%s: inp freed", __func__)); 558 goto done; 559 } 560 lctx->flags |= LCTX_RPL_PENDING; 561done: 562#if 0 563 ADAPTER_UNLOCK(sc); 564#endif 565 return (0); 566} 567 568int 569t4_listen_stop(struct toedev *tod, struct tcpcb *tp) 570{ 571 struct listen_ctx *lctx; 572 struct adapter *sc = tod->tod_softc; 573 struct inpcb *inp = tp->t_inpcb; 574 struct synq_entry *synqe; 575 576 INP_WLOCK_ASSERT(inp); 577 578 lctx = listen_hash_del(sc, inp); 579 if (lctx == NULL) 580 return (ENOENT); /* no hardware listener for this inp */ 581 582 CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, flags %x", __func__, lctx->stid, 583 lctx, lctx->flags); 584 585 /* 586 * If the reply to the PASS_OPEN is still pending we'll wait for it to 587 * arrive and clean up when it does. 
588 */ 589 if (lctx->flags & LCTX_RPL_PENDING) { 590 KASSERT(TAILQ_EMPTY(&lctx->synq), 591 ("%s: synq not empty.", __func__)); 592 return (EINPROGRESS); 593 } 594 595 /* 596 * The host stack will abort all the connections on the listening 597 * socket's so_comp. It doesn't know about the connections on the synq 598 * so we need to take care of those. 599 */ 600 TAILQ_FOREACH(synqe, &lctx->synq, link) { 601 if (synqe->flags & TPF_SYNQE_HAS_L2TE) 602 send_reset_synqe(tod, synqe); 603 } 604 605 destroy_server(sc, lctx); 606 return (0); 607} 608 609static inline void 610hold_synqe(struct synq_entry *synqe) 611{ 612 613 refcount_acquire(&synqe->refcnt); 614} 615 616static inline void 617release_synqe(struct synq_entry *synqe) 618{ 619 620 if (refcount_release(&synqe->refcnt)) { 621 int needfree = synqe->flags & TPF_SYNQE_NEEDFREE; 622 623 m_freem(synqe->syn); 624 if (needfree) 625 free(synqe, M_CXGBE); 626 } 627} 628 629void 630t4_syncache_added(struct toedev *tod __unused, void *arg) 631{ 632 struct synq_entry *synqe = arg; 633 634 hold_synqe(synqe); 635} 636 637void 638t4_syncache_removed(struct toedev *tod __unused, void *arg) 639{ 640 struct synq_entry *synqe = arg; 641 642 release_synqe(synqe); 643} 644 645/* XXX */ 646extern void tcp_dooptions(struct tcpopt *, u_char *, int, int); 647 648int 649t4_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m) 650{ 651 struct adapter *sc = tod->tod_softc; 652 struct synq_entry *synqe = arg; 653 struct wrqe *wr; 654 struct l2t_entry *e; 655 struct tcpopt to; 656 struct ip *ip = mtod(m, struct ip *); 657 struct tcphdr *th; 658 659 wr = (struct wrqe *)atomic_readandclear_ptr(&synqe->wr); 660 if (wr == NULL) { 661 m_freem(m); 662 return (EALREADY); 663 } 664 665 if (ip->ip_v == IPVERSION) 666 th = (void *)(ip + 1); 667 else 668 th = (void *)((struct ip6_hdr *)ip + 1); 669 bzero(&to, sizeof(to)); 670 tcp_dooptions(&to, (void *)(th + 1), (th->th_off << 2) - sizeof(*th), 671 TO_SYN); 672 673 /* save these for later */ 
674 synqe->iss = be32toh(th->th_seq); 675 synqe->ts = to.to_tsval; 676 677 e = &sc->l2t->l2tab[synqe->l2e_idx]; 678 t4_l2t_send(sc, wr, e); 679 680 m_freem(m); /* don't need this any more */ 681 return (0); 682} 683 684static int 685do_pass_open_rpl(struct sge_iq *iq, const struct rss_header *rss, 686 struct mbuf *m) 687{ 688 struct adapter *sc = iq->adapter; 689 const struct cpl_pass_open_rpl *cpl = (const void *)(rss + 1); 690 int stid = GET_TID(cpl); 691 unsigned int status = cpl->status; 692 struct listen_ctx *lctx = lookup_stid(sc, stid); 693 struct inpcb *inp = lctx->inp; 694#ifdef INVARIANTS 695 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 696#endif 697 698 KASSERT(opcode == CPL_PASS_OPEN_RPL, 699 ("%s: unexpected opcode 0x%x", __func__, opcode)); 700 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 701 KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__)); 702 703 INP_WLOCK(inp); 704 705 CTR4(KTR_CXGBE, "%s: stid %d, status %u, flags 0x%x", 706 __func__, stid, status, lctx->flags); 707 708 lctx->flags &= ~LCTX_RPL_PENDING; 709 710 if (status != CPL_ERR_NONE) 711 log(LOG_ERR, "listener (stid %u) failed: %d\n", stid, status); 712 713#ifdef INVARIANTS 714 /* 715 * If the inp has been dropped (listening socket closed) then 716 * listen_stop must have run and taken the inp out of the hash. 717 */ 718 if (inp->inp_flags & INP_DROPPED) { 719 KASSERT(listen_hash_del(sc, inp) == NULL, 720 ("%s: inp %p still in listen hash", __func__, inp)); 721 } 722#endif 723 724 if (inp->inp_flags & INP_DROPPED && status != CPL_ERR_NONE) { 725 if (release_lctx(sc, lctx) != NULL) 726 INP_WUNLOCK(inp); 727 return (status); 728 } 729 730 /* 731 * Listening socket stopped listening earlier and now the chip tells us 732 * it has started the hardware listener. Stop it; the lctx will be 733 * released in do_close_server_rpl. 
734 */ 735 if (inp->inp_flags & INP_DROPPED) { 736 destroy_server(sc, lctx); 737 INP_WUNLOCK(inp); 738 return (status); 739 } 740 741 /* 742 * Failed to start hardware listener. Take inp out of the hash and 743 * release our reference on it. An error message has been logged 744 * already. 745 */ 746 if (status != CPL_ERR_NONE) { 747 listen_hash_del(sc, inp); 748 if (release_lctx(sc, lctx) != NULL) 749 INP_WUNLOCK(inp); 750 return (status); 751 } 752 753 /* hardware listener open for business */ 754 755 INP_WUNLOCK(inp); 756 return (status); 757} 758 759static int 760do_close_server_rpl(struct sge_iq *iq, const struct rss_header *rss, 761 struct mbuf *m) 762{ 763 struct adapter *sc = iq->adapter; 764 const struct cpl_close_listsvr_rpl *cpl = (const void *)(rss + 1); 765 int stid = GET_TID(cpl); 766 unsigned int status = cpl->status; 767 struct listen_ctx *lctx = lookup_stid(sc, stid); 768 struct inpcb *inp = lctx->inp; 769#ifdef INVARIANTS 770 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 771#endif 772 773 KASSERT(opcode == CPL_CLOSE_LISTSRV_RPL, 774 ("%s: unexpected opcode 0x%x", __func__, opcode)); 775 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 776 KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__)); 777 778 CTR3(KTR_CXGBE, "%s: stid %u, status %u", __func__, stid, status); 779 780 if (status != CPL_ERR_NONE) { 781 log(LOG_ERR, "%s: failed (%u) to close listener for stid %u\n", 782 __func__, status, stid); 783 return (status); 784 } 785 786 INP_WLOCK(inp); 787 inp = release_lctx(sc, lctx); 788 if (inp != NULL) 789 INP_WUNLOCK(inp); 790 791 return (status); 792} 793 794static void 795done_with_synqe(struct adapter *sc, struct synq_entry *synqe) 796{ 797 struct listen_ctx *lctx = synqe->lctx; 798 struct inpcb *inp = lctx->inp; 799 struct port_info *pi = synqe->syn->m_pkthdr.rcvif->if_softc; 800 struct l2t_entry *e = &sc->l2t->l2tab[synqe->l2e_idx]; 801 802 INP_WLOCK_ASSERT(inp); 803 804 TAILQ_REMOVE(&lctx->synq, 
synqe, link); 805 inp = release_lctx(sc, lctx); 806 if (inp) 807 INP_WUNLOCK(inp); 808 remove_tid(sc, synqe->tid); 809 release_tid(sc, synqe->tid, &sc->sge.ctrlq[pi->port_id]); 810 t4_l2t_release(e); 811 release_synqe(synqe); /* removed from synq list */ 812} 813 814int 815do_abort_req_synqe(struct sge_iq *iq, const struct rss_header *rss, 816 struct mbuf *m) 817{ 818 struct adapter *sc = iq->adapter; 819 const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1); 820 unsigned int tid = GET_TID(cpl); 821 struct synq_entry *synqe = lookup_tid(sc, tid); 822 struct listen_ctx *lctx = synqe->lctx; 823 struct inpcb *inp = lctx->inp; 824 int txqid; 825 struct sge_wrq *ofld_txq; 826#ifdef INVARIANTS 827 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 828#endif 829 830 KASSERT(opcode == CPL_ABORT_REQ_RSS, 831 ("%s: unexpected opcode 0x%x", __func__, opcode)); 832 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 833 KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__)); 834 835 CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d", 836 __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status); 837 838 if (negative_advice(cpl->status)) 839 return (0); /* Ignore negative advice */ 840 841 INP_WLOCK(inp); 842 843 get_qids_from_mbuf(synqe->syn, &txqid, NULL); 844 ofld_txq = &sc->sge.ofld_txq[txqid]; 845 846 /* 847 * If we'd initiated an abort earlier the reply to it is responsible for 848 * cleaning up resources. Otherwise we tear everything down right here 849 * right now. We owe the T4 a CPL_ABORT_RPL no matter what. 
850 */ 851 if (synqe->flags & TPF_ABORT_SHUTDOWN) { 852 INP_WUNLOCK(inp); 853 goto done; 854 } 855 856 done_with_synqe(sc, synqe); 857 /* inp lock released by done_with_synqe */ 858done: 859 send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST); 860 return (0); 861} 862 863int 864do_abort_rpl_synqe(struct sge_iq *iq, const struct rss_header *rss, 865 struct mbuf *m) 866{ 867 struct adapter *sc = iq->adapter; 868 const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1); 869 unsigned int tid = GET_TID(cpl); 870 struct synq_entry *synqe = lookup_tid(sc, tid); 871 struct listen_ctx *lctx = synqe->lctx; 872 struct inpcb *inp = lctx->inp; 873#ifdef INVARIANTS 874 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 875#endif 876 877 KASSERT(opcode == CPL_ABORT_RPL_RSS, 878 ("%s: unexpected opcode 0x%x", __func__, opcode)); 879 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 880 KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__)); 881 882 CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d", 883 __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status); 884 885 INP_WLOCK(inp); 886 KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, 887 ("%s: wasn't expecting abort reply for synqe %p (0x%x)", 888 __func__, synqe, synqe->flags)); 889 890 done_with_synqe(sc, synqe); 891 /* inp lock released by done_with_synqe */ 892 893 return (0); 894} 895 896void 897t4_offload_socket(struct toedev *tod, void *arg, struct socket *so) 898{ 899 struct adapter *sc = tod->tod_softc; 900 struct synq_entry *synqe = arg; 901#ifdef INVARIANTS 902 struct inpcb *inp = sotoinpcb(so); 903#endif 904 struct cpl_pass_establish *cpl = mtod(synqe->syn, void *); 905 struct toepcb *toep = *(struct toepcb **)(cpl + 1); 906 907 INP_INFO_LOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */ 908 INP_WLOCK_ASSERT(inp); 909 KASSERT(synqe->flags & TPF_SYNQE, 910 ("%s: %p not a synq_entry?", __func__, arg)); 911 912 offload_socket(so, toep); 913 
make_established(toep, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt); 914 toep->flags |= TPF_CPL_PENDING; 915 update_tid(sc, synqe->tid, toep); 916 synqe->flags |= TPF_SYNQE_EXPANDED; 917} 918 919static inline void 920save_qids_in_mbuf(struct mbuf *m, struct port_info *pi) 921{ 922 uint32_t txqid, rxqid; 923 924 txqid = (arc4random() % pi->nofldtxq) + pi->first_ofld_txq; 925 rxqid = (arc4random() % pi->nofldrxq) + pi->first_ofld_rxq; 926 927 m->m_pkthdr.flowid = (txqid << 16) | (rxqid & 0xffff); 928} 929 930static inline void 931get_qids_from_mbuf(struct mbuf *m, int *txqid, int *rxqid) 932{ 933 934 if (txqid) 935 *txqid = m->m_pkthdr.flowid >> 16; 936 if (rxqid) 937 *rxqid = m->m_pkthdr.flowid & 0xffff; 938} 939 940/* 941 * Use the trailing space in the mbuf in which the PASS_ACCEPT_REQ arrived to 942 * store some state temporarily. 943 */ 944static struct synq_entry * 945mbuf_to_synqe(struct mbuf *m) 946{
|
947 int len = roundup(sizeof (struct synq_entry), 8);
| 947 int len = roundup2(sizeof (struct synq_entry), 8);
|
948 int tspace = M_TRAILINGSPACE(m); 949 struct synq_entry *synqe = NULL; 950 951 if (tspace < len) { 952 synqe = malloc(sizeof(*synqe), M_CXGBE, M_NOWAIT); 953 if (synqe == NULL) 954 return (NULL); 955 synqe->flags = TPF_SYNQE | TPF_SYNQE_NEEDFREE; 956 } else { 957 synqe = (void *)(m->m_data + m->m_len + tspace - len); 958 synqe->flags = TPF_SYNQE; 959 } 960 961 return (synqe); 962} 963 964static void 965t4opt_to_tcpopt(const struct tcp_options *t4opt, struct tcpopt *to) 966{ 967 bzero(to, sizeof(*to)); 968 969 if (t4opt->mss) { 970 to->to_flags |= TOF_MSS; 971 to->to_mss = be16toh(t4opt->mss); 972 } 973 974 if (t4opt->wsf) { 975 to->to_flags |= TOF_SCALE; 976 to->to_wscale = t4opt->wsf; 977 } 978 979 if (t4opt->tstamp) 980 to->to_flags |= TOF_TS; 981 982 if (t4opt->sack) 983 to->to_flags |= TOF_SACKPERM; 984} 985 986/* 987 * Options2 for passive open. 988 */ 989static uint32_t 990calc_opt2p(struct adapter *sc, struct port_info *pi, int rxqid, 991 const struct tcp_options *tcpopt, struct tcphdr *th, int ulp_mode) 992{ 993 uint32_t opt2 = 0; 994 struct sge_ofld_rxq *ofld_rxq = &sc->sge.ofld_rxq[rxqid]; 995 996 if (V_tcp_do_rfc1323) { 997 if (tcpopt->tstamp) 998 opt2 |= F_TSTAMPS_EN; 999 if (tcpopt->sack) 1000 opt2 |= F_SACK_EN; 1001 if (tcpopt->wsf > 0) 1002 opt2 |= F_WND_SCALE_EN; 1003 } 1004 1005 if (V_tcp_do_ecn && th->th_flags & (TH_ECE | TH_CWR)) 1006 opt2 |= F_CCTRL_ECN; 1007 1008 opt2 |= V_TX_QUEUE(sc->params.tp.tx_modq[pi->tx_chan]);
| 948 int tspace = M_TRAILINGSPACE(m); 949 struct synq_entry *synqe = NULL; 950 951 if (tspace < len) { 952 synqe = malloc(sizeof(*synqe), M_CXGBE, M_NOWAIT); 953 if (synqe == NULL) 954 return (NULL); 955 synqe->flags = TPF_SYNQE | TPF_SYNQE_NEEDFREE; 956 } else { 957 synqe = (void *)(m->m_data + m->m_len + tspace - len); 958 synqe->flags = TPF_SYNQE; 959 } 960 961 return (synqe); 962} 963 964static void 965t4opt_to_tcpopt(const struct tcp_options *t4opt, struct tcpopt *to) 966{ 967 bzero(to, sizeof(*to)); 968 969 if (t4opt->mss) { 970 to->to_flags |= TOF_MSS; 971 to->to_mss = be16toh(t4opt->mss); 972 } 973 974 if (t4opt->wsf) { 975 to->to_flags |= TOF_SCALE; 976 to->to_wscale = t4opt->wsf; 977 } 978 979 if (t4opt->tstamp) 980 to->to_flags |= TOF_TS; 981 982 if (t4opt->sack) 983 to->to_flags |= TOF_SACKPERM; 984} 985 986/* 987 * Options2 for passive open. 988 */ 989static uint32_t 990calc_opt2p(struct adapter *sc, struct port_info *pi, int rxqid, 991 const struct tcp_options *tcpopt, struct tcphdr *th, int ulp_mode) 992{ 993 uint32_t opt2 = 0; 994 struct sge_ofld_rxq *ofld_rxq = &sc->sge.ofld_rxq[rxqid]; 995 996 if (V_tcp_do_rfc1323) { 997 if (tcpopt->tstamp) 998 opt2 |= F_TSTAMPS_EN; 999 if (tcpopt->sack) 1000 opt2 |= F_SACK_EN; 1001 if (tcpopt->wsf > 0) 1002 opt2 |= F_WND_SCALE_EN; 1003 } 1004 1005 if (V_tcp_do_ecn && th->th_flags & (TH_ECE | TH_CWR)) 1006 opt2 |= F_CCTRL_ECN; 1007 1008 opt2 |= V_TX_QUEUE(sc->params.tp.tx_modq[pi->tx_chan]);
|
 1009	opt2 |= F_RX_COALESCE_VALID | V_RX_COALESCE(M_RX_COALESCE); /* NOTE(review): F_RX_COALESCE_VALID is the T4 encoding of this opt2 field; on T5 the same field must be marked valid with F_T5_OPT_2_VALID instead — gate on is_t4(sc) (the updated variant of this code elsewhere in this file does exactly that) */
| |
 1010	opt2 |= F_RSS_QUEUE_VALID | V_RSS_QUEUE(ofld_rxq->iq.abs_id); /* steer this connection's ingress to the chosen offload rx queue */
| 1009 opt2 |= F_RSS_QUEUE_VALID | V_RSS_QUEUE(ofld_rxq->iq.abs_id);
|
| 1010	if (is_t4(sc)) 1011	opt2 |= F_RX_COALESCE_VALID | V_RX_COALESCE(M_RX_COALESCE); 1012	else 1013	opt2 |= F_T5_OPT_2_VALID | V_RX_COALESCE(M_RX_COALESCE); /* T4 and T5 use different "valid" bits for the rx-coalesce field of opt2, so the enable must be chip-revision specific */
|
1011 1012#ifdef USE_DDP_RX_FLOW_CONTROL 1013 if (ulp_mode == ULP_MODE_TCPDDP) 1014 opt2 |= F_RX_FC_VALID | F_RX_FC_DDP; 1015#endif 1016 1017 return htobe32(opt2); 1018} 1019 1020/* XXX: duplication. */ 1021static inline void 1022tcp_fields_to_host(struct tcphdr *th) 1023{ 1024 1025 th->th_seq = ntohl(th->th_seq); 1026 th->th_ack = ntohl(th->th_ack); 1027 th->th_win = ntohs(th->th_win); 1028 th->th_urp = ntohs(th->th_urp); 1029} 1030 1031static void 1032pass_accept_req_to_protohdrs(const struct mbuf *m, struct in_conninfo *inc, 1033 struct tcphdr *th) 1034{ 1035 const struct cpl_pass_accept_req *cpl = mtod(m, const void *); 1036 const struct ether_header *eh; 1037 unsigned int hlen = be32toh(cpl->hdr_len); 1038 uintptr_t l3hdr; 1039 const struct tcphdr *tcp; 1040 1041 eh = (const void *)(cpl + 1); 1042 l3hdr = ((uintptr_t)eh + G_ETH_HDR_LEN(hlen)); 1043 tcp = (const void *)(l3hdr + G_IP_HDR_LEN(hlen)); 1044 1045 if (inc) { 1046 bzero(inc, sizeof(*inc)); 1047 inc->inc_fport = tcp->th_sport; 1048 inc->inc_lport = tcp->th_dport; 1049 if (((struct ip *)l3hdr)->ip_v == IPVERSION) { 1050 const struct ip *ip = (const void *)l3hdr; 1051 1052 inc->inc_faddr = ip->ip_src; 1053 inc->inc_laddr = ip->ip_dst; 1054 } else { 1055 const struct ip6_hdr *ip6 = (const void *)l3hdr; 1056 1057 inc->inc_flags |= INC_ISIPV6; 1058 inc->inc6_faddr = ip6->ip6_src; 1059 inc->inc6_laddr = ip6->ip6_dst; 1060 } 1061 } 1062 1063 if (th) { 1064 bcopy(tcp, th, sizeof(*th)); 1065 tcp_fields_to_host(th); /* just like tcp_input */ 1066 } 1067} 1068 1069static int 1070ifnet_has_ip6(struct ifnet *ifp, struct in6_addr *ip6) 1071{ 1072 struct ifaddr *ifa; 1073 struct sockaddr_in6 *sin6; 1074 int found = 0; 1075 struct in6_addr in6 = *ip6; 1076 1077 /* Just as in ip6_input */ 1078 if (in6_clearscope(&in6) || in6_clearscope(&in6)) 1079 return (0); 1080 in6_setscope(&in6, ifp, NULL); 1081 1082 if_addr_rlock(ifp); 1083 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1084 sin6 = (void *)ifa->ifa_addr; 1085 if 
(sin6->sin6_family != AF_INET6) 1086 continue; 1087 1088 if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &in6)) { 1089 found = 1; 1090 break; 1091 } 1092 } 1093 if_addr_runlock(ifp); 1094 1095 return (found); 1096} 1097 1098static struct l2t_entry * 1099get_l2te_for_nexthop(struct port_info *pi, struct ifnet *ifp, 1100 struct in_conninfo *inc) 1101{ 1102 struct rtentry *rt; 1103 struct l2t_entry *e; 1104 struct sockaddr_in6 sin6; 1105 struct sockaddr *dst = (void *)&sin6; 1106 1107 if (inc->inc_flags & INC_ISIPV6) { 1108 dst->sa_len = sizeof(struct sockaddr_in6); 1109 dst->sa_family = AF_INET6; 1110 ((struct sockaddr_in6 *)dst)->sin6_addr = inc->inc6_faddr; 1111 1112 if (IN6_IS_ADDR_LINKLOCAL(&inc->inc6_laddr)) { 1113 /* no need for route lookup */ 1114 e = t4_l2t_get(pi, ifp, dst); 1115 return (e); 1116 } 1117 } else { 1118 dst->sa_len = sizeof(struct sockaddr_in); 1119 dst->sa_family = AF_INET; 1120 ((struct sockaddr_in *)dst)->sin_addr = inc->inc_faddr; 1121 } 1122 1123 rt = rtalloc1(dst, 0, 0); 1124 if (rt == NULL) 1125 return (NULL); 1126 else { 1127 struct sockaddr *nexthop; 1128 1129 RT_UNLOCK(rt); 1130 if (rt->rt_ifp != ifp) 1131 e = NULL; 1132 else { 1133 if (rt->rt_flags & RTF_GATEWAY) 1134 nexthop = rt->rt_gateway; 1135 else 1136 nexthop = dst; 1137 e = t4_l2t_get(pi, ifp, nexthop); 1138 } 1139 RTFREE(rt); 1140 } 1141 1142 return (e); 1143} 1144 1145static int 1146ifnet_has_ip(struct ifnet *ifp, struct in_addr in) 1147{ 1148 struct ifaddr *ifa; 1149 struct sockaddr_in *sin; 1150 int found = 0; 1151 1152 if_addr_rlock(ifp); 1153 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1154 sin = (void *)ifa->ifa_addr; 1155 if (sin->sin_family != AF_INET) 1156 continue; 1157 1158 if (sin->sin_addr.s_addr == in.s_addr) { 1159 found = 1; 1160 break; 1161 } 1162 } 1163 if_addr_runlock(ifp); 1164 1165 return (found); 1166} 1167 1168#define REJECT_PASS_ACCEPT() do { \ 1169 reject_reason = __LINE__; \ 1170 goto reject; \ 1171} while (0) 1172 1173/* 1174 * The context 
associated with a tid entry via insert_tid could be a synq_entry
 * or a toepcb.  The only way CPL handlers can tell is via a bit in these flags.
 */
CTASSERT(offsetof(struct toepcb, flags) == offsetof(struct synq_entry, flags));

/*
 * Incoming SYN on a listening socket.
 *
 * XXX: Every use of ifp in this routine has a bad race with up/down, toe/-toe,
 * etc.
 */
static int
do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	struct toedev *tod;
	const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
	struct cpl_pass_accept_rpl *rpl;
	struct wrqe *wr;
	unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
	unsigned int tid = GET_TID(cpl);
	struct listen_ctx *lctx = lookup_stid(sc, stid);
	struct inpcb *inp;
	struct socket *so;
	struct in_conninfo inc;
	struct tcphdr th;
	struct tcpopt to;
	struct port_info *pi;
	struct ifnet *hw_ifp, *ifp;
	struct l2t_entry *e = NULL;
	int rscale, mtu_idx, rx_credits, rxqid, ulp_mode;
	struct synq_entry *synqe = NULL;
	int reject_reason;
	uint16_t vid;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_PASS_ACCEPT_REQ,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));

	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p", __func__, stid, tid,
	    lctx);

	/* Recover the 4-tuple and TCP options from the SYN in the mbuf. */
	pass_accept_req_to_protohdrs(m, &inc, &th);
	t4opt_to_tcpopt(&cpl->tcpopt, &to);

	pi = sc->port[G_SYN_INTF(be16toh(cpl->l2info))];
	hw_ifp = pi->ifp;	/* the cxgbeX ifnet */
	m->m_pkthdr.rcvif = hw_ifp;
	tod = TOEDEV(hw_ifp);

	/*
	 * Figure out if there is a pseudo interface (vlan, lagg, etc.)
	 * involved.  Don't offload if the SYN had a VLAN tag and the vid
	 * doesn't match anything on this interface.
	 *
	 * XXX: lagg support, lagg + vlan support.
	 */
	vid = EVL_VLANOFTAG(be16toh(cpl->vlan));
	if (vid != 0xfff) {
		ifp = VLAN_DEVAT(hw_ifp, vid);
		if (ifp == NULL)
			REJECT_PASS_ACCEPT();
	} else
		ifp = hw_ifp;

	/*
	 * Don't offload if the peer requested a TCP option that's not known to
	 * the silicon.
	 */
	if (cpl->tcpopt.unknown)
		REJECT_PASS_ACCEPT();

	if (inc.inc_flags & INC_ISIPV6) {

		/* Don't offload if the ifcap isn't enabled */
		if ((ifp->if_capenable & IFCAP_TOE6) == 0)
			REJECT_PASS_ACCEPT();

		/*
		 * SYN must be directed to an IP6 address on this ifnet.  This
		 * is more restrictive than in6_localip.
		 */
		if (!ifnet_has_ip6(ifp, &inc.inc6_laddr))
			REJECT_PASS_ACCEPT();
	} else {

		/* Don't offload if the ifcap isn't enabled */
		if ((ifp->if_capenable & IFCAP_TOE4) == 0)
			REJECT_PASS_ACCEPT();

		/*
		 * SYN must be directed to an IP address on this ifnet.  This
		 * is more restrictive than in_localip.
		 */
		if (!ifnet_has_ip(ifp, inc.inc_laddr))
			REJECT_PASS_ACCEPT();
	}

	e = get_l2te_for_nexthop(pi, ifp, &inc);
	if (e == NULL)
		REJECT_PASS_ACCEPT();

	synqe = mbuf_to_synqe(m);
	if (synqe == NULL)
		REJECT_PASS_ACCEPT();

	wr = alloc_wrqe(sizeof(*rpl), &sc->sge.ctrlq[pi->port_id]);
	if (wr == NULL)
		REJECT_PASS_ACCEPT();
	rpl = wrtod(wr);

	INP_INFO_WLOCK(&V_tcbinfo);	/* for 4-tuple check, syncache_add */

	/* Don't offload if the 4-tuple is already in use */
	if (toe_4tuple_check(&inc, &th, ifp) != 0) {
		INP_INFO_WUNLOCK(&V_tcbinfo);
		free(wr, M_CXGBE);
		REJECT_PASS_ACCEPT();
	}

	inp = lctx->inp;		/* listening socket, not owned by TOE */
	INP_WLOCK(inp);

	/* Don't offload if the listening socket has closed */
	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
		/*
		 * The listening socket has closed.  The reply from the TOE to
		 * our CPL_CLOSE_LISTSRV_REQ will ultimately release all
		 * resources tied to this listen context.
		 */
		INP_WUNLOCK(inp);
		INP_INFO_WUNLOCK(&V_tcbinfo);
		free(wr, M_CXGBE);
		REJECT_PASS_ACCEPT();
	}
	so = inp->inp_socket;

	mtu_idx = find_best_mtu_idx(sc, &inc, be16toh(cpl->tcpopt.mss));
	rscale = cpl->tcpopt.wsf && V_tcp_do_rfc1323 ?
	    select_rcv_wscale() : 0;
	SOCKBUF_LOCK(&so->so_rcv);
	/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
	rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
	SOCKBUF_UNLOCK(&so->so_rcv);

	save_qids_in_mbuf(m, pi);
	get_qids_from_mbuf(m, NULL, &rxqid);

	INIT_TP_WR_MIT_CPL(rpl, CPL_PASS_ACCEPT_RPL, tid);
	if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0) {
		ulp_mode = ULP_MODE_TCPDDP;
		synqe->flags |= TPF_SYNQE_TCPDDP;
	} else
		ulp_mode = ULP_MODE_NONE;
	rpl->opt0 = calc_opt0(so, pi, e, mtu_idx, rscale, rx_credits, ulp_mode);
	rpl->opt2 = calc_opt2p(sc, pi, rxqid, &cpl->tcpopt, &th, ulp_mode);

	synqe->tid = tid;
	synqe->lctx = lctx;
	synqe->syn = m;		/* synqe takes over the SYN mbuf */
	m = NULL;
	refcount_init(&synqe->refcnt, 1);	/* 1 means extra hold */
	synqe->l2e_idx = e->idx;
	synqe->rcv_bufsize = rx_credits;
	atomic_store_rel_ptr(&synqe->wr, (uintptr_t)wr);

	insert_tid(sc, tid, synqe);
	TAILQ_INSERT_TAIL(&lctx->synq, synqe, link);
	hold_synqe(synqe);	/* hold for the duration it's in the synq */
	hold_lctx(lctx);	/* A synqe on the list has a ref on its lctx */

	/*
	 * If all goes well t4_syncache_respond will get called during
	 * syncache_add.  Also note that syncache_add releases both pcbinfo and
	 * pcb locks.
	 */
	toe_syncache_add(&inc, &to, &th, inp, tod, synqe);
	INP_UNLOCK_ASSERT(inp);	/* ok to assert, we have a ref on the inp */
	INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);

	/*
	 * If we replied during syncache_add (synqe->wr has been consumed),
	 * good.  Otherwise, set it to 0 so that further syncache_respond
	 * attempts by the kernel will be ignored.
	 */
	if (atomic_cmpset_ptr(&synqe->wr, (uintptr_t)wr, 0)) {

		/*
		 * syncache may or may not have a hold on the synqe, which may
		 * or may not be stashed in the original SYN mbuf passed to us.
		 * Just copy it over instead of dealing with all possibilities.
		 */
		m = m_dup(synqe->syn, M_NOWAIT);
		if (m)
			m->m_pkthdr.rcvif = hw_ifp;

		remove_tid(sc, synqe->tid);
		free(wr, M_CXGBE);

		/* Yank the synqe out of the lctx synq. */
		INP_WLOCK(inp);
		TAILQ_REMOVE(&lctx->synq, synqe, link);
		release_synqe(synqe);	/* removed from synq list */
		inp = release_lctx(sc, lctx);
		if (inp)
			INP_WUNLOCK(inp);

		release_synqe(synqe);	/* extra hold */
		REJECT_PASS_ACCEPT();
	}

	CTR5(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p, synqe %p, SYNACK",
	    __func__, stid, tid, lctx, synqe);

	INP_WLOCK(inp);
	synqe->flags |= TPF_SYNQE_HAS_L2TE;
	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
		/*
		 * Listening socket closed but tod_listen_stop did not abort
		 * this tid because there was no L2T entry for the tid at that
		 * time.  Abort it now.  The reply to the abort will clean up.
		 */
		CTR6(KTR_CXGBE,
		    "%s: stid %u, tid %u, lctx %p, synqe %p (0x%x), ABORT",
		    __func__, stid, tid, lctx, synqe, synqe->flags);
		if (!(synqe->flags & TPF_SYNQE_EXPANDED))
			send_reset_synqe(tod, synqe);
		INP_WUNLOCK(inp);

		release_synqe(synqe);	/* extra hold */
		return (__LINE__);
	}
	INP_WUNLOCK(inp);

	release_synqe(synqe);	/* extra hold */
	return (0);
reject:
	/*
	 * Reject path: give up on offloading and hand the SYN back to the
	 * host stack (via if_input) so the connection can still be handled
	 * without TOE.
	 */
	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid,
	    reject_reason);

	if (e)
		t4_l2t_release(e);
	release_tid(sc, tid, lctx->ctrlq);

	if (__predict_true(m != NULL)) {
		m_adj(m, sizeof(*cpl));
		m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID |
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		m->m_pkthdr.csum_data = 0xffff;
		hw_ifp->if_input(hw_ifp, m);
	}

	return (reject_reason);
}

/*
 * Synthesize the in_conninfo/tcphdr/tcpopt describing the final ACK of the
 * 3-way handshake, from the saved SYN plus fields of the CPL_PASS_ESTABLISH.
 */
static void
synqe_to_protohdrs(struct synq_entry *synqe,
    const struct
cpl_pass_establish *cpl, struct in_conninfo *inc,
    struct tcphdr *th, struct tcpopt *to)
{
	uint16_t tcp_opt = be16toh(cpl->tcp_opt);

	/* start off with the original SYN */
	pass_accept_req_to_protohdrs(synqe->syn, inc, th);

	/* modify parts to make it look like the ACK to our SYN|ACK */
	th->th_flags = TH_ACK;
	th->th_ack = synqe->iss + 1;
	th->th_seq = be32toh(cpl->rcv_isn);
	bzero(to, sizeof(*to));
	if (G_TCPOPT_TSTAMP(tcp_opt)) {
		to->to_flags |= TOF_TS;
		to->to_tsecr = synqe->ts;
	}
}

/*
 * CPL_PASS_ESTABLISH: the hardware completed the 3-way handshake for an
 * embryonic (synq) connection.  Convert it into a fully offloaded connection
 * (toepcb) by expanding the syncache entry that was added for it.
 */
static int
do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	struct port_info *pi;
	struct ifnet *ifp;
	const struct cpl_pass_establish *cpl = (const void *)(rss + 1);
#if defined(KTR) || defined(INVARIANTS)
	unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
#endif
	unsigned int tid = GET_TID(cpl);
	struct synq_entry *synqe = lookup_tid(sc, tid);
	struct listen_ctx *lctx = synqe->lctx;
	struct inpcb *inp = lctx->inp;
	struct socket *so;
	struct tcphdr th;
	struct tcpopt to;
	struct in_conninfo inc;
	struct toepcb *toep;
	u_int txqid, rxqid;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_PASS_ESTABLISH,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
	KASSERT(synqe->flags & TPF_SYNQE,
	    ("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe));

	INP_INFO_WLOCK(&V_tcbinfo);	/* for syncache_expand */
	INP_WLOCK(inp);

	CTR6(KTR_CXGBE,
	    "%s: stid %u, tid %u, synqe %p (0x%x), inp_flags 0x%x",
	    __func__, stid, tid, synqe, synqe->flags, inp->inp_flags);

	if (__predict_false(inp->inp_flags & INP_DROPPED)) {

		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
			    ("%s: listen socket closed but tid %u not aborted.",
			    __func__, tid));
		}

		INP_WUNLOCK(inp);
		INP_INFO_WUNLOCK(&V_tcbinfo);
		return (0);
	}

	ifp = synqe->syn->m_pkthdr.rcvif;
	pi = ifp->if_softc;
	KASSERT(pi->adapter == sc,
	    ("%s: pi %p, sc %p mismatch", __func__, pi, sc));

	get_qids_from_mbuf(synqe->syn, &txqid, &rxqid);
	KASSERT(rxqid == iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0],
	    ("%s: CPL arrived on unexpected rxq.  %d %d", __func__, rxqid,
	    (int)(iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0])));

	toep = alloc_toepcb(pi, txqid, rxqid, M_NOWAIT);
	if (toep == NULL) {
reset:
		/*
		 * The reply to this abort will perform final cleanup.  There is
		 * no need to check for HAS_L2TE here.  We can be here only if
		 * we responded to the PASS_ACCEPT_REQ, and our response had the
		 * L2T idx.
		 */
		send_reset_synqe(TOEDEV(ifp), synqe);
		INP_WUNLOCK(inp);
		INP_INFO_WUNLOCK(&V_tcbinfo);
		return (0);
	}
	toep->tid = tid;
	toep->l2te = &sc->l2t->l2tab[synqe->l2e_idx];
	if (synqe->flags & TPF_SYNQE_TCPDDP)
		set_tcpddp_ulp_mode(toep);
	else
		toep->ulp_mode = ULP_MODE_NONE;
	/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
	toep->rx_credits = synqe->rcv_bufsize;

	so = inp->inp_socket;
	KASSERT(so != NULL, ("%s: socket is NULL", __func__));

	/* Come up with something that syncache_expand should be ok with. */
	synqe_to_protohdrs(synqe, cpl, &inc, &th, &to);

	/*
	 * No more need for anything in the mbuf that carried the
	 * CPL_PASS_ACCEPT_REQ.  Drop the CPL_PASS_ESTABLISH and toep pointer
	 * there.  XXX: bad form but I don't want to increase the size of synqe.
	 */
	m = synqe->syn;
	KASSERT(sizeof(*cpl) + sizeof(toep) <= m->m_len,
	    ("%s: no room in mbuf %p (m_len %d)", __func__, m, m->m_len));
	bcopy(cpl, mtod(m, void *), sizeof(*cpl));
	*(struct toepcb **)(mtod(m, struct cpl_pass_establish *) + 1) = toep;

	if (!toe_syncache_expand(&inc, &to, &th, &so) || so == NULL) {
		free_toepcb(toep);
		goto reset;
	}

	/*
	 * This is for the unlikely case where the syncache entry that we added
	 * has been evicted from the syncache, but the syncache_expand above
	 * works because of syncookies.
	 *
	 * XXX: we've held the tcbinfo lock throughout so there's no risk of
	 * anyone accept'ing a connection before we've installed our hooks, but
	 * this somewhat defeats the purpose of having a tod_offload_socket :-(
	 */
	if (__predict_false(!(synqe->flags & TPF_SYNQE_EXPANDED))) {
		struct inpcb *new_inp = sotoinpcb(so);

		INP_WLOCK(new_inp);
		tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0);
		t4_offload_socket(TOEDEV(ifp), synqe, so);
		INP_WUNLOCK(new_inp);
	}

	/* Done with the synqe */
	TAILQ_REMOVE(&lctx->synq, synqe, link);
	inp = release_lctx(sc, lctx);
	if (inp != NULL)
		INP_WUNLOCK(inp);
	INP_INFO_WUNLOCK(&V_tcbinfo);
	release_synqe(synqe);

	return (0);
}

/* Register the listen-side (passive open) CPL handlers with the adapter. */
void
t4_init_listen_cpl_handlers(struct adapter *sc)
{

	t4_register_cpl_handler(sc, CPL_PASS_OPEN_RPL, do_pass_open_rpl);
	t4_register_cpl_handler(sc, CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl);
	t4_register_cpl_handler(sc, CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
	t4_register_cpl_handler(sc, CPL_PASS_ESTABLISH, do_pass_establish);
}
#endif
#ifdef USE_DDP_RX_FLOW_CONTROL
	if (ulp_mode == ULP_MODE_TCPDDP)
		opt2 |= F_RX_FC_VALID | F_RX_FC_DDP;
#endif

	return htobe32(opt2);
}

/* XXX: duplication. */
/*
 * Convert the seq/ack/win/urp fields of *th from network to host byte order,
 * in place (the same transformation tcp_input applies).
 */
static inline void
tcp_fields_to_host(struct tcphdr *th)
{

	th->th_seq = ntohl(th->th_seq);
	th->th_ack = ntohl(th->th_ack);
	th->th_win = ntohs(th->th_win);
	th->th_urp = ntohs(th->th_urp);
}

/*
 * Reconstruct the addresses/ports of the connection (inc) and the TCP header
 * (th) from the Ethernet/IP/TCP headers that follow the CPL_PASS_ACCEPT_REQ
 * in the mbuf.  Either output pointer may be NULL if the caller doesn't need
 * that part.
 */
static void
pass_accept_req_to_protohdrs(const struct mbuf *m, struct in_conninfo *inc,
    struct tcphdr *th)
{
	const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
	const struct ether_header *eh;
	unsigned int hlen = be32toh(cpl->hdr_len);
	uintptr_t l3hdr;
	const struct tcphdr *tcp;

	/* L2 header starts right after the CPL; L3/L4 offsets come from hw. */
	eh = (const void *)(cpl + 1);
	l3hdr = ((uintptr_t)eh + G_ETH_HDR_LEN(hlen));
	tcp = (const void *)(l3hdr + G_IP_HDR_LEN(hlen));

	if (inc) {
		bzero(inc, sizeof(*inc));
		inc->inc_fport = tcp->th_sport;
		inc->inc_lport = tcp->th_dport;
		if (((struct ip *)l3hdr)->ip_v == IPVERSION) {
			const struct ip *ip = (const void *)l3hdr;

			inc->inc_faddr = ip->ip_src;
			inc->inc_laddr = ip->ip_dst;
		} else {
			const struct ip6_hdr *ip6 = (const void *)l3hdr;

			inc->inc_flags |= INC_ISIPV6;
			inc->inc6_faddr = ip6->ip6_src;
			inc->inc6_laddr = ip6->ip6_dst;
		}
	}

	if (th) {
		bcopy(tcp, th, sizeof(*th));
		tcp_fields_to_host(th);		/* just like tcp_input */
	}
}

/*
 * Returns 1 if the given IPv6 address (with its scope resolved against ifp)
 * is configured on ifp, 0 otherwise.
 */
static int
ifnet_has_ip6(struct ifnet *ifp, struct in6_addr *ip6)
{
	struct ifaddr *ifa;
	struct sockaddr_in6 *sin6;
	int found = 0;
	struct in6_addr in6 = *ip6;

	/* Just as in ip6_input */
	/*
	 * NOTE(review): in6_clearscope is invoked twice on the same address;
	 * the second call looks redundant (ip6_input clears two different
	 * addresses, src and dst) -- confirm intent.
	 */
	if (in6_clearscope(&in6) || in6_clearscope(&in6))
		return (0);
	in6_setscope(&in6, ifp, NULL);

	if_addr_rlock(ifp);
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		sin6 = (void *)ifa->ifa_addr;
		if (sin6->sin6_family != AF_INET6)
			continue;

		if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &in6)) {
			found = 1;
			break;
		}
	}
	if_addr_runlock(ifp);

	return (found);
}

/*
 * Find the L2 table entry for the next hop toward the peer described by
 * 'inc'.  Link-local IPv6 peers are reached directly (no route lookup).
 * Otherwise a route to the peer is looked up and must egress via 'ifp'; the
 * L2 entry is created for the gateway if the route has one, else for the
 * peer itself.  Returns NULL on failure.
 */
static struct l2t_entry *
get_l2te_for_nexthop(struct port_info *pi, struct ifnet *ifp,
    struct in_conninfo *inc)
{
	struct rtentry *rt;
	struct l2t_entry *e;
	struct sockaddr_in6 sin6;
	struct sockaddr *dst = (void *)&sin6;	/* sin6 is large enough for v4 or v6 */

	if (inc->inc_flags & INC_ISIPV6) {
		dst->sa_len = sizeof(struct sockaddr_in6);
		dst->sa_family = AF_INET6;
		((struct sockaddr_in6 *)dst)->sin6_addr = inc->inc6_faddr;

		if (IN6_IS_ADDR_LINKLOCAL(&inc->inc6_laddr)) {
			/* no need for route lookup */
			e = t4_l2t_get(pi, ifp, dst);
			return (e);
		}
	} else {
		dst->sa_len = sizeof(struct sockaddr_in);
		dst->sa_family = AF_INET;
		((struct sockaddr_in *)dst)->sin_addr = inc->inc_faddr;
	}

	rt = rtalloc1(dst, 0, 0);
	if (rt == NULL)
		return (NULL);
	else {
		struct sockaddr *nexthop;

		RT_UNLOCK(rt);
		/* The route must point out of the ifnet the SYN arrived on. */
		if (rt->rt_ifp != ifp)
			e = NULL;
		else {
			if (rt->rt_flags & RTF_GATEWAY)
				nexthop = rt->rt_gateway;
			else
				nexthop = dst;
			e = t4_l2t_get(pi, ifp, nexthop);
		}
		RTFREE(rt);
	}

	return (e);
}

/*
 * Returns 1 if the exact IPv4 address 'in' is configured on ifp, 0 otherwise.
 */
static int
ifnet_has_ip(struct ifnet *ifp, struct in_addr in)
{
	struct ifaddr *ifa;
	struct sockaddr_in *sin;
	int found = 0;

	if_addr_rlock(ifp);
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		sin = (void *)ifa->ifa_addr;
		if (sin->sin_family != AF_INET)
			continue;

		if (sin->sin_addr.s_addr == in.s_addr) {
			found = 1;
			break;
		}
	}
	if_addr_runlock(ifp);

	return (found);
}

/* Record the current line number as the reject reason and bail to 'reject'. */
#define REJECT_PASS_ACCEPT()	do { \
	reject_reason = __LINE__; \
	goto reject; \
} while (0)

/*
 * The context
associated with a tid entry via insert_tid could be a synq_entry
 * or a toepcb.  The only way CPL handlers can tell is via a bit in these flags.
 */
CTASSERT(offsetof(struct toepcb, flags) == offsetof(struct synq_entry, flags));

/*
 * Incoming SYN on a listening socket.
 *
 * XXX: Every use of ifp in this routine has a bad race with up/down, toe/-toe,
 * etc.
 */
static int
do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	struct toedev *tod;
	const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
	struct cpl_pass_accept_rpl *rpl;
	struct wrqe *wr;
	unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
	unsigned int tid = GET_TID(cpl);
	struct listen_ctx *lctx = lookup_stid(sc, stid);
	struct inpcb *inp;
	struct socket *so;
	struct in_conninfo inc;
	struct tcphdr th;
	struct tcpopt to;
	struct port_info *pi;
	struct ifnet *hw_ifp, *ifp;
	struct l2t_entry *e = NULL;
	int rscale, mtu_idx, rx_credits, rxqid, ulp_mode;
	struct synq_entry *synqe = NULL;
	int reject_reason;
	uint16_t vid;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_PASS_ACCEPT_REQ,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));

	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p", __func__, stid, tid,
	    lctx);

	/* Recover the 4-tuple and TCP options from the SYN in the mbuf. */
	pass_accept_req_to_protohdrs(m, &inc, &th);
	t4opt_to_tcpopt(&cpl->tcpopt, &to);

	pi = sc->port[G_SYN_INTF(be16toh(cpl->l2info))];
	hw_ifp = pi->ifp;	/* the cxgbeX ifnet */
	m->m_pkthdr.rcvif = hw_ifp;
	tod = TOEDEV(hw_ifp);

	/*
	 * Figure out if there is a pseudo interface (vlan, lagg, etc.)
	 * involved.  Don't offload if the SYN had a VLAN tag and the vid
	 * doesn't match anything on this interface.
	 *
	 * XXX: lagg support, lagg + vlan support.
	 */
	vid = EVL_VLANOFTAG(be16toh(cpl->vlan));
	if (vid != 0xfff) {
		ifp = VLAN_DEVAT(hw_ifp, vid);
		if (ifp == NULL)
			REJECT_PASS_ACCEPT();
	} else
		ifp = hw_ifp;

	/*
	 * Don't offload if the peer requested a TCP option that's not known to
	 * the silicon.
	 */
	if (cpl->tcpopt.unknown)
		REJECT_PASS_ACCEPT();

	if (inc.inc_flags & INC_ISIPV6) {

		/* Don't offload if the ifcap isn't enabled */
		if ((ifp->if_capenable & IFCAP_TOE6) == 0)
			REJECT_PASS_ACCEPT();

		/*
		 * SYN must be directed to an IP6 address on this ifnet.  This
		 * is more restrictive than in6_localip.
		 */
		if (!ifnet_has_ip6(ifp, &inc.inc6_laddr))
			REJECT_PASS_ACCEPT();
	} else {

		/* Don't offload if the ifcap isn't enabled */
		if ((ifp->if_capenable & IFCAP_TOE4) == 0)
			REJECT_PASS_ACCEPT();

		/*
		 * SYN must be directed to an IP address on this ifnet.  This
		 * is more restrictive than in_localip.
		 */
		if (!ifnet_has_ip(ifp, inc.inc_laddr))
			REJECT_PASS_ACCEPT();
	}

	e = get_l2te_for_nexthop(pi, ifp, &inc);
	if (e == NULL)
		REJECT_PASS_ACCEPT();

	synqe = mbuf_to_synqe(m);
	if (synqe == NULL)
		REJECT_PASS_ACCEPT();

	wr = alloc_wrqe(sizeof(*rpl), &sc->sge.ctrlq[pi->port_id]);
	if (wr == NULL)
		REJECT_PASS_ACCEPT();
	rpl = wrtod(wr);

	INP_INFO_WLOCK(&V_tcbinfo);	/* for 4-tuple check, syncache_add */

	/* Don't offload if the 4-tuple is already in use */
	if (toe_4tuple_check(&inc, &th, ifp) != 0) {
		INP_INFO_WUNLOCK(&V_tcbinfo);
		free(wr, M_CXGBE);
		REJECT_PASS_ACCEPT();
	}

	inp = lctx->inp;		/* listening socket, not owned by TOE */
	INP_WLOCK(inp);

	/* Don't offload if the listening socket has closed */
	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
		/*
		 * The listening socket has closed.  The reply from the TOE to
		 * our CPL_CLOSE_LISTSRV_REQ will ultimately release all
		 * resources tied to this listen context.
		 */
		INP_WUNLOCK(inp);
		INP_INFO_WUNLOCK(&V_tcbinfo);
		free(wr, M_CXGBE);
		REJECT_PASS_ACCEPT();
	}
	so = inp->inp_socket;

	mtu_idx = find_best_mtu_idx(sc, &inc, be16toh(cpl->tcpopt.mss));
	rscale = cpl->tcpopt.wsf && V_tcp_do_rfc1323 ?
	    select_rcv_wscale() : 0;
	SOCKBUF_LOCK(&so->so_rcv);
	/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
	rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
	SOCKBUF_UNLOCK(&so->so_rcv);

	save_qids_in_mbuf(m, pi);
	get_qids_from_mbuf(m, NULL, &rxqid);

	INIT_TP_WR_MIT_CPL(rpl, CPL_PASS_ACCEPT_RPL, tid);
	if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0) {
		ulp_mode = ULP_MODE_TCPDDP;
		synqe->flags |= TPF_SYNQE_TCPDDP;
	} else
		ulp_mode = ULP_MODE_NONE;
	rpl->opt0 = calc_opt0(so, pi, e, mtu_idx, rscale, rx_credits, ulp_mode);
	rpl->opt2 = calc_opt2p(sc, pi, rxqid, &cpl->tcpopt, &th, ulp_mode);

	synqe->tid = tid;
	synqe->lctx = lctx;
	synqe->syn = m;		/* synqe takes over the SYN mbuf */
	m = NULL;
	refcount_init(&synqe->refcnt, 1);	/* 1 means extra hold */
	synqe->l2e_idx = e->idx;
	synqe->rcv_bufsize = rx_credits;
	atomic_store_rel_ptr(&synqe->wr, (uintptr_t)wr);

	insert_tid(sc, tid, synqe);
	TAILQ_INSERT_TAIL(&lctx->synq, synqe, link);
	hold_synqe(synqe);	/* hold for the duration it's in the synq */
	hold_lctx(lctx);	/* A synqe on the list has a ref on its lctx */

	/*
	 * If all goes well t4_syncache_respond will get called during
	 * syncache_add.  Also note that syncache_add releases both pcbinfo and
	 * pcb locks.
	 */
	toe_syncache_add(&inc, &to, &th, inp, tod, synqe);
	INP_UNLOCK_ASSERT(inp);	/* ok to assert, we have a ref on the inp */
	INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);

	/*
	 * If we replied during syncache_add (synqe->wr has been consumed),
	 * good.  Otherwise, set it to 0 so that further syncache_respond
	 * attempts by the kernel will be ignored.
	 */
	if (atomic_cmpset_ptr(&synqe->wr, (uintptr_t)wr, 0)) {

		/*
		 * syncache may or may not have a hold on the synqe, which may
		 * or may not be stashed in the original SYN mbuf passed to us.
		 * Just copy it over instead of dealing with all possibilities.
		 */
		m = m_dup(synqe->syn, M_NOWAIT);
		if (m)
			m->m_pkthdr.rcvif = hw_ifp;

		remove_tid(sc, synqe->tid);
		free(wr, M_CXGBE);

		/* Yank the synqe out of the lctx synq. */
		INP_WLOCK(inp);
		TAILQ_REMOVE(&lctx->synq, synqe, link);
		release_synqe(synqe);	/* removed from synq list */
		inp = release_lctx(sc, lctx);
		if (inp)
			INP_WUNLOCK(inp);

		release_synqe(synqe);	/* extra hold */
		REJECT_PASS_ACCEPT();
	}

	CTR5(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p, synqe %p, SYNACK",
	    __func__, stid, tid, lctx, synqe);

	INP_WLOCK(inp);
	synqe->flags |= TPF_SYNQE_HAS_L2TE;
	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
		/*
		 * Listening socket closed but tod_listen_stop did not abort
		 * this tid because there was no L2T entry for the tid at that
		 * time.  Abort it now.  The reply to the abort will clean up.
		 */
		CTR6(KTR_CXGBE,
		    "%s: stid %u, tid %u, lctx %p, synqe %p (0x%x), ABORT",
		    __func__, stid, tid, lctx, synqe, synqe->flags);
		if (!(synqe->flags & TPF_SYNQE_EXPANDED))
			send_reset_synqe(tod, synqe);
		INP_WUNLOCK(inp);

		release_synqe(synqe);	/* extra hold */
		return (__LINE__);
	}
	INP_WUNLOCK(inp);

	release_synqe(synqe);	/* extra hold */
	return (0);
reject:
	/*
	 * Reject path: give up on offloading and hand the SYN back to the
	 * host stack (via if_input) so the connection can still be handled
	 * without TOE.
	 */
	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid,
	    reject_reason);

	if (e)
		t4_l2t_release(e);
	release_tid(sc, tid, lctx->ctrlq);

	if (__predict_true(m != NULL)) {
		m_adj(m, sizeof(*cpl));
		m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID |
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		m->m_pkthdr.csum_data = 0xffff;
		hw_ifp->if_input(hw_ifp, m);
	}

	return (reject_reason);
}

/*
 * Synthesize the in_conninfo/tcphdr/tcpopt describing the final ACK of the
 * 3-way handshake, from the saved SYN plus fields of the CPL_PASS_ESTABLISH.
 */
static void
synqe_to_protohdrs(struct synq_entry *synqe,
    const struct
cpl_pass_establish *cpl, struct in_conninfo *inc,
    struct tcphdr *th, struct tcpopt *to)
{
	uint16_t tcp_opt = be16toh(cpl->tcp_opt);

	/* start off with the original SYN */
	pass_accept_req_to_protohdrs(synqe->syn, inc, th);

	/* modify parts to make it look like the ACK to our SYN|ACK */
	th->th_flags = TH_ACK;
	th->th_ack = synqe->iss + 1;
	th->th_seq = be32toh(cpl->rcv_isn);
	bzero(to, sizeof(*to));
	if (G_TCPOPT_TSTAMP(tcp_opt)) {
		to->to_flags |= TOF_TS;
		to->to_tsecr = synqe->ts;
	}
}

/*
 * CPL_PASS_ESTABLISH: the hardware completed the 3-way handshake for an
 * embryonic (synq) connection.  Convert it into a fully offloaded connection
 * (toepcb) by expanding the syncache entry that was added for it.
 */
static int
do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	struct port_info *pi;
	struct ifnet *ifp;
	const struct cpl_pass_establish *cpl = (const void *)(rss + 1);
#if defined(KTR) || defined(INVARIANTS)
	unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
#endif
	unsigned int tid = GET_TID(cpl);
	struct synq_entry *synqe = lookup_tid(sc, tid);
	struct listen_ctx *lctx = synqe->lctx;
	struct inpcb *inp = lctx->inp;
	struct socket *so;
	struct tcphdr th;
	struct tcpopt to;
	struct in_conninfo inc;
	struct toepcb *toep;
	u_int txqid, rxqid;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_PASS_ESTABLISH,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
	KASSERT(synqe->flags & TPF_SYNQE,
	    ("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe));

	INP_INFO_WLOCK(&V_tcbinfo);	/* for syncache_expand */
	INP_WLOCK(inp);

	CTR6(KTR_CXGBE,
	    "%s: stid %u, tid %u, synqe %p (0x%x), inp_flags 0x%x",
	    __func__, stid, tid, synqe, synqe->flags, inp->inp_flags);

	if (__predict_false(inp->inp_flags & INP_DROPPED)) {

		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
			    ("%s: listen socket closed but tid %u not aborted.",
			    __func__, tid));
		}

		INP_WUNLOCK(inp);
		INP_INFO_WUNLOCK(&V_tcbinfo);
		return (0);
	}

	ifp = synqe->syn->m_pkthdr.rcvif;
	pi = ifp->if_softc;
	KASSERT(pi->adapter == sc,
	    ("%s: pi %p, sc %p mismatch", __func__, pi, sc));

	get_qids_from_mbuf(synqe->syn, &txqid, &rxqid);
	KASSERT(rxqid == iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0],
	    ("%s: CPL arrived on unexpected rxq.  %d %d", __func__, rxqid,
	    (int)(iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0])));

	toep = alloc_toepcb(pi, txqid, rxqid, M_NOWAIT);
	if (toep == NULL) {
reset:
		/*
		 * The reply to this abort will perform final cleanup.  There is
		 * no need to check for HAS_L2TE here.  We can be here only if
		 * we responded to the PASS_ACCEPT_REQ, and our response had the
		 * L2T idx.
		 */
		send_reset_synqe(TOEDEV(ifp), synqe);
		INP_WUNLOCK(inp);
		INP_INFO_WUNLOCK(&V_tcbinfo);
		return (0);
	}
	toep->tid = tid;
	toep->l2te = &sc->l2t->l2tab[synqe->l2e_idx];
	if (synqe->flags & TPF_SYNQE_TCPDDP)
		set_tcpddp_ulp_mode(toep);
	else
		toep->ulp_mode = ULP_MODE_NONE;
	/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
	toep->rx_credits = synqe->rcv_bufsize;

	so = inp->inp_socket;
	KASSERT(so != NULL, ("%s: socket is NULL", __func__));

	/* Come up with something that syncache_expand should be ok with. */
	synqe_to_protohdrs(synqe, cpl, &inc, &th, &to);

	/*
	 * No more need for anything in the mbuf that carried the
	 * CPL_PASS_ACCEPT_REQ.  Drop the CPL_PASS_ESTABLISH and toep pointer
	 * there.  XXX: bad form but I don't want to increase the size of synqe.
	 */
	m = synqe->syn;
	KASSERT(sizeof(*cpl) + sizeof(toep) <= m->m_len,
	    ("%s: no room in mbuf %p (m_len %d)", __func__, m, m->m_len));
	bcopy(cpl, mtod(m, void *), sizeof(*cpl));
	*(struct toepcb **)(mtod(m, struct cpl_pass_establish *) + 1) = toep;

	if (!toe_syncache_expand(&inc, &to, &th, &so) || so == NULL) {
		free_toepcb(toep);
		goto reset;
	}

	/*
	 * This is for the unlikely case where the syncache entry that we added
	 * has been evicted from the syncache, but the syncache_expand above
	 * works because of syncookies.
	 *
	 * XXX: we've held the tcbinfo lock throughout so there's no risk of
	 * anyone accept'ing a connection before we've installed our hooks, but
	 * this somewhat defeats the purpose of having a tod_offload_socket :-(
	 */
	if (__predict_false(!(synqe->flags & TPF_SYNQE_EXPANDED))) {
		struct inpcb *new_inp = sotoinpcb(so);

		INP_WLOCK(new_inp);
		tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0);
		t4_offload_socket(TOEDEV(ifp), synqe, so);
		INP_WUNLOCK(new_inp);
	}

	/* Done with the synqe */
	TAILQ_REMOVE(&lctx->synq, synqe, link);
	inp = release_lctx(sc, lctx);
	if (inp != NULL)
		INP_WUNLOCK(inp);
	INP_INFO_WUNLOCK(&V_tcbinfo);
	release_synqe(synqe);

	return (0);
}

/* Register the listen-side (passive open) CPL handlers with the adapter. */
void
t4_init_listen_cpl_handlers(struct adapter *sc)
{

	t4_register_cpl_handler(sc, CPL_PASS_OPEN_RPL, do_pass_open_rpl);
	t4_register_cpl_handler(sc, CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl);
	t4_register_cpl_handler(sc, CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
	t4_register_cpl_handler(sc, CPL_PASS_ESTABLISH, do_pass_establish);
}
#endif
|