/* t4_tom_l2t.c revision 302339 */
1237263Snp/*- 2237263Snp * Copyright (c) 2012 Chelsio Communications, Inc. 3237263Snp * All rights reserved. 4237263Snp * 5237263Snp * Redistribution and use in source and binary forms, with or without 6237263Snp * modification, are permitted provided that the following conditions 7237263Snp * are met: 8237263Snp * 1. Redistributions of source code must retain the above copyright 9237263Snp * notice, this list of conditions and the following disclaimer. 10237263Snp * 2. Redistributions in binary form must reproduce the above copyright 11237263Snp * notice, this list of conditions and the following disclaimer in the 12237263Snp * documentation and/or other materials provided with the distribution. 13237263Snp * 14237263Snp * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15237263Snp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16237263Snp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17237263Snp * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18237263Snp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19237263Snp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20237263Snp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21237263Snp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22237263Snp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23237263Snp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24237263Snp * SUCH DAMAGE. 
25237263Snp */ 26237263Snp#include <sys/cdefs.h> 27237263Snp__FBSDID("$FreeBSD: head/sys/dev/cxgbe/tom/t4_tom_l2t.c 302339 2016-07-05 01:29:24Z np $"); 28237263Snp 29237263Snp#include "opt_inet.h" 30245434Snp#include "opt_inet6.h" 31237263Snp 32237263Snp#ifdef TCP_OFFLOAD 33237263Snp#include <sys/param.h> 34237263Snp#include <sys/systm.h> 35237263Snp#include <sys/kernel.h> 36237263Snp#include <sys/module.h> 37237263Snp#include <sys/bus.h> 38245434Snp#include <sys/fnv_hash.h> 39237263Snp#include <sys/lock.h> 40237263Snp#include <sys/mutex.h> 41237263Snp#include <sys/rwlock.h> 42237263Snp#include <sys/socket.h> 43237263Snp#include <sys/sbuf.h> 44272719Snp#include <sys/taskqueue.h> 45237263Snp#include <net/if.h> 46237263Snp#include <net/if_types.h> 47237263Snp#include <net/ethernet.h> 48237263Snp#include <net/if_vlan_var.h> 49237263Snp#include <net/route.h> 50237263Snp#include <netinet/in.h> 51237263Snp#include <netinet/toecore.h> 52237263Snp 53237263Snp#include "common/common.h" 54237263Snp#include "common/t4_msg.h" 55237263Snp#include "tom/t4_tom_l2t.h" 56237263Snp#include "tom/t4_tom.h" 57237263Snp 58237263Snp#define VLAN_NONE 0xfff 59237263Snp 60237263Snpstatic inline void 61237263Snpl2t_hold(struct l2t_data *d, struct l2t_entry *e) 62237263Snp{ 63245434Snp 64237263Snp if (atomic_fetchadd_int(&e->refcnt, 1) == 0) /* 0 -> 1 transition */ 65237263Snp atomic_subtract_int(&d->nfree, 1); 66237263Snp} 67237263Snp 68245434Snpstatic inline u_int 69245434Snpl2_hash(struct l2t_data *d, const struct sockaddr *sa, int ifindex) 70237263Snp{ 71245434Snp u_int hash, half = d->l2t_size / 2, start = 0; 72245434Snp const void *key; 73245434Snp size_t len; 74245434Snp 75245434Snp KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, 76245434Snp ("%s: sa %p has unexpected sa_family %d", __func__, sa, 77245434Snp sa->sa_family)); 78245434Snp 79245434Snp if (sa->sa_family == AF_INET) { 80245434Snp const struct sockaddr_in *sin = (const void *)sa; 81245434Snp 82245434Snp key = 
&sin->sin_addr; 83245434Snp len = sizeof(sin->sin_addr); 84245434Snp } else { 85245434Snp const struct sockaddr_in6 *sin6 = (const void *)sa; 86245434Snp 87245434Snp key = &sin6->sin6_addr; 88245434Snp len = sizeof(sin6->sin6_addr); 89245434Snp start = half; 90245434Snp } 91245434Snp 92245434Snp hash = fnv_32_buf(key, len, FNV1_32_INIT); 93245434Snp hash = fnv_32_buf(&ifindex, sizeof(ifindex), hash); 94245434Snp hash %= half; 95245434Snp 96245434Snp return (hash + start); 97237263Snp} 98237263Snp 99245434Snpstatic inline int 100245434Snpl2_cmp(const struct sockaddr *sa, struct l2t_entry *e) 101245434Snp{ 102245434Snp 103245434Snp KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, 104245434Snp ("%s: sa %p has unexpected sa_family %d", __func__, sa, 105245434Snp sa->sa_family)); 106245434Snp 107245434Snp if (sa->sa_family == AF_INET) { 108245434Snp const struct sockaddr_in *sin = (const void *)sa; 109245434Snp 110245434Snp return (e->addr[0] != sin->sin_addr.s_addr); 111245434Snp } else { 112245434Snp const struct sockaddr_in6 *sin6 = (const void *)sa; 113245434Snp 114245434Snp return (memcmp(&e->addr[0], &sin6->sin6_addr, sizeof(e->addr))); 115245434Snp } 116245434Snp} 117245434Snp 118245434Snpstatic inline void 119245434Snpl2_store(const struct sockaddr *sa, struct l2t_entry *e) 120245434Snp{ 121245434Snp 122245434Snp KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, 123245434Snp ("%s: sa %p has unexpected sa_family %d", __func__, sa, 124245434Snp sa->sa_family)); 125245434Snp 126245434Snp if (sa->sa_family == AF_INET) { 127245434Snp const struct sockaddr_in *sin = (const void *)sa; 128245434Snp 129245434Snp e->addr[0] = sin->sin_addr.s_addr; 130245434Snp e->ipv6 = 0; 131245434Snp } else { 132245434Snp const struct sockaddr_in6 *sin6 = (const void *)sa; 133245434Snp 134245434Snp memcpy(&e->addr[0], &sin6->sin6_addr, sizeof(e->addr)); 135245434Snp e->ipv6 = 1; 136245434Snp } 137245434Snp} 138245434Snp 139237263Snp/* 140237263Snp * Add a WR 
to an L2T entry's queue of work requests awaiting resolution. 141237263Snp * Must be called with the entry's lock held. 142237263Snp */ 143237263Snpstatic inline void 144237263Snparpq_enqueue(struct l2t_entry *e, struct wrqe *wr) 145237263Snp{ 146237263Snp mtx_assert(&e->lock, MA_OWNED); 147237263Snp 148237263Snp STAILQ_INSERT_TAIL(&e->wr_list, wr, link); 149237263Snp} 150237263Snp 151237263Snpstatic inline void 152237263Snpsend_pending(struct adapter *sc, struct l2t_entry *e) 153237263Snp{ 154237263Snp struct wrqe *wr; 155237263Snp 156237263Snp mtx_assert(&e->lock, MA_OWNED); 157237263Snp 158237263Snp while ((wr = STAILQ_FIRST(&e->wr_list)) != NULL) { 159237263Snp STAILQ_REMOVE_HEAD(&e->wr_list, link); 160237263Snp t4_wrq_tx(sc, wr); 161237263Snp } 162237263Snp} 163237263Snp 164237263Snpstatic void 165272719Snpresolution_failed(struct adapter *sc, struct l2t_entry *e) 166237263Snp{ 167272719Snp struct tom_data *td = sc->tom_softc; 168237263Snp 169272719Snp mtx_assert(&e->lock, MA_OWNED); 170237263Snp 171272719Snp mtx_lock(&td->unsent_wr_lock); 172272719Snp STAILQ_CONCAT(&td->unsent_wr_list, &e->wr_list); 173272719Snp mtx_unlock(&td->unsent_wr_lock); 174237263Snp 175272719Snp taskqueue_enqueue(taskqueue_thread, &td->reclaim_wr_resources); 176237263Snp} 177237263Snp 178237263Snpstatic void 179237263Snpupdate_entry(struct adapter *sc, struct l2t_entry *e, uint8_t *lladdr, 180237263Snp uint16_t vtag) 181237263Snp{ 182237263Snp 183237263Snp mtx_assert(&e->lock, MA_OWNED); 184237263Snp 185237263Snp /* 186237263Snp * The entry may be in active use (e->refcount > 0) or not. We update 187237263Snp * it even when it's not as this simplifies the case where we decide to 188237263Snp * reuse the entry later. 189237263Snp */ 190237263Snp 191237263Snp if (lladdr == NULL && 192237263Snp (e->state == L2T_STATE_RESOLVING || e->state == L2T_STATE_FAILED)) { 193237263Snp /* 194237263Snp * Never got a valid L2 address for this one. 
Just mark it as 195237263Snp * failed instead of removing it from the hash (for which we'd 196237263Snp * need to wlock the table). 197237263Snp */ 198237263Snp e->state = L2T_STATE_FAILED; 199272719Snp resolution_failed(sc, e); 200237263Snp return; 201237263Snp 202237263Snp } else if (lladdr == NULL) { 203237263Snp 204237263Snp /* Valid or already-stale entry was deleted (or expired) */ 205237263Snp 206237263Snp KASSERT(e->state == L2T_STATE_VALID || 207237263Snp e->state == L2T_STATE_STALE, 208237263Snp ("%s: lladdr NULL, state %d", __func__, e->state)); 209237263Snp 210237263Snp e->state = L2T_STATE_STALE; 211237263Snp 212237263Snp } else { 213237263Snp 214237263Snp if (e->state == L2T_STATE_RESOLVING || 215237263Snp e->state == L2T_STATE_FAILED || 216237263Snp memcmp(e->dmac, lladdr, ETHER_ADDR_LEN)) { 217237263Snp 218237263Snp /* unresolved -> resolved; or dmac changed */ 219237263Snp 220237263Snp memcpy(e->dmac, lladdr, ETHER_ADDR_LEN); 221237263Snp e->vlan = vtag; 222302339Snp t4_write_l2e(e, 1); 223237263Snp } 224237263Snp e->state = L2T_STATE_VALID; 225237263Snp } 226237263Snp} 227237263Snp 228237263Snpstatic int 229237263Snpresolve_entry(struct adapter *sc, struct l2t_entry *e) 230237263Snp{ 231237263Snp struct tom_data *td = sc->tom_softc; 232237263Snp struct toedev *tod = &td->tod; 233237263Snp struct sockaddr_in sin = {0}; 234245434Snp struct sockaddr_in6 sin6 = {0}; 235245434Snp struct sockaddr *sa; 236292978Smelifaro uint8_t dmac[ETHER_HDR_LEN]; 237237263Snp uint16_t vtag = VLAN_NONE; 238237263Snp int rc; 239237263Snp 240245434Snp if (e->ipv6 == 0) { 241245434Snp sin.sin_family = AF_INET; 242245434Snp sin.sin_len = sizeof(struct sockaddr_in); 243245434Snp sin.sin_addr.s_addr = e->addr[0]; 244245434Snp sa = (void *)&sin; 245245434Snp } else { 246245434Snp sin6.sin6_family = AF_INET6; 247245434Snp sin6.sin6_len = sizeof(struct sockaddr_in6); 248245434Snp memcpy(&sin6.sin6_addr, &e->addr[0], sizeof(e->addr)); 249245434Snp sa = (void *)&sin6; 
250245434Snp } 251237263Snp 252245434Snp rc = toe_l2_resolve(tod, e->ifp, sa, dmac, &vtag); 253237263Snp if (rc == EWOULDBLOCK) 254237263Snp return (rc); 255237263Snp 256237263Snp mtx_lock(&e->lock); 257237263Snp update_entry(sc, e, rc == 0 ? dmac : NULL, vtag); 258237263Snp mtx_unlock(&e->lock); 259237263Snp 260237263Snp return (rc); 261237263Snp} 262237263Snp 263237263Snpint 264237263Snpt4_l2t_send_slow(struct adapter *sc, struct wrqe *wr, struct l2t_entry *e) 265237263Snp{ 266237263Snp 267237263Snpagain: 268237263Snp switch (e->state) { 269237263Snp case L2T_STATE_STALE: /* entry is stale, kick off revalidation */ 270237263Snp 271237263Snp if (resolve_entry(sc, e) != EWOULDBLOCK) 272237263Snp goto again; /* entry updated, re-examine state */ 273237263Snp 274237263Snp /* Fall through */ 275237263Snp 276237263Snp case L2T_STATE_VALID: /* fast-path, send the packet on */ 277237263Snp 278237263Snp t4_wrq_tx(sc, wr); 279237263Snp return (0); 280237263Snp 281237263Snp case L2T_STATE_RESOLVING: 282237263Snp case L2T_STATE_SYNC_WRITE: 283237263Snp 284237263Snp mtx_lock(&e->lock); 285237263Snp if (e->state != L2T_STATE_SYNC_WRITE && 286237263Snp e->state != L2T_STATE_RESOLVING) { 287237263Snp /* state changed by the time we got here */ 288237263Snp mtx_unlock(&e->lock); 289237263Snp goto again; 290237263Snp } 291237263Snp arpq_enqueue(e, wr); 292237263Snp mtx_unlock(&e->lock); 293237263Snp 294237263Snp if (resolve_entry(sc, e) == EWOULDBLOCK) 295237263Snp break; 296237263Snp 297237263Snp mtx_lock(&e->lock); 298237263Snp if (e->state == L2T_STATE_VALID && !STAILQ_EMPTY(&e->wr_list)) 299237263Snp send_pending(sc, e); 300237263Snp if (e->state == L2T_STATE_FAILED) 301272719Snp resolution_failed(sc, e); 302237263Snp mtx_unlock(&e->lock); 303237263Snp break; 304237263Snp 305237263Snp case L2T_STATE_FAILED: 306237263Snp return (EHOSTUNREACH); 307237263Snp } 308237263Snp 309237263Snp return (0); 310237263Snp} 311237263Snp 312302339Snpint 313237263Snpdo_l2t_write_rpl2(struct 
sge_iq *iq, const struct rss_header *rss, 314237263Snp struct mbuf *m) 315237263Snp{ 316237263Snp struct adapter *sc = iq->adapter; 317237263Snp const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1); 318237263Snp unsigned int tid = GET_TID(rpl); 319245434Snp unsigned int idx = tid % L2T_SIZE; 320237263Snp 321302339Snp if (__predict_false(rpl->status != CPL_ERR_NONE)) { 322302339Snp log(LOG_ERR, 323302339Snp "Unexpected L2T_WRITE_RPL (%u) for entry at hw_idx %u\n", 324302339Snp rpl->status, idx); 325302339Snp return (EINVAL); 326302339Snp } 327237263Snp 328237263Snp if (tid & F_SYNC_WR) { 329245434Snp struct l2t_entry *e = &sc->l2t->l2tab[idx - sc->vres.l2t.start]; 330237263Snp 331237263Snp mtx_lock(&e->lock); 332237263Snp if (e->state != L2T_STATE_SWITCHING) { 333237263Snp send_pending(sc, e); 334237263Snp e->state = L2T_STATE_VALID; 335237263Snp } 336237263Snp mtx_unlock(&e->lock); 337237263Snp } 338237263Snp 339237263Snp return (0); 340237263Snp} 341237263Snp 342237263Snp/* 343237263Snp * The TOE wants an L2 table entry that it can use to reach the next hop over 344237263Snp * the specified port. Produce such an entry - create one if needed. 345237263Snp * 346237263Snp * Note that the ifnet could be a pseudo-device like if_vlan, if_lagg, etc. on 347237263Snp * top of the real cxgbe interface. 
348237263Snp */ 349237263Snpstruct l2t_entry * 350237263Snpt4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa) 351237263Snp{ 352237263Snp struct l2t_entry *e; 353302339Snp struct adapter *sc = pi->adapter; 354302339Snp struct l2t_data *d = sc->l2t; 355245434Snp u_int hash, smt_idx = pi->port_id; 356237263Snp 357245434Snp KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, 358245434Snp ("%s: sa %p has unexpected sa_family %d", __func__, sa, 359245434Snp sa->sa_family)); 360237263Snp 361237263Snp#ifndef VLAN_TAG 362237263Snp if (ifp->if_type == IFT_L2VLAN) 363237263Snp return (NULL); 364237263Snp#endif 365237263Snp 366245434Snp hash = l2_hash(d, sa, ifp->if_index); 367237263Snp rw_wlock(&d->lock); 368237263Snp for (e = d->l2tab[hash].first; e; e = e->next) { 369245434Snp if (l2_cmp(sa, e) == 0 && e->ifp == ifp && 370245434Snp e->smt_idx == smt_idx) { 371237263Snp l2t_hold(d, e); 372237263Snp goto done; 373237263Snp } 374237263Snp } 375237263Snp 376237263Snp /* Need to allocate a new entry */ 377237263Snp e = t4_alloc_l2e(d); 378237263Snp if (e) { 379237263Snp mtx_lock(&e->lock); /* avoid race with t4_l2t_free */ 380237263Snp e->next = d->l2tab[hash].first; 381237263Snp d->l2tab[hash].first = e; 382237263Snp 383237263Snp e->state = L2T_STATE_RESOLVING; 384245434Snp l2_store(sa, e); 385237263Snp e->ifp = ifp; 386237263Snp e->smt_idx = smt_idx; 387237263Snp e->hash = hash; 388237263Snp e->lport = pi->lport; 389302339Snp e->wrq = &sc->sge.ctrlq[pi->port_id]; 390302339Snp e->iqid = sc->sge.ofld_rxq[pi->vi[0].first_ofld_rxq].iq.abs_id; 391237263Snp atomic_store_rel_int(&e->refcnt, 1); 392237263Snp#ifdef VLAN_TAG 393237263Snp if (ifp->if_type == IFT_L2VLAN) 394237263Snp VLAN_TAG(ifp, &e->vlan); 395237263Snp else 396237263Snp e->vlan = VLAN_NONE; 397237263Snp#endif 398237263Snp mtx_unlock(&e->lock); 399237263Snp } 400237263Snpdone: 401237263Snp rw_wunlock(&d->lock); 402237263Snp return e; 403237263Snp} 404237263Snp 405237263Snp/* 406237263Snp 
* Called when the host's ARP layer makes a change to some entry that is loaded 407237263Snp * into the HW L2 table. 408237263Snp */ 409237263Snpvoid 410237263Snpt4_l2_update(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa, 411237263Snp uint8_t *lladdr, uint16_t vtag) 412237263Snp{ 413237263Snp struct adapter *sc = tod->tod_softc; 414237263Snp struct l2t_entry *e; 415237263Snp struct l2t_data *d = sc->l2t; 416245434Snp u_int hash; 417237263Snp 418237263Snp KASSERT(d != NULL, ("%s: no L2 table", __func__)); 419237263Snp 420245434Snp hash = l2_hash(d, sa, ifp->if_index); 421237263Snp rw_rlock(&d->lock); 422237263Snp for (e = d->l2tab[hash].first; e; e = e->next) { 423245434Snp if (l2_cmp(sa, e) == 0 && e->ifp == ifp) { 424237263Snp mtx_lock(&e->lock); 425237263Snp if (atomic_load_acq_int(&e->refcnt)) 426237263Snp goto found; 427237263Snp e->state = L2T_STATE_STALE; 428237263Snp mtx_unlock(&e->lock); 429237263Snp break; 430237263Snp } 431237263Snp } 432237263Snp rw_runlock(&d->lock); 433237263Snp 434237263Snp /* 435237263Snp * This is of no interest to us. We've never had an offloaded 436237263Snp * connection to this destination, and we aren't attempting one right 437237263Snp * now. 438237263Snp */ 439237263Snp return; 440237263Snp 441237263Snpfound: 442237263Snp rw_runlock(&d->lock); 443237263Snp 444237263Snp KASSERT(e->state != L2T_STATE_UNUSED, 445237263Snp ("%s: unused entry in the hash.", __func__)); 446237263Snp 447237263Snp update_entry(sc, e, lladdr, vtag); 448237263Snp mtx_unlock(&e->lock); 449237263Snp} 450237263Snp#endif 451