/* t4_tom_l2t.c, FreeBSD head revision 272719 */
1237263Snp/*- 2237263Snp * Copyright (c) 2012 Chelsio Communications, Inc. 3237263Snp * All rights reserved. 4237263Snp * 5237263Snp * Redistribution and use in source and binary forms, with or without 6237263Snp * modification, are permitted provided that the following conditions 7237263Snp * are met: 8237263Snp * 1. Redistributions of source code must retain the above copyright 9237263Snp * notice, this list of conditions and the following disclaimer. 10237263Snp * 2. Redistributions in binary form must reproduce the above copyright 11237263Snp * notice, this list of conditions and the following disclaimer in the 12237263Snp * documentation and/or other materials provided with the distribution. 13237263Snp * 14237263Snp * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15237263Snp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16237263Snp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17237263Snp * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18237263Snp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19237263Snp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20237263Snp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21237263Snp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22237263Snp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23237263Snp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24237263Snp * SUCH DAMAGE. 
25237263Snp */ 26237263Snp#include <sys/cdefs.h> 27237263Snp__FBSDID("$FreeBSD: head/sys/dev/cxgbe/tom/t4_tom_l2t.c 272719 2014-10-07 21:26:22Z np $"); 28237263Snp 29237263Snp#include "opt_inet.h" 30245434Snp#include "opt_inet6.h" 31237263Snp 32237263Snp#ifdef TCP_OFFLOAD 33237263Snp#include <sys/param.h> 34237263Snp#include <sys/systm.h> 35237263Snp#include <sys/kernel.h> 36237263Snp#include <sys/module.h> 37237263Snp#include <sys/bus.h> 38245434Snp#include <sys/fnv_hash.h> 39237263Snp#include <sys/lock.h> 40237263Snp#include <sys/mutex.h> 41237263Snp#include <sys/rwlock.h> 42237263Snp#include <sys/socket.h> 43237263Snp#include <sys/sbuf.h> 44272719Snp#include <sys/taskqueue.h> 45237263Snp#include <net/if.h> 46237263Snp#include <net/if_types.h> 47237263Snp#include <net/ethernet.h> 48237263Snp#include <net/if_vlan_var.h> 49237263Snp#include <net/route.h> 50237263Snp#include <netinet/in.h> 51237263Snp#include <netinet/toecore.h> 52237263Snp 53237263Snp#include "common/common.h" 54237263Snp#include "common/t4_msg.h" 55237263Snp#include "tom/t4_tom_l2t.h" 56237263Snp#include "tom/t4_tom.h" 57237263Snp 58237263Snp#define VLAN_NONE 0xfff 59237263Snp 60237263Snpstatic inline void 61237263Snpl2t_hold(struct l2t_data *d, struct l2t_entry *e) 62237263Snp{ 63245434Snp 64237263Snp if (atomic_fetchadd_int(&e->refcnt, 1) == 0) /* 0 -> 1 transition */ 65237263Snp atomic_subtract_int(&d->nfree, 1); 66237263Snp} 67237263Snp 68245434Snpstatic inline u_int 69245434Snpl2_hash(struct l2t_data *d, const struct sockaddr *sa, int ifindex) 70237263Snp{ 71245434Snp u_int hash, half = d->l2t_size / 2, start = 0; 72245434Snp const void *key; 73245434Snp size_t len; 74245434Snp 75245434Snp KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, 76245434Snp ("%s: sa %p has unexpected sa_family %d", __func__, sa, 77245434Snp sa->sa_family)); 78245434Snp 79245434Snp if (sa->sa_family == AF_INET) { 80245434Snp const struct sockaddr_in *sin = (const void *)sa; 81245434Snp 82245434Snp key = 
&sin->sin_addr; 83245434Snp len = sizeof(sin->sin_addr); 84245434Snp } else { 85245434Snp const struct sockaddr_in6 *sin6 = (const void *)sa; 86245434Snp 87245434Snp key = &sin6->sin6_addr; 88245434Snp len = sizeof(sin6->sin6_addr); 89245434Snp start = half; 90245434Snp } 91245434Snp 92245434Snp hash = fnv_32_buf(key, len, FNV1_32_INIT); 93245434Snp hash = fnv_32_buf(&ifindex, sizeof(ifindex), hash); 94245434Snp hash %= half; 95245434Snp 96245434Snp return (hash + start); 97237263Snp} 98237263Snp 99245434Snpstatic inline int 100245434Snpl2_cmp(const struct sockaddr *sa, struct l2t_entry *e) 101245434Snp{ 102245434Snp 103245434Snp KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, 104245434Snp ("%s: sa %p has unexpected sa_family %d", __func__, sa, 105245434Snp sa->sa_family)); 106245434Snp 107245434Snp if (sa->sa_family == AF_INET) { 108245434Snp const struct sockaddr_in *sin = (const void *)sa; 109245434Snp 110245434Snp return (e->addr[0] != sin->sin_addr.s_addr); 111245434Snp } else { 112245434Snp const struct sockaddr_in6 *sin6 = (const void *)sa; 113245434Snp 114245434Snp return (memcmp(&e->addr[0], &sin6->sin6_addr, sizeof(e->addr))); 115245434Snp } 116245434Snp} 117245434Snp 118245434Snpstatic inline void 119245434Snpl2_store(const struct sockaddr *sa, struct l2t_entry *e) 120245434Snp{ 121245434Snp 122245434Snp KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, 123245434Snp ("%s: sa %p has unexpected sa_family %d", __func__, sa, 124245434Snp sa->sa_family)); 125245434Snp 126245434Snp if (sa->sa_family == AF_INET) { 127245434Snp const struct sockaddr_in *sin = (const void *)sa; 128245434Snp 129245434Snp e->addr[0] = sin->sin_addr.s_addr; 130245434Snp e->ipv6 = 0; 131245434Snp } else { 132245434Snp const struct sockaddr_in6 *sin6 = (const void *)sa; 133245434Snp 134245434Snp memcpy(&e->addr[0], &sin6->sin6_addr, sizeof(e->addr)); 135245434Snp e->ipv6 = 1; 136245434Snp } 137245434Snp} 138245434Snp 139237263Snp/* 140237263Snp * Add a WR 
to an L2T entry's queue of work requests awaiting resolution. 141237263Snp * Must be called with the entry's lock held. 142237263Snp */ 143237263Snpstatic inline void 144237263Snparpq_enqueue(struct l2t_entry *e, struct wrqe *wr) 145237263Snp{ 146237263Snp mtx_assert(&e->lock, MA_OWNED); 147237263Snp 148237263Snp STAILQ_INSERT_TAIL(&e->wr_list, wr, link); 149237263Snp} 150237263Snp 151237263Snpstatic inline void 152237263Snpsend_pending(struct adapter *sc, struct l2t_entry *e) 153237263Snp{ 154237263Snp struct wrqe *wr; 155237263Snp 156237263Snp mtx_assert(&e->lock, MA_OWNED); 157237263Snp 158237263Snp while ((wr = STAILQ_FIRST(&e->wr_list)) != NULL) { 159237263Snp STAILQ_REMOVE_HEAD(&e->wr_list, link); 160237263Snp t4_wrq_tx(sc, wr); 161237263Snp } 162237263Snp} 163237263Snp 164237263Snpstatic void 165272719Snpresolution_failed(struct adapter *sc, struct l2t_entry *e) 166237263Snp{ 167272719Snp struct tom_data *td = sc->tom_softc; 168237263Snp 169272719Snp mtx_assert(&e->lock, MA_OWNED); 170237263Snp 171272719Snp mtx_lock(&td->unsent_wr_lock); 172272719Snp STAILQ_CONCAT(&td->unsent_wr_list, &e->wr_list); 173272719Snp mtx_unlock(&td->unsent_wr_lock); 174237263Snp 175272719Snp taskqueue_enqueue(taskqueue_thread, &td->reclaim_wr_resources); 176237263Snp} 177237263Snp 178237263Snpstatic void 179237263Snpupdate_entry(struct adapter *sc, struct l2t_entry *e, uint8_t *lladdr, 180237263Snp uint16_t vtag) 181237263Snp{ 182237263Snp 183237263Snp mtx_assert(&e->lock, MA_OWNED); 184237263Snp 185237263Snp /* 186237263Snp * The entry may be in active use (e->refcount > 0) or not. We update 187237263Snp * it even when it's not as this simplifies the case where we decide to 188237263Snp * reuse the entry later. 189237263Snp */ 190237263Snp 191237263Snp if (lladdr == NULL && 192237263Snp (e->state == L2T_STATE_RESOLVING || e->state == L2T_STATE_FAILED)) { 193237263Snp /* 194237263Snp * Never got a valid L2 address for this one. 
Just mark it as 195237263Snp * failed instead of removing it from the hash (for which we'd 196237263Snp * need to wlock the table). 197237263Snp */ 198237263Snp e->state = L2T_STATE_FAILED; 199272719Snp resolution_failed(sc, e); 200237263Snp return; 201237263Snp 202237263Snp } else if (lladdr == NULL) { 203237263Snp 204237263Snp /* Valid or already-stale entry was deleted (or expired) */ 205237263Snp 206237263Snp KASSERT(e->state == L2T_STATE_VALID || 207237263Snp e->state == L2T_STATE_STALE, 208237263Snp ("%s: lladdr NULL, state %d", __func__, e->state)); 209237263Snp 210237263Snp e->state = L2T_STATE_STALE; 211237263Snp 212237263Snp } else { 213237263Snp 214237263Snp if (e->state == L2T_STATE_RESOLVING || 215237263Snp e->state == L2T_STATE_FAILED || 216237263Snp memcmp(e->dmac, lladdr, ETHER_ADDR_LEN)) { 217237263Snp 218237263Snp /* unresolved -> resolved; or dmac changed */ 219237263Snp 220237263Snp memcpy(e->dmac, lladdr, ETHER_ADDR_LEN); 221237263Snp e->vlan = vtag; 222237263Snp t4_write_l2e(sc, e, 1); 223237263Snp } 224237263Snp e->state = L2T_STATE_VALID; 225237263Snp } 226237263Snp} 227237263Snp 228237263Snpstatic int 229237263Snpresolve_entry(struct adapter *sc, struct l2t_entry *e) 230237263Snp{ 231237263Snp struct tom_data *td = sc->tom_softc; 232237263Snp struct toedev *tod = &td->tod; 233237263Snp struct sockaddr_in sin = {0}; 234245434Snp struct sockaddr_in6 sin6 = {0}; 235245434Snp struct sockaddr *sa; 236237263Snp uint8_t dmac[ETHER_ADDR_LEN]; 237237263Snp uint16_t vtag = VLAN_NONE; 238237263Snp int rc; 239237263Snp 240245434Snp if (e->ipv6 == 0) { 241245434Snp sin.sin_family = AF_INET; 242245434Snp sin.sin_len = sizeof(struct sockaddr_in); 243245434Snp sin.sin_addr.s_addr = e->addr[0]; 244245434Snp sa = (void *)&sin; 245245434Snp } else { 246245434Snp sin6.sin6_family = AF_INET6; 247245434Snp sin6.sin6_len = sizeof(struct sockaddr_in6); 248245434Snp memcpy(&sin6.sin6_addr, &e->addr[0], sizeof(e->addr)); 249245434Snp sa = (void *)&sin6; 250245434Snp 
} 251237263Snp 252245434Snp rc = toe_l2_resolve(tod, e->ifp, sa, dmac, &vtag); 253237263Snp if (rc == EWOULDBLOCK) 254237263Snp return (rc); 255237263Snp 256237263Snp mtx_lock(&e->lock); 257237263Snp update_entry(sc, e, rc == 0 ? dmac : NULL, vtag); 258237263Snp mtx_unlock(&e->lock); 259237263Snp 260237263Snp return (rc); 261237263Snp} 262237263Snp 263237263Snpint 264237263Snpt4_l2t_send_slow(struct adapter *sc, struct wrqe *wr, struct l2t_entry *e) 265237263Snp{ 266237263Snp 267237263Snpagain: 268237263Snp switch (e->state) { 269237263Snp case L2T_STATE_STALE: /* entry is stale, kick off revalidation */ 270237263Snp 271237263Snp if (resolve_entry(sc, e) != EWOULDBLOCK) 272237263Snp goto again; /* entry updated, re-examine state */ 273237263Snp 274237263Snp /* Fall through */ 275237263Snp 276237263Snp case L2T_STATE_VALID: /* fast-path, send the packet on */ 277237263Snp 278237263Snp t4_wrq_tx(sc, wr); 279237263Snp return (0); 280237263Snp 281237263Snp case L2T_STATE_RESOLVING: 282237263Snp case L2T_STATE_SYNC_WRITE: 283237263Snp 284237263Snp mtx_lock(&e->lock); 285237263Snp if (e->state != L2T_STATE_SYNC_WRITE && 286237263Snp e->state != L2T_STATE_RESOLVING) { 287237263Snp /* state changed by the time we got here */ 288237263Snp mtx_unlock(&e->lock); 289237263Snp goto again; 290237263Snp } 291237263Snp arpq_enqueue(e, wr); 292237263Snp mtx_unlock(&e->lock); 293237263Snp 294237263Snp if (resolve_entry(sc, e) == EWOULDBLOCK) 295237263Snp break; 296237263Snp 297237263Snp mtx_lock(&e->lock); 298237263Snp if (e->state == L2T_STATE_VALID && !STAILQ_EMPTY(&e->wr_list)) 299237263Snp send_pending(sc, e); 300237263Snp if (e->state == L2T_STATE_FAILED) 301272719Snp resolution_failed(sc, e); 302237263Snp mtx_unlock(&e->lock); 303237263Snp break; 304237263Snp 305237263Snp case L2T_STATE_FAILED: 306237263Snp return (EHOSTUNREACH); 307237263Snp } 308237263Snp 309237263Snp return (0); 310237263Snp} 311237263Snp 312237263Snp/* 313237263Snp * Called when an L2T entry has no more 
users. The entry is left in the hash 314237263Snp * table since it is likely to be reused but we also bump nfree to indicate 315237263Snp * that the entry can be reallocated for a different neighbor. We also drop 316237263Snp * the existing neighbor reference in case the neighbor is going away and is 317237263Snp * waiting on our reference. 318237263Snp * 319237263Snp * Because entries can be reallocated to other neighbors once their ref count 320237263Snp * drops to 0 we need to take the entry's lock to avoid races with a new 321237263Snp * incarnation. 322237263Snp */ 323237263Snp 324237263Snpstatic int 325237263Snpdo_l2t_write_rpl2(struct sge_iq *iq, const struct rss_header *rss, 326237263Snp struct mbuf *m) 327237263Snp{ 328237263Snp struct adapter *sc = iq->adapter; 329237263Snp const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1); 330237263Snp unsigned int tid = GET_TID(rpl); 331245434Snp unsigned int idx = tid % L2T_SIZE; 332237263Snp int rc; 333237263Snp 334237263Snp rc = do_l2t_write_rpl(iq, rss, m); 335237263Snp if (rc != 0) 336237263Snp return (rc); 337237263Snp 338237263Snp if (tid & F_SYNC_WR) { 339245434Snp struct l2t_entry *e = &sc->l2t->l2tab[idx - sc->vres.l2t.start]; 340237263Snp 341237263Snp mtx_lock(&e->lock); 342237263Snp if (e->state != L2T_STATE_SWITCHING) { 343237263Snp send_pending(sc, e); 344237263Snp e->state = L2T_STATE_VALID; 345237263Snp } 346237263Snp mtx_unlock(&e->lock); 347237263Snp } 348237263Snp 349237263Snp return (0); 350237263Snp} 351237263Snp 352237263Snpvoid 353237263Snpt4_init_l2t_cpl_handlers(struct adapter *sc) 354237263Snp{ 355237263Snp 356237263Snp t4_register_cpl_handler(sc, CPL_L2T_WRITE_RPL, do_l2t_write_rpl2); 357237263Snp} 358237263Snp 359237263Snpvoid 360237263Snpt4_uninit_l2t_cpl_handlers(struct adapter *sc) 361237263Snp{ 362237263Snp 363237263Snp t4_register_cpl_handler(sc, CPL_L2T_WRITE_RPL, do_l2t_write_rpl); 364237263Snp} 365237263Snp 366237263Snp/* 367237263Snp * The TOE wants an L2 table entry that 
it can use to reach the next hop over 368237263Snp * the specified port. Produce such an entry - create one if needed. 369237263Snp * 370237263Snp * Note that the ifnet could be a pseudo-device like if_vlan, if_lagg, etc. on 371237263Snp * top of the real cxgbe interface. 372237263Snp */ 373237263Snpstruct l2t_entry * 374237263Snpt4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa) 375237263Snp{ 376237263Snp struct l2t_entry *e; 377237263Snp struct l2t_data *d = pi->adapter->l2t; 378245434Snp u_int hash, smt_idx = pi->port_id; 379237263Snp 380245434Snp KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, 381245434Snp ("%s: sa %p has unexpected sa_family %d", __func__, sa, 382245434Snp sa->sa_family)); 383237263Snp 384237263Snp#ifndef VLAN_TAG 385237263Snp if (ifp->if_type == IFT_L2VLAN) 386237263Snp return (NULL); 387237263Snp#endif 388237263Snp 389245434Snp hash = l2_hash(d, sa, ifp->if_index); 390237263Snp rw_wlock(&d->lock); 391237263Snp for (e = d->l2tab[hash].first; e; e = e->next) { 392245434Snp if (l2_cmp(sa, e) == 0 && e->ifp == ifp && 393245434Snp e->smt_idx == smt_idx) { 394237263Snp l2t_hold(d, e); 395237263Snp goto done; 396237263Snp } 397237263Snp } 398237263Snp 399237263Snp /* Need to allocate a new entry */ 400237263Snp e = t4_alloc_l2e(d); 401237263Snp if (e) { 402237263Snp mtx_lock(&e->lock); /* avoid race with t4_l2t_free */ 403237263Snp e->next = d->l2tab[hash].first; 404237263Snp d->l2tab[hash].first = e; 405237263Snp 406237263Snp e->state = L2T_STATE_RESOLVING; 407245434Snp l2_store(sa, e); 408237263Snp e->ifp = ifp; 409237263Snp e->smt_idx = smt_idx; 410237263Snp e->hash = hash; 411237263Snp e->lport = pi->lport; 412237263Snp atomic_store_rel_int(&e->refcnt, 1); 413237263Snp#ifdef VLAN_TAG 414237263Snp if (ifp->if_type == IFT_L2VLAN) 415237263Snp VLAN_TAG(ifp, &e->vlan); 416237263Snp else 417237263Snp e->vlan = VLAN_NONE; 418237263Snp#endif 419237263Snp mtx_unlock(&e->lock); 420237263Snp } 421237263Snpdone: 
422237263Snp rw_wunlock(&d->lock); 423237263Snp return e; 424237263Snp} 425237263Snp 426237263Snp/* 427237263Snp * Called when the host's ARP layer makes a change to some entry that is loaded 428237263Snp * into the HW L2 table. 429237263Snp */ 430237263Snpvoid 431237263Snpt4_l2_update(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa, 432237263Snp uint8_t *lladdr, uint16_t vtag) 433237263Snp{ 434237263Snp struct adapter *sc = tod->tod_softc; 435237263Snp struct l2t_entry *e; 436237263Snp struct l2t_data *d = sc->l2t; 437245434Snp u_int hash; 438237263Snp 439237263Snp KASSERT(d != NULL, ("%s: no L2 table", __func__)); 440237263Snp 441245434Snp hash = l2_hash(d, sa, ifp->if_index); 442237263Snp rw_rlock(&d->lock); 443237263Snp for (e = d->l2tab[hash].first; e; e = e->next) { 444245434Snp if (l2_cmp(sa, e) == 0 && e->ifp == ifp) { 445237263Snp mtx_lock(&e->lock); 446237263Snp if (atomic_load_acq_int(&e->refcnt)) 447237263Snp goto found; 448237263Snp e->state = L2T_STATE_STALE; 449237263Snp mtx_unlock(&e->lock); 450237263Snp break; 451237263Snp } 452237263Snp } 453237263Snp rw_runlock(&d->lock); 454237263Snp 455237263Snp /* 456237263Snp * This is of no interest to us. We've never had an offloaded 457237263Snp * connection to this destination, and we aren't attempting one right 458237263Snp * now. 459237263Snp */ 460237263Snp return; 461237263Snp 462237263Snpfound: 463237263Snp rw_runlock(&d->lock); 464237263Snp 465237263Snp KASSERT(e->state != L2T_STATE_UNUSED, 466237263Snp ("%s: unused entry in the hash.", __func__)); 467237263Snp 468237263Snp update_entry(sc, e, lladdr, vtag); 469237263Snp mtx_unlock(&e->lock); 470237263Snp} 471237263Snp#endif 472