1237263Snp/*- 2237263Snp * Copyright (c) 2012 Chelsio Communications, Inc. 3237263Snp * All rights reserved. 4237263Snp * 5237263Snp * Redistribution and use in source and binary forms, with or without 6237263Snp * modification, are permitted provided that the following conditions 7237263Snp * are met: 8237263Snp * 1. Redistributions of source code must retain the above copyright 9237263Snp * notice, this list of conditions and the following disclaimer. 10237263Snp * 2. Redistributions in binary form must reproduce the above copyright 11237263Snp * notice, this list of conditions and the following disclaimer in the 12237263Snp * documentation and/or other materials provided with the distribution. 13237263Snp * 14237263Snp * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15237263Snp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16237263Snp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17237263Snp * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18237263Snp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19237263Snp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20237263Snp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21237263Snp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22237263Snp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23237263Snp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24237263Snp * SUCH DAMAGE. 25237263Snp */ 26237263Snp#include <sys/cdefs.h> 27237263Snp__FBSDID("$FreeBSD$"); 28237263Snp 29237263Snp#include "opt_inet.h" 30247434Snp#include "opt_inet6.h" 31237263Snp 32237263Snp#ifdef TCP_OFFLOAD 33237263Snp#include <sys/param.h> 34237263Snp#include <sys/systm.h> 35237263Snp#include <sys/kernel.h> 36237263Snp#include <sys/module.h> 37237263Snp#include <sys/bus.h> 38247434Snp#include <sys/fnv_hash.h> 39237263Snp#include <sys/lock.h> 40237263Snp#include <sys/mutex.h> 41237263Snp#include <sys/rwlock.h> 42237263Snp#include <sys/socket.h> 43237263Snp#include <sys/sbuf.h> 44237263Snp#include <net/if.h> 45237263Snp#include <net/if_types.h> 46237263Snp#include <net/ethernet.h> 47237263Snp#include <net/if_vlan_var.h> 48237263Snp#include <net/route.h> 49237263Snp#include <netinet/in.h> 50237263Snp#include <netinet/toecore.h> 51237263Snp 52237263Snp#include "common/common.h" 53237263Snp#include "common/t4_msg.h" 54237263Snp#include "tom/t4_tom_l2t.h" 55237263Snp#include "tom/t4_tom.h" 56237263Snp 57237263Snp#define VLAN_NONE 0xfff 58237263Snp 59237263Snpstatic inline void 60237263Snpl2t_hold(struct l2t_data *d, struct l2t_entry *e) 61237263Snp{ 62247434Snp 63237263Snp if (atomic_fetchadd_int(&e->refcnt, 1) == 0) /* 0 -> 1 transition */ 64237263Snp atomic_subtract_int(&d->nfree, 1); 65237263Snp} 66237263Snp 67247434Snpstatic inline u_int 68247434Snpl2_hash(struct l2t_data *d, const struct sockaddr *sa, int ifindex) 69237263Snp{ 70247434Snp u_int hash, half = d->l2t_size / 2, start = 0; 71247434Snp const void *key; 72247434Snp size_t len; 73247434Snp 74247434Snp KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, 75247434Snp ("%s: sa %p has unexpected sa_family %d", __func__, sa, 76247434Snp sa->sa_family)); 77247434Snp 78247434Snp if (sa->sa_family == AF_INET) { 79247434Snp const struct sockaddr_in *sin = (const void *)sa; 80247434Snp 81247434Snp key = &sin->sin_addr; 82247434Snp len = sizeof(sin->sin_addr); 83247434Snp } else { 84247434Snp const struct sockaddr_in6 *sin6 = (const void *)sa; 85247434Snp 86247434Snp key = &sin6->sin6_addr; 87247434Snp len = sizeof(sin6->sin6_addr); 88247434Snp start = half; 89247434Snp } 90247434Snp 91247434Snp hash = fnv_32_buf(key, len, FNV1_32_INIT); 92247434Snp hash = fnv_32_buf(&ifindex, sizeof(ifindex), hash); 93247434Snp hash %= half; 94247434Snp 95247434Snp return (hash + start); 96237263Snp} 97237263Snp 98247434Snpstatic inline int 99247434Snpl2_cmp(const struct sockaddr *sa, struct l2t_entry *e) 100247434Snp{ 101247434Snp 102247434Snp KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, 103247434Snp ("%s: sa %p has unexpected sa_family %d", __func__, sa, 104247434Snp sa->sa_family)); 105247434Snp 106247434Snp if (sa->sa_family == AF_INET) { 107247434Snp const struct sockaddr_in *sin = (const void *)sa; 108247434Snp 109247434Snp return (e->addr[0] != sin->sin_addr.s_addr); 110247434Snp } else { 111247434Snp const struct sockaddr_in6 *sin6 = (const void *)sa; 112247434Snp 113247434Snp return (memcmp(&e->addr[0], &sin6->sin6_addr, sizeof(e->addr))); 114247434Snp } 115247434Snp} 116247434Snp 117247434Snpstatic inline void 118247434Snpl2_store(const struct sockaddr *sa, struct l2t_entry *e) 119247434Snp{ 120247434Snp 121247434Snp KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, 122247434Snp ("%s: sa %p has unexpected sa_family %d", __func__, sa, 123247434Snp sa->sa_family)); 124247434Snp 125247434Snp if (sa->sa_family == AF_INET) { 126247434Snp const struct sockaddr_in *sin = (const void *)sa; 127247434Snp 128247434Snp e->addr[0] = sin->sin_addr.s_addr; 129247434Snp e->ipv6 = 0; 130247434Snp } else { 131247434Snp const struct sockaddr_in6 *sin6 = (const void *)sa; 132247434Snp 133247434Snp memcpy(&e->addr[0], &sin6->sin6_addr, sizeof(e->addr)); 134247434Snp e->ipv6 = 1; 135247434Snp } 136247434Snp} 137247434Snp 138237263Snp/* 139237263Snp * Add a WR to an L2T entry's queue of work requests awaiting resolution. 140237263Snp * Must be called with the entry's lock held. 141237263Snp */ 142237263Snpstatic inline void 143237263Snparpq_enqueue(struct l2t_entry *e, struct wrqe *wr) 144237263Snp{ 145237263Snp mtx_assert(&e->lock, MA_OWNED); 146237263Snp 147237263Snp STAILQ_INSERT_TAIL(&e->wr_list, wr, link); 148237263Snp} 149237263Snp 150237263Snpstatic inline void 151237263Snpsend_pending(struct adapter *sc, struct l2t_entry *e) 152237263Snp{ 153237263Snp struct wrqe *wr; 154237263Snp 155237263Snp mtx_assert(&e->lock, MA_OWNED); 156237263Snp 157237263Snp while ((wr = STAILQ_FIRST(&e->wr_list)) != NULL) { 158237263Snp STAILQ_REMOVE_HEAD(&e->wr_list, link); 159237263Snp t4_wrq_tx(sc, wr); 160237263Snp } 161237263Snp} 162237263Snp 163237263Snpstatic void 164237263Snpresolution_failed_for_wr(struct wrqe *wr) 165237263Snp{ 166247434Snp log(LOG_ERR, "%s: leaked work request %p, wr_len %d\n", __func__, wr, 167237263Snp wr->wr_len); 168237263Snp 169237263Snp /* free(wr, M_CXGBE); */ 170237263Snp} 171237263Snp 172237263Snpstatic void 173237263Snpresolution_failed(struct l2t_entry *e) 174237263Snp{ 175237263Snp struct wrqe *wr; 176237263Snp 177237263Snp mtx_assert(&e->lock, MA_OWNED); 178237263Snp 179237263Snp while ((wr = STAILQ_FIRST(&e->wr_list)) != NULL) { 180237263Snp STAILQ_REMOVE_HEAD(&e->wr_list, link); 181237263Snp resolution_failed_for_wr(wr); 182237263Snp } 183237263Snp} 184237263Snp 185237263Snpstatic void 186237263Snpupdate_entry(struct adapter *sc, struct l2t_entry *e, uint8_t *lladdr, 187237263Snp uint16_t vtag) 188237263Snp{ 189237263Snp 190237263Snp mtx_assert(&e->lock, MA_OWNED); 191237263Snp 192237263Snp /* 193237263Snp * The entry may be in active use (e->refcount > 0) or not. We update 194237263Snp * it even when it's not as this simplifies the case where we decide to 195237263Snp * reuse the entry later. 196237263Snp */ 197237263Snp 198237263Snp if (lladdr == NULL && 199237263Snp (e->state == L2T_STATE_RESOLVING || e->state == L2T_STATE_FAILED)) { 200237263Snp /* 201237263Snp * Never got a valid L2 address for this one. Just mark it as 202237263Snp * failed instead of removing it from the hash (for which we'd 203237263Snp * need to wlock the table). 204237263Snp */ 205237263Snp e->state = L2T_STATE_FAILED; 206237263Snp resolution_failed(e); 207237263Snp return; 208237263Snp 209237263Snp } else if (lladdr == NULL) { 210237263Snp 211237263Snp /* Valid or already-stale entry was deleted (or expired) */ 212237263Snp 213237263Snp KASSERT(e->state == L2T_STATE_VALID || 214237263Snp e->state == L2T_STATE_STALE, 215237263Snp ("%s: lladdr NULL, state %d", __func__, e->state)); 216237263Snp 217237263Snp e->state = L2T_STATE_STALE; 218237263Snp 219237263Snp } else { 220237263Snp 221237263Snp if (e->state == L2T_STATE_RESOLVING || 222237263Snp e->state == L2T_STATE_FAILED || 223237263Snp memcmp(e->dmac, lladdr, ETHER_ADDR_LEN)) { 224237263Snp 225237263Snp /* unresolved -> resolved; or dmac changed */ 226237263Snp 227237263Snp memcpy(e->dmac, lladdr, ETHER_ADDR_LEN); 228237263Snp e->vlan = vtag; 229237263Snp t4_write_l2e(sc, e, 1); 230237263Snp } 231237263Snp e->state = L2T_STATE_VALID; 232237263Snp } 233237263Snp} 234237263Snp 235237263Snpstatic int 236237263Snpresolve_entry(struct adapter *sc, struct l2t_entry *e) 237237263Snp{ 238237263Snp struct tom_data *td = sc->tom_softc; 239237263Snp struct toedev *tod = &td->tod; 240237263Snp struct sockaddr_in sin = {0}; 241247434Snp struct sockaddr_in6 sin6 = {0}; 242247434Snp struct sockaddr *sa; 243237263Snp uint8_t dmac[ETHER_ADDR_LEN]; 244237263Snp uint16_t vtag = VLAN_NONE; 245237263Snp int rc; 246237263Snp 247247434Snp if (e->ipv6 == 0) { 248247434Snp sin.sin_family = AF_INET; 249247434Snp sin.sin_len = sizeof(struct sockaddr_in); 250247434Snp sin.sin_addr.s_addr = e->addr[0]; 251247434Snp sa = (void *)&sin; 252247434Snp } else { 253247434Snp sin6.sin6_family = AF_INET6; 254247434Snp sin6.sin6_len = sizeof(struct sockaddr_in6); 255247434Snp memcpy(&sin6.sin6_addr, &e->addr[0], sizeof(e->addr)); 256247434Snp sa = (void *)&sin6; 257247434Snp } 258237263Snp 259247434Snp rc = toe_l2_resolve(tod, e->ifp, sa, dmac, &vtag); 260237263Snp if (rc == EWOULDBLOCK) 261237263Snp return (rc); 262237263Snp 263237263Snp mtx_lock(&e->lock); 264237263Snp update_entry(sc, e, rc == 0 ? dmac : NULL, vtag); 265237263Snp mtx_unlock(&e->lock); 266237263Snp 267237263Snp return (rc); 268237263Snp} 269237263Snp 270237263Snpint 271237263Snpt4_l2t_send_slow(struct adapter *sc, struct wrqe *wr, struct l2t_entry *e) 272237263Snp{ 273237263Snp 274237263Snpagain: 275237263Snp switch (e->state) { 276237263Snp case L2T_STATE_STALE: /* entry is stale, kick off revalidation */ 277237263Snp 278237263Snp if (resolve_entry(sc, e) != EWOULDBLOCK) 279237263Snp goto again; /* entry updated, re-examine state */ 280237263Snp 281237263Snp /* Fall through */ 282237263Snp 283237263Snp case L2T_STATE_VALID: /* fast-path, send the packet on */ 284237263Snp 285237263Snp t4_wrq_tx(sc, wr); 286237263Snp return (0); 287237263Snp 288237263Snp case L2T_STATE_RESOLVING: 289237263Snp case L2T_STATE_SYNC_WRITE: 290237263Snp 291237263Snp mtx_lock(&e->lock); 292237263Snp if (e->state != L2T_STATE_SYNC_WRITE && 293237263Snp e->state != L2T_STATE_RESOLVING) { 294237263Snp /* state changed by the time we got here */ 295237263Snp mtx_unlock(&e->lock); 296237263Snp goto again; 297237263Snp } 298237263Snp arpq_enqueue(e, wr); 299237263Snp mtx_unlock(&e->lock); 300237263Snp 301237263Snp if (resolve_entry(sc, e) == EWOULDBLOCK) 302237263Snp break; 303237263Snp 304237263Snp mtx_lock(&e->lock); 305237263Snp if (e->state == L2T_STATE_VALID && !STAILQ_EMPTY(&e->wr_list)) 306237263Snp send_pending(sc, e); 307237263Snp if (e->state == L2T_STATE_FAILED) 308237263Snp resolution_failed(e); 309237263Snp mtx_unlock(&e->lock); 310237263Snp break; 311237263Snp 312237263Snp case L2T_STATE_FAILED: 313237263Snp resolution_failed_for_wr(wr); 314237263Snp return (EHOSTUNREACH); 315237263Snp } 316237263Snp 317237263Snp return (0); 318237263Snp} 319237263Snp 320237263Snp/* 321237263Snp * Called when an L2T entry has no more users. The entry is left in the hash 322237263Snp * table since it is likely to be reused but we also bump nfree to indicate 323237263Snp * that the entry can be reallocated for a different neighbor. We also drop 324237263Snp * the existing neighbor reference in case the neighbor is going away and is 325237263Snp * waiting on our reference. 326237263Snp * 327237263Snp * Because entries can be reallocated to other neighbors once their ref count 328237263Snp * drops to 0 we need to take the entry's lock to avoid races with a new 329237263Snp * incarnation. 330237263Snp */ 331237263Snp 332237263Snpstatic int 333237263Snpdo_l2t_write_rpl2(struct sge_iq *iq, const struct rss_header *rss, 334237263Snp struct mbuf *m) 335237263Snp{ 336237263Snp struct adapter *sc = iq->adapter; 337237263Snp const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1); 338237263Snp unsigned int tid = GET_TID(rpl); 339247434Snp unsigned int idx = tid % L2T_SIZE; 340237263Snp int rc; 341237263Snp 342237263Snp rc = do_l2t_write_rpl(iq, rss, m); 343237263Snp if (rc != 0) 344237263Snp return (rc); 345237263Snp 346237263Snp if (tid & F_SYNC_WR) { 347247434Snp struct l2t_entry *e = &sc->l2t->l2tab[idx - sc->vres.l2t.start]; 348237263Snp 349237263Snp mtx_lock(&e->lock); 350237263Snp if (e->state != L2T_STATE_SWITCHING) { 351237263Snp send_pending(sc, e); 352237263Snp e->state = L2T_STATE_VALID; 353237263Snp } 354237263Snp mtx_unlock(&e->lock); 355237263Snp } 356237263Snp 357237263Snp return (0); 358237263Snp} 359237263Snp 360237263Snpvoid 361237263Snpt4_init_l2t_cpl_handlers(struct adapter *sc) 362237263Snp{ 363237263Snp 364237263Snp t4_register_cpl_handler(sc, CPL_L2T_WRITE_RPL, do_l2t_write_rpl2); 365237263Snp} 366237263Snp 367237263Snpvoid 368237263Snpt4_uninit_l2t_cpl_handlers(struct adapter *sc) 369237263Snp{ 370237263Snp 371237263Snp t4_register_cpl_handler(sc, CPL_L2T_WRITE_RPL, do_l2t_write_rpl); 372237263Snp} 373237263Snp 374237263Snp/* 375237263Snp * The TOE wants an L2 table entry that it can use to reach the next hop over 376237263Snp * the specified port. Produce such an entry - create one if needed. 377237263Snp * 378237263Snp * Note that the ifnet could be a pseudo-device like if_vlan, if_lagg, etc. on 379237263Snp * top of the real cxgbe interface. 380237263Snp */ 381237263Snpstruct l2t_entry * 382237263Snpt4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa) 383237263Snp{ 384237263Snp struct l2t_entry *e; 385237263Snp struct l2t_data *d = pi->adapter->l2t; 386247434Snp u_int hash, smt_idx = pi->port_id; 387237263Snp 388247434Snp KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, 389247434Snp ("%s: sa %p has unexpected sa_family %d", __func__, sa, 390247434Snp sa->sa_family)); 391237263Snp 392237263Snp#ifndef VLAN_TAG 393237263Snp if (ifp->if_type == IFT_L2VLAN) 394237263Snp return (NULL); 395237263Snp#endif 396237263Snp 397247434Snp hash = l2_hash(d, sa, ifp->if_index); 398237263Snp rw_wlock(&d->lock); 399237263Snp for (e = d->l2tab[hash].first; e; e = e->next) { 400247434Snp if (l2_cmp(sa, e) == 0 && e->ifp == ifp && 401247434Snp e->smt_idx == smt_idx) { 402237263Snp l2t_hold(d, e); 403237263Snp goto done; 404237263Snp } 405237263Snp } 406237263Snp 407237263Snp /* Need to allocate a new entry */ 408237263Snp e = t4_alloc_l2e(d); 409237263Snp if (e) { 410237263Snp mtx_lock(&e->lock); /* avoid race with t4_l2t_free */ 411237263Snp e->next = d->l2tab[hash].first; 412237263Snp d->l2tab[hash].first = e; 413237263Snp 414237263Snp e->state = L2T_STATE_RESOLVING; 415247434Snp l2_store(sa, e); 416237263Snp e->ifp = ifp; 417237263Snp e->smt_idx = smt_idx; 418237263Snp e->hash = hash; 419237263Snp e->lport = pi->lport; 420237263Snp atomic_store_rel_int(&e->refcnt, 1); 421237263Snp#ifdef VLAN_TAG 422237263Snp if (ifp->if_type == IFT_L2VLAN) 423237263Snp VLAN_TAG(ifp, &e->vlan); 424237263Snp else 425237263Snp e->vlan = VLAN_NONE; 426237263Snp#endif 427237263Snp mtx_unlock(&e->lock); 428237263Snp } 429237263Snpdone: 430237263Snp rw_wunlock(&d->lock); 431237263Snp return e; 432237263Snp} 433237263Snp 434237263Snp/* 435237263Snp * Called when the host's ARP layer makes a change to some entry that is loaded 436237263Snp * into the HW L2 table. 437237263Snp */ 438237263Snpvoid 439237263Snpt4_l2_update(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa, 440237263Snp uint8_t *lladdr, uint16_t vtag) 441237263Snp{ 442237263Snp struct adapter *sc = tod->tod_softc; 443237263Snp struct l2t_entry *e; 444237263Snp struct l2t_data *d = sc->l2t; 445247434Snp u_int hash; 446237263Snp 447237263Snp KASSERT(d != NULL, ("%s: no L2 table", __func__)); 448237263Snp 449247434Snp hash = l2_hash(d, sa, ifp->if_index); 450237263Snp rw_rlock(&d->lock); 451237263Snp for (e = d->l2tab[hash].first; e; e = e->next) { 452247434Snp if (l2_cmp(sa, e) == 0 && e->ifp == ifp) { 453237263Snp mtx_lock(&e->lock); 454237263Snp if (atomic_load_acq_int(&e->refcnt)) 455237263Snp goto found; 456237263Snp e->state = L2T_STATE_STALE; 457237263Snp mtx_unlock(&e->lock); 458237263Snp break; 459237263Snp } 460237263Snp } 461237263Snp rw_runlock(&d->lock); 462237263Snp 463237263Snp /* 464237263Snp * This is of no interest to us. We've never had an offloaded 465237263Snp * connection to this destination, and we aren't attempting one right 466237263Snp * now. 467237263Snp */ 468237263Snp return; 469237263Snp 470237263Snpfound: 471237263Snp rw_runlock(&d->lock); 472237263Snp 473237263Snp KASSERT(e->state != L2T_STATE_UNUSED, 474237263Snp ("%s: unused entry in the hash.", __func__)); 475237263Snp 476237263Snp update_entry(sc, e, lladdr, vtag); 477237263Snp mtx_unlock(&e->lock); 478237263Snp} 479237263Snp#endif 480