1237263Snp/*- 2237263Snp * Copyright (c) 2012 Chelsio Communications, Inc. 3237263Snp * All rights reserved. 4237263Snp * 5237263Snp * Redistribution and use in source and binary forms, with or without 6237263Snp * modification, are permitted provided that the following conditions 7237263Snp * are met: 8237263Snp * 1. Redistributions of source code must retain the above copyright 9237263Snp * notice, this list of conditions and the following disclaimer. 10237263Snp * 2. Redistributions in binary form must reproduce the above copyright 11237263Snp * notice, this list of conditions and the following disclaimer in the 12237263Snp * documentation and/or other materials provided with the distribution. 13237263Snp * 14237263Snp * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15237263Snp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16237263Snp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17237263Snp * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18237263Snp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19237263Snp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20237263Snp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21237263Snp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22237263Snp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23237263Snp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24237263Snp * SUCH DAMAGE. 25237263Snp */ 26178302Skmacy 27178302Skmacy#include <sys/cdefs.h> 28178302Skmacy__FBSDID("$FreeBSD$"); 29178302Skmacy 30237263Snp#include "opt_inet.h" 31237263Snp 32237263Snp#ifdef TCP_OFFLOAD 33178302Skmacy#include <sys/param.h> 34178302Skmacy#include <sys/systm.h> 35178302Skmacy#include <sys/kernel.h> 36178302Skmacy#include <sys/module.h> 37178302Skmacy#include <sys/bus.h> 38178302Skmacy#include <sys/socket.h> 39178302Skmacy#include <net/if.h> 40178302Skmacy#include <net/ethernet.h> 41178302Skmacy#include <net/if_vlan_var.h> 42178302Skmacy#include <netinet/in.h> 43237263Snp#include <netinet/toecore.h> 44178302Skmacy 45237263Snp#include "cxgb_include.h" 46237263Snp#include "ulp/tom/cxgb_tom.h" 47237263Snp#include "ulp/tom/cxgb_l2t.h" 48178302Skmacy 49237263Snp#define VLAN_NONE 0xfff 50237263Snp#define SA(x) ((struct sockaddr *)(x)) 51237263Snp#define SIN(x) ((struct sockaddr_in *)(x)) 52237263Snp#define SINADDR(x) (SIN(x)->sin_addr.s_addr) 53178302Skmacy 54178302Skmacy/* 55178302Skmacy * Module locking notes: There is a RW lock protecting the L2 table as a 56237263Snp * whole plus a mutex per L2T entry. Entry lookups and allocations happen 57178302Skmacy * under the protection of the table lock, individual entry changes happen 58237263Snp * while holding that entry's mutex. The table lock nests outside the 59178302Skmacy * entry locks. Allocations of new entries take the table lock as writers so 60178302Skmacy * no other lookups can happen while allocating new entries. Entry updates 61178302Skmacy * take the table lock as readers so multiple entries can be updated in 62178302Skmacy * parallel. An L2T entry can be dropped by decrementing its reference count 63178302Skmacy * and therefore can happen in parallel with entry allocation but no entry 64178302Skmacy * can change state or increment its ref count during allocation as both of 65178302Skmacy * these perform lookups. 66237263Snp * 67237263Snp * When acquiring multiple locks, the order is llentry -> L2 table -> L2 entry. 68178302Skmacy */ 69178302Skmacy 70178302Skmacystatic inline unsigned int 71178302Skmacyarp_hash(u32 key, int ifindex, const struct l2t_data *d) 72178302Skmacy{ 73178302Skmacy return jhash_2words(key, ifindex, 0) & (d->nentries - 1); 74178302Skmacy} 75178302Skmacy 76178302Skmacy/* 77237263Snp * Set up an L2T entry and send any packets waiting in the arp queue. Must be 78237263Snp * called with the entry locked. 79178302Skmacy */ 80178302Skmacystatic int 81237263Snpsetup_l2e_send_pending(struct adapter *sc, struct l2t_entry *e) 82178302Skmacy{ 83237263Snp struct mbuf *m; 84178302Skmacy struct cpl_l2t_write_req *req; 85237263Snp struct port_info *pi = &sc->port[e->smt_idx]; /* smt_idx is port_id */ 86178302Skmacy 87237263Snp mtx_assert(&e->lock, MA_OWNED); 88237263Snp 89237263Snp m = M_GETHDR_OFLD(pi->first_qset, CPL_PRIORITY_CONTROL, req); 90237263Snp if (m == NULL) { 91237263Snp log(LOG_ERR, "%s: no mbuf, can't setup L2 entry at index %d\n", 92237263Snp __func__, e->idx); 93237263Snp return (ENOMEM); 94178302Skmacy } 95237263Snp 96237263Snp req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 97178302Skmacy OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx)); 98178302Skmacy req->params = htonl(V_L2T_W_IDX(e->idx) | V_L2T_W_IFF(e->smt_idx) | 99237263Snp V_L2T_W_VLAN(e->vlan & EVL_VLID_MASK) | 100237263Snp V_L2T_W_PRIO(EVL_PRIOFTAG(e->vlan))); 101237263Snp memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac)); 102178302Skmacy 103237263Snp t3_offload_tx(sc, m); 104237263Snp 105237263Snp /* 106237263Snp * XXX: We used pi->first_qset to send the L2T_WRITE_REQ. If any mbuf 107237263Snp * on the arpq is going out via another queue set associated with the 108237263Snp * port then it has a bad race with the L2T_WRITE_REQ. Ideally we 109237263Snp * should wait till the reply to the write before draining the arpq. 110237263Snp */ 111178302Skmacy while (e->arpq_head) { 112178302Skmacy m = e->arpq_head; 113178302Skmacy e->arpq_head = m->m_next; 114178302Skmacy m->m_next = NULL; 115237263Snp t3_offload_tx(sc, m); 116178302Skmacy } 117178302Skmacy e->arpq_tail = NULL; 118178302Skmacy 119237263Snp return (0); 120178302Skmacy} 121178302Skmacy 122178302Skmacy/* 123178302Skmacy * Add a packet to the an L2T entry's queue of packets awaiting resolution. 124178302Skmacy * Must be called with the entry's lock held. 125178302Skmacy */ 126178302Skmacystatic inline void 127178302Skmacyarpq_enqueue(struct l2t_entry *e, struct mbuf *m) 128178302Skmacy{ 129237263Snp mtx_assert(&e->lock, MA_OWNED); 130237263Snp 131178302Skmacy m->m_next = NULL; 132178302Skmacy if (e->arpq_head) 133178302Skmacy e->arpq_tail->m_next = m; 134178302Skmacy else 135178302Skmacy e->arpq_head = m; 136178302Skmacy e->arpq_tail = m; 137178302Skmacy} 138178302Skmacy 139237263Snpstatic void 140237263Snpresolution_failed_mbuf(struct mbuf *m) 141178302Skmacy{ 142237263Snp log(LOG_ERR, "%s: leaked mbuf %p, CPL at %p", 143237263Snp __func__, m, mtod(m, void *)); 144237263Snp} 145178302Skmacy 146237263Snpstatic void 147237263Snpresolution_failed(struct l2t_entry *e) 148237263Snp{ 149237263Snp struct mbuf *m; 150178302Skmacy 151237263Snp mtx_assert(&e->lock, MA_OWNED); 152237263Snp 153237263Snp while (e->arpq_head) { 154237263Snp m = e->arpq_head; 155237263Snp e->arpq_head = m->m_next; 156237263Snp m->m_next = NULL; 157237263Snp resolution_failed_mbuf(m); 158237263Snp } 159237263Snp e->arpq_tail = NULL; 160237263Snp} 161237263Snp 162237263Snpstatic void 163237263Snpupdate_entry(struct adapter *sc, struct l2t_entry *e, uint8_t *lladdr, 164237263Snp uint16_t vtag) 165237263Snp{ 166237263Snp 167237263Snp mtx_assert(&e->lock, MA_OWNED); 168237263Snp 169237263Snp /* 170237263Snp * The entry may be in active use (e->refcount > 0) or not. We update 171237263Snp * it even when it's not as this simplifies the case where we decide to 172237263Snp * reuse the entry later. 173237263Snp */ 174237263Snp 175237263Snp if (lladdr == NULL && 176237263Snp (e->state == L2T_STATE_RESOLVING || e->state == L2T_STATE_FAILED)) { 177178302Skmacy /* 178237263Snp * Never got a valid L2 address for this one. Just mark it as 179237263Snp * failed instead of removing it from the hash (for which we'd 180237263Snp * need to wlock the table). 181178302Skmacy */ 182237263Snp e->state = L2T_STATE_FAILED; 183237263Snp resolution_failed(e); 184237263Snp return; 185178302Skmacy 186237263Snp } else if (lladdr == NULL) { 187237263Snp 188237263Snp /* Valid or already-stale entry was deleted (or expired) */ 189237263Snp 190237263Snp KASSERT(e->state == L2T_STATE_VALID || 191237263Snp e->state == L2T_STATE_STALE, 192237263Snp ("%s: lladdr NULL, state %d", __func__, e->state)); 193237263Snp 194237263Snp e->state = L2T_STATE_STALE; 195237263Snp 196237263Snp } else { 197237263Snp 198237263Snp if (e->state == L2T_STATE_RESOLVING || 199237263Snp e->state == L2T_STATE_FAILED || 200237263Snp memcmp(e->dmac, lladdr, ETHER_ADDR_LEN)) { 201237263Snp 202237263Snp /* unresolved -> resolved; or dmac changed */ 203237263Snp 204237263Snp memcpy(e->dmac, lladdr, ETHER_ADDR_LEN); 205237263Snp e->vlan = vtag; 206237263Snp setup_l2e_send_pending(sc, e); 207178302Skmacy } 208237263Snp e->state = L2T_STATE_VALID; 209178302Skmacy } 210178302Skmacy} 211178302Skmacy 212237263Snpstatic int 213237263Snpresolve_entry(struct adapter *sc, struct l2t_entry *e) 214178302Skmacy{ 215237263Snp struct tom_data *td = sc->tom_softc; 216237263Snp struct toedev *tod = &td->tod; 217237263Snp struct sockaddr_in sin = {0}; 218292978Smelifaro uint8_t dmac[ETHER_HDR_LEN]; 219237263Snp uint16_t vtag = EVL_VLID_MASK; 220237263Snp int rc; 221237263Snp 222178302Skmacy sin.sin_family = AF_INET; 223178302Skmacy sin.sin_len = sizeof(struct sockaddr_in); 224237263Snp SINADDR(&sin) = e->addr; 225178302Skmacy 226237263Snp rc = toe_l2_resolve(tod, e->ifp, SA(&sin), dmac, &vtag); 227237263Snp if (rc == EWOULDBLOCK) 228237263Snp return (rc); 229237263Snp 230237263Snp mtx_lock(&e->lock); 231237263Snp update_entry(sc, e, rc == 0 ? dmac : NULL, vtag); 232237263Snp mtx_unlock(&e->lock); 233237263Snp 234237263Snp return (rc); 235237263Snp} 236237263Snp 237237263Snpint 238237263Snpt3_l2t_send_slow(struct adapter *sc, struct mbuf *m, struct l2t_entry *e) 239237263Snp{ 240237263Snp 241178302Skmacyagain: 242178302Skmacy switch (e->state) { 243178302Skmacy case L2T_STATE_STALE: /* entry is stale, kick off revalidation */ 244237263Snp 245237263Snp if (resolve_entry(sc, e) != EWOULDBLOCK) 246237263Snp goto again; /* entry updated, re-examine state */ 247237263Snp 248237263Snp /* Fall through */ 249237263Snp 250178302Skmacy case L2T_STATE_VALID: /* fast-path, send the packet on */ 251237263Snp 252237263Snp return (t3_offload_tx(sc, m)); 253237263Snp 254178302Skmacy case L2T_STATE_RESOLVING: 255178302Skmacy mtx_lock(&e->lock); 256237263Snp if (e->state != L2T_STATE_RESOLVING) { 257178302Skmacy mtx_unlock(&e->lock); 258178302Skmacy goto again; 259178302Skmacy } 260237263Snp arpq_enqueue(e, m); 261178302Skmacy mtx_unlock(&e->lock); 262178302Skmacy 263237263Snp if (resolve_entry(sc, e) == EWOULDBLOCK) 264237263Snp break; 265237263Snp 266237263Snp mtx_lock(&e->lock); 267237263Snp if (e->state == L2T_STATE_VALID && e->arpq_head) 268237263Snp setup_l2e_send_pending(sc, e); 269237263Snp if (e->state == L2T_STATE_FAILED) 270237263Snp resolution_failed(e); 271237263Snp mtx_unlock(&e->lock); 272237263Snp break; 273237263Snp 274237263Snp case L2T_STATE_FAILED: 275237263Snp resolution_failed_mbuf(m); 276237263Snp return (EHOSTUNREACH); 277178302Skmacy } 278237263Snp 279237263Snp return (0); 280178302Skmacy} 281237263Snp 282178302Skmacy/* 283178302Skmacy * Allocate a free L2T entry. Must be called with l2t_data.lock held. 284178302Skmacy */ 285178302Skmacystatic struct l2t_entry * 286178302Skmacyalloc_l2e(struct l2t_data *d) 287178302Skmacy{ 288178302Skmacy struct l2t_entry *end, *e, **p; 289178302Skmacy 290237263Snp rw_assert(&d->lock, RA_WLOCKED); 291237263Snp 292178302Skmacy if (!atomic_load_acq_int(&d->nfree)) 293237263Snp return (NULL); 294178302Skmacy 295178302Skmacy /* there's definitely a free entry */ 296237263Snp for (e = d->rover, end = &d->l2tab[d->nentries]; e != end; ++e) { 297178302Skmacy if (atomic_load_acq_int(&e->refcnt) == 0) 298178302Skmacy goto found; 299237263Snp } 300178302Skmacy 301237263Snp for (e = &d->l2tab[1]; atomic_load_acq_int(&e->refcnt); ++e) 302237263Snp continue; 303178302Skmacyfound: 304178302Skmacy d->rover = e + 1; 305178302Skmacy atomic_add_int(&d->nfree, -1); 306178302Skmacy 307178302Skmacy /* 308178302Skmacy * The entry we found may be an inactive entry that is 309178302Skmacy * presently in the hash table. We need to remove it. 310178302Skmacy */ 311178302Skmacy if (e->state != L2T_STATE_UNUSED) { 312237263Snp int hash = arp_hash(e->addr, e->ifp->if_index, d); 313178302Skmacy 314237263Snp for (p = &d->l2tab[hash].first; *p; p = &(*p)->next) { 315178302Skmacy if (*p == e) { 316178302Skmacy *p = e->next; 317178302Skmacy break; 318178302Skmacy } 319237263Snp } 320178302Skmacy e->state = L2T_STATE_UNUSED; 321178302Skmacy } 322178302Skmacy 323237263Snp return (e); 324178302Skmacy} 325178302Skmacy 326178302Skmacystruct l2t_entry * 327237263Snpt3_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa) 328178302Skmacy{ 329237263Snp struct tom_data *td = pi->adapter->tom_softc; 330178302Skmacy struct l2t_entry *e; 331237263Snp struct l2t_data *d = td->l2t; 332237263Snp uint32_t addr = SINADDR(sa); 333237263Snp int hash = arp_hash(addr, ifp->if_index, d); 334237263Snp unsigned int smt_idx = pi->port_id; 335178302Skmacy 336178302Skmacy rw_wlock(&d->lock); 337237263Snp for (e = d->l2tab[hash].first; e; e = e->next) { 338237263Snp if (e->addr == addr && e->ifp == ifp && e->smt_idx == smt_idx) { 339178302Skmacy l2t_hold(d, e); 340178302Skmacy goto done; 341178302Skmacy } 342237263Snp } 343178302Skmacy 344178302Skmacy /* Need to allocate a new entry */ 345178302Skmacy e = alloc_l2e(d); 346178302Skmacy if (e) { 347178302Skmacy mtx_lock(&e->lock); /* avoid race with t3_l2t_free */ 348178302Skmacy e->next = d->l2tab[hash].first; 349178302Skmacy d->l2tab[hash].first = e; 350237263Snp 351178302Skmacy e->state = L2T_STATE_RESOLVING; 352178302Skmacy e->addr = addr; 353237263Snp e->ifp = ifp; 354178302Skmacy e->smt_idx = smt_idx; 355178302Skmacy atomic_store_rel_int(&e->refcnt, 1); 356237263Snp 357237263Snp KASSERT(ifp->if_vlantrunk == NULL, ("TOE+VLAN unimplemented.")); 358237263Snp e->vlan = VLAN_NONE; 359237263Snp 360178302Skmacy mtx_unlock(&e->lock); 361237263Snp } 362178302Skmacy 363178302Skmacydone: 364178302Skmacy rw_wunlock(&d->lock); 365178302Skmacy 366237263Snp return (e); 367178302Skmacy} 368178302Skmacy 369178302Skmacyvoid 370237263Snpt3_l2_update(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa, 371237263Snp uint8_t *lladdr, uint16_t vtag) 372178302Skmacy{ 373237263Snp struct tom_data *td = t3_tomdata(tod); 374237263Snp struct adapter *sc = tod->tod_softc; 375178302Skmacy struct l2t_entry *e; 376237263Snp struct l2t_data *d = td->l2t; 377237263Snp u32 addr = *(u32 *) &SIN(sa)->sin_addr; 378237263Snp int hash = arp_hash(addr, ifp->if_index, d); 379178302Skmacy 380178302Skmacy rw_rlock(&d->lock); 381178302Skmacy for (e = d->l2tab[hash].first; e; e = e->next) 382237263Snp if (e->addr == addr && e->ifp == ifp) { 383178302Skmacy mtx_lock(&e->lock); 384178302Skmacy goto found; 385178302Skmacy } 386178302Skmacy rw_runlock(&d->lock); 387237263Snp 388237263Snp /* 389237263Snp * This is of no interest to us. We've never had an offloaded 390237263Snp * connection to this destination, and we aren't attempting one right 391237263Snp * now. 392237263Snp */ 393178302Skmacy return; 394178302Skmacy 395178302Skmacyfound: 396237263Snp rw_runlock(&d->lock); 397178302Skmacy 398237263Snp KASSERT(e->state != L2T_STATE_UNUSED, 399237263Snp ("%s: unused entry in the hash.", __func__)); 400237263Snp 401237263Snp update_entry(sc, e, lladdr, vtag); 402178302Skmacy mtx_unlock(&e->lock); 403178302Skmacy} 404178302Skmacy 405178302Skmacystruct l2t_data * 406178302Skmacyt3_init_l2t(unsigned int l2t_capacity) 407178302Skmacy{ 408178302Skmacy struct l2t_data *d; 409178302Skmacy int i, size = sizeof(*d) + l2t_capacity * sizeof(struct l2t_entry); 410178302Skmacy 411237263Snp d = malloc(size, M_CXGB, M_NOWAIT | M_ZERO); 412178302Skmacy if (!d) 413237263Snp return (NULL); 414178302Skmacy 415178302Skmacy d->nentries = l2t_capacity; 416178302Skmacy d->rover = &d->l2tab[1]; /* entry 0 is not used */ 417178302Skmacy atomic_store_rel_int(&d->nfree, l2t_capacity - 1); 418178302Skmacy rw_init(&d->lock, "L2T"); 419178302Skmacy 420178302Skmacy for (i = 0; i < l2t_capacity; ++i) { 421178302Skmacy d->l2tab[i].idx = i; 422178302Skmacy d->l2tab[i].state = L2T_STATE_UNUSED; 423237263Snp mtx_init(&d->l2tab[i].lock, "L2T_E", NULL, MTX_DEF); 424178302Skmacy atomic_store_rel_int(&d->l2tab[i].refcnt, 0); 425178302Skmacy } 426237263Snp return (d); 427178302Skmacy} 428178302Skmacy 429178302Skmacyvoid 430178302Skmacyt3_free_l2t(struct l2t_data *d) 431178302Skmacy{ 432178302Skmacy int i; 433178302Skmacy 434178302Skmacy rw_destroy(&d->lock); 435178302Skmacy for (i = 0; i < d->nentries; ++i) 436178302Skmacy mtx_destroy(&d->l2tab[i].lock); 437178302Skmacy 438237263Snp free(d, M_CXGB); 439178302Skmacy} 440237263Snp 441237263Snpstatic int 442237263Snpdo_l2t_write_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m) 443237263Snp{ 444237263Snp struct cpl_l2t_write_rpl *rpl = mtod(m, void *); 445237263Snp 446237263Snp if (rpl->status != CPL_ERR_NONE) 447237263Snp log(LOG_ERR, 448237263Snp "Unexpected L2T_WRITE_RPL status %u for entry %u\n", 449237263Snp rpl->status, GET_TID(rpl)); 450237263Snp 451237263Snp m_freem(m); 452237263Snp return (0); 453237263Snp} 454237263Snp 455237263Snpvoid 456237263Snpt3_init_l2t_cpl_handlers(struct adapter *sc) 457237263Snp{ 458237263Snp t3_register_cpl_handler(sc, CPL_L2T_WRITE_RPL, do_l2t_write_rpl); 459237263Snp} 460237263Snp#endif 461