/*-
 * Copyright (c) 2012 Chelsio Communications, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/ethernet.h>
#include <net/if_vlan_var.h>
#include <netinet/in.h>
#include <netinet/toecore.h>

#include "cxgb_include.h"
#include "ulp/tom/cxgb_tom.h"
#include "ulp/tom/cxgb_l2t.h"

#define VLAN_NONE	0xfff
#define SA(x)		((struct sockaddr *)(x))
#define SIN(x)		((struct sockaddr_in *)(x))
#define SINADDR(x)	(SIN(x)->sin_addr.s_addr)

/*
 * Module locking notes:  There is a RW lock protecting the L2 table as a
 * whole plus a mutex per L2T entry.  Entry lookups and allocations happen
 * under the protection of the table lock, individual entry changes happen
 * while holding that entry's mutex.  The table lock nests outside the
 * entry locks.  Allocations of new entries take the table lock as writers so
 * no other lookups can happen while allocating new entries.  Entry updates
 * take the table lock as readers so multiple entries can be updated in
 * parallel.  An L2T entry can be dropped by decrementing its reference count
 * and therefore can happen in parallel with entry allocation but no entry
 * can change state or increment its ref count during allocation as both of
 * these perform lookups.
 *
 * When acquiring multiple locks, the order is llentry -> L2 table -> L2 entry.
 */

/*
 * Hash an IP address and interface index into an L2 table bucket.  The mask
 * with (nentries - 1) assumes d->nentries is a power of 2 -- TODO confirm
 * against t3_init_l2t callers.
 */
static inline unsigned int
arp_hash(u32 key, int ifindex, const struct l2t_data *d)
{
	return jhash_2words(key, ifindex, 0) & (d->nentries - 1);
}

/*
 * Set up an L2T entry and send any packets waiting in the arp queue.  Must be
 * called with the entry locked.  Writes the entry's (dmac, vlan, smt_idx) to
 * the hardware via a CPL_L2T_WRITE_REQ, then drains the arpq.  Returns 0 on
 * success or ENOMEM if no mbuf could be allocated for the write request.
 */
static int
setup_l2e_send_pending(struct adapter *sc, struct l2t_entry *e)
{
	struct mbuf *m;
	struct cpl_l2t_write_req *req;
	struct port_info *pi = &sc->port[e->smt_idx];	/* smt_idx is port_id */

	mtx_assert(&e->lock, MA_OWNED);

	m = M_GETHDR_OFLD(pi->first_qset, CPL_PRIORITY_CONTROL, req);
	if (m == NULL) {
		log(LOG_ERR, "%s: no mbuf, can't setup L2 entry at index %d\n",
		    __func__, e->idx);
		return (ENOMEM);
	}

	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx));
	req->params = htonl(V_L2T_W_IDX(e->idx) | V_L2T_W_IFF(e->smt_idx) |
	    V_L2T_W_VLAN(e->vlan & EVL_VLID_MASK) |
	    V_L2T_W_PRIO(EVL_PRIOFTAG(e->vlan)));
	memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));

	t3_offload_tx(sc, m);

	/*
	 * XXX: We used pi->first_qset to send the L2T_WRITE_REQ.  If any mbuf
	 * on the arpq is going out via another queue set associated with the
	 * port then it has a bad race with the L2T_WRITE_REQ.  Ideally we
	 * should wait till the reply to the write before draining the arpq.
	 */
	while (e->arpq_head) {
		m = e->arpq_head;
		e->arpq_head = m->m_next;
		m->m_next = NULL;
		t3_offload_tx(sc, m);
	}
	e->arpq_tail = NULL;

	return (0);
}

/*
 * Add a packet to the an L2T entry's queue of packets awaiting resolution.
 * Must be called with the entry's lock held.
 */
static inline void
arpq_enqueue(struct l2t_entry *e, struct mbuf *m)
{
	mtx_assert(&e->lock, MA_OWNED);

	m->m_next = NULL;
	if (e->arpq_head)
		e->arpq_tail->m_next = m;
	else
		e->arpq_head = m;
	e->arpq_tail = m;
}

/*
 * Report an mbuf whose L2 resolution failed.  Note that the mbuf is only
 * logged here, not freed -- hence the "leaked" wording in the message.
 */
static void
resolution_failed_mbuf(struct mbuf *m)
{
	log(LOG_ERR, "%s: leaked mbuf %p, CPL at %p",
	    __func__, m, mtod(m, void *));
}

/*
 * Discard every packet waiting on an entry's arp queue after resolution has
 * failed.  Must be called with the entry's lock held.
 */
static void
resolution_failed(struct l2t_entry *e)
{
	struct mbuf *m;

	mtx_assert(&e->lock, MA_OWNED);

	while (e->arpq_head) {
		m = e->arpq_head;
		e->arpq_head = m->m_next;
		m->m_next = NULL;
		resolution_failed_mbuf(m);
	}
	e->arpq_tail = NULL;
}

/*
 * Apply the outcome of an L2 resolution attempt to an entry.  lladdr is the
 * resolved link-layer address, or NULL if resolution failed or the neighbor
 * entry was deleted/expired.  Must be called with the entry's lock held.
 */
static void
update_entry(struct adapter *sc, struct l2t_entry *e, uint8_t *lladdr,
    uint16_t vtag)
{

	mtx_assert(&e->lock, MA_OWNED);

	/*
	 * The entry may be in active use (e->refcount > 0) or not.  We update
	 * it even when it's not as this simplifies the case where we decide to
	 * reuse the entry later.
	 */

	if (lladdr == NULL &&
	    (e->state == L2T_STATE_RESOLVING || e->state == L2T_STATE_FAILED)) {
		/*
		 * Never got a valid L2 address for this one.  Just mark it as
		 * failed instead of removing it from the hash (for which we'd
		 * need to wlock the table).
		 */
		e->state = L2T_STATE_FAILED;
		resolution_failed(e);
		return;

	} else if (lladdr == NULL) {

		/* Valid or already-stale entry was deleted (or expired) */

		KASSERT(e->state == L2T_STATE_VALID ||
		    e->state == L2T_STATE_STALE,
		    ("%s: lladdr NULL, state %d", __func__, e->state));

		e->state = L2T_STATE_STALE;

	} else {

		if (e->state == L2T_STATE_RESOLVING ||
		    e->state == L2T_STATE_FAILED ||
		    memcmp(e->dmac, lladdr, ETHER_ADDR_LEN)) {

			/* unresolved -> resolved; or dmac changed */

			memcpy(e->dmac, lladdr, ETHER_ADDR_LEN);
			e->vlan = vtag;
			/* Push the new dmac/vlan to hardware, drain arpq. */
			setup_l2e_send_pending(sc, e);
		}
		e->state = L2T_STATE_VALID;
	}
}

/*
 * Ask the TOE core to resolve the entry's IP address to a link-layer address
 * and fold the answer into the entry.  Returns EWOULDBLOCK (resolution in
 * progress, entry untouched) or the toe_l2_resolve result after the entry
 * has been updated.  Takes the entry lock itself; do not call with it held.
 */
static int
resolve_entry(struct adapter *sc, struct l2t_entry *e)
{
	struct tom_data *td = sc->tom_softc;
	struct toedev *tod = &td->tod;
	struct sockaddr_in sin = {0};
	uint8_t dmac[ETHER_ADDR_LEN];
	uint16_t vtag = EVL_VLID_MASK;
	int rc;

	sin.sin_family = AF_INET;
	sin.sin_len = sizeof(struct sockaddr_in);
	SINADDR(&sin) = e->addr;

	rc = toe_l2_resolve(tod, e->ifp, SA(&sin), dmac, &vtag);
	if (rc == EWOULDBLOCK)
		return (rc);

	mtx_lock(&e->lock);
	/* rc == 0 means dmac is valid; anything else marks the entry stale/failed. */
	update_entry(sc, e, rc == 0 ? dmac : NULL, vtag);
	mtx_unlock(&e->lock);

	return (rc);
}

/*
 * Slow-path transmit through an L2T entry.  Sends m immediately when the
 * entry is VALID (revalidating first if STALE), queues it on the arpq while
 * RESOLVING, and drops it with EHOSTUNREACH when the entry has FAILED.
 * Returns 0 or an error from t3_offload_tx/EHOSTUNREACH.
 */
int
t3_l2t_send_slow(struct adapter *sc, struct mbuf *m, struct l2t_entry *e)
{

again:
	switch (e->state) {
	case L2T_STATE_STALE:	/* entry is stale, kick off revalidation */

		if (resolve_entry(sc, e) != EWOULDBLOCK)
			goto again;	/* entry updated, re-examine state */

		/* Fall through */

	case L2T_STATE_VALID:	/* fast-path, send the packet on */

		return (t3_offload_tx(sc, m));

	case L2T_STATE_RESOLVING:
		mtx_lock(&e->lock);
		if (e->state != L2T_STATE_RESOLVING) {
			/* State changed under us; re-evaluate. */
			mtx_unlock(&e->lock);
			goto again;
		}
		arpq_enqueue(e, m);
		mtx_unlock(&e->lock);

		if (resolve_entry(sc, e) == EWOULDBLOCK)
			break;

		/*
		 * Resolution completed synchronously; flush or fail the
		 * packets (including m) that were queued meanwhile.
		 */
		mtx_lock(&e->lock);
		if (e->state == L2T_STATE_VALID && e->arpq_head)
			setup_l2e_send_pending(sc, e);
		if (e->state == L2T_STATE_FAILED)
			resolution_failed(e);
		mtx_unlock(&e->lock);
		break;

	case L2T_STATE_FAILED:
		resolution_failed_mbuf(m);
		return (EHOSTUNREACH);
	}

	return (0);
}

/*
 * Allocate a free L2T entry.  Must be called with l2t_data.lock held.
 */
static struct l2t_entry *
alloc_l2e(struct l2t_data *d)
{
	struct l2t_entry *end, *e, **p;

	rw_assert(&d->lock, RA_WLOCKED);

	if (!atomic_load_acq_int(&d->nfree))
		return (NULL);

	/* there's definitely a free entry */
	for (e = d->rover, end = &d->l2tab[d->nentries]; e != end; ++e) {
		if (atomic_load_acq_int(&e->refcnt) == 0)
			goto found;
	}

	/* Wrap around; entry 0 is never used so start the rescan at 1. */
	for (e = &d->l2tab[1]; atomic_load_acq_int(&e->refcnt); ++e)
		continue;
found:
	d->rover = e + 1;
	atomic_add_int(&d->nfree, -1);

	/*
	 * The entry we found may be an inactive entry that is
	 * presently in the hash table.  We need to remove it.
	 */
	if (e->state != L2T_STATE_UNUSED) {
		int hash = arp_hash(e->addr, e->ifp->if_index, d);

		/* Unlink e from its hash chain. */
		for (p = &d->l2tab[hash].first; *p; p = &(*p)->next) {
			if (*p == e) {
				*p = e->next;
				break;
			}
		}
		e->state = L2T_STATE_UNUSED;
	}

	return (e);
}

/*
 * Find or create the L2T entry for (addr, ifp, smt_idx).  An existing match
 * gets an extra reference; a new entry starts in RESOLVING with refcnt 1.
 * Returns NULL if the table is full.
 */
struct l2t_entry *
t3_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa)
{
	struct tom_data *td = pi->adapter->tom_softc;
	struct l2t_entry *e;
	struct l2t_data *d = td->l2t;
	uint32_t addr = SINADDR(sa);
	int hash = arp_hash(addr, ifp->if_index, d);
	unsigned int smt_idx = pi->port_id;

	rw_wlock(&d->lock);
	for (e = d->l2tab[hash].first; e; e = e->next) {
		if (e->addr == addr && e->ifp == ifp && e->smt_idx == smt_idx) {
			l2t_hold(d, e);
			goto done;
		}
	}

	/* Need to allocate a new entry */
	e = alloc_l2e(d);
	if (e) {
		mtx_lock(&e->lock);	/* avoid race with t3_l2t_free */
		e->next = d->l2tab[hash].first;
		d->l2tab[hash].first = e;

		e->state = L2T_STATE_RESOLVING;
		e->addr = addr;
		e->ifp = ifp;
		e->smt_idx = smt_idx;
		atomic_store_rel_int(&e->refcnt, 1);

		KASSERT(ifp->if_vlantrunk == NULL, ("TOE+VLAN unimplemented."));
		e->vlan = VLAN_NONE;

		mtx_unlock(&e->lock);
	}

done:
	rw_wunlock(&d->lock);

	return (e);
}

/*
 * TOE-core callback: the neighbor entry for (ifp, sa) changed.  Update the
 * matching L2T entry, if any; lladdr is the new link-layer address (NULL on
 * deletion/expiry) and vtag the VLAN tag to use.
 */
void
t3_l2_update(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
    uint8_t *lladdr, uint16_t vtag)
{
	struct tom_data *td = t3_tomdata(tod);
	struct adapter *sc = tod->tod_softc;
	struct l2t_entry *e;
	struct l2t_data *d = td->l2t;
	u32 addr = *(u32 *) &SIN(sa)->sin_addr;
	int hash = arp_hash(addr, ifp->if_index, d);

	rw_rlock(&d->lock);
	for (e = d->l2tab[hash].first; e; e = e->next)
		if (e->addr == addr && e->ifp == ifp) {
			mtx_lock(&e->lock);
			goto found;
		}
	rw_runlock(&d->lock);

	/*
	 * This is of no interest to us.  We've never had an offloaded
	 * connection to this destination, and we aren't attempting one right
	 * now.
	 */
	return;

found:
	/* Safe to drop the table lock; we still hold the entry lock. */
	rw_runlock(&d->lock);

	KASSERT(e->state != L2T_STATE_UNUSED,
	    ("%s: unused entry in the hash.", __func__));

	update_entry(sc, e, lladdr, vtag);
	mtx_unlock(&e->lock);
}

/*
 * Allocate and initialize an L2 table with l2t_capacity entries.  Entry 0 is
 * reserved and never handed out.  Returns NULL if the M_NOWAIT allocation
 * fails.
 */
struct l2t_data *
t3_init_l2t(unsigned int l2t_capacity)
{
	struct l2t_data *d;
	int i, size = sizeof(*d) + l2t_capacity * sizeof(struct l2t_entry);

	d = malloc(size, M_CXGB, M_NOWAIT | M_ZERO);
	if (!d)
		return (NULL);

	d->nentries = l2t_capacity;
	d->rover = &d->l2tab[1];	/* entry 0 is not used */
	atomic_store_rel_int(&d->nfree, l2t_capacity - 1);
	rw_init(&d->lock, "L2T");

	for (i = 0; i < l2t_capacity; ++i) {
		d->l2tab[i].idx = i;
		d->l2tab[i].state = L2T_STATE_UNUSED;
		mtx_init(&d->l2tab[i].lock, "L2T_E", NULL, MTX_DEF);
		atomic_store_rel_int(&d->l2tab[i].refcnt, 0);
	}
	return (d);
}

/*
 * Tear down an L2 table created by t3_init_l2t: destroy all locks and free
 * the table.  Caller is responsible for ensuring no entries are in use.
 */
void
t3_free_l2t(struct l2t_data *d)
{
	int i;

	rw_destroy(&d->lock);
	for (i = 0; i < d->nentries; ++i)
		mtx_destroy(&d->l2tab[i].lock);

	free(d, M_CXGB);
}

/*
 * CPL handler for L2T_WRITE_RPL: log unexpected statuses and free the mbuf.
 * Always returns 0 (reply consumed).
 */
static int
do_l2t_write_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
	struct cpl_l2t_write_rpl *rpl = mtod(m, void *);

	if (rpl->status != CPL_ERR_NONE)
		log(LOG_ERR,
		    "Unexpected L2T_WRITE_RPL status %u for entry %u\n",
		    rpl->status, GET_TID(rpl));

	m_freem(m);
	return (0);
}

/*
 * Register the CPL_L2T_WRITE_RPL handler with the adapter.
 */
void
t3_init_l2t_cpl_handlers(struct adapter *sc)
{
	t3_register_cpl_handler(sc, CPL_L2T_WRITE_RPL, do_l2t_write_rpl);
}
#endif