/*-
 * Copyright (c) 2012 Chelsio Communications, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/ethernet.h>
#include <net/if_vlan_var.h>
#include <netinet/in.h>
#include <netinet/toecore.h>

#include "cxgb_include.h"
#include "ulp/tom/cxgb_tom.h"
#include "ulp/tom/cxgb_l2t.h"

/* 12-bit VLAN ID value used to mark an L2T entry as having no VLAN tag. */
#define VLAN_NONE 0xfff

/* Convenience casts for IPv4 sockaddr access. */
#define SA(x) ((struct sockaddr *)(x))
#define SIN(x) ((struct sockaddr_in *)(x))
#define SINADDR(x) (SIN(x)->sin_addr.s_addr)

/*
 * Module locking notes:  There is a RW lock protecting the L2 table as a
 * whole plus a mutex per L2T entry.  Entry lookups and allocations happen
 * under the protection of the table lock, individual entry changes happen
 * while holding that entry's mutex.  The table lock nests outside the
 * entry locks.  Allocations of new entries take the table lock as writers so
 * no other lookups can happen while allocating new entries.  Entry updates
 * take the table lock as readers so multiple entries can be updated in
 * parallel.  An L2T entry can be dropped by decrementing its reference count
 * and therefore can happen in parallel with entry allocation but no entry
 * can change state or increment its ref count during allocation as both of
 * these perform lookups.
 *
 * When acquiring multiple locks, the order is llentry -> L2 table -> L2 entry.
 */

/*
 * Hash an (IPv4 address, interface index) pair into an L2 table bucket.
 *
 * NOTE(review): the mask only yields a uniform bucket index if d->nentries is
 * a power of 2 -- confirm t3_init_l2t's callers guarantee that.
 */
static inline unsigned int
arp_hash(u32 key, int ifindex, const struct l2t_data *d)
{
	return jhash_2words(key, ifindex, 0) & (d->nentries - 1);
}

/*
 * Set up an L2T entry and send any packets waiting in the arp queue.  Must be
 * called with the entry locked.
 */
static int
setup_l2e_send_pending(struct adapter *sc, struct l2t_entry *e)
{
	struct mbuf *m;
	struct cpl_l2t_write_req *req;
	struct port_info *pi = &sc->port[e->smt_idx];	/* smt_idx is port_id */

	mtx_assert(&e->lock, MA_OWNED);

	/*
	 * Allocate a control-priority mbuf with a CPL_L2T_WRITE_REQ mapped at
	 * its head (M_GETHDR_OFLD points 'req' into the mbuf on success).
	 */
	m = M_GETHDR_OFLD(pi->first_qset, CPL_PRIORITY_CONTROL, req);
	if (m == NULL) {
		log(LOG_ERR, "%s: no mbuf, can't setup L2 entry at index %d\n",
		    __func__, e->idx);
		return (ENOMEM);
	}

	/* Program the hardware L2 table slot with this entry's dmac/vlan/prio. */
	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx));
	req->params = htonl(V_L2T_W_IDX(e->idx) | V_L2T_W_IFF(e->smt_idx) |
	    V_L2T_W_VLAN(e->vlan & EVL_VLID_MASK) |
	    V_L2T_W_PRIO(EVL_PRIOFTAG(e->vlan)));
	memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));

	t3_offload_tx(sc, m);

	/*
	 * XXX: We used pi->first_qset to send the L2T_WRITE_REQ.  If any mbuf
	 * on the arpq is going out via another queue set associated with the
	 * port then it has a bad race with the L2T_WRITE_REQ.  Ideally we
	 * should wait till the reply to the write before draining the arpq.
	 */
	while (e->arpq_head) {
		m = e->arpq_head;
		e->arpq_head = m->m_next;
		m->m_next = NULL;
		t3_offload_tx(sc, m);
	}
	e->arpq_tail = NULL;

	return (0);
}

/*
 * Add a packet to an L2T entry's queue of packets awaiting resolution.
 * Must be called with the entry's lock held.
 */
static inline void
arpq_enqueue(struct l2t_entry *e, struct mbuf *m)
{
	mtx_assert(&e->lock, MA_OWNED);

	/* Append to the singly-linked arpq (head/tail, chained via m_next). */
	m->m_next = NULL;
	if (e->arpq_head)
		e->arpq_tail->m_next = m;
	else
		e->arpq_head = m;
	e->arpq_tail = m;
}

/*
 * Called for an mbuf that cannot be sent because L2 resolution failed.  The
 * mbuf is deliberately not freed here; the log message records the leak.
 */
static void
resolution_failed_mbuf(struct mbuf *m)
{
	log(LOG_ERR, "%s: leaked mbuf %p, CPL at %p",
	    __func__, m, mtod(m, void *));
}

/*
 * Drain an entry's arp queue after resolution has failed, logging each
 * pending mbuf.  Must be called with the entry's lock held.
 */
static void
resolution_failed(struct l2t_entry *e)
{
	struct mbuf *m;

	mtx_assert(&e->lock, MA_OWNED);

	while (e->arpq_head) {
		m = e->arpq_head;
		e->arpq_head = m->m_next;
		m->m_next = NULL;
		resolution_failed_mbuf(m);
	}
	e->arpq_tail = NULL;
}

/*
 * Apply the outcome of an L2 resolution attempt to an entry.  lladdr is the
 * resolved link-layer address, or NULL if resolution failed (or the neighbor
 * was deleted/expired).  Must be called with the entry's lock held.
 */
static void
update_entry(struct adapter *sc, struct l2t_entry *e, uint8_t *lladdr,
    uint16_t vtag)
{

	mtx_assert(&e->lock, MA_OWNED);

	/*
	 * The entry may be in active use (e->refcount > 0) or not.  We update
	 * it even when it's not as this simplifies the case where we decide to
	 * reuse the entry later.
	 */

	if (lladdr == NULL &&
	    (e->state == L2T_STATE_RESOLVING || e->state == L2T_STATE_FAILED)) {
		/*
		 * Never got a valid L2 address for this one.  Just mark it as
		 * failed instead of removing it from the hash (for which we'd
		 * need to wlock the table).
		 */
		e->state = L2T_STATE_FAILED;
		resolution_failed(e);
		return;

	} else if (lladdr == NULL) {

		/* Valid or already-stale entry was deleted (or expired) */

		KASSERT(e->state == L2T_STATE_VALID ||
		    e->state == L2T_STATE_STALE,
		    ("%s: lladdr NULL, state %d", __func__, e->state));

		e->state = L2T_STATE_STALE;

	} else {

		if (e->state == L2T_STATE_RESOLVING ||
		    e->state == L2T_STATE_FAILED ||
		    memcmp(e->dmac, lladdr, ETHER_ADDR_LEN)) {

			/* unresolved -> resolved; or dmac changed */

			memcpy(e->dmac, lladdr, ETHER_ADDR_LEN);
			e->vlan = vtag;
			/* Reprogram the hardware slot and flush the arpq. */
			setup_l2e_send_pending(sc, e);
		}
		e->state = L2T_STATE_VALID;
	}
}

/*
 * Ask the TOE layer to resolve the entry's IPv4 address.  Returns
 * EWOULDBLOCK if resolution is in progress (entry untouched); otherwise the
 * entry is updated (to VALID or FAILED) under its lock and the resolver's
 * return code is passed back.
 */
static int
resolve_entry(struct adapter *sc, struct l2t_entry *e)
{
	struct tom_data *td = sc->tom_softc;
	struct toedev *tod = &td->tod;
	struct sockaddr_in sin = {0};
	/*
	 * NOTE(review): ETHER_HDR_LEN (14) is larger than a MAC address needs;
	 * ETHER_ADDR_LEN would suffice.  Harmless over-allocation -- confirm.
	 */
	uint8_t dmac[ETHER_HDR_LEN];
	uint16_t vtag = EVL_VLID_MASK;
	int rc;

	sin.sin_family = AF_INET;
	sin.sin_len = sizeof(struct sockaddr_in);
	SINADDR(&sin) = e->addr;

	rc = toe_l2_resolve(tod, e->ifp, SA(&sin), dmac, &vtag);
	if (rc == EWOULDBLOCK)
		return (rc);

	mtx_lock(&e->lock);
	update_entry(sc, e, rc == 0 ? dmac : NULL, vtag);
	mtx_unlock(&e->lock);

	return (rc);
}

/*
 * Send an mbuf through an L2T entry, triggering (re)resolution as needed.
 * VALID entries transmit immediately; STALE entries kick off revalidation
 * first; RESOLVING entries queue the mbuf on the arpq until resolution
 * completes; FAILED entries return EHOSTUNREACH (mbuf logged, not freed).
 */
int
t3_l2t_send_slow(struct adapter *sc, struct mbuf *m, struct l2t_entry *e)
{

again:
	switch (e->state) {
	case L2T_STATE_STALE:	/* entry is stale, kick off revalidation */

		if (resolve_entry(sc, e) != EWOULDBLOCK)
			goto again;	/* entry updated, re-examine state */

		/* Fall through */

	case L2T_STATE_VALID:	/* fast-path, send the packet on */

		return (t3_offload_tx(sc, m));

	case L2T_STATE_RESOLVING:
		mtx_lock(&e->lock);
		/* State may have changed before we got the lock; re-check. */
		if (e->state != L2T_STATE_RESOLVING) {
			mtx_unlock(&e->lock);
			goto again;
		}
		arpq_enqueue(e, m);
		mtx_unlock(&e->lock);

		if (resolve_entry(sc, e) == EWOULDBLOCK)
			break;

		/*
		 * Resolution finished synchronously; flush or fail the arpq
		 * (which now contains our mbuf) according to the outcome.
		 */
		mtx_lock(&e->lock);
		if (e->state == L2T_STATE_VALID && e->arpq_head)
			setup_l2e_send_pending(sc, e);
		if (e->state == L2T_STATE_FAILED)
			resolution_failed(e);
		mtx_unlock(&e->lock);
		break;

	case L2T_STATE_FAILED:
		resolution_failed_mbuf(m);
		return (EHOSTUNREACH);
	}

	return (0);
}

/*
 * Allocate a free L2T entry.  Must be called with l2t_data.lock held.
 */
static struct l2t_entry *
alloc_l2e(struct l2t_data *d)
{
	struct l2t_entry *end, *e, **p;

	rw_assert(&d->lock, RA_WLOCKED);

	if (!atomic_load_acq_int(&d->nfree))
		return (NULL);

	/* there's definitely a free entry */
	for (e = d->rover, end = &d->l2tab[d->nentries]; e != end; ++e) {
		if (atomic_load_acq_int(&e->refcnt) == 0)
			goto found;
	}

	/* Wrap around; entry 0 is never used, so start the rescan at 1. */
	for (e = &d->l2tab[1]; atomic_load_acq_int(&e->refcnt); ++e)
		continue;
found:
	d->rover = e + 1;
	atomic_add_int(&d->nfree, -1);

	/*
	 * The entry we found may be an inactive entry that is
	 * presently in the hash table.  We need to remove it.
	 */
	if (e->state != L2T_STATE_UNUSED) {
		int hash = arp_hash(e->addr, e->ifp->if_index, d);

		/* Unlink e from its hash chain. */
		for (p = &d->l2tab[hash].first; *p; p = &(*p)->next) {
			if (*p == e) {
				*p = e->next;
				break;
			}
		}
		e->state = L2T_STATE_UNUSED;
	}

	return (e);
}

/*
 * Find or create an L2T entry for (address, ifp, port) and return it with an
 * extra reference, or NULL if the table is full.  New entries start out in
 * RESOLVING state with refcnt 1.
 */
struct l2t_entry *
t3_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa)
{
	struct tom_data *td = pi->adapter->tom_softc;
	struct l2t_entry *e;
	struct l2t_data *d = td->l2t;
	uint32_t addr = SINADDR(sa);
	int hash = arp_hash(addr, ifp->if_index, d);
	unsigned int smt_idx = pi->port_id;

	rw_wlock(&d->lock);
	for (e = d->l2tab[hash].first; e; e = e->next) {
		if (e->addr == addr && e->ifp == ifp && e->smt_idx == smt_idx) {
			l2t_hold(d, e);
			goto done;
		}
	}

	/* Need to allocate a new entry */
	e = alloc_l2e(d);
	if (e) {
		mtx_lock(&e->lock);	/* avoid race with t3_l2t_free */
		e->next = d->l2tab[hash].first;
		d->l2tab[hash].first = e;

		e->state = L2T_STATE_RESOLVING;
		e->addr = addr;
		e->ifp = ifp;
		e->smt_idx = smt_idx;
		atomic_store_rel_int(&e->refcnt, 1);

		KASSERT(ifp->if_vlantrunk == NULL, ("TOE+VLAN unimplemented."));
		e->vlan = VLAN_NONE;

		mtx_unlock(&e->lock);
	}

done:
	rw_wunlock(&d->lock);

	return (e);
}

/*
 * TOE callback invoked when the kernel's L2 information for (ifp, sa)
 * changes.  If we track that neighbor, push the new lladdr/vtag into the
 * matching entry; otherwise do nothing.
 */
void
t3_l2_update(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
    uint8_t *lladdr, uint16_t vtag)
{
	struct tom_data *td = t3_tomdata(tod);
	struct adapter *sc = tod->tod_softc;
	struct l2t_entry *e;
	struct l2t_data *d = td->l2t;
	u32 addr = *(u32 *) &SIN(sa)->sin_addr;
	int hash = arp_hash(addr, ifp->if_index, d);

	/* Entry lock is taken while still under the table rlock (lock order). */
	rw_rlock(&d->lock);
	for (e = d->l2tab[hash].first; e; e = e->next)
		if (e->addr == addr && e->ifp == ifp) {
			mtx_lock(&e->lock);
			goto found;
		}
	rw_runlock(&d->lock);

	/*
	 * This is of no interest to us.  We've never had an offloaded
	 * connection to this destination, and we aren't attempting one right
	 * now.
	 */
	return;

found:
	rw_runlock(&d->lock);

	KASSERT(e->state != L2T_STATE_UNUSED,
	    ("%s: unused entry in the hash.", __func__));

	update_entry(sc, e, lladdr, vtag);
	mtx_unlock(&e->lock);
}

/*
 * Allocate and initialize an L2 table with l2t_capacity entries (entry 0 is
 * reserved and never handed out).  Returns NULL on allocation failure.
 *
 * NOTE(review): 'size' is a plain int, so a huge l2t_capacity could overflow
 * the multiplication -- callers appear to pass small fixed sizes; confirm.
 */
struct l2t_data *
t3_init_l2t(unsigned int l2t_capacity)
{
	struct l2t_data *d;
	int i, size = sizeof(*d) + l2t_capacity * sizeof(struct l2t_entry);

	d = malloc(size, M_CXGB, M_NOWAIT | M_ZERO);
	if (!d)
		return (NULL);

	d->nentries = l2t_capacity;
	d->rover = &d->l2tab[1];	/* entry 0 is not used */
	atomic_store_rel_int(&d->nfree, l2t_capacity - 1);
	rw_init(&d->lock, "L2T");

	for (i = 0; i < l2t_capacity; ++i) {
		d->l2tab[i].idx = i;
		d->l2tab[i].state = L2T_STATE_UNUSED;
		mtx_init(&d->l2tab[i].lock, "L2T_E", NULL, MTX_DEF);
		atomic_store_rel_int(&d->l2tab[i].refcnt, 0);
	}
	return (d);
}

/*
 * Tear down an L2 table created by t3_init_l2t: destroy all locks and free
 * the table.  Caller is responsible for ensuring no entries are in use.
 */
void
t3_free_l2t(struct l2t_data *d)
{
	int i;

	rw_destroy(&d->lock);
	for (i = 0; i < d->nentries; ++i)
		mtx_destroy(&d->l2tab[i].lock);

	free(d, M_CXGB);
}

/*
 * CPL handler for L2T_WRITE_RPL: log unexpected statuses and free the reply
 * mbuf.  Always returns 0 (reply fully consumed).
 */
static int
do_l2t_write_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
	struct cpl_l2t_write_rpl *rpl = mtod(m, void *);

	if (rpl->status != CPL_ERR_NONE)
		log(LOG_ERR,
		    "Unexpected L2T_WRITE_RPL status %u for entry %u\n",
		    rpl->status, GET_TID(rpl));

	m_freem(m);
	return (0);
}

/* Register this module's CPL handler(s) with the adapter. */
void
t3_init_l2t_cpl_handlers(struct adapter *sc)
{
	t3_register_cpl_handler(sc, CPL_L2T_WRITE_RPL, do_l2t_write_rpl);
}
#endif