cxgb_l2t.c revision 183292
1/************************************************************************** 2 3Copyright (c) 2007, Chelsio Inc. 4All rights reserved. 5 6Redistribution and use in source and binary forms, with or without 7modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26POSSIBILITY OF SUCH DAMAGE. 
27 28***************************************************************************/ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/tom/cxgb_l2t.c 183292 2008-09-23 03:16:54Z kmacy $"); 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/kernel.h> 36#include <sys/module.h> 37#include <sys/bus.h> 38#include <sys/lock.h> 39#include <sys/mutex.h> 40#if __FreeBSD_version > 700000 41#include <sys/rwlock.h> 42#endif 43 44#include <sys/socket.h> 45#include <net/if.h> 46#include <net/ethernet.h> 47#include <net/if_vlan_var.h> 48#include <net/if_dl.h> 49#include <net/route.h> 50#include <netinet/in.h> 51#include <netinet/if_ether.h> 52 53#include <cxgb_include.h> 54 55#define VLAN_NONE 0xfff 56#define SDL(s) ((struct sockaddr_dl *)s) 57#define RT_ENADDR(sa) ((u_char *)LLADDR(SDL((sa)))) 58#define rt_expire rt_rmx.rmx_expire 59 60struct llinfo_arp { 61 struct callout la_timer; 62 struct rtentry *la_rt; 63 struct mbuf *la_hold; /* last packet until resolved/timeout */ 64 u_short la_preempt; /* countdown for pre-expiry arps */ 65 u_short la_asked; /* # requests sent */ 66}; 67 68/* 69 * Module locking notes: There is a RW lock protecting the L2 table as a 70 * whole plus a spinlock per L2T entry. Entry lookups and allocations happen 71 * under the protection of the table lock, individual entry changes happen 72 * while holding that entry's spinlock. The table lock nests outside the 73 * entry locks. Allocations of new entries take the table lock as writers so 74 * no other lookups can happen while allocating new entries. Entry updates 75 * take the table lock as readers so multiple entries can be updated in 76 * parallel. An L2T entry can be dropped by decrementing its reference count 77 * and therefore can happen in parallel with entry allocation but no entry 78 * can change state or increment its ref count during allocation as both of 79 * these perform lookups. 
 */

/* Extract the 3-bit 802.1p priority field from the entry's VLAN tag. */
static inline unsigned int
vlan_prio(const struct l2t_entry *e)
{
	return e->vlan >> 13;
}

/*
 * Hash an (IP address, ifindex) pair into an L2 table bucket.
 * nentries is a power of two, so the mask yields a valid index.
 */
static inline unsigned int
arp_hash(u32 key, int ifindex, const struct l2t_data *d)
{
	return jhash_2words(key, ifindex, 0) & (d->nentries - 1);
}

/*
 * Point e->neigh at a new route: take a reference on the new route
 * first, then drop the reference held on the old one (if any).
 */
static inline void
neigh_replace(struct l2t_entry *e, struct rtentry *rt)
{
	RT_LOCK(rt);
	RT_ADDREF(rt);
	RT_UNLOCK(rt);

	if (e->neigh)
		RTFREE(e->neigh);
	e->neigh = rt;
}

/*
 * Set up an L2T entry and send any packets waiting in the arp queue.  The
 * supplied mbuf is used for the CPL_L2T_WRITE_REQ; if it is NULL a fresh
 * mbuf is allocated (returning ENOMEM on failure).  Must be called with
 * the entry locked.  On success the entry transitions to L2T_STATE_VALID.
 */
static int
setup_l2e_send_pending(struct t3cdev *dev, struct mbuf *m,
    struct l2t_entry *e)
{
	struct cpl_l2t_write_req *req;

	if (!m) {
		if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
			return (ENOMEM);
	}
	/*
	 * XXX MH_ALIGN
	 */
	/* Build the CPL_L2T_WRITE_REQ in place at the front of the mbuf. */
	req = mtod(m, struct cpl_l2t_write_req *);
	m->m_pkthdr.len = m->m_len = sizeof(*req);

	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx));
	req->params = htonl(V_L2T_W_IDX(e->idx) | V_L2T_W_IFF(e->smt_idx) |
	    V_L2T_W_VLAN(e->vlan & EVL_VLID_MASK) |
	    V_L2T_W_PRIO(vlan_prio(e)));

	memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
	m_set_priority(m, CPL_PRIORITY_CONTROL);
	cxgb_ofld_send(dev, m);
	/* Flush every packet that was parked waiting for resolution. */
	while (e->arpq_head) {
		m = e->arpq_head;
		e->arpq_head = m->m_next;
		m->m_next = NULL;
		cxgb_ofld_send(dev, m);
	}
	e->arpq_tail = NULL;
	e->state = L2T_STATE_VALID;

	return 0;
}

/*
 * Add a packet to an L2T entry's queue of packets awaiting resolution.
 * Must be called with the entry's lock held.
151 */ 152static inline void 153arpq_enqueue(struct l2t_entry *e, struct mbuf *m) 154{ 155 m->m_next = NULL; 156 if (e->arpq_head) 157 e->arpq_tail->m_next = m; 158 else 159 e->arpq_head = m; 160 e->arpq_tail = m; 161} 162 163int 164t3_l2t_send_slow(struct t3cdev *dev, struct mbuf *m, struct l2t_entry *e) 165{ 166 struct rtentry *rt = e->neigh; 167 struct sockaddr_in sin; 168 169 bzero(&sin, sizeof(struct sockaddr_in)); 170 sin.sin_family = AF_INET; 171 sin.sin_len = sizeof(struct sockaddr_in); 172 sin.sin_addr.s_addr = e->addr; 173 174 CTR2(KTR_CXGB, "send slow on rt=%p eaddr=0x%08x\n", rt, e->addr); 175again: 176 switch (e->state) { 177 case L2T_STATE_STALE: /* entry is stale, kick off revalidation */ 178 arpresolve(rt->rt_ifp, rt, NULL, 179 (struct sockaddr *)&sin, e->dmac); 180 mtx_lock(&e->lock); 181 if (e->state == L2T_STATE_STALE) 182 e->state = L2T_STATE_VALID; 183 mtx_unlock(&e->lock); 184 case L2T_STATE_VALID: /* fast-path, send the packet on */ 185 return cxgb_ofld_send(dev, m); 186 case L2T_STATE_RESOLVING: 187 mtx_lock(&e->lock); 188 if (e->state != L2T_STATE_RESOLVING) { // ARP already completed 189 mtx_unlock(&e->lock); 190 goto again; 191 } 192 arpq_enqueue(e, m); 193 mtx_unlock(&e->lock); 194 /* 195 * Only the first packet added to the arpq should kick off 196 * resolution. However, because the m_gethdr below can fail, 197 * we allow each packet added to the arpq to retry resolution 198 * as a way of recovering from transient memory exhaustion. 199 * A better way would be to use a work request to retry L2T 200 * entries when there's no memory. 
201 */ 202 if (arpresolve(rt->rt_ifp, rt, NULL, 203 (struct sockaddr *)&sin, e->dmac) == 0) { 204 CTR6(KTR_CXGB, "mac=%x:%x:%x:%x:%x:%x\n", 205 e->dmac[0], e->dmac[1], e->dmac[2], e->dmac[3], e->dmac[4], e->dmac[5]); 206 207 if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) 208 return (ENOMEM); 209 210 mtx_lock(&e->lock); 211 if (e->arpq_head) 212 setup_l2e_send_pending(dev, m, e); 213 else 214 m_freem(m); 215 mtx_unlock(&e->lock); 216 } 217 } 218 return 0; 219} 220 221void 222t3_l2t_send_event(struct t3cdev *dev, struct l2t_entry *e) 223{ 224 struct rtentry *rt; 225 struct mbuf *m0; 226 struct sockaddr_in sin; 227 sin.sin_family = AF_INET; 228 sin.sin_len = sizeof(struct sockaddr_in); 229 sin.sin_addr.s_addr = e->addr; 230 231 if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) 232 return; 233 234 rt = e->neigh; 235again: 236 switch (e->state) { 237 case L2T_STATE_STALE: /* entry is stale, kick off revalidation */ 238 arpresolve(rt->rt_ifp, rt, NULL, 239 (struct sockaddr *)&sin, e->dmac); 240 mtx_lock(&e->lock); 241 if (e->state == L2T_STATE_STALE) { 242 e->state = L2T_STATE_VALID; 243 } 244 mtx_unlock(&e->lock); 245 return; 246 case L2T_STATE_VALID: /* fast-path, send the packet on */ 247 return; 248 case L2T_STATE_RESOLVING: 249 mtx_lock(&e->lock); 250 if (e->state != L2T_STATE_RESOLVING) { // ARP already completed 251 mtx_unlock(&e->lock); 252 goto again; 253 } 254 mtx_unlock(&e->lock); 255 256 /* 257 * Only the first packet added to the arpq should kick off 258 * resolution. However, because the alloc_skb below can fail, 259 * we allow each packet added to the arpq to retry resolution 260 * as a way of recovering from transient memory exhaustion. 261 * A better way would be to use a work request to retry L2T 262 * entries when there's no memory. 263 */ 264 arpresolve(rt->rt_ifp, rt, NULL, 265 (struct sockaddr *)&sin, e->dmac); 266 267 } 268 return; 269} 270/* 271 * Allocate a free L2T entry. Must be called with l2t_data.lock held. 
 */
static struct l2t_entry *
alloc_l2e(struct l2t_data *d)
{
	struct l2t_entry *end, *e, **p;

	/* No unreferenced entries left -- caller must wait/retry. */
	if (!atomic_load_acq_int(&d->nfree))
		return NULL;

	/* there's definitely a free entry */
	for (e = d->rover, end = &d->l2tab[d->nentries]; e != end; ++e)
		if (atomic_load_acq_int(&e->refcnt) == 0)
			goto found;

	/* Wrap around: entry 0 is reserved, so restart the scan at index 1. */
	for (e = &d->l2tab[1]; atomic_load_acq_int(&e->refcnt); ++e) ;
found:
	d->rover = e + 1;
	atomic_add_int(&d->nfree, -1);

	/*
	 * The entry we found may be an inactive entry that is
	 * presently in the hash table. We need to remove it.
	 */
	if (e->state != L2T_STATE_UNUSED) {
		int hash = arp_hash(e->addr, e->ifindex, d);

		/* Unlink e from its old hash chain before reuse. */
		for (p = &d->l2tab[hash].first; *p; p = &(*p)->next)
			if (*p == e) {
				*p = e->next;
				break;
			}
		e->state = L2T_STATE_UNUSED;
	}

	return e;
}

/*
 * Called when an L2T entry has no more users. The entry is left in the hash
 * table since it is likely to be reused but we also bump nfree to indicate
 * that the entry can be reallocated for a different neighbor. We also drop
 * the existing neighbor reference in case the neighbor is going away and is
 * waiting on our reference.
 *
 * Because entries can be reallocated to other neighbors once their ref count
 * drops to 0 we need to take the entry's lock to avoid races with a new
 * incarnation.
 */
void
t3_l2e_free(struct l2t_data *d, struct l2t_entry *e)
{
	struct rtentry *rt = NULL;

	mtx_lock(&e->lock);
	if (atomic_load_acq_int(&e->refcnt) == 0) {	/* hasn't been recycled */
		rt = e->neigh;
		e->neigh = NULL;
	}

	mtx_unlock(&e->lock);
	atomic_add_int(&d->nfree, 1);
	/* Drop the route reference outside the entry lock. */
	if (rt)
		RTFREE(rt);
}


/*
 * Update an L2T entry that was previously used for the same next hop as neigh.
 * Must be called with softirqs disabled.
 */
static inline void
reuse_entry(struct l2t_entry *e, struct rtentry *neigh)
{
	struct llinfo_arp *la;

	la = (struct llinfo_arp *)neigh->rt_llinfo;

	mtx_lock(&e->lock);	/* avoid race with t3_l2t_free */
	if (neigh != e->neigh)
		neigh_replace(e, neigh);

	/*
	 * Decide the entry's new state from the route's MAC and expiry.
	 * NOTE(review): the second clause forces RESOLVING when rt_expire
	 * is in the future, i.e. while the route apparently has not yet
	 * expired -- confirm the comparison direction against the netinet
	 * ARP expiry convention.
	 */
	if (memcmp(e->dmac, RT_ENADDR(neigh->rt_gateway), sizeof(e->dmac)) ||
	    (neigh->rt_expire > time_uptime))
		e->state = L2T_STATE_RESOLVING;
	else if (la->la_hold == NULL)
		e->state = L2T_STATE_VALID;
	else
		e->state = L2T_STATE_STALE;
	mtx_unlock(&e->lock);
}

/*
 * Look up (or create) the L2T entry for the next hop described by neigh/sa
 * on port ifp.  On a hit the entry's refcount is bumped (and the entry
 * refreshed if it was idle); on a miss a free entry is claimed, inserted
 * into the hash table and initialized in RESOLVING state.  Returns NULL
 * when the table is full.
 */
struct l2t_entry *
t3_l2t_get(struct t3cdev *dev, struct rtentry *neigh, struct ifnet *ifp,
    struct sockaddr *sa)
{
	struct l2t_entry *e;
	struct l2t_data *d = L2DATA(dev);
	u32 addr = ((struct sockaddr_in *)sa)->sin_addr.s_addr;
	int ifidx = neigh->rt_ifp->if_index;
	int hash = arp_hash(addr, ifidx, d);
	unsigned int smt_idx = ((struct port_info *)ifp->if_softc)->port_id;

	rw_wlock(&d->lock);
	for (e = d->l2tab[hash].first; e; e = e->next)
		if (e->addr == addr && e->ifindex == ifidx &&
		    e->smt_idx == smt_idx) {
			l2t_hold(d, e);
			/* First user after idle: refresh neighbor state. */
			if (atomic_load_acq_int(&e->refcnt) == 1)
				reuse_entry(e, neigh);
			goto done;
		}

	/* Need to allocate a new entry */
	e = alloc_l2e(d);
	if (e) {
		mtx_lock(&e->lock);	/* avoid race with t3_l2t_free */
		e->next = d->l2tab[hash].first;
		d->l2tab[hash].first = e;
		/* Entry is hashed and locked; table lock no longer needed. */
		rw_wunlock(&d->lock);

		e->state = L2T_STATE_RESOLVING;
		e->addr = addr;
		e->ifindex = ifidx;
		e->smt_idx = smt_idx;
		atomic_store_rel_int(&e->refcnt, 1);
		e->neigh = NULL;


		neigh_replace(e, neigh);
#ifdef notyet
		/*
		 * XXX need to add accessor function for vlan tag
		 */
		if (neigh->rt_ifp->if_vlantrunk)
			e->vlan = VLAN_DEV_INFO(neigh->dev)->vlan_id;
		else
#endif
			e->vlan = VLAN_NONE;
		mtx_unlock(&e->lock);

		return (e);
	}

done:
	rw_wunlock(&d->lock);
	return e;
}

/*
 * Called when address resolution fails for an L2T entry to handle packets
 * on the arpq head. If a packet specifies a failure handler it is invoked,
 * otherwise the packets is sent to the TOE.
 *
 * XXX: maybe we should abandon the latter behavior and just require a failure
 * handler.
 */
static void
handle_failed_resolution(struct t3cdev *dev, struct mbuf *arpq)
{

	/* Walk the detached arpq chain, dispatching one mbuf at a time. */
	while (arpq) {
		struct mbuf *m = arpq;
#ifdef notyet
		struct l2t_mbuf_cb *cb = L2T_MBUF_CB(m);
#endif
		arpq = m->m_next;
		m->m_next = NULL;
#ifdef notyet
		if (cb->arp_failure_handler)
			cb->arp_failure_handler(dev, m);
		else
#endif
			cxgb_ofld_send(dev, m);
	}

}

/*
 * ARP update callback: a neighbor's MAC address has been (re)learned.
 * Find the matching L2T entry, install the new MAC and either push the
 * pending arpq packets to the hardware or, after too many failed ARP
 * attempts, hand them to the failure path.
 */
void
t3_l2t_update(struct t3cdev *dev, struct rtentry *neigh,
    uint8_t *enaddr, struct sockaddr *sa)
{
	struct l2t_entry *e;
	struct mbuf *arpq = NULL;
	struct l2t_data *d = L2DATA(dev);
	u32 addr = *(u32 *) &((struct sockaddr_in *)sa)->sin_addr;
	int ifidx = neigh->rt_ifp->if_index;
	int hash = arp_hash(addr, ifidx, d);
	struct llinfo_arp *la;

	rw_rlock(&d->lock);
	for (e = d->l2tab[hash].first; e; e = e->next)
		if (e->addr == addr && e->ifindex == ifidx) {
			mtx_lock(&e->lock);
			goto found;
		}
	rw_runlock(&d->lock);
	CTR1(KTR_CXGB, "t3_l2t_update: addr=0x%08x not found", addr);
	return;

found:
	/* NOTE(review): leftover debug printf; candidate for removal or
	 * conversion to CTR tracing like the lookup-miss path above. */
	printf("found 0x%08x\n", addr);

	rw_runlock(&d->lock);
	memcpy(e->dmac, enaddr, ETHER_ADDR_LEN);
	/* NOTE(review): leftover debug printf as well. */
	printf("mac=%x:%x:%x:%x:%x:%x\n",
	    e->dmac[0], e->dmac[1], e->dmac[2], e->dmac[3], e->dmac[4], e->dmac[5]);

	if (atomic_load_acq_int(&e->refcnt)) {
		if (neigh != e->neigh)
			neigh_replace(e, neigh);

		la = (struct llinfo_arp *)neigh->rt_llinfo;
		if (e->state == L2T_STATE_RESOLVING) {

			if (la->la_asked >= 5 /* arp_maxtries */) {
				/* Resolution gave up: detach the queued
				 * packets for the failure path below. */
				arpq = e->arpq_head;
				e->arpq_head = e->arpq_tail = NULL;
			} else
				setup_l2e_send_pending(dev, NULL, e);
		} else {
			e->state = L2T_STATE_VALID;
			/* MAC changed: rewrite the hardware L2T entry. */
			if (memcmp(e->dmac, RT_ENADDR(neigh->rt_gateway), 6))
				setup_l2e_send_pending(dev, NULL, e);
		}
	}
	mtx_unlock(&e->lock);

	/* Dispatch failed packets outside the entry lock. */
	if (arpq)
		handle_failed_resolution(dev, arpq);
}

/*
 * Allocate and initialize the L2 table for l2t_capacity entries.
 * Entry 0 is reserved (never handed out).  Returns NULL on allocation
 * failure.
 */
struct l2t_data *
t3_init_l2t(unsigned int l2t_capacity)
{
	struct l2t_data *d;
	int i, size = sizeof(*d) + l2t_capacity * sizeof(struct l2t_entry);

	d = cxgb_alloc_mem(size);
	if (!d)
		return NULL;

	d->nentries = l2t_capacity;
	d->rover = &d->l2tab[1];	/* entry 0 is not used */
	atomic_store_rel_int(&d->nfree, l2t_capacity - 1);
	rw_init(&d->lock, "L2T");

	for (i = 0; i < l2t_capacity; ++i) {
		d->l2tab[i].idx = i;
		d->l2tab[i].state = L2T_STATE_UNUSED;
		mtx_init(&d->l2tab[i].lock, "L2TAB", NULL, MTX_DEF);
		atomic_store_rel_int(&d->l2tab[i].refcnt, 0);
	}
	return d;
}

/*
 * Tear down an L2 table created by t3_init_l2t: destroy all locks and
 * release the table memory.
 */
void
t3_free_l2t(struct l2t_data *d)
{
	int i;

	rw_destroy(&d->lock);
	for (i = 0; i < d->nentries; ++i)
		mtx_destroy(&d->l2tab[i].lock);

	cxgb_free_mem(d);
}