/*
 * Copyright (c) 2003-2007 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/if.h>
#include <linux/if_vlan.h>
#include <linux/jhash.h>
#include <net/neighbour.h>
#include "common.h"
#include "t3cdev.h"
#include "cxgb3_defs.h"
#include "l2t.h"
#include "t3_cpl.h"
#include "firmware_exports.h"

#define VLAN_NONE 0xfff

/*
 * Module locking notes: There is a RW lock protecting the L2 table as a
 * whole plus a spinlock per L2T entry. Entry lookups and allocations happen
 * under the protection of the table lock; individual entry changes happen
 * while holding that entry's spinlock. The table lock nests outside the
 * entry locks. Allocations of new entries take the table lock as writers so
 * no other lookups can happen while allocating new entries. Entry updates
 * take the table lock as readers so multiple entries can be updated in
 * parallel. Dropping an L2T entry only requires decrementing its reference
 * count and so can happen in parallel with entry allocation, but no entry
 * can change state or increment its ref count during allocation, as both of
 * these operations perform lookups.
 */

static inline unsigned int vlan_prio(const struct l2t_entry *e)
{
	return e->vlan >> 13;
}

static inline unsigned int arp_hash(u32 key, int ifindex,
				    const struct l2t_data *d)
{
	return jhash_2words(key, ifindex, 0) & (d->nentries - 1);
}

static inline void neigh_replace(struct l2t_entry *e, struct neighbour *n)
{
	neigh_hold(n);
	if (e->neigh)
		neigh_release(e->neigh);
	e->neigh = n;
}

/*
 * Set up an L2T entry and send any packets waiting in the arp queue. The
 * supplied skb is used for the CPL_L2T_WRITE_REQ. Must be called with the
 * entry locked.
 */
static int setup_l2e_send_pending(struct t3cdev *dev, struct sk_buff *skb,
				  struct l2t_entry *e)
{
	struct cpl_l2t_write_req *req;

	if (!skb) {
		skb = alloc_skb(sizeof(*req), GFP_ATOMIC);
		if (!skb)
			return -ENOMEM;
	}

	req = (struct cpl_l2t_write_req *)__skb_put(skb, sizeof(*req));
	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx));
	req->params = htonl(V_L2T_W_IDX(e->idx) | V_L2T_W_IFF(e->smt_idx) |
			    V_L2T_W_VLAN(e->vlan & VLAN_VID_MASK) |
			    V_L2T_W_PRIO(vlan_prio(e)));
	memcpy(e->dmac, e->neigh->ha, sizeof(e->dmac));
	memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
	skb->priority = CPL_PRIORITY_CONTROL;
	cxgb3_ofld_send(dev, skb);
	while (e->arpq_head) {
		skb = e->arpq_head;
		e->arpq_head = skb->next;
		skb->next = NULL;
		cxgb3_ofld_send(dev, skb);
	}
	e->arpq_tail = NULL;
	e->state = L2T_STATE_VALID;

	return 0;
}

/*
 * Add a packet to an L2T entry's queue of packets awaiting resolution.
 * Must be called with the entry's lock held.
 */
static inline void arpq_enqueue(struct l2t_entry *e, struct sk_buff *skb)
{
	skb->next = NULL;
	if (e->arpq_head)
		e->arpq_tail->next = skb;
	else
		e->arpq_head = skb;
	e->arpq_tail = skb;
}

int t3_l2t_send_slow(struct t3cdev *dev, struct sk_buff *skb,
		     struct l2t_entry *e)
{
again:
	switch (e->state) {
	case L2T_STATE_STALE:	/* entry is stale, kick off revalidation */
		neigh_event_send(e->neigh, NULL);
		spin_lock_bh(&e->lock);
		if (e->state == L2T_STATE_STALE)
			e->state = L2T_STATE_VALID;
		spin_unlock_bh(&e->lock);
		/* fall through */
	case L2T_STATE_VALID:	/* fast path, send the packet on */
		return cxgb3_ofld_send(dev, skb);
	case L2T_STATE_RESOLVING:
		spin_lock_bh(&e->lock);
		if (e->state != L2T_STATE_RESOLVING) {
			/* ARP already completed */
			spin_unlock_bh(&e->lock);
			goto again;
		}
		arpq_enqueue(e, skb);
		spin_unlock_bh(&e->lock);

		/*
		 * Only the first packet added to the arpq should kick off
		 * resolution. However, because the alloc_skb below can fail,
		 * we allow each packet added to the arpq to retry resolution
		 * as a way of recovering from transient memory exhaustion.
		 * A better way would be to use a work request to retry L2T
		 * entries when there's no memory.
		 */
		if (!neigh_event_send(e->neigh, NULL)) {
			skb = alloc_skb(sizeof(struct cpl_l2t_write_req),
					GFP_ATOMIC);
			if (!skb)
				break;

			spin_lock_bh(&e->lock);
			if (e->arpq_head)
				setup_l2e_send_pending(dev, skb, e);
			else	/* we lost the race */
				__kfree_skb(skb);
			spin_unlock_bh(&e->lock);
		}
	}
	return 0;
}

EXPORT_SYMBOL(t3_l2t_send_slow);

void t3_l2t_send_event(struct t3cdev *dev, struct l2t_entry *e)
{
again:
	switch (e->state) {
	case L2T_STATE_STALE:	/* entry is stale, kick off revalidation */
		neigh_event_send(e->neigh, NULL);
		spin_lock_bh(&e->lock);
		if (e->state == L2T_STATE_STALE)
			e->state = L2T_STATE_VALID;
		spin_unlock_bh(&e->lock);
		return;
	case L2T_STATE_VALID:	/* fast path, nothing to send */
		return;
	case L2T_STATE_RESOLVING:
		spin_lock_bh(&e->lock);
		if (e->state != L2T_STATE_RESOLVING) {
			/* ARP already completed */
			spin_unlock_bh(&e->lock);
			goto again;
		}
		spin_unlock_bh(&e->lock);

		/*
		 * Unlike t3_l2t_send_slow there is no packet to queue here,
		 * so just (re)kick neighbour resolution; any packets waiting
		 * on the arpq are sent once the neighbour update arrives.
		 */
		neigh_event_send(e->neigh, NULL);
	}
	return;
}

EXPORT_SYMBOL(t3_l2t_send_event);

/*
 * Allocate a free L2T entry. Must be called with l2t_data.lock held.
 */
static struct l2t_entry *alloc_l2e(struct l2t_data *d)
{
	struct l2t_entry *end, *e, **p;

	if (!atomic_read(&d->nfree))
		return NULL;

	/* there's definitely a free entry */
	for (e = d->rover, end = &d->l2tab[d->nentries]; e != end; ++e)
		if (atomic_read(&e->refcnt) == 0)
			goto found;

	for (e = &d->l2tab[1]; atomic_read(&e->refcnt); ++e)
		;
found:
	d->rover = e + 1;
	atomic_dec(&d->nfree);

	/*
	 * The entry we found may be an inactive entry that is
	 * presently in the hash table. We need to remove it.
	 */
	if (e->state != L2T_STATE_UNUSED) {
		int hash = arp_hash(e->addr, e->ifindex, d);

		for (p = &d->l2tab[hash].first; *p; p = &(*p)->next)
			if (*p == e) {
				*p = e->next;
				break;
			}
		e->state = L2T_STATE_UNUSED;
	}
	return e;
}

/*
 * Called when an L2T entry has no more users. The entry is left in the hash
 * table since it is likely to be reused, but we also bump nfree to indicate
 * that the entry can be reallocated for a different neighbor. We also drop
 * the existing neighbor reference in case the neighbor is going away and is
 * waiting on our reference.
 *
 * Because entries can be reallocated to other neighbors once their ref count
 * drops to 0 we need to take the entry's lock to avoid races with a new
 * incarnation.
 */
void t3_l2e_free(struct l2t_data *d, struct l2t_entry *e)
{
	spin_lock_bh(&e->lock);
	if (atomic_read(&e->refcnt) == 0) {	/* hasn't been recycled */
		if (e->neigh) {
			neigh_release(e->neigh);
			e->neigh = NULL;
		}
	}
	spin_unlock_bh(&e->lock);
	atomic_inc(&d->nfree);
}

EXPORT_SYMBOL(t3_l2e_free);

/*
 * Update an L2T entry that was previously used for the same next hop as
 * neigh. Must be called with softirqs disabled.
 */
static inline void reuse_entry(struct l2t_entry *e, struct neighbour *neigh)
{
	unsigned int nud_state;

	spin_lock(&e->lock);	/* avoid race with t3_l2e_free */

	if (neigh != e->neigh)
		neigh_replace(e, neigh);
	nud_state = neigh->nud_state;
	if (memcmp(e->dmac, neigh->ha, sizeof(e->dmac)) ||
	    !(nud_state & NUD_VALID))
		e->state = L2T_STATE_RESOLVING;
	else if (nud_state & NUD_CONNECTED)
		e->state = L2T_STATE_VALID;
	else
		e->state = L2T_STATE_STALE;
	spin_unlock(&e->lock);
}

struct l2t_entry *t3_l2t_get(struct t3cdev *cdev, struct neighbour *neigh,
			     struct net_device *dev)
{
	struct l2t_entry *e;
	struct l2t_data *d = L2DATA(cdev);
	u32 addr = *(u32 *) neigh->primary_key;
	int ifidx = neigh->dev->ifindex;
	int hash = arp_hash(addr, ifidx, d);
	struct port_info *p = netdev_priv(dev);
	int smt_idx = p->port_id;

	write_lock_bh(&d->lock);
	for (e = d->l2tab[hash].first; e; e = e->next)
		if (e->addr == addr && e->ifindex == ifidx &&
		    e->smt_idx == smt_idx) {
			l2t_hold(d, e);
			if (atomic_read(&e->refcnt) == 1)
				reuse_entry(e, neigh);
			goto done;
		}

	/* Need to allocate a new entry */
	e = alloc_l2e(d);
	if (e) {
		spin_lock(&e->lock);	/* avoid race with t3_l2e_free */
		e->next = d->l2tab[hash].first;
		d->l2tab[hash].first = e;
		e->state = L2T_STATE_RESOLVING;
		e->addr = addr;
		e->ifindex = ifidx;
		e->smt_idx = smt_idx;
		atomic_set(&e->refcnt, 1);
		neigh_replace(e, neigh);
		if (neigh->dev->priv_flags & IFF_802_1Q_VLAN)
			e->vlan = VLAN_DEV_INFO(neigh->dev)->vlan_id;
		else
			e->vlan = VLAN_NONE;
		spin_unlock(&e->lock);
	}
done:
	write_unlock_bh(&d->lock);
	return e;
}

EXPORT_SYMBOL(t3_l2t_get);

/*
 * Address resolution failed for an L2T entry. Hand each packet that was
 * waiting on the resolution to its ARP failure handler, or send it to the
 * device if no handler was registered.
 */
static void handle_failed_resolution(struct t3cdev *dev, struct sk_buff *arpq)
{
	while (arpq) {
		struct sk_buff *skb = arpq;
		struct l2t_skb_cb *cb = L2T_SKB_CB(skb);

		arpq = skb->next;
		skb->next = NULL;
		if (cb->arp_failure_handler)
			cb->arp_failure_handler(dev, skb);
		else
			cxgb3_ofld_send(dev, skb);
	}
}

/*
 * Called when the host's ARP layer makes a change to some entry that is
 * loaded into the HW L2 table.
 */
void t3_l2t_update(struct t3cdev *dev, struct neighbour *neigh)
{
	struct l2t_entry *e;
	struct sk_buff *arpq = NULL;
	struct l2t_data *d = L2DATA(dev);
	u32 addr = *(u32 *) neigh->primary_key;
	int ifidx = neigh->dev->ifindex;
	int hash = arp_hash(addr, ifidx, d);

	read_lock_bh(&d->lock);
	for (e = d->l2tab[hash].first; e; e = e->next)
		if (e->addr == addr && e->ifindex == ifidx) {
			spin_lock(&e->lock);
			goto found;
		}
	read_unlock_bh(&d->lock);
	return;

found:
	/*
	 * Drop the table lock without re-enabling BHs: the entry lock was
	 * taken above with a plain spin_lock(), and the spin_unlock_bh()
	 * below re-enables BHs once we are done with the entry.
	 */
	read_unlock(&d->lock);
	if (atomic_read(&e->refcnt)) {
		if (neigh != e->neigh)
			neigh_replace(e, neigh);

		if (e->state == L2T_STATE_RESOLVING) {
			if (neigh->nud_state & NUD_FAILED) {
				arpq = e->arpq_head;
				e->arpq_head = e->arpq_tail = NULL;
			} else if (neigh_is_connected(neigh))
				setup_l2e_send_pending(dev, NULL, e);
		} else {
			e->state = neigh_is_connected(neigh) ?
				   L2T_STATE_VALID : L2T_STATE_STALE;
			if (memcmp(e->dmac, neigh->ha, sizeof(e->dmac)))
				setup_l2e_send_pending(dev, NULL, e);
		}
	}
	spin_unlock_bh(&e->lock);

	if (arpq)
		handle_failed_resolution(dev, arpq);
}

struct l2t_data *t3_init_l2t(unsigned int l2t_capacity)
{
	struct l2t_data *d;
	int i, size = sizeof(*d) + l2t_capacity * sizeof(struct l2t_entry);

	d = cxgb_alloc_mem(size);
	if (!d)
		return NULL;

	d->nentries = l2t_capacity;
	d->rover = &d->l2tab[1];	/* entry 0 is not used */
	atomic_set(&d->nfree, l2t_capacity - 1);
	rwlock_init(&d->lock);

	for (i = 0; i < l2t_capacity; ++i) {
		d->l2tab[i].idx = i;
		d->l2tab[i].state = L2T_STATE_UNUSED;
		spin_lock_init(&d->l2tab[i].lock);
		atomic_set(&d->l2tab[i].refcnt, 0);
	}
	return d;
}

void t3_free_l2t(struct l2t_data *d)
{
	cxgb_free_mem(d);
}
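
/*
 * Usage sketch (illustrative only, not built as part of this driver): a
 * typical offload caller looks up an L2T entry for the next hop, attaches
 * an ARP failure handler to each packet, and lets t3_l2t_send_slow() queue
 * packets until resolution completes. L2T_SKB_CB() and l2t_release() come
 * from l2t.h; my_arp_failure() and my_offload_send() are hypothetical names
 * used only for this sketch.
 */
#if 0
static void my_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
{
	kfree_skb(skb);		/* resolution failed, drop the packet */
}

static int my_offload_send(struct t3cdev *dev, struct net_device *ndev,
			   struct neighbour *neigh, struct sk_buff *skb)
{
	/* Look up (or allocate) the entry; this takes a reference on it. */
	struct l2t_entry *e = t3_l2t_get(dev, neigh, ndev);

	if (!e)
		return -ENOMEM;

	L2T_SKB_CB(skb)->arp_failure_handler = my_arp_failure;

	/* Sends immediately if the entry is valid, queues on the arpq
	 * otherwise. */
	t3_l2t_send_slow(dev, skb, e);

	/*
	 * The caller normally keeps the reference for the lifetime of the
	 * offloaded connection and drops it at teardown with
	 * l2t_release(L2DATA(dev), e);
	 */
	return 0;
}
#endif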