/*
 * Copyright (c) 2003-2007 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/if.h>
#include <linux/if_vlan.h>
#include <linux/jhash.h>
#include <net/neighbour.h>
#include "common.h"
#include "t3cdev.h"
#include "cxgb3_defs.h"
#include "l2t.h"
#include "t3_cpl.h"
#include "firmware_exports.h"

#define VLAN_NONE 0xfff

/*
 * Module locking notes:  There is a RW lock protecting the L2 table as a
 * whole plus a spinlock per L2T entry.  Entry lookups and allocations happen
 * under the protection of the table lock, individual entry changes happen
 * while holding that entry's spinlock.  The table lock nests outside the
 * entry locks.  Allocations of new entries take the table lock as writers so
 * no other lookups can happen while allocating new entries.  Entry updates
 * take the table lock as readers so multiple entries can be updated in
 * parallel.  Dropping an L2T entry only decrements its reference count, so
 * it can proceed in parallel with entry allocation, but no entry can change
 * state or increment its reference count during allocation, as both of those
 * operations require lookups.
 */
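/*
 * As an illustration of that ordering within this file: t3_l2t_get() below
 * takes the table lock as a writer and then an entry's spinlock while
 * initializing a freshly allocated entry, whereas t3_l2t_update() takes the
 * table lock as a reader before locking the entry it finds.
 */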

/* Extract the 802.1p priority bits from the entry's VLAN tag. */
static inline unsigned int vlan_prio(const struct l2t_entry *e)
{
	return e->vlan >> 13;
}

/* Hash a next-hop IP address and interface index into the L2 table. */
static inline unsigned int arp_hash(u32 key, int ifindex,
				    const struct l2t_data *d)
{
	return jhash_2words(key, ifindex, 0) & (d->nentries - 1);
}

/*
 * Swap in a new neighbour for an entry, taking a reference on the new one
 * and dropping any reference held on the old one.  Callers hold the entry's
 * lock.
 */
static inline void neigh_replace(struct l2t_entry *e, struct neighbour *n)
{
	neigh_hold(n);
	if (e->neigh)
		neigh_release(e->neigh);
	e->neigh = n;
}

/*
 * Set up an L2T entry and send any packets waiting in the arp queue.  The
 * supplied skb is used for the CPL_L2T_WRITE_REQ.  Must be called with the
 * entry locked.
 */
static int setup_l2e_send_pending(struct t3cdev *dev, struct sk_buff *skb,
				  struct l2t_entry *e)
{
	struct cpl_l2t_write_req *req;

	if (!skb) {
		skb = alloc_skb(sizeof(*req), GFP_ATOMIC);
		if (!skb)
			return -ENOMEM;
	}

	req = (struct cpl_l2t_write_req *)__skb_put(skb, sizeof(*req));
	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx));
	req->params = htonl(V_L2T_W_IDX(e->idx) | V_L2T_W_IFF(e->smt_idx) |
			    V_L2T_W_VLAN(e->vlan & VLAN_VID_MASK) |
			    V_L2T_W_PRIO(vlan_prio(e)));
	memcpy(e->dmac, e->neigh->ha, sizeof(e->dmac));
	memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
	skb->priority = CPL_PRIORITY_CONTROL;
	cxgb3_ofld_send(dev, skb);
	while (e->arpq_head) {
		skb = e->arpq_head;
		e->arpq_head = skb->next;
		skb->next = NULL;
		cxgb3_ofld_send(dev, skb);
	}
	e->arpq_tail = NULL;
	e->state = L2T_STATE_VALID;

	return 0;
}
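
/*
 * Note that if setup_l2e_send_pending() has to allocate its own skb and the
 * GFP_ATOMIC allocation fails, it returns -ENOMEM without touching the
 * entry: the state is unchanged and any packets on the arpq stay queued for
 * a later retry.
 */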

/*
 * Add a packet to an L2T entry's queue of packets awaiting resolution.
 * Must be called with the entry's lock held.
 */
static inline void arpq_enqueue(struct l2t_entry *e, struct sk_buff *skb)
{
	skb->next = NULL;
	if (e->arpq_head)
		e->arpq_tail->next = skb;
	else
		e->arpq_head = skb;
	e->arpq_tail = skb;
}

int t3_l2t_send_slow(struct t3cdev *dev, struct sk_buff *skb,
		     struct l2t_entry *e)
{
again:
	switch (e->state) {
	case L2T_STATE_STALE:	/* entry is stale, kick off revalidation */
		neigh_event_send(e->neigh, NULL);
		spin_lock_bh(&e->lock);
		if (e->state == L2T_STATE_STALE)
			e->state = L2T_STATE_VALID;
		spin_unlock_bh(&e->lock);
		/* fall through */
	case L2T_STATE_VALID:	/* fast-path, send the packet on */
		return cxgb3_ofld_send(dev, skb);
	case L2T_STATE_RESOLVING:
		spin_lock_bh(&e->lock);
		if (e->state != L2T_STATE_RESOLVING) {
			/* ARP already completed */
			spin_unlock_bh(&e->lock);
			goto again;
		}
		arpq_enqueue(e, skb);
		spin_unlock_bh(&e->lock);

		/*
		 * Only the first packet added to the arpq should kick off
		 * resolution.  However, because the alloc_skb below can fail,
		 * we allow each packet added to the arpq to retry resolution
		 * as a way of recovering from transient memory exhaustion.
		 * A better way would be to use a work request to retry L2T
		 * entries when there's no memory.
		 */
		if (!neigh_event_send(e->neigh, NULL)) {
			skb = alloc_skb(sizeof(struct cpl_l2t_write_req),
					GFP_ATOMIC);
			if (!skb)
				break;

			spin_lock_bh(&e->lock);
			if (e->arpq_head)
				setup_l2e_send_pending(dev, skb, e);
			else	/* we lost the race */
				__kfree_skb(skb);
			spin_unlock_bh(&e->lock);
		}
	}
	return 0;
}

EXPORT_SYMBOL(t3_l2t_send_slow);
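
/*
 * A minimal sketch of the transmit path, assuming a hypothetical offload
 * caller that has already resolved its route to a neighbour (names marked
 * hypothetical are illustrations, not part of this file):
 *
 *	struct l2t_entry *e = t3_l2t_get(cdev, neigh, egress_dev);
 *
 *	if (!e)
 *		return -ENOMEM;
 *	// optionally record a failure callback in the skb's control block,
 *	// e.g. a hypothetical my_arp_failure_cb later consumed by
 *	// handle_failed_resolution()
 *	err = t3_l2t_send_slow(cdev, skb, e);
 *
 * While the entry is in L2T_STATE_RESOLVING, packets are parked on the arpq
 * and released by setup_l2e_send_pending() once the neighbour resolves, or
 * handed to the per-skb failure handler if resolution fails.
 */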

void t3_l2t_send_event(struct t3cdev *dev, struct l2t_entry *e)
{
again:
	switch (e->state) {
	case L2T_STATE_STALE:	/* entry is stale, kick off revalidation */
		neigh_event_send(e->neigh, NULL);
		spin_lock_bh(&e->lock);
		if (e->state == L2T_STATE_STALE)
			e->state = L2T_STATE_VALID;
		spin_unlock_bh(&e->lock);
		return;
	case L2T_STATE_VALID:	/* fast-path, nothing more to do */
		return;
	case L2T_STATE_RESOLVING:
		spin_lock_bh(&e->lock);
		if (e->state != L2T_STATE_RESOLVING) {
			/* ARP already completed */
			spin_unlock_bh(&e->lock);
			goto again;
		}
		spin_unlock_bh(&e->lock);

		/*
		 * Only the first event on an entry should kick off
		 * resolution, but resolution may fail transiently, so every
		 * event retries it as a simple way of recovering; see the
		 * corresponding comment in t3_l2t_send_slow().
		 */
		neigh_event_send(e->neigh, NULL);
	}
	return;
}

EXPORT_SYMBOL(t3_l2t_send_event);

/*
 * Allocate a free L2T entry.  Must be called with l2t_data.lock held for
 * writing.
 */
static struct l2t_entry *alloc_l2e(struct l2t_data *d)
{
	struct l2t_entry *end, *e, **p;

	if (!atomic_read(&d->nfree))
		return NULL;

	/* there's definitely a free entry */
	for (e = d->rover, end = &d->l2tab[d->nentries]; e != end; ++e)
		if (atomic_read(&e->refcnt) == 0)
			goto found;

	/* wrap around; entry 0 is never handed out */
	for (e = &d->l2tab[1]; atomic_read(&e->refcnt); ++e)
		;
found:
	d->rover = e + 1;
	atomic_dec(&d->nfree);

	/*
	 * The entry we found may be an inactive entry that is
	 * presently in the hash table.  We need to remove it.
	 */
	if (e->state != L2T_STATE_UNUSED) {
		int hash = arp_hash(e->addr, e->ifindex, d);

		for (p = &d->l2tab[hash].first; *p; p = &(*p)->next)
			if (*p == e) {
				*p = e->next;
				break;
			}
		e->state = L2T_STATE_UNUSED;
	}
	return e;
}

/*
 * Called when an L2T entry has no more users.  The entry is left in the hash
 * table since it is likely to be reused but we also bump nfree to indicate
 * that the entry can be reallocated for a different neighbor.  We also drop
 * the existing neighbor reference in case the neighbor is going away and is
 * waiting on our reference.
 *
 * Because entries can be reallocated to other neighbors once their ref count
 * drops to 0 we need to take the entry's lock to avoid races with a new
 * incarnation.
 */
void t3_l2e_free(struct l2t_data *d, struct l2t_entry *e)
{
	spin_lock_bh(&e->lock);
	if (atomic_read(&e->refcnt) == 0) {	/* hasn't been recycled */
		if (e->neigh) {
			neigh_release(e->neigh);
			e->neigh = NULL;
		}
	}
	spin_unlock_bh(&e->lock);
	atomic_inc(&d->nfree);
}

EXPORT_SYMBOL(t3_l2e_free);

/*
 * Update an L2T entry that was previously used for the same next hop as neigh.
 * Must be called with softirqs disabled.
 */
static inline void reuse_entry(struct l2t_entry *e, struct neighbour *neigh)
{
	unsigned int nud_state;

	spin_lock(&e->lock);	/* avoid race with t3_l2e_free */

	if (neigh != e->neigh)
		neigh_replace(e, neigh);
	nud_state = neigh->nud_state;
	if (memcmp(e->dmac, neigh->ha, sizeof(e->dmac)) ||
	    !(nud_state & NUD_VALID))
		e->state = L2T_STATE_RESOLVING;
	else if (nud_state & NUD_CONNECTED)
		e->state = L2T_STATE_VALID;
	else
		e->state = L2T_STATE_STALE;
	spin_unlock(&e->lock);
}

struct l2t_entry *t3_l2t_get(struct t3cdev *cdev, struct neighbour *neigh,
			     struct net_device *dev)
{
	struct l2t_entry *e;
	struct l2t_data *d = L2DATA(cdev);
	u32 addr = *(u32 *) neigh->primary_key;
	int ifidx = neigh->dev->ifindex;
	int hash = arp_hash(addr, ifidx, d);
	struct port_info *p = netdev_priv(dev);
	int smt_idx = p->port_id;

	write_lock_bh(&d->lock);
	for (e = d->l2tab[hash].first; e; e = e->next)
		if (e->addr == addr && e->ifindex == ifidx &&
		    e->smt_idx == smt_idx) {
			l2t_hold(d, e);
			if (atomic_read(&e->refcnt) == 1)
				reuse_entry(e, neigh);
			goto done;
		}

	/* Need to allocate a new entry */
	e = alloc_l2e(d);
	if (e) {
		spin_lock(&e->lock);	/* avoid race with t3_l2e_free */
		e->next = d->l2tab[hash].first;
		d->l2tab[hash].first = e;
		e->state = L2T_STATE_RESOLVING;
		e->addr = addr;
		e->ifindex = ifidx;
		e->smt_idx = smt_idx;
		atomic_set(&e->refcnt, 1);
		neigh_replace(e, neigh);
		if (neigh->dev->priv_flags & IFF_802_1Q_VLAN)
			e->vlan = VLAN_DEV_INFO(neigh->dev)->vlan_id;
		else
			e->vlan = VLAN_NONE;
		spin_unlock(&e->lock);
	}
done:
	write_unlock_bh(&d->lock);
	return e;
}

EXPORT_SYMBOL(t3_l2t_get);
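
/*
 * The reference taken here (via l2t_hold() on a hit, or the initial refcnt
 * of 1 on a new entry) belongs to the caller; it is expected to be dropped
 * through the corresponding release helper in l2t.h, and t3_l2e_free() above
 * runs once the count reaches zero so alloc_l2e() can recycle the entry.
 */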

/*
 * Drain a queue of packets whose ARP resolution failed: each skb is passed
 * to the failure handler recorded in its control block if one was set,
 * otherwise it is handed to the device anyway.
 */
static void handle_failed_resolution(struct t3cdev *dev, struct sk_buff *arpq)
{
	while (arpq) {
		struct sk_buff *skb = arpq;
		struct l2t_skb_cb *cb = L2T_SKB_CB(skb);

		arpq = skb->next;
		skb->next = NULL;
		if (cb->arp_failure_handler)
			cb->arp_failure_handler(dev, skb);
		else
			cxgb3_ofld_send(dev, skb);
	}
}

/*
 * Called when the host's ARP layer makes a change to some entry that is
 * loaded into the HW L2 table.
 */
void t3_l2t_update(struct t3cdev *dev, struct neighbour *neigh)
{
	struct l2t_entry *e;
	struct sk_buff *arpq = NULL;
	struct l2t_data *d = L2DATA(dev);
	u32 addr = *(u32 *) neigh->primary_key;
	int ifidx = neigh->dev->ifindex;
	int hash = arp_hash(addr, ifidx, d);

	read_lock_bh(&d->lock);
	for (e = d->l2tab[hash].first; e; e = e->next)
		if (e->addr == addr && e->ifindex == ifidx) {
			spin_lock(&e->lock);
			goto found;
		}
	read_unlock_bh(&d->lock);
	return;

found:
	/* BHs stay disabled until the entry lock is dropped below */
	read_unlock(&d->lock);
	if (atomic_read(&e->refcnt)) {
		if (neigh != e->neigh)
			neigh_replace(e, neigh);

		if (e->state == L2T_STATE_RESOLVING) {
			if (neigh->nud_state & NUD_FAILED) {
				arpq = e->arpq_head;
				e->arpq_head = e->arpq_tail = NULL;
			} else if (neigh_is_connected(neigh))
				setup_l2e_send_pending(dev, NULL, e);
		} else {
			e->state = neigh_is_connected(neigh) ?
			    L2T_STATE_VALID : L2T_STATE_STALE;
			if (memcmp(e->dmac, neigh->ha, sizeof(e->dmac)))
				setup_l2e_send_pending(dev, NULL, e);
		}
	}
	spin_unlock_bh(&e->lock);

	if (arpq)
		handle_failed_resolution(dev, arpq);
}
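
/*
 * t3_l2t_update() is typically driven by the kernel's neighbour update
 * notifications (for example a netevent handler for NETEVENT_NEIGH_UPDATE in
 * the offload glue code) so that hardware L2T entries track the software ARP
 * table; the exact hookup lives outside this file.
 */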

struct l2t_data *t3_init_l2t(unsigned int l2t_capacity)
{
	struct l2t_data *d;
	int i, size = sizeof(*d) + l2t_capacity * sizeof(struct l2t_entry);

	d = cxgb_alloc_mem(size);
	if (!d)
		return NULL;

	d->nentries = l2t_capacity;
	d->rover = &d->l2tab[1];	/* entry 0 is not used */
	atomic_set(&d->nfree, l2t_capacity - 1);
	rwlock_init(&d->lock);

	for (i = 0; i < l2t_capacity; ++i) {
		d->l2tab[i].idx = i;
		d->l2tab[i].state = L2T_STATE_UNUSED;
		spin_lock_init(&d->l2tab[i].lock);
		atomic_set(&d->l2tab[i].refcnt, 0);
	}
	return d;
}
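
/*
 * Sizing note: arp_hash() masks with nentries - 1, so the capacity passed in
 * here is assumed to be a power of two for the hash buckets to be used
 * evenly, and one entry is lost because index 0 is never handed out (nfree
 * starts at l2t_capacity - 1 and alloc_l2e() scans from index 1).
 */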

void t3_free_l2t(struct l2t_data *d)
{
	cxgb_free_mem(d);
}