/*-
 * Copyright (c) 2011 Chelsio Communications, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/cxgbe/t4_l2t.c 228561 2011-12-16 02:09:51Z np $");

#include "opt_inet.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/sbuf.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/ethernet.h>
#include <net/if_vlan_var.h>
#include <net/if_dl.h>
#include <net/if_llatbl.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/if_ether.h>

#include "common/common.h"
#include "common/jhash.h"
#include "common/t4_msg.h"
#include "t4_l2t.h"

/*
 * Module locking notes:  There is a RW lock protecting the L2 table as a
 * whole plus a mutex per L2T entry.  Entry lookups and allocations happen
 * under the protection of the table lock; individual entry changes happen
 * while holding that entry's mutex.  The table lock nests outside the
 * entry locks.  Allocations of new entries take the table lock as writers
 * so no other lookups can happen while allocating new entries.  Entry
 * updates take the table lock as readers so multiple entries can be
 * updated in parallel.  An L2T entry can be dropped by decrementing its
 * reference count; that can happen in parallel with entry allocation, but
 * no entry can change state or increment its ref count during allocation
 * as both of these perform lookups.
 *
 * Note: We do not take references to ifnets in this module because both
 * the TOE and the sockets already hold references to the interfaces and the
 * lifetime of an L2T entry is fully contained in the lifetime of the TOE.
 */

/* identifies sync vs async L2T_WRITE_REQs */
#define S_SYNC_WR    12
#define V_SYNC_WR(x) ((x) << S_SYNC_WR)
#define F_SYNC_WR    V_SYNC_WR(1)
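
/*
 * The TID of a CPL_L2T_WRITE_REQ encodes the L2T index in its low bits, the
 * sync flag at bit 12 (S_SYNC_WR), and the reply queue in the V_TID_QID
 * field.  The reply echoes the TID, so do_l2t_write_rpl below can recover
 * the index with (tid & (L2T_SIZE - 1)) and the sync flag with
 * (tid & F_SYNC_WR).
 */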
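/*
 * Rough life cycle of a hashed entry:
 *
 *   UNUSED -> RESOLVING -> SYNC_WRITE -> VALID <-> STALE
 *
 * A new entry starts out RESOLVING (t4_l2t_get); once the neighbor is
 * resolved it is written to the chip (write_l2e) and parked in SYNC_WRITE
 * until the reply arrives, at which point it becomes VALID.  Neighbor
 * updates (t4_l2t_update) and reuse of an idle entry (reuse_entry) move it
 * between VALID, STALE, and RESOLVING.  Switching entries bypass all of
 * this and move directly between UNUSED and SWITCHING.
 */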
enum {
	L2T_STATE_VALID,	/* entry is up to date */
	L2T_STATE_STALE,	/* entry may be used but needs revalidation */
	L2T_STATE_RESOLVING,	/* entry needs address resolution */
	L2T_STATE_SYNC_WRITE,	/* synchronous write of entry underway */

	/* when state is one of the below the entry is not hashed */
	L2T_STATE_SWITCHING,	/* entry is being used by a switching filter */
	L2T_STATE_UNUSED	/* entry not in use */
};
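/*
 * Each element of l2tab[] does double duty: it is an L2T entry and, through
 * its "first" pointer (see struct l2t_entry in t4_l2t.h), the head of a hash
 * bucket.  Lookups walk the chain rooted at d->l2tab[hash].first.
 */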
struct l2t_data {
	struct rwlock lock;
	volatile int nfree;	/* number of free entries */
	struct l2t_entry *rover;	/* starting point for next allocation */
	struct l2t_entry l2tab[L2T_SIZE];
};

static int do_l2t_write_rpl(struct sge_iq *, const struct rss_header *,
    struct mbuf *);

#define VLAN_NONE	0xfff
#define SA(x)           ((struct sockaddr *)(x))
#define SIN(x)          ((struct sockaddr_in *)(x))
#define SINADDR(x)      (SIN(x)->sin_addr.s_addr)

/*
 * Allocate a free L2T entry.  Must be called with l2t_data.lock held for
 * writing.
 */
static struct l2t_entry *
alloc_l2e(struct l2t_data *d)
{
	struct l2t_entry *end, *e, **p;

	rw_assert(&d->lock, RA_WLOCKED);

	if (!atomic_load_acq_int(&d->nfree))
		return (NULL);

	/* there's definitely a free entry */
	for (e = d->rover, end = &d->l2tab[L2T_SIZE]; e != end; ++e)
		if (atomic_load_acq_int(&e->refcnt) == 0)
			goto found;

	for (e = d->l2tab; atomic_load_acq_int(&e->refcnt); ++e)
		continue;
found:
	d->rover = e + 1;
	atomic_subtract_int(&d->nfree, 1);

	/*
	 * The entry we found may be an inactive entry that is
	 * presently in the hash table.  We need to remove it.
	 */
	if (e->state < L2T_STATE_SWITCHING) {
		for (p = &d->l2tab[e->hash].first; *p; p = &(*p)->next) {
			if (*p == e) {
				*p = e->next;
				e->next = NULL;
				break;
			}
		}
	}

	e->state = L2T_STATE_UNUSED;
	return (e);
}

/*
 * Write an L2T entry.  Must be called with the entry locked.
 * The write may be synchronous or asynchronous.  A synchronous write
 * requests a reply (CPL_L2T_WRITE_RPL) and parks the entry in
 * L2T_STATE_SYNC_WRITE until do_l2t_write_rpl processes it.
 */
static int
write_l2e(struct adapter *sc, struct l2t_entry *e, int sync)
{
	struct mbuf *m;
	struct cpl_l2t_write_req *req;

	mtx_assert(&e->lock, MA_OWNED);

	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
		return (ENOMEM);

	req = mtod(m, struct cpl_l2t_write_req *);
	m->m_pkthdr.len = m->m_len = sizeof(*req);

	INIT_TP_WR(req, 0);
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx |
	    V_SYNC_WR(sync) | V_TID_QID(sc->sge.fwq.abs_id)));
	req->params = htons(V_L2T_W_PORT(e->lport) | V_L2T_W_NOREPLY(!sync));
	req->l2t_idx = htons(e->idx);
	req->vlan = htons(e->vlan);
	memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));

	t4_mgmt_tx(sc, m);

	if (sync && e->state != L2T_STATE_SWITCHING)
		e->state = L2T_STATE_SYNC_WRITE;

	return (0);
}

/*
 * Allocate an L2T entry for use by a switching rule.  Such entries need to
 * be explicitly freed, and while busy they are not on any hash chain, so
 * normal address resolution updates do not see them.
 */
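/*
 * The entry comes back in the SWITCHING state with one reference held;
 * t4_l2t_set_switching programs it into the hardware, and dropping the
 * last reference returns it to the free pool.
 */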
struct l2t_entry *
t4_l2t_alloc_switching(struct l2t_data *d)
{
	struct l2t_entry *e;

	rw_wlock(&d->lock);
	e = alloc_l2e(d);
	if (e) {
		mtx_lock(&e->lock);          /* avoid race with t4_l2e_free */
		e->state = L2T_STATE_SWITCHING;
		atomic_store_rel_int(&e->refcnt, 1);
		mtx_unlock(&e->lock);
	}
	rw_wunlock(&d->lock);
	return (e);
}

/*
 * Sets/updates the contents of a switching L2T entry that has been allocated
 * with an earlier call to @t4_l2t_alloc_switching.
 */
int
t4_l2t_set_switching(struct adapter *sc, struct l2t_entry *e, uint16_t vlan,
    uint8_t port, uint8_t *eth_addr)
{
	int rc;

	e->vlan = vlan;
	e->lport = port;
	memcpy(e->dmac, eth_addr, ETHER_ADDR_LEN);
	mtx_lock(&e->lock);
	rc = write_l2e(sc, e, 0);
	mtx_unlock(&e->lock);
	return (rc);
}
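/*
 * Allocate and initialize the L2 table and register the CPL_L2T_WRITE_RPL
 * handler.  @flags is passed through to malloc(9) and should be M_NOWAIT or
 * M_WAITOK.
 */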
int
t4_init_l2t(struct adapter *sc, int flags)
{
	int i;
	struct l2t_data *d;

	d = malloc(sizeof(*d), M_CXGBE, M_ZERO | flags);
	if (!d)
		return (ENOMEM);

	d->rover = d->l2tab;
	atomic_store_rel_int(&d->nfree, L2T_SIZE);
	rw_init(&d->lock, "L2T");

	for (i = 0; i < L2T_SIZE; i++) {
		d->l2tab[i].idx = i;
		d->l2tab[i].state = L2T_STATE_UNUSED;
		mtx_init(&d->l2tab[i].lock, "L2T_E", NULL, MTX_DEF);
		atomic_store_rel_int(&d->l2tab[i].refcnt, 0);
	}

	sc->l2t = d;
	t4_register_cpl_handler(sc, CPL_L2T_WRITE_RPL, do_l2t_write_rpl);

	return (0);
}

int
t4_free_l2t(struct l2t_data *d)
{
	int i;

	for (i = 0; i < L2T_SIZE; i++)
		mtx_destroy(&d->l2tab[i].lock);
	rw_destroy(&d->lock);
	free(d, M_CXGBE);

	return (0);
}
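/*
 * e->vlan holds an 802.1Q tag: the VLAN id in the low 12 bits and the
 * priority in the top 3; VLAN_NONE (0xfff) means no tag.  vlan_prio
 * extracts the priority bits.
 */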
static inline unsigned int
vlan_prio(const struct l2t_entry *e)
{
	return (e->vlan >> 13);
}
267
268static char
269l2e_state(const struct l2t_entry *e)
270{
271	switch (e->state) {
272	case L2T_STATE_VALID: return 'V';  /* valid, fast-path entry */
273	case L2T_STATE_STALE: return 'S';  /* needs revalidation, but usable */
274	case L2T_STATE_SYNC_WRITE: return 'W';
275	case L2T_STATE_RESOLVING: return e->arpq_head ? 'A' : 'R';
276	case L2T_STATE_SWITCHING: return 'X';
277	default: return 'U';
278	}
279}

int
sysctl_l2t(SYSCTL_HANDLER_ARGS)
{
	struct adapter *sc = arg1;
	struct l2t_data *l2t = sc->l2t;
	struct l2t_entry *e;
	struct sbuf *sb;
	int rc, i, header = 0;
	char ip[60];

	if (l2t == NULL)
		return (ENXIO);

	rc = sysctl_wire_old_buffer(req, 0);
	if (rc != 0)
		return (rc);

	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
	if (sb == NULL)
		return (ENOMEM);

	e = &l2t->l2tab[0];
	for (i = 0; i < L2T_SIZE; i++, e++) {
		mtx_lock(&e->lock);
		if (e->state == L2T_STATE_UNUSED)
			goto skip;

		if (header == 0) {
			sbuf_printf(sb, " Idx IP address      "
			    "Ethernet address  VLAN/P LP State Users Port");
			header = 1;
		}
		if (e->state == L2T_STATE_SWITCHING || e->v6)
			ip[0] = 0;
		else
			snprintf(ip, sizeof(ip), "%s",
			    inet_ntoa(*(struct in_addr *)&e->addr[0]));

		/* XXX: accessing lle probably not safe? */
		sbuf_printf(sb, "\n%4u %-15s %02x:%02x:%02x:%02x:%02x:%02x %4d"
		    " %u %2u   %c   %5u %s",
		    e->idx, ip, e->dmac[0], e->dmac[1], e->dmac[2],
		    e->dmac[3], e->dmac[4], e->dmac[5],
		    e->vlan & 0xfff, vlan_prio(e), e->lport,
		    l2e_state(e), atomic_load_acq_int(&e->refcnt),
		    e->lle ? e->lle->lle_tbl->llt_ifp->if_xname : "");
skip:
		mtx_unlock(&e->lock);
	}

	rc = sbuf_finish(sb);
	sbuf_delete(sb);

	return (rc);
}

#ifndef TCP_OFFLOAD_DISABLE
static inline void
l2t_hold(struct l2t_data *d, struct l2t_entry *e)
{
	if (atomic_fetchadd_int(&e->refcnt, 1) == 0)  /* 0 -> 1 transition */
		atomic_subtract_int(&d->nfree, 1);
}

/*
 * To avoid having to check address families we do not allow v4 and v6
 * neighbors to be on the same hash chain.  We keep v4 entries in the first
 * half of available hash buckets and v6 in the second.
 */
enum {
	L2T_SZ_HALF = L2T_SIZE / 2,
	L2T_HASH_MASK = L2T_SZ_HALF - 1
};

static inline unsigned int
arp_hash(const uint32_t *key, int ifindex)
{
	return (jhash_2words(*key, ifindex, 0) & L2T_HASH_MASK);
}

static inline unsigned int
ipv6_hash(const uint32_t *key, int ifindex)
{
	uint32_t xor = key[0] ^ key[1] ^ key[2] ^ key[3];

	return (L2T_SZ_HALF + (jhash_2words(xor, ifindex, 0) & L2T_HASH_MASK));
}

static inline unsigned int
addr_hash(const uint32_t *addr, int addr_len, int ifindex)
{
	return (addr_len == 4 ? arp_hash(addr, ifindex) :
	    ipv6_hash(addr, ifindex));
}

/*
 * Checks whether an L2T entry is for the given IP/IPv6 address.  It does not
 * check whether the L2T entry and the address are of the same address
 * family.  Callers ensure an address is only checked against L2T entries of
 * the same family, something made trivial by the separation of IP and IPv6
 * hash chains mentioned above.  Returns 0 if there's a match (the XOR/OR
 * form is branch-free: any differing word makes the result nonzero).
 */
static inline int
addreq(const struct l2t_entry *e, const uint32_t *addr)
{
	if (e->v6)
		return ((e->addr[0] ^ addr[0]) | (e->addr[1] ^ addr[1]) |
		    (e->addr[2] ^ addr[2]) | (e->addr[3] ^ addr[3]));
	return (e->addr[0] ^ addr[0]);
}

/*
 * Add a packet to an L2T entry's queue of packets awaiting resolution.
 * Must be called with the entry's lock held.
 */
static inline void
arpq_enqueue(struct l2t_entry *e, struct mbuf *m)
{
	mtx_assert(&e->lock, MA_OWNED);

	KASSERT(m->m_nextpkt == NULL, ("%s: m_nextpkt not NULL", __func__));
	if (e->arpq_head)
		e->arpq_tail->m_nextpkt = m;
	else
		e->arpq_head = m;
	e->arpq_tail = m;
}
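/*
 * Transmit all the packets queued on an entry's arpq.  This runs once the
 * entry is usable, from do_l2t_write_rpl when the reply to the synchronous
 * write arrives.  Must be called with the entry's lock held.
 */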
static inline void
send_pending(struct adapter *sc, struct l2t_entry *e)
{
	struct mbuf *m, *next;

	mtx_assert(&e->lock, MA_OWNED);

	for (m = e->arpq_head; m; m = next) {
		next = m->m_nextpkt;
		m->m_nextpkt = NULL;
		t4_wrq_tx(sc, MBUF_EQ(m), m);
	}
	e->arpq_head = e->arpq_tail = NULL;
}

#ifdef INET
/*
 * Looks up and fills in an l2t_entry's lle.  We grab all the locks that we
 * need ourselves, and update e->state at the end if e->lle was successfully
 * filled in.
 *
 * The lle passed in comes from arpresolve and is ignored as it does not
 * appear to be of much use.
 */
static int
l2t_fill_lle(struct adapter *sc, struct l2t_entry *e, struct llentry *unused)
{
	int rc = 0;
	struct sockaddr_in sin;
	struct ifnet *ifp = e->ifp;
	struct llentry *lle;

	bzero(&sin, sizeof(struct sockaddr_in));
	if (e->v6)
		panic("%s: IPv6 L2 resolution not supported yet.", __func__);

	sin.sin_family = AF_INET;
	sin.sin_len = sizeof(struct sockaddr_in);
	memcpy(&sin.sin_addr, e->addr, sizeof(sin.sin_addr));

	mtx_assert(&e->lock, MA_NOTOWNED);
	KASSERT(e->addr && ifp, ("%s: bad prep before call", __func__));

	IF_AFDATA_LOCK(ifp);
	lle = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, SA(&sin));
	IF_AFDATA_UNLOCK(ifp);
	if (!LLE_IS_VALID(lle))
		return (ENOMEM);
	if (!(lle->la_flags & LLE_VALID)) {
		rc = EINVAL;
		goto done;
	}

	LLE_ADDREF(lle);

	mtx_lock(&e->lock);
	if (e->state == L2T_STATE_RESOLVING) {
		KASSERT(e->lle == NULL, ("%s: lle already valid", __func__));
		e->lle = lle;
		memcpy(e->dmac, &lle->ll_addr, ETHER_ADDR_LEN);
		write_l2e(sc, e, 1);
	} else {
		KASSERT(e->lle == lle, ("%s: lle changed", __func__));
		LLE_REMREF(lle);
	}
	mtx_unlock(&e->lock);
done:
	LLE_WUNLOCK(lle);
	return (rc);
}
#endif

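/*
 * Send a packet through an L2T entry.  VALID entries take the fast path;
 * STALE entries are sent as well, after kicking off revalidation; packets
 * for RESOLVING or SYNC_WRITE entries are queued on the arpq until the
 * entry becomes usable.  IPv4 only at this point.
 */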
int
t4_l2t_send(struct adapter *sc, struct mbuf *m, struct l2t_entry *e)
{
#ifndef INET
	return (EINVAL);
#else
	struct llentry *lle = NULL;
	struct sockaddr_in sin;
	struct ifnet *ifp = e->ifp;

	if (e->v6)
		panic("%s: IPv6 L2 resolution not supported yet.", __func__);

	bzero(&sin, sizeof(struct sockaddr_in));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof(struct sockaddr_in);
	memcpy(&sin.sin_addr, e->addr, sizeof(sin.sin_addr));

again:
	switch (e->state) {
	case L2T_STATE_STALE:     /* entry is stale, kick off revalidation */
		if (arpresolve(ifp, NULL, NULL, SA(&sin), e->dmac, &lle) == 0)
			l2t_fill_lle(sc, e, lle);

		/* Fall through */

	case L2T_STATE_VALID:     /* fast-path, send the packet on */
		return (t4_wrq_tx(sc, MBUF_EQ(m), m));

	case L2T_STATE_RESOLVING:
	case L2T_STATE_SYNC_WRITE:
		mtx_lock(&e->lock);
		if (e->state != L2T_STATE_SYNC_WRITE &&
		    e->state != L2T_STATE_RESOLVING) {
			/* state changed by the time we got here */
			mtx_unlock(&e->lock);
			goto again;
		}
		arpq_enqueue(e, m);
		mtx_unlock(&e->lock);

		if (e->state == L2T_STATE_RESOLVING &&
		    arpresolve(ifp, NULL, NULL, SA(&sin), e->dmac, &lle) == 0)
			l2t_fill_lle(sc, e, lle);
	}

	return (0);
#endif
}

/*
 * Called when an L2T entry has no more users.  The entry is left in the hash
 * table since it is likely to be reused but we also bump nfree to indicate
 * that the entry can be reallocated for a different neighbor.  We also drop
 * the existing neighbor reference in case the neighbor is going away and is
 * waiting on our reference.
 *
 * Because entries can be reallocated to other neighbors once their ref count
 * drops to 0 we need to take the entry's lock to avoid races with a new
 * incarnation.
 */
static void
t4_l2e_free(struct l2t_entry *e)
{
	struct llentry *lle = NULL;
	struct l2t_data *d;

	mtx_lock(&e->lock);
	if (atomic_load_acq_int(&e->refcnt) == 0) {  /* hasn't been recycled */
		lle = e->lle;
		e->lle = NULL;
		/*
		 * Don't need to worry about the arpq, an L2T entry can't be
		 * released if any packets are waiting for resolution as we
		 * need to be able to communicate with the device to close a
		 * connection.
		 */
	}
	mtx_unlock(&e->lock);

	d = container_of(e, struct l2t_data, l2tab[e->idx]);
	atomic_add_int(&d->nfree, 1);

	if (lle)
		LLE_FREE(lle);
}

void
t4_l2t_release(struct l2t_entry *e)
{
	if (atomic_fetchadd_int(&e->refcnt, -1) == 1)
		t4_l2e_free(e);
}
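/*
 * Handler for CPL_L2T_WRITE_RPL.  Asynchronous writes ask the chip not to
 * reply (write_l2e sets V_L2T_W_NOREPLY for them), so a successful reply
 * normally belongs to a synchronous write: any packets waiting on the
 * entry's arpq are sent and the entry is marked VALID.
 */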
static int
do_l2t_write_rpl(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(rpl);
	unsigned int idx = tid & (L2T_SIZE - 1);

	if (__predict_false(rpl->status != CPL_ERR_NONE)) {
		log(LOG_ERR,
		    "Unexpected L2T_WRITE_RPL status %u for entry %u\n",
		    rpl->status, idx);
		return (EINVAL);
	}

	if (tid & F_SYNC_WR) {
		struct l2t_entry *e = &sc->l2t->l2tab[idx];

		mtx_lock(&e->lock);
		if (e->state != L2T_STATE_SWITCHING) {
			send_pending(sc, e);
			e->state = L2T_STATE_VALID;
		}
		mtx_unlock(&e->lock);
	}

	return (0);
}

/*
 * Reuse an L2T entry that was previously used for the same next hop.
 */
static void
reuse_entry(struct l2t_entry *e)
{
	struct llentry *lle;

	mtx_lock(&e->lock);                /* avoid race with t4_l2e_free */
	lle = e->lle;
	if (lle) {
		KASSERT(lle->la_flags & LLE_VALID,
		    ("%s: invalid lle stored in l2t_entry", __func__));

		if (lle->la_expire >= time_uptime)
			e->state = L2T_STATE_STALE;
		else
			e->state = L2T_STATE_VALID;
	} else
		e->state = L2T_STATE_RESOLVING;
	mtx_unlock(&e->lock);
}

/*
 * The TOE wants an L2 table entry that it can use to reach the next hop over
 * the specified port.  Produce such an entry - create one if needed.
 *
 * Note that the ifnet could be a pseudo-device like if_vlan, if_lagg, etc. on
 * top of the real cxgbe interface.
 */
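/*
 * The entry returned below holds a reference for the caller, to be dropped
 * with t4_l2t_release when the caller is done with it.  A freshly allocated
 * entry starts out RESOLVING; the first t4_l2t_send on it triggers the
 * actual address resolution.
 */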
struct l2t_entry *
t4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa)
{
	struct l2t_entry *e;
	struct l2t_data *d = pi->adapter->l2t;
	int addr_len;
	uint32_t *addr;
	int hash;
	struct sockaddr_in6 *sin6;
	unsigned int smt_idx = pi->port_id;

	if (sa->sa_family == AF_INET) {
		addr = (uint32_t *)&SINADDR(sa);
		addr_len = sizeof(SINADDR(sa));
	} else if (sa->sa_family == AF_INET6) {
		sin6 = (struct sockaddr_in6 *)sa;
		addr = (uint32_t *)&sin6->sin6_addr.s6_addr;
		addr_len = sizeof(sin6->sin6_addr.s6_addr);
	} else
		return (NULL);

	hash = addr_hash(addr, addr_len, ifp->if_index);

	rw_wlock(&d->lock);
	for (e = d->l2tab[hash].first; e; e = e->next) {
		if (!addreq(e, addr) && e->ifp == ifp &&
		    e->smt_idx == smt_idx) {
			l2t_hold(d, e);
			if (atomic_load_acq_int(&e->refcnt) == 1)
				reuse_entry(e);
			goto done;
		}
	}

	/* Need to allocate a new entry */
	e = alloc_l2e(d);
	if (e) {
		mtx_lock(&e->lock);          /* avoid race with t4_l2e_free */
		e->state = L2T_STATE_RESOLVING;
		memcpy(e->addr, addr, addr_len);
		e->ifindex = ifp->if_index;
		e->smt_idx = smt_idx;
		e->ifp = ifp;
		e->hash = hash;
		e->lport = pi->lport;
		e->v6 = (addr_len == 16);
		e->lle = NULL;
		atomic_store_rel_int(&e->refcnt, 1);
		if (ifp->if_type == IFT_L2VLAN)
			VLAN_TAG(ifp, &e->vlan);
		else
			e->vlan = VLAN_NONE;
		e->next = d->l2tab[hash].first;
		d->l2tab[hash].first = e;
		mtx_unlock(&e->lock);
	}
done:
	rw_wunlock(&d->lock);
	return (e);
}

/*
 * Called when the host's neighbor layer makes a change to some entry that is
 * loaded into the HW L2 table.
 */
void
t4_l2t_update(struct adapter *sc, struct llentry *lle)
{
	struct l2t_entry *e;
	struct l2t_data *d = sc->l2t;
	struct sockaddr *sa = L3_ADDR(lle);
	struct llentry *old_lle = NULL;
	uint32_t *addr = (uint32_t *)&SINADDR(sa);
	struct ifnet *ifp = lle->lle_tbl->llt_ifp;
	int hash = addr_hash(addr, sizeof(*addr), ifp->if_index);

	KASSERT(d != NULL, ("%s: no L2 table", __func__));
	LLE_WLOCK_ASSERT(lle);
	KASSERT(lle->la_flags & LLE_VALID || lle->la_flags & LLE_DELETED,
	    ("%s: entry neither valid nor deleted.", __func__));

	rw_rlock(&d->lock);
	for (e = d->l2tab[hash].first; e; e = e->next) {
		if (!addreq(e, addr) && e->ifp == ifp) {
			mtx_lock(&e->lock);
			if (atomic_load_acq_int(&e->refcnt))
				goto found;
			e->state = L2T_STATE_STALE;
			mtx_unlock(&e->lock);
			break;
		}
	}
	rw_runlock(&d->lock);

	/* The TOE has no interest in this LLE */
	return;

 found:
	rw_runlock(&d->lock);

	if (atomic_load_acq_int(&e->refcnt)) {

		/* Entry is referenced by at least 1 offloaded connection. */

		/* Handle deletes first */
		if (lle->la_flags & LLE_DELETED) {
			if (lle == e->lle) {
				e->lle = NULL;
				e->state = L2T_STATE_RESOLVING;
				LLE_REMREF(lle);
			}
			goto done;
		}

		if (lle != e->lle) {
			old_lle = e->lle;
			LLE_ADDREF(lle);
			e->lle = lle;
		}

		if (e->state == L2T_STATE_RESOLVING ||
		    memcmp(e->dmac, &lle->ll_addr, ETHER_ADDR_LEN)) {
			/* unresolved -> resolved; or dmac changed */
			memcpy(e->dmac, &lle->ll_addr, ETHER_ADDR_LEN);
			write_l2e(sc, e, 1);
		} else {
			/* +ve reinforcement of a valid or stale entry */
		}

		e->state = L2T_STATE_VALID;
	} else {
		/*
		 * Entry was used previously but is unreferenced right now.
		 * e->lle has been released and NULL'd out by t4_l2e_free, or
		 * t4_l2t_release is about to call t4_l2e_free and do that.
		 *
		 * Either way this is of no interest to us.
		 */
	}

done:
	mtx_unlock(&e->lock);
	if (old_lle)
		LLE_FREE(old_lle);
}

#endif