/*-
 * Copyright (c) 2012 Chelsio Communications, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/fnv_hash.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/sbuf.h>
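#include <sys/syslog.h>		/* LOG_ERR for the log(9) call below */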
#include <net/if.h>
#include <net/if_types.h>
#include <net/ethernet.h>
#include <net/if_vlan_var.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/toecore.h>

#include "common/common.h"
#include "common/t4_msg.h"
#include "tom/t4_tom_l2t.h"
#include "tom/t4_tom.h"

#define VLAN_NONE	0xfff

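/*
 * Take a reference on an L2T entry.  A 0 -> 1 transition means a free entry
 * is now in use, so the table's count of free entries is decremented.
 */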
static inline void
l2t_hold(struct l2t_data *d, struct l2t_entry *e)
{

	if (atomic_fetchadd_int(&e->refcnt, 1) == 0)  /* 0 -> 1 transition */
		atomic_subtract_int(&d->nfree, 1);
}

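/*
 * Hash a next-hop address and interface index to a bucket.  IPv4 destinations
 * hash into the first half of the table and IPv6 destinations into the second
 * half, so the two address families never share a hash chain.
 */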
static inline u_int
l2_hash(struct l2t_data *d, const struct sockaddr *sa, int ifindex)
{
	u_int hash, half = d->l2t_size / 2, start = 0;
	const void *key;
	size_t len;

	KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
	    ("%s: sa %p has unexpected sa_family %d", __func__, sa,
	    sa->sa_family));

	if (sa->sa_family == AF_INET) {
		const struct sockaddr_in *sin = (const void *)sa;

		key = &sin->sin_addr;
		len = sizeof(sin->sin_addr);
	} else {
		const struct sockaddr_in6 *sin6 = (const void *)sa;

		key = &sin6->sin6_addr;
		len = sizeof(sin6->sin6_addr);
		start = half;
	}

	hash = fnv_32_buf(key, len, FNV1_32_INIT);
	hash = fnv_32_buf(&ifindex, sizeof(ifindex), hash);
	hash %= half;

	return (hash + start);
}

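/*
 * Compare a sockaddr with the address stored in an L2T entry.  Returns 0 on a
 * match.  IPv4 compares only the first word of e->addr; IPv6 compares the
 * entire 128-bit address.
 */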
static inline int
l2_cmp(const struct sockaddr *sa, struct l2t_entry *e)
{

	KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
	    ("%s: sa %p has unexpected sa_family %d", __func__, sa,
	    sa->sa_family));

	if (sa->sa_family == AF_INET) {
		const struct sockaddr_in *sin = (const void *)sa;

		return (e->addr[0] != sin->sin_addr.s_addr);
	} else {
		const struct sockaddr_in6 *sin6 = (const void *)sa;

		return (memcmp(&e->addr[0], &sin6->sin6_addr, sizeof(e->addr)));
	}
}

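/*
 * Record the next-hop address from the sockaddr in the L2T entry and note the
 * address family in e->ipv6.
 */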
static inline void
l2_store(const struct sockaddr *sa, struct l2t_entry *e)
{

	KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
	    ("%s: sa %p has unexpected sa_family %d", __func__, sa,
	    sa->sa_family));

	if (sa->sa_family == AF_INET) {
		const struct sockaddr_in *sin = (const void *)sa;

		e->addr[0] = sin->sin_addr.s_addr;
		e->ipv6 = 0;
	} else {
		const struct sockaddr_in6 *sin6 = (const void *)sa;

		memcpy(&e->addr[0], &sin6->sin6_addr, sizeof(e->addr));
		e->ipv6 = 1;
	}
}

/*
 * Add a WR to an L2T entry's queue of work requests awaiting resolution.
 * Must be called with the entry's lock held.
 */
static inline void
arpq_enqueue(struct l2t_entry *e, struct wrqe *wr)
{
	mtx_assert(&e->lock, MA_OWNED);

	STAILQ_INSERT_TAIL(&e->wr_list, wr, link);
}

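/*
 * Transmit all work requests queued on the entry.  Must be called with the
 * entry's lock held.
 */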
static inline void
send_pending(struct adapter *sc, struct l2t_entry *e)
{
	struct wrqe *wr;

	mtx_assert(&e->lock, MA_OWNED);

	while ((wr = STAILQ_FIRST(&e->wr_list)) != NULL) {
		STAILQ_REMOVE_HEAD(&e->wr_list, link);
		t4_wrq_tx(sc, wr);
	}
}

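/*
 * L2 resolution failed for this work request: log it.  The WR is deliberately
 * not freed here (note the commented-out free below) and is reported as
 * leaked.
 */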
static void
resolution_failed_for_wr(struct wrqe *wr)
{
	log(LOG_ERR, "%s: leaked work request %p, wr_len %d\n", __func__, wr,
	    wr->wr_len);

	/* free(wr, M_CXGBE); */
}

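/*
 * L2 resolution failed for the entry: drain its queue of pending work
 * requests.  Must be called with the entry's lock held.
 */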
static void
resolution_failed(struct l2t_entry *e)
{
	struct wrqe *wr;

	mtx_assert(&e->lock, MA_OWNED);

	while ((wr = STAILQ_FIRST(&e->wr_list)) != NULL) {
		STAILQ_REMOVE_HEAD(&e->wr_list, link);
		resolution_failed_for_wr(wr);
	}
}

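/*
 * Apply the outcome of an L2 resolution attempt to the entry: mark it FAILED
 * if it never resolved, STALE if a previously valid lladdr was deleted or
 * expired, or write the new dmac/vlan to the hardware L2 table and mark it
 * VALID.  Must be called with the entry's lock held.
 */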
static void
update_entry(struct adapter *sc, struct l2t_entry *e, uint8_t *lladdr,
    uint16_t vtag)
{

	mtx_assert(&e->lock, MA_OWNED);

	/*
	 * The entry may be in active use (e->refcnt > 0) or not.  We update
	 * it even when it's not, as this simplifies the case where we decide
	 * to reuse the entry later.
	 */

	if (lladdr == NULL &&
	    (e->state == L2T_STATE_RESOLVING || e->state == L2T_STATE_FAILED)) {
		/*
		 * Never got a valid L2 address for this one.  Just mark it as
		 * failed instead of removing it from the hash (for which we'd
		 * need to wlock the table).
		 */
		e->state = L2T_STATE_FAILED;
		resolution_failed(e);
		return;

	} else if (lladdr == NULL) {

		/* Valid or already-stale entry was deleted (or expired) */

		KASSERT(e->state == L2T_STATE_VALID ||
		    e->state == L2T_STATE_STALE,
		    ("%s: lladdr NULL, state %d", __func__, e->state));

		e->state = L2T_STATE_STALE;

	} else {

		if (e->state == L2T_STATE_RESOLVING ||
		    e->state == L2T_STATE_FAILED ||
		    memcmp(e->dmac, lladdr, ETHER_ADDR_LEN)) {

			/* unresolved -> resolved; or dmac changed */

			memcpy(e->dmac, lladdr, ETHER_ADDR_LEN);
			e->vlan = vtag;
			t4_write_l2e(sc, e, 1);
		}
		e->state = L2T_STATE_VALID;
	}
}

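/*
 * Ask the TOE core to resolve the entry's next-hop address on its ifnet.
 * Returns EWOULDBLOCK if resolution is still in progress; otherwise the entry
 * is updated (under its lock) with the result.
 */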
static int
resolve_entry(struct adapter *sc, struct l2t_entry *e)
{
	struct tom_data *td = sc->tom_softc;
	struct toedev *tod = &td->tod;
	struct sockaddr_in sin = {0};
	struct sockaddr_in6 sin6 = {0};
	struct sockaddr *sa;
	uint8_t dmac[ETHER_ADDR_LEN];
	uint16_t vtag = VLAN_NONE;
	int rc;

	if (e->ipv6 == 0) {
		sin.sin_family = AF_INET;
		sin.sin_len = sizeof(struct sockaddr_in);
		sin.sin_addr.s_addr = e->addr[0];
		sa = (void *)&sin;
	} else {
		sin6.sin6_family = AF_INET6;
		sin6.sin6_len = sizeof(struct sockaddr_in6);
		memcpy(&sin6.sin6_addr, &e->addr[0], sizeof(e->addr));
		sa = (void *)&sin6;
	}

	rc = toe_l2_resolve(tod, e->ifp, sa, dmac, &vtag);
	if (rc == EWOULDBLOCK)
		return (rc);

	mtx_lock(&e->lock);
	update_entry(sc, e, rc == 0 ? dmac : NULL, vtag);
	mtx_unlock(&e->lock);

	return (rc);
}

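/*
 * Send a work request that depends on this L2T entry.  Valid entries transmit
 * immediately, stale entries are revalidated first, and WRs for entries that
 * are still resolving (or waiting on a sync write) are queued until
 * resolution completes.  Returns EHOSTUNREACH if resolution has failed.
 */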
int
t4_l2t_send_slow(struct adapter *sc, struct wrqe *wr, struct l2t_entry *e)
{

again:
	switch (e->state) {
	case L2T_STATE_STALE:     /* entry is stale, kick off revalidation */

		if (resolve_entry(sc, e) != EWOULDBLOCK)
			goto again;	/* entry updated, re-examine state */

		/* Fall through */

	case L2T_STATE_VALID:     /* fast-path, send the packet on */

		t4_wrq_tx(sc, wr);
		return (0);

	case L2T_STATE_RESOLVING:
	case L2T_STATE_SYNC_WRITE:

		mtx_lock(&e->lock);
		if (e->state != L2T_STATE_SYNC_WRITE &&
		    e->state != L2T_STATE_RESOLVING) {
			/* state changed by the time we got here */
			mtx_unlock(&e->lock);
			goto again;
		}
		arpq_enqueue(e, wr);
		mtx_unlock(&e->lock);

		if (resolve_entry(sc, e) == EWOULDBLOCK)
			break;

		mtx_lock(&e->lock);
		if (e->state == L2T_STATE_VALID && !STAILQ_EMPTY(&e->wr_list))
			send_pending(sc, e);
		if (e->state == L2T_STATE_FAILED)
			resolution_failed(e);
		mtx_unlock(&e->lock);
		break;

	case L2T_STATE_FAILED:
		resolution_failed_for_wr(wr);
		return (EHOSTUNREACH);
	}

	return (0);
}

/*
 * Handler for CPL_L2T_WRITE_RPL while TOM is active.  It runs the base
 * driver's handler first and then, if the reply is for a synchronous write
 * (F_SYNC_WR set in the TID), transmits the work requests that were queued on
 * the entry while the write was in flight and marks the entry valid
 * (switching entries are left alone).
 */
static int
do_l2t_write_rpl2(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(rpl);
	unsigned int idx = tid % L2T_SIZE;
	int rc;

	rc = do_l2t_write_rpl(iq, rss, m);
	if (rc != 0)
		return (rc);

	if (tid & F_SYNC_WR) {
		struct l2t_entry *e = &sc->l2t->l2tab[idx - sc->vres.l2t.start];

		mtx_lock(&e->lock);
		if (e->state != L2T_STATE_SWITCHING) {
			send_pending(sc, e);
			e->state = L2T_STATE_VALID;
		}
		mtx_unlock(&e->lock);
	}

	return (0);
}

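/*
 * Install TOM's CPL_L2T_WRITE_RPL handler so that work requests queued on an
 * entry are sent once its synchronous L2T write completes.
 */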
void
t4_init_l2t_cpl_handlers(struct adapter *sc)
{

	t4_register_cpl_handler(sc, CPL_L2T_WRITE_RPL, do_l2t_write_rpl2);
}

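/* Restore the base driver's CPL_L2T_WRITE_RPL handler. */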
void
t4_uninit_l2t_cpl_handlers(struct adapter *sc)
{

	t4_register_cpl_handler(sc, CPL_L2T_WRITE_RPL, do_l2t_write_rpl);
}

/*
 * The TOE wants an L2 table entry that it can use to reach the next hop over
 * the specified port.  Produce such an entry - create one if needed.
 *
 * Note that the ifnet could be a pseudo-device like if_vlan, if_lagg, etc. on
 * top of the real cxgbe interface.
 */
struct l2t_entry *
t4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa)
{
	struct l2t_entry *e;
	struct l2t_data *d = pi->adapter->l2t;
	u_int hash, smt_idx = pi->port_id;

	KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
	    ("%s: sa %p has unexpected sa_family %d", __func__, sa,
	    sa->sa_family));

#ifndef VLAN_TAG
	if (ifp->if_type == IFT_L2VLAN)
		return (NULL);
#endif

	hash = l2_hash(d, sa, ifp->if_index);
	rw_wlock(&d->lock);
	for (e = d->l2tab[hash].first; e; e = e->next) {
		if (l2_cmp(sa, e) == 0 && e->ifp == ifp &&
		    e->smt_idx == smt_idx) {
			l2t_hold(d, e);
			goto done;
		}
	}

	/* Need to allocate a new entry */
	e = t4_alloc_l2e(d);
	if (e) {
		mtx_lock(&e->lock);          /* avoid race with t4_l2t_free */
		e->next = d->l2tab[hash].first;
		d->l2tab[hash].first = e;

		e->state = L2T_STATE_RESOLVING;
		l2_store(sa, e);
		e->ifp = ifp;
		e->smt_idx = smt_idx;
		e->hash = hash;
		e->lport = pi->lport;
		atomic_store_rel_int(&e->refcnt, 1);
#ifdef VLAN_TAG
		if (ifp->if_type == IFT_L2VLAN)
			VLAN_TAG(ifp, &e->vlan);
		else
			e->vlan = VLAN_NONE;
#endif
		mtx_unlock(&e->lock);
	}
done:
	rw_wunlock(&d->lock);
	return (e);
}

/*
 * Called when the host's ARP layer makes a change to some entry that is loaded
 * into the HW L2 table.
 */
void
t4_l2_update(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
    uint8_t *lladdr, uint16_t vtag)
{
	struct adapter *sc = tod->tod_softc;
	struct l2t_entry *e;
	struct l2t_data *d = sc->l2t;
	u_int hash;

	KASSERT(d != NULL, ("%s: no L2 table", __func__));

	hash = l2_hash(d, sa, ifp->if_index);
	rw_rlock(&d->lock);
	for (e = d->l2tab[hash].first; e; e = e->next) {
		if (l2_cmp(sa, e) == 0 && e->ifp == ifp) {
			mtx_lock(&e->lock);
			if (atomic_load_acq_int(&e->refcnt))
				goto found;
			e->state = L2T_STATE_STALE;
			mtx_unlock(&e->lock);
			break;
		}
	}
	rw_runlock(&d->lock);

	/*
	 * This is of no interest to us.  We've never had an offloaded
	 * connection to this destination, and we aren't attempting one right
	 * now.
	 */
	return;

found:
	rw_runlock(&d->lock);

	KASSERT(e->state != L2T_STATE_UNUSED,
	    ("%s: unused entry in the hash.", __func__));

	update_entry(sc, e, lladdr, vtag);
	mtx_unlock(&e->lock);
}
#endif