/*-
 * Copyright (c) 2012 Chelsio Communications, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/fnv_hash.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/sbuf.h>
#include <sys/taskqueue.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/ethernet.h>
#include <net/if_vlan_var.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/toecore.h>

#include "common/common.h"
#include "common/t4_msg.h"
#include "tom/t4_tom_l2t.h"
#include "tom/t4_tom.h"

#define VLAN_NONE	0xfff

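/*
 * Take a reference on an L2T entry.  A 0 -> 1 transition means the entry is
 * leaving the free pool, so the table's free-entry count is adjusted.
 */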
static inline void
l2t_hold(struct l2t_data *d, struct l2t_entry *e)
{

	if (atomic_fetchadd_int(&e->refcnt, 1) == 0)  /* 0 -> 1 transition */
		atomic_subtract_int(&d->nfree, 1);
}

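/*
 * Hash an address + ifindex pair to a bucket index.  IPv4 entries hash into
 * the lower half of the table and IPv6 entries into the upper half.
 */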
static inline u_int
l2_hash(struct l2t_data *d, const struct sockaddr *sa, int ifindex)
{
	u_int hash, half = d->l2t_size / 2, start = 0;
	const void *key;
	size_t len;

	KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
	    ("%s: sa %p has unexpected sa_family %d", __func__, sa,
	    sa->sa_family));

	if (sa->sa_family == AF_INET) {
		const struct sockaddr_in *sin = (const void *)sa;

		key = &sin->sin_addr;
		len = sizeof(sin->sin_addr);
	} else {
		const struct sockaddr_in6 *sin6 = (const void *)sa;

		key = &sin6->sin6_addr;
		len = sizeof(sin6->sin6_addr);
		start = half;
	}

	hash = fnv_32_buf(key, len, FNV1_32_INIT);
	hash = fnv_32_buf(&ifindex, sizeof(ifindex), hash);
	hash %= half;

	return (hash + start);
}

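/*
 * Returns 0 if the address in sa matches the one stored in the L2T entry.
 */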
static inline int
l2_cmp(const struct sockaddr *sa, struct l2t_entry *e)
{

	KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
	    ("%s: sa %p has unexpected sa_family %d", __func__, sa,
	    sa->sa_family));

	if (sa->sa_family == AF_INET) {
		const struct sockaddr_in *sin = (const void *)sa;

		return (e->addr[0] != sin->sin_addr.s_addr);
	} else {
		const struct sockaddr_in6 *sin6 = (const void *)sa;

		return (memcmp(&e->addr[0], &sin6->sin6_addr, sizeof(e->addr)));
	}
}

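/*
 * Store the address from sa (and its address family) in the L2T entry.
 */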
static inline void
l2_store(const struct sockaddr *sa, struct l2t_entry *e)
{

	KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
	    ("%s: sa %p has unexpected sa_family %d", __func__, sa,
	    sa->sa_family));

	if (sa->sa_family == AF_INET) {
		const struct sockaddr_in *sin = (const void *)sa;

		e->addr[0] = sin->sin_addr.s_addr;
		e->ipv6 = 0;
	} else {
		const struct sockaddr_in6 *sin6 = (const void *)sa;

		memcpy(&e->addr[0], &sin6->sin6_addr, sizeof(e->addr));
		e->ipv6 = 1;
	}
}

/*
 * Add a WR to an L2T entry's queue of work requests awaiting resolution.
 * Must be called with the entry's lock held.
 */
static inline void
arpq_enqueue(struct l2t_entry *e, struct wrqe *wr)
{
	mtx_assert(&e->lock, MA_OWNED);

	STAILQ_INSERT_TAIL(&e->wr_list, wr, link);
}

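/*
 * Send all work requests queued up on the entry.  Must be called with the
 * entry's lock held.
 */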
static inline void
send_pending(struct adapter *sc, struct l2t_entry *e)
{
	struct wrqe *wr;

	mtx_assert(&e->lock, MA_OWNED);

	while ((wr = STAILQ_FIRST(&e->wr_list)) != NULL) {
		STAILQ_REMOVE_HEAD(&e->wr_list, link);
		t4_wrq_tx(sc, wr);
	}
}

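/*
 * L2 resolution failed for this entry.  Hand its pending work requests over
 * to the TOM's unsent list and let the reclaim task release their resources.
 */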
static void
resolution_failed(struct adapter *sc, struct l2t_entry *e)
{
	struct tom_data *td = sc->tom_softc;

	mtx_assert(&e->lock, MA_OWNED);

	mtx_lock(&td->unsent_wr_lock);
	STAILQ_CONCAT(&td->unsent_wr_list, &e->wr_list);
	mtx_unlock(&td->unsent_wr_lock);

	taskqueue_enqueue(taskqueue_thread, &td->reclaim_wr_resources);
}

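/*
 * Update an entry with the result of an L2 resolution attempt.  A NULL lladdr
 * means the address could not be resolved (or was deleted/expired); otherwise
 * the new dmac/vtag is written to the hardware L2 table if it changed.
 */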
static void
update_entry(struct adapter *sc, struct l2t_entry *e, uint8_t *lladdr,
    uint16_t vtag)
{

	mtx_assert(&e->lock, MA_OWNED);

	/*
	 * The entry may be in active use (e->refcnt > 0) or not.  We update
	 * it even when it's not as this simplifies the case where we decide to
	 * reuse the entry later.
	 */

	if (lladdr == NULL &&
	    (e->state == L2T_STATE_RESOLVING || e->state == L2T_STATE_FAILED)) {
		/*
		 * Never got a valid L2 address for this one.  Just mark it as
		 * failed instead of removing it from the hash (for which we'd
		 * need to wlock the table).
		 */
		e->state = L2T_STATE_FAILED;
		resolution_failed(sc, e);
		return;

	} else if (lladdr == NULL) {

		/* Valid or already-stale entry was deleted (or expired) */

		KASSERT(e->state == L2T_STATE_VALID ||
		    e->state == L2T_STATE_STALE,
		    ("%s: lladdr NULL, state %d", __func__, e->state));

		e->state = L2T_STATE_STALE;

	} else {

		if (e->state == L2T_STATE_RESOLVING ||
		    e->state == L2T_STATE_FAILED ||
		    memcmp(e->dmac, lladdr, ETHER_ADDR_LEN)) {

			/* unresolved -> resolved; or dmac changed */

			memcpy(e->dmac, lladdr, ETHER_ADDR_LEN);
			e->vlan = vtag;
			t4_write_l2e(e, 1);
		}
		e->state = L2T_STATE_VALID;
	}
}

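/*
 * Attempt L2 resolution for the entry via toecore.  Returns EWOULDBLOCK if
 * resolution is still in progress; otherwise the entry is updated with the
 * outcome before returning.
 */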
static int
resolve_entry(struct adapter *sc, struct l2t_entry *e)
{
	struct tom_data *td = sc->tom_softc;
	struct toedev *tod = &td->tod;
	struct sockaddr_in sin = {0};
	struct sockaddr_in6 sin6 = {0};
	struct sockaddr *sa;
	uint8_t dmac[ETHER_HDR_LEN];
	uint16_t vtag = VLAN_NONE;
	int rc;

	if (e->ipv6 == 0) {
		sin.sin_family = AF_INET;
		sin.sin_len = sizeof(struct sockaddr_in);
		sin.sin_addr.s_addr = e->addr[0];
		sa = (void *)&sin;
	} else {
		sin6.sin6_family = AF_INET6;
		sin6.sin6_len = sizeof(struct sockaddr_in6);
		memcpy(&sin6.sin6_addr, &e->addr[0], sizeof(e->addr));
		sa = (void *)&sin6;
	}

	rc = toe_l2_resolve(tod, e->ifp, sa, dmac, &vtag);
	if (rc == EWOULDBLOCK)
		return (rc);

	mtx_lock(&e->lock);
	update_entry(sc, e, rc == 0 ? dmac : NULL, vtag);
	mtx_unlock(&e->lock);

	return (rc);
}

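/*
 * Send a work request that requires this L2T entry, resolving the entry first
 * if necessary.  WRs for entries that are still resolving are queued and sent
 * once resolution completes; EHOSTUNREACH is returned for failed entries.
 */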
int
t4_l2t_send_slow(struct adapter *sc, struct wrqe *wr, struct l2t_entry *e)
{

again:
	switch (e->state) {
	case L2T_STATE_STALE:     /* entry is stale, kick off revalidation */

		if (resolve_entry(sc, e) != EWOULDBLOCK)
			goto again;	/* entry updated, re-examine state */

		/* Fall through */

	case L2T_STATE_VALID:     /* fast-path, send the packet on */

		t4_wrq_tx(sc, wr);
		return (0);

	case L2T_STATE_RESOLVING:
	case L2T_STATE_SYNC_WRITE:

		mtx_lock(&e->lock);
		if (e->state != L2T_STATE_SYNC_WRITE &&
		    e->state != L2T_STATE_RESOLVING) {
			/* state changed by the time we got here */
			mtx_unlock(&e->lock);
			goto again;
		}
		arpq_enqueue(e, wr);
		mtx_unlock(&e->lock);

		if (resolve_entry(sc, e) == EWOULDBLOCK)
			break;

		mtx_lock(&e->lock);
		if (e->state == L2T_STATE_VALID && !STAILQ_EMPTY(&e->wr_list))
			send_pending(sc, e);
		if (e->state == L2T_STATE_FAILED)
			resolution_failed(sc, e);
		mtx_unlock(&e->lock);
		break;

	case L2T_STATE_FAILED:
		return (EHOSTUNREACH);
	}

	return (0);
}

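/*
 * Handle a CPL_L2T_WRITE_RPL from the hardware.  For a synchronous write,
 * any work requests waiting on the write are sent now.
 */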
int
do_l2t_write_rpl2(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(rpl);
	unsigned int idx = tid % L2T_SIZE;

	if (__predict_false(rpl->status != CPL_ERR_NONE)) {
		log(LOG_ERR,
		    "Unexpected L2T_WRITE_RPL (%u) for entry at hw_idx %u\n",
		    rpl->status, idx);
		return (EINVAL);
	}

	if (tid & F_SYNC_WR) {
		struct l2t_entry *e = &sc->l2t->l2tab[idx - sc->vres.l2t.start];

		mtx_lock(&e->lock);
		if (e->state != L2T_STATE_SWITCHING) {
			send_pending(sc, e);
			e->state = L2T_STATE_VALID;
		}
		mtx_unlock(&e->lock);
	}

	return (0);
}

/*
 * The TOE wants an L2 table entry that it can use to reach the next hop over
 * the specified port.  Produce such an entry - create one if needed.
 *
 * Note that the ifnet could be a pseudo-device like if_vlan, if_lagg, etc. on
 * top of the real cxgbe interface.
 */
struct l2t_entry *
t4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa)
{
	struct l2t_entry *e;
	struct adapter *sc = pi->adapter;
	struct l2t_data *d = sc->l2t;
	u_int hash, smt_idx = pi->port_id;

	KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
	    ("%s: sa %p has unexpected sa_family %d", __func__, sa,
	    sa->sa_family));

#ifndef VLAN_TAG
	if (ifp->if_type == IFT_L2VLAN)
		return (NULL);
#endif

	hash = l2_hash(d, sa, ifp->if_index);
	rw_wlock(&d->lock);
	for (e = d->l2tab[hash].first; e; e = e->next) {
		if (l2_cmp(sa, e) == 0 && e->ifp == ifp &&
		    e->smt_idx == smt_idx) {
			l2t_hold(d, e);
			goto done;
		}
	}

	/* Need to allocate a new entry */
	e = t4_alloc_l2e(d);
	if (e) {
		mtx_lock(&e->lock);          /* avoid race with t4_l2t_free */
		e->next = d->l2tab[hash].first;
		d->l2tab[hash].first = e;

		e->state = L2T_STATE_RESOLVING;
		l2_store(sa, e);
		e->ifp = ifp;
		e->smt_idx = smt_idx;
		e->hash = hash;
		e->lport = pi->lport;
		e->wrq = &sc->sge.ctrlq[pi->port_id];
		e->iqid = sc->sge.ofld_rxq[pi->vi[0].first_ofld_rxq].iq.abs_id;
		atomic_store_rel_int(&e->refcnt, 1);
#ifdef VLAN_TAG
		if (ifp->if_type == IFT_L2VLAN)
			VLAN_TAG(ifp, &e->vlan);
		else
			e->vlan = VLAN_NONE;
#endif
		mtx_unlock(&e->lock);
	}
done:
	rw_wunlock(&d->lock);
	return (e);
}

/*
 * Called when the host's ARP layer makes a change to some entry that is loaded
 * into the HW L2 table.
 */
void
t4_l2_update(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
    uint8_t *lladdr, uint16_t vtag)
{
	struct adapter *sc = tod->tod_softc;
	struct l2t_entry *e;
	struct l2t_data *d = sc->l2t;
	u_int hash;

	KASSERT(d != NULL, ("%s: no L2 table", __func__));

	hash = l2_hash(d, sa, ifp->if_index);
	rw_rlock(&d->lock);
	for (e = d->l2tab[hash].first; e; e = e->next) {
		if (l2_cmp(sa, e) == 0 && e->ifp == ifp) {
			mtx_lock(&e->lock);
			if (atomic_load_acq_int(&e->refcnt))
				goto found;
			e->state = L2T_STATE_STALE;
			mtx_unlock(&e->lock);
			break;
		}
	}
	rw_runlock(&d->lock);

	/*
	 * This is of no interest to us.  We've never had an offloaded
	 * connection to this destination, and we aren't attempting one right
	 * now.
	 */
	return;

found:
	rw_runlock(&d->lock);

	KASSERT(e->state != L2T_STATE_UNUSED,
	    ("%s: unused entry in the hash.", __func__));

	update_entry(sc, e, lladdr, vtag);
	mtx_unlock(&e->lock);
}
#endif