1237263Snp/*-
2237263Snp * Copyright (c) 2012 Chelsio Communications, Inc.
3237263Snp * All rights reserved.
4237263Snp *
5237263Snp * Redistribution and use in source and binary forms, with or without
6237263Snp * modification, are permitted provided that the following conditions
7237263Snp * are met:
8237263Snp * 1. Redistributions of source code must retain the above copyright
9237263Snp *    notice, this list of conditions and the following disclaimer.
10237263Snp * 2. Redistributions in binary form must reproduce the above copyright
11237263Snp *    notice, this list of conditions and the following disclaimer in the
12237263Snp *    documentation and/or other materials provided with the distribution.
13237263Snp *
14237263Snp * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15237263Snp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16237263Snp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17237263Snp * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18237263Snp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19237263Snp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20237263Snp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21237263Snp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22237263Snp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23237263Snp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24237263Snp * SUCH DAMAGE.
25237263Snp */
26178302Skmacy
27178302Skmacy#include <sys/cdefs.h>
28178302Skmacy__FBSDID("$FreeBSD$");
29178302Skmacy
30237263Snp#include "opt_inet.h"
31237263Snp
32237263Snp#ifdef TCP_OFFLOAD
33178302Skmacy#include <sys/param.h>
34178302Skmacy#include <sys/systm.h>
35178302Skmacy#include <sys/kernel.h>
36178302Skmacy#include <sys/module.h>
37178302Skmacy#include <sys/bus.h>
38178302Skmacy#include <sys/socket.h>
39178302Skmacy#include <net/if.h>
40178302Skmacy#include <net/ethernet.h>
41178302Skmacy#include <net/if_vlan_var.h>
42178302Skmacy#include <netinet/in.h>
43237263Snp#include <netinet/toecore.h>
44178302Skmacy
45237263Snp#include "cxgb_include.h"
46237263Snp#include "ulp/tom/cxgb_tom.h"
47237263Snp#include "ulp/tom/cxgb_l2t.h"
48178302Skmacy
49237263Snp#define VLAN_NONE	0xfff
50237263Snp#define SA(x)		((struct sockaddr *)(x))
51237263Snp#define SIN(x)		((struct sockaddr_in *)(x))
52237263Snp#define SINADDR(x)	(SIN(x)->sin_addr.s_addr)
53178302Skmacy
54178302Skmacy/*
55178302Skmacy * Module locking notes:  There is a RW lock protecting the L2 table as a
56237263Snp * whole plus a mutex per L2T entry.  Entry lookups and allocations happen
57178302Skmacy * under the protection of the table lock, individual entry changes happen
58237263Snp * while holding that entry's mutex.  The table lock nests outside the
59178302Skmacy * entry locks.  Allocations of new entries take the table lock as writers so
60178302Skmacy * no other lookups can happen while allocating new entries.  Entry updates
61178302Skmacy * take the table lock as readers so multiple entries can be updated in
62178302Skmacy * parallel.  An L2T entry can be dropped by decrementing its reference count
63178302Skmacy * and therefore can happen in parallel with entry allocation but no entry
64178302Skmacy * can change state or increment its ref count during allocation as both of
65178302Skmacy * these perform lookups.
66237263Snp *
67237263Snp * When acquiring multiple locks, the order is llentry -> L2 table -> L2 entry.
68178302Skmacy */
69178302Skmacy
70178302Skmacystatic inline unsigned int
71178302Skmacyarp_hash(u32 key, int ifindex, const struct l2t_data *d)
72178302Skmacy{
73178302Skmacy	return jhash_2words(key, ifindex, 0) & (d->nentries - 1);
74178302Skmacy}
75178302Skmacy
/*
 * Set up an L2T entry and send any packets waiting in the arp queue.  Must be
 * called with the entry locked.
 *
 * Builds a CPL_L2T_WRITE_REQ that programs hardware L2 table slot e->idx with
 * the entry's dmac/vlan/priority, sends it on the control priority queue of
 * the entry's port, then transmits every mbuf queued on the entry's arpq.
 * Returns 0 on success or ENOMEM if no mbuf could be allocated for the
 * request (the arpq is left untouched in that case).
 */
static int
setup_l2e_send_pending(struct adapter *sc, struct l2t_entry *e)
{
	struct mbuf *m;
	struct cpl_l2t_write_req *req;
	struct port_info *pi = &sc->port[e->smt_idx];	/* smt_idx is port_id */

	mtx_assert(&e->lock, MA_OWNED);

	/* Allocate an mbuf with room for the write request at its front. */
	m = M_GETHDR_OFLD(pi->first_qset, CPL_PRIORITY_CONTROL, req);
	if (m == NULL) {
		log(LOG_ERR, "%s: no mbuf, can't setup L2 entry at index %d\n",
		    __func__, e->idx);
		return (ENOMEM);
	}

	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx));
	req->params = htonl(V_L2T_W_IDX(e->idx) | V_L2T_W_IFF(e->smt_idx) |
	    V_L2T_W_VLAN(e->vlan & EVL_VLID_MASK) |
	    V_L2T_W_PRIO(EVL_PRIOFTAG(e->vlan)));
	memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));

	t3_offload_tx(sc, m);

	/*
	 * XXX: We used pi->first_qset to send the L2T_WRITE_REQ.  If any mbuf
	 * on the arpq is going out via another queue set associated with the
	 * port then it has a bad race with the L2T_WRITE_REQ.  Ideally we
	 * should wait till the reply to the write before draining the arpq.
	 */
	while (e->arpq_head) {
		m = e->arpq_head;
		e->arpq_head = m->m_next;
		m->m_next = NULL;
		t3_offload_tx(sc, m);
	}
	e->arpq_tail = NULL;

	return (0);
}
121178302Skmacy
122178302Skmacy/*
123178302Skmacy * Add a packet to the an L2T entry's queue of packets awaiting resolution.
124178302Skmacy * Must be called with the entry's lock held.
125178302Skmacy */
126178302Skmacystatic inline void
127178302Skmacyarpq_enqueue(struct l2t_entry *e, struct mbuf *m)
128178302Skmacy{
129237263Snp	mtx_assert(&e->lock, MA_OWNED);
130237263Snp
131178302Skmacy	m->m_next = NULL;
132178302Skmacy	if (e->arpq_head)
133178302Skmacy		e->arpq_tail->m_next = m;
134178302Skmacy	else
135178302Skmacy		e->arpq_head = m;
136178302Skmacy	e->arpq_tail = m;
137178302Skmacy}
138178302Skmacy
139237263Snpstatic void
140237263Snpresolution_failed_mbuf(struct mbuf *m)
141178302Skmacy{
142237263Snp	log(LOG_ERR, "%s: leaked mbuf %p, CPL at %p",
143237263Snp	    __func__, m, mtod(m, void *));
144237263Snp}
145178302Skmacy
146237263Snpstatic void
147237263Snpresolution_failed(struct l2t_entry *e)
148237263Snp{
149237263Snp	struct mbuf *m;
150178302Skmacy
151237263Snp	mtx_assert(&e->lock, MA_OWNED);
152237263Snp
153237263Snp	while (e->arpq_head) {
154237263Snp		m = e->arpq_head;
155237263Snp		e->arpq_head = m->m_next;
156237263Snp		m->m_next = NULL;
157237263Snp		resolution_failed_mbuf(m);
158237263Snp	}
159237263Snp	e->arpq_tail = NULL;
160237263Snp}
161237263Snp
/*
 * Apply the result of an L2 resolution attempt to an L2T entry: lladdr is
 * the resolved link-layer address (NULL if resolution failed or the address
 * was deleted/expired) and vtag is the VLAN tag that goes with it.  Must be
 * called with the entry locked.
 */
static void
update_entry(struct adapter *sc, struct l2t_entry *e, uint8_t *lladdr,
    uint16_t vtag)
{

	mtx_assert(&e->lock, MA_OWNED);

	/*
	 * The entry may be in active use (e->refcount > 0) or not.  We update
	 * it even when it's not as this simplifies the case where we decide to
	 * reuse the entry later.
	 */

	if (lladdr == NULL &&
	    (e->state == L2T_STATE_RESOLVING || e->state == L2T_STATE_FAILED)) {
		/*
		 * Never got a valid L2 address for this one.  Just mark it as
		 * failed instead of removing it from the hash (for which we'd
		 * need to wlock the table).
		 */
		e->state = L2T_STATE_FAILED;
		/* Everything queued on the arpq is unsendable; drop it. */
		resolution_failed(e);
		return;

	} else if (lladdr == NULL) {

		/* Valid or already-stale entry was deleted (or expired) */

		KASSERT(e->state == L2T_STATE_VALID ||
		    e->state == L2T_STATE_STALE,
		    ("%s: lladdr NULL, state %d", __func__, e->state));

		/* Keep the old dmac; STALE forces revalidation on next use. */
		e->state = L2T_STATE_STALE;

	} else {

		if (e->state == L2T_STATE_RESOLVING ||
		    e->state == L2T_STATE_FAILED ||
		    memcmp(e->dmac, lladdr, ETHER_ADDR_LEN)) {

			/* unresolved -> resolved; or dmac changed */

			memcpy(e->dmac, lladdr, ETHER_ADDR_LEN);
			e->vlan = vtag;
			/* Reprogram the hw L2 slot and flush the arpq. */
			setup_l2e_send_pending(sc, e);
		}
		e->state = L2T_STATE_VALID;
	}
}
211178302Skmacy
212237263Snpstatic int
213237263Snpresolve_entry(struct adapter *sc, struct l2t_entry *e)
214178302Skmacy{
215237263Snp	struct tom_data *td = sc->tom_softc;
216237263Snp	struct toedev *tod = &td->tod;
217237263Snp	struct sockaddr_in sin = {0};
218292978Smelifaro	uint8_t dmac[ETHER_HDR_LEN];
219237263Snp	uint16_t vtag = EVL_VLID_MASK;
220237263Snp	int rc;
221237263Snp
222178302Skmacy	sin.sin_family = AF_INET;
223178302Skmacy	sin.sin_len = sizeof(struct sockaddr_in);
224237263Snp	SINADDR(&sin) = e->addr;
225178302Skmacy
226237263Snp	rc = toe_l2_resolve(tod, e->ifp, SA(&sin), dmac, &vtag);
227237263Snp	if (rc == EWOULDBLOCK)
228237263Snp		return (rc);
229237263Snp
230237263Snp	mtx_lock(&e->lock);
231237263Snp	update_entry(sc, e, rc == 0 ? dmac : NULL, vtag);
232237263Snp	mtx_unlock(&e->lock);
233237263Snp
234237263Snp	return (rc);
235237263Snp}
236237263Snp
/*
 * Transmit an offload packet whose L2T entry is not known-valid, handling
 * every entry state: send immediately if VALID, revalidate first if STALE,
 * queue on the arpq while RESOLVING, and hand to resolution_failed_mbuf()
 * if FAILED.  Returns 0, the result of the transmit, or EHOSTUNREACH for a
 * FAILED entry.
 */
int
t3_l2t_send_slow(struct adapter *sc, struct mbuf *m, struct l2t_entry *e)
{

again:
	switch (e->state) {
	case L2T_STATE_STALE:     /* entry is stale, kick off revalidation */

		if (resolve_entry(sc, e) != EWOULDBLOCK)
			goto again;	/* entry updated, re-examine state */

		/* Fall through */

	case L2T_STATE_VALID:     /* fast-path, send the packet on */

		return (t3_offload_tx(sc, m));

	case L2T_STATE_RESOLVING:
		mtx_lock(&e->lock);
		if (e->state != L2T_STATE_RESOLVING) {
			/* State changed before we got the lock; start over. */
			mtx_unlock(&e->lock);
			goto again;
		}
		arpq_enqueue(e, m);
		mtx_unlock(&e->lock);

		/*
		 * The packet is on the arpq now.  If resolution completes
		 * synchronously, push out (or fail) whatever got queued.
		 */
		if (resolve_entry(sc, e) == EWOULDBLOCK)
			break;

		mtx_lock(&e->lock);
		if (e->state == L2T_STATE_VALID && e->arpq_head)
			setup_l2e_send_pending(sc, e);
		if (e->state == L2T_STATE_FAILED)
			resolution_failed(e);
		mtx_unlock(&e->lock);
		break;

	case L2T_STATE_FAILED:
		resolution_failed_mbuf(m);
		return (EHOSTUNREACH);
	}

	return (0);
}
281237263Snp
/*
 * Allocate a free L2T entry.  Must be called with l2t_data.lock held as a
 * writer.  Returns NULL when no entry is free.  The entry handed back may
 * be a retired one that is still on a hash chain; it is unhashed here.
 */
static struct l2t_entry *
alloc_l2e(struct l2t_data *d)
{
	struct l2t_entry *end, *e, **p;

	rw_assert(&d->lock, RA_WLOCKED);

	if (!atomic_load_acq_int(&d->nfree))
		return (NULL);

	/* there's definitely a free entry */
	for (e = d->rover, end = &d->l2tab[d->nentries]; e != end; ++e) {
		if (atomic_load_acq_int(&e->refcnt) == 0)
			goto found;
	}

	/* Wrapped past the end; rescan from entry 1 (entry 0 is not used). */
	for (e = &d->l2tab[1]; atomic_load_acq_int(&e->refcnt); ++e)
		continue;
found:
	d->rover = e + 1;
	atomic_add_int(&d->nfree, -1);

	/*
	 * The entry we found may be an inactive entry that is
	 * presently in the hash table.  We need to remove it.
	 */
	if (e->state != L2T_STATE_UNUSED) {
		int hash = arp_hash(e->addr, e->ifp->if_index, d);

		/* Unlink e from its old hash chain. */
		for (p = &d->l2tab[hash].first; *p; p = &(*p)->next) {
			if (*p == e) {
				*p = e->next;
				break;
			}
		}
		e->state = L2T_STATE_UNUSED;
	}

	return (e);
}
325178302Skmacy
/*
 * Find or create an L2T entry for the given IPv4 destination reachable via
 * ifp on port pi.  On a hit the entry's refcount is bumped; on a miss a
 * fresh entry is hashed in, in the RESOLVING state with refcount 1.
 * Returns NULL if the table is full.
 */
struct l2t_entry *
t3_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa)
{
	struct tom_data *td = pi->adapter->tom_softc;
	struct l2t_entry *e;
	struct l2t_data *d = td->l2t;
	uint32_t addr = SINADDR(sa);	/* IPv4 only */
	int hash = arp_hash(addr, ifp->if_index, d);
	unsigned int smt_idx = pi->port_id;

	/* Writer lock: we may allocate, and allocation requires wlock. */
	rw_wlock(&d->lock);
	for (e = d->l2tab[hash].first; e; e = e->next) {
		if (e->addr == addr && e->ifp == ifp && e->smt_idx == smt_idx) {
			l2t_hold(d, e);
			goto done;
		}
	}

	/* Need to allocate a new entry */
	e = alloc_l2e(d);
	if (e) {
		mtx_lock(&e->lock);          /* avoid race with t3_l2t_free */
		e->next = d->l2tab[hash].first;
		d->l2tab[hash].first = e;

		e->state = L2T_STATE_RESOLVING;
		e->addr = addr;
		e->ifp = ifp;
		e->smt_idx = smt_idx;
		atomic_store_rel_int(&e->refcnt, 1);

		KASSERT(ifp->if_vlantrunk == NULL, ("TOE+VLAN unimplemented."));
		e->vlan = VLAN_NONE;

		mtx_unlock(&e->lock);
	}

done:
	rw_wunlock(&d->lock);

	return (e);
}
368178302Skmacy
369178302Skmacyvoid
370237263Snpt3_l2_update(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
371237263Snp    uint8_t *lladdr, uint16_t vtag)
372178302Skmacy{
373237263Snp	struct tom_data *td = t3_tomdata(tod);
374237263Snp	struct adapter *sc = tod->tod_softc;
375178302Skmacy	struct l2t_entry *e;
376237263Snp	struct l2t_data *d = td->l2t;
377237263Snp	u32 addr = *(u32 *) &SIN(sa)->sin_addr;
378237263Snp	int hash = arp_hash(addr, ifp->if_index, d);
379178302Skmacy
380178302Skmacy	rw_rlock(&d->lock);
381178302Skmacy	for (e = d->l2tab[hash].first; e; e = e->next)
382237263Snp		if (e->addr == addr && e->ifp == ifp) {
383178302Skmacy			mtx_lock(&e->lock);
384178302Skmacy			goto found;
385178302Skmacy		}
386178302Skmacy	rw_runlock(&d->lock);
387237263Snp
388237263Snp	/*
389237263Snp	 * This is of no interest to us.  We've never had an offloaded
390237263Snp	 * connection to this destination, and we aren't attempting one right
391237263Snp	 * now.
392237263Snp	 */
393178302Skmacy	return;
394178302Skmacy
395178302Skmacyfound:
396237263Snp	rw_runlock(&d->lock);
397178302Skmacy
398237263Snp	KASSERT(e->state != L2T_STATE_UNUSED,
399237263Snp	    ("%s: unused entry in the hash.", __func__));
400237263Snp
401237263Snp	update_entry(sc, e, lladdr, vtag);
402178302Skmacy	mtx_unlock(&e->lock);
403178302Skmacy}
404178302Skmacy
405178302Skmacystruct l2t_data *
406178302Skmacyt3_init_l2t(unsigned int l2t_capacity)
407178302Skmacy{
408178302Skmacy	struct l2t_data *d;
409178302Skmacy	int i, size = sizeof(*d) + l2t_capacity * sizeof(struct l2t_entry);
410178302Skmacy
411237263Snp	d = malloc(size, M_CXGB, M_NOWAIT | M_ZERO);
412178302Skmacy	if (!d)
413237263Snp		return (NULL);
414178302Skmacy
415178302Skmacy	d->nentries = l2t_capacity;
416178302Skmacy	d->rover = &d->l2tab[1];	/* entry 0 is not used */
417178302Skmacy	atomic_store_rel_int(&d->nfree, l2t_capacity - 1);
418178302Skmacy	rw_init(&d->lock, "L2T");
419178302Skmacy
420178302Skmacy	for (i = 0; i < l2t_capacity; ++i) {
421178302Skmacy		d->l2tab[i].idx = i;
422178302Skmacy		d->l2tab[i].state = L2T_STATE_UNUSED;
423237263Snp		mtx_init(&d->l2tab[i].lock, "L2T_E", NULL, MTX_DEF);
424178302Skmacy		atomic_store_rel_int(&d->l2tab[i].refcnt, 0);
425178302Skmacy	}
426237263Snp	return (d);
427178302Skmacy}
428178302Skmacy
429178302Skmacyvoid
430178302Skmacyt3_free_l2t(struct l2t_data *d)
431178302Skmacy{
432178302Skmacy	int i;
433178302Skmacy
434178302Skmacy	rw_destroy(&d->lock);
435178302Skmacy	for (i = 0; i < d->nentries; ++i)
436178302Skmacy		mtx_destroy(&d->l2tab[i].lock);
437178302Skmacy
438237263Snp	free(d, M_CXGB);
439178302Skmacy}
440237263Snp
441237263Snpstatic int
442237263Snpdo_l2t_write_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
443237263Snp{
444237263Snp	struct cpl_l2t_write_rpl *rpl = mtod(m, void *);
445237263Snp
446237263Snp	if (rpl->status != CPL_ERR_NONE)
447237263Snp		log(LOG_ERR,
448237263Snp		       "Unexpected L2T_WRITE_RPL status %u for entry %u\n",
449237263Snp		       rpl->status, GET_TID(rpl));
450237263Snp
451237263Snp	m_freem(m);
452237263Snp	return (0);
453237263Snp}
454237263Snp
/*
 * Register this module's CPL message handlers with the adapter.  The L2
 * table code only cares about CPL_L2T_WRITE_RPL.
 */
void
t3_init_l2t_cpl_handlers(struct adapter *sc)
{
	t3_register_cpl_handler(sc, CPL_L2T_WRITE_RPL, do_l2t_write_rpl);
}
460237263Snp#endif
461