1/*-
2 * Copyright (c) 2012 Chelsio Communications, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD$");
29
30#include "opt_inet.h"
31
32#ifdef TCP_OFFLOAD
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/kernel.h>
36#include <sys/module.h>
37#include <sys/bus.h>
38#include <sys/socket.h>
39#include <net/if.h>
40#include <net/ethernet.h>
41#include <net/if_vlan_var.h>
42#include <netinet/in.h>
43#include <netinet/toecore.h>
44
45#include "cxgb_include.h"
46#include "ulp/tom/cxgb_tom.h"
47#include "ulp/tom/cxgb_l2t.h"
48
#define VLAN_NONE	0xfff	/* 12-bit VID sentinel: entry carries no VLAN tag */
#define SA(x)		((struct sockaddr *)(x))	/* generic sockaddr view */
#define SIN(x)		((struct sockaddr_in *)(x))	/* IPv4 sockaddr view */
#define SINADDR(x)	(SIN(x)->sin_addr.s_addr)	/* IPv4 address as u32 */
53
54/*
55 * Module locking notes:  There is a RW lock protecting the L2 table as a
56 * whole plus a mutex per L2T entry.  Entry lookups and allocations happen
57 * under the protection of the table lock, individual entry changes happen
58 * while holding that entry's mutex.  The table lock nests outside the
59 * entry locks.  Allocations of new entries take the table lock as writers so
60 * no other lookups can happen while allocating new entries.  Entry updates
61 * take the table lock as readers so multiple entries can be updated in
62 * parallel.  An L2T entry can be dropped by decrementing its reference count
63 * and therefore can happen in parallel with entry allocation but no entry
64 * can change state or increment its ref count during allocation as both of
65 * these perform lookups.
66 *
67 * When acquiring multiple locks, the order is llentry -> L2 table -> L2 entry.
68 */
69
70static inline unsigned int
71arp_hash(u32 key, int ifindex, const struct l2t_data *d)
72{
73	return jhash_2words(key, ifindex, 0) & (d->nentries - 1);
74}
75
76/*
77 * Set up an L2T entry and send any packets waiting in the arp queue.  Must be
78 * called with the entry locked.
79 */
static int
setup_l2e_send_pending(struct adapter *sc, struct l2t_entry *e)
{
	struct mbuf *m;
	struct cpl_l2t_write_req *req;
	struct port_info *pi = &sc->port[e->smt_idx];	/* smt_idx is port_id */

	mtx_assert(&e->lock, MA_OWNED);

	/* Control-priority mbuf carrying the CPL_L2T_WRITE_REQ work request. */
	m = M_GETHDR_OFLD(pi->first_qset, CPL_PRIORITY_CONTROL, req);
	if (m == NULL) {
		log(LOG_ERR, "%s: no mbuf, can't setup L2 entry at index %d\n",
		    __func__, e->idx);
		return (ENOMEM);
	}

	/*
	 * Program the hardware L2 table slot e->idx with the entry's current
	 * dmac/VLAN.  The VLAN field packs the 12-bit VID and 3-bit priority.
	 */
	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx));
	req->params = htonl(V_L2T_W_IDX(e->idx) | V_L2T_W_IFF(e->smt_idx) |
	    V_L2T_W_VLAN(e->vlan & EVL_VLID_MASK) |
	    V_L2T_W_PRIO(EVL_PRIOFTAG(e->vlan)));
	memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));

	t3_offload_tx(sc, m);

	/*
	 * XXX: We used pi->first_qset to send the L2T_WRITE_REQ.  If any mbuf
	 * on the arpq is going out via another queue set associated with the
	 * port then it has a bad race with the L2T_WRITE_REQ.  Ideally we
	 * should wait till the reply to the write before draining the arpq.
	 */
	/* Drain packets that were queued while the address was unresolved. */
	while (e->arpq_head) {
		m = e->arpq_head;
		e->arpq_head = m->m_next;
		m->m_next = NULL;
		t3_offload_tx(sc, m);
	}
	e->arpq_tail = NULL;

	return (0);
}
121
122/*
 * Add a packet to an L2T entry's queue of packets awaiting resolution.
124 * Must be called with the entry's lock held.
125 */
126static inline void
127arpq_enqueue(struct l2t_entry *e, struct mbuf *m)
128{
129	mtx_assert(&e->lock, MA_OWNED);
130
131	m->m_next = NULL;
132	if (e->arpq_head)
133		e->arpq_tail->m_next = m;
134	else
135		e->arpq_head = m;
136	e->arpq_tail = m;
137}
138
139static void
140resolution_failed_mbuf(struct mbuf *m)
141{
142	log(LOG_ERR, "%s: leaked mbuf %p, CPL at %p",
143	    __func__, m, mtod(m, void *));
144}
145
146static void
147resolution_failed(struct l2t_entry *e)
148{
149	struct mbuf *m;
150
151	mtx_assert(&e->lock, MA_OWNED);
152
153	while (e->arpq_head) {
154		m = e->arpq_head;
155		e->arpq_head = m->m_next;
156		m->m_next = NULL;
157		resolution_failed_mbuf(m);
158	}
159	e->arpq_tail = NULL;
160}
161
/*
 * Apply the outcome of an L2 resolution attempt to an entry: record a new
 * link-layer address/VLAN tag, or transition the entry's state when no
 * address is available (lladdr == NULL).  vtag is only meaningful when
 * lladdr is non-NULL.  Must be called with the entry's lock held.
 */
static void
update_entry(struct adapter *sc, struct l2t_entry *e, uint8_t *lladdr,
    uint16_t vtag)
{

	mtx_assert(&e->lock, MA_OWNED);

	/*
	 * The entry may be in active use (e->refcount > 0) or not.  We update
	 * it even when it's not as this simplifies the case where we decide to
	 * reuse the entry later.
	 */

	if (lladdr == NULL &&
	    (e->state == L2T_STATE_RESOLVING || e->state == L2T_STATE_FAILED)) {
		/*
		 * Never got a valid L2 address for this one.  Just mark it as
		 * failed instead of removing it from the hash (for which we'd
		 * need to wlock the table).
		 */
		e->state = L2T_STATE_FAILED;
		resolution_failed(e);	/* dispose of any queued packets */
		return;

	} else if (lladdr == NULL) {

		/* Valid or already-stale entry was deleted (or expired) */

		KASSERT(e->state == L2T_STATE_VALID ||
		    e->state == L2T_STATE_STALE,
		    ("%s: lladdr NULL, state %d", __func__, e->state));

		/*
		 * Keep the old dmac; STALE entries are revalidated on next
		 * use (see t3_l2t_send_slow).
		 */
		e->state = L2T_STATE_STALE;

	} else {

		if (e->state == L2T_STATE_RESOLVING ||
		    e->state == L2T_STATE_FAILED ||
		    memcmp(e->dmac, lladdr, ETHER_ADDR_LEN)) {

			/* unresolved -> resolved; or dmac changed */

			memcpy(e->dmac, lladdr, ETHER_ADDR_LEN);
			e->vlan = vtag;
			/* Program the hardware slot and drain the arpq. */
			setup_l2e_send_pending(sc, e);
		}
		e->state = L2T_STATE_VALID;
	}
}
211
212static int
213resolve_entry(struct adapter *sc, struct l2t_entry *e)
214{
215	struct tom_data *td = sc->tom_softc;
216	struct toedev *tod = &td->tod;
217	struct sockaddr_in sin = {0};
218	uint8_t dmac[ETHER_HDR_LEN];
219	uint16_t vtag = EVL_VLID_MASK;
220	int rc;
221
222	sin.sin_family = AF_INET;
223	sin.sin_len = sizeof(struct sockaddr_in);
224	SINADDR(&sin) = e->addr;
225
226	rc = toe_l2_resolve(tod, e->ifp, SA(&sin), dmac, &vtag);
227	if (rc == EWOULDBLOCK)
228		return (rc);
229
230	mtx_lock(&e->lock);
231	update_entry(sc, e, rc == 0 ? dmac : NULL, vtag);
232	mtx_unlock(&e->lock);
233
234	return (rc);
235}
236
/*
 * Transmit an offload packet, resolving the destination's L2 address first
 * if necessary.  Packets are queued on the entry's arpq while resolution is
 * in progress.  Returns 0 (sent or queued), EHOSTUNREACH if resolution has
 * failed, or t3_offload_tx()'s return value on the fast path.
 */
int
t3_l2t_send_slow(struct adapter *sc, struct mbuf *m, struct l2t_entry *e)
{

again:
	switch (e->state) {
	case L2T_STATE_STALE:     /* entry is stale, kick off revalidation */

		/*
		 * Anything but EWOULDBLOCK means the entry's state was
		 * updated synchronously; re-dispatch on the new state.  On
		 * EWOULDBLOCK the stale address is still usable: fall
		 * through and send with it while revalidation proceeds.
		 */
		if (resolve_entry(sc, e) != EWOULDBLOCK)
			goto again;	/* entry updated, re-examine state */

		/* Fall through */

	case L2T_STATE_VALID:     /* fast-path, send the packet on */

		return (t3_offload_tx(sc, m));

	case L2T_STATE_RESOLVING:
		mtx_lock(&e->lock);
		if (e->state != L2T_STATE_RESOLVING) {
			/* Resolution raced with us; start over. */
			mtx_unlock(&e->lock);
			goto again;
		}
		arpq_enqueue(e, m);
		mtx_unlock(&e->lock);

		/*
		 * If resolution completes asynchronously the arpq is drained
		 * by the eventual update_entry() call; nothing more to do.
		 */
		if (resolve_entry(sc, e) == EWOULDBLOCK)
			break;

		/* Resolution finished synchronously: flush or fail the arpq. */
		mtx_lock(&e->lock);
		if (e->state == L2T_STATE_VALID && e->arpq_head)
			setup_l2e_send_pending(sc, e);
		if (e->state == L2T_STATE_FAILED)
			resolution_failed(e);
		mtx_unlock(&e->lock);
		break;

	case L2T_STATE_FAILED:
		resolution_failed_mbuf(m);
		return (EHOSTUNREACH);
	}

	return (0);
}
281
282/*
283 * Allocate a free L2T entry.  Must be called with l2t_data.lock held.
284 */
static struct l2t_entry *
alloc_l2e(struct l2t_data *d)
{
	struct l2t_entry *end, *e, **p;

	rw_assert(&d->lock, RA_WLOCKED);

	/* nfree counts entries with refcnt == 0; none free -> table full. */
	if (!atomic_load_acq_int(&d->nfree))
		return (NULL);

	/* there's definitely a free entry */
	for (e = d->rover, end = &d->l2tab[d->nentries]; e != end; ++e) {
		if (atomic_load_acq_int(&e->refcnt) == 0)
			goto found;
	}

	/*
	 * Nothing free at or after the rover: wrap around and scan from the
	 * start.  Entry 0 is reserved and never handed out, so begin at 1;
	 * the nfree check above guarantees this loop terminates.
	 */
	for (e = &d->l2tab[1]; atomic_load_acq_int(&e->refcnt); ++e)
		continue;
found:
	d->rover = e + 1;	/* next search resumes past this entry */
	atomic_add_int(&d->nfree, -1);

	/*
	 * The entry we found may be an inactive entry that is
	 * presently in the hash table.  We need to remove it.
	 */
	if (e->state != L2T_STATE_UNUSED) {
		int hash = arp_hash(e->addr, e->ifp->if_index, d);

		/* Unlink e from its old hash chain. */
		for (p = &d->l2tab[hash].first; *p; p = &(*p)->next) {
			if (*p == e) {
				*p = e->next;
				break;
			}
		}
		e->state = L2T_STATE_UNUSED;
	}

	return (e);
}
325
/*
 * Look up (or create) the L2T entry for the given destination on the given
 * port/interface and return it with a reference held.  Returns NULL when the
 * table is full.  A freshly allocated entry starts in L2T_STATE_RESOLVING
 * with no link-layer address.
 */
struct l2t_entry *
t3_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa)
{
	struct tom_data *td = pi->adapter->tom_softc;
	struct l2t_entry *e;
	struct l2t_data *d = td->l2t;
	uint32_t addr = SINADDR(sa);
	int hash = arp_hash(addr, ifp->if_index, d);
	unsigned int smt_idx = pi->port_id;

	/* Write lock: allocation must not race with other lookups. */
	rw_wlock(&d->lock);
	for (e = d->l2tab[hash].first; e; e = e->next) {
		if (e->addr == addr && e->ifp == ifp && e->smt_idx == smt_idx) {
			l2t_hold(d, e);		/* existing entry, bump ref */
			goto done;
		}
	}

	/* Need to allocate a new entry */
	e = alloc_l2e(d);
	if (e) {
		mtx_lock(&e->lock);          /* avoid race with t3_l2t_free */
		e->next = d->l2tab[hash].first;
		d->l2tab[hash].first = e;

		e->state = L2T_STATE_RESOLVING;
		e->addr = addr;
		e->ifp = ifp;
		e->smt_idx = smt_idx;
		atomic_store_rel_int(&e->refcnt, 1);

		KASSERT(ifp->if_vlantrunk == NULL, ("TOE+VLAN unimplemented."));
		e->vlan = VLAN_NONE;

		mtx_unlock(&e->lock);
	}

done:
	rw_wunlock(&d->lock);

	return (e);
}
368
369void
370t3_l2_update(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
371    uint8_t *lladdr, uint16_t vtag)
372{
373	struct tom_data *td = t3_tomdata(tod);
374	struct adapter *sc = tod->tod_softc;
375	struct l2t_entry *e;
376	struct l2t_data *d = td->l2t;
377	u32 addr = *(u32 *) &SIN(sa)->sin_addr;
378	int hash = arp_hash(addr, ifp->if_index, d);
379
380	rw_rlock(&d->lock);
381	for (e = d->l2tab[hash].first; e; e = e->next)
382		if (e->addr == addr && e->ifp == ifp) {
383			mtx_lock(&e->lock);
384			goto found;
385		}
386	rw_runlock(&d->lock);
387
388	/*
389	 * This is of no interest to us.  We've never had an offloaded
390	 * connection to this destination, and we aren't attempting one right
391	 * now.
392	 */
393	return;
394
395found:
396	rw_runlock(&d->lock);
397
398	KASSERT(e->state != L2T_STATE_UNUSED,
399	    ("%s: unused entry in the hash.", __func__));
400
401	update_entry(sc, e, lladdr, vtag);
402	mtx_unlock(&e->lock);
403}
404
405struct l2t_data *
406t3_init_l2t(unsigned int l2t_capacity)
407{
408	struct l2t_data *d;
409	int i, size = sizeof(*d) + l2t_capacity * sizeof(struct l2t_entry);
410
411	d = malloc(size, M_CXGB, M_NOWAIT | M_ZERO);
412	if (!d)
413		return (NULL);
414
415	d->nentries = l2t_capacity;
416	d->rover = &d->l2tab[1];	/* entry 0 is not used */
417	atomic_store_rel_int(&d->nfree, l2t_capacity - 1);
418	rw_init(&d->lock, "L2T");
419
420	for (i = 0; i < l2t_capacity; ++i) {
421		d->l2tab[i].idx = i;
422		d->l2tab[i].state = L2T_STATE_UNUSED;
423		mtx_init(&d->l2tab[i].lock, "L2T_E", NULL, MTX_DEF);
424		atomic_store_rel_int(&d->l2tab[i].refcnt, 0);
425	}
426	return (d);
427}
428
429void
430t3_free_l2t(struct l2t_data *d)
431{
432	int i;
433
434	rw_destroy(&d->lock);
435	for (i = 0; i < d->nentries; ++i)
436		mtx_destroy(&d->l2tab[i].lock);
437
438	free(d, M_CXGB);
439}
440
441static int
442do_l2t_write_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
443{
444	struct cpl_l2t_write_rpl *rpl = mtod(m, void *);
445
446	if (rpl->status != CPL_ERR_NONE)
447		log(LOG_ERR,
448		       "Unexpected L2T_WRITE_RPL status %u for entry %u\n",
449		       rpl->status, GET_TID(rpl));
450
451	m_freem(m);
452	return (0);
453}
454
/*
 * Register the CPL_L2T_WRITE_RPL handler so replies to L2T write requests
 * are consumed (and unexpected statuses logged).
 */
void
t3_init_l2t_cpl_handlers(struct adapter *sc)
{
	t3_register_cpl_handler(sc, CPL_L2T_WRITE_RPL, do_l2t_write_rpl);
}
460#endif
461