1222509Snp/*-
2237263Snp * Copyright (c) 2012 Chelsio Communications, Inc.
3222509Snp * All rights reserved.
4222509Snp *
5222509Snp * Redistribution and use in source and binary forms, with or without
6222509Snp * modification, are permitted provided that the following conditions
7222509Snp * are met:
8222509Snp * 1. Redistributions of source code must retain the above copyright
9222509Snp *    notice, this list of conditions and the following disclaimer.
10222509Snp * 2. Redistributions in binary form must reproduce the above copyright
11222509Snp *    notice, this list of conditions and the following disclaimer in the
12222509Snp *    documentation and/or other materials provided with the distribution.
13222509Snp *
14222509Snp * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15222509Snp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16222509Snp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17222509Snp * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18222509Snp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19222509Snp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20222509Snp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21222509Snp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22222509Snp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23222509Snp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24222509Snp * SUCH DAMAGE.
25222509Snp */
26222509Snp#include <sys/cdefs.h>
27222509Snp__FBSDID("$FreeBSD: stable/11/sys/dev/cxgbe/t4_l2t.c 346915 2019-04-29 19:23:27Z np $");
28222509Snp
29222509Snp#include "opt_inet.h"
30237819Snp#include "opt_inet6.h"
31222509Snp
32222509Snp#include <sys/param.h>
33257241Sglebius#include <sys/eventhandler.h>
34222509Snp#include <sys/systm.h>
35222509Snp#include <sys/kernel.h>
36222509Snp#include <sys/module.h>
37222509Snp#include <sys/bus.h>
38222509Snp#include <sys/lock.h>
39222509Snp#include <sys/mutex.h>
40222509Snp#include <sys/rwlock.h>
41222509Snp#include <sys/socket.h>
42228561Snp#include <sys/sbuf.h>
43222509Snp#include <netinet/in.h>
44222509Snp
45222509Snp#include "common/common.h"
46222509Snp#include "common/t4_msg.h"
47222509Snp#include "t4_l2t.h"
48222509Snp
/*
 * Module locking notes:  There is a RW lock protecting the L2 table as a
 * whole plus a mutex per L2T entry.  Entry lookups and allocations happen
 * under the protection of the table lock, individual entry changes happen
 * while holding that entry's mutex.  The table lock nests outside the
 * entry locks.  Allocations of new entries take the table lock as writers so
 * no other lookups can happen while allocating new entries.  Entry updates
 * take the table lock as readers so multiple entries can be updated in
 * parallel.  An L2T entry can be dropped by decrementing its reference count
 * and therefore can happen in parallel with entry allocation but no entry
 * can change state or increment its ref count during allocation as both of
 * these perform lookups.
 *
 * Note: We do not take references to ifnets in this module because both
 * the TOE and the sockets already hold references to the interfaces and the
 * lifetime of an L2T entry is fully contained in the lifetime of the TOE.
 */
66228561Snp
67222509Snp/*
68228561Snp * Allocate a free L2T entry.  Must be called with l2t_data.lock held.
69222509Snp */
70237263Snpstruct l2t_entry *
71237263Snpt4_alloc_l2e(struct l2t_data *d)
72228561Snp{
73228561Snp	struct l2t_entry *end, *e, **p;
74228561Snp
75228561Snp	rw_assert(&d->lock, RA_WLOCKED);
76228561Snp
77228561Snp	if (!atomic_load_acq_int(&d->nfree))
78228561Snp		return (NULL);
79228561Snp
80228561Snp	/* there's definitely a free entry */
81245434Snp	for (e = d->rover, end = &d->l2tab[d->l2t_size]; e != end; ++e)
82228561Snp		if (atomic_load_acq_int(&e->refcnt) == 0)
83228561Snp			goto found;
84228561Snp
85237263Snp	for (e = d->l2tab; atomic_load_acq_int(&e->refcnt); ++e)
86237263Snp		continue;
87228561Snpfound:
88228561Snp	d->rover = e + 1;
89228561Snp	atomic_subtract_int(&d->nfree, 1);
90228561Snp
91228561Snp	/*
92228561Snp	 * The entry we found may be an inactive entry that is
93228561Snp	 * presently in the hash table.  We need to remove it.
94228561Snp	 */
95228561Snp	if (e->state < L2T_STATE_SWITCHING) {
96228561Snp		for (p = &d->l2tab[e->hash].first; *p; p = &(*p)->next) {
97228561Snp			if (*p == e) {
98228561Snp				*p = e->next;
99228561Snp				e->next = NULL;
100228561Snp				break;
101228561Snp			}
102228561Snp		}
103228561Snp	}
104228561Snp
105228561Snp	e->state = L2T_STATE_UNUSED;
106228561Snp	return (e);
107228561Snp}
108228561Snp
109346915Snpstatic struct l2t_entry *
110346915Snpfind_or_alloc_l2e(struct l2t_data *d, uint16_t vlan, uint8_t port, uint8_t *dmac)
111346915Snp{
112346915Snp	struct l2t_entry *end, *e, **p;
113346915Snp	struct l2t_entry *first_free = NULL;
114346915Snp
115346915Snp	for (e = &d->l2tab[0], end = &d->l2tab[d->l2t_size]; e != end; ++e) {
116346915Snp		if (atomic_load_acq_int(&e->refcnt) == 0) {
117346915Snp			if (!first_free)
118346915Snp				first_free = e;
119346915Snp		} else if (e->state == L2T_STATE_SWITCHING &&
120346915Snp		    memcmp(e->dmac, dmac, ETHER_ADDR_LEN) == 0 &&
121346915Snp		    e->vlan == vlan && e->lport == port)
122346915Snp			return (e);	/* Found existing entry that matches. */
123346915Snp	}
124346915Snp
125346915Snp	if (first_free == NULL)
126346915Snp		return (NULL);	/* No match and no room for a new entry. */
127346915Snp
128346915Snp	/*
129346915Snp	 * The entry we found may be an inactive entry that is
130346915Snp	 * presently in the hash table.  We need to remove it.
131346915Snp	 */
132346915Snp	e = first_free;
133346915Snp	if (e->state < L2T_STATE_SWITCHING) {
134346915Snp		for (p = &d->l2tab[e->hash].first; *p; p = &(*p)->next) {
135346915Snp			if (*p == e) {
136346915Snp				*p = e->next;
137346915Snp				e->next = NULL;
138346915Snp				break;
139346915Snp			}
140346915Snp		}
141346915Snp	}
142346915Snp	e->state = L2T_STATE_UNUSED;
143346915Snp	return (e);
144346915Snp}
145346915Snp
146346915Snp
147228561Snp/*
148228561Snp * Write an L2T entry.  Must be called with the entry locked.
149228561Snp * The write may be synchronous or asynchronous.
150228561Snp */
151237263Snpint
152302339Snpt4_write_l2e(struct l2t_entry *e, int sync)
153228561Snp{
154302339Snp	struct sge_wrq *wrq;
155302339Snp	struct adapter *sc;
156276485Snp	struct wrq_cookie cookie;
157228561Snp	struct cpl_l2t_write_req *req;
158302339Snp	int idx;
159228561Snp
160228561Snp	mtx_assert(&e->lock, MA_OWNED);
161302339Snp	MPASS(e->wrq != NULL);
162228561Snp
163302339Snp	wrq = e->wrq;
164302339Snp	sc = wrq->adapter;
165302339Snp
166302339Snp	req = start_wrq_wr(wrq, howmany(sizeof(*req), 16), &cookie);
167276485Snp	if (req == NULL)
168228561Snp		return (ENOMEM);
169228561Snp
170302339Snp	idx = e->idx + sc->vres.l2t.start;
171228561Snp	INIT_TP_WR(req, 0);
172245434Snp	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, idx |
173302339Snp	    V_SYNC_WR(sync) | V_TID_QID(e->iqid)));
174228561Snp	req->params = htons(V_L2T_W_PORT(e->lport) | V_L2T_W_NOREPLY(!sync));
175245434Snp	req->l2t_idx = htons(idx);
176228561Snp	req->vlan = htons(e->vlan);
177228561Snp	memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
178228561Snp
179302339Snp	commit_wrq_wr(wrq, req, &cookie);
180228561Snp
181228561Snp	if (sync && e->state != L2T_STATE_SWITCHING)
182228561Snp		e->state = L2T_STATE_SYNC_WRITE;
183228561Snp
184228561Snp	return (0);
185228561Snp}
186228561Snp
187228561Snp/*
188228561Snp * Allocate an L2T entry for use by a switching rule.  Such need to be
189228561Snp * explicitly freed and while busy they are not on any hash chain, so normal
190228561Snp * address resolution updates do not see them.
191228561Snp */
192228561Snpstruct l2t_entry *
193346915Snpt4_l2t_alloc_switching(struct adapter *sc, uint16_t vlan, uint8_t port,
194346915Snp    uint8_t *eth_addr)
195228561Snp{
196346915Snp	struct l2t_data *d = sc->l2t;
197228561Snp	struct l2t_entry *e;
198346915Snp	int rc;
199228561Snp
200244551Snp	rw_wlock(&d->lock);
201346915Snp	e = find_or_alloc_l2e(d, vlan, port, eth_addr);
202228561Snp	if (e) {
203346915Snp		if (atomic_load_acq_int(&e->refcnt) == 0) {
204346915Snp			mtx_lock(&e->lock);    /* avoid race with t4_l2t_free */
205346915Snp			e->wrq = &sc->sge.ctrlq[0];
206346915Snp			e->iqid = sc->sge.fwq.abs_id;
207346915Snp			e->state = L2T_STATE_SWITCHING;
208346915Snp			e->vlan = vlan;
209346915Snp			e->lport = port;
210346915Snp			memcpy(e->dmac, eth_addr, ETHER_ADDR_LEN);
211346915Snp			atomic_store_rel_int(&e->refcnt, 1);
212346915Snp			atomic_subtract_int(&d->nfree, 1);
213346915Snp			rc = t4_write_l2e(e, 0);
214346915Snp			mtx_unlock(&e->lock);
215346915Snp			if (rc != 0)
216346915Snp				e = NULL;
217346915Snp		} else {
218346915Snp			MPASS(e->vlan == vlan);
219346915Snp			MPASS(e->lport == port);
220346915Snp			atomic_add_int(&e->refcnt, 1);
221346915Snp		}
222228561Snp	}
223244551Snp	rw_wunlock(&d->lock);
224346915Snp	return (e);
225228561Snp}
226228561Snp
227228561Snpint
228228561Snpt4_init_l2t(struct adapter *sc, int flags)
229228561Snp{
230245434Snp	int i, l2t_size;
231228561Snp	struct l2t_data *d;
232228561Snp
233245434Snp	l2t_size = sc->vres.l2t.size;
234245434Snp	if (l2t_size < 2)	/* At least 1 bucket for IP and 1 for IPv6 */
235245434Snp		return (EINVAL);
236245434Snp
237245434Snp	d = malloc(sizeof(*d) + l2t_size * sizeof (struct l2t_entry), M_CXGBE,
238245434Snp	    M_ZERO | flags);
239228561Snp	if (!d)
240228561Snp		return (ENOMEM);
241228561Snp
242245434Snp	d->l2t_size = l2t_size;
243228561Snp	d->rover = d->l2tab;
244245434Snp	atomic_store_rel_int(&d->nfree, l2t_size);
245228561Snp	rw_init(&d->lock, "L2T");
246228561Snp
247245434Snp	for (i = 0; i < l2t_size; i++) {
248237263Snp		struct l2t_entry *e = &d->l2tab[i];
249237263Snp
250237263Snp		e->idx = i;
251237263Snp		e->state = L2T_STATE_UNUSED;
252237263Snp		mtx_init(&e->lock, "L2T_E", NULL, MTX_DEF);
253237263Snp		STAILQ_INIT(&e->wr_list);
254237263Snp		atomic_store_rel_int(&e->refcnt, 0);
255228561Snp	}
256228561Snp
257228561Snp	sc->l2t = d;
258228561Snp
259228561Snp	return (0);
260228561Snp}
261228561Snp
262228561Snpint
263228561Snpt4_free_l2t(struct l2t_data *d)
264228561Snp{
265228561Snp	int i;
266228561Snp
267245434Snp	for (i = 0; i < d->l2t_size; i++)
268228561Snp		mtx_destroy(&d->l2tab[i].lock);
269228561Snp	rw_destroy(&d->lock);
270228561Snp	free(d, M_CXGBE);
271228561Snp
272228561Snp	return (0);
273228561Snp}
274228561Snp
275237263Snpint
276237263Snpdo_l2t_write_rpl(struct sge_iq *iq, const struct rss_header *rss,
277237263Snp    struct mbuf *m)
278237263Snp{
279237263Snp	const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1);
280237263Snp	unsigned int tid = GET_TID(rpl);
281245434Snp	unsigned int idx = tid % L2T_SIZE;
282237263Snp
283237263Snp	if (__predict_false(rpl->status != CPL_ERR_NONE)) {
284237263Snp		log(LOG_ERR,
285245434Snp		    "Unexpected L2T_WRITE_RPL (%u) for entry at hw_idx %u\n",
286237263Snp		    rpl->status, idx);
287237263Snp		return (EINVAL);
288237263Snp	}
289237263Snp
290237263Snp	return (0);
291237263Snp}
292237263Snp
293222509Snpstatic inline unsigned int
294222509Snpvlan_prio(const struct l2t_entry *e)
295222509Snp{
296222509Snp	return e->vlan >> 13;
297222509Snp}
298222509Snp
299228561Snpstatic char
300228561Snpl2e_state(const struct l2t_entry *e)
301228561Snp{
302228561Snp	switch (e->state) {
303228561Snp	case L2T_STATE_VALID: return 'V';  /* valid, fast-path entry */
304228561Snp	case L2T_STATE_STALE: return 'S';  /* needs revalidation, but usable */
305228561Snp	case L2T_STATE_SYNC_WRITE: return 'W';
306237263Snp	case L2T_STATE_RESOLVING: return STAILQ_EMPTY(&e->wr_list) ? 'R' : 'A';
307228561Snp	case L2T_STATE_SWITCHING: return 'X';
308228561Snp	default: return 'U';
309228561Snp	}
310228561Snp}
311228561Snp
312228561Snpint
313228561Snpsysctl_l2t(SYSCTL_HANDLER_ARGS)
314228561Snp{
315228561Snp	struct adapter *sc = arg1;
316228561Snp	struct l2t_data *l2t = sc->l2t;
317228561Snp	struct l2t_entry *e;
318228561Snp	struct sbuf *sb;
319228561Snp	int rc, i, header = 0;
320245434Snp	char ip[INET6_ADDRSTRLEN];
321228561Snp
322228561Snp	if (l2t == NULL)
323228561Snp		return (ENXIO);
324228561Snp
325228561Snp	rc = sysctl_wire_old_buffer(req, 0);
326228561Snp	if (rc != 0)
327228561Snp		return (rc);
328228561Snp
329228561Snp	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
330228561Snp	if (sb == NULL)
331228561Snp		return (ENOMEM);
332228561Snp
333228561Snp	e = &l2t->l2tab[0];
334245434Snp	for (i = 0; i < l2t->l2t_size; i++, e++) {
335228561Snp		mtx_lock(&e->lock);
336228561Snp		if (e->state == L2T_STATE_UNUSED)
337228561Snp			goto skip;
338228561Snp
339228561Snp		if (header == 0) {
340228561Snp			sbuf_printf(sb, " Idx IP address      "
341228561Snp			    "Ethernet address  VLAN/P LP State Users Port");
342228561Snp			header = 1;
343228561Snp		}
344237263Snp		if (e->state == L2T_STATE_SWITCHING)
345228561Snp			ip[0] = 0;
346245434Snp		else {
347245434Snp			inet_ntop(e->ipv6 ? AF_INET6 : AF_INET, &e->addr[0],
348245434Snp			    &ip[0], sizeof(ip));
349245434Snp		}
350228561Snp
351245434Snp		/*
352245434Snp		 * XXX: IPv6 addresses may not align properly in the output.
353245434Snp		 */
354228561Snp		sbuf_printf(sb, "\n%4u %-15s %02x:%02x:%02x:%02x:%02x:%02x %4d"
355228561Snp			   " %u %2u   %c   %5u %s",
356228561Snp			   e->idx, ip, e->dmac[0], e->dmac[1], e->dmac[2],
357228561Snp			   e->dmac[3], e->dmac[4], e->dmac[5],
358228561Snp			   e->vlan & 0xfff, vlan_prio(e), e->lport,
359228561Snp			   l2e_state(e), atomic_load_acq_int(&e->refcnt),
360302313Snp			   e->ifp ? e->ifp->if_xname : "-");
361228561Snpskip:
362228561Snp		mtx_unlock(&e->lock);
363228561Snp	}
364228561Snp
365228561Snp	rc = sbuf_finish(sb);
366228561Snp	sbuf_delete(sb);
367228561Snp
368228561Snp	return (rc);
369228561Snp}
370