/*-
 * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
 * Copyright (C) 2013 Intel Corporation
 * Copyright (C) 2015 EMC Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * The Non-Transparent Bridge (NTB) is a device that allows you to connect
 * two or more systems using PCI-e links, providing remote memory access.
 *
 * This module contains a driver for a simulated Ethernet device, layered on
 * top of the underlying NTB Transport device.
 *
 * NOTE: Much of the code in this module is shared with Linux. Any patches may
 * be picked up and redistributed in Linux with a dual GPL/BSD license.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/buf_ring.h>
#include <sys/bus.h>
#include <sys/ktr.h>
#include <sys/limits.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <net/if.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/bpf.h>
#include <net/ethernet.h>

#include <machine/bus.h>

#include "../ntb_transport.h"

#define KTR_NTB KTR_SPARE3
#define NTB_MEDIATYPE		 (IFM_ETHER | IFM_AUTO | IFM_FDX)

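/*
 * Checksum offload emulation: NTB_CSUM_FEATURES* are the hwassist bits
 * advertised for transmit, and NTB_CSUM_SET is the set of flags the
 * receive path uses to mark IP/IPv6 frames as already verified, since
 * data crossing the NTB link is protected by PCI-e itself.
 */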
#define	NTB_CSUM_FEATURES	(CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP)
#define	NTB_CSUM_FEATURES6	(CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6)
#define	NTB_CSUM_SET		(CSUM_DATA_VALID | CSUM_DATA_VALID_IPV6 | \
				    CSUM_PSEUDO_HDR | \
				    CSUM_IP_CHECKED | CSUM_IP_VALID | \
				    CSUM_SCTP_VALID)

static SYSCTL_NODE(_hw, OID_AUTO, if_ntb, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "if_ntb");

static unsigned g_if_ntb_num_queues = UINT_MAX;
SYSCTL_UINT(_hw_if_ntb, OID_AUTO, num_queues, CTLFLAG_RWTUN,
    &g_if_ntb_num_queues, 0, "Number of queues per interface");

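/*
 * Per-queue state: one NTB transport queue pair plus the software
 * transmit ring, taskqueue and "queue full" retry callout that feed it.
 */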
struct ntb_net_queue {
	struct ntb_net_ctx	*sc;
	if_t			 ifp;
	struct ntb_transport_qp *qp;
	struct buf_ring		*br;
	struct task		 tx_task;
	struct taskqueue	*tx_tq;
	struct mtx		 tx_lock;
	struct callout		 queue_full;
};

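/*
 * Per-interface softc: the ifnet, the emulated media, the randomly
 * generated MAC address and the array of transmit/receive queues.
 * The mtu field caches the smallest transport payload size seen
 * across all queues.
 */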
struct ntb_net_ctx {
	if_t			 ifp;
	struct ifmedia		 media;
	u_char			 eaddr[ETHER_ADDR_LEN];
	int			 num_queues;
	struct ntb_net_queue	*queues;
	int			 mtu;
};

static int ntb_net_probe(device_t dev);
static int ntb_net_attach(device_t dev);
static int ntb_net_detach(device_t dev);
static void ntb_net_init(void *arg);
static int ntb_ifmedia_upd(struct ifnet *);
static void ntb_ifmedia_sts(struct ifnet *, struct ifmediareq *);
static int ntb_ioctl(if_t ifp, u_long command, caddr_t data);
static int ntb_transmit(if_t ifp, struct mbuf *m);
static void ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data,
    void *data, int len);
static void ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data,
    void *data, int len);
static void ntb_net_event_handler(void *data, enum ntb_link_event status);
static void ntb_handle_tx(void *arg, int pending);
static void ntb_qp_full(void *arg);
static void ntb_qflush(if_t ifp);
static void create_random_local_eui48(u_char *eaddr);

static int
ntb_net_probe(device_t dev)
{

	device_set_desc(dev, "NTB Network Interface");
	return (0);
}

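/*
 * Attach: allocate the ifnet, create up to hw.if_ntb.num_queues
 * transport queues (each with its own buf_ring, lock, taskqueue and
 * retry callout), attach the Ethernet interface with a random locally
 * administered address and finally bring the transport links up.
 */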
static int
ntb_net_attach(device_t dev)
{
	struct ntb_net_ctx *sc = device_get_softc(dev);
	struct ntb_net_queue *q;
	if_t ifp;
	struct ntb_queue_handlers handlers = { ntb_net_rx_handler,
	    ntb_net_tx_handler, ntb_net_event_handler };
	int i;

	ifp = sc->ifp = if_gethandle(IFT_ETHER);
	if (ifp == NULL) {
		printf("ntb: Cannot allocate ifnet structure\n");
		return (ENOMEM);
	}
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	if_setdev(ifp, dev);

	sc->num_queues = min(g_if_ntb_num_queues,
	    ntb_transport_queue_count(dev));
	sc->queues = malloc(sc->num_queues * sizeof(struct ntb_net_queue),
	    M_DEVBUF, M_WAITOK | M_ZERO);
	sc->mtu = INT_MAX;
	for (i = 0; i < sc->num_queues; i++) {
		q = &sc->queues[i];
		q->sc = sc;
		q->ifp = ifp;
		q->qp = ntb_transport_create_queue(dev, i, &handlers, q);
		if (q->qp == NULL)
			break;
		sc->mtu = imin(sc->mtu, ntb_transport_max_size(q->qp));
		mtx_init(&q->tx_lock, "ntb tx", NULL, MTX_DEF);
		q->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &q->tx_lock);
		TASK_INIT(&q->tx_task, 0, ntb_handle_tx, q);
		q->tx_tq = taskqueue_create_fast("ntb_txq", M_NOWAIT,
		    taskqueue_thread_enqueue, &q->tx_tq);
		taskqueue_start_threads(&q->tx_tq, 1, PI_NET, "%s txq%d",
		    device_get_nameunit(dev), i);
		callout_init(&q->queue_full, 1);
	}
	sc->num_queues = i;
	device_printf(dev, "%d queue(s)\n", sc->num_queues);

	if_setinitfn(ifp, ntb_net_init);
	if_setsoftc(ifp, sc);
	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
	if_setioctlfn(ifp, ntb_ioctl);
	if_settransmitfn(ifp, ntb_transmit);
	if_setqflushfn(ifp, ntb_qflush);
	create_random_local_eui48(sc->eaddr);
	ether_ifattach(ifp, sc->eaddr);
	if_setcapabilities(ifp, IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 |
	    IFCAP_JUMBO_MTU | IFCAP_LINKSTATE);
	if_setcapenable(ifp, IFCAP_JUMBO_MTU | IFCAP_LINKSTATE);
	if_setmtu(ifp, sc->mtu - ETHER_HDR_LEN);

	ifmedia_init(&sc->media, IFM_IMASK, ntb_ifmedia_upd,
	    ntb_ifmedia_sts);
	ifmedia_add(&sc->media, NTB_MEDIATYPE, 0, NULL);
	ifmedia_set(&sc->media, NTB_MEDIATYPE);

	for (i = 0; i < sc->num_queues; i++)
		ntb_transport_link_up(sc->queues[i].qp);
	return (0);
}

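/*
 * Detach: take the transport links down and detach the ifnet first,
 * so that no new transmits or callbacks arrive while the per-queue
 * resources are being freed.
 */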
static int
ntb_net_detach(device_t dev)
{
	struct ntb_net_ctx *sc = device_get_softc(dev);
	struct ntb_net_queue *q;
	int i;

	for (i = 0; i < sc->num_queues; i++)
		ntb_transport_link_down(sc->queues[i].qp);
	ether_ifdetach(sc->ifp);
	if_free(sc->ifp);
	ifmedia_removeall(&sc->media);
	for (i = 0; i < sc->num_queues; i++) {
		q = &sc->queues[i];
		ntb_transport_free_queue(q->qp);
		buf_ring_free(q->br, M_DEVBUF);
		callout_drain(&q->queue_full);
		taskqueue_drain_all(q->tx_tq);
		taskqueue_free(q->tx_tq);
		mtx_destroy(&q->tx_lock);
	}
	free(sc->queues, M_DEVBUF);
	return (0);
}

/* Network device interface */

static void
ntb_net_init(void *arg)
{
	struct ntb_net_ctx *sc = arg;
	if_t ifp = sc->ifp;

	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
	if_setbaudrate(ifp, ntb_transport_link_speed(sc->queues[0].qp));
	if_link_state_change(ifp, ntb_transport_link_query(sc->queues[0].qp) ?
	    LINK_STATE_UP : LINK_STATE_DOWN);
}

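/*
 * The MTU is bounded by the smallest transport payload size minus the
 * Ethernet header; the checksum capabilities only toggle the offload
 * emulation bits, as there is no real hardware behind them.
 */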
static int
ntb_ioctl(if_t ifp, u_long command, caddr_t data)
{
	struct ntb_net_ctx *sc = if_getsoftc(ifp);
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch (command) {
	case SIOCSIFFLAGS:
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;

	case SIOCSIFMTU:
	    {
		if (ifr->ifr_mtu > sc->mtu - ETHER_HDR_LEN) {
			error = EINVAL;
			break;
		}

		if_setmtu(ifp, ifr->ifr_mtu);
		break;
	    }

	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
		break;

	case SIOCSIFCAP:
		if (ifr->ifr_reqcap & IFCAP_RXCSUM)
			if_setcapenablebit(ifp, IFCAP_RXCSUM, 0);
		else
			if_setcapenablebit(ifp, 0, IFCAP_RXCSUM);
		if (ifr->ifr_reqcap & IFCAP_TXCSUM) {
			if_setcapenablebit(ifp, IFCAP_TXCSUM, 0);
			if_sethwassistbits(ifp, NTB_CSUM_FEATURES, 0);
		} else {
			if_setcapenablebit(ifp, 0, IFCAP_TXCSUM);
			if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES);
		}
		if (ifr->ifr_reqcap & IFCAP_RXCSUM_IPV6)
			if_setcapenablebit(ifp, IFCAP_RXCSUM_IPV6, 0);
		else
			if_setcapenablebit(ifp, 0, IFCAP_RXCSUM_IPV6);
		if (ifr->ifr_reqcap & IFCAP_TXCSUM_IPV6) {
			if_setcapenablebit(ifp, IFCAP_TXCSUM_IPV6, 0);
			if_sethwassistbits(ifp, NTB_CSUM_FEATURES6, 0);
		} else {
			if_setcapenablebit(ifp, 0, IFCAP_TXCSUM_IPV6);
			if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES6);
		}
		break;

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

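/*
 * The media is emulated: a fixed Ethernet/autoselect/full-duplex type
 * is reported, and only the NTB transport link state determines
 * whether it is active.
 */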
static int
ntb_ifmedia_upd(struct ifnet *ifp)
{
	struct ntb_net_ctx *sc = if_getsoftc(ifp);
	struct ifmedia *ifm = &sc->media;

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	return (0);
}

static void
ntb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct ntb_net_ctx *sc = if_getsoftc(ifp);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = NTB_MEDIATYPE;
	if (ntb_transport_link_query(sc->queues[0].qp))
		ifmr->ifm_status |= IFM_ACTIVE;
}

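/*
 * Drain the software ring with the tx_lock held: peek at the head,
 * hand the mbuf to the transport, and only then advance the ring.
 * On EAGAIN (transport queue full) the mbuf is put back and a callout
 * retries later; any other error drops the packet.
 */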
static void
ntb_transmit_locked(struct ntb_net_queue *q)
{
	if_t ifp = q->ifp;
	struct mbuf *m;
	int rc, len;
	short mflags;

	CTR0(KTR_NTB, "TX: ntb_transmit_locked");
	while ((m = drbr_peek(ifp, q->br)) != NULL) {
		CTR1(KTR_NTB, "TX: start mbuf %p", m);
		if_etherbpfmtap(ifp, m);
		len = m->m_pkthdr.len;
		mflags = m->m_flags;
		rc = ntb_transport_tx_enqueue(q->qp, m, m, len);
		if (rc != 0) {
			CTR2(KTR_NTB, "TX: could not tx mbuf %p: %d", m, rc);
			if (rc == EAGAIN) {
				drbr_putback(ifp, q->br, m);
				callout_reset_sbt(&q->queue_full,
				    SBT_1MS / 4, SBT_1MS / 4,
				    ntb_qp_full, q, 0);
			} else {
				m_freem(m);
				drbr_advance(ifp, q->br);
				if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
			}
			break;
		}
		drbr_advance(ifp, q->br);
		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
		if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
		if (mflags & M_MCAST)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
	}
}

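/*
 * if_transmit method: pick a queue from the mbuf flow ID (or the
 * current CPU when the mbuf carries no hash), enqueue on that queue's
 * ring, then either drain it directly if the lock is uncontended or
 * defer the work to the per-queue taskqueue.
 */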
static int
ntb_transmit(if_t ifp, struct mbuf *m)
{
	struct ntb_net_ctx *sc = if_getsoftc(ifp);
	struct ntb_net_queue *q;
	int error, i;

	CTR0(KTR_NTB, "TX: ntb_transmit");
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		i = m->m_pkthdr.flowid % sc->num_queues;
	else
		i = curcpu % sc->num_queues;
	q = &sc->queues[i];

	error = drbr_enqueue(ifp, q->br, m);
	if (error)
		return (error);

	if (mtx_trylock(&q->tx_lock)) {
		ntb_transmit_locked(q);
		mtx_unlock(&q->tx_lock);
	} else
		taskqueue_enqueue(q->tx_tq, &q->tx_task);
	return (0);
}

static void
ntb_handle_tx(void *arg, int pending)
{
	struct ntb_net_queue *q = arg;

	mtx_lock(&q->tx_lock);
	ntb_transmit_locked(q);
	mtx_unlock(&q->tx_lock);
}

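/*
 * Callout armed while the transport queue is full: once a free entry
 * appears, kick the transmit task; otherwise re-arm and poll again in
 * roughly 250us.
 */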
static void
ntb_qp_full(void *arg)
{
	struct ntb_net_queue *q = arg;

	CTR0(KTR_NTB, "TX: qp_full callout");
	if (ntb_transport_tx_free_entry(q->qp) > 0)
		taskqueue_enqueue(q->tx_tq, &q->tx_task);
	else
		callout_schedule_sbt(&q->queue_full,
		    SBT_1MS / 4, SBT_1MS / 4, 0);
}

static void
ntb_qflush(if_t ifp)
{
	struct ntb_net_ctx *sc = if_getsoftc(ifp);
	struct ntb_net_queue *q;
	struct mbuf *m;
	int i;

	for (i = 0; i < sc->num_queues; i++) {
		q = &sc->queues[i];
		mtx_lock(&q->tx_lock);
		while ((m = buf_ring_dequeue_sc(q->br)) != NULL)
			m_freem(m);
		mtx_unlock(&q->tx_lock);
	}
	if_qflush(ifp);
}

/* Network Device Callbacks */
static void
ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
    int len)
{

	m_freem(data);
	CTR1(KTR_NTB, "TX: tx_handler freeing mbuf %p", data);
}

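/*
 * Receive callback from the transport; the data pointer is the mbuf
 * enqueued by the peer.  Tag it with a flow ID when multiple queues
 * are in use and, if receive checksum offload is enabled, mark IP and
 * IPv6 frames (Ethernet type at offset 12) as already verified before
 * handing them to the stack.
 */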
static void
ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
    int len)
{
	struct ntb_net_queue *q = qp_data;
	struct ntb_net_ctx *sc = q->sc;
	struct mbuf *m = data;
	if_t ifp = q->ifp;
	uint16_t proto;

	CTR1(KTR_NTB, "RX: rx handler (%d)", len);
	if (len < 0) {
		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
		return;
	}

	m->m_pkthdr.rcvif = ifp;
	if (sc->num_queues > 1) {
		m->m_pkthdr.flowid = q - sc->queues;
		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
	}
	if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
		m_copydata(m, 12, 2, (void *)&proto);
		switch (ntohs(proto)) {
		case ETHERTYPE_IP:
			if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
				m->m_pkthdr.csum_data = 0xffff;
				m->m_pkthdr.csum_flags = NTB_CSUM_SET;
			}
			break;
		case ETHERTYPE_IPV6:
			if (if_getcapenable(ifp) & IFCAP_RXCSUM_IPV6) {
				m->m_pkthdr.csum_data = 0xffff;
				m->m_pkthdr.csum_flags = NTB_CSUM_SET;
			}
			break;
		}
	}
	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
	if_input(ifp, m);
}

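/*
 * Transport link event: propagate the new link speed and state to the
 * network stack.
 */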
static void
ntb_net_event_handler(void *data, enum ntb_link_event status)
{
	struct ntb_net_queue *q = data;

	if_setbaudrate(q->ifp, ntb_transport_link_speed(q->qp));
	if_link_state_change(q->ifp, (status == NTB_LINK_UP) ? LINK_STATE_UP :
	    LINK_STATE_DOWN);
}

/* Helper functions */
/* TODO: This too should really be part of the kernel */
#define EUI48_MULTICAST			(1 << 0)
#define EUI48_LOCALLY_ADMINISTERED	(1 << 1)
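/*
 * Generate a random EUI-48 with the locally administered bit set:
 * four random bytes plus a per-host counter in the last octet, so
 * that several interfaces on the same host get distinct addresses.
 */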
static void
create_random_local_eui48(u_char *eaddr)
{
	static uint8_t counter = 0;

	eaddr[0] = EUI48_LOCALLY_ADMINISTERED;
	arc4rand(&eaddr[1], 4, 0);
	eaddr[5] = counter++;
}

static device_method_t ntb_net_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,     ntb_net_probe),
	DEVMETHOD(device_attach,    ntb_net_attach),
	DEVMETHOD(device_detach,    ntb_net_detach),
	DEVMETHOD_END
};

devclass_t ntb_net_devclass;
static DEFINE_CLASS_0(ntb, ntb_net_driver, ntb_net_methods,
    sizeof(struct ntb_net_ctx));
DRIVER_MODULE(if_ntb, ntb_transport, ntb_net_driver, ntb_net_devclass,
    NULL, NULL);
MODULE_DEPEND(if_ntb, ntb_transport, 1, 1, 1);
MODULE_VERSION(if_ntb, 1);