1259412Sluigi/*
2260368Sluigi * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
3259412Sluigi *
4259412Sluigi * Redistribution and use in source and binary forms, with or without
5259412Sluigi * modification, are permitted provided that the following conditions
6259412Sluigi * are met:
7259412Sluigi *   1. Redistributions of source code must retain the above copyright
8259412Sluigi *      notice, this list of conditions and the following disclaimer.
9259412Sluigi *   2. Redistributions in binary form must reproduce the above copyright
10259412Sluigi *      notice, this list of conditions and the following disclaimer in the
11259412Sluigi *      documentation and/or other materials provided with the distribution.
12259412Sluigi *
13259412Sluigi * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14259412Sluigi * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15259412Sluigi * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16259412Sluigi * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17259412Sluigi * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18259412Sluigi * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19259412Sluigi * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20259412Sluigi * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21259412Sluigi * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22259412Sluigi * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23259412Sluigi * SUCH DAMAGE.
24259412Sluigi */
25259412Sluigi
26259412Sluigi/* $FreeBSD: releng/11.0/sys/dev/netmap/netmap_freebsd.c 285699 2015-07-19 18:07:25Z luigi $ */
27285349Sluigi#include "opt_inet.h"
28285349Sluigi#include "opt_inet6.h"
29259412Sluigi
30259412Sluigi#include <sys/types.h>
31259412Sluigi#include <sys/module.h>
32259412Sluigi#include <sys/errno.h>
33259412Sluigi#include <sys/param.h>  /* defines used in kernel.h */
34261909Sluigi#include <sys/poll.h>  /* POLLIN, POLLOUT */
35259412Sluigi#include <sys/kernel.h> /* types used in module initialization */
36259412Sluigi#include <sys/conf.h>	/* DEV_MODULE */
37261909Sluigi#include <sys/endian.h>
38259412Sluigi
39259412Sluigi#include <sys/rwlock.h>
40259412Sluigi
41259412Sluigi#include <vm/vm.h>      /* vtophys */
42259412Sluigi#include <vm/pmap.h>    /* vtophys */
43259412Sluigi#include <vm/vm_param.h>
44259412Sluigi#include <vm/vm_object.h>
45259412Sluigi#include <vm/vm_page.h>
46259412Sluigi#include <vm/vm_pager.h>
47259412Sluigi#include <vm/uma.h>
48259412Sluigi
49259412Sluigi
50259412Sluigi#include <sys/malloc.h>
51259412Sluigi#include <sys/socket.h> /* sockaddrs */
52259412Sluigi#include <sys/selinfo.h>
53259412Sluigi#include <net/if.h>
54259412Sluigi#include <net/if_var.h>
55270063Sluigi#include <net/if_types.h> /* IFT_ETHER */
56270063Sluigi#include <net/ethernet.h> /* ether_ifdetach */
57270063Sluigi#include <net/if_dl.h> /* LLADDR */
58259412Sluigi#include <machine/bus.h>        /* bus_dmamap_* */
59261909Sluigi#include <netinet/in.h>		/* in6_cksum_pseudo() */
60261909Sluigi#include <machine/in_cksum.h>  /* in_pseudo(), in_cksum_hdr() */
61259412Sluigi
62259412Sluigi#include <net/netmap.h>
63259412Sluigi#include <dev/netmap/netmap_kern.h>
64259412Sluigi#include <dev/netmap/netmap_mem2.h>
65259412Sluigi
66259412Sluigi
67259412Sluigi/* ======================== FREEBSD-SPECIFIC ROUTINES ================== */
68259412Sluigi
69267180Sluigirawsum_t
70267180Sluiginm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum)
71261909Sluigi{
72261909Sluigi	/* TODO XXX please use the FreeBSD implementation for this. */
73261909Sluigi	uint16_t *words = (uint16_t *)data;
74261909Sluigi	int nw = len / 2;
75261909Sluigi	int i;
76261909Sluigi
77261909Sluigi	for (i = 0; i < nw; i++)
78261909Sluigi		cur_sum += be16toh(words[i]);
79261909Sluigi
80261909Sluigi	if (len & 1)
81261909Sluigi		cur_sum += (data[len-1] << 8);
82261909Sluigi
83261909Sluigi	return cur_sum;
84261909Sluigi}
85261909Sluigi
86261909Sluigi/* Fold a raw checksum: 'cur_sum' is in host byte order, while the
87261909Sluigi * return value is in network byte order.
88261909Sluigi */
89267180Sluigiuint16_t
90267180Sluiginm_csum_fold(rawsum_t cur_sum)
91261909Sluigi{
92261909Sluigi	/* TODO XXX please use the FreeBSD implementation for this. */
93261909Sluigi	while (cur_sum >> 16)
94261909Sluigi		cur_sum = (cur_sum & 0xFFFF) + (cur_sum >> 16);
95261909Sluigi
96261909Sluigi	return htobe16((~cur_sum) & 0xFFFF);
97261909Sluigi}
98261909Sluigi
99270063Sluigiuint16_t nm_csum_ipv4(struct nm_iphdr *iph)
100261909Sluigi{
101261909Sluigi#if 0
102261909Sluigi	return in_cksum_hdr((void *)iph);
103261909Sluigi#else
104261909Sluigi	return nm_csum_fold(nm_csum_raw((uint8_t*)iph, sizeof(struct nm_iphdr), 0));
105261909Sluigi#endif
106261909Sluigi}
107261909Sluigi
/*
 * Compute the TCP/UDP checksum for an IPv4 packet and store it in '*check'.
 *
 * '*check' is first loaded with the pseudo-header checksum and then
 * overwritten with the folded sum of 'datalen' bytes at 'data'.
 * NOTE(review): presumably 'check' points at the checksum field inside
 * 'data', which is how the second pass picks up the pseudo-header
 * contribution - confirm against the callers.
 *
 * Without INET this only logs a one-time warning and leaves '*check'
 * untouched.
 */
void
nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
					size_t datalen, uint16_t *check)
{
#ifdef INET
	uint16_t pseudolen;
	rawsum_t raw;

	/* Compute and insert the pseudo-header checksum. */
	pseudolen = datalen + iph->protocol;
	*check = in_pseudo(iph->saddr, iph->daddr,
				 htobe16(pseudolen));

	/*
	 * Compute the checksum on TCP/UDP header + payload
	 * (includes the pseudo-header).
	 */
	raw = nm_csum_raw(data, datalen, 0);
	*check = nm_csum_fold(raw);
#else
	static int warned = 0;

	if (warned == 0) {
		warned = 1;
		D("inet4 segmentation not supported");
	}
#endif
}
130261909Sluigi
/*
 * IPv6 counterpart of nm_csum_tcpudp_ipv4(): store the pseudo-header
 * checksum in '*check', then overwrite it with the folded sum of
 * 'datalen' bytes at 'data'.
 * NOTE(review): presumably 'check' points inside 'data' - confirm
 * against the callers.
 *
 * Without INET6 this only logs a one-time warning and leaves '*check'
 * untouched.
 */
void
nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
					size_t datalen, uint16_t *check)
{
#ifdef INET6
	rawsum_t raw;

	*check = in6_cksum_pseudo((void*)ip6h, datalen, ip6h->nexthdr, 0);

	raw = nm_csum_raw(data, datalen, 0);
	*check = nm_csum_fold(raw);
#else
	static int warned = 0;

	if (warned == 0) {
		warned = 1;
		D("inet6 segmentation not supported");
	}
#endif
}
146261909Sluigi
147261909Sluigi
148259412Sluigi/*
149259412Sluigi * Intercept the rx routine in the standard device driver.
150259412Sluigi * Second argument is non-zero to intercept, 0 to restore
151259412Sluigi */
152259412Sluigiint
153285349Sluiginetmap_catch_rx(struct netmap_generic_adapter *gna, int intercept)
154259412Sluigi{
155285349Sluigi	struct netmap_adapter *na = &gna->up.up;
156259412Sluigi	struct ifnet *ifp = na->ifp;
157259412Sluigi
158259412Sluigi	if (intercept) {
159259412Sluigi		if (gna->save_if_input) {
160259412Sluigi			D("cannot intercept again");
161259412Sluigi			return EINVAL; /* already set */
162259412Sluigi		}
163259412Sluigi		gna->save_if_input = ifp->if_input;
164259412Sluigi		ifp->if_input = generic_rx_handler;
165259412Sluigi	} else {
166259412Sluigi		if (!gna->save_if_input){
167259412Sluigi			D("cannot restore");
168259412Sluigi			return EINVAL;  /* not saved */
169259412Sluigi		}
170259412Sluigi		ifp->if_input = gna->save_if_input;
171259412Sluigi		gna->save_if_input = NULL;
172259412Sluigi	}
173259412Sluigi
174259412Sluigi	return 0;
175259412Sluigi}
176259412Sluigi
177260368Sluigi
178259412Sluigi/*
179259412Sluigi * Intercept the packet steering routine in the tx path,
180259412Sluigi * so that we can decide which queue is used for an mbuf.
181259412Sluigi * Second argument is non-zero to intercept, 0 to restore.
182261909Sluigi * On freebsd we just intercept if_transmit.
183259412Sluigi */
184259412Sluigivoid
185260368Sluiginetmap_catch_tx(struct netmap_generic_adapter *gna, int enable)
186259412Sluigi{
187260368Sluigi	struct netmap_adapter *na = &gna->up.up;
188285349Sluigi	struct ifnet *ifp = netmap_generic_getifp(gna);
189260368Sluigi
190259412Sluigi	if (enable) {
191260368Sluigi		na->if_transmit = ifp->if_transmit;
192260368Sluigi		ifp->if_transmit = netmap_transmit;
193259412Sluigi	} else {
194260368Sluigi		ifp->if_transmit = na->if_transmit;
195259412Sluigi	}
196259412Sluigi}
197259412Sluigi
198260368Sluigi
199261909Sluigi/*
200261909Sluigi * Transmit routine used by generic_netmap_txsync(). Returns 0 on success
201259412Sluigi * and non-zero on error (which may be packet drops or other errors).
202259412Sluigi * addr and len identify the netmap buffer, m is the (preallocated)
203259412Sluigi * mbuf to use for transmissions.
204259412Sluigi *
205259412Sluigi * We should add a reference to the mbuf so the m_freem() at the end
206259412Sluigi * of the transmission does not consume resources.
207259412Sluigi *
208259412Sluigi * On FreeBSD, and on multiqueue cards, we can force the queue using
209275358Shselasky *      if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
210259412Sluigi *              i = m->m_pkthdr.flowid % adapter->num_queues;
211259412Sluigi *      else
212259412Sluigi *              i = curcpu % adapter->num_queues;
213259412Sluigi *
214259412Sluigi */
215259412Sluigiint
216259412Sluigigeneric_xmit_frame(struct ifnet *ifp, struct mbuf *m,
217259412Sluigi	void *addr, u_int len, u_int ring_nr)
218259412Sluigi{
219259412Sluigi	int ret;
220259412Sluigi
221267180Sluigi	/*
222267180Sluigi	 * The mbuf should be a cluster from our special pool,
223267180Sluigi	 * so we do not need to do an m_copyback but just copy
224267180Sluigi	 * (and eventually, just reference the netmap buffer)
225267180Sluigi	 */
226259412Sluigi
227270063Sluigi	if (GET_MBUF_REFCNT(m) != 1) {
228267180Sluigi		D("invalid refcnt %d for %p",
229270063Sluigi			GET_MBUF_REFCNT(m), m);
230267180Sluigi		panic("in generic_xmit_frame");
231267180Sluigi	}
232267180Sluigi	// XXX the ext_size check is unnecessary if we link the netmap buf
233267180Sluigi	if (m->m_ext.ext_size < len) {
234267180Sluigi		RD(5, "size %d < len %d", m->m_ext.ext_size, len);
235267180Sluigi		len = m->m_ext.ext_size;
236267180Sluigi	}
237267180Sluigi	if (0) { /* XXX seems to have negligible benefits */
238267180Sluigi		m->m_ext.ext_buf = m->m_data = addr;
239267180Sluigi	} else {
240267180Sluigi		bcopy(addr, m->m_data, len);
241267180Sluigi	}
242267180Sluigi	m->m_len = m->m_pkthdr.len = len;
243267180Sluigi	// inc refcount. All ours, we could skip the atomic
244270063Sluigi	atomic_fetchadd_int(PNT_MBUF_REFCNT(m), 1);
245275358Shselasky	M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
246259412Sluigi	m->m_pkthdr.flowid = ring_nr;
247259412Sluigi	m->m_pkthdr.rcvif = ifp; /* used for tx notification */
248260368Sluigi	ret = NA(ifp)->if_transmit(ifp, m);
249259412Sluigi	return ret;
250259412Sluigi}
251259412Sluigi
252260368Sluigi
#if __FreeBSD_version >= 1100005
/*
 * Return the netmap adapter attached to 'ifp', looked up through NA()
 * after converting the opaque if_t to a struct ifnet pointer.
 */
struct netmap_adapter *
netmap_getna(if_t ifp)
{
	struct ifnet *real_ifp = (struct ifnet *)ifp;

	return (NA(real_ifp));
}
#endif /* __FreeBSD_version >= 1100005 */
260267170Sluigi
/*
 * This function and generic_find_num_queues() are placeholders until
 * we have a generic way to extract the info from the ifp.
 *
 * Logs the incoming '*tx' and '*rx' values, leaves them unmodified and
 * returns 0.
 */
int
generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx)
{
	(void)ifp;	/* not consulted yet */

	D("called, in tx %d rx %d", *tx, *rx);
	return 0;
}
271259412Sluigi
272260368Sluigi
273259412Sluigivoid
274259412Sluigigeneric_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq)
275259412Sluigi{
276267180Sluigi	D("called, in txq %d rxq %d", *txq, *rxq);
277261909Sluigi	*txq = netmap_generic_rings;
278261909Sluigi	*rxq = netmap_generic_rings;
279259412Sluigi}
280259412Sluigi
281260368Sluigi
282267180Sluigivoid
283270063Sluiginetmap_mitigation_init(struct nm_generic_mit *mit, int idx, struct netmap_adapter *na)
284259412Sluigi{
285259412Sluigi	ND("called");
286261909Sluigi	mit->mit_pending = 0;
287270063Sluigi	mit->mit_ring_idx = idx;
288261909Sluigi	mit->mit_na = na;
289259412Sluigi}
290259412Sluigi
291259412Sluigi
/* Mitigation start hook: does nothing besides the ND() trace. */
void
netmap_mitigation_start(struct nm_generic_mit *mit)
{
	(void)mit;
	ND("called");
}
297259412Sluigi
298260368Sluigi
/* Mitigation restart hook: does nothing besides the ND() trace. */
void
netmap_mitigation_restart(struct nm_generic_mit *mit)
{
	(void)mit;
	ND("called");
}
304259412Sluigi
305260368Sluigi
/* Mitigation status query: always returns 0 here. */
int
netmap_mitigation_active(struct nm_generic_mit *mit)
{
	(void)mit;
	ND("called");
	return 0;
}
312259412Sluigi
313260368Sluigi
/* Mitigation cleanup hook: does nothing besides the ND() trace. */
void
netmap_mitigation_cleanup(struct nm_generic_mit *mit)
{
	(void)mit;
	ND("called");
}
319259412Sluigi
320270063Sluigistatic int
321270063Sluiginm_vi_dummy(struct ifnet *ifp, u_long cmd, caddr_t addr)
322270063Sluigi{
323270063Sluigi	return EINVAL;
324270063Sluigi}
325260368Sluigi
/* if_start handler for the virtual interface: panics if ever invoked. */
static void
nm_vi_start(struct ifnet *ifp)
{
	(void)ifp;

	panic("nm_vi_start() must not be called");
}
331270063Sluigi
332259412Sluigi/*
333270063Sluigi * Index manager of persistent virtual interfaces.
334270063Sluigi * It is used to decide the lowest byte of the MAC address.
335270063Sluigi * We use the same algorithm with management of bridge port index.
336270063Sluigi */
337270063Sluigi#define NM_VI_MAX	255
338270063Sluigistatic struct {
339270063Sluigi	uint8_t index[NM_VI_MAX]; /* XXX just for a reasonable number */
340270063Sluigi	uint8_t active;
341270063Sluigi	struct mtx lock;
342270063Sluigi} nm_vi_indices;
343270063Sluigi
344270063Sluigivoid
345270063Sluiginm_vi_init_index(void)
346270063Sluigi{
347270063Sluigi	int i;
348270063Sluigi	for (i = 0; i < NM_VI_MAX; i++)
349270063Sluigi		nm_vi_indices.index[i] = i;
350270063Sluigi	nm_vi_indices.active = 0;
351270063Sluigi	mtx_init(&nm_vi_indices.lock, "nm_vi_indices_lock", NULL, MTX_DEF);
352270063Sluigi}
353270063Sluigi
354270063Sluigi/* return -1 if no index available */
355270063Sluigistatic int
356270063Sluiginm_vi_get_index(void)
357270063Sluigi{
358270063Sluigi	int ret;
359270063Sluigi
360270063Sluigi	mtx_lock(&nm_vi_indices.lock);
361270063Sluigi	ret = nm_vi_indices.active == NM_VI_MAX ? -1 :
362270063Sluigi		nm_vi_indices.index[nm_vi_indices.active++];
363270063Sluigi	mtx_unlock(&nm_vi_indices.lock);
364270063Sluigi	return ret;
365270063Sluigi}
366270063Sluigi
367270063Sluigistatic void
368270063Sluiginm_vi_free_index(uint8_t val)
369270063Sluigi{
370270063Sluigi	int i, lim;
371270063Sluigi
372270063Sluigi	mtx_lock(&nm_vi_indices.lock);
373270063Sluigi	lim = nm_vi_indices.active;
374270063Sluigi	for (i = 0; i < lim; i++) {
375270063Sluigi		if (nm_vi_indices.index[i] == val) {
376270063Sluigi			/* swap index[lim-1] and j */
377270063Sluigi			int tmp = nm_vi_indices.index[lim-1];
378270063Sluigi			nm_vi_indices.index[lim-1] = val;
379270063Sluigi			nm_vi_indices.index[i] = tmp;
380270063Sluigi			nm_vi_indices.active--;
381270063Sluigi			break;
382270063Sluigi		}
383270063Sluigi	}
384270063Sluigi	if (lim == nm_vi_indices.active)
385270063Sluigi		D("funny, index %u didn't found", val);
386270063Sluigi	mtx_unlock(&nm_vi_indices.lock);
387270063Sluigi}
388270063Sluigi#undef NM_VI_MAX
389270063Sluigi
390270063Sluigi/*
391270063Sluigi * Implementation of a netmap-capable virtual interface that
392270063Sluigi * registered to the system.
393270063Sluigi * It is based on if_tap.c and ip_fw_log.c in FreeBSD 9.
394270063Sluigi *
395270063Sluigi * Note: Linux sets refcount to 0 on allocation of net_device,
396270063Sluigi * then increments it on registration to the system.
397270063Sluigi * FreeBSD sets refcount to 1 on if_alloc(), and does not
398270063Sluigi * increment this refcount on if_attach().
399270063Sluigi */
400270063Sluigiint
401270063Sluiginm_vi_persist(const char *name, struct ifnet **ret)
402270063Sluigi{
403270063Sluigi	struct ifnet *ifp;
404270063Sluigi	u_short macaddr_hi;
405270063Sluigi	uint32_t macaddr_mid;
406270063Sluigi	u_char eaddr[6];
407270063Sluigi	int unit = nm_vi_get_index(); /* just to decide MAC address */
408270063Sluigi
409270063Sluigi	if (unit < 0)
410270063Sluigi		return EBUSY;
411270063Sluigi	/*
412270063Sluigi	 * We use the same MAC address generation method with tap
413270063Sluigi	 * except for the highest octet is 00:be instead of 00:bd
414270063Sluigi	 */
415270063Sluigi	macaddr_hi = htons(0x00be); /* XXX tap + 1 */
416270063Sluigi	macaddr_mid = (uint32_t) ticks;
417270063Sluigi	bcopy(&macaddr_hi, eaddr, sizeof(short));
418270063Sluigi	bcopy(&macaddr_mid, &eaddr[2], sizeof(uint32_t));
419270063Sluigi	eaddr[5] = (uint8_t)unit;
420270063Sluigi
421270063Sluigi	ifp = if_alloc(IFT_ETHER);
422270063Sluigi	if (ifp == NULL) {
423270063Sluigi		D("if_alloc failed");
424270063Sluigi		return ENOMEM;
425270063Sluigi	}
426270063Sluigi	if_initname(ifp, name, IF_DUNIT_NONE);
427270063Sluigi	ifp->if_mtu = 65536;
428270063Sluigi	ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST;
429270063Sluigi	ifp->if_init = (void *)nm_vi_dummy;
430270063Sluigi	ifp->if_ioctl = nm_vi_dummy;
431270063Sluigi	ifp->if_start = nm_vi_start;
432270063Sluigi	ifp->if_mtu = ETHERMTU;
433270063Sluigi	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
434270063Sluigi	ifp->if_capabilities |= IFCAP_LINKSTATE;
435270063Sluigi	ifp->if_capenable |= IFCAP_LINKSTATE;
436270063Sluigi
437270063Sluigi	ether_ifattach(ifp, eaddr);
438270063Sluigi	*ret = ifp;
439270063Sluigi	return 0;
440270063Sluigi}
/*
 * Unregister the virtual interface from the system and drop the final
 * refcount.  The MAC index (last byte of the link-level address) is
 * read and released before the interface is detached.
 */
void
nm_vi_detach(struct ifnet *ifp)
{
	uint8_t mac_index = ((char *)IF_LLADDR(ifp))[5];

	nm_vi_free_index(mac_index);
	ether_ifdetach(ifp);
	if_free(ifp);
}
449270063Sluigi
/*
 * In order to track whether pages are still mapped, we hook into
 * the standard cdev_pager and intercept the constructor and
 * destructor.  This is the handle passed to the pager callbacks.
 */

struct netmap_vm_handle_t {
	/* device the mapping belongs to */
	struct cdev 		*dev;
	/* per-open netmap state associated with the mapping */
	struct netmap_priv_d	*priv;
};
460259412Sluigi
461260368Sluigi
462259412Sluigistatic int
463259412Sluiginetmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
464259412Sluigi    vm_ooffset_t foff, struct ucred *cred, u_short *color)
465259412Sluigi{
466259412Sluigi	struct netmap_vm_handle_t *vmh = handle;
467261909Sluigi
468261909Sluigi	if (netmap_verbose)
469261909Sluigi		D("handle %p size %jd prot %d foff %jd",
470261909Sluigi			handle, (intmax_t)size, prot, (intmax_t)foff);
471274354Sluigi	if (color)
472274354Sluigi		*color = 0;
473259412Sluigi	dev_ref(vmh->dev);
474259412Sluigi	return 0;
475259412Sluigi}
476259412Sluigi
477259412Sluigi
478259412Sluigistatic void
479259412Sluiginetmap_dev_pager_dtor(void *handle)
480259412Sluigi{
481259412Sluigi	struct netmap_vm_handle_t *vmh = handle;
482259412Sluigi	struct cdev *dev = vmh->dev;
483259412Sluigi	struct netmap_priv_d *priv = vmh->priv;
484261909Sluigi
485261909Sluigi	if (netmap_verbose)
486261909Sluigi		D("handle %p", handle);
487259412Sluigi	netmap_dtor(priv);
488259412Sluigi	free(vmh, M_DEVBUF);
489259412Sluigi	dev_rel(dev);
490259412Sluigi}
491259412Sluigi
492260368Sluigi
493259412Sluigistatic int
494259412Sluiginetmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
495259412Sluigi	int prot, vm_page_t *mres)
496259412Sluigi{
497259412Sluigi	struct netmap_vm_handle_t *vmh = object->handle;
498259412Sluigi	struct netmap_priv_d *priv = vmh->priv;
499285349Sluigi	struct netmap_adapter *na = priv->np_na;
500259412Sluigi	vm_paddr_t paddr;
501259412Sluigi	vm_page_t page;
502259412Sluigi	vm_memattr_t memattr;
503259412Sluigi	vm_pindex_t pidx;
504259412Sluigi
505259412Sluigi	ND("object %p offset %jd prot %d mres %p",
506259412Sluigi			object, (intmax_t)offset, prot, mres);
507259412Sluigi	memattr = object->memattr;
508259412Sluigi	pidx = OFF_TO_IDX(offset);
509285349Sluigi	paddr = netmap_mem_ofstophys(na->nm_mem, offset);
510259412Sluigi	if (paddr == 0)
511259412Sluigi		return VM_PAGER_FAIL;
512259412Sluigi
513259412Sluigi	if (((*mres)->flags & PG_FICTITIOUS) != 0) {
514259412Sluigi		/*
515259412Sluigi		 * If the passed in result page is a fake page, update it with
516259412Sluigi		 * the new physical address.
517259412Sluigi		 */
518259412Sluigi		page = *mres;
519259412Sluigi		vm_page_updatefake(page, paddr, memattr);
520259412Sluigi	} else {
521259412Sluigi		/*
522259412Sluigi		 * Replace the passed in reqpage page with our own fake page and
523259412Sluigi		 * free up the all of the original pages.
524259412Sluigi		 */
525259412Sluigi#ifndef VM_OBJECT_WUNLOCK	/* FreeBSD < 10.x */
526259412Sluigi#define VM_OBJECT_WUNLOCK VM_OBJECT_UNLOCK
527259412Sluigi#define VM_OBJECT_WLOCK	VM_OBJECT_LOCK
528259412Sluigi#endif /* VM_OBJECT_WUNLOCK */
529259412Sluigi
530259412Sluigi		VM_OBJECT_WUNLOCK(object);
531259412Sluigi		page = vm_page_getfake(paddr, memattr);
532259412Sluigi		VM_OBJECT_WLOCK(object);
533259412Sluigi		vm_page_lock(*mres);
534259412Sluigi		vm_page_free(*mres);
535259412Sluigi		vm_page_unlock(*mres);
536259412Sluigi		*mres = page;
537259412Sluigi		vm_page_insert(page, object, pidx);
538259412Sluigi	}
539259412Sluigi	page->valid = VM_PAGE_BITS_ALL;
540259412Sluigi	return (VM_PAGER_OK);
541259412Sluigi}
542259412Sluigi
543259412Sluigi
544259412Sluigistatic struct cdev_pager_ops netmap_cdev_pager_ops = {
545259412Sluigi	.cdev_pg_ctor = netmap_dev_pager_ctor,
546259412Sluigi	.cdev_pg_dtor = netmap_dev_pager_dtor,
547259412Sluigi	.cdev_pg_fault = netmap_dev_pager_fault,
548259412Sluigi};
549259412Sluigi
550259412Sluigi
551259412Sluigistatic int
552259412Sluiginetmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff,
553259412Sluigi	vm_size_t objsize,  vm_object_t *objp, int prot)
554259412Sluigi{
555259412Sluigi	int error;
556259412Sluigi	struct netmap_vm_handle_t *vmh;
557259412Sluigi	struct netmap_priv_d *priv;
558259412Sluigi	vm_object_t obj;
559259412Sluigi
560261909Sluigi	if (netmap_verbose)
561261909Sluigi		D("cdev %p foff %jd size %jd objp %p prot %d", cdev,
562261909Sluigi		    (intmax_t )*foff, (intmax_t )objsize, objp, prot);
563259412Sluigi
564259412Sluigi	vmh = malloc(sizeof(struct netmap_vm_handle_t), M_DEVBUF,
565259412Sluigi			      M_NOWAIT | M_ZERO);
566259412Sluigi	if (vmh == NULL)
567259412Sluigi		return ENOMEM;
568259412Sluigi	vmh->dev = cdev;
569259412Sluigi
570259412Sluigi	NMG_LOCK();
571259412Sluigi	error = devfs_get_cdevpriv((void**)&priv);
572259412Sluigi	if (error)
573259412Sluigi		goto err_unlock;
574285349Sluigi	if (priv->np_nifp == NULL) {
575285349Sluigi		error = EINVAL;
576285349Sluigi		goto err_unlock;
577285349Sluigi	}
578259412Sluigi	vmh->priv = priv;
579285359Sluigi	priv->np_refs++;
580259412Sluigi	NMG_UNLOCK();
581259412Sluigi
582259412Sluigi	obj = cdev_pager_allocate(vmh, OBJT_DEVICE,
583259412Sluigi		&netmap_cdev_pager_ops, objsize, prot,
584259412Sluigi		*foff, NULL);
585259412Sluigi	if (obj == NULL) {
586259412Sluigi		D("cdev_pager_allocate failed");
587259412Sluigi		error = EINVAL;
588259412Sluigi		goto err_deref;
589259412Sluigi	}
590259412Sluigi
591259412Sluigi	*objp = obj;
592259412Sluigi	return 0;
593259412Sluigi
594259412Sluigierr_deref:
595259412Sluigi	NMG_LOCK();
596285359Sluigi	priv->np_refs--;
597259412Sluigierr_unlock:
598259412Sluigi	NMG_UNLOCK();
599259412Sluigi// err:
600259412Sluigi	free(vmh, M_DEVBUF);
601259412Sluigi	return error;
602259412Sluigi}
603259412Sluigi
604285349Sluigi/*
605285359Sluigi * On FreeBSD the close routine is only called on the last close on
606285359Sluigi * the device (/dev/netmap) so we cannot do anything useful.
607285359Sluigi * To track close() on individual file descriptors we pass netmap_dtor() to
608285349Sluigi * devfs_set_cdevpriv() on open(). The FreeBSD kernel will call the destructor
609285349Sluigi * when the last fd pointing to the device is closed.
610285349Sluigi *
611285359Sluigi * Note that FreeBSD does not even munmap() on close() so we also have
612285359Sluigi * to track mmap() ourselves, and postpone the call to
613285349Sluigi * netmap_dtor() is called when the process has no open fds and no active
614285349Sluigi * memory maps on /dev/netmap, as in linux.
615285349Sluigi */
616259412Sluigistatic int
617259412Sluiginetmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
618259412Sluigi{
619259412Sluigi	if (netmap_verbose)
620259412Sluigi		D("dev %p fflag 0x%x devtype %d td %p",
621259412Sluigi			dev, fflag, devtype, td);
622259412Sluigi	return 0;
623259412Sluigi}
624259412Sluigi
625259412Sluigi
626259412Sluigistatic int
627259412Sluiginetmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
628259412Sluigi{
629259412Sluigi	struct netmap_priv_d *priv;
630259412Sluigi	int error;
631259412Sluigi
632259412Sluigi	(void)dev;
633259412Sluigi	(void)oflags;
634259412Sluigi	(void)devtype;
635259412Sluigi	(void)td;
636259412Sluigi
637259412Sluigi	priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
638259412Sluigi			      M_NOWAIT | M_ZERO);
639259412Sluigi	if (priv == NULL)
640259412Sluigi		return ENOMEM;
641285445Sluigi	priv->np_refs = 1;
642259412Sluigi	error = devfs_set_cdevpriv(priv, netmap_dtor);
643285359Sluigi	if (error) {
644285359Sluigi		free(priv, M_DEVBUF);
645285699Sluigi	} else {
646285699Sluigi		NMG_LOCK();
647285699Sluigi		netmap_use_count++;
648285699Sluigi		NMG_UNLOCK();
649285359Sluigi	}
650285359Sluigi	return error;
651259412Sluigi}
652259412Sluigi
653261909Sluigi/******************** kqueue support ****************/
654259412Sluigi
655261909Sluigi/*
656261909Sluigi * The OS_selwakeup also needs to issue a KNOTE_UNLOCKED.
657261909Sluigi * We use a non-zero argument to distinguish the call from the one
658261909Sluigi * in kevent_scan() which instead also needs to run netmap_poll().
659261909Sluigi * The knote uses a global mutex for the time being. We might
660261909Sluigi * try to reuse the one in the si, but it is not allocated
661261909Sluigi * permanently so it might be a bit tricky.
662261909Sluigi *
663261909Sluigi * The *kqfilter function registers one or another f_event
664261909Sluigi * depending on read or write mode.
665261909Sluigi * In the call to f_event() td_fpop is NULL so any child function
666261909Sluigi * calling devfs_get_cdevpriv() would fail - and we need it in
667261909Sluigi * netmap_poll(). As a workaround we store priv into kn->kn_hook
668261909Sluigi * and pass it as first argument to netmap_poll(), which then
669261909Sluigi * uses the failure to tell that we are called from f_event()
670261909Sluigi * and do not need the selrecord().
671261909Sluigi */
672261909Sluigi
673261909Sluigi
674261909Sluigivoid
675274459Sluigifreebsd_selwakeup(struct nm_selinfo *si, int pri)
676261909Sluigi{
677261909Sluigi	if (netmap_verbose)
678274459Sluigi		D("on knote %p", &si->si.si_note);
679274459Sluigi	selwakeuppri(&si->si, pri);
680261909Sluigi	/* use a non-zero hint to tell the notification from the
681261909Sluigi	 * call done in kqueue_scan() which uses 0
682261909Sluigi	 */
683274459Sluigi	KNOTE_UNLOCKED(&si->si.si_note, 0x100 /* notification */);
684261909Sluigi}
685261909Sluigi
686261909Sluigistatic void
687261909Sluiginetmap_knrdetach(struct knote *kn)
688261909Sluigi{
689261909Sluigi	struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook;
690285349Sluigi	struct selinfo *si = &priv->np_si[NR_RX]->si;
691261909Sluigi
692261909Sluigi	D("remove selinfo %p", si);
693261909Sluigi	knlist_remove(&si->si_note, kn, 0);
694261909Sluigi}
695261909Sluigi
696261909Sluigistatic void
697261909Sluiginetmap_knwdetach(struct knote *kn)
698261909Sluigi{
699261909Sluigi	struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook;
700285349Sluigi	struct selinfo *si = &priv->np_si[NR_TX]->si;
701261909Sluigi
702261909Sluigi	D("remove selinfo %p", si);
703261909Sluigi	knlist_remove(&si->si_note, kn, 0);
704261909Sluigi}
705261909Sluigi
706261909Sluigi/*
707261909Sluigi * callback from notifies (generated externally) and our
708261909Sluigi * calls to kevent(). The former we just return 1 (ready)
709261909Sluigi * since we do not know better.
710261909Sluigi * In the latter we call netmap_poll and return 0/1 accordingly.
711261909Sluigi */
712261909Sluigistatic int
713261909Sluiginetmap_knrw(struct knote *kn, long hint, int events)
714261909Sluigi{
715261909Sluigi	struct netmap_priv_d *priv;
716261909Sluigi	int revents;
717261909Sluigi
718261909Sluigi	if (hint != 0) {
719261909Sluigi		ND(5, "call from notify");
720261909Sluigi		return 1; /* assume we are ready */
721261909Sluigi	}
722261909Sluigi	priv = kn->kn_hook;
723261909Sluigi	/* the notification may come from an external thread,
724261909Sluigi	 * in which case we do not want to run the netmap_poll
725261909Sluigi	 * This should be filtered above, but check just in case.
726261909Sluigi	 */
727261909Sluigi	if (curthread != priv->np_td) { /* should not happen */
728261909Sluigi		RD(5, "curthread changed %p %p", curthread, priv->np_td);
729261909Sluigi		return 1;
730261909Sluigi	} else {
731261909Sluigi		revents = netmap_poll((void *)priv, events, curthread);
732261909Sluigi		return (events & revents) ? 1 : 0;
733261909Sluigi	}
734261909Sluigi}
735261909Sluigi
736261909Sluigistatic int
737261909Sluiginetmap_knread(struct knote *kn, long hint)
738261909Sluigi{
739261909Sluigi	return netmap_knrw(kn, hint, POLLIN);
740261909Sluigi}
741261909Sluigi
742261909Sluigistatic int
743261909Sluiginetmap_knwrite(struct knote *kn, long hint)
744261909Sluigi{
745261909Sluigi	return netmap_knrw(kn, hint, POLLOUT);
746261909Sluigi}
747261909Sluigi
748261909Sluigistatic struct filterops netmap_rfiltops = {
749261909Sluigi	.f_isfd = 1,
750261909Sluigi	.f_detach = netmap_knrdetach,
751261909Sluigi	.f_event = netmap_knread,
752261909Sluigi};
753261909Sluigi
754261909Sluigistatic struct filterops netmap_wfiltops = {
755261909Sluigi	.f_isfd = 1,
756261909Sluigi	.f_detach = netmap_knwdetach,
757261909Sluigi	.f_event = netmap_knwrite,
758261909Sluigi};
759261909Sluigi
760261909Sluigi
761261909Sluigi/*
762261909Sluigi * This is called when a thread invokes kevent() to record
763261909Sluigi * a change in the configuration of the kqueue().
764261909Sluigi * The 'priv' should be the same as in the netmap device.
765261909Sluigi */
766261909Sluigistatic int
767261909Sluiginetmap_kqfilter(struct cdev *dev, struct knote *kn)
768261909Sluigi{
769261909Sluigi	struct netmap_priv_d *priv;
770261909Sluigi	int error;
771261909Sluigi	struct netmap_adapter *na;
772274459Sluigi	struct nm_selinfo *si;
773261909Sluigi	int ev = kn->kn_filter;
774261909Sluigi
775261909Sluigi	if (ev != EVFILT_READ && ev != EVFILT_WRITE) {
776261909Sluigi		D("bad filter request %d", ev);
777261909Sluigi		return 1;
778261909Sluigi	}
779261909Sluigi	error = devfs_get_cdevpriv((void**)&priv);
780261909Sluigi	if (error) {
781261909Sluigi		D("device not yet setup");
782261909Sluigi		return 1;
783261909Sluigi	}
784261909Sluigi	na = priv->np_na;
785261909Sluigi	if (na == NULL) {
786261909Sluigi		D("no netmap adapter for this file descriptor");
787261909Sluigi		return 1;
788261909Sluigi	}
789261909Sluigi	/* the si is indicated in the priv */
790285349Sluigi	si = priv->np_si[(ev == EVFILT_WRITE) ? NR_TX : NR_RX];
791261909Sluigi	// XXX lock(priv) ?
792261909Sluigi	kn->kn_fop = (ev == EVFILT_WRITE) ?
793261909Sluigi		&netmap_wfiltops : &netmap_rfiltops;
794261909Sluigi	kn->kn_hook = priv;
795274459Sluigi	knlist_add(&si->si.si_note, kn, 1);
796261909Sluigi	// XXX unlock(priv)
797261909Sluigi	ND("register %p %s td %p priv %p kn %p np_nifp %p kn_fp/fpop %s",
798261909Sluigi		na, na->ifp->if_xname, curthread, priv, kn,
799261909Sluigi		priv->np_nifp,
800261909Sluigi		kn->kn_fp == curthread->td_fpop ? "match" : "MISMATCH");
801261909Sluigi	return 0;
802261909Sluigi}
803261909Sluigi
804259412Sluigistruct cdevsw netmap_cdevsw = {
805259412Sluigi	.d_version = D_VERSION,
806259412Sluigi	.d_name = "netmap",
807259412Sluigi	.d_open = netmap_open,
808259412Sluigi	.d_mmap_single = netmap_mmap_single,
809259412Sluigi	.d_ioctl = netmap_ioctl,
810259412Sluigi	.d_poll = netmap_poll,
811261909Sluigi	.d_kqfilter = netmap_kqfilter,
812259412Sluigi	.d_close = netmap_close,
813259412Sluigi};
814261909Sluigi/*--- end of kqueue support ----*/
815259412Sluigi
816259412Sluigi/*
817259412Sluigi * Kernel entry point.
818259412Sluigi *
819259412Sluigi * Initialize/finalize the module and return.
820259412Sluigi *
821259412Sluigi * Return 0 on success, errno on failure.
822259412Sluigi */
823259412Sluigistatic int
824259412Sluiginetmap_loader(__unused struct module *module, int event, __unused void *arg)
825259412Sluigi{
826259412Sluigi	int error = 0;
827259412Sluigi
828259412Sluigi	switch (event) {
829259412Sluigi	case MOD_LOAD:
830259412Sluigi		error = netmap_init();
831259412Sluigi		break;
832259412Sluigi
833259412Sluigi	case MOD_UNLOAD:
834285699Sluigi		/*
835285699Sluigi		 * if some one is still using netmap,
836285699Sluigi		 * then the module can not be unloaded.
837285699Sluigi		 */
838285699Sluigi		if (netmap_use_count) {
839285699Sluigi			D("netmap module can not be unloaded - netmap_use_count: %d",
840285699Sluigi					netmap_use_count);
841285699Sluigi			error = EBUSY;
842285699Sluigi			break;
843285699Sluigi		}
844259412Sluigi		netmap_fini();
845259412Sluigi		break;
846259412Sluigi
847259412Sluigi	default:
848259412Sluigi		error = EOPNOTSUPP;
849259412Sluigi		break;
850259412Sluigi	}
851259412Sluigi
852259412Sluigi	return (error);
853259412Sluigi}
854259412Sluigi
855259412Sluigi
856259412SluigiDEV_MODULE(netmap, netmap_loader, NULL);
857279199SluigiMODULE_VERSION(netmap, 1);
858