/*
 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* $FreeBSD$ */
27262152Sluigi
28262152Sluigi#include <sys/types.h>
29262152Sluigi#include <sys/module.h>
30262152Sluigi#include <sys/errno.h>
31262152Sluigi#include <sys/param.h>  /* defines used in kernel.h */
32262152Sluigi#include <sys/poll.h>  /* POLLIN, POLLOUT */
33262152Sluigi#include <sys/kernel.h> /* types used in module initialization */
34262152Sluigi#include <sys/conf.h>	/* DEV_MODULE */
35262152Sluigi#include <sys/endian.h>
36262152Sluigi
37262152Sluigi#include <sys/rwlock.h>
38262152Sluigi
39262152Sluigi#include <vm/vm.h>      /* vtophys */
40262152Sluigi#include <vm/pmap.h>    /* vtophys */
41262152Sluigi#include <vm/vm_param.h>
42262152Sluigi#include <vm/vm_object.h>
43262152Sluigi#include <vm/vm_page.h>
44262152Sluigi#include <vm/vm_pager.h>
45262152Sluigi#include <vm/uma.h>
46262152Sluigi
47262152Sluigi
48262152Sluigi#include <sys/malloc.h>
49262152Sluigi#include <sys/socket.h> /* sockaddrs */
50262152Sluigi#include <sys/selinfo.h>
51262152Sluigi#include <net/if.h>
52262152Sluigi#include <net/if_var.h>
53270252Sluigi#include <net/if_types.h> /* IFT_ETHER */
54270252Sluigi#include <net/ethernet.h> /* ether_ifdetach */
55270252Sluigi#include <net/if_dl.h> /* LLADDR */
56262152Sluigi#include <machine/bus.h>        /* bus_dmamap_* */
57262152Sluigi#include <netinet/in.h>		/* in6_cksum_pseudo() */
58262152Sluigi#include <machine/in_cksum.h>  /* in_pseudo(), in_cksum_hdr() */
59262152Sluigi
60262152Sluigi#include <net/netmap.h>
61262152Sluigi#include <dev/netmap/netmap_kern.h>
62262152Sluigi#include <dev/netmap/netmap_mem2.h>
63262152Sluigi
64262152Sluigi
/* ======================== FREEBSD-SPECIFIC ROUTINES ================== */
66262152Sluigi
67267282Sluigirawsum_t
68267282Sluiginm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum)
69262152Sluigi{
70262152Sluigi	/* TODO XXX please use the FreeBSD implementation for this. */
71262152Sluigi	uint16_t *words = (uint16_t *)data;
72262152Sluigi	int nw = len / 2;
73262152Sluigi	int i;
74262152Sluigi
75262152Sluigi	for (i = 0; i < nw; i++)
76262152Sluigi		cur_sum += be16toh(words[i]);
77262152Sluigi
78262152Sluigi	if (len & 1)
79262152Sluigi		cur_sum += (data[len-1] << 8);
80262152Sluigi
81262152Sluigi	return cur_sum;
82262152Sluigi}
83262152Sluigi
84262152Sluigi/* Fold a raw checksum: 'cur_sum' is in host byte order, while the
85262152Sluigi * return value is in network byte order.
86262152Sluigi */
87267282Sluigiuint16_t
88267282Sluiginm_csum_fold(rawsum_t cur_sum)
89262152Sluigi{
90262152Sluigi	/* TODO XXX please use the FreeBSD implementation for this. */
91262152Sluigi	while (cur_sum >> 16)
92262152Sluigi		cur_sum = (cur_sum & 0xFFFF) + (cur_sum >> 16);
93262152Sluigi
94262152Sluigi	return htobe16((~cur_sum) & 0xFFFF);
95262152Sluigi}
96262152Sluigi
97270252Sluigiuint16_t nm_csum_ipv4(struct nm_iphdr *iph)
98262152Sluigi{
99262152Sluigi#if 0
100262152Sluigi	return in_cksum_hdr((void *)iph);
101262152Sluigi#else
102262152Sluigi	return nm_csum_fold(nm_csum_raw((uint8_t*)iph, sizeof(struct nm_iphdr), 0));
103262152Sluigi#endif
104262152Sluigi}
105262152Sluigi
106267282Sluigivoid
107267282Sluiginm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
108262152Sluigi					size_t datalen, uint16_t *check)
109262152Sluigi{
110262214Sluigi#ifdef INET
111262152Sluigi	uint16_t pseudolen = datalen + iph->protocol;
112262152Sluigi
113262152Sluigi	/* Compute and insert the pseudo-header cheksum. */
114262152Sluigi	*check = in_pseudo(iph->saddr, iph->daddr,
115262152Sluigi				 htobe16(pseudolen));
116262152Sluigi	/* Compute the checksum on TCP/UDP header + payload
117262152Sluigi	 * (includes the pseudo-header).
118262152Sluigi	 */
119262152Sluigi	*check = nm_csum_fold(nm_csum_raw(data, datalen, 0));
120262214Sluigi#else
121262214Sluigi	static int notsupported = 0;
122262214Sluigi	if (!notsupported) {
123262214Sluigi		notsupported = 1;
124262214Sluigi		D("inet4 segmentation not supported");
125262214Sluigi	}
126262214Sluigi#endif
127262152Sluigi}
128262152Sluigi
129267282Sluigivoid
130267282Sluiginm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
131262152Sluigi					size_t datalen, uint16_t *check)
132262152Sluigi{
133262152Sluigi#ifdef INET6
134262152Sluigi	*check = in6_cksum_pseudo((void*)ip6h, datalen, ip6h->nexthdr, 0);
135262152Sluigi	*check = nm_csum_fold(nm_csum_raw(data, datalen, 0));
136262152Sluigi#else
137262152Sluigi	static int notsupported = 0;
138262152Sluigi	if (!notsupported) {
139262152Sluigi		notsupported = 1;
140262152Sluigi		D("inet6 segmentation not supported");
141262152Sluigi	}
142262152Sluigi#endif
143262152Sluigi}
144262152Sluigi
145262152Sluigi
146262152Sluigi/*
147262152Sluigi * Intercept the rx routine in the standard device driver.
148262152Sluigi * Second argument is non-zero to intercept, 0 to restore
149262152Sluigi */
150262152Sluigiint
151262152Sluiginetmap_catch_rx(struct netmap_adapter *na, int intercept)
152262152Sluigi{
153270252Sluigi	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
154262152Sluigi	struct ifnet *ifp = na->ifp;
155262152Sluigi
156262152Sluigi	if (intercept) {
157262152Sluigi		if (gna->save_if_input) {
158262152Sluigi			D("cannot intercept again");
159262152Sluigi			return EINVAL; /* already set */
160262152Sluigi		}
161262152Sluigi		gna->save_if_input = ifp->if_input;
162262152Sluigi		ifp->if_input = generic_rx_handler;
163262152Sluigi	} else {
164262152Sluigi		if (!gna->save_if_input){
165262152Sluigi			D("cannot restore");
166262152Sluigi			return EINVAL;  /* not saved */
167262152Sluigi		}
168262152Sluigi		ifp->if_input = gna->save_if_input;
169262152Sluigi		gna->save_if_input = NULL;
170262152Sluigi	}
171262152Sluigi
172262152Sluigi	return 0;
173262152Sluigi}
174262152Sluigi
175262152Sluigi
176262152Sluigi/*
177262152Sluigi * Intercept the packet steering routine in the tx path,
178262152Sluigi * so that we can decide which queue is used for an mbuf.
179262152Sluigi * Second argument is non-zero to intercept, 0 to restore.
180262152Sluigi * On freebsd we just intercept if_transmit.
181262152Sluigi */
182262152Sluigivoid
183262152Sluiginetmap_catch_tx(struct netmap_generic_adapter *gna, int enable)
184262152Sluigi{
185262152Sluigi	struct netmap_adapter *na = &gna->up.up;
186262152Sluigi	struct ifnet *ifp = na->ifp;
187262152Sluigi
188262152Sluigi	if (enable) {
189262152Sluigi		na->if_transmit = ifp->if_transmit;
190262152Sluigi		ifp->if_transmit = netmap_transmit;
191262152Sluigi	} else {
192262152Sluigi		ifp->if_transmit = na->if_transmit;
193262152Sluigi	}
194262152Sluigi}
195262152Sluigi
196262152Sluigi
197262152Sluigi/*
198262152Sluigi * Transmit routine used by generic_netmap_txsync(). Returns 0 on success
199262152Sluigi * and non-zero on error (which may be packet drops or other errors).
200262152Sluigi * addr and len identify the netmap buffer, m is the (preallocated)
201262152Sluigi * mbuf to use for transmissions.
202262152Sluigi *
203262152Sluigi * We should add a reference to the mbuf so the m_freem() at the end
204262152Sluigi * of the transmission does not consume resources.
205262152Sluigi *
206262152Sluigi * On FreeBSD, and on multiqueue cards, we can force the queue using
207281955Shiren *      if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
208262152Sluigi *              i = m->m_pkthdr.flowid % adapter->num_queues;
209262152Sluigi *      else
210262152Sluigi *              i = curcpu % adapter->num_queues;
211262152Sluigi *
212262152Sluigi */
213262152Sluigiint
214262152Sluigigeneric_xmit_frame(struct ifnet *ifp, struct mbuf *m,
215262152Sluigi	void *addr, u_int len, u_int ring_nr)
216262152Sluigi{
217262152Sluigi	int ret;
218262152Sluigi
219267282Sluigi	/*
220267282Sluigi	 * The mbuf should be a cluster from our special pool,
221267282Sluigi	 * so we do not need to do an m_copyback but just copy
222267282Sluigi	 * (and eventually, just reference the netmap buffer)
223267282Sluigi	 */
224262152Sluigi
225270252Sluigi	if (GET_MBUF_REFCNT(m) != 1) {
226267282Sluigi		D("invalid refcnt %d for %p",
227270252Sluigi			GET_MBUF_REFCNT(m), m);
228267282Sluigi		panic("in generic_xmit_frame");
229267282Sluigi	}
230267282Sluigi	// XXX the ext_size check is unnecessary if we link the netmap buf
231267282Sluigi	if (m->m_ext.ext_size < len) {
232267282Sluigi		RD(5, "size %d < len %d", m->m_ext.ext_size, len);
233267282Sluigi		len = m->m_ext.ext_size;
234267282Sluigi	}
235270252Sluigi	if (0) { /* XXX seems to have negligible benefits */
236267282Sluigi		m->m_ext.ext_buf = m->m_data = addr;
237267282Sluigi	} else {
238267282Sluigi		bcopy(addr, m->m_data, len);
239267282Sluigi	}
240267282Sluigi	m->m_len = m->m_pkthdr.len = len;
241267282Sluigi	// inc refcount. All ours, we could skip the atomic
242270252Sluigi	atomic_fetchadd_int(PNT_MBUF_REFCNT(m), 1);
243281955Shiren	M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
244262152Sluigi	m->m_pkthdr.flowid = ring_nr;
245262152Sluigi	m->m_pkthdr.rcvif = ifp; /* used for tx notification */
246262152Sluigi	ret = NA(ifp)->if_transmit(ifp, m);
247262152Sluigi	return ret;
248262152Sluigi}
249262152Sluigi
250262152Sluigi
#if __FreeBSD_version >= 1100005
/* Return the netmap adapter that NA() yields for the given interface. */
struct netmap_adapter *
netmap_getna(if_t ifp)
{
	struct ifnet *real_ifp = (struct ifnet *)ifp;

	return (NA(real_ifp));
}
#endif /* __FreeBSD_version >= 1100005 */
258267282Sluigi
/*
 * The following two functions are empty until we have a generic
 * way to extract the info from the ifp.
 *
 * This one logs the values passed in through 'tx' and 'rx', leaves
 * them untouched and always returns 0.
 */
int
generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx)
{
	(void)ifp;	/* not consulted */

	D("called, in tx %d rx %d", *tx, *rx);

	return 0;
}
269262152Sluigi
270262152Sluigi
271262152Sluigivoid
272262152Sluigigeneric_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq)
273262152Sluigi{
274267282Sluigi	D("called, in txq %d rxq %d", *txq, *rxq);
275262152Sluigi	*txq = netmap_generic_rings;
276262152Sluigi	*rxq = netmap_generic_rings;
277262152Sluigi}
278262152Sluigi
279262152Sluigi
280267282Sluigivoid
281270252Sluiginetmap_mitigation_init(struct nm_generic_mit *mit, int idx, struct netmap_adapter *na)
282262152Sluigi{
283262152Sluigi	ND("called");
284262152Sluigi	mit->mit_pending = 0;
285270252Sluigi	mit->mit_ring_idx = idx;
286262152Sluigi	mit->mit_na = na;
287262152Sluigi}
288262152Sluigi
289262152Sluigi
/* Start mitigation on 'mit'. Only a (normally disabled) trace call here. */
void
netmap_mitigation_start(struct nm_generic_mit *mit)
{
	(void)mit;

	ND("called");
}
295262152Sluigi
296262152Sluigi
/* Restart mitigation on 'mit'. Only a (normally disabled) trace call here. */
void
netmap_mitigation_restart(struct nm_generic_mit *mit)
{
	(void)mit;

	ND("called");
}
302262152Sluigi
303262152Sluigi
/* Tell whether mitigation is active on 'mit'; this version always answers 0. */
int
netmap_mitigation_active(struct nm_generic_mit *mit)
{
	(void)mit;

	ND("called");

	return 0;
}
310262152Sluigi
311262152Sluigi
/* Tear down mitigation state for 'mit'. Only a (normally disabled) trace call here. */
void
netmap_mitigation_cleanup(struct nm_generic_mit *mit)
{
	(void)mit;

	ND("called");
}
317262152Sluigi
318270252Sluigistatic int
319270252Sluiginm_vi_dummy(struct ifnet *ifp, u_long cmd, caddr_t addr)
320270252Sluigi{
321270252Sluigi	return EINVAL;
322270252Sluigi}
323262152Sluigi
/* if_start handler of the virtual interface: reaching it is a fatal error. */
static void
nm_vi_start(struct ifnet *ifp)
{
	(void)ifp;

	panic("nm_vi_start() must not be called");
}
329270252Sluigi
330262152Sluigi/*
331270252Sluigi * Index manager of persistent virtual interfaces.
332270252Sluigi * It is used to decide the lowest byte of the MAC address.
333270252Sluigi * We use the same algorithm with management of bridge port index.
334270252Sluigi */
335270252Sluigi#define NM_VI_MAX	255
336270252Sluigistatic struct {
337270252Sluigi	uint8_t index[NM_VI_MAX]; /* XXX just for a reasonable number */
338270252Sluigi	uint8_t active;
339270252Sluigi	struct mtx lock;
340270252Sluigi} nm_vi_indices;
341270252Sluigi
342270252Sluigivoid
343270252Sluiginm_vi_init_index(void)
344270252Sluigi{
345270252Sluigi	int i;
346270252Sluigi	for (i = 0; i < NM_VI_MAX; i++)
347270252Sluigi		nm_vi_indices.index[i] = i;
348270252Sluigi	nm_vi_indices.active = 0;
349270252Sluigi	mtx_init(&nm_vi_indices.lock, "nm_vi_indices_lock", NULL, MTX_DEF);
350270252Sluigi}
351270252Sluigi
352270252Sluigi/* return -1 if no index available */
353270252Sluigistatic int
354270252Sluiginm_vi_get_index(void)
355270252Sluigi{
356270252Sluigi	int ret;
357270252Sluigi
358270252Sluigi	mtx_lock(&nm_vi_indices.lock);
359270252Sluigi	ret = nm_vi_indices.active == NM_VI_MAX ? -1 :
360270252Sluigi		nm_vi_indices.index[nm_vi_indices.active++];
361270252Sluigi	mtx_unlock(&nm_vi_indices.lock);
362270252Sluigi	return ret;
363270252Sluigi}
364270252Sluigi
365270252Sluigistatic void
366270252Sluiginm_vi_free_index(uint8_t val)
367270252Sluigi{
368270252Sluigi	int i, lim;
369270252Sluigi
370270252Sluigi	mtx_lock(&nm_vi_indices.lock);
371270252Sluigi	lim = nm_vi_indices.active;
372270252Sluigi	for (i = 0; i < lim; i++) {
373270252Sluigi		if (nm_vi_indices.index[i] == val) {
374270252Sluigi			/* swap index[lim-1] and j */
375270252Sluigi			int tmp = nm_vi_indices.index[lim-1];
376270252Sluigi			nm_vi_indices.index[lim-1] = val;
377270252Sluigi			nm_vi_indices.index[i] = tmp;
378270252Sluigi			nm_vi_indices.active--;
379270252Sluigi			break;
380270252Sluigi		}
381270252Sluigi	}
382270252Sluigi	if (lim == nm_vi_indices.active)
383270252Sluigi		D("funny, index %u didn't found", val);
384270252Sluigi	mtx_unlock(&nm_vi_indices.lock);
385270252Sluigi}
386270252Sluigi#undef NM_VI_MAX
387270252Sluigi
388270252Sluigi/*
389270252Sluigi * Implementation of a netmap-capable virtual interface that
390270252Sluigi * registered to the system.
391270252Sluigi * It is based on if_tap.c and ip_fw_log.c in FreeBSD 9.
392270252Sluigi *
393270252Sluigi * Note: Linux sets refcount to 0 on allocation of net_device,
394270252Sluigi * then increments it on registration to the system.
395270252Sluigi * FreeBSD sets refcount to 1 on if_alloc(), and does not
396270252Sluigi * increment this refcount on if_attach().
397270252Sluigi */
398270252Sluigiint
399270252Sluiginm_vi_persist(const char *name, struct ifnet **ret)
400270252Sluigi{
401270252Sluigi	struct ifnet *ifp;
402270252Sluigi	u_short macaddr_hi;
403270252Sluigi	uint32_t macaddr_mid;
404270252Sluigi	u_char eaddr[6];
405270252Sluigi	int unit = nm_vi_get_index(); /* just to decide MAC address */
406270252Sluigi
407270252Sluigi	if (unit < 0)
408270252Sluigi		return EBUSY;
409270252Sluigi	/*
410270252Sluigi	 * We use the same MAC address generation method with tap
411270252Sluigi	 * except for the highest octet is 00:be instead of 00:bd
412270252Sluigi	 */
413270252Sluigi	macaddr_hi = htons(0x00be); /* XXX tap + 1 */
414270252Sluigi	macaddr_mid = (uint32_t) ticks;
415270252Sluigi	bcopy(&macaddr_hi, eaddr, sizeof(short));
416270252Sluigi	bcopy(&macaddr_mid, &eaddr[2], sizeof(uint32_t));
417270252Sluigi	eaddr[5] = (uint8_t)unit;
418270252Sluigi
419270252Sluigi	ifp = if_alloc(IFT_ETHER);
420270252Sluigi	if (ifp == NULL) {
421270252Sluigi		D("if_alloc failed");
422270252Sluigi		return ENOMEM;
423270252Sluigi	}
424270252Sluigi	if_initname(ifp, name, IF_DUNIT_NONE);
425270252Sluigi	ifp->if_mtu = 65536;
426270252Sluigi	ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST;
427270252Sluigi	ifp->if_init = (void *)nm_vi_dummy;
428270252Sluigi	ifp->if_ioctl = nm_vi_dummy;
429270252Sluigi	ifp->if_start = nm_vi_start;
430270252Sluigi	ifp->if_mtu = ETHERMTU;
431270252Sluigi	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
432270252Sluigi	ifp->if_capabilities |= IFCAP_LINKSTATE;
433270252Sluigi	ifp->if_capenable |= IFCAP_LINKSTATE;
434270252Sluigi
435270252Sluigi	ether_ifattach(ifp, eaddr);
436270252Sluigi	*ret = ifp;
437270252Sluigi	return 0;
438270252Sluigi}
/*
 * Unregister the virtual interface from the system and drop the final
 * refcount. The index to release is read from the last byte of the
 * interface link-level address.
 */
void
nm_vi_detach(struct ifnet *ifp)
{
	char *lladdr = (char *)IF_LLADDR(ifp);

	nm_vi_free_index(lladdr[5]);
	ether_ifdetach(ifp);
	if_free(ifp);
}
447270252Sluigi
/*
 * In order to track whether pages are still mapped, we hook into
 * the standard cdev_pager and intercept the constructor and
 * destructor.
 */

/* Handle handed to the cdev pager for each mapping. */
struct netmap_vm_handle_t {
	struct cdev		*dev;	/* device the mapping was created on */
	struct netmap_priv_d	*priv;	/* private data of the mapping file */
};
458262152Sluigi
459262152Sluigi
460262152Sluigistatic int
461262152Sluiginetmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
462262152Sluigi    vm_ooffset_t foff, struct ucred *cred, u_short *color)
463262152Sluigi{
464262152Sluigi	struct netmap_vm_handle_t *vmh = handle;
465262152Sluigi
466262152Sluigi	if (netmap_verbose)
467262152Sluigi		D("handle %p size %jd prot %d foff %jd",
468262152Sluigi			handle, (intmax_t)size, prot, (intmax_t)foff);
469278779Sluigi	if (color)
470278779Sluigi		*color = 0;
471262152Sluigi	dev_ref(vmh->dev);
472262152Sluigi	return 0;
473262152Sluigi}
474262152Sluigi
475262152Sluigi
476262152Sluigistatic void
477262152Sluiginetmap_dev_pager_dtor(void *handle)
478262152Sluigi{
479262152Sluigi	struct netmap_vm_handle_t *vmh = handle;
480262152Sluigi	struct cdev *dev = vmh->dev;
481262152Sluigi	struct netmap_priv_d *priv = vmh->priv;
482262152Sluigi
483262152Sluigi	if (netmap_verbose)
484262152Sluigi		D("handle %p", handle);
485262152Sluigi	netmap_dtor(priv);
486262152Sluigi	free(vmh, M_DEVBUF);
487262152Sluigi	dev_rel(dev);
488262152Sluigi}
489262152Sluigi
490262152Sluigi
491262152Sluigistatic int
492262152Sluiginetmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
493262152Sluigi	int prot, vm_page_t *mres)
494262152Sluigi{
495262152Sluigi	struct netmap_vm_handle_t *vmh = object->handle;
496262152Sluigi	struct netmap_priv_d *priv = vmh->priv;
497262152Sluigi	vm_paddr_t paddr;
498262152Sluigi	vm_page_t page;
499262152Sluigi	vm_memattr_t memattr;
500262152Sluigi	vm_pindex_t pidx;
501262152Sluigi
502262152Sluigi	ND("object %p offset %jd prot %d mres %p",
503262152Sluigi			object, (intmax_t)offset, prot, mres);
504262152Sluigi	memattr = object->memattr;
505262152Sluigi	pidx = OFF_TO_IDX(offset);
506262152Sluigi	paddr = netmap_mem_ofstophys(priv->np_mref, offset);
507262152Sluigi	if (paddr == 0)
508262152Sluigi		return VM_PAGER_FAIL;
509262152Sluigi
510262152Sluigi	if (((*mres)->flags & PG_FICTITIOUS) != 0) {
511262152Sluigi		/*
512262152Sluigi		 * If the passed in result page is a fake page, update it with
513262152Sluigi		 * the new physical address.
514262152Sluigi		 */
515262152Sluigi		page = *mres;
516262152Sluigi		vm_page_updatefake(page, paddr, memattr);
517262152Sluigi	} else {
518262152Sluigi		/*
519262152Sluigi		 * Replace the passed in reqpage page with our own fake page and
520262152Sluigi		 * free up the all of the original pages.
521262152Sluigi		 */
522262152Sluigi#ifndef VM_OBJECT_WUNLOCK	/* FreeBSD < 10.x */
523262152Sluigi#define VM_OBJECT_WUNLOCK VM_OBJECT_UNLOCK
524262152Sluigi#define VM_OBJECT_WLOCK	VM_OBJECT_LOCK
525262152Sluigi#endif /* VM_OBJECT_WUNLOCK */
526262152Sluigi
527262152Sluigi		VM_OBJECT_WUNLOCK(object);
528262152Sluigi		page = vm_page_getfake(paddr, memattr);
529262152Sluigi		VM_OBJECT_WLOCK(object);
530262152Sluigi		vm_page_lock(*mres);
531262152Sluigi		vm_page_free(*mres);
532262152Sluigi		vm_page_unlock(*mres);
533262152Sluigi		*mres = page;
534262152Sluigi		vm_page_insert(page, object, pidx);
535262152Sluigi	}
536262152Sluigi	page->valid = VM_PAGE_BITS_ALL;
537262152Sluigi	return (VM_PAGER_OK);
538262152Sluigi}
539262152Sluigi
540262152Sluigi
541262152Sluigistatic struct cdev_pager_ops netmap_cdev_pager_ops = {
542262152Sluigi	.cdev_pg_ctor = netmap_dev_pager_ctor,
543262152Sluigi	.cdev_pg_dtor = netmap_dev_pager_dtor,
544262152Sluigi	.cdev_pg_fault = netmap_dev_pager_fault,
545262152Sluigi};
546262152Sluigi
547262152Sluigi
548262152Sluigistatic int
549262152Sluiginetmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff,
550262152Sluigi	vm_size_t objsize,  vm_object_t *objp, int prot)
551262152Sluigi{
552262152Sluigi	int error;
553262152Sluigi	struct netmap_vm_handle_t *vmh;
554262152Sluigi	struct netmap_priv_d *priv;
555262152Sluigi	vm_object_t obj;
556262152Sluigi
557262152Sluigi	if (netmap_verbose)
558262152Sluigi		D("cdev %p foff %jd size %jd objp %p prot %d", cdev,
559262152Sluigi		    (intmax_t )*foff, (intmax_t )objsize, objp, prot);
560262152Sluigi
561262152Sluigi	vmh = malloc(sizeof(struct netmap_vm_handle_t), M_DEVBUF,
562262152Sluigi			      M_NOWAIT | M_ZERO);
563262152Sluigi	if (vmh == NULL)
564262152Sluigi		return ENOMEM;
565262152Sluigi	vmh->dev = cdev;
566262152Sluigi
567262152Sluigi	NMG_LOCK();
568262152Sluigi	error = devfs_get_cdevpriv((void**)&priv);
569262152Sluigi	if (error)
570262152Sluigi		goto err_unlock;
571262152Sluigi	vmh->priv = priv;
572262152Sluigi	priv->np_refcount++;
573262152Sluigi	NMG_UNLOCK();
574262152Sluigi
575262152Sluigi	error = netmap_get_memory(priv);
576262152Sluigi	if (error)
577262152Sluigi		goto err_deref;
578262152Sluigi
579262152Sluigi	obj = cdev_pager_allocate(vmh, OBJT_DEVICE,
580262152Sluigi		&netmap_cdev_pager_ops, objsize, prot,
581262152Sluigi		*foff, NULL);
582262152Sluigi	if (obj == NULL) {
583262152Sluigi		D("cdev_pager_allocate failed");
584262152Sluigi		error = EINVAL;
585262152Sluigi		goto err_deref;
586262152Sluigi	}
587262152Sluigi
588262152Sluigi	*objp = obj;
589262152Sluigi	return 0;
590262152Sluigi
591262152Sluigierr_deref:
592262152Sluigi	NMG_LOCK();
593262152Sluigi	priv->np_refcount--;
594262152Sluigierr_unlock:
595262152Sluigi	NMG_UNLOCK();
596262152Sluigi// err:
597262152Sluigi	free(vmh, M_DEVBUF);
598262152Sluigi	return error;
599262152Sluigi}
600262152Sluigi
601262152Sluigi
602262152Sluigi// XXX can we remove this ?
603262152Sluigistatic int
604262152Sluiginetmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
605262152Sluigi{
606262152Sluigi	if (netmap_verbose)
607262152Sluigi		D("dev %p fflag 0x%x devtype %d td %p",
608262152Sluigi			dev, fflag, devtype, td);
609262152Sluigi	return 0;
610262152Sluigi}
611262152Sluigi
612262152Sluigi
613262152Sluigistatic int
614262152Sluiginetmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
615262152Sluigi{
616262152Sluigi	struct netmap_priv_d *priv;
617262152Sluigi	int error;
618262152Sluigi
619262152Sluigi	(void)dev;
620262152Sluigi	(void)oflags;
621262152Sluigi	(void)devtype;
622262152Sluigi	(void)td;
623262152Sluigi
624262152Sluigi	// XXX wait or nowait ?
625262152Sluigi	priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
626262152Sluigi			      M_NOWAIT | M_ZERO);
627262152Sluigi	if (priv == NULL)
628262152Sluigi		return ENOMEM;
629262152Sluigi
630262152Sluigi	error = devfs_set_cdevpriv(priv, netmap_dtor);
631262152Sluigi	if (error)
632262152Sluigi	        return error;
633262152Sluigi
634262152Sluigi	priv->np_refcount = 1;
635262152Sluigi
636262152Sluigi	return 0;
637262152Sluigi}
638262152Sluigi
/******************** kqueue support ****************/

/*
 * The OS_selwakeup also needs to issue a KNOTE_UNLOCKED.
 * We use a non-zero argument to distinguish the call from the one
 * in kevent_scan() which instead also needs to run netmap_poll().
 * The knote uses a global mutex for the time being. We might
 * try to reuse the one in the si, but it is not allocated
 * permanently so it might be a bit tricky.
 *
 * The *kqfilter function registers one or the other f_event
 * depending on read or write mode.
 * In the call to f_event() td_fpop is NULL, so any child function
 * calling devfs_get_cdevpriv() would fail - and we need it in
 * netmap_poll(). As a workaround we store priv into kn->kn_hook
 * and pass it as first argument to netmap_poll(), which then
 * uses the failure to tell that we are called from f_event()
 * and do not need the selrecord().
 */
658262152Sluigi
659262152Sluigi
660262152Sluigivoid
661278779Sluigifreebsd_selwakeup(struct nm_selinfo *si, int pri)
662262152Sluigi{
663262152Sluigi	if (netmap_verbose)
664278779Sluigi		D("on knote %p", &si->si.si_note);
665278779Sluigi	selwakeuppri(&si->si, pri);
666262152Sluigi	/* use a non-zero hint to tell the notification from the
667262152Sluigi	 * call done in kqueue_scan() which uses 0
668262152Sluigi	 */
669278779Sluigi	KNOTE_UNLOCKED(&si->si.si_note, 0x100 /* notification */);
670262152Sluigi}
671262152Sluigi
672262152Sluigistatic void
673262152Sluiginetmap_knrdetach(struct knote *kn)
674262152Sluigi{
675262152Sluigi	struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook;
676278779Sluigi	struct selinfo *si = &priv->np_rxsi->si;
677262152Sluigi
678262152Sluigi	D("remove selinfo %p", si);
679262152Sluigi	knlist_remove(&si->si_note, kn, 0);
680262152Sluigi}
681262152Sluigi
682262152Sluigistatic void
683262152Sluiginetmap_knwdetach(struct knote *kn)
684262152Sluigi{
685262152Sluigi	struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook;
686278779Sluigi	struct selinfo *si = &priv->np_txsi->si;
687262152Sluigi
688262152Sluigi	D("remove selinfo %p", si);
689262152Sluigi	knlist_remove(&si->si_note, kn, 0);
690262152Sluigi}
691262152Sluigi
692262152Sluigi/*
693262152Sluigi * callback from notifies (generated externally) and our
694262152Sluigi * calls to kevent(). The former we just return 1 (ready)
695262152Sluigi * since we do not know better.
696262152Sluigi * In the latter we call netmap_poll and return 0/1 accordingly.
697262152Sluigi */
698262152Sluigistatic int
699262152Sluiginetmap_knrw(struct knote *kn, long hint, int events)
700262152Sluigi{
701262152Sluigi	struct netmap_priv_d *priv;
702262152Sluigi	int revents;
703262152Sluigi
704262152Sluigi	if (hint != 0) {
705262152Sluigi		ND(5, "call from notify");
706262152Sluigi		return 1; /* assume we are ready */
707262152Sluigi	}
708262152Sluigi	priv = kn->kn_hook;
709262152Sluigi	/* the notification may come from an external thread,
710262152Sluigi	 * in which case we do not want to run the netmap_poll
711262152Sluigi	 * This should be filtered above, but check just in case.
712262152Sluigi	 */
713262152Sluigi	if (curthread != priv->np_td) { /* should not happen */
714262152Sluigi		RD(5, "curthread changed %p %p", curthread, priv->np_td);
715262152Sluigi		return 1;
716262152Sluigi	} else {
717262152Sluigi		revents = netmap_poll((void *)priv, events, curthread);
718262152Sluigi		return (events & revents) ? 1 : 0;
719262152Sluigi	}
720262152Sluigi}
721262152Sluigi
722262152Sluigistatic int
723262152Sluiginetmap_knread(struct knote *kn, long hint)
724262152Sluigi{
725262152Sluigi	return netmap_knrw(kn, hint, POLLIN);
726262152Sluigi}
727262152Sluigi
728262152Sluigistatic int
729262152Sluiginetmap_knwrite(struct knote *kn, long hint)
730262152Sluigi{
731262152Sluigi	return netmap_knrw(kn, hint, POLLOUT);
732262152Sluigi}
733262152Sluigi
734262152Sluigistatic struct filterops netmap_rfiltops = {
735262152Sluigi	.f_isfd = 1,
736262152Sluigi	.f_detach = netmap_knrdetach,
737262152Sluigi	.f_event = netmap_knread,
738262152Sluigi};
739262152Sluigi
740262152Sluigistatic struct filterops netmap_wfiltops = {
741262152Sluigi	.f_isfd = 1,
742262152Sluigi	.f_detach = netmap_knwdetach,
743262152Sluigi	.f_event = netmap_knwrite,
744262152Sluigi};
745262152Sluigi
746262152Sluigi
747262152Sluigi/*
748262152Sluigi * This is called when a thread invokes kevent() to record
749262152Sluigi * a change in the configuration of the kqueue().
750262152Sluigi * The 'priv' should be the same as in the netmap device.
751262152Sluigi */
752262152Sluigistatic int
753262152Sluiginetmap_kqfilter(struct cdev *dev, struct knote *kn)
754262152Sluigi{
755262152Sluigi	struct netmap_priv_d *priv;
756262152Sluigi	int error;
757262152Sluigi	struct netmap_adapter *na;
758278779Sluigi	struct nm_selinfo *si;
759262152Sluigi	int ev = kn->kn_filter;
760262152Sluigi
761262152Sluigi	if (ev != EVFILT_READ && ev != EVFILT_WRITE) {
762262152Sluigi		D("bad filter request %d", ev);
763262152Sluigi		return 1;
764262152Sluigi	}
765262152Sluigi	error = devfs_get_cdevpriv((void**)&priv);
766262152Sluigi	if (error) {
767262152Sluigi		D("device not yet setup");
768262152Sluigi		return 1;
769262152Sluigi	}
770262152Sluigi	na = priv->np_na;
771262152Sluigi	if (na == NULL) {
772262152Sluigi		D("no netmap adapter for this file descriptor");
773262152Sluigi		return 1;
774262152Sluigi	}
775262152Sluigi	/* the si is indicated in the priv */
776262152Sluigi	si = (ev == EVFILT_WRITE) ? priv->np_txsi : priv->np_rxsi;
777262152Sluigi	// XXX lock(priv) ?
778262152Sluigi	kn->kn_fop = (ev == EVFILT_WRITE) ?
779262152Sluigi		&netmap_wfiltops : &netmap_rfiltops;
780262152Sluigi	kn->kn_hook = priv;
781278779Sluigi	knlist_add(&si->si.si_note, kn, 1);
782262152Sluigi	// XXX unlock(priv)
783262152Sluigi	ND("register %p %s td %p priv %p kn %p np_nifp %p kn_fp/fpop %s",
784262152Sluigi		na, na->ifp->if_xname, curthread, priv, kn,
785262152Sluigi		priv->np_nifp,
786262152Sluigi		kn->kn_fp == curthread->td_fpop ? "match" : "MISMATCH");
787262152Sluigi	return 0;
788262152Sluigi}
789262152Sluigi
790262152Sluigistruct cdevsw netmap_cdevsw = {
791262152Sluigi	.d_version = D_VERSION,
792262152Sluigi	.d_name = "netmap",
793262152Sluigi	.d_open = netmap_open,
794262152Sluigi	.d_mmap_single = netmap_mmap_single,
795262152Sluigi	.d_ioctl = netmap_ioctl,
796262152Sluigi	.d_poll = netmap_poll,
797262152Sluigi	.d_kqfilter = netmap_kqfilter,
798262152Sluigi	.d_close = netmap_close,
799262152Sluigi};
/*--- end of kqueue support ----*/
801262152Sluigi
802262152Sluigi/*
803262152Sluigi * Kernel entry point.
804262152Sluigi *
805262152Sluigi * Initialize/finalize the module and return.
806262152Sluigi *
807262152Sluigi * Return 0 on success, errno on failure.
808262152Sluigi */
809262152Sluigistatic int
810262152Sluiginetmap_loader(__unused struct module *module, int event, __unused void *arg)
811262152Sluigi{
812262152Sluigi	int error = 0;
813262152Sluigi
814262152Sluigi	switch (event) {
815262152Sluigi	case MOD_LOAD:
816262152Sluigi		error = netmap_init();
817262152Sluigi		break;
818262152Sluigi
819262152Sluigi	case MOD_UNLOAD:
820262152Sluigi		netmap_fini();
821262152Sluigi		break;
822262152Sluigi
823262152Sluigi	default:
824262152Sluigi		error = EOPNOTSUPP;
825262152Sluigi		break;
826262152Sluigi	}
827262152Sluigi
828262152Sluigi	return (error);
829262152Sluigi}
830262152Sluigi
831262152Sluigi
832262152SluigiDEV_MODULE(netmap, netmap_loader, NULL);
833