netmap_freebsd.c revision 275358
/*
 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* $FreeBSD: head/sys/dev/netmap/netmap_freebsd.c 275358 2014-12-01 11:45:24Z hselasky $ */
27259412Sluigi
28259412Sluigi#include <sys/types.h>
29259412Sluigi#include <sys/module.h>
30259412Sluigi#include <sys/errno.h>
31259412Sluigi#include <sys/param.h>  /* defines used in kernel.h */
32261909Sluigi#include <sys/poll.h>  /* POLLIN, POLLOUT */
33259412Sluigi#include <sys/kernel.h> /* types used in module initialization */
34259412Sluigi#include <sys/conf.h>	/* DEV_MODULE */
35261909Sluigi#include <sys/endian.h>
36259412Sluigi
37259412Sluigi#include <sys/rwlock.h>
38259412Sluigi
39259412Sluigi#include <vm/vm.h>      /* vtophys */
40259412Sluigi#include <vm/pmap.h>    /* vtophys */
41259412Sluigi#include <vm/vm_param.h>
42259412Sluigi#include <vm/vm_object.h>
43259412Sluigi#include <vm/vm_page.h>
44259412Sluigi#include <vm/vm_pager.h>
45259412Sluigi#include <vm/uma.h>
46259412Sluigi
47259412Sluigi
48259412Sluigi#include <sys/malloc.h>
49259412Sluigi#include <sys/socket.h> /* sockaddrs */
50259412Sluigi#include <sys/selinfo.h>
51259412Sluigi#include <net/if.h>
52259412Sluigi#include <net/if_var.h>
53270063Sluigi#include <net/if_types.h> /* IFT_ETHER */
54270063Sluigi#include <net/ethernet.h> /* ether_ifdetach */
55270063Sluigi#include <net/if_dl.h> /* LLADDR */
56259412Sluigi#include <machine/bus.h>        /* bus_dmamap_* */
57261909Sluigi#include <netinet/in.h>		/* in6_cksum_pseudo() */
58261909Sluigi#include <machine/in_cksum.h>  /* in_pseudo(), in_cksum_hdr() */
59259412Sluigi
60259412Sluigi#include <net/netmap.h>
61259412Sluigi#include <dev/netmap/netmap_kern.h>
62259412Sluigi#include <dev/netmap/netmap_mem2.h>
63259412Sluigi
64259412Sluigi
65259412Sluigi/* ======================== FREEBSD-SPECIFIC ROUTINES ================== */
66259412Sluigi
67267180Sluigirawsum_t
68267180Sluiginm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum)
69261909Sluigi{
70261909Sluigi	/* TODO XXX please use the FreeBSD implementation for this. */
71261909Sluigi	uint16_t *words = (uint16_t *)data;
72261909Sluigi	int nw = len / 2;
73261909Sluigi	int i;
74261909Sluigi
75261909Sluigi	for (i = 0; i < nw; i++)
76261909Sluigi		cur_sum += be16toh(words[i]);
77261909Sluigi
78261909Sluigi	if (len & 1)
79261909Sluigi		cur_sum += (data[len-1] << 8);
80261909Sluigi
81261909Sluigi	return cur_sum;
82261909Sluigi}
83261909Sluigi
84261909Sluigi/* Fold a raw checksum: 'cur_sum' is in host byte order, while the
85261909Sluigi * return value is in network byte order.
86261909Sluigi */
87267180Sluigiuint16_t
88267180Sluiginm_csum_fold(rawsum_t cur_sum)
89261909Sluigi{
90261909Sluigi	/* TODO XXX please use the FreeBSD implementation for this. */
91261909Sluigi	while (cur_sum >> 16)
92261909Sluigi		cur_sum = (cur_sum & 0xFFFF) + (cur_sum >> 16);
93261909Sluigi
94261909Sluigi	return htobe16((~cur_sum) & 0xFFFF);
95261909Sluigi}
96261909Sluigi
97270063Sluigiuint16_t nm_csum_ipv4(struct nm_iphdr *iph)
98261909Sluigi{
99261909Sluigi#if 0
100261909Sluigi	return in_cksum_hdr((void *)iph);
101261909Sluigi#else
102261909Sluigi	return nm_csum_fold(nm_csum_raw((uint8_t*)iph, sizeof(struct nm_iphdr), 0));
103261909Sluigi#endif
104261909Sluigi}
105261909Sluigi
/*
 * Compute the TCP/UDP checksum of an IPv4 packet and store it in *check.
 *
 *   iph      IPv4 header (source, destination and protocol are read)
 *   data     start of the TCP/UDP header
 *   datalen  length of TCP/UDP header plus payload, in bytes
 *   check    where the checksum is written
 *
 * When the kernel is built without INET nothing is computed and a
 * message is logged the first time the function is called.
 */
void
nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
					size_t datalen, uint16_t *check)
{
#ifndef INET
	static int notsupported = 0;

	/* complain only once */
	if (notsupported == 0) {
		notsupported = 1;
		D("inet4 segmentation not supported");
	}
#else /* INET */
	uint16_t pseudolen = datalen + iph->protocol;

	/* Compute and insert the pseudo-header checksum. */
	*check = in_pseudo(iph->saddr, iph->daddr, htobe16(pseudolen));

	/*
	 * Checksum the TCP/UDP header + payload. The value stored above
	 * is meant to be included in this sum, which presumably relies
	 * on 'check' pointing inside 'data' -- confirm with the callers.
	 */
	*check = nm_csum_fold(nm_csum_raw(data, datalen, 0));
#endif /* INET */
}
128261909Sluigi
/*
 * IPv6 counterpart of nm_csum_tcpudp_ipv4(): compute the TCP/UDP
 * checksum for the packet described by 'ip6h' and store it in *check.
 *
 *   ip6h     IPv6 header (nexthdr is read here, the rest by
 *            in6_cksum_pseudo())
 *   data     start of the TCP/UDP header
 *   datalen  length of TCP/UDP header plus payload, in bytes
 *   check    where the checksum is written
 *
 * When the kernel is built without INET6 nothing is computed and a
 * message is logged the first time the function is called.
 */
void
nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
					size_t datalen, uint16_t *check)
{
#ifndef INET6
	static int notsupported = 0;

	/* complain only once */
	if (notsupported == 0) {
		notsupported = 1;
		D("inet6 segmentation not supported");
	}
#else /* INET6 */
	/* first the pseudo-header sum ... */
	*check = in6_cksum_pseudo((void*)ip6h, datalen, ip6h->nexthdr, 0);
	/* ... then the sum over TCP/UDP header and payload */
	*check = nm_csum_fold(nm_csum_raw(data, datalen, 0));
#endif /* INET6 */
}
144261909Sluigi
145261909Sluigi
146259412Sluigi/*
147259412Sluigi * Intercept the rx routine in the standard device driver.
148259412Sluigi * Second argument is non-zero to intercept, 0 to restore
149259412Sluigi */
150259412Sluigiint
151259412Sluiginetmap_catch_rx(struct netmap_adapter *na, int intercept)
152259412Sluigi{
153270063Sluigi	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
154259412Sluigi	struct ifnet *ifp = na->ifp;
155259412Sluigi
156259412Sluigi	if (intercept) {
157259412Sluigi		if (gna->save_if_input) {
158259412Sluigi			D("cannot intercept again");
159259412Sluigi			return EINVAL; /* already set */
160259412Sluigi		}
161259412Sluigi		gna->save_if_input = ifp->if_input;
162259412Sluigi		ifp->if_input = generic_rx_handler;
163259412Sluigi	} else {
164259412Sluigi		if (!gna->save_if_input){
165259412Sluigi			D("cannot restore");
166259412Sluigi			return EINVAL;  /* not saved */
167259412Sluigi		}
168259412Sluigi		ifp->if_input = gna->save_if_input;
169259412Sluigi		gna->save_if_input = NULL;
170259412Sluigi	}
171259412Sluigi
172259412Sluigi	return 0;
173259412Sluigi}
174259412Sluigi
175260368Sluigi
176259412Sluigi/*
177259412Sluigi * Intercept the packet steering routine in the tx path,
178259412Sluigi * so that we can decide which queue is used for an mbuf.
179259412Sluigi * Second argument is non-zero to intercept, 0 to restore.
180261909Sluigi * On freebsd we just intercept if_transmit.
181259412Sluigi */
182259412Sluigivoid
183260368Sluiginetmap_catch_tx(struct netmap_generic_adapter *gna, int enable)
184259412Sluigi{
185260368Sluigi	struct netmap_adapter *na = &gna->up.up;
186260368Sluigi	struct ifnet *ifp = na->ifp;
187260368Sluigi
188259412Sluigi	if (enable) {
189260368Sluigi		na->if_transmit = ifp->if_transmit;
190260368Sluigi		ifp->if_transmit = netmap_transmit;
191259412Sluigi	} else {
192260368Sluigi		ifp->if_transmit = na->if_transmit;
193259412Sluigi	}
194259412Sluigi}
195259412Sluigi
196260368Sluigi
197261909Sluigi/*
198261909Sluigi * Transmit routine used by generic_netmap_txsync(). Returns 0 on success
199259412Sluigi * and non-zero on error (which may be packet drops or other errors).
200259412Sluigi * addr and len identify the netmap buffer, m is the (preallocated)
201259412Sluigi * mbuf to use for transmissions.
202259412Sluigi *
203259412Sluigi * We should add a reference to the mbuf so the m_freem() at the end
204259412Sluigi * of the transmission does not consume resources.
205259412Sluigi *
206259412Sluigi * On FreeBSD, and on multiqueue cards, we can force the queue using
207275358Shselasky *      if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
208259412Sluigi *              i = m->m_pkthdr.flowid % adapter->num_queues;
209259412Sluigi *      else
210259412Sluigi *              i = curcpu % adapter->num_queues;
211259412Sluigi *
212259412Sluigi */
213259412Sluigiint
214259412Sluigigeneric_xmit_frame(struct ifnet *ifp, struct mbuf *m,
215259412Sluigi	void *addr, u_int len, u_int ring_nr)
216259412Sluigi{
217259412Sluigi	int ret;
218259412Sluigi
219267180Sluigi	/*
220267180Sluigi	 * The mbuf should be a cluster from our special pool,
221267180Sluigi	 * so we do not need to do an m_copyback but just copy
222267180Sluigi	 * (and eventually, just reference the netmap buffer)
223267180Sluigi	 */
224259412Sluigi
225270063Sluigi	if (GET_MBUF_REFCNT(m) != 1) {
226267180Sluigi		D("invalid refcnt %d for %p",
227270063Sluigi			GET_MBUF_REFCNT(m), m);
228267180Sluigi		panic("in generic_xmit_frame");
229267180Sluigi	}
230267180Sluigi	// XXX the ext_size check is unnecessary if we link the netmap buf
231267180Sluigi	if (m->m_ext.ext_size < len) {
232267180Sluigi		RD(5, "size %d < len %d", m->m_ext.ext_size, len);
233267180Sluigi		len = m->m_ext.ext_size;
234267180Sluigi	}
235267180Sluigi	if (0) { /* XXX seems to have negligible benefits */
236267180Sluigi		m->m_ext.ext_buf = m->m_data = addr;
237267180Sluigi	} else {
238267180Sluigi		bcopy(addr, m->m_data, len);
239267180Sluigi	}
240267180Sluigi	m->m_len = m->m_pkthdr.len = len;
241267180Sluigi	// inc refcount. All ours, we could skip the atomic
242270063Sluigi	atomic_fetchadd_int(PNT_MBUF_REFCNT(m), 1);
243275358Shselasky	M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
244259412Sluigi	m->m_pkthdr.flowid = ring_nr;
245259412Sluigi	m->m_pkthdr.rcvif = ifp; /* used for tx notification */
246260368Sluigi	ret = NA(ifp)->if_transmit(ifp, m);
247259412Sluigi	return ret;
248259412Sluigi}
249259412Sluigi
250260368Sluigi
#if __FreeBSD_version >= 1100005
/*
 * Return the netmap adapter attached to 'ifp', as given by NA().
 * Only compiled for __FreeBSD_version >= 1100005.
 */
struct netmap_adapter *
netmap_getna(if_t ifp)
{
	struct ifnet *real_ifp = (struct ifnet *)ifp;

	return (NA(real_ifp));
}
#endif /* __FreeBSD_version >= 1100005 */
258267170Sluigi
/*
 * The following two functions are placeholders until we have a generic
 * way to extract the info from the ifp.
 *
 * generic_find_num_desc() only logs the values passed in through
 * 'tx' and 'rx', leaves them untouched and returns 0.
 */
int
generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx)
{
	(void)ifp;	/* nothing is read from the interface yet */

	D("called, in tx %d rx %d", *tx, *rx);
	return 0;
}
269259412Sluigi
270260368Sluigi
271259412Sluigivoid
272259412Sluigigeneric_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq)
273259412Sluigi{
274267180Sluigi	D("called, in txq %d rxq %d", *txq, *rxq);
275261909Sluigi	*txq = netmap_generic_rings;
276261909Sluigi	*rxq = netmap_generic_rings;
277259412Sluigi}
278259412Sluigi
279260368Sluigi
280267180Sluigivoid
281270063Sluiginetmap_mitigation_init(struct nm_generic_mit *mit, int idx, struct netmap_adapter *na)
282259412Sluigi{
283259412Sluigi	ND("called");
284261909Sluigi	mit->mit_pending = 0;
285270063Sluigi	mit->mit_ring_idx = idx;
286261909Sluigi	mit->mit_na = na;
287259412Sluigi}
288259412Sluigi
289259412Sluigi
/* Start mitigation on 'mit'. Nothing to do in this implementation. */
void
netmap_mitigation_start(struct nm_generic_mit *mit)
{
	(void)mit;
	ND("called");
}
295259412Sluigi
296260368Sluigi
/* Restart mitigation on 'mit'. Nothing to do in this implementation. */
void
netmap_mitigation_restart(struct nm_generic_mit *mit)
{
	(void)mit;
	ND("called");
}
302259412Sluigi
303260368Sluigi
/* Tell whether mitigation is active on 'mit'. Here it always returns 0. */
int
netmap_mitigation_active(struct nm_generic_mit *mit)
{
	(void)mit;
	ND("called");
	return 0;
}
310259412Sluigi
311260368Sluigi
/* Release the mitigation state in 'mit'. Nothing to do in this implementation. */
void
netmap_mitigation_cleanup(struct nm_generic_mit *mit)
{
	(void)mit;
	ND("called");
}
317259412Sluigi
318270063Sluigistatic int
319270063Sluiginm_vi_dummy(struct ifnet *ifp, u_long cmd, caddr_t addr)
320270063Sluigi{
321270063Sluigi	return EINVAL;
322270063Sluigi}
323260368Sluigi
/*
 * if_start-style routine that is never expected to run:
 * it panics if it is ever called.
 */
static void
nm_vi_start(struct ifnet *ifp)
{
	(void)ifp;

	panic("nm_vi_start() must not be called");
}
329270063Sluigi
330259412Sluigi/*
331270063Sluigi * Index manager of persistent virtual interfaces.
332270063Sluigi * It is used to decide the lowest byte of the MAC address.
333270063Sluigi * We use the same algorithm with management of bridge port index.
334270063Sluigi */
335270063Sluigi#define NM_VI_MAX	255
336270063Sluigistatic struct {
337270063Sluigi	uint8_t index[NM_VI_MAX]; /* XXX just for a reasonable number */
338270063Sluigi	uint8_t active;
339270063Sluigi	struct mtx lock;
340270063Sluigi} nm_vi_indices;
341270063Sluigi
342270063Sluigivoid
343270063Sluiginm_vi_init_index(void)
344270063Sluigi{
345270063Sluigi	int i;
346270063Sluigi	for (i = 0; i < NM_VI_MAX; i++)
347270063Sluigi		nm_vi_indices.index[i] = i;
348270063Sluigi	nm_vi_indices.active = 0;
349270063Sluigi	mtx_init(&nm_vi_indices.lock, "nm_vi_indices_lock", NULL, MTX_DEF);
350270063Sluigi}
351270063Sluigi
352270063Sluigi/* return -1 if no index available */
353270063Sluigistatic int
354270063Sluiginm_vi_get_index(void)
355270063Sluigi{
356270063Sluigi	int ret;
357270063Sluigi
358270063Sluigi	mtx_lock(&nm_vi_indices.lock);
359270063Sluigi	ret = nm_vi_indices.active == NM_VI_MAX ? -1 :
360270063Sluigi		nm_vi_indices.index[nm_vi_indices.active++];
361270063Sluigi	mtx_unlock(&nm_vi_indices.lock);
362270063Sluigi	return ret;
363270063Sluigi}
364270063Sluigi
365270063Sluigistatic void
366270063Sluiginm_vi_free_index(uint8_t val)
367270063Sluigi{
368270063Sluigi	int i, lim;
369270063Sluigi
370270063Sluigi	mtx_lock(&nm_vi_indices.lock);
371270063Sluigi	lim = nm_vi_indices.active;
372270063Sluigi	for (i = 0; i < lim; i++) {
373270063Sluigi		if (nm_vi_indices.index[i] == val) {
374270063Sluigi			/* swap index[lim-1] and j */
375270063Sluigi			int tmp = nm_vi_indices.index[lim-1];
376270063Sluigi			nm_vi_indices.index[lim-1] = val;
377270063Sluigi			nm_vi_indices.index[i] = tmp;
378270063Sluigi			nm_vi_indices.active--;
379270063Sluigi			break;
380270063Sluigi		}
381270063Sluigi	}
382270063Sluigi	if (lim == nm_vi_indices.active)
383270063Sluigi		D("funny, index %u didn't found", val);
384270063Sluigi	mtx_unlock(&nm_vi_indices.lock);
385270063Sluigi}
386270063Sluigi#undef NM_VI_MAX
387270063Sluigi
388270063Sluigi/*
389270063Sluigi * Implementation of a netmap-capable virtual interface that
390270063Sluigi * registered to the system.
391270063Sluigi * It is based on if_tap.c and ip_fw_log.c in FreeBSD 9.
392270063Sluigi *
393270063Sluigi * Note: Linux sets refcount to 0 on allocation of net_device,
394270063Sluigi * then increments it on registration to the system.
395270063Sluigi * FreeBSD sets refcount to 1 on if_alloc(), and does not
396270063Sluigi * increment this refcount on if_attach().
397270063Sluigi */
398270063Sluigiint
399270063Sluiginm_vi_persist(const char *name, struct ifnet **ret)
400270063Sluigi{
401270063Sluigi	struct ifnet *ifp;
402270063Sluigi	u_short macaddr_hi;
403270063Sluigi	uint32_t macaddr_mid;
404270063Sluigi	u_char eaddr[6];
405270063Sluigi	int unit = nm_vi_get_index(); /* just to decide MAC address */
406270063Sluigi
407270063Sluigi	if (unit < 0)
408270063Sluigi		return EBUSY;
409270063Sluigi	/*
410270063Sluigi	 * We use the same MAC address generation method with tap
411270063Sluigi	 * except for the highest octet is 00:be instead of 00:bd
412270063Sluigi	 */
413270063Sluigi	macaddr_hi = htons(0x00be); /* XXX tap + 1 */
414270063Sluigi	macaddr_mid = (uint32_t) ticks;
415270063Sluigi	bcopy(&macaddr_hi, eaddr, sizeof(short));
416270063Sluigi	bcopy(&macaddr_mid, &eaddr[2], sizeof(uint32_t));
417270063Sluigi	eaddr[5] = (uint8_t)unit;
418270063Sluigi
419270063Sluigi	ifp = if_alloc(IFT_ETHER);
420270063Sluigi	if (ifp == NULL) {
421270063Sluigi		D("if_alloc failed");
422270063Sluigi		return ENOMEM;
423270063Sluigi	}
424270063Sluigi	if_initname(ifp, name, IF_DUNIT_NONE);
425270063Sluigi	ifp->if_mtu = 65536;
426270063Sluigi	ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST;
427270063Sluigi	ifp->if_init = (void *)nm_vi_dummy;
428270063Sluigi	ifp->if_ioctl = nm_vi_dummy;
429270063Sluigi	ifp->if_start = nm_vi_start;
430270063Sluigi	ifp->if_mtu = ETHERMTU;
431270063Sluigi	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
432270063Sluigi	ifp->if_capabilities |= IFCAP_LINKSTATE;
433270063Sluigi	ifp->if_capenable |= IFCAP_LINKSTATE;
434270063Sluigi
435270063Sluigi	ether_ifattach(ifp, eaddr);
436270063Sluigi	*ret = ifp;
437270063Sluigi	return 0;
438270063Sluigi}
/*
 * Unregister the virtual interface from the system and drop the final
 * refcount. The index encoded in the last byte of the link-level
 * address is released first.
 */
void
nm_vi_detach(struct ifnet *ifp)
{
	char *lladdr = (char *)IF_LLADDR(ifp);

	nm_vi_free_index(lladdr[5]);
	ether_ifdetach(ifp);
	if_free(ifp);
}
447270063Sluigi
/*
 * In order to track whether pages are still mapped, we hook into
 * the standard cdev_pager and intercept the constructor and
 * destructor.
 *
 * This is the handle passed to the pager callbacks.
 */

struct netmap_vm_handle_t {
	struct cdev 		*dev;	/* device referenced while the mapping exists */
	struct netmap_priv_d	*priv;	/* per-descriptor state behind the mapping */
};
458259412Sluigi
459260368Sluigi
460259412Sluigistatic int
461259412Sluiginetmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
462259412Sluigi    vm_ooffset_t foff, struct ucred *cred, u_short *color)
463259412Sluigi{
464259412Sluigi	struct netmap_vm_handle_t *vmh = handle;
465261909Sluigi
466261909Sluigi	if (netmap_verbose)
467261909Sluigi		D("handle %p size %jd prot %d foff %jd",
468261909Sluigi			handle, (intmax_t)size, prot, (intmax_t)foff);
469274354Sluigi	if (color)
470274354Sluigi		*color = 0;
471259412Sluigi	dev_ref(vmh->dev);
472259412Sluigi	return 0;
473259412Sluigi}
474259412Sluigi
475259412Sluigi
476259412Sluigistatic void
477259412Sluiginetmap_dev_pager_dtor(void *handle)
478259412Sluigi{
479259412Sluigi	struct netmap_vm_handle_t *vmh = handle;
480259412Sluigi	struct cdev *dev = vmh->dev;
481259412Sluigi	struct netmap_priv_d *priv = vmh->priv;
482261909Sluigi
483261909Sluigi	if (netmap_verbose)
484261909Sluigi		D("handle %p", handle);
485259412Sluigi	netmap_dtor(priv);
486259412Sluigi	free(vmh, M_DEVBUF);
487259412Sluigi	dev_rel(dev);
488259412Sluigi}
489259412Sluigi
490260368Sluigi
491259412Sluigistatic int
492259412Sluiginetmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
493259412Sluigi	int prot, vm_page_t *mres)
494259412Sluigi{
495259412Sluigi	struct netmap_vm_handle_t *vmh = object->handle;
496259412Sluigi	struct netmap_priv_d *priv = vmh->priv;
497259412Sluigi	vm_paddr_t paddr;
498259412Sluigi	vm_page_t page;
499259412Sluigi	vm_memattr_t memattr;
500259412Sluigi	vm_pindex_t pidx;
501259412Sluigi
502259412Sluigi	ND("object %p offset %jd prot %d mres %p",
503259412Sluigi			object, (intmax_t)offset, prot, mres);
504259412Sluigi	memattr = object->memattr;
505259412Sluigi	pidx = OFF_TO_IDX(offset);
506259412Sluigi	paddr = netmap_mem_ofstophys(priv->np_mref, offset);
507259412Sluigi	if (paddr == 0)
508259412Sluigi		return VM_PAGER_FAIL;
509259412Sluigi
510259412Sluigi	if (((*mres)->flags & PG_FICTITIOUS) != 0) {
511259412Sluigi		/*
512259412Sluigi		 * If the passed in result page is a fake page, update it with
513259412Sluigi		 * the new physical address.
514259412Sluigi		 */
515259412Sluigi		page = *mres;
516259412Sluigi		vm_page_updatefake(page, paddr, memattr);
517259412Sluigi	} else {
518259412Sluigi		/*
519259412Sluigi		 * Replace the passed in reqpage page with our own fake page and
520259412Sluigi		 * free up the all of the original pages.
521259412Sluigi		 */
522259412Sluigi#ifndef VM_OBJECT_WUNLOCK	/* FreeBSD < 10.x */
523259412Sluigi#define VM_OBJECT_WUNLOCK VM_OBJECT_UNLOCK
524259412Sluigi#define VM_OBJECT_WLOCK	VM_OBJECT_LOCK
525259412Sluigi#endif /* VM_OBJECT_WUNLOCK */
526259412Sluigi
527259412Sluigi		VM_OBJECT_WUNLOCK(object);
528259412Sluigi		page = vm_page_getfake(paddr, memattr);
529259412Sluigi		VM_OBJECT_WLOCK(object);
530259412Sluigi		vm_page_lock(*mres);
531259412Sluigi		vm_page_free(*mres);
532259412Sluigi		vm_page_unlock(*mres);
533259412Sluigi		*mres = page;
534259412Sluigi		vm_page_insert(page, object, pidx);
535259412Sluigi	}
536259412Sluigi	page->valid = VM_PAGE_BITS_ALL;
537259412Sluigi	return (VM_PAGER_OK);
538259412Sluigi}
539259412Sluigi
540259412Sluigi
541259412Sluigistatic struct cdev_pager_ops netmap_cdev_pager_ops = {
542259412Sluigi	.cdev_pg_ctor = netmap_dev_pager_ctor,
543259412Sluigi	.cdev_pg_dtor = netmap_dev_pager_dtor,
544259412Sluigi	.cdev_pg_fault = netmap_dev_pager_fault,
545259412Sluigi};
546259412Sluigi
547259412Sluigi
548259412Sluigistatic int
549259412Sluiginetmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff,
550259412Sluigi	vm_size_t objsize,  vm_object_t *objp, int prot)
551259412Sluigi{
552259412Sluigi	int error;
553259412Sluigi	struct netmap_vm_handle_t *vmh;
554259412Sluigi	struct netmap_priv_d *priv;
555259412Sluigi	vm_object_t obj;
556259412Sluigi
557261909Sluigi	if (netmap_verbose)
558261909Sluigi		D("cdev %p foff %jd size %jd objp %p prot %d", cdev,
559261909Sluigi		    (intmax_t )*foff, (intmax_t )objsize, objp, prot);
560259412Sluigi
561259412Sluigi	vmh = malloc(sizeof(struct netmap_vm_handle_t), M_DEVBUF,
562259412Sluigi			      M_NOWAIT | M_ZERO);
563259412Sluigi	if (vmh == NULL)
564259412Sluigi		return ENOMEM;
565259412Sluigi	vmh->dev = cdev;
566259412Sluigi
567259412Sluigi	NMG_LOCK();
568259412Sluigi	error = devfs_get_cdevpriv((void**)&priv);
569259412Sluigi	if (error)
570259412Sluigi		goto err_unlock;
571259412Sluigi	vmh->priv = priv;
572259412Sluigi	priv->np_refcount++;
573259412Sluigi	NMG_UNLOCK();
574259412Sluigi
575259412Sluigi	error = netmap_get_memory(priv);
576259412Sluigi	if (error)
577259412Sluigi		goto err_deref;
578259412Sluigi
579259412Sluigi	obj = cdev_pager_allocate(vmh, OBJT_DEVICE,
580259412Sluigi		&netmap_cdev_pager_ops, objsize, prot,
581259412Sluigi		*foff, NULL);
582259412Sluigi	if (obj == NULL) {
583259412Sluigi		D("cdev_pager_allocate failed");
584259412Sluigi		error = EINVAL;
585259412Sluigi		goto err_deref;
586259412Sluigi	}
587259412Sluigi
588259412Sluigi	*objp = obj;
589259412Sluigi	return 0;
590259412Sluigi
591259412Sluigierr_deref:
592259412Sluigi	NMG_LOCK();
593259412Sluigi	priv->np_refcount--;
594259412Sluigierr_unlock:
595259412Sluigi	NMG_UNLOCK();
596259412Sluigi// err:
597259412Sluigi	free(vmh, M_DEVBUF);
598259412Sluigi	return error;
599259412Sluigi}
600259412Sluigi
601259412Sluigi
602259412Sluigi// XXX can we remove this ?
603259412Sluigistatic int
604259412Sluiginetmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
605259412Sluigi{
606259412Sluigi	if (netmap_verbose)
607259412Sluigi		D("dev %p fflag 0x%x devtype %d td %p",
608259412Sluigi			dev, fflag, devtype, td);
609259412Sluigi	return 0;
610259412Sluigi}
611259412Sluigi
612259412Sluigi
613259412Sluigistatic int
614259412Sluiginetmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
615259412Sluigi{
616259412Sluigi	struct netmap_priv_d *priv;
617259412Sluigi	int error;
618259412Sluigi
619259412Sluigi	(void)dev;
620259412Sluigi	(void)oflags;
621259412Sluigi	(void)devtype;
622259412Sluigi	(void)td;
623259412Sluigi
624259412Sluigi	// XXX wait or nowait ?
625259412Sluigi	priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
626259412Sluigi			      M_NOWAIT | M_ZERO);
627259412Sluigi	if (priv == NULL)
628259412Sluigi		return ENOMEM;
629259412Sluigi
630259412Sluigi	error = devfs_set_cdevpriv(priv, netmap_dtor);
631259412Sluigi	if (error)
632259412Sluigi	        return error;
633259412Sluigi
634259412Sluigi	priv->np_refcount = 1;
635259412Sluigi
636259412Sluigi	return 0;
637259412Sluigi}
638259412Sluigi
/******************** kqueue support ****************/

/*
 * The OS_selwakeup also needs to issue a KNOTE_UNLOCKED.
 * We use a non-zero argument to distinguish the call from the one
 * in kevent_scan(), which also needs to run netmap_poll().
 * The knote uses a global mutex for the time being. We might
 * try to reuse the one in the si, but it is not allocated
 * permanently, so it might be a bit tricky.
 *
 * The *kqfilter function registers one or the other f_event
 * depending on read or write mode.
 * In the call to f_event() td_fpop is NULL, so any child function
 * calling devfs_get_cdevpriv() would fail - and we need it in
 * netmap_poll(). As a workaround we store priv into kn->kn_hook
 * and pass it as first argument to netmap_poll(), which then
 * uses the failure to tell that we are called from f_event()
 * and do not need the selrecord().
 */
658261909Sluigi
659261909Sluigi
660261909Sluigivoid
661274459Sluigifreebsd_selwakeup(struct nm_selinfo *si, int pri)
662261909Sluigi{
663261909Sluigi	if (netmap_verbose)
664274459Sluigi		D("on knote %p", &si->si.si_note);
665274459Sluigi	selwakeuppri(&si->si, pri);
666261909Sluigi	/* use a non-zero hint to tell the notification from the
667261909Sluigi	 * call done in kqueue_scan() which uses 0
668261909Sluigi	 */
669274459Sluigi	KNOTE_UNLOCKED(&si->si.si_note, 0x100 /* notification */);
670261909Sluigi}
671261909Sluigi
672261909Sluigistatic void
673261909Sluiginetmap_knrdetach(struct knote *kn)
674261909Sluigi{
675261909Sluigi	struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook;
676274459Sluigi	struct selinfo *si = &priv->np_rxsi->si;
677261909Sluigi
678261909Sluigi	D("remove selinfo %p", si);
679261909Sluigi	knlist_remove(&si->si_note, kn, 0);
680261909Sluigi}
681261909Sluigi
682261909Sluigistatic void
683261909Sluiginetmap_knwdetach(struct knote *kn)
684261909Sluigi{
685261909Sluigi	struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook;
686274459Sluigi	struct selinfo *si = &priv->np_txsi->si;
687261909Sluigi
688261909Sluigi	D("remove selinfo %p", si);
689261909Sluigi	knlist_remove(&si->si_note, kn, 0);
690261909Sluigi}
691261909Sluigi
692261909Sluigi/*
693261909Sluigi * callback from notifies (generated externally) and our
694261909Sluigi * calls to kevent(). The former we just return 1 (ready)
695261909Sluigi * since we do not know better.
696261909Sluigi * In the latter we call netmap_poll and return 0/1 accordingly.
697261909Sluigi */
698261909Sluigistatic int
699261909Sluiginetmap_knrw(struct knote *kn, long hint, int events)
700261909Sluigi{
701261909Sluigi	struct netmap_priv_d *priv;
702261909Sluigi	int revents;
703261909Sluigi
704261909Sluigi	if (hint != 0) {
705261909Sluigi		ND(5, "call from notify");
706261909Sluigi		return 1; /* assume we are ready */
707261909Sluigi	}
708261909Sluigi	priv = kn->kn_hook;
709261909Sluigi	/* the notification may come from an external thread,
710261909Sluigi	 * in which case we do not want to run the netmap_poll
711261909Sluigi	 * This should be filtered above, but check just in case.
712261909Sluigi	 */
713261909Sluigi	if (curthread != priv->np_td) { /* should not happen */
714261909Sluigi		RD(5, "curthread changed %p %p", curthread, priv->np_td);
715261909Sluigi		return 1;
716261909Sluigi	} else {
717261909Sluigi		revents = netmap_poll((void *)priv, events, curthread);
718261909Sluigi		return (events & revents) ? 1 : 0;
719261909Sluigi	}
720261909Sluigi}
721261909Sluigi
722261909Sluigistatic int
723261909Sluiginetmap_knread(struct knote *kn, long hint)
724261909Sluigi{
725261909Sluigi	return netmap_knrw(kn, hint, POLLIN);
726261909Sluigi}
727261909Sluigi
728261909Sluigistatic int
729261909Sluiginetmap_knwrite(struct knote *kn, long hint)
730261909Sluigi{
731261909Sluigi	return netmap_knrw(kn, hint, POLLOUT);
732261909Sluigi}
733261909Sluigi
734261909Sluigistatic struct filterops netmap_rfiltops = {
735261909Sluigi	.f_isfd = 1,
736261909Sluigi	.f_detach = netmap_knrdetach,
737261909Sluigi	.f_event = netmap_knread,
738261909Sluigi};
739261909Sluigi
740261909Sluigistatic struct filterops netmap_wfiltops = {
741261909Sluigi	.f_isfd = 1,
742261909Sluigi	.f_detach = netmap_knwdetach,
743261909Sluigi	.f_event = netmap_knwrite,
744261909Sluigi};
745261909Sluigi
746261909Sluigi
747261909Sluigi/*
748261909Sluigi * This is called when a thread invokes kevent() to record
749261909Sluigi * a change in the configuration of the kqueue().
750261909Sluigi * The 'priv' should be the same as in the netmap device.
751261909Sluigi */
752261909Sluigistatic int
753261909Sluiginetmap_kqfilter(struct cdev *dev, struct knote *kn)
754261909Sluigi{
755261909Sluigi	struct netmap_priv_d *priv;
756261909Sluigi	int error;
757261909Sluigi	struct netmap_adapter *na;
758274459Sluigi	struct nm_selinfo *si;
759261909Sluigi	int ev = kn->kn_filter;
760261909Sluigi
761261909Sluigi	if (ev != EVFILT_READ && ev != EVFILT_WRITE) {
762261909Sluigi		D("bad filter request %d", ev);
763261909Sluigi		return 1;
764261909Sluigi	}
765261909Sluigi	error = devfs_get_cdevpriv((void**)&priv);
766261909Sluigi	if (error) {
767261909Sluigi		D("device not yet setup");
768261909Sluigi		return 1;
769261909Sluigi	}
770261909Sluigi	na = priv->np_na;
771261909Sluigi	if (na == NULL) {
772261909Sluigi		D("no netmap adapter for this file descriptor");
773261909Sluigi		return 1;
774261909Sluigi	}
775261909Sluigi	/* the si is indicated in the priv */
776261909Sluigi	si = (ev == EVFILT_WRITE) ? priv->np_txsi : priv->np_rxsi;
777261909Sluigi	// XXX lock(priv) ?
778261909Sluigi	kn->kn_fop = (ev == EVFILT_WRITE) ?
779261909Sluigi		&netmap_wfiltops : &netmap_rfiltops;
780261909Sluigi	kn->kn_hook = priv;
781274459Sluigi	knlist_add(&si->si.si_note, kn, 1);
782261909Sluigi	// XXX unlock(priv)
783261909Sluigi	ND("register %p %s td %p priv %p kn %p np_nifp %p kn_fp/fpop %s",
784261909Sluigi		na, na->ifp->if_xname, curthread, priv, kn,
785261909Sluigi		priv->np_nifp,
786261909Sluigi		kn->kn_fp == curthread->td_fpop ? "match" : "MISMATCH");
787261909Sluigi	return 0;
788261909Sluigi}
789261909Sluigi
790259412Sluigistruct cdevsw netmap_cdevsw = {
791259412Sluigi	.d_version = D_VERSION,
792259412Sluigi	.d_name = "netmap",
793259412Sluigi	.d_open = netmap_open,
794259412Sluigi	.d_mmap_single = netmap_mmap_single,
795259412Sluigi	.d_ioctl = netmap_ioctl,
796259412Sluigi	.d_poll = netmap_poll,
797261909Sluigi	.d_kqfilter = netmap_kqfilter,
798259412Sluigi	.d_close = netmap_close,
799259412Sluigi};
800261909Sluigi/*--- end of kqueue support ----*/
801259412Sluigi
802259412Sluigi/*
803259412Sluigi * Kernel entry point.
804259412Sluigi *
805259412Sluigi * Initialize/finalize the module and return.
806259412Sluigi *
807259412Sluigi * Return 0 on success, errno on failure.
808259412Sluigi */
809259412Sluigistatic int
810259412Sluiginetmap_loader(__unused struct module *module, int event, __unused void *arg)
811259412Sluigi{
812259412Sluigi	int error = 0;
813259412Sluigi
814259412Sluigi	switch (event) {
815259412Sluigi	case MOD_LOAD:
816259412Sluigi		error = netmap_init();
817259412Sluigi		break;
818259412Sluigi
819259412Sluigi	case MOD_UNLOAD:
820259412Sluigi		netmap_fini();
821259412Sluigi		break;
822259412Sluigi
823259412Sluigi	default:
824259412Sluigi		error = EOPNOTSUPP;
825259412Sluigi		break;
826259412Sluigi	}
827259412Sluigi
828259412Sluigi	return (error);
829259412Sluigi}
830259412Sluigi
831259412Sluigi
/* Declare the "netmap" module, with netmap_loader() as its event handler. */
DEV_MODULE(netmap, netmap_loader, NULL);
833