netmap_freebsd.c revision 275358
1259412Sluigi/* 2260368Sluigi * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. 3259412Sluigi * 4259412Sluigi * Redistribution and use in source and binary forms, with or without 5259412Sluigi * modification, are permitted provided that the following conditions 6259412Sluigi * are met: 7259412Sluigi * 1. Redistributions of source code must retain the above copyright 8259412Sluigi * notice, this list of conditions and the following disclaimer. 9259412Sluigi * 2. Redistributions in binary form must reproduce the above copyright 10259412Sluigi * notice, this list of conditions and the following disclaimer in the 11259412Sluigi * documentation and/or other materials provided with the distribution. 12259412Sluigi * 13259412Sluigi * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14259412Sluigi * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15259412Sluigi * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16259412Sluigi * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17259412Sluigi * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18259412Sluigi * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19259412Sluigi * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20259412Sluigi * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21259412Sluigi * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22259412Sluigi * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23259412Sluigi * SUCH DAMAGE. 24259412Sluigi */ 25259412Sluigi 26259412Sluigi/* $FreeBSD: head/sys/dev/netmap/netmap_freebsd.c 275358 2014-12-01 11:45:24Z hselasky $ */ 27259412Sluigi 28259412Sluigi#include <sys/types.h> 29259412Sluigi#include <sys/module.h> 30259412Sluigi#include <sys/errno.h> 31259412Sluigi#include <sys/param.h> /* defines used in kernel.h */ 32261909Sluigi#include <sys/poll.h> /* POLLIN, POLLOUT */ 33259412Sluigi#include <sys/kernel.h> /* types used in module initialization */ 34259412Sluigi#include <sys/conf.h> /* DEV_MODULE */ 35261909Sluigi#include <sys/endian.h> 36259412Sluigi 37259412Sluigi#include <sys/rwlock.h> 38259412Sluigi 39259412Sluigi#include <vm/vm.h> /* vtophys */ 40259412Sluigi#include <vm/pmap.h> /* vtophys */ 41259412Sluigi#include <vm/vm_param.h> 42259412Sluigi#include <vm/vm_object.h> 43259412Sluigi#include <vm/vm_page.h> 44259412Sluigi#include <vm/vm_pager.h> 45259412Sluigi#include <vm/uma.h> 46259412Sluigi 47259412Sluigi 48259412Sluigi#include <sys/malloc.h> 49259412Sluigi#include <sys/socket.h> /* sockaddrs */ 50259412Sluigi#include <sys/selinfo.h> 51259412Sluigi#include <net/if.h> 52259412Sluigi#include <net/if_var.h> 53270063Sluigi#include <net/if_types.h> /* IFT_ETHER */ 54270063Sluigi#include <net/ethernet.h> /* ether_ifdetach */ 55270063Sluigi#include <net/if_dl.h> /* LLADDR */ 56259412Sluigi#include <machine/bus.h> /* bus_dmamap_* */ 57261909Sluigi#include <netinet/in.h> /* in6_cksum_pseudo() */ 58261909Sluigi#include <machine/in_cksum.h> /* in_pseudo(), in_cksum_hdr() */ 59259412Sluigi 60259412Sluigi#include <net/netmap.h> 61259412Sluigi#include <dev/netmap/netmap_kern.h> 62259412Sluigi#include <dev/netmap/netmap_mem2.h> 63259412Sluigi 64259412Sluigi 65259412Sluigi/* ======================== FREEBSD-SPECIFIC ROUTINES ================== */ 66259412Sluigi 67267180Sluigirawsum_t 68267180Sluiginm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum) 69261909Sluigi{ 70261909Sluigi /* TODO XXX please use the FreeBSD implementation for this. */ 71261909Sluigi uint16_t *words = (uint16_t *)data; 72261909Sluigi int nw = len / 2; 73261909Sluigi int i; 74261909Sluigi 75261909Sluigi for (i = 0; i < nw; i++) 76261909Sluigi cur_sum += be16toh(words[i]); 77261909Sluigi 78261909Sluigi if (len & 1) 79261909Sluigi cur_sum += (data[len-1] << 8); 80261909Sluigi 81261909Sluigi return cur_sum; 82261909Sluigi} 83261909Sluigi 84261909Sluigi/* Fold a raw checksum: 'cur_sum' is in host byte order, while the 85261909Sluigi * return value is in network byte order. 86261909Sluigi */ 87267180Sluigiuint16_t 88267180Sluiginm_csum_fold(rawsum_t cur_sum) 89261909Sluigi{ 90261909Sluigi /* TODO XXX please use the FreeBSD implementation for this. */ 91261909Sluigi while (cur_sum >> 16) 92261909Sluigi cur_sum = (cur_sum & 0xFFFF) + (cur_sum >> 16); 93261909Sluigi 94261909Sluigi return htobe16((~cur_sum) & 0xFFFF); 95261909Sluigi} 96261909Sluigi 97270063Sluigiuint16_t nm_csum_ipv4(struct nm_iphdr *iph) 98261909Sluigi{ 99261909Sluigi#if 0 100261909Sluigi return in_cksum_hdr((void *)iph); 101261909Sluigi#else 102261909Sluigi return nm_csum_fold(nm_csum_raw((uint8_t*)iph, sizeof(struct nm_iphdr), 0)); 103261909Sluigi#endif 104261909Sluigi} 105261909Sluigi 106267180Sluigivoid 107267180Sluiginm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data, 108261909Sluigi size_t datalen, uint16_t *check) 109261909Sluigi{ 110262238Sluigi#ifdef INET 111261909Sluigi uint16_t pseudolen = datalen + iph->protocol; 112261909Sluigi 113261909Sluigi /* Compute and insert the pseudo-header cheksum. */ 114261909Sluigi *check = in_pseudo(iph->saddr, iph->daddr, 115261909Sluigi htobe16(pseudolen)); 116261909Sluigi /* Compute the checksum on TCP/UDP header + payload 117261909Sluigi * (includes the pseudo-header). 118261909Sluigi */ 119261909Sluigi *check = nm_csum_fold(nm_csum_raw(data, datalen, 0)); 120262238Sluigi#else 121262238Sluigi static int notsupported = 0; 122262238Sluigi if (!notsupported) { 123262238Sluigi notsupported = 1; 124262238Sluigi D("inet4 segmentation not supported"); 125262238Sluigi } 126262238Sluigi#endif 127261909Sluigi} 128261909Sluigi 129267180Sluigivoid 130267180Sluiginm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data, 131261909Sluigi size_t datalen, uint16_t *check) 132261909Sluigi{ 133261909Sluigi#ifdef INET6 134261909Sluigi *check = in6_cksum_pseudo((void*)ip6h, datalen, ip6h->nexthdr, 0); 135261909Sluigi *check = nm_csum_fold(nm_csum_raw(data, datalen, 0)); 136261909Sluigi#else 137261909Sluigi static int notsupported = 0; 138261909Sluigi if (!notsupported) { 139261909Sluigi notsupported = 1; 140261909Sluigi D("inet6 segmentation not supported"); 141261909Sluigi } 142261909Sluigi#endif 143261909Sluigi} 144261909Sluigi 145261909Sluigi 146259412Sluigi/* 147259412Sluigi * Intercept the rx routine in the standard device driver. 148259412Sluigi * Second argument is non-zero to intercept, 0 to restore 149259412Sluigi */ 150259412Sluigiint 151259412Sluiginetmap_catch_rx(struct netmap_adapter *na, int intercept) 152259412Sluigi{ 153270063Sluigi struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na; 154259412Sluigi struct ifnet *ifp = na->ifp; 155259412Sluigi 156259412Sluigi if (intercept) { 157259412Sluigi if (gna->save_if_input) { 158259412Sluigi D("cannot intercept again"); 159259412Sluigi return EINVAL; /* already set */ 160259412Sluigi } 161259412Sluigi gna->save_if_input = ifp->if_input; 162259412Sluigi ifp->if_input = generic_rx_handler; 163259412Sluigi } else { 164259412Sluigi if (!gna->save_if_input){ 165259412Sluigi D("cannot restore"); 166259412Sluigi return EINVAL; /* not saved */ 167259412Sluigi } 168259412Sluigi ifp->if_input = gna->save_if_input; 169259412Sluigi gna->save_if_input = NULL; 170259412Sluigi } 171259412Sluigi 172259412Sluigi return 0; 173259412Sluigi} 174259412Sluigi 175260368Sluigi 176259412Sluigi/* 177259412Sluigi * Intercept the packet steering routine in the tx path, 178259412Sluigi * so that we can decide which queue is used for an mbuf. 179259412Sluigi * Second argument is non-zero to intercept, 0 to restore. 180261909Sluigi * On freebsd we just intercept if_transmit. 181259412Sluigi */ 182259412Sluigivoid 183260368Sluiginetmap_catch_tx(struct netmap_generic_adapter *gna, int enable) 184259412Sluigi{ 185260368Sluigi struct netmap_adapter *na = &gna->up.up; 186260368Sluigi struct ifnet *ifp = na->ifp; 187260368Sluigi 188259412Sluigi if (enable) { 189260368Sluigi na->if_transmit = ifp->if_transmit; 190260368Sluigi ifp->if_transmit = netmap_transmit; 191259412Sluigi } else { 192260368Sluigi ifp->if_transmit = na->if_transmit; 193259412Sluigi } 194259412Sluigi} 195259412Sluigi 196260368Sluigi 197261909Sluigi/* 198261909Sluigi * Transmit routine used by generic_netmap_txsync(). Returns 0 on success 199259412Sluigi * and non-zero on error (which may be packet drops or other errors). 200259412Sluigi * addr and len identify the netmap buffer, m is the (preallocated) 201259412Sluigi * mbuf to use for transmissions. 202259412Sluigi * 203259412Sluigi * We should add a reference to the mbuf so the m_freem() at the end 204259412Sluigi * of the transmission does not consume resources. 205259412Sluigi * 206259412Sluigi * On FreeBSD, and on multiqueue cards, we can force the queue using 207275358Shselasky * if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) 208259412Sluigi * i = m->m_pkthdr.flowid % adapter->num_queues; 209259412Sluigi * else 210259412Sluigi * i = curcpu % adapter->num_queues; 211259412Sluigi * 212259412Sluigi */ 213259412Sluigiint 214259412Sluigigeneric_xmit_frame(struct ifnet *ifp, struct mbuf *m, 215259412Sluigi void *addr, u_int len, u_int ring_nr) 216259412Sluigi{ 217259412Sluigi int ret; 218259412Sluigi 219267180Sluigi /* 220267180Sluigi * The mbuf should be a cluster from our special pool, 221267180Sluigi * so we do not need to do an m_copyback but just copy 222267180Sluigi * (and eventually, just reference the netmap buffer) 223267180Sluigi */ 224259412Sluigi 225270063Sluigi if (GET_MBUF_REFCNT(m) != 1) { 226267180Sluigi D("invalid refcnt %d for %p", 227270063Sluigi GET_MBUF_REFCNT(m), m); 228267180Sluigi panic("in generic_xmit_frame"); 229267180Sluigi } 230267180Sluigi // XXX the ext_size check is unnecessary if we link the netmap buf 231267180Sluigi if (m->m_ext.ext_size < len) { 232267180Sluigi RD(5, "size %d < len %d", m->m_ext.ext_size, len); 233267180Sluigi len = m->m_ext.ext_size; 234267180Sluigi } 235267180Sluigi if (0) { /* XXX seems to have negligible benefits */ 236267180Sluigi m->m_ext.ext_buf = m->m_data = addr; 237267180Sluigi } else { 238267180Sluigi bcopy(addr, m->m_data, len); 239267180Sluigi } 240267180Sluigi m->m_len = m->m_pkthdr.len = len; 241267180Sluigi // inc refcount. All ours, we could skip the atomic 242270063Sluigi atomic_fetchadd_int(PNT_MBUF_REFCNT(m), 1); 243275358Shselasky M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 244259412Sluigi m->m_pkthdr.flowid = ring_nr; 245259412Sluigi m->m_pkthdr.rcvif = ifp; /* used for tx notification */ 246260368Sluigi ret = NA(ifp)->if_transmit(ifp, m); 247259412Sluigi return ret; 248259412Sluigi} 249259412Sluigi 250260368Sluigi 251267170Sluigi#if __FreeBSD_version >= 1100005 252267170Sluigistruct netmap_adapter * 253267170Sluiginetmap_getna(if_t ifp) 254267170Sluigi{ 255267170Sluigi return (NA((struct ifnet *)ifp)); 256267170Sluigi} 257267170Sluigi#endif /* __FreeBSD_version >= 1100005 */ 258267170Sluigi 259259412Sluigi/* 260259412Sluigi * The following two functions are empty until we have a generic 261259412Sluigi * way to extract the info from the ifp 262259412Sluigi */ 263259412Sluigiint 264259412Sluigigeneric_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx) 265259412Sluigi{ 266267180Sluigi D("called, in tx %d rx %d", *tx, *rx); 267259412Sluigi return 0; 268259412Sluigi} 269259412Sluigi 270260368Sluigi 271259412Sluigivoid 272259412Sluigigeneric_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq) 273259412Sluigi{ 274267180Sluigi D("called, in txq %d rxq %d", *txq, *rxq); 275261909Sluigi *txq = netmap_generic_rings; 276261909Sluigi *rxq = netmap_generic_rings; 277259412Sluigi} 278259412Sluigi 279260368Sluigi 280267180Sluigivoid 281270063Sluiginetmap_mitigation_init(struct nm_generic_mit *mit, int idx, struct netmap_adapter *na) 282259412Sluigi{ 283259412Sluigi ND("called"); 284261909Sluigi mit->mit_pending = 0; 285270063Sluigi mit->mit_ring_idx = idx; 286261909Sluigi mit->mit_na = na; 287259412Sluigi} 288259412Sluigi 289259412Sluigi 290267180Sluigivoid 291267180Sluiginetmap_mitigation_start(struct nm_generic_mit *mit) 292259412Sluigi{ 293259412Sluigi ND("called"); 294259412Sluigi} 295259412Sluigi 296260368Sluigi 297267180Sluigivoid 298267180Sluiginetmap_mitigation_restart(struct nm_generic_mit *mit) 299259412Sluigi{ 300259412Sluigi ND("called"); 301259412Sluigi} 302259412Sluigi 303260368Sluigi 304267180Sluigiint 305267180Sluiginetmap_mitigation_active(struct nm_generic_mit *mit) 306259412Sluigi{ 307259412Sluigi ND("called"); 308259412Sluigi return 0; 309259412Sluigi} 310259412Sluigi 311260368Sluigi 312267180Sluigivoid 313267180Sluiginetmap_mitigation_cleanup(struct nm_generic_mit *mit) 314259412Sluigi{ 315259412Sluigi ND("called"); 316259412Sluigi} 317259412Sluigi 318270063Sluigistatic int 319270063Sluiginm_vi_dummy(struct ifnet *ifp, u_long cmd, caddr_t addr) 320270063Sluigi{ 321270063Sluigi return EINVAL; 322270063Sluigi} 323260368Sluigi 324270063Sluigistatic void 325270063Sluiginm_vi_start(struct ifnet *ifp) 326270063Sluigi{ 327270063Sluigi panic("nm_vi_start() must not be called"); 328270063Sluigi} 329270063Sluigi 330259412Sluigi/* 331270063Sluigi * Index manager of persistent virtual interfaces. 332270063Sluigi * It is used to decide the lowest byte of the MAC address. 333270063Sluigi * We use the same algorithm with management of bridge port index. 334270063Sluigi */ 335270063Sluigi#define NM_VI_MAX 255 336270063Sluigistatic struct { 337270063Sluigi uint8_t index[NM_VI_MAX]; /* XXX just for a reasonable number */ 338270063Sluigi uint8_t active; 339270063Sluigi struct mtx lock; 340270063Sluigi} nm_vi_indices; 341270063Sluigi 342270063Sluigivoid 343270063Sluiginm_vi_init_index(void) 344270063Sluigi{ 345270063Sluigi int i; 346270063Sluigi for (i = 0; i < NM_VI_MAX; i++) 347270063Sluigi nm_vi_indices.index[i] = i; 348270063Sluigi nm_vi_indices.active = 0; 349270063Sluigi mtx_init(&nm_vi_indices.lock, "nm_vi_indices_lock", NULL, MTX_DEF); 350270063Sluigi} 351270063Sluigi 352270063Sluigi/* return -1 if no index available */ 353270063Sluigistatic int 354270063Sluiginm_vi_get_index(void) 355270063Sluigi{ 356270063Sluigi int ret; 357270063Sluigi 358270063Sluigi mtx_lock(&nm_vi_indices.lock); 359270063Sluigi ret = nm_vi_indices.active == NM_VI_MAX ? -1 : 360270063Sluigi nm_vi_indices.index[nm_vi_indices.active++]; 361270063Sluigi mtx_unlock(&nm_vi_indices.lock); 362270063Sluigi return ret; 363270063Sluigi} 364270063Sluigi 365270063Sluigistatic void 366270063Sluiginm_vi_free_index(uint8_t val) 367270063Sluigi{ 368270063Sluigi int i, lim; 369270063Sluigi 370270063Sluigi mtx_lock(&nm_vi_indices.lock); 371270063Sluigi lim = nm_vi_indices.active; 372270063Sluigi for (i = 0; i < lim; i++) { 373270063Sluigi if (nm_vi_indices.index[i] == val) { 374270063Sluigi /* swap index[lim-1] and j */ 375270063Sluigi int tmp = nm_vi_indices.index[lim-1]; 376270063Sluigi nm_vi_indices.index[lim-1] = val; 377270063Sluigi nm_vi_indices.index[i] = tmp; 378270063Sluigi nm_vi_indices.active--; 379270063Sluigi break; 380270063Sluigi } 381270063Sluigi } 382270063Sluigi if (lim == nm_vi_indices.active) 383270063Sluigi D("funny, index %u didn't found", val); 384270063Sluigi mtx_unlock(&nm_vi_indices.lock); 385270063Sluigi} 386270063Sluigi#undef NM_VI_MAX 387270063Sluigi 388270063Sluigi/* 389270063Sluigi * Implementation of a netmap-capable virtual interface that 390270063Sluigi * registered to the system. 391270063Sluigi * It is based on if_tap.c and ip_fw_log.c in FreeBSD 9. 392270063Sluigi * 393270063Sluigi * Note: Linux sets refcount to 0 on allocation of net_device, 394270063Sluigi * then increments it on registration to the system. 395270063Sluigi * FreeBSD sets refcount to 1 on if_alloc(), and does not 396270063Sluigi * increment this refcount on if_attach(). 397270063Sluigi */ 398270063Sluigiint 399270063Sluiginm_vi_persist(const char *name, struct ifnet **ret) 400270063Sluigi{ 401270063Sluigi struct ifnet *ifp; 402270063Sluigi u_short macaddr_hi; 403270063Sluigi uint32_t macaddr_mid; 404270063Sluigi u_char eaddr[6]; 405270063Sluigi int unit = nm_vi_get_index(); /* just to decide MAC address */ 406270063Sluigi 407270063Sluigi if (unit < 0) 408270063Sluigi return EBUSY; 409270063Sluigi /* 410270063Sluigi * We use the same MAC address generation method with tap 411270063Sluigi * except for the highest octet is 00:be instead of 00:bd 412270063Sluigi */ 413270063Sluigi macaddr_hi = htons(0x00be); /* XXX tap + 1 */ 414270063Sluigi macaddr_mid = (uint32_t) ticks; 415270063Sluigi bcopy(&macaddr_hi, eaddr, sizeof(short)); 416270063Sluigi bcopy(&macaddr_mid, &eaddr[2], sizeof(uint32_t)); 417270063Sluigi eaddr[5] = (uint8_t)unit; 418270063Sluigi 419270063Sluigi ifp = if_alloc(IFT_ETHER); 420270063Sluigi if (ifp == NULL) { 421270063Sluigi D("if_alloc failed"); 422270063Sluigi return ENOMEM; 423270063Sluigi } 424270063Sluigi if_initname(ifp, name, IF_DUNIT_NONE); 425270063Sluigi ifp->if_mtu = 65536; 426270063Sluigi ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST; 427270063Sluigi ifp->if_init = (void *)nm_vi_dummy; 428270063Sluigi ifp->if_ioctl = nm_vi_dummy; 429270063Sluigi ifp->if_start = nm_vi_start; 430270063Sluigi ifp->if_mtu = ETHERMTU; 431270063Sluigi IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); 432270063Sluigi ifp->if_capabilities |= IFCAP_LINKSTATE; 433270063Sluigi ifp->if_capenable |= IFCAP_LINKSTATE; 434270063Sluigi 435270063Sluigi ether_ifattach(ifp, eaddr); 436270063Sluigi *ret = ifp; 437270063Sluigi return 0; 438270063Sluigi} 439270063Sluigi/* unregister from the system and drop the final refcount */ 440270063Sluigivoid 441270063Sluiginm_vi_detach(struct ifnet *ifp) 442270063Sluigi{ 443270063Sluigi nm_vi_free_index(((char *)IF_LLADDR(ifp))[5]); 444270063Sluigi ether_ifdetach(ifp); 445270063Sluigi if_free(ifp); 446270063Sluigi} 447270063Sluigi 448270063Sluigi/* 449259412Sluigi * In order to track whether pages are still mapped, we hook into 450259412Sluigi * the standard cdev_pager and intercept the constructor and 451259412Sluigi * destructor. 452259412Sluigi */ 453259412Sluigi 454259412Sluigistruct netmap_vm_handle_t { 455259412Sluigi struct cdev *dev; 456259412Sluigi struct netmap_priv_d *priv; 457259412Sluigi}; 458259412Sluigi 459260368Sluigi 460259412Sluigistatic int 461259412Sluiginetmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 462259412Sluigi vm_ooffset_t foff, struct ucred *cred, u_short *color) 463259412Sluigi{ 464259412Sluigi struct netmap_vm_handle_t *vmh = handle; 465261909Sluigi 466261909Sluigi if (netmap_verbose) 467261909Sluigi D("handle %p size %jd prot %d foff %jd", 468261909Sluigi handle, (intmax_t)size, prot, (intmax_t)foff); 469274354Sluigi if (color) 470274354Sluigi *color = 0; 471259412Sluigi dev_ref(vmh->dev); 472259412Sluigi return 0; 473259412Sluigi} 474259412Sluigi 475259412Sluigi 476259412Sluigistatic void 477259412Sluiginetmap_dev_pager_dtor(void *handle) 478259412Sluigi{ 479259412Sluigi struct netmap_vm_handle_t *vmh = handle; 480259412Sluigi struct cdev *dev = vmh->dev; 481259412Sluigi struct netmap_priv_d *priv = vmh->priv; 482261909Sluigi 483261909Sluigi if (netmap_verbose) 484261909Sluigi D("handle %p", handle); 485259412Sluigi netmap_dtor(priv); 486259412Sluigi free(vmh, M_DEVBUF); 487259412Sluigi dev_rel(dev); 488259412Sluigi} 489259412Sluigi 490260368Sluigi 491259412Sluigistatic int 492259412Sluiginetmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, 493259412Sluigi int prot, vm_page_t *mres) 494259412Sluigi{ 495259412Sluigi struct netmap_vm_handle_t *vmh = object->handle; 496259412Sluigi struct netmap_priv_d *priv = vmh->priv; 497259412Sluigi vm_paddr_t paddr; 498259412Sluigi vm_page_t page; 499259412Sluigi vm_memattr_t memattr; 500259412Sluigi vm_pindex_t pidx; 501259412Sluigi 502259412Sluigi ND("object %p offset %jd prot %d mres %p", 503259412Sluigi object, (intmax_t)offset, prot, mres); 504259412Sluigi memattr = object->memattr; 505259412Sluigi pidx = OFF_TO_IDX(offset); 506259412Sluigi paddr = netmap_mem_ofstophys(priv->np_mref, offset); 507259412Sluigi if (paddr == 0) 508259412Sluigi return VM_PAGER_FAIL; 509259412Sluigi 510259412Sluigi if (((*mres)->flags & PG_FICTITIOUS) != 0) { 511259412Sluigi /* 512259412Sluigi * If the passed in result page is a fake page, update it with 513259412Sluigi * the new physical address. 514259412Sluigi */ 515259412Sluigi page = *mres; 516259412Sluigi vm_page_updatefake(page, paddr, memattr); 517259412Sluigi } else { 518259412Sluigi /* 519259412Sluigi * Replace the passed in reqpage page with our own fake page and 520259412Sluigi * free up the all of the original pages. 521259412Sluigi */ 522259412Sluigi#ifndef VM_OBJECT_WUNLOCK /* FreeBSD < 10.x */ 523259412Sluigi#define VM_OBJECT_WUNLOCK VM_OBJECT_UNLOCK 524259412Sluigi#define VM_OBJECT_WLOCK VM_OBJECT_LOCK 525259412Sluigi#endif /* VM_OBJECT_WUNLOCK */ 526259412Sluigi 527259412Sluigi VM_OBJECT_WUNLOCK(object); 528259412Sluigi page = vm_page_getfake(paddr, memattr); 529259412Sluigi VM_OBJECT_WLOCK(object); 530259412Sluigi vm_page_lock(*mres); 531259412Sluigi vm_page_free(*mres); 532259412Sluigi vm_page_unlock(*mres); 533259412Sluigi *mres = page; 534259412Sluigi vm_page_insert(page, object, pidx); 535259412Sluigi } 536259412Sluigi page->valid = VM_PAGE_BITS_ALL; 537259412Sluigi return (VM_PAGER_OK); 538259412Sluigi} 539259412Sluigi 540259412Sluigi 541259412Sluigistatic struct cdev_pager_ops netmap_cdev_pager_ops = { 542259412Sluigi .cdev_pg_ctor = netmap_dev_pager_ctor, 543259412Sluigi .cdev_pg_dtor = netmap_dev_pager_dtor, 544259412Sluigi .cdev_pg_fault = netmap_dev_pager_fault, 545259412Sluigi}; 546259412Sluigi 547259412Sluigi 548259412Sluigistatic int 549259412Sluiginetmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff, 550259412Sluigi vm_size_t objsize, vm_object_t *objp, int prot) 551259412Sluigi{ 552259412Sluigi int error; 553259412Sluigi struct netmap_vm_handle_t *vmh; 554259412Sluigi struct netmap_priv_d *priv; 555259412Sluigi vm_object_t obj; 556259412Sluigi 557261909Sluigi if (netmap_verbose) 558261909Sluigi D("cdev %p foff %jd size %jd objp %p prot %d", cdev, 559261909Sluigi (intmax_t )*foff, (intmax_t )objsize, objp, prot); 560259412Sluigi 561259412Sluigi vmh = malloc(sizeof(struct netmap_vm_handle_t), M_DEVBUF, 562259412Sluigi M_NOWAIT | M_ZERO); 563259412Sluigi if (vmh == NULL) 564259412Sluigi return ENOMEM; 565259412Sluigi vmh->dev = cdev; 566259412Sluigi 567259412Sluigi NMG_LOCK(); 568259412Sluigi error = devfs_get_cdevpriv((void**)&priv); 569259412Sluigi if (error) 570259412Sluigi goto err_unlock; 571259412Sluigi vmh->priv = priv; 572259412Sluigi priv->np_refcount++; 573259412Sluigi NMG_UNLOCK(); 574259412Sluigi 575259412Sluigi error = netmap_get_memory(priv); 576259412Sluigi if (error) 577259412Sluigi goto err_deref; 578259412Sluigi 579259412Sluigi obj = cdev_pager_allocate(vmh, OBJT_DEVICE, 580259412Sluigi &netmap_cdev_pager_ops, objsize, prot, 581259412Sluigi *foff, NULL); 582259412Sluigi if (obj == NULL) { 583259412Sluigi D("cdev_pager_allocate failed"); 584259412Sluigi error = EINVAL; 585259412Sluigi goto err_deref; 586259412Sluigi } 587259412Sluigi 588259412Sluigi *objp = obj; 589259412Sluigi return 0; 590259412Sluigi 591259412Sluigierr_deref: 592259412Sluigi NMG_LOCK(); 593259412Sluigi priv->np_refcount--; 594259412Sluigierr_unlock: 595259412Sluigi NMG_UNLOCK(); 596259412Sluigi// err: 597259412Sluigi free(vmh, M_DEVBUF); 598259412Sluigi return error; 599259412Sluigi} 600259412Sluigi 601259412Sluigi 602259412Sluigi// XXX can we remove this ? 603259412Sluigistatic int 604259412Sluiginetmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td) 605259412Sluigi{ 606259412Sluigi if (netmap_verbose) 607259412Sluigi D("dev %p fflag 0x%x devtype %d td %p", 608259412Sluigi dev, fflag, devtype, td); 609259412Sluigi return 0; 610259412Sluigi} 611259412Sluigi 612259412Sluigi 613259412Sluigistatic int 614259412Sluiginetmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td) 615259412Sluigi{ 616259412Sluigi struct netmap_priv_d *priv; 617259412Sluigi int error; 618259412Sluigi 619259412Sluigi (void)dev; 620259412Sluigi (void)oflags; 621259412Sluigi (void)devtype; 622259412Sluigi (void)td; 623259412Sluigi 624259412Sluigi // XXX wait or nowait ? 625259412Sluigi priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF, 626259412Sluigi M_NOWAIT | M_ZERO); 627259412Sluigi if (priv == NULL) 628259412Sluigi return ENOMEM; 629259412Sluigi 630259412Sluigi error = devfs_set_cdevpriv(priv, netmap_dtor); 631259412Sluigi if (error) 632259412Sluigi return error; 633259412Sluigi 634259412Sluigi priv->np_refcount = 1; 635259412Sluigi 636259412Sluigi return 0; 637259412Sluigi} 638259412Sluigi 639261909Sluigi/******************** kqueue support ****************/ 640259412Sluigi 641261909Sluigi/* 642261909Sluigi * The OS_selwakeup also needs to issue a KNOTE_UNLOCKED. 643261909Sluigi * We use a non-zero argument to distinguish the call from the one 644261909Sluigi * in kevent_scan() which instead also needs to run netmap_poll(). 645261909Sluigi * The knote uses a global mutex for the time being. We might 646261909Sluigi * try to reuse the one in the si, but it is not allocated 647261909Sluigi * permanently so it might be a bit tricky. 648261909Sluigi * 649261909Sluigi * The *kqfilter function registers one or another f_event 650261909Sluigi * depending on read or write mode. 651261909Sluigi * In the call to f_event() td_fpop is NULL so any child function 652261909Sluigi * calling devfs_get_cdevpriv() would fail - and we need it in 653261909Sluigi * netmap_poll(). As a workaround we store priv into kn->kn_hook 654261909Sluigi * and pass it as first argument to netmap_poll(), which then 655261909Sluigi * uses the failure to tell that we are called from f_event() 656261909Sluigi * and do not need the selrecord(). 657261909Sluigi */ 658261909Sluigi 659261909Sluigi 660261909Sluigivoid 661274459Sluigifreebsd_selwakeup(struct nm_selinfo *si, int pri) 662261909Sluigi{ 663261909Sluigi if (netmap_verbose) 664274459Sluigi D("on knote %p", &si->si.si_note); 665274459Sluigi selwakeuppri(&si->si, pri); 666261909Sluigi /* use a non-zero hint to tell the notification from the 667261909Sluigi * call done in kqueue_scan() which uses 0 668261909Sluigi */ 669274459Sluigi KNOTE_UNLOCKED(&si->si.si_note, 0x100 /* notification */); 670261909Sluigi} 671261909Sluigi 672261909Sluigistatic void 673261909Sluiginetmap_knrdetach(struct knote *kn) 674261909Sluigi{ 675261909Sluigi struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook; 676274459Sluigi struct selinfo *si = &priv->np_rxsi->si; 677261909Sluigi 678261909Sluigi D("remove selinfo %p", si); 679261909Sluigi knlist_remove(&si->si_note, kn, 0); 680261909Sluigi} 681261909Sluigi 682261909Sluigistatic void 683261909Sluiginetmap_knwdetach(struct knote *kn) 684261909Sluigi{ 685261909Sluigi struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook; 686274459Sluigi struct selinfo *si = &priv->np_txsi->si; 687261909Sluigi 688261909Sluigi D("remove selinfo %p", si); 689261909Sluigi knlist_remove(&si->si_note, kn, 0); 690261909Sluigi} 691261909Sluigi 692261909Sluigi/* 693261909Sluigi * callback from notifies (generated externally) and our 694261909Sluigi * calls to kevent(). The former we just return 1 (ready) 695261909Sluigi * since we do not know better. 696261909Sluigi * In the latter we call netmap_poll and return 0/1 accordingly. 697261909Sluigi */ 698261909Sluigistatic int 699261909Sluiginetmap_knrw(struct knote *kn, long hint, int events) 700261909Sluigi{ 701261909Sluigi struct netmap_priv_d *priv; 702261909Sluigi int revents; 703261909Sluigi 704261909Sluigi if (hint != 0) { 705261909Sluigi ND(5, "call from notify"); 706261909Sluigi return 1; /* assume we are ready */ 707261909Sluigi } 708261909Sluigi priv = kn->kn_hook; 709261909Sluigi /* the notification may come from an external thread, 710261909Sluigi * in which case we do not want to run the netmap_poll 711261909Sluigi * This should be filtered above, but check just in case. 712261909Sluigi */ 713261909Sluigi if (curthread != priv->np_td) { /* should not happen */ 714261909Sluigi RD(5, "curthread changed %p %p", curthread, priv->np_td); 715261909Sluigi return 1; 716261909Sluigi } else { 717261909Sluigi revents = netmap_poll((void *)priv, events, curthread); 718261909Sluigi return (events & revents) ? 1 : 0; 719261909Sluigi } 720261909Sluigi} 721261909Sluigi 722261909Sluigistatic int 723261909Sluiginetmap_knread(struct knote *kn, long hint) 724261909Sluigi{ 725261909Sluigi return netmap_knrw(kn, hint, POLLIN); 726261909Sluigi} 727261909Sluigi 728261909Sluigistatic int 729261909Sluiginetmap_knwrite(struct knote *kn, long hint) 730261909Sluigi{ 731261909Sluigi return netmap_knrw(kn, hint, POLLOUT); 732261909Sluigi} 733261909Sluigi 734261909Sluigistatic struct filterops netmap_rfiltops = { 735261909Sluigi .f_isfd = 1, 736261909Sluigi .f_detach = netmap_knrdetach, 737261909Sluigi .f_event = netmap_knread, 738261909Sluigi}; 739261909Sluigi 740261909Sluigistatic struct filterops netmap_wfiltops = { 741261909Sluigi .f_isfd = 1, 742261909Sluigi .f_detach = netmap_knwdetach, 743261909Sluigi .f_event = netmap_knwrite, 744261909Sluigi}; 745261909Sluigi 746261909Sluigi 747261909Sluigi/* 748261909Sluigi * This is called when a thread invokes kevent() to record 749261909Sluigi * a change in the configuration of the kqueue(). 750261909Sluigi * The 'priv' should be the same as in the netmap device. 751261909Sluigi */ 752261909Sluigistatic int 753261909Sluiginetmap_kqfilter(struct cdev *dev, struct knote *kn) 754261909Sluigi{ 755261909Sluigi struct netmap_priv_d *priv; 756261909Sluigi int error; 757261909Sluigi struct netmap_adapter *na; 758274459Sluigi struct nm_selinfo *si; 759261909Sluigi int ev = kn->kn_filter; 760261909Sluigi 761261909Sluigi if (ev != EVFILT_READ && ev != EVFILT_WRITE) { 762261909Sluigi D("bad filter request %d", ev); 763261909Sluigi return 1; 764261909Sluigi } 765261909Sluigi error = devfs_get_cdevpriv((void**)&priv); 766261909Sluigi if (error) { 767261909Sluigi D("device not yet setup"); 768261909Sluigi return 1; 769261909Sluigi } 770261909Sluigi na = priv->np_na; 771261909Sluigi if (na == NULL) { 772261909Sluigi D("no netmap adapter for this file descriptor"); 773261909Sluigi return 1; 774261909Sluigi } 775261909Sluigi /* the si is indicated in the priv */ 776261909Sluigi si = (ev == EVFILT_WRITE) ? priv->np_txsi : priv->np_rxsi; 777261909Sluigi // XXX lock(priv) ? 778261909Sluigi kn->kn_fop = (ev == EVFILT_WRITE) ? 779261909Sluigi &netmap_wfiltops : &netmap_rfiltops; 780261909Sluigi kn->kn_hook = priv; 781274459Sluigi knlist_add(&si->si.si_note, kn, 1); 782261909Sluigi // XXX unlock(priv) 783261909Sluigi ND("register %p %s td %p priv %p kn %p np_nifp %p kn_fp/fpop %s", 784261909Sluigi na, na->ifp->if_xname, curthread, priv, kn, 785261909Sluigi priv->np_nifp, 786261909Sluigi kn->kn_fp == curthread->td_fpop ? "match" : "MISMATCH"); 787261909Sluigi return 0; 788261909Sluigi} 789261909Sluigi 790259412Sluigistruct cdevsw netmap_cdevsw = { 791259412Sluigi .d_version = D_VERSION, 792259412Sluigi .d_name = "netmap", 793259412Sluigi .d_open = netmap_open, 794259412Sluigi .d_mmap_single = netmap_mmap_single, 795259412Sluigi .d_ioctl = netmap_ioctl, 796259412Sluigi .d_poll = netmap_poll, 797261909Sluigi .d_kqfilter = netmap_kqfilter, 798259412Sluigi .d_close = netmap_close, 799259412Sluigi}; 800261909Sluigi/*--- end of kqueue support ----*/ 801259412Sluigi 802259412Sluigi/* 803259412Sluigi * Kernel entry point. 804259412Sluigi * 805259412Sluigi * Initialize/finalize the module and return. 806259412Sluigi * 807259412Sluigi * Return 0 on success, errno on failure. 808259412Sluigi */ 809259412Sluigistatic int 810259412Sluiginetmap_loader(__unused struct module *module, int event, __unused void *arg) 811259412Sluigi{ 812259412Sluigi int error = 0; 813259412Sluigi 814259412Sluigi switch (event) { 815259412Sluigi case MOD_LOAD: 816259412Sluigi error = netmap_init(); 817259412Sluigi break; 818259412Sluigi 819259412Sluigi case MOD_UNLOAD: 820259412Sluigi netmap_fini(); 821259412Sluigi break; 822259412Sluigi 823259412Sluigi default: 824259412Sluigi error = EOPNOTSUPP; 825259412Sluigi break; 826259412Sluigi } 827259412Sluigi 828259412Sluigi return (error); 829259412Sluigi} 830259412Sluigi 831259412Sluigi 832259412SluigiDEV_MODULE(netmap, netmap_loader, NULL); 833