/*
 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
24262152Sluigi */ 25262152Sluigi 26262152Sluigi/* $FreeBSD$ */ 27262152Sluigi 28262152Sluigi#include <sys/types.h> 29262152Sluigi#include <sys/module.h> 30262152Sluigi#include <sys/errno.h> 31262152Sluigi#include <sys/param.h> /* defines used in kernel.h */ 32262152Sluigi#include <sys/poll.h> /* POLLIN, POLLOUT */ 33262152Sluigi#include <sys/kernel.h> /* types used in module initialization */ 34262152Sluigi#include <sys/conf.h> /* DEV_MODULE */ 35262152Sluigi#include <sys/endian.h> 36262152Sluigi 37262152Sluigi#include <sys/rwlock.h> 38262152Sluigi 39262152Sluigi#include <vm/vm.h> /* vtophys */ 40262152Sluigi#include <vm/pmap.h> /* vtophys */ 41262152Sluigi#include <vm/vm_param.h> 42262152Sluigi#include <vm/vm_object.h> 43262152Sluigi#include <vm/vm_page.h> 44262152Sluigi#include <vm/vm_pager.h> 45262152Sluigi#include <vm/uma.h> 46262152Sluigi 47262152Sluigi 48262152Sluigi#include <sys/malloc.h> 49262152Sluigi#include <sys/socket.h> /* sockaddrs */ 50262152Sluigi#include <sys/selinfo.h> 51262152Sluigi#include <net/if.h> 52262152Sluigi#include <net/if_var.h> 53270252Sluigi#include <net/if_types.h> /* IFT_ETHER */ 54270252Sluigi#include <net/ethernet.h> /* ether_ifdetach */ 55270252Sluigi#include <net/if_dl.h> /* LLADDR */ 56262152Sluigi#include <machine/bus.h> /* bus_dmamap_* */ 57262152Sluigi#include <netinet/in.h> /* in6_cksum_pseudo() */ 58262152Sluigi#include <machine/in_cksum.h> /* in_pseudo(), in_cksum_hdr() */ 59262152Sluigi 60262152Sluigi#include <net/netmap.h> 61262152Sluigi#include <dev/netmap/netmap_kern.h> 62262152Sluigi#include <dev/netmap/netmap_mem2.h> 63262152Sluigi 64262152Sluigi 65262152Sluigi/* ======================== FREEBSD-SPECIFIC ROUTINES ================== */ 66262152Sluigi 67267282Sluigirawsum_t 68267282Sluiginm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum) 69262152Sluigi{ 70262152Sluigi /* TODO XXX please use the FreeBSD implementation for this. 
*/ 71262152Sluigi uint16_t *words = (uint16_t *)data; 72262152Sluigi int nw = len / 2; 73262152Sluigi int i; 74262152Sluigi 75262152Sluigi for (i = 0; i < nw; i++) 76262152Sluigi cur_sum += be16toh(words[i]); 77262152Sluigi 78262152Sluigi if (len & 1) 79262152Sluigi cur_sum += (data[len-1] << 8); 80262152Sluigi 81262152Sluigi return cur_sum; 82262152Sluigi} 83262152Sluigi 84262152Sluigi/* Fold a raw checksum: 'cur_sum' is in host byte order, while the 85262152Sluigi * return value is in network byte order. 86262152Sluigi */ 87267282Sluigiuint16_t 88267282Sluiginm_csum_fold(rawsum_t cur_sum) 89262152Sluigi{ 90262152Sluigi /* TODO XXX please use the FreeBSD implementation for this. */ 91262152Sluigi while (cur_sum >> 16) 92262152Sluigi cur_sum = (cur_sum & 0xFFFF) + (cur_sum >> 16); 93262152Sluigi 94262152Sluigi return htobe16((~cur_sum) & 0xFFFF); 95262152Sluigi} 96262152Sluigi 97270252Sluigiuint16_t nm_csum_ipv4(struct nm_iphdr *iph) 98262152Sluigi{ 99262152Sluigi#if 0 100262152Sluigi return in_cksum_hdr((void *)iph); 101262152Sluigi#else 102262152Sluigi return nm_csum_fold(nm_csum_raw((uint8_t*)iph, sizeof(struct nm_iphdr), 0)); 103262152Sluigi#endif 104262152Sluigi} 105262152Sluigi 106267282Sluigivoid 107267282Sluiginm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data, 108262152Sluigi size_t datalen, uint16_t *check) 109262152Sluigi{ 110262214Sluigi#ifdef INET 111262152Sluigi uint16_t pseudolen = datalen + iph->protocol; 112262152Sluigi 113262152Sluigi /* Compute and insert the pseudo-header cheksum. */ 114262152Sluigi *check = in_pseudo(iph->saddr, iph->daddr, 115262152Sluigi htobe16(pseudolen)); 116262152Sluigi /* Compute the checksum on TCP/UDP header + payload 117262152Sluigi * (includes the pseudo-header). 
118262152Sluigi */ 119262152Sluigi *check = nm_csum_fold(nm_csum_raw(data, datalen, 0)); 120262214Sluigi#else 121262214Sluigi static int notsupported = 0; 122262214Sluigi if (!notsupported) { 123262214Sluigi notsupported = 1; 124262214Sluigi D("inet4 segmentation not supported"); 125262214Sluigi } 126262214Sluigi#endif 127262152Sluigi} 128262152Sluigi 129267282Sluigivoid 130267282Sluiginm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data, 131262152Sluigi size_t datalen, uint16_t *check) 132262152Sluigi{ 133262152Sluigi#ifdef INET6 134262152Sluigi *check = in6_cksum_pseudo((void*)ip6h, datalen, ip6h->nexthdr, 0); 135262152Sluigi *check = nm_csum_fold(nm_csum_raw(data, datalen, 0)); 136262152Sluigi#else 137262152Sluigi static int notsupported = 0; 138262152Sluigi if (!notsupported) { 139262152Sluigi notsupported = 1; 140262152Sluigi D("inet6 segmentation not supported"); 141262152Sluigi } 142262152Sluigi#endif 143262152Sluigi} 144262152Sluigi 145262152Sluigi 146262152Sluigi/* 147262152Sluigi * Intercept the rx routine in the standard device driver. 
148262152Sluigi * Second argument is non-zero to intercept, 0 to restore 149262152Sluigi */ 150262152Sluigiint 151262152Sluiginetmap_catch_rx(struct netmap_adapter *na, int intercept) 152262152Sluigi{ 153270252Sluigi struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na; 154262152Sluigi struct ifnet *ifp = na->ifp; 155262152Sluigi 156262152Sluigi if (intercept) { 157262152Sluigi if (gna->save_if_input) { 158262152Sluigi D("cannot intercept again"); 159262152Sluigi return EINVAL; /* already set */ 160262152Sluigi } 161262152Sluigi gna->save_if_input = ifp->if_input; 162262152Sluigi ifp->if_input = generic_rx_handler; 163262152Sluigi } else { 164262152Sluigi if (!gna->save_if_input){ 165262152Sluigi D("cannot restore"); 166262152Sluigi return EINVAL; /* not saved */ 167262152Sluigi } 168262152Sluigi ifp->if_input = gna->save_if_input; 169262152Sluigi gna->save_if_input = NULL; 170262152Sluigi } 171262152Sluigi 172262152Sluigi return 0; 173262152Sluigi} 174262152Sluigi 175262152Sluigi 176262152Sluigi/* 177262152Sluigi * Intercept the packet steering routine in the tx path, 178262152Sluigi * so that we can decide which queue is used for an mbuf. 179262152Sluigi * Second argument is non-zero to intercept, 0 to restore. 180262152Sluigi * On freebsd we just intercept if_transmit. 181262152Sluigi */ 182262152Sluigivoid 183262152Sluiginetmap_catch_tx(struct netmap_generic_adapter *gna, int enable) 184262152Sluigi{ 185262152Sluigi struct netmap_adapter *na = &gna->up.up; 186262152Sluigi struct ifnet *ifp = na->ifp; 187262152Sluigi 188262152Sluigi if (enable) { 189262152Sluigi na->if_transmit = ifp->if_transmit; 190262152Sluigi ifp->if_transmit = netmap_transmit; 191262152Sluigi } else { 192262152Sluigi ifp->if_transmit = na->if_transmit; 193262152Sluigi } 194262152Sluigi} 195262152Sluigi 196262152Sluigi 197262152Sluigi/* 198262152Sluigi * Transmit routine used by generic_netmap_txsync(). 
Returns 0 on success 199262152Sluigi * and non-zero on error (which may be packet drops or other errors). 200262152Sluigi * addr and len identify the netmap buffer, m is the (preallocated) 201262152Sluigi * mbuf to use for transmissions. 202262152Sluigi * 203262152Sluigi * We should add a reference to the mbuf so the m_freem() at the end 204262152Sluigi * of the transmission does not consume resources. 205262152Sluigi * 206262152Sluigi * On FreeBSD, and on multiqueue cards, we can force the queue using 207281955Shiren * if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) 208262152Sluigi * i = m->m_pkthdr.flowid % adapter->num_queues; 209262152Sluigi * else 210262152Sluigi * i = curcpu % adapter->num_queues; 211262152Sluigi * 212262152Sluigi */ 213262152Sluigiint 214262152Sluigigeneric_xmit_frame(struct ifnet *ifp, struct mbuf *m, 215262152Sluigi void *addr, u_int len, u_int ring_nr) 216262152Sluigi{ 217262152Sluigi int ret; 218262152Sluigi 219267282Sluigi /* 220267282Sluigi * The mbuf should be a cluster from our special pool, 221267282Sluigi * so we do not need to do an m_copyback but just copy 222267282Sluigi * (and eventually, just reference the netmap buffer) 223267282Sluigi */ 224262152Sluigi 225270252Sluigi if (GET_MBUF_REFCNT(m) != 1) { 226267282Sluigi D("invalid refcnt %d for %p", 227270252Sluigi GET_MBUF_REFCNT(m), m); 228267282Sluigi panic("in generic_xmit_frame"); 229267282Sluigi } 230267282Sluigi // XXX the ext_size check is unnecessary if we link the netmap buf 231267282Sluigi if (m->m_ext.ext_size < len) { 232267282Sluigi RD(5, "size %d < len %d", m->m_ext.ext_size, len); 233267282Sluigi len = m->m_ext.ext_size; 234267282Sluigi } 235270252Sluigi if (0) { /* XXX seems to have negligible benefits */ 236267282Sluigi m->m_ext.ext_buf = m->m_data = addr; 237267282Sluigi } else { 238267282Sluigi bcopy(addr, m->m_data, len); 239267282Sluigi } 240267282Sluigi m->m_len = m->m_pkthdr.len = len; 241267282Sluigi // inc refcount. 
All ours, we could skip the atomic 242270252Sluigi atomic_fetchadd_int(PNT_MBUF_REFCNT(m), 1); 243281955Shiren M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 244262152Sluigi m->m_pkthdr.flowid = ring_nr; 245262152Sluigi m->m_pkthdr.rcvif = ifp; /* used for tx notification */ 246262152Sluigi ret = NA(ifp)->if_transmit(ifp, m); 247262152Sluigi return ret; 248262152Sluigi} 249262152Sluigi 250262152Sluigi 251267282Sluigi#if __FreeBSD_version >= 1100005 252267282Sluigistruct netmap_adapter * 253267282Sluiginetmap_getna(if_t ifp) 254267282Sluigi{ 255267282Sluigi return (NA((struct ifnet *)ifp)); 256267282Sluigi} 257267282Sluigi#endif /* __FreeBSD_version >= 1100005 */ 258267282Sluigi 259262152Sluigi/* 260262152Sluigi * The following two functions are empty until we have a generic 261262152Sluigi * way to extract the info from the ifp 262262152Sluigi */ 263262152Sluigiint 264262152Sluigigeneric_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx) 265262152Sluigi{ 266267282Sluigi D("called, in tx %d rx %d", *tx, *rx); 267262152Sluigi return 0; 268262152Sluigi} 269262152Sluigi 270262152Sluigi 271262152Sluigivoid 272262152Sluigigeneric_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq) 273262152Sluigi{ 274267282Sluigi D("called, in txq %d rxq %d", *txq, *rxq); 275262152Sluigi *txq = netmap_generic_rings; 276262152Sluigi *rxq = netmap_generic_rings; 277262152Sluigi} 278262152Sluigi 279262152Sluigi 280267282Sluigivoid 281270252Sluiginetmap_mitigation_init(struct nm_generic_mit *mit, int idx, struct netmap_adapter *na) 282262152Sluigi{ 283262152Sluigi ND("called"); 284262152Sluigi mit->mit_pending = 0; 285270252Sluigi mit->mit_ring_idx = idx; 286262152Sluigi mit->mit_na = na; 287262152Sluigi} 288262152Sluigi 289262152Sluigi 290267282Sluigivoid 291267282Sluiginetmap_mitigation_start(struct nm_generic_mit *mit) 292262152Sluigi{ 293262152Sluigi ND("called"); 294262152Sluigi} 295262152Sluigi 296262152Sluigi 297267282Sluigivoid 
298267282Sluiginetmap_mitigation_restart(struct nm_generic_mit *mit) 299262152Sluigi{ 300262152Sluigi ND("called"); 301262152Sluigi} 302262152Sluigi 303262152Sluigi 304267282Sluigiint 305267282Sluiginetmap_mitigation_active(struct nm_generic_mit *mit) 306262152Sluigi{ 307262152Sluigi ND("called"); 308262152Sluigi return 0; 309262152Sluigi} 310262152Sluigi 311262152Sluigi 312267282Sluigivoid 313267282Sluiginetmap_mitigation_cleanup(struct nm_generic_mit *mit) 314262152Sluigi{ 315262152Sluigi ND("called"); 316262152Sluigi} 317262152Sluigi 318270252Sluigistatic int 319270252Sluiginm_vi_dummy(struct ifnet *ifp, u_long cmd, caddr_t addr) 320270252Sluigi{ 321270252Sluigi return EINVAL; 322270252Sluigi} 323262152Sluigi 324270252Sluigistatic void 325270252Sluiginm_vi_start(struct ifnet *ifp) 326270252Sluigi{ 327270252Sluigi panic("nm_vi_start() must not be called"); 328270252Sluigi} 329270252Sluigi 330262152Sluigi/* 331270252Sluigi * Index manager of persistent virtual interfaces. 332270252Sluigi * It is used to decide the lowest byte of the MAC address. 333270252Sluigi * We use the same algorithm with management of bridge port index. 
334270252Sluigi */ 335270252Sluigi#define NM_VI_MAX 255 336270252Sluigistatic struct { 337270252Sluigi uint8_t index[NM_VI_MAX]; /* XXX just for a reasonable number */ 338270252Sluigi uint8_t active; 339270252Sluigi struct mtx lock; 340270252Sluigi} nm_vi_indices; 341270252Sluigi 342270252Sluigivoid 343270252Sluiginm_vi_init_index(void) 344270252Sluigi{ 345270252Sluigi int i; 346270252Sluigi for (i = 0; i < NM_VI_MAX; i++) 347270252Sluigi nm_vi_indices.index[i] = i; 348270252Sluigi nm_vi_indices.active = 0; 349270252Sluigi mtx_init(&nm_vi_indices.lock, "nm_vi_indices_lock", NULL, MTX_DEF); 350270252Sluigi} 351270252Sluigi 352270252Sluigi/* return -1 if no index available */ 353270252Sluigistatic int 354270252Sluiginm_vi_get_index(void) 355270252Sluigi{ 356270252Sluigi int ret; 357270252Sluigi 358270252Sluigi mtx_lock(&nm_vi_indices.lock); 359270252Sluigi ret = nm_vi_indices.active == NM_VI_MAX ? -1 : 360270252Sluigi nm_vi_indices.index[nm_vi_indices.active++]; 361270252Sluigi mtx_unlock(&nm_vi_indices.lock); 362270252Sluigi return ret; 363270252Sluigi} 364270252Sluigi 365270252Sluigistatic void 366270252Sluiginm_vi_free_index(uint8_t val) 367270252Sluigi{ 368270252Sluigi int i, lim; 369270252Sluigi 370270252Sluigi mtx_lock(&nm_vi_indices.lock); 371270252Sluigi lim = nm_vi_indices.active; 372270252Sluigi for (i = 0; i < lim; i++) { 373270252Sluigi if (nm_vi_indices.index[i] == val) { 374270252Sluigi /* swap index[lim-1] and j */ 375270252Sluigi int tmp = nm_vi_indices.index[lim-1]; 376270252Sluigi nm_vi_indices.index[lim-1] = val; 377270252Sluigi nm_vi_indices.index[i] = tmp; 378270252Sluigi nm_vi_indices.active--; 379270252Sluigi break; 380270252Sluigi } 381270252Sluigi } 382270252Sluigi if (lim == nm_vi_indices.active) 383270252Sluigi D("funny, index %u didn't found", val); 384270252Sluigi mtx_unlock(&nm_vi_indices.lock); 385270252Sluigi} 386270252Sluigi#undef NM_VI_MAX 387270252Sluigi 388270252Sluigi/* 389270252Sluigi * Implementation of a netmap-capable virtual 
interface that 390270252Sluigi * registered to the system. 391270252Sluigi * It is based on if_tap.c and ip_fw_log.c in FreeBSD 9. 392270252Sluigi * 393270252Sluigi * Note: Linux sets refcount to 0 on allocation of net_device, 394270252Sluigi * then increments it on registration to the system. 395270252Sluigi * FreeBSD sets refcount to 1 on if_alloc(), and does not 396270252Sluigi * increment this refcount on if_attach(). 397270252Sluigi */ 398270252Sluigiint 399270252Sluiginm_vi_persist(const char *name, struct ifnet **ret) 400270252Sluigi{ 401270252Sluigi struct ifnet *ifp; 402270252Sluigi u_short macaddr_hi; 403270252Sluigi uint32_t macaddr_mid; 404270252Sluigi u_char eaddr[6]; 405270252Sluigi int unit = nm_vi_get_index(); /* just to decide MAC address */ 406270252Sluigi 407270252Sluigi if (unit < 0) 408270252Sluigi return EBUSY; 409270252Sluigi /* 410270252Sluigi * We use the same MAC address generation method with tap 411270252Sluigi * except for the highest octet is 00:be instead of 00:bd 412270252Sluigi */ 413270252Sluigi macaddr_hi = htons(0x00be); /* XXX tap + 1 */ 414270252Sluigi macaddr_mid = (uint32_t) ticks; 415270252Sluigi bcopy(&macaddr_hi, eaddr, sizeof(short)); 416270252Sluigi bcopy(&macaddr_mid, &eaddr[2], sizeof(uint32_t)); 417270252Sluigi eaddr[5] = (uint8_t)unit; 418270252Sluigi 419270252Sluigi ifp = if_alloc(IFT_ETHER); 420270252Sluigi if (ifp == NULL) { 421270252Sluigi D("if_alloc failed"); 422270252Sluigi return ENOMEM; 423270252Sluigi } 424270252Sluigi if_initname(ifp, name, IF_DUNIT_NONE); 425270252Sluigi ifp->if_mtu = 65536; 426270252Sluigi ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST; 427270252Sluigi ifp->if_init = (void *)nm_vi_dummy; 428270252Sluigi ifp->if_ioctl = nm_vi_dummy; 429270252Sluigi ifp->if_start = nm_vi_start; 430270252Sluigi ifp->if_mtu = ETHERMTU; 431270252Sluigi IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); 432270252Sluigi ifp->if_capabilities |= IFCAP_LINKSTATE; 433270252Sluigi ifp->if_capenable |= IFCAP_LINKSTATE; 
434270252Sluigi 435270252Sluigi ether_ifattach(ifp, eaddr); 436270252Sluigi *ret = ifp; 437270252Sluigi return 0; 438270252Sluigi} 439270252Sluigi/* unregister from the system and drop the final refcount */ 440270252Sluigivoid 441270252Sluiginm_vi_detach(struct ifnet *ifp) 442270252Sluigi{ 443270252Sluigi nm_vi_free_index(((char *)IF_LLADDR(ifp))[5]); 444270252Sluigi ether_ifdetach(ifp); 445270252Sluigi if_free(ifp); 446270252Sluigi} 447270252Sluigi 448270252Sluigi/* 449262152Sluigi * In order to track whether pages are still mapped, we hook into 450262152Sluigi * the standard cdev_pager and intercept the constructor and 451262152Sluigi * destructor. 452262152Sluigi */ 453262152Sluigi 454262152Sluigistruct netmap_vm_handle_t { 455262152Sluigi struct cdev *dev; 456262152Sluigi struct netmap_priv_d *priv; 457262152Sluigi}; 458262152Sluigi 459262152Sluigi 460262152Sluigistatic int 461262152Sluiginetmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 462262152Sluigi vm_ooffset_t foff, struct ucred *cred, u_short *color) 463262152Sluigi{ 464262152Sluigi struct netmap_vm_handle_t *vmh = handle; 465262152Sluigi 466262152Sluigi if (netmap_verbose) 467262152Sluigi D("handle %p size %jd prot %d foff %jd", 468262152Sluigi handle, (intmax_t)size, prot, (intmax_t)foff); 469278779Sluigi if (color) 470278779Sluigi *color = 0; 471262152Sluigi dev_ref(vmh->dev); 472262152Sluigi return 0; 473262152Sluigi} 474262152Sluigi 475262152Sluigi 476262152Sluigistatic void 477262152Sluiginetmap_dev_pager_dtor(void *handle) 478262152Sluigi{ 479262152Sluigi struct netmap_vm_handle_t *vmh = handle; 480262152Sluigi struct cdev *dev = vmh->dev; 481262152Sluigi struct netmap_priv_d *priv = vmh->priv; 482262152Sluigi 483262152Sluigi if (netmap_verbose) 484262152Sluigi D("handle %p", handle); 485262152Sluigi netmap_dtor(priv); 486262152Sluigi free(vmh, M_DEVBUF); 487262152Sluigi dev_rel(dev); 488262152Sluigi} 489262152Sluigi 490262152Sluigi 491262152Sluigistatic int 
492262152Sluiginetmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, 493262152Sluigi int prot, vm_page_t *mres) 494262152Sluigi{ 495262152Sluigi struct netmap_vm_handle_t *vmh = object->handle; 496262152Sluigi struct netmap_priv_d *priv = vmh->priv; 497262152Sluigi vm_paddr_t paddr; 498262152Sluigi vm_page_t page; 499262152Sluigi vm_memattr_t memattr; 500262152Sluigi vm_pindex_t pidx; 501262152Sluigi 502262152Sluigi ND("object %p offset %jd prot %d mres %p", 503262152Sluigi object, (intmax_t)offset, prot, mres); 504262152Sluigi memattr = object->memattr; 505262152Sluigi pidx = OFF_TO_IDX(offset); 506262152Sluigi paddr = netmap_mem_ofstophys(priv->np_mref, offset); 507262152Sluigi if (paddr == 0) 508262152Sluigi return VM_PAGER_FAIL; 509262152Sluigi 510262152Sluigi if (((*mres)->flags & PG_FICTITIOUS) != 0) { 511262152Sluigi /* 512262152Sluigi * If the passed in result page is a fake page, update it with 513262152Sluigi * the new physical address. 514262152Sluigi */ 515262152Sluigi page = *mres; 516262152Sluigi vm_page_updatefake(page, paddr, memattr); 517262152Sluigi } else { 518262152Sluigi /* 519262152Sluigi * Replace the passed in reqpage page with our own fake page and 520262152Sluigi * free up the all of the original pages. 
521262152Sluigi */ 522262152Sluigi#ifndef VM_OBJECT_WUNLOCK /* FreeBSD < 10.x */ 523262152Sluigi#define VM_OBJECT_WUNLOCK VM_OBJECT_UNLOCK 524262152Sluigi#define VM_OBJECT_WLOCK VM_OBJECT_LOCK 525262152Sluigi#endif /* VM_OBJECT_WUNLOCK */ 526262152Sluigi 527262152Sluigi VM_OBJECT_WUNLOCK(object); 528262152Sluigi page = vm_page_getfake(paddr, memattr); 529262152Sluigi VM_OBJECT_WLOCK(object); 530262152Sluigi vm_page_lock(*mres); 531262152Sluigi vm_page_free(*mres); 532262152Sluigi vm_page_unlock(*mres); 533262152Sluigi *mres = page; 534262152Sluigi vm_page_insert(page, object, pidx); 535262152Sluigi } 536262152Sluigi page->valid = VM_PAGE_BITS_ALL; 537262152Sluigi return (VM_PAGER_OK); 538262152Sluigi} 539262152Sluigi 540262152Sluigi 541262152Sluigistatic struct cdev_pager_ops netmap_cdev_pager_ops = { 542262152Sluigi .cdev_pg_ctor = netmap_dev_pager_ctor, 543262152Sluigi .cdev_pg_dtor = netmap_dev_pager_dtor, 544262152Sluigi .cdev_pg_fault = netmap_dev_pager_fault, 545262152Sluigi}; 546262152Sluigi 547262152Sluigi 548262152Sluigistatic int 549262152Sluiginetmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff, 550262152Sluigi vm_size_t objsize, vm_object_t *objp, int prot) 551262152Sluigi{ 552262152Sluigi int error; 553262152Sluigi struct netmap_vm_handle_t *vmh; 554262152Sluigi struct netmap_priv_d *priv; 555262152Sluigi vm_object_t obj; 556262152Sluigi 557262152Sluigi if (netmap_verbose) 558262152Sluigi D("cdev %p foff %jd size %jd objp %p prot %d", cdev, 559262152Sluigi (intmax_t )*foff, (intmax_t )objsize, objp, prot); 560262152Sluigi 561262152Sluigi vmh = malloc(sizeof(struct netmap_vm_handle_t), M_DEVBUF, 562262152Sluigi M_NOWAIT | M_ZERO); 563262152Sluigi if (vmh == NULL) 564262152Sluigi return ENOMEM; 565262152Sluigi vmh->dev = cdev; 566262152Sluigi 567262152Sluigi NMG_LOCK(); 568262152Sluigi error = devfs_get_cdevpriv((void**)&priv); 569262152Sluigi if (error) 570262152Sluigi goto err_unlock; 571262152Sluigi vmh->priv = priv; 572262152Sluigi 
priv->np_refcount++; 573262152Sluigi NMG_UNLOCK(); 574262152Sluigi 575262152Sluigi error = netmap_get_memory(priv); 576262152Sluigi if (error) 577262152Sluigi goto err_deref; 578262152Sluigi 579262152Sluigi obj = cdev_pager_allocate(vmh, OBJT_DEVICE, 580262152Sluigi &netmap_cdev_pager_ops, objsize, prot, 581262152Sluigi *foff, NULL); 582262152Sluigi if (obj == NULL) { 583262152Sluigi D("cdev_pager_allocate failed"); 584262152Sluigi error = EINVAL; 585262152Sluigi goto err_deref; 586262152Sluigi } 587262152Sluigi 588262152Sluigi *objp = obj; 589262152Sluigi return 0; 590262152Sluigi 591262152Sluigierr_deref: 592262152Sluigi NMG_LOCK(); 593262152Sluigi priv->np_refcount--; 594262152Sluigierr_unlock: 595262152Sluigi NMG_UNLOCK(); 596262152Sluigi// err: 597262152Sluigi free(vmh, M_DEVBUF); 598262152Sluigi return error; 599262152Sluigi} 600262152Sluigi 601262152Sluigi 602262152Sluigi// XXX can we remove this ? 603262152Sluigistatic int 604262152Sluiginetmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td) 605262152Sluigi{ 606262152Sluigi if (netmap_verbose) 607262152Sluigi D("dev %p fflag 0x%x devtype %d td %p", 608262152Sluigi dev, fflag, devtype, td); 609262152Sluigi return 0; 610262152Sluigi} 611262152Sluigi 612262152Sluigi 613262152Sluigistatic int 614262152Sluiginetmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td) 615262152Sluigi{ 616262152Sluigi struct netmap_priv_d *priv; 617262152Sluigi int error; 618262152Sluigi 619262152Sluigi (void)dev; 620262152Sluigi (void)oflags; 621262152Sluigi (void)devtype; 622262152Sluigi (void)td; 623262152Sluigi 624262152Sluigi // XXX wait or nowait ? 
625262152Sluigi priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF, 626262152Sluigi M_NOWAIT | M_ZERO); 627262152Sluigi if (priv == NULL) 628262152Sluigi return ENOMEM; 629262152Sluigi 630262152Sluigi error = devfs_set_cdevpriv(priv, netmap_dtor); 631262152Sluigi if (error) 632262152Sluigi return error; 633262152Sluigi 634262152Sluigi priv->np_refcount = 1; 635262152Sluigi 636262152Sluigi return 0; 637262152Sluigi} 638262152Sluigi 639262152Sluigi/******************** kqueue support ****************/ 640262152Sluigi 641262152Sluigi/* 642262152Sluigi * The OS_selwakeup also needs to issue a KNOTE_UNLOCKED. 643262152Sluigi * We use a non-zero argument to distinguish the call from the one 644262152Sluigi * in kevent_scan() which instead also needs to run netmap_poll(). 645262152Sluigi * The knote uses a global mutex for the time being. We might 646262152Sluigi * try to reuse the one in the si, but it is not allocated 647262152Sluigi * permanently so it might be a bit tricky. 648262152Sluigi * 649262152Sluigi * The *kqfilter function registers one or another f_event 650262152Sluigi * depending on read or write mode. 651262152Sluigi * In the call to f_event() td_fpop is NULL so any child function 652262152Sluigi * calling devfs_get_cdevpriv() would fail - and we need it in 653262152Sluigi * netmap_poll(). As a workaround we store priv into kn->kn_hook 654262152Sluigi * and pass it as first argument to netmap_poll(), which then 655262152Sluigi * uses the failure to tell that we are called from f_event() 656262152Sluigi * and do not need the selrecord(). 
657262152Sluigi */ 658262152Sluigi 659262152Sluigi 660262152Sluigivoid 661278779Sluigifreebsd_selwakeup(struct nm_selinfo *si, int pri) 662262152Sluigi{ 663262152Sluigi if (netmap_verbose) 664278779Sluigi D("on knote %p", &si->si.si_note); 665278779Sluigi selwakeuppri(&si->si, pri); 666262152Sluigi /* use a non-zero hint to tell the notification from the 667262152Sluigi * call done in kqueue_scan() which uses 0 668262152Sluigi */ 669278779Sluigi KNOTE_UNLOCKED(&si->si.si_note, 0x100 /* notification */); 670262152Sluigi} 671262152Sluigi 672262152Sluigistatic void 673262152Sluiginetmap_knrdetach(struct knote *kn) 674262152Sluigi{ 675262152Sluigi struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook; 676278779Sluigi struct selinfo *si = &priv->np_rxsi->si; 677262152Sluigi 678262152Sluigi D("remove selinfo %p", si); 679262152Sluigi knlist_remove(&si->si_note, kn, 0); 680262152Sluigi} 681262152Sluigi 682262152Sluigistatic void 683262152Sluiginetmap_knwdetach(struct knote *kn) 684262152Sluigi{ 685262152Sluigi struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook; 686278779Sluigi struct selinfo *si = &priv->np_txsi->si; 687262152Sluigi 688262152Sluigi D("remove selinfo %p", si); 689262152Sluigi knlist_remove(&si->si_note, kn, 0); 690262152Sluigi} 691262152Sluigi 692262152Sluigi/* 693262152Sluigi * callback from notifies (generated externally) and our 694262152Sluigi * calls to kevent(). The former we just return 1 (ready) 695262152Sluigi * since we do not know better. 696262152Sluigi * In the latter we call netmap_poll and return 0/1 accordingly. 
697262152Sluigi */ 698262152Sluigistatic int 699262152Sluiginetmap_knrw(struct knote *kn, long hint, int events) 700262152Sluigi{ 701262152Sluigi struct netmap_priv_d *priv; 702262152Sluigi int revents; 703262152Sluigi 704262152Sluigi if (hint != 0) { 705262152Sluigi ND(5, "call from notify"); 706262152Sluigi return 1; /* assume we are ready */ 707262152Sluigi } 708262152Sluigi priv = kn->kn_hook; 709262152Sluigi /* the notification may come from an external thread, 710262152Sluigi * in which case we do not want to run the netmap_poll 711262152Sluigi * This should be filtered above, but check just in case. 712262152Sluigi */ 713262152Sluigi if (curthread != priv->np_td) { /* should not happen */ 714262152Sluigi RD(5, "curthread changed %p %p", curthread, priv->np_td); 715262152Sluigi return 1; 716262152Sluigi } else { 717262152Sluigi revents = netmap_poll((void *)priv, events, curthread); 718262152Sluigi return (events & revents) ? 1 : 0; 719262152Sluigi } 720262152Sluigi} 721262152Sluigi 722262152Sluigistatic int 723262152Sluiginetmap_knread(struct knote *kn, long hint) 724262152Sluigi{ 725262152Sluigi return netmap_knrw(kn, hint, POLLIN); 726262152Sluigi} 727262152Sluigi 728262152Sluigistatic int 729262152Sluiginetmap_knwrite(struct knote *kn, long hint) 730262152Sluigi{ 731262152Sluigi return netmap_knrw(kn, hint, POLLOUT); 732262152Sluigi} 733262152Sluigi 734262152Sluigistatic struct filterops netmap_rfiltops = { 735262152Sluigi .f_isfd = 1, 736262152Sluigi .f_detach = netmap_knrdetach, 737262152Sluigi .f_event = netmap_knread, 738262152Sluigi}; 739262152Sluigi 740262152Sluigistatic struct filterops netmap_wfiltops = { 741262152Sluigi .f_isfd = 1, 742262152Sluigi .f_detach = netmap_knwdetach, 743262152Sluigi .f_event = netmap_knwrite, 744262152Sluigi}; 745262152Sluigi 746262152Sluigi 747262152Sluigi/* 748262152Sluigi * This is called when a thread invokes kevent() to record 749262152Sluigi * a change in the configuration of the kqueue(). 
750262152Sluigi * The 'priv' should be the same as in the netmap device. 751262152Sluigi */ 752262152Sluigistatic int 753262152Sluiginetmap_kqfilter(struct cdev *dev, struct knote *kn) 754262152Sluigi{ 755262152Sluigi struct netmap_priv_d *priv; 756262152Sluigi int error; 757262152Sluigi struct netmap_adapter *na; 758278779Sluigi struct nm_selinfo *si; 759262152Sluigi int ev = kn->kn_filter; 760262152Sluigi 761262152Sluigi if (ev != EVFILT_READ && ev != EVFILT_WRITE) { 762262152Sluigi D("bad filter request %d", ev); 763262152Sluigi return 1; 764262152Sluigi } 765262152Sluigi error = devfs_get_cdevpriv((void**)&priv); 766262152Sluigi if (error) { 767262152Sluigi D("device not yet setup"); 768262152Sluigi return 1; 769262152Sluigi } 770262152Sluigi na = priv->np_na; 771262152Sluigi if (na == NULL) { 772262152Sluigi D("no netmap adapter for this file descriptor"); 773262152Sluigi return 1; 774262152Sluigi } 775262152Sluigi /* the si is indicated in the priv */ 776262152Sluigi si = (ev == EVFILT_WRITE) ? priv->np_txsi : priv->np_rxsi; 777262152Sluigi // XXX lock(priv) ? 778262152Sluigi kn->kn_fop = (ev == EVFILT_WRITE) ? 779262152Sluigi &netmap_wfiltops : &netmap_rfiltops; 780262152Sluigi kn->kn_hook = priv; 781278779Sluigi knlist_add(&si->si.si_note, kn, 1); 782262152Sluigi // XXX unlock(priv) 783262152Sluigi ND("register %p %s td %p priv %p kn %p np_nifp %p kn_fp/fpop %s", 784262152Sluigi na, na->ifp->if_xname, curthread, priv, kn, 785262152Sluigi priv->np_nifp, 786262152Sluigi kn->kn_fp == curthread->td_fpop ? 
"match" : "MISMATCH"); 787262152Sluigi return 0; 788262152Sluigi} 789262152Sluigi 790262152Sluigistruct cdevsw netmap_cdevsw = { 791262152Sluigi .d_version = D_VERSION, 792262152Sluigi .d_name = "netmap", 793262152Sluigi .d_open = netmap_open, 794262152Sluigi .d_mmap_single = netmap_mmap_single, 795262152Sluigi .d_ioctl = netmap_ioctl, 796262152Sluigi .d_poll = netmap_poll, 797262152Sluigi .d_kqfilter = netmap_kqfilter, 798262152Sluigi .d_close = netmap_close, 799262152Sluigi}; 800262152Sluigi/*--- end of kqueue support ----*/ 801262152Sluigi 802262152Sluigi/* 803262152Sluigi * Kernel entry point. 804262152Sluigi * 805262152Sluigi * Initialize/finalize the module and return. 806262152Sluigi * 807262152Sluigi * Return 0 on success, errno on failure. 808262152Sluigi */ 809262152Sluigistatic int 810262152Sluiginetmap_loader(__unused struct module *module, int event, __unused void *arg) 811262152Sluigi{ 812262152Sluigi int error = 0; 813262152Sluigi 814262152Sluigi switch (event) { 815262152Sluigi case MOD_LOAD: 816262152Sluigi error = netmap_init(); 817262152Sluigi break; 818262152Sluigi 819262152Sluigi case MOD_UNLOAD: 820262152Sluigi netmap_fini(); 821262152Sluigi break; 822262152Sluigi 823262152Sluigi default: 824262152Sluigi error = EOPNOTSUPP; 825262152Sluigi break; 826262152Sluigi } 827262152Sluigi 828262152Sluigi return (error); 829262152Sluigi} 830262152Sluigi 831262152Sluigi 832262152SluigiDEV_MODULE(netmap, netmap_loader, NULL); 833