/* netmap_user.h revision 341477 */
/*
 * Copyright (C) 2011-2016 Universita` di Pisa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $FreeBSD: stable/11/sys/net/netmap_user.h 341477 2018-12-04 17:40:56Z vmaffione $
 *
 * Functions and macros to manipulate netmap structures and packets
 * in userspace. See netmap(4) for more information.
 *
 * The address of the struct netmap_if, say nifp, is computed from the
 * value returned from ioctl(.., NIOCREGIF, ...) and the mmap region:
 *	ioctl(fd, NIOCREGIF, &req);
 *	mem = mmap(0, ... );
 *	nifp = NETMAP_IF(mem, req.nr_offset);
 *		(so simple, we could just do it manually)
 *
 * From there:
 *	struct netmap_ring *NETMAP_TXRING(nifp, index)
 *	struct netmap_ring *NETMAP_RXRING(nifp, index)
 *	we can access ring->cur, ring->head, ring->tail, etc.
 *
 *	ring->slot[i] gives us the i-th slot (we can access
 *		directly len, flags, buf_idx)
 *
 *	char *buf = NETMAP_BUF(ring, x) returns a pointer to
 *		the buffer numbered x
 *
 * All ring indexes (head, cur, tail) should always move forward.
 * To compute the next index in a circular ring you can use
 *	i = nm_ring_next(ring, i);
 *
 * To ease porting apps from pcap to netmap we supply a few functions
 * that can be called to open, close, read and write on netmap in a way
 * similar to libpcap. Note that the read/write functions depend on
 * an ioctl()/select()/poll() being issued to refill rings or push
 * packets out.
 *
 * In order to use these, include #define NETMAP_WITH_LIBS
 * in the source file that invokes these functions.
64 */ 65 66#ifndef _NET_NETMAP_USER_H_ 67#define _NET_NETMAP_USER_H_ 68 69#define NETMAP_DEVICE_NAME "/dev/netmap" 70 71#ifdef __CYGWIN__ 72/* 73 * we can compile userspace apps with either cygwin or msvc, 74 * and we use _WIN32 to identify windows specific code 75 */ 76#ifndef _WIN32 77#define _WIN32 78#endif /* _WIN32 */ 79 80#endif /* __CYGWIN__ */ 81 82#ifdef _WIN32 83#undef NETMAP_DEVICE_NAME 84#define NETMAP_DEVICE_NAME "/proc/sys/DosDevices/Global/netmap" 85#include <windows.h> 86#include <WinDef.h> 87#include <sys/cygwin.h> 88#endif /* _WIN32 */ 89 90#include <stdint.h> 91#include <sys/socket.h> /* apple needs sockaddr */ 92#include <net/if.h> /* IFNAMSIZ */ 93#include <ctype.h> 94 95#ifndef likely 96#define likely(x) __builtin_expect(!!(x), 1) 97#define unlikely(x) __builtin_expect(!!(x), 0) 98#endif /* likely and unlikely */ 99 100#include <net/netmap.h> 101 102/* helper macro */ 103#define _NETMAP_OFFSET(type, ptr, offset) \ 104 ((type)(void *)((char *)(ptr) + (offset))) 105 106#define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs) 107 108#define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 109 nifp, (nifp)->ring_ofs[index] ) 110 111#define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 112 nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] ) 113 114#define NETMAP_BUF(ring, index) \ 115 ((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size)) 116 117#define NETMAP_BUF_IDX(ring, buf) \ 118 ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ 119 (ring)->nr_buf_size ) 120 121 122static inline uint32_t 123nm_ring_next(struct netmap_ring *r, uint32_t i) 124{ 125 return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1); 126} 127 128 129/* 130 * Return 1 if we have pending transmissions in the tx ring. 
131 * When everything is complete ring->head = ring->tail + 1 (modulo ring size) 132 */ 133static inline int 134nm_tx_pending(struct netmap_ring *r) 135{ 136 return nm_ring_next(r, r->tail) != r->head; 137} 138 139 140static inline uint32_t 141nm_ring_space(struct netmap_ring *ring) 142{ 143 int ret = ring->tail - ring->cur; 144 if (ret < 0) 145 ret += ring->num_slots; 146 return ret; 147} 148 149 150#ifdef NETMAP_WITH_LIBS 151/* 152 * Support for simple I/O libraries. 153 * Include other system headers required for compiling this. 154 */ 155 156#ifndef HAVE_NETMAP_WITH_LIBS 157#define HAVE_NETMAP_WITH_LIBS 158 159#include <stdio.h> 160#include <sys/time.h> 161#include <sys/mman.h> 162#include <string.h> /* memset */ 163#include <sys/ioctl.h> 164#include <sys/errno.h> /* EINVAL */ 165#include <fcntl.h> /* O_RDWR */ 166#include <unistd.h> /* close() */ 167#include <signal.h> 168#include <stdlib.h> 169 170#ifndef ND /* debug macros */ 171/* debug support */ 172#define ND(_fmt, ...) do {} while(0) 173#define D(_fmt, ...) \ 174 do { \ 175 struct timeval _t0; \ 176 gettimeofday(&_t0, NULL); \ 177 fprintf(stderr, "%03d.%06d %s [%d] " _fmt "\n", \ 178 (int)(_t0.tv_sec % 1000), (int)_t0.tv_usec, \ 179 __FUNCTION__, __LINE__, ##__VA_ARGS__); \ 180 } while (0) 181 182/* Rate limited version of "D", lps indicates how many per second */ 183#define RD(lps, format, ...) 
\ 184 do { \ 185 static int __t0, __cnt; \ 186 struct timeval __xxts; \ 187 gettimeofday(&__xxts, NULL); \ 188 if (__t0 != __xxts.tv_sec) { \ 189 __t0 = __xxts.tv_sec; \ 190 __cnt = 0; \ 191 } \ 192 if (__cnt++ < lps) { \ 193 D(format, ##__VA_ARGS__); \ 194 } \ 195 } while (0) 196#endif 197 198struct nm_pkthdr { /* first part is the same as pcap_pkthdr */ 199 struct timeval ts; 200 uint32_t caplen; 201 uint32_t len; 202 203 uint64_t flags; /* NM_MORE_PKTS etc */ 204#define NM_MORE_PKTS 1 205 struct nm_desc *d; 206 struct netmap_slot *slot; 207 uint8_t *buf; 208}; 209 210struct nm_stat { /* same as pcap_stat */ 211 u_int ps_recv; 212 u_int ps_drop; 213 u_int ps_ifdrop; 214#ifdef WIN32 /* XXX or _WIN32 ? */ 215 u_int bs_capt; 216#endif /* WIN32 */ 217}; 218 219#define NM_ERRBUF_SIZE 512 220 221struct nm_desc { 222 struct nm_desc *self; /* point to self if netmap. */ 223 int fd; 224 void *mem; 225 uint32_t memsize; 226 int done_mmap; /* set if mem is the result of mmap */ 227 struct netmap_if * const nifp; 228 uint16_t first_tx_ring, last_tx_ring, cur_tx_ring; 229 uint16_t first_rx_ring, last_rx_ring, cur_rx_ring; 230 struct nmreq req; /* also contains the nr_name = ifname */ 231 struct nm_pkthdr hdr; 232 233 /* 234 * The memory contains netmap_if, rings and then buffers. 235 * Given a pointer (e.g. to nm_inject) we can compare with 236 * mem/buf_start/buf_end to tell if it is a buffer or 237 * some other descriptor in our region. 238 * We also store a pointer to some ring as it helps in the 239 * translation from buffer indexes to addresses. 
240 */ 241 struct netmap_ring * const some_ring; 242 void * const buf_start; 243 void * const buf_end; 244 /* parameters from pcap_open_live */ 245 int snaplen; 246 int promisc; 247 int to_ms; 248 char *errbuf; 249 250 /* save flags so we can restore them on close */ 251 uint32_t if_flags; 252 uint32_t if_reqcap; 253 uint32_t if_curcap; 254 255 struct nm_stat st; 256 char msg[NM_ERRBUF_SIZE]; 257}; 258 259/* 260 * when the descriptor is open correctly, d->self == d 261 * Eventually we should also use some magic number. 262 */ 263#define P2NMD(p) ((struct nm_desc *)(p)) 264#define IS_NETMAP_DESC(d) ((d) && P2NMD(d)->self == P2NMD(d)) 265#define NETMAP_FD(d) (P2NMD(d)->fd) 266 267 268/* 269 * this is a slightly optimized copy routine which rounds 270 * to multiple of 64 bytes and is often faster than dealing 271 * with other odd sizes. We assume there is enough room 272 * in the source and destination buffers. 273 */ 274static inline void 275nm_pkt_copy(const void *_src, void *_dst, int l) 276{ 277 const uint64_t *src = (const uint64_t *)_src; 278 uint64_t *dst = (uint64_t *)_dst; 279 280 if (unlikely(l >= 1024 || l % 64)) { 281 memcpy(dst, src, l); 282 return; 283 } 284 for (; likely(l > 0); l-=64) { 285 *dst++ = *src++; 286 *dst++ = *src++; 287 *dst++ = *src++; 288 *dst++ = *src++; 289 *dst++ = *src++; 290 *dst++ = *src++; 291 *dst++ = *src++; 292 *dst++ = *src++; 293 } 294} 295 296 297/* 298 * The callback, invoked on each received packet. Same as libpcap 299 */ 300typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d); 301 302/* 303 *--- the pcap-like API --- 304 * 305 * nm_open() opens a file descriptor, binds to a port and maps memory. 
306 * 307 * ifname (netmap:foo or vale:foo) is the port name 308 * a suffix can indicate the follwing: 309 * ^ bind the host (sw) ring pair 310 * * bind host and NIC ring pairs 311 * -NN bind individual NIC ring pair 312 * {NN bind master side of pipe NN 313 * }NN bind slave side of pipe NN 314 * a suffix starting with / and the following flags, 315 * in any order: 316 * x exclusive access 317 * z zero copy monitor (both tx and rx) 318 * t monitor tx side (copy monitor) 319 * r monitor rx side (copy monitor) 320 * R bind only RX ring(s) 321 * T bind only TX ring(s) 322 * 323 * req provides the initial values of nmreq before parsing ifname. 324 * Remember that the ifname parsing will override the ring 325 * number in nm_ringid, and part of nm_flags; 326 * flags special functions, normally 0 327 * indicates which fields of *arg are significant 328 * arg special functions, normally NULL 329 * if passed a netmap_desc with mem != NULL, 330 * use that memory instead of mmap. 331 */ 332 333static struct nm_desc *nm_open(const char *ifname, const struct nmreq *req, 334 uint64_t flags, const struct nm_desc *arg); 335 336/* 337 * nm_open can import some fields from the parent descriptor. 338 * These flags control which ones. 339 * Also in flags you can specify NETMAP_NO_TX_POLL and NETMAP_DO_RX_POLL, 340 * which set the initial value for these flags. 341 * Note that the 16 low bits of the flags are reserved for data 342 * that may go into the nmreq. 343 */ 344enum { 345 NM_OPEN_NO_MMAP = 0x040000, /* reuse mmap from parent */ 346 NM_OPEN_IFNAME = 0x080000, /* nr_name, nr_ringid, nr_flags */ 347 NM_OPEN_ARG1 = 0x100000, 348 NM_OPEN_ARG2 = 0x200000, 349 NM_OPEN_ARG3 = 0x400000, 350 NM_OPEN_RING_CFG = 0x800000, /* tx|rx rings|slots */ 351}; 352 353 354/* 355 * nm_close() closes and restores the port to its previous state 356 */ 357 358static int nm_close(struct nm_desc *); 359 360/* 361 * nm_mmap() do mmap or inherit from parent if the nr_arg2 362 * (memory block) matches. 
363 */ 364 365static int nm_mmap(struct nm_desc *, const struct nm_desc *); 366 367/* 368 * nm_inject() is the same as pcap_inject() 369 * nm_dispatch() is the same as pcap_dispatch() 370 * nm_nextpkt() is the same as pcap_next() 371 */ 372 373static int nm_inject(struct nm_desc *, const void *, size_t); 374static int nm_dispatch(struct nm_desc *, int, nm_cb_t, u_char *); 375static u_char *nm_nextpkt(struct nm_desc *, struct nm_pkthdr *); 376 377#ifdef _WIN32 378 379intptr_t _get_osfhandle(int); /* defined in io.h in windows */ 380 381/* 382 * In windows we do not have yet native poll support, so we keep track 383 * of file descriptors associated to netmap ports to emulate poll on 384 * them and fall back on regular poll on other file descriptors. 385 */ 386struct win_netmap_fd_list { 387 struct win_netmap_fd_list *next; 388 int win_netmap_fd; 389 HANDLE win_netmap_handle; 390}; 391 392/* 393 * list head containing all the netmap opened fd and their 394 * windows HANDLE counterparts 395 */ 396static struct win_netmap_fd_list *win_netmap_fd_list_head; 397 398static void 399win_insert_fd_record(int fd) 400{ 401 struct win_netmap_fd_list *curr; 402 403 for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { 404 if (fd == curr->win_netmap_fd) { 405 return; 406 } 407 } 408 curr = calloc(1, sizeof(*curr)); 409 curr->next = win_netmap_fd_list_head; 410 curr->win_netmap_fd = fd; 411 curr->win_netmap_handle = IntToPtr(_get_osfhandle(fd)); 412 win_netmap_fd_list_head = curr; 413} 414 415void 416win_remove_fd_record(int fd) 417{ 418 struct win_netmap_fd_list *curr = win_netmap_fd_list_head; 419 struct win_netmap_fd_list *prev = NULL; 420 for (; curr ; prev = curr, curr = curr->next) { 421 if (fd != curr->win_netmap_fd) 422 continue; 423 /* found the entry */ 424 if (prev == NULL) { /* we are freeing the first entry */ 425 win_netmap_fd_list_head = curr->next; 426 } else { 427 prev->next = curr->next; 428 } 429 free(curr); 430 break; 431 } 432} 433 434 435HANDLE 
436win_get_netmap_handle(int fd) 437{ 438 struct win_netmap_fd_list *curr; 439 440 for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { 441 if (fd == curr->win_netmap_fd) { 442 return curr->win_netmap_handle; 443 } 444 } 445 return NULL; 446} 447 448/* 449 * we need to wrap ioctl and mmap, at least for the netmap file descriptors 450 */ 451 452/* 453 * use this function only from netmap_user.h internal functions 454 * same as ioctl, returns 0 on success and -1 on error 455 */ 456static int 457win_nm_ioctl_internal(HANDLE h, int32_t ctlCode, void *arg) 458{ 459 DWORD bReturn = 0, szIn, szOut; 460 BOOL ioctlReturnStatus; 461 void *inParam = arg, *outParam = arg; 462 463 switch (ctlCode) { 464 case NETMAP_POLL: 465 szIn = sizeof(POLL_REQUEST_DATA); 466 szOut = sizeof(POLL_REQUEST_DATA); 467 break; 468 case NETMAP_MMAP: 469 szIn = 0; 470 szOut = sizeof(void*); 471 inParam = NULL; /* nothing on input */ 472 break; 473 case NIOCTXSYNC: 474 case NIOCRXSYNC: 475 szIn = 0; 476 szOut = 0; 477 break; 478 case NIOCREGIF: 479 szIn = sizeof(struct nmreq); 480 szOut = sizeof(struct nmreq); 481 break; 482 case NIOCCONFIG: 483 D("unsupported NIOCCONFIG!"); 484 return -1; 485 486 default: /* a regular ioctl */ 487 D("invalid ioctl %x on netmap fd", ctlCode); 488 return -1; 489 } 490 491 ioctlReturnStatus = DeviceIoControl(h, 492 ctlCode, inParam, szIn, 493 outParam, szOut, 494 &bReturn, NULL); 495 // XXX note windows returns 0 on error or async call, 1 on success 496 // we could call GetLastError() to figure out what happened 497 return ioctlReturnStatus ? 
0 : -1; 498} 499 500/* 501 * this function is what must be called from user-space programs 502 * same as ioctl, returns 0 on success and -1 on error 503 */ 504static int 505win_nm_ioctl(int fd, int32_t ctlCode, void *arg) 506{ 507 HANDLE h = win_get_netmap_handle(fd); 508 509 if (h == NULL) { 510 return ioctl(fd, ctlCode, arg); 511 } else { 512 return win_nm_ioctl_internal(h, ctlCode, arg); 513 } 514} 515 516#define ioctl win_nm_ioctl /* from now on, within this file ... */ 517 518/* 519 * We cannot use the native mmap on windows 520 * The only parameter used is "fd", the other ones are just declared to 521 * make this signature comparable to the FreeBSD/Linux one 522 */ 523static void * 524win32_mmap_emulated(void *addr, size_t length, int prot, int flags, int fd, int32_t offset) 525{ 526 HANDLE h = win_get_netmap_handle(fd); 527 528 if (h == NULL) { 529 return mmap(addr, length, prot, flags, fd, offset); 530 } else { 531 MEMORY_ENTRY ret; 532 533 return win_nm_ioctl_internal(h, NETMAP_MMAP, &ret) ? 
534 NULL : ret.pUsermodeVirtualAddress; 535 } 536} 537 538#define mmap win32_mmap_emulated 539 540#include <sys/poll.h> /* XXX needed to use the structure pollfd */ 541 542static int 543win_nm_poll(struct pollfd *fds, int nfds, int timeout) 544{ 545 HANDLE h; 546 547 if (nfds != 1 || fds == NULL || (h = win_get_netmap_handle(fds->fd)) == NULL) {; 548 return poll(fds, nfds, timeout); 549 } else { 550 POLL_REQUEST_DATA prd; 551 552 prd.timeout = timeout; 553 prd.events = fds->events; 554 555 win_nm_ioctl_internal(h, NETMAP_POLL, &prd); 556 if ((prd.revents == POLLERR) || (prd.revents == STATUS_TIMEOUT)) { 557 return -1; 558 } 559 return 1; 560 } 561} 562 563#define poll win_nm_poll 564 565static int 566win_nm_open(char* pathname, int flags) 567{ 568 569 if (strcmp(pathname, NETMAP_DEVICE_NAME) == 0) { 570 int fd = open(NETMAP_DEVICE_NAME, O_RDWR); 571 if (fd < 0) { 572 return -1; 573 } 574 575 win_insert_fd_record(fd); 576 return fd; 577 } else { 578 return open(pathname, flags); 579 } 580} 581 582#define open win_nm_open 583 584static int 585win_nm_close(int fd) 586{ 587 if (fd != -1) { 588 close(fd); 589 if (win_get_netmap_handle(fd) != NULL) { 590 win_remove_fd_record(fd); 591 } 592 } 593 return 0; 594} 595 596#define close win_nm_close 597 598#endif /* _WIN32 */ 599 600static int 601nm_is_identifier(const char *s, const char *e) 602{ 603 for (; s != e; s++) { 604 if (!isalnum(*s) && *s != '_') { 605 return 0; 606 } 607 } 608 609 return 1; 610} 611 612#define MAXERRMSG 80 613static int 614nm_parse(const char *ifname, struct nm_desc *d, char *err) 615{ 616 int is_vale; 617 const char *port = NULL; 618 const char *vpname = NULL; 619 u_int namelen; 620 uint32_t nr_ringid = 0, nr_flags; 621 char errmsg[MAXERRMSG] = ""; 622 long num; 623 uint16_t nr_arg2 = 0; 624 enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK, P_MEMID } p_state; 625 626 errno = 0; 627 628 is_vale = (ifname[0] == 'v'); 629 if (is_vale) { 630 port = index(ifname, ':'); 631 if (port == NULL) { 
632 snprintf(errmsg, MAXERRMSG, 633 "missing ':' in vale name"); 634 goto fail; 635 } 636 637 if (!nm_is_identifier(ifname + 4, port)) { 638 snprintf(errmsg, MAXERRMSG, "invalid bridge name"); 639 goto fail; 640 } 641 642 vpname = ++port; 643 } else { 644 ifname += 7; 645 port = ifname; 646 } 647 648 /* scan for a separator */ 649 for (; *port && !index("-*^{}/@", *port); port++) 650 ; 651 652 if (is_vale && !nm_is_identifier(vpname, port)) { 653 snprintf(errmsg, MAXERRMSG, "invalid bridge port name"); 654 goto fail; 655 } 656 657 namelen = port - ifname; 658 if (namelen >= sizeof(d->req.nr_name)) { 659 snprintf(errmsg, MAXERRMSG, "name too long"); 660 goto fail; 661 } 662 memcpy(d->req.nr_name, ifname, namelen); 663 d->req.nr_name[namelen] = '\0'; 664 665 p_state = P_START; 666 nr_flags = NR_REG_ALL_NIC; /* default for no suffix */ 667 while (*port) { 668 switch (p_state) { 669 case P_START: 670 switch (*port) { 671 case '^': /* only SW ring */ 672 nr_flags = NR_REG_SW; 673 p_state = P_RNGSFXOK; 674 break; 675 case '*': /* NIC and SW */ 676 nr_flags = NR_REG_NIC_SW; 677 p_state = P_RNGSFXOK; 678 break; 679 case '-': /* one NIC ring pair */ 680 nr_flags = NR_REG_ONE_NIC; 681 p_state = P_GETNUM; 682 break; 683 case '{': /* pipe (master endpoint) */ 684 nr_flags = NR_REG_PIPE_MASTER; 685 p_state = P_GETNUM; 686 break; 687 case '}': /* pipe (slave endoint) */ 688 nr_flags = NR_REG_PIPE_SLAVE; 689 p_state = P_GETNUM; 690 break; 691 case '/': /* start of flags */ 692 p_state = P_FLAGS; 693 break; 694 case '@': /* start of memid */ 695 p_state = P_MEMID; 696 break; 697 default: 698 snprintf(errmsg, MAXERRMSG, "unknown modifier: '%c'", *port); 699 goto fail; 700 } 701 port++; 702 break; 703 case P_RNGSFXOK: 704 switch (*port) { 705 case '/': 706 p_state = P_FLAGS; 707 break; 708 case '@': 709 p_state = P_MEMID; 710 break; 711 default: 712 snprintf(errmsg, MAXERRMSG, "unexpected character: '%c'", *port); 713 goto fail; 714 } 715 port++; 716 break; 717 case P_GETNUM: 718 
num = strtol(port, (char **)&port, 10); 719 if (num < 0 || num >= NETMAP_RING_MASK) { 720 snprintf(errmsg, MAXERRMSG, "'%ld' out of range [0, %d)", 721 num, NETMAP_RING_MASK); 722 goto fail; 723 } 724 nr_ringid = num & NETMAP_RING_MASK; 725 p_state = P_RNGSFXOK; 726 break; 727 case P_FLAGS: 728 case P_FLAGSOK: 729 if (*port == '@') { 730 port++; 731 p_state = P_MEMID; 732 break; 733 } 734 switch (*port) { 735 case 'x': 736 nr_flags |= NR_EXCLUSIVE; 737 break; 738 case 'z': 739 nr_flags |= NR_ZCOPY_MON; 740 break; 741 case 't': 742 nr_flags |= NR_MONITOR_TX; 743 break; 744 case 'r': 745 nr_flags |= NR_MONITOR_RX; 746 break; 747 case 'R': 748 nr_flags |= NR_RX_RINGS_ONLY; 749 break; 750 case 'T': 751 nr_flags |= NR_TX_RINGS_ONLY; 752 break; 753 default: 754 snprintf(errmsg, MAXERRMSG, "unrecognized flag: '%c'", *port); 755 goto fail; 756 } 757 port++; 758 p_state = P_FLAGSOK; 759 break; 760 case P_MEMID: 761 if (nr_arg2 != 0) { 762 snprintf(errmsg, MAXERRMSG, "double setting of memid"); 763 goto fail; 764 } 765 num = strtol(port, (char **)&port, 10); 766 if (num <= 0) { 767 snprintf(errmsg, MAXERRMSG, "invalid memid %ld, must be >0", num); 768 goto fail; 769 } 770 nr_arg2 = num; 771 p_state = P_RNGSFXOK; 772 break; 773 } 774 } 775 if (p_state != P_START && p_state != P_RNGSFXOK && p_state != P_FLAGSOK) { 776 snprintf(errmsg, MAXERRMSG, "unexpected end of port name"); 777 goto fail; 778 } 779 ND("flags: %s %s %s %s", 780 (nr_flags & NR_EXCLUSIVE) ? "EXCLUSIVE" : "", 781 (nr_flags & NR_ZCOPY_MON) ? "ZCOPY_MON" : "", 782 (nr_flags & NR_MONITOR_TX) ? "MONITOR_TX" : "", 783 (nr_flags & NR_MONITOR_RX) ? "MONITOR_RX" : ""); 784 785 d->req.nr_flags |= nr_flags; 786 d->req.nr_ringid |= nr_ringid; 787 d->req.nr_arg2 = nr_arg2; 788 789 d->self = d; 790 791 return 0; 792fail: 793 if (!errno) 794 errno = EINVAL; 795 if (err) 796 strncpy(err, errmsg, MAXERRMSG); 797 return -1; 798} 799 800/* 801 * Try to open, return descriptor if successful, NULL otherwise. 
802 * An invalid netmap name will return errno = 0; 803 * You can pass a pointer to a pre-filled nm_desc to add special 804 * parameters. Flags is used as follows 805 * NM_OPEN_NO_MMAP use the memory from arg, only XXX avoid mmap 806 * if the nr_arg2 (memory block) matches. 807 * NM_OPEN_ARG1 use req.nr_arg1 from arg 808 * NM_OPEN_ARG2 use req.nr_arg2 from arg 809 * NM_OPEN_RING_CFG user ring config from arg 810 */ 811static struct nm_desc * 812nm_open(const char *ifname, const struct nmreq *req, 813 uint64_t new_flags, const struct nm_desc *arg) 814{ 815 struct nm_desc *d = NULL; 816 const struct nm_desc *parent = arg; 817 char errmsg[MAXERRMSG] = ""; 818 uint32_t nr_reg; 819 820 if (strncmp(ifname, "netmap:", 7) && 821 strncmp(ifname, NM_BDG_NAME, strlen(NM_BDG_NAME))) { 822 errno = 0; /* name not recognised, not an error */ 823 return NULL; 824 } 825 826 d = (struct nm_desc *)calloc(1, sizeof(*d)); 827 if (d == NULL) { 828 snprintf(errmsg, MAXERRMSG, "nm_desc alloc failure"); 829 errno = ENOMEM; 830 return NULL; 831 } 832 d->self = d; /* set this early so nm_close() works */ 833 d->fd = open(NETMAP_DEVICE_NAME, O_RDWR); 834 if (d->fd < 0) { 835 snprintf(errmsg, MAXERRMSG, "cannot open /dev/netmap: %s", strerror(errno)); 836 goto fail; 837 } 838 839 if (req) 840 d->req = *req; 841 842 if (!(new_flags & NM_OPEN_IFNAME)) { 843 if (nm_parse(ifname, d, errmsg) < 0) 844 goto fail; 845 } 846 847 d->req.nr_version = NETMAP_API; 848 d->req.nr_ringid &= NETMAP_RING_MASK; 849 850 /* optionally import info from parent */ 851 if (IS_NETMAP_DESC(parent) && new_flags) { 852 if (new_flags & NM_OPEN_ARG1) 853 D("overriding ARG1 %d", parent->req.nr_arg1); 854 d->req.nr_arg1 = new_flags & NM_OPEN_ARG1 ? 
855 parent->req.nr_arg1 : 4; 856 if (new_flags & NM_OPEN_ARG2) { 857 D("overriding ARG2 %d", parent->req.nr_arg2); 858 d->req.nr_arg2 = parent->req.nr_arg2; 859 } 860 if (new_flags & NM_OPEN_ARG3) 861 D("overriding ARG3 %d", parent->req.nr_arg3); 862 d->req.nr_arg3 = new_flags & NM_OPEN_ARG3 ? 863 parent->req.nr_arg3 : 0; 864 if (new_flags & NM_OPEN_RING_CFG) { 865 D("overriding RING_CFG"); 866 d->req.nr_tx_slots = parent->req.nr_tx_slots; 867 d->req.nr_rx_slots = parent->req.nr_rx_slots; 868 d->req.nr_tx_rings = parent->req.nr_tx_rings; 869 d->req.nr_rx_rings = parent->req.nr_rx_rings; 870 } 871 if (new_flags & NM_OPEN_IFNAME) { 872 D("overriding ifname %s ringid 0x%x flags 0x%x", 873 parent->req.nr_name, parent->req.nr_ringid, 874 parent->req.nr_flags); 875 memcpy(d->req.nr_name, parent->req.nr_name, 876 sizeof(d->req.nr_name)); 877 d->req.nr_ringid = parent->req.nr_ringid; 878 d->req.nr_flags = parent->req.nr_flags; 879 } 880 } 881 /* add the *XPOLL flags */ 882 d->req.nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL); 883 884 if (ioctl(d->fd, NIOCREGIF, &d->req)) { 885 snprintf(errmsg, MAXERRMSG, "NIOCREGIF failed: %s", strerror(errno)); 886 goto fail; 887 } 888 889 nr_reg = d->req.nr_flags & NR_REG_MASK; 890 891 if (nr_reg == NR_REG_SW) { /* host stack */ 892 d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings; 893 d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings; 894 } else if (nr_reg == NR_REG_ALL_NIC) { /* only nic */ 895 d->first_tx_ring = 0; 896 d->first_rx_ring = 0; 897 d->last_tx_ring = d->req.nr_tx_rings - 1; 898 d->last_rx_ring = d->req.nr_rx_rings - 1; 899 } else if (nr_reg == NR_REG_NIC_SW) { 900 d->first_tx_ring = 0; 901 d->first_rx_ring = 0; 902 d->last_tx_ring = d->req.nr_tx_rings; 903 d->last_rx_ring = d->req.nr_rx_rings; 904 } else if (nr_reg == NR_REG_ONE_NIC) { 905 /* XXX check validity */ 906 d->first_tx_ring = d->last_tx_ring = 907 d->first_rx_ring = d->last_rx_ring = d->req.nr_ringid & NETMAP_RING_MASK; 908 } else 
{ /* pipes */ 909 d->first_tx_ring = d->last_tx_ring = 0; 910 d->first_rx_ring = d->last_rx_ring = 0; 911 } 912 913 /* if parent is defined, do nm_mmap() even if NM_OPEN_NO_MMAP is set */ 914 if ((!(new_flags & NM_OPEN_NO_MMAP) || parent) && nm_mmap(d, parent)) { 915 snprintf(errmsg, MAXERRMSG, "mmap failed: %s", strerror(errno)); 916 goto fail; 917 } 918 919 920#ifdef DEBUG_NETMAP_USER 921 { /* debugging code */ 922 int i; 923 924 D("%s tx %d .. %d %d rx %d .. %d %d", ifname, 925 d->first_tx_ring, d->last_tx_ring, d->req.nr_tx_rings, 926 d->first_rx_ring, d->last_rx_ring, d->req.nr_rx_rings); 927 for (i = 0; i <= d->req.nr_tx_rings; i++) { 928 struct netmap_ring *r = NETMAP_TXRING(d->nifp, i); 929 D("TX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); 930 } 931 for (i = 0; i <= d->req.nr_rx_rings; i++) { 932 struct netmap_ring *r = NETMAP_RXRING(d->nifp, i); 933 D("RX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); 934 } 935 } 936#endif /* debugging */ 937 938 d->cur_tx_ring = d->first_tx_ring; 939 d->cur_rx_ring = d->first_rx_ring; 940 return d; 941 942fail: 943 nm_close(d); 944 if (errmsg[0]) 945 D("%s %s", errmsg, ifname); 946 if (errno == 0) 947 errno = EINVAL; 948 return NULL; 949} 950 951 952static int 953nm_close(struct nm_desc *d) 954{ 955 /* 956 * ugly trick to avoid unused warnings 957 */ 958 static void *__xxzt[] __attribute__ ((unused)) = 959 { (void *)nm_open, (void *)nm_inject, 960 (void *)nm_dispatch, (void *)nm_nextpkt } ; 961 962 if (d == NULL || d->self != d) 963 return EINVAL; 964 if (d->done_mmap && d->mem) 965 munmap(d->mem, d->memsize); 966 if (d->fd != -1) { 967 close(d->fd); 968 } 969 970 bzero(d, sizeof(*d)); 971 free(d); 972 return 0; 973} 974 975 976static int 977nm_mmap(struct nm_desc *d, const struct nm_desc *parent) 978{ 979 //XXX TODO: check if mmap is already done 980 981 if (IS_NETMAP_DESC(parent) && parent->mem && 982 parent->req.nr_arg2 == d->req.nr_arg2) { 983 /* do not mmap, inherit from parent */ 984 D("do not 
mmap, inherit from parent"); 985 d->memsize = parent->memsize; 986 d->mem = parent->mem; 987 } else { 988 /* XXX TODO: check if memsize is too large (or there is overflow) */ 989 d->memsize = d->req.nr_memsize; 990 d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED, 991 d->fd, 0); 992 if (d->mem == MAP_FAILED) { 993 goto fail; 994 } 995 d->done_mmap = 1; 996 } 997 { 998 struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset); 999 struct netmap_ring *r = NETMAP_RXRING(nifp, d->first_rx_ring); 1000 if ((void *)r == (void *)nifp) { 1001 /* the descriptor is open for TX only */ 1002 r = NETMAP_TXRING(nifp, d->first_tx_ring); 1003 } 1004 1005 *(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp; 1006 *(struct netmap_ring **)(uintptr_t)&d->some_ring = r; 1007 *(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0); 1008 *(void **)(uintptr_t)&d->buf_end = 1009 (char *)d->mem + d->memsize; 1010 } 1011 1012 return 0; 1013 1014fail: 1015 return EINVAL; 1016} 1017 1018/* 1019 * Same prototype as pcap_inject(), only need to cast. 
1020 */ 1021static int 1022nm_inject(struct nm_desc *d, const void *buf, size_t size) 1023{ 1024 u_int c, n = d->last_tx_ring - d->first_tx_ring + 1, 1025 ri = d->cur_tx_ring; 1026 1027 for (c = 0; c < n ; c++, ri++) { 1028 /* compute current ring to use */ 1029 struct netmap_ring *ring; 1030 uint32_t i, j, idx; 1031 size_t rem; 1032 1033 if (ri > d->last_tx_ring) 1034 ri = d->first_tx_ring; 1035 ring = NETMAP_TXRING(d->nifp, ri); 1036 rem = size; 1037 j = ring->cur; 1038 while (rem > ring->nr_buf_size && j != ring->tail) { 1039 rem -= ring->nr_buf_size; 1040 j = nm_ring_next(ring, j); 1041 } 1042 if (j == ring->tail && rem > 0) 1043 continue; 1044 i = ring->cur; 1045 while (i != j) { 1046 idx = ring->slot[i].buf_idx; 1047 ring->slot[i].len = ring->nr_buf_size; 1048 ring->slot[i].flags = NS_MOREFRAG; 1049 nm_pkt_copy(buf, NETMAP_BUF(ring, idx), ring->nr_buf_size); 1050 i = nm_ring_next(ring, i); 1051 buf = (char *)buf + ring->nr_buf_size; 1052 } 1053 idx = ring->slot[i].buf_idx; 1054 ring->slot[i].len = rem; 1055 ring->slot[i].flags = 0; 1056 nm_pkt_copy(buf, NETMAP_BUF(ring, idx), rem); 1057 ring->head = ring->cur = nm_ring_next(ring, i); 1058 d->cur_tx_ring = ri; 1059 return size; 1060 } 1061 return 0; /* fail */ 1062} 1063 1064 1065/* 1066 * Same prototype as pcap_dispatch(), only need to cast. 
1067 */ 1068static int 1069nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg) 1070{ 1071 int n = d->last_rx_ring - d->first_rx_ring + 1; 1072 int c, got = 0, ri = d->cur_rx_ring; 1073 d->hdr.buf = NULL; 1074 d->hdr.flags = NM_MORE_PKTS; 1075 d->hdr.d = d; 1076 1077 if (cnt == 0) 1078 cnt = -1; 1079 /* cnt == -1 means infinite, but rings have a finite amount 1080 * of buffers and the int is large enough that we never wrap, 1081 * so we can omit checking for -1 1082 */ 1083 for (c=0; c < n && cnt != got; c++, ri++) { 1084 /* compute current ring to use */ 1085 struct netmap_ring *ring; 1086 1087 if (ri > d->last_rx_ring) 1088 ri = d->first_rx_ring; 1089 ring = NETMAP_RXRING(d->nifp, ri); 1090 for ( ; !nm_ring_empty(ring) && cnt != got; got++) { 1091 u_int idx, i; 1092 if (d->hdr.buf) { /* from previous round */ 1093 cb(arg, &d->hdr, d->hdr.buf); 1094 } 1095 i = ring->cur; 1096 idx = ring->slot[i].buf_idx; 1097 /* d->cur_rx_ring doesn't change inside this loop, but 1098 * set it here, so it reflects d->hdr.buf's ring */ 1099 d->cur_rx_ring = ri; 1100 d->hdr.slot = &ring->slot[i]; 1101 d->hdr.buf = (u_char *)NETMAP_BUF(ring, idx); 1102 // __builtin_prefetch(buf); 1103 d->hdr.len = d->hdr.caplen = ring->slot[i].len; 1104 d->hdr.ts = ring->ts; 1105 ring->head = ring->cur = nm_ring_next(ring, i); 1106 } 1107 } 1108 if (d->hdr.buf) { /* from previous round */ 1109 d->hdr.flags = 0; 1110 cb(arg, &d->hdr, d->hdr.buf); 1111 } 1112 return got; 1113} 1114 1115static u_char * 1116nm_nextpkt(struct nm_desc *d, struct nm_pkthdr *hdr) 1117{ 1118 int ri = d->cur_rx_ring; 1119 1120 do { 1121 /* compute current ring to use */ 1122 struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri); 1123 if (!nm_ring_empty(ring)) { 1124 u_int i = ring->cur; 1125 u_int idx = ring->slot[i].buf_idx; 1126 u_char *buf = (u_char *)NETMAP_BUF(ring, idx); 1127 1128 // __builtin_prefetch(buf); 1129 hdr->ts = ring->ts; 1130 hdr->len = hdr->caplen = ring->slot[i].len; 1131 ring->cur = 
nm_ring_next(ring, i); 1132 /* we could postpone advancing head if we want 1133 * to hold the buffer. This can be supported in 1134 * the future. 1135 */ 1136 ring->head = ring->cur; 1137 d->cur_rx_ring = ri; 1138 return buf; 1139 } 1140 ri++; 1141 if (ri > d->last_rx_ring) 1142 ri = d->first_rx_ring; 1143 } while (ri != d->cur_rx_ring); 1144 return NULL; /* nothing found */ 1145} 1146 1147#endif /* !HAVE_NETMAP_WITH_LIBS */ 1148 1149#endif /* NETMAP_WITH_LIBS */ 1150 1151#endif /* _NET_NETMAP_USER_H_ */ 1152