/*	$NetBSD: xennetback_xenbus.c,v 1.46 2011/05/30 14:34:58 joerg Exp $	*/

/*
 * Copyright (c) 2006 Manuel Bouyer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: xennetback_xenbus.c,v 1.46 2011/05/30 14:34:58 joerg Exp $");

#include "opt_xen.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/errno.h>
#include <sys/device.h>
#include <sys/intr.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/netisr.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>

#include <net/if_ether.h>


#include <xen/xen.h>
#include <xen/xen_shm.h>
#include <xen/evtchn.h>
#include <xen/xenbus.h>
#include <xen/xennet_checksum.h>

#include <uvm/uvm.h>

#ifdef XENDEBUG_NET
#define XENPRINTF(x) printf x
#else
#define XENPRINTF(x)
#endif

#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)

/* Linux wants at least 16 bytes free in front of the packet */
#define LINUX_REQUESTED_OFFSET 16

/* hash list for TX requests */
/* descriptor of a packet being handled by the kernel */
struct xni_pkt {
	int pkt_id; /* packet's ID */
	grant_handle_t pkt_handle;
	struct xnetback_instance *pkt_xneti; /* pointer back to our softc */
};

static inline void xni_pkt_unmap(struct xni_pkt *, vaddr_t);


/* pools for xni_pkt */
struct pool xni_pkt_pool;
/* ratecheck(9) for pool allocation failures */
struct timeval xni_pool_errintvl = { 30, 0 }; /* 30s, each */
/*
 * Backend network device driver for Xen
 */

/* state of a xnetback instance */
typedef enum {CONNECTED, DISCONNECTING, DISCONNECTED} xnetback_state_t;

/* we keep the xnetback instances in a linked list */
struct xnetback_instance {
	SLIST_ENTRY(xnetback_instance) next;
	struct xenbus_device *xni_xbusd; /* our xenstore entry */
	domid_t xni_domid; /* attached to this domain */
	uint32_t xni_handle; /* domain-specific handle */
	xnetback_state_t xni_status;
	void *xni_softintr;

	/* network interface stuff */
	struct ethercom xni_ec;
	struct callout xni_restart;
	uint8_t xni_enaddr[ETHER_ADDR_LEN];

	/* remote domain communication stuff */
	unsigned int xni_evtchn; /* our event channel */
	netif_tx_back_ring_t xni_txring;
	netif_rx_back_ring_t xni_rxring;
	grant_handle_t xni_tx_ring_handle; /* to unmap the ring */
	grant_handle_t xni_rx_ring_handle;
	vaddr_t xni_tx_ring_va; /* to unmap the ring */
	vaddr_t xni_rx_ring_va;
};
#define xni_if xni_ec.ec_if
#define xni_bpf xni_if.if_bpf

void xvifattach(int);
static int xennetback_ifioctl(struct ifnet *, u_long, void *);
static void xennetback_ifstart(struct ifnet *);
static void xennetback_ifsoftstart_transfer(void *);
static void xennetback_ifsoftstart_copy(void *);
static void xennetback_ifwatchdog(struct ifnet *);
static int xennetback_ifinit(struct ifnet *);
static void xennetback_ifstop(struct ifnet *, int);

static int xennetback_xenbus_create(struct xenbus_device *);
static int xennetback_xenbus_destroy(void *);
static void xennetback_frontend_changed(void *, XenbusState);

static inline void xennetback_tx_response(struct xnetback_instance *,
    int, int);
static void xennetback_tx_free(struct mbuf *, void *, size_t, void *);

SLIST_HEAD(, xnetback_instance) xnetback_instances;

static struct xnetback_instance *xnetif_lookup(domid_t, uint32_t);
static int xennetback_evthandler(void *);

static struct xenbus_backend_driver xvif_backend_driver = {
	.xbakd_create = xennetback_xenbus_create,
	.xbakd_type = "vif"
};

/*
 * Number of packets to transmit in one hypercall (= number of pages to
 * transmit at once).
 */
#define NB_XMIT_PAGES_BATCH 64
/*
 * We will transfer a mapped page to the remote domain, and remap another
 * page in place immediately. For this we keep a list of pages available.
 * When the list is empty, we ask the hypervisor to give us
 * NB_XMIT_PAGES_BATCH pages back.
 */
static unsigned long mcl_pages[NB_XMIT_PAGES_BATCH]; /* our physical pages */
int mcl_pages_alloc; /* current index in mcl_pages */
static int xennetback_get_mcl_page(paddr_t *);
static void xennetback_get_new_mcl_pages(void);
/*
 * If we can't transfer the mbuf directly, we have to copy it to a page which
 * will be transferred to the remote domain.
We use a pool_cache 171 * for this, or the mbuf cluster pool cache if MCLBYTES == PAGE_SIZE 172 */ 173#if MCLBYTES != PAGE_SIZE 174pool_cache_t xmit_pages_cache; 175#endif 176pool_cache_t xmit_pages_cachep; 177 178/* arrays used in xennetback_ifstart(), too large to allocate on stack */ 179static mmu_update_t xstart_mmu[NB_XMIT_PAGES_BATCH]; 180static multicall_entry_t xstart_mcl[NB_XMIT_PAGES_BATCH + 1]; 181static gnttab_transfer_t xstart_gop_transfer[NB_XMIT_PAGES_BATCH]; 182static gnttab_copy_t xstart_gop_copy[NB_XMIT_PAGES_BATCH]; 183struct mbuf *mbufs_sent[NB_XMIT_PAGES_BATCH]; 184struct _pages_pool_free { 185 vaddr_t va; 186 paddr_t pa; 187} pages_pool_free[NB_XMIT_PAGES_BATCH]; 188 189 190static inline void 191xni_pkt_unmap(struct xni_pkt *pkt, vaddr_t pkt_va) 192{ 193 xen_shm_unmap(pkt_va, 1, &pkt->pkt_handle); 194 pool_put(&xni_pkt_pool, pkt); 195} 196 197void 198xvifattach(int n) 199{ 200 int i; 201 struct pglist mlist; 202 struct vm_page *pg; 203 204 XENPRINTF(("xennetback_init\n")); 205 206 /* 207 * steal some non-managed pages to the VM system, to replace 208 * mbuf cluster or xmit_pages_pool pages given to foreign domains. 209 */ 210 if (uvm_pglistalloc(PAGE_SIZE * NB_XMIT_PAGES_BATCH, 0, 0xffffffff, 211 0, 0, &mlist, NB_XMIT_PAGES_BATCH, 0) != 0) 212 panic("xennetback_init: uvm_pglistalloc"); 213 for (i = 0, pg = mlist.tqh_first; pg != NULL; 214 pg = pg->pageq.queue.tqe_next, i++) 215 mcl_pages[i] = xpmap_ptom(VM_PAGE_TO_PHYS(pg)) >> PAGE_SHIFT; 216 if (i != NB_XMIT_PAGES_BATCH) 217 panic("xennetback_init: %d mcl pages", i); 218 mcl_pages_alloc = NB_XMIT_PAGES_BATCH - 1; 219 220 /* initialise pools */ 221 pool_init(&xni_pkt_pool, sizeof(struct xni_pkt), 0, 0, 0, 222 "xnbpkt", NULL, IPL_VM); 223#if MCLBYTES != PAGE_SIZE 224 xmit_pages_cache = pool_cache_init(PAGE_SIZE, 0, 0, 0, "xnbxm", NULL, 225 IPL_VM, NULL, NULL, NULL); 226 xmit_pages_cachep = xmit_pages_cache; 227#else 228 xmit_pages_cachep = mcl_cache; 229#endif 230 231 SLIST_INIT(&xnetback_instances); 232 xenbus_backend_register(&xvif_backend_driver); 233} 234 235static int 236xennetback_xenbus_create(struct xenbus_device *xbusd) 237{ 238 struct xnetback_instance *xneti; 239 long domid, handle; 240 struct ifnet *ifp; 241 extern int ifqmaxlen; /* XXX */ 242 char *val, *e, *p; 243 int i, err; 244 struct xenbus_transaction *xbt; 245 246 if ((err = xenbus_read_ul(NULL, xbusd->xbusd_path, 247 "frontend-id", &domid, 10)) != 0) { 248 aprint_error("xvif: can't read %s/frontend-id: %d\n", 249 xbusd->xbusd_path, err); 250 return err; 251 } 252 if ((err = xenbus_read_ul(NULL, xbusd->xbusd_path, 253 "handle", &handle, 10)) != 0) { 254 aprint_error("xvif: can't read %s/handle: %d\n", 255 xbusd->xbusd_path, err); 256 return err; 257 } 258 259 if (xnetif_lookup(domid, handle) != NULL) { 260 return EEXIST; 261 } 262 xneti = malloc(sizeof(struct xnetback_instance), M_DEVBUF, 263 M_NOWAIT | M_ZERO); 264 if (xneti == NULL) { 265 return ENOMEM; 266 } 267 xneti->xni_domid = domid; 268 xneti->xni_handle = handle; 269 xneti->xni_status = DISCONNECTED; 270 271 xbusd->xbusd_u.b.b_cookie = xneti; 272 xbusd->xbusd_u.b.b_detach = xennetback_xenbus_destroy; 273 xneti->xni_xbusd = xbusd; 274 275 ifp = &xneti->xni_if; 276 ifp->if_softc = xneti; 277 snprintf(ifp->if_xname, IFNAMSIZ, "xvif%di%d", 278 (int)domid, (int)handle); 279 280 /* read mac address */ 281 if ((err = xenbus_read(NULL, xbusd->xbusd_path, "mac", NULL, &val))) { 282 aprint_error_ifnet(ifp, "can't read %s/mac: %d\n", 283 xbusd->xbusd_path, err); 284 goto fail; 285 } 286 for (i = 0, p = 
val; i < 6; i++) { 287 xneti->xni_enaddr[i] = strtoul(p, &e, 16); 288 if ((e[0] == '\0' && i != 5) && e[0] != ':') { 289 aprint_error_ifnet(ifp, 290 "%s is not a valid mac address\n", val); 291 err = EINVAL; 292 goto fail; 293 } 294 p = &e[1]; 295 } 296 free(val, M_DEVBUF); 297 298 /* we can't use the same MAC addr as our guest */ 299 xneti->xni_enaddr[3]++; 300 /* create pseudo-interface */ 301 aprint_verbose_ifnet(ifp, "Ethernet address %s\n", 302 ether_sprintf(xneti->xni_enaddr)); 303 ifp->if_flags = 304 IFF_BROADCAST|IFF_SIMPLEX|IFF_NOTRAILERS|IFF_MULTICAST; 305 ifp->if_snd.ifq_maxlen = 306 max(ifqmaxlen, NET_TX_RING_SIZE * 2); 307 ifp->if_capabilities = IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_UDPv4_Tx; 308 ifp->if_ioctl = xennetback_ifioctl; 309 ifp->if_start = xennetback_ifstart; 310 ifp->if_watchdog = xennetback_ifwatchdog; 311 ifp->if_init = xennetback_ifinit; 312 ifp->if_stop = xennetback_ifstop; 313 ifp->if_timer = 0; 314 IFQ_SET_READY(&ifp->if_snd); 315 if_attach(ifp); 316 ether_ifattach(&xneti->xni_if, xneti->xni_enaddr); 317 318 SLIST_INSERT_HEAD(&xnetback_instances, xneti, next); 319 320 xbusd->xbusd_otherend_changed = xennetback_frontend_changed; 321 322 do { 323 xbt = xenbus_transaction_start(); 324 if (xbt == NULL) { 325 aprint_error_ifnet(ifp, 326 "%s: can't start transaction\n", 327 xbusd->xbusd_path); 328 goto fail; 329 } 330 err = xenbus_printf(xbt, xbusd->xbusd_path, 331 "vifname", "%s", ifp->if_xname); 332 if (err) { 333 aprint_error_ifnet(ifp, 334 "failed to write %s/vifname: %d\n", 335 xbusd->xbusd_path, err); 336 goto abort_xbt; 337 } 338 err = xenbus_printf(xbt, xbusd->xbusd_path, 339 "feature-rx-copy", "%d", 1); 340 if (err) { 341 aprint_error_ifnet(ifp, 342 "failed to write %s/feature-rx-copy: %d\n", 343 xbusd->xbusd_path, err); 344 goto abort_xbt; 345 } 346 err = xenbus_printf(xbt, xbusd->xbusd_path, 347 "feature-rx-flip", "%d", 1); 348 if (err) { 349 aprint_error_ifnet(ifp, 350 "failed to write %s/feature-rx-flip: %d\n", 351 xbusd->xbusd_path, err); 352 goto abort_xbt; 353 } 354 } while ((err = xenbus_transaction_end(xbt, 0)) == EAGAIN); 355 if (err) { 356 aprint_error_ifnet(ifp, 357 "%s: can't end transaction: %d\n", 358 xbusd->xbusd_path, err); 359 } 360 361 err = xenbus_switch_state(xbusd, NULL, XenbusStateInitWait); 362 if (err) { 363 aprint_error_ifnet(ifp, 364 "failed to switch state on %s: %d\n", 365 xbusd->xbusd_path, err); 366 goto fail; 367 } 368 return 0; 369abort_xbt: 370 xenbus_transaction_end(xbt, 1); 371fail: 372 free(xneti, M_DEVBUF); 373 return err; 374} 375 376int 377xennetback_xenbus_destroy(void *arg) 378{ 379 struct xnetback_instance *xneti = arg; 380 struct gnttab_unmap_grant_ref op; 381 int err; 382 383#if 0 384 if (xneti->xni_status == CONNECTED) { 385 return EBUSY; 386 } 387#endif 388 aprint_verbose_ifnet(&xneti->xni_if, "disconnecting\n"); 389 hypervisor_mask_event(xneti->xni_evtchn); 390 event_remove_handler(xneti->xni_evtchn, xennetback_evthandler, xneti); 391 if (xneti->xni_softintr) { 392 softint_disestablish(xneti->xni_softintr); 393 xneti->xni_softintr = NULL; 394 } 395 396 SLIST_REMOVE(&xnetback_instances, 397 xneti, xnetback_instance, next); 398 399 ether_ifdetach(&xneti->xni_if); 400 if_detach(&xneti->xni_if); 401 402 if (xneti->xni_txring.sring) { 403 op.host_addr = xneti->xni_tx_ring_va; 404 op.handle = xneti->xni_tx_ring_handle; 405 op.dev_bus_addr = 0; 406 err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 407 &op, 1); 408 if (err) 409 aprint_error_ifnet(&xneti->xni_if, 410 "unmap_grant_ref failed: %d\n", err); 411 } 412 
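	/* likewise tear down the grant mapping of the RX ring, if any */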
	if (xneti->xni_rxring.sring) {
		op.host_addr = xneti->xni_rx_ring_va;
		op.handle = xneti->xni_rx_ring_handle;
		op.dev_bus_addr = 0;
		err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
		    &op, 1);
		if (err)
			aprint_error_ifnet(&xneti->xni_if,
			    "unmap_grant_ref failed: %d\n", err);
	}
	uvm_km_free(kernel_map, xneti->xni_tx_ring_va,
	    PAGE_SIZE, UVM_KMF_VAONLY);
	uvm_km_free(kernel_map, xneti->xni_rx_ring_va,
	    PAGE_SIZE, UVM_KMF_VAONLY);
	free(xneti, M_DEVBUF);
	return 0;
}

static int
xennetback_connect(struct xnetback_instance *xneti)
{
	int err;
	netif_tx_sring_t *tx_ring;
	netif_rx_sring_t *rx_ring;
	struct gnttab_map_grant_ref op;
	struct gnttab_unmap_grant_ref uop;
	evtchn_op_t evop;
	u_long tx_ring_ref, rx_ring_ref;
	u_long revtchn, rx_copy;
	struct xenbus_device *xbusd = xneti->xni_xbusd;

	/* read communication information */
	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
	    "tx-ring-ref", &tx_ring_ref, 10);
	if (err) {
		xenbus_dev_fatal(xbusd, err, "reading %s/tx-ring-ref",
		    xbusd->xbusd_otherend);
		return -1;
	}
	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
	    "rx-ring-ref", &rx_ring_ref, 10);
	if (err) {
		xenbus_dev_fatal(xbusd, err, "reading %s/rx-ring-ref",
		    xbusd->xbusd_otherend);
		return -1;
	}
	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
	    "event-channel", &revtchn, 10);
	if (err) {
		xenbus_dev_fatal(xbusd, err, "reading %s/event-channel",
		    xbusd->xbusd_otherend);
		return -1;
	}
	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
	    "request-rx-copy", &rx_copy, 10);
	if (err == ENOENT)
		rx_copy = 0;
	else if (err) {
		xenbus_dev_fatal(xbusd, err, "reading %s/request-rx-copy",
		    xbusd->xbusd_otherend);
		return -1;
	}

	if (rx_copy)
		xneti->xni_softintr = softint_establish(SOFTINT_NET,
		    xennetback_ifsoftstart_copy, xneti);
	else
		xneti->xni_softintr = softint_establish(SOFTINT_NET,
		    xennetback_ifsoftstart_transfer, xneti);

	if (xneti->xni_softintr == NULL) {
		err = ENOMEM;
		xenbus_dev_fatal(xbusd, ENOMEM,
		    "can't allocate softint", xbusd->xbusd_otherend);
		return -1;
	}

	/* allocate VA space and map rings */
	xneti->xni_tx_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
	    UVM_KMF_VAONLY);
	if (xneti->xni_tx_ring_va == 0) {
		xenbus_dev_fatal(xbusd, ENOMEM,
		    "can't get VA for TX ring", xbusd->xbusd_otherend);
		goto err1;
	}
	tx_ring = (void *)xneti->xni_tx_ring_va;

	xneti->xni_rx_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
	    UVM_KMF_VAONLY);
	if (xneti->xni_rx_ring_va == 0) {
		xenbus_dev_fatal(xbusd, ENOMEM,
		    "can't get VA for RX ring", xbusd->xbusd_otherend);
		goto err1;
	}
	rx_ring = (void *)xneti->xni_rx_ring_va;

	op.host_addr = xneti->xni_tx_ring_va;
	op.flags = GNTMAP_host_map;
	op.ref = tx_ring_ref;
	op.dom = xneti->xni_domid;
	err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
	if (err || op.status) {
		aprint_error_ifnet(&xneti->xni_if,
		    "can't map TX grant ref: err %d status %d\n",
		    err, op.status);
		goto err2;
	}
	xneti->xni_tx_ring_handle = op.handle;
	BACK_RING_INIT(&xneti->xni_txring, tx_ring, PAGE_SIZE);

	op.host_addr = xneti->xni_rx_ring_va;
	op.flags = GNTMAP_host_map;
	op.ref = rx_ring_ref;
	op.dom = xneti->xni_domid;
	err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
	if (err ||
op.status) { 528 aprint_error_ifnet(&xneti->xni_if, 529 "can't map RX grant ref: err %d status %d\n", 530 err, op.status); 531 goto err2; 532 } 533 xneti->xni_rx_ring_handle = op.handle; 534 BACK_RING_INIT(&xneti->xni_rxring, rx_ring, PAGE_SIZE); 535 536 evop.cmd = EVTCHNOP_bind_interdomain; 537 evop.u.bind_interdomain.remote_dom = xneti->xni_domid; 538 evop.u.bind_interdomain.remote_port = revtchn; 539 err = HYPERVISOR_event_channel_op(&evop); 540 if (err) { 541 aprint_error_ifnet(&xneti->xni_if, 542 "can't get event channel: %d\n", err); 543 goto err2; 544 } 545 xneti->xni_evtchn = evop.u.bind_interdomain.local_port; 546 xen_wmb(); 547 xneti->xni_status = CONNECTED; 548 xen_wmb(); 549 550 event_set_handler(xneti->xni_evtchn, xennetback_evthandler, 551 xneti, IPL_NET, xneti->xni_if.if_xname); 552 xennetback_ifinit(&xneti->xni_if); 553 hypervisor_enable_event(xneti->xni_evtchn); 554 hypervisor_notify_via_evtchn(xneti->xni_evtchn); 555 return 0; 556 557err2: 558 /* unmap rings */ 559 if (xneti->xni_tx_ring_handle != 0) { 560 uop.host_addr = xneti->xni_tx_ring_va; 561 uop.handle = xneti->xni_tx_ring_handle; 562 uop.dev_bus_addr = 0; 563 err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 564 &uop, 1); 565 if (err) 566 aprint_error_ifnet(&xneti->xni_if, 567 "unmap_grant_ref failed: %d\n", err); 568 } 569 570 if (xneti->xni_rx_ring_handle != 0) { 571 uop.host_addr = xneti->xni_rx_ring_va; 572 uop.handle = xneti->xni_rx_ring_handle; 573 uop.dev_bus_addr = 0; 574 err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 575 &uop, 1); 576 if (err) 577 aprint_error_ifnet(&xneti->xni_if, 578 "unmap_grant_ref failed: %d\n", err); 579 } 580 581err1: 582 /* free rings VA space */ 583 if (xneti->xni_rx_ring_va != 0) 584 uvm_km_free(kernel_map, xneti->xni_rx_ring_va, 585 PAGE_SIZE, UVM_KMF_VAONLY); 586 587 if (xneti->xni_tx_ring_va != 0) 588 uvm_km_free(kernel_map, xneti->xni_tx_ring_va, 589 PAGE_SIZE, UVM_KMF_VAONLY); 590 591 softint_disestablish(xneti->xni_softintr); 592 return -1; 593 594} 595 596static void 597xennetback_frontend_changed(void *arg, XenbusState new_state) 598{ 599 struct xnetback_instance *xneti = arg; 600 struct xenbus_device *xbusd = xneti->xni_xbusd; 601 602 XENPRINTF(("%s: new state %d\n", xneti->xni_if.if_xname, new_state)); 603 switch(new_state) { 604 case XenbusStateInitialising: 605 case XenbusStateInitialised: 606 break; 607 608 case XenbusStateConnected: 609 if (xneti->xni_status == CONNECTED) 610 break; 611 if (xennetback_connect(xneti) == 0) 612 xenbus_switch_state(xbusd, NULL, XenbusStateConnected); 613 break; 614 615 case XenbusStateClosing: 616 xneti->xni_status = DISCONNECTING; 617 xneti->xni_if.if_flags &= ~(IFF_RUNNING | IFF_OACTIVE); 618 xneti->xni_if.if_timer = 0; 619 xenbus_switch_state(xbusd, NULL, XenbusStateClosing); 620 break; 621 622 case XenbusStateClosed: 623 /* otherend_changed() should handle it for us */ 624 panic("xennetback_frontend_changed: closed\n"); 625 case XenbusStateUnknown: 626 case XenbusStateInitWait: 627 default: 628 aprint_error("%s: invalid frontend state %d\n", 629 xneti->xni_if.if_xname, new_state); 630 break; 631 } 632 return; 633 634} 635 636/* lookup a xneti based on domain id and interface handle */ 637static struct xnetback_instance * 638xnetif_lookup(domid_t dom , uint32_t handle) 639{ 640 struct xnetback_instance *xneti; 641 642 SLIST_FOREACH(xneti, &xnetback_instances, next) { 643 if (xneti->xni_domid == dom && xneti->xni_handle == handle) 644 return xneti; 645 } 646 return NULL; 647} 648 649 650/* get a page to remplace a 
mbuf cluster page given to a domain */ 651static int 652xennetback_get_mcl_page(paddr_t *map) 653{ 654 if (mcl_pages_alloc < 0) 655 /* 656 * we exhausted our allocation. We can't allocate new ones yet 657 * because the current pages may not have been loaned to 658 * the remote domain yet. We have to let the caller do this. 659 */ 660 return -1; 661 662 *map = ((paddr_t)mcl_pages[mcl_pages_alloc]) << PAGE_SHIFT; 663 mcl_pages_alloc--; 664 return 0; 665 666} 667 668static void 669xennetback_get_new_mcl_pages(void) 670{ 671 int nb_pages; 672 struct xen_memory_reservation res; 673 674 /* get some new pages. */ 675 xenguest_handle(res.extent_start) = mcl_pages; 676 res.nr_extents = NB_XMIT_PAGES_BATCH; 677 res.extent_order = 0; 678 res.address_bits = 0; 679 res.domid = DOMID_SELF; 680 681 nb_pages = HYPERVISOR_memory_op(XENMEM_increase_reservation, &res); 682 if (nb_pages <= 0) { 683 printf("xennetback: can't get new mcl pages (%d)\n", nb_pages); 684 return; 685 } 686 if (nb_pages != NB_XMIT_PAGES_BATCH) 687 printf("xennetback: got only %d new mcl pages\n", nb_pages); 688 689 mcl_pages_alloc = nb_pages - 1; 690} 691 692static inline void 693xennetback_tx_response(struct xnetback_instance *xneti, int id, int status) 694{ 695 RING_IDX resp_prod; 696 netif_tx_response_t *txresp; 697 int do_event; 698 699 resp_prod = xneti->xni_txring.rsp_prod_pvt; 700 txresp = RING_GET_RESPONSE(&xneti->xni_txring, resp_prod); 701 702 txresp->id = id; 703 txresp->status = status; 704 xneti->xni_txring.rsp_prod_pvt++; 705 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xneti->xni_txring, do_event); 706 if (do_event) { 707 XENPRINTF(("%s send event\n", xneti->xni_if.if_xname)); 708 hypervisor_notify_via_evtchn(xneti->xni_evtchn); 709 } 710} 711 712static int 713xennetback_evthandler(void *arg) 714{ 715 struct xnetback_instance *xneti = arg; 716 struct ifnet *ifp = &xneti->xni_if; 717 netif_tx_request_t *txreq; 718 struct xni_pkt *pkt; 719 vaddr_t pkt_va; 720 struct mbuf *m; 721 int receive_pending, err; 722 RING_IDX req_cons; 723 724 XENPRINTF(("xennetback_evthandler ")); 725 req_cons = xneti->xni_txring.req_cons; 726 xen_rmb(); 727 while (1) { 728 xen_rmb(); /* be sure to read the request before updating */ 729 xneti->xni_txring.req_cons = req_cons; 730 xen_wmb(); 731 RING_FINAL_CHECK_FOR_REQUESTS(&xneti->xni_txring, 732 receive_pending); 733 if (receive_pending == 0) 734 break; 735 txreq = RING_GET_REQUEST(&xneti->xni_txring, req_cons); 736 xen_rmb(); 737 XENPRINTF(("%s pkt size %d\n", xneti->xni_if.if_xname, 738 txreq->size)); 739 req_cons++; 740 if (__predict_false((ifp->if_flags & (IFF_UP | IFF_RUNNING)) != 741 (IFF_UP | IFF_RUNNING))) { 742 /* interface not up, drop */ 743 xennetback_tx_response(xneti, txreq->id, 744 NETIF_RSP_DROPPED); 745 continue; 746 } 747 /* 748 * Do some sanity checks, and map the packet's page. 
749 */ 750 if (__predict_false(txreq->size < ETHER_HDR_LEN || 751 txreq->size > (ETHER_MAX_LEN - ETHER_CRC_LEN))) { 752 printf("%s: packet size %d too big\n", 753 ifp->if_xname, txreq->size); 754 xennetback_tx_response(xneti, txreq->id, 755 NETIF_RSP_ERROR); 756 ifp->if_ierrors++; 757 continue; 758 } 759 /* don't cross page boundaries */ 760 if (__predict_false( 761 txreq->offset + txreq->size > PAGE_SIZE)) { 762 printf("%s: packet cross page boundary\n", 763 ifp->if_xname); 764 xennetback_tx_response(xneti, txreq->id, 765 NETIF_RSP_ERROR); 766 ifp->if_ierrors++; 767 continue; 768 } 769 /* get a mbuf for this packet */ 770 MGETHDR(m, M_DONTWAIT, MT_DATA); 771 if (__predict_false(m == NULL)) { 772 static struct timeval lasttime; 773 if (ratecheck(&lasttime, &xni_pool_errintvl)) 774 printf("%s: mbuf alloc failed\n", 775 ifp->if_xname); 776 xennetback_tx_response(xneti, txreq->id, 777 NETIF_RSP_DROPPED); 778 ifp->if_ierrors++; 779 continue; 780 } 781 782 XENPRINTF(("%s pkt offset %d size %d id %d req_cons %d\n", 783 xneti->xni_if.if_xname, txreq->offset, 784 txreq->size, txreq->id, MASK_NETIF_TX_IDX(req_cons))); 785 786 pkt = pool_get(&xni_pkt_pool, PR_NOWAIT); 787 if (__predict_false(pkt == NULL)) { 788 static struct timeval lasttime; 789 if (ratecheck(&lasttime, &xni_pool_errintvl)) 790 printf("%s: xnbpkt alloc failed\n", 791 ifp->if_xname); 792 xennetback_tx_response(xneti, txreq->id, 793 NETIF_RSP_DROPPED); 794 ifp->if_ierrors++; 795 m_freem(m); 796 continue; 797 } 798 err = xen_shm_map(1, xneti->xni_domid, &txreq->gref, &pkt_va, 799 &pkt->pkt_handle, XSHM_RO); 800 if (__predict_false(err == ENOMEM)) { 801 xennetback_tx_response(xneti, txreq->id, 802 NETIF_RSP_DROPPED); 803 ifp->if_ierrors++; 804 pool_put(&xni_pkt_pool, pkt); 805 m_freem(m); 806 continue; 807 } 808 809 if (__predict_false(err)) { 810 printf("%s: mapping foreing page failed: %d\n", 811 xneti->xni_if.if_xname, err); 812 xennetback_tx_response(xneti, txreq->id, 813 NETIF_RSP_ERROR); 814 ifp->if_ierrors++; 815 pool_put(&xni_pkt_pool, pkt); 816 m_freem(m); 817 continue; 818 } 819 820 if ((ifp->if_flags & IFF_PROMISC) == 0) { 821 struct ether_header *eh = 822 (void*)(pkt_va + txreq->offset); 823 if (ETHER_IS_MULTICAST(eh->ether_dhost) == 0 && 824 memcmp(CLLADDR(ifp->if_sadl), eh->ether_dhost, 825 ETHER_ADDR_LEN) != 0) { 826 xni_pkt_unmap(pkt, pkt_va); 827 m_freem(m); 828 xennetback_tx_response(xneti, txreq->id, 829 NETIF_RSP_OKAY); 830 continue; /* packet is not for us */ 831 } 832 } 833#ifdef notyet 834a lot of work is needed in the tcp stack to handle read-only ext storage 835so always copy for now. 836 if (((req_cons + 1) & (NET_TX_RING_SIZE - 1)) == 837 (xneti->xni_txring.rsp_prod_pvt & (NET_TX_RING_SIZE - 1))) 838#else 839 if (1) 840#endif /* notyet */ 841 { 842 /* 843 * This is the last TX buffer. Copy the data and 844 * ack it. Delaying it until the mbuf is 845 * freed will stall transmit. 
846 */ 847 m->m_len = min(MHLEN, txreq->size); 848 m->m_pkthdr.len = 0; 849 m_copyback(m, 0, txreq->size, 850 (void *)(pkt_va + txreq->offset)); 851 xni_pkt_unmap(pkt, pkt_va); 852 if (m->m_pkthdr.len < txreq->size) { 853 ifp->if_ierrors++; 854 m_freem(m); 855 xennetback_tx_response(xneti, txreq->id, 856 NETIF_RSP_DROPPED); 857 continue; 858 } 859 xennetback_tx_response(xneti, txreq->id, 860 NETIF_RSP_OKAY); 861 } else { 862 863 pkt->pkt_id = txreq->id; 864 pkt->pkt_xneti = xneti; 865 866 MEXTADD(m, pkt_va + txreq->offset, 867 txreq->size, M_DEVBUF, xennetback_tx_free, pkt); 868 m->m_pkthdr.len = m->m_len = txreq->size; 869 m->m_flags |= M_EXT_ROMAP; 870 } 871 if ((txreq->flags & NETTXF_csum_blank) != 0) { 872 xennet_checksum_fill(&m); 873 if (m == NULL) { 874 ifp->if_ierrors++; 875 continue; 876 } 877 } 878 m->m_pkthdr.rcvif = ifp; 879 ifp->if_ipackets++; 880 881 bpf_mtap(ifp, m); 882 (*ifp->if_input)(ifp, m); 883 } 884 xen_rmb(); /* be sure to read the request before updating pointer */ 885 xneti->xni_txring.req_cons = req_cons; 886 xen_wmb(); 887 /* check to see if we can transmit more packets */ 888 softint_schedule(xneti->xni_softintr); 889 890 return 1; 891} 892 893static void 894xennetback_tx_free(struct mbuf *m, void *va, size_t size, void *arg) 895{ 896 int s = splnet(); 897 struct xni_pkt *pkt = arg; 898 struct xnetback_instance *xneti = pkt->pkt_xneti; 899 900 XENPRINTF(("xennetback_tx_free\n")); 901 902 xennetback_tx_response(xneti, pkt->pkt_id, NETIF_RSP_OKAY); 903 904 xni_pkt_unmap(pkt, (vaddr_t)va & ~PAGE_MASK); 905 906 if (m) 907 pool_cache_put(mb_cache, m); 908 splx(s); 909} 910 911static int 912xennetback_ifioctl(struct ifnet *ifp, u_long cmd, void *data) 913{ 914 //struct xnetback_instance *xneti = ifp->if_softc; 915 //struct ifreq *ifr = (struct ifreq *)data; 916 int s, error; 917 918 s = splnet(); 919 error = ether_ioctl(ifp, cmd, data); 920 if (error == ENETRESET) 921 error = 0; 922 splx(s); 923 return error; 924} 925 926static void 927xennetback_ifstart(struct ifnet *ifp) 928{ 929 struct xnetback_instance *xneti = ifp->if_softc; 930 931 /* 932 * The Xen communication channel is much more efficient if we can 933 * schedule batch of packets for the domain. To achieve this, we 934 * schedule a soft interrupt, and just return. This way, the network 935 * stack will enqueue all pending mbufs in the interface's send queue 936 * before it is processed by the soft interrupt handler(). 
937 */ 938 softint_schedule(xneti->xni_softintr); 939} 940 941static void 942xennetback_ifsoftstart_transfer(void *arg) 943{ 944 struct xnetback_instance *xneti = arg; 945 struct ifnet *ifp = &xneti->xni_if; 946 struct mbuf *m; 947 vaddr_t xmit_va; 948 paddr_t xmit_pa; 949 paddr_t xmit_ma; 950 paddr_t newp_ma = 0; /* XXX gcc */ 951 int i, j, nppitems; 952 mmu_update_t *mmup; 953 multicall_entry_t *mclp; 954 netif_rx_response_t *rxresp; 955 RING_IDX req_prod, resp_prod; 956 int do_event = 0; 957 gnttab_transfer_t *gop; 958 int id, offset; 959 960 XENPRINTF(("xennetback_ifsoftstart_transfer ")); 961 int s = splnet(); 962 if (__predict_false( 963 (ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)) { 964 splx(s); 965 return; 966 } 967 968 while (!IFQ_IS_EMPTY(&ifp->if_snd)) { 969 XENPRINTF(("pkt\n")); 970 req_prod = xneti->xni_rxring.sring->req_prod; 971 resp_prod = xneti->xni_rxring.rsp_prod_pvt; 972 xen_rmb(); 973 974 mmup = xstart_mmu; 975 mclp = xstart_mcl; 976 gop = xstart_gop_transfer; 977 for (nppitems = 0, i = 0; !IFQ_IS_EMPTY(&ifp->if_snd);) { 978 XENPRINTF(("have a packet\n")); 979 IFQ_POLL(&ifp->if_snd, m); 980 if (__predict_false(m == NULL)) 981 panic("xennetback_ifstart: IFQ_POLL"); 982 if (__predict_false( 983 req_prod == xneti->xni_rxring.req_cons || 984 xneti->xni_rxring.req_cons - resp_prod == 985 NET_RX_RING_SIZE)) { 986 /* out of ring space */ 987 XENPRINTF(("xennetback_ifstart: ring full " 988 "req_prod 0x%x req_cons 0x%x resp_prod " 989 "0x%x\n", 990 req_prod, xneti->xni_rxring.req_cons, 991 resp_prod)); 992 ifp->if_timer = 1; 993 break; 994 } 995 if (__predict_false(i == NB_XMIT_PAGES_BATCH)) 996 break; /* we filled the array */ 997 if (__predict_false( 998 xennetback_get_mcl_page(&newp_ma) != 0)) 999 break; /* out of memory */ 1000 if ((m->m_flags & M_CLUSTER) != 0 && 1001 !M_READONLY(m) && MCLBYTES == PAGE_SIZE) { 1002 /* we can give this page away */ 1003 xmit_pa = m->m_ext.ext_paddr; 1004 xmit_ma = xpmap_ptom(xmit_pa); 1005 xmit_va = (vaddr_t)m->m_ext.ext_buf; 1006 KASSERT(xmit_pa != M_PADDR_INVALID); 1007 KASSERT((xmit_va & PAGE_MASK) == 0); 1008 offset = m->m_data - m->m_ext.ext_buf; 1009 } else { 1010 /* we have to copy the packet */ 1011 xmit_va = (vaddr_t)pool_cache_get_paddr( 1012 xmit_pages_cachep, 1013 PR_NOWAIT, &xmit_pa); 1014 if (__predict_false(xmit_va == 0)) 1015 break; /* out of memory */ 1016 1017 KASSERT(xmit_pa != POOL_PADDR_INVALID); 1018 xmit_ma = xpmap_ptom(xmit_pa); 1019 XENPRINTF(("xennetback_get_xmit_page: got va " 1020 "0x%x ma 0x%x\n", (u_int)xmit_va, 1021 (u_int)xmit_ma)); 1022 m_copydata(m, 0, m->m_pkthdr.len, 1023 (char *)xmit_va + LINUX_REQUESTED_OFFSET); 1024 offset = LINUX_REQUESTED_OFFSET; 1025 pages_pool_free[nppitems].va = xmit_va; 1026 pages_pool_free[nppitems].pa = xmit_pa; 1027 nppitems++; 1028 } 1029 /* start filling ring */ 1030 gop->ref = RING_GET_REQUEST(&xneti->xni_rxring, 1031 xneti->xni_rxring.req_cons)->gref; 1032 id = RING_GET_REQUEST(&xneti->xni_rxring, 1033 xneti->xni_rxring.req_cons)->id; 1034 xen_rmb(); 1035 xneti->xni_rxring.req_cons++; 1036 rxresp = RING_GET_RESPONSE(&xneti->xni_rxring, 1037 resp_prod); 1038 rxresp->id = id; 1039 rxresp->offset = offset; 1040 rxresp->status = m->m_pkthdr.len; 1041 if ((m->m_pkthdr.csum_flags & 1042 (M_CSUM_TCPv4 | M_CSUM_UDPv4)) != 0) { 1043 rxresp->flags = NETRXF_csum_blank; 1044 } else { 1045 rxresp->flags = 0; 1046 } 1047 /* 1048 * transfers the page containing the packet to the 1049 * remote domain, and map newp in place. 
1050 */ 1051 xpmap_phys_to_machine_mapping[ 1052 (xmit_pa - XPMAP_OFFSET) >> PAGE_SHIFT] = 1053 newp_ma >> PAGE_SHIFT; 1054 MULTI_update_va_mapping(mclp, xmit_va, 1055 newp_ma | PG_V | PG_RW | PG_U | PG_M, 0); 1056 mclp++; 1057 gop->mfn = xmit_ma >> PAGE_SHIFT; 1058 gop->domid = xneti->xni_domid; 1059 gop++; 1060 1061 mmup->ptr = newp_ma | MMU_MACHPHYS_UPDATE; 1062 mmup->val = (xmit_pa - XPMAP_OFFSET) >> PAGE_SHIFT; 1063 mmup++; 1064 1065 /* done with this packet */ 1066 IFQ_DEQUEUE(&ifp->if_snd, m); 1067 mbufs_sent[i] = m; 1068 resp_prod++; 1069 i++; /* this packet has been queued */ 1070 ifp->if_opackets++; 1071 bpf_mtap(ifp, m); 1072 } 1073 if (i != 0) { 1074 /* 1075 * We may have allocated buffers which have entries 1076 * outstanding in the page update queue -- make sure 1077 * we flush those first! 1078 */ 1079 int svm = splvm(); 1080 xpq_flush_queue(); 1081 splx(svm); 1082 mclp[-1].args[MULTI_UVMFLAGS_INDEX] = 1083 UVMF_TLB_FLUSH|UVMF_ALL; 1084 mclp->op = __HYPERVISOR_mmu_update; 1085 mclp->args[0] = (unsigned long)xstart_mmu; 1086 mclp->args[1] = i; 1087 mclp->args[2] = 0; 1088 mclp->args[3] = DOMID_SELF; 1089 mclp++; 1090 /* update the MMU */ 1091 if (HYPERVISOR_multicall(xstart_mcl, i + 1) != 0) { 1092 panic("%s: HYPERVISOR_multicall failed", 1093 ifp->if_xname); 1094 } 1095 for (j = 0; j < i + 1; j++) { 1096 if (xstart_mcl[j].result != 0) { 1097 printf("%s: xstart_mcl[%d] " 1098 "failed (%lu)\n", ifp->if_xname, 1099 j, xstart_mcl[j].result); 1100 printf("%s: req_prod %u req_cons " 1101 "%u rsp_prod %u rsp_prod_pvt %u " 1102 "i %u\n", 1103 ifp->if_xname, 1104 xneti->xni_rxring.sring->req_prod, 1105 xneti->xni_rxring.req_cons, 1106 xneti->xni_rxring.sring->rsp_prod, 1107 xneti->xni_rxring.rsp_prod_pvt, 1108 i); 1109 } 1110 } 1111 if (HYPERVISOR_grant_table_op(GNTTABOP_transfer, 1112 xstart_gop_transfer, i) != 0) { 1113 panic("%s: GNTTABOP_transfer failed", 1114 ifp->if_xname); 1115 } 1116 1117 for (j = 0; j < i; j++) { 1118 if (xstart_gop_transfer[j].status != GNTST_okay) { 1119 printf("%s GNTTABOP_transfer[%d] %d\n", 1120 ifp->if_xname, 1121 j, xstart_gop_transfer[j].status); 1122 printf("%s: req_prod %u req_cons " 1123 "%u rsp_prod %u rsp_prod_pvt %u " 1124 "i %d\n", 1125 ifp->if_xname, 1126 xneti->xni_rxring.sring->req_prod, 1127 xneti->xni_rxring.req_cons, 1128 xneti->xni_rxring.sring->rsp_prod, 1129 xneti->xni_rxring.rsp_prod_pvt, 1130 i); 1131 rxresp = RING_GET_RESPONSE( 1132 &xneti->xni_rxring, 1133 xneti->xni_rxring.rsp_prod_pvt + j); 1134 rxresp->status = NETIF_RSP_ERROR; 1135 } 1136 } 1137 1138 /* update pointer */ 1139 KASSERT( 1140 xneti->xni_rxring.rsp_prod_pvt + i == resp_prod); 1141 xneti->xni_rxring.rsp_prod_pvt = resp_prod; 1142 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY( 1143 &xneti->xni_rxring, j); 1144 if (j) 1145 do_event = 1; 1146 /* now we can free the mbufs */ 1147 for (j = 0; j < i; j++) { 1148 m_freem(mbufs_sent[j]); 1149 } 1150 for (j = 0; j < nppitems; j++) { 1151 pool_cache_put_paddr(xmit_pages_cachep, 1152 (void *)pages_pool_free[j].va, 1153 pages_pool_free[j].pa); 1154 } 1155 } 1156 /* send event */ 1157 if (do_event) { 1158 xen_rmb(); 1159 XENPRINTF(("%s receive event\n", 1160 xneti->xni_if.if_xname)); 1161 hypervisor_notify_via_evtchn(xneti->xni_evtchn); 1162 do_event = 0; 1163 } 1164 /* check if we need to get back some pages */ 1165 if (mcl_pages_alloc < 0) { 1166 xennetback_get_new_mcl_pages(); 1167 if (mcl_pages_alloc < 0) { 1168 /* 1169 * setup the watchdog to try again, because 1170 * xennetback_ifstart() will never be called 1171 * again if queue 
is full.
				 */
				printf("xennetback_ifstart: no mcl_pages\n");
				ifp->if_timer = 1;
				break;
			}
		}
		/*
		 * note that we don't use RING_FINAL_CHECK_FOR_REQUESTS()
		 * here, as the frontend doesn't notify when adding
		 * requests anyway
		 */
		if (__predict_false(
		    !RING_HAS_UNCONSUMED_REQUESTS(&xneti->xni_rxring))) {
			/* ring full */
			break;
		}
	}
	splx(s);
}

static void
xennetback_ifsoftstart_copy(void *arg)
{
	struct xnetback_instance *xneti = arg;
	struct ifnet *ifp = &xneti->xni_if;
	struct mbuf *m, *new_m;
	paddr_t xmit_pa;
	paddr_t xmit_ma;
	int i, j;
	netif_rx_response_t *rxresp;
	RING_IDX req_prod, resp_prod;
	int do_event = 0;
	gnttab_copy_t *gop;
	int id, offset;

	XENPRINTF(("xennetback_ifsoftstart_copy "));
	int s = splnet();
	if (__predict_false(
	    (ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)) {
		splx(s);
		return;
	}

	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
		XENPRINTF(("pkt\n"));
		req_prod = xneti->xni_rxring.sring->req_prod;
		resp_prod = xneti->xni_rxring.rsp_prod_pvt;
		xen_rmb();

		gop = xstart_gop_copy;
		for (i = 0; !IFQ_IS_EMPTY(&ifp->if_snd);) {
			XENPRINTF(("have a packet\n"));
			IFQ_POLL(&ifp->if_snd, m);
			if (__predict_false(m == NULL))
				panic("xennetback_ifstart: IFQ_POLL");
			if (__predict_false(
			    req_prod == xneti->xni_rxring.req_cons ||
			    xneti->xni_rxring.req_cons - resp_prod ==
			    NET_RX_RING_SIZE)) {
				/* out of ring space */
				XENPRINTF(("xennetback_ifstart: ring full "
				    "req_prod 0x%x req_cons 0x%x resp_prod "
				    "0x%x\n",
				    req_prod, xneti->xni_rxring.req_cons,
				    resp_prod));
				ifp->if_timer = 1;
				break;
			}
			if (__predict_false(i == NB_XMIT_PAGES_BATCH))
				break; /* we filled the array */
			switch (m->m_flags & (M_EXT|M_EXT_CLUSTER)) {
			case M_EXT|M_EXT_CLUSTER:
				KASSERT(m->m_ext.ext_paddr != M_PADDR_INVALID);
				xmit_pa = m->m_ext.ext_paddr;
				offset = m->m_data - m->m_ext.ext_buf;
				break;
			case 0:
				KASSERT(m->m_paddr != M_PADDR_INVALID);
				xmit_pa = m->m_paddr;
				offset = M_BUFOFFSET(m) +
				    (m->m_data - M_BUFADDR(m));
				break;
			default:
				if (__predict_false(
				    !pmap_extract(pmap_kernel(),
				    (vaddr_t)m->m_data, &xmit_pa))) {
					panic("xennet_start: no pa");
				}
				offset = 0;
				break;
			}
			offset += (xmit_pa & ~PG_FRAME);
			xmit_pa = (xmit_pa & PG_FRAME);
			if (m->m_pkthdr.len != m->m_len ||
			    (offset + m->m_pkthdr.len) > PAGE_SIZE) {
				MGETHDR(new_m, M_DONTWAIT, MT_DATA);
				if (__predict_false(new_m == NULL)) {
					printf("%s: cannot allocate new mbuf\n",
					    ifp->if_xname);
					break;
				}
				if (m->m_pkthdr.len > MHLEN) {
					MCLGET(new_m, M_DONTWAIT);
					if (__predict_false(
					    (new_m->m_flags & M_EXT) == 0)) {
						XENPRINTF((
						    "%s: no mbuf cluster\n",
						    ifp->if_xname));
						m_freem(new_m);
						break;
					}
					xmit_pa = new_m->m_ext.ext_paddr;
					offset = new_m->m_data -
					    new_m->m_ext.ext_buf;
				} else {
					xmit_pa = new_m->m_paddr;
					offset = M_BUFOFFSET(new_m) +
					    (new_m->m_data - M_BUFADDR(new_m));
				}
				offset += (xmit_pa & ~PG_FRAME);
				xmit_pa = (xmit_pa & PG_FRAME);
				m_copydata(m, 0, m->m_pkthdr.len,
				    mtod(new_m, void *));
				new_m->m_len = new_m->m_pkthdr.len =
				    m->m_pkthdr.len;
				IFQ_DEQUEUE(&ifp->if_snd, m);
				m_freem(m);
				m = new_m;
			} else {
				IFQ_DEQUEUE(&ifp->if_snd, m);
			}

			KASSERT(xmit_pa != POOL_PADDR_INVALID);
			KASSERT((offset + m->m_pkthdr.len) <= PAGE_SIZE);
			xmit_ma = xpmap_ptom(xmit_pa);
			/* start filling ring */
			gop->flags = GNTCOPY_dest_gref;
			gop->source.offset = offset;
			gop->source.domid = DOMID_SELF;
			gop->source.u.gmfn = xmit_ma >> PAGE_SHIFT;

			gop->dest.u.ref = RING_GET_REQUEST(&xneti->xni_rxring,
			    xneti->xni_rxring.req_cons)->gref;
			gop->dest.offset = 0;
			gop->dest.domid = xneti->xni_domid;

			gop->len = m->m_pkthdr.len;
			gop++;

			id = RING_GET_REQUEST(&xneti->xni_rxring,
			    xneti->xni_rxring.req_cons)->id;
			xen_rmb();
			xneti->xni_rxring.req_cons++;
			rxresp = RING_GET_RESPONSE(&xneti->xni_rxring,
			    resp_prod);
			rxresp->id = id;
			rxresp->offset = 0;
			rxresp->status = m->m_pkthdr.len;
			if ((m->m_pkthdr.csum_flags &
			    (M_CSUM_TCPv4 | M_CSUM_UDPv4)) != 0) {
				rxresp->flags = NETRXF_csum_blank;
			} else {
				rxresp->flags = 0;
			}

			mbufs_sent[i] = m;
			resp_prod++;
			i++; /* this packet has been queued */
			ifp->if_opackets++;
			bpf_mtap(ifp, m);
		}
		if (i != 0) {
			if (HYPERVISOR_grant_table_op(GNTTABOP_copy,
			    xstart_gop_copy, i) != 0) {
				panic("%s: GNTTABOP_copy failed",
				    ifp->if_xname);
			}

			for (j = 0; j < i; j++) {
				if (xstart_gop_copy[j].status != GNTST_okay) {
					printf("%s GNTTABOP_copy[%d] %d\n",
					    ifp->if_xname,
					    j, xstart_gop_copy[j].status);
					printf("%s: req_prod %u req_cons "
					    "%u rsp_prod %u rsp_prod_pvt %u "
					    "i %d\n",
					    ifp->if_xname,
					    xneti->xni_rxring.sring->req_prod,
					    xneti->xni_rxring.req_cons,
					    xneti->xni_rxring.sring->rsp_prod,
					    xneti->xni_rxring.rsp_prod_pvt,
					    i);
					rxresp = RING_GET_RESPONSE(
					    &xneti->xni_rxring,
					    xneti->xni_rxring.rsp_prod_pvt + j);
					rxresp->status = NETIF_RSP_ERROR;
				}
			}

			/* update pointer */
			KASSERT(
			    xneti->xni_rxring.rsp_prod_pvt + i == resp_prod);
			xneti->xni_rxring.rsp_prod_pvt = resp_prod;
			RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(
			    &xneti->xni_rxring, j);
			if (j)
				do_event = 1;
			/* now we can free the mbufs */
			for (j = 0; j < i; j++) {
				m_freem(mbufs_sent[j]);
			}
		}
		/* send event */
		if (do_event) {
			xen_rmb();
			XENPRINTF(("%s receive event\n",
			    xneti->xni_if.if_xname));
			hypervisor_notify_via_evtchn(xneti->xni_evtchn);
			do_event = 0;
		}
		/*
		 * note that we don't use RING_FINAL_CHECK_FOR_REQUESTS()
		 * here, as the frontend doesn't notify when adding
		 * requests anyway
		 */
		if (__predict_false(
		    !RING_HAS_UNCONSUMED_REQUESTS(&xneti->xni_rxring))) {
			/* ring full */
			break;
		}
	}
	splx(s);
}


static void
xennetback_ifwatchdog(struct ifnet *ifp)
{
	/*
	 * We can get to the following condition:
	 * transmit stalls because the ring is full when the ifq is full too.
	 * In this case (as, unfortunately, we don't get an interrupt from Xen
	 * on transmit) nothing will ever call xennetback_ifstart() again.
	 * Here we abuse the watchdog to get out of this condition.
1416 */ 1417 XENPRINTF(("xennetback_ifwatchdog\n")); 1418 xennetback_ifstart(ifp); 1419} 1420 1421 1422static int 1423xennetback_ifinit(struct ifnet *ifp) 1424{ 1425 struct xnetback_instance *xneti = ifp->if_softc; 1426 int s = splnet(); 1427 1428 if ((ifp->if_flags & IFF_UP) == 0) { 1429 splx(s); 1430 return 0; 1431 } 1432 if (xneti->xni_status == CONNECTED) 1433 ifp->if_flags |= IFF_RUNNING; 1434 splx(s); 1435 return 0; 1436} 1437 1438static void 1439xennetback_ifstop(struct ifnet *ifp, int disable) 1440{ 1441 struct xnetback_instance *xneti = ifp->if_softc; 1442 int s = splnet(); 1443 1444 ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE); 1445 ifp->if_timer = 0; 1446 if (xneti->xni_status == CONNECTED) { 1447 XENPRINTF(("%s: req_prod 0x%x resp_prod 0x%x req_cons 0x%x " 1448 "event 0x%x\n", ifp->if_xname, xneti->xni_txring->req_prod, 1449 xneti->xni_txring->resp_prod, xneti->xni_txring->req_cons, 1450 xneti->xni_txring->event)); 1451 xennetback_evthandler(ifp->if_softc); /* flush pending RX requests */ 1452 } 1453 splx(s); 1454} 1455