1/*      $NetBSD: xennetback_xenbus.c,v 1.46 2011/05/30 14:34:58 joerg Exp $      */
2
3/*
4 * Copyright (c) 2006 Manuel Bouyer.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 */
27
28#include <sys/cdefs.h>
29__KERNEL_RCSID(0, "$NetBSD: xennetback_xenbus.c,v 1.46 2011/05/30 14:34:58 joerg Exp $");
30
31#include "opt_xen.h"
32
33#include <sys/types.h>
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/queue.h>
38#include <sys/kernel.h>
39#include <sys/mbuf.h>
40#include <sys/protosw.h>
41#include <sys/socket.h>
42#include <sys/ioctl.h>
43#include <sys/errno.h>
44#include <sys/device.h>
45#include <sys/intr.h>
46
47#include <net/if.h>
48#include <net/if_types.h>
49#include <net/if_dl.h>
50#include <net/route.h>
51#include <net/netisr.h>
52#include <net/bpf.h>
53#include <net/bpfdesc.h>
54
55#include <net/if_ether.h>
56
57
58#include <xen/xen.h>
59#include <xen/xen_shm.h>
60#include <xen/evtchn.h>
61#include <xen/xenbus.h>
62#include <xen/xennet_checksum.h>
63
64#include <uvm/uvm.h>
65
66#ifdef XENDEBUG_NET
67#define XENPRINTF(x) printf x
68#else
69#define XENPRINTF(x)
70#endif
71
72#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
73#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
74
75/* Linux wants at least 16 bytes free in front of the packet */
76#define LINUX_REQUESTED_OFFSET 16
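/*
 * Sketch of how that headroom is used in the page-flip path
 * (xennetback_ifsoftstart_transfer()): when a packet must be copied into a
 * fresh page before being transferred, the data is placed 16 bytes into the
 * page and the offset is advertised back to the frontend:
 *
 *	m_copydata(m, 0, m->m_pkthdr.len,
 *	    (char *)xmit_va + LINUX_REQUESTED_OFFSET);
 *	rxresp->offset = LINUX_REQUESTED_OFFSET;
 *
 * presumably so a Linux frontend has room to prepend its own headers.
 */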
77
78/* hash list for TX requests */
79/* descriptor of a packet being handled by the kernel */
80struct xni_pkt {
81	int pkt_id; /* packet's ID */
82	grant_handle_t pkt_handle;
83	struct xnetback_instance *pkt_xneti; /* pointer back to our softc */
84};
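/*
 * Lifecycle sketch: the TX event handler allocates an xni_pkt from
 * xni_pkt_pool, maps the frontend's granted page with xen_shm_map() and
 * stores the returned handle in pkt_handle.  In the (currently disabled)
 * zero-copy path the mapped page is attached to an mbuf via MEXTADD(), and
 * xennetback_tx_free() later sends the TX response, unmaps the grant with
 * xni_pkt_unmap() and returns the xni_pkt to the pool.
 */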
85
86static inline void xni_pkt_unmap(struct xni_pkt *, vaddr_t);
87
88
89/* pool for xni_pkt */
90struct pool xni_pkt_pool;
91/* ratecheck(9) for pool allocation failures */
92struct timeval xni_pool_errintvl = { 30, 0 };  /* 30s, each */
93/*
94 * Backend network device driver for Xen
95 */
96
97/* state of a xnetback instance */
98typedef enum {CONNECTED, DISCONNECTING, DISCONNECTED} xnetback_state_t;
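/*
 * Rough state flow: an instance starts DISCONNECTED in
 * xennetback_xenbus_create(), becomes CONNECTED once xennetback_connect()
 * has mapped both rings and bound the event channel, and is marked
 * DISCONNECTING when the frontend moves to XenbusStateClosing.
 */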
99
100/* we keep the xnetback instances in a linked list */
101struct xnetback_instance {
102	SLIST_ENTRY(xnetback_instance) next;
103	struct xenbus_device *xni_xbusd; /* our xenstore entry */
104	domid_t xni_domid;		/* attached to this domain */
105	uint32_t xni_handle;	/* domain-specific handle */
106	xnetback_state_t xni_status;
107	void *xni_softintr;
108
109	/* network interface stuff */
110	struct ethercom xni_ec;
111	struct callout xni_restart;
112	uint8_t xni_enaddr[ETHER_ADDR_LEN];
113
114	/* remote domain communication stuff */
115	unsigned int xni_evtchn; /* our event channel */
116	netif_tx_back_ring_t xni_txring;
117	netif_rx_back_ring_t xni_rxring;
118	grant_handle_t xni_tx_ring_handle; /* to unmap the ring */
119	grant_handle_t xni_rx_ring_handle;
120	vaddr_t xni_tx_ring_va; /* to unmap the ring */
121	vaddr_t xni_rx_ring_va;
122};
123#define xni_if    xni_ec.ec_if
124#define xni_bpf   xni_if.if_bpf
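/* xni_if is the instance's embedded ifnet, xni_bpf its bpf tap */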
125
126       void xvifattach(int);
127static int  xennetback_ifioctl(struct ifnet *, u_long, void *);
128static void xennetback_ifstart(struct ifnet *);
129static void xennetback_ifsoftstart_transfer(void *);
130static void xennetback_ifsoftstart_copy(void *);
131static void xennetback_ifwatchdog(struct ifnet *);
132static int  xennetback_ifinit(struct ifnet *);
133static void xennetback_ifstop(struct ifnet *, int);
134
135static int  xennetback_xenbus_create(struct xenbus_device *);
136static int  xennetback_xenbus_destroy(void *);
137static void xennetback_frontend_changed(void *, XenbusState);
138
139static inline void xennetback_tx_response(struct xnetback_instance *,
140    int, int);
141static void xennetback_tx_free(struct mbuf *, void *, size_t, void *);
142
143SLIST_HEAD(, xnetback_instance) xnetback_instances;
144
145static struct xnetback_instance *xnetif_lookup(domid_t, uint32_t);
146static int  xennetback_evthandler(void *);
147
148static struct xenbus_backend_driver xvif_backend_driver = {
149	.xbakd_create = xennetback_xenbus_create,
150	.xbakd_type = "vif"
151};
152
153/*
154 * Number of packets to transmit in one hypercall (= number of pages to
155 * transmit at once).
156 */
157#define NB_XMIT_PAGES_BATCH 64
158/*
159 * We will transfer a mapped page to the remote domain, and remap another
160 * page in place immediately. For this we keep a list of pages available.
161 * When the list is empty, we ask the hypervisor to give us
162 * NB_XMIT_PAGES_BATCH pages back.
163 */
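/*
 * Rough per-page sequence in xennetback_ifsoftstart_transfer():
 *
 *	xennetback_get_mcl_page(&newp_ma);	   pop a spare machine page
 *	gop->mfn = xmit_ma >> PAGE_SHIFT;	   queue our page for transfer
 *	MULTI_update_va_mapping(mclp, xmit_va, newp_ma | ...);
 *						   remap the spare at the old VA
 *	mmup->ptr = newp_ma | MMU_MACHPHYS_UPDATE; fix up the M2P entry
 *
 * and xennetback_get_new_mcl_pages() refills the spare list with a single
 * XENMEM_increase_reservation call once it runs dry.
 */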
164static unsigned long mcl_pages[NB_XMIT_PAGES_BATCH]; /* our physical pages */
165int mcl_pages_alloc; /* current index in mcl_pages */
166static int  xennetback_get_mcl_page(paddr_t *);
167static void xennetback_get_new_mcl_pages(void);
168/*
169 * If we can't transfer the mbuf directly, we have to copy it to a page which
170 * will be transferred to the remote domain. We use a pool_cache
171 * for this, or the mbuf cluster pool cache if MCLBYTES == PAGE_SIZE
172 */
173#if MCLBYTES != PAGE_SIZE
174pool_cache_t xmit_pages_cache;
175#endif
176pool_cache_t xmit_pages_cachep;
177
178/* arrays used in the xennetback_ifsoftstart_*() routines, too large for the stack */
179static mmu_update_t xstart_mmu[NB_XMIT_PAGES_BATCH];
180static multicall_entry_t xstart_mcl[NB_XMIT_PAGES_BATCH + 1];
181static gnttab_transfer_t xstart_gop_transfer[NB_XMIT_PAGES_BATCH];
182static gnttab_copy_t     xstart_gop_copy[NB_XMIT_PAGES_BATCH];
183struct mbuf *mbufs_sent[NB_XMIT_PAGES_BATCH];
184struct _pages_pool_free {
185	vaddr_t va;
186	paddr_t pa;
187} pages_pool_free[NB_XMIT_PAGES_BATCH];
188
189
190static inline void
191xni_pkt_unmap(struct xni_pkt *pkt, vaddr_t pkt_va)
192{
193	xen_shm_unmap(pkt_va, 1, &pkt->pkt_handle);
194	pool_put(&xni_pkt_pool, pkt);
195}
196
197void
198xvifattach(int n)
199{
200	int i;
201	struct pglist mlist;
202	struct vm_page *pg;
203
204	XENPRINTF(("xennetback_init\n"));
205
206	/*
207	 * steal some pages from the VM system, to replace mbuf cluster or
208	 * xmit_pages_pool pages given to foreign domains.
209	 */
210	if (uvm_pglistalloc(PAGE_SIZE * NB_XMIT_PAGES_BATCH, 0, 0xffffffff,
211	    0, 0, &mlist, NB_XMIT_PAGES_BATCH, 0) != 0)
212		panic("xennetback_init: uvm_pglistalloc");
213	for (i = 0, pg = mlist.tqh_first; pg != NULL;
214	    pg = pg->pageq.queue.tqe_next, i++)
215		mcl_pages[i] = xpmap_ptom(VM_PAGE_TO_PHYS(pg)) >> PAGE_SHIFT;
216	if (i != NB_XMIT_PAGES_BATCH)
217		panic("xennetback_init: %d mcl pages", i);
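	/* mcl_pages_alloc indexes the topmost free entry; -1 means empty */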
218	mcl_pages_alloc = NB_XMIT_PAGES_BATCH - 1;
219
220	/* initialise pools */
221	pool_init(&xni_pkt_pool, sizeof(struct xni_pkt), 0, 0, 0,
222	    "xnbpkt", NULL, IPL_VM);
223#if MCLBYTES != PAGE_SIZE
224	xmit_pages_cache = pool_cache_init(PAGE_SIZE, 0, 0, 0, "xnbxm", NULL,
225	    IPL_VM, NULL, NULL, NULL);
226	xmit_pages_cachep = xmit_pages_cache;
227#else
228	xmit_pages_cachep = mcl_cache;
229#endif
230
231	SLIST_INIT(&xnetback_instances);
232	xenbus_backend_register(&xvif_backend_driver);
233}
234
235static int
236xennetback_xenbus_create(struct xenbus_device *xbusd)
237{
238	struct xnetback_instance *xneti;
239	long domid, handle;
240	struct ifnet *ifp;
241	extern int ifqmaxlen; /* XXX */
242	char *val, *e, *p;
243	int i, err;
244	struct xenbus_transaction *xbt;
245
246	if ((err = xenbus_read_ul(NULL, xbusd->xbusd_path,
247	    "frontend-id", &domid, 10)) != 0) {
248		aprint_error("xvif: can't read %s/frontend-id: %d\n",
249		    xbusd->xbusd_path, err);
250		return err;
251	}
252	if ((err = xenbus_read_ul(NULL, xbusd->xbusd_path,
253	    "handle", &handle, 10)) != 0) {
254		aprint_error("xvif: can't read %s/handle: %d\n",
255		    xbusd->xbusd_path, err);
256		return err;
257	}
258
259	if (xnetif_lookup(domid, handle) != NULL) {
260		return EEXIST;
261	}
262	xneti = malloc(sizeof(struct xnetback_instance), M_DEVBUF,
263	    M_NOWAIT | M_ZERO);
264	if (xneti == NULL) {
265		return ENOMEM;
266	}
267	xneti->xni_domid = domid;
268	xneti->xni_handle = handle;
269	xneti->xni_status = DISCONNECTED;
270
271	xbusd->xbusd_u.b.b_cookie = xneti;
272	xbusd->xbusd_u.b.b_detach = xennetback_xenbus_destroy;
273	xneti->xni_xbusd = xbusd;
274
275	ifp = &xneti->xni_if;
276	ifp->if_softc = xneti;
277	snprintf(ifp->if_xname, IFNAMSIZ, "xvif%di%d",
278	    (int)domid, (int)handle);
279
280	/* read mac address */
281	if ((err = xenbus_read(NULL, xbusd->xbusd_path, "mac", NULL, &val))) {
282		aprint_error_ifnet(ifp, "can't read %s/mac: %d\n",
283		    xbusd->xbusd_path, err);
284		goto fail;
285	}
286	for (i = 0, p = val; i < 6; i++) {
287		xneti->xni_enaddr[i] = strtoul(p, &e, 16);
288		if (e[0] != ':' && i != 5) {
289			aprint_error_ifnet(ifp,
290			    "%s is not a valid mac address\n", val);
291			err = EINVAL;
292			free(val, M_DEVBUF);
			goto fail;
293		}
294		p = &e[1];
295	}
296	free(val, M_DEVBUF);
297
298	/* we can't use the same MAC addr as our guest */
299	xneti->xni_enaddr[3]++;
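	/*
	 * (the xenstore "mac" value read above is what the guest itself
	 * uses; bumping one octet keeps the backend's address distinct)
	 */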
300	/* create pseudo-interface */
301	aprint_verbose_ifnet(ifp, "Ethernet address %s\n",
302	    ether_sprintf(xneti->xni_enaddr));
303	ifp->if_flags =
304	    IFF_BROADCAST|IFF_SIMPLEX|IFF_NOTRAILERS|IFF_MULTICAST;
305	ifp->if_snd.ifq_maxlen =
306	    max(ifqmaxlen, NET_TX_RING_SIZE * 2);
307	ifp->if_capabilities = IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_UDPv4_Tx;
308	ifp->if_ioctl = xennetback_ifioctl;
309	ifp->if_start = xennetback_ifstart;
310	ifp->if_watchdog = xennetback_ifwatchdog;
311	ifp->if_init = xennetback_ifinit;
312	ifp->if_stop = xennetback_ifstop;
313	ifp->if_timer = 0;
314	IFQ_SET_READY(&ifp->if_snd);
315	if_attach(ifp);
316	ether_ifattach(&xneti->xni_if, xneti->xni_enaddr);
317
318	SLIST_INSERT_HEAD(&xnetback_instances, xneti, next);
319
320	xbusd->xbusd_otherend_changed = xennetback_frontend_changed;
321
322	do {
323		xbt = xenbus_transaction_start();
324		if (xbt == NULL) {
325			aprint_error_ifnet(ifp,
326			    "%s: can't start transaction\n",
327			    xbusd->xbusd_path);
328			err = ENOMEM;
			goto fail_detach;
329		}
330		err = xenbus_printf(xbt, xbusd->xbusd_path,
331		    "vifname", "%s", ifp->if_xname);
332		if (err) {
333			aprint_error_ifnet(ifp,
334			    "failed to write %s/vifname: %d\n",
335			    xbusd->xbusd_path, err);
336			goto abort_xbt;
337		}
338		err = xenbus_printf(xbt, xbusd->xbusd_path,
339		    "feature-rx-copy", "%d", 1);
340		if (err) {
341			aprint_error_ifnet(ifp,
342			    "failed to write %s/feature-rx-copy: %d\n",
343			    xbusd->xbusd_path, err);
344			goto abort_xbt;
345		}
346		err = xenbus_printf(xbt, xbusd->xbusd_path,
347		    "feature-rx-flip", "%d", 1);
348		if (err) {
349			aprint_error_ifnet(ifp,
350			    "failed to write %s/feature-rx-flip: %d\n",
351			    xbusd->xbusd_path, err);
352			goto abort_xbt;
353		}
354	} while ((err = xenbus_transaction_end(xbt, 0)) == EAGAIN);
355	if (err) {
356		aprint_error_ifnet(ifp,
357		    "%s: can't end transaction: %d\n",
358		    xbusd->xbusd_path, err);
359	}
360
361	err = xenbus_switch_state(xbusd, NULL, XenbusStateInitWait);
362	if (err) {
363		aprint_error_ifnet(ifp,
364		    "failed to switch state on %s: %d\n",
365		    xbusd->xbusd_path, err);
366		goto fail_detach;
367	}
368	return 0;
369abort_xbt:
370	xenbus_transaction_end(xbt, 1);
371fail_detach:
	/* undo if_attach()/ether_ifattach() and take us off the global list */
	SLIST_REMOVE(&xnetback_instances, xneti, xnetback_instance, next);
	ether_ifdetach(&xneti->xni_if);
	if_detach(&xneti->xni_if);
372fail:
	free(xneti, M_DEVBUF);
373	return err;
374}
375
376int
377xennetback_xenbus_destroy(void *arg)
378{
379	struct xnetback_instance *xneti = arg;
380	struct gnttab_unmap_grant_ref op;
381	int err;
382
383#if 0
384	if (xneti->xni_status == CONNECTED) {
385		return EBUSY;
386	}
387#endif
388	aprint_verbose_ifnet(&xneti->xni_if, "disconnecting\n");
389	hypervisor_mask_event(xneti->xni_evtchn);
390	event_remove_handler(xneti->xni_evtchn, xennetback_evthandler, xneti);
391	if (xneti->xni_softintr) {
392		softint_disestablish(xneti->xni_softintr);
393		xneti->xni_softintr = NULL;
394	}
395
396	SLIST_REMOVE(&xnetback_instances,
397	    xneti, xnetback_instance, next);
398
399	ether_ifdetach(&xneti->xni_if);
400	if_detach(&xneti->xni_if);
401
402	if (xneti->xni_txring.sring) {
403		op.host_addr = xneti->xni_tx_ring_va;
404		op.handle = xneti->xni_tx_ring_handle;
405		op.dev_bus_addr = 0;
406		err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
407		    &op, 1);
408		if (err)
409			aprint_error_ifnet(&xneti->xni_if,
410					"unmap_grant_ref failed: %d\n", err);
411	}
412	if (xneti->xni_rxring.sring) {
413		op.host_addr = xneti->xni_rx_ring_va;
414		op.handle = xneti->xni_rx_ring_handle;
415		op.dev_bus_addr = 0;
416		err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
417		    &op, 1);
418		if (err)
419			aprint_error_ifnet(&xneti->xni_if,
420					"unmap_grant_ref failed: %d\n", err);
421	}
422	uvm_km_free(kernel_map, xneti->xni_tx_ring_va,
423	    PAGE_SIZE, UVM_KMF_VAONLY);
424	uvm_km_free(kernel_map, xneti->xni_rx_ring_va,
425	    PAGE_SIZE, UVM_KMF_VAONLY);
426	free(xneti, M_DEVBUF);
427	return 0;
428}
429
430static int
431xennetback_connect(struct xnetback_instance *xneti)
432{
433	int err;
434	netif_tx_sring_t *tx_ring;
435	netif_rx_sring_t *rx_ring;
436	struct gnttab_map_grant_ref op;
437	struct gnttab_unmap_grant_ref uop;
438	evtchn_op_t evop;
439	u_long tx_ring_ref, rx_ring_ref;
440	u_long revtchn, rx_copy;
441	struct xenbus_device *xbusd = xneti->xni_xbusd;
442
443	/* read communication information */
444	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
445	    "tx-ring-ref", &tx_ring_ref, 10);
446	if (err) {
447		xenbus_dev_fatal(xbusd, err, "reading %s/tx-ring-ref",
448		    xbusd->xbusd_otherend);
449		return -1;
450	}
451	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
452	    "rx-ring-ref", &rx_ring_ref, 10);
453	if (err) {
454		xenbus_dev_fatal(xbusd, err, "reading %s/rx-ring-ref",
455		    xbusd->xbusd_otherend);
456		return -1;
457	}
458	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
459	    "event-channel", &revtchn, 10);
460	if (err) {
461		xenbus_dev_fatal(xbusd, err, "reading %s/event-channel",
462		    xbusd->xbusd_otherend);
463		return -1;
464	}
465	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
466	    "request-rx-copy", &rx_copy, 10);
467	if (err == ENOENT)
468		rx_copy = 0;
469	else if (err) {
470		xenbus_dev_fatal(xbusd, err, "reading %s/request-rx-copy",
471		    xbusd->xbusd_otherend);
472		return -1;
473	}
474
475	if (rx_copy)
476		xneti->xni_softintr = softint_establish(SOFTINT_NET,
477		    xennetback_ifsoftstart_copy, xneti);
478	else
479		xneti->xni_softintr = softint_establish(SOFTINT_NET,
480		    xennetback_ifsoftstart_transfer, xneti);
481
482	if (xneti->xni_softintr == NULL) {
483		err = ENOMEM;
484		xenbus_dev_fatal(xbusd, ENOMEM,
485		    "can't allocate softint for %s", xbusd->xbusd_otherend);
486		return -1;
487	}
488
489	/* allocate VA space and map rings */
490	xneti->xni_tx_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
491	    UVM_KMF_VAONLY);
492	if (xneti->xni_tx_ring_va == 0) {
493		xenbus_dev_fatal(xbusd, ENOMEM,
494		    "can't get VA for TX ring (%s)", xbusd->xbusd_otherend);
495		goto err1;
496	}
497	tx_ring = (void *)xneti->xni_tx_ring_va;
498
499	xneti->xni_rx_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
500	    UVM_KMF_VAONLY);
501	if (xneti->xni_rx_ring_va == 0) {
502		xenbus_dev_fatal(xbusd, ENOMEM,
503		    "can't get VA for RX ring (%s)", xbusd->xbusd_otherend);
504		goto err1;
505	}
506	rx_ring = (void *)xneti->xni_rx_ring_va;
507
508	op.host_addr = xneti->xni_tx_ring_va;
509	op.flags = GNTMAP_host_map;
510	op.ref = tx_ring_ref;
511	op.dom = xneti->xni_domid;
512	err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
513	if (err || op.status) {
514		aprint_error_ifnet(&xneti->xni_if,
515		    "can't map TX grant ref: err %d status %d\n",
516		    err, op.status);
517		goto err2;
518	}
519	xneti->xni_tx_ring_handle = op.handle;
520	BACK_RING_INIT(&xneti->xni_txring, tx_ring, PAGE_SIZE);
521
522	op.host_addr = xneti->xni_rx_ring_va;
523	op.flags = GNTMAP_host_map;
524	op.ref = rx_ring_ref;
525	op.dom = xneti->xni_domid;
526	err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
527	if (err || op.status) {
528		aprint_error_ifnet(&xneti->xni_if,
529		    "can't map RX grant ref: err %d status %d\n",
530		    err, op.status);
531		goto err2;
532	}
533	xneti->xni_rx_ring_handle = op.handle;
534	BACK_RING_INIT(&xneti->xni_rxring, rx_ring, PAGE_SIZE);
535
536	evop.cmd = EVTCHNOP_bind_interdomain;
537	evop.u.bind_interdomain.remote_dom = xneti->xni_domid;
538	evop.u.bind_interdomain.remote_port = revtchn;
539	err = HYPERVISOR_event_channel_op(&evop);
540	if (err) {
541		aprint_error_ifnet(&xneti->xni_if,
542		    "can't get event channel: %d\n", err);
543		goto err2;
544	}
545	xneti->xni_evtchn = evop.u.bind_interdomain.local_port;
546	xen_wmb();
547	xneti->xni_status = CONNECTED;
548	xen_wmb();
549
550	event_set_handler(xneti->xni_evtchn, xennetback_evthandler,
551	    xneti, IPL_NET, xneti->xni_if.if_xname);
552	xennetback_ifinit(&xneti->xni_if);
553	hypervisor_enable_event(xneti->xni_evtchn);
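	/*
	 * kick the frontend once: it may have queued requests before our
	 * handler was installed and would otherwise wait for an event that
	 * never comes.
	 */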
554	hypervisor_notify_via_evtchn(xneti->xni_evtchn);
555	return 0;
556
557err2:
558	/* unmap rings */
559	if (xneti->xni_tx_ring_handle != 0) {
560		uop.host_addr = xneti->xni_tx_ring_va;
561		uop.handle = xneti->xni_tx_ring_handle;
562		uop.dev_bus_addr = 0;
563		err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
564		    &uop, 1);
565		if (err)
566			aprint_error_ifnet(&xneti->xni_if,
567			    "unmap_grant_ref failed: %d\n", err);
568	}
569
570	if (xneti->xni_rx_ring_handle != 0) {
571		uop.host_addr = xneti->xni_rx_ring_va;
572		uop.handle = xneti->xni_rx_ring_handle;
573		uop.dev_bus_addr = 0;
574		err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
575		    &uop, 1);
576		if (err)
577			aprint_error_ifnet(&xneti->xni_if,
578			    "unmap_grant_ref failed: %d\n", err);
579	}
580
581err1:
582	/* free rings VA space */
583	if (xneti->xni_rx_ring_va != 0)
584		uvm_km_free(kernel_map, xneti->xni_rx_ring_va,
585		    PAGE_SIZE, UVM_KMF_VAONLY);
586
587	if (xneti->xni_tx_ring_va != 0)
588		uvm_km_free(kernel_map, xneti->xni_tx_ring_va,
589		    PAGE_SIZE, UVM_KMF_VAONLY);
590
591	softint_disestablish(xneti->xni_softintr);
592	return -1;
593
594}
595
596static void
597xennetback_frontend_changed(void *arg, XenbusState new_state)
598{
599	struct xnetback_instance *xneti = arg;
600	struct xenbus_device *xbusd = xneti->xni_xbusd;
601
602	XENPRINTF(("%s: new state %d\n", xneti->xni_if.if_xname, new_state));
603	switch(new_state) {
604	case XenbusStateInitialising:
605	case XenbusStateInitialised:
606		break;
607
608	case XenbusStateConnected:
609		if (xneti->xni_status == CONNECTED)
610			break;
611		if (xennetback_connect(xneti) == 0)
612			xenbus_switch_state(xbusd, NULL, XenbusStateConnected);
613		break;
614
615	case XenbusStateClosing:
616		xneti->xni_status = DISCONNECTING;
617		xneti->xni_if.if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
618		xneti->xni_if.if_timer = 0;
619		xenbus_switch_state(xbusd, NULL, XenbusStateClosing);
620		break;
621
622	case XenbusStateClosed:
623		/* otherend_changed() should handle it for us */
624		panic("xennetback_frontend_changed: closed\n");
625	case XenbusStateUnknown:
626	case XenbusStateInitWait:
627	default:
628		aprint_error("%s: invalid frontend state %d\n",
629		    xneti->xni_if.if_xname, new_state);
630		break;
631	}
632	return;
633
634}
635
636/* lookup a xneti based on domain id and interface handle */
637static struct xnetback_instance *
638xnetif_lookup(domid_t dom , uint32_t handle)
639{
640	struct xnetback_instance *xneti;
641
642	SLIST_FOREACH(xneti, &xnetback_instances, next) {
643		if (xneti->xni_domid == dom && xneti->xni_handle == handle)
644			return xneti;
645	}
646	return NULL;
647}
648
649
650/* get a page to replace an mbuf cluster page given to a domain */
651static int
652xennetback_get_mcl_page(paddr_t *map)
653{
654	if (mcl_pages_alloc < 0)
655		/*
656		 * we exhausted our allocation. We can't allocate new ones yet
657		 * because the current pages may not have been loaned to
658		 * the remote domain yet. We have to let the caller do this.
659		 */
660		return -1;
661
662	*map = ((paddr_t)mcl_pages[mcl_pages_alloc]) << PAGE_SHIFT;
663	mcl_pages_alloc--;
664	return 0;
665
666}
667
668static void
669xennetback_get_new_mcl_pages(void)
670{
671	int nb_pages;
672	struct xen_memory_reservation res;
673
674	/* get some new pages. */
675	xenguest_handle(res.extent_start) = mcl_pages;
676	res.nr_extents = NB_XMIT_PAGES_BATCH;
677	res.extent_order = 0;
678	res.address_bits = 0;
679	res.domid = DOMID_SELF;
680
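	/*
	 * XENMEM_increase_reservation fills mcl_pages[] with the MFNs of
	 * the newly allocated pages and returns how many extents it could
	 * satisfy, which may be fewer than requested.
	 */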
681	nb_pages = HYPERVISOR_memory_op(XENMEM_increase_reservation, &res);
682	if (nb_pages <= 0) {
683		printf("xennetback: can't get new mcl pages (%d)\n", nb_pages);
684		return;
685	}
686	if (nb_pages != NB_XMIT_PAGES_BATCH)
687		printf("xennetback: got only %d new mcl pages\n", nb_pages);
688
689	mcl_pages_alloc = nb_pages - 1;
690}
691
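/*
 * Queue a TX response on the shared ring for the request identified by
 * "id", and notify the frontend if RING_PUSH_RESPONSES_AND_CHECK_NOTIFY()
 * indicates it is waiting for one.
 */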
692static inline void
693xennetback_tx_response(struct xnetback_instance *xneti, int id, int status)
694{
695	RING_IDX resp_prod;
696	netif_tx_response_t *txresp;
697	int do_event;
698
699	resp_prod = xneti->xni_txring.rsp_prod_pvt;
700	txresp = RING_GET_RESPONSE(&xneti->xni_txring, resp_prod);
701
702	txresp->id = id;
703	txresp->status = status;
704	xneti->xni_txring.rsp_prod_pvt++;
705	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xneti->xni_txring, do_event);
706	if (do_event) {
707		XENPRINTF(("%s send event\n", xneti->xni_if.if_xname));
708		hypervisor_notify_via_evtchn(xneti->xni_evtchn);
709	}
710}
711
712static int
713xennetback_evthandler(void *arg)
714{
715	struct xnetback_instance *xneti = arg;
716	struct ifnet *ifp = &xneti->xni_if;
717	netif_tx_request_t *txreq;
718	struct xni_pkt *pkt;
719	vaddr_t pkt_va;
720	struct mbuf *m;
721	int receive_pending, err;
722	RING_IDX req_cons;
723
724	XENPRINTF(("xennetback_evthandler "));
725	req_cons = xneti->xni_txring.req_cons;
726	xen_rmb();
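	/*
	 * Consume TX requests until RING_FINAL_CHECK_FOR_REQUESTS() reports
	 * none left.  req_cons is written back to the shared ring on every
	 * pass so the frontend can reuse the slots we have already seen.
	 */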
727	while (1) {
728		xen_rmb(); /* be sure to read the request before updating */
729		xneti->xni_txring.req_cons = req_cons;
730		xen_wmb();
731		RING_FINAL_CHECK_FOR_REQUESTS(&xneti->xni_txring,
732		    receive_pending);
733		if (receive_pending == 0)
734			break;
735		txreq = RING_GET_REQUEST(&xneti->xni_txring, req_cons);
736		xen_rmb();
737		XENPRINTF(("%s pkt size %d\n", xneti->xni_if.if_xname,
738		    txreq->size));
739		req_cons++;
740		if (__predict_false((ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
741		    (IFF_UP | IFF_RUNNING))) {
742			/* interface not up, drop */
743			xennetback_tx_response(xneti, txreq->id,
744			    NETIF_RSP_DROPPED);
745			continue;
746		}
747		/*
748		 * Do some sanity checks, and map the packet's page.
749		 */
750		if (__predict_false(txreq->size < ETHER_HDR_LEN ||
751		   txreq->size > (ETHER_MAX_LEN - ETHER_CRC_LEN))) {
752			printf("%s: bad packet size %d\n",
753			    ifp->if_xname, txreq->size);
754			xennetback_tx_response(xneti, txreq->id,
755			    NETIF_RSP_ERROR);
756			ifp->if_ierrors++;
757			continue;
758		}
759		/* don't cross page boundaries */
760		if (__predict_false(
761		    txreq->offset + txreq->size > PAGE_SIZE)) {
762			printf("%s: packet crosses page boundary\n",
763			    ifp->if_xname);
764			xennetback_tx_response(xneti, txreq->id,
765			    NETIF_RSP_ERROR);
766			ifp->if_ierrors++;
767			continue;
768		}
769		/* get a mbuf for this packet */
770		MGETHDR(m, M_DONTWAIT, MT_DATA);
771		if (__predict_false(m == NULL)) {
772			static struct timeval lasttime;
773			if (ratecheck(&lasttime, &xni_pool_errintvl))
774				printf("%s: mbuf alloc failed\n",
775				    ifp->if_xname);
776			xennetback_tx_response(xneti, txreq->id,
777			    NETIF_RSP_DROPPED);
778			ifp->if_ierrors++;
779			continue;
780		}
781
782		XENPRINTF(("%s pkt offset %d size %d id %d req_cons %d\n",
783		    xneti->xni_if.if_xname, txreq->offset,
784		    txreq->size, txreq->id, MASK_NETIF_TX_IDX(req_cons)));
785
786		pkt = pool_get(&xni_pkt_pool, PR_NOWAIT);
787		if (__predict_false(pkt == NULL)) {
788			static struct timeval lasttime;
789			if (ratecheck(&lasttime, &xni_pool_errintvl))
790				printf("%s: xnbpkt alloc failed\n",
791				    ifp->if_xname);
792			xennetback_tx_response(xneti, txreq->id,
793			    NETIF_RSP_DROPPED);
794			ifp->if_ierrors++;
795			m_freem(m);
796			continue;
797		}
798		err = xen_shm_map(1, xneti->xni_domid, &txreq->gref, &pkt_va,
799		    &pkt->pkt_handle, XSHM_RO);
800		if (__predict_false(err == ENOMEM)) {
801			xennetback_tx_response(xneti, txreq->id,
802			    NETIF_RSP_DROPPED);
803			ifp->if_ierrors++;
804			pool_put(&xni_pkt_pool, pkt);
805			m_freem(m);
806			continue;
807		}
808
809		if (__predict_false(err)) {
810			printf("%s: mapping foreign page failed: %d\n",
811			    xneti->xni_if.if_xname, err);
812			xennetback_tx_response(xneti, txreq->id,
813			    NETIF_RSP_ERROR);
814			ifp->if_ierrors++;
815			pool_put(&xni_pkt_pool, pkt);
816			m_freem(m);
817			continue;
818		}
819
820		if ((ifp->if_flags & IFF_PROMISC) == 0) {
821			struct ether_header *eh =
822			    (void*)(pkt_va + txreq->offset);
823			if (ETHER_IS_MULTICAST(eh->ether_dhost) == 0 &&
824			    memcmp(CLLADDR(ifp->if_sadl), eh->ether_dhost,
825			    ETHER_ADDR_LEN) != 0) {
826				xni_pkt_unmap(pkt, pkt_va);
827				m_freem(m);
828				xennetback_tx_response(xneti, txreq->id,
829				    NETIF_RSP_OKAY);
830				continue; /* packet is not for us */
831			}
832		}
833#ifdef notyet
834a lot of work is needed in the tcp stack to handle read-only ext storage
835so always copy for now.
836		if (((req_cons + 1) & (NET_TX_RING_SIZE - 1)) ==
837		    (xneti->xni_txring.rsp_prod_pvt & (NET_TX_RING_SIZE - 1)))
838#else
839		if (1)
840#endif /* notyet */
841		{
842			/*
843			 * This is the last TX buffer. Copy the data and
844			 * ack it. Delaying it until the mbuf is
845			 * freed will stall transmit.
846			 */
847			m->m_len = min(MHLEN, txreq->size);
848			m->m_pkthdr.len = 0;
849			m_copyback(m, 0, txreq->size,
850			    (void *)(pkt_va + txreq->offset));
851			xni_pkt_unmap(pkt, pkt_va);
852			if (m->m_pkthdr.len < txreq->size) {
853				ifp->if_ierrors++;
854				m_freem(m);
855				xennetback_tx_response(xneti, txreq->id,
856				    NETIF_RSP_DROPPED);
857				continue;
858			}
859			xennetback_tx_response(xneti, txreq->id,
860			    NETIF_RSP_OKAY);
861		} else {
862
863			pkt->pkt_id = txreq->id;
864			pkt->pkt_xneti = xneti;
865
866			MEXTADD(m, pkt_va + txreq->offset,
867			    txreq->size, M_DEVBUF, xennetback_tx_free, pkt);
868			m->m_pkthdr.len = m->m_len = txreq->size;
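			/*
			 * the grant was mapped XSHM_RO above, so flag the
			 * external storage as a read-only mapping
			 */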
869			m->m_flags |= M_EXT_ROMAP;
870		}
871		if ((txreq->flags & NETTXF_csum_blank) != 0) {
872			xennet_checksum_fill(&m);
873			if (m == NULL) {
874				ifp->if_ierrors++;
875				continue;
876			}
877		}
878		m->m_pkthdr.rcvif = ifp;
879		ifp->if_ipackets++;
880
881		bpf_mtap(ifp, m);
882		(*ifp->if_input)(ifp, m);
883	}
884	xen_rmb(); /* be sure to read the request before updating pointer */
885	xneti->xni_txring.req_cons = req_cons;
886	xen_wmb();
887	/* check to see if we can transmit more packets */
888	softint_schedule(xneti->xni_softintr);
889
890	return 1;
891}
892
893static void
894xennetback_tx_free(struct mbuf *m, void *va, size_t size, void *arg)
895{
896	int s = splnet();
897	struct xni_pkt *pkt = arg;
898	struct xnetback_instance *xneti = pkt->pkt_xneti;
899
900	XENPRINTF(("xennetback_tx_free\n"));
901
902	xennetback_tx_response(xneti, pkt->pkt_id, NETIF_RSP_OKAY);
903
904	xni_pkt_unmap(pkt, (vaddr_t)va & ~PAGE_MASK);
905
906	if (m)
907		pool_cache_put(mb_cache, m);
908	splx(s);
909}
910
911static int
912xennetback_ifioctl(struct ifnet *ifp, u_long cmd, void *data)
913{
914	//struct xnetback_instance *xneti = ifp->if_softc;
915	//struct ifreq *ifr = (struct ifreq *)data;
916	int s, error;
917
918	s = splnet();
919	error = ether_ioctl(ifp, cmd, data);
920	if (error == ENETRESET)
921		error = 0;
922	splx(s);
923	return error;
924}
925
926static void
927xennetback_ifstart(struct ifnet *ifp)
928{
929	struct xnetback_instance *xneti = ifp->if_softc;
930
931	/*
932	 * The Xen communication channel is much more efficient if we can
933	 * schedule a batch of packets for the domain. To achieve this, we
934	 * schedule a soft interrupt, and just return. This way, the network
935	 * stack will enqueue all pending mbufs in the interface's send queue
936	 * before it is processed by the soft interrupt handler.
937	 */
938	softint_schedule(xneti->xni_softintr);
939}
940
941static void
942xennetback_ifsoftstart_transfer(void *arg)
943{
944	struct xnetback_instance *xneti = arg;
945	struct ifnet *ifp = &xneti->xni_if;
946	struct mbuf *m;
947	vaddr_t xmit_va;
948	paddr_t xmit_pa;
949	paddr_t xmit_ma;
950	paddr_t newp_ma = 0; /* XXX gcc */
951	int i, j, nppitems;
952	mmu_update_t *mmup;
953	multicall_entry_t *mclp;
954	netif_rx_response_t *rxresp;
955	RING_IDX req_prod, resp_prod;
956	int do_event = 0;
957	gnttab_transfer_t *gop;
958	int id, offset;
959
960	XENPRINTF(("xennetback_ifsoftstart_transfer "));
961	int s = splnet();
962	if (__predict_false(
963	    (ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)) {
964		splx(s);
965		return;
966	}
967
968	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
969		XENPRINTF(("pkt\n"));
970		req_prod = xneti->xni_rxring.sring->req_prod;
971		resp_prod = xneti->xni_rxring.rsp_prod_pvt;
972		xen_rmb();
973
974		mmup = xstart_mmu;
975		mclp = xstart_mcl;
976		gop = xstart_gop_transfer;
977		for (nppitems = 0, i = 0; !IFQ_IS_EMPTY(&ifp->if_snd);) {
978			XENPRINTF(("have a packet\n"));
979			IFQ_POLL(&ifp->if_snd, m);
980			if (__predict_false(m == NULL))
981				panic("xennetback_ifstart: IFQ_POLL");
982			if (__predict_false(
983			    req_prod == xneti->xni_rxring.req_cons ||
984			    xneti->xni_rxring.req_cons - resp_prod ==
985			    NET_RX_RING_SIZE)) {
986				/* out of ring space */
987				XENPRINTF(("xennetback_ifstart: ring full "
988				    "req_prod 0x%x req_cons 0x%x resp_prod "
989				    "0x%x\n",
990				    req_prod, xneti->xni_rxring.req_cons,
991				    resp_prod));
992				ifp->if_timer = 1;
993				break;
994			}
995			if (__predict_false(i == NB_XMIT_PAGES_BATCH))
996				break; /* we filled the array */
997			if (__predict_false(
998			    xennetback_get_mcl_page(&newp_ma) != 0))
999				break; /* out of memory */
1000			if ((m->m_flags & M_CLUSTER) != 0 &&
1001			    !M_READONLY(m) && MCLBYTES == PAGE_SIZE) {
1002				/* we can give this page away */
1003				xmit_pa = m->m_ext.ext_paddr;
1004				xmit_ma = xpmap_ptom(xmit_pa);
1005				xmit_va = (vaddr_t)m->m_ext.ext_buf;
1006				KASSERT(xmit_pa != M_PADDR_INVALID);
1007				KASSERT((xmit_va & PAGE_MASK) == 0);
1008				offset = m->m_data - m->m_ext.ext_buf;
1009			} else {
1010				/* we have to copy the packet */
1011				xmit_va = (vaddr_t)pool_cache_get_paddr(
1012				    xmit_pages_cachep,
1013				    PR_NOWAIT, &xmit_pa);
1014				if (__predict_false(xmit_va == 0))
1015					break; /* out of memory */
1016
1017				KASSERT(xmit_pa != POOL_PADDR_INVALID);
1018				xmit_ma = xpmap_ptom(xmit_pa);
1019				XENPRINTF(("xennetback_get_xmit_page: got va "
1020				    "0x%x ma 0x%x\n", (u_int)xmit_va,
1021				    (u_int)xmit_ma));
1022				m_copydata(m, 0, m->m_pkthdr.len,
1023				    (char *)xmit_va + LINUX_REQUESTED_OFFSET);
1024				offset = LINUX_REQUESTED_OFFSET;
1025				pages_pool_free[nppitems].va = xmit_va;
1026				pages_pool_free[nppitems].pa = xmit_pa;
1027				nppitems++;
1028			}
1029			/* start filling ring */
1030			gop->ref = RING_GET_REQUEST(&xneti->xni_rxring,
1031			    xneti->xni_rxring.req_cons)->gref;
1032			id = RING_GET_REQUEST(&xneti->xni_rxring,
1033			    xneti->xni_rxring.req_cons)->id;
1034			xen_rmb();
1035			xneti->xni_rxring.req_cons++;
1036			rxresp = RING_GET_RESPONSE(&xneti->xni_rxring,
1037			    resp_prod);
1038			rxresp->id = id;
1039			rxresp->offset = offset;
1040			rxresp->status = m->m_pkthdr.len;
1041			if ((m->m_pkthdr.csum_flags &
1042			    (M_CSUM_TCPv4 | M_CSUM_UDPv4)) != 0) {
1043				rxresp->flags = NETRXF_csum_blank;
1044			} else {
1045				rxresp->flags = 0;
1046			}
1047			/*
1048			 * transfer the page containing the packet to the
1049			 * remote domain, and map newp in its place.
1050			 */
1051			xpmap_phys_to_machine_mapping[
1052			    (xmit_pa - XPMAP_OFFSET) >> PAGE_SHIFT] =
1053			    newp_ma >> PAGE_SHIFT;
1054			MULTI_update_va_mapping(mclp, xmit_va,
1055			    newp_ma | PG_V | PG_RW | PG_U | PG_M, 0);
1056			mclp++;
1057			gop->mfn = xmit_ma >> PAGE_SHIFT;
1058			gop->domid = xneti->xni_domid;
1059			gop++;
1060
1061			mmup->ptr = newp_ma | MMU_MACHPHYS_UPDATE;
1062			mmup->val = (xmit_pa - XPMAP_OFFSET) >> PAGE_SHIFT;
1063			mmup++;
1064
1065			/* done with this packet */
1066			IFQ_DEQUEUE(&ifp->if_snd, m);
1067			mbufs_sent[i] = m;
1068			resp_prod++;
1069			i++; /* this packet has been queued */
1070			ifp->if_opackets++;
1071			bpf_mtap(ifp, m);
1072		}
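		/*
		 * i packets are queued: flush the pending page-table updates,
		 * issue the VA remappings and M2P fixups as one multicall,
		 * then hand the pages over with a single GNTTABOP_transfer
		 * batch before pushing the RX responses.
		 */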
1073		if (i != 0) {
1074			/*
1075			 * We may have allocated buffers which have entries
1076			 * outstanding in the page update queue -- make sure
1077			 * we flush those first!
1078			 */
1079			int svm = splvm();
1080			xpq_flush_queue();
1081			splx(svm);
1082			mclp[-1].args[MULTI_UVMFLAGS_INDEX] =
1083			    UVMF_TLB_FLUSH|UVMF_ALL;
1084			mclp->op = __HYPERVISOR_mmu_update;
1085			mclp->args[0] = (unsigned long)xstart_mmu;
1086			mclp->args[1] = i;
1087			mclp->args[2] = 0;
1088			mclp->args[3] = DOMID_SELF;
1089			mclp++;
1090			/* update the MMU */
1091			if (HYPERVISOR_multicall(xstart_mcl, i + 1) != 0) {
1092				panic("%s: HYPERVISOR_multicall failed",
1093				    ifp->if_xname);
1094			}
1095			for (j = 0; j < i + 1; j++) {
1096				if (xstart_mcl[j].result != 0) {
1097					printf("%s: xstart_mcl[%d] "
1098					    "failed (%lu)\n", ifp->if_xname,
1099					    j, xstart_mcl[j].result);
1100					printf("%s: req_prod %u req_cons "
1101					    "%u rsp_prod %u rsp_prod_pvt %u "
1102					    "i %u\n",
1103					    ifp->if_xname,
1104					    xneti->xni_rxring.sring->req_prod,
1105					    xneti->xni_rxring.req_cons,
1106					    xneti->xni_rxring.sring->rsp_prod,
1107					    xneti->xni_rxring.rsp_prod_pvt,
1108					    i);
1109				}
1110			}
1111			if (HYPERVISOR_grant_table_op(GNTTABOP_transfer,
1112			    xstart_gop_transfer, i) != 0) {
1113				panic("%s: GNTTABOP_transfer failed",
1114				    ifp->if_xname);
1115			}
1116
1117			for (j = 0; j < i; j++) {
1118				if (xstart_gop_transfer[j].status != GNTST_okay) {
1119					printf("%s GNTTABOP_transfer[%d] %d\n",
1120					    ifp->if_xname,
1121					    j, xstart_gop_transfer[j].status);
1122					printf("%s: req_prod %u req_cons "
1123					    "%u rsp_prod %u rsp_prod_pvt %u "
1124					    "i %d\n",
1125					    ifp->if_xname,
1126					    xneti->xni_rxring.sring->req_prod,
1127					    xneti->xni_rxring.req_cons,
1128					    xneti->xni_rxring.sring->rsp_prod,
1129					    xneti->xni_rxring.rsp_prod_pvt,
1130					    i);
1131					rxresp = RING_GET_RESPONSE(
1132					    &xneti->xni_rxring,
1133					    xneti->xni_rxring.rsp_prod_pvt + j);
1134					rxresp->status = NETIF_RSP_ERROR;
1135				}
1136			}
1137
1138			/* update pointer */
1139			KASSERT(
1140			    xneti->xni_rxring.rsp_prod_pvt + i == resp_prod);
1141			xneti->xni_rxring.rsp_prod_pvt = resp_prod;
1142			RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(
1143			    &xneti->xni_rxring, j);
1144			if (j)
1145				do_event = 1;
1146			/* now we can free the mbufs */
1147			for (j = 0; j < i; j++) {
1148				m_freem(mbufs_sent[j]);
1149			}
1150			for (j = 0; j < nppitems; j++) {
1151				pool_cache_put_paddr(xmit_pages_cachep,
1152				    (void *)pages_pool_free[j].va,
1153				    pages_pool_free[j].pa);
1154			}
1155		}
1156		/* send event */
1157		if (do_event) {
1158			xen_rmb();
1159			XENPRINTF(("%s receive event\n",
1160			    xneti->xni_if.if_xname));
1161			hypervisor_notify_via_evtchn(xneti->xni_evtchn);
1162			do_event = 0;
1163		}
1164		/* check if we need to get back some pages */
1165		if (mcl_pages_alloc < 0) {
1166			xennetback_get_new_mcl_pages();
1167			if (mcl_pages_alloc < 0) {
1168				/*
1169				 * setup the watchdog to try again, because
1170				 * xennetback_ifstart() will never be called
1171				 * again if queue is full.
1172				 */
1173				printf("xennetback_ifstart: no mcl_pages\n");
1174				ifp->if_timer = 1;
1175				break;
1176			}
1177		}
1178		/*
1179		 * note that we don't use RING_FINAL_CHECK_FOR_REQUESTS()
1180		 * here, as the frontend doesn't notify when adding
1181		 * requests anyway
1182		 */
1183		if (__predict_false(
1184		    !RING_HAS_UNCONSUMED_REQUESTS(&xneti->xni_rxring))) {
1185			/* ring full */
1186			break;
1187		}
1188	}
1189	splx(s);
1190}
1191
1192static void
1193xennetback_ifsoftstart_copy(void *arg)
1194{
1195	struct xnetback_instance *xneti = arg;
1196	struct ifnet *ifp = &xneti->xni_if;
1197	struct mbuf *m, *new_m;
1198	paddr_t xmit_pa;
1199	paddr_t xmit_ma;
1200	int i, j;
1201	netif_rx_response_t *rxresp;
1202	RING_IDX req_prod, resp_prod;
1203	int do_event = 0;
1204	gnttab_copy_t *gop;
1205	int id, offset;
1206
1207	XENPRINTF(("xennetback_ifsoftstart_copy "));
1208	int s = splnet();
1209	if (__predict_false(
1210	    (ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)) {
1211		splx(s);
1212		return;
1213	}
1214
1215	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
1216		XENPRINTF(("pkt\n"));
1217		req_prod = xneti->xni_rxring.sring->req_prod;
1218		resp_prod = xneti->xni_rxring.rsp_prod_pvt;
1219		xen_rmb();
1220
1221		gop = xstart_gop_copy;
1222		for (i = 0; !IFQ_IS_EMPTY(&ifp->if_snd);) {
1223			XENPRINTF(("have a packet\n"));
1224			IFQ_POLL(&ifp->if_snd, m);
1225			if (__predict_false(m == NULL))
1226				panic("xennetback_ifstart: IFQ_POLL");
1227			if (__predict_false(
1228			    req_prod == xneti->xni_rxring.req_cons ||
1229			    xneti->xni_rxring.req_cons - resp_prod ==
1230			    NET_RX_RING_SIZE)) {
1231				/* out of ring space */
1232				XENPRINTF(("xennetback_ifstart: ring full "
1233				    "req_prod 0x%x req_cons 0x%x resp_prod "
1234				    "0x%x\n",
1235				    req_prod, xneti->xni_rxring.req_cons,
1236				    resp_prod));
1237				ifp->if_timer = 1;
1238				break;
1239			}
1240			if (__predict_false(i == NB_XMIT_PAGES_BATCH))
1241				break; /* we filled the array */
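			/*
			 * find the physical page and offset backing the mbuf
			 * data: cluster storage, internal storage, or (for any
			 * other external storage) a pmap lookup.
			 */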
1242			switch (m->m_flags & (M_EXT|M_EXT_CLUSTER)) {
1243			case M_EXT|M_EXT_CLUSTER:
1244				KASSERT(m->m_ext.ext_paddr != M_PADDR_INVALID);
1245				xmit_pa = m->m_ext.ext_paddr;
1246				offset = m->m_data - m->m_ext.ext_buf;
1247				break;
1248			case 0:
1249				KASSERT(m->m_paddr != M_PADDR_INVALID);
1250				xmit_pa = m->m_paddr;
1251				offset = M_BUFOFFSET(m) +
1252				    (m->m_data - M_BUFADDR(m));
1253				break;
1254			default:
1255				if (__predict_false(
1256				    !pmap_extract(pmap_kernel(),
1257				    (vaddr_t)m->m_data, &xmit_pa))) {
1258					panic("xennetback_ifsoftstart_copy: no pa");
1259				}
1260				offset = 0;
1261				break;
1262			}
1263			offset += (xmit_pa & ~PG_FRAME);
1264			xmit_pa = (xmit_pa & PG_FRAME);
1265			if (m->m_pkthdr.len != m->m_len ||
1266			    (offset + m->m_pkthdr.len) > PAGE_SIZE) {
1267				MGETHDR(new_m, M_DONTWAIT, MT_DATA);
1268				if (__predict_false(new_m == NULL)) {
1269					printf("%s: cannot allocate new mbuf\n",
1270					    ifp->if_xname);
1271					break;
1272				}
1273				if (m->m_pkthdr.len > MHLEN) {
1274					MCLGET(new_m, M_DONTWAIT);
1275					if (__predict_false(
1276					    (new_m->m_flags & M_EXT) == 0)) {
1277						XENPRINTF((
1278						    "%s: no mbuf cluster\n",
1279						    ifp->if_xname));
1280						m_freem(new_m);
1281						break;
1282					}
1283					xmit_pa = new_m->m_ext.ext_paddr;
1284					offset = new_m->m_data -
1285					    new_m->m_ext.ext_buf;
1286				} else {
1287					xmit_pa = new_m->m_paddr;
1288					offset = M_BUFOFFSET(new_m) +
1289					    (new_m->m_data - M_BUFADDR(new_m));
1290				}
1291				offset += (xmit_pa & ~PG_FRAME);
1292				xmit_pa = (xmit_pa & PG_FRAME);
1293				m_copydata(m, 0, m->m_pkthdr.len,
1294				    mtod(new_m, void *));
1295				new_m->m_len = new_m->m_pkthdr.len =
1296				    m->m_pkthdr.len;
1297				IFQ_DEQUEUE(&ifp->if_snd, m);
1298				m_freem(m);
1299				m = new_m;
1300			} else {
1301				IFQ_DEQUEUE(&ifp->if_snd, m);
1302			}
1303
1304			KASSERT(xmit_pa != POOL_PADDR_INVALID);
1305			KASSERT((offset + m->m_pkthdr.len) <= PAGE_SIZE);
1306			xmit_ma = xpmap_ptom(xmit_pa);
1307			/* start filling ring */
1308			gop->flags = GNTCOPY_dest_gref;
1309			gop->source.offset = offset;
1310			gop->source.domid = DOMID_SELF;
1311			gop->source.u.gmfn = xmit_ma >> PAGE_SHIFT;
1312
1313			gop->dest.u.ref = RING_GET_REQUEST(&xneti->xni_rxring,
1314			    xneti->xni_rxring.req_cons)->gref;
1315			gop->dest.offset = 0;
1316			gop->dest.domid = xneti->xni_domid;
1317
1318			gop->len = m->m_pkthdr.len;
1319			gop++;
1320
1321			id = RING_GET_REQUEST(&xneti->xni_rxring,
1322			    xneti->xni_rxring.req_cons)->id;
1323			xen_rmb();
1324			xneti->xni_rxring.req_cons++;
1325			rxresp = RING_GET_RESPONSE(&xneti->xni_rxring,
1326			    resp_prod);
1327			rxresp->id = id;
1328			rxresp->offset = 0;
1329			rxresp->status = m->m_pkthdr.len;
1330			if ((m->m_pkthdr.csum_flags &
1331			    (M_CSUM_TCPv4 | M_CSUM_UDPv4)) != 0) {
1332				rxresp->flags = NETRXF_csum_blank;
1333			} else {
1334				rxresp->flags = 0;
1335			}
1336
1337			mbufs_sent[i] = m;
1338			resp_prod++;
1339			i++; /* this packet has been queued */
1340			ifp->if_opackets++;
1341			bpf_mtap(ifp, m);
1342		}
1343		if (i != 0) {
1344			if (HYPERVISOR_grant_table_op(GNTTABOP_copy,
1345			    xstart_gop_copy, i) != 0) {
1346				panic("%s: GNTTABOP_copy failed",
1347				    ifp->if_xname);
1348			}
1349
1350			for (j = 0; j < i; j++) {
1351				if (xstart_gop_copy[j].status != GNTST_okay) {
1352					printf("%s GNTTABOP_copy[%d] %d\n",
1353					    ifp->if_xname,
1354					    j, xstart_gop_copy[j].status);
1355					printf("%s: req_prod %u req_cons "
1356					    "%u rsp_prod %u rsp_prod_pvt %u "
1357					    "i %d\n",
1358					    ifp->if_xname,
1359					    xneti->xni_rxring.sring->req_prod,
1360					    xneti->xni_rxring.req_cons,
1361					    xneti->xni_rxring.sring->rsp_prod,
1362					    xneti->xni_rxring.rsp_prod_pvt,
1363					    i);
1364					rxresp = RING_GET_RESPONSE(
1365					    &xneti->xni_rxring,
1366					    xneti->xni_rxring.rsp_prod_pvt + j);
1367					rxresp->status = NETIF_RSP_ERROR;
1368				}
1369			}
1370
1371			/* update pointer */
1372			KASSERT(
1373			    xneti->xni_rxring.rsp_prod_pvt + i == resp_prod);
1374			xneti->xni_rxring.rsp_prod_pvt = resp_prod;
1375			RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(
1376			    &xneti->xni_rxring, j);
1377			if (j)
1378				do_event = 1;
1379			/* now we can free the mbufs */
1380			for (j = 0; j < i; j++) {
1381				m_freem(mbufs_sent[j]);
1382			}
1383		}
1384		/* send event */
1385		if (do_event) {
1386			xen_rmb();
1387			XENPRINTF(("%s receive event\n",
1388			    xneti->xni_if.if_xname));
1389			hypervisor_notify_via_evtchn(xneti->xni_evtchn);
1390			do_event = 0;
1391		}
1392		/*
1393		 * note that we don't use RING_FINAL_CHECK_FOR_REQUESTS()
1394		 * here, as the frontend doesn't notify when adding
1395		 * requests anyway
1396		 */
1397		if (__predict_false(
1398		    !RING_HAS_UNCONSUMED_REQUESTS(&xneti->xni_rxring))) {
1399			/* ring full */
1400			break;
1401		}
1402	}
1403	splx(s);
1404}
1405
1406
1407static void
1408xennetback_ifwatchdog(struct ifnet * ifp)
1409{
1410	/*
1411	 * We can get to the following condition:
1412	 * transmit stalls because the ring is full when the ifq is full too.
1413	 * In this case (as, unfortunately, we don't get an interrupt from Xen
1414	 * on transmit) nothing will ever call xennetback_ifstart() again.
1415	 * Here we abuse the watchdog to get out of this condition.
1416	 */
1417	XENPRINTF(("xennetback_ifwatchdog\n"));
1418	xennetback_ifstart(ifp);
1419}
1420
1421
1422static int
1423xennetback_ifinit(struct ifnet *ifp)
1424{
1425	struct xnetback_instance *xneti = ifp->if_softc;
1426	int s = splnet();
1427
1428	if ((ifp->if_flags & IFF_UP) == 0) {
1429		splx(s);
1430		return 0;
1431	}
1432	if (xneti->xni_status == CONNECTED)
1433		ifp->if_flags |= IFF_RUNNING;
1434	splx(s);
1435	return 0;
1436}
1437
1438static void
1439xennetback_ifstop(struct ifnet *ifp, int disable)
1440{
1441	struct xnetback_instance *xneti = ifp->if_softc;
1442	int s = splnet();
1443
1444	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1445	ifp->if_timer = 0;
1446	if (xneti->xni_status == CONNECTED) {
1447		XENPRINTF(("%s: req_prod 0x%x rsp_prod 0x%x req_cons 0x%x "
1448		    "rsp_event 0x%x\n", ifp->if_xname,
1449		    xneti->xni_txring.sring->req_prod, xneti->xni_txring.sring->rsp_prod,
1450		    xneti->xni_txring.req_cons, xneti->xni_txring.sring->rsp_event));
1451		xennetback_evthandler(ifp->if_softc); /* flush frontend TX requests */
1452	}
1453	splx(s);
1454}
1455