netfront.c revision 181945
1/*
2 *
3 * Copyright (c) 2004-2006 Kip Macy
4 * All rights reserved.
5 *
6 *
7 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
8 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
9 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
10 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
11 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
12 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
13 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
14 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
15 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
16 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
17 */
18
19
20#include <sys/cdefs.h>
21__FBSDID("$FreeBSD: head/sys/dev/xen/netfront/netfront.c 181945 2008-08-21 02:40:26Z kmacy $");
22
23#include <sys/param.h>
24#include <sys/systm.h>
25#include <sys/sockio.h>
26#include <sys/mbuf.h>
27#include <sys/malloc.h>
28#include <sys/kernel.h>
29#include <sys/socket.h>
30#include <sys/queue.h>
31#include <sys/sx.h>
32
33#include <net/if.h>
34#include <net/if_arp.h>
35#include <net/ethernet.h>
36#include <net/if_dl.h>
37#include <net/if_media.h>
38
39#include <net/bpf.h>
40
41#include <net/if_types.h>
43
44#include <netinet/in_systm.h>
45#include <netinet/in.h>
46#include <netinet/ip.h>
47#include <netinet/if_ether.h>
48
49#include <vm/vm.h>
50#include <vm/pmap.h>
51
52#include <machine/clock.h>      /* for DELAY */
53#include <machine/bus.h>
54#include <machine/resource.h>
55#include <machine/frame.h>
56#include <machine/vmparam.h>
57
58#include <sys/bus.h>
59#include <sys/rman.h>
60
61#include <machine/intr_machdep.h>
62
63#include <machine/xen/xen-os.h>
64#include <machine/xen/hypervisor.h>
65#include <machine/xen/xen_intr.h>
66#include <machine/xen/evtchn.h>
67#include <machine/xen/xenbus.h>
68#include <xen/gnttab.h>
69#include <xen/interface/memory.h>
70#include <dev/xen/netfront/mbufq.h>
71#include <machine/xen/features.h>
72#include <xen/interface/io/netif.h>
73
74
75#define GRANT_INVALID_REF	0
76
77#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
78#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
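/*
 * __RING_SIZE() (from the Xen io/ring.h interface header) evaluates to the
 * largest power-of-two number of request/response slots that fits in a
 * single page.  A rough sketch of the arithmetic (illustrative only):
 *
 *	slots = rounddown_pow_of_two((PAGE_SIZE - ring header) / slot size)
 *
 * The power-of-two size is what lets xennet_rxidx() below mask ring indices
 * with (NET_RX_RING_SIZE - 1).
 */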
79
80#ifdef CONFIG_XEN
81static int MODPARM_rx_copy = 0;
82module_param_named(rx_copy, MODPARM_rx_copy, bool, 0);
83MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)");
84static int MODPARM_rx_flip = 0;
85module_param_named(rx_flip, MODPARM_rx_flip, bool, 0);
86MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)");
87#else
88static const int MODPARM_rx_copy = 1;
89static const int MODPARM_rx_flip = 0;
90#endif
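/*
 * On FreeBSD the CONFIG_XEN branch above is not compiled, so rx_copy/rx_flip
 * are fixed at 1/0.  With those values network_connect() below requests a
 * copying receiver whenever the backend advertises feature-rx-copy, and only
 * falls back to page flipping when it does not.
 */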
91
92#define RX_COPY_THRESHOLD 256
93
94#define net_ratelimit() 0
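/*
 * net_ratelimit() is stubbed to 0 here, so the WPRINTK() warnings guarded by
 * it further down never fire; they are presumably kept for parity with the
 * Linux code this driver was ported from.
 */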
95
96struct netfront_info;
97struct netfront_rx_info;
98
99static void xn_txeof(struct netfront_info *);
100static void xn_rxeof(struct netfront_info *);
101static void network_alloc_rx_buffers(struct netfront_info *);
102
103static void xn_tick_locked(struct netfront_info *);
104static void xn_tick(void *);
105
106static void xn_intr(void *);
107static void xn_start_locked(struct ifnet *);
108static void xn_start(struct ifnet *);
109static int  xn_ioctl(struct ifnet *, u_long, caddr_t);
110static void xn_ifinit_locked(struct netfront_info *);
111static void xn_ifinit(void *);
112static void xn_stop(struct netfront_info *);
113#ifdef notyet
114static void xn_watchdog(struct ifnet *);
115#endif
116
117static void show_device(struct netfront_info *sc);
118#ifdef notyet
119static void netfront_closing(struct xenbus_device *dev);
120#endif
121static void netif_free(struct netfront_info *info);
122static int netfront_remove(struct xenbus_device *dev);
123
124static int talk_to_backend(struct xenbus_device *dev, struct netfront_info *info);
125static int create_netdev(struct xenbus_device *dev, struct ifnet **ifp);
126static void netif_disconnect_backend(struct netfront_info *info);
127static int setup_device(struct xenbus_device *dev, struct netfront_info *info);
128static void end_access(int ref, void *page);
129
130/* Xenolinux helper functions */
131static int network_connect(struct ifnet *ifp);
132
133static void xn_free_rx_ring(struct netfront_info *);
134
135static void xn_free_tx_ring(struct netfront_info *);
136
137static int xennet_get_responses(struct netfront_info *np,
138	struct netfront_rx_info *rinfo, RING_IDX rp, struct mbuf **list,
139	int *pages_flipped_p);
140
141#define virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT)
142
143#define INVALID_P2M_ENTRY (~0UL)
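/*
 * virt_to_mfn() converts a kernel virtual address into the machine frame
 * number backing it (via vtomach()); MFNs are what the grant and ring
 * operations below hand to the hypervisor and the backend.  INVALID_P2M_ENTRY
 * is the sentinel written into the phys-to-machine map for pages that have
 * been flipped away to the backend.
 */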
144
145/*
146 * Mbuf pointers. We need these to keep track of the virtual addresses
147 * of our mbuf chains since we can only convert from virtual to physical,
148 * not the other way around.  The size must track the free index arrays.
149 */
150struct xn_chain_data {
151		struct mbuf		*xn_tx_chain[NET_TX_RING_SIZE+1];
152		struct mbuf		*xn_rx_chain[NET_RX_RING_SIZE+1];
153};
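/*
 * Both arrays are one entry larger than the ring.  The tx array doubles as a
 * free list: entry 0 holds the index of the first free slot (see
 * add_id_to_freelist()/get_id_from_freelist() below), so tx ids run from 1 to
 * NET_TX_RING_SIZE.  The rx array is indexed directly by the masked ring
 * index returned by xennet_rxidx().
 */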
154
155
156struct net_device_stats
157{
158	u_long	rx_packets;		/* total packets received	*/
159	u_long	tx_packets;		/* total packets transmitted	*/
160	u_long	rx_bytes;		/* total bytes received 	*/
161	u_long	tx_bytes;		/* total bytes transmitted	*/
162	u_long	rx_errors;		/* bad packets received		*/
163	u_long	tx_errors;		/* packet transmit problems	*/
164	u_long	rx_dropped;		/* no space in linux buffers	*/
165	u_long	tx_dropped;		/* no space available in linux	*/
166	u_long	multicast;		/* multicast packets received	*/
167	u_long	collisions;
168
169	/* detailed rx_errors: */
170	u_long	rx_length_errors;
171	u_long	rx_over_errors;		/* receiver ring buff overflow	*/
172	u_long	rx_crc_errors;		/* recved pkt with crc error	*/
173	u_long	rx_frame_errors;	/* recv'd frame alignment error */
174	u_long	rx_fifo_errors;		/* recv'r fifo overrun		*/
175	u_long	rx_missed_errors;	/* receiver missed packet	*/
176
177	/* detailed tx_errors */
178	u_long	tx_aborted_errors;
179	u_long	tx_carrier_errors;
180	u_long	tx_fifo_errors;
181	u_long	tx_heartbeat_errors;
182	u_long	tx_window_errors;
183
184	/* for cslip etc */
185	u_long	rx_compressed;
186	u_long	tx_compressed;
187};
188
189struct netfront_info {
190
191	struct ifnet *xn_ifp;
192
193	struct net_device_stats stats;
194	u_int tx_full;
195
196	netif_tx_front_ring_t tx;
197	netif_rx_front_ring_t rx;
198
199	struct mtx   tx_lock;
200	struct mtx   rx_lock;
201	struct sx    sc_lock;
202
203	u_int handle;
204	u_int irq;
205	u_int copying_receiver;
206	u_int carrier;
207
208	/* Receive-ring batched refills. */
209#define RX_MIN_TARGET 32
210#define RX_MAX_TARGET NET_RX_RING_SIZE
211	int rx_min_target, rx_max_target, rx_target;
212
213	/*
	 * The tx_mbufs/rx_mbufs arrays (in xn_cdata below) store outstanding
	 * mbufs.  The first entry in the tx array is the head of a chain of
	 * free entries.
216	 */
217
218	grant_ref_t gref_tx_head;
219	grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1];
220	grant_ref_t gref_rx_head;
	grant_ref_t grant_rx_ref[NET_RX_RING_SIZE + 1];
222
223#define TX_MAX_TARGET min(NET_RX_RING_SIZE, 256)
224	struct xenbus_device *xbdev;
225	int tx_ring_ref;
226	int rx_ring_ref;
227	uint8_t mac[ETHER_ADDR_LEN];
228	struct xn_chain_data	xn_cdata;	/* mbufs */
229	struct mbuf_head xn_rx_batch;	/* head of the batch queue */
230
231	int			xn_if_flags;
232	struct callout	        xn_stat_ch;
233
234	u_long rx_pfn_array[NET_RX_RING_SIZE];
235	multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
236	mmu_update_t rx_mmu[NET_RX_RING_SIZE];
237};
238
239#define rx_mbufs xn_cdata.xn_rx_chain
240#define tx_mbufs xn_cdata.xn_tx_chain
241
242#define XN_LOCK_INIT(_sc, _name) \
243        mtx_init(&(_sc)->tx_lock, #_name"_tx", "network transmit lock", MTX_DEF); \
244        mtx_init(&(_sc)->rx_lock, #_name"_rx", "network receive lock", MTX_DEF);  \
        sx_init(&(_sc)->sc_lock, #_name"_sc")
246
247#define XN_RX_LOCK(_sc)           mtx_lock(&(_sc)->rx_lock)
248#define XN_RX_UNLOCK(_sc)         mtx_unlock(&(_sc)->rx_lock)
249
250#define XN_TX_LOCK(_sc)           mtx_lock(&(_sc)->tx_lock)
251#define XN_TX_UNLOCK(_sc)         mtx_unlock(&(_sc)->tx_lock)
252
#define XN_LOCK(_sc)           sx_xlock(&(_sc)->sc_lock)
#define XN_UNLOCK(_sc)         sx_xunlock(&(_sc)->sc_lock)

#define XN_LOCK_ASSERT(_sc)    sx_assert(&(_sc)->sc_lock, SX_LOCKED)
#define XN_RX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->rx_lock, MA_OWNED)
#define XN_TX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->tx_lock, MA_OWNED)
#define XN_LOCK_DESTROY(_sc)   do {				\
	mtx_destroy(&(_sc)->rx_lock);				\
	mtx_destroy(&(_sc)->tx_lock);				\
	sx_destroy(&(_sc)->sc_lock);				\
} while (0)
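/*
 * Locking model, as used below: rx_lock and tx_lock are mutexes protecting
 * the receive and transmit rings (in particular against the interrupt
 * handler, xn_intr()); sc_lock is an sx lock serializing the slower
 * configuration paths such as ioctl, init and connect/disconnect.
 */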
262
263struct netfront_rx_info {
264	struct netif_rx_response rx;
265	struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
266};
267
268#define netfront_carrier_on(netif)	((netif)->carrier = 1)
269#define netfront_carrier_off(netif)	((netif)->carrier = 0)
270#define netfront_carrier_ok(netif)	((netif)->carrier)
271
/*
 * Helpers for acquiring and freeing slots in the tx_mbufs[] free list.
 */
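/*
 * The free list is threaded through the mbuf pointer array itself: a free
 * slot stores the index of the next free slot (cast to a pointer) and
 * list[0] holds the head.  A hypothetical three-slot walk-through:
 *
 *	list[0] = (void *)1, list[1] = (void *)2, list[2] = (void *)3
 *	get_id_from_freelist(list) returns 1 and sets list[0] = (void *)2;
 *	add_id_to_freelist(list, 1) pushes slot 1 back onto the front.
 */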
279
280static inline void
281add_id_to_freelist(struct mbuf **list, unsigned short id)
282{
283	list[id] = list[0];
284	list[0]  = (void *)(u_long)id;
285}
286
287static inline unsigned short
288get_id_from_freelist(struct mbuf **list)
289{
290	u_int id = (u_int)(u_long)list[0];
291	list[0] = list[id];
292	return (id);
293}
294
295static inline int
296xennet_rxidx(RING_IDX idx)
297{
298	return idx & (NET_RX_RING_SIZE - 1);
299}
300
301static inline struct mbuf *
302xennet_get_rx_mbuf(struct netfront_info *np,
303						RING_IDX ri)
304{
305	int i = xennet_rxidx(ri);
306	struct mbuf *m;
307
308	m = np->rx_mbufs[i];
309	np->rx_mbufs[i] = NULL;
310	return (m);
311}
312
313static inline grant_ref_t
314xennet_get_rx_ref(struct netfront_info *np, RING_IDX ri)
315{
316	int i = xennet_rxidx(ri);
317	grant_ref_t ref = np->grant_rx_ref[i];
318	np->grant_rx_ref[i] = GRANT_INVALID_REF;
319	return ref;
320}
321
322#ifdef DEBUG
323
324#endif
325#define IPRINTK(fmt, args...) \
326    printf("[XEN] " fmt, ##args)
327#define WPRINTK(fmt, args...) \
328    printf("[XEN] " fmt, ##args)
329#define DPRINTK(fmt, args...) \
330    printf("[XEN] " fmt, ##args)
331
332
/*
 * Copy the packet in 'buf' into a single page-sized mbuf so that the whole
 * frame can be granted to the backend with one grant reference.
 */
static __inline struct mbuf *
makembuf(struct mbuf *buf)
{
	struct mbuf *m;

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);

	M_MOVE_PKTHDR(m, buf);

	m_cljget(m, M_DONTWAIT, MJUMPAGESIZE);
	if ((m->m_flags & M_EXT) == 0) {
		m_freem(m);
		return (NULL);
	}
	m->m_len = m->m_pkthdr.len;	/* whole frame is copied contiguously */
	m_copydata(buf, 0, m->m_pkthdr.len, mtod(m, caddr_t));

	m->m_ext.ext_arg1 = (caddr_t *)(uintptr_t)(
	    vtophys(mtod(m, caddr_t)) >> PAGE_SHIFT);

	return (m);
}
354
355/**
 * Read the 'mac' node at the given device's node in the store, and parse it
 * as colon-separated octets, placing the result in the given mac array.  mac
 * must be a preallocated array of ETHER_ADDR_LEN bytes.
359 * Return 0 on success, or errno on error.
360 */
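/*
 * The store value is a string such as "00:16:3e:xx:xx:xx" (Xen's OUI, shown
 * here purely as an illustration): strtoul() parses one hex octet per loop
 * iteration and 'e' must land on the ':' separator or the terminating NUL.
 */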
361static int
362xen_net_read_mac(struct xenbus_device *dev, uint8_t mac[])
363{
364	char *s;
365	int i;
366	char *e;
367	char *macstr = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
368	if (IS_ERR(macstr)) {
369		return PTR_ERR(macstr);
370	}
371	s = macstr;
372	for (i = 0; i < ETHER_ADDR_LEN; i++) {
373		mac[i] = strtoul(s, &e, 16);
374		if (s == e || (e[0] != ':' && e[0] != 0)) {
375			free(macstr, M_DEVBUF);
376			return ENOENT;
377		}
378		s = &e[1];
379	}
380	free(macstr, M_DEVBUF);
381	return 0;
382}
383
384/**
 * Entry point to this code when a new device is created.  Allocate the basic
 * structures.  The shared rings and event channel are set up later, from
 * backend_changed() via network_connect(), at which point the device is
 * switched to the Connected state.
389 */
390static int
391netfront_probe(struct xenbus_device *dev, const struct xenbus_device_id *id)
392{
393	int err;
394	struct ifnet *ifp;
395	struct netfront_info *info;
396
397	printf("netfront_probe() \n");
398
399	err = create_netdev(dev, &ifp);
400	if (err) {
401		xenbus_dev_fatal(dev, err, "creating netdev");
402		return err;
403	}
404
405	info = ifp->if_softc;
406	dev->dev_driver_data = info;
407
408	return 0;
409}
410
411
412/**
413 * We are reconnecting to the backend, due to a suspend/resume, or a backend
414 * driver restart.  We tear down our netif structure and recreate it, but
415 * leave the device-layer structures intact so that this is transparent to the
416 * rest of the kernel.
417 */
418static int
419netfront_resume(struct xenbus_device *dev)
420{
421	struct netfront_info *info = dev->dev_driver_data;
422
423	DPRINTK("%s\n", dev->nodename);
424
425	netif_disconnect_backend(info);
426	return (0);
427}
428
429
430/* Common code used when first setting up, and when resuming. */
431static int
432talk_to_backend(struct xenbus_device *dev, struct netfront_info *info)
433{
434	const char *message;
435	struct xenbus_transaction xbt;
436	int err;
437
438	err = xen_net_read_mac(dev, info->mac);
439	if (err) {
440		xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
441		goto out;
442	}
443
444	/* Create shared ring, alloc event channel. */
445	err = setup_device(dev, info);
446	if (err)
447		goto out;
448
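	/*
	 * The writes below are wrapped in a single xenbus transaction; if
	 * xenbus_transaction_end() returns EAGAIN the transaction raced with
	 * another store update and is simply restarted from "again".
	 */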
449 again:
450	err = xenbus_transaction_start(&xbt);
451	if (err) {
452		xenbus_dev_fatal(dev, err, "starting transaction");
453		goto destroy_ring;
454	}
455	err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref","%u",
456			    info->tx_ring_ref);
457	if (err) {
458		message = "writing tx ring-ref";
459		goto abort_transaction;
460	}
461	err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref","%u",
462			    info->rx_ring_ref);
463	if (err) {
464		message = "writing rx ring-ref";
465		goto abort_transaction;
466	}
467	err = xenbus_printf(xbt, dev->nodename,
468		"event-channel", "%u", irq_to_evtchn_port(info->irq));
469	if (err) {
470		message = "writing event-channel";
471		goto abort_transaction;
472	}
473	err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
474			    info->copying_receiver);
475	if (err) {
476		message = "writing request-rx-copy";
477		goto abort_transaction;
478	}
479	err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
480	if (err) {
481		message = "writing feature-rx-notify";
482		goto abort_transaction;
483	}
484	err = xenbus_printf(xbt, dev->nodename, "feature-no-csum-offload", "%d", 1);
485	if (err) {
486		message = "writing feature-no-csum-offload";
487		goto abort_transaction;
488	}
489	err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
490	if (err) {
491		message = "writing feature-sg";
492		goto abort_transaction;
493	}
494#ifdef HAVE_TSO
495	err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
496	if (err) {
497		message = "writing feature-gso-tcpv4";
498		goto abort_transaction;
499	}
500#endif
501
502	err = xenbus_transaction_end(xbt, 0);
503	if (err) {
504		if (err == EAGAIN)
505			goto again;
506		xenbus_dev_fatal(dev, err, "completing transaction");
507		goto destroy_ring;
508	}
509
510	return 0;
511
512 abort_transaction:
513	xenbus_transaction_end(xbt, 1);
514	xenbus_dev_fatal(dev, err, "%s", message);
515 destroy_ring:
516	netif_free(info);
517 out:
518	return err;
519}
520
521
522static int
523setup_device(struct xenbus_device *dev, struct netfront_info *info)
524{
525	netif_tx_sring_t *txs;
526	netif_rx_sring_t *rxs;
527	int err;
528	struct ifnet *ifp;
529
530	ifp = info->xn_ifp;
531
532	info->tx_ring_ref = GRANT_INVALID_REF;
533	info->rx_ring_ref = GRANT_INVALID_REF;
534	info->rx.sring = NULL;
535	info->tx.sring = NULL;
536	info->irq = 0;
537
538	txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
539	if (!txs) {
540		err = ENOMEM;
541		xenbus_dev_fatal(dev, err, "allocating tx ring page");
542		goto fail;
543	}
544	SHARED_RING_INIT(txs);
545	FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
546	err = xenbus_grant_ring(dev, virt_to_mfn(txs));
547	if (err < 0)
548		goto fail;
549	info->tx_ring_ref = err;
550
551	rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
552	if (!rxs) {
553		err = ENOMEM;
554		xenbus_dev_fatal(dev, err, "allocating rx ring page");
555		goto fail;
556	}
557	SHARED_RING_INIT(rxs);
558	FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
559
560	err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
561	if (err < 0)
562		goto fail;
563	info->rx_ring_ref = err;
564
565#if 0
566	network_connect(ifp);
567#endif
568	err = bind_listening_port_to_irqhandler(dev->otherend_id,
569		"xn", xn_intr, info, INTR_TYPE_NET | INTR_MPSAFE, NULL);
570
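	/*
	 * On success the return value is the interrupt bound to the new
	 * event channel; it is saved in info->irq below and later used by
	 * notify_remote_via_irq() to kick the backend.
	 */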
571	if (err <= 0) {
572		xenbus_dev_fatal(dev, err,
573				 "bind_evtchn_to_irqhandler failed");
574		goto fail;
575	}
576	info->irq = err;
577
578	show_device(info);
579
580	return 0;
581
582 fail:
583	netif_free(info);
584	return err;
585}
586
587/**
588 * Callback received when the backend's state changes.
589 */
590static void
591backend_changed(struct xenbus_device *dev,
592			    XenbusState backend_state)
593{
	struct netfront_info *sc = dev->dev_driver_data;
595
596	DPRINTK("\n");
597
598	switch (backend_state) {
599	case XenbusStateInitialising:
600	case XenbusStateInitialised:
601	case XenbusStateConnected:
602	case XenbusStateUnknown:
603	case XenbusStateClosed:
		break;
605	case XenbusStateInitWait:
606		if (dev->state != XenbusStateInitialising)
607			break;
608		if (network_connect(sc->xn_ifp) != 0)
609			break;
610		xenbus_switch_state(dev, XenbusStateConnected);
611#ifdef notyet
612		(void)send_fake_arp(netdev);
613#endif
		break;
615	case XenbusStateClosing:
		xenbus_frontend_closed(dev);
617		break;
618	}
619}
620
621static void
622xn_free_rx_ring(struct netfront_info *sc)
623{
624#if 0
625	int i;
626
627	for (i = 0; i < NET_RX_RING_SIZE; i++) {
628		if (sc->xn_cdata.xn_rx_chain[i] != NULL) {
629			m_freem(sc->xn_cdata.xn_rx_chain[i]);
630			sc->xn_cdata.xn_rx_chain[i] = NULL;
631		}
632	}
633
634	sc->rx.rsp_cons = 0;
635	sc->xn_rx_if->req_prod = 0;
636	sc->xn_rx_if->event = sc->rx.rsp_cons ;
637#endif
638}
639
640static void
641xn_free_tx_ring(struct netfront_info *sc)
642{
643#if 0
644	int i;
645
646	for (i = 0; i < NET_TX_RING_SIZE; i++) {
647		if (sc->xn_cdata.xn_tx_chain[i] != NULL) {
648			m_freem(sc->xn_cdata.xn_tx_chain[i]);
649			sc->xn_cdata.xn_tx_chain[i] = NULL;
650		}
651	}
652
653	return;
654#endif
655}
656
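/*
 * Leave enough slack in the transmit ring for a maximally fragmented packet
 * (the literal 24 below stands in for Linux's MAX_SKB_FRAGS) plus a couple
 * of spare slots before treating the ring as full.
 */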
657static inline int
658netfront_tx_slot_available(struct netfront_info *np)
659{
660	return ((np->tx.req_prod_pvt - np->tx.rsp_cons) <
661		(TX_MAX_TARGET - /* MAX_SKB_FRAGS */ 24 - 2));
662}
663static void
664netif_release_tx_bufs(struct netfront_info *np)
665{
666	struct mbuf *m;
667	int i;
668
669	for (i = 1; i <= NET_TX_RING_SIZE; i++) {
670		m = np->xn_cdata.xn_tx_chain[i];
671
672		if (((u_long)m) < KERNBASE)
673			continue;
674		gnttab_grant_foreign_access_ref(np->grant_tx_ref[i],
675		    np->xbdev->otherend_id, virt_to_mfn(mtod(m, vm_offset_t)),
676		    GNTMAP_readonly);
677		gnttab_release_grant_reference(&np->gref_tx_head,
678		    np->grant_tx_ref[i]);
679		np->grant_tx_ref[i] = GRANT_INVALID_REF;
680		add_id_to_freelist(np->tx_mbufs, i);
681		m_freem(m);
682	}
683}
684
685static void
686network_alloc_rx_buffers(struct netfront_info *sc)
687{
688	unsigned short id;
689	struct mbuf *m_new;
690	int i, batch_target, notify;
691	RING_IDX req_prod;
692	struct xen_memory_reservation reservation;
693	grant_ref_t ref;
694	int nr_flips;
695	netif_rx_request_t *req;
696	vm_offset_t vaddr;
697	u_long pfn;
698
699	req_prod = sc->rx.req_prod_pvt;
700
701	if (unlikely(sc->carrier == 0))
702		return;
703
704	/*
	 * Allocate mbufs greedily, even though we batch updates to the
	 * receive ring. This creates a less bursty demand on the memory
	 * allocator, and so should reduce the chance of failed allocation
	 * requests both for ourselves and for other kernel subsystems.
709	 */
710	batch_target = sc->rx_target - (req_prod - sc->rx.rsp_cons);
711	for (i = mbufq_len(&sc->xn_rx_batch); i < batch_target; i++) {
712		MGETHDR(m_new, M_DONTWAIT, MT_DATA);
713		if (m_new == NULL)
714			goto no_mbuf;
715
716		m_cljget(m_new, M_DONTWAIT, MJUMPAGESIZE);
717		if ((m_new->m_flags & M_EXT) == 0) {
718			m_freem(m_new);
719
720no_mbuf:
721			if (i != 0)
722				goto refill;
723			/*
724			 * XXX set timer
725			 */
726			break;
727		}
728		m_new->m_len = m_new->m_pkthdr.len = MJUMPAGESIZE;
729
730		/* queue the mbufs allocated */
731		mbufq_tail(&sc->xn_rx_batch, m_new);
732	}
733
734	/* Is the batch large enough to be worthwhile? */
735	if (i < (sc->rx_target/2)) {
		if (req_prod > sc->rx.sring->req_prod)
737			goto push;
738		return;
739	}
740	/* Adjust floating fill target if we risked running out of buffers. */
741	if ( ((req_prod - sc->rx.sring->rsp_prod) < (sc->rx_target / 4)) &&
742	     ((sc->rx_target *= 2) > sc->rx_max_target) )
743		sc->rx_target = sc->rx_max_target;
744
745refill:
746	for (nr_flips = i = 0; ; i++) {
747		if ((m_new = mbufq_dequeue(&sc->xn_rx_batch)) == NULL)
748			break;
749
750		m_new->m_ext.ext_arg1 = (vm_paddr_t *)(uintptr_t)(
751				vtophys(m_new->m_ext.ext_buf) >> PAGE_SHIFT);
752
753		id = xennet_rxidx(req_prod + i);
754
755		KASSERT(sc->xn_cdata.xn_rx_chain[id] == NULL,
		    ("non-NULL xn_rx_chain"));
757		sc->xn_cdata.xn_rx_chain[id] = m_new;
758
759		ref = gnttab_claim_grant_reference(&sc->gref_rx_head);
760		KASSERT((short)ref >= 0, ("negative ref"));
761		sc->grant_rx_ref[id] = ref;
762
763		vaddr = mtod(m_new, vm_offset_t);
764		pfn = vtophys(vaddr) >> PAGE_SHIFT;
765		req = RING_GET_REQUEST(&sc->rx, req_prod + i);
766
767		if (sc->copying_receiver == 0) {
768			gnttab_grant_foreign_transfer_ref(ref,
769			    sc->xbdev->otherend_id, pfn);
770			sc->rx_pfn_array[nr_flips] = PFNTOMFN(pfn);
771			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
772				/* Remove this page before passing
773				 * back to Xen.
774				 */
775				set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
776				MULTI_update_va_mapping(&sc->rx_mcl[i],
777				    vaddr, 0, 0);
778			}
779			nr_flips++;
780		} else {
781			gnttab_grant_foreign_access_ref(ref,
782			    sc->xbdev->otherend_id,
783			    PFNTOMFN(pfn), 0);
784		}
785		req->id = id;
786		req->gref = ref;
787
788		sc->rx_pfn_array[i] =
789		    vtomach(mtod(m_new,vm_offset_t)) >> PAGE_SHIFT;
790	}
791
792	KASSERT(i, ("no mbufs processed")); /* should have returned earlier */
793	KASSERT(mbufq_len(&sc->xn_rx_batch) == 0, ("not all mbufs processed"));
794	/*
795	 * We may have allocated buffers which have entries outstanding
796	 * in the page * update queue -- make sure we flush those first!
797	 */
798	PT_UPDATES_FLUSH();
799	if (nr_flips != 0) {
800#ifdef notyet
		/* Tell the balloon driver what is going on. */
802		balloon_update_driver_allowance(i);
803#endif
804		set_xen_guest_handle(reservation.extent_start,sc->rx_pfn_array);
805		reservation.nr_extents   = i;
806		reservation.extent_order = 0;
807		reservation.address_bits = 0;
808		reservation.domid        = DOMID_SELF;
809
810		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
811
812			/* After all PTEs have been zapped, flush the TLB. */
813			sc->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
814			    UVMF_TLB_FLUSH|UVMF_ALL;
815
816			/* Give away a batch of pages. */
817			sc->rx_mcl[i].op = __HYPERVISOR_memory_op;
818			sc->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
819			sc->rx_mcl[i].args[1] =  (u_long)&reservation;
820			/* Zap PTEs and give away pages in one big multicall. */
821			(void)HYPERVISOR_multicall(sc->rx_mcl, i+1);
822
			/* Check the return status of the
			 * XENMEM_decrease_reservation op issued above. */
824			if (unlikely(sc->rx_mcl[i].result != i))
825				panic("Unable to reduce memory reservation\n");
		} else {
			if (HYPERVISOR_memory_op(
			    XENMEM_decrease_reservation, &reservation)
			    != i)
				panic("Unable to reduce memory "
				    "reservation\n");
832		}
833	} else {
834		wmb();
835	}
836
837	/* Above is a suitable barrier to ensure backend will see requests. */
838	sc->rx.req_prod_pvt = req_prod + i;
839push:
840	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->rx, notify);
841	if (notify)
842		notify_remote_via_irq(sc->irq);
843}
844
845static void
846xn_rxeof(struct netfront_info *np)
847{
848	struct ifnet *ifp;
849	struct netfront_rx_info rinfo;
850	struct netif_rx_response *rx = &rinfo.rx;
851	struct netif_extra_info *extras = rinfo.extras;
852	RING_IDX i, rp;
853	multicall_entry_t *mcl;
854	struct mbuf *m;
855	struct mbuf_head rxq, errq;
856	int err, pages_flipped = 0;
857
858	XN_RX_LOCK_ASSERT(np);
859	if (!netfront_carrier_ok(np))
860		return;
861
862	mbufq_init(&errq);
863	mbufq_init(&rxq);
864
865	ifp = np->xn_ifp;
866
867	rp = np->rx.sring->rsp_prod;
868	rmb();	/* Ensure we see queued responses up to 'rp'. */
869
870	i = np->rx.rsp_cons;
	while (i != rp) {
872		memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
873		memset(extras, 0, sizeof(rinfo.extras));
874
875		m = NULL;
876		err = xennet_get_responses(np, &rinfo, rp, &m,
877		    &pages_flipped);
878
879		if (unlikely(err)) {
			if (m)
				mbufq_tail(&errq, m);
882			np->stats.rx_errors++;
883			i = np->rx.rsp_cons;
884			continue;
885		}
886
887		m->m_pkthdr.rcvif = ifp;
		if (rx->flags & NETRXF_data_validated) {
889			/* Tell the stack the checksums are okay */
890			/*
891			 * XXX this isn't necessarily the case - need to add
892			 * check
893			 */
894
895			m->m_pkthdr.csum_flags |=
896			    (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID
897			    | CSUM_PSEUDO_HDR);
898			m->m_pkthdr.csum_data = 0xffff;
899		}
900
901		np->stats.rx_packets++;
902		np->stats.rx_bytes += m->m_pkthdr.len;
903
904		mbufq_tail(&rxq, m);
905		np->rx.rsp_cons = ++i;
906	}
907
908	if (pages_flipped) {
909		/* Some pages are no longer absent... */
910#ifdef notyet
911		balloon_update_driver_allowance(-pages_flipped);
912#endif
913		/* Do all the remapping work, and M->P updates, in one big
914		 * hypercall.
915		 */
		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
917			mcl = np->rx_mcl + pages_flipped;
918			mcl->op = __HYPERVISOR_mmu_update;
919			mcl->args[0] = (u_long)np->rx_mmu;
920			mcl->args[1] = pages_flipped;
921			mcl->args[2] = 0;
922			mcl->args[3] = DOMID_SELF;
923			(void)HYPERVISOR_multicall(np->rx_mcl,
924			    pages_flipped + 1);
925		}
926	}
927
928	while ((m = mbufq_dequeue(&errq)))
929		m_freem(m);
930
931	/*
932	 * Process all the mbufs after the remapping is complete.
933	 * Break the mbuf chain first though.
934	 */
935	while ((m = mbufq_dequeue(&rxq)) != NULL) {
936		ifp->if_ipackets++;
937
938		/*
939		 * Do we really need to drop the rx lock?
940		 */
941		XN_RX_UNLOCK(np);
942		/* Pass it up. */
943		(*ifp->if_input)(ifp, m);
944		XN_RX_LOCK(np);
945	}
946
947	np->rx.rsp_cons = i;
948
949#if 0
950	/* If we get a callback with very few responses, reduce fill target. */
951	/* NB. Note exponential increase, linear decrease. */
952	if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
953	    ((3*np->rx_target) / 4)) && (--np->rx_target < np->rx_min_target))
954		np->rx_target = np->rx_min_target;
955#endif
956
957	network_alloc_rx_buffers(np);
958
959	np->rx.sring->rsp_event = i + 1;
960}
961
962static void
963xn_txeof(struct netfront_info *np)
964{
965	RING_IDX i, prod;
966	unsigned short id;
967	struct ifnet *ifp;
968	struct mbuf *m;
969
970	XN_TX_LOCK_ASSERT(np);
971
972	if (!netfront_carrier_ok(np))
973		return;
974
975	ifp = np->xn_ifp;
976	ifp->if_timer = 0;
977
978	do {
979		prod = np->tx.sring->rsp_prod;
980		rmb(); /* Ensure we see responses up to 'rp'. */
981
982		for (i = np->tx.rsp_cons; i != prod; i++) {
983			id = RING_GET_RESPONSE(&np->tx, i)->id;
984			m = np->xn_cdata.xn_tx_chain[id];
985
986			ifp->if_opackets++;
987			KASSERT(m != NULL, ("mbuf not found in xn_tx_chain"));
988			M_ASSERTVALID(m);
989			if (unlikely(gnttab_query_foreign_access(
990			    np->grant_tx_ref[id]) != 0)) {
991				printf("network_tx_buf_gc: warning "
992				    "-- grant still in use by backend "
993				    "domain.\n");
994				goto out;
995			}
996			gnttab_end_foreign_access_ref(
997				np->grant_tx_ref[id], GNTMAP_readonly);
998			gnttab_release_grant_reference(
999				&np->gref_tx_head, np->grant_tx_ref[id]);
1000			np->grant_tx_ref[id] = GRANT_INVALID_REF;
1001
1002			np->xn_cdata.xn_tx_chain[id] = NULL;
1003			add_id_to_freelist(np->xn_cdata.xn_tx_chain, id);
1004			m_freem(m);
1005		}
1006		np->tx.rsp_cons = prod;
1007
1008		/*
1009		 * Set a new event, then check for race with update of
1010		 * tx_cons. Note that it is essential to schedule a
1011		 * callback, no matter how few buffers are pending. Even if
1012		 * there is space in the transmit ring, higher layers may
1013		 * be blocked because too much data is outstanding: in such
1014		 * cases notification from Xen is likely to be the only kick
1015		 * that we'll get.
1016		 */
1017		np->tx.sring->rsp_event =
1018		    prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;
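		/*
		 * In other words, ask to be notified again once roughly half
		 * of the requests currently outstanding have been completed
		 * by the backend.
		 */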
1019
1020		mb();
1021
1022	} while (prod != np->tx.sring->rsp_prod);
1023
1024 out:
1025	if (np->tx_full &&
1026	    ((np->tx.sring->req_prod - prod) < NET_TX_RING_SIZE)) {
1027		np->tx_full = 0;
1028#if 0
1029		if (np->user_state == UST_OPEN)
1030			netif_wake_queue(dev);
1031#endif
1032	}
1033
1034}
1035
1036static void
1037xn_intr(void *xsc)
1038{
1039	struct netfront_info *np = xsc;
1040	struct ifnet *ifp = np->xn_ifp;
1041
1042#if 0
1043	if (!(np->rx.rsp_cons != np->rx.sring->rsp_prod &&
1044	    likely(netfront_carrier_ok(np)) &&
1045	    ifp->if_drv_flags & IFF_DRV_RUNNING))
1046		return;
1047#endif
1048	if (np->tx.rsp_cons != np->tx.sring->rsp_prod) {
1049		XN_TX_LOCK(np);
1050		xn_txeof(np);
1051		XN_TX_UNLOCK(np);
1052	}
1053
1054	XN_RX_LOCK(np);
1055	xn_rxeof(np);
1056	XN_RX_UNLOCK(np);
1057
1058	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
1059	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1060		xn_start(ifp);
1061}
1062
1063
1064static void
1065xennet_move_rx_slot(struct netfront_info *np, struct mbuf *m,
1066	grant_ref_t ref)
1067{
1068	int new = xennet_rxidx(np->rx.req_prod_pvt);
1069
1070	KASSERT(np->rx_mbufs[new] == NULL, ("rx_mbufs != NULL"));
1071	np->rx_mbufs[new] = m;
1072	np->grant_rx_ref[new] = ref;
1073	RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
1074	RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
1075	np->rx.req_prod_pvt++;
1076}
1077
1078static int
1079xennet_get_extras(struct netfront_info *np,
1080    struct netif_extra_info *extras, RING_IDX rp)
1081{
1082	struct netif_extra_info *extra;
1083	RING_IDX cons = np->rx.rsp_cons;
1084
1085	int err = 0;
1086
1087	do {
1088		struct mbuf *m;
1089		grant_ref_t ref;
1090
1091		if (unlikely(cons + 1 == rp)) {
1092#if 0
1093			if (net_ratelimit())
1094				WPRINTK("Missing extra info\n");
1095#endif
1096			err = -EINVAL;
1097			break;
1098		}
1099
1100		extra = (struct netif_extra_info *)
1101		RING_GET_RESPONSE(&np->rx, ++cons);
1102
1103		if (unlikely(!extra->type ||
1104			extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
1105#if 0
1106			if (net_ratelimit())
1107				WPRINTK("Invalid extra type: %d\n",
1108					extra->type);
1109#endif
1110			err = -EINVAL;
1111		} else {
1112			memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
1113		}
1114
1115		m = xennet_get_rx_mbuf(np, cons);
1116		ref = xennet_get_rx_ref(np, cons);
1117		xennet_move_rx_slot(np, m, ref);
1118	} while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
1119
1120	np->rx.rsp_cons = cons;
1121	return err;
1122}
1123
1124static int
1125xennet_get_responses(struct netfront_info *np,
1126	struct netfront_rx_info *rinfo, RING_IDX rp,
1127	struct mbuf  **list,
1128	int *pages_flipped_p)
1129{
1130	int pages_flipped = *pages_flipped_p;
1131	struct mmu_update *mmu;
1132	struct multicall_entry *mcl;
1133	struct netif_rx_response *rx = &rinfo->rx;
1134	struct netif_extra_info *extras = rinfo->extras;
1135	RING_IDX cons = np->rx.rsp_cons;
1136	struct mbuf *m, *m0, *m_prev;
1137	grant_ref_t ref = xennet_get_rx_ref(np, cons);
1138	int max = 5 /* MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD) */;
1139	int frags = 1;
1140	int err = 0;
1141	u_long ret;
1142
1143	m0 = m = m_prev = xennet_get_rx_mbuf(np, cons);
1144
1145
1146	if (rx->flags & NETRXF_extra_info) {
1147		err = xennet_get_extras(np, extras, rp);
1148		cons = np->rx.rsp_cons;
1149	}
1150
1151
1152	if (m0 != NULL) {
		m0->m_pkthdr.len = 0;
		m0->m_next = NULL;
1155	}
1156
1157	for (;;) {
1158		u_long mfn;
1159
1160#if 0
1161		printf("rx->status=%hd rx->offset=%hu frags=%u\n",
1162			rx->status, rx->offset, frags);
1163#endif
1164		if (unlikely(rx->status < 0 ||
1165			rx->offset + rx->status > PAGE_SIZE)) {
1166#if 0
1167			if (net_ratelimit())
1168				WPRINTK("rx->offset: %x, size: %u\n",
1169					rx->offset, rx->status);
1170#endif
1171			xennet_move_rx_slot(np, m, ref);
1172			err = -EINVAL;
1173			goto next;
1174		}
1175
1176		/*
1177		 * This definitely indicates a bug, either in this driver or in
		 * the backend driver. In the future this should flag the bad
		 * situation to the system controller to reboot the backend.
1180		 */
1181		if (ref == GRANT_INVALID_REF) {
1182#if 0
1183			if (net_ratelimit())
1184				WPRINTK("Bad rx response id %d.\n", rx->id);
1185#endif
1186			err = -EINVAL;
1187			goto next;
1188		}
1189
1190		if (!np->copying_receiver) {
1191			/* Memory pressure, insufficient buffer
1192			 * headroom, ...
1193			 */
1194			if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) {
1195				if (net_ratelimit())
1196					WPRINTK("Unfulfilled rx req "
1197						"(id=%d, st=%d).\n",
1198						rx->id, rx->status);
1199				xennet_move_rx_slot(np, m, ref);
1200				err = -ENOMEM;
1201				goto next;
1202			}
1203
1204			if (!xen_feature( XENFEAT_auto_translated_physmap)) {
1205				/* Remap the page. */
1206				void *vaddr = mtod(m, void *);
1207				uint32_t pfn;
1208
1209				mcl = np->rx_mcl + pages_flipped;
1210				mmu = np->rx_mmu + pages_flipped;
1211
1212				MULTI_update_va_mapping(mcl, (u_long)vaddr,
1213				    (((vm_paddr_t)mfn) << PAGE_SHIFT) | PG_RW |
1214				    PG_V | PG_M | PG_A, 0);
1215				pfn = (uint32_t)m->m_ext.ext_arg1;
1216				mmu->ptr = ((vm_paddr_t)mfn << PAGE_SHIFT) |
1217				    MMU_MACHPHYS_UPDATE;
1218				mmu->val = pfn;
1219
1220				set_phys_to_machine(pfn, mfn);
1221			}
1222			pages_flipped++;
1223		} else {
1224			ret = gnttab_end_foreign_access_ref(ref, 0);
1225			KASSERT(ret, ("ret != 0"));
1226		}
1227
1228		gnttab_release_grant_reference(&np->gref_rx_head, ref);
1229
1230next:
1231		if (m != NULL) {
			m->m_len = rx->status;
			m->m_data += rx->offset;
			m0->m_pkthdr.len += rx->status;
1235		}
1236
1237		if (!(rx->flags & NETRXF_more_data))
1238			break;
1239
1240		if (cons + frags == rp) {
1241			if (net_ratelimit())
1242				WPRINTK("Need more frags\n");
1243			err = -ENOENT;
			break;
1245		}
1246		m_prev = m;
1247
1248		rx = RING_GET_RESPONSE(&np->rx, cons + frags);
1249		m = xennet_get_rx_mbuf(np, cons + frags);
1250
1251		m_prev->m_next = m;
1252		m->m_next = NULL;
1253		ref = xennet_get_rx_ref(np, cons + frags);
1254		frags++;
1255	}
1256	*list = m0;
1257
1258	if (unlikely(frags > max)) {
1259		if (net_ratelimit())
1260			WPRINTK("Too many frags\n");
1261		err = -E2BIG;
1262	}
1263
1264	if (unlikely(err))
1265		np->rx.rsp_cons = cons + frags;
1266
1267	*pages_flipped_p = pages_flipped;
1268
1269	return err;
1270}
1271
1272static void
1273xn_tick_locked(struct netfront_info *sc)
1274{
1275	XN_RX_LOCK_ASSERT(sc);
1276	callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc);
1277
1278	/* XXX placeholder for printing debug information */
1279
1280}
1281
1282
1283static void
1284xn_tick(void *xsc)
1285{
1286	struct netfront_info *sc;
1287
1288	sc = xsc;
1289	XN_RX_LOCK(sc);
1290	xn_tick_locked(sc);
1291	XN_RX_UNLOCK(sc);
1292
1293}
1294static void
1295xn_start_locked(struct ifnet *ifp)
1296{
1297	unsigned short id;
1298	struct mbuf *m_head, *new_m;
1299	struct netfront_info *sc;
1300	netif_tx_request_t *tx;
1301	RING_IDX i;
1302	grant_ref_t ref;
1303	u_long mfn, tx_bytes;
1304	int notify;
1305
1306	sc = ifp->if_softc;
1307	tx_bytes = 0;
1308
1309	if (!netfront_carrier_ok(sc))
1310		return;
1311
1312	for (i = sc->tx.req_prod_pvt; TRUE; i++) {
1313		IF_DEQUEUE(&ifp->if_snd, m_head);
1314		if (m_head == NULL)
1315			break;
1316
1317		if (!netfront_tx_slot_available(sc)) {
1318			IF_PREPEND(&ifp->if_snd, m_head);
1319			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1320			break;
1321		}
1322
1323		id = get_id_from_freelist(sc->xn_cdata.xn_tx_chain);
1324
1325		/*
1326		 * Start packing the mbufs in this chain into
1327		 * the fragment pointers. Stop when we run out
1328		 * of fragments or hit the end of the mbuf chain.
1329		 */
1330		new_m = makembuf(m_head);
1331		tx = RING_GET_REQUEST(&sc->tx, i);
1332		tx->id = id;
1333		ref = gnttab_claim_grant_reference(&sc->gref_tx_head);
1334		KASSERT((short)ref >= 0, ("Negative ref"));
1335		mfn = virt_to_mfn(mtod(new_m, vm_offset_t));
1336		gnttab_grant_foreign_access_ref(ref, sc->xbdev->otherend_id,
1337		    mfn, GNTMAP_readonly);
1338		tx->gref = sc->grant_tx_ref[id] = ref;
1339		tx->size = new_m->m_pkthdr.len;
1340#if 0
1341		tx->flags = (skb->ip_summed == CHECKSUM_HW) ? NETTXF_csum_blank : 0;
1342#endif
1343		tx->flags = 0;
1344		new_m->m_next = NULL;
1345		new_m->m_nextpkt = NULL;
1346
1347		m_freem(m_head);
1348
1349		sc->xn_cdata.xn_tx_chain[id] = new_m;
1350		BPF_MTAP(ifp, new_m);
1351
1352		sc->stats.tx_bytes += new_m->m_pkthdr.len;
1353		sc->stats.tx_packets++;
1354	}
1355
1356	sc->tx.req_prod_pvt = i;
1357	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->tx, notify);
1358	if (notify)
1359		notify_remote_via_irq(sc->irq);
1360
1361	xn_txeof(sc);
1362
1363	if (RING_FULL(&sc->tx)) {
1364		sc->tx_full = 1;
1365#if 0
1366		netif_stop_queue(dev);
1367#endif
1368	}
1369
1370	return;
1371}
1372
1373static void
1374xn_start(struct ifnet *ifp)
1375{
1376	struct netfront_info *sc;
1377	sc = ifp->if_softc;
1378	XN_TX_LOCK(sc);
1379	xn_start_locked(ifp);
1380	XN_TX_UNLOCK(sc);
1381}
1382
1383/* equivalent of network_open() in Linux */
1384static void
1385xn_ifinit_locked(struct netfront_info *sc)
1386{
1387	struct ifnet *ifp;
1388
1389	XN_LOCK_ASSERT(sc);
1390
1391	ifp = sc->xn_ifp;
1392
1393	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1394		return;
1395
1396	xn_stop(sc);
1397
1398	network_alloc_rx_buffers(sc);
1399	sc->rx.sring->rsp_event = sc->rx.rsp_cons + 1;
1400
1401	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1402	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1403
1404	callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc);
1405
1406}
1407
1408
1409static void
1410xn_ifinit(void *xsc)
1411{
1412	struct netfront_info *sc = xsc;
1413
1414	XN_LOCK(sc);
1415	xn_ifinit_locked(sc);
1416	XN_UNLOCK(sc);
1417
1418}
1419
1420
1421static int
1422xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1423{
1424	struct netfront_info *sc = ifp->if_softc;
1425	struct ifreq *ifr = (struct ifreq *) data;
1426	struct ifaddr *ifa = (struct ifaddr *)data;
1427
1428	int mask, error = 0;
1429	switch(cmd) {
1430	case SIOCSIFADDR:
1431	case SIOCGIFADDR:
1432		XN_LOCK(sc);
1433		if (ifa->ifa_addr->sa_family == AF_INET) {
1434			ifp->if_flags |= IFF_UP;
1435			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1436				xn_ifinit_locked(sc);
1437			arp_ifinit(ifp, ifa);
1438		} else
1439			error = ether_ioctl(ifp, cmd, data);
1440		XN_UNLOCK(sc);
1441		break;
1442	case SIOCSIFMTU:
1443		/* XXX can we alter the MTU on a VN ?*/
1444#ifdef notyet
1445		if (ifr->ifr_mtu > XN_JUMBO_MTU)
1446			error = EINVAL;
1447		else
1448#endif
1449		{
1450			ifp->if_mtu = ifr->ifr_mtu;
1451			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1452			xn_ifinit(sc);
1453		}
1454		break;
1455	case SIOCSIFFLAGS:
1456		XN_LOCK(sc);
1457		if (ifp->if_flags & IFF_UP) {
1458			/*
1459			 * If only the state of the PROMISC flag changed,
1460			 * then just use the 'set promisc mode' command
1461			 * instead of reinitializing the entire NIC. Doing
1462			 * a full re-init means reloading the firmware and
1463			 * waiting for it to start up, which may take a
1464			 * second or two.
1465			 */
1466#ifdef notyet
1467			/* No promiscuous mode with Xen */
1468			if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
1469			    ifp->if_flags & IFF_PROMISC &&
1470			    !(sc->xn_if_flags & IFF_PROMISC)) {
1471				XN_SETBIT(sc, XN_RX_MODE,
1472					  XN_RXMODE_RX_PROMISC);
1473			} else if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
1474				   !(ifp->if_flags & IFF_PROMISC) &&
1475				   sc->xn_if_flags & IFF_PROMISC) {
1476				XN_CLRBIT(sc, XN_RX_MODE,
1477					  XN_RXMODE_RX_PROMISC);
1478			} else
1479#endif
1480				xn_ifinit_locked(sc);
1481		} else {
1482			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1483				xn_stop(sc);
1484			}
1485		}
1486		sc->xn_if_flags = ifp->if_flags;
1487		XN_UNLOCK(sc);
1488		error = 0;
1489		break;
1490	case SIOCSIFCAP:
1491		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1492		if (mask & IFCAP_HWCSUM) {
1493			if (IFCAP_HWCSUM & ifp->if_capenable)
1494				ifp->if_capenable &= ~IFCAP_HWCSUM;
1495			else
1496				ifp->if_capenable |= IFCAP_HWCSUM;
1497		}
1498		error = 0;
1499		break;
1500	case SIOCADDMULTI:
1501	case SIOCDELMULTI:
1502#ifdef notyet
1503		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1504			XN_LOCK(sc);
1505			xn_setmulti(sc);
1506			XN_UNLOCK(sc);
1507			error = 0;
1508		}
1509#endif
1510		/* FALLTHROUGH */
1511	case SIOCSIFMEDIA:
1512	case SIOCGIFMEDIA:
1513		error = EINVAL;
1514		break;
1515	default:
1516		error = ether_ioctl(ifp, cmd, data);
1517	}
1518
1519	return (error);
1520}
1521
1522static void
1523xn_stop(struct netfront_info *sc)
1524{
1525	struct ifnet *ifp;
1526
1527	XN_LOCK_ASSERT(sc);
1528
1529	ifp = sc->xn_ifp;
1530
1531	callout_stop(&sc->xn_stat_ch);
1532
1533	xn_free_rx_ring(sc);
1534	xn_free_tx_ring(sc);
1535
1536	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1537}
1538
1539/* START of Xenolinux helper functions adapted to FreeBSD */
1540static int
1541network_connect(struct ifnet *ifp)
1542{
1543	struct netfront_info *np;
1544	int i, requeue_idx, err;
1545	grant_ref_t ref;
1546	netif_rx_request_t *req;
1547	u_int feature_rx_copy, feature_rx_flip;
1548
1549	printf("network_connect\n");
1550
1551	np = ifp->if_softc;
1552	err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1553			   "feature-rx-copy", "%u", &feature_rx_copy);
1554	if (err != 1)
1555		feature_rx_copy = 0;
1556	err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1557			   "feature-rx-flip", "%u", &feature_rx_flip);
1558	if (err != 1)
1559		feature_rx_flip = 1;
1560
1561	/*
1562	 * Copy packets on receive path if:
1563	 *  (a) This was requested by user, and the backend supports it; or
1564	 *  (b) Flipping was requested, but this is unsupported by the backend.
1565	 */
1566	np->copying_receiver = ((MODPARM_rx_copy && feature_rx_copy) ||
1567				(MODPARM_rx_flip && !feature_rx_flip));
1568
1569	XN_LOCK(np);
1570	/* Recovery procedure: */
1571	err = talk_to_backend(np->xbdev, np);
	if (err) {
		XN_UNLOCK(np);
		return (err);
	}
1574
1575	/* Step 1: Reinitialise variables. */
1576	netif_release_tx_bufs(np);
1577
1578	/* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
1579	for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
1580		struct mbuf *m;
1581
1582		if (np->rx_mbufs[i] == NULL)
1583			continue;
1584
1585		m = np->rx_mbufs[requeue_idx] = xennet_get_rx_mbuf(np, i);
1586		ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
1587		req = RING_GET_REQUEST(&np->rx, requeue_idx);
1588
1589		if (!np->copying_receiver) {
1590			gnttab_grant_foreign_transfer_ref(ref,
1591			    np->xbdev->otherend_id,
1592			    vtophys(mtod(m, vm_offset_t)));
1593		} else {
1594			gnttab_grant_foreign_access_ref(ref,
1595			    np->xbdev->otherend_id,
1596			    vtophys(mtod(m, vm_offset_t)), 0);
1597		}
1598		req->gref = ref;
1599		req->id   = requeue_idx;
1600
1601		requeue_idx++;
1602	}
1603
1604	np->rx.req_prod_pvt = requeue_idx;
1605
1606	/* Step 3: All public and private state should now be sane.  Get
1607	 * ready to start sending and receiving packets and give the driver
1608	 * domain a kick because we've probably just requeued some
1609	 * packets.
1610	 */
1611	netfront_carrier_on(np);
1612	notify_remote_via_irq(np->irq);
1613	XN_TX_LOCK(np);
1614	xn_txeof(np);
1615	XN_TX_UNLOCK(np);
1616	network_alloc_rx_buffers(np);
1617	XN_UNLOCK(np);
1618
1619	return (0);
1620}
1621
1622
1623static void
1624show_device(struct netfront_info *sc)
1625{
1626#ifdef DEBUG
1627	if (sc) {
1628		IPRINTK("<vif handle=%u %s(%s) evtchn=%u irq=%u tx=%p rx=%p>\n",
1629			sc->xn_ifno,
1630			be_state_name[sc->xn_backend_state],
1631			sc->xn_user_state ? "open" : "closed",
1632			sc->xn_evtchn,
1633			sc->xn_irq,
1634			sc->xn_tx_if,
1635			sc->xn_rx_if);
1636	} else {
1637		IPRINTK("<vif NULL>\n");
1638	}
1639#endif
1640}
1641
1642static int ifno = 0;
1643
1644/** Create a network device.
1645 * @param handle device handle
1646 */
1647static int
1648create_netdev(struct xenbus_device *dev, struct ifnet **ifpp)
1649{
1650	int i;
1651	struct netfront_info *np;
1652	int err;
1653	struct ifnet *ifp;
1654
1655	np = (struct netfront_info *)malloc(sizeof(struct netfront_info),
1656	    M_DEVBUF, M_NOWAIT);
1657	if (np == NULL)
		return (ENOMEM);
1659
1660	memset(np, 0, sizeof(struct netfront_info));
1661
1662	np->xbdev         = dev;
1663
1664	XN_LOCK_INIT(np, xennetif);
1665	np->rx_target     = RX_MIN_TARGET;
1666	np->rx_min_target = RX_MIN_TARGET;
1667	np->rx_max_target = RX_MAX_TARGET;
1668
	/* Initialise tx_mbufs as a free chain containing every entry, and clear rx_mbufs. */
1670	for (i = 0; i <= NET_TX_RING_SIZE; i++) {
1671		np->tx_mbufs[i] = (void *) ((u_long) i+1);
1672		np->grant_tx_ref[i] = GRANT_INVALID_REF;
1673	}
1674	for (i = 0; i <= NET_RX_RING_SIZE; i++) {
1675		np->rx_mbufs[i] = NULL;
1676		np->grant_rx_ref[i] = GRANT_INVALID_REF;
1677	}
1678	/* A grant for every tx ring slot */
1679	if (gnttab_alloc_grant_references(TX_MAX_TARGET,
1680					  &np->gref_tx_head) < 0) {
1681		printf("#### netfront can't alloc tx grant refs\n");
1682		err = ENOMEM;
1683		goto exit;
1684	}
1685	/* A grant for every rx ring slot */
1686	if (gnttab_alloc_grant_references(RX_MAX_TARGET,
1687					  &np->gref_rx_head) < 0) {
1688		printf("#### netfront can't alloc rx grant refs\n");
1689		gnttab_free_grant_references(np->gref_tx_head);
1690		err = ENOMEM;
1691		goto exit;
1692	}
1693
1694	err = xen_net_read_mac(dev, np->mac);
1695	if (err) {
1696		xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
1697		goto out;
1698	}
1699
1700	/* Set up ifnet structure */
1701	*ifpp = ifp = np->xn_ifp = if_alloc(IFT_ETHER);
	ifp->if_softc = np;
	if_initname(ifp, "xn", ifno++);
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX;
	ifp->if_ioctl = xn_ioctl;
	ifp->if_output = ether_output;
	ifp->if_start = xn_start;
#ifdef notyet
	ifp->if_watchdog = xn_watchdog;
#endif
	ifp->if_init = xn_ifinit;
	ifp->if_mtu = ETHERMTU;
	ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1;

#ifdef notyet
	ifp->if_hwassist = XN_CSUM_FEATURES;
	ifp->if_capabilities = IFCAP_HWCSUM;
	ifp->if_capenable = ifp->if_capabilities;
#endif

	ether_ifattach(ifp, np->mac);
	callout_init(&np->xn_stat_ch, CALLOUT_MPSAFE);
1723	netfront_carrier_off(np);
1724
1725	return (0);
1726
1727exit:
1728	gnttab_free_grant_references(np->gref_tx_head);
1729out:
1730	panic("do something smart");
1731
1732}
1733
1734/**
1735 * Handle the change of state of the backend to Closing.  We must delete our
1736 * device-layer structures now, to ensure that writes are flushed through to
 * the backend.  Once this is done, we can switch to Closed in
1738 * acknowledgement.
1739 */
1740#if 0
1741static void netfront_closing(struct xenbus_device *dev)
1742{
1743#if 0
1744	struct netfront_info *info = dev->dev_driver_data;
1745
1746	DPRINTK("netfront_closing: %s removed\n", dev->nodename);
1747
1748	close_netdev(info);
1749#endif
1750	xenbus_switch_state(dev, XenbusStateClosed);
1751}
1752#endif
1753
1754static int netfront_remove(struct xenbus_device *dev)
1755{
1756	struct netfront_info *info = dev->dev_driver_data;
1757
1758	DPRINTK("%s\n", dev->nodename);
1759
1760	netif_free(info);
1761	free(info, M_DEVBUF);
1762
1763	return 0;
1764}
1765
1766
1767static void netif_free(struct netfront_info *info)
1768{
1769	netif_disconnect_backend(info);
1770#if 0
1771	close_netdev(info);
1772#endif
1773}
1774
1775
1776
1777static void netif_disconnect_backend(struct netfront_info *info)
1778{
1779	xn_stop(info);
1780	end_access(info->tx_ring_ref, info->tx.sring);
1781	end_access(info->rx_ring_ref, info->rx.sring);
1782	info->tx_ring_ref = GRANT_INVALID_REF;
1783	info->rx_ring_ref = GRANT_INVALID_REF;
1784	info->tx.sring = NULL;
1785	info->rx.sring = NULL;
1786
1787#if 0
1788	if (info->irq)
1789		unbind_from_irqhandler(info->irq, info->netdev);
1790#else
1791	panic("FIX ME");
1792#endif
1793	info->irq = 0;
1794}
1795
1796
1797static void end_access(int ref, void *page)
1798{
1799	if (ref != GRANT_INVALID_REF)
1800		gnttab_end_foreign_access(ref, 0, page);
1801}
1802
1803
1804/* ** Driver registration ** */
1805
1806
1807static struct xenbus_device_id netfront_ids[] = {
1808	{ "vif" },
1809	{ "" }
1810};
1811
1812
1813static struct xenbus_driver netfront = {
1814	.name = "vif",
1815	.ids = netfront_ids,
1816	.probe = netfront_probe,
1817	.remove = netfront_remove,
1818	.resume = netfront_resume,
1819	.otherend_changed = backend_changed,
1820};
1821
1822static void
1823netif_init(void *unused)
1824{
1825	if (!is_running_on_xen())
1826		return;
1827
1828	if (is_initial_xendomain())
1829		return;
1830
1831	IPRINTK("Initialising virtual ethernet driver.\n");
1832
1833	xenbus_register_frontend(&netfront);
1834}
1835
1836SYSINIT(xennetif, SI_SUB_PSEUDO, SI_ORDER_SECOND, netif_init, NULL);
1837
1838
1839/*
1840 * Local variables:
1841 * mode: C
1842 * c-set-style: "BSD"
1843 * c-basic-offset: 8
1844 * tab-width: 4
1845 * indent-tabs-mode: t
1846 * End:
1847 */
1848