netfront.c revision 222975
/*-
 * Copyright (c) 2004-2006 Kip Macy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */


#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/xen/netfront/netfront.c 222975 2011-06-11 04:59:01Z gibbs $");

#include "opt_inet.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/sx.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/if.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/if_ether.h>
#if __FreeBSD_version >= 700000
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#endif

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/clock.h>      /* for DELAY */
#include <machine/bus.h>
#include <machine/resource.h>
#include <machine/frame.h>
#include <machine/vmparam.h>

#include <sys/bus.h>
#include <sys/rman.h>

#include <machine/intr_machdep.h>

#include <machine/xen/xen-os.h>
#include <machine/xen/xenfunc.h>
#include <machine/xen/xenvar.h>
#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <xen/evtchn.h>
#include <xen/gnttab.h>
#include <xen/interface/memory.h>
#include <xen/interface/io/netif.h>
#include <xen/xenbus/xenbusvar.h>

#include <dev/xen/netfront/mbufq.h>

#include "xenbus_if.h"

#define XN_CSUM_FEATURES	(CSUM_TCP | CSUM_UDP | CSUM_TSO)

#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)

#if __FreeBSD_version >= 700000
/*
 * Should the driver do LRO on the RX end?  This can be toggled on the
 * fly, but the interface must be reset (down/up) for the change to
 * take effect.
 */
static int xn_enable_lro = 1;
TUNABLE_INT("hw.xn.enable_lro", &xn_enable_lro);
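/*
 * Usage sketch: as a loader tunable this can be set from
 * loader.conf(5), e.g. "hw.xn.enable_lro=0" to disable LRO at boot;
 * netfront_attach() below also exposes it as an "enable_lro" sysctl
 * on each device.
 */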
#else

#define IFCAP_TSO4	0
#define CSUM_TSO	0

#endif

#ifdef CONFIG_XEN
static int MODPARM_rx_copy = 0;
module_param_named(rx_copy, MODPARM_rx_copy, bool, 0);
MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)");
static int MODPARM_rx_flip = 0;
module_param_named(rx_flip, MODPARM_rx_flip, bool, 0);
MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)");
#else
static const int MODPARM_rx_copy = 1;
static const int MODPARM_rx_flip = 0;
#endif
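/*
 * In copy mode the backend copies packet data into pages we grant it;
 * in flip mode ownership of the page itself is transferred between the
 * domains for each packet (see the copying_receiver paths below).  The
 * non-CONFIG_XEN build used on FreeBSD is pinned to copy mode by the
 * constants above.
 */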

/**
 * \brief The maximum allowed data fragments in a single transmit
 *        request.
 *
 * This limit is imposed by the backend driver.  We assume here that
 * we are dealing with a Linux driver domain and have set our limit
 * to mirror the Linux MAX_SKB_FRAGS constant.
 */
#define	MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2)
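/*
 * Worked example: with 4KB pages this evaluates to
 * 65536 / 4096 + 2 = 18 fragments, matching the historical Linux
 * definition of MAX_SKB_FRAGS as (65536 / PAGE_SIZE + 2).
 */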

#define RX_COPY_THRESHOLD 256

#define net_ratelimit() 0

struct netfront_info;
struct netfront_rx_info;

static void xn_txeof(struct netfront_info *);
static void xn_rxeof(struct netfront_info *);
static void network_alloc_rx_buffers(struct netfront_info *);

static void xn_tick_locked(struct netfront_info *);
static void xn_tick(void *);

static void xn_intr(void *);
static inline int xn_count_frags(struct mbuf *m);
static int  xn_assemble_tx_request(struct netfront_info *sc,
				   struct mbuf *m_head);
static void xn_start_locked(struct ifnet *);
static void xn_start(struct ifnet *);
static int  xn_ioctl(struct ifnet *, u_long, caddr_t);
static void xn_ifinit_locked(struct netfront_info *);
static void xn_ifinit(void *);
static void xn_stop(struct netfront_info *);
#ifdef notyet
static void xn_watchdog(struct ifnet *);
#endif

static void show_device(struct netfront_info *sc);
#ifdef notyet
static void netfront_closing(device_t dev);
#endif
static void netif_free(struct netfront_info *info);
static int netfront_detach(device_t dev);

static int talk_to_backend(device_t dev, struct netfront_info *info);
static int create_netdev(device_t dev);
static void netif_disconnect_backend(struct netfront_info *info);
static int setup_device(device_t dev, struct netfront_info *info);
static void end_access(int ref, void *page);

static int  xn_ifmedia_upd(struct ifnet *ifp);
static void xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr);

/* Xenolinux helper functions */
int network_connect(struct netfront_info *);

static void xn_free_rx_ring(struct netfront_info *);

static void xn_free_tx_ring(struct netfront_info *);

static int xennet_get_responses(struct netfront_info *np,
	struct netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons,
	struct mbuf **list, int *pages_flipped_p);

#define virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT)

#define INVALID_P2M_ENTRY (~0UL)

/*
 * Mbuf pointers. We need these to keep track of the virtual addresses
 * of our mbuf chains since we can only convert from virtual to physical,
 * not the other way around.  The size must track the free index arrays.
 */
struct xn_chain_data {
	struct mbuf    *xn_tx_chain[NET_TX_RING_SIZE+1];
	int		xn_tx_chain_cnt;
	struct mbuf    *xn_rx_chain[NET_RX_RING_SIZE+1];
};

#define NUM_ELEMENTS(x) (sizeof(x)/sizeof(*x))

struct net_device_stats
{
	u_long	rx_packets;		/* total packets received	*/
	u_long	tx_packets;		/* total packets transmitted	*/
	u_long	rx_bytes;		/* total bytes received 	*/
	u_long	tx_bytes;		/* total bytes transmitted	*/
	u_long	rx_errors;		/* bad packets received		*/
	u_long	tx_errors;		/* packet transmit problems	*/
	u_long	rx_dropped;		/* no space in linux buffers	*/
	u_long	tx_dropped;		/* no space available in linux	*/
	u_long	multicast;		/* multicast packets received	*/
	u_long	collisions;

	/* detailed rx_errors: */
	u_long	rx_length_errors;
	u_long	rx_over_errors;		/* receiver ring buff overflow	*/
	u_long	rx_crc_errors;		/* recved pkt with crc error	*/
	u_long	rx_frame_errors;	/* recv'd frame alignment error */
	u_long	rx_fifo_errors;		/* recv'r fifo overrun		*/
	u_long	rx_missed_errors;	/* receiver missed packet	*/

	/* detailed tx_errors */
	u_long	tx_aborted_errors;
	u_long	tx_carrier_errors;
	u_long	tx_fifo_errors;
	u_long	tx_heartbeat_errors;
	u_long	tx_window_errors;

	/* for cslip etc */
	u_long	rx_compressed;
	u_long	tx_compressed;
};

struct netfront_info {

	struct ifnet *xn_ifp;
#if __FreeBSD_version >= 700000
	struct lro_ctrl xn_lro;
#endif

	struct net_device_stats stats;
	u_int tx_full;

	netif_tx_front_ring_t tx;
	netif_rx_front_ring_t rx;

	struct mtx   tx_lock;
	struct mtx   rx_lock;
	struct mtx   sc_lock;

	u_int handle;
	u_int irq;
	u_int copying_receiver;
	u_int carrier;

	/* Receive-ring batched refills. */
#define RX_MIN_TARGET 32
#define RX_MAX_TARGET NET_RX_RING_SIZE
	int rx_min_target;
	int rx_max_target;
	int rx_target;

	grant_ref_t gref_tx_head;
	grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1];
	grant_ref_t gref_rx_head;
	grant_ref_t grant_rx_ref[NET_RX_RING_SIZE + 1];

	device_t		xbdev;
	int			tx_ring_ref;
	int			rx_ring_ref;
	uint8_t			mac[ETHER_ADDR_LEN];
	struct xn_chain_data	xn_cdata;	/* mbufs */
	struct mbuf_head	xn_rx_batch;	/* head of the batch queue */

	int			xn_if_flags;
	struct callout	        xn_stat_ch;

	u_long			rx_pfn_array[NET_RX_RING_SIZE];
	multicall_entry_t	rx_mcl[NET_RX_RING_SIZE+1];
	mmu_update_t		rx_mmu[NET_RX_RING_SIZE];
	struct ifmedia		sc_media;
};

#define rx_mbufs xn_cdata.xn_rx_chain
#define tx_mbufs xn_cdata.xn_tx_chain

#define XN_LOCK_INIT(_sc, _name) \
        mtx_init(&(_sc)->tx_lock, #_name"_tx", "network transmit lock", MTX_DEF); \
        mtx_init(&(_sc)->rx_lock, #_name"_rx", "network receive lock", MTX_DEF);  \
        mtx_init(&(_sc)->sc_lock, #_name"_sc", "netfront softc lock", MTX_DEF)

#define XN_RX_LOCK(_sc)           mtx_lock(&(_sc)->rx_lock)
#define XN_RX_UNLOCK(_sc)         mtx_unlock(&(_sc)->rx_lock)

#define XN_TX_LOCK(_sc)           mtx_lock(&(_sc)->tx_lock)
#define XN_TX_UNLOCK(_sc)         mtx_unlock(&(_sc)->tx_lock)

#define XN_LOCK(_sc)           mtx_lock(&(_sc)->sc_lock);
#define XN_UNLOCK(_sc)         mtx_unlock(&(_sc)->sc_lock);

#define XN_LOCK_ASSERT(_sc)    mtx_assert(&(_sc)->sc_lock, MA_OWNED);
#define XN_RX_LOCK_ASSERT(_sc)    mtx_assert(&(_sc)->rx_lock, MA_OWNED);
#define XN_TX_LOCK_ASSERT(_sc)    mtx_assert(&(_sc)->tx_lock, MA_OWNED);
#define XN_LOCK_DESTROY(_sc)   mtx_destroy(&(_sc)->rx_lock); \
                               mtx_destroy(&(_sc)->tx_lock); \
                               mtx_destroy(&(_sc)->sc_lock);
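/*
 * Lifecycle sketch (assumed usage): XN_LOCK_INIT(sc, xn) at attach and
 * XN_LOCK_DESTROY(sc) at detach; tx_lock and rx_lock serialize their
 * respective rings while sc_lock covers the remaining softc state.
 */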

struct netfront_rx_info {
	struct netif_rx_response rx;
	struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
};

#define netfront_carrier_on(netif)	((netif)->carrier = 1)
#define netfront_carrier_off(netif)	((netif)->carrier = 0)
#define netfront_carrier_ok(netif)	((netif)->carrier)

/*
 * Helpers for acquiring/freeing slots in the tx/rx mbuf chain arrays.
 */

static inline void
add_id_to_freelist(struct mbuf **list, uintptr_t id)
{
	KASSERT(id != 0,
		("%s: the head item (0) must always be free.", __func__));
	list[id] = list[0];
	list[0]  = (struct mbuf *)id;
}

static inline unsigned short
get_id_from_freelist(struct mbuf **list)
{
	uintptr_t id;

	id = (uintptr_t)list[0];
	KASSERT(id != 0,
		("%s: the head item (0) must always remain free.", __func__));
	list[0] = list[id];
	return (id);
}
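/*
 * The free list above is threaded through the mbuf pointer array
 * itself: a free slot stores the integer index of the next free slot
 * cast to a pointer, with list[0] as the head.  Because valid indices
 * never exceed NET_TX_RING_SIZE, any entry that small must be a
 * free-list link rather than a real mbuf pointer;
 * netif_release_tx_bufs() below relies on exactly this distinction.
 */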

static inline int
xennet_rxidx(RING_IDX idx)
{
	return idx & (NET_RX_RING_SIZE - 1);
}
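/*
 * NET_RX_RING_SIZE is a power of two, so the mask above is equivalent
 * to idx % NET_RX_RING_SIZE; e.g. with a 256-entry ring, RING_IDX 260
 * maps to slot 4.
 */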

static inline struct mbuf *
xennet_get_rx_mbuf(struct netfront_info *np, RING_IDX ri)
{
	int i = xennet_rxidx(ri);
	struct mbuf *m;

	m = np->rx_mbufs[i];
	np->rx_mbufs[i] = NULL;
	return (m);
}

static inline grant_ref_t
xennet_get_rx_ref(struct netfront_info *np, RING_IDX ri)
{
	int i = xennet_rxidx(ri);
	grant_ref_t ref = np->grant_rx_ref[i];
	KASSERT(ref != GRANT_REF_INVALID, ("Invalid grant reference!\n"));
	np->grant_rx_ref[i] = GRANT_REF_INVALID;
	return ref;
}

#define IPRINTK(fmt, args...) \
    printf("[XEN] " fmt, ##args)
#ifdef INVARIANTS
#define WPRINTK(fmt, args...) \
    printf("[XEN] " fmt, ##args)
#else
#define WPRINTK(fmt, args...)
#endif
#ifdef DEBUG
#define DPRINTK(fmt, args...) \
    printf("[XEN] %s: " fmt, __func__, ##args)
#else
#define DPRINTK(fmt, args...)
#endif

/**
 * Read the 'mac' node at the given device's node in the store, and parse
 * it as colon-separated octets, placing the result in the given mac array.
 * mac must be a preallocated array of length ETHER_ADDR_LEN.
 * Return 0 on success, or errno on error.
 */
static int
xen_net_read_mac(device_t dev, uint8_t mac[])
{
	int error, i;
	char *s, *e, *macstr;

	error = xs_read(XST_NIL, xenbus_get_node(dev), "mac", NULL,
	    (void **) &macstr);
	if (error)
		return (error);

	s = macstr;
	for (i = 0; i < ETHER_ADDR_LEN; i++) {
		mac[i] = strtoul(s, &e, 16);
		if (s == e || (e[0] != ':' && e[0] != 0)) {
			free(macstr, M_XENBUS);
			return (ENOENT);
		}
		s = &e[1];
	}
	free(macstr, M_XENBUS);
	return (0);
}
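/*
 * For reference, the backend publishes the address as colon-separated
 * hex octets, e.g. "00:16:3e:xx:xx:xx" (00:16:3e being the Xen project
 * OUI); the loop above parses exactly six such octets.
 */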

/**
 * Entry point to this code when a new device is created.  Allocate the basic
 * structures and the ring buffers for communication with the backend, and
 * inform the backend of the appropriate details for those.  Switch to
 * Connected state.
 */
static int
netfront_probe(device_t dev)
{

	if (!strcmp(xenbus_get_type(dev), "vif")) {
		device_set_desc(dev, "Virtual Network Interface");
		return (0);
	}

	return (ENXIO);
}

static int
netfront_attach(device_t dev)
{
	int err;

	err = create_netdev(dev);
	if (err) {
		xenbus_dev_fatal(dev, err, "creating netdev");
		return err;
	}

#if __FreeBSD_version >= 700000
	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "enable_lro", CTLTYPE_INT|CTLFLAG_RW,
	    &xn_enable_lro, 0, "Large Receive Offload");
#endif

	return 0;
}


/**
 * We are reconnecting to the backend, due to a suspend/resume, or a backend
 * driver restart.  We tear down our netif structure and recreate it, but
 * leave the device-layer structures intact so that this is transparent to the
 * rest of the kernel.
 */
static int
netfront_resume(device_t dev)
{
	struct netfront_info *info = device_get_softc(dev);

	netif_disconnect_backend(info);
	return (0);
}


/* Common code used when first setting up, and when resuming. */
static int
talk_to_backend(device_t dev, struct netfront_info *info)
{
	const char *message;
	struct xs_transaction xst;
	const char *node = xenbus_get_node(dev);
	int err;

	err = xen_net_read_mac(dev, info->mac);
	if (err) {
		xenbus_dev_fatal(dev, err, "parsing %s/mac", node);
		goto out;
	}

	/* Create shared ring, alloc event channel. */
	err = setup_device(dev, info);
	if (err)
		goto out;

 again:
	err = xs_transaction_start(&xst);
	if (err) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		goto destroy_ring;
	}
	err = xs_printf(xst, node, "tx-ring-ref", "%u",
			info->tx_ring_ref);
	if (err) {
		message = "writing tx ring-ref";
		goto abort_transaction;
	}
	err = xs_printf(xst, node, "rx-ring-ref", "%u",
			info->rx_ring_ref);
	if (err) {
		message = "writing rx ring-ref";
		goto abort_transaction;
	}
	err = xs_printf(xst, node,
			"event-channel", "%u", irq_to_evtchn_port(info->irq));
	if (err) {
		message = "writing event-channel";
		goto abort_transaction;
	}
	err = xs_printf(xst, node, "request-rx-copy", "%u",
			info->copying_receiver);
	if (err) {
		message = "writing request-rx-copy";
		goto abort_transaction;
	}
	err = xs_printf(xst, node, "feature-rx-notify", "%d", 1);
	if (err) {
		message = "writing feature-rx-notify";
		goto abort_transaction;
	}
	err = xs_printf(xst, node, "feature-sg", "%d", 1);
	if (err) {
		message = "writing feature-sg";
		goto abort_transaction;
	}
#if __FreeBSD_version >= 700000
	err = xs_printf(xst, node, "feature-gso-tcpv4", "%d", 1);
	if (err) {
		message = "writing feature-gso-tcpv4";
		goto abort_transaction;
	}
#endif

	err = xs_transaction_end(xst, 0);
	if (err) {
		if (err == EAGAIN)
			goto again;
		xenbus_dev_fatal(dev, err, "completing transaction");
		goto destroy_ring;
	}

	return 0;

 abort_transaction:
	xs_transaction_end(xst, 1);
	xenbus_dev_fatal(dev, err, "%s", message);
 destroy_ring:
	netif_free(info);
 out:
	return err;
}
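/*
 * On success, the transaction above leaves the device's xenstore node
 * populated roughly as follows (values illustrative):
 *
 *	tx-ring-ref = "8"		rx-ring-ref = "9"
 *	event-channel = "11"		request-rx-copy = "0" or "1"
 *	feature-rx-notify = "1"		feature-sg = "1"
 *	feature-gso-tcpv4 = "1"		(FreeBSD >= 7 only)
 *
 * The backend consumes these keys when it transitions to Connected.
 */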


static int
setup_device(device_t dev, struct netfront_info *info)
{
	netif_tx_sring_t *txs;
	netif_rx_sring_t *rxs;
	int error;
	struct ifnet *ifp;

	ifp = info->xn_ifp;

	info->tx_ring_ref = GRANT_REF_INVALID;
	info->rx_ring_ref = GRANT_REF_INVALID;
	info->rx.sring = NULL;
	info->tx.sring = NULL;
	info->irq = 0;

	txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (!txs) {
		error = ENOMEM;
		xenbus_dev_fatal(dev, error, "allocating tx ring page");
		goto fail;
	}
	SHARED_RING_INIT(txs);
	FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
	error = xenbus_grant_ring(dev, virt_to_mfn(txs), &info->tx_ring_ref);
	if (error)
		goto fail;

	rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (!rxs) {
		error = ENOMEM;
		xenbus_dev_fatal(dev, error, "allocating rx ring page");
		goto fail;
	}
	SHARED_RING_INIT(rxs);
	FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);

	error = xenbus_grant_ring(dev, virt_to_mfn(rxs), &info->rx_ring_ref);
	if (error)
		goto fail;

	error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev),
	    "xn", xn_intr, info, INTR_TYPE_NET | INTR_MPSAFE, &info->irq);

	if (error) {
		xenbus_dev_fatal(dev, error,
				 "bind_evtchn_to_irqhandler failed");
		goto fail;
	}

	show_device(info);

	return (0);

 fail:
	netif_free(info);
	return (error);
}

#ifdef INET
/**
 * If this interface has an ipv4 address, send an arp for it. This
 * helps to get the network going again after migrating hosts.
 */
static void
netfront_send_fake_arp(device_t dev, struct netfront_info *info)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	ifp = info->xn_ifp;
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		if (ifa->ifa_addr->sa_family == AF_INET) {
			arp_ifinit(ifp, ifa);
		}
	}
}
#endif

/**
 * Callback received when the backend's state changes.
 */
static void
netfront_backend_changed(device_t dev, XenbusState newstate)
{
	struct netfront_info *sc = device_get_softc(dev);

	DPRINTK("newstate=%d\n", newstate);

	switch (newstate) {
	case XenbusStateInitialising:
	case XenbusStateInitialised:
	case XenbusStateConnected:
	case XenbusStateUnknown:
	case XenbusStateClosed:
	case XenbusStateReconfigured:
	case XenbusStateReconfiguring:
		break;
	case XenbusStateInitWait:
		if (xenbus_get_state(dev) != XenbusStateInitialising)
			break;
		if (network_connect(sc) != 0)
			break;
		xenbus_set_state(dev, XenbusStateConnected);
#ifdef INET
		netfront_send_fake_arp(dev, sc);
#endif
		break;
	case XenbusStateClosing:
		xenbus_set_state(dev, XenbusStateClosed);
		break;
	}
}

static void
xn_free_rx_ring(struct netfront_info *sc)
{
#if 0
	int i;

	for (i = 0; i < NET_RX_RING_SIZE; i++) {
		if (sc->xn_cdata.rx_mbufs[i] != NULL) {
			m_freem(sc->rx_mbufs[i]);
			sc->rx_mbufs[i] = NULL;
		}
	}

	sc->rx.rsp_cons = 0;
	sc->xn_rx_if->req_prod = 0;
	sc->xn_rx_if->event = sc->rx.rsp_cons;
#endif
}

static void
xn_free_tx_ring(struct netfront_info *sc)
{
#if 0
	int i;

	for (i = 0; i < NET_TX_RING_SIZE; i++) {
		if (sc->tx_mbufs[i] != NULL) {
			m_freem(sc->tx_mbufs[i]);
			sc->xn_cdata.xn_tx_chain[i] = NULL;
		}
	}

	return;
#endif
}

/**
 * \brief Verify that there is sufficient space in the Tx ring
 *        buffer for a maximally sized request to be enqueued.
 *
 * A transmit request requires a transmit descriptor for each packet
 * fragment, plus up to 2 entries for "options" (e.g. TSO).
 */
static inline int
xn_tx_slot_available(struct netfront_info *np)
{
	return (RING_FREE_REQUESTS(&np->tx) > (MAX_TX_REQ_FRAGS + 2));
}

static void
netif_release_tx_bufs(struct netfront_info *np)
{
	int i;

	for (i = 1; i <= NET_TX_RING_SIZE; i++) {
		struct mbuf *m;

		m = np->tx_mbufs[i];

		/*
		 * We assume that no kernel addresses are
		 * less than NET_TX_RING_SIZE.  Any entry
		 * in the table that is below this number
		 * must be an index from free-list tracking.
		 */
		if (((uintptr_t)m) <= NET_TX_RING_SIZE)
			continue;
		gnttab_grant_foreign_access_ref(np->grant_tx_ref[i],
		    xenbus_get_otherend_id(np->xbdev),
		    virt_to_mfn(mtod(m, vm_offset_t)),
		    GNTMAP_readonly);
		gnttab_release_grant_reference(&np->gref_tx_head,
		    np->grant_tx_ref[i]);
		np->grant_tx_ref[i] = GRANT_REF_INVALID;
		add_id_to_freelist(np->tx_mbufs, i);
		np->xn_cdata.xn_tx_chain_cnt--;
		if (np->xn_cdata.xn_tx_chain_cnt < 0) {
			panic("netif_release_tx_bufs: tx_chain_cnt must be >= 0");
		}
		m_freem(m);
	}
}

static void
network_alloc_rx_buffers(struct netfront_info *sc)
{
	int otherend_id = xenbus_get_otherend_id(sc->xbdev);
	unsigned short id;
	struct mbuf *m_new;
	int i, batch_target, notify;
	RING_IDX req_prod;
	struct xen_memory_reservation reservation;
	grant_ref_t ref;
	int nr_flips;
	netif_rx_request_t *req;
	vm_offset_t vaddr;
	u_long pfn;

	req_prod = sc->rx.req_prod_pvt;

	if (unlikely(sc->carrier == 0))
		return;

	/*
	 * Allocate mbufs greedily, even though we batch updates to the
	 * receive ring. This creates a less bursty demand on the memory
	 * allocator, and so should reduce the chance of failed allocation
	 * requests both for ourself and for other kernel subsystems.
	 *
	 * Here we attempt to maintain rx_target buffers in flight, counting
	 * buffers that we have yet to process in the receive ring.
	 */
	batch_target = sc->rx_target - (req_prod - sc->rx.rsp_cons);
	for (i = mbufq_len(&sc->xn_rx_batch); i < batch_target; i++) {
		MGETHDR(m_new, M_DONTWAIT, MT_DATA);
		if (m_new == NULL) {
			printf("%s: MGETHDR failed\n", __func__);
			goto no_mbuf;
		}

		m_cljget(m_new, M_DONTWAIT, MJUMPAGESIZE);
		if ((m_new->m_flags & M_EXT) == 0) {
			printf("%s: m_cljget failed\n", __func__);
			m_freem(m_new);

no_mbuf:
			if (i != 0)
				goto refill;
			/*
			 * XXX set timer
			 */
			break;
		}
		m_new->m_len = m_new->m_pkthdr.len = MJUMPAGESIZE;

		/* queue the mbufs allocated */
		mbufq_tail(&sc->xn_rx_batch, m_new);
	}

	/*
	 * If we've allocated at least half of our target number of entries,
	 * submit them to the backend - we have enough to make the overhead
	 * of submission worthwhile.  Otherwise wait for more mbufs and
	 * request entries to become available.
	 */
	if (i < (sc->rx_target/2)) {
		if (req_prod > sc->rx.sring->req_prod)
			goto push;
		return;
	}

	/*
	 * Double floating fill target if we risked having the backend
	 * run out of empty buffers for receive traffic.  We define "running
	 * low" as having less than a fourth of our target buffers free
	 * at the time we refilled the queue.
	 */
	if ((req_prod - sc->rx.sring->rsp_prod) < (sc->rx_target / 4)) {
		sc->rx_target *= 2;
		if (sc->rx_target > sc->rx_max_target)
			sc->rx_target = sc->rx_max_target;
	}

refill:
	for (nr_flips = i = 0; ; i++) {
		if ((m_new = mbufq_dequeue(&sc->xn_rx_batch)) == NULL)
			break;

		m_new->m_ext.ext_arg1 = (vm_paddr_t *)(uintptr_t)(
				vtophys(m_new->m_ext.ext_buf) >> PAGE_SHIFT);

		id = xennet_rxidx(req_prod + i);

		KASSERT(sc->rx_mbufs[id] == NULL, ("non-NULL xm_rx_chain"));
		sc->rx_mbufs[id] = m_new;

		ref = gnttab_claim_grant_reference(&sc->gref_rx_head);
		KASSERT(ref != GNTTAB_LIST_END,
			("reserved grant references exhausted"));
		sc->grant_rx_ref[id] = ref;

		vaddr = mtod(m_new, vm_offset_t);
		pfn = vtophys(vaddr) >> PAGE_SHIFT;
		req = RING_GET_REQUEST(&sc->rx, req_prod + i);

		if (sc->copying_receiver == 0) {
			gnttab_grant_foreign_transfer_ref(ref,
			    otherend_id, pfn);
			sc->rx_pfn_array[nr_flips] = PFNTOMFN(pfn);
			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
				/*
				 * Remove this page before passing
				 * back to Xen.
				 */
				set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
				MULTI_update_va_mapping(&sc->rx_mcl[i],
				    vaddr, 0, 0);
			}
			nr_flips++;
		} else {
			gnttab_grant_foreign_access_ref(ref,
			    otherend_id,
			    PFNTOMFN(pfn), 0);
		}
		req->id = id;
		req->gref = ref;

		sc->rx_pfn_array[i] =
		    vtomach(mtod(m_new, vm_offset_t)) >> PAGE_SHIFT;
	}

	KASSERT(i, ("no mbufs processed")); /* should have returned earlier */
	KASSERT(mbufq_len(&sc->xn_rx_batch) == 0, ("not all mbufs processed"));
	/*
	 * We may have allocated buffers which have entries outstanding
	 * in the page update queue -- make sure we flush those first!
	 */
	PT_UPDATES_FLUSH();
	if (nr_flips != 0) {
#ifdef notyet
		/* Tell the balloon driver what is going on. */
		balloon_update_driver_allowance(i);
#endif
		set_xen_guest_handle(reservation.extent_start, sc->rx_pfn_array);
		reservation.nr_extents   = i;
		reservation.extent_order = 0;
		reservation.address_bits = 0;
		reservation.domid        = DOMID_SELF;

		if (!xen_feature(XENFEAT_auto_translated_physmap)) {

			/* After all PTEs have been zapped, flush the TLB. */
			sc->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
			    UVMF_TLB_FLUSH|UVMF_ALL;

			/* Give away a batch of pages. */
			sc->rx_mcl[i].op = __HYPERVISOR_memory_op;
			sc->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
			sc->rx_mcl[i].args[1] = (u_long)&reservation;

			/* Zap PTEs and give away pages in one big multicall. */
			(void)HYPERVISOR_multicall(sc->rx_mcl, i+1);

			/* Check return status of HYPERVISOR_dom_mem_op(). */
			if (unlikely(sc->rx_mcl[i].result != i))
				panic("Unable to reduce memory reservation\n");
		} else {
			if (HYPERVISOR_memory_op(
			    XENMEM_decrease_reservation, &reservation) != i)
				panic("Unable to reduce memory "
				    "reservation\n");
		}
	} else {
		wmb();
	}

	/* Above is a suitable barrier to ensure backend will see requests. */
	sc->rx.req_prod_pvt = req_prod + i;
push:
	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->rx, notify);
	if (notify)
		notify_remote_via_irq(sc->irq);
}

static void
xn_rxeof(struct netfront_info *np)
{
	struct ifnet *ifp;
#if __FreeBSD_version >= 700000
	struct lro_ctrl *lro = &np->xn_lro;
	struct lro_entry *queued;
#endif
	struct netfront_rx_info rinfo;
	struct netif_rx_response *rx = &rinfo.rx;
	struct netif_extra_info *extras = rinfo.extras;
	RING_IDX i, rp;
	multicall_entry_t *mcl;
	struct mbuf *m;
	struct mbuf_head rxq, errq;
	int err, pages_flipped = 0, work_to_do;

	do {
		XN_RX_LOCK_ASSERT(np);
		if (!netfront_carrier_ok(np))
			return;

		mbufq_init(&errq);
		mbufq_init(&rxq);

		ifp = np->xn_ifp;

		rp = np->rx.sring->rsp_prod;
		rmb();	/* Ensure we see queued responses up to 'rp'. */

		i = np->rx.rsp_cons;
		while (i != rp) {
			memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
			memset(extras, 0, sizeof(rinfo.extras));

			m = NULL;
			err = xennet_get_responses(np, &rinfo, rp, &i, &m,
			    &pages_flipped);

			if (unlikely(err)) {
				if (m)
					mbufq_tail(&errq, m);
				np->stats.rx_errors++;
				continue;
			}

			m->m_pkthdr.rcvif = ifp;
			if (rx->flags & NETRXF_data_validated) {
				/*
				 * Tell the stack the checksums are okay.
				 * XXX this isn't necessarily the case -
				 * need to add a check.
				 */
				m->m_pkthdr.csum_flags |=
					(CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID
					    | CSUM_PSEUDO_HDR);
				m->m_pkthdr.csum_data = 0xffff;
			}

			np->stats.rx_packets++;
			np->stats.rx_bytes += m->m_pkthdr.len;

			mbufq_tail(&rxq, m);
			np->rx.rsp_cons = i;
		}

		if (pages_flipped) {
			/* Some pages are no longer absent... */
#ifdef notyet
			balloon_update_driver_allowance(-pages_flipped);
#endif
			/*
			 * Do all the remapping work, and M->P updates, in
			 * one big hypercall.
			 */
			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
				mcl = np->rx_mcl + pages_flipped;
				mcl->op = __HYPERVISOR_mmu_update;
				mcl->args[0] = (u_long)np->rx_mmu;
				mcl->args[1] = pages_flipped;
				mcl->args[2] = 0;
				mcl->args[3] = DOMID_SELF;
				(void)HYPERVISOR_multicall(np->rx_mcl,
				    pages_flipped + 1);
			}
		}

		while ((m = mbufq_dequeue(&errq)))
			m_freem(m);

		/*
		 * Process all the mbufs after the remapping is complete.
		 * Break the mbuf chain first though.
		 */
		while ((m = mbufq_dequeue(&rxq)) != NULL) {
			ifp->if_ipackets++;

			/*
			 * Do we really need to drop the rx lock?
			 */
			XN_RX_UNLOCK(np);
#if __FreeBSD_version >= 700000
			/* Use LRO if possible */
			if ((ifp->if_capenable & IFCAP_LRO) == 0 ||
			    lro->lro_cnt == 0 || tcp_lro_rx(lro, m, 0)) {
				/*
				 * If LRO fails, pass up to the stack
				 * directly.
				 */
				(*ifp->if_input)(ifp, m);
			}
#else
			(*ifp->if_input)(ifp, m);
#endif
			XN_RX_LOCK(np);
		}

		np->rx.rsp_cons = i;

#if __FreeBSD_version >= 700000
		/*
		 * Flush any outstanding LRO work
		 */
		while (!SLIST_EMPTY(&lro->lro_active)) {
			queued = SLIST_FIRST(&lro->lro_active);
			SLIST_REMOVE_HEAD(&lro->lro_active, next);
			tcp_lro_flush(lro, queued);
		}
#endif

#if 0
		/* If we get a callback with very few responses, reduce fill target. */
		/* NB. Note exponential increase, linear decrease. */
		if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
			((3*np->rx_target) / 4)) && (--np->rx_target < np->rx_min_target))
			np->rx_target = np->rx_min_target;
#endif

		network_alloc_rx_buffers(np);

		RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, work_to_do);
	} while (work_to_do);
}

static void
xn_txeof(struct netfront_info *np)
{
	RING_IDX i, prod;
	unsigned short id;
	struct ifnet *ifp;
	netif_tx_response_t *txr;
	struct mbuf *m;

	XN_TX_LOCK_ASSERT(np);

	if (!netfront_carrier_ok(np))
		return;

	ifp = np->xn_ifp;

	do {
		prod = np->tx.sring->rsp_prod;
		rmb(); /* Ensure we see responses up to 'rp'. */

		for (i = np->tx.rsp_cons; i != prod; i++) {
			txr = RING_GET_RESPONSE(&np->tx, i);
			if (txr->status == NETIF_RSP_NULL)
				continue;

			if (txr->status != NETIF_RSP_OKAY) {
				printf("%s: WARNING: response is %d!\n",
				       __func__, txr->status);
			}
			id = txr->id;
			m = np->tx_mbufs[id];
			KASSERT(m != NULL, ("mbuf not found in xn_tx_chain"));
			KASSERT((uintptr_t)m > NET_TX_RING_SIZE,
				("mbuf already on the free list, but we're "
				"trying to free it again!"));
			M_ASSERTVALID(m);

			/*
			 * Increment packet count if this is the last
			 * mbuf of the chain.
			 */
			if (!m->m_next)
				ifp->if_opackets++;
			if (unlikely(gnttab_query_foreign_access(
			    np->grant_tx_ref[id]) != 0)) {
				panic("grant id %u still in use by the backend",
				      id);
			}
			gnttab_end_foreign_access_ref(
				np->grant_tx_ref[id]);
			gnttab_release_grant_reference(
				&np->gref_tx_head, np->grant_tx_ref[id]);
			np->grant_tx_ref[id] = GRANT_REF_INVALID;

			np->tx_mbufs[id] = NULL;
			add_id_to_freelist(np->tx_mbufs, id);
			np->xn_cdata.xn_tx_chain_cnt--;
			m_free(m);
			/*
			 * Only mark the queue active if we've freed up
			 * at least one slot to try.
			 */
			ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		}
		np->tx.rsp_cons = prod;

		/*
		 * Set a new event, then check for race with update of
		 * tx_cons. Note that it is essential to schedule a
		 * callback, no matter how few buffers are pending. Even if
		 * there is space in the transmit ring, higher layers may
		 * be blocked because too much data is outstanding: in such
		 * cases notification from Xen is likely to be the only kick
		 * that we'll get.
		 */
		np->tx.sring->rsp_event =
		    prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;

		mb();
	} while (prod != np->tx.sring->rsp_prod);

	if (np->tx_full &&
	    ((np->tx.sring->req_prod - prod) < NET_TX_RING_SIZE)) {
		np->tx_full = 0;
#if 0
		if (np->user_state == UST_OPEN)
			netif_wake_queue(dev);
#endif
	}
}

static void
xn_intr(void *xsc)
{
	struct netfront_info *np = xsc;
	struct ifnet *ifp = np->xn_ifp;

#if 0
	if (!(np->rx.rsp_cons != np->rx.sring->rsp_prod &&
	    likely(netfront_carrier_ok(np)) &&
	    ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;
#endif
	if (RING_HAS_UNCONSUMED_RESPONSES(&np->tx)) {
		XN_TX_LOCK(np);
		xn_txeof(np);
		XN_TX_UNLOCK(np);
	}

	XN_RX_LOCK(np);
	xn_rxeof(np);
	XN_RX_UNLOCK(np);

	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		xn_start(ifp);
}

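/*
 * Recycle an unconsumed receive buffer: re-post the mbuf and its grant
 * reference in the next unused request slot so the backend may fill it
 * again.
 */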
static void
xennet_move_rx_slot(struct netfront_info *np, struct mbuf *m,
	grant_ref_t ref)
{
	int new = xennet_rxidx(np->rx.req_prod_pvt);

	KASSERT(np->rx_mbufs[new] == NULL, ("rx_mbufs != NULL"));
	np->rx_mbufs[new] = m;
	np->grant_rx_ref[new] = ref;
	RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
	RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
	np->rx.req_prod_pvt++;
}

static int
xennet_get_extras(struct netfront_info *np,
    struct netif_extra_info *extras, RING_IDX rp, RING_IDX *cons)
{
	struct netif_extra_info *extra;

	int err = 0;

	do {
		struct mbuf *m;
		grant_ref_t ref;

		if (unlikely(*cons + 1 == rp)) {
#if 0
			if (net_ratelimit())
				WPRINTK("Missing extra info\n");
#endif
			err = EINVAL;
			break;
		}

		extra = (struct netif_extra_info *)
		    RING_GET_RESPONSE(&np->rx, ++(*cons));

		if (unlikely(!extra->type ||
			extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
#if 0
			if (net_ratelimit())
				WPRINTK("Invalid extra type: %d\n",
					extra->type);
#endif
			err = EINVAL;
		} else {
			memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
		}

		m = xennet_get_rx_mbuf(np, *cons);
		ref = xennet_get_rx_ref(np, *cons);
		xennet_move_rx_slot(np, m, ref);
	} while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);

	return err;
}

static int
xennet_get_responses(struct netfront_info *np,
	struct netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons,
	struct mbuf **list,
	int *pages_flipped_p)
{
	int pages_flipped = *pages_flipped_p;
	struct mmu_update *mmu;
	struct multicall_entry *mcl;
	struct netif_rx_response *rx = &rinfo->rx;
	struct netif_extra_info *extras = rinfo->extras;
	struct mbuf *m, *m0, *m_prev;
	grant_ref_t ref = xennet_get_rx_ref(np, *cons);
	RING_IDX ref_cons = *cons;
	int frags = 1;
	int err = 0;
	u_long ret;

	m0 = m = m_prev = xennet_get_rx_mbuf(np, *cons);

	if (rx->flags & NETRXF_extra_info) {
		err = xennet_get_extras(np, extras, rp, cons);
	}

	if (m0 != NULL) {
		m0->m_pkthdr.len = 0;
		m0->m_next = NULL;
	}

	for (;;) {
		u_long mfn;

#if 0
		DPRINTK("rx->status=%hd rx->offset=%hu frags=%u\n",
			rx->status, rx->offset, frags);
#endif
		if (unlikely(rx->status < 0 ||
			rx->offset + rx->status > PAGE_SIZE)) {
#if 0
			if (net_ratelimit())
				WPRINTK("rx->offset: %x, size: %u\n",
					rx->offset, rx->status);
#endif
			xennet_move_rx_slot(np, m, ref);
			if (m0 == m)
				m0 = NULL;
			m = NULL;
			err = EINVAL;
			goto next_skip_queue;
		}

		/*
		 * This definitely indicates a bug, either in this driver or in
		 * the backend driver. In future this should flag the bad
		 * situation to the system controller to reboot the backend.
		 */
		if (ref == GRANT_REF_INVALID) {
#if 0
			if (net_ratelimit())
				WPRINTK("Bad rx response id %d.\n", rx->id);
#endif
			printf("%s: Bad rx response id %d.\n", __func__, rx->id);
			err = EINVAL;
			goto next;
		}

		if (!np->copying_receiver) {
			/*
			 * Memory pressure, insufficient buffer
			 * headroom, ...
			 */
			if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) {
				WPRINTK("Unfulfilled rx req (id=%d, st=%d).\n",
					rx->id, rx->status);
				xennet_move_rx_slot(np, m, ref);
				err = ENOMEM;
				goto next;
			}

			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
				/* Remap the page. */
				void *vaddr = mtod(m, void *);
				uint32_t pfn;

				mcl = np->rx_mcl + pages_flipped;
				mmu = np->rx_mmu + pages_flipped;

				MULTI_update_va_mapping(mcl, (u_long)vaddr,
				    (((vm_paddr_t)mfn) << PAGE_SHIFT) | PG_RW |
				    PG_V | PG_M | PG_A, 0);
				pfn = (uintptr_t)m->m_ext.ext_arg1;
				mmu->ptr = ((vm_paddr_t)mfn << PAGE_SHIFT) |
				    MMU_MACHPHYS_UPDATE;
				mmu->val = pfn;

				set_phys_to_machine(pfn, mfn);
			}
			pages_flipped++;
		} else {
			ret = gnttab_end_foreign_access_ref(ref);
			KASSERT(ret, ("ret != 0"));
		}

		gnttab_release_grant_reference(&np->gref_rx_head, ref);

next:
		if (m == NULL)
			break;

		m->m_len = rx->status;
		m->m_data += rx->offset;
		m0->m_pkthdr.len += rx->status;

next_skip_queue:
		if (!(rx->flags & NETRXF_more_data))
			break;

		if (*cons + frags == rp) {
			if (net_ratelimit())
				WPRINTK("Need more frags\n");
			err = ENOENT;
			printf("%s: cons %u frags %u rp %u, not enough frags\n",
			       __func__, *cons, frags, rp);
			break;
		}
		/*
		 * Note that m can be NULL, if rx->status < 0 or if
		 * rx->offset + rx->status > PAGE_SIZE above.
		 */
		m_prev = m;

		rx = RING_GET_RESPONSE(&np->rx, *cons + frags);
		m = xennet_get_rx_mbuf(np, *cons + frags);

		/*
		 * m_prev == NULL can happen if rx->status < 0 or if
		 * rx->offset + rx->status > PAGE_SIZE above.
		 */
		if (m_prev != NULL)
			m_prev->m_next = m;

		/*
		 * m0 can be NULL if rx->status < 0 or if rx->offset +
		 * rx->status > PAGE_SIZE above.
		 */
		if (m0 == NULL)
			m0 = m;
		m->m_next = NULL;
		ref = xennet_get_rx_ref(np, *cons + frags);
		ref_cons = *cons + frags;
		frags++;
	}
	*list = m0;
	*cons += frags;
	*pages_flipped_p = pages_flipped;

	return (err);
}
1428181643Skmacy
1429181643Skmacystatic void
1430181643Skmacyxn_tick_locked(struct netfront_info *sc)
1431181643Skmacy{
1432181643Skmacy	XN_RX_LOCK_ASSERT(sc);
1433181643Skmacy	callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc);
1434181643Skmacy
1435181643Skmacy	/* XXX placeholder for printing debug information */
1437181643Skmacy}
1438181643Skmacy
1439181643Skmacy
1440181643Skmacystatic void
1441181643Skmacyxn_tick(void *xsc)
1442181643Skmacy{
1443181643Skmacy	struct netfront_info *sc;
1444181643Skmacy
1445181643Skmacy	sc = xsc;
1446181643Skmacy	XN_RX_LOCK(sc);
1447181643Skmacy	xn_tick_locked(sc);
1448181643Skmacy	XN_RX_UNLOCK(sc);
1450181643Skmacy}
1451208901Sken
1452208901Sken/**
1453208901Sken * \brief Count the number of fragments in an mbuf chain.
1454208901Sken *
1455208901Sken * Surprisingly, there isn't an M* macro for this.
1456208901Sken */
1457208901Skenstatic inline int
1458208901Skenxn_count_frags(struct mbuf *m)
1459181643Skmacy{
1460208901Sken	int nfrags;
1461208901Sken
1462208901Sken	for (nfrags = 0; m != NULL; m = m->m_next)
1463208901Sken		nfrags++;
1464208901Sken
1465208901Sken	return (nfrags);
1466208901Sken}
1467208901Sken
1468208901Sken/**
1469208901Sken * Given an mbuf chain, make sure we have enough room and then push
1470208901Sken * it onto the transmit ring.
1471208901Sken */
1472208901Skenstatic int
1473208901Skenxn_assemble_tx_request(struct netfront_info *sc, struct mbuf *m_head)
1474208901Sken{
1475208901Sken	struct ifnet *ifp;
1476208901Sken	struct mbuf *m;
1477208901Sken	u_int nfrags;
1478208901Sken	netif_extra_info_t *extra;
1479185605Skmacy	int otherend_id;
1480181643Skmacy
1481208901Sken	ifp = sc->xn_ifp;
1482181643Skmacy
1483208901Sken	/*
1484208901Sken	 * Defragment the mbuf if necessary.
1485208901Sken	 */
1486208901Sken	nfrags = xn_count_frags(m_head);
1487181643Skmacy
1488208901Sken	/*
1489208901Sken	 * Check to see whether this request is longer than netback
1490208901Sken	 * can handle, and try to defrag it.  It is a bit lame, but
1491208901Sken	 * the netback driver in Linux can't deal with
1492208901Sken	 * nfrags > MAX_TX_REQ_FRAGS, which is a quirk of the Linux
1493208901Sken	 * network stack.
1494208901Sken	 */
1497208901Sken	if (nfrags > MAX_TX_REQ_FRAGS) {
1498208901Sken		m = m_defrag(m_head, M_DONTWAIT);
1499208901Sken		if (!m) {
1500208901Sken			/*
1501208901Sken			 * Defrag failed, so free the mbuf and
1502208901Sken			 * therefore drop the packet.
1503208901Sken			 */
1504208901Sken			m_freem(m_head);
1505208901Sken			return (EMSGSIZE);
1506189699Sdfr		}
1507208901Sken		m_head = m;
1508208901Sken	}
1509189699Sdfr
1510208901Sken	/* Determine how many fragments now exist */
1511208901Sken	nfrags = xn_count_frags(m_head);
1512192871Sadrian
1513208901Sken	/*
1514208901Sken	 * Check to see whether the defragmented packet has too many
1515208901Sken	 * segments for the Linux netback driver.  The FreeBSD TCP
1516208901Sken	 * stack, with TSO enabled, can produce a chain of mbufs longer
1517208901Sken	 * than Linux can handle.  Make sure we don't pass a too-long
1518208901Sken	 * chain over to the other side by dropping the packet.  It
1519208901Sken	 * doesn't look like there is currently a way to tell the TCP
1520208901Sken	 * stack to generate a shorter chain of packets.
1521208901Sken	 */
1524208901Sken	if (nfrags > MAX_TX_REQ_FRAGS) {
1525214077Sgibbs#ifdef DEBUG
1526214077Sgibbs		printf("%s: nfrags %d > MAX_TX_REQ_FRAGS %d, netback "
1527214077Sgibbs		       "won't be able to handle it, dropping\n",
1528214077Sgibbs		       __func__, nfrags, MAX_TX_REQ_FRAGS);
1529214077Sgibbs#endif
1530208901Sken		m_freem(m_head);
1531208901Sken		return (EMSGSIZE);
1532208901Sken	}
1533192871Sadrian
1534208901Sken	/*
1535208901Sken	 * This check should be redundant.  We've already verified that we
1536208901Sken	 * have enough slots in the ring to handle a packet of maximum
1537208901Sken	 * size, and that our packet is less than the maximum size.  Keep
1538208901Sken	 * it in here as an assert for now just to make certain that
1539208901Sken	 * xn_tx_chain_cnt is accurate.
1540208901Sken	 */
1541208901Sken	KASSERT((sc->xn_cdata.xn_tx_chain_cnt + nfrags) <= NET_TX_RING_SIZE,
1542208901Sken		("%s: xn_tx_chain_cnt (%d) + nfrags (%d) > NET_TX_RING_SIZE "
1543208901Sken		 "(%d)!", __func__, (int) sc->xn_cdata.xn_tx_chain_cnt,
1544208901Sken                    (int) nfrags, (int) NET_TX_RING_SIZE));
1545192871Sadrian
1546208901Sken	/*
1547208901Sken	 * Start packing the mbufs in this chain into
1548208901Sken	 * the fragment pointers. Stop when we run out
1549208901Sken	 * of fragments or hit the end of the mbuf chain.
1550208901Sken	 */
1551208901Sken	extra = NULL;
1552208901Sken	otherend_id = xenbus_get_otherend_id(sc->xbdev);
1554208901Sken	for (m = m_head; m; m = m->m_next) {
1555208901Sken		netif_tx_request_t *tx;
1556208901Sken		uintptr_t id;
1557208901Sken		grant_ref_t ref;
1558208901Sken		u_long mfn; /* XXX Wrong type? */
1559192871Sadrian
1560208901Sken		tx = RING_GET_REQUEST(&sc->tx, sc->tx.req_prod_pvt);
1561208901Sken		id = get_id_from_freelist(sc->tx_mbufs);
1562208901Sken		if (id == 0)
1563208901Sken			panic("%s: was allocated the freelist head!\n", __func__);
1564208901Sken		sc->xn_cdata.xn_tx_chain_cnt++;
1565208901Sken		if (sc->xn_cdata.xn_tx_chain_cnt > NET_TX_RING_SIZE)
1566208901Sken			panic("%s: tx_chain_cnt must be <= NET_TX_RING_SIZE\n", __func__);
1567208901Sken		sc->tx_mbufs[id] = m;
1568208901Sken		tx->id = id;
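		/*
		 * Grant the backend read-only access to the page holding
		 * this fragment's data; the grant reference travels to the
		 * backend in the request itself.
		 */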
1569208901Sken		ref = gnttab_claim_grant_reference(&sc->gref_tx_head);
1570208901Sken		KASSERT((short)ref >= 0, ("Negative ref"));
1571208901Sken		mfn = virt_to_mfn(mtod(m, vm_offset_t));
1572208901Sken		gnttab_grant_foreign_access_ref(ref, otherend_id,
1573208901Sken		    mfn, GNTMAP_readonly);
1574208901Sken		tx->gref = sc->grant_tx_ref[id] = ref;
1575208901Sken		tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1);
1576208901Sken		tx->flags = 0;
1577208901Sken		if (m == m_head) {
1578208901Sken			/*
1579208901Sken			 * The first fragment has the entire packet
1580208901Sken			 * size, subsequent fragments have just the
1581208901Sken			 * fragment size. The backend works out the
1582208901Sken			 * true size of the first fragment by
1583208901Sken			 * subtracting the sizes of the other
1584208901Sken			 * fragments.
1585208901Sken			 */
1586208901Sken			tx->size = m->m_pkthdr.len;
1587189699Sdfr
1588208901Sken			/*
1589208901Sken			 * The first fragment contains the checksum flags
1590208901Sken			 * and is optionally followed by extra data for
1591208901Sken			 * TSO etc.  CSUM_TSO requires checksum offloading;
1592208901Sken			 * some versions of FreeBSD fail to set CSUM_TCP in
1593208901Sken			 * the CSUM_TSO case, so we have to test for
1594208901Sken			 * CSUM_TSO explicitly.
1595208901Sken			 */
1600208901Sken			if (m->m_pkthdr.csum_flags
1601208901Sken			    & (CSUM_DELAY_DATA | CSUM_TSO)) {
1602208901Sken				tx->flags |= (NETTXF_csum_blank
1603208901Sken				    | NETTXF_data_validated);
1604208901Sken			}
1605189699Sdfr#if __FreeBSD_version >= 700000
1606208901Sken			if (m->m_pkthdr.csum_flags & CSUM_TSO) {
1607208901Sken				struct netif_extra_info *gso =
1608208901Sken					(struct netif_extra_info *)
1609208901Sken					RING_GET_REQUEST(&sc->tx,
1610208901Sken							 ++sc->tx.req_prod_pvt);
1611189699Sdfr
1612208901Sken				tx->flags |= NETTXF_extra_info;
1613189699Sdfr
1614208901Sken				gso->u.gso.size = m->m_pkthdr.tso_segsz;
1615208901Sken				gso->u.gso.type =
1616208901Sken					XEN_NETIF_GSO_TYPE_TCPV4;
1617208901Sken				gso->u.gso.pad = 0;
1618208901Sken				gso->u.gso.features = 0;
1619189699Sdfr
1620208901Sken				gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
1621208901Sken				gso->flags = 0;
1622208901Sken			}
1623181643Skmacy#endif
1624208901Sken		} else {
1625208901Sken			tx->size = m->m_len;
1626189699Sdfr		}
1627208901Sken		if (m->m_next)
1628208901Sken			tx->flags |= NETTXF_more_data;
1629181643Skmacy
1630208901Sken		sc->tx.req_prod_pvt++;
1631208901Sken	}
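	/* Hand a copy of the frame to any attached BPF listeners. */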
1632208901Sken	BPF_MTAP(ifp, m_head);
1633181643Skmacy
1634208901Sken	sc->stats.tx_bytes += m_head->m_pkthdr.len;
1635208901Sken	sc->stats.tx_packets++;
1636208901Sken
1637208901Sken	return (0);
1638208901Sken}
1639208901Sken
1640208901Skenstatic void
1641208901Skenxn_start_locked(struct ifnet *ifp)
1642208901Sken{
1643208901Sken	struct netfront_info *sc;
1644208901Sken	struct mbuf *m_head;
1645208901Sken	int notify;
1646208901Sken
1647208901Sken	sc = ifp->if_softc;
1648208901Sken
1649208901Sken	if (!netfront_carrier_ok(sc))
1650208901Sken		return;
1651208901Sken
1652208901Sken	/*
1653208901Sken	 * While we have enough transmit slots available for at least one
1654208901Sken	 * maximum-sized packet, pull mbufs off the queue and put them on
1655208901Sken	 * the transmit ring.
1656208901Sken	 */
1657208901Sken	while (xn_tx_slot_available(sc)) {
1658208901Sken		IF_DEQUEUE(&ifp->if_snd, m_head);
1659208901Sken		if (m_head == NULL)
1660208901Sken			break;
1661208901Sken
1662208901Sken		if (xn_assemble_tx_request(sc, m_head) != 0)
1663208901Sken			break;
1664181643Skmacy	}
1665181643Skmacy
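	/*
	 * Publish the queued requests on the shared ring.  The macro
	 * sets 'notify' only if the backend is waiting for more work,
	 * which spares us a redundant event-channel notification.
	 */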
1666181643Skmacy	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->tx, notify);
1667181643Skmacy	if (notify)
1668181643Skmacy		notify_remote_via_irq(sc->irq);
1669181643Skmacy
1670181643Skmacy	if (RING_FULL(&sc->tx)) {
1671181643Skmacy		sc->tx_full = 1;
1672181643Skmacy#if 0
1673181643Skmacy		netif_stop_queue(dev);
1674181643Skmacy#endif
1675181643Skmacy	}
1676208901Sken}
1677181643Skmacy
1678181643Skmacy
1679181643Skmacystatic void
1680181643Skmacyxn_start(struct ifnet *ifp)
1681181643Skmacy{
1682181643Skmacy	struct netfront_info *sc;
1683181643Skmacy	sc = ifp->if_softc;
1684181643Skmacy	XN_TX_LOCK(sc);
1685181643Skmacy	xn_start_locked(ifp);
1686181643Skmacy	XN_TX_UNLOCK(sc);
1687181643Skmacy}
1688181643Skmacy
1689181643Skmacy/* equivalent of network_open() in Linux */
1690181643Skmacystatic void
1691181643Skmacyxn_ifinit_locked(struct netfront_info *sc)
1692181643Skmacy{
1693181643Skmacy	struct ifnet *ifp;
1694181643Skmacy
1695181643Skmacy	XN_LOCK_ASSERT(sc);
1696181643Skmacy
1697181643Skmacy	ifp = sc->xn_ifp;
1698181643Skmacy
1699181643Skmacy	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1700181643Skmacy		return;
1701181643Skmacy
1702181643Skmacy	xn_stop(sc);
1703181643Skmacy
1704181643Skmacy	network_alloc_rx_buffers(sc);
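	/*
	 * Re-arm rsp_event so the backend raises an event as soon as it
	 * places the next response on the ring.
	 */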
1705181643Skmacy	sc->rx.sring->rsp_event = sc->rx.rsp_cons + 1;
1706181643Skmacy
1707181643Skmacy	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1708181643Skmacy	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1709199997Sgibbs	if_link_state_change(ifp, LINK_STATE_UP);
1710181643Skmacy
1711181643Skmacy	callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc);
1713181643Skmacy}
1714181643Skmacy
1715181643Skmacy
1716181643Skmacystatic void
1717181643Skmacyxn_ifinit(void *xsc)
1718181643Skmacy{
1719181643Skmacy	struct netfront_info *sc = xsc;
1720181643Skmacy
1721181643Skmacy	XN_LOCK(sc);
1722181643Skmacy	xn_ifinit_locked(sc);
1723181643Skmacy	XN_UNLOCK(sc);
1725181643Skmacy}
1726181643Skmacy
1727181643Skmacy
1728181643Skmacystatic int
1729181643Skmacyxn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1730181643Skmacy{
1731181643Skmacy	struct netfront_info *sc = ifp->if_softc;
1732181643Skmacy	struct ifreq *ifr = (struct ifreq *) data;
1733221130Sbz#ifdef INET
1734181643Skmacy	struct ifaddr *ifa = (struct ifaddr *)data;
1735221130Sbz#endif
1736181643Skmacy	int mask, error = 0;
1737181643Skmacy
1738181643Skmacy	switch (cmd) {
1739181643Skmacy	case SIOCSIFADDR:
1740181643Skmacy	case SIOCGIFADDR:
1741221130Sbz#ifdef INET
1742181643Skmacy		XN_LOCK(sc);
1743181643Skmacy		if (ifa->ifa_addr->sa_family == AF_INET) {
1744181643Skmacy			ifp->if_flags |= IFF_UP;
1745181643Skmacy			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1746181643Skmacy				xn_ifinit_locked(sc);
1747181643Skmacy			arp_ifinit(ifp, ifa);
1748189699Sdfr			XN_UNLOCK(sc);
1749185473Sdfr		} else {
1750189699Sdfr			XN_UNLOCK(sc);
1751221130Sbz#endif
1752181643Skmacy			error = ether_ioctl(ifp, cmd, data);
1753221130Sbz#ifdef INET
1754185473Sdfr		}
1755221130Sbz#endif
1756181643Skmacy		break;
1757181643Skmacy	case SIOCSIFMTU:
1758181643Skmacy		/* XXX can we alter the MTU on a VN? */
1759181643Skmacy#ifdef notyet
1760181643Skmacy		if (ifr->ifr_mtu > XN_JUMBO_MTU)
1761181643Skmacy			error = EINVAL;
1762181643Skmacy		else
1763181643Skmacy#endif
1764181643Skmacy		{
1765181643Skmacy			ifp->if_mtu = ifr->ifr_mtu;
1766181643Skmacy			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1767181643Skmacy			xn_ifinit(sc);
1768181643Skmacy		}
1769181643Skmacy		break;
1770181643Skmacy	case SIOCSIFFLAGS:
1771181643Skmacy		XN_LOCK(sc);
1772181643Skmacy		if (ifp->if_flags & IFF_UP) {
1773181643Skmacy			/*
1774181643Skmacy			 * If only the state of the PROMISC flag changed,
1775181643Skmacy			 * then just use the 'set promisc mode' command
1776181643Skmacy			 * instead of reinitializing the entire NIC. Doing
1777181643Skmacy			 * a full re-init means reloading the firmware and
1778181643Skmacy			 * waiting for it to start up, which may take a
1779181643Skmacy			 * second or two.
1780181643Skmacy			 */
1781181643Skmacy#ifdef notyet
1782181643Skmacy			/* No promiscuous mode with Xen */
1783181643Skmacy			if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
1784181643Skmacy			    ifp->if_flags & IFF_PROMISC &&
1785181643Skmacy			    !(sc->xn_if_flags & IFF_PROMISC)) {
1786181643Skmacy				XN_SETBIT(sc, XN_RX_MODE,
1787181643Skmacy					  XN_RXMODE_RX_PROMISC);
1788181643Skmacy			} else if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
1789181643Skmacy				   !(ifp->if_flags & IFF_PROMISC) &&
1790181643Skmacy				   sc->xn_if_flags & IFF_PROMISC) {
1791181643Skmacy				XN_CLRBIT(sc, XN_RX_MODE,
1792181643Skmacy					  XN_RXMODE_RX_PROMISC);
1793181643Skmacy			} else
1794181643Skmacy#endif
1795181643Skmacy				xn_ifinit_locked(sc);
1796181643Skmacy		} else {
1797181643Skmacy			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1798181643Skmacy				xn_stop(sc);
1799181643Skmacy			}
1800181643Skmacy		}
1801181643Skmacy		sc->xn_if_flags = ifp->if_flags;
1802181643Skmacy		XN_UNLOCK(sc);
1803181643Skmacy		error = 0;
1804181643Skmacy		break;
1805181643Skmacy	case SIOCSIFCAP:
1806181643Skmacy		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
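		/*
		 * Note that disabling transmit checksum offload below also
		 * turns off TSO4, since TSO depends on checksum offload
		 * (see the IFCAP_TSO4 case further down).
		 */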
1807189699Sdfr		if (mask & IFCAP_TXCSUM) {
1808189699Sdfr			if (IFCAP_TXCSUM & ifp->if_capenable) {
1809189699Sdfr				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
1810189699Sdfr				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
1811189699Sdfr				    | CSUM_IP | CSUM_TSO);
1812189699Sdfr			} else {
1813189699Sdfr				ifp->if_capenable |= IFCAP_TXCSUM;
1814189699Sdfr				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP
1815189699Sdfr				    | CSUM_IP);
1816189699Sdfr			}
1817181643Skmacy		}
1818189699Sdfr		if (mask & IFCAP_RXCSUM) {
1819189699Sdfr			ifp->if_capenable ^= IFCAP_RXCSUM;
1820189699Sdfr		}
1821189699Sdfr#if __FreeBSD_version >= 700000
1822189699Sdfr		if (mask & IFCAP_TSO4) {
1823189699Sdfr			if (IFCAP_TSO4 & ifp->if_capenable) {
1824189699Sdfr				ifp->if_capenable &= ~IFCAP_TSO4;
1825189699Sdfr				ifp->if_hwassist &= ~CSUM_TSO;
1826189699Sdfr			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
1827189699Sdfr				ifp->if_capenable |= IFCAP_TSO4;
1828189699Sdfr				ifp->if_hwassist |= CSUM_TSO;
1829189699Sdfr			} else {
1830192927Sadrian				IPRINTK("Xen requires tx checksum offload"
1831189699Sdfr				    " be enabled to use TSO\n");
1832189699Sdfr				error = EINVAL;
1833189699Sdfr			}
1834189699Sdfr		}
1835189699Sdfr		if (mask & IFCAP_LRO)
1836189699Sdfr			ifp->if_capenable ^= IFCAP_LRO;
1839189699Sdfr#endif
1840181643Skmacy		error = 0;
1841181643Skmacy		break;
1842181643Skmacy	case SIOCADDMULTI:
1843181643Skmacy	case SIOCDELMULTI:
1844181643Skmacy#ifdef notyet
1845181643Skmacy		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1846181643Skmacy			XN_LOCK(sc);
1847181643Skmacy			xn_setmulti(sc);
1848181643Skmacy			XN_UNLOCK(sc);
1849181643Skmacy			error = 0;
1850181643Skmacy		}
1851181643Skmacy#endif
1852181643Skmacy		/* FALLTHROUGH */
1853181643Skmacy	case SIOCSIFMEDIA:
1854181643Skmacy	case SIOCGIFMEDIA:
1855199997Sgibbs		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
1856181643Skmacy		break;
1857181643Skmacy	default:
1858181643Skmacy		error = ether_ioctl(ifp, cmd, data);
1859181643Skmacy	}
1860181643Skmacy
1861181643Skmacy	return (error);
1862181643Skmacy}
1863181643Skmacy
1864181643Skmacystatic void
1865181643Skmacyxn_stop(struct netfront_info *sc)
1866181643Skmacy{
1867181643Skmacy	struct ifnet *ifp;
1868181643Skmacy
1869181643Skmacy	XN_LOCK_ASSERT(sc);
1870181643Skmacy
1871181643Skmacy	ifp = sc->xn_ifp;
1872181643Skmacy
1873181643Skmacy	callout_stop(&sc->xn_stat_ch);
1874181643Skmacy
1875181643Skmacy	xn_free_rx_ring(sc);
1876181643Skmacy	xn_free_tx_ring(sc);
1877181643Skmacy
1878181643Skmacy	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1879199997Sgibbs	if_link_state_change(ifp, LINK_STATE_DOWN);
1880181643Skmacy}
1881181643Skmacy
1882181643Skmacy/* START of Xenolinux helper functions adapted to FreeBSD */
1883185605Skmacyint
1884185605Skmacynetwork_connect(struct netfront_info *np)
1885181643Skmacy{
1886186557Skmacy	int i, requeue_idx, error;
1887181643Skmacy	grant_ref_t ref;
1888181643Skmacy	netif_rx_request_t *req;
1889181643Skmacy	u_int feature_rx_copy, feature_rx_flip;
1890181643Skmacy
1891214077Sgibbs	error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev),
1892186557Skmacy	    "feature-rx-copy", NULL, "%u", &feature_rx_copy);
1893186557Skmacy	if (error)
1894181643Skmacy		feature_rx_copy = 0;
1895214077Sgibbs	error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev),
1896186557Skmacy	    "feature-rx-flip", NULL, "%u", &feature_rx_flip);
1897186557Skmacy	if (error)
1898181643Skmacy		feature_rx_flip = 1;
1899181643Skmacy
1900181643Skmacy	/*
1901181643Skmacy	 * Copy packets on receive path if:
1902181643Skmacy	 *  (a) This was requested by user, and the backend supports it; or
1903181643Skmacy	 *  (b) Flipping was requested, but this is unsupported by the backend.
1904181643Skmacy	 */
1905181643Skmacy	np->copying_receiver = ((MODPARM_rx_copy && feature_rx_copy) ||
1906181643Skmacy				(MODPARM_rx_flip && !feature_rx_flip));
1907181643Skmacy
1908181643Skmacy	/* Recovery procedure: */
1909186557Skmacy	error = talk_to_backend(np->xbdev, np);
1910186557Skmacy	if (error)
1911186557Skmacy		return (error);
1912181643Skmacy
1913181643Skmacy	/* Step 1: Reinitialise variables. */
1914181643Skmacy	netif_release_tx_bufs(np);
1915181643Skmacy
1916181643Skmacy	/* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
1917181643Skmacy	for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
1918181643Skmacy		struct mbuf *m;
1919186557Skmacy		u_long pfn;
1920181643Skmacy
1921181643Skmacy		if (np->rx_mbufs[i] == NULL)
1922181643Skmacy			continue;
1923181643Skmacy
1924181643Skmacy		m = np->rx_mbufs[requeue_idx] = xennet_get_rx_mbuf(np, i);
1925181643Skmacy		ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
1926208901Sken
1927181643Skmacy		req = RING_GET_REQUEST(&np->rx, requeue_idx);
1928186557Skmacy		pfn = vtophys(mtod(m, vm_offset_t)) >> PAGE_SHIFT;
1929181643Skmacy
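		/*
		 * Re-establish each buffer's grant: in flipping mode the
		 * backend transfers page ownership to us, so grant a page
		 * transfer; in copying mode grant it write access to our
		 * buffer's machine frame so it can copy packet data in.
		 */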
1930181643Skmacy		if (!np->copying_receiver) {
1931181643Skmacy			gnttab_grant_foreign_transfer_ref(ref,
1932185605Skmacy			    xenbus_get_otherend_id(np->xbdev),
1933186557Skmacy			    pfn);
1934181643Skmacy		} else {
1935181643Skmacy			gnttab_grant_foreign_access_ref(ref,
1936185605Skmacy			    xenbus_get_otherend_id(np->xbdev),
1937186557Skmacy			    PFNTOMFN(pfn), 0);
1938181643Skmacy		}
1939181643Skmacy		req->gref = ref;
1940181643Skmacy		req->id   = requeue_idx;
1941181643Skmacy
1942181643Skmacy		requeue_idx++;
1943181643Skmacy	}
1944181643Skmacy
1945181643Skmacy	np->rx.req_prod_pvt = requeue_idx;
1946181643Skmacy
1947181643Skmacy	/* Step 3: All public and private state should now be sane.  Get
1948181643Skmacy	 * ready to start sending and receiving packets and give the driver
1949181643Skmacy	 * domain a kick because we've probably just requeued some
1950181643Skmacy	 * packets.
1951181643Skmacy	 */
1952181643Skmacy	netfront_carrier_on(np);
1953181643Skmacy	notify_remote_via_irq(np->irq);
1954181643Skmacy	XN_TX_LOCK(np);
1955181643Skmacy	xn_txeof(np);
1956181643Skmacy	XN_TX_UNLOCK(np);
1957181643Skmacy	network_alloc_rx_buffers(np);
1958181643Skmacy
1959181643Skmacy	return (0);
1960181643Skmacy}
1961181643Skmacy
1962181643Skmacystatic void
1963181643Skmacyshow_device(struct netfront_info *sc)
1964181643Skmacy{
1965181643Skmacy#ifdef DEBUG
1966181643Skmacy	if (sc) {
1967181643Skmacy		IPRINTK("<vif handle=%u %s(%s) evtchn=%u irq=%u tx=%p rx=%p>\n",
1968181643Skmacy			sc->xn_ifno,
1969181643Skmacy			be_state_name[sc->xn_backend_state],
1970181643Skmacy			sc->xn_user_state ? "open" : "closed",
1971181643Skmacy			sc->xn_evtchn,
1972181643Skmacy			sc->xn_irq,
1973181643Skmacy			sc->xn_tx_if,
1974181643Skmacy			sc->xn_rx_if);
1975181643Skmacy	} else {
1976181643Skmacy		IPRINTK("<vif NULL>\n");
1977181643Skmacy	}
1978181643Skmacy#endif
1979181643Skmacy}
1980181643Skmacy
1981181643Skmacy/** Create a network device.
1982181643Skmacy * @param dev device handle
1983181643Skmacy */
1984185605Skmacyint
1985185605Skmacycreate_netdev(device_t dev)
1986181643Skmacy{
1987181643Skmacy	int i;
1988181643Skmacy	struct netfront_info *np;
1989181643Skmacy	int err;
1990181643Skmacy	struct ifnet *ifp;
1991181643Skmacy
1992185605Skmacy	np = device_get_softc(dev);
1993181643Skmacy
1994181643Skmacy	np->xbdev         = dev;
1995181643Skmacy
1996181643Skmacy	XN_LOCK_INIT(np, xennetif);
1997199997Sgibbs
1998199997Sgibbs	ifmedia_init(&np->sc_media, 0, xn_ifmedia_upd, xn_ifmedia_sts);
1999199997Sgibbs	ifmedia_add(&np->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL);
2000199997Sgibbs	ifmedia_set(&np->sc_media, IFM_ETHER|IFM_MANUAL);
2001199997Sgibbs
2002181643Skmacy	np->rx_target     = RX_MIN_TARGET;
2003181643Skmacy	np->rx_min_target = RX_MIN_TARGET;
2004181643Skmacy	np->rx_max_target = RX_MAX_TARGET;
2005181643Skmacy
2006181643Skmacy	/* Initialise {tx,rx}_mbufs to be a free chain containing every entry. */
2007181643Skmacy	for (i = 0; i <= NET_TX_RING_SIZE; i++) {
2008181643Skmacy		np->tx_mbufs[i] = (void *) ((u_long) i+1);
2009214077Sgibbs		np->grant_tx_ref[i] = GRANT_REF_INVALID;
2010181643Skmacy	}
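	/*
	 * Each free slot stores the index of the next free slot, so the
	 * tx_mbufs array doubles as the freelist; entry 0 serves as the
	 * list head and is never handed out as a buffer id (see
	 * get_id_from_freelist()).
	 */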
2011208901Sken	np->tx_mbufs[NET_TX_RING_SIZE] = (void *)0;
2012208901Sken
2013181643Skmacy	for (i = 0; i <= NET_RX_RING_SIZE; i++) {
2015181643Skmacy		np->rx_mbufs[i] = NULL;
2016214077Sgibbs		np->grant_rx_ref[i] = GRANT_REF_INVALID;
2017181643Skmacy	}
2018181643Skmacy	/* A grant for every tx ring slot */
2019208901Sken	if (gnttab_alloc_grant_references(NET_TX_RING_SIZE,
2020208901Sken					  &np->gref_tx_head) != 0) {
2021204158Skmacy		IPRINTK("#### netfront can't alloc tx grant refs\n");
2022181643Skmacy		err = ENOMEM;
2023181643Skmacy		goto exit;
2024181643Skmacy	}
2025181643Skmacy	/* A grant for every rx ring slot */
2026181643Skmacy	if (gnttab_alloc_grant_references(RX_MAX_TARGET,
2027208901Sken					  &np->gref_rx_head) != 0) {
2028204158Skmacy		WPRINTK("#### netfront can't alloc rx grant refs\n");
2029181643Skmacy		gnttab_free_grant_references(np->gref_tx_head);
2030181643Skmacy		err = ENOMEM;
2031181643Skmacy		goto exit;
2032181643Skmacy	}
2033181643Skmacy
2034181643Skmacy	err = xen_net_read_mac(dev, np->mac);
2035181643Skmacy	if (err) {
2036185605Skmacy		xenbus_dev_fatal(dev, err, "parsing %s/mac",
2037185605Skmacy		    xenbus_get_node(dev));
2038181643Skmacy		goto out;
2039181643Skmacy	}
2040181643Skmacy
2041181643Skmacy	/* Set up ifnet structure */
2042185605Skmacy	ifp = np->xn_ifp = if_alloc(IFT_ETHER);
2043181643Skmacy	ifp->if_softc = np;
2044185605Skmacy	if_initname(ifp, "xn", device_get_unit(dev));
2045186557Skmacy	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2046181643Skmacy	ifp->if_ioctl = xn_ioctl;
2047181643Skmacy	ifp->if_output = ether_output;
2048181643Skmacy	ifp->if_start = xn_start;
2049204158Skmacy#ifdef notyet
2050204158Skmacy	ifp->if_watchdog = xn_watchdog;
2051204158Skmacy#endif
2052181643Skmacy	ifp->if_init = xn_ifinit;
2053181643Skmacy	ifp->if_mtu = ETHERMTU;
2054181643Skmacy	ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1;
2055181643Skmacy
2056181643Skmacy	ifp->if_hwassist = XN_CSUM_FEATURES;
2057181643Skmacy	ifp->if_capabilities = IFCAP_HWCSUM;
2058189699Sdfr#if __FreeBSD_version >= 700000
2059208901Sken	ifp->if_capabilities |= IFCAP_TSO4;
2060189699Sdfr	if (xn_enable_lro) {
2061189699Sdfr		int err = tcp_lro_init(&np->xn_lro);
2062189699Sdfr		if (err) {
2063189699Sdfr			device_printf(dev, "LRO initialization failed\n");
2064189699Sdfr			goto exit;
2065189699Sdfr		}
2066189699Sdfr		np->xn_lro.ifp = ifp;
2067189699Sdfr		ifp->if_capabilities |= IFCAP_LRO;
2068189699Sdfr	}
2069189699Sdfr#endif
2070181643Skmacy	ifp->if_capenable = ifp->if_capabilities;
2071181643Skmacy
2072181643Skmacy	ether_ifattach(ifp, np->mac);
2073181643Skmacy	callout_init(&np->xn_stat_ch, CALLOUT_MPSAFE);
2074181643Skmacy	netfront_carrier_off(np);
2075181643Skmacy
2076181643Skmacy	return (0);
2077181643Skmacy
2078181643Skmacyexit:
2079181643Skmacy	gnttab_free_grant_references(np->gref_tx_head);
2080181643Skmacyout:
2081181643Skmacy	panic("do something smart");
2083181643Skmacy}
2084181643Skmacy
2085181643Skmacy/**
2086181643Skmacy * Handle the change of state of the backend to Closing.  We must delete our
2087181643Skmacy * device-layer structures now, to ensure that writes are flushed through to
2088181643Skmacy * the backend.  Once this is done, we can switch to Closed in
2089181643Skmacy * acknowledgement.
2090181643Skmacy */
2091181643Skmacy#if 0
2092199997Sgibbsstatic void
2093199997Sgibbsnetfront_closing(device_t dev)
2094181643Skmacy{
2095181643Skmacy#if 0
2096181643Skmacy	struct netfront_info *info = dev->dev_driver_data;
2097181643Skmacy
2098181643Skmacy	DPRINTK("netfront_closing: %s removed\n", dev->nodename);
2099181643Skmacy
2100181643Skmacy	close_netdev(info);
2101181643Skmacy#endif
2102181643Skmacy	xenbus_switch_state(dev, XenbusStateClosed);
2103181643Skmacy}
2104181643Skmacy#endif
2105181643Skmacy
2106199997Sgibbsstatic int
2107199997Sgibbsnetfront_detach(device_t dev)
2108181643Skmacy{
2109185605Skmacy	struct netfront_info *info = device_get_softc(dev);
2110181643Skmacy
2111185605Skmacy	DPRINTK("%s\n", xenbus_get_node(dev));
2112181643Skmacy
2113181643Skmacy	netif_free(info);
2114181643Skmacy
2115181643Skmacy	return (0);
2116181643Skmacy}
2117181643Skmacy
2118199997Sgibbsstatic void
2119199997Sgibbsnetif_free(struct netfront_info *info)
2120181643Skmacy{
2121181643Skmacy	netif_disconnect_backend(info);
2122181643Skmacy#if 0
2123181643Skmacy	close_netdev(info);
2124181643Skmacy#endif
2125181643Skmacy}
2126181643Skmacy
2127199997Sgibbsstatic void
2128199997Sgibbsnetif_disconnect_backend(struct netfront_info *info)
2129181643Skmacy{
2130186557Skmacy	XN_RX_LOCK(info);
2131186557Skmacy	XN_TX_LOCK(info);
2132186557Skmacy	netfront_carrier_off(info);
2133186557Skmacy	XN_TX_UNLOCK(info);
2134186557Skmacy	XN_RX_UNLOCK(info);
2135186557Skmacy
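	/*
	 * Revoke the backend's access to the shared ring pages and mark
	 * our local ring references invalid.
	 */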
2136181643Skmacy	end_access(info->tx_ring_ref, info->tx.sring);
2137181643Skmacy	end_access(info->rx_ring_ref, info->rx.sring);
2138214077Sgibbs	info->tx_ring_ref = GRANT_REF_INVALID;
2139214077Sgibbs	info->rx_ring_ref = GRANT_REF_INVALID;
2140181643Skmacy	info->tx.sring = NULL;
2141181643Skmacy	info->rx.sring = NULL;
2142181643Skmacy
2143181643Skmacy	if (info->irq)
2144186557Skmacy		unbind_from_irqhandler(info->irq);
2145186557Skmacy
2146181643Skmacy	info->irq = 0;
2147181643Skmacy}
2148181643Skmacy
2149181643Skmacy
2150199997Sgibbsstatic void
2151199997Sgibbsend_access(int ref, void *page)
2152181643Skmacy{
2153214077Sgibbs	if (ref != GRANT_REF_INVALID)
2154183375Skmacy		gnttab_end_foreign_access(ref, page);
2155181643Skmacy}
2156181643Skmacy
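/*
 * A virtual interface has no real media to configure; report a fixed,
 * always-active manual link.
 */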
2157199997Sgibbsstatic int
2158199997Sgibbsxn_ifmedia_upd(struct ifnet *ifp)
2159199997Sgibbs{
2160199997Sgibbs	return (0);
2161199997Sgibbs}
2162199997Sgibbs
2163199997Sgibbsstatic void
2164199997Sgibbsxn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
2165199997Sgibbs{
2166199997Sgibbs	ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE;
2167199997Sgibbs	ifmr->ifm_active = IFM_ETHER|IFM_MANUAL;
2168199997Sgibbs}
2169199997Sgibbs
2170181643Skmacy/* ** Driver registration ** */
2171185605Skmacystatic device_method_t netfront_methods[] = {
2172185605Skmacy	/* Device interface */
2173185605Skmacy	DEVMETHOD(device_probe,         netfront_probe),
2174185605Skmacy	DEVMETHOD(device_attach,        netfront_attach),
2175185605Skmacy	DEVMETHOD(device_detach,        netfront_detach),
2176185605Skmacy	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
2177185605Skmacy	DEVMETHOD(device_suspend,       bus_generic_suspend),
2178185605Skmacy	DEVMETHOD(device_resume,        netfront_resume),
2179185605Skmacy
2180185605Skmacy	/* Xenbus interface */
2181214077Sgibbs	DEVMETHOD(xenbus_otherend_changed, netfront_backend_changed),
2182181643Skmacy
2183185605Skmacy	{ 0, 0 }
2184185605Skmacy};
2185181643Skmacy
2186185605Skmacystatic driver_t netfront_driver = {
2187185605Skmacy	"xn",
2188185605Skmacy	netfront_methods,
2189185605Skmacy	sizeof(struct netfront_info),
2190185605Skmacy};
2191185605Skmacydevclass_t netfront_devclass;
2192185605Skmacy
2193214077SgibbsDRIVER_MODULE(xe, xenbusb_front, netfront_driver, netfront_devclass, 0, 0);
2194