1/*
2 * Copyright (c) 2006, Cisco Systems, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors
15 *    may be used to endorse or promote products derived from this software
16 *    without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD: head/sys/dev/xen/netback/netback.c 228471 2011-12-13 14:06:01Z ed $");
33#include "opt_sctp.h"
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/sockio.h>
38#include <sys/mbuf.h>
39#include <sys/malloc.h>
40#include <sys/kernel.h>
41#include <sys/socket.h>
42#include <sys/queue.h>
43#include <sys/taskqueue.h>
44
45#include <sys/module.h>
46#include <sys/bus.h>
47#include <sys/sysctl.h>
48
49#include <net/if.h>
50#include <net/if_arp.h>
51#include <net/if_types.h>
52#include <net/ethernet.h>
53#include <net/if_bridgevar.h>
54
55#include <netinet/in_systm.h>
56#include <netinet/in.h>
57#include <netinet/in_var.h>
58#include <netinet/ip.h>
59#include <netinet/tcp.h>
60#include <netinet/udp.h>
61#ifdef SCTP
62#include <netinet/sctp.h>
63#include <netinet/sctp_crc32.h>
64#endif
65
66#include <vm/vm_extern.h>
67#include <vm/vm_kern.h>
68
69#include <machine/in_cksum.h>
70#include <machine/xen-os.h>
71#include <machine/hypervisor.h>
72#include <machine/hypervisor-ifs.h>
73#include <machine/xen_intr.h>
74#include <machine/evtchn.h>
75#include <machine/xenbus.h>
76#include <machine/gnttab.h>
77#include <machine/xen-public/memory.h>
78#include <dev/xen/xenbus/xenbus_comms.h>
79
80
81#ifdef XEN_NETBACK_DEBUG
82#define DPRINTF(fmt, args...) \
83    printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
84#else
85#define DPRINTF(fmt, args...) ((void)0)
86#endif
87
88#ifdef XEN_NETBACK_DEBUG_LOTS
89#define DDPRINTF(fmt, args...) \
90    printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
91#define DPRINTF_MBUF(_m) print_mbuf(_m, 0)
92#define DPRINTF_MBUF_LEN(_m, _len) print_mbuf(_m, _len)
93#else
94#define DDPRINTF(fmt, args...) ((void)0)
95#define DPRINTF_MBUF(_m) ((void)0)
96#define DPRINTF_MBUF_LEN(_m, _len) ((void)0)
97#endif
98
99#define WPRINTF(fmt, args...) \
100    printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
101
102#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
103#define BUG_ON PANIC_IF
104
105#define IFNAME(_np) (_np)->ifp->if_xname
106
107#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
108#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
109
110struct ring_ref {
111	vm_offset_t va;
112	grant_handle_t handle;
113	uint64_t bus_addr;
114};
115
116typedef struct netback_info {
117
118	/* Schedule lists */
119	STAILQ_ENTRY(netback_info) next_tx;
120	STAILQ_ENTRY(netback_info) next_rx;
121	int on_tx_sched_list;
122	int on_rx_sched_list;
123
124	struct xenbus_device *xdev;
125	XenbusState frontend_state;
126
127	domid_t domid;
128	int handle;
129	char *bridge;
130
131	int rings_connected;
132	struct ring_ref tx_ring_ref;
133	struct ring_ref rx_ring_ref;
134	netif_tx_back_ring_t tx;
135	netif_rx_back_ring_t rx;
136	evtchn_port_t evtchn;
137	int irq;
138	void *irq_cookie;
139
140	struct ifnet *ifp;
141	int ref_cnt;
142
143	device_t ndev;
144	int attached;
145} netif_t;
146
147
148#define MAX_PENDING_REQS 256
149#define PKT_PROT_LEN 64
150
151static struct {
152	netif_tx_request_t req;
153	netif_t *netif;
154} pending_tx_info[MAX_PENDING_REQS];
155static uint16_t pending_ring[MAX_PENDING_REQS];
156typedef unsigned int PEND_RING_IDX;
157#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
158static PEND_RING_IDX pending_prod, pending_cons;
159#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
160
161static unsigned long mmap_vstart;
162#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
163
164/* Freed TX mbufs get batched on this ring before return to pending_ring. */
165static uint16_t dealloc_ring[MAX_PENDING_REQS];
166static PEND_RING_IDX dealloc_prod, dealloc_cons;
167
168static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
169static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
170static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE];
171
172static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
173static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
174static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
175
176static struct task net_tx_task, net_rx_task;
177static struct callout rx_task_callout;
178
179static STAILQ_HEAD(netback_tx_sched_list, netback_info) tx_sched_list =
180	STAILQ_HEAD_INITIALIZER(tx_sched_list);
181static STAILQ_HEAD(netback_rx_sched_list, netback_info) rx_sched_list =
182	STAILQ_HEAD_INITIALIZER(rx_sched_list);
183static struct mtx tx_sched_list_lock;
184static struct mtx rx_sched_list_lock;
185
186static int vif_unit_maker = 0;
187
188/* Protos */
189static void netback_start(struct ifnet *ifp);
190static int netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
191static int vif_add_dev(struct xenbus_device *xdev);
192static void disconnect_rings(netif_t *netif);
193
194#ifdef XEN_NETBACK_DEBUG_LOTS
195/* Debug code to display the contents of an mbuf */
196static void
197print_mbuf(struct mbuf *m, int max)
198{
199	int i, j=0;
200	printf("mbuf %08x len = %d", (unsigned int)m, m->m_pkthdr.len);
201	for (; m; m = m->m_next) {
202		unsigned char *d = m->m_data;
203		for (i=0; i < m->m_len; i++) {
204			if (max && j == max)
205				break;
206			if ((j++ % 16) == 0)
207				printf("\n%04x:", j);
208			printf(" %02x", d[i]);
209		}
210	}
211	printf("\n");
212}
213#endif
214
215
216#define MAX_MFN_ALLOC 64
217static unsigned long mfn_list[MAX_MFN_ALLOC];
218static unsigned int alloc_index = 0;
219
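/* Hand out one machine frame, refilling the cache via XENMEM_increase_reservation when empty */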
220static unsigned long
221alloc_mfn(void)
222{
223	unsigned long mfn = 0;
224	struct xen_memory_reservation reservation = {
225		.extent_start = mfn_list,
226		.nr_extents   = MAX_MFN_ALLOC,
227		.extent_order = 0,
228		.domid        = DOMID_SELF
229	};
230	if ( unlikely(alloc_index == 0) )
231		alloc_index = HYPERVISOR_memory_op(
232			XENMEM_increase_reservation, &reservation);
233	if ( alloc_index != 0 )
234		mfn = mfn_list[--alloc_index];
235	return mfn;
236}
237
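/* Allocate a kernel virtual range and hand its backing machine pages back */
/* to Xen, leaving the range free to back grant mappings */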
238static unsigned long
239alloc_empty_page_range(unsigned long nr_pages)
240{
241	void *pages;
242	int i = 0, j = 0;
243	multicall_entry_t mcl[17];
244	unsigned long mfn_list[16];
245	struct xen_memory_reservation reservation = {
246		.extent_start = mfn_list,
247		.nr_extents   = 0,
248		.address_bits = 0,
249		.extent_order = 0,
250		.domid        = DOMID_SELF
251	};
252
253	pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT);
254	if (pages == NULL)
255		return 0;
256
257	memset(mcl, 0, sizeof(mcl));
258
259	while (i < nr_pages) {
260		unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE);
261
262		mcl[j].op = __HYPERVISOR_update_va_mapping;
263		mcl[j].args[0] = va;
264
265		mfn_list[j++] = vtomach(va) >> PAGE_SHIFT;
266
267		xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY;
268
269		if (j == 16 || i == nr_pages) {
270			mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL;
271
272			reservation.nr_extents = j;
273
274			mcl[j].op = __HYPERVISOR_memory_op;
275			mcl[j].args[0] = XENMEM_decrease_reservation;
276			mcl[j].args[1] =  (unsigned long)&reservation;
277
278			(void)HYPERVISOR_multicall(mcl, j+1);
279
280			mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0;
281			j = 0;
282		}
283	}
284
285	return (unsigned long)pages;
286}
287
288#ifdef XEN_NETBACK_FIXUP_CSUM
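/* Finish delayed TCP/UDP/SCTP checksums in software for packets we forward locally */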
289static void
290fixup_checksum(struct mbuf *m)
291{
292	struct ether_header *eh = mtod(m, struct ether_header *);
293	struct ip *ip = (struct ip *)(eh + 1);
294	int iphlen = ip->ip_hl << 2;
295	int iplen = ntohs(ip->ip_len);
296
297	if ((m->m_pkthdr.csum_flags & CSUM_TCP)) {
298		struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + iphlen);
299		th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
300			htons(IPPROTO_TCP + (iplen - iphlen)));
301		th->th_sum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen);
302		m->m_pkthdr.csum_flags &= ~CSUM_TCP;
303#ifdef SCTP
	} else if (m->m_pkthdr.csum_flags & CSUM_SCTP) {
		sctp_delayed_cksum(m, iphlen);
		m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
307#endif
308	} else {
309		u_short csum;
310		struct udphdr *uh = (struct udphdr *)((caddr_t)ip + iphlen);
311		uh->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
312			htons(IPPROTO_UDP + (iplen - iphlen)));
313		if ((csum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen)) == 0)
314			csum = 0xffff;
315		uh->uh_sum = csum;
316		m->m_pkthdr.csum_flags &= ~CSUM_UDP;
317	}
318}
319#endif
320
321/* Add the interface to the specified bridge */
322static int
323add_to_bridge(struct ifnet *ifp, char *bridge)
324{
325	struct ifdrv ifd;
326	struct ifbreq ifb;
327	struct ifnet *ifp_bridge = ifunit(bridge);
328
329	if (!ifp_bridge)
330		return ENOENT;
331
332	bzero(&ifd, sizeof(ifd));
333	bzero(&ifb, sizeof(ifb));
334
335	strcpy(ifb.ifbr_ifsname, ifp->if_xname);
336	strcpy(ifd.ifd_name, ifp->if_xname);
337	ifd.ifd_cmd = BRDGADD;
338	ifd.ifd_len = sizeof(ifb);
339	ifd.ifd_data = &ifb;
340
341	return bridge_ioctl_kern(ifp_bridge, SIOCSDRVSPEC, &ifd);
342
343}
344
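/* Allocate the netif and its ifnet for a new frontend (attached later by vif_attach()) */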
345static int
346netif_create(int handle, struct xenbus_device *xdev, char *bridge)
347{
348	netif_t *netif;
349	struct ifnet *ifp;
350
351	netif = (netif_t *)malloc(sizeof(*netif), M_DEVBUF, M_NOWAIT | M_ZERO);
352	if (!netif)
353		return ENOMEM;
354
355	netif->ref_cnt = 1;
356	netif->handle = handle;
357	netif->domid = xdev->otherend_id;
358	netif->xdev = xdev;
359	netif->bridge = bridge;
360	xdev->data = netif;
361
362	/* Set up ifnet structure */
363	ifp = netif->ifp = if_alloc(IFT_ETHER);
364	if (!ifp) {
365		if (bridge)
366			free(bridge, M_DEVBUF);
367		free(netif, M_DEVBUF);
368		return ENOMEM;
369	}
370
371	ifp->if_softc = netif;
372	if_initname(ifp, "vif",
373		atomic_fetchadd_int(&vif_unit_maker, 1) /* ifno */ );
374	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX;
375	ifp->if_output = ether_output;
376	ifp->if_start = netback_start;
377	ifp->if_ioctl = netback_ioctl;
378	ifp->if_mtu = ETHERMTU;
379	ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1;
380
381	DPRINTF("Created %s for domid=%d handle=%d\n", IFNAME(netif), netif->domid, netif->handle);
382
383	return 0;
384}
385
386static void
387netif_get(netif_t *netif)
388{
389	atomic_add_int(&netif->ref_cnt, 1);
390}
391
392static void
393netif_put(netif_t *netif)
394{
395	if (atomic_fetchadd_int(&netif->ref_cnt, -1) == 1) {
396		DPRINTF("%s\n", IFNAME(netif));
397		disconnect_rings(netif);
398		if (netif->ifp) {
399			if_free(netif->ifp);
400			netif->ifp = NULL;
401		}
402		if (netif->bridge)
403			free(netif->bridge, M_DEVBUF);
404		free(netif, M_DEVBUF);
405	}
406}
407
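/* Interface ioctl handler; everything except SIOCSIFFLAGS is passed to ether_ioctl() */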
408static int
409netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
410{
411	switch (cmd) {
412	case SIOCSIFFLAGS:
		DDPRINTF("%s cmd=SIOCSIFFLAGS flags=%x\n",
			IFNAME((struct netback_info *)ifp->if_softc), ((struct ifreq *)data)->ifr_flags);
415		return 0;
416	}
417
418	DDPRINTF("%s cmd=%lx\n", IFNAME((struct netback_info *)ifp->if_softc), cmd);
419
420	return ether_ioctl(ifp, cmd, data);
421}
422
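/* Kick the TX task if the pending ring is under half full and an interface is waiting */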
423static inline void
424maybe_schedule_tx_action(void)
425{
426	smp_mb();
427	if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) && !STAILQ_EMPTY(&tx_sched_list))
428		taskqueue_enqueue(taskqueue_swi, &net_tx_task);
429}
430
431/* Removes netif from front of list and does not call netif_put() (caller must) */
432static netif_t *
433remove_from_tx_schedule_list(void)
434{
435	netif_t *netif;
436
437	mtx_lock(&tx_sched_list_lock);
438
439	if ((netif = STAILQ_FIRST(&tx_sched_list))) {
440		STAILQ_REMOVE(&tx_sched_list, netif, netback_info, next_tx);
441		STAILQ_NEXT(netif, next_tx) = NULL;
442		netif->on_tx_sched_list = 0;
443	}
444
445	mtx_unlock(&tx_sched_list_lock);
446
447	return netif;
448}
449
450/* Adds netif to end of list and calls netif_get() */
451static void
452add_to_tx_schedule_list_tail(netif_t *netif)
453{
454	if (netif->on_tx_sched_list)
455		return;
456
457	mtx_lock(&tx_sched_list_lock);
458	if (!netif->on_tx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) {
459		netif_get(netif);
460		STAILQ_INSERT_TAIL(&tx_sched_list, netif, next_tx);
461		netif->on_tx_sched_list = 1;
462	}
463	mtx_unlock(&tx_sched_list_lock);
464}
465
466/*
467 * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
468 * If this driver is pipelining transmit requests then we can be very
469 * aggressive in avoiding new-packet notifications -- frontend only needs to
470 * send a notification if there are no outstanding unreceived responses.
 * If we may be buffering transmit requests for any reason then we must be rather
472 * more conservative and treat this as the final check for pending work.
473 */
474static void
475netif_schedule_tx_work(netif_t *netif)
476{
477	int more_to_do;
478
479#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
480	more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
481#else
482	RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
483#endif
484
485	if (more_to_do) {
486		DDPRINTF("Adding %s to tx sched list\n", IFNAME(netif));
487		add_to_tx_schedule_list_tail(netif);
488		maybe_schedule_tx_action();
489	}
490}
491
492static struct mtx dealloc_lock;
493MTX_SYSINIT(netback_dealloc, &dealloc_lock, "DEALLOC LOCK", MTX_SPIN | MTX_NOWITNESS);
494
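/* Queue a finished pending slot on the dealloc ring and kick the TX task to unmap its grant */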
495static void
496netif_idx_release(uint16_t pending_idx)
497{
498	mtx_lock_spin(&dealloc_lock);
499	dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
500	mtx_unlock_spin(&dealloc_lock);
501
502	taskqueue_enqueue(taskqueue_swi, &net_tx_task);
503}
504
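/* Post a TX completion on the shared ring and notify the frontend if required */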
505static void
506make_tx_response(netif_t *netif,
507				 uint16_t    id,
508				 int8_t      st)
509{
510	RING_IDX i = netif->tx.rsp_prod_pvt;
511	netif_tx_response_t *resp;
512	int notify;
513
514	resp = RING_GET_RESPONSE(&netif->tx, i);
515	resp->id     = id;
516	resp->status = st;
517
518	netif->tx.rsp_prod_pvt = ++i;
519	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
520	if (notify)
521		notify_remote_via_irq(netif->irq);
522
523#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
524	if (i == netif->tx.req_cons) {
525		int more_to_do;
526		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
527		if (more_to_do)
528			add_to_tx_schedule_list_tail(netif);
529	}
530#endif
531}
532
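/* Unmap grants for completed TX requests, send their responses and recycle the pending slots */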
533static inline void
534net_tx_action_dealloc(void)
535{
536	gnttab_unmap_grant_ref_t *gop;
537	uint16_t pending_idx;
538	PEND_RING_IDX dc, dp;
539	netif_t *netif;
540	int ret;
541
542	dc = dealloc_cons;
543	dp = dealloc_prod;
544
545	/*
546	 * Free up any grants we have finished using
547	 */
548	gop = tx_unmap_ops;
549	while (dc != dp) {
550		pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
551		gop->host_addr    = MMAP_VADDR(pending_idx);
552		gop->dev_bus_addr = 0;
553		gop->handle       = grant_tx_handle[pending_idx];
554		gop++;
555	}
556	ret = HYPERVISOR_grant_table_op(
557		GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
558	BUG_ON(ret);
559
560	while (dealloc_cons != dp) {
561		pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
562
563		netif = pending_tx_info[pending_idx].netif;
564
565		make_tx_response(netif, pending_tx_info[pending_idx].req.id,
566				 NETIF_RSP_OKAY);
567
568		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
569
570		netif_put(netif);
571	}
572}
573
574static void
575netif_page_release(void *buf, void *args)
576{
577	uint16_t pending_idx = (unsigned int)args;
578
579	DDPRINTF("pending_idx=%u\n", pending_idx);
580
581	KASSERT(pending_idx < MAX_PENDING_REQS, ("%s: bad index %u", __func__, pending_idx));
582
583	netif_idx_release(pending_idx);
584}
585
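/* TX task: map frontend grants, build mbufs from the requests and feed them to the local stack */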
586static void
587net_tx_action(void *context, int pending)
588{
589	struct mbuf *m;
590	netif_t *netif;
591	netif_tx_request_t txreq;
592	uint16_t pending_idx;
593	RING_IDX i;
594	gnttab_map_grant_ref_t *mop;
595	int ret, work_to_do;
596	struct mbuf *txq = NULL, *txq_last = NULL;
597
598	if (dealloc_cons != dealloc_prod)
599		net_tx_action_dealloc();
600
601	mop = tx_map_ops;
602	while ((NR_PENDING_REQS < MAX_PENDING_REQS) && !STAILQ_EMPTY(&tx_sched_list)) {
603
604		/* Get a netif from the list with work to do. */
605		netif = remove_from_tx_schedule_list();
606
607		DDPRINTF("Processing %s (prod=%u, cons=%u)\n",
608				IFNAME(netif), netif->tx.sring->req_prod, netif->tx.req_cons);
609
610		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
611		if (!work_to_do) {
612			netif_put(netif);
613			continue;
614		}
615
616		i = netif->tx.req_cons;
617		rmb(); /* Ensure that we see the request before we copy it. */
618		memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
619
		/* If we want credit-based scheduling, it could be added here - WORK */
621
622		netif->tx.req_cons++;
623
624		netif_schedule_tx_work(netif);
625
626		if (unlikely(txreq.size < ETHER_HDR_LEN) ||
627		    unlikely(txreq.size > (ETHER_MAX_LEN-ETHER_CRC_LEN))) {
628			WPRINTF("Bad packet size: %d\n", txreq.size);
629			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
630			netif_put(netif);
631			continue;
632		}
633
		/* The payload must not cross a page boundary, as it cannot be fragmented. */
635		if (unlikely((txreq.offset + txreq.size) >= PAGE_SIZE)) {
636			WPRINTF("txreq.offset: %x, size: %u, end: %u\n",
637				txreq.offset, txreq.size,
638				(txreq.offset & PAGE_MASK) + txreq.size);
639			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
640			netif_put(netif);
641			continue;
642		}
643
644		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
645
646		MGETHDR(m, M_DONTWAIT, MT_DATA);
647		if (!m) {
648			WPRINTF("Failed to allocate mbuf\n");
649			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
650			netif_put(netif);
651			break;
652		}
653		m->m_pkthdr.rcvif = netif->ifp;
654
655		if ((m->m_pkthdr.len = txreq.size) > PKT_PROT_LEN) {
656			struct mbuf *n;
657			MGET(n, M_DONTWAIT, MT_DATA);
658			if (!(m->m_next = n)) {
659				m_freem(m);
660				WPRINTF("Failed to allocate second mbuf\n");
661				make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
662				netif_put(netif);
663				break;
664			}
665			n->m_len = txreq.size - PKT_PROT_LEN;
666			m->m_len = PKT_PROT_LEN;
667		} else
668			m->m_len = txreq.size;
669
670		mop->host_addr = MMAP_VADDR(pending_idx);
671		mop->dom       = netif->domid;
672		mop->ref       = txreq.gref;
673		mop->flags     = GNTMAP_host_map | GNTMAP_readonly;
674		mop++;
675
676		memcpy(&pending_tx_info[pending_idx].req,
677		       &txreq, sizeof(txreq));
678		pending_tx_info[pending_idx].netif = netif;
679		*((uint16_t *)m->m_data) = pending_idx;
680
681		if (txq_last)
682			txq_last->m_nextpkt = m;
683		else
684			txq = m;
685		txq_last = m;
686
687		pending_cons++;
688
689		if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
690			break;
691	}
692
693	if (!txq)
694		return;
695
696	ret = HYPERVISOR_grant_table_op(
697		GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
698	BUG_ON(ret);
699
700	mop = tx_map_ops;
701	while ((m = txq) != NULL) {
702		caddr_t data;
703
704		txq = m->m_nextpkt;
705		m->m_nextpkt = NULL;
706
707		pending_idx = *((uint16_t *)m->m_data);
708		netif       = pending_tx_info[pending_idx].netif;
709		memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));
710
711		/* Check the remap error code. */
712		if (unlikely(mop->status)) {
713			WPRINTF("#### netback grant fails\n");
714			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
715			netif_put(netif);
716			m_freem(m);
717			mop++;
718			pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
719			continue;
720		}
721
722#if 0
723		/* Can't do this in FreeBSD since vtophys() returns the pfn */
724		/* of the remote domain who loaned us the machine page - DPT */
725		xen_phys_machine[(vtophys(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT)] =
726			mop->dev_bus_addr >> PAGE_SHIFT;
727#endif
728		grant_tx_handle[pending_idx] = mop->handle;
729
730		/* Setup data in mbuf (lengths are already set) */
731		data = (caddr_t)(MMAP_VADDR(pending_idx)|txreq.offset);
732		bcopy(data, m->m_data, m->m_len);
733		if (m->m_next) {
734			struct mbuf *n = m->m_next;
735			MEXTADD(n, MMAP_VADDR(pending_idx), PAGE_SIZE, netif_page_release,
736				(void *)(unsigned int)pending_idx, M_RDONLY, EXT_NET_DRV);
737			n->m_data = &data[PKT_PROT_LEN];
738		} else {
739			/* Schedule a response immediately. */
740			netif_idx_release(pending_idx);
741		}
742
743		if ((txreq.flags & NETTXF_data_validated)) {
744			/* Tell the stack the checksums are okay */
745			m->m_pkthdr.csum_flags |=
746				(CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
747			m->m_pkthdr.csum_data = 0xffff;
748		}
749
		/* If necessary, tell the stack to compute the checksums if it forwards the packet */
751		if ((txreq.flags & NETTXF_csum_blank)) {
752			struct ether_header *eh = mtod(m, struct ether_header *);
753			if (ntohs(eh->ether_type) == ETHERTYPE_IP) {
754				struct ip *ip = (struct ip *)&m->m_data[14];
755				if (ip->ip_p == IPPROTO_TCP)
756					m->m_pkthdr.csum_flags |= CSUM_TCP;
757				else if (ip->ip_p == IPPROTO_UDP)
758					m->m_pkthdr.csum_flags |= CSUM_UDP;
759			}
760		}
761
762		netif->ifp->if_ibytes += m->m_pkthdr.len;
763		netif->ifp->if_ipackets++;
764
765		DDPRINTF("RECV %d bytes from %s (cflags=%x)\n",
766			m->m_pkthdr.len, IFNAME(netif), m->m_pkthdr.csum_flags);
767		DPRINTF_MBUF_LEN(m, 128);
768
769		(*netif->ifp->if_input)(netif->ifp, m);
770
771		mop++;
772	}
773}
774
775/* Handle interrupt from a frontend */
776static void
777netback_intr(void *arg)
778{
779	netif_t *netif = arg;
780	DDPRINTF("%s\n", IFNAME(netif));
781	add_to_tx_schedule_list_tail(netif);
782	maybe_schedule_tx_action();
783}
784
785/* Removes netif from front of list and does not call netif_put() (caller must) */
786static netif_t *
787remove_from_rx_schedule_list(void)
788{
789	netif_t *netif;
790
791	mtx_lock(&rx_sched_list_lock);
792
793	if ((netif = STAILQ_FIRST(&rx_sched_list))) {
794		STAILQ_REMOVE(&rx_sched_list, netif, netback_info, next_rx);
795		STAILQ_NEXT(netif, next_rx) = NULL;
796		netif->on_rx_sched_list = 0;
797	}
798
799	mtx_unlock(&rx_sched_list_lock);
800
801	return netif;
802}
803
804/* Adds netif to end of list and calls netif_get() */
805static void
806add_to_rx_schedule_list_tail(netif_t *netif)
807{
808	if (netif->on_rx_sched_list)
809		return;
810
811	mtx_lock(&rx_sched_list_lock);
812	if (!netif->on_rx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) {
813		netif_get(netif);
814		STAILQ_INSERT_TAIL(&rx_sched_list, netif, next_rx);
815		netif->on_rx_sched_list = 1;
816	}
817	mtx_unlock(&rx_sched_list_lock);
818}
819
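/* Post an RX response on the shared ring; returns nonzero if the frontend needs a notification */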
820static int
821make_rx_response(netif_t *netif, uint16_t id, int8_t st,
822				 uint16_t offset, uint16_t size, uint16_t flags)
823{
824	RING_IDX i = netif->rx.rsp_prod_pvt;
825	netif_rx_response_t *resp;
826	int notify;
827
828	resp = RING_GET_RESPONSE(&netif->rx, i);
829	resp->offset     = offset;
830	resp->flags      = flags;
831	resp->id         = id;
832	resp->status     = (int16_t)size;
833	if (st < 0)
834		resp->status = (int16_t)st;
835
836	DDPRINTF("rx resp(%d): off=%x fl=%x id=%x stat=%d\n",
837		i, resp->offset, resp->flags, resp->id, resp->status);
838
839	netif->rx.rsp_prod_pvt = ++i;
840	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, notify);
841
842	return notify;
843}
844
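/* Drain if_snd, transferring each packet's page to the frontend; returns packets dequeued */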
845static int
846netif_rx(netif_t *netif)
847{
848	struct ifnet *ifp = netif->ifp;
849	struct mbuf *m;
850	multicall_entry_t *mcl;
851	mmu_update_t *mmu;
852	gnttab_transfer_t *gop;
853	unsigned long vdata, old_mfn, new_mfn;
854	struct mbuf *rxq = NULL, *rxq_last = NULL;
855	int ret, notify = 0, pkts_dequeued = 0;
856
857	DDPRINTF("%s\n", IFNAME(netif));
858
859	mcl = rx_mcl;
860	mmu = rx_mmu;
861	gop = grant_rx_op;
862
863	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
864
865		/* Quit if the target domain has no receive buffers */
866		if (netif->rx.req_cons == netif->rx.sring->req_prod)
867			break;
868
869		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
870		if (m == NULL)
871			break;
872
873		pkts_dequeued++;
874
875		/* Check if we need to copy the data */
876		if (((m->m_flags & (M_RDONLY|M_EXT)) != M_EXT) ||
877			(*m->m_ext.ref_cnt > 1) || m->m_next != NULL) {
878			struct mbuf *n;
879
880			DDPRINTF("copying mbuf (fl=%x ext=%x rc=%d n=%x)\n",
881				m->m_flags,
882				(m->m_flags & M_EXT) ? m->m_ext.ext_type : 0,
883				(m->m_flags & M_EXT) ? *m->m_ext.ref_cnt : 0,
884				(unsigned int)m->m_next);
885
886			/* Make copy */
887			MGETHDR(n, M_DONTWAIT, MT_DATA);
888			if (!n)
889				goto drop;
890
891			MCLGET(n, M_DONTWAIT);
892			if (!(n->m_flags & M_EXT)) {
893				m_freem(n);
894				goto drop;
895			}
896
897			/* Leave space at front and keep current alignment */
898			n->m_data += 16 + ((unsigned int)m->m_data & 0x3);
899
900			if (m->m_pkthdr.len > M_TRAILINGSPACE(n)) {
901				WPRINTF("pkt to big %d\n", m->m_pkthdr.len);
902				m_freem(n);
903				goto drop;
904			}
905			m_copydata(m, 0, m->m_pkthdr.len, n->m_data);
906			n->m_pkthdr.len = n->m_len = m->m_pkthdr.len;
907			n->m_pkthdr.csum_flags = (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA);
908			m_freem(m);
909			m = n;
910		}
911
912		vdata = (unsigned long)m->m_data;
913		old_mfn = vtomach(vdata) >> PAGE_SHIFT;
914
915		if ((new_mfn = alloc_mfn()) == 0)
916			goto drop;
917
918#ifdef XEN_NETBACK_FIXUP_CSUM
919		/* Check if we need to compute a checksum.  This happens */
920		/* when bridging from one domain to another. */
921		if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) ||
922			(m->m_pkthdr.csum_flags & CSUM_SCTP))
923			fixup_checksum(m);
924#endif
925
926		xen_phys_machine[(vtophys(vdata) >> PAGE_SHIFT)] = new_mfn;
927
928		mcl->op = __HYPERVISOR_update_va_mapping;
929		mcl->args[0] = vdata;
930		mcl->args[1] = (new_mfn << PAGE_SHIFT) | PG_V | PG_RW | PG_M | PG_A;
931		mcl->args[2] = 0;
932		mcl->args[3] = 0;
933		mcl++;
934
935		gop->mfn = old_mfn;
936		gop->domid = netif->domid;
937		gop->ref = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons)->gref;
938		netif->rx.req_cons++;
939		gop++;
940
941		mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
942		mmu->val = vtophys(vdata) >> PAGE_SHIFT;
943		mmu++;
944
945		if (rxq_last)
946			rxq_last->m_nextpkt = m;
947		else
948			rxq = m;
949		rxq_last = m;
950
951		DDPRINTF("XMIT %d bytes to %s\n", m->m_pkthdr.len, IFNAME(netif));
952		DPRINTF_MBUF_LEN(m, 128);
953
954		/* Filled the batch queue? */
955		if ((gop - grant_rx_op) == ARRAY_SIZE(grant_rx_op))
956			break;
957
958		continue;
959	drop:
960		DDPRINTF("dropping pkt\n");
961		ifp->if_oerrors++;
962		m_freem(m);
963	}
964
965	if (mcl == rx_mcl)
966		return pkts_dequeued;
967
968	mcl->op = __HYPERVISOR_mmu_update;
969	mcl->args[0] = (unsigned long)rx_mmu;
970	mcl->args[1] = mmu - rx_mmu;
971	mcl->args[2] = 0;
972	mcl->args[3] = DOMID_SELF;
973	mcl++;
974
975	mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
976	ret = HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
977	BUG_ON(ret != 0);
978
979	ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, gop - grant_rx_op);
980	BUG_ON(ret != 0);
981
982	mcl = rx_mcl;
983	gop = grant_rx_op;
984
985	while ((m = rxq) != NULL) {
986		int8_t status;
987		uint16_t id, flags = 0;
988
989		rxq = m->m_nextpkt;
990		m->m_nextpkt = NULL;
991
992		/* Rederive the machine addresses. */
993		new_mfn = mcl->args[1] >> PAGE_SHIFT;
994		old_mfn = gop->mfn;
995
996		ifp->if_obytes += m->m_pkthdr.len;
997		ifp->if_opackets++;
998
999		/* The update_va_mapping() must not fail. */
1000		BUG_ON(mcl->result != 0);
1001
1002		/* Setup flags */
1003		if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA))
1004			flags |= NETRXF_csum_blank | NETRXF_data_validated;
1005		else if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID))
1006			flags |= NETRXF_data_validated;
1007
1008		/* Check the reassignment error code. */
1009		status = NETIF_RSP_OKAY;
1010		if (gop->status != 0) {
1011			DPRINTF("Bad status %d from grant transfer to DOM%u\n",
1012				gop->status, netif->domid);
1013			/*
1014			 * Page no longer belongs to us unless GNTST_bad_page,
1015			 * but that should be a fatal error anyway.
1016			 */
1017			BUG_ON(gop->status == GNTST_bad_page);
1018			status = NETIF_RSP_ERROR;
1019		}
1020		id = RING_GET_REQUEST(&netif->rx, netif->rx.rsp_prod_pvt)->id;
1021		notify |= make_rx_response(netif, id, status,
1022					(unsigned long)m->m_data & PAGE_MASK,
1023					m->m_pkthdr.len, flags);
1024
1025		m_freem(m);
1026		mcl++;
1027		gop++;
1028	}
1029
1030	if (notify)
1031		notify_remote_via_irq(netif->irq);
1032
1033	return pkts_dequeued;
1034}
1035
1036static void
1037rx_task_timer(void *arg)
1038{
1039	DDPRINTF("\n");
1040	taskqueue_enqueue(taskqueue_swi, &net_rx_task);
1041}
1042
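/* RX task: service interfaces on the rx schedule list, retrying via a callout when stalled */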
1043static void
1044net_rx_action(void *context, int pending)
1045{
1046	netif_t *netif, *last_zero_work = NULL;
1047
1048	DDPRINTF("\n");
1049
1050	while ((netif = remove_from_rx_schedule_list())) {
1051		struct ifnet *ifp = netif->ifp;
1052
1053		if (netif == last_zero_work) {
1054			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1055				add_to_rx_schedule_list_tail(netif);
1056			netif_put(netif);
1057			if (!STAILQ_EMPTY(&rx_sched_list))
1058				callout_reset(&rx_task_callout, 1, rx_task_timer, NULL);
1059			break;
1060		}
1061
1062		if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1063			if (netif_rx(netif))
1064				last_zero_work = NULL;
1065			else if (!last_zero_work)
1066				last_zero_work = netif;
1067			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1068				add_to_rx_schedule_list_tail(netif);
1069		}
1070
1071		netif_put(netif);
1072	}
1073}
1074
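/* if_start handler: schedule the RX task, which pushes packets from if_snd to the frontend */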
1075static void
1076netback_start(struct ifnet *ifp)
1077{
1078	netif_t *netif = (netif_t *)ifp->if_softc;
1079
1080	DDPRINTF("%s\n", IFNAME(netif));
1081
1082	add_to_rx_schedule_list_tail(netif);
1083	taskqueue_enqueue(taskqueue_swi, &net_rx_task);
1084}
1085
1086/* Map a grant ref to a ring */
1087static int
1088map_ring(grant_ref_t ref, domid_t dom, struct ring_ref *ring)
1089{
1090	struct gnttab_map_grant_ref op;
1091
1092	ring->va = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
1093	if (ring->va == 0)
1094		return ENOMEM;
1095
1096	op.host_addr = ring->va;
1097	op.flags = GNTMAP_host_map;
1098	op.ref = ref;
1099	op.dom = dom;
1100	HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
1101	if (op.status) {
1102		WPRINTF("grant table op err=%d\n", op.status);
1103		kmem_free(kernel_map, ring->va, PAGE_SIZE);
1104		ring->va = 0;
1105		return EACCES;
1106	}
1107
1108	ring->handle = op.handle;
1109	ring->bus_addr = op.dev_bus_addr;
1110
1111	return 0;
1112}
1113
1114/* Unmap grant ref for a ring */
1115static void
1116unmap_ring(struct ring_ref *ring)
1117{
1118	struct gnttab_unmap_grant_ref op;
1119
1120	op.host_addr = ring->va;
1121	op.dev_bus_addr = ring->bus_addr;
1122	op.handle = ring->handle;
1123	HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1);
1124	if (op.status)
1125		WPRINTF("grant table op err=%d\n", op.status);
1126
1127	kmem_free(kernel_map, ring->va, PAGE_SIZE);
1128	ring->va = 0;
1129}
1130
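/* Map the frontend's TX/RX rings and bind its event channel as published in the xenstore */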
1131static int
1132connect_rings(netif_t *netif)
1133{
1134	struct xenbus_device *xdev = netif->xdev;
1135	netif_tx_sring_t *txs;
1136	netif_rx_sring_t *rxs;
1137	unsigned long tx_ring_ref, rx_ring_ref;
1138	evtchn_port_t evtchn;
1139	evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
1140	int err;
1141
	/* Grab frontend data and map its memory */
1143	err = xenbus_gather(NULL, xdev->otherend,
1144			"tx-ring-ref", "%lu", &tx_ring_ref,
1145		    "rx-ring-ref", "%lu", &rx_ring_ref,
1146		    "event-channel", "%u", &evtchn, NULL);
1147	if (err) {
1148		xenbus_dev_fatal(xdev, err,
1149			"reading %s/ring-ref and event-channel",
1150			xdev->otherend);
1151		return err;
1152	}
1153
1154	err = map_ring(tx_ring_ref, netif->domid, &netif->tx_ring_ref);
1155	if (err) {
1156		xenbus_dev_fatal(xdev, err, "mapping tx ring");
1157		return err;
1158	}
1159	txs = (netif_tx_sring_t *)netif->tx_ring_ref.va;
1160	BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
1161
1162	err = map_ring(rx_ring_ref, netif->domid, &netif->rx_ring_ref);
1163	if (err) {
1164		unmap_ring(&netif->tx_ring_ref);
1165		xenbus_dev_fatal(xdev, err, "mapping rx ring");
1166		return err;
1167	}
1168	rxs = (netif_rx_sring_t *)netif->rx_ring_ref.va;
1169	BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
1170
1171	op.u.bind_interdomain.remote_dom = netif->domid;
1172	op.u.bind_interdomain.remote_port = evtchn;
1173	err = HYPERVISOR_event_channel_op(&op);
1174	if (err) {
1175		unmap_ring(&netif->tx_ring_ref);
1176		unmap_ring(&netif->rx_ring_ref);
1177		xenbus_dev_fatal(xdev, err, "binding event channel");
1178		return err;
1179	}
1180	netif->evtchn = op.u.bind_interdomain.local_port;
1181
1182	/* bind evtchn to irq handler */
1183	netif->irq =
1184		bind_evtchn_to_irqhandler(netif->evtchn, "netback",
1185			netback_intr, netif, INTR_TYPE_NET|INTR_MPSAFE, &netif->irq_cookie);
1186
1187	netif->rings_connected = 1;
1188
1189	DPRINTF("%s connected! evtchn=%d irq=%d\n",
1190		IFNAME(netif), netif->evtchn, netif->irq);
1191
1192	return 0;
1193}
1194
1195static void
1196disconnect_rings(netif_t *netif)
1197{
1198	DPRINTF("\n");
1199
1200	if (netif->rings_connected) {
1201		unbind_from_irqhandler(netif->irq, netif->irq_cookie);
1202		netif->irq = 0;
1203		unmap_ring(&netif->tx_ring_ref);
1204		unmap_ring(&netif->rx_ring_ref);
1205		netif->rings_connected = 0;
1206	}
1207}
1208
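/* Once attached and the frontend is Connected, map the rings and mark the interface running */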
1209static void
1210connect(netif_t *netif)
1211{
1212	if (!netif->xdev ||
1213		!netif->attached ||
1214		netif->frontend_state != XenbusStateConnected) {
1215		return;
1216	}
1217
1218	if (!connect_rings(netif)) {
1219		xenbus_switch_state(netif->xdev, NULL, XenbusStateConnected);
1220
1221		/* Turn on interface */
1222		netif->ifp->if_drv_flags |= IFF_DRV_RUNNING;
1223		netif->ifp->if_flags |= IFF_UP;
1224	}
1225}
1226
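/* Detach the newbus vif device and drop the xenbus reference on the netif */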
1227static int
1228netback_remove(struct xenbus_device *xdev)
1229{
1230	netif_t *netif = xdev->data;
1231	device_t ndev;
1232
1233	DPRINTF("remove %s\n", xdev->nodename);
1234
1235	if ((ndev = netif->ndev)) {
1236		netif->ndev = NULL;
1237		mtx_lock(&Giant);
1238		device_detach(ndev);
1239		mtx_unlock(&Giant);
1240	}
1241
1242	xdev->data = NULL;
1243	netif->xdev = NULL;
1244	netif_put(netif);
1245
1246	return 0;
1247}
1248
1249/**
 * Entry point to this code when a new device is created.  Allocate the basic
 * structures and switch to the InitWait state; the rings are mapped later,
 * once the frontend reports Connected.
1253 */
1254static int
1255netback_probe(struct xenbus_device *xdev, const struct xenbus_device_id *id)
1256{
1257	int err;
1258	long handle;
1259	char *bridge;
1260
1261	DPRINTF("node=%s\n", xdev->nodename);
1262
1263	/* Grab the handle */
1264	err = xenbus_scanf(NULL, xdev->nodename, "handle", "%li", &handle);
1265	if (err != 1) {
1266		xenbus_dev_fatal(xdev, err, "reading handle");
1267		return err;
1268	}
1269
1270	/* Check for bridge */
1271	bridge = xenbus_read(NULL, xdev->nodename, "bridge", NULL);
1272	if (IS_ERR(bridge))
1273		bridge = NULL;
1274
1275	err = xenbus_switch_state(xdev, NULL, XenbusStateInitWait);
1276	if (err) {
1277		xenbus_dev_fatal(xdev, err, "writing switch state");
1278		return err;
1279	}
1280
1281	err = netif_create(handle, xdev, bridge);
1282	if (err) {
1283		xenbus_dev_fatal(xdev, err, "creating netif");
1284		return err;
1285	}
1286
1287	err = vif_add_dev(xdev);
1288	if (err) {
1289		netif_put((netif_t *)xdev->data);
1290		xenbus_dev_fatal(xdev, err, "adding vif device");
1291		return err;
1292	}
1293
1294	return 0;
1295}
1296
1297/**
 * Called after a suspend/resume or a driver restart.  Nothing is torn down
 * here; the netif is reconnected when the frontend runs through its state
 * changes again.
1302 */
1303static int netback_resume(struct xenbus_device *xdev)
1304{
1305	DPRINTF("node=%s\n", xdev->nodename);
1306	return 0;
1307}
1308
1309
1310/**
1311 * Callback received when the frontend's state changes.
1312 */
1313static void frontend_changed(struct xenbus_device *xdev,
1314							 XenbusState frontend_state)
1315{
1316	netif_t *netif = xdev->data;
1317
1318	DPRINTF("state=%d\n", frontend_state);
1319
1320	netif->frontend_state = frontend_state;
1321
1322	switch (frontend_state) {
1323	case XenbusStateInitialising:
1324	case XenbusStateInitialised:
1325		break;
1326	case XenbusStateConnected:
1327		connect(netif);
1328		break;
1329	case XenbusStateClosing:
1330		xenbus_switch_state(xdev, NULL, XenbusStateClosing);
1331		break;
1332	case XenbusStateClosed:
1333		xenbus_remove_device(xdev);
1334		break;
1335	case XenbusStateUnknown:
1336	case XenbusStateInitWait:
1337		xenbus_dev_fatal(xdev, EINVAL, "saw state %d at frontend",
1338						 frontend_state);
1339		break;
1340	}
1341}
1342
1343/* ** Driver registration ** */
1344
1345static struct xenbus_device_id netback_ids[] = {
1346	{ "vif" },
1347	{ "" }
1348};
1349
1350static struct xenbus_driver netback = {
1351	.name = "netback",
1352	.ids = netback_ids,
1353	.probe = netback_probe,
1354	.remove = netback_remove,
1355	.resume= netback_resume,
1356	.otherend_changed = frontend_changed,
1357};
1358
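/* Driver setup: reserve the mapping area, seed the pending ring and register the xenbus backend */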
1359static void
1360netback_init(void *unused)
1361{
1362	callout_init(&rx_task_callout, CALLOUT_MPSAFE);
1363
1364	mmap_vstart = alloc_empty_page_range(MAX_PENDING_REQS);
1365	BUG_ON(!mmap_vstart);
1366
1367	pending_cons = 0;
1368	for (pending_prod = 0; pending_prod < MAX_PENDING_REQS; pending_prod++)
1369		pending_ring[pending_prod] = pending_prod;
1370
1371	TASK_INIT(&net_tx_task, 0, net_tx_action, NULL);
1372	TASK_INIT(&net_rx_task, 0, net_rx_action, NULL);
1373	mtx_init(&tx_sched_list_lock, "nb_tx_sched_lock", "netback tx sched lock", MTX_DEF);
1374	mtx_init(&rx_sched_list_lock, "nb_rx_sched_lock", "netback rx sched lock", MTX_DEF);
1375
1376	DPRINTF("registering %s\n", netback.name);
1377
1378	xenbus_register_backend(&netback);
1379}
1380
1381SYSINIT(xnbedev, SI_SUB_PSEUDO, SI_ORDER_ANY, netback_init, NULL)
1382
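/* Create a newbus child of nexus0 representing this vif and attach it */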
1383static int
1384vif_add_dev(struct xenbus_device *xdev)
1385{
1386	netif_t *netif = xdev->data;
1387	device_t nexus, ndev;
1388	devclass_t dc;
1389	int err = 0;
1390
1391	mtx_lock(&Giant);
1392
1393	/* We will add a vif device as a child of nexus0 (for now) */
1394	if (!(dc = devclass_find("nexus")) ||
1395		!(nexus = devclass_get_device(dc, 0))) {
1396		WPRINTF("could not find nexus0!\n");
1397		err = ENOENT;
1398		goto done;
1399	}
1400
1401
1402	/* Create a newbus device representing the vif */
1403	ndev = BUS_ADD_CHILD(nexus, 0, "vif", netif->ifp->if_dunit);
1404	if (!ndev) {
1405		WPRINTF("could not create newbus device %s!\n", IFNAME(netif));
1406		err = EFAULT;
1407		goto done;
1408	}
1409
1410	netif_get(netif);
1411	device_set_ivars(ndev, netif);
1412	netif->ndev = ndev;
1413
1414	device_probe_and_attach(ndev);
1415
1416 done:
1417
1418	mtx_unlock(&Giant);
1419
1420	return err;
1421}
1422
1423enum {
1424	VIF_SYSCTL_DOMID,
1425	VIF_SYSCTL_HANDLE,
1426	VIF_SYSCTL_TXRING,
1427	VIF_SYSCTL_RXRING,
1428};
1429
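/* Format the current TX or RX ring indices into a malloc'ed string for the sysctl handler */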
1430static char *
1431vif_sysctl_ring_info(netif_t *netif, int cmd)
1432{
1433	char *buf = malloc(256, M_DEVBUF, M_WAITOK);
1434	if (buf) {
1435		if (!netif->rings_connected)
1436			sprintf(buf, "rings not connected\n");
1437		else if (cmd == VIF_SYSCTL_TXRING) {
1438			netif_tx_back_ring_t *tx = &netif->tx;
1439			sprintf(buf, "nr_ents=%x req_cons=%x"
1440					" req_prod=%x req_event=%x"
1441					" rsp_prod=%x rsp_event=%x",
1442					tx->nr_ents, tx->req_cons,
1443					tx->sring->req_prod, tx->sring->req_event,
1444					tx->sring->rsp_prod, tx->sring->rsp_event);
1445		} else {
1446			netif_rx_back_ring_t *rx = &netif->rx;
1447			sprintf(buf, "nr_ents=%x req_cons=%x"
1448					" req_prod=%x req_event=%x"
1449					" rsp_prod=%x rsp_event=%x",
1450					rx->nr_ents, rx->req_cons,
1451					rx->sring->req_prod, rx->sring->req_event,
1452					rx->sring->rsp_prod, rx->sring->rsp_event);
1453		}
1454	}
1455	return buf;
1456}
1457
1458static int
1459vif_sysctl_handler(SYSCTL_HANDLER_ARGS)
1460{
1461	device_t dev = (device_t)arg1;
1462	netif_t *netif = (netif_t *)device_get_ivars(dev);
1463	const char *value;
1464	char *buf = NULL;
1465	int err;
1466
1467	switch (arg2) {
1468	case VIF_SYSCTL_DOMID:
1469		return sysctl_handle_int(oidp, NULL, netif->domid, req);
1470	case VIF_SYSCTL_HANDLE:
1471		return sysctl_handle_int(oidp, NULL, netif->handle, req);
1472	case VIF_SYSCTL_TXRING:
1473	case VIF_SYSCTL_RXRING:
1474		value = buf = vif_sysctl_ring_info(netif, arg2);
1475		break;
1476	default:
1477		return (EINVAL);
1478	}
1479
1480	err = SYSCTL_OUT(req, value, strlen(value));
1481	if (buf != NULL)
1482		free(buf, M_DEVBUF);
1483
1484	return err;
1485}
1486
1487/* Newbus vif device driver probe */
1488static int
1489vif_probe(device_t dev)
1490{
1491	DDPRINTF("vif%d\n", device_get_unit(dev));
1492	return 0;
1493}
1494
1495/* Newbus vif device driver attach */
1496static int
1497vif_attach(device_t dev)
1498{
1499	netif_t *netif = (netif_t *)device_get_ivars(dev);
1500	uint8_t mac[ETHER_ADDR_LEN];
1501
1502	DDPRINTF("%s\n", IFNAME(netif));
1503
1504	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
1505	    OID_AUTO, "domid", CTLTYPE_INT|CTLFLAG_RD,
1506	    dev, VIF_SYSCTL_DOMID, vif_sysctl_handler, "I",
1507	    "domid of frontend");
1508	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
1509	    OID_AUTO, "handle", CTLTYPE_INT|CTLFLAG_RD,
1510	    dev, VIF_SYSCTL_HANDLE, vif_sysctl_handler, "I",
1511	    "handle of frontend");
1512#ifdef XEN_NETBACK_DEBUG
1513	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
1514	    OID_AUTO, "txring", CTLTYPE_STRING | CTLFLAG_RD,
1515	    dev, VIF_SYSCTL_TXRING, vif_sysctl_handler, "A",
1516	    "tx ring info");
1517	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
1518	    OID_AUTO, "rxring", CTLTYPE_STRING | CTLFLAG_RD,
1519	    dev, VIF_SYSCTL_RXRING, vif_sysctl_handler, "A",
1520	    "rx ring info");
1521#endif
1522
1523	memset(mac, 0xff, sizeof(mac));
1524	mac[0] &= ~0x01;
1525
1526	ether_ifattach(netif->ifp, mac);
1527	netif->attached = 1;
1528
1529	connect(netif);
1530
1531	if (netif->bridge) {
1532		DPRINTF("Adding %s to bridge %s\n", IFNAME(netif), netif->bridge);
1533		int err = add_to_bridge(netif->ifp, netif->bridge);
1534		if (err) {
1535			WPRINTF("Error adding %s to %s; err=%d\n",
1536				IFNAME(netif), netif->bridge, err);
1537		}
1538	}
1539
1540	return bus_generic_attach(dev);
1541}
1542
1543/* Newbus vif device driver detach */
1544static int
1545vif_detach(device_t dev)
1546{
1547	netif_t *netif = (netif_t *)device_get_ivars(dev);
1548	struct ifnet *ifp = netif->ifp;
1549
1550	DDPRINTF("%s\n", IFNAME(netif));
1551
1552	/* Tell the stack that the interface is no longer active */
1553	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1554
1555	ether_ifdetach(ifp);
1556
1557	bus_generic_detach(dev);
1558
1559	netif->attached = 0;
1560
1561	netif_put(netif);
1562
1563	return 0;
1564}
1565
1566static device_method_t vif_methods[] = {
1567	/* Device interface */
1568	DEVMETHOD(device_probe,		vif_probe),
1569	DEVMETHOD(device_attach, 	vif_attach),
1570	DEVMETHOD(device_detach,	vif_detach),
1571	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
1572	DEVMETHOD(device_suspend,	bus_generic_suspend),
1573	DEVMETHOD(device_resume,	bus_generic_resume),
1574	{0, 0}
1575};
1576
1577static devclass_t vif_devclass;
1578
1579static driver_t vif_driver = {
1580	"vif",
1581	vif_methods,
1582	0,
1583};
1584
1585DRIVER_MODULE(vif, nexus, vif_driver, vif_devclass, 0, 0);
1586
1587
1588/*
1589 * Local variables:
1590 * mode: C
1591 * c-set-style: "BSD"
1592 * c-basic-offset: 4
1593 * tab-width: 4
1594 * indent-tabs-mode: t
1595 * End:
1596 */
1597