/*	$NetBSD: vnet.c,v 1.10 2023/12/14 20:17:18 andvar Exp $	*/
/*	$OpenBSD: vnet.c,v 1.62 2020/07/10 13:26:36 patrick Exp $	*/
/*
 * Copyright (c) 2009, 2015 Mark Kettenis
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/atomic.h>
#include <sys/callout.h>
#include <sys/device.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/systm.h>

#include <machine/autoconf.h>
#include <machine/hypervisor.h>
#include <machine/openfirm.h>

#include <net/if.h>
#include <net/if_media.h>

#include <netinet/in.h>
#include <net/if_ether.h>

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

#include <uvm/uvm_extern.h>

#include <sparc64/dev/cbusvar.h>
#include <sparc64/dev/ldcvar.h>
#include <sparc64/dev/viovar.h>

#ifdef VNET_DEBUG
#define DPRINTF(x)	printf x
#else
#define DPRINTF(x)
#endif

#define VNET_TX_ENTRIES		32
#define VNET_RX_ENTRIES		32

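/* ATTR_INFO message payload exchanged during the VIO handshake. */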
struct vnet_attr_info {
	struct vio_msg_tag	tag;
	uint8_t			xfer_mode;
	uint8_t			addr_type;
	uint16_t		ack_freq;
	uint32_t		_reserved1;
	uint64_t		addr;
	uint64_t		mtu;
	uint64_t		_reserved2[3];
};

/* Address types. */
#define VNET_ADDR_ETHERMAC	0x01

/* Sub-Type envelopes. */
#define VNET_MCAST_INFO		0x0101

#define VNET_NUM_MCAST		7

struct vnet_mcast_info {
	struct vio_msg_tag	tag;
	uint8_t			set;
	uint8_t			count;
	uint8_t			mcast_addr[VNET_NUM_MCAST][ETHER_ADDR_LEN];
	uint32_t		_reserved;
};

struct vnet_desc {
	struct vio_dring_hdr	hdr;
	uint32_t		nbytes;
	uint32_t		ncookies;
	struct ldc_cookie	cookie[2];
};

struct vnet_desc_msg {
	struct vio_msg_tag	tag;
	uint64_t		seq_no;
	uint64_t		desc_handle;
	uint32_t		nbytes;
	uint32_t		ncookies;
	struct ldc_cookie	cookie[1];
};

struct vnet_dring {
	bus_dmamap_t		vd_map;
	bus_dma_segment_t	vd_seg;
	struct vnet_desc	*vd_desc;
	int			vd_nentries;
};

struct vnet_dring *vnet_dring_alloc(bus_dma_tag_t, int);
void	vnet_dring_free(bus_dma_tag_t, struct vnet_dring *);

/*
 * For now, we only support vNet 1.0.
 */
#define VNET_MAJOR	1
#define VNET_MINOR	0

/*
 * The vNet protocol wants the IP header to be 64-bit aligned, so
 * define our own variant of ETHER_ALIGN.
 */
#define VNET_ETHER_ALIGN	6

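/* Software shadow of a transmit descriptor: map slot index and buffer. */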
struct vnet_soft_desc {
	int		vsd_map_idx;
	unsigned char	*vsd_buf;
};

struct vnet_softc {
	device_t	sc_dv;
	bus_space_tag_t	sc_bustag;
	bus_dma_tag_t	sc_dmatag;

	uint64_t	sc_tx_ino;
	uint64_t	sc_rx_ino;
	void		*sc_tx_ih;
	void		*sc_rx_ih;

	struct ldc_conn	sc_lc;

	uint16_t	sc_vio_state;
#define VIO_SND_VER_INFO	0x0001
#define VIO_ACK_VER_INFO	0x0002
#define VIO_RCV_VER_INFO	0x0004
#define VIO_SND_ATTR_INFO	0x0008
#define VIO_ACK_ATTR_INFO	0x0010
#define VIO_RCV_ATTR_INFO	0x0020
#define VIO_SND_DRING_REG	0x0040
#define VIO_ACK_DRING_REG	0x0080
#define VIO_RCV_DRING_REG	0x0100
#define VIO_SND_RDX		0x0200
#define VIO_ACK_RDX		0x0400
#define VIO_RCV_RDX		0x0800

	struct callout	sc_handshake_co;

	uint8_t		sc_xfer_mode;

	uint32_t	sc_local_sid;
	uint64_t	sc_dring_ident;
	uint64_t	sc_seq_no;

	u_int		sc_tx_prod;
	u_int		sc_tx_cons;

	u_int		sc_peer_state;

	struct ldc_map	*sc_lm;
	struct vnet_dring *sc_vd;
	struct vnet_soft_desc *sc_vsd;
#define VNET_NUM_SOFT_DESC	128

	size_t		sc_peer_desc_size;
	struct ldc_cookie sc_peer_dring_cookie;
	int		sc_peer_dring_nentries;

	struct pool	sc_pool;

	struct ethercom	sc_ethercom;
	struct ifmedia	sc_media;
	uint8_t		sc_macaddr[ETHER_ADDR_LEN];
};

int	vnet_match(device_t, cfdata_t, void *);
void	vnet_attach(device_t, device_t, void *);

CFATTACH_DECL_NEW(vnet, sizeof(struct vnet_softc),
    vnet_match, vnet_attach, NULL, NULL);

int	vnet_tx_intr(void *);
int	vnet_rx_intr(void *);
void	vnet_handshake(void *);

void	vio_rx_data(struct ldc_conn *, struct ldc_pkt *);
void	vnet_rx_vio_ctrl(struct vnet_softc *, struct vio_msg *);
void	vnet_rx_vio_ver_info(struct vnet_softc *, struct vio_msg_tag *);
void	vnet_rx_vio_attr_info(struct vnet_softc *, struct vio_msg_tag *);
void	vnet_rx_vio_dring_reg(struct vnet_softc *, struct vio_msg_tag *);
void	vnet_rx_vio_rdx(struct vnet_softc *, struct vio_msg_tag *);
void	vnet_rx_vio_mcast_info(struct vnet_softc *, struct vio_msg_tag *);
void	vnet_rx_vio_data(struct vnet_softc *, struct vio_msg *);
void	vnet_rx_vio_desc_data(struct vnet_softc *, struct vio_msg_tag *);
void	vnet_rx_vio_dring_data(struct vnet_softc *, struct vio_msg_tag *);

void	vnet_ldc_reset(struct ldc_conn *);
void	vnet_ldc_start(struct ldc_conn *);

void	vnet_sendmsg(struct vnet_softc *, void *, size_t);
void	vnet_send_ver_info(struct vnet_softc *, uint16_t, uint16_t);
void	vnet_send_attr_info(struct vnet_softc *);
void	vnet_send_dring_reg(struct vnet_softc *);
void	vio_send_rdx(struct vnet_softc *);
void	vnet_send_dring_data(struct vnet_softc *, uint32_t);

void	vnet_start(struct ifnet *);
void	vnet_start_desc(struct ifnet *);
int	vnet_ioctl(struct ifnet *, u_long, void *);
void	vnet_watchdog(struct ifnet *);

int	vnet_media_change(struct ifnet *);
void	vnet_media_status(struct ifnet *, struct ifmediareq *);

void	vnet_link_state(struct vnet_softc *);

void	vnet_setmulti(struct vnet_softc *, int);

int	vnet_init(struct ifnet *);
void	vnet_stop(struct ifnet *, int);

int
vnet_match(device_t parent, cfdata_t match, void *aux)
{
	struct cbus_attach_args *ca = aux;

	if (strcmp(ca->ca_name, "network") == 0)
		return (1);

	return (0);
}

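/*
 * Attach the device: set up TX/RX interrupts for the LDC channel,
 * allocate the LDC transmit and receive queues, fetch the MAC address
 * from the "local-mac-address" property and attach the ethernet
 * interface.  The channel itself is only brought up in vnet_init().
 */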
void
vnet_attach(device_t parent, device_t self, void *aux)
{
	struct vnet_softc *sc = device_private(self);
	struct cbus_attach_args *ca = aux;
	struct ldc_conn *lc;
	struct ifnet *ifp;

	sc->sc_dv = self;
	sc->sc_bustag = ca->ca_bustag;
	sc->sc_dmatag = ca->ca_dmatag;
	sc->sc_tx_ino = ca->ca_tx_ino;
	sc->sc_rx_ino = ca->ca_rx_ino;

	printf(": ivec 0x%" PRIx64 ", 0x%" PRIx64,
	    sc->sc_tx_ino, sc->sc_rx_ino);

	/*
	 * Un-configure queues before registering interrupt handlers,
	 * such that we don't get any stale LDC packets or events.
	 */
	hv_ldc_tx_qconf(ca->ca_id, 0, 0);
	hv_ldc_rx_qconf(ca->ca_id, 0, 0);

	sc->sc_tx_ih = bus_intr_establish(ca->ca_bustag, sc->sc_tx_ino,
	    IPL_NET, vnet_tx_intr, sc);
	sc->sc_rx_ih = bus_intr_establish(ca->ca_bustag, sc->sc_rx_ino,
	    IPL_NET, vnet_rx_intr, sc);
	if (sc->sc_tx_ih == NULL || sc->sc_rx_ih == NULL) {
		printf(", can't establish interrupts\n");
		return;
	}

	lc = &sc->sc_lc;
	lc->lc_id = ca->ca_id;
	lc->lc_sc = sc;
	lc->lc_reset = vnet_ldc_reset;
	lc->lc_start = vnet_ldc_start;
	lc->lc_rx_data = vio_rx_data;

	callout_init(&sc->sc_handshake_co, 0);

	sc->sc_peer_state = VIO_DP_STOPPED;

	lc->lc_txq = ldc_queue_alloc(VNET_TX_ENTRIES);
	if (lc->lc_txq == NULL) {
		printf(", can't allocate tx queue\n");
		return;
	}

	lc->lc_rxq = ldc_queue_alloc(VNET_RX_ENTRIES);
	if (lc->lc_rxq == NULL) {
		printf(", can't allocate rx queue\n");
		goto free_txqueue;
	}

	if (OF_getprop(ca->ca_node, "local-mac-address",
	    sc->sc_macaddr, ETHER_ADDR_LEN) > 0) {
		printf(", address %s", ether_sprintf(sc->sc_macaddr));
	} else {
		printf(", cannot retrieve local mac address\n");
		return;
	}

	/*
	 * Each interface gets its own pool.
	 */
	pool_init(&sc->sc_pool, /*size*/2048, /*align*/0, /*align_offset*/0,
	    /*flags*/0, /*wchan*/device_xname(sc->sc_dv), /*palloc*/NULL,
	    IPL_NET);

	ifp = &sc->sc_ethercom.ec_if;
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_init = vnet_init;
	ifp->if_ioctl = vnet_ioctl;
	ifp->if_start = vnet_start;
	ifp->if_stop = vnet_stop;
	ifp->if_watchdog = vnet_watchdog;
	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
	IFQ_SET_MAXLEN(&ifp->if_snd, 31); /* XXX */

	ifmedia_init(&sc->sc_media, 0, vnet_media_change, vnet_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_attach(ifp);
	ether_ifattach(ifp, sc->sc_macaddr);

	printf("\n");
	return;

free_txqueue:
	ldc_queue_free(lc->lc_txq);
}

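/*
 * TX interrupt: we only track LDC channel state changes here; actual
 * transmit completions are signalled by the peer with ACK messages
 * that arrive through the RX path.
 */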
int
vnet_tx_intr(void *arg)
{
	struct vnet_softc *sc = arg;
	struct ldc_conn *lc = &sc->sc_lc;
	uint64_t tx_head, tx_tail, tx_state;

	hv_ldc_tx_get_state(lc->lc_id, &tx_head, &tx_tail, &tx_state);
	if (tx_state != lc->lc_tx_state) {
		switch (tx_state) {
		case LDC_CHANNEL_DOWN:
			DPRINTF(("%s: Tx link down\n", __func__));
			break;
		case LDC_CHANNEL_UP:
			DPRINTF(("%s: Tx link up\n", __func__));
			break;
		case LDC_CHANNEL_RESET:
			DPRINTF(("%s: Tx link reset\n", __func__));
			break;
		}
		lc->lc_tx_state = tx_state;
	}

	return (1);
}

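/*
 * RX interrupt: handle LDC channel state transitions, scheduling a
 * fresh handshake on UP or RESET, and dispatch a single LDC packet
 * from the receive queue to the control or data handlers.
 */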
int
vnet_rx_intr(void *arg)
{
	struct vnet_softc *sc = arg;
	struct ldc_conn *lc = &sc->sc_lc;
	uint64_t rx_head, rx_tail, rx_state;
	struct ldc_pkt *lp;
	int err;

	err = hv_ldc_rx_get_state(lc->lc_id, &rx_head, &rx_tail, &rx_state);
	if (err != H_EOK) {
		printf("hv_ldc_rx_get_state %d\n", err);
		return (0);
	}

	if (rx_state != lc->lc_rx_state) {
		switch (rx_state) {
		case LDC_CHANNEL_DOWN:
			lc->lc_tx_seqid = 0;
			lc->lc_state = 0;
			lc->lc_reset(lc);
			if (rx_head == rx_tail)
				break;
			/* Discard and ack pending I/O. */
			DPRINTF(("setting rx qhead to %" PRIu64 "\n",
			    rx_tail));
			err = hv_ldc_rx_set_qhead(lc->lc_id, rx_tail);
			if (err == H_EOK)
				break;
			printf("%s: hv_ldc_rx_set_qhead %d\n", __func__, err);
			break;
		case LDC_CHANNEL_UP:
			callout_reset(&sc->sc_handshake_co, hz / 2,
			    vnet_handshake, sc);
			break;
		case LDC_CHANNEL_RESET:
			DPRINTF(("%s: Rx link reset\n", __func__));
			lc->lc_tx_seqid = 0;
			lc->lc_state = 0;
			lc->lc_reset(lc);
			callout_reset(&sc->sc_handshake_co, hz / 2,
			    vnet_handshake, sc);
			if (rx_head == rx_tail)
				break;
			/* Discard and ack pending I/O. */
			DPRINTF(("setting rx qhead to %" PRIu64 "\n",
			    rx_tail));
			err = hv_ldc_rx_set_qhead(lc->lc_id, rx_tail);
			if (err == H_EOK)
				break;
			printf("%s: hv_ldc_rx_set_qhead %d\n", __func__, err);
			break;
		default:
			DPRINTF(("%s: unhandled rx_state %" PRIx64 "\n",
			    __func__, rx_state));
			break;
		}
		lc->lc_rx_state = rx_state;
		return (1);
	}

	if (rx_head == rx_tail) {
		DPRINTF(("%s: head eq tail\n", __func__));
		return (0);
	}

	lp = (struct ldc_pkt *)(uintptr_t)(lc->lc_rxq->lq_va + rx_head);
	switch (lp->type) {
	case LDC_CTRL:
		DPRINTF(("%s: LDC_CTRL\n", __func__));
		ldc_rx_ctrl(lc, lp);
		break;

	case LDC_DATA:
		DPRINTF(("%s: LDC_DATA\n", __func__));
		ldc_rx_data(lc, lp);
		break;

	default:
		DPRINTF(("%s: unhandled type 0x%02x/0x%02x/0x%02x\n",
		    __func__, lp->type, lp->stype, lp->ctrl));
		console_debugger();
		ldc_reset(lc);
		break;
	}

	if (lc->lc_state == 0)
		return (1);

	rx_head += sizeof(*lp);
	rx_head &= ((lc->lc_rxq->lq_nentries * sizeof(*lp)) - 1);
	err = hv_ldc_rx_set_qhead(lc->lc_id, rx_head);
	if (err != H_EOK)
		printf("%s: hv_ldc_rx_set_qhead %d\n", __func__, err);

	return (1);
}

void
vnet_handshake(void *arg)
{
	struct vnet_softc *sc = arg;

	ldc_send_vers(&sc->sc_lc);
}

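/*
 * Dispatch an incoming VIO message: control messages drive the
 * handshake state machine, data messages carry frames once the
 * session is up.
 */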
void
vio_rx_data(struct ldc_conn *lc, struct ldc_pkt *lp)
{
	struct vio_msg *vm = (struct vio_msg *)lp;

	switch (vm->type) {
	case VIO_TYPE_CTRL:
		if ((lp->env & LDC_FRAG_START) == 0 &&
		    (lp->env & LDC_FRAG_STOP) == 0) {
			DPRINTF(("%s: FRAG_START==0 and FRAG_STOP==0\n",
			    __func__));
			return;
		}
		vnet_rx_vio_ctrl(lc->lc_sc, vm);
		break;

	case VIO_TYPE_DATA:
		if ((lp->env & LDC_FRAG_START) == 0) {
			DPRINTF(("%s: FRAG_START==0\n", __func__));
			return;
		}
		vnet_rx_vio_data(lc->lc_sc, vm);
		break;

	default:
		DPRINTF(("Unhandled packet type 0x%02x\n", vm->type));
		ldc_reset(lc);
		break;
	}
}

void
vnet_rx_vio_ctrl(struct vnet_softc *sc, struct vio_msg *vm)
{
	struct vio_msg_tag *tag = (struct vio_msg_tag *)&vm->type;

	switch (tag->stype_env) {
	case VIO_VER_INFO:
		vnet_rx_vio_ver_info(sc, tag);
		break;
	case VIO_ATTR_INFO:
		vnet_rx_vio_attr_info(sc, tag);
		break;
	case VIO_DRING_REG:
		vnet_rx_vio_dring_reg(sc, tag);
		break;
	case VIO_RDX:
		vnet_rx_vio_rdx(sc, tag);
		break;
	case VNET_MCAST_INFO:
		vnet_rx_vio_mcast_info(sc, tag);
		break;
	default:
		printf("%s: CTRL/0x%02x/0x%04x FIXME\n",
		    __func__, tag->stype, tag->stype_env);
		break;
	}
}

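/*
 * VIO handshake: the peers exchange VER_INFO, ATTR_INFO, DRING_REG
 * (in dring mode) and finally RDX messages.  Each step is tracked in
 * sc_vio_state with SND (we sent our INFO), RCV (we ACKed the peer's
 * INFO) and ACK (the peer ACKed ours) bits; once both RDX bits are
 * set the link is considered up.
 */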
void
vnet_rx_vio_ver_info(struct vnet_softc *sc, struct vio_msg_tag *tag)
{
	struct vio_ver_info *vi = (struct vio_ver_info *)tag;

	switch (vi->tag.stype) {
	case VIO_SUBTYPE_INFO:
		DPRINTF(("CTRL/INFO/VER_INFO\n"));

		/* Make sure we're talking to a virtual network device. */
		if (vi->dev_class != VDEV_NETWORK &&
		    vi->dev_class != VDEV_NETWORK_SWITCH) {
			DPRINTF(("Class is not network or network switch\n"));
			printf("Not a network device\n");
			vi->tag.stype = VIO_SUBTYPE_NACK;
			vnet_sendmsg(sc, vi, sizeof(*vi));
			return;
		}

		if (vi->major != VNET_MAJOR) {
			DPRINTF(("Major mismatch %d vs %d\n",
			    vi->major, VNET_MAJOR));
			vi->tag.stype = VIO_SUBTYPE_NACK;
			vi->major = VNET_MAJOR;
			vi->minor = VNET_MINOR;
			vnet_sendmsg(sc, vi, sizeof(*vi));
			return;
		}

		vi->tag.stype = VIO_SUBTYPE_ACK;
		vi->tag.sid = sc->sc_local_sid;
		vi->minor = VNET_MINOR;
		vnet_sendmsg(sc, vi, sizeof(*vi));
		sc->sc_vio_state |= VIO_RCV_VER_INFO;
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("CTRL/ACK/VER_INFO\n"));
		if (!ISSET(sc->sc_vio_state, VIO_SND_VER_INFO)) {
			ldc_reset(&sc->sc_lc);
			break;
		}
		sc->sc_vio_state |= VIO_ACK_VER_INFO;
		break;

	default:
		DPRINTF(("CTRL/0x%02x/VER_INFO\n", vi->tag.stype));
		break;
	}

	if (ISSET(sc->sc_vio_state, VIO_RCV_VER_INFO) &&
	    ISSET(sc->sc_vio_state, VIO_ACK_VER_INFO))
		vnet_send_attr_info(sc);
}

void
vnet_rx_vio_attr_info(struct vnet_softc *sc, struct vio_msg_tag *tag)
{
	struct vnet_attr_info *ai = (struct vnet_attr_info *)tag;

	switch (ai->tag.stype) {
	case VIO_SUBTYPE_INFO:
		DPRINTF(("CTRL/INFO/ATTR_INFO\n"));
		sc->sc_xfer_mode = ai->xfer_mode;
		ai->tag.stype = VIO_SUBTYPE_ACK;
		ai->tag.sid = sc->sc_local_sid;
		vnet_sendmsg(sc, ai, sizeof(*ai));
		sc->sc_vio_state |= VIO_RCV_ATTR_INFO;
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("CTRL/ACK/ATTR_INFO\n"));
		if (!ISSET(sc->sc_vio_state, VIO_SND_ATTR_INFO)) {
			ldc_reset(&sc->sc_lc);
			break;
		}
		sc->sc_vio_state |= VIO_ACK_ATTR_INFO;
		break;

	default:
		DPRINTF(("CTRL/0x%02x/ATTR_INFO\n", ai->tag.stype));
		break;
	}

	if (ISSET(sc->sc_vio_state, VIO_RCV_ATTR_INFO) &&
	    ISSET(sc->sc_vio_state, VIO_ACK_ATTR_INFO)) {
		if (sc->sc_xfer_mode == VIO_DRING_MODE)
			vnet_send_dring_reg(sc);
		else
			vio_send_rdx(sc);
	}
}

void
vnet_rx_vio_dring_reg(struct vnet_softc *sc, struct vio_msg_tag *tag)
{
	struct vio_dring_reg *dr = (struct vio_dring_reg *)tag;

	switch (dr->tag.stype) {
	case VIO_SUBTYPE_INFO:
		DPRINTF(("CTRL/INFO/DRING_REG\n"));
		sc->sc_peer_dring_nentries = dr->num_descriptors;
		sc->sc_peer_desc_size = dr->descriptor_size;
		sc->sc_peer_dring_cookie = dr->cookie[0];

		dr->tag.stype = VIO_SUBTYPE_ACK;
		dr->tag.sid = sc->sc_local_sid;
		vnet_sendmsg(sc, dr, sizeof(*dr));
		sc->sc_vio_state |= VIO_RCV_DRING_REG;
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("CTRL/ACK/DRING_REG\n"));
		if (!ISSET(sc->sc_vio_state, VIO_SND_DRING_REG)) {
			ldc_reset(&sc->sc_lc);
			break;
		}

		sc->sc_dring_ident = dr->dring_ident;
		sc->sc_seq_no = 1;

		sc->sc_vio_state |= VIO_ACK_DRING_REG;
		break;

	default:
		DPRINTF(("CTRL/0x%02x/DRING_REG\n", dr->tag.stype));
		break;
	}

	if (ISSET(sc->sc_vio_state, VIO_RCV_DRING_REG) &&
	    ISSET(sc->sc_vio_state, VIO_ACK_DRING_REG))
		vio_send_rdx(sc);
}

void
vnet_rx_vio_rdx(struct vnet_softc *sc, struct vio_msg_tag *tag)
{
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;

	switch (tag->stype) {
	case VIO_SUBTYPE_INFO:
		DPRINTF(("CTRL/INFO/RDX\n"));
		tag->stype = VIO_SUBTYPE_ACK;
		tag->sid = sc->sc_local_sid;
		vnet_sendmsg(sc, tag, sizeof(*tag));
		sc->sc_vio_state |= VIO_RCV_RDX;
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("CTRL/ACK/RDX\n"));
		if (!ISSET(sc->sc_vio_state, VIO_SND_RDX)) {
			ldc_reset(&sc->sc_lc);
			break;
		}
		sc->sc_vio_state |= VIO_ACK_RDX;
		break;

	default:
		DPRINTF(("CTRL/0x%02x/RDX (VIO)\n", tag->stype));
		break;
	}

	if (ISSET(sc->sc_vio_state, VIO_RCV_RDX) &&
	    ISSET(sc->sc_vio_state, VIO_ACK_RDX)) {
		/* Link is up! */
		vnet_link_state(sc);

		/* Configure multicast now that we can. */
		vnet_setmulti(sc, 1);

		KERNEL_LOCK(1, curlwp);
		vnet_start(ifp);
		KERNEL_UNLOCK_ONE(curlwp);
	}
}

void
vnet_rx_vio_mcast_info(struct vnet_softc *sc, struct vio_msg_tag *tag)
{
	switch (tag->stype) {
	case VIO_SUBTYPE_INFO:
		DPRINTF(("CTRL/INFO/MCAST_INFO\n"));
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("CTRL/ACK/MCAST_INFO\n"));
		break;

	case VIO_SUBTYPE_NACK:
		DPRINTF(("CTRL/NACK/MCAST_INFO\n"));
		break;

	default:
		printf("%s: CTRL/0x%02x/0x%04x\n",
		    __func__, tag->stype, tag->stype_env);
		break;
	}
}

void
vnet_rx_vio_data(struct vnet_softc *sc, struct vio_msg *vm)
{
	struct vio_msg_tag *tag = (struct vio_msg_tag *)&vm->type;

	if (!ISSET(sc->sc_vio_state, VIO_RCV_RDX) ||
	    !ISSET(sc->sc_vio_state, VIO_ACK_RDX)) {
		DPRINTF(("Spurious DATA/0x%02x/0x%04x\n", tag->stype,
		    tag->stype_env));
		return;
	}

	switch (tag->stype_env) {
	case VIO_DESC_DATA:
		vnet_rx_vio_desc_data(sc, tag);
		break;

	case VIO_DRING_DATA:
		vnet_rx_vio_dring_data(sc, tag);
		break;

	default:
		DPRINTF(("DATA/0x%02x/0x%04x\n", tag->stype, tag->stype_env));
		break;
	}
}

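/*
 * Descriptor mode receive/completion path: an INFO message describes
 * a single frame which we copy in via hv_ldc_copy(); an ACK completes
 * one of our own in-flight transmit descriptors.
 */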
void
vnet_rx_vio_desc_data(struct vnet_softc *sc, struct vio_msg_tag *tag)
{
	struct vnet_desc_msg *dm = (struct vnet_desc_msg *)tag;
	struct ldc_conn *lc = &sc->sc_lc;
	struct ldc_map *map = sc->sc_lm;
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct mbuf *m;
	unsigned char *buf;
	paddr_t pa;
	psize_t nbytes;
	u_int cons;
	int err;

	switch (tag->stype) {
	case VIO_SUBTYPE_INFO:
		nbytes = roundup(dm->nbytes, 8);

		if (dm->nbytes > (ETHER_MAX_LEN - ETHER_CRC_LEN)) {
			if_statinc(ifp, if_ierrors);
			goto skip;
		}

		buf = pool_get(&sc->sc_pool, PR_NOWAIT|PR_ZERO);
		if (buf == NULL) {
			if_statinc(ifp, if_ierrors);
			goto skip;
		}

		pmap_extract(pmap_kernel(), (vaddr_t)buf, &pa);
		err = hv_ldc_copy(lc->lc_id, LDC_COPY_IN,
		    dm->cookie[0].addr, pa, nbytes, &nbytes);
		if (err != H_EOK) {
			pool_put(&sc->sc_pool, buf);
			if_statinc(ifp, if_ierrors);
			goto skip;
		}

		/* Stupid OBP doesn't align properly. */
		m = m_devget(buf, dm->nbytes, 0, ifp);
		pool_put(&sc->sc_pool, buf);
		if (m == NULL) {
			if_statinc(ifp, if_ierrors);
			goto skip;
		}

		/* Pass it on. */
		if_percpuq_enqueue(ifp->if_percpuq, m);

	skip:
		dm->tag.stype = VIO_SUBTYPE_ACK;
		dm->tag.sid = sc->sc_local_sid;
		vnet_sendmsg(sc, dm, sizeof(*dm));
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("DATA/ACK/DESC_DATA\n"));

		if (dm->desc_handle != sc->sc_tx_cons) {
			printf("out of order\n");
			return;
		}

		cons = sc->sc_tx_cons & (sc->sc_vd->vd_nentries - 1);

		map->lm_slot[sc->sc_vsd[cons].vsd_map_idx].entry = 0;
		atomic_dec_32(&map->lm_count);

		pool_put(&sc->sc_pool, sc->sc_vsd[cons].vsd_buf);
		sc->sc_vsd[cons].vsd_buf = NULL;

		sc->sc_tx_cons++;
		break;

	case VIO_SUBTYPE_NACK:
		DPRINTF(("DATA/NACK/DESC_DATA\n"));
		break;

	default:
		DPRINTF(("DATA/0x%02x/DESC_DATA\n", tag->stype));
		break;
	}
}

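/*
 * Dring mode receive/completion path: on INFO, walk the peer's
 * descriptor ring, copy every READY frame into an mbuf cluster, mark
 * the descriptor DONE and ACK (or NACK, if nothing was processed) the
 * message; on ACK, reclaim completed transmit descriptors and restart
 * the peer if it stopped while we still have work queued.
 */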
void
vnet_rx_vio_dring_data(struct vnet_softc *sc, struct vio_msg_tag *tag)
{
	struct vio_dring_msg *dm = (struct vio_dring_msg *)tag;
	struct ldc_conn *lc = &sc->sc_lc;
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct mbuf *m = NULL;
	paddr_t pa;
	psize_t nbytes;
	int err;

	switch (tag->stype) {
	case VIO_SUBTYPE_INFO:
	{
		struct vnet_desc desc;
		uint64_t cookie;
		paddr_t desc_pa;
		int idx, ack_end_idx = -1;

		DPRINTF(("%s: VIO_SUBTYPE_INFO\n", __func__));

		idx = dm->start_idx;
		for (;;) {
			cookie = sc->sc_peer_dring_cookie.addr;
			cookie += idx * sc->sc_peer_desc_size;
			nbytes = sc->sc_peer_desc_size;
			pmap_extract(pmap_kernel(), (vaddr_t)&desc, &desc_pa);
			err = hv_ldc_copy(lc->lc_id, LDC_COPY_IN, cookie,
			    desc_pa, nbytes, &nbytes);
			if (err != H_EOK) {
				printf("hv_ldc_copy_in %d\n", err);
				break;
			}

			if (desc.hdr.dstate != VIO_DESC_READY)
				break;

			if (desc.nbytes > (ETHER_MAX_LEN - ETHER_CRC_LEN)) {
				if_statinc(ifp, if_ierrors);
				goto skip;
			}

			MGETHDR(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				DPRINTF(("%s: MGETHDR failed\n", __func__));
				if_statinc(ifp, if_ierrors);
				goto skip;
			}
			MCLGET(m, M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_freem(m);
				break;
			}
			m->m_len = m->m_pkthdr.len = desc.nbytes;
			nbytes = roundup(desc.nbytes + VNET_ETHER_ALIGN, 8);

			pmap_extract(pmap_kernel(), (vaddr_t)m->m_data, &pa);
			err = hv_ldc_copy(lc->lc_id, LDC_COPY_IN,
			    desc.cookie[0].addr, pa, nbytes, &nbytes);
			if (err != H_EOK) {
				m_freem(m);
				goto skip;
			}
			m->m_data += VNET_ETHER_ALIGN;
			m_set_rcvif(m, ifp);

			if_percpuq_enqueue(ifp->if_percpuq, m);

		skip:
			desc.hdr.dstate = VIO_DESC_DONE;
			nbytes = sc->sc_peer_desc_size;
			err = hv_ldc_copy(lc->lc_id, LDC_COPY_OUT, cookie,
			    desc_pa, nbytes, &nbytes);
			if (err != H_EOK)
				printf("hv_ldc_copy_out %d\n", err);

			ack_end_idx = idx;
			if (++idx == sc->sc_peer_dring_nentries)
				idx = 0;
		}

		if (ack_end_idx == -1) {
			dm->tag.stype = VIO_SUBTYPE_NACK;
		} else {
			dm->tag.stype = VIO_SUBTYPE_ACK;
			dm->end_idx = ack_end_idx;
		}
		dm->tag.sid = sc->sc_local_sid;
		dm->proc_state = VIO_DP_STOPPED;
		vnet_sendmsg(sc, dm, sizeof(*dm));
		break;
	}

	case VIO_SUBTYPE_ACK:
	{
		struct ldc_map *map = sc->sc_lm;
		u_int cons, count;

		DPRINTF(("%s: VIO_SUBTYPE_ACK\n", __func__));

		sc->sc_peer_state = dm->proc_state;

		cons = sc->sc_tx_cons & (sc->sc_vd->vd_nentries - 1);
		while (sc->sc_vd->vd_desc[cons].hdr.dstate == VIO_DESC_DONE) {
			map->lm_slot[sc->sc_vsd[cons].vsd_map_idx].entry = 0;
			atomic_dec_32(&map->lm_count);

			pool_put(&sc->sc_pool, sc->sc_vsd[cons].vsd_buf);
			sc->sc_vsd[cons].vsd_buf = NULL;

			sc->sc_vd->vd_desc[cons].hdr.dstate = VIO_DESC_FREE;
			sc->sc_tx_cons++;
			cons = sc->sc_tx_cons & (sc->sc_vd->vd_nentries - 1);
		}

		count = sc->sc_tx_prod - sc->sc_tx_cons;
		if (count > 0 && sc->sc_peer_state != VIO_DP_ACTIVE)
			vnet_send_dring_data(sc, cons);

		KERNEL_LOCK(1, curlwp);
		if (count == 0)
			ifp->if_timer = 0;

		vnet_start(ifp);
		KERNEL_UNLOCK_ONE(curlwp);
		break;
	}

	case VIO_SUBTYPE_NACK:
		DPRINTF(("DATA/NACK/DRING_DATA\n"));
		sc->sc_peer_state = VIO_DP_STOPPED;
		break;

	default:
		DPRINTF(("DATA/0x%02x/DRING_DATA\n", tag->stype));
		break;
	}
}

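/*
 * Reset all VIO session state after an LDC channel reset: forget the
 * handshake progress, drop any frames still wired into the map table
 * and mark every transmit descriptor free again.
 */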
void
vnet_ldc_reset(struct ldc_conn *lc)
{
	struct vnet_softc *sc = lc->lc_sc;
	int i;

	callout_stop(&sc->sc_handshake_co);
	sc->sc_tx_prod = sc->sc_tx_cons = 0;
	sc->sc_peer_state = VIO_DP_STOPPED;
	sc->sc_vio_state = 0;
	vnet_link_state(sc);

	sc->sc_lm->lm_next = 1;
	sc->sc_lm->lm_count = 1;
	for (i = 1; i < sc->sc_lm->lm_nentries; i++)
		sc->sc_lm->lm_slot[i].entry = 0;

	for (i = 0; i < sc->sc_vd->vd_nentries; i++) {
		if (sc->sc_vsd[i].vsd_buf) {
			pool_put(&sc->sc_pool, sc->sc_vsd[i].vsd_buf);
			sc->sc_vsd[i].vsd_buf = NULL;
		}
		sc->sc_vd->vd_desc[i].hdr.dstate = VIO_DESC_FREE;
	}
}

void
vnet_ldc_start(struct ldc_conn *lc)
{
	struct vnet_softc *sc = lc->lc_sc;

	callout_stop(&sc->sc_handshake_co);
	vnet_send_ver_info(sc, VNET_MAJOR, VNET_MINOR);
}

void
vnet_sendmsg(struct vnet_softc *sc, void *msg, size_t len)
{
	struct ldc_conn *lc = &sc->sc_lc;
	int err;

	err = ldc_send_unreliable(lc, msg, len);
	if (err)
		printf("%s: ldc_send_unreliable: %d\n", __func__, err);
}

void
vnet_send_ver_info(struct vnet_softc *sc, uint16_t major, uint16_t minor)
{
	struct vio_ver_info vi;

	bzero(&vi, sizeof(vi));
	vi.tag.type = VIO_TYPE_CTRL;
	vi.tag.stype = VIO_SUBTYPE_INFO;
	vi.tag.stype_env = VIO_VER_INFO;
	vi.tag.sid = sc->sc_local_sid;
	vi.major = major;
	vi.minor = minor;
	vi.dev_class = VDEV_NETWORK;
	vnet_sendmsg(sc, &vi, sizeof(vi));

	sc->sc_vio_state |= VIO_SND_VER_INFO;
}

void
vnet_send_attr_info(struct vnet_softc *sc)
{
	struct vnet_attr_info ai;
	int i;

	bzero(&ai, sizeof(ai));
	ai.tag.type = VIO_TYPE_CTRL;
	ai.tag.stype = VIO_SUBTYPE_INFO;
	ai.tag.stype_env = VIO_ATTR_INFO;
	ai.tag.sid = sc->sc_local_sid;
	ai.xfer_mode = VIO_DRING_MODE;
	ai.addr_type = VNET_ADDR_ETHERMAC;
	ai.ack_freq = 0;
	ai.addr = 0;
	for (i = 0; i < ETHER_ADDR_LEN; i++) {
		ai.addr <<= 8;
		ai.addr |= sc->sc_macaddr[i];
	}
	ai.mtu = ETHER_MAX_LEN - ETHER_CRC_LEN;
	vnet_sendmsg(sc, &ai, sizeof(ai));

	sc->sc_vio_state |= VIO_SND_ATTR_INFO;
}

void
vnet_send_dring_reg(struct vnet_softc *sc)
{
	struct vio_dring_reg dr;

	bzero(&dr, sizeof(dr));
	dr.tag.type = VIO_TYPE_CTRL;
	dr.tag.stype = VIO_SUBTYPE_INFO;
	dr.tag.stype_env = VIO_DRING_REG;
	dr.tag.sid = sc->sc_local_sid;
	dr.dring_ident = 0;
	dr.num_descriptors = sc->sc_vd->vd_nentries;
	dr.descriptor_size = sizeof(struct vnet_desc);
	dr.options = VIO_TX_RING;
	dr.ncookies = 1;
	dr.cookie[0].addr = 0;
	dr.cookie[0].size = PAGE_SIZE;
	vnet_sendmsg(sc, &dr, sizeof(dr));

	sc->sc_vio_state |= VIO_SND_DRING_REG;
}

void
vio_send_rdx(struct vnet_softc *sc)
{
	struct vio_msg_tag tag;

	tag.type = VIO_TYPE_CTRL;
	tag.stype = VIO_SUBTYPE_INFO;
	tag.stype_env = VIO_RDX;
	tag.sid = sc->sc_local_sid;
	vnet_sendmsg(sc, &tag, sizeof(tag));

	sc->sc_vio_state |= VIO_SND_RDX;
}

void
vnet_send_dring_data(struct vnet_softc *sc, uint32_t start_idx)
{
	struct vio_dring_msg dm;
	u_int peer_state;

	peer_state = atomic_swap_uint(&sc->sc_peer_state, VIO_DP_ACTIVE);
	if (peer_state == VIO_DP_ACTIVE) {
		DPRINTF(("%s: peer_state == VIO_DP_ACTIVE\n", __func__));
		return;
	}

	bzero(&dm, sizeof(dm));
	dm.tag.type = VIO_TYPE_DATA;
	dm.tag.stype = VIO_SUBTYPE_INFO;
	dm.tag.stype_env = VIO_DRING_DATA;
	dm.tag.sid = sc->sc_local_sid;
	dm.seq_no = sc->sc_seq_no++;
	dm.dring_ident = sc->sc_dring_ident;
	dm.start_idx = start_idx;
	dm.end_idx = -1;
	vnet_sendmsg(sc, &dm, sizeof(dm));
}

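/*
 * Transmit in dring mode: copy each outgoing packet into a pool
 * buffer, wire the buffer into the LDC map table, mark the transmit
 * descriptor READY and, if the peer has stopped processing, wake it
 * with a DRING_DATA message.
 */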
void
vnet_start(struct ifnet *ifp)
{
	struct vnet_softc *sc = ifp->if_softc;
	struct ldc_conn *lc = &sc->sc_lc;
	struct ldc_map *map = sc->sc_lm;
	struct mbuf *m;
	paddr_t pa;
	unsigned char *buf;
	uint64_t tx_head, tx_tail, tx_state;
	u_int start, prod, count;
	int err;

	if (!(ifp->if_flags & IFF_RUNNING)) {
		DPRINTF(("%s: not in RUNNING state\n", __func__));
		return;
	}

	if (IFQ_IS_EMPTY(&ifp->if_snd)) {
		DPRINTF(("%s: queue is empty\n", __func__));
		return;
	}
	DPRINTF(("%s: queue size %d\n", __func__, ifp->if_snd.ifq_len));

	/*
	 * We cannot transmit packets until a VIO connection has been
	 * established.
	 */
	if (!ISSET(sc->sc_vio_state, VIO_RCV_RDX) ||
	    !ISSET(sc->sc_vio_state, VIO_ACK_RDX)) {
		DPRINTF(("%s: vio connection not established yet\n",
		    __func__));
		return;
	}

	/*
	 * Make sure there is room in the LDC transmit queue to send a
	 * DRING_DATA message.
	 */
	err = hv_ldc_tx_get_state(lc->lc_id, &tx_head, &tx_tail, &tx_state);
	if (err != H_EOK) {
		DPRINTF(("%s: no room in ldc transmit queue\n", __func__));
		return;
	}
	tx_tail += sizeof(struct ldc_pkt);
	tx_tail &= ((lc->lc_txq->lq_nentries * sizeof(struct ldc_pkt)) - 1);
	if (tx_tail == tx_head) {
		DPRINTF(("%s: tail equals head\n", __func__));
		return;
	}

	if (sc->sc_xfer_mode == VIO_DESC_MODE) {
		DPRINTF(("%s: vio_desc_mode\n", __func__));
		vnet_start_desc(ifp);
		return;
	}

	start = prod = sc->sc_tx_prod & (sc->sc_vd->vd_nentries - 1);
	while (sc->sc_vd->vd_desc[prod].hdr.dstate == VIO_DESC_FREE) {
		count = sc->sc_tx_prod - sc->sc_tx_cons;
		if (count >= (sc->sc_vd->vd_nentries - 1) ||
		    map->lm_count >= map->lm_nentries) {
			DPRINTF(("%s: count issue\n", __func__));
			break;
		}

		buf = pool_get(&sc->sc_pool, PR_NOWAIT|PR_ZERO);
		if (buf == NULL) {
			DPRINTF(("%s: buf is NULL\n", __func__));
			break;
		}
		IFQ_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL) {
			pool_put(&sc->sc_pool, buf);
			break;
		}

		m_copydata(m, 0, m->m_pkthdr.len, buf + VNET_ETHER_ALIGN);

#if NBPFILTER > 0
		/*
		 * If BPF is listening on this interface, let it see the
		 * packet before we commit it to the wire.
		 */
		bpf_mtap(ifp, m, BPF_D_OUT);
#endif

		pmap_extract(pmap_kernel(), (vaddr_t)buf, &pa);
		KASSERT((pa & ~PAGE_MASK) == (pa & LDC_MTE_RA_MASK));
		while (map->lm_slot[map->lm_next].entry != 0) {
			map->lm_next++;
			map->lm_next &= (map->lm_nentries - 1);
		}
		map->lm_slot[map->lm_next].entry = (pa & LDC_MTE_RA_MASK);
		map->lm_slot[map->lm_next].entry |= LDC_MTE_CPR;
		atomic_inc_32(&map->lm_count);

		sc->sc_vd->vd_desc[prod].nbytes = MAX(m->m_pkthdr.len, 60);
		sc->sc_vd->vd_desc[prod].ncookies = 1;
		sc->sc_vd->vd_desc[prod].cookie[0].addr =
		    map->lm_next << PAGE_SHIFT | (pa & PAGE_MASK);
		sc->sc_vd->vd_desc[prod].cookie[0].size = 2048;
		membar_producer();
		sc->sc_vd->vd_desc[prod].hdr.dstate = VIO_DESC_READY;

		sc->sc_vsd[prod].vsd_map_idx = map->lm_next;
		sc->sc_vsd[prod].vsd_buf = buf;

		sc->sc_tx_prod++;
		prod = sc->sc_tx_prod & (sc->sc_vd->vd_nentries - 1);

		m_freem(m);
	}

	membar_producer();

	if (start != prod && sc->sc_peer_state != VIO_DP_ACTIVE) {
		vnet_send_dring_data(sc, start);
		ifp->if_timer = 5;
	}
}

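/*
 * Transmit in descriptor mode: like the dring path, but each frame is
 * announced to the peer with an individual VIO_DESC_DATA message
 * instead of a shared descriptor ring.
 */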
void
vnet_start_desc(struct ifnet *ifp)
{
	struct vnet_softc *sc = ifp->if_softc;
	struct ldc_map *map = sc->sc_lm;
	struct vnet_desc_msg dm;
	struct mbuf *m;
	paddr_t pa;
	unsigned char *buf;
	u_int prod, count;

	for (;;) {
		count = sc->sc_tx_prod - sc->sc_tx_cons;
		if (count >= (sc->sc_vd->vd_nentries - 1) ||
		    map->lm_count >= map->lm_nentries)
			return;

		buf = pool_get(&sc->sc_pool, PR_NOWAIT|PR_ZERO);
		if (buf == NULL)
			return;

		IFQ_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL) {
			pool_put(&sc->sc_pool, buf);
			return;
		}

		m_copydata(m, 0, m->m_pkthdr.len, buf);

#if NBPFILTER > 0
		/*
		 * If BPF is listening on this interface, let it see the
		 * packet before we commit it to the wire.
		 */
		bpf_mtap(ifp, m, BPF_D_OUT);
#endif

		pmap_extract(pmap_kernel(), (vaddr_t)buf, &pa);
		KASSERT((pa & ~PAGE_MASK) == (pa & LDC_MTE_RA_MASK));
		while (map->lm_slot[map->lm_next].entry != 0) {
			map->lm_next++;
			map->lm_next &= (map->lm_nentries - 1);
		}
		map->lm_slot[map->lm_next].entry = (pa & LDC_MTE_RA_MASK);
		map->lm_slot[map->lm_next].entry |= LDC_MTE_CPR;
		atomic_inc_32(&map->lm_count);

		prod = sc->sc_tx_prod & (sc->sc_vd->vd_nentries - 1);
		sc->sc_vsd[prod].vsd_map_idx = map->lm_next;
		sc->sc_vsd[prod].vsd_buf = buf;

		bzero(&dm, sizeof(dm));
		dm.tag.type = VIO_TYPE_DATA;
		dm.tag.stype = VIO_SUBTYPE_INFO;
		dm.tag.stype_env = VIO_DESC_DATA;
		dm.tag.sid = sc->sc_local_sid;
		dm.seq_no = sc->sc_seq_no++;
		dm.desc_handle = sc->sc_tx_prod;
		dm.nbytes = MAX(m->m_pkthdr.len, 60);
		dm.ncookies = 1;
		dm.cookie[0].addr =
		    map->lm_next << PAGE_SHIFT | (pa & PAGE_MASK);
		dm.cookie[0].size = 2048;
		vnet_sendmsg(sc, &dm, sizeof(dm));

		sc->sc_tx_prod++;
		sc->sc_tx_prod &= (sc->sc_vd->vd_nentries - 1);

		m_freem(m);
	}
}

int
vnet_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
	struct vnet_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int s, error = 0;

	s = splnet();

	switch (cmd) {
	case SIOCSIFADDR:
		ifp->if_flags |= IFF_UP;
		/* FALLTHROUGH */
	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_flags & IFF_RUNNING) == 0)
				vnet_init(ifp);
		} else {
			if (ifp->if_flags & IFF_RUNNING)
				vnet_stop(ifp, 0);
		}
		break;

	case SIOCGIFMEDIA:
	case SIOCSIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/*
		 * XXX Removing all multicast addresses and adding most
		 * of them back is rather wasteful; we do it anyway for
		 * simplicity.
		 */
		vnet_setmulti(sc, 0);
		error = ether_ioctl(ifp, cmd, data);
		vnet_setmulti(sc, 1);
		if (error == ENETRESET)
			error = 0;
		break;

	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}

	splx(s);

	return (error);
}

void
vnet_watchdog(struct ifnet *ifp)
{
	struct vnet_softc *sc = ifp->if_softc;

	printf("%s: watchdog timeout\n", device_xname(sc->sc_dv));
}

int
vnet_media_change(struct ifnet *ifp)
{
	return (0);
}

void
vnet_media_status(struct ifnet *ifp, struct ifmediareq *imr)
{
	imr->ifm_active = IFM_ETHER | IFM_AUTO;
	imr->ifm_status = IFM_AVALID;
	if (ifp->if_link_state == LINK_STATE_UP &&
	    ifp->if_flags & IFF_UP)
		imr->ifm_status |= IFM_ACTIVE;
}

void
vnet_link_state(struct vnet_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	int link_state = LINK_STATE_DOWN;

	KERNEL_LOCK(1, curlwp);
	if (ISSET(sc->sc_vio_state, VIO_RCV_RDX) &&
	    ISSET(sc->sc_vio_state, VIO_ACK_RDX))
		link_state = LINK_STATE_UP;
	if (ifp->if_link_state != link_state)
		if_link_state_change(ifp, link_state);
	KERNEL_UNLOCK_ONE(curlwp);
}

void
vnet_setmulti(struct vnet_softc *sc, int set)
{
	struct ethercom *ec = &sc->sc_ethercom;
	struct ether_multi *enm;
	struct ether_multistep step;
	struct vnet_mcast_info mi;
	int count = 0;

	if (!ISSET(sc->sc_vio_state, VIO_RCV_RDX) ||
	    !ISSET(sc->sc_vio_state, VIO_ACK_RDX))
		return;

	bzero(&mi, sizeof(mi));
	mi.tag.type = VIO_TYPE_CTRL;
	mi.tag.stype = VIO_SUBTYPE_INFO;
	mi.tag.stype_env = VNET_MCAST_INFO;
	mi.tag.sid = sc->sc_local_sid;
	mi.set = set ? 1 : 0;
	KERNEL_LOCK(1, curlwp);
	ETHER_FIRST_MULTI(step, ec, enm);
	while (enm != NULL) {
		/* XXX What about multicast ranges? */
		bcopy(enm->enm_addrlo, mi.mcast_addr[count], ETHER_ADDR_LEN);
		ETHER_NEXT_MULTI(step, enm);

		count++;
		if (count < VNET_NUM_MCAST)
			continue;

		mi.count = VNET_NUM_MCAST;
		vnet_sendmsg(sc, &mi, sizeof(mi));
		count = 0;
	}

	if (count > 0) {
		mi.count = count;
		vnet_sendmsg(sc, &mi, sizeof(mi));
	}
	KERNEL_UNLOCK_ONE(curlwp);
}

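/*
 * Bring the interface up: allocate the LDC map table and transmit
 * descriptor ring, point the hypervisor at the map table and the
 * TX/RX queues, enable the cbus interrupts and kick off LDC version
 * negotiation.
 */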
int
vnet_init(struct ifnet *ifp)
{
	struct vnet_softc *sc = ifp->if_softc;
	struct ldc_conn *lc = &sc->sc_lc;
	int err;
	vaddr_t va;
	paddr_t pa;

	sc->sc_lm = ldc_map_alloc(2048);
	if (sc->sc_lm == NULL)
		return ENOMEM;

	va = (vaddr_t)sc->sc_lm->lm_slot;
	pa = 0;
	if (pmap_extract(pmap_kernel(), va, &pa) == FALSE)
		panic("pmap_extract failed %lx\n", va);
	err = hv_ldc_set_map_table(lc->lc_id, pa, 2048);
	if (err != H_EOK) {
		printf("hv_ldc_set_map_table %d\n", err);
		return EINVAL;
	}

	sc->sc_vd = vnet_dring_alloc(sc->sc_dmatag, VNET_NUM_SOFT_DESC);
	if (sc->sc_vd == NULL)
		return ENOMEM;
	sc->sc_vsd = malloc(VNET_NUM_SOFT_DESC * sizeof(*sc->sc_vsd), M_DEVBUF,
	    M_NOWAIT|M_ZERO);
	if (sc->sc_vsd == NULL)
		return ENOMEM;

	va = (vaddr_t)sc->sc_vd->vd_desc;
	pa = 0;
	if (pmap_extract(pmap_kernel(), va, &pa) == FALSE)
		panic("pmap_extract failed %lx\n", va);
	sc->sc_lm->lm_slot[0].entry = pa;
	sc->sc_lm->lm_slot[0].entry &= LDC_MTE_RA_MASK;
	sc->sc_lm->lm_slot[0].entry |= LDC_MTE_CPR | LDC_MTE_CPW;
	sc->sc_lm->lm_next = 1;
	sc->sc_lm->lm_count = 1;

	va = lc->lc_txq->lq_va;
	pa = 0;
	if (pmap_extract(pmap_kernel(), va, &pa) == FALSE)
		panic("pmap_extract failed %lx\n", va);
	err = hv_ldc_tx_qconf(lc->lc_id, pa, lc->lc_txq->lq_nentries);
	if (err != H_EOK)
		printf("hv_ldc_tx_qconf %d\n", err);

	va = (vaddr_t)lc->lc_rxq->lq_va;
	pa = 0;
	if (pmap_extract(pmap_kernel(), va, &pa) == FALSE)
		panic("pmap_extract failed %lx\n", va);
	err = hv_ldc_rx_qconf(lc->lc_id, pa, lc->lc_rxq->lq_nentries);
	if (err != H_EOK)
		printf("hv_ldc_rx_qconf %d\n", err);

	cbus_intr_setenabled(sc->sc_bustag, sc->sc_tx_ino, INTR_ENABLED);
	cbus_intr_setenabled(sc->sc_bustag, sc->sc_rx_ino, INTR_ENABLED);

	ldc_send_vers(lc);

	ifp->if_flags |= IFF_RUNNING;

	return 0;
}

void
vnet_stop(struct ifnet *ifp, int disable)
{
	struct vnet_softc *sc = ifp->if_softc;
	struct ldc_conn *lc = &sc->sc_lc;

	ifp->if_flags &= ~IFF_RUNNING;
	ifp->if_timer = 0;

	cbus_intr_setenabled(sc->sc_bustag, sc->sc_tx_ino, INTR_DISABLED);
	cbus_intr_setenabled(sc->sc_bustag, sc->sc_rx_ino, INTR_DISABLED);

#if 0
	/* XXX OpenBSD waits here for the interrupt handlers to finish. */
	intr_barrier(sc->sc_tx_ih);
	intr_barrier(sc->sc_rx_ih);
#else
	printf("vnet_stop() intr_barrier() not available\n");
#endif

	hv_ldc_tx_qconf(lc->lc_id, 0, 0);
	hv_ldc_rx_qconf(lc->lc_id, 0, 0);
	lc->lc_tx_seqid = 0;
	lc->lc_state = 0;
	lc->lc_tx_state = lc->lc_rx_state = LDC_CHANNEL_DOWN;
	vnet_ldc_reset(lc);

	free(sc->sc_vsd, M_DEVBUF);

	vnet_dring_free(sc->sc_dmatag, sc->sc_vd);

	hv_ldc_set_map_table(lc->lc_id, 0, 0);
	ldc_map_free(sc->sc_lm);
}

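/*
 * Allocate and initialise the transmit descriptor ring; all
 * descriptors start out in the FREE state.  The backing storage is
 * rounded up to a whole page so it can be wired into slot 0 of the
 * LDC map table by vnet_init().
 */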
struct vnet_dring *
vnet_dring_alloc(bus_dma_tag_t t, int nentries)
{
	struct vnet_dring *vd;
	bus_size_t size;
	vaddr_t va;
	int i;

	vd = kmem_zalloc(sizeof(struct vnet_dring), KM_SLEEP);

	size = roundup(nentries * sizeof(struct vnet_desc), PAGE_SIZE);

	va = (vaddr_t)kmem_zalloc(size, KM_SLEEP);
	vd->vd_desc = (struct vnet_desc *)va;
	vd->vd_nentries = nentries;
	for (i = 0; i < vd->vd_nentries; i++)
		vd->vd_desc[i].hdr.dstate = VIO_DESC_FREE;

	return (vd);
}

void
vnet_dring_free(bus_dma_tag_t t, struct vnet_dring *vd)
{
	bus_size_t size;

	size = roundup(vd->vd_nentries * sizeof(struct vnet_desc), PAGE_SIZE);

	kmem_free(vd->vd_desc, size);
	kmem_free(vd, sizeof(struct vnet_dring));
}