1/*	$OpenBSD: vdsp.c,v 1.48 2021/10/24 17:05:04 mpi Exp $	*/
2/*
3 * Copyright (c) 2009, 2011, 2014 Mark Kettenis
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include <sys/param.h>
19#include <sys/conf.h>
20#include <sys/proc.h>
21#include <sys/buf.h>
22#include <sys/device.h>
23#include <sys/disklabel.h>
24#include <sys/fcntl.h>
25#include <sys/lock.h>
26#include <sys/malloc.h>
27#include <sys/mutex.h>
28#include <sys/namei.h>
29#include <sys/systm.h>
30#include <sys/task.h>
31#include <sys/vnode.h>
32#include <sys/dkio.h>
33#include <sys/specdev.h>
34
35#include <machine/autoconf.h>
36#include <machine/conf.h>
37#include <machine/hypervisor.h>
38#include <machine/mdesc.h>
39
40#include <uvm/uvm_extern.h>
41
42#include <scsi/scsi_all.h>
43#include <scsi/scsi_disk.h>
44#include <scsi/scsiconf.h>
45
46#include <isofs/cd9660/iso.h>
47
48#include <dev/sun/disklabel.h>
49
50#include <sparc64/dev/cbusvar.h>
51#include <sparc64/dev/ldcvar.h>
52#include <sparc64/dev/viovar.h>
53
/* Debug printf wrapper; compiled in only when VDSP_DEBUG is defined. */
#ifdef VDSP_DEBUG
#define DPRINTF(x)	printf x
#else
#define DPRINTF(x)
#endif

/* LDC transmit/receive queue sizes, in packets. */
#define VDSK_TX_ENTRIES			64
#define VDSK_RX_ENTRIES			64

/* Upper bounds accepted in a peer's descriptor ring registration. */
#define VDSK_MAX_DESCRIPTORS		1024
#define VDSK_MAX_DESCRIPTOR_SIZE	512
65
/*
 * vDisk ATTR_INFO handshake payload; wire layout defined by the
 * Sun/Oracle vDisk protocol, so fields must not be reordered.
 */
struct vd_attr_info {
	struct vio_msg_tag	tag;
	uint8_t			xfer_mode;	/* VIO_DESC_MODE or VIO_DRING_MODE */
	uint8_t			vd_type;	/* VD_DISK_TYPE_* */
	uint8_t			vd_mtype;	/* VD_MEDIA_TYPE_* (vDisk >= 1.1) */
	uint8_t			_reserved1;
	uint32_t		vdisk_block_size;
	uint64_t		operations;	/* bitmask of supported VD_OP_* */
	uint64_t		vdisk_size;	/* size in blocks */
	uint64_t		max_xfer_sz;	/* max transfer size in blocks */
	uint64_t		_reserved2[2];
};

#define VD_DISK_TYPE_SLICE	0x01
#define VD_DISK_TYPE_DISK	0x02

#define VD_MEDIA_TYPE_FIXED	0x01
#define VD_MEDIA_TYPE_CD	0x02
#define VD_MEDIA_TYPE_DVD	0x03

/* vDisk version 1.0. */
#define VD_OP_BREAD		0x01
#define VD_OP_BWRITE		0x02
#define VD_OP_FLUSH		0x03
#define VD_OP_GET_WCE		0x04
#define VD_OP_SET_WCE		0x05
#define VD_OP_GET_VTOC		0x06
#define VD_OP_SET_VTOC		0x07
#define VD_OP_GET_DISKGEOM	0x08
#define VD_OP_SET_DISKGEOM	0x09
#define VD_OP_GET_DEVID		0x0b
#define VD_OP_GET_EFI		0x0c
#define VD_OP_SET_EFI		0x0d

/* vDisk version 1.1 */
#define VD_OP_SCSICMD		0x0a
#define VD_OP_RESET		0x0e
#define VD_OP_GET_ACCESS	0x0f
#define VD_OP_SET_ACCESS	0x10
#define VD_OP_GET_CAPACITY	0x11
106
/* Sun standard fields. */
struct sun_vtoc_preamble {
	char	sl_text[128];
	u_int	sl_version;	/* label version */
	char	sl_volume[8];	/* short volume name */
	u_short	sl_nparts;	/* partition count */

	struct sun_partinfo sl_part[8];

	u_int	sl_bootinfo[3];
	u_int	sl_sanity;
};

/* Per-partition entry of the vDisk GET/SET_VTOC payload. */
struct vd_vtoc_part {
	uint16_t	id_tag;
	uint16_t	perm;
	uint32_t	reserved;
	uint64_t	start;		/* start, in blocks */
	uint64_t	nblocks;	/* length, in blocks */

};
/* vDisk GET/SET_VTOC payload (wire format). */
struct vd_vtoc {
	uint8_t		volume_name[8];
	uint16_t	sector_size;
	uint16_t	num_partitions;
	uint32_t	reserved;
	uint8_t		ascii_label[128];
	struct vd_vtoc_part partition[8];
};

/* vDisk GET_DISKGEOM payload (wire format). */
struct vd_diskgeom {
	uint16_t	ncyl;		/* data cylinders */
	uint16_t	acyl;		/* alternate cylinders */
	uint16_t	bcyl;
	uint16_t	nhead;
	uint16_t	nsect;
	uint16_t	intrlv;
	uint16_t	apc;
	uint16_t	rpm;
	uint16_t	pcyl;		/* physical cylinders */
	uint16_t	write_reinstruct;
	uint16_t	read_reinstruct;
};
150
/*
 * In-ring vDisk request descriptor (VIO_DRING_MODE).  The cookie
 * array is variable length; ncookies gives the real count.
 */
struct vd_desc {
	struct vio_dring_hdr	hdr;
	uint64_t		req_id;
	uint8_t			operation;	/* VD_OP_* */
	uint8_t			slice;
	uint16_t		_reserved1;
	uint32_t		status;		/* errno-style result */
	uint64_t		offset;		/* in blocks */
	uint64_t		size;		/* in bytes */
	uint32_t		ncookies;
	uint32_t		_reserved2;
	struct ldc_cookie	cookie[1];	/* really ncookies entries */
};

#define VD_SLICE_NONE		0xff

/* In-band vDisk request message (VIO_DESC_MODE). */
struct vdsk_desc_msg {
	struct vio_msg_tag	tag;
	uint64_t		seq_no;
	uint64_t		desc_handle;
	uint64_t		req_id;
	uint8_t			operation;	/* VD_OP_* */
	uint8_t			slice;
	uint16_t		_reserved1;
	uint32_t		status;		/* errno-style result */
	uint64_t		offset;		/* in blocks */
	uint64_t		size;		/* in bytes */
	uint32_t		ncookies;
	uint32_t		_reserved2;
	struct ldc_cookie	cookie[1];	/* really ncookies entries */
};

/*
 * We support vDisk 1.1.
 */
#define VDSK_MAJOR	1
#define VDSK_MINOR	1

/*
 * But we only support a subset of the defined commands.
 */
#define VD_OP_MASK \
    ((1 << VD_OP_BREAD) | (1 << VD_OP_BWRITE) | (1 << VD_OP_FLUSH) | \
     (1 << VD_OP_GET_WCE) | (1 << VD_OP_SET_WCE) | \
     (1 << VD_OP_GET_VTOC) | (1 << VD_OP_SET_VTOC) | \
     (1 << VD_OP_GET_DISKGEOM))
197
/* Per-instance state of a virtual disk server port. */
struct vdsp_softc {
	struct device	sc_dv;
	int		sc_idx;		/* machine descriptor node index */
	bus_space_tag_t	sc_bustag;
	bus_dma_tag_t	sc_dmatag;

	uint64_t	sc_tx_ino;	/* interrupt vectors */
	uint64_t	sc_rx_ino;
	void		*sc_tx_ih;
	void		*sc_rx_ih;

	struct ldc_conn	sc_lc;		/* LDC channel to the client */

	/* VIO handshake progress flags. */
	uint16_t	sc_vio_state;
#define VIO_SND_VER_INFO	0x0001
#define VIO_ACK_VER_INFO	0x0002
#define VIO_RCV_VER_INFO	0x0004
#define VIO_SND_ATTR_INFO	0x0008
#define VIO_ACK_ATTR_INFO	0x0010
#define VIO_RCV_ATTR_INFO	0x0020
#define VIO_SND_DRING_REG	0x0040
#define VIO_ACK_DRING_REG	0x0080
#define VIO_RCV_DRING_REG	0x0100
#define VIO_SND_RDX		0x0200
#define VIO_ACK_RDX		0x0400
#define VIO_RCV_RDX		0x0800

	uint16_t	sc_major;	/* negotiated protocol version */
	uint16_t	sc_minor;

	uint8_t		sc_xfer_mode;	/* VIO_DESC_MODE or VIO_DRING_MODE */

	uint32_t	sc_local_sid;
	uint64_t	sc_seq_no;

	uint64_t	sc_dring_ident;
	uint32_t	sc_num_descriptors;	/* client's dring geometry */
	uint32_t	sc_descriptor_size;
	struct ldc_cookie sc_dring_cookie;

	struct task	sc_open_task;
	struct task	sc_alloc_task;
	struct task	sc_close_task;

	/* In-band (VIO_DESC_MODE) request queue, protected by sc_desc_mtx. */
	struct mutex	sc_desc_mtx;
	struct vdsk_desc_msg *sc_desc_msg[VDSK_RX_ENTRIES];
	int		sc_desc_head;
	int		sc_desc_tail;

	struct task	sc_read_task;

	/* Local shadow of the client's descriptor ring (VIO_DRING_MODE). */
	caddr_t		sc_vd;
	struct task	sc_vd_task;
	struct vd_desc	**sc_vd_ring;
	u_int		sc_vd_prod;
	u_int		sc_vd_cons;

	uint32_t	sc_vdisk_block_size;
	uint64_t	sc_vdisk_size;	/* in blocks */

	struct vnode	*sc_vp;		/* backing device or file */

	struct sun_disklabel *sc_label;	/* cached on-disk label, or NULL */
	uint16_t	sc_ncyl;
	uint16_t	sc_acyl;
	uint16_t	sc_nhead;
	uint16_t	sc_nsect;
};
266
/* Autoconf glue. */
int	vdsp_match(struct device *, void *, void *);
void	vdsp_attach(struct device *, struct device *, void *);

const struct cfattach vdsp_ca = {
	sizeof(struct vdsp_softc), vdsp_match, vdsp_attach
};

struct cfdriver vdsp_cd = {
	NULL, "vdsp", DV_DULL
};

/* Interrupt handlers. */
int	vdsp_tx_intr(void *);
int	vdsp_rx_intr(void *);

/* VIO message dispatch. */
void	vdsp_rx_data(struct ldc_conn *, struct ldc_pkt *);
void	vdsp_rx_vio_ctrl(struct vdsp_softc *, struct vio_msg *);
void	vdsp_rx_vio_ver_info(struct vdsp_softc *, struct vio_msg_tag *);
void	vdsp_rx_vio_attr_info(struct vdsp_softc *, struct vio_msg_tag *);
void	vdsp_rx_vio_dring_reg(struct vdsp_softc *, struct vio_msg_tag *);
void	vdsp_rx_vio_rdx(struct vdsp_softc *sc, struct vio_msg_tag *);
void	vdsp_rx_vio_data(struct vdsp_softc *sc, struct vio_msg *);
void	vdsp_rx_vio_dring_data(struct vdsp_softc *sc,
	    struct vio_msg_tag *);
void	vdsp_rx_vio_desc_data(struct vdsp_softc *sc, struct vio_msg_tag *);

void	vdsp_ldc_reset(struct ldc_conn *);
void	vdsp_ldc_start(struct ldc_conn *);

void	vdsp_sendmsg(struct vdsp_softc *, void *, size_t, int dowait);

/* Task and request handlers. */
void	vdsp_open(void *);
void	vdsp_close(void *);
void	vdsp_alloc(void *);
void	vdsp_readlabel(struct vdsp_softc *);
int	vdsp_writelabel(struct vdsp_softc *);
int	vdsp_is_iso(struct vdsp_softc *);
void	vdsp_read(void *);
void	vdsp_read_desc(struct vdsp_softc *, struct vdsk_desc_msg *);
void	vdsp_vd_task(void *);
void	vdsp_read_dring(void *, void *);
void	vdsp_write_dring(void *, void *);
void	vdsp_flush_dring(void *, void *);
void	vdsp_get_vtoc(void *, void *);
void	vdsp_set_vtoc(void *, void *);
void	vdsp_get_diskgeom(void *, void *);
void	vdsp_unimp(void *, void *);

void	vdsp_ack_desc(struct vdsp_softc *, struct vd_desc *);
315
316int
317vdsp_match(struct device *parent, void *match, void *aux)
318{
319	struct cbus_attach_args *ca = aux;
320
321	if (strcmp(ca->ca_name, "vds-port") == 0)
322		return (1);
323
324	return (0);
325}
326
/*
 * Attach a vdsp instance: establish Tx/Rx interrupt handlers,
 * allocate the LDC queues and set up the tasks that run the slow
 * parts (open/alloc/close/read) in process context on systq.
 */
void
vdsp_attach(struct device *parent, struct device *self, void *aux)
{
	struct vdsp_softc *sc = (struct vdsp_softc *)self;
	struct cbus_attach_args *ca = aux;
	struct ldc_conn *lc;

	sc->sc_idx = ca->ca_idx;
	sc->sc_bustag = ca->ca_bustag;
	sc->sc_dmatag = ca->ca_dmatag;
	sc->sc_tx_ino = ca->ca_tx_ino;
	sc->sc_rx_ino = ca->ca_rx_ino;

	printf(": ivec 0x%llx, 0x%llx", sc->sc_tx_ino, sc->sc_rx_ino);

	mtx_init(&sc->sc_desc_mtx, IPL_BIO);

	/*
	 * Un-configure queues before registering interrupt handlers,
	 * such that we dont get any stale LDC packets or events.
	 */
	hv_ldc_tx_qconf(ca->ca_id, 0, 0);
	hv_ldc_rx_qconf(ca->ca_id, 0, 0);

	sc->sc_tx_ih = bus_intr_establish(ca->ca_bustag, sc->sc_tx_ino,
	    IPL_BIO, BUS_INTR_ESTABLISH_MPSAFE, vdsp_tx_intr, sc,
	    sc->sc_dv.dv_xname);
	sc->sc_rx_ih = bus_intr_establish(ca->ca_bustag, sc->sc_rx_ino,
	    IPL_BIO, BUS_INTR_ESTABLISH_MPSAFE, vdsp_rx_intr, sc,
	    sc->sc_dv.dv_xname);
	if (sc->sc_tx_ih == NULL || sc->sc_rx_ih == NULL) {
		printf(", can't establish interrupt\n");
		return;
	}

	/* Hook this instance into the generic LDC machinery. */
	lc = &sc->sc_lc;
	lc->lc_id = ca->ca_id;
	lc->lc_sc = sc;
	lc->lc_reset = vdsp_ldc_reset;
	lc->lc_start = vdsp_ldc_start;
	lc->lc_rx_data = vdsp_rx_data;

	lc->lc_txq = ldc_queue_alloc(sc->sc_dmatag, VDSK_TX_ENTRIES);
	if (lc->lc_txq == NULL) {
		printf(", can't allocate tx queue\n");
		return;
	}

	lc->lc_rxq = ldc_queue_alloc(sc->sc_dmatag, VDSK_RX_ENTRIES);
	if (lc->lc_rxq == NULL) {
		printf(", can't allocate rx queue\n");
		goto free_txqueue;
	}

	/* These tasks sleep, so they run on systq rather than here. */
	task_set(&sc->sc_open_task, vdsp_open, sc);
	task_set(&sc->sc_alloc_task, vdsp_alloc, sc);
	task_set(&sc->sc_close_task, vdsp_close, sc);
	task_set(&sc->sc_read_task, vdsp_read, sc);

	printf("\n");

	return;

#if 0
free_rxqueue:
	ldc_queue_free(sc->sc_dmatag, lc->lc_rxq);
#endif
free_txqueue:
	ldc_queue_free(sc->sc_dmatag, lc->lc_txq);
}
397
398int
399vdsp_tx_intr(void *arg)
400{
401	struct vdsp_softc *sc = arg;
402	struct ldc_conn *lc = &sc->sc_lc;
403	uint64_t tx_head, tx_tail, tx_state;
404	int err;
405
406	err = hv_ldc_tx_get_state(lc->lc_id, &tx_head, &tx_tail, &tx_state);
407	if (err != H_EOK) {
408		printf("hv_ldc_rx_get_state %d\n", err);
409		return (0);
410	}
411
412	if (tx_state != lc->lc_tx_state) {
413		switch (tx_state) {
414		case LDC_CHANNEL_DOWN:
415			DPRINTF(("%s: Tx link down\n", __func__));
416			break;
417		case LDC_CHANNEL_UP:
418			DPRINTF(("%s: Tx link up\n", __func__));
419			break;
420		case LDC_CHANNEL_RESET:
421			DPRINTF(("%s: Tx link reset\n", __func__));
422			break;
423		}
424		lc->lc_tx_state = tx_state;
425	}
426
427	wakeup(lc->lc_txq);
428	return (1);
429}
430
/*
 * LDC receive-queue interrupt handler.  Handles channel state
 * transitions, then dequeues and dispatches one packet per call;
 * the queue head is advanced afterwards so the hypervisor can
 * deliver the next packet.
 */
int
vdsp_rx_intr(void *arg)
{
	struct vdsp_softc *sc = arg;
	struct ldc_conn *lc = &sc->sc_lc;
	uint64_t rx_head, rx_tail, rx_state;
	struct ldc_pkt *lp;
	int err;

	err = hv_ldc_rx_get_state(lc->lc_id, &rx_head, &rx_tail, &rx_state);
	if (err == H_EINVAL)
		return (0);	/* queue not (yet) configured */
	if (err != H_EOK) {
		printf("hv_ldc_rx_get_state %d\n", err);
		return (0);
	}

	if (rx_state != lc->lc_rx_state) {
		/* Channel state changed; restart the handshake on down/reset. */
		switch (rx_state) {
		case LDC_CHANNEL_DOWN:
			DPRINTF(("%s: Rx link down\n", __func__));
			lc->lc_tx_seqid = 0;
			lc->lc_state = 0;
			lc->lc_reset(lc);
			break;
		case LDC_CHANNEL_UP:
			DPRINTF(("%s: Rx link up\n", __func__));
			break;
		case LDC_CHANNEL_RESET:
			DPRINTF(("%s: Rx link reset\n", __func__));
			lc->lc_tx_seqid = 0;
			lc->lc_state = 0;
			lc->lc_reset(lc);
			break;
		}
		lc->lc_rx_state = rx_state;
		return (1);
	}

	if (lc->lc_rx_state == LDC_CHANNEL_DOWN)
		return (1);

	/* Dispatch the packet at the current queue head. */
	lp = (struct ldc_pkt *)(lc->lc_rxq->lq_va + rx_head);
	switch (lp->type) {
	case LDC_CTRL:
		ldc_rx_ctrl(lc, lp);
		break;

	case LDC_DATA:
		ldc_rx_data(lc, lp);
		break;

	default:
		DPRINTF(("0x%02x/0x%02x/0x%02x\n", lp->type, lp->stype,
		    lp->ctrl));
		ldc_reset(lc);
		break;
	}

	/* Advance the head; queue size is a power of two, so mask wraps. */
	rx_head += sizeof(*lp);
	rx_head &= ((lc->lc_rxq->lq_nentries * sizeof(*lp)) - 1);
	err = hv_ldc_rx_set_qhead(lc->lc_id, rx_head);
	if (err != H_EOK)
		printf("%s: hv_ldc_rx_set_qhead %d\n", __func__, err);

	return (1);
}
498
499void
500vdsp_rx_data(struct ldc_conn *lc, struct ldc_pkt *lp)
501{
502	struct vio_msg *vm = (struct vio_msg *)lp;
503
504	switch (vm->type) {
505	case VIO_TYPE_CTRL:
506		if ((lp->env & LDC_FRAG_START) == 0 &&
507		    (lp->env & LDC_FRAG_STOP) == 0)
508			return;
509		vdsp_rx_vio_ctrl(lc->lc_sc, vm);
510		break;
511
512	case VIO_TYPE_DATA:
513		if((lp->env & LDC_FRAG_START) == 0)
514			return;
515		vdsp_rx_vio_data(lc->lc_sc, vm);
516		break;
517
518	default:
519		DPRINTF(("Unhandled packet type 0x%02x\n", vm->type));
520		ldc_reset(lc);
521		break;
522	}
523}
524
525void
526vdsp_rx_vio_ctrl(struct vdsp_softc *sc, struct vio_msg *vm)
527{
528	struct vio_msg_tag *tag = (struct vio_msg_tag *)&vm->type;
529
530	switch (tag->stype_env) {
531	case VIO_VER_INFO:
532		vdsp_rx_vio_ver_info(sc, tag);
533		break;
534	case VIO_ATTR_INFO:
535		vdsp_rx_vio_attr_info(sc, tag);
536		break;
537	case VIO_DRING_REG:
538		vdsp_rx_vio_dring_reg(sc, tag);
539		break;
540	case VIO_RDX:
541		vdsp_rx_vio_rdx(sc, tag);
542		break;
543	default:
544		DPRINTF(("CTRL/0x%02x/0x%04x\n", tag->stype, tag->stype_env));
545		break;
546	}
547}
548
/*
 * Handle a VER_INFO control message: negotiate the vDisk protocol
 * version with the client.  We NACK peers that are not disk clients
 * or request an unsupported major version; otherwise we ACK, clamping
 * the minor version to what we implement.
 */
void
vdsp_rx_vio_ver_info(struct vdsp_softc *sc, struct vio_msg_tag *tag)
{
	struct vio_ver_info *vi = (struct vio_ver_info *)tag;

	switch (vi->tag.stype) {
	case VIO_SUBTYPE_INFO:
		DPRINTF(("CTRL/INFO/VER_INFO\n"));

		/* Make sure we're talking to a virtual disk. */
		if (vi->dev_class != VDEV_DISK) {
			/* Huh, we're not talking to a disk device? */
			printf("%s: peer is not a disk device\n",
			    sc->sc_dv.dv_xname);
			vi->tag.stype = VIO_SUBTYPE_NACK;
			vi->major = 0;
			vdsp_sendmsg(sc, vi, sizeof(*vi), 0);
			return;
		}

		if (vi->major != VDSK_MAJOR) {
			/* NACK and advertise the version we do support. */
			vi->tag.stype = VIO_SUBTYPE_NACK;
			vi->major = VDSK_MAJOR;
			vi->minor = VDSK_MINOR;
			vdsp_sendmsg(sc, vi, sizeof(*vi), 0);
			return;
		}

		sc->sc_major = vi->major;
		sc->sc_minor = vi->minor;
		sc->sc_local_sid = vi->tag.sid;

		vi->tag.stype = VIO_SUBTYPE_ACK;
		if (vi->minor > VDSK_MINOR)
			vi->minor = VDSK_MINOR;
		vi->dev_class = VDEV_DISK_SERVER;
		vdsp_sendmsg(sc, vi, sizeof(*vi), 0);
		sc->sc_vio_state |= VIO_RCV_VER_INFO;
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("CTRL/ACK/VER_INFO\n"));
		break;

	default:
		DPRINTF(("CTRL/0x%02x/VER_INFO\n", vi->tag.stype));
		break;
	}
}
598
/*
 * Handle an ATTR_INFO control message: record the client's transfer
 * mode and kick off vdsp_open() on systq, which opens the backing
 * store and sends the ATTR_INFO ACK (it may sleep, so it can't run
 * here in interrupt context).
 */
void
vdsp_rx_vio_attr_info(struct vdsp_softc *sc, struct vio_msg_tag *tag)
{
	struct vd_attr_info *ai = (struct vd_attr_info *)tag;

	switch (ai->tag.stype) {
	case VIO_SUBTYPE_INFO:
		DPRINTF(("CTRL/INFO/ATTR_INFO\n"));

		if (ai->xfer_mode != VIO_DESC_MODE &&
		    ai->xfer_mode != VIO_DRING_MODE) {
			printf("%s: peer uses unsupported xfer mode 0x%02x\n",
			    sc->sc_dv.dv_xname, ai->xfer_mode);
			ai->tag.stype = VIO_SUBTYPE_NACK;
			vdsp_sendmsg(sc, ai, sizeof(*ai), 0);
			return;
		}
		sc->sc_xfer_mode = ai->xfer_mode;
		sc->sc_vio_state |= VIO_RCV_ATTR_INFO;

		task_add(systq, &sc->sc_open_task);
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("CTRL/ACK/ATTR_INFO\n"));
		break;

	default:
		DPRINTF(("CTRL/0x%02x/ATTR_INFO\n", ai->tag.stype));
		break;
	}
}
631
/*
 * Handle a DRING_REG control message: validate and record the
 * client's descriptor ring geometry, then schedule vdsp_alloc() on
 * systq to allocate our shadow copy and send the ACK.  Rings that
 * exceed our limits or use more than one cookie are NACKed.
 */
void
vdsp_rx_vio_dring_reg(struct vdsp_softc *sc, struct vio_msg_tag *tag)
{
	struct vio_dring_reg *dr = (struct vio_dring_reg *)tag;

	switch (dr->tag.stype) {
	case VIO_SUBTYPE_INFO:
		DPRINTF(("CTRL/INFO/DRING_REG\n"));

		if (dr->num_descriptors > VDSK_MAX_DESCRIPTORS ||
		    dr->descriptor_size > VDSK_MAX_DESCRIPTOR_SIZE ||
		    dr->ncookies > 1) {
			dr->tag.stype = VIO_SUBTYPE_NACK;
			vdsp_sendmsg(sc, dr, sizeof(*dr), 0);
			return;
		}
		sc->sc_num_descriptors = dr->num_descriptors;
		sc->sc_descriptor_size = dr->descriptor_size;
		sc->sc_dring_cookie = dr->cookie[0];
		sc->sc_vio_state |= VIO_RCV_DRING_REG;

		task_add(systq, &sc->sc_alloc_task);
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("CTRL/ACK/DRING_REG\n"));
		break;

	default:
		DPRINTF(("CTRL/0x%02x/DRING_REG\n", dr->tag.stype));
		break;
	}
}
665
666void
667vdsp_rx_vio_rdx(struct vdsp_softc *sc, struct vio_msg_tag *tag)
668{
669	switch(tag->stype) {
670	case VIO_SUBTYPE_INFO:
671		DPRINTF(("CTRL/INFO/RDX\n"));
672
673		tag->stype = VIO_SUBTYPE_ACK;
674		tag->sid = sc->sc_local_sid;
675		vdsp_sendmsg(sc, tag, sizeof(*tag), 0);
676		sc->sc_vio_state |= VIO_RCV_RDX;
677		break;
678
679	case VIO_SUBTYPE_ACK:
680		DPRINTF(("CTRL/ACK/RDX\n"));
681		break;
682
683	default:
684		DPRINTF(("CTRL/0x%02x/RDX (VIO)\n", tag->stype));
685		break;
686	}
687}
688
689void
690vdsp_rx_vio_data(struct vdsp_softc *sc, struct vio_msg *vm)
691{
692	struct vio_msg_tag *tag = (struct vio_msg_tag *)&vm->type;
693
694	if (!ISSET(sc->sc_vio_state, VIO_RCV_RDX)) {
695		DPRINTF(("Spurious DATA/0x%02x/0x%04x\n", tag->stype,
696		    tag->stype_env));
697		return;
698	}
699
700	switch(tag->stype_env) {
701	case VIO_DESC_DATA:
702		vdsp_rx_vio_desc_data(sc, tag);
703		break;
704
705	case VIO_DRING_DATA:
706		vdsp_rx_vio_dring_data(sc, tag);
707		break;
708
709	default:
710		DPRINTF(("DATA/0x%02x/0x%04x\n", tag->stype, tag->stype_env));
711		break;
712	}
713}
714
/*
 * Handle a DRING_DATA message: copy the referenced descriptor from
 * the client's ring into our local shadow via hv_ldc_copy(), then
 * enqueue it on sc_vd_ring for vdsp_vd_task() to process in process
 * context.
 */
void
vdsp_rx_vio_dring_data(struct vdsp_softc *sc, struct vio_msg_tag *tag)
{
	struct vio_dring_msg *dm = (struct vio_dring_msg *)tag;
	struct vd_desc *vd;
	vaddr_t va;
	paddr_t pa;
	uint64_t size, off;
	psize_t nbytes;
	int err;

	switch(tag->stype) {
	case VIO_SUBTYPE_INFO:
		DPRINTF(("DATA/INFO/DRING_DATA\n"));

		/* NACK references to unknown rings or out-of-range slots. */
		if (dm->dring_ident != sc->sc_dring_ident ||
		    dm->start_idx >= sc->sc_num_descriptors) {
			dm->tag.stype = VIO_SUBTYPE_NACK;
			vdsp_sendmsg(sc, dm, sizeof(*dm), 0);
			return;
		}

		/*
		 * Copy the descriptor in, page by page, since
		 * hv_ldc_copy() works on physical addresses.
		 * NOTE(review): the PAGE_SIZE - (off & PAGE_MASK) bound
		 * assumes sc_vd shares page alignment with the ring
		 * offset — presumably guaranteed by the allocator here;
		 * verify before changing the allocation of sc_vd.
		 */
		off = dm->start_idx * sc->sc_descriptor_size;
		vd = (struct vd_desc *)(sc->sc_vd + off);
		va = (vaddr_t)vd;
		size = sc->sc_descriptor_size;
		while (size > 0) {
			pmap_extract(pmap_kernel(), va, &pa);
			nbytes = MIN(size, PAGE_SIZE - (off & PAGE_MASK));
			err = hv_ldc_copy(sc->sc_lc.lc_id, LDC_COPY_IN,
			    sc->sc_dring_cookie.addr + off, pa,
			    nbytes, &nbytes);
			if (err != H_EOK) {
				printf("%s: hv_ldc_copy %d\n", __func__, err);
				return;
			}
			va += nbytes;
			size -= nbytes;
			off += nbytes;
		}

		/* Publish the descriptor before bumping the producer index. */
		sc->sc_vd_ring[sc->sc_vd_prod % sc->sc_num_descriptors] = vd;
		membar_producer();
		sc->sc_vd_prod++;
		task_add(systq, &sc->sc_vd_task);

		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("DATA/ACK/DRING_DATA\n"));
		break;

	case VIO_SUBTYPE_NACK:
		DPRINTF(("DATA/NACK/DRING_DATA\n"));
		break;

	default:
		DPRINTF(("DATA/0x%02x/DRING_DATA\n", tag->stype));
		break;
	}
}
776
/*
 * Consumer side of the descriptor ring filled by
 * vdsp_rx_vio_dring_data(): runs on systq and dispatches each
 * pending descriptor to the handler for its operation code.
 */
void
vdsp_vd_task(void *xsc)
{
	struct vdsp_softc *sc = xsc;
	struct vd_desc *vd;

	while (sc->sc_vd_cons != sc->sc_vd_prod) {
		/* Pair with the membar_producer() in the enqueue path. */
		membar_consumer();
		vd = sc->sc_vd_ring[sc->sc_vd_cons++ % sc->sc_num_descriptors];

		DPRINTF(("%s: operation %x\n", sc->sc_dv.dv_xname,
		    vd->operation));
		switch (vd->operation) {
		case VD_OP_BREAD:
			vdsp_read_dring(sc, vd);
			break;
		case VD_OP_BWRITE:
			vdsp_write_dring(sc, vd);
			break;
		case VD_OP_FLUSH:
			vdsp_flush_dring(sc, vd);
			break;
		case VD_OP_GET_VTOC:
			vdsp_get_vtoc(sc, vd);
			break;
		case VD_OP_SET_VTOC:
			vdsp_set_vtoc(sc, vd);
			break;
		case VD_OP_GET_DISKGEOM:
			vdsp_get_diskgeom(sc, vd);
			break;
		case VD_OP_GET_WCE:
		case VD_OP_SET_WCE:
		case VD_OP_GET_DEVID:
			/*
			 * Solaris issues VD_OP_GET_DEVID despite the
			 * fact that we don't advertise it.  It seems
			 * to be able to handle failure just fine, so
			 * we silently ignore it.
			 */
			vdsp_unimp(sc, vd);
			break;
		default:
			printf("%s: unsupported operation 0x%02x\n",
			    sc->sc_dv.dv_xname, vd->operation);
			vdsp_unimp(sc, vd);
			break;
		}
	}
}
827
/*
 * Handle an in-band (VIO_DESC_MODE) data message.  Only VD_OP_BREAD
 * is supported; the message is queued under sc_desc_mtx and
 * vdsp_read() is scheduled on systq to service it.
 */
void
vdsp_rx_vio_desc_data(struct vdsp_softc *sc, struct vio_msg_tag *tag)
{
	struct vdsk_desc_msg *dm = (struct vdsk_desc_msg *)tag;

	switch(tag->stype) {
	case VIO_SUBTYPE_INFO:
		DPRINTF(("DATA/INFO/DESC_DATA\n"));

		switch (dm->operation) {
		case VD_OP_BREAD:
			/* Enqueue; head wraps since VDSK_RX_ENTRIES is 2^n. */
			mtx_enter(&sc->sc_desc_mtx);
			sc->sc_desc_msg[sc->sc_desc_head++] = dm;
			sc->sc_desc_head &= (VDSK_RX_ENTRIES - 1);
			KASSERT(sc->sc_desc_head != sc->sc_desc_tail);
			mtx_leave(&sc->sc_desc_mtx);
			task_add(systq, &sc->sc_read_task);
			break;
		default:
			printf("%s: unsupported operation 0x%02x\n",
			    sc->sc_dv.dv_xname, dm->operation);
			break;
		}
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("DATA/ACK/DESC_DATA\n"));
		break;

	case VIO_SUBTYPE_NACK:
		DPRINTF(("DATA/NACK/DESC_DATA\n"));
		break;

	default:
		DPRINTF(("DATA/0x%02x/DESC_DATA\n", tag->stype));
		break;
	}
}
866
/*
 * LDC reset callback: drop all handshake state and schedule
 * vdsp_close() on systq to release the ring shadow, cached label
 * and backing vnode.
 */
void
vdsp_ldc_reset(struct ldc_conn *lc)
{
	struct vdsp_softc *sc = lc->lc_sc;

	sc->sc_vio_state = 0;
	task_add(systq, &sc->sc_close_task);
}
875
/* LDC start callback: intentionally empty. */
void
vdsp_ldc_start(struct ldc_conn *lc)
{
	/* The vDisk client is supposed to initiate the handshake. */
}
881
/*
 * Send a VIO message over the LDC channel.  With dowait set, retry
 * until the message is queued, sleeping briefly when the transmit
 * queue is full; without it, a full queue silently drops the message.
 */
void
vdsp_sendmsg(struct vdsp_softc *sc, void *msg, size_t len, int dowait)
{
	struct ldc_conn *lc = &sc->sc_lc;
	int err;

	do {
		err = ldc_send_unreliable(lc, msg, len);
		if (dowait && err == EWOULDBLOCK) {
			/*
			 * Seems like the hypervisor doesn't actually
			 * generate interrupts for transmit queues, so
			 * we specify a timeout such that we don't
			 * block forever.
			 */
			err = tsleep_nsec(lc->lc_txq, PWAIT, "vdsp",
			    MSEC_TO_NSEC(10));
		}
	} while (dowait && err == EWOULDBLOCK);
}
902
/*
 * Runs on systq after ATTR_INFO: open the backing block device or
 * file named by the "vds-block-device" machine descriptor property
 * (first time only), determine its size and block size, and send
 * the ATTR_INFO ACK describing the virtual disk.
 */
void
vdsp_open(void *arg1)
{
	struct vdsp_softc *sc = arg1;
	struct proc *p = curproc;
	struct vd_attr_info ai;

	if (sc->sc_vp == NULL) {
		struct nameidata nd;
		struct vattr va;
		struct partinfo pi;
		const char *name;
		dev_t dev;
		int error;

		name = mdesc_get_prop_str(sc->sc_idx, "vds-block-device");
		if (name == NULL)
			return;

		NDINIT(&nd, 0, 0, UIO_SYSSPACE, name, p);
		error = vn_open(&nd, FREAD | FWRITE, 0);
		if (error) {
			printf("VOP_OPEN: %s, %d\n", name, error);
			return;
		}

		if (nd.ni_vp->v_type == VBLK) {
			/* Block device: geometry from its disklabel. */
			dev = nd.ni_vp->v_rdev;
			error = (*bdevsw[major(dev)].d_ioctl)(dev,
			    DIOCGPART, (caddr_t)&pi, FREAD, curproc);
			if (error)
				printf("DIOCGPART: %s, %d\n", name, error);
			/*
			 * NOTE(review): pi is used below even when
			 * DIOCGPART failed — confirm the ioctl cannot
			 * leave pi uninitialized on error.
			 */
			sc->sc_vdisk_block_size = pi.disklab->d_secsize;
			sc->sc_vdisk_size = DL_GETPSIZE(pi.part);
		} else {
			/* Regular file: size from its attributes. */
			error = VOP_GETATTR(nd.ni_vp, &va, p->p_ucred, p);
			if (error)
				printf("VOP_GETATTR: %s, %d\n", name, error);
			sc->sc_vdisk_block_size = DEV_BSIZE;
			sc->sc_vdisk_size = va.va_size / DEV_BSIZE;
		}

		VOP_UNLOCK(nd.ni_vp);
		sc->sc_vp = nd.ni_vp;

		vdsp_readlabel(sc);
	}

	bzero(&ai, sizeof(ai));
	ai.tag.type = VIO_TYPE_CTRL;
	ai.tag.stype = VIO_SUBTYPE_ACK;
	ai.tag.stype_env = VIO_ATTR_INFO;
	ai.tag.sid = sc->sc_local_sid;
	ai.xfer_mode = sc->sc_xfer_mode;
	ai.vd_type = VD_DISK_TYPE_DISK;
	/* Media type exists only in vDisk 1.1 and later. */
	if (sc->sc_major > 1 || sc->sc_minor >= 1) {
		if (vdsp_is_iso(sc))
			ai.vd_mtype = VD_MEDIA_TYPE_CD;
		else
			ai.vd_mtype = VD_MEDIA_TYPE_FIXED;
	}
	ai.vdisk_block_size = sc->sc_vdisk_block_size;
	ai.operations = VD_OP_MASK;
	ai.vdisk_size = sc->sc_vdisk_size;
	ai.max_xfer_sz = MAXPHYS / sc->sc_vdisk_block_size;
	vdsp_sendmsg(sc, &ai, sizeof(ai), 1);
}
970
/*
 * Runs on systq after an LDC reset: release the descriptor ring
 * shadow, the cached disklabel and the backing vnode so the next
 * handshake starts from a clean slate.
 */
void
vdsp_close(void *arg1)
{
	struct vdsp_softc *sc = arg1;
	struct proc *p = curproc;

	sc->sc_seq_no = 0;

	free(sc->sc_vd, M_DEVBUF, 0);
	sc->sc_vd = NULL;
	free(sc->sc_vd_ring, M_DEVBUF,
	     sc->sc_num_descriptors * sizeof(*sc->sc_vd_ring));
	sc->sc_vd_ring = NULL;
	free(sc->sc_label, M_DEVBUF, 0);
	sc->sc_label = NULL;
	if (sc->sc_vp) {
		vn_close(sc->sc_vp, FREAD | FWRITE, p->p_ucred, p);
		sc->sc_vp = NULL;
	}
}
991
/*
 * Read the Sun disklabel from the first sector of the backing store
 * into sc_label.  On read failure the buffer is released and
 * sc_label is left NULL.
 */
void
vdsp_readlabel(struct vdsp_softc *sc)
{
	struct proc *p = curproc;
	struct iovec iov;
	struct uio uio;
	int err;

	if (sc->sc_vp == NULL)
		return;

	sc->sc_label = malloc(sizeof(*sc->sc_label), M_DEVBUF, M_WAITOK);

	iov.iov_base = sc->sc_label;
	iov.iov_len = sizeof(*sc->sc_label);
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = 0;
	uio.uio_resid = sizeof(*sc->sc_label);
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_procp = p;

	vn_lock(sc->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	err = VOP_READ(sc->sc_vp, &uio, 0, p->p_ucred);
	VOP_UNLOCK(sc->sc_vp);
	if (err) {
		free(sc->sc_label, M_DEVBUF, 0);
		sc->sc_label = NULL;
	}
}
1023
1024int
1025vdsp_writelabel(struct vdsp_softc *sc)
1026{
1027	struct proc *p = curproc;
1028	struct iovec iov;
1029	struct uio uio;
1030	int err;
1031
1032	if (sc->sc_vp == NULL || sc->sc_label == NULL)
1033		return (EINVAL);
1034
1035	iov.iov_base = sc->sc_label;
1036	iov.iov_len = sizeof(*sc->sc_label);
1037	uio.uio_iov = &iov;
1038	uio.uio_iovcnt = 1;
1039	uio.uio_offset = 0;
1040	uio.uio_resid = sizeof(*sc->sc_label);
1041	uio.uio_segflg = UIO_SYSSPACE;
1042	uio.uio_rw = UIO_WRITE;
1043	uio.uio_procp = p;
1044
1045	vn_lock(sc->sc_vp, LK_EXCLUSIVE | LK_RETRY);
1046	err = VOP_WRITE(sc->sc_vp, &uio, 0, p->p_ucred);
1047	VOP_UNLOCK(sc->sc_vp);
1048
1049	return (err);
1050}
1051
1052int
1053vdsp_is_iso(struct vdsp_softc *sc)
1054{
1055	struct proc *p = curproc;
1056	struct iovec iov;
1057	struct uio uio;
1058	struct iso_volume_descriptor *vdp;
1059	int err;
1060
1061	if (sc->sc_vp == NULL)
1062		return (0);
1063
1064	vdp = malloc(sizeof(*vdp), M_DEVBUF, M_WAITOK);
1065
1066	iov.iov_base = vdp;
1067	iov.iov_len = sizeof(*vdp);
1068	uio.uio_iov = &iov;
1069	uio.uio_iovcnt = 1;
1070	uio.uio_offset = 16 * ISO_DEFAULT_BLOCK_SIZE;
1071	uio.uio_resid = sizeof(*vdp);
1072	uio.uio_segflg = UIO_SYSSPACE;
1073	uio.uio_rw = UIO_READ;
1074	uio.uio_procp = p;
1075
1076	vn_lock(sc->sc_vp, LK_EXCLUSIVE | LK_RETRY);
1077	err = VOP_READ(sc->sc_vp, &uio, 0, p->p_ucred);
1078	VOP_UNLOCK(sc->sc_vp);
1079
1080	if (err == 0 && memcmp(vdp->id, ISO_STANDARD_ID, sizeof(vdp->id)))
1081		err = ENOENT;
1082
1083	free(vdp, M_DEVBUF, 0);
1084	return (err == 0);
1085}
1086
/*
 * Runs on systq after DRING_REG: allocate the local shadow of the
 * client's descriptor ring plus the pending-descriptor queue, then
 * ACK the registration with a fresh ring identifier.
 */
void
vdsp_alloc(void *arg1)
{
	struct vdsp_softc *sc = arg1;
	struct vio_dring_reg dr;

	/* Bounds were enforced in vdsp_rx_vio_dring_reg(). */
	KASSERT(sc->sc_num_descriptors <= VDSK_MAX_DESCRIPTORS);
	KASSERT(sc->sc_descriptor_size <= VDSK_MAX_DESCRIPTOR_SIZE);
	sc->sc_vd = mallocarray(sc->sc_num_descriptors,
	    sc->sc_descriptor_size, M_DEVBUF, M_WAITOK);
	sc->sc_vd_ring = mallocarray(sc->sc_num_descriptors,
	    sizeof(*sc->sc_vd_ring), M_DEVBUF, M_WAITOK);
	task_set(&sc->sc_vd_task, vdsp_vd_task, sc);

	bzero(&dr, sizeof(dr));
	dr.tag.type = VIO_TYPE_CTRL;
	dr.tag.stype = VIO_SUBTYPE_ACK;
	dr.tag.stype_env = VIO_DRING_REG;
	dr.tag.sid = sc->sc_local_sid;
	dr.dring_ident = ++sc->sc_dring_ident;
	vdsp_sendmsg(sc, &dr, sizeof(dr), 1);
}
1109
/*
 * Runs on systq: drain the queue of in-band read requests filled by
 * vdsp_rx_vio_desc_data().  The mutex is dropped around the actual
 * I/O; the tail index is only advanced after a request completes.
 */
void
vdsp_read(void *arg1)
{
	struct vdsp_softc *sc = arg1;

	mtx_enter(&sc->sc_desc_mtx);
	while (sc->sc_desc_tail != sc->sc_desc_head) {
		mtx_leave(&sc->sc_desc_mtx);
		vdsp_read_desc(sc, sc->sc_desc_msg[sc->sc_desc_tail]);
		mtx_enter(&sc->sc_desc_mtx);
		sc->sc_desc_tail++;
		sc->sc_desc_tail &= (VDSK_RX_ENTRIES - 1);
	}
	mtx_leave(&sc->sc_desc_mtx);
}
1125
/*
 * Service one in-band VD_OP_BREAD request: read from the backing
 * vnode into a bounce buffer, copy the data out to the client's LDC
 * cookies page by page, then ACK the request with its status.  The
 * kernel lock is released during the hv_ldc_copy() loop.
 */
void
vdsp_read_desc(struct vdsp_softc *sc, struct vdsk_desc_msg *dm)
{
	struct ldc_conn *lc = &sc->sc_lc;
	struct proc *p = curproc;
	struct iovec iov;
	struct uio uio;
	caddr_t buf;
	vaddr_t va;
	paddr_t pa;
	uint64_t size, off;
	psize_t nbytes;
	int err, i;

	if (sc->sc_vp == NULL)
		return;

	buf = malloc(dm->size, M_DEVBUF, M_WAITOK);

	iov.iov_base = buf;
	iov.iov_len = dm->size;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = dm->offset * DEV_BSIZE;
	uio.uio_resid = dm->size;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_procp = p;

	vn_lock(sc->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	dm->status = VOP_READ(sc->sc_vp, &uio, 0, p->p_ucred);
	VOP_UNLOCK(sc->sc_vp);

	KERNEL_UNLOCK();
	if (dm->status == 0) {
		/*
		 * Walk the client's cookie list; each hv_ldc_copy()
		 * transfer is clipped to both the cookie and the
		 * current page.
		 */
		i = 0;
		va = (vaddr_t)buf;
		size = dm->size;
		off = 0;	/* offset within the current cookie */
		while (size > 0 && i < dm->ncookies) {
			pmap_extract(pmap_kernel(), va, &pa);
			nbytes = MIN(size, dm->cookie[i].size - off);
			nbytes = MIN(nbytes, PAGE_SIZE - (off & PAGE_MASK));
			err = hv_ldc_copy(lc->lc_id, LDC_COPY_OUT,
			    dm->cookie[i].addr + off, pa, nbytes, &nbytes);
			if (err != H_EOK) {
				printf("%s: hv_ldc_copy: %d\n", __func__, err);
				dm->status = EIO;
				KERNEL_LOCK();
				goto fail;
			}
			va += nbytes;
			size -= nbytes;
			off += nbytes;
			if (off >= dm->cookie[i].size) {
				off = 0;
				i++;
			}
		}
	}
	KERNEL_LOCK();

fail:
	free(buf, M_DEVBUF, 0);

	/* ACK the descriptor. */
	dm->tag.stype = VIO_SUBTYPE_ACK;
	dm->tag.sid = sc->sc_local_sid;
	vdsp_sendmsg(sc, dm, sizeof(*dm) +
	    (dm->ncookies - 1) * sizeof(struct ldc_cookie), 1);
}
1197
/*
 * Service one dring-mode VD_OP_BREAD request: read from the backing
 * vnode into a bounce buffer, copy the data out to the client's LDC
 * cookies page by page, then mark the descriptor done and ACK it.
 * The kernel lock is released during the hv_ldc_copy() loop.
 */
void
vdsp_read_dring(void *arg1, void *arg2)
{
	struct vdsp_softc *sc = arg1;
	struct ldc_conn *lc = &sc->sc_lc;
	struct vd_desc *vd = arg2;
	struct proc *p = curproc;
	struct iovec iov;
	struct uio uio;
	caddr_t buf;
	vaddr_t va;
	paddr_t pa;
	uint64_t size, off;
	psize_t nbytes;
	int err, i;

	if (sc->sc_vp == NULL)
		return;

	buf = malloc(vd->size, M_DEVBUF, M_WAITOK);

	iov.iov_base = buf;
	iov.iov_len = vd->size;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = vd->offset * DEV_BSIZE;
	uio.uio_resid = vd->size;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_procp = p;

	vn_lock(sc->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	vd->status = VOP_READ(sc->sc_vp, &uio, 0, p->p_ucred);
	VOP_UNLOCK(sc->sc_vp);

	KERNEL_UNLOCK();
	if (vd->status == 0) {
		/*
		 * Walk the client's cookie list; each hv_ldc_copy()
		 * transfer is clipped to both the cookie and the
		 * current page.
		 */
		i = 0;
		va = (vaddr_t)buf;
		size = vd->size;
		off = 0;	/* offset within the current cookie */
		while (size > 0 && i < vd->ncookies) {
			pmap_extract(pmap_kernel(), va, &pa);
			nbytes = MIN(size, vd->cookie[i].size - off);
			nbytes = MIN(nbytes, PAGE_SIZE - (off & PAGE_MASK));
			err = hv_ldc_copy(lc->lc_id, LDC_COPY_OUT,
			    vd->cookie[i].addr + off, pa, nbytes, &nbytes);
			if (err != H_EOK) {
				printf("%s: hv_ldc_copy: %d\n", __func__, err);
				vd->status = EIO;
				KERNEL_LOCK();
				goto fail;
			}
			va += nbytes;
			size -= nbytes;
			off += nbytes;
			if (off >= vd->cookie[i].size) {
				off = 0;
				i++;
			}
		}
	}
	KERNEL_LOCK();

fail:
	free(buf, M_DEVBUF, 0);

	/* ACK the descriptor. */
	vd->hdr.dstate = VIO_DESC_DONE;
	vdsp_ack_desc(sc, vd);
}
1269
/*
 * Service a dring-mode write request: copy vd->size bytes in from the
 * guest through the descriptor's LDC cookies into a bounce buffer,
 * write them to the backing vnode at block offset vd->offset and ACK
 * the descriptor.
 */
void
vdsp_write_dring(void *arg1, void *arg2)
{
	struct vdsp_softc *sc = arg1;
	struct ldc_conn *lc = &sc->sc_lc;
	struct vd_desc *vd = arg2;
	struct proc *p = curproc;
	struct iovec iov;
	struct uio uio;
	caddr_t buf;
	vaddr_t va;
	paddr_t pa;
	uint64_t size, off;
	psize_t nbytes;
	int err, i;

	/* Nothing to do if the backing store is not (or no longer) open. */
	if (sc->sc_vp == NULL)
		return;

	buf = malloc(vd->size, M_DEVBUF, M_WAITOK);

	/* The hypervisor copies below don't need the kernel lock. */
	KERNEL_UNLOCK();
	i = 0;
	va = (vaddr_t)buf;
	size = vd->size;
	off = 0;	/* byte offset into the current cookie */
	while (size > 0 && i < vd->ncookies) {
		pmap_extract(pmap_kernel(), va, &pa);
		/*
		 * Copy in chunks that neither run past the end of the
		 * current cookie nor cross a page boundary.
		 */
		nbytes = MIN(size, vd->cookie[i].size - off);
		nbytes = MIN(nbytes, PAGE_SIZE - (off & PAGE_MASK));
		err = hv_ldc_copy(lc->lc_id, LDC_COPY_IN,
		    vd->cookie[i].addr + off, pa, nbytes, &nbytes);
		if (err != H_EOK) {
			printf("%s: hv_ldc_copy: %d\n", __func__, err);
			vd->status = EIO;
			/* Re-acquire before taking the fail path. */
			KERNEL_LOCK();
			goto fail;
		}
		va += nbytes;
		size -= nbytes;
		off += nbytes;
		if (off >= vd->cookie[i].size) {
			/* Cookie exhausted; move to the next one. */
			off = 0;
			i++;
		}
	}
	KERNEL_LOCK();

	/* vd->offset is in DEV_BSIZE (512-byte) blocks. */
	iov.iov_base = buf;
	iov.iov_len = vd->size;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = vd->offset * DEV_BSIZE;
	uio.uio_resid = vd->size;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_WRITE;
	uio.uio_procp = p;

	vn_lock(sc->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	vd->status = VOP_WRITE(sc->sc_vp, &uio, 0, p->p_ucred);
	VOP_UNLOCK(sc->sc_vp);

fail:
	free(buf, M_DEVBUF, 0);

	/* ACK the descriptor. */
	vd->hdr.dstate = VIO_DESC_DONE;
	vdsp_ack_desc(sc, vd);
}
1339
1340void
1341vdsp_flush_dring(void *arg1, void *arg2)
1342{
1343	struct vdsp_softc *sc = arg1;
1344	struct vd_desc *vd = arg2;
1345
1346	if (sc->sc_vp == NULL)
1347		return;
1348
1349	/* ACK the descriptor. */
1350	vd->status = 0;
1351	vd->hdr.dstate = VIO_DESC_DONE;
1352	vdsp_ack_desc(sc, vd);
1353}
1354
/*
 * Handle a get-VTOC request: build a vd_vtoc from the Sun disklabel on
 * the backing store, or fabricate one from the configured disk geometry
 * if no valid label exists, and copy it out to the guest through the
 * descriptor's LDC cookies.
 */
void
vdsp_get_vtoc(void *arg1, void *arg2)
{
	struct vdsp_softc *sc = arg1;
	struct ldc_conn *lc = &sc->sc_lc;
	struct vd_desc *vd = arg2;
	struct sun_vtoc_preamble *sl;
	struct vd_vtoc *vt;
	vaddr_t va;
	paddr_t pa;
	uint64_t size, off;
	psize_t nbytes;
	int err, i;

	vt = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);

	/* Lazily read the label off the backing store. */
	if (sc->sc_label == NULL)
		vdsp_readlabel(sc);

	if (sc->sc_label && sc->sc_label->sl_magic == SUN_DKMAGIC) {
		/* Valid Sun label: translate it into VTOC form. */
		sl = (struct sun_vtoc_preamble *)sc->sc_label;

		memcpy(vt->ascii_label, sl->sl_text, sizeof(sl->sl_text));
		memcpy(vt->volume_name, sl->sl_volume, sizeof(sl->sl_volume));
		vt->sector_size = DEV_BSIZE;
		vt->num_partitions = sl->sl_nparts;
		for (i = 0; i < vt->num_partitions; i++) {
			vt->partition[i].id_tag = sl->sl_part[i].spi_tag;
			vt->partition[i].perm = sl->sl_part[i].spi_flag;
			/* Convert the cylinder offset into a block number. */
			vt->partition[i].start =
			    sc->sc_label->sl_part[i].sdkp_cyloffset *
				sc->sc_label->sl_ntracks *
				sc->sc_label->sl_nsectors;
			vt->partition[i].nblocks =
			    sc->sc_label->sl_part[i].sdkp_nsectors;
		}
	} else {
		uint64_t disk_size;
		int unit;

		/* Human-readable disk size. */
		disk_size = sc->sc_vdisk_size * sc->sc_vdisk_block_size;
		disk_size >>= 10;
		unit = 'K';
		/* Scale to the next unit while the value exceeds 2048. */
		if (disk_size > (2 << 10)) {
			disk_size >>= 10;
			unit = 'M';
		}
		if (disk_size > (2 << 10)) {
			disk_size >>= 10;
			unit = 'G';
		}

		/* No label: synthesize a whole-disk VTOC in partition 2. */
		snprintf(vt->ascii_label, sizeof(vt->ascii_label),
		    "OpenBSD-DiskImage-%lld%cB cyl %d alt %d hd %d sec %d",
		    disk_size, unit, sc->sc_ncyl, sc->sc_acyl,
		    sc->sc_nhead, sc->sc_nsect);
		vt->sector_size = sc->sc_vdisk_block_size;
		vt->num_partitions = 8;
		vt->partition[2].id_tag = SPTAG_WHOLE_DISK;
		vt->partition[2].nblocks =
		    sc->sc_ncyl * sc->sc_nhead * sc->sc_nsect;
	}

	/*
	 * Copy the VTOC out to the guest, cookie by cookie.  The length is
	 * rounded up to a multiple of 64 bytes — presumably an alignment
	 * requirement of the LDC copy interface; TODO confirm against the
	 * sun4v hypervisor API.
	 */
	i = 0;
	va = (vaddr_t)vt;
	size = roundup(sizeof(*vt), 64);
	off = 0;
	while (size > 0 && i < vd->ncookies) {
		pmap_extract(pmap_kernel(), va, &pa);
		/* Page-sized chunks, never past the end of this cookie. */
		nbytes = MIN(size, vd->cookie[i].size - off);
		nbytes = MIN(nbytes, PAGE_SIZE - (off & PAGE_MASK));
		err = hv_ldc_copy(lc->lc_id, LDC_COPY_OUT,
		    vd->cookie[i].addr + off, pa, nbytes, &nbytes);
		if (err != H_EOK) {
			printf("%s: hv_ldc_copy: %d\n", __func__, err);
			vd->status = EIO;
			goto fail;
		}
		va += nbytes;
		size -= nbytes;
		off += nbytes;
		if (off >= vd->cookie[i].size) {
			off = 0;
			i++;
		}
	}

	vd->status = 0;

fail:
	free(vt, M_DEVBUF, 0);

	/* ACK the descriptor. */
	vd->hdr.dstate = VIO_DESC_DONE;
	vdsp_ack_desc(sc, vd);
}
1452
1453void
1454vdsp_set_vtoc(void *arg1, void *arg2)
1455{
1456	struct vdsp_softc *sc = arg1;
1457	struct ldc_conn *lc = &sc->sc_lc;
1458	struct vd_desc *vd = arg2;
1459	struct sun_vtoc_preamble *sl;
1460	struct vd_vtoc *vt;
1461	u_short cksum = 0, *sp1, *sp2;
1462	vaddr_t va;
1463	paddr_t pa;
1464	uint64_t size, off;
1465	psize_t nbytes;
1466	int err, i;
1467
1468	vt = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);
1469
1470	i = 0;
1471	va = (vaddr_t)vt;
1472	size = sizeof(*vt);
1473	off = 0;
1474	while (size > 0 && i < vd->ncookies) {
1475		pmap_extract(pmap_kernel(), va, &pa);
1476		nbytes = MIN(size, vd->cookie[i].size - off);
1477		nbytes = MIN(nbytes, PAGE_SIZE - (off & PAGE_MASK));
1478		err = hv_ldc_copy(lc->lc_id, LDC_COPY_IN,
1479		    vd->cookie[i].addr + off, pa, nbytes, &nbytes);
1480		if (err != H_EOK) {
1481			printf("%s: hv_ldc_copy: %d\n", __func__, err);
1482			vd->status = EIO;
1483			goto fail;
1484		}
1485		va += nbytes;
1486		size -= nbytes;
1487		off += nbytes;
1488		if (off >= vd->cookie[i].size) {
1489			off = 0;
1490			i++;
1491		}
1492	}
1493
1494	if (vt->num_partitions > nitems(sc->sc_label->sl_part)) {
1495		vd->status = EINVAL;
1496		goto fail;
1497	}
1498
1499	if (sc->sc_label == NULL || sc->sc_label->sl_magic != SUN_DKMAGIC) {
1500		sc->sc_label = malloc(sizeof(*sc->sc_label),
1501		    M_DEVBUF, M_WAITOK | M_ZERO);
1502
1503		sc->sc_label->sl_ntracks = sc->sc_nhead;
1504		sc->sc_label->sl_nsectors = sc->sc_nsect;
1505		sc->sc_label->sl_ncylinders = sc->sc_ncyl;
1506		sc->sc_label->sl_acylinders = sc->sc_acyl;
1507		sc->sc_label->sl_pcylinders = sc->sc_ncyl + sc->sc_acyl;
1508		sc->sc_label->sl_rpm = 3600;
1509
1510		sc->sc_label->sl_magic = SUN_DKMAGIC;
1511	}
1512
1513	sl = (struct sun_vtoc_preamble *)sc->sc_label;
1514	memcpy(sl->sl_text, vt->ascii_label, sizeof(sl->sl_text));
1515	sl->sl_version = 0x01;
1516	memcpy(sl->sl_volume, vt->volume_name, sizeof(sl->sl_volume));
1517	sl->sl_nparts = vt->num_partitions;
1518	for (i = 0; i < vt->num_partitions; i++) {
1519		sl->sl_part[i].spi_tag = vt->partition[i].id_tag;
1520		sl->sl_part[i].spi_flag = vt->partition[i].perm;
1521		sc->sc_label->sl_part[i].sdkp_cyloffset =
1522		    vt->partition[i].start / (sc->sc_nhead * sc->sc_nsect);
1523		sc->sc_label->sl_part[i].sdkp_nsectors =
1524		    vt->partition[i].nblocks;
1525	}
1526	sl->sl_sanity = 0x600ddeee;
1527
1528	/* Compute the checksum. */
1529	sp1 = (u_short *)sc->sc_label;
1530	sp2 = (u_short *)(sc->sc_label + 1);
1531	while (sp1 < sp2)
1532		cksum ^= *sp1++;
1533	sc->sc_label->sl_cksum = cksum;
1534
1535	vd->status = vdsp_writelabel(sc);
1536
1537fail:
1538	free(vt, M_DEVBUF, 0);
1539
1540	/* ACK the descriptor. */
1541	vd->hdr.dstate = VIO_DESC_DONE;
1542	vdsp_ack_desc(sc, vd);
1543}
1544
1545void
1546vdsp_get_diskgeom(void *arg1, void *arg2)
1547{
1548	struct vdsp_softc *sc = arg1;
1549	struct ldc_conn *lc = &sc->sc_lc;
1550	struct vd_desc *vd = arg2;
1551	struct vd_diskgeom *vg;
1552	vaddr_t va;
1553	paddr_t pa;
1554	uint64_t size, off;
1555	psize_t nbytes;
1556	int err, i;
1557
1558	vg = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);
1559
1560	if (sc->sc_label == NULL)
1561		vdsp_readlabel(sc);
1562
1563	if (sc->sc_label && sc->sc_label->sl_magic == SUN_DKMAGIC) {
1564		vg->ncyl = sc->sc_label->sl_ncylinders;
1565		vg->acyl = sc->sc_label->sl_acylinders;
1566		vg->nhead = sc->sc_label->sl_ntracks;
1567		vg->nsect = sc->sc_label->sl_nsectors;
1568		vg->intrlv = sc->sc_label->sl_interleave;
1569		vg->apc = sc->sc_label->sl_sparespercyl;
1570		vg->rpm = sc->sc_label->sl_rpm;
1571		vg->pcyl = sc->sc_label->sl_pcylinders;
1572	} else {
1573		uint64_t disk_size, block_size;
1574
1575		disk_size = sc->sc_vdisk_size * sc->sc_vdisk_block_size;
1576		block_size = sc->sc_vdisk_block_size;
1577
1578		if (disk_size >= 8L * 1024 * 1024 * 1024) {
1579			vg->nhead = 96;
1580			vg->nsect = 768;
1581		} else if (disk_size >= 2 *1024 * 1024) {
1582			vg->nhead = 1;
1583			vg->nsect = 600;
1584		} else {
1585			vg->nhead = 1;
1586			vg->nsect = 200;
1587		}
1588
1589		vg->pcyl = disk_size / (block_size * vg->nhead * vg->nsect);
1590		if (vg->pcyl == 0)
1591			vg->pcyl = 1;
1592		if (vg->pcyl > 2)
1593			vg->acyl = 2;
1594		vg->ncyl = vg->pcyl - vg->acyl;
1595
1596		vg->rpm = 3600;
1597	}
1598
1599	sc->sc_ncyl = vg->ncyl;
1600	sc->sc_acyl = vg->acyl;
1601	sc->sc_nhead = vg->nhead;
1602	sc->sc_nsect = vg->nsect;
1603
1604	i = 0;
1605	va = (vaddr_t)vg;
1606	size = roundup(sizeof(*vg), 64);
1607	off = 0;
1608	while (size > 0 && i < vd->ncookies) {
1609		pmap_extract(pmap_kernel(), va, &pa);
1610		nbytes = MIN(size, vd->cookie[i].size - off);
1611		nbytes = MIN(nbytes, PAGE_SIZE - (off & PAGE_MASK));
1612		err = hv_ldc_copy(lc->lc_id, LDC_COPY_OUT,
1613		    vd->cookie[i].addr + off, pa, nbytes, &nbytes);
1614		if (err != H_EOK) {
1615			printf("%s: hv_ldc_copy: %d\n", __func__, err);
1616			vd->status = EIO;
1617			goto fail;
1618		}
1619		va += nbytes;
1620		size -= nbytes;
1621		off += nbytes;
1622		if (off >= vd->cookie[i].size) {
1623			off = 0;
1624			i++;
1625		}
1626	}
1627
1628	vd->status = 0;
1629
1630fail:
1631	free(vg, M_DEVBUF, 0);
1632
1633	/* ACK the descriptor. */
1634	vd->hdr.dstate = VIO_DESC_DONE;
1635	vdsp_ack_desc(sc, vd);
1636}
1637
1638void
1639vdsp_unimp(void *arg1, void *arg2)
1640{
1641	struct vdsp_softc *sc = arg1;
1642	struct vd_desc *vd = arg2;
1643
1644	/* ACK the descriptor. */
1645	vd->status = ENOTSUP;
1646	vd->hdr.dstate = VIO_DESC_DONE;
1647	vdsp_ack_desc(sc, vd);
1648}
1649
/*
 * Copy a completed descriptor back into the guest's descriptor ring
 * and send a VIO_DRING_DATA ACK message for it.
 */
void
vdsp_ack_desc(struct vdsp_softc *sc, struct vd_desc *vd)
{
	struct vio_dring_msg dm;
	vaddr_t va;
	paddr_t pa;
	uint64_t size, off;
	psize_t nbytes;
	int err;

	/* Push the updated descriptor out to the guest's ring. */
	va = (vaddr_t)vd;
	off = (caddr_t)vd - sc->sc_vd;	/* byte offset within the ring */
	size = sc->sc_descriptor_size;
	while (size > 0) {
		pmap_extract(pmap_kernel(), va, &pa);
		/* Copy in chunks that never cross a page boundary. */
		nbytes = MIN(size, PAGE_SIZE - (off & PAGE_MASK));
		err = hv_ldc_copy(sc->sc_lc.lc_id, LDC_COPY_OUT,
		    sc->sc_dring_cookie.addr + off, pa, nbytes, &nbytes);
		if (err != H_EOK) {
			/* Give up; no ACK is sent for this descriptor. */
			printf("%s: hv_ldc_copy %d\n", __func__, err);
			return;
		}
		va += nbytes;
		size -= nbytes;
		off += nbytes;
	}

	/* ACK the descriptor. */
	bzero(&dm, sizeof(dm));
	dm.tag.type = VIO_TYPE_DATA;
	dm.tag.stype = VIO_SUBTYPE_ACK;
	dm.tag.stype_env = VIO_DRING_DATA;
	dm.tag.sid = sc->sc_local_sid;
	dm.seq_no = ++sc->sc_seq_no;
	dm.dring_ident = sc->sc_dring_ident;
	/* start_idx == end_idx: a single descriptor is acknowledged. */
	off = (caddr_t)vd - sc->sc_vd;
	dm.start_idx = off / sc->sc_descriptor_size;
	dm.end_idx = off / sc->sc_descriptor_size;
	vdsp_sendmsg(sc, &dm, sizeof(dm), 1);
}
1690
/*
 * Open entry point: configure the LDC transmit and receive queues with
 * the hypervisor and enable the channel interrupts so the guest can
 * start talking to us.
 *
 * Returns 0 on success or ENXIO for a nonexistent unit.
 */
int
vdspopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct vdsp_softc *sc;
	struct ldc_conn *lc;
	int unit = minor(dev);
	int err;

	if (unit >= vdsp_cd.cd_ndevs)
		return (ENXIO);
	sc = vdsp_cd.cd_devs[unit];
	if (sc == NULL)
		return (ENXIO);

	lc = &sc->sc_lc;

	/* Point the hypervisor at our DMA-mapped queue buffers. */
	err = hv_ldc_tx_qconf(lc->lc_id,
	    lc->lc_txq->lq_map->dm_segs[0].ds_addr, lc->lc_txq->lq_nentries);
	if (err != H_EOK)
		printf("%s: hv_ldc_tx_qconf %d\n", __func__, err);

	err = hv_ldc_rx_qconf(lc->lc_id,
	    lc->lc_rxq->lq_map->dm_segs[0].ds_addr, lc->lc_rxq->lq_nentries);
	if (err != H_EOK)
		printf("%s: hv_ldc_rx_qconf %d\n", __func__, err);

	cbus_intr_setenabled(sc->sc_bustag, sc->sc_tx_ino, INTR_ENABLED);
	cbus_intr_setenabled(sc->sc_bustag, sc->sc_rx_ino, INTR_ENABLED);

	return (0);
}
1722
/*
 * Close entry point: disable the channel interrupts, unconfigure the
 * LDC queues and defer the remaining teardown to the close task.
 *
 * Returns 0 on success or ENXIO for a nonexistent unit.
 */
int
vdspclose(dev_t dev, int flag, int mode, struct proc *p)
{
	struct vdsp_softc *sc;
	int unit = minor(dev);

	if (unit >= vdsp_cd.cd_ndevs)
		return (ENXIO);
	sc = vdsp_cd.cd_devs[unit];
	if (sc == NULL)
		return (ENXIO);

	cbus_intr_setenabled(sc->sc_bustag, sc->sc_tx_ino, INTR_DISABLED);
	cbus_intr_setenabled(sc->sc_bustag, sc->sc_rx_ino, INTR_DISABLED);

	/* A size of 0 tears down the queues. */
	hv_ldc_tx_qconf(sc->sc_lc.lc_id, 0, 0);
	hv_ldc_rx_qconf(sc->sc_lc.lc_id, 0, 0);

	/* The rest of the teardown runs from a task in process context. */
	task_add(systq, &sc->sc_close_task);
	return (0);
}
1744
1745int
1746vdspioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
1747{
1748	struct vdsp_softc *sc;
1749	int unit = minor(dev);
1750
1751	if (unit >= vdsp_cd.cd_ndevs)
1752		return (ENXIO);
1753	sc = vdsp_cd.cd_devs[unit];
1754	if (sc == NULL)
1755		return (ENXIO);
1756
1757	return (ENOTTY);
1758}
1759