/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/errno.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/ddi.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/file.h>
#include <sys/open.h>
#include <sys/modctl.h>
#include <sys/ddi_impldefs.h>
#include <vm/seg_kmem.h>
#include <sys/vmsystm.h>
#include <sys/sysmacros.h>
#include <sys/ddidevmap.h>
#include <sys/avl.h>
#ifdef __xpv
#include <sys/hypervisor.h>
#endif

#include <sys/xsvc.h>

/* total maximum memory which can be allocated via the ioctl interface */
uint64_t xsvc_max_memory = 10 * 1024 * 1024;

extern void i86_va_map(caddr_t vaddr, struct as *asp, caddr_t kaddr);


static int xsvc_open(dev_t *devp, int flag, int otyp, cred_t *cred);
static int xsvc_close(dev_t devp, int flag, int otyp, cred_t *cred);
static int xsvc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
    int *rval);
static int xsvc_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model);
static int xsvc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int xsvc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);
static int xsvc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
    void **result);

static struct cb_ops xsvc_cb_ops = {
	xsvc_open,		/* cb_open */
	xsvc_close,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	xsvc_ioctl,		/* cb_ioctl */
	xsvc_devmap,		/* cb_devmap */
	NULL,			/* cb_mmap */
	NULL,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	D_NEW | D_MP | D_64BIT | D_DEVMAP,	/* cb_flag */
	CB_REV
};

static struct dev_ops xsvc_dev_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	xsvc_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	xsvc_attach,		/* devo_attach */
	xsvc_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&xsvc_cb_ops,		/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	NULL,			/* power */
	ddi_quiesce_not_needed,		/* quiesce */
};

static struct modldrv xsvc_modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	"xsvc driver",		/* Name of the module. */
	&xsvc_dev_ops,		/* driver ops */
};

static struct modlinkage xsvc_modlinkage = {
	MODREV_1,
	(void *)&xsvc_modldrv,
	NULL
};


static int xsvc_ioctl_alloc_memory(xsvc_state_t *state, void *arg, int mode);
static int xsvc_ioctl_flush_memory(xsvc_state_t *state, void *arg, int mode);
static int xsvc_ioctl_free_memory(xsvc_state_t *state, void *arg, int mode);
static int xsvc_mem_alloc(xsvc_state_t *state, uint64_t key,
    xsvc_mem_t **mp);
static void xsvc_mem_free(xsvc_state_t *state, xsvc_mem_t *mp);
static xsvc_mem_t *xsvc_mem_lookup(xsvc_state_t *state,
    uint64_t key);
static int xsvc_mnode_key_compare(const void *q, const void *e);
static int xsvc_umem_cookie_alloc(caddr_t kva, size_t size, int flags,
    ddi_umem_cookie_t *cookiep);
static void xsvc_umem_cookie_free(ddi_umem_cookie_t *cookiep);


void *xsvc_statep;

static ddi_device_acc_attr_t xsvc_device_attr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC
};

static int xsvc_devmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
    offset_t off, size_t len, void **pvtp);
static int xsvc_devmap_dup(devmap_cookie_t dhp, void *pvtp,
    devmap_cookie_t new_dhp, void **new_pvtp);
static void xsvc_devmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
    size_t len, devmap_cookie_t new_dhp1, void **new_pvtp1,
    devmap_cookie_t new_dhp2, void **new_pvtp2);


static struct devmap_callback_ctl xsvc_callbk = {
	DEVMAP_OPS_REV,
	xsvc_devmap_map,
	NULL,
	xsvc_devmap_dup,
	xsvc_devmap_unmap
};
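
/*
 * Note: only the map, dup, and unmap callbacks are provided; the NULL slot
 * is devmap_access, so the framework's default access handling is used.
 */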

/*
 * _init()
 *
 */
int
_init(void)
{
	int err;

	err = ddi_soft_state_init(&xsvc_statep, sizeof (xsvc_state_t), 1);
	if (err != 0) {
		return (err);
	}

	err = mod_install(&xsvc_modlinkage);
	if (err != 0) {
		ddi_soft_state_fini(&xsvc_statep);
		return (err);
	}

	return (0);
}

/*
 * _info()
 *
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&xsvc_modlinkage, modinfop));
}

/*
 * _fini()
 *
 */
int
_fini(void)
{
	int err;

	err = mod_remove(&xsvc_modlinkage);
	if (err != 0) {
		return (err);
	}

	ddi_soft_state_fini(&xsvc_statep);

	return (0);
}

/*
 * xsvc_attach()
 *
 */
static int
xsvc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	xsvc_state_t *state;
	int maxallocmem;
	int instance;
	int err;


	switch (cmd) {
	case DDI_ATTACH:
		break;

	case DDI_RESUME:
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(dip);
	err = ddi_soft_state_zalloc(xsvc_statep, instance);
	if (err != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}
	state = ddi_get_soft_state(xsvc_statep, instance);
	if (state == NULL) {
		err = DDI_FAILURE;
		goto attachfail_get_soft_state;
	}

	state->xs_dip = dip;
	state->xs_instance = instance;

	/* Initialize allocation count */
	mutex_init(&state->xs_mutex, NULL, MUTEX_DRIVER, NULL);
	state->xs_currently_alloced = 0;

	mutex_init(&state->xs_cookie_mutex, NULL, MUTEX_DRIVER, NULL);

	/* create the minor node (for the ioctl) */
	err = ddi_create_minor_node(dip, "xsvc", S_IFCHR, instance, DDI_PSEUDO,
	    0);
	if (err != DDI_SUCCESS) {
		goto attachfail_minor_node;
	}

	/*
	 * The maxallocmem property (specified in KB) overrides the default
	 * limit (xsvc_max_memory). This is the maximum total memory the
	 * ioctl interface will allow to be allocated.
	 */
	maxallocmem = ddi_prop_get_int(DDI_DEV_T_ANY, state->xs_dip,
	    DDI_PROP_DONTPASS, "maxallocmem", -1);
	if (maxallocmem >= 0) {
		xsvc_max_memory = maxallocmem * 1024;
	}
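
	/*
	 * For example (illustrative values; assuming the property is set
	 * via a driver .conf file): a line such as
	 *	maxallocmem=20480;
	 * would cap total ioctl allocations at 20480 KB (20 MB), and
	 *	maxallocmem=0;
	 * would disable ioctl-based allocation entirely.
	 */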

	/* Initialize list of memory allocs */
	mutex_init(&state->xs_mlist.ml_mutex, NULL, MUTEX_DRIVER, NULL);
	avl_create(&state->xs_mlist.ml_avl, xsvc_mnode_key_compare,
	    sizeof (xsvc_mnode_t), offsetof(xsvc_mnode_t, mn_link));

	/* Report that driver was loaded */
	ddi_report_dev(dip);

	return (DDI_SUCCESS);

attachfail_minor_node:
	mutex_destroy(&state->xs_cookie_mutex);
	mutex_destroy(&state->xs_mutex);
attachfail_get_soft_state:
	(void) ddi_soft_state_free(xsvc_statep, instance);

	return (err);
}

/*
 * xsvc_detach()
 *
 */
static int
xsvc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	xsvc_state_t *state;
	xsvc_mnode_t *mnode;
	xsvc_mem_t *mp;
	int instance;


	instance = ddi_get_instance(dip);
	state = ddi_get_soft_state(xsvc_statep, instance);
	if (state == NULL) {
		return (DDI_FAILURE);
	}

	switch (cmd) {
	case DDI_DETACH:
		break;

	case DDI_SUSPEND:
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}

	ddi_remove_minor_node(dip, NULL);

	/* Free any memory on list */
	while ((mnode = avl_first(&state->xs_mlist.ml_avl)) != NULL) {
		mp = mnode->mn_home;
		xsvc_mem_free(state, mp);
	}

	/* remove list */
	avl_destroy(&state->xs_mlist.ml_avl);
	mutex_destroy(&state->xs_mlist.ml_mutex);

	mutex_destroy(&state->xs_cookie_mutex);
	mutex_destroy(&state->xs_mutex);
	(void) ddi_soft_state_free(xsvc_statep, state->xs_instance);
	return (DDI_SUCCESS);
}

/*
 * xsvc_getinfo()
 *
 */
/*ARGSUSED*/
static int
xsvc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
{
	xsvc_state_t *state;
	int instance;
	dev_t dev;
	int err;


	dev = (dev_t)arg;
	instance = getminor(dev);

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		state = ddi_get_soft_state(xsvc_statep, instance);
		if (state == NULL) {
			return (DDI_FAILURE);
		}
		*result = (void *)state->xs_dip;
		err = DDI_SUCCESS;
		break;

	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)(uintptr_t)instance;
		err = DDI_SUCCESS;
		break;

	default:
		err = DDI_FAILURE;
		break;
	}

	return (err);
}


/*
 * xsvc_open()
 *
 */
/*ARGSUSED*/
static int
xsvc_open(dev_t *devp, int flag, int otyp, cred_t *cred)
{
	xsvc_state_t *state;
	int instance;

	instance = getminor(*devp);
	state = ddi_get_soft_state(xsvc_statep, instance);
	if (state == NULL) {
		return (ENXIO);
	}

	return (0);
}

/*
 * xsvc_close()
 *
 */
/*ARGSUSED*/
static int
xsvc_close(dev_t devp, int flag, int otyp, cred_t *cred)
{
	return (0);
}

/*
 * xsvc_ioctl()
 *
 */
/*ARGSUSED*/
static int
xsvc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred, int *rval)
{
	xsvc_state_t *state;
	int instance;
	int err;


	err = drv_priv(cred);
	if (err != 0) {
		return (EPERM);
	}
	instance = getminor(dev);
	if (instance == -1) {
		return (EBADF);
	}
	state = ddi_get_soft_state(xsvc_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	switch (cmd) {
	case XSVC_ALLOC_MEM:
		err = xsvc_ioctl_alloc_memory(state, (void *)arg, mode);
		break;

	case XSVC_FREE_MEM:
		err = xsvc_ioctl_free_memory(state, (void *)arg, mode);
		break;

	case XSVC_FLUSH_MEM:
		err = xsvc_ioctl_flush_memory(state, (void *)arg, mode);
		break;

	default:
		err = ENXIO;
	}

	return (err);
}

/*
 * xsvc_ioctl_alloc_memory()
 *
 */
static int
xsvc_ioctl_alloc_memory(xsvc_state_t *state, void *arg, int mode)
{
	xsvc_mem_req_32 params32;
	xsvc_mloc_32 *usgl32;
	xsvc_mem_req params;
	xsvc_mloc_32 sgl32;
	xsvc_mloc *usgl;
	xsvc_mem_t *mp;
	xsvc_mloc sgl;
	uint64_t key;
	size_t size;
	int err;
	int i;


	/* Copy in the params, then get the size and key */
	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
		err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
		    mode);
		if (err != 0) {
			return (EFAULT);
		}

		key = (uint64_t)params32.xsvc_mem_reqid;
		size = P2ROUNDUP((size_t)params32.xsvc_mem_size, PAGESIZE);
	} else {
		err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
		if (err != 0) {
			return (EFAULT);
		}
		key = (uint64_t)params.xsvc_mem_reqid;
		size = P2ROUNDUP(params.xsvc_mem_size, PAGESIZE);
	}

	/*
	 * make sure this doesn't put us over the maximum allowed to be
	 * allocated
	 */
	mutex_enter(&state->xs_mutex);
	if ((state->xs_currently_alloced + size) > xsvc_max_memory) {
		mutex_exit(&state->xs_mutex);
		return (EAGAIN);
	}
	state->xs_currently_alloced += size;
	mutex_exit(&state->xs_mutex);

	/* get state to track this memory */
	err = xsvc_mem_alloc(state, key, &mp);
	if (err != 0) {
		return (err);
	}
	mp->xm_size = size;

	/* allocate and bind the memory */
	mp->xm_dma_attr.dma_attr_version = DMA_ATTR_V0;
	mp->xm_dma_attr.dma_attr_count_max = (uint64_t)0xFFFFFFFF;
	mp->xm_dma_attr.dma_attr_burstsizes = 1;
	mp->xm_dma_attr.dma_attr_minxfer = 1;
	mp->xm_dma_attr.dma_attr_maxxfer = (uint64_t)0xFFFFFFFF;
	mp->xm_dma_attr.dma_attr_seg = (uint64_t)0xFFFFFFFF;
	mp->xm_dma_attr.dma_attr_granular = 1;
	mp->xm_dma_attr.dma_attr_flags = 0;

	/* Finish converting params */
	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
		mp->xm_dma_attr.dma_attr_addr_lo = params32.xsvc_mem_addr_lo;
		mp->xm_dma_attr.dma_attr_addr_hi = params32.xsvc_mem_addr_hi;
		mp->xm_dma_attr.dma_attr_sgllen = params32.xsvc_mem_sgllen;
		usgl32 = (xsvc_mloc_32 *)(uintptr_t)params32.xsvc_sg_list;
		mp->xm_dma_attr.dma_attr_align = P2ROUNDUP(
		    params32.xsvc_mem_align, PAGESIZE);
	} else {
		mp->xm_dma_attr.dma_attr_addr_lo = params.xsvc_mem_addr_lo;
		mp->xm_dma_attr.dma_attr_addr_hi = params.xsvc_mem_addr_hi;
		mp->xm_dma_attr.dma_attr_sgllen = params.xsvc_mem_sgllen;
		usgl = (xsvc_mloc *)(uintptr_t)params.xsvc_sg_list;
		mp->xm_dma_attr.dma_attr_align = P2ROUNDUP(
		    params.xsvc_mem_align, PAGESIZE);
	}

	mp->xm_device_attr = xsvc_device_attr;

	err = ddi_dma_alloc_handle(state->xs_dip, &mp->xm_dma_attr,
	    DDI_DMA_SLEEP, NULL, &mp->xm_dma_handle);
	if (err != DDI_SUCCESS) {
		err = EINVAL;
		goto allocfail_alloc_handle;
	}

	/* don't sleep here so we don't get stuck in contig alloc */
	err = ddi_dma_mem_alloc(mp->xm_dma_handle, mp->xm_size,
	    &mp->xm_device_attr, DDI_DMA_CONSISTENT, DDI_DMA_DONTWAIT, NULL,
	    &mp->xm_addr, &mp->xm_real_length, &mp->xm_mem_handle);
	if (err != DDI_SUCCESS) {
		err = EINVAL;
		goto allocfail_alloc_mem;
	}

	err = ddi_dma_addr_bind_handle(mp->xm_dma_handle, NULL, mp->xm_addr,
	    mp->xm_size, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP,
	    NULL, &mp->xm_cookie, &mp->xm_cookie_count);
	if (err != DDI_DMA_MAPPED) {
		err = EFAULT;
		goto allocfail_bind;
	}

	/* return sgl */
	for (i = 0; i < mp->xm_cookie_count; i++) {
		if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
			sgl32.mloc_addr = mp->xm_cookie.dmac_laddress;
			sgl32.mloc_size = mp->xm_cookie.dmac_size;
			err = ddi_copyout(&sgl32, &usgl32[i],
			    sizeof (xsvc_mloc_32), mode);
			if (err != 0) {
				err = EFAULT;
				goto allocfail_copyout;
			}
		} else {
			sgl.mloc_addr = mp->xm_cookie.dmac_laddress;
			sgl.mloc_size = mp->xm_cookie.dmac_size;
			err = ddi_copyout(&sgl, &usgl[i], sizeof (xsvc_mloc),
			    mode);
			if (err != 0) {
				err = EFAULT;
				goto allocfail_copyout;
			}
		}

		/* don't walk off the end of the cookie list */
		if (i < (mp->xm_cookie_count - 1)) {
			ddi_dma_nextcookie(mp->xm_dma_handle, &mp->xm_cookie);
		}
	}

	/* zero the last sgl entry; the terminator tells user space the count */
	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
		sgl32.mloc_addr = 0;
		sgl32.mloc_size = 0;
		err = ddi_copyout(&sgl32, &usgl32[i], sizeof (xsvc_mloc_32),
		    mode);
		if (err != 0) {
			err = EFAULT;
			goto allocfail_copyout;
		}
	} else {
		sgl.mloc_addr = 0;
		sgl.mloc_size = 0;
		err = ddi_copyout(&sgl, &usgl[i], sizeof (xsvc_mloc), mode);
		if (err != 0) {
			err = EFAULT;
			goto allocfail_copyout;
		}
	}

	return (0);

allocfail_copyout:
	(void) ddi_dma_unbind_handle(mp->xm_dma_handle);
allocfail_bind:
	ddi_dma_mem_free(&mp->xm_mem_handle);
allocfail_alloc_mem:
	ddi_dma_free_handle(&mp->xm_dma_handle);
allocfail_alloc_handle:
	mp->xm_dma_handle = NULL;
	xsvc_mem_free(state, mp);

	mutex_enter(&state->xs_mutex);
	state->xs_currently_alloced = state->xs_currently_alloced - size;
	mutex_exit(&state->xs_mutex);

	return (err);
}
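
/*
 * A minimal user-space sketch of the allocation protocol above (illustrative
 * only: the ioctl commands and the xsvc_mem_req/xsvc_mloc fields come from
 * <sys/xsvc.h>, but the device path, sizes, and the use() helper are
 * assumptions for this example, not a supported interface contract):
 *
 *	xsvc_mloc sgl[8 + 1];			// sgllen entries + terminator
 *	xsvc_mem_req req;
 *	int fd, i;
 *
 *	fd = open("/dev/xsvc", O_RDWR);
 *	req.xsvc_mem_reqid = 42;		// caller-chosen key
 *	req.xsvc_mem_size = 64 * 1024;		// rounded up to whole pages
 *	req.xsvc_mem_addr_lo = 0;
 *	req.xsvc_mem_addr_hi = 0xFFFFFFFFULL;	// 32-bit addressable
 *	req.xsvc_mem_align = 4096;		// rounded up to PAGESIZE
 *	req.xsvc_mem_sgllen = 8;
 *	req.xsvc_sg_list = (uint64_t)(uintptr_t)sgl;
 *	if (ioctl(fd, XSVC_ALLOC_MEM, &req) == 0) {
 *		// walk the cookies up to the zeroed terminator entry
 *		for (i = 0; sgl[i].mloc_size != 0; i++)
 *			use(sgl[i].mloc_addr, sgl[i].mloc_size);
 *		(void) ioctl(fd, XSVC_FREE_MEM, &req);	// same reqid key
 *	}
 */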

/*
 * xsvc_ioctl_flush_memory()
 *
 */
static int
xsvc_ioctl_flush_memory(xsvc_state_t *state, void *arg, int mode)
{
	xsvc_mem_req_32 params32;
	xsvc_mem_req params;
	xsvc_mem_t *mp;
	uint64_t key;
	int err;


	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
		err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
		    mode);
		if (err != 0) {
			return (EFAULT);
		}
		key = (uint64_t)params32.xsvc_mem_reqid;
	} else {
		err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
		if (err != 0) {
			return (EFAULT);
		}
		key = (uint64_t)params.xsvc_mem_reqid;
	}

	/* find the memory */
	mp = xsvc_mem_lookup(state, key);
	if (mp == NULL) {
		return (EINVAL);
	}

	(void) ddi_dma_sync(mp->xm_dma_handle, 0, 0, DDI_DMA_SYNC_FORCPU);

	return (0);
}


/*
 * xsvc_ioctl_free_memory()
 *
 */
static int
xsvc_ioctl_free_memory(xsvc_state_t *state, void *arg, int mode)
{
	xsvc_mem_req_32 params32;
	xsvc_mem_req params;
	xsvc_mem_t *mp;
	uint64_t key;
	int err;


	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
		err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
		    mode);
		if (err != 0) {
			return (EFAULT);
		}
		key = (uint64_t)params32.xsvc_mem_reqid;
	} else {
		err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
		if (err != 0) {
			return (EFAULT);
		}
		key = (uint64_t)params.xsvc_mem_reqid;
	}

	/* find the memory */
	mp = xsvc_mem_lookup(state, key);
	if (mp == NULL) {
		return (EINVAL);
	}

	xsvc_mem_free(state, mp);

	return (0);
}

/*
 * xsvc_mem_alloc()
 *
 */
static int
xsvc_mem_alloc(xsvc_state_t *state, uint64_t key, xsvc_mem_t **mp)
{
	xsvc_mem_t *mem;

	/* if this key is already in use, free up the old memory first */
	mem = xsvc_mem_lookup(state, key);
	if (mem != NULL) {
		xsvc_mem_free(state, mem);
	}

	/* zalloc so xm_dma_handle starts out NULL for xsvc_mem_free() */
	*mp = kmem_zalloc(sizeof (xsvc_mem_t), KM_SLEEP);
	(*mp)->xm_mnode.mn_home = *mp;
	(*mp)->xm_mnode.mn_key = key;

	mutex_enter(&state->xs_mlist.ml_mutex);
	avl_add(&state->xs_mlist.ml_avl, &(*mp)->xm_mnode);
	mutex_exit(&state->xs_mlist.ml_mutex);

	return (0);
}

/*
 * xsvc_mem_free()
 *
 */
static void
xsvc_mem_free(xsvc_state_t *state, xsvc_mem_t *mp)
{
	if (mp->xm_dma_handle != NULL) {
		(void) ddi_dma_unbind_handle(mp->xm_dma_handle);
		ddi_dma_mem_free(&mp->xm_mem_handle);
		ddi_dma_free_handle(&mp->xm_dma_handle);

		mutex_enter(&state->xs_mutex);
		state->xs_currently_alloced = state->xs_currently_alloced -
		    mp->xm_size;
		mutex_exit(&state->xs_mutex);
	}

	mutex_enter(&state->xs_mlist.ml_mutex);
	avl_remove(&state->xs_mlist.ml_avl, &mp->xm_mnode);
	mutex_exit(&state->xs_mlist.ml_mutex);

	kmem_free(mp, sizeof (*mp));
}

/*
 * xsvc_mem_lookup()
 *
 */
static xsvc_mem_t *
xsvc_mem_lookup(xsvc_state_t *state, uint64_t key)
{
	xsvc_mnode_t mnode;
	xsvc_mnode_t *mnp;
	avl_index_t where;
	xsvc_mem_t *mp;

	mnode.mn_key = key;
	mutex_enter(&state->xs_mlist.ml_mutex);
	mnp = avl_find(&state->xs_mlist.ml_avl, &mnode, &where);
	mutex_exit(&state->xs_mlist.ml_mutex);

	if (mnp != NULL) {
		mp = mnp->mn_home;
	} else {
		mp = NULL;
	}

	return (mp);
}

/*
 * xsvc_mnode_key_compare()
 *
 */
static int
xsvc_mnode_key_compare(const void *q, const void *e)
{
	xsvc_mnode_t *n1;
	xsvc_mnode_t *n2;

	n1 = (xsvc_mnode_t *)q;
	n2 = (xsvc_mnode_t *)e;

	if (n1->mn_key < n2->mn_key) {
		return (-1);
	} else if (n1->mn_key > n2->mn_key) {
		return (1);
	} else {
		return (0);
	}
}

/*
 * xsvc_devmap()
 *
 */
/*ARGSUSED*/
static int
xsvc_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model)
{
	ddi_umem_cookie_t cookie;
	xsvc_state_t *state;
	offset_t off_align;
	size_t npages;
	caddr_t kvai;
	size_t psize;
	int instance;
	caddr_t kva;
	pfn_t pfn;
	int err;
	int i;


	instance = getminor(dev);
	state = ddi_get_soft_state(xsvc_statep, instance);
	if (state == NULL) {
		return (ENXIO);
	}

	/*
	 * On 64-bit kernels, if a 32-bit application does an mmap(),
	 * smmap32 will sign-extend the offset. We need to undo that since
	 * we are passed a physical address in off, not an offset.
	 */
#if defined(__amd64)
	if (((model & DDI_MODEL_MASK) == DDI_MODEL_ILP32) &&
	    ((off & ~0xFFFFFFFFll) == ~0xFFFFFFFFll)) {
		off = off & 0xFFFFFFFF;
	}
#endif
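
	/*
	 * For example (illustrative value): a 32-bit app mapping physical
	 * address 0xF0000000 arrives here sign extended to
	 * 0xFFFFFFFFF0000000; the mask above restores 0xF0000000.
	 */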

#ifdef __xpv
	/*
	 * We won't allow guest OSes to devmap mfn/pfns. This may be relaxed
	 * later if a good reason comes up.
	 */
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		return (-1);
	}

	/* we will always treat this as a foreign MFN */
	pfn = xen_assign_pfn(btop(off));
#else
	pfn = btop(off);
#endif
	/* always work with whole pages */
	off_align = P2ALIGN(off, PAGESIZE);
	psize = P2ROUNDUP(off + len, PAGESIZE) - off_align;
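
	/*
	 * Worked example (4K pages, illustrative values): off = 0x12345678
	 * and len = 0x2000 give off_align = 0x12345000 and psize =
	 * P2ROUNDUP(0x12347678, 0x1000) - 0x12345000 = 0x3000, i.e. three
	 * pages covering the unaligned head and tail.
	 */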

	/*
	 * if this is memory we're trying to map into user space, we first
	 * need to map the PFNs into KVA, then build up a umem cookie, and
	 * finally do a umem_setup to map it in.
	 */
	if (pf_is_memory(pfn)) {
		npages = btop(psize);

		kva = vmem_alloc(heap_arena, psize, VM_SLEEP);
		if (kva == NULL) {
			return (-1);
		}

		kvai = kva;
		for (i = 0; i < npages; i++) {
			hat_devload(kas.a_hat, kvai, PAGESIZE, pfn,
			    PROT_READ | PROT_WRITE, HAT_LOAD_LOCK);
			pfn++;
			kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
		}

		err = xsvc_umem_cookie_alloc(kva, psize, KM_SLEEP, &cookie);
		if (err != 0) {
			goto devmapfail_cookie_alloc;
		}

		if ((err = devmap_umem_setup(dhp, state->xs_dip, &xsvc_callbk,
		    cookie, 0, psize, PROT_ALL, 0, &xsvc_device_attr)) < 0) {
			goto devmapfail_umem_setup;
		}
		*maplen = psize;

	/*
	 * If this is not memory (or a foreign MFN in i86xpv), go through
	 * devmem_setup.
	 */
	} else {
		if ((err = devmap_devmem_setup(dhp, state->xs_dip, NULL, 0,
		    off_align, psize, PROT_ALL, 0, &xsvc_device_attr)) < 0) {
			return (err);
		}
		*maplen = psize;
	}

	return (0);

devmapfail_umem_setup:
	xsvc_umem_cookie_free(&cookie);

devmapfail_cookie_alloc:
	kvai = kva;
	for (i = 0; i < npages; i++) {
		hat_unload(kas.a_hat, kvai, PAGESIZE,
		    HAT_UNLOAD_UNLOCK);
		kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
	}
	vmem_free(heap_arena, kva, psize);

	return (err);
}

/*
 * xsvc_umem_cookie_alloc()
 *
 *   allocate a umem cookie to be used in devmap_umem_setup using KVA already
 *   allocated.
 */
static int
xsvc_umem_cookie_alloc(caddr_t kva, size_t size, int flags,
    ddi_umem_cookie_t *cookiep)
{
	struct ddi_umem_cookie *umem_cookiep;

	umem_cookiep = kmem_zalloc(sizeof (struct ddi_umem_cookie), flags);
	if (umem_cookiep == NULL) {
		*cookiep = NULL;
		return (-1);
	}

	umem_cookiep->cvaddr = kva;
	umem_cookiep->type = KMEM_NON_PAGEABLE;
	umem_cookiep->size = size;
	*cookiep = (ddi_umem_cookie_t)umem_cookiep;

	return (0);
}

/*
 * xsvc_umem_cookie_free()
 *
 */
static void
xsvc_umem_cookie_free(ddi_umem_cookie_t *cookiep)
{
	kmem_free(*cookiep, sizeof (struct ddi_umem_cookie));
	*cookiep = NULL;
}


/*
 * xsvc_devmap_map()
 *
 */
/*ARGSUSED*/
static int
xsvc_devmap_map(devmap_cookie_t dhc, dev_t dev, uint_t flags, offset_t off,
    size_t len, void **pvtp)
{
	struct ddi_umem_cookie *cp;
	devmap_handle_t *dhp;
	xsvc_state_t *state;
	int instance;


	instance = getminor(dev);
	state = ddi_get_soft_state(xsvc_statep, instance);
	if (state == NULL) {
		return (ENXIO);
	}

	dhp = (devmap_handle_t *)dhc;
	/* This driver only supports MAP_SHARED, not MAP_PRIVATE */
	if (flags & MAP_PRIVATE) {
		cmn_err(CE_WARN, "!xsvc driver doesn't support MAP_PRIVATE");
		return (EINVAL);
	}

	cp = (struct ddi_umem_cookie *)dhp->dh_cookie;
	cp->cook_refcnt = 1;

	*pvtp = state;
	return (0);
}


/*
 * xsvc_devmap_dup()
 *
 *   keep a reference count for forks so we don't unmap if we have multiple
 *   mappings.
 */
/*ARGSUSED*/
static int
xsvc_devmap_dup(devmap_cookie_t dhc, void *pvtp, devmap_cookie_t new_dhp,
    void **new_pvtp)
{
	struct ddi_umem_cookie *cp;
	devmap_handle_t *dhp;
	xsvc_state_t *state;


	state = (xsvc_state_t *)pvtp;
	dhp = (devmap_handle_t *)dhc;

	mutex_enter(&state->xs_cookie_mutex);
	cp = (struct ddi_umem_cookie *)dhp->dh_cookie;
	if (cp == NULL) {
		mutex_exit(&state->xs_cookie_mutex);
		return (ENOMEM);
	}

	cp->cook_refcnt++;
	mutex_exit(&state->xs_cookie_mutex);

	*new_pvtp = state;
	return (0);
}
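
/*
 * For example (illustrative scenario): if a process mmap()s a memory-backed
 * range and then fork()s, the devmap framework dups the mapping and
 * xsvc_devmap_dup() bumps cook_refcnt to 2; xsvc_devmap_unmap() only tears
 * down the KVA and umem cookie once both mappings are gone and the count
 * drops to 0.
 */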

/*
 * xsvc_devmap_unmap()
 *
 *   This routine is only called if we were mapping in memory in xsvc_devmap(),
 *   i.e. we only pass xsvc_callbk to devmap_umem_setup() if pf_is_memory()
 *   was true. It would have been nice if devmap_callback_ctl had an args
 *   parameter; then we wouldn't have to look into the devmap_handle and the
 *   umem cookie.
 */
/*ARGSUSED*/
static void
xsvc_devmap_unmap(devmap_cookie_t dhc, void *pvtp, offset_t off, size_t len,
    devmap_cookie_t new_dhp1, void **new_pvtp1, devmap_cookie_t new_dhp2,
    void **new_pvtp2)
{
	struct ddi_umem_cookie *ncp;
	struct ddi_umem_cookie *cp;
	devmap_handle_t *ndhp;
	devmap_handle_t *dhp;
	xsvc_state_t *state;
	size_t npages;
	caddr_t kvai;
	caddr_t kva;
	size_t size;
	int i;


	state = (xsvc_state_t *)pvtp;
	mutex_enter(&state->xs_cookie_mutex);

	/* peek into the umem cookie to figure out what we need to free up */
	dhp = (devmap_handle_t *)dhc;
	cp = (struct ddi_umem_cookie *)dhp->dh_cookie;
	ASSERT(cp != NULL);

	if (new_dhp1 != NULL) {
		ndhp = (devmap_handle_t *)new_dhp1;
		ncp = (struct ddi_umem_cookie *)ndhp->dh_cookie;
		ncp->cook_refcnt++;
		*new_pvtp1 = state;
	}
	if (new_dhp2 != NULL) {
		ndhp = (devmap_handle_t *)new_dhp2;
		ncp = (struct ddi_umem_cookie *)ndhp->dh_cookie;
		ncp->cook_refcnt++;
		*new_pvtp2 = state;
	}

	cp->cook_refcnt--;
	if (cp->cook_refcnt == 0) {
		kva = cp->cvaddr;
		size = cp->size;

		/*
		 * free up the umem cookie, then unmap all the pages that we
		 * mapped in during devmap, then free up the kva space.
		 */
		npages = btop(size);
		xsvc_umem_cookie_free(&dhp->dh_cookie);
		kvai = kva;
		for (i = 0; i < npages; i++) {
			hat_unload(kas.a_hat, kvai, PAGESIZE,
			    HAT_UNLOAD_UNLOCK);
			kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
		}
		vmem_free(heap_arena, kva, size);
	}

	mutex_exit(&state->xs_cookie_mutex);
}