/*	$NetBSD: mm.c,v 1.14 2011/06/12 03:35:51 rmind Exp $	*/

/*-
 * Copyright (c) 2002, 2008, 2010 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Christos Zoulas, Joerg Sonnenberger and Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Special /dev/{mem,kmem,zero,null} memory devices.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: mm.c,v 1.14 2011/06/12 03:35:51 rmind Exp $");

#include "opt_compat_netbsd.h"

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/uio.h>
#include <sys/termios.h>

#include <dev/mm.h>

#include <uvm/uvm_extern.h>

static void *		dev_zero_page	__read_mostly;
static kmutex_t		dev_mem_lock	__cacheline_aligned;
static vaddr_t		dev_mem_addr	__read_mostly;

static dev_type_read(mm_readwrite);
static dev_type_ioctl(mm_ioctl);
static dev_type_mmap(mm_mmap);

const struct cdevsw mem_cdevsw = {
#ifdef __HAVE_MM_MD_OPEN
	mm_md_open,
#else
	nullopen,
#endif
	nullclose, mm_readwrite, mm_readwrite,
	mm_ioctl, nostop, notty, nopoll, mm_mmap, nokqfilter,
	D_MPSAFE
};
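
/*
 * Note that the same mm_readwrite() routine is wired into both the d_read
 * and d_write slots above and dispatches on minor(dev) and uio->uio_rw.
 * D_MPSAFE means these entry points do their own locking (dev_mem_lock);
 * they are not serialized by the global kernel lock.
 */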

#ifdef pmax	/* XXX */
const struct cdevsw mem_ultrix_cdevsw = {
	nullopen, nullclose, mm_readwrite, mm_readwrite, mm_ioctl,
	nostop, notty, nopoll, mm_mmap, nokqfilter, D_MPSAFE
};
#endif

/*
 * mm_init: initialize memory device driver.
 */
void
mm_init(void)
{
	vaddr_t pg;

	mutex_init(&dev_mem_lock, MUTEX_DEFAULT, IPL_NONE);

	/* Read-only zero-page. */
	pg = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
	KASSERT(pg != 0);
#if 0
	pmap_protect(pmap_kernel(), pg, pg + PAGE_SIZE, VM_PROT_READ);
#endif
	pmap_update(pmap_kernel());
	dev_zero_page = (void *)pg;

#ifndef __HAVE_MM_MD_CACHE_ALIASING
	/* KVA for mappings during I/O. */
	dev_mem_addr = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
	    UVM_KMF_VAONLY|UVM_KMF_WAITVA);
	KASSERT(dev_mem_addr != 0);
#else
	dev_mem_addr = 0;
#endif
}

/*
 * dev_mem_getva: get a special virtual address.  If the architecture
 * requires it, allocate the VA according to the PA, which avoids
 * cache-aliasing issues.  Otherwise, use a constant, general mapping
 * address.
 */
static inline vaddr_t
dev_mem_getva(paddr_t pa)
{
#ifdef __HAVE_MM_MD_CACHE_ALIASING
	const vsize_t coloroff = trunc_page(pa) & ptoa(uvmexp.colormask);
	const vaddr_t kva = uvm_km_alloc(kernel_map, PAGE_SIZE + coloroff,
	    ptoa(uvmexp.ncolors), UVM_KMF_VAONLY | UVM_KMF_WAITVA);

	return kva + coloroff;
#else
	return dev_mem_addr;
#endif
}
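
/*
 * Illustrative arithmetic only (an assumed configuration, not a fixed
 * property of the code): with 4 KB pages and uvmexp.ncolors == 4,
 * ptoa(uvmexp.colormask) == 0x3000.  For pa == 0x12345678,
 * trunc_page(pa) == 0x12345000, so coloroff == 0x1000 and the VA
 * returned above shares the cache color of the physical page.
 * dev_mem_relva() below undoes the offset and frees the whole
 * PAGE_SIZE + coloroff allocation.
 */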

static inline void
dev_mem_relva(paddr_t pa, vaddr_t va)
{
#ifdef __HAVE_MM_MD_CACHE_ALIASING
	const vsize_t coloroff = trunc_page(pa) & ptoa(uvmexp.colormask);
	const vaddr_t origva = va - coloroff;

	uvm_km_free(kernel_map, origva, PAGE_SIZE + coloroff, UVM_KMF_VAONLY);
#else
	KASSERT(dev_mem_addr == va);
#endif
}

/*
 * dev_mem_readwrite: helper for DEV_MEM (/dev/mem) case of R/W.
 */
static int
dev_mem_readwrite(struct uio *uio, struct iovec *iov)
{
	paddr_t paddr;
	vaddr_t vaddr;
	vm_prot_t prot;
	size_t len, offset;
	bool have_direct;
	int error;

	/* Check for wrap around. */
	if ((intptr_t)uio->uio_offset != uio->uio_offset) {
		return EFAULT;
	}
	paddr = uio->uio_offset & ~PAGE_MASK;
	prot = (uio->uio_rw == UIO_WRITE) ? VM_PROT_WRITE : VM_PROT_READ;
	error = mm_md_physacc(paddr, prot);
	if (error) {
		return error;
	}
	offset = uio->uio_offset & PAGE_MASK;
	len = MIN(uio->uio_resid, PAGE_SIZE - offset);

#ifdef __HAVE_MM_MD_DIRECT_MAPPED_PHYS
	/* Is physical address directly mapped?  Return VA. */
	have_direct = mm_md_direct_mapped_phys(paddr, &vaddr);
#else
	vaddr = 0;
	have_direct = false;
#endif
	if (!have_direct) {
		/* Get a special virtual address. */
		const vaddr_t va = dev_mem_getva(paddr);

		/* Map selected KVA to physical address. */
		mutex_enter(&dev_mem_lock);
		pmap_kenter_pa(va, paddr, prot, 0);
		pmap_update(pmap_kernel());

		/* Perform I/O. */
		vaddr = va + offset;
		error = uiomove((void *)vaddr, len, uio);

		/* Unmap, flush before unlock. */
		pmap_kremove(va, PAGE_SIZE);
		pmap_update(pmap_kernel());
		mutex_exit(&dev_mem_lock);

		/* "Release" the virtual address. */
		dev_mem_relva(paddr, va);
	} else {
		/* Direct map, just perform I/O. */
		vaddr += offset;
		error = uiomove((void *)vaddr, len, uio);
	}
	return error;
}
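
/*
 * Illustrative example only (assuming 4 KB pages): a 10000-byte read of
 * /dev/mem starting at physical offset 0x1f00 is clipped above to
 * len == 256, i.e. up to the next page boundary.  The loop in
 * mm_readwrite() below then re-invokes this helper with the advanced
 * uio, producing further chunks of 4096, 4096 and finally 1552 bytes.
 */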

/*
 * dev_kmem_readwrite: helper for DEV_KMEM (/dev/kmem) case of R/W.
 */
static int
dev_kmem_readwrite(struct uio *uio, struct iovec *iov)
{
	void *addr;
	size_t len, offset;
	vm_prot_t prot;
	int error;
	bool md_kva;

	/* Check for wrap around. */
	addr = (void *)(intptr_t)uio->uio_offset;
	if ((uintptr_t)addr != uio->uio_offset) {
		return EFAULT;
	}
	/*
	 * Handle a non-page-aligned offset.
	 * Otherwise, we operate on a page-by-page basis.
	 */
	offset = uio->uio_offset & PAGE_MASK;
	len = MIN(uio->uio_resid, PAGE_SIZE - offset);
	prot = (uio->uio_rw == UIO_WRITE) ? VM_PROT_WRITE : VM_PROT_READ;

	md_kva = false;

#ifdef __HAVE_MM_MD_DIRECT_MAPPED_IO
	paddr_t paddr;
	/* MD case: is this a directly mapped address? */
	if (mm_md_direct_mapped_io(addr, &paddr)) {
		/* If so, validate the physical address. */
		error = mm_md_physacc(paddr, prot);
		if (error) {
			return error;
		}
		md_kva = true;
	}
#endif
	if (!md_kva) {
		bool checked = false;

#ifdef __HAVE_MM_MD_KERNACC
		/* MD check for the address. */
		error = mm_md_kernacc(addr, prot, &checked);
		if (error) {
			return error;
		}
#endif
		/* UVM check for the address (unless the MD check covered it). */
		if (!checked && !uvm_kernacc(addr, len, prot)) {
			return EFAULT;
		}
	}
	error = uiomove(addr, len, uio);
	return error;
}
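
/*
 * Note on the checks above: an MD direct-map hit validates the backing
 * physical address via mm_md_physacc(); otherwise the optional
 * mm_md_kernacc() hook runs first and uvm_kernacc() is consulted only
 * if that hook did not already perform the check.
 */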

/*
 * dev_zero_readwrite: helper for DEV_ZERO (/dev/zero) case of R/W.
 */
static inline int
dev_zero_readwrite(struct uio *uio, struct iovec *iov)
{
	size_t len;

	/* Nothing to do for the write case. */
	if (uio->uio_rw == UIO_WRITE) {
		uio->uio_resid = 0;
		return 0;
	}
	/*
	 * Read on a page-by-page basis; the caller will continue.
	 * Clip appropriately for the single/last-iteration cases.
	 */
	len = MIN(iov->iov_len, PAGE_SIZE);
	return uiomove(dev_zero_page, len, uio);
}
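
/*
 * Illustrative example only (assuming 4 KB pages): a single 10000-byte
 * read(2) from /dev/zero is served here in chunks of at most PAGE_SIZE
 * (4096, 4096 and 1808 bytes), each copied out of the pre-zeroed
 * dev_zero_page; writes simply consume the residual count above.
 */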

/*
 * mm_readwrite: general memory R/W function.
 */
static int
mm_readwrite(dev_t dev, struct uio *uio, int flags)
{
	struct iovec *iov;
	int error;

#ifdef __HAVE_MM_MD_READWRITE
	/* If defined, there are extra MD cases. */
	switch (minor(dev)) {
	case DEV_MEM:
	case DEV_KMEM:
	case DEV_NULL:
	case DEV_ZERO:
#if defined(COMPAT_16) && defined(__arm)
	case _DEV_ZERO_oARM:
#endif
		break;
	default:
		return mm_md_readwrite(dev, uio);
	}
#endif
	error = 0;
	while (uio->uio_resid > 0 && error == 0) {
		iov = uio->uio_iov;
		if (iov->iov_len == 0) {
			/* Processed; next I/O vector. */
			uio->uio_iov++;
			uio->uio_iovcnt--;
			KASSERT(uio->uio_iovcnt >= 0);
			continue;
		}
		/* Helper functions will process on a page-by-page basis. */
		switch (minor(dev)) {
		case DEV_MEM:
			error = dev_mem_readwrite(uio, iov);
			break;
		case DEV_KMEM:
			error = dev_kmem_readwrite(uio, iov);
			break;
		case DEV_NULL:
			if (uio->uio_rw == UIO_WRITE) {
				uio->uio_resid = 0;
			}
			/* Break directly out of the loop. */
			return 0;
#if defined(COMPAT_16) && defined(__arm)
		case _DEV_ZERO_oARM:
#endif
		case DEV_ZERO:
			error = dev_zero_readwrite(uio, iov);
			break;
		default:
			error = ENXIO;
			break;
		}
	}
	return error;
}

/*
 * mm_mmap: general mmap() handler.
 */
static paddr_t
mm_mmap(dev_t dev, off_t off, int acc)
{
	vm_prot_t prot;

#ifdef __HAVE_MM_MD_MMAP
	/* If defined, there are extra mmap() MD cases. */
	switch (minor(dev)) {
	case DEV_MEM:
	case DEV_KMEM:
	case DEV_NULL:
#if defined(COMPAT_16) && defined(__arm)
	case _DEV_ZERO_oARM:
#endif
	case DEV_ZERO:
		break;
	default:
		return mm_md_mmap(dev, off, acc);
	}
#endif
	/*
	 * /dev/null does not make sense, /dev/kmem is volatile and
	 * /dev/zero is handled in mmap(2) already.
	 */
	if (minor(dev) != DEV_MEM) {
		return -1;
	}

	prot = 0;
	if (acc & PROT_EXEC)
		prot |= VM_PROT_EXECUTE;
	if (acc & PROT_READ)
		prot |= VM_PROT_READ;
	if (acc & PROT_WRITE)
		prot |= VM_PROT_WRITE;

	/* Validate the physical address. */
	if (mm_md_physacc(off, prot) != 0) {
		return -1;
	}
	return off >> PGSHIFT;
}
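
/*
 * Illustrative arithmetic only: with 4 KB pages (PGSHIFT == 12), an
 * mmap(2) of /dev/mem at byte offset 0xa0000 returns page frame 0xa0
 * from the shift above; the device pager turns that cookie back into a
 * physical address when the mapping is actually entered at fault time.
 */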

static int
mm_ioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{

	switch (cmd) {
	case FIONBIO:
		/* We never block anyway. */
		return 0;

	case FIOSETOWN:
	case FIOGETOWN:
	case TIOCGPGRP:
	case TIOCSPGRP:
	case TIOCGETA:
		return ENOTTY;

	case FIOASYNC:
		if ((*(int *)data) == 0) {
			return 0;
		}
		/* FALLTHROUGH */
	default:
		return EOPNOTSUPP;
	}
}
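
/*
 * Illustrative userland sketch (not part of the driver): mapping a page
 * of physical memory through /dev/mem.  The request reaches mm_mmap()
 * above and is validated by mm_md_physacc().  The physical offset
 * 0xa0000 is an arbitrary example and error handling is omitted.
 *
 *	#include <sys/mman.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/dev/mem", O_RDONLY);
 *	void *p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd,
 *	    (off_t)0xa0000);
 *	...
 *	munmap(p, 4096);
 *	close(fd);
 */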