1219820Sjeff/*-
2219820Sjeff * Copyright (c) 2010 Isilon Systems, Inc.
3219820Sjeff * Copyright (c) 2010 iX Systems, Inc.
4219820Sjeff * Copyright (c) 2010 Panasas, Inc.
5271127Shselasky * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
6219820Sjeff * All rights reserved.
7219820Sjeff *
8219820Sjeff * Redistribution and use in source and binary forms, with or without
9219820Sjeff * modification, are permitted provided that the following conditions
10219820Sjeff * are met:
11219820Sjeff * 1. Redistributions of source code must retain the above copyright
12219820Sjeff *    notice unmodified, this list of conditions, and the following
13219820Sjeff *    disclaimer.
14219820Sjeff * 2. Redistributions in binary form must reproduce the above copyright
15219820Sjeff *    notice, this list of conditions and the following disclaimer in the
16219820Sjeff *    documentation and/or other materials provided with the distribution.
17219820Sjeff *
18219820Sjeff * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19219820Sjeff * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20219820Sjeff * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21219820Sjeff * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22219820Sjeff * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23219820Sjeff * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24219820Sjeff * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25219820Sjeff * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26219820Sjeff * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27219820Sjeff * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28219820Sjeff */
29219820Sjeff
30219820Sjeff#include <sys/param.h>
31219820Sjeff#include <sys/systm.h>
32219820Sjeff#include <sys/malloc.h>
33219820Sjeff#include <sys/kernel.h>
34219820Sjeff#include <sys/sysctl.h>
35282513Shselasky#include <sys/proc.h>
36287637Sjhb#include <sys/sglist.h>
37282513Shselasky#include <sys/sleepqueue.h>
38219820Sjeff#include <sys/lock.h>
39219820Sjeff#include <sys/mutex.h>
40219820Sjeff#include <sys/bus.h>
41219820Sjeff#include <sys/fcntl.h>
42219820Sjeff#include <sys/file.h>
43219820Sjeff#include <sys/filio.h>
44248084Sattilio#include <sys/rwlock.h>
45219820Sjeff
46219820Sjeff#include <vm/vm.h>
47219820Sjeff#include <vm/pmap.h>
48219820Sjeff
49219820Sjeff#include <machine/stdarg.h>
50219820Sjeff#include <machine/pmap.h>
51219820Sjeff
52219820Sjeff#include <linux/kobject.h>
53219820Sjeff#include <linux/device.h>
54219820Sjeff#include <linux/slab.h>
55219820Sjeff#include <linux/module.h>
56324685Shselasky#include <linux/moduleparam.h>
57219820Sjeff#include <linux/cdev.h>
58219820Sjeff#include <linux/file.h>
59219820Sjeff#include <linux/sysfs.h>
60219820Sjeff#include <linux/mm.h>
61219820Sjeff#include <linux/io.h>
62219820Sjeff#include <linux/vmalloc.h>
63282513Shselasky#include <linux/timer.h>
64282513Shselasky#include <linux/netdevice.h>
65219820Sjeff
66219820Sjeff#include <vm/vm_pager.h>
67219820Sjeff
68293151Shselasky#include <linux/workqueue.h>
69293151Shselasky
70324685ShselaskySYSCTL_NODE(_compat, OID_AUTO, linuxkpi, CTLFLAG_RW, 0, "LinuxKPI parameters");
71324685Shselasky
72219820SjeffMALLOC_DEFINE(M_KMALLOC, "linux", "Linux kmalloc compat");
73219820Sjeff
74219820Sjeff#include <linux/rbtree.h>
75219820Sjeff/* Undo Linux compat changes. */
76219820Sjeff#undef RB_ROOT
77219820Sjeff#undef file
78219820Sjeff#undef cdev
79219820Sjeff#define	RB_ROOT(head)	(head)->rbh_root
80219820Sjeff
81219820Sjeffstruct kobject class_root;
82219820Sjeffstruct device linux_rootdev;
83219820Sjeffstruct class miscclass;
84219820Sjeffstruct list_head pci_drivers;
85219820Sjeffstruct list_head pci_devices;
86219820Sjeffspinlock_t pci_lock;
87219820Sjeff
88282513Shselaskyunsigned long linux_timer_hz_mask;
89282513Shselasky
/*
 * Comparison callback supplied to RB_GENERATE() below.  It never compares
 * anything: any call ends in panic("no cmp").
 */
int
panic_cmp(struct rb_node *one, struct rb_node *two)
{

	panic("no cmp");
}

/* Instantiate the red-black tree routines for linux_root / rb_node. */
RB_GENERATE(linux_root, rb_node, __entry, panic_cmp);
97293151Shselasky
98219820Sjeffint
99293151Shselaskykobject_set_name_vargs(struct kobject *kobj, const char *fmt, va_list args)
100293151Shselasky{
101293151Shselasky	va_list tmp_va;
102293151Shselasky	int len;
103293151Shselasky	char *old;
104293151Shselasky	char *name;
105293151Shselasky	char dummy;
106293151Shselasky
107293151Shselasky	old = kobj->name;
108293151Shselasky
109293151Shselasky	if (old && fmt == NULL)
110293151Shselasky		return (0);
111293151Shselasky
112293151Shselasky	/* compute length of string */
113293151Shselasky	va_copy(tmp_va, args);
114293151Shselasky	len = vsnprintf(&dummy, 0, fmt, tmp_va);
115293151Shselasky	va_end(tmp_va);
116293151Shselasky
117293151Shselasky	/* account for zero termination */
118293151Shselasky	len++;
119293151Shselasky
120293151Shselasky	/* check for error */
121293151Shselasky	if (len < 1)
122293151Shselasky		return (-EINVAL);
123293151Shselasky
124293151Shselasky	/* allocate memory for string */
125293151Shselasky	name = kzalloc(len, GFP_KERNEL);
126293151Shselasky	if (name == NULL)
127293151Shselasky		return (-ENOMEM);
128293151Shselasky	vsnprintf(name, len, fmt, args);
129293151Shselasky	kobj->name = name;
130293151Shselasky
131293151Shselasky	/* free old string */
132293151Shselasky	kfree(old);
133293151Shselasky
134293151Shselasky	/* filter new string */
135293151Shselasky	for (; *name != '\0'; name++)
136293151Shselasky		if (*name == '/')
137293151Shselasky			*name = '!';
138293151Shselasky	return (0);
139293151Shselasky}
140293151Shselasky
141293151Shselaskyint
142219820Sjeffkobject_set_name(struct kobject *kobj, const char *fmt, ...)
143219820Sjeff{
144219820Sjeff	va_list args;
145219820Sjeff	int error;
146219820Sjeff
147219820Sjeff	va_start(args, fmt);
148219820Sjeff	error = kobject_set_name_vargs(kobj, fmt, args);
149219820Sjeff	va_end(args);
150219820Sjeff
151219820Sjeff	return (error);
152219820Sjeff}
153219820Sjeff
154219820Sjeffstatic inline int
155219820Sjeffkobject_add_complete(struct kobject *kobj, struct kobject *parent)
156219820Sjeff{
157219820Sjeff	struct kobj_type *t;
158219820Sjeff	int error;
159219820Sjeff
160219820Sjeff	kobj->parent = kobject_get(parent);
161219820Sjeff	error = sysfs_create_dir(kobj);
162219820Sjeff	if (error == 0 && kobj->ktype && kobj->ktype->default_attrs) {
163219820Sjeff		struct attribute **attr;
164219820Sjeff		t = kobj->ktype;
165219820Sjeff
166219820Sjeff		for (attr = t->default_attrs; *attr != NULL; attr++) {
167219820Sjeff			error = sysfs_create_file(kobj, *attr);
168219820Sjeff			if (error)
169219820Sjeff				break;
170219820Sjeff		}
171219820Sjeff		if (error)
172219820Sjeff			sysfs_remove_dir(kobj);
173219820Sjeff
174219820Sjeff	}
175219820Sjeff	return (error);
176219820Sjeff}
177219820Sjeff
178219820Sjeffint
179219820Sjeffkobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...)
180219820Sjeff{
181219820Sjeff	va_list args;
182219820Sjeff	int error;
183219820Sjeff
184219820Sjeff	va_start(args, fmt);
185219820Sjeff	error = kobject_set_name_vargs(kobj, fmt, args);
186219820Sjeff	va_end(args);
187219820Sjeff	if (error)
188219820Sjeff		return (error);
189219820Sjeff
190219820Sjeff	return kobject_add_complete(kobj, parent);
191219820Sjeff}
192219820Sjeff
193219820Sjeffvoid
194219820Sjeffkobject_release(struct kref *kref)
195219820Sjeff{
196219820Sjeff	struct kobject *kobj;
197219820Sjeff	char *name;
198219820Sjeff
199219820Sjeff	kobj = container_of(kref, struct kobject, kref);
200219820Sjeff	sysfs_remove_dir(kobj);
201219820Sjeff	if (kobj->parent)
202219820Sjeff		kobject_put(kobj->parent);
203219820Sjeff	kobj->parent = NULL;
204219820Sjeff	name = kobj->name;
205219820Sjeff	if (kobj->ktype && kobj->ktype->release)
206219820Sjeff		kobj->ktype->release(kobj);
207219820Sjeff	kfree(name);
208219820Sjeff}
209219820Sjeff
/* Release hook that simply hands the kobject itself to kfree(). */
static void
kobject_kfree(struct kobject *kobj)
{

	kfree(kobj);
}
215219820Sjeff
216271127Shselaskystatic void
217271127Shselaskykobject_kfree_name(struct kobject *kobj)
218271127Shselasky{
219271127Shselasky	if (kobj) {
220271127Shselasky		kfree(kobj->name);
221271127Shselasky	}
222271127Shselasky}
223271127Shselasky
224219820Sjeffstruct kobj_type kfree_type = { .release = kobject_kfree };
225219820Sjeff
/*
 * Release callback installed by device_create(): log the device name at
 * debug level and free the device structure.
 */
static void
dev_release(struct device *dev)
{

	pr_debug("dev_release: %s\n", dev_name(dev));
	kfree(dev);
}
232277139Shselasky
233219820Sjeffstruct device *
234219820Sjeffdevice_create(struct class *class, struct device *parent, dev_t devt,
235219820Sjeff    void *drvdata, const char *fmt, ...)
236219820Sjeff{
237219820Sjeff	struct device *dev;
238219820Sjeff	va_list args;
239219820Sjeff
240219820Sjeff	dev = kzalloc(sizeof(*dev), M_WAITOK);
241219820Sjeff	dev->parent = parent;
242219820Sjeff	dev->class = class;
243219820Sjeff	dev->devt = devt;
244219820Sjeff	dev->driver_data = drvdata;
245277139Shselasky	dev->release = dev_release;
246219820Sjeff	va_start(args, fmt);
247219820Sjeff	kobject_set_name_vargs(&dev->kobj, fmt, args);
248219820Sjeff	va_end(args);
249219820Sjeff	device_register(dev);
250219820Sjeff
251219820Sjeff	return (dev);
252219820Sjeff}
253219820Sjeff
254219820Sjeffint
255219820Sjeffkobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype,
256219820Sjeff    struct kobject *parent, const char *fmt, ...)
257219820Sjeff{
258219820Sjeff	va_list args;
259219820Sjeff	int error;
260219820Sjeff
261219820Sjeff	kobject_init(kobj, ktype);
262219820Sjeff	kobj->ktype = ktype;
263219820Sjeff	kobj->parent = parent;
264219820Sjeff	kobj->name = NULL;
265219820Sjeff
266219820Sjeff	va_start(args, fmt);
267219820Sjeff	error = kobject_set_name_vargs(kobj, fmt, args);
268219820Sjeff	va_end(args);
269219820Sjeff	if (error)
270219820Sjeff		return (error);
271219820Sjeff	return kobject_add_complete(kobj, parent);
272219820Sjeff}
273219820Sjeff
274219820Sjeffstatic void
275219820Sjefflinux_file_dtor(void *cdp)
276219820Sjeff{
277219820Sjeff	struct linux_file *filp;
278219820Sjeff
279219820Sjeff	filp = cdp;
280251617Sjhb	filp->f_op->release(filp->f_vnode, filp);
281251617Sjhb	vdrop(filp->f_vnode);
282219820Sjeff	kfree(filp);
283219820Sjeff}
284219820Sjeff
285219820Sjeffstatic int
286219820Sjefflinux_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
287219820Sjeff{
288219820Sjeff	struct linux_cdev *ldev;
289219820Sjeff	struct linux_file *filp;
290219820Sjeff	struct file *file;
291219820Sjeff	int error;
292219820Sjeff
293219820Sjeff	file = curthread->td_fpop;
294219820Sjeff	ldev = dev->si_drv1;
295219820Sjeff	if (ldev == NULL)
296219820Sjeff		return (ENODEV);
297219820Sjeff	filp = kzalloc(sizeof(*filp), GFP_KERNEL);
298219820Sjeff	filp->f_dentry = &filp->f_dentry_store;
299219820Sjeff	filp->f_op = ldev->ops;
300219820Sjeff	filp->f_flags = file->f_flag;
301251617Sjhb	vhold(file->f_vnode);
302251617Sjhb	filp->f_vnode = file->f_vnode;
303219820Sjeff	if (filp->f_op->open) {
304219820Sjeff		error = -filp->f_op->open(file->f_vnode, filp);
305219820Sjeff		if (error) {
306219820Sjeff			kfree(filp);
307219820Sjeff			return (error);
308219820Sjeff		}
309219820Sjeff	}
310219820Sjeff	error = devfs_set_cdevpriv(filp, linux_file_dtor);
311219820Sjeff	if (error) {
312219820Sjeff		filp->f_op->release(file->f_vnode, filp);
313219820Sjeff		kfree(filp);
314219820Sjeff		return (error);
315219820Sjeff	}
316219820Sjeff
317219820Sjeff	return 0;
318219820Sjeff}
319219820Sjeff
320219820Sjeffstatic int
321219820Sjefflinux_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
322219820Sjeff{
323219820Sjeff	struct linux_cdev *ldev;
324219820Sjeff	struct linux_file *filp;
325219820Sjeff	struct file *file;
326219820Sjeff	int error;
327219820Sjeff
328219820Sjeff	file = curthread->td_fpop;
329219820Sjeff	ldev = dev->si_drv1;
330219820Sjeff	if (ldev == NULL)
331219820Sjeff		return (0);
332219820Sjeff	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
333219820Sjeff		return (error);
334219820Sjeff	filp->f_flags = file->f_flag;
335255932Salfred        devfs_clear_cdevpriv();
336255932Salfred
337219820Sjeff
338219820Sjeff	return (0);
339219820Sjeff}
340219820Sjeff
341219820Sjeffstatic int
342219820Sjefflinux_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
343219820Sjeff    struct thread *td)
344219820Sjeff{
345219820Sjeff	struct linux_cdev *ldev;
346219820Sjeff	struct linux_file *filp;
347219820Sjeff	struct file *file;
348219820Sjeff	int error;
349219820Sjeff
350219820Sjeff	file = curthread->td_fpop;
351219820Sjeff	ldev = dev->si_drv1;
352219820Sjeff	if (ldev == NULL)
353219820Sjeff		return (0);
354219820Sjeff	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
355219820Sjeff		return (error);
356219820Sjeff	filp->f_flags = file->f_flag;
357219820Sjeff	/*
358219820Sjeff	 * Linux does not have a generic ioctl copyin/copyout layer.  All
359219820Sjeff	 * linux ioctls must be converted to void ioctls which pass a
360219820Sjeff	 * pointer to the address of the data.  We want the actual user
361219820Sjeff	 * address so we dereference here.
362219820Sjeff	 */
363219820Sjeff	data = *(void **)data;
364219820Sjeff	if (filp->f_op->unlocked_ioctl)
365219820Sjeff		error = -filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data);
366219820Sjeff	else
367219820Sjeff		error = ENOTTY;
368219820Sjeff
369219820Sjeff	return (error);
370219820Sjeff}
371219820Sjeff
372219820Sjeffstatic int
373219820Sjefflinux_dev_read(struct cdev *dev, struct uio *uio, int ioflag)
374219820Sjeff{
375219820Sjeff	struct linux_cdev *ldev;
376219820Sjeff	struct linux_file *filp;
377219820Sjeff	struct file *file;
378219820Sjeff	ssize_t bytes;
379219820Sjeff	int error;
380219820Sjeff
381219820Sjeff	file = curthread->td_fpop;
382219820Sjeff	ldev = dev->si_drv1;
383219820Sjeff	if (ldev == NULL)
384219820Sjeff		return (0);
385219820Sjeff	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
386219820Sjeff		return (error);
387219820Sjeff	filp->f_flags = file->f_flag;
388219820Sjeff	if (uio->uio_iovcnt != 1)
389219820Sjeff		panic("linux_dev_read: uio %p iovcnt %d",
390219820Sjeff		    uio, uio->uio_iovcnt);
391219820Sjeff	if (filp->f_op->read) {
392219820Sjeff		bytes = filp->f_op->read(filp, uio->uio_iov->iov_base,
393219820Sjeff		    uio->uio_iov->iov_len, &uio->uio_offset);
394219820Sjeff		if (bytes >= 0) {
395219820Sjeff			uio->uio_iov->iov_base += bytes;
396219820Sjeff			uio->uio_iov->iov_len -= bytes;
397219820Sjeff			uio->uio_resid -= bytes;
398219820Sjeff		} else
399219820Sjeff			error = -bytes;
400219820Sjeff	} else
401219820Sjeff		error = ENXIO;
402219820Sjeff
403219820Sjeff	return (error);
404219820Sjeff}
405219820Sjeff
406219820Sjeffstatic int
407219820Sjefflinux_dev_write(struct cdev *dev, struct uio *uio, int ioflag)
408219820Sjeff{
409219820Sjeff	struct linux_cdev *ldev;
410219820Sjeff	struct linux_file *filp;
411219820Sjeff	struct file *file;
412219820Sjeff	ssize_t bytes;
413219820Sjeff	int error;
414219820Sjeff
415219820Sjeff	file = curthread->td_fpop;
416219820Sjeff	ldev = dev->si_drv1;
417219820Sjeff	if (ldev == NULL)
418219820Sjeff		return (0);
419219820Sjeff	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
420219820Sjeff		return (error);
421219820Sjeff	filp->f_flags = file->f_flag;
422219820Sjeff	if (uio->uio_iovcnt != 1)
423219820Sjeff		panic("linux_dev_write: uio %p iovcnt %d",
424219820Sjeff		    uio, uio->uio_iovcnt);
425219820Sjeff	if (filp->f_op->write) {
426219820Sjeff		bytes = filp->f_op->write(filp, uio->uio_iov->iov_base,
427219820Sjeff		    uio->uio_iov->iov_len, &uio->uio_offset);
428219820Sjeff		if (bytes >= 0) {
429219820Sjeff			uio->uio_iov->iov_base += bytes;
430219820Sjeff			uio->uio_iov->iov_len -= bytes;
431219820Sjeff			uio->uio_resid -= bytes;
432219820Sjeff		} else
433219820Sjeff			error = -bytes;
434219820Sjeff	} else
435219820Sjeff		error = ENXIO;
436219820Sjeff
437219820Sjeff	return (error);
438219820Sjeff}
439219820Sjeff
440219820Sjeffstatic int
441219820Sjefflinux_dev_poll(struct cdev *dev, int events, struct thread *td)
442219820Sjeff{
443219820Sjeff	struct linux_cdev *ldev;
444219820Sjeff	struct linux_file *filp;
445219820Sjeff	struct file *file;
446219820Sjeff	int revents;
447219820Sjeff	int error;
448219820Sjeff
449219820Sjeff	file = curthread->td_fpop;
450219820Sjeff	ldev = dev->si_drv1;
451219820Sjeff	if (ldev == NULL)
452219820Sjeff		return (0);
453219820Sjeff	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
454219820Sjeff		return (error);
455219820Sjeff	filp->f_flags = file->f_flag;
456219820Sjeff	if (filp->f_op->poll)
457219820Sjeff		revents = filp->f_op->poll(filp, NULL) & events;
458219820Sjeff	else
459219820Sjeff		revents = 0;
460219820Sjeff
461219820Sjeff	return (revents);
462219820Sjeff}
463219820Sjeff
464219820Sjeffstatic int
465219820Sjefflinux_dev_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
466219820Sjeff    vm_size_t size, struct vm_object **object, int nprot)
467219820Sjeff{
468219820Sjeff	struct linux_cdev *ldev;
469219820Sjeff	struct linux_file *filp;
470219820Sjeff	struct file *file;
471219820Sjeff	struct vm_area_struct vma;
472219820Sjeff	int error;
473219820Sjeff
474219820Sjeff	file = curthread->td_fpop;
475219820Sjeff	ldev = dev->si_drv1;
476219820Sjeff	if (ldev == NULL)
477219820Sjeff		return (ENODEV);
478219820Sjeff	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
479219820Sjeff		return (error);
480219820Sjeff	filp->f_flags = file->f_flag;
481219820Sjeff	vma.vm_start = 0;
482287637Sjhb	vma.vm_end = size;
483219820Sjeff	vma.vm_pgoff = *offset / PAGE_SIZE;
484219820Sjeff	vma.vm_pfn = 0;
485294636Sjhb	vma.vm_page_prot = VM_MEMATTR_DEFAULT;
486219820Sjeff	if (filp->f_op->mmap) {
487219820Sjeff		error = -filp->f_op->mmap(filp, &vma);
488219820Sjeff		if (error == 0) {
489287637Sjhb			struct sglist *sg;
490287637Sjhb
491287637Sjhb			sg = sglist_alloc(1, M_WAITOK);
492287637Sjhb			sglist_append_phys(sg,
493287637Sjhb			    (vm_paddr_t)vma.vm_pfn << PAGE_SHIFT, vma.vm_len);
494287637Sjhb			*object = vm_pager_allocate(OBJT_SG, sg, vma.vm_len,
495287637Sjhb			    nprot, 0, curthread->td_ucred);
496287637Sjhb		        if (*object == NULL) {
497287637Sjhb				sglist_free(sg);
498287637Sjhb				return (EINVAL);
499287637Sjhb			}
500287637Sjhb			*offset = 0;
501287637Sjhb			if (vma.vm_page_prot != VM_MEMATTR_DEFAULT) {
502287637Sjhb				VM_OBJECT_WLOCK(*object);
503287637Sjhb				vm_object_set_memattr(*object,
504287637Sjhb				    vma.vm_page_prot);
505287637Sjhb				VM_OBJECT_WUNLOCK(*object);
506287637Sjhb			}
507219820Sjeff		}
508219820Sjeff	} else
509219820Sjeff		error = ENODEV;
510219820Sjeff
511219820Sjeff	return (error);
512219820Sjeff}
513219820Sjeff
514219820Sjeffstruct cdevsw linuxcdevsw = {
515219820Sjeff	.d_version = D_VERSION,
516219820Sjeff	.d_flags = D_TRACKCLOSE,
517219820Sjeff	.d_open = linux_dev_open,
518219820Sjeff	.d_close = linux_dev_close,
519219820Sjeff	.d_read = linux_dev_read,
520219820Sjeff	.d_write = linux_dev_write,
521219820Sjeff	.d_ioctl = linux_dev_ioctl,
522219820Sjeff	.d_mmap_single = linux_dev_mmap_single,
523219820Sjeff	.d_poll = linux_dev_poll,
524219820Sjeff};
525219820Sjeff
526219820Sjeffstatic int
527219820Sjefflinux_file_read(struct file *file, struct uio *uio, struct ucred *active_cred,
528219820Sjeff    int flags, struct thread *td)
529219820Sjeff{
530219820Sjeff	struct linux_file *filp;
531219820Sjeff	ssize_t bytes;
532219820Sjeff	int error;
533219820Sjeff
534219820Sjeff	error = 0;
535219820Sjeff	filp = (struct linux_file *)file->f_data;
536219820Sjeff	filp->f_flags = file->f_flag;
537219820Sjeff	if (uio->uio_iovcnt != 1)
538219820Sjeff		panic("linux_file_read: uio %p iovcnt %d",
539219820Sjeff		    uio, uio->uio_iovcnt);
540219820Sjeff	if (filp->f_op->read) {
541219820Sjeff		bytes = filp->f_op->read(filp, uio->uio_iov->iov_base,
542219820Sjeff		    uio->uio_iov->iov_len, &uio->uio_offset);
543219820Sjeff		if (bytes >= 0) {
544219820Sjeff			uio->uio_iov->iov_base += bytes;
545219820Sjeff			uio->uio_iov->iov_len -= bytes;
546219820Sjeff			uio->uio_resid -= bytes;
547219820Sjeff		} else
548219820Sjeff			error = -bytes;
549219820Sjeff	} else
550219820Sjeff		error = ENXIO;
551219820Sjeff
552219820Sjeff	return (error);
553219820Sjeff}
554219820Sjeff
555219820Sjeffstatic int
556219820Sjefflinux_file_poll(struct file *file, int events, struct ucred *active_cred,
557219820Sjeff    struct thread *td)
558219820Sjeff{
559219820Sjeff	struct linux_file *filp;
560219820Sjeff	int revents;
561219820Sjeff
562219820Sjeff	filp = (struct linux_file *)file->f_data;
563219820Sjeff	filp->f_flags = file->f_flag;
564219820Sjeff	if (filp->f_op->poll)
565219820Sjeff		revents = filp->f_op->poll(filp, NULL) & events;
566219820Sjeff	else
567219820Sjeff		revents = 0;
568219820Sjeff
569219820Sjeff	return (0);
570219820Sjeff}
571219820Sjeff
572219820Sjeffstatic int
573219820Sjefflinux_file_close(struct file *file, struct thread *td)
574219820Sjeff{
575219820Sjeff	struct linux_file *filp;
576219820Sjeff	int error;
577219820Sjeff
578219820Sjeff	filp = (struct linux_file *)file->f_data;
579219820Sjeff	filp->f_flags = file->f_flag;
580219820Sjeff	error = -filp->f_op->release(NULL, filp);
581219820Sjeff	funsetown(&filp->f_sigio);
582219820Sjeff	kfree(filp);
583219820Sjeff
584219820Sjeff	return (error);
585219820Sjeff}
586219820Sjeff
587219820Sjeffstatic int
588219820Sjefflinux_file_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *cred,
589219820Sjeff    struct thread *td)
590219820Sjeff{
591219820Sjeff	struct linux_file *filp;
592219820Sjeff	int error;
593219820Sjeff
594219820Sjeff	filp = (struct linux_file *)fp->f_data;
595219820Sjeff	filp->f_flags = fp->f_flag;
596219820Sjeff	error = 0;
597219820Sjeff
598219820Sjeff	switch (cmd) {
599219820Sjeff	case FIONBIO:
600219820Sjeff		break;
601219820Sjeff	case FIOASYNC:
602219820Sjeff		if (filp->f_op->fasync == NULL)
603219820Sjeff			break;
604219820Sjeff		error = filp->f_op->fasync(0, filp, fp->f_flag & FASYNC);
605219820Sjeff		break;
606219820Sjeff	case FIOSETOWN:
607219820Sjeff		error = fsetown(*(int *)data, &filp->f_sigio);
608219820Sjeff		if (error == 0)
609219820Sjeff			error = filp->f_op->fasync(0, filp,
610219820Sjeff			    fp->f_flag & FASYNC);
611219820Sjeff		break;
612219820Sjeff	case FIOGETOWN:
613219820Sjeff		*(int *)data = fgetown(&filp->f_sigio);
614219820Sjeff		break;
615219820Sjeff	default:
616219820Sjeff		error = ENOTTY;
617219820Sjeff		break;
618219820Sjeff	}
619219820Sjeff	return (error);
620219820Sjeff}
621219820Sjeff
622219820Sjeffstruct fileops linuxfileops = {
623219820Sjeff	.fo_read = linux_file_read,
624219820Sjeff	.fo_poll = linux_file_poll,
625219820Sjeff	.fo_close = linux_file_close,
626224914Skib	.fo_ioctl = linux_file_ioctl,
627224914Skib	.fo_chmod = invfo_chmod,
628224914Skib	.fo_chown = invfo_chown,
629254356Sglebius	.fo_sendfile = invfo_sendfile,
630219820Sjeff};
631219820Sjeff
632219820Sjeff/*
633219820Sjeff * Hash of vmmap addresses.  This is infrequently accessed and does not
634219820Sjeff * need to be particularly large.  This is done because we must store the
635219820Sjeff * caller's idea of the map size to properly unmap.
636219820Sjeff */
637219820Sjeffstruct vmmap {
638219820Sjeff	LIST_ENTRY(vmmap)	vm_next;
639219820Sjeff	void 			*vm_addr;
640219820Sjeff	unsigned long		vm_size;
641219820Sjeff};
642219820Sjeff
643282513Shselaskystruct vmmaphd {
644282513Shselasky	struct vmmap *lh_first;
645282513Shselasky};
646219820Sjeff#define	VMMAP_HASH_SIZE	64
647219820Sjeff#define	VMMAP_HASH_MASK	(VMMAP_HASH_SIZE - 1)
648219820Sjeff#define	VM_HASH(addr)	((uintptr_t)(addr) >> PAGE_SHIFT) & VMMAP_HASH_MASK
649219820Sjeffstatic struct vmmaphd vmmaphead[VMMAP_HASH_SIZE];
650219820Sjeffstatic struct mtx vmmaplock;
651219820Sjeff
652219820Sjeffstatic void
653219820Sjeffvmmap_add(void *addr, unsigned long size)
654219820Sjeff{
655219820Sjeff	struct vmmap *vmmap;
656219820Sjeff
657219820Sjeff	vmmap = kmalloc(sizeof(*vmmap), GFP_KERNEL);
658219820Sjeff	mtx_lock(&vmmaplock);
659219820Sjeff	vmmap->vm_size = size;
660219820Sjeff	vmmap->vm_addr = addr;
661219820Sjeff	LIST_INSERT_HEAD(&vmmaphead[VM_HASH(addr)], vmmap, vm_next);
662219820Sjeff	mtx_unlock(&vmmaplock);
663219820Sjeff}
664219820Sjeff
665219820Sjeffstatic struct vmmap *
666219820Sjeffvmmap_remove(void *addr)
667219820Sjeff{
668219820Sjeff	struct vmmap *vmmap;
669219820Sjeff
670219820Sjeff	mtx_lock(&vmmaplock);
671219820Sjeff	LIST_FOREACH(vmmap, &vmmaphead[VM_HASH(addr)], vm_next)
672219820Sjeff		if (vmmap->vm_addr == addr)
673219820Sjeff			break;
674219820Sjeff	if (vmmap)
675219820Sjeff		LIST_REMOVE(vmmap, vm_next);
676219820Sjeff	mtx_unlock(&vmmaplock);
677219820Sjeff
678219820Sjeff	return (vmmap);
679219820Sjeff}
680219820Sjeff
681219820Sjeffvoid *
682219820Sjeff_ioremap_attr(vm_paddr_t phys_addr, unsigned long size, int attr)
683219820Sjeff{
684219820Sjeff	void *addr;
685219820Sjeff
686219820Sjeff	addr = pmap_mapdev_attr(phys_addr, size, attr);
687219820Sjeff	if (addr == NULL)
688219820Sjeff		return (NULL);
689219820Sjeff	vmmap_add(addr, size);
690219820Sjeff
691219820Sjeff	return (addr);
692219820Sjeff}
693219820Sjeff
694219820Sjeffvoid
695219820Sjeffiounmap(void *addr)
696219820Sjeff{
697219820Sjeff	struct vmmap *vmmap;
698219820Sjeff
699219820Sjeff	vmmap = vmmap_remove(addr);
700219820Sjeff	if (vmmap == NULL)
701219820Sjeff		return;
702219820Sjeff	pmap_unmapdev((vm_offset_t)addr, vmmap->vm_size);
703219820Sjeff	kfree(vmmap);
704219820Sjeff}
705219820Sjeff
706219820Sjeff
707219820Sjeffvoid *
708219820Sjeffvmap(struct page **pages, unsigned int count, unsigned long flags, int prot)
709219820Sjeff{
710219820Sjeff	vm_offset_t off;
711219820Sjeff	size_t size;
712219820Sjeff
713219820Sjeff	size = count * PAGE_SIZE;
714254025Sjeff	off = kva_alloc(size);
715219820Sjeff	if (off == 0)
716219820Sjeff		return (NULL);
717219820Sjeff	vmmap_add((void *)off, size);
718219820Sjeff	pmap_qenter(off, pages, count);
719219820Sjeff
720219820Sjeff	return ((void *)off);
721219820Sjeff}
722219820Sjeff
723219820Sjeffvoid
724219820Sjeffvunmap(void *addr)
725219820Sjeff{
726219820Sjeff	struct vmmap *vmmap;
727219820Sjeff
728219820Sjeff	vmmap = vmmap_remove(addr);
729219820Sjeff	if (vmmap == NULL)
730219820Sjeff		return;
731219820Sjeff	pmap_qremove((vm_offset_t)addr, vmmap->vm_size / PAGE_SIZE);
732254025Sjeff	kva_free((vm_offset_t)addr, vmmap->vm_size);
733219820Sjeff	kfree(vmmap);
734219820Sjeff}
735219820Sjeff
736285410Shselaskychar *
737285410Shselaskykvasprintf(gfp_t gfp, const char *fmt, va_list ap)
738285410Shselasky{
739285410Shselasky	unsigned int len;
740285410Shselasky	char *p;
741285410Shselasky	va_list aq;
742282513Shselasky
743285410Shselasky	va_copy(aq, ap);
744285410Shselasky	len = vsnprintf(NULL, 0, fmt, aq);
745285410Shselasky	va_end(aq);
746285410Shselasky
747285410Shselasky	p = kmalloc(len + 1, gfp);
748285410Shselasky	if (p != NULL)
749285410Shselasky		vsnprintf(p, len + 1, fmt, ap);
750285410Shselasky
751285410Shselasky	return (p);
752285410Shselasky}
753285410Shselasky
754282513Shselaskychar *
755282513Shselaskykasprintf(gfp_t gfp, const char *fmt, ...)
756282513Shselasky{
757282513Shselasky	va_list ap;
758282513Shselasky	char *p;
759282513Shselasky
760282513Shselasky	va_start(ap, fmt);
761282513Shselasky	p = kvasprintf(gfp, fmt, ap);
762282513Shselasky	va_end(ap);
763282513Shselasky
764285410Shselasky	return (p);
765282513Shselasky}
766282513Shselasky
767282513Shselaskystatic int
768282513Shselaskylinux_timer_jiffies_until(unsigned long expires)
769282513Shselasky{
770282513Shselasky	int delta = expires - jiffies;
771282513Shselasky	/* guard against already expired values */
772282513Shselasky	if (delta < 1)
773282513Shselasky		delta = 1;
774282513Shselasky	return (delta);
775282513Shselasky}
776282513Shselasky
777219820Sjeffstatic void
778282513Shselaskylinux_timer_callback_wrapper(void *context)
779282513Shselasky{
780282513Shselasky	struct timer_list *timer;
781282513Shselasky
782282513Shselasky	timer = context;
783282513Shselasky	timer->function(timer->data);
784282513Shselasky}
785282513Shselasky
786282513Shselaskyvoid
787282513Shselaskymod_timer(struct timer_list *timer, unsigned long expires)
788282513Shselasky{
789282513Shselasky
790282513Shselasky	timer->expires = expires;
791282513Shselasky	callout_reset(&timer->timer_callout,
792282513Shselasky	    linux_timer_jiffies_until(expires),
793282513Shselasky	    &linux_timer_callback_wrapper, timer);
794282513Shselasky}
795282513Shselasky
796282513Shselaskyvoid
797282513Shselaskyadd_timer(struct timer_list *timer)
798282513Shselasky{
799282513Shselasky
800282513Shselasky	callout_reset(&timer->timer_callout,
801282513Shselasky	    linux_timer_jiffies_until(timer->expires),
802282513Shselasky	    &linux_timer_callback_wrapper, timer);
803282513Shselasky}
804282513Shselasky
805282513Shselaskystatic void
806282513Shselaskylinux_timer_init(void *arg)
807282513Shselasky{
808282513Shselasky
809282513Shselasky	/*
810282513Shselasky	 * Compute an internal HZ value which can divide 2**32 to
811282513Shselasky	 * avoid timer rounding problems when the tick value wraps
812282513Shselasky	 * around 2**32:
813282513Shselasky	 */
814282513Shselasky	linux_timer_hz_mask = 1;
815282513Shselasky	while (linux_timer_hz_mask < (unsigned long)hz)
816282513Shselasky		linux_timer_hz_mask *= 2;
817282513Shselasky	linux_timer_hz_mask--;
818282513Shselasky}
819282513ShselaskySYSINIT(linux_timer, SI_SUB_DRIVERS, SI_ORDER_FIRST, linux_timer_init, NULL);
820282513Shselasky
821282513Shselaskyvoid
822282513Shselaskylinux_complete_common(struct completion *c, int all)
823282513Shselasky{
824282513Shselasky	int wakeup_swapper;
825282513Shselasky
826282513Shselasky	sleepq_lock(c);
827282513Shselasky	c->done++;
828282513Shselasky	if (all)
829282513Shselasky		wakeup_swapper = sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0);
830282513Shselasky	else
831282513Shselasky		wakeup_swapper = sleepq_signal(c, SLEEPQ_SLEEP, 0, 0);
832282513Shselasky	sleepq_release(c);
833282513Shselasky	if (wakeup_swapper)
834282513Shselasky		kick_proc0();
835282513Shselasky}
836282513Shselasky
837282513Shselasky/*
838282513Shselasky * Indefinite wait for done != 0 with or without signals.
839282513Shselasky */
840282513Shselaskylong
841282513Shselaskylinux_wait_for_common(struct completion *c, int flags)
842282513Shselasky{
843282513Shselasky
844282513Shselasky	if (flags != 0)
845282513Shselasky		flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP;
846282513Shselasky	else
847282513Shselasky		flags = SLEEPQ_SLEEP;
848282513Shselasky	for (;;) {
849282513Shselasky		sleepq_lock(c);
850282513Shselasky		if (c->done)
851282513Shselasky			break;
852282513Shselasky		sleepq_add(c, NULL, "completion", flags, 0);
853282513Shselasky		if (flags & SLEEPQ_INTERRUPTIBLE) {
854282513Shselasky			if (sleepq_wait_sig(c, 0) != 0)
855282513Shselasky				return (-ERESTARTSYS);
856282513Shselasky		} else
857282513Shselasky			sleepq_wait(c, 0);
858282513Shselasky	}
859282513Shselasky	c->done--;
860282513Shselasky	sleepq_release(c);
861282513Shselasky
862282513Shselasky	return (0);
863282513Shselasky}
864282513Shselasky
865282513Shselasky/*
866282513Shselasky * Time limited wait for done != 0 with or without signals.
867282513Shselasky */
868282513Shselaskylong
869282513Shselaskylinux_wait_for_timeout_common(struct completion *c, long timeout, int flags)
870282513Shselasky{
871282513Shselasky	long end = jiffies + timeout;
872282513Shselasky
873282513Shselasky	if (flags != 0)
874282513Shselasky		flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP;
875282513Shselasky	else
876282513Shselasky		flags = SLEEPQ_SLEEP;
877282513Shselasky	for (;;) {
878282513Shselasky		int ret;
879282513Shselasky
880282513Shselasky		sleepq_lock(c);
881282513Shselasky		if (c->done)
882282513Shselasky			break;
883282513Shselasky		sleepq_add(c, NULL, "completion", flags, 0);
884282513Shselasky		sleepq_set_timeout(c, linux_timer_jiffies_until(end));
885282513Shselasky		if (flags & SLEEPQ_INTERRUPTIBLE)
886282513Shselasky			ret = sleepq_timedwait_sig(c, 0);
887282513Shselasky		else
888282513Shselasky			ret = sleepq_timedwait(c, 0);
889282513Shselasky		if (ret != 0) {
890282513Shselasky			/* check for timeout or signal */
891282513Shselasky			if (ret == EWOULDBLOCK)
892282513Shselasky				return (0);
893282513Shselasky			else
894282513Shselasky				return (-ERESTARTSYS);
895282513Shselasky		}
896282513Shselasky	}
897282513Shselasky	c->done--;
898282513Shselasky	sleepq_release(c);
899282513Shselasky
900282513Shselasky	/* return how many jiffies are left */
901282513Shselasky	return (linux_timer_jiffies_until(end));
902282513Shselasky}
903282513Shselasky
904282513Shselaskyint
905282513Shselaskylinux_try_wait_for_completion(struct completion *c)
906282513Shselasky{
907282513Shselasky	int isdone;
908282513Shselasky
909282513Shselasky	isdone = 1;
910282513Shselasky	sleepq_lock(c);
911282513Shselasky	if (c->done)
912282513Shselasky		c->done--;
913282513Shselasky	else
914282513Shselasky		isdone = 0;
915282513Shselasky	sleepq_release(c);
916282513Shselasky	return (isdone);
917282513Shselasky}
918282513Shselasky
919282513Shselaskyint
920282513Shselaskylinux_completion_done(struct completion *c)
921282513Shselasky{
922282513Shselasky	int isdone;
923282513Shselasky
924282513Shselasky	isdone = 1;
925282513Shselasky	sleepq_lock(c);
926282513Shselasky	if (c->done == 0)
927282513Shselasky		isdone = 0;
928282513Shselasky	sleepq_release(c);
929282513Shselasky	return (isdone);
930282513Shselasky}
931282513Shselasky
932293151Shselaskyvoid
933293151Shselaskylinux_delayed_work_fn(void *arg)
934293151Shselasky{
935293151Shselasky	struct delayed_work *work;
936293151Shselasky
937293151Shselasky	work = arg;
938293151Shselasky	taskqueue_enqueue(work->work.taskqueue, &work->work.work_task);
939293151Shselasky}
940293151Shselasky
941293151Shselaskyvoid
942293151Shselaskylinux_work_fn(void *context, int pending)
943293151Shselasky{
944293151Shselasky	struct work_struct *work;
945293151Shselasky
946293151Shselasky	work = context;
947293151Shselasky	work->fn(work);
948293151Shselasky}
949293151Shselasky
/*
 * Task handler that intentionally does nothing; both arguments are
 * ignored.  NOTE(review): presumably queued as a marker task when
 * flushing a workqueue -- confirm against the callers.
 */
void
linux_flush_fn(void *context, int pending)
{
	(void)context;
	(void)pending;
}
954293151Shselasky
955293151Shselaskystruct workqueue_struct *
956293151Shselaskylinux_create_workqueue_common(const char *name, int cpus)
957293151Shselasky{
958293151Shselasky	struct workqueue_struct *wq;
959293151Shselasky
960293151Shselasky	wq = kmalloc(sizeof(*wq), M_WAITOK);
961293151Shselasky	wq->taskqueue = taskqueue_create(name, M_WAITOK,
962293151Shselasky	    taskqueue_thread_enqueue,  &wq->taskqueue);
963293151Shselasky	atomic_set(&wq->draining, 0);
964293151Shselasky	taskqueue_start_threads(&wq->taskqueue, cpus, PWAIT, "%s", name);
965293151Shselasky
966293151Shselasky	return (wq);
967293151Shselasky}
968293151Shselasky
969293151Shselaskyvoid
970293151Shselaskydestroy_workqueue(struct workqueue_struct *wq)
971293151Shselasky{
972293151Shselasky	taskqueue_free(wq->taskqueue);
973293151Shselasky	kfree(wq);
974293151Shselasky}
975293151Shselasky
976282513Shselaskystatic void
977280540Shselaskylinux_compat_init(void *arg)
978219820Sjeff{
979219820Sjeff	struct sysctl_oid *rootoid;
980219820Sjeff	int i;
981219820Sjeff
982219820Sjeff	rootoid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(),
983219820Sjeff	    OID_AUTO, "sys", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "sys");
984219820Sjeff	kobject_init(&class_root, &class_ktype);
985219820Sjeff	kobject_set_name(&class_root, "class");
986219820Sjeff	class_root.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid),
987219820Sjeff	    OID_AUTO, "class", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "class");
988219820Sjeff	kobject_init(&linux_rootdev.kobj, &dev_ktype);
989219820Sjeff	kobject_set_name(&linux_rootdev.kobj, "device");
990219820Sjeff	linux_rootdev.kobj.oidp = SYSCTL_ADD_NODE(NULL,
991219820Sjeff	    SYSCTL_CHILDREN(rootoid), OID_AUTO, "device", CTLFLAG_RD, NULL,
992219820Sjeff	    "device");
993219820Sjeff	linux_rootdev.bsddev = root_bus;
994219820Sjeff	miscclass.name = "misc";
995219820Sjeff	class_register(&miscclass);
996219820Sjeff	INIT_LIST_HEAD(&pci_drivers);
997219820Sjeff	INIT_LIST_HEAD(&pci_devices);
998219820Sjeff	spin_lock_init(&pci_lock);
999219820Sjeff	mtx_init(&vmmaplock, "IO Map lock", NULL, MTX_DEF);
1000219820Sjeff	for (i = 0; i < VMMAP_HASH_SIZE; i++)
1001219820Sjeff		LIST_INIT(&vmmaphead[i]);
1002219820Sjeff}
1003219820Sjeff
1004219820SjeffSYSINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_init, NULL);
1005271127Shselasky
1006271127Shselaskystatic void
1007280540Shselaskylinux_compat_uninit(void *arg)
1008271127Shselasky{
1009271127Shselasky	kobject_kfree_name(&class_root);
1010271127Shselasky	kobject_kfree_name(&linux_rootdev.kobj);
1011271127Shselasky	kobject_kfree_name(&miscclass.kobj);
1012271127Shselasky}
1013271127ShselaskySYSUNINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_uninit, NULL);
1014