linux_file.c revision 177633
1/*-
 * Copyright (c) 1994-1995 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/compat/linux/linux_file.c 177633 2008-03-26 15:23:12Z dfr $");
31
32#include "opt_compat.h"
33#include "opt_mac.h"
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/conf.h>
38#include <sys/dirent.h>
39#include <sys/fcntl.h>
40#include <sys/file.h>
41#include <sys/filedesc.h>
42#include <sys/lock.h>
43#include <sys/malloc.h>
44#include <sys/mount.h>
45#include <sys/mutex.h>
46#include <sys/namei.h>
47#include <sys/proc.h>
48#include <sys/stat.h>
49#include <sys/sx.h>
50#include <sys/syscallsubr.h>
51#include <sys/sysproto.h>
52#include <sys/tty.h>
53#include <sys/unistd.h>
54#include <sys/vnode.h>
55
56#include <security/mac/mac_framework.h>
57
58#include <ufs/ufs/extattr.h>
59#include <ufs/ufs/quota.h>
60#include <ufs/ufs/ufsmount.h>
61
62#ifdef COMPAT_LINUX32
63#include <machine/../linux32/linux.h>
64#include <machine/../linux32/linux32_proto.h>
65#else
66#include <machine/../linux/linux.h>
67#include <machine/../linux/linux_proto.h>
68#endif
69#include <compat/linux/linux_util.h>
70
71int
72linux_creat(struct thread *td, struct linux_creat_args *args)
73{
74    char *path;
75    int error;
76
77    LCONVPATHEXIST(td, args->path, &path);
78
79#ifdef DEBUG
80	if (ldebug(creat))
81		printf(ARGS(creat, "%s, %d"), path, args->mode);
82#endif
83    error = kern_open(td, path, UIO_SYSSPACE, O_WRONLY | O_CREAT | O_TRUNC,
84	args->mode);
85    LFREEPATH(path);
86    return (error);
87}
88
89
90static int
91linux_common_open(struct thread *td, char *path, int l_flags, int mode, int openat)
92{
93    struct proc *p = td->td_proc;
94    struct file *fp;
95    int fd;
96    int bsd_flags, error;
97
98    bsd_flags = 0;
99    switch (l_flags & LINUX_O_ACCMODE) {
100    case LINUX_O_WRONLY:
101	bsd_flags |= O_WRONLY;
102	break;
103    case LINUX_O_RDWR:
104	bsd_flags |= O_RDWR;
105	break;
106    default:
107	bsd_flags |= O_RDONLY;
108    }
109    if (l_flags & LINUX_O_NDELAY)
110	bsd_flags |= O_NONBLOCK;
111    if (l_flags & LINUX_O_APPEND)
112	bsd_flags |= O_APPEND;
113    if (l_flags & LINUX_O_SYNC)
114	bsd_flags |= O_FSYNC;
115    if (l_flags & LINUX_O_NONBLOCK)
116	bsd_flags |= O_NONBLOCK;
117    if (l_flags & LINUX_FASYNC)
118	bsd_flags |= O_ASYNC;
119    if (l_flags & LINUX_O_CREAT)
120	bsd_flags |= O_CREAT;
121    if (l_flags & LINUX_O_TRUNC)
122	bsd_flags |= O_TRUNC;
123    if (l_flags & LINUX_O_EXCL)
124	bsd_flags |= O_EXCL;
125    if (l_flags & LINUX_O_NOCTTY)
126	bsd_flags |= O_NOCTTY;
127    if (l_flags & LINUX_O_DIRECT)
128	bsd_flags |= O_DIRECT;
129    if (l_flags & LINUX_O_NOFOLLOW)
130	bsd_flags |= O_NOFOLLOW;
131    /* XXX LINUX_O_NOATIME: unable to be easily implemented. */
132
133    error = kern_open(td, path, UIO_SYSSPACE, bsd_flags, mode);
134    if (!error) {
135	    fd = td->td_retval[0];
136	    /*
137	     * XXX In between kern_open() and fget(), another process
138	     * having the same filedesc could use that fd without
139	     * checking below.
140	     */
141	    error = fget(td, fd, &fp);
142	    if (!error) {
143		    sx_slock(&proctree_lock);
144		    PROC_LOCK(p);
145		    if (!(bsd_flags & O_NOCTTY) &&
146			SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) {
147			    PROC_UNLOCK(p);
148			    sx_unlock(&proctree_lock);
149			    if (fp->f_type == DTYPE_VNODE)
150				    (void) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0,
151					     td->td_ucred, td);
152		    } else {
153			    PROC_UNLOCK(p);
154			    sx_sunlock(&proctree_lock);
155		    }
156		    if (l_flags & LINUX_O_DIRECTORY) {
157			    if (fp->f_type != DTYPE_VNODE ||
158				fp->f_vnode->v_type != VDIR) {
159				    error = ENOTDIR;
160			    }
161		    }
162		    fdrop(fp, td);
163		    /*
164		     * XXX as above, fdrop()/kern_close() pair is racy.
165		     */
166		    if (error)
167			    kern_close(td, fd);
168	    }
169    }
170
171#ifdef DEBUG
172    if (ldebug(open))
173	    printf(LMSG("open returns error %d"), error);
174#endif
175    if (!openat)
176	LFREEPATH(path);
177    return error;
178}
179
180/*
181 * common code for linux *at set of syscalls
182 *
183 * works like this:
184 * if filename is absolute
185 *    ignore dirfd
186 * else
187 *    if dirfd == AT_FDCWD
188 *       return CWD/filename
189 *    else
190 *       return DIRFD/filename
191 */
192static int
193linux_at(struct thread *td, int dirfd, char *filename, char **newpath, char **freebuf)
194{
195   	struct file *fp;
196	int error = 0, vfslocked;
197	struct vnode *dvp;
198	struct filedesc *fdp = td->td_proc->p_fd;
199	char *fullpath = "unknown";
200	char *freepath = NULL;
201
202	/* don't do anything if the pathname is absolute */
203	if (*filename == '/') {
204	   	*newpath= filename;
205	   	return (0);
206	}
207
208	/* check for AT_FDWCD */
209	if (dirfd == LINUX_AT_FDCWD) {
210	   	FILEDESC_SLOCK(fdp);
211		dvp = fdp->fd_cdir;
212		vref(dvp);
213	   	FILEDESC_SUNLOCK(fdp);
214	} else {
215	   	error = fget(td, dirfd, &fp);
216		if (error)
217		   	return (error);
218		dvp = fp->f_vnode;
219		/* only a dir can be dfd */
220		if (dvp->v_type != VDIR) {
221		   	fdrop(fp, td);
222			return (ENOTDIR);
223		}
224		vref(dvp);
225		fdrop(fp, td);
226	}
227
228	/*
229	 * XXXRW: This is bogus, as vn_fullpath() returns only an advisory
230	 * file path, and may fail in several common situations, including
231	 * for file systmes that don't use the name cache, and if the entry
232	 * for the file falls out of the name cache.  We should implement
233	 * openat() in the FreeBSD native system call layer properly (using a
234	 * requested starting directory), and have Linux and other ABIs wrap
235	 * the native implementation.
236	 */
237	error = vn_fullpath(td, dvp, &fullpath, &freepath);
238	if (!error) {
239	   	*newpath = malloc(strlen(fullpath) + strlen(filename) + 2, M_TEMP, M_WAITOK | M_ZERO);
240		*freebuf = freepath;
241		sprintf(*newpath, "%s/%s", fullpath, filename);
242	} else {
243		*newpath = NULL;
244	}
245	vfslocked = VFS_LOCK_GIANT(dvp->v_mount);
246	vrele(dvp);
247	VFS_UNLOCK_GIANT(vfslocked);
248	return (error);
249}
250
251int
252linux_openat(struct thread *td, struct linux_openat_args *args)
253{
254	char *newpath, *oldpath, *freebuf, *path;
255	int error;
256
257	oldpath = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
258	error = copyinstr(args->filename, oldpath, MAXPATHLEN, NULL);
259	if (error) {
260		free(oldpath, M_TEMP);
261		return (error);
262	}
263#ifdef DEBUG
264	if (ldebug(openat))
265		printf(ARGS(openat, "%i, %s, 0x%x, 0x%x"), args->dfd,
266		    oldpath, args->flags, args->mode);
267#endif
268	newpath = freebuf = NULL;
269	error = linux_at(td, args->dfd, oldpath, &newpath, &freebuf);
270	if (error == 0) {
271#ifdef DEBUG
272		if (ldebug(openat))
273			printf(LMSG("newpath: %s"), newpath);
274#endif
275		if (args->flags & LINUX_O_CREAT)
276			LCONVPATH_SEG(td, newpath, &path, 1, UIO_SYSSPACE);
277		else
278			LCONVPATH_SEG(td, newpath, &path, 0, UIO_SYSSPACE);
279	}
280	if (freebuf)
281	   	free(freebuf, M_TEMP);
282	if (*oldpath != '/')
283   	   	free(newpath, M_TEMP);
284	if (error == 0) {
285		error = linux_common_open(td, path, args->flags,
286		    args->mode, 1);
287		LFREEPATH(path);
288	}
289	free(oldpath, M_TEMP);
290	return (error);
291}
292
293int
294linux_open(struct thread *td, struct linux_open_args *args)
295{
296    char *path;
297
298    if (args->flags & LINUX_O_CREAT)
299	LCONVPATHCREAT(td, args->path, &path);
300    else
301	LCONVPATHEXIST(td, args->path, &path);
302
303#ifdef DEBUG
304	if (ldebug(open))
305		printf(ARGS(open, "%s, 0x%x, 0x%x"),
306		    path, args->flags, args->mode);
307#endif
308
309    return linux_common_open(td, path, args->flags, args->mode, 0);
310}
311
312int
313linux_lseek(struct thread *td, struct linux_lseek_args *args)
314{
315
316    struct lseek_args /* {
317	int fd;
318	int pad;
319	off_t offset;
320	int whence;
321    } */ tmp_args;
322    int error;
323
324#ifdef DEBUG
325	if (ldebug(lseek))
326		printf(ARGS(lseek, "%d, %ld, %d"),
327		    args->fdes, (long)args->off, args->whence);
328#endif
329    tmp_args.fd = args->fdes;
330    tmp_args.offset = (off_t)args->off;
331    tmp_args.whence = args->whence;
332    error = lseek(td, &tmp_args);
333    return error;
334}
335
336int
337linux_llseek(struct thread *td, struct linux_llseek_args *args)
338{
339	struct lseek_args bsd_args;
340	int error;
341	off_t off;
342
343#ifdef DEBUG
344	if (ldebug(llseek))
345		printf(ARGS(llseek, "%d, %d:%d, %d"),
346		    args->fd, args->ohigh, args->olow, args->whence);
347#endif
348	off = (args->olow) | (((off_t) args->ohigh) << 32);
349
350	bsd_args.fd = args->fd;
351	bsd_args.offset = off;
352	bsd_args.whence = args->whence;
353
354	if ((error = lseek(td, &bsd_args)))
355		return error;
356
357	if ((error = copyout(td->td_retval, args->res, sizeof (off_t))))
358		return error;
359
360	td->td_retval[0] = 0;
361	return 0;
362}
363
364int
365linux_readdir(struct thread *td, struct linux_readdir_args *args)
366{
367	struct linux_getdents_args lda;
368
369	lda.fd = args->fd;
370	lda.dent = args->dent;
371	lda.count = 1;
372	return linux_getdents(td, &lda);
373}
374
/*
 * Note that linux_getdents(2) and linux_getdents64(2) have the same
 * arguments. They only differ in the definition of struct dirent they
 * operate on. We use this to share the code, with the exception of
 * accessing struct dirent. Note that linux_readdir(2) is implemented
 * by means of linux_getdents(2). In this case we never operate on
 * struct dirent64 and thus don't need to handle it...
 */
383
384struct l_dirent {
385	l_long		d_ino;
386	l_off_t		d_off;
387	l_ushort	d_reclen;
388	char		d_name[LINUX_NAME_MAX + 1];
389};
390
391struct l_dirent64 {
392	uint64_t	d_ino;
393	int64_t		d_off;
394	l_ushort	d_reclen;
395	u_char		d_type;
396	char		d_name[LINUX_NAME_MAX + 1];
397};
398
399#define LINUX_RECLEN(de,namlen) \
400    ALIGN((((char *)&(de)->d_name - (char *)de) + (namlen) + 1))
401
402#define	LINUX_DIRBLKSIZ		512
403
404static int
405getdents_common(struct thread *td, struct linux_getdents64_args *args,
406    int is64bit)
407{
408	struct dirent *bdp;
409	struct vnode *vp;
410	caddr_t inp, buf;		/* BSD-format */
411	int len, reclen;		/* BSD-format */
412	caddr_t outp;			/* Linux-format */
413	int resid, linuxreclen=0;	/* Linux-format */
414	struct file *fp;
415	struct uio auio;
416	struct iovec aiov;
417	off_t off;
418	struct l_dirent linux_dirent;
419	struct l_dirent64 linux_dirent64;
420	int buflen, error, eofflag, nbytes, justone;
421	u_long *cookies = NULL, *cookiep;
422	int ncookies, vfslocked;
423
424	nbytes = args->count;
425	if (nbytes == 1) {
426		/* readdir(2) case. Always struct dirent. */
427		if (is64bit)
428			return (EINVAL);
429		nbytes = sizeof(linux_dirent);
430		justone = 1;
431	} else
432		justone = 0;
433
434	if ((error = getvnode(td->td_proc->p_fd, args->fd, &fp)) != 0)
435		return (error);
436
437	if ((fp->f_flag & FREAD) == 0) {
438		fdrop(fp, td);
439		return (EBADF);
440	}
441
442	vp = fp->f_vnode;
443	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
444	if (vp->v_type != VDIR) {
445		VFS_UNLOCK_GIANT(vfslocked);
446		fdrop(fp, td);
447		return (EINVAL);
448	}
449
450	off = fp->f_offset;
451
452	buflen = max(LINUX_DIRBLKSIZ, nbytes);
453	buflen = min(buflen, MAXBSIZE);
454	buf = malloc(buflen, M_TEMP, M_WAITOK);
455	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
456
457again:
458	aiov.iov_base = buf;
459	aiov.iov_len = buflen;
460	auio.uio_iov = &aiov;
461	auio.uio_iovcnt = 1;
462	auio.uio_rw = UIO_READ;
463	auio.uio_segflg = UIO_SYSSPACE;
464	auio.uio_td = td;
465	auio.uio_resid = buflen;
466	auio.uio_offset = off;
467
468	if (cookies) {
469		free(cookies, M_TEMP);
470		cookies = NULL;
471	}
472
473#ifdef MAC
474	/*
475	 * Do directory search MAC check using non-cached credentials.
476	 */
477	if ((error = mac_vnode_check_readdir(td->td_ucred, vp)))
478		goto out;
479#endif /* MAC */
480	if ((error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, &ncookies,
481		 &cookies)))
482		goto out;
483
484	inp = buf;
485	outp = (caddr_t)args->dirent;
486	resid = nbytes;
487	if ((len = buflen - auio.uio_resid) <= 0)
488		goto eof;
489
490	cookiep = cookies;
491
492	if (cookies) {
493		/*
494		 * When using cookies, the vfs has the option of reading from
495		 * a different offset than that supplied (UFS truncates the
496		 * offset to a block boundary to make sure that it never reads
497		 * partway through a directory entry, even if the directory
498		 * has been compacted).
499		 */
500		while (len > 0 && ncookies > 0 && *cookiep <= off) {
501			bdp = (struct dirent *) inp;
502			len -= bdp->d_reclen;
503			inp += bdp->d_reclen;
504			cookiep++;
505			ncookies--;
506		}
507	}
508
509	while (len > 0) {
510		if (cookiep && ncookies == 0)
511			break;
512		bdp = (struct dirent *) inp;
513		reclen = bdp->d_reclen;
514		if (reclen & 3) {
515			error = EFAULT;
516			goto out;
517		}
518
519		if (bdp->d_fileno == 0) {
520			inp += reclen;
521			if (cookiep) {
522				off = *cookiep++;
523				ncookies--;
524			} else
525				off += reclen;
526
527			len -= reclen;
528			continue;
529		}
530
531		linuxreclen = (is64bit)
532		    ? LINUX_RECLEN(&linux_dirent64, bdp->d_namlen)
533		    : LINUX_RECLEN(&linux_dirent, bdp->d_namlen);
534
535		if (reclen > len || resid < linuxreclen) {
536			outp++;
537			break;
538		}
539
540		if (justone) {
541			/* readdir(2) case. */
542			linux_dirent.d_ino = (l_long)bdp->d_fileno;
543			linux_dirent.d_off = (l_off_t)linuxreclen;
544			linux_dirent.d_reclen = (l_ushort)bdp->d_namlen;
545			strcpy(linux_dirent.d_name, bdp->d_name);
546			error = copyout(&linux_dirent, outp, linuxreclen);
547		} else {
548			if (is64bit) {
549				linux_dirent64.d_ino = bdp->d_fileno;
550				linux_dirent64.d_off = (cookiep)
551				    ? (l_off_t)*cookiep
552				    : (l_off_t)(off + reclen);
553				linux_dirent64.d_reclen =
554				    (l_ushort)linuxreclen;
555				linux_dirent64.d_type = bdp->d_type;
556				strcpy(linux_dirent64.d_name, bdp->d_name);
557				error = copyout(&linux_dirent64, outp,
558				    linuxreclen);
559			} else {
560				linux_dirent.d_ino = bdp->d_fileno;
561				linux_dirent.d_off = (cookiep)
562				    ? (l_off_t)*cookiep
563				    : (l_off_t)(off + reclen);
564				linux_dirent.d_reclen = (l_ushort)linuxreclen;
565				strcpy(linux_dirent.d_name, bdp->d_name);
566				error = copyout(&linux_dirent, outp,
567				    linuxreclen);
568			}
569		}
570		if (error)
571			goto out;
572
573		inp += reclen;
574		if (cookiep) {
575			off = *cookiep++;
576			ncookies--;
577		} else
578			off += reclen;
579
580		outp += linuxreclen;
581		resid -= linuxreclen;
582		len -= reclen;
583		if (justone)
584			break;
585	}
586
587	if (outp == (caddr_t)args->dirent)
588		goto again;
589
590	fp->f_offset = off;
591	if (justone)
592		nbytes = resid + linuxreclen;
593
594eof:
595	td->td_retval[0] = nbytes - resid;
596
597out:
598	if (cookies)
599		free(cookies, M_TEMP);
600
601	VOP_UNLOCK(vp, 0);
602	VFS_UNLOCK_GIANT(vfslocked);
603	fdrop(fp, td);
604	free(buf, M_TEMP);
605	return (error);
606}
607
/*
 * getdents(2): the argument structure is reinterpreted as the getdents64
 * one and processed by getdents_common() in non-64-bit mode.
 */
int
linux_getdents(struct thread *td, struct linux_getdents_args *args)
{
	struct linux_getdents64_args *common_args;

#ifdef DEBUG
	if (ldebug(getdents))
		printf(ARGS(getdents, "%d, *, %d"), args->fd, args->count);
#endif

	common_args = (struct linux_getdents64_args*)args;
	return (getdents_common(td, common_args, 0));
}
619
/*
 * getdents64(2): processed by getdents_common() in 64-bit mode, i.e.
 * entries are produced as struct l_dirent64.
 */
int
linux_getdents64(struct thread *td, struct linux_getdents64_args *args)
{
	int error;

#ifdef DEBUG
	if (ldebug(getdents64))
		printf(ARGS(getdents64, "%d, *, %d"), args->fd, args->count);
#endif

	error = getdents_common(td, args, 1);
	return (error);
}
631
632/*
633 * These exist mainly for hooks for doing /compat/linux translation.
634 */
635
636int
637linux_access(struct thread *td, struct linux_access_args *args)
638{
639	char *path;
640	int error;
641
642	/* linux convention */
643	if (args->flags & ~(F_OK | X_OK | W_OK | R_OK))
644		return (EINVAL);
645
646	LCONVPATHEXIST(td, args->path, &path);
647
648#ifdef DEBUG
649	if (ldebug(access))
650		printf(ARGS(access, "%s, %d"), path, args->flags);
651#endif
652	error = kern_access(td, path, UIO_SYSSPACE, args->flags);
653	LFREEPATH(path);
654
655	return (error);
656}
657
658int
659linux_unlink(struct thread *td, struct linux_unlink_args *args)
660{
661	char *path;
662	int error;
663	struct stat st;
664
665	LCONVPATHEXIST(td, args->path, &path);
666
667#ifdef DEBUG
668	if (ldebug(unlink))
669		printf(ARGS(unlink, "%s"), path);
670#endif
671
672	error = kern_unlink(td, path, UIO_SYSSPACE);
673	if (error == EPERM)
674		/* Introduce POSIX noncompliant behaviour of Linux */
675		if (kern_stat(td, path, UIO_SYSSPACE, &st) == 0)
676			if (S_ISDIR(st.st_mode))
677				error = EISDIR;
678	LFREEPATH(path);
679	return (error);
680}
681
682int
683linux_chdir(struct thread *td, struct linux_chdir_args *args)
684{
685	char *path;
686	int error;
687
688	LCONVPATHEXIST(td, args->path, &path);
689
690#ifdef DEBUG
691	if (ldebug(chdir))
692		printf(ARGS(chdir, "%s"), path);
693#endif
694	error = kern_chdir(td, path, UIO_SYSSPACE);
695	LFREEPATH(path);
696	return (error);
697}
698
699int
700linux_chmod(struct thread *td, struct linux_chmod_args *args)
701{
702	char *path;
703	int error;
704
705	LCONVPATHEXIST(td, args->path, &path);
706
707#ifdef DEBUG
708	if (ldebug(chmod))
709		printf(ARGS(chmod, "%s, %d"), path, args->mode);
710#endif
711	error = kern_chmod(td, path, UIO_SYSSPACE, args->mode);
712	LFREEPATH(path);
713	return (error);
714}
715
716int
717linux_mkdir(struct thread *td, struct linux_mkdir_args *args)
718{
719	char *path;
720	int error;
721
722	LCONVPATHCREAT(td, args->path, &path);
723
724#ifdef DEBUG
725	if (ldebug(mkdir))
726		printf(ARGS(mkdir, "%s, %d"), path, args->mode);
727#endif
728	error = kern_mkdir(td, path, UIO_SYSSPACE, args->mode);
729	LFREEPATH(path);
730	return (error);
731}
732
733int
734linux_rmdir(struct thread *td, struct linux_rmdir_args *args)
735{
736	char *path;
737	int error;
738
739	LCONVPATHEXIST(td, args->path, &path);
740
741#ifdef DEBUG
742	if (ldebug(rmdir))
743		printf(ARGS(rmdir, "%s"), path);
744#endif
745	error = kern_rmdir(td, path, UIO_SYSSPACE);
746	LFREEPATH(path);
747	return (error);
748}
749
750int
751linux_rename(struct thread *td, struct linux_rename_args *args)
752{
753	char *from, *to;
754	int error;
755
756	LCONVPATHEXIST(td, args->from, &from);
757	/* Expand LCONVPATHCREATE so that `from' can be freed on errors */
758	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1);
759	if (to == NULL) {
760		LFREEPATH(from);
761		return (error);
762	}
763
764#ifdef DEBUG
765	if (ldebug(rename))
766		printf(ARGS(rename, "%s, %s"), from, to);
767#endif
768	error = kern_rename(td, from, to, UIO_SYSSPACE);
769	LFREEPATH(from);
770	LFREEPATH(to);
771	return (error);
772}
773
774int
775linux_symlink(struct thread *td, struct linux_symlink_args *args)
776{
777	char *path, *to;
778	int error;
779
780	LCONVPATHEXIST(td, args->path, &path);
781	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
782	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1);
783	if (to == NULL) {
784		LFREEPATH(path);
785		return (error);
786	}
787
788#ifdef DEBUG
789	if (ldebug(symlink))
790		printf(ARGS(symlink, "%s, %s"), path, to);
791#endif
792	error = kern_symlink(td, path, to, UIO_SYSSPACE);
793	LFREEPATH(path);
794	LFREEPATH(to);
795	return (error);
796}
797
798int
799linux_readlink(struct thread *td, struct linux_readlink_args *args)
800{
801	char *name;
802	int error;
803
804	LCONVPATHEXIST(td, args->name, &name);
805
806#ifdef DEBUG
807	if (ldebug(readlink))
808		printf(ARGS(readlink, "%s, %p, %d"), name, (void *)args->buf,
809		    args->count);
810#endif
811	error = kern_readlink(td, name, UIO_SYSSPACE, args->buf, UIO_USERSPACE,
812	    args->count);
813	LFREEPATH(name);
814	return (error);
815}
816
817int
818linux_truncate(struct thread *td, struct linux_truncate_args *args)
819{
820	char *path;
821	int error;
822
823	LCONVPATHEXIST(td, args->path, &path);
824
825#ifdef DEBUG
826	if (ldebug(truncate))
827		printf(ARGS(truncate, "%s, %ld"), path, (long)args->length);
828#endif
829
830	error = kern_truncate(td, path, UIO_SYSSPACE, args->length);
831	LFREEPATH(path);
832	return (error);
833}
834
835int
836linux_ftruncate(struct thread *td, struct linux_ftruncate_args *args)
837{
838	struct ftruncate_args /* {
839		int fd;
840		int pad;
841		off_t length;
842		} */ nuap;
843
844	nuap.fd = args->fd;
845	nuap.length = args->length;
846	return (ftruncate(td, &nuap));
847}
848
849int
850linux_link(struct thread *td, struct linux_link_args *args)
851{
852	char *path, *to;
853	int error;
854
855	LCONVPATHEXIST(td, args->path, &path);
856	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
857	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1);
858	if (to == NULL) {
859		LFREEPATH(path);
860		return (error);
861	}
862
863#ifdef DEBUG
864	if (ldebug(link))
865		printf(ARGS(link, "%s, %s"), path, to);
866#endif
867	error = kern_link(td, path, to, UIO_SYSSPACE);
868	LFREEPATH(path);
869	LFREEPATH(to);
870	return (error);
871}
872
873int
874linux_fdatasync(td, uap)
875	struct thread *td;
876	struct linux_fdatasync_args *uap;
877{
878	struct fsync_args bsd;
879
880	bsd.fd = uap->fd;
881	return fsync(td, &bsd);
882}
883
884int
885linux_pread(td, uap)
886	struct thread *td;
887	struct linux_pread_args *uap;
888{
889	struct pread_args bsd;
890	struct vnode *vp;
891	int error;
892
893	bsd.fd = uap->fd;
894	bsd.buf = uap->buf;
895	bsd.nbyte = uap->nbyte;
896	bsd.offset = uap->offset;
897
898	error = pread(td, &bsd);
899
900	if (error == 0) {
901   	   	/* This seems to violate POSIX but linux does it */
902   	   	if ((error = fgetvp(td, uap->fd, &vp)) != 0)
903   		   	return (error);
904		if (vp->v_type == VDIR) {
905   		   	vrele(vp);
906			return (EISDIR);
907		}
908		vrele(vp);
909	}
910
911	return (error);
912}
913
914int
915linux_pwrite(td, uap)
916	struct thread *td;
917	struct linux_pwrite_args *uap;
918{
919	struct pwrite_args bsd;
920
921	bsd.fd = uap->fd;
922	bsd.buf = uap->buf;
923	bsd.nbyte = uap->nbyte;
924	bsd.offset = uap->offset;
925	return pwrite(td, &bsd);
926}
927
928int
929linux_mount(struct thread *td, struct linux_mount_args *args)
930{
931	struct ufs_args ufs;
932	char fstypename[MFSNAMELEN];
933	char mntonname[MNAMELEN], mntfromname[MNAMELEN];
934	int error;
935	int fsflags;
936	void *fsdata;
937
938	error = copyinstr(args->filesystemtype, fstypename, MFSNAMELEN - 1,
939	    NULL);
940	if (error)
941		return (error);
942	error = copyinstr(args->specialfile, mntfromname, MNAMELEN - 1, NULL);
943	if (error)
944		return (error);
945	error = copyinstr(args->dir, mntonname, MNAMELEN - 1, NULL);
946	if (error)
947		return (error);
948
949#ifdef DEBUG
950	if (ldebug(mount))
951		printf(ARGS(mount, "%s, %s, %s"),
952		    fstypename, mntfromname, mntonname);
953#endif
954
955	if (strcmp(fstypename, "ext2") == 0) {
956		strcpy(fstypename, "ext2fs");
957		fsdata = &ufs;
958		ufs.fspec = mntfromname;
959#define DEFAULT_ROOTID		-2
960		ufs.export.ex_root = DEFAULT_ROOTID;
961		ufs.export.ex_flags =
962		    args->rwflag & LINUX_MS_RDONLY ? MNT_EXRDONLY : 0;
963	} else if (strcmp(fstypename, "proc") == 0) {
964		strcpy(fstypename, "linprocfs");
965		fsdata = NULL;
966	} else {
967		return (ENODEV);
968	}
969
970	fsflags = 0;
971
972	if ((args->rwflag & 0xffff0000) == 0xc0ed0000) {
973		/*
974		 * Linux SYNC flag is not included; the closest equivalent
975		 * FreeBSD has is !ASYNC, which is our default.
976		 */
977		if (args->rwflag & LINUX_MS_RDONLY)
978			fsflags |= MNT_RDONLY;
979		if (args->rwflag & LINUX_MS_NOSUID)
980			fsflags |= MNT_NOSUID;
981		if (args->rwflag & LINUX_MS_NOEXEC)
982			fsflags |= MNT_NOEXEC;
983		if (args->rwflag & LINUX_MS_REMOUNT)
984			fsflags |= MNT_UPDATE;
985	}
986
987	if (strcmp(fstypename, "linprocfs") == 0) {
988		error = kernel_vmount(fsflags,
989			"fstype", fstypename,
990			"fspath", mntonname,
991			NULL);
992	} else
993		error = EOPNOTSUPP;
994	return (error);
995}
996
997int
998linux_oldumount(struct thread *td, struct linux_oldumount_args *args)
999{
1000	struct linux_umount_args args2;
1001
1002	args2.path = args->path;
1003	args2.flags = 0;
1004	return (linux_umount(td, &args2));
1005}
1006
1007int
1008linux_umount(struct thread *td, struct linux_umount_args *args)
1009{
1010	struct unmount_args bsd;
1011
1012	bsd.path = args->path;
1013	bsd.flags = args->flags;	/* XXX correct? */
1014	return (unmount(td, &bsd));
1015}
1016
1017/*
1018 * fcntl family of syscalls
1019 */
1020
1021struct l_flock {
1022	l_short		l_type;
1023	l_short		l_whence;
1024	l_off_t		l_start;
1025	l_off_t		l_len;
1026	l_pid_t		l_pid;
1027}
1028#if defined(__amd64__) && defined(COMPAT_LINUX32)
1029__packed
1030#endif
1031;
1032
1033static void
1034linux_to_bsd_flock(struct l_flock *linux_flock, struct flock *bsd_flock)
1035{
1036	switch (linux_flock->l_type) {
1037	case LINUX_F_RDLCK:
1038		bsd_flock->l_type = F_RDLCK;
1039		break;
1040	case LINUX_F_WRLCK:
1041		bsd_flock->l_type = F_WRLCK;
1042		break;
1043	case LINUX_F_UNLCK:
1044		bsd_flock->l_type = F_UNLCK;
1045		break;
1046	default:
1047		bsd_flock->l_type = -1;
1048		break;
1049	}
1050	bsd_flock->l_whence = linux_flock->l_whence;
1051	bsd_flock->l_start = (off_t)linux_flock->l_start;
1052	bsd_flock->l_len = (off_t)linux_flock->l_len;
1053	bsd_flock->l_pid = (pid_t)linux_flock->l_pid;
1054	bsd_flock->l_sysid = 0;
1055}
1056
1057static void
1058bsd_to_linux_flock(struct flock *bsd_flock, struct l_flock *linux_flock)
1059{
1060	switch (bsd_flock->l_type) {
1061	case F_RDLCK:
1062		linux_flock->l_type = LINUX_F_RDLCK;
1063		break;
1064	case F_WRLCK:
1065		linux_flock->l_type = LINUX_F_WRLCK;
1066		break;
1067	case F_UNLCK:
1068		linux_flock->l_type = LINUX_F_UNLCK;
1069		break;
1070	}
1071	linux_flock->l_whence = bsd_flock->l_whence;
1072	linux_flock->l_start = (l_off_t)bsd_flock->l_start;
1073	linux_flock->l_len = (l_off_t)bsd_flock->l_len;
1074	linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid;
1075}
1076
1077#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1078struct l_flock64 {
1079	l_short		l_type;
1080	l_short		l_whence;
1081	l_loff_t	l_start;
1082	l_loff_t	l_len;
1083	l_pid_t		l_pid;
1084}
1085#if defined(__amd64__) && defined(COMPAT_LINUX32)
1086__packed
1087#endif
1088;
1089
1090static void
1091linux_to_bsd_flock64(struct l_flock64 *linux_flock, struct flock *bsd_flock)
1092{
1093	switch (linux_flock->l_type) {
1094	case LINUX_F_RDLCK:
1095		bsd_flock->l_type = F_RDLCK;
1096		break;
1097	case LINUX_F_WRLCK:
1098		bsd_flock->l_type = F_WRLCK;
1099		break;
1100	case LINUX_F_UNLCK:
1101		bsd_flock->l_type = F_UNLCK;
1102		break;
1103	default:
1104		bsd_flock->l_type = -1;
1105		break;
1106	}
1107	bsd_flock->l_whence = linux_flock->l_whence;
1108	bsd_flock->l_start = (off_t)linux_flock->l_start;
1109	bsd_flock->l_len = (off_t)linux_flock->l_len;
1110	bsd_flock->l_pid = (pid_t)linux_flock->l_pid;
1111	bsd_flock->l_sysid = 0;
1112}
1113
1114static void
1115bsd_to_linux_flock64(struct flock *bsd_flock, struct l_flock64 *linux_flock)
1116{
1117	switch (bsd_flock->l_type) {
1118	case F_RDLCK:
1119		linux_flock->l_type = LINUX_F_RDLCK;
1120		break;
1121	case F_WRLCK:
1122		linux_flock->l_type = LINUX_F_WRLCK;
1123		break;
1124	case F_UNLCK:
1125		linux_flock->l_type = LINUX_F_UNLCK;
1126		break;
1127	}
1128	linux_flock->l_whence = bsd_flock->l_whence;
1129	linux_flock->l_start = (l_loff_t)bsd_flock->l_start;
1130	linux_flock->l_len = (l_loff_t)bsd_flock->l_len;
1131	linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid;
1132}
1133#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1134
1135static int
1136fcntl_common(struct thread *td, struct linux_fcntl64_args *args)
1137{
1138	struct l_flock linux_flock;
1139	struct flock bsd_flock;
1140	struct file *fp;
1141	long arg;
1142	int error, result;
1143
1144	switch (args->cmd) {
1145	case LINUX_F_DUPFD:
1146		return (kern_fcntl(td, args->fd, F_DUPFD, args->arg));
1147
1148	case LINUX_F_GETFD:
1149		return (kern_fcntl(td, args->fd, F_GETFD, 0));
1150
1151	case LINUX_F_SETFD:
1152		return (kern_fcntl(td, args->fd, F_SETFD, args->arg));
1153
1154	case LINUX_F_GETFL:
1155		error = kern_fcntl(td, args->fd, F_GETFL, 0);
1156		result = td->td_retval[0];
1157		td->td_retval[0] = 0;
1158		if (result & O_RDONLY)
1159			td->td_retval[0] |= LINUX_O_RDONLY;
1160		if (result & O_WRONLY)
1161			td->td_retval[0] |= LINUX_O_WRONLY;
1162		if (result & O_RDWR)
1163			td->td_retval[0] |= LINUX_O_RDWR;
1164		if (result & O_NDELAY)
1165			td->td_retval[0] |= LINUX_O_NONBLOCK;
1166		if (result & O_APPEND)
1167			td->td_retval[0] |= LINUX_O_APPEND;
1168		if (result & O_FSYNC)
1169			td->td_retval[0] |= LINUX_O_SYNC;
1170		if (result & O_ASYNC)
1171			td->td_retval[0] |= LINUX_FASYNC;
1172#ifdef LINUX_O_NOFOLLOW
1173		if (result & O_NOFOLLOW)
1174			td->td_retval[0] |= LINUX_O_NOFOLLOW;
1175#endif
1176#ifdef LINUX_O_DIRECT
1177		if (result & O_DIRECT)
1178			td->td_retval[0] |= LINUX_O_DIRECT;
1179#endif
1180		return (error);
1181
1182	case LINUX_F_SETFL:
1183		arg = 0;
1184		if (args->arg & LINUX_O_NDELAY)
1185			arg |= O_NONBLOCK;
1186		if (args->arg & LINUX_O_APPEND)
1187			arg |= O_APPEND;
1188		if (args->arg & LINUX_O_SYNC)
1189			arg |= O_FSYNC;
1190		if (args->arg & LINUX_FASYNC)
1191			arg |= O_ASYNC;
1192#ifdef LINUX_O_NOFOLLOW
1193		if (args->arg & LINUX_O_NOFOLLOW)
1194			arg |= O_NOFOLLOW;
1195#endif
1196#ifdef LINUX_O_DIRECT
1197		if (args->arg & LINUX_O_DIRECT)
1198			arg |= O_DIRECT;
1199#endif
1200		return (kern_fcntl(td, args->fd, F_SETFL, arg));
1201
1202	case LINUX_F_GETLK:
1203		error = copyin((void *)args->arg, &linux_flock,
1204		    sizeof(linux_flock));
1205		if (error)
1206			return (error);
1207		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1208		error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
1209		if (error)
1210			return (error);
1211		bsd_to_linux_flock(&bsd_flock, &linux_flock);
1212		return (copyout(&linux_flock, (void *)args->arg,
1213		    sizeof(linux_flock)));
1214
1215	case LINUX_F_SETLK:
1216		error = copyin((void *)args->arg, &linux_flock,
1217		    sizeof(linux_flock));
1218		if (error)
1219			return (error);
1220		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1221		return (kern_fcntl(td, args->fd, F_SETLK,
1222		    (intptr_t)&bsd_flock));
1223
1224	case LINUX_F_SETLKW:
1225		error = copyin((void *)args->arg, &linux_flock,
1226		    sizeof(linux_flock));
1227		if (error)
1228			return (error);
1229		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1230		return (kern_fcntl(td, args->fd, F_SETLKW,
1231		     (intptr_t)&bsd_flock));
1232
1233	case LINUX_F_GETOWN:
1234		return (kern_fcntl(td, args->fd, F_GETOWN, 0));
1235
1236	case LINUX_F_SETOWN:
1237		/*
1238		 * XXX some Linux applications depend on F_SETOWN having no
1239		 * significant effect for pipes (SIGIO is not delivered for
1240		 * pipes under Linux-2.2.35 at least).
1241		 */
1242		error = fget(td, args->fd, &fp);
1243		if (error)
1244			return (error);
1245		if (fp->f_type == DTYPE_PIPE) {
1246			fdrop(fp, td);
1247			return (EINVAL);
1248		}
1249		fdrop(fp, td);
1250
1251		return (kern_fcntl(td, args->fd, F_SETOWN, args->arg));
1252	}
1253
1254	return (EINVAL);
1255}
1256
1257int
1258linux_fcntl(struct thread *td, struct linux_fcntl_args *args)
1259{
1260	struct linux_fcntl64_args args64;
1261
1262#ifdef DEBUG
1263	if (ldebug(fcntl))
1264		printf(ARGS(fcntl, "%d, %08x, *"), args->fd, args->cmd);
1265#endif
1266
1267	args64.fd = args->fd;
1268	args64.cmd = args->cmd;
1269	args64.arg = args->arg;
1270	return (fcntl_common(td, &args64));
1271}
1272
1273#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1274int
1275linux_fcntl64(struct thread *td, struct linux_fcntl64_args *args)
1276{
1277	struct l_flock64 linux_flock;
1278	struct flock bsd_flock;
1279	int error;
1280
1281#ifdef DEBUG
1282	if (ldebug(fcntl64))
1283		printf(ARGS(fcntl64, "%d, %08x, *"), args->fd, args->cmd);
1284#endif
1285
1286	switch (args->cmd) {
1287	case LINUX_F_GETLK64:
1288		error = copyin((void *)args->arg, &linux_flock,
1289		    sizeof(linux_flock));
1290		if (error)
1291			return (error);
1292		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1293		error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
1294		if (error)
1295			return (error);
1296		bsd_to_linux_flock64(&bsd_flock, &linux_flock);
1297		return (copyout(&linux_flock, (void *)args->arg,
1298			    sizeof(linux_flock)));
1299
1300	case LINUX_F_SETLK64:
1301		error = copyin((void *)args->arg, &linux_flock,
1302		    sizeof(linux_flock));
1303		if (error)
1304			return (error);
1305		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1306		return (kern_fcntl(td, args->fd, F_SETLK,
1307		    (intptr_t)&bsd_flock));
1308
1309	case LINUX_F_SETLKW64:
1310		error = copyin((void *)args->arg, &linux_flock,
1311		    sizeof(linux_flock));
1312		if (error)
1313			return (error);
1314		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1315		return (kern_fcntl(td, args->fd, F_SETLKW,
1316		    (intptr_t)&bsd_flock));
1317	}
1318
1319	return (fcntl_common(td, args));
1320}
1321#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1322
1323int
1324linux_chown(struct thread *td, struct linux_chown_args *args)
1325{
1326	char *path;
1327	int error;
1328
1329	LCONVPATHEXIST(td, args->path, &path);
1330
1331#ifdef DEBUG
1332	if (ldebug(chown))
1333		printf(ARGS(chown, "%s, %d, %d"), path, args->uid, args->gid);
1334#endif
1335	error = kern_chown(td, path, UIO_SYSSPACE, args->uid, args->gid);
1336	LFREEPATH(path);
1337	return (error);
1338}
1339
1340int
1341linux_lchown(struct thread *td, struct linux_lchown_args *args)
1342{
1343	char *path;
1344	int error;
1345
1346	LCONVPATHEXIST(td, args->path, &path);
1347
1348#ifdef DEBUG
1349	if (ldebug(lchown))
1350		printf(ARGS(lchown, "%s, %d, %d"), path, args->uid, args->gid);
1351#endif
1352	error = kern_lchown(td, path, UIO_SYSSPACE, args->uid, args->gid);
1353	LFREEPATH(path);
1354	return (error);
1355}
1356