linux_file.c revision 171216
1/*-
2 * Copyright (c) 1994-1995 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/compat/linux/linux_file.c 171216 2007-07-04 23:06:43Z peter $");
31
32#include "opt_compat.h"
33#include "opt_mac.h"
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/conf.h>
38#include <sys/dirent.h>
39#include <sys/fcntl.h>
40#include <sys/file.h>
41#include <sys/filedesc.h>
42#include <sys/lock.h>
43#include <sys/malloc.h>
44#include <sys/mount.h>
45#include <sys/mutex.h>
46#include <sys/namei.h>
47#include <sys/proc.h>
48#include <sys/stat.h>
49#include <sys/sx.h>
50#include <sys/syscallsubr.h>
51#include <sys/sysproto.h>
52#include <sys/tty.h>
53#include <sys/unistd.h>
54#include <sys/vnode.h>
55
56#include <security/mac/mac_framework.h>
57
58#include <ufs/ufs/extattr.h>
59#include <ufs/ufs/quota.h>
60#include <ufs/ufs/ufsmount.h>
61
62#ifdef COMPAT_LINUX32
63#include <machine/../linux32/linux.h>
64#include <machine/../linux32/linux32_proto.h>
65#else
66#include <machine/../linux/linux.h>
67#include <machine/../linux/linux_proto.h>
68#endif
69#include <compat/linux/linux_util.h>
70
71int
72linux_creat(struct thread *td, struct linux_creat_args *args)
73{
74    char *path;
75    int error;
76
77    LCONVPATHEXIST(td, args->path, &path);
78
79#ifdef DEBUG
80	if (ldebug(creat))
81		printf(ARGS(creat, "%s, %d"), path, args->mode);
82#endif
83    error = kern_open(td, path, UIO_SYSSPACE, O_WRONLY | O_CREAT | O_TRUNC,
84	args->mode);
85    LFREEPATH(path);
86    return (error);
87}
88
89
90static int
91linux_common_open(struct thread *td, char *path, int l_flags, int mode, int openat)
92{
93    struct proc *p = td->td_proc;
94    struct file *fp;
95    int fd;
96    int bsd_flags, error;
97
98    bsd_flags = 0;
99    switch (l_flags & LINUX_O_ACCMODE) {
100    case LINUX_O_WRONLY:
101	bsd_flags |= O_WRONLY;
102	break;
103    case LINUX_O_RDWR:
104	bsd_flags |= O_RDWR;
105	break;
106    default:
107	bsd_flags |= O_RDONLY;
108    }
109    if (l_flags & LINUX_O_NDELAY)
110	bsd_flags |= O_NONBLOCK;
111    if (l_flags & LINUX_O_APPEND)
112	bsd_flags |= O_APPEND;
113    if (l_flags & LINUX_O_SYNC)
114	bsd_flags |= O_FSYNC;
115    if (l_flags & LINUX_O_NONBLOCK)
116	bsd_flags |= O_NONBLOCK;
117    if (l_flags & LINUX_FASYNC)
118	bsd_flags |= O_ASYNC;
119    if (l_flags & LINUX_O_CREAT)
120	bsd_flags |= O_CREAT;
121    if (l_flags & LINUX_O_TRUNC)
122	bsd_flags |= O_TRUNC;
123    if (l_flags & LINUX_O_EXCL)
124	bsd_flags |= O_EXCL;
125    if (l_flags & LINUX_O_NOCTTY)
126	bsd_flags |= O_NOCTTY;
127    if (l_flags & LINUX_O_DIRECT)
128	bsd_flags |= O_DIRECT;
129    if (l_flags & LINUX_O_NOFOLLOW)
130	bsd_flags |= O_NOFOLLOW;
131    /* XXX LINUX_O_NOATIME: unable to be easily implemented. */
132
133    error = kern_open(td, path, UIO_SYSSPACE, bsd_flags, mode);
134    if (!error) {
135	    fd = td->td_retval[0];
136	    /*
137	     * XXX In between kern_open() and fget(), another process
138	     * having the same filedesc could use that fd without
139	     * checking below.
140	     */
141	    error = fget(td, fd, &fp);
142	    if (!error) {
143		    sx_slock(&proctree_lock);
144		    PROC_LOCK(p);
145		    if (!(bsd_flags & O_NOCTTY) &&
146			SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) {
147			    PROC_UNLOCK(p);
148			    sx_unlock(&proctree_lock);
149			    if (fp->f_type == DTYPE_VNODE)
150				    (void) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0,
151					     td->td_ucred, td);
152		    } else {
153			    PROC_UNLOCK(p);
154			    sx_sunlock(&proctree_lock);
155		    }
156		    if (l_flags & LINUX_O_DIRECTORY) {
157			    if (fp->f_type != DTYPE_VNODE ||
158				fp->f_vnode->v_type != VDIR) {
159				    error = ENOTDIR;
160			    }
161		    }
162		    fdrop(fp, td);
163		    /*
164		     * XXX as above, fdrop()/kern_close() pair is racy.
165		     */
166		    if (error)
167			    kern_close(td, fd);
168	    }
169    }
170
171#ifdef DEBUG
172    if (ldebug(open))
173	    printf(LMSG("open returns error %d"), error);
174#endif
175    if (!openat)
176	LFREEPATH(path);
177    return error;
178}
179
180/*
181 * common code for linux *at set of syscalls
182 *
183 * works like this:
184 * if filename is absolute
185 *    ignore dirfd
186 * else
187 *    if dirfd == AT_FDCWD
188 *       return CWD/filename
189 *    else
190 *       return DIRFD/filename
191 */
192static int
193linux_at(struct thread *td, int dirfd, char *filename, char **newpath, char **freebuf)
194{
195   	struct file *fp;
196	int error = 0, vfslocked;
197	struct vnode *dvp;
198	struct filedesc *fdp = td->td_proc->p_fd;
199	char *fullpath = "unknown";
200	char *freepath = NULL;
201
202	/* don't do anything if the pathname is absolute */
203	if (*filename == '/') {
204	   	*newpath= filename;
205	   	return (0);
206	}
207
208	/* check for AT_FDWCD */
209	if (dirfd == LINUX_AT_FDCWD) {
210	   	FILEDESC_SLOCK(fdp);
211		dvp = fdp->fd_cdir;
212		vref(dvp);
213	   	FILEDESC_SUNLOCK(fdp);
214	} else {
215	   	error = fget(td, dirfd, &fp);
216		if (error)
217		   	return (error);
218		dvp = fp->f_vnode;
219		/* only a dir can be dfd */
220		if (dvp->v_type != VDIR) {
221		   	fdrop(fp, td);
222			return (ENOTDIR);
223		}
224		vref(dvp);
225		fdrop(fp, td);
226	}
227
228	/*
229	 * XXXRW: This is bogus, as vn_fullpath() returns only an advisory
230	 * file path, and may fail in several common situations, including
231	 * for file systmes that don't use the name cache, and if the entry
232	 * for the file falls out of the name cache.  We should implement
233	 * openat() in the FreeBSD native system call layer properly (using a
234	 * requested starting directory), and have Linux and other ABIs wrap
235	 * the native implementation.
236	 */
237	error = vn_fullpath(td, dvp, &fullpath, &freepath);
238	if (!error) {
239	   	*newpath = malloc(strlen(fullpath) + strlen(filename) + 2, M_TEMP, M_WAITOK | M_ZERO);
240		*freebuf = freepath;
241		sprintf(*newpath, "%s/%s", fullpath, filename);
242	} else {
243		*newpath = NULL;
244	}
245	vfslocked = VFS_LOCK_GIANT(dvp->v_mount);
246	vrele(dvp);
247	VFS_UNLOCK_GIANT(vfslocked);
248	return (error);
249}
250
251int
252linux_openat(struct thread *td, struct linux_openat_args *args)
253{
254   	char *newpath, *oldpath, *freebuf = NULL, *path;
255	int error;
256
257	oldpath = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
258	error = copyinstr(args->filename, oldpath, MAXPATHLEN, NULL);
259
260#ifdef DEBUG
261	if (ldebug(openat))
262		printf(ARGS(openat, "%i, %s, 0x%x, 0x%x"), args->dfd,
263		    oldpath, args->flags, args->mode);
264#endif
265
266	error = linux_at(td, args->dfd, oldpath, &newpath, &freebuf);
267	if (error)
268	   	return (error);
269#ifdef DEBUG
270	printf(LMSG("newpath: %s"), newpath);
271#endif
272    	if (args->flags & LINUX_O_CREAT)
273		LCONVPATH_SEG(td, newpath, &path, 1, UIO_SYSSPACE);
274    	else
275		LCONVPATH_SEG(td, newpath, &path, 0, UIO_SYSSPACE);
276	if (freebuf)
277	   	free(freebuf, M_TEMP);
278	if (*oldpath != '/')
279   	   	free(newpath, M_TEMP);
280
281	error = linux_common_open(td, path, args->flags, args->mode, 1);
282	free(oldpath, M_TEMP);
283	return (error);
284}
285
286int
287linux_open(struct thread *td, struct linux_open_args *args)
288{
289    char *path;
290
291    if (args->flags & LINUX_O_CREAT)
292	LCONVPATHCREAT(td, args->path, &path);
293    else
294	LCONVPATHEXIST(td, args->path, &path);
295
296#ifdef DEBUG
297	if (ldebug(open))
298		printf(ARGS(open, "%s, 0x%x, 0x%x"),
299		    path, args->flags, args->mode);
300#endif
301
302    return linux_common_open(td, path, args->flags, args->mode, 0);
303}
304
305int
306linux_lseek(struct thread *td, struct linux_lseek_args *args)
307{
308
309    struct lseek_args /* {
310	int fd;
311	int pad;
312	off_t offset;
313	int whence;
314    } */ tmp_args;
315    int error;
316
317#ifdef DEBUG
318	if (ldebug(lseek))
319		printf(ARGS(lseek, "%d, %ld, %d"),
320		    args->fdes, (long)args->off, args->whence);
321#endif
322    tmp_args.fd = args->fdes;
323    tmp_args.offset = (off_t)args->off;
324    tmp_args.whence = args->whence;
325    error = lseek(td, &tmp_args);
326    return error;
327}
328
329int
330linux_llseek(struct thread *td, struct linux_llseek_args *args)
331{
332	struct lseek_args bsd_args;
333	int error;
334	off_t off;
335
336#ifdef DEBUG
337	if (ldebug(llseek))
338		printf(ARGS(llseek, "%d, %d:%d, %d"),
339		    args->fd, args->ohigh, args->olow, args->whence);
340#endif
341	off = (args->olow) | (((off_t) args->ohigh) << 32);
342
343	bsd_args.fd = args->fd;
344	bsd_args.offset = off;
345	bsd_args.whence = args->whence;
346
347	if ((error = lseek(td, &bsd_args)))
348		return error;
349
350	if ((error = copyout(td->td_retval, args->res, sizeof (off_t))))
351		return error;
352
353	td->td_retval[0] = 0;
354	return 0;
355}
356
357int
358linux_readdir(struct thread *td, struct linux_readdir_args *args)
359{
360	struct linux_getdents_args lda;
361
362	lda.fd = args->fd;
363	lda.dent = args->dent;
364	lda.count = 1;
365	return linux_getdents(td, &lda);
366}
367
368/*
369 * Note that linux_getdents(2) and linux_getdents64(2) have the same
370 * arguments. They only differ in the definition of struct dirent they
371 * operate on. We use this to common the code, with the exception of
372 * accessing struct dirent. Note that linux_readdir(2) is implemented
373 * by means of linux_getdents(2). In this case we never operate on
374 * struct dirent64 and thus don't need to handle it...
375 */
376
/*
 * Linux-format directory entry used by getdents_common() for the
 * readdir and non-64-bit getdents paths.  There d_ino is set from the
 * native d_fileno and d_name from the native d_name; d_off and d_reclen
 * carry the next offset and Linux record length for getdents, or the
 * Linux record length and name length in the readdir case.
 */
377struct l_dirent {
378	l_long		d_ino;
379	l_off_t		d_off;
380	l_ushort	d_reclen;
381	char		d_name[LINUX_NAME_MAX + 1];
382};
383
/*
 * Linux-format 64-bit directory entry used by getdents_common() for the
 * getdents64 path.  There d_ino, d_type and d_name are set from the
 * native entry's d_fileno, d_type and d_name; d_off is the next offset
 * and d_reclen the Linux record length.
 */
384struct l_dirent64 {
385	uint64_t	d_ino;
386	int64_t		d_off;
387	l_ushort	d_reclen;
388	u_char		d_type;
389	char		d_name[LINUX_NAME_MAX + 1];
390};
391
/*
 * Size of a Linux directory record for a name of `namlen' characters:
 * the offset of d_name within *de, plus the name and one more byte
 * (used for the terminating NUL), rounded up with ALIGN().
 * `de' may point at either struct l_dirent or struct l_dirent64.
 */
392#define LINUX_RECLEN(de,namlen) \
393    ALIGN((((char *)&(de)->d_name - (char *)de) + (namlen) + 1))
394
/* Lower bound on the buffer size getdents_common() uses for VOP_READDIR(). */
395#define	LINUX_DIRBLKSIZ		512
396
397static int
398getdents_common(struct thread *td, struct linux_getdents64_args *args,
399    int is64bit)
400{
401	struct dirent *bdp;
402	struct vnode *vp;
403	caddr_t inp, buf;		/* BSD-format */
404	int len, reclen;		/* BSD-format */
405	caddr_t outp;			/* Linux-format */
406	int resid, linuxreclen=0;	/* Linux-format */
407	struct file *fp;
408	struct uio auio;
409	struct iovec aiov;
410	off_t off;
411	struct l_dirent linux_dirent;
412	struct l_dirent64 linux_dirent64;
413	int buflen, error, eofflag, nbytes, justone;
414	u_long *cookies = NULL, *cookiep;
415	int ncookies, vfslocked;
416
417	nbytes = args->count;
418	if (nbytes == 1) {
419		/* readdir(2) case. Always struct dirent. */
420		if (is64bit)
421			return (EINVAL);
422		nbytes = sizeof(linux_dirent);
423		justone = 1;
424	} else
425		justone = 0;
426
427	if ((error = getvnode(td->td_proc->p_fd, args->fd, &fp)) != 0)
428		return (error);
429
430	if ((fp->f_flag & FREAD) == 0) {
431		fdrop(fp, td);
432		return (EBADF);
433	}
434
435	vp = fp->f_vnode;
436	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
437	if (vp->v_type != VDIR) {
438		VFS_UNLOCK_GIANT(vfslocked);
439		fdrop(fp, td);
440		return (EINVAL);
441	}
442
443	off = fp->f_offset;
444
445	buflen = max(LINUX_DIRBLKSIZ, nbytes);
446	buflen = min(buflen, MAXBSIZE);
447	buf = malloc(buflen, M_TEMP, M_WAITOK);
448	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
449
450again:
451	aiov.iov_base = buf;
452	aiov.iov_len = buflen;
453	auio.uio_iov = &aiov;
454	auio.uio_iovcnt = 1;
455	auio.uio_rw = UIO_READ;
456	auio.uio_segflg = UIO_SYSSPACE;
457	auio.uio_td = td;
458	auio.uio_resid = buflen;
459	auio.uio_offset = off;
460
461	if (cookies) {
462		free(cookies, M_TEMP);
463		cookies = NULL;
464	}
465
466#ifdef MAC
467	/*
468	 * Do directory search MAC check using non-cached credentials.
469	 */
470	if ((error = mac_check_vnode_readdir(td->td_ucred, vp)))
471		goto out;
472#endif /* MAC */
473	if ((error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, &ncookies,
474		 &cookies)))
475		goto out;
476
477	inp = buf;
478	outp = (caddr_t)args->dirent;
479	resid = nbytes;
480	if ((len = buflen - auio.uio_resid) <= 0)
481		goto eof;
482
483	cookiep = cookies;
484
485	if (cookies) {
486		/*
487		 * When using cookies, the vfs has the option of reading from
488		 * a different offset than that supplied (UFS truncates the
489		 * offset to a block boundary to make sure that it never reads
490		 * partway through a directory entry, even if the directory
491		 * has been compacted).
492		 */
493		while (len > 0 && ncookies > 0 && *cookiep <= off) {
494			bdp = (struct dirent *) inp;
495			len -= bdp->d_reclen;
496			inp += bdp->d_reclen;
497			cookiep++;
498			ncookies--;
499		}
500	}
501
502	while (len > 0) {
503		if (cookiep && ncookies == 0)
504			break;
505		bdp = (struct dirent *) inp;
506		reclen = bdp->d_reclen;
507		if (reclen & 3) {
508			error = EFAULT;
509			goto out;
510		}
511
512		if (bdp->d_fileno == 0) {
513			inp += reclen;
514			if (cookiep) {
515				off = *cookiep++;
516				ncookies--;
517			} else
518				off += reclen;
519
520			len -= reclen;
521			continue;
522		}
523
524		linuxreclen = (is64bit)
525		    ? LINUX_RECLEN(&linux_dirent64, bdp->d_namlen)
526		    : LINUX_RECLEN(&linux_dirent, bdp->d_namlen);
527
528		if (reclen > len || resid < linuxreclen) {
529			outp++;
530			break;
531		}
532
533		if (justone) {
534			/* readdir(2) case. */
535			linux_dirent.d_ino = (l_long)bdp->d_fileno;
536			linux_dirent.d_off = (l_off_t)linuxreclen;
537			linux_dirent.d_reclen = (l_ushort)bdp->d_namlen;
538			strcpy(linux_dirent.d_name, bdp->d_name);
539			error = copyout(&linux_dirent, outp, linuxreclen);
540		} else {
541			if (is64bit) {
542				linux_dirent64.d_ino = bdp->d_fileno;
543				linux_dirent64.d_off = (cookiep)
544				    ? (l_off_t)*cookiep
545				    : (l_off_t)(off + reclen);
546				linux_dirent64.d_reclen =
547				    (l_ushort)linuxreclen;
548				linux_dirent64.d_type = bdp->d_type;
549				strcpy(linux_dirent64.d_name, bdp->d_name);
550				error = copyout(&linux_dirent64, outp,
551				    linuxreclen);
552			} else {
553				linux_dirent.d_ino = bdp->d_fileno;
554				linux_dirent.d_off = (cookiep)
555				    ? (l_off_t)*cookiep
556				    : (l_off_t)(off + reclen);
557				linux_dirent.d_reclen = (l_ushort)linuxreclen;
558				strcpy(linux_dirent.d_name, bdp->d_name);
559				error = copyout(&linux_dirent, outp,
560				    linuxreclen);
561			}
562		}
563		if (error)
564			goto out;
565
566		inp += reclen;
567		if (cookiep) {
568			off = *cookiep++;
569			ncookies--;
570		} else
571			off += reclen;
572
573		outp += linuxreclen;
574		resid -= linuxreclen;
575		len -= reclen;
576		if (justone)
577			break;
578	}
579
580	if (outp == (caddr_t)args->dirent)
581		goto again;
582
583	fp->f_offset = off;
584	if (justone)
585		nbytes = resid + linuxreclen;
586
587eof:
588	td->td_retval[0] = nbytes - resid;
589
590out:
591	if (cookies)
592		free(cookies, M_TEMP);
593
594	VOP_UNLOCK(vp, 0, td);
595	VFS_UNLOCK_GIANT(vfslocked);
596	fdrop(fp, td);
597	free(buf, M_TEMP);
598	return (error);
599}
600
/*
 * Linux getdents(2): getdents_common() in struct l_dirent mode.  The
 * argument structure is passed through a cast to the getdents64 one.
 */
int
linux_getdents(struct thread *td, struct linux_getdents_args *args)
{
	struct linux_getdents64_args *common_args;

#ifdef DEBUG
	if (ldebug(getdents))
		printf(ARGS(getdents, "%d, *, %d"), args->fd, args->count);
#endif

	common_args = (struct linux_getdents64_args*)args;
	return (getdents_common(td, common_args, 0));
}
612
/*
 * Linux getdents64(2): getdents_common() in struct l_dirent64 mode.
 */
int
linux_getdents64(struct thread *td, struct linux_getdents64_args *args)
{
	int rv;

#ifdef DEBUG
	if (ldebug(getdents64))
		printf(ARGS(getdents64, "%d, *, %d"), args->fd, args->count);
#endif

	rv = getdents_common(td, args, 1);
	return (rv);
}
624
625/*
626 * These exist mainly for hooks for doing /compat/linux translation.
627 */
628
629int
630linux_access(struct thread *td, struct linux_access_args *args)
631{
632	char *path;
633	int error;
634
635	/* linux convention */
636	if (args->flags & ~(F_OK | X_OK | W_OK | R_OK))
637		return (EINVAL);
638
639	LCONVPATHEXIST(td, args->path, &path);
640
641#ifdef DEBUG
642	if (ldebug(access))
643		printf(ARGS(access, "%s, %d"), path, args->flags);
644#endif
645	error = kern_access(td, path, UIO_SYSSPACE, args->flags);
646	LFREEPATH(path);
647
648	return (error);
649}
650
651int
652linux_unlink(struct thread *td, struct linux_unlink_args *args)
653{
654	char *path;
655	int error;
656	struct stat st;
657
658	LCONVPATHEXIST(td, args->path, &path);
659
660#ifdef DEBUG
661	if (ldebug(unlink))
662		printf(ARGS(unlink, "%s"), path);
663#endif
664
665	error = kern_unlink(td, path, UIO_SYSSPACE);
666	if (error == EPERM)
667		/* Introduce POSIX noncompliant behaviour of Linux */
668		if (kern_stat(td, path, UIO_SYSSPACE, &st) == 0)
669			if (S_ISDIR(st.st_mode))
670				error = EISDIR;
671	LFREEPATH(path);
672	return (error);
673}
674
675int
676linux_chdir(struct thread *td, struct linux_chdir_args *args)
677{
678	char *path;
679	int error;
680
681	LCONVPATHEXIST(td, args->path, &path);
682
683#ifdef DEBUG
684	if (ldebug(chdir))
685		printf(ARGS(chdir, "%s"), path);
686#endif
687	error = kern_chdir(td, path, UIO_SYSSPACE);
688	LFREEPATH(path);
689	return (error);
690}
691
692int
693linux_chmod(struct thread *td, struct linux_chmod_args *args)
694{
695	char *path;
696	int error;
697
698	LCONVPATHEXIST(td, args->path, &path);
699
700#ifdef DEBUG
701	if (ldebug(chmod))
702		printf(ARGS(chmod, "%s, %d"), path, args->mode);
703#endif
704	error = kern_chmod(td, path, UIO_SYSSPACE, args->mode);
705	LFREEPATH(path);
706	return (error);
707}
708
709int
710linux_mkdir(struct thread *td, struct linux_mkdir_args *args)
711{
712	char *path;
713	int error;
714
715	LCONVPATHCREAT(td, args->path, &path);
716
717#ifdef DEBUG
718	if (ldebug(mkdir))
719		printf(ARGS(mkdir, "%s, %d"), path, args->mode);
720#endif
721	error = kern_mkdir(td, path, UIO_SYSSPACE, args->mode);
722	LFREEPATH(path);
723	return (error);
724}
725
726int
727linux_rmdir(struct thread *td, struct linux_rmdir_args *args)
728{
729	char *path;
730	int error;
731
732	LCONVPATHEXIST(td, args->path, &path);
733
734#ifdef DEBUG
735	if (ldebug(rmdir))
736		printf(ARGS(rmdir, "%s"), path);
737#endif
738	error = kern_rmdir(td, path, UIO_SYSSPACE);
739	LFREEPATH(path);
740	return (error);
741}
742
743int
744linux_rename(struct thread *td, struct linux_rename_args *args)
745{
746	char *from, *to;
747	int error;
748
749	LCONVPATHEXIST(td, args->from, &from);
750	/* Expand LCONVPATHCREATE so that `from' can be freed on errors */
751	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1);
752	if (to == NULL) {
753		LFREEPATH(from);
754		return (error);
755	}
756
757#ifdef DEBUG
758	if (ldebug(rename))
759		printf(ARGS(rename, "%s, %s"), from, to);
760#endif
761	error = kern_rename(td, from, to, UIO_SYSSPACE);
762	LFREEPATH(from);
763	LFREEPATH(to);
764	return (error);
765}
766
767int
768linux_symlink(struct thread *td, struct linux_symlink_args *args)
769{
770	char *path, *to;
771	int error;
772
773	LCONVPATHEXIST(td, args->path, &path);
774	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
775	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1);
776	if (to == NULL) {
777		LFREEPATH(path);
778		return (error);
779	}
780
781#ifdef DEBUG
782	if (ldebug(symlink))
783		printf(ARGS(symlink, "%s, %s"), path, to);
784#endif
785	error = kern_symlink(td, path, to, UIO_SYSSPACE);
786	LFREEPATH(path);
787	LFREEPATH(to);
788	return (error);
789}
790
791int
792linux_readlink(struct thread *td, struct linux_readlink_args *args)
793{
794	char *name;
795	int error;
796
797	LCONVPATHEXIST(td, args->name, &name);
798
799#ifdef DEBUG
800	if (ldebug(readlink))
801		printf(ARGS(readlink, "%s, %p, %d"), name, (void *)args->buf,
802		    args->count);
803#endif
804	error = kern_readlink(td, name, UIO_SYSSPACE, args->buf, UIO_USERSPACE,
805	    args->count);
806	LFREEPATH(name);
807	return (error);
808}
809
810int
811linux_truncate(struct thread *td, struct linux_truncate_args *args)
812{
813	char *path;
814	int error;
815
816	LCONVPATHEXIST(td, args->path, &path);
817
818#ifdef DEBUG
819	if (ldebug(truncate))
820		printf(ARGS(truncate, "%s, %ld"), path, (long)args->length);
821#endif
822
823	error = kern_truncate(td, path, UIO_SYSSPACE, args->length);
824	LFREEPATH(path);
825	return (error);
826}
827
828int
829linux_ftruncate(struct thread *td, struct linux_ftruncate_args *args)
830{
831	struct ftruncate_args /* {
832		int fd;
833		int pad;
834		off_t length;
835		} */ nuap;
836
837	nuap.fd = args->fd;
838	nuap.length = args->length;
839	return (ftruncate(td, &nuap));
840}
841
842int
843linux_link(struct thread *td, struct linux_link_args *args)
844{
845	char *path, *to;
846	int error;
847
848	LCONVPATHEXIST(td, args->path, &path);
849	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
850	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1);
851	if (to == NULL) {
852		LFREEPATH(path);
853		return (error);
854	}
855
856#ifdef DEBUG
857	if (ldebug(link))
858		printf(ARGS(link, "%s, %s"), path, to);
859#endif
860	error = kern_link(td, path, to, UIO_SYSSPACE);
861	LFREEPATH(path);
862	LFREEPATH(to);
863	return (error);
864}
865
866int
867linux_fdatasync(td, uap)
868	struct thread *td;
869	struct linux_fdatasync_args *uap;
870{
871	struct fsync_args bsd;
872
873	bsd.fd = uap->fd;
874	return fsync(td, &bsd);
875}
876
877int
878linux_pread(td, uap)
879	struct thread *td;
880	struct linux_pread_args *uap;
881{
882	struct pread_args bsd;
883	struct vnode *vp;
884	int error;
885
886	bsd.fd = uap->fd;
887	bsd.buf = uap->buf;
888	bsd.nbyte = uap->nbyte;
889	bsd.offset = uap->offset;
890
891	error = pread(td, &bsd);
892
893	if (error == 0) {
894   	   	/* This seems to violate POSIX but linux does it */
895   	   	if ((error = fgetvp(td, uap->fd, &vp)) != 0)
896   		   	return (error);
897		if (vp->v_type == VDIR) {
898   		   	vrele(vp);
899			return (EISDIR);
900		}
901		vrele(vp);
902	}
903
904	return (error);
905}
906
907int
908linux_pwrite(td, uap)
909	struct thread *td;
910	struct linux_pwrite_args *uap;
911{
912	struct pwrite_args bsd;
913
914	bsd.fd = uap->fd;
915	bsd.buf = uap->buf;
916	bsd.nbyte = uap->nbyte;
917	bsd.offset = uap->offset;
918	return pwrite(td, &bsd);
919}
920
921int
922linux_mount(struct thread *td, struct linux_mount_args *args)
923{
924	struct ufs_args ufs;
925	char fstypename[MFSNAMELEN];
926	char mntonname[MNAMELEN], mntfromname[MNAMELEN];
927	int error;
928	int fsflags;
929	void *fsdata;
930
931	error = copyinstr(args->filesystemtype, fstypename, MFSNAMELEN - 1,
932	    NULL);
933	if (error)
934		return (error);
935	error = copyinstr(args->specialfile, mntfromname, MNAMELEN - 1, NULL);
936	if (error)
937		return (error);
938	error = copyinstr(args->dir, mntonname, MNAMELEN - 1, NULL);
939	if (error)
940		return (error);
941
942#ifdef DEBUG
943	if (ldebug(mount))
944		printf(ARGS(mount, "%s, %s, %s"),
945		    fstypename, mntfromname, mntonname);
946#endif
947
948	if (strcmp(fstypename, "ext2") == 0) {
949		strcpy(fstypename, "ext2fs");
950		fsdata = &ufs;
951		ufs.fspec = mntfromname;
952#define DEFAULT_ROOTID		-2
953		ufs.export.ex_root = DEFAULT_ROOTID;
954		ufs.export.ex_flags =
955		    args->rwflag & LINUX_MS_RDONLY ? MNT_EXRDONLY : 0;
956	} else if (strcmp(fstypename, "proc") == 0) {
957		strcpy(fstypename, "linprocfs");
958		fsdata = NULL;
959	} else {
960		return (ENODEV);
961	}
962
963	fsflags = 0;
964
965	if ((args->rwflag & 0xffff0000) == 0xc0ed0000) {
966		/*
967		 * Linux SYNC flag is not included; the closest equivalent
968		 * FreeBSD has is !ASYNC, which is our default.
969		 */
970		if (args->rwflag & LINUX_MS_RDONLY)
971			fsflags |= MNT_RDONLY;
972		if (args->rwflag & LINUX_MS_NOSUID)
973			fsflags |= MNT_NOSUID;
974		if (args->rwflag & LINUX_MS_NOEXEC)
975			fsflags |= MNT_NOEXEC;
976		if (args->rwflag & LINUX_MS_REMOUNT)
977			fsflags |= MNT_UPDATE;
978	}
979
980	if (strcmp(fstypename, "linprocfs") == 0) {
981		error = kernel_vmount(fsflags,
982			"fstype", fstypename,
983			"fspath", mntonname,
984			NULL);
985	} else
986		error = EOPNOTSUPP;
987	return (error);
988}
989
990int
991linux_oldumount(struct thread *td, struct linux_oldumount_args *args)
992{
993	struct linux_umount_args args2;
994
995	args2.path = args->path;
996	args2.flags = 0;
997	return (linux_umount(td, &args2));
998}
999
1000int
1001linux_umount(struct thread *td, struct linux_umount_args *args)
1002{
1003	struct unmount_args bsd;
1004
1005	bsd.path = args->path;
1006	bsd.flags = args->flags;	/* XXX correct? */
1007	return (unmount(td, &bsd));
1008}
1009
1010/*
1011 * fcntl family of syscalls
1012 */
1013
/*
 * Linux-format record lock description, copied in from and out to user
 * space by fcntl_common() for LINUX_F_GETLK/SETLK/SETLKW and converted
 * by linux_to_bsd_flock() / bsd_to_linux_flock().  Declared __packed
 * when building the 32-bit Linux ABI on amd64.
 */
1014struct l_flock {
1015	l_short		l_type;
1016	l_short		l_whence;
1017	l_off_t		l_start;
1018	l_off_t		l_len;
1019	l_pid_t		l_pid;
1020}
1021#if defined(__amd64__) && defined(COMPAT_LINUX32)
1022__packed
1023#endif
1024;
1025
1026static void
1027linux_to_bsd_flock(struct l_flock *linux_flock, struct flock *bsd_flock)
1028{
1029	switch (linux_flock->l_type) {
1030	case LINUX_F_RDLCK:
1031		bsd_flock->l_type = F_RDLCK;
1032		break;
1033	case LINUX_F_WRLCK:
1034		bsd_flock->l_type = F_WRLCK;
1035		break;
1036	case LINUX_F_UNLCK:
1037		bsd_flock->l_type = F_UNLCK;
1038		break;
1039	default:
1040		bsd_flock->l_type = -1;
1041		break;
1042	}
1043	bsd_flock->l_whence = linux_flock->l_whence;
1044	bsd_flock->l_start = (off_t)linux_flock->l_start;
1045	bsd_flock->l_len = (off_t)linux_flock->l_len;
1046	bsd_flock->l_pid = (pid_t)linux_flock->l_pid;
1047}
1048
1049static void
1050bsd_to_linux_flock(struct flock *bsd_flock, struct l_flock *linux_flock)
1051{
1052	switch (bsd_flock->l_type) {
1053	case F_RDLCK:
1054		linux_flock->l_type = LINUX_F_RDLCK;
1055		break;
1056	case F_WRLCK:
1057		linux_flock->l_type = LINUX_F_WRLCK;
1058		break;
1059	case F_UNLCK:
1060		linux_flock->l_type = LINUX_F_UNLCK;
1061		break;
1062	}
1063	linux_flock->l_whence = bsd_flock->l_whence;
1064	linux_flock->l_start = (l_off_t)bsd_flock->l_start;
1065	linux_flock->l_len = (l_off_t)bsd_flock->l_len;
1066	linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid;
1067}
1068
1069#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * Linux-format record lock description with l_loff_t offsets, copied
 * in from and out to user space by linux_fcntl64() for the
 * LINUX_F_*LK64 commands.  Declared __packed when building the 32-bit
 * Linux ABI on amd64.
 */
1070struct l_flock64 {
1071	l_short		l_type;
1072	l_short		l_whence;
1073	l_loff_t	l_start;
1074	l_loff_t	l_len;
1075	l_pid_t		l_pid;
1076}
1077#if defined(__amd64__) && defined(COMPAT_LINUX32)
1078__packed
1079#endif
1080;
1081
1082static void
1083linux_to_bsd_flock64(struct l_flock64 *linux_flock, struct flock *bsd_flock)
1084{
1085	switch (linux_flock->l_type) {
1086	case LINUX_F_RDLCK:
1087		bsd_flock->l_type = F_RDLCK;
1088		break;
1089	case LINUX_F_WRLCK:
1090		bsd_flock->l_type = F_WRLCK;
1091		break;
1092	case LINUX_F_UNLCK:
1093		bsd_flock->l_type = F_UNLCK;
1094		break;
1095	default:
1096		bsd_flock->l_type = -1;
1097		break;
1098	}
1099	bsd_flock->l_whence = linux_flock->l_whence;
1100	bsd_flock->l_start = (off_t)linux_flock->l_start;
1101	bsd_flock->l_len = (off_t)linux_flock->l_len;
1102	bsd_flock->l_pid = (pid_t)linux_flock->l_pid;
1103}
1104
1105static void
1106bsd_to_linux_flock64(struct flock *bsd_flock, struct l_flock64 *linux_flock)
1107{
1108	switch (bsd_flock->l_type) {
1109	case F_RDLCK:
1110		linux_flock->l_type = LINUX_F_RDLCK;
1111		break;
1112	case F_WRLCK:
1113		linux_flock->l_type = LINUX_F_WRLCK;
1114		break;
1115	case F_UNLCK:
1116		linux_flock->l_type = LINUX_F_UNLCK;
1117		break;
1118	}
1119	linux_flock->l_whence = bsd_flock->l_whence;
1120	linux_flock->l_start = (l_loff_t)bsd_flock->l_start;
1121	linux_flock->l_len = (l_loff_t)bsd_flock->l_len;
1122	linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid;
1123}
1124#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1125
1126static int
1127fcntl_common(struct thread *td, struct linux_fcntl64_args *args)
1128{
1129	struct l_flock linux_flock;
1130	struct flock bsd_flock;
1131	struct file *fp;
1132	long arg;
1133	int error, result;
1134
1135	switch (args->cmd) {
1136	case LINUX_F_DUPFD:
1137		return (kern_fcntl(td, args->fd, F_DUPFD, args->arg));
1138
1139	case LINUX_F_GETFD:
1140		return (kern_fcntl(td, args->fd, F_GETFD, 0));
1141
1142	case LINUX_F_SETFD:
1143		return (kern_fcntl(td, args->fd, F_SETFD, args->arg));
1144
1145	case LINUX_F_GETFL:
1146		error = kern_fcntl(td, args->fd, F_GETFL, 0);
1147		result = td->td_retval[0];
1148		td->td_retval[0] = 0;
1149		if (result & O_RDONLY)
1150			td->td_retval[0] |= LINUX_O_RDONLY;
1151		if (result & O_WRONLY)
1152			td->td_retval[0] |= LINUX_O_WRONLY;
1153		if (result & O_RDWR)
1154			td->td_retval[0] |= LINUX_O_RDWR;
1155		if (result & O_NDELAY)
1156			td->td_retval[0] |= LINUX_O_NONBLOCK;
1157		if (result & O_APPEND)
1158			td->td_retval[0] |= LINUX_O_APPEND;
1159		if (result & O_FSYNC)
1160			td->td_retval[0] |= LINUX_O_SYNC;
1161		if (result & O_ASYNC)
1162			td->td_retval[0] |= LINUX_FASYNC;
1163#ifdef LINUX_O_NOFOLLOW
1164		if (result & O_NOFOLLOW)
1165			td->td_retval[0] |= LINUX_O_NOFOLLOW;
1166#endif
1167#ifdef LINUX_O_DIRECT
1168		if (result & O_DIRECT)
1169			td->td_retval[0] |= LINUX_O_DIRECT;
1170#endif
1171		return (error);
1172
1173	case LINUX_F_SETFL:
1174		arg = 0;
1175		if (args->arg & LINUX_O_NDELAY)
1176			arg |= O_NONBLOCK;
1177		if (args->arg & LINUX_O_APPEND)
1178			arg |= O_APPEND;
1179		if (args->arg & LINUX_O_SYNC)
1180			arg |= O_FSYNC;
1181		if (args->arg & LINUX_FASYNC)
1182			arg |= O_ASYNC;
1183#ifdef LINUX_O_NOFOLLOW
1184		if (args->arg & LINUX_O_NOFOLLOW)
1185			arg |= O_NOFOLLOW;
1186#endif
1187#ifdef LINUX_O_DIRECT
1188		if (args->arg & LINUX_O_DIRECT)
1189			arg |= O_DIRECT;
1190#endif
1191		return (kern_fcntl(td, args->fd, F_SETFL, arg));
1192
1193	case LINUX_F_GETLK:
1194		error = copyin((void *)args->arg, &linux_flock,
1195		    sizeof(linux_flock));
1196		if (error)
1197			return (error);
1198		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1199		error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
1200		if (error)
1201			return (error);
1202		bsd_to_linux_flock(&bsd_flock, &linux_flock);
1203		return (copyout(&linux_flock, (void *)args->arg,
1204		    sizeof(linux_flock)));
1205
1206	case LINUX_F_SETLK:
1207		error = copyin((void *)args->arg, &linux_flock,
1208		    sizeof(linux_flock));
1209		if (error)
1210			return (error);
1211		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1212		return (kern_fcntl(td, args->fd, F_SETLK,
1213		    (intptr_t)&bsd_flock));
1214
1215	case LINUX_F_SETLKW:
1216		error = copyin((void *)args->arg, &linux_flock,
1217		    sizeof(linux_flock));
1218		if (error)
1219			return (error);
1220		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1221		return (kern_fcntl(td, args->fd, F_SETLKW,
1222		     (intptr_t)&bsd_flock));
1223
1224	case LINUX_F_GETOWN:
1225		return (kern_fcntl(td, args->fd, F_GETOWN, 0));
1226
1227	case LINUX_F_SETOWN:
1228		/*
1229		 * XXX some Linux applications depend on F_SETOWN having no
1230		 * significant effect for pipes (SIGIO is not delivered for
1231		 * pipes under Linux-2.2.35 at least).
1232		 */
1233		error = fget(td, args->fd, &fp);
1234		if (error)
1235			return (error);
1236		if (fp->f_type == DTYPE_PIPE) {
1237			fdrop(fp, td);
1238			return (EINVAL);
1239		}
1240		fdrop(fp, td);
1241
1242		return (kern_fcntl(td, args->fd, F_SETOWN, args->arg));
1243	}
1244
1245	return (EINVAL);
1246}
1247
1248int
1249linux_fcntl(struct thread *td, struct linux_fcntl_args *args)
1250{
1251	struct linux_fcntl64_args args64;
1252
1253#ifdef DEBUG
1254	if (ldebug(fcntl))
1255		printf(ARGS(fcntl, "%d, %08x, *"), args->fd, args->cmd);
1256#endif
1257
1258	args64.fd = args->fd;
1259	args64.cmd = args->cmd;
1260	args64.arg = args->arg;
1261	return (fcntl_common(td, &args64));
1262}
1263
1264#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1265int
1266linux_fcntl64(struct thread *td, struct linux_fcntl64_args *args)
1267{
1268	struct l_flock64 linux_flock;
1269	struct flock bsd_flock;
1270	int error;
1271
1272#ifdef DEBUG
1273	if (ldebug(fcntl64))
1274		printf(ARGS(fcntl64, "%d, %08x, *"), args->fd, args->cmd);
1275#endif
1276
1277	switch (args->cmd) {
1278	case LINUX_F_GETLK64:
1279		error = copyin((void *)args->arg, &linux_flock,
1280		    sizeof(linux_flock));
1281		if (error)
1282			return (error);
1283		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1284		error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
1285		if (error)
1286			return (error);
1287		bsd_to_linux_flock64(&bsd_flock, &linux_flock);
1288		return (copyout(&linux_flock, (void *)args->arg,
1289			    sizeof(linux_flock)));
1290
1291	case LINUX_F_SETLK64:
1292		error = copyin((void *)args->arg, &linux_flock,
1293		    sizeof(linux_flock));
1294		if (error)
1295			return (error);
1296		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1297		return (kern_fcntl(td, args->fd, F_SETLK,
1298		    (intptr_t)&bsd_flock));
1299
1300	case LINUX_F_SETLKW64:
1301		error = copyin((void *)args->arg, &linux_flock,
1302		    sizeof(linux_flock));
1303		if (error)
1304			return (error);
1305		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1306		return (kern_fcntl(td, args->fd, F_SETLKW,
1307		    (intptr_t)&bsd_flock));
1308	}
1309
1310	return (fcntl_common(td, args));
1311}
1312#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1313
1314int
1315linux_chown(struct thread *td, struct linux_chown_args *args)
1316{
1317	char *path;
1318	int error;
1319
1320	LCONVPATHEXIST(td, args->path, &path);
1321
1322#ifdef DEBUG
1323	if (ldebug(chown))
1324		printf(ARGS(chown, "%s, %d, %d"), path, args->uid, args->gid);
1325#endif
1326	error = kern_chown(td, path, UIO_SYSSPACE, args->uid, args->gid);
1327	LFREEPATH(path);
1328	return (error);
1329}
1330
1331int
1332linux_lchown(struct thread *td, struct linux_lchown_args *args)
1333{
1334	char *path;
1335	int error;
1336
1337	LCONVPATHEXIST(td, args->path, &path);
1338
1339#ifdef DEBUG
1340	if (ldebug(lchown))
1341		printf(ARGS(lchown, "%s, %d, %d"), path, args->uid, args->gid);
1342#endif
1343	error = kern_lchown(td, path, UIO_SYSSPACE, args->uid, args->gid);
1344	LFREEPATH(path);
1345	return (error);
1346}
1347