linux_file.c revision 168014
1185493Speter/*-
2185493Speter * Copyright (c) 1994-1995 Søren Schmidt
3185493Speter * All rights reserved.
4185493Speter *
5185493Speter * Redistribution and use in source and binary forms, with or without
6185493Speter * modification, are permitted provided that the following conditions
7185493Speter * are met:
8185493Speter * 1. Redistributions of source code must retain the above copyright
9185493Speter *    notice, this list of conditions and the following disclaimer
10185493Speter *    in this position and unchanged.
11185493Speter * 2. Redistributions in binary form must reproduce the above copyright
12185493Speter *    notice, this list of conditions and the following disclaimer in the
13185493Speter *    documentation and/or other materials provided with the distribution.
14185493Speter * 3. The name of the author may not be used to endorse or promote products
15185493Speter *    derived from this software without specific prior written permission
16185493Speter *
17185493Speter * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18185493Speter * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19185493Speter * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20185493Speter * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21185493Speter * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22185493Speter * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23185493Speter * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24185493Speter * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25185493Speter * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26185493Speter * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27185493Speter */
28185493Speter
29185493Speter#include <sys/cdefs.h>
30185493Speter__FBSDID("$FreeBSD: head/sys/compat/linux/linux_file.c 168014 2007-03-29 02:11:46Z julian $");
31185493Speter
32185493Speter#include "opt_compat.h"
33185493Speter#include "opt_mac.h"
34185493Speter
35185493Speter#include <sys/param.h>
36185493Speter#include <sys/systm.h>
37185493Speter#include <sys/conf.h>
38185493Speter#include <sys/dirent.h>
39185493Speter#include <sys/fcntl.h>
40185493Speter#include <sys/file.h>
41185493Speter#include <sys/filedesc.h>
42185493Speter#include <sys/lock.h>
43185493Speter#include <sys/malloc.h>
44185493Speter#include <sys/mount.h>
45185553Speter#include <sys/mutex.h>
46185493Speter#include <sys/namei.h>
47185493Speter#include <sys/proc.h>
48185493Speter#include <sys/stat.h>
49185493Speter#include <sys/sx.h>
50185493Speter#include <sys/syscallsubr.h>
51185493Speter#include <sys/sysproto.h>
52185493Speter#include <sys/tty.h>
53185493Speter#include <sys/unistd.h>
54185493Speter#include <sys/vnode.h>
55185493Speter
56185493Speter#include <security/mac/mac_framework.h>
57185493Speter
58185493Speter#include <ufs/ufs/extattr.h>
59185493Speter#include <ufs/ufs/quota.h>
60185553Speter#include <ufs/ufs/ufsmount.h>
61185493Speter
62185493Speter#ifdef COMPAT_LINUX32
63185493Speter#include <machine/../linux32/linux.h>
64185493Speter#include <machine/../linux32/linux32_proto.h>
65185493Speter#else
66185493Speter#include <machine/../linux/linux.h>
67185493Speter#include <machine/../linux/linux_proto.h>
68185493Speter#endif
69185493Speter#include <compat/linux/linux_util.h>
70185493Speter
71185493Speterint
72185493Speterlinux_creat(struct thread *td, struct linux_creat_args *args)
73{
74    char *path;
75    int error;
76
77    LCONVPATHEXIST(td, args->path, &path);
78
79#ifdef DEBUG
80	if (ldebug(creat))
81		printf(ARGS(creat, "%s, %d"), path, args->mode);
82#endif
83    error = kern_open(td, path, UIO_SYSSPACE, O_WRONLY | O_CREAT | O_TRUNC,
84	args->mode);
85    LFREEPATH(path);
86    return (error);
87}
88
89
90static int
91linux_common_open(struct thread *td, char *path, int l_flags, int mode, int openat)
92{
93    struct proc *p = td->td_proc;
94    struct file *fp;
95    int fd;
96    int bsd_flags, error;
97
98    bsd_flags = 0;
99    switch (l_flags & LINUX_O_ACCMODE) {
100    case LINUX_O_WRONLY:
101	bsd_flags |= O_WRONLY;
102	break;
103    case LINUX_O_RDWR:
104	bsd_flags |= O_RDWR;
105	break;
106    default:
107	bsd_flags |= O_RDONLY;
108    }
109    if (l_flags & LINUX_O_NDELAY)
110	bsd_flags |= O_NONBLOCK;
111    if (l_flags & LINUX_O_APPEND)
112	bsd_flags |= O_APPEND;
113    if (l_flags & LINUX_O_SYNC)
114	bsd_flags |= O_FSYNC;
115    if (l_flags & LINUX_O_NONBLOCK)
116	bsd_flags |= O_NONBLOCK;
117    if (l_flags & LINUX_FASYNC)
118	bsd_flags |= O_ASYNC;
119    if (l_flags & LINUX_O_CREAT)
120	bsd_flags |= O_CREAT;
121    if (l_flags & LINUX_O_TRUNC)
122	bsd_flags |= O_TRUNC;
123    if (l_flags & LINUX_O_EXCL)
124	bsd_flags |= O_EXCL;
125    if (l_flags & LINUX_O_NOCTTY)
126	bsd_flags |= O_NOCTTY;
127    if (l_flags & LINUX_O_DIRECT)
128	bsd_flags |= O_DIRECT;
129    if (l_flags & LINUX_O_NOFOLLOW)
130	bsd_flags |= O_NOFOLLOW;
131    /* XXX LINUX_O_NOATIME: unable to be easily implemented. */
132
133    error = kern_open(td, path, UIO_SYSSPACE, bsd_flags, mode);
134    if (!error) {
135	    fd = td->td_retval[0];
136	    /*
137	     * XXX In between kern_open() and fget(), another process
138	     * having the same filedesc could use that fd without
139	     * checking below.
140	     */
141	    error = fget(td, fd, &fp);
142	    if (!error) {
143		    sx_slock(&proctree_lock);
144		    PROC_LOCK(p);
145		    if (!(bsd_flags & O_NOCTTY) &&
146			SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) {
147			    PROC_UNLOCK(p);
148			    sx_unlock(&proctree_lock);
149			    if (fp->f_type == DTYPE_VNODE)
150				    (void) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0,
151					     td->td_ucred, td);
152		    } else {
153			    PROC_UNLOCK(p);
154			    sx_sunlock(&proctree_lock);
155		    }
156		    if (l_flags & LINUX_O_DIRECTORY) {
157			    if (fp->f_type != DTYPE_VNODE ||
158				fp->f_vnode->v_type != VDIR) {
159				    error = ENOTDIR;
160			    }
161		    }
162		    fdrop(fp, td);
163		    /*
164		     * XXX as above, fdrop()/kern_close() pair is racy.
165		     */
166		    if (error)
167			    kern_close(td, fd);
168	    }
169    }
170
171#ifdef DEBUG
172    if (ldebug(open))
173	    printf(LMSG("open returns error %d"), error);
174#endif
175    if (!openat)
176	LFREEPATH(path);
177    return error;
178}
179
180/*
181 * common code for linux *at set of syscalls
182 *
183 * works like this:
184 * if filename is absolute
185 *    ignore dirfd
186 * else
187 *    if dirfd == AT_FDCWD
188 *       return CWD/filename
189 *    else
190 *       return DIRFD/filename
191 */
192static int
193linux_at(struct thread *td, int dirfd, char *filename, char **newpath, char **freebuf)
194{
195   	struct file *fp;
196	int error = 0;
197	struct vnode *dvp;
198	struct filedesc *fdp = td->td_proc->p_fd;
199	char *fullpath = "unknown";
200	char *freepath = NULL;
201
202	/* don't do anything if the pathname is absolute */
203	if (*filename == '/') {
204	   	*newpath= filename;
205	   	return (0);
206	}
207
208	/* check for AT_FDWCD */
209	if (dirfd == LINUX_AT_FDCWD) {
210	   	FILEDESC_LOCK(fdp);
211		dvp = fdp->fd_cdir;
212	   	FILEDESC_UNLOCK(fdp);
213	} else {
214	   	error = fget(td, dirfd, &fp);
215		if (error)
216		   	return (error);
217		dvp = fp->f_vnode;
218		/* only a dir can be dfd */
219		if (dvp->v_type != VDIR) {
220		   	fdrop(fp, td);
221			return (ENOTDIR);
222		}
223		fdrop(fp, td);
224	}
225
226	error = vn_fullpath(td, dvp, &fullpath, &freepath);
227	if (!error) {
228	   	*newpath = malloc(strlen(fullpath) + strlen(filename) + 2, M_TEMP, M_WAITOK | M_ZERO);
229		*freebuf = freepath;
230		sprintf(*newpath, "%s/%s", fullpath, filename);
231	}
232
233	return (error);
234}
235
236int
237linux_openat(struct thread *td, struct linux_openat_args *args)
238{
239   	char *newpath, *oldpath, *freebuf = NULL, *path;
240	int error;
241
242	oldpath = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
243	error = copyinstr(args->filename, oldpath, MAXPATHLEN, NULL);
244
245#ifdef DEBUG
246	if (ldebug(openat))
247		printf(ARGS(openat, "%i, %s, 0x%x, 0x%x"), args->dfd,
248		    oldpath, args->flags, args->mode);
249#endif
250
251	error = linux_at(td, args->dfd, oldpath, &newpath, &freebuf);
252	if (error)
253	   	return (error);
254#ifdef DEBUG
255	printf(LMSG("newpath: %s"), newpath);
256#endif
257    	if (args->flags & LINUX_O_CREAT)
258		LCONVPATH_SEG(td, newpath, &path, 1, UIO_SYSSPACE);
259    	else
260		LCONVPATH_SEG(td, newpath, &path, 0, UIO_SYSSPACE);
261	if (freebuf)
262	   	free(freebuf, M_TEMP);
263	if (*oldpath != '/')
264   	   	free(newpath, M_TEMP);
265
266	error = linux_common_open(td, path, args->flags, args->mode, 1);
267	free(oldpath, M_TEMP);
268	return (error);
269}
270
271int
272linux_open(struct thread *td, struct linux_open_args *args)
273{
274    char *path;
275
276    if (args->flags & LINUX_O_CREAT)
277	LCONVPATHCREAT(td, args->path, &path);
278    else
279	LCONVPATHEXIST(td, args->path, &path);
280
281#ifdef DEBUG
282	if (ldebug(open))
283		printf(ARGS(open, "%s, 0x%x, 0x%x"),
284		    path, args->flags, args->mode);
285#endif
286
287    return linux_common_open(td, path, args->flags, args->mode, 0);
288}
289
290int
291linux_lseek(struct thread *td, struct linux_lseek_args *args)
292{
293
294    struct lseek_args /* {
295	int fd;
296	int pad;
297	off_t offset;
298	int whence;
299    } */ tmp_args;
300    int error;
301
302#ifdef DEBUG
303	if (ldebug(lseek))
304		printf(ARGS(lseek, "%d, %ld, %d"),
305		    args->fdes, (long)args->off, args->whence);
306#endif
307    tmp_args.fd = args->fdes;
308    tmp_args.offset = (off_t)args->off;
309    tmp_args.whence = args->whence;
310    error = lseek(td, &tmp_args);
311    return error;
312}
313
314int
315linux_llseek(struct thread *td, struct linux_llseek_args *args)
316{
317	struct lseek_args bsd_args;
318	int error;
319	off_t off;
320
321#ifdef DEBUG
322	if (ldebug(llseek))
323		printf(ARGS(llseek, "%d, %d:%d, %d"),
324		    args->fd, args->ohigh, args->olow, args->whence);
325#endif
326	off = (args->olow) | (((off_t) args->ohigh) << 32);
327
328	bsd_args.fd = args->fd;
329	bsd_args.offset = off;
330	bsd_args.whence = args->whence;
331
332	if ((error = lseek(td, &bsd_args)))
333		return error;
334
335	if ((error = copyout(td->td_retval, args->res, sizeof (off_t))))
336		return error;
337
338	td->td_retval[0] = 0;
339	return 0;
340}
341
342int
343linux_readdir(struct thread *td, struct linux_readdir_args *args)
344{
345	struct linux_getdents_args lda;
346
347	lda.fd = args->fd;
348	lda.dent = args->dent;
349	lda.count = 1;
350	return linux_getdents(td, &lda);
351}
352
353/*
354 * Note that linux_getdents(2) and linux_getdents64(2) have the same
355 * arguments. They only differ in the definition of struct dirent they
356 * operate on. We use this to common the code, with the exception of
357 * accessing struct dirent. Note that linux_readdir(2) is implemented
358 * by means of linux_getdents(2). In this case we never operate on
359 * struct dirent64 and thus don't need to handle it...
360 */
361
/*
 * Linux-format directory entry produced for linux_getdents() and
 * linux_readdir(); filled in and copied out by getdents_common().
 */
struct l_dirent {
	l_long		d_ino;		/* set from the native d_fileno */
	l_off_t		d_off;		/* getdents: offset of the next entry;
					   readdir: the Linux record length */
	l_ushort	d_reclen;	/* getdents: Linux record length;
					   readdir: length of the name */
	char		d_name[LINUX_NAME_MAX + 1]; /* NUL-terminated name */
};
368
/*
 * Linux-format directory entry produced for linux_getdents64(); filled in
 * and copied out by getdents_common() when is64bit is set.
 */
struct l_dirent64 {
	uint64_t	d_ino;		/* set from the native d_fileno */
	int64_t		d_off;		/* offset of the next entry */
	l_ushort	d_reclen;	/* Linux record length */
	u_char		d_type;		/* copied from the native d_type */
	char		d_name[LINUX_NAME_MAX + 1]; /* NUL-terminated name */
};
376
/*
 * Length of a Linux directory record holding a name of `namlen' bytes:
 * the offset of d_name inside *de, plus the name and its terminating NUL,
 * rounded up with ALIGN().  `de' is a pointer to the record type; it is
 * expanded twice and only used for address arithmetic.
 */
#define LINUX_RECLEN(de,namlen) \
    ALIGN((((char *)&(de)->d_name - (char *)(de)) + (namlen) + 1))

/* Lower bound for the size of the buffer handed to VOP_READDIR(). */
#define	LINUX_DIRBLKSIZ		512
381
382static int
383getdents_common(struct thread *td, struct linux_getdents64_args *args,
384    int is64bit)
385{
386	struct dirent *bdp;
387	struct vnode *vp;
388	caddr_t inp, buf;		/* BSD-format */
389	int len, reclen;		/* BSD-format */
390	caddr_t outp;			/* Linux-format */
391	int resid, linuxreclen=0;	/* Linux-format */
392	struct file *fp;
393	struct uio auio;
394	struct iovec aiov;
395	off_t off;
396	struct l_dirent linux_dirent;
397	struct l_dirent64 linux_dirent64;
398	int buflen, error, eofflag, nbytes, justone;
399	u_long *cookies = NULL, *cookiep;
400	int ncookies, vfslocked;
401
402	nbytes = args->count;
403	if (nbytes == 1) {
404		/* readdir(2) case. Always struct dirent. */
405		if (is64bit)
406			return (EINVAL);
407		nbytes = sizeof(linux_dirent);
408		justone = 1;
409	} else
410		justone = 0;
411
412	if ((error = getvnode(td->td_proc->p_fd, args->fd, &fp)) != 0)
413		return (error);
414
415	if ((fp->f_flag & FREAD) == 0) {
416		fdrop(fp, td);
417		return (EBADF);
418	}
419
420	vp = fp->f_vnode;
421	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
422	if (vp->v_type != VDIR) {
423		VFS_UNLOCK_GIANT(vfslocked);
424		fdrop(fp, td);
425		return (EINVAL);
426	}
427
428	off = fp->f_offset;
429
430	buflen = max(LINUX_DIRBLKSIZ, nbytes);
431	buflen = min(buflen, MAXBSIZE);
432	buf = malloc(buflen, M_TEMP, M_WAITOK);
433	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
434
435again:
436	aiov.iov_base = buf;
437	aiov.iov_len = buflen;
438	auio.uio_iov = &aiov;
439	auio.uio_iovcnt = 1;
440	auio.uio_rw = UIO_READ;
441	auio.uio_segflg = UIO_SYSSPACE;
442	auio.uio_td = td;
443	auio.uio_resid = buflen;
444	auio.uio_offset = off;
445
446	if (cookies) {
447		free(cookies, M_TEMP);
448		cookies = NULL;
449	}
450
451#ifdef MAC
452	/*
453	 * Do directory search MAC check using non-cached credentials.
454	 */
455	if ((error = mac_check_vnode_readdir(td->td_ucred, vp)))
456		goto out;
457#endif /* MAC */
458	if ((error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, &ncookies,
459		 &cookies)))
460		goto out;
461
462	inp = buf;
463	outp = (caddr_t)args->dirent;
464	resid = nbytes;
465	if ((len = buflen - auio.uio_resid) <= 0)
466		goto eof;
467
468	cookiep = cookies;
469
470	if (cookies) {
471		/*
472		 * When using cookies, the vfs has the option of reading from
473		 * a different offset than that supplied (UFS truncates the
474		 * offset to a block boundary to make sure that it never reads
475		 * partway through a directory entry, even if the directory
476		 * has been compacted).
477		 */
478		while (len > 0 && ncookies > 0 && *cookiep <= off) {
479			bdp = (struct dirent *) inp;
480			len -= bdp->d_reclen;
481			inp += bdp->d_reclen;
482			cookiep++;
483			ncookies--;
484		}
485	}
486
487	while (len > 0) {
488		if (cookiep && ncookies == 0)
489			break;
490		bdp = (struct dirent *) inp;
491		reclen = bdp->d_reclen;
492		if (reclen & 3) {
493			error = EFAULT;
494			goto out;
495		}
496
497		if (bdp->d_fileno == 0) {
498			inp += reclen;
499			if (cookiep) {
500				off = *cookiep++;
501				ncookies--;
502			} else
503				off += reclen;
504
505			len -= reclen;
506			continue;
507		}
508
509		linuxreclen = (is64bit)
510		    ? LINUX_RECLEN(&linux_dirent64, bdp->d_namlen)
511		    : LINUX_RECLEN(&linux_dirent, bdp->d_namlen);
512
513		if (reclen > len || resid < linuxreclen) {
514			outp++;
515			break;
516		}
517
518		if (justone) {
519			/* readdir(2) case. */
520			linux_dirent.d_ino = (l_long)bdp->d_fileno;
521			linux_dirent.d_off = (l_off_t)linuxreclen;
522			linux_dirent.d_reclen = (l_ushort)bdp->d_namlen;
523			strcpy(linux_dirent.d_name, bdp->d_name);
524			error = copyout(&linux_dirent, outp, linuxreclen);
525		} else {
526			if (is64bit) {
527				linux_dirent64.d_ino = bdp->d_fileno;
528				linux_dirent64.d_off = (cookiep)
529				    ? (l_off_t)*cookiep
530				    : (l_off_t)(off + reclen);
531				linux_dirent64.d_reclen =
532				    (l_ushort)linuxreclen;
533				linux_dirent64.d_type = bdp->d_type;
534				strcpy(linux_dirent64.d_name, bdp->d_name);
535				error = copyout(&linux_dirent64, outp,
536				    linuxreclen);
537			} else {
538				linux_dirent.d_ino = bdp->d_fileno;
539				linux_dirent.d_off = (cookiep)
540				    ? (l_off_t)*cookiep
541				    : (l_off_t)(off + reclen);
542				linux_dirent.d_reclen = (l_ushort)linuxreclen;
543				strcpy(linux_dirent.d_name, bdp->d_name);
544				error = copyout(&linux_dirent, outp,
545				    linuxreclen);
546			}
547		}
548		if (error)
549			goto out;
550
551		inp += reclen;
552		if (cookiep) {
553			off = *cookiep++;
554			ncookies--;
555		} else
556			off += reclen;
557
558		outp += linuxreclen;
559		resid -= linuxreclen;
560		len -= reclen;
561		if (justone)
562			break;
563	}
564
565	if (outp == (caddr_t)args->dirent)
566		goto again;
567
568	fp->f_offset = off;
569	if (justone)
570		nbytes = resid + linuxreclen;
571
572eof:
573	td->td_retval[0] = nbytes - resid;
574
575out:
576	if (cookies)
577		free(cookies, M_TEMP);
578
579	VOP_UNLOCK(vp, 0, td);
580	VFS_UNLOCK_GIANT(vfslocked);
581	fdrop(fp, td);
582	free(buf, M_TEMP);
583	return (error);
584}
585
/*
 * Linux getdents(2): struct l_dirent flavour of getdents_common().  The
 * argument structure is passed through a cast to the getdents64 argument
 * type.
 */
int
linux_getdents(struct thread *td, struct linux_getdents_args *args)
{
	struct linux_getdents64_args *common_args;

#ifdef DEBUG
	if (ldebug(getdents))
		printf(ARGS(getdents, "%d, *, %d"), args->fd, args->count);
#endif

	common_args = (struct linux_getdents64_args*)args;
	return (getdents_common(td, common_args, 0));
}
597
/*
 * Linux getdents64(2): struct l_dirent64 flavour of getdents_common().
 */
int
linux_getdents64(struct thread *td, struct linux_getdents64_args *args)
{
	const int is64bit = 1;

#ifdef DEBUG
	if (ldebug(getdents64))
		printf(ARGS(getdents64, "%d, *, %d"), args->fd, args->count);
#endif

	return (getdents_common(td, args, is64bit));
}
609
610/*
611 * These exist mainly for hooks for doing /compat/linux translation.
612 */
613
614int
615linux_access(struct thread *td, struct linux_access_args *args)
616{
617	char *path;
618	int error;
619
620	/* linux convention */
621	if (args->flags & ~(F_OK | X_OK | W_OK | R_OK))
622		return (EINVAL);
623
624	LCONVPATHEXIST(td, args->path, &path);
625
626#ifdef DEBUG
627	if (ldebug(access))
628		printf(ARGS(access, "%s, %d"), path, args->flags);
629#endif
630	error = kern_access(td, path, UIO_SYSSPACE, args->flags);
631	LFREEPATH(path);
632
633	return (error);
634}
635
636int
637linux_unlink(struct thread *td, struct linux_unlink_args *args)
638{
639	char *path;
640	int error;
641	struct stat st;
642
643	LCONVPATHEXIST(td, args->path, &path);
644
645#ifdef DEBUG
646	if (ldebug(unlink))
647		printf(ARGS(unlink, "%s"), path);
648#endif
649
650	error = kern_unlink(td, path, UIO_SYSSPACE);
651	if (error == EPERM)
652		/* Introduce POSIX noncompliant behaviour of Linux */
653		if (kern_stat(td, path, UIO_SYSSPACE, &st) == 0)
654			if (S_ISDIR(st.st_mode))
655				error = EISDIR;
656	LFREEPATH(path);
657	return (error);
658}
659
660int
661linux_chdir(struct thread *td, struct linux_chdir_args *args)
662{
663	char *path;
664	int error;
665
666	LCONVPATHEXIST(td, args->path, &path);
667
668#ifdef DEBUG
669	if (ldebug(chdir))
670		printf(ARGS(chdir, "%s"), path);
671#endif
672	error = kern_chdir(td, path, UIO_SYSSPACE);
673	LFREEPATH(path);
674	return (error);
675}
676
677int
678linux_chmod(struct thread *td, struct linux_chmod_args *args)
679{
680	char *path;
681	int error;
682
683	LCONVPATHEXIST(td, args->path, &path);
684
685#ifdef DEBUG
686	if (ldebug(chmod))
687		printf(ARGS(chmod, "%s, %d"), path, args->mode);
688#endif
689	error = kern_chmod(td, path, UIO_SYSSPACE, args->mode);
690	LFREEPATH(path);
691	return (error);
692}
693
694int
695linux_mkdir(struct thread *td, struct linux_mkdir_args *args)
696{
697	char *path;
698	int error;
699
700	LCONVPATHCREAT(td, args->path, &path);
701
702#ifdef DEBUG
703	if (ldebug(mkdir))
704		printf(ARGS(mkdir, "%s, %d"), path, args->mode);
705#endif
706	error = kern_mkdir(td, path, UIO_SYSSPACE, args->mode);
707	LFREEPATH(path);
708	return (error);
709}
710
711int
712linux_rmdir(struct thread *td, struct linux_rmdir_args *args)
713{
714	char *path;
715	int error;
716
717	LCONVPATHEXIST(td, args->path, &path);
718
719#ifdef DEBUG
720	if (ldebug(rmdir))
721		printf(ARGS(rmdir, "%s"), path);
722#endif
723	error = kern_rmdir(td, path, UIO_SYSSPACE);
724	LFREEPATH(path);
725	return (error);
726}
727
728int
729linux_rename(struct thread *td, struct linux_rename_args *args)
730{
731	char *from, *to;
732	int error;
733
734	LCONVPATHEXIST(td, args->from, &from);
735	/* Expand LCONVPATHCREATE so that `from' can be freed on errors */
736	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1);
737	if (to == NULL) {
738		LFREEPATH(from);
739		return (error);
740	}
741
742#ifdef DEBUG
743	if (ldebug(rename))
744		printf(ARGS(rename, "%s, %s"), from, to);
745#endif
746	error = kern_rename(td, from, to, UIO_SYSSPACE);
747	LFREEPATH(from);
748	LFREEPATH(to);
749	return (error);
750}
751
752int
753linux_symlink(struct thread *td, struct linux_symlink_args *args)
754{
755	char *path, *to;
756	int error;
757
758	LCONVPATHEXIST(td, args->path, &path);
759	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
760	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1);
761	if (to == NULL) {
762		LFREEPATH(path);
763		return (error);
764	}
765
766#ifdef DEBUG
767	if (ldebug(symlink))
768		printf(ARGS(symlink, "%s, %s"), path, to);
769#endif
770	error = kern_symlink(td, path, to, UIO_SYSSPACE);
771	LFREEPATH(path);
772	LFREEPATH(to);
773	return (error);
774}
775
776int
777linux_readlink(struct thread *td, struct linux_readlink_args *args)
778{
779	char *name;
780	int error;
781
782	LCONVPATHEXIST(td, args->name, &name);
783
784#ifdef DEBUG
785	if (ldebug(readlink))
786		printf(ARGS(readlink, "%s, %p, %d"), name, (void *)args->buf,
787		    args->count);
788#endif
789	error = kern_readlink(td, name, UIO_SYSSPACE, args->buf, UIO_USERSPACE,
790	    args->count);
791	LFREEPATH(name);
792	return (error);
793}
794
795int
796linux_truncate(struct thread *td, struct linux_truncate_args *args)
797{
798	char *path;
799	int error;
800
801	LCONVPATHEXIST(td, args->path, &path);
802
803#ifdef DEBUG
804	if (ldebug(truncate))
805		printf(ARGS(truncate, "%s, %ld"), path, (long)args->length);
806#endif
807
808	error = kern_truncate(td, path, UIO_SYSSPACE, args->length);
809	LFREEPATH(path);
810	return (error);
811}
812
813int
814linux_ftruncate(struct thread *td, struct linux_ftruncate_args *args)
815{
816	struct ftruncate_args /* {
817		int fd;
818		int pad;
819		off_t length;
820		} */ nuap;
821
822	nuap.fd = args->fd;
823	nuap.pad = 0;
824	nuap.length = args->length;
825	return (ftruncate(td, &nuap));
826}
827
828int
829linux_link(struct thread *td, struct linux_link_args *args)
830{
831	char *path, *to;
832	int error;
833
834	LCONVPATHEXIST(td, args->path, &path);
835	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
836	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1);
837	if (to == NULL) {
838		LFREEPATH(path);
839		return (error);
840	}
841
842#ifdef DEBUG
843	if (ldebug(link))
844		printf(ARGS(link, "%s, %s"), path, to);
845#endif
846	error = kern_link(td, path, to, UIO_SYSSPACE);
847	LFREEPATH(path);
848	LFREEPATH(to);
849	return (error);
850}
851
852int
853linux_fdatasync(td, uap)
854	struct thread *td;
855	struct linux_fdatasync_args *uap;
856{
857	struct fsync_args bsd;
858
859	bsd.fd = uap->fd;
860	return fsync(td, &bsd);
861}
862
863int
864linux_pread(td, uap)
865	struct thread *td;
866	struct linux_pread_args *uap;
867{
868	struct pread_args bsd;
869	struct vnode *vp;
870	int error;
871
872	bsd.fd = uap->fd;
873	bsd.buf = uap->buf;
874	bsd.nbyte = uap->nbyte;
875	bsd.offset = uap->offset;
876
877	error = pread(td, &bsd);
878
879	if (error == 0) {
880   	   	/* This seems to violate POSIX but linux does it */
881   	   	if ((error = fgetvp(td, uap->fd, &vp)) != 0)
882   		   	return (error);
883		if (vp->v_type == VDIR) {
884   		   	vrele(vp);
885			return (EISDIR);
886		}
887		vrele(vp);
888	}
889
890	return (error);
891}
892
893int
894linux_pwrite(td, uap)
895	struct thread *td;
896	struct linux_pwrite_args *uap;
897{
898	struct pwrite_args bsd;
899
900	bsd.fd = uap->fd;
901	bsd.buf = uap->buf;
902	bsd.nbyte = uap->nbyte;
903	bsd.offset = uap->offset;
904	return pwrite(td, &bsd);
905}
906
907int
908linux_mount(struct thread *td, struct linux_mount_args *args)
909{
910	struct ufs_args ufs;
911	char fstypename[MFSNAMELEN];
912	char mntonname[MNAMELEN], mntfromname[MNAMELEN];
913	int error;
914	int fsflags;
915	void *fsdata;
916
917	error = copyinstr(args->filesystemtype, fstypename, MFSNAMELEN - 1,
918	    NULL);
919	if (error)
920		return (error);
921	error = copyinstr(args->specialfile, mntfromname, MNAMELEN - 1, NULL);
922	if (error)
923		return (error);
924	error = copyinstr(args->dir, mntonname, MNAMELEN - 1, NULL);
925	if (error)
926		return (error);
927
928#ifdef DEBUG
929	if (ldebug(mount))
930		printf(ARGS(mount, "%s, %s, %s"),
931		    fstypename, mntfromname, mntonname);
932#endif
933
934	if (strcmp(fstypename, "ext2") == 0) {
935		strcpy(fstypename, "ext2fs");
936		fsdata = &ufs;
937		ufs.fspec = mntfromname;
938#define DEFAULT_ROOTID		-2
939		ufs.export.ex_root = DEFAULT_ROOTID;
940		ufs.export.ex_flags =
941		    args->rwflag & LINUX_MS_RDONLY ? MNT_EXRDONLY : 0;
942	} else if (strcmp(fstypename, "proc") == 0) {
943		strcpy(fstypename, "linprocfs");
944		fsdata = NULL;
945	} else {
946		return (ENODEV);
947	}
948
949	fsflags = 0;
950
951	if ((args->rwflag & 0xffff0000) == 0xc0ed0000) {
952		/*
953		 * Linux SYNC flag is not included; the closest equivalent
954		 * FreeBSD has is !ASYNC, which is our default.
955		 */
956		if (args->rwflag & LINUX_MS_RDONLY)
957			fsflags |= MNT_RDONLY;
958		if (args->rwflag & LINUX_MS_NOSUID)
959			fsflags |= MNT_NOSUID;
960		if (args->rwflag & LINUX_MS_NOEXEC)
961			fsflags |= MNT_NOEXEC;
962		if (args->rwflag & LINUX_MS_REMOUNT)
963			fsflags |= MNT_UPDATE;
964	}
965
966	if (strcmp(fstypename, "linprocfs") == 0) {
967		error = kernel_vmount(fsflags,
968			"fstype", fstypename,
969			"fspath", mntonname,
970			NULL);
971	} else
972		error = EOPNOTSUPP;
973	return (error);
974}
975
976int
977linux_oldumount(struct thread *td, struct linux_oldumount_args *args)
978{
979	struct linux_umount_args args2;
980
981	args2.path = args->path;
982	args2.flags = 0;
983	return (linux_umount(td, &args2));
984}
985
986int
987linux_umount(struct thread *td, struct linux_umount_args *args)
988{
989	struct unmount_args bsd;
990
991	bsd.path = args->path;
992	bsd.flags = args->flags;	/* XXX correct? */
993	return (unmount(td, &bsd));
994}
995
996/*
997 * fcntl family of syscalls
998 */
999
/*
 * Linux `struct flock' as exchanged with user space by the F_GETLK,
 * F_SETLK and F_SETLKW commands in fcntl_common().  Converted to and from
 * the native struct flock by linux_to_bsd_flock() / bsd_to_linux_flock().
 */
struct l_flock {
	l_short		l_type;		/* LINUX_F_RDLCK, _WRLCK or _UNLCK */
	l_short		l_whence;	/* copied to/from the native l_whence */
	l_off_t		l_start;	/* copied to/from the native l_start */
	l_off_t		l_len;		/* copied to/from the native l_len */
	l_pid_t		l_pid;		/* copied to/from the native l_pid */
}
/* Packed only for the 32-bit Linux ABI on amd64. */
#if defined(__amd64__) && defined(COMPAT_LINUX32)
__packed
#endif
;
1011
1012static void
1013linux_to_bsd_flock(struct l_flock *linux_flock, struct flock *bsd_flock)
1014{
1015	switch (linux_flock->l_type) {
1016	case LINUX_F_RDLCK:
1017		bsd_flock->l_type = F_RDLCK;
1018		break;
1019	case LINUX_F_WRLCK:
1020		bsd_flock->l_type = F_WRLCK;
1021		break;
1022	case LINUX_F_UNLCK:
1023		bsd_flock->l_type = F_UNLCK;
1024		break;
1025	default:
1026		bsd_flock->l_type = -1;
1027		break;
1028	}
1029	bsd_flock->l_whence = linux_flock->l_whence;
1030	bsd_flock->l_start = (off_t)linux_flock->l_start;
1031	bsd_flock->l_len = (off_t)linux_flock->l_len;
1032	bsd_flock->l_pid = (pid_t)linux_flock->l_pid;
1033}
1034
1035static void
1036bsd_to_linux_flock(struct flock *bsd_flock, struct l_flock *linux_flock)
1037{
1038	switch (bsd_flock->l_type) {
1039	case F_RDLCK:
1040		linux_flock->l_type = LINUX_F_RDLCK;
1041		break;
1042	case F_WRLCK:
1043		linux_flock->l_type = LINUX_F_WRLCK;
1044		break;
1045	case F_UNLCK:
1046		linux_flock->l_type = LINUX_F_UNLCK;
1047		break;
1048	}
1049	linux_flock->l_whence = bsd_flock->l_whence;
1050	linux_flock->l_start = (l_off_t)bsd_flock->l_start;
1051	linux_flock->l_len = (l_off_t)bsd_flock->l_len;
1052	linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid;
1053}
1054
1055#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * Linux `struct flock64' as exchanged with user space by the F_GETLK64,
 * F_SETLK64 and F_SETLKW64 commands in linux_fcntl64().  Same fields as
 * struct l_flock, but l_start and l_len use l_loff_t.
 */
struct l_flock64 {
	l_short		l_type;		/* LINUX_F_RDLCK, _WRLCK or _UNLCK */
	l_short		l_whence;	/* copied to/from the native l_whence */
	l_loff_t	l_start;	/* copied to/from the native l_start */
	l_loff_t	l_len;		/* copied to/from the native l_len */
	l_pid_t		l_pid;		/* copied to/from the native l_pid */
}
/* Packed only for the 32-bit Linux ABI on amd64. */
#if defined(__amd64__) && defined(COMPAT_LINUX32)
__packed
#endif
;
1067
1068static void
1069linux_to_bsd_flock64(struct l_flock64 *linux_flock, struct flock *bsd_flock)
1070{
1071	switch (linux_flock->l_type) {
1072	case LINUX_F_RDLCK:
1073		bsd_flock->l_type = F_RDLCK;
1074		break;
1075	case LINUX_F_WRLCK:
1076		bsd_flock->l_type = F_WRLCK;
1077		break;
1078	case LINUX_F_UNLCK:
1079		bsd_flock->l_type = F_UNLCK;
1080		break;
1081	default:
1082		bsd_flock->l_type = -1;
1083		break;
1084	}
1085	bsd_flock->l_whence = linux_flock->l_whence;
1086	bsd_flock->l_start = (off_t)linux_flock->l_start;
1087	bsd_flock->l_len = (off_t)linux_flock->l_len;
1088	bsd_flock->l_pid = (pid_t)linux_flock->l_pid;
1089}
1090
1091static void
1092bsd_to_linux_flock64(struct flock *bsd_flock, struct l_flock64 *linux_flock)
1093{
1094	switch (bsd_flock->l_type) {
1095	case F_RDLCK:
1096		linux_flock->l_type = LINUX_F_RDLCK;
1097		break;
1098	case F_WRLCK:
1099		linux_flock->l_type = LINUX_F_WRLCK;
1100		break;
1101	case F_UNLCK:
1102		linux_flock->l_type = LINUX_F_UNLCK;
1103		break;
1104	}
1105	linux_flock->l_whence = bsd_flock->l_whence;
1106	linux_flock->l_start = (l_loff_t)bsd_flock->l_start;
1107	linux_flock->l_len = (l_loff_t)bsd_flock->l_len;
1108	linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid;
1109}
1110#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1111
1112static int
1113fcntl_common(struct thread *td, struct linux_fcntl64_args *args)
1114{
1115	struct l_flock linux_flock;
1116	struct flock bsd_flock;
1117	struct file *fp;
1118	long arg;
1119	int error, result;
1120
1121	switch (args->cmd) {
1122	case LINUX_F_DUPFD:
1123		return (kern_fcntl(td, args->fd, F_DUPFD, args->arg));
1124
1125	case LINUX_F_GETFD:
1126		return (kern_fcntl(td, args->fd, F_GETFD, 0));
1127
1128	case LINUX_F_SETFD:
1129		return (kern_fcntl(td, args->fd, F_SETFD, args->arg));
1130
1131	case LINUX_F_GETFL:
1132		error = kern_fcntl(td, args->fd, F_GETFL, 0);
1133		result = td->td_retval[0];
1134		td->td_retval[0] = 0;
1135		if (result & O_RDONLY)
1136			td->td_retval[0] |= LINUX_O_RDONLY;
1137		if (result & O_WRONLY)
1138			td->td_retval[0] |= LINUX_O_WRONLY;
1139		if (result & O_RDWR)
1140			td->td_retval[0] |= LINUX_O_RDWR;
1141		if (result & O_NDELAY)
1142			td->td_retval[0] |= LINUX_O_NONBLOCK;
1143		if (result & O_APPEND)
1144			td->td_retval[0] |= LINUX_O_APPEND;
1145		if (result & O_FSYNC)
1146			td->td_retval[0] |= LINUX_O_SYNC;
1147		if (result & O_ASYNC)
1148			td->td_retval[0] |= LINUX_FASYNC;
1149#ifdef LINUX_O_NOFOLLOW
1150		if (result & O_NOFOLLOW)
1151			td->td_retval[0] |= LINUX_O_NOFOLLOW;
1152#endif
1153#ifdef LINUX_O_DIRECT
1154		if (result & O_DIRECT)
1155			td->td_retval[0] |= LINUX_O_DIRECT;
1156#endif
1157		return (error);
1158
1159	case LINUX_F_SETFL:
1160		arg = 0;
1161		if (args->arg & LINUX_O_NDELAY)
1162			arg |= O_NONBLOCK;
1163		if (args->arg & LINUX_O_APPEND)
1164			arg |= O_APPEND;
1165		if (args->arg & LINUX_O_SYNC)
1166			arg |= O_FSYNC;
1167		if (args->arg & LINUX_FASYNC)
1168			arg |= O_ASYNC;
1169#ifdef LINUX_O_NOFOLLOW
1170		if (args->arg & LINUX_O_NOFOLLOW)
1171			arg |= O_NOFOLLOW;
1172#endif
1173#ifdef LINUX_O_DIRECT
1174		if (args->arg & LINUX_O_DIRECT)
1175			arg |= O_DIRECT;
1176#endif
1177		return (kern_fcntl(td, args->fd, F_SETFL, arg));
1178
1179	case LINUX_F_GETLK:
1180		error = copyin((void *)args->arg, &linux_flock,
1181		    sizeof(linux_flock));
1182		if (error)
1183			return (error);
1184		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1185		error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
1186		if (error)
1187			return (error);
1188		bsd_to_linux_flock(&bsd_flock, &linux_flock);
1189		return (copyout(&linux_flock, (void *)args->arg,
1190		    sizeof(linux_flock)));
1191
1192	case LINUX_F_SETLK:
1193		error = copyin((void *)args->arg, &linux_flock,
1194		    sizeof(linux_flock));
1195		if (error)
1196			return (error);
1197		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1198		return (kern_fcntl(td, args->fd, F_SETLK,
1199		    (intptr_t)&bsd_flock));
1200
1201	case LINUX_F_SETLKW:
1202		error = copyin((void *)args->arg, &linux_flock,
1203		    sizeof(linux_flock));
1204		if (error)
1205			return (error);
1206		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1207		return (kern_fcntl(td, args->fd, F_SETLKW,
1208		     (intptr_t)&bsd_flock));
1209
1210	case LINUX_F_GETOWN:
1211		return (kern_fcntl(td, args->fd, F_GETOWN, 0));
1212
1213	case LINUX_F_SETOWN:
1214		/*
1215		 * XXX some Linux applications depend on F_SETOWN having no
1216		 * significant effect for pipes (SIGIO is not delivered for
1217		 * pipes under Linux-2.2.35 at least).
1218		 */
1219		error = fget(td, args->fd, &fp);
1220		if (error)
1221			return (error);
1222		if (fp->f_type == DTYPE_PIPE) {
1223			fdrop(fp, td);
1224			return (EINVAL);
1225		}
1226		fdrop(fp, td);
1227
1228		return (kern_fcntl(td, args->fd, F_SETOWN, args->arg));
1229	}
1230
1231	return (EINVAL);
1232}
1233
1234int
1235linux_fcntl(struct thread *td, struct linux_fcntl_args *args)
1236{
1237	struct linux_fcntl64_args args64;
1238
1239#ifdef DEBUG
1240	if (ldebug(fcntl))
1241		printf(ARGS(fcntl, "%d, %08x, *"), args->fd, args->cmd);
1242#endif
1243
1244	args64.fd = args->fd;
1245	args64.cmd = args->cmd;
1246	args64.arg = args->arg;
1247	return (fcntl_common(td, &args64));
1248}
1249
1250#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1251int
1252linux_fcntl64(struct thread *td, struct linux_fcntl64_args *args)
1253{
1254	struct l_flock64 linux_flock;
1255	struct flock bsd_flock;
1256	int error;
1257
1258#ifdef DEBUG
1259	if (ldebug(fcntl64))
1260		printf(ARGS(fcntl64, "%d, %08x, *"), args->fd, args->cmd);
1261#endif
1262
1263	switch (args->cmd) {
1264	case LINUX_F_GETLK64:
1265		error = copyin((void *)args->arg, &linux_flock,
1266		    sizeof(linux_flock));
1267		if (error)
1268			return (error);
1269		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1270		error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
1271		if (error)
1272			return (error);
1273		bsd_to_linux_flock64(&bsd_flock, &linux_flock);
1274		return (copyout(&linux_flock, (void *)args->arg,
1275			    sizeof(linux_flock)));
1276
1277	case LINUX_F_SETLK64:
1278		error = copyin((void *)args->arg, &linux_flock,
1279		    sizeof(linux_flock));
1280		if (error)
1281			return (error);
1282		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1283		return (kern_fcntl(td, args->fd, F_SETLK,
1284		    (intptr_t)&bsd_flock));
1285
1286	case LINUX_F_SETLKW64:
1287		error = copyin((void *)args->arg, &linux_flock,
1288		    sizeof(linux_flock));
1289		if (error)
1290			return (error);
1291		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1292		return (kern_fcntl(td, args->fd, F_SETLKW,
1293		    (intptr_t)&bsd_flock));
1294	}
1295
1296	return (fcntl_common(td, args));
1297}
1298#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1299
1300int
1301linux_chown(struct thread *td, struct linux_chown_args *args)
1302{
1303	char *path;
1304	int error;
1305
1306	LCONVPATHEXIST(td, args->path, &path);
1307
1308#ifdef DEBUG
1309	if (ldebug(chown))
1310		printf(ARGS(chown, "%s, %d, %d"), path, args->uid, args->gid);
1311#endif
1312	error = kern_chown(td, path, UIO_SYSSPACE, args->uid, args->gid);
1313	LFREEPATH(path);
1314	return (error);
1315}
1316
1317int
1318linux_lchown(struct thread *td, struct linux_lchown_args *args)
1319{
1320	char *path;
1321	int error;
1322
1323	LCONVPATHEXIST(td, args->path, &path);
1324
1325#ifdef DEBUG
1326	if (ldebug(lchown))
1327		printf(ARGS(lchown, "%s, %d, %d"), path, args->uid, args->gid);
1328#endif
1329	error = kern_lchown(td, path, UIO_SYSSPACE, args->uid, args->gid);
1330	LFREEPATH(path);
1331	return (error);
1332}
1333