linux_pipe.c revision 1.1
1/*	$NetBSD: linux_pipe.c,v 1.1 1995/02/28 23:25:07 fvdl Exp $	*/
2
3/*
4 * Copyright (c) 1995 Frank van der Linden
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 *    must display the following acknowledgement:
17 *      This product includes software developed for the NetBSD Project
18 *      by Frank van der Linden
19 * 4. The name of the author may not be used to endorse or promote products
20 *    derived from this software without specific prior written permission
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
23 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
27 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
31 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34/*
35 * Linux compatibility module. Try to deal with various Linux system calls.
36 */
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/namei.h>
41#include <sys/proc.h>
42#include <sys/dir.h>
43#include <sys/file.h>
44#include <sys/stat.h>
45#include <sys/filedesc.h>
46#include <sys/ioctl.h>
47#include <sys/kernel.h>
48#include <sys/malloc.h>
49#include <sys/mbuf.h>
50#include <sys/mman.h>
51#include <sys/mount.h>
52#include <sys/ptrace.h>
53#include <sys/resource.h>
54#include <sys/resourcevar.h>
55#include <sys/signal.h>
56#include <sys/signalvar.h>
57#include <sys/socket.h>
58#include <sys/time.h>
59#include <sys/times.h>
60#include <sys/vnode.h>
61#include <sys/uio.h>
62#include <sys/wait.h>
63#include <sys/utsname.h>
64#include <sys/unistd.h>
65
66#include <sys/syscallargs.h>
67
68#include <vm/vm.h>
69#include <vm/vm_param.h>
70
71#include <compat/linux/linux_types.h>
72#include <compat/linux/linux_fcntl.h>
73#include <compat/linux/linux_mmap.h>
74#include <compat/linux/linux_syscallargs.h>
75#include <compat/linux/linux_util.h>
76#include <compat/linux/linux_dirent.h>
77
/*
 * Rewrite a NetBSD wait status in place so that it carries Linux
 * signal numbers.
 *
 * If the status describes a process killed by a signal, the low seven
 * bits (the terminating signal) are replaced by the value
 * bsd_to_linux_sig() gives for that signal.  If it describes a stopped
 * process, bits 8-15 (the stopping signal) are replaced likewise.  Any
 * other status is left untouched.
 *
 * Always returns 0.  The function is declared int, so it must return
 * a value; the callers in this file ignore it.
 */
static int
bsd_to_linux_wstat(status)
	int *status;
{
	int st = *status;

	if (WIFSIGNALED(st))
		st = (st & ~0177) | bsd_to_linux_sig(WTERMSIG(st));
	else if (WIFSTOPPED(st))
		st = (st & ~0xff00) | (bsd_to_linux_sig(WSTOPSIG(st)) << 8);

	*status = st;
	return 0;
}
94
95/*
96 * waitpid(2). Passed on to the NetBSD call, surrounded by code to
97 * reserve some space for a NetBSD-style wait status, and converting
98 * it to what Linux wants.
99 */
100int
101linux_waitpid(p, uap, retval)
102	struct proc *p;
103	struct linux_waitpid_args /* {
104		syscallarg(int) pid;
105		syscallarg(int *) status;
106		syscallarg(int) options;
107	} */ *uap;
108	register_t *retval;
109{
110	struct wait4_args w4a;
111	int error, *status, tstat;
112	caddr_t sg;
113
114	sg = stackgap_init();
115	status = (int *) stackgap_alloc(&sg, sizeof status);
116
117	SCARG(&w4a, pid) = SCARG(uap, pid);
118	SCARG(&w4a, status) = status;
119	SCARG(&w4a, options) = SCARG(uap, options);
120	SCARG(&w4a, rusage) = NULL;
121
122	if ((error = wait4(p, &w4a, retval)))
123		return error;
124
125	if ((error = copyin(status, &tstat, sizeof tstat)))
126		return error;
127
128	bsd_to_linux_wstat(&tstat);
129
130	return copyout(&tstat, SCARG(uap, status), sizeof tstat);
131}
132
133/*
134 * This is very much the same as waitpid()
135 */
136int
137linux_wait4(p, uap, retval)
138	struct proc *p;
139	struct linux_wait4_args /* {
140		syscallarg(int) pid;
141		syscallarg(int *) status;
142		syscallarg(int) options;
143		syscallarg(struct rusage *) rusage;
144	} */ *uap;
145	register_t *retval;
146{
147	struct wait4_args w4a;
148	int error, *status, tstat;
149	caddr_t sg;
150
151	sg = stackgap_init();
152	status = (int *) stackgap_alloc(&sg, sizeof status);
153
154	SCARG(&w4a, pid) = SCARG(uap, pid);
155	SCARG(&w4a, status) = status;
156	SCARG(&w4a, options) = SCARG(uap, options);
157	SCARG(&w4a, rusage) = SCARG(uap, rusage);
158
159	if ((error = wait4(p, &w4a, retval)))
160		return error;
161
162	if ((error = copyin(status, &tstat, sizeof tstat)))
163		return error;
164
165	bsd_to_linux_wstat(&tstat);
166
167	return copyout(&tstat, SCARG(uap, status), sizeof tstat);
168}
169
/*
 * The old break call (the predecessor of brk(2)).  Nothing in the
 * Linux world is believed to use it any more, so it is not
 * implemented: every call fails with ENOSYS.
 */
int
linux_break(p, uap, retval)
	struct proc *p;
	struct linux_brk_args /* {
		syscallarg(char *) nsize;
	} */ *uap;
	register_t *retval;
{
	/* Unimplemented; none of the arguments are examined. */
	return (ENOSYS);
}
184
185/*
186 * Linux brk(2). The check if the new address is >= the old one is
187 * done in the kernel in Linux. NetBSD does it in the library.
188 */
189int
190linux_brk(p, uap, retval)
191	struct proc *p;
192	struct linux_brk_args /* {
193		syscallarg(char *) nsize;
194	} */ *uap;
195	register_t *retval;
196{
197	char *nbrk = SCARG(uap, nsize);
198	struct obreak_args oba;
199	struct vmspace *vm = p->p_vmspace;
200	int error = 0;
201	caddr_t oldbrk, newbrk;
202
203	oldbrk = vm->vm_daddr + ctob(vm->vm_dsize);
204	/*
205	 * XXX inconsistent.. Linux always returns at least the old
206	 * brk value, but it will be page-aligned if this fails,
207	 * and possibly not page aligned if it succeeds (the user
208	 * supplied pointer is returned).
209	 */
210	SCARG(&oba, nsize) = nbrk;
211
212	if ((caddr_t) nbrk > vm->vm_daddr && obreak(p, &oba, retval) == 0)
213		retval[0] = (register_t) nbrk;
214	else
215		retval[0] = (register_t) oldbrk;
216
217	return 0;
218}
219
220/*
221 * I wonder why Linux has gettimeofday() _and_ time().. Still, we
222 * need to deal with it.
223 */
224int
225linux_time(p, uap, retval)
226	struct proc *p;
227	struct linux_time_args /* {
228		linux_time_t *t;
229	} */ *uap;
230	register_t *retval;
231{
232	struct timeval atv;
233	linux_time_t tt;
234	int error;
235
236	microtime(&atv);
237
238	tt = atv.tv_sec;
239	if (SCARG(uap, t) && (error = copyout(&tt, SCARG(uap, t), sizeof tt)))
240		return error;
241
242	retval[0] = tt;
243	return 0;
244}
245
/*
 * statfs().  Not implemented yet (nor is fstatfs()): easy to do, but
 * not important for the binaries this was written to run.  Every call
 * fails with ENOSYS.
 */
int
linux_statfs(p, uap, retval)
	struct proc *p;
	struct linux_statfs_args /* {
		syscallarg(char *) path;
		syscallarg(struct linux_statfs *) sp;
	} */ *uap;
	register_t *retval;
{
	/* Unimplemented; none of the arguments are examined. */
	return (ENOSYS);
}
262
/*
 * fstatfs().  Not implemented; every call fails with ENOSYS.
 *
 * NOTE(review): the commented argument list below matches the one on
 * linux_statfs(); an fstatfs call would be expected to take a file
 * descriptor rather than a path -- confirm against
 * linux_syscallargs.h.
 */
int
linux_fstatfs(p, uap, retval)
	struct proc *p;
	struct linux_fstatfs_args /* {
		syscallarg(char *) path;
		syscallarg(struct linux_statfs *) sp;
	} */ *uap;
	register_t *retval;
{
	/* Unimplemented; none of the arguments are examined. */
	return (ENOSYS);
}
274
275/*
276 * uname(). Just copy the info from the various strings stored in the
277 * kernel, and put it in the Linux utsname structure. That structure
278 * is almost the same as the NetBSD one, only it has fields 65 characters
279 * long, and an extra domainname field.
280 */
281int
282linux_uname(p, uap, retval)
283	struct proc *p;
284	struct linux_uname_args /* {
285		syscallarg(struct linux_utsname *) up;
286	} */ *uap;
287	register_t *retval;
288{
289	extern char ostype[], osrelease[], version[], hostname[], domainname[];
290	extern char machine[];
291	struct linux_utsname tluts;
292	int len;
293	char *cp;
294
295	strncpy(tluts.l_sysname, ostype, sizeof (tluts.l_sysname));
296	strncpy(tluts.l_nodename, hostname, sizeof (tluts.l_nodename));
297	strncpy(tluts.l_release, osrelease, sizeof (tluts.l_release));
298	strncpy(tluts.l_machine, machine, sizeof (tluts.l_machine));
299	strncpy(tluts.l_domainname, domainname, sizeof (tluts.l_domainname));
300	strncpy(tluts.l_version, version, sizeof (tluts.l_version));
301
302	/* This part taken from the the uname() in libc */
303	len = sizeof (tluts.l_version);
304	for (cp = tluts.l_version; len--; ++cp)
305		if (*cp == '\n' || *cp == '\t')
306			if (len > 1)
307				*cp = ' ';
308			else
309				*cp = '\0';
310
311	return copyout(&tluts, SCARG(uap, up), sizeof tluts);
312}
313
314/*
315 * Linux wants to pass everything to a syscall in registers. However,
316 * mmap() has 6 of them. Oops: out of register error. They just pass
317 * everything in a structure.
318 */
319int
320linux_mmap(p, uap, retval)
321	struct proc *p;
322	struct linux_mmap_args /* {
323		syscallarg(struct linux_mmap *) lmp;
324	} */ *uap;
325	register_t *retval;
326{
327	struct linux_mmap lmap;
328	struct mmap_args cma;
329	int error, flags;
330
331	if ((error = copyin(SCARG(uap, lmp), &lmap, sizeof lmap)))
332		return error;
333
334	flags = 0;
335	flags |= cvtto_bsd_mask(lmap.lm_flags, LINUX_MAP_SHARED, MAP_SHARED);
336	flags |= cvtto_bsd_mask(lmap.lm_flags, LINUX_MAP_PRIVATE, MAP_PRIVATE);
337	flags |= cvtto_bsd_mask(lmap.lm_flags, LINUX_MAP_FIXED, MAP_FIXED);
338	flags |= cvtto_bsd_mask(lmap.lm_flags, LINUX_MAP_ANON, MAP_ANON);
339
340	SCARG(&cma,addr) = lmap.lm_addr;
341	SCARG(&cma,len) = lmap.lm_len;
342 	SCARG(&cma,prot) = lmap.lm_prot;
343	SCARG(&cma,flags) = flags;
344	SCARG(&cma,fd) = lmap.lm_fd;
345	SCARG(&cma,pad) = 0;
346	SCARG(&cma,pos) = lmap.lm_pos;
347
348	return mmap(p, &cma, retval);
349}
350
/*
 * fork().  Linux doesn't use the retval[1] value to determine whether
 * we are the child or the parent; it looks at retval[0] only.  So
 * after the native fork(), when retval[1] is 1, retval[0] is forced
 * to 0.
 *
 * Returns the error from fork(), or 0.
 */
int
linux_fork(p, uap, retval)
	struct proc *p;
	void *uap;
	register_t *retval;
{
	int error;

	error = fork(p, uap, retval);
	if (error != 0)
		return error;

	if (retval[1] == 1)
		retval[0] = 0;

	return 0;
}
371
372/*
373 * This code is partly stolen from src/lib/libc/compat-43/times.c
374 * XXX - CLK_TCK isn't declared in /sys, just in <time.h>, done here
375 */
376
377#define CLK_TCK 100
378#define	CONVTCK(r)	(r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))
379
380int
381linux_times(p, uap, retval)
382	struct proc *p;
383	struct linux_times_args /* {
384		syscallarg(struct times *) tms;
385	} */ *uap;
386	register_t *retval;
387{
388	struct timeval t;
389	struct linux_tms ltms;
390	struct rusage ru;
391	int error;
392
393	calcru(p, &ru.ru_utime, &ru.ru_stime, NULL);
394	ltms.ltms_utime = CONVTCK(ru.ru_utime);
395	ltms.ltms_stime = CONVTCK(ru.ru_stime);
396
397	ltms.ltms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime);
398	ltms.ltms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime);
399
400	if ((error = copyout(&ltms, SCARG(uap, tms), sizeof ltms)))
401		return error;
402
403	microtime(&t);
404
405	retval[0] = ((linux_clock_t)(CONVTCK(t)));
406	return 0;
407}
408
409/*
410 * NetBSD passes fd[0] in retval[0], and fd[1] in retval[1].
411 * Linux directly passes the pointer.
412 */
413int
414linux_pipe(p, uap, retval)
415	struct proc *p;
416	struct linux_pipe_args /* {
417		syscallarg(int *) pfds;
418	} */ *uap;
419	register_t *retval;
420{
421	int error;
422
423	if ((error = pipe(p, 0, retval)))
424		return error;
425
426	/* Assumes register_t is an int */
427
428	if ((error = copyout(retval, SCARG(uap, pfds), 2 * sizeof (int))))
429		return error;
430
431	retval[0] = 0;
432	return 0;
433}
434
435/*
436 * Alarm. This is a libc call which used setitimer(2) in NetBSD.
437 * Fiddle with the timers to make it work.
438 */
439int
440linux_alarm(p, uap, retval)
441	struct proc *p;
442	struct linux_alarm_args /* {
443		syscallarg(unsigned int) secs;
444	} */ *uap;
445	register_t *retval;
446{
447	int error, s;
448	struct itimerval *itp, it;
449
450	itp = &p->p_realtimer;
451	s = splclock();
452	/*
453	 * Clear any pending timer alarms.
454	 */
455	untimeout(realitexpire, p);
456	timerclear(&itp->it_interval);
457	if (timerisset(&itp->it_value) &&
458	    timercmp(&itp->it_value, &time, >))
459		__timersub(&itp->it_value, &time);
460	/*
461	 * Return how many seconds were left (rounded up)
462	 */
463	retval[0] = itp->it_value.tv_sec;
464	if (itp->it_value.tv_usec)
465		retval[0]++;
466
467	/*
468	 * alarm(0) just resets the timer.
469	 */
470	if (SCARG(uap, secs) == 0) {
471		timerclear(&itp->it_value);
472		splx(s);
473		return 0;
474	}
475
476	/*
477	 * Check the new alarm time for sanity, and set it.
478	 */
479	timerclear(&it.it_interval);
480	it.it_value.tv_sec = SCARG(uap, secs);
481	it.it_value.tv_usec = 0;
482	if (itimerfix(&it.it_value) || itimerfix(&it.it_interval)) {
483		splx(s);
484		return (EINVAL);
485	}
486
487	if (timerisset(&it.it_value)) {
488		__timeradd(&it.it_value, &time);
489		timeout(realitexpire, p, hzto(&it.it_value));
490	}
491	p->p_realtimer = it;
492	splx(s);
493
494	return 0;
495}
496
497/*
498 * utime(). Do conversion to things that utimes() understands,
499 * and pass it on.
500 */
501int
502linux_utime(p, uap, retval)
503	struct proc *p;
504	struct linux_utime_args /* {
505		syscallarg(char *) path;
506		syscallarg(struct linux_utimbuf *)times;
507	} */ *uap;
508	register_t *retval;
509{
510	caddr_t sg;
511	int error;
512	struct utimes_args ua;
513	struct timeval tv[2], *tvp;
514	struct linux_utimbuf lut;
515
516	sg = stackgap_init();
517	CHECK_ALT(p, &sg, SCARG(uap, path));
518
519	SCARG(&ua, path) = SCARG(uap, path);
520
521	if (SCARG(uap, times) != NULL) {
522		if ((error = copyin(SCARG(uap, times), &lut, sizeof lut)))
523			return error;
524		tv[0].tv_usec = tv[1].tv_usec = 0;
525		tv[0].tv_sec = lut.l_actime;
526		tv[1].tv_sec = lut.l_modtime;
527		tvp = (struct timeval *) stackgap_alloc(sizeof tv);
528		if ((error = copyout(tv, tvp, sizeof tv)))
529			return error;
530		SCARG(&ua, tptr) = tvp;
531	}
532	else
533		SCARG(&ua, tptr) = NULL;
534
535	return utimes(p, uap, retval);
536}
537
538/*
539 * Linux 'readdir' call. This code is mostly taken from the
540 * SunOS getdents call (see compat/sunos/sunos_misc.c), though
541 * an attempt has been made to keep it a little cleaner (failing
542 * miserably, because of the cruft needed if count 1 is passed).
543 *
544 * Read in BSD-style entries, convert them, and copy them out.
545 * Note that the Linux d_reclen is actually the name length,
546 * and d_off is the reclen.
547 *
548 * Note that this doesn't handle union-mounted filesystems.
549 */
550int
551linux_readdir(p, uap, retval)
552	struct proc *p;
553	struct linux_readdir_args /* {
554		syscallarg(int) fd;
555		syscallarg(struct linux_dirent *) dent;
556		syscallarg(unsigned int) count;
557	} */ *uap;
558	register_t *retval;
559{
560	register struct dirent *bdp;
561	struct vnode *vp;
562	caddr_t	inp, buf;	/* BSD-format */
563	int len, reclen;	/* BSD-format */
564	caddr_t outp;		/* Linux-format */
565	int resid, linuxreclen;	/* Linux-format */
566	struct file *fp;
567	struct uio auio;
568	struct iovec aiov;
569	struct linux_dirent idb;
570	off_t off;		/* true file offset */
571	linux_off_t soff;	/* Linux file offset */
572	int buflen, error, eofflag, nbytes, justone;
573	struct vattr va;
574
575	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
576		return (error);
577
578	if ((fp->f_flag & FREAD) == 0)
579		return (EBADF);
580
581	vp = (struct vnode *) fp->f_data;
582
583	if (vp->v_type != VDIR)	/* XXX  vnode readdir op should do this */
584		return (EINVAL);
585
586	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)))
587		return error;
588
589	nbytes = SCARG(uap, count);
590	if (nbytes == 1) {	/* Need this for older Linux libs, apparently */
591		nbytes = sizeof (struct linux_dirent);
592		justone = 1;
593	}
594	else
595		justone = 0;
596
597	buflen = max(va.va_blocksize, nbytes);
598	buf = malloc(buflen, M_TEMP, M_WAITOK);
599	VOP_LOCK(vp);
600	off = fp->f_offset;
601again:
602	aiov.iov_base = buf;
603	aiov.iov_len = buflen;
604	auio.uio_iov = &aiov;
605	auio.uio_iovcnt = 1;
606	auio.uio_rw = UIO_READ;
607	auio.uio_segflg = UIO_SYSSPACE;
608	auio.uio_procp = p;
609	auio.uio_resid = buflen;
610	auio.uio_offset = off;
611	/*
612         * First we read into the malloc'ed buffer, then
613         * we massage it into user space, one record at a time.
614         */
615	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, (u_long *) 0, 0);
616	if (error)
617		goto out;
618
619	inp = buf;
620	outp = (caddr_t) SCARG(uap, dent);
621	resid = nbytes;
622	if ((len = buflen - auio.uio_resid) == 0)
623		goto eof;
624
625	for (; len > 0; len -= reclen) {
626		reclen = ((struct dirent *) inp)->d_reclen;
627		if (reclen & 3)
628			panic("linux_readdir");
629		off += reclen;	/* each entry points to next */
630		bdp = (struct dirent *) inp;
631		if (bdp->d_fileno == 0) {
632			inp += reclen;	/* it is a hole; squish it out */
633			continue;
634		}
635		linuxreclen = LINUX_RECLEN(&idb, bdp->d_namlen);
636		if (reclen > len || resid < linuxreclen) {
637			/* entry too big for buffer, so just stop */
638			outp++;
639			break;
640		}
641		/*
642		 * Massage in place to make a Linux-shaped dirent (otherwise
643		 * we have to worry about touching user memory outside of
644		 * the copyout() call).
645		 */
646		idb.l_dino = (long) bdp->d_fileno;
647		idb.l_doff = (linux_off_t) linuxreclen;
648		idb.l_dreclen = (u_short) bdp->d_namlen;	/* sigh */
649		strcpy(idb.l_dname, bdp->d_name);
650		if ((error = copyout((caddr_t)&idb, outp, linuxreclen)))
651			goto out;
652		/* advance past this real entry */
653		inp += reclen;
654		/* advance output past Linux-shaped entry */
655		outp += linuxreclen;
656		resid -= linuxreclen;
657		if (justone)
658			break;
659	}
660
661	/* if we squished out the whole block, try again */
662	if (outp == (caddr_t) SCARG(uap, dent))
663		goto again;
664	fp->f_offset = off;	/* update the vnode offset */
665
666	if (justone)
667		nbytes = resid + linuxreclen;
668
669eof:
670	*retval = nbytes - resid;
671out:
672	VOP_UNLOCK(vp);
673	free(buf, M_TEMP);
674	return error;
675}
676
677/*
678 * Out of register error once more.. Apart from that, no difference.
679 */
680int
681linux_select(p, uap, retval)
682	struct proc *p;
683	struct linux_select_args /* {
684		syscallarg(struct linux_select *) lsp;
685	} */ *uap;
686	register_t *retval;
687{
688	struct linux_select ls;
689	struct select_args bsa;
690	int error;
691
692	if ((error = copyin(SCARG(uap, lsp), (caddr_t) &ls, sizeof ls)))
693		return error;
694
695	SCARG(&bsa, nd) = ls.nfds;
696	SCARG(&bsa, in) = ls.readfds;
697	SCARG(&bsa, ou) = ls.writefds;
698	SCARG(&bsa, ex) = ls.exceptfds;
699	SCARG(&bsa, tv) = ls.timeout;
700
701	return select(p, &bsa, retval);
702}
703
704/*
705 * Get the process group of a certain process. Look it up
706 * and return the value.
707 */
708int
709linux_getpgid(p, uap, retval)
710	struct proc *p;
711	struct linux_getpgid_args /* {
712		syscallarg(int) pid;
713	} */ *uap;
714	register_t *retval;
715{
716	struct proc *targp;
717
718	if (SCARG(uap, pid) != 0 && SCARG(uap, pid) != p->p_pid)
719		if ((targp = pfind(SCARG(uap, pid))) == 0)
720			return ESRCH;
721	else
722		targp = p;
723
724	retval[0] = targp->p_pgid;
725	return 0;
726}
727