linux_misc.c revision 91392
1/*-
 * Copyright (c) 1994-1995 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software withough specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $FreeBSD: head/sys/compat/linux/linux_misc.c 91392 2002-02-27 16:47:27Z robert $
29 */
30
31#include "opt_compat.h"
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/fcntl.h>
36#include <sys/imgact_aout.h>
37#include <sys/jail.h>
38#include <sys/kernel.h>
39#include <sys/lock.h>
40#include <sys/mman.h>
41#include <sys/mount.h>
42#include <sys/mutex.h>
43#include <sys/namei.h>
44#include <sys/poll.h>
45#include <sys/proc.h>
46#include <sys/blist.h>
47#include <sys/reboot.h>
48#include <sys/resourcevar.h>
49#include <sys/signalvar.h>
50#include <sys/stat.h>
51#include <sys/sysctl.h>
52#include <sys/sysproto.h>
53#include <sys/time.h>
54#include <sys/unistd.h>
55#include <sys/vmmeter.h>
56#include <sys/vnode.h>
57#include <sys/wait.h>
58
59#include <vm/vm.h>
60#include <vm/pmap.h>
61#include <vm/vm_kern.h>
62#include <vm/vm_map.h>
63#include <vm/vm_extern.h>
64#include <vm/vm_object.h>
65#include <vm/vm_zone.h>
66#include <vm/swap_pager.h>
67
68#include <machine/frame.h>
69#include <machine/limits.h>
70#include <machine/psl.h>
71#include <machine/sysarch.h>
72#ifdef __i386__
73#include <machine/segments.h>
74#endif
75
76#include <posix4/sched.h>
77
78#include <machine/../linux/linux.h>
79#include <machine/../linux/linux_proto.h>
80#include <compat/linux/linux_mib.h>
81#include <compat/linux/linux_util.h>
82
/*
 * Translate a native signal number into the number Linux uses for it.
 *
 * On alpha the value is passed through unchanged.  Elsewhere, values in
 * the range 1..LINUX_SIGTBLSZ are looked up in bsd_to_linux_signal[] and
 * everything else is passed through.  0 is excluded from the lookup so
 * the table is never indexed with _SIG_IDX(0) (assumed to fall outside
 * the table -- confirm against the _SIG_IDX definition).
 *
 * The argument is evaluated more than once; pass side-effect-free
 * expressions only.
 */
#ifdef __alpha__
#define BSD_TO_LINUX_SIGNAL(sig)       (sig)
#else
#define BSD_TO_LINUX_SIGNAL(sig)	\
	(((sig) > 0 && (sig) <= LINUX_SIGTBLSZ) ?	\
	    bsd_to_linux_signal[_SIG_IDX(sig)] : (sig))
#endif
89
90#ifndef __alpha__
91static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
92	RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
93	RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
94	RLIMIT_MEMLOCK, -1
95};
96#endif /*!__alpha__*/
97
98struct l_sysinfo {
99	l_long		uptime;		/* Seconds since boot */
100	l_ulong		loads[3];	/* 1, 5, and 15 minute load averages */
101	l_ulong		totalram;	/* Total usable main memory size */
102	l_ulong		freeram;	/* Available memory size */
103	l_ulong		sharedram;	/* Amount of shared memory */
104	l_ulong		bufferram;	/* Memory used by buffers */
105	l_ulong		totalswap;	/* Total swap space size */
106	l_ulong		freeswap;	/* swap space still available */
107	l_ushort	procs;		/* Number of current processes */
108	char		_f[22];		/* Pads structure to 64 bytes */
109};
110#ifndef __alpha__
111int
112linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
113{
114	struct l_sysinfo sysinfo;
115	vm_object_t object;
116	int i;
117	struct timespec ts;
118
119	/* Uptime is copied out of print_uptime() in kern_shutdown.c */
120	getnanouptime(&ts);
121	i = 0;
122	if (ts.tv_sec >= 86400) {
123		ts.tv_sec %= 86400;
124		i = 1;
125	}
126	if (i || ts.tv_sec >= 3600) {
127		ts.tv_sec %= 3600;
128		i = 1;
129	}
130	if (i || ts.tv_sec >= 60) {
131		ts.tv_sec %= 60;
132		i = 1;
133	}
134	sysinfo.uptime=ts.tv_sec;
135
136	/* Use the information from the mib to get our load averages */
137	for (i = 0; i < 3; i++)
138		sysinfo.loads[i] = averunnable.ldavg[i];
139
140	sysinfo.totalram = physmem * PAGE_SIZE;
141	sysinfo.freeram = sysinfo.totalram - cnt.v_wire_count * PAGE_SIZE;
142
143	sysinfo.sharedram = 0;
144	for (object = TAILQ_FIRST(&vm_object_list); object != NULL;
145	     object = TAILQ_NEXT(object, object_list))
146		if (object->shadow_count > 1)
147			sysinfo.sharedram += object->resident_page_count;
148
149	sysinfo.sharedram *= PAGE_SIZE;
150	sysinfo.bufferram = 0;
151
152	if (swapblist == NULL) {
153		sysinfo.totalswap= 0;
154		sysinfo.freeswap = 0;
155	} else {
156		sysinfo.totalswap = swapblist->bl_blocks * 1024;
157		sysinfo.freeswap = swapblist->bl_root->u.bmu_avail * PAGE_SIZE;
158	}
159
160	sysinfo.procs = 20; /* Hack */
161
162	return copyout(&sysinfo, (caddr_t)args->info, sizeof(sysinfo));
163}
164#endif /*!__alpha__*/
165
166#ifndef __alpha__
167int
168linux_alarm(struct thread *td, struct linux_alarm_args *args)
169{
170	struct itimerval it, old_it;
171	struct timeval tv;
172	int s;
173
174#ifdef DEBUG
175	if (ldebug(alarm))
176		printf(ARGS(alarm, "%u"), args->secs);
177#endif
178
179	if (args->secs > 100000000)
180		return EINVAL;
181
182	it.it_value.tv_sec = (long)args->secs;
183	it.it_value.tv_usec = 0;
184	it.it_interval.tv_sec = 0;
185	it.it_interval.tv_usec = 0;
186	s = splsoftclock();
187	old_it = td->td_proc->p_realtimer;
188	getmicrouptime(&tv);
189	if (timevalisset(&old_it.it_value))
190		callout_stop(&td->td_proc->p_itcallout);
191	if (it.it_value.tv_sec != 0) {
192		callout_reset(&td->td_proc->p_itcallout, tvtohz(&it.it_value),
193		    realitexpire, td->td_proc);
194		timevaladd(&it.it_value, &tv);
195	}
196	td->td_proc->p_realtimer = it;
197	splx(s);
198	if (timevalcmp(&old_it.it_value, &tv, >)) {
199		timevalsub(&old_it.it_value, &tv);
200		if (old_it.it_value.tv_usec != 0)
201			old_it.it_value.tv_sec++;
202		td->td_retval[0] = old_it.it_value.tv_sec;
203	}
204	return 0;
205}
206#endif /*!__alpha__*/
207
208int
209linux_brk(struct thread *td, struct linux_brk_args *args)
210{
211	struct vmspace *vm = td->td_proc->p_vmspace;
212	vm_offset_t new, old;
213	struct obreak_args /* {
214		char * nsize;
215	} */ tmp;
216
217#ifdef DEBUG
218	if (ldebug(brk))
219		printf(ARGS(brk, "%p"), (void *)args->dsend);
220#endif
221	old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
222	new = (vm_offset_t)args->dsend;
223	tmp.nsize = (char *) new;
224	if (((caddr_t)new > vm->vm_daddr) && !obreak(td, &tmp))
225		td->td_retval[0] = (long)new;
226	else
227		td->td_retval[0] = (long)old;
228
229	return 0;
230}
231
232int
233linux_uselib(struct thread *td, struct linux_uselib_args *args)
234{
235	struct nameidata ni;
236	struct vnode *vp;
237	struct exec *a_out;
238	struct vattr attr;
239	vm_offset_t vmaddr;
240	unsigned long file_offset;
241	vm_offset_t buffer;
242	unsigned long bss_size;
243	int error;
244	caddr_t sg;
245	int locked;
246
247	sg = stackgap_init();
248	CHECKALTEXIST(td, &sg, args->library);
249
250#ifdef DEBUG
251	if (ldebug(uselib))
252		printf(ARGS(uselib, "%s"), args->library);
253#endif
254
255	a_out = NULL;
256	locked = 0;
257	vp = NULL;
258
259	NDINIT(&ni, LOOKUP, FOLLOW|LOCKLEAF, UIO_USERSPACE, args->library, td);
260	error = namei(&ni);
261	if (error)
262		goto cleanup;
263
264	vp = ni.ni_vp;
265	/*
266	 * XXX - This looks like a bogus check. A LOCKLEAF namei should not
267	 * succeed without returning a vnode.
268	 */
269	if (vp == NULL) {
270		error = ENOEXEC;	/* ?? */
271		goto cleanup;
272	}
273	NDFREE(&ni, NDF_ONLY_PNBUF);
274
275	/*
276	 * From here on down, we have a locked vnode that must be unlocked.
277	 */
278	locked++;
279
280	/* Writable? */
281	if (vp->v_writecount) {
282		error = ETXTBSY;
283		goto cleanup;
284	}
285
286	/* Executable? */
287	error = VOP_GETATTR(vp, &attr, td->td_proc->p_ucred, td);
288	if (error)
289		goto cleanup;
290
291	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
292	    ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) {
293		error = ENOEXEC;
294		goto cleanup;
295	}
296
297	/* Sensible size? */
298	if (attr.va_size == 0) {
299		error = ENOEXEC;
300		goto cleanup;
301	}
302
303	/* Can we access it? */
304	error = VOP_ACCESS(vp, VEXEC, td->td_proc->p_ucred, td);
305	if (error)
306		goto cleanup;
307
308	error = VOP_OPEN(vp, FREAD, td->td_proc->p_ucred, td);
309	if (error)
310		goto cleanup;
311
312	/*
313	 * Lock no longer needed
314	 */
315	VOP_UNLOCK(vp, 0, td);
316	locked = 0;
317
318	/* Pull in executable header into kernel_map */
319	error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
320	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
321	if (error)
322		goto cleanup;
323
324	/* Is it a Linux binary ? */
325	if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
326		error = ENOEXEC;
327		goto cleanup;
328	}
329
330	/*
331	 * While we are here, we should REALLY do some more checks
332	 */
333
334	/* Set file/virtual offset based on a.out variant. */
335	switch ((int)(a_out->a_magic & 0xffff)) {
336	case 0413:	/* ZMAGIC */
337		file_offset = 1024;
338		break;
339	case 0314:	/* QMAGIC */
340		file_offset = 0;
341		break;
342	default:
343		error = ENOEXEC;
344		goto cleanup;
345	}
346
347	bss_size = round_page(a_out->a_bss);
348
349	/* Check various fields in header for validity/bounds. */
350	if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
351		error = ENOEXEC;
352		goto cleanup;
353	}
354
355	/* text + data can't exceed file size */
356	if (a_out->a_data + a_out->a_text > attr.va_size) {
357		error = EFAULT;
358		goto cleanup;
359	}
360
361	/* To protect td->td_proc->p_rlimit in the if condition. */
362	mtx_assert(&Giant, MA_OWNED);
363
364	/*
365	 * text/data/bss must not exceed limits
366	 * XXX - this is not complete. it should check current usage PLUS
367	 * the resources needed by this library.
368	 */
369	if (a_out->a_text > maxtsiz ||
370	    a_out->a_data + bss_size >
371	    td->td_proc->p_rlimit[RLIMIT_DATA].rlim_cur) {
372		error = ENOMEM;
373		goto cleanup;
374	}
375
376	/* prevent more writers */
377	vp->v_flag |= VTEXT;
378
379	/*
380	 * Check if file_offset page aligned. Currently we cannot handle
381	 * misalinged file offsets, and so we read in the entire image
382	 * (what a waste).
383	 */
384	if (file_offset & PAGE_MASK) {
385#ifdef DEBUG
386		printf("uselib: Non page aligned binary %lu\n", file_offset);
387#endif
388		/* Map text+data read/write/execute */
389
390		/* a_entry is the load address and is page aligned */
391		vmaddr = trunc_page(a_out->a_entry);
392
393		/* get anon user mapping, read+write+execute */
394		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
395		    &vmaddr, a_out->a_text + a_out->a_data, FALSE, VM_PROT_ALL,
396		    VM_PROT_ALL, 0);
397		if (error)
398			goto cleanup;
399
400		/* map file into kernel_map */
401		error = vm_mmap(kernel_map, &buffer,
402		    round_page(a_out->a_text + a_out->a_data + file_offset),
403		    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp,
404		    trunc_page(file_offset));
405		if (error)
406			goto cleanup;
407
408		/* copy from kernel VM space to user space */
409		error = copyout((caddr_t)(uintptr_t)(buffer + file_offset),
410		    (caddr_t)vmaddr, a_out->a_text + a_out->a_data);
411
412		/* release temporary kernel space */
413		vm_map_remove(kernel_map, buffer, buffer +
414		    round_page(a_out->a_text + a_out->a_data + file_offset));
415
416		if (error)
417			goto cleanup;
418	} else {
419#ifdef DEBUG
420		printf("uselib: Page aligned binary %lu\n", file_offset);
421#endif
422		/*
423		 * for QMAGIC, a_entry is 20 bytes beyond the load address
424		 * to skip the executable header
425		 */
426		vmaddr = trunc_page(a_out->a_entry);
427
428		/*
429		 * Map it all into the process's space as a single
430		 * copy-on-write "data" segment.
431		 */
432		error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr,
433		    a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL,
434		    MAP_PRIVATE | MAP_FIXED, (caddr_t)vp, file_offset);
435		if (error)
436			goto cleanup;
437	}
438#ifdef DEBUG
439	printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long*)vmaddr)[0],
440	    ((long*)vmaddr)[1]);
441#endif
442	if (bss_size != 0) {
443		/* Calculate BSS start address */
444		vmaddr = trunc_page(a_out->a_entry) + a_out->a_text +
445		    a_out->a_data;
446
447		/* allocate some 'anon' space */
448		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
449		    &vmaddr, bss_size, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0);
450		if (error)
451			goto cleanup;
452	}
453
454cleanup:
455	/* Unlock vnode if needed */
456	if (locked)
457		VOP_UNLOCK(vp, 0, td);
458
459	/* Release the kernel mapping. */
460	if (a_out)
461		vm_map_remove(kernel_map, (vm_offset_t)a_out,
462		    (vm_offset_t)a_out + PAGE_SIZE);
463
464	return error;
465}
466
467int
468linux_select(struct thread *td, struct linux_select_args *args)
469{
470	struct select_args bsa;
471	struct timeval tv0, tv1, utv, *tvp;
472	caddr_t sg;
473	int error;
474
475#ifdef DEBUG
476	if (ldebug(select))
477		printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds,
478		    (void *)args->readfds, (void *)args->writefds,
479		    (void *)args->exceptfds, (void *)args->timeout);
480#endif
481
482	error = 0;
483	bsa.nd = args->nfds;
484	bsa.in = args->readfds;
485	bsa.ou = args->writefds;
486	bsa.ex = args->exceptfds;
487	bsa.tv = (struct timeval *)args->timeout;
488
489	/*
490	 * Store current time for computation of the amount of
491	 * time left.
492	 */
493	if (args->timeout) {
494		if ((error = copyin((caddr_t)args->timeout, &utv,
495		    sizeof(utv))))
496			goto select_out;
497#ifdef DEBUG
498		if (ldebug(select))
499			printf(LMSG("incoming timeout (%ld/%ld)"),
500			    utv.tv_sec, utv.tv_usec);
501#endif
502
503		if (itimerfix(&utv)) {
504			/*
505			 * The timeval was invalid.  Convert it to something
506			 * valid that will act as it does under Linux.
507			 */
508			sg = stackgap_init();
509			tvp = stackgap_alloc(&sg, sizeof(utv));
510			utv.tv_sec += utv.tv_usec / 1000000;
511			utv.tv_usec %= 1000000;
512			if (utv.tv_usec < 0) {
513				utv.tv_sec -= 1;
514				utv.tv_usec += 1000000;
515			}
516			if (utv.tv_sec < 0)
517				timevalclear(&utv);
518			if ((error = copyout(&utv, tvp, sizeof(utv))))
519				goto select_out;
520			bsa.tv = tvp;
521		}
522		microtime(&tv0);
523	}
524
525	error = select(td, &bsa);
526#ifdef DEBUG
527	if (ldebug(select))
528		printf(LMSG("real select returns %d"), error);
529#endif
530	if (error) {
531		/*
532		 * See fs/select.c in the Linux kernel.  Without this,
533		 * Maelstrom doesn't work.
534		 */
535		if (error == ERESTART)
536			error = EINTR;
537		goto select_out;
538	}
539
540	if (args->timeout) {
541		if (td->td_retval[0]) {
542			/*
543			 * Compute how much time was left of the timeout,
544			 * by subtracting the current time and the time
545			 * before we started the call, and subtracting
546			 * that result from the user-supplied value.
547			 */
548			microtime(&tv1);
549			timevalsub(&tv1, &tv0);
550			timevalsub(&utv, &tv1);
551			if (utv.tv_sec < 0)
552				timevalclear(&utv);
553		} else
554			timevalclear(&utv);
555#ifdef DEBUG
556		if (ldebug(select))
557			printf(LMSG("outgoing timeout (%ld/%ld)"),
558			    utv.tv_sec, utv.tv_usec);
559#endif
560		if ((error = copyout(&utv, (caddr_t)args->timeout,
561		    sizeof(utv))))
562			goto select_out;
563	}
564
565select_out:
566#ifdef DEBUG
567	if (ldebug(select))
568		printf(LMSG("select_out -> %d"), error);
569#endif
570	return error;
571}
572
573int
574linux_mremap(struct thread *td, struct linux_mremap_args *args)
575{
576	struct munmap_args /* {
577		void *addr;
578		size_t len;
579	} */ bsd_args;
580	int error = 0;
581
582#ifdef DEBUG
583	if (ldebug(mremap))
584		printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"),
585		    (void *)args->addr,
586		    (unsigned long)args->old_len,
587		    (unsigned long)args->new_len,
588		    (unsigned long)args->flags);
589#endif
590	args->new_len = round_page(args->new_len);
591	args->old_len = round_page(args->old_len);
592
593	if (args->new_len > args->old_len) {
594		td->td_retval[0] = 0;
595		return ENOMEM;
596	}
597
598	if (args->new_len < args->old_len) {
599		bsd_args.addr = (caddr_t)(args->addr + args->new_len);
600		bsd_args.len = args->old_len - args->new_len;
601		error = munmap(td, &bsd_args);
602	}
603
604	td->td_retval[0] = error ? 0 : (u_long)args->addr;
605	return error;
606}
607
608int
609linux_msync(struct thread *td, struct linux_msync_args *args)
610{
611	struct msync_args bsd_args;
612
613	bsd_args.addr = (caddr_t)args->addr;
614	bsd_args.len = args->len;
615	bsd_args.flags = 0;	/* XXX ignore */
616
617	return msync(td, &bsd_args);
618}
619
620#ifndef __alpha__
621int
622linux_time(struct thread *td, struct linux_time_args *args)
623{
624	struct timeval tv;
625	l_time_t tm;
626	int error;
627
628#ifdef DEBUG
629	if (ldebug(time))
630		printf(ARGS(time, "*"));
631#endif
632
633	microtime(&tv);
634	tm = tv.tv_sec;
635	if (args->tm && (error = copyout(&tm, (caddr_t)args->tm, sizeof(tm))))
636		return error;
637	td->td_retval[0] = tm;
638	return 0;
639}
640#endif	/*!__alpha__*/
641
642struct l_times_argv {
643	l_long		tms_utime;
644	l_long		tms_stime;
645	l_long		tms_cutime;
646	l_long		tms_cstime;
647};
648
/* Number of clock ticks per second that Linux reports to userland. */
#ifdef __alpha__
#define CLK_TCK 1024	/* Linux uses 1024 on alpha */
#else
#define CLK_TCK 100	/* Linux uses 100 */
#endif

/*
 * Convert a struct timeval (passed by value, not by pointer) into
 * CLK_TCK ticks.  The argument is parenthesised so that expressions
 * such as *ptr can be passed; it is evaluated twice.
 */
#define CONVTCK(r)	\
	((r).tv_sec * CLK_TCK + (r).tv_usec / (1000000 / CLK_TCK))
656
657int
658linux_times(struct thread *td, struct linux_times_args *args)
659{
660	struct timeval tv;
661	struct l_times_argv tms;
662	struct rusage ru;
663	int error;
664
665#ifdef DEBUG
666	if (ldebug(times))
667		printf(ARGS(times, "*"));
668#endif
669
670	mtx_lock_spin(&sched_lock);
671	calcru(td->td_proc, &ru.ru_utime, &ru.ru_stime, NULL);
672	mtx_unlock_spin(&sched_lock);
673
674	tms.tms_utime = CONVTCK(ru.ru_utime);
675	tms.tms_stime = CONVTCK(ru.ru_stime);
676
677	tms.tms_cutime = CONVTCK(td->td_proc->p_stats->p_cru.ru_utime);
678	tms.tms_cstime = CONVTCK(td->td_proc->p_stats->p_cru.ru_stime);
679
680	if ((error = copyout(&tms, (caddr_t)args->buf, sizeof(tms))))
681		return error;
682
683	microuptime(&tv);
684	td->td_retval[0] = (int)CONVTCK(tv);
685	return 0;
686}
687
688int
689linux_newuname(struct thread *td, struct linux_newuname_args *args)
690{
691	struct l_new_utsname utsname;
692	char osname[LINUX_MAX_UTSNAME];
693	char osrelease[LINUX_MAX_UTSNAME];
694
695#ifdef DEBUG
696	if (ldebug(newuname))
697		printf(ARGS(newuname, "*"));
698#endif
699
700	linux_get_osname(td->td_proc, osname);
701	linux_get_osrelease(td->td_proc, osrelease);
702
703	bzero(&utsname, sizeof(utsname));
704	strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1);
705	getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME-1);
706	strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1);
707	strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1);
708	strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1);
709	strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1);
710
711	return (copyout(&utsname, (caddr_t)args->buf, sizeof(utsname)));
712}
713
714#if defined(__i386__)
715struct l_utimbuf {
716	l_time_t l_actime;
717	l_time_t l_modtime;
718};
719
720int
721linux_utime(struct thread *td, struct linux_utime_args *args)
722{
723	struct utimes_args /* {
724		char	*path;
725		struct	timeval *tptr;
726	} */ bsdutimes;
727	struct timeval tv[2], *tvp;
728	struct l_utimbuf lut;
729	int error;
730	caddr_t sg;
731
732	sg = stackgap_init();
733	CHECKALTEXIST(td, &sg, args->fname);
734
735#ifdef DEBUG
736	if (ldebug(utime))
737		printf(ARGS(utime, "%s, *"), args->fname);
738#endif
739
740	if (args->times) {
741		if ((error = copyin((caddr_t)args->times, &lut, sizeof lut)))
742			return error;
743		tv[0].tv_sec = lut.l_actime;
744		tv[0].tv_usec = 0;
745		tv[1].tv_sec = lut.l_modtime;
746		tv[1].tv_usec = 0;
747		/* so that utimes can copyin */
748		tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
749		if (tvp == NULL)
750			return (ENAMETOOLONG);
751		if ((error = copyout(tv, tvp, sizeof(tv))))
752			return error;
753		bsdutimes.tptr = tvp;
754	} else
755		bsdutimes.tptr = NULL;
756
757	bsdutimes.path = args->fname;
758	return utimes(td, &bsdutimes);
759}
760#endif /* __i386__ */
761
/*
 * Linux wait option bit that linux_waitpid() and linux_wait4() translate
 * into the native WLINUXCLONE option.
 */
#define __WCLONE 0x80000000U
763
764#ifndef __alpha__
765int
766linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
767{
768	struct wait_args /* {
769		int pid;
770		int *status;
771		int options;
772		struct	rusage *rusage;
773	} */ tmp;
774	int error, tmpstat;
775
776#ifdef DEBUG
777	if (ldebug(waitpid))
778		printf(ARGS(waitpid, "%d, %p, %d"),
779		    args->pid, (void *)args->status, args->options);
780#endif
781
782	tmp.pid = args->pid;
783	tmp.status = args->status;
784	tmp.options = (args->options & (WNOHANG | WUNTRACED));
785	/* WLINUXCLONE should be equal to __WCLONE, but we make sure */
786	if (args->options & __WCLONE)
787		tmp.options |= WLINUXCLONE;
788	tmp.rusage = NULL;
789
790	if ((error = wait4(td, &tmp)) != 0)
791		return error;
792
793	if (args->status) {
794		if ((error = copyin((caddr_t)args->status, &tmpstat,
795		    sizeof(int))) != 0)
796			return error;
797		tmpstat &= 0xffff;
798		if (WIFSIGNALED(tmpstat))
799			tmpstat = (tmpstat & 0xffffff80) |
800			    BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
801		else if (WIFSTOPPED(tmpstat))
802			tmpstat = (tmpstat & 0xffff00ff) |
803			    (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
804		return copyout(&tmpstat, (caddr_t)args->status, sizeof(int));
805	}
806
807	return 0;
808}
809#endif	/*!__alpha__*/
810
811int
812linux_wait4(struct thread *td, struct linux_wait4_args *args)
813{
814	struct wait_args /* {
815		int pid;
816		int *status;
817		int options;
818		struct	rusage *rusage;
819	} */ tmp;
820	int error, tmpstat;
821
822#ifdef DEBUG
823	if (ldebug(wait4))
824		printf(ARGS(wait4, "%d, %p, %d, %p"),
825		    args->pid, (void *)args->status, args->options,
826		    (void *)args->rusage);
827#endif
828
829	tmp.pid = args->pid;
830	tmp.status = args->status;
831	tmp.options = (args->options & (WNOHANG | WUNTRACED));
832	/* WLINUXCLONE should be equal to __WCLONE, but we make sure */
833	if (args->options & __WCLONE)
834		tmp.options |= WLINUXCLONE;
835	tmp.rusage = (struct rusage *)args->rusage;
836
837	if ((error = wait4(td, &tmp)) != 0)
838		return error;
839
840	SIGDELSET(td->td_proc->p_siglist, SIGCHLD);
841
842	if (args->status) {
843		if ((error = copyin((caddr_t)args->status, &tmpstat,
844		    sizeof(int))) != 0)
845			return error;
846		tmpstat &= 0xffff;
847		if (WIFSIGNALED(tmpstat))
848			tmpstat = (tmpstat & 0xffffff80) |
849			    BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
850		else if (WIFSTOPPED(tmpstat))
851			tmpstat = (tmpstat & 0xffff00ff) |
852			    (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
853		return copyout(&tmpstat, (caddr_t)args->status, sizeof(int));
854	}
855
856	return 0;
857}
858
859int
860linux_mknod(struct thread *td, struct linux_mknod_args *args)
861{
862	caddr_t sg;
863	struct mknod_args bsd_mknod;
864	struct mkfifo_args bsd_mkfifo;
865
866	sg = stackgap_init();
867
868	CHECKALTCREAT(td, &sg, args->path);
869
870#ifdef DEBUG
871	if (ldebug(mknod))
872		printf(ARGS(mknod, "%s, %d, %d"),
873		    args->path, args->mode, args->dev);
874#endif
875
876	if (args->mode & S_IFIFO) {
877		bsd_mkfifo.path = args->path;
878		bsd_mkfifo.mode = args->mode;
879		return mkfifo(td, &bsd_mkfifo);
880	} else {
881		bsd_mknod.path = args->path;
882		bsd_mknod.mode = args->mode;
883		bsd_mknod.dev = args->dev;
884		return mknod(td, &bsd_mknod);
885	}
886}
887
888/*
889 * UGH! This is just about the dumbest idea I've ever heard!!
890 */
891int
892linux_personality(struct thread *td, struct linux_personality_args *args)
893{
894#ifdef DEBUG
895	if (ldebug(personality))
896		printf(ARGS(personality, "%d"), args->per);
897#endif
898#ifndef __alpha__
899	if (args->per != 0)
900		return EINVAL;
901#endif
902
903	/* Yes Jim, it's still a Linux... */
904	td->td_retval[0] = 0;
905	return 0;
906}
907
908/*
909 * Wrappers for get/setitimer for debugging..
910 */
911int
912linux_setitimer(struct thread *td, struct linux_setitimer_args *args)
913{
914	struct setitimer_args bsa;
915	struct itimerval foo;
916	int error;
917
918#ifdef DEBUG
919	if (ldebug(setitimer))
920		printf(ARGS(setitimer, "%p, %p"),
921		    (void *)args->itv, (void *)args->oitv);
922#endif
923	bsa.which = args->which;
924	bsa.itv = (struct itimerval *)args->itv;
925	bsa.oitv = (struct itimerval *)args->oitv;
926	if (args->itv) {
927	    if ((error = copyin((caddr_t)args->itv, &foo, sizeof(foo))))
928		return error;
929#ifdef DEBUG
930	    if (ldebug(setitimer)) {
931	        printf("setitimer: value: sec: %ld, usec: %ld\n",
932		    foo.it_value.tv_sec, foo.it_value.tv_usec);
933	        printf("setitimer: interval: sec: %ld, usec: %ld\n",
934		    foo.it_interval.tv_sec, foo.it_interval.tv_usec);
935	    }
936#endif
937	}
938	return setitimer(td, &bsa);
939}
940
941int
942linux_getitimer(struct thread *td, struct linux_getitimer_args *args)
943{
944	struct getitimer_args bsa;
945#ifdef DEBUG
946	if (ldebug(getitimer))
947		printf(ARGS(getitimer, "%p"), (void *)args->itv);
948#endif
949	bsa.which = args->which;
950	bsa.itv = (struct itimerval *)args->itv;
951	return getitimer(td, &bsa);
952}
953
954#ifndef __alpha__
955int
956linux_nice(struct thread *td, struct linux_nice_args *args)
957{
958	struct setpriority_args	bsd_args;
959
960	bsd_args.which = PRIO_PROCESS;
961	bsd_args.who = 0;	/* current process */
962	bsd_args.prio = args->inc;
963	return setpriority(td, &bsd_args);
964}
965#endif	/*!__alpha__*/
966
967int
968linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
969{
970	struct ucred *newcred, *oldcred;
971	l_gid_t linux_gidset[NGROUPS];
972	gid_t *bsd_gidset;
973	int ngrp, error;
974
975	ngrp = args->gidsetsize;
976	oldcred = td->td_proc->p_ucred;
977
978	/*
979	 * cr_groups[0] holds egid. Setting the whole set from
980	 * the supplied set will cause egid to be changed too.
981	 * Keep cr_groups[0] unchanged to prevent that.
982	 */
983
984	if ((error = suser_xxx(oldcred, NULL, PRISON_ROOT)) != 0)
985		return (error);
986
987	if (ngrp >= NGROUPS)
988		return (EINVAL);
989
990	newcred = crdup(oldcred);
991	if (ngrp > 0) {
992		error = copyin((caddr_t)args->grouplist, linux_gidset,
993			       ngrp * sizeof(l_gid_t));
994		if (error)
995			return (error);
996
997		newcred->cr_ngroups = ngrp + 1;
998
999		bsd_gidset = newcred->cr_groups;
1000		ngrp--;
1001		while (ngrp >= 0) {
1002			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
1003			ngrp--;
1004		}
1005	}
1006	else
1007		newcred->cr_ngroups = 1;
1008
1009	setsugid(td->td_proc);
1010	td->td_proc->p_ucred = newcred;
1011	crfree(oldcred);
1012	return (0);
1013}
1014
1015int
1016linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
1017{
1018	struct ucred *cred;
1019	l_gid_t linux_gidset[NGROUPS];
1020	gid_t *bsd_gidset;
1021	int bsd_gidsetsz, ngrp, error;
1022
1023	cred = td->td_proc->p_ucred;
1024	bsd_gidset = cred->cr_groups;
1025	bsd_gidsetsz = cred->cr_ngroups - 1;
1026
1027	/*
1028	 * cr_groups[0] holds egid. Returning the whole set
1029	 * here will cause a duplicate. Exclude cr_groups[0]
1030	 * to prevent that.
1031	 */
1032
1033	if ((ngrp = args->gidsetsize) == 0) {
1034		td->td_retval[0] = bsd_gidsetsz;
1035		return (0);
1036	}
1037
1038	if (ngrp < bsd_gidsetsz)
1039		return (EINVAL);
1040
1041	ngrp = 0;
1042	while (ngrp < bsd_gidsetsz) {
1043		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1044		ngrp++;
1045	}
1046
1047	if ((error = copyout(linux_gidset, (caddr_t)args->grouplist,
1048	    ngrp * sizeof(l_gid_t))))
1049		return (error);
1050
1051	td->td_retval[0] = ngrp;
1052	return (0);
1053}
1054
1055#ifndef __alpha__
1056int
1057linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args)
1058{
1059	struct __setrlimit_args bsd;
1060	struct l_rlimit rlim;
1061	int error;
1062	caddr_t sg = stackgap_init();
1063
1064#ifdef DEBUG
1065	if (ldebug(setrlimit))
1066		printf(ARGS(setrlimit, "%d, %p"),
1067		    args->resource, (void *)args->rlim);
1068#endif
1069
1070	if (args->resource >= LINUX_RLIM_NLIMITS)
1071		return (EINVAL);
1072
1073	bsd.which = linux_to_bsd_resource[args->resource];
1074	if (bsd.which == -1)
1075		return (EINVAL);
1076
1077	error = copyin((caddr_t)args->rlim, &rlim, sizeof(rlim));
1078	if (error)
1079		return (error);
1080
1081	bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit));
1082	bsd.rlp->rlim_cur = (rlim_t)rlim.rlim_cur;
1083	bsd.rlp->rlim_max = (rlim_t)rlim.rlim_max;
1084	return (setrlimit(td, &bsd));
1085}
1086
1087int
1088linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
1089{
1090	struct __getrlimit_args bsd;
1091	struct l_rlimit rlim;
1092	int error;
1093	caddr_t sg = stackgap_init();
1094
1095#ifdef DEBUG
1096	if (ldebug(old_getrlimit))
1097		printf(ARGS(old_getrlimit, "%d, %p"),
1098		    args->resource, (void *)args->rlim);
1099#endif
1100
1101	if (args->resource >= LINUX_RLIM_NLIMITS)
1102		return (EINVAL);
1103
1104	bsd.which = linux_to_bsd_resource[args->resource];
1105	if (bsd.which == -1)
1106		return (EINVAL);
1107
1108	bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit));
1109	error = getrlimit(td, &bsd);
1110	if (error)
1111		return (error);
1112
1113	rlim.rlim_cur = (unsigned long)bsd.rlp->rlim_cur;
1114	if (rlim.rlim_cur == ULONG_MAX)
1115		rlim.rlim_cur = LONG_MAX;
1116	rlim.rlim_max = (unsigned long)bsd.rlp->rlim_max;
1117	if (rlim.rlim_max == ULONG_MAX)
1118		rlim.rlim_max = LONG_MAX;
1119	return (copyout(&rlim, (caddr_t)args->rlim, sizeof(rlim)));
1120}
1121
1122int
1123linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
1124{
1125	struct __getrlimit_args bsd;
1126	struct l_rlimit rlim;
1127	int error;
1128	caddr_t sg = stackgap_init();
1129
1130#ifdef DEBUG
1131	if (ldebug(getrlimit))
1132		printf(ARGS(getrlimit, "%d, %p"),
1133		    args->resource, (void *)args->rlim);
1134#endif
1135
1136	if (args->resource >= LINUX_RLIM_NLIMITS)
1137		return (EINVAL);
1138
1139	bsd.which = linux_to_bsd_resource[args->resource];
1140	if (bsd.which == -1)
1141		return (EINVAL);
1142
1143	bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit));
1144	error = getrlimit(td, &bsd);
1145	if (error)
1146		return (error);
1147
1148	rlim.rlim_cur = (l_ulong)bsd.rlp->rlim_cur;
1149	rlim.rlim_max = (l_ulong)bsd.rlp->rlim_max;
1150	return (copyout(&rlim, (caddr_t)args->rlim, sizeof(rlim)));
1151}
1152#endif /*!__alpha__*/
1153
1154int
1155linux_sched_setscheduler(struct thread *td,
1156    struct linux_sched_setscheduler_args *args)
1157{
1158	struct sched_setscheduler_args bsd;
1159
1160#ifdef DEBUG
1161	if (ldebug(sched_setscheduler))
1162		printf(ARGS(sched_setscheduler, "%d, %d, %p"),
1163		    args->pid, args->policy, (const void *)args->param);
1164#endif
1165
1166	switch (args->policy) {
1167	case LINUX_SCHED_OTHER:
1168		bsd.policy = SCHED_OTHER;
1169		break;
1170	case LINUX_SCHED_FIFO:
1171		bsd.policy = SCHED_FIFO;
1172		break;
1173	case LINUX_SCHED_RR:
1174		bsd.policy = SCHED_RR;
1175		break;
1176	default:
1177		return EINVAL;
1178	}
1179
1180	bsd.pid = args->pid;
1181	bsd.param = (struct sched_param *)args->param;
1182	return sched_setscheduler(td, &bsd);
1183}
1184
1185int
1186linux_sched_getscheduler(struct thread *td,
1187    struct linux_sched_getscheduler_args *args)
1188{
1189	struct sched_getscheduler_args bsd;
1190	int error;
1191
1192#ifdef DEBUG
1193	if (ldebug(sched_getscheduler))
1194		printf(ARGS(sched_getscheduler, "%d"), args->pid);
1195#endif
1196
1197	bsd.pid = args->pid;
1198	error = sched_getscheduler(td, &bsd);
1199
1200	switch (td->td_retval[0]) {
1201	case SCHED_OTHER:
1202		td->td_retval[0] = LINUX_SCHED_OTHER;
1203		break;
1204	case SCHED_FIFO:
1205		td->td_retval[0] = LINUX_SCHED_FIFO;
1206		break;
1207	case SCHED_RR:
1208		td->td_retval[0] = LINUX_SCHED_RR;
1209		break;
1210	}
1211
1212	return error;
1213}
1214
1215int
1216linux_sched_get_priority_max(struct thread *td,
1217    struct linux_sched_get_priority_max_args *args)
1218{
1219	struct sched_get_priority_max_args bsd;
1220
1221#ifdef DEBUG
1222	if (ldebug(sched_get_priority_max))
1223		printf(ARGS(sched_get_priority_max, "%d"), args->policy);
1224#endif
1225
1226	switch (args->policy) {
1227	case LINUX_SCHED_OTHER:
1228		bsd.policy = SCHED_OTHER;
1229		break;
1230	case LINUX_SCHED_FIFO:
1231		bsd.policy = SCHED_FIFO;
1232		break;
1233	case LINUX_SCHED_RR:
1234		bsd.policy = SCHED_RR;
1235		break;
1236	default:
1237		return EINVAL;
1238	}
1239	return sched_get_priority_max(td, &bsd);
1240}
1241
1242int
1243linux_sched_get_priority_min(struct thread *td,
1244    struct linux_sched_get_priority_min_args *args)
1245{
1246	struct sched_get_priority_min_args bsd;
1247
1248#ifdef DEBUG
1249	if (ldebug(sched_get_priority_min))
1250		printf(ARGS(sched_get_priority_min, "%d"), args->policy);
1251#endif
1252
1253	switch (args->policy) {
1254	case LINUX_SCHED_OTHER:
1255		bsd.policy = SCHED_OTHER;
1256		break;
1257	case LINUX_SCHED_FIFO:
1258		bsd.policy = SCHED_FIFO;
1259		break;
1260	case LINUX_SCHED_RR:
1261		bsd.policy = SCHED_RR;
1262		break;
1263	default:
1264		return EINVAL;
1265	}
1266	return sched_get_priority_min(td, &bsd);
1267}
1268
1269#define REBOOT_CAD_ON	0x89abcdef
1270#define REBOOT_CAD_OFF	0
1271#define REBOOT_HALT	0xcdef0123
1272
1273int
1274linux_reboot(struct thread *td, struct linux_reboot_args *args)
1275{
1276	struct reboot_args bsd_args;
1277
1278#ifdef DEBUG
1279	if (ldebug(reboot))
1280		printf(ARGS(reboot, "0x%x"), args->cmd);
1281#endif
1282	if (args->cmd == REBOOT_CAD_ON || args->cmd == REBOOT_CAD_OFF)
1283		return (0);
1284	bsd_args.opt = (args->cmd == REBOOT_HALT) ? RB_HALT : 0;
1285	return (reboot(td, &bsd_args));
1286}
1287
1288#ifndef __alpha__
1289
/*
 * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify
 * td->td_retval[1] when COMPAT_43 or COMPAT_SUNOS is defined. This
 * clobbers registers that are assumed to be preserved. The following
 * lightweight syscalls fix this. See also linux_getgid16() and
 * linux_getuid16() in linux_uid16.c.
 *
 * linux_getpid() - MP SAFE
 * linux_getgid() - MP SAFE
 * linux_getuid() - MP SAFE
 */
1301
1302int
1303linux_getpid(struct thread *td, struct linux_getpid_args *args)
1304{
1305
1306	td->td_retval[0] = td->td_proc->p_pid;
1307	return (0);
1308}
1309
1310int
1311linux_getgid(struct thread *td, struct linux_getgid_args *args)
1312{
1313
1314	td->td_retval[0] = td->td_proc->p_ucred->cr_rgid;
1315	return (0);
1316}
1317
1318int
1319linux_getuid(struct thread *td, struct linux_getuid_args *args)
1320{
1321
1322	td->td_retval[0] = td->td_proc->p_ucred->cr_ruid;
1323	return (0);
1324}
1325
1326#endif /*!__alpha__*/
1327
1328int
1329linux_getsid(struct thread *td, struct linux_getsid_args *args)
1330{
1331	struct getsid_args bsd;
1332	bsd.pid = args->pid;
1333	return getsid(td, &bsd);
1334}
1335