linux_misc.c revision 113613
1/*-
 * Copyright (c) 1994-1995 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $FreeBSD: head/sys/compat/linux/linux_misc.c 113613 2003-04-17 22:02:47Z jhb $
29 */
30
31#include "opt_mac.h"
32
33#include <sys/param.h>
34#include <sys/blist.h>
35#include <sys/fcntl.h>
36#include <sys/imgact_aout.h>
37#include <sys/jail.h>
38#include <sys/kernel.h>
39#include <sys/lock.h>
40#include <sys/mac.h>
41#include <sys/malloc.h>
42#include <sys/mman.h>
43#include <sys/mount.h>
44#include <sys/mutex.h>
45#include <sys/namei.h>
46#include <sys/proc.h>
47#include <sys/reboot.h>
48#include <sys/resourcevar.h>
49#include <sys/signalvar.h>
50#include <sys/stat.h>
51#include <sys/syscallsubr.h>
52#include <sys/sysctl.h>
53#include <sys/sysproto.h>
54#include <sys/systm.h>
55#include <sys/time.h>
56#include <sys/vmmeter.h>
57#include <sys/vnode.h>
58#include <sys/wait.h>
59
60#include <vm/vm.h>
61#include <vm/pmap.h>
62#include <vm/vm_kern.h>
63#include <vm/vm_map.h>
64#include <vm/vm_extern.h>
65#include <vm/vm_object.h>
66#include <vm/swap_pager.h>
67
68#include <machine/limits.h>
69
70#include <posix4/sched.h>
71
72#include <machine/../linux/linux.h>
73#include <machine/../linux/linux_proto.h>
74
75#include <compat/linux/linux_mib.h>
76#include <compat/linux/linux_util.h>
77
/*
 * Translate a FreeBSD signal number into the number reported to a Linux
 * process.  On alpha the number is passed through unchanged.  Elsewhere,
 * numbers in the range 1..LINUX_SIGTBLSZ are looked up in
 * bsd_to_linux_signal[]; anything outside that range (including zero and
 * negative values, which would otherwise index before the table) is
 * returned as-is.
 *
 * The argument is evaluated more than once; do not pass expressions with
 * side effects.
 */
#ifdef __alpha__
#define BSD_TO_LINUX_SIGNAL(sig)       (sig)
#else
#define BSD_TO_LINUX_SIGNAL(sig)	\
	(((sig) > 0 && (sig) <= LINUX_SIGTBLSZ) ?	\
	    bsd_to_linux_signal[_SIG_IDX(sig)] : (sig))
#endif
84
85#ifndef __alpha__
86static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
87	RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
88	RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
89	RLIMIT_MEMLOCK, -1
90};
91#endif /*!__alpha__*/
92
93struct l_sysinfo {
94	l_long		uptime;		/* Seconds since boot */
95	l_ulong		loads[3];	/* 1, 5, and 15 minute load averages */
96	l_ulong		totalram;	/* Total usable main memory size */
97	l_ulong		freeram;	/* Available memory size */
98	l_ulong		sharedram;	/* Amount of shared memory */
99	l_ulong		bufferram;	/* Memory used by buffers */
100	l_ulong		totalswap;	/* Total swap space size */
101	l_ulong		freeswap;	/* swap space still available */
102	l_ushort	procs;		/* Number of current processes */
103	char		_f[22];		/* Pads structure to 64 bytes */
104};
105#ifndef __alpha__
106int
107linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
108{
109	struct l_sysinfo sysinfo;
110	vm_object_t object;
111	int i;
112	struct timespec ts;
113
114	/* Uptime is copied out of print_uptime() in kern_shutdown.c */
115	getnanouptime(&ts);
116	i = 0;
117	if (ts.tv_sec >= 86400) {
118		ts.tv_sec %= 86400;
119		i = 1;
120	}
121	if (i || ts.tv_sec >= 3600) {
122		ts.tv_sec %= 3600;
123		i = 1;
124	}
125	if (i || ts.tv_sec >= 60) {
126		ts.tv_sec %= 60;
127		i = 1;
128	}
129	sysinfo.uptime=ts.tv_sec;
130
131	/* Use the information from the mib to get our load averages */
132	for (i = 0; i < 3; i++)
133		sysinfo.loads[i] = averunnable.ldavg[i];
134
135	sysinfo.totalram = physmem * PAGE_SIZE;
136	sysinfo.freeram = sysinfo.totalram - cnt.v_wire_count * PAGE_SIZE;
137
138	sysinfo.sharedram = 0;
139	for (object = TAILQ_FIRST(&vm_object_list); object != NULL;
140	     object = TAILQ_NEXT(object, object_list))
141		if (object->shadow_count > 1)
142			sysinfo.sharedram += object->resident_page_count;
143
144	sysinfo.sharedram *= PAGE_SIZE;
145	sysinfo.bufferram = 0;
146
147	if (swapblist == NULL) {
148		sysinfo.totalswap= 0;
149		sysinfo.freeswap = 0;
150	} else {
151		sysinfo.totalswap = swapblist->bl_blocks * 1024;
152		sysinfo.freeswap = swapblist->bl_root->u.bmu_avail * PAGE_SIZE;
153	}
154
155	sysinfo.procs = 20; /* Hack */
156
157	return copyout(&sysinfo, args->info, sizeof(sysinfo));
158}
159#endif /*!__alpha__*/
160
161#ifndef __alpha__
162int
163linux_alarm(struct thread *td, struct linux_alarm_args *args)
164{
165	struct itimerval it, old_it;
166	struct timeval tv;
167	struct proc *p;
168
169#ifdef DEBUG
170	if (ldebug(alarm))
171		printf(ARGS(alarm, "%u"), args->secs);
172#endif
173
174	if (args->secs > 100000000)
175		return EINVAL;
176
177	it.it_value.tv_sec = (long)args->secs;
178	it.it_value.tv_usec = 0;
179	it.it_interval.tv_sec = 0;
180	it.it_interval.tv_usec = 0;
181	p = td->td_proc;
182	PROC_LOCK(p);
183	old_it = p->p_realtimer;
184	getmicrouptime(&tv);
185	if (timevalisset(&old_it.it_value))
186		callout_stop(&p->p_itcallout);
187	if (it.it_value.tv_sec != 0) {
188		callout_reset(&p->p_itcallout, tvtohz(&it.it_value),
189		    realitexpire, p);
190		timevaladd(&it.it_value, &tv);
191	}
192	p->p_realtimer = it;
193	PROC_UNLOCK(p);
194	if (timevalcmp(&old_it.it_value, &tv, >)) {
195		timevalsub(&old_it.it_value, &tv);
196		if (old_it.it_value.tv_usec != 0)
197			old_it.it_value.tv_sec++;
198		td->td_retval[0] = old_it.it_value.tv_sec;
199	}
200	return 0;
201}
202#endif /*!__alpha__*/
203
204int
205linux_brk(struct thread *td, struct linux_brk_args *args)
206{
207	struct vmspace *vm = td->td_proc->p_vmspace;
208	vm_offset_t new, old;
209	struct obreak_args /* {
210		char * nsize;
211	} */ tmp;
212
213#ifdef DEBUG
214	if (ldebug(brk))
215		printf(ARGS(brk, "%p"), (void *)args->dsend);
216#endif
217	old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
218	new = (vm_offset_t)args->dsend;
219	tmp.nsize = (char *) new;
220	if (((caddr_t)new > vm->vm_daddr) && !obreak(td, &tmp))
221		td->td_retval[0] = (long)new;
222	else
223		td->td_retval[0] = (long)old;
224
225	return 0;
226}
227
228int
229linux_uselib(struct thread *td, struct linux_uselib_args *args)
230{
231	struct nameidata ni;
232	struct vnode *vp;
233	struct exec *a_out;
234	struct vattr attr;
235	vm_offset_t vmaddr;
236	unsigned long file_offset;
237	vm_offset_t buffer;
238	unsigned long bss_size;
239	char *library;
240	int error;
241	int locked;
242
243	LCONVPATHEXIST(td, args->library, &library);
244
245#ifdef DEBUG
246	if (ldebug(uselib))
247		printf(ARGS(uselib, "%s"), library);
248#endif
249
250	a_out = NULL;
251	locked = 0;
252	vp = NULL;
253
254	/*
255	 * XXX: This code should make use of vn_open(), rather than doing
256	 * all this stuff itself.
257	 */
258	NDINIT(&ni, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, library, td);
259	error = namei(&ni);
260	LFREEPATH(library);
261	if (error)
262		goto cleanup;
263
264	vp = ni.ni_vp;
265	/*
266	 * XXX - This looks like a bogus check. A LOCKLEAF namei should not
267	 * succeed without returning a vnode.
268	 */
269	if (vp == NULL) {
270		error = ENOEXEC;	/* ?? */
271		goto cleanup;
272	}
273	NDFREE(&ni, NDF_ONLY_PNBUF);
274
275	/*
276	 * From here on down, we have a locked vnode that must be unlocked.
277	 */
278	locked++;
279
280	/* Writable? */
281	if (vp->v_writecount) {
282		error = ETXTBSY;
283		goto cleanup;
284	}
285
286	/* Executable? */
287	error = VOP_GETATTR(vp, &attr, td->td_ucred, td);
288	if (error)
289		goto cleanup;
290
291	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
292	    ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) {
293		error = ENOEXEC;
294		goto cleanup;
295	}
296
297	/* Sensible size? */
298	if (attr.va_size == 0) {
299		error = ENOEXEC;
300		goto cleanup;
301	}
302
303	/* Can we access it? */
304	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
305	if (error)
306		goto cleanup;
307
308	/*
309	 * XXX: This should use vn_open() so that it is properly authorized,
310	 * and to reduce code redundancy all over the place here.
311	 */
312#ifdef MAC
313	error = mac_check_vnode_open(td->td_ucred, vp, FREAD);
314	if (error)
315		goto cleanup;
316#endif
317	error = VOP_OPEN(vp, FREAD, td->td_ucred, td);
318	if (error)
319		goto cleanup;
320
321	/* Pull in executable header into kernel_map */
322	error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
323	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
324	/*
325	 * Lock no longer needed
326	 */
327	locked = 0;
328	VOP_UNLOCK(vp, 0, td);
329
330	if (error)
331		goto cleanup;
332
333	/* Is it a Linux binary ? */
334	if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
335		error = ENOEXEC;
336		goto cleanup;
337	}
338
339	/*
340	 * While we are here, we should REALLY do some more checks
341	 */
342
343	/* Set file/virtual offset based on a.out variant. */
344	switch ((int)(a_out->a_magic & 0xffff)) {
345	case 0413:	/* ZMAGIC */
346		file_offset = 1024;
347		break;
348	case 0314:	/* QMAGIC */
349		file_offset = 0;
350		break;
351	default:
352		error = ENOEXEC;
353		goto cleanup;
354	}
355
356	bss_size = round_page(a_out->a_bss);
357
358	/* Check various fields in header for validity/bounds. */
359	if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
360		error = ENOEXEC;
361		goto cleanup;
362	}
363
364	/* text + data can't exceed file size */
365	if (a_out->a_data + a_out->a_text > attr.va_size) {
366		error = EFAULT;
367		goto cleanup;
368	}
369
370	/* To protect td->td_proc->p_rlimit in the if condition. */
371	mtx_assert(&Giant, MA_OWNED);
372
373	/*
374	 * text/data/bss must not exceed limits
375	 * XXX - this is not complete. it should check current usage PLUS
376	 * the resources needed by this library.
377	 */
378	if (a_out->a_text > maxtsiz ||
379	    a_out->a_data + bss_size >
380	    td->td_proc->p_rlimit[RLIMIT_DATA].rlim_cur) {
381		error = ENOMEM;
382		goto cleanup;
383	}
384
385	mp_fixme("Unlocked vflags access.");
386	/* prevent more writers */
387	vp->v_vflag |= VV_TEXT;
388
389	/*
390	 * Check if file_offset page aligned. Currently we cannot handle
391	 * misalinged file offsets, and so we read in the entire image
392	 * (what a waste).
393	 */
394	if (file_offset & PAGE_MASK) {
395#ifdef DEBUG
396		printf("uselib: Non page aligned binary %lu\n", file_offset);
397#endif
398		/* Map text+data read/write/execute */
399
400		/* a_entry is the load address and is page aligned */
401		vmaddr = trunc_page(a_out->a_entry);
402
403		/* get anon user mapping, read+write+execute */
404		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
405		    &vmaddr, a_out->a_text + a_out->a_data, FALSE, VM_PROT_ALL,
406		    VM_PROT_ALL, 0);
407		if (error)
408			goto cleanup;
409
410		/* map file into kernel_map */
411		error = vm_mmap(kernel_map, &buffer,
412		    round_page(a_out->a_text + a_out->a_data + file_offset),
413		    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp,
414		    trunc_page(file_offset));
415		if (error)
416			goto cleanup;
417
418		/* copy from kernel VM space to user space */
419		error = copyout((void *)(buffer + file_offset),
420		    (void *)vmaddr, a_out->a_text + a_out->a_data);
421
422		/* release temporary kernel space */
423		vm_map_remove(kernel_map, buffer, buffer +
424		    round_page(a_out->a_text + a_out->a_data + file_offset));
425
426		if (error)
427			goto cleanup;
428	} else {
429#ifdef DEBUG
430		printf("uselib: Page aligned binary %lu\n", file_offset);
431#endif
432		/*
433		 * for QMAGIC, a_entry is 20 bytes beyond the load address
434		 * to skip the executable header
435		 */
436		vmaddr = trunc_page(a_out->a_entry);
437
438		/*
439		 * Map it all into the process's space as a single
440		 * copy-on-write "data" segment.
441		 */
442		error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr,
443		    a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL,
444		    MAP_PRIVATE | MAP_FIXED, (caddr_t)vp, file_offset);
445		if (error)
446			goto cleanup;
447	}
448#ifdef DEBUG
449	printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long*)vmaddr)[0],
450	    ((long*)vmaddr)[1]);
451#endif
452	if (bss_size != 0) {
453		/* Calculate BSS start address */
454		vmaddr = trunc_page(a_out->a_entry) + a_out->a_text +
455		    a_out->a_data;
456
457		/* allocate some 'anon' space */
458		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
459		    &vmaddr, bss_size, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0);
460		if (error)
461			goto cleanup;
462	}
463
464cleanup:
465	/* Unlock vnode if needed */
466	if (locked)
467		VOP_UNLOCK(vp, 0, td);
468
469	/* Release the kernel mapping. */
470	if (a_out)
471		vm_map_remove(kernel_map, (vm_offset_t)a_out,
472		    (vm_offset_t)a_out + PAGE_SIZE);
473
474	return error;
475}
476
477int
478linux_select(struct thread *td, struct linux_select_args *args)
479{
480	struct timeval tv0, tv1, utv, *tvp;
481	int error;
482
483#ifdef DEBUG
484	if (ldebug(select))
485		printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds,
486		    (void *)args->readfds, (void *)args->writefds,
487		    (void *)args->exceptfds, (void *)args->timeout);
488#endif
489
490	/*
491	 * Store current time for computation of the amount of
492	 * time left.
493	 */
494	if (args->timeout) {
495		if ((error = copyin(args->timeout, &utv, sizeof(utv))))
496			goto select_out;
497#ifdef DEBUG
498		if (ldebug(select))
499			printf(LMSG("incoming timeout (%ld/%ld)"),
500			    utv.tv_sec, utv.tv_usec);
501#endif
502
503		if (itimerfix(&utv)) {
504			/*
505			 * The timeval was invalid.  Convert it to something
506			 * valid that will act as it does under Linux.
507			 */
508			utv.tv_sec += utv.tv_usec / 1000000;
509			utv.tv_usec %= 1000000;
510			if (utv.tv_usec < 0) {
511				utv.tv_sec -= 1;
512				utv.tv_usec += 1000000;
513			}
514			if (utv.tv_sec < 0)
515				timevalclear(&utv);
516		}
517		microtime(&tv0);
518		tvp = &utv;
519	} else
520		tvp = NULL;
521
522	error = kern_select(td, args->nfds, args->readfds, args->writefds,
523	    args->exceptfds, tvp);
524
525#ifdef DEBUG
526	if (ldebug(select))
527		printf(LMSG("real select returns %d"), error);
528#endif
529	if (error) {
530		/*
531		 * See fs/select.c in the Linux kernel.  Without this,
532		 * Maelstrom doesn't work.
533		 */
534		if (error == ERESTART)
535			error = EINTR;
536		goto select_out;
537	}
538
539	if (args->timeout) {
540		if (td->td_retval[0]) {
541			/*
542			 * Compute how much time was left of the timeout,
543			 * by subtracting the current time and the time
544			 * before we started the call, and subtracting
545			 * that result from the user-supplied value.
546			 */
547			microtime(&tv1);
548			timevalsub(&tv1, &tv0);
549			timevalsub(&utv, &tv1);
550			if (utv.tv_sec < 0)
551				timevalclear(&utv);
552		} else
553			timevalclear(&utv);
554#ifdef DEBUG
555		if (ldebug(select))
556			printf(LMSG("outgoing timeout (%ld/%ld)"),
557			    utv.tv_sec, utv.tv_usec);
558#endif
559		if ((error = copyout(&utv, args->timeout, sizeof(utv))))
560			goto select_out;
561	}
562
563select_out:
564#ifdef DEBUG
565	if (ldebug(select))
566		printf(LMSG("select_out -> %d"), error);
567#endif
568	return error;
569}
570
571int
572linux_mremap(struct thread *td, struct linux_mremap_args *args)
573{
574	struct munmap_args /* {
575		void *addr;
576		size_t len;
577	} */ bsd_args;
578	int error = 0;
579
580#ifdef DEBUG
581	if (ldebug(mremap))
582		printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"),
583		    (void *)args->addr,
584		    (unsigned long)args->old_len,
585		    (unsigned long)args->new_len,
586		    (unsigned long)args->flags);
587#endif
588	args->new_len = round_page(args->new_len);
589	args->old_len = round_page(args->old_len);
590
591	if (args->new_len > args->old_len) {
592		td->td_retval[0] = 0;
593		return ENOMEM;
594	}
595
596	if (args->new_len < args->old_len) {
597		bsd_args.addr = (caddr_t)(args->addr + args->new_len);
598		bsd_args.len = args->old_len - args->new_len;
599		error = munmap(td, &bsd_args);
600	}
601
602	td->td_retval[0] = error ? 0 : (uintptr_t)args->addr;
603	return error;
604}
605
606#define LINUX_MS_ASYNC       0x0001
607#define LINUX_MS_INVALIDATE  0x0002
608#define LINUX_MS_SYNC        0x0004
609
610int
611linux_msync(struct thread *td, struct linux_msync_args *args)
612{
613	struct msync_args bsd_args;
614
615	bsd_args.addr = (caddr_t)args->addr;
616	bsd_args.len = args->len;
617	bsd_args.flags = args->fl & ~LINUX_MS_SYNC;
618
619	return msync(td, &bsd_args);
620}
621
622#ifndef __alpha__
623int
624linux_time(struct thread *td, struct linux_time_args *args)
625{
626	struct timeval tv;
627	l_time_t tm;
628	int error;
629
630#ifdef DEBUG
631	if (ldebug(time))
632		printf(ARGS(time, "*"));
633#endif
634
635	microtime(&tv);
636	tm = tv.tv_sec;
637	if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm))))
638		return error;
639	td->td_retval[0] = tm;
640	return 0;
641}
642#endif	/*!__alpha__*/
643
644struct l_times_argv {
645	l_long		tms_utime;
646	l_long		tms_stime;
647	l_long		tms_cutime;
648	l_long		tms_cstime;
649};
650
/* Clock ticks per second as seen by Linux processes. */
#ifdef __alpha__
#define CLK_TCK 1024	/* Linux uses 1024 on alpha */
#else
#define CLK_TCK 100	/* Linux uses 100 */
#endif

/*
 * Convert a struct timeval (an object, not a pointer) into CLK_TCK ticks,
 * truncating any fraction of a tick.  The argument is parenthesised so that
 * expressions such as *ptr can be passed; it is evaluated twice.
 */
#define CONVTCK(r)	\
	((r).tv_sec * CLK_TCK + (r).tv_usec / (1000000 / CLK_TCK))
658
659int
660linux_times(struct thread *td, struct linux_times_args *args)
661{
662	struct timeval tv;
663	struct l_times_argv tms;
664	struct rusage ru;
665	int error;
666
667#ifdef DEBUG
668	if (ldebug(times))
669		printf(ARGS(times, "*"));
670#endif
671
672	mtx_lock_spin(&sched_lock);
673	calcru(td->td_proc, &ru.ru_utime, &ru.ru_stime, NULL);
674	mtx_unlock_spin(&sched_lock);
675
676	tms.tms_utime = CONVTCK(ru.ru_utime);
677	tms.tms_stime = CONVTCK(ru.ru_stime);
678
679	tms.tms_cutime = CONVTCK(td->td_proc->p_stats->p_cru.ru_utime);
680	tms.tms_cstime = CONVTCK(td->td_proc->p_stats->p_cru.ru_stime);
681
682	if ((error = copyout(&tms, args->buf, sizeof(tms))))
683		return error;
684
685	microuptime(&tv);
686	td->td_retval[0] = (int)CONVTCK(tv);
687	return 0;
688}
689
690int
691linux_newuname(struct thread *td, struct linux_newuname_args *args)
692{
693	struct l_new_utsname utsname;
694	char osname[LINUX_MAX_UTSNAME];
695	char osrelease[LINUX_MAX_UTSNAME];
696
697#ifdef DEBUG
698	if (ldebug(newuname))
699		printf(ARGS(newuname, "*"));
700#endif
701
702	linux_get_osname(td, osname);
703	linux_get_osrelease(td, osrelease);
704
705	bzero(&utsname, sizeof(utsname));
706	strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME);
707	getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME);
708	strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME);
709	strlcpy(utsname.version, version, LINUX_MAX_UTSNAME);
710	strlcpy(utsname.machine, machine, LINUX_MAX_UTSNAME);
711	strlcpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME);
712
713	return (copyout(&utsname, args->buf, sizeof(utsname)));
714}
715
#if defined(__i386__)
/* Access/modification time pair as supplied by the caller of utime(2). */
struct l_utimbuf {
	l_time_t l_actime;
	l_time_t l_modtime;
};

/*
 * utime(2): set the access and modification times of args->fname via
 * kern_utimes().  When args->times is null a null timeval pointer is passed
 * on; otherwise the two second-resolution times are read from user space
 * and used with a zero microsecond part.
 */
int
linux_utime(struct thread *td, struct linux_utime_args *args)
{
	struct timeval tv[2], *tvp;
	struct l_utimbuf lut;
	char *fname;
	int error;

	LCONVPATHEXIST(td, args->fname, &fname);

#ifdef DEBUG
	if (ldebug(utime))
		printf(ARGS(utime, "%s, *"), fname);
#endif

	tvp = NULL;
	if (args->times) {
		error = copyin(args->times, &lut, sizeof lut);
		if (error)
			goto out;
		tv[0].tv_sec = lut.l_actime;
		tv[0].tv_usec = 0;
		tv[1].tv_sec = lut.l_modtime;
		tv[1].tv_usec = 0;
		tvp = tv;
	}

	error = kern_utimes(td, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE);
out:
	/* The converted path is released on every way out. */
	LFREEPATH(fname);
	return (error);
}
#endif /* __i386__ */
755
756#define __WCLONE 0x80000000
757
758#ifndef __alpha__
759int
760linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
761{
762	struct wait_args /* {
763		int pid;
764		int *status;
765		int options;
766		struct	rusage *rusage;
767	} */ tmp;
768	int error, tmpstat;
769
770#ifdef DEBUG
771	if (ldebug(waitpid))
772		printf(ARGS(waitpid, "%d, %p, %d"),
773		    args->pid, (void *)args->status, args->options);
774#endif
775
776	tmp.pid = args->pid;
777	tmp.status = args->status;
778	tmp.options = (args->options & (WNOHANG | WUNTRACED));
779	/* WLINUXCLONE should be equal to __WCLONE, but we make sure */
780	if (args->options & __WCLONE)
781		tmp.options |= WLINUXCLONE;
782	tmp.rusage = NULL;
783
784	if ((error = wait4(td, &tmp)) != 0)
785		return error;
786
787	if (args->status) {
788		if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
789			return error;
790		tmpstat &= 0xffff;
791		if (WIFSIGNALED(tmpstat))
792			tmpstat = (tmpstat & 0xffffff80) |
793			    BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
794		else if (WIFSTOPPED(tmpstat))
795			tmpstat = (tmpstat & 0xffff00ff) |
796			    (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
797		return copyout(&tmpstat, args->status, sizeof(int));
798	}
799
800	return 0;
801}
802#endif	/*!__alpha__*/
803
804int
805linux_wait4(struct thread *td, struct linux_wait4_args *args)
806{
807	struct wait_args /* {
808		int pid;
809		int *status;
810		int options;
811		struct	rusage *rusage;
812	} */ tmp;
813	int error, tmpstat;
814	struct proc *p;
815
816#ifdef DEBUG
817	if (ldebug(wait4))
818		printf(ARGS(wait4, "%d, %p, %d, %p"),
819		    args->pid, (void *)args->status, args->options,
820		    (void *)args->rusage);
821#endif
822
823	tmp.pid = args->pid;
824	tmp.status = args->status;
825	tmp.options = (args->options & (WNOHANG | WUNTRACED));
826	/* WLINUXCLONE should be equal to __WCLONE, but we make sure */
827	if (args->options & __WCLONE)
828		tmp.options |= WLINUXCLONE;
829	tmp.rusage = (struct rusage *)args->rusage;
830
831	if ((error = wait4(td, &tmp)) != 0)
832		return error;
833
834	p = td->td_proc;
835	PROC_LOCK(p);
836	SIGDELSET(p->p_siglist, SIGCHLD);
837	PROC_UNLOCK(p);
838
839	if (args->status) {
840		if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
841			return error;
842		tmpstat &= 0xffff;
843		if (WIFSIGNALED(tmpstat))
844			tmpstat = (tmpstat & 0xffffff80) |
845			    BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
846		else if (WIFSTOPPED(tmpstat))
847			tmpstat = (tmpstat & 0xffff00ff) |
848			    (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
849		return copyout(&tmpstat, args->status, sizeof(int));
850	}
851
852	return 0;
853}
854
855int
856linux_mknod(struct thread *td, struct linux_mknod_args *args)
857{
858	char *path;
859	int error;
860
861	LCONVPATHCREAT(td, args->path, &path);
862
863#ifdef DEBUG
864	if (ldebug(mknod))
865		printf(ARGS(mknod, "%s, %d, %d"), path, args->mode, args->dev);
866#endif
867
868	if (args->mode & S_IFIFO)
869		error = kern_mkfifo(td, path, UIO_SYSSPACE, args->mode);
870	else
871		error = kern_mknod(td, path, UIO_SYSSPACE, args->mode,
872		    args->dev);
873	LFREEPATH(path);
874	return (error);
875}
876
877/*
878 * UGH! This is just about the dumbest idea I've ever heard!!
879 */
880int
881linux_personality(struct thread *td, struct linux_personality_args *args)
882{
883#ifdef DEBUG
884	if (ldebug(personality))
885		printf(ARGS(personality, "%lu"), (unsigned long)args->per);
886#endif
887#ifndef __alpha__
888	if (args->per != 0)
889		return EINVAL;
890#endif
891
892	/* Yes Jim, it's still a Linux... */
893	td->td_retval[0] = 0;
894	return 0;
895}
896
897/*
898 * Wrappers for get/setitimer for debugging..
899 */
900int
901linux_setitimer(struct thread *td, struct linux_setitimer_args *args)
902{
903	struct setitimer_args bsa;
904	struct itimerval foo;
905	int error;
906
907#ifdef DEBUG
908	if (ldebug(setitimer))
909		printf(ARGS(setitimer, "%p, %p"),
910		    (void *)args->itv, (void *)args->oitv);
911#endif
912	bsa.which = args->which;
913	bsa.itv = (struct itimerval *)args->itv;
914	bsa.oitv = (struct itimerval *)args->oitv;
915	if (args->itv) {
916	    if ((error = copyin(args->itv, &foo, sizeof(foo))))
917		return error;
918#ifdef DEBUG
919	    if (ldebug(setitimer)) {
920		printf("setitimer: value: sec: %ld, usec: %ld\n",
921		    foo.it_value.tv_sec, foo.it_value.tv_usec);
922		printf("setitimer: interval: sec: %ld, usec: %ld\n",
923		    foo.it_interval.tv_sec, foo.it_interval.tv_usec);
924	    }
925#endif
926	}
927	return setitimer(td, &bsa);
928}
929
930int
931linux_getitimer(struct thread *td, struct linux_getitimer_args *args)
932{
933	struct getitimer_args bsa;
934#ifdef DEBUG
935	if (ldebug(getitimer))
936		printf(ARGS(getitimer, "%p"), (void *)args->itv);
937#endif
938	bsa.which = args->which;
939	bsa.itv = (struct itimerval *)args->itv;
940	return getitimer(td, &bsa);
941}
942
943#ifndef __alpha__
944int
945linux_nice(struct thread *td, struct linux_nice_args *args)
946{
947	struct setpriority_args	bsd_args;
948
949	bsd_args.which = PRIO_PROCESS;
950	bsd_args.who = 0;	/* current process */
951	bsd_args.prio = args->inc;
952	return setpriority(td, &bsd_args);
953}
954#endif	/*!__alpha__*/
955
956int
957linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
958{
959	struct ucred *newcred, *oldcred;
960	l_gid_t linux_gidset[NGROUPS];
961	gid_t *bsd_gidset;
962	int ngrp, error;
963	struct proc *p;
964
965	ngrp = args->gidsetsize;
966	if (ngrp >= NGROUPS)
967		return (EINVAL);
968	error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t));
969	if (error)
970		return (error);
971	newcred = crget();
972	p = td->td_proc;
973	PROC_LOCK(p);
974	oldcred = p->p_ucred;
975
976	/*
977	 * cr_groups[0] holds egid. Setting the whole set from
978	 * the supplied set will cause egid to be changed too.
979	 * Keep cr_groups[0] unchanged to prevent that.
980	 */
981
982	if ((error = suser_cred(oldcred, PRISON_ROOT)) != 0) {
983		PROC_UNLOCK(p);
984		crfree(newcred);
985		return (error);
986	}
987
988	crcopy(newcred, oldcred);
989	if (ngrp > 0) {
990		newcred->cr_ngroups = ngrp + 1;
991
992		bsd_gidset = newcred->cr_groups;
993		ngrp--;
994		while (ngrp >= 0) {
995			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
996			ngrp--;
997		}
998	}
999	else
1000		newcred->cr_ngroups = 1;
1001
1002	setsugid(p);
1003	p->p_ucred = newcred;
1004	PROC_UNLOCK(p);
1005	crfree(oldcred);
1006	return (0);
1007}
1008
1009int
1010linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
1011{
1012	struct ucred *cred;
1013	l_gid_t linux_gidset[NGROUPS];
1014	gid_t *bsd_gidset;
1015	int bsd_gidsetsz, ngrp, error;
1016
1017	cred = td->td_ucred;
1018	bsd_gidset = cred->cr_groups;
1019	bsd_gidsetsz = cred->cr_ngroups - 1;
1020
1021	/*
1022	 * cr_groups[0] holds egid. Returning the whole set
1023	 * here will cause a duplicate. Exclude cr_groups[0]
1024	 * to prevent that.
1025	 */
1026
1027	if ((ngrp = args->gidsetsize) == 0) {
1028		td->td_retval[0] = bsd_gidsetsz;
1029		return (0);
1030	}
1031
1032	if (ngrp < bsd_gidsetsz)
1033		return (EINVAL);
1034
1035	ngrp = 0;
1036	while (ngrp < bsd_gidsetsz) {
1037		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1038		ngrp++;
1039	}
1040
1041	if ((error = copyout(linux_gidset, args->grouplist,
1042	    ngrp * sizeof(l_gid_t))))
1043		return (error);
1044
1045	td->td_retval[0] = ngrp;
1046	return (0);
1047}
1048
1049#ifndef __alpha__
1050int
1051linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args)
1052{
1053	struct rlimit bsd_rlim;
1054	struct l_rlimit rlim;
1055	u_int which;
1056	int error;
1057
1058#ifdef DEBUG
1059	if (ldebug(setrlimit))
1060		printf(ARGS(setrlimit, "%d, %p"),
1061		    args->resource, (void *)args->rlim);
1062#endif
1063
1064	if (args->resource >= LINUX_RLIM_NLIMITS)
1065		return (EINVAL);
1066
1067	which = linux_to_bsd_resource[args->resource];
1068	if (which == -1)
1069		return (EINVAL);
1070
1071	error = copyin(args->rlim, &rlim, sizeof(rlim));
1072	if (error)
1073		return (error);
1074
1075	bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur;
1076	bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max;
1077	return (dosetrlimit(td, which, &bsd_rlim));
1078}
1079
1080int
1081linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
1082{
1083	struct l_rlimit rlim;
1084	struct proc *p = td->td_proc;
1085	struct rlimit *bsd_rlp;
1086	u_int which;
1087
1088#ifdef DEBUG
1089	if (ldebug(old_getrlimit))
1090		printf(ARGS(old_getrlimit, "%d, %p"),
1091		    args->resource, (void *)args->rlim);
1092#endif
1093
1094	if (args->resource >= LINUX_RLIM_NLIMITS)
1095		return (EINVAL);
1096
1097	which = linux_to_bsd_resource[args->resource];
1098	if (which == -1)
1099		return (EINVAL);
1100	bsd_rlp = &p->p_rlimit[which];
1101
1102	rlim.rlim_cur = (unsigned long)bsd_rlp->rlim_cur;
1103	if (rlim.rlim_cur == ULONG_MAX)
1104		rlim.rlim_cur = LONG_MAX;
1105	rlim.rlim_max = (unsigned long)bsd_rlp->rlim_max;
1106	if (rlim.rlim_max == ULONG_MAX)
1107		rlim.rlim_max = LONG_MAX;
1108	return (copyout(&rlim, args->rlim, sizeof(rlim)));
1109}
1110
1111int
1112linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
1113{
1114	struct l_rlimit rlim;
1115	struct proc *p = td->td_proc;
1116	struct rlimit *bsd_rlp;
1117	u_int which;
1118
1119#ifdef DEBUG
1120	if (ldebug(getrlimit))
1121		printf(ARGS(getrlimit, "%d, %p"),
1122		    args->resource, (void *)args->rlim);
1123#endif
1124
1125	if (args->resource >= LINUX_RLIM_NLIMITS)
1126		return (EINVAL);
1127
1128	which = linux_to_bsd_resource[args->resource];
1129	if (which == -1)
1130		return (EINVAL);
1131	bsd_rlp = &p->p_rlimit[which];
1132
1133	rlim.rlim_cur = (l_ulong)bsd_rlp->rlim_cur;
1134	rlim.rlim_max = (l_ulong)bsd_rlp->rlim_max;
1135	return (copyout(&rlim, args->rlim, sizeof(rlim)));
1136}
1137#endif /*!__alpha__*/
1138
1139int
1140linux_sched_setscheduler(struct thread *td,
1141    struct linux_sched_setscheduler_args *args)
1142{
1143	struct sched_setscheduler_args bsd;
1144
1145#ifdef DEBUG
1146	if (ldebug(sched_setscheduler))
1147		printf(ARGS(sched_setscheduler, "%d, %d, %p"),
1148		    args->pid, args->policy, (const void *)args->param);
1149#endif
1150
1151	switch (args->policy) {
1152	case LINUX_SCHED_OTHER:
1153		bsd.policy = SCHED_OTHER;
1154		break;
1155	case LINUX_SCHED_FIFO:
1156		bsd.policy = SCHED_FIFO;
1157		break;
1158	case LINUX_SCHED_RR:
1159		bsd.policy = SCHED_RR;
1160		break;
1161	default:
1162		return EINVAL;
1163	}
1164
1165	bsd.pid = args->pid;
1166	bsd.param = (struct sched_param *)args->param;
1167	return sched_setscheduler(td, &bsd);
1168}
1169
1170int
1171linux_sched_getscheduler(struct thread *td,
1172    struct linux_sched_getscheduler_args *args)
1173{
1174	struct sched_getscheduler_args bsd;
1175	int error;
1176
1177#ifdef DEBUG
1178	if (ldebug(sched_getscheduler))
1179		printf(ARGS(sched_getscheduler, "%d"), args->pid);
1180#endif
1181
1182	bsd.pid = args->pid;
1183	error = sched_getscheduler(td, &bsd);
1184
1185	switch (td->td_retval[0]) {
1186	case SCHED_OTHER:
1187		td->td_retval[0] = LINUX_SCHED_OTHER;
1188		break;
1189	case SCHED_FIFO:
1190		td->td_retval[0] = LINUX_SCHED_FIFO;
1191		break;
1192	case SCHED_RR:
1193		td->td_retval[0] = LINUX_SCHED_RR;
1194		break;
1195	}
1196
1197	return error;
1198}
1199
1200int
1201linux_sched_get_priority_max(struct thread *td,
1202    struct linux_sched_get_priority_max_args *args)
1203{
1204	struct sched_get_priority_max_args bsd;
1205
1206#ifdef DEBUG
1207	if (ldebug(sched_get_priority_max))
1208		printf(ARGS(sched_get_priority_max, "%d"), args->policy);
1209#endif
1210
1211	switch (args->policy) {
1212	case LINUX_SCHED_OTHER:
1213		bsd.policy = SCHED_OTHER;
1214		break;
1215	case LINUX_SCHED_FIFO:
1216		bsd.policy = SCHED_FIFO;
1217		break;
1218	case LINUX_SCHED_RR:
1219		bsd.policy = SCHED_RR;
1220		break;
1221	default:
1222		return EINVAL;
1223	}
1224	return sched_get_priority_max(td, &bsd);
1225}
1226
1227int
1228linux_sched_get_priority_min(struct thread *td,
1229    struct linux_sched_get_priority_min_args *args)
1230{
1231	struct sched_get_priority_min_args bsd;
1232
1233#ifdef DEBUG
1234	if (ldebug(sched_get_priority_min))
1235		printf(ARGS(sched_get_priority_min, "%d"), args->policy);
1236#endif
1237
1238	switch (args->policy) {
1239	case LINUX_SCHED_OTHER:
1240		bsd.policy = SCHED_OTHER;
1241		break;
1242	case LINUX_SCHED_FIFO:
1243		bsd.policy = SCHED_FIFO;
1244		break;
1245	case LINUX_SCHED_RR:
1246		bsd.policy = SCHED_RR;
1247		break;
1248	default:
1249		return EINVAL;
1250	}
1251	return sched_get_priority_min(td, &bsd);
1252}
1253
1254#define REBOOT_CAD_ON	0x89abcdef
1255#define REBOOT_CAD_OFF	0
1256#define REBOOT_HALT	0xcdef0123
1257
1258int
1259linux_reboot(struct thread *td, struct linux_reboot_args *args)
1260{
1261	struct reboot_args bsd_args;
1262
1263#ifdef DEBUG
1264	if (ldebug(reboot))
1265		printf(ARGS(reboot, "0x%x"), args->cmd);
1266#endif
1267	if (args->cmd == REBOOT_CAD_ON || args->cmd == REBOOT_CAD_OFF)
1268		return (0);
1269	bsd_args.opt = (args->cmd == REBOOT_HALT) ? RB_HALT : 0;
1270	return (reboot(td, &bsd_args));
1271}
1272
1273#ifndef __alpha__
1274
/*
 * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify
 * td->td_retval[1] when COMPAT_43 or COMPAT_SUNOS is defined. This
 * clobbers registers that are assumed to be preserved. The following
 * lightweight syscalls fix this. See also linux_getgid16() and
 * linux_getuid16() in linux_uid16.c.
 *
 * linux_getpid() - MP SAFE
 * linux_getgid() - MP SAFE
 * linux_getuid() - MP SAFE
 */
1286
1287int
1288linux_getpid(struct thread *td, struct linux_getpid_args *args)
1289{
1290
1291	td->td_retval[0] = td->td_proc->p_pid;
1292	return (0);
1293}
1294
1295int
1296linux_getgid(struct thread *td, struct linux_getgid_args *args)
1297{
1298
1299	td->td_retval[0] = td->td_ucred->cr_rgid;
1300	return (0);
1301}
1302
1303int
1304linux_getuid(struct thread *td, struct linux_getuid_args *args)
1305{
1306
1307	td->td_retval[0] = td->td_ucred->cr_ruid;
1308	return (0);
1309}
1310
1311#endif /*!__alpha__*/
1312
1313int
1314linux_getsid(struct thread *td, struct linux_getsid_args *args)
1315{
1316	struct getsid_args bsd;
1317	bsd.pid = args->pid;
1318	return getsid(td, &bsd);
1319}
1320