linux_misc.c revision 102954
1/*-
 * Copyright (c) 1994-1995 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $FreeBSD: head/sys/compat/linux/linux_misc.c 102954 2002-09-05 08:13:20Z bde $
29 */
30
31#include "opt_mac.h"
32
33#include <sys/param.h>
34#include <sys/blist.h>
35#include <sys/fcntl.h>
36#include <sys/imgact_aout.h>
37#include <sys/jail.h>
38#include <sys/kernel.h>
39#include <sys/lock.h>
40#include <sys/mac.h>
41#include <sys/malloc.h>
42#include <sys/mman.h>
43#include <sys/mount.h>
44#include <sys/mutex.h>
45#include <sys/namei.h>
46#include <sys/proc.h>
47#include <sys/reboot.h>
48#include <sys/resourcevar.h>
49#include <sys/signalvar.h>
50#include <sys/stat.h>
51#include <sys/syscallsubr.h>
52#include <sys/sysctl.h>
53#include <sys/sysproto.h>
54#include <sys/systm.h>
55#include <sys/time.h>
56#include <sys/vmmeter.h>
57#include <sys/vnode.h>
58#include <sys/wait.h>
59
60#include <vm/vm.h>
61#include <vm/pmap.h>
62#include <vm/vm_kern.h>
63#include <vm/vm_map.h>
64#include <vm/vm_extern.h>
65#include <vm/vm_object.h>
66#include <vm/swap_pager.h>
67
68#include <machine/limits.h>
69
70#include <posix4/sched.h>
71
72#include <machine/../linux/linux.h>
73#include <machine/../linux/linux_proto.h>
74
75#include <compat/linux/linux_mib.h>
76#include <compat/linux/linux_util.h>
77
/*
 * Translate a BSD signal number to its Linux counterpart.
 *
 * On alpha the value is passed through unchanged.  Elsewhere, numbers up
 * to LINUX_SIGTBLSZ are looked up in bsd_to_linux_signal[] and anything
 * larger is passed through unchanged.
 *
 * The argument may be evaluated more than once, so it must not have side
 * effects.  Every use of the argument is parenthesized so that any
 * expression can be passed safely.
 */
#ifdef __alpha__
#define BSD_TO_LINUX_SIGNAL(sig)       (sig)
#else
#define BSD_TO_LINUX_SIGNAL(sig)					\
	(((sig) <= LINUX_SIGTBLSZ) ?					\
	    bsd_to_linux_signal[_SIG_IDX(sig)] : (sig))
#endif
84
85#ifndef __alpha__
86static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
87	RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
88	RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
89	RLIMIT_MEMLOCK, -1
90};
91#endif /*!__alpha__*/
92
93struct l_sysinfo {
94	l_long		uptime;		/* Seconds since boot */
95	l_ulong		loads[3];	/* 1, 5, and 15 minute load averages */
96	l_ulong		totalram;	/* Total usable main memory size */
97	l_ulong		freeram;	/* Available memory size */
98	l_ulong		sharedram;	/* Amount of shared memory */
99	l_ulong		bufferram;	/* Memory used by buffers */
100	l_ulong		totalswap;	/* Total swap space size */
101	l_ulong		freeswap;	/* swap space still available */
102	l_ushort	procs;		/* Number of current processes */
103	char		_f[22];		/* Pads structure to 64 bytes */
104};
105#ifndef __alpha__
106int
107linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
108{
109	struct l_sysinfo sysinfo;
110	vm_object_t object;
111	int i;
112	struct timespec ts;
113
114	/* Uptime is copied out of print_uptime() in kern_shutdown.c */
115	getnanouptime(&ts);
116	i = 0;
117	if (ts.tv_sec >= 86400) {
118		ts.tv_sec %= 86400;
119		i = 1;
120	}
121	if (i || ts.tv_sec >= 3600) {
122		ts.tv_sec %= 3600;
123		i = 1;
124	}
125	if (i || ts.tv_sec >= 60) {
126		ts.tv_sec %= 60;
127		i = 1;
128	}
129	sysinfo.uptime=ts.tv_sec;
130
131	/* Use the information from the mib to get our load averages */
132	for (i = 0; i < 3; i++)
133		sysinfo.loads[i] = averunnable.ldavg[i];
134
135	sysinfo.totalram = physmem * PAGE_SIZE;
136	sysinfo.freeram = sysinfo.totalram - cnt.v_wire_count * PAGE_SIZE;
137
138	sysinfo.sharedram = 0;
139	for (object = TAILQ_FIRST(&vm_object_list); object != NULL;
140	     object = TAILQ_NEXT(object, object_list))
141		if (object->shadow_count > 1)
142			sysinfo.sharedram += object->resident_page_count;
143
144	sysinfo.sharedram *= PAGE_SIZE;
145	sysinfo.bufferram = 0;
146
147	if (swapblist == NULL) {
148		sysinfo.totalswap= 0;
149		sysinfo.freeswap = 0;
150	} else {
151		sysinfo.totalswap = swapblist->bl_blocks * 1024;
152		sysinfo.freeswap = swapblist->bl_root->u.bmu_avail * PAGE_SIZE;
153	}
154
155	sysinfo.procs = 20; /* Hack */
156
157	return copyout(&sysinfo, (caddr_t)args->info, sizeof(sysinfo));
158}
159#endif /*!__alpha__*/
160
161#ifndef __alpha__
162int
163linux_alarm(struct thread *td, struct linux_alarm_args *args)
164{
165	struct itimerval it, old_it;
166	struct timeval tv;
167	int s;
168
169#ifdef DEBUG
170	if (ldebug(alarm))
171		printf(ARGS(alarm, "%u"), args->secs);
172#endif
173
174	if (args->secs > 100000000)
175		return EINVAL;
176
177	it.it_value.tv_sec = (long)args->secs;
178	it.it_value.tv_usec = 0;
179	it.it_interval.tv_sec = 0;
180	it.it_interval.tv_usec = 0;
181	s = splsoftclock();
182	old_it = td->td_proc->p_realtimer;
183	getmicrouptime(&tv);
184	if (timevalisset(&old_it.it_value))
185		callout_stop(&td->td_proc->p_itcallout);
186	if (it.it_value.tv_sec != 0) {
187		callout_reset(&td->td_proc->p_itcallout, tvtohz(&it.it_value),
188		    realitexpire, td->td_proc);
189		timevaladd(&it.it_value, &tv);
190	}
191	td->td_proc->p_realtimer = it;
192	splx(s);
193	if (timevalcmp(&old_it.it_value, &tv, >)) {
194		timevalsub(&old_it.it_value, &tv);
195		if (old_it.it_value.tv_usec != 0)
196			old_it.it_value.tv_sec++;
197		td->td_retval[0] = old_it.it_value.tv_sec;
198	}
199	return 0;
200}
201#endif /*!__alpha__*/
202
203int
204linux_brk(struct thread *td, struct linux_brk_args *args)
205{
206	struct vmspace *vm = td->td_proc->p_vmspace;
207	vm_offset_t new, old;
208	struct obreak_args /* {
209		char * nsize;
210	} */ tmp;
211
212#ifdef DEBUG
213	if (ldebug(brk))
214		printf(ARGS(brk, "%p"), (void *)args->dsend);
215#endif
216	old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
217	new = (vm_offset_t)args->dsend;
218	tmp.nsize = (char *) new;
219	if (((caddr_t)new > vm->vm_daddr) && !obreak(td, &tmp))
220		td->td_retval[0] = (long)new;
221	else
222		td->td_retval[0] = (long)old;
223
224	return 0;
225}
226
227int
228linux_uselib(struct thread *td, struct linux_uselib_args *args)
229{
230	struct nameidata ni;
231	struct vnode *vp;
232	struct exec *a_out;
233	struct vattr attr;
234	vm_offset_t vmaddr;
235	unsigned long file_offset;
236	vm_offset_t buffer;
237	unsigned long bss_size;
238	char *library;
239	int error;
240	int locked;
241
242	LCONVPATHEXIST(td, args->library, &library);
243
244#ifdef DEBUG
245	if (ldebug(uselib))
246		printf(ARGS(uselib, "%s"), library);
247#endif
248
249	a_out = NULL;
250	locked = 0;
251	vp = NULL;
252
253	/*
254	 * XXX: This code should make use of vn_open(), rather than doing
255	 * all this stuff itself.
256	 */
257	NDINIT(&ni, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, library, td);
258	error = namei(&ni);
259	LFREEPATH(library);
260	if (error)
261		goto cleanup;
262
263	vp = ni.ni_vp;
264	/*
265	 * XXX - This looks like a bogus check. A LOCKLEAF namei should not
266	 * succeed without returning a vnode.
267	 */
268	if (vp == NULL) {
269		error = ENOEXEC;	/* ?? */
270		goto cleanup;
271	}
272	NDFREE(&ni, NDF_ONLY_PNBUF);
273
274	/*
275	 * From here on down, we have a locked vnode that must be unlocked.
276	 */
277	locked++;
278
279	/* Writable? */
280	if (vp->v_writecount) {
281		error = ETXTBSY;
282		goto cleanup;
283	}
284
285	/* Executable? */
286	error = VOP_GETATTR(vp, &attr, td->td_ucred, td);
287	if (error)
288		goto cleanup;
289
290	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
291	    ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) {
292		error = ENOEXEC;
293		goto cleanup;
294	}
295
296	/* Sensible size? */
297	if (attr.va_size == 0) {
298		error = ENOEXEC;
299		goto cleanup;
300	}
301
302	/* Can we access it? */
303	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
304	if (error)
305		goto cleanup;
306
307	/*
308	 * XXX: This should use vn_open() so that it is properly authorized,
309	 * and to reduce code redundancy all over the place here.
310	 */
311#ifdef MAC
312	error = mac_check_vnode_open(td->td_ucred, vp, FREAD);
313	if (error)
314		goto cleanup;
315#endif
316	error = VOP_OPEN(vp, FREAD, td->td_ucred, td);
317	if (error)
318		goto cleanup;
319
320	/*
321	 * Lock no longer needed
322	 */
323	VOP_UNLOCK(vp, 0, td);
324	locked = 0;
325
326	/* Pull in executable header into kernel_map */
327	error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
328	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
329	if (error)
330		goto cleanup;
331
332	/* Is it a Linux binary ? */
333	if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
334		error = ENOEXEC;
335		goto cleanup;
336	}
337
338	/*
339	 * While we are here, we should REALLY do some more checks
340	 */
341
342	/* Set file/virtual offset based on a.out variant. */
343	switch ((int)(a_out->a_magic & 0xffff)) {
344	case 0413:	/* ZMAGIC */
345		file_offset = 1024;
346		break;
347	case 0314:	/* QMAGIC */
348		file_offset = 0;
349		break;
350	default:
351		error = ENOEXEC;
352		goto cleanup;
353	}
354
355	bss_size = round_page(a_out->a_bss);
356
357	/* Check various fields in header for validity/bounds. */
358	if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
359		error = ENOEXEC;
360		goto cleanup;
361	}
362
363	/* text + data can't exceed file size */
364	if (a_out->a_data + a_out->a_text > attr.va_size) {
365		error = EFAULT;
366		goto cleanup;
367	}
368
369	/* To protect td->td_proc->p_rlimit in the if condition. */
370	mtx_assert(&Giant, MA_OWNED);
371
372	/*
373	 * text/data/bss must not exceed limits
374	 * XXX - this is not complete. it should check current usage PLUS
375	 * the resources needed by this library.
376	 */
377	if (a_out->a_text > maxtsiz ||
378	    a_out->a_data + bss_size >
379	    td->td_proc->p_rlimit[RLIMIT_DATA].rlim_cur) {
380		error = ENOMEM;
381		goto cleanup;
382	}
383
384	mp_fixme("Unlocked vflags access.");
385	/* prevent more writers */
386	vp->v_vflag |= VV_TEXT;
387
388	/*
389	 * Check if file_offset page aligned. Currently we cannot handle
390	 * misalinged file offsets, and so we read in the entire image
391	 * (what a waste).
392	 */
393	if (file_offset & PAGE_MASK) {
394#ifdef DEBUG
395		printf("uselib: Non page aligned binary %lu\n", file_offset);
396#endif
397		/* Map text+data read/write/execute */
398
399		/* a_entry is the load address and is page aligned */
400		vmaddr = trunc_page(a_out->a_entry);
401
402		/* get anon user mapping, read+write+execute */
403		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
404		    &vmaddr, a_out->a_text + a_out->a_data, FALSE, VM_PROT_ALL,
405		    VM_PROT_ALL, 0);
406		if (error)
407			goto cleanup;
408
409		/* map file into kernel_map */
410		error = vm_mmap(kernel_map, &buffer,
411		    round_page(a_out->a_text + a_out->a_data + file_offset),
412		    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp,
413		    trunc_page(file_offset));
414		if (error)
415			goto cleanup;
416
417		/* copy from kernel VM space to user space */
418		error = copyout((caddr_t)(uintptr_t)(buffer + file_offset),
419		    (caddr_t)vmaddr, a_out->a_text + a_out->a_data);
420
421		/* release temporary kernel space */
422		vm_map_remove(kernel_map, buffer, buffer +
423		    round_page(a_out->a_text + a_out->a_data + file_offset));
424
425		if (error)
426			goto cleanup;
427	} else {
428#ifdef DEBUG
429		printf("uselib: Page aligned binary %lu\n", file_offset);
430#endif
431		/*
432		 * for QMAGIC, a_entry is 20 bytes beyond the load address
433		 * to skip the executable header
434		 */
435		vmaddr = trunc_page(a_out->a_entry);
436
437		/*
438		 * Map it all into the process's space as a single
439		 * copy-on-write "data" segment.
440		 */
441		error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr,
442		    a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL,
443		    MAP_PRIVATE | MAP_FIXED, (caddr_t)vp, file_offset);
444		if (error)
445			goto cleanup;
446	}
447#ifdef DEBUG
448	printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long*)vmaddr)[0],
449	    ((long*)vmaddr)[1]);
450#endif
451	if (bss_size != 0) {
452		/* Calculate BSS start address */
453		vmaddr = trunc_page(a_out->a_entry) + a_out->a_text +
454		    a_out->a_data;
455
456		/* allocate some 'anon' space */
457		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
458		    &vmaddr, bss_size, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0);
459		if (error)
460			goto cleanup;
461	}
462
463cleanup:
464	/* Unlock vnode if needed */
465	if (locked)
466		VOP_UNLOCK(vp, 0, td);
467
468	/* Release the kernel mapping. */
469	if (a_out)
470		vm_map_remove(kernel_map, (vm_offset_t)a_out,
471		    (vm_offset_t)a_out + PAGE_SIZE);
472
473	return error;
474}
475
476int
477linux_select(struct thread *td, struct linux_select_args *args)
478{
479	struct timeval tv0, tv1, utv, *tvp;
480	int error;
481
482#ifdef DEBUG
483	if (ldebug(select))
484		printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds,
485		    (void *)args->readfds, (void *)args->writefds,
486		    (void *)args->exceptfds, (void *)args->timeout);
487#endif
488
489	/*
490	 * Store current time for computation of the amount of
491	 * time left.
492	 */
493	if (args->timeout) {
494		if ((error = copyin((caddr_t)args->timeout, &utv,
495		    sizeof(utv))))
496			goto select_out;
497#ifdef DEBUG
498		if (ldebug(select))
499			printf(LMSG("incoming timeout (%ld/%ld)"),
500			    utv.tv_sec, utv.tv_usec);
501#endif
502
503		if (itimerfix(&utv)) {
504			/*
505			 * The timeval was invalid.  Convert it to something
506			 * valid that will act as it does under Linux.
507			 */
508			utv.tv_sec += utv.tv_usec / 1000000;
509			utv.tv_usec %= 1000000;
510			if (utv.tv_usec < 0) {
511				utv.tv_sec -= 1;
512				utv.tv_usec += 1000000;
513			}
514			if (utv.tv_sec < 0)
515				timevalclear(&utv);
516		}
517		microtime(&tv0);
518		tvp = &utv;
519	} else
520		tvp = NULL;
521
522	error = kern_select(td, args->nfds, args->readfds, args->writefds,
523	    args->exceptfds, tvp);
524
525#ifdef DEBUG
526	if (ldebug(select))
527		printf(LMSG("real select returns %d"), error);
528#endif
529	if (error) {
530		/*
531		 * See fs/select.c in the Linux kernel.  Without this,
532		 * Maelstrom doesn't work.
533		 */
534		if (error == ERESTART)
535			error = EINTR;
536		goto select_out;
537	}
538
539	if (args->timeout) {
540		if (td->td_retval[0]) {
541			/*
542			 * Compute how much time was left of the timeout,
543			 * by subtracting the current time and the time
544			 * before we started the call, and subtracting
545			 * that result from the user-supplied value.
546			 */
547			microtime(&tv1);
548			timevalsub(&tv1, &tv0);
549			timevalsub(&utv, &tv1);
550			if (utv.tv_sec < 0)
551				timevalclear(&utv);
552		} else
553			timevalclear(&utv);
554#ifdef DEBUG
555		if (ldebug(select))
556			printf(LMSG("outgoing timeout (%ld/%ld)"),
557			    utv.tv_sec, utv.tv_usec);
558#endif
559		if ((error = copyout(&utv, (caddr_t)args->timeout,
560		    sizeof(utv))))
561			goto select_out;
562	}
563
564select_out:
565#ifdef DEBUG
566	if (ldebug(select))
567		printf(LMSG("select_out -> %d"), error);
568#endif
569	return error;
570}
571
572int
573linux_mremap(struct thread *td, struct linux_mremap_args *args)
574{
575	struct munmap_args /* {
576		void *addr;
577		size_t len;
578	} */ bsd_args;
579	int error = 0;
580
581#ifdef DEBUG
582	if (ldebug(mremap))
583		printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"),
584		    (void *)args->addr,
585		    (unsigned long)args->old_len,
586		    (unsigned long)args->new_len,
587		    (unsigned long)args->flags);
588#endif
589	args->new_len = round_page(args->new_len);
590	args->old_len = round_page(args->old_len);
591
592	if (args->new_len > args->old_len) {
593		td->td_retval[0] = 0;
594		return ENOMEM;
595	}
596
597	if (args->new_len < args->old_len) {
598		bsd_args.addr = (caddr_t)(args->addr + args->new_len);
599		bsd_args.len = args->old_len - args->new_len;
600		error = munmap(td, &bsd_args);
601	}
602
603	td->td_retval[0] = error ? 0 : (u_long)args->addr;
604	return error;
605}
606
607int
608linux_msync(struct thread *td, struct linux_msync_args *args)
609{
610	struct msync_args bsd_args;
611
612	bsd_args.addr = (caddr_t)args->addr;
613	bsd_args.len = args->len;
614	bsd_args.flags = 0;	/* XXX ignore */
615
616	return msync(td, &bsd_args);
617}
618
619#ifndef __alpha__
620int
621linux_time(struct thread *td, struct linux_time_args *args)
622{
623	struct timeval tv;
624	l_time_t tm;
625	int error;
626
627#ifdef DEBUG
628	if (ldebug(time))
629		printf(ARGS(time, "*"));
630#endif
631
632	microtime(&tv);
633	tm = tv.tv_sec;
634	if (args->tm && (error = copyout(&tm, (caddr_t)args->tm, sizeof(tm))))
635		return error;
636	td->td_retval[0] = tm;
637	return 0;
638}
639#endif	/*!__alpha__*/
640
641struct l_times_argv {
642	l_long		tms_utime;
643	l_long		tms_stime;
644	l_long		tms_cutime;
645	l_long		tms_cstime;
646};
647
/* Clock tick rate reported to Linux binaries. */
#ifdef __alpha__
#define CLK_TCK 1024	/* Linux uses 1024 on alpha */
#else
#define CLK_TCK 100	/* Linux uses 100 */
#endif

/*
 * Convert a struct timeval (passed by value, not by pointer) into a
 * count of CLK_TCK ticks.  The argument is parenthesized so that
 * expressions such as *ptr can be passed; it is evaluated twice.
 */
#define CONVTCK(r)	\
	((r).tv_sec * CLK_TCK + (r).tv_usec / (1000000 / CLK_TCK))
655
656int
657linux_times(struct thread *td, struct linux_times_args *args)
658{
659	struct timeval tv;
660	struct l_times_argv tms;
661	struct rusage ru;
662	int error;
663
664#ifdef DEBUG
665	if (ldebug(times))
666		printf(ARGS(times, "*"));
667#endif
668
669	mtx_lock_spin(&sched_lock);
670	calcru(td->td_proc, &ru.ru_utime, &ru.ru_stime, NULL);
671	mtx_unlock_spin(&sched_lock);
672
673	tms.tms_utime = CONVTCK(ru.ru_utime);
674	tms.tms_stime = CONVTCK(ru.ru_stime);
675
676	tms.tms_cutime = CONVTCK(td->td_proc->p_stats->p_cru.ru_utime);
677	tms.tms_cstime = CONVTCK(td->td_proc->p_stats->p_cru.ru_stime);
678
679	if ((error = copyout(&tms, (caddr_t)args->buf, sizeof(tms))))
680		return error;
681
682	microuptime(&tv);
683	td->td_retval[0] = (int)CONVTCK(tv);
684	return 0;
685}
686
687int
688linux_newuname(struct thread *td, struct linux_newuname_args *args)
689{
690	struct l_new_utsname utsname;
691	char osname[LINUX_MAX_UTSNAME];
692	char osrelease[LINUX_MAX_UTSNAME];
693
694#ifdef DEBUG
695	if (ldebug(newuname))
696		printf(ARGS(newuname, "*"));
697#endif
698
699	linux_get_osname(td->td_proc, osname);
700	linux_get_osrelease(td->td_proc, osrelease);
701
702	bzero(&utsname, sizeof(utsname));
703	strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1);
704	getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME-1);
705	strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1);
706	strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1);
707	strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1);
708	strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1);
709
710	return (copyout(&utsname, (caddr_t)args->buf, sizeof(utsname)));
711}
712
#if defined(__i386__)
/* Argument structure read from userland by linux_utime(). */
struct l_utimbuf {
	l_time_t l_actime;	/* copied into the first timeval */
	l_time_t l_modtime;	/* copied into the second timeval */
};

/*
 * Linux utime(2): set a file's timestamps via kern_utimes().
 *
 * When args->times is zero a NULL timeval pointer is passed to
 * kern_utimes(); otherwise the two whole-second values are read from
 * userland and passed with zero microseconds.
 *
 * Returns 0 or an errno value.  The translated path is freed on every
 * exit.
 */
int
linux_utime(struct thread *td, struct linux_utime_args *args)
{
	struct timeval stamps[2], *tvp;
	struct l_utimbuf ubuf;
	char *fname;
	int error;

	LCONVPATHEXIST(td, args->fname, &fname);

#ifdef DEBUG
	if (ldebug(utime))
		printf(ARGS(utime, "%s, *"), fname);
#endif

	tvp = NULL;
	error = 0;
	if (args->times) {
		error = copyin((caddr_t)args->times, &ubuf, sizeof ubuf);
		if (error == 0) {
			stamps[0].tv_sec = ubuf.l_actime;
			stamps[0].tv_usec = 0;
			stamps[1].tv_sec = ubuf.l_modtime;
			stamps[1].tv_usec = 0;
			tvp = stamps;
		}
	}

	if (error == 0)
		error = kern_utimes(td, fname, UIO_SYSSPACE, tvp,
		    UIO_SYSSPACE);
	LFREEPATH(fname);
	return (error);
}
#endif /* __i386__ */
752
753#define __WCLONE 0x80000000
754
755#ifndef __alpha__
756int
757linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
758{
759	struct wait_args /* {
760		int pid;
761		int *status;
762		int options;
763		struct	rusage *rusage;
764	} */ tmp;
765	int error, tmpstat;
766
767#ifdef DEBUG
768	if (ldebug(waitpid))
769		printf(ARGS(waitpid, "%d, %p, %d"),
770		    args->pid, (void *)args->status, args->options);
771#endif
772
773	tmp.pid = args->pid;
774	tmp.status = args->status;
775	tmp.options = (args->options & (WNOHANG | WUNTRACED));
776	/* WLINUXCLONE should be equal to __WCLONE, but we make sure */
777	if (args->options & __WCLONE)
778		tmp.options |= WLINUXCLONE;
779	tmp.rusage = NULL;
780
781	if ((error = wait4(td, &tmp)) != 0)
782		return error;
783
784	if (args->status) {
785		if ((error = copyin((caddr_t)args->status, &tmpstat,
786		    sizeof(int))) != 0)
787			return error;
788		tmpstat &= 0xffff;
789		if (WIFSIGNALED(tmpstat))
790			tmpstat = (tmpstat & 0xffffff80) |
791			    BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
792		else if (WIFSTOPPED(tmpstat))
793			tmpstat = (tmpstat & 0xffff00ff) |
794			    (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
795		return copyout(&tmpstat, (caddr_t)args->status, sizeof(int));
796	}
797
798	return 0;
799}
800#endif	/*!__alpha__*/
801
802int
803linux_wait4(struct thread *td, struct linux_wait4_args *args)
804{
805	struct wait_args /* {
806		int pid;
807		int *status;
808		int options;
809		struct	rusage *rusage;
810	} */ tmp;
811	int error, tmpstat;
812
813#ifdef DEBUG
814	if (ldebug(wait4))
815		printf(ARGS(wait4, "%d, %p, %d, %p"),
816		    args->pid, (void *)args->status, args->options,
817		    (void *)args->rusage);
818#endif
819
820	tmp.pid = args->pid;
821	tmp.status = args->status;
822	tmp.options = (args->options & (WNOHANG | WUNTRACED));
823	/* WLINUXCLONE should be equal to __WCLONE, but we make sure */
824	if (args->options & __WCLONE)
825		tmp.options |= WLINUXCLONE;
826	tmp.rusage = (struct rusage *)args->rusage;
827
828	if ((error = wait4(td, &tmp)) != 0)
829		return error;
830
831	SIGDELSET(td->td_proc->p_siglist, SIGCHLD);
832
833	if (args->status) {
834		if ((error = copyin((caddr_t)args->status, &tmpstat,
835		    sizeof(int))) != 0)
836			return error;
837		tmpstat &= 0xffff;
838		if (WIFSIGNALED(tmpstat))
839			tmpstat = (tmpstat & 0xffffff80) |
840			    BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
841		else if (WIFSTOPPED(tmpstat))
842			tmpstat = (tmpstat & 0xffff00ff) |
843			    (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
844		return copyout(&tmpstat, (caddr_t)args->status, sizeof(int));
845	}
846
847	return 0;
848}
849
850int
851linux_mknod(struct thread *td, struct linux_mknod_args *args)
852{
853	char *path;
854	int error;
855
856	LCONVPATHCREAT(td, args->path, &path);
857
858#ifdef DEBUG
859	if (ldebug(mknod))
860		printf(ARGS(mknod, "%s, %d, %d"), path, args->mode, args->dev);
861#endif
862
863	if (args->mode & S_IFIFO)
864		error = kern_mkfifo(td, path, UIO_SYSSPACE, args->mode);
865	else
866		error = kern_mknod(td, path, UIO_SYSSPACE, args->mode,
867		    args->dev);
868	LFREEPATH(path);
869	return (error);
870}
871
872/*
873 * UGH! This is just about the dumbest idea I've ever heard!!
874 */
875int
876linux_personality(struct thread *td, struct linux_personality_args *args)
877{
878#ifdef DEBUG
879	if (ldebug(personality))
880		printf(ARGS(personality, "%d"), args->per);
881#endif
882#ifndef __alpha__
883	if (args->per != 0)
884		return EINVAL;
885#endif
886
887	/* Yes Jim, it's still a Linux... */
888	td->td_retval[0] = 0;
889	return 0;
890}
891
892/*
893 * Wrappers for get/setitimer for debugging..
894 */
895int
896linux_setitimer(struct thread *td, struct linux_setitimer_args *args)
897{
898	struct setitimer_args bsa;
899	struct itimerval foo;
900	int error;
901
902#ifdef DEBUG
903	if (ldebug(setitimer))
904		printf(ARGS(setitimer, "%p, %p"),
905		    (void *)args->itv, (void *)args->oitv);
906#endif
907	bsa.which = args->which;
908	bsa.itv = (struct itimerval *)args->itv;
909	bsa.oitv = (struct itimerval *)args->oitv;
910	if (args->itv) {
911	    if ((error = copyin((caddr_t)args->itv, &foo, sizeof(foo))))
912		return error;
913#ifdef DEBUG
914	    if (ldebug(setitimer)) {
915	        printf("setitimer: value: sec: %ld, usec: %ld\n",
916		    foo.it_value.tv_sec, foo.it_value.tv_usec);
917	        printf("setitimer: interval: sec: %ld, usec: %ld\n",
918		    foo.it_interval.tv_sec, foo.it_interval.tv_usec);
919	    }
920#endif
921	}
922	return setitimer(td, &bsa);
923}
924
925int
926linux_getitimer(struct thread *td, struct linux_getitimer_args *args)
927{
928	struct getitimer_args bsa;
929#ifdef DEBUG
930	if (ldebug(getitimer))
931		printf(ARGS(getitimer, "%p"), (void *)args->itv);
932#endif
933	bsa.which = args->which;
934	bsa.itv = (struct itimerval *)args->itv;
935	return getitimer(td, &bsa);
936}
937
938#ifndef __alpha__
939int
940linux_nice(struct thread *td, struct linux_nice_args *args)
941{
942	struct setpriority_args	bsd_args;
943
944	bsd_args.which = PRIO_PROCESS;
945	bsd_args.who = 0;	/* current process */
946	bsd_args.prio = args->inc;
947	return setpriority(td, &bsd_args);
948}
949#endif	/*!__alpha__*/
950
951int
952linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
953{
954	struct ucred *newcred, *oldcred;
955	l_gid_t linux_gidset[NGROUPS];
956	gid_t *bsd_gidset;
957	int ngrp, error;
958	struct proc *p;
959
960	ngrp = args->gidsetsize;
961	if (ngrp >= NGROUPS)
962		return (EINVAL);
963	error = copyin((caddr_t)args->grouplist, linux_gidset,
964	    ngrp * sizeof(l_gid_t));
965	if (error)
966		return (error);
967	newcred = crget();
968	p = td->td_proc;
969	PROC_LOCK(p);
970	oldcred = p->p_ucred;
971
972	/*
973	 * cr_groups[0] holds egid. Setting the whole set from
974	 * the supplied set will cause egid to be changed too.
975	 * Keep cr_groups[0] unchanged to prevent that.
976	 */
977
978	if ((error = suser_cred(oldcred, PRISON_ROOT)) != 0) {
979		PROC_UNLOCK(p);
980		crfree(newcred);
981		return (error);
982	}
983
984	crcopy(newcred, oldcred);
985	if (ngrp > 0) {
986		newcred->cr_ngroups = ngrp + 1;
987
988		bsd_gidset = newcred->cr_groups;
989		ngrp--;
990		while (ngrp >= 0) {
991			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
992			ngrp--;
993		}
994	}
995	else
996		newcred->cr_ngroups = 1;
997
998	setsugid(p);
999	p->p_ucred = newcred;
1000	PROC_UNLOCK(p);
1001	crfree(oldcred);
1002	return (0);
1003}
1004
1005int
1006linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
1007{
1008	struct ucred *cred;
1009	l_gid_t linux_gidset[NGROUPS];
1010	gid_t *bsd_gidset;
1011	int bsd_gidsetsz, ngrp, error;
1012
1013	cred = td->td_ucred;
1014	bsd_gidset = cred->cr_groups;
1015	bsd_gidsetsz = cred->cr_ngroups - 1;
1016
1017	/*
1018	 * cr_groups[0] holds egid. Returning the whole set
1019	 * here will cause a duplicate. Exclude cr_groups[0]
1020	 * to prevent that.
1021	 */
1022
1023	if ((ngrp = args->gidsetsize) == 0) {
1024		td->td_retval[0] = bsd_gidsetsz;
1025		return (0);
1026	}
1027
1028	if (ngrp < bsd_gidsetsz)
1029		return (EINVAL);
1030
1031	ngrp = 0;
1032	while (ngrp < bsd_gidsetsz) {
1033		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1034		ngrp++;
1035	}
1036
1037	if ((error = copyout(linux_gidset, (caddr_t)args->grouplist,
1038	    ngrp * sizeof(l_gid_t))))
1039		return (error);
1040
1041	td->td_retval[0] = ngrp;
1042	return (0);
1043}
1044
1045#ifndef __alpha__
1046int
1047linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args)
1048{
1049	struct rlimit bsd_rlim;
1050	struct l_rlimit rlim;
1051	u_int which;
1052	int error;
1053
1054#ifdef DEBUG
1055	if (ldebug(setrlimit))
1056		printf(ARGS(setrlimit, "%d, %p"),
1057		    args->resource, (void *)args->rlim);
1058#endif
1059
1060	if (args->resource >= LINUX_RLIM_NLIMITS)
1061		return (EINVAL);
1062
1063	which = linux_to_bsd_resource[args->resource];
1064	if (which == -1)
1065		return (EINVAL);
1066
1067	error = copyin((caddr_t)args->rlim, &rlim, sizeof(rlim));
1068	if (error)
1069		return (error);
1070
1071	bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur;
1072	bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max;
1073	return (dosetrlimit(td, which, &bsd_rlim));
1074}
1075
1076int
1077linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
1078{
1079	struct l_rlimit rlim;
1080	struct proc *p = td->td_proc;
1081	struct rlimit *bsd_rlp;
1082	u_int which;
1083
1084#ifdef DEBUG
1085	if (ldebug(old_getrlimit))
1086		printf(ARGS(old_getrlimit, "%d, %p"),
1087		    args->resource, (void *)args->rlim);
1088#endif
1089
1090	if (args->resource >= LINUX_RLIM_NLIMITS)
1091		return (EINVAL);
1092
1093	which = linux_to_bsd_resource[args->resource];
1094	if (which == -1)
1095		return (EINVAL);
1096	bsd_rlp = &p->p_rlimit[which];
1097
1098	rlim.rlim_cur = (unsigned long)bsd_rlp->rlim_cur;
1099	if (rlim.rlim_cur == ULONG_MAX)
1100		rlim.rlim_cur = LONG_MAX;
1101	rlim.rlim_max = (unsigned long)bsd_rlp->rlim_max;
1102	if (rlim.rlim_max == ULONG_MAX)
1103		rlim.rlim_max = LONG_MAX;
1104	return (copyout(&rlim, (caddr_t)args->rlim, sizeof(rlim)));
1105}
1106
1107int
1108linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
1109{
1110	struct l_rlimit rlim;
1111	struct proc *p = td->td_proc;
1112	struct rlimit *bsd_rlp;
1113	u_int which;
1114
1115#ifdef DEBUG
1116	if (ldebug(getrlimit))
1117		printf(ARGS(getrlimit, "%d, %p"),
1118		    args->resource, (void *)args->rlim);
1119#endif
1120
1121	if (args->resource >= LINUX_RLIM_NLIMITS)
1122		return (EINVAL);
1123
1124	which = linux_to_bsd_resource[args->resource];
1125	if (which == -1)
1126		return (EINVAL);
1127	bsd_rlp = &p->p_rlimit[which];
1128
1129	rlim.rlim_cur = (l_ulong)bsd_rlp->rlim_cur;
1130	rlim.rlim_max = (l_ulong)bsd_rlp->rlim_max;
1131	return (copyout(&rlim, (caddr_t)args->rlim, sizeof(rlim)));
1132}
1133#endif /*!__alpha__*/
1134
1135int
1136linux_sched_setscheduler(struct thread *td,
1137    struct linux_sched_setscheduler_args *args)
1138{
1139	struct sched_setscheduler_args bsd;
1140
1141#ifdef DEBUG
1142	if (ldebug(sched_setscheduler))
1143		printf(ARGS(sched_setscheduler, "%d, %d, %p"),
1144		    args->pid, args->policy, (const void *)args->param);
1145#endif
1146
1147	switch (args->policy) {
1148	case LINUX_SCHED_OTHER:
1149		bsd.policy = SCHED_OTHER;
1150		break;
1151	case LINUX_SCHED_FIFO:
1152		bsd.policy = SCHED_FIFO;
1153		break;
1154	case LINUX_SCHED_RR:
1155		bsd.policy = SCHED_RR;
1156		break;
1157	default:
1158		return EINVAL;
1159	}
1160
1161	bsd.pid = args->pid;
1162	bsd.param = (struct sched_param *)args->param;
1163	return sched_setscheduler(td, &bsd);
1164}
1165
1166int
1167linux_sched_getscheduler(struct thread *td,
1168    struct linux_sched_getscheduler_args *args)
1169{
1170	struct sched_getscheduler_args bsd;
1171	int error;
1172
1173#ifdef DEBUG
1174	if (ldebug(sched_getscheduler))
1175		printf(ARGS(sched_getscheduler, "%d"), args->pid);
1176#endif
1177
1178	bsd.pid = args->pid;
1179	error = sched_getscheduler(td, &bsd);
1180
1181	switch (td->td_retval[0]) {
1182	case SCHED_OTHER:
1183		td->td_retval[0] = LINUX_SCHED_OTHER;
1184		break;
1185	case SCHED_FIFO:
1186		td->td_retval[0] = LINUX_SCHED_FIFO;
1187		break;
1188	case SCHED_RR:
1189		td->td_retval[0] = LINUX_SCHED_RR;
1190		break;
1191	}
1192
1193	return error;
1194}
1195
1196int
1197linux_sched_get_priority_max(struct thread *td,
1198    struct linux_sched_get_priority_max_args *args)
1199{
1200	struct sched_get_priority_max_args bsd;
1201
1202#ifdef DEBUG
1203	if (ldebug(sched_get_priority_max))
1204		printf(ARGS(sched_get_priority_max, "%d"), args->policy);
1205#endif
1206
1207	switch (args->policy) {
1208	case LINUX_SCHED_OTHER:
1209		bsd.policy = SCHED_OTHER;
1210		break;
1211	case LINUX_SCHED_FIFO:
1212		bsd.policy = SCHED_FIFO;
1213		break;
1214	case LINUX_SCHED_RR:
1215		bsd.policy = SCHED_RR;
1216		break;
1217	default:
1218		return EINVAL;
1219	}
1220	return sched_get_priority_max(td, &bsd);
1221}
1222
1223int
1224linux_sched_get_priority_min(struct thread *td,
1225    struct linux_sched_get_priority_min_args *args)
1226{
1227	struct sched_get_priority_min_args bsd;
1228
1229#ifdef DEBUG
1230	if (ldebug(sched_get_priority_min))
1231		printf(ARGS(sched_get_priority_min, "%d"), args->policy);
1232#endif
1233
1234	switch (args->policy) {
1235	case LINUX_SCHED_OTHER:
1236		bsd.policy = SCHED_OTHER;
1237		break;
1238	case LINUX_SCHED_FIFO:
1239		bsd.policy = SCHED_FIFO;
1240		break;
1241	case LINUX_SCHED_RR:
1242		bsd.policy = SCHED_RR;
1243		break;
1244	default:
1245		return EINVAL;
1246	}
1247	return sched_get_priority_min(td, &bsd);
1248}
1249
1250#define REBOOT_CAD_ON	0x89abcdef
1251#define REBOOT_CAD_OFF	0
1252#define REBOOT_HALT	0xcdef0123
1253
1254int
1255linux_reboot(struct thread *td, struct linux_reboot_args *args)
1256{
1257	struct reboot_args bsd_args;
1258
1259#ifdef DEBUG
1260	if (ldebug(reboot))
1261		printf(ARGS(reboot, "0x%x"), args->cmd);
1262#endif
1263	if (args->cmd == REBOOT_CAD_ON || args->cmd == REBOOT_CAD_OFF)
1264		return (0);
1265	bsd_args.opt = (args->cmd == REBOOT_HALT) ? RB_HALT : 0;
1266	return (reboot(td, &bsd_args));
1267}
1268
1269#ifndef __alpha__
1270
/*
 * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify
 * td->td_retval[1] when COMPAT_43 or COMPAT_SUNOS is defined. This
 * clobbers registers that are assumed to be preserved. The following
 * lightweight syscalls fix this. See also linux_getgid16() and
 * linux_getuid16() in linux_uid16.c.
 *
 * linux_getpid() - MP SAFE
 * linux_getgid() - MP SAFE
 * linux_getuid() - MP SAFE
 */
1282
1283int
1284linux_getpid(struct thread *td, struct linux_getpid_args *args)
1285{
1286
1287	td->td_retval[0] = td->td_proc->p_pid;
1288	return (0);
1289}
1290
1291int
1292linux_getgid(struct thread *td, struct linux_getgid_args *args)
1293{
1294
1295	td->td_retval[0] = td->td_ucred->cr_rgid;
1296	return (0);
1297}
1298
1299int
1300linux_getuid(struct thread *td, struct linux_getuid_args *args)
1301{
1302
1303	td->td_retval[0] = td->td_ucred->cr_ruid;
1304	return (0);
1305}
1306
1307#endif /*!__alpha__*/
1308
1309int
1310linux_getsid(struct thread *td, struct linux_getsid_args *args)
1311{
1312	struct getsid_args bsd;
1313	bsd.pid = args->pid;
1314	return getsid(td, &bsd);
1315}
1316