linux_misc.c revision 301051
1/*- 2 * Copyright (c) 2002 Doug Rabson 3 * Copyright (c) 1994-1995 S��ren Schmidt 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer 11 * in this position and unchanged. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. The name of the author may not be used to endorse or promote products 16 * derived from this software without specific prior written permission 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: releng/10.2/sys/compat/linux/linux_misc.c 301051 2016-05-31 16:55:45Z glebius $"); 32 33#include "opt_compat.h" 34#include "opt_kdtrace.h" 35 36#include <sys/param.h> 37#include <sys/blist.h> 38#include <sys/fcntl.h> 39#if defined(__i386__) 40#include <sys/imgact_aout.h> 41#endif 42#include <sys/jail.h> 43#include <sys/kernel.h> 44#include <sys/limits.h> 45#include <sys/lock.h> 46#include <sys/malloc.h> 47#include <sys/mman.h> 48#include <sys/mount.h> 49#include <sys/mutex.h> 50#include <sys/namei.h> 51#include <sys/priv.h> 52#include <sys/proc.h> 53#include <sys/reboot.h> 54#include <sys/racct.h> 55#include <sys/resourcevar.h> 56#include <sys/sched.h> 57#include <sys/sdt.h> 58#include <sys/signalvar.h> 59#include <sys/stat.h> 60#include <sys/syscallsubr.h> 61#include <sys/sysctl.h> 62#include <sys/sysproto.h> 63#include <sys/systm.h> 64#include <sys/time.h> 65#include <sys/vmmeter.h> 66#include <sys/vnode.h> 67#include <sys/wait.h> 68#include <sys/cpuset.h> 69 70#include <security/mac/mac_framework.h> 71 72#include <vm/vm.h> 73#include <vm/pmap.h> 74#include <vm/vm_kern.h> 75#include <vm/vm_map.h> 76#include <vm/vm_extern.h> 77#include <vm/vm_object.h> 78#include <vm/swap_pager.h> 79 80#ifdef COMPAT_LINUX32 81#include <machine/../linux32/linux.h> 82#include <machine/../linux32/linux32_proto.h> 83#else 84#include <machine/../linux/linux.h> 85#include <machine/../linux/linux_proto.h> 86#endif 87 88#include <compat/linux/linux_dtrace.h> 89#include <compat/linux/linux_file.h> 90#include <compat/linux/linux_mib.h> 91#include <compat/linux/linux_signal.h> 92#include <compat/linux/linux_util.h> 93#include <compat/linux/linux_sysproto.h> 94#include <compat/linux/linux_emul.h> 95#include <compat/linux/linux_misc.h> 96 97/* DTrace init */ 98LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); 99 100/* Linuxulator-global DTrace probes */ 101LIN_SDT_PROBE_DECLARE(locks, emul_lock, locked); 102LIN_SDT_PROBE_DECLARE(locks, emul_lock, unlock); 103LIN_SDT_PROBE_DECLARE(locks, emul_shared_rlock, locked); 104LIN_SDT_PROBE_DECLARE(locks, emul_shared_rlock, unlock); 105LIN_SDT_PROBE_DECLARE(locks, emul_shared_wlock, locked); 106LIN_SDT_PROBE_DECLARE(locks, emul_shared_wlock, unlock); 107 108int stclohz; /* Statistics clock frequency */ 109 110static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { 111 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 112 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 113 RLIMIT_MEMLOCK, RLIMIT_AS 114}; 115 116struct l_sysinfo { 117 l_long uptime; /* Seconds since boot */ 118 l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ 119#define LINUX_SYSINFO_LOADS_SCALE 65536 120 l_ulong totalram; /* Total usable main memory size */ 121 l_ulong freeram; /* Available memory size */ 122 l_ulong sharedram; /* Amount of shared memory */ 123 l_ulong bufferram; /* Memory used by buffers */ 124 l_ulong totalswap; /* Total swap space size */ 125 l_ulong freeswap; /* swap space still available */ 126 l_ushort procs; /* Number of current processes */ 127 l_ushort pads; 128 l_ulong totalbig; 129 l_ulong freebig; 130 l_uint mem_unit; 131 char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ 132}; 133int 134linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) 135{ 136 struct l_sysinfo sysinfo; 137 vm_object_t object; 138 int i, j; 139 struct timespec ts; 140 141 bzero(&sysinfo, sizeof(sysinfo)); 142 getnanouptime(&ts); 143 if (ts.tv_nsec != 0) 144 ts.tv_sec++; 145 sysinfo.uptime = ts.tv_sec; 146 147 /* Use the information from the mib to get our load averages */ 148 for (i = 0; i < 3; i++) 149 sysinfo.loads[i] = averunnable.ldavg[i] * 150 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; 151 152 sysinfo.totalram = physmem * PAGE_SIZE; 153 sysinfo.freeram = sysinfo.totalram - cnt.v_wire_count * PAGE_SIZE; 154 155 sysinfo.sharedram = 0; 156 mtx_lock(&vm_object_list_mtx); 157 TAILQ_FOREACH(object, &vm_object_list, object_list) 158 if (object->shadow_count > 1) 159 sysinfo.sharedram += object->resident_page_count; 160 mtx_unlock(&vm_object_list_mtx); 161 162 sysinfo.sharedram *= PAGE_SIZE; 163 sysinfo.bufferram = 0; 164 165 swap_pager_status(&i, &j); 166 sysinfo.totalswap = i * PAGE_SIZE; 167 sysinfo.freeswap = (i - j) * PAGE_SIZE; 168 169 sysinfo.procs = nprocs; 170 171 /* The following are only present in newer Linux kernels. */ 172 sysinfo.totalbig = 0; 173 sysinfo.freebig = 0; 174 sysinfo.mem_unit = 1; 175 176 return (copyout(&sysinfo, args->info, sizeof(sysinfo))); 177} 178 179int 180linux_alarm(struct thread *td, struct linux_alarm_args *args) 181{ 182 struct itimerval it, old_it; 183 u_int secs; 184 int error; 185 186#ifdef DEBUG 187 if (ldebug(alarm)) 188 printf(ARGS(alarm, "%u"), args->secs); 189#endif 190 191 secs = args->secs; 192 193 if (secs > INT_MAX) 194 secs = INT_MAX; 195 196 it.it_value.tv_sec = (long) secs; 197 it.it_value.tv_usec = 0; 198 it.it_interval.tv_sec = 0; 199 it.it_interval.tv_usec = 0; 200 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); 201 if (error) 202 return (error); 203 if (timevalisset(&old_it.it_value)) { 204 if (old_it.it_value.tv_usec != 0) 205 old_it.it_value.tv_sec++; 206 td->td_retval[0] = old_it.it_value.tv_sec; 207 } 208 return (0); 209} 210 211int 212linux_brk(struct thread *td, struct linux_brk_args *args) 213{ 214 struct vmspace *vm = td->td_proc->p_vmspace; 215 vm_offset_t new, old; 216 struct obreak_args /* { 217 char * nsize; 218 } */ tmp; 219 220#ifdef DEBUG 221 if (ldebug(brk)) 222 printf(ARGS(brk, "%p"), (void *)(uintptr_t)args->dsend); 223#endif 224 old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize); 225 new = (vm_offset_t)args->dsend; 226 tmp.nsize = (char *)new; 227 if (((caddr_t)new > vm->vm_daddr) && !sys_obreak(td, &tmp)) 228 td->td_retval[0] = (long)new; 229 else 230 td->td_retval[0] = (long)old; 231 232 return (0); 233} 234 235#if defined(__i386__) 236/* XXX: what about amd64/linux32? */ 237 238int 239linux_uselib(struct thread *td, struct linux_uselib_args *args) 240{ 241 struct nameidata ni; 242 struct vnode *vp; 243 struct exec *a_out; 244 struct vattr attr; 245 vm_offset_t vmaddr; 246 unsigned long file_offset; 247 unsigned long bss_size; 248 char *library; 249 ssize_t aresid; 250 int error, locked, writecount; 251 252 LCONVPATHEXIST(td, args->library, &library); 253 254#ifdef DEBUG 255 if (ldebug(uselib)) 256 printf(ARGS(uselib, "%s"), library); 257#endif 258 259 a_out = NULL; 260 locked = 0; 261 vp = NULL; 262 263 NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, 264 UIO_SYSSPACE, library, td); 265 error = namei(&ni); 266 LFREEPATH(library); 267 if (error) 268 goto cleanup; 269 270 vp = ni.ni_vp; 271 NDFREE(&ni, NDF_ONLY_PNBUF); 272 273 /* 274 * From here on down, we have a locked vnode that must be unlocked. 275 * XXX: The code below largely duplicates exec_check_permissions(). 276 */ 277 locked = 1; 278 279 /* Writable? */ 280 error = VOP_GET_WRITECOUNT(vp, &writecount); 281 if (error != 0) 282 goto cleanup; 283 if (writecount != 0) { 284 error = ETXTBSY; 285 goto cleanup; 286 } 287 288 /* Executable? */ 289 error = VOP_GETATTR(vp, &attr, td->td_ucred); 290 if (error) 291 goto cleanup; 292 293 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 294 ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) { 295 /* EACCESS is what exec(2) returns. */ 296 error = ENOEXEC; 297 goto cleanup; 298 } 299 300 /* Sensible size? */ 301 if (attr.va_size == 0) { 302 error = ENOEXEC; 303 goto cleanup; 304 } 305 306 /* Can we access it? */ 307 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 308 if (error) 309 goto cleanup; 310 311 /* 312 * XXX: This should use vn_open() so that it is properly authorized, 313 * and to reduce code redundancy all over the place here. 314 * XXX: Not really, it duplicates far more of exec_check_permissions() 315 * than vn_open(). 316 */ 317#ifdef MAC 318 error = mac_vnode_check_open(td->td_ucred, vp, VREAD); 319 if (error) 320 goto cleanup; 321#endif 322 error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); 323 if (error) 324 goto cleanup; 325 326 /* Pull in executable header into exec_map */ 327 error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE, 328 VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0); 329 if (error) 330 goto cleanup; 331 332 /* Is it a Linux binary ? */ 333 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 334 error = ENOEXEC; 335 goto cleanup; 336 } 337 338 /* 339 * While we are here, we should REALLY do some more checks 340 */ 341 342 /* Set file/virtual offset based on a.out variant. */ 343 switch ((int)(a_out->a_magic & 0xffff)) { 344 case 0413: /* ZMAGIC */ 345 file_offset = 1024; 346 break; 347 case 0314: /* QMAGIC */ 348 file_offset = 0; 349 break; 350 default: 351 error = ENOEXEC; 352 goto cleanup; 353 } 354 355 bss_size = round_page(a_out->a_bss); 356 357 /* Check various fields in header for validity/bounds. */ 358 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 359 error = ENOEXEC; 360 goto cleanup; 361 } 362 363 /* text + data can't exceed file size */ 364 if (a_out->a_data + a_out->a_text > attr.va_size) { 365 error = EFAULT; 366 goto cleanup; 367 } 368 369 /* 370 * text/data/bss must not exceed limits 371 * XXX - this is not complete. it should check current usage PLUS 372 * the resources needed by this library. 373 */ 374 PROC_LOCK(td->td_proc); 375 if (a_out->a_text > maxtsiz || 376 a_out->a_data + bss_size > lim_cur(td->td_proc, RLIMIT_DATA) || 377 racct_set(td->td_proc, RACCT_DATA, a_out->a_data + 378 bss_size) != 0) { 379 PROC_UNLOCK(td->td_proc); 380 error = ENOMEM; 381 goto cleanup; 382 } 383 PROC_UNLOCK(td->td_proc); 384 385 /* 386 * Prevent more writers. 387 * XXX: Note that if any of the VM operations fail below we don't 388 * clear this flag. 389 */ 390 VOP_SET_TEXT(vp); 391 392 /* 393 * Lock no longer needed 394 */ 395 locked = 0; 396 VOP_UNLOCK(vp, 0); 397 398 /* 399 * Check if file_offset page aligned. Currently we cannot handle 400 * misalinged file offsets, and so we read in the entire image 401 * (what a waste). 402 */ 403 if (file_offset & PAGE_MASK) { 404#ifdef DEBUG 405 printf("uselib: Non page aligned binary %lu\n", file_offset); 406#endif 407 /* Map text+data read/write/execute */ 408 409 /* a_entry is the load address and is page aligned */ 410 vmaddr = trunc_page(a_out->a_entry); 411 412 /* get anon user mapping, read+write+execute */ 413 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 414 &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE, 415 VM_PROT_ALL, VM_PROT_ALL, 0); 416 if (error) 417 goto cleanup; 418 419 error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset, 420 a_out->a_text + a_out->a_data, UIO_USERSPACE, 0, 421 td->td_ucred, NOCRED, &aresid, td); 422 if (error != 0) 423 goto cleanup; 424 if (aresid != 0) { 425 error = ENOEXEC; 426 goto cleanup; 427 } 428 } else { 429#ifdef DEBUG 430 printf("uselib: Page aligned binary %lu\n", file_offset); 431#endif 432 /* 433 * for QMAGIC, a_entry is 20 bytes beyond the load address 434 * to skip the executable header 435 */ 436 vmaddr = trunc_page(a_out->a_entry); 437 438 /* 439 * Map it all into the process's space as a single 440 * copy-on-write "data" segment. 441 */ 442 error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr, 443 a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL, 444 MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset); 445 if (error) 446 goto cleanup; 447 } 448#ifdef DEBUG 449 printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long *)vmaddr)[0], 450 ((long *)vmaddr)[1]); 451#endif 452 if (bss_size != 0) { 453 /* Calculate BSS start address */ 454 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + 455 a_out->a_data; 456 457 /* allocate some 'anon' space */ 458 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 459 &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL, 460 VM_PROT_ALL, 0); 461 if (error) 462 goto cleanup; 463 } 464 465cleanup: 466 /* Unlock vnode if needed */ 467 if (locked) 468 VOP_UNLOCK(vp, 0); 469 470 /* Release the temporary mapping. */ 471 if (a_out) 472 kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE); 473 474 return (error); 475} 476 477#endif /* __i386__ */ 478 479int 480linux_select(struct thread *td, struct linux_select_args *args) 481{ 482 l_timeval ltv; 483 struct timeval tv0, tv1, utv, *tvp; 484 int error; 485 486#ifdef DEBUG 487 if (ldebug(select)) 488 printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds, 489 (void *)args->readfds, (void *)args->writefds, 490 (void *)args->exceptfds, (void *)args->timeout); 491#endif 492 493 /* 494 * Store current time for computation of the amount of 495 * time left. 496 */ 497 if (args->timeout) { 498 if ((error = copyin(args->timeout, <v, sizeof(ltv)))) 499 goto select_out; 500 utv.tv_sec = ltv.tv_sec; 501 utv.tv_usec = ltv.tv_usec; 502#ifdef DEBUG 503 if (ldebug(select)) 504 printf(LMSG("incoming timeout (%jd/%ld)"), 505 (intmax_t)utv.tv_sec, utv.tv_usec); 506#endif 507 508 if (itimerfix(&utv)) { 509 /* 510 * The timeval was invalid. Convert it to something 511 * valid that will act as it does under Linux. 512 */ 513 utv.tv_sec += utv.tv_usec / 1000000; 514 utv.tv_usec %= 1000000; 515 if (utv.tv_usec < 0) { 516 utv.tv_sec -= 1; 517 utv.tv_usec += 1000000; 518 } 519 if (utv.tv_sec < 0) 520 timevalclear(&utv); 521 } 522 microtime(&tv0); 523 tvp = &utv; 524 } else 525 tvp = NULL; 526 527 error = kern_select(td, args->nfds, args->readfds, args->writefds, 528 args->exceptfds, tvp, sizeof(l_int) * 8); 529 530#ifdef DEBUG 531 if (ldebug(select)) 532 printf(LMSG("real select returns %d"), error); 533#endif 534 if (error) 535 goto select_out; 536 537 if (args->timeout) { 538 if (td->td_retval[0]) { 539 /* 540 * Compute how much time was left of the timeout, 541 * by subtracting the current time and the time 542 * before we started the call, and subtracting 543 * that result from the user-supplied value. 544 */ 545 microtime(&tv1); 546 timevalsub(&tv1, &tv0); 547 timevalsub(&utv, &tv1); 548 if (utv.tv_sec < 0) 549 timevalclear(&utv); 550 } else 551 timevalclear(&utv); 552#ifdef DEBUG 553 if (ldebug(select)) 554 printf(LMSG("outgoing timeout (%jd/%ld)"), 555 (intmax_t)utv.tv_sec, utv.tv_usec); 556#endif 557 ltv.tv_sec = utv.tv_sec; 558 ltv.tv_usec = utv.tv_usec; 559 if ((error = copyout(<v, args->timeout, sizeof(ltv)))) 560 goto select_out; 561 } 562 563select_out: 564#ifdef DEBUG 565 if (ldebug(select)) 566 printf(LMSG("select_out -> %d"), error); 567#endif 568 return (error); 569} 570 571int 572linux_mremap(struct thread *td, struct linux_mremap_args *args) 573{ 574 struct munmap_args /* { 575 void *addr; 576 size_t len; 577 } */ bsd_args; 578 int error = 0; 579 580#ifdef DEBUG 581 if (ldebug(mremap)) 582 printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"), 583 (void *)(uintptr_t)args->addr, 584 (unsigned long)args->old_len, 585 (unsigned long)args->new_len, 586 (unsigned long)args->flags); 587#endif 588 589 if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { 590 td->td_retval[0] = 0; 591 return (EINVAL); 592 } 593 594 /* 595 * Check for the page alignment. 596 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. 597 */ 598 if (args->addr & PAGE_MASK) { 599 td->td_retval[0] = 0; 600 return (EINVAL); 601 } 602 603 args->new_len = round_page(args->new_len); 604 args->old_len = round_page(args->old_len); 605 606 if (args->new_len > args->old_len) { 607 td->td_retval[0] = 0; 608 return (ENOMEM); 609 } 610 611 if (args->new_len < args->old_len) { 612 bsd_args.addr = 613 (caddr_t)((uintptr_t)args->addr + args->new_len); 614 bsd_args.len = args->old_len - args->new_len; 615 error = sys_munmap(td, &bsd_args); 616 } 617 618 td->td_retval[0] = error ? 0 : (uintptr_t)args->addr; 619 return (error); 620} 621 622#define LINUX_MS_ASYNC 0x0001 623#define LINUX_MS_INVALIDATE 0x0002 624#define LINUX_MS_SYNC 0x0004 625 626int 627linux_msync(struct thread *td, struct linux_msync_args *args) 628{ 629 struct msync_args bsd_args; 630 631 bsd_args.addr = (caddr_t)(uintptr_t)args->addr; 632 bsd_args.len = (uintptr_t)args->len; 633 bsd_args.flags = args->fl & ~LINUX_MS_SYNC; 634 635 return (sys_msync(td, &bsd_args)); 636} 637 638int 639linux_time(struct thread *td, struct linux_time_args *args) 640{ 641 struct timeval tv; 642 l_time_t tm; 643 int error; 644 645#ifdef DEBUG 646 if (ldebug(time)) 647 printf(ARGS(time, "*")); 648#endif 649 650 microtime(&tv); 651 tm = tv.tv_sec; 652 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) 653 return (error); 654 td->td_retval[0] = tm; 655 return (0); 656} 657 658struct l_times_argv { 659 l_clock_t tms_utime; 660 l_clock_t tms_stime; 661 l_clock_t tms_cutime; 662 l_clock_t tms_cstime; 663}; 664 665 666/* 667 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. 668 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK 669 * auxiliary vector entry. 670 */ 671#define CLK_TCK 100 672 673#define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 674#define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) 675 676#define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER_2004000 ? \ 677 CONVNTCK(r) : CONVOTCK(r)) 678 679int 680linux_times(struct thread *td, struct linux_times_args *args) 681{ 682 struct timeval tv, utime, stime, cutime, cstime; 683 struct l_times_argv tms; 684 struct proc *p; 685 int error; 686 687#ifdef DEBUG 688 if (ldebug(times)) 689 printf(ARGS(times, "*")); 690#endif 691 692 if (args->buf != NULL) { 693 p = td->td_proc; 694 PROC_LOCK(p); 695 PROC_SLOCK(p); 696 calcru(p, &utime, &stime); 697 PROC_SUNLOCK(p); 698 calccru(p, &cutime, &cstime); 699 PROC_UNLOCK(p); 700 701 tms.tms_utime = CONVTCK(utime); 702 tms.tms_stime = CONVTCK(stime); 703 704 tms.tms_cutime = CONVTCK(cutime); 705 tms.tms_cstime = CONVTCK(cstime); 706 707 if ((error = copyout(&tms, args->buf, sizeof(tms)))) 708 return (error); 709 } 710 711 microuptime(&tv); 712 td->td_retval[0] = (int)CONVTCK(tv); 713 return (0); 714} 715 716int 717linux_newuname(struct thread *td, struct linux_newuname_args *args) 718{ 719 struct l_new_utsname utsname; 720 char osname[LINUX_MAX_UTSNAME]; 721 char osrelease[LINUX_MAX_UTSNAME]; 722 char *p; 723 724#ifdef DEBUG 725 if (ldebug(newuname)) 726 printf(ARGS(newuname, "*")); 727#endif 728 729 linux_get_osname(td, osname); 730 linux_get_osrelease(td, osrelease); 731 732 bzero(&utsname, sizeof(utsname)); 733 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); 734 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); 735 getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); 736 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); 737 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); 738 for (p = utsname.version; *p != '\0'; ++p) 739 if (*p == '\n') { 740 *p = '\0'; 741 break; 742 } 743 strlcpy(utsname.machine, linux_platform, LINUX_MAX_UTSNAME); 744 745 return (copyout(&utsname, args->buf, sizeof(utsname))); 746} 747 748#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 749struct l_utimbuf { 750 l_time_t l_actime; 751 l_time_t l_modtime; 752}; 753 754int 755linux_utime(struct thread *td, struct linux_utime_args *args) 756{ 757 struct timeval tv[2], *tvp; 758 struct l_utimbuf lut; 759 char *fname; 760 int error; 761 762 LCONVPATHEXIST(td, args->fname, &fname); 763 764#ifdef DEBUG 765 if (ldebug(utime)) 766 printf(ARGS(utime, "%s, *"), fname); 767#endif 768 769 if (args->times) { 770 if ((error = copyin(args->times, &lut, sizeof lut))) { 771 LFREEPATH(fname); 772 return (error); 773 } 774 tv[0].tv_sec = lut.l_actime; 775 tv[0].tv_usec = 0; 776 tv[1].tv_sec = lut.l_modtime; 777 tv[1].tv_usec = 0; 778 tvp = tv; 779 } else 780 tvp = NULL; 781 782 error = kern_utimes(td, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); 783 LFREEPATH(fname); 784 return (error); 785} 786 787int 788linux_utimes(struct thread *td, struct linux_utimes_args *args) 789{ 790 l_timeval ltv[2]; 791 struct timeval tv[2], *tvp = NULL; 792 char *fname; 793 int error; 794 795 LCONVPATHEXIST(td, args->fname, &fname); 796 797#ifdef DEBUG 798 if (ldebug(utimes)) 799 printf(ARGS(utimes, "%s, *"), fname); 800#endif 801 802 if (args->tptr != NULL) { 803 if ((error = copyin(args->tptr, ltv, sizeof ltv))) { 804 LFREEPATH(fname); 805 return (error); 806 } 807 tv[0].tv_sec = ltv[0].tv_sec; 808 tv[0].tv_usec = ltv[0].tv_usec; 809 tv[1].tv_sec = ltv[1].tv_sec; 810 tv[1].tv_usec = ltv[1].tv_usec; 811 tvp = tv; 812 } 813 814 error = kern_utimes(td, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); 815 LFREEPATH(fname); 816 return (error); 817} 818 819int 820linux_futimesat(struct thread *td, struct linux_futimesat_args *args) 821{ 822 l_timeval ltv[2]; 823 struct timeval tv[2], *tvp = NULL; 824 char *fname; 825 int error, dfd; 826 827 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 828 LCONVPATHEXIST_AT(td, args->filename, &fname, dfd); 829 830#ifdef DEBUG 831 if (ldebug(futimesat)) 832 printf(ARGS(futimesat, "%s, *"), fname); 833#endif 834 835 if (args->utimes != NULL) { 836 if ((error = copyin(args->utimes, ltv, sizeof ltv))) { 837 LFREEPATH(fname); 838 return (error); 839 } 840 tv[0].tv_sec = ltv[0].tv_sec; 841 tv[0].tv_usec = ltv[0].tv_usec; 842 tv[1].tv_sec = ltv[1].tv_sec; 843 tv[1].tv_usec = ltv[1].tv_usec; 844 tvp = tv; 845 } 846 847 error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); 848 LFREEPATH(fname); 849 return (error); 850} 851#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 852 853int 854linux_common_wait(struct thread *td, int pid, int *status, 855 int options, struct rusage *ru) 856{ 857 int error, tmpstat; 858 859 error = kern_wait(td, pid, &tmpstat, options, ru); 860 if (error) 861 return (error); 862 863 if (status) { 864 tmpstat &= 0xffff; 865 if (WIFSIGNALED(tmpstat)) 866 tmpstat = (tmpstat & 0xffffff80) | 867 BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); 868 else if (WIFSTOPPED(tmpstat)) 869 tmpstat = (tmpstat & 0xffff00ff) | 870 (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8); 871 error = copyout(&tmpstat, status, sizeof(int)); 872 } 873 874 return (error); 875} 876 877int 878linux_waitpid(struct thread *td, struct linux_waitpid_args *args) 879{ 880 int options; 881 882#ifdef DEBUG 883 if (ldebug(waitpid)) 884 printf(ARGS(waitpid, "%d, %p, %d"), 885 args->pid, (void *)args->status, args->options); 886#endif 887 /* 888 * this is necessary because the test in kern_wait doesn't work 889 * because we mess with the options here 890 */ 891 if (args->options & ~(WUNTRACED | WNOHANG | WCONTINUED | __WCLONE)) 892 return (EINVAL); 893 894 options = (args->options & (WNOHANG | WUNTRACED)); 895 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 896 if (args->options & __WCLONE) 897 options |= WLINUXCLONE; 898 899 return (linux_common_wait(td, args->pid, args->status, options, NULL)); 900} 901 902 903int 904linux_mknod(struct thread *td, struct linux_mknod_args *args) 905{ 906 char *path; 907 int error; 908 909 LCONVPATHCREAT(td, args->path, &path); 910 911#ifdef DEBUG 912 if (ldebug(mknod)) 913 printf(ARGS(mknod, "%s, %d, %d"), path, args->mode, args->dev); 914#endif 915 916 switch (args->mode & S_IFMT) { 917 case S_IFIFO: 918 case S_IFSOCK: 919 error = kern_mkfifo(td, path, UIO_SYSSPACE, args->mode); 920 break; 921 922 case S_IFCHR: 923 case S_IFBLK: 924 error = kern_mknod(td, path, UIO_SYSSPACE, args->mode, 925 args->dev); 926 break; 927 928 case S_IFDIR: 929 error = EPERM; 930 break; 931 932 case 0: 933 args->mode |= S_IFREG; 934 /* FALLTHROUGH */ 935 case S_IFREG: 936 error = kern_open(td, path, UIO_SYSSPACE, 937 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 938 if (error == 0) 939 kern_close(td, td->td_retval[0]); 940 break; 941 942 default: 943 error = EINVAL; 944 break; 945 } 946 LFREEPATH(path); 947 return (error); 948} 949 950int 951linux_mknodat(struct thread *td, struct linux_mknodat_args *args) 952{ 953 char *path; 954 int error, dfd; 955 956 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 957 LCONVPATHCREAT_AT(td, args->filename, &path, dfd); 958 959#ifdef DEBUG 960 if (ldebug(mknodat)) 961 printf(ARGS(mknodat, "%s, %d, %d"), path, args->mode, args->dev); 962#endif 963 964 switch (args->mode & S_IFMT) { 965 case S_IFIFO: 966 case S_IFSOCK: 967 error = kern_mkfifoat(td, dfd, path, UIO_SYSSPACE, args->mode); 968 break; 969 970 case S_IFCHR: 971 case S_IFBLK: 972 error = kern_mknodat(td, dfd, path, UIO_SYSSPACE, args->mode, 973 args->dev); 974 break; 975 976 case S_IFDIR: 977 error = EPERM; 978 break; 979 980 case 0: 981 args->mode |= S_IFREG; 982 /* FALLTHROUGH */ 983 case S_IFREG: 984 error = kern_openat(td, dfd, path, UIO_SYSSPACE, 985 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 986 if (error == 0) 987 kern_close(td, td->td_retval[0]); 988 break; 989 990 default: 991 error = EINVAL; 992 break; 993 } 994 LFREEPATH(path); 995 return (error); 996} 997 998/* 999 * UGH! This is just about the dumbest idea I've ever heard!! 1000 */ 1001int 1002linux_personality(struct thread *td, struct linux_personality_args *args) 1003{ 1004#ifdef DEBUG 1005 if (ldebug(personality)) 1006 printf(ARGS(personality, "%lu"), (unsigned long)args->per); 1007#endif 1008 if (args->per != 0) 1009 return (EINVAL); 1010 1011 /* Yes Jim, it's still a Linux... */ 1012 td->td_retval[0] = 0; 1013 return (0); 1014} 1015 1016struct l_itimerval { 1017 l_timeval it_interval; 1018 l_timeval it_value; 1019}; 1020 1021#define B2L_ITIMERVAL(bip, lip) \ 1022 (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ 1023 (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ 1024 (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ 1025 (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; 1026 1027int 1028linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) 1029{ 1030 int error; 1031 struct l_itimerval ls; 1032 struct itimerval aitv, oitv; 1033 1034#ifdef DEBUG 1035 if (ldebug(setitimer)) 1036 printf(ARGS(setitimer, "%p, %p"), 1037 (void *)uap->itv, (void *)uap->oitv); 1038#endif 1039 1040 if (uap->itv == NULL) { 1041 uap->itv = uap->oitv; 1042 return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); 1043 } 1044 1045 error = copyin(uap->itv, &ls, sizeof(ls)); 1046 if (error != 0) 1047 return (error); 1048 B2L_ITIMERVAL(&aitv, &ls); 1049#ifdef DEBUG 1050 if (ldebug(setitimer)) { 1051 printf("setitimer: value: sec: %jd, usec: %ld\n", 1052 (intmax_t)aitv.it_value.tv_sec, aitv.it_value.tv_usec); 1053 printf("setitimer: interval: sec: %jd, usec: %ld\n", 1054 (intmax_t)aitv.it_interval.tv_sec, aitv.it_interval.tv_usec); 1055 } 1056#endif 1057 error = kern_setitimer(td, uap->which, &aitv, &oitv); 1058 if (error != 0 || uap->oitv == NULL) 1059 return (error); 1060 B2L_ITIMERVAL(&ls, &oitv); 1061 1062 return (copyout(&ls, uap->oitv, sizeof(ls))); 1063} 1064 1065int 1066linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) 1067{ 1068 int error; 1069 struct l_itimerval ls; 1070 struct itimerval aitv; 1071 1072#ifdef DEBUG 1073 if (ldebug(getitimer)) 1074 printf(ARGS(getitimer, "%p"), (void *)uap->itv); 1075#endif 1076 error = kern_getitimer(td, uap->which, &aitv); 1077 if (error != 0) 1078 return (error); 1079 B2L_ITIMERVAL(&ls, &aitv); 1080 return (copyout(&ls, uap->itv, sizeof(ls))); 1081} 1082 1083int 1084linux_nice(struct thread *td, struct linux_nice_args *args) 1085{ 1086 struct setpriority_args bsd_args; 1087 1088 bsd_args.which = PRIO_PROCESS; 1089 bsd_args.who = 0; /* current process */ 1090 bsd_args.prio = args->inc; 1091 return (sys_setpriority(td, &bsd_args)); 1092} 1093 1094int 1095linux_setgroups(struct thread *td, struct linux_setgroups_args *args) 1096{ 1097 struct ucred *newcred, *oldcred; 1098 l_gid_t *linux_gidset; 1099 gid_t *bsd_gidset; 1100 int ngrp, error; 1101 struct proc *p; 1102 1103 ngrp = args->gidsetsize; 1104 if (ngrp < 0 || ngrp >= ngroups_max + 1) 1105 return (EINVAL); 1106 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_TEMP, M_WAITOK); 1107 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); 1108 if (error) 1109 goto out; 1110 newcred = crget(); 1111 crextend(newcred, ngrp + 1); 1112 p = td->td_proc; 1113 PROC_LOCK(p); 1114 oldcred = p->p_ucred; 1115 crcopy(newcred, oldcred); 1116 1117 /* 1118 * cr_groups[0] holds egid. Setting the whole set from 1119 * the supplied set will cause egid to be changed too. 1120 * Keep cr_groups[0] unchanged to prevent that. 1121 */ 1122 1123 if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS, 0)) != 0) { 1124 PROC_UNLOCK(p); 1125 crfree(newcred); 1126 goto out; 1127 } 1128 1129 if (ngrp > 0) { 1130 newcred->cr_ngroups = ngrp + 1; 1131 1132 bsd_gidset = newcred->cr_groups; 1133 ngrp--; 1134 while (ngrp >= 0) { 1135 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1136 ngrp--; 1137 } 1138 } else 1139 newcred->cr_ngroups = 1; 1140 1141 setsugid(p); 1142 p->p_ucred = newcred; 1143 PROC_UNLOCK(p); 1144 crfree(oldcred); 1145 error = 0; 1146out: 1147 free(linux_gidset, M_TEMP); 1148 return (error); 1149} 1150 1151int 1152linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1153{ 1154 struct ucred *cred; 1155 l_gid_t *linux_gidset; 1156 gid_t *bsd_gidset; 1157 int bsd_gidsetsz, ngrp, error; 1158 1159 cred = td->td_ucred; 1160 bsd_gidset = cred->cr_groups; 1161 bsd_gidsetsz = cred->cr_ngroups - 1; 1162 1163 /* 1164 * cr_groups[0] holds egid. Returning the whole set 1165 * here will cause a duplicate. Exclude cr_groups[0] 1166 * to prevent that. 1167 */ 1168 1169 if ((ngrp = args->gidsetsize) == 0) { 1170 td->td_retval[0] = bsd_gidsetsz; 1171 return (0); 1172 } 1173 1174 if (ngrp < bsd_gidsetsz) 1175 return (EINVAL); 1176 1177 ngrp = 0; 1178 linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), 1179 M_TEMP, M_WAITOK); 1180 while (ngrp < bsd_gidsetsz) { 1181 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1182 ngrp++; 1183 } 1184 1185 error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); 1186 free(linux_gidset, M_TEMP); 1187 if (error) 1188 return (error); 1189 1190 td->td_retval[0] = ngrp; 1191 return (0); 1192} 1193 1194int 1195linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1196{ 1197 struct rlimit bsd_rlim; 1198 struct l_rlimit rlim; 1199 u_int which; 1200 int error; 1201 1202#ifdef DEBUG 1203 if (ldebug(setrlimit)) 1204 printf(ARGS(setrlimit, "%d, %p"), 1205 args->resource, (void *)args->rlim); 1206#endif 1207 1208 if (args->resource >= LINUX_RLIM_NLIMITS) 1209 return (EINVAL); 1210 1211 which = linux_to_bsd_resource[args->resource]; 1212 if (which == -1) 1213 return (EINVAL); 1214 1215 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1216 if (error) 1217 return (error); 1218 1219 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1220 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1221 return (kern_setrlimit(td, which, &bsd_rlim)); 1222} 1223 1224int 1225linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) 1226{ 1227 struct l_rlimit rlim; 1228 struct proc *p = td->td_proc; 1229 struct rlimit bsd_rlim; 1230 u_int which; 1231 1232#ifdef DEBUG 1233 if (ldebug(old_getrlimit)) 1234 printf(ARGS(old_getrlimit, "%d, %p"), 1235 args->resource, (void *)args->rlim); 1236#endif 1237 1238 if (args->resource >= LINUX_RLIM_NLIMITS) 1239 return (EINVAL); 1240 1241 which = linux_to_bsd_resource[args->resource]; 1242 if (which == -1) 1243 return (EINVAL); 1244 1245 PROC_LOCK(p); 1246 lim_rlimit(p, which, &bsd_rlim); 1247 PROC_UNLOCK(p); 1248 1249#ifdef COMPAT_LINUX32 1250 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; 1251 if (rlim.rlim_cur == UINT_MAX) 1252 rlim.rlim_cur = INT_MAX; 1253 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; 1254 if (rlim.rlim_max == UINT_MAX) 1255 rlim.rlim_max = INT_MAX; 1256#else 1257 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; 1258 if (rlim.rlim_cur == ULONG_MAX) 1259 rlim.rlim_cur = LONG_MAX; 1260 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; 1261 if (rlim.rlim_max == ULONG_MAX) 1262 rlim.rlim_max = LONG_MAX; 1263#endif 1264 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1265} 1266 1267int 1268linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 1269{ 1270 struct l_rlimit rlim; 1271 struct proc *p = td->td_proc; 1272 struct rlimit bsd_rlim; 1273 u_int which; 1274 1275#ifdef DEBUG 1276 if (ldebug(getrlimit)) 1277 printf(ARGS(getrlimit, "%d, %p"), 1278 args->resource, (void *)args->rlim); 1279#endif 1280 1281 if (args->resource >= LINUX_RLIM_NLIMITS) 1282 return (EINVAL); 1283 1284 which = linux_to_bsd_resource[args->resource]; 1285 if (which == -1) 1286 return (EINVAL); 1287 1288 PROC_LOCK(p); 1289 lim_rlimit(p, which, &bsd_rlim); 1290 PROC_UNLOCK(p); 1291 1292 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; 1293 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; 1294 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1295} 1296 1297int 1298linux_sched_setscheduler(struct thread *td, 1299 struct linux_sched_setscheduler_args *args) 1300{ 1301 struct sched_setscheduler_args bsd; 1302 1303#ifdef DEBUG 1304 if (ldebug(sched_setscheduler)) 1305 printf(ARGS(sched_setscheduler, "%d, %d, %p"), 1306 args->pid, args->policy, (const void *)args->param); 1307#endif 1308 1309 switch (args->policy) { 1310 case LINUX_SCHED_OTHER: 1311 bsd.policy = SCHED_OTHER; 1312 break; 1313 case LINUX_SCHED_FIFO: 1314 bsd.policy = SCHED_FIFO; 1315 break; 1316 case LINUX_SCHED_RR: 1317 bsd.policy = SCHED_RR; 1318 break; 1319 default: 1320 return (EINVAL); 1321 } 1322 1323 bsd.pid = args->pid; 1324 bsd.param = (struct sched_param *)args->param; 1325 return (sys_sched_setscheduler(td, &bsd)); 1326} 1327 1328int 1329linux_sched_getscheduler(struct thread *td, 1330 struct linux_sched_getscheduler_args *args) 1331{ 1332 struct sched_getscheduler_args bsd; 1333 int error; 1334 1335#ifdef DEBUG 1336 if (ldebug(sched_getscheduler)) 1337 printf(ARGS(sched_getscheduler, "%d"), args->pid); 1338#endif 1339 1340 bsd.pid = args->pid; 1341 error = sys_sched_getscheduler(td, &bsd); 1342 1343 switch (td->td_retval[0]) { 1344 case SCHED_OTHER: 1345 td->td_retval[0] = LINUX_SCHED_OTHER; 1346 break; 1347 case SCHED_FIFO: 1348 td->td_retval[0] = LINUX_SCHED_FIFO; 1349 break; 1350 case SCHED_RR: 1351 td->td_retval[0] = LINUX_SCHED_RR; 1352 break; 1353 } 1354 1355 return (error); 1356} 1357 1358int 1359linux_sched_get_priority_max(struct thread *td, 1360 struct linux_sched_get_priority_max_args *args) 1361{ 1362 struct sched_get_priority_max_args bsd; 1363 1364#ifdef DEBUG 1365 if (ldebug(sched_get_priority_max)) 1366 printf(ARGS(sched_get_priority_max, "%d"), args->policy); 1367#endif 1368 1369 switch (args->policy) { 1370 case LINUX_SCHED_OTHER: 1371 bsd.policy = SCHED_OTHER; 1372 break; 1373 case LINUX_SCHED_FIFO: 1374 bsd.policy = SCHED_FIFO; 1375 break; 1376 case LINUX_SCHED_RR: 1377 bsd.policy = SCHED_RR; 1378 break; 1379 default: 1380 return (EINVAL); 1381 } 1382 return (sys_sched_get_priority_max(td, &bsd)); 1383} 1384 1385int 1386linux_sched_get_priority_min(struct thread *td, 1387 struct linux_sched_get_priority_min_args *args) 1388{ 1389 struct sched_get_priority_min_args bsd; 1390 1391#ifdef DEBUG 1392 if (ldebug(sched_get_priority_min)) 1393 printf(ARGS(sched_get_priority_min, "%d"), args->policy); 1394#endif 1395 1396 switch (args->policy) { 1397 case LINUX_SCHED_OTHER: 1398 bsd.policy = SCHED_OTHER; 1399 break; 1400 case LINUX_SCHED_FIFO: 1401 bsd.policy = SCHED_FIFO; 1402 break; 1403 case LINUX_SCHED_RR: 1404 bsd.policy = SCHED_RR; 1405 break; 1406 default: 1407 return (EINVAL); 1408 } 1409 return (sys_sched_get_priority_min(td, &bsd)); 1410} 1411 1412#define REBOOT_CAD_ON 0x89abcdef 1413#define REBOOT_CAD_OFF 0 1414#define REBOOT_HALT 0xcdef0123 1415#define REBOOT_RESTART 0x01234567 1416#define REBOOT_RESTART2 0xA1B2C3D4 1417#define REBOOT_POWEROFF 0x4321FEDC 1418#define REBOOT_MAGIC1 0xfee1dead 1419#define REBOOT_MAGIC2 0x28121969 1420#define REBOOT_MAGIC2A 0x05121996 1421#define REBOOT_MAGIC2B 0x16041998 1422 1423int 1424linux_reboot(struct thread *td, struct linux_reboot_args *args) 1425{ 1426 struct reboot_args bsd_args; 1427 1428#ifdef DEBUG 1429 if (ldebug(reboot)) 1430 printf(ARGS(reboot, "0x%x"), args->cmd); 1431#endif 1432 1433 if (args->magic1 != REBOOT_MAGIC1) 1434 return (EINVAL); 1435 1436 switch (args->magic2) { 1437 case REBOOT_MAGIC2: 1438 case REBOOT_MAGIC2A: 1439 case REBOOT_MAGIC2B: 1440 break; 1441 default: 1442 return (EINVAL); 1443 } 1444 1445 switch (args->cmd) { 1446 case REBOOT_CAD_ON: 1447 case REBOOT_CAD_OFF: 1448 return (priv_check(td, PRIV_REBOOT)); 1449 case REBOOT_HALT: 1450 bsd_args.opt = RB_HALT; 1451 break; 1452 case REBOOT_RESTART: 1453 case REBOOT_RESTART2: 1454 bsd_args.opt = 0; 1455 break; 1456 case REBOOT_POWEROFF: 1457 bsd_args.opt = RB_POWEROFF; 1458 break; 1459 default: 1460 return (EINVAL); 1461 } 1462 return (sys_reboot(td, &bsd_args)); 1463} 1464 1465 1466/* 1467 * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify 1468 * td->td_retval[1] when COMPAT_43 is defined. This clobbers registers that 1469 * are assumed to be preserved. The following lightweight syscalls fixes 1470 * this. See also linux_getgid16() and linux_getuid16() in linux_uid16.c 1471 * 1472 * linux_getpid() - MP SAFE 1473 * linux_getgid() - MP SAFE 1474 * linux_getuid() - MP SAFE 1475 */ 1476 1477int 1478linux_getpid(struct thread *td, struct linux_getpid_args *args) 1479{ 1480 struct linux_emuldata *em; 1481 1482#ifdef DEBUG 1483 if (ldebug(getpid)) 1484 printf(ARGS(getpid, "")); 1485#endif 1486 1487 if (linux_use26(td)) { 1488 em = em_find(td->td_proc, EMUL_DONTLOCK); 1489 KASSERT(em != NULL, ("getpid: emuldata not found.\n")); 1490 td->td_retval[0] = em->shared->group_pid; 1491 } else { 1492 td->td_retval[0] = td->td_proc->p_pid; 1493 } 1494 1495 return (0); 1496} 1497 1498int 1499linux_gettid(struct thread *td, struct linux_gettid_args *args) 1500{ 1501 1502#ifdef DEBUG 1503 if (ldebug(gettid)) 1504 printf(ARGS(gettid, "")); 1505#endif 1506 1507 td->td_retval[0] = td->td_proc->p_pid; 1508 return (0); 1509} 1510 1511 1512int 1513linux_getppid(struct thread *td, struct linux_getppid_args *args) 1514{ 1515 struct linux_emuldata *em; 1516 struct proc *p, *pp; 1517 1518#ifdef DEBUG 1519 if (ldebug(getppid)) 1520 printf(ARGS(getppid, "")); 1521#endif 1522 1523 if (!linux_use26(td)) { 1524 PROC_LOCK(td->td_proc); 1525 td->td_retval[0] = td->td_proc->p_pptr->p_pid; 1526 PROC_UNLOCK(td->td_proc); 1527 return (0); 1528 } 1529 1530 em = em_find(td->td_proc, EMUL_DONTLOCK); 1531 1532 KASSERT(em != NULL, ("getppid: process emuldata not found.\n")); 1533 1534 /* find the group leader */ 1535 p = pfind(em->shared->group_pid); 1536 1537 if (p == NULL) { 1538#ifdef DEBUG 1539 printf(LMSG("parent process not found.\n")); 1540#endif 1541 return (0); 1542 } 1543 1544 pp = p->p_pptr; /* switch to parent */ 1545 PROC_LOCK(pp); 1546 PROC_UNLOCK(p); 1547 1548 /* if its also linux process */ 1549 if (pp->p_sysent == &elf_linux_sysvec) { 1550 em = em_find(pp, EMUL_DONTLOCK); 1551 KASSERT(em != NULL, ("getppid: parent emuldata not found.\n")); 1552 1553 td->td_retval[0] = em->shared->group_pid; 1554 } else 1555 td->td_retval[0] = pp->p_pid; 1556 1557 PROC_UNLOCK(pp); 1558 1559 return (0); 1560} 1561 1562int 1563linux_getgid(struct thread *td, struct linux_getgid_args *args) 1564{ 1565 1566#ifdef DEBUG 1567 if (ldebug(getgid)) 1568 printf(ARGS(getgid, "")); 1569#endif 1570 1571 td->td_retval[0] = td->td_ucred->cr_rgid; 1572 return (0); 1573} 1574 1575int 1576linux_getuid(struct thread *td, struct linux_getuid_args *args) 1577{ 1578 1579#ifdef DEBUG 1580 if (ldebug(getuid)) 1581 printf(ARGS(getuid, "")); 1582#endif 1583 1584 td->td_retval[0] = td->td_ucred->cr_ruid; 1585 return (0); 1586} 1587 1588 1589int 1590linux_getsid(struct thread *td, struct linux_getsid_args *args) 1591{ 1592 struct getsid_args bsd; 1593 1594#ifdef DEBUG 1595 if (ldebug(getsid)) 1596 printf(ARGS(getsid, "%i"), args->pid); 1597#endif 1598 1599 bsd.pid = args->pid; 1600 return (sys_getsid(td, &bsd)); 1601} 1602 1603int 1604linux_nosys(struct thread *td, struct nosys_args *ignore) 1605{ 1606 1607 return (ENOSYS); 1608} 1609 1610int 1611linux_getpriority(struct thread *td, struct linux_getpriority_args *args) 1612{ 1613 struct getpriority_args bsd_args; 1614 int error; 1615 1616#ifdef DEBUG 1617 if (ldebug(getpriority)) 1618 printf(ARGS(getpriority, "%i, %i"), args->which, args->who); 1619#endif 1620 1621 bsd_args.which = args->which; 1622 bsd_args.who = args->who; 1623 error = sys_getpriority(td, &bsd_args); 1624 td->td_retval[0] = 20 - td->td_retval[0]; 1625 return (error); 1626} 1627 1628int 1629linux_sethostname(struct thread *td, struct linux_sethostname_args *args) 1630{ 1631 int name[2]; 1632 1633#ifdef DEBUG 1634 if (ldebug(sethostname)) 1635 printf(ARGS(sethostname, "*, %i"), args->len); 1636#endif 1637 1638 name[0] = CTL_KERN; 1639 name[1] = KERN_HOSTNAME; 1640 return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, 1641 args->len, 0, 0)); 1642} 1643 1644int 1645linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) 1646{ 1647 int name[2]; 1648 1649#ifdef DEBUG 1650 if (ldebug(setdomainname)) 1651 printf(ARGS(setdomainname, "*, %i"), args->len); 1652#endif 1653 1654 name[0] = CTL_KERN; 1655 name[1] = KERN_NISDOMAINNAME; 1656 return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, 1657 args->len, 0, 0)); 1658} 1659 1660int 1661linux_exit_group(struct thread *td, struct linux_exit_group_args *args) 1662{ 1663 struct linux_emuldata *em; 1664 1665#ifdef DEBUG 1666 if (ldebug(exit_group)) 1667 printf(ARGS(exit_group, "%i"), args->error_code); 1668#endif 1669 1670 em = em_find(td->td_proc, EMUL_DONTLOCK); 1671 if (em->shared->refs > 1) { 1672 EMUL_SHARED_WLOCK(&emul_shared_lock); 1673 em->shared->flags |= EMUL_SHARED_HASXSTAT; 1674 em->shared->xstat = W_EXITCODE(args->error_code, 0); 1675 EMUL_SHARED_WUNLOCK(&emul_shared_lock); 1676 if (linux_use26(td)) 1677 linux_kill_threads(td, SIGKILL); 1678 } 1679 1680 /* 1681 * XXX: we should send a signal to the parent if 1682 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) 1683 * as it doesnt occur often. 1684 */ 1685 exit1(td, W_EXITCODE(args->error_code, 0)); 1686 1687 return (0); 1688} 1689 1690#define _LINUX_CAPABILITY_VERSION 0x19980330 1691 1692struct l_user_cap_header { 1693 l_int version; 1694 l_int pid; 1695}; 1696 1697struct l_user_cap_data { 1698 l_int effective; 1699 l_int permitted; 1700 l_int inheritable; 1701}; 1702 1703int 1704linux_capget(struct thread *td, struct linux_capget_args *args) 1705{ 1706 struct l_user_cap_header luch; 1707 struct l_user_cap_data lucd; 1708 int error; 1709 1710 if (args->hdrp == NULL) 1711 return (EFAULT); 1712 1713 error = copyin(args->hdrp, &luch, sizeof(luch)); 1714 if (error != 0) 1715 return (error); 1716 1717 if (luch.version != _LINUX_CAPABILITY_VERSION) { 1718 luch.version = _LINUX_CAPABILITY_VERSION; 1719 error = copyout(&luch, args->hdrp, sizeof(luch)); 1720 if (error) 1721 return (error); 1722 return (EINVAL); 1723 } 1724 1725 if (luch.pid) 1726 return (EPERM); 1727 1728 if (args->datap) { 1729 /* 1730 * The current implementation doesn't support setting 1731 * a capability (it's essentially a stub) so indicate 1732 * that no capabilities are currently set or available 1733 * to request. 1734 */ 1735 bzero (&lucd, sizeof(lucd)); 1736 error = copyout(&lucd, args->datap, sizeof(lucd)); 1737 } 1738 1739 return (error); 1740} 1741 1742int 1743linux_capset(struct thread *td, struct linux_capset_args *args) 1744{ 1745 struct l_user_cap_header luch; 1746 struct l_user_cap_data lucd; 1747 int error; 1748 1749 if (args->hdrp == NULL || args->datap == NULL) 1750 return (EFAULT); 1751 1752 error = copyin(args->hdrp, &luch, sizeof(luch)); 1753 if (error != 0) 1754 return (error); 1755 1756 if (luch.version != _LINUX_CAPABILITY_VERSION) { 1757 luch.version = _LINUX_CAPABILITY_VERSION; 1758 error = copyout(&luch, args->hdrp, sizeof(luch)); 1759 if (error) 1760 return (error); 1761 return (EINVAL); 1762 } 1763 1764 if (luch.pid) 1765 return (EPERM); 1766 1767 error = copyin(args->datap, &lucd, sizeof(lucd)); 1768 if (error != 0) 1769 return (error); 1770 1771 /* We currently don't support setting any capabilities. */ 1772 if (lucd.effective || lucd.permitted || lucd.inheritable) { 1773 linux_msg(td, 1774 "capset effective=0x%x, permitted=0x%x, " 1775 "inheritable=0x%x is not implemented", 1776 (int)lucd.effective, (int)lucd.permitted, 1777 (int)lucd.inheritable); 1778 return (EPERM); 1779 } 1780 1781 return (0); 1782} 1783 1784int 1785linux_prctl(struct thread *td, struct linux_prctl_args *args) 1786{ 1787 int error = 0, max_size; 1788 struct proc *p = td->td_proc; 1789 char comm[LINUX_MAX_COMM_LEN]; 1790 struct linux_emuldata *em; 1791 int pdeath_signal; 1792 1793#ifdef DEBUG 1794 if (ldebug(prctl)) 1795 printf(ARGS(prctl, "%d, %d, %d, %d, %d"), args->option, 1796 args->arg2, args->arg3, args->arg4, args->arg5); 1797#endif 1798 1799 switch (args->option) { 1800 case LINUX_PR_SET_PDEATHSIG: 1801 if (!LINUX_SIG_VALID(args->arg2)) 1802 return (EINVAL); 1803 em = em_find(p, EMUL_DOLOCK); 1804 KASSERT(em != NULL, ("prctl: emuldata not found.\n")); 1805 em->pdeath_signal = args->arg2; 1806 EMUL_UNLOCK(&emul_lock); 1807 break; 1808 case LINUX_PR_GET_PDEATHSIG: 1809 em = em_find(p, EMUL_DOLOCK); 1810 KASSERT(em != NULL, ("prctl: emuldata not found.\n")); 1811 pdeath_signal = em->pdeath_signal; 1812 EMUL_UNLOCK(&emul_lock); 1813 error = copyout(&pdeath_signal, 1814 (void *)(register_t)args->arg2, 1815 sizeof(pdeath_signal)); 1816 break; 1817 case LINUX_PR_GET_KEEPCAPS: 1818 /* 1819 * Indicate that we always clear the effective and 1820 * permitted capability sets when the user id becomes 1821 * non-zero (actually the capability sets are simply 1822 * always zero in the current implementation). 1823 */ 1824 td->td_retval[0] = 0; 1825 break; 1826 case LINUX_PR_SET_KEEPCAPS: 1827 /* 1828 * Ignore requests to keep the effective and permitted 1829 * capability sets when the user id becomes non-zero. 1830 */ 1831 break; 1832 case LINUX_PR_SET_NAME: 1833 /* 1834 * To be on the safe side we need to make sure to not 1835 * overflow the size a linux program expects. We already 1836 * do this here in the copyin, so that we don't need to 1837 * check on copyout. 1838 */ 1839 max_size = MIN(sizeof(comm), sizeof(p->p_comm)); 1840 error = copyinstr((void *)(register_t)args->arg2, comm, 1841 max_size, NULL); 1842 1843 /* Linux silently truncates the name if it is too long. */ 1844 if (error == ENAMETOOLONG) { 1845 /* 1846 * XXX: copyinstr() isn't documented to populate the 1847 * array completely, so do a copyin() to be on the 1848 * safe side. This should be changed in case 1849 * copyinstr() is changed to guarantee this. 1850 */ 1851 error = copyin((void *)(register_t)args->arg2, comm, 1852 max_size - 1); 1853 comm[max_size - 1] = '\0'; 1854 } 1855 if (error) 1856 return (error); 1857 1858 PROC_LOCK(p); 1859 strlcpy(p->p_comm, comm, sizeof(p->p_comm)); 1860 PROC_UNLOCK(p); 1861 break; 1862 case LINUX_PR_GET_NAME: 1863 PROC_LOCK(p); 1864 strlcpy(comm, p->p_comm, sizeof(comm)); 1865 PROC_UNLOCK(p); 1866 error = copyout(comm, (void *)(register_t)args->arg2, 1867 strlen(comm) + 1); 1868 break; 1869 default: 1870 error = EINVAL; 1871 break; 1872 } 1873 1874 return (error); 1875} 1876 1877/* 1878 * Get affinity of a process. 1879 */ 1880int 1881linux_sched_getaffinity(struct thread *td, 1882 struct linux_sched_getaffinity_args *args) 1883{ 1884 int error; 1885 struct cpuset_getaffinity_args cga; 1886 1887#ifdef DEBUG 1888 if (ldebug(sched_getaffinity)) 1889 printf(ARGS(sched_getaffinity, "%d, %d, *"), args->pid, 1890 args->len); 1891#endif 1892 if (args->len < sizeof(cpuset_t)) 1893 return (EINVAL); 1894 1895 cga.level = CPU_LEVEL_WHICH; 1896 cga.which = CPU_WHICH_PID; 1897 cga.id = args->pid; 1898 cga.cpusetsize = sizeof(cpuset_t); 1899 cga.mask = (cpuset_t *) args->user_mask_ptr; 1900 1901 if ((error = sys_cpuset_getaffinity(td, &cga)) == 0) 1902 td->td_retval[0] = sizeof(cpuset_t); 1903 1904 return (error); 1905} 1906 1907/* 1908 * Set affinity of a process. 1909 */ 1910int 1911linux_sched_setaffinity(struct thread *td, 1912 struct linux_sched_setaffinity_args *args) 1913{ 1914 struct cpuset_setaffinity_args csa; 1915 1916#ifdef DEBUG 1917 if (ldebug(sched_setaffinity)) 1918 printf(ARGS(sched_setaffinity, "%d, %d, *"), args->pid, 1919 args->len); 1920#endif 1921 if (args->len < sizeof(cpuset_t)) 1922 return (EINVAL); 1923 1924 csa.level = CPU_LEVEL_WHICH; 1925 csa.which = CPU_WHICH_PID; 1926 csa.id = args->pid; 1927 csa.cpusetsize = sizeof(cpuset_t); 1928 csa.mask = (cpuset_t *) args->user_mask_ptr; 1929 1930 return (sys_cpuset_setaffinity(td, &csa)); 1931} 1932