/*-
 * Copyright (c) 2002 Doug Rabson
 * Copyright (c) 1994-1995 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: releng/10.3/sys/compat/linux/linux_misc.c 301052 2016-05-31 16:55:50Z glebius $"); 32 33#include "opt_compat.h" 34#include "opt_kdtrace.h" 35 36#include <sys/param.h> 37#include <sys/blist.h> 38#include <sys/fcntl.h> 39#if defined(__i386__) 40#include <sys/imgact_aout.h> 41#endif 42#include <sys/jail.h> 43#include <sys/kernel.h> 44#include <sys/limits.h> 45#include <sys/lock.h> 46#include <sys/malloc.h> 47#include <sys/mman.h> 48#include <sys/mount.h> 49#include <sys/mutex.h> 50#include <sys/namei.h> 51#include <sys/priv.h> 52#include <sys/proc.h> 53#include <sys/reboot.h> 54#include <sys/racct.h> 55#include <sys/resourcevar.h> 56#include <sys/sched.h> 57#include <sys/sdt.h> 58#include <sys/signalvar.h> 59#include <sys/stat.h> 60#include <sys/syscallsubr.h> 61#include <sys/sysctl.h> 62#include <sys/sysproto.h> 63#include <sys/systm.h> 64#include <sys/time.h> 65#include <sys/vmmeter.h> 66#include <sys/vnode.h> 67#include <sys/wait.h> 68#include <sys/cpuset.h> 69 70#include <security/mac/mac_framework.h> 71 72#include <vm/vm.h> 73#include <vm/pmap.h> 74#include <vm/vm_kern.h> 75#include <vm/vm_map.h> 76#include <vm/vm_extern.h> 77#include <vm/vm_object.h> 78#include <vm/swap_pager.h> 79 80#ifdef COMPAT_LINUX32 81#include <machine/../linux32/linux.h> 82#include <machine/../linux32/linux32_proto.h> 83#else 84#include <machine/../linux/linux.h> 85#include <machine/../linux/linux_proto.h> 86#endif 87 88#include <compat/linux/linux_dtrace.h> 89#include <compat/linux/linux_file.h> 90#include <compat/linux/linux_mib.h> 91#include <compat/linux/linux_signal.h> 92#include <compat/linux/linux_timer.h> 93#include <compat/linux/linux_util.h> 94#include <compat/linux/linux_sysproto.h> 95#include <compat/linux/linux_emul.h> 96#include <compat/linux/linux_misc.h> 97 98/** 99 * Special DTrace provider for the linuxulator. 100 * 101 * In this file we define the provider for the entire linuxulator. 
All 102 * modules (= files of the linuxulator) use it. 103 * 104 * We define a different name depending on the emulated bitsize, see 105 * ../../<ARCH>/linux{,32}/linux.h, e.g.: 106 * native bitsize = linuxulator 107 * amd64, 32bit emulation = linuxulator32 108 */ 109LIN_SDT_PROVIDER_DEFINE(LINUX_DTRACE); 110 111int stclohz; /* Statistics clock frequency */ 112 113static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { 114 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 115 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 116 RLIMIT_MEMLOCK, RLIMIT_AS 117}; 118 119struct l_sysinfo { 120 l_long uptime; /* Seconds since boot */ 121 l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ 122#define LINUX_SYSINFO_LOADS_SCALE 65536 123 l_ulong totalram; /* Total usable main memory size */ 124 l_ulong freeram; /* Available memory size */ 125 l_ulong sharedram; /* Amount of shared memory */ 126 l_ulong bufferram; /* Memory used by buffers */ 127 l_ulong totalswap; /* Total swap space size */ 128 l_ulong freeswap; /* swap space still available */ 129 l_ushort procs; /* Number of current processes */ 130 l_ushort pads; 131 l_ulong totalbig; 132 l_ulong freebig; 133 l_uint mem_unit; 134 char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ 135}; 136 137struct l_pselect6arg { 138 l_uintptr_t ss; 139 l_size_t ss_len; 140}; 141 142static int linux_utimensat_nsec_valid(l_long); 143 144 145int 146linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) 147{ 148 struct l_sysinfo sysinfo; 149 vm_object_t object; 150 int i, j; 151 struct timespec ts; 152 153 bzero(&sysinfo, sizeof(sysinfo)); 154 getnanouptime(&ts); 155 if (ts.tv_nsec != 0) 156 ts.tv_sec++; 157 sysinfo.uptime = ts.tv_sec; 158 159 /* Use the information from the mib to get our load averages */ 160 for (i = 0; i < 3; i++) 161 sysinfo.loads[i] = averunnable.ldavg[i] * 162 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; 163 164 sysinfo.totalram = physmem * PAGE_SIZE; 165 sysinfo.freeram 
= sysinfo.totalram - cnt.v_wire_count * PAGE_SIZE; 166 167 sysinfo.sharedram = 0; 168 mtx_lock(&vm_object_list_mtx); 169 TAILQ_FOREACH(object, &vm_object_list, object_list) 170 if (object->shadow_count > 1) 171 sysinfo.sharedram += object->resident_page_count; 172 mtx_unlock(&vm_object_list_mtx); 173 174 sysinfo.sharedram *= PAGE_SIZE; 175 sysinfo.bufferram = 0; 176 177 swap_pager_status(&i, &j); 178 sysinfo.totalswap = i * PAGE_SIZE; 179 sysinfo.freeswap = (i - j) * PAGE_SIZE; 180 181 sysinfo.procs = nprocs; 182 183 /* The following are only present in newer Linux kernels. */ 184 sysinfo.totalbig = 0; 185 sysinfo.freebig = 0; 186 sysinfo.mem_unit = 1; 187 188 return (copyout(&sysinfo, args->info, sizeof(sysinfo))); 189} 190 191int 192linux_alarm(struct thread *td, struct linux_alarm_args *args) 193{ 194 struct itimerval it, old_it; 195 u_int secs; 196 int error; 197 198#ifdef DEBUG 199 if (ldebug(alarm)) 200 printf(ARGS(alarm, "%u"), args->secs); 201#endif 202 203 secs = args->secs; 204 205 if (secs > INT_MAX) 206 secs = INT_MAX; 207 208 it.it_value.tv_sec = (long) secs; 209 it.it_value.tv_usec = 0; 210 it.it_interval.tv_sec = 0; 211 it.it_interval.tv_usec = 0; 212 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); 213 if (error) 214 return (error); 215 if (timevalisset(&old_it.it_value)) { 216 if (old_it.it_value.tv_usec != 0) 217 old_it.it_value.tv_sec++; 218 td->td_retval[0] = old_it.it_value.tv_sec; 219 } 220 return (0); 221} 222 223int 224linux_brk(struct thread *td, struct linux_brk_args *args) 225{ 226 struct vmspace *vm = td->td_proc->p_vmspace; 227 vm_offset_t new, old; 228 struct obreak_args /* { 229 char * nsize; 230 } */ tmp; 231 232#ifdef DEBUG 233 if (ldebug(brk)) 234 printf(ARGS(brk, "%p"), (void *)(uintptr_t)args->dsend); 235#endif 236 old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize); 237 new = (vm_offset_t)args->dsend; 238 tmp.nsize = (char *)new; 239 if (((caddr_t)new > vm->vm_daddr) && !sys_obreak(td, &tmp)) 240 td->td_retval[0] = 
(long)new; 241 else 242 td->td_retval[0] = (long)old; 243 244 return (0); 245} 246 247#if defined(__i386__) 248/* XXX: what about amd64/linux32? */ 249 250int 251linux_uselib(struct thread *td, struct linux_uselib_args *args) 252{ 253 struct nameidata ni; 254 struct vnode *vp; 255 struct exec *a_out; 256 struct vattr attr; 257 vm_offset_t vmaddr; 258 unsigned long file_offset; 259 unsigned long bss_size; 260 char *library; 261 ssize_t aresid; 262 int error, locked, writecount; 263 264 LCONVPATHEXIST(td, args->library, &library); 265 266#ifdef DEBUG 267 if (ldebug(uselib)) 268 printf(ARGS(uselib, "%s"), library); 269#endif 270 271 a_out = NULL; 272 locked = 0; 273 vp = NULL; 274 275 NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, 276 UIO_SYSSPACE, library, td); 277 error = namei(&ni); 278 LFREEPATH(library); 279 if (error) 280 goto cleanup; 281 282 vp = ni.ni_vp; 283 NDFREE(&ni, NDF_ONLY_PNBUF); 284 285 /* 286 * From here on down, we have a locked vnode that must be unlocked. 287 * XXX: The code below largely duplicates exec_check_permissions(). 288 */ 289 locked = 1; 290 291 /* Writable? */ 292 error = VOP_GET_WRITECOUNT(vp, &writecount); 293 if (error != 0) 294 goto cleanup; 295 if (writecount != 0) { 296 error = ETXTBSY; 297 goto cleanup; 298 } 299 300 /* Executable? */ 301 error = VOP_GETATTR(vp, &attr, td->td_ucred); 302 if (error) 303 goto cleanup; 304 305 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 306 ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) { 307 /* EACCESS is what exec(2) returns. */ 308 error = ENOEXEC; 309 goto cleanup; 310 } 311 312 /* Sensible size? */ 313 if (attr.va_size == 0) { 314 error = ENOEXEC; 315 goto cleanup; 316 } 317 318 /* Can we access it? */ 319 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 320 if (error) 321 goto cleanup; 322 323 /* 324 * XXX: This should use vn_open() so that it is properly authorized, 325 * and to reduce code redundancy all over the place here. 
326 * XXX: Not really, it duplicates far more of exec_check_permissions() 327 * than vn_open(). 328 */ 329#ifdef MAC 330 error = mac_vnode_check_open(td->td_ucred, vp, VREAD); 331 if (error) 332 goto cleanup; 333#endif 334 error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); 335 if (error) 336 goto cleanup; 337 338 /* Pull in executable header into exec_map */ 339 error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE, 340 VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0); 341 if (error) 342 goto cleanup; 343 344 /* Is it a Linux binary ? */ 345 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 346 error = ENOEXEC; 347 goto cleanup; 348 } 349 350 /* 351 * While we are here, we should REALLY do some more checks 352 */ 353 354 /* Set file/virtual offset based on a.out variant. */ 355 switch ((int)(a_out->a_magic & 0xffff)) { 356 case 0413: /* ZMAGIC */ 357 file_offset = 1024; 358 break; 359 case 0314: /* QMAGIC */ 360 file_offset = 0; 361 break; 362 default: 363 error = ENOEXEC; 364 goto cleanup; 365 } 366 367 bss_size = round_page(a_out->a_bss); 368 369 /* Check various fields in header for validity/bounds. */ 370 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 371 error = ENOEXEC; 372 goto cleanup; 373 } 374 375 /* text + data can't exceed file size */ 376 if (a_out->a_data + a_out->a_text > attr.va_size) { 377 error = EFAULT; 378 goto cleanup; 379 } 380 381 /* 382 * text/data/bss must not exceed limits 383 * XXX - this is not complete. it should check current usage PLUS 384 * the resources needed by this library. 385 */ 386 PROC_LOCK(td->td_proc); 387 if (a_out->a_text > maxtsiz || 388 a_out->a_data + bss_size > lim_cur(td->td_proc, RLIMIT_DATA) || 389 racct_set(td->td_proc, RACCT_DATA, a_out->a_data + 390 bss_size) != 0) { 391 PROC_UNLOCK(td->td_proc); 392 error = ENOMEM; 393 goto cleanup; 394 } 395 PROC_UNLOCK(td->td_proc); 396 397 /* 398 * Prevent more writers. 
399 * XXX: Note that if any of the VM operations fail below we don't 400 * clear this flag. 401 */ 402 VOP_SET_TEXT(vp); 403 404 /* 405 * Lock no longer needed 406 */ 407 locked = 0; 408 VOP_UNLOCK(vp, 0); 409 410 /* 411 * Check if file_offset page aligned. Currently we cannot handle 412 * misalinged file offsets, and so we read in the entire image 413 * (what a waste). 414 */ 415 if (file_offset & PAGE_MASK) { 416#ifdef DEBUG 417 printf("uselib: Non page aligned binary %lu\n", file_offset); 418#endif 419 /* Map text+data read/write/execute */ 420 421 /* a_entry is the load address and is page aligned */ 422 vmaddr = trunc_page(a_out->a_entry); 423 424 /* get anon user mapping, read+write+execute */ 425 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 426 &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE, 427 VM_PROT_ALL, VM_PROT_ALL, 0); 428 if (error) 429 goto cleanup; 430 431 error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset, 432 a_out->a_text + a_out->a_data, UIO_USERSPACE, 0, 433 td->td_ucred, NOCRED, &aresid, td); 434 if (error != 0) 435 goto cleanup; 436 if (aresid != 0) { 437 error = ENOEXEC; 438 goto cleanup; 439 } 440 } else { 441#ifdef DEBUG 442 printf("uselib: Page aligned binary %lu\n", file_offset); 443#endif 444 /* 445 * for QMAGIC, a_entry is 20 bytes beyond the load address 446 * to skip the executable header 447 */ 448 vmaddr = trunc_page(a_out->a_entry); 449 450 /* 451 * Map it all into the process's space as a single 452 * copy-on-write "data" segment. 
453 */ 454 error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr, 455 a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL, 456 MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset); 457 if (error) 458 goto cleanup; 459 } 460#ifdef DEBUG 461 printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long *)vmaddr)[0], 462 ((long *)vmaddr)[1]); 463#endif 464 if (bss_size != 0) { 465 /* Calculate BSS start address */ 466 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + 467 a_out->a_data; 468 469 /* allocate some 'anon' space */ 470 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 471 &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL, 472 VM_PROT_ALL, 0); 473 if (error) 474 goto cleanup; 475 } 476 477cleanup: 478 /* Unlock vnode if needed */ 479 if (locked) 480 VOP_UNLOCK(vp, 0); 481 482 /* Release the temporary mapping. */ 483 if (a_out) 484 kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE); 485 486 return (error); 487} 488 489#endif /* __i386__ */ 490 491int 492linux_select(struct thread *td, struct linux_select_args *args) 493{ 494 l_timeval ltv; 495 struct timeval tv0, tv1, utv, *tvp; 496 int error; 497 498#ifdef DEBUG 499 if (ldebug(select)) 500 printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds, 501 (void *)args->readfds, (void *)args->writefds, 502 (void *)args->exceptfds, (void *)args->timeout); 503#endif 504 505 /* 506 * Store current time for computation of the amount of 507 * time left. 508 */ 509 if (args->timeout) { 510 if ((error = copyin(args->timeout, <v, sizeof(ltv)))) 511 goto select_out; 512 utv.tv_sec = ltv.tv_sec; 513 utv.tv_usec = ltv.tv_usec; 514#ifdef DEBUG 515 if (ldebug(select)) 516 printf(LMSG("incoming timeout (%jd/%ld)"), 517 (intmax_t)utv.tv_sec, utv.tv_usec); 518#endif 519 520 if (itimerfix(&utv)) { 521 /* 522 * The timeval was invalid. Convert it to something 523 * valid that will act as it does under Linux. 
524 */ 525 utv.tv_sec += utv.tv_usec / 1000000; 526 utv.tv_usec %= 1000000; 527 if (utv.tv_usec < 0) { 528 utv.tv_sec -= 1; 529 utv.tv_usec += 1000000; 530 } 531 if (utv.tv_sec < 0) 532 timevalclear(&utv); 533 } 534 microtime(&tv0); 535 tvp = &utv; 536 } else 537 tvp = NULL; 538 539 error = kern_select(td, args->nfds, args->readfds, args->writefds, 540 args->exceptfds, tvp, LINUX_NFDBITS); 541 542#ifdef DEBUG 543 if (ldebug(select)) 544 printf(LMSG("real select returns %d"), error); 545#endif 546 if (error) 547 goto select_out; 548 549 if (args->timeout) { 550 if (td->td_retval[0]) { 551 /* 552 * Compute how much time was left of the timeout, 553 * by subtracting the current time and the time 554 * before we started the call, and subtracting 555 * that result from the user-supplied value. 556 */ 557 microtime(&tv1); 558 timevalsub(&tv1, &tv0); 559 timevalsub(&utv, &tv1); 560 if (utv.tv_sec < 0) 561 timevalclear(&utv); 562 } else 563 timevalclear(&utv); 564#ifdef DEBUG 565 if (ldebug(select)) 566 printf(LMSG("outgoing timeout (%jd/%ld)"), 567 (intmax_t)utv.tv_sec, utv.tv_usec); 568#endif 569 ltv.tv_sec = utv.tv_sec; 570 ltv.tv_usec = utv.tv_usec; 571 if ((error = copyout(<v, args->timeout, sizeof(ltv)))) 572 goto select_out; 573 } 574 575select_out: 576#ifdef DEBUG 577 if (ldebug(select)) 578 printf(LMSG("select_out -> %d"), error); 579#endif 580 return (error); 581} 582 583int 584linux_mremap(struct thread *td, struct linux_mremap_args *args) 585{ 586 struct munmap_args /* { 587 void *addr; 588 size_t len; 589 } */ bsd_args; 590 int error = 0; 591 592#ifdef DEBUG 593 if (ldebug(mremap)) 594 printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"), 595 (void *)(uintptr_t)args->addr, 596 (unsigned long)args->old_len, 597 (unsigned long)args->new_len, 598 (unsigned long)args->flags); 599#endif 600 601 if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { 602 td->td_retval[0] = 0; 603 return (EINVAL); 604 } 605 606 /* 607 * Check for the page alignment. 
608 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. 609 */ 610 if (args->addr & PAGE_MASK) { 611 td->td_retval[0] = 0; 612 return (EINVAL); 613 } 614 615 args->new_len = round_page(args->new_len); 616 args->old_len = round_page(args->old_len); 617 618 if (args->new_len > args->old_len) { 619 td->td_retval[0] = 0; 620 return (ENOMEM); 621 } 622 623 if (args->new_len < args->old_len) { 624 bsd_args.addr = 625 (caddr_t)((uintptr_t)args->addr + args->new_len); 626 bsd_args.len = args->old_len - args->new_len; 627 error = sys_munmap(td, &bsd_args); 628 } 629 630 td->td_retval[0] = error ? 0 : (uintptr_t)args->addr; 631 return (error); 632} 633 634#define LINUX_MS_ASYNC 0x0001 635#define LINUX_MS_INVALIDATE 0x0002 636#define LINUX_MS_SYNC 0x0004 637 638int 639linux_msync(struct thread *td, struct linux_msync_args *args) 640{ 641 struct msync_args bsd_args; 642 643 bsd_args.addr = (caddr_t)(uintptr_t)args->addr; 644 bsd_args.len = (uintptr_t)args->len; 645 bsd_args.flags = args->fl & ~LINUX_MS_SYNC; 646 647 return (sys_msync(td, &bsd_args)); 648} 649 650int 651linux_time(struct thread *td, struct linux_time_args *args) 652{ 653 struct timeval tv; 654 l_time_t tm; 655 int error; 656 657#ifdef DEBUG 658 if (ldebug(time)) 659 printf(ARGS(time, "*")); 660#endif 661 662 microtime(&tv); 663 tm = tv.tv_sec; 664 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) 665 return (error); 666 td->td_retval[0] = tm; 667 return (0); 668} 669 670struct l_times_argv { 671 l_clock_t tms_utime; 672 l_clock_t tms_stime; 673 l_clock_t tms_cutime; 674 l_clock_t tms_cstime; 675}; 676 677 678/* 679 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. 680 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK 681 * auxiliary vector entry. 
682 */ 683#define CLK_TCK 100 684 685#define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 686#define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) 687 688#define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER_2004000 ? \ 689 CONVNTCK(r) : CONVOTCK(r)) 690 691int 692linux_times(struct thread *td, struct linux_times_args *args) 693{ 694 struct timeval tv, utime, stime, cutime, cstime; 695 struct l_times_argv tms; 696 struct proc *p; 697 int error; 698 699#ifdef DEBUG 700 if (ldebug(times)) 701 printf(ARGS(times, "*")); 702#endif 703 704 if (args->buf != NULL) { 705 p = td->td_proc; 706 PROC_LOCK(p); 707 PROC_STATLOCK(p); 708 calcru(p, &utime, &stime); 709 PROC_STATUNLOCK(p); 710 calccru(p, &cutime, &cstime); 711 PROC_UNLOCK(p); 712 713 tms.tms_utime = CONVTCK(utime); 714 tms.tms_stime = CONVTCK(stime); 715 716 tms.tms_cutime = CONVTCK(cutime); 717 tms.tms_cstime = CONVTCK(cstime); 718 719 if ((error = copyout(&tms, args->buf, sizeof(tms)))) 720 return (error); 721 } 722 723 microuptime(&tv); 724 td->td_retval[0] = (int)CONVTCK(tv); 725 return (0); 726} 727 728int 729linux_newuname(struct thread *td, struct linux_newuname_args *args) 730{ 731 struct l_new_utsname utsname; 732 char osname[LINUX_MAX_UTSNAME]; 733 char osrelease[LINUX_MAX_UTSNAME]; 734 char *p; 735 736#ifdef DEBUG 737 if (ldebug(newuname)) 738 printf(ARGS(newuname, "*")); 739#endif 740 741 linux_get_osname(td, osname); 742 linux_get_osrelease(td, osrelease); 743 744 bzero(&utsname, sizeof(utsname)); 745 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); 746 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); 747 getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); 748 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); 749 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); 750 for (p = utsname.version; *p != '\0'; ++p) 751 if (*p == '\n') { 752 *p = '\0'; 753 break; 754 } 755 strlcpy(utsname.machine, linux_kplatform, 
LINUX_MAX_UTSNAME); 756 757 return (copyout(&utsname, args->buf, sizeof(utsname))); 758} 759 760struct l_utimbuf { 761 l_time_t l_actime; 762 l_time_t l_modtime; 763}; 764 765int 766linux_utime(struct thread *td, struct linux_utime_args *args) 767{ 768 struct timeval tv[2], *tvp; 769 struct l_utimbuf lut; 770 char *fname; 771 int error; 772 773 LCONVPATHEXIST(td, args->fname, &fname); 774 775#ifdef DEBUG 776 if (ldebug(utime)) 777 printf(ARGS(utime, "%s, *"), fname); 778#endif 779 780 if (args->times) { 781 if ((error = copyin(args->times, &lut, sizeof lut))) { 782 LFREEPATH(fname); 783 return (error); 784 } 785 tv[0].tv_sec = lut.l_actime; 786 tv[0].tv_usec = 0; 787 tv[1].tv_sec = lut.l_modtime; 788 tv[1].tv_usec = 0; 789 tvp = tv; 790 } else 791 tvp = NULL; 792 793 error = kern_utimes(td, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); 794 LFREEPATH(fname); 795 return (error); 796} 797 798int 799linux_utimes(struct thread *td, struct linux_utimes_args *args) 800{ 801 l_timeval ltv[2]; 802 struct timeval tv[2], *tvp = NULL; 803 char *fname; 804 int error; 805 806 LCONVPATHEXIST(td, args->fname, &fname); 807 808#ifdef DEBUG 809 if (ldebug(utimes)) 810 printf(ARGS(utimes, "%s, *"), fname); 811#endif 812 813 if (args->tptr != NULL) { 814 if ((error = copyin(args->tptr, ltv, sizeof ltv))) { 815 LFREEPATH(fname); 816 return (error); 817 } 818 tv[0].tv_sec = ltv[0].tv_sec; 819 tv[0].tv_usec = ltv[0].tv_usec; 820 tv[1].tv_sec = ltv[1].tv_sec; 821 tv[1].tv_usec = ltv[1].tv_usec; 822 tvp = tv; 823 } 824 825 error = kern_utimes(td, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); 826 LFREEPATH(fname); 827 return (error); 828} 829 830static int 831linux_utimensat_nsec_valid(l_long nsec) 832{ 833 834 if (nsec == LINUX_UTIME_OMIT || nsec == LINUX_UTIME_NOW) 835 return (0); 836 if (nsec >= 0 && nsec <= 999999999) 837 return (0); 838 return (1); 839} 840 841int 842linux_utimensat(struct thread *td, struct linux_utimensat_args *args) 843{ 844 struct l_timespec l_times[2]; 845 struct timespec 
times[2], *timesp = NULL; 846 char *path = NULL; 847 int error, dfd, flags = 0; 848 849 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 850 851#ifdef DEBUG 852 if (ldebug(utimensat)) 853 printf(ARGS(utimensat, "%d, *"), dfd); 854#endif 855 856 if (args->flags & ~LINUX_AT_SYMLINK_NOFOLLOW) 857 return (EINVAL); 858 859 if (args->times != NULL) { 860 error = copyin(args->times, l_times, sizeof(l_times)); 861 if (error != 0) 862 return (error); 863 864 if (linux_utimensat_nsec_valid(l_times[0].tv_nsec) != 0 || 865 linux_utimensat_nsec_valid(l_times[1].tv_nsec) != 0) 866 return (EINVAL); 867 868 times[0].tv_sec = l_times[0].tv_sec; 869 switch (l_times[0].tv_nsec) 870 { 871 case LINUX_UTIME_OMIT: 872 times[0].tv_nsec = UTIME_OMIT; 873 break; 874 case LINUX_UTIME_NOW: 875 times[0].tv_nsec = UTIME_NOW; 876 break; 877 default: 878 times[0].tv_nsec = l_times[0].tv_nsec; 879 } 880 881 times[1].tv_sec = l_times[1].tv_sec; 882 switch (l_times[1].tv_nsec) 883 { 884 case LINUX_UTIME_OMIT: 885 times[1].tv_nsec = UTIME_OMIT; 886 break; 887 case LINUX_UTIME_NOW: 888 times[1].tv_nsec = UTIME_NOW; 889 break; 890 default: 891 times[1].tv_nsec = l_times[1].tv_nsec; 892 break; 893 } 894 timesp = times; 895 } 896 897 if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) 898 /* This breaks POSIX, but is what the Linux kernel does 899 * _on purpose_ (documented in the man page for utimensat(2)), 900 * so we must follow that behaviour. 
*/ 901 return (0); 902 903 if (args->pathname != NULL) 904 LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); 905 else if (args->flags != 0) 906 return (EINVAL); 907 908 if (args->flags & LINUX_AT_SYMLINK_NOFOLLOW) 909 flags |= AT_SYMLINK_NOFOLLOW; 910 911 if (path == NULL) 912 error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE); 913 else { 914 error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp, 915 UIO_SYSSPACE, flags); 916 LFREEPATH(path); 917 } 918 919 return (error); 920} 921 922int 923linux_futimesat(struct thread *td, struct linux_futimesat_args *args) 924{ 925 l_timeval ltv[2]; 926 struct timeval tv[2], *tvp = NULL; 927 char *fname; 928 int error, dfd; 929 930 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 931 LCONVPATHEXIST_AT(td, args->filename, &fname, dfd); 932 933#ifdef DEBUG 934 if (ldebug(futimesat)) 935 printf(ARGS(futimesat, "%s, *"), fname); 936#endif 937 938 if (args->utimes != NULL) { 939 if ((error = copyin(args->utimes, ltv, sizeof ltv))) { 940 LFREEPATH(fname); 941 return (error); 942 } 943 tv[0].tv_sec = ltv[0].tv_sec; 944 tv[0].tv_usec = ltv[0].tv_usec; 945 tv[1].tv_sec = ltv[1].tv_sec; 946 tv[1].tv_usec = ltv[1].tv_usec; 947 tvp = tv; 948 } 949 950 error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); 951 LFREEPATH(fname); 952 return (error); 953} 954 955int 956linux_common_wait(struct thread *td, int pid, int *status, 957 int options, struct rusage *ru) 958{ 959 int error, tmpstat; 960 961 error = kern_wait(td, pid, &tmpstat, options, ru); 962 if (error) 963 return (error); 964 965 if (status) { 966 tmpstat &= 0xffff; 967 if (WIFSIGNALED(tmpstat)) 968 tmpstat = (tmpstat & 0xffffff80) | 969 bsd_to_linux_signal(WTERMSIG(tmpstat)); 970 else if (WIFSTOPPED(tmpstat)) 971 tmpstat = (tmpstat & 0xffff00ff) | 972 (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8); 973 else if (WIFCONTINUED(tmpstat)) 974 tmpstat = 0xffff; 975 error = copyout(&tmpstat, status, sizeof(int)); 976 } 977 978 return (error); 979} 980 
981#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 982int 983linux_waitpid(struct thread *td, struct linux_waitpid_args *args) 984{ 985 struct linux_wait4_args wait4_args; 986 987#ifdef DEBUG 988 if (ldebug(waitpid)) 989 printf(ARGS(waitpid, "%d, %p, %d"), 990 args->pid, (void *)args->status, args->options); 991#endif 992 993 wait4_args.pid = args->pid; 994 wait4_args.status = args->status; 995 wait4_args.options = args->options; 996 wait4_args.rusage = NULL; 997 998 return (linux_wait4(td, &wait4_args)); 999} 1000#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1001 1002int 1003linux_wait4(struct thread *td, struct linux_wait4_args *args) 1004{ 1005 int error, options; 1006 struct rusage ru, *rup; 1007 1008#ifdef DEBUG 1009 if (ldebug(wait4)) 1010 printf(ARGS(wait4, "%d, %p, %d, %p"), 1011 args->pid, (void *)args->status, args->options, 1012 (void *)args->rusage); 1013#endif 1014 if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG | 1015 LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 1016 return (EINVAL); 1017 1018 options = WEXITED; 1019 linux_to_bsd_waitopts(args->options, &options); 1020 1021 if (args->rusage != NULL) 1022 rup = &ru; 1023 else 1024 rup = NULL; 1025 error = linux_common_wait(td, args->pid, args->status, options, rup); 1026 if (error != 0) 1027 return (error); 1028 if (args->rusage != NULL) 1029 error = linux_copyout_rusage(&ru, args->rusage); 1030 return (error); 1031} 1032 1033int 1034linux_waitid(struct thread *td, struct linux_waitid_args *args) 1035{ 1036 int status, options, sig; 1037 struct __wrusage wru; 1038 siginfo_t siginfo; 1039 l_siginfo_t lsi; 1040 idtype_t idtype; 1041 struct proc *p; 1042 int error; 1043 1044 options = 0; 1045 linux_to_bsd_waitopts(args->options, &options); 1046 1047 if (options & ~(WNOHANG | WNOWAIT | WEXITED | WUNTRACED | WCONTINUED)) 1048 return (EINVAL); 1049 if (!(options & (WEXITED | WUNTRACED | WCONTINUED))) 1050 return (EINVAL); 1051 1052 switch (args->idtype) { 1053 
case LINUX_P_ALL: 1054 idtype = P_ALL; 1055 break; 1056 case LINUX_P_PID: 1057 if (args->id <= 0) 1058 return (EINVAL); 1059 idtype = P_PID; 1060 break; 1061 case LINUX_P_PGID: 1062 if (args->id <= 0) 1063 return (EINVAL); 1064 idtype = P_PGID; 1065 break; 1066 default: 1067 return (EINVAL); 1068 } 1069 1070 error = kern_wait6(td, idtype, args->id, &status, options, 1071 &wru, &siginfo); 1072 if (error != 0) 1073 return (error); 1074 if (args->rusage != NULL) { 1075 error = linux_copyout_rusage(&wru.wru_children, 1076 args->rusage); 1077 if (error != 0) 1078 return (error); 1079 } 1080 if (args->info != NULL) { 1081 p = td->td_proc; 1082 if (td->td_retval[0] == 0) 1083 bzero(&lsi, sizeof(lsi)); 1084 else { 1085 sig = bsd_to_linux_signal(siginfo.si_signo); 1086 siginfo_to_lsiginfo(&siginfo, &lsi, sig); 1087 } 1088 error = copyout(&lsi, args->info, sizeof(lsi)); 1089 } 1090 td->td_retval[0] = 0; 1091 1092 return (error); 1093} 1094 1095int 1096linux_mknod(struct thread *td, struct linux_mknod_args *args) 1097{ 1098 char *path; 1099 int error; 1100 1101 LCONVPATHCREAT(td, args->path, &path); 1102 1103#ifdef DEBUG 1104 if (ldebug(mknod)) 1105 printf(ARGS(mknod, "%s, %d, %ju"), path, args->mode, 1106 (uintmax_t)args->dev); 1107#endif 1108 1109 switch (args->mode & S_IFMT) { 1110 case S_IFIFO: 1111 case S_IFSOCK: 1112 error = kern_mkfifo(td, path, UIO_SYSSPACE, args->mode); 1113 break; 1114 1115 case S_IFCHR: 1116 case S_IFBLK: 1117 error = kern_mknod(td, path, UIO_SYSSPACE, args->mode, 1118 args->dev); 1119 break; 1120 1121 case S_IFDIR: 1122 error = EPERM; 1123 break; 1124 1125 case 0: 1126 args->mode |= S_IFREG; 1127 /* FALLTHROUGH */ 1128 case S_IFREG: 1129 error = kern_open(td, path, UIO_SYSSPACE, 1130 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 1131 if (error == 0) 1132 kern_close(td, td->td_retval[0]); 1133 break; 1134 1135 default: 1136 error = EINVAL; 1137 break; 1138 } 1139 LFREEPATH(path); 1140 return (error); 1141} 1142 1143int 1144linux_mknodat(struct thread 
*td, struct linux_mknodat_args *args) 1145{ 1146 char *path; 1147 int error, dfd; 1148 1149 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 1150 LCONVPATHCREAT_AT(td, args->filename, &path, dfd); 1151 1152#ifdef DEBUG 1153 if (ldebug(mknodat)) 1154 printf(ARGS(mknodat, "%s, %d, %d"), path, args->mode, args->dev); 1155#endif 1156 1157 switch (args->mode & S_IFMT) { 1158 case S_IFIFO: 1159 case S_IFSOCK: 1160 error = kern_mkfifoat(td, dfd, path, UIO_SYSSPACE, args->mode); 1161 break; 1162 1163 case S_IFCHR: 1164 case S_IFBLK: 1165 error = kern_mknodat(td, dfd, path, UIO_SYSSPACE, args->mode, 1166 args->dev); 1167 break; 1168 1169 case S_IFDIR: 1170 error = EPERM; 1171 break; 1172 1173 case 0: 1174 args->mode |= S_IFREG; 1175 /* FALLTHROUGH */ 1176 case S_IFREG: 1177 error = kern_openat(td, dfd, path, UIO_SYSSPACE, 1178 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 1179 if (error == 0) 1180 kern_close(td, td->td_retval[0]); 1181 break; 1182 1183 default: 1184 error = EINVAL; 1185 break; 1186 } 1187 LFREEPATH(path); 1188 return (error); 1189} 1190 1191/* 1192 * UGH! This is just about the dumbest idea I've ever heard!! 1193 */ 1194int 1195linux_personality(struct thread *td, struct linux_personality_args *args) 1196{ 1197#ifdef DEBUG 1198 if (ldebug(personality)) 1199 printf(ARGS(personality, "%lu"), (unsigned long)args->per); 1200#endif 1201 if (args->per != 0) 1202 return (EINVAL); 1203 1204 /* Yes Jim, it's still a Linux... 
*/ 1205 td->td_retval[0] = 0; 1206 return (0); 1207} 1208 1209struct l_itimerval { 1210 l_timeval it_interval; 1211 l_timeval it_value; 1212}; 1213 1214#define B2L_ITIMERVAL(bip, lip) \ 1215 (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ 1216 (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ 1217 (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ 1218 (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; 1219 1220int 1221linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) 1222{ 1223 int error; 1224 struct l_itimerval ls; 1225 struct itimerval aitv, oitv; 1226 1227#ifdef DEBUG 1228 if (ldebug(setitimer)) 1229 printf(ARGS(setitimer, "%p, %p"), 1230 (void *)uap->itv, (void *)uap->oitv); 1231#endif 1232 1233 if (uap->itv == NULL) { 1234 uap->itv = uap->oitv; 1235 return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); 1236 } 1237 1238 error = copyin(uap->itv, &ls, sizeof(ls)); 1239 if (error != 0) 1240 return (error); 1241 B2L_ITIMERVAL(&aitv, &ls); 1242#ifdef DEBUG 1243 if (ldebug(setitimer)) { 1244 printf("setitimer: value: sec: %jd, usec: %ld\n", 1245 (intmax_t)aitv.it_value.tv_sec, aitv.it_value.tv_usec); 1246 printf("setitimer: interval: sec: %jd, usec: %ld\n", 1247 (intmax_t)aitv.it_interval.tv_sec, aitv.it_interval.tv_usec); 1248 } 1249#endif 1250 error = kern_setitimer(td, uap->which, &aitv, &oitv); 1251 if (error != 0 || uap->oitv == NULL) 1252 return (error); 1253 B2L_ITIMERVAL(&ls, &oitv); 1254 1255 return (copyout(&ls, uap->oitv, sizeof(ls))); 1256} 1257 1258int 1259linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) 1260{ 1261 int error; 1262 struct l_itimerval ls; 1263 struct itimerval aitv; 1264 1265#ifdef DEBUG 1266 if (ldebug(getitimer)) 1267 printf(ARGS(getitimer, "%p"), (void *)uap->itv); 1268#endif 1269 error = kern_getitimer(td, uap->which, &aitv); 1270 if (error != 0) 1271 return (error); 1272 B2L_ITIMERVAL(&ls, &aitv); 1273 return (copyout(&ls, uap->itv, sizeof(ls))); 1274} 1275 1276#if 
defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1277int 1278linux_nice(struct thread *td, struct linux_nice_args *args) 1279{ 1280 struct setpriority_args bsd_args; 1281 1282 bsd_args.which = PRIO_PROCESS; 1283 bsd_args.who = 0; /* current process */ 1284 bsd_args.prio = args->inc; 1285 return (sys_setpriority(td, &bsd_args)); 1286} 1287#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1288 1289int 1290linux_setgroups(struct thread *td, struct linux_setgroups_args *args) 1291{ 1292 struct ucred *newcred, *oldcred; 1293 l_gid_t *linux_gidset; 1294 gid_t *bsd_gidset; 1295 int ngrp, error; 1296 struct proc *p; 1297 1298 ngrp = args->gidsetsize; 1299 if (ngrp < 0 || ngrp >= ngroups_max + 1) 1300 return (EINVAL); 1301 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); 1302 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); 1303 if (error) 1304 goto out; 1305 newcred = crget(); 1306 crextend(newcred, ngrp + 1); 1307 p = td->td_proc; 1308 PROC_LOCK(p); 1309 oldcred = p->p_ucred; 1310 crcopy(newcred, oldcred); 1311 1312 /* 1313 * cr_groups[0] holds egid. Setting the whole set from 1314 * the supplied set will cause egid to be changed too. 1315 * Keep cr_groups[0] unchanged to prevent that. 
1316 */ 1317 1318 if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS, 0)) != 0) { 1319 PROC_UNLOCK(p); 1320 crfree(newcred); 1321 goto out; 1322 } 1323 1324 if (ngrp > 0) { 1325 newcred->cr_ngroups = ngrp + 1; 1326 1327 bsd_gidset = newcred->cr_groups; 1328 ngrp--; 1329 while (ngrp >= 0) { 1330 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1331 ngrp--; 1332 } 1333 } else 1334 newcred->cr_ngroups = 1; 1335 1336 setsugid(p); 1337 p->p_ucred = newcred; 1338 PROC_UNLOCK(p); 1339 crfree(oldcred); 1340 error = 0; 1341out: 1342 free(linux_gidset, M_LINUX); 1343 return (error); 1344} 1345 1346int 1347linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1348{ 1349 struct ucred *cred; 1350 l_gid_t *linux_gidset; 1351 gid_t *bsd_gidset; 1352 int bsd_gidsetsz, ngrp, error; 1353 1354 cred = td->td_ucred; 1355 bsd_gidset = cred->cr_groups; 1356 bsd_gidsetsz = cred->cr_ngroups - 1; 1357 1358 /* 1359 * cr_groups[0] holds egid. Returning the whole set 1360 * here will cause a duplicate. Exclude cr_groups[0] 1361 * to prevent that. 
1362 */ 1363 1364 if ((ngrp = args->gidsetsize) == 0) { 1365 td->td_retval[0] = bsd_gidsetsz; 1366 return (0); 1367 } 1368 1369 if (ngrp < bsd_gidsetsz) 1370 return (EINVAL); 1371 1372 ngrp = 0; 1373 linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), 1374 M_LINUX, M_WAITOK); 1375 while (ngrp < bsd_gidsetsz) { 1376 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1377 ngrp++; 1378 } 1379 1380 error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); 1381 free(linux_gidset, M_LINUX); 1382 if (error) 1383 return (error); 1384 1385 td->td_retval[0] = ngrp; 1386 return (0); 1387} 1388 1389int 1390linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1391{ 1392 struct rlimit bsd_rlim; 1393 struct l_rlimit rlim; 1394 u_int which; 1395 int error; 1396 1397#ifdef DEBUG 1398 if (ldebug(setrlimit)) 1399 printf(ARGS(setrlimit, "%d, %p"), 1400 args->resource, (void *)args->rlim); 1401#endif 1402 1403 if (args->resource >= LINUX_RLIM_NLIMITS) 1404 return (EINVAL); 1405 1406 which = linux_to_bsd_resource[args->resource]; 1407 if (which == -1) 1408 return (EINVAL); 1409 1410 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1411 if (error) 1412 return (error); 1413 1414 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1415 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1416 return (kern_setrlimit(td, which, &bsd_rlim)); 1417} 1418 1419#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1420int 1421linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) 1422{ 1423 struct l_rlimit rlim; 1424 struct proc *p = td->td_proc; 1425 struct rlimit bsd_rlim; 1426 u_int which; 1427 1428#ifdef DEBUG 1429 if (ldebug(old_getrlimit)) 1430 printf(ARGS(old_getrlimit, "%d, %p"), 1431 args->resource, (void *)args->rlim); 1432#endif 1433 1434 if (args->resource >= LINUX_RLIM_NLIMITS) 1435 return (EINVAL); 1436 1437 which = linux_to_bsd_resource[args->resource]; 1438 if (which == -1) 1439 return (EINVAL); 1440 1441 PROC_LOCK(p); 1442 
lim_rlimit(p, which, &bsd_rlim); 1443 PROC_UNLOCK(p); 1444 1445#ifdef COMPAT_LINUX32 1446 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; 1447 if (rlim.rlim_cur == UINT_MAX) 1448 rlim.rlim_cur = INT_MAX; 1449 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; 1450 if (rlim.rlim_max == UINT_MAX) 1451 rlim.rlim_max = INT_MAX; 1452#else 1453 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; 1454 if (rlim.rlim_cur == ULONG_MAX) 1455 rlim.rlim_cur = LONG_MAX; 1456 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; 1457 if (rlim.rlim_max == ULONG_MAX) 1458 rlim.rlim_max = LONG_MAX; 1459#endif 1460 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1461} 1462#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1463 1464int 1465linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 1466{ 1467 struct l_rlimit rlim; 1468 struct proc *p = td->td_proc; 1469 struct rlimit bsd_rlim; 1470 u_int which; 1471 1472#ifdef DEBUG 1473 if (ldebug(getrlimit)) 1474 printf(ARGS(getrlimit, "%d, %p"), 1475 args->resource, (void *)args->rlim); 1476#endif 1477 1478 if (args->resource >= LINUX_RLIM_NLIMITS) 1479 return (EINVAL); 1480 1481 which = linux_to_bsd_resource[args->resource]; 1482 if (which == -1) 1483 return (EINVAL); 1484 1485 PROC_LOCK(p); 1486 lim_rlimit(p, which, &bsd_rlim); 1487 PROC_UNLOCK(p); 1488 1489 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; 1490 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; 1491 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1492} 1493 1494int 1495linux_sched_setscheduler(struct thread *td, 1496 struct linux_sched_setscheduler_args *args) 1497{ 1498 struct sched_param sched_param; 1499 struct thread *tdt; 1500 int error, policy; 1501 1502#ifdef DEBUG 1503 if (ldebug(sched_setscheduler)) 1504 printf(ARGS(sched_setscheduler, "%d, %d, %p"), 1505 args->pid, args->policy, (const void *)args->param); 1506#endif 1507 1508 switch (args->policy) { 1509 case LINUX_SCHED_OTHER: 1510 policy = SCHED_OTHER; 1511 break; 1512 case LINUX_SCHED_FIFO: 1513 
policy = SCHED_FIFO; 1514 break; 1515 case LINUX_SCHED_RR: 1516 policy = SCHED_RR; 1517 break; 1518 default: 1519 return (EINVAL); 1520 } 1521 1522 error = copyin(args->param, &sched_param, sizeof(sched_param)); 1523 if (error) 1524 return (error); 1525 1526 tdt = linux_tdfind(td, args->pid, -1); 1527 if (tdt == NULL) 1528 return (ESRCH); 1529 1530 error = kern_sched_setscheduler(td, tdt, policy, &sched_param); 1531 PROC_UNLOCK(tdt->td_proc); 1532 return (error); 1533} 1534 1535int 1536linux_sched_getscheduler(struct thread *td, 1537 struct linux_sched_getscheduler_args *args) 1538{ 1539 struct thread *tdt; 1540 int error, policy; 1541 1542#ifdef DEBUG 1543 if (ldebug(sched_getscheduler)) 1544 printf(ARGS(sched_getscheduler, "%d"), args->pid); 1545#endif 1546 1547 tdt = linux_tdfind(td, args->pid, -1); 1548 if (tdt == NULL) 1549 return (ESRCH); 1550 1551 error = kern_sched_getscheduler(td, tdt, &policy); 1552 PROC_UNLOCK(tdt->td_proc); 1553 1554 switch (policy) { 1555 case SCHED_OTHER: 1556 td->td_retval[0] = LINUX_SCHED_OTHER; 1557 break; 1558 case SCHED_FIFO: 1559 td->td_retval[0] = LINUX_SCHED_FIFO; 1560 break; 1561 case SCHED_RR: 1562 td->td_retval[0] = LINUX_SCHED_RR; 1563 break; 1564 } 1565 return (error); 1566} 1567 1568int 1569linux_sched_get_priority_max(struct thread *td, 1570 struct linux_sched_get_priority_max_args *args) 1571{ 1572 struct sched_get_priority_max_args bsd; 1573 1574#ifdef DEBUG 1575 if (ldebug(sched_get_priority_max)) 1576 printf(ARGS(sched_get_priority_max, "%d"), args->policy); 1577#endif 1578 1579 switch (args->policy) { 1580 case LINUX_SCHED_OTHER: 1581 bsd.policy = SCHED_OTHER; 1582 break; 1583 case LINUX_SCHED_FIFO: 1584 bsd.policy = SCHED_FIFO; 1585 break; 1586 case LINUX_SCHED_RR: 1587 bsd.policy = SCHED_RR; 1588 break; 1589 default: 1590 return (EINVAL); 1591 } 1592 return (sys_sched_get_priority_max(td, &bsd)); 1593} 1594 1595int 1596linux_sched_get_priority_min(struct thread *td, 1597 struct linux_sched_get_priority_min_args 
*args) 1598{ 1599 struct sched_get_priority_min_args bsd; 1600 1601#ifdef DEBUG 1602 if (ldebug(sched_get_priority_min)) 1603 printf(ARGS(sched_get_priority_min, "%d"), args->policy); 1604#endif 1605 1606 switch (args->policy) { 1607 case LINUX_SCHED_OTHER: 1608 bsd.policy = SCHED_OTHER; 1609 break; 1610 case LINUX_SCHED_FIFO: 1611 bsd.policy = SCHED_FIFO; 1612 break; 1613 case LINUX_SCHED_RR: 1614 bsd.policy = SCHED_RR; 1615 break; 1616 default: 1617 return (EINVAL); 1618 } 1619 return (sys_sched_get_priority_min(td, &bsd)); 1620} 1621 1622#define REBOOT_CAD_ON 0x89abcdef 1623#define REBOOT_CAD_OFF 0 1624#define REBOOT_HALT 0xcdef0123 1625#define REBOOT_RESTART 0x01234567 1626#define REBOOT_RESTART2 0xA1B2C3D4 1627#define REBOOT_POWEROFF 0x4321FEDC 1628#define REBOOT_MAGIC1 0xfee1dead 1629#define REBOOT_MAGIC2 0x28121969 1630#define REBOOT_MAGIC2A 0x05121996 1631#define REBOOT_MAGIC2B 0x16041998 1632 1633int 1634linux_reboot(struct thread *td, struct linux_reboot_args *args) 1635{ 1636 struct reboot_args bsd_args; 1637 1638#ifdef DEBUG 1639 if (ldebug(reboot)) 1640 printf(ARGS(reboot, "0x%x"), args->cmd); 1641#endif 1642 1643 if (args->magic1 != REBOOT_MAGIC1) 1644 return (EINVAL); 1645 1646 switch (args->magic2) { 1647 case REBOOT_MAGIC2: 1648 case REBOOT_MAGIC2A: 1649 case REBOOT_MAGIC2B: 1650 break; 1651 default: 1652 return (EINVAL); 1653 } 1654 1655 switch (args->cmd) { 1656 case REBOOT_CAD_ON: 1657 case REBOOT_CAD_OFF: 1658 return (priv_check(td, PRIV_REBOOT)); 1659 case REBOOT_HALT: 1660 bsd_args.opt = RB_HALT; 1661 break; 1662 case REBOOT_RESTART: 1663 case REBOOT_RESTART2: 1664 bsd_args.opt = 0; 1665 break; 1666 case REBOOT_POWEROFF: 1667 bsd_args.opt = RB_POWEROFF; 1668 break; 1669 default: 1670 return (EINVAL); 1671 } 1672 return (sys_reboot(td, &bsd_args)); 1673} 1674 1675 1676/* 1677 * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify 1678 * td->td_retval[1] when COMPAT_43 is defined. 
This clobbers registers that 1679 * are assumed to be preserved. The following lightweight syscalls fixes 1680 * this. See also linux_getgid16() and linux_getuid16() in linux_uid16.c 1681 * 1682 * linux_getpid() - MP SAFE 1683 * linux_getgid() - MP SAFE 1684 * linux_getuid() - MP SAFE 1685 */ 1686 1687int 1688linux_getpid(struct thread *td, struct linux_getpid_args *args) 1689{ 1690 1691#ifdef DEBUG 1692 if (ldebug(getpid)) 1693 printf(ARGS(getpid, "")); 1694#endif 1695 td->td_retval[0] = td->td_proc->p_pid; 1696 1697 return (0); 1698} 1699 1700int 1701linux_gettid(struct thread *td, struct linux_gettid_args *args) 1702{ 1703 struct linux_emuldata *em; 1704 1705#ifdef DEBUG 1706 if (ldebug(gettid)) 1707 printf(ARGS(gettid, "")); 1708#endif 1709 1710 em = em_find(td); 1711 KASSERT(em != NULL, ("gettid: emuldata not found.\n")); 1712 1713 td->td_retval[0] = em->em_tid; 1714 1715 return (0); 1716} 1717 1718 1719int 1720linux_getppid(struct thread *td, struct linux_getppid_args *args) 1721{ 1722 1723#ifdef DEBUG 1724 if (ldebug(getppid)) 1725 printf(ARGS(getppid, "")); 1726#endif 1727 1728 PROC_LOCK(td->td_proc); 1729 td->td_retval[0] = td->td_proc->p_pptr->p_pid; 1730 PROC_UNLOCK(td->td_proc); 1731 return (0); 1732} 1733 1734int 1735linux_getgid(struct thread *td, struct linux_getgid_args *args) 1736{ 1737 1738#ifdef DEBUG 1739 if (ldebug(getgid)) 1740 printf(ARGS(getgid, "")); 1741#endif 1742 1743 td->td_retval[0] = td->td_ucred->cr_rgid; 1744 return (0); 1745} 1746 1747int 1748linux_getuid(struct thread *td, struct linux_getuid_args *args) 1749{ 1750 1751#ifdef DEBUG 1752 if (ldebug(getuid)) 1753 printf(ARGS(getuid, "")); 1754#endif 1755 1756 td->td_retval[0] = td->td_ucred->cr_ruid; 1757 return (0); 1758} 1759 1760 1761int 1762linux_getsid(struct thread *td, struct linux_getsid_args *args) 1763{ 1764 struct getsid_args bsd; 1765 1766#ifdef DEBUG 1767 if (ldebug(getsid)) 1768 printf(ARGS(getsid, "%i"), args->pid); 1769#endif 1770 1771 bsd.pid = args->pid; 1772 
return (sys_getsid(td, &bsd)); 1773} 1774 1775int 1776linux_nosys(struct thread *td, struct nosys_args *ignore) 1777{ 1778 1779 return (ENOSYS); 1780} 1781 1782int 1783linux_getpriority(struct thread *td, struct linux_getpriority_args *args) 1784{ 1785 struct getpriority_args bsd_args; 1786 int error; 1787 1788#ifdef DEBUG 1789 if (ldebug(getpriority)) 1790 printf(ARGS(getpriority, "%i, %i"), args->which, args->who); 1791#endif 1792 1793 bsd_args.which = args->which; 1794 bsd_args.who = args->who; 1795 error = sys_getpriority(td, &bsd_args); 1796 td->td_retval[0] = 20 - td->td_retval[0]; 1797 return (error); 1798} 1799 1800int 1801linux_sethostname(struct thread *td, struct linux_sethostname_args *args) 1802{ 1803 int name[2]; 1804 1805#ifdef DEBUG 1806 if (ldebug(sethostname)) 1807 printf(ARGS(sethostname, "*, %i"), args->len); 1808#endif 1809 1810 name[0] = CTL_KERN; 1811 name[1] = KERN_HOSTNAME; 1812 return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, 1813 args->len, 0, 0)); 1814} 1815 1816int 1817linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) 1818{ 1819 int name[2]; 1820 1821#ifdef DEBUG 1822 if (ldebug(setdomainname)) 1823 printf(ARGS(setdomainname, "*, %i"), args->len); 1824#endif 1825 1826 name[0] = CTL_KERN; 1827 name[1] = KERN_NISDOMAINNAME; 1828 return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, 1829 args->len, 0, 0)); 1830} 1831 1832int 1833linux_exit_group(struct thread *td, struct linux_exit_group_args *args) 1834{ 1835 1836#ifdef DEBUG 1837 if (ldebug(exit_group)) 1838 printf(ARGS(exit_group, "%i"), args->error_code); 1839#endif 1840 1841 LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, 1842 args->error_code); 1843 1844 /* 1845 * XXX: we should send a signal to the parent if 1846 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) 1847 * as it doesnt occur often. 
1848 */ 1849 exit1(td, W_EXITCODE(args->error_code, 0)); 1850 /* NOTREACHED */ 1851} 1852 1853#define _LINUX_CAPABILITY_VERSION 0x19980330 1854 1855struct l_user_cap_header { 1856 l_int version; 1857 l_int pid; 1858}; 1859 1860struct l_user_cap_data { 1861 l_int effective; 1862 l_int permitted; 1863 l_int inheritable; 1864}; 1865 1866int 1867linux_capget(struct thread *td, struct linux_capget_args *args) 1868{ 1869 struct l_user_cap_header luch; 1870 struct l_user_cap_data lucd; 1871 int error; 1872 1873 if (args->hdrp == NULL) 1874 return (EFAULT); 1875 1876 error = copyin(args->hdrp, &luch, sizeof(luch)); 1877 if (error != 0) 1878 return (error); 1879 1880 if (luch.version != _LINUX_CAPABILITY_VERSION) { 1881 luch.version = _LINUX_CAPABILITY_VERSION; 1882 error = copyout(&luch, args->hdrp, sizeof(luch)); 1883 if (error) 1884 return (error); 1885 return (EINVAL); 1886 } 1887 1888 if (luch.pid) 1889 return (EPERM); 1890 1891 if (args->datap) { 1892 /* 1893 * The current implementation doesn't support setting 1894 * a capability (it's essentially a stub) so indicate 1895 * that no capabilities are currently set or available 1896 * to request. 
1897 */ 1898 bzero (&lucd, sizeof(lucd)); 1899 error = copyout(&lucd, args->datap, sizeof(lucd)); 1900 } 1901 1902 return (error); 1903} 1904 1905int 1906linux_capset(struct thread *td, struct linux_capset_args *args) 1907{ 1908 struct l_user_cap_header luch; 1909 struct l_user_cap_data lucd; 1910 int error; 1911 1912 if (args->hdrp == NULL || args->datap == NULL) 1913 return (EFAULT); 1914 1915 error = copyin(args->hdrp, &luch, sizeof(luch)); 1916 if (error != 0) 1917 return (error); 1918 1919 if (luch.version != _LINUX_CAPABILITY_VERSION) { 1920 luch.version = _LINUX_CAPABILITY_VERSION; 1921 error = copyout(&luch, args->hdrp, sizeof(luch)); 1922 if (error) 1923 return (error); 1924 return (EINVAL); 1925 } 1926 1927 if (luch.pid) 1928 return (EPERM); 1929 1930 error = copyin(args->datap, &lucd, sizeof(lucd)); 1931 if (error != 0) 1932 return (error); 1933 1934 /* We currently don't support setting any capabilities. */ 1935 if (lucd.effective || lucd.permitted || lucd.inheritable) { 1936 linux_msg(td, 1937 "capset effective=0x%x, permitted=0x%x, " 1938 "inheritable=0x%x is not implemented", 1939 (int)lucd.effective, (int)lucd.permitted, 1940 (int)lucd.inheritable); 1941 return (EPERM); 1942 } 1943 1944 return (0); 1945} 1946 1947int 1948linux_prctl(struct thread *td, struct linux_prctl_args *args) 1949{ 1950 int error = 0, max_size; 1951 struct proc *p = td->td_proc; 1952 char comm[LINUX_MAX_COMM_LEN]; 1953 struct linux_emuldata *em; 1954 int pdeath_signal; 1955 1956#ifdef DEBUG 1957 if (ldebug(prctl)) 1958 printf(ARGS(prctl, "%d, %ju, %ju, %ju, %ju"), args->option, 1959 (uintmax_t)args->arg2, (uintmax_t)args->arg3, 1960 (uintmax_t)args->arg4, (uintmax_t)args->arg5); 1961#endif 1962 1963 switch (args->option) { 1964 case LINUX_PR_SET_PDEATHSIG: 1965 if (!LINUX_SIG_VALID(args->arg2)) 1966 return (EINVAL); 1967 em = em_find(td); 1968 KASSERT(em != NULL, ("prctl: emuldata not found.\n")); 1969 em->pdeath_signal = args->arg2; 1970 break; 1971 case 
LINUX_PR_GET_PDEATHSIG: 1972 em = em_find(td); 1973 KASSERT(em != NULL, ("prctl: emuldata not found.\n")); 1974 pdeath_signal = em->pdeath_signal; 1975 error = copyout(&pdeath_signal, 1976 (void *)(register_t)args->arg2, 1977 sizeof(pdeath_signal)); 1978 break; 1979 case LINUX_PR_GET_KEEPCAPS: 1980 /* 1981 * Indicate that we always clear the effective and 1982 * permitted capability sets when the user id becomes 1983 * non-zero (actually the capability sets are simply 1984 * always zero in the current implementation). 1985 */ 1986 td->td_retval[0] = 0; 1987 break; 1988 case LINUX_PR_SET_KEEPCAPS: 1989 /* 1990 * Ignore requests to keep the effective and permitted 1991 * capability sets when the user id becomes non-zero. 1992 */ 1993 break; 1994 case LINUX_PR_SET_NAME: 1995 /* 1996 * To be on the safe side we need to make sure to not 1997 * overflow the size a linux program expects. We already 1998 * do this here in the copyin, so that we don't need to 1999 * check on copyout. 2000 */ 2001 max_size = MIN(sizeof(comm), sizeof(p->p_comm)); 2002 error = copyinstr((void *)(register_t)args->arg2, comm, 2003 max_size, NULL); 2004 2005 /* Linux silently truncates the name if it is too long. */ 2006 if (error == ENAMETOOLONG) { 2007 /* 2008 * XXX: copyinstr() isn't documented to populate the 2009 * array completely, so do a copyin() to be on the 2010 * safe side. This should be changed in case 2011 * copyinstr() is changed to guarantee this. 
2012 */ 2013 error = copyin((void *)(register_t)args->arg2, comm, 2014 max_size - 1); 2015 comm[max_size - 1] = '\0'; 2016 } 2017 if (error) 2018 return (error); 2019 2020 PROC_LOCK(p); 2021 strlcpy(p->p_comm, comm, sizeof(p->p_comm)); 2022 PROC_UNLOCK(p); 2023 break; 2024 case LINUX_PR_GET_NAME: 2025 PROC_LOCK(p); 2026 strlcpy(comm, p->p_comm, sizeof(comm)); 2027 PROC_UNLOCK(p); 2028 error = copyout(comm, (void *)(register_t)args->arg2, 2029 strlen(comm) + 1); 2030 break; 2031 default: 2032 error = EINVAL; 2033 break; 2034 } 2035 2036 return (error); 2037} 2038 2039int 2040linux_sched_setparam(struct thread *td, 2041 struct linux_sched_setparam_args *uap) 2042{ 2043 struct sched_param sched_param; 2044 struct thread *tdt; 2045 int error; 2046 2047#ifdef DEBUG 2048 if (ldebug(sched_setparam)) 2049 printf(ARGS(sched_setparam, "%d, *"), uap->pid); 2050#endif 2051 2052 error = copyin(uap->param, &sched_param, sizeof(sched_param)); 2053 if (error) 2054 return (error); 2055 2056 tdt = linux_tdfind(td, uap->pid, -1); 2057 if (tdt == NULL) 2058 return (ESRCH); 2059 2060 error = kern_sched_setparam(td, tdt, &sched_param); 2061 PROC_UNLOCK(tdt->td_proc); 2062 return (error); 2063} 2064 2065int 2066linux_sched_getparam(struct thread *td, 2067 struct linux_sched_getparam_args *uap) 2068{ 2069 struct sched_param sched_param; 2070 struct thread *tdt; 2071 int error; 2072 2073#ifdef DEBUG 2074 if (ldebug(sched_getparam)) 2075 printf(ARGS(sched_getparam, "%d, *"), uap->pid); 2076#endif 2077 2078 tdt = linux_tdfind(td, uap->pid, -1); 2079 if (tdt == NULL) 2080 return (ESRCH); 2081 2082 error = kern_sched_getparam(td, tdt, &sched_param); 2083 PROC_UNLOCK(tdt->td_proc); 2084 if (error == 0) 2085 error = copyout(&sched_param, uap->param, 2086 sizeof(sched_param)); 2087 return (error); 2088} 2089 2090/* 2091 * Get affinity of a process. 
2092 */ 2093int 2094linux_sched_getaffinity(struct thread *td, 2095 struct linux_sched_getaffinity_args *args) 2096{ 2097 int error; 2098 struct thread *tdt; 2099 struct cpuset_getaffinity_args cga; 2100 2101#ifdef DEBUG 2102 if (ldebug(sched_getaffinity)) 2103 printf(ARGS(sched_getaffinity, "%d, %d, *"), args->pid, 2104 args->len); 2105#endif 2106 if (args->len < sizeof(cpuset_t)) 2107 return (EINVAL); 2108 2109 tdt = linux_tdfind(td, args->pid, -1); 2110 if (tdt == NULL) 2111 return (ESRCH); 2112 2113 PROC_UNLOCK(tdt->td_proc); 2114 cga.level = CPU_LEVEL_WHICH; 2115 cga.which = CPU_WHICH_TID; 2116 cga.id = tdt->td_tid; 2117 cga.cpusetsize = sizeof(cpuset_t); 2118 cga.mask = (cpuset_t *) args->user_mask_ptr; 2119 2120 if ((error = sys_cpuset_getaffinity(td, &cga)) == 0) 2121 td->td_retval[0] = sizeof(cpuset_t); 2122 2123 return (error); 2124} 2125 2126/* 2127 * Set affinity of a process. 2128 */ 2129int 2130linux_sched_setaffinity(struct thread *td, 2131 struct linux_sched_setaffinity_args *args) 2132{ 2133 struct cpuset_setaffinity_args csa; 2134 struct thread *tdt; 2135 2136#ifdef DEBUG 2137 if (ldebug(sched_setaffinity)) 2138 printf(ARGS(sched_setaffinity, "%d, %d, *"), args->pid, 2139 args->len); 2140#endif 2141 if (args->len < sizeof(cpuset_t)) 2142 return (EINVAL); 2143 2144 tdt = linux_tdfind(td, args->pid, -1); 2145 if (tdt == NULL) 2146 return (ESRCH); 2147 2148 PROC_UNLOCK(tdt->td_proc); 2149 csa.level = CPU_LEVEL_WHICH; 2150 csa.which = CPU_WHICH_TID; 2151 csa.id = tdt->td_tid; 2152 csa.cpusetsize = sizeof(cpuset_t); 2153 csa.mask = (cpuset_t *) args->user_mask_ptr; 2154 2155 return (sys_cpuset_setaffinity(td, &csa)); 2156} 2157 2158struct linux_rlimit64 { 2159 uint64_t rlim_cur; 2160 uint64_t rlim_max; 2161}; 2162 2163int 2164linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) 2165{ 2166 struct rlimit rlim, nrlim; 2167 struct linux_rlimit64 lrlim; 2168 struct proc *p; 2169 u_int which; 2170 int flags; 2171 int error; 2172 2173#ifdef 
DEBUG 2174 if (ldebug(prlimit64)) 2175 printf(ARGS(prlimit64, "%d, %d, %p, %p"), args->pid, 2176 args->resource, (void *)args->new, (void *)args->old); 2177#endif 2178 2179 if (args->resource >= LINUX_RLIM_NLIMITS) 2180 return (EINVAL); 2181 2182 which = linux_to_bsd_resource[args->resource]; 2183 if (which == -1) 2184 return (EINVAL); 2185 2186 if (args->new != NULL) { 2187 /* 2188 * Note. Unlike FreeBSD where rlim is signed 64-bit Linux 2189 * rlim is unsigned 64-bit. FreeBSD treats negative limits 2190 * as INFINITY so we do not need a conversion even. 2191 */ 2192 error = copyin(args->new, &nrlim, sizeof(nrlim)); 2193 if (error != 0) 2194 return (error); 2195 } 2196 2197 flags = PGET_HOLD | PGET_NOTWEXIT; 2198 if (args->new != NULL) 2199 flags |= PGET_CANDEBUG; 2200 else 2201 flags |= PGET_CANSEE; 2202 error = pget(args->pid, flags, &p); 2203 if (error != 0) 2204 return (error); 2205 2206 if (args->old != NULL) { 2207 PROC_LOCK(p); 2208 lim_rlimit(p, which, &rlim); 2209 PROC_UNLOCK(p); 2210 if (rlim.rlim_cur == RLIM_INFINITY) 2211 lrlim.rlim_cur = LINUX_RLIM_INFINITY; 2212 else 2213 lrlim.rlim_cur = rlim.rlim_cur; 2214 if (rlim.rlim_max == RLIM_INFINITY) 2215 lrlim.rlim_max = LINUX_RLIM_INFINITY; 2216 else 2217 lrlim.rlim_max = rlim.rlim_max; 2218 error = copyout(&lrlim, args->old, sizeof(lrlim)); 2219 if (error != 0) 2220 goto out; 2221 } 2222 2223 if (args->new != NULL) 2224 error = kern_proc_setrlimit(td, p, which, &nrlim); 2225 2226 out: 2227 PRELE(p); 2228 return (error); 2229} 2230 2231int 2232linux_pselect6(struct thread *td, struct linux_pselect6_args *args) 2233{ 2234 struct timeval utv, tv0, tv1, *tvp; 2235 struct l_pselect6arg lpse6; 2236 struct l_timespec lts; 2237 struct timespec uts; 2238 l_sigset_t l_ss; 2239 sigset_t *ssp; 2240 sigset_t ss; 2241 int error; 2242 2243 ssp = NULL; 2244 if (args->sig != NULL) { 2245 error = copyin(args->sig, &lpse6, sizeof(lpse6)); 2246 if (error != 0) 2247 return (error); 2248 if (lpse6.ss_len != sizeof(l_ss)) 2249 
return (EINVAL); 2250 if (lpse6.ss != 0) { 2251 error = copyin(PTRIN(lpse6.ss), &l_ss, 2252 sizeof(l_ss)); 2253 if (error != 0) 2254 return (error); 2255 linux_to_bsd_sigset(&l_ss, &ss); 2256 ssp = &ss; 2257 } 2258 } 2259 2260 /* 2261 * Currently glibc changes nanosecond number to microsecond. 2262 * This mean losing precision but for now it is hardly seen. 2263 */ 2264 if (args->tsp != NULL) { 2265 error = copyin(args->tsp, <s, sizeof(lts)); 2266 if (error != 0) 2267 return (error); 2268 error = linux_to_native_timespec(&uts, <s); 2269 if (error != 0) 2270 return (error); 2271 2272 TIMESPEC_TO_TIMEVAL(&utv, &uts); 2273 if (itimerfix(&utv)) 2274 return (EINVAL); 2275 2276 microtime(&tv0); 2277 tvp = &utv; 2278 } else 2279 tvp = NULL; 2280 2281 error = kern_pselect(td, args->nfds, args->readfds, args->writefds, 2282 args->exceptfds, tvp, ssp, LINUX_NFDBITS); 2283 2284 if (error == 0 && args->tsp != NULL) { 2285 if (td->td_retval[0] != 0) { 2286 /* 2287 * Compute how much time was left of the timeout, 2288 * by subtracting the current time and the time 2289 * before we started the call, and subtracting 2290 * that result from the user-supplied value. 
2291 */ 2292 2293 microtime(&tv1); 2294 timevalsub(&tv1, &tv0); 2295 timevalsub(&utv, &tv1); 2296 if (utv.tv_sec < 0) 2297 timevalclear(&utv); 2298 } else 2299 timevalclear(&utv); 2300 2301 TIMEVAL_TO_TIMESPEC(&utv, &uts); 2302 2303 native_to_linux_timespec(<s, &uts); 2304 error = copyout(<s, args->tsp, sizeof(lts)); 2305 } 2306 2307 return (error); 2308} 2309 2310int 2311linux_ppoll(struct thread *td, struct linux_ppoll_args *args) 2312{ 2313 struct timespec ts0, ts1; 2314 struct l_timespec lts; 2315 struct timespec uts, *tsp; 2316 l_sigset_t l_ss; 2317 sigset_t *ssp; 2318 sigset_t ss; 2319 int error; 2320 2321 if (args->sset != NULL) { 2322 if (args->ssize != sizeof(l_ss)) 2323 return (EINVAL); 2324 error = copyin(args->sset, &l_ss, sizeof(l_ss)); 2325 if (error) 2326 return (error); 2327 linux_to_bsd_sigset(&l_ss, &ss); 2328 ssp = &ss; 2329 } else 2330 ssp = NULL; 2331 if (args->tsp != NULL) { 2332 error = copyin(args->tsp, <s, sizeof(lts)); 2333 if (error) 2334 return (error); 2335 error = linux_to_native_timespec(&uts, <s); 2336 if (error != 0) 2337 return (error); 2338 2339 nanotime(&ts0); 2340 tsp = &uts; 2341 } else 2342 tsp = NULL; 2343 2344 error = kern_poll(td, args->fds, args->nfds, tsp, ssp); 2345 2346 if (error == 0 && args->tsp != NULL) { 2347 if (td->td_retval[0]) { 2348 nanotime(&ts1); 2349 timespecsub(&ts1, &ts0); 2350 timespecsub(&uts, &ts1); 2351 if (uts.tv_sec < 0) 2352 timespecclear(&uts); 2353 } else 2354 timespecclear(&uts); 2355 2356 native_to_linux_timespec(<s, &uts); 2357 error = copyout(<s, args->tsp, sizeof(lts)); 2358 } 2359 2360 return (error); 2361} 2362 2363#if defined(DEBUG) || defined(KTR) 2364/* XXX: can be removed when every ldebug(...) and KTR stuff are removed. 
*/ 2365 2366#ifdef COMPAT_LINUX32 2367#define L_MAXSYSCALL LINUX32_SYS_MAXSYSCALL 2368#else 2369#define L_MAXSYSCALL LINUX_SYS_MAXSYSCALL 2370#endif 2371 2372u_char linux_debug_map[howmany(L_MAXSYSCALL, sizeof(u_char))]; 2373 2374static int 2375linux_debug(int syscall, int toggle, int global) 2376{ 2377 2378 if (global) { 2379 char c = toggle ? 0 : 0xff; 2380 2381 memset(linux_debug_map, c, sizeof(linux_debug_map)); 2382 return (0); 2383 } 2384 if (syscall < 0 || syscall >= L_MAXSYSCALL) 2385 return (EINVAL); 2386 if (toggle) 2387 clrbit(linux_debug_map, syscall); 2388 else 2389 setbit(linux_debug_map, syscall); 2390 return (0); 2391} 2392#undef L_MAXSYSCALL 2393 2394/* 2395 * Usage: sysctl linux.debug=<syscall_nr>.<0/1> 2396 * 2397 * E.g.: sysctl linux.debug=21.0 2398 * 2399 * As a special case, syscall "all" will apply to all syscalls globally. 2400 */ 2401#define LINUX_MAX_DEBUGSTR 16 2402int 2403linux_sysctl_debug(SYSCTL_HANDLER_ARGS) 2404{ 2405 char value[LINUX_MAX_DEBUGSTR], *p; 2406 int error, sysc, toggle; 2407 int global = 0; 2408 2409 value[0] = '\0'; 2410 error = sysctl_handle_string(oidp, value, LINUX_MAX_DEBUGSTR, req); 2411 if (error || req->newptr == NULL) 2412 return (error); 2413 for (p = value; *p != '\0' && *p != '.'; p++); 2414 if (*p == '\0') 2415 return (EINVAL); 2416 *p++ = '\0'; 2417 sysc = strtol(value, NULL, 0); 2418 toggle = strtol(p, NULL, 0); 2419 if (strcmp(value, "all") == 0) 2420 global = 1; 2421 error = linux_debug(sysc, toggle, global); 2422 return (error); 2423} 2424 2425#endif /* DEBUG || KTR */ 2426 2427int 2428linux_sched_rr_get_interval(struct thread *td, 2429 struct linux_sched_rr_get_interval_args *uap) 2430{ 2431 struct timespec ts; 2432 struct l_timespec lts; 2433 struct thread *tdt; 2434 int error; 2435 2436 /* 2437 * According to man in case the invalid pid specified 2438 * EINVAL should be returned. 
2439 */ 2440 if (uap->pid < 0) 2441 return (EINVAL); 2442 2443 tdt = linux_tdfind(td, uap->pid, -1); 2444 if (tdt == NULL) 2445 return (ESRCH); 2446 2447 error = kern_sched_rr_get_interval_td(td, tdt, &ts); 2448 PROC_UNLOCK(tdt->td_proc); 2449 if (error != 0) 2450 return (error); 2451 native_to_linux_timespec(<s, &ts); 2452 return (copyout(<s, uap->interval, sizeof(lts))); 2453} 2454 2455/* 2456 * In case when the Linux thread is the initial thread in 2457 * the thread group thread id is equal to the process id. 2458 * Glibc depends on this magic (assert in pthread_getattr_np.c). 2459 */ 2460struct thread * 2461linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) 2462{ 2463 struct linux_emuldata *em; 2464 struct thread *tdt; 2465 struct proc *p; 2466 2467 tdt = NULL; 2468 if (tid == 0 || tid == td->td_tid) { 2469 tdt = td; 2470 PROC_LOCK(tdt->td_proc); 2471 } else if (tid > PID_MAX) 2472 tdt = tdfind(tid, pid); 2473 else { 2474 /* 2475 * Initial thread where the tid equal to the pid. 2476 */ 2477 p = pfind(tid); 2478 if (p != NULL) { 2479 if (SV_PROC_ABI(p) != SV_ABI_LINUX) { 2480 /* 2481 * p is not a Linuxulator process. 2482 */ 2483 PROC_UNLOCK(p); 2484 return (NULL); 2485 } 2486 FOREACH_THREAD_IN_PROC(p, tdt) { 2487 em = em_find(tdt); 2488 if (tid == em->em_tid) 2489 return (tdt); 2490 } 2491 PROC_UNLOCK(p); 2492 } 2493 return (NULL); 2494 } 2495 2496 return (tdt); 2497} 2498 2499void 2500linux_to_bsd_waitopts(int options, int *bsdopts) 2501{ 2502 2503 if (options & LINUX_WNOHANG) 2504 *bsdopts |= WNOHANG; 2505 if (options & LINUX_WUNTRACED) 2506 *bsdopts |= WUNTRACED; 2507 if (options & LINUX_WEXITED) 2508 *bsdopts |= WEXITED; 2509 if (options & LINUX_WCONTINUED) 2510 *bsdopts |= WCONTINUED; 2511 if (options & LINUX_WNOWAIT) 2512 *bsdopts |= WNOWAIT; 2513 2514 if (options & __WCLONE) 2515 *bsdopts |= WLINUXCLONE; 2516} 2517