/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2002 Doug Rabson
 * Copyright (c) 1994-1995 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */ 31 32#include <sys/cdefs.h> 33__FBSDID("$FreeBSD$"); 34 35#include "opt_compat.h" 36 37#include <sys/param.h> 38#include <sys/blist.h> 39#include <sys/fcntl.h> 40#if defined(__i386__) 41#include <sys/imgact_aout.h> 42#endif 43#include <sys/jail.h> 44#include <sys/kernel.h> 45#include <sys/limits.h> 46#include <sys/lock.h> 47#include <sys/malloc.h> 48#include <sys/mman.h> 49#include <sys/mount.h> 50#include <sys/msgbuf.h> 51#include <sys/mutex.h> 52#include <sys/namei.h> 53#include <sys/priv.h> 54#include <sys/proc.h> 55#include <sys/procctl.h> 56#include <sys/reboot.h> 57#include <sys/racct.h> 58#include <sys/random.h> 59#include <sys/resourcevar.h> 60#include <sys/sched.h> 61#include <sys/sdt.h> 62#include <sys/signalvar.h> 63#include <sys/stat.h> 64#include <sys/syscallsubr.h> 65#include <sys/sysctl.h> 66#include <sys/sysproto.h> 67#include <sys/systm.h> 68#include <sys/time.h> 69#include <sys/vmmeter.h> 70#include <sys/vnode.h> 71#include <sys/wait.h> 72#include <sys/cpuset.h> 73#include <sys/uio.h> 74 75#include <security/mac/mac_framework.h> 76 77#include <vm/vm.h> 78#include <vm/pmap.h> 79#include <vm/vm_kern.h> 80#include <vm/vm_map.h> 81#include <vm/vm_extern.h> 82#include <vm/swap_pager.h> 83 84#ifdef COMPAT_LINUX32 85#include <machine/../linux32/linux.h> 86#include <machine/../linux32/linux32_proto.h> 87#else 88#include <machine/../linux/linux.h> 89#include <machine/../linux/linux_proto.h> 90#endif 91 92#include <compat/linux/linux_dtrace.h> 93#include <compat/linux/linux_file.h> 94#include <compat/linux/linux_mib.h> 95#include <compat/linux/linux_signal.h> 96#include <compat/linux/linux_timer.h> 97#include <compat/linux/linux_util.h> 98#include <compat/linux/linux_sysproto.h> 99#include <compat/linux/linux_emul.h> 100#include <compat/linux/linux_misc.h> 101 102int stclohz; /* Statistics clock frequency */ 103 104static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { 105 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 106 
RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 107 RLIMIT_MEMLOCK, RLIMIT_AS 108}; 109 110struct l_sysinfo { 111 l_long uptime; /* Seconds since boot */ 112 l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ 113#define LINUX_SYSINFO_LOADS_SCALE 65536 114 l_ulong totalram; /* Total usable main memory size */ 115 l_ulong freeram; /* Available memory size */ 116 l_ulong sharedram; /* Amount of shared memory */ 117 l_ulong bufferram; /* Memory used by buffers */ 118 l_ulong totalswap; /* Total swap space size */ 119 l_ulong freeswap; /* swap space still available */ 120 l_ushort procs; /* Number of current processes */ 121 l_ushort pads; 122 l_ulong totalhigh; 123 l_ulong freehigh; 124 l_uint mem_unit; 125 char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ 126}; 127 128struct l_pselect6arg { 129 l_uintptr_t ss; 130 l_size_t ss_len; 131}; 132 133static int linux_utimensat_nsec_valid(l_long); 134 135 136int 137linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) 138{ 139 struct l_sysinfo sysinfo; 140 int i, j; 141 struct timespec ts; 142 143 bzero(&sysinfo, sizeof(sysinfo)); 144 getnanouptime(&ts); 145 if (ts.tv_nsec != 0) 146 ts.tv_sec++; 147 sysinfo.uptime = ts.tv_sec; 148 149 /* Use the information from the mib to get our load averages */ 150 for (i = 0; i < 3; i++) 151 sysinfo.loads[i] = averunnable.ldavg[i] * 152 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; 153 154 sysinfo.totalram = physmem * PAGE_SIZE; 155 sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE; 156 157 /* 158 * sharedram counts pages allocated to named, swap-backed objects such 159 * as shared memory segments and tmpfs files. There is no cheap way to 160 * compute this, so just leave the field unpopulated. Linux itself only 161 * started setting this field in the 3.x timeframe. 
162 */ 163 sysinfo.sharedram = 0; 164 sysinfo.bufferram = 0; 165 166 swap_pager_status(&i, &j); 167 sysinfo.totalswap = i * PAGE_SIZE; 168 sysinfo.freeswap = (i - j) * PAGE_SIZE; 169 170 sysinfo.procs = nprocs; 171 172 /* 173 * Platforms supported by the emulation layer do not have a notion of 174 * high memory. 175 */ 176 sysinfo.totalhigh = 0; 177 sysinfo.freehigh = 0; 178 179 sysinfo.mem_unit = 1; 180 181 return (copyout(&sysinfo, args->info, sizeof(sysinfo))); 182} 183 184#ifdef LINUX_LEGACY_SYSCALLS 185int 186linux_alarm(struct thread *td, struct linux_alarm_args *args) 187{ 188 struct itimerval it, old_it; 189 u_int secs; 190 int error; 191 192 secs = args->secs; 193 /* 194 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2 195 * to match kern_setitimer()'s limit to avoid error from it. 196 * 197 * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit 198 * platforms. 199 */ 200 if (secs > INT32_MAX / 2) 201 secs = INT32_MAX / 2; 202 203 it.it_value.tv_sec = secs; 204 it.it_value.tv_usec = 0; 205 timevalclear(&it.it_interval); 206 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); 207 KASSERT(error == 0, ("kern_setitimer returns %d", error)); 208 209 if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) || 210 old_it.it_value.tv_usec >= 500000) 211 old_it.it_value.tv_sec++; 212 td->td_retval[0] = old_it.it_value.tv_sec; 213 return (0); 214} 215#endif 216 217int 218linux_brk(struct thread *td, struct linux_brk_args *args) 219{ 220 struct vmspace *vm = td->td_proc->p_vmspace; 221 uintptr_t new, old; 222 223 old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize); 224 new = (uintptr_t)args->dsend; 225 if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new)) 226 td->td_retval[0] = (register_t)new; 227 else 228 td->td_retval[0] = (register_t)old; 229 230 return (0); 231} 232 233#if defined(__i386__) 234/* XXX: what about amd64/linux32? 
*/ 235 236int 237linux_uselib(struct thread *td, struct linux_uselib_args *args) 238{ 239 struct nameidata ni; 240 struct vnode *vp; 241 struct exec *a_out; 242 vm_map_t map; 243 vm_map_entry_t entry; 244 struct vattr attr; 245 vm_offset_t vmaddr; 246 unsigned long file_offset; 247 unsigned long bss_size; 248 char *library; 249 ssize_t aresid; 250 int error; 251 bool locked, opened, textset; 252 253 LCONVPATHEXIST(td, args->library, &library); 254 255 a_out = NULL; 256 vp = NULL; 257 locked = false; 258 textset = false; 259 opened = false; 260 261 NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, 262 UIO_SYSSPACE, library, td); 263 error = namei(&ni); 264 LFREEPATH(library); 265 if (error) 266 goto cleanup; 267 268 vp = ni.ni_vp; 269 NDFREE(&ni, NDF_ONLY_PNBUF); 270 271 /* 272 * From here on down, we have a locked vnode that must be unlocked. 273 * XXX: The code below largely duplicates exec_check_permissions(). 274 */ 275 locked = true; 276 277 /* Executable? */ 278 error = VOP_GETATTR(vp, &attr, td->td_ucred); 279 if (error) 280 goto cleanup; 281 282 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 283 ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) { 284 /* EACCESS is what exec(2) returns. */ 285 error = ENOEXEC; 286 goto cleanup; 287 } 288 289 /* Sensible size? */ 290 if (attr.va_size == 0) { 291 error = ENOEXEC; 292 goto cleanup; 293 } 294 295 /* Can we access it? */ 296 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 297 if (error) 298 goto cleanup; 299 300 /* 301 * XXX: This should use vn_open() so that it is properly authorized, 302 * and to reduce code redundancy all over the place here. 303 * XXX: Not really, it duplicates far more of exec_check_permissions() 304 * than vn_open(). 
305 */ 306#ifdef MAC 307 error = mac_vnode_check_open(td->td_ucred, vp, VREAD); 308 if (error) 309 goto cleanup; 310#endif 311 error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); 312 if (error) 313 goto cleanup; 314 opened = true; 315 316 /* Pull in executable header into exec_map */ 317 error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE, 318 VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0); 319 if (error) 320 goto cleanup; 321 322 /* Is it a Linux binary ? */ 323 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 324 error = ENOEXEC; 325 goto cleanup; 326 } 327 328 /* 329 * While we are here, we should REALLY do some more checks 330 */ 331 332 /* Set file/virtual offset based on a.out variant. */ 333 switch ((int)(a_out->a_magic & 0xffff)) { 334 case 0413: /* ZMAGIC */ 335 file_offset = 1024; 336 break; 337 case 0314: /* QMAGIC */ 338 file_offset = 0; 339 break; 340 default: 341 error = ENOEXEC; 342 goto cleanup; 343 } 344 345 bss_size = round_page(a_out->a_bss); 346 347 /* Check various fields in header for validity/bounds. */ 348 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 349 error = ENOEXEC; 350 goto cleanup; 351 } 352 353 /* text + data can't exceed file size */ 354 if (a_out->a_data + a_out->a_text > attr.va_size) { 355 error = EFAULT; 356 goto cleanup; 357 } 358 359 /* 360 * text/data/bss must not exceed limits 361 * XXX - this is not complete. it should check current usage PLUS 362 * the resources needed by this library. 363 */ 364 PROC_LOCK(td->td_proc); 365 if (a_out->a_text > maxtsiz || 366 a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) || 367 racct_set(td->td_proc, RACCT_DATA, a_out->a_data + 368 bss_size) != 0) { 369 PROC_UNLOCK(td->td_proc); 370 error = ENOMEM; 371 goto cleanup; 372 } 373 PROC_UNLOCK(td->td_proc); 374 375 /* 376 * Prevent more writers. 
377 */ 378 error = VOP_SET_TEXT(vp); 379 if (error != 0) 380 goto cleanup; 381 textset = true; 382 383 /* 384 * Lock no longer needed 385 */ 386 locked = false; 387 VOP_UNLOCK(vp, 0); 388 389 /* 390 * Check if file_offset page aligned. Currently we cannot handle 391 * misalinged file offsets, and so we read in the entire image 392 * (what a waste). 393 */ 394 if (file_offset & PAGE_MASK) { 395 /* Map text+data read/write/execute */ 396 397 /* a_entry is the load address and is page aligned */ 398 vmaddr = trunc_page(a_out->a_entry); 399 400 /* get anon user mapping, read+write+execute */ 401 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 402 &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE, 403 VM_PROT_ALL, VM_PROT_ALL, 0); 404 if (error) 405 goto cleanup; 406 407 error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset, 408 a_out->a_text + a_out->a_data, UIO_USERSPACE, 0, 409 td->td_ucred, NOCRED, &aresid, td); 410 if (error != 0) 411 goto cleanup; 412 if (aresid != 0) { 413 error = ENOEXEC; 414 goto cleanup; 415 } 416 } else { 417 /* 418 * for QMAGIC, a_entry is 20 bytes beyond the load address 419 * to skip the executable header 420 */ 421 vmaddr = trunc_page(a_out->a_entry); 422 423 /* 424 * Map it all into the process's space as a single 425 * copy-on-write "data" segment. 
426 */ 427 map = &td->td_proc->p_vmspace->vm_map; 428 error = vm_mmap(map, &vmaddr, 429 a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL, 430 MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset); 431 if (error) 432 goto cleanup; 433 vm_map_lock(map); 434 if (!vm_map_lookup_entry(map, vmaddr, &entry)) { 435 vm_map_unlock(map); 436 error = EDOOFUS; 437 goto cleanup; 438 } 439 entry->eflags |= MAP_ENTRY_VN_EXEC; 440 vm_map_unlock(map); 441 textset = false; 442 } 443 444 if (bss_size != 0) { 445 /* Calculate BSS start address */ 446 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + 447 a_out->a_data; 448 449 /* allocate some 'anon' space */ 450 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 451 &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL, 452 VM_PROT_ALL, 0); 453 if (error) 454 goto cleanup; 455 } 456 457cleanup: 458 if (opened) { 459 if (locked) 460 VOP_UNLOCK(vp, 0); 461 locked = false; 462 VOP_CLOSE(vp, FREAD, td->td_ucred, td); 463 } 464 if (textset) { 465 if (!locked) { 466 locked = true; 467 VOP_LOCK(vp, LK_SHARED | LK_RETRY); 468 } 469 VOP_UNSET_TEXT_CHECKED(vp); 470 } 471 if (locked) 472 VOP_UNLOCK(vp, 0); 473 474 /* Release the temporary mapping. */ 475 if (a_out) 476 kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE); 477 478 return (error); 479} 480 481#endif /* __i386__ */ 482 483#ifdef LINUX_LEGACY_SYSCALLS 484int 485linux_select(struct thread *td, struct linux_select_args *args) 486{ 487 l_timeval ltv; 488 struct timeval tv0, tv1, utv, *tvp; 489 int error; 490 491 /* 492 * Store current time for computation of the amount of 493 * time left. 494 */ 495 if (args->timeout) { 496 if ((error = copyin(args->timeout, <v, sizeof(ltv)))) 497 goto select_out; 498 utv.tv_sec = ltv.tv_sec; 499 utv.tv_usec = ltv.tv_usec; 500 501 if (itimerfix(&utv)) { 502 /* 503 * The timeval was invalid. Convert it to something 504 * valid that will act as it does under Linux. 
505 */ 506 utv.tv_sec += utv.tv_usec / 1000000; 507 utv.tv_usec %= 1000000; 508 if (utv.tv_usec < 0) { 509 utv.tv_sec -= 1; 510 utv.tv_usec += 1000000; 511 } 512 if (utv.tv_sec < 0) 513 timevalclear(&utv); 514 } 515 microtime(&tv0); 516 tvp = &utv; 517 } else 518 tvp = NULL; 519 520 error = kern_select(td, args->nfds, args->readfds, args->writefds, 521 args->exceptfds, tvp, LINUX_NFDBITS); 522 if (error) 523 goto select_out; 524 525 if (args->timeout) { 526 if (td->td_retval[0]) { 527 /* 528 * Compute how much time was left of the timeout, 529 * by subtracting the current time and the time 530 * before we started the call, and subtracting 531 * that result from the user-supplied value. 532 */ 533 microtime(&tv1); 534 timevalsub(&tv1, &tv0); 535 timevalsub(&utv, &tv1); 536 if (utv.tv_sec < 0) 537 timevalclear(&utv); 538 } else 539 timevalclear(&utv); 540 ltv.tv_sec = utv.tv_sec; 541 ltv.tv_usec = utv.tv_usec; 542 if ((error = copyout(<v, args->timeout, sizeof(ltv)))) 543 goto select_out; 544 } 545 546select_out: 547 return (error); 548} 549#endif 550 551int 552linux_mremap(struct thread *td, struct linux_mremap_args *args) 553{ 554 uintptr_t addr; 555 size_t len; 556 int error = 0; 557 558 if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { 559 td->td_retval[0] = 0; 560 return (EINVAL); 561 } 562 563 /* 564 * Check for the page alignment. 565 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. 566 */ 567 if (args->addr & PAGE_MASK) { 568 td->td_retval[0] = 0; 569 return (EINVAL); 570 } 571 572 args->new_len = round_page(args->new_len); 573 args->old_len = round_page(args->old_len); 574 575 if (args->new_len > args->old_len) { 576 td->td_retval[0] = 0; 577 return (ENOMEM); 578 } 579 580 if (args->new_len < args->old_len) { 581 addr = args->addr + args->new_len; 582 len = args->old_len - args->new_len; 583 error = kern_munmap(td, addr, len); 584 } 585 586 td->td_retval[0] = error ? 
0 : (uintptr_t)args->addr; 587 return (error); 588} 589 590#define LINUX_MS_ASYNC 0x0001 591#define LINUX_MS_INVALIDATE 0x0002 592#define LINUX_MS_SYNC 0x0004 593 594int 595linux_msync(struct thread *td, struct linux_msync_args *args) 596{ 597 598 return (kern_msync(td, args->addr, args->len, 599 args->fl & ~LINUX_MS_SYNC)); 600} 601 602#ifdef LINUX_LEGACY_SYSCALLS 603int 604linux_time(struct thread *td, struct linux_time_args *args) 605{ 606 struct timeval tv; 607 l_time_t tm; 608 int error; 609 610 microtime(&tv); 611 tm = tv.tv_sec; 612 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) 613 return (error); 614 td->td_retval[0] = tm; 615 return (0); 616} 617#endif 618 619struct l_times_argv { 620 l_clock_t tms_utime; 621 l_clock_t tms_stime; 622 l_clock_t tms_cutime; 623 l_clock_t tms_cstime; 624}; 625 626 627/* 628 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. 629 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK 630 * auxiliary vector entry. 631 */ 632#define CLK_TCK 100 633 634#define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 635#define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) 636 637#define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER_2004000 ? 
\ 638 CONVNTCK(r) : CONVOTCK(r)) 639 640int 641linux_times(struct thread *td, struct linux_times_args *args) 642{ 643 struct timeval tv, utime, stime, cutime, cstime; 644 struct l_times_argv tms; 645 struct proc *p; 646 int error; 647 648 if (args->buf != NULL) { 649 p = td->td_proc; 650 PROC_LOCK(p); 651 PROC_STATLOCK(p); 652 calcru(p, &utime, &stime); 653 PROC_STATUNLOCK(p); 654 calccru(p, &cutime, &cstime); 655 PROC_UNLOCK(p); 656 657 tms.tms_utime = CONVTCK(utime); 658 tms.tms_stime = CONVTCK(stime); 659 660 tms.tms_cutime = CONVTCK(cutime); 661 tms.tms_cstime = CONVTCK(cstime); 662 663 if ((error = copyout(&tms, args->buf, sizeof(tms)))) 664 return (error); 665 } 666 667 microuptime(&tv); 668 td->td_retval[0] = (int)CONVTCK(tv); 669 return (0); 670} 671 672int 673linux_newuname(struct thread *td, struct linux_newuname_args *args) 674{ 675 struct l_new_utsname utsname; 676 char osname[LINUX_MAX_UTSNAME]; 677 char osrelease[LINUX_MAX_UTSNAME]; 678 char *p; 679 680 linux_get_osname(td, osname); 681 linux_get_osrelease(td, osrelease); 682 683 bzero(&utsname, sizeof(utsname)); 684 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); 685 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); 686 getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); 687 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); 688 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); 689 for (p = utsname.version; *p != '\0'; ++p) 690 if (*p == '\n') { 691 *p = '\0'; 692 break; 693 } 694#if defined(__amd64__) 695 /* 696 * On amd64, Linux uname(2) needs to return "x86_64" 697 * for both 64-bit and 32-bit applications. On 32-bit, 698 * the string returned by getauxval(AT_PLATFORM) needs 699 * to remain "i686", though. 
700 */ 701 strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME); 702#else 703 strlcpy(utsname.machine, linux_kplatform, LINUX_MAX_UTSNAME); 704#endif 705 706 return (copyout(&utsname, args->buf, sizeof(utsname))); 707} 708 709struct l_utimbuf { 710 l_time_t l_actime; 711 l_time_t l_modtime; 712}; 713 714#ifdef LINUX_LEGACY_SYSCALLS 715int 716linux_utime(struct thread *td, struct linux_utime_args *args) 717{ 718 struct timeval tv[2], *tvp; 719 struct l_utimbuf lut; 720 char *fname; 721 int error; 722 723 LCONVPATHEXIST(td, args->fname, &fname); 724 725 if (args->times) { 726 if ((error = copyin(args->times, &lut, sizeof lut))) { 727 LFREEPATH(fname); 728 return (error); 729 } 730 tv[0].tv_sec = lut.l_actime; 731 tv[0].tv_usec = 0; 732 tv[1].tv_sec = lut.l_modtime; 733 tv[1].tv_usec = 0; 734 tvp = tv; 735 } else 736 tvp = NULL; 737 738 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp, 739 UIO_SYSSPACE); 740 LFREEPATH(fname); 741 return (error); 742} 743#endif 744 745#ifdef LINUX_LEGACY_SYSCALLS 746int 747linux_utimes(struct thread *td, struct linux_utimes_args *args) 748{ 749 l_timeval ltv[2]; 750 struct timeval tv[2], *tvp = NULL; 751 char *fname; 752 int error; 753 754 LCONVPATHEXIST(td, args->fname, &fname); 755 756 if (args->tptr != NULL) { 757 if ((error = copyin(args->tptr, ltv, sizeof ltv))) { 758 LFREEPATH(fname); 759 return (error); 760 } 761 tv[0].tv_sec = ltv[0].tv_sec; 762 tv[0].tv_usec = ltv[0].tv_usec; 763 tv[1].tv_sec = ltv[1].tv_sec; 764 tv[1].tv_usec = ltv[1].tv_usec; 765 tvp = tv; 766 } 767 768 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, 769 tvp, UIO_SYSSPACE); 770 LFREEPATH(fname); 771 return (error); 772} 773#endif 774 775static int 776linux_utimensat_nsec_valid(l_long nsec) 777{ 778 779 if (nsec == LINUX_UTIME_OMIT || nsec == LINUX_UTIME_NOW) 780 return (0); 781 if (nsec >= 0 && nsec <= 999999999) 782 return (0); 783 return (1); 784} 785 786int 787linux_utimensat(struct thread *td, struct linux_utimensat_args *args) 788{ 
789 struct l_timespec l_times[2]; 790 struct timespec times[2], *timesp = NULL; 791 char *path = NULL; 792 int error, dfd, flags = 0; 793 794 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 795 796 if (args->flags & ~LINUX_AT_SYMLINK_NOFOLLOW) 797 return (EINVAL); 798 799 if (args->times != NULL) { 800 error = copyin(args->times, l_times, sizeof(l_times)); 801 if (error != 0) 802 return (error); 803 804 if (linux_utimensat_nsec_valid(l_times[0].tv_nsec) != 0 || 805 linux_utimensat_nsec_valid(l_times[1].tv_nsec) != 0) 806 return (EINVAL); 807 808 times[0].tv_sec = l_times[0].tv_sec; 809 switch (l_times[0].tv_nsec) 810 { 811 case LINUX_UTIME_OMIT: 812 times[0].tv_nsec = UTIME_OMIT; 813 break; 814 case LINUX_UTIME_NOW: 815 times[0].tv_nsec = UTIME_NOW; 816 break; 817 default: 818 times[0].tv_nsec = l_times[0].tv_nsec; 819 } 820 821 times[1].tv_sec = l_times[1].tv_sec; 822 switch (l_times[1].tv_nsec) 823 { 824 case LINUX_UTIME_OMIT: 825 times[1].tv_nsec = UTIME_OMIT; 826 break; 827 case LINUX_UTIME_NOW: 828 times[1].tv_nsec = UTIME_NOW; 829 break; 830 default: 831 times[1].tv_nsec = l_times[1].tv_nsec; 832 break; 833 } 834 timesp = times; 835 836 /* This breaks POSIX, but is what the Linux kernel does 837 * _on purpose_ (documented in the man page for utimensat(2)), 838 * so we must follow that behaviour. 
*/ 839 if (times[0].tv_nsec == UTIME_OMIT && 840 times[1].tv_nsec == UTIME_OMIT) 841 return (0); 842 } 843 844 if (args->pathname != NULL) 845 LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); 846 else if (args->flags != 0) 847 return (EINVAL); 848 849 if (args->flags & LINUX_AT_SYMLINK_NOFOLLOW) 850 flags |= AT_SYMLINK_NOFOLLOW; 851 852 if (path == NULL) 853 error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE); 854 else { 855 error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp, 856 UIO_SYSSPACE, flags); 857 LFREEPATH(path); 858 } 859 860 return (error); 861} 862 863#ifdef LINUX_LEGACY_SYSCALLS 864int 865linux_futimesat(struct thread *td, struct linux_futimesat_args *args) 866{ 867 l_timeval ltv[2]; 868 struct timeval tv[2], *tvp = NULL; 869 char *fname; 870 int error, dfd; 871 872 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 873 LCONVPATHEXIST_AT(td, args->filename, &fname, dfd); 874 875 if (args->utimes != NULL) { 876 if ((error = copyin(args->utimes, ltv, sizeof ltv))) { 877 LFREEPATH(fname); 878 return (error); 879 } 880 tv[0].tv_sec = ltv[0].tv_sec; 881 tv[0].tv_usec = ltv[0].tv_usec; 882 tv[1].tv_sec = ltv[1].tv_sec; 883 tv[1].tv_usec = ltv[1].tv_usec; 884 tvp = tv; 885 } 886 887 error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); 888 LFREEPATH(fname); 889 return (error); 890} 891#endif 892 893static int 894linux_common_wait(struct thread *td, int pid, int *statusp, 895 int options, struct __wrusage *wrup) 896{ 897 siginfo_t siginfo; 898 idtype_t idtype; 899 id_t id; 900 int error, status, tmpstat; 901 902 if (pid == WAIT_ANY) { 903 idtype = P_ALL; 904 id = 0; 905 } else if (pid < 0) { 906 idtype = P_PGID; 907 id = (id_t)-pid; 908 } else { 909 idtype = P_PID; 910 id = (id_t)pid; 911 } 912 913 /* 914 * For backward compatibility we implicitly add flags WEXITED 915 * and WTRAPPED here. 
916 */ 917 options |= WEXITED | WTRAPPED; 918 error = kern_wait6(td, idtype, id, &status, options, wrup, &siginfo); 919 if (error) 920 return (error); 921 922 if (statusp) { 923 tmpstat = status & 0xffff; 924 if (WIFSIGNALED(tmpstat)) { 925 tmpstat = (tmpstat & 0xffffff80) | 926 bsd_to_linux_signal(WTERMSIG(tmpstat)); 927 } else if (WIFSTOPPED(tmpstat)) { 928 tmpstat = (tmpstat & 0xffff00ff) | 929 (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8); 930#if defined(__amd64__) && !defined(COMPAT_LINUX32) 931 if (WSTOPSIG(status) == SIGTRAP) { 932 tmpstat = linux_ptrace_status(td, 933 siginfo.si_pid, tmpstat); 934 } 935#endif 936 } else if (WIFCONTINUED(tmpstat)) { 937 tmpstat = 0xffff; 938 } 939 error = copyout(&tmpstat, statusp, sizeof(int)); 940 } 941 942 return (error); 943} 944 945#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 946int 947linux_waitpid(struct thread *td, struct linux_waitpid_args *args) 948{ 949 struct linux_wait4_args wait4_args; 950 951 wait4_args.pid = args->pid; 952 wait4_args.status = args->status; 953 wait4_args.options = args->options; 954 wait4_args.rusage = NULL; 955 956 return (linux_wait4(td, &wait4_args)); 957} 958#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 959 960int 961linux_wait4(struct thread *td, struct linux_wait4_args *args) 962{ 963 int error, options; 964 struct __wrusage wru, *wrup; 965 966 if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG | 967 LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 968 return (EINVAL); 969 970 options = WEXITED; 971 linux_to_bsd_waitopts(args->options, &options); 972 973 if (args->rusage != NULL) 974 wrup = &wru; 975 else 976 wrup = NULL; 977 error = linux_common_wait(td, args->pid, args->status, options, wrup); 978 if (error != 0) 979 return (error); 980 if (args->rusage != NULL) 981 error = linux_copyout_rusage(&wru.wru_self, args->rusage); 982 return (error); 983} 984 985int 986linux_waitid(struct thread *td, struct linux_waitid_args *args) 987{ 988 int 
status, options, sig; 989 struct __wrusage wru; 990 siginfo_t siginfo; 991 l_siginfo_t lsi; 992 idtype_t idtype; 993 struct proc *p; 994 int error; 995 996 options = 0; 997 linux_to_bsd_waitopts(args->options, &options); 998 999 if (options & ~(WNOHANG | WNOWAIT | WEXITED | WUNTRACED | WCONTINUED)) 1000 return (EINVAL); 1001 if (!(options & (WEXITED | WUNTRACED | WCONTINUED))) 1002 return (EINVAL); 1003 1004 switch (args->idtype) { 1005 case LINUX_P_ALL: 1006 idtype = P_ALL; 1007 break; 1008 case LINUX_P_PID: 1009 if (args->id <= 0) 1010 return (EINVAL); 1011 idtype = P_PID; 1012 break; 1013 case LINUX_P_PGID: 1014 if (args->id <= 0) 1015 return (EINVAL); 1016 idtype = P_PGID; 1017 break; 1018 default: 1019 return (EINVAL); 1020 } 1021 1022 error = kern_wait6(td, idtype, args->id, &status, options, 1023 &wru, &siginfo); 1024 if (error != 0) 1025 return (error); 1026 if (args->rusage != NULL) { 1027 error = linux_copyout_rusage(&wru.wru_children, 1028 args->rusage); 1029 if (error != 0) 1030 return (error); 1031 } 1032 if (args->info != NULL) { 1033 p = td->td_proc; 1034 bzero(&lsi, sizeof(lsi)); 1035 if (td->td_retval[0] != 0) { 1036 sig = bsd_to_linux_signal(siginfo.si_signo); 1037 siginfo_to_lsiginfo(&siginfo, &lsi, sig); 1038 } 1039 error = copyout(&lsi, args->info, sizeof(lsi)); 1040 } 1041 td->td_retval[0] = 0; 1042 1043 return (error); 1044} 1045 1046#ifdef LINUX_LEGACY_SYSCALLS 1047int 1048linux_mknod(struct thread *td, struct linux_mknod_args *args) 1049{ 1050 char *path; 1051 int error; 1052 1053 LCONVPATHCREAT(td, args->path, &path); 1054 1055 switch (args->mode & S_IFMT) { 1056 case S_IFIFO: 1057 case S_IFSOCK: 1058 error = kern_mkfifoat(td, AT_FDCWD, path, UIO_SYSSPACE, 1059 args->mode); 1060 break; 1061 1062 case S_IFCHR: 1063 case S_IFBLK: 1064 error = kern_mknodat(td, AT_FDCWD, path, UIO_SYSSPACE, 1065 args->mode, args->dev); 1066 break; 1067 1068 case S_IFDIR: 1069 error = EPERM; 1070 break; 1071 1072 case 0: 1073 args->mode |= S_IFREG; 1074 /* 
FALLTHROUGH */ 1075 case S_IFREG: 1076 error = kern_openat(td, AT_FDCWD, path, UIO_SYSSPACE, 1077 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 1078 if (error == 0) 1079 kern_close(td, td->td_retval[0]); 1080 break; 1081 1082 default: 1083 error = EINVAL; 1084 break; 1085 } 1086 LFREEPATH(path); 1087 return (error); 1088} 1089#endif 1090 1091int 1092linux_mknodat(struct thread *td, struct linux_mknodat_args *args) 1093{ 1094 char *path; 1095 int error, dfd; 1096 1097 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 1098 LCONVPATHCREAT_AT(td, args->filename, &path, dfd); 1099 1100 switch (args->mode & S_IFMT) { 1101 case S_IFIFO: 1102 case S_IFSOCK: 1103 error = kern_mkfifoat(td, dfd, path, UIO_SYSSPACE, args->mode); 1104 break; 1105 1106 case S_IFCHR: 1107 case S_IFBLK: 1108 error = kern_mknodat(td, dfd, path, UIO_SYSSPACE, args->mode, 1109 args->dev); 1110 break; 1111 1112 case S_IFDIR: 1113 error = EPERM; 1114 break; 1115 1116 case 0: 1117 args->mode |= S_IFREG; 1118 /* FALLTHROUGH */ 1119 case S_IFREG: 1120 error = kern_openat(td, dfd, path, UIO_SYSSPACE, 1121 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 1122 if (error == 0) 1123 kern_close(td, td->td_retval[0]); 1124 break; 1125 1126 default: 1127 error = EINVAL; 1128 break; 1129 } 1130 LFREEPATH(path); 1131 return (error); 1132} 1133 1134/* 1135 * UGH! This is just about the dumbest idea I've ever heard!! 
 */
/*
 * personality(2): record the requested execution-domain value in the
 * per-process Linux emulation data and return the previous one.
 * 0xffffffff is the Linux "query only" sentinel and leaves it unchanged.
 */
int
linux_personality(struct thread *td, struct linux_personality_args *args)
{
	struct linux_pemuldata *pem;
	struct proc *p = td->td_proc;
	uint32_t old;

	PROC_LOCK(p);
	pem = pem_find(p);
	old = pem->persona;
	if (args->per != 0xffffffff)
		pem->persona = args->per;
	PROC_UNLOCK(p);

	td->td_retval[0] = old;
	return (0);
}

struct l_itimerval {
	l_timeval it_interval;
	l_timeval it_value;
};

/* Copy both interval and value fields between BSD and Linux itimervals. */
#define	B2L_ITIMERVAL(bip, lip)						\
	(bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec;		\
	(bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec;	\
	(bip)->it_value.tv_sec = (lip)->it_value.tv_sec;		\
	(bip)->it_value.tv_usec = (lip)->it_value.tv_usec;

/*
 * setitimer(2): a NULL new-value pointer degenerates to getitimer(2)
 * on the old-value pointer; otherwise convert, set, and optionally
 * copy the previous timer back out.
 */
int
linux_setitimer(struct thread *td, struct linux_setitimer_args *uap)
{
	int error;
	struct l_itimerval ls;
	struct itimerval aitv, oitv;

	if (uap->itv == NULL) {
		uap->itv = uap->oitv;
		return (linux_getitimer(td, (struct linux_getitimer_args *)uap));
	}

	error = copyin(uap->itv, &ls, sizeof(ls));
	if (error != 0)
		return (error);
	B2L_ITIMERVAL(&aitv, &ls);
	error = kern_setitimer(td, uap->which, &aitv, &oitv);
	if (error != 0 || uap->oitv == NULL)
		return (error);
	B2L_ITIMERVAL(&ls, &oitv);

	return (copyout(&ls, uap->oitv, sizeof(ls)));
}

/* getitimer(2): fetch the timer and convert to the Linux layout. */
int
linux_getitimer(struct thread *td, struct linux_getitimer_args *uap)
{
	int error;
	struct l_itimerval ls;
	struct itimerval aitv;

	error = kern_getitimer(td, uap->which, &aitv);
	if (error != 0)
		return (error);
	B2L_ITIMERVAL(&ls, &aitv);
	return (copyout(&ls, uap->itv, sizeof(ls)));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/* nice(2): implemented in terms of setpriority(2) on the current process. */
int
linux_nice(struct thread *td, struct linux_nice_args *args)
{
	struct setpriority_args bsd_args;

	bsd_args.which = PRIO_PROCESS;
	bsd_args.who = 0;		/* current process */
	bsd_args.prio = args->inc;
	return (sys_setpriority(td, &bsd_args));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

/*
 * setgroups(2): replace the supplementary group set of the current
 * process, preserving cr_groups[0] (the effective gid) which FreeBSD
 * stores in the same array.
 */
int
linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
{
	struct ucred *newcred, *oldcred;
	l_gid_t *linux_gidset;
	gid_t *bsd_gidset;
	int ngrp, error;
	struct proc *p;

	ngrp = args->gidsetsize;
	if (ngrp < 0 || ngrp >= ngroups_max + 1)
		return (EINVAL);
	linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK);
	error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t));
	if (error)
		goto out;
	newcred = crget();
	/* +1 because slot 0 is reserved for the egid (see below). */
	crextend(newcred, ngrp + 1);
	p = td->td_proc;
	PROC_LOCK(p);
	oldcred = p->p_ucred;
	crcopy(newcred, oldcred);

	/*
	 * cr_groups[0] holds egid. Setting the whole set from
	 * the supplied set will cause egid to be changed too.
	 * Keep cr_groups[0] unchanged to prevent that.
	 */

	if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS, 0)) != 0) {
		PROC_UNLOCK(p);
		crfree(newcred);
		goto out;
	}

	if (ngrp > 0) {
		newcred->cr_ngroups = ngrp + 1;

		bsd_gidset = newcred->cr_groups;
		ngrp--;
		/* Copy the Linux set shifted up by one past the egid slot. */
		while (ngrp >= 0) {
			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
			ngrp--;
		}
	} else
		newcred->cr_ngroups = 1;

	setsugid(p);
	proc_set_cred(p, newcred);
	PROC_UNLOCK(p);
	crfree(oldcred);
	error = 0;
out:
	free(linux_gidset, M_LINUX);
	return (error);
}

/*
 * getgroups(2): return the supplementary groups, excluding
 * cr_groups[0] (the egid slot).  gidsetsize == 0 only reports the
 * count, per Linux semantics.
 */
int
linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
{
	struct ucred *cred;
	l_gid_t *linux_gidset;
	gid_t *bsd_gidset;
	int bsd_gidsetsz, ngrp, error;

	cred = td->td_ucred;
	bsd_gidset = cred->cr_groups;
	bsd_gidsetsz = cred->cr_ngroups - 1;

	/*
	 * cr_groups[0] holds egid. Returning the whole set
	 * here will cause a duplicate. Exclude cr_groups[0]
	 * to prevent that.
	 */

	if ((ngrp = args->gidsetsize) == 0) {
		td->td_retval[0] = bsd_gidsetsz;
		return (0);
	}

	if (ngrp < bsd_gidsetsz)
		return (EINVAL);

	ngrp = 0;
	linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset),
	    M_LINUX, M_WAITOK);
	while (ngrp < bsd_gidsetsz) {
		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
		ngrp++;
	}

	error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t));
	free(linux_gidset, M_LINUX);
	if (error)
		return (error);

	td->td_retval[0] = ngrp;
	return (0);
}

/* setrlimit(2): translate the resource id and limit struct, then apply. */
int
linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args)
{
	struct rlimit bsd_rlim;
	struct l_rlimit rlim;
	u_int which;
	int error;

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	error = copyin(args->rlim, &rlim, sizeof(rlim));
	if (error)
		return (error);

	bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur;
	bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max;
	return (kern_setrlimit(td, which, &bsd_rlim));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * Old getrlimit(2): clamp "infinity" (all-ones) down to the maximum
 * signed value of the Linux word size, matching old-ABI expectations.
 */
int
linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
{
	struct l_rlimit rlim;
	struct rlimit bsd_rlim;
	u_int which;

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	lim_rlimit(td, which, &bsd_rlim);

#ifdef COMPAT_LINUX32
	rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur;
	if (rlim.rlim_cur == UINT_MAX)
		rlim.rlim_cur = INT_MAX;
	rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max;
	if (rlim.rlim_max == UINT_MAX)
		rlim.rlim_max = INT_MAX;
#else
	rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur;
	if (rlim.rlim_cur == ULONG_MAX)
		rlim.rlim_cur = LONG_MAX;
	rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max;
	if (rlim.rlim_max == ULONG_MAX)
		rlim.rlim_max = LONG_MAX;
#endif
	return (copyout(&rlim, args->rlim, sizeof(rlim)));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

/* getrlimit(2): straight translation, no infinity clamping. */
int
linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
{
	struct l_rlimit rlim;
	struct rlimit bsd_rlim;
	u_int which;

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	lim_rlimit(td, which, &bsd_rlim);

	rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur;
	rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max;
	return (copyout(&rlim, args->rlim, sizeof(rlim)));
}

/*
 * sched_setscheduler(2): translate the policy and, when priority
 * mapping is enabled, rescale the Linux priority range onto the
 * native realtime range.
 */
int
linux_sched_setscheduler(struct thread *td,
    struct linux_sched_setscheduler_args *args)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}

	error = copyin(args->param, &sched_param, sizeof(sched_param));
	if (error)
		return (error);

	if (linux_map_sched_prio) {
		switch (policy) {
		case SCHED_OTHER:
			if (sched_param.sched_priority != 0)
				return (EINVAL);

			sched_param.sched_priority =
			    PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			if (sched_param.sched_priority < 1 ||
			    sched_param.sched_priority >= LINUX_MAX_RT_PRIO)
				return (EINVAL);

			/*
			 * Map [1, LINUX_MAX_RT_PRIO - 1] to
			 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority - 1) *
			    (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
			    (LINUX_MAX_RT_PRIO - 1);
			break;
		}
	}

	/* linux_tdfind() returns with the target proc locked. */
	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_setscheduler(td, tdt, policy, &sched_param);
	PROC_UNLOCK(tdt->td_proc);
	return (error);
}

/* sched_getscheduler(2): fetch the policy and map it back to Linux values. */
int
linux_sched_getscheduler(struct thread *td,
    struct linux_sched_getscheduler_args *args)
{
	struct thread *tdt;
	int error, policy;

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_getscheduler(td, tdt, &policy);
	PROC_UNLOCK(tdt->td_proc);

	switch (policy) {
	case SCHED_OTHER:
		td->td_retval[0] = LINUX_SCHED_OTHER;
		break;
	case SCHED_FIFO:
		td->td_retval[0] = LINUX_SCHED_FIFO;
		break;
	case SCHED_RR:
		td->td_retval[0] = LINUX_SCHED_RR;
		break;
	}
	return (error);
}

/*
 * sched_get_priority_max(2): with priority mapping enabled, report the
 * Linux-visible range directly; otherwise defer to the native syscall.
 */
int
linux_sched_get_priority_max(struct thread *td,
    struct linux_sched_get_priority_max_args *args)
{
	struct sched_get_priority_max_args bsd;

	if (linux_map_sched_prio) {
		switch (args->policy) {
		case LINUX_SCHED_OTHER:
			td->td_retval[0] = 0;
			return (0);
		case LINUX_SCHED_FIFO:
		case LINUX_SCHED_RR:
			td->td_retval[0] = LINUX_MAX_RT_PRIO - 1;
			return (0);
		default:
			return (EINVAL);
		}
	}

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		bsd.policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		bsd.policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		bsd.policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}
	return (sys_sched_get_priority_max(td, &bsd));
}

/* sched_get_priority_min(2): counterpart of the function above. */
int
linux_sched_get_priority_min(struct thread *td,
    struct linux_sched_get_priority_min_args *args)
{
	struct sched_get_priority_min_args bsd;

	if (linux_map_sched_prio) {
		switch (args->policy) {
		case LINUX_SCHED_OTHER:
			td->td_retval[0] = 0;
			return (0);
		case LINUX_SCHED_FIFO:
		case LINUX_SCHED_RR:
			td->td_retval[0] = 1;
			return (0);
		default:
			return (EINVAL);
		}
	}

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		bsd.policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		bsd.policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		bsd.policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}
	return (sys_sched_get_priority_min(td, &bsd));
}

/* Linux reboot(2) magic numbers and commands. */
#define REBOOT_CAD_ON	0x89abcdef
#define REBOOT_CAD_OFF	0
#define REBOOT_HALT	0xcdef0123
#define REBOOT_RESTART	0x01234567
#define REBOOT_RESTART2	0xA1B2C3D4
#define REBOOT_POWEROFF	0x4321FEDC
#define REBOOT_MAGIC1	0xfee1dead
#define REBOOT_MAGIC2	0x28121969
#define REBOOT_MAGIC2A	0x05121996
#define REBOOT_MAGIC2B	0x16041998

/*
 * reboot(2): validate the Linux magic cookies, then map the command
 * onto the native reboot(2) flags.  The Ctrl-Alt-Del toggles only
 * perform the privilege check and are otherwise no-ops.
 */
int
linux_reboot(struct thread *td, struct linux_reboot_args *args)
{
	struct reboot_args bsd_args;

	if (args->magic1 != REBOOT_MAGIC1)
		return (EINVAL);

	switch (args->magic2) {
	case REBOOT_MAGIC2:
	case REBOOT_MAGIC2A:
	case REBOOT_MAGIC2B:
		break;
	default:
		return (EINVAL);
	}

	switch (args->cmd) {
	case REBOOT_CAD_ON:
	case REBOOT_CAD_OFF:
		return (priv_check(td, PRIV_REBOOT));
	case REBOOT_HALT:
		bsd_args.opt = RB_HALT;
		break;
	case REBOOT_RESTART:
	case REBOOT_RESTART2:
		bsd_args.opt = 0;
		break;
	case REBOOT_POWEROFF:
		bsd_args.opt = RB_POWEROFF;
		break;
	default:
		return (EINVAL);
	}
	return (sys_reboot(td, &bsd_args));
}

/* getpid(2): no COMPAT_43 td_retval[1] clobbering, unlike the native call. */
int
linux_getpid(struct thread *td, struct linux_getpid_args *args)
{

	td->td_retval[0] = td->td_proc->p_pid;

	return (0);
}
1617 1618int 1619linux_gettid(struct thread *td, struct linux_gettid_args *args) 1620{ 1621 struct linux_emuldata *em; 1622 1623 em = em_find(td); 1624 KASSERT(em != NULL, ("gettid: emuldata not found.\n")); 1625 1626 td->td_retval[0] = em->em_tid; 1627 1628 return (0); 1629} 1630 1631 1632int 1633linux_getppid(struct thread *td, struct linux_getppid_args *args) 1634{ 1635 1636 td->td_retval[0] = kern_getppid(td); 1637 return (0); 1638} 1639 1640int 1641linux_getgid(struct thread *td, struct linux_getgid_args *args) 1642{ 1643 1644 td->td_retval[0] = td->td_ucred->cr_rgid; 1645 return (0); 1646} 1647 1648int 1649linux_getuid(struct thread *td, struct linux_getuid_args *args) 1650{ 1651 1652 td->td_retval[0] = td->td_ucred->cr_ruid; 1653 return (0); 1654} 1655 1656 1657int 1658linux_getsid(struct thread *td, struct linux_getsid_args *args) 1659{ 1660 struct getsid_args bsd; 1661 1662 bsd.pid = args->pid; 1663 return (sys_getsid(td, &bsd)); 1664} 1665 1666int 1667linux_nosys(struct thread *td, struct nosys_args *ignore) 1668{ 1669 1670 return (ENOSYS); 1671} 1672 1673int 1674linux_getpriority(struct thread *td, struct linux_getpriority_args *args) 1675{ 1676 struct getpriority_args bsd_args; 1677 int error; 1678 1679 bsd_args.which = args->which; 1680 bsd_args.who = args->who; 1681 error = sys_getpriority(td, &bsd_args); 1682 td->td_retval[0] = 20 - td->td_retval[0]; 1683 return (error); 1684} 1685 1686int 1687linux_sethostname(struct thread *td, struct linux_sethostname_args *args) 1688{ 1689 int name[2]; 1690 1691 name[0] = CTL_KERN; 1692 name[1] = KERN_HOSTNAME; 1693 return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, 1694 args->len, 0, 0)); 1695} 1696 1697int 1698linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) 1699{ 1700 int name[2]; 1701 1702 name[0] = CTL_KERN; 1703 name[1] = KERN_NISDOMAINNAME; 1704 return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, 1705 args->len, 0, 0)); 1706} 1707 1708int 
1709linux_exit_group(struct thread *td, struct linux_exit_group_args *args) 1710{ 1711 1712 LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, 1713 args->error_code); 1714 1715 /* 1716 * XXX: we should send a signal to the parent if 1717 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) 1718 * as it doesnt occur often. 1719 */ 1720 exit1(td, args->error_code, 0); 1721 /* NOTREACHED */ 1722} 1723 1724#define _LINUX_CAPABILITY_VERSION_1 0x19980330 1725#define _LINUX_CAPABILITY_VERSION_2 0x20071026 1726#define _LINUX_CAPABILITY_VERSION_3 0x20080522 1727 1728struct l_user_cap_header { 1729 l_int version; 1730 l_int pid; 1731}; 1732 1733struct l_user_cap_data { 1734 l_int effective; 1735 l_int permitted; 1736 l_int inheritable; 1737}; 1738 1739int 1740linux_capget(struct thread *td, struct linux_capget_args *uap) 1741{ 1742 struct l_user_cap_header luch; 1743 struct l_user_cap_data lucd[2]; 1744 int error, u32s; 1745 1746 if (uap->hdrp == NULL) 1747 return (EFAULT); 1748 1749 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1750 if (error != 0) 1751 return (error); 1752 1753 switch (luch.version) { 1754 case _LINUX_CAPABILITY_VERSION_1: 1755 u32s = 1; 1756 break; 1757 case _LINUX_CAPABILITY_VERSION_2: 1758 case _LINUX_CAPABILITY_VERSION_3: 1759 u32s = 2; 1760 break; 1761 default: 1762 luch.version = _LINUX_CAPABILITY_VERSION_1; 1763 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1764 if (error) 1765 return (error); 1766 return (EINVAL); 1767 } 1768 1769 if (luch.pid) 1770 return (EPERM); 1771 1772 if (uap->datap) { 1773 /* 1774 * The current implementation doesn't support setting 1775 * a capability (it's essentially a stub) so indicate 1776 * that no capabilities are currently set or available 1777 * to request. 
1778 */ 1779 memset(&lucd, 0, u32s * sizeof(lucd[0])); 1780 error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0])); 1781 } 1782 1783 return (error); 1784} 1785 1786int 1787linux_capset(struct thread *td, struct linux_capset_args *uap) 1788{ 1789 struct l_user_cap_header luch; 1790 struct l_user_cap_data lucd[2]; 1791 int error, i, u32s; 1792 1793 if (uap->hdrp == NULL || uap->datap == NULL) 1794 return (EFAULT); 1795 1796 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1797 if (error != 0) 1798 return (error); 1799 1800 switch (luch.version) { 1801 case _LINUX_CAPABILITY_VERSION_1: 1802 u32s = 1; 1803 break; 1804 case _LINUX_CAPABILITY_VERSION_2: 1805 case _LINUX_CAPABILITY_VERSION_3: 1806 u32s = 2; 1807 break; 1808 default: 1809 luch.version = _LINUX_CAPABILITY_VERSION_1; 1810 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1811 if (error) 1812 return (error); 1813 return (EINVAL); 1814 } 1815 1816 if (luch.pid) 1817 return (EPERM); 1818 1819 error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0])); 1820 if (error != 0) 1821 return (error); 1822 1823 /* We currently don't support setting any capabilities. 
*/ 1824 for (i = 0; i < u32s; i++) { 1825 if (lucd[i].effective || lucd[i].permitted || 1826 lucd[i].inheritable) { 1827 linux_msg(td, 1828 "capset[%d] effective=0x%x, permitted=0x%x, " 1829 "inheritable=0x%x is not implemented", i, 1830 (int)lucd[i].effective, (int)lucd[i].permitted, 1831 (int)lucd[i].inheritable); 1832 return (EPERM); 1833 } 1834 } 1835 1836 return (0); 1837} 1838 1839int 1840linux_prctl(struct thread *td, struct linux_prctl_args *args) 1841{ 1842 int error = 0, max_size; 1843 struct proc *p = td->td_proc; 1844 char comm[LINUX_MAX_COMM_LEN]; 1845 int pdeath_signal; 1846 1847 switch (args->option) { 1848 case LINUX_PR_SET_PDEATHSIG: 1849 if (!LINUX_SIG_VALID(args->arg2)) 1850 return (EINVAL); 1851 pdeath_signal = linux_to_bsd_signal(args->arg2); 1852 return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL, 1853 &pdeath_signal)); 1854 case LINUX_PR_GET_PDEATHSIG: 1855 error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS, 1856 &pdeath_signal); 1857 if (error != 0) 1858 return (error); 1859 pdeath_signal = bsd_to_linux_signal(pdeath_signal); 1860 return (copyout(&pdeath_signal, 1861 (void *)(register_t)args->arg2, 1862 sizeof(pdeath_signal))); 1863 break; 1864 case LINUX_PR_GET_KEEPCAPS: 1865 /* 1866 * Indicate that we always clear the effective and 1867 * permitted capability sets when the user id becomes 1868 * non-zero (actually the capability sets are simply 1869 * always zero in the current implementation). 1870 */ 1871 td->td_retval[0] = 0; 1872 break; 1873 case LINUX_PR_SET_KEEPCAPS: 1874 /* 1875 * Ignore requests to keep the effective and permitted 1876 * capability sets when the user id becomes non-zero. 1877 */ 1878 break; 1879 case LINUX_PR_SET_NAME: 1880 /* 1881 * To be on the safe side we need to make sure to not 1882 * overflow the size a Linux program expects. We already 1883 * do this here in the copyin, so that we don't need to 1884 * check on copyout. 
1885 */ 1886 max_size = MIN(sizeof(comm), sizeof(p->p_comm)); 1887 error = copyinstr((void *)(register_t)args->arg2, comm, 1888 max_size, NULL); 1889 1890 /* Linux silently truncates the name if it is too long. */ 1891 if (error == ENAMETOOLONG) { 1892 /* 1893 * XXX: copyinstr() isn't documented to populate the 1894 * array completely, so do a copyin() to be on the 1895 * safe side. This should be changed in case 1896 * copyinstr() is changed to guarantee this. 1897 */ 1898 error = copyin((void *)(register_t)args->arg2, comm, 1899 max_size - 1); 1900 comm[max_size - 1] = '\0'; 1901 } 1902 if (error) 1903 return (error); 1904 1905 PROC_LOCK(p); 1906 strlcpy(p->p_comm, comm, sizeof(p->p_comm)); 1907 PROC_UNLOCK(p); 1908 break; 1909 case LINUX_PR_GET_NAME: 1910 PROC_LOCK(p); 1911 strlcpy(comm, p->p_comm, sizeof(comm)); 1912 PROC_UNLOCK(p); 1913 error = copyout(comm, (void *)(register_t)args->arg2, 1914 strlen(comm) + 1); 1915 break; 1916 default: 1917 error = EINVAL; 1918 break; 1919 } 1920 1921 return (error); 1922} 1923 1924int 1925linux_sched_setparam(struct thread *td, 1926 struct linux_sched_setparam_args *uap) 1927{ 1928 struct sched_param sched_param; 1929 struct thread *tdt; 1930 int error, policy; 1931 1932 error = copyin(uap->param, &sched_param, sizeof(sched_param)); 1933 if (error) 1934 return (error); 1935 1936 tdt = linux_tdfind(td, uap->pid, -1); 1937 if (tdt == NULL) 1938 return (ESRCH); 1939 1940 if (linux_map_sched_prio) { 1941 error = kern_sched_getscheduler(td, tdt, &policy); 1942 if (error) 1943 goto out; 1944 1945 switch (policy) { 1946 case SCHED_OTHER: 1947 if (sched_param.sched_priority != 0) { 1948 error = EINVAL; 1949 goto out; 1950 } 1951 sched_param.sched_priority = 1952 PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; 1953 break; 1954 case SCHED_FIFO: 1955 case SCHED_RR: 1956 if (sched_param.sched_priority < 1 || 1957 sched_param.sched_priority >= LINUX_MAX_RT_PRIO) { 1958 error = EINVAL; 1959 goto out; 1960 } 1961 /* 1962 * Map [1, 
LINUX_MAX_RT_PRIO - 1] to 1963 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 1964 */ 1965 sched_param.sched_priority = 1966 (sched_param.sched_priority - 1) * 1967 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / 1968 (LINUX_MAX_RT_PRIO - 1); 1969 break; 1970 } 1971 } 1972 1973 error = kern_sched_setparam(td, tdt, &sched_param); 1974out: PROC_UNLOCK(tdt->td_proc); 1975 return (error); 1976} 1977 1978int 1979linux_sched_getparam(struct thread *td, 1980 struct linux_sched_getparam_args *uap) 1981{ 1982 struct sched_param sched_param; 1983 struct thread *tdt; 1984 int error, policy; 1985 1986 tdt = linux_tdfind(td, uap->pid, -1); 1987 if (tdt == NULL) 1988 return (ESRCH); 1989 1990 error = kern_sched_getparam(td, tdt, &sched_param); 1991 if (error) { 1992 PROC_UNLOCK(tdt->td_proc); 1993 return (error); 1994 } 1995 1996 if (linux_map_sched_prio) { 1997 error = kern_sched_getscheduler(td, tdt, &policy); 1998 PROC_UNLOCK(tdt->td_proc); 1999 if (error) 2000 return (error); 2001 2002 switch (policy) { 2003 case SCHED_OTHER: 2004 sched_param.sched_priority = 0; 2005 break; 2006 case SCHED_FIFO: 2007 case SCHED_RR: 2008 /* 2009 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to 2010 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up). 2011 */ 2012 sched_param.sched_priority = 2013 (sched_param.sched_priority * 2014 (LINUX_MAX_RT_PRIO - 1) + 2015 (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) / 2016 (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1; 2017 break; 2018 } 2019 } else 2020 PROC_UNLOCK(tdt->td_proc); 2021 2022 error = copyout(&sched_param, uap->param, sizeof(sched_param)); 2023 return (error); 2024} 2025 2026/* 2027 * Get affinity of a process. 
2028 */ 2029int 2030linux_sched_getaffinity(struct thread *td, 2031 struct linux_sched_getaffinity_args *args) 2032{ 2033 int error; 2034 struct thread *tdt; 2035 2036 if (args->len < sizeof(cpuset_t)) 2037 return (EINVAL); 2038 2039 tdt = linux_tdfind(td, args->pid, -1); 2040 if (tdt == NULL) 2041 return (ESRCH); 2042 2043 PROC_UNLOCK(tdt->td_proc); 2044 2045 error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 2046 tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr); 2047 if (error == 0) 2048 td->td_retval[0] = sizeof(cpuset_t); 2049 2050 return (error); 2051} 2052 2053/* 2054 * Set affinity of a process. 2055 */ 2056int 2057linux_sched_setaffinity(struct thread *td, 2058 struct linux_sched_setaffinity_args *args) 2059{ 2060 struct thread *tdt; 2061 2062 if (args->len < sizeof(cpuset_t)) 2063 return (EINVAL); 2064 2065 tdt = linux_tdfind(td, args->pid, -1); 2066 if (tdt == NULL) 2067 return (ESRCH); 2068 2069 PROC_UNLOCK(tdt->td_proc); 2070 2071 return (kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 2072 tdt->td_tid, sizeof(cpuset_t), (cpuset_t *) args->user_mask_ptr)); 2073} 2074 2075struct linux_rlimit64 { 2076 uint64_t rlim_cur; 2077 uint64_t rlim_max; 2078}; 2079 2080int 2081linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) 2082{ 2083 struct rlimit rlim, nrlim; 2084 struct linux_rlimit64 lrlim; 2085 struct proc *p; 2086 u_int which; 2087 int flags; 2088 int error; 2089 2090 if (args->resource >= LINUX_RLIM_NLIMITS) 2091 return (EINVAL); 2092 2093 which = linux_to_bsd_resource[args->resource]; 2094 if (which == -1) 2095 return (EINVAL); 2096 2097 if (args->new != NULL) { 2098 /* 2099 * Note. Unlike FreeBSD where rlim is signed 64-bit Linux 2100 * rlim is unsigned 64-bit. FreeBSD treats negative limits 2101 * as INFINITY so we do not need a conversion even. 
2102 */ 2103 error = copyin(args->new, &nrlim, sizeof(nrlim)); 2104 if (error != 0) 2105 return (error); 2106 } 2107 2108 flags = PGET_HOLD | PGET_NOTWEXIT; 2109 if (args->new != NULL) 2110 flags |= PGET_CANDEBUG; 2111 else 2112 flags |= PGET_CANSEE; 2113 if (args->pid == 0) { 2114 p = td->td_proc; 2115 PHOLD(p); 2116 } else { 2117 error = pget(args->pid, flags, &p); 2118 if (error != 0) 2119 return (error); 2120 } 2121 if (args->old != NULL) { 2122 PROC_LOCK(p); 2123 lim_rlimit_proc(p, which, &rlim); 2124 PROC_UNLOCK(p); 2125 if (rlim.rlim_cur == RLIM_INFINITY) 2126 lrlim.rlim_cur = LINUX_RLIM_INFINITY; 2127 else 2128 lrlim.rlim_cur = rlim.rlim_cur; 2129 if (rlim.rlim_max == RLIM_INFINITY) 2130 lrlim.rlim_max = LINUX_RLIM_INFINITY; 2131 else 2132 lrlim.rlim_max = rlim.rlim_max; 2133 error = copyout(&lrlim, args->old, sizeof(lrlim)); 2134 if (error != 0) 2135 goto out; 2136 } 2137 2138 if (args->new != NULL) 2139 error = kern_proc_setrlimit(td, p, which, &nrlim); 2140 2141 out: 2142 PRELE(p); 2143 return (error); 2144} 2145 2146int 2147linux_pselect6(struct thread *td, struct linux_pselect6_args *args) 2148{ 2149 struct timeval utv, tv0, tv1, *tvp; 2150 struct l_pselect6arg lpse6; 2151 struct l_timespec lts; 2152 struct timespec uts; 2153 l_sigset_t l_ss; 2154 sigset_t *ssp; 2155 sigset_t ss; 2156 int error; 2157 2158 ssp = NULL; 2159 if (args->sig != NULL) { 2160 error = copyin(args->sig, &lpse6, sizeof(lpse6)); 2161 if (error != 0) 2162 return (error); 2163 if (lpse6.ss_len != sizeof(l_ss)) 2164 return (EINVAL); 2165 if (lpse6.ss != 0) { 2166 error = copyin(PTRIN(lpse6.ss), &l_ss, 2167 sizeof(l_ss)); 2168 if (error != 0) 2169 return (error); 2170 linux_to_bsd_sigset(&l_ss, &ss); 2171 ssp = &ss; 2172 } 2173 } 2174 2175 /* 2176 * Currently glibc changes nanosecond number to microsecond. 2177 * This mean losing precision but for now it is hardly seen. 
2178 */ 2179 if (args->tsp != NULL) { 2180 error = copyin(args->tsp, <s, sizeof(lts)); 2181 if (error != 0) 2182 return (error); 2183 error = linux_to_native_timespec(&uts, <s); 2184 if (error != 0) 2185 return (error); 2186 2187 TIMESPEC_TO_TIMEVAL(&utv, &uts); 2188 if (itimerfix(&utv)) 2189 return (EINVAL); 2190 2191 microtime(&tv0); 2192 tvp = &utv; 2193 } else 2194 tvp = NULL; 2195 2196 error = kern_pselect(td, args->nfds, args->readfds, args->writefds, 2197 args->exceptfds, tvp, ssp, LINUX_NFDBITS); 2198 2199 if (error == 0 && args->tsp != NULL) { 2200 if (td->td_retval[0] != 0) { 2201 /* 2202 * Compute how much time was left of the timeout, 2203 * by subtracting the current time and the time 2204 * before we started the call, and subtracting 2205 * that result from the user-supplied value. 2206 */ 2207 2208 microtime(&tv1); 2209 timevalsub(&tv1, &tv0); 2210 timevalsub(&utv, &tv1); 2211 if (utv.tv_sec < 0) 2212 timevalclear(&utv); 2213 } else 2214 timevalclear(&utv); 2215 2216 TIMEVAL_TO_TIMESPEC(&utv, &uts); 2217 2218 error = native_to_linux_timespec(<s, &uts); 2219 if (error == 0) 2220 error = copyout(<s, args->tsp, sizeof(lts)); 2221 } 2222 2223 return (error); 2224} 2225 2226int 2227linux_ppoll(struct thread *td, struct linux_ppoll_args *args) 2228{ 2229 struct timespec ts0, ts1; 2230 struct l_timespec lts; 2231 struct timespec uts, *tsp; 2232 l_sigset_t l_ss; 2233 sigset_t *ssp; 2234 sigset_t ss; 2235 int error; 2236 2237 if (args->sset != NULL) { 2238 if (args->ssize != sizeof(l_ss)) 2239 return (EINVAL); 2240 error = copyin(args->sset, &l_ss, sizeof(l_ss)); 2241 if (error) 2242 return (error); 2243 linux_to_bsd_sigset(&l_ss, &ss); 2244 ssp = &ss; 2245 } else 2246 ssp = NULL; 2247 if (args->tsp != NULL) { 2248 error = copyin(args->tsp, <s, sizeof(lts)); 2249 if (error) 2250 return (error); 2251 error = linux_to_native_timespec(&uts, <s); 2252 if (error != 0) 2253 return (error); 2254 2255 nanotime(&ts0); 2256 tsp = &uts; 2257 } else 2258 tsp = NULL; 2259 
2260 error = kern_poll(td, args->fds, args->nfds, tsp, ssp); 2261 2262 if (error == 0 && args->tsp != NULL) { 2263 if (td->td_retval[0]) { 2264 nanotime(&ts1); 2265 timespecsub(&ts1, &ts0, &ts1); 2266 timespecsub(&uts, &ts1, &uts); 2267 if (uts.tv_sec < 0) 2268 timespecclear(&uts); 2269 } else 2270 timespecclear(&uts); 2271 2272 error = native_to_linux_timespec(<s, &uts); 2273 if (error == 0) 2274 error = copyout(<s, args->tsp, sizeof(lts)); 2275 } 2276 2277 return (error); 2278} 2279 2280int 2281linux_sched_rr_get_interval(struct thread *td, 2282 struct linux_sched_rr_get_interval_args *uap) 2283{ 2284 struct timespec ts; 2285 struct l_timespec lts; 2286 struct thread *tdt; 2287 int error; 2288 2289 /* 2290 * According to man in case the invalid pid specified 2291 * EINVAL should be returned. 2292 */ 2293 if (uap->pid < 0) 2294 return (EINVAL); 2295 2296 tdt = linux_tdfind(td, uap->pid, -1); 2297 if (tdt == NULL) 2298 return (ESRCH); 2299 2300 error = kern_sched_rr_get_interval_td(td, tdt, &ts); 2301 PROC_UNLOCK(tdt->td_proc); 2302 if (error != 0) 2303 return (error); 2304 error = native_to_linux_timespec(<s, &ts); 2305 if (error != 0) 2306 return (error); 2307 return (copyout(<s, uap->interval, sizeof(lts))); 2308} 2309 2310/* 2311 * In case when the Linux thread is the initial thread in 2312 * the thread group thread id is equal to the process id. 2313 * Glibc depends on this magic (assert in pthread_getattr_np.c). 2314 */ 2315struct thread * 2316linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) 2317{ 2318 struct linux_emuldata *em; 2319 struct thread *tdt; 2320 struct proc *p; 2321 2322 tdt = NULL; 2323 if (tid == 0 || tid == td->td_tid) { 2324 tdt = td; 2325 PROC_LOCK(tdt->td_proc); 2326 } else if (tid > PID_MAX) 2327 tdt = tdfind(tid, pid); 2328 else { 2329 /* 2330 * Initial thread where the tid equal to the pid. 2331 */ 2332 p = pfind(tid); 2333 if (p != NULL) { 2334 if (SV_PROC_ABI(p) != SV_ABI_LINUX) { 2335 /* 2336 * p is not a Linuxulator process. 
2337 */ 2338 PROC_UNLOCK(p); 2339 return (NULL); 2340 } 2341 FOREACH_THREAD_IN_PROC(p, tdt) { 2342 em = em_find(tdt); 2343 if (tid == em->em_tid) 2344 return (tdt); 2345 } 2346 PROC_UNLOCK(p); 2347 } 2348 return (NULL); 2349 } 2350 2351 return (tdt); 2352} 2353 2354void 2355linux_to_bsd_waitopts(int options, int *bsdopts) 2356{ 2357 2358 if (options & LINUX_WNOHANG) 2359 *bsdopts |= WNOHANG; 2360 if (options & LINUX_WUNTRACED) 2361 *bsdopts |= WUNTRACED; 2362 if (options & LINUX_WEXITED) 2363 *bsdopts |= WEXITED; 2364 if (options & LINUX_WCONTINUED) 2365 *bsdopts |= WCONTINUED; 2366 if (options & LINUX_WNOWAIT) 2367 *bsdopts |= WNOWAIT; 2368 2369 if (options & __WCLONE) 2370 *bsdopts |= WLINUXCLONE; 2371} 2372 2373int 2374linux_getrandom(struct thread *td, struct linux_getrandom_args *args) 2375{ 2376 struct uio uio; 2377 struct iovec iov; 2378 int error; 2379 2380 if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM)) 2381 return (EINVAL); 2382 if (args->count > INT_MAX) 2383 args->count = INT_MAX; 2384 2385 iov.iov_base = args->buf; 2386 iov.iov_len = args->count; 2387 2388 uio.uio_iov = &iov; 2389 uio.uio_iovcnt = 1; 2390 uio.uio_resid = iov.iov_len; 2391 uio.uio_segflg = UIO_USERSPACE; 2392 uio.uio_rw = UIO_READ; 2393 uio.uio_td = td; 2394 2395 error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK); 2396 if (error == 0) 2397 td->td_retval[0] = args->count - uio.uio_resid; 2398 return (error); 2399} 2400 2401int 2402linux_mincore(struct thread *td, struct linux_mincore_args *args) 2403{ 2404 2405 /* Needs to be page-aligned */ 2406 if (args->start & PAGE_MASK) 2407 return (EINVAL); 2408 return (kern_mincore(td, args->start, args->len, args->vec)); 2409} 2410 2411#define SYSLOG_TAG "<6>" 2412 2413int 2414linux_syslog(struct thread *td, struct linux_syslog_args *args) 2415{ 2416 char buf[128], *src, *dst; 2417 u_int seq; 2418 int buflen, error; 2419 2420 if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) { 2421 linux_msg(td, "syslog unsupported type 
0x%x", args->type); 2422 return (EINVAL); 2423 } 2424 2425 if (args->len < 6) { 2426 td->td_retval[0] = 0; 2427 return (0); 2428 } 2429 2430 error = priv_check(td, PRIV_MSGBUF); 2431 if (error) 2432 return (error); 2433 2434 mtx_lock(&msgbuf_lock); 2435 msgbuf_peekbytes(msgbufp, NULL, 0, &seq); 2436 mtx_unlock(&msgbuf_lock); 2437 2438 dst = args->buf; 2439 error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG)); 2440 /* The -1 is to skip the trailing '\0'. */ 2441 dst += sizeof(SYSLOG_TAG) - 1; 2442 2443 while (error == 0) { 2444 mtx_lock(&msgbuf_lock); 2445 buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq); 2446 mtx_unlock(&msgbuf_lock); 2447 2448 if (buflen == 0) 2449 break; 2450 2451 for (src = buf; src < buf + buflen && error == 0; src++) { 2452 if (*src == '\0') 2453 continue; 2454 2455 if (dst >= args->buf + args->len) 2456 goto out; 2457 2458 error = copyout(src, dst, 1); 2459 dst++; 2460 2461 if (*src == '\n' && *(src + 1) != '<' && 2462 dst + sizeof(SYSLOG_TAG) < args->buf + args->len) { 2463 error = copyout(&SYSLOG_TAG, 2464 dst, sizeof(SYSLOG_TAG)); 2465 dst += sizeof(SYSLOG_TAG) - 1; 2466 } 2467 } 2468 } 2469out: 2470 td->td_retval[0] = dst - args->buf; 2471 return (error); 2472} 2473 2474int 2475linux_getcpu(struct thread *td, struct linux_getcpu_args *args) 2476{ 2477 int cpu, error, node; 2478 2479 cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9) */ 2480 error = 0; 2481 node = cpuid_to_pcpu[cpu]->pc_domain; 2482 2483 if (args->cpu != NULL) 2484 error = copyout(&cpu, args->cpu, sizeof(l_int)); 2485 if (args->node != NULL) 2486 error = copyout(&node, args->node, sizeof(l_int)); 2487 return (error); 2488} 2489