linux_misc.c revision 293499
1/*- 2 * Copyright (c) 2002 Doug Rabson 3 * Copyright (c) 1994-1995 S��ren Schmidt 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer 11 * in this position and unchanged. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. The name of the author may not be used to endorse or promote products 16 * derived from this software without specific prior written permission 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: stable/10/sys/compat/linux/linux_misc.c 293499 2016-01-09 15:22:50Z dchagin $"); 32 33#include "opt_compat.h" 34#include "opt_kdtrace.h" 35 36#include <sys/param.h> 37#include <sys/blist.h> 38#include <sys/fcntl.h> 39#if defined(__i386__) 40#include <sys/imgact_aout.h> 41#endif 42#include <sys/jail.h> 43#include <sys/kernel.h> 44#include <sys/limits.h> 45#include <sys/lock.h> 46#include <sys/malloc.h> 47#include <sys/mman.h> 48#include <sys/mount.h> 49#include <sys/mutex.h> 50#include <sys/namei.h> 51#include <sys/priv.h> 52#include <sys/proc.h> 53#include <sys/reboot.h> 54#include <sys/racct.h> 55#include <sys/resourcevar.h> 56#include <sys/sched.h> 57#include <sys/signalvar.h> 58#include <sys/stat.h> 59#include <sys/syscallsubr.h> 60#include <sys/sysctl.h> 61#include <sys/sysproto.h> 62#include <sys/systm.h> 63#include <sys/time.h> 64#include <sys/vmmeter.h> 65#include <sys/vnode.h> 66#include <sys/wait.h> 67#include <sys/cpuset.h> 68 69#include <security/mac/mac_framework.h> 70 71#include <vm/vm.h> 72#include <vm/pmap.h> 73#include <vm/vm_kern.h> 74#include <vm/vm_map.h> 75#include <vm/vm_extern.h> 76#include <vm/vm_object.h> 77#include <vm/swap_pager.h> 78 79#ifdef COMPAT_LINUX32 80#include <machine/../linux32/linux.h> 81#include <machine/../linux32/linux32_proto.h> 82#else 83#include <machine/../linux/linux.h> 84#include <machine/../linux/linux_proto.h> 85#endif 86 87#include <compat/linux/linux_file.h> 88#include <compat/linux/linux_mib.h> 89#include <compat/linux/linux_signal.h> 90#include <compat/linux/linux_util.h> 91#include <compat/linux/linux_sysproto.h> 92#include <compat/linux/linux_emul.h> 93#include <compat/linux/linux_misc.h> 94 95int stclohz; /* Statistics clock frequency */ 96 97static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { 98 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 99 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 100 RLIMIT_MEMLOCK, RLIMIT_AS 101}; 102 103struct l_sysinfo { 104 l_long uptime; /* Seconds since boot */ 105 l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ 106#define LINUX_SYSINFO_LOADS_SCALE 65536 107 l_ulong totalram; /* Total usable main memory size */ 108 l_ulong freeram; /* Available memory size */ 109 l_ulong sharedram; /* Amount of shared memory */ 110 l_ulong bufferram; /* Memory used by buffers */ 111 l_ulong totalswap; /* Total swap space size */ 112 l_ulong freeswap; /* swap space still available */ 113 l_ushort procs; /* Number of current processes */ 114 l_ushort pads; 115 l_ulong totalbig; 116 l_ulong freebig; 117 l_uint mem_unit; 118 char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ 119}; 120int 121linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) 122{ 123 struct l_sysinfo sysinfo; 124 vm_object_t object; 125 int i, j; 126 struct timespec ts; 127 128 getnanouptime(&ts); 129 if (ts.tv_nsec != 0) 130 ts.tv_sec++; 131 sysinfo.uptime = ts.tv_sec; 132 133 /* Use the information from the mib to get our load averages */ 134 for (i = 0; i < 3; i++) 135 sysinfo.loads[i] = averunnable.ldavg[i] * 136 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; 137 138 sysinfo.totalram = physmem * PAGE_SIZE; 139 sysinfo.freeram = sysinfo.totalram - cnt.v_wire_count * PAGE_SIZE; 140 141 sysinfo.sharedram = 0; 142 mtx_lock(&vm_object_list_mtx); 143 TAILQ_FOREACH(object, &vm_object_list, object_list) 144 if (object->shadow_count > 1) 145 sysinfo.sharedram += object->resident_page_count; 146 mtx_unlock(&vm_object_list_mtx); 147 148 sysinfo.sharedram *= PAGE_SIZE; 149 sysinfo.bufferram = 0; 150 151 swap_pager_status(&i, &j); 152 sysinfo.totalswap = i * PAGE_SIZE; 153 sysinfo.freeswap = (i - j) * PAGE_SIZE; 154 155 sysinfo.procs = nprocs; 156 157 /* The following are only present in newer Linux kernels. */ 158 sysinfo.totalbig = 0; 159 sysinfo.freebig = 0; 160 sysinfo.mem_unit = 1; 161 162 return (copyout(&sysinfo, args->info, sizeof(sysinfo))); 163} 164 165int 166linux_alarm(struct thread *td, struct linux_alarm_args *args) 167{ 168 struct itimerval it, old_it; 169 u_int secs; 170 int error; 171 172#ifdef DEBUG 173 if (ldebug(alarm)) 174 printf(ARGS(alarm, "%u"), args->secs); 175#endif 176 177 secs = args->secs; 178 179 if (secs > INT_MAX) 180 secs = INT_MAX; 181 182 it.it_value.tv_sec = (long) secs; 183 it.it_value.tv_usec = 0; 184 it.it_interval.tv_sec = 0; 185 it.it_interval.tv_usec = 0; 186 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); 187 if (error) 188 return (error); 189 if (timevalisset(&old_it.it_value)) { 190 if (old_it.it_value.tv_usec != 0) 191 old_it.it_value.tv_sec++; 192 td->td_retval[0] = old_it.it_value.tv_sec; 193 } 194 return (0); 195} 196 197int 198linux_brk(struct thread *td, struct linux_brk_args *args) 199{ 200 struct vmspace *vm = td->td_proc->p_vmspace; 201 vm_offset_t new, old; 202 struct obreak_args /* { 203 char * nsize; 204 } */ tmp; 205 206#ifdef DEBUG 207 if (ldebug(brk)) 208 printf(ARGS(brk, "%p"), (void *)(uintptr_t)args->dsend); 209#endif 210 old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize); 211 new = (vm_offset_t)args->dsend; 212 tmp.nsize = (char *)new; 213 if (((caddr_t)new > vm->vm_daddr) && !sys_obreak(td, &tmp)) 214 td->td_retval[0] = (long)new; 215 else 216 td->td_retval[0] = (long)old; 217 218 return (0); 219} 220 221#if defined(__i386__) 222/* XXX: what about amd64/linux32? */ 223 224int 225linux_uselib(struct thread *td, struct linux_uselib_args *args) 226{ 227 struct nameidata ni; 228 struct vnode *vp; 229 struct exec *a_out; 230 struct vattr attr; 231 vm_offset_t vmaddr; 232 unsigned long file_offset; 233 unsigned long bss_size; 234 char *library; 235 ssize_t aresid; 236 int error, locked, writecount; 237 238 LCONVPATHEXIST(td, args->library, &library); 239 240#ifdef DEBUG 241 if (ldebug(uselib)) 242 printf(ARGS(uselib, "%s"), library); 243#endif 244 245 a_out = NULL; 246 locked = 0; 247 vp = NULL; 248 249 NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, 250 UIO_SYSSPACE, library, td); 251 error = namei(&ni); 252 LFREEPATH(library); 253 if (error) 254 goto cleanup; 255 256 vp = ni.ni_vp; 257 NDFREE(&ni, NDF_ONLY_PNBUF); 258 259 /* 260 * From here on down, we have a locked vnode that must be unlocked. 261 * XXX: The code below largely duplicates exec_check_permissions(). 262 */ 263 locked = 1; 264 265 /* Writable? */ 266 error = VOP_GET_WRITECOUNT(vp, &writecount); 267 if (error != 0) 268 goto cleanup; 269 if (writecount != 0) { 270 error = ETXTBSY; 271 goto cleanup; 272 } 273 274 /* Executable? */ 275 error = VOP_GETATTR(vp, &attr, td->td_ucred); 276 if (error) 277 goto cleanup; 278 279 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 280 ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) { 281 /* EACCESS is what exec(2) returns. */ 282 error = ENOEXEC; 283 goto cleanup; 284 } 285 286 /* Sensible size? */ 287 if (attr.va_size == 0) { 288 error = ENOEXEC; 289 goto cleanup; 290 } 291 292 /* Can we access it? */ 293 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 294 if (error) 295 goto cleanup; 296 297 /* 298 * XXX: This should use vn_open() so that it is properly authorized, 299 * and to reduce code redundancy all over the place here. 300 * XXX: Not really, it duplicates far more of exec_check_permissions() 301 * than vn_open(). 302 */ 303#ifdef MAC 304 error = mac_vnode_check_open(td->td_ucred, vp, VREAD); 305 if (error) 306 goto cleanup; 307#endif 308 error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); 309 if (error) 310 goto cleanup; 311 312 /* Pull in executable header into exec_map */ 313 error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE, 314 VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0); 315 if (error) 316 goto cleanup; 317 318 /* Is it a Linux binary ? */ 319 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 320 error = ENOEXEC; 321 goto cleanup; 322 } 323 324 /* 325 * While we are here, we should REALLY do some more checks 326 */ 327 328 /* Set file/virtual offset based on a.out variant. */ 329 switch ((int)(a_out->a_magic & 0xffff)) { 330 case 0413: /* ZMAGIC */ 331 file_offset = 1024; 332 break; 333 case 0314: /* QMAGIC */ 334 file_offset = 0; 335 break; 336 default: 337 error = ENOEXEC; 338 goto cleanup; 339 } 340 341 bss_size = round_page(a_out->a_bss); 342 343 /* Check various fields in header for validity/bounds. */ 344 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 345 error = ENOEXEC; 346 goto cleanup; 347 } 348 349 /* text + data can't exceed file size */ 350 if (a_out->a_data + a_out->a_text > attr.va_size) { 351 error = EFAULT; 352 goto cleanup; 353 } 354 355 /* 356 * text/data/bss must not exceed limits 357 * XXX - this is not complete. it should check current usage PLUS 358 * the resources needed by this library. 359 */ 360 PROC_LOCK(td->td_proc); 361 if (a_out->a_text > maxtsiz || 362 a_out->a_data + bss_size > lim_cur(td->td_proc, RLIMIT_DATA) || 363 racct_set(td->td_proc, RACCT_DATA, a_out->a_data + 364 bss_size) != 0) { 365 PROC_UNLOCK(td->td_proc); 366 error = ENOMEM; 367 goto cleanup; 368 } 369 PROC_UNLOCK(td->td_proc); 370 371 /* 372 * Prevent more writers. 373 * XXX: Note that if any of the VM operations fail below we don't 374 * clear this flag. 375 */ 376 VOP_SET_TEXT(vp); 377 378 /* 379 * Lock no longer needed 380 */ 381 locked = 0; 382 VOP_UNLOCK(vp, 0); 383 384 /* 385 * Check if file_offset page aligned. Currently we cannot handle 386 * misalinged file offsets, and so we read in the entire image 387 * (what a waste). 388 */ 389 if (file_offset & PAGE_MASK) { 390#ifdef DEBUG 391 printf("uselib: Non page aligned binary %lu\n", file_offset); 392#endif 393 /* Map text+data read/write/execute */ 394 395 /* a_entry is the load address and is page aligned */ 396 vmaddr = trunc_page(a_out->a_entry); 397 398 /* get anon user mapping, read+write+execute */ 399 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 400 &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE, 401 VM_PROT_ALL, VM_PROT_ALL, 0); 402 if (error) 403 goto cleanup; 404 405 error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset, 406 a_out->a_text + a_out->a_data, UIO_USERSPACE, 0, 407 td->td_ucred, NOCRED, &aresid, td); 408 if (error != 0) 409 goto cleanup; 410 if (aresid != 0) { 411 error = ENOEXEC; 412 goto cleanup; 413 } 414 } else { 415#ifdef DEBUG 416 printf("uselib: Page aligned binary %lu\n", file_offset); 417#endif 418 /* 419 * for QMAGIC, a_entry is 20 bytes beyond the load address 420 * to skip the executable header 421 */ 422 vmaddr = trunc_page(a_out->a_entry); 423 424 /* 425 * Map it all into the process's space as a single 426 * copy-on-write "data" segment. 427 */ 428 error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr, 429 a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL, 430 MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset); 431 if (error) 432 goto cleanup; 433 } 434#ifdef DEBUG 435 printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long *)vmaddr)[0], 436 ((long *)vmaddr)[1]); 437#endif 438 if (bss_size != 0) { 439 /* Calculate BSS start address */ 440 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + 441 a_out->a_data; 442 443 /* allocate some 'anon' space */ 444 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 445 &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL, 446 VM_PROT_ALL, 0); 447 if (error) 448 goto cleanup; 449 } 450 451cleanup: 452 /* Unlock vnode if needed */ 453 if (locked) 454 VOP_UNLOCK(vp, 0); 455 456 /* Release the temporary mapping. */ 457 if (a_out) 458 kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE); 459 460 return (error); 461} 462 463#endif /* __i386__ */ 464 465int 466linux_select(struct thread *td, struct linux_select_args *args) 467{ 468 l_timeval ltv; 469 struct timeval tv0, tv1, utv, *tvp; 470 int error; 471 472#ifdef DEBUG 473 if (ldebug(select)) 474 printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds, 475 (void *)args->readfds, (void *)args->writefds, 476 (void *)args->exceptfds, (void *)args->timeout); 477#endif 478 479 /* 480 * Store current time for computation of the amount of 481 * time left. 482 */ 483 if (args->timeout) { 484 if ((error = copyin(args->timeout, <v, sizeof(ltv)))) 485 goto select_out; 486 utv.tv_sec = ltv.tv_sec; 487 utv.tv_usec = ltv.tv_usec; 488#ifdef DEBUG 489 if (ldebug(select)) 490 printf(LMSG("incoming timeout (%jd/%ld)"), 491 (intmax_t)utv.tv_sec, utv.tv_usec); 492#endif 493 494 if (itimerfix(&utv)) { 495 /* 496 * The timeval was invalid. Convert it to something 497 * valid that will act as it does under Linux. 498 */ 499 utv.tv_sec += utv.tv_usec / 1000000; 500 utv.tv_usec %= 1000000; 501 if (utv.tv_usec < 0) { 502 utv.tv_sec -= 1; 503 utv.tv_usec += 1000000; 504 } 505 if (utv.tv_sec < 0) 506 timevalclear(&utv); 507 } 508 microtime(&tv0); 509 tvp = &utv; 510 } else 511 tvp = NULL; 512 513 error = kern_select(td, args->nfds, args->readfds, args->writefds, 514 args->exceptfds, tvp, sizeof(l_int) * 8); 515 516#ifdef DEBUG 517 if (ldebug(select)) 518 printf(LMSG("real select returns %d"), error); 519#endif 520 if (error) 521 goto select_out; 522 523 if (args->timeout) { 524 if (td->td_retval[0]) { 525 /* 526 * Compute how much time was left of the timeout, 527 * by subtracting the current time and the time 528 * before we started the call, and subtracting 529 * that result from the user-supplied value. 530 */ 531 microtime(&tv1); 532 timevalsub(&tv1, &tv0); 533 timevalsub(&utv, &tv1); 534 if (utv.tv_sec < 0) 535 timevalclear(&utv); 536 } else 537 timevalclear(&utv); 538#ifdef DEBUG 539 if (ldebug(select)) 540 printf(LMSG("outgoing timeout (%jd/%ld)"), 541 (intmax_t)utv.tv_sec, utv.tv_usec); 542#endif 543 ltv.tv_sec = utv.tv_sec; 544 ltv.tv_usec = utv.tv_usec; 545 if ((error = copyout(<v, args->timeout, sizeof(ltv)))) 546 goto select_out; 547 } 548 549select_out: 550#ifdef DEBUG 551 if (ldebug(select)) 552 printf(LMSG("select_out -> %d"), error); 553#endif 554 return (error); 555} 556 557int 558linux_mremap(struct thread *td, struct linux_mremap_args *args) 559{ 560 struct munmap_args /* { 561 void *addr; 562 size_t len; 563 } */ bsd_args; 564 int error = 0; 565 566#ifdef DEBUG 567 if (ldebug(mremap)) 568 printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"), 569 (void *)(uintptr_t)args->addr, 570 (unsigned long)args->old_len, 571 (unsigned long)args->new_len, 572 (unsigned long)args->flags); 573#endif 574 575 if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { 576 td->td_retval[0] = 0; 577 return (EINVAL); 578 } 579 580 /* 581 * Check for the page alignment. 582 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. 583 */ 584 if (args->addr & PAGE_MASK) { 585 td->td_retval[0] = 0; 586 return (EINVAL); 587 } 588 589 args->new_len = round_page(args->new_len); 590 args->old_len = round_page(args->old_len); 591 592 if (args->new_len > args->old_len) { 593 td->td_retval[0] = 0; 594 return (ENOMEM); 595 } 596 597 if (args->new_len < args->old_len) { 598 bsd_args.addr = 599 (caddr_t)((uintptr_t)args->addr + args->new_len); 600 bsd_args.len = args->old_len - args->new_len; 601 error = sys_munmap(td, &bsd_args); 602 } 603 604 td->td_retval[0] = error ? 0 : (uintptr_t)args->addr; 605 return (error); 606} 607 608#define LINUX_MS_ASYNC 0x0001 609#define LINUX_MS_INVALIDATE 0x0002 610#define LINUX_MS_SYNC 0x0004 611 612int 613linux_msync(struct thread *td, struct linux_msync_args *args) 614{ 615 struct msync_args bsd_args; 616 617 bsd_args.addr = (caddr_t)(uintptr_t)args->addr; 618 bsd_args.len = (uintptr_t)args->len; 619 bsd_args.flags = args->fl & ~LINUX_MS_SYNC; 620 621 return (sys_msync(td, &bsd_args)); 622} 623 624int 625linux_time(struct thread *td, struct linux_time_args *args) 626{ 627 struct timeval tv; 628 l_time_t tm; 629 int error; 630 631#ifdef DEBUG 632 if (ldebug(time)) 633 printf(ARGS(time, "*")); 634#endif 635 636 microtime(&tv); 637 tm = tv.tv_sec; 638 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) 639 return (error); 640 td->td_retval[0] = tm; 641 return (0); 642} 643 644struct l_times_argv { 645 l_clock_t tms_utime; 646 l_clock_t tms_stime; 647 l_clock_t tms_cutime; 648 l_clock_t tms_cstime; 649}; 650 651 652/* 653 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. 654 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK 655 * auxiliary vector entry. 656 */ 657#define CLK_TCK 100 658 659#define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 660#define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) 661 662#define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER_2004000 ? \ 663 CONVNTCK(r) : CONVOTCK(r)) 664 665int 666linux_times(struct thread *td, struct linux_times_args *args) 667{ 668 struct timeval tv, utime, stime, cutime, cstime; 669 struct l_times_argv tms; 670 struct proc *p; 671 int error; 672 673#ifdef DEBUG 674 if (ldebug(times)) 675 printf(ARGS(times, "*")); 676#endif 677 678 if (args->buf != NULL) { 679 p = td->td_proc; 680 PROC_LOCK(p); 681 PROC_STATLOCK(p); 682 calcru(p, &utime, &stime); 683 PROC_STATUNLOCK(p); 684 calccru(p, &cutime, &cstime); 685 PROC_UNLOCK(p); 686 687 tms.tms_utime = CONVTCK(utime); 688 tms.tms_stime = CONVTCK(stime); 689 690 tms.tms_cutime = CONVTCK(cutime); 691 tms.tms_cstime = CONVTCK(cstime); 692 693 if ((error = copyout(&tms, args->buf, sizeof(tms)))) 694 return (error); 695 } 696 697 microuptime(&tv); 698 td->td_retval[0] = (int)CONVTCK(tv); 699 return (0); 700} 701 702int 703linux_newuname(struct thread *td, struct linux_newuname_args *args) 704{ 705 struct l_new_utsname utsname; 706 char osname[LINUX_MAX_UTSNAME]; 707 char osrelease[LINUX_MAX_UTSNAME]; 708 char *p; 709 710#ifdef DEBUG 711 if (ldebug(newuname)) 712 printf(ARGS(newuname, "*")); 713#endif 714 715 linux_get_osname(td, osname); 716 linux_get_osrelease(td, osrelease); 717 718 bzero(&utsname, sizeof(utsname)); 719 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); 720 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); 721 getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); 722 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); 723 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); 724 for (p = utsname.version; *p != '\0'; ++p) 725 if (*p == '\n') { 726 *p = '\0'; 727 break; 728 } 729 strlcpy(utsname.machine, linux_platform, LINUX_MAX_UTSNAME); 730 731 return (copyout(&utsname, args->buf, sizeof(utsname))); 732} 733 734#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 735struct l_utimbuf { 736 l_time_t l_actime; 737 l_time_t l_modtime; 738}; 739 740int 741linux_utime(struct thread *td, struct linux_utime_args *args) 742{ 743 struct timeval tv[2], *tvp; 744 struct l_utimbuf lut; 745 char *fname; 746 int error; 747 748 LCONVPATHEXIST(td, args->fname, &fname); 749 750#ifdef DEBUG 751 if (ldebug(utime)) 752 printf(ARGS(utime, "%s, *"), fname); 753#endif 754 755 if (args->times) { 756 if ((error = copyin(args->times, &lut, sizeof lut))) { 757 LFREEPATH(fname); 758 return (error); 759 } 760 tv[0].tv_sec = lut.l_actime; 761 tv[0].tv_usec = 0; 762 tv[1].tv_sec = lut.l_modtime; 763 tv[1].tv_usec = 0; 764 tvp = tv; 765 } else 766 tvp = NULL; 767 768 error = kern_utimes(td, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); 769 LFREEPATH(fname); 770 return (error); 771} 772 773int 774linux_utimes(struct thread *td, struct linux_utimes_args *args) 775{ 776 l_timeval ltv[2]; 777 struct timeval tv[2], *tvp = NULL; 778 char *fname; 779 int error; 780 781 LCONVPATHEXIST(td, args->fname, &fname); 782 783#ifdef DEBUG 784 if (ldebug(utimes)) 785 printf(ARGS(utimes, "%s, *"), fname); 786#endif 787 788 if (args->tptr != NULL) { 789 if ((error = copyin(args->tptr, ltv, sizeof ltv))) { 790 LFREEPATH(fname); 791 return (error); 792 } 793 tv[0].tv_sec = ltv[0].tv_sec; 794 tv[0].tv_usec = ltv[0].tv_usec; 795 tv[1].tv_sec = ltv[1].tv_sec; 796 tv[1].tv_usec = ltv[1].tv_usec; 797 tvp = tv; 798 } 799 800 error = kern_utimes(td, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); 801 LFREEPATH(fname); 802 return (error); 803} 804 805int 806linux_futimesat(struct thread *td, struct linux_futimesat_args *args) 807{ 808 l_timeval ltv[2]; 809 struct timeval tv[2], *tvp = NULL; 810 char *fname; 811 int error, dfd; 812 813 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 814 LCONVPATHEXIST_AT(td, args->filename, &fname, dfd); 815 816#ifdef DEBUG 817 if (ldebug(futimesat)) 818 printf(ARGS(futimesat, "%s, *"), fname); 819#endif 820 821 if (args->utimes != NULL) { 822 if ((error = copyin(args->utimes, ltv, sizeof ltv))) { 823 LFREEPATH(fname); 824 return (error); 825 } 826 tv[0].tv_sec = ltv[0].tv_sec; 827 tv[0].tv_usec = ltv[0].tv_usec; 828 tv[1].tv_sec = ltv[1].tv_sec; 829 tv[1].tv_usec = ltv[1].tv_usec; 830 tvp = tv; 831 } 832 833 error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); 834 LFREEPATH(fname); 835 return (error); 836} 837#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 838 839int 840linux_common_wait(struct thread *td, int pid, int *status, 841 int options, struct rusage *ru) 842{ 843 int error, tmpstat; 844 845 error = kern_wait(td, pid, &tmpstat, options, ru); 846 if (error) 847 return (error); 848 849 if (status) { 850 tmpstat &= 0xffff; 851 if (WIFSIGNALED(tmpstat)) 852 tmpstat = (tmpstat & 0xffffff80) | 853 BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); 854 else if (WIFSTOPPED(tmpstat)) 855 tmpstat = (tmpstat & 0xffff00ff) | 856 (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8); 857 error = copyout(&tmpstat, status, sizeof(int)); 858 } 859 860 return (error); 861} 862 863int 864linux_waitpid(struct thread *td, struct linux_waitpid_args *args) 865{ 866 int options; 867 868#ifdef DEBUG 869 if (ldebug(waitpid)) 870 printf(ARGS(waitpid, "%d, %p, %d"), 871 args->pid, (void *)args->status, args->options); 872#endif 873 /* 874 * this is necessary because the test in kern_wait doesn't work 875 * because we mess with the options here 876 */ 877 if (args->options & ~(WUNTRACED | WNOHANG | WCONTINUED | __WCLONE)) 878 return (EINVAL); 879 880 options = (args->options & (WNOHANG | WUNTRACED)); 881 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 882 if (args->options & __WCLONE) 883 options |= WLINUXCLONE; 884 885 return (linux_common_wait(td, args->pid, args->status, options, NULL)); 886} 887 888 889int 890linux_mknod(struct thread *td, struct linux_mknod_args *args) 891{ 892 char *path; 893 int error; 894 895 LCONVPATHCREAT(td, args->path, &path); 896 897#ifdef DEBUG 898 if (ldebug(mknod)) 899 printf(ARGS(mknod, "%s, %d, %d"), path, args->mode, args->dev); 900#endif 901 902 switch (args->mode & S_IFMT) { 903 case S_IFIFO: 904 case S_IFSOCK: 905 error = kern_mkfifo(td, path, UIO_SYSSPACE, args->mode); 906 break; 907 908 case S_IFCHR: 909 case S_IFBLK: 910 error = kern_mknod(td, path, UIO_SYSSPACE, args->mode, 911 args->dev); 912 break; 913 914 case S_IFDIR: 915 error = EPERM; 916 break; 917 918 case 0: 919 args->mode |= S_IFREG; 920 /* FALLTHROUGH */ 921 case S_IFREG: 922 error = kern_open(td, path, UIO_SYSSPACE, 923 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 924 if (error == 0) 925 kern_close(td, td->td_retval[0]); 926 break; 927 928 default: 929 error = EINVAL; 930 break; 931 } 932 LFREEPATH(path); 933 return (error); 934} 935 936int 937linux_mknodat(struct thread *td, struct linux_mknodat_args *args) 938{ 939 char *path; 940 int error, dfd; 941 942 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 943 LCONVPATHCREAT_AT(td, args->filename, &path, dfd); 944 945#ifdef DEBUG 946 if (ldebug(mknodat)) 947 printf(ARGS(mknodat, "%s, %d, %d"), path, args->mode, args->dev); 948#endif 949 950 switch (args->mode & S_IFMT) { 951 case S_IFIFO: 952 case S_IFSOCK: 953 error = kern_mkfifoat(td, dfd, path, UIO_SYSSPACE, args->mode); 954 break; 955 956 case S_IFCHR: 957 case S_IFBLK: 958 error = kern_mknodat(td, dfd, path, UIO_SYSSPACE, args->mode, 959 args->dev); 960 break; 961 962 case S_IFDIR: 963 error = EPERM; 964 break; 965 966 case 0: 967 args->mode |= S_IFREG; 968 /* FALLTHROUGH */ 969 case S_IFREG: 970 error = kern_openat(td, dfd, path, UIO_SYSSPACE, 971 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 972 if (error == 0) 973 kern_close(td, td->td_retval[0]); 974 break; 975 976 default: 977 error = EINVAL; 978 break; 979 } 980 LFREEPATH(path); 981 return (error); 982} 983 984/* 985 * UGH! This is just about the dumbest idea I've ever heard!! 986 */ 987int 988linux_personality(struct thread *td, struct linux_personality_args *args) 989{ 990#ifdef DEBUG 991 if (ldebug(personality)) 992 printf(ARGS(personality, "%lu"), (unsigned long)args->per); 993#endif 994 if (args->per != 0) 995 return (EINVAL); 996 997 /* Yes Jim, it's still a Linux... */ 998 td->td_retval[0] = 0; 999 return (0); 1000} 1001 1002struct l_itimerval { 1003 l_timeval it_interval; 1004 l_timeval it_value; 1005}; 1006 1007#define B2L_ITIMERVAL(bip, lip) \ 1008 (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ 1009 (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ 1010 (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ 1011 (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; 1012 1013int 1014linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) 1015{ 1016 int error; 1017 struct l_itimerval ls; 1018 struct itimerval aitv, oitv; 1019 1020#ifdef DEBUG 1021 if (ldebug(setitimer)) 1022 printf(ARGS(setitimer, "%p, %p"), 1023 (void *)uap->itv, (void *)uap->oitv); 1024#endif 1025 1026 if (uap->itv == NULL) { 1027 uap->itv = uap->oitv; 1028 return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); 1029 } 1030 1031 error = copyin(uap->itv, &ls, sizeof(ls)); 1032 if (error != 0) 1033 return (error); 1034 B2L_ITIMERVAL(&aitv, &ls); 1035#ifdef DEBUG 1036 if (ldebug(setitimer)) { 1037 printf("setitimer: value: sec: %jd, usec: %ld\n", 1038 (intmax_t)aitv.it_value.tv_sec, aitv.it_value.tv_usec); 1039 printf("setitimer: interval: sec: %jd, usec: %ld\n", 1040 (intmax_t)aitv.it_interval.tv_sec, aitv.it_interval.tv_usec); 1041 } 1042#endif 1043 error = kern_setitimer(td, uap->which, &aitv, &oitv); 1044 if (error != 0 || uap->oitv == NULL) 1045 return (error); 1046 B2L_ITIMERVAL(&ls, &oitv); 1047 1048 return (copyout(&ls, uap->oitv, sizeof(ls))); 1049} 1050 1051int 1052linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) 1053{ 1054 int error; 1055 struct l_itimerval ls; 1056 struct itimerval aitv; 1057 1058#ifdef DEBUG 1059 if (ldebug(getitimer)) 1060 printf(ARGS(getitimer, "%p"), (void *)uap->itv); 1061#endif 1062 error = kern_getitimer(td, uap->which, &aitv); 1063 if (error != 0) 1064 return (error); 1065 B2L_ITIMERVAL(&ls, &aitv); 1066 return (copyout(&ls, uap->itv, sizeof(ls))); 1067} 1068 1069int 1070linux_nice(struct thread *td, struct linux_nice_args *args) 1071{ 1072 struct setpriority_args bsd_args; 1073 1074 bsd_args.which = PRIO_PROCESS; 1075 bsd_args.who = 0; /* current process */ 1076 bsd_args.prio = args->inc; 1077 return (sys_setpriority(td, &bsd_args)); 1078} 1079 1080int 1081linux_setgroups(struct thread *td, struct linux_setgroups_args *args) 1082{ 1083 struct ucred *newcred, *oldcred; 1084 l_gid_t *linux_gidset; 1085 gid_t *bsd_gidset; 1086 int ngrp, error; 1087 struct proc *p; 1088 1089 ngrp = args->gidsetsize; 1090 if (ngrp < 0 || ngrp >= ngroups_max + 1) 1091 return (EINVAL); 1092 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_TEMP, M_WAITOK); 1093 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); 1094 if (error) 1095 goto out; 1096 newcred = crget(); 1097 p = td->td_proc; 1098 PROC_LOCK(p); 1099 oldcred = crcopysafe(p, newcred); 1100 1101 /* 1102 * cr_groups[0] holds egid. Setting the whole set from 1103 * the supplied set will cause egid to be changed too. 1104 * Keep cr_groups[0] unchanged to prevent that. 1105 */ 1106 1107 if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS, 0)) != 0) { 1108 PROC_UNLOCK(p); 1109 crfree(newcred); 1110 goto out; 1111 } 1112 1113 if (ngrp > 0) { 1114 newcred->cr_ngroups = ngrp + 1; 1115 1116 bsd_gidset = newcred->cr_groups; 1117 ngrp--; 1118 while (ngrp >= 0) { 1119 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1120 ngrp--; 1121 } 1122 } else 1123 newcred->cr_ngroups = 1; 1124 1125 setsugid(p); 1126 p->p_ucred = newcred; 1127 PROC_UNLOCK(p); 1128 crfree(oldcred); 1129 error = 0; 1130out: 1131 free(linux_gidset, M_TEMP); 1132 return (error); 1133} 1134 1135int 1136linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1137{ 1138 struct ucred *cred; 1139 l_gid_t *linux_gidset; 1140 gid_t *bsd_gidset; 1141 int bsd_gidsetsz, ngrp, error; 1142 1143 cred = td->td_ucred; 1144 bsd_gidset = cred->cr_groups; 1145 bsd_gidsetsz = cred->cr_ngroups - 1; 1146 1147 /* 1148 * cr_groups[0] holds egid. Returning the whole set 1149 * here will cause a duplicate. Exclude cr_groups[0] 1150 * to prevent that. 1151 */ 1152 1153 if ((ngrp = args->gidsetsize) == 0) { 1154 td->td_retval[0] = bsd_gidsetsz; 1155 return (0); 1156 } 1157 1158 if (ngrp < bsd_gidsetsz) 1159 return (EINVAL); 1160 1161 ngrp = 0; 1162 linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), 1163 M_TEMP, M_WAITOK); 1164 while (ngrp < bsd_gidsetsz) { 1165 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1166 ngrp++; 1167 } 1168 1169 error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); 1170 free(linux_gidset, M_TEMP); 1171 if (error) 1172 return (error); 1173 1174 td->td_retval[0] = ngrp; 1175 return (0); 1176} 1177 1178int 1179linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1180{ 1181 struct rlimit bsd_rlim; 1182 struct l_rlimit rlim; 1183 u_int which; 1184 int error; 1185 1186#ifdef DEBUG 1187 if (ldebug(setrlimit)) 1188 printf(ARGS(setrlimit, "%d, %p"), 1189 args->resource, (void *)args->rlim); 1190#endif 1191 1192 if (args->resource >= LINUX_RLIM_NLIMITS) 1193 return (EINVAL); 1194 1195 which = linux_to_bsd_resource[args->resource]; 1196 if (which == -1) 1197 return (EINVAL); 1198 1199 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1200 if (error) 1201 return (error); 1202 1203 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1204 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1205 return (kern_setrlimit(td, which, &bsd_rlim)); 1206} 1207 1208int 1209linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) 1210{ 1211 struct l_rlimit rlim; 1212 struct proc *p = td->td_proc; 1213 struct rlimit bsd_rlim; 1214 u_int which; 1215 1216#ifdef DEBUG 1217 if (ldebug(old_getrlimit)) 1218 printf(ARGS(old_getrlimit, "%d, %p"), 1219 args->resource, (void *)args->rlim); 1220#endif 1221 1222 if (args->resource >= LINUX_RLIM_NLIMITS) 1223 return (EINVAL); 1224 1225 which = linux_to_bsd_resource[args->resource]; 1226 if (which == -1) 1227 return (EINVAL); 1228 1229 PROC_LOCK(p); 1230 lim_rlimit(p, which, &bsd_rlim); 1231 PROC_UNLOCK(p); 1232 1233#ifdef COMPAT_LINUX32 1234 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; 1235 if (rlim.rlim_cur == UINT_MAX) 1236 rlim.rlim_cur = INT_MAX; 1237 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; 1238 if (rlim.rlim_max == UINT_MAX) 1239 rlim.rlim_max = INT_MAX; 1240#else 1241 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; 1242 if (rlim.rlim_cur == ULONG_MAX) 1243 rlim.rlim_cur = LONG_MAX; 1244 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; 1245 if (rlim.rlim_max == ULONG_MAX) 1246 rlim.rlim_max = LONG_MAX; 1247#endif 1248 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1249} 1250 1251int 1252linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 1253{ 1254 struct l_rlimit rlim; 1255 struct proc *p = td->td_proc; 1256 struct rlimit bsd_rlim; 1257 u_int which; 1258 1259#ifdef DEBUG 1260 if (ldebug(getrlimit)) 1261 printf(ARGS(getrlimit, "%d, %p"), 1262 args->resource, (void *)args->rlim); 1263#endif 1264 1265 if (args->resource >= LINUX_RLIM_NLIMITS) 1266 return (EINVAL); 1267 1268 which = linux_to_bsd_resource[args->resource]; 1269 if (which == -1) 1270 return (EINVAL); 1271 1272 PROC_LOCK(p); 1273 lim_rlimit(p, which, &bsd_rlim); 1274 PROC_UNLOCK(p); 1275 1276 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; 1277 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; 1278 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1279} 1280 1281int 1282linux_sched_setscheduler(struct thread *td, 1283 struct linux_sched_setscheduler_args *args) 1284{ 1285 struct sched_param sched_param; 1286 struct thread *tdt; 1287 int error, policy; 1288 1289#ifdef DEBUG 1290 if (ldebug(sched_setscheduler)) 1291 printf(ARGS(sched_setscheduler, "%d, %d, %p"), 1292 args->pid, args->policy, (const void *)args->param); 1293#endif 1294 1295 switch (args->policy) { 1296 case LINUX_SCHED_OTHER: 1297 policy = SCHED_OTHER; 1298 break; 1299 case LINUX_SCHED_FIFO: 1300 policy = SCHED_FIFO; 1301 break; 1302 case LINUX_SCHED_RR: 1303 policy = SCHED_RR; 1304 break; 1305 default: 1306 return (EINVAL); 1307 } 1308 1309 error = copyin(args->param, &sched_param, sizeof(sched_param)); 1310 if (error) 1311 return (error); 1312 1313 tdt = linux_tdfind(td, args->pid, -1); 1314 if (tdt == NULL) 1315 return (ESRCH); 1316 1317 error = kern_sched_setscheduler(td, tdt, policy, &sched_param); 1318 PROC_UNLOCK(tdt->td_proc); 1319 return (error); 1320} 1321 1322int 1323linux_sched_getscheduler(struct thread *td, 1324 struct linux_sched_getscheduler_args *args) 1325{ 1326 struct thread *tdt; 1327 int error, policy; 1328 1329#ifdef DEBUG 1330 if (ldebug(sched_getscheduler)) 1331 printf(ARGS(sched_getscheduler, "%d"), args->pid); 1332#endif 1333 1334 tdt = linux_tdfind(td, args->pid, -1); 1335 if (tdt == NULL) 1336 return (ESRCH); 1337 1338 error = kern_sched_getscheduler(td, tdt, &policy); 1339 PROC_UNLOCK(tdt->td_proc); 1340 1341 switch (policy) { 1342 case SCHED_OTHER: 1343 td->td_retval[0] = LINUX_SCHED_OTHER; 1344 break; 1345 case SCHED_FIFO: 1346 td->td_retval[0] = LINUX_SCHED_FIFO; 1347 break; 1348 case SCHED_RR: 1349 td->td_retval[0] = LINUX_SCHED_RR; 1350 break; 1351 } 1352 return (error); 1353} 1354 1355int 1356linux_sched_get_priority_max(struct thread *td, 1357 struct linux_sched_get_priority_max_args *args) 1358{ 1359 struct sched_get_priority_max_args bsd; 1360 1361#ifdef DEBUG 1362 if (ldebug(sched_get_priority_max)) 1363 printf(ARGS(sched_get_priority_max, "%d"), args->policy); 1364#endif 1365 1366 switch (args->policy) { 1367 case LINUX_SCHED_OTHER: 1368 bsd.policy = SCHED_OTHER; 1369 break; 1370 case LINUX_SCHED_FIFO: 1371 bsd.policy = SCHED_FIFO; 1372 break; 1373 case LINUX_SCHED_RR: 1374 bsd.policy = SCHED_RR; 1375 break; 1376 default: 1377 return (EINVAL); 1378 } 1379 return (sys_sched_get_priority_max(td, &bsd)); 1380} 1381 1382int 1383linux_sched_get_priority_min(struct thread *td, 1384 struct linux_sched_get_priority_min_args *args) 1385{ 1386 struct sched_get_priority_min_args bsd; 1387 1388#ifdef DEBUG 1389 if (ldebug(sched_get_priority_min)) 1390 printf(ARGS(sched_get_priority_min, "%d"), args->policy); 1391#endif 1392 1393 switch (args->policy) { 1394 case LINUX_SCHED_OTHER: 1395 bsd.policy = SCHED_OTHER; 1396 break; 1397 case LINUX_SCHED_FIFO: 1398 bsd.policy = SCHED_FIFO; 1399 break; 1400 case LINUX_SCHED_RR: 1401 bsd.policy = SCHED_RR; 1402 break; 1403 default: 1404 return (EINVAL); 1405 } 1406 return (sys_sched_get_priority_min(td, &bsd)); 1407} 1408 1409#define REBOOT_CAD_ON 0x89abcdef 1410#define REBOOT_CAD_OFF 0 1411#define REBOOT_HALT 0xcdef0123 1412#define REBOOT_RESTART 0x01234567 1413#define REBOOT_RESTART2 0xA1B2C3D4 1414#define REBOOT_POWEROFF 0x4321FEDC 1415#define REBOOT_MAGIC1 0xfee1dead 1416#define REBOOT_MAGIC2 0x28121969 1417#define REBOOT_MAGIC2A 0x05121996 1418#define REBOOT_MAGIC2B 0x16041998 1419 1420int 1421linux_reboot(struct thread *td, struct linux_reboot_args *args) 1422{ 1423 struct reboot_args bsd_args; 1424 1425#ifdef DEBUG 1426 if (ldebug(reboot)) 1427 printf(ARGS(reboot, "0x%x"), args->cmd); 1428#endif 1429 1430 if (args->magic1 != REBOOT_MAGIC1) 1431 return (EINVAL); 1432 1433 switch (args->magic2) { 1434 case REBOOT_MAGIC2: 1435 case REBOOT_MAGIC2A: 1436 case REBOOT_MAGIC2B: 1437 break; 1438 default: 1439 return (EINVAL); 1440 } 1441 1442 switch (args->cmd) { 1443 case REBOOT_CAD_ON: 1444 case REBOOT_CAD_OFF: 1445 return (priv_check(td, PRIV_REBOOT)); 1446 case REBOOT_HALT: 1447 bsd_args.opt = RB_HALT; 1448 break; 1449 case REBOOT_RESTART: 1450 case REBOOT_RESTART2: 1451 bsd_args.opt = 0; 1452 break; 1453 case REBOOT_POWEROFF: 1454 bsd_args.opt = RB_POWEROFF; 1455 break; 1456 default: 1457 return (EINVAL); 1458 } 1459 return (sys_reboot(td, &bsd_args)); 1460} 1461 1462 1463/* 1464 * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify 1465 * td->td_retval[1] when COMPAT_43 is defined. This clobbers registers that 1466 * are assumed to be preserved. The following lightweight syscalls fixes 1467 * this. See also linux_getgid16() and linux_getuid16() in linux_uid16.c 1468 * 1469 * linux_getpid() - MP SAFE 1470 * linux_getgid() - MP SAFE 1471 * linux_getuid() - MP SAFE 1472 */ 1473 1474int 1475linux_getpid(struct thread *td, struct linux_getpid_args *args) 1476{ 1477 1478#ifdef DEBUG 1479 if (ldebug(getpid)) 1480 printf(ARGS(getpid, "")); 1481#endif 1482 td->td_retval[0] = td->td_proc->p_pid; 1483 1484 return (0); 1485} 1486 1487int 1488linux_gettid(struct thread *td, struct linux_gettid_args *args) 1489{ 1490 struct linux_emuldata *em; 1491 1492#ifdef DEBUG 1493 if (ldebug(gettid)) 1494 printf(ARGS(gettid, "")); 1495#endif 1496 1497 em = em_find(td); 1498 KASSERT(em != NULL, ("gettid: emuldata not found.\n")); 1499 1500 td->td_retval[0] = em->em_tid; 1501 1502 return (0); 1503} 1504 1505 1506int 1507linux_getppid(struct thread *td, struct linux_getppid_args *args) 1508{ 1509 1510#ifdef DEBUG 1511 if (ldebug(getppid)) 1512 printf(ARGS(getppid, "")); 1513#endif 1514 1515 PROC_LOCK(td->td_proc); 1516 td->td_retval[0] = td->td_proc->p_pptr->p_pid; 1517 PROC_UNLOCK(td->td_proc); 1518 return (0); 1519} 1520 1521int 1522linux_getgid(struct thread *td, struct linux_getgid_args *args) 1523{ 1524 1525#ifdef DEBUG 1526 if (ldebug(getgid)) 1527 printf(ARGS(getgid, "")); 1528#endif 1529 1530 td->td_retval[0] = td->td_ucred->cr_rgid; 1531 return (0); 1532} 1533 1534int 1535linux_getuid(struct thread *td, struct linux_getuid_args *args) 1536{ 1537 1538#ifdef DEBUG 1539 if (ldebug(getuid)) 1540 printf(ARGS(getuid, "")); 1541#endif 1542 1543 td->td_retval[0] = td->td_ucred->cr_ruid; 1544 return (0); 1545} 1546 1547 1548int 1549linux_getsid(struct thread *td, struct linux_getsid_args *args) 1550{ 1551 struct getsid_args bsd; 1552 1553#ifdef DEBUG 1554 if (ldebug(getsid)) 1555 printf(ARGS(getsid, "%i"), args->pid); 1556#endif 1557 1558 bsd.pid = args->pid; 1559 return (sys_getsid(td, &bsd)); 1560} 1561 1562int 1563linux_nosys(struct thread *td, struct nosys_args *ignore) 1564{ 1565 1566 return (ENOSYS); 1567} 1568 1569int 1570linux_getpriority(struct thread *td, struct linux_getpriority_args *args) 1571{ 1572 struct getpriority_args bsd_args; 1573 int error; 1574 1575#ifdef DEBUG 1576 if (ldebug(getpriority)) 1577 printf(ARGS(getpriority, "%i, %i"), args->which, args->who); 1578#endif 1579 1580 bsd_args.which = args->which; 1581 bsd_args.who = args->who; 1582 error = sys_getpriority(td, &bsd_args); 1583 td->td_retval[0] = 20 - td->td_retval[0]; 1584 return (error); 1585} 1586 1587int 1588linux_sethostname(struct thread *td, struct linux_sethostname_args *args) 1589{ 1590 int name[2]; 1591 1592#ifdef DEBUG 1593 if (ldebug(sethostname)) 1594 printf(ARGS(sethostname, "*, %i"), args->len); 1595#endif 1596 1597 name[0] = CTL_KERN; 1598 name[1] = KERN_HOSTNAME; 1599 return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, 1600 args->len, 0, 0)); 1601} 1602 1603int 1604linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) 1605{ 1606 int name[2]; 1607 1608#ifdef DEBUG 1609 if (ldebug(setdomainname)) 1610 printf(ARGS(setdomainname, "*, %i"), args->len); 1611#endif 1612 1613 name[0] = CTL_KERN; 1614 name[1] = KERN_NISDOMAINNAME; 1615 return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, 1616 args->len, 0, 0)); 1617} 1618 1619int 1620linux_exit_group(struct thread *td, struct linux_exit_group_args *args) 1621{ 1622 1623#ifdef DEBUG 1624 if (ldebug(exit_group)) 1625 printf(ARGS(exit_group, "%i"), args->error_code); 1626#endif 1627 1628 LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, 1629 args->error_code); 1630 1631 /* 1632 * XXX: we should send a signal to the parent if 1633 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) 1634 * as it doesnt occur often. 1635 */ 1636 exit1(td, W_EXITCODE(args->error_code, 0)); 1637 /* NOTREACHED */ 1638} 1639 1640#define _LINUX_CAPABILITY_VERSION 0x19980330 1641 1642struct l_user_cap_header { 1643 l_int version; 1644 l_int pid; 1645}; 1646 1647struct l_user_cap_data { 1648 l_int effective; 1649 l_int permitted; 1650 l_int inheritable; 1651}; 1652 1653int 1654linux_capget(struct thread *td, struct linux_capget_args *args) 1655{ 1656 struct l_user_cap_header luch; 1657 struct l_user_cap_data lucd; 1658 int error; 1659 1660 if (args->hdrp == NULL) 1661 return (EFAULT); 1662 1663 error = copyin(args->hdrp, &luch, sizeof(luch)); 1664 if (error != 0) 1665 return (error); 1666 1667 if (luch.version != _LINUX_CAPABILITY_VERSION) { 1668 luch.version = _LINUX_CAPABILITY_VERSION; 1669 error = copyout(&luch, args->hdrp, sizeof(luch)); 1670 if (error) 1671 return (error); 1672 return (EINVAL); 1673 } 1674 1675 if (luch.pid) 1676 return (EPERM); 1677 1678 if (args->datap) { 1679 /* 1680 * The current implementation doesn't support setting 1681 * a capability (it's essentially a stub) so indicate 1682 * that no capabilities are currently set or available 1683 * to request. 1684 */ 1685 bzero (&lucd, sizeof(lucd)); 1686 error = copyout(&lucd, args->datap, sizeof(lucd)); 1687 } 1688 1689 return (error); 1690} 1691 1692int 1693linux_capset(struct thread *td, struct linux_capset_args *args) 1694{ 1695 struct l_user_cap_header luch; 1696 struct l_user_cap_data lucd; 1697 int error; 1698 1699 if (args->hdrp == NULL || args->datap == NULL) 1700 return (EFAULT); 1701 1702 error = copyin(args->hdrp, &luch, sizeof(luch)); 1703 if (error != 0) 1704 return (error); 1705 1706 if (luch.version != _LINUX_CAPABILITY_VERSION) { 1707 luch.version = _LINUX_CAPABILITY_VERSION; 1708 error = copyout(&luch, args->hdrp, sizeof(luch)); 1709 if (error) 1710 return (error); 1711 return (EINVAL); 1712 } 1713 1714 if (luch.pid) 1715 return (EPERM); 1716 1717 error = copyin(args->datap, &lucd, sizeof(lucd)); 1718 if (error != 0) 1719 return (error); 1720 1721 /* We currently don't support setting any capabilities. */ 1722 if (lucd.effective || lucd.permitted || lucd.inheritable) { 1723 linux_msg(td, 1724 "capset effective=0x%x, permitted=0x%x, " 1725 "inheritable=0x%x is not implemented", 1726 (int)lucd.effective, (int)lucd.permitted, 1727 (int)lucd.inheritable); 1728 return (EPERM); 1729 } 1730 1731 return (0); 1732} 1733 1734int 1735linux_prctl(struct thread *td, struct linux_prctl_args *args) 1736{ 1737 int error = 0, max_size; 1738 struct proc *p = td->td_proc; 1739 char comm[LINUX_MAX_COMM_LEN]; 1740 struct linux_emuldata *em; 1741 int pdeath_signal; 1742 1743#ifdef DEBUG 1744 if (ldebug(prctl)) 1745 printf(ARGS(prctl, "%d, %d, %d, %d, %d"), args->option, 1746 args->arg2, args->arg3, args->arg4, args->arg5); 1747#endif 1748 1749 switch (args->option) { 1750 case LINUX_PR_SET_PDEATHSIG: 1751 if (!LINUX_SIG_VALID(args->arg2)) 1752 return (EINVAL); 1753 em = em_find(td); 1754 KASSERT(em != NULL, ("prctl: emuldata not found.\n")); 1755 em->pdeath_signal = args->arg2; 1756 break; 1757 case LINUX_PR_GET_PDEATHSIG: 1758 em = em_find(td); 1759 KASSERT(em != NULL, ("prctl: emuldata not found.\n")); 1760 pdeath_signal = em->pdeath_signal; 1761 error = copyout(&pdeath_signal, 1762 (void *)(register_t)args->arg2, 1763 sizeof(pdeath_signal)); 1764 break; 1765 case LINUX_PR_GET_KEEPCAPS: 1766 /* 1767 * Indicate that we always clear the effective and 1768 * permitted capability sets when the user id becomes 1769 * non-zero (actually the capability sets are simply 1770 * always zero in the current implementation). 1771 */ 1772 td->td_retval[0] = 0; 1773 break; 1774 case LINUX_PR_SET_KEEPCAPS: 1775 /* 1776 * Ignore requests to keep the effective and permitted 1777 * capability sets when the user id becomes non-zero. 1778 */ 1779 break; 1780 case LINUX_PR_SET_NAME: 1781 /* 1782 * To be on the safe side we need to make sure to not 1783 * overflow the size a linux program expects. We already 1784 * do this here in the copyin, so that we don't need to 1785 * check on copyout. 1786 */ 1787 max_size = MIN(sizeof(comm), sizeof(p->p_comm)); 1788 error = copyinstr((void *)(register_t)args->arg2, comm, 1789 max_size, NULL); 1790 1791 /* Linux silently truncates the name if it is too long. */ 1792 if (error == ENAMETOOLONG) { 1793 /* 1794 * XXX: copyinstr() isn't documented to populate the 1795 * array completely, so do a copyin() to be on the 1796 * safe side. This should be changed in case 1797 * copyinstr() is changed to guarantee this. 1798 */ 1799 error = copyin((void *)(register_t)args->arg2, comm, 1800 max_size - 1); 1801 comm[max_size - 1] = '\0'; 1802 } 1803 if (error) 1804 return (error); 1805 1806 PROC_LOCK(p); 1807 strlcpy(p->p_comm, comm, sizeof(p->p_comm)); 1808 PROC_UNLOCK(p); 1809 break; 1810 case LINUX_PR_GET_NAME: 1811 PROC_LOCK(p); 1812 strlcpy(comm, p->p_comm, sizeof(comm)); 1813 PROC_UNLOCK(p); 1814 error = copyout(comm, (void *)(register_t)args->arg2, 1815 strlen(comm) + 1); 1816 break; 1817 default: 1818 error = EINVAL; 1819 break; 1820 } 1821 1822 return (error); 1823} 1824 1825int 1826linux_sched_setparam(struct thread *td, 1827 struct linux_sched_setparam_args *uap) 1828{ 1829 struct sched_param sched_param; 1830 struct thread *tdt; 1831 int error; 1832 1833#ifdef DEBUG 1834 if (ldebug(sched_setparam)) 1835 printf(ARGS(sched_setparam, "%d, *"), uap->pid); 1836#endif 1837 1838 error = copyin(uap->param, &sched_param, sizeof(sched_param)); 1839 if (error) 1840 return (error); 1841 1842 tdt = linux_tdfind(td, uap->pid, -1); 1843 if (tdt == NULL) 1844 return (ESRCH); 1845 1846 error = kern_sched_setparam(td, tdt, &sched_param); 1847 PROC_UNLOCK(tdt->td_proc); 1848 return (error); 1849} 1850 1851int 1852linux_sched_getparam(struct thread *td, 1853 struct linux_sched_getparam_args *uap) 1854{ 1855 struct sched_param sched_param; 1856 struct thread *tdt; 1857 int error; 1858 1859#ifdef DEBUG 1860 if (ldebug(sched_getparam)) 1861 printf(ARGS(sched_getparam, "%d, *"), uap->pid); 1862#endif 1863 1864 tdt = linux_tdfind(td, uap->pid, -1); 1865 if (tdt == NULL) 1866 return (ESRCH); 1867 1868 error = kern_sched_getparam(td, tdt, &sched_param); 1869 PROC_UNLOCK(tdt->td_proc); 1870 if (error == 0) 1871 error = copyout(&sched_param, uap->param, 1872 sizeof(sched_param)); 1873 return (error); 1874} 1875 1876/* 1877 * Get affinity of a process. 1878 */ 1879int 1880linux_sched_getaffinity(struct thread *td, 1881 struct linux_sched_getaffinity_args *args) 1882{ 1883 int error; 1884 struct thread *tdt; 1885 struct cpuset_getaffinity_args cga; 1886 1887#ifdef DEBUG 1888 if (ldebug(sched_getaffinity)) 1889 printf(ARGS(sched_getaffinity, "%d, %d, *"), args->pid, 1890 args->len); 1891#endif 1892 if (args->len < sizeof(cpuset_t)) 1893 return (EINVAL); 1894 1895 tdt = linux_tdfind(td, args->pid, -1); 1896 if (tdt == NULL) 1897 return (ESRCH); 1898 1899 PROC_UNLOCK(tdt->td_proc); 1900 cga.level = CPU_LEVEL_WHICH; 1901 cga.which = CPU_WHICH_TID; 1902 cga.id = tdt->td_tid; 1903 cga.cpusetsize = sizeof(cpuset_t); 1904 cga.mask = (cpuset_t *) args->user_mask_ptr; 1905 1906 if ((error = sys_cpuset_getaffinity(td, &cga)) == 0) 1907 td->td_retval[0] = sizeof(cpuset_t); 1908 1909 return (error); 1910} 1911 1912/* 1913 * Set affinity of a process. 1914 */ 1915int 1916linux_sched_setaffinity(struct thread *td, 1917 struct linux_sched_setaffinity_args *args) 1918{ 1919 struct cpuset_setaffinity_args csa; 1920 struct thread *tdt; 1921 1922#ifdef DEBUG 1923 if (ldebug(sched_setaffinity)) 1924 printf(ARGS(sched_setaffinity, "%d, %d, *"), args->pid, 1925 args->len); 1926#endif 1927 if (args->len < sizeof(cpuset_t)) 1928 return (EINVAL); 1929 1930 tdt = linux_tdfind(td, args->pid, -1); 1931 if (tdt == NULL) 1932 return (ESRCH); 1933 1934 PROC_UNLOCK(tdt->td_proc); 1935 csa.level = CPU_LEVEL_WHICH; 1936 csa.which = CPU_WHICH_TID; 1937 csa.id = tdt->td_tid; 1938 csa.cpusetsize = sizeof(cpuset_t); 1939 csa.mask = (cpuset_t *) args->user_mask_ptr; 1940 1941 return (sys_cpuset_setaffinity(td, &csa)); 1942} 1943 1944int 1945linux_sched_rr_get_interval(struct thread *td, 1946 struct linux_sched_rr_get_interval_args *uap) 1947{ 1948 struct timespec ts; 1949 struct l_timespec lts; 1950 struct thread *tdt; 1951 int error; 1952 1953 tdt = linux_tdfind(td, uap->pid, -1); 1954 if (tdt == NULL) 1955 return (ESRCH); 1956 1957 error = kern_sched_rr_get_interval_td(td, tdt, &ts); 1958 PROC_UNLOCK(tdt->td_proc); 1959 if (error != 0) 1960 return (error); 1961 lts.tv_sec = ts.tv_sec; 1962 lts.tv_nsec = ts.tv_nsec; 1963 return (copyout(<s, uap->interval, sizeof(lts))); 1964} 1965 1966/* 1967 * In case when the Linux thread is the initial thread in 1968 * the thread group thread id is equal to the process id. 1969 * Glibc depends on this magic (assert in pthread_getattr_np.c). 1970 */ 1971struct thread * 1972linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) 1973{ 1974 struct linux_emuldata *em; 1975 struct thread *tdt; 1976 struct proc *p; 1977 1978 tdt = NULL; 1979 if (tid == 0 || tid == td->td_tid) { 1980 tdt = td; 1981 PROC_LOCK(tdt->td_proc); 1982 } else if (tid > PID_MAX) 1983 tdt = tdfind(tid, pid); 1984 else { 1985 /* 1986 * Initial thread where the tid equal to the pid. 1987 */ 1988 p = pfind(tid); 1989 if (p != NULL) { 1990 if (SV_PROC_ABI(p) != SV_ABI_LINUX) { 1991 /* 1992 * p is not a Linuxulator process. 1993 */ 1994 PROC_UNLOCK(p); 1995 return (NULL); 1996 } 1997 FOREACH_THREAD_IN_PROC(p, tdt) { 1998 em = em_find(tdt); 1999 if (tid == em->em_tid) 2000 return (tdt); 2001 } 2002 PROC_UNLOCK(p); 2003 } 2004 return (NULL); 2005 } 2006 2007 return (tdt); 2008} 2009 2010void 2011linux_to_bsd_waitopts(int options, int *bsdopts) 2012{ 2013 2014 if (options & LINUX_WNOHANG) 2015 *bsdopts |= WNOHANG; 2016 if (options & LINUX_WUNTRACED) 2017 *bsdopts |= WUNTRACED; 2018 if (options & LINUX_WEXITED) 2019 *bsdopts |= WEXITED; 2020 if (options & LINUX_WCONTINUED) 2021 *bsdopts |= WCONTINUED; 2022 if (options & LINUX_WNOWAIT) 2023 *bsdopts |= WNOWAIT; 2024 2025 if (options & __WCLONE) 2026 *bsdopts |= WLINUXCLONE; 2027} 2028