linux_misc.c revision 301049
/*-
 * Copyright (c) 2002 Doug Rabson
 * Copyright (c) 1994-1995 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: releng/9.3/sys/compat/linux/linux_misc.c 301049 2016-05-31 16:55:37Z glebius $"); 32 33#include "opt_compat.h" 34#include "opt_kdtrace.h" 35 36#include <sys/param.h> 37#include <sys/blist.h> 38#include <sys/fcntl.h> 39#if defined(__i386__) 40#include <sys/imgact_aout.h> 41#endif 42#include <sys/jail.h> 43#include <sys/kernel.h> 44#include <sys/limits.h> 45#include <sys/lock.h> 46#include <sys/malloc.h> 47#include <sys/mman.h> 48#include <sys/mount.h> 49#include <sys/mutex.h> 50#include <sys/namei.h> 51#include <sys/priv.h> 52#include <sys/proc.h> 53#include <sys/reboot.h> 54#include <sys/racct.h> 55#include <sys/resourcevar.h> 56#include <sys/sched.h> 57#include <sys/sdt.h> 58#include <sys/signalvar.h> 59#include <sys/stat.h> 60#include <sys/syscallsubr.h> 61#include <sys/sysctl.h> 62#include <sys/sysproto.h> 63#include <sys/systm.h> 64#include <sys/time.h> 65#include <sys/vmmeter.h> 66#include <sys/vnode.h> 67#include <sys/wait.h> 68#include <sys/cpuset.h> 69 70#include <security/mac/mac_framework.h> 71 72#include <vm/vm.h> 73#include <vm/pmap.h> 74#include <vm/vm_kern.h> 75#include <vm/vm_map.h> 76#include <vm/vm_extern.h> 77#include <vm/vm_object.h> 78#include <vm/swap_pager.h> 79 80#ifdef COMPAT_LINUX32 81#include <machine/../linux32/linux.h> 82#include <machine/../linux32/linux32_proto.h> 83#else 84#include <machine/../linux/linux.h> 85#include <machine/../linux/linux_proto.h> 86#endif 87 88#include <compat/linux/linux_dtrace.h> 89#include <compat/linux/linux_file.h> 90#include <compat/linux/linux_mib.h> 91#include <compat/linux/linux_signal.h> 92#include <compat/linux/linux_util.h> 93#include <compat/linux/linux_sysproto.h> 94#include <compat/linux/linux_emul.h> 95#include <compat/linux/linux_misc.h> 96 97/* DTrace init */ 98LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); 99 100/* Linuxulator-global DTrace probes */ 101LIN_SDT_PROBE_DECLARE(locks, emul_lock, locked); 102LIN_SDT_PROBE_DECLARE(locks, emul_lock, 
unlock); 103LIN_SDT_PROBE_DECLARE(locks, emul_shared_rlock, locked); 104LIN_SDT_PROBE_DECLARE(locks, emul_shared_rlock, unlock); 105LIN_SDT_PROBE_DECLARE(locks, emul_shared_wlock, locked); 106LIN_SDT_PROBE_DECLARE(locks, emul_shared_wlock, unlock); 107 108int stclohz; /* Statistics clock frequency */ 109 110static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { 111 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 112 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 113 RLIMIT_MEMLOCK, RLIMIT_AS 114}; 115 116struct l_sysinfo { 117 l_long uptime; /* Seconds since boot */ 118 l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ 119#define LINUX_SYSINFO_LOADS_SCALE 65536 120 l_ulong totalram; /* Total usable main memory size */ 121 l_ulong freeram; /* Available memory size */ 122 l_ulong sharedram; /* Amount of shared memory */ 123 l_ulong bufferram; /* Memory used by buffers */ 124 l_ulong totalswap; /* Total swap space size */ 125 l_ulong freeswap; /* swap space still available */ 126 l_ushort procs; /* Number of current processes */ 127 l_ushort pads; 128 l_ulong totalbig; 129 l_ulong freebig; 130 l_uint mem_unit; 131 char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ 132}; 133int 134linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) 135{ 136 struct l_sysinfo sysinfo; 137 vm_object_t object; 138 int i, j; 139 struct timespec ts; 140 141 bzero(&sysinfo, sizeof(sysinfo)); 142 getnanouptime(&ts); 143 if (ts.tv_nsec != 0) 144 ts.tv_sec++; 145 sysinfo.uptime = ts.tv_sec; 146 147 /* Use the information from the mib to get our load averages */ 148 for (i = 0; i < 3; i++) 149 sysinfo.loads[i] = averunnable.ldavg[i] * 150 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; 151 152 sysinfo.totalram = physmem * PAGE_SIZE; 153 sysinfo.freeram = sysinfo.totalram - cnt.v_wire_count * PAGE_SIZE; 154 155 sysinfo.sharedram = 0; 156 mtx_lock(&vm_object_list_mtx); 157 TAILQ_FOREACH(object, &vm_object_list, object_list) 158 if 
(object->shadow_count > 1) 159 sysinfo.sharedram += object->resident_page_count; 160 mtx_unlock(&vm_object_list_mtx); 161 162 sysinfo.sharedram *= PAGE_SIZE; 163 sysinfo.bufferram = 0; 164 165 swap_pager_status(&i, &j); 166 sysinfo.totalswap = i * PAGE_SIZE; 167 sysinfo.freeswap = (i - j) * PAGE_SIZE; 168 169 sysinfo.procs = nprocs; 170 171 /* The following are only present in newer Linux kernels. */ 172 sysinfo.totalbig = 0; 173 sysinfo.freebig = 0; 174 sysinfo.mem_unit = 1; 175 176 return (copyout(&sysinfo, args->info, sizeof(sysinfo))); 177} 178 179int 180linux_alarm(struct thread *td, struct linux_alarm_args *args) 181{ 182 struct itimerval it, old_it; 183 u_int secs; 184 int error; 185 186#ifdef DEBUG 187 if (ldebug(alarm)) 188 printf(ARGS(alarm, "%u"), args->secs); 189#endif 190 191 secs = args->secs; 192 193 if (secs > INT_MAX) 194 secs = INT_MAX; 195 196 it.it_value.tv_sec = (long) secs; 197 it.it_value.tv_usec = 0; 198 it.it_interval.tv_sec = 0; 199 it.it_interval.tv_usec = 0; 200 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); 201 if (error) 202 return (error); 203 if (timevalisset(&old_it.it_value)) { 204 if (old_it.it_value.tv_usec != 0) 205 old_it.it_value.tv_sec++; 206 td->td_retval[0] = old_it.it_value.tv_sec; 207 } 208 return (0); 209} 210 211int 212linux_brk(struct thread *td, struct linux_brk_args *args) 213{ 214 struct vmspace *vm = td->td_proc->p_vmspace; 215 vm_offset_t new, old; 216 struct obreak_args /* { 217 char * nsize; 218 } */ tmp; 219 220#ifdef DEBUG 221 if (ldebug(brk)) 222 printf(ARGS(brk, "%p"), (void *)(uintptr_t)args->dsend); 223#endif 224 old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize); 225 new = (vm_offset_t)args->dsend; 226 tmp.nsize = (char *)new; 227 if (((caddr_t)new > vm->vm_daddr) && !sys_obreak(td, &tmp)) 228 td->td_retval[0] = (long)new; 229 else 230 td->td_retval[0] = (long)old; 231 232 return (0); 233} 234 235#if defined(__i386__) 236/* XXX: what about amd64/linux32? 
*/ 237 238int 239linux_uselib(struct thread *td, struct linux_uselib_args *args) 240{ 241 struct nameidata ni; 242 struct vnode *vp; 243 struct exec *a_out; 244 struct vattr attr; 245 vm_offset_t vmaddr; 246 unsigned long file_offset; 247 unsigned long bss_size; 248 char *library; 249 ssize_t aresid; 250 int error, locked, vfslocked, writecount; 251 252 LCONVPATHEXIST(td, args->library, &library); 253 254#ifdef DEBUG 255 if (ldebug(uselib)) 256 printf(ARGS(uselib, "%s"), library); 257#endif 258 259 a_out = NULL; 260 vfslocked = 0; 261 locked = 0; 262 vp = NULL; 263 264 NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1, 265 UIO_SYSSPACE, library, td); 266 error = namei(&ni); 267 LFREEPATH(library); 268 if (error) 269 goto cleanup; 270 271 vp = ni.ni_vp; 272 vfslocked = NDHASGIANT(&ni); 273 NDFREE(&ni, NDF_ONLY_PNBUF); 274 275 /* 276 * From here on down, we have a locked vnode that must be unlocked. 277 * XXX: The code below largely duplicates exec_check_permissions(). 278 */ 279 locked = 1; 280 281 /* Writable? */ 282 error = VOP_GET_WRITECOUNT(vp, &writecount); 283 if (error != 0) 284 goto cleanup; 285 if (writecount != 0) { 286 error = ETXTBSY; 287 goto cleanup; 288 } 289 290 /* Executable? */ 291 error = VOP_GETATTR(vp, &attr, td->td_ucred); 292 if (error) 293 goto cleanup; 294 295 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 296 ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) { 297 /* EACCESS is what exec(2) returns. */ 298 error = ENOEXEC; 299 goto cleanup; 300 } 301 302 /* Sensible size? */ 303 if (attr.va_size == 0) { 304 error = ENOEXEC; 305 goto cleanup; 306 } 307 308 /* Can we access it? */ 309 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 310 if (error) 311 goto cleanup; 312 313 /* 314 * XXX: This should use vn_open() so that it is properly authorized, 315 * and to reduce code redundancy all over the place here. 316 * XXX: Not really, it duplicates far more of exec_check_permissions() 317 * than vn_open(). 
318 */ 319#ifdef MAC 320 error = mac_vnode_check_open(td->td_ucred, vp, VREAD); 321 if (error) 322 goto cleanup; 323#endif 324 error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); 325 if (error) 326 goto cleanup; 327 328 /* Pull in executable header into exec_map */ 329 error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE, 330 VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0); 331 if (error) 332 goto cleanup; 333 334 /* Is it a Linux binary ? */ 335 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 336 error = ENOEXEC; 337 goto cleanup; 338 } 339 340 /* 341 * While we are here, we should REALLY do some more checks 342 */ 343 344 /* Set file/virtual offset based on a.out variant. */ 345 switch ((int)(a_out->a_magic & 0xffff)) { 346 case 0413: /* ZMAGIC */ 347 file_offset = 1024; 348 break; 349 case 0314: /* QMAGIC */ 350 file_offset = 0; 351 break; 352 default: 353 error = ENOEXEC; 354 goto cleanup; 355 } 356 357 bss_size = round_page(a_out->a_bss); 358 359 /* Check various fields in header for validity/bounds. */ 360 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 361 error = ENOEXEC; 362 goto cleanup; 363 } 364 365 /* text + data can't exceed file size */ 366 if (a_out->a_data + a_out->a_text > attr.va_size) { 367 error = EFAULT; 368 goto cleanup; 369 } 370 371 /* 372 * text/data/bss must not exceed limits 373 * XXX - this is not complete. it should check current usage PLUS 374 * the resources needed by this library. 375 */ 376 PROC_LOCK(td->td_proc); 377 if (a_out->a_text > maxtsiz || 378 a_out->a_data + bss_size > lim_cur(td->td_proc, RLIMIT_DATA) || 379 racct_set(td->td_proc, RACCT_DATA, a_out->a_data + 380 bss_size) != 0) { 381 PROC_UNLOCK(td->td_proc); 382 error = ENOMEM; 383 goto cleanup; 384 } 385 PROC_UNLOCK(td->td_proc); 386 387 /* 388 * Prevent more writers. 389 * XXX: Note that if any of the VM operations fail below we don't 390 * clear this flag. 
391 */ 392 VOP_SET_TEXT(vp); 393 394 /* 395 * Lock no longer needed 396 */ 397 locked = 0; 398 VOP_UNLOCK(vp, 0); 399 VFS_UNLOCK_GIANT(vfslocked); 400 401 /* 402 * Check if file_offset page aligned. Currently we cannot handle 403 * misalinged file offsets, and so we read in the entire image 404 * (what a waste). 405 */ 406 if (file_offset & PAGE_MASK) { 407#ifdef DEBUG 408 printf("uselib: Non page aligned binary %lu\n", file_offset); 409#endif 410 /* Map text+data read/write/execute */ 411 412 /* a_entry is the load address and is page aligned */ 413 vmaddr = trunc_page(a_out->a_entry); 414 415 /* get anon user mapping, read+write+execute */ 416 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 417 &vmaddr, a_out->a_text + a_out->a_data, FALSE, VM_PROT_ALL, 418 VM_PROT_ALL, 0); 419 if (error) 420 goto cleanup; 421 422 error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset, 423 a_out->a_text + a_out->a_data, UIO_USERSPACE, 0, 424 td->td_ucred, NOCRED, &aresid, td); 425 if (error != 0) 426 goto cleanup; 427 if (aresid != 0) { 428 error = ENOEXEC; 429 goto cleanup; 430 } 431 } else { 432#ifdef DEBUG 433 printf("uselib: Page aligned binary %lu\n", file_offset); 434#endif 435 /* 436 * for QMAGIC, a_entry is 20 bytes beyond the load address 437 * to skip the executable header 438 */ 439 vmaddr = trunc_page(a_out->a_entry); 440 441 /* 442 * Map it all into the process's space as a single 443 * copy-on-write "data" segment. 
444 */ 445 error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr, 446 a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL, 447 MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset); 448 if (error) 449 goto cleanup; 450 } 451#ifdef DEBUG 452 printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long *)vmaddr)[0], 453 ((long *)vmaddr)[1]); 454#endif 455 if (bss_size != 0) { 456 /* Calculate BSS start address */ 457 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + 458 a_out->a_data; 459 460 /* allocate some 'anon' space */ 461 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 462 &vmaddr, bss_size, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0); 463 if (error) 464 goto cleanup; 465 } 466 467cleanup: 468 /* Unlock vnode if needed */ 469 if (locked) { 470 VOP_UNLOCK(vp, 0); 471 VFS_UNLOCK_GIANT(vfslocked); 472 } 473 474 /* Release the temporary mapping. */ 475 if (a_out) 476 kmem_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE); 477 478 return (error); 479} 480 481#endif /* __i386__ */ 482 483int 484linux_select(struct thread *td, struct linux_select_args *args) 485{ 486 l_timeval ltv; 487 struct timeval tv0, tv1, utv, *tvp; 488 int error; 489 490#ifdef DEBUG 491 if (ldebug(select)) 492 printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds, 493 (void *)args->readfds, (void *)args->writefds, 494 (void *)args->exceptfds, (void *)args->timeout); 495#endif 496 497 /* 498 * Store current time for computation of the amount of 499 * time left. 500 */ 501 if (args->timeout) { 502 if ((error = copyin(args->timeout, <v, sizeof(ltv)))) 503 goto select_out; 504 utv.tv_sec = ltv.tv_sec; 505 utv.tv_usec = ltv.tv_usec; 506#ifdef DEBUG 507 if (ldebug(select)) 508 printf(LMSG("incoming timeout (%jd/%ld)"), 509 (intmax_t)utv.tv_sec, utv.tv_usec); 510#endif 511 512 if (itimerfix(&utv)) { 513 /* 514 * The timeval was invalid. Convert it to something 515 * valid that will act as it does under Linux. 
516 */ 517 utv.tv_sec += utv.tv_usec / 1000000; 518 utv.tv_usec %= 1000000; 519 if (utv.tv_usec < 0) { 520 utv.tv_sec -= 1; 521 utv.tv_usec += 1000000; 522 } 523 if (utv.tv_sec < 0) 524 timevalclear(&utv); 525 } 526 microtime(&tv0); 527 tvp = &utv; 528 } else 529 tvp = NULL; 530 531 error = kern_select(td, args->nfds, args->readfds, args->writefds, 532 args->exceptfds, tvp, sizeof(l_int) * 8); 533 534#ifdef DEBUG 535 if (ldebug(select)) 536 printf(LMSG("real select returns %d"), error); 537#endif 538 if (error) 539 goto select_out; 540 541 if (args->timeout) { 542 if (td->td_retval[0]) { 543 /* 544 * Compute how much time was left of the timeout, 545 * by subtracting the current time and the time 546 * before we started the call, and subtracting 547 * that result from the user-supplied value. 548 */ 549 microtime(&tv1); 550 timevalsub(&tv1, &tv0); 551 timevalsub(&utv, &tv1); 552 if (utv.tv_sec < 0) 553 timevalclear(&utv); 554 } else 555 timevalclear(&utv); 556#ifdef DEBUG 557 if (ldebug(select)) 558 printf(LMSG("outgoing timeout (%jd/%ld)"), 559 (intmax_t)utv.tv_sec, utv.tv_usec); 560#endif 561 ltv.tv_sec = utv.tv_sec; 562 ltv.tv_usec = utv.tv_usec; 563 if ((error = copyout(<v, args->timeout, sizeof(ltv)))) 564 goto select_out; 565 } 566 567select_out: 568#ifdef DEBUG 569 if (ldebug(select)) 570 printf(LMSG("select_out -> %d"), error); 571#endif 572 return (error); 573} 574 575int 576linux_mremap(struct thread *td, struct linux_mremap_args *args) 577{ 578 struct munmap_args /* { 579 void *addr; 580 size_t len; 581 } */ bsd_args; 582 int error = 0; 583 584#ifdef DEBUG 585 if (ldebug(mremap)) 586 printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"), 587 (void *)(uintptr_t)args->addr, 588 (unsigned long)args->old_len, 589 (unsigned long)args->new_len, 590 (unsigned long)args->flags); 591#endif 592 593 if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { 594 td->td_retval[0] = 0; 595 return (EINVAL); 596 } 597 598 /* 599 * Check for the page alignment. 
600 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. 601 */ 602 if (args->addr & PAGE_MASK) { 603 td->td_retval[0] = 0; 604 return (EINVAL); 605 } 606 607 args->new_len = round_page(args->new_len); 608 args->old_len = round_page(args->old_len); 609 610 if (args->new_len > args->old_len) { 611 td->td_retval[0] = 0; 612 return (ENOMEM); 613 } 614 615 if (args->new_len < args->old_len) { 616 bsd_args.addr = 617 (caddr_t)((uintptr_t)args->addr + args->new_len); 618 bsd_args.len = args->old_len - args->new_len; 619 error = sys_munmap(td, &bsd_args); 620 } 621 622 td->td_retval[0] = error ? 0 : (uintptr_t)args->addr; 623 return (error); 624} 625 626#define LINUX_MS_ASYNC 0x0001 627#define LINUX_MS_INVALIDATE 0x0002 628#define LINUX_MS_SYNC 0x0004 629 630int 631linux_msync(struct thread *td, struct linux_msync_args *args) 632{ 633 struct msync_args bsd_args; 634 635 bsd_args.addr = (caddr_t)(uintptr_t)args->addr; 636 bsd_args.len = (uintptr_t)args->len; 637 bsd_args.flags = args->fl & ~LINUX_MS_SYNC; 638 639 return (sys_msync(td, &bsd_args)); 640} 641 642int 643linux_time(struct thread *td, struct linux_time_args *args) 644{ 645 struct timeval tv; 646 l_time_t tm; 647 int error; 648 649#ifdef DEBUG 650 if (ldebug(time)) 651 printf(ARGS(time, "*")); 652#endif 653 654 microtime(&tv); 655 tm = tv.tv_sec; 656 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) 657 return (error); 658 td->td_retval[0] = tm; 659 return (0); 660} 661 662struct l_times_argv { 663 l_clock_t tms_utime; 664 l_clock_t tms_stime; 665 l_clock_t tms_cutime; 666 l_clock_t tms_cstime; 667}; 668 669 670/* 671 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. 672 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK 673 * auxiliary vector entry. 
674 */ 675#define CLK_TCK 100 676 677#define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 678#define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) 679 680#define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER_2004000 ? \ 681 CONVNTCK(r) : CONVOTCK(r)) 682 683int 684linux_times(struct thread *td, struct linux_times_args *args) 685{ 686 struct timeval tv, utime, stime, cutime, cstime; 687 struct l_times_argv tms; 688 struct proc *p; 689 int error; 690 691#ifdef DEBUG 692 if (ldebug(times)) 693 printf(ARGS(times, "*")); 694#endif 695 696 if (args->buf != NULL) { 697 p = td->td_proc; 698 PROC_LOCK(p); 699 PROC_SLOCK(p); 700 calcru(p, &utime, &stime); 701 PROC_SUNLOCK(p); 702 calccru(p, &cutime, &cstime); 703 PROC_UNLOCK(p); 704 705 tms.tms_utime = CONVTCK(utime); 706 tms.tms_stime = CONVTCK(stime); 707 708 tms.tms_cutime = CONVTCK(cutime); 709 tms.tms_cstime = CONVTCK(cstime); 710 711 if ((error = copyout(&tms, args->buf, sizeof(tms)))) 712 return (error); 713 } 714 715 microuptime(&tv); 716 td->td_retval[0] = (int)CONVTCK(tv); 717 return (0); 718} 719 720int 721linux_newuname(struct thread *td, struct linux_newuname_args *args) 722{ 723 struct l_new_utsname utsname; 724 char osname[LINUX_MAX_UTSNAME]; 725 char osrelease[LINUX_MAX_UTSNAME]; 726 char *p; 727 728#ifdef DEBUG 729 if (ldebug(newuname)) 730 printf(ARGS(newuname, "*")); 731#endif 732 733 linux_get_osname(td, osname); 734 linux_get_osrelease(td, osrelease); 735 736 bzero(&utsname, sizeof(utsname)); 737 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); 738 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); 739 getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); 740 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); 741 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); 742 for (p = utsname.version; *p != '\0'; ++p) 743 if (*p == '\n') { 744 *p = '\0'; 745 break; 746 } 747 strlcpy(utsname.machine, linux_platform, 
LINUX_MAX_UTSNAME); 748 749 return (copyout(&utsname, args->buf, sizeof(utsname))); 750} 751 752#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 753struct l_utimbuf { 754 l_time_t l_actime; 755 l_time_t l_modtime; 756}; 757 758int 759linux_utime(struct thread *td, struct linux_utime_args *args) 760{ 761 struct timeval tv[2], *tvp; 762 struct l_utimbuf lut; 763 char *fname; 764 int error; 765 766 LCONVPATHEXIST(td, args->fname, &fname); 767 768#ifdef DEBUG 769 if (ldebug(utime)) 770 printf(ARGS(utime, "%s, *"), fname); 771#endif 772 773 if (args->times) { 774 if ((error = copyin(args->times, &lut, sizeof lut))) { 775 LFREEPATH(fname); 776 return (error); 777 } 778 tv[0].tv_sec = lut.l_actime; 779 tv[0].tv_usec = 0; 780 tv[1].tv_sec = lut.l_modtime; 781 tv[1].tv_usec = 0; 782 tvp = tv; 783 } else 784 tvp = NULL; 785 786 error = kern_utimes(td, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); 787 LFREEPATH(fname); 788 return (error); 789} 790 791int 792linux_utimes(struct thread *td, struct linux_utimes_args *args) 793{ 794 l_timeval ltv[2]; 795 struct timeval tv[2], *tvp = NULL; 796 char *fname; 797 int error; 798 799 LCONVPATHEXIST(td, args->fname, &fname); 800 801#ifdef DEBUG 802 if (ldebug(utimes)) 803 printf(ARGS(utimes, "%s, *"), fname); 804#endif 805 806 if (args->tptr != NULL) { 807 if ((error = copyin(args->tptr, ltv, sizeof ltv))) { 808 LFREEPATH(fname); 809 return (error); 810 } 811 tv[0].tv_sec = ltv[0].tv_sec; 812 tv[0].tv_usec = ltv[0].tv_usec; 813 tv[1].tv_sec = ltv[1].tv_sec; 814 tv[1].tv_usec = ltv[1].tv_usec; 815 tvp = tv; 816 } 817 818 error = kern_utimes(td, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); 819 LFREEPATH(fname); 820 return (error); 821} 822 823int 824linux_futimesat(struct thread *td, struct linux_futimesat_args *args) 825{ 826 l_timeval ltv[2]; 827 struct timeval tv[2], *tvp = NULL; 828 char *fname; 829 int error, dfd; 830 831 dfd = (args->dfd == LINUX_AT_FDCWD) ? 
AT_FDCWD : args->dfd; 832 LCONVPATHEXIST_AT(td, args->filename, &fname, dfd); 833 834#ifdef DEBUG 835 if (ldebug(futimesat)) 836 printf(ARGS(futimesat, "%s, *"), fname); 837#endif 838 839 if (args->utimes != NULL) { 840 if ((error = copyin(args->utimes, ltv, sizeof ltv))) { 841 LFREEPATH(fname); 842 return (error); 843 } 844 tv[0].tv_sec = ltv[0].tv_sec; 845 tv[0].tv_usec = ltv[0].tv_usec; 846 tv[1].tv_sec = ltv[1].tv_sec; 847 tv[1].tv_usec = ltv[1].tv_usec; 848 tvp = tv; 849 } 850 851 error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); 852 LFREEPATH(fname); 853 return (error); 854} 855#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 856 857int 858linux_common_wait(struct thread *td, int pid, int *status, 859 int options, struct rusage *ru) 860{ 861 int error, tmpstat; 862 863 error = kern_wait(td, pid, &tmpstat, options, ru); 864 if (error) 865 return (error); 866 867 if (status) { 868 tmpstat &= 0xffff; 869 if (WIFSIGNALED(tmpstat)) 870 tmpstat = (tmpstat & 0xffffff80) | 871 BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); 872 else if (WIFSTOPPED(tmpstat)) 873 tmpstat = (tmpstat & 0xffff00ff) | 874 (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8); 875 error = copyout(&tmpstat, status, sizeof(int)); 876 } 877 878 return (error); 879} 880 881int 882linux_waitpid(struct thread *td, struct linux_waitpid_args *args) 883{ 884 int options; 885 886#ifdef DEBUG 887 if (ldebug(waitpid)) 888 printf(ARGS(waitpid, "%d, %p, %d"), 889 args->pid, (void *)args->status, args->options); 890#endif 891 /* 892 * this is necessary because the test in kern_wait doesn't work 893 * because we mess with the options here 894 */ 895 if (args->options & ~(WUNTRACED | WNOHANG | WCONTINUED | __WCLONE)) 896 return (EINVAL); 897 898 options = (args->options & (WNOHANG | WUNTRACED)); 899 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 900 if (args->options & __WCLONE) 901 options |= WLINUXCLONE; 902 903 return (linux_common_wait(td, args->pid, args->status, options, 
NULL)); 904} 905 906 907int 908linux_mknod(struct thread *td, struct linux_mknod_args *args) 909{ 910 char *path; 911 int error; 912 913 LCONVPATHCREAT(td, args->path, &path); 914 915#ifdef DEBUG 916 if (ldebug(mknod)) 917 printf(ARGS(mknod, "%s, %d, %d"), path, args->mode, args->dev); 918#endif 919 920 switch (args->mode & S_IFMT) { 921 case S_IFIFO: 922 case S_IFSOCK: 923 error = kern_mkfifo(td, path, UIO_SYSSPACE, args->mode); 924 break; 925 926 case S_IFCHR: 927 case S_IFBLK: 928 error = kern_mknod(td, path, UIO_SYSSPACE, args->mode, 929 args->dev); 930 break; 931 932 case S_IFDIR: 933 error = EPERM; 934 break; 935 936 case 0: 937 args->mode |= S_IFREG; 938 /* FALLTHROUGH */ 939 case S_IFREG: 940 error = kern_open(td, path, UIO_SYSSPACE, 941 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 942 if (error == 0) 943 kern_close(td, td->td_retval[0]); 944 break; 945 946 default: 947 error = EINVAL; 948 break; 949 } 950 LFREEPATH(path); 951 return (error); 952} 953 954int 955linux_mknodat(struct thread *td, struct linux_mknodat_args *args) 956{ 957 char *path; 958 int error, dfd; 959 960 dfd = (args->dfd == LINUX_AT_FDCWD) ? 
AT_FDCWD : args->dfd; 961 LCONVPATHCREAT_AT(td, args->filename, &path, dfd); 962 963#ifdef DEBUG 964 if (ldebug(mknodat)) 965 printf(ARGS(mknodat, "%s, %d, %d"), path, args->mode, args->dev); 966#endif 967 968 switch (args->mode & S_IFMT) { 969 case S_IFIFO: 970 case S_IFSOCK: 971 error = kern_mkfifoat(td, dfd, path, UIO_SYSSPACE, args->mode); 972 break; 973 974 case S_IFCHR: 975 case S_IFBLK: 976 error = kern_mknodat(td, dfd, path, UIO_SYSSPACE, args->mode, 977 args->dev); 978 break; 979 980 case S_IFDIR: 981 error = EPERM; 982 break; 983 984 case 0: 985 args->mode |= S_IFREG; 986 /* FALLTHROUGH */ 987 case S_IFREG: 988 error = kern_openat(td, dfd, path, UIO_SYSSPACE, 989 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 990 if (error == 0) 991 kern_close(td, td->td_retval[0]); 992 break; 993 994 default: 995 error = EINVAL; 996 break; 997 } 998 LFREEPATH(path); 999 return (error); 1000} 1001 1002/* 1003 * UGH! This is just about the dumbest idea I've ever heard!! 1004 */ 1005int 1006linux_personality(struct thread *td, struct linux_personality_args *args) 1007{ 1008#ifdef DEBUG 1009 if (ldebug(personality)) 1010 printf(ARGS(personality, "%lu"), (unsigned long)args->per); 1011#endif 1012 if (args->per != 0) 1013 return (EINVAL); 1014 1015 /* Yes Jim, it's still a Linux... 
*/ 1016 td->td_retval[0] = 0; 1017 return (0); 1018} 1019 1020struct l_itimerval { 1021 l_timeval it_interval; 1022 l_timeval it_value; 1023}; 1024 1025#define B2L_ITIMERVAL(bip, lip) \ 1026 (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ 1027 (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ 1028 (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ 1029 (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; 1030 1031int 1032linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) 1033{ 1034 int error; 1035 struct l_itimerval ls; 1036 struct itimerval aitv, oitv; 1037 1038#ifdef DEBUG 1039 if (ldebug(setitimer)) 1040 printf(ARGS(setitimer, "%p, %p"), 1041 (void *)uap->itv, (void *)uap->oitv); 1042#endif 1043 1044 if (uap->itv == NULL) { 1045 uap->itv = uap->oitv; 1046 return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); 1047 } 1048 1049 error = copyin(uap->itv, &ls, sizeof(ls)); 1050 if (error != 0) 1051 return (error); 1052 B2L_ITIMERVAL(&aitv, &ls); 1053#ifdef DEBUG 1054 if (ldebug(setitimer)) { 1055 printf("setitimer: value: sec: %jd, usec: %ld\n", 1056 (intmax_t)aitv.it_value.tv_sec, aitv.it_value.tv_usec); 1057 printf("setitimer: interval: sec: %jd, usec: %ld\n", 1058 (intmax_t)aitv.it_interval.tv_sec, aitv.it_interval.tv_usec); 1059 } 1060#endif 1061 error = kern_setitimer(td, uap->which, &aitv, &oitv); 1062 if (error != 0 || uap->oitv == NULL) 1063 return (error); 1064 B2L_ITIMERVAL(&ls, &oitv); 1065 1066 return (copyout(&ls, uap->oitv, sizeof(ls))); 1067} 1068 1069int 1070linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) 1071{ 1072 int error; 1073 struct l_itimerval ls; 1074 struct itimerval aitv; 1075 1076#ifdef DEBUG 1077 if (ldebug(getitimer)) 1078 printf(ARGS(getitimer, "%p"), (void *)uap->itv); 1079#endif 1080 error = kern_getitimer(td, uap->which, &aitv); 1081 if (error != 0) 1082 return (error); 1083 B2L_ITIMERVAL(&ls, &aitv); 1084 return (copyout(&ls, uap->itv, sizeof(ls))); 1085} 1086 1087int 
1088linux_nice(struct thread *td, struct linux_nice_args *args) 1089{ 1090 struct setpriority_args bsd_args; 1091 1092 bsd_args.which = PRIO_PROCESS; 1093 bsd_args.who = 0; /* current process */ 1094 bsd_args.prio = args->inc; 1095 return (sys_setpriority(td, &bsd_args)); 1096} 1097 1098int 1099linux_setgroups(struct thread *td, struct linux_setgroups_args *args) 1100{ 1101 struct ucred *newcred, *oldcred; 1102 l_gid_t *linux_gidset; 1103 gid_t *bsd_gidset; 1104 int ngrp, error; 1105 struct proc *p; 1106 1107 ngrp = args->gidsetsize; 1108 if (ngrp < 0 || ngrp >= ngroups_max + 1) 1109 return (EINVAL); 1110 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_TEMP, M_WAITOK); 1111 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); 1112 if (error) 1113 goto out; 1114 newcred = crget(); 1115 crextend(newcred, ngrp + 1); 1116 p = td->td_proc; 1117 PROC_LOCK(p); 1118 oldcred = p->p_ucred; 1119 crcopy(newcred, oldcred); 1120 1121 /* 1122 * cr_groups[0] holds egid. Setting the whole set from 1123 * the supplied set will cause egid to be changed too. 1124 * Keep cr_groups[0] unchanged to prevent that. 
1125 */ 1126 1127 if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS, 0)) != 0) { 1128 PROC_UNLOCK(p); 1129 crfree(newcred); 1130 goto out; 1131 } 1132 1133 if (ngrp > 0) { 1134 newcred->cr_ngroups = ngrp + 1; 1135 1136 bsd_gidset = newcred->cr_groups; 1137 ngrp--; 1138 while (ngrp >= 0) { 1139 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1140 ngrp--; 1141 } 1142 } else 1143 newcred->cr_ngroups = 1; 1144 1145 setsugid(p); 1146 p->p_ucred = newcred; 1147 PROC_UNLOCK(p); 1148 crfree(oldcred); 1149 error = 0; 1150out: 1151 free(linux_gidset, M_TEMP); 1152 return (error); 1153} 1154 1155int 1156linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1157{ 1158 struct ucred *cred; 1159 l_gid_t *linux_gidset; 1160 gid_t *bsd_gidset; 1161 int bsd_gidsetsz, ngrp, error; 1162 1163 cred = td->td_ucred; 1164 bsd_gidset = cred->cr_groups; 1165 bsd_gidsetsz = cred->cr_ngroups - 1; 1166 1167 /* 1168 * cr_groups[0] holds egid. Returning the whole set 1169 * here will cause a duplicate. Exclude cr_groups[0] 1170 * to prevent that. 
1171 */ 1172 1173 if ((ngrp = args->gidsetsize) == 0) { 1174 td->td_retval[0] = bsd_gidsetsz; 1175 return (0); 1176 } 1177 1178 if (ngrp < bsd_gidsetsz) 1179 return (EINVAL); 1180 1181 ngrp = 0; 1182 linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), 1183 M_TEMP, M_WAITOK); 1184 while (ngrp < bsd_gidsetsz) { 1185 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1186 ngrp++; 1187 } 1188 1189 error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); 1190 free(linux_gidset, M_TEMP); 1191 if (error) 1192 return (error); 1193 1194 td->td_retval[0] = ngrp; 1195 return (0); 1196} 1197 1198int 1199linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1200{ 1201 struct rlimit bsd_rlim; 1202 struct l_rlimit rlim; 1203 u_int which; 1204 int error; 1205 1206#ifdef DEBUG 1207 if (ldebug(setrlimit)) 1208 printf(ARGS(setrlimit, "%d, %p"), 1209 args->resource, (void *)args->rlim); 1210#endif 1211 1212 if (args->resource >= LINUX_RLIM_NLIMITS) 1213 return (EINVAL); 1214 1215 which = linux_to_bsd_resource[args->resource]; 1216 if (which == -1) 1217 return (EINVAL); 1218 1219 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1220 if (error) 1221 return (error); 1222 1223 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1224 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1225 return (kern_setrlimit(td, which, &bsd_rlim)); 1226} 1227 1228int 1229linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) 1230{ 1231 struct l_rlimit rlim; 1232 struct proc *p = td->td_proc; 1233 struct rlimit bsd_rlim; 1234 u_int which; 1235 1236#ifdef DEBUG 1237 if (ldebug(old_getrlimit)) 1238 printf(ARGS(old_getrlimit, "%d, %p"), 1239 args->resource, (void *)args->rlim); 1240#endif 1241 1242 if (args->resource >= LINUX_RLIM_NLIMITS) 1243 return (EINVAL); 1244 1245 which = linux_to_bsd_resource[args->resource]; 1246 if (which == -1) 1247 return (EINVAL); 1248 1249 PROC_LOCK(p); 1250 lim_rlimit(p, which, &bsd_rlim); 1251 PROC_UNLOCK(p); 1252 1253#ifdef 
COMPAT_LINUX32 1254 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; 1255 if (rlim.rlim_cur == UINT_MAX) 1256 rlim.rlim_cur = INT_MAX; 1257 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; 1258 if (rlim.rlim_max == UINT_MAX) 1259 rlim.rlim_max = INT_MAX; 1260#else 1261 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; 1262 if (rlim.rlim_cur == ULONG_MAX) 1263 rlim.rlim_cur = LONG_MAX; 1264 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; 1265 if (rlim.rlim_max == ULONG_MAX) 1266 rlim.rlim_max = LONG_MAX; 1267#endif 1268 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1269} 1270 1271int 1272linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 1273{ 1274 struct l_rlimit rlim; 1275 struct proc *p = td->td_proc; 1276 struct rlimit bsd_rlim; 1277 u_int which; 1278 1279#ifdef DEBUG 1280 if (ldebug(getrlimit)) 1281 printf(ARGS(getrlimit, "%d, %p"), 1282 args->resource, (void *)args->rlim); 1283#endif 1284 1285 if (args->resource >= LINUX_RLIM_NLIMITS) 1286 return (EINVAL); 1287 1288 which = linux_to_bsd_resource[args->resource]; 1289 if (which == -1) 1290 return (EINVAL); 1291 1292 PROC_LOCK(p); 1293 lim_rlimit(p, which, &bsd_rlim); 1294 PROC_UNLOCK(p); 1295 1296 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; 1297 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; 1298 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1299} 1300 1301int 1302linux_sched_setscheduler(struct thread *td, 1303 struct linux_sched_setscheduler_args *args) 1304{ 1305 struct sched_setscheduler_args bsd; 1306 1307#ifdef DEBUG 1308 if (ldebug(sched_setscheduler)) 1309 printf(ARGS(sched_setscheduler, "%d, %d, %p"), 1310 args->pid, args->policy, (const void *)args->param); 1311#endif 1312 1313 switch (args->policy) { 1314 case LINUX_SCHED_OTHER: 1315 bsd.policy = SCHED_OTHER; 1316 break; 1317 case LINUX_SCHED_FIFO: 1318 bsd.policy = SCHED_FIFO; 1319 break; 1320 case LINUX_SCHED_RR: 1321 bsd.policy = SCHED_RR; 1322 break; 1323 default: 1324 return (EINVAL); 1325 } 1326 1327 bsd.pid = 
args->pid; 1328 bsd.param = (struct sched_param *)args->param; 1329 return (sys_sched_setscheduler(td, &bsd)); 1330} 1331 1332int 1333linux_sched_getscheduler(struct thread *td, 1334 struct linux_sched_getscheduler_args *args) 1335{ 1336 struct sched_getscheduler_args bsd; 1337 int error; 1338 1339#ifdef DEBUG 1340 if (ldebug(sched_getscheduler)) 1341 printf(ARGS(sched_getscheduler, "%d"), args->pid); 1342#endif 1343 1344 bsd.pid = args->pid; 1345 error = sys_sched_getscheduler(td, &bsd); 1346 1347 switch (td->td_retval[0]) { 1348 case SCHED_OTHER: 1349 td->td_retval[0] = LINUX_SCHED_OTHER; 1350 break; 1351 case SCHED_FIFO: 1352 td->td_retval[0] = LINUX_SCHED_FIFO; 1353 break; 1354 case SCHED_RR: 1355 td->td_retval[0] = LINUX_SCHED_RR; 1356 break; 1357 } 1358 1359 return (error); 1360} 1361 1362int 1363linux_sched_get_priority_max(struct thread *td, 1364 struct linux_sched_get_priority_max_args *args) 1365{ 1366 struct sched_get_priority_max_args bsd; 1367 1368#ifdef DEBUG 1369 if (ldebug(sched_get_priority_max)) 1370 printf(ARGS(sched_get_priority_max, "%d"), args->policy); 1371#endif 1372 1373 switch (args->policy) { 1374 case LINUX_SCHED_OTHER: 1375 bsd.policy = SCHED_OTHER; 1376 break; 1377 case LINUX_SCHED_FIFO: 1378 bsd.policy = SCHED_FIFO; 1379 break; 1380 case LINUX_SCHED_RR: 1381 bsd.policy = SCHED_RR; 1382 break; 1383 default: 1384 return (EINVAL); 1385 } 1386 return (sys_sched_get_priority_max(td, &bsd)); 1387} 1388 1389int 1390linux_sched_get_priority_min(struct thread *td, 1391 struct linux_sched_get_priority_min_args *args) 1392{ 1393 struct sched_get_priority_min_args bsd; 1394 1395#ifdef DEBUG 1396 if (ldebug(sched_get_priority_min)) 1397 printf(ARGS(sched_get_priority_min, "%d"), args->policy); 1398#endif 1399 1400 switch (args->policy) { 1401 case LINUX_SCHED_OTHER: 1402 bsd.policy = SCHED_OTHER; 1403 break; 1404 case LINUX_SCHED_FIFO: 1405 bsd.policy = SCHED_FIFO; 1406 break; 1407 case LINUX_SCHED_RR: 1408 bsd.policy = SCHED_RR; 1409 break; 1410 
default: 1411 return (EINVAL); 1412 } 1413 return (sys_sched_get_priority_min(td, &bsd)); 1414} 1415 1416#define REBOOT_CAD_ON 0x89abcdef 1417#define REBOOT_CAD_OFF 0 1418#define REBOOT_HALT 0xcdef0123 1419#define REBOOT_RESTART 0x01234567 1420#define REBOOT_RESTART2 0xA1B2C3D4 1421#define REBOOT_POWEROFF 0x4321FEDC 1422#define REBOOT_MAGIC1 0xfee1dead 1423#define REBOOT_MAGIC2 0x28121969 1424#define REBOOT_MAGIC2A 0x05121996 1425#define REBOOT_MAGIC2B 0x16041998 1426 1427int 1428linux_reboot(struct thread *td, struct linux_reboot_args *args) 1429{ 1430 struct reboot_args bsd_args; 1431 1432#ifdef DEBUG 1433 if (ldebug(reboot)) 1434 printf(ARGS(reboot, "0x%x"), args->cmd); 1435#endif 1436 1437 if (args->magic1 != REBOOT_MAGIC1) 1438 return (EINVAL); 1439 1440 switch (args->magic2) { 1441 case REBOOT_MAGIC2: 1442 case REBOOT_MAGIC2A: 1443 case REBOOT_MAGIC2B: 1444 break; 1445 default: 1446 return (EINVAL); 1447 } 1448 1449 switch (args->cmd) { 1450 case REBOOT_CAD_ON: 1451 case REBOOT_CAD_OFF: 1452 return (priv_check(td, PRIV_REBOOT)); 1453 case REBOOT_HALT: 1454 bsd_args.opt = RB_HALT; 1455 break; 1456 case REBOOT_RESTART: 1457 case REBOOT_RESTART2: 1458 bsd_args.opt = 0; 1459 break; 1460 case REBOOT_POWEROFF: 1461 bsd_args.opt = RB_POWEROFF; 1462 break; 1463 default: 1464 return (EINVAL); 1465 } 1466 return (sys_reboot(td, &bsd_args)); 1467} 1468 1469 1470/* 1471 * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify 1472 * td->td_retval[1] when COMPAT_43 is defined. This clobbers registers that 1473 * are assumed to be preserved. The following lightweight syscalls fixes 1474 * this. 
See also linux_getgid16() and linux_getuid16() in linux_uid16.c 1475 * 1476 * linux_getpid() - MP SAFE 1477 * linux_getgid() - MP SAFE 1478 * linux_getuid() - MP SAFE 1479 */ 1480 1481int 1482linux_getpid(struct thread *td, struct linux_getpid_args *args) 1483{ 1484 struct linux_emuldata *em; 1485 1486#ifdef DEBUG 1487 if (ldebug(getpid)) 1488 printf(ARGS(getpid, "")); 1489#endif 1490 1491 if (linux_use26(td)) { 1492 em = em_find(td->td_proc, EMUL_DONTLOCK); 1493 KASSERT(em != NULL, ("getpid: emuldata not found.\n")); 1494 td->td_retval[0] = em->shared->group_pid; 1495 } else { 1496 td->td_retval[0] = td->td_proc->p_pid; 1497 } 1498 1499 return (0); 1500} 1501 1502int 1503linux_gettid(struct thread *td, struct linux_gettid_args *args) 1504{ 1505 1506#ifdef DEBUG 1507 if (ldebug(gettid)) 1508 printf(ARGS(gettid, "")); 1509#endif 1510 1511 td->td_retval[0] = td->td_proc->p_pid; 1512 return (0); 1513} 1514 1515 1516int 1517linux_getppid(struct thread *td, struct linux_getppid_args *args) 1518{ 1519 struct linux_emuldata *em; 1520 struct proc *p, *pp; 1521 1522#ifdef DEBUG 1523 if (ldebug(getppid)) 1524 printf(ARGS(getppid, "")); 1525#endif 1526 1527 if (!linux_use26(td)) { 1528 PROC_LOCK(td->td_proc); 1529 td->td_retval[0] = td->td_proc->p_pptr->p_pid; 1530 PROC_UNLOCK(td->td_proc); 1531 return (0); 1532 } 1533 1534 em = em_find(td->td_proc, EMUL_DONTLOCK); 1535 1536 KASSERT(em != NULL, ("getppid: process emuldata not found.\n")); 1537 1538 /* find the group leader */ 1539 p = pfind(em->shared->group_pid); 1540 1541 if (p == NULL) { 1542#ifdef DEBUG 1543 printf(LMSG("parent process not found.\n")); 1544#endif 1545 return (0); 1546 } 1547 1548 pp = p->p_pptr; /* switch to parent */ 1549 PROC_LOCK(pp); 1550 PROC_UNLOCK(p); 1551 1552 /* if its also linux process */ 1553 if (pp->p_sysent == &elf_linux_sysvec) { 1554 em = em_find(pp, EMUL_DONTLOCK); 1555 KASSERT(em != NULL, ("getppid: parent emuldata not found.\n")); 1556 1557 td->td_retval[0] = em->shared->group_pid; 1558 
} else 1559 td->td_retval[0] = pp->p_pid; 1560 1561 PROC_UNLOCK(pp); 1562 1563 return (0); 1564} 1565 1566int 1567linux_getgid(struct thread *td, struct linux_getgid_args *args) 1568{ 1569 1570#ifdef DEBUG 1571 if (ldebug(getgid)) 1572 printf(ARGS(getgid, "")); 1573#endif 1574 1575 td->td_retval[0] = td->td_ucred->cr_rgid; 1576 return (0); 1577} 1578 1579int 1580linux_getuid(struct thread *td, struct linux_getuid_args *args) 1581{ 1582 1583#ifdef DEBUG 1584 if (ldebug(getuid)) 1585 printf(ARGS(getuid, "")); 1586#endif 1587 1588 td->td_retval[0] = td->td_ucred->cr_ruid; 1589 return (0); 1590} 1591 1592 1593int 1594linux_getsid(struct thread *td, struct linux_getsid_args *args) 1595{ 1596 struct getsid_args bsd; 1597 1598#ifdef DEBUG 1599 if (ldebug(getsid)) 1600 printf(ARGS(getsid, "%i"), args->pid); 1601#endif 1602 1603 bsd.pid = args->pid; 1604 return (sys_getsid(td, &bsd)); 1605} 1606 1607int 1608linux_nosys(struct thread *td, struct nosys_args *ignore) 1609{ 1610 1611 return (ENOSYS); 1612} 1613 1614int 1615linux_getpriority(struct thread *td, struct linux_getpriority_args *args) 1616{ 1617 struct getpriority_args bsd_args; 1618 int error; 1619 1620#ifdef DEBUG 1621 if (ldebug(getpriority)) 1622 printf(ARGS(getpriority, "%i, %i"), args->which, args->who); 1623#endif 1624 1625 bsd_args.which = args->which; 1626 bsd_args.who = args->who; 1627 error = sys_getpriority(td, &bsd_args); 1628 td->td_retval[0] = 20 - td->td_retval[0]; 1629 return (error); 1630} 1631 1632int 1633linux_sethostname(struct thread *td, struct linux_sethostname_args *args) 1634{ 1635 int name[2]; 1636 1637#ifdef DEBUG 1638 if (ldebug(sethostname)) 1639 printf(ARGS(sethostname, "*, %i"), args->len); 1640#endif 1641 1642 name[0] = CTL_KERN; 1643 name[1] = KERN_HOSTNAME; 1644 return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, 1645 args->len, 0, 0)); 1646} 1647 1648int 1649linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) 1650{ 1651 int name[2]; 1652 
1653#ifdef DEBUG 1654 if (ldebug(setdomainname)) 1655 printf(ARGS(setdomainname, "*, %i"), args->len); 1656#endif 1657 1658 name[0] = CTL_KERN; 1659 name[1] = KERN_NISDOMAINNAME; 1660 return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, 1661 args->len, 0, 0)); 1662} 1663 1664int 1665linux_exit_group(struct thread *td, struct linux_exit_group_args *args) 1666{ 1667 struct linux_emuldata *em; 1668 1669#ifdef DEBUG 1670 if (ldebug(exit_group)) 1671 printf(ARGS(exit_group, "%i"), args->error_code); 1672#endif 1673 1674 em = em_find(td->td_proc, EMUL_DONTLOCK); 1675 if (em->shared->refs > 1) { 1676 EMUL_SHARED_WLOCK(&emul_shared_lock); 1677 em->shared->flags |= EMUL_SHARED_HASXSTAT; 1678 em->shared->xstat = W_EXITCODE(args->error_code, 0); 1679 EMUL_SHARED_WUNLOCK(&emul_shared_lock); 1680 if (linux_use26(td)) 1681 linux_kill_threads(td, SIGKILL); 1682 } 1683 1684 /* 1685 * XXX: we should send a signal to the parent if 1686 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) 1687 * as it doesnt occur often. 
1688 */ 1689 exit1(td, W_EXITCODE(args->error_code, 0)); 1690 1691 return (0); 1692} 1693 1694#define _LINUX_CAPABILITY_VERSION 0x19980330 1695 1696struct l_user_cap_header { 1697 l_int version; 1698 l_int pid; 1699}; 1700 1701struct l_user_cap_data { 1702 l_int effective; 1703 l_int permitted; 1704 l_int inheritable; 1705}; 1706 1707int 1708linux_capget(struct thread *td, struct linux_capget_args *args) 1709{ 1710 struct l_user_cap_header luch; 1711 struct l_user_cap_data lucd; 1712 int error; 1713 1714 if (args->hdrp == NULL) 1715 return (EFAULT); 1716 1717 error = copyin(args->hdrp, &luch, sizeof(luch)); 1718 if (error != 0) 1719 return (error); 1720 1721 if (luch.version != _LINUX_CAPABILITY_VERSION) { 1722 luch.version = _LINUX_CAPABILITY_VERSION; 1723 error = copyout(&luch, args->hdrp, sizeof(luch)); 1724 if (error) 1725 return (error); 1726 return (EINVAL); 1727 } 1728 1729 if (luch.pid) 1730 return (EPERM); 1731 1732 if (args->datap) { 1733 /* 1734 * The current implementation doesn't support setting 1735 * a capability (it's essentially a stub) so indicate 1736 * that no capabilities are currently set or available 1737 * to request. 
1738 */ 1739 bzero (&lucd, sizeof(lucd)); 1740 error = copyout(&lucd, args->datap, sizeof(lucd)); 1741 } 1742 1743 return (error); 1744} 1745 1746int 1747linux_capset(struct thread *td, struct linux_capset_args *args) 1748{ 1749 struct l_user_cap_header luch; 1750 struct l_user_cap_data lucd; 1751 int error; 1752 1753 if (args->hdrp == NULL || args->datap == NULL) 1754 return (EFAULT); 1755 1756 error = copyin(args->hdrp, &luch, sizeof(luch)); 1757 if (error != 0) 1758 return (error); 1759 1760 if (luch.version != _LINUX_CAPABILITY_VERSION) { 1761 luch.version = _LINUX_CAPABILITY_VERSION; 1762 error = copyout(&luch, args->hdrp, sizeof(luch)); 1763 if (error) 1764 return (error); 1765 return (EINVAL); 1766 } 1767 1768 if (luch.pid) 1769 return (EPERM); 1770 1771 error = copyin(args->datap, &lucd, sizeof(lucd)); 1772 if (error != 0) 1773 return (error); 1774 1775 /* We currently don't support setting any capabilities. */ 1776 if (lucd.effective || lucd.permitted || lucd.inheritable) { 1777 linux_msg(td, 1778 "capset effective=0x%x, permitted=0x%x, " 1779 "inheritable=0x%x is not implemented", 1780 (int)lucd.effective, (int)lucd.permitted, 1781 (int)lucd.inheritable); 1782 return (EPERM); 1783 } 1784 1785 return (0); 1786} 1787 1788int 1789linux_prctl(struct thread *td, struct linux_prctl_args *args) 1790{ 1791 int error = 0, max_size; 1792 struct proc *p = td->td_proc; 1793 char comm[LINUX_MAX_COMM_LEN]; 1794 struct linux_emuldata *em; 1795 int pdeath_signal; 1796 1797#ifdef DEBUG 1798 if (ldebug(prctl)) 1799 printf(ARGS(prctl, "%d, %d, %d, %d, %d"), args->option, 1800 args->arg2, args->arg3, args->arg4, args->arg5); 1801#endif 1802 1803 switch (args->option) { 1804 case LINUX_PR_SET_PDEATHSIG: 1805 if (!LINUX_SIG_VALID(args->arg2)) 1806 return (EINVAL); 1807 em = em_find(p, EMUL_DOLOCK); 1808 KASSERT(em != NULL, ("prctl: emuldata not found.\n")); 1809 em->pdeath_signal = args->arg2; 1810 EMUL_UNLOCK(&emul_lock); 1811 break; 1812 case LINUX_PR_GET_PDEATHSIG: 1813 em 
= em_find(p, EMUL_DOLOCK); 1814 KASSERT(em != NULL, ("prctl: emuldata not found.\n")); 1815 pdeath_signal = em->pdeath_signal; 1816 EMUL_UNLOCK(&emul_lock); 1817 error = copyout(&pdeath_signal, 1818 (void *)(register_t)args->arg2, 1819 sizeof(pdeath_signal)); 1820 break; 1821 case LINUX_PR_GET_KEEPCAPS: 1822 /* 1823 * Indicate that we always clear the effective and 1824 * permitted capability sets when the user id becomes 1825 * non-zero (actually the capability sets are simply 1826 * always zero in the current implementation). 1827 */ 1828 td->td_retval[0] = 0; 1829 break; 1830 case LINUX_PR_SET_KEEPCAPS: 1831 /* 1832 * Ignore requests to keep the effective and permitted 1833 * capability sets when the user id becomes non-zero. 1834 */ 1835 break; 1836 case LINUX_PR_SET_NAME: 1837 /* 1838 * To be on the safe side we need to make sure to not 1839 * overflow the size a linux program expects. We already 1840 * do this here in the copyin, so that we don't need to 1841 * check on copyout. 1842 */ 1843 max_size = MIN(sizeof(comm), sizeof(p->p_comm)); 1844 error = copyinstr((void *)(register_t)args->arg2, comm, 1845 max_size, NULL); 1846 1847 /* Linux silently truncates the name if it is too long. */ 1848 if (error == ENAMETOOLONG) { 1849 /* 1850 * XXX: copyinstr() isn't documented to populate the 1851 * array completely, so do a copyin() to be on the 1852 * safe side. This should be changed in case 1853 * copyinstr() is changed to guarantee this. 
1854 */ 1855 error = copyin((void *)(register_t)args->arg2, comm, 1856 max_size - 1); 1857 comm[max_size - 1] = '\0'; 1858 } 1859 if (error) 1860 return (error); 1861 1862 PROC_LOCK(p); 1863 strlcpy(p->p_comm, comm, sizeof(p->p_comm)); 1864 PROC_UNLOCK(p); 1865 break; 1866 case LINUX_PR_GET_NAME: 1867 PROC_LOCK(p); 1868 strlcpy(comm, p->p_comm, sizeof(comm)); 1869 PROC_UNLOCK(p); 1870 error = copyout(comm, (void *)(register_t)args->arg2, 1871 strlen(comm) + 1); 1872 break; 1873 default: 1874 error = EINVAL; 1875 break; 1876 } 1877 1878 return (error); 1879} 1880 1881/* 1882 * Get affinity of a process. 1883 */ 1884int 1885linux_sched_getaffinity(struct thread *td, 1886 struct linux_sched_getaffinity_args *args) 1887{ 1888 int error; 1889 struct cpuset_getaffinity_args cga; 1890 1891#ifdef DEBUG 1892 if (ldebug(sched_getaffinity)) 1893 printf(ARGS(sched_getaffinity, "%d, %d, *"), args->pid, 1894 args->len); 1895#endif 1896 if (args->len < sizeof(cpuset_t)) 1897 return (EINVAL); 1898 1899 cga.level = CPU_LEVEL_WHICH; 1900 cga.which = CPU_WHICH_PID; 1901 cga.id = args->pid; 1902 cga.cpusetsize = sizeof(cpuset_t); 1903 cga.mask = (cpuset_t *) args->user_mask_ptr; 1904 1905 if ((error = sys_cpuset_getaffinity(td, &cga)) == 0) 1906 td->td_retval[0] = sizeof(cpuset_t); 1907 1908 return (error); 1909} 1910 1911/* 1912 * Set affinity of a process. 1913 */ 1914int 1915linux_sched_setaffinity(struct thread *td, 1916 struct linux_sched_setaffinity_args *args) 1917{ 1918 struct cpuset_setaffinity_args csa; 1919 1920#ifdef DEBUG 1921 if (ldebug(sched_setaffinity)) 1922 printf(ARGS(sched_setaffinity, "%d, %d, *"), args->pid, 1923 args->len); 1924#endif 1925 if (args->len < sizeof(cpuset_t)) 1926 return (EINVAL); 1927 1928 csa.level = CPU_LEVEL_WHICH; 1929 csa.which = CPU_WHICH_PID; 1930 csa.id = args->pid; 1931 csa.cpusetsize = sizeof(cpuset_t); 1932 csa.mask = (cpuset_t *) args->user_mask_ptr; 1933 1934 return (sys_cpuset_setaffinity(td, &csa)); 1935} 1936