// os_linux.cpp revision 8124:916e4d2fb9ef
/*
 * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

// no precompiled headers
#include "classfile/classLoader.hpp"
#include "classfile/systemDictionary.hpp"
#include "classfile/vmSymbols.hpp"
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
#include "compiler/compileBroker.hpp"
#include "compiler/disassembler.hpp"
#include "interpreter/interpreter.hpp"
#include "jvm_linux.h"
#include "memory/allocation.inline.hpp"
#include "memory/filemap.hpp"
#include "mutex_linux.inline.hpp"
#include "oops/oop.inline.hpp"
#include "os_linux.inline.hpp"
#include "os_share_linux.hpp"
#include "prims/jniFastGetField.hpp"
#include "prims/jvm.h"
#include "prims/jvm_misc.hpp"
#include "runtime/arguments.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/extendedPC.hpp"
#include "runtime/globals.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/init.hpp"
#include "runtime/java.hpp"
#include "runtime/javaCalls.hpp"
#include "runtime/mutexLocker.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/orderAccess.inline.hpp"
#include "runtime/osThread.hpp"
#include "runtime/perfMemory.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/statSampler.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/thread.inline.hpp"
#include "runtime/threadCritical.hpp"
#include "runtime/timer.hpp"
#include "services/attachListener.hpp"
#include "services/memTracker.hpp"
#include "services/runtimeService.hpp"
#include "utilities/decoder.hpp"
#include "utilities/defaultStream.hpp"
#include "utilities/events.hpp"
#include "utilities/elfFile.hpp"
#include "utilities/growableArray.hpp"
#include "utilities/macros.hpp"
#include "utilities/vmError.hpp"

// put OS-includes here
# include <sys/types.h>
# include <sys/mman.h>
# include <sys/stat.h>
# include <sys/select.h>
# include <pthread.h>
# include <signal.h>
# include <errno.h>
# include <dlfcn.h>
# include <stdio.h>
# include <unistd.h>
# include <sys/resource.h>
# include <pthread.h>
# include <sys/stat.h>
# include <sys/time.h>
# include <sys/times.h>
# include <sys/utsname.h>
# include <sys/socket.h>
# include <sys/wait.h>
# include <pwd.h>
# include <poll.h>
# include <semaphore.h>
# include <fcntl.h>
# include <string.h>
# include <syscall.h>
# include <sys/sysinfo.h>
# include <gnu/libc-version.h>
# include <sys/ipc.h>
# include <sys/shm.h>
# include <link.h>
# include <stdint.h>
# include <inttypes.h>
# include <sys/ioctl.h>

PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC

// If RUSAGE_THREAD for getrusage() has not been defined, do it here. The code
// calling getrusage() is prepared to handle the associated failure.
#ifndef RUSAGE_THREAD
  #define RUSAGE_THREAD   (1)               /* only the calling thread */
#endif
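
// Note: RUSAGE_THREAD first appeared in Linux 2.6.26; on older kernels
// getrusage(RUSAGE_THREAD, ...) fails with EINVAL, which is why callers
// such as os::elapsedVTime() below check the return value and fall back.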

#define MAX_PATH    (2 * K)

#define MAX_SECS 100000000

// for timer info max values which include all bits
#define ALL_64_BITS CONST64(0xFFFFFFFFFFFFFFFF)

#define LARGEPAGES_BIT (1 << 6)
////////////////////////////////////////////////////////////////////////////////
// global variables
julong os::Linux::_physical_memory = 0;

address   os::Linux::_initial_thread_stack_bottom = NULL;
uintptr_t os::Linux::_initial_thread_stack_size   = 0;

int (*os::Linux::_clock_gettime)(clockid_t, struct timespec *) = NULL;
int (*os::Linux::_pthread_getcpuclockid)(pthread_t, clockid_t *) = NULL;
int (*os::Linux::_pthread_setname_np)(pthread_t, const char*) = NULL;
Mutex* os::Linux::_createThread_lock = NULL;
pthread_t os::Linux::_main_thread;
int os::Linux::_page_size = -1;
const int os::Linux::_vm_default_page_size = (8 * K);
bool os::Linux::_is_floating_stack = false;
bool os::Linux::_is_NPTL = false;
bool os::Linux::_supports_fast_thread_cpu_time = false;
const char * os::Linux::_glibc_version = NULL;
const char * os::Linux::_libpthread_version = NULL;
pthread_condattr_t os::Linux::_condattr[1];

static jlong initial_time_count = 0;

static int clock_tics_per_sec = 100;

// For diagnostics, to print a message once. See run_periodic_checks.
static sigset_t check_signal_done;
static bool check_signals = true;

static pid_t _initial_pid = 0;

// Signal number used to suspend/resume a thread

// do not use any signal number less than SIGSEGV, see 4355769
static int SR_signum = SIGUSR2;
sigset_t SR_sigset;

// Declarations
static void unpackTime(timespec* absTime, bool isAbsolute, jlong time);

// utility functions

static int SR_initialize();

julong os::available_memory() {
  return Linux::available_memory();
}

julong os::Linux::available_memory() {
  // values in struct sysinfo are "unsigned long"
  struct sysinfo si;
  sysinfo(&si);

  return (julong)si.freeram * si.mem_unit;
}

julong os::physical_memory() {
  return Linux::physical_memory();
}

// Return true if the real and effective user (or group) ids differ,
// e.g. when running a set-uid or set-gid executable.

bool os::have_special_privileges() {
  static bool init = false;
  static bool privileges = false;
  if (!init) {
    privileges = (getuid() != geteuid()) || (getgid() != getegid());
    init = true;
  }
  return privileges;
}


#ifndef SYS_gettid
// i386: 224, ia64: 1105, amd64: 186, sparc: 143
  #ifdef __ia64__
    #define SYS_gettid 1105
  #elif __i386__
    #define SYS_gettid 224
  #elif __amd64__
    #define SYS_gettid 186
  #elif __sparc__
    #define SYS_gettid 143
  #else
    #error define gettid for the arch
  #endif
#endif

// Cpu architecture string
static char cpu_arch[] = HOTSPOT_LIB_ARCH;


// pid_t gettid()
//
// Returns the kernel thread id of the currently running thread. Kernel
// thread id is used to access /proc.
//
// (Note that getpid() on LinuxThreads returns kernel thread id too; but
// on NPTL, it returns the same pid for all threads, as required by POSIX.)
//
pid_t os::Linux::gettid() {
  int rslt = syscall(SYS_gettid);
  if (rslt == -1) {
    // old kernel, no NPTL support
    return getpid();
  } else {
    return (pid_t)rslt;
  }
}
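
// Elsewhere in this file the kernel thread id returned above is used, for
// example, to build /proc/<tid> paths (see initialize_system_info() below)
// and as the OSThread id recorded for newly created threads.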

// Most versions of Linux have a bug whereby the number of processors is
// determined by looking at the /proc file system.  In a chroot environment
// where /proc is not mounted, the system call returns 1.  This causes the VM
// to act as if it is a single processor and elide locking (see the is_MP()
// call).
static bool unsafe_chroot_detected = false;
static const char *unstable_chroot_error = "/proc file system not found.\n"
                     "Java may be unstable running multithreaded in a chroot "
                     "environment on Linux when /proc filesystem is not mounted.";

void os::Linux::initialize_system_info() {
  set_processor_count(sysconf(_SC_NPROCESSORS_CONF));
  if (processor_count() == 1) {
    pid_t pid = os::Linux::gettid();
    char fname[32];
    jio_snprintf(fname, sizeof(fname), "/proc/%d", pid);
    FILE *fp = fopen(fname, "r");
    if (fp == NULL) {
      unsafe_chroot_detected = true;
    } else {
      fclose(fp);
    }
  }
  _physical_memory = (julong)sysconf(_SC_PHYS_PAGES) * (julong)sysconf(_SC_PAGESIZE);
  assert(processor_count() > 0, "linux error");
}

void os::init_system_properties_values() {
  // The next steps are taken in the product version:
  //
  // Obtain the JAVA_HOME value from the location of libjvm.so.
  // This library should be located at:
  // <JAVA_HOME>/jre/lib/<arch>/{client|server}/libjvm.so.
  //
  // If "/jre/lib/" appears at the right place in the path, then we
  // assume libjvm.so is installed in a JDK and we use this path.
  //
  // Otherwise exit with message: "Could not create the Java virtual machine."
  //
  // The following extra steps are taken in the debugging version:
  //
  // If "/jre/lib/" does NOT appear at the right place in the path,
  // instead of exiting check for the $JAVA_HOME environment variable.
  //
  // If it is defined and we are able to locate $JAVA_HOME/jre/lib/<arch>,
  // then we append a fake suffix "hotspot/libjvm.so" to this path so
  // it looks like libjvm.so is installed there:
  // <JAVA_HOME>/jre/lib/<arch>/hotspot/libjvm.so.
  //
  // Otherwise exit.
  //
  // Important note: if the location of libjvm.so changes this
  // code needs to be changed accordingly.

  // See ld(1):
  //      The linker uses the following search paths to locate required
  //      shared libraries:
  //        1: ...
  //        ...
  //        7: The default directories, normally /lib and /usr/lib.
#if defined(AMD64) || defined(_LP64) && (defined(SPARC) || defined(PPC) || defined(S390))
  #define DEFAULT_LIBPATH "/usr/lib64:/lib64:/lib:/usr/lib"
#else
  #define DEFAULT_LIBPATH "/lib:/usr/lib"
#endif

// Base path of extensions installed on the system.
#define SYS_EXT_DIR     "/usr/java/packages"
#define EXTENSIONS_DIR  "/lib/ext"

  // Buffer that fits several sprintfs.
  // Note that the space for the colon and the trailing null are provided
  // by the nulls included by the sizeof operator.
  const size_t bufsize =
    MAX2((size_t)MAXPATHLEN,  // For dll_dir & friends.
         (size_t)MAXPATHLEN + sizeof(EXTENSIONS_DIR) + sizeof(SYS_EXT_DIR) + sizeof(EXTENSIONS_DIR)); // extensions dir
  char *buf = (char *)NEW_C_HEAP_ARRAY(char, bufsize, mtInternal);

  // sysclasspath, java_home, dll_dir
  {
    char *pslash;
    os::jvm_path(buf, bufsize);

    // Found the full path to libjvm.so.
    // Now cut the path to <java_home>/jre if we can.
    pslash = strrchr(buf, '/');
    if (pslash != NULL) {
      *pslash = '\0';            // Get rid of /libjvm.so.
    }
    pslash = strrchr(buf, '/');
    if (pslash != NULL) {
      *pslash = '\0';            // Get rid of /{client|server|hotspot}.
    }
    Arguments::set_dll_dir(buf);

    if (pslash != NULL) {
      pslash = strrchr(buf, '/');
      if (pslash != NULL) {
        *pslash = '\0';          // Get rid of /<arch>.
        pslash = strrchr(buf, '/');
        if (pslash != NULL) {
          *pslash = '\0';        // Get rid of /lib.
        }
      }
    }
    Arguments::set_java_home(buf);
    set_boot_path('/', ':');
  }
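
  // Worked example (hypothetical install path): if os::jvm_path() returns
  //   /opt/jdk/jre/lib/amd64/server/libjvm.so
  // the first two truncations above leave /opt/jdk/jre/lib/amd64 (the
  // dll_dir), and the next two leave /opt/jdk/jre (the java_home).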

  // Where to look for native libraries.
  //
  // Note: Due to a legacy implementation, most of the library path
  // is set in the launcher. This was to accommodate linking restrictions
  // on legacy Linux implementations (which are no longer supported).
  // Eventually, all the library path setting will be done here.
  //
  // However, to prevent the proliferation of improperly built native
  // libraries, the new path component /usr/java/packages is added here.
  {
    // Get the user setting of LD_LIBRARY_PATH, and prepend it. It
    // should always exist (until the legacy problem cited above is
    // addressed).
    const char *v = ::getenv("LD_LIBRARY_PATH");
    const char *v_colon = ":";
    if (v == NULL) { v = ""; v_colon = ""; }
    // That's +1 for the colon and +1 for the trailing '\0'.
    char *ld_library_path = (char *)NEW_C_HEAP_ARRAY(char,
                                                     strlen(v) + 1 +
                                                     sizeof(SYS_EXT_DIR) + sizeof("/lib/") + strlen(cpu_arch) + sizeof(DEFAULT_LIBPATH) + 1,
                                                     mtInternal);
    sprintf(ld_library_path, "%s%s" SYS_EXT_DIR "/lib/%s:" DEFAULT_LIBPATH, v, v_colon, cpu_arch);
    Arguments::set_library_path(ld_library_path);
    FREE_C_HEAP_ARRAY(char, ld_library_path);
  }
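
  // For example (hypothetical values): with LD_LIBRARY_PATH=/opt/native and
  // cpu_arch "amd64", the resulting library path is
  //   /opt/native:/usr/java/packages/lib/amd64:<DEFAULT_LIBPATH>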

  // Extensions directories.
  sprintf(buf, "%s" EXTENSIONS_DIR ":" SYS_EXT_DIR EXTENSIONS_DIR, Arguments::get_java_home());
  Arguments::set_ext_dirs(buf);

  FREE_C_HEAP_ARRAY(char, buf);

#undef DEFAULT_LIBPATH
#undef SYS_EXT_DIR
#undef EXTENSIONS_DIR
}

////////////////////////////////////////////////////////////////////////////////
// breakpoint support

void os::breakpoint() {
  BREAKPOINT;
}

extern "C" void breakpoint() {
  // use debugger to set breakpoint here
}

////////////////////////////////////////////////////////////////////////////////
// signal support

debug_only(static bool signal_sets_initialized = false);
static sigset_t unblocked_sigs, vm_sigs, allowdebug_blocked_sigs;

bool os::Linux::is_sig_ignored(int sig) {
  struct sigaction oact;
  sigaction(sig, (struct sigaction*)NULL, &oact);
  void* ohlr = oact.sa_sigaction ? CAST_FROM_FN_PTR(void*, oact.sa_sigaction)
                                 : CAST_FROM_FN_PTR(void*, oact.sa_handler);
  if (ohlr == CAST_FROM_FN_PTR(void*, SIG_IGN)) {
    return true;
  } else {
    return false;
  }
}

void os::Linux::signal_sets_init() {
  // Should also have an assertion stating we are still single-threaded.
  assert(!signal_sets_initialized, "Already initialized");
  // Fill in signals that are necessarily unblocked for all threads in
  // the VM. Currently, we unblock the following signals:
  // SHUTDOWN{1,2,3}_SIGNAL: for shutdown hooks support (unless overridden
  //                         by -Xrs (=ReduceSignalUsage));
  // BREAK_SIGNAL which is unblocked only by the VM thread and blocked by all
  // other threads. The "ReduceSignalUsage" boolean tells us not to alter
  // the dispositions or masks wrt these signals.
  // Programs embedding the VM that want to use the above signals for their
  // own purposes must, at this time, use the "-Xrs" option to prevent
  // interference with shutdown hooks and BREAK_SIGNAL thread dumping.
  // (See bug 4345157, and other related bugs.)
  // In reality, though, unblocking these signals is really a nop, since
  // these signals are not blocked by default.
  sigemptyset(&unblocked_sigs);
  sigemptyset(&allowdebug_blocked_sigs);
  sigaddset(&unblocked_sigs, SIGILL);
  sigaddset(&unblocked_sigs, SIGSEGV);
  sigaddset(&unblocked_sigs, SIGBUS);
  sigaddset(&unblocked_sigs, SIGFPE);
#if defined(PPC64)
  sigaddset(&unblocked_sigs, SIGTRAP);
#endif
  sigaddset(&unblocked_sigs, SR_signum);

  if (!ReduceSignalUsage) {
    if (!os::Linux::is_sig_ignored(SHUTDOWN1_SIGNAL)) {
      sigaddset(&unblocked_sigs, SHUTDOWN1_SIGNAL);
      sigaddset(&allowdebug_blocked_sigs, SHUTDOWN1_SIGNAL);
    }
    if (!os::Linux::is_sig_ignored(SHUTDOWN2_SIGNAL)) {
      sigaddset(&unblocked_sigs, SHUTDOWN2_SIGNAL);
      sigaddset(&allowdebug_blocked_sigs, SHUTDOWN2_SIGNAL);
    }
    if (!os::Linux::is_sig_ignored(SHUTDOWN3_SIGNAL)) {
      sigaddset(&unblocked_sigs, SHUTDOWN3_SIGNAL);
      sigaddset(&allowdebug_blocked_sigs, SHUTDOWN3_SIGNAL);
    }
  }
  // Fill in signals that are blocked by all but the VM thread.
  sigemptyset(&vm_sigs);
  if (!ReduceSignalUsage) {
    sigaddset(&vm_sigs, BREAK_SIGNAL);
  }
  debug_only(signal_sets_initialized = true);
}

// These are signals that are unblocked while a thread is running Java.
// (For some reason, they get blocked by default.)
sigset_t* os::Linux::unblocked_signals() {
  assert(signal_sets_initialized, "Not initialized");
  return &unblocked_sigs;
}

// These are the signals that are blocked while a (non-VM) thread is
// running Java. Only the VM thread handles these signals.
sigset_t* os::Linux::vm_signals() {
  assert(signal_sets_initialized, "Not initialized");
  return &vm_sigs;
}

// These are signals that are blocked during cond_wait to allow the debugger in.
sigset_t* os::Linux::allowdebug_blocked_signals() {
  assert(signal_sets_initialized, "Not initialized");
  return &allowdebug_blocked_sigs;
}

void os::Linux::hotspot_sigmask(Thread* thread) {

  // Save caller's signal mask before setting VM signal mask.
  sigset_t caller_sigmask;
  pthread_sigmask(SIG_BLOCK, NULL, &caller_sigmask);

  OSThread* osthread = thread->osthread();
  osthread->set_caller_sigmask(caller_sigmask);

  pthread_sigmask(SIG_UNBLOCK, os::Linux::unblocked_signals(), NULL);

  if (!ReduceSignalUsage) {
    if (thread->is_VM_thread()) {
      // Only the VM thread handles BREAK_SIGNAL ...
      pthread_sigmask(SIG_UNBLOCK, vm_signals(), NULL);
    } else {
      // ... all other threads block BREAK_SIGNAL
      pthread_sigmask(SIG_BLOCK, vm_signals(), NULL);
    }
  }
}

//////////////////////////////////////////////////////////////////////////////
// detecting pthread library

void os::Linux::libpthread_init() {
  // Save glibc and pthread version strings. Note that _CS_GNU_LIBC_VERSION
  // and _CS_GNU_LIBPTHREAD_VERSION are supported in glibc >= 2.3.2. Use a
  // generic name for earlier versions.
  // Define macros here so we can build HotSpot on old systems.
#ifndef _CS_GNU_LIBC_VERSION
  #define _CS_GNU_LIBC_VERSION 2
#endif
#ifndef _CS_GNU_LIBPTHREAD_VERSION
  #define _CS_GNU_LIBPTHREAD_VERSION 3
#endif

  size_t n = confstr(_CS_GNU_LIBC_VERSION, NULL, 0);
  if (n > 0) {
    char *str = (char *)malloc(n, mtInternal);
    confstr(_CS_GNU_LIBC_VERSION, str, n);
    os::Linux::set_glibc_version(str);
  } else {
    // _CS_GNU_LIBC_VERSION is not supported, try gnu_get_libc_version()
    static char _gnu_libc_version[32];
    jio_snprintf(_gnu_libc_version, sizeof(_gnu_libc_version),
                 "glibc %s %s", gnu_get_libc_version(), gnu_get_libc_release());
    os::Linux::set_glibc_version(_gnu_libc_version);
  }

  n = confstr(_CS_GNU_LIBPTHREAD_VERSION, NULL, 0);
  if (n > 0) {
    char *str = (char *)malloc(n, mtInternal);
    confstr(_CS_GNU_LIBPTHREAD_VERSION, str, n);
    // Vanilla RH-9 (glibc 2.3.2) has a bug where confstr() always tells
    // us "NPTL-0.29" even when we are running with LinuxThreads. Check if
    // this is the case. LinuxThreads has a hard limit on the max number of
    // threads, so sysconf(_SC_THREAD_THREADS_MAX) will return a positive
    // value. On the other hand, NPTL does not have such a limit; sysconf()
    // will return -1 and errno is not changed. Check if it is really NPTL.
    if (strcmp(os::Linux::glibc_version(), "glibc 2.3.2") == 0 &&
        strstr(str, "NPTL") &&
        sysconf(_SC_THREAD_THREADS_MAX) > 0) {
      free(str);
      os::Linux::set_libpthread_version("linuxthreads");
    } else {
      os::Linux::set_libpthread_version(str);
    }
  } else {
    // glibc before 2.3.2 only has LinuxThreads.
    os::Linux::set_libpthread_version("linuxthreads");
  }

  if (strstr(libpthread_version(), "NPTL")) {
    os::Linux::set_is_NPTL();
  } else {
    os::Linux::set_is_LinuxThreads();
  }

  // LinuxThreads has two flavors: floating-stack mode, which allows variable
  // stack size, and fixed-stack mode. NPTL is always floating-stack.
  if (os::Linux::is_NPTL() || os::Linux::supports_variable_stack_size()) {
    os::Linux::set_is_floating_stack();
  }
}
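
// For reference, on a modern glibc the strings captured above typically look
// like "glibc 2.x" and "NPTL 2.x" (exact values depend on the distribution).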

/////////////////////////////////////////////////////////////////////////////
// thread stack

// Force Linux kernel to expand current thread stack. If "bottom" is close
// to the stack guard, caller should block all signals.
//
// MAP_GROWSDOWN:
//   A special mmap() flag that is used to implement thread stacks. It tells
//   kernel that the memory region should extend downwards when needed. This
//   allows early versions of LinuxThreads to only mmap the first few pages
//   when creating a new thread. Linux kernel will automatically expand thread
//   stack as needed (on page faults).
//
//   However, because the memory region of a MAP_GROWSDOWN stack can grow on
//   demand, if a page fault happens outside an already mapped MAP_GROWSDOWN
//   region, it's hard to tell if the fault is due to a legitimate stack
//   access or because of reading/writing non-existent memory (e.g. buffer
//   overrun). As a rule, if the fault happens below current stack pointer,
//   Linux kernel does not expand stack, instead a SIGSEGV is sent to the
//   application (see Linux kernel fault.c).
//
//   This Linux feature can cause SIGSEGV when VM bangs thread stack for
//   stack overflow detection.
//
//   Newer versions of LinuxThreads (since glibc-2.2, or, RH-7.x) and NPTL do
//   not use this flag. However, the stack of the initial thread is not created
//   by pthread, so it is still MAP_GROWSDOWN. Also it's possible (though
//   unlikely) that user code can create a thread with MAP_GROWSDOWN stack
//   and then attach the thread to JVM.
//
// To get around the problem and allow stack banging on Linux, we need to
// manually expand thread stack after receiving the SIGSEGV.
//
// There are two ways to expand thread stack to address "bottom", we used
// both of them in JVM before 1.5:
//   1. adjust stack pointer first so that it is below "bottom", and then
//      touch "bottom"
//   2. mmap() the page in question
//
// Now that the alternate signal stack is gone, it's harder to use 2. For
// instance, if current sp is already near the lower end of page 101, and we
// need to call mmap() to map page 100, it is possible that part of the mmap()
// frame will be placed in page 100. When page 100 is mapped, it is zero-filled.
// That will destroy the mmap() frame and cause VM to crash.
//
// The following code works by adjusting sp first, then accessing the "bottom"
// page to force a page fault. Linux kernel will then automatically expand the
// stack mapping.
//
// _expand_stack_to() assumes its frame size is less than page size, which
// should always be true if the function is not inlined.

#if __GNUC__ < 3    // gcc 2.x does not support noinline attribute
  #define NOINLINE
#else
  #define NOINLINE __attribute__ ((noinline))
#endif

static void _expand_stack_to(address bottom) NOINLINE;

static void _expand_stack_to(address bottom) {
  address sp;
  size_t size;
  volatile char *p;

  // Adjust bottom to point to the largest address within the same page; this
  // gives us a one-page buffer if alloca() allocates slightly more memory.
  bottom = (address)align_size_down((uintptr_t)bottom, os::Linux::page_size());
  bottom += os::Linux::page_size() - 1;

  // sp might be slightly above current stack pointer; if that's the case, we
  // will alloca() a little more space than necessary, which is OK. Don't use
  // os::current_stack_pointer(), as its result can be slightly below current
  // stack pointer, causing us to not alloca enough to reach "bottom".
  sp = (address)&sp;

  if (sp > bottom) {
    size = sp - bottom;
    p = (volatile char *)alloca(size);
    assert(p != NULL && p <= (volatile char *)bottom, "alloca problem?");
    p[0] = '\0';
  }
}
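
// Note that the noinline attribute above is essential: if _expand_stack_to()
// were inlined into its caller, the combined frame could exceed a page and
// break the frame-size assumption documented above.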

bool os::Linux::manually_expand_stack(JavaThread * t, address addr) {
  assert(t != NULL, "just checking");
  assert(t->osthread()->expanding_stack(), "expand should be set");
  assert(t->stack_base() != NULL, "stack_base was not initialized");

  if (addr < t->stack_base() && addr >= t->stack_yellow_zone_base()) {
    sigset_t mask_all, old_sigset;
    sigfillset(&mask_all);
    pthread_sigmask(SIG_SETMASK, &mask_all, &old_sigset);
    _expand_stack_to(addr);
    pthread_sigmask(SIG_SETMASK, &old_sigset, NULL);
    return true;
  }
  return false;
}

//////////////////////////////////////////////////////////////////////////////
// create new thread

static address highest_vm_reserved_address();

// check if it's safe to start a new thread
static bool _thread_safety_check(Thread* thread) {
  if (os::Linux::is_LinuxThreads() && !os::Linux::is_floating_stack()) {
    // Fixed stack LinuxThreads (SuSE Linux/x86, and some versions of Redhat)
    //   Heap is mmap'ed at lower end of memory space. Thread stacks are
    //   allocated (MAP_FIXED) from high address space. Every thread stack
    //   occupies a fixed size slot (usually 2 Mbytes, but users can change
    //   it to other values if they rebuild LinuxThreads).
    //
    // Problem with MAP_FIXED is that mmap() can still succeed even if part of
    // the memory region has already been mmap'ed. That means if we have too
    // many threads and/or a very large heap, eventually a thread stack will
    // collide with the heap.
    //
    // Here we try to prevent heap/stack collision by comparing current
    // stack bottom with the highest address that has been mmap'ed by JVM
    // plus a safety margin for memory maps created by native code.
    //
    // This feature can be disabled by setting ThreadSafetyMargin to 0
    //
    if (ThreadSafetyMargin > 0) {
      address stack_bottom = os::current_stack_base() - os::current_stack_size();

      // not safe if our stack extends below the safety margin
      return stack_bottom - ThreadSafetyMargin >= highest_vm_reserved_address();
    } else {
      return true;
    }
  } else {
    // Floating stack LinuxThreads or NPTL:
    //   Unlike fixed stack LinuxThreads, thread stacks are not MAP_FIXED. When
    //   there's not enough space left, pthread_create() will fail. If we come
    //   here, that means enough space has been reserved for stack.
    return true;
  }
}

// Thread start routine for all newly created threads
static void *java_start(Thread *thread) {
  // Try to randomize the cache line index of hot stack frames.
  // This helps when threads with the same stack traces evict each other's
  // cache lines. The threads can be either from the same JVM instance, or
  // from different JVM instances. The benefit is especially significant for
  // processors with hyperthreading technology.
  static int counter = 0;
  int pid = os::current_process_id();
  alloca(((pid ^ counter++) & 7) * 128);
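  // (pid ^ counter++) & 7 yields a value in 0..7, so successive threads
  // offset their stacks by 0 to 896 bytes in 128-byte steps, staggering
  // the cache line alignment of otherwise identical frames.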

  ThreadLocalStorage::set_thread(thread);

  OSThread* osthread = thread->osthread();
  Monitor* sync = osthread->startThread_lock();

  // non-floating-stack LinuxThreads needs an extra check, see above
  if (!_thread_safety_check(thread)) {
    // notify parent thread
    MutexLockerEx ml(sync, Mutex::_no_safepoint_check_flag);
    osthread->set_state(ZOMBIE);
    sync->notify_all();
    return NULL;
  }

  // thread_id is kernel thread id (similar to Solaris LWP id)
  osthread->set_thread_id(os::Linux::gettid());

  if (UseNUMA) {
    int lgrp_id = os::numa_get_group_id();
    if (lgrp_id != -1) {
      thread->set_lgrp_id(lgrp_id);
    }
  }
  // initialize signal mask for this thread
  os::Linux::hotspot_sigmask(thread);

  // initialize floating point control register
  os::Linux::init_thread_fpu_state();

  // handshaking with parent thread
  {
    MutexLockerEx ml(sync, Mutex::_no_safepoint_check_flag);

    // notify parent thread
    osthread->set_state(INITIALIZED);
    sync->notify_all();

    // wait until os::start_thread()
    while (osthread->get_state() == INITIALIZED) {
      sync->wait(Mutex::_no_safepoint_check_flag);
    }
  }

  // call one more level start routine
  thread->run();

  return 0;
}

bool os::create_thread(Thread* thread, ThreadType thr_type,
                       size_t stack_size) {
  assert(thread->osthread() == NULL, "caller responsible");

  // Allocate the OSThread object
  OSThread* osthread = new OSThread(NULL, NULL);
  if (osthread == NULL) {
    return false;
  }

  // set the correct thread state
  osthread->set_thread_type(thr_type);

  // Initial state is ALLOCATED but not INITIALIZED
  osthread->set_state(ALLOCATED);

  thread->set_osthread(osthread);

  // init thread attributes
  pthread_attr_t attr;
  pthread_attr_init(&attr);
  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);

  // stack size
  if (os::Linux::supports_variable_stack_size()) {
    // calculate stack size if it's not specified by caller
    if (stack_size == 0) {
      stack_size = os::Linux::default_stack_size(thr_type);

      switch (thr_type) {
      case os::java_thread:
        // Java threads use ThreadStackSize, whose default value can be
        // changed with the flag -Xss
        assert(JavaThread::stack_size_at_create() > 0, "this should be set");
        stack_size = JavaThread::stack_size_at_create();
        break;
      case os::compiler_thread:
        if (CompilerThreadStackSize > 0) {
          stack_size = (size_t)(CompilerThreadStackSize * K);
          break;
        } // else fall through:
          // use VMThreadStackSize if CompilerThreadStackSize is not defined
      case os::vm_thread:
      case os::pgc_thread:
      case os::cgc_thread:
      case os::watcher_thread:
        if (VMThreadStackSize > 0) stack_size = (size_t)(VMThreadStackSize * K);
        break;
      }
    }

    stack_size = MAX2(stack_size, os::Linux::min_stack_allowed);
    pthread_attr_setstacksize(&attr, stack_size);
  } else {
    // let pthread_create() pick the default value.
  }

  // glibc guard page
  pthread_attr_setguardsize(&attr, os::Linux::default_guard_size(thr_type));

  ThreadState state;

  {
    // Serialize thread creation if we are running with fixed stack LinuxThreads
    bool lock = os::Linux::is_LinuxThreads() && !os::Linux::is_floating_stack();
    if (lock) {
      os::Linux::createThread_lock()->lock_without_safepoint_check();
    }

    pthread_t tid;
    int ret = pthread_create(&tid, &attr, (void* (*)(void*)) java_start, thread);

    pthread_attr_destroy(&attr);

    if (ret != 0) {
      if (PrintMiscellaneous && (Verbose || WizardMode)) {
        perror("pthread_create()");
      }
      // Need to clean up stuff we've allocated so far
      thread->set_osthread(NULL);
      delete osthread;
      if (lock) os::Linux::createThread_lock()->unlock();
      return false;
    }

    // Store pthread info into the OSThread
    osthread->set_pthread_id(tid);

    // Wait until child thread is either initialized or aborted
    {
      Monitor* sync_with_child = osthread->startThread_lock();
      MutexLockerEx ml(sync_with_child, Mutex::_no_safepoint_check_flag);
      while ((state = osthread->get_state()) == ALLOCATED) {
        sync_with_child->wait(Mutex::_no_safepoint_check_flag);
      }
    }

    if (lock) {
      os::Linux::createThread_lock()->unlock();
    }
  }

  // Aborted due to thread limit being reached
  if (state == ZOMBIE) {
    thread->set_osthread(NULL);
    delete osthread;
    return false;
  }

  // The thread is returned suspended (in state INITIALIZED),
  // and is started higher up in the call chain
  assert(state == INITIALIZED, "race condition");
  return true;
}

/////////////////////////////////////////////////////////////////////////////
// attach existing thread

// bootstrap the main thread
bool os::create_main_thread(JavaThread* thread) {
  assert(os::Linux::_main_thread == pthread_self(), "should be called inside main thread");
  return create_attached_thread(thread);
}

bool os::create_attached_thread(JavaThread* thread) {
#ifdef ASSERT
  thread->verify_not_published();
#endif

  // Allocate the OSThread object
  OSThread* osthread = new OSThread(NULL, NULL);

  if (osthread == NULL) {
    return false;
  }

  // Store pthread info into the OSThread
  osthread->set_thread_id(os::Linux::gettid());
  osthread->set_pthread_id(::pthread_self());

  // initialize floating point control register
  os::Linux::init_thread_fpu_state();

  // Initial thread state is RUNNABLE
  osthread->set_state(RUNNABLE);

  thread->set_osthread(osthread);

  if (UseNUMA) {
    int lgrp_id = os::numa_get_group_id();
    if (lgrp_id != -1) {
      thread->set_lgrp_id(lgrp_id);
    }
  }

  if (os::Linux::is_initial_thread()) {
    // If the current thread is the initial thread, its stack is mapped on
    // demand; see the notes about MAP_GROWSDOWN above. Here we try to force
    // the kernel to map the entire stack region, to avoid SEGV in stack
    // banging. It is also useful to get around the heap-stack-gap problem
    // on SuSE kernels (see 4821821 for details). We first expand the stack
    // to the top of the yellow zone, then enable the stack yellow zone
    // (order is significant: enabling the yellow zone first will crash the
    // JVM on SuSE Linux), so there is no gap between the last two virtual
    // memory regions.

    JavaThread *jt = (JavaThread *)thread;
    address addr = jt->stack_yellow_zone_base();
    assert(addr != NULL, "initialization problem?");
    assert(jt->stack_available(addr) > 0, "stack guard should not be enabled");

    osthread->set_expanding_stack();
    os::Linux::manually_expand_stack(jt, addr);
    osthread->clear_expanding_stack();
  }

  // initialize signal mask for this thread
  // and save the caller's signal mask
  os::Linux::hotspot_sigmask(thread);

  return true;
}

void os::pd_start_thread(Thread* thread) {
  OSThread* osthread = thread->osthread();
  assert(osthread->get_state() != INITIALIZED, "just checking");
  Monitor* sync_with_child = osthread->startThread_lock();
  MutexLockerEx ml(sync_with_child, Mutex::_no_safepoint_check_flag);
  sync_with_child->notify();
}

// Free Linux resources related to the OSThread
void os::free_thread(OSThread* osthread) {
  assert(osthread != NULL, "osthread not set");

  if (Thread::current()->osthread() == osthread) {
    // Restore caller's signal mask
    sigset_t sigmask = osthread->caller_sigmask();
    pthread_sigmask(SIG_SETMASK, &sigmask, NULL);
  }

  delete osthread;
}

//////////////////////////////////////////////////////////////////////////////
// thread local storage

// Restore the thread pointer if the destructor is called. This is in case
// someone from JNI code sets up a destructor with pthread_key_create to run
// detachCurrentThread on thread death. Unless we restore the thread pointer we
// will hang or crash. When detachCurrentThread is called the key will be set
// to null and we will not be called again. If detachCurrentThread is never
// called we could loop forever depending on the pthread implementation.
static void restore_thread_pointer(void* p) {
  Thread* thread = (Thread*) p;
  os::thread_local_storage_at_put(ThreadLocalStorage::thread_index(), thread);
}

int os::allocate_thread_local_storage() {
  pthread_key_t key;
  int rslt = pthread_key_create(&key, restore_thread_pointer);
  assert(rslt == 0, "cannot allocate thread local storage");
  return (int)key;
}

// Note: This is currently not used by VM, as we don't destroy TLS key
// on VM exit.
void os::free_thread_local_storage(int index) {
  int rslt = pthread_key_delete((pthread_key_t)index);
  assert(rslt == 0, "invalid index");
}

void os::thread_local_storage_at_put(int index, void* value) {
  int rslt = pthread_setspecific((pthread_key_t)index, value);
  assert(rslt == 0, "pthread_setspecific failed");
}

extern "C" Thread* get_thread() {
  return ThreadLocalStorage::thread();
}
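
// Illustrative round-trip through the TLS wrappers above (hypothetical use):
//   int idx = os::allocate_thread_local_storage();
//   os::thread_local_storage_at_put(idx, thread);
//   // pthread_getspecific((pthread_key_t)idx) now returns 'thread'
//   os::free_thread_local_storage(idx);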

//////////////////////////////////////////////////////////////////////////////
// initial thread

// Check if current thread is the initial thread, similar to Solaris thr_main.
bool os::Linux::is_initial_thread(void) {
  char dummy;
  // If called before init complete, thread stack bottom will be null.
  // Can be called if fatal error occurs before initialization.
  if (initial_thread_stack_bottom() == NULL) return false;
  assert(initial_thread_stack_bottom() != NULL &&
         initial_thread_stack_size()   != 0,
         "os::init did not locate initial thread's stack region");
  if ((address)&dummy >= initial_thread_stack_bottom() &&
      (address)&dummy < initial_thread_stack_bottom() + initial_thread_stack_size()) {
    return true;
  } else {
    return false;
  }
}

// Find the virtual memory area that contains addr
static bool find_vma(address addr, address* vma_low, address* vma_high) {
  FILE *fp = fopen("/proc/self/maps", "r");
  if (fp) {
    address low, high;
    while (!feof(fp)) {
      if (fscanf(fp, "%p-%p", &low, &high) == 2) {
        if (low <= addr && addr < high) {
          if (vma_low)  *vma_low  = low;
          if (vma_high) *vma_high = high;
          fclose(fp);
          return true;
        }
      }
      for (;;) {
        int ch = fgetc(fp);
        if (ch == EOF || ch == (int)'\n') break;
      }
    }
    fclose(fp);
  }
  return false;
}

// Locate initial thread stack. This special handling of the initial thread
// stack is needed because pthread_getattr_np() on most (all?) Linux distros
// returns a bogus value for the initial thread.
void os::Linux::capture_initial_stack(size_t max_size) {
  // stack size is the easy part, get it from RLIMIT_STACK
  size_t stack_size;
  struct rlimit rlim;
  getrlimit(RLIMIT_STACK, &rlim);
  stack_size = rlim.rlim_cur;

  // 6308388: a bug in ld.so will relocate its own .data section to the
  //   lower end of primordial stack; reduce ulimit -s value a little bit
  //   so we won't install guard page on ld.so's data section.
  stack_size -= 2 * page_size();

  // 4441425: avoid crash with "unlimited" stack size on SuSE 7.1 or Redhat
  //   7.1; in both cases we will get 2G as the return value.
  // 4466587: glibc 2.2.x compiled w/o "--enable-kernel=2.4.0" (RH 7.0,
  //   SuSE 7.2, Debian) cannot handle the alternate signal stack correctly
  //   for the initial thread if its stack size exceeds 6M. Cap it at 2M,
  //   in case other parts of glibc still assume a 2M max stack size.
  // FIXME: alt signal stack is gone, maybe we can relax this constraint?
  // The problem still exists on RH7.2 (IA64 anyway), but 2MB is a little small.
  if (stack_size > 2 * K * K IA64_ONLY(*2)) {
    stack_size = 2 * K * K IA64_ONLY(*2);
  }
  // Try to figure out where the stack base (top) is. This is harder.
  //
  // When an application is started, glibc saves the initial stack pointer in
  // a global variable "__libc_stack_end", which is then used by system
  // libraries. __libc_stack_end should be pretty close to stack top. The
  // variable has been available since the very early days. However, because
  // it is a private interface, it could disappear in the future.
  //
  // Linux kernel saves start_stack information in /proc/<pid>/stat. Similar
  // to __libc_stack_end, it is very close to stack top, but isn't the real
  // stack top. Note that /proc may not exist if VM is running as a chroot
  // program, so reading /proc/<pid>/stat could fail. Also the contents of
  // /proc/<pid>/stat could change in the future (though unlikely).
  //
  // We try __libc_stack_end first. If that doesn't work, look for
  // /proc/<pid>/stat. If neither of them works, we use current stack pointer
  // as a hint, which should work well in most cases.

  uintptr_t stack_start;

  // try __libc_stack_end first
  uintptr_t *p = (uintptr_t *)dlsym(RTLD_DEFAULT, "__libc_stack_end");
  if (p && *p) {
    stack_start = *p;
  } else {
    // see if we can get the start_stack field from /proc/self/stat
    FILE *fp;
    int pid;
    char state;
    int ppid;
    int pgrp;
    int session;
    int nr;
    int tpgrp;
    unsigned long flags;
    unsigned long minflt;
    unsigned long cminflt;
    unsigned long majflt;
    unsigned long cmajflt;
    unsigned long utime;
    unsigned long stime;
    long cutime;
    long cstime;
    long prio;
    long nice;
    long junk;
    long it_real;
    uintptr_t start;
    uintptr_t vsize;
    intptr_t rss;
    uintptr_t rsslim;
    uintptr_t scodes;
    uintptr_t ecode;
    int i;

    // Figure out what the primordial thread stack base is. Code is inspired
    // by email from Hans Boehm. /proc/self/stat begins with current pid,
    // followed by command name surrounded by parentheses, state, etc.
    char stat[2048];
    int statlen;

    fp = fopen("/proc/self/stat", "r");
    if (fp) {
      statlen = fread(stat, 1, 2047, fp);
      stat[statlen] = '\0';
      fclose(fp);

      // Skip pid and the command string. Note that we could be dealing with
      // weird command names, e.g. user could decide to rename java launcher
      // to "java 1.4.2 :)", then the stat file would look like
      //                1234 (java 1.4.2 :)) R ... ...
      // We don't really need to know the command string, just find the last
      // occurrence of ")" and then start parsing from there. See bug 4726580.
      char * s = strrchr(stat, ')');

      i = 0;
      if (s) {
        // Skip blank chars
        do { s++; } while (s && isspace(*s));

#define _UFM UINTX_FORMAT
#define _DFM INTX_FORMAT

        //                                     1   1   1   1   1   1   1   1   1   1   2   2    2    2    2    2    2    2    2
        //              3  4  5  6  7  8   9   0   1   2   3   4   5   6   7   8   9   0   1    2    3    4    5    6    7    8
        i = sscanf(s, "%c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld " _UFM _UFM _DFM _UFM _UFM _UFM _UFM,
                   &state,          // 3  %c
                   &ppid,           // 4  %d
                   &pgrp,           // 5  %d
                   &session,        // 6  %d
                   &nr,             // 7  %d
                   &tpgrp,          // 8  %d
                   &flags,          // 9  %lu
                   &minflt,         // 10 %lu
                   &cminflt,        // 11 %lu
                   &majflt,         // 12 %lu
                   &cmajflt,        // 13 %lu
                   &utime,          // 14 %lu
                   &stime,          // 15 %lu
                   &cutime,         // 16 %ld
                   &cstime,         // 17 %ld
                   &prio,           // 18 %ld
                   &nice,           // 19 %ld
                   &junk,           // 20 %ld
                   &it_real,        // 21 %ld
                   &start,          // 22 UINTX_FORMAT
                   &vsize,          // 23 UINTX_FORMAT
                   &rss,            // 24 INTX_FORMAT
                   &rsslim,         // 25 UINTX_FORMAT
                   &scodes,         // 26 UINTX_FORMAT
                   &ecode,          // 27 UINTX_FORMAT
                   &stack_start);   // 28 UINTX_FORMAT
      }

#undef _UFM
#undef _DFM

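      // The sscanf above converts fields 3 through 28 of /proc/self/stat,
      // i.e. 26 fields in total, so a fully successful parse returns 28 - 2.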
      if (i != 28 - 2) {
        assert(false, "Bad conversion from /proc/self/stat");
        // product mode - assume we are the initial thread, good luck in the
        // embedded case.
        warning("Can't detect initial thread stack location - bad conversion");
        stack_start = (uintptr_t) &rlim;
      }
    } else {
      // For some reason we can't open /proc/self/stat (for example, running
      // on FreeBSD with a Linux emulator, or inside a chroot). The fallback
      // below should work for most cases, so don't abort:
      warning("Can't detect initial thread stack location - no /proc/self/stat");
      stack_start = (uintptr_t) &rlim;
    }
  }

  // Now we have a pointer (stack_start) very close to the stack top, the
  // next thing to do is to figure out the exact location of stack top. We
  // can find out the virtual memory area that contains stack_start by
  // reading /proc/self/maps; it should be the last vma in /proc/self/maps,
  // and its upper limit is the real stack top. (again, this would fail if
  // running inside chroot, because /proc may not exist.)

  uintptr_t stack_top;
  address low, high;
  if (find_vma((address)stack_start, &low, &high)) {
    // success, "high" is the true stack top. (ignore "low", because initial
    // thread stack grows on demand, its real bottom is high - RLIMIT_STACK.)
    stack_top = (uintptr_t)high;
  } else {
    // failed, likely because /proc/self/maps does not exist
    warning("Can't detect initial thread stack location - find_vma failed");
    // best effort: stack_start is normally within a few pages below the real
    // stack top, use it as stack top, and reduce stack size so we won't put
    // guard page outside stack.
    stack_top = stack_start;
    stack_size -= 16 * page_size();
  }

  // stack_top could be partially down the page so align it
  stack_top = align_size_up(stack_top, page_size());

  if (max_size && stack_size > max_size) {
    _initial_thread_stack_size = max_size;
  } else {
    _initial_thread_stack_size = stack_size;
  }

  _initial_thread_stack_size = align_size_down(_initial_thread_stack_size, page_size());
  _initial_thread_stack_bottom = (address)stack_top - _initial_thread_stack_size;
}

////////////////////////////////////////////////////////////////////////////////
// time support

// Time since start-up in seconds to a fine granularity.
// Used by VMSelfDestructTimer and the MemProfiler.
double os::elapsedTime() {
  return ((double)os::elapsed_counter()) / os::elapsed_frequency(); // nanosecond resolution
}

jlong os::elapsed_counter() {
  return javaTimeNanos() - initial_time_count;
}

jlong os::elapsed_frequency() {
  return NANOSECS_PER_SEC; // nanosecond resolution
}

bool os::supports_vtime() { return true; }
bool os::enable_vtime()   { return false; }
bool os::vtime_enabled()  { return false; }

double os::elapsedVTime() {
  struct rusage usage;
  int retval = getrusage(RUSAGE_THREAD, &usage);
  if (retval == 0) {
    return (double) (usage.ru_utime.tv_sec  + usage.ru_stime.tv_sec) +
           (double) (usage.ru_utime.tv_usec + usage.ru_stime.tv_usec) / (1000 * 1000);
  } else {
    // better than nothing, but not much
    return elapsedTime();
  }
}

jlong os::javaTimeMillis() {
  timeval time;
  int status = gettimeofday(&time, NULL);
  assert(status != -1, "linux error");
  return jlong(time.tv_sec) * 1000 + jlong(time.tv_usec / 1000);
}

void os::javaTimeSystemUTC(jlong &seconds, jlong &nanos) {
  timeval time;
  int status = gettimeofday(&time, NULL);
  assert(status != -1, "linux error");
  seconds = jlong(time.tv_sec);
  nanos = jlong(time.tv_usec) * 1000;
}


#ifndef CLOCK_MONOTONIC
  #define CLOCK_MONOTONIC (1)
#endif

void os::Linux::clock_init() {
  // We do dlopen's in this particular order due to a bug in the Linux
  // dynamic loader (see 6348968) leading to a crash on exit.
  void* handle = dlopen("librt.so.1", RTLD_LAZY);
  if (handle == NULL) {
    handle = dlopen("librt.so", RTLD_LAZY);
  }

  if (handle) {
    int (*clock_getres_func)(clockid_t, struct timespec*) =
           (int(*)(clockid_t, struct timespec*))dlsym(handle, "clock_getres");
    int (*clock_gettime_func)(clockid_t, struct timespec*) =
           (int(*)(clockid_t, struct timespec*))dlsym(handle, "clock_gettime");
    if (clock_getres_func && clock_gettime_func) {
      // See if the monotonic clock is supported by the kernel. Note that some
      // early implementations simply return kernel jiffies (updated every
      // 1/100 or 1/1000 second). It would be bad to use such a low-res clock
      // for nano time (though the monotonic property is still nice to have).
      // It's fixed in newer kernels, however clock_getres() still returns
      // 1/HZ. We check if clock_getres() works, but will ignore its reported
      // resolution for now. Hopefully as people move to new kernels, this
      // won't be a problem.
      struct timespec res;
      struct timespec tp;
      if (clock_getres_func (CLOCK_MONOTONIC, &res) == 0 &&
          clock_gettime_func(CLOCK_MONOTONIC, &tp)  == 0) {
        // yes, monotonic clock is supported
        _clock_gettime = clock_gettime_func;
        return;
      } else {
        // close librt if there is no monotonic clock
        dlclose(handle);
      }
    }
  }
  warning("No monotonic clock was available - timed services may " \
          "be adversely affected if the time-of-day clock changes");
}

#ifndef SYS_clock_getres
  #if defined(IA32) || defined(AMD64)
    #define SYS_clock_getres IA32_ONLY(266)  AMD64_ONLY(229)
    #define sys_clock_getres(x,y)  ::syscall(SYS_clock_getres, x, y)
  #else
    #warning "SYS_clock_getres not defined for this platform, disabling fast_thread_cpu_time"
    #define sys_clock_getres(x,y)  -1
  #endif
#else
  #define sys_clock_getres(x,y)  ::syscall(SYS_clock_getres, x, y)
#endif

void os::Linux::fast_thread_clock_init() {
  if (!UseLinuxPosixThreadCPUClocks) {
    return;
  }
  clockid_t clockid;
  struct timespec tp;
  int (*pthread_getcpuclockid_func)(pthread_t, clockid_t *) =
      (int(*)(pthread_t, clockid_t *)) dlsym(RTLD_DEFAULT, "pthread_getcpuclockid");

  // Switch to using fast clocks for thread cpu time if
  // sys_clock_getres() returns error code 0.
  // Note that some kernels may support the current thread
  // clock (CLOCK_THREAD_CPUTIME_ID) but not the clocks
  // returned by pthread_getcpuclockid().
  // If the fast Posix clocks are supported then sys_clock_getres()
  // must return at least tp.tv_sec == 0, which means a resolution
  // better than 1 sec. This is an extra check for reliability.

  if (pthread_getcpuclockid_func &&
      pthread_getcpuclockid_func(_main_thread, &clockid) == 0 &&
      sys_clock_getres(clockid, &tp) == 0 && tp.tv_sec == 0) {
    _supports_fast_thread_cpu_time = true;
    _pthread_getcpuclockid = pthread_getcpuclockid_func;
  }
}
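
// When the check above succeeds, per-thread CPU time can later be read
// cheaply by passing the clockid from pthread_getcpuclockid() to
// clock_gettime(). Illustrative sketch (hypothetical caller):
//   clockid_t cid;
//   struct timespec ts;
//   if (os::Linux::_pthread_getcpuclockid(tid, &cid) == 0 &&
//       os::Linux::clock_gettime(cid, &ts) == 0) {
//     jlong cpu_ns = jlong(ts.tv_sec) * (1000 * 1000 * 1000) + jlong(ts.tv_nsec);
//   }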

jlong os::javaTimeNanos() {
  if (os::supports_monotonic_clock()) {
    struct timespec tp;
    int status = Linux::clock_gettime(CLOCK_MONOTONIC, &tp);
    assert(status == 0, "gettime error");
    jlong result = jlong(tp.tv_sec) * (1000 * 1000 * 1000) + jlong(tp.tv_nsec);
    return result;
  } else {
    timeval time;
    int status = gettimeofday(&time, NULL);
    assert(status != -1, "linux error");
    jlong usecs = jlong(time.tv_sec) * (1000 * 1000) + jlong(time.tv_usec);
    return 1000 * usecs;
  }
}

void os::javaTimeNanos_info(jvmtiTimerInfo *info_ptr) {
  if (os::supports_monotonic_clock()) {
    info_ptr->max_value = ALL_64_BITS;

    // CLOCK_MONOTONIC - amount of time since some arbitrary point in the past
    info_ptr->may_skip_backward = false;      // not subject to resetting or drifting
    info_ptr->may_skip_forward = false;       // not subject to resetting or drifting
  } else {
    // gettimeofday - based on time in seconds since the Epoch, thus does not wrap
    info_ptr->max_value = ALL_64_BITS;

    // gettimeofday is a real time clock so it skips
    info_ptr->may_skip_backward = true;
    info_ptr->may_skip_forward = true;
  }

  info_ptr->kind = JVMTI_TIMER_ELAPSED;                // elapsed not CPU time
}

// Return the real, user, and system times in seconds from an
// arbitrary fixed point in the past.
bool os::getTimesSecs(double* process_real_time,
                      double* process_user_time,
                      double* process_system_time) {
  struct tms ticks;
  clock_t real_ticks = times(&ticks);

  if (real_ticks == (clock_t) (-1)) {
    return false;
  } else {
    double ticks_per_second = (double) clock_tics_per_sec;
    *process_user_time = ((double) ticks.tms_utime) / ticks_per_second;
    *process_system_time = ((double) ticks.tms_stime) / ticks_per_second;
    *process_real_time = ((double) real_ticks) / ticks_per_second;

    return true;
  }
}


char * os::local_time_string(char *buf, size_t buflen) {
  struct tm t;
  time_t long_time;
  time(&long_time);
  localtime_r(&long_time, &t);
  jio_snprintf(buf, buflen, "%d-%02d-%02d %02d:%02d:%02d",
               t.tm_year + 1900, t.tm_mon + 1, t.tm_mday,
               t.tm_hour, t.tm_min, t.tm_sec);
  return buf;
}

struct tm* os::localtime_pd(const time_t* clock, struct tm* res) {
  return localtime_r(clock, res);
}
1446
1447////////////////////////////////////////////////////////////////////////////////
1448// runtime exit support
1449
1450// Note: os::shutdown() might be called very early during initialization, or
1451// called from signal handler. Before adding something to os::shutdown(), make
1452// sure it is async-safe and can handle partially initialized VM.
1453void os::shutdown() {
1454
1455  // allow PerfMemory to attempt cleanup of any persistent resources
1456  perfMemory_exit();
1457
1458  // needs to remove object in file system
1459  AttachListener::abort();
1460
1461  // flush buffered output, finish log files
1462  ostream_abort();
1463
1464  // Check for abort hook
1465  abort_hook_t abort_hook = Arguments::abort_hook();
1466  if (abort_hook != NULL) {
1467    abort_hook();
1468  }
1469
1470}
1471
1472// Note: os::abort() might be called very early during initialization, or
1473// called from a signal handler. Before adding something to os::abort(), make
1474// sure it is async-safe and can handle a partially initialized VM.
1475void os::abort(bool dump_core) {
1476  os::shutdown();
1477  if (dump_core) {
1478#ifndef PRODUCT
1479    fdStream out(defaultStream::output_fd());
1480    out.print_raw("Current thread is ");
1481    char buf[16];
1482    jio_snprintf(buf, sizeof(buf), UINTX_FORMAT, os::current_thread_id());
1483    out.print_raw_cr(buf);
1484    out.print_raw_cr("Dumping core ...");
1485#endif
1486    ::abort(); // dump core
1487  }
1488
1489  ::exit(1);
1490}
1491
1492// Die immediately, no exit hook, no abort hook, no cleanup.
1493void os::die() {
1494  // _exit() on LinuxThreads only kills the current thread
1495  ::abort();
1496}
1497
1498
1499// This method is a copy of JDK's sysGetLastErrorString
1500// from src/solaris/hpi/src/system_md.c
1501
1502size_t os::lasterror(char *buf, size_t len) {
1503  if (errno == 0)  return 0;
1504
1505  const char *s = ::strerror(errno);
1506  size_t n = ::strlen(s);
1507  if (n >= len) {
1508    n = len - 1;
1509  }
1510  ::strncpy(buf, s, n);
1511  buf[n] = '\0';
1512  return n;
1513}
1514
1515intx os::current_thread_id() { return (intx)pthread_self(); }
1516int os::current_process_id() {
1517
1518  // Under the old LinuxThreads library, Linux gives each thread
1519  // its own process id. Because of this, each thread would return
1520  // a different pid if this method simply returned the result
1521  // of getpid(2). Linux provides no API that returns the pid
1522  // of the launcher thread for the VM. This implementation
1523  // returns a unique pid: the pid of the launcher thread
1524  // that starts the VM 'process'.
1525
1526  // Under the NPTL, getpid() returns the same pid as the
1527  // launcher thread rather than a unique pid per thread.
1528  // Use gettid() if you want the old pre-NPTL behaviour.
1529
1530  // If you are looking for the result of a call to getpid() that
1531  // returns a unique pid for the calling thread, then look at the
1532  // OSThread::thread_id() method in the osThread_linux.hpp file.
1533
1534  return (int)(_initial_pid ? _initial_pid : getpid());
1535}
1536
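// For contrast with the comment above, a sketch of obtaining the per-thread
// id: glibc of this era ships no gettid() wrapper, so the raw syscall is
// the usual route. 'gettid_sketch' is a hypothetical name; the VM's own
// equivalent is os::Linux::gettid().
static inline pid_t gettid_sketch() {
  return (pid_t) ::syscall(SYS_gettid);
}
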
1537// DLL functions
1538
1539const char* os::dll_file_extension() { return ".so"; }
1540
1541// This must be hard coded because it's the system's temporary
1542// directory, not the java application's temp directory, a la java.io.tmpdir.
1543const char* os::get_temp_directory() { return "/tmp"; }
1544
1545static bool file_exists(const char* filename) {
1546  struct stat statbuf;
1547  if (filename == NULL || strlen(filename) == 0) {
1548    return false;
1549  }
1550  return os::stat(filename, &statbuf) == 0;
1551}
1552
1553bool os::dll_build_name(char* buffer, size_t buflen,
1554                        const char* pname, const char* fname) {
1555  bool retval = false;
1556  // Copied from libhpi
1557  const size_t pnamelen = pname ? strlen(pname) : 0;
1558
1559  // Return error on buffer overflow.
1560  if (pnamelen + strlen(fname) + 10 > (size_t) buflen) {
1561    return retval;
1562  }
1563
1564  if (pnamelen == 0) {
1565    snprintf(buffer, buflen, "lib%s.so", fname);
1566    retval = true;
1567  } else if (strchr(pname, *os::path_separator()) != NULL) {
1568    int n;
1569    char** pelements = split_path(pname, &n);
1570    if (pelements == NULL) {
1571      return false;
1572    }
1573    for (int i = 0; i < n; i++) {
1574      // Really shouldn't be NULL, but check can't hurt
1575      if (pelements[i] == NULL || strlen(pelements[i]) == 0) {
1576        continue; // skip the empty path values
1577      }
1578      snprintf(buffer, buflen, "%s/lib%s.so", pelements[i], fname);
1579      if (file_exists(buffer)) {
1580        retval = true;
1581        break;
1582      }
1583    }
1584    // release the storage
1585    for (int i = 0; i < n; i++) {
1586      if (pelements[i] != NULL) {
1587        FREE_C_HEAP_ARRAY(char, pelements[i]);
1588      }
1589    }
1590    if (pelements != NULL) {
1591      FREE_C_HEAP_ARRAY(char*, pelements);
1592    }
1593  } else {
1594    snprintf(buffer, buflen, "%s/lib%s.so", pname, fname);
1595    retval = true;
1596  }
1597  return retval;
1598}
1599
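// Usage sketch for dll_build_name(): with an empty path it simply produces
// a "lib<name>.so" string; with a search path it probes each element for an
// existing file. The "/opt/tools" path is an arbitrary example:
//
//   char buf[MAXPATHLEN];
//   if (os::dll_build_name(buf, sizeof(buf), "/opt/tools", "verify")) {
//     // buf now holds e.g. "/opt/tools/libverify.so"
//   }
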
1600// check if addr is inside libjvm.so
1601bool os::address_is_in_vm(address addr) {
1602  static address libjvm_base_addr;
1603  Dl_info dlinfo;
1604
1605  if (libjvm_base_addr == NULL) {
1606    if (dladdr(CAST_FROM_FN_PTR(void *, os::address_is_in_vm), &dlinfo) != 0) {
1607      libjvm_base_addr = (address)dlinfo.dli_fbase;
1608    }
1609    assert(libjvm_base_addr != NULL, "Cannot obtain base address for libjvm");
1610  }
1611
1612  if (dladdr((void *)addr, &dlinfo) != 0) {
1613    if (libjvm_base_addr == (address)dlinfo.dli_fbase) return true;
1614  }
1615
1616  return false;
1617}
1618
1619bool os::dll_address_to_function_name(address addr, char *buf,
1620                                      int buflen, int *offset) {
1621  // buf is not optional, but offset is optional
1622  assert(buf != NULL, "sanity check");
1623
1624  Dl_info dlinfo;
1625
1626  if (dladdr((void*)addr, &dlinfo) != 0) {
1627    // see if we have a matching symbol
1628    if (dlinfo.dli_saddr != NULL && dlinfo.dli_sname != NULL) {
1629      if (!Decoder::demangle(dlinfo.dli_sname, buf, buflen)) {
1630        jio_snprintf(buf, buflen, "%s", dlinfo.dli_sname);
1631      }
1632      if (offset != NULL) *offset = addr - (address)dlinfo.dli_saddr;
1633      return true;
1634    }
1635    // no matching symbol so try for just file info
1636    if (dlinfo.dli_fname != NULL && dlinfo.dli_fbase != NULL) {
1637      if (Decoder::decode((address)(addr - (address)dlinfo.dli_fbase),
1638                          buf, buflen, offset, dlinfo.dli_fname)) {
1639        return true;
1640      }
1641    }
1642  }
1643
1644  buf[0] = '\0';
1645  if (offset != NULL) *offset = -1;
1646  return false;
1647}
1648
1649struct _address_to_library_name {
1650  address addr;          // input : memory address
1651  size_t  buflen;        //         size of fname
1652  char*   fname;         // output: library name
1653  address base;          //         library base addr
1654};
1655
1656static int address_to_library_name_callback(struct dl_phdr_info *info,
1657                                            size_t size, void *data) {
1658  int i;
1659  bool found = false;
1660  address libbase = NULL;
1661  struct _address_to_library_name * d = (struct _address_to_library_name *)data;
1662
1663  // iterate through all loadable segments
1664  for (i = 0; i < info->dlpi_phnum; i++) {
1665    address segbase = (address)(info->dlpi_addr + info->dlpi_phdr[i].p_vaddr);
1666    if (info->dlpi_phdr[i].p_type == PT_LOAD) {
1667      // base address of a library is the lowest address of its loaded
1668      // segments.
1669      if (libbase == NULL || libbase > segbase) {
1670        libbase = segbase;
1671      }
1672      // see if 'addr' is within current segment
1673      if (segbase <= d->addr &&
1674          d->addr < segbase + info->dlpi_phdr[i].p_memsz) {
1675        found = true;
1676      }
1677    }
1678  }
1679
1680  // dlpi_name is NULL or empty if the ELF file is the executable; return 0
1681  // so dll_address_to_library_name() can fall through to use dladdr(), which
1682  // can figure out the executable name from argv[0].
1683  if (found && info->dlpi_name && info->dlpi_name[0]) {
1684    d->base = libbase;
1685    if (d->fname) {
1686      jio_snprintf(d->fname, d->buflen, "%s", info->dlpi_name);
1687    }
1688    return 1;
1689  }
1690  return 0;
1691}
1692
1693bool os::dll_address_to_library_name(address addr, char* buf,
1694                                     int buflen, int* offset) {
1695  // buf is not optional, but offset is optional
1696  assert(buf != NULL, "sanity check");
1697
1698  Dl_info dlinfo;
1699  struct _address_to_library_name data;
1700
1701  // There is a bug in the old glibc dladdr() implementation: it could resolve
1702  // to the wrong library name if the .so file has a base address != NULL. Here
1703  // we iterate through the program headers of all loaded libraries to find
1704  // out which library 'addr' really belongs to. This workaround can be
1705  // removed once the minimum requirement for glibc is moved to 2.3.x.
1706  data.addr = addr;
1707  data.fname = buf;
1708  data.buflen = buflen;
1709  data.base = NULL;
1710  int rslt = dl_iterate_phdr(address_to_library_name_callback, (void *)&data);
1711
1712  if (rslt) {
1713    // buf already contains library name
1714    if (offset) *offset = addr - data.base;
1715    return true;
1716  }
1717  if (dladdr((void*)addr, &dlinfo) != 0) {
1718    if (dlinfo.dli_fname != NULL) {
1719      jio_snprintf(buf, buflen, "%s", dlinfo.dli_fname);
1720    }
1721    if (dlinfo.dli_fbase != NULL && offset != NULL) {
1722      *offset = addr - (address)dlinfo.dli_fbase;
1723    }
1724    return true;
1725  }
1726
1727  buf[0] = '\0';
1728  if (offset) *offset = -1;
1729  return false;
1730}
1731
1732// Loads a .dll/.so and, in case of error, checks whether the
1733// .dll/.so was built for the same architecture as the one
1734// HotSpot is running on.
1735
1736
1737// Remember the stack's state. The Linux dynamic linker will change
1738// the stack to 'executable' at most once, so we must safepoint only once.
1739bool os::Linux::_stack_is_executable = false;
1740
1741// VM operation that loads a library.  This is necessary if stack protection
1742// of the Java stacks can be lost during loading the library.  If we
1743// do not stop the Java threads, they can stack overflow before the stacks
1744// are protected again.
1745class VM_LinuxDllLoad: public VM_Operation {
1746 private:
1747  const char *_filename;
1748  char *_ebuf;
1749  int _ebuflen;
1750  void *_lib;
1751 public:
1752  VM_LinuxDllLoad(const char *fn, char *ebuf, int ebuflen) :
1753    _filename(fn), _ebuf(ebuf), _ebuflen(ebuflen), _lib(NULL) {}
1754  VMOp_Type type() const { return VMOp_LinuxDllLoad; }
1755  void doit() {
1756    _lib = os::Linux::dll_load_in_vmthread(_filename, _ebuf, _ebuflen);
1757    os::Linux::_stack_is_executable = true;
1758  }
1759  void* loaded_library() { return _lib; }
1760};
1761
1762void * os::dll_load(const char *filename, char *ebuf, int ebuflen) {
1763  void * result = NULL;
1764  bool load_attempted = false;
1765
1766  // Check whether the library to load might change execution rights
1767  // of the stack. If they are changed, the protection of the stack
1768  // guard pages will be lost. We need a safepoint to fix this.
1769  //
1770  // See Linux man page execstack(8) for more info.
1771  if (os::uses_stack_guard_pages() && !os::Linux::_stack_is_executable) {
1772    ElfFile ef(filename);
1773    if (!ef.specifies_noexecstack()) {
1774      if (!is_init_completed()) {
1775        os::Linux::_stack_is_executable = true;
1776        // This is OK - No Java threads have been created yet, and hence no
1777        // stack guard pages to fix.
1778        //
1779        // This should happen only when you are building JDK7 using a very
1780        // old version of JDK6 (e.g., with JPRT) and running test_gamma.
1781        //
1782        // Dynamic loader will make all stacks executable after
1783        // this function returns, and will not do that again.
1784        assert(Threads::first() == NULL, "no Java threads should exist yet.");
1785      } else {
1786        warning("You have loaded library %s which might have disabled stack guard. "
1787                "The VM will try to fix the stack guard now.\n"
1788                "It's highly recommended that you fix the library with "
1789                "'execstack -c <libfile>', or link it with '-z noexecstack'.",
1790                filename);
1791
1792        assert(Thread::current()->is_Java_thread(), "must be Java thread");
1793        JavaThread *jt = JavaThread::current();
1794        if (jt->thread_state() != _thread_in_native) {
1795          // This happens when a compiler thread tries to load a hsdis-<arch>.so file
1796          // that requires ExecStack. Cannot enter safe point. Let's give up.
1797          warning("Unable to fix stack guard. Giving up.");
1798        } else {
1799          if (!LoadExecStackDllInVMThread) {
1800            // This is for the case where the DLL has a static
1801            // constructor function that executes JNI code. We cannot
1802            // load such DLLs in the VMThread.
1803            result = os::Linux::dlopen_helper(filename, ebuf, ebuflen);
1804          }
1805
1806          ThreadInVMfromNative tiv(jt);
1807          debug_only(VMNativeEntryWrapper vew;)
1808
1809          VM_LinuxDllLoad op(filename, ebuf, ebuflen);
1810          VMThread::execute(&op);
1811          if (LoadExecStackDllInVMThread) {
1812            result = op.loaded_library();
1813          }
1814          load_attempted = true;
1815        }
1816      }
1817    }
1818  }
1819
1820  if (!load_attempted) {
1821    result = os::Linux::dlopen_helper(filename, ebuf, ebuflen);
1822  }
1823
1824  if (result != NULL) {
1825    // Successful loading
1826    return result;
1827  }
1828
1829  Elf32_Ehdr elf_head;
1830  int diag_msg_max_length = ebuflen - strlen(ebuf);
1831  char* diag_msg_buf = ebuf + strlen(ebuf);
1832
1833  if (diag_msg_max_length == 0) {
1834    // No more space in ebuf for additional diagnostics message
1835    return NULL;
1836  }
1837
1838
1839  int file_descriptor = ::open(filename, O_RDONLY | O_NONBLOCK);
1840
1841  if (file_descriptor < 0) {
1842    // Can't open library, report dlerror() message
1843    return NULL;
1844  }
1845
1846  bool failed_to_read_elf_head =
1847    (sizeof(elf_head) !=
1848     (::read(file_descriptor, &elf_head, sizeof(elf_head))));
1849
1850  ::close(file_descriptor);
1851  if (failed_to_read_elf_head) {
1852    // file i/o error - report dlerror() msg
1853    return NULL;
1854  }
1855
1856  typedef struct {
1857    Elf32_Half  code;         // Actual value as defined in elf.h
1858    Elf32_Half  compat_class; // Compatibility of archs at VM's sense
1859    char        elf_class;    // 32 or 64 bit
1860    char        endianess;    // MSB or LSB
1861    char*       name;         // String representation
1862  } arch_t;
1863
1864#ifndef EM_486
1865  #define EM_486          6               /* Intel 80486 */
1866#endif
1867#ifndef EM_AARCH64
1868  #define EM_AARCH64    183               /* ARM AARCH64 */
1869#endif
1870
1871  static const arch_t arch_array[]={
1872    {EM_386,         EM_386,     ELFCLASS32, ELFDATA2LSB, (char*)"IA 32"},
1873    {EM_486,         EM_386,     ELFCLASS32, ELFDATA2LSB, (char*)"IA 32"},
1874    {EM_IA_64,       EM_IA_64,   ELFCLASS64, ELFDATA2LSB, (char*)"IA 64"},
1875    {EM_X86_64,      EM_X86_64,  ELFCLASS64, ELFDATA2LSB, (char*)"AMD 64"},
1876    {EM_SPARC,       EM_SPARC,   ELFCLASS32, ELFDATA2MSB, (char*)"Sparc 32"},
1877    {EM_SPARC32PLUS, EM_SPARC,   ELFCLASS32, ELFDATA2MSB, (char*)"Sparc 32"},
1878    {EM_SPARCV9,     EM_SPARCV9, ELFCLASS64, ELFDATA2MSB, (char*)"Sparc v9 64"},
1879    {EM_PPC,         EM_PPC,     ELFCLASS32, ELFDATA2MSB, (char*)"Power PC 32"},
1880#if defined(VM_LITTLE_ENDIAN)
1881    {EM_PPC64,       EM_PPC64,   ELFCLASS64, ELFDATA2LSB, (char*)"Power PC 64"},
1882#else
1883    {EM_PPC64,       EM_PPC64,   ELFCLASS64, ELFDATA2MSB, (char*)"Power PC 64"},
1884#endif
1885    {EM_ARM,         EM_ARM,     ELFCLASS32,   ELFDATA2LSB, (char*)"ARM"},
1886    {EM_S390,        EM_S390,    ELFCLASSNONE, ELFDATA2MSB, (char*)"IBM System/390"},
1887    {EM_ALPHA,       EM_ALPHA,   ELFCLASS64, ELFDATA2LSB, (char*)"Alpha"},
1888    {EM_MIPS_RS3_LE, EM_MIPS_RS3_LE, ELFCLASS32, ELFDATA2LSB, (char*)"MIPSel"},
1889    {EM_MIPS,        EM_MIPS,    ELFCLASS32, ELFDATA2MSB, (char*)"MIPS"},
1890    {EM_PARISC,      EM_PARISC,  ELFCLASS32, ELFDATA2MSB, (char*)"PARISC"},
1891    {EM_68K,         EM_68K,     ELFCLASS32, ELFDATA2MSB, (char*)"M68k"},
1892    {EM_AARCH64,     EM_AARCH64, ELFCLASS64, ELFDATA2LSB, (char*)"AARCH64"},
1893  };
1894
1895#if  (defined IA32)
1896  static  Elf32_Half running_arch_code=EM_386;
1897#elif   (defined AMD64)
1898  static  Elf32_Half running_arch_code=EM_X86_64;
1899#elif  (defined IA64)
1900  static  Elf32_Half running_arch_code=EM_IA_64;
1901#elif  (defined __sparc) && (defined _LP64)
1902  static  Elf32_Half running_arch_code=EM_SPARCV9;
1903#elif  (defined __sparc) && (!defined _LP64)
1904  static  Elf32_Half running_arch_code=EM_SPARC;
1905#elif  (defined __powerpc64__)
1906  static  Elf32_Half running_arch_code=EM_PPC64;
1907#elif  (defined __powerpc__)
1908  static  Elf32_Half running_arch_code=EM_PPC;
1909#elif  (defined ARM)
1910  static  Elf32_Half running_arch_code=EM_ARM;
1911#elif  (defined S390)
1912  static  Elf32_Half running_arch_code=EM_S390;
1913#elif  (defined ALPHA)
1914  static  Elf32_Half running_arch_code=EM_ALPHA;
1915#elif  (defined MIPSEL)
1916  static  Elf32_Half running_arch_code=EM_MIPS_RS3_LE;
1917#elif  (defined PARISC)
1918  static  Elf32_Half running_arch_code=EM_PARISC;
1919#elif  (defined MIPS)
1920  static  Elf32_Half running_arch_code=EM_MIPS;
1921#elif  (defined M68K)
1922  static  Elf32_Half running_arch_code=EM_68K;
1923#elif  (defined AARCH64)
1924  static  Elf32_Half running_arch_code=EM_AARCH64;
1925#else
1926    #error Method os::dll_load requires that one of following is defined:\
1927         IA32, AMD64, IA64, __sparc, __powerpc__, ARM, S390, ALPHA, MIPS, MIPSEL, PARISC, M68K, AARCH64
1928#endif
1929
1930  // Identify compatibility class for VM's architecture and library's architecture
1931  // Obtain string descriptions for architectures
1932
1933  arch_t lib_arch = {elf_head.e_machine, 0, elf_head.e_ident[EI_CLASS], elf_head.e_ident[EI_DATA], NULL};
1934  int running_arch_index = -1;
1935
1936  for (unsigned int i = 0; i < ARRAY_SIZE(arch_array); i++) {
1937    if (running_arch_code == arch_array[i].code) {
1938      running_arch_index    = i;
1939    }
1940    if (lib_arch.code == arch_array[i].code) {
1941      lib_arch.compat_class = arch_array[i].compat_class;
1942      lib_arch.name         = arch_array[i].name;
1943    }
1944  }
1945
1946  assert(running_arch_index != -1,
1947         "Didn't find running architecture code (running_arch_code) in arch_array");
1948  if (running_arch_index == -1) {
1949    // Even though running architecture detection failed
1950    // we may still continue with reporting dlerror() message
1951    return NULL;
1952  }
1953
1954  if (lib_arch.endianess != arch_array[running_arch_index].endianess) {
1955    ::snprintf(diag_msg_buf, diag_msg_max_length-1," (Possible cause: endianness mismatch)");
1956    return NULL;
1957  }
1958
1959#ifndef S390
1960  if (lib_arch.elf_class != arch_array[running_arch_index].elf_class) {
1961    ::snprintf(diag_msg_buf, diag_msg_max_length-1," (Possible cause: architecture word width mismatch)");
1962    return NULL;
1963  }
1964#endif // !S390
1965
1966  if (lib_arch.compat_class != arch_array[running_arch_index].compat_class) {
1967    if (lib_arch.name != NULL) {
1968      ::snprintf(diag_msg_buf, diag_msg_max_length-1,
1969                 " (Possible cause: can't load %s-bit .so on a %s-bit platform)",
1970                 lib_arch.name, arch_array[running_arch_index].name);
1971    } else {
1972      ::snprintf(diag_msg_buf, diag_msg_max_length-1,
1973                 " (Possible cause: can't load this .so (machine code=0x%x) on a %s-bit platform)",
1974                 lib_arch.code,
1975                 arch_array[running_arch_index].name);
1976    }
1977  }
1978
1979  return NULL;
1980}
1981
1982void * os::Linux::dlopen_helper(const char *filename, char *ebuf,
1983                                int ebuflen) {
1984  void * result = ::dlopen(filename, RTLD_LAZY);
1985  if (result == NULL) {
1986    ::strncpy(ebuf, ::dlerror(), ebuflen - 1);
1987    ebuf[ebuflen-1] = '\0';
1988  }
1989  return result;
1990}
1991
1992void * os::Linux::dll_load_in_vmthread(const char *filename, char *ebuf,
1993                                       int ebuflen) {
1994  void * result = NULL;
1995  if (LoadExecStackDllInVMThread) {
1996    result = dlopen_helper(filename, ebuf, ebuflen);
1997  }
1998
1999  // Since 7019808, libjvm.so is linked with -noexecstack. If the VM loads a
2000  // library that requires an executable stack, or which does not have this
2001  // stack attribute set, dlopen changes the stack attribute to executable. The
2002  // read protection of the guard pages gets lost.
2003  //
2004  // Need to check _stack_is_executable again as multiple VM_LinuxDllLoad
2005  // may have been queued at the same time.
2006
2007  if (!_stack_is_executable) {
2008    JavaThread *jt = Threads::first();
2009
2010    while (jt) {
2011      if (!jt->stack_guard_zone_unused() &&        // Stack guard zone is in use and
2012          jt->stack_yellow_zone_enabled()) {       // no stack overflow is pending
2013        if (!os::guard_memory((char *) jt->stack_red_zone_base() - jt->stack_red_zone_size(),
2014                              jt->stack_yellow_zone_size() + jt->stack_red_zone_size())) {
2015          warning("Attempt to reguard stack yellow zone failed.");
2016        }
2017      }
2018      jt = jt->next();
2019    }
2020  }
2021
2022  return result;
2023}
2024
2025void* os::dll_lookup(void* handle, const char* name) {
2026  void* res = dlsym(handle, name);
2027  return res;
2028}
2029
2030void* os::get_default_process_handle() {
2031  return (void*)::dlopen(NULL, RTLD_LAZY);
2032}
2033
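// Usage sketch: the default handle makes dll_lookup() search the whole
// process image (the executable plus its already-loaded dependencies),
// much like dlsym() with RTLD_DEFAULT:
//
//   void* handle = os::get_default_process_handle();
//   void* sym    = os::dll_lookup(handle, "malloc");
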
2034static bool _print_ascii_file(const char* filename, outputStream* st) {
2035  int fd = ::open(filename, O_RDONLY);
2036  if (fd == -1) {
2037    return false;
2038  }
2039
2040  char buf[32];
2041  int bytes;
2042  while ((bytes = ::read(fd, buf, sizeof(buf))) > 0) {
2043    st->print_raw(buf, bytes);
2044  }
2045
2046  ::close(fd);
2047
2048  return true;
2049}
2050
2051void os::print_dll_info(outputStream *st) {
2052  st->print_cr("Dynamic libraries:");
2053
2054  char fname[32];
2055  pid_t pid = os::Linux::gettid();
2056
2057  jio_snprintf(fname, sizeof(fname), "/proc/%d/maps", pid);
2058
2059  if (!_print_ascii_file(fname, st)) {
2060    st->print("Can not get library information for pid = %d\n", pid);
2061  }
2062}
2063
2064int os::get_loaded_modules_info(os::LoadedModulesCallbackFunc callback, void *param) {
2065  FILE *procmapsFile = NULL;
2066
2067  // Open the procfs maps file for the current process
2068  if ((procmapsFile = fopen("/proc/self/maps", "r")) != NULL) {
2069    // Allocate PATH_MAX for file name plus a reasonable size for other fields.
2070    char line[PATH_MAX + 100];
2071
2072    // Read line by line from 'file'
2073    while (fgets(line, sizeof(line), procmapsFile) != NULL) {
2074      u8 base, top, offset, inode;
2075      char permissions[5];
2076      char device[6];
2077      char name[PATH_MAX + 1];
2078
2079      // Parse fields from line
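      // (The %lx/%ld conversions match the u8 fields only where
      // 'unsigned long' is 64 bits wide, i.e. on LP64 platforms.)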
2080      sscanf(line, "%lx-%lx %4s %lx %5s %ld %s", &base, &top, permissions, &offset, device, &inode, name);
2081
2082      // Filter by device id '00:00' so that we only get file system mapped files.
2083      if (strcmp(device, "00:00") != 0) {
2084
2085        // Call callback with the fields of interest
2086        if (callback(name, (address)base, (address)top, param)) {
2087          // Nonzero return value: the callback requested that we stop
2088          fclose(procmapsFile);
2089          return 1;
2090        }
2091      }
2092    }
2093    fclose(procmapsFile);
2094  }
2095  return 0;
2096}
2097
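// A sketch of a callback for get_loaded_modules_info() above; the signature
// mirrors how 'callback' is invoked in the loop, and a zero return keeps
// the walk going while nonzero aborts it. 'print_module_sketch' is a
// hypothetical name, not part of the VM.
static inline int print_module_sketch(const char* name, address base,
                                      address top, void* param) {
  tty->print_cr("%s [" PTR_FORMAT ", " PTR_FORMAT ")", name, base, top);
  return 0; // continue iterating
}
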
2098void os::print_os_info_brief(outputStream* st) {
2099  os::Linux::print_distro_info(st);
2100
2101  os::Posix::print_uname_info(st);
2102
2103  os::Linux::print_libversion_info(st);
2104
2105}
2106
2107void os::print_os_info(outputStream* st) {
2108  st->print("OS:");
2109
2110  os::Linux::print_distro_info(st);
2111
2112  os::Posix::print_uname_info(st);
2113
2114  // Print warning if unsafe chroot environment detected
2115  if (unsafe_chroot_detected) {
2116    st->print("WARNING!! ");
2117    st->print_cr("%s", unstable_chroot_error);
2118  }
2119
2120  os::Linux::print_libversion_info(st);
2121
2122  os::Posix::print_rlimit_info(st);
2123
2124  os::Posix::print_load_average(st);
2125
2126  os::Linux::print_full_memory_info(st);
2127}
2128
2129// Try to identify popular distros.
2130// Most Linux distributions have a /etc/XXX-release file, which contains
2131// the OS version string. Newer Linux distributions have a /etc/lsb-release
2132// file that also contains the OS version string. Some have more than one
2133// /etc/XXX-release file (e.g. Mandrake has both /etc/mandrake-release and
2134// /etc/redhat-release), so the order is important.
2135// Any Linux that is based on Red Hat (e.g. Oracle, Mandrake, Sun JDS...) has
2136// its own specific XXX-release file as well as a redhat-release file.
2137// Because of this the XXX-release file needs to be searched for before the
2138// redhat-release file.
2139// Since Red Hat has a lsb-release file that is not very descriptive the
2140// search for redhat-release needs to be before lsb-release.
2141// Since the lsb-release file is the new standard it needs to be searched
2142// before the older style release files.
2143// Searching system-release (Red Hat) and os-release (other Linuxes) is the
2144// next-to-last resort.  The os-release file is a new standard that contains
2145// distribution information and the system-release file seems to be an old
2146// standard that has been replaced by the lsb-release and os-release files.
2147// Searching for the debian_version file is the last resort.  It contains
2148// an informative string like "6.0.6" or "wheezy/sid". Because of this
2149// "Debian " is printed before the contents of the debian_version file.
2150void os::Linux::print_distro_info(outputStream* st) {
2151  if (!_print_ascii_file("/etc/oracle-release", st) &&
2152      !_print_ascii_file("/etc/mandriva-release", st) &&
2153      !_print_ascii_file("/etc/mandrake-release", st) &&
2154      !_print_ascii_file("/etc/sun-release", st) &&
2155      !_print_ascii_file("/etc/redhat-release", st) &&
2156      !_print_ascii_file("/etc/lsb-release", st) &&
2157      !_print_ascii_file("/etc/SuSE-release", st) &&
2158      !_print_ascii_file("/etc/turbolinux-release", st) &&
2159      !_print_ascii_file("/etc/gentoo-release", st) &&
2160      !_print_ascii_file("/etc/ltib-release", st) &&
2161      !_print_ascii_file("/etc/angstrom-version", st) &&
2162      !_print_ascii_file("/etc/system-release", st) &&
2163      !_print_ascii_file("/etc/os-release", st)) {
2164
2165    if (file_exists("/etc/debian_version")) {
2166      st->print("Debian ");
2167      _print_ascii_file("/etc/debian_version", st);
2168    } else {
2169      st->print("Linux");
2170    }
2171  }
2172  st->cr();
2173}
2174
2175void os::Linux::print_libversion_info(outputStream* st) {
2176  // libc, pthread
2177  st->print("libc:");
2178  st->print("%s ", os::Linux::glibc_version());
2179  st->print("%s ", os::Linux::libpthread_version());
2180  if (os::Linux::is_LinuxThreads()) {
2181    st->print("(%s stack)", os::Linux::is_floating_stack() ? "floating" : "fixed");
2182  }
2183  st->cr();
2184}
2185
2186void os::Linux::print_full_memory_info(outputStream* st) {
2187  st->print("\n/proc/meminfo:\n");
2188  _print_ascii_file("/proc/meminfo", st);
2189  st->cr();
2190}
2191
2192void os::print_memory_info(outputStream* st) {
2193
2194  st->print("Memory:");
2195  st->print(" %dk page", os::vm_page_size()>>10);
2196
2197  // values in struct sysinfo are "unsigned long"
2198  struct sysinfo si;
2199  sysinfo(&si);
2200
2201  st->print(", physical " UINT64_FORMAT "k",
2202            os::physical_memory() >> 10);
2203  st->print("(" UINT64_FORMAT "k free)",
2204            os::available_memory() >> 10);
2205  st->print(", swap " UINT64_FORMAT "k",
2206            ((jlong)si.totalswap * si.mem_unit) >> 10);
2207  st->print("(" UINT64_FORMAT "k free)",
2208            ((jlong)si.freeswap * si.mem_unit) >> 10);
2209  st->cr();
2210}
2211
2212void os::pd_print_cpu_info(outputStream* st) {
2213  st->print("\n/proc/cpuinfo:\n");
2214  if (!_print_ascii_file("/proc/cpuinfo", st)) {
2215    st->print("  <Not Available>");
2216  }
2217  st->cr();
2218}
2219
2220void os::print_siginfo(outputStream* st, void* siginfo) {
2221  const siginfo_t* si = (const siginfo_t*)siginfo;
2222
2223  os::Posix::print_siginfo_brief(st, si);
2224#if INCLUDE_CDS
2225  if (si && (si->si_signo == SIGBUS || si->si_signo == SIGSEGV) &&
2226      UseSharedSpaces) {
2227    FileMapInfo* mapinfo = FileMapInfo::current_info();
2228    if (mapinfo->is_in_shared_space(si->si_addr)) {
2229      st->print("\n\nError accessing class data sharing archive."   \
2230                " Mapped file inaccessible during execution, "      \
2231                "possible disk/network problem.");
2232    }
2233  }
2234#endif
2235  st->cr();
2236}
2237
2238
2239static void print_signal_handler(outputStream* st, int sig,
2240                                 char* buf, size_t buflen);
2241
2242void os::print_signal_handlers(outputStream* st, char* buf, size_t buflen) {
2243  st->print_cr("Signal Handlers:");
2244  print_signal_handler(st, SIGSEGV, buf, buflen);
2245  print_signal_handler(st, SIGBUS , buf, buflen);
2246  print_signal_handler(st, SIGFPE , buf, buflen);
2247  print_signal_handler(st, SIGPIPE, buf, buflen);
2248  print_signal_handler(st, SIGXFSZ, buf, buflen);
2249  print_signal_handler(st, SIGILL , buf, buflen);
2250  print_signal_handler(st, INTERRUPT_SIGNAL, buf, buflen);
2251  print_signal_handler(st, SR_signum, buf, buflen);
2252  print_signal_handler(st, SHUTDOWN1_SIGNAL, buf, buflen);
2253  print_signal_handler(st, SHUTDOWN2_SIGNAL , buf, buflen);
2254  print_signal_handler(st, SHUTDOWN3_SIGNAL , buf, buflen);
2255  print_signal_handler(st, BREAK_SIGNAL, buf, buflen);
2256#if defined(PPC64)
2257  print_signal_handler(st, SIGTRAP, buf, buflen);
2258#endif
2259}
2260
2261static char saved_jvm_path[MAXPATHLEN] = {0};
2262
2263// Find the full path to the current module, libjvm.so
2264void os::jvm_path(char *buf, jint buflen) {
2265  // Error checking.
2266  if (buflen < MAXPATHLEN) {
2267    assert(false, "must use a large-enough buffer");
2268    buf[0] = '\0';
2269    return;
2270  }
2271  // Lazy resolve the path to current module.
2272  if (saved_jvm_path[0] != 0) {
2273    strcpy(buf, saved_jvm_path);
2274    return;
2275  }
2276
2277  char dli_fname[MAXPATHLEN];
2278  bool ret = dll_address_to_library_name(
2279                                         CAST_FROM_FN_PTR(address, os::jvm_path),
2280                                         dli_fname, sizeof(dli_fname), NULL);
2281  assert(ret, "cannot locate libjvm");
2282  char *rp = NULL;
2283  if (ret && dli_fname[0] != '\0') {
2284    rp = realpath(dli_fname, buf);
2285  }
2286  if (rp == NULL) {
2287    return;
2288  }
2289
2290  if (Arguments::sun_java_launcher_is_altjvm()) {
2291    // Support for the java launcher's '-XXaltjvm=<path>' option. Typical
2292    // value for buf is "<JAVA_HOME>/jre/lib/<arch>/<vmtype>/libjvm.so".
2293    // If "/jre/lib/" appears at the right place in the string, then
2294    // assume we are installed in a JDK and we're done. Otherwise, check
2295    // for a JAVA_HOME environment variable and fix up the path so it
2296    // looks like libjvm.so is installed there (append a fake suffix
2297    // hotspot/libjvm.so).
2298    const char *p = buf + strlen(buf) - 1;
2299    for (int count = 0; p > buf && count < 5; ++count) {
2300      for (--p; p > buf && *p != '/'; --p)
2301        /* empty */ ;
2302    }
2303
2304    if (strncmp(p, "/jre/lib/", 9) != 0) {
2305      // Look for JAVA_HOME in the environment.
2306      char* java_home_var = ::getenv("JAVA_HOME");
2307      if (java_home_var != NULL && java_home_var[0] != 0) {
2308        char* jrelib_p;
2309        int len;
2310
2311        // Check the current module name "libjvm.so".
2312        p = strrchr(buf, '/');
2313        if (p == NULL) {
2314          return;
2315        }
2316        assert(strstr(p, "/libjvm") == p, "invalid library name");
2317
2318        rp = realpath(java_home_var, buf);
2319        if (rp == NULL) {
2320          return;
2321        }
2322
2323        // determine if this is a legacy image or modules image
2324        // modules image doesn't have "jre" subdirectory
2325        len = strlen(buf);
2326        assert(len < buflen, "Ran out of buffer room");
2327        jrelib_p = buf + len;
2328        snprintf(jrelib_p, buflen-len, "/jre/lib/%s", cpu_arch);
2329        if (0 != access(buf, F_OK)) {
2330          snprintf(jrelib_p, buflen-len, "/lib/%s", cpu_arch);
2331        }
2332
2333        if (0 == access(buf, F_OK)) {
2334          // Use current module name "libjvm.so"
2335          len = strlen(buf);
2336          snprintf(buf + len, buflen-len, "/hotspot/libjvm.so");
2337        } else {
2338          // Go back to path of .so
2339          rp = realpath(dli_fname, buf);
2340          if (rp == NULL) {
2341            return;
2342          }
2343        }
2344      }
2345    }
2346  }
2347
2348  strncpy(saved_jvm_path, buf, MAXPATHLEN);
2349  saved_jvm_path[MAXPATHLEN - 1] = '\0';
2350}
2351
2352void os::print_jni_name_prefix_on(outputStream* st, int args_size) {
2353  // no prefix required, not even "_"
2354}
2355
2356void os::print_jni_name_suffix_on(outputStream* st, int args_size) {
2357  // no suffix required
2358}
2359
2360////////////////////////////////////////////////////////////////////////////////
2361// sun.misc.Signal support
2362
2363static volatile jint sigint_count = 0;
2364
2365static void UserHandler(int sig, void *siginfo, void *context) {
2366  // 4511530 - sem_post is serialized and handled by the manager thread. When
2367  // the program is interrupted by Ctrl-C, SIGINT is sent to every thread. We
2368  // don't want to flood the manager thread with sem_post requests.
2369  if (sig == SIGINT && Atomic::add(1, &sigint_count) > 1) {
2370    return;
2371  }
2372
2373  // Ctrl-C is pressed during error reporting, likely because the error
2374  // handler fails to abort. Let the VM die immediately.
2375  if (sig == SIGINT && is_error_reported()) {
2376    os::die();
2377  }
2378
2379  os::signal_notify(sig);
2380}
2381
2382void* os::user_handler() {
2383  return CAST_FROM_FN_PTR(void*, UserHandler);
2384}
2385
2386class Semaphore : public StackObj {
2387 public:
2388  Semaphore();
2389  ~Semaphore();
2390  void signal();
2391  void wait();
2392  bool trywait();
2393  bool timedwait(unsigned int sec, int nsec);
2394 private:
2395  sem_t _semaphore;
2396};
2397
2398Semaphore::Semaphore() {
2399  sem_init(&_semaphore, 0, 0);
2400}
2401
2402Semaphore::~Semaphore() {
2403  sem_destroy(&_semaphore);
2404}
2405
2406void Semaphore::signal() {
2407  sem_post(&_semaphore);
2408}
2409
2410void Semaphore::wait() {
2411  sem_wait(&_semaphore);
2412}
2413
2414bool Semaphore::trywait() {
2415  return sem_trywait(&_semaphore) == 0;
2416}
2417
2418bool Semaphore::timedwait(unsigned int sec, int nsec) {
2419
2420  struct timespec ts;
2421  // Semaphores are always associated with CLOCK_REALTIME
2422  os::Linux::clock_gettime(CLOCK_REALTIME, &ts);
2423  // see unpackTime for discussion on overflow checking
2424  if (sec >= MAX_SECS) {
2425    ts.tv_sec += MAX_SECS;
2426    ts.tv_nsec = 0;
2427  } else {
2428    ts.tv_sec += sec;
2429    ts.tv_nsec += nsec;
2430    if (ts.tv_nsec >= NANOSECS_PER_SEC) {
2431      ts.tv_nsec -= NANOSECS_PER_SEC;
2432      ++ts.tv_sec; // note: this must be <= MAX_SECS
2433    }
2434  }
2435
2436  while (1) {
2437    int result = sem_timedwait(&_semaphore, &ts);
2438    if (result == 0) {
2439      return true;
2440    } else if (errno == EINTR) {
2441      continue;
2442    } else if (errno == ETIMEDOUT) {
2443      return false;
2444    } else {
2445      return false;
2446    }
2447  }
2448}
2449
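// Usage sketch: waiting up to 500 ms for a signal() from another thread.
// timedwait() converts the relative (sec, nsec) pair into the absolute
// CLOCK_REALTIME deadline that sem_timedwait() expects:
//
//   Semaphore sem;
//   if (!sem.timedwait(0, 500 * 1000 * 1000)) {
//     // no post arrived within 500 ms (or the wait failed)
//   }
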
2450extern "C" {
2451  typedef void (*sa_handler_t)(int);
2452  typedef void (*sa_sigaction_t)(int, siginfo_t *, void *);
2453}
2454
2455void* os::signal(int signal_number, void* handler) {
2456  struct sigaction sigAct, oldSigAct;
2457
2458  sigfillset(&(sigAct.sa_mask));
2459  sigAct.sa_flags   = SA_RESTART|SA_SIGINFO;
2460  sigAct.sa_handler = CAST_TO_FN_PTR(sa_handler_t, handler);
2461
2462  if (sigaction(signal_number, &sigAct, &oldSigAct)) {
2463    // -1 means registration failed
2464    return (void *)-1;
2465  }
2466
2467  return CAST_FROM_FN_PTR(void*, oldSigAct.sa_handler);
2468}
2469
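// Usage sketch: installing the VM's UserHandler for SIGUSR2 and keeping the
// previous disposition around; SIGUSR2 is an arbitrary example signal here.
//
//   void* old_handler = os::signal(SIGUSR2, os::user_handler());
//   if (old_handler == (void*)-1) {
//     // registration failed
//   }
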
2470void os::signal_raise(int signal_number) {
2471  ::raise(signal_number);
2472}
2473
2474// The following code was moved from os.cpp to make it
2475// platform specific, which it is by its very nature.
2476
2477// Will be modified when max signal is changed to be dynamic
2478int os::sigexitnum_pd() {
2479  return NSIG;
2480}
2481
2482// a counter for each possible signal value
2483static volatile jint pending_signals[NSIG+1] = { 0 };
2484
2485// Linux (POSIX) specific handshaking semaphore.
2486static sem_t sig_sem;
2487static Semaphore sr_semaphore;
2488
2489void os::signal_init_pd() {
2490  // Initialize signal structures
2491  ::memset((void*)pending_signals, 0, sizeof(pending_signals));
2492
2493  // Initialize signal semaphore
2494  ::sem_init(&sig_sem, 0, 0);
2495}
2496
2497void os::signal_notify(int sig) {
2498  Atomic::inc(&pending_signals[sig]);
2499  ::sem_post(&sig_sem);
2500}
2501
2502static int check_pending_signals(bool wait) {
2503  Atomic::store(0, &sigint_count);
2504  for (;;) {
2505    for (int i = 0; i < NSIG + 1; i++) {
2506      jint n = pending_signals[i];
2507      if (n > 0 && n == Atomic::cmpxchg(n - 1, &pending_signals[i], n)) {
2508        return i;
2509      }
2510    }
2511    if (!wait) {
2512      return -1;
2513    }
2514    JavaThread *thread = JavaThread::current();
2515    ThreadBlockInVM tbivm(thread);
2516
2517    bool threadIsSuspended;
2518    do {
2519      thread->set_suspend_equivalent();
2520      // cleared by handle_special_suspend_equivalent_condition() or java_suspend_self()
2521      ::sem_wait(&sig_sem);
2522
2523      // were we externally suspended while we were waiting?
2524      threadIsSuspended = thread->handle_special_suspend_equivalent_condition();
2525      if (threadIsSuspended) {
2526        // The semaphore has been incremented, but while we were waiting
2527        // another thread suspended us. We don't want to continue running
2528        // while suspended because that would surprise the thread that
2529        // suspended us.
2530        ::sem_post(&sig_sem);
2531
2532        thread->java_suspend_self();
2533      }
2534    } while (threadIsSuspended);
2535  }
2536}
2537
2538int os::signal_lookup() {
2539  return check_pending_signals(false);
2540}
2541
2542int os::signal_wait() {
2543  return check_pending_signals(true);
2544}
2545
2546////////////////////////////////////////////////////////////////////////////////
2547// Virtual Memory
2548
2549int os::vm_page_size() {
2550  // Seems redundant as all get out
2551  assert(os::Linux::page_size() != -1, "must call os::init");
2552  return os::Linux::page_size();
2553}
2554
2555// Linux allocates memory by pages.
2556int os::vm_allocation_granularity() {
2557  assert(os::Linux::page_size() != -1, "must call os::init");
2558  return os::Linux::page_size();
2559}
2560
2561// Rationale behind this function:
2562//  current (Mon Apr 25 20:12:18 MSD 2005) oprofile drops samples without executable
2563//  mapping for address (see lookup_dcookie() in the kernel module), thus we cannot get
2564//  samples for JITted code. Here we create private executable mapping over the code cache
2565//  and then we can use standard (well, almost, as mapping can change) way to provide
2566//  info for the reporting script by storing timestamp and location of symbol
2567void linux_wrap_code(char* base, size_t size) {
2568  static volatile jint cnt = 0;
2569
2570  if (!UseOprofile) {
2571    return;
2572  }
2573
2574  char buf[PATH_MAX+1];
2575  int num = Atomic::add(1, &cnt);
2576
2577  snprintf(buf, sizeof(buf), "%s/hs-vm-%d-%d",
2578           os::get_temp_directory(), os::current_process_id(), num);
2579  unlink(buf);
2580
2581  int fd = ::open(buf, O_CREAT | O_RDWR, S_IRWXU);
2582
2583  if (fd != -1) {
2584    off_t rv = ::lseek(fd, size-2, SEEK_SET);
2585    if (rv != (off_t)-1) {
2586      if (::write(fd, "", 1) == 1) {
2587        mmap(base, size,
2588             PROT_READ|PROT_WRITE|PROT_EXEC,
2589             MAP_PRIVATE|MAP_FIXED|MAP_NORESERVE, fd, 0);
2590      }
2591    }
2592    ::close(fd);
2593    unlink(buf);
2594  }
2595}
2596
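// The lseek()+write() pair above is the classic idiom for growing a file
// without writing it out byte by byte: seeking past EOF and writing a
// single byte extends the file, and the gap reads back as zeros (a sparse
// file on most filesystems). A standalone sketch of the idiom, with
// 'grow_file_sketch' as a hypothetical name:
static inline bool grow_file_sketch(int fd, off_t size) {
  if (::lseek(fd, size - 1, SEEK_SET) == (off_t)-1) {
    return false;
  }
  return ::write(fd, "", 1) == 1; // the file length is now 'size' bytes
}
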
2597static bool recoverable_mmap_error(int err) {
2598  // See if the error is one we can let the caller handle. This
2599  // list of errno values comes from JBS-6843484. I can't find a
2600  // Linux man page that documents this specific set of errno
2601  // values so while this list currently matches Solaris, it may
2602  // change as we gain experience with this failure mode.
2603  switch (err) {
2604  case EBADF:
2605  case EINVAL:
2606  case ENOTSUP:
2607    // let the caller deal with these errors
2608    return true;
2609
2610  default:
2611    // Any remaining errors on this OS can cause our reserved mapping
2612    // to be lost. That can cause confusion where different data
2613    // structures think they have the same memory mapped. The worst
2614    // scenario is if both the VM and a library think they have the
2615    // same memory mapped.
2616    return false;
2617  }
2618}
2619
2620static void warn_fail_commit_memory(char* addr, size_t size, bool exec,
2621                                    int err) {
2622  warning("INFO: os::commit_memory(" PTR_FORMAT ", " SIZE_FORMAT
2623          ", %d) failed; error='%s' (errno=%d)", addr, size, exec,
2624          strerror(err), err);
2625}
2626
2627static void warn_fail_commit_memory(char* addr, size_t size,
2628                                    size_t alignment_hint, bool exec,
2629                                    int err) {
2630  warning("INFO: os::commit_memory(" PTR_FORMAT ", " SIZE_FORMAT
2631          ", " SIZE_FORMAT ", %d) failed; error='%s' (errno=%d)", addr, size,
2632          alignment_hint, exec, strerror(err), err);
2633}
2634
2635// NOTE: Linux kernel does not really reserve the pages for us.
2636//       All it does is to check if there are enough free pages
2637//       left at the time of mmap(). This could be a potential
2638//       problem.
2639int os::Linux::commit_memory_impl(char* addr, size_t size, bool exec) {
2640  int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
2641  uintptr_t res = (uintptr_t) ::mmap(addr, size, prot,
2642                                     MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0);
2643  if (res != (uintptr_t) MAP_FAILED) {
2644    if (UseNUMAInterleaving) {
2645      numa_make_global(addr, size);
2646    }
2647    return 0;
2648  }
2649
2650  int err = errno;  // save errno from mmap() call above
2651
2652  if (!recoverable_mmap_error(err)) {
2653    warn_fail_commit_memory(addr, size, exec, err);
2654    vm_exit_out_of_memory(size, OOM_MMAP_ERROR, "committing reserved memory.");
2655  }
2656
2657  return err;
2658}
2659
2660bool os::pd_commit_memory(char* addr, size_t size, bool exec) {
2661  return os::Linux::commit_memory_impl(addr, size, exec) == 0;
2662}
2663
2664void os::pd_commit_memory_or_exit(char* addr, size_t size, bool exec,
2665                                  const char* mesg) {
2666  assert(mesg != NULL, "mesg must be specified");
2667  int err = os::Linux::commit_memory_impl(addr, size, exec);
2668  if (err != 0) {
2669    // the caller wants all commit errors to exit with the specified mesg:
2670    warn_fail_commit_memory(addr, size, exec, err);
2671    vm_exit_out_of_memory(size, OOM_MMAP_ERROR, mesg);
2672  }
2673}
2674
2675// Define MAP_HUGETLB here so we can build HotSpot on old systems.
2676#ifndef MAP_HUGETLB
2677  #define MAP_HUGETLB 0x40000
2678#endif
2679
2680// Define MADV_HUGEPAGE here so we can build HotSpot on old systems.
2681#ifndef MADV_HUGEPAGE
2682  #define MADV_HUGEPAGE 14
2683#endif
2684
2685int os::Linux::commit_memory_impl(char* addr, size_t size,
2686                                  size_t alignment_hint, bool exec) {
2687  int err = os::Linux::commit_memory_impl(addr, size, exec);
2688  if (err == 0) {
2689    realign_memory(addr, size, alignment_hint);
2690  }
2691  return err;
2692}
2693
2694bool os::pd_commit_memory(char* addr, size_t size, size_t alignment_hint,
2695                          bool exec) {
2696  return os::Linux::commit_memory_impl(addr, size, alignment_hint, exec) == 0;
2697}
2698
2699void os::pd_commit_memory_or_exit(char* addr, size_t size,
2700                                  size_t alignment_hint, bool exec,
2701                                  const char* mesg) {
2702  assert(mesg != NULL, "mesg must be specified");
2703  int err = os::Linux::commit_memory_impl(addr, size, alignment_hint, exec);
2704  if (err != 0) {
2705    // the caller wants all commit errors to exit with the specified mesg:
2706    warn_fail_commit_memory(addr, size, alignment_hint, exec, err);
2707    vm_exit_out_of_memory(size, OOM_MMAP_ERROR, mesg);
2708  }
2709}
2710
2711void os::pd_realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
2712  if (UseTransparentHugePages && alignment_hint > (size_t)vm_page_size()) {
2713    // We don't check the return value: madvise(MADV_HUGEPAGE) may not
2714    // be supported or the memory may already be backed by huge pages.
2715    ::madvise(addr, bytes, MADV_HUGEPAGE);
2716  }
2717}
2718
2719void os::pd_free_memory(char *addr, size_t bytes, size_t alignment_hint) {
2720  // This method works by doing an mmap over an existing mapping, effectively discarding
2721  // the existing pages. However, it won't work for SHM-based large pages, which cannot be
2722  // uncommitted at all. We don't do anything in this case to avoid creating a segment with
2723  // small pages on top of the SHM segment. This method always works for small pages, so we
2724  // allow that in any case.
2725  if (alignment_hint <= (size_t)os::vm_page_size() || can_commit_large_page_memory()) {
2726    commit_memory(addr, bytes, alignment_hint, !ExecMem);
2727  }
2728}
2729
2730void os::numa_make_global(char *addr, size_t bytes) {
2731  Linux::numa_interleave_memory(addr, bytes);
2732}
2733
2734// Define for numa_set_bind_policy(int). Setting the argument to 0 will set the
2735// bind policy to MPOL_PREFERRED for the current thread.
2736#define USE_MPOL_PREFERRED 0
2737
2738void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) {
2739  // To make NUMA and large pages more robust when both enabled, we need to ease
2740  // the requirements on where the memory should be allocated. MPOL_BIND is the
2741  // default policy and it will force memory to be allocated on the specified
2742  // node. Changing this to MPOL_PREFERRED will prefer to allocate the memory on
2743  // the specified node, but will not force it. Using this policy will prevent
2744  // getting SIGBUS when trying to allocate large pages on NUMA nodes with no
2745  // free large pages.
2746  Linux::numa_set_bind_policy(USE_MPOL_PREFERRED);
2747  Linux::numa_tonode_memory(addr, bytes, lgrp_hint);
2748}
2749
2750bool os::numa_topology_changed() { return false; }
2751
2752size_t os::numa_get_groups_num() {
2753  int max_node = Linux::numa_max_node();
2754  return max_node > 0 ? max_node + 1 : 1;
2755}
2756
2757int os::numa_get_group_id() {
2758  int cpu_id = Linux::sched_getcpu();
2759  if (cpu_id != -1) {
2760    int lgrp_id = Linux::get_node_by_cpu(cpu_id);
2761    if (lgrp_id != -1) {
2762      return lgrp_id;
2763    }
2764  }
2765  return 0;
2766}
2767
2768size_t os::numa_get_leaf_groups(int *ids, size_t size) {
2769  for (size_t i = 0; i < size; i++) {
2770    ids[i] = i;
2771  }
2772  return size;
2773}
2774
2775bool os::get_page_info(char *start, page_info* info) {
2776  return false;
2777}
2778
2779char *os::scan_pages(char *start, char* end, page_info* page_expected,
2780                     page_info* page_found) {
2781  return end;
2782}
2783
2784
2785int os::Linux::sched_getcpu_syscall(void) {
2786  unsigned int cpu;
2787  int retval = -1;
2788
2789#if defined(IA32)
2790  #ifndef SYS_getcpu
2791    #define SYS_getcpu 318
2792  #endif
2793  retval = syscall(SYS_getcpu, &cpu, NULL, NULL);
2794#elif defined(AMD64)
2795// Unfortunately we have to bring all these macros here from vsyscall.h
2796// to be able to compile on old linuxes.
2797  #define __NR_vgetcpu 2
2798  #define VSYSCALL_START (-10UL << 20)
2799  #define VSYSCALL_SIZE 1024
2800  #define VSYSCALL_ADDR(vsyscall_nr) (VSYSCALL_START+VSYSCALL_SIZE*(vsyscall_nr))
2801  typedef long (*vgetcpu_t)(unsigned int *cpu, unsigned int *node, unsigned long *tcache);
2802  vgetcpu_t vgetcpu = (vgetcpu_t)VSYSCALL_ADDR(__NR_vgetcpu);
2803  retval = vgetcpu(&cpu, NULL, NULL);
2804#endif
2805
2806  return (retval == -1) ? retval : cpu;
2807}
2808
2809// Something to do with the numa-aware allocator needs these symbols
2810extern "C" JNIEXPORT void numa_warn(int number, char *where, ...) { }
2811extern "C" JNIEXPORT void numa_error(char *where) { }
2812extern "C" JNIEXPORT int fork1() { return fork(); }
2813
2814
2815// If we are running with libnuma version > 2, then we should
2816// be trying to use symbols with version 1.1.
2817// If we are running with an earlier version, which did not have symbol
2818// versions, we should use the base version.
2819void* os::Linux::libnuma_dlsym(void* handle, const char *name) {
2820  void *f = dlvsym(handle, name, "libnuma_1.1");
2821  if (f == NULL) {
2822    f = dlsym(handle, name);
2823  }
2824  return f;
2825}
2826
2827bool os::Linux::libnuma_init() {
2828  // sched_getcpu() should be in libc.
2829  set_sched_getcpu(CAST_TO_FN_PTR(sched_getcpu_func_t,
2830                                  dlsym(RTLD_DEFAULT, "sched_getcpu")));
2831
2832  // If it's not, try a direct syscall.
2833  if (sched_getcpu() == -1) {
2834    set_sched_getcpu(CAST_TO_FN_PTR(sched_getcpu_func_t,
2835                                    (void*)&sched_getcpu_syscall));
2836  }
2837
2838  if (sched_getcpu() != -1) { // Does it work?
2839    void *handle = dlopen("libnuma.so.1", RTLD_LAZY);
2840    if (handle != NULL) {
2841      set_numa_node_to_cpus(CAST_TO_FN_PTR(numa_node_to_cpus_func_t,
2842                                           libnuma_dlsym(handle, "numa_node_to_cpus")));
2843      set_numa_max_node(CAST_TO_FN_PTR(numa_max_node_func_t,
2844                                       libnuma_dlsym(handle, "numa_max_node")));
2845      set_numa_available(CAST_TO_FN_PTR(numa_available_func_t,
2846                                        libnuma_dlsym(handle, "numa_available")));
2847      set_numa_tonode_memory(CAST_TO_FN_PTR(numa_tonode_memory_func_t,
2848                                            libnuma_dlsym(handle, "numa_tonode_memory")));
2849      set_numa_interleave_memory(CAST_TO_FN_PTR(numa_interleave_memory_func_t,
2850                                                libnuma_dlsym(handle, "numa_interleave_memory")));
2851      set_numa_set_bind_policy(CAST_TO_FN_PTR(numa_set_bind_policy_func_t,
2852                                              libnuma_dlsym(handle, "numa_set_bind_policy")));
2853
2854
2855      if (numa_available() != -1) {
2856        set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle, "numa_all_nodes"));
2857        // Create a cpu -> node mapping
2858        _cpu_to_node = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<int>(0, true);
2859        rebuild_cpu_to_node_map();
2860        return true;
2861      }
2862    }
2863  }
2864  return false;
2865}
2866
2867// rebuild_cpu_to_node_map() constructs a table mapping cpu id to node id.
2868// The table is later used in get_node_by_cpu().
2869void os::Linux::rebuild_cpu_to_node_map() {
2870  const size_t NCPUS = 32768; // Since the buffer size computation is very obscure
2871                              // in libnuma (possible values start at 16
2872                              // and continue up with every other power of 2, but stay
2873                              // less than the maximum number of CPUs supported by the kernel),
2874                              // and is subject to change (in libnuma version 2 the requirements
2875                              // are more reasonable), we'll just hardcode the number they use
2876                              // in the library.
2877  const size_t BitsPerCLong = sizeof(long) * CHAR_BIT;
2878
2879  size_t cpu_num = os::active_processor_count();
2880  size_t cpu_map_size = NCPUS / BitsPerCLong;
2881  size_t cpu_map_valid_size =
2882    MIN2((cpu_num + BitsPerCLong - 1) / BitsPerCLong, cpu_map_size);
2883
2884  cpu_to_node()->clear();
2885  cpu_to_node()->at_grow(cpu_num - 1);
2886  size_t node_num = numa_get_groups_num();
2887
2888  unsigned long *cpu_map = NEW_C_HEAP_ARRAY(unsigned long, cpu_map_size, mtInternal);
2889  for (size_t i = 0; i < node_num; i++) {
2890    if (numa_node_to_cpus(i, cpu_map, cpu_map_size * sizeof(unsigned long)) != -1) {
2891      for (size_t j = 0; j < cpu_map_valid_size; j++) {
2892        if (cpu_map[j] != 0) {
2893          for (size_t k = 0; k < BitsPerCLong; k++) {
2894            if (cpu_map[j] & (1UL << k)) {
2895              cpu_to_node()->at_put(j * BitsPerCLong + k, i);
2896            }
2897          }
2898        }
2899      }
2900    }
2901  }
2902  FREE_C_HEAP_ARRAY(unsigned long, cpu_map);
2903}
2904
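// Worked example of the decoding loop above, assuming 64-bit longs: if
// numa_node_to_cpus(1, ...) fills cpu_map[0] with 0xF0, then bits 4..7 of
// word 0 are set, so cpus 4..7 (j * BitsPerCLong + k = 0 * 64 + 4..7) are
// recorded as belonging to node 1.
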
2905int os::Linux::get_node_by_cpu(int cpu_id) {
2906  if (cpu_to_node() != NULL && cpu_id >= 0 && cpu_id < cpu_to_node()->length()) {
2907    return cpu_to_node()->at(cpu_id);
2908  }
2909  return -1;
2910}
2911
2912GrowableArray<int>* os::Linux::_cpu_to_node;
2913os::Linux::sched_getcpu_func_t os::Linux::_sched_getcpu;
2914os::Linux::numa_node_to_cpus_func_t os::Linux::_numa_node_to_cpus;
2915os::Linux::numa_max_node_func_t os::Linux::_numa_max_node;
2916os::Linux::numa_available_func_t os::Linux::_numa_available;
2917os::Linux::numa_tonode_memory_func_t os::Linux::_numa_tonode_memory;
2918os::Linux::numa_interleave_memory_func_t os::Linux::_numa_interleave_memory;
2919os::Linux::numa_set_bind_policy_func_t os::Linux::_numa_set_bind_policy;
2920unsigned long* os::Linux::_numa_all_nodes;
2921
2922bool os::pd_uncommit_memory(char* addr, size_t size) {
2923  uintptr_t res = (uintptr_t) ::mmap(addr, size, PROT_NONE,
2924                                     MAP_PRIVATE|MAP_FIXED|MAP_NORESERVE|MAP_ANONYMOUS, -1, 0);
2925  return res  != (uintptr_t) MAP_FAILED;
2926}
2927
2928static address get_stack_commited_bottom(address bottom, size_t size) {
2929  address nbot = bottom;
2930  address ntop = bottom + size;
2931
2932  size_t page_sz = os::vm_page_size();
2933  unsigned pages = size / page_sz;
2934
2935  unsigned char vec[1];
2936  unsigned imin = 1, imax = pages + 1, imid;
2937  int mincore_return_value = 0;
2938
2939  assert(imin <= imax, "Unexpected page size");
2940
2941  while (imin < imax) {
2942    imid = (imax + imin) / 2;
2943    nbot = ntop - (imid * page_sz);
2944
2945    // Use a trick with mincore to check whether the page is mapped or not.
2946    // mincore sets vec[0] to 1 if the page resides in memory and to 0 if it
2947    // is swapped out, but if the page we are asking about is unmapped,
2948    // it returns -1 with errno set to ENOMEM.
2949    mincore_return_value = mincore(nbot, page_sz, vec);
2950
2951    if (mincore_return_value == -1) {
2952      // Page is not mapped; go up
2953      // to find the first mapped page
2954      if (errno != EAGAIN) {
2955        assert(errno == ENOMEM, "Unexpected mincore errno");
2956        imax = imid;
2957      }
2958    } else {
2959      // Page is mapped; go down
2960      // to find the first unmapped page
2961      imin = imid + 1;
2962    }
2963  }
2964
2965  nbot = nbot + page_sz;
2966
2967  // Adjust stack bottom one page up if last checked page is not mapped
2968  if (mincore_return_value == -1) {
2969    nbot = nbot + page_sz;
2970  }
2971
2972  return nbot;
2973}
2974
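// The probe at the heart of the binary search above, in isolation: a sketch
// (with the hypothetical name 'page_is_mapped_sketch') that classifies a
// single page the same way, treating only ENOMEM as "unmapped".
static inline bool page_is_mapped_sketch(address p) {
  unsigned char vec[1];
  if (mincore(p, os::vm_page_size(), vec) == -1) {
    return errno != ENOMEM; // ENOMEM means there is no mapping at 'p'
  }
  return true; // mapped; vec[0] says whether the page is resident
}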
2975
2976// Linux uses a growable mapping for the stack, and if the mapping for
2977// the stack guard pages is not removed when we detach a thread the
2978// stack cannot grow beyond the pages where the stack guard was
2979// mapped.  If at some point later in the process the stack expands to
2980// that point, the Linux kernel cannot expand the stack any further
2981// because the guard pages are in the way, and a segfault occurs.
2982//
2983// However, it's essential not to split the stack region by unmapping
2984// a region (leaving a hole) that's already part of the stack mapping,
2985// so if the stack mapping has already grown beyond the guard pages at
2986// the time we create them, we have to truncate the stack mapping.
2987// So, we need to know the extent of the stack mapping when
2988// create_stack_guard_pages() is called.
2989
2990// We only need this for stacks that are growable: at the time of
2991// writing thread stacks don't use growable mappings (i.e. those
// created with MAP_GROWSDOWN), and aren't marked "[stack]", so this
2993// only applies to the main thread.
2994
2995// If the (growable) stack mapping already extends beyond the point
2996// where we're going to put our guard pages, truncate the mapping at
2997// that point by munmap()ping it.  This ensures that when we later
2998// munmap() the guard pages we don't leave a hole in the stack
// mapping. This only affects the main/initial thread.
3000
3001bool os::pd_create_stack_guard_pages(char* addr, size_t size) {
3002  if (os::Linux::is_initial_thread()) {
    // As we manually grow the stack up to the bottom inside create_attached_thread(),
    // it's likely that os::Linux::initial_thread_stack_bottom is mapped and
    // we don't need to do anything special.
    // Check it first, before calling the heavy function.
3007    uintptr_t stack_extent = (uintptr_t) os::Linux::initial_thread_stack_bottom();
3008    unsigned char vec[1];
3009
3010    if (mincore((address)stack_extent, os::vm_page_size(), vec) == -1) {
      // Fall back to the slow path on all errors, including EAGAIN
      stack_extent = (uintptr_t) get_stack_committed_bottom(
3013                                                           os::Linux::initial_thread_stack_bottom(),
3014                                                           (size_t)addr - stack_extent);
3015    }
3016
3017    if (stack_extent < (uintptr_t)addr) {
3018      ::munmap((void*)stack_extent, (uintptr_t)(addr - stack_extent));
3019    }
3020  }
3021
3022  return os::commit_memory(addr, size, !ExecMem);
3023}
3024
3025// If this is a growable mapping, remove the guard pages entirely by
// munmap()ping them.  If not, just call uncommit_memory(). This only
// affects the main/initial thread, but guard against future OS changes.
// It's safe to always unmap guard pages for the initial thread because we
// always place them right after the end of the mapped region.
3030
3031bool os::remove_stack_guard_pages(char* addr, size_t size) {
3034  if (os::Linux::is_initial_thread()) {
3035    return ::munmap(addr, size) == 0;
3036  }
3037
3038  return os::uncommit_memory(addr, size);
3039}
3040
3041static address _highest_vm_reserved_address = NULL;
3042
3043// If 'fixed' is true, anon_mmap() will attempt to reserve anonymous memory
3044// at 'requested_addr'. If there are existing memory mappings at the same
3045// location, however, they will be overwritten. If 'fixed' is false,
3046// 'requested_addr' is only treated as a hint, the return value may or
3047// may not start from the requested address. Unlike Linux mmap(), this
3048// function returns NULL to indicate failure.
3049static char* anon_mmap(char* requested_addr, size_t bytes, bool fixed) {
3050  char * addr;
3051  int flags;
3052
3053  flags = MAP_PRIVATE | MAP_NORESERVE | MAP_ANONYMOUS;
3054  if (fixed) {
3055    assert((uintptr_t)requested_addr % os::Linux::page_size() == 0, "unaligned address");
3056    flags |= MAP_FIXED;
3057  }
3058
3059  // Map reserved/uncommitted pages PROT_NONE so we fail early if we
3060  // touch an uncommitted page. Otherwise, the read/write might
3061  // succeed if we have enough swap space to back the physical page.
3062  addr = (char*)::mmap(requested_addr, bytes, PROT_NONE,
3063                       flags, -1, 0);
3064
3065  if (addr != MAP_FAILED) {
3066    // anon_mmap() should only get called during VM initialization,
    // so no lock is needed (in fact locking could be skipped even if it were
    // called from multiple threads, because _highest_vm_reserved_address is
    // just a hint about the upper limit of non-stack memory regions.)
3070    if ((address)addr + bytes > _highest_vm_reserved_address) {
3071      _highest_vm_reserved_address = (address)addr + bytes;
3072    }
3073  }
3074
3075  return addr == MAP_FAILED ? NULL : addr;
3076}
3077
3078// Don't update _highest_vm_reserved_address, because there might be memory
3079// regions above addr + size. If so, releasing a memory region only creates
3080// a hole in the address space, it doesn't help prevent heap-stack collision.
3081//
3082static int anon_munmap(char * addr, size_t size) {
3083  return ::munmap(addr, size) == 0;
3084}
3085
3086char* os::pd_reserve_memory(size_t bytes, char* requested_addr,
3087                            size_t alignment_hint) {
3088  return anon_mmap(requested_addr, bytes, (requested_addr != NULL));
3089}
3090
3091bool os::pd_release_memory(char* addr, size_t size) {
3092  return anon_munmap(addr, size);
3093}
3094
3095static address highest_vm_reserved_address() {
3096  return _highest_vm_reserved_address;
3097}
3098
3099static bool linux_mprotect(char* addr, size_t size, int prot) {
3100  // Linux wants the mprotect address argument to be page aligned.
3101  char* bottom = (char*)align_size_down((intptr_t)addr, os::Linux::page_size());
3102
3103  // According to SUSv3, mprotect() should only be used with mappings
3104  // established by mmap(), and mmap() always maps whole pages. Unaligned
3105  // 'addr' likely indicates problem in the VM (e.g. trying to change
3106  // protection of malloc'ed or statically allocated memory). Check the
3107  // caller if you hit this assert.
3108  assert(addr == bottom, "sanity check");
3109
3110  size = align_size_up(pointer_delta(addr, bottom, 1) + size, os::Linux::page_size());
3111  return ::mprotect(bottom, size, prot) == 0;
3112}
3113
3114// Set protections specified
3115bool os::protect_memory(char* addr, size_t bytes, ProtType prot,
3116                        bool is_committed) {
3117  unsigned int p = 0;
3118  switch (prot) {
3119  case MEM_PROT_NONE: p = PROT_NONE; break;
3120  case MEM_PROT_READ: p = PROT_READ; break;
3121  case MEM_PROT_RW:   p = PROT_READ|PROT_WRITE; break;
3122  case MEM_PROT_RWX:  p = PROT_READ|PROT_WRITE|PROT_EXEC; break;
3123  default:
3124    ShouldNotReachHere();
3125  }
3126  // is_committed is unused.
3127  return linux_mprotect(addr, bytes, p);
3128}
3129
3130bool os::guard_memory(char* addr, size_t size) {
3131  return linux_mprotect(addr, size, PROT_NONE);
3132}
3133
3134bool os::unguard_memory(char* addr, size_t size) {
3135  return linux_mprotect(addr, size, PROT_READ|PROT_WRITE);
3136}
3137
3138bool os::Linux::transparent_huge_pages_sanity_check(bool warn,
3139                                                    size_t page_size) {
3140  bool result = false;
3141  void *p = mmap(NULL, page_size * 2, PROT_READ|PROT_WRITE,
3142                 MAP_ANONYMOUS|MAP_PRIVATE,
3143                 -1, 0);
3144  if (p != MAP_FAILED) {
3145    void *aligned_p = align_ptr_up(p, page_size);
3146
3147    result = madvise(aligned_p, page_size, MADV_HUGEPAGE) == 0;
3148
3149    munmap(p, page_size * 2);
3150  }
3151
3152  if (warn && !result) {
3153    warning("TransparentHugePages is not supported by the operating system.");
3154  }
3155
3156  return result;
3157}
3158
3159bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) {
3160  bool result = false;
3161  void *p = mmap(NULL, page_size, PROT_READ|PROT_WRITE,
3162                 MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB,
3163                 -1, 0);
3164
3165  if (p != MAP_FAILED) {
3166    // We don't know if this really is a huge page or not.
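    // A sketch of what the scan below relies on (hedged; kernel-dependent):
    // each /proc/self/maps line starts with "start-end" in hex, and a
    // hugetlbfs-backed mapping shows a pathname containing "hugepage"
    // (e.g. "/anon_hugepage (deleted)" on kernels of this era).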
3167    FILE *fp = fopen("/proc/self/maps", "r");
3168    if (fp) {
3169      while (!feof(fp)) {
3170        char chars[257];
3171        long x = 0;
3172        if (fgets(chars, sizeof(chars), fp)) {
3173          if (sscanf(chars, "%lx-%*x", &x) == 1
3174              && x == (long)p) {
3175            if (strstr (chars, "hugepage")) {
3176              result = true;
3177              break;
3178            }
3179          }
3180        }
3181      }
3182      fclose(fp);
3183    }
3184    munmap(p, page_size);
3185  }
3186
3187  if (warn && !result) {
3188    warning("HugeTLBFS is not supported by the operating system.");
3189  }
3190
3191  return result;
3192}
3193
3194// Set the coredump_filter bits to include largepages in core dump (bit 6)
3195//
3196// From the coredump_filter documentation:
3197//
3198// - (bit 0) anonymous private memory
3199// - (bit 1) anonymous shared memory
3200// - (bit 2) file-backed private memory
3201// - (bit 3) file-backed shared memory
3202// - (bit 4) ELF header pages in file-backed private memory areas (it is
3203//           effective only if the bit 2 is cleared)
3204// - (bit 5) hugetlb private memory
3205// - (bit 6) hugetlb shared memory
3206//
3207static void set_coredump_filter(void) {
3208  FILE *f;
3209  long cdm;
3210
3211  if ((f = fopen("/proc/self/coredump_filter", "r+")) == NULL) {
3212    return;
3213  }
3214
3215  if (fscanf(f, "%lx", &cdm) != 1) {
3216    fclose(f);
3217    return;
3218  }
3219
3220  rewind(f);
3221
3222  if ((cdm & LARGEPAGES_BIT) == 0) {
3223    cdm |= LARGEPAGES_BIT;
3224    fprintf(f, "%#lx", cdm);
3225  }
3226
3227  fclose(f);
3228}
3229
3230// Large page support
3231
3232static size_t _large_page_size = 0;
3233
3234size_t os::Linux::find_large_page_size() {
3235  size_t large_page_size = 0;
3236
  // large_page_size on Linux is used to round up heap size. x86 uses either
  // 2M or 4M pages, depending on whether PAE (Physical Address Extensions)
  // mode is enabled. AMD64/EM64T uses 2M pages in 64-bit mode. IA64 can use
  // pages as large as 256M.
3241  //
3242  // Here we try to figure out page size by parsing /proc/meminfo and looking
3243  // for a line with the following format:
3244  //    Hugepagesize:     2048 kB
3245  //
3246  // If we can't determine the value (e.g. /proc is not mounted, or the text
3247  // format has been changed), we'll use the largest page size supported by
3248  // the processor.
3249
3250#ifndef ZERO
3251  large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M)
3252                     ARM32_ONLY(2 * M) PPC_ONLY(4 * M) AARCH64_ONLY(2 * M);
3253#endif // ZERO
3254
3255  FILE *fp = fopen("/proc/meminfo", "r");
3256  if (fp) {
3257    while (!feof(fp)) {
3258      int x = 0;
3259      char buf[16];
3260      if (fscanf(fp, "Hugepagesize: %d", &x) == 1) {
3261        if (x && fgets(buf, sizeof(buf), fp) && strcmp(buf, " kB\n") == 0) {
3262          large_page_size = x * K;
3263          break;
3264        }
3265      } else {
3266        // skip to next line
3267        for (;;) {
3268          int ch = fgetc(fp);
3269          if (ch == EOF || ch == (int)'\n') break;
3270        }
3271      }
3272    }
3273    fclose(fp);
3274  }
3275
3276  if (!FLAG_IS_DEFAULT(LargePageSizeInBytes) && LargePageSizeInBytes != large_page_size) {
3277    warning("Setting LargePageSizeInBytes has no effect on this OS. Large page size is "
3278            SIZE_FORMAT "%s.", byte_size_in_proper_unit(large_page_size),
3279            proper_unit_for_byte_size(large_page_size));
3280  }
3281
3282  return large_page_size;
3283}
3284
3285size_t os::Linux::setup_large_page_size() {
3286  _large_page_size = Linux::find_large_page_size();
3287  const size_t default_page_size = (size_t)Linux::page_size();
3288  if (_large_page_size > default_page_size) {
3289    _page_sizes[0] = _large_page_size;
3290    _page_sizes[1] = default_page_size;
3291    _page_sizes[2] = 0;
3292  }
3293
3294  return _large_page_size;
3295}
3296
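// Choose among the large-page mechanisms. If the user specified none,
// UseHugeTLBFS and UseSHM are enabled as candidates (THP stays off by
// default for performance reasons). The checks below then pick the first
// enabled mechanism whose sanity check passes: THP, then HugeTLBFS, then SHM.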
3297bool os::Linux::setup_large_page_type(size_t page_size) {
3298  if (FLAG_IS_DEFAULT(UseHugeTLBFS) &&
3299      FLAG_IS_DEFAULT(UseSHM) &&
3300      FLAG_IS_DEFAULT(UseTransparentHugePages)) {
3301
3302    // The type of large pages has not been specified by the user.
3303
3304    // Try UseHugeTLBFS and then UseSHM.
3305    UseHugeTLBFS = UseSHM = true;
3306
3307    // Don't try UseTransparentHugePages since there are known
3308    // performance issues with it turned on. This might change in the future.
3309    UseTransparentHugePages = false;
3310  }
3311
3312  if (UseTransparentHugePages) {
3313    bool warn_on_failure = !FLAG_IS_DEFAULT(UseTransparentHugePages);
3314    if (transparent_huge_pages_sanity_check(warn_on_failure, page_size)) {
3315      UseHugeTLBFS = false;
3316      UseSHM = false;
3317      return true;
3318    }
3319    UseTransparentHugePages = false;
3320  }
3321
3322  if (UseHugeTLBFS) {
3323    bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS);
3324    if (hugetlbfs_sanity_check(warn_on_failure, page_size)) {
3325      UseSHM = false;
3326      return true;
3327    }
3328    UseHugeTLBFS = false;
3329  }
3330
3331  return UseSHM;
3332}
3333
3334void os::large_page_init() {
3335  if (!UseLargePages &&
3336      !UseTransparentHugePages &&
3337      !UseHugeTLBFS &&
3338      !UseSHM) {
3339    // Not using large pages.
3340    return;
3341  }
3342
3343  if (!FLAG_IS_DEFAULT(UseLargePages) && !UseLargePages) {
3344    // The user explicitly turned off large pages.
3345    // Ignore the rest of the large pages flags.
3346    UseTransparentHugePages = false;
3347    UseHugeTLBFS = false;
3348    UseSHM = false;
3349    return;
3350  }
3351
3352  size_t large_page_size = Linux::setup_large_page_size();
3353  UseLargePages          = Linux::setup_large_page_type(large_page_size);
3354
3355  set_coredump_filter();
3356}
3357
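// SHM_HUGETLB may be missing from older glibc headers; 04000 (octal) is the
// value the Linux kernel uses.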
3358#ifndef SHM_HUGETLB
3359  #define SHM_HUGETLB 04000
3360#endif
3361
3362char* os::Linux::reserve_memory_special_shm(size_t bytes, size_t alignment,
3363                                            char* req_addr, bool exec) {
3364  // "exec" is passed in but not used.  Creating the shared image for
3365  // the code cache doesn't have an SHM_X executable permission to check.
3366  assert(UseLargePages && UseSHM, "only for SHM large pages");
3367  assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address");
3368
3369  if (!is_size_aligned(bytes, os::large_page_size()) || alignment > os::large_page_size()) {
3370    return NULL; // Fallback to small pages.
3371  }
3372
3373  key_t key = IPC_PRIVATE;
3374  char *addr;
3375
3376  bool warn_on_failure = UseLargePages &&
3377                        (!FLAG_IS_DEFAULT(UseLargePages) ||
3378                         !FLAG_IS_DEFAULT(UseSHM) ||
3379                         !FLAG_IS_DEFAULT(LargePageSizeInBytes));
3380  char msg[128];
3381
3382  // Create a large shared memory region to attach to based on size.
3383  // Currently, size is the total size of the heap
3384  int shmid = shmget(key, bytes, SHM_HUGETLB|IPC_CREAT|SHM_R|SHM_W);
3385  if (shmid == -1) {
3386    // Possible reasons for shmget failure:
3387    // 1. shmmax is too small for Java heap.
3388    //    > check shmmax value: cat /proc/sys/kernel/shmmax
3389    //    > increase shmmax value: echo "0xffffffff" > /proc/sys/kernel/shmmax
3390    // 2. not enough large page memory.
3391    //    > check available large pages: cat /proc/meminfo
3392    //    > increase amount of large pages:
3393    //          echo new_value > /proc/sys/vm/nr_hugepages
    //      Note 1: different Linux distributions may use different names for
    //            this property, e.g. on Redhat AS-3 it is "hugetlb_pool".
    //      Note 2: it's possible there's enough physical memory available but
    //            it is so fragmented after a long run that it can't
    //            coalesce into large pages. Try to reserve large pages when
    //            the system is still "fresh".
3400    if (warn_on_failure) {
3401      jio_snprintf(msg, sizeof(msg), "Failed to reserve shared memory (errno = %d).", errno);
3402      warning("%s", msg);
3403    }
3404    return NULL;
3405  }
3406
3407  // attach to the region
3408  addr = (char*)shmat(shmid, req_addr, 0);
3409  int err = errno;
3410
3411  // Remove shmid. If shmat() is successful, the actual shared memory segment
3412  // will be deleted when it's detached by shmdt() or when the process
3413  // terminates. If shmat() is not successful this will remove the shared
3414  // segment immediately.
3415  shmctl(shmid, IPC_RMID, NULL);
3416
3417  if ((intptr_t)addr == -1) {
3418    if (warn_on_failure) {
3419      jio_snprintf(msg, sizeof(msg), "Failed to attach shared memory (errno = %d).", err);
3420      warning("%s", msg);
3421    }
3422    return NULL;
3423  }
3424
3425  return addr;
3426}
3427
3428static void warn_on_large_pages_failure(char* req_addr, size_t bytes,
3429                                        int error) {
3430  assert(error == ENOMEM, "Only expect to fail if no memory is available");
3431
3432  bool warn_on_failure = UseLargePages &&
3433      (!FLAG_IS_DEFAULT(UseLargePages) ||
3434       !FLAG_IS_DEFAULT(UseHugeTLBFS) ||
3435       !FLAG_IS_DEFAULT(LargePageSizeInBytes));
3436
3437  if (warn_on_failure) {
3438    char msg[128];
3439    jio_snprintf(msg, sizeof(msg), "Failed to reserve large pages memory req_addr: "
3440                 PTR_FORMAT " bytes: " SIZE_FORMAT " (errno = %d).", req_addr, bytes, error);
3441    warning("%s", msg);
3442  }
3443}
3444
3445char* os::Linux::reserve_memory_special_huge_tlbfs_only(size_t bytes,
3446                                                        char* req_addr,
3447                                                        bool exec) {
3448  assert(UseLargePages && UseHugeTLBFS, "only for Huge TLBFS large pages");
3449  assert(is_size_aligned(bytes, os::large_page_size()), "Unaligned size");
3450  assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address");
3451
3452  int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
3453  char* addr = (char*)::mmap(req_addr, bytes, prot,
3454                             MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB,
3455                             -1, 0);
3456
3457  if (addr == MAP_FAILED) {
3458    warn_on_large_pages_failure(req_addr, bytes, errno);
3459    return NULL;
3460  }
3461
3462  assert(is_ptr_aligned(addr, os::large_page_size()), "Must be");
3463
3464  return addr;
3465}
3466
3467char* os::Linux::reserve_memory_special_huge_tlbfs_mixed(size_t bytes,
3468                                                         size_t alignment,
3469                                                         char* req_addr,
3470                                                         bool exec) {
3471  size_t large_page_size = os::large_page_size();
3472
3473  assert(bytes >= large_page_size, "Shouldn't allocate large pages for small sizes");
3474
3475  // Allocate small pages.
3476
3477  char* start;
3478  if (req_addr != NULL) {
3479    assert(is_ptr_aligned(req_addr, alignment), "Must be");
3480    assert(is_size_aligned(bytes, alignment), "Must be");
3481    start = os::reserve_memory(bytes, req_addr);
3482    assert(start == NULL || start == req_addr, "Must be");
3483  } else {
3484    start = os::reserve_memory_aligned(bytes, alignment);
3485  }
3486
3487  if (start == NULL) {
3488    return NULL;
3489  }
3490
3491  assert(is_ptr_aligned(start, alignment), "Must be");
3492
3493  if (MemTracker::tracking_level() > NMT_minimal) {
3494    // os::reserve_memory_special will record this memory area.
3495    // Need to release it here to prevent overlapping reservations.
3496    Tracker tkr = MemTracker::get_virtual_memory_release_tracker();
3497    tkr.record((address)start, bytes);
3498  }
3499
3500  char* end = start + bytes;
3501
3502  // Find the regions of the allocated chunk that can be promoted to large pages.
3503  char* lp_start = (char*)align_ptr_up(start, large_page_size);
3504  char* lp_end   = (char*)align_ptr_down(end, large_page_size);
3505
3506  size_t lp_bytes = lp_end - lp_start;
3507
3508  assert(is_size_aligned(lp_bytes, large_page_size), "Must be");
3509
3510  if (lp_bytes == 0) {
3511    // The mapped region doesn't even span the start and the end of a large page.
3512    // Fall back to allocate a non-special area.
3513    ::munmap(start, end - start);
3514    return NULL;
3515  }
3516
3517  int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
3518
3519
3520  void* result;
3521
3522  if (start != lp_start) {
3523    result = ::mmap(start, lp_start - start, prot,
3524                    MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED,
3525                    -1, 0);
3526    if (result == MAP_FAILED) {
3527      ::munmap(lp_start, end - lp_start);
3528      return NULL;
3529    }
3530  }
3531
3532  result = ::mmap(lp_start, lp_bytes, prot,
3533                  MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED|MAP_HUGETLB,
3534                  -1, 0);
3535  if (result == MAP_FAILED) {
3536    warn_on_large_pages_failure(req_addr, bytes, errno);
3537    // If the mmap above fails, the large pages region will be unmapped and we
3538    // have regions before and after with small pages. Release these regions.
3539    //
3540    // |  mapped  |  unmapped  |  mapped  |
3541    // ^          ^            ^          ^
3542    // start      lp_start     lp_end     end
3543    //
3544    ::munmap(start, lp_start - start);
3545    ::munmap(lp_end, end - lp_end);
3546    return NULL;
3547  }
3548
3549  if (lp_end != end) {
3550    result = ::mmap(lp_end, end - lp_end, prot,
3551                    MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED,
3552                    -1, 0);
3553    if (result == MAP_FAILED) {
3554      ::munmap(start, lp_end - start);
3555      return NULL;
3556    }
3557  }
3558
3559  return start;
3560}
3561
3562char* os::Linux::reserve_memory_special_huge_tlbfs(size_t bytes,
3563                                                   size_t alignment,
3564                                                   char* req_addr,
3565                                                   bool exec) {
3566  assert(UseLargePages && UseHugeTLBFS, "only for Huge TLBFS large pages");
3567  assert(is_ptr_aligned(req_addr, alignment), "Must be");
3568  assert(is_power_of_2(alignment), "Must be");
3569  assert(is_power_of_2(os::large_page_size()), "Must be");
3570  assert(bytes >= os::large_page_size(), "Shouldn't allocate large pages for small sizes");
3571
3572  if (is_size_aligned(bytes, os::large_page_size()) && alignment <= os::large_page_size()) {
3573    return reserve_memory_special_huge_tlbfs_only(bytes, req_addr, exec);
3574  } else {
3575    return reserve_memory_special_huge_tlbfs_mixed(bytes, alignment, req_addr, exec);
3576  }
3577}
3578
3579char* os::reserve_memory_special(size_t bytes, size_t alignment,
3580                                 char* req_addr, bool exec) {
3581  assert(UseLargePages, "only for large pages");
3582
3583  char* addr;
3584  if (UseSHM) {
3585    addr = os::Linux::reserve_memory_special_shm(bytes, alignment, req_addr, exec);
3586  } else {
3587    assert(UseHugeTLBFS, "must be");
3588    addr = os::Linux::reserve_memory_special_huge_tlbfs(bytes, alignment, req_addr, exec);
3589  }
3590
3591  if (addr != NULL) {
3592    if (UseNUMAInterleaving) {
3593      numa_make_global(addr, bytes);
3594    }
3595
3596    // The memory is committed
3597    MemTracker::record_virtual_memory_reserve_and_commit((address)addr, bytes, CALLER_PC);
3598  }
3599
3600  return addr;
3601}
3602
3603bool os::Linux::release_memory_special_shm(char* base, size_t bytes) {
3604  // detaching the SHM segment will also delete it, see reserve_memory_special_shm()
3605  return shmdt(base) == 0;
3606}
3607
3608bool os::Linux::release_memory_special_huge_tlbfs(char* base, size_t bytes) {
3609  return pd_release_memory(base, bytes);
3610}
3611
3612bool os::release_memory_special(char* base, size_t bytes) {
3613  bool res;
3614  if (MemTracker::tracking_level() > NMT_minimal) {
3615    Tracker tkr = MemTracker::get_virtual_memory_release_tracker();
3616    res = os::Linux::release_memory_special_impl(base, bytes);
3617    if (res) {
3618      tkr.record((address)base, bytes);
3619    }
3620
3621  } else {
3622    res = os::Linux::release_memory_special_impl(base, bytes);
3623  }
3624  return res;
3625}
3626
3627bool os::Linux::release_memory_special_impl(char* base, size_t bytes) {
3628  assert(UseLargePages, "only for large pages");
3629  bool res;
3630
3631  if (UseSHM) {
3632    res = os::Linux::release_memory_special_shm(base, bytes);
3633  } else {
3634    assert(UseHugeTLBFS, "must be");
3635    res = os::Linux::release_memory_special_huge_tlbfs(base, bytes);
3636  }
3637  return res;
3638}
3639
3640size_t os::large_page_size() {
3641  return _large_page_size;
3642}
3643
3644// With SysV SHM the entire memory region must be allocated as shared
3645// memory.
// HugeTLBFS allows the application to commit large page memory on demand.
// However, when committing memory with HugeTLBFS fails, the region
// that was supposed to be committed loses its old reservation,
// allowing other threads to steal that memory region. Because of this
// behavior we can't commit HugeTLBFS memory.
3651bool os::can_commit_large_page_memory() {
3652  return UseTransparentHugePages;
3653}
3654
3655bool os::can_execute_large_page_memory() {
3656  return UseTransparentHugePages || UseHugeTLBFS;
3657}
3658
3659// Reserve memory at an arbitrary address, only if that area is
3660// available (and not reserved for something else).
3661
3662char* os::pd_attempt_reserve_memory_at(size_t bytes, char* requested_addr) {
3663  const int max_tries = 10;
3664  char* base[max_tries];
3665  size_t size[max_tries];
3666  const size_t gap = 0x000000;
3667
3668  // Assert only that the size is a multiple of the page size, since
3669  // that's all that mmap requires, and since that's all we really know
3670  // about at this low abstraction level.  If we need higher alignment,
3671  // we can either pass an alignment to this method or verify alignment
3672  // in one of the methods further up the call chain.  See bug 5044738.
3673  assert(bytes % os::vm_page_size() == 0, "reserving unexpected size block");
3674
3675  // Repeatedly allocate blocks until the block is allocated at the
3676  // right spot. Give up after max_tries. Note that reserve_memory() will
3677  // automatically update _highest_vm_reserved_address if the call is
// successful. The variable tracks the highest memory address ever reserved
// by the JVM. It is used to detect heap-stack collisions when running with
// fixed-stack LinuxThreads. Because here we may attempt to reserve more
// space than needed, it could confuse the collision-detecting code. To
// solve the problem, save the current _highest_vm_reserved_address and
// restore the correct value before returning.
3684  address old_highest = _highest_vm_reserved_address;
3685
  // Linux mmap allows the caller to pass an address as a hint; give it a try
  // first. If the kernel honors the hint, we can return immediately.
3688  char * addr = anon_mmap(requested_addr, bytes, false);
3689  if (addr == requested_addr) {
3690    return requested_addr;
3691  }
3692
3693  if (addr != NULL) {
    // mmap() succeeded, but not at the requested address; give the region back
3695    anon_munmap(addr, bytes);
3696  }
3697
3698  int i;
3699  for (i = 0; i < max_tries; ++i) {
3700    base[i] = reserve_memory(bytes);
3701
3702    if (base[i] != NULL) {
3703      // Is this the block we wanted?
3704      if (base[i] == requested_addr) {
3705        size[i] = bytes;
3706        break;
3707      }
3708
3709      // Does this overlap the block we wanted? Give back the overlapped
3710      // parts and try again.
3711
3712      ptrdiff_t top_overlap = requested_addr + (bytes + gap) - base[i];
3713      if (top_overlap >= 0 && (size_t)top_overlap < bytes) {
3714        unmap_memory(base[i], top_overlap);
3715        base[i] += top_overlap;
3716        size[i] = bytes - top_overlap;
3717      } else {
3718        ptrdiff_t bottom_overlap = base[i] + bytes - requested_addr;
3719        if (bottom_overlap >= 0 && (size_t)bottom_overlap < bytes) {
3720          unmap_memory(requested_addr, bottom_overlap);
3721          size[i] = bytes - bottom_overlap;
3722        } else {
3723          size[i] = bytes;
3724        }
3725      }
3726    }
3727  }
3728
3729  // Give back the unused reserved pieces.
3730
3731  for (int j = 0; j < i; ++j) {
3732    if (base[j] != NULL) {
3733      unmap_memory(base[j], size[j]);
3734    }
3735  }
3736
3737  if (i < max_tries) {
3738    _highest_vm_reserved_address = MAX2(old_highest, (address)requested_addr + bytes);
3739    return requested_addr;
3740  } else {
3741    _highest_vm_reserved_address = old_highest;
3742    return NULL;
3743  }
3744}
3745
3746size_t os::read(int fd, void *buf, unsigned int nBytes) {
3747  return ::read(fd, buf, nBytes);
3748}
3749
3750size_t os::read_at(int fd, void *buf, unsigned int nBytes, jlong offset) {
3751  return ::pread(fd, buf, nBytes, offset);
3752}
3753
3754// Short sleep, direct OS call.
3755//
// Note: certain versions of the Linux CFS scheduler (since 2.6.23) do not
// guarantee that sched_yield(2) will actually give up the CPU:
//
//   * A thread alone on its particular CPU just keeps running.
//   * Before the introduction of "skip_buddy" (pre 2.6.39), with
//     "compat_yield" disabled, yield could be a no-op.
//
// So calling this with 0 is an alternative.
3764//
3765void os::naked_short_sleep(jlong ms) {
3766  struct timespec req;
3767
  assert(ms < 1000, "Uninterruptible sleep, short time use only");
3769  req.tv_sec = 0;
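  // ms is asserted to be < 1000, so the modulo below is defensive. For
  // ms == 0 we request a 1 ns sleep so the thread still enters the kernel;
  // this is the alternative to sched_yield() mentioned in the note above.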
3770  if (ms > 0) {
3771    req.tv_nsec = (ms % 1000) * 1000000;
3772  } else {
3773    req.tv_nsec = 1;
3774  }
3775
3776  nanosleep(&req, NULL);
3777
3778  return;
3779}
3780
3781// Sleep forever; naked call to OS-specific sleep; use with CAUTION
3782void os::infinite_sleep() {
3783  while (true) {    // sleep forever ...
3784    ::sleep(100);   // ... 100 seconds at a time
3785  }
3786}
3787
3788// Used to convert frequent JVM_Yield() to nops
3789bool os::dont_yield() {
3790  return DontYieldALot;
3791}
3792
3793void os::naked_yield() {
3794  sched_yield();
3795}
3796
3797////////////////////////////////////////////////////////////////////////////////
3798// thread priority support
3799
3800// Note: Normal Linux applications are run with SCHED_OTHER policy. SCHED_OTHER
3801// only supports dynamic priority, static priority must be zero. For real-time
3802// applications, Linux supports SCHED_RR which allows static priority (1-99).
3803// However, for large multi-threaded applications, SCHED_RR is not only slower
3804// than SCHED_OTHER, but also very unstable (my volano tests hang hard 4 out
3805// of 5 runs - Sep 2005).
3806//
3807// The following code actually changes the niceness of kernel-thread/LWP. It
3808// has an assumption that setpriority() only modifies one kernel-thread/LWP,
3809// not the entire user process, and user level threads are 1:1 mapped to kernel
3810// threads. It has always been the case, but could change in the future. For
3811// this reason, the code should not be used as default (ThreadPriorityPolicy=0).
3812// It is only used when ThreadPriorityPolicy=1 and requires root privilege.
3813
3814int os::java_to_os_priority[CriticalPriority + 1] = {
3815  19,              // 0 Entry should never be used
3816
3817   4,              // 1 MinPriority
3818   3,              // 2
3819   2,              // 3
3820
3821   1,              // 4
3822   0,              // 5 NormPriority
3823  -1,              // 6
3824
3825  -2,              // 7
3826  -3,              // 8
3827  -4,              // 9 NearMaxPriority
3828
3829  -5,              // 10 MaxPriority
3830
3831  -5               // 11 CriticalPriority
3832};
3833
3834static int prio_init() {
3835  if (ThreadPriorityPolicy == 1) {
3836    // Only root can raise thread priority. Don't allow ThreadPriorityPolicy=1
3837    // if effective uid is not root. Perhaps, a more elegant way of doing
3838    // this is to test CAP_SYS_NICE capability, but that will require libcap.so
3839    if (geteuid() != 0) {
3840      if (!FLAG_IS_DEFAULT(ThreadPriorityPolicy)) {
3841        warning("-XX:ThreadPriorityPolicy requires root privilege on Linux");
3842      }
3843      ThreadPriorityPolicy = 0;
3844    }
3845  }
3846  if (UseCriticalJavaThreadPriority) {
3847    os::java_to_os_priority[MaxPriority] = os::java_to_os_priority[CriticalPriority];
3848  }
3849  return 0;
3850}
3851
3852OSReturn os::set_native_priority(Thread* thread, int newpri) {
3853  if (!UseThreadPriorities || ThreadPriorityPolicy == 0) return OS_OK;
3854
3855  int ret = setpriority(PRIO_PROCESS, thread->osthread()->thread_id(), newpri);
3856  return (ret == 0) ? OS_OK : OS_ERR;
3857}
3858
3859OSReturn os::get_native_priority(const Thread* const thread,
3860                                 int *priority_ptr) {
3861  if (!UseThreadPriorities || ThreadPriorityPolicy == 0) {
3862    *priority_ptr = java_to_os_priority[NormPriority];
3863    return OS_OK;
3864  }
3865
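  // getpriority() may legitimately return -1 (a valid niceness), so clear
  // errno first and treat -1 as an error only when errno is also set.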
3866  errno = 0;
3867  *priority_ptr = getpriority(PRIO_PROCESS, thread->osthread()->thread_id());
3868  return (*priority_ptr != -1 || errno == 0 ? OS_OK : OS_ERR);
3869}
3870
3871// Hint to the underlying OS that a task switch would not be good.
3872// Void return because it's a hint and can fail.
3873void os::hint_no_preempt() {}
3874
3875////////////////////////////////////////////////////////////////////////////////
3876// suspend/resume support
3877
//  The low-level signal-based suspend/resume support is a remnant from the
//  old VM-suspension mechanism that used to be used for java-suspension,
//  safepoints, etc. within hotspot. Now there is a single use-case for this:
3881//    - calling get_thread_pc() on the VMThread by the flat-profiler task
3882//      that runs in the watcher thread.
3883//  The remaining code is greatly simplified from the more general suspension
3884//  code that used to be used.
3885//
3886//  The protocol is quite simple:
3887//  - suspend:
3888//      - sends a signal to the target thread
3889//      - polls the suspend state of the osthread using a yield loop
3890//      - target thread signal handler (SR_handler) sets suspend state
3891//        and blocks in sigsuspend until continued
3892//  - resume:
3893//      - sets target osthread state to continue
3894//      - sends signal to end the sigsuspend loop in the SR_handler
3895//
3896//  Note that the SR_lock plays no role in this suspend/resume protocol.
3897
3898static void resume_clear_context(OSThread *osthread) {
3899  osthread->set_ucontext(NULL);
3900  osthread->set_siginfo(NULL);
3901}
3902
3903static void suspend_save_context(OSThread *osthread, siginfo_t* siginfo,
3904                                 ucontext_t* context) {
3905  osthread->set_ucontext(context);
3906  osthread->set_siginfo(siginfo);
3907}
3908
3909// Handler function invoked when a thread's execution is suspended or
3910// resumed. We have to be careful that only async-safe functions are
3911// called here (Note: most pthread functions are not async safe and
3912// should be avoided.)
3913//
3914// Note: sigwait() is a more natural fit than sigsuspend() from an
// interface point of view, but sigwait() prevents the signal handler
// from being run. libpthread would get very confused by not having
// its signal handlers run, which prevents sigwait()'s use with the
// mutex-granting signal.
3919//
3920// Currently only ever called on the VMThread and JavaThreads (PC sampling)
3921//
3922static void SR_handler(int sig, siginfo_t* siginfo, ucontext_t* context) {
3923  // Save and restore errno to avoid confusing native code with EINTR
3924  // after sigsuspend.
3925  int old_errno = errno;
3926
3927  Thread* thread = Thread::current();
3928  OSThread* osthread = thread->osthread();
3929  assert(thread->is_VM_thread() || thread->is_Java_thread(), "Must be VMThread or JavaThread");
3930
3931  os::SuspendResume::State current = osthread->sr.state();
3932  if (current == os::SuspendResume::SR_SUSPEND_REQUEST) {
3933    suspend_save_context(osthread, siginfo, context);
3934
3935    // attempt to switch the state, we assume we had a SUSPEND_REQUEST
3936    os::SuspendResume::State state = osthread->sr.suspended();
3937    if (state == os::SuspendResume::SR_SUSPENDED) {
3938      sigset_t suspend_set;  // signals for sigsuspend()
3939
3940      // get current set of blocked signals and unblock resume signal
3941      pthread_sigmask(SIG_BLOCK, NULL, &suspend_set);
3942      sigdelset(&suspend_set, SR_signum);
3943
3944      sr_semaphore.signal();
3945      // wait here until we are resumed
3946      while (1) {
3947        sigsuspend(&suspend_set);
3948
3949        os::SuspendResume::State result = osthread->sr.running();
3950        if (result == os::SuspendResume::SR_RUNNING) {
3951          sr_semaphore.signal();
3952          break;
3953        }
3954      }
3955
3956    } else if (state == os::SuspendResume::SR_RUNNING) {
3957      // request was cancelled, continue
3958    } else {
3959      ShouldNotReachHere();
3960    }
3961
3962    resume_clear_context(osthread);
3963  } else if (current == os::SuspendResume::SR_RUNNING) {
3964    // request was cancelled, continue
3965  } else if (current == os::SuspendResume::SR_WAKEUP_REQUEST) {
3966    // ignore
3967  } else {
3968    // ignore
3969  }
3970
3971  errno = old_errno;
3972}
3973
3974
3975static int SR_initialize() {
3976  struct sigaction act;
3977  char *s;
3978  // Get signal number to use for suspend/resume
3979  if ((s = ::getenv("_JAVA_SR_SIGNUM")) != 0) {
3980    int sig = ::strtol(s, 0, 10);
    if (sig > 0 && sig < _NSIG) {
3982      SR_signum = sig;
3983    }
3984  }
3985
3986  assert(SR_signum > SIGSEGV && SR_signum > SIGBUS,
3987         "SR_signum must be greater than max(SIGSEGV, SIGBUS), see 4355769");
3988
3989  sigemptyset(&SR_sigset);
3990  sigaddset(&SR_sigset, SR_signum);
3991
3992  // Set up signal handler for suspend/resume
3993  act.sa_flags = SA_RESTART|SA_SIGINFO;
3994  act.sa_handler = (void (*)(int)) SR_handler;
3995
  // SR_signum is blocked by default.
  // 4528190 - We also need to block the pthread restart signal (32 on all
  // supported Linux platforms). Note that LinuxThreads needs to block
  // this signal for all threads to work properly, so querying the current
  // mask below spares us from hard-coding the signal number.
4001  pthread_sigmask(SIG_BLOCK, NULL, &act.sa_mask);
4002
4003  if (sigaction(SR_signum, &act, 0) == -1) {
4004    return -1;
4005  }
4006
4007  // Save signal flag
4008  os::Linux::set_our_sigflags(SR_signum, act.sa_flags);
4009  return 0;
4010}
4011
4012static int sr_notify(OSThread* osthread) {
4013  int status = pthread_kill(osthread->pthread_id(), SR_signum);
4014  assert_status(status == 0, status, "pthread_kill");
4015  return status;
4016}
4017
4018// "Randomly" selected value for how long we want to spin
4019// before bailing out on suspending a thread, also how often
4020// we send a signal to a thread we want to resume
4021static const int RANDOMLY_LARGE_INTEGER = 1000000;
4022static const int RANDOMLY_LARGE_INTEGER2 = 100;
4023
4024// returns true on success and false on error - really an error is fatal
4025// but this seems the normal response to library errors
4026static bool do_suspend(OSThread* osthread) {
4027  assert(osthread->sr.is_running(), "thread should be running");
4028  assert(!sr_semaphore.trywait(), "semaphore has invalid state");
4029
4030  // mark as suspended and send signal
4031  if (osthread->sr.request_suspend() != os::SuspendResume::SR_SUSPEND_REQUEST) {
4032    // failed to switch, state wasn't running?
4033    ShouldNotReachHere();
4034    return false;
4035  }
4036
4037  if (sr_notify(osthread) != 0) {
4038    ShouldNotReachHere();
4039  }
4040
4041  // managed to send the signal and switch to SUSPEND_REQUEST, now wait for SUSPENDED
4042  while (true) {
4043    if (sr_semaphore.timedwait(0, 2 * NANOSECS_PER_MILLISEC)) {
4044      break;
4045    } else {
4046      // timeout
4047      os::SuspendResume::State cancelled = osthread->sr.cancel_suspend();
4048      if (cancelled == os::SuspendResume::SR_RUNNING) {
4049        return false;
4050      } else if (cancelled == os::SuspendResume::SR_SUSPENDED) {
4051        // make sure that we consume the signal on the semaphore as well
4052        sr_semaphore.wait();
4053        break;
4054      } else {
4055        ShouldNotReachHere();
4056        return false;
4057      }
4058    }
4059  }
4060
4061  guarantee(osthread->sr.is_suspended(), "Must be suspended");
4062  return true;
4063}
4064
4065static void do_resume(OSThread* osthread) {
4066  assert(osthread->sr.is_suspended(), "thread should be suspended");
4067  assert(!sr_semaphore.trywait(), "invalid semaphore state");
4068
4069  if (osthread->sr.request_wakeup() != os::SuspendResume::SR_WAKEUP_REQUEST) {
4070    // failed to switch to WAKEUP_REQUEST
4071    ShouldNotReachHere();
4072    return;
4073  }
4074
4075  while (true) {
4076    if (sr_notify(osthread) == 0) {
4077      if (sr_semaphore.timedwait(0, 2 * NANOSECS_PER_MILLISEC)) {
4078        if (osthread->sr.is_running()) {
4079          return;
4080        }
4081      }
4082    } else {
4083      ShouldNotReachHere();
4084    }
4085  }
4086
4087  guarantee(osthread->sr.is_running(), "Must be running!");
4088}
4089
4090///////////////////////////////////////////////////////////////////////////////////
4091// signal handling (except suspend/resume)
4092
4093// This routine may be used by user applications as a "hook" to catch signals.
4094// The user-defined signal handler must pass unrecognized signals to this
4095// routine, and if it returns true (non-zero), then the signal handler must
4096// return immediately.  If the flag "abort_if_unrecognized" is true, then this
// routine will never return false (zero), but instead will execute a VM panic
// routine to kill the process.
4099//
4100// If this routine returns false, it is OK to call it again.  This allows
4101// the user-defined signal handler to perform checks either before or after
4102// the VM performs its own checks.  Naturally, the user code would be making
4103// a serious error if it tried to handle an exception (such as a null check
4104// or breakpoint) that the VM was generating for its own correct operation.
4105//
4106// This routine may recognize any of the following kinds of signals:
4107//    SIGBUS, SIGSEGV, SIGILL, SIGFPE, SIGQUIT, SIGPIPE, SIGXFSZ, SIGUSR1.
4108// It should be consulted by handlers for any of those signals.
4109//
4110// The caller of this routine must pass in the three arguments supplied
4111// to the function referred to in the "sa_sigaction" (not the "sa_handler")
4112// field of the structure passed to sigaction().  This routine assumes that
4113// the sa_flags field passed to sigaction() includes SA_SIGINFO and SA_RESTART.
4114//
4115// Note that the VM will print warnings if it detects conflicting signal
4116// handlers, unless invoked with the option "-XX:+AllowUserSignalHandlers".
4117//
4118extern "C" JNIEXPORT int JVM_handle_linux_signal(int signo,
4119                                                 siginfo_t* siginfo,
4120                                                 void* ucontext,
4121                                                 int abort_if_unrecognized);
4122
4123void signalHandler(int sig, siginfo_t* info, void* uc) {
4124  assert(info != NULL && uc != NULL, "it must be old kernel");
4125  int orig_errno = errno;  // Preserve errno value over signal handler.
4126  JVM_handle_linux_signal(sig, info, uc, true);
4127  errno = orig_errno;
4128}
4129
4130
4131// This boolean allows users to forward their own non-matching signals
4132// to JVM_handle_linux_signal, harmlessly.
4133bool os::Linux::signal_handlers_are_installed = false;
4134
4135// For signal-chaining
4136struct sigaction os::Linux::sigact[MAXSIGNUM];
4137unsigned int os::Linux::sigs = 0;
4138bool os::Linux::libjsig_is_loaded = false;
4139typedef struct sigaction *(*get_signal_t)(int);
4140get_signal_t os::Linux::get_signal_action = NULL;
4141
4142struct sigaction* os::Linux::get_chained_signal_action(int sig) {
4143  struct sigaction *actp = NULL;
4144
4145  if (libjsig_is_loaded) {
4146    // Retrieve the old signal handler from libjsig
4147    actp = (*get_signal_action)(sig);
4148  }
4149  if (actp == NULL) {
4150    // Retrieve the preinstalled signal handler from jvm
4151    actp = get_preinstalled_handler(sig);
4152  }
4153
4154  return actp;
4155}
4156
4157static bool call_chained_handler(struct sigaction *actp, int sig,
4158                                 siginfo_t *siginfo, void *context) {
4159  // Call the old signal handler
4160  if (actp->sa_handler == SIG_DFL) {
4161    // It's more reasonable to let jvm treat it as an unexpected exception
4162    // instead of taking the default action.
4163    return false;
4164  } else if (actp->sa_handler != SIG_IGN) {
4165    if ((actp->sa_flags & SA_NODEFER) == 0) {
      // automatically block the signal
4167      sigaddset(&(actp->sa_mask), sig);
4168    }
4169
4170    sa_handler_t hand;
4171    sa_sigaction_t sa;
4172    bool siginfo_flag_set = (actp->sa_flags & SA_SIGINFO) != 0;
4173    // retrieve the chained handler
4174    if (siginfo_flag_set) {
4175      sa = actp->sa_sigaction;
4176    } else {
4177      hand = actp->sa_handler;
4178    }
4179
4180    if ((actp->sa_flags & SA_RESETHAND) != 0) {
4181      actp->sa_handler = SIG_DFL;
4182    }
4183
4184    // try to honor the signal mask
4185    sigset_t oset;
4186    pthread_sigmask(SIG_SETMASK, &(actp->sa_mask), &oset);
4187
4188    // call into the chained handler
4189    if (siginfo_flag_set) {
4190      (*sa)(sig, siginfo, context);
4191    } else {
4192      (*hand)(sig);
4193    }
4194
4195    // restore the signal mask
4196    pthread_sigmask(SIG_SETMASK, &oset, 0);
4197  }
4198  // Tell jvm's signal handler the signal is taken care of.
4199  return true;
4200}
4201
4202bool os::Linux::chained_handler(int sig, siginfo_t* siginfo, void* context) {
4203  bool chained = false;
4204  // signal-chaining
4205  if (UseSignalChaining) {
4206    struct sigaction *actp = get_chained_signal_action(sig);
4207    if (actp != NULL) {
4208      chained = call_chained_handler(actp, sig, siginfo, context);
4209    }
4210  }
4211  return chained;
4212}
4213
4214struct sigaction* os::Linux::get_preinstalled_handler(int sig) {
4215  if ((((unsigned int)1 << sig) & sigs) != 0) {
4216    return &sigact[sig];
4217  }
4218  return NULL;
4219}
4220
4221void os::Linux::save_preinstalled_handler(int sig, struct sigaction& oldAct) {
4222  assert(sig > 0 && sig < MAXSIGNUM, "vm signal out of expected range");
4223  sigact[sig] = oldAct;
4224  sigs |= (unsigned int)1 << sig;
4225}
4226
// for diagnostics
4228int os::Linux::sigflags[MAXSIGNUM];
4229
4230int os::Linux::get_our_sigflags(int sig) {
4231  assert(sig > 0 && sig < MAXSIGNUM, "vm signal out of expected range");
4232  return sigflags[sig];
4233}
4234
4235void os::Linux::set_our_sigflags(int sig, int flags) {
4236  assert(sig > 0 && sig < MAXSIGNUM, "vm signal out of expected range");
4237  sigflags[sig] = flags;
4238}
4239
4240void os::Linux::set_signal_handler(int sig, bool set_installed) {
4241  // Check for overwrite.
4242  struct sigaction oldAct;
4243  sigaction(sig, (struct sigaction*)NULL, &oldAct);
4244
4245  void* oldhand = oldAct.sa_sigaction
4246                ? CAST_FROM_FN_PTR(void*,  oldAct.sa_sigaction)
4247                : CAST_FROM_FN_PTR(void*,  oldAct.sa_handler);
4248  if (oldhand != CAST_FROM_FN_PTR(void*, SIG_DFL) &&
4249      oldhand != CAST_FROM_FN_PTR(void*, SIG_IGN) &&
4250      oldhand != CAST_FROM_FN_PTR(void*, (sa_sigaction_t)signalHandler)) {
4251    if (AllowUserSignalHandlers || !set_installed) {
4252      // Do not overwrite; user takes responsibility to forward to us.
4253      return;
4254    } else if (UseSignalChaining) {
4255      // save the old handler in jvm
4256      save_preinstalled_handler(sig, oldAct);
4257      // libjsig also interposes the sigaction() call below and saves the
      // old sigaction on its own.
4259    } else {
4260      fatal(err_msg("Encountered unexpected pre-existing sigaction handler "
4261                    "%#lx for signal %d.", (long)oldhand, sig));
4262    }
4263  }
4264
4265  struct sigaction sigAct;
4266  sigfillset(&(sigAct.sa_mask));
4267  sigAct.sa_handler = SIG_DFL;
4268  if (!set_installed) {
4269    sigAct.sa_flags = SA_SIGINFO|SA_RESTART;
4270  } else {
4271    sigAct.sa_sigaction = signalHandler;
4272    sigAct.sa_flags = SA_SIGINFO|SA_RESTART;
4273  }
  // Save the flags that we set ourselves
4275  assert(sig > 0 && sig < MAXSIGNUM, "vm signal out of expected range");
4276  sigflags[sig] = sigAct.sa_flags;
4277
4278  int ret = sigaction(sig, &sigAct, &oldAct);
4279  assert(ret == 0, "check");
4280
4281  void* oldhand2  = oldAct.sa_sigaction
4282                  ? CAST_FROM_FN_PTR(void*, oldAct.sa_sigaction)
4283                  : CAST_FROM_FN_PTR(void*, oldAct.sa_handler);
4284  assert(oldhand2 == oldhand, "no concurrent signal handler installation");
4285}
4286
4287// install signal handlers for signals that HotSpot needs to
4288// handle in order to support Java-level exception handling.
4289
4290void os::Linux::install_signal_handlers() {
4291  if (!signal_handlers_are_installed) {
4292    signal_handlers_are_installed = true;
4293
4294    // signal-chaining
4295    typedef void (*signal_setting_t)();
4296    signal_setting_t begin_signal_setting = NULL;
4297    signal_setting_t end_signal_setting = NULL;
4298    begin_signal_setting = CAST_TO_FN_PTR(signal_setting_t,
4299                                          dlsym(RTLD_DEFAULT, "JVM_begin_signal_setting"));
4300    if (begin_signal_setting != NULL) {
4301      end_signal_setting = CAST_TO_FN_PTR(signal_setting_t,
4302                                          dlsym(RTLD_DEFAULT, "JVM_end_signal_setting"));
4303      get_signal_action = CAST_TO_FN_PTR(get_signal_t,
4304                                         dlsym(RTLD_DEFAULT, "JVM_get_signal_action"));
4305      libjsig_is_loaded = true;
4306      assert(UseSignalChaining, "should enable signal-chaining");
4307    }
4308    if (libjsig_is_loaded) {
4309      // Tell libjsig jvm is setting signal handlers
4310      (*begin_signal_setting)();
4311    }
4312
4313    set_signal_handler(SIGSEGV, true);
4314    set_signal_handler(SIGPIPE, true);
4315    set_signal_handler(SIGBUS, true);
4316    set_signal_handler(SIGILL, true);
4317    set_signal_handler(SIGFPE, true);
4318#if defined(PPC64)
4319    set_signal_handler(SIGTRAP, true);
4320#endif
4321    set_signal_handler(SIGXFSZ, true);
4322
4323    if (libjsig_is_loaded) {
4324      // Tell libjsig jvm finishes setting signal handlers
4325      (*end_signal_setting)();
4326    }
4327
    // We don't activate the signal checker if libjsig is in place (we trust
    // ourselves), and if a user signal handler is installed all bets are off.
4330    // Log that signal checking is off only if -verbose:jni is specified.
4331    if (CheckJNICalls) {
4332      if (libjsig_is_loaded) {
4333        if (PrintJNIResolving) {
4334          tty->print_cr("Info: libjsig is activated, all active signal checking is disabled");
4335        }
4336        check_signals = false;
4337      }
4338      if (AllowUserSignalHandlers) {
4339        if (PrintJNIResolving) {
4340          tty->print_cr("Info: AllowUserSignalHandlers is activated, all active signal checking is disabled");
4341        }
4342        check_signals = false;
4343      }
4344    }
4345  }
4346}
4347
4348// This is the fastest way to get thread cpu time on Linux.
// Returns cpu time (user+sys) for any thread, not only for the current one.
// POSIX-compliant clocks are implemented in kernels 2.6.16+.
// It might work on 2.6.10+ with a special kernel/glibc patch.
// For reference, see IEEE Std 1003.1-2004:
4353//   http://www.unix.org/single_unix_specification
4354
4355jlong os::Linux::fast_thread_cpu_time(clockid_t clockid) {
4356  struct timespec tp;
4357  int rc = os::Linux::clock_gettime(clockid, &tp);
4358  assert(rc == 0, "clock_gettime is expected to return 0 code");
4359
4360  return (tp.tv_sec * NANOSECS_PER_SEC) + tp.tv_nsec;
4361}
4362
4363/////
// glibc on Linux uses an undocumented sa_flags bit (0x04000000, the
// kernel's SA_RESTORER flag) to indicate that a special signal
// trampoline is in use.
// We never set this flag ourselves, so we should
// ignore it in our diagnostics.
4369#ifdef SIGNIFICANT_SIGNAL_MASK
4370  #undef SIGNIFICANT_SIGNAL_MASK
4371#endif
4372#define SIGNIFICANT_SIGNAL_MASK (~0x04000000)
4373
4374static const char* get_signal_handler_name(address handler,
4375                                           char* buf, int buflen) {
4376  int offset;
4377  bool found = os::dll_address_to_library_name(handler, buf, buflen, &offset);
4378  if (found) {
4379    // skip directory names
4380    const char *p1, *p2;
4381    p1 = buf;
4382    size_t len = strlen(os::file_separator());
4383    while ((p2 = strstr(p1, os::file_separator())) != NULL) p1 = p2 + len;
4384    jio_snprintf(buf, buflen, "%s+0x%x", p1, offset);
4385  } else {
4386    jio_snprintf(buf, buflen, PTR_FORMAT, handler);
4387  }
4388  return buf;
4389}
4390
4391static void print_signal_handler(outputStream* st, int sig,
4392                                 char* buf, size_t buflen) {
4393  struct sigaction sa;
4394
4395  sigaction(sig, NULL, &sa);
4396
4397  // See comment for SIGNIFICANT_SIGNAL_MASK define
4398  sa.sa_flags &= SIGNIFICANT_SIGNAL_MASK;
4399
4400  st->print("%s: ", os::exception_name(sig, buf, buflen));
4401
4402  address handler = (sa.sa_flags & SA_SIGINFO)
4403    ? CAST_FROM_FN_PTR(address, sa.sa_sigaction)
4404    : CAST_FROM_FN_PTR(address, sa.sa_handler);
4405
4406  if (handler == CAST_FROM_FN_PTR(address, SIG_DFL)) {
4407    st->print("SIG_DFL");
4408  } else if (handler == CAST_FROM_FN_PTR(address, SIG_IGN)) {
4409    st->print("SIG_IGN");
4410  } else {
4411    st->print("[%s]", get_signal_handler_name(handler, buf, buflen));
4412  }
4413
4414  st->print(", sa_mask[0]=");
4415  os::Posix::print_signal_set_short(st, &sa.sa_mask);
4416
4417  address rh = VMError::get_resetted_sighandler(sig);
  // The handler may have been reset by VMError.
4419  if (rh != NULL) {
4420    handler = rh;
4421    sa.sa_flags = VMError::get_resetted_sigflags(sig) & SIGNIFICANT_SIGNAL_MASK;
4422  }
4423
4424  st->print(", sa_flags=");
4425  os::Posix::print_sa_flags(st, sa.sa_flags);
4426
4427  // Check: is it our handler?
4428  if (handler == CAST_FROM_FN_PTR(address, (sa_sigaction_t)signalHandler) ||
4429      handler == CAST_FROM_FN_PTR(address, (sa_sigaction_t)SR_handler)) {
    // It is our signal handler.
    // Check the flags (the system-used bits were masked out above).
    if ((int)sa.sa_flags != os::Linux::get_our_sigflags(sig)) {
      st->print(
                ", flags were changed from " PTR32_FORMAT ", consider using jsig library",
4435                os::Linux::get_our_sigflags(sig));
4436    }
4437  }
4438  st->cr();
4439}
4440
4441
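// Wrapped in do/while(0) so the macro expands to a single statement and
// composes safely with if/else in callers.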
4442#define DO_SIGNAL_CHECK(sig)                      \
4443  do {                                            \
4444    if (!sigismember(&check_signal_done, sig)) {  \
4445      os::Linux::check_signal_handler(sig);       \
4446    }                                             \
4447  } while (0)
4448
// This method is a periodic task used to check for misbehaving JNI applications
// under CheckJNI; we can add any other periodic checks here.
4451
4452void os::run_periodic_checks() {
4453  if (check_signals == false) return;
4454
  // SEGV and BUS, if overridden, could potentially prevent
  // the generation of hs*.log in the event of a crash. Debugging
  // such a case can be very challenging, so we absolutely
  // check the following for good measure:
4459  DO_SIGNAL_CHECK(SIGSEGV);
4460  DO_SIGNAL_CHECK(SIGILL);
4461  DO_SIGNAL_CHECK(SIGFPE);
4462  DO_SIGNAL_CHECK(SIGBUS);
4463  DO_SIGNAL_CHECK(SIGPIPE);
4464  DO_SIGNAL_CHECK(SIGXFSZ);
4465#if defined(PPC64)
4466  DO_SIGNAL_CHECK(SIGTRAP);
4467#endif
4468
4469  // ReduceSignalUsage allows the user to override these handlers
4470  // see comments at the very top and jvm_solaris.h
4471  if (!ReduceSignalUsage) {
4472    DO_SIGNAL_CHECK(SHUTDOWN1_SIGNAL);
4473    DO_SIGNAL_CHECK(SHUTDOWN2_SIGNAL);
4474    DO_SIGNAL_CHECK(SHUTDOWN3_SIGNAL);
4475    DO_SIGNAL_CHECK(BREAK_SIGNAL);
4476  }
4477
4478  DO_SIGNAL_CHECK(SR_signum);
4479  DO_SIGNAL_CHECK(INTERRUPT_SIGNAL);
4480}
4481
4482typedef int (*os_sigaction_t)(int, const struct sigaction *, struct sigaction *);
4483
4484static os_sigaction_t os_sigaction = NULL;
4485
4486void os::Linux::check_signal_handler(int sig) {
4487  char buf[O_BUFLEN];
4488  address jvmHandler = NULL;
4489
4490
4491  struct sigaction act;
4492  if (os_sigaction == NULL) {
4493    // only trust the default sigaction, in case it has been interposed
4494    os_sigaction = (os_sigaction_t)dlsym(RTLD_DEFAULT, "sigaction");
4495    if (os_sigaction == NULL) return;
4496  }
4497
4498  os_sigaction(sig, (struct sigaction*)NULL, &act);
4499
4500
4501  act.sa_flags &= SIGNIFICANT_SIGNAL_MASK;
4502
4503  address thisHandler = (act.sa_flags & SA_SIGINFO)
4504    ? CAST_FROM_FN_PTR(address, act.sa_sigaction)
4505    : CAST_FROM_FN_PTR(address, act.sa_handler);
4506
4507
4508  switch (sig) {
4509  case SIGSEGV:
4510  case SIGBUS:
4511  case SIGFPE:
4512  case SIGPIPE:
4513  case SIGILL:
4514  case SIGXFSZ:
4515    jvmHandler = CAST_FROM_FN_PTR(address, (sa_sigaction_t)signalHandler);
4516    break;
4517
4518  case SHUTDOWN1_SIGNAL:
4519  case SHUTDOWN2_SIGNAL:
4520  case SHUTDOWN3_SIGNAL:
4521  case BREAK_SIGNAL:
4522    jvmHandler = (address)user_handler();
4523    break;
4524
4525  case INTERRUPT_SIGNAL:
4526    jvmHandler = CAST_FROM_FN_PTR(address, SIG_DFL);
4527    break;
4528
4529  default:
4530    if (sig == SR_signum) {
4531      jvmHandler = CAST_FROM_FN_PTR(address, (sa_sigaction_t)SR_handler);
4532    } else {
4533      return;
4534    }
4535    break;
4536  }
4537
4538  if (thisHandler != jvmHandler) {
4539    tty->print("Warning: %s handler ", exception_name(sig, buf, O_BUFLEN));
4540    tty->print("expected:%s", get_signal_handler_name(jvmHandler, buf, O_BUFLEN));
4541    tty->print_cr("  found:%s", get_signal_handler_name(thisHandler, buf, O_BUFLEN));
4542    // No need to check this sig any longer
4543    sigaddset(&check_signal_done, sig);
4544    // When running under a non-interactive shell, SHUTDOWN2_SIGNAL will be reassigned to SIG_IGN
4545    if (sig == SHUTDOWN2_SIGNAL && !isatty(fileno(stdin))) {
4546      tty->print_cr("Running in non-interactive shell, %s handler is replaced by shell",
4547                    exception_name(sig, buf, O_BUFLEN));
4548    }
4549  } else if (os::Linux::get_our_sigflags(sig) != 0 && (int)act.sa_flags != os::Linux::get_our_sigflags(sig)) {
4550    tty->print("Warning: %s handler flags ", exception_name(sig, buf, O_BUFLEN));
4551    tty->print("expected:" PTR32_FORMAT, os::Linux::get_our_sigflags(sig));
4552    tty->print_cr("  found:" PTR32_FORMAT, act.sa_flags);
4553    // No need to check this sig any longer
4554    sigaddset(&check_signal_done, sig);
4555  }
4556
4557  // Dump all the signal handlers
4558  if (sigismember(&check_signal_done, sig)) {
4559    print_signal_handlers(tty, buf, O_BUFLEN);
4560  }
4561}
4562
4563extern void report_error(char* file_name, int line_no, char* title,
4564                         char* format, ...);
4565
4566extern bool signal_name(int signo, char* buf, size_t len);
4567
4568const char* os::exception_name(int exception_code, char* buf, size_t size) {
4569  if (0 < exception_code && exception_code <= SIGRTMAX) {
4570    // signal
4571    if (!signal_name(exception_code, buf, size)) {
4572      jio_snprintf(buf, size, "SIG%d", exception_code);
4573    }
4574    return buf;
4575  } else {
4576    return NULL;
4577  }
4578}
4579
4580// this is called _before_ most of the global arguments have been parsed
4581void os::init(void) {
4582  char dummy;   // used to get a guess on initial stack address
4583//  first_hrtime = gethrtime();
4584
4585  // With LinuxThreads the JavaMain thread pid (primordial thread)
4586  // is different from the pid of the java launcher thread.
4587  // So, on Linux, the launcher thread pid is passed to the VM
4588  // via the sun.java.launcher.pid property.
4589  // Use this property instead of getpid() if it was correctly passed.
4590  // See bug 6351349.
4591  pid_t java_launcher_pid = (pid_t) Arguments::sun_java_launcher_pid();
4592
4593  _initial_pid = (java_launcher_pid > 0) ? java_launcher_pid : getpid();
4594
4595  clock_tics_per_sec = sysconf(_SC_CLK_TCK);
4596
4597  init_random(1234567);
4598
4599  ThreadCritical::initialize();
4600
4601  Linux::set_page_size(sysconf(_SC_PAGESIZE));
4602  if (Linux::page_size() == -1) {
4603    fatal(err_msg("os_linux.cpp: os::init: sysconf failed (%s)",
4604                  strerror(errno)));
4605  }
4606  init_page_sizes((size_t) Linux::page_size());
4607
4608  Linux::initialize_system_info();
4609
4610  // main_thread points to the aboriginal thread
4611  Linux::_main_thread = pthread_self();
4612
4613  Linux::clock_init();
4614  initial_time_count = javaTimeNanos();
4615
4616  // pthread_condattr initialization for monotonic clock
4617  int status;
4618  pthread_condattr_t* _condattr = os::Linux::condAttr();
4619  if ((status = pthread_condattr_init(_condattr)) != 0) {
4620    fatal(err_msg("pthread_condattr_init: %s", strerror(status)));
4621  }
4622  // Only set the clock if CLOCK_MONOTONIC is available
4623  if (os::supports_monotonic_clock()) {
4624    if ((status = pthread_condattr_setclock(_condattr, CLOCK_MONOTONIC)) != 0) {
4625      if (status == EINVAL) {
4626        warning("Unable to use monotonic clock with relative timed-waits" \
4627                " - changes to the time-of-day clock may have adverse effects");
4628      } else {
4629        fatal(err_msg("pthread_condattr_setclock: %s", strerror(status)));
4630      }
4631    }
4632  }
4633  // else it defaults to CLOCK_REALTIME
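
  // A minimal sketch (not VM code; names are illustrative) of how a condvar
  // initialized with this attribute pairs with an absolute CLOCK_MONOTONIC
  // deadline, making the timed wait immune to settimeofday() adjustments:
  //
  //   pthread_cond_t cond;
  //   pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
  //   pthread_cond_init(&cond, os::Linux::condAttr());
  //
  //   struct timespec deadline;
  //   clock_gettime(CLOCK_MONOTONIC, &deadline);  // same clock as the attr
  //   deadline.tv_sec += 2;                       // wait at most ~2 seconds
  //
  //   pthread_mutex_lock(&lock);
  //   int err = pthread_cond_timedwait(&cond, &lock, &deadline);
  //   // err == ETIMEDOUT once the deadline passes without a signal
  //   pthread_mutex_unlock(&lock);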
4634
4635  // If the pagesize of the VM is greater than 8K determine the appropriate
4636  // number of initial guard pages.  The user can change this with the
4637  // command line arguments, if needed.
4638  if (vm_page_size() > (int)Linux::vm_default_page_size()) {
4639    StackYellowPages = 1;
4640    StackRedPages = 1;
4641    StackShadowPages = round_to((StackShadowPages*Linux::vm_default_page_size()), vm_page_size()) / vm_page_size();
4642  }
4643
4644  // retrieve entry point for pthread_setname_np
4645  Linux::_pthread_setname_np =
4646    (int(*)(pthread_t, const char*))dlsym(RTLD_DEFAULT, "pthread_setname_np");
4647
4648}
4649
4650// To install functions for atexit system call
4651extern "C" {
4652  static void perfMemory_exit_helper() {
4653    perfMemory_exit();
4654  }
4655}
4656
4657// this is called _after_ the global arguments have been parsed
4658jint os::init_2(void) {
4659  Linux::fast_thread_clock_init();
4660
4661  // Allocate a single page and mark it as readable for safepoint polling
4662  address polling_page = (address) ::mmap(NULL, Linux::page_size(), PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
4663  guarantee(polling_page != MAP_FAILED, "os::init_2: failed to allocate polling page");
4664
4665  os::set_polling_page(polling_page);
4666
4667#ifndef PRODUCT
4668  if (Verbose && PrintMiscellaneous) {
4669    tty->print("[SafePoint Polling address: " INTPTR_FORMAT "]\n",
4670               (intptr_t)polling_page);
4671  }
4672#endif
4673
4674  if (!UseMembar) {
4675    address mem_serialize_page = (address) ::mmap(NULL, Linux::page_size(), PROT_READ | PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
4676    guarantee(mem_serialize_page != MAP_FAILED, "mmap Failed for memory serialize page");
4677    os::set_memory_serialize_page(mem_serialize_page);
4678
4679#ifndef PRODUCT
4680    if (Verbose && PrintMiscellaneous) {
4681      tty->print("[Memory Serialize  Page address: " INTPTR_FORMAT "]\n",
4682                 (intptr_t)mem_serialize_page);
4683    }
4684#endif
4685  }
4686
4687  // initialize suspend/resume support - must do this before signal_sets_init()
4688  if (SR_initialize() != 0) {
4689    perror("SR_initialize failed");
4690    return JNI_ERR;
4691  }
4692
4693  Linux::signal_sets_init();
4694  Linux::install_signal_handlers();
4695
4696  // Check minimum allowable stack size for thread creation and to initialize
4697  // the java system classes, including StackOverflowError - depends on page
4698  // size.  Add a page for compiler2 recursion in main thread.
4699  // Add in 2*BytesPerWord times page size to account for VM stack during
4700  // class initialization depending on 32 or 64 bit VM.
4701  os::Linux::min_stack_allowed = MAX2(os::Linux::min_stack_allowed,
4702                                      (size_t)(StackYellowPages+StackRedPages+StackShadowPages) * Linux::page_size() +
4703                                      (2*BytesPerWord COMPILER2_PRESENT(+1)) * Linux::vm_default_page_size());
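
  // As a worked example (using assumed, illustrative defaults of
  // StackYellowPages=2, StackRedPages=1 and StackShadowPages=20; the real
  // values are platform- and flag-dependent): with 4K pages throughout,
  // a 64-bit compiler2 VM needs at least
  //   (2 + 1 + 20) * 4K + (2*8 + 1) * 4K = 92K + 68K = 160K
  // of stack per thread.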
4704
4705  size_t threadStackSizeInBytes = ThreadStackSize * K;
4706  if (threadStackSizeInBytes != 0 &&
4707      threadStackSizeInBytes < os::Linux::min_stack_allowed) {
4708    tty->print_cr("\nThe stack size specified is too small; "
4709                  "specify at least %dk",
4710                  os::Linux::min_stack_allowed / K);
4711    return JNI_ERR;
4712  }
4713
4714  // Make the stack size a multiple of the page size so that
4715  // the yellow/red zones can be guarded.
4716  JavaThread::set_stack_size_at_create(round_to(threadStackSizeInBytes,
4717                                                vm_page_size()));
4718
4719  Linux::capture_initial_stack(JavaThread::stack_size_at_create());
4720
4721#if defined(IA32)
4722  workaround_expand_exec_shield_cs_limit();
4723#endif
4724
4725  Linux::libpthread_init();
4726  if (PrintMiscellaneous && (Verbose || WizardMode)) {
4727    tty->print_cr("[HotSpot is running with %s, %s(%s)]\n",
4728                  Linux::glibc_version(), Linux::libpthread_version(),
4729                  Linux::is_floating_stack() ? "floating stack" : "fixed stack");
4730  }
4731
4732  if (UseNUMA) {
4733    if (!Linux::libnuma_init()) {
4734      UseNUMA = false;
4735    } else {
4736      if (Linux::numa_max_node() < 1) {
4737        // There's only one node (they start from 0), so disable NUMA.
4738        UseNUMA = false;
4739      }
4740    }
4741    // With SHM and HugeTLBFS large pages we cannot uncommit a page, so there's no way
4742    // we can make the adaptive lgrp chunk resizing work. If the user specified
4743    // both UseNUMA and UseLargePages (or UseSHM/UseHugeTLBFS) on the command line - warn and
4744    // disable adaptive resizing.
4745    if (UseNUMA && UseLargePages && !can_commit_large_page_memory()) {
4746      if (FLAG_IS_DEFAULT(UseNUMA)) {
4747        UseNUMA = false;
4748      } else {
4749        if (FLAG_IS_DEFAULT(UseLargePages) &&
4750            FLAG_IS_DEFAULT(UseSHM) &&
4751            FLAG_IS_DEFAULT(UseHugeTLBFS)) {
4752          UseLargePages = false;
4753        } else {
4754          warning("UseNUMA is not fully compatible with SHM/HugeTLBFS large pages, disabling adaptive resizing");
4755          UseAdaptiveSizePolicy = false;
4756          UseAdaptiveNUMAChunkSizing = false;
4757        }
4758      }
4759    }
4760    if (!UseNUMA && ForceNUMA) {
4761      UseNUMA = true;
4762    }
4763  }
4764
4765  if (MaxFDLimit) {
4766    // Set the number of file descriptors to the maximum. Print an error
4767    // if getrlimit/setrlimit fails, but continue regardless.
4768    struct rlimit nbr_files;
4769    int status = getrlimit(RLIMIT_NOFILE, &nbr_files);
4770    if (status != 0) {
4771      if (PrintMiscellaneous && (Verbose || WizardMode)) {
4772        perror("os::init_2 getrlimit failed");
4773      }
4774    } else {
4775      nbr_files.rlim_cur = nbr_files.rlim_max;
4776      status = setrlimit(RLIMIT_NOFILE, &nbr_files);
4777      if (status != 0) {
4778        if (PrintMiscellaneous && (Verbose || WizardMode)) {
4779          perror("os::init_2 setrlimit failed");
4780        }
4781      }
4782    }
4783  }
4784
4785  // Initialize lock used to serialize thread creation (see os::create_thread)
4786  Linux::set_createThread_lock(new Mutex(Mutex::leaf, "createThread_lock", false));
4787
4788  // at-exit methods are called in the reverse order of their registration.
4789  // atexit functions are called on return from main or as a result of a
4790  // call to exit(3). Only a limited number of these functions can be
4791  // registered, and atexit() does not set errno.
4792
4793  if (PerfAllowAtExitRegistration) {
4794    // only register atexit functions if PerfAllowAtExitRegistration is set.
4795    // atexit functions can be delayed until process exit time, which
4796    // can be problematic for embedded VM situations. Embedded VMs should
4797    // call DestroyJavaVM() to assure that VM resources are released.
4798
4799    // note: perfMemory_exit_helper atexit function may be removed in
4800    // the future if the appropriate cleanup code can be added to the
4801    // VM_Exit VMOperation's doit method.
4802    if (atexit(perfMemory_exit_helper) != 0) {
4803      warning("os::init_2 atexit(perfMemory_exit_helper) failed");
4804    }
4805  }
4806
4807  // initialize thread priority policy
4808  prio_init();
4809
4810  return JNI_OK;
4811}
4812
4813// Mark the polling page as unreadable
4814void os::make_polling_page_unreadable(void) {
4815  if (!guard_memory((char*)_polling_page, Linux::page_size())) {
4816    fatal("Could not disable polling page");
4817  }
4818}
4819
4820// Mark the polling page as readable
4821void os::make_polling_page_readable(void) {
4822  if (!linux_mprotect((char *)_polling_page, Linux::page_size(), PROT_READ)) {
4823    fatal("Could not enable polling page");
4824  }
4825}
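
// In outline, the two functions above drive safepoint polling (an
// illustrative sketch, not the actual code paths): JIT-compiled code
// periodically executes a load from the polling page, and the VM arms a
// safepoint by revoking read access so that the load faults and the
// SIGSEGV handler parks the thread at the safepoint:
//
//   os::make_polling_page_unreadable();   // VM thread arms the safepoint
//   ...
//   // in compiled code, in effect:  test rax, [polling_page]  ; faults now
//   ...
//   os::make_polling_page_readable();     // safepoint over, polls are cheap again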
4826
4827int os::active_processor_count() {
4828  // Linux doesn't yet have a (official) notion of processor sets,
4829  // so just return the number of online processors.
4830  int online_cpus = ::sysconf(_SC_NPROCESSORS_ONLN);
4831  assert(online_cpus > 0 && online_cpus <= processor_count(), "sanity check");
4832  return online_cpus;
4833}
4834
4835void os::set_native_thread_name(const char *name) {
4836  if (Linux::_pthread_setname_np) {
4837    char buf[16]; // according to the glibc manpage, 16 chars incl. '\0'
4838    snprintf(buf, sizeof(buf), "%s", name);
4839    buf[sizeof(buf) - 1] = '\0';
4840    const int rc = Linux::_pthread_setname_np(pthread_self(), buf);
4841    // ERANGE should not happen; all other errors should just be ignored.
4842    assert(rc != ERANGE, "pthread_setname_np failed");
4843  }
4844}
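
// Note that glibc's pthread_setname_np() fails with ERANGE instead of
// truncating when given a name longer than 15 characters, which is why the
// name is clipped through the 16-byte buffer above. For example:
//
//   os::set_native_thread_name("C2 CompilerThread0");  // set as "C2 CompilerThre"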
4845
4846bool os::distribute_processes(uint length, uint* distribution) {
4847  // Not yet implemented.
4848  return false;
4849}
4850
4851bool os::bind_to_processor(uint processor_id) {
4852  // Not yet implemented.
4853  return false;
4854}
4855
4856///
4857
4858void os::SuspendedThreadTask::internal_do_task() {
4859  if (do_suspend(_thread->osthread())) {
4860    SuspendedThreadTaskContext context(_thread, _thread->osthread()->ucontext());
4861    do_task(context);
4862    do_resume(_thread->osthread());
4863  }
4864}
4865
4866class PcFetcher : public os::SuspendedThreadTask {
4867 public:
4868  PcFetcher(Thread* thread) : os::SuspendedThreadTask(thread) {}
4869  ExtendedPC result();
4870 protected:
4871  void do_task(const os::SuspendedThreadTaskContext& context);
4872 private:
4873  ExtendedPC _epc;
4874};
4875
4876ExtendedPC PcFetcher::result() {
4877  guarantee(is_done(), "task is not done yet.");
4878  return _epc;
4879}
4880
4881void PcFetcher::do_task(const os::SuspendedThreadTaskContext& context) {
4882  Thread* thread = context.thread();
4883  OSThread* osthread = thread->osthread();
4884  if (osthread->ucontext() != NULL) {
4885    _epc = os::Linux::ucontext_get_pc((ucontext_t *) context.ucontext());
4886  } else {
4887    // NULL context is unexpected, double-check this is the VMThread
4888    guarantee(thread->is_VM_thread(), "can only be called for VMThread");
4889  }
4890}
4891
4892// Suspends the target using the signal mechanism and then grabs the PC before
4893// resuming the target. Used by the flat-profiler only
4894ExtendedPC os::get_thread_pc(Thread* thread) {
4895  // Make sure that it is called by the watcher for the VMThread
4896  assert(Thread::current()->is_Watcher_thread(), "Must be watcher");
4897  assert(thread->is_VM_thread(), "Can only be called for VMThread");
4898
4899  PcFetcher fetcher(thread);
4900  fetcher.run();
4901  return fetcher.result();
4902}
4903
4904int os::Linux::safe_cond_timedwait(pthread_cond_t *_cond,
4905                                   pthread_mutex_t *_mutex,
4906                                   const struct timespec *_abstime) {
4907  if (is_NPTL()) {
4908    return pthread_cond_timedwait(_cond, _mutex, _abstime);
4909  } else {
4910    // 6292965: LinuxThreads pthread_cond_timedwait() resets FPU control
4911    // word back to default 64bit precision if condvar is signaled. Java
4912    // wants 53bit precision.  Save and restore current value.
4913    int fpu = get_fpu_control_word();
4914    int status = pthread_cond_timedwait(_cond, _mutex, _abstime);
4915    set_fpu_control_word(fpu);
4916    return status;
4917  }
4918}
4919
4920////////////////////////////////////////////////////////////////////////////////
4921// debug support
4922
4923bool os::find(address addr, outputStream* st) {
4924  Dl_info dlinfo;
4925  memset(&dlinfo, 0, sizeof(dlinfo));
4926  if (dladdr(addr, &dlinfo) != 0) {
4927    st->print(PTR_FORMAT ": ", addr);
4928    if (dlinfo.dli_sname != NULL && dlinfo.dli_saddr != NULL) {
4929      st->print("%s+%#x", dlinfo.dli_sname,
4930                addr - (intptr_t)dlinfo.dli_saddr);
4931    } else if (dlinfo.dli_fbase != NULL) {
4932      st->print("<offset %#x>", addr - (intptr_t)dlinfo.dli_fbase);
4933    } else {
4934      st->print("<absolute address>");
4935    }
4936    if (dlinfo.dli_fname != NULL) {
4937      st->print(" in %s", dlinfo.dli_fname);
4938    }
4939    if (dlinfo.dli_fbase != NULL) {
4940      st->print(" at " PTR_FORMAT, dlinfo.dli_fbase);
4941    }
4942    st->cr();
4943
4944    if (Verbose) {
4945      // decode some bytes around the PC
4946      address begin = clamp_address_in_page(addr-40, addr, os::vm_page_size());
4947      address end   = clamp_address_in_page(addr+40, addr, os::vm_page_size());
4948      address       lowest = (address) dlinfo.dli_sname;
4949      if (!lowest)  lowest = (address) dlinfo.dli_fbase;
4950      if (begin < lowest)  begin = lowest;
4951      Dl_info dlinfo2;
4952      if (dladdr(end, &dlinfo2) != 0 && dlinfo2.dli_saddr != dlinfo.dli_saddr
4953          && end > dlinfo2.dli_saddr && dlinfo2.dli_saddr > begin) {
4954        end = (address) dlinfo2.dli_saddr;
4955      }
4956      Disassembler::decode(begin, end, st);
4957    }
4958    return true;
4959  }
4960  return false;
4961}
4962
4963////////////////////////////////////////////////////////////////////////////////
4964// misc
4965
4966// This does not do anything on Linux. This is basically a hook for being
4967// able to use structured exception handling (thread-local exception filters)
4968// on, e.g., Win32.
4969void
4970os::os_exception_wrapper(java_call_t f, JavaValue* value, methodHandle* method,
4971                         JavaCallArguments* args, Thread* thread) {
4972  f(value, method, args, thread);
4973}
4974
4975void os::print_statistics() {
4976}
4977
4978int os::message_box(const char* title, const char* message) {
4979  int i;
4980  fdStream err(defaultStream::error_fd());
4981  for (i = 0; i < 78; i++) err.print_raw("=");
4982  err.cr();
4983  err.print_raw_cr(title);
4984  for (i = 0; i < 78; i++) err.print_raw("-");
4985  err.cr();
4986  err.print_raw_cr(message);
4987  for (i = 0; i < 78; i++) err.print_raw("=");
4988  err.cr();
4989
4990  char buf[16];
4991  // Prevent the process from exiting upon "read error", without spinning and consuming all CPU
4992  while (::read(0, buf, sizeof(buf)) <= 0) { ::sleep(100); }
4993
4994  return buf[0] == 'y' || buf[0] == 'Y';
4995}
4996
4997int os::stat(const char *path, struct stat *sbuf) {
4998  char pathbuf[MAX_PATH];
4999  if (strlen(path) > MAX_PATH - 1) {
5000    errno = ENAMETOOLONG;
5001    return -1;
5002  }
5003  os::native_path(strcpy(pathbuf, path));
5004  return ::stat(pathbuf, sbuf);
5005}
5006
5007bool os::check_heap(bool force) {
5008  return true;
5009}
5010
5011// Is a (classpath) directory empty?
5012bool os::dir_is_empty(const char* path) {
5013  DIR *dir = NULL;
5014  struct dirent *ptr;
5015
5016  dir = opendir(path);
5017  if (dir == NULL) return true;
5018
5019  // Scan the directory
5020  bool result = true;
5021  char buf[sizeof(struct dirent) + MAX_PATH];
5022  while (result && (ptr = ::readdir(dir)) != NULL) {
5023    if (strcmp(ptr->d_name, ".") != 0 && strcmp(ptr->d_name, "..") != 0) {
5024      result = false;
5025    }
5026  }
5027  closedir(dir);
5028  return result;
5029}
5030
5031// This code originates from JDK's sysOpen and open64_w
5032// from src/solaris/hpi/src/system_md.c
5033
5034int os::open(const char *path, int oflag, int mode) {
5035  if (strlen(path) > MAX_PATH - 1) {
5036    errno = ENAMETOOLONG;
5037    return -1;
5038  }
5039
5040  // All file descriptors that are opened in the Java process and not
5041  // specifically destined for a subprocess should have the close-on-exec
5042  // flag set.  If we don't set it, then careless 3rd party native code
5043  // might fork and exec without closing all appropriate file descriptors
5044  // (e.g. as we do in closeDescriptors in UNIXProcess.c), and this in
5045  // turn might:
5046  //
5047  // - cause end-of-file to fail to be detected on some file
5048  //   descriptors, resulting in mysterious hangs, or
5049  //
5050  // - might cause an fopen in the subprocess to fail on a system
5051  //   suffering from bug 1085341.
5052  //
5053  // (Yes, the default setting of the close-on-exec flag is a Unix
5054  // design flaw)
5055  //
5056  // See:
5057  // 1085341: 32-bit stdio routines should support file descriptors >255
5058  // 4843136: (process) pipe file descriptor from Runtime.exec not being closed
5059  // 6339493: (process) Runtime.exec does not close all file descriptors on Solaris 9
5060  //
5061  // Modern Linux kernels (2.6.23 and later, i.e. since 2007) support O_CLOEXEC with open().
5062  // O_CLOEXEC is preferable to using FD_CLOEXEC on an open file descriptor
5063  // because it saves a system call and removes a small window where the flag
5064  // is unset.  On ancient Linux kernels the O_CLOEXEC flag will be ignored
5065  // and we fall back to using FD_CLOEXEC (see below).
5066#ifdef O_CLOEXEC
5067  oflag |= O_CLOEXEC;
5068#endif
5069
5070  int fd = ::open64(path, oflag, mode);
5071  if (fd == -1) return -1;
5072
5073  // If the open succeeded, the file might still be a directory
5074  {
5075    struct stat64 buf64;
5076    int ret = ::fstat64(fd, &buf64);
5077    int st_mode = buf64.st_mode;
5078
5079    if (ret != -1) {
5080      if ((st_mode & S_IFMT) == S_IFDIR) {
5081        errno = EISDIR;
5082        ::close(fd);
5083        return -1;
5084      }
5085    } else {
5086      ::close(fd);
5087      return -1;
5088    }
5089  }
5090
5091#ifdef FD_CLOEXEC
5092  // Validate that the use of the O_CLOEXEC flag on open above worked.
5093  // With recent kernels, we will perform this check exactly once.
5094  static sig_atomic_t O_CLOEXEC_is_known_to_work = 0;
5095  if (!O_CLOEXEC_is_known_to_work) {
5096    int flags = ::fcntl(fd, F_GETFD);
5097    if (flags != -1) {
5098      if ((flags & FD_CLOEXEC) != 0)
5099        O_CLOEXEC_is_known_to_work = 1;
5100      else
5101        ::fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
5102    }
5103  }
5104#endif
5105
5106  return fd;
5107}
5108
5109
5110// create binary file, rewriting existing file if required
5111int os::create_binary_file(const char* path, bool rewrite_existing) {
5112  int oflags = O_WRONLY | O_CREAT;
5113  if (!rewrite_existing) {
5114    oflags |= O_EXCL;
5115  }
5116  return ::open64(path, oflags, S_IREAD | S_IWRITE);
5117}
5118
5119// return current position of file pointer
5120jlong os::current_file_offset(int fd) {
5121  return (jlong)::lseek64(fd, (off64_t)0, SEEK_CUR);
5122}
5123
5124// move file pointer to the specified offset
5125jlong os::seek_to_file_offset(int fd, jlong offset) {
5126  return (jlong)::lseek64(fd, (off64_t)offset, SEEK_SET);
5127}
5128
5129// This code originates from JDK's sysAvailable
5130// from src/solaris/hpi/src/native_threads/src/sys_api_td.c
5131
5132int os::available(int fd, jlong *bytes) {
5133  jlong cur, end;
5134  int mode;
5135  struct stat64 buf64;
5136
5137  if (::fstat64(fd, &buf64) >= 0) {
5138    mode = buf64.st_mode;
5139    if (S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) {
5140      // XXX: is the following call interruptible? If so, this might
5141      // need to go through the INTERRUPT_IO() wrapper as for other
5142      // blocking, interruptible calls in this file.
5143      int n;
5144      if (::ioctl(fd, FIONREAD, &n) >= 0) {
5145        *bytes = n;
5146        return 1;
5147      }
5148    }
5149  }
5150  if ((cur = ::lseek64(fd, 0L, SEEK_CUR)) == -1) {
5151    return 0;
5152  } else if ((end = ::lseek64(fd, 0L, SEEK_END)) == -1) {
5153    return 0;
5154  } else if (::lseek64(fd, cur, SEEK_SET) == -1) {
5155    return 0;
5156  }
5157  *bytes = end - cur;
5158  return 1;
5159}
5160
5161// Map a block of memory.
5162char* os::pd_map_memory(int fd, const char* file_name, size_t file_offset,
5163                        char *addr, size_t bytes, bool read_only,
5164                        bool allow_exec) {
5165  int prot;
5166  int flags = MAP_PRIVATE;
5167
5168  if (read_only) {
5169    prot = PROT_READ;
5170  } else {
5171    prot = PROT_READ | PROT_WRITE;
5172  }
5173
5174  if (allow_exec) {
5175    prot |= PROT_EXEC;
5176  }
5177
5178  if (addr != NULL) {
5179    flags |= MAP_FIXED;
5180  }
5181
5182  char* mapped_address = (char*)mmap(addr, (size_t)bytes, prot, flags,
5183                                     fd, file_offset);
5184  if (mapped_address == MAP_FAILED) {
5185    return NULL;
5186  }
5187  return mapped_address;
5188}
5189
5190
5191// Remap a block of memory.
5192char* os::pd_remap_memory(int fd, const char* file_name, size_t file_offset,
5193                          char *addr, size_t bytes, bool read_only,
5194                          bool allow_exec) {
5195  // same as map_memory() on this OS
5196  return os::map_memory(fd, file_name, file_offset, addr, bytes, read_only,
5197                        allow_exec);
5198}
5199
5200
5201// Unmap a block of memory.
5202bool os::pd_unmap_memory(char* addr, size_t bytes) {
5203  return munmap(addr, bytes) == 0;
5204}
5205
5206static jlong slow_thread_cpu_time(Thread *thread, bool user_sys_cpu_time);
5207
5208static clockid_t thread_cpu_clockid(Thread* thread) {
5209  pthread_t tid = thread->osthread()->pthread_id();
5210  clockid_t clockid;
5211
5212  // Get thread clockid
5213  int rc = os::Linux::pthread_getcpuclockid(tid, &clockid);
5214  assert(rc == 0, "pthread_getcpuclockid is expected to return 0");
5215  return clockid;
5216}
5217
5218// current_thread_cpu_time(bool) and thread_cpu_time(Thread*, bool)
5219// are used by JVM M&M and JVMTI to get user+sys or user CPU time
5220// of a thread.
5221//
5222// current_thread_cpu_time() and thread_cpu_time(Thread*) returns
5223// the fast estimate available on the platform.
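
// A minimal sketch (not VM code) of the fast path these methods rely on,
// using only standard POSIX calls:
//
//   #include <pthread.h>
//   #include <time.h>
//
//   static jlong cpu_time_of(pthread_t tid) {
//     clockid_t cid;
//     if (pthread_getcpuclockid(tid, &cid) != 0) return -1;
//     struct timespec ts;
//     if (clock_gettime(cid, &ts) != 0) return -1;
//     return (jlong)ts.tv_sec * NANOSECS_PER_SEC + ts.tv_nsec;  // user+sys
//   }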
5224
5225jlong os::current_thread_cpu_time() {
5226  if (os::Linux::supports_fast_thread_cpu_time()) {
5227    return os::Linux::fast_thread_cpu_time(CLOCK_THREAD_CPUTIME_ID);
5228  } else {
5229    // return user + sys since the cost is the same
5230    return slow_thread_cpu_time(Thread::current(), true /* user + sys */);
5231  }
5232}
5233
5234jlong os::thread_cpu_time(Thread* thread) {
5235  // consistent with what current_thread_cpu_time() returns
5236  if (os::Linux::supports_fast_thread_cpu_time()) {
5237    return os::Linux::fast_thread_cpu_time(thread_cpu_clockid(thread));
5238  } else {
5239    return slow_thread_cpu_time(thread, true /* user + sys */);
5240  }
5241}
5242
5243jlong os::current_thread_cpu_time(bool user_sys_cpu_time) {
5244  if (user_sys_cpu_time && os::Linux::supports_fast_thread_cpu_time()) {
5245    return os::Linux::fast_thread_cpu_time(CLOCK_THREAD_CPUTIME_ID);
5246  } else {
5247    return slow_thread_cpu_time(Thread::current(), user_sys_cpu_time);
5248  }
5249}
5250
5251jlong os::thread_cpu_time(Thread *thread, bool user_sys_cpu_time) {
5252  if (user_sys_cpu_time && os::Linux::supports_fast_thread_cpu_time()) {
5253    return os::Linux::fast_thread_cpu_time(thread_cpu_clockid(thread));
5254  } else {
5255    return slow_thread_cpu_time(thread, user_sys_cpu_time);
5256  }
5257}
5258
5259//  -1 on error.
5260static jlong slow_thread_cpu_time(Thread *thread, bool user_sys_cpu_time) {
5261  pid_t  tid = thread->osthread()->thread_id();
5262  char *s;
5263  char stat[2048];
5264  int statlen;
5265  char proc_name[64];
5266  int count;
5267  long sys_time, user_time;
5268  char cdummy;
5269  int idummy;
5270  long ldummy;
5271  FILE *fp;
5272
5273  snprintf(proc_name, 64, "/proc/self/task/%d/stat", tid);
5274  fp = fopen(proc_name, "r");
5275  if (fp == NULL) return -1;
5276  statlen = fread(stat, 1, 2047, fp);
5277  stat[statlen] = '\0';
5278  fclose(fp);
5279
5280  // Skip pid and the command string. Note that we could be dealing with
5281  // weird command names, e.g. user could decide to rename java launcher
5282  // to "java 1.4.2 :)", then the stat file would look like
5283  //                1234 (java 1.4.2 :)) R ... ...
5284  // We don't really need to know the command string, just find the last
5285  // occurrence of ")" and then start parsing from there. See bug 4726580.
5286  s = strrchr(stat, ')');
5287  if (s == NULL) return -1;
5288
5289  // Skip blank chars
5290  do { s++; } while (s && isspace(*s));
5291
5292  count = sscanf(s,"%c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu",
5293                 &cdummy, &idummy, &idummy, &idummy, &idummy, &idummy,
5294                 &ldummy, &ldummy, &ldummy, &ldummy, &ldummy,
5295                 &user_time, &sys_time);
5296  if (count != 13) return -1;
5297  if (user_sys_cpu_time) {
5298    return ((jlong)sys_time + (jlong)user_time) * (1000000000 / clock_tics_per_sec);
5299  } else {
5300    return (jlong)user_time * (1000000000 / clock_tics_per_sec);
5301  }
5302}
5303
5304void os::current_thread_cpu_time_info(jvmtiTimerInfo *info_ptr) {
5305  info_ptr->max_value = ALL_64_BITS;       // will not wrap in less than 64 bits
5306  info_ptr->may_skip_backward = false;     // elapsed time not wall time
5307  info_ptr->may_skip_forward = false;      // elapsed time not wall time
5308  info_ptr->kind = JVMTI_TIMER_TOTAL_CPU;  // user+system time is returned
5309}
5310
5311void os::thread_cpu_time_info(jvmtiTimerInfo *info_ptr) {
5312  info_ptr->max_value = ALL_64_BITS;       // will not wrap in less than 64 bits
5313  info_ptr->may_skip_backward = false;     // elapsed time not wall time
5314  info_ptr->may_skip_forward = false;      // elapsed time not wall time
5315  info_ptr->kind = JVMTI_TIMER_TOTAL_CPU;  // user+system time is returned
5316}
5317
5318bool os::is_thread_cpu_time_supported() {
5319  return true;
5320}
5321
5322// System loadavg support.  Returns -1 if load average cannot be obtained.
5323// Linux doesn't yet have a (official) notion of processor sets,
5324// so just return the system wide load average.
5325int os::loadavg(double loadavg[], int nelem) {
5326  return ::getloadavg(loadavg, nelem);
5327}
5328
5329void os::pause() {
5330  char filename[MAX_PATH];
5331  if (PauseAtStartupFile && PauseAtStartupFile[0]) {
5332    jio_snprintf(filename, MAX_PATH, "%s", PauseAtStartupFile);
5333  } else {
5334    jio_snprintf(filename, MAX_PATH, "./vm.paused.%d", current_process_id());
5335  }
5336
5337  int fd = ::open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0666);
5338  if (fd != -1) {
5339    struct stat buf;
5340    ::close(fd);
5341    while (::stat(filename, &buf) == 0) {
5342      (void)::poll(NULL, 0, 100);
5343    }
5344  } else {
5345    jio_fprintf(stderr,
5346                "Could not open pause file '%s', continuing immediately.\n", filename);
5347  }
5348}
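
// A typical use (illustrative): start the VM with -XX:+PauseAtStartup,
// attach a debugger while os::pause() spins in the poll loop above, then
// remove ./vm.paused.<pid> to let startup continue.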
5349
5350
5351// Refer to the comments in os_solaris.cpp park-unpark. The next two
5352// comment paragraphs are worth repeating here:
5353//
5354// Assumption:
5355//    Only one parker can exist on an event, which is why we allocate
5356//    them per-thread. Multiple unparkers can coexist.
5357//
5358// _Event serves as a restricted-range semaphore.
5359//   -1 : thread is blocked, i.e. there is a waiter
5360//    0 : neutral: thread is running or ready,
5361//        could have been signaled after a wait started
5362//    1 : signaled - thread is running or ready
5363//
5364// Beware -- Some versions of NPTL embody a flaw where pthread_cond_timedwait() can
5365// hang indefinitely.  For instance NPTL 0.60 on 2.4.21-4ELsmp is vulnerable.
5366// For specifics regarding the bug see GLIBC BUGID 261237 :
5367//    http://www.mail-archive.com/debian-glibc@lists.debian.org/msg10837.html.
5368// Briefly, pthread_cond_timedwait() calls with an expiry time that's not in the future
5369// will either hang or corrupt the condvar, resulting in subsequent hangs if the condvar
5370// is used.  (The simple C test-case provided in the GLIBC bug report manifests the
5371// hang).  The JVM is vulnerable via sleep(), Object.wait(timo), LockSupport.parkNanos()
5372// and monitorenter when we're using 1-0 locking.  All those operations may result in
5373// calls to pthread_cond_timedwait().  Using LD_ASSUME_KERNEL to use an older version
5374// of libpthread avoids the problem, but isn't practical.
5375//
5376// Possible remedies:
5377//
5378// 1.   Establish a minimum relative wait time.  50 to 100 msecs seems to work.
5379//      This is palliative and probabilistic, however.  If the thread is preempted
5380//      between the call to compute_abstime() and pthread_cond_timedwait(), more
5381//      than the minimum period may have passed, and the abstime may be stale (in the
5382//      past) resulting in a hang.   Using this technique reduces the odds of a hang
5383//      but the JVM is still vulnerable, particularly on heavily loaded systems.
5384//
5385// 2.   Modify park-unpark to use per-thread (per ParkEvent) pipe-pairs instead
5386//      of the usual flag-condvar-mutex idiom.  The write side of the pipe is set
5387//      NDELAY. unpark() reduces to write(), park() reduces to read() and park(timo)
5388//      reduces to poll()+read().  This works well, but consumes 2 FDs per extant
5389//      thread.
5390//
5391// 3.   Embargo pthread_cond_timedwait() and implement a native "chron" thread
5392//      that manages timeouts.  We'd emulate pthread_cond_timedwait() by enqueuing
5393//      a timeout request to the chron thread and then blocking via pthread_cond_wait().
5394//      This also works well.  In fact it avoids kernel-level scalability impediments
5395//      on certain platforms that don't handle lots of active pthread_cond_timedwait()
5396//      timers in a graceful fashion.
5397//
5398// 4.   When the abstime value is in the past it appears that control returns
5399//      correctly from pthread_cond_timedwait(), but the condvar is left corrupt.
5400//      Subsequent timedwait/wait calls may hang indefinitely.  Given that, we
5401//      can avoid the problem by reinitializing the condvar -- by cond_destroy()
5402//      followed by cond_init() -- after all calls to pthread_cond_timedwait().
5403//      It may be possible to avoid reinitialization by checking the return
5404//      value from pthread_cond_timedwait().  In addition to reinitializing the
5405//      condvar we must establish the invariant that cond_signal() is only called
5406//      within critical sections protected by the adjunct mutex.  This prevents
5407//      cond_signal() from "seeing" a condvar that's in the midst of being
5408//      reinitialized or that is corrupt.  Sadly, this invariant obviates the
5409//      desirable signal-after-unlock optimization that avoids futile context switching.
5410//
5411//      I'm also concerned that some versions of NPTL might allocate an auxiliary
5412//      structure when a condvar is used or initialized.  cond_destroy()  would
5413//      release the helper structure.  Our reinitialize-after-timedwait fix
5414//      put excessive stress on malloc/free and locks protecting the c-heap.
5415//
5416// We currently use (4).  See the WorkAroundNPTLTimedWaitHang flag.
5417// It may be possible to refine (4) by checking the kernel and NPTL versions
5418// and only enabling the work-around for vulnerable environments.
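
// For reference, a minimal sketch (illustrative only; error handling and
// draining of multiple unparks omitted) of the per-thread pipe-pair scheme
// described in remedy (2):
//
//   struct PipeParker {
//     int _fds[2];
//     PipeParker() {
//       pipe(_fds);
//       fcntl(_fds[1], F_SETFL, O_NONBLOCK);   // write side is NDELAY
//     }
//     void unpark() { char c = 0; write(_fds[1], &c, 1); }
//     void park()   { char c; read(_fds[0], &c, 1); }          // blocks
//     int park(int millis) {                                   // timed
//       struct pollfd pfd = { _fds[0], POLLIN, 0 };
//       if (poll(&pfd, 1, millis) <= 0) return OS_TIMEOUT;
//       char c; read(_fds[0], &c, 1);
//       return OS_OK;
//     }
//   };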
5419
5420// utility to compute the abstime argument to timedwait:
5421// millis is the relative timeout time
5422// abstime will be the absolute timeout time
5423// TODO: replace compute_abstime() with unpackTime()
5424
5425static struct timespec* compute_abstime(timespec* abstime, jlong millis) {
5426  if (millis < 0)  millis = 0;
5427
5428  jlong seconds = millis / 1000;
5429  millis %= 1000;
5430  if (seconds > 50000000) { // see man cond_timedwait(3T)
5431    seconds = 50000000;
5432  }
5433
5434  if (os::supports_monotonic_clock()) {
5435    struct timespec now;
5436    int status = os::Linux::clock_gettime(CLOCK_MONOTONIC, &now);
5437    assert_status(status == 0, status, "clock_gettime");
5438    abstime->tv_sec = now.tv_sec  + seconds;
5439    long nanos = now.tv_nsec + millis * NANOSECS_PER_MILLISEC;
5440    if (nanos >= NANOSECS_PER_SEC) {
5441      abstime->tv_sec += 1;
5442      nanos -= NANOSECS_PER_SEC;
5443    }
5444    abstime->tv_nsec = nanos;
5445  } else {
5446    struct timeval now;
5447    int status = gettimeofday(&now, NULL);
5448    assert(status == 0, "gettimeofday");
5449    abstime->tv_sec = now.tv_sec  + seconds;
5450    long usec = now.tv_usec + millis * 1000;
5451    if (usec >= 1000000) {
5452      abstime->tv_sec += 1;
5453      usec -= 1000000;
5454    }
5455    abstime->tv_nsec = usec * 1000;
5456  }
5457  return abstime;
5458}
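
// As a worked example: compute_abstime(&abst, 2500) with the monotonic clock
// reading {tv_sec = 100, tv_nsec = 900000000} gives seconds = 2 and
// millis = 500, so nanos = 900000000 + 500000000 exceeds NANOSECS_PER_SEC
// and the result normalizes to {tv_sec = 103, tv_nsec = 400000000}.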
5459
5460void os::PlatformEvent::park() {       // AKA "down()"
5461  // Transitions for _Event:
5462  //   -1 => -1 : illegal
5463  //    1 =>  0 : pass - return immediately
5464  //    0 => -1 : block; then set _Event to 0 before returning
5465
5466  // Invariant: Only the thread associated with the Event/PlatformEvent
5467  // may call park().
5468  // TODO: assert that _Assoc != NULL or _Assoc == Self
5469  assert(_nParked == 0, "invariant");
5470
5471  int v;
5472  for (;;) {
5473    v = _Event;
5474    if (Atomic::cmpxchg(v-1, &_Event, v) == v) break;
5475  }
5476  guarantee(v >= 0, "invariant");
5477  if (v == 0) {
5478    // Do this the hard way by blocking ...
5479    int status = pthread_mutex_lock(_mutex);
5480    assert_status(status == 0, status, "mutex_lock");
5481    guarantee(_nParked == 0, "invariant");
5482    ++_nParked;
5483    while (_Event < 0) {
5484      status = pthread_cond_wait(_cond, _mutex);
5485      // for some reason, under 2.7 lwp_cond_wait() may return ETIME ...
5486      // Treat this the same as if the wait was interrupted
5487      if (status == ETIME) { status = EINTR; }
5488      assert_status(status == 0 || status == EINTR, status, "cond_wait");
5489    }
5490    --_nParked;
5491
5492    _Event = 0;
5493    status = pthread_mutex_unlock(_mutex);
5494    assert_status(status == 0, status, "mutex_unlock");
5495    // Paranoia to ensure our locked and lock-free paths interact
5496    // correctly with each other.
5497    OrderAccess::fence();
5498  }
5499  guarantee(_Event >= 0, "invariant");
5500}
5501
5502int os::PlatformEvent::park(jlong millis) {
5503  // Transitions for _Event:
5504  //   -1 => -1 : illegal
5505  //    1 =>  0 : pass - return immediately
5506  //    0 => -1 : block; then set _Event to 0 before returning
5507
5508  guarantee(_nParked == 0, "invariant");
5509
5510  int v;
5511  for (;;) {
5512    v = _Event;
5513    if (Atomic::cmpxchg(v-1, &_Event, v) == v) break;
5514  }
5515  guarantee(v >= 0, "invariant");
5516  if (v != 0) return OS_OK;
5517
5518  // We do this the hard way, by blocking the thread.
5519  // Consider enforcing a minimum timeout value.
5520  struct timespec abst;
5521  compute_abstime(&abst, millis);
5522
5523  int ret = OS_TIMEOUT;
5524  int status = pthread_mutex_lock(_mutex);
5525  assert_status(status == 0, status, "mutex_lock");
5526  guarantee(_nParked == 0, "invariant");
5527  ++_nParked;
5528
5529  // Object.wait(timo) will return because of
5530  // (a) notification
5531  // (b) timeout
5532  // (c) thread.interrupt
5533  //
5534  // Thread.interrupt and object.notify{All} both call Event::set.
5535  // That is, we treat thread.interrupt as a special case of notification.
5536  // We ignore spurious OS wakeups unless FilterSpuriousWakeups is false.
5537  // We assume all ETIME returns are valid.
5538  //
5539  // TODO: properly differentiate simultaneous notify+interrupt.
5540  // In that case, we should propagate the notify to another waiter.
5541
5542  while (_Event < 0) {
5543    status = os::Linux::safe_cond_timedwait(_cond, _mutex, &abst);
5544    if (status != 0 && WorkAroundNPTLTimedWaitHang) {
5545      pthread_cond_destroy(_cond);
5546      pthread_cond_init(_cond, os::Linux::condAttr());
5547    }
5548    assert_status(status == 0 || status == EINTR ||
5549                  status == ETIME || status == ETIMEDOUT,
5550                  status, "cond_timedwait");
5551    if (!FilterSpuriousWakeups) break;                 // previous semantics
5552    if (status == ETIME || status == ETIMEDOUT) break;
5553    // We consume and ignore EINTR and spurious wakeups.
5554  }
5555  --_nParked;
5556  if (_Event >= 0) {
5557    ret = OS_OK;
5558  }
5559  _Event = 0;
5560  status = pthread_mutex_unlock(_mutex);
5561  assert_status(status == 0, status, "mutex_unlock");
5562  assert(_nParked == 0, "invariant");
5563  // Paranoia to ensure our locked and lock-free paths interact
5564  // correctly with each other.
5565  OrderAccess::fence();
5566  return ret;
5567}
5568
5569void os::PlatformEvent::unpark() {
5570  // Transitions for _Event:
5571  //    0 => 1 : just return
5572  //    1 => 1 : just return
5573  //   -1 => either 0 or 1; must signal target thread
5574  //         That is, we can safely transition _Event from -1 to either
5575  //         0 or 1.
5576  // See also: "Semaphores in Plan 9" by Mullender & Cox
5577  //
5578  // Note: Forcing a transition from "-1" to "1" on an unpark() means
5579  // that it will take two back-to-back park() calls for the owning
5580  // thread to block. This has the benefit of forcing a spurious return
5581  // from the first park() call after an unpark() call which will help
5582  // shake out uses of park() and unpark() without condition variables.
5583
5584  if (Atomic::xchg(1, &_Event) >= 0) return;
5585
5586  // Wait for the thread associated with the event to vacate
5587  int status = pthread_mutex_lock(_mutex);
5588  assert_status(status == 0, status, "mutex_lock");
5589  int AnyWaiters = _nParked;
5590  assert(AnyWaiters == 0 || AnyWaiters == 1, "invariant");
5591  if (AnyWaiters != 0 && WorkAroundNPTLTimedWaitHang) {
5592    AnyWaiters = 0;
5593    pthread_cond_signal(_cond);
5594  }
5595  status = pthread_mutex_unlock(_mutex);
5596  assert_status(status == 0, status, "mutex_unlock");
5597  if (AnyWaiters != 0) {
5598    // Note that we signal() *after* dropping the lock for "immortal" Events.
5599    // This is safe and avoids a common class of futile wakeups.  In rare
5600    // circumstances this can cause a thread to return prematurely from
5601    // cond_{timed}wait() but the spurious wakeup is benign and the victim
5602    // will simply re-test the condition and re-park itself.
5603    // This provides particular benefit if the underlying platform does not
5604    // provide wait morphing.
5605    status = pthread_cond_signal(_cond);
5606    assert_status(status == 0, status, "cond_signal");
5607  }
5608}
5609
5610
5611// JSR166
5612// -------------------------------------------------------
5613
5614// The solaris and linux implementations of park/unpark are fairly
5615// conservative for now, but can be improved. They currently use a
5616// mutex/condvar pair, plus a count.
5617// Park decrements count if > 0, else does a condvar wait.  Unpark
5618// sets count to 1 and signals condvar.  Only one thread ever waits
5619// on the condvar. Contention seen when trying to park implies that someone
5620// is unparking you, so don't wait. And spurious returns are fine, so there
5621// is no need to track notifications.
5622
5623// This code is common to linux and solaris and will be moved to a
5624// common place in dolphin.
5625//
5626// The passed in time value is either a relative time in nanoseconds
5627// or an absolute time in milliseconds. Either way it has to be unpacked
5628// into suitable seconds and nanoseconds components and stored in the
5629// given timespec structure.
5630// Since the given time is a 64-bit value and the time_t used in the timespec
5631// is only a signed 32-bit value (except on 64-bit Linux) we have to watch for
5632// overflow if times far in the future are given. Further, on Solaris versions
5633// prior to 10 there is a restriction (see cond_timedwait) that the specified
5634// number of seconds, in abstime, is less than current_time  + 100,000,000.
5635// As it will be 28 years before "now + 100000000" will overflow we can
5636// ignore overflow and just impose a hard-limit on seconds using the value
5637// of "now + 100,000,000". This places a limit on the timeout of about 3.17
5638// years from "now".
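
// As a quick check on the arithmetic above: the cap is 100,000,000 seconds,
// and 100,000,000 / 31,557,600 (seconds per Julian year) is about 3.17,
// which is where the ~3.17-year limit comes from.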
5639
5640static void unpackTime(timespec* absTime, bool isAbsolute, jlong time) {
5641  assert(time > 0, "convertTime");
5642  time_t max_secs = 0;
5643
5644  if (!os::supports_monotonic_clock() || isAbsolute) {
5645    struct timeval now;
5646    int status = gettimeofday(&now, NULL);
5647    assert(status == 0, "gettimeofday");
5648
5649    max_secs = now.tv_sec + MAX_SECS;
5650
5651    if (isAbsolute) {
5652      jlong secs = time / 1000;
5653      if (secs > max_secs) {
5654        absTime->tv_sec = max_secs;
5655      } else {
5656        absTime->tv_sec = secs;
5657      }
5658      absTime->tv_nsec = (time % 1000) * NANOSECS_PER_MILLISEC;
5659    } else {
5660      jlong secs = time / NANOSECS_PER_SEC;
5661      if (secs >= MAX_SECS) {
5662        absTime->tv_sec = max_secs;
5663        absTime->tv_nsec = 0;
5664      } else {
5665        absTime->tv_sec = now.tv_sec + secs;
5666        absTime->tv_nsec = (time % NANOSECS_PER_SEC) + now.tv_usec*1000;
5667        if (absTime->tv_nsec >= NANOSECS_PER_SEC) {
5668          absTime->tv_nsec -= NANOSECS_PER_SEC;
5669          ++absTime->tv_sec; // note: this must be <= max_secs
5670        }
5671      }
5672    }
5673  } else {
5674    // must be relative using monotonic clock
5675    struct timespec now;
5676    int status = os::Linux::clock_gettime(CLOCK_MONOTONIC, &now);
5677    assert_status(status == 0, status, "clock_gettime");
5678    max_secs = now.tv_sec + MAX_SECS;
5679    jlong secs = time / NANOSECS_PER_SEC;
5680    if (secs >= MAX_SECS) {
5681      absTime->tv_sec = max_secs;
5682      absTime->tv_nsec = 0;
5683    } else {
5684      absTime->tv_sec = now.tv_sec + secs;
5685      absTime->tv_nsec = (time % NANOSECS_PER_SEC) + now.tv_nsec;
5686      if (absTime->tv_nsec >= NANOSECS_PER_SEC) {
5687        absTime->tv_nsec -= NANOSECS_PER_SEC;
5688        ++absTime->tv_sec; // note: this must be <= max_secs
5689      }
5690    }
5691  }
5692  assert(absTime->tv_sec >= 0, "tv_sec < 0");
5693  assert(absTime->tv_sec <= max_secs, "tv_sec > max_secs");
5694  assert(absTime->tv_nsec >= 0, "tv_nsec < 0");
5695  assert(absTime->tv_nsec < NANOSECS_PER_SEC, "tv_nsec >= nanos_per_sec");
5696}
5697
5698void Parker::park(bool isAbsolute, jlong time) {
5699  // Ideally we'd do something useful while spinning, such
5700  // as calling unpackTime().
5701
5702  // Optional fast-path check:
5703  // Return immediately if a permit is available.
5704  // We depend on Atomic::xchg() having full barrier semantics
5705  // since we are doing a lock-free update to _counter.
5706  if (Atomic::xchg(0, &_counter) > 0) return;
5707
5708  Thread* thread = Thread::current();
5709  assert(thread->is_Java_thread(), "Must be JavaThread");
5710  JavaThread *jt = (JavaThread *)thread;
5711
5712  // Optional optimization -- avoid state transitions if there's an interrupt pending.
5713  // Check interrupt before trying to wait
5714  if (Thread::is_interrupted(thread, false)) {
5715    return;
5716  }
5717
5718  // Next, demultiplex/decode time arguments
5719  timespec absTime;
5720  if (time < 0 || (isAbsolute && time == 0)) { // don't wait at all
5721    return;
5722  }
5723  if (time > 0) {
5724    unpackTime(&absTime, isAbsolute, time);
5725  }
5726
5727
5728  // Enter safepoint region
5729  // Beware of deadlocks such as 6317397.
5730  // The per-thread Parker:: mutex is a classic leaf-lock.
5731  // In particular a thread must never block on the Threads_lock while
5732  // holding the Parker:: mutex.  If safepoints are pending both the
5733  // ThreadBlockInVM() CTOR and DTOR may grab Threads_lock.
5734  ThreadBlockInVM tbivm(jt);
5735
5736  // Don't wait if we cannot get the lock, since interference arises from
5737  // unblocking.  Also, check for a pending interrupt before trying to wait.
5738  if (Thread::is_interrupted(thread, false) || pthread_mutex_trylock(_mutex) != 0) {
5739    return;
5740  }
5741
5742  int status;
5743  if (_counter > 0)  { // no wait needed
5744    _counter = 0;
5745    status = pthread_mutex_unlock(_mutex);
5746    assert(status == 0, "invariant");
5747    // Paranoia to ensure our locked and lock-free paths interact
5748    // correctly with each other and Java-level accesses.
5749    OrderAccess::fence();
5750    return;
5751  }
5752
5753#ifdef ASSERT
5754  // Don't catch signals while blocked; let the running threads have the signals.
5755  // (This allows a debugger to break into the running thread.)
5756  sigset_t oldsigs;
5757  sigset_t* allowdebug_blocked = os::Linux::allowdebug_blocked_signals();
5758  pthread_sigmask(SIG_BLOCK, allowdebug_blocked, &oldsigs);
5759#endif
5760
5761  OSThreadWaitState osts(thread->osthread(), false /* not Object.wait() */);
5762  jt->set_suspend_equivalent();
5763  // cleared by handle_special_suspend_equivalent_condition() or java_suspend_self()
5764
5765  assert(_cur_index == -1, "invariant");
5766  if (time == 0) {
5767    _cur_index = REL_INDEX; // arbitrary choice when not timed
5768    status = pthread_cond_wait(&_cond[_cur_index], _mutex);
5769  } else {
5770    _cur_index = isAbsolute ? ABS_INDEX : REL_INDEX;
5771    status = os::Linux::safe_cond_timedwait(&_cond[_cur_index], _mutex, &absTime);
5772    if (status != 0 && WorkAroundNPTLTimedWaitHang) {
5773      pthread_cond_destroy(&_cond[_cur_index]);
5774      pthread_cond_init(&_cond[_cur_index], isAbsolute ? NULL : os::Linux::condAttr());
5775    }
5776  }
5777  _cur_index = -1;
5778  assert_status(status == 0 || status == EINTR ||
5779                status == ETIME || status == ETIMEDOUT,
5780                status, "cond_timedwait");
5781
5782#ifdef ASSERT
5783  pthread_sigmask(SIG_SETMASK, &oldsigs, NULL);
5784#endif
5785
5786  _counter = 0;
5787  status = pthread_mutex_unlock(_mutex);
5788  assert_status(status == 0, status, "invariant");
5789  // Paranoia to ensure our locked and lock-free paths interact
5790  // correctly with each other and Java-level accesses.
5791  OrderAccess::fence();
5792
5793  // If externally suspended while waiting, re-suspend
5794  if (jt->handle_special_suspend_equivalent_condition()) {
5795    jt->java_suspend_self();
5796  }
5797}
5798
5799void Parker::unpark() {
5800  int status = pthread_mutex_lock(_mutex);
5801  assert(status == 0, "invariant");
5802  const int s = _counter;
5803  _counter = 1;
5804  if (s < 1) {
5805    // thread might be parked
5806    if (_cur_index != -1) {
5807      // thread is definitely parked
5808      if (WorkAroundNPTLTimedWaitHang) {
5809        status = pthread_cond_signal(&_cond[_cur_index]);
5810        assert(status == 0, "invariant");
5811        status = pthread_mutex_unlock(_mutex);
5812        assert(status == 0, "invariant");
5813      } else {
5814        status = pthread_mutex_unlock(_mutex);
5815        assert(status == 0, "invariant");
5816        status = pthread_cond_signal(&_cond[_cur_index]);
5817        assert(status == 0, "invariant");
5818      }
5819    } else {
5820      status = pthread_mutex_unlock(_mutex);
5821      assert(status == 0, "invariant");
5822    }
5823  } else {
5824    status = pthread_mutex_unlock(_mutex);
5825    assert(status == 0, "invariant");
5826  }
5827}
5828
5829
5830extern char** environ;
5831
5832#ifndef __NR_fork
5833  #define __NR_fork IA32_ONLY(2) IA64_ONLY(not defined) AMD64_ONLY(57) AARCH64_ONLY(1079)
5834#endif
5835
5836#ifndef __NR_execve
5837  #define __NR_execve IA32_ONLY(11) IA64_ONLY(1033) AMD64_ONLY(59) AARCH64_ONLY(221)
5838#endif
5839
5840// Run the specified command in a separate process. Return its exit value,
5841// or -1 on failure (e.g. can't fork a new process).
5842// Unlike system(), this function can be called from signal handler. It
5843// doesn't block SIGINT et al.
5844int os::fork_and_exec(char* cmd) {
5845  const char * argv[4] = {"sh", "-c", cmd, NULL};
5846
5847  // fork() in LinuxThreads/NPTL is not async-safe. It needs to run
5848  // pthread_atfork handlers and reset pthread library. All we need is a
5849  // separate process to execve. Make a direct syscall to fork process.
5850  // On IA64 there's no fork syscall, so we have to use fork() and hope for
5851  // the best...
5852  pid_t pid = NOT_IA64(syscall(__NR_fork);)
5853  IA64_ONLY(fork();)
5854
5855  if (pid < 0) {
5856    // fork failed
5857    return -1;
5858
5859  } else if (pid == 0) {
5860    // child process
5861
5862    // execve() in LinuxThreads will call pthread_kill_other_threads_np()
5863    // first to kill every thread on the thread list. Because this list is
5864    // not reset by fork() (see notes above), execve() will instead kill
5865    // every thread in the parent process. We know this is the only thread
5866    // in the new process, so make a system call directly.
5867    // IA64 should use normal execve() from glibc to match the glibc fork()
5868    // above.
5869    NOT_IA64(syscall(__NR_execve, "/bin/sh", argv, environ);)
5870    IA64_ONLY(execve("/bin/sh", (char* const*)argv, environ);)
5871
5872    // execve failed
5873    _exit(-1);
5874
5875  } else  {
5876    // copied from J2SE ..._waitForProcessExit() in UNIXProcess_md.c; we don't
5877    // care about the actual exit code, for now.
5878
5879    int status;
5880
5881    // Wait for the child process to exit.  This returns immediately if
5882    // the child has already exited.
5883    while (waitpid(pid, &status, 0) < 0) {
5884      switch (errno) {
5885      case ECHILD: return 0;
5886      case EINTR: break;
5887      default: return -1;
5888      }
5889    }
5890
5891    if (WIFEXITED(status)) {
5892      // The child exited normally; get its exit code.
5893      return WEXITSTATUS(status);
5894    } else if (WIFSIGNALED(status)) {
5895      // The child exited because of a signal
5896      // The best value to return is 0x80 + signal number,
5897      // because that is what all Unix shells do, and because
5898      // it allows callers to distinguish between process exit and
5899      // process death by signal.
5900      return 0x80 + WTERMSIG(status);
5901    } else {
5902      // Unknown exit code; pass it through
5903      return status;
5904    }
5905  }
5906}
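
// A minimal usage sketch (illustrative; the OnError machinery exercises this
// function in essentially this way):
//
//   int rc = os::fork_and_exec((char*)"ls /tmp");
//   if (rc == -1)        { /* could not fork or exec */ }
//   else if (rc >= 0x80) { /* child died on signal rc - 0x80 */ }
//   else                 { /* child's normal exit status */ }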
5907
5908// is_headless_jre()
5909//
5910// Test for the existence of xawt/libmawt.so or libawt_xawt.so
5911// in order to report if we are running in a headless jre
5912//
5913// Since JDK8 xawt/libmawt.so was moved into the same directory
5914// as libawt.so, and renamed libawt_xawt.so
5915//
5916bool os::is_headless_jre() {
5917  struct stat statbuf;
5918  char buf[MAXPATHLEN];
5919  char libmawtpath[MAXPATHLEN];
5920  const char *xawtstr  = "/xawt/libmawt.so";
5921  const char *new_xawtstr = "/libawt_xawt.so";
5922  char *p;
5923
5924  // Get path to libjvm.so
5925  os::jvm_path(buf, sizeof(buf));
5926
5927  // Get rid of libjvm.so
5928  p = strrchr(buf, '/');
5929  if (p == NULL) {
5930    return false;
5931  } else {
5932    *p = '\0';
5933  }
5934
5935  // Get rid of client or server
5936  p = strrchr(buf, '/');
5937  if (p == NULL) {
5938    return false;
5939  } else {
5940    *p = '\0';
5941  }
5942
5943  // check xawt/libmawt.so
5944  strcpy(libmawtpath, buf);
5945  strcat(libmawtpath, xawtstr);
5946  if (::stat(libmawtpath, &statbuf) == 0) return false;
5947
5948  // check libawt_xawt.so
5949  strcpy(libmawtpath, buf);
5950  strcat(libmawtpath, new_xawtstr);
5951  if (::stat(libmawtpath, &statbuf) == 0) return false;
5952
5953  return true;
5954}
5955
// Get the default path to the core file
// Returns the length of the string
int os::get_core_path(char* buffer, size_t bufferSize) {
  /*
   * Max length of /proc/sys/kernel/core_pattern is 128 characters.
   * See https://www.kernel.org/doc/Documentation/sysctl/kernel.txt
   */
  const int core_pattern_len = 129;
  char core_pattern[core_pattern_len] = {0};

  int core_pattern_file = ::open("/proc/sys/kernel/core_pattern", O_RDONLY);
  if (core_pattern_file != -1) {
    // Read at most core_pattern_len - 1 bytes so the buffer always stays
    // NUL-terminated.
    ssize_t ret = ::read(core_pattern_file, core_pattern, core_pattern_len - 1);
    ::close(core_pattern_file);

    if (ret > 0) {
      // Strip a trailing newline, if present.
      char *last_char = core_pattern + strlen(core_pattern) - 1;

      if (*last_char == '\n') {
        *last_char = '\0';
      }
    }
  }

  if (strlen(core_pattern) == 0) {
    return -1;
  }

  char *pid_pos = strstr(core_pattern, "%p");
  int written;

  if (core_pattern[0] == '/') {
    // core_pattern is data read from /proc, not a format string, so print it
    // through "%s".
    written = jio_snprintf(buffer, bufferSize, "%s", core_pattern);
  } else {
    char cwd[PATH_MAX];

    const char* p = get_current_directory(cwd, PATH_MAX);
    if (p == NULL) {
      return -1;
    }

    if (core_pattern[0] == '|') {
      written = jio_snprintf(buffer, bufferSize,
                             "\"%s\" (or dumping to %s/core.%d)",
                             &core_pattern[1], p, current_process_id());
    } else {
      written = jio_snprintf(buffer, bufferSize, "%s/%s", p, core_pattern);
    }
  }

  if (written < 0) {
    return -1;
  }

  if (((size_t)written < bufferSize) && (pid_pos == NULL) && (core_pattern[0] != '|')) {
    int core_uses_pid_file = ::open("/proc/sys/kernel/core_uses_pid", O_RDONLY);

    if (core_uses_pid_file != -1) {
      char core_uses_pid = 0;
      ssize_t ret = ::read(core_uses_pid_file, &core_uses_pid, 1);
      ::close(core_uses_pid_file);

      if (core_uses_pid == '1') {
        jio_snprintf(buffer + written, bufferSize - written,
                     ".%d", current_process_id());
      }
    }
  }

  return (int)strlen(buffer);
}
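
// Illustrative usage (added example, not part of the original file): the
// fatal-error reporter prints something similar. The function name here is
// hypothetical.
static inline void example_print_core_location() {
  char path[PATH_MAX];
  int n = os::get_core_path(path, sizeof(path));
  if (n > 0) {
    tty->print_cr("Core dump will be written to: %s", path);
  } else {
    tty->print_cr("Core dump location could not be determined");
  }
}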

/////////////// Unit tests ///////////////

#ifndef PRODUCT

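// The do { } while (false) wrapper below makes test_log expand to a single
// statement, so it can be used safely inside an unbraced if/else.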
#define test_log(...)              \
  do {                             \
    if (VerboseInternalVMTests) {  \
      tty->print_cr(__VA_ARGS__);  \
      tty->flush();                \
    }                              \
  } while (false)

class TestReserveMemorySpecial : AllStatic {
 public:
  // Touch one byte on every small page in [addr, addr + size) so the
  // reserved range is actually written to.
  static void small_page_write(void* addr, size_t size) {
    size_t page_size = os::vm_page_size();

    char* end = (char*)addr + size;
    for (char* p = (char*)addr; p < end; p += page_size) {
      *p = 1;
    }
  }

  static void test_reserve_memory_special_huge_tlbfs_only(size_t size) {
    if (!UseHugeTLBFS) {
      return;
    }

    test_log("test_reserve_memory_special_huge_tlbfs_only(" SIZE_FORMAT ")", size);

    char* addr = os::Linux::reserve_memory_special_huge_tlbfs_only(size, NULL, false);

    if (addr != NULL) {
      small_page_write(addr, size);

      os::Linux::release_memory_special_huge_tlbfs(addr, size);
    }
  }

  static void test_reserve_memory_special_huge_tlbfs_only() {
    if (!UseHugeTLBFS) {
      return;
    }

    size_t lp = os::large_page_size();

    // Whole multiples of the large page size, from lp up to lp * 10.
    for (size_t size = lp; size <= lp * 10; size += lp) {
      test_reserve_memory_special_huge_tlbfs_only(size);
    }
  }

  static void test_reserve_memory_special_huge_tlbfs_mixed(size_t size, size_t alignment) {
    if (!UseHugeTLBFS) {
      return;
    }

    test_log("test_reserve_memory_special_huge_tlbfs_mixed(" SIZE_FORMAT ", " SIZE_FORMAT ")",
             size, alignment);

    assert(size >= os::large_page_size(), "Incorrect input to test");

    char* addr = os::Linux::reserve_memory_special_huge_tlbfs_mixed(size, alignment, NULL, false);

    if (addr != NULL) {
      small_page_write(addr, size);

      os::Linux::release_memory_special_huge_tlbfs(addr, size);
    }
  }

  static void test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(size_t size) {
    size_t lp = os::large_page_size();
    size_t ag = os::vm_allocation_granularity();

    // Try every power-of-two alignment, starting at the allocation
    // granularity, for which the requested size is still a multiple.
    for (size_t alignment = ag; is_size_aligned(size, alignment); alignment *= 2) {
      test_reserve_memory_special_huge_tlbfs_mixed(size, alignment);
    }
  }

  static void test_reserve_memory_special_huge_tlbfs_mixed() {
    size_t lp = os::large_page_size();
    size_t ag = os::vm_allocation_granularity();

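    // Boundary cases around whole large pages: exact multiples, sizes off by
    // one allocation granularity, and sizes with a half-large-page remainder
    // that force a mix of large and small page mappings.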
    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp);
    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp + ag);
    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp + lp / 2);
    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 2);
    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 2 + ag);
    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 2 - ag);
    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 2 + lp / 2);
    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 10);
    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 10 + lp / 2);
  }

  static void test_reserve_memory_special_huge_tlbfs() {
    if (!UseHugeTLBFS) {
      return;
    }

    test_reserve_memory_special_huge_tlbfs_only();
    test_reserve_memory_special_huge_tlbfs_mixed();
  }

  static void test_reserve_memory_special_shm(size_t size, size_t alignment) {
    if (!UseSHM) {
      return;
    }

    test_log("test_reserve_memory_special_shm(" SIZE_FORMAT ", " SIZE_FORMAT ")", size, alignment);

    char* addr = os::Linux::reserve_memory_special_shm(size, alignment, NULL, false);

    if (addr != NULL) {
      assert(is_ptr_aligned(addr, alignment), "Check");
      assert(is_ptr_aligned(addr, os::large_page_size()), "Check");

      small_page_write(addr, size);

      os::Linux::release_memory_special_shm(addr, size);
    }
  }

  static void test_reserve_memory_special_shm() {
    size_t lp = os::large_page_size();
    size_t ag = os::vm_allocation_granularity();

    // Every allocation-granularity multiple up to three large pages, with
    // every power-of-two alignment that divides the size.
    for (size_t size = ag; size < lp * 3; size += ag) {
      for (size_t alignment = ag; is_size_aligned(size, alignment); alignment *= 2) {
        test_reserve_memory_special_shm(size, alignment);
      }
    }
  }

  static void test() {
    test_reserve_memory_special_huge_tlbfs();
    test_reserve_memory_special_shm();
  }
};

void TestReserveMemorySpecial_test() {
  TestReserveMemorySpecial::test();
}
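
// Note (added for context): this entry point is intended to be driven by the
// internal VM test runner, which calls TestReserveMemorySpecial_test() when a
// non-product VM is started with -XX:+ExecuteInternalVMTests.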

#endif
