os_linux.cpp revision 6266:469835cd5494
/*
 * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

// no precompiled headers
#include "classfile/classLoader.hpp"
#include "classfile/systemDictionary.hpp"
#include "classfile/vmSymbols.hpp"
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
#include "compiler/compileBroker.hpp"
#include "compiler/disassembler.hpp"
#include "interpreter/interpreter.hpp"
#include "jvm_linux.h"
#include "memory/allocation.inline.hpp"
#include "memory/filemap.hpp"
#include "mutex_linux.inline.hpp"
#include "oops/oop.inline.hpp"
#include "os_share_linux.hpp"
#include "prims/jniFastGetField.hpp"
#include "prims/jvm.h"
#include "prims/jvm_misc.hpp"
#include "runtime/arguments.hpp"
#include "runtime/extendedPC.hpp"
#include "runtime/globals.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/init.hpp"
#include "runtime/java.hpp"
#include "runtime/javaCalls.hpp"
#include "runtime/mutexLocker.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/osThread.hpp"
#include "runtime/perfMemory.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/statSampler.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/thread.inline.hpp"
#include "runtime/threadCritical.hpp"
#include "runtime/timer.hpp"
#include "services/attachListener.hpp"
#include "services/memTracker.hpp"
#include "services/runtimeService.hpp"
#include "utilities/decoder.hpp"
#include "utilities/defaultStream.hpp"
#include "utilities/events.hpp"
#include "utilities/elfFile.hpp"
#include "utilities/growableArray.hpp"
#include "utilities/vmError.hpp"

// put OS-includes here
# include <sys/types.h>
# include <sys/mman.h>
# include <sys/stat.h>
# include <sys/select.h>
# include <pthread.h>
# include <signal.h>
# include <errno.h>
# include <dlfcn.h>
# include <stdio.h>
# include <unistd.h>
# include <sys/resource.h>
# include <sys/time.h>
# include <sys/times.h>
# include <sys/utsname.h>
# include <sys/socket.h>
# include <sys/wait.h>
# include <pwd.h>
# include <poll.h>
# include <semaphore.h>
# include <fcntl.h>
# include <string.h>
# include <syscall.h>
# include <sys/sysinfo.h>
# include <gnu/libc-version.h>
# include <sys/ipc.h>
# include <sys/shm.h>
# include <link.h>
# include <stdint.h>
# include <inttypes.h>
# include <sys/ioctl.h>

// if RUSAGE_THREAD for getrusage() has not been defined, do it here. The code calling
// getrusage() is prepared to handle the associated failure.
#ifndef RUSAGE_THREAD
#define RUSAGE_THREAD   (1)               /* only the calling thread */
#endif
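
// RUSAGE_THREAD first appeared in Linux 2.6.26; on older kernels
// getrusage(RUSAGE_THREAD, ...) fails with EINVAL, which callers such as
// os::elapsedVTime() below handle by falling back to wall-clock time.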

#define MAX_PATH    (2 * K)

#define MAX_SECS 100000000

// for timer info max values which include all bits
#define ALL_64_BITS CONST64(0xFFFFFFFFFFFFFFFF)

#define LARGEPAGES_BIT (1 << 6)

////////////////////////////////////////////////////////////////////////////////
// global variables
julong os::Linux::_physical_memory = 0;

address   os::Linux::_initial_thread_stack_bottom = NULL;
uintptr_t os::Linux::_initial_thread_stack_size   = 0;

int (*os::Linux::_clock_gettime)(clockid_t, struct timespec *) = NULL;
int (*os::Linux::_pthread_getcpuclockid)(pthread_t, clockid_t *) = NULL;
Mutex* os::Linux::_createThread_lock = NULL;
pthread_t os::Linux::_main_thread;
int os::Linux::_page_size = -1;
const int os::Linux::_vm_default_page_size = (8 * K);
bool os::Linux::_is_floating_stack = false;
bool os::Linux::_is_NPTL = false;
bool os::Linux::_supports_fast_thread_cpu_time = false;
const char * os::Linux::_glibc_version = NULL;
const char * os::Linux::_libpthread_version = NULL;
pthread_condattr_t os::Linux::_condattr[1];

static jlong initial_time_count = 0;

static int clock_tics_per_sec = 100;

// For diagnostics: print a message only once. See run_periodic_checks.
static sigset_t check_signal_done;
static bool check_signals = true;

static pid_t _initial_pid = 0;

/* Signal number used to suspend/resume a thread */

/* do not use any signal number less than SIGSEGV, see 4355769 */
static int SR_signum = SIGUSR2;
sigset_t SR_sigset;

/* Used to protect dlsym() calls */
static pthread_mutex_t dl_mutex;

// Declarations
static void unpackTime(timespec* absTime, bool isAbsolute, jlong time);

#ifdef JAVASE_EMBEDDED
class MemNotifyThread: public Thread {
  friend class VMStructs;
 public:
  virtual void run();

 private:
  static MemNotifyThread* _memnotify_thread;
  int _fd;

 public:

  // Constructor
  MemNotifyThread(int fd);

  // Tester
  bool is_memnotify_thread() const { return true; }

  // Printing
  char* name() const { return (char*)"Linux MemNotify Thread"; }

  // Returns the single instance of the MemNotifyThread
  static MemNotifyThread* memnotify_thread() { return _memnotify_thread; }

  // Create and start the single instance of MemNotifyThread
  static void start();
};
#endif // JAVASE_EMBEDDED

// utility functions

static int SR_initialize();

julong os::available_memory() {
  return Linux::available_memory();
}

julong os::Linux::available_memory() {
  // values in struct sysinfo are "unsigned long"
  struct sysinfo si;
  sysinfo(&si);

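  // si.mem_unit is the size in bytes of the unit the sysinfo fields are
  // expressed in (normally 1; the field was added in Linux 2.3.23 and is
  // assumed valid on the kernels HotSpot supports).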
  return (julong)si.freeram * si.mem_unit;
}

julong os::physical_memory() {
  return Linux::physical_memory();
}

////////////////////////////////////////////////////////////////////////////////
// environment support

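// Illustrative use (an assumption, not from the original sources):
//
//   char java_home[MAX_PATH];
//   if (os::getenv("JAVA_HOME", java_home, sizeof(java_home))) {
//     // java_home now holds a NUL-terminated copy of $JAVA_HOME; missing or
//     // overlong values return false and leave the buffer empty.
//   }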
bool os::getenv(const char* name, char* buf, int len) {
  const char* val = ::getenv(name);
  if (val != NULL && strlen(val) < (size_t)len) {
    strcpy(buf, val);
    return true;
  }
  if (len > 0) buf[0] = 0;  // return an empty string
  return false;
}


// Return true if the process is running with elevated privileges, i.e. the
// effective uid/gid differs from the real uid/gid (setuid/setgid). Note that
// this is not a literal root check.

bool os::have_special_privileges() {
  static bool init = false;
  static bool privileges = false;
  if (!init) {
    privileges = (getuid() != geteuid()) || (getgid() != getegid());
    init = true;
  }
  return privileges;
}


#ifndef SYS_gettid
// i386: 224, ia64: 1105, amd64: 186, sparc: 143
#ifdef __ia64__
#define SYS_gettid 1105
#elif __i386__
#define SYS_gettid 224
#elif __amd64__
#define SYS_gettid 186
#elif __sparc__
#define SYS_gettid 143
#else
#error define gettid for the arch
#endif
#endif

// Cpu architecture string
#if   defined(ZERO)
static char cpu_arch[] = ZERO_LIBARCH;
#elif defined(IA64)
static char cpu_arch[] = "ia64";
#elif defined(IA32)
static char cpu_arch[] = "i386";
#elif defined(AMD64)
static char cpu_arch[] = "amd64";
#elif defined(ARM)
static char cpu_arch[] = "arm";
#elif defined(PPC32)
static char cpu_arch[] = "ppc";
#elif defined(PPC64)
static char cpu_arch[] = "ppc64";
#elif defined(SPARC)
#  ifdef _LP64
static char cpu_arch[] = "sparcv9";
#  else
static char cpu_arch[] = "sparc";
#  endif
#else
#error Add appropriate cpu_arch setting
#endif


// pid_t gettid()
//
// Returns the kernel thread id of the currently running thread. The kernel
// thread id is used to access /proc.
//
// (Note that getpid() on LinuxThreads returns the kernel thread id too; but
// on NPTL, it returns the same pid for all threads, as required by POSIX.)
//
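// (glibc did not provide a gettid() library wrapper until glibc 2.30, which
// is why the raw syscall is used below.)
//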
pid_t os::Linux::gettid() {
  int rslt = syscall(SYS_gettid);
  if (rslt == -1) {
    // old kernel, no NPTL support
    return getpid();
  } else {
    return (pid_t)rslt;
  }
}

// On most versions of Linux, sysconf(_SC_NPROCESSORS_CONF) determines the
// number of processors by looking at the /proc file system. In a chroot
// environment without /proc mounted, the call returns 1, which causes the VM
// to act as if it is on a single-processor machine and elide locking (see the
// is_MP() call).
static bool unsafe_chroot_detected = false;
static const char *unstable_chroot_error = "/proc file system not found.\n"
                     "Java may be unstable running multithreaded in a chroot "
                     "environment on Linux when /proc filesystem is not mounted.";

void os::Linux::initialize_system_info() {
  set_processor_count(sysconf(_SC_NPROCESSORS_CONF));
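  // Note: _SC_NPROCESSORS_CONF counts configured processors, including any
  // that are currently offline; _SC_NPROCESSORS_ONLN would count only the
  // online ones.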
  if (processor_count() == 1) {
    pid_t pid = os::Linux::gettid();
    char fname[32];
    jio_snprintf(fname, sizeof(fname), "/proc/%d", pid);
    FILE *fp = fopen(fname, "r");
    if (fp == NULL) {
      unsafe_chroot_detected = true;
    } else {
      fclose(fp);
    }
  }
  _physical_memory = (julong)sysconf(_SC_PHYS_PAGES) * (julong)sysconf(_SC_PAGESIZE);
  assert(processor_count() > 0, "linux error");
}

void os::init_system_properties_values() {
  // The next steps are taken in the product version:
  //
  // Obtain the JAVA_HOME value from the location of libjvm.so.
  // This library should be located at:
  // <JAVA_HOME>/jre/lib/<arch>/{client|server}/libjvm.so.
  //
  // If "/jre/lib/" appears at the right place in the path, then we
  // assume libjvm.so is installed in a JDK and we use this path.
  //
  // Otherwise exit with message: "Could not create the Java virtual machine."
  //
  // The following extra steps are taken in the debugging version:
  //
  // If "/jre/lib/" does NOT appear at the right place in the path,
  // then instead of exiting, check for the $JAVA_HOME environment variable.
  //
  // If it is defined and we are able to locate $JAVA_HOME/jre/lib/<arch>,
  // then we append a fake suffix "hotspot/libjvm.so" to this path so
  // it looks like libjvm.so is installed there
  // <JAVA_HOME>/jre/lib/<arch>/hotspot/libjvm.so.
  //
  // Otherwise exit.
  //
  // Important note: if the location of libjvm.so changes this
  // code needs to be changed accordingly.

// See ld(1):
//      The linker uses the following search paths to locate required
//      shared libraries:
//        1: ...
//        ...
//        7: The default directories, normally /lib and /usr/lib.
#if defined(AMD64) || defined(_LP64) && (defined(SPARC) || defined(PPC) || defined(S390))
#define DEFAULT_LIBPATH "/usr/lib64:/lib64:/lib:/usr/lib"
#else
#define DEFAULT_LIBPATH "/lib:/usr/lib"
#endif

// Base path of extensions installed on the system.
#define SYS_EXT_DIR     "/usr/java/packages"
#define EXTENSIONS_DIR  "/lib/ext"
#define ENDORSED_DIR    "/lib/endorsed"

  // Buffer that fits several sprintfs.
  // Note that the space for the colon and the trailing null are provided
  // by the nulls included by the sizeof operator.
  const size_t bufsize =
    MAX3((size_t)MAXPATHLEN,  // For dll_dir & friends.
         (size_t)MAXPATHLEN + sizeof(EXTENSIONS_DIR) + sizeof(SYS_EXT_DIR) + sizeof(EXTENSIONS_DIR), // extensions dir
         (size_t)MAXPATHLEN + sizeof(ENDORSED_DIR)); // endorsed dir
  char *buf = (char *)NEW_C_HEAP_ARRAY(char, bufsize, mtInternal);

  // sysclasspath, java_home, dll_dir
  {
    char *pslash;
    os::jvm_path(buf, bufsize);

    // Found the full path to libjvm.so.
    // Now cut the path to <java_home>/jre if we can.
    *(strrchr(buf, '/')) = '\0'; // Get rid of /libjvm.so.
    pslash = strrchr(buf, '/');
    if (pslash != NULL) {
      *pslash = '\0';            // Get rid of /{client|server|hotspot}.
    }
    Arguments::set_dll_dir(buf);

    if (pslash != NULL) {
      pslash = strrchr(buf, '/');
      if (pslash != NULL) {
        *pslash = '\0';          // Get rid of /<arch>.
        pslash = strrchr(buf, '/');
        if (pslash != NULL) {
          *pslash = '\0';        // Get rid of /lib.
        }
      }
    }
    Arguments::set_java_home(buf);
    set_boot_path('/', ':');
  }
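
  // Worked example (illustrative, not from the original comments): if
  // libjvm.so was loaded from
  //   /opt/jdk/jre/lib/amd64/server/libjvm.so
  // then the stripping above yields
  //   dll_dir   = /opt/jdk/jre/lib/amd64
  //   java_home = /opt/jdk/jre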

  // Where to look for native libraries.
  //
  // Note: Due to a legacy implementation, most of the library path
  // is set in the launcher. This was to accommodate linking restrictions
  // on legacy Linux implementations (which are no longer supported).
  //
  // However, to prevent the proliferation of improperly built native
  // libraries, the new path component /usr/java/packages is added here.
  // Eventually, all the library path setting will be done here.
  {
    // Get the user setting of LD_LIBRARY_PATH, and prepend it. It
    // should always exist (until the legacy problem cited above is
    // addressed).
    const char *v = ::getenv("LD_LIBRARY_PATH");
    const char *v_colon = ":";
    if (v == NULL) { v = ""; v_colon = ""; }
    // That's +1 for the colon and +1 for the trailing '\0'.
    char *ld_library_path = (char *)NEW_C_HEAP_ARRAY(char,
                                                     strlen(v) + 1 +
                                                     sizeof(SYS_EXT_DIR) + sizeof("/lib/") + strlen(cpu_arch) + sizeof(DEFAULT_LIBPATH) + 1,
                                                     mtInternal);
    sprintf(ld_library_path, "%s%s" SYS_EXT_DIR "/lib/%s:" DEFAULT_LIBPATH, v, v_colon, cpu_arch);
    Arguments::set_library_path(ld_library_path);
    FREE_C_HEAP_ARRAY(char, ld_library_path, mtInternal);
  }

  // Extensions directories.
  sprintf(buf, "%s" EXTENSIONS_DIR ":" SYS_EXT_DIR EXTENSIONS_DIR, Arguments::get_java_home());
  Arguments::set_ext_dirs(buf);

  // Endorsed standards default directory.
  sprintf(buf, "%s" ENDORSED_DIR, Arguments::get_java_home());
  Arguments::set_endorsed_dirs(buf);

  FREE_C_HEAP_ARRAY(char, buf, mtInternal);

#undef DEFAULT_LIBPATH
#undef SYS_EXT_DIR
#undef EXTENSIONS_DIR
#undef ENDORSED_DIR
}

////////////////////////////////////////////////////////////////////////////////
// breakpoint support

void os::breakpoint() {
  BREAKPOINT;
}

extern "C" void breakpoint() {
  // use debugger to set breakpoint here
}

////////////////////////////////////////////////////////////////////////////////
// signal support

debug_only(static bool signal_sets_initialized = false);
static sigset_t unblocked_sigs, vm_sigs, allowdebug_blocked_sigs;

bool os::Linux::is_sig_ignored(int sig) {
  struct sigaction oact;
  sigaction(sig, (struct sigaction*)NULL, &oact);
  void* ohlr = oact.sa_sigaction ? CAST_FROM_FN_PTR(void*, oact.sa_sigaction)
                                 : CAST_FROM_FN_PTR(void*, oact.sa_handler);
  return ohlr == CAST_FROM_FN_PTR(void*, SIG_IGN);
}

void os::Linux::signal_sets_init() {
  // Should also have an assertion stating we are still single-threaded.
  assert(!signal_sets_initialized, "Already initialized");
  // Fill in signals that are necessarily unblocked for all threads in
  // the VM. Currently, we unblock the following signals:
  // SHUTDOWN{1,2,3}_SIGNAL: for shutdown hooks support (unless overridden
  //                         by -Xrs (=ReduceSignalUsage));
  // BREAK_SIGNAL which is unblocked only by the VM thread and blocked by all
  // other threads. The "ReduceSignalUsage" boolean tells us not to alter
  // the dispositions or masks wrt these signals.
  // Programs embedding the VM that want to use the above signals for their
  // own purposes must, at this time, use the "-Xrs" option to prevent
  // interference with shutdown hooks and BREAK_SIGNAL thread dumping.
  // (See bug 4345157, and other related bugs).
  // In reality, though, unblocking these signals is really a nop, since
  // these signals are not blocked by default.
  sigemptyset(&unblocked_sigs);
  sigemptyset(&allowdebug_blocked_sigs);
  sigaddset(&unblocked_sigs, SIGILL);
  sigaddset(&unblocked_sigs, SIGSEGV);
  sigaddset(&unblocked_sigs, SIGBUS);
  sigaddset(&unblocked_sigs, SIGFPE);
#if defined(PPC64)
  sigaddset(&unblocked_sigs, SIGTRAP);
#endif
  sigaddset(&unblocked_sigs, SR_signum);

  if (!ReduceSignalUsage) {
    if (!os::Linux::is_sig_ignored(SHUTDOWN1_SIGNAL)) {
      sigaddset(&unblocked_sigs, SHUTDOWN1_SIGNAL);
      sigaddset(&allowdebug_blocked_sigs, SHUTDOWN1_SIGNAL);
    }
    if (!os::Linux::is_sig_ignored(SHUTDOWN2_SIGNAL)) {
      sigaddset(&unblocked_sigs, SHUTDOWN2_SIGNAL);
      sigaddset(&allowdebug_blocked_sigs, SHUTDOWN2_SIGNAL);
    }
    if (!os::Linux::is_sig_ignored(SHUTDOWN3_SIGNAL)) {
      sigaddset(&unblocked_sigs, SHUTDOWN3_SIGNAL);
      sigaddset(&allowdebug_blocked_sigs, SHUTDOWN3_SIGNAL);
    }
  }
  // Fill in signals that are blocked by all but the VM thread.
  sigemptyset(&vm_sigs);
  if (!ReduceSignalUsage) {
    sigaddset(&vm_sigs, BREAK_SIGNAL);
  }
  debug_only(signal_sets_initialized = true);
}

// These are signals that are unblocked while a thread is running Java.
// (For some reason, they get blocked by default.)
sigset_t* os::Linux::unblocked_signals() {
  assert(signal_sets_initialized, "Not initialized");
  return &unblocked_sigs;
}

// These are the signals that are blocked while a (non-VM) thread is
// running Java. Only the VM thread handles these signals.
sigset_t* os::Linux::vm_signals() {
  assert(signal_sets_initialized, "Not initialized");
  return &vm_sigs;
}

// These are signals that are blocked during cond_wait to allow the debugger in
sigset_t* os::Linux::allowdebug_blocked_signals() {
  assert(signal_sets_initialized, "Not initialized");
  return &allowdebug_blocked_sigs;
}

void os::Linux::hotspot_sigmask(Thread* thread) {
  // Save caller's signal mask before setting VM signal mask
  sigset_t caller_sigmask;
  pthread_sigmask(SIG_BLOCK, NULL, &caller_sigmask);

  OSThread* osthread = thread->osthread();
  osthread->set_caller_sigmask(caller_sigmask);
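  // os::free_thread() restores this saved mask when it runs on the same
  // thread, so a JNI-attached thread detaches with its original mask intact.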

  pthread_sigmask(SIG_UNBLOCK, os::Linux::unblocked_signals(), NULL);

  if (!ReduceSignalUsage) {
    if (thread->is_VM_thread()) {
      // Only the VM thread handles BREAK_SIGNAL ...
      pthread_sigmask(SIG_UNBLOCK, vm_signals(), NULL);
    } else {
      // ... all other threads block BREAK_SIGNAL
      pthread_sigmask(SIG_BLOCK, vm_signals(), NULL);
    }
  }
}

//////////////////////////////////////////////////////////////////////////////
// detecting pthread library

void os::Linux::libpthread_init() {
  // Save glibc and pthread version strings. Note that _CS_GNU_LIBC_VERSION
  // and _CS_GNU_LIBPTHREAD_VERSION are supported in glibc >= 2.3.2. Use a
  // generic name for earlier versions.
  // Define macros here so we can build HotSpot on old systems.
# ifndef _CS_GNU_LIBC_VERSION
# define _CS_GNU_LIBC_VERSION 2
# endif
# ifndef _CS_GNU_LIBPTHREAD_VERSION
# define _CS_GNU_LIBPTHREAD_VERSION 3
# endif

  size_t n = confstr(_CS_GNU_LIBC_VERSION, NULL, 0);
  if (n > 0) {
    char *str = (char *)malloc(n, mtInternal);
    confstr(_CS_GNU_LIBC_VERSION, str, n);
    os::Linux::set_glibc_version(str);
  } else {
    // _CS_GNU_LIBC_VERSION is not supported, try gnu_get_libc_version()
    static char _gnu_libc_version[32];
    jio_snprintf(_gnu_libc_version, sizeof(_gnu_libc_version),
                 "glibc %s %s", gnu_get_libc_version(), gnu_get_libc_release());
    os::Linux::set_glibc_version(_gnu_libc_version);
  }

  n = confstr(_CS_GNU_LIBPTHREAD_VERSION, NULL, 0);
  if (n > 0) {
    char *str = (char *)malloc(n, mtInternal);
    confstr(_CS_GNU_LIBPTHREAD_VERSION, str, n);
    // Vanilla RH-9 (glibc 2.3.2) has a bug where confstr() always tells
    // us "NPTL-0.29" even when we are running with LinuxThreads. Check if
    // this is the case. LinuxThreads has a hard limit on the max number of
    // threads, so sysconf(_SC_THREAD_THREADS_MAX) will return a positive
    // value. NPTL does not have such a limit; sysconf() returns -1 and
    // errno is not changed. Check if it is really NPTL.
    if (strcmp(os::Linux::glibc_version(), "glibc 2.3.2") == 0 &&
        strstr(str, "NPTL") &&
        sysconf(_SC_THREAD_THREADS_MAX) > 0) {
      free(str);
      os::Linux::set_libpthread_version("linuxthreads");
    } else {
      os::Linux::set_libpthread_version(str);
    }
  } else {
    // glibc before 2.3.2 only has LinuxThreads.
    os::Linux::set_libpthread_version("linuxthreads");
  }

  if (strstr(libpthread_version(), "NPTL")) {
    os::Linux::set_is_NPTL();
  } else {
    os::Linux::set_is_LinuxThreads();
  }

  // LinuxThreads has two flavors: floating-stack mode, which allows variable
  // stack size, and fixed-stack mode. NPTL is always floating-stack.
  if (os::Linux::is_NPTL() || os::Linux::supports_variable_stack_size()) {
    os::Linux::set_is_floating_stack();
  }
}

/////////////////////////////////////////////////////////////////////////////
// thread stack

// Force the Linux kernel to expand the current thread stack. If "bottom" is
// close to the stack guard, the caller should block all signals.
//
// MAP_GROWSDOWN:
//   A special mmap() flag that is used to implement thread stacks. It tells
//   the kernel that the memory region should extend downwards when needed.
//   This allows early versions of LinuxThreads to only mmap the first few
//   pages when creating a new thread. The Linux kernel will automatically
//   expand the thread stack as needed (on page faults).
//
//   However, because the memory region of a MAP_GROWSDOWN stack can grow on
//   demand, if a page fault happens outside an already mapped MAP_GROWSDOWN
//   region, it's hard to tell if the fault is due to a legitimate stack
//   access or because of reading/writing non-existent memory (e.g. buffer
//   overrun). As a rule, if the fault happens below the current stack
//   pointer, the Linux kernel does not expand the stack; instead a SIGSEGV
//   is sent to the application (see Linux kernel fault.c).
//
//   This Linux feature can cause SIGSEGV when the VM bangs the thread stack
//   for stack overflow detection.
//
//   Newer versions of LinuxThreads (since glibc-2.2, or, RH-7.x) and NPTL do
//   not use this flag. However, the stack of the initial thread is not
//   created by pthread, so it is still MAP_GROWSDOWN. Also it's possible
//   (though unlikely) that user code can create a thread with a
//   MAP_GROWSDOWN stack and then attach the thread to the JVM.
//
// To get around the problem and allow stack banging on Linux, we need to
// manually expand the thread stack after receiving the SIGSEGV.
//
// There are two ways to expand the thread stack to address "bottom"; we used
// both of them in the JVM before 1.5:
//   1. adjust the stack pointer first so that it is below "bottom", and then
//      touch "bottom"
//   2. mmap() the page in question
//
// Now that the alternate signal stack is gone, it's harder to use 2. For
// instance, if the current sp is already near the lower end of page 101, and
// we need to call mmap() to map page 100, it is possible that part of the
// mmap() frame will be placed in page 100. When page 100 is mapped, it is
// zero-filled. That will destroy the mmap() frame and cause the VM to crash.
//
// The following code works by adjusting sp first, then accessing the "bottom"
// page to force a page fault. The Linux kernel will then automatically expand
// the stack mapping.
//
// _expand_stack_to() assumes its frame size is less than the page size, which
// should always be true if the function is not inlined.

#if __GNUC__ < 3    // gcc 2.x does not support noinline attribute
#define NOINLINE
#else
#define NOINLINE __attribute__ ((noinline))
#endif

static void _expand_stack_to(address bottom) NOINLINE;

static void _expand_stack_to(address bottom) {
  address sp;
  size_t size;
  volatile char *p;

  // Adjust bottom to point to the largest address within the same page; this
  // gives us a one-page buffer if alloca() allocates slightly more memory.
  bottom = (address)align_size_down((uintptr_t)bottom, os::Linux::page_size());
  bottom += os::Linux::page_size() - 1;

  // sp might be slightly above the current stack pointer; if that's the
  // case, we will alloca() a little more space than necessary, which is OK.
  // Don't use os::current_stack_pointer(), as its result can be slightly
  // below the current stack pointer, causing us to not alloca enough to
  // reach "bottom".
  sp = (address)&sp;

  if (sp > bottom) {
    size = sp - bottom;
    p = (volatile char *)alloca(size);
    assert(p != NULL && p <= (volatile char *)bottom, "alloca problem?");
    p[0] = '\0';
  }
}

bool os::Linux::manually_expand_stack(JavaThread * t, address addr) {
  assert(t != NULL, "just checking");
  assert(t->osthread()->expanding_stack(), "expand should be set");
  assert(t->stack_base() != NULL, "stack_base was not initialized");

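  // Callers must bracket this call with osthread->set_expanding_stack() /
  // clear_expanding_stack(), as os::create_attached_thread() does; the
  // assert above relies on that protocol.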
  if (addr < t->stack_base() && addr >= t->stack_yellow_zone_base()) {
    sigset_t mask_all, old_sigset;
    sigfillset(&mask_all);
    pthread_sigmask(SIG_SETMASK, &mask_all, &old_sigset);
    _expand_stack_to(addr);
    pthread_sigmask(SIG_SETMASK, &old_sigset, NULL);
    return true;
  }
  return false;
}

//////////////////////////////////////////////////////////////////////////////
// create new thread

static address highest_vm_reserved_address();

// check if it's safe to start a new thread
static bool _thread_safety_check(Thread* thread) {
  if (os::Linux::is_LinuxThreads() && !os::Linux::is_floating_stack()) {
    // Fixed stack LinuxThreads (SuSE Linux/x86, and some versions of Redhat)
    //   The heap is mmap'ed at the lower end of the memory space. Thread
    //   stacks are allocated (MAP_FIXED) from the high address space. Every
    //   thread stack occupies a fixed-size slot (usually 2MB, but users can
    //   change it by rebuilding LinuxThreads).
    //
    // The problem with MAP_FIXED is that mmap() can still succeed even if
    // part of the memory region has already been mmap'ed. That means if we
    // have too many threads and/or a very large heap, eventually a thread
    // stack will collide with the heap.
    //
    // Here we try to prevent heap/stack collision by comparing the current
    // stack bottom with the highest address that has been mmap'ed by the JVM
    // plus a safety margin for memory maps created by native code.
    //
    // This feature can be disabled by setting ThreadSafetyMargin to 0
    //
    if (ThreadSafetyMargin > 0) {
      address stack_bottom = os::current_stack_base() - os::current_stack_size();

      // not safe if our stack extends below the safety margin
      return stack_bottom - ThreadSafetyMargin >= highest_vm_reserved_address();
    } else {
      return true;
    }
  } else {
    // Floating stack LinuxThreads or NPTL:
    //   Unlike fixed stack LinuxThreads, thread stacks are not MAP_FIXED. When
    //   there's not enough space left, pthread_create() will fail. If we come
    //   here, that means enough space has been reserved for the stack.
    return true;
  }
}

// Thread start routine for all newly created threads
static void *java_start(Thread *thread) {
  // Try to randomize the cache line index of hot stack frames.
  // This helps when threads with identical stack traces evict each other's
  // cache lines. The threads can be either from the same JVM instance, or
  // from different JVM instances. The benefit is especially true for
  // processors with hyperthreading technology.
  static int counter = 0;
  int pid = os::current_process_id();
  alloca(((pid ^ counter++) & 7) * 128);
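  // (pid ^ counter) & 7 yields 0..7, so each new thread shifts its frames
  // down by up to 7 * 128 = 896 bytes of stack, in 128-byte steps.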

  ThreadLocalStorage::set_thread(thread);

  OSThread* osthread = thread->osthread();
  Monitor* sync = osthread->startThread_lock();

  // non floating stack LinuxThreads needs extra check, see above
  if (!_thread_safety_check(thread)) {
    // notify parent thread
    MutexLockerEx ml(sync, Mutex::_no_safepoint_check_flag);
    osthread->set_state(ZOMBIE);
    sync->notify_all();
    return NULL;
  }

  // thread_id is kernel thread id (similar to Solaris LWP id)
  osthread->set_thread_id(os::Linux::gettid());

  if (UseNUMA) {
    int lgrp_id = os::numa_get_group_id();
    if (lgrp_id != -1) {
      thread->set_lgrp_id(lgrp_id);
    }
  }
  // initialize signal mask for this thread
  os::Linux::hotspot_sigmask(thread);

  // initialize floating point control register
  os::Linux::init_thread_fpu_state();

  // handshaking with parent thread
  {
    MutexLockerEx ml(sync, Mutex::_no_safepoint_check_flag);

    // notify parent thread
    osthread->set_state(INITIALIZED);
    sync->notify_all();

    // wait until os::start_thread()
    while (osthread->get_state() == INITIALIZED) {
      sync->wait(Mutex::_no_safepoint_check_flag);
    }
  }

  // call one more level start routine
  thread->run();

  return 0;
}

bool os::create_thread(Thread* thread, ThreadType thr_type, size_t stack_size) {
  assert(thread->osthread() == NULL, "caller responsible");

  // Allocate the OSThread object
  OSThread* osthread = new OSThread(NULL, NULL);
  if (osthread == NULL) {
    return false;
  }

  // set the correct thread state
  osthread->set_thread_type(thr_type);

  // Initial state is ALLOCATED but not INITIALIZED
  osthread->set_state(ALLOCATED);

  thread->set_osthread(osthread);

  // init thread attributes
  pthread_attr_t attr;
  pthread_attr_init(&attr);
  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);

  // stack size
  if (os::Linux::supports_variable_stack_size()) {
    // calculate stack size if it's not specified by caller
    if (stack_size == 0) {
      stack_size = os::Linux::default_stack_size(thr_type);

      switch (thr_type) {
      case os::java_thread:
        // Java threads use ThreadStackSize, whose default value can be
        // changed with the -Xss flag.
        assert(JavaThread::stack_size_at_create() > 0, "this should be set");
        stack_size = JavaThread::stack_size_at_create();
        break;
      case os::compiler_thread:
        if (CompilerThreadStackSize > 0) {
          stack_size = (size_t)(CompilerThreadStackSize * K);
          break;
        } // else fall through:
          // use VMThreadStackSize if CompilerThreadStackSize is not defined
      case os::vm_thread:
      case os::pgc_thread:
      case os::cgc_thread:
      case os::watcher_thread:
        if (VMThreadStackSize > 0) stack_size = (size_t)(VMThreadStackSize * K);
        break;
      }
    }

    stack_size = MAX2(stack_size, os::Linux::min_stack_allowed);
    pthread_attr_setstacksize(&attr, stack_size);
  } else {
    // let pthread_create() pick the default value.
  }

  // glibc guard page
  pthread_attr_setguardsize(&attr, os::Linux::default_guard_size(thr_type));

  ThreadState state;

  {
    // Serialize thread creation if we are running with fixed stack LinuxThreads
    bool lock = os::Linux::is_LinuxThreads() && !os::Linux::is_floating_stack();
    if (lock) {
      os::Linux::createThread_lock()->lock_without_safepoint_check();
    }

    pthread_t tid;
    int ret = pthread_create(&tid, &attr, (void* (*)(void*)) java_start, thread);

    pthread_attr_destroy(&attr);

    if (ret != 0) {
      if (PrintMiscellaneous && (Verbose || WizardMode)) {
        perror("pthread_create()");
      }
      // Need to clean up stuff we've allocated so far
      thread->set_osthread(NULL);
      delete osthread;
      if (lock) os::Linux::createThread_lock()->unlock();
      return false;
    }

    // Store pthread info into the OSThread
    osthread->set_pthread_id(tid);

    // Wait until child thread is either initialized or aborted
    {
      Monitor* sync_with_child = osthread->startThread_lock();
      MutexLockerEx ml(sync_with_child, Mutex::_no_safepoint_check_flag);
      while ((state = osthread->get_state()) == ALLOCATED) {
        sync_with_child->wait(Mutex::_no_safepoint_check_flag);
      }
    }

    if (lock) {
      os::Linux::createThread_lock()->unlock();
    }
  }

  // Aborted due to thread limit being reached
  if (state == ZOMBIE) {
    thread->set_osthread(NULL);
    delete osthread;
    return false;
  }

  // The thread is returned suspended (in state INITIALIZED),
  // and is started higher up in the call chain
  assert(state == INITIALIZED, "race condition");
  return true;
}

/////////////////////////////////////////////////////////////////////////////
// attach existing thread

// bootstrap the main thread
bool os::create_main_thread(JavaThread* thread) {
  assert(os::Linux::_main_thread == pthread_self(), "should be called inside main thread");
  return create_attached_thread(thread);
}

bool os::create_attached_thread(JavaThread* thread) {
#ifdef ASSERT
  thread->verify_not_published();
#endif

  // Allocate the OSThread object
  OSThread* osthread = new OSThread(NULL, NULL);

  if (osthread == NULL) {
    return false;
  }

  // Store pthread info into the OSThread
  osthread->set_thread_id(os::Linux::gettid());
  osthread->set_pthread_id(::pthread_self());

  // initialize floating point control register
  os::Linux::init_thread_fpu_state();

  // Initial thread state is RUNNABLE
  osthread->set_state(RUNNABLE);

  thread->set_osthread(osthread);

  if (UseNUMA) {
    int lgrp_id = os::numa_get_group_id();
    if (lgrp_id != -1) {
      thread->set_lgrp_id(lgrp_id);
    }
  }

  if (os::Linux::is_initial_thread()) {
    // If the current thread is the initial thread, its stack is mapped on
    // demand, see the notes about MAP_GROWSDOWN above. Here we try to force
    // the kernel to map the entire stack region to avoid SEGV in stack
    // banging. It is also useful to get around the heap-stack-gap problem
    // on SuSE kernels (see 4821821 for details). We first expand the stack
    // to the top of the yellow zone, then enable the stack yellow zone
    // (order is significant, enabling the yellow zone first will crash the
    // JVM on SuSE Linux), so there is no gap between the last two virtual
    // memory regions.

    JavaThread *jt = (JavaThread *)thread;
    address addr = jt->stack_yellow_zone_base();
    assert(addr != NULL, "initialization problem?");
    assert(jt->stack_available(addr) > 0, "stack guard should not be enabled");

    osthread->set_expanding_stack();
    os::Linux::manually_expand_stack(jt, addr);
    osthread->clear_expanding_stack();
  }

  // initialize signal mask for this thread
  // and save the caller's signal mask
  os::Linux::hotspot_sigmask(thread);

  return true;
}

void os::pd_start_thread(Thread* thread) {
  OSThread * osthread = thread->osthread();
  assert(osthread->get_state() != INITIALIZED, "just checking");
  Monitor* sync_with_child = osthread->startThread_lock();
  MutexLockerEx ml(sync_with_child, Mutex::_no_safepoint_check_flag);
  sync_with_child->notify();
}

// Free Linux resources related to the OSThread
void os::free_thread(OSThread* osthread) {
  assert(osthread != NULL, "osthread not set");

  if (Thread::current()->osthread() == osthread) {
    // Restore caller's signal mask
    sigset_t sigmask = osthread->caller_sigmask();
    pthread_sigmask(SIG_SETMASK, &sigmask, NULL);
  }

  delete osthread;
}

//////////////////////////////////////////////////////////////////////////////
// thread local storage

// Restore the thread pointer if the destructor is called. This is in case
// someone from JNI code sets up a destructor with pthread_key_create to run
// detachCurrentThread on thread death. Unless we restore the thread pointer we
// will hang or crash. When detachCurrentThread is called the key will be set
// to null and we will not be called again. If detachCurrentThread is never
// called we could loop forever depending on the pthread implementation.
static void restore_thread_pointer(void* p) {
  Thread* thread = (Thread*) p;
  os::thread_local_storage_at_put(ThreadLocalStorage::thread_index(), thread);
}

int os::allocate_thread_local_storage() {
  pthread_key_t key;
  int rslt = pthread_key_create(&key, restore_thread_pointer);
  assert(rslt == 0, "cannot allocate thread local storage");
  return (int)key;
}

// Note: This is currently not used by VM, as we don't destroy TLS key
// on VM exit.
void os::free_thread_local_storage(int index) {
  int rslt = pthread_key_delete((pthread_key_t)index);
  assert(rslt == 0, "invalid index");
}

void os::thread_local_storage_at_put(int index, void* value) {
  int rslt = pthread_setspecific((pthread_key_t)index, value);
  assert(rslt == 0, "pthread_setspecific failed");
}

extern "C" Thread* get_thread() {
  return ThreadLocalStorage::thread();
}

//////////////////////////////////////////////////////////////////////////////
// initial thread

// Check if current thread is the initial thread, similar to Solaris thr_main.
bool os::Linux::is_initial_thread(void) {
  char dummy;
  // If called before init complete, thread stack bottom will be null.
  // Can be called if fatal error occurs before initialization.
  if (initial_thread_stack_bottom() == NULL) return false;
  assert(initial_thread_stack_bottom() != NULL &&
         initial_thread_stack_size()   != 0,
         "os::init did not locate initial thread's stack region");
  return (address)&dummy >= initial_thread_stack_bottom() &&
         (address)&dummy <  initial_thread_stack_bottom() + initial_thread_stack_size();
}

// Find the virtual memory area that contains addr
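// An entry in /proc/self/maps begins with "low-high", e.g. (illustrative):
//   7f0d60000000-7f0d60021000 rw-p 00000000 00:00 0          [stack]
// The fscanf("%p-%p") below matches that address range; the inner for loop
// then skips the remainder of the entry's line.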
static bool find_vma(address addr, address* vma_low, address* vma_high) {
  FILE *fp = fopen("/proc/self/maps", "r");
  if (fp) {
    address low, high;
    while (!feof(fp)) {
      if (fscanf(fp, "%p-%p", &low, &high) == 2) {
        if (low <= addr && addr < high) {
          if (vma_low)  *vma_low  = low;
          if (vma_high) *vma_high = high;
          fclose(fp);
          return true;
        }
      }
      for (;;) {
        int ch = fgetc(fp);
        if (ch == EOF || ch == (int)'\n') break;
      }
    }
    fclose(fp);
  }
  return false;
}

// Locate the initial thread stack. This special handling of the initial
// thread stack is needed because pthread_getattr_np() on most (all?) Linux
// distros returns a bogus value for the initial thread.
void os::Linux::capture_initial_stack(size_t max_size) {
  // stack size is the easy part, get it from RLIMIT_STACK
  size_t stack_size;
  struct rlimit rlim;
  getrlimit(RLIMIT_STACK, &rlim);
  stack_size = rlim.rlim_cur;

  // 6308388: a bug in ld.so will relocate its own .data section to the
  //   lower end of the primordial stack; reduce the ulimit -s value a little
  //   so we won't install a guard page on ld.so's data section.
  stack_size -= 2 * page_size();

  // 4441425: avoid crash with "unlimited" stack size on SuSE 7.1 or Redhat
  //   7.1, in both cases we get 2G as the return value.
  // 4466587: glibc 2.2.x compiled w/o "--enable-kernel=2.4.0" (RH 7.0,
  //   SuSE 7.2, Debian) cannot handle the alternate signal stack correctly
  //   for the initial thread if its stack size exceeds 6M. Cap it at 2M,
  //   in case other parts of glibc still assume a 2M max stack size.
  // FIXME: alt signal stack is gone, maybe we can relax this constraint?
  // The problem still exists on RH7.2 (IA64 anyway), but 2MB is a little
  // small anyway.
  if (stack_size > 2 * K * K IA64_ONLY(*2)) {
    stack_size = 2 * K * K IA64_ONLY(*2);
  }

  // Try to figure out where the stack base (top) is. This is harder.
  //
  // When an application is started, glibc saves the initial stack pointer in
  // a global variable "__libc_stack_end", which is then used by system
  // libraries. __libc_stack_end should be pretty close to the stack top. The
  // variable is available since the very early days. However, because it is
  // a private interface, it could disappear in the future.
  //
  // The Linux kernel saves start_stack information in /proc/<pid>/stat.
  // Similar to __libc_stack_end, it is very close to the stack top, but
  // isn't the real stack top. Note that /proc may not exist if the VM is
  // running as a chroot program, so reading /proc/<pid>/stat could fail.
  // Also the contents of /proc/<pid>/stat could change in the future
  // (though unlikely).
  //
  // We try __libc_stack_end first. If that doesn't work, look for
  // /proc/<pid>/stat. If neither of them works, we use the current stack
  // pointer as a hint, which should work well in most cases.

  uintptr_t stack_start;

  // try __libc_stack_end first
  uintptr_t *p = (uintptr_t *)dlsym(RTLD_DEFAULT, "__libc_stack_end");
  if (p && *p) {
    stack_start = *p;
  } else {
    // see if we can get the start_stack field from /proc/self/stat
    FILE *fp;
    int pid;
    char state;
    int ppid;
    int pgrp;
    int session;
    int nr;
    int tpgrp;
    unsigned long flags;
    unsigned long minflt;
    unsigned long cminflt;
    unsigned long majflt;
    unsigned long cmajflt;
    unsigned long utime;
    unsigned long stime;
    long cutime;
    long cstime;
    long prio;
    long nice;
    long junk;
    long it_real;
    uintptr_t start;
    uintptr_t vsize;
    intptr_t rss;
    uintptr_t rsslim;
    uintptr_t scodes;
    uintptr_t ecode;
    int i;

    // Figure out what the primordial thread stack base is. Code is inspired
    // by email from Hans Boehm. /proc/self/stat begins with current pid,
    // followed by command name surrounded by parentheses, state, etc.
    char stat[2048];
    int statlen;

    fp = fopen("/proc/self/stat", "r");
    if (fp) {
      statlen = fread(stat, 1, 2047, fp);
      stat[statlen] = '\0';
      fclose(fp);

      // Skip pid and the command string. Note that we could be dealing with
      // weird command names, e.g. user could decide to rename java launcher
      // to "java 1.4.2 :)", then the stat file would look like
      //                1234 (java 1.4.2 :)) R ... ...
      // We don't really need to know the command string, just find the last
      // occurrence of ")" and then start parsing from there. See bug 4726580.
      char * s = strrchr(stat, ')');

      i = 0;
      if (s) {
        // Skip blank chars
        do s++; while (isspace(*s));

#define _UFM UINTX_FORMAT
#define _DFM INTX_FORMAT

        /*                                     1   1   1   1   1   1   1   1   1   1   2   2    2    2    2    2    2    2    2 */
        /*              3  4  5  6  7  8   9   0   1   2   3   4   5   6   7   8   9   0   1    2    3    4    5    6    7    8 */
        i = sscanf(s, "%c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld " _UFM _UFM _DFM _UFM _UFM _UFM _UFM,
             &state,          /* 3  %c  */
             &ppid,           /* 4  %d  */
             &pgrp,           /* 5  %d  */
             &session,        /* 6  %d  */
             &nr,             /* 7  %d  */
             &tpgrp,          /* 8  %d  */
             &flags,          /* 9  %lu  */
             &minflt,         /* 10 %lu  */
             &cminflt,        /* 11 %lu  */
             &majflt,         /* 12 %lu  */
             &cmajflt,        /* 13 %lu  */
             &utime,          /* 14 %lu  */
             &stime,          /* 15 %lu  */
             &cutime,         /* 16 %ld  */
             &cstime,         /* 17 %ld  */
             &prio,           /* 18 %ld  */
             &nice,           /* 19 %ld  */
             &junk,           /* 20 %ld  */
             &it_real,        /* 21 %ld  */
             &start,          /* 22 UINTX_FORMAT */
             &vsize,          /* 23 UINTX_FORMAT */
             &rss,            /* 24 INTX_FORMAT  */
             &rsslim,         /* 25 UINTX_FORMAT */
             &scodes,         /* 26 UINTX_FORMAT */
             &ecode,          /* 27 UINTX_FORMAT */
             &stack_start);   /* 28 UINTX_FORMAT */
      }

#undef _UFM
#undef _DFM

      if (i != 28 - 2) {
        assert(false, "Bad conversion from /proc/self/stat");
        // product mode - assume we are the initial thread, good luck in the
        // embedded case.
        warning("Can't detect initial thread stack location - bad conversion");
        stack_start = (uintptr_t) &rlim;
      }
    } else {
      // If for some reason we can't open /proc/self/stat (for example, when
      // running on FreeBSD with a Linux emulator, or inside a chroot), fall
      // back to the current stack. This should work in most cases, so don't
      // abort:
      warning("Can't detect initial thread stack location - no /proc/self/stat");
      stack_start = (uintptr_t) &rlim;
    }
  }

  // Now we have a pointer (stack_start) very close to the stack top, the
  // next thing to do is to figure out the exact location of the stack top.
  // We can find out the virtual memory area that contains stack_start by
  // reading /proc/self/maps; it should be the last vma in /proc/self/maps,
  // and its upper limit is the real stack top. (again, this would fail if
  // running inside chroot, because /proc may not exist.)

  uintptr_t stack_top;
  address low, high;
  if (find_vma((address)stack_start, &low, &high)) {
    // success, "high" is the true stack top. (ignore "low", because the
    // initial thread stack grows on demand, its real bottom is
    // high - RLIMIT_STACK.)
    stack_top = (uintptr_t)high;
  } else {
    // failed, likely because /proc/self/maps does not exist
    warning("Can't detect initial thread stack location - find_vma failed");
    // best effort: stack_start is normally within a few pages below the real
    // stack top, use it as the stack top, and reduce the stack size so we
    // won't put the guard page outside the stack.
    stack_top = stack_start;
    stack_size -= 16 * page_size();
  }

  // stack_top could be partially down the page so align it
  stack_top = align_size_up(stack_top, page_size());

  if (max_size && stack_size > max_size) {
    _initial_thread_stack_size = max_size;
  } else {
    _initial_thread_stack_size = stack_size;
  }

  _initial_thread_stack_size = align_size_down(_initial_thread_stack_size, page_size());
  _initial_thread_stack_bottom = (address)stack_top - _initial_thread_stack_size;
}

////////////////////////////////////////////////////////////////////////////////
// time support

// Time since start-up in seconds to a fine granularity.
// Used by VMSelfDestructTimer and the MemProfiler.
double os::elapsedTime() {
  return ((double)os::elapsed_counter()) / os::elapsed_frequency(); // nanosecond resolution
}

jlong os::elapsed_counter() {
  return javaTimeNanos() - initial_time_count;
}

jlong os::elapsed_frequency() {
  return NANOSECS_PER_SEC; // nanosecond resolution
}

bool os::supports_vtime() { return true; }
bool os::enable_vtime()   { return false; }
bool os::vtime_enabled()  { return false; }

double os::elapsedVTime() {
  struct rusage usage;
  int retval = getrusage(RUSAGE_THREAD, &usage);
  if (retval == 0) {
    return (double)(usage.ru_utime.tv_sec + usage.ru_stime.tv_sec) +
           (double)(usage.ru_utime.tv_usec + usage.ru_stime.tv_usec) / (1000 * 1000);
  } else {
    // better than nothing, but not much
    return elapsedTime();
  }
}

jlong os::javaTimeMillis() {
  timeval time;
  int status = gettimeofday(&time, NULL);
  assert(status != -1, "linux error");
  return jlong(time.tv_sec) * 1000 + jlong(time.tv_usec / 1000);
}

#ifndef CLOCK_MONOTONIC
#define CLOCK_MONOTONIC (1)
#endif

void os::Linux::clock_init() {
  // we do dlopen's in this particular order due to a bug in the Linux
  // dynamic loader (see 6348968) leading to a crash on exit
  void* handle = dlopen("librt.so.1", RTLD_LAZY);
  if (handle == NULL) {
    handle = dlopen("librt.so", RTLD_LAZY);
  }

  if (handle) {
    int (*clock_getres_func)(clockid_t, struct timespec*) =
           (int(*)(clockid_t, struct timespec*))dlsym(handle, "clock_getres");
    int (*clock_gettime_func)(clockid_t, struct timespec*) =
           (int(*)(clockid_t, struct timespec*))dlsym(handle, "clock_gettime");
    if (clock_getres_func && clock_gettime_func) {
      // See if the monotonic clock is supported by the kernel. Note that some
      // early implementations simply return kernel jiffies (updated every
      // 1/100 or 1/1000 second). It would be bad to use such a low res clock
      // for nano time (though the monotonic property is still nice to have).
      // It's fixed in newer kernels, however clock_getres() still returns
      // 1/HZ. We check if clock_getres() works, but will ignore its reported
      // resolution for now. Hopefully as people move to new kernels, this
      // won't be a problem.
      struct timespec res;
      struct timespec tp;
      if (clock_getres_func(CLOCK_MONOTONIC, &res) == 0 &&
          clock_gettime_func(CLOCK_MONOTONIC, &tp) == 0) {
        // yes, monotonic clock is supported
        _clock_gettime = clock_gettime_func;
        return;
      } else {
        // close librt if there is no monotonic clock
        dlclose(handle);
      }
    }
  }
  warning("No monotonic clock was available - timed services may " \
          "be adversely affected if the time-of-day clock changes");
}

#ifndef SYS_clock_getres

#if defined(IA32) || defined(AMD64)
#define SYS_clock_getres IA32_ONLY(266)  AMD64_ONLY(229)
#define sys_clock_getres(x,y)  ::syscall(SYS_clock_getres, x, y)
#else
#warning "SYS_clock_getres not defined for this platform, disabling fast_thread_cpu_time"
#define sys_clock_getres(x,y)  -1
#endif

#else
#define sys_clock_getres(x,y)  ::syscall(SYS_clock_getres, x, y)
#endif

void os::Linux::fast_thread_clock_init() {
  if (!UseLinuxPosixThreadCPUClocks) {
    return;
  }
  clockid_t clockid;
  struct timespec tp;
  int (*pthread_getcpuclockid_func)(pthread_t, clockid_t *) =
      (int(*)(pthread_t, clockid_t *)) dlsym(RTLD_DEFAULT, "pthread_getcpuclockid");

  // Switch to using fast clocks for thread cpu time if
  // sys_clock_getres() returns error code 0.
  // Note that some kernels may support the current thread
  // clock (CLOCK_THREAD_CPUTIME_ID) but not the clock ids
  // returned by pthread_getcpuclockid().
  // If the fast Posix clocks are supported then sys_clock_getres()
  // must return at least tp.tv_sec == 0, which means a resolution
  // better than 1 sec. This is an extra check for reliability.

  if (pthread_getcpuclockid_func &&
      pthread_getcpuclockid_func(_main_thread, &clockid) == 0 &&
      sys_clock_getres(clockid, &tp) == 0 && tp.tv_sec == 0) {
    _supports_fast_thread_cpu_time = true;
    _pthread_getcpuclockid = pthread_getcpuclockid_func;
  }
}

jlong os::javaTimeNanos() {
  if (os::supports_monotonic_clock()) {
    struct timespec tp;
    int status = Linux::clock_gettime(CLOCK_MONOTONIC, &tp);
    assert(status == 0, "gettime error");
    jlong result = jlong(tp.tv_sec) * (1000 * 1000 * 1000) + jlong(tp.tv_nsec);
    return result;
  } else {
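    // No monotonic clock: fall back to wall-clock time, which may jump if
    // the time-of-day clock is adjusted (see javaTimeNanos_info() below).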
    timeval time;
    int status = gettimeofday(&time, NULL);
    assert(status != -1, "linux error");
    jlong usecs = jlong(time.tv_sec) * (1000 * 1000) + jlong(time.tv_usec);
    return 1000 * usecs;
  }
}

void os::javaTimeNanos_info(jvmtiTimerInfo *info_ptr) {
  if (os::supports_monotonic_clock()) {
    info_ptr->max_value = ALL_64_BITS;

    // CLOCK_MONOTONIC - amount of time since some arbitrary point in the past
    info_ptr->may_skip_backward = false;      // not subject to resetting or drifting
    info_ptr->may_skip_forward = false;       // not subject to resetting or drifting
  } else {
    // gettimeofday is based on time in seconds since the Epoch and thus does
    // not wrap
    info_ptr->max_value = ALL_64_BITS;

    // gettimeofday is a real time clock so it skips
    info_ptr->may_skip_backward = true;
    info_ptr->may_skip_forward = true;
  }

  info_ptr->kind = JVMTI_TIMER_ELAPSED;                // elapsed not CPU time
}
1463
1464// Return the real, user, and system times in seconds from an
1465// arbitrary fixed point in the past.
1466bool os::getTimesSecs(double* process_real_time,
1467                      double* process_user_time,
1468                      double* process_system_time) {
1469  struct tms ticks;
1470  clock_t real_ticks = times(&ticks);
1471
1472  if (real_ticks == (clock_t) (-1)) {
1473    return false;
1474  } else {
1475    double ticks_per_second = (double) clock_tics_per_sec;
1476    *process_user_time = ((double) ticks.tms_utime) / ticks_per_second;
1477    *process_system_time = ((double) ticks.tms_stime) / ticks_per_second;
1478    *process_real_time = ((double) real_ticks) / ticks_per_second;
1479
1480    return true;
1481  }
1482}
1483
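// Usage sketch (illustrative; do_work() is a stand-in): measuring wall-clock
// versus CPU seconds around a region of code with the primitive above.
//
//   double real0, user0, sys0, real1, user1, sys1;
//   if (os::getTimesSecs(&real0, &user0, &sys0)) {
//     do_work();
//     os::getTimesSecs(&real1, &user1, &sys1);
//     // real1 - real0 : elapsed seconds; user1 - user0 : user CPU seconds
//   }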
1484
1485char * os::local_time_string(char *buf, size_t buflen) {
1486  struct tm t;
1487  time_t long_time;
1488  time(&long_time);
1489  localtime_r(&long_time, &t);
1490  jio_snprintf(buf, buflen, "%d-%02d-%02d %02d:%02d:%02d",
1491               t.tm_year + 1900, t.tm_mon + 1, t.tm_mday,
1492               t.tm_hour, t.tm_min, t.tm_sec);
1493  return buf;
1494}
1495
1496struct tm* os::localtime_pd(const time_t* clock, struct tm*  res) {
1497  return localtime_r(clock, res);
1498}
1499
1500////////////////////////////////////////////////////////////////////////////////
1501// runtime exit support
1502
1503// Note: os::shutdown() might be called very early during initialization, or
1504// called from signal handler. Before adding something to os::shutdown(), make
1505// sure it is async-safe and can handle partially initialized VM.
1506void os::shutdown() {
1507
1508  // allow PerfMemory to attempt cleanup of any persistent resources
1509  perfMemory_exit();
1510
1511  // needs to remove object in file system
1512  AttachListener::abort();
1513
1514  // flush buffered output, finish log files
1515  ostream_abort();
1516
1517  // Check for abort hook
1518  abort_hook_t abort_hook = Arguments::abort_hook();
1519  if (abort_hook != NULL) {
1520    abort_hook();
1521  }
1522
1523}
1524
1525// Note: os::abort() might be called very early during initialization, or
1526// called from signal handler. Before adding something to os::abort(), make
1527// sure it is async-safe and can handle partially initialized VM.
1528void os::abort(bool dump_core) {
1529  os::shutdown();
1530  if (dump_core) {
1531#ifndef PRODUCT
1532    fdStream out(defaultStream::output_fd());
1533    out.print_raw("Current thread is ");
1534    char buf[16];
1535    jio_snprintf(buf, sizeof(buf), UINTX_FORMAT, os::current_thread_id());
1536    out.print_raw_cr(buf);
1537    out.print_raw_cr("Dumping core ...");
1538#endif
1539    ::abort(); // dump core
1540  }
1541
1542  ::exit(1);
1543}
1544
1545// Die immediately, no exit hook, no abort hook, no cleanup.
1546void os::die() {
1547  // _exit() on LinuxThreads only kills the current thread
1548  ::abort();
1549}
1550
1551// unused on linux for now.
1552void os::set_error_file(const char *logfile) {}
1553
1554
1555// This method is a copy of JDK's sysGetLastErrorString
1556// from src/solaris/hpi/src/system_md.c
1557
1558size_t os::lasterror(char *buf, size_t len) {
1559
1560  if (errno == 0)  return 0;
1561
1562  const char *s = ::strerror(errno);
1563  size_t n = ::strlen(s);
1564  if (n >= len) {
1565    n = len - 1;
1566  }
1567  ::strncpy(buf, s, n);
1568  buf[n] = '\0';
1569  return n;
1570}
1571
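// Usage sketch (illustrative caller): fetch a printable message for errno
// right after a failing libc call.
//
//   if (::open("/some/file", O_RDONLY) < 0) {      // hypothetical call
//     char msg[256];
//     if (os::lasterror(msg, sizeof(msg)) > 0) {
//       warning("open failed: %s", msg);
//     }
//   }
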
1572intx os::current_thread_id() { return (intx)pthread_self(); }
1573int os::current_process_id() {
1574
1575  // Under the old Linux thread library (LinuxThreads), the kernel
1576  // gives each thread its own process id. Because of this, each
1577  // thread would return a different pid if this method simply
1578  // returned the result of getpid(2). Linux provides no API that
1579  // returns the pid of the launcher thread for the VM. This
1580  // implementation returns a unique pid: the pid of the launcher
1581  // thread that starts the VM 'process'.
1582
1583  // Under NPTL, getpid() returns the same pid as the launcher
1584  // thread, rather than a unique pid per thread. Use gettid() if
1585  // you want the old pre-NPTL behaviour.
1586
1587  // If you are looking for the result of a call to getpid() that
1588  // returns a unique pid for the calling thread, then look at the
1589  // OSThread::thread_id() method in the osThread_linux.hpp file.
1590
1591  return (int)(_initial_pid ? _initial_pid : getpid());
1592}
1593
1594// DLL functions
1595
1596const char* os::dll_file_extension() { return ".so"; }
1597
1598// This must be hard coded because it's the system's temporary
1599// directory, not the java application's temp directory (ala java.io.tmpdir).
1600const char* os::get_temp_directory() { return "/tmp"; }
1601
1602static bool file_exists(const char* filename) {
1603  struct stat statbuf;
1604  if (filename == NULL || strlen(filename) == 0) {
1605    return false;
1606  }
1607  return os::stat(filename, &statbuf) == 0;
1608}
1609
1610bool os::dll_build_name(char* buffer, size_t buflen,
1611                        const char* pname, const char* fname) {
1612  bool retval = false;
1613  // Copied from libhpi
1614  const size_t pnamelen = pname ? strlen(pname) : 0;
1615
1616  // Return error on buffer overflow.
1617  if (pnamelen + strlen(fname) + 10 > (size_t) buflen) {
1618    return retval;
1619  }
1620
1621  if (pnamelen == 0) {
1622    snprintf(buffer, buflen, "lib%s.so", fname);
1623    retval = true;
1624  } else if (strchr(pname, *os::path_separator()) != NULL) {
1625    int n;
1626    char** pelements = split_path(pname, &n);
1627    if (pelements == NULL) {
1628      return false;
1629    }
1630    for (int i = 0 ; i < n ; i++) {
1631      // Really shouldn't be NULL, but check can't hurt
1632      if (pelements[i] == NULL || strlen(pelements[i]) == 0) {
1633        continue; // skip the empty path values
1634      }
1635      snprintf(buffer, buflen, "%s/lib%s.so", pelements[i], fname);
1636      if (file_exists(buffer)) {
1637        retval = true;
1638        break;
1639      }
1640    }
1641    // release the storage
1642    for (int i = 0 ; i < n ; i++) {
1643      if (pelements[i] != NULL) {
1644        FREE_C_HEAP_ARRAY(char, pelements[i], mtInternal);
1645      }
1646    }
1647    if (pelements != NULL) {
1648      FREE_C_HEAP_ARRAY(char*, pelements, mtInternal);
1649    }
1650  } else {
1651    snprintf(buffer, buflen, "%s/lib%s.so", pname, fname);
1652    retval = true;
1653  }
1654  return retval;
1655}
1656
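// Usage sketch (illustrative; paths are hypothetical):
//
//   char buf[MAXPATHLEN];
//   os::dll_build_name(buf, sizeof(buf), "/usr/lib", "net");   // "/usr/lib/libnet.so"
//   os::dll_build_name(buf, sizeof(buf), NULL, "verify");      // "libverify.so"
//
// With a ':'-separated pname, each element is probed with file_exists() and
// the first existing candidate wins.
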
1657// check if addr is inside libjvm.so
1658bool os::address_is_in_vm(address addr) {
1659  static address libjvm_base_addr;
1660  Dl_info dlinfo;
1661
1662  if (libjvm_base_addr == NULL) {
1663    if (dladdr(CAST_FROM_FN_PTR(void *, os::address_is_in_vm), &dlinfo) != 0) {
1664      libjvm_base_addr = (address)dlinfo.dli_fbase;
1665    }
1666    assert(libjvm_base_addr != NULL, "Cannot obtain base address for libjvm");
1667  }
1668
1669  if (dladdr((void *)addr, &dlinfo) != 0) {
1670    if (libjvm_base_addr == (address)dlinfo.dli_fbase) return true;
1671  }
1672
1673  return false;
1674}
1675
1676bool os::dll_address_to_function_name(address addr, char *buf,
1677                                      int buflen, int *offset) {
1678  // buf is not optional, but offset is optional
1679  assert(buf != NULL, "sanity check");
1680
1681  Dl_info dlinfo;
1682
1683  if (dladdr((void*)addr, &dlinfo) != 0) {
1684    // see if we have a matching symbol
1685    if (dlinfo.dli_saddr != NULL && dlinfo.dli_sname != NULL) {
1686      if (!Decoder::demangle(dlinfo.dli_sname, buf, buflen)) {
1687        jio_snprintf(buf, buflen, "%s", dlinfo.dli_sname);
1688      }
1689      if (offset != NULL) *offset = addr - (address)dlinfo.dli_saddr;
1690      return true;
1691    }
1692    // no matching symbol so try for just file info
1693    if (dlinfo.dli_fname != NULL && dlinfo.dli_fbase != NULL) {
1694      if (Decoder::decode((address)(addr - (address)dlinfo.dli_fbase),
1695                          buf, buflen, offset, dlinfo.dli_fname)) {
1696        return true;
1697      }
1698    }
1699  }
1700
1701  buf[0] = '\0';
1702  if (offset != NULL) *offset = -1;
1703  return false;
1704}
1705
1706struct _address_to_library_name {
1707  address addr;          // input : memory address
1708  size_t  buflen;        //         size of fname
1709  char*   fname;         // output: library name
1710  address base;          //         library base addr
1711};
1712
1713static int address_to_library_name_callback(struct dl_phdr_info *info,
1714                                            size_t size, void *data) {
1715  int i;
1716  bool found = false;
1717  address libbase = NULL;
1718  struct _address_to_library_name * d = (struct _address_to_library_name *)data;
1719
1720  // iterate through all loadable segments
1721  for (i = 0; i < info->dlpi_phnum; i++) {
1722    address segbase = (address)(info->dlpi_addr + info->dlpi_phdr[i].p_vaddr);
1723    if (info->dlpi_phdr[i].p_type == PT_LOAD) {
1724      // base address of a library is the lowest address of its loaded
1725      // segments.
1726      if (libbase == NULL || libbase > segbase) {
1727        libbase = segbase;
1728      }
1729      // see if 'addr' is within current segment
1730      if (segbase <= d->addr &&
1731          d->addr < segbase + info->dlpi_phdr[i].p_memsz) {
1732        found = true;
1733      }
1734    }
1735  }
1736
1737  // dlpi_name is NULL or empty if the ELF file is the executable; return 0
1738  // so dll_address_to_library_name() can fall through to use dladdr(), which
1739  // can figure out the executable's name from argv[0].
1740  if (found && info->dlpi_name && info->dlpi_name[0]) {
1741    d->base = libbase;
1742    if (d->fname) {
1743      jio_snprintf(d->fname, d->buflen, "%s", info->dlpi_name);
1744    }
1745    return 1;
1746  }
1747  return 0;
1748}
1749
1750bool os::dll_address_to_library_name(address addr, char* buf,
1751                                     int buflen, int* offset) {
1752  // buf is not optional, but offset is optional
1753  assert(buf != NULL, "sanity check");
1754
1755  Dl_info dlinfo;
1756  struct _address_to_library_name data;
1757
1758  // There is a bug in the old glibc dladdr() implementation that can make it
1759  // resolve to the wrong library name if the .so file has a base address !=
1760  // NULL. Here we iterate through the program headers of all loaded libraries
1761  // to find out which library 'addr' really belongs to. This workaround can
1762  // be removed once the minimum requirement for glibc is moved to 2.3.x.
1763  data.addr = addr;
1764  data.fname = buf;
1765  data.buflen = buflen;
1766  data.base = NULL;
1767  int rslt = dl_iterate_phdr(address_to_library_name_callback, (void *)&data);
1768
1769  if (rslt) {
1770    // buf already contains library name
1771    if (offset) *offset = addr - data.base;
1772    return true;
1773  }
1774  if (dladdr((void*)addr, &dlinfo) != 0) {
1775    if (dlinfo.dli_fname != NULL) {
1776      jio_snprintf(buf, buflen, "%s", dlinfo.dli_fname);
1777    }
1778    if (dlinfo.dli_fbase != NULL && offset != NULL) {
1779      *offset = addr - (address)dlinfo.dli_fbase;
1780    }
1781    return true;
1782  }
1783
1784  buf[0] = '\0';
1785  if (offset) *offset = -1;
1786  return false;
1787}
1788
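// Usage sketch (illustrative): resolving a PC to "library+offset" the way an
// error reporter might.
//
//   char lib[256];
//   int offset;
//   if (os::dll_address_to_library_name(pc, lib, sizeof(lib), &offset)) {
//     st->print("[%s+0x%x]", lib, offset);   // hypothetical output stream
//   }
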
1789// Loads a .dll/.so and,
1790// in case of error, checks whether the .dll/.so was built for the
1791// same architecture as HotSpot is running on.
1792
1793
1794// Remember the stack's state. The Linux dynamic linker will change
1795// the stack to 'executable' at most once, so we must safepoint only once.
1796bool os::Linux::_stack_is_executable = false;
1797
1798// VM operation that loads a library.  This is necessary if stack protection
1799// of the Java stacks can be lost during loading the library.  If we
1800// do not stop the Java threads, they can stack overflow before the stacks
1801// are protected again.
1802class VM_LinuxDllLoad: public VM_Operation {
1803 private:
1804  const char *_filename;
1805  char *_ebuf;
1806  int _ebuflen;
1807  void *_lib;
1808 public:
1809  VM_LinuxDllLoad(const char *fn, char *ebuf, int ebuflen) :
1810    _filename(fn), _ebuf(ebuf), _ebuflen(ebuflen), _lib(NULL) {}
1811  VMOp_Type type() const { return VMOp_LinuxDllLoad; }
1812  void doit() {
1813    _lib = os::Linux::dll_load_in_vmthread(_filename, _ebuf, _ebuflen);
1814    os::Linux::_stack_is_executable = true;
1815  }
1816  void* loaded_library() { return _lib; }
1817};
1818
1819void * os::dll_load(const char *filename, char *ebuf, int ebuflen)
1820{
1821  void * result = NULL;
1822  bool load_attempted = false;
1823
1824  // Check whether the library to load might change execution rights
1825  // of the stack. If they are changed, the protection of the stack
1826  // guard pages will be lost. We need a safepoint to fix this.
1827  //
1828  // See Linux man page execstack(8) for more info.
1829  if (os::uses_stack_guard_pages() && !os::Linux::_stack_is_executable) {
1830    ElfFile ef(filename);
1831    if (!ef.specifies_noexecstack()) {
1832      if (!is_init_completed()) {
1833        os::Linux::_stack_is_executable = true;
1834        // This is OK - No Java threads have been created yet, and hence no
1835        // stack guard pages to fix.
1836        //
1837        // This should happen only when you are building JDK7 using a very
1838        // old version of JDK6 (e.g., with JPRT) and running test_gamma.
1839        //
1840        // Dynamic loader will make all stacks executable after
1841        // this function returns, and will not do that again.
1842        assert(Threads::first() == NULL, "no Java threads should exist yet.");
1843      } else {
1844        warning("You have loaded library %s which might have disabled stack guard. "
1845                "The VM will try to fix the stack guard now.\n"
1846                "It's highly recommended that you fix the library with "
1847                "'execstack -c <libfile>', or link it with '-z noexecstack'.",
1848                filename);
1849
1850        assert(Thread::current()->is_Java_thread(), "must be Java thread");
1851        JavaThread *jt = JavaThread::current();
1852        if (jt->thread_state() != _thread_in_native) {
1853          // This happens when a compiler thread tries to load a hsdis-<arch>.so file
1854          // that requires ExecStack. Cannot enter safe point. Let's give up.
1855          warning("Unable to fix stack guard. Giving up.");
1856        } else {
1857          if (!LoadExecStackDllInVMThread) {
1858            // This is for the case where the DLL has a static
1859            // constructor function that executes JNI code. We cannot
1860            // load such DLLs in the VMThread.
1861            result = os::Linux::dlopen_helper(filename, ebuf, ebuflen);
1862          }
1863
1864          ThreadInVMfromNative tiv(jt);
1865          debug_only(VMNativeEntryWrapper vew;)
1866
1867          VM_LinuxDllLoad op(filename, ebuf, ebuflen);
1868          VMThread::execute(&op);
1869          if (LoadExecStackDllInVMThread) {
1870            result = op.loaded_library();
1871          }
1872          load_attempted = true;
1873        }
1874      }
1875    }
1876  }
1877
1878  if (!load_attempted) {
1879    result = os::Linux::dlopen_helper(filename, ebuf, ebuflen);
1880  }
1881
1882  if (result != NULL) {
1883    // Successful loading
1884    return result;
1885  }
1886
1887  Elf32_Ehdr elf_head;
1888  int diag_msg_max_length = ebuflen - strlen(ebuf);
1889  char* diag_msg_buf = ebuf + strlen(ebuf);
1890
1891  if (diag_msg_max_length == 0) {
1892    // No more space in ebuf for additional diagnostics message
1893    return NULL;
1894  }
1895
1896
1897  int file_descriptor = ::open(filename, O_RDONLY | O_NONBLOCK);
1898
1899  if (file_descriptor < 0) {
1900    // Can't open library, report dlerror() message
1901    return NULL;
1902  }
1903
1904  bool failed_to_read_elf_head =
1905    (sizeof(elf_head) !=
1906        (::read(file_descriptor, &elf_head, sizeof(elf_head))));
1907
1908  ::close(file_descriptor);
1909  if (failed_to_read_elf_head) {
1910    // file i/o error - report dlerror() msg
1911    return NULL;
1912  }
1913
1914  typedef struct {
1915    Elf32_Half  code;         // Actual value as defined in elf.h
1916    Elf32_Half  compat_class; // Compatibility of archs at VM's sense
1917    char        elf_class;    // 32 or 64 bit
1918    char        endianness;   // MSB or LSB
1919    char*       name;         // String representation
1920  } arch_t;
1921
1922  #ifndef EM_486
1923  #define EM_486          6               /* Intel 80486 */
1924  #endif
1925
1926  static const arch_t arch_array[]={
1927    {EM_386,         EM_386,     ELFCLASS32, ELFDATA2LSB, (char*)"IA 32"},
1928    {EM_486,         EM_386,     ELFCLASS32, ELFDATA2LSB, (char*)"IA 32"},
1929    {EM_IA_64,       EM_IA_64,   ELFCLASS64, ELFDATA2LSB, (char*)"IA 64"},
1930    {EM_X86_64,      EM_X86_64,  ELFCLASS64, ELFDATA2LSB, (char*)"AMD 64"},
1931    {EM_SPARC,       EM_SPARC,   ELFCLASS32, ELFDATA2MSB, (char*)"Sparc 32"},
1932    {EM_SPARC32PLUS, EM_SPARC,   ELFCLASS32, ELFDATA2MSB, (char*)"Sparc 32"},
1933    {EM_SPARCV9,     EM_SPARCV9, ELFCLASS64, ELFDATA2MSB, (char*)"Sparc v9 64"},
1934    {EM_PPC,         EM_PPC,     ELFCLASS32, ELFDATA2MSB, (char*)"Power PC 32"},
1935    {EM_PPC64,       EM_PPC64,   ELFCLASS64, ELFDATA2MSB, (char*)"Power PC 64"},
1936    {EM_ARM,         EM_ARM,     ELFCLASS32,   ELFDATA2LSB, (char*)"ARM"},
1937    {EM_S390,        EM_S390,    ELFCLASSNONE, ELFDATA2MSB, (char*)"IBM System/390"},
1938    {EM_ALPHA,       EM_ALPHA,   ELFCLASS64, ELFDATA2LSB, (char*)"Alpha"},
1939    {EM_MIPS_RS3_LE, EM_MIPS_RS3_LE, ELFCLASS32, ELFDATA2LSB, (char*)"MIPSel"},
1940    {EM_MIPS,        EM_MIPS,    ELFCLASS32, ELFDATA2MSB, (char*)"MIPS"},
1941    {EM_PARISC,      EM_PARISC,  ELFCLASS32, ELFDATA2MSB, (char*)"PARISC"},
1942    {EM_68K,         EM_68K,     ELFCLASS32, ELFDATA2MSB, (char*)"M68k"}
1943  };
1944
1945  #if  (defined IA32)
1946    static  Elf32_Half running_arch_code=EM_386;
1947  #elif   (defined AMD64)
1948    static  Elf32_Half running_arch_code=EM_X86_64;
1949  #elif  (defined IA64)
1950    static  Elf32_Half running_arch_code=EM_IA_64;
1951  #elif  (defined __sparc) && (defined _LP64)
1952    static  Elf32_Half running_arch_code=EM_SPARCV9;
1953  #elif  (defined __sparc) && (!defined _LP64)
1954    static  Elf32_Half running_arch_code=EM_SPARC;
1955  #elif  (defined __powerpc64__)
1956    static  Elf32_Half running_arch_code=EM_PPC64;
1957  #elif  (defined __powerpc__)
1958    static  Elf32_Half running_arch_code=EM_PPC;
1959  #elif  (defined ARM)
1960    static  Elf32_Half running_arch_code=EM_ARM;
1961  #elif  (defined S390)
1962    static  Elf32_Half running_arch_code=EM_S390;
1963  #elif  (defined ALPHA)
1964    static  Elf32_Half running_arch_code=EM_ALPHA;
1965  #elif  (defined MIPSEL)
1966    static  Elf32_Half running_arch_code=EM_MIPS_RS3_LE;
1967  #elif  (defined PARISC)
1968    static  Elf32_Half running_arch_code=EM_PARISC;
1969  #elif  (defined MIPS)
1970    static  Elf32_Half running_arch_code=EM_MIPS;
1971  #elif  (defined M68K)
1972    static  Elf32_Half running_arch_code=EM_68K;
1973  #else
1974    #error Method os::dll_load requires that one of following is defined:\
1975         IA32, AMD64, IA64, __sparc, __powerpc__, ARM, S390, ALPHA, MIPS, MIPSEL, PARISC, M68K
1976  #endif
1977
1978  // Identify compatibility class for VM's architecture and library's architecture
1979  // Obtain string descriptions for architectures
1980
1981  arch_t lib_arch = {elf_head.e_machine, 0, elf_head.e_ident[EI_CLASS], elf_head.e_ident[EI_DATA], NULL};
1982  int running_arch_index=-1;
1983
1984  for (unsigned int i = 0; i < ARRAY_SIZE(arch_array); i++) {
1985    if (running_arch_code == arch_array[i].code) {
1986      running_arch_index    = i;
1987    }
1988    if (lib_arch.code == arch_array[i].code) {
1989      lib_arch.compat_class = arch_array[i].compat_class;
1990      lib_arch.name         = arch_array[i].name;
1991    }
1992  }
1993
1994  assert(running_arch_index != -1,
1995    "Didn't find running architecture code (running_arch_code) in arch_array");
1996  if (running_arch_index == -1) {
1997    // Even though running architecture detection failed
1998    // we may still continue with reporting dlerror() message
1999    return NULL;
2000  }
2001
2002  if (lib_arch.endianness != arch_array[running_arch_index].endianness) {
2003    ::snprintf(diag_msg_buf, diag_msg_max_length-1," (Possible cause: endianness mismatch)");
2004    return NULL;
2005  }
2006
2007#ifndef S390
2008  if (lib_arch.elf_class != arch_array[running_arch_index].elf_class) {
2009    ::snprintf(diag_msg_buf, diag_msg_max_length-1," (Possible cause: architecture word width mismatch)");
2010    return NULL;
2011  }
2012#endif // !S390
2013
2014  if (lib_arch.compat_class != arch_array[running_arch_index].compat_class) {
2015    if (lib_arch.name != NULL) {
2016      ::snprintf(diag_msg_buf, diag_msg_max_length-1,
2017        " (Possible cause: can't load %s-bit .so on a %s-bit platform)",
2018        lib_arch.name, arch_array[running_arch_index].name);
2019    } else {
2020      ::snprintf(diag_msg_buf, diag_msg_max_length-1,
2021      " (Possible cause: can't load this .so (machine code=0x%x) on a %s-bit platform)",
2022        lib_arch.code,
2023        arch_array[running_arch_index].name);
2024    }
2025  }
2026
2027  return NULL;
2028}
2029
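// Usage sketch (illustrative caller; the path is hypothetical): on failure
// ebuf holds the dlerror() text plus any architecture diagnostics appended
// above.
//
//   char ebuf[1024];
//   void* handle = os::dll_load("/opt/libs/libfoo.so", ebuf, sizeof(ebuf));
//   if (handle == NULL) {
//     warning("could not load libfoo.so: %s", ebuf);
//   }
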
2030void * os::Linux::dlopen_helper(const char *filename, char *ebuf, int ebuflen) {
2031  void * result = ::dlopen(filename, RTLD_LAZY);
2032  if (result == NULL) {
2033    ::strncpy(ebuf, ::dlerror(), ebuflen - 1);
2034    ebuf[ebuflen-1] = '\0';
2035  }
2036  return result;
2037}
2038
2039void * os::Linux::dll_load_in_vmthread(const char *filename, char *ebuf, int ebuflen) {
2040  void * result = NULL;
2041  if (LoadExecStackDllInVMThread) {
2042    result = dlopen_helper(filename, ebuf, ebuflen);
2043  }
2044
2045  // Since 7019808, libjvm.so is linked with -noexecstack. If the VM loads a
2046  // library that requires an executable stack, or which does not have this
2047  // stack attribute set, dlopen changes the stack attribute to executable. The
2048  // read protection of the guard pages gets lost.
2049  //
2050  // Need to check _stack_is_executable again as multiple VM_LinuxDllLoad
2051  // may have been queued at the same time.
2052
2053  if (!_stack_is_executable) {
2054    JavaThread *jt = Threads::first();
2055
2056    while (jt) {
2057      if (!jt->stack_guard_zone_unused() &&        // Stack not yet fully initialized
2058          jt->stack_yellow_zone_enabled()) {       // No pending stack overflow exceptions
2059        if (!os::guard_memory((char *) jt->stack_red_zone_base() - jt->stack_red_zone_size(),
2060                              jt->stack_yellow_zone_size() + jt->stack_red_zone_size())) {
2061          warning("Attempt to reguard stack yellow zone failed.");
2062        }
2063      }
2064      jt = jt->next();
2065    }
2066  }
2067
2068  return result;
2069}
2070
2071/*
2072 * glibc-2.0 libdl is not MT safe.  If you are building with any glibc,
2073 * chances are you might want to run the generated bits against glibc-2.0
2074 * libdl.so, so always use locking for any version of glibc.
2075 */
2076void* os::dll_lookup(void* handle, const char* name) {
2077  pthread_mutex_lock(&dl_mutex);
2078  void* res = dlsym(handle, name);
2079  pthread_mutex_unlock(&dl_mutex);
2080  return res;
2081}
2082
2083void* os::get_default_process_handle() {
2084  return (void*)::dlopen(NULL, RTLD_LAZY);
2085}
2086
2087static bool _print_ascii_file(const char* filename, outputStream* st) {
2088  int fd = ::open(filename, O_RDONLY);
2089  if (fd == -1) {
2090     return false;
2091  }
2092
2093  char buf[32];
2094  int bytes;
2095  while ((bytes = ::read(fd, buf, sizeof(buf))) > 0) {
2096    st->print_raw(buf, bytes);
2097  }
2098
2099  ::close(fd);
2100
2101  return true;
2102}
2103
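// Usage sketch (illustrative): dump a small /proc file to the stream.
//
//   if (!_print_ascii_file("/proc/self/status", st)) {
//     st->print_cr("<Not Available>");
//   }
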
2104void os::print_dll_info(outputStream *st) {
2105  st->print_cr("Dynamic libraries:");
2106
2107  char fname[32];
2108  pid_t pid = os::Linux::gettid();
2109
2110  jio_snprintf(fname, sizeof(fname), "/proc/%d/maps", pid);
2111
2112  if (!_print_ascii_file(fname, st)) {
2113    st->print("Cannot get library information for pid = %d\n", pid);
2114  }
2115}
2116
2117void os::print_os_info_brief(outputStream* st) {
2118  os::Linux::print_distro_info(st);
2119
2120  os::Posix::print_uname_info(st);
2121
2122  os::Linux::print_libversion_info(st);
2123
2124}
2125
2126void os::print_os_info(outputStream* st) {
2127  st->print("OS:");
2128
2129  os::Linux::print_distro_info(st);
2130
2131  os::Posix::print_uname_info(st);
2132
2133  // Print warning if unsafe chroot environment detected
2134  if (unsafe_chroot_detected) {
2135    st->print("WARNING!! ");
2136    st->print_cr(unstable_chroot_error);
2137  }
2138
2139  os::Linux::print_libversion_info(st);
2140
2141  os::Posix::print_rlimit_info(st);
2142
2143  os::Posix::print_load_average(st);
2144
2145  os::Linux::print_full_memory_info(st);
2146}
2147
2148// Try to identify popular distros.
2149// Most Linux distributions have a /etc/XXX-release file, which contains
2150// the OS version string. Newer Linux distributions have a /etc/lsb-release
2151// file that also contains the OS version string. Some have more than one
2152// /etc/XXX-release file (e.g. Mandrake has both /etc/mandrake-release and
2153// /etc/redhat-release), so the order is important.
2154// Any Linux that is based on Red Hat (i.e. Oracle, Mandrake, Sun JDS...) has
2155// its own specific XXX-release file as well as a redhat-release file.
2156// Because of this the XXX-release file needs to be searched for before the
2157// redhat-release file.
2158// Since Red Hat has an lsb-release file that is not very descriptive, the
2159// search for redhat-release needs to be before lsb-release.
2160// Since the lsb-release file is the new standard, it needs to be searched
2161// before the older style release files.
2162// Searching system-release (Red Hat) and os-release (other Linuxes) is the
2163// next-to-last resort.  The os-release file is a new standard that contains
2164// distribution information and the system-release file seems to be an old
2165// standard that has been replaced by the lsb-release and os-release files.
2166// Searching for the debian_version file is the last resort.  It contains
2167// an informative string like "6.0.6" or "wheezy/sid". Because of this,
2168// "Debian " is printed before the contents of the debian_version file.
2169void os::Linux::print_distro_info(outputStream* st) {
2170  if (!_print_ascii_file("/etc/oracle-release", st) &&
2171      !_print_ascii_file("/etc/mandriva-release", st) &&
2172      !_print_ascii_file("/etc/mandrake-release", st) &&
2173      !_print_ascii_file("/etc/sun-release", st) &&
2174      !_print_ascii_file("/etc/redhat-release", st) &&
2175      !_print_ascii_file("/etc/lsb-release", st) &&
2176      !_print_ascii_file("/etc/SuSE-release", st) &&
2177      !_print_ascii_file("/etc/turbolinux-release", st) &&
2178      !_print_ascii_file("/etc/gentoo-release", st) &&
2179      !_print_ascii_file("/etc/ltib-release", st) &&
2180      !_print_ascii_file("/etc/angstrom-version", st) &&
2181      !_print_ascii_file("/etc/system-release", st) &&
2182      !_print_ascii_file("/etc/os-release", st)) {
2183
2184    if (file_exists("/etc/debian_version")) {
2185      st->print("Debian ");
2186      _print_ascii_file("/etc/debian_version", st);
2187    } else {
2188      st->print("Linux");
2189    }
2190  }
2191  st->cr();
2192}
2193
2194void os::Linux::print_libversion_info(outputStream* st) {
2195  // libc, pthread
2196  st->print("libc:");
2197  st->print(os::Linux::glibc_version()); st->print(" ");
2198  st->print(os::Linux::libpthread_version()); st->print(" ");
2199  if (os::Linux::is_LinuxThreads()) {
2200    st->print("(%s stack)", os::Linux::is_floating_stack() ? "floating" : "fixed");
2201  }
2202  st->cr();
2203}
2204
2205void os::Linux::print_full_memory_info(outputStream* st) {
2206  st->print("\n/proc/meminfo:\n");
2207  _print_ascii_file("/proc/meminfo", st);
2208  st->cr();
2209}
2210
2211void os::print_memory_info(outputStream* st) {
2212
2213  st->print("Memory:");
2214  st->print(" %dk page", os::vm_page_size()>>10);
2215
2216  // values in struct sysinfo are "unsigned long"
2217  struct sysinfo si;
2218  sysinfo(&si);
2219
2220  st->print(", physical " UINT64_FORMAT "k",
2221            os::physical_memory() >> 10);
2222  st->print("(" UINT64_FORMAT "k free)",
2223            os::available_memory() >> 10);
2224  st->print(", swap " UINT64_FORMAT "k",
2225            ((jlong)si.totalswap * si.mem_unit) >> 10);
2226  st->print("(" UINT64_FORMAT "k free)",
2227            ((jlong)si.freeswap * si.mem_unit) >> 10);
2228  st->cr();
2229}
2230
2231void os::pd_print_cpu_info(outputStream* st) {
2232  st->print("\n/proc/cpuinfo:\n");
2233  if (!_print_ascii_file("/proc/cpuinfo", st)) {
2234    st->print("  <Not Available>");
2235  }
2236  st->cr();
2237}
2238
2239void os::print_siginfo(outputStream* st, void* siginfo) {
2240  const siginfo_t* si = (const siginfo_t*)siginfo;
2241
2242  os::Posix::print_siginfo_brief(st, si);
2243
2244  if (si && (si->si_signo == SIGBUS || si->si_signo == SIGSEGV) &&
2245      UseSharedSpaces) {
2246    FileMapInfo* mapinfo = FileMapInfo::current_info();
2247    if (mapinfo->is_in_shared_space(si->si_addr)) {
2248      st->print("\n\nError accessing class data sharing archive."   \
2249                " Mapped file inaccessible during execution, "      \
2250                " possible disk/network problem.");
2251    }
2252  }
2253  st->cr();
2254}
2255
2256
2257static void print_signal_handler(outputStream* st, int sig,
2258                                 char* buf, size_t buflen);
2259
2260void os::print_signal_handlers(outputStream* st, char* buf, size_t buflen) {
2261  st->print_cr("Signal Handlers:");
2262  print_signal_handler(st, SIGSEGV, buf, buflen);
2263  print_signal_handler(st, SIGBUS , buf, buflen);
2264  print_signal_handler(st, SIGFPE , buf, buflen);
2265  print_signal_handler(st, SIGPIPE, buf, buflen);
2266  print_signal_handler(st, SIGXFSZ, buf, buflen);
2267  print_signal_handler(st, SIGILL , buf, buflen);
2268  print_signal_handler(st, INTERRUPT_SIGNAL, buf, buflen);
2269  print_signal_handler(st, SR_signum, buf, buflen);
2270  print_signal_handler(st, SHUTDOWN1_SIGNAL, buf, buflen);
2271  print_signal_handler(st, SHUTDOWN2_SIGNAL , buf, buflen);
2272  print_signal_handler(st, SHUTDOWN3_SIGNAL , buf, buflen);
2273  print_signal_handler(st, BREAK_SIGNAL, buf, buflen);
2274#if defined(PPC64)
2275  print_signal_handler(st, SIGTRAP, buf, buflen);
2276#endif
2277}
2278
2279static char saved_jvm_path[MAXPATHLEN] = {0};
2280
2281// Find the full path to the current module, libjvm.so
2282void os::jvm_path(char *buf, jint buflen) {
2283  // Error checking.
2284  if (buflen < MAXPATHLEN) {
2285    assert(false, "must use a large-enough buffer");
2286    buf[0] = '\0';
2287    return;
2288  }
2289  // Lazy resolve the path to current module.
2290  if (saved_jvm_path[0] != 0) {
2291    strcpy(buf, saved_jvm_path);
2292    return;
2293  }
2294
2295  char dli_fname[MAXPATHLEN];
2296  bool ret = dll_address_to_library_name(
2297                CAST_FROM_FN_PTR(address, os::jvm_path),
2298                dli_fname, sizeof(dli_fname), NULL);
2299  assert(ret, "cannot locate libjvm");
2300  char *rp = NULL;
2301  if (ret && dli_fname[0] != '\0') {
2302    rp = realpath(dli_fname, buf);
2303  }
2304  if (rp == NULL)
2305    return;
2306
2307  if (Arguments::sun_java_launcher_is_altjvm()) {
2308    // Support for the java launcher's '-XXaltjvm=<path>' option. Typical
2309    // value for buf is "<JAVA_HOME>/jre/lib/<arch>/<vmtype>/libjvm.so".
2310    // If "/jre/lib/" appears at the right place in the string, then
2311    // assume we are installed in a JDK and we're done. Otherwise, check
2312    // for a JAVA_HOME environment variable and fix up the path so it
2313    // looks like libjvm.so is installed there (append a fake suffix
2314    // hotspot/libjvm.so).
2315    const char *p = buf + strlen(buf) - 1;
2316    for (int count = 0; p > buf && count < 5; ++count) {
2317      for (--p; p > buf && *p != '/'; --p)
2318        /* empty */ ;
2319    }
2320
2321    if (strncmp(p, "/jre/lib/", 9) != 0) {
2322      // Look for JAVA_HOME in the environment.
2323      char* java_home_var = ::getenv("JAVA_HOME");
2324      if (java_home_var != NULL && java_home_var[0] != 0) {
2325        char* jrelib_p;
2326        int len;
2327
2328        // Check the current module name "libjvm.so".
2329        p = strrchr(buf, '/');
2330        assert(strstr(p, "/libjvm") == p, "invalid library name");
2331
2332        rp = realpath(java_home_var, buf);
2333        if (rp == NULL)
2334          return;
2335
2336        // determine if this is a legacy image or modules image
2337        // modules image doesn't have "jre" subdirectory
2338        len = strlen(buf);
2339        jrelib_p = buf + len;
2340        snprintf(jrelib_p, buflen-len, "/jre/lib/%s", cpu_arch);
2341        if (0 != access(buf, F_OK)) {
2342          snprintf(jrelib_p, buflen-len, "/lib/%s", cpu_arch);
2343        }
2344
2345        if (0 == access(buf, F_OK)) {
2346          // Use current module name "libjvm.so"
2347          len = strlen(buf);
2348          snprintf(buf + len, buflen-len, "/hotspot/libjvm.so");
2349        } else {
2350          // Go back to path of .so
2351          rp = realpath(dli_fname, buf);
2352          if (rp == NULL)
2353            return;
2354        }
2355      }
2356    }
2357  }
2358
2359  strcpy(saved_jvm_path, buf);
2360}
2361
2362void os::print_jni_name_prefix_on(outputStream* st, int args_size) {
2363  // no prefix required, not even "_"
2364}
2365
2366void os::print_jni_name_suffix_on(outputStream* st, int args_size) {
2367  // no suffix required
2368}
2369
2370////////////////////////////////////////////////////////////////////////////////
2371// sun.misc.Signal support
2372
2373static volatile jint sigint_count = 0;
2374
2375static void
2376UserHandler(int sig, void *siginfo, void *context) {
2377  // 4511530 - sem_post is serialized and handled by the manager thread. When
2378  // the program is interrupted by Ctrl-C, SIGINT is sent to every thread. We
2379  // don't want to flood the manager thread with sem_post requests.
2380  if (sig == SIGINT && Atomic::add(1, &sigint_count) > 1)
2381      return;
2382
2383  // Ctrl-C is pressed during error reporting, likely because the error
2384  // handler fails to abort. Let VM die immediately.
2385  if (sig == SIGINT && is_error_reported()) {
2386     os::die();
2387  }
2388
2389  os::signal_notify(sig);
2390}
2391
2392void* os::user_handler() {
2393  return CAST_FROM_FN_PTR(void*, UserHandler);
2394}
2395
2396class Semaphore : public StackObj {
2397  public:
2398    Semaphore();
2399    ~Semaphore();
2400    void signal();
2401    void wait();
2402    bool trywait();
2403    bool timedwait(unsigned int sec, int nsec);
2404  private:
2405    sem_t _semaphore;
2406};
2407
2408Semaphore::Semaphore() {
2409  sem_init(&_semaphore, 0, 0);
2410}
2411
2412Semaphore::~Semaphore() {
2413  sem_destroy(&_semaphore);
2414}
2415
2416void Semaphore::signal() {
2417  sem_post(&_semaphore);
2418}
2419
2420void Semaphore::wait() {
2421  sem_wait(&_semaphore);
2422}
2423
2424bool Semaphore::trywait() {
2425  return sem_trywait(&_semaphore) == 0;
2426}
2427
2428bool Semaphore::timedwait(unsigned int sec, int nsec) {
2429
2430  struct timespec ts;
2431  // Semaphores are always associated with CLOCK_REALTIME
2432  os::Linux::clock_gettime(CLOCK_REALTIME, &ts);
2433  // see unpackTime for discussion on overflow checking
2434  if (sec >= MAX_SECS) {
2435    ts.tv_sec += MAX_SECS;
2436    ts.tv_nsec = 0;
2437  } else {
2438    ts.tv_sec += sec;
2439    ts.tv_nsec += nsec;
2440    if (ts.tv_nsec >= NANOSECS_PER_SEC) {
2441      ts.tv_nsec -= NANOSECS_PER_SEC;
2442      ++ts.tv_sec; // note: this must be <= MAX_SECS
2443    }
2444  }
2445
2446  while (1) {
2447    int result = sem_timedwait(&_semaphore, &ts);
2448    if (result == 0) {
2449      return true;
2450    } else if (errno == EINTR) {
2451      continue;
2452    } else if (errno == ETIMEDOUT) {
2453      return false;
2454    } else {
2455      return false;
2456    }
2457  }
2458}
2459
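// Usage sketch (illustrative): a bounded wait. The (sec, nsec) timeout is
// relative and is converted above into an absolute CLOCK_REALTIME deadline,
// so a wall-clock adjustment can shorten or lengthen the actual wait.
//
//   Semaphore sem;
//   sem.signal();                      // post once
//   bool got = sem.timedwait(1, 0);    // true: decremented within ~1 second
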
2460extern "C" {
2461  typedef void (*sa_handler_t)(int);
2462  typedef void (*sa_sigaction_t)(int, siginfo_t *, void *);
2463}
2464
2465void* os::signal(int signal_number, void* handler) {
2466  struct sigaction sigAct, oldSigAct;
2467
2468  sigfillset(&(sigAct.sa_mask));
2469  sigAct.sa_flags   = SA_RESTART|SA_SIGINFO;
2470  sigAct.sa_handler = CAST_TO_FN_PTR(sa_handler_t, handler);
2471
2472  if (sigaction(signal_number, &sigAct, &oldSigAct)) {
2473    // -1 means registration failed
2474    return (void *)-1;
2475  }
2476
2477  return CAST_FROM_FN_PTR(void*, oldSigAct.sa_handler);
2478}
2479
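// Usage sketch (illustrative; the handler and signal choice are
// hypothetical): install a handler and later restore the old one.
//
//   static void my_handler(int sig) { /* ... */ }
//
//   void* old = os::signal(SIGWINCH, CAST_FROM_FN_PTR(void*, my_handler));
//   if (old == (void*)-1) {
//     warning("signal registration failed");
//   } else {
//     os::signal(SIGWINCH, old);       // restore previous handler
//   }
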
2480void os::signal_raise(int signal_number) {
2481  ::raise(signal_number);
2482}
2483
2484/*
2485 * The following code is moved from os.cpp for making this
2486 * code platform specific, which it is by its very nature.
2487 */
2488
2489// Will be modified when max signal is changed to be dynamic
2490int os::sigexitnum_pd() {
2491  return NSIG;
2492}
2493
2494// a counter for each possible signal value
2495static volatile jint pending_signals[NSIG+1] = { 0 };
2496
2497// Linux (POSIX) specific handshaking semaphore.
2498static sem_t sig_sem;
2499static Semaphore sr_semaphore;
2500
2501void os::signal_init_pd() {
2502  // Initialize signal structures
2503  ::memset((void*)pending_signals, 0, sizeof(pending_signals));
2504
2505  // Initialize signal semaphore
2506  ::sem_init(&sig_sem, 0, 0);
2507}
2508
2509void os::signal_notify(int sig) {
2510  Atomic::inc(&pending_signals[sig]);
2511  ::sem_post(&sig_sem);
2512}
2513
2514static int check_pending_signals(bool wait) {
2515  Atomic::store(0, &sigint_count);
2516  for (;;) {
2517    for (int i = 0; i < NSIG + 1; i++) {
2518      jint n = pending_signals[i];
2519      if (n > 0 && n == Atomic::cmpxchg(n - 1, &pending_signals[i], n)) {
2520        return i;
2521      }
2522    }
2523    if (!wait) {
2524      return -1;
2525    }
2526    JavaThread *thread = JavaThread::current();
2527    ThreadBlockInVM tbivm(thread);
2528
2529    bool threadIsSuspended;
2530    do {
2531      thread->set_suspend_equivalent();
2532      // cleared by handle_special_suspend_equivalent_condition() or java_suspend_self()
2533      ::sem_wait(&sig_sem);
2534
2535      // were we externally suspended while we were waiting?
2536      threadIsSuspended = thread->handle_special_suspend_equivalent_condition();
2537      if (threadIsSuspended) {
2538        //
2539        // The semaphore has been incremented, but while we were waiting
2540        // another thread suspended us. We don't want to continue running
2541        // while suspended because that would surprise the thread that
2542        // suspended us.
2543        //
2544        ::sem_post(&sig_sem);
2545
2546        thread->java_suspend_self();
2547      }
2548    } while (threadIsSuspended);
2549  }
2550}
2551
2552int os::signal_lookup() {
2553  return check_pending_signals(false);
2554}
2555
2556int os::signal_wait() {
2557  return check_pending_signals(true);
2558}
2559
2560////////////////////////////////////////////////////////////////////////////////
2561// Virtual Memory
2562
2563int os::vm_page_size() {
2564  // Seems redundant as all get out
2565  assert(os::Linux::page_size() != -1, "must call os::init");
2566  return os::Linux::page_size();
2567}
2568
2569// Linux allocates memory by pages.
2570int os::vm_allocation_granularity() {
2571  assert(os::Linux::page_size() != -1, "must call os::init");
2572  return os::Linux::page_size();
2573}
2574
2575// Rationale behind this function:
2576//  current (Mon Apr 25 20:12:18 MSD 2005) oprofile drops samples without an executable
2577//  mapping for the address (see lookup_dcookie() in the kernel module), thus we cannot get
2578//  samples for JITted code. Here we create a private executable mapping over the code cache
2579//  and then we can use the standard (well, almost, as the mapping can change) way to provide
2580//  info for the reporting script by storing the timestamp and location of the symbol
2581void linux_wrap_code(char* base, size_t size) {
2582  static volatile jint cnt = 0;
2583
2584  if (!UseOprofile) {
2585    return;
2586  }
2587
2588  char buf[PATH_MAX+1];
2589  int num = Atomic::add(1, &cnt);
2590
2591  snprintf(buf, sizeof(buf), "%s/hs-vm-%d-%d",
2592           os::get_temp_directory(), os::current_process_id(), num);
2593  unlink(buf);
2594
2595  int fd = ::open(buf, O_CREAT | O_RDWR, S_IRWXU);
2596
2597  if (fd != -1) {
2598    off_t rv = ::lseek(fd, size-2, SEEK_SET);
2599    if (rv != (off_t)-1) {
2600      if (::write(fd, "", 1) == 1) {
2601        mmap(base, size,
2602             PROT_READ|PROT_WRITE|PROT_EXEC,
2603             MAP_PRIVATE|MAP_FIXED|MAP_NORESERVE, fd, 0);
2604      }
2605    }
2606    ::close(fd);
2607    unlink(buf);
2608  }
2609}
2610
2611static bool recoverable_mmap_error(int err) {
2612  // See if the error is one we can let the caller handle. This
2613  // list of errno values comes from JBS-6843484. I can't find a
2614  // Linux man page that documents this specific set of errno
2615  // values so while this list currently matches Solaris, it may
2616  // change as we gain experience with this failure mode.
2617  switch (err) {
2618  case EBADF:
2619  case EINVAL:
2620  case ENOTSUP:
2621    // let the caller deal with these errors
2622    return true;
2623
2624  default:
2625    // Any remaining errors on this OS can cause our reserved mapping
2626    // to be lost. That can cause confusion where different data
2627    // structures think they have the same memory mapped. The worst
2628    // scenario is if both the VM and a library think they have the
2629    // same memory mapped.
2630    return false;
2631  }
2632}
2633
2634static void warn_fail_commit_memory(char* addr, size_t size, bool exec,
2635                                    int err) {
2636  warning("INFO: os::commit_memory(" PTR_FORMAT ", " SIZE_FORMAT
2637          ", %d) failed; error='%s' (errno=%d)", addr, size, exec,
2638          strerror(err), err);
2639}
2640
2641static void warn_fail_commit_memory(char* addr, size_t size,
2642                                    size_t alignment_hint, bool exec,
2643                                    int err) {
2644  warning("INFO: os::commit_memory(" PTR_FORMAT ", " SIZE_FORMAT
2645          ", " SIZE_FORMAT ", %d) failed; error='%s' (errno=%d)", addr, size,
2646          alignment_hint, exec, strerror(err), err);
2647}
2648
2649// NOTE: The Linux kernel does not really reserve the pages for us.
2650//       All it does is check whether there are enough free pages
2651//       left at the time of mmap(). This could be a potential
2652//       problem.
2653int os::Linux::commit_memory_impl(char* addr, size_t size, bool exec) {
2654  int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
2655  uintptr_t res = (uintptr_t) ::mmap(addr, size, prot,
2656                                   MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0);
2657  if (res != (uintptr_t) MAP_FAILED) {
2658    if (UseNUMAInterleaving) {
2659      numa_make_global(addr, size);
2660    }
2661    return 0;
2662  }
2663
2664  int err = errno;  // save errno from mmap() call above
2665
2666  if (!recoverable_mmap_error(err)) {
2667    warn_fail_commit_memory(addr, size, exec, err);
2668    vm_exit_out_of_memory(size, OOM_MMAP_ERROR, "committing reserved memory.");
2669  }
2670
2671  return err;
2672}
2673
2674bool os::pd_commit_memory(char* addr, size_t size, bool exec) {
2675  return os::Linux::commit_memory_impl(addr, size, exec) == 0;
2676}
2677
2678void os::pd_commit_memory_or_exit(char* addr, size_t size, bool exec,
2679                                  const char* mesg) {
2680  assert(mesg != NULL, "mesg must be specified");
2681  int err = os::Linux::commit_memory_impl(addr, size, exec);
2682  if (err != 0) {
2683    // the caller wants all commit errors to exit with the specified mesg:
2684    warn_fail_commit_memory(addr, size, exec, err);
2685    vm_exit_out_of_memory(size, OOM_MMAP_ERROR, mesg);
2686  }
2687}
2688
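// Reserve/commit life cycle sketch (illustrative; assumes the generic os::
// wrappers declared in os.hpp):
//
//   char* base = os::reserve_memory(bytes);            // PROT_NONE mapping
//   os::commit_memory_or_exit(base, bytes, !ExecMem,
//                             "committing heap");      // mmap(MAP_FIXED) above
//   ...
//   os::uncommit_memory(base, bytes);                  // back to PROT_NONE
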
2689// Define MAP_HUGETLB here so we can build HotSpot on old systems.
2690#ifndef MAP_HUGETLB
2691#define MAP_HUGETLB 0x40000
2692#endif
2693
2694// Define MADV_HUGEPAGE here so we can build HotSpot on old systems.
2695#ifndef MADV_HUGEPAGE
2696#define MADV_HUGEPAGE 14
2697#endif
2698
2699int os::Linux::commit_memory_impl(char* addr, size_t size,
2700                                  size_t alignment_hint, bool exec) {
2701  int err = os::Linux::commit_memory_impl(addr, size, exec);
2702  if (err == 0) {
2703    realign_memory(addr, size, alignment_hint);
2704  }
2705  return err;
2706}
2707
2708bool os::pd_commit_memory(char* addr, size_t size, size_t alignment_hint,
2709                          bool exec) {
2710  return os::Linux::commit_memory_impl(addr, size, alignment_hint, exec) == 0;
2711}
2712
2713void os::pd_commit_memory_or_exit(char* addr, size_t size,
2714                                  size_t alignment_hint, bool exec,
2715                                  const char* mesg) {
2716  assert(mesg != NULL, "mesg must be specified");
2717  int err = os::Linux::commit_memory_impl(addr, size, alignment_hint, exec);
2718  if (err != 0) {
2719    // the caller wants all commit errors to exit with the specified mesg:
2720    warn_fail_commit_memory(addr, size, alignment_hint, exec, err);
2721    vm_exit_out_of_memory(size, OOM_MMAP_ERROR, mesg);
2722  }
2723}
2724
2725void os::pd_realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
2726  if (UseTransparentHugePages && alignment_hint > (size_t)vm_page_size()) {
2727    // We don't check the return value: madvise(MADV_HUGEPAGE) may not
2728    // be supported or the memory may already be backed by huge pages.
2729    ::madvise(addr, bytes, MADV_HUGEPAGE);
2730  }
2731}
2732
2733void os::pd_free_memory(char *addr, size_t bytes, size_t alignment_hint) {
2734  // This method works by doing an mmap over an existing mapping and effectively discarding
2735  // the existing pages. However it won't work for SHM-based large pages that cannot be
2736  // uncommitted at all. We don't do anything in this case to avoid creating a segment with
2737  // small pages on top of the SHM segment. This method always works for small pages, so we
2738  // allow that in any case.
2739  if (alignment_hint <= (size_t)os::vm_page_size() || can_commit_large_page_memory()) {
2740    commit_memory(addr, bytes, alignment_hint, !ExecMem);
2741  }
2742}
2743
2744void os::numa_make_global(char *addr, size_t bytes) {
2745  Linux::numa_interleave_memory(addr, bytes);
2746}
2747
2748// Define for numa_set_bind_policy(int). Setting the argument to 0 will set the
2749// bind policy to MPOL_PREFERRED for the current thread.
2750#define USE_MPOL_PREFERRED 0
2751
2752void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) {
2753  // To make NUMA and large pages more robust when both enabled, we need to ease
2754  // the requirements on where the memory should be allocated. MPOL_BIND is the
2755  // default policy and it will force memory to be allocated on the specified
2756  // node. Changing this to MPOL_PREFERRED will prefer to allocate the memory on
2757  // the specified node, but will not force it. Using this policy will prevent
2758  // getting SIGBUS when trying to allocate large pages on NUMA nodes with no
2759  // free large pages.
2760  Linux::numa_set_bind_policy(USE_MPOL_PREFERRED);
2761  Linux::numa_tonode_memory(addr, bytes, lgrp_hint);
2762}
2763
2764bool os::numa_topology_changed()   { return false; }
2765
2766size_t os::numa_get_groups_num() {
2767  int max_node = Linux::numa_max_node();
2768  return max_node > 0 ? max_node + 1 : 1;
2769}
2770
2771int os::numa_get_group_id() {
2772  int cpu_id = Linux::sched_getcpu();
2773  if (cpu_id != -1) {
2774    int lgrp_id = Linux::get_node_by_cpu(cpu_id);
2775    if (lgrp_id != -1) {
2776      return lgrp_id;
2777    }
2778  }
2779  return 0;
2780}
2781
2782size_t os::numa_get_leaf_groups(int *ids, size_t size) {
2783  for (size_t i = 0; i < size; i++) {
2784    ids[i] = i;
2785  }
2786  return size;
2787}
2788
2789bool os::get_page_info(char *start, page_info* info) {
2790  return false;
2791}
2792
2793char *os::scan_pages(char *start, char* end, page_info* page_expected, page_info* page_found) {
2794  return end;
2795}
2796
2797
2798int os::Linux::sched_getcpu_syscall(void) {
2799  unsigned int cpu;
2800  int retval = -1;
2801
2802#if defined(IA32)
2803# ifndef SYS_getcpu
2804# define SYS_getcpu 318
2805# endif
2806  retval = syscall(SYS_getcpu, &cpu, NULL, NULL);
2807#elif defined(AMD64)
2808// Unfortunately we have to bring all these macros here from vsyscall.h
2809// to be able to compile on old linuxes.
2810# define __NR_vgetcpu 2
2811# define VSYSCALL_START (-10UL << 20)
2812# define VSYSCALL_SIZE 1024
2813# define VSYSCALL_ADDR(vsyscall_nr) (VSYSCALL_START+VSYSCALL_SIZE*(vsyscall_nr))
2814  typedef long (*vgetcpu_t)(unsigned int *cpu, unsigned int *node, unsigned long *tcache);
2815  vgetcpu_t vgetcpu = (vgetcpu_t)VSYSCALL_ADDR(__NR_vgetcpu);
2816  retval = vgetcpu(&cpu, NULL, NULL);
2817#endif
2818
2819  return (retval == -1) ? retval : cpu;
2820}
2821
2822// Something to do with the numa-aware allocator needs these symbols
2823extern "C" JNIEXPORT void numa_warn(int number, char *where, ...) { }
2824extern "C" JNIEXPORT void numa_error(char *where) { }
2825extern "C" JNIEXPORT int fork1() { return fork(); }
2826
2827
2828// If we are running with libnuma version > 2, then we should
2829// be trying to use symbols with versions 1.1.
2830// If we are running with an earlier version, which did not have symbol versions,
2831// we should use the base version.
2832void* os::Linux::libnuma_dlsym(void* handle, const char *name) {
2833  void *f = dlvsym(handle, name, "libnuma_1.1");
2834  if (f == NULL) {
2835    f = dlsym(handle, name);
2836  }
2837  return f;
2838}
2839
2840bool os::Linux::libnuma_init() {
2841  // sched_getcpu() should be in libc.
2842  set_sched_getcpu(CAST_TO_FN_PTR(sched_getcpu_func_t,
2843                                  dlsym(RTLD_DEFAULT, "sched_getcpu")));
2844
2845  // If it's not, try a direct syscall.
2846  if (sched_getcpu() == -1)
2847    set_sched_getcpu(CAST_TO_FN_PTR(sched_getcpu_func_t, (void*)&sched_getcpu_syscall));
2848
2849  if (sched_getcpu() != -1) { // Does it work?
2850    void *handle = dlopen("libnuma.so.1", RTLD_LAZY);
2851    if (handle != NULL) {
2852      set_numa_node_to_cpus(CAST_TO_FN_PTR(numa_node_to_cpus_func_t,
2853                                           libnuma_dlsym(handle, "numa_node_to_cpus")));
2854      set_numa_max_node(CAST_TO_FN_PTR(numa_max_node_func_t,
2855                                       libnuma_dlsym(handle, "numa_max_node")));
2856      set_numa_available(CAST_TO_FN_PTR(numa_available_func_t,
2857                                        libnuma_dlsym(handle, "numa_available")));
2858      set_numa_tonode_memory(CAST_TO_FN_PTR(numa_tonode_memory_func_t,
2859                                            libnuma_dlsym(handle, "numa_tonode_memory")));
2860      set_numa_interleave_memory(CAST_TO_FN_PTR(numa_interleave_memory_func_t,
2861                                            libnuma_dlsym(handle, "numa_interleave_memory")));
2862      set_numa_set_bind_policy(CAST_TO_FN_PTR(numa_set_bind_policy_func_t,
2863                                            libnuma_dlsym(handle, "numa_set_bind_policy")));
2864
2865
2866      if (numa_available() != -1) {
2867        set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle, "numa_all_nodes"));
2868        // Create a cpu -> node mapping
2869        _cpu_to_node = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<int>(0, true);
2870        rebuild_cpu_to_node_map();
2871        return true;
2872      }
2873    }
2874  }
2875  return false;
2876}
2877
2878// rebuild_cpu_to_node_map() constructs a table mapping cpu id to node id.
2879// The table is later used in get_node_by_cpu().
2880void os::Linux::rebuild_cpu_to_node_map() {
2881  const size_t NCPUS = 32768; // Since the buffer size computation is very obscure
2882                              // in libnuma (possible values start from 16,
2883                              // and continue up with every other power of 2, but less
2884                              // than the maximum number of CPUs supported by the kernel), and
2885                              // is subject to change (in libnuma version 2 the requirements
2886                              // are more reasonable) we'll just hardcode the number they use
2887                              // in the library.
2888  const size_t BitsPerCLong = sizeof(long) * CHAR_BIT;
2889
2890  size_t cpu_num = os::active_processor_count();
2891  size_t cpu_map_size = NCPUS / BitsPerCLong;
2892  size_t cpu_map_valid_size =
2893    MIN2((cpu_num + BitsPerCLong - 1) / BitsPerCLong, cpu_map_size);
2894
2895  cpu_to_node()->clear();
2896  cpu_to_node()->at_grow(cpu_num - 1);
2897  size_t node_num = numa_get_groups_num();
2898
2899  unsigned long *cpu_map = NEW_C_HEAP_ARRAY(unsigned long, cpu_map_size, mtInternal);
2900  for (size_t i = 0; i < node_num; i++) {
2901    if (numa_node_to_cpus(i, cpu_map, cpu_map_size * sizeof(unsigned long)) != -1) {
2902      for (size_t j = 0; j < cpu_map_valid_size; j++) {
2903        if (cpu_map[j] != 0) {
2904          for (size_t k = 0; k < BitsPerCLong; k++) {
2905            if (cpu_map[j] & (1UL << k)) {
2906              cpu_to_node()->at_put(j * BitsPerCLong + k, i);
2907            }
2908          }
2909        }
2910      }
2911    }
2912  }
2913  FREE_C_HEAP_ARRAY(unsigned long, cpu_map, mtInternal);
2914}
2915
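// Worked example (illustrative): with 8 CPUs all on node 0, libnuma fills
// cpu_map[0] == 0xff, and the loops above record node 0 for cpu ids 0..7,
// since bit k of word j corresponds to cpu id j * BitsPerCLong + k.
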
2916int os::Linux::get_node_by_cpu(int cpu_id) {
2917  if (cpu_to_node() != NULL && cpu_id >= 0 && cpu_id < cpu_to_node()->length()) {
2918    return cpu_to_node()->at(cpu_id);
2919  }
2920  return -1;
2921}
2922
2923GrowableArray<int>* os::Linux::_cpu_to_node;
2924os::Linux::sched_getcpu_func_t os::Linux::_sched_getcpu;
2925os::Linux::numa_node_to_cpus_func_t os::Linux::_numa_node_to_cpus;
2926os::Linux::numa_max_node_func_t os::Linux::_numa_max_node;
2927os::Linux::numa_available_func_t os::Linux::_numa_available;
2928os::Linux::numa_tonode_memory_func_t os::Linux::_numa_tonode_memory;
2929os::Linux::numa_interleave_memory_func_t os::Linux::_numa_interleave_memory;
2930os::Linux::numa_set_bind_policy_func_t os::Linux::_numa_set_bind_policy;
2931unsigned long* os::Linux::_numa_all_nodes;
2932
2933bool os::pd_uncommit_memory(char* addr, size_t size) {
2934  uintptr_t res = (uintptr_t) ::mmap(addr, size, PROT_NONE,
2935                MAP_PRIVATE|MAP_FIXED|MAP_NORESERVE|MAP_ANONYMOUS, -1, 0);
2936  return res != (uintptr_t) MAP_FAILED;
2937}
2938
2939static
2940address get_stack_committed_bottom(address bottom, size_t size) {
2941  address nbot = bottom;
2942  address ntop = bottom + size;
2943
2944  size_t page_sz = os::vm_page_size();
2945  unsigned pages = size / page_sz;
2946
2947  unsigned char vec[1];
2948  unsigned imin = 1, imax = pages + 1, imid;
2949  int mincore_return_value = 0;
2950
2951  assert(imin <= imax, "Unexpected page size");
2952
2953  while (imin < imax) {
2954    imid = (imax + imin) / 2;
2955    nbot = ntop - (imid * page_sz);
2956
2957    // Use a trick with mincore to check whether the page is mapped or not.
2958    // mincore sets vec to 1 if the page resides in memory and to 0 if the
2959    // page is swapped out, but if the page we are asking for is unmapped
2960    // it returns -1 with errno set to ENOMEM.
2961    mincore_return_value = mincore(nbot, page_sz, vec);
2962
2963    if (mincore_return_value == -1) {
2964      // Page is not mapped; go up
2965      // to find the first mapped page
2966      if (errno != EAGAIN) {
2967        assert(errno == ENOMEM, "Unexpected mincore errno");
2968        imax = imid;
2969      }
2970    } else {
2971      // Page is mapped; go down
2972      // to find the first unmapped page
2973      imin = imid + 1;
2974    }
2975  }
2976
2977  nbot = nbot + page_sz;
2978
2979  // Adjust stack bottom one page up if the last checked page is not mapped
2980  if (mincore_return_value == -1) {
2981    nbot = nbot + page_sz;
2982  }
2983
2984  return nbot;
2985}
2986
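// mincore() semantics relied on above (illustrative):
//
//   unsigned char vec[1];
//   int rc = mincore(page_aligned_addr, os::vm_page_size(), vec);
//   // rc == 0                     : page is mapped (vec[0] & 1 if resident)
//   // rc == -1 && errno == ENOMEM : page is not mapped at all
//   // rc == -1 && errno == EAGAIN : transient kernel condition; handled above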
2987
2988// Linux uses a growable mapping for the stack, and if the mapping for
2989// the stack guard pages is not removed when we detach a thread the
2990// stack cannot grow beyond the pages where the stack guard was
2991// mapped.  If at some point later in the process the stack expands to
2992// that point, the Linux kernel cannot expand the stack any further
2993// because the guard pages are in the way, and a segfault occurs.
2994//
2995// However, it's essential not to split the stack region by unmapping
2996// a region (leaving a hole) that's already part of the stack mapping,
2997// so if the stack mapping has already grown beyond the guard pages at
2998// the time we create them, we have to truncate the stack mapping.
2999// So, we need to know the extent of the stack mapping when
3000// create_stack_guard_pages() is called.
3001
3002// We only need this for stacks that are growable: at the time of
3003// writing thread stacks don't use growable mappings (i.e. those
3004// created with MAP_GROWSDOWN), and aren't marked "[stack]", so this
3005// only applies to the main thread.
3006
3007// If the (growable) stack mapping already extends beyond the point
3008// where we're going to put our guard pages, truncate the mapping at
3009// that point by munmap()ping it.  This ensures that when we later
3010// munmap() the guard pages we don't leave a hole in the stack
3011// mapping. This only affects the main/initial thread.
3012
3013bool os::pd_create_stack_guard_pages(char* addr, size_t size) {
3014
3015  if (os::Linux::is_initial_thread()) {
3016    // As we manually grow the stack up to bottom inside create_attached_thread(),
3017    // it's likely that os::Linux::initial_thread_stack_bottom is mapped and
3018    // we don't need to do anything special.
3019    // Check that first, before calling the more expensive function below.
3020    uintptr_t stack_extent = (uintptr_t) os::Linux::initial_thread_stack_bottom();
3021    unsigned char vec[1];
3022
3023    if (mincore((address)stack_extent, os::vm_page_size(), vec) == -1) {
3024      // Fall back to the slow path on all errors, including EAGAIN.
3025      stack_extent = (uintptr_t) get_stack_committed_bottom(
3026                                    os::Linux::initial_thread_stack_bottom(),
3027                                    (size_t)addr - stack_extent);
3028    }
3029
3030    if (stack_extent < (uintptr_t)addr) {
3031      ::munmap((void*)stack_extent, (uintptr_t)(addr - stack_extent));
3032    }
3033  }
3034
3035  return os::commit_memory(addr, size, !ExecMem);
3036}
3037
3038// If this is a growable mapping, remove the guard pages entirely by
3039// munmap()ping them.  If not, just call uncommit_memory(). This only
3040// affects the main/initial thread, but guard against future OS changes.
3041// It's safe to always unmap the guard pages for the initial thread because
3042// we always place them right after the end of the mapped region.
3043
3044bool os::remove_stack_guard_pages(char* addr, size_t size) {
3045  uintptr_t stack_extent, stack_base;
3046
3047  if (os::Linux::is_initial_thread()) {
3048    return ::munmap(addr, size) == 0;
3049  }
3050
3051  return os::uncommit_memory(addr, size);
3052}
3053
3054static address _highest_vm_reserved_address = NULL;
3055
3056// If 'fixed' is true, anon_mmap() will attempt to reserve anonymous memory
3057// at 'requested_addr'. If there are existing memory mappings at the same
3058// location, however, they will be overwritten. If 'fixed' is false,
3059// 'requested_addr' is only treated as a hint, the return value may or
3060// may not start from the requested address. Unlike Linux mmap(), this
3061// function returns NULL to indicate failure.
3062static char* anon_mmap(char* requested_addr, size_t bytes, bool fixed) {
3063  char * addr;
3064  int flags;
3065
3066  flags = MAP_PRIVATE | MAP_NORESERVE | MAP_ANONYMOUS;
3067  if (fixed) {
3068    assert((uintptr_t)requested_addr % os::Linux::page_size() == 0, "unaligned address");
3069    flags |= MAP_FIXED;
3070  }
3071
3072  // Map reserved/uncommitted pages PROT_NONE so we fail early if we
3073  // touch an uncommitted page. Otherwise, the read/write might
3074  // succeed if we have enough swap space to back the physical page.
3075  addr = (char*)::mmap(requested_addr, bytes, PROT_NONE,
3076                       flags, -1, 0);
3077
3078  if (addr != MAP_FAILED) {
3079    // anon_mmap() should only get called during VM initialization, so
3080    // no lock is needed (actually we could skip locking even if it were
3081    // called from multiple threads, because _highest_vm_reserved_address is
3082    // just a hint about the upper limit of non-stack memory regions.)
3083    if ((address)addr + bytes > _highest_vm_reserved_address) {
3084      _highest_vm_reserved_address = (address)addr + bytes;
3085    }
3086  }
3087
3088  return addr == MAP_FAILED ? NULL : addr;
3089}
3090
3091// Don't update _highest_vm_reserved_address, because there might be memory
3092// regions above addr + size. If so, releasing a memory region only creates
3093// a hole in the address space, it doesn't help prevent heap-stack collision.
3094//
3095static int anon_munmap(char * addr, size_t size) {
3096  return ::munmap(addr, size) == 0;
3097}
3098
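// Illustrative use of the pair above (sketch): reserve address space
// without committing it, then give it back.
//
//   char* p = anon_mmap(NULL, 16 * K, false);  // NULL: kernel picks address
//   if (p != NULL) {
//     // p is mapped PROT_NONE: reserved, but any access would fault
//     anon_munmap(p, 16 * K);
//   }
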
3099char* os::pd_reserve_memory(size_t bytes, char* requested_addr,
3100                         size_t alignment_hint) {
3101  return anon_mmap(requested_addr, bytes, (requested_addr != NULL));
3102}
3103
3104bool os::pd_release_memory(char* addr, size_t size) {
3105  return anon_munmap(addr, size);
3106}
3107
3108static address highest_vm_reserved_address() {
3109  return _highest_vm_reserved_address;
3110}
3111
3112static bool linux_mprotect(char* addr, size_t size, int prot) {
3113  // Linux wants the mprotect address argument to be page aligned.
3114  char* bottom = (char*)align_size_down((intptr_t)addr, os::Linux::page_size());
3115
3116  // According to SUSv3, mprotect() should only be used with mappings
3117  // established by mmap(), and mmap() always maps whole pages. Unaligned
3118  // 'addr' likely indicates a problem in the VM (e.g. trying to change
3119  // protection of malloc'ed or statically allocated memory). Check the
3120  // caller if you hit this assert.
3121  assert(addr == bottom, "sanity check");
3122
3123  size = align_size_up(pointer_delta(addr, bottom, 1) + size, os::Linux::page_size());
3124  return ::mprotect(bottom, size, prot) == 0;
3125}
3126
3127// Set the specified protections
3128bool os::protect_memory(char* addr, size_t bytes, ProtType prot,
3129                        bool is_committed) {
3130  unsigned int p = 0;
3131  switch (prot) {
3132  case MEM_PROT_NONE: p = PROT_NONE; break;
3133  case MEM_PROT_READ: p = PROT_READ; break;
3134  case MEM_PROT_RW:   p = PROT_READ|PROT_WRITE; break;
3135  case MEM_PROT_RWX:  p = PROT_READ|PROT_WRITE|PROT_EXEC; break;
3136  default:
3137    ShouldNotReachHere();
3138  }
3139  // is_committed is unused.
3140  return linux_mprotect(addr, bytes, p);
3141}
3142
3143bool os::guard_memory(char* addr, size_t size) {
3144  return linux_mprotect(addr, size, PROT_NONE);
3145}
3146
3147bool os::unguard_memory(char* addr, size_t size) {
3148  return linux_mprotect(addr, size, PROT_READ|PROT_WRITE);
3149}
3150
3151bool os::Linux::transparent_huge_pages_sanity_check(bool warn, size_t page_size) {
3152  bool result = false;
3153  void *p = mmap(NULL, page_size * 2, PROT_READ|PROT_WRITE,
3154                 MAP_ANONYMOUS|MAP_PRIVATE,
3155                 -1, 0);
3156  if (p != MAP_FAILED) {
3157    void *aligned_p = align_ptr_up(p, page_size);
3158
3159    result = madvise(aligned_p, page_size, MADV_HUGEPAGE) == 0;
3160
3161    munmap(p, page_size * 2);
3162  }
3163
3164  if (warn && !result) {
3165    warning("TransparentHugePages is not supported by the operating system.");
3166  }
3167
3168  return result;
3169}
3170
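// A userland counterpart to the madvise() probe above (illustrative; the
// sysfs knob is present on THP-capable kernels, exact output may vary):
//
//   $ cat /sys/kernel/mm/transparent_hugepage/enabled
//   always [madvise] never
//
// MADV_HUGEPAGE is honored in the "always" and "madvise" modes.
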
3171bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) {
3172  bool result = false;
3173  void *p = mmap(NULL, page_size, PROT_READ|PROT_WRITE,
3174                 MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB,
3175                 -1, 0);
3176
3177  if (p != MAP_FAILED) {
3178    // We don't know if this really is a huge page or not.
3179    FILE *fp = fopen("/proc/self/maps", "r");
3180    if (fp) {
3181      while (!feof(fp)) {
3182        char chars[257];
3183        long x = 0;
3184        if (fgets(chars, sizeof(chars), fp)) {
3185          if (sscanf(chars, "%lx-%*x", &x) == 1
3186              && x == (long)p) {
3187            if (strstr(chars, "hugepage")) {
3188              result = true;
3189              break;
3190            }
3191          }
3192        }
3193      }
3194      fclose(fp);
3195    }
3196    munmap(p, page_size);
3197  }
3198
3199  if (warn && !result) {
3200    warning("HugeTLBFS is not supported by the operating system.");
3201  }
3202
3203  return result;
3204}
3205
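// What the /proc/self/maps scan above is matching, roughly (illustrative
// excerpt; anonymous MAP_HUGETLB mappings typically show up with an
// "/anon_hugepage" pseudo-path, hence the "hugepage" substring test):
//
//   2aaaaac00000-2aaaaae00000 rw-p 00000000 00:0c 4066  /anon_hugepage (deleted)
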
3206/*
3207* Set the coredump_filter bits to include largepages in core dump (bit 6)
3208*
3209* From the coredump_filter documentation:
3210*
3211* - (bit 0) anonymous private memory
3212* - (bit 1) anonymous shared memory
3213* - (bit 2) file-backed private memory
3214* - (bit 3) file-backed shared memory
3215* - (bit 4) ELF header pages in file-backed private memory areas (it is
3216*           effective only if the bit 2 is cleared)
3217* - (bit 5) hugetlb private memory
3218* - (bit 6) hugetlb shared memory
3219*/
3220static void set_coredump_filter(void) {
3221  FILE *f;
3222  long cdm;
3223
3224  if ((f = fopen("/proc/self/coredump_filter", "r+")) == NULL) {
3225    return;
3226  }
3227
3228  if (fscanf(f, "%lx", &cdm) != 1) {
3229    fclose(f);
3230    return;
3231  }
3232
3233  rewind(f);
3234
3235  if ((cdm & LARGEPAGES_BIT) == 0) {
3236    cdm |= LARGEPAGES_BIT;
3237    fprintf(f, "%#lx", cdm);
3238  }
3239
3240  fclose(f);
3241}
3242
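// Shell equivalent of set_coredump_filter(), for reference (a sketch;
// 0x33 is the usual kernel default, and LARGEPAGES_BIT is bit 6 = 0x40):
//
//   $ cat /proc/self/coredump_filter
//   00000033
//   $ echo 0x73 > /proc/self/coredump_filter   # add hugetlb shared memory
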
3243// Large page support
3244
3245static size_t _large_page_size = 0;
3246
3247size_t os::Linux::find_large_page_size() {
3248  size_t large_page_size = 0;
3249
3250  // large_page_size on Linux is used to round up heap size. x86 uses either
3251  // 2M or 4M pages, depending on whether PAE (Physical Address Extensions)
3252  // mode is enabled. AMD64/EM64T uses 2M pages in 64bit mode. IA64 can use
3253  // pages as large as 256M.
3254  //
3255  // Here we try to figure out page size by parsing /proc/meminfo and looking
3256  // for a line with the following format:
3257  //    Hugepagesize:     2048 kB
3258  //
3259  // If we can't determine the value (e.g. /proc is not mounted, or the text
3260  // format has been changed), we'll use the largest page size supported by
3261  // the processor.
3262
3263#ifndef ZERO
3264  large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M)
3265                     ARM_ONLY(2 * M) PPC_ONLY(4 * M);
3266#endif // ZERO
3267
3268  FILE *fp = fopen("/proc/meminfo", "r");
3269  if (fp) {
3270    while (!feof(fp)) {
3271      int x = 0;
3272      char buf[16];
3273      if (fscanf(fp, "Hugepagesize: %d", &x) == 1) {
3274        if (x && fgets(buf, sizeof(buf), fp) && strcmp(buf, " kB\n") == 0) {
3275          large_page_size = x * K;
3276          break;
3277        }
3278      } else {
3279        // skip to next line
3280        for (;;) {
3281          int ch = fgetc(fp);
3282          if (ch == EOF || ch == (int)'\n') break;
3283        }
3284      }
3285    }
3286    fclose(fp);
3287  }
3288
3289  if (!FLAG_IS_DEFAULT(LargePageSizeInBytes) && LargePageSizeInBytes != large_page_size) {
3290    warning("Setting LargePageSizeInBytes has no effect on this OS. Large page size is "
3291        SIZE_FORMAT "%s.", byte_size_in_proper_unit(large_page_size),
3292        proper_unit_for_byte_size(large_page_size));
3293  }
3294
3295  return large_page_size;
3296}
3297
3298size_t os::Linux::setup_large_page_size() {
3299  _large_page_size = Linux::find_large_page_size();
3300  const size_t default_page_size = (size_t)Linux::page_size();
3301  if (_large_page_size > default_page_size) {
3302    _page_sizes[0] = _large_page_size;
3303    _page_sizes[1] = default_page_size;
3304    _page_sizes[2] = 0;
3305  }
3306
3307  return _large_page_size;
3308}
3309
3310bool os::Linux::setup_large_page_type(size_t page_size) {
3311  if (FLAG_IS_DEFAULT(UseHugeTLBFS) &&
3312      FLAG_IS_DEFAULT(UseSHM) &&
3313      FLAG_IS_DEFAULT(UseTransparentHugePages)) {
3314
3315    // The type of large pages has not been specified by the user.
3316
3317    // Try UseHugeTLBFS and then UseSHM.
3318    UseHugeTLBFS = UseSHM = true;
3319
3320    // Don't try UseTransparentHugePages since there are known
3321    // performance issues with it turned on. This might change in the future.
3322    UseTransparentHugePages = false;
3323  }
3324
3325  if (UseTransparentHugePages) {
3326    bool warn_on_failure = !FLAG_IS_DEFAULT(UseTransparentHugePages);
3327    if (transparent_huge_pages_sanity_check(warn_on_failure, page_size)) {
3328      UseHugeTLBFS = false;
3329      UseSHM = false;
3330      return true;
3331    }
3332    UseTransparentHugePages = false;
3333  }
3334
3335  if (UseHugeTLBFS) {
3336    bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS);
3337    if (hugetlbfs_sanity_check(warn_on_failure, page_size)) {
3338      UseSHM = false;
3339      return true;
3340    }
3341    UseHugeTLBFS = false;
3342  }
3343
3344  return UseSHM;
3345}
3346
3347void os::large_page_init() {
3348  if (!UseLargePages &&
3349      !UseTransparentHugePages &&
3350      !UseHugeTLBFS &&
3351      !UseSHM) {
3352    // Not using large pages.
3353    return;
3354  }
3355
3356  if (!FLAG_IS_DEFAULT(UseLargePages) && !UseLargePages) {
3357    // The user explicitly turned off large pages.
3358    // Ignore the rest of the large pages flags.
3359    UseTransparentHugePages = false;
3360    UseHugeTLBFS = false;
3361    UseSHM = false;
3362    return;
3363  }
3364
3365  size_t large_page_size = Linux::setup_large_page_size();
3366  UseLargePages          = Linux::setup_large_page_type(large_page_size);
3367
3368  set_coredump_filter();
3369}
3370
3371#ifndef SHM_HUGETLB
3372#define SHM_HUGETLB 04000
3373#endif
3374
3375char* os::Linux::reserve_memory_special_shm(size_t bytes, size_t alignment, char* req_addr, bool exec) {
3376  // "exec" is passed in but not used.  Creating the shared image for
3377  // the code cache doesn't have an SHM_X executable permission to check.
3378  assert(UseLargePages && UseSHM, "only for SHM large pages");
3379  assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address");
3380
3381  if (!is_size_aligned(bytes, os::large_page_size()) || alignment > os::large_page_size()) {
3382    return NULL; // Fallback to small pages.
3383  }
3384
3385  key_t key = IPC_PRIVATE;
3386  char *addr;
3387
3388  bool warn_on_failure = UseLargePages &&
3389                        (!FLAG_IS_DEFAULT(UseLargePages) ||
3390                         !FLAG_IS_DEFAULT(UseSHM) ||
3391                         !FLAG_IS_DEFAULT(LargePageSizeInBytes)
3392                        );
3393  char msg[128];
3394
3395  // Create a large shared memory region to attach to based on size.
3396  // Currently, size is the total size of the heap
3397  int shmid = shmget(key, bytes, SHM_HUGETLB|IPC_CREAT|SHM_R|SHM_W);
3398  if (shmid == -1) {
3399     // Possible reasons for shmget failure:
3400     // 1. shmmax is too small for Java heap.
3401     //    > check shmmax value: cat /proc/sys/kernel/shmmax
3402     //    > increase shmmax value: echo "0xffffffff" > /proc/sys/kernel/shmmax
3403     // 2. not enough large page memory.
3404     //    > check available large pages: cat /proc/meminfo
3405     //    > increase amount of large pages:
3406     //          echo new_value > /proc/sys/vm/nr_hugepages
3407     //      Note 1: different Linux distributions may use different names for
3408     //            this property, e.g. on Redhat AS-3 it is "hugetlb_pool".
3409     //      Note 2: it's possible there's enough physical memory available but
3410     //            it is so fragmented after a long run that it can't be
3411     //            coalesced into large pages. Try to reserve large pages when
3412     //            the system is still "fresh".
3413     if (warn_on_failure) {
3414       jio_snprintf(msg, sizeof(msg), "Failed to reserve shared memory (errno = %d).", errno);
3415       warning(msg);
3416     }
3417     return NULL;
3418  }
3419
3420  // attach to the region
3421  addr = (char*)shmat(shmid, req_addr, 0);
3422  int err = errno;
3423
3424  // Remove shmid. If shmat() is successful, the actual shared memory segment
3425  // will be deleted when it's detached by shmdt() or when the process
3426  // terminates. If shmat() is not successful this will remove the shared
3427  // segment immediately.
3428  shmctl(shmid, IPC_RMID, NULL);
3429
3430  if ((intptr_t)addr == -1) {
3431     if (warn_on_failure) {
3432       jio_snprintf(msg, sizeof(msg), "Failed to attach shared memory (errno = %d).", err);
3433       warning(msg);
3434     }
3435     return NULL;
3436  }
3437
3438  return addr;
3439}
3440
3441static void warn_on_large_pages_failure(char* req_addr, size_t bytes, int error) {
3442  assert(error == ENOMEM, "Only expect to fail if no memory is available");
3443
3444  bool warn_on_failure = UseLargePages &&
3445      (!FLAG_IS_DEFAULT(UseLargePages) ||
3446       !FLAG_IS_DEFAULT(UseHugeTLBFS) ||
3447       !FLAG_IS_DEFAULT(LargePageSizeInBytes));
3448
3449  if (warn_on_failure) {
3450    char msg[128];
3451    jio_snprintf(msg, sizeof(msg), "Failed to reserve large pages memory req_addr: "
3452        PTR_FORMAT " bytes: " SIZE_FORMAT " (errno = %d).", req_addr, bytes, error);
3453    warning(msg);
3454  }
3455}
3456
3457char* os::Linux::reserve_memory_special_huge_tlbfs_only(size_t bytes, char* req_addr, bool exec) {
3458  assert(UseLargePages && UseHugeTLBFS, "only for Huge TLBFS large pages");
3459  assert(is_size_aligned(bytes, os::large_page_size()), "Unaligned size");
3460  assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address");
3461
3462  int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
3463  char* addr = (char*)::mmap(req_addr, bytes, prot,
3464                             MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB,
3465                             -1, 0);
3466
3467  if (addr == MAP_FAILED) {
3468    warn_on_large_pages_failure(req_addr, bytes, errno);
3469    return NULL;
3470  }
3471
3472  assert(is_ptr_aligned(addr, os::large_page_size()), "Must be");
3473
3474  return addr;
3475}
3476
3477char* os::Linux::reserve_memory_special_huge_tlbfs_mixed(size_t bytes, size_t alignment, char* req_addr, bool exec) {
3478  size_t large_page_size = os::large_page_size();
3479
3480  assert(bytes >= large_page_size, "Shouldn't allocate large pages for small sizes");
3481
3482  // Allocate small pages.
3483
3484  char* start;
3485  if (req_addr != NULL) {
3486    assert(is_ptr_aligned(req_addr, alignment), "Must be");
3487    assert(is_size_aligned(bytes, alignment), "Must be");
3488    start = os::reserve_memory(bytes, req_addr);
3489    assert(start == NULL || start == req_addr, "Must be");
3490  } else {
3491    start = os::reserve_memory_aligned(bytes, alignment);
3492  }
3493
3494  if (start == NULL) {
3495    return NULL;
3496  }
3497
3498  assert(is_ptr_aligned(start, alignment), "Must be");
3499
3500  // os::reserve_memory_special will record this memory area.
3501  // Need to release it here to prevent overlapping reservations.
3502  MemTracker::record_virtual_memory_release((address)start, bytes);
3503
3504  char* end = start + bytes;
3505
3506  // Find the regions of the allocated chunk that can be promoted to large pages.
3507  char* lp_start = (char*)align_ptr_up(start, large_page_size);
3508  char* lp_end   = (char*)align_ptr_down(end, large_page_size);
3509
3510  size_t lp_bytes = lp_end - lp_start;
3511
3512  assert(is_size_aligned(lp_bytes, large_page_size), "Must be");
3513
3514  if (lp_bytes == 0) {
3515    // The mapped region doesn't even span the start and the end of a large page.
3516    // Fall back to allocate a non-special area.
3517    ::munmap(start, end - start);
3518    return NULL;
3519  }
3520
3521  int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
3522
3523
3524  void* result;
3525
3526  if (start != lp_start) {
3527    result = ::mmap(start, lp_start - start, prot,
3528                    MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED,
3529                    -1, 0);
3530    if (result == MAP_FAILED) {
3531      ::munmap(lp_start, end - lp_start);
3532      return NULL;
3533    }
3534  }
3535
3536  result = ::mmap(lp_start, lp_bytes, prot,
3537                  MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED|MAP_HUGETLB,
3538                  -1, 0);
3539  if (result == MAP_FAILED) {
3540    warn_on_large_pages_failure(req_addr, bytes, errno);
3541    // If the mmap above fails, the large pages region will be unmapped and we
3542    // have regions before and after with small pages. Release these regions.
3543    //
3544    // |  mapped  |  unmapped  |  mapped  |
3545    // ^          ^            ^          ^
3546    // start      lp_start     lp_end     end
3547    //
3548    ::munmap(start, lp_start - start);
3549    ::munmap(lp_end, end - lp_end);
3550    return NULL;
3551  }
3552
3553  if (lp_end != end) {
3554    result = ::mmap(lp_end, end - lp_end, prot,
3555                    MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED,
3556                    -1, 0);
3557    if (result == MAP_FAILED) {
3558      ::munmap(start, lp_end - start);
3559      return NULL;
3560    }
3561  }
3562
3563  return start;
3564}
3565
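// Worked example for the mixed case above (illustrative; assumes a 2M
// large page size and hypothetical addresses):
//
//   start    = 0x7f0000100000                 // 1M past a 2M boundary
//   bytes    = 4M, so end = 0x7f0000500000
//   lp_start = align_ptr_up(start, 2M)   = 0x7f0000200000
//   lp_end   = align_ptr_down(end, 2M)   = 0x7f0000400000
//
// i.e. a 1M head and a 1M tail of small pages around one 2M large page,
// the |mapped|...|mapped| layout the failure diagram above refers to.
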
3566char* os::Linux::reserve_memory_special_huge_tlbfs(size_t bytes, size_t alignment, char* req_addr, bool exec) {
3567  assert(UseLargePages && UseHugeTLBFS, "only for Huge TLBFS large pages");
3568  assert(is_ptr_aligned(req_addr, alignment), "Must be");
3569  assert(is_power_of_2(alignment), "Must be");
3570  assert(is_power_of_2(os::large_page_size()), "Must be");
3571  assert(bytes >= os::large_page_size(), "Shouldn't allocate large pages for small sizes");
3572
3573  if (is_size_aligned(bytes, os::large_page_size()) && alignment <= os::large_page_size()) {
3574    return reserve_memory_special_huge_tlbfs_only(bytes, req_addr, exec);
3575  } else {
3576    return reserve_memory_special_huge_tlbfs_mixed(bytes, alignment, req_addr, exec);
3577  }
3578}
3579
3580char* os::reserve_memory_special(size_t bytes, size_t alignment, char* req_addr, bool exec) {
3581  assert(UseLargePages, "only for large pages");
3582
3583  char* addr;
3584  if (UseSHM) {
3585    addr = os::Linux::reserve_memory_special_shm(bytes, alignment, req_addr, exec);
3586  } else {
3587    assert(UseHugeTLBFS, "must be");
3588    addr = os::Linux::reserve_memory_special_huge_tlbfs(bytes, alignment, req_addr, exec);
3589  }
3590
3591  if (addr != NULL) {
3592    if (UseNUMAInterleaving) {
3593      numa_make_global(addr, bytes);
3594    }
3595
3596    // The memory is committed
3597    MemTracker::record_virtual_memory_reserve_and_commit((address)addr, bytes, mtNone, CALLER_PC);
3598  }
3599
3600  return addr;
3601}
3602
3603bool os::Linux::release_memory_special_shm(char* base, size_t bytes) {
3604  // detaching the SHM segment will also delete it, see reserve_memory_special_shm()
3605  return shmdt(base) == 0;
3606}
3607
3608bool os::Linux::release_memory_special_huge_tlbfs(char* base, size_t bytes) {
3609  return pd_release_memory(base, bytes);
3610}
3611
3612bool os::release_memory_special(char* base, size_t bytes) {
3613  assert(UseLargePages, "only for large pages");
3614
3615  MemTracker::Tracker tkr = MemTracker::get_virtual_memory_release_tracker();
3616
3617  bool res;
3618  if (UseSHM) {
3619    res = os::Linux::release_memory_special_shm(base, bytes);
3620  } else {
3621    assert(UseHugeTLBFS, "must be");
3622    res = os::Linux::release_memory_special_huge_tlbfs(base, bytes);
3623  }
3624
3625  if (res) {
3626    tkr.record((address)base, bytes);
3627  } else {
3628    tkr.discard();
3629  }
3630
3631  return res;
3632}
3633
3634size_t os::large_page_size() {
3635  return _large_page_size;
3636}
3637
3638// With SysV SHM the entire memory region must be allocated as shared
3639// memory.
3640// HugeTLBFS allows an application to commit large page memory on demand.
3641// However, when committing memory with HugeTLBFS fails, the region
3642// that was supposed to be committed will lose the old reservation
3643// and allow other threads to steal that memory region. Because of this
3644// behavior we can't commit HugeTLBFS memory.
3645bool os::can_commit_large_page_memory() {
3646  return UseTransparentHugePages;
3647}
3648
3649bool os::can_execute_large_page_memory() {
3650  return UseTransparentHugePages || UseHugeTLBFS;
3651}
3652
3653// Reserve memory at an arbitrary address, only if that area is
3654// available (and not reserved for something else).
3655
3656char* os::pd_attempt_reserve_memory_at(size_t bytes, char* requested_addr) {
3657  const int max_tries = 10;
3658  char* base[max_tries];
3659  size_t size[max_tries];
3660  const size_t gap = 0x000000;
3661
3662  // Assert only that the size is a multiple of the page size, since
3663  // that's all that mmap requires, and since that's all we really know
3664  // about at this low abstraction level.  If we need higher alignment,
3665  // we can either pass an alignment to this method or verify alignment
3666  // in one of the methods further up the call chain.  See bug 5044738.
3667  assert(bytes % os::vm_page_size() == 0, "reserving unexpected size block");
3668
3669  // Repeatedly allocate blocks until the block is allocated at the
3670  // right spot. Give up after max_tries. Note that reserve_memory() will
3671  // automatically update _highest_vm_reserved_address if the call is
3672  // successful. The variable tracks the highest memory address ever reserved
3673  // by the JVM. It is used to detect heap-stack collisions if running with
3674  // fixed-stack LinuxThreads. Because here we may attempt to reserve more
3675  // space than needed, it could confuse the collision-detecting code. To
3676  // solve the problem, save the current _highest_vm_reserved_address and
3677  // calculate the correct value before returning.
3678  address old_highest = _highest_vm_reserved_address;
3679
3680  // Linux mmap allows caller to pass an address as hint; give it a try first,
3681  // if kernel honors the hint then we can return immediately.
3682  char * addr = anon_mmap(requested_addr, bytes, false);
3683  if (addr == requested_addr) {
3684    return requested_addr;
3685  }
3686
3687  if (addr != NULL) {
3688    // mmap() succeeded, but not at the requested address; give it back
3689    anon_munmap(addr, bytes);
3690  }
3691
3692  int i;
3693  for (i = 0; i < max_tries; ++i) {
3694    base[i] = reserve_memory(bytes);
3695
3696    if (base[i] != NULL) {
3697      // Is this the block we wanted?
3698      if (base[i] == requested_addr) {
3699        size[i] = bytes;
3700        break;
3701      }
3702
3703      // Does this overlap the block we wanted? Give back the overlapped
3704      // parts and try again.
3705
3706      size_t top_overlap = requested_addr + (bytes + gap) - base[i];
3707      if (top_overlap >= 0 && top_overlap < bytes) {
3708        unmap_memory(base[i], top_overlap);
3709        base[i] += top_overlap;
3710        size[i] = bytes - top_overlap;
3711      } else {
3712        size_t bottom_overlap = base[i] + bytes - requested_addr;
3713        if (bottom_overlap >= 0 && bottom_overlap < bytes) {
3714          unmap_memory(requested_addr, bottom_overlap);
3715          size[i] = bytes - bottom_overlap;
3716        } else {
3717          size[i] = bytes;
3718        }
3719      }
3720    }
3721  }
3722
3723  // Give back the unused reserved pieces.
3724
3725  for (int j = 0; j < i; ++j) {
3726    if (base[j] != NULL) {
3727      unmap_memory(base[j], size[j]);
3728    }
3729  }
3730
3731  if (i < max_tries) {
3732    _highest_vm_reserved_address = MAX2(old_highest, (address)requested_addr + bytes);
3733    return requested_addr;
3734  } else {
3735    _highest_vm_reserved_address = old_highest;
3736    return NULL;
3737  }
3738}
3739
3740size_t os::read(int fd, void *buf, unsigned int nBytes) {
3741  return ::read(fd, buf, nBytes);
3742}
3743
3744//
3745// Short sleep, direct OS call.
3746//
3747// Note: certain versions of the Linux CFS scheduler (since 2.6.23) do not
3748// guarantee that sched_yield(2) will actually give up the CPU:
3749//
3750//   * If the thread is alone on its particular CPU, it keeps running.
3751//   * Before the introduction of "skip_buddy" (pre 2.6.39), with
3752//     "compat_yield" disabled.
3753//
3754// So calling this function with ms == 0 is an alternative to sched_yield().
3755//
3756void os::naked_short_sleep(jlong ms) {
3757  struct timespec req;
3758
3759  assert(ms < 1000, "Uninterruptible sleep; short time use only");
3760  req.tv_sec = 0;
3761  if (ms > 0) {
3762    req.tv_nsec = (ms % 1000) * 1000000;
3763  }
3764  else {
3765    req.tv_nsec = 1;
3766  }
3767
3768  nanosleep(&req, NULL);
3769
3770  return;
3771}
3772
3773// Sleep forever; naked call to OS-specific sleep; use with CAUTION
3774void os::infinite_sleep() {
3775  while (true) {    // sleep forever ...
3776    ::sleep(100);   // ... 100 seconds at a time
3777  }
3778}
3779
3780// Used to convert frequent JVM_Yield() to nops
3781bool os::dont_yield() {
3782  return DontYieldALot;
3783}
3784
3785void os::yield() {
3786  sched_yield();
3787}
3788
3789os::YieldResult os::NakedYield() { sched_yield(); return os::YIELD_UNKNOWN ;}
3790
3791void os::yield_all(int attempts) {
3792  // Yields to all threads, including threads with lower priorities.
3793  // On Linux all threads share the same priority class, so the Solaris-style
3794  // os::yield_all() with nanosleep(1ms) is not necessary.
3795  sched_yield();
3796}
3797
3798// Called from the tight loops to possibly influence time-sharing heuristics
3799void os::loop_breaker(int attempts) {
3800  os::yield_all(attempts);
3801}
3802
3803////////////////////////////////////////////////////////////////////////////////
3804// thread priority support
3805
3806// Note: Normal Linux applications are run with SCHED_OTHER policy. SCHED_OTHER
3807// only supports dynamic priority, static priority must be zero. For real-time
3808// applications, Linux supports SCHED_RR which allows static priority (1-99).
3809// However, for large multi-threaded applications, SCHED_RR is not only slower
3810// than SCHED_OTHER, but also very unstable (my volano tests hang hard 4 out
3811// of 5 runs - Sep 2005).
3812//
3813// The following code actually changes the niceness of kernel-thread/LWP. It
3814// assumes that setpriority() only modifies one kernel-thread/LWP, not the
3815// entire user process, and that user-level threads are mapped 1:1 to kernel
3816// threads. That has always been the case, but could change in the future. For
3817// this reason, the code should not be used as default (ThreadPriorityPolicy=0).
3818// It is only used when ThreadPriorityPolicy=1 and requires root privilege.
3819
3820int os::java_to_os_priority[CriticalPriority + 1] = {
3821  19,              // 0 Entry should never be used
3822
3823   4,              // 1 MinPriority
3824   3,              // 2
3825   2,              // 3
3826
3827   1,              // 4
3828   0,              // 5 NormPriority
3829  -1,              // 6
3830
3831  -2,              // 7
3832  -3,              // 8
3833  -4,              // 9 NearMaxPriority
3834
3835  -5,              // 10 MaxPriority
3836
3837  -5               // 11 CriticalPriority
3838};
3839
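// Illustrative effect of the table above (sketch): with ThreadPriorityPolicy=1,
// raising a Java thread to MaxPriority boils down to
//
//   setpriority(PRIO_PROCESS, tid, -5);
//
// and lowering the nice value requires root (or CAP_SYS_NICE), which is
// why prio_init() below falls back to ThreadPriorityPolicy=0 for
// non-root users.
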
3840static int prio_init() {
3841  if (ThreadPriorityPolicy == 1) {
3842    // Only root can raise thread priority. Don't allow ThreadPriorityPolicy=1
3843    // if effective uid is not root. Perhaps, a more elegant way of doing
3844    // this is to test CAP_SYS_NICE capability, but that will require libcap.so
3845    if (geteuid() != 0) {
3846      if (!FLAG_IS_DEFAULT(ThreadPriorityPolicy)) {
3847        warning("-XX:ThreadPriorityPolicy requires root privilege on Linux");
3848      }
3849      ThreadPriorityPolicy = 0;
3850    }
3851  }
3852  if (UseCriticalJavaThreadPriority) {
3853    os::java_to_os_priority[MaxPriority] = os::java_to_os_priority[CriticalPriority];
3854  }
3855  return 0;
3856}
3857
3858OSReturn os::set_native_priority(Thread* thread, int newpri) {
3859  if ( !UseThreadPriorities || ThreadPriorityPolicy == 0 ) return OS_OK;
3860
3861  int ret = setpriority(PRIO_PROCESS, thread->osthread()->thread_id(), newpri);
3862  return (ret == 0) ? OS_OK : OS_ERR;
3863}
3864
3865OSReturn os::get_native_priority(const Thread* const thread, int *priority_ptr) {
3866  if (!UseThreadPriorities || ThreadPriorityPolicy == 0) {
3867    *priority_ptr = java_to_os_priority[NormPriority];
3868    return OS_OK;
3869  }
3870
3871  errno = 0;
3872  *priority_ptr = getpriority(PRIO_PROCESS, thread->osthread()->thread_id());
3873  return (*priority_ptr != -1 || errno == 0 ? OS_OK : OS_ERR);
3874}
3875
3876// Hint to the underlying OS that a task switch would not be good.
3877// Void return because it's a hint and can fail.
3878void os::hint_no_preempt() {}
3879
3880////////////////////////////////////////////////////////////////////////////////
3881// suspend/resume support
3882
3883//  The low-level signal-based suspend/resume support is a remnant from the
3884//  old VM-suspension mechanism that used to serve java-suspension, safepoints,
3885//  etc. within hotspot. Now there is a single use-case for it:
3886//    - calling get_thread_pc() on the VMThread by the flat-profiler task
3887//      that runs in the watcher thread.
3888//  The remaining code is greatly simplified from the more general suspension
3889//  code that used to be used.
3890//
3891//  The protocol is quite simple:
3892//  - suspend:
3893//      - sends a signal to the target thread
3894//      - polls the suspend state of the osthread using a yield loop
3895//      - target thread signal handler (SR_handler) sets suspend state
3896//        and blocks in sigsuspend until continued
3897//  - resume:
3898//      - sets target osthread state to continue
3899//      - sends signal to end the sigsuspend loop in the SR_handler
3900//
3901//  Note that the SR_lock plays no role in this suspend/resume protocol.
3902//
3903
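// Sequence sketch of the protocol above (illustrative):
//
//   suspender thread                      target thread (SR_handler)
//   ----------------                      ---------------------------
//   sr.request_suspend()
//   pthread_kill(tid, SR_signum)  ----->  sr.suspended()
//   sr_semaphore.timedwait()      <-----  sr_semaphore.signal()
//                                         sigsuspend(&suspend_set)  // parked
//   sr.request_wakeup()
//   pthread_kill(tid, SR_signum)  ----->  sr.running()
//   sr_semaphore.timedwait()      <-----  sr_semaphore.signal()
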
3904static void resume_clear_context(OSThread *osthread) {
3905  osthread->set_ucontext(NULL);
3906  osthread->set_siginfo(NULL);
3907}
3908
3909static void suspend_save_context(OSThread *osthread, siginfo_t* siginfo, ucontext_t* context) {
3910  osthread->set_ucontext(context);
3911  osthread->set_siginfo(siginfo);
3912}
3913
3914//
3915// Handler function invoked when a thread's execution is suspended or
3916// resumed. We have to be careful that only async-safe functions are
3917// called here (Note: most pthread functions are not async safe and
3918// should be avoided.)
3919//
3920// Note: sigwait() is a more natural fit than sigsuspend() from an
3921// interface point of view, but sigwait() prevents the signal handler
3922// from being run. libpthread would get very confused by not having
3923// its signal handlers run, which prevents sigwait()'s use with the
3924// mutex-granting signal.
3925//
3926// Currently only ever called on the VMThread and JavaThreads (PC sampling)
3927//
3928static void SR_handler(int sig, siginfo_t* siginfo, ucontext_t* context) {
3929  // Save and restore errno to avoid confusing native code with EINTR
3930  // after sigsuspend.
3931  int old_errno = errno;
3932
3933  Thread* thread = Thread::current();
3934  OSThread* osthread = thread->osthread();
3935  assert(thread->is_VM_thread() || thread->is_Java_thread(), "Must be VMThread or JavaThread");
3936
3937  os::SuspendResume::State current = osthread->sr.state();
3938  if (current == os::SuspendResume::SR_SUSPEND_REQUEST) {
3939    suspend_save_context(osthread, siginfo, context);
3940
3941    // attempt to switch the state, we assume we had a SUSPEND_REQUEST
3942    os::SuspendResume::State state = osthread->sr.suspended();
3943    if (state == os::SuspendResume::SR_SUSPENDED) {
3944      sigset_t suspend_set;  // signals for sigsuspend()
3945
3946      // get current set of blocked signals and unblock resume signal
3947      pthread_sigmask(SIG_BLOCK, NULL, &suspend_set);
3948      sigdelset(&suspend_set, SR_signum);
3949
3950      sr_semaphore.signal();
3951      // wait here until we are resumed
3952      while (1) {
3953        sigsuspend(&suspend_set);
3954
3955        os::SuspendResume::State result = osthread->sr.running();
3956        if (result == os::SuspendResume::SR_RUNNING) {
3957          sr_semaphore.signal();
3958          break;
3959        }
3960      }
3961
3962    } else if (state == os::SuspendResume::SR_RUNNING) {
3963      // request was cancelled, continue
3964    } else {
3965      ShouldNotReachHere();
3966    }
3967
3968    resume_clear_context(osthread);
3969  } else if (current == os::SuspendResume::SR_RUNNING) {
3970    // request was cancelled, continue
3971  } else if (current == os::SuspendResume::SR_WAKEUP_REQUEST) {
3972    // ignore
3973  } else {
3974    // ignore
3975  }
3976
3977  errno = old_errno;
3978}
3979
3980
3981static int SR_initialize() {
3982  struct sigaction act;
3983  char *s;
3984  /* Get signal number to use for suspend/resume */
3985  if ((s = ::getenv("_JAVA_SR_SIGNUM")) != 0) {
3986    int sig = ::strtol(s, 0, 10);
3987    if (sig > 0 && sig < _NSIG) {
3988      SR_signum = sig;
3989    }
3990  }
3991
3992  assert(SR_signum > SIGSEGV && SR_signum > SIGBUS,
3993        "SR_signum must be greater than max(SIGSEGV, SIGBUS), see 4355769");
3994
3995  sigemptyset(&SR_sigset);
3996  sigaddset(&SR_sigset, SR_signum);
3997
3998  /* Set up signal handler for suspend/resume */
3999  act.sa_flags = SA_RESTART|SA_SIGINFO;
4000  act.sa_handler = (void (*)(int)) SR_handler;
4001
4002  // SR_signum is blocked by default.
4003  // 4528190 - We also need to block pthread restart signal (32 on all
4004  // supported Linux platforms). Note that LinuxThreads need to block
4005  // this signal for all threads to work properly. So we don't have
4006  // to use hard-coded signal number when setting up the mask.
4007  pthread_sigmask(SIG_BLOCK, NULL, &act.sa_mask);
4008
4009  if (sigaction(SR_signum, &act, 0) == -1) {
4010    return -1;
4011  }
4012
4013  // Save signal flag
4014  os::Linux::set_our_sigflags(SR_signum, act.sa_flags);
4015  return 0;
4016}
4017
4018static int sr_notify(OSThread* osthread) {
4019  int status = pthread_kill(osthread->pthread_id(), SR_signum);
4020  assert_status(status == 0, status, "pthread_kill");
4021  return status;
4022}
4023
4024// "Randomly" selected value for how long we want to spin
4025// before bailing out on suspending a thread, also how often
4026// we send a signal to a thread we want to resume
4027static const int RANDOMLY_LARGE_INTEGER = 1000000;
4028static const int RANDOMLY_LARGE_INTEGER2 = 100;
4029
4030// Returns true on success and false on error - really an error is fatal,
4031// but this seems to be the normal response to library errors.
4032static bool do_suspend(OSThread* osthread) {
4033  assert(osthread->sr.is_running(), "thread should be running");
4034  assert(!sr_semaphore.trywait(), "semaphore has invalid state");
4035
4036  // mark as suspended and send signal
4037  if (osthread->sr.request_suspend() != os::SuspendResume::SR_SUSPEND_REQUEST) {
4038    // failed to switch, state wasn't running?
4039    ShouldNotReachHere();
4040    return false;
4041  }
4042
4043  if (sr_notify(osthread) != 0) {
4044    ShouldNotReachHere();
4045  }
4046
4047  // managed to send the signal and switch to SUSPEND_REQUEST, now wait for SUSPENDED
4048  while (true) {
4049    if (sr_semaphore.timedwait(0, 2 * NANOSECS_PER_MILLISEC)) {
4050      break;
4051    } else {
4052      // timeout
4053      os::SuspendResume::State cancelled = osthread->sr.cancel_suspend();
4054      if (cancelled == os::SuspendResume::SR_RUNNING) {
4055        return false;
4056      } else if (cancelled == os::SuspendResume::SR_SUSPENDED) {
4057        // make sure that we consume the signal on the semaphore as well
4058        sr_semaphore.wait();
4059        break;
4060      } else {
4061        ShouldNotReachHere();
4062        return false;
4063      }
4064    }
4065  }
4066
4067  guarantee(osthread->sr.is_suspended(), "Must be suspended");
4068  return true;
4069}
4070
4071static void do_resume(OSThread* osthread) {
4072  assert(osthread->sr.is_suspended(), "thread should be suspended");
4073  assert(!sr_semaphore.trywait(), "invalid semaphore state");
4074
4075  if (osthread->sr.request_wakeup() != os::SuspendResume::SR_WAKEUP_REQUEST) {
4076    // failed to switch to WAKEUP_REQUEST
4077    ShouldNotReachHere();
4078    return;
4079  }
4080
4081  while (true) {
4082    if (sr_notify(osthread) == 0) {
4083      if (sr_semaphore.timedwait(0, 2 * NANOSECS_PER_MILLISEC)) {
4084        if (osthread->sr.is_running()) {
4085          return;
4086        }
4087      }
4088    } else {
4089      ShouldNotReachHere();
4090    }
4091  }
4092
4093  guarantee(osthread->sr.is_running(), "Must be running!");
4094}
4095
4096///////////////////////////////////////////////////////////////////////////////////
4097// signal handling (except suspend/resume)
4098
4099// This routine may be used by user applications as a "hook" to catch signals.
4100// The user-defined signal handler must pass unrecognized signals to this
4101// routine, and if it returns true (non-zero), then the signal handler must
4102// return immediately.  If the flag "abort_if_unrecognized" is true, then this
4103// routine will never return false (zero), but instead will execute a VM panic
4104// routine that kills the process.
4105//
4106// If this routine returns false, it is OK to call it again.  This allows
4107// the user-defined signal handler to perform checks either before or after
4108// the VM performs its own checks.  Naturally, the user code would be making
4109// a serious error if it tried to handle an exception (such as a null check
4110// or breakpoint) that the VM was generating for its own correct operation.
4111//
4112// This routine may recognize any of the following kinds of signals:
4113//    SIGBUS, SIGSEGV, SIGILL, SIGFPE, SIGQUIT, SIGPIPE, SIGXFSZ, SIGUSR1.
4114// It should be consulted by handlers for any of those signals.
4115//
4116// The caller of this routine must pass in the three arguments supplied
4117// to the function referred to in the "sa_sigaction" (not the "sa_handler")
4118// field of the structure passed to sigaction().  This routine assumes that
4119// the sa_flags field passed to sigaction() includes SA_SIGINFO and SA_RESTART.
4120//
4121// Note that the VM will print warnings if it detects conflicting signal
4122// handlers, unless invoked with the option "-XX:+AllowUserSignalHandlers".
4123//
4124extern "C" JNIEXPORT int
4125JVM_handle_linux_signal(int signo, siginfo_t* siginfo,
4126                        void* ucontext, int abort_if_unrecognized);
4127
4128void signalHandler(int sig, siginfo_t* info, void* uc) {
4129  assert(info != NULL && uc != NULL, "it must be old kernel");
4130  int orig_errno = errno;  // Preserve errno value over signal handler.
4131  JVM_handle_linux_signal(sig, info, uc, true);
4132  errno = orig_errno;
4133}
4134
4135
4136// This boolean allows users to forward their own non-matching signals
4137// to JVM_handle_linux_signal, harmlessly.
4138bool os::Linux::signal_handlers_are_installed = false;
4139
4140// For signal-chaining
4141struct sigaction os::Linux::sigact[MAXSIGNUM];
4142unsigned int os::Linux::sigs = 0;
4143bool os::Linux::libjsig_is_loaded = false;
4144typedef struct sigaction *(*get_signal_t)(int);
4145get_signal_t os::Linux::get_signal_action = NULL;
4146
4147struct sigaction* os::Linux::get_chained_signal_action(int sig) {
4148  struct sigaction *actp = NULL;
4149
4150  if (libjsig_is_loaded) {
4151    // Retrieve the old signal handler from libjsig
4152    actp = (*get_signal_action)(sig);
4153  }
4154  if (actp == NULL) {
4155    // Retrieve the preinstalled signal handler from jvm
4156    actp = get_preinstalled_handler(sig);
4157  }
4158
4159  return actp;
4160}
4161
4162static bool call_chained_handler(struct sigaction *actp, int sig,
4163                                 siginfo_t *siginfo, void *context) {
4164  // Call the old signal handler
4165  if (actp->sa_handler == SIG_DFL) {
4166    // It's more reasonable to let jvm treat it as an unexpected exception
4167    // instead of taking the default action.
4168    return false;
4169  } else if (actp->sa_handler != SIG_IGN) {
4170    if ((actp->sa_flags & SA_NODEFER) == 0) {
4171      // automatically block the signal
4172      sigaddset(&(actp->sa_mask), sig);
4173    }
4174
4175    sa_handler_t hand;
4176    sa_sigaction_t sa;
4177    bool siginfo_flag_set = (actp->sa_flags & SA_SIGINFO) != 0;
4178    // retrieve the chained handler
4179    if (siginfo_flag_set) {
4180      sa = actp->sa_sigaction;
4181    } else {
4182      hand = actp->sa_handler;
4183    }
4184
4185    if ((actp->sa_flags & SA_RESETHAND) != 0) {
4186      actp->sa_handler = SIG_DFL;
4187    }
4188
4189    // try to honor the signal mask
4190    sigset_t oset;
4191    pthread_sigmask(SIG_SETMASK, &(actp->sa_mask), &oset);
4192
4193    // call into the chained handler
4194    if (siginfo_flag_set) {
4195      (*sa)(sig, siginfo, context);
4196    } else {
4197      (*hand)(sig);
4198    }
4199
4200    // restore the signal mask
4201    pthread_sigmask(SIG_SETMASK, &oset, 0);
4202  }
4203  // Tell jvm's signal handler the signal is taken care of.
4204  return true;
4205}
4206
4207bool os::Linux::chained_handler(int sig, siginfo_t* siginfo, void* context) {
4208  bool chained = false;
4209  // signal-chaining
4210  if (UseSignalChaining) {
4211    struct sigaction *actp = get_chained_signal_action(sig);
4212    if (actp != NULL) {
4213      chained = call_chained_handler(actp, sig, siginfo, context);
4214    }
4215  }
4216  return chained;
4217}
4218
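// Typical deployment of signal-chaining (a sketch; the library path is an
// example and depends on the JDK layout):
//
//   $ LD_PRELOAD=$JAVA_HOME/jre/lib/amd64/libjsig.so java MyApp
//
// libjsig interposes sigaction()/signal(), records handlers installed
// around the VM's own, and exposes them via JVM_get_signal_action() so
// that chained_handler() above can invoke them.
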
4219struct sigaction* os::Linux::get_preinstalled_handler(int sig) {
4220  if ((( (unsigned int)1 << sig ) & sigs) != 0) {
4221    return &sigact[sig];
4222  }
4223  return NULL;
4224}
4225
4226void os::Linux::save_preinstalled_handler(int sig, struct sigaction& oldAct) {
4227  assert(sig > 0 && sig < MAXSIGNUM, "vm signal out of expected range");
4228  sigact[sig] = oldAct;
4229  sigs |= (unsigned int)1 << sig;
4230}
4231
4232// for diagnostics
4233int os::Linux::sigflags[MAXSIGNUM];
4234
4235int os::Linux::get_our_sigflags(int sig) {
4236  assert(sig > 0 && sig < MAXSIGNUM, "vm signal out of expected range");
4237  return sigflags[sig];
4238}
4239
4240void os::Linux::set_our_sigflags(int sig, int flags) {
4241  assert(sig > 0 && sig < MAXSIGNUM, "vm signal out of expected range");
4242  sigflags[sig] = flags;
4243}
4244
4245void os::Linux::set_signal_handler(int sig, bool set_installed) {
4246  // Check for overwrite.
4247  struct sigaction oldAct;
4248  sigaction(sig, (struct sigaction*)NULL, &oldAct);
4249
4250  void* oldhand = oldAct.sa_sigaction
4251                ? CAST_FROM_FN_PTR(void*,  oldAct.sa_sigaction)
4252                : CAST_FROM_FN_PTR(void*,  oldAct.sa_handler);
4253  if (oldhand != CAST_FROM_FN_PTR(void*, SIG_DFL) &&
4254      oldhand != CAST_FROM_FN_PTR(void*, SIG_IGN) &&
4255      oldhand != CAST_FROM_FN_PTR(void*, (sa_sigaction_t)signalHandler)) {
4256    if (AllowUserSignalHandlers || !set_installed) {
4257      // Do not overwrite; user takes responsibility to forward to us.
4258      return;
4259    } else if (UseSignalChaining) {
4260      // save the old handler in jvm
4261      save_preinstalled_handler(sig, oldAct);
4262      // libjsig also interposes the sigaction() call below and saves the
4263      // old sigaction on its own.
4264    } else {
4265      fatal(err_msg("Encountered unexpected pre-existing sigaction handler "
4266                    "%#lx for signal %d.", (long)oldhand, sig));
4267    }
4268  }
4269
4270  struct sigaction sigAct;
4271  sigfillset(&(sigAct.sa_mask));
4272  sigAct.sa_handler = SIG_DFL;
4273  if (!set_installed) {
4274    sigAct.sa_flags = SA_SIGINFO|SA_RESTART;
4275  } else {
4276    sigAct.sa_sigaction = signalHandler;
4277    sigAct.sa_flags = SA_SIGINFO|SA_RESTART;
4278  }
4279  // Save the flags that we are about to set.
4280  assert(sig > 0 && sig < MAXSIGNUM, "vm signal out of expected range");
4281  sigflags[sig] = sigAct.sa_flags;
4282
4283  int ret = sigaction(sig, &sigAct, &oldAct);
4284  assert(ret == 0, "check");
4285
4286  void* oldhand2  = oldAct.sa_sigaction
4287                  ? CAST_FROM_FN_PTR(void*, oldAct.sa_sigaction)
4288                  : CAST_FROM_FN_PTR(void*, oldAct.sa_handler);
4289  assert(oldhand2 == oldhand, "no concurrent signal handler installation");
4290}
4291
4292// install signal handlers for signals that HotSpot needs to
4293// handle in order to support Java-level exception handling.
4294
4295void os::Linux::install_signal_handlers() {
4296  if (!signal_handlers_are_installed) {
4297    signal_handlers_are_installed = true;
4298
4299    // signal-chaining
4300    typedef void (*signal_setting_t)();
4301    signal_setting_t begin_signal_setting = NULL;
4302    signal_setting_t end_signal_setting = NULL;
4303    begin_signal_setting = CAST_TO_FN_PTR(signal_setting_t,
4304                             dlsym(RTLD_DEFAULT, "JVM_begin_signal_setting"));
4305    if (begin_signal_setting != NULL) {
4306      end_signal_setting = CAST_TO_FN_PTR(signal_setting_t,
4307                             dlsym(RTLD_DEFAULT, "JVM_end_signal_setting"));
4308      get_signal_action = CAST_TO_FN_PTR(get_signal_t,
4309                            dlsym(RTLD_DEFAULT, "JVM_get_signal_action"));
4310      libjsig_is_loaded = true;
4311      assert(UseSignalChaining, "should enable signal-chaining");
4312    }
4313    if (libjsig_is_loaded) {
4314      // Tell libjsig jvm is setting signal handlers
4315      (*begin_signal_setting)();
4316    }
4317
4318    set_signal_handler(SIGSEGV, true);
4319    set_signal_handler(SIGPIPE, true);
4320    set_signal_handler(SIGBUS, true);
4321    set_signal_handler(SIGILL, true);
4322    set_signal_handler(SIGFPE, true);
4323#if defined(PPC64)
4324    set_signal_handler(SIGTRAP, true);
4325#endif
4326    set_signal_handler(SIGXFSZ, true);
4327
4328    if (libjsig_is_loaded) {
4329      // Tell libjsig jvm finishes setting signal handlers
4330      (*end_signal_setting)();
4331    }
4332
4333    // We don't activate the signal checker if libjsig is in place; we trust
4334    // ourselves, and if a UserSignalHandler is installed all bets are off.
4335    // Log that signal checking is off only if -verbose:jni is specified.
4336    if (CheckJNICalls) {
4337      if (libjsig_is_loaded) {
4338        if (PrintJNIResolving) {
4339          tty->print_cr("Info: libjsig is activated, all active signal checking is disabled");
4340        }
4341        check_signals = false;
4342      }
4343      if (AllowUserSignalHandlers) {
4344        if (PrintJNIResolving) {
4345          tty->print_cr("Info: AllowUserSignalHandlers is activated, all active signal checking is disabled");
4346        }
4347        check_signals = false;
4348      }
4349    }
4350  }
4351}
4352
4353// This is the fastest way to get thread cpu time on Linux.
4354// Returns cpu time (user+sys) for any thread, not only for the current one.
4355// POSIX-compliant clocks are implemented in kernels 2.6.16+.
4356// It might work on 2.6.10+ with a special kernel/glibc patch.
4357// For reference, please, see IEEE Std 1003.1-2004:
4358//   http://www.unix.org/single_unix_specification
4359
4360jlong os::Linux::fast_thread_cpu_time(clockid_t clockid) {
4361  struct timespec tp;
4362  int rc = os::Linux::clock_gettime(clockid, &tp);
4363  assert(rc == 0, "clock_gettime is expected to return 0 code");
4364
4365  return (tp.tv_sec * NANOSECS_PER_SEC) + tp.tv_nsec;
4366}
4367
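// Illustrative use (sketch): obtain a clockid for a thread with
// pthread_getcpuclockid(3) and sample its CPU time.
//
//   clockid_t cid;
//   if (pthread_getcpuclockid(pthread_self(), &cid) == 0) {
//     jlong cpu_ns = os::Linux::fast_thread_cpu_time(cid);
//   }
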
4368/////
4369// glibc on Linux uses an undocumented flag (the SA_RESTORER bit,
4370// 0x04000000) to indicate that a special sort of signal
4371// trampoline is used.
4372// We never set this flag ourselves, so we should
4373// ignore it in our diagnostics.
4374#ifdef SIGNIFICANT_SIGNAL_MASK
4375#undef SIGNIFICANT_SIGNAL_MASK
4376#endif
4377#define SIGNIFICANT_SIGNAL_MASK (~0x04000000)
4378
4379static const char* get_signal_handler_name(address handler,
4380                                           char* buf, int buflen) {
4381  int offset;
4382  bool found = os::dll_address_to_library_name(handler, buf, buflen, &offset);
4383  if (found) {
4384    // skip directory names
4385    const char *p1, *p2;
4386    p1 = buf;
4387    size_t len = strlen(os::file_separator());
4388    while ((p2 = strstr(p1, os::file_separator())) != NULL) p1 = p2 + len;
4389    jio_snprintf(buf, buflen, "%s+0x%x", p1, offset);
4390  } else {
4391    jio_snprintf(buf, buflen, PTR_FORMAT, handler);
4392  }
4393  return buf;
4394}
4395
4396static void print_signal_handler(outputStream* st, int sig,
4397                                 char* buf, size_t buflen) {
4398  struct sigaction sa;
4399
4400  sigaction(sig, NULL, &sa);
4401
4402  // See comment for SIGNIFICANT_SIGNAL_MASK define
4403  sa.sa_flags &= SIGNIFICANT_SIGNAL_MASK;
4404
4405  st->print("%s: ", os::exception_name(sig, buf, buflen));
4406
4407  address handler = (sa.sa_flags & SA_SIGINFO)
4408    ? CAST_FROM_FN_PTR(address, sa.sa_sigaction)
4409    : CAST_FROM_FN_PTR(address, sa.sa_handler);
4410
4411  if (handler == CAST_FROM_FN_PTR(address, SIG_DFL)) {
4412    st->print("SIG_DFL");
4413  } else if (handler == CAST_FROM_FN_PTR(address, SIG_IGN)) {
4414    st->print("SIG_IGN");
4415  } else {
4416    st->print("[%s]", get_signal_handler_name(handler, buf, buflen));
4417  }
4418
4419  st->print(", sa_mask[0]=");
4420  os::Posix::print_signal_set_short(st, &sa.sa_mask);
4421
4422  address rh = VMError::get_resetted_sighandler(sig);
4423  // Maybe the handler was reset by VMError?
4424  if (rh != NULL) {
4425    handler = rh;
4426    sa.sa_flags = VMError::get_resetted_sigflags(sig) & SIGNIFICANT_SIGNAL_MASK;
4427  }
4428
4429  st->print(", sa_flags=");
4430  os::Posix::print_sa_flags(st, sa.sa_flags);
4431
4432  // Check: is it our handler?
4433  if (handler == CAST_FROM_FN_PTR(address, (sa_sigaction_t)signalHandler) ||
4434      handler == CAST_FROM_FN_PTR(address, (sa_sigaction_t)SR_handler)) {
4435    // It is our signal handler;
4436    // check whether the flags match the ones we set.
4437    if ((int)sa.sa_flags != os::Linux::get_our_sigflags(sig)) {
4438      st->print(
4439                ", flags were changed from " PTR32_FORMAT ", consider using jsig library",
4440                os::Linux::get_our_sigflags(sig));
4441    }
4442  }
4443  st->cr();
4444}
4445
4446
4447#define DO_SIGNAL_CHECK(sig) \
4448  if (!sigismember(&check_signal_done, sig)) \
4449    os::Linux::check_signal_handler(sig)
4450
4451// This method is a periodic task to check for misbehaving JNI applications
4452// under CheckJNI; we can add any other periodic checks here.
4453
4454void os::run_periodic_checks() {
4455
4456  if (check_signals == false) return;
4457
4458  // SEGV and BUS, if overridden, could potentially prevent the
4459  // generation of hs*.log in the event of a crash; debugging
4460  // such a case can be very challenging, so we check the
4461  // following for good measure:
4462  DO_SIGNAL_CHECK(SIGSEGV);
4463  DO_SIGNAL_CHECK(SIGILL);
4464  DO_SIGNAL_CHECK(SIGFPE);
4465  DO_SIGNAL_CHECK(SIGBUS);
4466  DO_SIGNAL_CHECK(SIGPIPE);
4467  DO_SIGNAL_CHECK(SIGXFSZ);
4468#if defined(PPC64)
4469  DO_SIGNAL_CHECK(SIGTRAP);
4470#endif
4471
4472  // ReduceSignalUsage allows the user to override these handlers
4473  // see comments at the very top and in jvm_linux.h
4474  if (!ReduceSignalUsage) {
4475    DO_SIGNAL_CHECK(SHUTDOWN1_SIGNAL);
4476    DO_SIGNAL_CHECK(SHUTDOWN2_SIGNAL);
4477    DO_SIGNAL_CHECK(SHUTDOWN3_SIGNAL);
4478    DO_SIGNAL_CHECK(BREAK_SIGNAL);
4479  }
4480
4481  DO_SIGNAL_CHECK(SR_signum);
4482  DO_SIGNAL_CHECK(INTERRUPT_SIGNAL);
4483}
4484
4485typedef int (*os_sigaction_t)(int, const struct sigaction *, struct sigaction *);
4486
4487static os_sigaction_t os_sigaction = NULL;
4488
4489void os::Linux::check_signal_handler(int sig) {
4490  char buf[O_BUFLEN];
4491  address jvmHandler = NULL;
4492
4493
4494  struct sigaction act;
4495  if (os_sigaction == NULL) {
4496    // only trust the default sigaction, in case it has been interposed
4497    os_sigaction = (os_sigaction_t)dlsym(RTLD_DEFAULT, "sigaction");
4498    if (os_sigaction == NULL) return;
4499  }
4500
4501  os_sigaction(sig, (struct sigaction*)NULL, &act);
4502
4503
4504  act.sa_flags &= SIGNIFICANT_SIGNAL_MASK;
4505
4506  address thisHandler = (act.sa_flags & SA_SIGINFO)
4507    ? CAST_FROM_FN_PTR(address, act.sa_sigaction)
4508    : CAST_FROM_FN_PTR(address, act.sa_handler) ;
4509
4510
4511  switch(sig) {
4512  case SIGSEGV:
4513  case SIGBUS:
4514  case SIGFPE:
4515  case SIGPIPE:
4516  case SIGILL:
4517  case SIGXFSZ:
4518    jvmHandler = CAST_FROM_FN_PTR(address, (sa_sigaction_t)signalHandler);
4519    break;
4520
4521  case SHUTDOWN1_SIGNAL:
4522  case SHUTDOWN2_SIGNAL:
4523  case SHUTDOWN3_SIGNAL:
4524  case BREAK_SIGNAL:
4525    jvmHandler = (address)user_handler();
4526    break;
4527
4528  case INTERRUPT_SIGNAL:
4529    jvmHandler = CAST_FROM_FN_PTR(address, SIG_DFL);
4530    break;
4531
4532  default:
4533    if (sig == SR_signum) {
4534      jvmHandler = CAST_FROM_FN_PTR(address, (sa_sigaction_t)SR_handler);
4535    } else {
4536      return;
4537    }
4538    break;
4539  }
4540
4541  if (thisHandler != jvmHandler) {
4542    tty->print("Warning: %s handler ", exception_name(sig, buf, O_BUFLEN));
4543    tty->print("expected:%s", get_signal_handler_name(jvmHandler, buf, O_BUFLEN));
4544    tty->print_cr("  found:%s", get_signal_handler_name(thisHandler, buf, O_BUFLEN));
4545    // No need to check this sig any longer
4546    sigaddset(&check_signal_done, sig);
4547    // Running under a non-interactive shell, SHUTDOWN2_SIGNAL will be reassigned SIG_IGN
4548    if (sig == SHUTDOWN2_SIGNAL && !isatty(fileno(stdin))) {
4549      tty->print_cr("Running in a non-interactive shell, %s handler is replaced by shell",
4550                    exception_name(sig, buf, O_BUFLEN));
4551    }
4552  } else if(os::Linux::get_our_sigflags(sig) != 0 && (int)act.sa_flags != os::Linux::get_our_sigflags(sig)) {
4553    tty->print("Warning: %s handler flags ", exception_name(sig, buf, O_BUFLEN));
4554    tty->print("expected:" PTR32_FORMAT, os::Linux::get_our_sigflags(sig));
4555    tty->print_cr("  found:" PTR32_FORMAT, act.sa_flags);
4556    // No need to check this sig any longer
4557    sigaddset(&check_signal_done, sig);
4558  }
4559
4560  // Dump all the signal handlers
4561  if (sigismember(&check_signal_done, sig)) {
4562    print_signal_handlers(tty, buf, O_BUFLEN);
4563  }
4564}
4565
4566extern void report_error(char* file_name, int line_no, char* title, char* format, ...);
4567
4568extern bool signal_name(int signo, char* buf, size_t len);
4569
4570const char* os::exception_name(int exception_code, char* buf, size_t size) {
4571  if (0 < exception_code && exception_code <= SIGRTMAX) {
4572    // signal
4573    if (!signal_name(exception_code, buf, size)) {
4574      jio_snprintf(buf, size, "SIG%d", exception_code);
4575    }
4576    return buf;
4577  } else {
4578    return NULL;
4579  }
4580}
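
// Usage sketch (illustrative only):
//
//   char buf[O_BUFLEN];
//   const char* name = os::exception_name(SIGSEGV, buf, sizeof(buf));
//   // name is, e.g., "SIGSEGV"; NULL is returned for codes outside (0, SIGRTMAX]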
4581
4582// this is called _before_ most of the global arguments have been parsed
4583void os::init(void) {
4584  char dummy;   /* used to get a guess on initial stack address */
4585//  first_hrtime = gethrtime();
4586
4587  // With LinuxThreads the JavaMain thread pid (primordial thread)
4588  // is different than the pid of the java launcher thread.
4589  // So, on Linux, the launcher thread pid is passed to the VM
4590  // via the sun.java.launcher.pid property.
4591  // Use this property instead of getpid() if it was correctly passed.
4592  // See bug 6351349.
4593  pid_t java_launcher_pid = (pid_t) Arguments::sun_java_launcher_pid();
4594
4595  _initial_pid = (java_launcher_pid > 0) ? java_launcher_pid : getpid();
4596
4597  clock_tics_per_sec = sysconf(_SC_CLK_TCK);
4598
4599  init_random(1234567);
4600
4601  ThreadCritical::initialize();
4602
4603  Linux::set_page_size(sysconf(_SC_PAGESIZE));
4604  if (Linux::page_size() == -1) {
4605    fatal(err_msg("os_linux.cpp: os::init: sysconf failed (%s)",
4606                  strerror(errno)));
4607  }
4608  init_page_sizes((size_t) Linux::page_size());
4609
4610  Linux::initialize_system_info();
4611
4612  // main_thread points to the aboriginal thread
4613  Linux::_main_thread = pthread_self();
4614
4615  Linux::clock_init();
4616  initial_time_count = javaTimeNanos();
4617
4618  // pthread_condattr initialization for monotonic clock
4619  int status;
4620  pthread_condattr_t* _condattr = os::Linux::condAttr();
4621  if ((status = pthread_condattr_init(_condattr)) != 0) {
4622    fatal(err_msg("pthread_condattr_init: %s", strerror(status)));
4623  }
4624  // Only set the clock if CLOCK_MONOTONIC is available
4625  if (os::supports_monotonic_clock()) {
4626    if ((status = pthread_condattr_setclock(_condattr, CLOCK_MONOTONIC)) != 0) {
4627      if (status == EINVAL) {
4628        warning("Unable to use monotonic clock with relative timed-waits" \
4629                " - changes to the time-of-day clock may have adverse effects");
4630      } else {
4631        fatal(err_msg("pthread_condattr_setclock: %s", strerror(status)));
4632      }
4633    }
4634  }
4635  // else it defaults to CLOCK_REALTIME
4636
4637  pthread_mutex_init(&dl_mutex, NULL);
4638
4639  // If the pagesize of the VM is greater than 8K determine the appropriate
4640  // number of initial guard pages.  The user can change this with the
4641  // command line arguments, if needed.
4642  if (vm_page_size() > (int)Linux::vm_default_page_size()) {
4643    StackYellowPages = 1;
4644    StackRedPages = 1;
4645    StackShadowPages = round_to((StackShadowPages*Linux::vm_default_page_size()), vm_page_size()) / vm_page_size();
4646  }
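
  // Worked example (illustrative only): with a 64K vm_page_size() and a 4K
  // default page size, a StackShadowPages value of 20 becomes
  // round_to(20 * 4K, 64K) / 64K == 128K / 64K == 2 pages.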
4647}
4648
4649// To install functions for atexit system call
4650extern "C" {
4651  static void perfMemory_exit_helper() {
4652    perfMemory_exit();
4653  }
4654}
4655
4656// this is called _after_ the global arguments have been parsed
4657jint os::init_2(void)
4658{
4659  Linux::fast_thread_clock_init();
4660
4661  // Allocate a single page and mark it as readable for safepoint polling
4662  address polling_page = (address) ::mmap(NULL, Linux::page_size(), PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
4663  guarantee( polling_page != MAP_FAILED, "os::init_2: failed to allocate polling page" );
4664
4665  os::set_polling_page( polling_page );
4666
4667#ifndef PRODUCT
4668  if(Verbose && PrintMiscellaneous)
4669    tty->print("[SafePoint Polling address: " INTPTR_FORMAT "]\n", (intptr_t)polling_page);
4670#endif
4671
4672  if (!UseMembar) {
4673    address mem_serialize_page = (address) ::mmap(NULL, Linux::page_size(), PROT_READ | PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
4674    guarantee( mem_serialize_page != MAP_FAILED, "mmap Failed for memory serialize page");
4675    os::set_memory_serialize_page( mem_serialize_page );
4676
4677#ifndef PRODUCT
4678    if(Verbose && PrintMiscellaneous)
4679      tty->print("[Memory Serialize  Page address: " INTPTR_FORMAT "]\n", (intptr_t)mem_serialize_page);
4680#endif
4681  }
4682
4683  // initialize suspend/resume support - must do this before signal_sets_init()
4684  if (SR_initialize() != 0) {
4685    perror("SR_initialize failed");
4686    return JNI_ERR;
4687  }
4688
4689  Linux::signal_sets_init();
4690  Linux::install_signal_handlers();
4691
4692  // Check minimum allowable stack size for thread creation and to initialize
4693  // the java system classes, including StackOverflowError - depends on page
4694  // size.  Add a page for compiler2 recursion in main thread.
4695  // Add in 2*BytesPerWord times page size to account for VM stack during
4696  // class initialization depending on 32 or 64 bit VM.
4697  os::Linux::min_stack_allowed = MAX2(os::Linux::min_stack_allowed,
4698            (size_t)(StackYellowPages+StackRedPages+StackShadowPages) * Linux::page_size() +
4699                    (2*BytesPerWord COMPILER2_PRESENT(+1)) * Linux::vm_default_page_size());
4700
4701  size_t threadStackSizeInBytes = ThreadStackSize * K;
4702  if (threadStackSizeInBytes != 0 &&
4703      threadStackSizeInBytes < os::Linux::min_stack_allowed) {
4704        tty->print_cr("\nThe stack size specified is too small, "
4705                      "specify at least %dk",
4706                      os::Linux::min_stack_allowed / K);
4707        return JNI_ERR;
4708  }
4709
4710  // Make the stack size a multiple of the page size so that
4711  // the yellow/red zones can be guarded.
4712  JavaThread::set_stack_size_at_create(round_to(threadStackSizeInBytes,
4713        vm_page_size()));
4714
4715  Linux::capture_initial_stack(JavaThread::stack_size_at_create());
4716
4717#if defined(IA32)
4718  workaround_expand_exec_shield_cs_limit();
4719#endif
4720
4721  Linux::libpthread_init();
4722  if (PrintMiscellaneous && (Verbose || WizardMode)) {
4723     tty->print_cr("[HotSpot is running with %s, %s(%s)]\n",
4724          Linux::glibc_version(), Linux::libpthread_version(),
4725          Linux::is_floating_stack() ? "floating stack" : "fixed stack");
4726  }
4727
4728  if (UseNUMA) {
4729    if (!Linux::libnuma_init()) {
4730      UseNUMA = false;
4731    } else {
4732      if (Linux::numa_max_node() < 1) {
4733        // There's only one node (they start from 0), disable NUMA.
4734        UseNUMA = false;
4735      }
4736    }
4737    // With SHM and HugeTLBFS large pages we cannot uncommit a page, so there's no way
4738    // we can make the adaptive lgrp chunk resizing work. If the user specified
4739    // both UseNUMA and UseLargePages (or UseSHM/UseHugeTLBFS) on the command line - warn and
4740    // disable adaptive resizing.
4741    if (UseNUMA && UseLargePages && !can_commit_large_page_memory()) {
4742      if (FLAG_IS_DEFAULT(UseNUMA)) {
4743        UseNUMA = false;
4744      } else {
4745        if (FLAG_IS_DEFAULT(UseLargePages) &&
4746            FLAG_IS_DEFAULT(UseSHM) &&
4747            FLAG_IS_DEFAULT(UseHugeTLBFS)) {
4748          UseLargePages = false;
4749        } else {
4750          warning("UseNUMA is not fully compatible with SHM/HugeTLBFS large pages, disabling adaptive resizing");
4751          UseAdaptiveSizePolicy = false;
4752          UseAdaptiveNUMAChunkSizing = false;
4753        }
4754      }
4755    }
4756    if (!UseNUMA && ForceNUMA) {
4757      UseNUMA = true;
4758    }
4759  }
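
  // Example (illustrative only): starting the VM with explicit
  // -XX:+UseNUMA -XX:+UseLargePages on a kernel where large pages cannot be
  // uncommitted takes the warning branch above: both flags stay set, but
  // UseAdaptiveSizePolicy and UseAdaptiveNUMAChunkSizing are switched off.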
4760
4761  if (MaxFDLimit) {
4762    // set the number of file descriptors to max. print out error
4763    // if getrlimit/setrlimit fails but continue regardless.
4764    struct rlimit nbr_files;
4765    int status = getrlimit(RLIMIT_NOFILE, &nbr_files);
4766    if (status != 0) {
4767      if (PrintMiscellaneous && (Verbose || WizardMode))
4768        perror("os::init_2 getrlimit failed");
4769    } else {
4770      nbr_files.rlim_cur = nbr_files.rlim_max;
4771      status = setrlimit(RLIMIT_NOFILE, &nbr_files);
4772      if (status != 0) {
4773        if (PrintMiscellaneous && (Verbose || WizardMode))
4774          perror("os::init_2 setrlimit failed");
4775      }
4776    }
4777  }
4778
4779  // Initialize lock used to serialize thread creation (see os::create_thread)
4780  Linux::set_createThread_lock(new Mutex(Mutex::leaf, "createThread_lock", false));
4781
4782  // at-exit methods are called in the reverse order of their registration.
4783  // atexit functions are called on return from main or as a result of a
4784  // call to exit(3). There can be only 32 of these functions registered
4785  // and atexit() does not set errno.
4786
4787  if (PerfAllowAtExitRegistration) {
4788    // only register atexit functions if PerfAllowAtExitRegistration is set.
4789    // atexit functions can be delayed until process exit time, which
4790    // can be problematic for embedded VM situations. Embedded VMs should
4791    // call DestroyJavaVM() to assure that VM resources are released.
4792
4793    // note: perfMemory_exit_helper atexit function may be removed in
4794    // the future if the appropriate cleanup code can be added to the
4795    // VM_Exit VMOperation's doit method.
4796    if (atexit(perfMemory_exit_helper) != 0) {
4797      warning("os::init_2 atexit(perfMemory_exit_helper) failed");
4798    }
4799  }
4800
4801  // initialize thread priority policy
4802  prio_init();
4803
4804  return JNI_OK;
4805}
4806
4807// this is called at the end of vm_initialization
4808void os::init_3(void) {
4809#ifdef JAVASE_EMBEDDED
4810  // Start the MemNotifyThread
4811  if (LowMemoryProtection) {
4812    MemNotifyThread::start();
4813  }
4814  return;
4815#endif
4816}
4817
4818// Mark the polling page as unreadable
4819void os::make_polling_page_unreadable(void) {
4820  if( !guard_memory((char*)_polling_page, Linux::page_size()) )
4821    fatal("Could not disable polling page");
4822};
4823
4824// Mark the polling page as readable
4825void os::make_polling_page_readable(void) {
4826  if( !linux_mprotect((char *)_polling_page, Linux::page_size(), PROT_READ)) {
4827    fatal("Could not enable polling page");
4828  }
4829};
4830
4831int os::active_processor_count() {
4832  // Linux doesn't yet have an (official) notion of processor sets,
4833  // so just return the number of online processors.
4834  int online_cpus = ::sysconf(_SC_NPROCESSORS_ONLN);
4835  assert(online_cpus > 0 && online_cpus <= processor_count(), "sanity check");
4836  return online_cpus;
4837}
4838
4839void os::set_native_thread_name(const char *name) {
4840  // Not yet implemented.
4841  return;
4842}
4843
4844bool os::distribute_processes(uint length, uint* distribution) {
4845  // Not yet implemented.
4846  return false;
4847}
4848
4849bool os::bind_to_processor(uint processor_id) {
4850  // Not yet implemented.
4851  return false;
4852}
4853
4854///
4855
4856void os::SuspendedThreadTask::internal_do_task() {
4857  if (do_suspend(_thread->osthread())) {
4858    SuspendedThreadTaskContext context(_thread, _thread->osthread()->ucontext());
4859    do_task(context);
4860    do_resume(_thread->osthread());
4861  }
4862}
4863
4864class PcFetcher : public os::SuspendedThreadTask {
4865public:
4866  PcFetcher(Thread* thread) : os::SuspendedThreadTask(thread) {}
4867  ExtendedPC result();
4868protected:
4869  void do_task(const os::SuspendedThreadTaskContext& context);
4870private:
4871  ExtendedPC _epc;
4872};
4873
4874ExtendedPC PcFetcher::result() {
4875  guarantee(is_done(), "task is not done yet.");
4876  return _epc;
4877}
4878
4879void PcFetcher::do_task(const os::SuspendedThreadTaskContext& context) {
4880  Thread* thread = context.thread();
4881  OSThread* osthread = thread->osthread();
4882  if (osthread->ucontext() != NULL) {
4883    _epc = os::Linux::ucontext_get_pc((ucontext_t *) context.ucontext());
4884  } else {
4885    // NULL context is unexpected, double-check this is the VMThread
4886    guarantee(thread->is_VM_thread(), "can only be called for VMThread");
4887  }
4888}
4889
4890// Suspends the target using the signal mechanism and then grabs the PC before
4891// resuming the target. Used by the flat-profiler only
4892ExtendedPC os::get_thread_pc(Thread* thread) {
4893  // Make sure that it is called by the watcher for the VMThread
4894  assert(Thread::current()->is_Watcher_thread(), "Must be watcher");
4895  assert(thread->is_VM_thread(), "Can only be called for VMThread");
4896
4897  PcFetcher fetcher(thread);
4898  fetcher.run();
4899  return fetcher.result();
4900}
4901
4902int os::Linux::safe_cond_timedwait(pthread_cond_t *_cond, pthread_mutex_t *_mutex, const struct timespec *_abstime)
4903{
4904   if (is_NPTL()) {
4905      return pthread_cond_timedwait(_cond, _mutex, _abstime);
4906   } else {
4907      // 6292965: LinuxThreads pthread_cond_timedwait() resets FPU control
4908      // word back to default 64bit precision if condvar is signaled. Java
4909      // wants 53bit precision.  Save and restore current value.
4910      int fpu = get_fpu_control_word();
4911      int status = pthread_cond_timedwait(_cond, _mutex, _abstime);
4912      set_fpu_control_word(fpu);
4913      return status;
4914   }
4915}
4916
4917////////////////////////////////////////////////////////////////////////////////
4918// debug support
4919
4920bool os::find(address addr, outputStream* st) {
4921  Dl_info dlinfo;
4922  memset(&dlinfo, 0, sizeof(dlinfo));
4923  if (dladdr(addr, &dlinfo) != 0) {
4924    st->print(PTR_FORMAT ": ", addr);
4925    if (dlinfo.dli_sname != NULL && dlinfo.dli_saddr != NULL) {
4926      st->print("%s+%#x", dlinfo.dli_sname,
4927                 addr - (intptr_t)dlinfo.dli_saddr);
4928    } else if (dlinfo.dli_fbase != NULL) {
4929      st->print("<offset %#x>", addr - (intptr_t)dlinfo.dli_fbase);
4930    } else {
4931      st->print("<absolute address>");
4932    }
4933    if (dlinfo.dli_fname != NULL) {
4934      st->print(" in %s", dlinfo.dli_fname);
4935    }
4936    if (dlinfo.dli_fbase != NULL) {
4937      st->print(" at " PTR_FORMAT, dlinfo.dli_fbase);
4938    }
4939    st->cr();
4940
4941    if (Verbose) {
4942      // decode some bytes around the PC
4943      address begin = clamp_address_in_page(addr-40, addr, os::vm_page_size());
4944      address end   = clamp_address_in_page(addr+40, addr, os::vm_page_size());
4945      address       lowest = (address) dlinfo.dli_sname;
4946      if (!lowest)  lowest = (address) dlinfo.dli_fbase;
4947      if (begin < lowest)  begin = lowest;
4948      Dl_info dlinfo2;
4949      if (dladdr(end, &dlinfo2) != 0 && dlinfo2.dli_saddr != dlinfo.dli_saddr
4950          && end > dlinfo2.dli_saddr && dlinfo2.dli_saddr > begin)
4951        end = (address) dlinfo2.dli_saddr;
4952      Disassembler::decode(begin, end, st);
4953    }
4954    return true;
4955  }
4956  return false;
4957}
4958
4959////////////////////////////////////////////////////////////////////////////////
4960// misc
4961
4962// This does not do anything on Linux. This is basically a hook for being
4963// able to use structured exception handling (thread-local exception filters)
4964// on, e.g., Win32.
4965void
4966os::os_exception_wrapper(java_call_t f, JavaValue* value, methodHandle* method,
4967                         JavaCallArguments* args, Thread* thread) {
4968  f(value, method, args, thread);
4969}
4970
4971void os::print_statistics() {
4972}
4973
4974int os::message_box(const char* title, const char* message) {
4975  int i;
4976  fdStream err(defaultStream::error_fd());
4977  for (i = 0; i < 78; i++) err.print_raw("=");
4978  err.cr();
4979  err.print_raw_cr(title);
4980  for (i = 0; i < 78; i++) err.print_raw("-");
4981  err.cr();
4982  err.print_raw_cr(message);
4983  for (i = 0; i < 78; i++) err.print_raw("=");
4984  err.cr();
4985
4986  char buf[16];
4987  // Prevent process from exiting upon "read error" without consuming all CPU
4988  while (::read(0, buf, sizeof(buf)) <= 0) { ::sleep(100); }
4989
4990  return buf[0] == 'y' || buf[0] == 'Y';
4991}
4992
4993int os::stat(const char *path, struct stat *sbuf) {
4994  char pathbuf[MAX_PATH];
4995  if (strlen(path) > MAX_PATH - 1) {
4996    errno = ENAMETOOLONG;
4997    return -1;
4998  }
4999  os::native_path(strcpy(pathbuf, path));
5000  return ::stat(pathbuf, sbuf);
5001}
5002
5003bool os::check_heap(bool force) {
5004  return true;
5005}
5006
5007int local_vsnprintf(char* buf, size_t count, const char* format, va_list args) {
5008  return ::vsnprintf(buf, count, format, args);
5009}
5010
5011// Is a (classpath) directory empty?
5012bool os::dir_is_empty(const char* path) {
5013  DIR *dir = NULL;
5014  struct dirent *ptr;
5015
5016  dir = opendir(path);
5017  if (dir == NULL) return true;
5018
5019  /* Scan the directory */
5020  bool result = true;
5021  char buf[sizeof(struct dirent) + MAX_PATH];
5022  while (result && (ptr = ::readdir(dir)) != NULL) {
5023    if (strcmp(ptr->d_name, ".") != 0 && strcmp(ptr->d_name, "..") != 0) {
5024      result = false;
5025    }
5026  }
5027  closedir(dir);
5028  return result;
5029}
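
// Behavior sketch (illustrative only): only the "." and ".." entries are
// ignored, and an unreadable or missing path also reports empty because an
// opendir() failure returns true:
//
//   bool empty = os::dir_is_empty("/no/such/dir");   // true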
5030
5031// This code originates from JDK's sysOpen and open64_w
5032// from src/solaris/hpi/src/system_md.c
5033
5034#ifndef O_DELETE
5035#define O_DELETE 0x10000
5036#endif
5037
5038// Open a file. Unlink the file immediately after open returns
5039// if the specified oflag has the O_DELETE flag set.
5040// O_DELETE is used only in j2se/src/share/native/java/util/zip/ZipFile.c
5041
5042int os::open(const char *path, int oflag, int mode) {
5043
5044  if (strlen(path) > MAX_PATH - 1) {
5045    errno = ENAMETOOLONG;
5046    return -1;
5047  }
5048  int fd;
5049  int o_delete = (oflag & O_DELETE);
5050  oflag = oflag & ~O_DELETE;
5051
5052  fd = ::open64(path, oflag, mode);
5053  if (fd == -1) return -1;
5054
5055  // If the open succeeded, the file might still be a directory
5056  {
5057    struct stat64 buf64;
5058    int ret = ::fstat64(fd, &buf64);
5059    int st_mode = buf64.st_mode;
5060
5061    if (ret != -1) {
5062      if ((st_mode & S_IFMT) == S_IFDIR) {
5063        errno = EISDIR;
5064        ::close(fd);
5065        return -1;
5066      }
5067    } else {
5068      ::close(fd);
5069      return -1;
5070    }
5071  }
5072
5073    /*
5074     * All file descriptors that are opened in the JVM and not
5075     * specifically destined for a subprocess should have the
5076     * close-on-exec flag set.  If we don't set it, then careless 3rd
5077     * party native code might fork and exec without closing all
5078     * appropriate file descriptors (e.g. as we do in closeDescriptors in
5079     * UNIXProcess.c), and this in turn might:
5080     *
5081     * - cause end-of-file to fail to be detected on some file
5082     *   descriptors, resulting in mysterious hangs, or
5083     *
5084     * - might cause an fopen in the subprocess to fail on a system
5085     *   suffering from bug 1085341.
5086     *
5087     * (Yes, the default setting of the close-on-exec flag is a Unix
5088     * design flaw)
5089     *
5090     * See:
5091     * 1085341: 32-bit stdio routines should support file descriptors >255
5092     * 4843136: (process) pipe file descriptor from Runtime.exec not being closed
5093     * 6339493: (process) Runtime.exec does not close all file descriptors on Solaris 9
5094     */
5095#ifdef FD_CLOEXEC
5096    {
5097        int flags = ::fcntl(fd, F_GETFD);
5098        if (flags != -1)
5099            ::fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
5100    }
5101#endif
5102
5103  if (o_delete != 0) {
5104    ::unlink(path);
5105  }
5106  return fd;
5107}
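
// Usage sketch (illustrative only, with a hypothetical path): opening a
// scratch file that vanishes from the namespace immediately while the
// descriptor remains usable:
//
//   int fd = os::open("/tmp/scratch", O_RDWR | O_CREAT | O_DELETE, 0600);
//   // the path has been unlink()ed; reads and writes through fd still work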
5108
5109
5110// create binary file, rewriting existing file if required
5111int os::create_binary_file(const char* path, bool rewrite_existing) {
5112  int oflags = O_WRONLY | O_CREAT;
5113  if (!rewrite_existing) {
5114    oflags |= O_EXCL;
5115  }
5116  return ::open64(path, oflags, S_IREAD | S_IWRITE);
5117}
5118
5119// return current position of file pointer
5120jlong os::current_file_offset(int fd) {
5121  return (jlong)::lseek64(fd, (off64_t)0, SEEK_CUR);
5122}
5123
5124// move file pointer to the specified offset
5125jlong os::seek_to_file_offset(int fd, jlong offset) {
5126  return (jlong)::lseek64(fd, (off64_t)offset, SEEK_SET);
5127}
5128
5129// This code originates from JDK's sysAvailable
5130// from src/solaris/hpi/src/native_threads/src/sys_api_td.c
5131
5132int os::available(int fd, jlong *bytes) {
5133  jlong cur, end;
5134  int mode;
5135  struct stat64 buf64;
5136
5137  if (::fstat64(fd, &buf64) >= 0) {
5138    mode = buf64.st_mode;
5139    if (S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) {
5140      /*
5141       * XXX: is the following call interruptible? If so, this might
5142       * need to go through the INTERRUPT_IO() wrapper as for other
5143       * blocking, interruptible calls in this file.
5144       */
5145      int n;
5146      if (::ioctl(fd, FIONREAD, &n) >= 0) {
5147        *bytes = n;
5148        return 1;
5149      }
5150    }
5151  }
5152  if ((cur = ::lseek64(fd, 0L, SEEK_CUR)) == -1) {
5153    return 0;
5154  } else if ((end = ::lseek64(fd, 0L, SEEK_END)) == -1) {
5155    return 0;
5156  } else if (::lseek64(fd, cur, SEEK_SET) == -1) {
5157    return 0;
5158  }
5159  *bytes = end - cur;
5160  return 1;
5161}
5162
5163int os::socket_available(int fd, jint *pbytes) {
5164  // Linux doc says EINTR not returned, unlike Solaris
5165  int ret = ::ioctl(fd, FIONREAD, pbytes);
5166
5167  //%% note ioctl can return 0 when successful, JVM_SocketAvailable
5168  // is expected to return 0 on failure and 1 on success to the jdk.
5169  return (ret < 0) ? 0 : 1;
5170}
5171
5172// Map a block of memory.
5173char* os::pd_map_memory(int fd, const char* file_name, size_t file_offset,
5174                     char *addr, size_t bytes, bool read_only,
5175                     bool allow_exec) {
5176  int prot;
5177  int flags = MAP_PRIVATE;
5178
5179  if (read_only) {
5180    prot = PROT_READ;
5181  } else {
5182    prot = PROT_READ | PROT_WRITE;
5183  }
5184
5185  if (allow_exec) {
5186    prot |= PROT_EXEC;
5187  }
5188
5189  if (addr != NULL) {
5190    flags |= MAP_FIXED;
5191  }
5192
5193  char* mapped_address = (char*)mmap(addr, (size_t)bytes, prot, flags,
5194                                     fd, file_offset);
5195  if (mapped_address == MAP_FAILED) {
5196    return NULL;
5197  }
5198  return mapped_address;
5199}
5200
5201
5202// Remap a block of memory.
5203char* os::pd_remap_memory(int fd, const char* file_name, size_t file_offset,
5204                       char *addr, size_t bytes, bool read_only,
5205                       bool allow_exec) {
5206  // same as map_memory() on this OS
5207  return os::map_memory(fd, file_name, file_offset, addr, bytes, read_only,
5208                        allow_exec);
5209}
5210
5211
5212// Unmap a block of memory.
5213bool os::pd_unmap_memory(char* addr, size_t bytes) {
5214  return munmap(addr, bytes) == 0;
5215}
5216
5217static jlong slow_thread_cpu_time(Thread *thread, bool user_sys_cpu_time);
5218
5219static clockid_t thread_cpu_clockid(Thread* thread) {
5220  pthread_t tid = thread->osthread()->pthread_id();
5221  clockid_t clockid;
5222
5223  // Get thread clockid
5224  int rc = os::Linux::pthread_getcpuclockid(tid, &clockid);
5225  assert(rc == 0, "pthread_getcpuclockid is expected to return 0 code");
5226  return clockid;
5227}
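
// Usage sketch (illustrative only): the returned clockid can be handed to
// clock_gettime() to sample that thread's CPU time, which is what the
// fast_thread_cpu_time() path does with it:
//
//   struct timespec ts;
//   if (::clock_gettime(thread_cpu_clockid(thread), &ts) == 0) {
//     jlong cpu_ns = (jlong)ts.tv_sec * NANOSECS_PER_SEC + ts.tv_nsec;
//   }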
5228
5229// current_thread_cpu_time(bool) and thread_cpu_time(Thread*, bool)
5230// are used by JVM M&M and JVMTI to get user+sys or user CPU time
5231// of a thread.
5232//
5233// current_thread_cpu_time() and thread_cpu_time(Thread*) returns
5234// the fast estimate available on the platform.
5235
5236jlong os::current_thread_cpu_time() {
5237  if (os::Linux::supports_fast_thread_cpu_time()) {
5238    return os::Linux::fast_thread_cpu_time(CLOCK_THREAD_CPUTIME_ID);
5239  } else {
5240    // return user + sys since the cost is the same
5241    return slow_thread_cpu_time(Thread::current(), true /* user + sys */);
5242  }
5243}
5244
5245jlong os::thread_cpu_time(Thread* thread) {
5246  // consistent with what current_thread_cpu_time() returns
5247  if (os::Linux::supports_fast_thread_cpu_time()) {
5248    return os::Linux::fast_thread_cpu_time(thread_cpu_clockid(thread));
5249  } else {
5250    return slow_thread_cpu_time(thread, true /* user + sys */);
5251  }
5252}
5253
5254jlong os::current_thread_cpu_time(bool user_sys_cpu_time) {
5255  if (user_sys_cpu_time && os::Linux::supports_fast_thread_cpu_time()) {
5256    return os::Linux::fast_thread_cpu_time(CLOCK_THREAD_CPUTIME_ID);
5257  } else {
5258    return slow_thread_cpu_time(Thread::current(), user_sys_cpu_time);
5259  }
5260}
5261
5262jlong os::thread_cpu_time(Thread *thread, bool user_sys_cpu_time) {
5263  if (user_sys_cpu_time && os::Linux::supports_fast_thread_cpu_time()) {
5264    return os::Linux::fast_thread_cpu_time(thread_cpu_clockid(thread));
5265  } else {
5266    return slow_thread_cpu_time(thread, user_sys_cpu_time);
5267  }
5268}
5269
5270//
5271//  -1 on error.
5272//
5273
5274static jlong slow_thread_cpu_time(Thread *thread, bool user_sys_cpu_time) {
5275  static bool proc_task_unchecked = true;
5276  pid_t  tid = thread->osthread()->thread_id();
5277  char *s;
5278  char stat[2048];
5279  int statlen;
5280  char proc_name[64];
5281  int count;
5282  long sys_time, user_time;
5283  char cdummy;
5284  int idummy;
5285  long ldummy;
5286  FILE *fp;
5287
5288  snprintf(proc_name, 64, "/proc/%d/stat", tid);
5289
5290  // The /proc/<tid>/stat aggregates per-process usage on
5291  // new Linux kernels 2.6+ where NPTL is supported.
5292  // The /proc/self/task/<tid>/stat still has the per-thread usage.
5293  // See bug 6328462.
5294  // There possibly can be cases where there is no directory
5295  // /proc/self/task, so we check its availability.
5296  if (proc_task_unchecked && os::Linux::is_NPTL()) {
5297    // This is executed only once
5298    proc_task_unchecked = false;
5299    fp = fopen("/proc/self/task", "r");
5300    if (fp != NULL) {
5301      snprintf(proc_name, 64, "/proc/self/task/%d/stat", tid);
5302      fclose(fp);
5303    }
5304  }
5305
5306  fp = fopen(proc_name, "r");
5307  if ( fp == NULL ) return -1;
5308  statlen = fread(stat, 1, 2047, fp);
5309  stat[statlen] = '\0';
5310  fclose(fp);
5311
5312  // Skip pid and the command string. Note that we could be dealing with
5313  // weird command names, e.g. user could decide to rename java launcher
5314  // to "java 1.4.2 :)", then the stat file would look like
5315  //                1234 (java 1.4.2 :)) R ... ...
5316  // We don't really need to know the command string, just find the last
5317  // occurrence of ")" and then start parsing from there. See bug 4726580.
5318  s = strrchr(stat, ')');
5319  if (s == NULL ) return -1;
5320
5321  // Skip blank chars
5322  do s++; while (isspace(*s));
5323
5324  count = sscanf(s,"%c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu",
5325                 &cdummy, &idummy, &idummy, &idummy, &idummy, &idummy,
5326                 &ldummy, &ldummy, &ldummy, &ldummy, &ldummy,
5327                 &user_time, &sys_time);
5328  if ( count != 13 ) return -1;
5329  if (user_sys_cpu_time) {
5330    return ((jlong)sys_time + (jlong)user_time) * (1000000000 / clock_tics_per_sec);
5331  } else {
5332    return (jlong)user_time * (1000000000 / clock_tics_per_sec);
5333  }
5334}
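
// Worked example (illustrative only): with clock_tics_per_sec == 100 (the
// common USER_HZ), one tick is 1000000000 / 100 == 10,000,000 ns, so a
// user_time of 42 ticks converts to 420,000,000 ns of user CPU time.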
5335
5336void os::current_thread_cpu_time_info(jvmtiTimerInfo *info_ptr) {
5337  info_ptr->max_value = ALL_64_BITS;       // will not wrap in less than 64 bits
5338  info_ptr->may_skip_backward = false;     // elapsed time not wall time
5339  info_ptr->may_skip_forward = false;      // elapsed time not wall time
5340  info_ptr->kind = JVMTI_TIMER_TOTAL_CPU;  // user+system time is returned
5341}
5342
5343void os::thread_cpu_time_info(jvmtiTimerInfo *info_ptr) {
5344  info_ptr->max_value = ALL_64_BITS;       // will not wrap in less than 64 bits
5345  info_ptr->may_skip_backward = false;     // elapsed time not wall time
5346  info_ptr->may_skip_forward = false;      // elapsed time not wall time
5347  info_ptr->kind = JVMTI_TIMER_TOTAL_CPU;  // user+system time is returned
5348}
5349
5350bool os::is_thread_cpu_time_supported() {
5351  return true;
5352}
5353
5354// System loadavg support.  Returns -1 if load average cannot be obtained.
5355// Linux doesn't yet have an (official) notion of processor sets,
5356// so just return the system wide load average.
5357int os::loadavg(double loadavg[], int nelem) {
5358  return ::getloadavg(loadavg, nelem);
5359}
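
// Usage sketch (illustrative only):
//
//   double avg[3];
//   if (os::loadavg(avg, 3) == 3) {
//     // avg[0], avg[1], avg[2] hold the 1, 5 and 15 minute load averages
//   }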
5360
5361void os::pause() {
5362  char filename[MAX_PATH];
5363  if (PauseAtStartupFile && PauseAtStartupFile[0]) {
5364    jio_snprintf(filename, MAX_PATH, PauseAtStartupFile);
5365  } else {
5366    jio_snprintf(filename, MAX_PATH, "./vm.paused.%d", current_process_id());
5367  }
5368
5369  int fd = ::open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0666);
5370  if (fd != -1) {
5371    struct stat buf;
5372    ::close(fd);
5373    while (::stat(filename, &buf) == 0) {
5374      (void)::poll(NULL, 0, 100);
5375    }
5376  } else {
5377    jio_fprintf(stderr,
5378      "Could not open pause file '%s', continuing immediately.\n", filename);
5379  }
5380}
5381
5382
5383// Refer to the comments in os_solaris.cpp park-unpark.
5384//
5385// Beware -- Some versions of NPTL embody a flaw where pthread_cond_timedwait() can
5386// hang indefinitely.  For instance NPTL 0.60 on 2.4.21-4ELsmp is vulnerable.
5387// For specifics regarding the bug see GLIBC BUGID 261237 :
5388//    http://www.mail-archive.com/debian-glibc@lists.debian.org/msg10837.html.
5389// Briefly, pthread_cond_timedwait() calls with an expiry time that's not in the future
5390// will either hang or corrupt the condvar, resulting in subsequent hangs if the condvar
5391// is used.  (The simple C test-case provided in the GLIBC bug report manifests the
5392// hang).  The JVM is vulnerable via sleep(), Object.wait(timo), LockSupport.parkNanos()
5393// and monitorenter when we're using 1-0 locking.  All those operations may result in
5394// calls to pthread_cond_timedwait().  Using LD_ASSUME_KERNEL to use an older version
5395// of libpthread avoids the problem, but isn't practical.
5396//
5397// Possible remedies:
5398//
5399// 1.   Establish a minimum relative wait time.  50 to 100 msecs seems to work.
5400//      This is palliative and probabilistic, however.  If the thread is preempted
5401//      between the call to compute_abstime() and pthread_cond_timedwait(), more
5402//      than the minimum period may have passed, and the abstime may be stale (in the
5403//      past), resulting in a hang.   Using this technique reduces the odds of a hang
5404//      but the JVM is still vulnerable, particularly on heavily loaded systems.
5405//
5406// 2.   Modify park-unpark to use per-thread (per ParkEvent) pipe-pairs instead
5407//      of the usual flag-condvar-mutex idiom.  The write side of the pipe is set
5408//      NDELAY. unpark() reduces to write(), park() reduces to read() and park(timo)
5409//      reduces to poll()+read().  This works well, but consumes 2 FDs per extant
5410//      thread.
5411//
5412// 3.   Embargo pthread_cond_timedwait() and implement a native "chron" thread
5413//      that manages timeouts.  We'd emulate pthread_cond_timedwait() by enqueuing
5414//      a timeout request to the chron thread and then blocking via pthread_cond_wait().
5415//      This also works well.  In fact it avoids kernel-level scalability impediments
5416//      on certain platforms that don't handle lots of active pthread_cond_timedwait()
5417//      timers in a graceful fashion.
5418//
5419// 4.   When the abstime value is in the past it appears that control returns
5420//      correctly from pthread_cond_timedwait(), but the condvar is left corrupt.
5421//      Subsequent timedwait/wait calls may hang indefinitely.  Given that, we
5422//      can avoid the problem by reinitializing the condvar -- by cond_destroy()
5423//      followed by cond_init() -- after all calls to pthread_cond_timedwait().
5424//      It may be possible to avoid reinitialization by checking the return
5425//      value from pthread_cond_timedwait().  In addition to reinitializing the
5426//      condvar we must establish the invariant that cond_signal() is only called
5427//      within critical sections protected by the adjunct mutex.  This prevents
5428//      cond_signal() from "seeing" a condvar that's in the midst of being
5429//      reinitialized or that is corrupt.  Sadly, this invariant obviates the
5430//      desirable signal-after-unlock optimization that avoids futile context switching.
5431//
5432//      I'm also concerned that some versions of NPTL might allocate an auxiliary
5433//      structure when a condvar is used or initialized.  cond_destroy()  would
5434//      release the helper structure.  Our reinitialize-after-timedwait fix
5435//      put excessive stress on malloc/free and locks protecting the c-heap.
5436//
5437// We currently use (4).  See the WorkAroundNPTLTimedWaitHang flag.
5438// It may be possible to refine (4) by checking the kernel and NPTL versions
5439// and only enabling the work-around for vulnerable environments.
5440
5441// utility to compute the abstime argument to timedwait:
5442// millis is the relative timeout time
5443// abstime will be the absolute timeout time
5444// TODO: replace compute_abstime() with unpackTime()
5445
5446static struct timespec* compute_abstime(timespec* abstime, jlong millis) {
5447  if (millis < 0)  millis = 0;
5448
5449  jlong seconds = millis / 1000;
5450  millis %= 1000;
5451  if (seconds > 50000000) { // see man cond_timedwait(3T)
5452    seconds = 50000000;
5453  }
5454
5455  if (os::supports_monotonic_clock()) {
5456    struct timespec now;
5457    int status = os::Linux::clock_gettime(CLOCK_MONOTONIC, &now);
5458    assert_status(status == 0, status, "clock_gettime");
5459    abstime->tv_sec = now.tv_sec  + seconds;
5460    long nanos = now.tv_nsec + millis * NANOSECS_PER_MILLISEC;
5461    if (nanos >= NANOSECS_PER_SEC) {
5462      abstime->tv_sec += 1;
5463      nanos -= NANOSECS_PER_SEC;
5464    }
5465    abstime->tv_nsec = nanos;
5466  } else {
5467    struct timeval now;
5468    int status = gettimeofday(&now, NULL);
5469    assert(status == 0, "gettimeofday");
5470    abstime->tv_sec = now.tv_sec  + seconds;
5471    long usec = now.tv_usec + millis * 1000;
5472    if (usec >= 1000000) {
5473      abstime->tv_sec += 1;
5474      usec -= 1000000;
5475    }
5476    abstime->tv_nsec = usec * 1000;
5477  }
5478  return abstime;
5479}
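
// Usage sketch (illustrative only): bounding a condvar wait at 250 ms,
// assuming cond/mutex are a pthread pair set up as elsewhere in this file
// and the mutex is already held:
//
//   struct timespec abst;
//   compute_abstime(&abst, 250 /* millis */);
//   int status = os::Linux::safe_cond_timedwait(cond, mutex, &abst);
//   // ETIME/ETIMEDOUT indicates the 250 ms deadline passed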
5480
5481
5482// Test-and-clear _Event, always leaves _Event set to 0, returns immediately.
5483// Conceptually TryPark() should be equivalent to park(0).
5484
5485int os::PlatformEvent::TryPark() {
5486  for (;;) {
5487    const int v = _Event ;
5488    guarantee ((v == 0) || (v == 1), "invariant") ;
5489    if (Atomic::cmpxchg (0, &_Event, v) == v) return v  ;
5490  }
5491}
5492
5493void os::PlatformEvent::park() {       // AKA "down()"
5494  // Invariant: Only the thread associated with the Event/PlatformEvent
5495  // may call park().
5496  // TODO: assert that _Assoc != NULL or _Assoc == Self
5497  int v ;
5498  for (;;) {
5499      v = _Event ;
5500      if (Atomic::cmpxchg (v-1, &_Event, v) == v) break ;
5501  }
5502  guarantee (v >= 0, "invariant") ;
5503  if (v == 0) {
5504     // Do this the hard way by blocking ...
5505     int status = pthread_mutex_lock(_mutex);
5506     assert_status(status == 0, status, "mutex_lock");
5507     guarantee (_nParked == 0, "invariant") ;
5508     ++ _nParked ;
5509     while (_Event < 0) {
5510        status = pthread_cond_wait(_cond, _mutex);
5511        // for some reason, under 2.7 lwp_cond_wait() may return ETIME ...
5512        // Treat this the same as if the wait was interrupted
5513        if (status == ETIME) { status = EINTR; }
5514        assert_status(status == 0 || status == EINTR, status, "cond_wait");
5515     }
5516     -- _nParked ;
5517
5518    _Event = 0 ;
5519     status = pthread_mutex_unlock(_mutex);
5520     assert_status(status == 0, status, "mutex_unlock");
5521    // Paranoia to ensure our locked and lock-free paths interact
5522    // correctly with each other.
5523    OrderAccess::fence();
5524  }
5525  guarantee (_Event >= 0, "invariant") ;
5526}
5527
5528int os::PlatformEvent::park(jlong millis) {
5529  guarantee (_nParked == 0, "invariant") ;
5530
5531  int v ;
5532  for (;;) {
5533      v = _Event ;
5534      if (Atomic::cmpxchg (v-1, &_Event, v) == v) break ;
5535  }
5536  guarantee (v >= 0, "invariant") ;
5537  if (v != 0) return OS_OK ;
5538
5539  // We do this the hard way, by blocking the thread.
5540  // Consider enforcing a minimum timeout value.
5541  struct timespec abst;
5542  compute_abstime(&abst, millis);
5543
5544  int ret = OS_TIMEOUT;
5545  int status = pthread_mutex_lock(_mutex);
5546  assert_status(status == 0, status, "mutex_lock");
5547  guarantee (_nParked == 0, "invariant") ;
5548  ++_nParked ;
5549
5550  // Object.wait(timo) will return because of
5551  // (a) notification
5552  // (b) timeout
5553  // (c) thread.interrupt
5554  //
5555  // Thread.interrupt and object.notify{All} both call Event::set.
5556  // That is, we treat thread.interrupt as a special case of notification.
5557  // The underlying implementation, pthread_cond_timedwait, admits
5558  // spurious/premature wakeups, but the JLS/JVM spec prevents the
5559  // JVM from making those visible to Java code.  As such, we must
5560  // filter out spurious wakeups.  We assume all ETIME returns are valid.
5561  //
5562  // TODO: properly differentiate simultaneous notify+interrupt.
5563  // In that case, we should propagate the notify to another waiter.
5564
5565  while (_Event < 0) {
5566    status = os::Linux::safe_cond_timedwait(_cond, _mutex, &abst);
5567    if (status != 0 && WorkAroundNPTLTimedWaitHang) {
5568      pthread_cond_destroy (_cond);
5569      pthread_cond_init (_cond, os::Linux::condAttr()) ;
5570    }
5571    assert_status(status == 0 || status == EINTR ||
5572                  status == ETIME || status == ETIMEDOUT,
5573                  status, "cond_timedwait");
5574    if (!FilterSpuriousWakeups) break ;                 // previous semantics
5575    if (status == ETIME || status == ETIMEDOUT) break ;
5576    // We consume and ignore EINTR and spurious wakeups.
5577  }
5578  --_nParked ;
5579  if (_Event >= 0) {
5580     ret = OS_OK;
5581  }
5582  _Event = 0 ;
5583  status = pthread_mutex_unlock(_mutex);
5584  assert_status(status == 0, status, "mutex_unlock");
5585  assert (_nParked == 0, "invariant") ;
5586  // Paranoia to ensure our locked and lock-free paths interact
5587  // correctly with each other.
5588  OrderAccess::fence();
5589  return ret;
5590}
5591
5592void os::PlatformEvent::unpark() {
5593  // Transitions for _Event:
5594  //    0 :=> 1
5595  //    1 :=> 1
5596  //   -1 :=> either 0 or 1; must signal target thread
5597  //          That is, we can safely transition _Event from -1 to either
5598  //          0 or 1. Forcing 1 is slightly more efficient for back-to-back
5599  //          unpark() calls.
5600  // See also: "Semaphores in Plan 9" by Mullender & Cox
5601  //
5602  // Note: Forcing a transition from "-1" to "1" on an unpark() means
5603  // that it will take two back-to-back park() calls for the owning
5604  // thread to block. This has the benefit of forcing a spurious return
5605  // from the first park() call after an unpark() call which will help
5606  // shake out uses of park() and unpark() without condition variables.
5607
5608  if (Atomic::xchg(1, &_Event) >= 0) return;
5609
5610  // Wait for the thread associated with the event to vacate
5611  int status = pthread_mutex_lock(_mutex);
5612  assert_status(status == 0, status, "mutex_lock");
5613  int AnyWaiters = _nParked;
5614  assert(AnyWaiters == 0 || AnyWaiters == 1, "invariant");
5615  if (AnyWaiters != 0 && WorkAroundNPTLTimedWaitHang) {
5616    AnyWaiters = 0;
5617    pthread_cond_signal(_cond);
5618  }
5619  status = pthread_mutex_unlock(_mutex);
5620  assert_status(status == 0, status, "mutex_unlock");
5621  if (AnyWaiters != 0) {
5622    status = pthread_cond_signal(_cond);
5623    assert_status(status == 0, status, "cond_signal");
5624  }
5625
5626  // Note that we signal() _after dropping the lock for "immortal" Events.
5627  // This is safe and avoids a common class of  futile wakeups.  In rare
5628  // circumstances this can cause a thread to return prematurely from
5629  // cond_{timed}wait() but the spurious wakeup is benign and the victim will
5630  // simply re-test the condition and re-park itself.
5631}
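
// Protocol sketch (illustrative only): a blocking handoff between two
// threads sharing a PlatformEvent ev.  The consumer blocks in park() while
// _Event is 0; unpark() publishes 1 and signals at most one parked waiter:
//
//   // consumer thread                    // producer thread
//   ev.park();   // blocks while 0        ev.unpark();   // _Event := 1, signal
//   // returns once _Event becomes 1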
5632
5633
5634// JSR166
5635// -------------------------------------------------------
5636
5637/*
5638 * The solaris and linux implementations of park/unpark are fairly
5639 * conservative for now, but can be improved. They currently use a
5640 * mutex/condvar pair, plus a count.
5641 * Park decrements count if > 0, else does a condvar wait.  Unpark
5642 * sets count to 1 and signals condvar.  Only one thread ever waits
5643 * on the condvar. Contention seen when trying to park implies that someone
5644 * is unparking you, so don't wait. And spurious returns are fine, so there
5645 * is no need to track notifications.
5646 */
5647
5648/*
5649 * This code is common to linux and solaris and will be moved to a
5650 * common place in dolphin.
5651 *
5652 * The passed in time value is either a relative time in nanoseconds
5653 * or an absolute time in milliseconds. Either way it has to be unpacked
5654 * into suitable seconds and nanoseconds components and stored in the
5655 * given timespec structure.
5656 * Given time is a 64-bit value and the time_t used in the timespec is only
5657 * a signed-32-bit value (except on 64-bit Linux) we have to watch for
5658 * overflow if times way in the future are given. Further on Solaris versions
5659 * prior to 10 there is a restriction (see cond_timedwait) that the specified
5660 * number of seconds, in abstime, is less than current_time  + 100,000,000.
5661 * As it will be 28 years before "now + 100000000" will overflow we can
5662 * ignore overflow and just impose a hard-limit on seconds using the value
5663 * of "now + 100,000,000". This places a limit on the timeout of about 3.17
5664 * years from "now".
5665 */
5666
5667static void unpackTime(timespec* absTime, bool isAbsolute, jlong time) {
5668  assert (time > 0, "convertTime");
5669  time_t max_secs = 0;
5670
5671  if (!os::supports_monotonic_clock() || isAbsolute) {
5672    struct timeval now;
5673    int status = gettimeofday(&now, NULL);
5674    assert(status == 0, "gettimeofday");
5675
5676    max_secs = now.tv_sec + MAX_SECS;
5677
5678    if (isAbsolute) {
5679      jlong secs = time / 1000;
5680      if (secs > max_secs) {
5681        absTime->tv_sec = max_secs;
5682      } else {
5683        absTime->tv_sec = secs;
5684      }
5685      absTime->tv_nsec = (time % 1000) * NANOSECS_PER_MILLISEC;
5686    } else {
5687      jlong secs = time / NANOSECS_PER_SEC;
5688      if (secs >= MAX_SECS) {
5689        absTime->tv_sec = max_secs;
5690        absTime->tv_nsec = 0;
5691      } else {
5692        absTime->tv_sec = now.tv_sec + secs;
5693        absTime->tv_nsec = (time % NANOSECS_PER_SEC) + now.tv_usec*1000;
5694        if (absTime->tv_nsec >= NANOSECS_PER_SEC) {
5695          absTime->tv_nsec -= NANOSECS_PER_SEC;
5696          ++absTime->tv_sec; // note: this must be <= max_secs
5697        }
5698      }
5699    }
5700  } else {
5701    // must be relative using monotonic clock
5702    struct timespec now;
5703    int status = os::Linux::clock_gettime(CLOCK_MONOTONIC, &now);
5704    assert_status(status == 0, status, "clock_gettime");
5705    max_secs = now.tv_sec + MAX_SECS;
5706    jlong secs = time / NANOSECS_PER_SEC;
5707    if (secs >= MAX_SECS) {
5708      absTime->tv_sec = max_secs;
5709      absTime->tv_nsec = 0;
5710    } else {
5711      absTime->tv_sec = now.tv_sec + secs;
5712      absTime->tv_nsec = (time % NANOSECS_PER_SEC) + now.tv_nsec;
5713      if (absTime->tv_nsec >= NANOSECS_PER_SEC) {
5714        absTime->tv_nsec -= NANOSECS_PER_SEC;
5715        ++absTime->tv_sec; // note: this must be <= max_secs
5716      }
5717    }
5718  }
5719  assert(absTime->tv_sec >= 0, "tv_sec < 0");
5720  assert(absTime->tv_sec <= max_secs, "tv_sec > max_secs");
5721  assert(absTime->tv_nsec >= 0, "tv_nsec < 0");
5722  assert(absTime->tv_nsec < NANOSECS_PER_SEC, "tv_nsec >= nanos_per_sec");
5723}
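
// Usage sketch (illustrative only): decoding the two time encodings the
// comment above describes:
//
//   timespec absTime;
//   unpackTime(&absTime, false, 500 * NANOSECS_PER_MILLISEC); // relative: 500 ms from now
//   unpackTime(&absTime, true,  os::javaTimeMillis() + 500);  // absolute: wall-clock millis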
5724
5725void Parker::park(bool isAbsolute, jlong time) {
5726  // Ideally we'd do something useful while spinning, such
5727  // as calling unpackTime().
5728
5729  // Optional fast-path check:
5730  // Return immediately if a permit is available.
5731  // We depend on Atomic::xchg() having full barrier semantics
5732  // since we are doing a lock-free update to _counter.
5733  if (Atomic::xchg(0, &_counter) > 0) return;
5734
5735  Thread* thread = Thread::current();
5736  assert(thread->is_Java_thread(), "Must be JavaThread");
5737  JavaThread *jt = (JavaThread *)thread;
5738
5739  // Optional optimization -- avoid state transitions if there's an interrupt pending.
5740  // Check interrupt before trying to wait
5741  if (Thread::is_interrupted(thread, false)) {
5742    return;
5743  }
5744
5745  // Next, demultiplex/decode time arguments
5746  timespec absTime;
5747  if (time < 0 || (isAbsolute && time == 0) ) { // don't wait at all
5748    return;
5749  }
5750  if (time > 0) {
5751    unpackTime(&absTime, isAbsolute, time);
5752  }
5753
5754
5755  // Enter safepoint region
5756  // Beware of deadlocks such as 6317397.
5757  // The per-thread Parker:: mutex is a classic leaf-lock.
5758  // In particular a thread must never block on the Threads_lock while
5759  // holding the Parker:: mutex.  If safepoints are pending both
5760  // the ThreadBlockInVM() CTOR and DTOR may grab Threads_lock.
5761  ThreadBlockInVM tbivm(jt);
5762
5763  // Don't wait if cannot get lock since interference arises from
5764  // unblocking.  Also, check interrupt before trying to wait.
5765  if (Thread::is_interrupted(thread, false) || pthread_mutex_trylock(_mutex) != 0) {
5766    return;
5767  }
5768
5769  int status ;
5770  if (_counter > 0)  { // no wait needed
5771    _counter = 0;
5772    status = pthread_mutex_unlock(_mutex);
5773    assert (status == 0, "invariant") ;
5774    // Paranoia to ensure our locked and lock-free paths interact
5775    // correctly with each other and Java-level accesses.
5776    OrderAccess::fence();
5777    return;
5778  }
5779
5780#ifdef ASSERT
5781  // Don't catch signals while blocked; let the running threads have the signals.
5782  // (This allows a debugger to break into the running thread.)
5783  sigset_t oldsigs;
5784  sigset_t* allowdebug_blocked = os::Linux::allowdebug_blocked_signals();
5785  pthread_sigmask(SIG_BLOCK, allowdebug_blocked, &oldsigs);
5786#endif
5787
5788  OSThreadWaitState osts(thread->osthread(), false /* not Object.wait() */);
5789  jt->set_suspend_equivalent();
5790  // cleared by handle_special_suspend_equivalent_condition() or java_suspend_self()
5791
5792  assert(_cur_index == -1, "invariant");
5793  if (time == 0) {
5794    _cur_index = REL_INDEX; // arbitrary choice when not timed
5795    status = pthread_cond_wait (&_cond[_cur_index], _mutex) ;
5796  } else {
5797    _cur_index = isAbsolute ? ABS_INDEX : REL_INDEX;
5798    status = os::Linux::safe_cond_timedwait (&_cond[_cur_index], _mutex, &absTime) ;
5799    if (status != 0 && WorkAroundNPTLTimedWaitHang) {
5800      pthread_cond_destroy (&_cond[_cur_index]) ;
5801      pthread_cond_init    (&_cond[_cur_index], isAbsolute ? NULL : os::Linux::condAttr());
5802    }
5803  }
5804  _cur_index = -1;
5805  assert_status(status == 0 || status == EINTR ||
5806                status == ETIME || status == ETIMEDOUT,
5807                status, "cond_timedwait");
5808
5809#ifdef ASSERT
5810  pthread_sigmask(SIG_SETMASK, &oldsigs, NULL);
5811#endif
5812
5813  _counter = 0 ;
5814  status = pthread_mutex_unlock(_mutex) ;
5815  assert_status(status == 0, status, "invariant") ;
5816  // Paranoia to ensure our locked and lock-free paths interact
5817  // correctly with each other and Java-level accesses.
5818  OrderAccess::fence();
5819
5820  // If externally suspended while waiting, re-suspend
5821  if (jt->handle_special_suspend_equivalent_condition()) {
5822    jt->java_suspend_self();
5823  }
5824}
5825
5826void Parker::unpark() {
5827  int s, status ;
5828  status = pthread_mutex_lock(_mutex);
5829  assert (status == 0, "invariant") ;
5830  s = _counter;
5831  _counter = 1;
5832  if (s < 1) {
5833    // thread might be parked
5834    if (_cur_index != -1) {
5835      // thread is definitely parked
5836      if (WorkAroundNPTLTimedWaitHang) {
5837        status = pthread_cond_signal (&_cond[_cur_index]);
5838        assert (status == 0, "invariant");
5839        status = pthread_mutex_unlock(_mutex);
5840        assert (status == 0, "invariant");
5841      } else {
5842        status = pthread_mutex_unlock(_mutex);
5843        assert (status == 0, "invariant");
5844        status = pthread_cond_signal (&_cond[_cur_index]);
5845        assert (status == 0, "invariant");
5846      }
5847    } else {
5848      status = pthread_mutex_unlock(_mutex);
5849      assert (status == 0, "invariant") ;
5850    }
5851  } else {
5852    status = pthread_mutex_unlock(_mutex);
5853    assert (status == 0, "invariant") ;
5854  }
5855}
5856
5857
5858extern char** environ;
5859
5860#ifndef __NR_fork
5861#define __NR_fork IA32_ONLY(2) IA64_ONLY(not defined) AMD64_ONLY(57)
5862#endif
5863
5864#ifndef __NR_execve
5865#define __NR_execve IA32_ONLY(11) IA64_ONLY(1033) AMD64_ONLY(59)
5866#endif
5867
5868// Run the specified command in a separate process. Return its exit value,
5869// or -1 on failure (e.g. can't fork a new process).
5870// Unlike system(), this function can be called from signal handler. It
5871// doesn't block SIGINT et al.
5872int os::fork_and_exec(char* cmd) {
5873  const char * argv[4] = {"sh", "-c", cmd, NULL};
5874
5875  // fork() in LinuxThreads/NPTL is not async-safe. It needs to run
5876  // pthread_atfork handlers and reset pthread library. All we need is a
5877  // separate process to execve. Make a direct syscall to fork process.
5878  // On IA64 there's no fork syscall, we have to use fork() and hope for
5879  // the best...
5880  pid_t pid = NOT_IA64(syscall(__NR_fork);)
5881              IA64_ONLY(fork();)
5882
5883  if (pid < 0) {
5884    // fork failed
5885    return -1;
5886
5887  } else if (pid == 0) {
5888    // child process
5889
5890    // execve() in LinuxThreads will call pthread_kill_other_threads_np()
5891    // first to kill every thread on the thread list. Because this list is
5892    // not reset by fork() (see notes above), execve() will instead kill
5893    // every thread in the parent process. We know this is the only thread
5894    // in the new process, so make a system call directly.
5895    // IA64 should use normal execve() from glibc to match the glibc fork()
5896    // above.
5897    NOT_IA64(syscall(__NR_execve, "/bin/sh", argv, environ);)
5898    IA64_ONLY(execve("/bin/sh", (char* const*)argv, environ);)
5899
5900    // execve failed
5901    _exit(-1);
5902
5903  } else  {
5904    // copied from J2SE ..._waitForProcessExit() in UNIXProcess_md.c; we don't
5905    // care about the actual exit code, for now.
5906
5907    int status;
5908
5909    // Wait for the child process to exit.  This returns immediately if
5910    // the child has already exited.
5911    while (waitpid(pid, &status, 0) < 0) {
5912        switch (errno) {
5913        case ECHILD: return 0;
5914        case EINTR: break;
5915        default: return -1;
5916        }
5917    }
5918
5919    if (WIFEXITED(status)) {
5920       // The child exited normally; get its exit code.
5921       return WEXITSTATUS(status);
5922    } else if (WIFSIGNALED(status)) {
5923       // The child exited because of a signal
5924       // The best value to return is 0x80 + signal number,
5925       // because that is what all Unix shells do, and because
5926       // it allows callers to distinguish between process exit and
5927       // process death by signal.
5928       return 0x80 + WTERMSIG(status);
5929    } else {
5930       // Unknown exit code; pass it through
5931       return status;
5932    }
5933  }
5934}
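
// Usage sketch (illustrative only, with a hypothetical command):
//
//   int rc = os::fork_and_exec((char*)"ls /tmp");
//   // rc is the command's exit status, 0x80 + signal number if it was
//   // killed by a signal, or -1 if the fork itself failed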
5935
5936// is_headless_jre()
5937//
5938// Test for the existence of xawt/libmawt.so or libawt_xawt.so
5939// in order to report if we are running in a headless jre
5940//
5941// Since JDK8 xawt/libmawt.so was moved into the same directory
5942// as libawt.so, and renamed libawt_xawt.so
5943//
5944bool os::is_headless_jre() {
5945    struct stat statbuf;
5946    char buf[MAXPATHLEN];
5947    char libmawtpath[MAXPATHLEN];
5948    const char *xawtstr  = "/xawt/libmawt.so";
5949    const char *new_xawtstr = "/libawt_xawt.so";
5950    char *p;
5951
5952    // Get path to libjvm.so
5953    os::jvm_path(buf, sizeof(buf));
5954
5955    // Get rid of libjvm.so
5956    p = strrchr(buf, '/');
5957    if (p == NULL) return false;
5958    else *p = '\0';
5959
5960    // Get rid of client or server
5961    p = strrchr(buf, '/');
5962    if (p == NULL) return false;
5963    else *p = '\0';
5964
5965    // check xawt/libmawt.so
5966    strcpy(libmawtpath, buf);
5967    strcat(libmawtpath, xawtstr);
5968    if (::stat(libmawtpath, &statbuf) == 0) return false;
5969
5970    // check libawt_xawt.so
5971    strcpy(libmawtpath, buf);
5972    strcat(libmawtpath, new_xawtstr);
5973    if (::stat(libmawtpath, &statbuf) == 0) return false;
5974
5975    return true;
5976}
5977
5978// Get the default path to the core file
5979// Returns the length of the string
5980int os::get_core_path(char* buffer, size_t bufferSize) {
5981  const char* p = get_current_directory(buffer, bufferSize);
5982
5983  if (p == NULL) {
5984    assert(p != NULL, "failed to get current directory");
5985    return 0;
5986  }
5987
5988  return strlen(buffer);
5989}
5990
5991#ifdef JAVASE_EMBEDDED
5992//
5993// A thread to watch the '/dev/mem_notify' device, which will tell us when the OS is running low on memory.
5994//
5995MemNotifyThread* MemNotifyThread::_memnotify_thread = NULL;
5996
5997// ctor
5998//
5999MemNotifyThread::MemNotifyThread(int fd): Thread() {
6000  assert(memnotify_thread() == NULL, "we can only allocate one MemNotifyThread");
6001  _fd = fd;
6002
6003  if (os::create_thread(this, os::os_thread)) {
6004    _memnotify_thread = this;
6005    os::set_priority(this, NearMaxPriority);
6006    os::start_thread(this);
6007  }
6008}
6009
6010// Where all the work gets done
6011//
6012void MemNotifyThread::run() {
6013  assert(this == memnotify_thread(), "expected the singleton MemNotifyThread");
6014
6015  // Set up the select arguments
6016  fd_set rfds;
6017  if (_fd != -1) {
6018    FD_ZERO(&rfds);
6019    FD_SET(_fd, &rfds);
6020  }
6021
6022  // Now wait for the mem_notify device to wake up
6023  while (1) {
6024    // Wait for the mem_notify device to signal us..
6025    int rc = select(_fd+1, _fd != -1 ? &rfds : NULL, NULL, NULL, NULL);
6026    if (rc == -1) {
6027      perror("select!\n");
6028      break;
6029    } else if (rc) {
6030      //ssize_t free_before = os::available_memory();
6031      //tty->print ("Notified: Free: %dK \n",os::available_memory()/1024);
6032
6033      // The kernel is telling us there is not much memory left...
6034      // try to do something about that
6035
6036      // If we are not already in a GC, try one.
6037      if (!Universe::heap()->is_gc_active()) {
6038        Universe::heap()->collect(GCCause::_allocation_failure);
6039
6040        //ssize_t free_after = os::available_memory();
6041        //tty->print ("Post-Notify: Free: %dK\n",free_after/1024);
6042        //tty->print ("GC freed: %dK\n", (free_after - free_before)/1024);
6043      }
      // We might want to do something like the following if we find the GCs are not helping...
6045      // Universe::heap()->size_policy()->set_gc_time_limit_exceeded(true);
6046    }
6047  }
6048}
6049
6050//
6051// See if the /dev/mem_notify device exists, and if so, start a thread to monitor it.
6052//
6053void MemNotifyThread::start() {
  int fd = open("/dev/mem_notify", O_RDONLY, 0);
  if (fd < 0) {
    return;
  }
6059
6060  if (memnotify_thread() == NULL) {
6061    new MemNotifyThread(fd);
6062  }
6063}
6064
6065#endif // JAVASE_EMBEDDED
6066
6067
6068/////////////// Unit tests ///////////////
6069
6070#ifndef PRODUCT
6071
6072#define test_log(...) \
6073  do {\
6074    if (VerboseInternalVMTests) { \
6075      tty->print_cr(__VA_ARGS__); \
6076      tty->flush(); \
6077    }\
6078  } while (false)
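// Wrapping the body in do { ... } while (false) makes test_log expand to a
// single statement, so the macro composes safely with unbraced if/else.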
6079
6080class TestReserveMemorySpecial : AllStatic {
6081 public:
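  // Touch one byte on every small page of the range so the reservation is
  // actually backed by memory; a bad mapping faults here rather than later.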
6082  static void small_page_write(void* addr, size_t size) {
6083    size_t page_size = os::vm_page_size();
6084
6085    char* end = (char*)addr + size;
6086    for (char* p = (char*)addr; p < end; p += page_size) {
6087      *p = 1;
6088    }
6089  }
6090
6091  static void test_reserve_memory_special_huge_tlbfs_only(size_t size) {
6092    if (!UseHugeTLBFS) {
6093      return;
6094    }
6095
6096    test_log("test_reserve_memory_special_huge_tlbfs_only(" SIZE_FORMAT ")", size);
6097
6098    char* addr = os::Linux::reserve_memory_special_huge_tlbfs_only(size, NULL, false);
6099
6100    if (addr != NULL) {
6101      small_page_write(addr, size);
6102
6103      os::Linux::release_memory_special_huge_tlbfs(addr, size);
6104    }
6105  }
6106
6107  static void test_reserve_memory_special_huge_tlbfs_only() {
6108    if (!UseHugeTLBFS) {
6109      return;
6110    }
6111
6112    size_t lp = os::large_page_size();
6113
6114    for (size_t size = lp; size <= lp * 10; size += lp) {
6115      test_reserve_memory_special_huge_tlbfs_only(size);
6116    }
6117  }
6118
6119  static void test_reserve_memory_special_huge_tlbfs_mixed(size_t size, size_t alignment) {
6120    if (!UseHugeTLBFS) {
      return;
6122    }
6123
6124    test_log("test_reserve_memory_special_huge_tlbfs_mixed(" SIZE_FORMAT ", " SIZE_FORMAT ")",
6125        size, alignment);
6126
6127    assert(size >= os::large_page_size(), "Incorrect input to test");
6128
6129    char* addr = os::Linux::reserve_memory_special_huge_tlbfs_mixed(size, alignment, NULL, false);
6130
6131    if (addr != NULL) {
6132      small_page_write(addr, size);
6133
6134      os::Linux::release_memory_special_huge_tlbfs(addr, size);
6135    }
6136  }
6137
6138  static void test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(size_t size) {
6140    size_t ag = os::vm_allocation_granularity();
6141
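    // Walk the power-of-two alignments, starting at the allocation
    // granularity, for as long as they still evenly divide the requested size.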
6142    for (size_t alignment = ag; is_size_aligned(size, alignment); alignment *= 2) {
6143      test_reserve_memory_special_huge_tlbfs_mixed(size, alignment);
6144    }
6145  }
6146
6147  static void test_reserve_memory_special_huge_tlbfs_mixed() {
6148    size_t lp = os::large_page_size();
6149    size_t ag = os::vm_allocation_granularity();
6150
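    // Probe boundary conditions around the large page size: exact multiples,
    // multiples plus or minus one allocation granule, and sizes that leave a
    // small-page tail (the lp / 2 cases).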
6151    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp);
6152    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp + ag);
6153    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp + lp / 2);
6154    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 2);
6155    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 2 + ag);
6156    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 2 - ag);
6157    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 2 + lp / 2);
6158    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 10);
6159    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 10 + lp / 2);
6160  }
6161
6162  static void test_reserve_memory_special_huge_tlbfs() {
6163    if (!UseHugeTLBFS) {
6164      return;
6165    }
6166
6167    test_reserve_memory_special_huge_tlbfs_only();
6168    test_reserve_memory_special_huge_tlbfs_mixed();
6169  }
6170
6171  static void test_reserve_memory_special_shm(size_t size, size_t alignment) {
6172    if (!UseSHM) {
6173      return;
6174    }
6175
6176    test_log("test_reserve_memory_special_shm(" SIZE_FORMAT ", " SIZE_FORMAT ")", size, alignment);
6177
6178    char* addr = os::Linux::reserve_memory_special_shm(size, alignment, NULL, false);
6179
6180    if (addr != NULL) {
      assert(is_ptr_aligned(addr, alignment), "returned address is not aligned to the requested alignment");
      assert(is_ptr_aligned(addr, os::large_page_size()), "returned address is not aligned to the large page size");
6183
6184      small_page_write(addr, size);
6185
6186      os::Linux::release_memory_special_shm(addr, size);
6187    }
6188  }
6189
6190  static void test_reserve_memory_special_shm() {
6191    size_t lp = os::large_page_size();
6192    size_t ag = os::vm_allocation_granularity();
6193
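    // Cover sizes from one allocation granule up to three large pages and,
    // for each size, every power-of-two alignment that still divides it.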
6194    for (size_t size = ag; size < lp * 3; size += ag) {
6195      for (size_t alignment = ag; is_size_aligned(size, alignment); alignment *= 2) {
6196        test_reserve_memory_special_shm(size, alignment);
6197      }
6198    }
6199  }
6200
6201  static void test() {
6202    test_reserve_memory_special_huge_tlbfs();
6203    test_reserve_memory_special_shm();
6204  }
6205};
6206
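// Entry point for the internal VM test runner (these tests are typically run
// via -XX:+ExecuteInternalVMTests; VerboseInternalVMTests enables the
// test_log output above).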
6207void TestReserveMemorySpecial_test() {
6208  TestReserveMemorySpecial::test();
6209}
6210
6211#endif
6212