os_linux.cpp revision 342:37f87013dfd8
/*
 * Copyright 1999-2007 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
2229615Sjmg * 2329615Sjmg */ 2429615Sjmg 2529615Sjmg// do not include precompiled header file 2629615Sjmg# include "incls/_os_linux.cpp.incl" 27124125Scharnier 28124125Scharnier// put OS-includes here 29124125Scharnier# include <sys/types.h> 3029615Sjmg# include <sys/mman.h> 3129615Sjmg# include <pthread.h> 32124125Scharnier# include <signal.h> 3329615Sjmg# include <errno.h> 3429615Sjmg# include <dlfcn.h> 3529615Sjmg# include <stdio.h> 3629615Sjmg# include <unistd.h> 3729615Sjmg# include <sys/resource.h> 3829615Sjmg# include <pthread.h> 3929615Sjmg# include <sys/stat.h> 4029615Sjmg# include <sys/time.h> 4150786Speter# include <sys/times.h> 4229615Sjmg# include <sys/utsname.h> 4329615Sjmg# include <sys/socket.h> 4429615Sjmg# include <sys/wait.h> 4529615Sjmg# include <pwd.h> 4629615Sjmg# include <poll.h> 4729615Sjmg# include <semaphore.h> 4829615Sjmg# include <fcntl.h> 4929615Sjmg# include <string.h> 5029615Sjmg# include <syscall.h> 5129615Sjmg# include <sys/sysinfo.h> 5229615Sjmg# include <gnu/libc-version.h> 5329615Sjmg# include <sys/ipc.h> 5429615Sjmg# include <sys/shm.h> 5529615Sjmg# include <link.h> 5639144Seivind 5729615Sjmg#define MAX_PATH (2 * K) 5829615Sjmg 5929615Sjmg// for timer info max values which include all bits 6029615Sjmg#define ALL_64_BITS CONST64(0xFFFFFFFFFFFFFFFF) 6129615Sjmg#define SEC_IN_NANOSECS 1000000000LL 6229615Sjmg 6329615Sjmg//////////////////////////////////////////////////////////////////////////////// 6429615Sjmg// global variables 6529615Sjmgjulong os::Linux::_physical_memory = 0; 6629615Sjmg 6731135Sjmgaddress os::Linux::_initial_thread_stack_bottom = NULL; 6829615Sjmguintptr_t os::Linux::_initial_thread_stack_size = 0; 6929615Sjmg 7029615Sjmgint (*os::Linux::_clock_gettime)(clockid_t, struct timespec *) = NULL; 7129615Sjmgint (*os::Linux::_pthread_getcpuclockid)(pthread_t, clockid_t *) = NULL; 7229615SjmgMutex* os::Linux::_createThread_lock = NULL; 7329615Sjmgpthread_t os::Linux::_main_thread; 7429615Sjmgint os::Linux::_page_size 
= -1; 7529615Sjmgbool os::Linux::_is_floating_stack = false; 7629615Sjmgbool os::Linux::_is_NPTL = false; 7729615Sjmgbool os::Linux::_supports_fast_thread_cpu_time = false; 7829615Sjmgchar * os::Linux::_glibc_version = NULL; 7929615Sjmgchar * os::Linux::_libpthread_version = NULL; 8029615Sjmg 8129615Sjmgstatic jlong initial_time_count=0; 8229615Sjmg 8329615Sjmgstatic int clock_tics_per_sec = 100; 8429615Sjmg 8529615Sjmg// For diagnostics to print a message once. see run_periodic_checks 8629615Sjmgstatic sigset_t check_signal_done; 8729615Sjmgstatic bool check_signals = true;; 8829615Sjmg 8929615Sjmgstatic pid_t _initial_pid = 0; 9029615Sjmg 9129615Sjmg/* Signal number used to suspend/resume a thread */ 9229615Sjmg 9329615Sjmg/* do not use any signal number less than SIGSEGV, see 4355769 */ 9429615Sjmgstatic int SR_signum = SIGUSR2; 9529615Sjmgsigset_t SR_sigset; 9629615Sjmg 9729615Sjmg//////////////////////////////////////////////////////////////////////////////// 9829615Sjmg// utility functions 9929615Sjmg 10029615Sjmgstatic int SR_initialize(); 10129615Sjmgstatic int SR_finalize(); 10229615Sjmg 10329615Sjmgjulong os::available_memory() { 10429615Sjmg return Linux::available_memory(); 10529615Sjmg} 10629615Sjmg 10729615Sjmgjulong os::Linux::available_memory() { 10829615Sjmg // values in struct sysinfo are "unsigned long" 10929615Sjmg struct sysinfo si; 11029615Sjmg sysinfo(&si); 11129615Sjmg 11229615Sjmg return (julong)si.freeram * si.mem_unit; 11329615Sjmg} 11429615Sjmg 11529615Sjmgjulong os::physical_memory() { 11629615Sjmg return Linux::physical_memory(); 11750786Speter} 11829615Sjmg 11929615Sjmgjulong os::allocatable_physical_memory(julong size) { 12029615Sjmg#ifdef _LP64 12129615Sjmg return size; 12229615Sjmg#else 12329615Sjmg julong result = MIN2(size, (julong)3800*M); 12429615Sjmg if (!is_allocatable(result)) { 12529615Sjmg // See comments under solaris for alignment considerations 12629615Sjmg julong reasonable_size = (julong)2*G - 2 * os::vm_page_size(); 
12729615Sjmg result = MIN2(size, reasonable_size); 12829615Sjmg } 12929615Sjmg return result; 13029615Sjmg#endif // _LP64 13129615Sjmg} 13229615Sjmg 13329615Sjmg//////////////////////////////////////////////////////////////////////////////// 13429615Sjmg// environment support 13529615Sjmg 13629615Sjmgbool os::getenv(const char* name, char* buf, int len) { 13729615Sjmg const char* val = ::getenv(name); 13829615Sjmg if (val != NULL && strlen(val) < (size_t)len) { 13929615Sjmg strcpy(buf, val); 14029615Sjmg return true; 14129615Sjmg } 14229615Sjmg if (len > 0) buf[0] = 0; // return a null string 14329615Sjmg return false; 14429615Sjmg} 14529615Sjmg 14629615Sjmg 14729615Sjmg// Return true if user is running as root. 14829615Sjmg 14929615Sjmgbool os::have_special_privileges() { 15029615Sjmg static bool init = false; 15129615Sjmg static bool privileges = false; 15229615Sjmg if (!init) { 15329615Sjmg privileges = (getuid() != geteuid()) || (getgid() != getegid()); 15429615Sjmg init = true; 15529615Sjmg } 15629615Sjmg return privileges; 15729615Sjmg} 15829615Sjmg 15929615Sjmg 16029615Sjmg#ifndef SYS_gettid 16129615Sjmg// i386: 224, ia64: 1105, amd64: 186, sparc 143 16229615Sjmg#ifdef __ia64__ 16329615Sjmg#define SYS_gettid 1105 16429615Sjmg#elif __i386__ 16529615Sjmg#define SYS_gettid 224 16629615Sjmg#elif __amd64__ 16729615Sjmg#define SYS_gettid 186 16829615Sjmg#elif __sparc__ 16929615Sjmg#define SYS_gettid 143 17029615Sjmg#else 17129615Sjmg#error define gettid for the arch 17229615Sjmg#endif 17329615Sjmg#endif 17450786Speter 17529615Sjmg// Cpu architecture string 17629615Sjmg#if defined(IA64) 17729615Sjmgstatic char cpu_arch[] = "ia64"; 17829615Sjmg#elif defined(IA32) 17929615Sjmgstatic char cpu_arch[] = "i386"; 18029615Sjmg#elif defined(AMD64) 18129615Sjmgstatic char cpu_arch[] = "amd64"; 18229615Sjmg#elif defined(SPARC) 18329615Sjmg# ifdef _LP64 18450786Speterstatic char cpu_arch[] = "sparcv9"; 18529615Sjmg# else 18629615Sjmgstatic char cpu_arch[] = "sparc"; 
18729615Sjmg# endif 18829615Sjmg#else 18929615Sjmg#error Add appropriate cpu_arch setting 19029615Sjmg#endif 19129615Sjmg 19229615Sjmg 19329615Sjmg// pid_t gettid() 19429615Sjmg// 19529615Sjmg// Returns the kernel thread id of the currently running thread. Kernel 19629615Sjmg// thread id is used to access /proc. 19729615Sjmg// 19829615Sjmg// (Note that getpid() on LinuxThreads returns kernel thread id too; but 19929615Sjmg// on NPTL, it returns the same pid for all threads, as required by POSIX.) 20029615Sjmg// 20129615Sjmgpid_t os::Linux::gettid() { 20229615Sjmg int rslt = syscall(SYS_gettid); 20329615Sjmg if (rslt == -1) { 20429615Sjmg // old kernel, no NPTL support 20529615Sjmg return getpid(); 20629615Sjmg } else { 20729615Sjmg return (pid_t)rslt; 20829615Sjmg } 20929615Sjmg} 21029615Sjmg 21129615Sjmg// Most versions of linux have a bug where the number of processors are 21229615Sjmg// determined by looking at the /proc file system. In a chroot environment, 21329615Sjmg// the system call returns 1. This causes the VM to act as if it is 21429615Sjmg// a single processor and elide locking (see is_MP() call). 
21529615Sjmgstatic bool unsafe_chroot_detected = false; 21629615Sjmgstatic char *unstable_chroot_error = "/proc file system not found.\n" 21729615Sjmg "Java may be unstable running multithreaded in a chroot " 21829615Sjmg "environment on Linux when /proc filesystem is not mounted."; 21929615Sjmg 22029615Sjmgvoid os::Linux::initialize_system_info() { 22129615Sjmg _processor_count = sysconf(_SC_NPROCESSORS_CONF); 22229615Sjmg if (_processor_count == 1) { 22329615Sjmg pid_t pid = os::Linux::gettid(); 22429615Sjmg char fname[32]; 22529615Sjmg jio_snprintf(fname, sizeof(fname), "/proc/%d", pid); 22629615Sjmg FILE *fp = fopen(fname, "r"); 22729615Sjmg if (fp == NULL) { 22829615Sjmg unsafe_chroot_detected = true; 22929615Sjmg } else { 23029615Sjmg fclose(fp); 23129615Sjmg } 23229615Sjmg } 23329615Sjmg _physical_memory = (julong)sysconf(_SC_PHYS_PAGES) * (julong)sysconf(_SC_PAGESIZE); 23429615Sjmg assert(_processor_count > 0, "linux error"); 23529615Sjmg} 23629615Sjmg 23729615Sjmgvoid os::init_system_properties_values() { 23829615Sjmg// char arch[12]; 23929615Sjmg// sysinfo(SI_ARCHITECTURE, arch, sizeof(arch)); 24029615Sjmg 24129615Sjmg // The next steps are taken in the product version: 24229615Sjmg // 24329615Sjmg // Obtain the JAVA_HOME value from the location of libjvm[_g].so. 24429615Sjmg // This library should be located at: 24529615Sjmg // <JAVA_HOME>/jre/lib/<arch>/{client|server}/libjvm[_g].so. 24629615Sjmg // 24729615Sjmg // If "/jre/lib/" appears at the right place in the path, then we 24829615Sjmg // assume libjvm[_g].so is installed in a JDK and we use this path. 24929615Sjmg // 25029615Sjmg // Otherwise exit with message: "Could not create the Java virtual machine." 25129615Sjmg // 25229615Sjmg // The following extra steps are taken in the debugging version: 25329615Sjmg // 25429615Sjmg // If "/jre/lib/" does NOT appear at the right place in the path 25529615Sjmg // instead of exit check for $JAVA_HOME environment variable. 
25629615Sjmg // 25729615Sjmg // If it is defined and we are able to locate $JAVA_HOME/jre/lib/<arch>, 25829615Sjmg // then we append a fake suffix "hotspot/libjvm[_g].so" to this path so 25929615Sjmg // it looks like libjvm[_g].so is installed there 26029615Sjmg // <JAVA_HOME>/jre/lib/<arch>/hotspot/libjvm[_g].so. 26129615Sjmg // 26229615Sjmg // Otherwise exit. 26329615Sjmg // 26429615Sjmg // Important note: if the location of libjvm.so changes this 26529615Sjmg // code needs to be changed accordingly. 26629615Sjmg 26729615Sjmg // The next few definitions allow the code to be verbatim: 26829615Sjmg#define malloc(n) (char*)NEW_C_HEAP_ARRAY(char, (n)) 26929615Sjmg#define getenv(n) ::getenv(n) 27029615Sjmg 27129615Sjmg/* 27229615Sjmg * See ld(1): 27329615Sjmg * The linker uses the following search paths to locate required 27429615Sjmg * shared libraries: 27529615Sjmg * 1: ... 27629615Sjmg * ... 27729615Sjmg * 7: The default directories, normally /lib and /usr/lib. 27829615Sjmg */ 27929615Sjmg#define DEFAULT_LIBPATH "/lib:/usr/lib" 28029615Sjmg 28129615Sjmg#define EXTENSIONS_DIR "/lib/ext" 28229615Sjmg#define ENDORSED_DIR "/lib/endorsed" 28329615Sjmg#define REG_DIR "/usr/java/packages" 28429615Sjmg 28529615Sjmg { 28629615Sjmg /* sysclasspath, java_home, dll_dir */ 28729615Sjmg { 28829615Sjmg char *home_path; 28929615Sjmg char *dll_path; 29029615Sjmg char *pslash; 29129615Sjmg char buf[MAXPATHLEN]; 29229615Sjmg os::jvm_path(buf, sizeof(buf)); 29329615Sjmg 29429615Sjmg // Found the full path to libjvm.so. 29529615Sjmg // Now cut the path to <java_home>/jre if we can. 
29629615Sjmg *(strrchr(buf, '/')) = '\0'; /* get rid of /libjvm.so */ 29729615Sjmg pslash = strrchr(buf, '/'); 29829615Sjmg if (pslash != NULL) 29929615Sjmg *pslash = '\0'; /* get rid of /{client|server|hotspot} */ 30029615Sjmg dll_path = malloc(strlen(buf) + 1); 30129615Sjmg if (dll_path == NULL) 30229615Sjmg return; 30329615Sjmg strcpy(dll_path, buf); 30429615Sjmg Arguments::set_dll_dir(dll_path); 30529615Sjmg 30629615Sjmg if (pslash != NULL) { 30729615Sjmg pslash = strrchr(buf, '/'); 30850786Speter if (pslash != NULL) { 30929615Sjmg *pslash = '\0'; /* get rid of /<arch> */ 31029615Sjmg pslash = strrchr(buf, '/'); 31129615Sjmg if (pslash != NULL) 31250786Speter *pslash = '\0'; /* get rid of /lib */ 31329615Sjmg } 31429615Sjmg } 31529615Sjmg 31629615Sjmg home_path = malloc(strlen(buf) + 1); 31729615Sjmg if (home_path == NULL) 31829615Sjmg return; 31929615Sjmg strcpy(home_path, buf); 32029615Sjmg Arguments::set_java_home(home_path); 32129615Sjmg 32229615Sjmg if (!set_boot_path('/', ':')) 32329615Sjmg return; 32429615Sjmg } 32529615Sjmg 32629615Sjmg /* 32729615Sjmg * Where to look for native libraries 32829615Sjmg * 32929615Sjmg * Note: Due to a legacy implementation, most of the library path 33029615Sjmg * is set in the launcher. This was to accomodate linking restrictions 33129615Sjmg * on legacy Linux implementations (which are no longer supported). 33250786Speter * Eventually, all the library path setting will be done here. 33329615Sjmg * 33429615Sjmg * However, to prevent the proliferation of improperly built native 33529615Sjmg * libraries, the new path component /usr/java/packages is added here. 33629615Sjmg * Eventually, all the library path setting will be done here. 33729615Sjmg */ 33829615Sjmg { 33929615Sjmg char *ld_library_path; 34050786Speter 34129615Sjmg /* 34229615Sjmg * Construct the invariant part of ld_library_path. 
Note that the 34329615Sjmg * space for the colon and the trailing null are provided by the 34429615Sjmg * nulls included by the sizeof operator (so actually we allocate 34529615Sjmg * a byte more than necessary). 34629615Sjmg */ 34729615Sjmg ld_library_path = (char *) malloc(sizeof(REG_DIR) + sizeof("/lib/") + 34829615Sjmg strlen(cpu_arch) + sizeof(DEFAULT_LIBPATH)); 34929615Sjmg sprintf(ld_library_path, REG_DIR "/lib/%s:" DEFAULT_LIBPATH, cpu_arch); 35029615Sjmg 35129615Sjmg /* 35229615Sjmg * Get the user setting of LD_LIBRARY_PATH, and prepended it. It 35329615Sjmg * should always exist (until the legacy problem cited above is 35429615Sjmg * addressed). 35529615Sjmg */ 35629615Sjmg char *v = getenv("LD_LIBRARY_PATH"); 35729615Sjmg if (v != NULL) { 35829615Sjmg char *t = ld_library_path; 35929615Sjmg /* That's +1 for the colon and +1 for the trailing '\0' */ 36029615Sjmg ld_library_path = (char *) malloc(strlen(v) + 1 + strlen(t) + 1); 36129615Sjmg sprintf(ld_library_path, "%s:%s", v, t); 36250786Speter } 36329615Sjmg Arguments::set_library_path(ld_library_path); 36429615Sjmg } 36529615Sjmg 36629615Sjmg /* 36729615Sjmg * Extensions directories. 36829615Sjmg * 36929615Sjmg * Note that the space for the colon and the trailing null are provided 37050786Speter * by the nulls included by the sizeof operator (so actually one byte more 37129615Sjmg * than necessary is allocated). 37229615Sjmg */ 37329615Sjmg { 37429615Sjmg char *buf = malloc(strlen(Arguments::get_java_home()) + 37529615Sjmg sizeof(EXTENSIONS_DIR) + sizeof(REG_DIR) + sizeof(EXTENSIONS_DIR)); 37629615Sjmg sprintf(buf, "%s" EXTENSIONS_DIR ":" REG_DIR EXTENSIONS_DIR, 37729615Sjmg Arguments::get_java_home()); 37829615Sjmg Arguments::set_ext_dirs(buf); 37929615Sjmg } 38029615Sjmg 38129615Sjmg /* Endorsed standards default directory. 
*/ 38229615Sjmg { 38329615Sjmg char * buf; 38429615Sjmg buf = malloc(strlen(Arguments::get_java_home()) + sizeof(ENDORSED_DIR)); 38529615Sjmg sprintf(buf, "%s" ENDORSED_DIR, Arguments::get_java_home()); 38650786Speter Arguments::set_endorsed_dirs(buf); 38729615Sjmg } 38829615Sjmg } 38950786Speter 39029615Sjmg#undef malloc 39129615Sjmg#undef getenv 39229615Sjmg#undef EXTENSIONS_DIR 39329615Sjmg#undef ENDORSED_DIR 39429615Sjmg 39529615Sjmg // Done 39629615Sjmg return; 39729615Sjmg} 39829615Sjmg 39950786Speter//////////////////////////////////////////////////////////////////////////////// 40029615Sjmg// breakpoint support 40129615Sjmg 40229615Sjmgvoid os::breakpoint() { 40329615Sjmg BREAKPOINT; 40429615Sjmg} 40550786Speter 40629615Sjmgextern "C" void breakpoint() { 40729615Sjmg // use debugger to set breakpoint here 40829615Sjmg} 40950786Speter 41029615Sjmg//////////////////////////////////////////////////////////////////////////////// 41129615Sjmg// signal support 41250786Speter 41329615Sjmgdebug_only(static bool signal_sets_initialized = false); 41429615Sjmgstatic sigset_t unblocked_sigs, vm_sigs, allowdebug_blocked_sigs; 41529615Sjmg 41629615Sjmgbool os::Linux::is_sig_ignored(int sig) { 41729615Sjmg struct sigaction oact; 41829615Sjmg sigaction(sig, (struct sigaction*)NULL, &oact); 41929615Sjmg void* ohlr = oact.sa_sigaction ? CAST_FROM_FN_PTR(void*, oact.sa_sigaction) 42029615Sjmg : CAST_FROM_FN_PTR(void*, oact.sa_handler); 42129615Sjmg if (ohlr == CAST_FROM_FN_PTR(void*, SIG_IGN)) 42229615Sjmg return true; 42329615Sjmg else 42429615Sjmg return false; 42529615Sjmg} 42629615Sjmg 42729615Sjmgvoid os::Linux::signal_sets_init() { 42829615Sjmg // Should also have an assertion stating we are still single-threaded. 42950786Speter assert(!signal_sets_initialized, "Already initialized"); 43029615Sjmg // Fill in signals that are necessarily unblocked for all threads in 43129615Sjmg // the VM. 
Currently, we unblock the following signals: 43229615Sjmg // SHUTDOWN{1,2,3}_SIGNAL: for shutdown hooks support (unless over-ridden 43329615Sjmg // by -Xrs (=ReduceSignalUsage)); 43429615Sjmg // BREAK_SIGNAL which is unblocked only by the VM thread and blocked by all 43529615Sjmg // other threads. The "ReduceSignalUsage" boolean tells us not to alter 43629615Sjmg // the dispositions or masks wrt these signals. 43729615Sjmg // Programs embedding the VM that want to use the above signals for their 43829615Sjmg // own purposes must, at this time, use the "-Xrs" option to prevent 43929615Sjmg // interference with shutdown hooks and BREAK_SIGNAL thread dumping. 44050786Speter // (See bug 4345157, and other related bugs). 44129615Sjmg // In reality, though, unblocking these signals is really a nop, since 44229615Sjmg // these signals are not blocked by default. 44329615Sjmg sigemptyset(&unblocked_sigs); 44429615Sjmg sigemptyset(&allowdebug_blocked_sigs); 44529615Sjmg sigaddset(&unblocked_sigs, SIGILL); 44629615Sjmg sigaddset(&unblocked_sigs, SIGSEGV); 44729615Sjmg sigaddset(&unblocked_sigs, SIGBUS); 44829615Sjmg sigaddset(&unblocked_sigs, SIGFPE); 44950786Speter sigaddset(&unblocked_sigs, SR_signum); 45029615Sjmg 45129615Sjmg if (!ReduceSignalUsage) { 45250786Speter if (!os::Linux::is_sig_ignored(SHUTDOWN1_SIGNAL)) { 45329615Sjmg sigaddset(&unblocked_sigs, SHUTDOWN1_SIGNAL); 45429615Sjmg sigaddset(&allowdebug_blocked_sigs, SHUTDOWN1_SIGNAL); 45550786Speter } 45629615Sjmg if (!os::Linux::is_sig_ignored(SHUTDOWN2_SIGNAL)) { 45729615Sjmg sigaddset(&unblocked_sigs, SHUTDOWN2_SIGNAL); 45850786Speter sigaddset(&allowdebug_blocked_sigs, SHUTDOWN2_SIGNAL); 45929615Sjmg } 46029615Sjmg if (!os::Linux::is_sig_ignored(SHUTDOWN3_SIGNAL)) { 46150786Speter sigaddset(&unblocked_sigs, SHUTDOWN3_SIGNAL); 46250786Speter sigaddset(&allowdebug_blocked_sigs, SHUTDOWN3_SIGNAL); 46329615Sjmg } 46429615Sjmg } 46550786Speter // Fill in signals that are blocked by all but the VM thread. 
46629615Sjmg sigemptyset(&vm_sigs); 46729615Sjmg if (!ReduceSignalUsage) 46829615Sjmg sigaddset(&vm_sigs, BREAK_SIGNAL); 46929615Sjmg debug_only(signal_sets_initialized = true); 47029615Sjmg 47129615Sjmg} 47229615Sjmg 47329615Sjmg// These are signals that are unblocked while a thread is running Java. 47429615Sjmg// (For some reason, they get blocked by default.) 47529615Sjmgsigset_t* os::Linux::unblocked_signals() { 47629615Sjmg assert(signal_sets_initialized, "Not initialized"); 47729615Sjmg return &unblocked_sigs; 47829615Sjmg} 47929615Sjmg 48029615Sjmg// These are the signals that are blocked while a (non-VM) thread is 48129615Sjmg// running Java. Only the VM thread handles these signals. 48229615Sjmgsigset_t* os::Linux::vm_signals() { 48329615Sjmg assert(signal_sets_initialized, "Not initialized"); 48429615Sjmg return &vm_sigs; 48529615Sjmg} 48629615Sjmg 48729615Sjmg// These are signals that are blocked during cond_wait to allow debugger in 48850786Spetersigset_t* os::Linux::allowdebug_blocked_signals() { 48950786Speter assert(signal_sets_initialized, "Not initialized"); 49029615Sjmg return &allowdebug_blocked_sigs; 49129615Sjmg} 49229615Sjmg 49329615Sjmgvoid os::Linux::hotspot_sigmask(Thread* thread) { 49429615Sjmg 49529615Sjmg //Save caller's signal mask before setting VM signal mask 49639144Seivind sigset_t caller_sigmask; 49729615Sjmg pthread_sigmask(SIG_BLOCK, NULL, &caller_sigmask); 49829615Sjmg 49939144Seivind OSThread* osthread = thread->osthread(); 50039144Seivind osthread->set_caller_sigmask(caller_sigmask); 50129615Sjmg 50229615Sjmg pthread_sigmask(SIG_UNBLOCK, os::Linux::unblocked_signals(), NULL); 50339144Seivind 50429615Sjmg if (!ReduceSignalUsage) { 50529615Sjmg if (thread->is_VM_thread()) { 50629615Sjmg // Only the VM thread handles BREAK_SIGNAL ... 50729615Sjmg pthread_sigmask(SIG_UNBLOCK, vm_signals(), NULL); 50846422Sluoqi } else { 50946422Sluoqi // ... 
all other threads block BREAK_SIGNAL 51029615Sjmg pthread_sigmask(SIG_BLOCK, vm_signals(), NULL); 51146422Sluoqi } 51229615Sjmg } 51329615Sjmg} 51429615Sjmg 51529615Sjmg////////////////////////////////////////////////////////////////////////////// 51629615Sjmg// detecting pthread library 51739144Seivind 51829615Sjmgvoid os::Linux::libpthread_init() { 51929615Sjmg // Save glibc and pthread version strings. Note that _CS_GNU_LIBC_VERSION 52029615Sjmg // and _CS_GNU_LIBPTHREAD_VERSION are supported in glibc >= 2.3.2. Use a 52129615Sjmg // generic name for earlier versions. 52229615Sjmg // Define macros here so we can build HotSpot on old systems. 52350786Speter# ifndef _CS_GNU_LIBC_VERSION 52429615Sjmg# define _CS_GNU_LIBC_VERSION 2 52529615Sjmg# endif 52629615Sjmg# ifndef _CS_GNU_LIBPTHREAD_VERSION 52729615Sjmg# define _CS_GNU_LIBPTHREAD_VERSION 3 52829615Sjmg# endif 52929615Sjmg 53029615Sjmg size_t n = confstr(_CS_GNU_LIBC_VERSION, NULL, 0); 53129615Sjmg if (n > 0) { 53229615Sjmg char *str = (char *)malloc(n); 53329615Sjmg confstr(_CS_GNU_LIBC_VERSION, str, n); 53450786Speter os::Linux::set_glibc_version(str); 53529615Sjmg } else { 53650786Speter // _CS_GNU_LIBC_VERSION is not supported, try gnu_get_libc_version() 53729615Sjmg static char _gnu_libc_version[32]; 53829615Sjmg jio_snprintf(_gnu_libc_version, sizeof(_gnu_libc_version), 539101611Siedowse "glibc %s %s", gnu_get_libc_version(), gnu_get_libc_release()); 540101611Siedowse os::Linux::set_glibc_version(_gnu_libc_version); 54129615Sjmg } 54250786Speter 54329615Sjmg n = confstr(_CS_GNU_LIBPTHREAD_VERSION, NULL, 0); 54450786Speter if (n > 0) { 54529615Sjmg char *str = (char *)malloc(n); 54650786Speter confstr(_CS_GNU_LIBPTHREAD_VERSION, str, n); 54729615Sjmg 54829615Sjmg // Vanilla RH-9 (glibc 2.3.2) has a bug that confstr() always tells 54929615Sjmg // us "NPTL-0.29" even we are running with LinuxThreads. 
Check if this 55029615Sjmg // is the case: 55129615Sjmg if (strcmp(os::Linux::glibc_version(), "glibc 2.3.2") == 0 && 55229615Sjmg strstr(str, "NPTL")) { 55329615Sjmg // LinuxThreads has a hard limit on max number of threads. So 55429615Sjmg // sysconf(_SC_THREAD_THREADS_MAX) will return a positive value. 55529615Sjmg // On the other hand, NPTL does not have such a limit, sysconf() 55629615Sjmg // will return -1 and errno is not changed. Check if it is really 55729615Sjmg // NPTL: 55829615Sjmg if (sysconf(_SC_THREAD_THREADS_MAX) > 0) { 55929615Sjmg free(str); 56029615Sjmg str = "linuxthreads"; 56129615Sjmg } 56229615Sjmg } 56329615Sjmg os::Linux::set_libpthread_version(str); 56429615Sjmg } else { 56529615Sjmg // glibc before 2.3.2 only has LinuxThreads. 56629615Sjmg os::Linux::set_libpthread_version("linuxthreads"); 56750786Speter } 56829615Sjmg 56950786Speter if (strstr(libpthread_version(), "NPTL")) { 57029615Sjmg os::Linux::set_is_NPTL(); 57129615Sjmg } else { 57250786Speter os::Linux::set_is_LinuxThreads(); 57350786Speter } 57450786Speter 57529615Sjmg // LinuxThreads have two flavors: floating-stack mode, which allows variable 57629615Sjmg // stack size; and fixed-stack mode. NPTL is always floating-stack. 57729615Sjmg if (os::Linux::is_NPTL() || os::Linux::supports_variable_stack_size()) { 57829615Sjmg os::Linux::set_is_floating_stack(); 57929615Sjmg } 58029615Sjmg} 58129615Sjmg 58229615Sjmg///////////////////////////////////////////////////////////////////////////// 58329615Sjmg// thread stack 58429615Sjmg 58529615Sjmg// Force Linux kernel to expand current thread stack. If "bottom" is close 58647405Sdfr// to the stack guard, caller should block all signals. 58747405Sdfr// 58829615Sjmg// MAP_GROWSDOWN: 58929615Sjmg// A special mmap() flag that is used to implement thread stacks. It tells 59029615Sjmg// kernel that the memory region should extend downwards when needed. 
This 59147405Sdfr// allows early versions of LinuxThreads to only mmap the first few pages 59229615Sjmg// when creating a new thread. Linux kernel will automatically expand thread 593124125Scharnier// stack as needed (on page faults). 594124125Scharnier// 59547405Sdfr// However, because the memory region of a MAP_GROWSDOWN stack can grow on 59647405Sdfr// demand, if a page fault happens outside an already mapped MAP_GROWSDOWN 59729615Sjmg// region, it's hard to tell if the fault is due to a legitimate stack 59829615Sjmg// access or because of reading/writing non-exist memory (e.g. buffer 59929615Sjmg// overrun). As a rule, if the fault happens below current stack pointer, 60029615Sjmg// Linux kernel does not expand stack, instead a SIGSEGV is sent to the 60129615Sjmg// application (see Linux kernel fault.c). 602228672Sdim// 60329615Sjmg// This Linux feature can cause SIGSEGV when VM bangs thread stack for 60429615Sjmg// stack overflow detection. 60529615Sjmg// 60629615Sjmg// Newer version of LinuxThreads (since glibc-2.2, or, RH-7.x) and NPTL do 60729615Sjmg// not use this flag. However, the stack of initial thread is not created 608124125Scharnier// by pthread, it is still MAP_GROWSDOWN. Also it's possible (though 60929615Sjmg// unlikely) that user code can create a thread with MAP_GROWSDOWN stack 610124125Scharnier// and then attach the thread to JVM. 61129615Sjmg// 612// To get around the problem and allow stack banging on Linux, we need to 613// manually expand thread stack after receiving the SIGSEGV. 614// 615// There are two ways to expand thread stack to address "bottom", we used 616// both of them in JVM before 1.5: 617// 1. adjust stack pointer first so that it is below "bottom", and then 618// touch "bottom" 619// 2. mmap() the page in question 620// 621// Now alternate signal stack is gone, it's harder to use 2. 
For instance, 622// if current sp is already near the lower end of page 101, and we need to 623// call mmap() to map page 100, it is possible that part of the mmap() frame 624// will be placed in page 100. When page 100 is mapped, it is zero-filled. 625// That will destroy the mmap() frame and cause VM to crash. 626// 627// The following code works by adjusting sp first, then accessing the "bottom" 628// page to force a page fault. Linux kernel will then automatically expand the 629// stack mapping. 630// 631// _expand_stack_to() assumes its frame size is less than page size, which 632// should always be true if the function is not inlined. 633 634#if __GNUC__ < 3 // gcc 2.x does not support noinline attribute 635#define NOINLINE 636#else 637#define NOINLINE __attribute__ ((noinline)) 638#endif 639 640static void _expand_stack_to(address bottom) NOINLINE; 641 642static void _expand_stack_to(address bottom) { 643 address sp; 644 size_t size; 645 volatile char *p; 646 647 // Adjust bottom to point to the largest address within the same page, it 648 // gives us a one-page buffer if alloca() allocates slightly more memory. 649 bottom = (address)align_size_down((uintptr_t)bottom, os::Linux::page_size()); 650 bottom += os::Linux::page_size() - 1; 651 652 // sp might be slightly above current stack pointer; if that's the case, we 653 // will alloca() a little more space than necessary, which is OK. Don't use 654 // os::current_stack_pointer(), as its result can be slightly below current 655 // stack pointer, causing us to not alloca enough to reach "bottom". 
656 sp = (address)&sp; 657 658 if (sp > bottom) { 659 size = sp - bottom; 660 p = (volatile char *)alloca(size); 661 assert(p != NULL && p <= (volatile char *)bottom, "alloca problem?"); 662 p[0] = '\0'; 663 } 664} 665 666bool os::Linux::manually_expand_stack(JavaThread * t, address addr) { 667 assert(t!=NULL, "just checking"); 668 assert(t->osthread()->expanding_stack(), "expand should be set"); 669 assert(t->stack_base() != NULL, "stack_base was not initialized"); 670 671 if (addr < t->stack_base() && addr >= t->stack_yellow_zone_base()) { 672 sigset_t mask_all, old_sigset; 673 sigfillset(&mask_all); 674 pthread_sigmask(SIG_SETMASK, &mask_all, &old_sigset); 675 _expand_stack_to(addr); 676 pthread_sigmask(SIG_SETMASK, &old_sigset, NULL); 677 return true; 678 } 679 return false; 680} 681 682////////////////////////////////////////////////////////////////////////////// 683// create new thread 684 685static address highest_vm_reserved_address(); 686 687// check if it's safe to start a new thread 688static bool _thread_safety_check(Thread* thread) { 689 if (os::Linux::is_LinuxThreads() && !os::Linux::is_floating_stack()) { 690 // Fixed stack LinuxThreads (SuSE Linux/x86, and some versions of Redhat) 691 // Heap is mmap'ed at lower end of memory space. Thread stacks are 692 // allocated (MAP_FIXED) from high address space. Every thread stack 693 // occupies a fixed size slot (usually 2Mbytes, but user can change 694 // it to other values if they rebuild LinuxThreads). 695 // 696 // Problem with MAP_FIXED is that mmap() can still succeed even part of 697 // the memory region has already been mmap'ed. That means if we have too 698 // many threads and/or very large heap, eventually thread stack will 699 // collide with heap. 700 // 701 // Here we try to prevent heap/stack collision by comparing current 702 // stack bottom with the highest address that has been mmap'ed by JVM 703 // plus a safety margin for memory maps created by native code. 
704 // 705 // This feature can be disabled by setting ThreadSafetyMargin to 0 706 // 707 if (ThreadSafetyMargin > 0) { 708 address stack_bottom = os::current_stack_base() - os::current_stack_size(); 709 710 // not safe if our stack extends below the safety margin 711 return stack_bottom - ThreadSafetyMargin >= highest_vm_reserved_address(); 712 } else { 713 return true; 714 } 715 } else { 716 // Floating stack LinuxThreads or NPTL: 717 // Unlike fixed stack LinuxThreads, thread stacks are not MAP_FIXED. When 718 // there's not enough space left, pthread_create() will fail. If we come 719 // here, that means enough space has been reserved for stack. 720 return true; 721 } 722} 723 724// Thread start routine for all newly created threads 725static void *java_start(Thread *thread) { 726 // Try to randomize the cache line index of hot stack frames. 727 // This helps when threads of the same stack traces evict each other's 728 // cache lines. The threads can be either from the same JVM instance, or 729 // from different JVM instances. The benefit is especially true for 730 // processors with hyperthreading technology. 
731 static int counter = 0; 732 int pid = os::current_process_id(); 733 alloca(((pid ^ counter++) & 7) * 128); 734 735 ThreadLocalStorage::set_thread(thread); 736 737 OSThread* osthread = thread->osthread(); 738 Monitor* sync = osthread->startThread_lock(); 739 740 // non floating stack LinuxThreads needs extra check, see above 741 if (!_thread_safety_check(thread)) { 742 // notify parent thread 743 MutexLockerEx ml(sync, Mutex::_no_safepoint_check_flag); 744 osthread->set_state(ZOMBIE); 745 sync->notify_all(); 746 return NULL; 747 } 748 749 // thread_id is kernel thread id (similar to Solaris LWP id) 750 osthread->set_thread_id(os::Linux::gettid()); 751 752 if (UseNUMA) { 753 int lgrp_id = os::numa_get_group_id(); 754 if (lgrp_id != -1) { 755 thread->set_lgrp_id(lgrp_id); 756 } 757 } 758 // initialize signal mask for this thread 759 os::Linux::hotspot_sigmask(thread); 760 761 // initialize floating point control register 762 os::Linux::init_thread_fpu_state(); 763 764 // handshaking with parent thread 765 { 766 MutexLockerEx ml(sync, Mutex::_no_safepoint_check_flag); 767 768 // notify parent thread 769 osthread->set_state(INITIALIZED); 770 sync->notify_all(); 771 772 // wait until os::start_thread() 773 while (osthread->get_state() == INITIALIZED) { 774 sync->wait(Mutex::_no_safepoint_check_flag); 775 } 776 } 777 778 // call one more level start routine 779 thread->run(); 780 781 return 0; 782} 783 784bool os::create_thread(Thread* thread, ThreadType thr_type, size_t stack_size) { 785 assert(thread->osthread() == NULL, "caller responsible"); 786 787 // Allocate the OSThread object 788 OSThread* osthread = new OSThread(NULL, NULL); 789 if (osthread == NULL) { 790 return false; 791 } 792 793 // set the correct thread state 794 osthread->set_thread_type(thr_type); 795 796 // Initial state is ALLOCATED but not INITIALIZED 797 osthread->set_state(ALLOCATED); 798 799 thread->set_osthread(osthread); 800 801 // init thread attributes 802 pthread_attr_t attr; 803 
pthread_attr_init(&attr); 804 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); 805 806 // stack size 807 if (os::Linux::supports_variable_stack_size()) { 808 // calculate stack size if it's not specified by caller 809 if (stack_size == 0) { 810 stack_size = os::Linux::default_stack_size(thr_type); 811 812 switch (thr_type) { 813 case os::java_thread: 814 // Java threads use ThreadStackSize which default value can be changed with the flag -Xss 815 if (JavaThread::stack_size_at_create() > 0) stack_size = JavaThread::stack_size_at_create(); 816 break; 817 case os::compiler_thread: 818 if (CompilerThreadStackSize > 0) { 819 stack_size = (size_t)(CompilerThreadStackSize * K); 820 break; 821 } // else fall through: 822 // use VMThreadStackSize if CompilerThreadStackSize is not defined 823 case os::vm_thread: 824 case os::pgc_thread: 825 case os::cgc_thread: 826 case os::watcher_thread: 827 if (VMThreadStackSize > 0) stack_size = (size_t)(VMThreadStackSize * K); 828 break; 829 } 830 } 831 832 stack_size = MAX2(stack_size, os::Linux::min_stack_allowed); 833 pthread_attr_setstacksize(&attr, stack_size); 834 } else { 835 // let pthread_create() pick the default value. 
836 } 837 838 // glibc guard page 839 pthread_attr_setguardsize(&attr, os::Linux::default_guard_size(thr_type)); 840 841 ThreadState state; 842 843 { 844 // Serialize thread creation if we are running with fixed stack LinuxThreads 845 bool lock = os::Linux::is_LinuxThreads() && !os::Linux::is_floating_stack(); 846 if (lock) { 847 os::Linux::createThread_lock()->lock_without_safepoint_check(); 848 } 849 850 pthread_t tid; 851 int ret = pthread_create(&tid, &attr, (void* (*)(void*)) java_start, thread); 852 853 pthread_attr_destroy(&attr); 854 855 if (ret != 0) { 856 if (PrintMiscellaneous && (Verbose || WizardMode)) { 857 perror("pthread_create()"); 858 } 859 // Need to clean up stuff we've allocated so far 860 thread->set_osthread(NULL); 861 delete osthread; 862 if (lock) os::Linux::createThread_lock()->unlock(); 863 return false; 864 } 865 866 // Store pthread info into the OSThread 867 osthread->set_pthread_id(tid); 868 869 // Wait until child thread is either initialized or aborted 870 { 871 Monitor* sync_with_child = osthread->startThread_lock(); 872 MutexLockerEx ml(sync_with_child, Mutex::_no_safepoint_check_flag); 873 while ((state = osthread->get_state()) == ALLOCATED) { 874 sync_with_child->wait(Mutex::_no_safepoint_check_flag); 875 } 876 } 877 878 if (lock) { 879 os::Linux::createThread_lock()->unlock(); 880 } 881 } 882 883 // Aborted due to thread limit being reached 884 if (state == ZOMBIE) { 885 thread->set_osthread(NULL); 886 delete osthread; 887 return false; 888 } 889 890 // The thread is returned suspended (in state INITIALIZED), 891 // and is started higher up in the call chain 892 assert(state == INITIALIZED, "race condition"); 893 return true; 894} 895 896///////////////////////////////////////////////////////////////////////////// 897// attach existing thread 898 899// bootstrap the main thread 900bool os::create_main_thread(JavaThread* thread) { 901 assert(os::Linux::_main_thread == pthread_self(), "should be called inside main thread"); 902 
return create_attached_thread(thread); 903} 904 905bool os::create_attached_thread(JavaThread* thread) { 906#ifdef ASSERT 907 thread->verify_not_published(); 908#endif 909 910 // Allocate the OSThread object 911 OSThread* osthread = new OSThread(NULL, NULL); 912 913 if (osthread == NULL) { 914 return false; 915 } 916 917 // Store pthread info into the OSThread 918 osthread->set_thread_id(os::Linux::gettid()); 919 osthread->set_pthread_id(::pthread_self()); 920 921 // initialize floating point control register 922 os::Linux::init_thread_fpu_state(); 923 924 // Initial thread state is RUNNABLE 925 osthread->set_state(RUNNABLE); 926 927 thread->set_osthread(osthread); 928 929 if (UseNUMA) { 930 int lgrp_id = os::numa_get_group_id(); 931 if (lgrp_id != -1) { 932 thread->set_lgrp_id(lgrp_id); 933 } 934 } 935 936 if (os::Linux::is_initial_thread()) { 937 // If current thread is initial thread, its stack is mapped on demand, 938 // see notes about MAP_GROWSDOWN. Here we try to force kernel to map 939 // the entire stack region to avoid SEGV in stack banging. 940 // It is also useful to get around the heap-stack-gap problem on SuSE 941 // kernel (see 4821821 for details). We first expand stack to the top 942 // of yellow zone, then enable stack yellow zone (order is significant, 943 // enabling yellow zone first will crash JVM on SuSE Linux), so there 944 // is no gap between the last two virtual memory regions. 
945 946 JavaThread *jt = (JavaThread *)thread; 947 address addr = jt->stack_yellow_zone_base(); 948 assert(addr != NULL, "initialization problem?"); 949 assert(jt->stack_available(addr) > 0, "stack guard should not be enabled"); 950 951 osthread->set_expanding_stack(); 952 os::Linux::manually_expand_stack(jt, addr); 953 osthread->clear_expanding_stack(); 954 } 955 956 // initialize signal mask for this thread 957 // and save the caller's signal mask 958 os::Linux::hotspot_sigmask(thread); 959 960 return true; 961} 962 963void os::pd_start_thread(Thread* thread) { 964 OSThread * osthread = thread->osthread(); 965 assert(osthread->get_state() != INITIALIZED, "just checking"); 966 Monitor* sync_with_child = osthread->startThread_lock(); 967 MutexLockerEx ml(sync_with_child, Mutex::_no_safepoint_check_flag); 968 sync_with_child->notify(); 969} 970 971// Free Linux resources related to the OSThread 972void os::free_thread(OSThread* osthread) { 973 assert(osthread != NULL, "osthread not set"); 974 975 if (Thread::current()->osthread() == osthread) { 976 // Restore caller's signal mask 977 sigset_t sigmask = osthread->caller_sigmask(); 978 pthread_sigmask(SIG_SETMASK, &sigmask, NULL); 979 } 980 981 delete osthread; 982} 983 984////////////////////////////////////////////////////////////////////////////// 985// thread local storage 986 987int os::allocate_thread_local_storage() { 988 pthread_key_t key; 989 int rslt = pthread_key_create(&key, NULL); 990 assert(rslt == 0, "cannot allocate thread local storage"); 991 return (int)key; 992} 993 994// Note: This is currently not used by VM, as we don't destroy TLS key 995// on VM exit. 
996void os::free_thread_local_storage(int index) { 997 int rslt = pthread_key_delete((pthread_key_t)index); 998 assert(rslt == 0, "invalid index"); 999} 1000 1001void os::thread_local_storage_at_put(int index, void* value) { 1002 int rslt = pthread_setspecific((pthread_key_t)index, value); 1003 assert(rslt == 0, "pthread_setspecific failed"); 1004} 1005 1006extern "C" Thread* get_thread() { 1007 return ThreadLocalStorage::thread(); 1008} 1009 1010////////////////////////////////////////////////////////////////////////////// 1011// initial thread 1012 1013// Check if current thread is the initial thread, similar to Solaris thr_main. 1014bool os::Linux::is_initial_thread(void) { 1015 char dummy; 1016 // If called before init complete, thread stack bottom will be null. 1017 // Can be called if fatal error occurs before initialization. 1018 if (initial_thread_stack_bottom() == NULL) return false; 1019 assert(initial_thread_stack_bottom() != NULL && 1020 initial_thread_stack_size() != 0, 1021 "os::init did not locate initial thread's stack region"); 1022 if ((address)&dummy >= initial_thread_stack_bottom() && 1023 (address)&dummy < initial_thread_stack_bottom() + initial_thread_stack_size()) 1024 return true; 1025 else return false; 1026} 1027 1028// Find the virtual memory area that contains addr 1029static bool find_vma(address addr, address* vma_low, address* vma_high) { 1030 FILE *fp = fopen("/proc/self/maps", "r"); 1031 if (fp) { 1032 address low, high; 1033 while (!feof(fp)) { 1034 if (fscanf(fp, "%p-%p", &low, &high) == 2) { 1035 if (low <= addr && addr < high) { 1036 if (vma_low) *vma_low = low; 1037 if (vma_high) *vma_high = high; 1038 fclose (fp); 1039 return true; 1040 } 1041 } 1042 for (;;) { 1043 int ch = fgetc(fp); 1044 if (ch == EOF || ch == (int)'\n') break; 1045 } 1046 } 1047 fclose(fp); 1048 } 1049 return false; 1050} 1051 1052// Locate initial thread stack. 
This special handling of initial thread stack 1053// is needed because pthread_getattr_np() on most (all?) Linux distros returns 1054// bogus value for initial thread. 1055void os::Linux::capture_initial_stack(size_t max_size) { 1056 // stack size is the easy part, get it from RLIMIT_STACK 1057 size_t stack_size; 1058 struct rlimit rlim; 1059 getrlimit(RLIMIT_STACK, &rlim); 1060 stack_size = rlim.rlim_cur; 1061 1062 // 6308388: a bug in ld.so will relocate its own .data section to the 1063 // lower end of primordial stack; reduce ulimit -s value a little bit 1064 // so we won't install guard page on ld.so's data section. 1065 stack_size -= 2 * page_size(); 1066 1067 // 4441425: avoid crash with "unlimited" stack size on SuSE 7.1 or Redhat 1068 // 7.1, in both cases we will get 2G in return value. 1069 // 4466587: glibc 2.2.x compiled w/o "--enable-kernel=2.4.0" (RH 7.0, 1070 // SuSE 7.2, Debian) can not handle alternate signal stack correctly 1071 // for initial thread if its stack size exceeds 6M. Cap it at 2M, 1072 // in case other parts in glibc still assumes 2M max stack size. 1073 // FIXME: alt signal stack is gone, maybe we can relax this constraint? 1074#ifndef IA64 1075 if (stack_size > 2 * K * K) stack_size = 2 * K * K; 1076#else 1077 // Problem still exists RH7.2 (IA64 anyway) but 2MB is a little small 1078 if (stack_size > 4 * K * K) stack_size = 4 * K * K; 1079#endif 1080 1081 // Try to figure out where the stack base (top) is. This is harder. 1082 // 1083 // When an application is started, glibc saves the initial stack pointer in 1084 // a global variable "__libc_stack_end", which is then used by system 1085 // libraries. __libc_stack_end should be pretty close to stack top. The 1086 // variable is available since the very early days. However, because it is 1087 // a private interface, it could disappear in the future. 1088 // 1089 // Linux kernel saves start_stack information in /proc/<pid>/stat. 
Similar 1090 // to __libc_stack_end, it is very close to stack top, but isn't the real 1091 // stack top. Note that /proc may not exist if VM is running as a chroot 1092 // program, so reading /proc/<pid>/stat could fail. Also the contents of 1093 // /proc/<pid>/stat could change in the future (though unlikely). 1094 // 1095 // We try __libc_stack_end first. If that doesn't work, look for 1096 // /proc/<pid>/stat. If neither of them works, we use current stack pointer 1097 // as a hint, which should work well in most cases. 1098 1099 uintptr_t stack_start; 1100 1101 // try __libc_stack_end first 1102 uintptr_t *p = (uintptr_t *)dlsym(RTLD_DEFAULT, "__libc_stack_end"); 1103 if (p && *p) { 1104 stack_start = *p; 1105 } else { 1106 // see if we can get the start_stack field from /proc/self/stat 1107 FILE *fp; 1108 int pid; 1109 char state; 1110 int ppid; 1111 int pgrp; 1112 int session; 1113 int nr; 1114 int tpgrp; 1115 unsigned long flags; 1116 unsigned long minflt; 1117 unsigned long cminflt; 1118 unsigned long majflt; 1119 unsigned long cmajflt; 1120 unsigned long utime; 1121 unsigned long stime; 1122 long cutime; 1123 long cstime; 1124 long prio; 1125 long nice; 1126 long junk; 1127 long it_real; 1128 uintptr_t start; 1129 uintptr_t vsize; 1130 uintptr_t rss; 1131 unsigned long rsslim; 1132 uintptr_t scodes; 1133 uintptr_t ecode; 1134 int i; 1135 1136 // Figure what the primordial thread stack base is. Code is inspired 1137 // by email from Hans Boehm. /proc/self/stat begins with current pid, 1138 // followed by command name surrounded by parentheses, state, etc. 1139 char stat[2048]; 1140 int statlen; 1141 1142 fp = fopen("/proc/self/stat", "r"); 1143 if (fp) { 1144 statlen = fread(stat, 1, 2047, fp); 1145 stat[statlen] = '\0'; 1146 fclose(fp); 1147 1148 // Skip pid and the command string. Note that we could be dealing with 1149 // weird command names, e.g. 
user could decide to rename java launcher 1150 // to "java 1.4.2 :)", then the stat file would look like 1151 // 1234 (java 1.4.2 :)) R ... ... 1152 // We don't really need to know the command string, just find the last 1153 // occurrence of ")" and then start parsing from there. See bug 4726580. 1154 char * s = strrchr(stat, ')'); 1155 1156 i = 0; 1157 if (s) { 1158 // Skip blank chars 1159 do s++; while (isspace(*s)); 1160 1161 /* 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 */ 1162 /* 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 */ 1163 i = sscanf(s, "%c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %lu %lu %ld %lu %lu %lu %lu", 1164 &state, /* 3 %c */ 1165 &ppid, /* 4 %d */ 1166 &pgrp, /* 5 %d */ 1167 &session, /* 6 %d */ 1168 &nr, /* 7 %d */ 1169 &tpgrp, /* 8 %d */ 1170 &flags, /* 9 %lu */ 1171 &minflt, /* 10 %lu */ 1172 &cminflt, /* 11 %lu */ 1173 &majflt, /* 12 %lu */ 1174 &cmajflt, /* 13 %lu */ 1175 &utime, /* 14 %lu */ 1176 &stime, /* 15 %lu */ 1177 &cutime, /* 16 %ld */ 1178 &cstime, /* 17 %ld */ 1179 &prio, /* 18 %ld */ 1180 &nice, /* 19 %ld */ 1181 &junk, /* 20 %ld */ 1182 &it_real, /* 21 %ld */ 1183 &start, /* 22 %lu */ 1184 &vsize, /* 23 %lu */ 1185 &rss, /* 24 %ld */ 1186 &rsslim, /* 25 %lu */ 1187 &scodes, /* 26 %lu */ 1188 &ecode, /* 27 %lu */ 1189 &stack_start); /* 28 %lu */ 1190 } 1191 1192 if (i != 28 - 2) { 1193 assert(false, "Bad conversion from /proc/self/stat"); 1194 // product mode - assume we are the initial thread, good luck in the 1195 // embedded case. 
1196 warning("Can't detect initial thread stack location - bad conversion"); 1197 stack_start = (uintptr_t) &rlim; 1198 } 1199 } else { 1200 // For some reason we can't open /proc/self/stat (for example, running on 1201 // FreeBSD with a Linux emulator, or inside chroot), this should work for 1202 // most cases, so don't abort: 1203 warning("Can't detect initial thread stack location - no /proc/self/stat"); 1204 stack_start = (uintptr_t) &rlim; 1205 } 1206 } 1207 1208 // Now we have a pointer (stack_start) very close to the stack top, the 1209 // next thing to do is to figure out the exact location of stack top. We 1210 // can find out the virtual memory area that contains stack_start by 1211 // reading /proc/self/maps, it should be the last vma in /proc/self/maps, 1212 // and its upper limit is the real stack top. (again, this would fail if 1213 // running inside chroot, because /proc may not exist.) 1214 1215 uintptr_t stack_top; 1216 address low, high; 1217 if (find_vma((address)stack_start, &low, &high)) { 1218 // success, "high" is the true stack top. (ignore "low", because initial 1219 // thread stack grows on demand, its real bottom is high - RLIMIT_STACK.) 1220 stack_top = (uintptr_t)high; 1221 } else { 1222 // failed, likely because /proc/self/maps does not exist 1223 warning("Can't detect initial thread stack location - find_vma failed"); 1224 // best effort: stack_start is normally within a few pages below the real 1225 // stack top, use it as stack top, and reduce stack size so we won't put 1226 // guard page outside stack. 
1227 stack_top = stack_start; 1228 stack_size -= 16 * page_size(); 1229 } 1230 1231 // stack_top could be partially down the page so align it 1232 stack_top = align_size_up(stack_top, page_size()); 1233 1234 if (max_size && stack_size > max_size) { 1235 _initial_thread_stack_size = max_size; 1236 } else { 1237 _initial_thread_stack_size = stack_size; 1238 } 1239 1240 _initial_thread_stack_size = align_size_down(_initial_thread_stack_size, page_size()); 1241 _initial_thread_stack_bottom = (address)stack_top - _initial_thread_stack_size; 1242} 1243 1244//////////////////////////////////////////////////////////////////////////////// 1245// time support 1246 1247// Time since start-up in seconds to a fine granularity. 1248// Used by VMSelfDestructTimer and the MemProfiler. 1249double os::elapsedTime() { 1250 1251 return (double)(os::elapsed_counter()) * 0.000001; 1252} 1253 1254jlong os::elapsed_counter() { 1255 timeval time; 1256 int status = gettimeofday(&time, NULL); 1257 return jlong(time.tv_sec) * 1000 * 1000 + jlong(time.tv_usec) - initial_time_count; 1258} 1259 1260jlong os::elapsed_frequency() { 1261 return (1000 * 1000); 1262} 1263 1264// For now, we say that linux does not support vtime. I have no idea 1265// whether it can actually be made to (DLD, 9/13/05). 
1266 1267bool os::supports_vtime() { return false; } 1268bool os::enable_vtime() { return false; } 1269bool os::vtime_enabled() { return false; } 1270double os::elapsedVTime() { 1271 // better than nothing, but not much 1272 return elapsedTime(); 1273} 1274 1275jlong os::javaTimeMillis() { 1276 timeval time; 1277 int status = gettimeofday(&time, NULL); 1278 assert(status != -1, "linux error"); 1279 return jlong(time.tv_sec) * 1000 + jlong(time.tv_usec / 1000); 1280} 1281 1282#ifndef CLOCK_MONOTONIC 1283#define CLOCK_MONOTONIC (1) 1284#endif 1285 1286void os::Linux::clock_init() { 1287 // we do dlopen's in this particular order due to bug in linux 1288 // dynamical loader (see 6348968) leading to crash on exit 1289 void* handle = dlopen("librt.so.1", RTLD_LAZY); 1290 if (handle == NULL) { 1291 handle = dlopen("librt.so", RTLD_LAZY); 1292 } 1293 1294 if (handle) { 1295 int (*clock_getres_func)(clockid_t, struct timespec*) = 1296 (int(*)(clockid_t, struct timespec*))dlsym(handle, "clock_getres"); 1297 int (*clock_gettime_func)(clockid_t, struct timespec*) = 1298 (int(*)(clockid_t, struct timespec*))dlsym(handle, "clock_gettime"); 1299 if (clock_getres_func && clock_gettime_func) { 1300 // See if monotonic clock is supported by the kernel. Note that some 1301 // early implementations simply return kernel jiffies (updated every 1302 // 1/100 or 1/1000 second). It would be bad to use such a low res clock 1303 // for nano time (though the monotonic property is still nice to have). 1304 // It's fixed in newer kernels, however clock_getres() still returns 1305 // 1/HZ. We check if clock_getres() works, but will ignore its reported 1306 // resolution for now. Hopefully as people move to new kernels, this 1307 // won't be a problem. 
1308 struct timespec res; 1309 struct timespec tp; 1310 if (clock_getres_func (CLOCK_MONOTONIC, &res) == 0 && 1311 clock_gettime_func(CLOCK_MONOTONIC, &tp) == 0) { 1312 // yes, monotonic clock is supported 1313 _clock_gettime = clock_gettime_func; 1314 } else { 1315 // close librt if there is no monotonic clock 1316 dlclose(handle); 1317 } 1318 } 1319 } 1320} 1321 1322#ifndef SYS_clock_getres 1323 1324#if defined(IA32) || defined(AMD64) 1325#define SYS_clock_getres IA32_ONLY(266) AMD64_ONLY(229) 1326#else 1327#error Value of SYS_clock_getres not known on this platform 1328#endif 1329 1330#endif 1331 1332#define sys_clock_getres(x,y) ::syscall(SYS_clock_getres, x, y) 1333 1334void os::Linux::fast_thread_clock_init() { 1335 if (!UseLinuxPosixThreadCPUClocks) { 1336 return; 1337 } 1338 clockid_t clockid; 1339 struct timespec tp; 1340 int (*pthread_getcpuclockid_func)(pthread_t, clockid_t *) = 1341 (int(*)(pthread_t, clockid_t *)) dlsym(RTLD_DEFAULT, "pthread_getcpuclockid"); 1342 1343 // Switch to using fast clocks for thread cpu time if 1344 // the sys_clock_getres() returns 0 error code. 1345 // Note, that some kernels may support the current thread 1346 // clock (CLOCK_THREAD_CPUTIME_ID) but not the clocks 1347 // returned by the pthread_getcpuclockid(). 1348 // If the fast Posix clocks are supported then the sys_clock_getres() 1349 // must return at least tp.tv_sec == 0 which means a resolution 1350 // better than 1 sec. This is extra check for reliability. 
1351 1352 if(pthread_getcpuclockid_func && 1353 pthread_getcpuclockid_func(_main_thread, &clockid) == 0 && 1354 sys_clock_getres(clockid, &tp) == 0 && tp.tv_sec == 0) { 1355 1356 _supports_fast_thread_cpu_time = true; 1357 _pthread_getcpuclockid = pthread_getcpuclockid_func; 1358 } 1359} 1360 1361jlong os::javaTimeNanos() { 1362 if (Linux::supports_monotonic_clock()) { 1363 struct timespec tp; 1364 int status = Linux::clock_gettime(CLOCK_MONOTONIC, &tp); 1365 assert(status == 0, "gettime error"); 1366 jlong result = jlong(tp.tv_sec) * (1000 * 1000 * 1000) + jlong(tp.tv_nsec); 1367 return result; 1368 } else { 1369 timeval time; 1370 int status = gettimeofday(&time, NULL); 1371 assert(status != -1, "linux error"); 1372 jlong usecs = jlong(time.tv_sec) * (1000 * 1000) + jlong(time.tv_usec); 1373 return 1000 * usecs; 1374 } 1375} 1376 1377void os::javaTimeNanos_info(jvmtiTimerInfo *info_ptr) { 1378 if (Linux::supports_monotonic_clock()) { 1379 info_ptr->max_value = ALL_64_BITS; 1380 1381 // CLOCK_MONOTONIC - amount of time since some arbitrary point in the past 1382 info_ptr->may_skip_backward = false; // not subject to resetting or drifting 1383 info_ptr->may_skip_forward = false; // not subject to resetting or drifting 1384 } else { 1385 // gettimeofday - based on time in seconds since the Epoch thus does not wrap 1386 info_ptr->max_value = ALL_64_BITS; 1387 1388 // gettimeofday is a real time clock so it skips 1389 info_ptr->may_skip_backward = true; 1390 info_ptr->may_skip_forward = true; 1391 } 1392 1393 info_ptr->kind = JVMTI_TIMER_ELAPSED; // elapsed not CPU time 1394} 1395 1396// Return the real, user, and system times in seconds from an 1397// arbitrary fixed point in the past. 
1398bool os::getTimesSecs(double* process_real_time, 1399 double* process_user_time, 1400 double* process_system_time) { 1401 struct tms ticks; 1402 clock_t real_ticks = times(&ticks); 1403 1404 if (real_ticks == (clock_t) (-1)) { 1405 return false; 1406 } else { 1407 double ticks_per_second = (double) clock_tics_per_sec; 1408 *process_user_time = ((double) ticks.tms_utime) / ticks_per_second; 1409 *process_system_time = ((double) ticks.tms_stime) / ticks_per_second; 1410 *process_real_time = ((double) real_ticks) / ticks_per_second; 1411 1412 return true; 1413 } 1414} 1415 1416 1417char * os::local_time_string(char *buf, size_t buflen) { 1418 struct tm t; 1419 time_t long_time; 1420 time(&long_time); 1421 localtime_r(&long_time, &t); 1422 jio_snprintf(buf, buflen, "%d-%02d-%02d %02d:%02d:%02d", 1423 t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, 1424 t.tm_hour, t.tm_min, t.tm_sec); 1425 return buf; 1426} 1427 1428//////////////////////////////////////////////////////////////////////////////// 1429// runtime exit support 1430 1431// Note: os::shutdown() might be called very early during initialization, or 1432// called from signal handler. Before adding something to os::shutdown(), make 1433// sure it is async-safe and can handle partially initialized VM. 1434void os::shutdown() { 1435 1436 // allow PerfMemory to attempt cleanup of any persistent resources 1437 perfMemory_exit(); 1438 1439 // needs to remove object in file system 1440 AttachListener::abort(); 1441 1442 // flush buffered output, finish log files 1443 ostream_abort(); 1444 1445 // Check for abort hook 1446 abort_hook_t abort_hook = Arguments::abort_hook(); 1447 if (abort_hook != NULL) { 1448 abort_hook(); 1449 } 1450 1451} 1452 1453// Note: os::abort() might be called very early during initialization, or 1454// called from signal handler. Before adding something to os::abort(), make 1455// sure it is async-safe and can handle partially initialized VM. 
1456void os::abort(bool dump_core) { 1457 os::shutdown(); 1458 if (dump_core) { 1459#ifndef PRODUCT 1460 fdStream out(defaultStream::output_fd()); 1461 out.print_raw("Current thread is "); 1462 char buf[16]; 1463 jio_snprintf(buf, sizeof(buf), UINTX_FORMAT, os::current_thread_id()); 1464 out.print_raw_cr(buf); 1465 out.print_raw_cr("Dumping core ..."); 1466#endif 1467 ::abort(); // dump core 1468 } 1469 1470 ::exit(1); 1471} 1472 1473// Die immediately, no exit hook, no abort hook, no cleanup. 1474void os::die() { 1475 // _exit() on LinuxThreads only kills current thread 1476 ::abort(); 1477} 1478 1479// unused on linux for now. 1480void os::set_error_file(const char *logfile) {} 1481 1482intx os::current_thread_id() { return (intx)pthread_self(); } 1483int os::current_process_id() { 1484 1485 // Under the old linux thread library, linux gives each thread 1486 // its own process id. Because of this each thread will return 1487 // a different pid if this method were to return the result 1488 // of getpid(2). Linux provides no api that returns the pid 1489 // of the launcher thread for the vm. This implementation 1490 // returns a unique pid, the pid of the launcher thread 1491 // that starts the vm 'process'. 1492 1493 // Under the NPTL, getpid() returns the same pid as the 1494 // launcher thread rather than a unique pid per thread. 1495 // Use gettid() if you want the old pre NPTL behaviour. 1496 1497 // if you are looking for the result of a call to getpid() that 1498 // returns a unique pid for the calling thread, then look at the 1499 // OSThread::thread_id() method in osThread_linux.hpp file 1500 1501 return (int)(_initial_pid ? 
_initial_pid : getpid()); 1502} 1503 1504// DLL functions 1505 1506const char* os::dll_file_extension() { return ".so"; } 1507 1508const char* os::get_temp_directory() { return "/tmp/"; } 1509 1510const char* os::get_current_directory(char *buf, int buflen) { 1511 return getcwd(buf, buflen); 1512} 1513 1514// check if addr is inside libjvm[_g].so 1515bool os::address_is_in_vm(address addr) { 1516 static address libjvm_base_addr; 1517 Dl_info dlinfo; 1518 1519 if (libjvm_base_addr == NULL) { 1520 dladdr(CAST_FROM_FN_PTR(void *, os::address_is_in_vm), &dlinfo); 1521 libjvm_base_addr = (address)dlinfo.dli_fbase; 1522 assert(libjvm_base_addr !=NULL, "Cannot obtain base address for libjvm"); 1523 } 1524 1525 if (dladdr((void *)addr, &dlinfo)) { 1526 if (libjvm_base_addr == (address)dlinfo.dli_fbase) return true; 1527 } 1528 1529 return false; 1530} 1531 1532bool os::dll_address_to_function_name(address addr, char *buf, 1533 int buflen, int *offset) { 1534 Dl_info dlinfo; 1535 1536 if (dladdr((void*)addr, &dlinfo) && dlinfo.dli_sname != NULL) { 1537 if (buf) jio_snprintf(buf, buflen, "%s", dlinfo.dli_sname); 1538 if (offset) *offset = addr - (address)dlinfo.dli_saddr; 1539 return true; 1540 } else { 1541 if (buf) buf[0] = '\0'; 1542 if (offset) *offset = -1; 1543 return false; 1544 } 1545} 1546 1547struct _address_to_library_name { 1548 address addr; // input : memory address 1549 size_t buflen; // size of fname 1550 char* fname; // output: library name 1551 address base; // library base addr 1552}; 1553 1554static int address_to_library_name_callback(struct dl_phdr_info *info, 1555 size_t size, void *data) { 1556 int i; 1557 bool found = false; 1558 address libbase = NULL; 1559 struct _address_to_library_name * d = (struct _address_to_library_name *)data; 1560 1561 // iterate through all loadable segments 1562 for (i = 0; i < info->dlpi_phnum; i++) { 1563 address segbase = (address)(info->dlpi_addr + info->dlpi_phdr[i].p_vaddr); 1564 if (info->dlpi_phdr[i].p_type == 
PT_LOAD) { 1565 // base address of a library is the lowest address of its loaded 1566 // segments. 1567 if (libbase == NULL || libbase > segbase) { 1568 libbase = segbase; 1569 } 1570 // see if 'addr' is within current segment 1571 if (segbase <= d->addr && 1572 d->addr < segbase + info->dlpi_phdr[i].p_memsz) { 1573 found = true; 1574 } 1575 } 1576 } 1577 1578 // dlpi_name is NULL or empty if the ELF file is executable, return 0 1579 // so dll_address_to_library_name() can fall through to use dladdr() which 1580 // can figure out executable name from argv[0]. 1581 if (found && info->dlpi_name && info->dlpi_name[0]) { 1582 d->base = libbase; 1583 if (d->fname) { 1584 jio_snprintf(d->fname, d->buflen, "%s", info->dlpi_name); 1585 } 1586 return 1; 1587 } 1588 return 0; 1589} 1590 1591bool os::dll_address_to_library_name(address addr, char* buf, 1592 int buflen, int* offset) { 1593 Dl_info dlinfo; 1594 struct _address_to_library_name data; 1595 1596 // There is a bug in old glibc dladdr() implementation that it could resolve 1597 // to wrong library name if the .so file has a base address != NULL. Here 1598 // we iterate through the program headers of all loaded libraries to find 1599 // out which library 'addr' really belongs to. This workaround can be 1600 // removed once the minimum requirement for glibc is moved to 2.3.x. 
1601 data.addr = addr; 1602 data.fname = buf; 1603 data.buflen = buflen; 1604 data.base = NULL; 1605 int rslt = dl_iterate_phdr(address_to_library_name_callback, (void *)&data); 1606 1607 if (rslt) { 1608 // buf already contains library name 1609 if (offset) *offset = addr - data.base; 1610 return true; 1611 } else if (dladdr((void*)addr, &dlinfo)){ 1612 if (buf) jio_snprintf(buf, buflen, "%s", dlinfo.dli_fname); 1613 if (offset) *offset = addr - (address)dlinfo.dli_fbase; 1614 return true; 1615 } else { 1616 if (buf) buf[0] = '\0'; 1617 if (offset) *offset = -1; 1618 return false; 1619 } 1620} 1621 1622 // Loads .dll/.so and 1623 // in case of error it checks if .dll/.so was built for the 1624 // same architecture as Hotspot is running on 1625 1626void * os::dll_load(const char *filename, char *ebuf, int ebuflen) 1627{ 1628 void * result= ::dlopen(filename, RTLD_LAZY); 1629 if (result != NULL) { 1630 // Successful loading 1631 return result; 1632 } 1633 1634 Elf32_Ehdr elf_head; 1635 1636 // Read system error message into ebuf 1637 // It may or may not be overwritten below 1638 ::strncpy(ebuf, ::dlerror(), ebuflen-1); 1639 ebuf[ebuflen-1]='\0'; 1640 int diag_msg_max_length=ebuflen-strlen(ebuf); 1641 char* diag_msg_buf=ebuf+strlen(ebuf); 1642 1643 if (diag_msg_max_length==0) { 1644 // No more space in ebuf for additional diagnostics message 1645 return NULL; 1646 } 1647 1648 1649 int file_descriptor= ::open(filename, O_RDONLY | O_NONBLOCK); 1650 1651 if (file_descriptor < 0) { 1652 // Can't open library, report dlerror() message 1653 return NULL; 1654 } 1655 1656 bool failed_to_read_elf_head= 1657 (sizeof(elf_head)!= 1658 (::read(file_descriptor, &elf_head,sizeof(elf_head)))) ; 1659 1660 ::close(file_descriptor); 1661 if (failed_to_read_elf_head) { 1662 // file i/o error - report dlerror() msg 1663 return NULL; 1664 } 1665 1666 typedef struct { 1667 Elf32_Half code; // Actual value as defined in elf.h 1668 Elf32_Half compat_class; // Compatibility of archs at VM's 
sense 1669 char elf_class; // 32 or 64 bit 1670 char endianess; // MSB or LSB 1671 char* name; // String representation 1672 } arch_t; 1673 1674 #ifndef EM_486 1675 #define EM_486 6 /* Intel 80486 */ 1676 #endif 1677 1678 static const arch_t arch_array[]={ 1679 {EM_386, EM_386, ELFCLASS32, ELFDATA2LSB, (char*)"IA 32"}, 1680 {EM_486, EM_386, ELFCLASS32, ELFDATA2LSB, (char*)"IA 32"}, 1681 {EM_IA_64, EM_IA_64, ELFCLASS64, ELFDATA2LSB, (char*)"IA 64"}, 1682 {EM_X86_64, EM_X86_64, ELFCLASS64, ELFDATA2LSB, (char*)"AMD 64"}, 1683 {EM_SPARC, EM_SPARC, ELFCLASS32, ELFDATA2MSB, (char*)"Sparc 32"}, 1684 {EM_SPARC32PLUS, EM_SPARC, ELFCLASS32, ELFDATA2MSB, (char*)"Sparc 32"}, 1685 {EM_SPARCV9, EM_SPARCV9, ELFCLASS64, ELFDATA2MSB, (char*)"Sparc v9 64"}, 1686 {EM_PPC, EM_PPC, ELFCLASS32, ELFDATA2MSB, (char*)"Power PC 32"}, 1687 {EM_PPC64, EM_PPC64, ELFCLASS64, ELFDATA2MSB, (char*)"Power PC 64"} 1688 }; 1689 1690 #if (defined IA32) 1691 static Elf32_Half running_arch_code=EM_386; 1692 #elif (defined AMD64) 1693 static Elf32_Half running_arch_code=EM_X86_64; 1694 #elif (defined IA64) 1695 static Elf32_Half running_arch_code=EM_IA_64; 1696 #elif (defined __sparc) && (defined _LP64) 1697 static Elf32_Half running_arch_code=EM_SPARCV9; 1698 #elif (defined __sparc) && (!defined _LP64) 1699 static Elf32_Half running_arch_code=EM_SPARC; 1700 #elif (defined __powerpc64__) 1701 static Elf32_Half running_arch_code=EM_PPC64; 1702 #elif (defined __powerpc__) 1703 static Elf32_Half running_arch_code=EM_PPC; 1704 #else 1705 #error Method os::dll_load requires that one of following is defined:\ 1706 IA32, AMD64, IA64, __sparc, __powerpc__ 1707 #endif 1708 1709 // Identify compatability class for VM's architecture and library's architecture 1710 // Obtain string descriptions for architectures 1711 1712 arch_t lib_arch={elf_head.e_machine,0,elf_head.e_ident[EI_CLASS], elf_head.e_ident[EI_DATA], NULL}; 1713 int running_arch_index=-1; 1714 1715 for (unsigned int i=0 ; i < ARRAY_SIZE(arch_array) ; 
i++ ) { 1716 if (running_arch_code == arch_array[i].code) { 1717 running_arch_index = i; 1718 } 1719 if (lib_arch.code == arch_array[i].code) { 1720 lib_arch.compat_class = arch_array[i].compat_class; 1721 lib_arch.name = arch_array[i].name; 1722 } 1723 } 1724 1725 assert(running_arch_index != -1, 1726 "Didn't find running architecture code (running_arch_code) in arch_array"); 1727 if (running_arch_index == -1) { 1728 // Even though running architecture detection failed 1729 // we may still continue with reporting dlerror() message 1730 return NULL; 1731 } 1732 1733 if (lib_arch.endianess != arch_array[running_arch_index].endianess) { 1734 ::snprintf(diag_msg_buf, diag_msg_max_length-1," (Possible cause: endianness mismatch)"); 1735 return NULL; 1736 } 1737 1738 if (lib_arch.elf_class != arch_array[running_arch_index].elf_class) { 1739 ::snprintf(diag_msg_buf, diag_msg_max_length-1," (Possible cause: architecture word width mismatch)"); 1740 return NULL; 1741 } 1742 1743 if (lib_arch.compat_class != arch_array[running_arch_index].compat_class) { 1744 if ( lib_arch.name!=NULL ) { 1745 ::snprintf(diag_msg_buf, diag_msg_max_length-1, 1746 " (Possible cause: can't load %s-bit .so on a %s-bit platform)", 1747 lib_arch.name, arch_array[running_arch_index].name); 1748 } else { 1749 ::snprintf(diag_msg_buf, diag_msg_max_length-1, 1750 " (Possible cause: can't load this .so (machine code=0x%x) on a %s-bit platform)", 1751 lib_arch.code, 1752 arch_array[running_arch_index].name); 1753 } 1754 } 1755 1756 return NULL; 1757} 1758 1759 1760 1761 1762bool _print_ascii_file(const char* filename, outputStream* st) { 1763 int fd = open(filename, O_RDONLY); 1764 if (fd == -1) { 1765 return false; 1766 } 1767 1768 char buf[32]; 1769 int bytes; 1770 while ((bytes = read(fd, buf, sizeof(buf))) > 0) { 1771 st->print_raw(buf, bytes); 1772 } 1773 1774 close(fd); 1775 1776 return true; 1777} 1778 1779void os::print_dll_info(outputStream *st) { 1780 st->print_cr("Dynamic libraries:"); 1781 
   char fname[32];
   pid_t pid = os::Linux::gettid();

   // On Linux, /proc/<tid>/maps lists all mappings of the whole process.
   jio_snprintf(fname, sizeof(fname), "/proc/%d/maps", pid);

   if (!_print_ascii_file(fname, st)) {
      st->print("Can not get library information for pid = %d\n", pid);
   }
}


void os::print_os_info(outputStream* st) {
   st->print("OS:");

   // Try to identify popular distros.
   // Most Linux distributions have /etc/XXX-release file, which contains
   // the OS version string. Some have more than one /etc/XXX-release file
   // (e.g. Mandrake has both /etc/mandrake-release and /etc/redhat-release.),
   // so the order is important.
   if (!_print_ascii_file("/etc/mandrake-release", st) &&
       !_print_ascii_file("/etc/sun-release", st) &&
       !_print_ascii_file("/etc/redhat-release", st) &&
       !_print_ascii_file("/etc/SuSE-release", st) &&
       !_print_ascii_file("/etc/turbolinux-release", st) &&
       !_print_ascii_file("/etc/gentoo-release", st) &&
       !_print_ascii_file("/etc/debian_version", st)) {
       st->print("Linux");
   }
   st->cr();

   // kernel
   st->print("uname:");
   struct utsname name;
   uname(&name);
   st->print(name.sysname); st->print(" ");
   st->print(name.release); st->print(" ");
   st->print(name.version); st->print(" ");
   st->print(name.machine);
   st->cr();

   // Print warning if unsafe chroot environment detected
   if (unsafe_chroot_detected) {
      st->print("WARNING!! ");
      st->print_cr(unstable_chroot_error);
   }

   // libc, pthread
   st->print("libc:");
   st->print(os::Linux::glibc_version()); st->print(" ");
   st->print(os::Linux::libpthread_version()); st->print(" ");
   if (os::Linux::is_LinuxThreads()) {
      st->print("(%s stack)", os::Linux::is_floating_stack() ? "floating" : "fixed");
   }
   st->cr();

   // rlimit
   // NOTE(review): rlim_cur is of type rlim_t (typically unsigned long);
   // printing it with "%u"/"%d" may truncate on LP64 — confirm against
   // the outputStream format conventions used elsewhere in the VM.
   st->print("rlimit:");
   struct rlimit rlim;

   st->print(" STACK ");
   getrlimit(RLIMIT_STACK, &rlim);
   if (rlim.rlim_cur == RLIM_INFINITY) st->print("infinity");
   else st->print("%uk", rlim.rlim_cur >> 10);

   st->print(", CORE ");
   getrlimit(RLIMIT_CORE, &rlim);
   if (rlim.rlim_cur == RLIM_INFINITY) st->print("infinity");
   else st->print("%uk", rlim.rlim_cur >> 10);

   st->print(", NPROC ");
   getrlimit(RLIMIT_NPROC, &rlim);
   if (rlim.rlim_cur == RLIM_INFINITY) st->print("infinity");
   else st->print("%d", rlim.rlim_cur);

   st->print(", NOFILE ");
   getrlimit(RLIMIT_NOFILE, &rlim);
   if (rlim.rlim_cur == RLIM_INFINITY) st->print("infinity");
   else st->print("%d", rlim.rlim_cur);

   st->print(", AS ");
   getrlimit(RLIMIT_AS, &rlim);
   if (rlim.rlim_cur == RLIM_INFINITY) st->print("infinity");
   else st->print("%uk", rlim.rlim_cur >> 10);
   st->cr();

   // load average
   st->print("load average:");
   double loadavg[3];
   os::loadavg(loadavg, 3);
   st->print("%0.02f %0.02f %0.02f", loadavg[0], loadavg[1], loadavg[2]);
   st->cr();
}

void os::print_memory_info(outputStream* st) {

  st->print("Memory:");
  st->print(" %dk page", os::vm_page_size()>>10);

  // values in struct sysinfo are "unsigned long"
  // (si is only consulted for the swap figures below; physical memory
  //  numbers come from the os:: accessors)
  struct sysinfo si;
  sysinfo(&si);

  st->print(", physical " UINT64_FORMAT "k",
            os::physical_memory() >> 10);
  st->print("(" UINT64_FORMAT "k free)",
            os::available_memory() >> 10);
  st->print(", swap " UINT64_FORMAT "k",
            ((jlong)si.totalswap * si.mem_unit) >> 10);
  st->print("(" UINT64_FORMAT "k free)",
            ((jlong)si.freeswap * si.mem_unit) >> 10);
  st->cr();
}

// Taken from /usr/include/bits/siginfo.h  Supposed to be architecture specific
// but they're the same for all the linux arch that we support
// and they're the same for solaris but there's no common place to put this.
// Index 0 is a placeholder so that si_code values index directly.
const char *ill_names[] = { "ILL0", "ILL_ILLOPC", "ILL_ILLOPN", "ILL_ILLADR",
                          "ILL_ILLTRP", "ILL_PRVOPC", "ILL_PRVREG",
                          "ILL_COPROC", "ILL_BADSTK" };

const char *fpe_names[] = { "FPE0", "FPE_INTDIV", "FPE_INTOVF", "FPE_FLTDIV",
                          "FPE_FLTOVF", "FPE_FLTUND", "FPE_FLTRES",
                          "FPE_FLTINV", "FPE_FLTSUB", "FPE_FLTDEN" };

const char *segv_names[] = { "SEGV0", "SEGV_MAPERR", "SEGV_ACCERR" };

const char *bus_names[] = { "BUS0", "BUS_ADRALN", "BUS_ADRERR", "BUS_OBJERR" };

// Decode a siginfo_t into human-readable form for error reports.
void os::print_siginfo(outputStream* st, void* siginfo) {
  st->print("siginfo:");

  const int buflen = 100;
  char buf[buflen];
  siginfo_t *si = (siginfo_t*)siginfo;
  st->print("si_signo=%s: ", os::exception_name(si->si_signo, buf, buflen));
  if (si->si_errno != 0 && strerror_r(si->si_errno, buf, buflen) == 0) {
    st->print("si_errno=%s", buf);
  } else {
    st->print("si_errno=%d", si->si_errno);
  }
  const int c = si->si_code;
  // NOTE(review): si_code can legally be <= 0 for user-generated signals
  // (SI_USER == 0, SI_QUEUE < 0); the assert assumes hardware-generated
  // faults — confirm callers only pass those.
  assert(c > 0, "unexpected si_code");
  switch (si->si_signo) {
  case SIGILL:
    st->print(", si_code=%d (%s)", c, c > 8 ? "" : ill_names[c]);
    st->print(", si_addr=" PTR_FORMAT, si->si_addr);
    break;
  case SIGFPE:
    st->print(", si_code=%d (%s)", c, c > 9 ? "" : fpe_names[c]);
    st->print(", si_addr=" PTR_FORMAT, si->si_addr);
    break;
  case SIGSEGV:
    st->print(", si_code=%d (%s)", c, c > 2 ? "" : segv_names[c]);
    st->print(", si_addr=" PTR_FORMAT, si->si_addr);
    break;
  case SIGBUS:
    st->print(", si_code=%d (%s)", c, c > 3 ? "" : bus_names[c]);
    st->print(", si_addr=" PTR_FORMAT, si->si_addr);
    break;
  default:
    st->print(", si_code=%d", si->si_code);
    // no si_addr
  }

  if ((si->si_signo == SIGBUS || si->si_signo == SIGSEGV) &&
      UseSharedSpaces) {
    FileMapInfo* mapinfo = FileMapInfo::current_info();
    if (mapinfo->is_in_shared_space(si->si_addr)) {
      st->print("\n\nError accessing class data sharing archive."   \
                " Mapped file inaccessible during execution, "      \
                " possible disk/network problem.");
    }
  }
  st->cr();
}


static void print_signal_handler(outputStream* st, int sig,
                                 char* buf, size_t buflen);

void os::print_signal_handlers(outputStream* st, char* buf, size_t buflen) {
  st->print_cr("Signal Handlers:");
  print_signal_handler(st, SIGSEGV, buf, buflen);
  print_signal_handler(st, SIGBUS , buf, buflen);
  print_signal_handler(st, SIGFPE , buf, buflen);
  print_signal_handler(st, SIGPIPE, buf, buflen);
  print_signal_handler(st, SIGXFSZ, buf, buflen);
  print_signal_handler(st, SIGILL , buf, buflen);
  print_signal_handler(st, INTERRUPT_SIGNAL, buf, buflen);
  print_signal_handler(st, SR_signum, buf, buflen);
  print_signal_handler(st, SHUTDOWN1_SIGNAL, buf, buflen);
  print_signal_handler(st, SHUTDOWN2_SIGNAL , buf, buflen);
  print_signal_handler(st, SHUTDOWN3_SIGNAL , buf, buflen);
  print_signal_handler(st, BREAK_SIGNAL, buf, buflen);
}

// Cached result of os::jvm_path(); filled in on first call.
static char saved_jvm_path[MAXPATHLEN] = {0};

// Find the full path to the current module, libjvm.so or libjvm_g.so
void os::jvm_path(char *buf, jint len) {
  // Error checking.
  if (len < MAXPATHLEN) {
    assert(false, "must use a large-enough buffer");
    buf[0] = '\0';
    return;
  }
  // Lazy resolve the path to current module.
  if (saved_jvm_path[0] != 0) {
    strcpy(buf, saved_jvm_path);
    return;
  }

  char dli_fname[MAXPATHLEN];
  bool ret = dll_address_to_library_name(
                CAST_FROM_FN_PTR(address, os::jvm_path),
                dli_fname, sizeof(dli_fname), NULL);
  assert(ret != 0, "cannot locate libjvm");
  // NOTE(review): realpath() return value is ignored here and below, and
  // sprintf() into buf is unbounded — relies on MAXPATHLEN-sized buffers
  // being large enough; confirm callers always pass len >= MAXPATHLEN
  // (the guard above enforces this).
  realpath(dli_fname, buf);

  if (strcmp(Arguments::sun_java_launcher(), "gamma") == 0) {
    // Support for the gamma launcher.  Typical value for buf is
    // "<JAVA_HOME>/jre/lib/<arch>/<vmtype>/libjvm.so".  If "/jre/lib/" appears at
    // the right place in the string, then assume we are installed in a JDK and
    // we're done.  Otherwise, check for a JAVA_HOME environment variable and fix
    // up the path so it looks like libjvm.so is installed there (append a
    // fake suffix hotspot/libjvm.so).

    // Walk back up to five '/'-separated components from the end of buf.
    const char *p = buf + strlen(buf) - 1;
    for (int count = 0; p > buf && count < 5; ++count) {
      for (--p; p > buf && *p != '/'; --p)
        /* empty */ ;
    }

    if (strncmp(p, "/jre/lib/", 9) != 0) {
      // Look for JAVA_HOME in the environment.
      char* java_home_var = ::getenv("JAVA_HOME");
      if (java_home_var != NULL && java_home_var[0] != 0) {
        // Check the current module name "libjvm.so" or "libjvm_g.so".
        p = strrchr(buf, '/');
        assert(strstr(p, "/libjvm") == p, "invalid library name");
        // p becomes "_g" for a debug build, "" otherwise.
        p = strstr(p, "_g") ? "_g" : "";

        realpath(java_home_var, buf);
        sprintf(buf + strlen(buf), "/jre/lib/%s", cpu_arch);
        if (0 == access(buf, F_OK)) {
          // Use current module name "libjvm[_g].so" instead of
          // "libjvm"debug_only("_g")".so" since for fastdebug version
          // we should have "libjvm.so" but debug_only("_g") adds "_g"!
          // It is used when we are choosing the HPI library's name
          // "libhpi[_g].so" in hpi::initialize_get_interface().
          sprintf(buf + strlen(buf), "/hotspot/libjvm%s.so", p);
        } else {
          // Go back to path of .so
          realpath(dli_fname, buf);
        }
      }
    }
  }

  strcpy(saved_jvm_path, buf);
}

void os::print_jni_name_prefix_on(outputStream* st, int args_size) {
  // no prefix required, not even "_"
}

void os::print_jni_name_suffix_on(outputStream* st, int args_size) {
  // no suffix required
}

////////////////////////////////////////////////////////////////////////////////
// sun.misc.Signal support

// Counts SIGINTs seen since the last check_pending_signals() reset; used
// to collapse the per-thread SIGINT flood into a single sem_post.
static volatile jint sigint_count = 0;

static void
UserHandler(int sig, void *siginfo, void *context) {
  // 4511530 - sem_post is serialized and handled by the manager thread. When
  // the program is interrupted by Ctrl-C, SIGINT is sent to every thread. We
  // don't want to flood the manager thread with sem_post requests.
  if (sig == SIGINT && Atomic::add(1, &sigint_count) > 1)
    return;

  // Ctrl-C is pressed during error reporting, likely because the error
  // handler fails to abort. Let VM die immediately.
  if (sig == SIGINT && is_error_reported()) {
     os::die();
  }

  os::signal_notify(sig);
}

void* os::user_handler() {
  return CAST_FROM_FN_PTR(void*, UserHandler);
}

extern "C" {
  typedef void (*sa_handler_t)(int);
  typedef void (*sa_sigaction_t)(int, siginfo_t *, void *);
}

// Install 'handler' for 'signal_number' via sigaction(); returns the old
// handler, or (void*)-1 on failure.
// NOTE(review): SA_SIGINFO is set but the handler is stored through
// sa_handler (3-arg handlers like UserHandler rely on the two unions
// overlapping, which glibc guarantees in practice) — confirm intended.
void* os::signal(int signal_number, void* handler) {
  struct sigaction sigAct, oldSigAct;

  sigfillset(&(sigAct.sa_mask));
  sigAct.sa_flags   = SA_RESTART|SA_SIGINFO;
  sigAct.sa_handler = CAST_TO_FN_PTR(sa_handler_t, handler);

  if (sigaction(signal_number, &sigAct, &oldSigAct)) {
    // -1 means registration failed
    return (void *)-1;
  }

  return CAST_FROM_FN_PTR(void*, oldSigAct.sa_handler);
}

void os::signal_raise(int signal_number) {
  ::raise(signal_number);
}

/*
 * The following code is moved from os.cpp for making this
 * code platform specific, which it is by its very nature.
 */

// Will be modified when max signal is changed to be dynamic
int os::sigexitnum_pd() {
  return NSIG;
}

// a counter for each possible signal value
static volatile jint pending_signals[NSIG+1] = { 0 };

// Linux(POSIX) specific hand shaking semaphore.
// Posted once per signal_notify(); waited on by signal_wait().
static sem_t sig_sem;

void os::signal_init_pd() {
  // Initialize signal structures
  ::memset((void*)pending_signals, 0, sizeof(pending_signals));

  // Initialize signal semaphore
  ::sem_init(&sig_sem, 0, 0);
}

void os::signal_notify(int sig) {
  Atomic::inc(&pending_signals[sig]);
  ::sem_post(&sig_sem);
}

// Scan pending_signals for a posted signal; if 'wait' is true, block on
// sig_sem until one arrives (honoring the external-suspend protocol).
// Returns the signal number, or -1 if !wait and none is pending.
static int check_pending_signals(bool wait) {
  Atomic::store(0, &sigint_count);
  for (;;) {
    for (int i = 0; i < NSIG + 1; i++) {
      jint n = pending_signals[i];
      // cmpxchg decrements the counter atomically; retry on contention.
      if (n > 0 && n == Atomic::cmpxchg(n - 1, &pending_signals[i], n)) {
        return i;
      }
    }
    if (!wait) {
      return -1;
    }
    JavaThread *thread = JavaThread::current();
    ThreadBlockInVM tbivm(thread);

    bool threadIsSuspended;
    do {
      thread->set_suspend_equivalent();
      // cleared by handle_special_suspend_equivalent_condition() or java_suspend_self()
      ::sem_wait(&sig_sem);

      // were we externally suspended while we were waiting?
      threadIsSuspended = thread->handle_special_suspend_equivalent_condition();
      if (threadIsSuspended) {
        //
        // The semaphore has been incremented, but while we were waiting
        // another thread suspended us. We don't want to continue running
        // while suspended because that would surprise the thread that
        // suspended us.
        //
        ::sem_post(&sig_sem);

        thread->java_suspend_self();
      }
    } while (threadIsSuspended);
  }
}

int os::signal_lookup() {
  return check_pending_signals(false);
}

int os::signal_wait() {
  return check_pending_signals(true);
}

////////////////////////////////////////////////////////////////////////////////
// Virtual Memory

int os::vm_page_size() {
  // Seems redundant as all get out
  assert(os::Linux::page_size() != -1, "must call os::init");
  return os::Linux::page_size();
}

// Solaris allocates memory by pages.
int os::vm_allocation_granularity() {
  assert(os::Linux::page_size() != -1, "must call os::init");
  return os::Linux::page_size();
}

// Rationale behind this function:
//  current (Mon Apr 25 20:12:18 MSD 2005) oprofile drops samples without executable
//  mapping for address (see lookup_dcookie() in the kernel module), thus we cannot get
//  samples for JITted code. Here we create private executable mapping over the code cache
//  and then we can use standard (well, almost, as mapping can change) way to provide
//  info for the reporting script by storing timestamp and location of symbol
void linux_wrap_code(char* base, size_t size) {
  static volatile jint cnt = 0;

  if (!UseOprofile) {
    return;
  }

  char buf[40];
  int num = Atomic::add(1, &cnt);

  // Create a uniquely-named file in /tmp, size it, and map the code
  // cache over it so oprofile sees a file-backed executable mapping.
  sprintf(buf, "/tmp/hs-vm-%d-%d", os::current_process_id(), num);
  unlink(buf);

  int fd = open(buf, O_CREAT | O_RDWR, S_IRWXU);

  if (fd != -1) {
    off_t rv = lseek(fd, size-2, SEEK_SET);
    if (rv != (off_t)-1) {
      if (write(fd, "", 1) == 1) {
        mmap(base, size,
             PROT_READ|PROT_WRITE|PROT_EXEC,
             MAP_PRIVATE|MAP_FIXED|MAP_NORESERVE, fd, 0);
      }
    }
    close(fd);
    unlink(buf);
  }
}

// NOTE: Linux kernel does not really reserve the pages for us.
//       All it does is to check if there are enough free pages
//       left at the time of mmap(). This could be a potential
//       problem.
bool os::commit_memory(char* addr, size_t size) {
  uintptr_t res = (uintptr_t) ::mmap(addr, size,
                                   PROT_READ|PROT_WRITE|PROT_EXEC,
                                   MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0);
  return res != (uintptr_t) MAP_FAILED;
}

// alignment_hint is ignored on Linux; plain commit_memory() suffices.
bool os::commit_memory(char* addr, size_t size, size_t alignment_hint) {
  return commit_memory(addr, size);
}

void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { }

void os::free_memory(char *addr, size_t bytes) {
  uncommit_memory(addr, bytes);
}

void os::numa_make_global(char *addr, size_t bytes)    { }

void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) {
  Linux::numa_tonode_memory(addr, bytes, lgrp_hint);
}

bool os::numa_topology_changed()   { return false; }

size_t os::numa_get_groups_num() {
  int max_node = Linux::numa_max_node();
  return max_node > 0 ? max_node + 1 : 1;
}

// Map the current CPU to its NUMA node id; falls back to node 0 if
// either the CPU id or the node lookup is unavailable.
int os::numa_get_group_id() {
  int cpu_id = Linux::sched_getcpu();
  if (cpu_id != -1) {
    int lgrp_id = Linux::get_node_by_cpu(cpu_id);
    if (lgrp_id != -1) {
      return lgrp_id;
    }
  }
  return 0;
}

size_t os::numa_get_leaf_groups(int *ids, size_t size) {
  for (size_t i = 0; i < size; i++) {
    ids[i] = i;
  }
  return size;
}

bool os::get_page_info(char *start, page_info* info) {
  return false;
}

char *os::scan_pages(char *start, char* end, page_info* page_expected, page_info* page_found) {
  return end;
}

// Silence libnuma's default warning/error reporting (it writes to stderr).
extern "C" void numa_warn(int number, char *where, ...) { }
extern "C" void numa_error(char *where) { }

// Lazily bind the libnuma entry points we need; NUMA support is only
// enabled if sched_getcpu() works and libnuma reports availability.
void os::Linux::libnuma_init() {
  // sched_getcpu() should be in libc.
  set_sched_getcpu(CAST_TO_FN_PTR(sched_getcpu_func_t,
                                  dlsym(RTLD_DEFAULT, "sched_getcpu")));

  if (sched_getcpu() != -1) { // Does it work?
    void *handle = dlopen("libnuma.so", RTLD_LAZY);
    if (handle != NULL) {
      set_numa_node_to_cpus(CAST_TO_FN_PTR(numa_node_to_cpus_func_t,
                                           dlsym(handle, "numa_node_to_cpus")));
      set_numa_max_node(CAST_TO_FN_PTR(numa_max_node_func_t,
                                       dlsym(handle, "numa_max_node")));
      set_numa_available(CAST_TO_FN_PTR(numa_available_func_t,
                                        dlsym(handle, "numa_available")));
      set_numa_tonode_memory(CAST_TO_FN_PTR(numa_tonode_memory_func_t,
                                            dlsym(handle, "numa_tonode_memory")));
      if (numa_available() != -1) {
        // Create a cpu -> node mapping
        _cpu_to_node = new (ResourceObj::C_HEAP) GrowableArray<int>(0, true);
        rebuild_cpu_to_node_map();
      }
    }
  }
}

// rebuild_cpu_to_node_map() constructs a table mapping cpud id to node id.
// The table is later used in get_node_by_cpu().
void os::Linux::rebuild_cpu_to_node_map() {
  int cpu_num = os::active_processor_count();
  cpu_to_node()->clear();
  cpu_to_node()->at_grow(cpu_num - 1);
  int node_num = numa_get_groups_num();
  // libnuma reports each node's CPUs as a bitmask of unsigned longs.
  int cpu_map_size = (cpu_num + BitsPerLong - 1) / BitsPerLong;
  unsigned long *cpu_map = NEW_C_HEAP_ARRAY(unsigned long, cpu_map_size);
  for (int i = 0; i < node_num; i++) {
    if (numa_node_to_cpus(i, cpu_map, cpu_map_size * sizeof(unsigned long)) != -1) {
      for (int j = 0; j < cpu_map_size; j++) {
        if (cpu_map[j] != 0) {
          // Record node i for every CPU whose bit is set in this word.
          for (int k = 0; k < BitsPerLong; k++) {
            if (cpu_map[j] & (1UL << k)) {
              cpu_to_node()->at_put(j * BitsPerLong + k, i);
            }
          }
        }
      }
    }
  }
  FREE_C_HEAP_ARRAY(unsigned long, cpu_map);
}

int os::Linux::get_node_by_cpu(int cpu_id) {
  if (cpu_to_node() != NULL && cpu_id >= 0 && cpu_id < cpu_to_node()->length()) {
    return cpu_to_node()->at(cpu_id);
  }
  return -1;
}

GrowableArray<int>* os::Linux::_cpu_to_node;
os::Linux::sched_getcpu_func_t os::Linux::_sched_getcpu;
os::Linux::numa_node_to_cpus_func_t os::Linux::_numa_node_to_cpus;
os::Linux::numa_max_node_func_t os::Linux::_numa_max_node;
os::Linux::numa_available_func_t os::Linux::_numa_available;
os::Linux::numa_tonode_memory_func_t os::Linux::_numa_tonode_memory;


// "Uncommit" by remapping the range as a fresh NORESERVE anonymous
// mapping, which discards the pages without creating a hole.
bool os::uncommit_memory(char* addr, size_t size) {
  return ::mmap(addr, size,
                PROT_READ|PROT_WRITE|PROT_EXEC,
                MAP_PRIVATE|MAP_FIXED|MAP_NORESERVE|MAP_ANONYMOUS, -1, 0)
    != MAP_FAILED;
}

// Highest address ever handed out by anon_mmap(); used as a hint for
// heap-stack collision detection with fixed-stack LinuxThreads.
static address _highest_vm_reserved_address = NULL;

// If 'fixed' is true, anon_mmap() will attempt to reserve anonymous memory
// at 'requested_addr'. If there are existing memory mappings at the same
// location, however, they will be overwritten. If 'fixed' is false,
// 'requested_addr' is only treated as a hint, the return value may or
// may not start from the requested address. Unlike Linux mmap(), this
// function returns NULL to indicate failure.
static char* anon_mmap(char* requested_addr, size_t bytes, bool fixed) {
  char * addr;
  int flags;

  flags = MAP_PRIVATE | MAP_NORESERVE | MAP_ANONYMOUS;
  if (fixed) {
    assert((uintptr_t)requested_addr % os::Linux::page_size() == 0, "unaligned address");
    flags |= MAP_FIXED;
  }

  addr = (char*)::mmap(requested_addr, bytes, PROT_READ|PROT_WRITE|PROT_EXEC,
                       flags, -1, 0);

  if (addr != MAP_FAILED) {
    // anon_mmap() should only get called during VM initialization,
    // don't need lock (actually we can skip locking even it can be called
    // from multiple threads, because _highest_vm_reserved_address is just a
    // hint about the upper limit of non-stack memory regions.)
    if ((address)addr + bytes > _highest_vm_reserved_address) {
      _highest_vm_reserved_address = (address)addr + bytes;
    }
  }

  return addr == MAP_FAILED ? NULL : addr;
}

// Don't update _highest_vm_reserved_address, because there might be memory
// regions above addr + size. If so, releasing a memory region only creates
// a hole in the address space, it doesn't help prevent heap-stack collision.
//
static int anon_munmap(char * addr, size_t size) {
  return ::munmap(addr, size) == 0;
}

char* os::reserve_memory(size_t bytes, char* requested_addr,
                         size_t alignment_hint) {
  return anon_mmap(requested_addr, bytes, (requested_addr != NULL));
}

bool os::release_memory(char* addr, size_t size) {
  return anon_munmap(addr, size);
}

static address highest_vm_reserved_address() {
  return _highest_vm_reserved_address;
}

// Change protection on [addr, addr+size) to 'prot' via mprotect(),
// after page-aligning the start address.
static bool linux_mprotect(char* addr, size_t size, int prot) {
  // Linux wants the mprotect address argument to be page aligned.
  char* bottom = (char*)align_size_down((intptr_t)addr, os::Linux::page_size());

  // According to SUSv3, mprotect() should only be used with mappings
  // established by mmap(), and mmap() always maps whole pages. Unaligned
  // 'addr' likely indicates problem in the VM (e.g. trying to change
  // protection of malloc'ed or statically allocated memory). Check the
  // caller if you hit this assert.
  assert(addr == bottom, "sanity check");

  size = align_size_up(pointer_delta(addr, bottom, 1) + size, os::Linux::page_size());
  return ::mprotect(bottom, size, prot) == 0;
}

bool os::protect_memory(char* addr, size_t size) {
  return linux_mprotect(addr, size, PROT_READ);
}

bool os::guard_memory(char* addr, size_t size) {
  return linux_mprotect(addr, size, PROT_NONE);
}

bool os::unguard_memory(char* addr, size_t size) {
  return linux_mprotect(addr, size, PROT_READ|PROT_WRITE|PROT_EXEC);
}

// Large page support

static size_t _large_page_size = 0;

// Determine the large page size (from -XX:LargePageSizeInBytes,
// /proc/meminfo, or a per-arch default) and register it in _page_sizes.
bool os::large_page_init() {
  if (!UseLargePages) return false;

  if (LargePageSizeInBytes) {
    _large_page_size = LargePageSizeInBytes;
  } else {
    // large_page_size on Linux is used to round up heap size. x86 uses either
    // 2M or 4M page, depending on whether PAE (Physical Address Extensions)
    // mode is enabled. AMD64/EM64T uses 2M page in 64bit mode. IA64 can use
    // page as large as 256M.
    //
    // Here we try to figure out page size by parsing /proc/meminfo and looking
    // for a line with the following format:
    //    Hugepagesize:     2048 kB
    //
    // If we can't determine the value (e.g. /proc is not mounted, or the text
    // format has been changed), we'll use the largest page size supported by
    // the processor.

    _large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M);

    FILE *fp = fopen("/proc/meminfo", "r");
    if (fp) {
      while (!feof(fp)) {
        int x = 0;
        char buf[16];
        if (fscanf(fp, "Hugepagesize: %d", &x) == 1) {
          if (x && fgets(buf, sizeof(buf), fp) && strcmp(buf, " kB\n") == 0) {
            _large_page_size = x * K;
            break;
          }
        } else {
          // skip to next line
          for (;;) {
            int ch = fgetc(fp);
            if (ch == EOF || ch == (int)'\n') break;
          }
        }
      }
      fclose(fp);
    }
  }

  const size_t default_page_size = (size_t)Linux::page_size();
  if (_large_page_size > default_page_size) {
    _page_sizes[0] = _large_page_size;
    _page_sizes[1] = default_page_size;
    _page_sizes[2] = 0;
  }

  // Large page support is available on 2.6 or newer kernel, some vendors
  // (e.g. Redhat) have backported it to their 2.4 based distributions.
  // We optimistically assume the support is available. If later it turns out
  // not true, VM will automatically switch to use regular page size.
  return true;
}

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

// Reserve 'bytes' of large-page memory via SysV shared memory
// (SHM_HUGETLB); returns NULL and optionally warns on failure.
char* os::reserve_memory_special(size_t bytes) {
  assert(UseLargePages, "only for large pages");

  key_t key = IPC_PRIVATE;
  char *addr;

  bool warn_on_failure = UseLargePages &&
                        (!FLAG_IS_DEFAULT(UseLargePages) ||
                         !FLAG_IS_DEFAULT(LargePageSizeInBytes)
                        );
  char msg[128];

  // Create a large shared memory region to attach to based on size.
  // Currently, size is the total size of the heap
  int shmid = shmget(key, bytes, SHM_HUGETLB|IPC_CREAT|SHM_R|SHM_W);
  if (shmid == -1) {
     // Possible reasons for shmget failure:
     // 1. shmmax is too small for Java heap.
     //    > check shmmax value: cat /proc/sys/kernel/shmmax
     //    > increase shmmax value: echo "0xffffffff" > /proc/sys/kernel/shmmax
     // 2. not enough large page memory.
     //    > check available large pages: cat /proc/meminfo
     //    > increase amount of large pages:
     //          echo new_value > /proc/sys/vm/nr_hugepages
     //      Note 1: different Linux may use different name for this property,
     //            e.g. on Redhat AS-3 it is "hugetlb_pool".
     //      Note 2: it's possible there's enough physical memory available but
     //            they are so fragmented after a long run that they can't
     //            coalesce into large pages. Try to reserve large pages when
     //            the system is still "fresh".
     if (warn_on_failure) {
       jio_snprintf(msg, sizeof(msg), "Failed to reserve shared memory (errno = %d).", errno);
       warning(msg);
     }
     return NULL;
  }

  // attach to the region
  addr = (char*)shmat(shmid, NULL, 0);
  int err = errno;

  // Remove shmid. If shmat() is successful, the actual shared memory segment
  // will be deleted when it's detached by shmdt() or when the process
  // terminates. If shmat() is not successful this will remove the shared
  // segment immediately.
  shmctl(shmid, IPC_RMID, NULL);

  if ((intptr_t)addr == -1) {
     if (warn_on_failure) {
       jio_snprintf(msg, sizeof(msg), "Failed to attach shared memory (errno = %d).", err);
       warning(msg);
     }
     return NULL;
  }

  return addr;
}

bool os::release_memory_special(char* base, size_t bytes) {
  // detaching the SHM segment will also delete it, see reserve_memory_special()
  int rslt = shmdt(base);
  return rslt == 0;
}

size_t os::large_page_size() {
  return _large_page_size;
}

// Linux does not support anonymous mmap with large page memory. The only way
// to reserve large page memory without file backing is through SysV shared
// memory API. The entire memory region is committed and pinned upfront.
// Hopefully this will change in the future...
bool os::can_commit_large_page_memory() {
  return false;
}

bool os::can_execute_large_page_memory() {
  return false;
}

// Reserve memory at an arbitrary address, only if that area is
// available (and not reserved for something else).

char* os::attempt_reserve_memory_at(size_t bytes, char* requested_addr) {
  const int max_tries = 10;
  char* base[max_tries];
  size_t size[max_tries];
  const size_t gap = 0x000000;

  // Assert only that the size is a multiple of the page size, since
  // that's all that mmap requires, and since that's all we really know
  // about at this low abstraction level.  If we need higher alignment,
  // we can either pass an alignment to this method or verify alignment
  // in one of the methods further up the call chain.  See bug 5044738.
  assert(bytes % os::vm_page_size() == 0, "reserving unexpected size block");

  // Repeatedly allocate blocks until the block is allocated at the
  // right spot. Give up after max_tries. Note that reserve_memory() will
  // automatically update _highest_vm_reserved_address if the call is
  // successful. The variable tracks the highest memory address every reserved
  // by JVM. It is used to detect heap-stack collision if running with
  // fixed-stack LinuxThreads. Because here we may attempt to reserve more
  // space than needed, it could confuse the collision detecting code. To
  // solve the problem, save current _highest_vm_reserved_address and
  // calculate the correct value before return.
  address old_highest = _highest_vm_reserved_address;

  // Linux mmap allows caller to pass an address as hint; give it a try first,
  // if kernel honors the hint then we can return immediately.
  char * addr = anon_mmap(requested_addr, bytes, false);
  if (addr == requested_addr) {
     return requested_addr;
  }

  if (addr != NULL) {
     // mmap() is successful but it fails to reserve at the requested address
     anon_munmap(addr, bytes);
  }

  int i;
  for (i = 0; i < max_tries; ++i) {
    base[i] = reserve_memory(bytes);

    if (base[i] != NULL) {
      // Is this the block we wanted?
      if (base[i] == requested_addr) {
        size[i] = bytes;
        break;
      }

      // Does this overlap the block we wanted? Give back the overlapped
      // parts and try again.
      //
      // NOTE(review): top_overlap/bottom_overlap are size_t (unsigned), so
      // the ">= 0" tests below are always true and underflow wraps to a
      // huge value; only the "< bytes" comparison actually filters.
      // Confirm the wrap-around cases are all excluded by "< bytes".

      size_t top_overlap = requested_addr + (bytes + gap) - base[i];
      if (top_overlap >= 0 && top_overlap < bytes) {
        unmap_memory(base[i], top_overlap);
        base[i] += top_overlap;
        size[i] = bytes - top_overlap;
      } else {
        size_t bottom_overlap = base[i] + bytes - requested_addr;
        if (bottom_overlap >= 0 && bottom_overlap < bytes) {
          unmap_memory(requested_addr, bottom_overlap);
          size[i] = bytes - bottom_overlap;
        } else {
          size[i] = bytes;
        }
      }
    }
  }

  // Give back the unused reserved pieces.
2659 2660 for (int j = 0; j < i; ++j) { 2661 if (base[j] != NULL) { 2662 unmap_memory(base[j], size[j]); 2663 } 2664 } 2665 2666 if (i < max_tries) { 2667 _highest_vm_reserved_address = MAX2(old_highest, (address)requested_addr + bytes); 2668 return requested_addr; 2669 } else { 2670 _highest_vm_reserved_address = old_highest; 2671 return NULL; 2672 } 2673} 2674 2675size_t os::read(int fd, void *buf, unsigned int nBytes) { 2676 return ::read(fd, buf, nBytes); 2677} 2678 2679// TODO-FIXME: reconcile Solaris' os::sleep with the linux variation. 2680// Solaris uses poll(), linux uses park(). 2681// Poll() is likely a better choice, assuming that Thread.interrupt() 2682// generates a SIGUSRx signal. Note that SIGUSR1 can interfere with 2683// SIGSEGV, see 4355769. 2684 2685const int NANOSECS_PER_MILLISECS = 1000000; 2686 2687int os::sleep(Thread* thread, jlong millis, bool interruptible) { 2688 assert(thread == Thread::current(), "thread consistency check"); 2689 2690 ParkEvent * const slp = thread->_SleepEvent ; 2691 slp->reset() ; 2692 OrderAccess::fence() ; 2693 2694 if (interruptible) { 2695 jlong prevtime = javaTimeNanos(); 2696 2697 for (;;) { 2698 if (os::is_interrupted(thread, true)) { 2699 return OS_INTRPT; 2700 } 2701 2702 jlong newtime = javaTimeNanos(); 2703 2704 if (newtime - prevtime < 0) { 2705 // time moving backwards, should only happen if no monotonic clock 2706 // not a guarantee() because JVM should not abort on kernel/glibc bugs 2707 assert(!Linux::supports_monotonic_clock(), "time moving backwards"); 2708 } else { 2709 millis -= (newtime - prevtime) / NANOSECS_PER_MILLISECS; 2710 } 2711 2712 if(millis <= 0) { 2713 return OS_OK; 2714 } 2715 2716 prevtime = newtime; 2717 2718 { 2719 assert(thread->is_Java_thread(), "sanity check"); 2720 JavaThread *jt = (JavaThread *) thread; 2721 ThreadBlockInVM tbivm(jt); 2722 OSThreadWaitState osts(jt->osthread(), false /* not Object.wait() */); 2723 2724 jt->set_suspend_equivalent(); 2725 // cleared by 
handle_special_suspend_equivalent_condition() or 2726 // java_suspend_self() via check_and_wait_while_suspended() 2727 2728 slp->park(millis); 2729 2730 // were we externally suspended while we were waiting? 2731 jt->check_and_wait_while_suspended(); 2732 } 2733 } 2734 } else { 2735 OSThreadWaitState osts(thread->osthread(), false /* not Object.wait() */); 2736 jlong prevtime = javaTimeNanos(); 2737 2738 for (;;) { 2739 // It'd be nice to avoid the back-to-back javaTimeNanos() calls on 2740 // the 1st iteration ... 2741 jlong newtime = javaTimeNanos(); 2742 2743 if (newtime - prevtime < 0) { 2744 // time moving backwards, should only happen if no monotonic clock 2745 // not a guarantee() because JVM should not abort on kernel/glibc bugs 2746 assert(!Linux::supports_monotonic_clock(), "time moving backwards"); 2747 } else { 2748 millis -= (newtime - prevtime) / NANOSECS_PER_MILLISECS; 2749 } 2750 2751 if(millis <= 0) break ; 2752 2753 prevtime = newtime; 2754 slp->park(millis); 2755 } 2756 return OS_OK ; 2757 } 2758} 2759 2760int os::naked_sleep() { 2761 // %% make the sleep time an integer flag. for now use 1 millisec. 2762 return os::sleep(Thread::current(), 1, false); 2763} 2764 2765// Sleep forever; naked call to OS-specific sleep; use with CAUTION 2766void os::infinite_sleep() { 2767 while (true) { // sleep forever ... 2768 ::sleep(100); // ... 100 seconds at a time 2769 } 2770} 2771 2772// Used to convert frequent JVM_Yield() to nops 2773bool os::dont_yield() { 2774 return DontYieldALot; 2775} 2776 2777void os::yield() { 2778 sched_yield(); 2779} 2780 2781os::YieldResult os::NakedYield() { sched_yield(); return os::YIELD_UNKNOWN ;} 2782 2783void os::yield_all(int attempts) { 2784 // Yields to all threads, including threads with lower priorities 2785 // Threads on Linux are all with same priority. The Solaris style 2786 // os::yield_all() with nanosleep(1ms) is not necessary. 
2787 sched_yield(); 2788} 2789 2790// Called from the tight loops to possibly influence time-sharing heuristics 2791void os::loop_breaker(int attempts) { 2792 os::yield_all(attempts); 2793} 2794 2795//////////////////////////////////////////////////////////////////////////////// 2796// thread priority support 2797 2798// Note: Normal Linux applications are run with SCHED_OTHER policy. SCHED_OTHER 2799// only supports dynamic priority, static priority must be zero. For real-time 2800// applications, Linux supports SCHED_RR which allows static priority (1-99). 2801// However, for large multi-threaded applications, SCHED_RR is not only slower 2802// than SCHED_OTHER, but also very unstable (my volano tests hang hard 4 out 2803// of 5 runs - Sep 2005). 2804// 2805// The following code actually changes the niceness of kernel-thread/LWP. It 2806// has an assumption that setpriority() only modifies one kernel-thread/LWP, 2807// not the entire user process, and user level threads are 1:1 mapped to kernel 2808// threads. It has always been the case, but could change in the future. For 2809// this reason, the code should not be used as default (ThreadPriorityPolicy=0). 2810// It is only used when ThreadPriorityPolicy=1 and requires root privilege. 2811 2812int os::java_to_os_priority[MaxPriority + 1] = { 2813 19, // 0 Entry should never be used 2814 2815 4, // 1 MinPriority 2816 3, // 2 2817 2, // 3 2818 2819 1, // 4 2820 0, // 5 NormPriority 2821 -1, // 6 2822 2823 -2, // 7 2824 -3, // 8 2825 -4, // 9 NearMaxPriority 2826 2827 -5 // 10 MaxPriority 2828}; 2829 2830static int prio_init() { 2831 if (ThreadPriorityPolicy == 1) { 2832 // Only root can raise thread priority. Don't allow ThreadPriorityPolicy=1 2833 // if effective uid is not root. 
Perhaps, a more elegant way of doing 2834 // this is to test CAP_SYS_NICE capability, but that will require libcap.so 2835 if (geteuid() != 0) { 2836 if (!FLAG_IS_DEFAULT(ThreadPriorityPolicy)) { 2837 warning("-XX:ThreadPriorityPolicy requires root privilege on Linux"); 2838 } 2839 ThreadPriorityPolicy = 0; 2840 } 2841 } 2842 return 0; 2843} 2844 2845OSReturn os::set_native_priority(Thread* thread, int newpri) { 2846 if ( !UseThreadPriorities || ThreadPriorityPolicy == 0 ) return OS_OK; 2847 2848 int ret = setpriority(PRIO_PROCESS, thread->osthread()->thread_id(), newpri); 2849 return (ret == 0) ? OS_OK : OS_ERR; 2850} 2851 2852OSReturn os::get_native_priority(const Thread* const thread, int *priority_ptr) { 2853 if ( !UseThreadPriorities || ThreadPriorityPolicy == 0 ) { 2854 *priority_ptr = java_to_os_priority[NormPriority]; 2855 return OS_OK; 2856 } 2857 2858 errno = 0; 2859 *priority_ptr = getpriority(PRIO_PROCESS, thread->osthread()->thread_id()); 2860 return (*priority_ptr != -1 || errno == 0 ? OS_OK : OS_ERR); 2861} 2862 2863// Hint to the underlying OS that a task switch would not be good. 2864// Void return because it's a hint and can fail. 2865void os::hint_no_preempt() {} 2866 2867//////////////////////////////////////////////////////////////////////////////// 2868// suspend/resume support 2869 2870// the low-level signal-based suspend/resume support is a remnant from the 2871// old VM-suspension that used to be for java-suspension, safepoints etc, 2872// within hotspot. Now there is a single use-case for this: 2873// - calling get_thread_pc() on the VMThread by the flat-profiler task 2874// that runs in the watcher thread. 2875// The remaining code is greatly simplified from the more general suspension 2876// code that used to be used. 
2877// 2878// The protocol is quite simple: 2879// - suspend: 2880// - sends a signal to the target thread 2881// - polls the suspend state of the osthread using a yield loop 2882// - target thread signal handler (SR_handler) sets suspend state 2883// and blocks in sigsuspend until continued 2884// - resume: 2885// - sets target osthread state to continue 2886// - sends signal to end the sigsuspend loop in the SR_handler 2887// 2888// Note that the SR_lock plays no role in this suspend/resume protocol. 2889// 2890 2891static void resume_clear_context(OSThread *osthread) { 2892 osthread->set_ucontext(NULL); 2893 osthread->set_siginfo(NULL); 2894 2895 // notify the suspend action is completed, we have now resumed 2896 osthread->sr.clear_suspended(); 2897} 2898 2899static void suspend_save_context(OSThread *osthread, siginfo_t* siginfo, ucontext_t* context) { 2900 osthread->set_ucontext(context); 2901 osthread->set_siginfo(siginfo); 2902} 2903 2904// 2905// Handler function invoked when a thread's execution is suspended or 2906// resumed. We have to be careful that only async-safe functions are 2907// called here (Note: most pthread functions are not async safe and 2908// should be avoided.) 2909// 2910// Note: sigwait() is a more natural fit than sigsuspend() from an 2911// interface point of view, but sigwait() prevents the signal hander 2912// from being run. libpthread would get very confused by not having 2913// its signal handlers run and prevents sigwait()'s use with the 2914// mutex granting granting signal. 2915// 2916// Currently only ever called on the VMThread 2917// 2918static void SR_handler(int sig, siginfo_t* siginfo, ucontext_t* context) { 2919 // Save and restore errno to avoid confusing native code with EINTR 2920 // after sigsuspend. 
2921 int old_errno = errno; 2922 2923 Thread* thread = Thread::current(); 2924 OSThread* osthread = thread->osthread(); 2925 assert(thread->is_VM_thread(), "Must be VMThread"); 2926 // read current suspend action 2927 int action = osthread->sr.suspend_action(); 2928 if (action == SR_SUSPEND) { 2929 suspend_save_context(osthread, siginfo, context); 2930 2931 // Notify the suspend action is about to be completed. do_suspend() 2932 // waits until SR_SUSPENDED is set and then returns. We will wait 2933 // here for a resume signal and that completes the suspend-other 2934 // action. do_suspend/do_resume is always called as a pair from 2935 // the same thread - so there are no races 2936 2937 // notify the caller 2938 osthread->sr.set_suspended(); 2939 2940 sigset_t suspend_set; // signals for sigsuspend() 2941 2942 // get current set of blocked signals and unblock resume signal 2943 pthread_sigmask(SIG_BLOCK, NULL, &suspend_set); 2944 sigdelset(&suspend_set, SR_signum); 2945 2946 // wait here until we are resumed 2947 do { 2948 sigsuspend(&suspend_set); 2949 // ignore all returns until we get a resume signal 2950 } while (osthread->sr.suspend_action() != SR_CONTINUE); 2951 2952 resume_clear_context(osthread); 2953 2954 } else { 2955 assert(action == SR_CONTINUE, "unexpected sr action"); 2956 // nothing special to do - just leave the handler 2957 } 2958 2959 errno = old_errno; 2960} 2961 2962 2963static int SR_initialize() { 2964 struct sigaction act; 2965 char *s; 2966 /* Get signal number to use for suspend/resume */ 2967 if ((s = ::getenv("_JAVA_SR_SIGNUM")) != 0) { 2968 int sig = ::strtol(s, 0, 10); 2969 if (sig > 0 || sig < _NSIG) { 2970 SR_signum = sig; 2971 } 2972 } 2973 2974 assert(SR_signum > SIGSEGV && SR_signum > SIGBUS, 2975 "SR_signum must be greater than max(SIGSEGV, SIGBUS), see 4355769"); 2976 2977 sigemptyset(&SR_sigset); 2978 sigaddset(&SR_sigset, SR_signum); 2979 2980 /* Set up signal handler for suspend/resume */ 2981 act.sa_flags = 
SA_RESTART|SA_SIGINFO; 2982 act.sa_handler = (void (*)(int)) SR_handler; 2983 2984 // SR_signum is blocked by default. 2985 // 4528190 - We also need to block pthread restart signal (32 on all 2986 // supported Linux platforms). Note that LinuxThreads need to block 2987 // this signal for all threads to work properly. So we don't have 2988 // to use hard-coded signal number when setting up the mask. 2989 pthread_sigmask(SIG_BLOCK, NULL, &act.sa_mask); 2990 2991 if (sigaction(SR_signum, &act, 0) == -1) { 2992 return -1; 2993 } 2994 2995 // Save signal flag 2996 os::Linux::set_our_sigflags(SR_signum, act.sa_flags); 2997 return 0; 2998} 2999 3000static int SR_finalize() { 3001 return 0; 3002} 3003 3004 3005// returns true on success and false on error - really an error is fatal 3006// but this seems the normal response to library errors 3007static bool do_suspend(OSThread* osthread) { 3008 // mark as suspended and send signal 3009 osthread->sr.set_suspend_action(SR_SUSPEND); 3010 int status = pthread_kill(osthread->pthread_id(), SR_signum); 3011 assert_status(status == 0, status, "pthread_kill"); 3012 3013 // check status and wait until notified of suspension 3014 if (status == 0) { 3015 for (int i = 0; !osthread->sr.is_suspended(); i++) { 3016 os::yield_all(i); 3017 } 3018 osthread->sr.set_suspend_action(SR_NONE); 3019 return true; 3020 } 3021 else { 3022 osthread->sr.set_suspend_action(SR_NONE); 3023 return false; 3024 } 3025} 3026 3027static void do_resume(OSThread* osthread) { 3028 assert(osthread->sr.is_suspended(), "thread should be suspended"); 3029 osthread->sr.set_suspend_action(SR_CONTINUE); 3030 3031 int status = pthread_kill(osthread->pthread_id(), SR_signum); 3032 assert_status(status == 0, status, "pthread_kill"); 3033 // check status and wait unit notified of resumption 3034 if (status == 0) { 3035 for (int i = 0; osthread->sr.is_suspended(); i++) { 3036 os::yield_all(i); 3037 } 3038 } 3039 osthread->sr.set_suspend_action(SR_NONE); 3040} 3041 
3042//////////////////////////////////////////////////////////////////////////////// 3043// interrupt support 3044 3045void os::interrupt(Thread* thread) { 3046 assert(Thread::current() == thread || Threads_lock->owned_by_self(), 3047 "possibility of dangling Thread pointer"); 3048 3049 OSThread* osthread = thread->osthread(); 3050 3051 if (!osthread->interrupted()) { 3052 osthread->set_interrupted(true); 3053 // More than one thread can get here with the same value of osthread, 3054 // resulting in multiple notifications. We do, however, want the store 3055 // to interrupted() to be visible to other threads before we execute unpark(). 3056 OrderAccess::fence(); 3057 ParkEvent * const slp = thread->_SleepEvent ; 3058 if (slp != NULL) slp->unpark() ; 3059 } 3060 3061 // For JSR166. Unpark even if interrupt status already was set 3062 if (thread->is_Java_thread()) 3063 ((JavaThread*)thread)->parker()->unpark(); 3064 3065 ParkEvent * ev = thread->_ParkEvent ; 3066 if (ev != NULL) ev->unpark() ; 3067 3068} 3069 3070bool os::is_interrupted(Thread* thread, bool clear_interrupted) { 3071 assert(Thread::current() == thread || Threads_lock->owned_by_self(), 3072 "possibility of dangling Thread pointer"); 3073 3074 OSThread* osthread = thread->osthread(); 3075 3076 bool interrupted = osthread->interrupted(); 3077 3078 if (interrupted && clear_interrupted) { 3079 osthread->set_interrupted(false); 3080 // consider thread->_SleepEvent->reset() ... optional optimization 3081 } 3082 3083 return interrupted; 3084} 3085 3086/////////////////////////////////////////////////////////////////////////////////// 3087// signal handling (except suspend/resume) 3088 3089// This routine may be used by user applications as a "hook" to catch signals. 3090// The user-defined signal handler must pass unrecognized signals to this 3091// routine, and if it returns true (non-zero), then the signal handler must 3092// return immediately. 
If the flag "abort_if_unrecognized" is true, then this 3093// routine will never retun false (zero), but instead will execute a VM panic 3094// routine kill the process. 3095// 3096// If this routine returns false, it is OK to call it again. This allows 3097// the user-defined signal handler to perform checks either before or after 3098// the VM performs its own checks. Naturally, the user code would be making 3099// a serious error if it tried to handle an exception (such as a null check 3100// or breakpoint) that the VM was generating for its own correct operation. 3101// 3102// This routine may recognize any of the following kinds of signals: 3103// SIGBUS, SIGSEGV, SIGILL, SIGFPE, SIGQUIT, SIGPIPE, SIGXFSZ, SIGUSR1. 3104// It should be consulted by handlers for any of those signals. 3105// 3106// The caller of this routine must pass in the three arguments supplied 3107// to the function referred to in the "sa_sigaction" (not the "sa_handler") 3108// field of the structure passed to sigaction(). This routine assumes that 3109// the sa_flags field passed to sigaction() includes SA_SIGINFO and SA_RESTART. 3110// 3111// Note that the VM will print warnings if it detects conflicting signal 3112// handlers, unless invoked with the option "-XX:+AllowUserSignalHandlers". 3113// 3114extern "C" int 3115JVM_handle_linux_signal(int signo, siginfo_t* siginfo, 3116 void* ucontext, int abort_if_unrecognized); 3117 3118void signalHandler(int sig, siginfo_t* info, void* uc) { 3119 assert(info != NULL && uc != NULL, "it must be old kernel"); 3120 JVM_handle_linux_signal(sig, info, uc, true); 3121} 3122 3123 3124// This boolean allows users to forward their own non-matching signals 3125// to JVM_handle_linux_signal, harmlessly. 
3126bool os::Linux::signal_handlers_are_installed = false; 3127 3128// For signal-chaining 3129struct sigaction os::Linux::sigact[MAXSIGNUM]; 3130unsigned int os::Linux::sigs = 0; 3131bool os::Linux::libjsig_is_loaded = false; 3132typedef struct sigaction *(*get_signal_t)(int); 3133get_signal_t os::Linux::get_signal_action = NULL; 3134 3135struct sigaction* os::Linux::get_chained_signal_action(int sig) { 3136 struct sigaction *actp = NULL; 3137 3138 if (libjsig_is_loaded) { 3139 // Retrieve the old signal handler from libjsig 3140 actp = (*get_signal_action)(sig); 3141 } 3142 if (actp == NULL) { 3143 // Retrieve the preinstalled signal handler from jvm 3144 actp = get_preinstalled_handler(sig); 3145 } 3146 3147 return actp; 3148} 3149 3150static bool call_chained_handler(struct sigaction *actp, int sig, 3151 siginfo_t *siginfo, void *context) { 3152 // Call the old signal handler 3153 if (actp->sa_handler == SIG_DFL) { 3154 // It's more reasonable to let jvm treat it as an unexpected exception 3155 // instead of taking the default action. 
3156 return false; 3157 } else if (actp->sa_handler != SIG_IGN) { 3158 if ((actp->sa_flags & SA_NODEFER) == 0) { 3159 // automaticlly block the signal 3160 sigaddset(&(actp->sa_mask), sig); 3161 } 3162 3163 sa_handler_t hand; 3164 sa_sigaction_t sa; 3165 bool siginfo_flag_set = (actp->sa_flags & SA_SIGINFO) != 0; 3166 // retrieve the chained handler 3167 if (siginfo_flag_set) { 3168 sa = actp->sa_sigaction; 3169 } else { 3170 hand = actp->sa_handler; 3171 } 3172 3173 if ((actp->sa_flags & SA_RESETHAND) != 0) { 3174 actp->sa_handler = SIG_DFL; 3175 } 3176 3177 // try to honor the signal mask 3178 sigset_t oset; 3179 pthread_sigmask(SIG_SETMASK, &(actp->sa_mask), &oset); 3180 3181 // call into the chained handler 3182 if (siginfo_flag_set) { 3183 (*sa)(sig, siginfo, context); 3184 } else { 3185 (*hand)(sig); 3186 } 3187 3188 // restore the signal mask 3189 pthread_sigmask(SIG_SETMASK, &oset, 0); 3190 } 3191 // Tell jvm's signal handler the signal is taken care of. 3192 return true; 3193} 3194 3195bool os::Linux::chained_handler(int sig, siginfo_t* siginfo, void* context) { 3196 bool chained = false; 3197 // signal-chaining 3198 if (UseSignalChaining) { 3199 struct sigaction *actp = get_chained_signal_action(sig); 3200 if (actp != NULL) { 3201 chained = call_chained_handler(actp, sig, siginfo, context); 3202 } 3203 } 3204 return chained; 3205} 3206 3207struct sigaction* os::Linux::get_preinstalled_handler(int sig) { 3208 if ((( (unsigned int)1 << sig ) & sigs) != 0) { 3209 return &sigact[sig]; 3210 } 3211 return NULL; 3212} 3213 3214void os::Linux::save_preinstalled_handler(int sig, struct sigaction& oldAct) { 3215 assert(sig > 0 && sig < MAXSIGNUM, "vm signal out of expected range"); 3216 sigact[sig] = oldAct; 3217 sigs |= (unsigned int)1 << sig; 3218} 3219 3220// for diagnostic 3221int os::Linux::sigflags[MAXSIGNUM]; 3222 3223int os::Linux::get_our_sigflags(int sig) { 3224 assert(sig > 0 && sig < MAXSIGNUM, "vm signal out of expected range"); 3225 return 
sigflags[sig]; 3226} 3227 3228void os::Linux::set_our_sigflags(int sig, int flags) { 3229 assert(sig > 0 && sig < MAXSIGNUM, "vm signal out of expected range"); 3230 sigflags[sig] = flags; 3231} 3232 3233void os::Linux::set_signal_handler(int sig, bool set_installed) { 3234 // Check for overwrite. 3235 struct sigaction oldAct; 3236 sigaction(sig, (struct sigaction*)NULL, &oldAct); 3237 3238 void* oldhand = oldAct.sa_sigaction 3239 ? CAST_FROM_FN_PTR(void*, oldAct.sa_sigaction) 3240 : CAST_FROM_FN_PTR(void*, oldAct.sa_handler); 3241 if (oldhand != CAST_FROM_FN_PTR(void*, SIG_DFL) && 3242 oldhand != CAST_FROM_FN_PTR(void*, SIG_IGN) && 3243 oldhand != CAST_FROM_FN_PTR(void*, (sa_sigaction_t)signalHandler)) { 3244 if (AllowUserSignalHandlers || !set_installed) { 3245 // Do not overwrite; user takes responsibility to forward to us. 3246 return; 3247 } else if (UseSignalChaining) { 3248 // save the old handler in jvm 3249 save_preinstalled_handler(sig, oldAct); 3250 // libjsig also interposes the sigaction() call below and saves the 3251 // old sigaction on it own. 3252 } else { 3253 fatal2("Encountered unexpected pre-existing sigaction handler %#lx for signal %d.", (long)oldhand, sig); 3254 } 3255 } 3256 3257 struct sigaction sigAct; 3258 sigfillset(&(sigAct.sa_mask)); 3259 sigAct.sa_handler = SIG_DFL; 3260 if (!set_installed) { 3261 sigAct.sa_flags = SA_SIGINFO|SA_RESTART; 3262 } else { 3263 sigAct.sa_sigaction = signalHandler; 3264 sigAct.sa_flags = SA_SIGINFO|SA_RESTART; 3265 } 3266 // Save flags, which are set by ours 3267 assert(sig > 0 && sig < MAXSIGNUM, "vm signal out of expected range"); 3268 sigflags[sig] = sigAct.sa_flags; 3269 3270 int ret = sigaction(sig, &sigAct, &oldAct); 3271 assert(ret == 0, "check"); 3272 3273 void* oldhand2 = oldAct.sa_sigaction 3274 ? 
CAST_FROM_FN_PTR(void*, oldAct.sa_sigaction) 3275 : CAST_FROM_FN_PTR(void*, oldAct.sa_handler); 3276 assert(oldhand2 == oldhand, "no concurrent signal handler installation"); 3277} 3278 3279// install signal handlers for signals that HotSpot needs to 3280// handle in order to support Java-level exception handling. 3281 3282void os::Linux::install_signal_handlers() { 3283 if (!signal_handlers_are_installed) { 3284 signal_handlers_are_installed = true; 3285 3286 // signal-chaining 3287 typedef void (*signal_setting_t)(); 3288 signal_setting_t begin_signal_setting = NULL; 3289 signal_setting_t end_signal_setting = NULL; 3290 begin_signal_setting = CAST_TO_FN_PTR(signal_setting_t, 3291 dlsym(RTLD_DEFAULT, "JVM_begin_signal_setting")); 3292 if (begin_signal_setting != NULL) { 3293 end_signal_setting = CAST_TO_FN_PTR(signal_setting_t, 3294 dlsym(RTLD_DEFAULT, "JVM_end_signal_setting")); 3295 get_signal_action = CAST_TO_FN_PTR(get_signal_t, 3296 dlsym(RTLD_DEFAULT, "JVM_get_signal_action")); 3297 libjsig_is_loaded = true; 3298 assert(UseSignalChaining, "should enable signal-chaining"); 3299 } 3300 if (libjsig_is_loaded) { 3301 // Tell libjsig jvm is setting signal handlers 3302 (*begin_signal_setting)(); 3303 } 3304 3305 set_signal_handler(SIGSEGV, true); 3306 set_signal_handler(SIGPIPE, true); 3307 set_signal_handler(SIGBUS, true); 3308 set_signal_handler(SIGILL, true); 3309 set_signal_handler(SIGFPE, true); 3310 set_signal_handler(SIGXFSZ, true); 3311 3312 if (libjsig_is_loaded) { 3313 // Tell libjsig jvm finishes setting signal handlers 3314 (*end_signal_setting)(); 3315 } 3316 3317 // We don't activate signal checker if libjsig is in place, we trust ourselves 3318 // and if UserSignalHandler is installed all bets are off 3319 if (CheckJNICalls) { 3320 if (libjsig_is_loaded) { 3321 tty->print_cr("Info: libjsig is activated, all active signal checking is disabled"); 3322 check_signals = false; 3323 } 3324 if (AllowUserSignalHandlers) { 3325 tty->print_cr("Info: 
AllowUserSignalHandlers is activated, all active signal checking is disabled"); 3326 check_signals = false; 3327 } 3328 } 3329 } 3330} 3331 3332// This is the fastest way to get thread cpu time on Linux. 3333// Returns cpu time (user+sys) for any thread, not only for current. 3334// POSIX compliant clocks are implemented in the kernels 2.6.16+. 3335// It might work on 2.6.10+ with a special kernel/glibc patch. 3336// For reference, please, see IEEE Std 1003.1-2004: 3337// http://www.unix.org/single_unix_specification 3338 3339jlong os::Linux::fast_thread_cpu_time(clockid_t clockid) { 3340 struct timespec tp; 3341 int rc = os::Linux::clock_gettime(clockid, &tp); 3342 assert(rc == 0, "clock_gettime is expected to return 0 code"); 3343 3344 return (tp.tv_sec * SEC_IN_NANOSECS) + tp.tv_nsec; 3345} 3346 3347///// 3348// glibc on Linux platform uses non-documented flag 3349// to indicate, that some special sort of signal 3350// trampoline is used. 3351// We will never set this flag, and we should 3352// ignore this flag in our diagnostic 3353#ifdef SIGNIFICANT_SIGNAL_MASK 3354#undef SIGNIFICANT_SIGNAL_MASK 3355#endif 3356#define SIGNIFICANT_SIGNAL_MASK (~0x04000000) 3357 3358static const char* get_signal_handler_name(address handler, 3359 char* buf, int buflen) { 3360 int offset; 3361 bool found = os::dll_address_to_library_name(handler, buf, buflen, &offset); 3362 if (found) { 3363 // skip directory names 3364 const char *p1, *p2; 3365 p1 = buf; 3366 size_t len = strlen(os::file_separator()); 3367 while ((p2 = strstr(p1, os::file_separator())) != NULL) p1 = p2 + len; 3368 jio_snprintf(buf, buflen, "%s+0x%x", p1, offset); 3369 } else { 3370 jio_snprintf(buf, buflen, PTR_FORMAT, handler); 3371 } 3372 return buf; 3373} 3374 3375static void print_signal_handler(outputStream* st, int sig, 3376 char* buf, size_t buflen) { 3377 struct sigaction sa; 3378 3379 sigaction(sig, NULL, &sa); 3380 3381 // See comment for SIGNIFICANT_SIGNAL_MASK define 3382 sa.sa_flags &= 
SIGNIFICANT_SIGNAL_MASK; 3383 3384 st->print("%s: ", os::exception_name(sig, buf, buflen)); 3385 3386 address handler = (sa.sa_flags & SA_SIGINFO) 3387 ? CAST_FROM_FN_PTR(address, sa.sa_sigaction) 3388 : CAST_FROM_FN_PTR(address, sa.sa_handler); 3389 3390 if (handler == CAST_FROM_FN_PTR(address, SIG_DFL)) { 3391 st->print("SIG_DFL"); 3392 } else if (handler == CAST_FROM_FN_PTR(address, SIG_IGN)) { 3393 st->print("SIG_IGN"); 3394 } else { 3395 st->print("[%s]", get_signal_handler_name(handler, buf, buflen)); 3396 } 3397 3398 st->print(", sa_mask[0]=" PTR32_FORMAT, *(uint32_t*)&sa.sa_mask); 3399 3400 address rh = VMError::get_resetted_sighandler(sig); 3401 // May be, handler was resetted by VMError? 3402 if(rh != NULL) { 3403 handler = rh; 3404 sa.sa_flags = VMError::get_resetted_sigflags(sig) & SIGNIFICANT_SIGNAL_MASK; 3405 } 3406 3407 st->print(", sa_flags=" PTR32_FORMAT, sa.sa_flags); 3408 3409 // Check: is it our handler? 3410 if(handler == CAST_FROM_FN_PTR(address, (sa_sigaction_t)signalHandler) || 3411 handler == CAST_FROM_FN_PTR(address, (sa_sigaction_t)SR_handler)) { 3412 // It is our signal handler 3413 // check for flags, reset system-used one! 
3414 if((int)sa.sa_flags != os::Linux::get_our_sigflags(sig)) { 3415 st->print( 3416 ", flags was changed from " PTR32_FORMAT ", consider using jsig library", 3417 os::Linux::get_our_sigflags(sig)); 3418 } 3419 } 3420 st->cr(); 3421} 3422 3423 3424#define DO_SIGNAL_CHECK(sig) \ 3425 if (!sigismember(&check_signal_done, sig)) \ 3426 os::Linux::check_signal_handler(sig) 3427 3428// This method is a periodic task to check for misbehaving JNI applications 3429// under CheckJNI, we can add any periodic checks here 3430 3431void os::run_periodic_checks() { 3432 3433 if (check_signals == false) return; 3434 3435 // SEGV and BUS if overridden could potentially prevent 3436 // generation of hs*.log in the event of a crash, debugging 3437 // such a case can be very challenging, so we absolutely 3438 // check the following for a good measure: 3439 DO_SIGNAL_CHECK(SIGSEGV); 3440 DO_SIGNAL_CHECK(SIGILL); 3441 DO_SIGNAL_CHECK(SIGFPE); 3442 DO_SIGNAL_CHECK(SIGBUS); 3443 DO_SIGNAL_CHECK(SIGPIPE); 3444 DO_SIGNAL_CHECK(SIGXFSZ); 3445 3446 3447 // ReduceSignalUsage allows the user to override these handlers 3448 // see comments at the very top and jvm_solaris.h 3449 if (!ReduceSignalUsage) { 3450 DO_SIGNAL_CHECK(SHUTDOWN1_SIGNAL); 3451 DO_SIGNAL_CHECK(SHUTDOWN2_SIGNAL); 3452 DO_SIGNAL_CHECK(SHUTDOWN3_SIGNAL); 3453 DO_SIGNAL_CHECK(BREAK_SIGNAL); 3454 } 3455 3456 DO_SIGNAL_CHECK(SR_signum); 3457 DO_SIGNAL_CHECK(INTERRUPT_SIGNAL); 3458} 3459 3460typedef int (*os_sigaction_t)(int, const struct sigaction *, struct sigaction *); 3461 3462static os_sigaction_t os_sigaction = NULL; 3463 3464void os::Linux::check_signal_handler(int sig) { 3465 char buf[O_BUFLEN]; 3466 address jvmHandler = NULL; 3467 3468 3469 struct sigaction act; 3470 if (os_sigaction == NULL) { 3471 // only trust the default sigaction, in case it has been interposed 3472 os_sigaction = (os_sigaction_t)dlsym(RTLD_DEFAULT, "sigaction"); 3473 if (os_sigaction == NULL) return; 3474 } 3475 3476 os_sigaction(sig, (struct 
sigaction*)NULL, &act); 3477 3478 3479 act.sa_flags &= SIGNIFICANT_SIGNAL_MASK; 3480 3481 address thisHandler = (act.sa_flags & SA_SIGINFO) 3482 ? CAST_FROM_FN_PTR(address, act.sa_sigaction) 3483 : CAST_FROM_FN_PTR(address, act.sa_handler) ; 3484 3485 3486 switch(sig) { 3487 case SIGSEGV: 3488 case SIGBUS: 3489 case SIGFPE: 3490 case SIGPIPE: 3491 case SIGILL: 3492 case SIGXFSZ: 3493 jvmHandler = CAST_FROM_FN_PTR(address, (sa_sigaction_t)signalHandler); 3494 break; 3495 3496 case SHUTDOWN1_SIGNAL: 3497 case SHUTDOWN2_SIGNAL: 3498 case SHUTDOWN3_SIGNAL: 3499 case BREAK_SIGNAL: 3500 jvmHandler = (address)user_handler(); 3501 break; 3502 3503 case INTERRUPT_SIGNAL: 3504 jvmHandler = CAST_FROM_FN_PTR(address, SIG_DFL); 3505 break; 3506 3507 default: 3508 if (sig == SR_signum) { 3509 jvmHandler = CAST_FROM_FN_PTR(address, (sa_sigaction_t)SR_handler); 3510 } else { 3511 return; 3512 } 3513 break; 3514 } 3515 3516 if (thisHandler != jvmHandler) { 3517 tty->print("Warning: %s handler ", exception_name(sig, buf, O_BUFLEN)); 3518 tty->print("expected:%s", get_signal_handler_name(jvmHandler, buf, O_BUFLEN)); 3519 tty->print_cr(" found:%s", get_signal_handler_name(thisHandler, buf, O_BUFLEN)); 3520 // No need to check this sig any longer 3521 sigaddset(&check_signal_done, sig); 3522 } else if(os::Linux::get_our_sigflags(sig) != 0 && (int)act.sa_flags != os::Linux::get_our_sigflags(sig)) { 3523 tty->print("Warning: %s handler flags ", exception_name(sig, buf, O_BUFLEN)); 3524 tty->print("expected:" PTR32_FORMAT, os::Linux::get_our_sigflags(sig)); 3525 tty->print_cr(" found:" PTR32_FORMAT, act.sa_flags); 3526 // No need to check this sig any longer 3527 sigaddset(&check_signal_done, sig); 3528 } 3529 3530 // Dump all the signal 3531 if (sigismember(&check_signal_done, sig)) { 3532 print_signal_handlers(tty, buf, O_BUFLEN); 3533 } 3534} 3535 3536extern void report_error(char* file_name, int line_no, char* title, char* format, ...); 3537 3538extern bool signal_name(int signo, 
char* buf, size_t len); 3539 3540const char* os::exception_name(int exception_code, char* buf, size_t size) { 3541 if (0 < exception_code && exception_code <= SIGRTMAX) { 3542 // signal 3543 if (!signal_name(exception_code, buf, size)) { 3544 jio_snprintf(buf, size, "SIG%d", exception_code); 3545 } 3546 return buf; 3547 } else { 3548 return NULL; 3549 } 3550} 3551 3552// this is called _before_ the most of global arguments have been parsed 3553void os::init(void) { 3554 char dummy; /* used to get a guess on initial stack address */ 3555// first_hrtime = gethrtime(); 3556 3557 // With LinuxThreads the JavaMain thread pid (primordial thread) 3558 // is different than the pid of the java launcher thread. 3559 // So, on Linux, the launcher thread pid is passed to the VM 3560 // via the sun.java.launcher.pid property. 3561 // Use this property instead of getpid() if it was correctly passed. 3562 // See bug 6351349. 3563 pid_t java_launcher_pid = (pid_t) Arguments::sun_java_launcher_pid(); 3564 3565 _initial_pid = (java_launcher_pid > 0) ? 
java_launcher_pid : getpid(); 3566 3567 clock_tics_per_sec = sysconf(_SC_CLK_TCK); 3568 3569 init_random(1234567); 3570 3571 ThreadCritical::initialize(); 3572 3573 Linux::set_page_size(sysconf(_SC_PAGESIZE)); 3574 if (Linux::page_size() == -1) { 3575 fatal1("os_linux.cpp: os::init: sysconf failed (%s)", strerror(errno)); 3576 } 3577 init_page_sizes((size_t) Linux::page_size()); 3578 3579 Linux::initialize_system_info(); 3580 3581 // main_thread points to the aboriginal thread 3582 Linux::_main_thread = pthread_self(); 3583 3584 Linux::clock_init(); 3585 initial_time_count = os::elapsed_counter(); 3586} 3587 3588// To install functions for atexit system call 3589extern "C" { 3590 static void perfMemory_exit_helper() { 3591 perfMemory_exit(); 3592 } 3593} 3594 3595// this is called _after_ the global arguments have been parsed 3596jint os::init_2(void) 3597{ 3598 Linux::fast_thread_clock_init(); 3599 3600 // Allocate a single page and mark it as readable for safepoint polling 3601 address polling_page = (address) ::mmap(NULL, Linux::page_size(), PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); 3602 guarantee( polling_page != MAP_FAILED, "os::init_2: failed to allocate polling page" ); 3603 3604 os::set_polling_page( polling_page ); 3605 3606#ifndef PRODUCT 3607 if(Verbose && PrintMiscellaneous) 3608 tty->print("[SafePoint Polling address: " INTPTR_FORMAT "]\n", (intptr_t)polling_page); 3609#endif 3610 3611 if (!UseMembar) { 3612 address mem_serialize_page = (address) ::mmap(NULL, Linux::page_size(), PROT_READ | PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); 3613 guarantee( mem_serialize_page != NULL, "mmap Failed for memory serialize page"); 3614 os::set_memory_serialize_page( mem_serialize_page ); 3615 3616#ifndef PRODUCT 3617 if(Verbose && PrintMiscellaneous) 3618 tty->print("[Memory Serialize Page address: " INTPTR_FORMAT "]\n", (intptr_t)mem_serialize_page); 3619#endif 3620 } 3621 3622 FLAG_SET_DEFAULT(UseLargePages, os::large_page_init()); 3623 3624 // initialize 
suspend/resume support - must do this before signal_sets_init() 3625 if (SR_initialize() != 0) { 3626 perror("SR_initialize failed"); 3627 return JNI_ERR; 3628 } 3629 3630 Linux::signal_sets_init(); 3631 Linux::install_signal_handlers(); 3632 3633 size_t threadStackSizeInBytes = ThreadStackSize * K; 3634 if (threadStackSizeInBytes != 0 && 3635 threadStackSizeInBytes < Linux::min_stack_allowed) { 3636 tty->print_cr("\nThe stack size specified is too small, " 3637 "Specify at least %dk", 3638 Linux::min_stack_allowed / K); 3639 return JNI_ERR; 3640 } 3641 3642 // Make the stack size a multiple of the page size so that 3643 // the yellow/red zones can be guarded. 3644 JavaThread::set_stack_size_at_create(round_to(threadStackSizeInBytes, 3645 vm_page_size())); 3646 3647 Linux::capture_initial_stack(JavaThread::stack_size_at_create()); 3648 3649 Linux::libpthread_init(); 3650 if (PrintMiscellaneous && (Verbose || WizardMode)) { 3651 tty->print_cr("[HotSpot is running with %s, %s(%s)]\n", 3652 Linux::glibc_version(), Linux::libpthread_version(), 3653 Linux::is_floating_stack() ? "floating stack" : "fixed stack"); 3654 } 3655 3656 if (UseNUMA) { 3657 Linux::libnuma_init(); 3658 } 3659 3660 if (MaxFDLimit) { 3661 // set the number of file descriptors to max. print out error 3662 // if getrlimit/setrlimit fails but continue regardless. 
3663 struct rlimit nbr_files; 3664 int status = getrlimit(RLIMIT_NOFILE, &nbr_files); 3665 if (status != 0) { 3666 if (PrintMiscellaneous && (Verbose || WizardMode)) 3667 perror("os::init_2 getrlimit failed"); 3668 } else { 3669 nbr_files.rlim_cur = nbr_files.rlim_max; 3670 status = setrlimit(RLIMIT_NOFILE, &nbr_files); 3671 if (status != 0) { 3672 if (PrintMiscellaneous && (Verbose || WizardMode)) 3673 perror("os::init_2 setrlimit failed"); 3674 } 3675 } 3676 } 3677 3678 // Initialize lock used to serialize thread creation (see os::create_thread) 3679 Linux::set_createThread_lock(new Mutex(Mutex::leaf, "createThread_lock", false)); 3680 3681 // Initialize HPI. 3682 jint hpi_result = hpi::initialize(); 3683 if (hpi_result != JNI_OK) { 3684 tty->print_cr("There was an error trying to initialize the HPI library."); 3685 return hpi_result; 3686 } 3687 3688 // at-exit methods are called in the reverse order of their registration. 3689 // atexit functions are called on return from main or as a result of a 3690 // call to exit(3C). There can be only 32 of these functions registered 3691 // and atexit() does not set errno. 3692 3693 if (PerfAllowAtExitRegistration) { 3694 // only register atexit functions if PerfAllowAtExitRegistration is set. 3695 // atexit functions can be delayed until process exit time, which 3696 // can be problematic for embedded VM situations. Embedded VMs should 3697 // call DestroyJavaVM() to assure that VM resources are released. 3698 3699 // note: perfMemory_exit_helper atexit function may be removed in 3700 // the future if the appropriate cleanup code can be added to the 3701 // VM_Exit VMOperation's doit method. 
3702 if (atexit(perfMemory_exit_helper) != 0) { 3703 warning("os::init2 atexit(perfMemory_exit_helper) failed"); 3704 } 3705 } 3706 3707 // initialize thread priority policy 3708 prio_init(); 3709 3710 return JNI_OK; 3711} 3712 3713// Mark the polling page as unreadable 3714void os::make_polling_page_unreadable(void) { 3715 if( !guard_memory((char*)_polling_page, Linux::page_size()) ) 3716 fatal("Could not disable polling page"); 3717}; 3718 3719// Mark the polling page as readable 3720void os::make_polling_page_readable(void) { 3721 if( !protect_memory((char *)_polling_page, Linux::page_size()) ) 3722 fatal("Could not enable polling page"); 3723}; 3724 3725int os::active_processor_count() { 3726 // Linux doesn't yet have a (official) notion of processor sets, 3727 // so just return the number of online processors. 3728 int online_cpus = ::sysconf(_SC_NPROCESSORS_ONLN); 3729 assert(online_cpus > 0 && online_cpus <= processor_count(), "sanity check"); 3730 return online_cpus; 3731} 3732 3733bool os::distribute_processes(uint length, uint* distribution) { 3734 // Not yet implemented. 3735 return false; 3736} 3737 3738bool os::bind_to_processor(uint processor_id) { 3739 // Not yet implemented. 3740 return false; 3741} 3742 3743/// 3744 3745// Suspends the target using the signal mechanism and then grabs the PC before 3746// resuming the target. 
Used by the flat-profiler only 3747ExtendedPC os::get_thread_pc(Thread* thread) { 3748 // Make sure that it is called by the watcher for the VMThread 3749 assert(Thread::current()->is_Watcher_thread(), "Must be watcher"); 3750 assert(thread->is_VM_thread(), "Can only be called for VMThread"); 3751 3752 ExtendedPC epc; 3753 3754 OSThread* osthread = thread->osthread(); 3755 if (do_suspend(osthread)) { 3756 if (osthread->ucontext() != NULL) { 3757 epc = os::Linux::ucontext_get_pc(osthread->ucontext()); 3758 } else { 3759 // NULL context is unexpected, double-check this is the VMThread 3760 guarantee(thread->is_VM_thread(), "can only be called for VMThread"); 3761 } 3762 do_resume(osthread); 3763 } 3764 // failure means pthread_kill failed for some reason - arguably this is 3765 // a fatal problem, but such problems are ignored elsewhere 3766 3767 return epc; 3768} 3769 3770int os::Linux::safe_cond_timedwait(pthread_cond_t *_cond, pthread_mutex_t *_mutex, const struct timespec *_abstime) 3771{ 3772 if (is_NPTL()) { 3773 return pthread_cond_timedwait(_cond, _mutex, _abstime); 3774 } else { 3775#ifndef IA64 3776 // 6292965: LinuxThreads pthread_cond_timedwait() resets FPU control 3777 // word back to default 64bit precision if condvar is signaled. Java 3778 // wants 53bit precision. Save and restore current value. 
3779 int fpu = get_fpu_control_word(); 3780#endif // IA64 3781 int status = pthread_cond_timedwait(_cond, _mutex, _abstime); 3782#ifndef IA64 3783 set_fpu_control_word(fpu); 3784#endif // IA64 3785 return status; 3786 } 3787} 3788 3789//////////////////////////////////////////////////////////////////////////////// 3790// debug support 3791 3792#ifndef PRODUCT 3793static address same_page(address x, address y) { 3794 int page_bits = -os::vm_page_size(); 3795 if ((intptr_t(x) & page_bits) == (intptr_t(y) & page_bits)) 3796 return x; 3797 else if (x > y) 3798 return (address)(intptr_t(y) | ~page_bits) + 1; 3799 else 3800 return (address)(intptr_t(y) & page_bits); 3801} 3802 3803bool os::find(address addr) { 3804 Dl_info dlinfo; 3805 memset(&dlinfo, 0, sizeof(dlinfo)); 3806 if (dladdr(addr, &dlinfo)) { 3807 tty->print(PTR_FORMAT ": ", addr); 3808 if (dlinfo.dli_sname != NULL) { 3809 tty->print("%s+%#x", dlinfo.dli_sname, 3810 addr - (intptr_t)dlinfo.dli_saddr); 3811 } else if (dlinfo.dli_fname) { 3812 tty->print("<offset %#x>", addr - (intptr_t)dlinfo.dli_fbase); 3813 } else { 3814 tty->print("<absolute address>"); 3815 } 3816 if (dlinfo.dli_fname) { 3817 tty->print(" in %s", dlinfo.dli_fname); 3818 } 3819 if (dlinfo.dli_fbase) { 3820 tty->print(" at " PTR_FORMAT, dlinfo.dli_fbase); 3821 } 3822 tty->cr(); 3823 3824 if (Verbose) { 3825 // decode some bytes around the PC 3826 address begin = same_page(addr-40, addr); 3827 address end = same_page(addr+40, addr); 3828 address lowest = (address) dlinfo.dli_sname; 3829 if (!lowest) lowest = (address) dlinfo.dli_fbase; 3830 if (begin < lowest) begin = lowest; 3831 Dl_info dlinfo2; 3832 if (dladdr(end, &dlinfo2) && dlinfo2.dli_saddr != dlinfo.dli_saddr 3833 && end > dlinfo2.dli_saddr && dlinfo2.dli_saddr > begin) 3834 end = (address) dlinfo2.dli_saddr; 3835 Disassembler::decode(begin, end); 3836 } 3837 return true; 3838 } 3839 return false; 3840} 3841 3842#endif 3843 
3844//////////////////////////////////////////////////////////////////////////////// 3845// misc 3846 3847// This does not do anything on Linux. This is basically a hook for being 3848// able to use structured exception handling (thread-local exception filters) 3849// on, e.g., Win32. 3850void 3851os::os_exception_wrapper(java_call_t f, JavaValue* value, methodHandle* method, 3852 JavaCallArguments* args, Thread* thread) { 3853 f(value, method, args, thread); 3854} 3855 3856void os::print_statistics() { 3857} 3858 3859int os::message_box(const char* title, const char* message) { 3860 int i; 3861 fdStream err(defaultStream::error_fd()); 3862 for (i = 0; i < 78; i++) err.print_raw("="); 3863 err.cr(); 3864 err.print_raw_cr(title); 3865 for (i = 0; i < 78; i++) err.print_raw("-"); 3866 err.cr(); 3867 err.print_raw_cr(message); 3868 for (i = 0; i < 78; i++) err.print_raw("="); 3869 err.cr(); 3870 3871 char buf[16]; 3872 // Prevent process from exiting upon "read error" without consuming all CPU 3873 while (::read(0, buf, sizeof(buf)) <= 0) { ::sleep(100); } 3874 3875 return buf[0] == 'y' || buf[0] == 'Y'; 3876} 3877 3878int os::stat(const char *path, struct stat *sbuf) { 3879 char pathbuf[MAX_PATH]; 3880 if (strlen(path) > MAX_PATH - 1) { 3881 errno = ENAMETOOLONG; 3882 return -1; 3883 } 3884 hpi::native_path(strcpy(pathbuf, path)); 3885 return ::stat(pathbuf, sbuf); 3886} 3887 3888bool os::check_heap(bool force) { 3889 return true; 3890} 3891 3892int local_vsnprintf(char* buf, size_t count, const char* format, va_list args) { 3893 return ::vsnprintf(buf, count, format, args); 3894} 3895 3896// Is a (classpath) directory empty? 
3897bool os::dir_is_empty(const char* path) { 3898 DIR *dir = NULL; 3899 struct dirent *ptr; 3900 3901 dir = opendir(path); 3902 if (dir == NULL) return true; 3903 3904 /* Scan the directory */ 3905 bool result = true; 3906 char buf[sizeof(struct dirent) + MAX_PATH]; 3907 while (result && (ptr = ::readdir(dir)) != NULL) { 3908 if (strcmp(ptr->d_name, ".") != 0 && strcmp(ptr->d_name, "..") != 0) { 3909 result = false; 3910 } 3911 } 3912 closedir(dir); 3913 return result; 3914} 3915 3916// create binary file, rewriting existing file if required 3917int os::create_binary_file(const char* path, bool rewrite_existing) { 3918 int oflags = O_WRONLY | O_CREAT; 3919 if (!rewrite_existing) { 3920 oflags |= O_EXCL; 3921 } 3922 return ::open64(path, oflags, S_IREAD | S_IWRITE); 3923} 3924 3925// return current position of file pointer 3926jlong os::current_file_offset(int fd) { 3927 return (jlong)::lseek64(fd, (off64_t)0, SEEK_CUR); 3928} 3929 3930// move file pointer to the specified offset 3931jlong os::seek_to_file_offset(int fd, jlong offset) { 3932 return (jlong)::lseek64(fd, (off64_t)offset, SEEK_SET); 3933} 3934 3935// Map a block of memory. 3936char* os::map_memory(int fd, const char* file_name, size_t file_offset, 3937 char *addr, size_t bytes, bool read_only, 3938 bool allow_exec) { 3939 int prot; 3940 int flags; 3941 3942 if (read_only) { 3943 prot = PROT_READ; 3944 flags = MAP_SHARED; 3945 } else { 3946 prot = PROT_READ | PROT_WRITE; 3947 flags = MAP_PRIVATE; 3948 } 3949 3950 if (allow_exec) { 3951 prot |= PROT_EXEC; 3952 } 3953 3954 if (addr != NULL) { 3955 flags |= MAP_FIXED; 3956 } 3957 3958 char* mapped_address = (char*)mmap(addr, (size_t)bytes, prot, flags, 3959 fd, file_offset); 3960 if (mapped_address == MAP_FAILED) { 3961 return NULL; 3962 } 3963 return mapped_address; 3964} 3965 3966 3967// Remap a block of memory. 
3968char* os::remap_memory(int fd, const char* file_name, size_t file_offset, 3969 char *addr, size_t bytes, bool read_only, 3970 bool allow_exec) { 3971 // same as map_memory() on this OS 3972 return os::map_memory(fd, file_name, file_offset, addr, bytes, read_only, 3973 allow_exec); 3974} 3975 3976 3977// Unmap a block of memory. 3978bool os::unmap_memory(char* addr, size_t bytes) { 3979 return munmap(addr, bytes) == 0; 3980} 3981 3982static jlong slow_thread_cpu_time(Thread *thread, bool user_sys_cpu_time); 3983 3984static clockid_t thread_cpu_clockid(Thread* thread) { 3985 pthread_t tid = thread->osthread()->pthread_id(); 3986 clockid_t clockid; 3987 3988 // Get thread clockid 3989 int rc = os::Linux::pthread_getcpuclockid(tid, &clockid); 3990 assert(rc == 0, "pthread_getcpuclockid is expected to return 0 code"); 3991 return clockid; 3992} 3993 3994// current_thread_cpu_time(bool) and thread_cpu_time(Thread*, bool) 3995// are used by JVM M&M and JVMTI to get user+sys or user CPU time 3996// of a thread. 3997// 3998// current_thread_cpu_time() and thread_cpu_time(Thread*) returns 3999// the fast estimate available on the platform. 
4000 4001jlong os::current_thread_cpu_time() { 4002 if (os::Linux::supports_fast_thread_cpu_time()) { 4003 return os::Linux::fast_thread_cpu_time(CLOCK_THREAD_CPUTIME_ID); 4004 } else { 4005 // return user + sys since the cost is the same 4006 return slow_thread_cpu_time(Thread::current(), true /* user + sys */); 4007 } 4008} 4009 4010jlong os::thread_cpu_time(Thread* thread) { 4011 // consistent with what current_thread_cpu_time() returns 4012 if (os::Linux::supports_fast_thread_cpu_time()) { 4013 return os::Linux::fast_thread_cpu_time(thread_cpu_clockid(thread)); 4014 } else { 4015 return slow_thread_cpu_time(thread, true /* user + sys */); 4016 } 4017} 4018 4019jlong os::current_thread_cpu_time(bool user_sys_cpu_time) { 4020 if (user_sys_cpu_time && os::Linux::supports_fast_thread_cpu_time()) { 4021 return os::Linux::fast_thread_cpu_time(CLOCK_THREAD_CPUTIME_ID); 4022 } else { 4023 return slow_thread_cpu_time(Thread::current(), user_sys_cpu_time); 4024 } 4025} 4026 4027jlong os::thread_cpu_time(Thread *thread, bool user_sys_cpu_time) { 4028 if (user_sys_cpu_time && os::Linux::supports_fast_thread_cpu_time()) { 4029 return os::Linux::fast_thread_cpu_time(thread_cpu_clockid(thread)); 4030 } else { 4031 return slow_thread_cpu_time(thread, user_sys_cpu_time); 4032 } 4033} 4034 4035// 4036// -1 on error. 4037// 4038 4039static jlong slow_thread_cpu_time(Thread *thread, bool user_sys_cpu_time) { 4040 static bool proc_pid_cpu_avail = true; 4041 static bool proc_task_unchecked = true; 4042 static const char *proc_stat_path = "/proc/%d/stat"; 4043 pid_t tid = thread->osthread()->thread_id(); 4044 int i; 4045 char *s; 4046 char stat[2048]; 4047 int statlen; 4048 char proc_name[64]; 4049 int count; 4050 long sys_time, user_time; 4051 char string[64]; 4052 int idummy; 4053 long ldummy; 4054 FILE *fp; 4055 4056 // We first try accessing /proc/<pid>/cpu since this is faster to 4057 // process. 
If this file is not present (linux kernels 2.5 and above) 4058 // then we open /proc/<pid>/stat. 4059 if ( proc_pid_cpu_avail ) { 4060 sprintf(proc_name, "/proc/%d/cpu", tid); 4061 fp = fopen(proc_name, "r"); 4062 if ( fp != NULL ) { 4063 count = fscanf( fp, "%s %lu %lu\n", string, &user_time, &sys_time); 4064 fclose(fp); 4065 if ( count != 3 ) return -1; 4066 4067 if (user_sys_cpu_time) { 4068 return ((jlong)sys_time + (jlong)user_time) * (1000000000 / clock_tics_per_sec); 4069 } else { 4070 return (jlong)user_time * (1000000000 / clock_tics_per_sec); 4071 } 4072 } 4073 else proc_pid_cpu_avail = false; 4074 } 4075 4076 // The /proc/<tid>/stat aggregates per-process usage on 4077 // new Linux kernels 2.6+ where NPTL is supported. 4078 // The /proc/self/task/<tid>/stat still has the per-thread usage. 4079 // See bug 6328462. 4080 // There can be no directory /proc/self/task on kernels 2.4 with NPTL 4081 // and possibly in some other cases, so we check its availability. 4082 if (proc_task_unchecked && os::Linux::is_NPTL()) { 4083 // This is executed only once 4084 proc_task_unchecked = false; 4085 fp = fopen("/proc/self/task", "r"); 4086 if (fp != NULL) { 4087 proc_stat_path = "/proc/self/task/%d/stat"; 4088 fclose(fp); 4089 } 4090 } 4091 4092 sprintf(proc_name, proc_stat_path, tid); 4093 fp = fopen(proc_name, "r"); 4094 if ( fp == NULL ) return -1; 4095 statlen = fread(stat, 1, 2047, fp); 4096 stat[statlen] = '\0'; 4097 fclose(fp); 4098 4099 // Skip pid and the command string. Note that we could be dealing with 4100 // weird command names, e.g. user could decide to rename java launcher 4101 // to "java 1.4.2 :)", then the stat file would look like 4102 // 1234 (java 1.4.2 :)) R ... ... 4103 // We don't really need to know the command string, just find the last 4104 // occurrence of ")" and then start parsing from there. See bug 4726580. 
4105 s = strrchr(stat, ')'); 4106 i = 0; 4107 if (s == NULL ) return -1; 4108 4109 // Skip blank chars 4110 do s++; while (isspace(*s)); 4111 4112 count = sscanf(s,"%c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu", 4113 &idummy, &idummy, &idummy, &idummy, &idummy, &idummy, 4114 &ldummy, &ldummy, &ldummy, &ldummy, &ldummy, 4115 &user_time, &sys_time); 4116 if ( count != 13 ) return -1; 4117 if (user_sys_cpu_time) { 4118 return ((jlong)sys_time + (jlong)user_time) * (1000000000 / clock_tics_per_sec); 4119 } else { 4120 return (jlong)user_time * (1000000000 / clock_tics_per_sec); 4121 } 4122} 4123 4124void os::current_thread_cpu_time_info(jvmtiTimerInfo *info_ptr) { 4125 info_ptr->max_value = ALL_64_BITS; // will not wrap in less than 64 bits 4126 info_ptr->may_skip_backward = false; // elapsed time not wall time 4127 info_ptr->may_skip_forward = false; // elapsed time not wall time 4128 info_ptr->kind = JVMTI_TIMER_TOTAL_CPU; // user+system time is returned 4129} 4130 4131void os::thread_cpu_time_info(jvmtiTimerInfo *info_ptr) { 4132 info_ptr->max_value = ALL_64_BITS; // will not wrap in less than 64 bits 4133 info_ptr->may_skip_backward = false; // elapsed time not wall time 4134 info_ptr->may_skip_forward = false; // elapsed time not wall time 4135 info_ptr->kind = JVMTI_TIMER_TOTAL_CPU; // user+system time is returned 4136} 4137 4138bool os::is_thread_cpu_time_supported() { 4139 return true; 4140} 4141 4142// System loadavg support. Returns -1 if load average cannot be obtained. 4143// Linux doesn't yet have a (official) notion of processor sets, 4144// so just return the system wide load average. 
4145int os::loadavg(double loadavg[], int nelem) { 4146 return ::getloadavg(loadavg, nelem); 4147} 4148 4149void os::pause() { 4150 char filename[MAX_PATH]; 4151 if (PauseAtStartupFile && PauseAtStartupFile[0]) { 4152 jio_snprintf(filename, MAX_PATH, PauseAtStartupFile); 4153 } else { 4154 jio_snprintf(filename, MAX_PATH, "./vm.paused.%d", current_process_id()); 4155 } 4156 4157 int fd = ::open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0666); 4158 if (fd != -1) { 4159 struct stat buf; 4160 close(fd); 4161 while (::stat(filename, &buf) == 0) { 4162 (void)::poll(NULL, 0, 100); 4163 } 4164 } else { 4165 jio_fprintf(stderr, 4166 "Could not open pause file '%s', continuing immediately.\n", filename); 4167 } 4168} 4169 4170extern "C" { 4171 4172/** 4173 * NOTE: the following code is to keep the green threads code 4174 * in the libjava.so happy. Once the green threads is removed, 4175 * these code will no longer be needed. 4176 */ 4177int 4178jdk_waitpid(pid_t pid, int* status, int options) { 4179 return waitpid(pid, status, options); 4180} 4181 4182int 4183fork1() { 4184 return fork(); 4185} 4186 4187int 4188jdk_sem_init(sem_t *sem, int pshared, unsigned int value) { 4189 return sem_init(sem, pshared, value); 4190} 4191 4192int 4193jdk_sem_post(sem_t *sem) { 4194 return sem_post(sem); 4195} 4196 4197int 4198jdk_sem_wait(sem_t *sem) { 4199 return sem_wait(sem); 4200} 4201 4202int 4203jdk_pthread_sigmask(int how , const sigset_t* newmask, sigset_t* oldmask) { 4204 return pthread_sigmask(how , newmask, oldmask); 4205} 4206 4207} 4208 4209// Refer to the comments in os_solaris.cpp park-unpark. 4210// 4211// Beware -- Some versions of NPTL embody a flaw where pthread_cond_timedwait() can 4212// hang indefinitely. For instance NPTL 0.60 on 2.4.21-4ELsmp is vulnerable. 4213// For specifics regarding the bug see GLIBC BUGID 261237 : 4214// http://www.mail-archive.com/debian-glibc@lists.debian.org/msg10837.html. 
// Briefly, pthread_cond_timedwait() calls with an expiry time that's not in the future
// will either hang or corrupt the condvar, resulting in subsequent hangs if the condvar
// is used.  (The simple C test-case provided in the GLIBC bug report manifests the
// hang).  The JVM is vulnerable via sleep(), Object.wait(timo), LockSupport.parkNanos()
// and monitorenter when we're using 1-0 locking.  All those operations may result in
// calls to pthread_cond_timedwait().  Using LD_ASSUME_KERNEL to use an older version
// of libpthread avoids the problem, but isn't practical.
//
// Possible remedies:
//
// 1.   Establish a minimum relative wait time.  50 to 100 msecs seems to work.
//      This is palliative and probabilistic, however.  If the thread is preempted
//      between the call to compute_abstime() and pthread_cond_timedwait(), more
//      than the minimum period may have passed, and the abstime may be stale (in the
//      past) resulting in a hang.  Using this technique reduces the odds of a hang
//      but the JVM is still vulnerable, particularly on heavily loaded systems.
//
// 2.   Modify park-unpark to use per-thread (per ParkEvent) pipe-pairs instead
//      of the usual flag-condvar-mutex idiom.  The write side of the pipe is set
//      NDELAY.  unpark() reduces to write(), park() reduces to read() and park(timo)
//      reduces to poll()+read().  This works well, but consumes 2 FDs per extant
//      thread.
//
// 3.   Embargo pthread_cond_timedwait() and implement a native "chron" thread
//      that manages timeouts.  We'd emulate pthread_cond_timedwait() by enqueuing
//      a timeout request to the chron thread and then blocking via pthread_cond_wait().
//      This also works well.  In fact it avoids kernel-level scalability impediments
//      on certain platforms that don't handle lots of active pthread_cond_timedwait()
//      timers in a graceful fashion.
//
// 4.   When the abstime value is in the past it appears that control returns
//      correctly from pthread_cond_timedwait(), but the condvar is left corrupt.
//      Subsequent timedwait/wait calls may hang indefinitely.  Given that, we
//      can avoid the problem by reinitializing the condvar -- by cond_destroy()
//      followed by cond_init() -- after all calls to pthread_cond_timedwait().
//      It may be possible to avoid reinitialization by checking the return
//      value from pthread_cond_timedwait().  In addition to reinitializing the
//      condvar we must establish the invariant that cond_signal() is only called
//      within critical sections protected by the adjunct mutex.  This prevents
//      cond_signal() from "seeing" a condvar that's in the midst of being
//      reinitialized or that is corrupt.  Sadly, this invariant obviates the
//      desirable signal-after-unlock optimization that avoids futile context switching.
//
//      I'm also concerned that some versions of NPTL might allocate an auxiliary
//      structure when a condvar is used or initialized.  cond_destroy() would
//      release the helper structure.  Our reinitialize-after-timedwait fix
//      put excessive stress on malloc/free and locks protecting the c-heap.
//
// We currently use (4).  See the WorkAroundNPTLTimedWaitHang flag.
// It may be possible to refine (4) by checking the kernel and NPTL versions
// and only enabling the work-around for vulnerable environments.
4266 4267// utility to compute the abstime argument to timedwait: 4268// millis is the relative timeout time 4269// abstime will be the absolute timeout time 4270// TODO: replace compute_abstime() with unpackTime() 4271 4272static struct timespec* compute_abstime(timespec* abstime, jlong millis) { 4273 if (millis < 0) millis = 0; 4274 struct timeval now; 4275 int status = gettimeofday(&now, NULL); 4276 assert(status == 0, "gettimeofday"); 4277 jlong seconds = millis / 1000; 4278 millis %= 1000; 4279 if (seconds > 50000000) { // see man cond_timedwait(3T) 4280 seconds = 50000000; 4281 } 4282 abstime->tv_sec = now.tv_sec + seconds; 4283 long usec = now.tv_usec + millis * 1000; 4284 if (usec >= 1000000) { 4285 abstime->tv_sec += 1; 4286 usec -= 1000000; 4287 } 4288 abstime->tv_nsec = usec * 1000; 4289 return abstime; 4290} 4291 4292 4293// Test-and-clear _Event, always leaves _Event set to 0, returns immediately. 4294// Conceptually TryPark() should be equivalent to park(0). 4295 4296int os::PlatformEvent::TryPark() { 4297 for (;;) { 4298 const int v = _Event ; 4299 guarantee ((v == 0) || (v == 1), "invariant") ; 4300 if (Atomic::cmpxchg (0, &_Event, v) == v) return v ; 4301 } 4302} 4303 4304void os::PlatformEvent::park() { // AKA "down()" 4305 // Invariant: Only the thread associated with the Event/PlatformEvent 4306 // may call park(). 4307 // TODO: assert that _Assoc != NULL or _Assoc == Self 4308 int v ; 4309 for (;;) { 4310 v = _Event ; 4311 if (Atomic::cmpxchg (v-1, &_Event, v) == v) break ; 4312 } 4313 guarantee (v >= 0, "invariant") ; 4314 if (v == 0) { 4315 // Do this the hard way by blocking ... 4316 int status = pthread_mutex_lock(_mutex); 4317 assert_status(status == 0, status, "mutex_lock"); 4318 guarantee (_nParked == 0, "invariant") ; 4319 ++ _nParked ; 4320 while (_Event < 0) { 4321 status = pthread_cond_wait(_cond, _mutex); 4322 // for some reason, under 2.7 lwp_cond_wait() may return ETIME ... 
4323 // Treat this the same as if the wait was interrupted 4324 if (status == ETIME) { status = EINTR; } 4325 assert_status(status == 0 || status == EINTR, status, "cond_wait"); 4326 } 4327 -- _nParked ; 4328 4329 // In theory we could move the ST of 0 into _Event past the unlock(), 4330 // but then we'd need a MEMBAR after the ST. 4331 _Event = 0 ; 4332 status = pthread_mutex_unlock(_mutex); 4333 assert_status(status == 0, status, "mutex_unlock"); 4334 } 4335 guarantee (_Event >= 0, "invariant") ; 4336} 4337 4338int os::PlatformEvent::park(jlong millis) { 4339 guarantee (_nParked == 0, "invariant") ; 4340 4341 int v ; 4342 for (;;) { 4343 v = _Event ; 4344 if (Atomic::cmpxchg (v-1, &_Event, v) == v) break ; 4345 } 4346 guarantee (v >= 0, "invariant") ; 4347 if (v != 0) return OS_OK ; 4348 4349 // We do this the hard way, by blocking the thread. 4350 // Consider enforcing a minimum timeout value. 4351 struct timespec abst; 4352 compute_abstime(&abst, millis); 4353 4354 int ret = OS_TIMEOUT; 4355 int status = pthread_mutex_lock(_mutex); 4356 assert_status(status == 0, status, "mutex_lock"); 4357 guarantee (_nParked == 0, "invariant") ; 4358 ++_nParked ; 4359 4360 // Object.wait(timo) will return because of 4361 // (a) notification 4362 // (b) timeout 4363 // (c) thread.interrupt 4364 // 4365 // Thread.interrupt and object.notify{All} both call Event::set. 4366 // That is, we treat thread.interrupt as a special case of notification. 4367 // The underlying Solaris implementation, cond_timedwait, admits 4368 // spurious/premature wakeups, but the JLS/JVM spec prevents the 4369 // JVM from making those visible to Java code. As such, we must 4370 // filter out spurious wakeups. We assume all ETIME returns are valid. 4371 // 4372 // TODO: properly differentiate simultaneous notify+interrupt. 4373 // In that case, we should propagate the notify to another waiter. 
4374 4375 while (_Event < 0) { 4376 status = os::Linux::safe_cond_timedwait(_cond, _mutex, &abst); 4377 if (status != 0 && WorkAroundNPTLTimedWaitHang) { 4378 pthread_cond_destroy (_cond); 4379 pthread_cond_init (_cond, NULL) ; 4380 } 4381 assert_status(status == 0 || status == EINTR || 4382 status == ETIME || status == ETIMEDOUT, 4383 status, "cond_timedwait"); 4384 if (!FilterSpuriousWakeups) break ; // previous semantics 4385 if (status == ETIME || status == ETIMEDOUT) break ; 4386 // We consume and ignore EINTR and spurious wakeups. 4387 } 4388 --_nParked ; 4389 if (_Event >= 0) { 4390 ret = OS_OK; 4391 } 4392 _Event = 0 ; 4393 status = pthread_mutex_unlock(_mutex); 4394 assert_status(status == 0, status, "mutex_unlock"); 4395 assert (_nParked == 0, "invariant") ; 4396 return ret; 4397} 4398 4399void os::PlatformEvent::unpark() { 4400 int v, AnyWaiters ; 4401 for (;;) { 4402 v = _Event ; 4403 if (v > 0) { 4404 // The LD of _Event could have reordered or be satisfied 4405 // by a read-aside from this processor's write buffer. 4406 // To avoid problems execute a barrier and then 4407 // ratify the value. 4408 OrderAccess::fence() ; 4409 if (_Event == v) return ; 4410 continue ; 4411 } 4412 if (Atomic::cmpxchg (v+1, &_Event, v) == v) break ; 4413 } 4414 if (v < 0) { 4415 // Wait for the thread associated with the event to vacate 4416 int status = pthread_mutex_lock(_mutex); 4417 assert_status(status == 0, status, "mutex_lock"); 4418 AnyWaiters = _nParked ; 4419 assert (AnyWaiters == 0 || AnyWaiters == 1, "invariant") ; 4420 if (AnyWaiters != 0 && WorkAroundNPTLTimedWaitHang) { 4421 AnyWaiters = 0 ; 4422 pthread_cond_signal (_cond); 4423 } 4424 status = pthread_mutex_unlock(_mutex); 4425 assert_status(status == 0, status, "mutex_unlock"); 4426 if (AnyWaiters != 0) { 4427 status = pthread_cond_signal(_cond); 4428 assert_status(status == 0, status, "cond_signal"); 4429 } 4430 } 4431 4432 // Note that we signal() _after dropping the lock for "immortal" Events. 
4433 // This is safe and avoids a common class of futile wakeups. In rare 4434 // circumstances this can cause a thread to return prematurely from 4435 // cond_{timed}wait() but the spurious wakeup is benign and the victim will 4436 // simply re-test the condition and re-park itself. 4437} 4438 4439 4440// JSR166 4441// ------------------------------------------------------- 4442 4443/* 4444 * The solaris and linux implementations of park/unpark are fairly 4445 * conservative for now, but can be improved. They currently use a 4446 * mutex/condvar pair, plus a a count. 4447 * Park decrements count if > 0, else does a condvar wait. Unpark 4448 * sets count to 1 and signals condvar. Only one thread ever waits 4449 * on the condvar. Contention seen when trying to park implies that someone 4450 * is unparking you, so don't wait. And spurious returns are fine, so there 4451 * is no need to track notifications. 4452 */ 4453 4454 4455#define NANOSECS_PER_SEC 1000000000 4456#define NANOSECS_PER_MILLISEC 1000000 4457#define MAX_SECS 100000000 4458/* 4459 * This code is common to linux and solaris and will be moved to a 4460 * common place in dolphin. 4461 * 4462 * The passed in time value is either a relative time in nanoseconds 4463 * or an absolute time in milliseconds. Either way it has to be unpacked 4464 * into suitable seconds and nanoseconds components and stored in the 4465 * given timespec structure. 4466 * Given time is a 64-bit value and the time_t used in the timespec is only 4467 * a signed-32-bit value (except on 64-bit Linux) we have to watch for 4468 * overflow if times way in the future are given. Further on Solaris versions 4469 * prior to 10 there is a restriction (see cond_timedwait) that the specified 4470 * number of seconds, in abstime, is less than current_time + 100,000,000. 
4471 * As it will be 28 years before "now + 100000000" will overflow we can 4472 * ignore overflow and just impose a hard-limit on seconds using the value 4473 * of "now + 100,000,000". This places a limit on the timeout of about 3.17 4474 * years from "now". 4475 */ 4476 4477static void unpackTime(timespec* absTime, bool isAbsolute, jlong time) { 4478 assert (time > 0, "convertTime"); 4479 4480 struct timeval now; 4481 int status = gettimeofday(&now, NULL); 4482 assert(status == 0, "gettimeofday"); 4483 4484 time_t max_secs = now.tv_sec + MAX_SECS; 4485 4486 if (isAbsolute) { 4487 jlong secs = time / 1000; 4488 if (secs > max_secs) { 4489 absTime->tv_sec = max_secs; 4490 } 4491 else { 4492 absTime->tv_sec = secs; 4493 } 4494 absTime->tv_nsec = (time % 1000) * NANOSECS_PER_MILLISEC; 4495 } 4496 else { 4497 jlong secs = time / NANOSECS_PER_SEC; 4498 if (secs >= MAX_SECS) { 4499 absTime->tv_sec = max_secs; 4500 absTime->tv_nsec = 0; 4501 } 4502 else { 4503 absTime->tv_sec = now.tv_sec + secs; 4504 absTime->tv_nsec = (time % NANOSECS_PER_SEC) + now.tv_usec*1000; 4505 if (absTime->tv_nsec >= NANOSECS_PER_SEC) { 4506 absTime->tv_nsec -= NANOSECS_PER_SEC; 4507 ++absTime->tv_sec; // note: this must be <= max_secs 4508 } 4509 } 4510 } 4511 assert(absTime->tv_sec >= 0, "tv_sec < 0"); 4512 assert(absTime->tv_sec <= max_secs, "tv_sec > max_secs"); 4513 assert(absTime->tv_nsec >= 0, "tv_nsec < 0"); 4514 assert(absTime->tv_nsec < NANOSECS_PER_SEC, "tv_nsec >= nanos_per_sec"); 4515} 4516 4517void Parker::park(bool isAbsolute, jlong time) { 4518 // Optional fast-path check: 4519 // Return immediately if a permit is available. 4520 if (_counter > 0) { 4521 _counter = 0 ; 4522 return ; 4523 } 4524 4525 Thread* thread = Thread::current(); 4526 assert(thread->is_Java_thread(), "Must be JavaThread"); 4527 JavaThread *jt = (JavaThread *)thread; 4528 4529 // Optional optimization -- avoid state transitions if there's an interrupt pending. 
4530 // Check interrupt before trying to wait 4531 if (Thread::is_interrupted(thread, false)) { 4532 return; 4533 } 4534 4535 // Next, demultiplex/decode time arguments 4536 timespec absTime; 4537 if (time < 0) { // don't wait at all 4538 return; 4539 } 4540 if (time > 0) { 4541 unpackTime(&absTime, isAbsolute, time); 4542 } 4543 4544 4545 // Enter safepoint region 4546 // Beware of deadlocks such as 6317397. 4547 // The per-thread Parker:: mutex is a classic leaf-lock. 4548 // In particular a thread must never block on the Threads_lock while 4549 // holding the Parker:: mutex. If safepoints are pending both the 4550 // the ThreadBlockInVM() CTOR and DTOR may grab Threads_lock. 4551 ThreadBlockInVM tbivm(jt); 4552 4553 // Don't wait if cannot get lock since interference arises from 4554 // unblocking. Also. check interrupt before trying wait 4555 if (Thread::is_interrupted(thread, false) || pthread_mutex_trylock(_mutex) != 0) { 4556 return; 4557 } 4558 4559 int status ; 4560 if (_counter > 0) { // no wait needed 4561 _counter = 0; 4562 status = pthread_mutex_unlock(_mutex); 4563 assert (status == 0, "invariant") ; 4564 return; 4565 } 4566 4567#ifdef ASSERT 4568 // Don't catch signals while blocked; let the running threads have the signals. 4569 // (This allows a debugger to break into the running thread.) 
4570 sigset_t oldsigs; 4571 sigset_t* allowdebug_blocked = os::Linux::allowdebug_blocked_signals(); 4572 pthread_sigmask(SIG_BLOCK, allowdebug_blocked, &oldsigs); 4573#endif 4574 4575 OSThreadWaitState osts(thread->osthread(), false /* not Object.wait() */); 4576 jt->set_suspend_equivalent(); 4577 // cleared by handle_special_suspend_equivalent_condition() or java_suspend_self() 4578 4579 if (time == 0) { 4580 status = pthread_cond_wait (_cond, _mutex) ; 4581 } else { 4582 status = os::Linux::safe_cond_timedwait (_cond, _mutex, &absTime) ; 4583 if (status != 0 && WorkAroundNPTLTimedWaitHang) { 4584 pthread_cond_destroy (_cond) ; 4585 pthread_cond_init (_cond, NULL); 4586 } 4587 } 4588 assert_status(status == 0 || status == EINTR || 4589 status == ETIME || status == ETIMEDOUT, 4590 status, "cond_timedwait"); 4591 4592#ifdef ASSERT 4593 pthread_sigmask(SIG_SETMASK, &oldsigs, NULL); 4594#endif 4595 4596 _counter = 0 ; 4597 status = pthread_mutex_unlock(_mutex) ; 4598 assert_status(status == 0, status, "invariant") ; 4599 // If externally suspended while waiting, re-suspend 4600 if (jt->handle_special_suspend_equivalent_condition()) { 4601 jt->java_suspend_self(); 4602 } 4603 4604} 4605 4606void Parker::unpark() { 4607 int s, status ; 4608 status = pthread_mutex_lock(_mutex); 4609 assert (status == 0, "invariant") ; 4610 s = _counter; 4611 _counter = 1; 4612 if (s < 1) { 4613 if (WorkAroundNPTLTimedWaitHang) { 4614 status = pthread_cond_signal (_cond) ; 4615 assert (status == 0, "invariant") ; 4616 status = pthread_mutex_unlock(_mutex); 4617 assert (status == 0, "invariant") ; 4618 } else { 4619 status = pthread_mutex_unlock(_mutex); 4620 assert (status == 0, "invariant") ; 4621 status = pthread_cond_signal (_cond) ; 4622 assert (status == 0, "invariant") ; 4623 } 4624 } else { 4625 pthread_mutex_unlock(_mutex); 4626 assert (status == 0, "invariant") ; 4627 } 4628} 4629 4630 4631extern char** environ; 4632 4633#ifndef __NR_fork 4634#define __NR_fork IA32_ONLY(2) 
IA64_ONLY(not defined) AMD64_ONLY(57) 4635#endif 4636 4637#ifndef __NR_execve 4638#define __NR_execve IA32_ONLY(11) IA64_ONLY(1033) AMD64_ONLY(59) 4639#endif 4640 4641// Run the specified command in a separate process. Return its exit value, 4642// or -1 on failure (e.g. can't fork a new process). 4643// Unlike system(), this function can be called from signal handler. It 4644// doesn't block SIGINT et al. 4645int os::fork_and_exec(char* cmd) { 4646 char * argv[4]; 4647 argv[0] = "sh"; 4648 argv[1] = "-c"; 4649 argv[2] = cmd; 4650 argv[3] = NULL; 4651 4652 // fork() in LinuxThreads/NPTL is not async-safe. It needs to run 4653 // pthread_atfork handlers and reset pthread library. All we need is a 4654 // separate process to execve. Make a direct syscall to fork process. 4655 // On IA64 there's no fork syscall, we have to use fork() and hope for 4656 // the best... 4657 pid_t pid = NOT_IA64(syscall(__NR_fork);) 4658 IA64_ONLY(fork();) 4659 4660 if (pid < 0) { 4661 // fork failed 4662 return -1; 4663 4664 } else if (pid == 0) { 4665 // child process 4666 4667 // execve() in LinuxThreads will call pthread_kill_other_threads_np() 4668 // first to kill every thread on the thread list. Because this list is 4669 // not reset by fork() (see notes above), execve() will instead kill 4670 // every thread in the parent process. We know this is the only thread 4671 // in the new process, so make a system call directly. 4672 // IA64 should use normal execve() from glibc to match the glibc fork() 4673 // above. 4674 NOT_IA64(syscall(__NR_execve, "/bin/sh", argv, environ);) 4675 IA64_ONLY(execve("/bin/sh", argv, environ);) 4676 4677 // execve failed 4678 _exit(-1); 4679 4680 } else { 4681 // copied from J2SE ..._waitForProcessExit() in UNIXProcess_md.c; we don't 4682 // care about the actual exit code, for now. 4683 4684 int status; 4685 4686 // Wait for the child process to exit. This returns immediately if 4687 // the child has already exited. 
*/ 4688 while (waitpid(pid, &status, 0) < 0) { 4689 switch (errno) { 4690 case ECHILD: return 0; 4691 case EINTR: break; 4692 default: return -1; 4693 } 4694 } 4695 4696 if (WIFEXITED(status)) { 4697 // The child exited normally; get its exit code. 4698 return WEXITSTATUS(status); 4699 } else if (WIFSIGNALED(status)) { 4700 // The child exited because of a signal 4701 // The best value to return is 0x80 + signal number, 4702 // because that is what all Unix shells do, and because 4703 // it allows callers to distinguish between process exit and 4704 // process death by signal. 4705 return 0x80 + WTERMSIG(status); 4706 } else { 4707 // Unknown exit code; pass it through 4708 return status; 4709 } 4710 } 4711} 4712