/* Copyright (C) 2021 Free Software Foundation, Inc.
   Contributed by Oracle.

   This file is part of GNU Binutils.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, 51 Franklin Street - Fifth Floor, Boston,
   MA 02110-1301, USA.  */

#include "config.h"
#include <alloca.h>
#include <dlfcn.h>
#include <stdlib.h>
#include <signal.h>
#include <unistd.h>
#include <pthread.h>

#include "gp-defs.h"
#include "collector.h"
#include "gp-experiment.h"
#include "memmgr.h"
#include "tsd.h"

/* Get dynamic module interface*/
#include "collector_module.h"

/* Get definitions for SP_LEAF_CHECK_MARKER, SP_TRUNC_STACK_MARKER */
#include "data_pckts.h"

#if ARCH(SPARC)
/* Layout of a SPARC stack frame as flushed to memory by the
   register-window mechanism; used when walking saved frames.  */
struct frame
{
  long fr_local[8];		/* saved locals */
  long fr_arg[6];		/* saved arguments [0 - 5] */
  struct frame *fr_savfp;	/* saved frame pointer */
  long fr_savpc;		/* saved program counter */
#if WSIZE(32)
  char *fr_stret;		/* struct return addr */
#endif
  long fr_argd[6];		/* arg dump area */
  long fr_argx[1];		/* array of args past the sixth */
};

#elif ARCH(Intel)
/* Minimal x86 frame: saved frame pointer followed by the return address.  */
struct frame
{
  unsigned long fr_savfp;	/* saved frame (base) pointer */
  unsigned long fr_savpc;	/* return address */
};
#endif

/* Set the debug trace level */
#define DBG_LT0 0
#define DBG_LT1 1
#define DBG_LT2 2
#define DBG_LT3 3

/* Optional hook (resolved via dlsym in unwind_init) that lets the Java VM
   read instruction bytes in memory it owns; NULL until resolved.  */
int (*__collector_VM_ReadByteInstruction)(unsigned char *) = NULL;
#define VM_NO_ACCESS        (-1)
#define VM_NOT_VM_MEMORY    (-2)
#define VM_NOT_X_SEGMENT    (-3)

#define isInside(p, bgn, end) ((p) >= (bgn) && (p) < (end))

/*
 * Weed through all the arch dependent stuff to get the right definition
 * for 'pc' in the ucontext structure.  The system header files are a mess
 * dealing with all the arch (just look for PC, R_PC, REG_PC).
 *
 */

#if ARCH(SPARC)

/* True when pc 'x' lies inside the parallel-loop barrier code.  */
#define IN_BARRIER(x) \
  ( barrier_hdl && \
    (unsigned long)x >= barrier_hdl && \
    (unsigned long)x < barrier_hdlx )
static unsigned long barrier_hdl = 0;	/* __mt_EndOfTask_Barrier_ */
static unsigned long barrier_hdlx = 0;	/* __mt_EndOfTask_Barrier_Dummy_ */

#if WSIZE(64)
#define STACK_BIAS 2047
/* True when pc 'x' lies inside the v9 misaligned-access trap handler.  */
#define IN_TRAP_HANDLER(x) \
  ( misalign_hdl && \
    (unsigned long)x >= misalign_hdl && \
    (unsigned long)x < misalign_hdlx )
static unsigned long misalign_hdl = 0;	/* __misalign_trap_handler */
static unsigned long misalign_hdlx = 0;	/* __misalign_trap_handler_end */
#elif WSIZE(32)
#define STACK_BIAS 0
#endif

#if WSIZE(64)
#define GET_GREG(ctx,reg) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[(reg)])
#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[MC_O6])
#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[MC_PC])
#else
#define GET_GREG(ctx,reg) (((ucontext_t*)ctx)->uc_mcontext.gregs[(reg)])
#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_O6])
#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_PC])
#endif

#elif ARCH(Intel)
#include "opcodes/disassemble.h"

/* Disassembler callback: discard all text output; only instruction
   lengths/targets are of interest to the unwinder.  */
static int
fprintf_func (void *arg ATTRIBUTE_UNUSED, const char *fmt ATTRIBUTE_UNUSED, ...)
{
  return 0;
}

/* Disassembler callback: styled variant of the above; also a no-op.  */
static int
fprintf_styled_func (void *arg ATTRIBUTE_UNUSED,
		     enum disassembler_style st ATTRIBUTE_UNUSED,
		     const char *fmt ATTRIBUTE_UNUSED, ...)
{
  return 0;
}

/* Get LENGTH bytes from info's buffer, at target address memaddr.
   Transfer them to myaddr.  Returns 0 on success, -1 when the requested
   range falls outside the buffer or beyond stop_vma.  */
static int
read_memory_func (bfd_vma memaddr, bfd_byte *myaddr, unsigned int length,
		  disassemble_info *info)
{
  unsigned int opb = info->octets_per_byte;
  size_t end_addr_offset = length / opb;
  size_t max_addr_offset = info->buffer_length / opb;
  size_t octets = (memaddr - info->buffer_vma) * opb;
  if (memaddr < info->buffer_vma
      || memaddr - info->buffer_vma > max_addr_offset
      || memaddr - info->buffer_vma + end_addr_offset > max_addr_offset
      || (info->stop_vma && (memaddr >= info->stop_vma
			     || memaddr + end_addr_offset > info->stop_vma)))
    return -1;
  memcpy (myaddr, info->buffer + octets, length);
  return 0;
}

/* Disassembler callback: no address printing needed.  */
static void
print_address_func (bfd_vma addr ATTRIBUTE_UNUSED,
		    disassemble_info *info ATTRIBUTE_UNUSED) { }

/* Disassembler callback: no symbol table available.  */
static asymbol *
symbol_at_address_func (bfd_vma addr ATTRIBUTE_UNUSED,
			disassemble_info *info ATTRIBUTE_UNUSED)
{
  return NULL;
}

/* Disassembler callback: accept any symbol.  */
static bfd_boolean
symbol_is_valid (asymbol *sym ATTRIBUTE_UNUSED,
		 disassemble_info *info ATTRIBUTE_UNUSED)
{
  return TRUE;
}

/* Disassembler callback: swallow memory errors silently.  */
static void
memory_error_func (int status ATTRIBUTE_UNUSED, bfd_vma addr ATTRIBUTE_UNUSED,
		   disassemble_info *info ATTRIBUTE_UNUSED) { }


#if WSIZE(32)
#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_EIP])
#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_ESP])
#define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_EBP])

#elif WSIZE(64)
#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RIP])
#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RSP])
#define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RBP])
#endif /* WSIZE() */

#elif ARCH(Aarch64)
/* NOTE(review): regs[15]/regs[13]/regs[14] look like 32-bit ARM register
   numbering; on aarch64 uc_mcontext carries separate pc/sp fields.
   Confirm these indices are intended.  */
#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[15])
#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[13])
#define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[14])
#endif /* ARCH() */

/*
 * FILL_CONTEXT() for all platforms
 * Could use getcontext() except:
 * - it's not guaranteed to be async signal safe
 * - it's a system call and not that lightweight
 * - it's not portable as of POSIX.1-2008
 * So we just use low-level mechanisms to fill in the few fields we need.
 */
#if ARCH(SPARC)
#if WSIZE(32)
#define FILL_CONTEXT(context) \
  { \
    greg_t fp; \
    __asm__ __volatile__( "mov %%i6, %0" : "=r" (fp) ); \
    __asm__ __volatile__( "ta 3" ); \
    GET_SP(context) = fp; \
    GET_PC(context) = (greg_t)0; \
  }

#elif WSIZE(64)
#define FILL_CONTEXT(context) \
  { \
    greg_t fp; \
    __asm__ __volatile__( "mov %%i6, %0" : "=r" (fp) ); \
    __asm__ __volatile__( "flushw" ); \
    GET_SP(context) = fp; \
    GET_PC(context) = (greg_t)0; \
  }
#endif /* WSIZE() */

#elif ARCH(Intel)
#define FILL_CONTEXT(context) \
  { \
    context->uc_link = NULL; \
    void *sp = __collector_getsp(); \
    GET_SP(context) = (intptr_t)sp; \
    GET_FP(context) = (intptr_t)__collector_getfp(); \
    GET_PC(context) = (intptr_t)__collector_getpc(); \
    context->uc_stack.ss_sp = sp; \
    context->uc_stack.ss_size = 0x100000; \
  }

#elif ARCH(Aarch64)
#define FILL_CONTEXT(context) \
  { getcontext(context); \
    context->uc_mcontext.sp = (__u64) __builtin_frame_address(0); \
  }

#endif /* ARCH() */

/* Read one instruction byte at P, going through the VM hook first so that
   JIT-owned memory is read safely; falls back to a direct load.  */
static int
getByteInstruction (unsigned char *p)
{
  if (__collector_VM_ReadByteInstruction)
    {
      int v = __collector_VM_ReadByteInstruction (p);
      if (v != VM_NOT_VM_MEMORY)
	return v;
    }
  return *p;
}

struct DataHandle *dhndl = NULL;

static unsigned unwind_key = COLLECTOR_TSD_INVALID_KEY;

/* To support two OpenMP API's we use a pointer
 * to the actual function.
 */
int (*__collector_omp_stack_trace)(char*, int, hrtime_t, void*) = NULL;
int (*__collector_mpi_stack_trace)(char*, int, hrtime_t) = NULL;

#define DEFAULT_MAX_NFRAMES 256
static int max_native_nframes = DEFAULT_MAX_NFRAMES;
static int max_java_nframes = DEFAULT_MAX_NFRAMES;

/* Buffer bytes needed to hold nframes frames, plus fixed packet overhead.  */
#define NATIVE_FRAME_BYTES(nframes) ( ((nframes)+1) * sizeof(long) )
#define JAVA_FRAME_BYTES(nframes)   ( ((nframes)+1) * sizeof(long) * 2 + 16 )
#define OVERHEAD_BYTES ( 2 * sizeof(long) + 2 * sizeof(Stack_info) )

/* Multiplicative-hash constants; the *_INV values are used in compression
   to run the same hash backwards one step at a time.  */
#define ROOT_UID 801425552975190205ULL
#define ROOT_UID_INV 92251691606677ULL
#define ROOT_IDX 13907816567264074199ULL
#define ROOT_IDX_INV 2075111ULL
#define UIDTableSize 1048576
/* Open hash table of stack uids already written to the experiment;
   accessed with benign data races (see compute_uid).  */
static volatile uint64_t *UIDTable = NULL;
static volatile int seen_omp = 0;

static int stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode);
static FrameInfo compute_uid (Frame_packet *frp);
static int omp_no_walk = 0;

#if ARCH(Intel)
#define ValTableSize 1048576
#define OmpValTableSize 65536
static unsigned long *AddrTable_RA_FROMFP = NULL; // Cache for RA_FROMFP pcs
static unsigned long *AddrTable_RA_EOSTCK = NULL; // Cache for RA_EOSTCK pcs
static struct WalkContext *OmpCurCtxs = NULL;
static struct WalkContext *OmpCtxs = NULL;
static uint32_t *OmpVals = NULL;
static unsigned long *OmpRAs = NULL;
static unsigned long adjust_ret_addr (unsigned long ra, unsigned long segoff, unsigned long tend);
static int parse_x86_AVX_instruction (unsigned char *pc);

/* State carried across one step of the x86 stack walk.  */
struct WalkContext
{
  unsigned long pc;
  unsigned long sp;
  unsigned long fp;
  unsigned long ln;
  unsigned long sbase;		/* stack boundary */
  unsigned long tbgn;		/* current memory segment start */
  unsigned long tend;		/* current memory segment end */
};
#endif

#if defined(DEBUG) && ARCH(Intel)
#include <execinfo.h>

/* Debug helper: dump the collector's own native backtrace to the trace log.  */
static void
dump_stack (int nline)
{
  if ((__collector_tracelevel & SP_DUMP_STACK) == 0)
    return;

  enum Constexpr { MAX_SIZE = 1024 };
  void *array[MAX_SIZE];
  size_t sz = backtrace (array, MAX_SIZE);
  char **strings = backtrace_symbols (array, sz);
  DprintfT (SP_DUMP_STACK, "\ndump_stack: %d size=%d\n", nline, (int) sz);
  for (int i = 0; i < sz; i++)
    DprintfT (SP_DUMP_STACK, " %3d: %p %s\n", i, array[i],
	      strings[i] ? strings[i] : "???");
}

/* Debug helper: dump an array of branch/call targets to the trace log.  */
#define dump_targets(nline, ntrg, targets) \
  if ((__collector_tracelevel & SP_DUMP_UNWIND) != 0) \
    for(int i = 0; i < ntrg; i++) \
      DprintfT (SP_DUMP_UNWIND, " %2d: 0x%lx\n", i, (long) targets[i])
#else
#define dump_stack(x)
#define dump_targets(nline, ntrg, targets)
#endif

/* Record the end-of-stack address for the current thread in its TSD slot.
   For pthreads it is derived from the pthread attributes; for a cloned
   thread the caller supplies the stack end directly.  */
void
__collector_ext_unwind_key_init (int isPthread, void * stack)
{
  void * ptr = __collector_tsd_get_by_key (unwind_key);
  if (ptr == NULL)
    {
      TprintfT (DBG_LT2, "__collector_ext_unwind_key_init: cannot get tsd\n");
      return;
    }
  if (isPthread)
    {
      size_t stack_size = 0;
      void *stack_addr = 0;
      pthread_t pthread = pthread_self ();
      pthread_attr_t attr;
      int err = pthread_getattr_np (pthread, &attr);
      TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: pthread: 0x%lx err: %d\n", pthread, err);
      if (err == 0)
	{
	  err = pthread_attr_getstack (&attr, &stack_addr, &stack_size);
	  if (err == 0)
	    /* stack grows downward: end-of-stack = lowest address + size */
	    stack_addr = (char*) stack_addr + stack_size;
	  TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: stack_size=0x%lx eos=%p err=%d\n",
		    (long) stack_size, stack_addr, err);
	  err = pthread_attr_destroy (&attr);
	  TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: destroy: %d\n", err);
	}
      *(void**) ptr = stack_addr;
    }
  else
    *(void**) ptr = stack; // cloned thread
}

/* One-time initialization of the unwinder: allocate the uid hash table,
   read stack-depth limits from the environment, resolve optional helper
   symbols via dlsym, and (when RECORD is set) open the frame-info output
   handle.  Terminates the experiment on allocation failure.  */
void
__collector_ext_unwind_init (int record)
{
  int sz = UIDTableSize * sizeof (*UIDTable);
  UIDTable = (uint64_t*) __collector_allocCSize (__collector_heap, sz, 1);
  if (UIDTable == NULL)
    {
      __collector_terminate_expt ();
      return;
    }
  CALL_UTIL (memset)((void*) UIDTable, 0, sz);

  char *str = CALL_UTIL (getenv)("GPROFNG_JAVA_MAX_CALL_STACK_DEPTH");
  if (str != NULL && *str != 0)
    {
      char *endptr;
      int n = CALL_UTIL (strtol)(str, &endptr, 0);
      if (endptr != str && n >= 0)
	{
	  /* clamp the requested depth to [5, MAX_STACKDEPTH] */
	  if (n < 5)
	    n = 5;
	  if (n > MAX_STACKDEPTH)
	    n = MAX_STACKDEPTH;
	  max_java_nframes = n;
	}
    }

  str = CALL_UTIL (getenv)("GPROFNG_MAX_CALL_STACK_DEPTH");
  if (str != NULL && *str != 0)
    {
      char *endptr = str;
      int n = CALL_UTIL (strtol)(str, &endptr, 0);
      if (endptr != str && n >= 0)
	{
	  if (n < 5)
	    n = 5;
	  if (n > MAX_STACKDEPTH)
	    n = MAX_STACKDEPTH;
	  max_native_nframes = n;
	}
    }

  TprintfT (DBG_LT0, "GPROFNG_MAX_CALL_STACK_DEPTH=%d GPROFNG_JAVA_MAX_CALL_STACK_DEPTH=%d\n",
	    max_native_nframes, max_java_nframes);
  omp_no_walk = 1;

  if (__collector_VM_ReadByteInstruction == NULL)
    __collector_VM_ReadByteInstruction = (int(*)()) dlsym (RTLD_DEFAULT, "Async_VM_ReadByteInstruction");

#if ARCH(SPARC)
#if WSIZE(64)
  misalign_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__misalign_trap_handler");
  misalign_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__misalign_trap_handler_end");
  if (misalign_hdlx == 0)
    misalign_hdlx = misalign_hdl + 292;
  barrier_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_");
  barrier_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_Dummy_");
  if (barrier_hdlx == 0)
    barrier_hdl = 0;
#else
  barrier_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_");
  barrier_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_Dummy_");
  if (barrier_hdlx == 0)
    barrier_hdl = 0;
#endif /* WSIZE() */

#elif ARCH(Intel)
  sz = ValTableSize * sizeof (*AddrTable_RA_FROMFP);
  AddrTable_RA_FROMFP = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
  sz = ValTableSize * sizeof (*AddrTable_RA_EOSTCK);
  AddrTable_RA_EOSTCK = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
  if (omp_no_walk && (__collector_omp_stack_trace != NULL || __collector_mpi_stack_trace != NULL))
    {
      sz = OmpValTableSize * sizeof (*OmpCurCtxs);
      OmpCurCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpCtxs);
      OmpCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpVals);
      OmpVals = (uint32_t*) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpRAs);
      OmpRAs = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
      if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL)
	{
	  TprintfT (0, "unwind_init() ERROR: failed; terminating experiment\n");
	  __collector_terminate_expt ();
	  return;
	}
    }
#endif /* ARCH() */

  if (record)
    {
      dhndl = __collector_create_handle (SP_FRINFO_FILE);
      __collector_log_write ("<%s name=\"%s\" format=\"binary\"/>\n", SP_TAG_DATAPTR, SP_FRINFO_FILE);
    }

  unwind_key = __collector_tsd_create_key (sizeof (void*), NULL, NULL);
  if (unwind_key == COLLECTOR_TSD_INVALID_KEY)
    {
      TprintfT (0, "unwind_init: ERROR: TSD key create failed.\n");
      __collector_log_write ("<%s kind=\"%s\" id=\"%d\">TSD key not created</%s>\n",
			     SP_TAG_EVENT, SP_JCMD_CERROR, COL_ERROR_GENERAL, SP_TAG_EVENT);
      return;
    }
  TprintfT (0, "unwind_init() completed normally\n");
  return;
}

/* Close the frame-info output handle opened by unwind_init.  */
void
__collector_ext_unwind_close ()
{
  __collector_delete_handle (dhndl);
  dhndl = NULL;
}

/* Return the address that the frame LEVEL levels up the current call
   stack will return to, or NULL on failure.  */
void*
__collector_ext_return_address (unsigned level)
{
  if (NULL == UIDTable)	//unwind not initialized yet
    return NULL;
  unsigned size =
      (level + 4) * sizeof (long); // need to strip __collector_get_return_address and its caller
  ucontext_t context;
  FILL_CONTEXT ((&context));
  char* buf = (char*) alloca (size);
  if (buf == NULL)
    {
      TprintfT (DBG_LT0, "__collector_get_return_address: ERROR: alloca(%d) fails\n", size);
      return NULL;
    }
  int sz = stack_unwind (buf, size, NULL, NULL, &context, 0);
  if (sz < (level + 3) * sizeof (long))
    {
      TprintfT (DBG_LT0, "__collector_get_return_address: size=%d, but stack_unwind returns %d\n", size, sz);
      return NULL;
    }
  long *lbuf = (long*) buf;
  TprintfT (DBG_LT2, "__collector_get_return_address: return %lx\n", lbuf[level + 2]);
  return (void *) (lbuf[level + 2]);
}
/*
 * Collector interface method getFrameInfo
 */
FrameInfo
__collector_get_frame_info (hrtime_t ts, int mode, void *arg)
{
  ucontext_t *context = NULL;
  void *bptr = NULL;
  CM_Array *array = NULL;

  int unwind_mode = 0;
  int do_walk = 1;

  if (mode & FRINFO_NO_WALK)
    do_walk = 0;
  int bmode = mode & 0xffff;
  int pseudo_context = 0;
  if (bmode == FRINFO_FROM_STACK_ARG || bmode == FRINFO_FROM_STACK)
    {
      /* unwind the caller's stack starting from here */
      bptr = arg;
      context = (ucontext_t*) alloca (sizeof (ucontext_t));
      FILL_CONTEXT (context);
      unwind_mode |= bmode;
    }
  else if (bmode == FRINFO_FROM_UC)
    {
      /* caller supplies a ucontext; SP==0 marks a synthetic context */
      context = (ucontext_t*) arg;
      if (context == NULL)
	return (FrameInfo) 0;
      if (GET_SP (context) == 0)
	pseudo_context = 1;
    }
  else if (bmode == FRINFO_FROM_ARRAY)
    {
      /* caller supplies a ready-made array of frame values */
      array = (CM_Array*) arg;
      if (array == NULL || array->length <= 0)
	return (FrameInfo) 0;
    }
  else
    return (FrameInfo) 0;

  int max_frame_size = OVERHEAD_BYTES + NATIVE_FRAME_BYTES (max_native_nframes);
  if (__collector_java_mode && __collector_java_asyncgetcalltrace_loaded && context && !pseudo_context)
    max_frame_size += JAVA_FRAME_BYTES (max_java_nframes);

  Frame_packet *frpckt = alloca (sizeof (Frame_packet) + max_frame_size);
  frpckt->type = FRAME_PCKT;
  frpckt->hsize = sizeof (Frame_packet);

  char *d = (char*) (frpckt + 1);
  int size = max_frame_size;

#define MIN(a,b) ((a)<(b)?(a):(b))
  /* get Java info */
  if (__collector_java_mode && __collector_java_asyncgetcalltrace_loaded && context && !pseudo_context)
    {
      /* use only 2/3 of the buffer and leave the rest for the native stack */
      int tmpsz = MIN (size, JAVA_FRAME_BYTES (max_java_nframes));
      if (tmpsz > 0)
	{
	  int sz = __collector_ext_jstack_unwind (d, tmpsz, context);
	  d += sz;
	  size -= sz;
	}
    }

  /* get native stack */
  if (context)
    {
      Stack_info *sinfo = (Stack_info*) d;
      int sz = sizeof (Stack_info);
      d += sz;
      size -= sz;
#if ARCH(Intel)
      if (omp_no_walk == 0)
	do_walk = 1;
#endif
      if (do_walk == 0)
	unwind_mode |= FRINFO_NO_WALK;

      int tmpsz = MIN (size, NATIVE_FRAME_BYTES (max_native_nframes));
      if (tmpsz > 0)
	{
	  sz = stack_unwind (d, tmpsz, bptr, NULL, context, unwind_mode);
	  d += sz;
	  size -= sz;
	}
      sinfo->kind = STACK_INFO;
      sinfo->hsize = (d - (char*) sinfo);
    }

  /* create a stack image from user data */
  if (array && array->length > 0)
    {
      Stack_info *sinfo = (Stack_info*) d;
      int sz = sizeof (Stack_info);
      d += sz;
      size -= sz;
      sz = array->length;
      if (sz > size)
	sz = size; // YXXX should we mark this with truncation frame?
      __collector_memcpy (d, array->bytes, sz);
      d += sz;
      size -= sz;
      sinfo->kind = STACK_INFO;
      sinfo->hsize = (d - (char*) sinfo);
    }

  /* Compute the total size */
  frpckt->tsize = d - (char*) frpckt;
  FrameInfo uid = compute_uid (frpckt);
  return uid;
}

/* Hash every info record in FRP into a 64-bit uid, write the packet out
   the first time this uid is seen, and return the uid.  Stack tails that
   were already recorded are compressed away by replacing them with an
   8-byte back-reference uid.  */
FrameInfo
compute_uid (Frame_packet *frp)
{
  uint64_t idxs[LAST_INFO];
  uint64_t uid = ROOT_UID;
  uint64_t idx = ROOT_IDX;

  Common_info *cinfo = (Common_info*) ((char*) frp + frp->hsize);
  char *end = (char*) frp + frp->tsize;
  for (;;)
    {
      if ((char*) cinfo >= end || cinfo->hsize == 0 ||
	  (char*) cinfo + cinfo->hsize > end)
	break;

      /* Start with a different value to avoid matching with uid */
      uint64_t uidt = 1;
      uint64_t idxt = 1;
      long *ptr = (long*) ((char*) cinfo + cinfo->hsize);
      long *bnd = (long*) ((char*) cinfo + sizeof (Common_info));
      TprintfT (DBG_LT2, "compute_uid: Cnt=%ld: ", (long) cinfo->hsize);
      /* accumulate the multiplicative hash from the innermost frame out */
      while (ptr > bnd)
	{
	  long val = *(--ptr);
	  tprintf (DBG_LT2, "0x%8.8llx ", (unsigned long long) val);
	  uidt = (uidt + val) * ROOT_UID;
	  idxt = (idxt + val) * ROOT_IDX;
	  uid = (uid + val) * ROOT_UID;
	  idx = (idx + val) * ROOT_IDX;
	}
      if (cinfo->kind == STACK_INFO || cinfo->kind == JAVA_INFO)
	{
	  cinfo->uid = uidt;
	  idxs[cinfo->kind] = idxt;
	}
      cinfo = (Common_info*) ((char*) cinfo + cinfo->hsize);
    }
  tprintf (DBG_LT2, "\n");

  /* Check if we have already recorded that uid.
   * The following fragment contains benign data races.
   * It's important, though, that all reads from UIDTable
   * happen before writes.
   */
  int found1 = 0;
  int idx1 = (int) ((idx >> 44) % UIDTableSize);
  if (UIDTable[idx1] == uid)
    found1 = 1;
  int found2 = 0;
  int idx2 = (int) ((idx >> 24) % UIDTableSize);
  if (UIDTable[idx2] == uid)
    found2 = 1;
  int found3 = 0;
  int idx3 = (int) ((idx >> 4) % UIDTableSize);
  if (UIDTable[idx3] == uid)
    found3 = 1;
  if (!found1)
    UIDTable[idx1] = uid;
  if (!found2)
    UIDTable[idx2] = uid;
  if (!found3)
    UIDTable[idx3] = uid;

  if (found1 || found2 || found3)
    return (FrameInfo) uid;
  frp->uid = uid;

  /* Compress info's */
  cinfo = (Common_info*) ((char*) frp + frp->hsize);
  for (;;)
    {
      if ((char*) cinfo >= end || cinfo->hsize == 0 ||
	  (char*) cinfo + cinfo->hsize > end)
	break;
      if (cinfo->kind == STACK_INFO || cinfo->kind == JAVA_INFO)
	{
	  long *ptr = (long*) ((char*) cinfo + sizeof (Common_info));
	  long *bnd = (long*) ((char*) cinfo + cinfo->hsize);
	  uint64_t uidt = cinfo->uid;
	  uint64_t idxt = idxs[cinfo->kind];
	  int found = 0;
	  int first = 1;
	  /* scan forward, peeling one frame per step off the hash with the
	     modular inverses, looking for a previously recorded tail */
	  while (ptr < bnd - 1)
	    {
	      int idx1 = (int) ((idxt >> 44) % UIDTableSize);
	      if (UIDTable[idx1] == uidt)
		{
		  found = 1;
		  break;
		}
	      else if (first)
		{
		  first = 0;
		  UIDTable[idx1] = uidt;
		}
	      long val = *ptr++;
	      uidt = uidt * ROOT_UID_INV - val;
	      idxt = idxt * ROOT_IDX_INV - val;
	    }
	  if (found)
	    {
	      /* replace the known tail with its uid (little-endian bytes)
		 and shift the rest of the packet down */
	      char *d = (char*) ptr;
	      char *s = (char*) bnd;
	      if (!first)
		{
		  int i;
		  for (i = 0; i<sizeof (uidt); i++)
		    {
		      *d++ = (char) uidt;
		      uidt = uidt >> 8;
		    }
		}
	      int delta = s - d;
	      while (s < end)
		*d++ = *s++;
	      cinfo->kind |= COMPRESSED_INFO;
	      cinfo->hsize -= delta;
	      frp->tsize -= delta;
	      end -= delta;
	    }
	}
      cinfo = (Common_info*) ((char*) cinfo + cinfo->hsize);
    }
  __collector_write_packet (dhndl, (CM_Packet*) frp);
  return (FrameInfo) uid;
}

/* Hash a caller-supplied array of long frame values into a uid chained
   under parent uid SUID, record a Uid_packet if the uid is new, and
   return the uid.  Returns -1 on misaligned input, SUID for empty input.  */
FrameInfo
__collector_getUID (CM_Array *arg, FrameInfo suid)
{
  if (arg->length % sizeof (long) != 0 ||
      (long) arg->bytes % sizeof (long) != 0)
    return (FrameInfo) - 1;
  if (arg->length == 0)
    return suid;

  uint64_t uid = suid ? suid : 1;
  uint64_t idx = suid ? suid : 1;
  long *ptr = (long*) ((char*) arg->bytes + arg->length);
  long *bnd = (long*) (arg->bytes);
  while (ptr > bnd)
    {
      long val = *(--ptr);
      uid = (uid + val) * ROOT_UID;
      idx = (idx + val) * ROOT_IDX;
    }

  /* Check if we have already recorded that uid.
   * The following fragment contains benign data races.
   * It's important, though, that all reads from UIDTable
   * happen before writes.
   */
  int found1 = 0;
  int idx1 = (int) ((idx >> 44) % UIDTableSize);
  if (UIDTable[idx1] == uid)
    found1 = 1;
  int found2 = 0;
  int idx2 = (int) ((idx >> 24) % UIDTableSize);
  if (UIDTable[idx2] == uid)
    found2 = 1;
  int found3 = 0;
  int idx3 = (int) ((idx >> 4) % UIDTableSize);
  if (UIDTable[idx3] == uid)
    found3 = 1;

  if (!found1)
    UIDTable[idx1] = uid;
  if (!found2)
    UIDTable[idx2] = uid;
  if (!found3)
    UIDTable[idx3] = uid;
  if (found1 || found2 || found3)
    return (FrameInfo) uid;

  int sz = sizeof (Uid_packet) + arg->length;
  if (suid)
    sz += sizeof (suid);
  Uid_packet *uidp = alloca (sz);
  uidp->tsize = sz;
  uidp->type = UID_PCKT;
  uidp->flags = 0;
  uidp->uid = uid;

  /* Compress */
  ptr = (long*) (arg->bytes);
  bnd = (long*) ((char*) arg->bytes + arg->length);
  long *dst = (long*) (uidp + 1);
  uint64_t uidt = uid;
  uint64_t idxt = idx;
  uint64_t luid = suid; /* link uid */

  while (ptr < bnd)
    {

      long val = *ptr++;
      *dst++ = val;

      /* once the remaining tail is small enough, stop looking for a
	 previously recorded suffix */
      if ((bnd - ptr) > sizeof (uidt))
	{
	  uidt = uidt * ROOT_UID_INV - val;
	  idxt = idxt * ROOT_IDX_INV - val;
	  int idx1 = (int) ((idxt >> 44) % UIDTableSize);
	  if (UIDTable[idx1] == uidt)
	    {
	      luid = uidt;
	      break;
	    }
	}
    }
  if (luid)
    {
      /* terminate the record with the link uid, written byte by byte */
      char *d = (char*) dst;
      for (int i = 0; i<sizeof (luid); i++)
	{
	  *d++ = (char) luid;
	  luid = luid >> 8;
	}
      uidp->flags |= COMPRESSED_INFO;
      uidp->tsize = d - (char*) uidp;
    }
  __collector_write_packet (dhndl, (CM_Packet*) uidp);

  return (FrameInfo) uid;
}

/* Public entry point: unwind the stack described by ARG (a ucontext_t*,
   or NULL / an OpenMP marker meaning "the current context") into BUF.
   Returns the number of bytes written by stack_unwind.  */
int
__collector_getStackTrace (void *buf, int size, void *bptr, void *eptr, void *arg)
{
  if (arg == (void*) __collector_omp_stack_trace)
    seen_omp = 1;
  int do_walk = 1;
  if (arg == NULL || arg == (void*) __collector_omp_stack_trace)
    {
      do_walk = (arg == (void*) __collector_omp_stack_trace && omp_no_walk) ? 0 : 1;
      ucontext_t *context = (ucontext_t*) alloca (sizeof (ucontext_t));
      FILL_CONTEXT (context);
      arg = context;
    }
  int unwind_mode = 0;
  if (do_walk == 0)
    unwind_mode |= FRINFO_NO_WALK;
  return stack_unwind (buf, size, bptr, eptr, arg, unwind_mode);
}

#if ARCH(SPARC)
/*
 * These are important data structures taken from the header files reg.h and
 * ucontext.h. They are used for the stack trace algorithm explained below.
 *
 * typedef struct ucontext {
 *	u_long		uc_flags;
 *	struct ucontext	*uc_link;
 *	usigset_t	uc_sigmask;
 *	stack_t		uc_stack;
 *	mcontext_t	uc_mcontext;
 *	long		uc_filler[23];
 * } ucontext_t;
 *
 * #define	SPARC_MAXREGWINDOW	31
 *
 * struct rwindow {
 *	greg_t		rw_local[8];
 *	greg_t		rw_in[8];
 * };
 *
 * #define	rw_fp	rw_in[6]
 * #define	rw_rtn	rw_in[7]
 *
 * struct gwindows {
 *	int		wbcnt;
 *	int		*spbuf[SPARC_MAXREGWINDOW];
 *	struct rwindow	wbuf[SPARC_MAXREGWINDOW];
 * };
 *
 * typedef struct gwindows	gwindows_t;
 *
 * typedef struct {
 *	gregset_t	gregs;
 *	gwindows_t	*gwins;
 *	fpregset_t	fpregs;
 *	long		filler[21];
 * } mcontext_t;
 *
 * The stack would look like this when SIGPROF occurs.
905 * 906 * ------------------------- <- high memory 907 * | | 908 * | | 909 * ------------------------- 910 * | | 911 * ------------------------- <- fp' <-| 912 * | | | 913 * : : | 914 * | | | 915 * ------------------------- | 916 * | fp |----------| 917 * | | 918 * ------------------------- <- sp' 919 * | | | | 920 * | gwins | <- saved stack pointers & | | 921 * | | register windows | |- mcontext 922 * ------------------------- | | 923 * | gregs | <- saved registers | | 924 * ------------------------- | 925 * | | |- ucontext 926 * ------------------------- <- ucp (ucontext pointer) | 927 * | | | 928 * | | |- siginfo 929 * ------------------------- <- sip (siginfo pointer) | 930 * | | 931 * ------------------------- <- sp 932 * 933 * Then the signal handler is called with: 934 * handler( signo, sip, uip ); 935 * When gwins is null, all the stack frames are saved in the user stack. 936 * In that case we can find sp' from gregs and walk the stack for a backtrace. 937 * However, if gwins is not null we will have a more complicated case. 938 * Wbcnt(in gwins) tells you how many saved register windows are valid. 939 * This is important because the kernel does not allocate the entire array. 940 * And the top most frame is saved in the lowest index element. The next 941 * paragraph explains the possible causes. 942 * 943 * There are two routines in the kernel to flush out user register windows. 944 * flush_user_windows and flush_user_windows_to_stack 945 * The first routine will not cause a page fault. Therefore if the user 946 * stack is not in memory, the register windows will be saved to the pcb. 947 * This can happen when the kernel is trying to deliver a signal and 948 * the user stack got swap out. The kernel will then build a new context for 949 * the signal handler and the saved register windows will 950 * be copied to the ucontext as show above. 
On the other hand,
 * flush_user_windows_to_stack can cause a page fault, and if it failed
 * then there is something wrong (stack overflow, misalign).
 * The first saved register window does not necessarily correspond to the
 * first stack frame. So the current stack pointer must be compared with
 * the stack pointers in spbuf to find a match.
 *
 * We will also follow the uc_link field in ucontext to trace nested
 * signal stack frames.
 *
 */

/* Dealing with trap handlers.
 * When a user defined trap handler is invoked the return address
 * (or actually the address of an instruction that raised the trap)
 * is passed to the trap handler in %l6, whereas saved %o7 contains
 * garbage. First, we need to find out if a particular pc belongs
 * to the trap handler, and if so, take the %l6 value from the stack rather
 * than %o7 from either the stack or the register.
 * There are three possible situations represented
 * by the following stacks:
 *
 * MARKER		MARKER			MARKER
 * trap handler pc	__func pc before 'save'	__func pc after 'save'
 * %l6			%o7 from reg		%o7 (garbage)
 * ...			%l6			trap handler pc
 *			...			%l6
 *						...
 * where __func is a function called from the trap handler.
 *
 * Currently this is implemented to only deal with __misalign_trap_handler
 * set for v9 FORTRAN applications. Implementation of IN_TRAP_HANDLER
 * macro shows it. A general solution is postponed.
 */

/* Special handling of unwind through the parallel loop barrier code:
 *
 * The library defines two symbols, __mt_EndOfTask_Barrier_ and
 * __mt_EndOfTask_Barrier_Dummy_ representing the first word of
 * the barrier synchronization code, and the first word following
 * it.
Whenever the leaf PC is between these two symbols,
 * the unwind code is special-cased as follows:
 * The __mt_EndOfTask_Barrier_ function is guaranteed to be a leaf
 * function, so its return address is in a register, not saved on
 * the stack.
 *
 * MARKER
 * __mt_EndOfTask_Barrier_ PC -- the leaf PC
 * loop body function address for the task -- implied caller of __mt_EndOfTask_Barrier_
 *	this address is taken from the %O0 register
 * {mt_master or mt_slave} -- real caller of __mt_EndOfTask_Barrier_
 * ...
 *
 * With this trick, the analyzer will show the time in the barrier
 * attributed to the loop at the end of which the barrier synchronization
 * is taking place. That loop body routine, will be shown as called
 * from the function from which it was extracted, which will be shown
 * as called from the real caller, either the slave or master library routine.
 */

/*
 * These no-fault-load (0x82) assembly functions are courtesy of Rob Gardner.
 *
 * Note that 0x82 is ASI_PNF.  See
 * http://lxr.free-electrons.com/source/arch/sparc/include/uapi/asm/asi.h#L134
 * ASI address space identifier; PNF primary no fault
 */

/* load an int from an address */

/* if the address is illegal, return a 0 */
static int
SPARC_no_fault_load_int (void *addr)
{
  int val;
  /* no-fault load: faults on a bad address are suppressed and 0 is
     returned instead of delivering a signal */
  __asm__ __volatile__(
		       "lda [%1] 0x82, %0\n\t"
		       : "=r" (val)
		       : "r" (addr)
		       );

  return val;
}

/* check if an address is invalid
 *
 * A no-fault load of an illegal address still faults, but it does so silently to the calling process.
 * It returns a 0, but so could a load of a legal address.
 * So, we time the load. A "fast" load must be a successful load.
 * A "slow" load is probably a fault.
 * Since it could also be a cache/TLB miss or other abnormality,
 * it's safest to retry a slow load.
 * The cost of trying a valid address should be some nanosecs.
 * The cost of trying an invalid address up to 10 times could be some microsecs.
 */
/* Disabled: timing-based probe for invalid SPARC addresses (unused).
 * Returns 1 if every one of 10 timed no-fault byte loads is "slow"
 * (>= 100 %tick cycles), 0 as soon as one load is fast.  */
#if 0
static
int invalid_SPARC_addr(void *addr)
{
    long t1, t2;
    int i;

    for (i=0; i<10; i++) {
	__asm__ __volatile__(
		"rd %%tick, %0\n\t"
		"lduba [%2] 0x82, %%g0\n\t"
		"rd %%tick, %1\n\t"
		: "=r" (t1), "=r" (t2)
		: "r" (addr) );
	if ( (t2 - t1) < 100 )
	    return 0;
    }
    return 1;
}
#endif

/*
 * The standard SPARC procedure-calling convention is that the
 * calling PC (for determining the return address when the procedure
 * is finished) is placed in register %o7.  A called procedure
 * typically executes a "save" instruction that shifts the register
 * window, and %o7 becomes %i7.
 *
 * Optimized leaf procedures do not shift the register window.
 * They assume the return address will remain %o7.  So when
 * we process a leaf PC, we walk instructions to see if there
 * is a call, restore, or other instruction that would indicate
 * we can IGNORE %o7 because this is NOT a leaf procedure.
 *
 * If a limited instruction walk uncovers no such hint, we save
 * not only the PC but the %o7 value as well... just to be safe.
 * Later, in DBE post-processing of the call stacks, we decide
 * whether any recorded %o7 value should be used as a caller
 * frame or should be discarded.
 */

/* Instruction-pattern tests on a raw 32-bit SPARC opcode word.  */
#define IS_ILLTRAP(x)	(((x) & 0xc1c00000) == 0)
#define IS_SAVE(x)	(((x) & 0xc1f80000) == 0x81e00000)
#define IS_MOVO7R(x)	(((x) & 0xc1f8201f) == 0x8160000f)
#define IS_MOVRO7(x)	(((x) & 0xfff82000) == 0x9f600000)
#define IS_ORRG0O7(x)	(((x) & 0xff78201f) == 0x9e100000)
#define IS_ORG0RO7(x)	(((x) & 0xff7fe000) == 0x9e100000)
#define IS_ORG0O7R(x)	(((x) & 0xc17fe01f) == 0x8010000f)
#define IS_ORO7G0R(x)	(((x) & 0xc17fe01f) == 0x8013c000)
#define IS_RESTORE(x)	(((x) & 0xc1f80000) == 0x81e80000)
#define IS_RET(x)	((x) == 0x81c7e008)
#define IS_RETL(x)	((x) == 0x81c3e008)
#define IS_RETURN(x)	(((x) & 0xc1f80000) == 0x81c80000)
#define IS_BRANCH(x)	((((x) & 0xc0000000) == 0) && (((x) & 0x01c00000) != 0x01000000))
#define IS_CALL(x)	(((x) & 0xc0000000) == 0x40000000)
#define IS_LDO7(x)	(((x) & 0xfff80000) == 0xde000000)

/* Cached sysconf(_SC_PAGESIZE) result; 0 until first use.  */
static long pagesize = 0;

/* Record the leaf PC (and, when it looks trustworthy, the %o7 return
 * address preceded by SP_LEAF_CHECK_MARKER) into lbuf starting at
 * index ind.  Walks up to 20 instructions forward from pc (and a
 * bounded window backward on a branch) looking for hints that the
 * current function is, or is not, a leaf.  Returns the updated index.  */
static int
process_leaf (long *lbuf, int ind, int lsize, void *context)
{
  greg_t pc = GET_PC (context);
  greg_t o7 = GET_GREG (context, REG_O7);

  /* omazur: TBR START -- not used */
  if (IN_BARRIER (pc))
    {
      if (ind < lsize)
	lbuf[ind++] = pc;
      if (ind < lsize)
	lbuf[ind++] = GET_GREG (context, REG_O0);
      return ind;
    }
  /* omazur: TBR END */
#if WSIZE(64)
  if (IN_TRAP_HANDLER (pc))
    {
      if (ind < lsize)
	lbuf[ind++] = pc;
      return ind;
    }
#endif
  /* Walk forward up to 20 instructions from the leaf PC.  */
  unsigned *instrp = (unsigned *) pc;
  unsigned *end_addr = instrp + 20;
  while (instrp < end_addr)
    {
      unsigned instr = *instrp++;
      if (IS_ILLTRAP (instr))
	break;
      else if (IS_SAVE (instr))
	{
	  /* A 'save' ahead: register window not yet shifted, %o7 is live.  */
	  if (ind < lsize)
	    lbuf[ind++] = pc;
	  if (o7 && ind < lsize)
	    lbuf[ind++] = o7;
	  return ind;
	}
      else if (IS_MOVO7R (instr) || IS_ORG0O7R (instr) || IS_ORO7G0R (instr))
	break;
      else if (IS_MOVRO7 (instr) || IS_ORG0RO7 (instr))
	{
	  /* %o7 is about to be overwritten from register rs2; use that value.  */
	  int rs2 = (instr & 0x1f) + REG_G1 - 1;
	  o7 = (rs2 <= REG_O7) ? GET_GREG (context, rs2) : 0;
	  break;
	}
      else if (IS_ORRG0O7 (instr))
	{
	  int rs2 = ((instr & 0x7c000) >> 14) + REG_G1 - 1;
	  o7 = (rs2 <= REG_O7) ? GET_GREG (context, rs2) : 0;
	  break;
	}
      else if (IS_RESTORE (instr))
	{
	  o7 = 0;
	  break;
	}
      else if (IS_RETURN (instr))
	{
	  o7 = 0;
	  break;
	}
      else if (IS_RET (instr))
	{
	  o7 = 0;
	  break;
	}
      else if (IS_RETL (instr))
	{
	  /* process delay slot */
	  instr = *instrp++;
	  if (IS_RESTORE (instr))
	    o7 = 0;
	  break;
	}
      else if (IS_BRANCH (instr))
	{
	  /* Forward walk inconclusive; scan a bounded window backward.  */
	  unsigned *backbegin = ((unsigned *) pc - 1);
	  unsigned *backend = backbegin - 12 + (instrp - (unsigned *) pc);
	  while (backbegin > backend)
	    {
	      // 21920143 stack unwind: SPARC process_leaf backtracks too far
	      /*
	       * We've already dereferenced backbegin+1.
	       * So if backbegin is on the same page, we're fine.
	       * If we've gone to a different page, possibly things are not fine.
	       * We don't really know how to test that.
	       * Let's just assume the worst: that dereferencing backbegin would segv.
	       * We won't know if we're in a leaf function or not.
	       */
	      if (pagesize == 0)
		pagesize = CALL_UTIL (sysconf)(_SC_PAGESIZE);
	      if ((((long) (backbegin + 1)) & (pagesize - 1)) < sizeof (unsigned*))
		break;
	      unsigned backinstr = *backbegin--;
	      if (IS_LDO7 (backinstr))
		{
		  o7 = 0;
		  break;
		}
	      else if (IS_ILLTRAP (backinstr))
		break;
	      else if (IS_RETURN (backinstr))
		break;
	      else if (IS_RET (backinstr))
		break;
	      else if (IS_RETL (backinstr))
		break;
	      else if (IS_CALL (backinstr))
		break;
	      else if (IS_SAVE (backinstr))
		{
		  o7 = 0;
		  break;
		}
	    }
	  break;
	}
      else if (IS_CALL (instr))
	o7 = 0;
    }

#if WSIZE(64)
  if (o7 != 0 && ((long) o7) < 32 && ((long) o7) > -32)
    {
      /* 20924821 SEGV in unwind code on SPARC/Linux
       * We've seen this condition in some SPARC-Linux runs.
       * o7 is non-zero but not a valid address.
       * Values like 4 or -7 have been seen.
       * Let's check if o7 is unreasonably small.
       * If so, set to 0 so that it won't be recorded.
       * Otherwise, there is risk of it being dereferenced in process_sigreturn().
       */
      // __collector_log_write("<event kind=\"%s\" id=\"%d\">time %lld, internal debug unwind at leaf; o7 = %ld, pc = %x</event>\n",
      //		SP_JCMD_COMMENT, COL_COMMENT_NONE, __collector_gethrtime() - __collector_start_time, (long) o7, pc );
      o7 = 0;
    }
#endif

  if (o7)
    {
      /* Keep %o7 as a candidate caller; DBE post-processing decides.  */
      if (ind < lsize)
	lbuf[ind++] = SP_LEAF_CHECK_MARKER;
      if (ind < lsize)
	lbuf[ind++] = pc;
      if (ind < lsize)
	lbuf[ind++] = o7;
    }
  else if (ind < lsize)
    lbuf[ind++] = pc;
  return ind;
}

#if WSIZE(64)
// detect signal handler
static int
process_sigreturn (long *lbuf, int ind, int lsize, unsigned char * tpc,
		   struct frame **pfp, void * bptr, int extra_frame)
{
  // cheap checks whether tpc is obviously not an instruction address
  if ((4096 > (unsigned long) tpc)	// the first page is off limits
      || (3 & (unsigned long) tpc))
    return ind;			// the address is not aligned

  // get the instruction at tpc, skipping over as many as 7 nop's (0x01000000)
  int insn, i;
  for (i = 0; i < 7; i++)
    {
      insn = SPARC_no_fault_load_int ((void *) tpc);
      if (insn != 0x01000000)
	break;
      tpc += 4;
    }

  // we're not expecting 0 (and it could mean an illegal address)
  if (insn == 0)
    return ind;

  // We are looking for __rt_sigreturn_stub with the instruction
  // 0x82102065 : mov 0x65 /* __NR_rt_sigreturn */, %g1
  if (insn == 0x82102065)
    {
      /*
       * according to linux kernel source code,
       * syscall(_NR_rt_sigreturn) uses the following data in stack:
       * struct rt_signal_frame {
       *     struct sparc_stackf ss;
       *     siginfo_t info;
       *     struct pt_regs regs;
       *     ....};
       * sizeof(struct sparc_stackf) is 192;
       * sizeof(siginfo_t) is 128;
       * we need to get the register values from regs, which is defined as:
       * struct pt_regs {
       *     unsigned long u_regs[16];
       *     unsigned long tstate;
       *
unsigned long tpc;
       *     unsigned long tnpc;
       *     ....};
       * pc and fp registers have offsets of 120 and 112;
       * the pc of kill() is stored in tnpc, whose offset is 136.
       */
      /* Pull the interrupted pc, the caller pc, and the saved frame
       * pointer out of the kernel's rt_signal_frame laid out above.  */
      greg_t pc = *((unsigned long*) ((char*) ((*pfp)) + 192 + 128 + 136));
      greg_t pc1 = *((unsigned long*) ((char*) ((*pfp)) + 192 + 128 + 120));
      (*pfp) = *((struct frame**) ((char*) ((*pfp)) + 192 + 128 + 112));
      if (pc && pc1)
	{
	  if (bptr != NULL && extra_frame && ((char*) (*pfp) + STACK_BIAS) < (char*) bptr && ind < 2)
	    {
	      lbuf[0] = pc1;
	      if (ind == 0)
		ind++;
	    }
	  if (bptr == NULL || ((char*) (*pfp) + STACK_BIAS) >= (char*) bptr)
	    {
	      if (ind < lsize)
		lbuf[ind++] = (unsigned long) tpc;
	      if (ind < lsize)
		lbuf[ind++] = pc;
	      if (ind < lsize)
		lbuf[ind++] = pc1;
	    }
	}
      DprintfT (SP_DUMP_UNWIND, "unwind.c: resolved sigreturn pc=0x%lx, pc1=0x%lx, fp=0x%lx\n", pc, pc1, *(pfp));
    }
  return ind;
}
#endif

/*
 * int stack_unwind( char *buf, int size, ucontext_t *context )
 *	This routine looks into the mcontext and
 *	traces stack frames to record return addresses.
 */
int
stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode)
{
  /*
   * trace the stack frames from user stack.
   * We are assuming that the frame pointer and return address
   * are null when we are at the top level.
1346 */ 1347 long *lbuf = (long*) buf; 1348 int lsize = size / sizeof (long); 1349 struct frame *fp = (struct frame *) GET_SP (context); /* frame pointer */ 1350 greg_t pc; /* program counter */ 1351 int extra_frame = 0; 1352 if ((mode & 0xffff) == FRINFO_FROM_STACK) 1353 extra_frame = 1; 1354 1355 int ind = 0; 1356 if (bptr == NULL) 1357 ind = process_leaf (lbuf, ind, lsize, context); 1358 1359 int extra_frame = 0; 1360 if ((mode & 0xffff) == FRINFO_FROM_STACK) 1361 extra_frame = 1; 1362 int ind = 0; 1363 if (bptr == NULL) 1364 ind = process_leaf (lbuf, ind, lsize, context); 1365 1366 while (fp) 1367 { 1368 if (ind >= lsize) 1369 break; 1370 fp = (struct frame *) ((char *) fp + STACK_BIAS); 1371 if (eptr && fp >= (struct frame *) eptr) 1372 { 1373 ind = ind >= 2 ? ind - 2 : 0; 1374 break; 1375 } 1376#if WSIZE(64) // detect signal handler 1377 unsigned char * tpc = ((unsigned char*) (fp->fr_savpc)); 1378 struct frame * tfp = (struct frame*) ((char*) (fp->fr_savfp) + STACK_BIAS); 1379 int old_ind = ind; 1380 ind = process_sigreturn (lbuf, old_ind, lsize, tpc, &tfp, bptr, extra_frame); 1381 if (ind != old_ind) 1382 { 1383 pc = (greg_t) tpc; 1384 fp = tfp; 1385 } 1386 else 1387#endif 1388 { 1389#if WSIZE(64) 1390 if (IN_TRAP_HANDLER (lbuf[ind - 1])) 1391 pc = fp->fr_local[6]; 1392 else 1393 pc = fp->fr_savpc; 1394#else 1395 pc = fp->fr_savpc; 1396#endif 1397 fp = fp->fr_savfp; 1398 if (pc) 1399 { 1400 if (bptr != NULL && extra_frame && ((char*) fp + STACK_BIAS) < (char*) bptr && ind < 2) 1401 { 1402 lbuf[0] = pc; 1403 if (ind == 0) 1404 ind++; 1405 } 1406 if (bptr == NULL || ((char*) fp + STACK_BIAS) >= (char*) bptr) 1407 lbuf[ind++] = pc; 1408 } 1409 } 1410 1411 /* 4616238: _door_return may have a frame that has non-zero 1412 * saved stack pointer and zero pc 1413 */ 1414 if (pc == (greg_t) NULL) 1415 break; 1416 } 1417 1418 if (ind >= lsize) 1419 { /* truncated stack handling */ 1420 ind = lsize - 1; 1421 lbuf[ind++] = SP_TRUNC_STACK_MARKER; 1422 } 1423 return ind * 
sizeof (long);
}

#elif ARCH(Intel)

/* get __NR_<syscall_name> constants */
#include <syscall.h>

/*
 * From uts/intel/ia32/os/sendsig.c:
 *
 * An amd64 signal frame looks like this on the stack:
 *
 * old %rsp:
 *		<128 bytes of untouched stack space>
 *		<a siginfo_t [optional]>
 *		<a ucontext_t>
 *		<siginfo_t *>
 *		<signal number>
 * new %rsp:	<return address (deliberately invalid)>
 *
 * The signal number and siginfo_t pointer are only pushed onto the stack in
 * order to allow stack backtraces.  The actual signal handling code expects the
 * arguments in registers.
 *
 * An i386 SVR4/ABI signal frame looks like this on the stack:
 *
 * old %esp:
 *		<a siginfo32_t [optional]>
 *		<a ucontext32_t>
 *		<pointer to that ucontext32_t>
 *		<pointer to that siginfo32_t>
 *		<signo>
 * new %esp:	<return address (deliberately invalid)>
 */

/* Register-field extraction from opcode and ModR/M bytes; on amd64 the
 * REX extension bits (B, R) are folded in.  */
#if WSIZE(32)
#define OPC_REG(x)	((x)&0x7)
#define MRM_REGD(x)	(((x)>>3)&0x7)
#define MRM_REGS(x)	((x)&0x7)
#define RED_ZONE	0
#elif WSIZE(64)
#define OPC_REG(x)	(B|((x)&0x7))
#define MRM_REGD(x)	(R|(((x)>>3)&0x7))
#define MRM_REGS(x)	(B|((x)&0x7))
#define RED_ZONE	16
#endif
#define MRM_EXT(x)	(((x)>>3)&0x7)
#define MRM_MOD(x)	((x)&0xc0)

#define RAX	0
#define RDX	2
#define RSP	4
#define RBP	5

/* Working state of one speculative x86 instruction walk.  */
struct AdvWalkContext
{
  unsigned char *pc;		/* current instruction address of this walk */
  unsigned long *sp;		/* simulated stack pointer */
  unsigned long *sp_safe;	/* lowest stack address safe to dereference */
  unsigned long *fp;		/* simulated frame pointer */
  unsigned long *fp_sav;
  unsigned long *fp_loc;
  unsigned long rax;
  unsigned long rdx;
  unsigned long ra_sav;
  unsigned long *ra_loc;
  unsigned long regs[16];
  int tidx;		/* targets table index */
  uint32_t cval;	/* cache value */
};

/* Return the tracked value of register r; fall back to the frame
 * pointer for RBP, otherwise flag the result as undefined.  */
static unsigned long
getRegVal (struct AdvWalkContext *cur, int r, int *undefRez)
{
  if (cur->regs[r] == 0)
    {
      if (r == RBP)
	{
	  tprintf (DBG_LT3, "getRegVal: returns cur->regs[RBP]=0x%lx cur->pc=0x%lx\n",
		   (unsigned long) cur->fp, (unsigned long) cur->pc);
	  return (unsigned long) cur->fp;
	}
      *undefRez = 1;
    }
  tprintf (DBG_LT3, "getRegVal: cur->regs[%d]=0x%lx cur->pc=0x%lx\n",
	   r, (unsigned long) cur->regs[r], (unsigned long) cur->pc);
  return cur->regs[r];
}

/* Skip the ModR/M byte at pc together with any SIB byte and
 * displacement it implies, returning the address just past them.  */
static unsigned char *
check_modrm (unsigned char *pc)
{
  unsigned char modrm = *pc++;
  unsigned char mod = MRM_MOD (modrm);
  if (mod == 0xc0)	/* register-direct: no further operand bytes */
    return pc;
  unsigned char regs = modrm & 0x07;
  if (regs == RSP)
    {
      if (mod == 0x40)
	return pc + 2; // SIB + disp8
      if (mod == 0x80)
	return pc + 5; // SIB + disp32
      return pc + 1; // SIB
    }
  if (mod == 0x0)
    {
      if (regs == RBP)
	pc += 4; // disp32
    }
  else if (mod == 0x40)
    pc += 1; /* byte */
  else if (mod == 0x80)
    pc += 4; /* word */
  return pc;
}

/* Read a w-byte (1, 2, or otherwise 4) signed integer at pc.  */
static int
read_int (unsigned char *pc, int w)
{
  if (w == 1)
    return *((char *) pc);
  if (w == 2)
    return *(short*) pc;
  return *(int*) pc;
}

/* Return codes */
enum
{
  RA_FAILURE = 0,
  RA_SUCCESS,
  RA_END_OF_STACK,
  RA_SIGRETURN,
  RA_RT_SIGRETURN
};

/* Cache value encodings */
static const uint32_t RA_FROMFP = (uint32_t) - 1; /* get the RA from the frame pointer */
static const uint32_t RA_EOSTCK = (uint32_t) - 2; /* end-of-stack */

#define MAXCTX		16
#define MAXTRGTS	64
#define MAXJMPREG	2
#define MAXJMPREGCTX	3

/* Drop the current walk context by overwriting it with the last one.  */
#define DELETE_CURCTX()	__collector_memcpy (cur, buf + (--nctx), sizeof (*cur))

/**
 * Look for pc in AddrTable_RA_FROMFP and in AddrTable_RA_EOSTCK
 * @param wctx
 * @return
 */
static int
cache_get (struct WalkContext *wctx)
{
  unsigned long addr;
  if (AddrTable_RA_FROMFP != NULL)
1583 { 1584 uint64_t idx = wctx->pc % ValTableSize; 1585 addr = AddrTable_RA_FROMFP[ idx ]; 1586 if (addr == wctx->pc) 1587 { // Found in AddrTable_RA_FROMFP 1588 unsigned long *sp = NULL; 1589 unsigned long fp = wctx->fp; 1590 /* validate fp before use */ 1591 if (fp < wctx->sp || fp >= wctx->sbase - sizeof (*sp)) 1592 return RA_FAILURE; 1593 sp = (unsigned long *) fp; 1594 fp = *sp++; 1595 unsigned long ra = *sp++; 1596 unsigned long tbgn = wctx->tbgn; 1597 unsigned long tend = wctx->tend; 1598 if (ra < tbgn || ra >= tend) 1599 if (!__collector_check_segment (ra, &tbgn, &tend, 0)) 1600 return RA_FAILURE; 1601 unsigned long npc = adjust_ret_addr (ra, ra - tbgn, tend); 1602 if (npc == 0) 1603 return RA_FAILURE; 1604 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cached pc=0x%lX\n", __LINE__, npc); 1605 wctx->pc = npc; 1606 wctx->sp = (unsigned long) sp; 1607 wctx->fp = fp; 1608 wctx->tbgn = tbgn; 1609 wctx->tend = tend; 1610 return RA_SUCCESS; 1611 } 1612 } 1613 if (NULL == AddrTable_RA_EOSTCK) 1614 return RA_FAILURE; 1615 uint64_t idx = wctx->pc % ValTableSize; 1616 addr = AddrTable_RA_EOSTCK[ idx ]; 1617 if (addr != wctx->pc) 1618 return RA_FAILURE; 1619 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cached RA_END_OF_STACK\n", __LINE__); 1620 return RA_END_OF_STACK; 1621} 1622/** 1623 * Save pc in RA_FROMFP or RA_EOSTCK cache depending on val 1624 * @param wctx 1625 */ 1626static void 1627cache_put (struct WalkContext *wctx, const uint32_t val) 1628{ 1629 if (RA_FROMFP == val) 1630 { 1631 // save pc in RA_FROMFP cache 1632 if (NULL != AddrTable_RA_FROMFP) 1633 { 1634 uint64_t idx = wctx->pc % ValTableSize; 1635 AddrTable_RA_FROMFP[ idx ] = wctx->pc; 1636 if (NULL != AddrTable_RA_EOSTCK) 1637 if (AddrTable_RA_EOSTCK[ idx ] == wctx->pc) 1638 // invalidate pc in RA_EOSTCK cache 1639 AddrTable_RA_EOSTCK[ idx ] = 0; 1640 } 1641 return; 1642 } 1643 if (RA_EOSTCK == val) 1644 { 1645 // save pc in RA_EOSTCK cache 1646 if (NULL != AddrTable_RA_EOSTCK) 1647 { 1648 uint64_t idx = wctx->pc 
% ValTableSize; 1649 AddrTable_RA_EOSTCK[ idx ] = wctx->pc; 1650 if (NULL != AddrTable_RA_FROMFP) 1651 { 1652 if (AddrTable_RA_FROMFP[ idx ] == wctx->pc) 1653 // invalidate pc in RA_FROMFP cache 1654 AddrTable_RA_FROMFP[ idx ] = 0; 1655 } 1656 } 1657 return; 1658 } 1659} 1660 1661static int 1662process_return_real (struct WalkContext *wctx, struct AdvWalkContext *cur, int cache_on) 1663{ 1664 if ((unsigned long) cur->sp >= wctx->sbase || 1665 (unsigned long) cur->sp < wctx->sp) 1666 { 1667 DprintfT (SP_DUMP_UNWIND, "unwind.c: not in stack: %p [0x%lX-0x%lX]\n", 1668 cur->sp, wctx->sp, wctx->sbase); 1669 return RA_FAILURE; 1670 } 1671 1672 unsigned long ra; 1673 if (cur->sp == cur->ra_loc) 1674 { 1675 ra = cur->ra_sav; 1676 cur->sp++; 1677 } 1678 else if (cur->sp >= cur->sp_safe && (unsigned long) cur->sp < wctx->sbase) 1679 ra = *cur->sp++; 1680 else 1681 { 1682 DprintfT (SP_DUMP_UNWIND, "unwind.c: not safe: %p >= %p\n", cur->sp, cur->sp_safe); 1683 return RA_FAILURE; 1684 } 1685 if (ra == 0) 1686 { 1687 if (cache_on) 1688 cache_put (wctx, RA_EOSTCK); 1689 wctx->pc = ra; 1690 wctx->sp = (unsigned long) cur->sp; 1691 wctx->fp = (unsigned long) cur->fp; 1692 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d RA_END_OF_STACK\n", __LINE__); 1693 return RA_END_OF_STACK; 1694 } 1695 1696 unsigned long tbgn = wctx->tbgn; 1697 unsigned long tend = wctx->tend; 1698 if (ra < tbgn || ra >= tend) 1699 { 1700 if (!__collector_check_segment (ra, &tbgn, &tend, 0)) 1701 { 1702 DprintfT (SP_DUMP_UNWIND, "unwind.c: not in segment: 0x%lX [0x%lX-0x%lX]\n", 1703 ra, wctx->tbgn, wctx->tend); 1704 return RA_FAILURE; 1705 } 1706 } 1707 1708 if (cur->cval == RA_FROMFP) 1709 { 1710 if (wctx->fp == (unsigned long) (cur->sp - 2)) 1711 { 1712 if (cache_on) 1713 cache_put (wctx, RA_FROMFP); 1714 } 1715 else 1716 cur->cval = 0; 1717 } 1718 1719 unsigned long npc = adjust_ret_addr (ra, ra - tbgn, tend); 1720 if (npc == 0) 1721 { 1722 if (cur->cval == RA_FROMFP) 1723 { 1724 /* We have another evidence that we 
	     can trust this RA */
	  DprintfT (SP_DUMP_UNWIND, "unwind.c: trusted fp, pc = 0x%lX\n", wctx->pc);
	  wctx->pc = ra;
	}
      else
	{
	  DprintfT (SP_DUMP_UNWIND, "unwind.c: 0 after adjustment\n");
	  return RA_FAILURE;
	}
    }
  else
    wctx->pc = npc;
  wctx->sp = (unsigned long) cur->sp;
  wctx->fp = (unsigned long) cur->fp;
  wctx->tbgn = tbgn;
  wctx->tend = tend;
  return RA_SUCCESS;
}

/* Convenience wrapper: process_return_real with caching enabled.  */
static int
process_return (struct WalkContext *wctx, struct AdvWalkContext *cur)
{
  return process_return_real (wctx, cur, 1);
}

/* Record in the OMP caches the mapping from the starting WalkContext
 * (wctx_pc_save) and result code val to the resulting WalkContext wctx,
 * keyed additionally by the return address found on the stack.
 * Allocates the cache tables lazily on first use.  */
static void
omp_cache_put (unsigned long *cur_sp_safe, struct WalkContext * wctx_pc_save,
	       struct WalkContext *wctx, uint32_t val)
{
  if (omp_no_walk && (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL))
    {
      size_t sz = OmpValTableSize * sizeof (*OmpCurCtxs);
      OmpCurCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpCtxs);
      OmpCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpVals);
      OmpVals = (uint32_t*) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpRAs);
      OmpRAs = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
    }
  if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL)
    return;

#define USE_18434988_OMP_CACHE_WORKAROUND
#ifndef USE_18434988_OMP_CACHE_WORKAROUND
  uint64_t idx = wctx_pc_save->pc * ROOT_IDX;
  OmpVals[ idx % OmpValTableSize ] = val;
  idx = (idx + val) * ROOT_IDX;
  __collector_memcpy (&(OmpCurCtxs[ idx % OmpValTableSize ]), wctx_pc_save, sizeof (struct WalkContext));
  idx = (idx + val) * ROOT_IDX;
  __collector_memcpy (&(OmpCtxs[ idx % OmpValTableSize ]), wctx, sizeof (struct WalkContext));
#endif
  /* Locate the slot holding the return address: prefer the caller's
   * frame pointer when it lies within the stack, else fall back to the
   * word just below the resulting sp.  */
  unsigned long *sp = NULL;
  unsigned long fp = wctx_pc_save->fp;
  int from_fp = 0;
  if (val == RA_END_OF_STACK)
    {
      sp = (unsigned long *) (wctx->sp);
      sp--;
      TprintfT (DBG_LT1, "omp_cache_put: get sp from EOS, sp=%p\n", sp);
    }
  else
    {
      if (fp < wctx_pc_save->sp || fp >= wctx_pc_save->sbase - sizeof (*sp))
	{
	  sp = (unsigned long *) (wctx->sp);
	  sp--;
	  TprintfT (DBG_LT1, "omp_cache_put: get sp from sp, sp=%p\n", sp);
	}
      else
	{
	  TprintfT (DBG_LT1, "omp_cache_put: get sp from fp=0x%lx\n", fp);
	  sp = (unsigned long *) fp;
	  from_fp = 1;
	}
    }

  if (sp < cur_sp_safe || ((unsigned long) sp >= wctx->sbase))
    return;

  unsigned long ra = *sp++;
  if (from_fp)
    {
      /* if the fp-derived RA is not in the text segment, retry from sp */
      unsigned long tbgn = wctx_pc_save->tbgn;
      unsigned long tend = wctx_pc_save->tend;
      if (ra < tbgn || ra >= tend)
	{
	  sp = (unsigned long *) (wctx->sp);
	  sp--;
	  ra = *sp++;
	}
    }
#ifdef USE_18434988_OMP_CACHE_WORKAROUND
  uint64_t idx1 = wctx_pc_save->pc * ROOT_IDX;
  uint64_t idx2 = (idx1 + val) * ROOT_IDX;
  uint64_t idx3 = (idx2 + val) * ROOT_IDX;
  uint64_t idx4 = (idx3 + val) * ROOT_IDX;
  OmpRAs [ idx4 % OmpValTableSize ] = 0;	// lock
  OmpVals[ idx1 % OmpValTableSize ] = val;
  __collector_memcpy (&(OmpCurCtxs[ idx2 % OmpValTableSize ]), wctx_pc_save, sizeof (struct WalkContext));
  __collector_memcpy (&(OmpCtxs [ idx3 % OmpValTableSize ]), wctx, sizeof (struct WalkContext));
  OmpRAs [ idx4 % OmpValTableSize ] = ra;
#else
  idx = (idx + val) * ROOT_IDX;
  OmpRAs[ idx % OmpValTableSize ] = ra;
#endif
  TprintfT (DBG_LT1, "omp_cache_put: pc=0x%lx\n", wctx_pc_save->pc);
}

/*
 * See bug 17166877 - malloc_internal unwind failure.
 * Sometimes there are several calls right after ret, like:
 *   leave
 *   ret
 *   call xxx
 *   call xxxx
 *   call xxxxx
 * If they are also jump targets, we should better not
 * create new jump context for those, since they may
 * end up into some other function.
 */
/* Return 1 if npc points at a call (0xe8) that sits in a run of at
 * least 3 adjacent calls immediately preceded by leave (0xc9) + ret
 * (0xc3); 0 otherwise.  Reads raw code bytes around npc, scanning at
 * most 10 call slots in each direction.  */
static int
is_after_ret (unsigned char * npc)
{
  if (*npc != 0xe8)
    return 0;
  unsigned char * onpc = npc;
  int ncall = 1;
  int maxsteps = 10;
  int mincalls = 3;
  int steps = 0;
  /* walk backward over adjacent 5-byte call instructions */
  while (*(npc - 5) == 0xe8 && steps < maxsteps)
    {
      npc -= 5;
      ncall++;
      steps++;
    }
  /* the run must be preceded by "leave; ret" (0xc9 0xc3) */
  if (*(npc - 1) != 0xc3 || *(npc - 2) != 0xc9)
    return 0;
  steps = 0;
  /* walk forward over adjacent calls as well */
  while (*(onpc + 5) == 0xe8 && steps < maxsteps)
    {
      onpc += 5;
      ncall++;
      steps++;
    }
  if (ncall < mincalls)
    return 0;
  return 1;
}

static int
find_i386_ret_addr (struct WalkContext *wctx, int do_walk)
{
  if (wctx->sp == 0)
    // Some artificial contexts may have %sp set to 0.
See SETFUNCTIONCONTEXT() 1880 return RA_FAILURE; 1881 1882 /* Check cached values */ 1883 int retc = cache_get (wctx); 1884 if (retc != RA_FAILURE) 1885 return retc; 1886 1887 /* An attempt to perform code analysis for call stack tracing */ 1888 unsigned char opcode; 1889 unsigned char extop; 1890 unsigned char extop2; 1891 unsigned char modrm; 1892 int imm8; /* immediate operand, byte */ 1893 int immv; /* immediate operand, word(2) or doubleword(4) */ 1894 int reg; /* register code */ 1895 1896 /* Buffer for branch targets (analysis stoppers) */ 1897 unsigned char *targets[MAXTRGTS]; 1898 int ntrg = 0; /* number of entries in the table */ 1899 targets[ntrg++] = (unsigned char*) wctx->pc; 1900 targets[ntrg++] = (unsigned char*) - 1; 1901 1902 struct AdvWalkContext buf[MAXCTX]; 1903 struct AdvWalkContext *cur = buf; 1904 CALL_UTIL (memset)((void*) cur, 0, sizeof (*cur)); 1905 1906 cur->pc = (unsigned char*) wctx->pc; 1907 cur->sp = (unsigned long*) wctx->sp; 1908 cur->sp_safe = cur->sp - RED_ZONE; /* allow for the 128-byte red zone on amd64 */ 1909 cur->fp = (unsigned long*) wctx->fp; 1910 cur->tidx = 1; 1911 DprintfT (SP_DUMP_UNWIND, "\nstack_unwind (x86 walk):%d %p start\n", __LINE__, cur->pc); 1912 1913 int nctx = 1; /* number of contexts being processed */ 1914 int cnt = 8192; /* number of instructions to analyse */ 1915 1916 /* 1917 * The basic idea of our x86 stack unwind is that we don't know 1918 * if we can trust the frame-pointer register. So we walk 1919 * instructions to find a return instruction, at which point 1920 * we know the return address is on the top of the stack, etc. 1921 * 1922 * A severe challenge to walking x86 instructions is when we 1923 * encounter "jmp *(reg)" instructions, where we are expected 1924 * to jump to the (unknown-to-us) contents of a register. 1925 * 1926 * The "jmp_reg" code here attempts to keep track of the 1927 * context for such a jump, deferring any handling of such 1928 * a difficult case. 
We continue with other contexts, hoping 1929 * that some other walk will take us to a return instruction. 1930 * 1931 * If no other walk helps, we return to "jmp_reg" contexts. 1932 * While we don't know the jump target, it is possible that the 1933 * bytes immediately following the jmp_reg instruction represent 1934 * one possible target, as might be the case when a "switch" 1935 * statement is compiled. 1936 * 1937 * Unfortunately, the bytes following a "jmp_reg" instruction might 1938 * instead be a jump target from somewhere else -- execution might 1939 * never "fall through" from the preceding "jmp_reg". Those bytes 1940 * might not even be instructions at all. There are many uses of 1941 * jmp_reg instructions beyond just compiling switch statements. 1942 * 1943 * So walking the bytes after a "jmp_reg" instruction can lead 1944 * to bugs and undefined behavior, including SEGV and core dump. 1945 * 1946 * We currently do not really understand the "jmp_reg" code below. 1947 */ 1948 int jmp_reg_switch_mode = 0; 1949 int num_jmp_reg = 0; // number of jmp *reg met when switch mode is off or when in current switch case 1950 int total_num_jmp_reg = 0; // number of total jmp *reg met 1951 struct AdvWalkContext * jmp_reg_ctx[MAXJMPREG]; // context of jmp *reg met when switch mode is off or when in current switch case 1952 struct AdvWalkContext * jmp_reg_switch_ctx[MAXJMPREG]; // context of jmp *reg used in switch cases 1953 struct AdvWalkContext * jmp_reg_switch_backup_ctx = NULL; // context of the first jmp *reg used in switch cases 1954 1955 int cur_jmp_reg_switch = 0; // current switch table 1956 int num_jmp_reg_switch = 0; // number of switch table 1957 int jmp_reg_switch_case = 0; // case number in current switch table 1958 unsigned char * jmp_reg_switch_pc = NULL; // the start pc of current switch case 1959 unsigned char * jmp_reg_switch_pc_old = NULL; // backup for deleteing context of jump target 1960 unsigned char * jmp_reg_switch_base = NULL; // start pc for 
checking offsets 1961 int max_jmp_reg_switch_case = 2; 1962#if WSIZE(32) 1963 int max_switch_pc_offset = 512; 1964#else // WSIZE(64) 1965 int max_switch_pc_offset = 1024; 1966#endif 1967 int expected_num_jmp_reg = 1; // should be smaller than MAXJMPREG 1968 int max_num_jmp_reg_seen = 4; // try to resolve return if there are so many such instructions 1969 1970 1971 int save_ctx = 0; // flag to save walk context in the cache to speed up unwind 1972 struct WalkContext wctx_pc_save; 1973 if (do_walk == 0) 1974 // do_walk is the flag indicating not walking through the instructions, resolving the RA from the stack fp first 1975 __collector_memcpy (&wctx_pc_save, wctx, sizeof (struct WalkContext)); 1976 1977startWalk: 1978 if (do_walk == 0) 1979 { // try to resolve RA from stack frame pointer 1980 if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL) 1981 { 1982 do_walk = 1; 1983 goto startWalk; 1984 } 1985 // before goto checkFP, try the RA from cache (key: WalkContext -> value: caller's WalkContext)) 1986 uint64_t idx = wctx->pc * ROOT_IDX; 1987 uint32_t val = OmpVals[idx % OmpValTableSize]; 1988 idx = (idx + val) * ROOT_IDX; 1989#ifdef USE_18434988_OMP_CACHE_WORKAROUND 1990 // Check ra: if it is 0 - then cache is invalid 1991 uint64_t idx4; 1992 idx4 = (idx + val) * ROOT_IDX; 1993 idx4 = (idx4 + val) * ROOT_IDX; 1994 if (0 == OmpRAs[ idx4 % OmpValTableSize ]) // Invalid cache 1995 goto checkFP; 1996#endif 1997 struct WalkContext saved_ctx; 1998 __collector_memcpy (&saved_ctx, &OmpCurCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext)); 1999 if (wctx->pc == saved_ctx.pc 2000 && wctx->sp == saved_ctx.sp 2001 && wctx->fp == saved_ctx.fp 2002 && wctx->tbgn == saved_ctx.tbgn 2003 && wctx->tend == saved_ctx.tend) 2004 { // key match, RA may be valid 2005 idx = (idx + val) * ROOT_IDX; 2006 unsigned long *sp = NULL; 2007 unsigned long fp = wctx->fp; 2008 int from_fp = 0; 2009 if (val == RA_END_OF_STACK) 2010 { 2011 DprintfT (SP_DUMP_UNWIND, 
"find_i386_ret_addr:%d -- RA_END_OF_STACK: pc=0x%lx\n", __LINE__, wctx->pc); 2012 __collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext)); 2013 return val; 2014 } 2015 else 2016 { 2017 if (fp < wctx->sp || fp >= wctx->sbase - sizeof (*sp)) 2018 { 2019 TprintfT (DBG_LT1, "omp_cache_get -- wrong fp: pc=0x%lx\n", wctx->pc); 2020 sp = (unsigned long *) (OmpCtxs[ idx % OmpValTableSize ].sp); 2021 sp--; 2022 if (sp < cur->sp_safe || (unsigned long) sp >= wctx->sbase) 2023 { 2024 goto checkFP; 2025 } 2026 unsigned long ra = *sp; 2027 uint64_t idx2 = (idx + val) * ROOT_IDX; 2028 if (OmpRAs[ idx2 % OmpValTableSize ] == ra) 2029 { 2030 __collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext)); 2031 TprintfT (DBG_LT1, "omp_cache_get -- ra match with target sp: pc=0x%lx, ra=0x%lx, val=%d\n", wctx->pc, ra, val); 2032 return val; 2033 } 2034 TprintfT (DBG_LT1, "omp_cache_get -- ra mismatch: ra=0x%lx, expected ra=0x%lx, val=%d\n", ra, OmpRAs[ idx2 % OmpValTableSize ], val); 2035 goto checkFP; 2036 } 2037 sp = (unsigned long *) fp; 2038 from_fp = 1; 2039 } 2040 2041 uint64_t idx2 = (idx + val) * ROOT_IDX; 2042 unsigned long ra = *sp++; 2043 if (from_fp) 2044 { 2045 unsigned long tbgn = wctx->tbgn; 2046 unsigned long tend = wctx->tend; 2047 if (ra < tbgn || ra >= tend) 2048 { 2049 sp = (unsigned long *) (OmpCtxs[ idx % OmpValTableSize ].sp); 2050 sp--; 2051 //if (sp < cur->sp_safe - 16 || (unsigned long)sp >= wctx->sbase - sizeof(*sp)) { 2052 // The check above was replaced with the check below, 2053 // because we do not know why "- 16" and "- sizeof(*sp)" was used. 
2054 if (sp < cur->sp_safe || (unsigned long) sp >= wctx->sbase) 2055 goto checkFP; 2056 else 2057 ra = *sp; 2058 } 2059 } 2060 if (OmpRAs[ idx2 % OmpValTableSize ] == ra) 2061 { 2062 TprintfT (DBG_LT1, "omp_cache_get -- ra match: pc=0x%lx\n", wctx->pc); 2063 __collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext)); 2064 return val; 2065 } 2066 } 2067 goto checkFP; 2068 } 2069 else 2070 { 2071 CALL_UTIL (memset)(jmp_reg_ctx, 0, MAXJMPREG * sizeof (struct AdvWalkContext *)); 2072 CALL_UTIL (memset)(jmp_reg_switch_ctx, 0, MAXJMPREG * sizeof (struct AdvWalkContext *)); 2073 } 2074 while (cnt--) 2075 { 2076 if (nctx == 0 && (num_jmp_reg == expected_num_jmp_reg || jmp_reg_switch_mode == 1)) 2077 { // no context available, try jmp switch mode 2078 int i = 0; 2079 if (num_jmp_reg == expected_num_jmp_reg) 2080 jmp_reg_switch_mode = 0; // first jmp reg expected, restart switch mode 2081 DprintfT (SP_DUMP_UNWIND, "unwind.c: begin switch mode, num_jmp_reg = %d, jmp_reg_switch_backup_ctx=%p, jmp_reg_switch_case=%d, jmp_reg_switch_mode=%d.\n", 2082 num_jmp_reg, jmp_reg_switch_backup_ctx, jmp_reg_switch_case, jmp_reg_switch_mode); 2083 // the ideal asm of switch is 2084 // jmp reg 2085 // ...//case 1 2086 // ret 2087 // ...//case 2 2088 // ret 2089 // ...//etc 2090 if (jmp_reg_switch_mode == 0) 2091 { 2092 num_jmp_reg_switch = num_jmp_reg; // backup num_jmp_reg 2093 jmp_reg_switch_mode = 1; // begin switch mode 2094 for (i = 0; i < num_jmp_reg_switch; i++) 2095 { 2096 if (jmp_reg_switch_ctx[i] == NULL) 2097 jmp_reg_switch_ctx[i] = (struct AdvWalkContext*) alloca (sizeof (*jmp_reg_switch_ctx[i])); 2098 if (jmp_reg_switch_ctx[i] != NULL) 2099 { // backup jmp_reg_ctx 2100 __collector_memcpy (jmp_reg_switch_ctx[i], jmp_reg_ctx[i], sizeof (*jmp_reg_switch_ctx[i])); 2101 cur_jmp_reg_switch = 0; // reset the current switch table 2102 jmp_reg_switch_case = 0; // reset the case number in current switch table 2103 } 2104 } 2105 if 
(jmp_reg_switch_backup_ctx == NULL) 2106 { // only backup when the first jmp *reg is met for restoring later, if switch mode fails to resolve RA 2107 jmp_reg_switch_backup_ctx = (struct AdvWalkContext*) alloca (sizeof (*jmp_reg_switch_backup_ctx)); 2108 if (jmp_reg_switch_backup_ctx != NULL) 2109 __collector_memcpy (jmp_reg_switch_backup_ctx, cur, sizeof (*cur)); 2110 DprintfT (SP_DUMP_UNWIND, "unwind.c: back up context for switch mode.\n"); 2111 } 2112 } 2113 if (jmp_reg_switch_mode == 1) 2114 { // in the process of trying switch cases 2115 if (cur_jmp_reg_switch == num_jmp_reg_switch) 2116 { 2117 DprintfT (SP_DUMP_UNWIND, "unwind.c: have tried all switch with max_jmp_reg_switch_case for each\n"); 2118 if (jmp_reg_switch_backup_ctx != NULL) 2119 __collector_memcpy (cur, jmp_reg_switch_backup_ctx, sizeof (*cur)); 2120 int rc = process_return_real (wctx, cur, 0); 2121 if (rc == RA_SUCCESS) 2122 { 2123 if (save_ctx) 2124 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc); 2125 return rc; 2126 } 2127 break; // have tried all switch with max_jmp_reg_switch_case for each, goto checkFP 2128 } 2129 unsigned char *npc = jmp_reg_switch_ctx[cur_jmp_reg_switch]->pc; 2130 if (jmp_reg_switch_case == 0) 2131 // first switch case 2132 npc = check_modrm (npc); // pc next to "jmp reg" instruction 2133 else if (jmp_reg_switch_pc != NULL) 2134 npc = jmp_reg_switch_pc; // // pc next to "ret" instruction of previous case 2135 else 2136 { 2137 DprintfT (SP_DUMP_UNWIND, "unwind.c: unexpected jum switch mode situation, jmp_reg_switch_case=%d, jmp_reg_switch_pc=%p\n", 2138 jmp_reg_switch_case, jmp_reg_switch_pc); 2139 break; //goto checkFP 2140 } 2141 jmp_reg_switch_base = npc; 2142 struct AdvWalkContext *new = buf + nctx; 2143 nctx += 1; 2144 __collector_memcpy (new, jmp_reg_switch_ctx[cur_jmp_reg_switch], sizeof (*new)); 2145 new->pc = npc; 2146 cur = new; /* advance the new context first */ 2147 jmp_reg_switch_pc = NULL; 2148 jmp_reg_switch_case++; 2149 if (jmp_reg_switch_case == 
max_jmp_reg_switch_case) 2150 { // done many cases, change to another switch table 2151 cur_jmp_reg_switch++; 2152 jmp_reg_switch_case = 0; 2153 } 2154 } 2155 num_jmp_reg = 0; 2156 } 2157 if (jmp_reg_switch_mode == 1) 2158 { // when processing switch cases, check pc each time 2159 unsigned long tbgn = wctx->tbgn; 2160 unsigned long tend = wctx->tend; 2161 if ((unsigned long) (cur->pc) < tbgn || (unsigned long) (cur->pc) >= tend) 2162 { 2163 DprintfT (SP_DUMP_UNWIND, "unwind.c: pc out of range, pc=0x%lx\n", (unsigned long) (cur->pc)); 2164 break; 2165 } 2166 if (jmp_reg_switch_base != NULL && cur->pc > jmp_reg_switch_base + max_switch_pc_offset) 2167 { 2168 DprintfT (SP_DUMP_UNWIND, "unwind.c: limit the walk offset after jmp reg instruction\n"); 2169 if (jmp_reg_switch_backup_ctx != NULL) 2170 __collector_memcpy (cur, jmp_reg_switch_backup_ctx, sizeof (*cur)); 2171 int rc = process_return_real (wctx, cur, 0); 2172 if (rc == RA_SUCCESS) 2173 { 2174 if (save_ctx) 2175 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc); 2176 return rc; 2177 } 2178 break; // limit the walk offset after jmp reg instruction, got checkFP 2179 } 2180 } 2181 2182 if (nctx == 0) 2183 break; 2184// dump_targets (__LINE__, ntrg, targets); 2185 while (cur->pc > targets[cur->tidx]) 2186 cur->tidx += 1; 2187 if (cur->pc == targets[cur->tidx]) 2188 { 2189 /* Stop analysis. Delete context. 
*/ 2190 if (jmp_reg_switch_mode == 0 || cur->pc != jmp_reg_switch_pc_old) 2191 { 2192 if (jmp_reg_switch_mode == 1 && nctx == 1 && jmp_reg_switch_pc == NULL) 2193 { 2194 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d old target, cur->pc=%p, jmp_reg_switch_pc=%p, nctx=%d\n", 2195 __LINE__, cur->pc, jmp_reg_switch_pc, nctx); 2196 jmp_reg_switch_pc = cur->pc; // save cp before delete context, may be used as a start of switch case 2197 jmp_reg_switch_pc_old = jmp_reg_switch_pc; 2198 } 2199 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, old target.\n", __LINE__); 2200 DELETE_CURCTX (); 2201 if (cur >= buf + nctx) 2202 cur = buf; 2203 continue; 2204 } 2205 if (jmp_reg_switch_mode == 1 && cur->pc == jmp_reg_switch_pc_old) 2206 jmp_reg_switch_pc_old = NULL; // reset jmp_reg_switch_pc_old to delete the context later when cur->pc != jmp_reg_switch_pc_old 2207 } 2208 2209 /* let's walk the next x86 instruction */ 2210 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cur:%ld pc=0x%lx %02x %02x %02x %02x %02x %02x %02x sp=0x%lx\n", 2211 __LINE__, (long) (cur - buf), (unsigned long) cur->pc, 2212 (int) cur->pc[0], (int) cur->pc[1], (int) cur->pc[2], 2213 (int) cur->pc[3], (int) cur->pc[4], (int) cur->pc[5], 2214 (int) cur->pc[6], (unsigned long) cur->sp); 2215 int v = 4; /* Operand size */ 2216 int a = 4; /* Address size */ 2217 /* int W = 0; REX.W bit */ 2218#if WSIZE(64) 2219 int R = 0; /* REX.R bit */ 2220#endif 2221 int X = 0; /* REX.X bit */ 2222 int B = 0; /* REX.B bit */ 2223 /* Check prefixes */ 2224 int done = 0; 2225 while (!done) 2226 { 2227 opcode = *cur->pc++; 2228 switch (opcode) 2229 { 2230 case 0x66: /* opd size override */ 2231 v = 2; 2232 break; 2233 case 0x67: /*addr size override */ 2234 a = 2; 2235 break; 2236#if WSIZE(64) 2237 case 0x40: /* REX */ 2238 case 0x41: 2239 case 0x42: 2240 case 0x43: 2241 case 0x44: 2242 case 0x45: 2243 case 0x46: 2244 case 0x47: 2245 case 0x48: 2246 case 0x49: 2247 case 0x4a: 2248 case 0x4b: 2249 case 0x4c: 2250 case 0x4d: 2251 case 0x4e: 
2252 case 0x4f: 2253 B = (opcode & 0x1) ? 8 : 0; 2254 X = (opcode & 0x2) ? 8 : 0; 2255 R = (opcode & 0x4) ? 8 : 0; 2256 if (opcode & 0x8) /* 64 bit operand size */ 2257 v = 8; 2258 opcode = *cur->pc++; 2259 done = 1; 2260 break; 2261#endif 2262 default: 2263 done = 1; 2264 break; 2265 } 2266 } 2267 int z = (v == 8) ? 4 : v; 2268 switch (opcode) 2269 { 2270 case 0x0: /* add Eb,Gb */ 2271 case 0x01: /* add Ev,Gv */ 2272 case 0x02: /* add Gb,Eb */ 2273 case 0x03: /* add Gv,Ev */ 2274 cur->pc = check_modrm (cur->pc); 2275 break; 2276 case 0x04: /* add %al,Ib */ 2277 cur->pc += 1; 2278 break; 2279 case 0x05: /* add %eax,Iz */ 2280 cur->pc += z; 2281 break; 2282 case 0x06: /* push es */ 2283 cur->sp -= 1; 2284 break; 2285 case 0x07: /* pop es */ 2286 cur->sp += 1; 2287 if (cur->sp - RED_ZONE > cur->sp_safe) 2288 cur->sp_safe = cur->sp - RED_ZONE; 2289 break; 2290 case 0x08: /* or Eb,Gb */ 2291 case 0x09: /* or Ev,Gv */ 2292 case 0x0a: /* or Gb,Eb */ 2293 case 0x0b: /* or Gv,Ev */ 2294 cur->pc = check_modrm (cur->pc); 2295 break; 2296 case 0x0c: /* or %al,Ib */ 2297 cur->pc += 1; 2298 break; 2299 case 0x0d: /* or %eax,Iz */ 2300 cur->pc += z; 2301 break; 2302 case 0x0e: /* push cs */ 2303 cur->sp -= 1; 2304 break; 2305 case 0x0f: /* two-byte opcodes */ 2306 extop = *cur->pc++; 2307 switch (extop) 2308 { /* RTM or HLE */ 2309 case 0x01: 2310 extop2 = *cur->pc; 2311 switch (extop2) 2312 { 2313 case 0xd5: /* xend */ 2314 case 0xd6: /* xtest */ 2315 cur->pc++; 2316 break; 2317 default: 2318 break; 2319 } 2320 break; 2321 case 0x03: 2322 cur->pc = check_modrm (cur->pc); 2323 break; 2324 case 0x0b: 2325 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, undefined instruction. 
opcode=0x%02x\n", 2326 __LINE__, (int) opcode); 2327 DELETE_CURCTX (); 2328 break; 2329 case 0x05: /* syscall */ 2330 case 0x34: /* sysenter */ 2331 if (cur->rax == __NR_exit) 2332 { 2333 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode=0x%02x\n", 2334 __LINE__, (int) opcode); 2335 DELETE_CURCTX (); 2336 break; 2337 } 2338 else if (cur->rax == __NR_rt_sigreturn) 2339 { 2340 if (jmp_reg_switch_mode == 1) 2341 { 2342 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address under jmp switch mode, opcode=0x%02x\n", 2343 __LINE__, (int) opcode); 2344 goto checkFP; 2345 } 2346 wctx->sp = (unsigned long) cur->sp; 2347 if (save_ctx) 2348 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_RT_SIGRETURN); 2349 return RA_RT_SIGRETURN; 2350 } 2351#if WSIZE(32) 2352 else if (cur->rax == __NR_sigreturn) 2353 { 2354 if (jmp_reg_switch_mode == 1) 2355 { 2356 DprintfT (SP_DUMP_UNWIND, "unwind.c: give up return address under jmp switch mode, opcode = 0x34\n"); 2357 goto checkFP; 2358 } 2359 wctx->sp = (unsigned long) cur->sp; 2360 if (save_ctx) 2361 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_SIGRETURN); 2362 return RA_SIGRETURN; 2363 } 2364#endif 2365 /* Check for Linus' trick in the vsyscall page */ 2366 while (*cur->pc == 0x90) /* nop */ 2367 cur->pc++; 2368 if (*cur->pc == 0xeb) /* jmp imm8 */ 2369 cur->pc += 2; 2370 break; 2371 case 0x0d: /* nop Ev */ 2372 cur->pc = check_modrm (cur->pc); 2373 break; 2374 case 0x10: /* xmm Vq,Wq */ 2375 case 0x11: 2376 case 0x12: 2377 case 0x13: 2378 case 0x14: 2379 case 0x15: 2380 case 0x16: 2381 case 0x17: 2382 cur->pc = check_modrm (cur->pc); 2383 break; 2384 case 0x18: /* prefetch */ 2385 cur->pc = check_modrm (cur->pc); 2386 break; 2387 case 0x1E: /* endbr64/endbr32 (f3 0f 1e .. 
) is parsing as repz nop edx */ 2388 cur->pc += 2; 2389 break; 2390 case 0x1f: /* nop Ev */ 2391 cur->pc = check_modrm (cur->pc); 2392 break; 2393 case 0x28: /* xmm Vq,Wq */ 2394 case 0x29: 2395 case 0x2a: 2396 case 0x2b: 2397 case 0x2c: 2398 case 0x2d: 2399 case 0x2e: 2400 case 0x2f: 2401 cur->pc = check_modrm (cur->pc); 2402 break; 2403 case 0x30: /* wrmsr */ 2404 case 0x31: /* rdtsc */ 2405 case 0x32: /* rdmsr */ 2406 case 0x33: /* rdpmc */ 2407 break; 2408 /* case 0x34: sysenter (see above) */ 2409 case 0x38: case 0x3a: 2410 extop2 = *cur->pc++; 2411 cur->pc = check_modrm (cur->pc); 2412 // 21275311 Unwind failure in native stack for java application running on jdk8 2413 // Three-byte opcodes "66 0f 3a ??" should consume an additional "immediate" byte. 2414 if (extop == 0x3a) 2415 cur->pc++; 2416 break; 2417 case 0x40: case 0x41: case 0x42: case 0x43: /* CMOVcc Gv,Ev */ 2418 case 0x44: case 0x45: case 0x46: case 0x47: 2419 case 0x48: case 0x49: case 0x4a: case 0x4b: 2420 case 0x4c: case 0x4d: case 0x4e: case 0x4f: 2421 cur->pc = check_modrm (cur->pc); 2422 break; 2423 case 0x50: case 0x51: case 0x52: case 0x53: 2424 case 0x54: case 0x55: case 0x56: case 0x57: 2425 case 0x58: case 0x59: case 0x5a: case 0x5b: 2426 case 0x5c: case 0x5d: case 0x5e: case 0x5f: 2427 case 0x60: case 0x61: case 0x62: case 0x63: 2428 case 0x64: case 0x65: case 0x66: case 0x67: 2429 case 0x68: case 0x69: case 0x6a: case 0x6b: 2430 case 0x6c: case 0x6d: case 0x6e: case 0x6f: 2431 cur->pc = check_modrm (cur->pc); 2432 break; 2433 case 0x70: case 0x71: case 0x72: case 0x73: 2434 cur->pc = check_modrm (cur->pc) + 1; 2435 break; 2436 case 0x74: case 0x75: case 0x76: 2437 cur->pc = check_modrm (cur->pc); 2438 break; 2439 case 0x77: 2440 break; 2441 case 0x7c: case 0x7d: case 0x7e: case 0x7f: 2442 cur->pc = check_modrm (cur->pc); 2443 break; 2444 case 0x80: case 0x81: case 0x82: case 0x83: /* Jcc Jz */ 2445 case 0x84: case 0x85: case 0x86: case 0x87: 2446 case 0x88: case 0x89: case 0x8a: case 
0x8b: 2447 case 0x8c: case 0x8d: case 0x8e: case 0x8f: 2448 immv = read_int (cur->pc, z); 2449 cur->pc += z; 2450 if (nctx < (jmp_reg_switch_mode ? MAXJMPREGCTX : MAXCTX)) 2451 { 2452 int tidx = 0; 2453 unsigned char *npc = cur->pc + immv; 2454 if ((unsigned long) npc < wctx->tbgn || (unsigned long) npc >= wctx->tend) 2455 { 2456 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode=0x%02x\n", 2457 __LINE__, (int) opcode); 2458 DELETE_CURCTX (); 2459 break; 2460 } 2461 if (is_after_ret (npc)) 2462 break; 2463 while (npc > targets[tidx]) 2464 tidx += 1; 2465 if (npc != targets[tidx]) 2466 { 2467 if (ntrg < MAXTRGTS) 2468 { 2469 for (int i = 0; i < nctx; i++) 2470 if (buf[i].tidx >= tidx) 2471 buf[i].tidx++; 2472 2473 /* insert a new target */ 2474 for (int i = ntrg; i > tidx; i--) 2475 targets[i] = targets[i - 1]; 2476 ntrg += 1; 2477 targets[tidx++] = npc; 2478 } 2479 else 2480 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d ntrg=max(%d)\n", 2481 __LINE__, ntrg); 2482 struct AdvWalkContext *new = buf + nctx; 2483 nctx += 1; 2484 __collector_memcpy (new, cur, sizeof (*new)); 2485 new->pc = npc; 2486 new->tidx = tidx; 2487 cur = new; /* advance the new context first */ 2488 continue; 2489 } 2490 } 2491 else 2492 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d nctx=max(%d)\n", 2493 __LINE__, ntrg); 2494 break; 2495 case 0x90: case 0x91: case 0x92: case 0x93: /* setcc Eb */ 2496 case 0x94: case 0x95: case 0x96: case 0x97: 2497 case 0x98: case 0x99: case 0x9a: case 0x9b: 2498 case 0x9c: case 0x9d: case 0x9e: case 0x9f: 2499 cur->pc = check_modrm (cur->pc); 2500 break; 2501 case 0xa0: /* push fs */ 2502 cur->sp -= 1; 2503 break; 2504 case 0xa1: /* pop fs */ 2505 cur->sp += 1; 2506 if (cur->sp - RED_ZONE > cur->sp_safe) 2507 cur->sp_safe = cur->sp - RED_ZONE; 2508 break; 2509 case 0xa2: /* cpuid */ 2510 break; 2511 case 0xa3: /* bt Ev,Gv */ 2512 cur->pc = check_modrm (cur->pc); 2513 break; 2514 case 0xa4: /* shld Ev,Gv,Ib */ 2515 cur->pc = check_modrm (cur->pc); 2516 cur->pc += 1; 
2517 break; 2518 case 0xa5: /* shld Ev,Gv,%cl */ 2519 cur->pc = check_modrm (cur->pc); 2520 break; 2521 case 0xa8: /* push gs */ 2522 cur->sp -= 1; 2523 break; 2524 case 0xa9: /* pop gs */ 2525 cur->sp += 1; 2526 if (cur->sp - RED_ZONE > cur->sp_safe) 2527 cur->sp_safe = cur->sp - RED_ZONE; 2528 break; 2529 case 0xaa: /* rsm */ 2530 break; 2531 case 0xab: /* bts Ev,Gv */ 2532 cur->pc = check_modrm (cur->pc); 2533 break; 2534 case 0xac: /* shrd Ev,Gv,Ib */ 2535 cur->pc = check_modrm (cur->pc); 2536 cur->pc += 1; 2537 break; 2538 case 0xad: /* shrd Ev,Gv,%cl */ 2539 cur->pc = check_modrm (cur->pc); 2540 break; 2541 case 0xae: /* group15 */ 2542 cur->pc = check_modrm (cur->pc); 2543 break; 2544 case 0xaf: /* imul Gv,Ev */ 2545 cur->pc = check_modrm (cur->pc); 2546 break; 2547 case 0xb1: /* cmpxchg Ev,Gv */ 2548 cur->pc = check_modrm (cur->pc); 2549 break; 2550 case 0xb3: 2551 case 0xb6: /* movzx Gv,Eb */ 2552 case 0xb7: /* movzx Gv,Ew */ 2553 cur->pc = check_modrm (cur->pc); 2554 break; 2555 case 0xba: /* group8 Ev,Ib */ 2556 cur->pc = check_modrm (cur->pc); 2557 cur->pc += 1; 2558 break; 2559 case 0xbb: /* btc Ev,Gv */ 2560 case 0xbc: /* bsf Gv,Ev */ 2561 case 0xbd: /* bsr Gv,Ev */ 2562 cur->pc = check_modrm (cur->pc); 2563 break; 2564 case 0xbe: /* movsx Gv,Eb */ 2565 case 0xbf: /* movsx Gv,Ew */ 2566 cur->pc = check_modrm (cur->pc); 2567 break; 2568 case 0xc0: /* xadd Eb,Gb */ 2569 case 0xc1: /* xadd Ev,Gv */ 2570 cur->pc = check_modrm (cur->pc); 2571 break; 2572 case 0xc2: /* cmpps V,W,Ib */ 2573 cur->pc = check_modrm (cur->pc); 2574 cur->pc += 1; 2575 break; 2576 case 0xc3: /* movnti M,G */ 2577 cur->pc = check_modrm (cur->pc); 2578 break; 2579 case 0xc6: /* shufps V,W,Ib */ 2580 cur->pc = check_modrm (cur->pc); 2581 cur->pc += 1; 2582 break; 2583 case 0xc7: /* RDRAND */ 2584 cur->pc = check_modrm (cur->pc); 2585 break; 2586 case 0xc8: case 0xc9: case 0xca: case 0xcb: /* bswap */ 2587 case 0xcc: case 0xcd: case 0xce: case 0xcf: 2588 break; 2589 case 0xd0: case 
0xd1: case 0xd2: case 0xd3: 2590 case 0xd4: case 0xd5: case 0xd6: case 0xd7: 2591 case 0xd8: case 0xd9: case 0xda: case 0xdb: 2592 case 0xdc: case 0xdd: case 0xde: case 0xdf: 2593 case 0xe0: case 0xe1: case 0xe2: case 0xe3: 2594 case 0xe4: case 0xe5: case 0xe6: case 0xe7: 2595 case 0xe8: case 0xe9: case 0xea: case 0xeb: 2596 case 0xec: case 0xed: case 0xee: case 0xef: 2597 case 0xf0: case 0xf1: case 0xf2: case 0xf3: 2598 case 0xf4: case 0xf5: case 0xf6: case 0xf7: 2599 case 0xf8: case 0xf9: case 0xfa: case 0xfb: 2600 case 0xfc: case 0xfd: case 0xfe: case 0xff: 2601 cur->pc = check_modrm (cur->pc); 2602 break; 2603 default: 2604 if (jmp_reg_switch_mode == 1 && extop == 0x0b) 2605 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d invalid opcode ub2: 0x0f %x jmp_reg_switch_mode=%d\n", 2606 __LINE__, (int) extop, jmp_reg_switch_mode); 2607 else 2608 { 2609 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0x0f %x jmp_reg_switch_mode=%d\n", 2610 __LINE__, (int) extop, jmp_reg_switch_mode); 2611 DELETE_CURCTX (); 2612 } 2613 break; 2614 } 2615 break; 2616 case 0x10: /* adc Eb,Gb */ 2617 case 0x11: /* adc Ev,Gv */ 2618 case 0x12: /* adc Gb,Eb */ 2619 case 0x13: /* adc Gv,Ev */ 2620 cur->pc = check_modrm (cur->pc); 2621 break; 2622 case 0x14: /* adc %al,Ib */ 2623 cur->pc += 1; 2624 break; 2625 case 0x15: /* adc %eax,Iz */ 2626 cur->pc += z; 2627 break; 2628 case 0x16: /* push ss */ 2629 cur->sp -= 1; 2630 break; 2631 case 0x17: /* pop ss */ 2632 cur->sp += 1; 2633 if (cur->sp - RED_ZONE > cur->sp_safe) 2634 cur->sp_safe = cur->sp - RED_ZONE; 2635 break; 2636 case 0x18: /* sbb Eb,Gb */ 2637 case 0x19: /* sbb Ev,Gv */ 2638 case 0x1a: /* sbb Gb,Eb */ 2639 case 0x1b: /* sbb Gv,Ev */ 2640 cur->pc = check_modrm (cur->pc); 2641 break; 2642 case 0x1c: /* sbb %al,Ib */ 2643 cur->pc += 1; 2644 break; 2645 case 0x1d: /* sbb %eax,Iz */ 2646 cur->pc += z; 2647 break; 2648 case 0x1e: /* push ds */ 2649 cur->sp -= 1; 2650 break; 2651 case 0x1f: /* pop ds */ 2652 cur->sp += 1; 2653 if (cur->sp 
- RED_ZONE > cur->sp_safe) 2654 cur->sp_safe = cur->sp - RED_ZONE; 2655 break; 2656 case 0x20: /* and Eb,Gb */ 2657 case 0x21: /* and Ev,Gv */ 2658 case 0x22: /* and Gb,Eb */ 2659 case 0x23: /* and Gv,Ev */ 2660 cur->pc = check_modrm (cur->pc); 2661 break; 2662 case 0x24: /* and %al,Ib */ 2663 cur->pc += 1; 2664 break; 2665 case 0x25: /* and %eax,Iz */ 2666 cur->pc += z; 2667 break; 2668 case 0x26: /* seg=es prefix */ 2669 break; 2670 case 0x27: /* daa */ 2671 break; 2672 case 0x28: /* sub Eb,Gb */ 2673 case 0x29: /* sub Ev,Gv */ 2674 case 0x2a: /* sub Gb,Eb */ 2675 case 0x2b: /* sub Gv,Ev */ 2676 cur->pc = check_modrm (cur->pc); 2677 break; 2678 case 0x2c: /* sub %al,Ib */ 2679 cur->pc += 1; 2680 break; 2681 case 0x2d: /* sub %eax,Iz */ 2682 cur->pc += z; 2683 break; 2684 case 0x2e: /* seg=cs prefix */ 2685 break; 2686 case 0x2f: /* das */ 2687 break; 2688 case 0x30: /* xor Eb,Gb */ 2689 case 0x31: /* xor Ev,Gv */ 2690 case 0x32: /* xor Gb,Eb */ 2691 case 0x33: /* xor Gv,Ev */ 2692 cur->pc = check_modrm (cur->pc); 2693 break; 2694 case 0x34: /* xor %al,Ib */ 2695 cur->pc += 1; 2696 break; 2697 case 0x35: /* xor %eax,Iz */ 2698 cur->pc += z; 2699 break; 2700 case 0x36: /* seg=ss prefix */ 2701 break; 2702 case 0x37: /* aaa */ 2703 break; 2704 case 0x38: /* cmp Eb,Gb */ 2705 case 0x39: /* cmp Ev,Gv */ 2706 case 0x3a: /* cmp Gb,Eb */ 2707 case 0x3b: /* cmp Gv,Ev */ 2708 cur->pc = check_modrm (cur->pc); 2709 break; 2710 case 0x3c: /* cmp %al,Ib */ 2711 cur->pc += 1; 2712 break; 2713 case 0x3d: /* cmp %eax,Iz */ 2714 cur->pc += z; 2715 break; 2716 case 0x3e: /* seg=ds prefix */ 2717 break; 2718 case 0x3f: /* aas */ 2719 break; 2720#if WSIZE(32) 2721 case 0x40: /* inc %eax */ 2722 case 0x41: /* inc %ecx */ 2723 case 0x42: /* inc %edx */ 2724 case 0x43: /* inc %ebx */ 2725 break; 2726 case 0x44: /* inc %esp */ 2727 /* Can't be a valid stack pointer - delete context */ 2728 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0x44.\n", __LINE__); 2729 
DELETE_CURCTX (); 2730 break; 2731 case 0x45: /* inc %ebp */ 2732 case 0x46: /* inc %esi */ 2733 case 0x47: /* inc %edi */ 2734 case 0x48: /* dec %eax */ 2735 case 0x49: /* dec %ecx */ 2736 case 0x4a: /* dec %edx */ 2737 case 0x4b: /* dec %ebx */ 2738 break; 2739 case 0x4c: /* dec %esp */ 2740 /* Can't be a valid stack pointer - delete context */ 2741 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0x4c.\n", __LINE__); 2742 DELETE_CURCTX (); 2743 break; 2744 case 0x4d: /* dec %ebp */ 2745 case 0x4e: /* dec %esi */ 2746 case 0x4f: /* dec %edi */ 2747 break; 2748#endif 2749 case 0x50: /* push %eax */ 2750 case 0x51: /* push %ecx */ 2751 case 0x52: /* push %edx */ 2752 case 0x53: /* push %ebx */ 2753 case 0x54: /* push %esp */ 2754 case 0x55: /* push %ebp */ 2755 case 0x56: /* push %esi */ 2756 case 0x57: /* push %edi */ 2757 cur->sp -= 1; 2758 reg = OPC_REG (opcode); 2759 if (reg == RBP) 2760 { 2761#if 0 2762 /* Don't do this check yet. Affects tail calls. */ 2763 /* avoid other function's prologue */ 2764 if ((cur->pc[0] == 0x89 && cur->pc[1] == 0xe5) || 2765 (cur->pc[0] == 0x8b && cur->pc[1] == 0xec)) 2766 { 2767 /* mov %esp,%ebp */ 2768 DELETE_CURCTX (); 2769 break; 2770 } 2771#endif 2772 if (cur->fp_loc == NULL) 2773 { 2774 cur->fp_loc = cur->sp; 2775 cur->fp_sav = cur->fp; 2776 } 2777 } 2778 break; 2779 case 0x58: /* pop %eax */ 2780 case 0x59: /* pop %ecx */ 2781 case 0x5a: /* pop %edx */ 2782 case 0x5b: /* pop %ebx */ 2783 case 0x5c: /* pop %esp */ 2784 case 0x5d: /* pop %ebp */ 2785 case 0x5e: /* pop %esi */ 2786 case 0x5f: /* pop %edi */ 2787 reg = OPC_REG (opcode); 2788 cur->regs[reg] = 0; 2789 if (isInside ((unsigned long) cur->sp, (unsigned long) cur->sp_safe, wctx->sbase)) 2790 cur->regs[reg] = *cur->sp; 2791 DprintfT (SP_DUMP_UNWIND, "stack_unwind:%d cur->regs[%d]=0x%lx\n", 2792 __LINE__, reg, (unsigned long) cur->regs[reg]); 2793 if (reg == RDX) 2794 { 2795 if (cur->sp >= cur->sp_safe && 2796 (unsigned long) cur->sp < wctx->sbase) 2797 
cur->rdx = *cur->sp; 2798 } 2799 else if (reg == RBP) 2800 { 2801 if (cur->fp_loc == cur->sp) 2802 { 2803 cur->fp = cur->fp_sav; 2804 cur->fp_loc = NULL; 2805 } 2806 else if (cur->sp >= cur->sp_safe && 2807 (unsigned long) cur->sp < wctx->sbase) 2808 cur->fp = (unsigned long*) (*cur->sp); 2809 } 2810 else if (reg == RSP) 2811 { 2812 /* f.e. JVM I2CAdapter */ 2813 if (cur->sp >= cur->sp_safe && (unsigned long) cur->sp < wctx->sbase) 2814 { 2815 unsigned long *nsp = (unsigned long*) (*cur->sp); 2816 if (nsp >= cur->sp && nsp <= cur->fp) 2817 { 2818 cur->sp = nsp; 2819 } 2820 else 2821 { 2822 DprintfT (SP_DUMP_UNWIND, "stack_unwind%d give up return address, opcode=0x%02x\n", 2823 __LINE__, opcode); 2824 goto checkFP; 2825 } 2826 } 2827 else 2828 { 2829 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address, opcode=0x%02x\n", 2830 __LINE__, opcode); 2831 goto checkFP; 2832 } 2833 break; 2834 } 2835 cur->sp += 1; 2836 if (cur->sp - RED_ZONE > cur->sp_safe) 2837 { 2838 cur->sp_safe = cur->sp - RED_ZONE; 2839 } 2840 break; 2841 case 0x60: /* pusha(d) */ 2842 cur->sp -= 8; 2843 break; 2844 case 0x61: /* popa(d) */ 2845 cur->sp += 8; 2846 if (cur->sp - RED_ZONE > cur->sp_safe) 2847 cur->sp_safe = cur->sp - RED_ZONE; 2848 break; 2849 case 0x62: /* group AVX, 4-bytes EVEX prefix */ 2850 { 2851 unsigned char *pc = cur->pc - 1; // points to the beginning of the instruction 2852 int len = parse_x86_AVX_instruction (pc); 2853 if (len < 4) 2854 { 2855 DELETE_CURCTX (); 2856 } 2857 else 2858 { 2859 pc += len; 2860 cur->pc = pc; 2861 } 2862 } 2863 break; 2864 case 0x63: /* arpl Ew,Gw (32) movsxd Gv,Ev (64)*/ 2865 cur->pc = check_modrm (cur->pc); 2866 break; 2867 case 0x64: /* seg=fs prefix */ 2868 case 0x65: /* seg=gs prefix */ 2869 break; 2870 case 0x66: /* opd size override */ 2871 case 0x67: /* addr size override */ 2872 break; 2873 case 0x68: /* push Iz */ 2874 cur->sp = (unsigned long*) ((long) cur->sp - z); 2875 cur->pc += z; 2876 break; 2877 case 0x69: /* imul Gv,Ev,Iz 
*/ 2878 cur->pc = check_modrm (cur->pc); 2879 cur->pc += z; 2880 break; 2881 case 0x6a: /* push Ib */ 2882 cur->sp = (unsigned long*) ((long) cur->sp - v); 2883 cur->pc += 1; 2884 break; 2885 case 0x6b: /* imul Gv,Ev,Ib */ 2886 cur->pc = check_modrm (cur->pc); 2887 cur->pc += 1; 2888 break; 2889 case 0x6c: case 0x6d: case 0x6e: case 0x6f: 2890 cur->pc = check_modrm (cur->pc); 2891 break; 2892 case 0x70: /* jo Jb */ 2893 case 0x71: /* jno Jb */ 2894 case 0x72: /* jb Jb */ 2895 case 0x73: /* jnb Jb */ 2896 case 0x74: /* jz Jb */ 2897 case 0x75: /* jnz Jb */ 2898 case 0x76: /* jna Jb */ 2899 case 0x77: /* ja Jb */ 2900 case 0x78: /* js Jb */ 2901 case 0x79: /* jns Jb */ 2902 case 0x7a: /* jp Jb */ 2903 case 0x7b: /* jnp Jb */ 2904 case 0x7c: /* jl Jb */ 2905 case 0x7d: /* jge Jb */ 2906 case 0x7e: /* jle Jb */ 2907 case 0x7f: /* jg Jb */ 2908 imm8 = *(char*) cur->pc++; 2909 if (nctx < (jmp_reg_switch_mode ? MAXJMPREGCTX : MAXCTX)) 2910 { 2911 int tidx = 0; 2912 unsigned char *npc = cur->pc + imm8; 2913 if (is_after_ret (npc)) 2914 break; 2915 while (npc > targets[tidx]) 2916 tidx += 1; 2917 if (npc != targets[tidx]) 2918 { 2919 if (ntrg < MAXTRGTS) 2920 { 2921 for (int i = 0; i < nctx; i++) 2922 if (buf[i].tidx >= tidx) 2923 buf[i].tidx++; 2924 2925 /* insert a new target */ 2926 for (int i = ntrg; i > tidx; i--) 2927 targets[i] = targets[i - 1]; 2928 ntrg += 1; 2929 targets[tidx++] = npc; 2930 } 2931 else 2932 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d ntrg(%d)=max\n", __LINE__, ntrg); 2933 struct AdvWalkContext *new = buf + nctx; 2934 nctx += 1; 2935 __collector_memcpy (new, cur, sizeof (*new)); 2936 new->pc = npc; 2937 new->tidx = tidx; 2938 cur = new; /* advance the new context first */ 2939 continue; 2940 } 2941 } 2942 else 2943 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d nctx(%d)=max\n", __LINE__, nctx); 2944 break; 2945 case 0x80: /* group1 Eb,Ib */ 2946 cur->pc = check_modrm (cur->pc); 2947 cur->pc += 1; 2948 break; 2949 case 0x81: /* group1 Ev,Iz */ 2950 modrm = 
*cur->pc; 2951 if (MRM_MOD (modrm) == 0xc0 && MRM_REGS (modrm) == RSP) 2952 { 2953 int immz = read_int (cur->pc + 1, z); 2954 extop = MRM_EXT (modrm); 2955 if (extop == 0) /* add imm32,%esp */ 2956 cur->sp = (unsigned long*) ((long) cur->sp + immz); 2957 else if (extop == 4) /* and imm32,%esp */ 2958 cur->sp = (unsigned long*) ((long) cur->sp & immz); 2959 else if (extop == 5) /* sub imm32,%esp */ 2960 cur->sp = (unsigned long*) ((long) cur->sp - immz); 2961 if (cur->sp - RED_ZONE > cur->sp_safe) 2962 cur->sp_safe = cur->sp - RED_ZONE; 2963 } 2964 cur->pc = check_modrm (cur->pc); 2965 cur->pc += z; 2966 break; 2967 case 0x82: /* group1 Eb,Ib */ 2968 cur->pc = check_modrm (cur->pc); 2969 cur->pc += 1; 2970 break; 2971 case 0x83: /* group1 Ev,Ib */ 2972 modrm = *cur->pc; 2973 if (MRM_MOD (modrm) == 0xc0 && MRM_REGS (modrm) == RSP) 2974 { 2975 imm8 = (char) cur->pc[1]; /* sign extension */ 2976 extop = MRM_EXT (modrm); 2977 if (extop == 0) /* add imm8,%esp */ 2978 cur->sp = (unsigned long*) ((long) cur->sp + imm8); 2979 else if (extop == 4) /* and imm8,%esp */ 2980 cur->sp = (unsigned long*) ((long) cur->sp & imm8); 2981 else if (extop == 5) /* sub imm8,%esp */ 2982 cur->sp = (unsigned long*) ((long) cur->sp - imm8); 2983 if (cur->sp - RED_ZONE > cur->sp_safe) 2984 cur->sp_safe = cur->sp - RED_ZONE; 2985 } 2986 cur->pc = check_modrm (cur->pc); 2987 cur->pc += 1; 2988 break; 2989 case 0x84: /* test Eb,Gb */ 2990 case 0x85: /* test Ev,Gv */ 2991 case 0x86: /* xchg Eb,Gb */ 2992 case 0x87: /* xchg Ev,Gv */ 2993 cur->pc = check_modrm (cur->pc); 2994 break; 2995 case 0x88: /* mov Eb,Gb */ 2996 cur->pc = check_modrm (cur->pc); 2997 break; 2998 case 0x89: /* mov Ev,Gv */ 2999 modrm = *cur->pc; 3000 if (MRM_MOD (modrm) == 0xc0) 3001 { 3002 if (MRM_REGS (modrm) == RBP && MRM_REGD (modrm) == RSP) 3003 /* movl %esp,%ebp */ 3004 cur->fp = cur->sp; 3005 else if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP) 3006 { /* mov %ebp,%esp */ 3007 cur->sp = cur->fp; 3008 if (cur->sp 
- RED_ZONE > cur->sp_safe) 3009 cur->sp_safe = cur->sp - RED_ZONE; 3010 if (wctx->fp == (unsigned long) cur->sp) 3011 cur->cval = RA_FROMFP; 3012 } 3013 } 3014 else if (MRM_MOD (modrm) == 0x80) 3015 { 3016 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP) 3017 { 3018 if (cur->pc[1] == 0x24) 3019 { /* mov %ebp,disp32(%esp) - JVM */ 3020 immv = read_int (cur->pc + 2, 4); 3021 cur->fp_loc = (unsigned long*) ((char*) cur->sp + immv); 3022 cur->fp_sav = cur->fp; 3023 } 3024 } 3025 } 3026 else if (MRM_MOD (modrm) == 0x40) 3027 { 3028 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RDX) 3029 { 3030 if (cur->pc[1] == 0x24 && cur->pc[2] == 0x0) 3031 { /* movl %edx,0(%esp) */ 3032 cur->ra_loc = cur->sp; 3033 cur->ra_sav = cur->rdx; 3034 } 3035 } 3036 else if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP) 3037 { 3038 if (cur->pc[1] == 0x24) 3039 { /* mov %ebp,disp8(%esp) - JVM */ 3040 imm8 = ((char*) (cur->pc))[2]; 3041 cur->fp_loc = (unsigned long*) ((char*) cur->sp + imm8); 3042 cur->fp_sav = cur->fp; 3043 } 3044 } 3045 } 3046 else if (MRM_MOD (modrm) == 0x0) 3047 { 3048 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP) 3049 { 3050 if (cur->pc[1] == 0x24) 3051 { /* mov %ebp,(%esp) */ 3052 cur->fp_loc = cur->sp; 3053 cur->fp_sav = cur->fp; 3054 } 3055 } 3056 else if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RDX) 3057 { 3058 if (cur->pc[1] == 0x24) 3059 { /* movl %edx,(%esp) */ 3060 cur->ra_loc = cur->sp; 3061 cur->ra_sav = cur->rdx; 3062 } 3063 } 3064 } 3065 cur->pc = check_modrm (cur->pc); 3066 break; 3067 case 0x8a: /* mov Gb,Eb */ 3068 cur->pc = check_modrm (cur->pc); 3069 break; 3070 case 0x8b: /* mov Gv,Ev */ 3071 modrm = *cur->pc; 3072 if (MRM_MOD (modrm) == 0xc0) 3073 { 3074 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP) 3075 /* mov %esp,%ebp */ 3076 cur->fp = cur->sp; 3077 else if (MRM_REGS (modrm) == RBP && MRM_REGD (modrm) == RSP) 3078 { /* mov %ebp,%esp */ 3079 cur->sp = cur->fp; 3080 if (cur->sp - RED_ZONE > cur->sp_safe) 3081 
cur->sp_safe = cur->sp - RED_ZONE; 3082 if (wctx->fp == (unsigned long) cur->sp) 3083 cur->cval = RA_FROMFP; 3084 } 3085 } 3086 else if (MRM_MOD (modrm) == 0x80) 3087 { 3088 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP) 3089 { 3090 if (cur->pc[1] == 0x24) 3091 { /* mov disp32(%esp),%ebp */ 3092 immv = read_int (cur->pc + 2, 4); 3093 unsigned long *ptr = (unsigned long*) ((char*) cur->sp + immv); 3094 if (cur->fp_loc == ptr) 3095 { 3096 cur->fp = cur->fp_sav; 3097 cur->fp_loc = NULL; 3098 } 3099 else if (ptr >= cur->sp_safe && (unsigned long) ptr < wctx->sbase) 3100 cur->fp = (unsigned long*) (*ptr); 3101 } 3102 } 3103 } 3104 else if (MRM_MOD (modrm) == 0x40) 3105 { 3106 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP) 3107 { 3108 if (cur->pc[1] == 0x24) 3109 { /* mov disp8(%esp),%ebp - JVM */ 3110 imm8 = ((char*) (cur->pc))[2]; 3111 unsigned long *ptr = (unsigned long*) ((char*) cur->sp + imm8); 3112 if (cur->fp_loc == ptr) 3113 { 3114 cur->fp = cur->fp_sav; 3115 cur->fp_loc = NULL; 3116 } 3117 else if (ptr >= cur->sp_safe && (unsigned long) ptr < wctx->sbase) 3118 cur->fp = (unsigned long*) (*ptr); 3119 } 3120 } 3121 } 3122 else if (MRM_MOD (modrm) == 0x0) 3123 { 3124 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP) 3125 { 3126 if (cur->pc[1] == 0x24) 3127 { /* mov (%esp),%ebp */ 3128 if (cur->fp_loc == cur->sp) 3129 { 3130 cur->fp = cur->fp_sav; 3131 cur->fp_loc = NULL; 3132 } 3133 else if (cur->sp >= cur->sp_safe && 3134 (unsigned long) cur->sp < wctx->sbase) 3135 cur->fp = (unsigned long*) *cur->sp; 3136 } 3137 } 3138 } 3139 cur->pc = check_modrm (cur->pc); 3140 break; 3141 case 0x8c: /* mov Mw,Sw */ 3142 cur->pc = check_modrm (cur->pc); 3143 break; 3144 case 0x8d: /* lea Gv,M */ 3145 modrm = *cur->pc; 3146 if (MRM_REGD (modrm) == RSP) 3147 { 3148 unsigned char *pc = cur->pc; 3149 // Mez: need to use always regs[RSP/RBP] instead cur->sp(or fp): 3150 cur->regs[RSP] = (unsigned long) cur->sp; 3151 cur->regs[RBP] = (unsigned long) cur->fp; 
3152 cur->pc++; 3153 int mod = (modrm >> 6) & 3; 3154 int r_m = modrm & 7; 3155 long val = 0; 3156 int undefRez = 0; 3157 if (mod == 0x3) 3158 val = getRegVal (cur, MRM_REGS (modrm), &undefRez); 3159 else if (r_m == 4) 3160 { // SP or R12. Decode SIB-byte. 3161 int sib = *cur->pc++; 3162 int scale = 1 << (sib >> 6); 3163 int index = X | ((sib >> 3) & 7); 3164 int base = B | (sib & 7); 3165 if (mod == 0) 3166 { 3167 if ((base & 7) == 5) 3168 { // BP or R13 3169 if (index != 4) // SP 3170 val += getRegVal (cur, index, &undefRez) * scale; 3171 val += read_int (cur->pc, 4); 3172 cur->pc += 4; 3173 } 3174 else 3175 { 3176 val += getRegVal (cur, base, &undefRez); 3177 if (index != 4) // SP 3178 val += getRegVal (cur, index, &undefRez) * scale; 3179 } 3180 } 3181 else 3182 { 3183 val += getRegVal (cur, base, &undefRez); 3184 if (index != 4) // SP 3185 val += getRegVal (cur, index, &undefRez) * scale; 3186 if (mod == 1) 3187 { 3188 val += read_int (cur->pc, 1); 3189 cur->pc++; 3190 } 3191 else 3192 { // mod == 2 3193 val += read_int (cur->pc, 4); 3194 cur->pc += 4; 3195 } 3196 } 3197 } 3198 else if (mod == 0) 3199 { 3200 if (r_m == 5) 3201 { // BP or R13 3202 val += read_int (cur->pc, 4); 3203 cur->pc += 4; 3204 } 3205 else 3206 val += getRegVal (cur, MRM_REGS (modrm), &undefRez); 3207 } 3208 else 3209 { // mod == 1 || mod == 2 3210 val += getRegVal (cur, MRM_REGS (modrm), &undefRez); 3211 if (mod == 1) 3212 { 3213 val += read_int (cur->pc, 1); 3214 cur->pc++; 3215 } 3216 else 3217 { // mod == 2 3218 val += read_int (cur->pc, 4); 3219 cur->pc += 4; 3220 } 3221 } 3222 if (undefRez) 3223 { 3224 DprintfT (SP_DUMP_UNWIND, "stack_unwind%d cannot calculate RSP. 
cur->pc=0x%lx val=0x%lx\n", 3225 __LINE__, (unsigned long) cur->pc, (unsigned long) val); 3226 goto checkFP; 3227 } 3228 cur->regs[MRM_REGD (modrm)] = val; 3229 DprintfT (SP_DUMP_UNWIND, "stack_unwind%d cur->pc=0x%lx val=0x%lx wctx->sp=0x%lx wctx->sbase=0x%lx\n", 3230 __LINE__, (unsigned long) cur->pc, (unsigned long) val, 3231 (unsigned long) wctx->sp, (unsigned long) wctx->sbase); 3232 if (cur->pc != check_modrm (pc)) 3233 DprintfT (SP_DUMP_UNWIND, "stack_unwind%d ERROR: cur->pc=0x%lx != check_modrm(0x%lx)=0x%lx\n", 3234 __LINE__, (unsigned long) cur->pc, (unsigned long) pc, 3235 (unsigned long) check_modrm (pc)); 3236 if (MRM_REGD (modrm) == RSP) 3237 { 3238 if (!isInside ((unsigned long) val, wctx->sp, wctx->sbase)) 3239 { 3240 DprintfT (SP_DUMP_UNWIND, "stack_unwind%d cannot calculate RSP. cur->pc=0x%lx opcode=0x%02x val=0x%lx wctx->sp=0x%lx wctx->sbase=0x%lx\n", 3241 __LINE__, (unsigned long) cur->pc, opcode, (unsigned long) val, 3242 (unsigned long) wctx->sp, (unsigned long) wctx->sbase); 3243 goto checkFP; 3244 } 3245 cur->sp = (unsigned long *) val; 3246 if (cur->sp - RED_ZONE > cur->sp_safe) 3247 cur->sp_safe = cur->sp - RED_ZONE; 3248 } 3249 } 3250 else 3251 cur->pc = check_modrm (cur->pc); 3252 break; 3253 case 0x8e: /* mov Sw,Ew */ 3254 cur->pc = check_modrm (cur->pc); 3255 break; 3256 case 0x8f: /* pop Ev */ 3257 cur->pc = check_modrm (cur->pc); 3258 cur->sp += 1; 3259 if (cur->sp - RED_ZONE > cur->sp_safe) 3260 cur->sp_safe = cur->sp - RED_ZONE; 3261 break; 3262 case 0x90: /* nop */ 3263 break; 3264 case 0x91: /* xchg %eax,%ecx */ 3265 case 0x92: /* xchg %eax,%edx */ 3266 case 0x93: /* xchg %eax,%ebx */ 3267 case 0x94: /* xchg %eax,%esp XXXX */ 3268 case 0x95: /* xchg %eax,%ebp XXXX */ 3269 case 0x96: /* xchg %eax,%esi */ 3270 case 0x97: /* xchg %eax,%edi */ 3271 break; 3272 case 0x98: /* cbw/cwde */ 3273 case 0x99: /* cwd/cwq */ 3274 break; 3275 case 0x9a: /* callf Ap */ 3276 if (jmp_reg_switch_mode == 1) 3277 { 3278 struct AdvWalkContext* tmpctx = 
(struct AdvWalkContext *) alloca (sizeof (*cur)); 3279 __collector_memcpy (tmpctx, cur, sizeof (*cur)); 3280 int rc = process_return (wctx, tmpctx); 3281 if (rc != RA_FAILURE) 3282 { 3283 if (save_ctx) 3284 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc); 3285 return rc; 3286 } 3287 } 3288 cur->pc += 2 + a; 3289 break; 3290 case 0x9b: /* fwait */ 3291 case 0x9c: /* pushf Fv */ 3292 case 0x9d: /* popf Fv */ 3293 case 0x9e: /* sahf */ 3294 case 0x9f: /* lahf */ 3295 break; 3296 case 0xa0: /* mov al,Ob */ 3297 case 0xa1: /* mov eax,Ov */ 3298 case 0xa2: /* mov Ob,al */ 3299 case 0xa3: /* mov Ov,eax */ 3300 cur->pc += a; 3301 break; 3302 case 0xa4: /* movsb Yb,Xb */ 3303 case 0xa5: /* movsd Yv,Xv */ 3304 case 0xa6: /* cmpsb Yb,Xb */ 3305 case 0xa7: /* cmpsd Xv,Yv */ 3306 break; 3307 case 0xa8: /* test al,Ib */ 3308 cur->pc += 1; 3309 break; 3310 case 0xa9: /* test eax,Iz */ 3311 cur->pc += z; 3312 break; 3313 case 0xaa: /* stosb Yb,%al */ 3314 case 0xab: /* stosd Yv,%eax */ 3315 case 0xac: /* lodsb %al,Xb */ 3316 case 0xad: /* lodsd %eax,Xv */ 3317 case 0xae: /* scasb %al,Yb */ 3318 case 0xaf: /* scasd %eax,Yv */ 3319 break; 3320 case 0xb0: /* mov %al,Ib */ 3321 case 0xb1: /* mov %cl,Ib */ 3322 case 0xb2: /* mov %dl,Ib */ 3323 case 0xb3: /* mov %bl,Ib */ 3324 case 0xb4: /* mov %ah,Ib */ 3325 case 0xb5: /* mov %ch,Ib */ 3326 case 0xb6: /* mov %dh,Ib */ 3327 case 0xb7: /* mov %bh,Ib */ 3328 cur->pc += 1; 3329 break; 3330 case 0xb8: /* mov Iv,%eax */ 3331 case 0xb9: /* mov Iv,%ecx */ 3332 case 0xba: /* mov Iv,%edx */ 3333 case 0xbb: /* mov Iv,%ebx */ 3334 case 0xbc: /* mov Iv,%esp */ 3335 case 0xbd: /* mov Iv,%rbp */ 3336 case 0xbe: /* mov Iv,%esi */ 3337 case 0xbf: /* mov Iv,%edi */ 3338 reg = OPC_REG (opcode); 3339 if (reg == RAX) 3340 cur->rax = read_int (cur->pc, v); 3341 cur->pc += v; 3342 break; 3343 case 0xc0: /* group2 Eb,Ib */ 3344 case 0xc1: /* group2 Ev,Ib */ 3345 cur->pc = check_modrm (cur->pc) + 1; 3346 break; 3347 case 0xc2: /* ret Iw */ 3348 /* In the 
dynamic linker we may see that 3349 * the actual return address is at sp+immv, 3350 * while sp points to the resolved address. 3351 */ 3352 { 3353 immv = read_int (cur->pc, 2); 3354 int rc = process_return (wctx, cur); 3355 if (rc != RA_FAILURE) 3356 { 3357 if (jmp_reg_switch_mode == 1) 3358 { 3359 DprintfT (SP_DUMP_UNWIND, "stack_unwind%d give up return address under jmp switch mode, opcode = 0xc2\n", __LINE__); 3360 goto checkFP; 3361 } 3362 wctx->sp += immv; 3363 if (save_ctx) 3364 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc); 3365 return rc; 3366 } 3367 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xc2.\n", __LINE__); 3368 DELETE_CURCTX (); 3369 } 3370 break; 3371 case 0xc3: /* ret */ 3372 { 3373 int rc = process_return (wctx, cur); 3374 if (rc != RA_FAILURE) 3375 { 3376 if (save_ctx) 3377 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc); 3378 return rc; 3379 } 3380 if (jmp_reg_switch_mode == 1) 3381 jmp_reg_switch_pc = cur->pc; 3382 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xc3.\n", __LINE__); 3383 DELETE_CURCTX (); 3384 } 3385 break; 3386 case 0xc4: /* group AVX, 3-bytes VEX prefix */ 3387 { 3388 unsigned char *pc = cur->pc - 1; // points to the beginning of the instruction 3389 int len = parse_x86_AVX_instruction (pc); 3390 if (len < 3) 3391 DELETE_CURCTX (); 3392 else 3393 { 3394 pc += len; 3395 cur->pc = pc; 3396 } 3397 } 3398 break; 3399 case 0xc5: /* group AVX, 2-bytes VEX prefix */ 3400 { 3401 unsigned char *pc = cur->pc - 1; // points to the beginning of the instruction 3402 int len = parse_x86_AVX_instruction (pc); 3403 if (len < 2) 3404 DELETE_CURCTX (); 3405 else 3406 { 3407 pc += len; 3408 cur->pc = pc; 3409 } 3410 } 3411 break; 3412 case 0xc6: 3413 modrm = *cur->pc; 3414 if (modrm == 0xf8) /* xabort */ 3415 cur->pc += 2; 3416 else /* mov Eb,Ib */ 3417 cur->pc = check_modrm (cur->pc) + 1; 3418 break; 3419 case 0xc7: 3420 modrm = *cur->pc; 3421 if (modrm == 0xf8) /* xbegin */ 3422 cur->pc += v + 1; 
3423 else 3424 { /* mov Ev,Iz */ 3425 extop = MRM_EXT (modrm); 3426 if (extop != 0) 3427 { 3428 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address, opcode = 0xc7\n", __LINE__); 3429 goto checkFP; 3430 } 3431 if (MRM_MOD (modrm) == 0xc0 && MRM_REGS (modrm) == RAX) 3432 cur->rax = read_int (cur->pc + 1, z); 3433 cur->pc = check_modrm (cur->pc) + z; 3434 } 3435 break; 3436 case 0xc8: /* enter Iw,Ib */ 3437 cur->pc += 3; 3438 break; 3439 case 0xc9: /* leave */ 3440 /* mov %ebp,%esp */ 3441 cur->sp = cur->fp; 3442 /* pop %ebp */ 3443 if (cur->fp_loc == cur->sp) 3444 { 3445 cur->fp = cur->fp_sav; 3446 cur->fp_loc = NULL; 3447 } 3448 else if (cur->sp >= cur->sp_safe && 3449 (unsigned long) cur->sp < wctx->sbase) 3450 { 3451 cur->fp = (unsigned long*) (*cur->sp); 3452 if (wctx->fp == (unsigned long) cur->sp) 3453 cur->cval = RA_FROMFP; 3454 } 3455 cur->sp += 1; 3456 if (cur->sp - RED_ZONE > cur->sp_safe) 3457 cur->sp_safe = cur->sp - RED_ZONE; 3458 break; 3459 case 0xca: /* retf Iw */ 3460 cur->pc += 2; /* XXXX process return */ 3461 break; 3462 case 0xcb: /* retf */ 3463 break; /* XXXX process return */ 3464 case 0xcc: /* int 3 */ 3465 break; 3466 case 0xcd: /* int Ib */ 3467 if (*cur->pc == 0x80) 3468 { 3469 if (cur->rax == __NR_exit) 3470 { 3471 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xcd.\n", __LINE__); 3472 DELETE_CURCTX (); 3473 break; 3474 } 3475 else if (cur->rax == __NR_rt_sigreturn) 3476 { 3477 if (jmp_reg_switch_mode == 1) 3478 { 3479 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address under jmp switch mode, opcode=0xcd\n", 3480 __LINE__); 3481 goto checkFP; 3482 } 3483 wctx->sp = (unsigned long) cur->sp; 3484 if (save_ctx) 3485 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_RT_SIGRETURN); 3486 return RA_RT_SIGRETURN; 3487 } 3488#if WSIZE(32) 3489 else if (cur->rax == __NR_sigreturn) 3490 { 3491 if (jmp_reg_switch_mode == 1) 3492 { 3493 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address under jmp 
switch mode, opcode = 0xc2\n", 3494 __LINE__); 3495 goto checkFP; 3496 } 3497 wctx->sp = (unsigned long) cur->sp; 3498 if (save_ctx) 3499 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_SIGRETURN); 3500 return RA_SIGRETURN; 3501 } 3502#endif 3503 } 3504 cur->pc += 1; 3505 break; 3506 case 0xce: /* into */ 3507 case 0xcf: /* iret */ 3508 break; 3509 case 0xd0: /* shift group2 Eb,1 */ 3510 case 0xd1: /* shift group2 Ev,1 */ 3511 case 0xd2: /* shift group2 Eb,%cl */ 3512 case 0xd3: /* shift group2 Ev,%cl */ 3513 cur->pc = check_modrm (cur->pc); 3514 break; 3515 case 0xd4: /* aam Ib */ 3516 cur->pc += 1; 3517 break; 3518 case 0xd5: /* aad Ib */ 3519 cur->pc += 1; 3520 break; 3521 case 0xd6: /* falc? */ 3522 break; 3523 case 0xd7: 3524 cur->pc = check_modrm (cur->pc); 3525 cur->pc++; 3526 break; 3527 case 0xd8: /* esc instructions */ 3528 case 0xd9: 3529 case 0xda: 3530 case 0xdb: 3531 case 0xdc: 3532 case 0xdd: 3533 case 0xde: 3534 case 0xdf: 3535 cur->pc = check_modrm (cur->pc); 3536 break; 3537 case 0xe0: /* loopne Jb */ 3538 case 0xe1: /* loope Jb */ 3539 case 0xe2: /* loop Jb */ 3540 case 0xe3: /* jcxz Jb */ 3541 imm8 = *(char*) cur->pc++; 3542 if (nctx < (jmp_reg_switch_mode ? 
MAXJMPREGCTX : MAXCTX)) 3543 { 3544 int tidx = 0; 3545 unsigned char *npc = cur->pc + imm8; 3546 if (is_after_ret (npc)) 3547 break; 3548 while (npc > targets[tidx]) 3549 tidx += 1; 3550 if (npc != targets[tidx]) 3551 { 3552 if (ntrg < MAXTRGTS) 3553 { 3554 for (int i = 0; i < nctx; i++) 3555 if (buf[i].tidx >= tidx) 3556 buf[i].tidx++; 3557 /* insert a new target */ 3558 for (int i = ntrg; i > tidx; i--) 3559 targets[i] = targets[i - 1]; 3560 ntrg += 1; 3561 targets[tidx++] = npc; 3562 } 3563 else 3564 DprintfT (SP_DUMP_UNWIND, "unwind.c: ntrg = max\n"); 3565 struct AdvWalkContext *new = buf + nctx; 3566 nctx += 1; 3567 __collector_memcpy (new, cur, sizeof (*new)); 3568 new->pc = npc; 3569 new->tidx = tidx; 3570 cur = new; /* advance the new context first */ 3571 continue; 3572 } 3573 } 3574 else 3575 DprintfT (SP_DUMP_UNWIND, "unwind.c: nctx = max\n"); 3576 break; 3577 case 0xe4: case 0xe5: 3578 cur->pc = check_modrm (cur->pc); 3579 cur->pc++; 3580 break; 3581 case 0xe6: case 0xe7: 3582 cur->pc++; 3583 cur->pc = check_modrm (cur->pc); 3584 break; 3585 case 0xec: case 0xed: case 0xee: case 0xef: 3586 cur->pc = check_modrm (cur->pc); 3587 break; 3588 case 0xe8: /* call Jz (f64) */ 3589 { 3590 if (jmp_reg_switch_mode == 1) 3591 { 3592 struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur)); 3593 __collector_memcpy (tmpctx, cur, sizeof (*cur)); 3594 int rc = process_return (wctx, tmpctx); 3595 if (rc != RA_FAILURE) 3596 { 3597 if (save_ctx) 3598 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc); 3599 return rc; 3600 } 3601 } 3602 int immz = read_int (cur->pc, z); 3603 if (immz == 0) 3604 /* special case in PIC code */ 3605 cur->sp -= 1; 3606 cur->pc += z; 3607 } 3608 break; 3609 case 0xe9: /* jump Jz */ 3610 { 3611 int immz = read_int (cur->pc, z); 3612 unsigned char *npc = cur->pc + z + immz; 3613 if ((unsigned long) npc < wctx->tbgn || (unsigned long) npc >= wctx->tend) 3614 { 3615 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, 
opcode 0xe9.\n", __LINE__); 3616 DELETE_CURCTX (); 3617 break; 3618 } 3619 int tidx = 0; 3620 while (npc > targets[tidx]) 3621 tidx += 1; 3622 if (npc != targets[tidx]) 3623 { 3624 if (ntrg < MAXTRGTS) 3625 { 3626 for (int i = 0; i < nctx; i++) 3627 if (buf[i].tidx >= tidx) 3628 buf[i].tidx++; 3629 /* insert a new target */ 3630 for (int i = ntrg; i > tidx; i--) 3631 targets[i] = targets[i - 1]; 3632 ntrg += 1; 3633 targets[tidx++] = npc; 3634 } 3635 else 3636 DprintfT (SP_DUMP_UNWIND, "unwind.c: ntrg = max\n"); 3637 cur->pc = npc; 3638 cur->tidx = tidx; 3639 continue; /* advance this context first */ 3640 } 3641 else 3642 { 3643 /* Delete context */ 3644 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xe9.\n", __LINE__); 3645 DELETE_CURCTX (); 3646 } 3647 } 3648 break; 3649 case 0xeb: /* jump imm8 */ 3650 { 3651 imm8 = *(char*) cur->pc++; 3652 int tidx = 0; 3653 unsigned char *npc = cur->pc + imm8; 3654 while (npc > targets[tidx]) 3655 tidx += 1; 3656 if (npc != targets[tidx]) 3657 { 3658 if (ntrg < MAXTRGTS) 3659 { 3660 for (int i = 0; i < nctx; i++) 3661 if (buf[i].tidx >= tidx) 3662 buf[i].tidx++; 3663 /* insert a new target */ 3664 for (int i = ntrg; i > tidx; i--) 3665 targets[i] = targets[i - 1]; 3666 ntrg += 1; 3667 targets[tidx++] = npc; 3668 } 3669 else 3670 DprintfT (SP_DUMP_UNWIND, "unwind.c: ntrg = max\n"); 3671 cur->pc = npc; 3672 cur->tidx = tidx; 3673 continue; /* advance this context first */ 3674 } 3675 else 3676 { 3677 /* Delete context */ 3678 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xeb.\n", __LINE__); 3679 DELETE_CURCTX (); 3680 } 3681 } 3682 break; 3683 case 0xf0: /* lock prefix */ 3684 case 0xf2: /* repne prefix */ 3685 case 0xf3: /* repz prefix */ 3686 break; 3687 case 0xf4: /* hlt */ 3688 extop2 = *(cur->pc - 3); 3689 if (extop2 == 0x90) 3690 { 3691 // 17851712 occasional SEGV in find_i386_ret_addr in unwind.c during attach 3692 if (save_ctx) 3693 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, 
RA_END_OF_STACK); 3694 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d returns RA_END_OF_STACK\n", __LINE__); 3695 return RA_END_OF_STACK; 3696 } 3697 /* We see 'hlt' in _start. Stop analysis, revert to FP */ 3698 /* A workaround for the Linux main stack */ 3699 if (nctx > 1) 3700 { 3701 DELETE_CURCTX (); 3702 break; 3703 } 3704 if (cur->fp == 0) 3705 { 3706 if (jmp_reg_switch_mode == 1) 3707 { 3708 DprintfT (SP_DUMP_UNWIND, "unwind.c: give up return address under jmp switch mode, opcode = 0xf4\n"); 3709 goto checkFP; 3710 } 3711 cache_put (wctx, RA_EOSTCK); 3712 wctx->pc = 0; 3713 wctx->sp = 0; 3714 wctx->fp = 0; 3715 if (save_ctx) 3716 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_END_OF_STACK); 3717 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d returns RA_END_OF_STACK\n", __LINE__); 3718 return RA_END_OF_STACK; 3719 } 3720 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address, opcode = 0xf4\n", __LINE__); 3721 goto checkFP; 3722 case 0xf5: /* cmc */ 3723 break; 3724 case 0xf6: /* group3 Eb */ 3725 modrm = *cur->pc; 3726 extop = MRM_EXT (modrm); 3727 cur->pc = check_modrm (cur->pc); 3728 if (extop == 0x0) /* test Ib */ 3729 cur->pc += 1; 3730 break; 3731 case 0xf7: /* group3 Ev */ 3732 modrm = *cur->pc; 3733 extop = MRM_EXT (modrm); 3734 cur->pc = check_modrm (cur->pc); 3735 if (extop == 0x0) /* test Iz */ 3736 cur->pc += z; 3737 break; 3738 case 0xf8: /* clc */ 3739 case 0xf9: /* stc */ 3740 case 0xfa: /* cli */ 3741 case 0xfb: /* sti */ 3742 case 0xfc: /* cld */ 3743 case 0xfd: /* std */ 3744 break; 3745 case 0xfe: /* group4 */ 3746 modrm = *cur->pc; 3747 extop = MRM_EXT (modrm); 3748 switch (extop) 3749 { 3750 case 0x0: /* inc Eb */ 3751 case 0x1: /* dec Eb */ 3752 cur->pc = check_modrm (cur->pc); 3753 break; 3754 case 0x7: 3755 cur->pc = check_modrm (cur->pc); 3756 break; 3757 default: 3758 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0xfe %x\n", 3759 __LINE__, extop); 3760 DELETE_CURCTX (); 3761 break; 3762 } 3763 break; 3764 case 0xff: /* group5 */ 
3765 modrm = *cur->pc; 3766 extop = MRM_EXT (modrm); 3767 switch (extop) 3768 { 3769 case 0x0: /* inc Ev */ 3770 case 0x1: /* dec Ev */ 3771 cur->pc = check_modrm (cur->pc); 3772 break; 3773 case 0x2: /* calln Ev */ 3774 if (jmp_reg_switch_mode == 1) 3775 { 3776 struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur)); 3777 __collector_memcpy (tmpctx, cur, sizeof (*cur)); 3778 int rc = process_return (wctx, tmpctx); 3779 if (rc != RA_FAILURE) 3780 { 3781 if (save_ctx) 3782 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc); 3783 return rc; 3784 } 3785 } 3786 cur->pc = check_modrm (cur->pc); 3787 break; 3788 case 0x3: /* callf Ep */ 3789 if (jmp_reg_switch_mode == 1) 3790 { 3791 struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur)); 3792 __collector_memcpy (tmpctx, cur, sizeof (*cur)); 3793 int rc = process_return (wctx, tmpctx); 3794 if (rc != RA_FAILURE) 3795 { 3796 if (save_ctx) 3797 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc); 3798 return rc; 3799 } 3800 } 3801 cur->pc = check_modrm (cur->pc); /* XXXX */ 3802 break; 3803 case 0x4: /* jumpn Ev */ 3804 /* This instruction appears in PLT or 3805 * in tail call optimization. 3806 * In both cases treat it as return. 
3807 * Save jump *(reg) - switch, etc, for later use when no ctx left 3808 */ 3809 if (modrm == 0x25 || /* jumpn *disp32 */ 3810 MRM_MOD (modrm) == 0x40 || /* jumpn byte(reg) */ 3811 MRM_MOD (modrm) == 0x80) /* jumpn word(reg) */ 3812 { 3813 DprintfT (SP_DUMP_UNWIND, "unwind.c: PLT or tail call: %p\n", cur->pc - 1); 3814 int rc = process_return (wctx, cur); 3815 if (rc != RA_FAILURE) 3816 { 3817 if (jmp_reg_switch_mode == 1 && total_num_jmp_reg < max_num_jmp_reg_seen) 3818 { 3819 DprintfT (SP_DUMP_UNWIND, "unwind.c: give up return address under jmp switch mode, opcode = 0xff\n"); 3820 goto checkFP; 3821 } 3822 if (save_ctx) 3823 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc); 3824 return rc; 3825 } 3826 } 3827 else if (modrm != 0x24 /*ignore SIB*/) /* jumpn *(reg) or jumpn reg */ 3828 { 3829 // 22846120 stack unwind does not find caller of __memcpy_ssse3_back with B64 intel-Linux 3830 /* 3831 * For now, let's deal rather narrowly with this scenario. If: 3832 * - we are in the middle of an "ff e2" instruction, and 3833 * - the next instruction is undefined ( 0f 0b == ud2 ) 3834 * then test return. (Might eventually have to broaden the scope 3835 * of this fix to other registers/etc.) 3836 */ 3837 if (cur->pc[0] == 0xe2 && cur->pc[1] == 0x0f && cur->pc[2] == 0x0b) 3838 { 3839 int rc = process_return_real (wctx, cur, 0); 3840 if (rc == RA_SUCCESS) 3841 { 3842 if (save_ctx) 3843 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc); 3844 return rc; 3845 } 3846 } 3847 3848 // 22691241 shjsynprog, jsynprog core dump from find_i386_ret_addr 3849 /* 3850 * Here is another oddity. Java 9 seems to emit dynamically generated 3851 * code where a code block ends with a "jmp *reg" and then padding to a 3852 * multiple-of-16 boundary and then a bunch of 0s. In this case, let's 3853 * not continue to walk bytes since we would be walking off the end of 3854 * the instructions into ... something. 
Treating them as instructions 3855 * can lead to unexpected results, including SEGV. 3856 */ 3857 /* 3858 * While the general problem deserves a better solution, let's look 3859 * here only for one particular case: 3860 * 0xff 0xe7 jmp *reg 3861 * nop to bring us to a multiple-of-16 boundary 3862 * 0x0000000000000a00 something that does not look like an instruction 3863 * 3864 * A different nop might be used depending on how much padding is needed 3865 * to reach that multiple-of-16 boundary. We've seen two: 3866 * 0x90 one byte 3867 * 0x0f 0x1f 0x40 0x00 four bytes 3868 */ 3869 // confirm the instruction is 0xff 0xe7 3870 if (cur->pc[0] == 0xe7) 3871 { 3872 // check for correct-length nop and find next 16-byte boundary 3873 int found_nop = 0; 3874 unsigned long long *boundary = 0; 3875 switch ((((unsigned long) (cur->pc)) & 0xf)) 3876 { 3877 case 0xb: // look for 4-byte nop 3878 if (*((unsigned *) (cur->pc + 1)) == 0x00401f0f) 3879 found_nop = 1; 3880 boundary = (unsigned long long *) (cur->pc + 5); 3881 break; 3882 case 0xe: // look for 1-byte nop 3883 if (cur->pc[1] == 0x90) 3884 found_nop = 1; 3885 boundary = (unsigned long long *) (cur->pc + 2); 3886 break; 3887 default: 3888 break; 3889 } 3890 3891 // if nop is found, check what's at the boundary 3892 if (found_nop && *boundary == 0x000000000a00) 3893 { 3894 DELETE_CURCTX (); 3895 break; 3896 } 3897 } 3898 3899 DprintfT (SP_DUMP_UNWIND, "unwind.c: probably PLT or tail call or switch table: %p\n", 3900 cur->pc - 1); 3901 if (num_jmp_reg < expected_num_jmp_reg) 3902 { 3903 if (jmp_reg_ctx[num_jmp_reg] == NULL) 3904 jmp_reg_ctx[num_jmp_reg] = (struct AdvWalkContext *) alloca (sizeof (*cur)); 3905 if (jmp_reg_ctx[num_jmp_reg] != NULL) 3906 __collector_memcpy (jmp_reg_ctx[num_jmp_reg], cur, sizeof (*cur)); 3907 } 3908 if (num_jmp_reg < expected_num_jmp_reg || 3909 (num_jmp_reg >= expected_num_jmp_reg && 3910 jmp_reg_ctx[expected_num_jmp_reg - 1] != NULL && 3911 cur->pc != jmp_reg_ctx[expected_num_jmp_reg - 
1]->pc)) 3912 { 3913 num_jmp_reg++; 3914 total_num_jmp_reg++; 3915 } 3916 if (jmp_reg_switch_mode == 1 && total_num_jmp_reg >= max_num_jmp_reg_seen) 3917 { 3918 int rc = process_return_real (wctx, cur, 0); 3919 if (rc == RA_SUCCESS) 3920 { 3921 if (save_ctx) 3922 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc); 3923 return rc; 3924 } 3925 } 3926 } 3927 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xff.\n", __LINE__); 3928 DELETE_CURCTX (); 3929 break; 3930 case 0x5: /* jmpf Ep */ 3931 cur->pc = check_modrm (cur->pc); /* XXXX */ 3932 break; 3933 case 0x6: /* push Ev */ 3934 cur->pc = check_modrm (cur->pc); 3935 cur->sp -= 1; 3936 break; 3937 case 0x7: 3938 cur->pc = check_modrm (cur->pc); /* XXXX */ 3939 if (jmp_reg_switch_mode == 1) 3940 { 3941 int rc = process_return_real (wctx, cur, 0); 3942 if (rc == RA_SUCCESS) 3943 { 3944 if (save_ctx) 3945 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc); 3946 return rc; 3947 } 3948 } 3949 break; 3950 default: 3951 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0xff %x\n", 3952 __LINE__, (int) extop); 3953 DELETE_CURCTX (); 3954 break; 3955 } 3956 break; 3957 default: 3958 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0x%x\n", 3959 __LINE__, (int) opcode); 3960 DELETE_CURCTX (); 3961 break; 3962 } 3963 3964 /* switch to next context */ 3965 if (++cur >= buf + nctx) 3966 cur = buf; 3967 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d switch context: cur=0x%lx(%ld) nctx=%d cnt=%d\n", 3968 __LINE__, (unsigned long) cur, (long) (cur - buf), (int) nctx, (int) cnt); 3969 } 3970 3971checkFP: 3972 Tprintf (DBG_LT3, "find_i386_ret_addr:%d checkFP: wctx=0x%lx fp=0x%lx ln=0x%lx pc=0x%lx sbase=0x%lx sp=0x%lx tbgn=0x%lx tend=0x%lx\n", 3973 __LINE__, (unsigned long) wctx, (unsigned long) wctx->fp, 3974 (unsigned long) wctx->ln, (unsigned long) wctx->pc, (unsigned long) wctx->sbase, 3975 (unsigned long) wctx->sp, (unsigned long) wctx->tbgn, (unsigned long) wctx->tend); 3976 3977 if (jmp_reg_switch_mode == 
1) 3978 { // not deal with switch cases not ending with ret 3979 if (jmp_reg_switch_backup_ctx != NULL) 3980 __collector_memcpy (cur, jmp_reg_switch_backup_ctx, sizeof (*cur)); 3981 DprintfT (SP_DUMP_UNWIND, "stack_unwind jmp reg mode on: pc = 0x%lx cnt = %d, nctx = %d\n", wctx->pc, cnt, nctx); 3982 } 3983 3984 unsigned long *cur_fp = cur->fp; 3985 unsigned long *cur_sp = cur->sp; 3986 if (do_walk == 0) 3987 __collector_memcpy (&wctx_pc_save, wctx, sizeof (struct WalkContext)); 3988 3989 /* Resort to the frame pointer */ 3990 if (cur->fp_loc) 3991 cur->fp = cur->fp_sav; 3992 cur->sp = cur->fp; 3993 if ((unsigned long) cur->sp >= wctx->sbase || 3994 (unsigned long) cur->sp < wctx->sp) 3995 { 3996 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d do_walk=%d cur->sp=0x%p out of range. wctx->sbase=0x%lx wctx->sp=0x%lx wctx->pc=0x%lx\n", 3997 __LINE__, (int) do_walk, cur->sp, (unsigned long) wctx->sbase, 3998 (unsigned long) wctx->sp, (unsigned long) wctx->pc); 3999 if (do_walk == 0) 4000 { 4001 cur->sp = cur_sp; 4002 cur->fp = cur_fp; 4003 do_walk = 1; 4004 save_ctx = 1; 4005 goto startWalk; 4006 } 4007 if (save_ctx) 4008 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE); 4009 return RA_FAILURE; 4010 } 4011 4012 unsigned long fp = *cur->sp++; 4013 if (fp <= (unsigned long) cur->sp || fp >= wctx->sbase) 4014 { 4015 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d fp=0x%016llx out of range. 
cur->sp=%p wctx->sbase=0x%lx wctx->pc=0x%lx\n", 4016 __LINE__, (unsigned long long) fp, cur->sp, 4017 (unsigned long) wctx->sbase, (unsigned long) wctx->pc); 4018 if (do_walk == 0) 4019 { 4020 cur->sp = cur_sp; 4021 cur->fp = cur_fp; 4022 do_walk = 1; 4023 save_ctx = 1; 4024 goto startWalk; 4025 } 4026 if (save_ctx) 4027 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE); 4028 return RA_FAILURE; 4029 } 4030 4031 unsigned long ra = *cur->sp++; 4032 if (ra == 0) 4033 { 4034 cache_put (wctx, RA_EOSTCK); 4035 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d returns RA_END_OF_STACK wctx->pc = 0x%lx\n", __LINE__, wctx->pc); 4036 if (save_ctx) 4037 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_END_OF_STACK); 4038 return RA_END_OF_STACK; 4039 } 4040 4041 unsigned long tbgn = wctx->tbgn; 4042 unsigned long tend = wctx->tend; 4043 if (ra < tbgn || ra >= tend) 4044 { 4045 // We do not know yet if update_map_segments is really needed 4046 if (!__collector_check_segment (ra, &tbgn, &tend, 0)) 4047 { 4048 DprintfT (SP_DUMP_UNWIND, "unwind.c: __collector_check_segment fail. wctx->pc = 0x%lx\n", wctx->pc); 4049 if (do_walk == 0) 4050 { 4051 cur->sp = cur_sp; 4052 cur->fp = cur_fp; 4053 do_walk = 1; 4054 save_ctx = 1; 4055 goto startWalk; 4056 } 4057 if (save_ctx) 4058 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE); 4059 return RA_FAILURE; 4060 } 4061 } 4062 4063 unsigned long npc = adjust_ret_addr (ra, ra - tbgn, tend); 4064 if (npc == 0) 4065 { 4066 DprintfT (SP_DUMP_UNWIND, "unwind.c: adjust_ret_addr fail. 
wctx->pc = 0x%lx\n", wctx->pc);
      /* Last-resort FP walk failed as well: retry once with the full
	 code walk (do_walk) before reporting failure.  */
      if (do_walk == 0)
	{
	  cur->sp = cur_sp;
	  cur->fp = cur_fp;
	  do_walk = 1;
	  save_ctx = 1;
	  goto startWalk;
	}
      if (save_ctx)
	omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE);
      return RA_FAILURE;
    }
  /* Frame-pointer fallback succeeded: commit the unwound frame.  */
  wctx->pc = npc;
  wctx->sp = (unsigned long) cur->sp;
  wctx->fp = fp;
  wctx->tbgn = tbgn;
  wctx->tend = tend;

  if (save_ctx)
    {
      omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_SUCCESS);
      DprintfT (SP_DUMP_UNWIND, "unwind.c: cache walk context. wctx_pc_save->pc = 0x%lx\n", wctx_pc_save.pc);
    }
  return RA_SUCCESS;
}

/*
 * We have the return address, but we would like to report to the user
 * the calling PC, which is the instruction immediately preceding the
 * return address. Unfortunately, x86 instructions can have variable
 * length. So we back up 8 bytes and try to figure out where the
 * calling PC starts. (FWIW, call instructions are often 5-bytes long.)
 *
 * ra     - candidate return address (must be readable via
 *          getByteInstruction)
 * segoff - offset of ra from the start of its text segment; bounds how
 *          far back we may safely read
 * tend   - end address of the text segment containing ra
 * Returns the calling PC, or 0 when no plausible call site is found
 * (including when getByteInstruction reports unreadable memory).
 */
unsigned long
adjust_ret_addr (unsigned long ra, unsigned long segoff, unsigned long tend)
{
  unsigned long npc = 0;
  /* Try every candidate start ra-i, i = min(segoff,8) .. 2: decode
     prefixes, then check whether a call instruction ends exactly at ra.  */
  int i = segoff < 8 ? segoff : 8;
  for (; i > 1; i--)
    {
      unsigned char *ptr = (unsigned char*) ra - i;
      int z = 4;	/* operand-size of the immediate (shrunk by 0x66) */
      int a = 4;	/* address-size (shrunk by 0x67) */
      int done = 0;
      int bVal;
      /* Skip legacy prefixes (segment overrides, operand/address size).  */
      while (!done)
	{
	  bVal = getByteInstruction (ptr);
	  if (bVal < 0)
	    return 0;
	  switch (bVal)
	    {
	    case 0x26:
	    case 0x36:
#if WSIZE(64)
	      ptr += 1;
	      break;
#endif
	      /* In 32-bit mode 0x26/0x36 fall through and are treated
		 like 0x64/0x65 below.  */
	    case 0x64:
	    case 0x65:
	      bVal = getByteInstruction (ptr + 1);
	      if (bVal < 0)
		return 0;
	      if (bVal == 0xe8)
		// a workaround for bug 16193041, assuming "call Jz" has no segment override prefix
		done = 1;
	      else
		ptr += 1;
	      break;
	    case 0x66:
	      z = 2;
	      ptr += 1;
	      break;
	    case 0x67:
	      a = 2;
	      ptr += 1;
	      break;
	    default:
	      done = 1;
	      break;
	    }
	}
#if WSIZE(64)
      /* Skip a REX prefix; REX.W restores the 4-byte immediate size.  */
      bVal = getByteInstruction (ptr);
      if (bVal < 0)
	return 0;
      if (bVal >= 0x40 && bVal <= 0x4f)
	{ /* XXXX not all REX codes applicable */
	  if (bVal & 0x8)
	    z = 4;
	  ptr += 1;
	}
#endif
      int opcode = getByteInstruction (ptr);
      if (opcode < 0)
	return 0;
      ptr++;
      /* Advance ptr past the operands of the recognized call forms;
	 any other opcode means ra-i cannot be a call site.  */
      switch (opcode)
	{
	case 0xe8: /* call Jz (f64) */
	  ptr += z;
	  break;
	case 0x9a: /* callf Ap */
	  ptr += 2 + a;
	  break;
	case 0xff: /* calln Ev , callf Ep */
	  {
	    int extop = MRM_EXT (*ptr);
	    if (extop == 2 || extop == 3)
	      ptr = check_modrm (ptr);
	  }
	  break;
	default:
	  continue;
	}
      /* The decoded call ends exactly at ra: ra-i is the calling PC.  */
      if ((unsigned long) ptr == ra)
	{
	  npc = ra - i;
	  break;
	}
    }
  /* No call found: check whether ra itself is a signal-return
     trampoline, in which case report ra unchanged.  */
  if (npc == 0)
    {
      unsigned char * ptr = (unsigned char *) ra;
#if WSIZE(32)
      // test __kernel_sigreturn or __kernel_rt_sigreturn
      if ((ra + 7 < tend && getByteInstruction (ptr) == 0x58
	   && getByteInstruction (ptr + 1) == 0xb8
	   && getByteInstruction (ptr + 6) == 0xcd
	   && getByteInstruction (ptr + 7) == 0x80) /* pop %eax; mov $NNNN, %eax; int */
	  || (ra + 7 < tend && getByteInstruction (ptr) == 0x58
	      && getByteInstruction (ptr + 1) == 0xb8
	      && getByteInstruction (ptr + 6) == 0x0f
	      && getByteInstruction (ptr + 7) == 0x05) /* pop %eax; mov $NNNN, %eax; syscall */
	  || (ra + 6 < tend && getByteInstruction (ptr) == 0xb8
	      && getByteInstruction (ptr + 5) == 0xcd
	      && getByteInstruction (ptr + 6) == 0x80) /* mov $NNNN, %eax; int */
	  || (ra + 6 < tend && getByteInstruction (ptr) == 0xb8
	      && getByteInstruction (ptr + 5) == 0x0f
	      && getByteInstruction (ptr + 6) == 0x05)) /* mov $NNNN, %eax; syscall */
#else //WSIZE(64)
      // test __restore_rt
      if (ra + 8 < tend && getByteInstruction (ptr) == 0x48
	  && getByteInstruction (ptr + 7) == 0x0f
	  && getByteInstruction (ptr + 8) == 0x05) /* mov $NNNNNNNN, %rax; syscall */
#endif
	{
	  npc = ra;
	}
    }
  /* Still nothing: recognize JVM template-interpreter dispatch code
     (indirect "jmp *table(,reg,size)") for Java user threads.  The
     byte patterns below were observed in specific JDK releases (J170,
     J180/J190) -- see the disassembly comments.  */
  if (npc == 0 && __collector_java_mode
      && __collector_java_asyncgetcalltrace_loaded)
    { // detect jvm interpreter code for java user threads
      unsigned char * ptr = (unsigned char *) ra;
#if WSIZE(32)
      // up to J170
      /*
       * ff 24 9d e0 64 02 f5  jmp	*-0xafd9b20(,%ebx,4)
       * 8b 4e 01	       movl	1(%esi),%ecx
       * f7 d1		       notl	%ecx
       * 8b 5d ec	       movl	-0x14(%ebp),%ebx
       * c1 e1 02	       shll	$2,%ecx
       * eb d8		       jmp	.-0x26 [ 0x92a ]
       * 83 ec 08	       subl	$8,%esp	  || 8b 65 f8	movl -8(%ebp),%esp
       * */
      if (ra - 20 >= (ra - segoff) && ((*ptr == 0x83 && *(ptr + 1) == 0xec) || (*ptr == 0x8b && *(ptr + 1) == 0x65))
	  && *(ptr - 2) == 0xeb
	  && *(ptr - 5) == 0xc1 && *(ptr - 4) == 0xe1
	  && *(ptr - 8) == 0x8b && *(ptr - 7) == 0x5d
	  && *(ptr - 10) == 0xf7 && *(ptr - 9) == 0xd1
	  && *(ptr - 13) == 0x8b && *(ptr - 12) == 0x4e
	  && *(ptr - 20) == 0xff && *(ptr - 19) == 0x24 && *(ptr - 18) == 0x9d)
	{
	  npc = ra - 20;
	}
      // J180 J190
      // ff 24 9d ** ** ** **	jmp   *-0x*******(,%ebx,4)
      if (npc == 0
	  && ra - 7 >= (ra - segoff)
	  && *(ptr - 7) == 0xff
	  && *(ptr - 6) == 0x24
	  && *(ptr - 5) == 0x9d)
	{
	  npc = ra - 7;
	}
#else //WSIZE(64)
      // up to J170
      /*
       * 41 ff 24 da	jmp   *(%r10,%rbx,8)
       * 41 8b 4d 01	movl  1(%r13),%ecx
       * f7 d1		notl  %ecx
       * 48 8b 5d d8	movq  -0x28(%rbp),%rbx
       * c1 e1 02	shll  $2,%ecx
       * eb cc		jmp   .-0x32 [ 0xd23 ]
       * 48 8b 65 f0	movq  -0x10(%rbp),%rsp
       */
      if (ra - 19 >= (ra - segoff) && *ptr == 0x48 && ((*(ptr + 1) == 0x8b && *(ptr + 2) == 0x65) || (*(ptr + 1) == 0x83 && *(ptr + 2) == 0xec))
	  && *(ptr - 2) == 0xeb
	  && *(ptr - 5) == 0xc1 && *(ptr - 4) == 0xe1
	  && *(ptr - 9) == 0x48 && *(ptr - 8) == 0x8b && *(ptr - 7) == 0x5d
	  && *(ptr - 11) == 0xf7 && *(ptr - 10) == 0xd1
	  && *(ptr - 15) == 0x41 && *(ptr - 14) == 0x8b && *(ptr - 13) == 0x4d
	  && *(ptr - 19) == 0x41 && *(ptr - 18) == 0xff)
	npc = ra - 19;
      // J180 J190
      // 41 ff 24 da	jmp   *(%r10,%rbx,8)
      if (npc == 0
	  && ra - 4 >= (ra - segoff)
	  && *(ptr - 4) == 0x41
	  && *(ptr - 3) == 0xff
	  && *(ptr - 2) == 0x24
	  && *(ptr - 1) == 0xda)
	npc = ra - 4;
#endif
    }

  return npc;
}

/*
 * Parses AVX instruction and returns its length.
 * Returns 0 if parsing failed.
 * https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
 */
static int
parse_x86_AVX_instruction (unsigned char *pc)
{
  /*
   * VEX prefix has a two-byte form (0xc5) and a three byte form (0xc4).
   * If an instruction syntax can be encoded using the two-byte form,
   * it can also be encoded using the three byte form of VEX.
   * The latter increases the length of the instruction by one byte.
   * This may be helpful in some situations for code alignment.
4300 * 4301 Byte 0 Byte 1 Byte 2 Byte 3 4302 (Bit Position) 7 0 7 6 5 4 0 7 6 3 2 10 4303 3-byte VEX [ 11000100 ] [ R X B | m-mmmm ] [ W | vvvv | L | pp ] 4304 7 0 7 6 3 2 10 4305 2-byte VEX [ 11000101 ] [ R | vvvv | L | pp ] 4306 7 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 4307 4-byte EVEX [ 01100010 ] [ R X B R1 0 0 m m ] [ W v v v v 1 p p ] [ z L1 L B1 V1 a a a ] 4308 4309 R: REX.R in 1's complement (inverted) form 4310 0: Same as REX.R=1 (64-bit mode only) 4311 1: Same as REX.R=0 (must be 1 in 32-bit mode) 4312 4313 X: REX.X in 1's complement (inverted) form 4314 0: Same as REX.X=1 (64-bit mode only) 4315 1: Same as REX.X=0 (must be 1 in 32-bit mode) 4316 4317 B: REX.B in 1's complement (inverted) form 4318 0: Same as REX.B=1 (64-bit mode only) 4319 1: Same as REX.B=0 (Ignored in 32-bit mode). 4320 4321 W: opcode specific (use like REX.W, or used for opcode 4322 extension, or ignored, depending on the opcode byte) 4323 4324 m-mmmm: 4325 00000: Reserved for future use (will #UD) 4326 00001: implied 0F leading opcode byte 4327 00010: implied 0F 38 leading opcode bytes 4328 00011: implied 0F 3A leading opcode bytes 4329 00100-11111: Reserved for future use (will #UD) 4330 4331 vvvv: a register specifier (in 1's complement form) or 1111 if unused. 
4332 4333 L: Vector Length 4334 0: scalar or 128-bit vector 4335 1: 256-bit vector 4336 4337 pp: opcode extension providing equivalent functionality of a SIMD prefix 4338 00: None 4339 01: 66 4340 10: F3 4341 11: F2 4342 * 4343 * Example: 0xc5f877L vzeroupper 4344 * VEX prefix: 0xc5 0x77 4345 * Opcode: 0xf8 4346 * 4347 */ 4348 int len = 0; 4349 disassemble_info dis_info; 4350 dis_info.arch = bfd_arch_i386; 4351 dis_info.mach = bfd_mach_x86_64; 4352 dis_info.flavour = bfd_target_unknown_flavour; 4353 dis_info.endian = BFD_ENDIAN_UNKNOWN; 4354 dis_info.endian_code = dis_info.endian; 4355 dis_info.octets_per_byte = 1; 4356 dis_info.disassembler_needs_relocs = FALSE; 4357 dis_info.fprintf_func = fprintf_func; 4358 dis_info.fprintf_styled_func = fprintf_styled_func; 4359 dis_info.stream = NULL; 4360 dis_info.disassembler_options = NULL; 4361 dis_info.read_memory_func = read_memory_func; 4362 dis_info.memory_error_func = memory_error_func; 4363 dis_info.print_address_func = print_address_func; 4364 dis_info.symbol_at_address_func = symbol_at_address_func; 4365 dis_info.symbol_is_valid = symbol_is_valid; 4366 dis_info.display_endian = BFD_ENDIAN_UNKNOWN; 4367 dis_info.symtab = NULL; 4368 dis_info.symtab_size = 0; 4369 dis_info.buffer_vma = 0; 4370 dis_info.buffer = pc; 4371 dis_info.buffer_length = 8; 4372 4373 disassembler_ftype disassemble = print_insn_i386; 4374 if (disassemble == NULL) 4375 { 4376 DprintfT (SP_DUMP_UNWIND, "parse_x86_AVX_instruction ERROR: unsupported disassemble\n"); 4377 return 0; 4378 } 4379 len = disassemble (0, &dis_info); 4380 DprintfT (SP_DUMP_UNWIND, "parse_x86_AVX_instruction: returned %d pc: %p\n", len, pc); 4381 return len; 4382} 4383 4384/* 4385 * In the Intel world, a stack frame looks like this: 4386 * 4387 * %fp0->| | 4388 * |-------------------------------| 4389 * | Args to next subroutine | 4390 * |-------------------------------|-\ 4391 * %sp0->| One word struct-ret address | | 4392 * |-------------------------------| > minimum stack 
frame (8 bytes)
 *	 |  Previous frame pointer (%fp0)|  |
 * %fp1->|-------------------------------|-/
 *	 |	 Local variables	 |
 * %sp1->|-------------------------------|
 *
 */

/*
 * Walk the user stack described by CONTEXT and record one saved pc per
 * frame into BUF (viewed as an array of longs), innermost frame first.
 * SIZE is the byte capacity of BUF.  MODE carries FRINFO_* flags:
 * FRINFO_NO_WALK disables the instruction-level walk inside
 * find_i386_ret_addr, and FRINFO_FROM_STACK requests one extra frame.
 * BPTR/EPTR, when non-NULL, bound the walk by stack address: frames with
 * sp <= bptr are collapsed into lbuf[0], and reaching eptr stops the walk
 * (dropping the last two collected entries) — NOTE(review): these
 * semantics are inferred from the comparisons below; confirm against the
 * callers of stack_unwind.
 * Returns the number of bytes stored in BUF; if BUF filled up, the last
 * slot holds SP_TRUNC_STACK_MARKER.
 */
int
stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode)
{
  long *lbuf = (long*) buf;
  int lsize = size / sizeof (long);
  int ind = 0;
  int do_walk = 1;
  int extra_frame = 0;
  if (mode & FRINFO_NO_WALK)
    do_walk = 0;
  if ((mode & 0xffff) == FRINFO_FROM_STACK)
    extra_frame = 1;

  /*
   * trace the stack frames from user stack.
   * We are assuming that the frame pointer and return address
   * are null when we are at the top level.
   */
  struct WalkContext wctx;
  wctx.pc = GET_PC (context);
  wctx.sp = GET_SP (context);
  wctx.fp = GET_FP (context);
  wctx.ln = (unsigned long) context->uc_link;
  /* Stack base cached in thread-specific data; if absent or below the
     current sp, fall back to a 1 MB guess above sp.  */
  unsigned long *sbase = (unsigned long*) __collector_tsd_get_by_key (unwind_key);
  if (sbase && *sbase > wctx.sp)
    wctx.sbase = *sbase;
  else
    {
      wctx.sbase = wctx.sp + 0x100000;
      if (wctx.sbase < wctx.sp)	/* overflow */
	wctx.sbase = (unsigned long) - 1;
    }
  // We do not know yet if update_map_segments is really needed
  __collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0);

  for (;;)
    {
      if (ind >= lsize || wctx.pc == 0)
	break;
      /* Frames at or below bptr are all reported in slot 0; only the first
	 one also advances ind.  */
      if (bptr != NULL && extra_frame && wctx.sp <= (unsigned long) bptr && ind < 2)
	{
	  lbuf[0] = wctx.pc;
	  if (ind == 0)
	    {
	      ind++;
	      if (ind >= lsize)
		break;
	    }
	}
      if (bptr == NULL || wctx.sp > (unsigned long) bptr)
	{
	  lbuf[ind++] = wctx.pc;
	  if (ind >= lsize)
	    break;
	}

      /* Inner loop: advance wctx by one frame, handling signal
	 trampolines specially.  */
      for (;;)
	{
	  /* Reached the caller-supplied end pointer: stop, discarding the
	     two most recently collected entries.  */
	  if (eptr != NULL && wctx.sp >= (unsigned long) eptr)
	    {
	      ind = ind >= 2 ? ind - 2 : 0;
	      goto exit;
	    }
	  int ret = find_i386_ret_addr (&wctx, do_walk);
	  DprintfT (SP_DUMP_UNWIND, "stack_unwind (x86 walk):%d find_i386_ret_addr returns %d\n", __LINE__, ret);
	  if (ret == RA_FAILURE)
	    {
	      /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
	      goto exit;
	    }

	  if (ret == RA_END_OF_STACK)
	    goto exit;
#if WSIZE(32)
	  /* rt_sigreturn trampoline: the third word on the stack points at
	     the ucontext saved by the kernel; resume the walk from the
	     interrupted pc/sp/fp.  */
	  if (ret == RA_RT_SIGRETURN)
	    {
	      struct SigFrame
	      {
		unsigned long arg0;
		unsigned long arg1;
		unsigned long arg2;
	      } *sframe = (struct SigFrame*) wctx.sp;
	      ucontext_t *ncontext = (ucontext_t*) sframe->arg2;
	      wctx.pc = GET_PC (ncontext);
	      if (!__collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0))
		{
		  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
		  goto exit;
		}
	      unsigned long nsp = GET_SP (ncontext);
	      /* Check the new stack pointer */
	      if (nsp <= sframe->arg2 || nsp > sframe->arg2 + sizeof (ucontext_t) + 1024)
		{
		  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
		  goto exit;
		}
	      wctx.sp = nsp;
	      wctx.fp = GET_FP (ncontext);
	      break;
	    }
	  /* Legacy (non-RT) signal frame: a struct sigcontext sits directly
	     at sp.  */
	  else if (ret == RA_SIGRETURN)
	    {
	      struct sigcontext *sctx = (struct sigcontext*) wctx.sp;
	      wctx.pc = sctx->eip;
	      if (!__collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0))
		{
		  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
		  goto exit;
		}
	      wctx.sp = sctx->esp;
	      wctx.fp = sctx->ebp;
	      break;
	    }
#elif WSIZE(64)
	  /* On x86_64 the ucontext saved by the kernel sits directly at sp
	     for an rt_sigreturn trampoline.  */
	  if (ret == RA_RT_SIGRETURN)
	    {
	      ucontext_t *ncontext = (ucontext_t*) wctx.sp;
	      wctx.pc = GET_PC (ncontext);
	      if (!__collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0))
		{
		  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
		  goto exit;
		}
	      unsigned long nsp = GET_SP (ncontext);
	      /* Check the new stack pointer */
	      if (nsp <= wctx.sp || nsp > wctx.sp + sizeof (ucontext_t) + 1024)
		{
		  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
		  goto exit;
		}
	      wctx.sp = nsp;
	      wctx.fp = GET_FP (ncontext);
	      break;
	    }
#endif /* WSIZE() */
	  if (bptr != NULL && extra_frame && wctx.sp <= (unsigned long) bptr && ind < 2)
	    {
	      lbuf[0] = wctx.pc;
	      if (ind == 0)
		{
		  ind++;
		  if (ind >= lsize)
		    break;
		}
	    }
	  if (bptr == NULL || wctx.sp > (unsigned long) bptr)
	    {
	      lbuf[ind++] = wctx.pc;
	      if (ind >= lsize)
		goto exit;
	    }
	}
    }

exit:
#if defined(DEBUG)
  if ((SP_DUMP_UNWIND & __collector_tracelevel) != 0)
    {
      DprintfT (SP_DUMP_UNWIND, "stack_unwind (x86 walk):%d found %d frames\n\n", __LINE__, ind);
      for (int i = 0; i < ind; i++)
	DprintfT (SP_DUMP_UNWIND, "  %3d: 0x%lx\n", i, (unsigned long) lbuf[i]);
    }
#endif
  dump_stack (__LINE__);
  /* Buffer full: overwrite the last slot with the truncation marker.  */
  if (ind >= lsize)
    {
      ind = lsize - 1;
      lbuf[ind++] = (unsigned long) SP_TRUNC_STACK_MARKER;
    }
  return ind * sizeof (long);
}

#elif ARCH(Aarch64)

/*
 * AArch64 variant of stack_unwind: a simple frame-pointer chain walk.
 * Fills BUF (viewed as an array of __u64) with one saved pc per frame,
 * innermost first, starting from CONTEXT.  Returns the number of bytes
 * stored; on overflow the last slot holds SP_TRUNC_STACK_MARKER.
 * BPTR/EPTR are not used to clip the walk here (unlike the x86 variant).
 */
static int
stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode)
{
  /* Apparently here only to reference the otherwise-unused parameters
     (bptr, eptr, mode); the fetched byte is discarded —
     NOTE(review): confirm this has no intended side effect.  */
  if (buf && bptr && eptr && context && size + mode > 0)
    getByteInstruction ((unsigned char *) eptr);
  int ind = 0;
  __u64 *lbuf = (void *) buf;
  int lsize = size / sizeof (__u64);
  __u64 pc = context->uc_mcontext.pc;
  __u64 sp = context->uc_mcontext.sp;
  __u64 stack_base;
  unsigned long tbgn = 0;
  unsigned long tend = 0;

  /* Stack base cached in thread-specific data; otherwise guess 1 MB
     above the current sp.  */
  unsigned long *sbase = (unsigned long*) __collector_tsd_get_by_key (unwind_key);
  if (sbase && *sbase > sp)
    stack_base = *sbase;
  else
    {
      stack_base = sp + 0x100000;
      if (stack_base < sp) // overflow
	stack_base = (__u64) -1;
    }
  DprintfT (SP_DUMP_UNWIND,
	    "unwind.c:%d stack_unwind %2d pc=0x%llx sp=0x%llx stack_base=0x%llx\n",
	    __LINE__, ind, (unsigned long long) pc, (unsigned long long) sp,
	    (unsigned long long) stack_base);

  while (sp && pc)
    {
      DprintfT (SP_DUMP_UNWIND,
		"unwind.c:%d stack_unwind %2d pc=0x%llx sp=0x%llx\n",
		__LINE__, ind, (unsigned long long) pc, (unsigned long long) sp);
//	  Dl_info dlinfo;
//	  if (!dladdr ((void *) pc, &dlinfo))
//	    break;
//	  DprintfT (SP_DUMP_UNWIND, "%2d: %llx <%s+%llu> (%s)\n",
//		    ind, (unsigned long long) pc,
//		    dlinfo.dli_sname ? dlinfo.dli_sname : "(?)",
//		    (unsigned long long) pc - (unsigned long long) dlinfo.dli_saddr,
//		    dlinfo.dli_fname);
      lbuf[ind++] = pc;
      /* Stop on a full buffer, on leaving the stack, or on a frame pointer
	 that is not 16-byte aligned (required for a valid frame record).  */
      if (ind >= lsize || sp >= stack_base || (sp & 15) != 0)
	break;
      /* Verify pc lies in an executable segment (cached in [tbgn, tend)).  */
      if (pc < tbgn || pc >= tend)
	if (!__collector_check_segment ((unsigned long) pc, &tbgn, &tend, 0))
	  {
	    DprintfT (SP_DUMP_UNWIND,
		      "unwind.c:%d __collector_check_segment failed. sp=0x%lx\n",
		      __LINE__, (unsigned long) sp);
	    break;
	  }
      /* Follow the frame record: word [0] is the previous frame pointer,
	 word [1] the saved link register (AAPCS64 frame-record layout).  */
      pc = ((__u64 *) sp)[1];
      __u64 old_sp = sp;
      sp = ((__u64 *) sp)[0];
      /* Frame pointers must grow monotonically toward the stack base.  */
      if (sp < old_sp)
	break;
    }
  if (ind >= lsize)
    {
      ind = lsize - 1;
      lbuf[ind++] = (__u64) SP_TRUNC_STACK_MARKER;
    }
  return ind * sizeof (__u64);
}
#endif /* ARCH() */