1/* Copyright (C) 2021 Free Software Foundation, Inc.
2   Contributed by Oracle.
3
4   This file is part of GNU Binutils.
5
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 3, or (at your option)
9   any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program; if not, write to the Free Software
18   Foundation, 51 Franklin Street - Fifth Floor, Boston,
19   MA 02110-1301, USA.  */
20
21#include "config.h"
22#include <alloca.h>
23#include <dlfcn.h>
24#include <stdlib.h>
25#include <signal.h>
26#include <unistd.h>
27#include <pthread.h>
28
29#include "gp-defs.h"
30#include "collector.h"
31#include "gp-experiment.h"
32#include "memmgr.h"
33#include "tsd.h"
34
35/* Get dynamic module interface*/
36#include "collector_module.h"
37
38/* Get definitions for SP_LEAF_CHECK_MARKER, SP_TRUNC_STACK_MARKER */
39#include "data_pckts.h"
40
#if ARCH(SPARC)
/* Layout of a SPARC register-save area on the stack, as flushed by a
   register-window spill (or explicit 'flushw'/'ta 3').  The unwinder
   follows fr_savfp / fr_savpc to walk frames.  */
struct frame
{
  long fr_local[8];         /* saved locals */
  long fr_arg[6];           /* saved arguments [0 - 5] */
  struct frame *fr_savfp;   /* saved frame pointer */
  long fr_savpc;            /* saved program counter */
#if WSIZE(32)
  char *fr_stret;           /* struct return addr */
#endif
  long fr_argd[6];          /* arg dump area */
  long fr_argx[1];          /* array of args past the sixth */
};

#elif ARCH(Intel)
/* Minimal x86 frame record: the frame-pointer chain entry that
   %ebp/%rbp points at when the standard prologue was used.  */
struct frame
{
  unsigned long fr_savfp;   /* caller's saved frame pointer */
  unsigned long fr_savpc;   /* return address */
};
#endif
62
/* Set the debug trace level */
#define DBG_LT0 0
#define DBG_LT1	1
#define DBG_LT2	2
#define DBG_LT3	3

/* Optional byte-read hook installed by a VM (resolved via dlsym of
   "Async_VM_ReadByteInstruction" in __collector_ext_unwind_init);
   lets the unwinder read instruction bytes from VM-managed memory.  */
int (*__collector_VM_ReadByteInstruction)(unsigned char *) = NULL;
/* Return codes of the hook above.  */
#define VM_NO_ACCESS        (-1)
#define VM_NOT_VM_MEMORY    (-2)
#define VM_NOT_X_SEGMENT    (-3)

/* Is P inside the half-open interval [BGN, END)?  */
#define isInside(p, bgn, end) ((p) >= (bgn) && (p) < (end))
75
76/*
77 * Weed through all the arch dependent stuff to get the right definition
 * for 'pc' in the ucontext structure.  The system header files are a mess
 * when dealing with all the arches (just look for PC, R_PC, REG_PC).
80 *
81 */
82
83#if ARCH(SPARC)
84
85#define IN_BARRIER(x) \
86	( barrier_hdl && \
87	  (unsigned long)x >= barrier_hdl && \
88	  (unsigned long)x < barrier_hdlx )
89static unsigned long barrier_hdl = 0;
90static unsigned long barrier_hdlx = 0;
91
92#if WSIZE(64)
93#define STACK_BIAS 2047
94#define IN_TRAP_HANDLER(x) \
95	( misalign_hdl && \
96	  (unsigned long)x >= misalign_hdl && \
97	  (unsigned long)x < misalign_hdlx )
98static unsigned long misalign_hdl = 0;
99static unsigned long misalign_hdlx = 0;
100#elif  WSIZE(32)
101#define STACK_BIAS 0
102#endif
103
104#if WSIZE(64)
105#define GET_GREG(ctx,reg) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[(reg)])
106#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[MC_O6])
107#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[MC_PC])
108#else
109#define GET_GREG(ctx,reg) (((ucontext_t*)ctx)->uc_mcontext.gregs[(reg)])
110#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_O6])
111#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_PC])
112#endif
113
114#elif ARCH(Intel)
115#include "opcodes/disassemble.h"
116
/* disassemble_info fprintf callback: discard all disassembler text
   output; only instruction boundaries matter to the unwinder.  */
static int
fprintf_func (void *arg ATTRIBUTE_UNUSED, const char *fmt ATTRIBUTE_UNUSED, ...)
{
  return 0;
}
122
/* Styled variant of the fprintf callback; likewise discards output.  */
static int
fprintf_styled_func (void *arg ATTRIBUTE_UNUSED,
		      enum disassembler_style st ATTRIBUTE_UNUSED,
		      const char *fmt ATTRIBUTE_UNUSED, ...)
{
  return 0;
}
130
/* Get LENGTH bytes from info's buffer, at target address memaddr.
   Transfer them to myaddr.  Returns 0 on success, -1 if any part of
   the requested range lies outside the buffer.  */
static int
read_memory_func (bfd_vma memaddr, bfd_byte *myaddr, unsigned int length,
		  disassemble_info *info)
{
  unsigned int opb = info->octets_per_byte;
  size_t end_addr_offset = length / opb;
  size_t max_addr_offset = info->buffer_length / opb;
  size_t octets = (memaddr - info->buffer_vma) * opb;
  /* Bounds checks mirror opcodes' buffer_read_memory(): reject reads
     below buffer_vma, past the end of the buffer, or past stop_vma.  */
  if (memaddr < info->buffer_vma
      || memaddr - info->buffer_vma > max_addr_offset
      || memaddr - info->buffer_vma + end_addr_offset > max_addr_offset
      || (info->stop_vma && (memaddr >= info->stop_vma
			     || memaddr + end_addr_offset > info->stop_vma)))
    return -1;
  memcpy (myaddr, info->buffer + octets, length);
  return 0;
}
150
/* disassemble_info print_address callback: addresses in the listing
   are not needed, so do nothing.  */
static void
print_address_func (bfd_vma addr ATTRIBUTE_UNUSED,
		    disassemble_info *info ATTRIBUTE_UNUSED) { }
154
/* disassemble_info symbol lookup callback: no symbol table is
   available while unwinding, so always report "no symbol".  */
static asymbol *
symbol_at_address_func (bfd_vma addr ATTRIBUTE_UNUSED,
			disassemble_info *info ATTRIBUTE_UNUSED)
{
  return NULL;
}
161
/* disassemble_info symbol filter callback: accept every symbol
   (none are ever presented; see symbol_at_address_func).  */
static bfd_boolean
symbol_is_valid (asymbol *sym ATTRIBUTE_UNUSED,
		 disassemble_info *info ATTRIBUTE_UNUSED)
{
  return TRUE;
}
168
/* disassemble_info memory-error callback: read failures are already
   handled via read_memory_func's return value; ignore them here.  */
static void
memory_error_func (int status ATTRIBUTE_UNUSED, bfd_vma addr ATTRIBUTE_UNUSED,
		   disassemble_info *info ATTRIBUTE_UNUSED) { }
172
173
174#if WSIZE(32)
175#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_EIP])
176#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_ESP])
177#define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_EBP])
178
179#elif WSIZE(64)
180#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RIP])
181#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RSP])
182#define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RBP])
183#endif /* WSIZE() */
184
185#elif ARCH(Aarch64)
186#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[15])
187#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[13])
188#define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[14])
189#endif /* ARCH() */
190
191/*
192 * FILL_CONTEXT() for all platforms
193 * Could use getcontext() except:
194 * - it's not guaranteed to be async signal safe
195 * - it's a system call and not that lightweight
196 * - it's not portable as of POSIX.1-2008
197 * So we just use low-level mechanisms to fill in the few fields we need.
198 */
199#if ARCH(SPARC)
200#if WSIZE(32)
201#define FILL_CONTEXT(context) \
202	{ \
203	greg_t fp; \
204	__asm__ __volatile__( "mov %%i6, %0" : "=r" (fp) ); \
205	__asm__ __volatile__( "ta 3" ); \
206	GET_SP(context) = fp; \
207	GET_PC(context) = (greg_t)0; \
208	}
209
210#elif WSIZE(64)
211#define FILL_CONTEXT(context) \
212	{ \
213	    greg_t fp; \
214	    __asm__ __volatile__( "mov %%i6, %0" : "=r" (fp) ); \
215	    __asm__ __volatile__( "flushw" ); \
216	    GET_SP(context) = fp; \
217	    GET_PC(context) = (greg_t)0; \
218	}
219#endif /* WSIZE() */
220
221#elif ARCH(Intel)
222#define FILL_CONTEXT(context) \
223	{ \
224	    context->uc_link = NULL; \
225	    void *sp = __collector_getsp(); \
226	    GET_SP(context) = (intptr_t)sp; \
227	    GET_FP(context) = (intptr_t)__collector_getfp(); \
228	    GET_PC(context) = (intptr_t)__collector_getpc(); \
229	    context->uc_stack.ss_sp = sp; \
230	    context->uc_stack.ss_size = 0x100000; \
231	}
232
233#elif ARCH(Aarch64)
234#define FILL_CONTEXT(context) \
235    { getcontext(context);  \
236      context->uc_mcontext.sp = (__u64) __builtin_frame_address(0); \
237    }
238
239#endif /* ARCH() */
240
241static int
242getByteInstruction (unsigned char *p)
243{
244  if (__collector_VM_ReadByteInstruction)
245    {
246      int v = __collector_VM_ReadByteInstruction (p);
247      if (v != VM_NOT_VM_MEMORY)
248	return v;
249    }
250  return *p;
251}
252
/* Output handle for frame-info packets (SP_FRINFO_FILE).  */
struct DataHandle *dhndl = NULL;

/* TSD key whose per-thread slot holds the end-of-stack address set by
   __collector_ext_unwind_key_init().  */
static unsigned unwind_key = COLLECTOR_TSD_INVALID_KEY;

/* To support two OpenMP API's we use a pointer
 * to the actual function.
 */
int (*__collector_omp_stack_trace)(char*, int, hrtime_t, void*) = NULL;
int (*__collector_mpi_stack_trace)(char*, int, hrtime_t) = NULL;

/* Stack-depth limits; overridable via GPROFNG_MAX_CALL_STACK_DEPTH and
   GPROFNG_JAVA_MAX_CALL_STACK_DEPTH (see __collector_ext_unwind_init).  */
#define DEFAULT_MAX_NFRAMES 256
static int max_native_nframes = DEFAULT_MAX_NFRAMES;
static int max_java_nframes = DEFAULT_MAX_NFRAMES;

/* Buffer bytes needed to hold NFRAMES frames plus end markers.  */
#define NATIVE_FRAME_BYTES(nframes) ( ((nframes)+1) * sizeof(long)          )
#define JAVA_FRAME_BYTES(nframes)   ( ((nframes)+1) * sizeof(long) * 2 + 16 )
#define OVERHEAD_BYTES ( 2 * sizeof(long) + 2 * sizeof(Stack_info) )

/* Multipliers for the rolling uid/index hashes over stack words.  The
   *_INV constants are used to peel words back off a hash during
   compression (presumably the multiplicative inverses mod 2^64 —
   see compute_uid()).  */
#define ROOT_UID	801425552975190205ULL
#define ROOT_UID_INV	92251691606677ULL
#define ROOT_IDX	13907816567264074199ULL
#define ROOT_IDX_INV	2075111ULL
#define	UIDTableSize	1048576
/* Open hash table of already-recorded stack uids; racy by design
   (see the comments in compute_uid()).  */
static volatile uint64_t *UIDTable = NULL;
static volatile int seen_omp = 0;

static int stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode);
static FrameInfo compute_uid (Frame_packet *frp);
static int omp_no_walk = 0;

#if ARCH(Intel)
#define ValTableSize    1048576
#define OmpValTableSize 65536
static unsigned long *AddrTable_RA_FROMFP = NULL; // Cache for RA_FROMFP pcs
static unsigned long *AddrTable_RA_EOSTCK = NULL; // Cache for RA_EOSTCK pcs
static struct WalkContext *OmpCurCtxs = NULL;
static struct WalkContext *OmpCtxs = NULL;
static uint32_t *OmpVals = NULL;
static unsigned long *OmpRAs = NULL;
static unsigned long adjust_ret_addr (unsigned long ra, unsigned long segoff, unsigned long tend);
static int parse_x86_AVX_instruction (unsigned char *pc);

/* Snapshot of the unwinder's machine state while walking one frame.  */
struct WalkContext
{
  unsigned long pc;
  unsigned long sp;
  unsigned long fp;
  unsigned long ln;
  unsigned long sbase; /* stack boundary */
  unsigned long tbgn;  /* current memory segment start */
  unsigned long tend;  /* current memory segment end */
};
#endif
306
307#if defined(DEBUG) && ARCH(Intel)
308#include <execinfo.h>
309
310static void
311dump_stack (int nline)
312{
313  if ((__collector_tracelevel & SP_DUMP_STACK) == 0)
314    return;
315
316  enum Constexpr { MAX_SIZE = 1024 };
317  void *array[MAX_SIZE];
318  size_t sz = backtrace (array, MAX_SIZE);
319  char **strings = backtrace_symbols (array, sz);
320  DprintfT (SP_DUMP_STACK, "\ndump_stack: %d size=%d\n", nline, (int) sz);
321  for (int i = 0; i < sz; i++)
322    DprintfT (SP_DUMP_STACK, "  %3d:  %p %s\n", i, array[i],
323	     strings[i] ? strings[i] : "???");
324}
325
326#define dump_targets(nline, ntrg, targets) \
327    if ((__collector_tracelevel & SP_DUMP_UNWIND) != 0) \
328	for(int i = 0; i < ntrg; i++) \
329	     DprintfT (SP_DUMP_UNWIND, "  %2d: 0x%lx\n", i, (long) targets[i])
330#else
331#define dump_stack(x)
332#define dump_targets(nline, ntrg, targets)
333#endif
334
335void
336__collector_ext_unwind_key_init (int isPthread, void * stack)
337{
338  void * ptr = __collector_tsd_get_by_key (unwind_key);
339  if (ptr == NULL)
340    {
341      TprintfT (DBG_LT2, "__collector_ext_unwind_key_init: cannot get tsd\n");
342      return;
343    }
344  if (isPthread)
345    {
346      size_t stack_size = 0;
347      void *stack_addr = 0;
348      pthread_t pthread = pthread_self ();
349      pthread_attr_t attr;
350      int err = pthread_getattr_np (pthread, &attr);
351      TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: pthread: 0x%lx err: %d\n", pthread, err);
352      if (err == 0)
353	{
354	  err = pthread_attr_getstack (&attr, &stack_addr, &stack_size);
355	  if (err == 0)
356	    stack_addr = (char*) stack_addr + stack_size;
357	  TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: stack_size=0x%lx eos=%p err=%d\n",
358		    (long) stack_size, stack_addr, err);
359	  err = pthread_attr_destroy (&attr);
360	  TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: destroy: %d\n", err);
361	}
362      *(void**) ptr = stack_addr;
363    }
364  else
365    *(void**) ptr = stack;  // cloned thread
366}
367
368void
369__collector_ext_unwind_init (int record)
370{
371  int sz = UIDTableSize * sizeof (*UIDTable);
372  UIDTable = (uint64_t*) __collector_allocCSize (__collector_heap, sz, 1);
373  if (UIDTable == NULL)
374    {
375      __collector_terminate_expt ();
376      return;
377    }
378  CALL_UTIL (memset)((void*) UIDTable, 0, sz);
379
380  char *str = CALL_UTIL (getenv)("GPROFNG_JAVA_MAX_CALL_STACK_DEPTH");
381  if (str != NULL && *str != 0)
382    {
383      char *endptr;
384      int n = CALL_UTIL (strtol)(str, &endptr, 0);
385      if (endptr != str && n >= 0)
386	{
387	  if (n < 5)
388	    n = 5;
389	  if (n > MAX_STACKDEPTH)
390	    n = MAX_STACKDEPTH;
391	  max_java_nframes = n;
392	}
393    }
394
395  str = CALL_UTIL (getenv)("GPROFNG_MAX_CALL_STACK_DEPTH");
396  if (str != NULL && *str != 0)
397    {
398      char *endptr = str;
399      int n = CALL_UTIL (strtol)(str, &endptr, 0);
400      if (endptr != str && n >= 0)
401	{
402	  if (n < 5)
403	    n = 5;
404	  if (n > MAX_STACKDEPTH)
405	    n = MAX_STACKDEPTH;
406	  max_native_nframes = n;
407	}
408    }
409
410  TprintfT (DBG_LT0, "GPROFNG_MAX_CALL_STACK_DEPTH=%d  GPROFNG_JAVA_MAX_CALL_STACK_DEPTH=%d\n",
411	    max_native_nframes, max_java_nframes);
412  omp_no_walk = 1;
413
414  if (__collector_VM_ReadByteInstruction == NULL)
415    __collector_VM_ReadByteInstruction = (int(*)()) dlsym (RTLD_DEFAULT, "Async_VM_ReadByteInstruction");
416
417#if ARCH(SPARC)
418#if WSIZE(64)
419  misalign_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__misalign_trap_handler");
420  misalign_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__misalign_trap_handler_end");
421  if (misalign_hdlx == 0)
422    misalign_hdlx = misalign_hdl + 292;
423  barrier_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_");
424  barrier_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_Dummy_");
425  if (barrier_hdlx == 0)
426    barrier_hdl = 0;
427#else
428  barrier_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_");
429  barrier_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_Dummy_");
430  if (barrier_hdlx == 0)
431    barrier_hdl = 0;
432#endif /* WSIZE() */
433
434#elif ARCH(Intel)
435  sz = ValTableSize * sizeof (*AddrTable_RA_FROMFP);
436  AddrTable_RA_FROMFP = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
437  sz = ValTableSize * sizeof (*AddrTable_RA_EOSTCK);
438  AddrTable_RA_EOSTCK = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
439  if (omp_no_walk && (__collector_omp_stack_trace != NULL || __collector_mpi_stack_trace != NULL))
440    {
441      sz = OmpValTableSize * sizeof (*OmpCurCtxs);
442      OmpCurCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
443      sz = OmpValTableSize * sizeof (*OmpCtxs);
444      OmpCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
445      sz = OmpValTableSize * sizeof (*OmpVals);
446      OmpVals = (uint32_t*) __collector_allocCSize (__collector_heap, sz, 1);
447      sz = OmpValTableSize * sizeof (*OmpRAs);
448      OmpRAs = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
449      if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL)
450	{
451	  TprintfT (0, "unwind_init() ERROR: failed; terminating experiment\n");
452	  __collector_terminate_expt ();
453	  return;
454	}
455    }
456#endif /* ARCH() */
457
458  if (record)
459    {
460      dhndl = __collector_create_handle (SP_FRINFO_FILE);
461      __collector_log_write ("<%s name=\"%s\" format=\"binary\"/>\n", SP_TAG_DATAPTR, SP_FRINFO_FILE);
462    }
463
464  unwind_key = __collector_tsd_create_key (sizeof (void*), NULL, NULL);
465  if (unwind_key == COLLECTOR_TSD_INVALID_KEY)
466    {
467      TprintfT (0, "unwind_init: ERROR: TSD key create failed.\n");
468      __collector_log_write ("<%s kind=\"%s\" id=\"%d\">TSD key not created</%s>\n",
469			     SP_TAG_EVENT, SP_JCMD_CERROR, COL_ERROR_GENERAL, SP_TAG_EVENT);
470      return;
471    }
472  TprintfT (0, "unwind_init() completed normally\n");
473  return;
474}
475
/* Stop recording frame-info packets and release the output handle.  */
void
__collector_ext_unwind_close ()
{
  __collector_delete_handle (dhndl);
  dhndl = NULL;
}
482
483void*
484__collector_ext_return_address (unsigned level)
485{
486  if (NULL == UIDTable)  //unwind not initialized yet
487    return NULL;
488  unsigned size = (level + 4) * sizeof (long); // need to strip __collector_get_return_address and its caller
489  ucontext_t context;
490  FILL_CONTEXT ((&context));
491  char* buf = (char*) alloca (size);
492  if (buf == NULL)
493    {
494      TprintfT (DBG_LT0, "__collector_get_return_address: ERROR: alloca(%d) fails\n", size);
495      return NULL;
496    }
497  int sz = stack_unwind (buf, size, NULL, NULL, &context, 0);
498  if (sz < (level + 3) * sizeof (long))
499    {
500      TprintfT (DBG_LT0, "__collector_get_return_address: size=%d, but stack_unwind returns %d\n", size, sz);
501      return NULL;
502    }
503  long *lbuf = (long*) buf;
504  TprintfT (DBG_LT2, "__collector_get_return_address: return %lx\n", lbuf[level + 2]);
505  return (void *) (lbuf[level + 2]);
506}
507/*
508 *  Collector interface method getFrameInfo
509 */
510FrameInfo
511__collector_get_frame_info (hrtime_t ts, int mode, void *arg)
512{
513  ucontext_t *context = NULL;
514  void *bptr = NULL;
515  CM_Array *array = NULL;
516
517  int unwind_mode = 0;
518  int do_walk = 1;
519
520  if (mode & FRINFO_NO_WALK)
521    do_walk = 0;
522  int bmode = mode & 0xffff;
523  int pseudo_context = 0;
524  if (bmode == FRINFO_FROM_STACK_ARG || bmode == FRINFO_FROM_STACK)
525    {
526      bptr = arg;
527      context = (ucontext_t*) alloca (sizeof (ucontext_t));
528      FILL_CONTEXT (context);
529      unwind_mode |= bmode;
530    }
531  else if (bmode == FRINFO_FROM_UC)
532    {
533      context = (ucontext_t*) arg;
534      if (context == NULL)
535	return (FrameInfo) 0;
536      if (GET_SP (context) == 0)
537	pseudo_context = 1;
538    }
539  else if (bmode == FRINFO_FROM_ARRAY)
540    {
541      array = (CM_Array*) arg;
542      if (array == NULL || array->length <= 0)
543	return (FrameInfo) 0;
544    }
545  else
546    return (FrameInfo) 0;
547
548  int max_frame_size = OVERHEAD_BYTES + NATIVE_FRAME_BYTES (max_native_nframes);
549  if (__collector_java_mode && __collector_java_asyncgetcalltrace_loaded && context && !pseudo_context)
550    max_frame_size += JAVA_FRAME_BYTES (max_java_nframes);
551
552  Frame_packet *frpckt = alloca (sizeof (Frame_packet) + max_frame_size);
553  frpckt->type = FRAME_PCKT;
554  frpckt->hsize = sizeof (Frame_packet);
555
556  char *d = (char*) (frpckt + 1);
557  int size = max_frame_size;
558
559#define MIN(a,b) ((a)<(b)?(a):(b))
560  /* get Java info */
561  if (__collector_java_mode && __collector_java_asyncgetcalltrace_loaded && context && !pseudo_context)
562    {
563      /* use only 2/3 of the buffer and leave the rest for the native stack */
564      int tmpsz = MIN (size, JAVA_FRAME_BYTES (max_java_nframes));
565      if (tmpsz > 0)
566	{
567	  int sz = __collector_ext_jstack_unwind (d, tmpsz, context);
568	  d += sz;
569	  size -= sz;
570	}
571    }
572
573  /* get native stack */
574  if (context)
575    {
576      Stack_info *sinfo = (Stack_info*) d;
577      int sz = sizeof (Stack_info);
578      d += sz;
579      size -= sz;
580#if ARCH(Intel)
581      if (omp_no_walk == 0)
582	do_walk = 1;
583#endif
584      if (do_walk == 0)
585	unwind_mode |= FRINFO_NO_WALK;
586
587      int tmpsz = MIN (size, NATIVE_FRAME_BYTES (max_native_nframes));
588      if (tmpsz > 0)
589	{
590	  sz = stack_unwind (d, tmpsz, bptr, NULL, context, unwind_mode);
591	  d += sz;
592	  size -= sz;
593	}
594      sinfo->kind = STACK_INFO;
595      sinfo->hsize = (d - (char*) sinfo);
596    }
597
598  /* create a stack image from user data */
599  if (array && array->length > 0)
600    {
601      Stack_info *sinfo = (Stack_info*) d;
602      int sz = sizeof (Stack_info);
603      d += sz;
604      size -= sz;
605      sz = array->length;
606      if (sz > size)
607	sz = size;  // YXXX should we mark this with truncation frame?
608      __collector_memcpy (d, array->bytes, sz);
609      d += sz;
610      size -= sz;
611      sinfo->kind = STACK_INFO;
612      sinfo->hsize = (d - (char*) sinfo);
613    }
614
615  /* Compute the total size */
616  frpckt->tsize = d - (char*) frpckt;
617  FrameInfo uid = compute_uid (frpckt);
618  return uid;
619}
620
/* Compute the canonical 64-bit uid of frame packet FRP; if it has not
   been seen before, compress it against previously recorded stacks
   and write it out.  The uid is a multiplicative rolling hash over
   all stack words; a second, independent hash (idx) supplies the
   probe positions into UIDTable.  */
FrameInfo
compute_uid (Frame_packet *frp)
{
  uint64_t idxs[LAST_INFO];
  uint64_t uid = ROOT_UID;
  uint64_t idx = ROOT_IDX;

  /* Pass 1: hash every info record, keeping both a per-record hash
     (uidt/idxt) and the packet-wide hash (uid/idx).  */
  Common_info *cinfo = (Common_info*) ((char*) frp + frp->hsize);
  char *end = (char*) frp + frp->tsize;
  for (;;)
    {
      if ((char*) cinfo >= end || cinfo->hsize == 0 ||
	  (char*) cinfo + cinfo->hsize > end)
	break;

      /* Start with a different value to avoid matching with uid */
      uint64_t uidt = 1;
      uint64_t idxt = 1;
      long *ptr = (long*) ((char*) cinfo + cinfo->hsize);
      long *bnd = (long*) ((char*) cinfo + sizeof (Common_info));
      TprintfT (DBG_LT2, "compute_uid: Cnt=%ld: ", (long) cinfo->hsize);
      /* Fold in the stack words from outermost (end of record) to
	 innermost, so a shared outer suffix yields a shared hash.  */
      while (ptr > bnd)
	{
	  long val = *(--ptr);
	  tprintf (DBG_LT2, "0x%8.8llx ", (unsigned long long) val);
	  uidt = (uidt + val) * ROOT_UID;
	  idxt = (idxt + val) * ROOT_IDX;
	  uid = (uid + val) * ROOT_UID;
	  idx = (idx + val) * ROOT_IDX;
	}
      if (cinfo->kind == STACK_INFO || cinfo->kind == JAVA_INFO)
	{
	  cinfo->uid = uidt;
	  idxs[cinfo->kind] = idxt;
	}
      cinfo = (Common_info*) ((char*) cinfo + cinfo->hsize);
    }
  tprintf (DBG_LT2, "\n");

  /* Check if we have already recorded that uid.
   * The following fragment contains benign data races.
   * It's important, though, that all reads from UIDTable
   * happen before writes.
   */
  int found1 = 0;
  int idx1 = (int) ((idx >> 44) % UIDTableSize);
  if (UIDTable[idx1] == uid)
    found1 = 1;
  int found2 = 0;
  int idx2 = (int) ((idx >> 24) % UIDTableSize);
  if (UIDTable[idx2] == uid)
    found2 = 1;
  int found3 = 0;
  int idx3 = (int) ((idx >> 4) % UIDTableSize);
  if (UIDTable[idx3] == uid)
    found3 = 1;
  if (!found1)
    UIDTable[idx1] = uid;
  if (!found2)
    UIDTable[idx2] = uid;
  if (!found3)
    UIDTable[idx3] = uid;

  /* Already recorded at any probe position: nothing to write.  */
  if (found1 || found2 || found3)
    return (FrameInfo) uid;
  frp->uid = uid;

  /* Compress info's: if an outer suffix of a stack has been recorded
     before, replace it with its 8-byte uid.  The *_INV constants peel
     one word at a time off the rolling hashes (presumably the
     multiplicative inverses of ROOT_UID/ROOT_IDX mod 2^64).  */
  cinfo = (Common_info*) ((char*) frp + frp->hsize);
  for (;;)
    {
      if ((char*) cinfo >= end || cinfo->hsize == 0 ||
	  (char*) cinfo + cinfo->hsize > end)
	break;
      if (cinfo->kind == STACK_INFO || cinfo->kind == JAVA_INFO)
	{
	  long *ptr = (long*) ((char*) cinfo + sizeof (Common_info));
	  long *bnd = (long*) ((char*) cinfo + cinfo->hsize);
	  uint64_t uidt = cinfo->uid;
	  uint64_t idxt = idxs[cinfo->kind];
	  int found = 0;
	  int first = 1;
	  /* Drop leading (innermost) words until the remaining suffix
	     is found in UIDTable; record the whole-record hash on the
	     first probe so future packets can match against it.  */
	  while (ptr < bnd - 1)
	    {
	      int idx1 = (int) ((idxt >> 44) % UIDTableSize);
	      if (UIDTable[idx1] == uidt)
		{
		  found = 1;
		  break;
		}
	      else if (first)
		{
		  first = 0;
		  UIDTable[idx1] = uidt;
		}
	      long val = *ptr++;
	      uidt = uidt * ROOT_UID_INV - val;
	      idxt = idxt * ROOT_IDX_INV - val;
	    }
	  if (found)
	    {
	      /* Splice out the known suffix: unless the whole record
		 matched (first still set), write its uid byte-by-byte
		 (little-endian) in place, then close the gap.  */
	      char *d = (char*) ptr;
	      char *s = (char*) bnd;
	      if (!first)
		{
		  int i;
		  for (i = 0; i<sizeof (uidt); i++)
		    {
		      *d++ = (char) uidt;
		      uidt = uidt >> 8;
		    }
		}
	      int delta = s - d;
	      while (s < end)
		*d++ = *s++;
	      cinfo->kind |= COMPRESSED_INFO;
	      cinfo->hsize -= delta;
	      frp->tsize -= delta;
	      end -= delta;
	    }
	}
      cinfo = (Common_info*) ((char*) cinfo + cinfo->hsize);
    }
  __collector_write_packet (dhndl, (CM_Packet*) frp);
  return (FrameInfo) uid;
}
747
/* Collector interface method getUID.
   Compute (and record if new) the uid of a caller-supplied array of
   stack words, chained under the previously returned uid SUID (0 for
   none).  Returns (FrameInfo)-1 when the array is not long-aligned,
   SUID unchanged when the array is empty.  */
FrameInfo
__collector_getUID (CM_Array *arg, FrameInfo suid)
{
  /* The hash walks whole longs; reject misaligned length or data.  */
  if (arg->length % sizeof (long) != 0 ||
      (long) arg->bytes % sizeof (long) != 0)
    return (FrameInfo) - 1;
  if (arg->length == 0)
    return suid;

  /* Seed the rolling hashes with the link uid (or 1) and fold in the
     words from the end of the array backwards.  */
  uint64_t uid = suid ? suid : 1;
  uint64_t idx = suid ? suid : 1;
  long *ptr = (long*) ((char*) arg->bytes + arg->length);
  long *bnd = (long*) (arg->bytes);
  while (ptr > bnd)
    {
      long val = *(--ptr);
      uid = (uid + val) * ROOT_UID;
      idx = (idx + val) * ROOT_IDX;
    }

  /* Check if we have already recorded that uid.
   * The following fragment contains benign data races.
   * It's important, though, that all reads from UIDTable
   * happen before writes.
   */
  int found1 = 0;
  int idx1 = (int) ((idx >> 44) % UIDTableSize);
  if (UIDTable[idx1] == uid)
    found1 = 1;
  int found2 = 0;
  int idx2 = (int) ((idx >> 24) % UIDTableSize);
  if (UIDTable[idx2] == uid)
    found2 = 1;
  int found3 = 0;
  int idx3 = (int) ((idx >> 4) % UIDTableSize);
  if (UIDTable[idx3] == uid)
    found3 = 1;

  if (!found1)
    UIDTable[idx1] = uid;
  if (!found2)
    UIDTable[idx2] = uid;
  if (!found3)
    UIDTable[idx3] = uid;
  if (found1 || found2 || found3)
    return (FrameInfo) uid;

  /* Not seen before: emit a UID packet.  Worst case it holds every
     word plus a trailing link uid.  */
  int sz = sizeof (Uid_packet) + arg->length;
  if (suid)
    sz += sizeof (suid);
  Uid_packet *uidp = alloca (sz);
  uidp->tsize = sz;
  uidp->type = UID_PCKT;
  uidp->flags = 0;
  uidp->uid = uid;

  /* Compress */
  ptr = (long*) (arg->bytes);
  bnd = (long*) ((char*) arg->bytes + arg->length);
  long *dst = (long*) (uidp + 1);
  uint64_t uidt = uid;
  uint64_t idxt = idx;
  uint64_t luid = suid; /* link uid */

  while (ptr < bnd)
    {

      long val = *ptr++;
      *dst++ = val;

      /* Peel VAL off the rolling hashes (via the *_INV inverse
	 constants) and stop copying as soon as the remaining suffix
	 is already recorded in UIDTable.  */
      if ((bnd - ptr) > sizeof (uidt))
	{
	  uidt = uidt * ROOT_UID_INV - val;
	  idxt = idxt * ROOT_IDX_INV - val;
	  int idx1 = (int) ((idxt >> 44) % UIDTableSize);
	  if (UIDTable[idx1] == uidt)
	    {
	      luid = uidt;
	      break;
	    }
	}
    }
  /* Append the link uid byte-by-byte (little-endian) and shrink the
     packet to what was actually used.  */
  if (luid)
    {
      char *d = (char*) dst;
      for (int i = 0; i<sizeof (luid); i++)
	{
	  *d++ = (char) luid;
	  luid = luid >> 8;
	}
      uidp->flags |= COMPRESSED_INFO;
      uidp->tsize = d - (char*) uidp;
    }
  __collector_write_packet (dhndl, (CM_Packet*) uidp);

  return (FrameInfo) uid;
}
845
846int
847__collector_getStackTrace (void *buf, int size, void *bptr, void *eptr, void *arg)
848{
849  if (arg == (void*) __collector_omp_stack_trace)
850    seen_omp = 1;
851  int do_walk = 1;
852  if (arg == NULL || arg == (void*) __collector_omp_stack_trace)
853    {
854      do_walk = (arg == (void*) __collector_omp_stack_trace && omp_no_walk) ? 0 : 1;
855      ucontext_t *context = (ucontext_t*) alloca (sizeof (ucontext_t));
856      FILL_CONTEXT (context);
857      arg = context;
858    }
859  int unwind_mode = 0;
860  if (do_walk == 0)
861    unwind_mode |= FRINFO_NO_WALK;
862  return stack_unwind (buf, size, bptr, eptr, arg, unwind_mode);
863}
864
865#if ARCH(SPARC)
866/*
867 * These are important data structures taken from the header files reg.h and
868 * ucontext.h. They are used for the stack trace algorithm explained below.
869 *
870 *	typedef struct ucontext {
871 * 		u_long		uc_flags;
872 * 		struct ucontext	*uc_link;
873 * 		usigset_t   	uc_sigmask;
874 * 		stack_t 	uc_stack;
875 * 		mcontext_t 	uc_mcontext;
876 * 		long		uc_filler[23];
877 * 	} ucontext_t;
878 *
879 *	#define	SPARC_MAXREGWINDOW	31
880 *
881 *	struct	rwindow {
882 *		greg_t	rw_local[8];
883 *		greg_t	rw_in[8];
884 *	};
885 *
886 *	#define	rw_fp	rw_in[6]
887 *	#define	rw_rtn	rw_in[7]
888 *
889 *	struct gwindows {
890 *		int		wbcnt;
891 *		int		*spbuf[SPARC_MAXREGWINDOW];
892 *		struct rwindow	wbuf[SPARC_MAXREGWINDOW];
893 *	};
894 *
895 *	typedef struct gwindows	gwindows_t;
896 *
897 *	typedef struct {
898 *		gregset_t	gregs;
899 *		gwindows_t	*gwins;
900 *		fpregset_t	fpregs;
901 *		long		filler[21];
902 *	} mcontext_t;
903 *
 * The stack would look like this when SIGPROF occurs.
905 *
906 *	------------------------- <- high memory
907 *	|			|
908 *	|			|
909 *	-------------------------
910 *	|			|
911 *	------------------------- <- fp' <-|
912 *	|			|	   |
913 *		:	:	 	   |
914 *	|			|	   |
915 *	-------------------------	   |
916 *	|	fp		|----------|
917 *	|			|
918 *	------------------------- <- sp'
919 *	|			|		             |	|
920 *	| 	gwins		| <- saved stack pointers &  |  |
921 *	|			|    register windows	     |  |- mcontext
922 *	-------------------------			     |  |
923 *	|	gregs		| <- saved registers	     |  |
924 *	-------------------------			     |
925 *	|			|			     |- ucontext
926 *	------------------------- <- ucp (ucontext pointer)  |
927 *	|			|				|
928 *	|			|				|- siginfo
929 *	------------------------- <- sip (siginfo pointer)	|
930 *	|			|
931 *	------------------------- <- sp
932 *
933 * Then the signal handler is called with:
934 *	handler( signo, sip, uip );
935 * When gwins is null, all the stack frames are saved in the user stack.
936 * In that case we can find sp' from gregs and walk the stack for a backtrace.
937 * However, if gwins is not null we will have a more complicated case.
938 * Wbcnt(in gwins) tells you how many saved register windows are valid.
939 * This is important because the kernel does not allocate the entire array.
940 * And the top most frame is saved in the lowest index element. The next
941 * paragraph explains the possible causes.
942 *
943 * There are two routines in the kernel to flush out user register windows.
944 *	flush_user_windows and flush_user_windows_to_stack
945 * The first routine will not cause a page fault. Therefore if the user
946 * stack is not in memory, the register windows will be saved to the pcb.
947 * This can happen when the kernel is trying to deliver a signal and
 * the user stack got swapped out. The kernel will then build a new context for
 * the signal handler and the saved register windows will
 * be copied to the ucontext as shown above. On the other hand,
951 * flush_user_windows_to_stack can cause a page fault, and if it failed
952 * then there is something wrong (stack overflow, misalign).
 * The first saved register window does not necessarily correspond to the
 * first stack frame. So the current stack pointer must be compared with
955 * the stack pointers in spbuf to find a match.
956 *
957 * We will also follow the uc_link field in ucontext to trace also nested
958 * signal stack frames.
959 *
960 */
961
962/* Dealing with trap handlers.
963 * When a user defined trap handler is invoked the return address
964 * (or actually the address of an instruction that raised the trap)
965 * is passed to the trap handler in %l6, whereas saved %o7 contains
966 * garbage. First, we need to find out if a particular pc belongs
967 * to the trap handler, and if so, take the %l6 value from the stack rather
968 * than %o7 from either the stack or the register.
969 * There are three possible situations represented
970 * by the following stacks:
971 *
972 *   MARKER		MARKER			MARKER
973 *   trap handler pc	__func pc before 'save'	__func pc after 'save'
974 *   %l6		%o7 from reg		%o7 (garbage)
975 *   ...		%l6			trap handler pc
976 *			...			%l6
977 *						...
978 * where __func is a function called from the trap handler.
979 *
980 * Currently this is implemented to only deal with __misalign_trap_handler
981 * set for v9 FORTRAN applications. Implementation of IN_TRAP_HANDLER
982 * macro shows it. A general solution is postponed.
983 */
984
985/* Special handling of unwind through the parallel loop barrier code:
986 *
987 *  The library defines two symbols, __mt_EndOfTask_Barrier_ and
988 *	__mt_EndOfTask_Barrier_Dummy_ representing the first word of
 *	the barrier synchronization code, and the first word following
990 *	it.  Whenever the leaf PC is between these two symbols,
991 *	the unwind code is special-cased as follows:
992 *	The __mt_EndOfTask_Barrier_ function is guaranteed to be a leaf
993 *	function, so its return address is in a register, not saved on
994 *	the stack.
995 *
996 *    MARKER
997 *    __mt_EndOfTask_Barrier_ PC -- the leaf PC
998 *    loop body function address for the task -- implied caller of __mt_EndOfTask_Barrier_
999 *	    this address is taken from the %O0 register
1000 *    {mt_master or mt_slave} -- real caller of __mt_EndOfTask_Barrier_
1001 *     ...
1002 *
1003 *  With this trick, the analyzer will show the time in the barrier
1004 *	attributed to the loop at the end of which the barrier synchronization
1005 *	is taking place.  That loop body routine, will be shown as called
1006 *	from the function from which it was extracted, which will be shown
1007 *	as called from the real caller, either the slave or master library routine.
1008 */
1009
1010/*
1011 * These no-fault-load (0x82) assembly functions are courtesy of Rob Gardner.
1012 *
1013 * Note that 0x82 is ASI_PNF.  See
1014 *   http://lxr.free-electrons.com/source/arch/sparc/include/uapi/asm/asi.h#L134
1015 *   ASI  address space identifier; PNF  primary no fault
1016 */
1017
1018/* load an int from an address */
1019
1020/* if the address is illegal, return a 0 */
static int
SPARC_no_fault_load_int (void *addr)
{
  int val;
  /* "lda [addr] 0x82, val" -- load with ASI 0x82 (ASI_PNF, primary
     no-fault).  A fault on an illegal address is suppressed and the
     load silently yields 0, so a 0 result is ambiguous: it may be a
     stored zero or an invalid address (see comment above).  */
  __asm__ __volatile__(
		       "lda [%1] 0x82, %0\n\t"
		       : "=r" (val)
		       : "r" (addr)
		       );

  return val;
}
1033
1034/* check if an address is invalid
1035 *
1036 * A no-fault load of an illegal address still faults, but it does so silently to the calling process.
1037 * It returns a 0, but so could a load of a legal address.
1038 * So, we time the load.  A "fast" load must be a successful load.
1039 * A "slow" load is probably a fault.
1040 * Since it could also be a cache/TLB miss or other abnormality,
1041 * it's safest to retry a slow load.
1042 * The cost of trying a valid address should be some nanosecs.
1043 * The cost of trying an invalid address up to 10 times could be some microsecs.
1044 */
1045#if 0
static
int invalid_SPARC_addr(void *addr)
{
    long t1, t2;
    int i;

    /* Time a no-fault byte load up to 10 times: a fast load (< 100 ticks)
       must have succeeded, so the address is valid.  A persistently slow
       load is taken to be a silently-suppressed fault (see comment above;
       slow loads are retried since they could also be cache/TLB misses). */
    for (i=0; i<10; i++) {
      __asm__ __volatile__(
	"rd %%tick, %0\n\t"
	"lduba [%2] 0x82, %%g0\n\t"
	"rd %%tick, %1\n\t"
	: "=r" (t1), "=r" (t2)
	: "r" (addr) );
      if ( (t2 - t1) < 100 )
	return 0;   /* fast load => valid address */
    }
    return 1;       /* consistently slow => assume invalid */
}
1064#endif
1065
1066/*
1067 * The standard SPARC procedure-calling convention is that the
1068 * calling PC (for determining the return address when the procedure
1069 * is finished) is placed in register %o7.  A called procedure
1070 * typically executes a "save" instruction that shifts the register
1071 * window, and %o7 becomes %i7.
1072 *
1073 * Optimized leaf procedures do not shift the register window.
1074 * They assume the return address will remain %o7.  So when
1075 * we process a leaf PC, we walk instructions to see if there
1076 * is a call, restore, or other instruction that would indicate
1077 * we can IGNORE %o7 because this is NOT a leaf procedure.
1078 *
1079 * If a limited instruction walk uncovers no such hint, we save
1080 * not only the PC but the %o7 value as well... just to be safe.
1081 * Later, in DBE post-processing of the call stacks, we decide
1082 * whether any recorded %o7 value should be used as a caller
1083 * frame or should be discarded.
1084 */
1085
/* Pattern matchers for 32-bit SPARC instruction words, used by the
   leaf-procedure walk in process_leaf() below.  Each tests opcode/field
   bits of an instruction word; names mirror the assembly forms, e.g.
   IS_MOVRO7 matches a move of a register into %o7.  */
#define IS_ILLTRAP(x) (((x) & 0xc1c00000) == 0)
#define IS_SAVE(x)    (((x) & 0xc1f80000) == 0x81e00000)
#define IS_MOVO7R(x)  (((x) & 0xc1f8201f) == 0x8160000f)
#define IS_MOVRO7(x)  (((x) & 0xfff82000) == 0x9f600000)
#define IS_ORRG0O7(x) (((x) & 0xff78201f) == 0x9e100000)
#define IS_ORG0RO7(x) (((x) & 0xff7fe000) == 0x9e100000)
#define IS_ORG0O7R(x) (((x) & 0xc17fe01f) == 0x8010000f)
#define IS_ORO7G0R(x) (((x) & 0xc17fe01f) == 0x8013c000)
#define IS_RESTORE(x) (((x) & 0xc1f80000) == 0x81e80000)
#define IS_RET(x)     ((x) == 0x81c7e008)
#define IS_RETL(x)    ((x) == 0x81c3e008)
#define IS_RETURN(x)  (((x) & 0xc1f80000) == 0x81c80000)
#define IS_BRANCH(x)  ((((x) & 0xc0000000) == 0) && (((x) & 0x01c00000) != 0x01000000))
#define IS_CALL(x)    (((x) & 0xc0000000) == 0x40000000)
#define IS_LDO7(x)    (((x) & 0xfff80000) == 0xde000000)

/* Page size for the backward-scan page-boundary check in process_leaf();
   fetched lazily via sysconf(_SC_PAGESIZE), 0 until first use.  */
static long pagesize = 0;
1103
/* Process the interrupted (leaf) frame on SPARC.
 * Records pc into lbuf[] and decides whether the %o7 register still
 * holds a valid return address for the interrupted code.  A forward
 * walk of up to 20 instructions looks for save/restore/ret/branch/call
 * patterns that settle the question.  If it stays ambiguous, both pc
 * and %o7 are recorded behind SP_LEAF_CHECK_MARKER and the decision is
 * deferred to DBE post-processing (see block comment above).
 * Returns the updated index into lbuf.  */
static int
process_leaf (long *lbuf, int ind, int lsize, void *context)
{
  greg_t pc = GET_PC (context);
  greg_t o7 = GET_GREG (context, REG_O7);

  /* omazur: TBR START -- not used */
  if (IN_BARRIER (pc))
    {
      /* parallel-loop barrier: record pc plus the loop body address taken
	 from %o0 (see "parallel loop barrier" comment above) */
      if (ind < lsize)
	lbuf[ind++] = pc;
      if (ind < lsize)
	lbuf[ind++] = GET_GREG (context, REG_O0);
      return ind;
    }
  /* omazur: TBR END */
#if WSIZE(64)
  if (IN_TRAP_HANDLER (pc))
    {
      /* %o7 contains garbage inside the trap handler; record only pc */
      if (ind < lsize)
	lbuf[ind++] = pc;
      return ind;
    }
#endif
  /* walk forward over at most 20 instructions from the interrupted pc */
  unsigned *instrp = (unsigned *) pc;
  unsigned *end_addr = instrp + 20;
  while (instrp < end_addr)
    {
      unsigned instr = *instrp++;
      if (IS_ILLTRAP (instr))
	break;
      else if (IS_SAVE (instr))
	{
	  /* a 'save' lies ahead: record pc and the (non-zero) %o7 */
	  if (ind < lsize)
	    lbuf[ind++] = pc;
	  if (o7 && ind < lsize)
	    lbuf[ind++] = o7;
	  return ind;
	}
      else if (IS_MOVO7R (instr) || IS_ORG0O7R (instr) || IS_ORO7G0R (instr))
	break;	/* %o7 is used as a source operand: stop, keep current o7 */
      else if (IS_MOVRO7 (instr) || IS_ORG0RO7 (instr))
	{
	  /* %o7 is about to be rewritten from rs2: take the return
	     address from that source register instead */
	  int rs2 = (instr & 0x1f) + REG_G1 - 1;
	  o7 = (rs2 <= REG_O7) ? GET_GREG (context, rs2) : 0;
	  break;
	}
      else if (IS_ORRG0O7 (instr))
	{
	  /* same, with rs1 as the source register */
	  int rs2 = ((instr & 0x7c000) >> 14) + REG_G1 - 1;
	  o7 = (rs2 <= REG_O7) ? GET_GREG (context, rs2) : 0;
	  break;
	}
      else if (IS_RESTORE (instr))
	{
	  o7 = 0;	/* not a leaf: %o7 is not our return address */
	  break;
	}
      else if (IS_RETURN (instr))
	{
	  o7 = 0;
	  break;
	}
      else if (IS_RET (instr))
	{
	  o7 = 0;
	  break;
	}
      else if (IS_RETL (instr))
	{
	  /* process delay slot */
	  instr = *instrp++;
	  if (IS_RESTORE (instr))
	    o7 = 0;
	  break;
	}
      else if (IS_BRANCH (instr))
	{
	  /* a branch ends the forward walk; scan backwards from pc for
	     evidence about whether %o7 is live */
	  unsigned *backbegin = ((unsigned *) pc - 1);
	  unsigned *backend = backbegin - 12 + (instrp - (unsigned *) pc);
	  while (backbegin > backend)
	    {
	      // 21920143 stack unwind: SPARC process_leaf backtracks too far
	      /*
	       * We've already dereferenced backbegin+1.
	       * So if backbegin is on the same page, we're fine.
	       * If we've gone to a different page, possibly things are not fine.
	       * We don't really know how to test that.
	       * Let's just assume the worst:  that dereferencing backbegin would segv.
	       * We won't know if we're in a leaf function or not.
	       */
	      if (pagesize == 0)
		pagesize = CALL_UTIL (sysconf)(_SC_PAGESIZE);
	      if ((((long) (backbegin + 1)) & (pagesize - 1)) < sizeof (unsigned*))
		break;
	      unsigned backinstr = *backbegin--;
	      if (IS_LDO7 (backinstr))
		{
		  o7 = 0;	/* %o7 was loaded from memory: ignore it */
		  break;
		}
	      else if (IS_ILLTRAP (backinstr))
		break;
	      else if (IS_RETURN (backinstr))
		break;
	      else if (IS_RET (backinstr))
		break;
	      else if (IS_RETL (backinstr))
		break;
	      else if (IS_CALL (backinstr))
		break;
	      else if (IS_SAVE (backinstr))
		{
		  o7 = 0;	/* a 'save' above us: not a leaf procedure */
		  break;
		}
	    }
	  break;
	}
      else if (IS_CALL (instr))
	o7 = 0;		/* a call ahead clobbers %o7: current value unusable */
    }

#if WSIZE(64)
  if (o7 != 0 && ((long) o7) < 32 && ((long) o7) > -32)
    {
      /* 20924821 SEGV in unwind code on SPARC/Linux
       * We've seen this condition in some SPARC-Linux runs.
       * o7 is non-zero but not a valid address.
       * Values like 4 or -7 have been seen.
       * Let's check if o7 is unreasonably small.
       * If so, set to 0 so that it won't be recorded.
       * Otherwise, there is risk of it being dereferenced in process_sigreturn().
       */
      // __collector_log_write("<event kind=\"%s\" id=\"%d\">time %lld, internal debug unwind at leaf; o7 = %ld, pc = %x</event>\n",
      //       SP_JCMD_COMMENT, COL_COMMENT_NONE, __collector_gethrtime() - __collector_start_time, (long) o7, pc );
      o7 = 0;
    }
#endif

  if (o7)
    {
      /* ambiguous: record both pc and o7 behind a marker and let DBE
	 post-processing decide whether o7 is a real caller frame */
      if (ind < lsize)
	lbuf[ind++] = SP_LEAF_CHECK_MARKER;
      if (ind < lsize)
	lbuf[ind++] = pc;
      if (ind < lsize)
	lbuf[ind++] = o7;
    }
  else if (ind < lsize)
    lbuf[ind++] = pc;
  return ind;
}
1257
1258#if WSIZE(64)
1259// detect signal handler
/* Detect a Linux __rt_sigreturn_stub trampoline frame.
 * tpc is the candidate trampoline address; *pfp is the frame pointer of
 * the signal frame.  If the trampoline is recognized, the interrupted
 * pc values are read out of the kernel-built rt_signal_frame that *pfp
 * points into, appended to lbuf (subject to the bptr filter), and *pfp
 * is advanced to the pre-signal frame pointer.
 * Returns the (possibly unchanged) index into lbuf.  */
static int
process_sigreturn (long *lbuf, int ind, int lsize, unsigned char * tpc,
		   struct frame **pfp, void * bptr, int extra_frame)
{
  // cheap checks whether tpc is obviously not an instruction address
  if ((4096 > (unsigned long) tpc) // the first page is off limits
      || (3 & (unsigned long) tpc))
    return ind;  // the address is not aligned

  // get the instruction at tpc, skipping over as many as 7 nop's (0x01000000)
  int insn, i;
  for (i = 0; i < 7; i++)
    {
      insn = SPARC_no_fault_load_int ((void *) tpc);
      if (insn != 0x01000000)
	break;
      tpc += 4;
    }

  // we're not expecting 0 (and it could mean an illegal address)
  if (insn == 0)
    return ind;

  // We are looking for __rt_sigreturn_stub with the instruction
  //     0x82102065 : mov 0x65 /* __NR_rt_sigreturn */, %g1
  if (insn == 0x82102065)
    {
      /*
       * according to linux kernel source code,
       * syscall(_NR_rt_sigreturn) uses the following data in stack:
       * struct rt_signal_frame {
       *     struct sparc_stackf     ss;
       *     siginfo_t               info;
       *     struct pt_regs          regs;
       *     ....};
       * sizeof(struct sparc_stackf) is 192;
       * sizeof(siginfo_t) is 128;
       * we need to get the register values from regs, which is defined as:
       * struct pt_regs {
       *     unsigned long u_regs[16];
       *     unsigned long tstate;
       *     unsigned long tpc;
       *     unsigned long tnpc;
       *     ....};
       * pc and fp registers have offsets of 120 and 112;
       * the pc of kill() is stored in tnpc, whose offset is 136.
       */
      greg_t pc = *((unsigned long*) ((char*) ((*pfp)) + 192 + 128 + 136));
      greg_t pc1 = *((unsigned long*) ((char*) ((*pfp)) + 192 + 128 + 120));
      (*pfp) = *((struct frame**) ((char*) ((*pfp)) + 192 + 128 + 112));
      if (pc && pc1)
	{
	  /* when filtering by bptr, frames below bptr may replace slot 0 */
	  if (bptr != NULL && extra_frame && ((char*) (*pfp) + STACK_BIAS) < (char*) bptr && ind < 2)
	    {
	      lbuf[0] = pc1;
	      if (ind == 0)
		ind++;
	    }
	  if (bptr == NULL || ((char*) (*pfp) + STACK_BIAS) >= (char*) bptr)
	    {
	      if (ind < lsize)
		lbuf[ind++] = (unsigned long) tpc;
	      if (ind < lsize)
		lbuf[ind++] = pc;
	      if (ind < lsize)
		lbuf[ind++] = pc1;
	    }
	}
      DprintfT (SP_DUMP_UNWIND, "unwind.c: resolved sigreturn pc=0x%lx, pc1=0x%lx, fp=0x%lx\n", pc, pc1, *(pfp));
    }
  return ind;
}
1332#endif
1333
1334/*
1335 * int stack_unwind( char *buf, int size, ucontext_t *context )
1336 *	This routine looks into the mcontext and
1337 *	trace stack frames to record return addresses.
1338 */
1339int
1340stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode)
1341{
1342  /*
1343   * trace the stack frames from user stack.
1344   * We are assuming that the frame pointer and return address
1345   * are null when we are at the top level.
1346   */
1347  long *lbuf = (long*) buf;
1348  int lsize = size / sizeof (long);
1349  struct frame *fp = (struct frame *) GET_SP (context); /* frame pointer */
1350  greg_t pc; /* program counter */
1351  int extra_frame = 0;
1352  if ((mode & 0xffff) == FRINFO_FROM_STACK)
1353    extra_frame = 1;
1354
1355  int ind = 0;
1356  if (bptr == NULL)
1357    ind = process_leaf (lbuf, ind, lsize, context);
1358
1359  int extra_frame = 0;
1360  if ((mode & 0xffff) == FRINFO_FROM_STACK)
1361    extra_frame = 1;
1362  int ind = 0;
1363  if (bptr == NULL)
1364    ind = process_leaf (lbuf, ind, lsize, context);
1365
1366  while (fp)
1367    {
1368      if (ind >= lsize)
1369	break;
1370      fp = (struct frame *) ((char *) fp + STACK_BIAS);
1371      if (eptr && fp >= (struct frame *) eptr)
1372	{
1373	  ind = ind >= 2 ? ind - 2 : 0;
1374	  break;
1375	}
1376#if WSIZE(64) // detect signal handler
1377      unsigned char * tpc = ((unsigned char*) (fp->fr_savpc));
1378      struct frame * tfp = (struct frame*) ((char*) (fp->fr_savfp) + STACK_BIAS);
1379      int old_ind = ind;
1380      ind = process_sigreturn (lbuf, old_ind, lsize, tpc, &tfp, bptr, extra_frame);
1381      if (ind != old_ind)
1382	{
1383	  pc = (greg_t) tpc;
1384	  fp = tfp;
1385	}
1386      else
1387#endif
1388	{
1389#if WSIZE(64)
1390	  if (IN_TRAP_HANDLER (lbuf[ind - 1]))
1391	    pc = fp->fr_local[6];
1392	  else
1393	    pc = fp->fr_savpc;
1394#else
1395	  pc = fp->fr_savpc;
1396#endif
1397	  fp = fp->fr_savfp;
1398	  if (pc)
1399	    {
1400	      if (bptr != NULL && extra_frame && ((char*) fp + STACK_BIAS) < (char*) bptr && ind < 2)
1401		{
1402		  lbuf[0] = pc;
1403		  if (ind == 0)
1404		    ind++;
1405		}
1406	      if (bptr == NULL || ((char*) fp + STACK_BIAS) >= (char*) bptr)
1407		lbuf[ind++] = pc;
1408	    }
1409	}
1410
1411      /* 4616238: _door_return may have a frame that has non-zero
1412       * saved stack pointer and zero pc
1413       */
1414      if (pc == (greg_t) NULL)
1415	break;
1416    }
1417
1418  if (ind >= lsize)
1419    { /* truncated stack handling */
1420      ind = lsize - 1;
1421      lbuf[ind++] = SP_TRUNC_STACK_MARKER;
1422    }
1423  return ind * sizeof (long);
1424}
1425
1426#elif ARCH(Intel)
1427
1428/* get __NR_<syscall_name> constants */
1429#include <syscall.h>
1430
1431/*
1432 * From uts/intel/ia32/os/sendsig.c:
1433 *
1434 * An amd64 signal frame looks like this on the stack:
1435 *
1436 * old %rsp:
1437 *		<128 bytes of untouched stack space>
1438 *		<a siginfo_t [optional]>
1439 *		<a ucontext_t>
1440 *		<siginfo_t *>
1441 *		<signal number>
1442 * new %rsp:	<return address (deliberately invalid)>
1443 *
1444 * The signal number and siginfo_t pointer are only pushed onto the stack in
1445 * order to allow stack backtraces.  The actual signal handling code expects the
1446 * arguments in registers.
1447 *
1448 * An i386 SVR4/ABI signal frame looks like this on the stack:
1449 *
1450 * old %esp:
1451 *		<a siginfo32_t [optional]>
1452 *		<a ucontext32_t>
1453 *		<pointer to that ucontext32_t>
1454 *		<pointer to that siginfo32_t>
1455 *		<signo>
1456 * new %esp:	<return address (deliberately invalid)>
1457 */
1458
/* Field extractors for x86 opcode and ModRM bytes, plus register codes */
#if WSIZE(32)
#define OPC_REG(x)      ((x)&0x7)
#define MRM_REGD(x)     (((x)>>3)&0x7)
#define MRM_REGS(x)     ((x)&0x7)
#define RED_ZONE        0
#elif WSIZE(64)
/* B and R below are the REX extension bits -- assumed to be defined
   elsewhere in this file (outside this chunk); confirm.  */
#define OPC_REG(x)      (B|((x)&0x7))
#define MRM_REGD(x)     (R|(((x)>>3)&0x7))
#define MRM_REGS(x)     (B|((x)&0x7))
#define RED_ZONE        16      /* stack words below %rsp treated as readable
				   (amd64 128-byte red zone; see sp_safe) */
#endif
#define MRM_EXT(x)      (((x)>>3)&0x7)  /* reg field used as opcode extension */
#define MRM_MOD(x)      ((x)&0xc0)      /* ModRM addressing-mode bits */

/* register numbers as encoded in ModRM/opcode fields */
#define RAX             0
#define RDX             2
#define RSP             4
#define RBP             5
1477
/* Per-path state for the speculative x86 instruction walk.  Several of
   these can be live at once (one per possible execution path).  */
struct AdvWalkContext
{
  unsigned char *pc;            /* current instruction being examined */
  unsigned long *sp;            /* simulated stack pointer */
  unsigned long *sp_safe;       /* lowest stack address safe to read
				   (sp minus RED_ZONE words) */
  unsigned long *fp;            /* frame pointer value */
  unsigned long *fp_sav;        /* NOTE(review): saved fp / its save slot --
				   used outside this chunk; confirm */
  unsigned long *fp_loc;
  unsigned long rax;
  unsigned long rdx;
  unsigned long ra_sav;         /* return address known to be stored at ra_loc */
  unsigned long *ra_loc;        /* stack slot holding ra_sav */
  unsigned long regs[16];       /* tracked general-register values (0 = unknown) */
  int tidx;         /* targets table index */
  uint32_t cval;    /* cache value */
};
1494
1495static unsigned long
1496getRegVal (struct AdvWalkContext *cur, int r, int *undefRez)
1497{
1498  if (cur->regs[r] == 0)
1499    {
1500      if (r == RBP)
1501	{
1502	  tprintf (DBG_LT3, "getRegVal: returns cur->regs[RBP]=0x%lx  cur->pc=0x%lx\n",
1503		   (unsigned long) cur->fp, (unsigned long) cur->pc);
1504	  return (unsigned long) cur->fp;
1505	}
1506      *undefRez = 1;
1507    }
1508  tprintf (DBG_LT3, "getRegVal: cur->regs[%d]=0x%lx  cur->pc=0x%lx\n",
1509	   r, (unsigned long) cur->regs[r], (unsigned long) cur->pc);
1510  return cur->regs[r];
1511}
1512
/* Skip the ModRM byte at pc together with any SIB byte and displacement
   it implies, returning the address of the next byte (typically the
   following instruction or an immediate operand).  */
static unsigned char *
check_modrm (unsigned char *pc)
{
  unsigned char modrm = *pc++;
  unsigned char mod = modrm & 0xc0;     /* addressing-mode bits */
  unsigned char rm = modrm & 0x07;      /* r/m field */
  if (mod == 0xc0)
    return pc;                          /* register-direct: no memory operand */
  if (rm == 4)                          /* r/m == RSP: a SIB byte follows */
    {
      switch (mod)
	{
	case 0x40:
	  return pc + 1 + 1;            /* SIB + disp8 */
	case 0x80:
	  return pc + 1 + 4;            /* SIB + disp32 */
	default:
	  return pc + 1;                /* SIB only */
	}
    }
  if (mod == 0x00)
    return (rm == 5) ? pc + 4 : pc;     /* r/m == RBP with mod 0: disp32 */
  if (mod == 0x40)
    return pc + 1;                      /* disp8 */
  return pc + 4;                        /* mod == 0x80: disp32 */
}
1540
/* Read a little-endian integer operand of width w (1, 2, or 4 bytes)
   at pc; 1- and 2-byte values are sign-extended.  */
static int
read_int (unsigned char *pc, int w)
{
  switch (w)
    {
    case 1:
      return *((char *) pc);
    case 2:
      return *(short*) pc;
    default:
      return *(int*) pc;
    }
}
1550
/* Return codes for the x86 return-address resolution routines */
enum
{
  RA_FAILURE = 0,       /* could not determine a return address */
  RA_SUCCESS,           /* wctx was advanced to the caller's frame */
  RA_END_OF_STACK,      /* hit the zero RA marking the stack bottom */
  RA_SIGRETURN,
  RA_RT_SIGRETURN
};

/* Cache value encodings */
static const uint32_t RA_FROMFP = (uint32_t) - 1; /* get the RA from the frame pointer */
static const uint32_t RA_EOSTCK = (uint32_t) - 2; /* end-of-stack */
1564
1565
#define MAXCTX         16   /* max simultaneous walk contexts (size of buf[]) */
#define MAXTRGTS       64   /* max branch targets tracked during a walk */
#define MAXJMPREG       2   /* max "jmp *reg" contexts kept per phase */
#define MAXJMPREGCTX    3

/* Drop the current walk context: overwrite *cur with the last context in
   buf[] and shrink the live-context count.  Relies on buf/nctx/cur being
   in scope at the expansion site.  */
#define DELETE_CURCTX()  __collector_memcpy (cur, buf + (--nctx), sizeof (*cur))
1572
1573/**
1574 * Look for pc in AddrTable_RA_FROMFP and in AddrTable_RA_EOSTCK
1575 * @param wctx
1576 * @return
1577 */
1578static int
1579cache_get (struct WalkContext *wctx)
1580{
1581  unsigned long addr;
1582  if (AddrTable_RA_FROMFP != NULL)
1583    {
1584      uint64_t idx = wctx->pc % ValTableSize;
1585      addr = AddrTable_RA_FROMFP[ idx ];
1586      if (addr == wctx->pc)
1587	{ // Found in AddrTable_RA_FROMFP
1588	  unsigned long *sp = NULL;
1589	  unsigned long fp = wctx->fp;
1590	  /* validate fp before use */
1591	  if (fp < wctx->sp || fp >= wctx->sbase - sizeof (*sp))
1592	    return RA_FAILURE;
1593	  sp = (unsigned long *) fp;
1594	  fp = *sp++;
1595	  unsigned long ra = *sp++;
1596	  unsigned long tbgn = wctx->tbgn;
1597	  unsigned long tend = wctx->tend;
1598	  if (ra < tbgn || ra >= tend)
1599	    if (!__collector_check_segment (ra, &tbgn, &tend, 0))
1600	      return RA_FAILURE;
1601	  unsigned long npc = adjust_ret_addr (ra, ra - tbgn, tend);
1602	  if (npc == 0)
1603	    return RA_FAILURE;
1604	  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cached pc=0x%lX\n", __LINE__, npc);
1605	  wctx->pc = npc;
1606	  wctx->sp = (unsigned long) sp;
1607	  wctx->fp = fp;
1608	  wctx->tbgn = tbgn;
1609	  wctx->tend = tend;
1610	  return RA_SUCCESS;
1611	}
1612    }
1613  if (NULL == AddrTable_RA_EOSTCK)
1614    return RA_FAILURE;
1615  uint64_t idx = wctx->pc % ValTableSize;
1616  addr = AddrTable_RA_EOSTCK[ idx ];
1617  if (addr != wctx->pc)
1618    return RA_FAILURE;
1619  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cached RA_END_OF_STACK\n", __LINE__);
1620  return RA_END_OF_STACK;
1621}
1622/**
1623 * Save pc in RA_FROMFP or RA_EOSTCK cache depending on val
1624 * @param wctx
1625 */
1626static void
1627cache_put (struct WalkContext *wctx, const uint32_t val)
1628{
1629  if (RA_FROMFP == val)
1630    {
1631      // save pc in RA_FROMFP cache
1632      if (NULL != AddrTable_RA_FROMFP)
1633	{
1634	  uint64_t idx = wctx->pc % ValTableSize;
1635	  AddrTable_RA_FROMFP[ idx ] = wctx->pc;
1636	  if (NULL != AddrTable_RA_EOSTCK)
1637	    if (AddrTable_RA_EOSTCK[ idx ] == wctx->pc)
1638	      // invalidate pc in RA_EOSTCK cache
1639	      AddrTable_RA_EOSTCK[ idx ] = 0;
1640	}
1641      return;
1642    }
1643  if (RA_EOSTCK == val)
1644    {
1645      // save pc in RA_EOSTCK cache
1646      if (NULL != AddrTable_RA_EOSTCK)
1647	{
1648	  uint64_t idx = wctx->pc % ValTableSize;
1649	  AddrTable_RA_EOSTCK[ idx ] = wctx->pc;
1650	  if (NULL != AddrTable_RA_FROMFP)
1651	    {
1652	      if (AddrTable_RA_FROMFP[ idx ] == wctx->pc)
1653		// invalidate pc in RA_FROMFP cache
1654		AddrTable_RA_FROMFP[ idx ] = 0;
1655	    }
1656	}
1657      return;
1658    }
1659}
1660
/* Pop a return address for the current walk context and, on success,
 * advance wctx to the caller's frame.  cache_on != 0 allows recording
 * the result in the RA_FROMFP / RA_EOSTCK caches.
 * Returns RA_SUCCESS, RA_END_OF_STACK, or RA_FAILURE.  */
static int
process_return_real (struct WalkContext *wctx, struct AdvWalkContext *cur, int cache_on)
{
  /* the simulated sp must lie within the thread stack */
  if ((unsigned long) cur->sp >= wctx->sbase ||
      (unsigned long) cur->sp < wctx->sp)
    {
      DprintfT (SP_DUMP_UNWIND, "unwind.c: not in stack: %p [0x%lX-0x%lX]\n",
		cur->sp, wctx->sp, wctx->sbase);
      return RA_FAILURE;
    }

  unsigned long ra;
  if (cur->sp == cur->ra_loc)
    {
      /* sp points at the slot whose saved RA we already tracked */
      ra = cur->ra_sav;
      cur->sp++;
    }
  else if (cur->sp >= cur->sp_safe && (unsigned long) cur->sp < wctx->sbase)
    ra = *cur->sp++;	/* pop the candidate RA from the stack */
  else
    {
      DprintfT (SP_DUMP_UNWIND, "unwind.c: not safe: %p >= %p\n", cur->sp, cur->sp_safe);
      return RA_FAILURE;
    }
  /* a zero RA marks the bottom of the stack */
  if (ra == 0)
    {
      if (cache_on)
	cache_put (wctx, RA_EOSTCK);
      wctx->pc = ra;
      wctx->sp = (unsigned long) cur->sp;
      wctx->fp = (unsigned long) cur->fp;
      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d RA_END_OF_STACK\n", __LINE__);
      return RA_END_OF_STACK;
    }

  /* the RA must fall within some known executable segment */
  unsigned long tbgn = wctx->tbgn;
  unsigned long tend = wctx->tend;
  if (ra < tbgn || ra >= tend)
    {
      if (!__collector_check_segment (ra, &tbgn, &tend, 0))
	{
	  DprintfT (SP_DUMP_UNWIND, "unwind.c: not in segment: 0x%lX [0x%lX-0x%lX]\n",
		    ra, wctx->tbgn, wctx->tend);
	  return RA_FAILURE;
	}
    }

  if (cur->cval == RA_FROMFP)
    {
      /* only keep the frame-pointer hint if fp pointed at the slot pair
	 (saved fp + RA) we just popped */
      if (wctx->fp == (unsigned long) (cur->sp - 2))
	{
	  if (cache_on)
	    cache_put (wctx, RA_FROMFP);
	}
      else
	cur->cval = 0;
    }

  /* adjust_ret_addr() normalizes the RA; 0 means it is unusable */
  unsigned long npc = adjust_ret_addr (ra, ra - tbgn, tend);
  if (npc == 0)
    {
      if (cur->cval == RA_FROMFP)
	{
	  /* We have another evidence that we can trust this RA */
	  DprintfT (SP_DUMP_UNWIND, "unwind.c: trusted fp, pc = 0x%lX\n", wctx->pc);
	  wctx->pc = ra;
	}
      else
	{
	  DprintfT (SP_DUMP_UNWIND, "unwind.c: 0 after adjustment\n");
	  return RA_FAILURE;
	}
    }
  else
    wctx->pc = npc;
  wctx->sp = (unsigned long) cur->sp;
  wctx->fp = (unsigned long) cur->fp;
  wctx->tbgn = tbgn;
  wctx->tend = tend;
  return RA_SUCCESS;
}
1742
/* Convenience wrapper: process_return_real() with caching enabled.  */
static int
process_return (struct WalkContext *wctx, struct AdvWalkContext *cur)
{
  return process_return_real (wctx, cur, 1);
}
1748
/* Record, for the pc in wctx_pc_save, the unwind outcome (val), the
 * caller context (wctx) and a guard RA in the OMP unwind caches, so a
 * later unwind from the same pc can be resolved without re-walking the
 * code.  cur_sp_safe is the lowest stack address safe to dereference.  */
static void
omp_cache_put (unsigned long *cur_sp_safe, struct WalkContext * wctx_pc_save,
	       struct WalkContext *wctx, uint32_t val)
{
  /* lazily allocate the cache tables on first use */
  if (omp_no_walk && (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL))
    {
      size_t sz = OmpValTableSize * sizeof (*OmpCurCtxs);
      OmpCurCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpCtxs);
      OmpCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpVals);
      OmpVals = (uint32_t*) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpRAs);
      OmpRAs = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
    }
  if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL)
    return;	/* allocation failed: caching disabled */

#define USE_18434988_OMP_CACHE_WORKAROUND
#ifndef USE_18434988_OMP_CACHE_WORKAROUND
  uint64_t idx = wctx_pc_save->pc * ROOT_IDX;
  OmpVals[ idx % OmpValTableSize ] = val;
  idx = (idx + val) * ROOT_IDX;
  __collector_memcpy (&(OmpCurCtxs[ idx % OmpValTableSize ]), wctx_pc_save, sizeof (struct WalkContext));
  idx = (idx + val) * ROOT_IDX;
  __collector_memcpy (&(OmpCtxs[ idx % OmpValTableSize ]), wctx, sizeof (struct WalkContext));
#endif
  /* locate the stack slot holding the RA that will guard this entry */
  unsigned long *sp = NULL;
  unsigned long fp = wctx_pc_save->fp;
  int from_fp = 0;
  if (val == RA_END_OF_STACK)
    {
      sp = (unsigned long *) (wctx->sp);
      sp--;
      TprintfT (DBG_LT1, "omp_cache_put: get sp from EOS, sp=%p\n", sp);
    }
  else
    {
      if (fp < wctx_pc_save->sp || fp >= wctx_pc_save->sbase - sizeof (*sp))
	{
	  /* fp is outside the stack: fall back to the caller context sp */
	  sp = (unsigned long *) (wctx->sp);
	  sp--;
	  TprintfT (DBG_LT1, "omp_cache_put: get sp from sp, sp=%p\n", sp);
	}
      else
	{
	  TprintfT (DBG_LT1, "omp_cache_put: get sp from fp=0x%lx\n", fp);
	  sp = (unsigned long *) fp;
	  from_fp = 1;
	}
    }

  if (sp < cur_sp_safe || ((unsigned long) sp >= wctx->sbase))
    return;

  unsigned long ra = *sp++;
  if (from_fp)
    {
      /* the RA found via fp must lie in pc's segment, else retry via sp */
      unsigned long tbgn = wctx_pc_save->tbgn;
      unsigned long tend = wctx_pc_save->tend;
      if (ra < tbgn || ra >= tend)
	{
	  sp = (unsigned long *) (wctx->sp);
	  sp--;
	  ra = *sp++;
	}
    }
#ifdef USE_18434988_OMP_CACHE_WORKAROUND
  /* 18434988: zero the RA slot first so a concurrent reader treats the
     entry as invalid while the other slots are being rewritten */
  uint64_t idx1 = wctx_pc_save->pc * ROOT_IDX;
  uint64_t idx2 = (idx1 + val) * ROOT_IDX;
  uint64_t idx3 = (idx2 + val) * ROOT_IDX;
  uint64_t idx4 = (idx3 + val) * ROOT_IDX;
  OmpRAs [ idx4 % OmpValTableSize ] = 0; // lock
  OmpVals[ idx1 % OmpValTableSize ] = val;
  __collector_memcpy (&(OmpCurCtxs[ idx2 % OmpValTableSize ]), wctx_pc_save, sizeof (struct WalkContext));
  __collector_memcpy (&(OmpCtxs [ idx3 % OmpValTableSize ]), wctx, sizeof (struct WalkContext));
  OmpRAs [ idx4 % OmpValTableSize ] = ra;
#else
  idx = (idx + val) * ROOT_IDX;
  OmpRAs[ idx % OmpValTableSize ] = ra;
#endif
  TprintfT (DBG_LT1, "omp_cache_put: pc=0x%lx\n", wctx_pc_save->pc);
}
1832
1833/*
1834 *  See bug 17166877 - malloc_internal unwind failure.
1835 *  Sometimes there are several calls right after ret, like:
1836 *      leave
1837 *      ret
1838 *      call xxx
1839 *      call xxxx
1840 *      call xxxxx
1841 *  If they are also jump targets, we should better not
1842 *  create new jump context for those, since they may
1843 *  end up into some other function.
1844 */
/* Heuristic from bug 17166877 (see comment above): decide whether the
   call instruction at npc sits in a run of calls placed right after
   "leave; ret" (0xc9 0xc3).  Such calls are likely jump targets of some
   other function, so the caller should not fork a new walk context for
   them.  Returns 1 when the pattern matches, 0 otherwise.  */
static int
is_after_ret (unsigned char * npc)
{
  if (*npc != 0xe8)		/* must start at a call opcode */
    return 0;
  const int maxsteps = 10;
  const int mincalls = 3;
  unsigned char *first = npc;	/* earliest call in the backward run */
  int calls = 1;
  int k;
  /* extend the run backwards over consecutive 5-byte calls */
  for (k = 0; *(first - 5) == 0xe8 && k < maxsteps; k++)
    {
      first -= 5;
      calls++;
    }
  /* the run must be immediately preceded by "leave; ret" */
  if (*(first - 1) != 0xc3 || *(first - 2) != 0xc9)
    return 0;
  /* extend the run forwards from the original position */
  unsigned char *last = npc;
  for (k = 0; *(last + 5) == 0xe8 && k < maxsteps; k++)
    {
      last += 5;
      calls++;
    }
  return calls >= mincalls;
}
1874
1875static int
1876find_i386_ret_addr (struct WalkContext *wctx, int do_walk)
1877{
1878  if (wctx->sp == 0)
1879    // Some artificial contexts may have %sp set to 0. See SETFUNCTIONCONTEXT()
1880    return RA_FAILURE;
1881
1882  /* Check cached values */
1883  int retc = cache_get (wctx);
1884  if (retc != RA_FAILURE)
1885    return retc;
1886
1887  /* An attempt to perform code analysis for call stack tracing */
1888  unsigned char opcode;
1889  unsigned char extop;
1890  unsigned char extop2;
1891  unsigned char modrm;
1892  int imm8; /* immediate operand, byte */
1893  int immv; /* immediate operand, word(2) or doubleword(4) */
1894  int reg; /* register code */
1895
1896  /* Buffer for branch targets (analysis stoppers) */
1897  unsigned char *targets[MAXTRGTS];
1898  int ntrg = 0; /* number of entries in the table */
1899  targets[ntrg++] = (unsigned char*) wctx->pc;
1900  targets[ntrg++] = (unsigned char*) - 1;
1901
1902  struct AdvWalkContext buf[MAXCTX];
1903  struct AdvWalkContext *cur = buf;
1904  CALL_UTIL (memset)((void*) cur, 0, sizeof (*cur));
1905
1906  cur->pc = (unsigned char*) wctx->pc;
1907  cur->sp = (unsigned long*) wctx->sp;
1908  cur->sp_safe = cur->sp - RED_ZONE; /* allow for the 128-byte red zone on amd64 */
1909  cur->fp = (unsigned long*) wctx->fp;
1910  cur->tidx = 1;
1911  DprintfT (SP_DUMP_UNWIND, "\nstack_unwind (x86 walk):%d %p start\n", __LINE__, cur->pc);
1912
1913  int nctx = 1; /* number of contexts being processed */
1914  int cnt = 8192; /* number of instructions to analyse */
1915
1916  /*
1917   * The basic idea of our x86 stack unwind is that we don't know
1918   * if we can trust the frame-pointer register.  So we walk
1919   * instructions to find a return instruction, at which point
1920   * we know the return address is on the top of the stack, etc.
1921   *
1922   * A severe challenge to walking x86 instructions is when we
1923   * encounter "jmp *(reg)" instructions, where we are expected
1924   * to jump to the (unknown-to-us) contents of a register.
1925   *
1926   * The "jmp_reg" code here attempts to keep track of the
1927   * context for such a jump, deferring any handling of such
1928   * a difficult case.  We continue with other contexts, hoping
1929   * that some other walk will take us to a return instruction.
1930   *
1931   * If no other walk helps, we return to "jmp_reg" contexts.
1932   * While we don't know the jump target, it is possible that the
1933   * bytes immediately following the jmp_reg instruction represent
1934   * one possible target, as might be the case when a "switch"
1935   * statement is compiled.
1936   *
1937   * Unfortunately, the bytes following a "jmp_reg" instruction might
1938   * instead be a jump target from somewhere else -- execution might
1939   * never "fall through" from the preceding "jmp_reg".  Those bytes
1940   * might not even be instructions at all.  There are many uses of
1941   * jmp_reg instructions beyond just compiling switch statements.
1942   *
1943   * So walking the bytes after a "jmp_reg" instruction can lead
1944   * to bugs and undefined behavior, including SEGV and core dump.
1945   *
1946   * We currently do not really understand the "jmp_reg" code below.
1947   */
1948  int jmp_reg_switch_mode = 0;
1949  int num_jmp_reg = 0; // number of jmp *reg met when switch mode is off or when in current switch case
1950  int total_num_jmp_reg = 0; // number of total jmp *reg met
1951  struct AdvWalkContext * jmp_reg_ctx[MAXJMPREG]; // context of jmp *reg met when switch mode is off or when in current switch case
1952  struct AdvWalkContext * jmp_reg_switch_ctx[MAXJMPREG]; // context of jmp *reg used in switch cases
1953  struct AdvWalkContext * jmp_reg_switch_backup_ctx = NULL; // context of the first jmp *reg used in switch cases
1954
1955  int cur_jmp_reg_switch = 0; // current switch table
1956  int num_jmp_reg_switch = 0; // number of switch table
1957  int jmp_reg_switch_case = 0; // case number in current switch table
1958  unsigned char * jmp_reg_switch_pc = NULL; // the start pc of current switch case
  unsigned char * jmp_reg_switch_pc_old = NULL; // backup for deleting context of jump target
1960  unsigned char * jmp_reg_switch_base = NULL; // start pc for checking offsets
1961  int max_jmp_reg_switch_case = 2;
1962#if WSIZE(32)
1963  int max_switch_pc_offset = 512;
1964#else // WSIZE(64)
1965  int max_switch_pc_offset = 1024;
1966#endif
1967  int expected_num_jmp_reg = 1; // should be smaller than MAXJMPREG
1968  int max_num_jmp_reg_seen = 4; // try to resolve return if there are so many such instructions
1969
1970
1971  int save_ctx = 0; // flag to save walk context in the cache to speed up unwind
1972  struct WalkContext wctx_pc_save;
1973  if (do_walk == 0)
1974    // do_walk is the flag indicating not walking through the instructions, resolving the RA from the stack fp first
1975    __collector_memcpy (&wctx_pc_save, wctx, sizeof (struct WalkContext));
1976
1977startWalk:
1978  if (do_walk == 0)
1979    { // try to resolve RA from stack frame pointer
1980      if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL)
1981	{
1982	  do_walk = 1;
1983	  goto startWalk;
1984	}
      // before goto checkFP, try the RA from cache (key: WalkContext -> value: caller's WalkContext)
1986      uint64_t idx = wctx->pc * ROOT_IDX;
1987      uint32_t val = OmpVals[idx % OmpValTableSize];
1988      idx = (idx + val) * ROOT_IDX;
1989#ifdef USE_18434988_OMP_CACHE_WORKAROUND
1990      // Check ra: if it is 0 - then cache is invalid
1991      uint64_t idx4;
1992      idx4 = (idx + val) * ROOT_IDX;
1993      idx4 = (idx4 + val) * ROOT_IDX;
1994      if (0 == OmpRAs[ idx4 % OmpValTableSize ])  // Invalid cache
1995	goto checkFP;
1996#endif
1997      struct WalkContext saved_ctx;
1998      __collector_memcpy (&saved_ctx, &OmpCurCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
1999      if (wctx->pc == saved_ctx.pc
2000	  && wctx->sp == saved_ctx.sp
2001	  && wctx->fp == saved_ctx.fp
2002	  && wctx->tbgn == saved_ctx.tbgn
2003	  && wctx->tend == saved_ctx.tend)
2004	{ // key match, RA may be valid
2005	  idx = (idx + val) * ROOT_IDX;
2006	  unsigned long *sp = NULL;
2007	  unsigned long fp = wctx->fp;
2008	  int from_fp = 0;
2009	  if (val == RA_END_OF_STACK)
2010	    {
2011	      DprintfT (SP_DUMP_UNWIND, "find_i386_ret_addr:%d -- RA_END_OF_STACK: pc=0x%lx\n", __LINE__, wctx->pc);
2012	      __collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
2013	      return val;
2014	    }
2015	  else
2016	    {
2017	      if (fp < wctx->sp || fp >= wctx->sbase - sizeof (*sp))
2018		{
2019		  TprintfT (DBG_LT1, "omp_cache_get -- wrong fp: pc=0x%lx\n", wctx->pc);
2020		  sp = (unsigned long *) (OmpCtxs[ idx % OmpValTableSize ].sp);
2021		  sp--;
2022		  if (sp < cur->sp_safe || (unsigned long) sp >= wctx->sbase)
2023		    {
2024		      goto checkFP;
2025		    }
2026		  unsigned long ra = *sp;
2027		  uint64_t idx2 = (idx + val) * ROOT_IDX;
2028		  if (OmpRAs[ idx2 % OmpValTableSize ] == ra)
2029		    {
2030		      __collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
2031		      TprintfT (DBG_LT1, "omp_cache_get -- ra match with target sp: pc=0x%lx, ra=0x%lx, val=%d\n", wctx->pc, ra, val);
2032		      return val;
2033		    }
2034		  TprintfT (DBG_LT1, "omp_cache_get -- ra mismatch: ra=0x%lx, expected ra=0x%lx, val=%d\n", ra, OmpRAs[ idx2 % OmpValTableSize ], val);
2035		  goto checkFP;
2036		}
2037	      sp = (unsigned long *) fp;
2038	      from_fp = 1;
2039	    }
2040
2041	  uint64_t idx2 = (idx + val) * ROOT_IDX;
2042	  unsigned long ra = *sp++;
2043	  if (from_fp)
2044	    {
2045	      unsigned long tbgn = wctx->tbgn;
2046	      unsigned long tend = wctx->tend;
2047	      if (ra < tbgn || ra >= tend)
2048		{
2049		  sp = (unsigned long *) (OmpCtxs[ idx % OmpValTableSize ].sp);
2050		  sp--;
2051		  //if (sp < cur->sp_safe - 16 || (unsigned long)sp >= wctx->sbase - sizeof(*sp)) {
2052		  // The check above was replaced with the check below,
2053		  // because we do not know why "- 16" and "- sizeof(*sp)" was used.
2054		  if (sp < cur->sp_safe || (unsigned long) sp >= wctx->sbase)
2055		    goto checkFP;
2056		  else
2057		    ra = *sp;
2058		}
2059	    }
2060	  if (OmpRAs[ idx2 % OmpValTableSize ] == ra)
2061	    {
2062	      TprintfT (DBG_LT1, "omp_cache_get -- ra match: pc=0x%lx\n", wctx->pc);
2063	      __collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
2064	      return val;
2065	    }
2066	}
2067      goto checkFP;
2068    }
2069  else
2070    {
2071      CALL_UTIL (memset)(jmp_reg_ctx, 0, MAXJMPREG * sizeof (struct AdvWalkContext *));
2072      CALL_UTIL (memset)(jmp_reg_switch_ctx, 0, MAXJMPREG * sizeof (struct AdvWalkContext *));
2073    }
2074  while (cnt--)
2075    {
2076      if (nctx == 0 && (num_jmp_reg == expected_num_jmp_reg || jmp_reg_switch_mode == 1))
2077	{ // no context available, try jmp switch mode
2078	  int i = 0;
2079	  if (num_jmp_reg == expected_num_jmp_reg)
2080	    jmp_reg_switch_mode = 0; // first jmp reg expected, restart switch mode
2081	  DprintfT (SP_DUMP_UNWIND, "unwind.c: begin switch mode, num_jmp_reg = %d, jmp_reg_switch_backup_ctx=%p, jmp_reg_switch_case=%d, jmp_reg_switch_mode=%d.\n",
2082		    num_jmp_reg, jmp_reg_switch_backup_ctx, jmp_reg_switch_case, jmp_reg_switch_mode);
2083	  // the ideal asm of switch is
2084	  //   jmp reg
2085	  //   ...//case 1
2086	  //   ret
2087	  //   ...//case 2
2088	  //   ret
2089	  //   ...//etc
2090	  if (jmp_reg_switch_mode == 0)
2091	    {
2092	      num_jmp_reg_switch = num_jmp_reg; // backup num_jmp_reg
2093	      jmp_reg_switch_mode = 1; // begin switch mode
2094	      for (i = 0; i < num_jmp_reg_switch; i++)
2095		{
2096		  if (jmp_reg_switch_ctx[i] == NULL)
2097		    jmp_reg_switch_ctx[i] = (struct AdvWalkContext*) alloca (sizeof (*jmp_reg_switch_ctx[i]));
2098		  if (jmp_reg_switch_ctx[i] != NULL)
2099		    { // backup jmp_reg_ctx
2100		      __collector_memcpy (jmp_reg_switch_ctx[i], jmp_reg_ctx[i], sizeof (*jmp_reg_switch_ctx[i]));
2101		      cur_jmp_reg_switch = 0; // reset the current switch table
2102		      jmp_reg_switch_case = 0; // reset the case number in current switch table
2103		    }
2104		}
2105	      if (jmp_reg_switch_backup_ctx == NULL)
2106		{ // only backup when the first jmp *reg is met for restoring later, if switch mode fails to resolve RA
2107		  jmp_reg_switch_backup_ctx = (struct AdvWalkContext*) alloca (sizeof (*jmp_reg_switch_backup_ctx));
2108		  if (jmp_reg_switch_backup_ctx != NULL)
2109		    __collector_memcpy (jmp_reg_switch_backup_ctx, cur, sizeof (*cur));
2110		  DprintfT (SP_DUMP_UNWIND, "unwind.c: back up context for switch mode.\n");
2111		}
2112	    }
2113	  if (jmp_reg_switch_mode == 1)
2114	    { // in the process of trying switch cases
2115	      if (cur_jmp_reg_switch == num_jmp_reg_switch)
2116		{
2117		  DprintfT (SP_DUMP_UNWIND, "unwind.c: have tried all switch with max_jmp_reg_switch_case for each\n");
2118		  if (jmp_reg_switch_backup_ctx != NULL)
2119		    __collector_memcpy (cur, jmp_reg_switch_backup_ctx, sizeof (*cur));
2120		  int rc = process_return_real (wctx, cur, 0);
2121		  if (rc == RA_SUCCESS)
2122		    {
2123		      if (save_ctx)
2124			omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
2125		      return rc;
2126		    }
2127		  break; // have tried all switch with max_jmp_reg_switch_case for each, goto checkFP
2128		}
2129	      unsigned char *npc = jmp_reg_switch_ctx[cur_jmp_reg_switch]->pc;
2130	      if (jmp_reg_switch_case == 0)
2131		// first switch case
2132		npc = check_modrm (npc); // pc next to "jmp reg" instruction
2133	      else if (jmp_reg_switch_pc != NULL)
		npc = jmp_reg_switch_pc; // pc next to "ret" instruction of previous case
2135	      else
2136		{
2137		  DprintfT (SP_DUMP_UNWIND, "unwind.c: unexpected jum switch mode situation, jmp_reg_switch_case=%d, jmp_reg_switch_pc=%p\n",
2138			    jmp_reg_switch_case, jmp_reg_switch_pc);
2139		  break; //goto checkFP
2140		}
2141	      jmp_reg_switch_base = npc;
2142	      struct AdvWalkContext *new = buf + nctx;
2143	      nctx += 1;
2144	      __collector_memcpy (new, jmp_reg_switch_ctx[cur_jmp_reg_switch], sizeof (*new));
2145	      new->pc = npc;
2146	      cur = new; /* advance the new context first */
2147	      jmp_reg_switch_pc = NULL;
2148	      jmp_reg_switch_case++;
2149	      if (jmp_reg_switch_case == max_jmp_reg_switch_case)
2150		{ // done many cases, change to another switch table
2151		  cur_jmp_reg_switch++;
2152		  jmp_reg_switch_case = 0;
2153		}
2154	    }
2155	  num_jmp_reg = 0;
2156	}
2157      if (jmp_reg_switch_mode == 1)
2158	{ // when processing switch cases, check pc each time
2159	  unsigned long tbgn = wctx->tbgn;
2160	  unsigned long tend = wctx->tend;
2161	  if ((unsigned long) (cur->pc) < tbgn || (unsigned long) (cur->pc) >= tend)
2162	    {
2163	      DprintfT (SP_DUMP_UNWIND, "unwind.c: pc out of range, pc=0x%lx\n", (unsigned long) (cur->pc));
2164	      break;
2165	    }
2166	  if (jmp_reg_switch_base != NULL && cur->pc > jmp_reg_switch_base + max_switch_pc_offset)
2167	    {
2168	      DprintfT (SP_DUMP_UNWIND, "unwind.c: limit the walk offset after jmp reg instruction\n");
2169	      if (jmp_reg_switch_backup_ctx != NULL)
2170		__collector_memcpy (cur, jmp_reg_switch_backup_ctx, sizeof (*cur));
2171	      int rc = process_return_real (wctx, cur, 0);
2172	      if (rc == RA_SUCCESS)
2173		{
2174		  if (save_ctx)
2175		    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
2176		  return rc;
2177		}
	      break; // limit the walk offset after jmp reg instruction, goto checkFP
2179	    }
2180	}
2181
2182      if (nctx == 0)
2183	break;
2184//      dump_targets (__LINE__, ntrg, targets);
2185      while (cur->pc > targets[cur->tidx])
2186	cur->tidx += 1;
2187      if (cur->pc == targets[cur->tidx])
2188	{
2189	  /* Stop analysis. Delete context. */
2190	  if (jmp_reg_switch_mode == 0 || cur->pc != jmp_reg_switch_pc_old)
2191	    {
2192	      if (jmp_reg_switch_mode == 1 && nctx == 1 && jmp_reg_switch_pc == NULL)
2193		{
2194		  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d old target, cur->pc=%p, jmp_reg_switch_pc=%p, nctx=%d\n",
2195			    __LINE__, cur->pc, jmp_reg_switch_pc, nctx);
		  jmp_reg_switch_pc = cur->pc; // save pc before deleting context, may be used as a start of switch case
2197		  jmp_reg_switch_pc_old = jmp_reg_switch_pc;
2198		}
2199	      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, old target.\n", __LINE__);
2200	      DELETE_CURCTX ();
2201	      if (cur >= buf + nctx)
2202		cur = buf;
2203	      continue;
2204	    }
2205	  if (jmp_reg_switch_mode == 1 && cur->pc == jmp_reg_switch_pc_old)
2206	    jmp_reg_switch_pc_old = NULL; // reset jmp_reg_switch_pc_old to delete the context later when cur->pc != jmp_reg_switch_pc_old
2207	}
2208
2209      /* let's walk the next x86 instruction */
2210      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cur:%ld pc=0x%lx %02x %02x %02x %02x %02x %02x %02x sp=0x%lx\n",
2211	       __LINE__, (long) (cur - buf), (unsigned long) cur->pc,
2212	       (int) cur->pc[0], (int) cur->pc[1], (int) cur->pc[2],
2213	       (int) cur->pc[3], (int) cur->pc[4], (int) cur->pc[5],
2214	       (int) cur->pc[6], (unsigned long) cur->sp);
2215      int v = 4; /* Operand size */
2216      int a = 4; /* Address size */
2217      /* int W = 0;	   REX.W bit */
2218#if WSIZE(64)
2219      int R = 0; /* REX.R bit */
2220#endif
2221      int X = 0; /* REX.X bit */
2222      int B = 0; /* REX.B bit */
2223      /* Check prefixes */
2224      int done = 0;
2225      while (!done)
2226	{
2227	  opcode = *cur->pc++;
2228	  switch (opcode)
2229	    {
2230	    case 0x66: /* opd size override */
2231	      v = 2;
2232	      break;
2233	    case 0x67: /*addr size override */
2234	      a = 2;
2235	      break;
2236#if WSIZE(64)
2237	    case 0x40: /* REX */
2238	    case 0x41:
2239	    case 0x42:
2240	    case 0x43:
2241	    case 0x44:
2242	    case 0x45:
2243	    case 0x46:
2244	    case 0x47:
2245	    case 0x48:
2246	    case 0x49:
2247	    case 0x4a:
2248	    case 0x4b:
2249	    case 0x4c:
2250	    case 0x4d:
2251	    case 0x4e:
2252	    case 0x4f:
2253	      B = (opcode & 0x1) ? 8 : 0;
2254	      X = (opcode & 0x2) ? 8 : 0;
2255	      R = (opcode & 0x4) ? 8 : 0;
2256	      if (opcode & 0x8)  /* 64 bit operand size */
2257		v = 8;
2258	      opcode = *cur->pc++;
2259	      done = 1;
2260	      break;
2261#endif
2262	    default:
2263	      done = 1;
2264	      break;
2265	    }
2266	}
2267      int z = (v == 8) ? 4 : v;
2268      switch (opcode)
2269	{
2270	case 0x0: /* add Eb,Gb */
2271	case 0x01: /* add Ev,Gv */
2272	case 0x02: /* add Gb,Eb */
2273	case 0x03: /* add Gv,Ev */
2274	  cur->pc = check_modrm (cur->pc);
2275	  break;
2276	case 0x04: /* add %al,Ib */
2277	  cur->pc += 1;
2278	  break;
2279	case 0x05: /* add %eax,Iz */
2280	  cur->pc += z;
2281	  break;
2282	case 0x06: /* push es */
2283	  cur->sp -= 1;
2284	  break;
2285	case 0x07: /* pop es */
2286	  cur->sp += 1;
2287	  if (cur->sp - RED_ZONE > cur->sp_safe)
2288	    cur->sp_safe = cur->sp - RED_ZONE;
2289	  break;
2290	case 0x08: /* or Eb,Gb */
2291	case 0x09: /* or Ev,Gv */
2292	case 0x0a: /* or Gb,Eb */
2293	case 0x0b: /* or Gv,Ev */
2294	  cur->pc = check_modrm (cur->pc);
2295	  break;
2296	case 0x0c: /* or %al,Ib */
2297	  cur->pc += 1;
2298	  break;
2299	case 0x0d: /* or %eax,Iz */
2300	  cur->pc += z;
2301	  break;
2302	case 0x0e: /* push cs */
2303	  cur->sp -= 1;
2304	  break;
2305	case 0x0f: /* two-byte opcodes */
2306	  extop = *cur->pc++;
2307	  switch (extop)
2308	    { /* RTM or HLE */
2309	    case 0x01:
2310	      extop2 = *cur->pc;
2311	      switch (extop2)
2312		{
2313		case 0xd5: /* xend */
2314		case 0xd6: /* xtest */
2315		  cur->pc++;
2316		  break;
2317		default:
2318		  break;
2319		}
2320	      break;
2321	    case 0x03:
2322	      cur->pc = check_modrm (cur->pc);
2323	      break;
2324	    case 0x0b:
2325	      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, undefined instruction. opcode=0x%02x\n",
2326		       __LINE__, (int) opcode);
2327	      DELETE_CURCTX ();
2328	      break;
2329	    case 0x05: /* syscall */
2330	    case 0x34: /* sysenter */
2331	      if (cur->rax == __NR_exit)
2332		{
2333		  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode=0x%02x\n",
2334			   __LINE__, (int) opcode);
2335		  DELETE_CURCTX ();
2336		  break;
2337		}
2338	      else if (cur->rax == __NR_rt_sigreturn)
2339		{
2340		  if (jmp_reg_switch_mode == 1)
2341		    {
2342		      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address under jmp switch mode, opcode=0x%02x\n",
2343			       __LINE__, (int) opcode);
2344		      goto checkFP;
2345		    }
2346		  wctx->sp = (unsigned long) cur->sp;
2347		  if (save_ctx)
2348		    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_RT_SIGRETURN);
2349		  return RA_RT_SIGRETURN;
2350		}
2351#if WSIZE(32)
2352	      else if (cur->rax == __NR_sigreturn)
2353		{
2354		  if (jmp_reg_switch_mode == 1)
2355		    {
2356		      DprintfT (SP_DUMP_UNWIND, "unwind.c: give up return address under jmp switch mode, opcode = 0x34\n");
2357		      goto checkFP;
2358		    }
2359		  wctx->sp = (unsigned long) cur->sp;
2360		  if (save_ctx)
2361		    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_SIGRETURN);
2362		  return RA_SIGRETURN;
2363		}
2364#endif
2365	      /* Check for Linus' trick in the vsyscall page */
2366	      while (*cur->pc == 0x90)  /* nop */
2367		cur->pc++;
2368	      if (*cur->pc == 0xeb)  /* jmp imm8 */
2369		cur->pc += 2;
2370	      break;
2371	    case 0x0d: /* nop Ev */
2372	      cur->pc = check_modrm (cur->pc);
2373	      break;
2374	    case 0x10: /* xmm Vq,Wq */
2375	    case 0x11:
2376	    case 0x12:
2377	    case 0x13:
2378	    case 0x14:
2379	    case 0x15:
2380	    case 0x16:
2381	    case 0x17:
2382	      cur->pc = check_modrm (cur->pc);
2383	      break;
2384	    case 0x18: /* prefetch */
2385	      cur->pc = check_modrm (cur->pc);
2386	      break;
2387	    case 0x1E: /* endbr64/endbr32 (f3 0f 1e .. ) is parsing as repz nop edx */
2388	      cur->pc += 2;
2389	      break;
2390	    case 0x1f: /* nop Ev */
2391	      cur->pc = check_modrm (cur->pc);
2392	      break;
2393	    case 0x28: /* xmm Vq,Wq */
2394	    case 0x29:
2395	    case 0x2a:
2396	    case 0x2b:
2397	    case 0x2c:
2398	    case 0x2d:
2399	    case 0x2e:
2400	    case 0x2f:
2401	      cur->pc = check_modrm (cur->pc);
2402	      break;
2403	    case 0x30: /* wrmsr */
2404	    case 0x31: /* rdtsc */
2405	    case 0x32: /* rdmsr */
2406	    case 0x33: /* rdpmc */
2407	      break;
2408	      /* case 0x34: sysenter (see above) */
2409	    case 0x38: case 0x3a:
2410	      extop2 = *cur->pc++;
2411	      cur->pc = check_modrm (cur->pc);
2412	      // 21275311 Unwind failure in native stack for java application running on jdk8
2413	      // Three-byte opcodes "66 0f 3a ??" should consume an additional "immediate" byte.
2414	      if (extop == 0x3a)
2415		cur->pc++;
2416	      break;
2417	    case 0x40: case 0x41: case 0x42: case 0x43: /* CMOVcc Gv,Ev */
2418	    case 0x44: case 0x45: case 0x46: case 0x47:
2419	    case 0x48: case 0x49: case 0x4a: case 0x4b:
2420	    case 0x4c: case 0x4d: case 0x4e: case 0x4f:
2421	      cur->pc = check_modrm (cur->pc);
2422	      break;
2423	    case 0x50: case 0x51: case 0x52: case 0x53:
2424	    case 0x54: case 0x55: case 0x56: case 0x57:
2425	    case 0x58: case 0x59: case 0x5a: case 0x5b:
2426	    case 0x5c: case 0x5d: case 0x5e: case 0x5f:
2427	    case 0x60: case 0x61: case 0x62: case 0x63:
2428	    case 0x64: case 0x65: case 0x66: case 0x67:
2429	    case 0x68: case 0x69: case 0x6a: case 0x6b:
2430	    case 0x6c: case 0x6d: case 0x6e: case 0x6f:
2431	      cur->pc = check_modrm (cur->pc);
2432	      break;
2433	    case 0x70: case 0x71: case 0x72: case 0x73:
2434	      cur->pc = check_modrm (cur->pc) + 1;
2435	      break;
2436	    case 0x74: case 0x75: case 0x76:
2437	      cur->pc = check_modrm (cur->pc);
2438	      break;
2439	    case 0x77:
2440	      break;
2441	    case 0x7c: case 0x7d: case 0x7e: case 0x7f:
2442	      cur->pc = check_modrm (cur->pc);
2443	      break;
2444	    case 0x80: case 0x81: case 0x82: case 0x83: /* Jcc Jz */
2445	    case 0x84: case 0x85: case 0x86: case 0x87:
2446	    case 0x88: case 0x89: case 0x8a: case 0x8b:
2447	    case 0x8c: case 0x8d: case 0x8e: case 0x8f:
2448	      immv = read_int (cur->pc, z);
2449	      cur->pc += z;
2450	      if (nctx < (jmp_reg_switch_mode ? MAXJMPREGCTX : MAXCTX))
2451		{
2452		  int tidx = 0;
2453		  unsigned char *npc = cur->pc + immv;
2454		  if ((unsigned long) npc < wctx->tbgn || (unsigned long) npc >= wctx->tend)
2455		    {
2456		      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode=0x%02x\n",
2457			       __LINE__, (int) opcode);
2458		      DELETE_CURCTX ();
2459		      break;
2460		    }
2461		  if (is_after_ret (npc))
2462		    break;
2463		  while (npc > targets[tidx])
2464		    tidx += 1;
2465		  if (npc != targets[tidx])
2466		    {
2467		      if (ntrg < MAXTRGTS)
2468			{
2469			  for (int i = 0; i < nctx; i++)
2470			    if (buf[i].tidx >= tidx)
2471			      buf[i].tidx++;
2472
2473			  /* insert a new target */
2474			  for (int i = ntrg; i > tidx; i--)
2475			    targets[i] = targets[i - 1];
2476			  ntrg += 1;
2477			  targets[tidx++] = npc;
2478			}
2479		      else
2480			DprintfT (SP_DUMP_UNWIND, "unwind.c:%d ntrg=max(%d)\n",
2481				  __LINE__, ntrg);
2482		      struct AdvWalkContext *new = buf + nctx;
2483		      nctx += 1;
2484		      __collector_memcpy (new, cur, sizeof (*new));
2485		      new->pc = npc;
2486		      new->tidx = tidx;
2487		      cur = new; /* advance the new context first */
2488		      continue;
2489		    }
2490		}
2491	      else
2492		DprintfT (SP_DUMP_UNWIND, "unwind.c:%d nctx=max(%d)\n",
2493			  __LINE__, ntrg);
2494	      break;
2495	    case 0x90: case 0x91: case 0x92: case 0x93: /* setcc Eb */
2496	    case 0x94: case 0x95: case 0x96: case 0x97:
2497	    case 0x98: case 0x99: case 0x9a: case 0x9b:
2498	    case 0x9c: case 0x9d: case 0x9e: case 0x9f:
2499	      cur->pc = check_modrm (cur->pc);
2500	      break;
2501	    case 0xa0: /* push fs */
2502	      cur->sp -= 1;
2503	      break;
2504	    case 0xa1: /* pop fs */
2505	      cur->sp += 1;
2506	      if (cur->sp - RED_ZONE > cur->sp_safe)
2507		cur->sp_safe = cur->sp - RED_ZONE;
2508	      break;
2509	    case 0xa2: /* cpuid */
2510	      break;
2511	    case 0xa3: /* bt Ev,Gv */
2512	      cur->pc = check_modrm (cur->pc);
2513	      break;
2514	    case 0xa4: /* shld Ev,Gv,Ib */
2515	      cur->pc = check_modrm (cur->pc);
2516	      cur->pc += 1;
2517	      break;
2518	    case 0xa5: /* shld Ev,Gv,%cl */
2519	      cur->pc = check_modrm (cur->pc);
2520	      break;
2521	    case 0xa8: /* push gs */
2522	      cur->sp -= 1;
2523	      break;
2524	    case 0xa9: /* pop gs */
2525	      cur->sp += 1;
2526	      if (cur->sp - RED_ZONE > cur->sp_safe)
2527		cur->sp_safe = cur->sp - RED_ZONE;
2528	      break;
2529	    case 0xaa: /* rsm */
2530	      break;
2531	    case 0xab: /* bts Ev,Gv */
2532	      cur->pc = check_modrm (cur->pc);
2533	      break;
2534	    case 0xac: /* shrd Ev,Gv,Ib */
2535	      cur->pc = check_modrm (cur->pc);
2536	      cur->pc += 1;
2537	      break;
2538	    case 0xad: /* shrd Ev,Gv,%cl */
2539	      cur->pc = check_modrm (cur->pc);
2540	      break;
2541	    case 0xae: /* group15 */
2542	      cur->pc = check_modrm (cur->pc);
2543	      break;
2544	    case 0xaf: /* imul Gv,Ev */
2545	      cur->pc = check_modrm (cur->pc);
2546	      break;
2547	    case 0xb1: /* cmpxchg Ev,Gv */
2548	      cur->pc = check_modrm (cur->pc);
2549	      break;
2550	    case 0xb3:
2551	    case 0xb6: /* movzx Gv,Eb */
2552	    case 0xb7: /* movzx Gv,Ew */
2553	      cur->pc = check_modrm (cur->pc);
2554	      break;
2555	    case 0xba: /* group8 Ev,Ib */
2556	      cur->pc = check_modrm (cur->pc);
2557	      cur->pc += 1;
2558	      break;
2559	    case 0xbb: /* btc Ev,Gv */
2560	    case 0xbc: /* bsf Gv,Ev */
2561	    case 0xbd: /* bsr Gv,Ev */
2562	      cur->pc = check_modrm (cur->pc);
2563	      break;
2564	    case 0xbe: /* movsx Gv,Eb */
2565	    case 0xbf: /* movsx Gv,Ew */
2566	      cur->pc = check_modrm (cur->pc);
2567	      break;
2568	    case 0xc0: /* xadd Eb,Gb */
2569	    case 0xc1: /* xadd Ev,Gv */
2570	      cur->pc = check_modrm (cur->pc);
2571	      break;
2572	    case 0xc2: /* cmpps V,W,Ib */
2573	      cur->pc = check_modrm (cur->pc);
2574	      cur->pc += 1;
2575	      break;
2576	    case 0xc3: /* movnti M,G */
2577	      cur->pc = check_modrm (cur->pc);
2578	      break;
2579	    case 0xc6: /* shufps V,W,Ib */
2580	      cur->pc = check_modrm (cur->pc);
2581	      cur->pc += 1;
2582	      break;
2583	    case 0xc7: /* RDRAND */
2584	      cur->pc = check_modrm (cur->pc);
2585	      break;
2586	    case 0xc8: case 0xc9: case 0xca: case 0xcb: /* bswap */
2587	    case 0xcc: case 0xcd: case 0xce: case 0xcf:
2588	      break;
2589	    case 0xd0: case 0xd1: case 0xd2: case 0xd3:
2590	    case 0xd4: case 0xd5: case 0xd6: case 0xd7:
2591	    case 0xd8: case 0xd9: case 0xda: case 0xdb:
2592	    case 0xdc: case 0xdd: case 0xde: case 0xdf:
2593	    case 0xe0: case 0xe1: case 0xe2: case 0xe3:
2594	    case 0xe4: case 0xe5: case 0xe6: case 0xe7:
2595	    case 0xe8: case 0xe9: case 0xea: case 0xeb:
2596	    case 0xec: case 0xed: case 0xee: case 0xef:
2597	    case 0xf0: case 0xf1: case 0xf2: case 0xf3:
2598	    case 0xf4: case 0xf5: case 0xf6: case 0xf7:
2599	    case 0xf8: case 0xf9: case 0xfa: case 0xfb:
2600	    case 0xfc: case 0xfd: case 0xfe: case 0xff:
2601	      cur->pc = check_modrm (cur->pc);
2602	      break;
2603	    default:
2604	      if (jmp_reg_switch_mode == 1 && extop == 0x0b)
2605		DprintfT (SP_DUMP_UNWIND, "unwind.c:%d invalid opcode ub2: 0x0f %x jmp_reg_switch_mode=%d\n",
2606			  __LINE__, (int) extop, jmp_reg_switch_mode);
2607	      else
2608		{
2609		  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0x0f %x jmp_reg_switch_mode=%d\n",
2610			    __LINE__, (int) extop, jmp_reg_switch_mode);
2611		  DELETE_CURCTX ();
2612		}
2613	      break;
2614	    }
2615	  break;
2616	case 0x10: /* adc Eb,Gb */
2617	case 0x11: /* adc Ev,Gv */
2618	case 0x12: /* adc Gb,Eb */
2619	case 0x13: /* adc Gv,Ev */
2620	  cur->pc = check_modrm (cur->pc);
2621	  break;
2622	case 0x14: /* adc %al,Ib */
2623	  cur->pc += 1;
2624	  break;
2625	case 0x15: /* adc %eax,Iz */
2626	  cur->pc += z;
2627	  break;
2628	case 0x16: /* push ss */
2629	  cur->sp -= 1;
2630	  break;
2631	case 0x17: /* pop ss */
2632	  cur->sp += 1;
2633	  if (cur->sp - RED_ZONE > cur->sp_safe)
2634	    cur->sp_safe = cur->sp - RED_ZONE;
2635	  break;
2636	case 0x18: /* sbb Eb,Gb */
2637	case 0x19: /* sbb Ev,Gv */
2638	case 0x1a: /* sbb Gb,Eb */
2639	case 0x1b: /* sbb Gv,Ev */
2640	  cur->pc = check_modrm (cur->pc);
2641	  break;
2642	case 0x1c: /* sbb %al,Ib */
2643	  cur->pc += 1;
2644	  break;
2645	case 0x1d: /* sbb %eax,Iz */
2646	  cur->pc += z;
2647	  break;
2648	case 0x1e: /* push ds */
2649	  cur->sp -= 1;
2650	  break;
2651	case 0x1f: /* pop ds */
2652	  cur->sp += 1;
2653	  if (cur->sp - RED_ZONE > cur->sp_safe)
2654	    cur->sp_safe = cur->sp - RED_ZONE;
2655	  break;
2656	case 0x20: /* and Eb,Gb */
2657	case 0x21: /* and Ev,Gv */
2658	case 0x22: /* and Gb,Eb */
2659	case 0x23: /* and Gv,Ev */
2660	  cur->pc = check_modrm (cur->pc);
2661	  break;
2662	case 0x24: /* and %al,Ib */
2663	  cur->pc += 1;
2664	  break;
2665	case 0x25: /* and %eax,Iz */
2666	  cur->pc += z;
2667	  break;
2668	case 0x26: /* seg=es prefix */
2669	  break;
2670	case 0x27: /* daa */
2671	  break;
2672	case 0x28: /* sub Eb,Gb */
2673	case 0x29: /* sub Ev,Gv */
2674	case 0x2a: /* sub Gb,Eb */
2675	case 0x2b: /* sub Gv,Ev */
2676	  cur->pc = check_modrm (cur->pc);
2677	  break;
2678	case 0x2c: /* sub %al,Ib */
2679	  cur->pc += 1;
2680	  break;
2681	case 0x2d: /* sub %eax,Iz */
2682	  cur->pc += z;
2683	  break;
2684	case 0x2e: /* seg=cs prefix */
2685	  break;
2686	case 0x2f: /* das */
2687	  break;
2688	case 0x30: /* xor Eb,Gb */
2689	case 0x31: /* xor Ev,Gv */
2690	case 0x32: /* xor Gb,Eb */
2691	case 0x33: /* xor Gv,Ev */
2692	  cur->pc = check_modrm (cur->pc);
2693	  break;
2694	case 0x34: /* xor %al,Ib */
2695	  cur->pc += 1;
2696	  break;
2697	case 0x35: /* xor %eax,Iz */
2698	  cur->pc += z;
2699	  break;
2700	case 0x36: /* seg=ss prefix */
2701	  break;
2702	case 0x37: /* aaa */
2703	  break;
2704	case 0x38: /* cmp Eb,Gb */
2705	case 0x39: /* cmp Ev,Gv */
2706	case 0x3a: /* cmp Gb,Eb */
2707	case 0x3b: /* cmp Gv,Ev */
2708	  cur->pc = check_modrm (cur->pc);
2709	  break;
2710	case 0x3c: /* cmp %al,Ib */
2711	  cur->pc += 1;
2712	  break;
2713	case 0x3d: /* cmp %eax,Iz */
2714	  cur->pc += z;
2715	  break;
2716	case 0x3e: /* seg=ds prefix */
2717	  break;
2718	case 0x3f: /* aas */
2719	  break;
2720#if WSIZE(32)
2721	case 0x40: /* inc %eax */
2722	case 0x41: /* inc %ecx */
2723	case 0x42: /* inc %edx */
2724	case 0x43: /* inc %ebx */
2725	  break;
2726	case 0x44: /* inc %esp */
2727	  /* Can't be a valid stack pointer - delete context */
2728	  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0x44.\n", __LINE__);
2729	  DELETE_CURCTX ();
2730	  break;
2731	case 0x45: /* inc %ebp */
2732	case 0x46: /* inc %esi */
2733	case 0x47: /* inc %edi */
2734	case 0x48: /* dec %eax */
2735	case 0x49: /* dec %ecx */
2736	case 0x4a: /* dec %edx */
2737	case 0x4b: /* dec %ebx */
2738	  break;
2739	case 0x4c: /* dec %esp */
2740	  /* Can't be a valid stack pointer - delete context */
2741	  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0x4c.\n", __LINE__);
2742	  DELETE_CURCTX ();
2743	  break;
2744	case 0x4d: /* dec %ebp */
2745	case 0x4e: /* dec %esi */
2746	case 0x4f: /* dec %edi */
2747	  break;
2748#endif
2749	case 0x50: /* push %eax */
2750	case 0x51: /* push %ecx */
2751	case 0x52: /* push %edx */
2752	case 0x53: /* push %ebx */
2753	case 0x54: /* push %esp */
2754	case 0x55: /* push %ebp */
2755	case 0x56: /* push %esi */
2756	case 0x57: /* push %edi */
2757	  cur->sp -= 1;
2758	  reg = OPC_REG (opcode);
2759	  if (reg == RBP)
2760	    {
2761#if 0
2762	      /* Don't do this check yet. Affects tail calls. */
2763	      /* avoid other function's prologue */
2764	      if ((cur->pc[0] == 0x89 && cur->pc[1] == 0xe5) ||
2765		  (cur->pc[0] == 0x8b && cur->pc[1] == 0xec))
2766		{
2767		  /* mov %esp,%ebp */
2768		  DELETE_CURCTX ();
2769		  break;
2770		}
2771#endif
2772	      if (cur->fp_loc == NULL)
2773		{
2774		  cur->fp_loc = cur->sp;
2775		  cur->fp_sav = cur->fp;
2776		}
2777	    }
2778	  break;
2779	case 0x58: /* pop %eax */
2780	case 0x59: /* pop %ecx */
2781	case 0x5a: /* pop %edx */
2782	case 0x5b: /* pop %ebx */
2783	case 0x5c: /* pop %esp */
2784	case 0x5d: /* pop %ebp */
2785	case 0x5e: /* pop %esi */
2786	case 0x5f: /* pop %edi */
2787	  reg = OPC_REG (opcode);
2788	  cur->regs[reg] = 0;
2789	  if (isInside ((unsigned long) cur->sp, (unsigned long) cur->sp_safe, wctx->sbase))
2790	    cur->regs[reg] = *cur->sp;
2791	  DprintfT (SP_DUMP_UNWIND, "stack_unwind:%d cur->regs[%d]=0x%lx\n",
2792		   __LINE__, reg, (unsigned long) cur->regs[reg]);
2793	  if (reg == RDX)
2794	    {
2795	      if (cur->sp >= cur->sp_safe &&
2796		  (unsigned long) cur->sp < wctx->sbase)
2797		cur->rdx = *cur->sp;
2798	    }
2799	  else if (reg == RBP)
2800	    {
2801	      if (cur->fp_loc == cur->sp)
2802		{
2803		  cur->fp = cur->fp_sav;
2804		  cur->fp_loc = NULL;
2805		}
2806	      else if (cur->sp >= cur->sp_safe &&
2807		       (unsigned long) cur->sp < wctx->sbase)
2808		cur->fp = (unsigned long*) (*cur->sp);
2809	    }
2810	  else if (reg == RSP)
2811	    {
2812	      /* f.e. JVM I2CAdapter */
2813	      if (cur->sp >= cur->sp_safe && (unsigned long) cur->sp < wctx->sbase)
2814		{
2815		  unsigned long *nsp = (unsigned long*) (*cur->sp);
2816		  if (nsp >= cur->sp && nsp <= cur->fp)
2817		    {
2818		      cur->sp = nsp;
2819		    }
2820		  else
2821		    {
2822		      DprintfT (SP_DUMP_UNWIND, "stack_unwind%d give up return address, opcode=0x%02x\n",
2823			       __LINE__, opcode);
2824		      goto checkFP;
2825		    }
2826		}
2827	      else
2828		{
2829		  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address, opcode=0x%02x\n",
2830			    __LINE__, opcode);
2831		  goto checkFP;
2832		}
2833	      break;
2834	    }
2835	  cur->sp += 1;
2836	  if (cur->sp - RED_ZONE > cur->sp_safe)
2837	    {
2838	      cur->sp_safe = cur->sp - RED_ZONE;
2839	    }
2840	  break;
2841	case 0x60: /* pusha(d) */
2842	  cur->sp -= 8;
2843	  break;
2844	case 0x61: /* popa(d) */
2845	  cur->sp += 8;
2846	  if (cur->sp - RED_ZONE > cur->sp_safe)
2847	    cur->sp_safe = cur->sp - RED_ZONE;
2848	  break;
2849	case 0x62: /* group AVX, 4-bytes EVEX prefix */
2850	  {
2851	    unsigned char *pc = cur->pc - 1; // points to the beginning of the instruction
2852	    int len = parse_x86_AVX_instruction (pc);
2853	    if (len < 4)
2854	      {
2855		DELETE_CURCTX ();
2856	      }
2857	    else
2858	      {
2859		pc += len;
2860		cur->pc = pc;
2861	      }
2862	  }
2863	  break;
2864	case 0x63: /* arpl Ew,Gw (32) movsxd Gv,Ev (64)*/
2865	  cur->pc = check_modrm (cur->pc);
2866	  break;
2867	case 0x64: /* seg=fs prefix */
2868	case 0x65: /* seg=gs prefix */
2869	  break;
2870	case 0x66: /* opd size override */
2871	case 0x67: /* addr size override */
2872	  break;
2873	case 0x68: /* push Iz */
2874	  cur->sp = (unsigned long*) ((long) cur->sp - z);
2875	  cur->pc += z;
2876	  break;
2877	case 0x69: /* imul Gv,Ev,Iz */
2878	  cur->pc = check_modrm (cur->pc);
2879	  cur->pc += z;
2880	  break;
2881	case 0x6a: /* push Ib */
2882	  cur->sp = (unsigned long*) ((long) cur->sp - v);
2883	  cur->pc += 1;
2884	  break;
2885	case 0x6b: /* imul Gv,Ev,Ib */
2886	  cur->pc = check_modrm (cur->pc);
2887	  cur->pc += 1;
2888	  break;
2889	case 0x6c: case 0x6d: case 0x6e: case 0x6f:
2890	  cur->pc = check_modrm (cur->pc);
2891	  break;
2892	case 0x70: /* jo Jb */
2893	case 0x71: /* jno Jb */
2894	case 0x72: /* jb Jb */
2895	case 0x73: /* jnb Jb */
2896	case 0x74: /* jz Jb */
2897	case 0x75: /* jnz Jb */
2898	case 0x76: /* jna Jb */
2899	case 0x77: /* ja Jb */
2900	case 0x78: /* js Jb */
2901	case 0x79: /* jns Jb */
2902	case 0x7a: /* jp Jb */
2903	case 0x7b: /* jnp Jb */
2904	case 0x7c: /* jl Jb */
2905	case 0x7d: /* jge Jb */
2906	case 0x7e: /* jle Jb */
2907	case 0x7f: /* jg Jb */
2908	  imm8 = *(char*) cur->pc++;
2909	  if (nctx < (jmp_reg_switch_mode ? MAXJMPREGCTX : MAXCTX))
2910	    {
2911	      int tidx = 0;
2912	      unsigned char *npc = cur->pc + imm8;
2913	      if (is_after_ret (npc))
2914		break;
2915	      while (npc > targets[tidx])
2916		tidx += 1;
2917	      if (npc != targets[tidx])
2918		{
2919		  if (ntrg < MAXTRGTS)
2920		    {
2921		      for (int i = 0; i < nctx; i++)
2922			if (buf[i].tidx >= tidx)
2923			  buf[i].tidx++;
2924
2925		      /* insert a new target */
2926		      for (int i = ntrg; i > tidx; i--)
2927			targets[i] = targets[i - 1];
2928		      ntrg += 1;
2929		      targets[tidx++] = npc;
2930		    }
2931		  else
2932		    DprintfT (SP_DUMP_UNWIND, "unwind.c:%d ntrg(%d)=max\n", __LINE__, ntrg);
2933		  struct AdvWalkContext *new = buf + nctx;
2934		  nctx += 1;
2935		  __collector_memcpy (new, cur, sizeof (*new));
2936		  new->pc = npc;
2937		  new->tidx = tidx;
2938		  cur = new; /* advance the new context first */
2939		  continue;
2940		}
2941	    }
2942	  else
2943	    DprintfT (SP_DUMP_UNWIND, "unwind.c:%d nctx(%d)=max\n", __LINE__, nctx);
2944	  break;
2945	case 0x80: /* group1 Eb,Ib */
2946	  cur->pc = check_modrm (cur->pc);
2947	  cur->pc += 1;
2948	  break;
2949	case 0x81: /* group1 Ev,Iz */
2950	  modrm = *cur->pc;
2951	  if (MRM_MOD (modrm) == 0xc0 && MRM_REGS (modrm) == RSP)
2952	    {
2953	      int immz = read_int (cur->pc + 1, z);
2954	      extop = MRM_EXT (modrm);
2955	      if (extop == 0) /* add  imm32,%esp */
2956		cur->sp = (unsigned long*) ((long) cur->sp + immz);
2957	      else if (extop == 4) /* and imm32,%esp */
2958		cur->sp = (unsigned long*) ((long) cur->sp & immz);
2959	      else if (extop == 5) /* sub imm32,%esp */
2960		cur->sp = (unsigned long*) ((long) cur->sp - immz);
2961	      if (cur->sp - RED_ZONE > cur->sp_safe)
2962		cur->sp_safe = cur->sp - RED_ZONE;
2963	    }
2964	  cur->pc = check_modrm (cur->pc);
2965	  cur->pc += z;
2966	  break;
2967	case 0x82: /* group1 Eb,Ib */
2968	  cur->pc = check_modrm (cur->pc);
2969	  cur->pc += 1;
2970	  break;
2971	case 0x83: /* group1 Ev,Ib */
2972	  modrm = *cur->pc;
2973	  if (MRM_MOD (modrm) == 0xc0 && MRM_REGS (modrm) == RSP)
2974	    {
2975	      imm8 = (char) cur->pc[1]; /* sign extension */
2976	      extop = MRM_EXT (modrm);
2977	      if (extop == 0) /* add  imm8,%esp */
2978		cur->sp = (unsigned long*) ((long) cur->sp + imm8);
2979	      else if (extop == 4) /* and imm8,%esp */
2980		  cur->sp = (unsigned long*) ((long) cur->sp & imm8);
2981	      else if (extop == 5) /* sub imm8,%esp */
2982		cur->sp = (unsigned long*) ((long) cur->sp - imm8);
2983	      if (cur->sp - RED_ZONE > cur->sp_safe)
2984		cur->sp_safe = cur->sp - RED_ZONE;
2985	    }
2986	  cur->pc = check_modrm (cur->pc);
2987	  cur->pc += 1;
2988	  break;
2989	case 0x84: /* test Eb,Gb */
2990	case 0x85: /* test Ev,Gv */
2991	case 0x86: /* xchg Eb,Gb */
2992	case 0x87: /* xchg Ev,Gv */
2993	  cur->pc = check_modrm (cur->pc);
2994	  break;
2995	case 0x88: /* mov Eb,Gb */
2996	  cur->pc = check_modrm (cur->pc);
2997	  break;
2998	case 0x89: /* mov Ev,Gv */
2999	  modrm = *cur->pc;
3000	  if (MRM_MOD (modrm) == 0xc0)
3001	    {
3002	      if (MRM_REGS (modrm) == RBP && MRM_REGD (modrm) == RSP)
3003		/* movl %esp,%ebp */
3004		cur->fp = cur->sp;
3005	      else if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3006		{ /* mov %ebp,%esp */
3007		  cur->sp = cur->fp;
3008		  if (cur->sp - RED_ZONE > cur->sp_safe)
3009		    cur->sp_safe = cur->sp - RED_ZONE;
3010		  if (wctx->fp == (unsigned long) cur->sp)
3011		    cur->cval = RA_FROMFP;
3012		}
3013	    }
3014	  else if (MRM_MOD (modrm) == 0x80)
3015	    {
3016	      if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3017		{
3018		  if (cur->pc[1] == 0x24)
3019		    { /* mov %ebp,disp32(%esp) - JVM */
3020		      immv = read_int (cur->pc + 2, 4);
3021		      cur->fp_loc = (unsigned long*) ((char*) cur->sp + immv);
3022		      cur->fp_sav = cur->fp;
3023		    }
3024		}
3025	    }
3026	  else if (MRM_MOD (modrm) == 0x40)
3027	    {
3028	      if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RDX)
3029		{
3030		  if (cur->pc[1] == 0x24 && cur->pc[2] == 0x0)
3031		    { /* movl %edx,0(%esp) */
3032		      cur->ra_loc = cur->sp;
3033		      cur->ra_sav = cur->rdx;
3034		    }
3035		}
3036	      else if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3037		{
3038		  if (cur->pc[1] == 0x24)
3039		    { /* mov %ebp,disp8(%esp) - JVM */
3040		      imm8 = ((char*) (cur->pc))[2];
3041		      cur->fp_loc = (unsigned long*) ((char*) cur->sp + imm8);
3042		      cur->fp_sav = cur->fp;
3043		    }
3044		}
3045	    }
3046	  else if (MRM_MOD (modrm) == 0x0)
3047	    {
3048	      if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3049		{
3050		  if (cur->pc[1] == 0x24)
3051		    { /* mov %ebp,(%esp) */
3052		      cur->fp_loc = cur->sp;
3053		      cur->fp_sav = cur->fp;
3054		    }
3055		}
3056	      else if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RDX)
3057		{
3058		  if (cur->pc[1] == 0x24)
3059		    { /* movl %edx,(%esp) */
3060		      cur->ra_loc = cur->sp;
3061		      cur->ra_sav = cur->rdx;
3062		    }
3063		}
3064	    }
3065	  cur->pc = check_modrm (cur->pc);
3066	  break;
3067	case 0x8a: /* mov Gb,Eb */
3068	  cur->pc = check_modrm (cur->pc);
3069	  break;
3070	case 0x8b: /* mov Gv,Ev */
3071	  modrm = *cur->pc;
3072	  if (MRM_MOD (modrm) == 0xc0)
3073	    {
3074	      if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3075		/* mov %esp,%ebp */
3076		cur->fp = cur->sp;
3077	      else if (MRM_REGS (modrm) == RBP && MRM_REGD (modrm) == RSP)
3078		{ /* mov %ebp,%esp */
3079		  cur->sp = cur->fp;
3080		  if (cur->sp - RED_ZONE > cur->sp_safe)
3081		    cur->sp_safe = cur->sp - RED_ZONE;
3082		  if (wctx->fp == (unsigned long) cur->sp)
3083		    cur->cval = RA_FROMFP;
3084		}
3085	    }
3086	  else if (MRM_MOD (modrm) == 0x80)
3087	    {
3088	      if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3089		{
3090		  if (cur->pc[1] == 0x24)
3091		    { /* mov disp32(%esp),%ebp */
3092		      immv = read_int (cur->pc + 2, 4);
3093		      unsigned long *ptr = (unsigned long*) ((char*) cur->sp + immv);
3094		      if (cur->fp_loc == ptr)
3095			{
3096			  cur->fp = cur->fp_sav;
3097			  cur->fp_loc = NULL;
3098			}
3099		      else if (ptr >= cur->sp_safe && (unsigned long) ptr < wctx->sbase)
3100			cur->fp = (unsigned long*) (*ptr);
3101		    }
3102		}
3103	    }
3104	  else if (MRM_MOD (modrm) == 0x40)
3105	    {
3106	      if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3107		{
3108		  if (cur->pc[1] == 0x24)
3109		    { /* mov disp8(%esp),%ebp - JVM */
3110		      imm8 = ((char*) (cur->pc))[2];
3111		      unsigned long *ptr = (unsigned long*) ((char*) cur->sp + imm8);
3112		      if (cur->fp_loc == ptr)
3113			{
3114			  cur->fp = cur->fp_sav;
3115			  cur->fp_loc = NULL;
3116			}
3117		      else if (ptr >= cur->sp_safe && (unsigned long) ptr < wctx->sbase)
3118			cur->fp = (unsigned long*) (*ptr);
3119		    }
3120		}
3121	    }
3122	  else if (MRM_MOD (modrm) == 0x0)
3123	    {
3124	      if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3125		{
3126		  if (cur->pc[1] == 0x24)
3127		    { /* mov (%esp),%ebp */
3128		      if (cur->fp_loc == cur->sp)
3129			{
3130			  cur->fp = cur->fp_sav;
3131			  cur->fp_loc = NULL;
3132			}
3133		      else if (cur->sp >= cur->sp_safe &&
3134			       (unsigned long) cur->sp < wctx->sbase)
3135			cur->fp = (unsigned long*) *cur->sp;
3136		    }
3137		}
3138	    }
3139	  cur->pc = check_modrm (cur->pc);
3140	  break;
3141	case 0x8c: /* mov Mw,Sw */
3142	  cur->pc = check_modrm (cur->pc);
3143	  break;
3144	case 0x8d: /* lea Gv,M */
3145	  modrm = *cur->pc;
3146	  if (MRM_REGD (modrm) == RSP)
3147	    {
3148	      unsigned char *pc = cur->pc;
3149	      // Mez: need to use always regs[RSP/RBP] instead cur->sp(or fp):
3150	      cur->regs[RSP] = (unsigned long) cur->sp;
3151	      cur->regs[RBP] = (unsigned long) cur->fp;
3152	      cur->pc++;
3153	      int mod = (modrm >> 6) & 3;
3154	      int r_m = modrm & 7;
3155	      long val = 0;
3156	      int undefRez = 0;
3157	      if (mod == 0x3)
3158		val = getRegVal (cur, MRM_REGS (modrm), &undefRez);
3159	      else if (r_m == 4)
3160		{ // SP or R12. Decode SIB-byte.
3161		  int sib = *cur->pc++;
3162		  int scale = 1 << (sib >> 6);
3163		  int index = X | ((sib >> 3) & 7);
3164		  int base = B | (sib & 7);
3165		  if (mod == 0)
3166		    {
3167		      if ((base & 7) == 5)
3168			{ // BP or R13
3169			  if (index != 4) // SP
3170			    val += getRegVal (cur, index, &undefRez) * scale;
3171			  val += read_int (cur->pc, 4);
3172			  cur->pc += 4;
3173			}
3174		      else
3175			{
3176			  val += getRegVal (cur, base, &undefRez);
3177			  if (index != 4) // SP
3178			    val += getRegVal (cur, index, &undefRez) * scale;
3179			}
3180		    }
3181		  else
3182		    {
3183		      val += getRegVal (cur, base, &undefRez);
3184		      if (index != 4) // SP
3185			val += getRegVal (cur, index, &undefRez) * scale;
3186		      if (mod == 1)
3187			{
3188			  val += read_int (cur->pc, 1);
3189			  cur->pc++;
3190			}
3191		      else
3192			{ // mod == 2
3193			  val += read_int (cur->pc, 4);
3194			  cur->pc += 4;
3195			}
3196		    }
3197		}
3198	      else if (mod == 0)
3199		{
3200		  if (r_m == 5)
3201		    { // BP or R13
3202		      val += read_int (cur->pc, 4);
3203		      cur->pc += 4;
3204		    }
3205		  else
3206		    val += getRegVal (cur, MRM_REGS (modrm), &undefRez);
3207		}
3208	      else
3209		{ // mod == 1 || mod == 2
3210		  val += getRegVal (cur, MRM_REGS (modrm), &undefRez);
3211		  if (mod == 1)
3212		    {
3213		      val += read_int (cur->pc, 1);
3214		      cur->pc++;
3215		    }
3216		  else
3217		    { // mod == 2
3218		      val += read_int (cur->pc, 4);
3219		      cur->pc += 4;
3220		    }
3221		}
3222	      if (undefRez)
3223		{
3224		  DprintfT (SP_DUMP_UNWIND, "stack_unwind%d cannot calculate RSP. cur->pc=0x%lx val=0x%lx\n",
3225			   __LINE__, (unsigned long) cur->pc, (unsigned long) val);
3226		  goto checkFP;
3227		}
3228	      cur->regs[MRM_REGD (modrm)] = val;
3229	      DprintfT (SP_DUMP_UNWIND, "stack_unwind%d cur->pc=0x%lx val=0x%lx wctx->sp=0x%lx wctx->sbase=0x%lx\n",
3230		       __LINE__, (unsigned long) cur->pc, (unsigned long) val,
3231		       (unsigned long) wctx->sp, (unsigned long) wctx->sbase);
3232	      if (cur->pc != check_modrm (pc))
3233		DprintfT (SP_DUMP_UNWIND, "stack_unwind%d ERROR: cur->pc=0x%lx != check_modrm(0x%lx)=0x%lx\n",
3234			 __LINE__, (unsigned long) cur->pc, (unsigned long) pc,
3235			 (unsigned long) check_modrm (pc));
3236	      if (MRM_REGD (modrm) == RSP)
3237		{
3238		  if (!isInside ((unsigned long) val, wctx->sp, wctx->sbase))
3239		    {
3240		      DprintfT (SP_DUMP_UNWIND, "stack_unwind%d cannot calculate RSP. cur->pc=0x%lx opcode=0x%02x val=0x%lx wctx->sp=0x%lx wctx->sbase=0x%lx\n",
3241			       __LINE__, (unsigned long) cur->pc, opcode, (unsigned long) val,
3242			       (unsigned long) wctx->sp, (unsigned long) wctx->sbase);
3243		      goto checkFP;
3244		    }
3245		  cur->sp = (unsigned long *) val;
3246		  if (cur->sp - RED_ZONE > cur->sp_safe)
3247		    cur->sp_safe = cur->sp - RED_ZONE;
3248		}
3249	    }
3250	  else
3251	    cur->pc = check_modrm (cur->pc);
3252	  break;
3253	case 0x8e: /* mov Sw,Ew */
3254	  cur->pc = check_modrm (cur->pc);
3255	  break;
3256	case 0x8f: /* pop Ev */
3257	  cur->pc = check_modrm (cur->pc);
3258	  cur->sp += 1;
3259	  if (cur->sp - RED_ZONE > cur->sp_safe)
3260	    cur->sp_safe = cur->sp - RED_ZONE;
3261	  break;
3262	case 0x90: /* nop */
3263	  break;
3264	case 0x91: /* xchg %eax,%ecx */
3265	case 0x92: /* xchg %eax,%edx */
3266	case 0x93: /* xchg %eax,%ebx */
3267	case 0x94: /* xchg %eax,%esp XXXX */
3268	case 0x95: /* xchg %eax,%ebp XXXX */
3269	case 0x96: /* xchg %eax,%esi */
3270	case 0x97: /* xchg %eax,%edi */
3271	  break;
3272	case 0x98: /* cbw/cwde */
3273	case 0x99: /* cwd/cwq */
3274	  break;
3275	case 0x9a: /* callf Ap */
3276	  if (jmp_reg_switch_mode == 1)
3277	    {
3278	      struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur));
3279	      __collector_memcpy (tmpctx, cur, sizeof (*cur));
3280	      int rc = process_return (wctx, tmpctx);
3281	      if (rc != RA_FAILURE)
3282		{
3283		  if (save_ctx)
3284		    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3285		  return rc;
3286		}
3287	    }
3288	  cur->pc += 2 + a;
3289	  break;
3290	case 0x9b: /* fwait */
3291	case 0x9c: /* pushf Fv */
3292	case 0x9d: /* popf Fv */
3293	case 0x9e: /* sahf */
3294	case 0x9f: /* lahf */
3295	  break;
3296	case 0xa0: /* mov al,Ob */
3297	case 0xa1: /* mov eax,Ov */
3298	case 0xa2: /* mov Ob,al */
3299	case 0xa3: /* mov Ov,eax */
3300	  cur->pc += a;
3301	  break;
3302	case 0xa4: /* movsb Yb,Xb */
3303	case 0xa5: /* movsd Yv,Xv */
3304	case 0xa6: /* cmpsb Yb,Xb */
3305	case 0xa7: /* cmpsd Xv,Yv */
3306	  break;
3307	case 0xa8: /* test al,Ib */
3308	  cur->pc += 1;
3309	  break;
3310	case 0xa9: /* test eax,Iz */
3311	  cur->pc += z;
3312	  break;
3313	case 0xaa: /* stosb Yb,%al */
3314	case 0xab: /* stosd Yv,%eax */
3315	case 0xac: /* lodsb %al,Xb */
3316	case 0xad: /* lodsd %eax,Xv */
3317	case 0xae: /* scasb %al,Yb */
3318	case 0xaf: /* scasd %eax,Yv */
3319	  break;
3320	case 0xb0: /* mov %al,Ib */
3321	case 0xb1: /* mov %cl,Ib */
3322	case 0xb2: /* mov %dl,Ib */
3323	case 0xb3: /* mov %bl,Ib */
3324	case 0xb4: /* mov %ah,Ib */
3325	case 0xb5: /* mov %ch,Ib */
3326	case 0xb6: /* mov %dh,Ib */
3327	case 0xb7: /* mov %bh,Ib */
3328	  cur->pc += 1;
3329	  break;
3330	case 0xb8: /* mov Iv,%eax */
3331	case 0xb9: /* mov Iv,%ecx */
3332	case 0xba: /* mov Iv,%edx */
3333	case 0xbb: /* mov Iv,%ebx */
3334	case 0xbc: /* mov Iv,%esp */
3335	case 0xbd: /* mov Iv,%rbp */
3336	case 0xbe: /* mov Iv,%esi */
3337	case 0xbf: /* mov Iv,%edi */
3338	  reg = OPC_REG (opcode);
3339	  if (reg == RAX)
3340	    cur->rax = read_int (cur->pc, v);
3341	  cur->pc += v;
3342	  break;
3343	case 0xc0: /* group2 Eb,Ib */
3344	case 0xc1: /* group2 Ev,Ib */
3345	  cur->pc = check_modrm (cur->pc) + 1;
3346	  break;
3347	case 0xc2: /* ret Iw */
3348	  /* In the dynamic linker we may see that
3349	   * the actual return address is at sp+immv,
3350	   * while sp points to the resolved address.
3351	   */
3352	  {
3353	    immv = read_int (cur->pc, 2);
3354	    int rc = process_return (wctx, cur);
3355	    if (rc != RA_FAILURE)
3356	      {
3357		if (jmp_reg_switch_mode == 1)
3358		  {
3359		    DprintfT (SP_DUMP_UNWIND, "stack_unwind%d give up return address under jmp switch mode, opcode = 0xc2\n", __LINE__);
3360		    goto checkFP;
3361		  }
3362		wctx->sp += immv;
3363		if (save_ctx)
3364		  omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3365		return rc;
3366	      }
3367	    DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xc2.\n", __LINE__);
3368	    DELETE_CURCTX ();
3369	  }
3370	  break;
3371	case 0xc3: /* ret */
3372	  {
3373	    int rc = process_return (wctx, cur);
3374	    if (rc != RA_FAILURE)
3375	      {
3376		if (save_ctx)
3377		  omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3378		return rc;
3379	      }
3380	    if (jmp_reg_switch_mode == 1)
3381	      jmp_reg_switch_pc = cur->pc;
3382	    DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xc3.\n", __LINE__);
3383	    DELETE_CURCTX ();
3384	  }
3385	  break;
3386	case 0xc4: /* group AVX, 3-bytes VEX prefix */
3387	  {
3388	    unsigned char *pc = cur->pc - 1; // points to the beginning of the instruction
3389	    int len = parse_x86_AVX_instruction (pc);
3390	    if (len < 3)
3391	      DELETE_CURCTX ();
3392	    else
3393	      {
3394		pc += len;
3395		cur->pc = pc;
3396	      }
3397	  }
3398	  break;
3399	case 0xc5: /* group AVX, 2-bytes VEX prefix */
3400	  {
3401	    unsigned char *pc = cur->pc - 1; // points to the beginning of the instruction
3402	    int len = parse_x86_AVX_instruction (pc);
3403	    if (len < 2)
3404	      DELETE_CURCTX ();
3405	    else
3406	      {
3407		pc += len;
3408		cur->pc = pc;
3409	      }
3410	  }
3411	  break;
3412	case 0xc6:
3413	  modrm = *cur->pc;
3414	  if (modrm == 0xf8) /* xabort */
3415	    cur->pc += 2;
3416	  else /* mov Eb,Ib */
3417	    cur->pc = check_modrm (cur->pc) + 1;
3418	  break;
3419	case 0xc7:
3420	  modrm = *cur->pc;
3421	  if (modrm == 0xf8) /* xbegin */
3422	    cur->pc += v + 1;
3423	  else
3424	    { /* mov Ev,Iz */
3425	      extop = MRM_EXT (modrm);
3426	      if (extop != 0)
3427		{
3428		  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address, opcode = 0xc7\n", __LINE__);
3429		  goto checkFP;
3430		}
3431	      if (MRM_MOD (modrm) == 0xc0 && MRM_REGS (modrm) == RAX)
3432		cur->rax = read_int (cur->pc + 1, z);
3433	      cur->pc = check_modrm (cur->pc) + z;
3434	    }
3435	  break;
3436	case 0xc8: /* enter Iw,Ib */
3437	  cur->pc += 3;
3438	  break;
3439	case 0xc9: /* leave */
3440	  /* mov %ebp,%esp */
3441	  cur->sp = cur->fp;
3442	  /* pop %ebp */
3443	  if (cur->fp_loc == cur->sp)
3444	    {
3445	      cur->fp = cur->fp_sav;
3446	      cur->fp_loc = NULL;
3447	    }
3448	  else if (cur->sp >= cur->sp_safe &&
3449		   (unsigned long) cur->sp < wctx->sbase)
3450	    {
3451	      cur->fp = (unsigned long*) (*cur->sp);
3452	      if (wctx->fp == (unsigned long) cur->sp)
3453		cur->cval = RA_FROMFP;
3454	    }
3455	  cur->sp += 1;
3456	  if (cur->sp - RED_ZONE > cur->sp_safe)
3457	    cur->sp_safe = cur->sp - RED_ZONE;
3458	  break;
3459	case 0xca: /* retf Iw */
3460	  cur->pc += 2; /* XXXX process return */
3461	  break;
3462	case 0xcb: /* retf */
3463	  break; /* XXXX process return */
3464	case 0xcc: /* int 3 */
3465	  break;
3466	case 0xcd: /* int Ib */
3467	  if (*cur->pc == 0x80)
3468	    {
3469	      if (cur->rax == __NR_exit)
3470		{
3471		  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xcd.\n", __LINE__);
3472		  DELETE_CURCTX ();
3473		  break;
3474		}
3475	      else if (cur->rax == __NR_rt_sigreturn)
3476		{
3477		  if (jmp_reg_switch_mode == 1)
3478		    {
3479		      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address under jmp switch mode, opcode=0xcd\n",
3480				__LINE__);
3481		      goto checkFP;
3482		    }
3483		  wctx->sp = (unsigned long) cur->sp;
3484		  if (save_ctx)
3485		    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_RT_SIGRETURN);
3486		  return RA_RT_SIGRETURN;
3487		}
3488#if WSIZE(32)
3489	      else if (cur->rax == __NR_sigreturn)
3490		{
3491		  if (jmp_reg_switch_mode == 1)
3492		    {
3493		      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address under jmp switch mode, opcode = 0xc2\n",
3494				__LINE__);
3495		      goto checkFP;
3496		    }
3497		  wctx->sp = (unsigned long) cur->sp;
3498		  if (save_ctx)
3499		    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_SIGRETURN);
3500		  return RA_SIGRETURN;
3501		}
3502#endif
3503	    }
3504	  cur->pc += 1;
3505	  break;
3506	case 0xce: /* into */
3507	case 0xcf: /* iret */
3508	  break;
3509	case 0xd0: /* shift group2 Eb,1 */
3510	case 0xd1: /* shift group2 Ev,1 */
3511	case 0xd2: /* shift group2 Eb,%cl */
3512	case 0xd3: /* shift group2 Ev,%cl */
3513	  cur->pc = check_modrm (cur->pc);
3514	  break;
3515	case 0xd4: /* aam Ib */
3516	  cur->pc += 1;
3517	  break;
3518	case 0xd5: /* aad Ib */
3519	  cur->pc += 1;
3520	  break;
3521	case 0xd6: /* falc? */
3522	  break;
3523	case 0xd7:
3524	  cur->pc = check_modrm (cur->pc);
3525	  cur->pc++;
3526	  break;
3527	case 0xd8: /* esc instructions */
3528	case 0xd9:
3529	case 0xda:
3530	case 0xdb:
3531	case 0xdc:
3532	case 0xdd:
3533	case 0xde:
3534	case 0xdf:
3535	  cur->pc = check_modrm (cur->pc);
3536	  break;
3537	case 0xe0: /* loopne Jb */
3538	case 0xe1: /* loope Jb */
3539	case 0xe2: /* loop Jb */
3540	case 0xe3: /* jcxz Jb */
3541	  imm8 = *(char*) cur->pc++;
3542	  if (nctx < (jmp_reg_switch_mode ? MAXJMPREGCTX : MAXCTX))
3543	    {
3544	      int tidx = 0;
3545	      unsigned char *npc = cur->pc + imm8;
3546	      if (is_after_ret (npc))
3547		break;
3548	      while (npc > targets[tidx])
3549		tidx += 1;
3550	      if (npc != targets[tidx])
3551		{
3552		  if (ntrg < MAXTRGTS)
3553		    {
3554		      for (int i = 0; i < nctx; i++)
3555			if (buf[i].tidx >= tidx)
3556			  buf[i].tidx++;
3557		      /* insert a new target */
3558		      for (int i = ntrg; i > tidx; i--)
3559			targets[i] = targets[i - 1];
3560		      ntrg += 1;
3561		      targets[tidx++] = npc;
3562		    }
3563		  else
3564		    DprintfT (SP_DUMP_UNWIND, "unwind.c: ntrg = max\n");
3565		  struct AdvWalkContext *new = buf + nctx;
3566		  nctx += 1;
3567		  __collector_memcpy (new, cur, sizeof (*new));
3568		  new->pc = npc;
3569		  new->tidx = tidx;
3570		  cur = new; /* advance the new context first */
3571		  continue;
3572		}
3573	    }
3574	  else
3575	    DprintfT (SP_DUMP_UNWIND, "unwind.c: nctx = max\n");
3576	  break;
3577	case 0xe4: case 0xe5:
3578	  cur->pc = check_modrm (cur->pc);
3579	  cur->pc++;
3580	  break;
3581	case 0xe6: case 0xe7:
3582	  cur->pc++;
3583	  cur->pc = check_modrm (cur->pc);
3584	  break;
3585	case 0xec: case 0xed: case 0xee: case 0xef:
3586	  cur->pc = check_modrm (cur->pc);
3587	  break;
3588	case 0xe8: /* call Jz (f64) */
3589	  {
3590	    if (jmp_reg_switch_mode == 1)
3591	      {
3592		struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur));
3593		__collector_memcpy (tmpctx, cur, sizeof (*cur));
3594		int rc = process_return (wctx, tmpctx);
3595		if (rc != RA_FAILURE)
3596		  {
3597		    if (save_ctx)
3598		      omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3599		    return rc;
3600		  }
3601	      }
3602	    int immz = read_int (cur->pc, z);
3603	    if (immz == 0)
3604	      /* special case in PIC code */
3605	      cur->sp -= 1;
3606	    cur->pc += z;
3607	  }
3608	  break;
3609	case 0xe9: /* jump Jz */
3610	  {
3611	    int immz = read_int (cur->pc, z);
3612	    unsigned char *npc = cur->pc + z + immz;
3613	    if ((unsigned long) npc < wctx->tbgn || (unsigned long) npc >= wctx->tend)
3614	      {
3615		DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xe9.\n", __LINE__);
3616		DELETE_CURCTX ();
3617		break;
3618	      }
3619	    int tidx = 0;
3620	    while (npc > targets[tidx])
3621	      tidx += 1;
3622	    if (npc != targets[tidx])
3623	      {
3624		if (ntrg < MAXTRGTS)
3625		  {
3626		    for (int i = 0; i < nctx; i++)
3627		      if (buf[i].tidx >= tidx)
3628			buf[i].tidx++;
3629		    /* insert a new target */
3630		    for (int i = ntrg; i > tidx; i--)
3631		      targets[i] = targets[i - 1];
3632		    ntrg += 1;
3633		    targets[tidx++] = npc;
3634		  }
3635		else
3636		  DprintfT (SP_DUMP_UNWIND, "unwind.c: ntrg = max\n");
3637		cur->pc = npc;
3638		cur->tidx = tidx;
3639		continue; /* advance this context first */
3640	      }
3641	    else
3642	      {
3643		/* Delete context */
3644		DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xe9.\n", __LINE__);
3645		DELETE_CURCTX ();
3646	      }
3647	  }
3648	  break;
3649	case 0xeb: /* jump imm8 */
3650	  {
3651	    imm8 = *(char*) cur->pc++;
3652	    int tidx = 0;
3653	    unsigned char *npc = cur->pc + imm8;
3654	    while (npc > targets[tidx])
3655	      tidx += 1;
3656	    if (npc != targets[tidx])
3657	      {
3658		if (ntrg < MAXTRGTS)
3659		  {
3660		    for (int i = 0; i < nctx; i++)
3661		      if (buf[i].tidx >= tidx)
3662			buf[i].tidx++;
3663		    /* insert a new target */
3664		    for (int i = ntrg; i > tidx; i--)
3665		      targets[i] = targets[i - 1];
3666		    ntrg += 1;
3667		    targets[tidx++] = npc;
3668		  }
3669		else
3670		  DprintfT (SP_DUMP_UNWIND, "unwind.c: ntrg = max\n");
3671		cur->pc = npc;
3672		cur->tidx = tidx;
3673		continue; /* advance this context first */
3674	      }
3675	    else
3676	      {
3677		/* Delete context */
3678		DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xeb.\n", __LINE__);
3679		DELETE_CURCTX ();
3680	      }
3681	  }
3682	  break;
3683	case 0xf0: /* lock prefix */
3684	case 0xf2: /* repne prefix */
3685	case 0xf3: /* repz prefix */
3686	  break;
3687	case 0xf4: /* hlt */
3688	  extop2 = *(cur->pc - 3);
3689	  if (extop2 == 0x90)
3690	    {
3691	      // 17851712 occasional SEGV in find_i386_ret_addr in unwind.c during attach
3692	      if (save_ctx)
3693		omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_END_OF_STACK);
3694	      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d returns RA_END_OF_STACK\n", __LINE__);
3695	      return RA_END_OF_STACK;
3696	    }
3697	  /* We see 'hlt' in _start. Stop analysis, revert to FP */
3698	  /* A workaround for the Linux main stack */
3699	  if (nctx > 1)
3700	    {
3701	      DELETE_CURCTX ();
3702	      break;
3703	    }
3704	  if (cur->fp == 0)
3705	    {
3706	      if (jmp_reg_switch_mode == 1)
3707		{
3708		  DprintfT (SP_DUMP_UNWIND, "unwind.c: give up return address under jmp switch mode, opcode = 0xf4\n");
3709		  goto checkFP;
3710		}
3711	      cache_put (wctx, RA_EOSTCK);
3712	      wctx->pc = 0;
3713	      wctx->sp = 0;
3714	      wctx->fp = 0;
3715	      if (save_ctx)
3716		omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_END_OF_STACK);
3717	      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d returns RA_END_OF_STACK\n", __LINE__);
3718	      return RA_END_OF_STACK;
3719	    }
3720	  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address, opcode = 0xf4\n", __LINE__);
3721	  goto checkFP;
3722	case 0xf5: /* cmc */
3723	  break;
3724	case 0xf6: /* group3 Eb */
3725	  modrm = *cur->pc;
3726	  extop = MRM_EXT (modrm);
3727	  cur->pc = check_modrm (cur->pc);
3728	  if (extop == 0x0) /* test Ib */
3729	    cur->pc += 1;
3730	  break;
3731	case 0xf7: /* group3 Ev */
3732	  modrm = *cur->pc;
3733	  extop = MRM_EXT (modrm);
3734	  cur->pc = check_modrm (cur->pc);
3735	  if (extop == 0x0)  /* test Iz */
3736	    cur->pc += z;
3737	  break;
3738	case 0xf8: /* clc */
3739	case 0xf9: /* stc */
3740	case 0xfa: /* cli */
3741	case 0xfb: /* sti */
3742	case 0xfc: /* cld */
3743	case 0xfd: /* std */
3744	  break;
3745	case 0xfe: /* group4 */
3746	  modrm = *cur->pc;
3747	  extop = MRM_EXT (modrm);
3748	  switch (extop)
3749	    {
3750	    case 0x0: /* inc Eb */
3751	    case 0x1: /* dec Eb */
3752	      cur->pc = check_modrm (cur->pc);
3753	      break;
3754	    case 0x7:
3755	      cur->pc = check_modrm (cur->pc);
3756	      break;
3757	    default:
3758	      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0xfe %x\n",
3759			__LINE__, extop);
3760	      DELETE_CURCTX ();
3761	      break;
3762	    }
3763	  break;
3764	case 0xff: /* group5 */
3765	  modrm = *cur->pc;
3766	  extop = MRM_EXT (modrm);
3767	  switch (extop)
3768	    {
3769	    case 0x0: /* inc Ev */
3770	    case 0x1: /* dec Ev */
3771	      cur->pc = check_modrm (cur->pc);
3772	      break;
3773	    case 0x2: /* calln Ev */
3774	      if (jmp_reg_switch_mode == 1)
3775		{
3776		  struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur));
3777		  __collector_memcpy (tmpctx, cur, sizeof (*cur));
3778		  int rc = process_return (wctx, tmpctx);
3779		  if (rc != RA_FAILURE)
3780		    {
3781		      if (save_ctx)
3782			omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3783		      return rc;
3784		    }
3785		}
3786	      cur->pc = check_modrm (cur->pc);
3787	      break;
3788	    case 0x3: /* callf Ep */
3789	      if (jmp_reg_switch_mode == 1)
3790		{
3791		  struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur));
3792		  __collector_memcpy (tmpctx, cur, sizeof (*cur));
3793		  int rc = process_return (wctx, tmpctx);
3794		  if (rc != RA_FAILURE)
3795		    {
3796		      if (save_ctx)
3797			omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3798		      return rc;
3799		    }
3800		}
3801	      cur->pc = check_modrm (cur->pc); /* XXXX */
3802	      break;
3803	    case 0x4: /* jumpn Ev */
3804	      /* This instruction appears in PLT or
3805	       * in tail call optimization.
3806	       * In both cases treat it as return.
3807	       * Save jump *(reg) - switch, etc, for later use when no ctx left
3808	       */
3809	      if (modrm == 0x25 || /* jumpn *disp32 */
3810		  MRM_MOD (modrm) == 0x40 || /* jumpn byte(reg) */
3811		  MRM_MOD (modrm) == 0x80) /* jumpn word(reg) */
3812		{
3813		  DprintfT (SP_DUMP_UNWIND, "unwind.c: PLT or tail call: %p\n", cur->pc - 1);
3814		  int rc = process_return (wctx, cur);
3815		  if (rc != RA_FAILURE)
3816		    {
3817		      if (jmp_reg_switch_mode == 1 && total_num_jmp_reg < max_num_jmp_reg_seen)
3818			{
3819			  DprintfT (SP_DUMP_UNWIND, "unwind.c: give up return address under jmp switch mode, opcode = 0xff\n");
3820			  goto checkFP;
3821			}
3822		      if (save_ctx)
3823			omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3824		      return rc;
3825		    }
3826		}
3827	      else if (modrm != 0x24 /*ignore SIB*/) /* jumpn *(reg) or jumpn reg */
3828		{
3829		  // 22846120 stack unwind does not find caller of __memcpy_ssse3_back with B64 intel-Linux
3830		  /*
3831		   * For now, let's deal rather narrowly with this scenario.  If:
3832		   * - we are in the middle of an "ff e2" instruction, and
3833		   * - the next instruction is undefined ( 0f 0b == ud2 )
3834		   * then test return.  (Might eventually have to broaden the scope
3835		   * of this fix to other registers/etc.)
3836		   */
3837		  if (cur->pc[0] == 0xe2 && cur->pc[1] == 0x0f && cur->pc[2] == 0x0b)
3838		    {
3839		      int rc = process_return_real (wctx, cur, 0);
3840		      if (rc == RA_SUCCESS)
3841			{
3842			  if (save_ctx)
3843			    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3844			  return rc;
3845			}
3846		    }
3847
3848		  // 22691241 shjsynprog, jsynprog core dump from find_i386_ret_addr
3849		  /*
3850		   * Here is another oddity.  Java 9 seems to emit dynamically generated
3851		   * code where a code block ends with a "jmp *reg" and then padding to a
3852		   * multiple-of-16 boundary and then a bunch of 0s.  In this case, let's
3853		   * not continue to walk bytes since we would be walking off the end of
3854		   * the instructions into ... something.  Treating them as instructions
3855		   * can lead to unexpected results, including SEGV.
3856		   */
3857		  /*
3858		   * While the general problem deserves a better solution, let's look
3859		   * here only for one particular case:
3860		   *    0xff 0xe7               jmp *reg
3861		   *                            nop to bring us to a multiple-of-16 boundary
3862		   *    0x0000000000000a00      something that does not look like an instruction
3863		   *
3864		   * A different nop might be used depending on how much padding is needed
3865		   * to reach that multiple-of-16 boundary.  We've seen two:
3866		   *    0x90                    one byte
3867		   *    0x0f 0x1f 0x40 0x00     four bytes
3868		   */
3869		  // confirm the instruction is 0xff 0xe7
3870		  if (cur->pc[0] == 0xe7)
3871		    {
3872		      // check for correct-length nop and find next 16-byte boundary
3873		      int found_nop = 0;
3874		      unsigned long long *boundary = 0;
3875		      switch ((((unsigned long) (cur->pc)) & 0xf))
3876			{
3877			case 0xb: // look for 4-byte nop
3878			  if (*((unsigned *) (cur->pc + 1)) == 0x00401f0f)
3879			    found_nop = 1;
3880			  boundary = (unsigned long long *) (cur->pc + 5);
3881			  break;
3882			case 0xe: // look for 1-byte nop
3883			  if (cur->pc[1] == 0x90)
3884			    found_nop = 1;
3885			  boundary = (unsigned long long *) (cur->pc + 2);
3886			  break;
3887			default:
3888			  break;
3889			}
3890
3891		      // if nop is found, check what's at the boundary
3892		      if (found_nop && *boundary == 0x000000000a00)
3893			{
3894			  DELETE_CURCTX ();
3895			  break;
3896			}
3897		    }
3898
3899		  DprintfT (SP_DUMP_UNWIND, "unwind.c: probably PLT or tail call or switch table: %p\n",
3900			    cur->pc - 1);
3901		  if (num_jmp_reg < expected_num_jmp_reg)
3902		    {
3903		      if (jmp_reg_ctx[num_jmp_reg] == NULL)
3904			jmp_reg_ctx[num_jmp_reg] = (struct AdvWalkContext *) alloca (sizeof (*cur));
3905		      if (jmp_reg_ctx[num_jmp_reg] != NULL)
3906			__collector_memcpy (jmp_reg_ctx[num_jmp_reg], cur, sizeof (*cur));
3907		    }
3908		  if (num_jmp_reg < expected_num_jmp_reg ||
3909		      (num_jmp_reg >= expected_num_jmp_reg &&
3910		       jmp_reg_ctx[expected_num_jmp_reg - 1] != NULL &&
3911		       cur->pc != jmp_reg_ctx[expected_num_jmp_reg - 1]->pc))
3912		    {
3913		      num_jmp_reg++;
3914		      total_num_jmp_reg++;
3915		    }
3916		  if (jmp_reg_switch_mode == 1 && total_num_jmp_reg >= max_num_jmp_reg_seen)
3917		    {
3918		      int rc = process_return_real (wctx, cur, 0);
3919		      if (rc == RA_SUCCESS)
3920			{
3921			  if (save_ctx)
3922			    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3923			  return rc;
3924			}
3925		    }
3926		}
3927	      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xff.\n", __LINE__);
3928	      DELETE_CURCTX ();
3929	      break;
3930	    case 0x5: /* jmpf Ep */
3931	      cur->pc = check_modrm (cur->pc); /* XXXX */
3932	      break;
3933	    case 0x6: /* push Ev */
3934	      cur->pc = check_modrm (cur->pc);
3935	      cur->sp -= 1;
3936	      break;
3937	    case 0x7:
3938	      cur->pc = check_modrm (cur->pc); /* XXXX */
3939	      if (jmp_reg_switch_mode == 1)
3940		{
3941		  int rc = process_return_real (wctx, cur, 0);
3942		  if (rc == RA_SUCCESS)
3943		    {
3944		      if (save_ctx)
3945			omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3946		      return rc;
3947		    }
3948		}
3949	      break;
3950	    default:
3951	      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0xff %x\n",
3952			__LINE__, (int) extop);
3953	      DELETE_CURCTX ();
3954	      break;
3955	    }
3956	  break;
3957	default:
3958	  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0x%x\n",
3959		    __LINE__, (int) opcode);
3960	  DELETE_CURCTX ();
3961	  break;
3962	}
3963
3964      /* switch to next context */
3965      if (++cur >= buf + nctx)
3966	cur = buf;
3967      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d switch context: cur=0x%lx(%ld)  nctx=%d  cnt=%d\n",
3968	       __LINE__, (unsigned long) cur, (long) (cur - buf), (int) nctx, (int) cnt);
3969    }
3970
3971checkFP:
3972  Tprintf (DBG_LT3, "find_i386_ret_addr:%d checkFP: wctx=0x%lx fp=0x%lx ln=0x%lx pc=0x%lx sbase=0x%lx sp=0x%lx tbgn=0x%lx tend=0x%lx\n",
3973	   __LINE__, (unsigned long) wctx, (unsigned long) wctx->fp,
3974	   (unsigned long) wctx->ln, (unsigned long) wctx->pc, (unsigned long) wctx->sbase,
3975	   (unsigned long) wctx->sp, (unsigned long) wctx->tbgn, (unsigned long) wctx->tend);
3976
3977  if (jmp_reg_switch_mode == 1)
3978    { // not deal with switch cases not ending with ret
3979      if (jmp_reg_switch_backup_ctx != NULL)
3980	__collector_memcpy (cur, jmp_reg_switch_backup_ctx, sizeof (*cur));
3981      DprintfT (SP_DUMP_UNWIND, "stack_unwind jmp reg mode on: pc = 0x%lx cnt = %d, nctx = %d\n", wctx->pc, cnt, nctx);
3982    }
3983
3984  unsigned long *cur_fp = cur->fp;
3985  unsigned long *cur_sp = cur->sp;
3986  if (do_walk == 0)
3987    __collector_memcpy (&wctx_pc_save, wctx, sizeof (struct WalkContext));
3988
3989  /* Resort to the frame pointer */
3990  if (cur->fp_loc)
3991    cur->fp = cur->fp_sav;
3992  cur->sp = cur->fp;
3993  if ((unsigned long) cur->sp >= wctx->sbase ||
3994      (unsigned long) cur->sp < wctx->sp)
3995    {
3996      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d do_walk=%d cur->sp=0x%p out of range. wctx->sbase=0x%lx wctx->sp=0x%lx wctx->pc=0x%lx\n",
3997		__LINE__, (int) do_walk, cur->sp, (unsigned long) wctx->sbase,
3998		(unsigned long) wctx->sp, (unsigned long) wctx->pc);
3999      if (do_walk == 0)
4000	{
4001	  cur->sp = cur_sp;
4002	  cur->fp = cur_fp;
4003	  do_walk = 1;
4004	  save_ctx = 1;
4005	  goto startWalk;
4006	}
4007      if (save_ctx)
4008	omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE);
4009      return RA_FAILURE;
4010    }
4011
4012  unsigned long fp = *cur->sp++;
4013  if (fp <= (unsigned long) cur->sp || fp >= wctx->sbase)
4014    {
4015      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d fp=0x%016llx out of range. cur->sp=%p wctx->sbase=0x%lx wctx->pc=0x%lx\n",
4016	       __LINE__, (unsigned long long) fp, cur->sp,
4017	       (unsigned long) wctx->sbase, (unsigned long) wctx->pc);
4018      if (do_walk == 0)
4019	{
4020	  cur->sp = cur_sp;
4021	  cur->fp = cur_fp;
4022	  do_walk = 1;
4023	  save_ctx = 1;
4024	  goto startWalk;
4025	}
4026      if (save_ctx)
4027	omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE);
4028      return RA_FAILURE;
4029    }
4030
4031  unsigned long ra = *cur->sp++;
4032  if (ra == 0)
4033    {
4034      cache_put (wctx, RA_EOSTCK);
4035      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d returns RA_END_OF_STACK wctx->pc = 0x%lx\n", __LINE__, wctx->pc);
4036      if (save_ctx)
4037	omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_END_OF_STACK);
4038      return RA_END_OF_STACK;
4039    }
4040
4041  unsigned long tbgn = wctx->tbgn;
4042  unsigned long tend = wctx->tend;
4043  if (ra < tbgn || ra >= tend)
4044    {
4045      // We do not know yet if update_map_segments is really needed
4046      if (!__collector_check_segment (ra, &tbgn, &tend, 0))
4047	{
4048	  DprintfT (SP_DUMP_UNWIND, "unwind.c: __collector_check_segment fail. wctx->pc = 0x%lx\n", wctx->pc);
4049	  if (do_walk == 0)
4050	    {
4051	      cur->sp = cur_sp;
4052	      cur->fp = cur_fp;
4053	      do_walk = 1;
4054	      save_ctx = 1;
4055	      goto startWalk;
4056	    }
4057	  if (save_ctx)
4058	    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE);
4059	  return RA_FAILURE;
4060	}
4061    }
4062
4063  unsigned long npc = adjust_ret_addr (ra, ra - tbgn, tend);
4064  if (npc == 0)
4065    {
4066      DprintfT (SP_DUMP_UNWIND, "unwind.c: adjust_ret_addr fail. wctx->pc = 0x%lx\n", wctx->pc);
4067      if (do_walk == 0)
4068	{
4069	  cur->sp = cur_sp;
4070	  cur->fp = cur_fp;
4071	  do_walk = 1;
4072	  save_ctx = 1;
4073	  goto startWalk;
4074	}
4075      if (save_ctx)
4076	omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE);
4077      return RA_FAILURE;
4078    }
4079  wctx->pc = npc;
4080  wctx->sp = (unsigned long) cur->sp;
4081  wctx->fp = fp;
4082  wctx->tbgn = tbgn;
4083  wctx->tend = tend;
4084
4085  if (save_ctx)
4086    {
4087      omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_SUCCESS);
4088      DprintfT (SP_DUMP_UNWIND, "unwind.c: cache walk context. wctx_pc_save->pc = 0x%lx\n", wctx_pc_save.pc);
4089    }
4090  return RA_SUCCESS;
4091}
4092
4093/*
4094 * We have the return address, but we would like to report to the user
4095 * the calling PC, which is the instruction immediately preceding the
4096 * return address.  Unfortunately, x86 instructions can have variable
4097 * length.  So we back up 8 bytes and try to figure out where the
4098 * calling PC starts.  (FWIW, call instructions are often 5-bytes long.)
4099 */
unsigned long
adjust_ret_addr (unsigned long ra, unsigned long segoff, unsigned long tend)
{
  /* ra:     return address found on the stack (points just past a call).
     segoff: offset of ra inside its text segment; limits how far back we
	     may safely read.
     tend:   end address of the text segment containing ra.
     Returns the address of the call instruction preceding ra, ra itself
     for recognized signal/JVM trampolines, or 0 on failure.  */
  unsigned long npc = 0;
  /* x86 call instructions are 2..8 bytes long.  Try each candidate
     length i (longest we can reach first is bounded by segoff so we
     never read before the segment start) and see whether decoding a
     call at ra-i lands exactly on ra.  */
  int i = segoff < 8 ? segoff : 8;
  for (; i > 1; i--)
    {
      unsigned char *ptr = (unsigned char*) ra - i;
      /* z: size of the call immediate (Jz); a: address size for callf Ap.
	 Both default to 4 and are shrunk to 2 by the 0x66/0x67 prefixes.  */
      int z = 4;
      int a = 4;
      int done = 0;
      int bVal;
      /* Skip over any legacy prefixes in front of the candidate opcode.  */
      while (!done)
	{
	  bVal = getByteInstruction (ptr);
	  if (bVal < 0)
	    return 0;
	  switch (bVal)
	    {
	    case 0x26:	/* ES segment override */
	    case 0x36:	/* SS segment override */
#if WSIZE(64)
	      /* In 64-bit mode ES/SS overrides are ignored one-byte
		 prefixes; just step over them.  */
	      ptr += 1;
	      break;
#endif
	      /* In a 32-bit build 0x26/0x36 fall through and are treated
		 like the FS/GS overrides below.  */
	    case 0x64:	/* FS segment override */
	    case 0x65:	/* GS segment override */
	      bVal = getByteInstruction (ptr + 1);
	      if (bVal < 0)
		return 0;
	      if (bVal == 0xe8)
		// a workaround for bug 16193041, assuming "call Jz" has no segment override prefix
	       done = 1;
	      else
		ptr += 1;
	      break;
	    case 0x66:	/* operand-size override: immediate shrinks to 2 bytes */
	      z = 2;
	      ptr += 1;
	      break;
	    case 0x67:	/* address-size override */
	      a = 2;
	      ptr += 1;
	      break;
	    default:
	      /* Not a prefix; this byte should be the opcode.  */
	      done = 1;
	      break;
	    }
	}
#if WSIZE(64)
      /* Step over a REX prefix (0x40-0x4f) if present.  Note: even with
	 REX.W, "call Jz" keeps a 32-bit immediate, hence z stays 4.  */
      bVal = getByteInstruction (ptr);
      if (bVal < 0)
	return 0;
      if (bVal >= 0x40 && bVal <= 0x4f)
	{ /* XXXX not all REX codes applicable */
	  if (bVal & 0x8)
	    z = 4;
	  ptr += 1;
	}
#endif
      int opcode = getByteInstruction (ptr);
      if (opcode < 0)
	return 0;
      ptr++;
      switch (opcode)
	{
	case 0xe8: /* call Jz (f64) */
	  ptr += z;
	  break;
	case 0x9a: /* callf Ap */
	  ptr += 2 + a;
	  break;
	case 0xff: /* calln Ev , callf Ep */
	  {
	    /* Only ModRM extensions 2 (calln) and 3 (callf) are calls.  */
	    int extop = MRM_EXT (*ptr);
	    if (extop == 2 || extop == 3)
	      ptr = check_modrm (ptr);
	  }
	  break;
	default:
	  /* Not a call opcode; try the next candidate length.  */
	  continue;
	}
      /* If decoding consumed exactly i bytes and ended at ra, the bytes
	 at ra-i form a plausible call instruction: report its address.  */
      if ((unsigned long) ptr == ra)
	{
	  npc = ra - i;
	  break;
	}
    }
  if (npc == 0)
    {
      /* No call found.  Check whether ra points at a kernel signal-return
	 trampoline, in which case ra itself is the PC to report.  */
      unsigned char * ptr = (unsigned char *) ra;
#if WSIZE(32)
      // test __kernel_sigreturn or __kernel_rt_sigreturn
      if ((ra + 7 < tend && getByteInstruction (ptr) == 0x58
	   && getByteInstruction (ptr + 1) == 0xb8
	   && getByteInstruction (ptr + 6) == 0xcd
	   && getByteInstruction (ptr + 7) == 0x80) /* pop %eax; mov $NNNN, %eax; int */
	  || (ra + 7 < tend && getByteInstruction (ptr) == 0x58
	      && getByteInstruction (ptr + 1) == 0xb8
	      && getByteInstruction (ptr + 6) == 0x0f
	      && getByteInstruction (ptr + 7) == 0x05) /* pop %eax; mov $NNNN, %eax; syscall */
	  || (ra + 6 < tend && getByteInstruction (ptr) == 0xb8
	      && getByteInstruction (ptr + 5) == 0xcd
	      && getByteInstruction (ptr + 6) == 0x80) /* mov $NNNN, %eax; int */
	  || (ra + 6 < tend && getByteInstruction (ptr) == 0xb8
	      && getByteInstruction (ptr + 5) == 0x0f
	      && getByteInstruction (ptr + 6) == 0x05)) /* mov $NNNN, %eax; syscall */
#else //WSIZE(64)
      // test __restore_rt
      if (ra + 8 < tend && getByteInstruction (ptr) == 0x48
	  && getByteInstruction (ptr + 7) == 0x0f
	  && getByteInstruction (ptr + 8) == 0x05) /* mov $NNNNNNNN, %rax; syscall */
#endif
	{
	  npc = ra;
	}
    }
  if (npc == 0 && __collector_java_mode
      && __collector_java_asyncgetcalltrace_loaded)
    { // detect jvm interpreter code for java user threads
      /* The JVM template interpreter dispatches bytecodes with an indirect
	 "jmp *table(,reg,scale)"; match the known instruction sequences
	 around ra for the JDK versions listed below.  The `ra - K >=
	 (ra - segoff)` guards keep all reads inside the text segment.  */
      unsigned char * ptr = (unsigned char *) ra;
#if WSIZE(32)
      // up to J170
      /*
       * ff 24 9d e0 64 02 f5    jmp     *-0xafd9b20(,%ebx,4)
       * 8b 4e 01                movl    1(%esi),%ecx
       * f7 d1                   notl    %ecx
       * 8b 5d ec                movl    -0x14(%ebp),%ebx
       * c1 e1 02                shll    $2,%ecx
       * eb d8                   jmp     .-0x26 [ 0x92a ]
       * 83 ec 08                subl    $8,%esp || 8b 65 f8                movl    -8(%ebp),%esp
       * */
      if (ra - 20 >= (ra - segoff) && ((*ptr == 0x83 && *(ptr + 1) == 0xec) || (*ptr == 0x8b && *(ptr + 1) == 0x65))
	  && *(ptr - 2) == 0xeb
	  && *(ptr - 5) == 0xc1 && *(ptr - 4) == 0xe1
	  && *(ptr - 8) == 0x8b && *(ptr - 7) == 0x5d
	  && *(ptr - 10) == 0xf7 && *(ptr - 9) == 0xd1
	  && *(ptr - 13) == 0x8b && *(ptr - 12) == 0x4e
	  && *(ptr - 20) == 0xff && *(ptr - 19) == 0x24 && *(ptr - 18) == 0x9d)
	{
	  npc = ra - 20;
	}
      // J180 J190
      // ff 24 9d ** ** ** **    jmp     *-0x*******(,%ebx,4)
      if (npc == 0
	  && ra - 7 >= (ra - segoff)
	  && *(ptr - 7) == 0xff
	  && *(ptr - 6) == 0x24
	  && *(ptr - 5) == 0x9d)
	{
	  npc = ra - 7;
	}
#else //WSIZE(64)
      // up to J170
      /*
       * 41 ff 24 da             jmp     *(%r10,%rbx,8)
       * 41 8b 4d 01             movl    1(%r13),%ecx
       * f7 d1                   notl    %ecx
       * 48 8b 5d d8             movq    -0x28(%rbp),%rbx
       * c1 e1 02                shll    $2,%ecx
       * eb cc                   jmp     .-0x32 [ 0xd23 ]
       * 48 8b 65 f0             movq    -0x10(%rbp),%rsp
       */
      if (ra - 19 >= (ra - segoff) && *ptr == 0x48 && ((*(ptr + 1) == 0x8b && *(ptr + 2) == 0x65) || (*(ptr + 1) == 0x83 && *(ptr + 2) == 0xec))
	  && *(ptr - 2) == 0xeb
	  && *(ptr - 5) == 0xc1 && *(ptr - 4) == 0xe1
	  && *(ptr - 9) == 0x48 && *(ptr - 8) == 0x8b && *(ptr - 7) == 0x5d
	  && *(ptr - 11) == 0xf7 && *(ptr - 10) == 0xd1
	  && *(ptr - 15) == 0x41 && *(ptr - 14) == 0x8b && *(ptr - 13) == 0x4d
	  && *(ptr - 19) == 0x41 && *(ptr - 18) == 0xff)
	npc = ra - 19;
      // J180 J190
      // 41 ff 24 da             jmp     *(%r10,%rbx,8)
      if (npc == 0
	  && ra - 4 >= (ra - segoff)
	  && *(ptr - 4) == 0x41
	  && *(ptr - 3) == 0xff
	  && *(ptr - 2) == 0x24
	  && *(ptr - 1) == 0xda)
	npc = ra - 4;
#endif
    }

  return npc;
}
4285
4286/*
4287 * Parses AVX instruction and returns its length.
4288 * Returns 0 if parsing failed.
4289 * https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
4290 */
4291static int
4292parse_x86_AVX_instruction (unsigned char *pc)
4293{
4294  /*
4295   * VEX prefix has a two-byte form (0xc5) and a three byte form (0xc4).
4296   * If an instruction syntax can be encoded using the two-byte form,
4297   * it can also be encoded using the three byte form of VEX.
4298   * The latter increases the length of the instruction by one byte.
4299   * This may be helpful in some situations for code alignment.
4300   *
4301		     Byte 0           Byte 1              Byte 2         Byte 3
4302     (Bit Position) 7      0     7 6 5   4    0     7   6  3   2   10
4303     3-byte VEX   [ 11000100 ] [ R X B | m-mmmm ] [ W | vvvv | L | pp ]
4304		    7      0     7   6  3   2   10
4305     2-byte VEX   [ 11000101 ] [ R | vvvv | L | pp ]
4306		    7      0     7 6 5  4 3 2 1 0     7 6 5 4 3 2 1 0     7  6 5  4  3 2 1 0
4307     4-byte EVEX  [ 01100010 ] [ R X B R1 0 0 m m ] [ W v v v v 1 p p ] [ z L1 L B1 V1 a a a ]
4308
4309     R: REX.R in 1's complement (inverted) form
4310	  0: Same as REX.R=1 (64-bit mode only)
4311	  1: Same as REX.R=0 (must be 1 in 32-bit mode)
4312
4313     X: REX.X in 1's complement (inverted) form
4314	  0: Same as REX.X=1 (64-bit mode only)
4315	  1: Same as REX.X=0 (must be 1 in 32-bit mode)
4316
4317     B: REX.B in 1's complement (inverted) form
4318	  0: Same as REX.B=1 (64-bit mode only)
4319	  1: Same as REX.B=0 (Ignored in 32-bit mode).
4320
4321     W: opcode specific (use like REX.W, or used for opcode
4322	  extension, or ignored, depending on the opcode byte)
4323
4324     m-mmmm:
4325	  00000: Reserved for future use (will #UD)
4326	  00001: implied 0F leading opcode byte
4327	  00010: implied 0F 38 leading opcode bytes
4328	  00011: implied 0F 3A leading opcode bytes
4329	  00100-11111: Reserved for future use (will #UD)
4330
4331     vvvv: a register specifier (in 1's complement form) or 1111 if unused.
4332
4333     L: Vector Length
4334	  0: scalar or 128-bit vector
4335	  1: 256-bit vector
4336
4337     pp: opcode extension providing equivalent functionality of a SIMD prefix
4338	  00: None
4339	  01: 66
4340	  10: F3
4341	  11: F2
4342   *
4343   * Example: 0xc5f877L vzeroupper
4344   * VEX prefix: 0xc5 0x77
4345   * Opcode: 0xf8
4346   *
4347   */
4348  int len = 0;
4349  disassemble_info dis_info;
4350  dis_info.arch = bfd_arch_i386;
4351  dis_info.mach = bfd_mach_x86_64;
4352  dis_info.flavour = bfd_target_unknown_flavour;
4353  dis_info.endian = BFD_ENDIAN_UNKNOWN;
4354  dis_info.endian_code = dis_info.endian;
4355  dis_info.octets_per_byte = 1;
4356  dis_info.disassembler_needs_relocs = FALSE;
4357  dis_info.fprintf_func = fprintf_func;
4358  dis_info.fprintf_styled_func = fprintf_styled_func;
4359  dis_info.stream = NULL;
4360  dis_info.disassembler_options = NULL;
4361  dis_info.read_memory_func = read_memory_func;
4362  dis_info.memory_error_func = memory_error_func;
4363  dis_info.print_address_func = print_address_func;
4364  dis_info.symbol_at_address_func = symbol_at_address_func;
4365  dis_info.symbol_is_valid = symbol_is_valid;
4366  dis_info.display_endian = BFD_ENDIAN_UNKNOWN;
4367  dis_info.symtab = NULL;
4368  dis_info.symtab_size = 0;
4369  dis_info.buffer_vma = 0;
4370  dis_info.buffer = pc;
4371  dis_info.buffer_length = 8;
4372
4373  disassembler_ftype disassemble = print_insn_i386;
4374  if (disassemble == NULL)
4375    {
4376      DprintfT (SP_DUMP_UNWIND, "parse_x86_AVX_instruction ERROR: unsupported disassemble\n");
4377      return 0;
4378    }
4379  len = disassemble (0, &dis_info);
4380  DprintfT (SP_DUMP_UNWIND, "parse_x86_AVX_instruction: returned %d  pc: %p\n", len, pc);
4381  return len;
4382}
4383
4384/*
4385 * In the Intel world, a stack frame looks like this:
4386 *
4387 * %fp0->|                               |
4388 *       |-------------------------------|
4389 *       |  Args to next subroutine      |
4390 *       |-------------------------------|-\
4391 * %sp0->|  One word struct-ret address  | |
4392 *       |-------------------------------|  > minimum stack frame (8 bytes)
4393 *       |  Previous frame pointer (%fp0)| |
4394 * %fp1->|-------------------------------|-/
4395 *       |  Local variables              |
4396 * %sp1->|-------------------------------|
4397 *
4398 */
4399
int
stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode)
{
  /* Unwind the x86/x64 native stack starting at CONTEXT, storing one PC
     per frame into BUF (treated as an array of longs, SIZE bytes total).
     NOTE(review): bptr/eptr appear to delimit a stack region of interest --
     frames with sp <= bptr overwrite lbuf[0], and the walk aborts once
     sp >= eptr (dropping the last two frames); confirm against callers.
     mode: FRINFO_NO_WALK disables the instruction walker on the first
     attempt; FRINFO_FROM_STACK requests one extra leading frame.
     Returns the number of bytes stored into BUF.  */
  long *lbuf = (long*) buf;
  int lsize = size / sizeof (long);
  int ind = 0;
  int do_walk = 1;
  int extra_frame = 0;
  if (mode & FRINFO_NO_WALK)
    do_walk = 0;
  if ((mode & 0xffff) == FRINFO_FROM_STACK)
    extra_frame = 1;

  /*
   * trace the stack frames from user stack.
   * We are assuming that the frame pointer and return address
   * are null when we are at the top level.
   */
  struct WalkContext wctx;
  wctx.pc = GET_PC (context);
  wctx.sp = GET_SP (context);
  wctx.fp = GET_FP (context);
  wctx.ln = (unsigned long) context->uc_link;
  /* Per-thread stack base is cached in TSD; fall back to a 1MB guess
     above the current sp when it is unknown.  */
  unsigned long *sbase = (unsigned long*) __collector_tsd_get_by_key (unwind_key);
  if (sbase && *sbase > wctx.sp)
    wctx.sbase = *sbase;
  else
    {
      wctx.sbase = wctx.sp + 0x100000;
      if (wctx.sbase < wctx.sp)  /* overflow */
	wctx.sbase = (unsigned long) - 1;
    }
  // We do not know yet if update_map_segments is really needed
  __collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0);

  /* Outer loop: record the current PC, then step to the caller frame.  */
  for (;;)
    {
      if (ind >= lsize || wctx.pc == 0)
	break;
      /* Frames still below bptr collapse into slot 0 (extra-frame mode).  */
      if (bptr != NULL && extra_frame && wctx.sp <= (unsigned long) bptr && ind < 2)
	{
	  lbuf[0] = wctx.pc;
	  if (ind == 0)
	    {
	      ind++;
	      if (ind >= lsize)
		break;
	    }
	}
      if (bptr == NULL || wctx.sp > (unsigned long) bptr)
	{
	  lbuf[ind++] = wctx.pc;
	  if (ind >= lsize)
	    break;
	}

      /* Inner loop: advance one frame; iterates only when a signal
	 trampoline is crossed (the `break`s below restart the outer loop
	 with the interrupted context).  */
      for (;;)
	{
	  if (eptr != NULL && wctx.sp >= (unsigned long) eptr)
	    {
	      ind = ind >= 2 ? ind - 2 : 0;
	      goto exit;
	    }
	  int ret = find_i386_ret_addr (&wctx, do_walk);
	  DprintfT (SP_DUMP_UNWIND, "stack_unwind (x86 walk):%d find_i386_ret_addr returns %d\n", __LINE__, ret);
	  if (ret == RA_FAILURE)
	    {
	      /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
	      goto exit;
	    }

	  if (ret == RA_END_OF_STACK)
	    goto exit;
#if WSIZE(32)
	  /* 32-bit rt_sigreturn: the third argument on the signal frame is
	     the ucontext of the interrupted code; resume unwinding there.  */
	  if (ret == RA_RT_SIGRETURN)
	    {
	      struct SigFrame
	      {
		unsigned long arg0;
		unsigned long arg1;
		unsigned long arg2;
	      } *sframe = (struct SigFrame*) wctx.sp;
	      ucontext_t *ncontext = (ucontext_t*) sframe->arg2;
	      wctx.pc = GET_PC (ncontext);
	      if (!__collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0))
		{
		  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
		  goto exit;
		}
	      unsigned long nsp = GET_SP (ncontext);
	      /* Check the new stack pointer */
	      if (nsp <= sframe->arg2 || nsp > sframe->arg2 + sizeof (ucontext_t) + 1024)
		{
		  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
		  goto exit;
		}
	      wctx.sp = nsp;
	      wctx.fp = GET_FP (ncontext);
	      break;
	    }
	  else if (ret == RA_SIGRETURN)
	    {
	      /* Legacy sigreturn: a struct sigcontext sits directly at sp.  */
	      struct sigcontext *sctx = (struct sigcontext*) wctx.sp;
	      wctx.pc = sctx->eip;
	      if (!__collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0))
		{
		  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
		  goto exit;
		}
	      wctx.sp = sctx->esp;
	      wctx.fp = sctx->ebp;
	      break;
	    }
#elif WSIZE(64)
	  /* 64-bit rt_sigreturn: a full ucontext_t sits at sp.  */
	  if (ret == RA_RT_SIGRETURN)
	    {
	      ucontext_t *ncontext = (ucontext_t*) wctx.sp;
	      wctx.pc = GET_PC (ncontext);
	      if (!__collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0))
		{
		  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
		  goto exit;
		}
	      unsigned long nsp = GET_SP (ncontext);
	      /* Check the new stack pointer */
	      if (nsp <= wctx.sp || nsp > wctx.sp + sizeof (ucontext_t) + 1024)
		{
		  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
		  goto exit;
		}
	      wctx.sp = nsp;
	      wctx.fp = GET_FP (ncontext);
	      break;
	    }
#endif /* WSIZE() */
	  /* Same bptr bookkeeping as in the outer loop, for the case where
	     RA_SUCCESS kept us inside the inner loop.  */
	  if (bptr != NULL && extra_frame && wctx.sp <= (unsigned long) bptr && ind < 2)
	    {
	      lbuf[0] = wctx.pc;
	      if (ind == 0)
		{
		  ind++;
		  if (ind >= lsize)
		    break;
		}
	    }
	  if (bptr == NULL || wctx.sp > (unsigned long) bptr)
	    {
	      lbuf[ind++] = wctx.pc;
	      if (ind >= lsize)
		goto exit;
	    }
	}
    }

exit:
#if defined(DEBUG)
  if ((SP_DUMP_UNWIND & __collector_tracelevel) != 0)
    {
      DprintfT (SP_DUMP_UNWIND, "stack_unwind (x86 walk):%d found %d frames\n\n", __LINE__, ind);
      for (int i = 0; i < ind; i++)
	DprintfT (SP_DUMP_UNWIND, "  %3d:  0x%lx\n", i, (unsigned long) lbuf[i]);
    }
#endif
  dump_stack (__LINE__);
  /* On overflow replace the last entry with the truncation marker.  */
  if (ind >= lsize)
    {
      ind = lsize - 1;
      lbuf[ind++] = (unsigned long) SP_TRUNC_STACK_MARKER;
    }
  return ind * sizeof (long);
}
4571
4572#elif ARCH(Aarch64)
4573
4574static int
4575stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode)
4576{
4577  if (buf && bptr && eptr && context && size + mode > 0)
4578    getByteInstruction ((unsigned char *) eptr);
4579  int ind = 0;
4580  __u64 *lbuf = (void *) buf;
4581  int lsize = size / sizeof (__u64);
4582  __u64 pc = context->uc_mcontext.pc;
4583  __u64 sp = context->uc_mcontext.sp;
4584  __u64 stack_base;
4585  unsigned long tbgn = 0;
4586  unsigned long tend = 0;
4587
4588  unsigned long *sbase = (unsigned long*) __collector_tsd_get_by_key (unwind_key);
4589  if (sbase && *sbase > sp)
4590    stack_base = *sbase;
4591  else
4592    {
4593      stack_base = sp + 0x100000;
4594      if (stack_base < sp)  // overflow
4595	stack_base = (__u64) -1;
4596    }
4597  DprintfT (SP_DUMP_UNWIND,
4598    "unwind.c:%d stack_unwind %2d pc=0x%llx  sp=0x%llx  stack_base=0x%llx\n",
4599    __LINE__, ind, (unsigned long long) pc, (unsigned long long) sp,
4600    (unsigned long long) stack_base);
4601
4602  while (sp && pc)
4603  {
4604    DprintfT (SP_DUMP_UNWIND,
4605	"unwind.c:%d stack_unwind %2d pc=0x%llx  sp=0x%llx\n",
4606	__LINE__, ind, (unsigned long long) pc, (unsigned long long) sp);
4607//      Dl_info dlinfo;
4608//      if (!dladdr ((void *) pc, &dlinfo))
4609//	break;
4610//      DprintfT (SP_DUMP_UNWIND, "%2d: %llx <%s+%llu> (%s)\n",
4611//		ind, (unsigned long long) pc,
4612//		dlinfo.dli_sname ? dlinfo.dli_sname : "(?)",
4613//		(unsigned long long) pc - (unsigned long long) dlinfo.dli_saddr,
4614//		dlinfo.dli_fname);
4615      lbuf[ind++] = pc;
4616      if (ind >= lsize || sp >= stack_base || (sp & 15) != 0)
4617	break;
4618      if (pc < tbgn || pc >= tend)
4619	if (!__collector_check_segment ((unsigned long) pc, &tbgn, &tend, 0))
4620	  {
4621	    DprintfT (SP_DUMP_UNWIND,
4622		     "unwind.c:%d __collector_check_segment failed. sp=0x%lx\n",
4623		      __LINE__, (unsigned long) sp);
4624	    break;
4625	  }
4626      pc = ((__u64 *) sp)[1];
4627      __u64 old_sp = sp;
4628      sp = ((__u64 *) sp)[0];
4629      if (sp < old_sp)
4630	break;
4631    }
4632  if (ind >= lsize)
4633    {
4634      ind = lsize - 1;
4635      lbuf[ind++] = (__u64) SP_TRUNC_STACK_MARKER;
4636    }
4637  return ind * sizeof (__u64);
4638}
4639#endif /* ARCH() */
4640