vframeArray.cpp revision 1472:c18cbe5936b8
1/*
2 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25# include "incls/_precompiled.incl"
26# include "incls/_vframeArray.cpp.incl"
27
28
29int vframeArrayElement:: bci(void) const { return (_bci == SynchronizationEntryBCI ? 0 : _bci); }
30
31void vframeArrayElement::free_monitors(JavaThread* jt) {
32  if (_monitors != NULL) {
33     MonitorChunk* chunk = _monitors;
34     _monitors = NULL;
35     jt->remove_monitor_chunk(chunk);
36     delete chunk;
37  }
38}
39
40void vframeArrayElement::fill_in(compiledVFrame* vf) {
41
42// Copy the information from the compiled vframe to the
43// interpreter frame we will be creating to replace vf
44
45  _method = vf->method();
46  _bci    = vf->raw_bci();
47  _reexecute = vf->should_reexecute();
48
49  int index;
50
51  // Get the monitors off-stack
52
53  GrowableArray<MonitorInfo*>* list = vf->monitors();
54  if (list->is_empty()) {
55    _monitors = NULL;
56  } else {
57
58    // Allocate monitor chunk
59    _monitors = new MonitorChunk(list->length());
60    vf->thread()->add_monitor_chunk(_monitors);
61
62    // Migrate the BasicLocks from the stack to the monitor chunk
63    for (index = 0; index < list->length(); index++) {
64      MonitorInfo* monitor = list->at(index);
65      assert(!monitor->owner_is_scalar_replaced(), "object should be reallocated already");
66      assert(monitor->owner() == NULL || (!monitor->owner()->is_unlocked() && !monitor->owner()->has_bias_pattern()), "object must be null or locked, and unbiased");
67      BasicObjectLock* dest = _monitors->at(index);
68      dest->set_obj(monitor->owner());
69      monitor->lock()->move_to(monitor->owner(), dest->lock());
70    }
71  }
72
73  // Convert the vframe locals and expressions to off stack
74  // values. Because we will not gc all oops can be converted to
75  // intptr_t (i.e. a stack slot) and we are fine. This is
76  // good since we are inside a HandleMark and the oops in our
77  // collection would go away between packing them here and
78  // unpacking them in unpack_on_stack.
79
80  // First the locals go off-stack
81
82  // FIXME this seems silly it creates a StackValueCollection
83  // in order to get the size to then copy them and
84  // convert the types to intptr_t size slots. Seems like it
85  // could do it in place... Still uses less memory than the
86  // old way though
87
88  StackValueCollection *locs = vf->locals();
89  _locals = new StackValueCollection(locs->size());
90  for(index = 0; index < locs->size(); index++) {
91    StackValue* value = locs->at(index);
92    switch(value->type()) {
93      case T_OBJECT:
94        assert(!value->obj_is_scalar_replaced(), "object should be reallocated already");
95        // preserve object type
96        _locals->add( new StackValue((intptr_t) (value->get_obj()()), T_OBJECT ));
97        break;
98      case T_CONFLICT:
99        // A dead local.  Will be initialized to null/zero.
100        _locals->add( new StackValue());
101        break;
102      case T_INT:
103        _locals->add( new StackValue(value->get_int()));
104        break;
105      default:
106        ShouldNotReachHere();
107    }
108  }
109
110  // Now the expressions off-stack
111  // Same silliness as above
112
113  StackValueCollection *exprs = vf->expressions();
114  _expressions = new StackValueCollection(exprs->size());
115  for(index = 0; index < exprs->size(); index++) {
116    StackValue* value = exprs->at(index);
117    switch(value->type()) {
118      case T_OBJECT:
119        assert(!value->obj_is_scalar_replaced(), "object should be reallocated already");
120        // preserve object type
121        _expressions->add( new StackValue((intptr_t) (value->get_obj()()), T_OBJECT ));
122        break;
123      case T_CONFLICT:
124        // A dead stack element.  Will be initialized to null/zero.
125        // This can occur when the compiler emits a state in which stack
126        // elements are known to be dead (because of an imminent exception).
127        _expressions->add( new StackValue());
128        break;
129      case T_INT:
130        _expressions->add( new StackValue(value->get_int()));
131        break;
132      default:
133        ShouldNotReachHere();
134    }
135  }
136}
137
// Running count of interpreter frames unpacked; it is pre-incremented and
// printed by the non-PRODUCT "TraceDeoptimization && Verbose" tracing in
// vframeArrayElement::unpack_on_stack.
138int unpack_counter = 0;
139
// Fills in the skeletal interpreter frame held by this element (iframe())
// from the method, bci, monitors, locals and expressions recorded earlier.
//
//   callee_parameters - added to the expression count handed to
//                       Interpreter::layout_activation and passed to
//                       Interpreter::deopt_continue_after_entry
//   callee_locals     - forwarded to Interpreter::layout_activation
//   caller            - forwarded to Interpreter::layout_activation
//   is_top_frame      - when true, the continuation pc may be overridden for
//                       a pending PopFrame, a pending early return, or
//                       according to exec_mode
//   exec_mode         - one of the Deoptimization::Unpack_* values handled in
//                       the switch below; apart from the COMPILER2
//                       monitorenter guarantee it is only consulted for the
//                       top frame
//
// On exit _locals and _expressions are reset to NULL.
140void vframeArrayElement::unpack_on_stack(int callee_parameters,
141                                         int callee_locals,
142                                         frame* caller,
143                                         bool is_top_frame,
144                                         int exec_mode) {
145  JavaThread* thread = (JavaThread*) Thread::current();
146
147  // Look at bci and decide on bcp and continuation pc
148  address bcp;
149  // C++ interpreter doesn't need a pc since it will figure out what to do when it
150  // begins execution
151  address pc;
152  bool use_next_mdp = false; // true if we should use the mdp associated with the next bci
153                             // rather than the one associated with bcp
154  if (raw_bci() == SynchronizationEntryBCI) {
155    // We are deoptimizing while hanging in prologue code for synchronized method
156    bcp = method()->bcp_from(0); // first byte code
157    pc  = Interpreter::deopt_entry(vtos, 0); // step = 0 since we don't skip current bytecode
158  } else if (should_reexecute()) { //reexecute this bytecode
159    assert(is_top_frame, "reexecute allowed only for the top frame");
160    bcp = method()->bcp_from(bci());
161    pc  = Interpreter::deopt_reexecute_entry(method(), bcp);
162  } else {
163    bcp = method()->bcp_from(bci());
164    pc  = Interpreter::deopt_continue_after_entry(method(), bcp, callee_parameters, is_top_frame);
165    use_next_mdp = true;
166  }
167  assert(Bytecodes::is_defined(*bcp), "must be a valid bytecode");
168
169  // Monitorenter and pending exceptions:
170  //
171  // For Compiler2, there should be no pending exception when deoptimizing at monitorenter
172  // because there is no safepoint at the null pointer check (it is either handled explicitly
173  // or prior to the monitorenter) and asynchronous exceptions are not made "pending" by the
174  // runtime interface for the slow case (see JRT_ENTRY_FOR_MONITORENTER).  If an asynchronous
175  // exception was processed, the bytecode pointer would have to be extended one bytecode beyond
176  // the monitorenter to place it in the proper exception range.
177  //
178  // For Compiler1, deoptimization can occur while throwing a NullPointerException at monitorenter,
179  // in which case bcp should point to the monitorenter since it is within the exception's range.
180
181  assert(*bcp != Bytecodes::_monitorenter || is_top_frame, "a _monitorenter must be a top frame");
182  // TIERED Must know the compiler of the deoptee QQQ
183  COMPILER2_PRESENT(guarantee(*bcp != Bytecodes::_monitorenter || exec_mode != Deoptimization::Unpack_exception,
184                              "shouldn't get exception during monitorenter");)
185
// Sizes of the PopFrame-preserved arguments; both stay 0 unless the
// "reexecute invoke in top frame" path below assigns them.
186  int popframe_preserved_args_size_in_bytes = 0;
187  int popframe_preserved_args_size_in_words = 0;
188  if (is_top_frame) {
189    JvmtiThreadState *state = thread->jvmti_thread_state();
190    if (JvmtiExport::can_pop_frame() &&
191        (thread->has_pending_popframe() || thread->popframe_forcing_deopt_reexecution())) {
192      if (thread->has_pending_popframe()) {
193        // Pop top frame after deoptimization
194#ifndef CC_INTERP
195        pc = Interpreter::remove_activation_preserving_args_entry();
196#else
197        // Do an uncommon trap type entry. c++ interpreter will know
198        // to pop frame and preserve the args
199        pc = Interpreter::deopt_entry(vtos, 0);
200        use_next_mdp = false;
201#endif
202      } else {
203        // Reexecute invoke in top frame
204        pc = Interpreter::deopt_entry(vtos, 0);
205        use_next_mdp = false;
206        popframe_preserved_args_size_in_bytes = in_bytes(thread->popframe_preserved_args_size());
207        // Note: the PopFrame-related extension of the expression stack size is done in
208        // Deoptimization::fetch_unroll_info_helper
209        popframe_preserved_args_size_in_words = in_words(thread->popframe_preserved_args_size_in_words());
210      }
211    } else if (JvmtiExport::can_force_early_return() && state != NULL && state->is_earlyret_pending()) {
212      // Force early return from top frame after deoptimization
213#ifndef CC_INTERP
214      pc = Interpreter::remove_activation_early_entry(state->earlyret_tos());
215#else
216     // TBD: Need to implement ForceEarlyReturn for CC_INTERP (ia64)
217#endif
218    } else {
219      // Possibly override the previous pc computation of the top (youngest) frame
220      switch (exec_mode) {
221      case Deoptimization::Unpack_deopt:
222        // use what we've got
223        break;
224      case Deoptimization::Unpack_exception:
225        // exception is pending
226        pc = SharedRuntime::raw_exception_handler_for_return_address(thread, pc);
227        // [phh] We're going to end up in some handler or other, so it doesn't
228        // matter what mdp we point to.  See exception_handler_for_exception()
229        // in interpreterRuntime.cpp.
230        break;
231      case Deoptimization::Unpack_uncommon_trap:
232      case Deoptimization::Unpack_reexecute:
233        // redo last byte code
234        pc  = Interpreter::deopt_entry(vtos, 0);
235        use_next_mdp = false;
236        break;
237      default:
238        ShouldNotReachHere();
239      }
240    }
241  }
242
243  // Setup the interpreter frame
244
245  assert(method() != NULL, "method must exist");
246  int temps = expressions()->size();
247
248  int locks = monitors() == NULL ? 0 : monitors()->number_of_monitors();
249
250  Interpreter::layout_activation(method(),
251                                 temps + callee_parameters,
252                                 popframe_preserved_args_size_in_words,
253                                 locks,
254                                 callee_parameters,
255                                 callee_locals,
256                                 caller,
257                                 iframe(),
258                                 is_top_frame);
259
260  // Update the pc in the frame object and overwrite the temporary pc
261  // we placed in the skeletal frame now that we finally know the
262  // exact interpreter address we should use.
263
264  _frame.patch_pc(thread, pc);
265
266  assert (!method()->is_synchronized() || locks > 0, "synchronized methods must have monitors");
267
// Move each saved monitor from the off-stack chunk into the frame's
// monitor area, stepping through the frame's monitor slots one at a time.
268  BasicObjectLock* top = iframe()->interpreter_frame_monitor_begin();
269  for (int index = 0; index < locks; index++) {
270    top = iframe()->previous_monitor_in_interpreter_frame(top);
271    BasicObjectLock* src = _monitors->at(index);
272    top->set_obj(src->obj());
273    src->lock()->move_to(src->obj(), top->lock());
274  }
275  if (ProfileInterpreter) {
276    iframe()->interpreter_frame_set_mdx(0); // clear out the mdp.
277  }
278  iframe()->interpreter_frame_set_bcx((intptr_t)bcp); // cannot use bcp because frame is not initialized yet
279  if (ProfileInterpreter) {
280    methodDataOop mdo = method()->method_data();
281    if (mdo != NULL) {
282      int bci = iframe()->interpreter_frame_bci();
283      if (use_next_mdp) ++bci;
284      address mdp = mdo->bci_to_dp(bci);
285      iframe()->interpreter_frame_set_mdp(mdp);
286    }
287  }
288
289  // Unpack expression stack
290  // If this is an intermediate frame (i.e. not top frame) then this
291  // only unpacks the part of the expression stack not used by callee
292  // as parameters. The callee parameters are unpacked as part of the
293  // callee locals.
294  int i;
295  for(i = 0; i < expressions()->size(); i++) {
296    StackValue *value = expressions()->at(i);
297    intptr_t*   addr  = iframe()->interpreter_frame_expression_stack_at(i);
298    switch(value->type()) {
299      case T_INT:
300        *addr = value->get_int();
301        break;
302      case T_OBJECT:
303        *addr = value->get_int(T_OBJECT);
304        break;
305      case T_CONFLICT:
306        // A dead stack slot.  Initialize to null in case it is an oop.
307        *addr = NULL_WORD;
308        break;
309      default:
310        ShouldNotReachHere();
311    }
312  }
313
314
315  // Unpack the locals
316  for(i = 0; i < locals()->size(); i++) {
317    StackValue *value = locals()->at(i);
318    intptr_t* addr  = iframe()->interpreter_frame_local_at(i);
319    switch(value->type()) {
320      case T_INT:
321        *addr = value->get_int();
322        break;
323      case T_OBJECT:
324        *addr = value->get_int(T_OBJECT);
325        break;
326      case T_CONFLICT:
327        // A dead location. If it is an oop then we need a NULL to prevent GC from following it
328        *addr = NULL_WORD;
329        break;
330      default:
331        ShouldNotReachHere();
332    }
333  }
334
335  if (is_top_frame && JvmtiExport::can_pop_frame() && thread->popframe_forcing_deopt_reexecution()) {
336    // An interpreted frame was popped but it returns to a deoptimized
337    // frame. The incoming arguments to the interpreted activation
338    // were preserved in thread-local storage by the
339    // remove_activation_preserving_args_entry in the interpreter; now
340    // we put them back into the just-unpacked interpreter frame.
341    // Note that this assumes that the locals arena grows toward lower
342    // addresses.
343    if (popframe_preserved_args_size_in_words != 0) {
344      void* saved_args = thread->popframe_preserved_args();
345      assert(saved_args != NULL, "must have been saved by interpreter");
346#ifdef ASSERT
347      assert(popframe_preserved_args_size_in_words <=
348             iframe()->interpreter_frame_expression_stack_size()*Interpreter::stackElementWords,
349             "expression stack size should have been extended");
350#endif // ASSERT
351      int top_element = iframe()->interpreter_frame_expression_stack_size()-1;
352      intptr_t* base;
// Choose the copy destination according to the direction in which the
// expression stack grows.
353      if (frame::interpreter_frame_expression_stack_direction() < 0) {
354        base = iframe()->interpreter_frame_expression_stack_at(top_element);
355      } else {
356        base = iframe()->interpreter_frame_expression_stack();
357      }
358      Copy::conjoint_bytes(saved_args,
359                           base,
360                           popframe_preserved_args_size_in_bytes);
361      thread->popframe_free_preserved_args();
362    }
363  }
364
365#ifndef PRODUCT
366  if (TraceDeoptimization && Verbose) {
367    ttyLocker ttyl;
368    tty->print_cr("[%d Interpreted Frame]", ++unpack_counter);
369    iframe()->print_on(tty);
370    RegisterMap map(thread);
371    vframe* f = vframe::new_vframe(iframe(), &map, thread);
372    f->print();
373
374    tty->print_cr("locals size     %d", locals()->size());
375    tty->print_cr("expression size %d", expressions()->size());
376
377    method()->print_value();
378    tty->cr();
379    // method()->print_codes();
380  } else if (TraceDeoptimization) {
381    tty->print("     ");
382    method()->print_value();
383    Bytecodes::Code code = Bytecodes::java_code_at(bcp);
384    int bci = method()->bci_from(bcp);
385    tty->print(" - %s", Bytecodes::name(code));
386    tty->print(" @ bci %d ", bci);
387    tty->print_cr("sp = " PTR_FORMAT, iframe()->sp());
388  }
389#endif // PRODUCT
390
391  // The expression stack and locals are in the resource area don't leave
392  // a dangling pointer in the vframeArray we leave around for debug
393  // purposes
394
395  _locals = _expressions = NULL;
396
397}
398
399int vframeArrayElement::on_stack_size(int callee_parameters,
400                                      int callee_locals,
401                                      bool is_top_frame,
402                                      int popframe_extra_stack_expression_els) const {
403  assert(method()->max_locals() == locals()->size(), "just checking");
404  int locks = monitors() == NULL ? 0 : monitors()->number_of_monitors();
405  int temps = expressions()->size();
406  return Interpreter::size_activation(method(),
407                                      temps + callee_parameters,
408                                      popframe_extra_stack_expression_els,
409                                      locks,
410                                      callee_parameters,
411                                      callee_locals,
412                                      is_top_frame);
413}
414
415
416
417vframeArray* vframeArray::allocate(JavaThread* thread, int frame_size, GrowableArray<compiledVFrame*>* chunk,
418                                   RegisterMap *reg_map, frame sender, frame caller, frame self) {
419
420  // Allocate the vframeArray
421  vframeArray * result = (vframeArray*) AllocateHeap(sizeof(vframeArray) + // fixed part
422                                                     sizeof(vframeArrayElement) * (chunk->length() - 1), // variable part
423                                                     "vframeArray::allocate");
424  result->_frames = chunk->length();
425  result->_owner_thread = thread;
426  result->_sender = sender;
427  result->_caller = caller;
428  result->_original = self;
429  result->set_unroll_block(NULL); // initialize it
430  result->fill_in(thread, frame_size, chunk, reg_map);
431  return result;
432}
433
434void vframeArray::fill_in(JavaThread* thread,
435                          int frame_size,
436                          GrowableArray<compiledVFrame*>* chunk,
437                          const RegisterMap *reg_map) {
438  // Set owner first, it is used when adding monitor chunks
439
440  _frame_size = frame_size;
441  for(int i = 0; i < chunk->length(); i++) {
442    element(i)->fill_in(chunk->at(i));
443  }
444
445  // Copy registers for callee-saved registers
446  if (reg_map != NULL) {
447    for(int i = 0; i < RegisterMap::reg_count; i++) {
448#ifdef AMD64
449      // The register map has one entry for every int (32-bit value), so
450      // 64-bit physical registers have two entries in the map, one for
451      // each half.  Ignore the high halves of 64-bit registers, just like
452      // frame::oopmapreg_to_location does.
453      //
454      // [phh] FIXME: this is a temporary hack!  This code *should* work
455      // correctly w/o this hack, possibly by changing RegisterMap::pd_location
456      // in frame_amd64.cpp and the values of the phantom high half registers
457      // in amd64.ad.
458      //      if (VMReg::Name(i) < SharedInfo::stack0 && is_even(i)) {
459        intptr_t* src = (intptr_t*) reg_map->location(VMRegImpl::as_VMReg(i));
460        _callee_registers[i] = src != NULL ? *src : NULL_WORD;
461        //      } else {
462        //      jint* src = (jint*) reg_map->location(VMReg::Name(i));
463        //      _callee_registers[i] = src != NULL ? *src : NULL_WORD;
464        //      }
465#else
466      jint* src = (jint*) reg_map->location(VMRegImpl::as_VMReg(i));
467      _callee_registers[i] = src != NULL ? *src : NULL_WORD;
468#endif
469      if (src == NULL) {
470        set_location_valid(i, false);
471      } else {
472        set_location_valid(i, true);
473        jint* dst = (jint*) register_location(i);
474        *dst = *src;
475      }
476    }
477  }
478}
479
480void vframeArray::unpack_to_stack(frame &unpack_frame, int exec_mode) {
481  // stack picture
482  //   unpack_frame
483  //   [new interpreter frames ] (frames are skeletal but walkable)
484  //   caller_frame
485  //
486  //  This routine fills in the missing data for the skeletal interpreter frames
487  //  in the above picture.
488
489  // Find the skeletal interpreter frames to unpack into
490  RegisterMap map(JavaThread::current(), false);
491  // Get the youngest frame we will unpack (last to be unpacked)
492  frame me = unpack_frame.sender(&map);
493  int index;
494  for (index = 0; index < frames(); index++ ) {
495    *element(index)->iframe() = me;
496    // Get the caller frame (possibly skeletal)
497    me = me.sender(&map);
498  }
499
500  frame caller_frame = me;
501
502  // Do the unpacking of interpreter frames; the frame at index 0 represents the top activation, so it has no callee
503
504  // Unpack the frames from the oldest (frames() -1) to the youngest (0)
505
506  for (index = frames() - 1; index >= 0 ; index--) {
507    int callee_parameters = index == 0 ? 0 : element(index-1)->method()->size_of_parameters();
508    int callee_locals     = index == 0 ? 0 : element(index-1)->method()->max_locals();
509    element(index)->unpack_on_stack(callee_parameters,
510                                    callee_locals,
511                                    &caller_frame,
512                                    index == 0,
513                                    exec_mode);
514    if (index == frames() - 1) {
515      Deoptimization::unwind_callee_save_values(element(index)->iframe(), this);
516    }
517    caller_frame = *element(index)->iframe();
518  }
519
520
521  deallocate_monitor_chunks();
522}
523
524void vframeArray::deallocate_monitor_chunks() {
525  JavaThread* jt = JavaThread::current();
526  for (int index = 0; index < frames(); index++ ) {
527     element(index)->free_monitors(jt);
528  }
529}
530
531#ifndef PRODUCT
532
533bool vframeArray::structural_compare(JavaThread* thread, GrowableArray<compiledVFrame*>* chunk) {
534  if (owner_thread() != thread) return false;
535  int index = 0;
536#if 0 // FIXME can't do this comparison
537
538  // Compare only within vframe array.
539  for (deoptimizedVFrame* vf = deoptimizedVFrame::cast(vframe_at(first_index())); vf; vf = vf->deoptimized_sender_or_null()) {
540    if (index >= chunk->length() || !vf->structural_compare(chunk->at(index))) return false;
541    index++;
542  }
543  if (index != chunk->length()) return false;
544#endif
545
546  return true;
547}
548
549#endif
550
551address vframeArray::register_location(int i) const {
552  assert(0 <= i && i < RegisterMap::reg_count, "index out of bounds");
553  return (address) & _callee_registers[i];
554}
555
556
557#ifndef PRODUCT
558
559// Printing
560
561// Note: we cannot have print_on as const, as we allocate inside the method
562void vframeArray::print_on_2(outputStream* st)  {
563  st->print_cr(" - sp: " INTPTR_FORMAT, sp());
564  st->print(" - thread: ");
565  Thread::current()->print();
566  st->print_cr(" - frame size: %d", frame_size());
567  for (int index = 0; index < frames() ; index++ ) {
568    element(index)->print(st);
569  }
570}
571
572void vframeArrayElement::print(outputStream* st) {
573  st->print_cr(" - interpreter_frame -> sp: " INTPTR_FORMAT, iframe()->sp());
574}
575
576void vframeArray::print_value_on(outputStream* st) const {
577  st->print_cr("vframeArray [%d] ", frames());
578}
579
580
581#endif
582