stubGenerator_x86_32.cpp revision 337:9ee9cf798b59
1249259Sdim/*
2249259Sdim * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
3249259Sdim * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4249259Sdim *
5249259Sdim * This code is free software; you can redistribute it and/or modify it
6249259Sdim * under the terms of the GNU General Public License version 2 only, as
7249259Sdim * published by the Free Software Foundation.
8249259Sdim *
9249259Sdim * This code is distributed in the hope that it will be useful, but WITHOUT
10249259Sdim * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11249259Sdim * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12249259Sdim * version 2 for more details (a copy is included in the LICENSE file that
13249259Sdim * accompanied this code).
14249259Sdim *
15249259Sdim * You should have received a copy of the GNU General Public License version
16249259Sdim * 2 along with this work; if not, write to the Free Software Foundation,
17249259Sdim * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18249259Sdim *
19249259Sdim * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20249259Sdim * CA 95054 USA or visit www.sun.com if you need additional information or
21249259Sdim * have any questions.
22249259Sdim *
23249259Sdim */
24
25#include "incls/_precompiled.incl"
26#include "incls/_stubGenerator_x86_32.cpp.incl"
27
28// Declaration and definition of StubGenerator (no .hpp file).
29// For a more detailed description of the stub routine structure
30// see the comment in stubRoutines.hpp
31
// Shorthand used throughout this file: "__ foo(...)" expands to
// "_masm->foo(...)", i.e. a call through the _masm pointer.
32#define __ _masm->
// Same shorthand, but with _masm first cast to Assembler*.
33#define a__ ((Assembler*)_masm)->
34
// BLOCK_COMMENT(str) expands to nothing when PRODUCT is defined and to
// "__ block_comment(str)" otherwise.
35#ifdef PRODUCT
36#define BLOCK_COMMENT(str) /* nothing */
37#else
38#define BLOCK_COMMENT(str) __ block_comment(str)
39#endif
40
// BIND(label) is meant to be used as "__ BIND(label);": it binds the label and
// then emits a block comment made of the label's identifier followed by ':'.
// Because the identifier is stringified, renaming a label that is bound with
// BIND changes the emitted comment text.  The expansion is two statements, so
// it must not be used as the unbraced body of an if/else/loop.
41#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
42
// Masks applied to the saved MXCSR / x87 control word before they are compared
// with the standard values (see the call stub and the verify_* stubs below).
43const int MXCSR_MASK  = 0xFFC0;  // Mask out any pending exceptions
44const int FPU_CNTRL_WRD_MASK = 0xFFFF;  // all 16 bits are compared
45
46// -------------------------------------------------------------------------------------------------------------------------
47// Stub Code definitions
48
49static address handle_unsafe_access() {
50  JavaThread* thread = JavaThread::current();
51  address pc  = thread->saved_exception_pc();
52  // pc is the instruction which we must emulate
53  // doing a no-op is fine:  return garbage from the load
54  // therefore, compute npc
55  address npc = Assembler::locate_next_instruction(pc);
56
57  // request an async exception
58  thread->set_pending_unsafe_access_error();
59
60  // return address of next instruction to execute
61  return npc;
62}
63
64class StubGenerator: public StubCodeGenerator {
65 private:
66
// inc_counter_np(counter): emit code that increments a statistics counter.
// In PRODUCT builds the macro collapses to the constant expression (0) and no
// code is emitted.  Otherwise it emits a block comment naming the counter
// (the macro argument is stringified) followed by the increment itself.
// Note that the non-PRODUCT expansion consists of two statements.
67#ifdef PRODUCT
68#define inc_counter_np(counter) (0)
69#else
  // Emits a 32-bit increment of the int that 'counter' refers to, addressed
  // through an ExternalAddress built from the variable's address.
70  void inc_counter_np_(int& counter) {
71    __ incrementl(ExternalAddress((address)&counter));
72  }
73#define inc_counter_np(counter) \
74  BLOCK_COMMENT("inc_counter " #counter); \
75  inc_counter_np_(counter);
76#endif //PRODUCT
77
78  void inc_copy_counter_np(BasicType t) {
79#ifndef PRODUCT
80    switch (t) {
81    case T_BYTE:    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); return;
82    case T_SHORT:   inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); return;
83    case T_INT:     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); return;
84    case T_LONG:    inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); return;
85    case T_OBJECT:  inc_counter_np(SharedRuntime::_oop_array_copy_ctr); return;
86    }
87    ShouldNotReachHere();
88#endif //PRODUCT
89  }
90
91  //------------------------------------------------------------------------------------------------------------------------
92  // Call stubs are used to call Java from C
93  //
94  //    [ return_from_Java     ] <--- rsp
95  //    [ argument word n      ]
96  //      ...
97  // -N [ argument word 1      ]
98  // -7 [ Possible padding for stack alignment ]
99  // -6 [ Possible padding for stack alignment ]
100  // -5 [ Possible padding for stack alignment ]
101  // -4 [ mxcsr save           ] <--- rsp_after_call
102  // -3 [ saved rbx            ]
103  // -2 [ saved rsi            ]
104  // -1 [ saved rdi            ]
105  //  0 [ saved rbp            ] <--- rbp
106  //  1 [ return address       ]
107  //  2 [ ptr. to call wrapper ]
108  //  3 [ result               ]
109  //  4 [ result_type          ]
110  //  5 [ method               ]
111  //  6 [ entry_point          ]
112  //  7 [ parameters           ]
113  //  8 [ parameter_size       ]
114  //  9 [ thread               ]
115
116
117  address generate_call_stub(address& return_address) {
    // Emits the "call_stub" routine and returns the address of its first
    // instruction.
    //
    // return_address (out): set to the pc immediately following the emitted
    //   'call rax', i.e. the address the called Java code returns to.
    //
    // Side effect: also records, via
    // StubRoutines::x86::set_call_stub_compiled_return(), the pc of the
    // alternative return sequence emitted at the end of this stub.
118    StubCodeMark mark(this, "StubRoutines", "call_stub");
119    address start = __ pc();
120
121    // stub code parameters / addresses
122    assert(frame::entry_frame_call_wrapper_offset == 2, "adjust this code");
123    bool  sse_save = false;
    // NOTE: rsp_after_call and mxcsr_save name the same slot (rbp - 4 words),
    // the lowest of the four words reserved for register/mxcsr saves.
124    const Address rsp_after_call(rbp, -4 * wordSize); // same as in generate_catch_exception()!
125    const int     locals_count_in_bytes  (4*wordSize);
126    const Address mxcsr_save    (rbp, -4 * wordSize);
127    const Address saved_rbx     (rbp, -3 * wordSize);
128    const Address saved_rsi     (rbp, -2 * wordSize);
129    const Address saved_rdi     (rbp, -1 * wordSize);
    // Incoming arguments, addressed relative to rbp once enter() has run
    // (offsets match the frame picture above this function).
130    const Address result        (rbp,  3 * wordSize);
131    const Address result_type   (rbp,  4 * wordSize);
132    const Address method        (rbp,  5 * wordSize);
133    const Address entry_point   (rbp,  6 * wordSize);
134    const Address parameters    (rbp,  7 * wordSize);
135    const Address parameter_size(rbp,  8 * wordSize);
136    const Address thread        (rbp,  9 * wordSize); // same as in generate_catch_exception()!
    // MXCSR is saved/restored only when SSE is in use.
137    sse_save =  UseSSE > 0;
138
139    // stub code
    // Reserve stack: (parameter count << logStackElementSize) bytes for the
    // outgoing Java arguments plus locals_count_in_bytes for the save area,
    // then mask rsp with -(StackAlignmentInBytes) to align it downwards.
140    __ enter();
141    __ movptr(rcx, parameter_size);              // parameter counter
142    __ shlptr(rcx, Interpreter::logStackElementSize()); // convert parameter count to bytes
143    __ addptr(rcx, locals_count_in_bytes);       // reserve space for register saves
144    __ subptr(rsp, rcx);
145    __ andptr(rsp, -(StackAlignmentInBytes));    // Align stack
146
147    // save rdi, rsi, & rbx, according to C calling conventions
148    __ movptr(saved_rdi, rdi);
149    __ movptr(saved_rsi, rsi);
150    __ movptr(saved_rbx, rbx);
151    // save and initialize %mxcsr
    // The current MXCSR is stored in its save slot; its masked value is then
    // compared with the word at StubRoutines::addr_mxcsr_std() and the
    // standard value is loaded only if they differ.
152    if (sse_save) {
153      Label skip_ldmx;
154      __ stmxcsr(mxcsr_save);
155      __ movl(rax, mxcsr_save);
156      __ andl(rax, MXCSR_MASK);    // Only check control and mask bits
157      ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
158      __ cmp32(rax, mxcsr_std);
159      __ jcc(Assembler::equal, skip_ldmx);
160      __ ldmxcsr(mxcsr_std);
161      __ bind(skip_ldmx);
162    }
163
164    // make sure the control word is correct.
    // (loaded unconditionally from StubRoutines::addr_fpu_cntrl_wrd_std())
165    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
166
167#ifdef ASSERT
168    // make sure we have no pending exceptions
169    { Label L;
170      __ movptr(rcx, thread);
171      __ cmpptr(Address(rcx, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
172      __ jcc(Assembler::equal, L);
173      __ stop("StubRoutines::call_stub: entered with pending exception");
174      __ bind(L);
175    }
176#endif
177
178    // pass parameters if any
179    BLOCK_COMMENT("pass parameters if any");
180    Label parameters_done;
181    __ movl(rcx, parameter_size);  // parameter counter
182    __ testl(rcx, rcx);
183    __ jcc(Assembler::zero, parameters_done);
184
185    // parameter passing loop
186
187    Label loop;
188    // Copy Java parameters in reverse order (receiver last)
189    // Note that the argument order is inverted in the process
190    // source is rdx[rcx: N-1..0]
191    // dest   is rsp[rbx: 0..N-1]
    // rcx counts down from N to 1 and is used with a -wordSize displacement;
    // rbx counts up from 0.  The loop ends when rcx reaches zero.
192
193    __ movptr(rdx, parameters);          // parameter pointer
194    __ xorptr(rbx, rbx);
195
196    __ BIND(loop);
    // With tagged stacks, the tag word is copied alongside each parameter.
197    if (TaggedStackInterpreter) {
198      __ movptr(rax, Address(rdx, rcx, Interpreter::stackElementScale(),
199                      -2*wordSize));                          // get tag
200      __ movptr(Address(rsp, rbx, Interpreter::stackElementScale(),
201                      Interpreter::expr_tag_offset_in_bytes(0)), rax);     // store tag
202    }
203
204    // get parameter
205    __ movptr(rax, Address(rdx, rcx, Interpreter::stackElementScale(), -wordSize));
206    __ movptr(Address(rsp, rbx, Interpreter::stackElementScale(),
207                    Interpreter::expr_offset_in_bytes(0)), rax);          // store parameter
208    __ increment(rbx);
209    __ decrement(rcx);
210    __ jcc(Assembler::notZero, loop);
211
212    // call Java function
    // Register set-up at the call: rbx = method, rsi = sender sp (current
    // rsp), rax = entry point (the call target).
213    __ BIND(parameters_done);
214    __ movptr(rbx, method);           // get methodOop
215    __ movptr(rax, entry_point);      // get entry_point
216    __ mov(rsi, rsp);                 // set sender sp
217    BLOCK_COMMENT("call Java function");
218    __ call(rax);
219
220    BLOCK_COMMENT("call_stub_return_address:");
    // Publish the pc right after the call through the out-parameter.
221    return_address = __ pc();
222
223    Label common_return;
224
    // Shared by the normal return and by the compiled-return sequence emitted
    // at the end of this stub.
225    __ BIND(common_return);
226
227    // store result depending on type
228    // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
    // rdi = address to store the result at, rsi = result type.  Both were
    // saved on entry and are restored below.
229    __ movptr(rdi, result);
230    Label is_long, is_float, is_double, exit;
231    __ movl(rsi, result_type);
232    __ cmpl(rsi, T_LONG);
233    __ jcc(Assembler::equal, is_long);
234    __ cmpl(rsi, T_FLOAT);
235    __ jcc(Assembler::equal, is_float);
236    __ cmpl(rsi, T_DOUBLE);
237    __ jcc(Assembler::equal, is_double);
238
239    // handle T_INT case
240    __ movl(Address(rdi, 0), rax);
241    __ BIND(exit);
242
243    // check that FPU stack is empty
244    __ verify_FPU(0, "generate_call_stub");
245
246    // pop parameters
    // rsp := rbp - 4 words, the bottom of the register save area.
247    __ lea(rsp, rsp_after_call);
248
249    // restore %mxcsr
250    if (sse_save) {
251      __ ldmxcsr(mxcsr_save);
252    }
253
254    // restore rdi, rsi and rbx
255    __ movptr(rbx, saved_rbx);
256    __ movptr(rsi, saved_rsi);
257    __ movptr(rdi, saved_rdi);
    // Drop the four save-area words (same size as locals_count_in_bytes) so
    // that rsp equals rbp again before the frame is popped.
258    __ addptr(rsp, 4*wordSize);
259
260    // return
261    __ pop(rbp);
262    __ ret(0);
263
264    // handle return types different from T_INT
    // T_LONG: low word from rax, high word from rdx.
265    __ BIND(is_long);
266    __ movl(Address(rdi, 0 * wordSize), rax);
267    __ movl(Address(rdi, 1 * wordSize), rdx);
268    __ jmp(exit);
269
270    __ BIND(is_float);
271    // interpreter uses xmm0 for return values
    // (when UseSSE >= 1; otherwise the value is popped from the x87 stack)
272    if (UseSSE >= 1) {
273      __ movflt(Address(rdi, 0), xmm0);
274    } else {
275      __ fstp_s(Address(rdi, 0));
276    }
277    __ jmp(exit);
278
279    __ BIND(is_double);
280    // interpreter uses xmm0 for return values
    // (when UseSSE >= 2; otherwise the value is popped from the x87 stack)
281    if (UseSSE >= 2) {
282      __ movdbl(Address(rdi, 0), xmm0);
283    } else {
284      __ fstp_d(Address(rdi, 0));
285    }
286    __ jmp(exit);
287
288    // If we call compiled code directly from the call stub we will
289    // need to adjust the return back to the call stub to a specialized
290    // piece of code that can handle compiled results and cleaning the fpu
291    // stack. compiled code will be set to return here instead of the
292    // return above that handles interpreter returns.
293
294    BLOCK_COMMENT("call_stub_compiled_return:");
295    StubRoutines::x86::set_call_stub_compiled_return( __ pc());
296
297#ifdef COMPILER2
    // With UseSSE >= 2 the x87 stack is only verified.  Otherwise x87
    // registers 1..7 are freed, and register 0 (TOS) is freed as well unless
    // it holds the result: a double, or a float when UseSSE == 0.
298    if (UseSSE >= 2) {
299      __ verify_FPU(0, "call_stub_compiled_return");
300    } else {
301      for (int i = 1; i < 8; i++) {
302        __ ffree(i);
303      }
304
305      // UseSSE <= 1 so double result should be left on TOS
306      __ movl(rsi, result_type);
307      __ cmpl(rsi, T_DOUBLE);
308      __ jcc(Assembler::equal, common_return);
309      if (UseSSE == 0) {
310        // UseSSE == 0 so float result should be left on TOS
311        __ cmpl(rsi, T_FLOAT);
312        __ jcc(Assembler::equal, common_return);
313      }
314      __ ffree(0);
315    }
316#endif /* COMPILER2 */
    // Rejoin the common result-store / epilogue path.
317    __ jmp(common_return);
318
319    return start;
320  }
321
322
323  //------------------------------------------------------------------------------------------------------------------------
324  // Return point for a Java call if there's an exception thrown in Java code.
325  // The exception is caught and transformed into a pending exception stored in
326  // JavaThread that can be tested from within the VM.
327  //
328  // Note: Usually the parameters are removed by the callee. In case of an exception
329  //       crossing an activation frame boundary, that is not the case if the callee
330  //       is compiled code => need to setup the rsp.
331  //
332  // rax: exception oop
333
334  address generate_catch_exception() {
    // Emits the "catch_exception" stub and returns its start address.
    // On entry rax holds the exception oop.  The stub stores it as the
    // thread's pending exception, records this source file and line as the
    // exception origin, and then jumps to the call stub's return address.
    // The rbp-relative slots below must match generate_call_stub().
335    StubCodeMark mark(this, "StubRoutines", "catch_exception");
336    const Address rsp_after_call(rbp, -4 * wordSize); // same as in generate_call_stub()!
337    const Address thread        (rbp,  9 * wordSize); // same as in generate_call_stub()!
338    address start = __ pc();
339
340    // get thread directly
    // (read from the call stub frame's 'thread' argument slot)
341    __ movptr(rcx, thread);
342#ifdef ASSERT
343    // verify that threads correspond
344    { Label L;
345      __ get_thread(rbx);
346      __ cmpptr(rbx, rcx);
347      __ jcc(Assembler::equal, L);
348      __ stop("StubRoutines::catch_exception: threads must correspond");
349      __ bind(L);
350    }
351#endif
352    // set pending exception
353    __ verify_oop(rax);
354    __ movptr(Address(rcx, Thread::pending_exception_offset()), rax          );
    // __FILE__ / __LINE__ expand at this point in the generator source, so
    // the recorded location identifies this stub generator.
355    __ lea(Address(rcx, Thread::exception_file_offset   ()),
356           ExternalAddress((address)__FILE__));
357    __ movl(Address(rcx, Thread::exception_line_offset   ()), __LINE__ );
358    // complete return to VM
    // The call stub must already have been generated so that its return
    // address is known here.
359    assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before");
360    __ jump(RuntimeAddress(StubRoutines::_call_stub_return_address));
361
362    return start;
363  }
364
365
366  //------------------------------------------------------------------------------------------------------------------------
367  // Continuation point for runtime calls returning with a pending exception.
368  // The pending exception check happened in the runtime or native call stub.
369  // The pending exception in Thread is converted into a Java-level exception.
370  //
371  // Contract with Java-level exception handlers:
372  // rax: exception
373  // rdx: throwing pc
374  //
375  // NOTE: At entry of this stub, exception-pc must be on stack !!
376
377  address generate_forward_exception() {
    // Emits the "forward exception" stub and returns its start address.
    // The stub looks up the exception handler for the return address found on
    // top of the stack, moves the thread's pending exception into rax (and
    // clears it in the thread), pops the return address into rdx as the
    // throwing pc, and jumps to the handler.
378    StubCodeMark mark(this, "StubRoutines", "forward exception");
379    address start = __ pc();
380
381    // Upon entry, the sp points to the return address returning into Java
382    // (interpreted or compiled) code; i.e., the return address becomes the
383    // throwing pc.
384    //
385    // Arguments pushed before the runtime call are still on the stack but
386    // the exception handler will reset the stack pointer -> ignore them.
387    // A potential result in registers can be ignored as well.
388
389#ifdef ASSERT
390    // make sure this code is only executed if there is a pending exception
391    { Label L;
392      __ get_thread(rcx);
393      __ cmpptr(Address(rcx, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
394      __ jcc(Assembler::notEqual, L);
395      __ stop("StubRoutines::forward exception: no pending exception (1)");
396      __ bind(L);
397    }
398#endif
399
400    // compute exception handler into rbx
    // The return address is only read here (it stays on the stack) and is
    // passed to SharedRuntime::exception_handler_for_return_address; the
    // handler address comes back in rax and is parked in rbx.
401    __ movptr(rax, Address(rsp, 0));
402    BLOCK_COMMENT("call exception_handler_for_return_address");
403    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), rax);
404    __ mov(rbx, rax);
405
406    // setup rax & rdx, remove return address & clear pending exception
407    __ get_thread(rcx);
408    __ pop(rdx);
409    __ movptr(rax, Address(rcx, Thread::pending_exception_offset()));
410    __ movptr(Address(rcx, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
411
412#ifdef ASSERT
413    // make sure exception is set
414    { Label L;
415      __ testptr(rax, rax);
416      __ jcc(Assembler::notEqual, L);
417      __ stop("StubRoutines::forward exception: no pending exception (2)");
418      __ bind(L);
419    }
420#endif
421
422    // continue at exception handler (return address removed)
423    // rax: exception
424    // rbx: exception handler
425    // rdx: throwing pc
426    __ verify_oop(rax);
427    __ jmp(rbx);
428
429    return start;
430  }
431
432
433  //----------------------------------------------------------------------------------------------------
434  // Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest)
435  //
436  // xchg exists as far back as 8086, lock needed for MP only
437  // Stack layout immediately after call:
438  //
439  // 0 [ret addr ] <--- rsp
440  // 1 [  ex     ]
441  // 2 [  dest   ]
442  //
443  // Result:   *dest <- ex, return (old *dest)
444  //
445  // Note: win32 does not currently use this code
446
447  address generate_atomic_xchg() {
    // Emits the "atomic_xchg" stub and returns its start address.
    // The stub exchanges the 32-bit value 'ex' with *dest and returns the old
    // *dest in rax.  rdx is used as a scratch register and is preserved.
448    StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
449    address start = __ pc();
450
    // Pushing rdx moves both stack arguments one word further from rsp than
    // shown in the layout comment above (1 -> 2 for ex, 2 -> 3 for dest).
451    __ push(rdx);
452    Address exchange(rsp, 2 * wordSize);
453    Address dest_addr(rsp, 3 * wordSize);
454    __ movl(rax, exchange);
455    __ movptr(rdx, dest_addr);
    // Swap rax with the memory word at [rdx]; rax then holds the old value.
456    __ xchgl(rax, Address(rdx, 0));
457    __ pop(rdx);
458    __ ret(0);
459
460    return start;
461  }
462
463  //----------------------------------------------------------------------------------------------------
464  // Support for void verify_mxcsr()
465  //
466  // This routine is used with -Xcheck:jni to verify that native
467  // JNI code does not return to Java code without restoring the
468  // MXCSR register to our expected state.
469
470
471  address generate_verify_mxcsr() {
    // Emits the "verify_mxcsr" stub and returns its start address.
    // When CheckJNICalls is set and UseSSE > 0, the stub compares the masked
    // current MXCSR with the standard value; on a mismatch it emits a warning
    // and reloads the standard value.  rax is preserved.  When the condition
    // is false at generation time, the stub consists of a bare 'ret'.
472    StubCodeMark mark(this, "StubRoutines", "verify_mxcsr");
473    address start = __ pc();
474
    // Temp slot at the top of the stack; valid once the word below is allocated.
475    const Address mxcsr_save(rsp, 0);
476
477    if (CheckJNICalls && UseSSE > 0 ) {
478      Label ok_ret;
479      ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
480      __ push(rax);
481      __ subptr(rsp, wordSize);      // allocate a temp location
482      __ stmxcsr(mxcsr_save);
483      __ movl(rax, mxcsr_save);
      // Compare only the bits selected by MXCSR_MASK.
484      __ andl(rax, MXCSR_MASK);
485      __ cmp32(rax, mxcsr_std);
486      __ jcc(Assembler::equal, ok_ret);
487
488      __ warn("MXCSR changed by native JNI code.");
489
      // Put the standard value back.
490      __ ldmxcsr(mxcsr_std);
491
492      __ bind(ok_ret);
      // Release the temp slot and restore rax.
493      __ addptr(rsp, wordSize);
494      __ pop(rax);
495    }
496
497    __ ret(0);
498
499    return start;
500  }
501
502
503  //---------------------------------------------------------------------------
504  // Support for void verify_fpu_cntrl_wrd()
505  //
506  // This routine is used with -Xcheck:jni to verify that native
507  // JNI code does not return to Java code without restoring the
508  // FP control word to our expected state.
509
510  address generate_verify_fpu_cntrl_wrd() {
    // Emits the FP control word check stub (registered under the name
    // "verify_spcw") and returns its start address.
    // When CheckJNICalls is set, the stub compares the masked x87 control word
    // with the standard value; on a mismatch it emits a warning and reloads
    // the standard value.  rax is preserved.  When CheckJNICalls is false at
    // generation time, the stub consists of a bare 'ret'.
511    StubCodeMark mark(this, "StubRoutines", "verify_spcw");
512    address start = __ pc();
513
    // Temp slot at the top of the stack; valid once the word below is allocated.
514    const Address fpu_cntrl_wrd_save(rsp, 0);
515
516    if (CheckJNICalls) {
517      Label ok_ret;
518      __ push(rax);
519      __ subptr(rsp, wordSize);      // allocate a temp location
520      __ fnstcw(fpu_cntrl_wrd_save);
521      __ movl(rax, fpu_cntrl_wrd_save);
      // The 32-bit load is masked with FPU_CNTRL_WRD_MASK (low 16 bits).
522      __ andl(rax, FPU_CNTRL_WRD_MASK);
523      ExternalAddress fpu_std(StubRoutines::addr_fpu_cntrl_wrd_std());
524      __ cmp32(rax, fpu_std);
525      __ jcc(Assembler::equal, ok_ret);
526
527      __ warn("Floating point control word changed by native JNI code.");
528
      // Put the standard control word back.
529      __ fldcw(fpu_std);
530
531      __ bind(ok_ret);
      // Release the temp slot and restore rax.
532      __ addptr(rsp, wordSize);
533      __ pop(rax);
534    }
535
536    __ ret(0);
537
538    return start;
539  }
540
541  //---------------------------------------------------------------------------
542  // Wrapper for slow-case handling of double-to-integer conversion
543  // d2i or f2i fast case failed either because it is nan or because
544  // of under/overflow.
545  // Input:  FPU TOS: float value
546  // Output: rax, (rdx): integer (long) result
547
548  address generate_d2i_wrapper(BasicType t, address fcn) {
    // Emits a wrapper stub around the C conversion routine 'fcn' and returns
    // the stub's start address.
    //
    //   t   - only selects which block comment is emitted (T_INT or T_LONG)
    //   fcn - address of the C function called with the double as argument
    //
    // The stub spills the value on the FPU top of stack, saves rbx, rcx, rsi,
    // rdi, rbp and the FPU state, calls fcn, and restores everything it saved.
    // rax and rdx are not saved or restored by this stub, so the result
    // returned there by fcn is left in place.
549    StubCodeMark mark(this, "StubRoutines", "d2i_wrapper");
550    address start = __ pc();
551
552  // Capture info about frame layout
  // Word offsets from rsp as it stands right after push_FPU_state() below.
  // They mirror the push order: FPU state at the bottom, then rbp, rdi, rsi,
  // rcx, rbx, and finally the two words of the spilled double.
553  enum layout { FPUState_off         = 0,
554                rbp_off              = FPUStateSizeInWords,
555                rdi_off,
556                rsi_off,
557                rcx_off,
558                rbx_off,
559                saved_argument_off,
560                saved_argument_off2, // 2nd half of double
561                framesize
562  };
563
564  assert(FPUStateSizeInWords == 27, "update stack layout");
565
566    // Save outgoing argument to stack across push_FPU_state()
    // fstp_d stores the value as a double and pops it from the FPU stack.
567    __ subptr(rsp, wordSize * 2);
568    __ fstp_d(Address(rsp, 0));
569
570    // Save CPU & FPU state
571    __ push(rbx);
572    __ push(rcx);
573    __ push(rsi);
574    __ push(rdi);
575    __ push(rbp);
576    __ push_FPU_state();
577
578    // push_FPU_state() resets the FP top of stack
579    // Load original double into FP top of stack
580    __ fld_d(Address(rsp, saved_argument_off * wordSize));
581    // Store double into stack as outgoing argument
582    __ subptr(rsp, wordSize*2);
583    __ fst_d(Address(rsp, 0));
584
585    // Prepare FPU for doing math in C-land
586    __ empty_FPU_stack();
587    // Call the C code to massage the double.  Result in EAX
588    if (t == T_INT)
589      { BLOCK_COMMENT("SharedRuntime::d2i"); }
590    else if (t == T_LONG)
591      { BLOCK_COMMENT("SharedRuntime::d2l"); }
    // The '2' is the argument size in words (the double stored just above).
    // NOTE(review): the matching pops below assume call_VM_leaf removes those
    // two words again - confirm against MacroAssembler::call_VM_leaf.
592    __ call_VM_leaf( fcn, 2 );
593
594    // Restore CPU & FPU state
595    __ pop_FPU_state();
596    __ pop(rbp);
597    __ pop(rdi);
598    __ pop(rsi);
599    __ pop(rcx);
600    __ pop(rbx);
    // Drop the two words holding the spilled argument.
601    __ addptr(rsp, wordSize * 2);
602
603    __ ret(0);
604
605    return start;
606  }
607
608
609  //---------------------------------------------------------------------------
610  // The following routine generates a subroutine to throw an asynchronous
611  // UnknownError when an unsafe access gets a fault that could not be
612  // reasonably prevented by the programmer.  (Example: SIGBUS/OBJERR.)
613  address generate_handler_for_unsafe_access() {
    // Emits the "handler_for_unsafe_access" stub and returns its start
    // address.  The stub reserves a stack word, saves all registers, calls
    // handle_unsafe_access(), writes the address it returns into the reserved
    // word, restores the registers, and 'returns' to that address.
614    StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
615    address start = __ pc();
616
617    __ push(0);                       // hole for return address-to-be
618    __ pusha();                       // push registers
    // The hole sits just above the block written by pusha(), i.e.
    // number_of_registers words above rsp.
619    Address next_pc(rsp, RegisterImpl::number_of_registers * BytesPerWord);
620    BLOCK_COMMENT("call handle_unsafe_access");
621    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, handle_unsafe_access)));
622    __ movptr(next_pc, rax);          // stuff next address
623    __ popa();
624    __ ret(0);                        // jump to next address
625
626    return start;
627  }
628
629
630  //----------------------------------------------------------------------------------------------------
631  // Non-destructive plausibility checks for oops
632
633  address generate_verify_oop() {
    // Emits the "verify_oop" stub and returns its start address.
    // The stub checks the oop passed on the stack: a NULL oop is accepted;
    // otherwise the oop, its klass and the klass' klass must each match the
    // Universe verify masks/bits, and neither klass pointer may be NULL.
    // On success rax, rdx and EFLAGS are restored and the three argument
    // words are popped.  On failure MacroAssembler::debug32 is called.
634    StubCodeMark mark(this, "StubRoutines", "verify_oop");
635    address start = __ pc();
636
637    // Incoming arguments on stack after saving rax:
638    //
639    // [tos    ]: saved rdx
640    // [tos + 1]: saved EFLAGS
641    // [tos + 2]: return address
642    // [tos + 3]: char* error message
643    // [tos + 4]: oop   object to verify
644    // [tos + 5]: saved rax - saved by caller and bashed
645
646    Label exit, error;
647    __ pushf();
    // Count every invocation of the stub.
648    __ incrementl(ExternalAddress((address) StubRoutines::verify_oop_count_addr()));
649    __ push(rdx);                                // save rdx
650    // make sure object is 'reasonable'
651    __ movptr(rax, Address(rsp, 4 * wordSize));    // get object
652    __ testptr(rax, rax);
653    __ jcc(Assembler::zero, exit);               // if obj is NULL it is ok
654
655    // Check if the oop is in the right area of memory
    // i.e. (oop & oop_mask) must equal oop_bits
656    const int oop_mask = Universe::verify_oop_mask();
657    const int oop_bits = Universe::verify_oop_bits();
658    __ mov(rdx, rax);
659    __ andptr(rdx, oop_mask);
660    __ cmpptr(rdx, oop_bits);
661    __ jcc(Assembler::notZero, error);
662
663    // make sure klass is 'reasonable'
664    __ movptr(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // get klass
665    __ testptr(rax, rax);
666    __ jcc(Assembler::zero, error);              // if klass is NULL it is broken
667
668    // Check if the klass is in the right area of memory
    // i.e. (klass & klass_mask) must equal klass_bits
669    const int klass_mask = Universe::verify_klass_mask();
670    const int klass_bits = Universe::verify_klass_bits();
671    __ mov(rdx, rax);
672    __ andptr(rdx, klass_mask);
673    __ cmpptr(rdx, klass_bits);
674    __ jcc(Assembler::notZero, error);
675
676    // make sure klass' klass is 'reasonable'
677    __ movptr(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // get klass' klass
678    __ testptr(rax, rax);
679    __ jcc(Assembler::zero, error);              // if klass' klass is NULL it is broken
680
681    __ mov(rdx, rax);
682    __ andptr(rdx, klass_mask);
683    __ cmpptr(rdx, klass_bits);
684    __ jcc(Assembler::notZero, error);           // if klass not in right area
685                                                 // of memory it is broken too.
686
687    // return if everything seems ok
688    __ bind(exit);
689    __ movptr(rax, Address(rsp, 5 * wordSize));  // get saved rax back
690    __ pop(rdx);                                 // restore rdx
691    __ popf();                                   // restore EFLAGS
692    __ ret(3 * wordSize);                        // pop arguments
693
694    // handle errors
695    __ bind(error);
696    __ movptr(rax, Address(rsp, 5 * wordSize));  // get saved rax back
697    __ pop(rdx);                                 // get saved rdx back
698    __ popf();                                   // get saved EFLAGS off stack -- will be ignored
699    __ pusha();                                  // push registers (eip = return address & msg are already pushed)
700    BLOCK_COMMENT("call MacroAssembler::debug");
701    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
    // Reached only if debug32 returns: restore the registers and return as on
    // the success path.
702    __ popa();
703    __ ret(3 * wordSize);                        // pop arguments
704    return start;
705  }
706
707  //
708  //  Generate pre-barrier for array stores
709  //
710  //  Input:
711  //     start   -  starting address
712  //     count   -  element count
713  void  gen_write_ref_array_pre_barrier(Register start, Register count) {
714    assert_different_registers(start, count);
715#if 0 // G1 only
716    BarrierSet* bs = Universe::heap()->barrier_set();
717    switch (bs->kind()) {
718      case BarrierSet::G1SATBCT:
719      case BarrierSet::G1SATBCTLogging:
720        {
721          __ pusha();                      // push registers
722          __ push(count);
723          __ push(start);
724          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
725          __ addl(esp, wordSize * 2);
726          __ popa();
727        }
728        break;
729      case BarrierSet::CardTableModRef:
730      case BarrierSet::CardTableExtension:
731      case BarrierSet::ModRef:
732        break;
733      default      :
734        ShouldNotReachHere();
735
736    }
737#endif // 0 - G1 only
738  }
739
740
741  //
742  // Generate a post-barrier for an array store
743  //
744  //     start    -  starting address
745  //     count    -  element count
746  //
747  //  The two input registers are overwritten.
748  //
749  void  gen_write_ref_array_post_barrier(Register start, Register count) {
750    BarrierSet* bs = Universe::heap()->barrier_set();
751    assert_different_registers(start, count);
752    switch (bs->kind()) {
753#if 0 // G1 only
754      case BarrierSet::G1SATBCT:
755      case BarrierSet::G1SATBCTLogging:
756        {
757          __ pusha();                      // push registers
758          __ push(count);
759          __ push(start);
760          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));
761          __ addl(esp, wordSize * 2);
762          __ popa();
763
764        }
765        break;
766#endif // 0 G1 only
767
768      case BarrierSet::CardTableModRef:
769      case BarrierSet::CardTableExtension:
770        {
771          CardTableModRefBS* ct = (CardTableModRefBS*)bs;
772          assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
773
774          Label L_loop;
775          const Register end = count;  // elements count; end == start+count-1
776          assert_different_registers(start, end);
777
778          __ lea(end,  Address(start, count, Address::times_ptr, -wordSize));
779          __ shrptr(start, CardTableModRefBS::card_shift);
780          __ shrptr(end,   CardTableModRefBS::card_shift);
781          __ subptr(end, start); // end --> count
782        __ BIND(L_loop);
783          intptr_t disp = (intptr_t) ct->byte_map_base;
784          Address cardtable(start, count, Address::times_1, disp);
785          __ movb(cardtable, 0);
786          __ decrement(count);
787          __ jcc(Assembler::greaterEqual, L_loop);
788        }
789        break;
790      case BarrierSet::ModRef:
791        break;
792      default      :
793        ShouldNotReachHere();
794
795    }
796  }
797
798  // Copy 64 bytes chunks
799  //
800  // Inputs:
801  //   from        - source array address
802  //   to_from     - destination array address - from
803  //   qword_count - number of 8-byte elements to copy (counted down towards zero)
804  //
805  void mmx_copy_forward(Register from, Register to_from, Register qword_count) {
    // Emits a forward copy of qword_count 8-byte elements using MMX registers.
    //
    //   from        - source address; advanced as the copy proceeds
    //   to_from     - (destination - source), so that from + to_from addresses
    //                 the destination element matching 'from'
    //   qword_count - number of 8-byte elements; clobbered
    //
    // Full 64-byte chunks are copied by an unrolled loop using mmx0..mmx7 and
    // any remainder is copied one qword at a time.  'emms' is emitted on exit.
806    Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
807    // Copy 64-byte chunks
    // Enter at the loop test, which sits below the loop body.
808    __ jmpb(L_copy_64_bytes);
809    __ align(16);
810  __ BIND(L_copy_64_bytes_loop);
    // Loads and stores are interleaved; each store writes the qword loaded
    // into the same register a few instructions earlier.
811    __ movq(mmx0, Address(from, 0));
812    __ movq(mmx1, Address(from, 8));
813    __ movq(mmx2, Address(from, 16));
814    __ movq(Address(from, to_from, Address::times_1, 0), mmx0);
815    __ movq(mmx3, Address(from, 24));
816    __ movq(Address(from, to_from, Address::times_1, 8), mmx1);
817    __ movq(mmx4, Address(from, 32));
818    __ movq(Address(from, to_from, Address::times_1, 16), mmx2);
819    __ movq(mmx5, Address(from, 40));
820    __ movq(Address(from, to_from, Address::times_1, 24), mmx3);
821    __ movq(mmx6, Address(from, 48));
822    __ movq(Address(from, to_from, Address::times_1, 32), mmx4);
823    __ movq(mmx7, Address(from, 56));
824    __ movq(Address(from, to_from, Address::times_1, 40), mmx5);
825    __ movq(Address(from, to_from, Address::times_1, 48), mmx6);
826    __ movq(Address(from, to_from, Address::times_1, 56), mmx7);
827    __ addptr(from, 64);
828  __ BIND(L_copy_64_bytes);
    // Loop while at least 8 qwords (64 bytes) remain.
829    __ subl(qword_count, 8);
830    __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);
    // Undo the last subtraction: qword_count is now the remainder (0..7).
831    __ addl(qword_count, 8);
832    __ jccb(Assembler::zero, L_exit);
833    //
834    // length is too short, just copy qwords
835    //
836  __ BIND(L_copy_8_bytes);
837    __ movq(mmx0, Address(from, 0));
838    __ movq(Address(from, to_from, Address::times_1), mmx0);
839    __ addptr(from, 8);
840    __ decrement(qword_count);
841    __ jcc(Assembler::greater, L_copy_8_bytes);
842  __ BIND(L_exit);
843    __ emms();
844  }
845
  //
  //  Generate an arraycopy stub that copies forward (from low to high
  //  addresses).
  //
  //  Arguments of the generator:
  //    t       - element type; T_BYTE and T_SHORT get extra alignment and
  //              tail code, T_OBJECT gets reference array barriers
  //    aligned - if true, the source alignment prologue for byte/short
  //              arrays is not emitted
  //    sf      - scale factor of one element
  //    entry   - out: pc of the inner entry point, placed after the argument
  //              loads (and, for T_OBJECT, after the pre-barrier).  Code
  //              entering there must have from/to/count in rsi/rdi/rcx, the
  //              destination also in rdx for T_OBJECT, and the same frame
  //              (enter, push rsi, push rdi) set up, because the epilogue
  //              below pops both registers and leaves the frame.
  //    name    - stub name handed to StubCodeMark
  //
  //  Input of the generated stub:
  //    4(rsp)   - source array address
  //    8(rsp)   - destination array address
  //   12(rsp)   - element count
  //
  //  Output of the generated stub:
  //    rax, ==  0
  //
  //  Returns the address of the first instruction of the stub.
  //
846  address generate_disjoint_copy(BasicType t, bool aligned,
847                                 Address::ScaleFactor sf,
848                                 address* entry, const char *name) {
849    __ align(CodeEntryAlignment);
850    StubCodeMark mark(this, "StubRoutines", name);
851    address start = __ pc();
852
853    Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte;
854    Label L_copy_2_bytes, L_copy_4_bytes, L_copy_64_bytes;
855
    // (1 << shift) elements make up one dword; (2 << shift) elements are
    // 8 bytes (see the short-array test below).
856    int shift = Address::times_ptr - sf;
857
858    const Register from     = rsi;  // source array address
859    const Register to       = rdi;  // destination array address
860    const Register count    = rcx;  // elements count
861    const Register to_from  = to;   // (to - from)
862    const Register saved_to = rdx;  // saved destination array address
863
864    __ enter(); // required for proper stackwalking of RuntimeStub frame
865    __ push(rsi);
866    __ push(rdi);
    // The "12+" skips the words pushed by enter() and the two pushes above.
867    __ movptr(from , Address(rsp, 12+ 4));
868    __ movptr(to   , Address(rsp, 12+ 8));
869    __ movl(count, Address(rsp, 12+ 12));
870    if (t == T_OBJECT) {
      // Nothing to copy and no barriers needed for an empty oop array.
871      __ testl(count, count);
872      __ jcc(Assembler::zero, L_0_count);
873      gen_write_ref_array_pre_barrier(to, count);
      // 'to' is turned into (to - from) below; keep the original for the
      // post-barrier.
874      __ mov(saved_to, to);          // save 'to'
875    }
876
877    *entry = __ pc(); // Entry point from conjoint arraycopy stub.
878    BLOCK_COMMENT("Entry:");
879
    // From here on the destination is addressed as (from + to_from), so only
    // 'from' has to be advanced.
880    __ subptr(to, from); // to --> to_from
881    __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
882    __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
883    if (!aligned && (t == T_BYTE || t == T_SHORT)) {
884      // align source address at 4 bytes address boundary
885      if (t == T_BYTE) {
886        // One byte misalignment happens only for byte arrays
887        __ testl(from, 1);
888        __ jccb(Assembler::zero, L_skip_align1);
889        __ movb(rax, Address(from, 0));
890        __ movb(Address(from, to_from, Address::times_1, 0), rax);
891        __ increment(from);
892        __ decrement(count);
893      __ BIND(L_skip_align1);
894      }
895      // Two bytes misalignment happens only for byte and short (char) arrays
896      __ testl(from, 2);
897      __ jccb(Assembler::zero, L_skip_align2);
898      __ movw(rax, Address(from, 0));
899      __ movw(Address(from, to_from, Address::times_1, 0), rax);
900      __ addptr(from, 2);
      // one 2-byte word worth of elements has been copied
901      __ subl(count, 1<<(shift-1));
902    __ BIND(L_skip_align2);
903    }
904    if (!VM_Version::supports_mmx()) {
      // No MMX: copy whole dwords with rep_mov(), which needs the real
      // destination address in 'to' (rdi) and the repeat count in rcx.
905      __ mov(rax, count);      // save 'count'
906      __ shrl(count, shift); // dword count
907      __ addptr(to_from, from);// restore 'to'
908      __ rep_mov();
909      __ subptr(to_from, from);// restore 'to_from'
910      __ mov(count, rax);      // restore 'count'
911      __ jmpb(L_copy_2_bytes); // all dwords were copied
912    } else {
913      // align to 8 bytes, we know we are 4 byte aligned to start
914      __ testptr(from, 4);
915      __ jccb(Assembler::zero, L_copy_64_bytes);
916      __ movl(rax, Address(from, 0));
917      __ movl(Address(from, to_from, Address::times_1, 0), rax);
918      __ addptr(from, 4);
919      __ subl(count, 1<<shift);
920    __ BIND(L_copy_64_bytes);
      // 'count' itself is kept intact; its low bits select the tail copies
      // below.
921      __ mov(rax, count);
922      __ shrl(rax, shift+1);  // 8 bytes chunk count
923      //
924      // Copy 8-byte chunks through MMX registers, 8 per iteration of the loop
925      //
926      mmx_copy_forward(from, to_from, rax);
927    }
    // Tail: at most one dword, one word and one byte remain, selected by the
    // low bits of 'count'.  Which labels get bound where depends on 't', so
    // that jumps to a tail step that does not exist for this element type
    // land on the following code.
928    // copy trailing dword
929  __ BIND(L_copy_4_bytes);
930    __ testl(count, 1<<shift);
931    __ jccb(Assembler::zero, L_copy_2_bytes);
932    __ movl(rax, Address(from, 0));
933    __ movl(Address(from, to_from, Address::times_1, 0), rax);
934    if (t == T_BYTE || t == T_SHORT) {
935      __ addptr(from, 4);
936    __ BIND(L_copy_2_bytes);
937      // copy trailing word
938      __ testl(count, 1<<(shift-1));
939      __ jccb(Assembler::zero, L_copy_byte);
940      __ movw(rax, Address(from, 0));
941      __ movw(Address(from, to_from, Address::times_1, 0), rax);
942      if (t == T_BYTE) {
943        __ addptr(from, 2);
944      __ BIND(L_copy_byte);
945        // copy trailing byte
946        __ testl(count, 1);
947        __ jccb(Assembler::zero, L_exit);
948        __ movb(rax, Address(from, 0));
949        __ movb(Address(from, to_from, Address::times_1, 0), rax);
950      __ BIND(L_exit);
951      } else {
952      __ BIND(L_copy_byte);
953      }
954    } else {
955    __ BIND(L_copy_2_bytes);
956    }
957
958    if (t == T_OBJECT) {
      // 'count' and 'to' were modified by the copy code above; recover the
      // original values for the post-barrier.
959      __ movl(count, Address(rsp, 12+12)); // reread 'count'
960      __ mov(to, saved_to); // restore 'to'
961      gen_write_ref_array_post_barrier(to, count);
962    __ BIND(L_0_count);
963    }
964    inc_copy_counter_np(t);
965    __ pop(rdi);
966    __ pop(rsi);
967    __ leave(); // required for proper stackwalking of RuntimeStub frame
968    __ xorptr(rax, rax); // return 0
969    __ ret(0);
970    return start;
971  }
972
973
  //
  //  Generate an arraycopy stub that tolerates overlapping arrays.
  //
  //  If the destination does not start inside the source range
  //  (dst <= src or dst >= src + count * elem_size, unsigned), control is
  //  transferred to 'nooverlap_target'.  Otherwise the elements are copied
  //  from high to low addresses.
  //
  //  Arguments of the generator:
  //    t                - element type; T_BYTE and T_SHORT get extra
  //                       alignment and tail code, T_OBJECT gets reference
  //                       array barriers
  //    aligned          - not consulted by this generator
  //    sf               - scale factor of one element
  //    nooverlap_target - code address to jump to for non-overlapping
  //                       arrays; reached with from/to/count in rsi/rdi/rcx
  //                       and the destination also in rdx
  //    entry            - out (may be NULL): pc of the inner entry point,
  //                       placed after the argument loads.  Code entering
  //                       there must have src/dst/count in rax/rdx/rcx and
  //                       the same frame (enter, push rsi, push rdi) set up.
  //    name             - stub name handed to StubCodeMark
  //
  //  Input of the generated stub:
  //    4(rsp)   - source array address
  //    8(rsp)   - destination array address
  //   12(rsp)   - element count
  //
  //  Output of the generated stub:
  //    rax, ==  0
  //
  //  Returns the address of the first instruction of the stub.
  //
974  address generate_conjoint_copy(BasicType t, bool aligned,
975                                 Address::ScaleFactor sf,
976                                 address nooverlap_target,
977                                 address* entry, const char *name) {
978    __ align(CodeEntryAlignment);
979    StubCodeMark mark(this, "StubRoutines", name);
980    address start = __ pc();
981
982    Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte;
983    Label L_copy_2_bytes, L_copy_4_bytes, L_copy_8_bytes, L_copy_8_bytes_loop;
984
    // (1 << shift) elements make up one dword; (2 << shift) elements are
    // 8 bytes (see the short-array test below).
985    int shift = Address::times_ptr - sf;
986
987    const Register src   = rax;  // source array address
988    const Register dst   = rdx;  // destination array address
989    const Register from  = rsi;  // source array address
990    const Register to    = rdi;  // destination array address
991    const Register count = rcx;  // elements count
992    const Register end   = rax;  // array end address
993
994    __ enter(); // required for proper stackwalking of RuntimeStub frame
995    __ push(rsi);
996    __ push(rdi);
    // The "12+" skips the words pushed by enter() and the two pushes above.
997    __ movptr(src  , Address(rsp, 12+ 4));   // from
998    __ movptr(dst  , Address(rsp, 12+ 8));   // to
999    __ movl2ptr(count, Address(rsp, 12+12)); // count
1000    // Note: the reference pre-barrier is emitted below, after the entry
1001    // point and the zero-count test, so that it is also executed when the
1002    // stub is entered at *entry.
1003
1004    if (entry != NULL) {
1005      *entry = __ pc(); // Entry point from generic arraycopy stub.
1006      BLOCK_COMMENT("Entry:");
1007    }
1008
1009    if (t == T_OBJECT) {
1010      __ testl(count, count);
1011      __ jcc(Assembler::zero, L_0_count);
      // Must come before the jumps to nooverlap_target: the code entered
      // there (the disjoint stub's inner entry) does not run a pre-barrier.
      gen_write_ref_array_pre_barrier(dst, count);
1012    }
1013    __ mov(from, src);
1014    __ mov(to  , dst);
1015
1016    // arrays overlap test
1017    RuntimeAddress nooverlap(nooverlap_target);
1018    __ cmpptr(dst, src);
    // The end address is computed between the compare and the jump that
    // consumes its flags; 'end' aliases 'src', which is no longer needed.
1019    __ lea(end, Address(src, count, sf, 0)); // src + count * elem_size
1020    __ jump_cc(Assembler::belowEqual, nooverlap);
1021    __ cmpptr(dst, end);
1022    __ jump_cc(Assembler::aboveEqual, nooverlap);
1023
1024    // copy from high to low
1025    __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
1026    __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
1027    if (t == T_BYTE || t == T_SHORT) {
1028      // Align the end of destination array at 4 bytes address boundary
      // (rdx, i.e. 'dst', is used as scratch from here on)
1029      __ lea(end, Address(dst, count, sf, 0));
1030      if (t == T_BYTE) {
1031        // One byte misalignment happens only for byte arrays
1032        __ testl(end, 1);
1033        __ jccb(Assembler::zero, L_skip_align1);
1034        __ decrement(count);
1035        __ movb(rdx, Address(from, count, sf, 0));
1036        __ movb(Address(to, count, sf, 0), rdx);
1037      __ BIND(L_skip_align1);
1038      }
1039      // Two bytes misalignment happens only for byte and short (char) arrays
1040      __ testl(end, 2);
1041      __ jccb(Assembler::zero, L_skip_align2);
1042      __ subptr(count, 1<<(shift-1));
1043      __ movw(rdx, Address(from, count, sf, 0));
1044      __ movw(Address(to, count, sf, 0), rdx);
1045    __ BIND(L_skip_align2);
1046      __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
1047      __ jcc(Assembler::below, L_copy_4_bytes);
1048    }
1049
1050    if (!VM_Version::supports_mmx()) {
      // No MMX: copy whole dwords backwards with rep_mov() while the
      // direction flag is set; rsi/rdi point at the last dword of each array.
1051      __ std();
1052      __ mov(rax, count); // Save 'count'
1053      __ mov(rdx, to);    // Save 'to'
1054      __ lea(rsi, Address(from, count, sf, -4));
1055      __ lea(rdi, Address(to  , count, sf, -4));
1056      __ shrptr(count, shift); // dword count
1057      __ rep_mov();
1058      __ cld();
1059      __ mov(count, rax); // restore 'count'
1060      __ andl(count, (1<<shift)-1);      // mask the number of rest elements
1061      __ movptr(from, Address(rsp, 12+4)); // reread 'from'
1062      __ mov(to, rdx);   // restore 'to'
1063      __ jmpb(L_copy_2_bytes); // all dword were copied
1064   } else {
1065      // Align to 8 bytes the end of array. It is aligned to 4 bytes already.
1066      __ testptr(end, 4);
1067      __ jccb(Assembler::zero, L_copy_8_bytes);
1068      __ subl(count, 1<<shift);
1069      __ movl(rdx, Address(from, count, sf, 0));
1070      __ movl(Address(to, count, sf, 0), rdx);
1071      __ jmpb(L_copy_8_bytes);
1072
1073      __ align(16);
1074      // Move 8 bytes
1075    __ BIND(L_copy_8_bytes_loop);
1076      __ movq(mmx0, Address(from, count, sf, 0));
1077      __ movq(Address(to, count, sf, 0), mmx0);
1078    __ BIND(L_copy_8_bytes);
1079      __ subl(count, 2<<shift);
1080      __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
      // undo the last subtraction; fewer than 8 bytes remain
1081      __ addl(count, 2<<shift);
1082      __ emms();
1083    }
    // Prefix: at most one dword, one word and one byte remain at the start
    // of the arrays, selected by the low bits of 'count'.  Which labels get
    // bound where depends on 't', so that jumps to a step that does not
    // exist for this element type land on the following code.
1084  __ BIND(L_copy_4_bytes);
1085    // copy prefix dword
1086    __ testl(count, 1<<shift);
1087    __ jccb(Assembler::zero, L_copy_2_bytes);
1088    __ movl(rdx, Address(from, count, sf, -4));
1089    __ movl(Address(to, count, sf, -4), rdx);
1090
1091    if (t == T_BYTE || t == T_SHORT) {
1092        __ subl(count, (1<<shift));
1093      __ BIND(L_copy_2_bytes);
1094        // copy prefix word
1095        __ testl(count, 1<<(shift-1));
1096        __ jccb(Assembler::zero, L_copy_byte);
1097        __ movw(rdx, Address(from, count, sf, -2));
1098        __ movw(Address(to, count, sf, -2), rdx);
1099        if (t == T_BYTE) {
1100          __ subl(count, 1<<(shift-1));
1101        __ BIND(L_copy_byte);
1102          // copy prefix byte
1103          __ testl(count, 1);
1104          __ jccb(Assembler::zero, L_exit);
1105          __ movb(rdx, Address(from, 0));
1106          __ movb(Address(to, 0), rdx);
1107        __ BIND(L_exit);
1108        } else {
1109        __ BIND(L_copy_byte);
1110        }
1111    } else {
1112    __ BIND(L_copy_2_bytes);
1113    }
1114    if (t == T_OBJECT) {
      // 'count' was consumed by the copy code; 'to' still holds the
      // destination start.
1115      __ movl2ptr(count, Address(rsp, 12+12)); // reread count
1116      gen_write_ref_array_post_barrier(to, count);
1117    __ BIND(L_0_count);
1118    }
1119    inc_copy_counter_np(t);
1120    __ pop(rdi);
1121    __ pop(rsi);
1122    __ leave(); // required for proper stackwalking of RuntimeStub frame
1123    __ xorptr(rax, rax); // return 0
1124    __ ret(0);
1125    return start;
1126  }
1127
1128
  //
  //  Generate a forward (low to high address) arraycopy stub for 8-byte
  //  elements.
  //
  //  Arguments of the generator:
  //    entry - out: pc of the inner entry point, placed after the argument
  //            loads; code entering there must have from/to/count in
  //            rax/rdx/rcx and the frame from enter() set up (no other
  //            registers are pushed by this stub)
  //    name  - stub name handed to StubCodeMark
  //
  //  Input of the generated stub:
  //    4(rsp)   - source array address
  //    8(rsp)   - destination array address
  //   12(rsp)   - element (qword) count
  //
  //  Output of the generated stub:
  //    rax, ==  0
  //
  //  Returns the address of the first instruction of the stub.
  //
1129  address generate_disjoint_long_copy(address* entry, const char *name) {
1130    __ align(CodeEntryAlignment);
1131    StubCodeMark mark(this, "StubRoutines", name);
1132    address start = __ pc();
1133
1134    Label L_copy_8_bytes, L_copy_8_bytes_loop;
1135    const Register from       = rax;  // source array address
1136    const Register to         = rdx;  // destination array address
1137    const Register count      = rcx;  // elements count
1138    const Register to_from    = rdx;  // (to - from)
1139
1140    __ enter(); // required for proper stackwalking of RuntimeStub frame
    // The "8+" skips the word pushed by enter() and the return address.
1141    __ movptr(from , Address(rsp, 8+0));       // from
1142    __ movptr(to   , Address(rsp, 8+4));       // to
1143    __ movl2ptr(count, Address(rsp, 8+8));     // count
1144
1145    *entry = __ pc(); // Entry point from conjoint arraycopy stub.
1146    BLOCK_COMMENT("Entry:");
1147
    // The destination is addressed as (from + to_from), so only 'from' has
    // to be advanced.
1148    __ subptr(to, from); // to --> to_from
1149    if (VM_Version::supports_mmx()) {
1150      mmx_copy_forward(from, to_from, count);
1151    } else {
      // No MMX: move each 8-byte element with one fild_d/fistp_d pair.
      // The loop is entered at its test, so a zero count copies nothing.
1152      __ jmpb(L_copy_8_bytes);
1153      __ align(16);
1154    __ BIND(L_copy_8_bytes_loop);
1155      __ fild_d(Address(from, 0));
1156      __ fistp_d(Address(from, to_from, Address::times_1));
1157      __ addptr(from, 8);
1158    __ BIND(L_copy_8_bytes);
1159      __ decrement(count);
1160      __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
1161    }
1162    inc_copy_counter_np(T_LONG);
1163    __ leave(); // required for proper stackwalking of RuntimeStub frame
1164    __ xorptr(rax, rax); // return 0
1165    __ ret(0);
1166    return start;
1167  }
1168
  //
  //  Generate an arraycopy stub for 8-byte elements that tolerates
  //  overlapping arrays.
  //
  //  If the destination does not start inside the source range
  //  (to <= from or to >= from + count * 8, unsigned), control is
  //  transferred to 'nooverlap_target'.  Otherwise the elements are copied
  //  from high to low addresses.
  //
  //  Arguments of the generator:
  //    nooverlap_target - code address to jump to for non-overlapping
  //                       arrays; reached with from/to/count in rax/rdx/rcx
  //    entry            - out: pc of the inner entry point, placed after
  //                       the argument loads.  Note that 'from' is read
  //                       again from the stack after the overlap test, so
  //                       code entering there must also have the source
  //                       address in the stub's first argument slot.
  //    name             - stub name handed to StubCodeMark
  //
  //  Input of the generated stub:
  //    4(rsp)   - source array address
  //    8(rsp)   - destination array address
  //   12(rsp)   - element (qword) count
  //
  //  Output of the generated stub:
  //    rax, ==  0
  //
  //  Returns the address of the first instruction of the stub.
  //
1169  address generate_conjoint_long_copy(address nooverlap_target,
1170                                      address* entry, const char *name) {
1171    __ align(CodeEntryAlignment);
1172    StubCodeMark mark(this, "StubRoutines", name);
1173    address start = __ pc();
1174
1175    Label L_copy_8_bytes, L_copy_8_bytes_loop;
1176    const Register from       = rax;  // source array address
1177    const Register to         = rdx;  // destination array address
1178    const Register count      = rcx;  // elements count
1179    const Register end_from   = rax;  // source array end address
1180
1181    __ enter(); // required for proper stackwalking of RuntimeStub frame
    // The "8+" skips the word pushed by enter() and the return address.
1182    __ movptr(from , Address(rsp, 8+0));       // from
1183    __ movptr(to   , Address(rsp, 8+4));       // to
1184    __ movl2ptr(count, Address(rsp, 8+8));     // count
1185
1186    *entry = __ pc(); // Entry point from generic arraycopy stub.
1187    BLOCK_COMMENT("Entry:");
1188
1189    // arrays overlap test
1190    __ cmpptr(to, from);
1191    RuntimeAddress nooverlap(nooverlap_target);
1192    __ jump_cc(Assembler::belowEqual, nooverlap);
    // 'end_from' aliases 'from' (both rax), so this overwrites 'from'.
1193    __ lea(end_from, Address(from, count, Address::times_8, 0));
1194    __ cmpptr(to, end_from);
    // Reload 'from' between the compare and the jump that consumes its
    // flags; this relies on the move leaving the condition flags intact.
1195    __ movptr(from, Address(rsp, 8));  // from
1196    __ jump_cc(Assembler::aboveEqual, nooverlap);
1197
    // Backward copy.  The loop is entered at its test: 'count' is
    // decremented first and then used as the index of the element to move,
    // so indices count-1 .. 0 are copied and a zero count copies nothing.
1198    __ jmpb(L_copy_8_bytes);
1199
1200    __ align(16);
1201  __ BIND(L_copy_8_bytes_loop);
1202    if (VM_Version::supports_mmx()) {
1203      __ movq(mmx0, Address(from, count, Address::times_8));
1204      __ movq(Address(to, count, Address::times_8), mmx0);
1205    } else {
1206      __ fild_d(Address(from, count, Address::times_8));
1207      __ fistp_d(Address(to, count, Address::times_8));
1208    }
1209  __ BIND(L_copy_8_bytes);
1210    __ decrement(count);
1211    __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
1212
1213    if (VM_Version::supports_mmx()) {
1214      __ emms();
1215    }
1216    inc_copy_counter_np(T_LONG);
1217    __ leave(); // required for proper stackwalking of RuntimeStub frame
1218    __ xorptr(rax, rax); // return 0
1219    __ ret(0);
1220    return start;
1221  }
1222
1223
1224  // Helper for generating a dynamic type check.
1225  // The sub_klass must be one of {rbx, rdx, rsi}.
1226  // The temp is killed.
  //
  // Emits code that tests whether the klass in 'sub_klass' is a subtype of
  // the klass stored at 'super_klass_addr':
  //   1. identical klass pointers succeed;
  //   2. otherwise the word of sub_klass found at the offset stored at
  //      'super_check_offset_addr' is compared with the super klass;
  //   3. if that offset is not the secondary super cache offset, the check
  //      fails; otherwise the secondary supers array is scanned and, on a
  //      hit, the super klass is stored into the secondary super cache.
  //
  // Exactly one outcome falls through to the code following this sequence:
  //   L_success_ptr == NULL : success falls through, failure jumps to
  //                           *L_failure_ptr (which must then be non-NULL)
  //   L_success_ptr != NULL : success jumps to *L_success_ptr, failure falls
  //                           through (L_failure_ptr is ignored)
  //
  // rax, rcx and rdi are used by the scan but are saved and restored around
  // it; sub_klass must therefore not be one of them.
1227  void generate_type_check(Register sub_klass,
1228                           Address& super_check_offset_addr,
1229                           Address& super_klass_addr,
1230                           Register temp,
1231                           Label* L_success_ptr, Label* L_failure_ptr) {
1232    BLOCK_COMMENT("type_check:");
1233
    // Route whichever outcome has no explicit target to the local
    // fall-through label bound at the end.
1234    Label L_fallthrough;
1235    bool fall_through_on_success = (L_success_ptr == NULL);
1236    if (fall_through_on_success) {
1237      L_success_ptr = &L_fallthrough;
1238    } else {
1239      L_failure_ptr = &L_fallthrough;
1240    }
1241    Label& L_success = *L_success_ptr;
1242    Label& L_failure = *L_failure_ptr;
1243
1244    assert_different_registers(sub_klass, temp);
1245
1246    // a couple of useful fields in sub_klass:
1247    int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
1248                     Klass::secondary_supers_offset_in_bytes());
1249    int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
1250                     Klass::secondary_super_cache_offset_in_bytes());
1251    Address secondary_supers_addr(sub_klass, ss_offset);
1252    Address super_cache_addr(     sub_klass, sc_offset);
1253
1254    // if the pointers are equal, we are done (e.g., String[] elements)
1255    __ cmpptr(sub_klass, super_klass_addr);
1256    __ jcc(Assembler::equal, L_success);
1257
1258    // check the supertype display:
1259    __ movl2ptr(temp, super_check_offset_addr);
1260    Address super_check_addr(sub_klass, temp, Address::times_1, 0);
1261    __ movptr(temp, super_check_addr); // load displayed supertype
1262    __ cmpptr(temp, super_klass_addr); // test the super type
1263    __ jcc(Assembler::equal, L_success);
1264
1265    // if it was a primary super, we can just fail immediately
    // (the offset equals sc_offset only when the word just compared was the
    // secondary super cache)
1266    __ cmpl(super_check_offset_addr, sc_offset);
1267    __ jcc(Assembler::notEqual, L_failure);
1268
1269    // Now do a linear scan of the secondary super-klass chain.
1270    // This code is rarely used, so simplicity is a virtue here.
1271    inc_counter_np(SharedRuntime::_partial_subtype_ctr);
1272    {
1273      // The repne_scan instruction uses fixed registers, which we must spill.
1274      // (We need a couple more temps in any case.)
      // NOTE(review): if super_check_offset_addr / super_klass_addr are
      // rsp-relative (as in generate_checkcast_copy), these pushes shift
      // what the load of super_klass_addr below refers to -- confirm how
      // callers account for this.
1275      __ push(rax);
1276      __ push(rcx);
1277      __ push(rdi);
1278      assert_different_registers(sub_klass, rax, rcx, rdi);
1279
1280      __ movptr(rdi, secondary_supers_addr);
1281      // Load the array length.
1282      __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
1283      // Skip to start of data.
1284      __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
1285      // Scan rcx words at [edi] for occurrence of rax,
1286      // Set NZ/Z based on last compare
1287      __ movptr(rax, super_klass_addr);
1288      __ repne_scan();
1289
1290      // Unspill the temp. registers:
      // (the flags set by the scan must survive these pops; they are
      // tested right after the block)
1291      __ pop(rdi);
1292      __ pop(rcx);
1293      __ pop(rax);
1294    }
1295    __ jcc(Assembler::notEqual, L_failure);
1296
1297    // Success.  Cache the super we found and proceed in triumph.
1298    __ movptr(temp, super_klass_addr); // note: rax, is dead
1299    __ movptr(super_cache_addr, temp);
1300
    // When success falls through, no jump is needed: L_fallthrough is
    // bound right here.
1301    if (!fall_through_on_success)
1302      __ jmp(L_success);
1303
1304    // Fall through on failure!
1305    __ bind(L_fallthrough);
1306  }
1307
1308  //
1309  //  Generate checkcasting array copy stub
1310  //
1311  //  Input:
1312  //    4(rsp)   - source array address
1313  //    8(rsp)   - destination array address
1314  //   12(rsp)   - element count, can be zero
1315  //   16(rsp)   - size_t ckoff (super_check_offset)
1316  //   20(rsp)   - oop ckval (super_klass)
1317  //
1318  //  Output:
1319  //    rax, ==  0  -  success
1320  //    rax, == -1^K - failure, where K is partial transfer count
1321  //
  //  Arguments of the generator:
  //    name  - stub name handed to StubCodeMark
  //    entry - out: pc of the inner entry point, placed after the loads of
  //            from/to/length into rax/rdx/rcx.  The remaining arguments
  //            are still read from the stack after that point.
  //
  //  Elements are copied one by one from low to high addresses.  NULL
  //  elements are stored without a check; every other element is type
  //  checked against ckval and the copy stops at the first element that
  //  fails.  Reference array barriers cover the destination range: the
  //  pre-barrier the whole requested range, the post-barrier the elements
  //  actually stored.
  //
  //  Returns the address of the first instruction of the stub.
  //
1322  address generate_checkcast_copy(const char *name, address* entry) {
1323    __ align(CodeEntryAlignment);
1324    StubCodeMark mark(this, "StubRoutines", name);
1325    address start = __ pc();
1326
1327    Label L_load_element, L_store_element, L_do_card_marks, L_done;
1328
1329    // register use:
1330    //  rax, rdx, rcx -- loop control (end_from, end_to, count)
1331    //  rdi, rsi      -- element access (oop, klass)
1332    //  rbx,           -- temp
1333    const Register from       = rax;    // source array address
1334    const Register to         = rdx;    // destination array address
1335    const Register length     = rcx;    // elements count
1336    const Register elem       = rdi;    // each oop copied
1337    const Register elem_klass = rsi;    // each elem._klass (sub_klass)
1338    const Register temp       = rbx;    // lone remaining temp
1339
1340    __ enter(); // required for proper stackwalking of RuntimeStub frame
1341
1342    __ push(rsi);
1343    __ push(rdi);
1344    __ push(rbx);
1345
    // The "16+" skips the words pushed by enter() and the three pushes above.
1346    Address   from_arg(rsp, 16+ 4);     // from
1347    Address     to_arg(rsp, 16+ 8);     // to
1348    Address length_arg(rsp, 16+12);     // elements count
1349    Address  ckoff_arg(rsp, 16+16);     // super_check_offset
1350    Address  ckval_arg(rsp, 16+20);     // super_klass
1351
1352    // Load up:
1353    __ movptr(from,     from_arg);
1354    __ movptr(to,         to_arg);
1355    __ movl2ptr(length, length_arg);
1356
1357    *entry = __ pc(); // Entry point from generic arraycopy stub.
1358    BLOCK_COMMENT("Entry:");
1359
1360    //---------------------------------------------------------------
1361    // Assembler stub will be used for this call to arraycopy
1362    // if the two arrays are subtypes of Object[] but the
1363    // destination array type is not equal to or a supertype
1364    // of the source type.  Each element must be separately
1365    // checked.
1366
1367    // Loop-invariant addresses.  They are exclusive end pointers.
1368    Address end_from_addr(from, length, Address::times_ptr, 0);
1369    Address   end_to_addr(to,   length, Address::times_ptr, 0);
1370
1371    Register end_from = from;           // re-use
1372    Register end_to   = to;             // re-use
1373    Register count    = length;         // re-use
1374
1375    // Loop-variant addresses.  They assume post-incremented count < 0.
1376    Address from_element_addr(end_from, count, Address::times_ptr, 0);
1377    Address   to_element_addr(end_to,   count, Address::times_ptr, 0);
1378    Address elem_klass_addr(elem, oopDesc::klass_offset_in_bytes());
1379
    // The pre-barrier needs the start of the destination range, so it must
    // be emitted while 'to' still holds it: the lea below overwrites 'to'
    // with the end address (end_to aliases to).
1380    // Copy from low to high addresses, indexed from the end of each array.
1381    gen_write_ref_array_pre_barrier(to, count);
1382    __ lea(end_from, end_from_addr);
1383    __ lea(end_to,   end_to_addr);
1384    assert(length == count, "");        // else fix next line:
1385    __ negptr(count);                   // negate and test the length
1386    __ jccb(Assembler::notZero, L_load_element);
1387
1388    // Empty array:  Nothing to do.
1389    __ xorptr(rax, rax);                  // return 0 on (trivial) success
1390    __ jmp(L_done);
1391
1392    // ======== begin loop ========
1393    // (Loop is rotated; its entry is L_load_element.)
1394    // Loop control:
1395    //   for (count = -count; count != 0; count++)
1396    // Base pointers src, dst are biased by count elements, i.e. they point
    // just past the last element of each array.
1397    __ align(16);
1398
1399    __ BIND(L_store_element);
1400    __ movptr(to_element_addr, elem);     // store the oop
1401    __ increment(count);                // increment the count toward zero
1402    __ jccb(Assembler::zero, L_do_card_marks);
1403
1404    // ======== loop entry is here ========
1405    __ BIND(L_load_element);
1406    __ movptr(elem, from_element_addr);   // load the oop
    // NULL elements need no type check and are stored directly.
1407    __ testptr(elem, elem);
1408    __ jccb(Assembler::zero, L_store_element);
1409
1410    // (Could do a trick here:  Remember last successful non-null
1411    // element stored and make a quick oop equality check on it.)
1412
1413    __ movptr(elem_klass, elem_klass_addr); // query the object klass
1414    generate_type_check(elem_klass, ckoff_arg, ckval_arg, temp,
1415                        &L_store_element, NULL);
1416      // (On fall-through, we have failed the element type check.)
1417    // ======== end loop ========
1418
1419    // It was a real error; we must depend on the caller to finish the job.
1420    // Register "count" = -1 * number of *remaining* oops, length_arg = *total* oops.
1421    // Emit GC store barriers for the oops we have copied (length_arg + count),
1422    // and report their number to the caller.
1423    __ addl(count, length_arg);         // transfers = (length - remaining)
1424    __ movl2ptr(rax, count);            // save the value
1425    __ notptr(rax);                     // report (-1^K) to caller
1426    __ movptr(to, to_arg);              // reload
1427    assert_different_registers(to, count, rax);
1428    gen_write_ref_array_post_barrier(to, count);
1429    __ jmpb(L_done);
1430
1431    // Come here on success only.
1432    __ BIND(L_do_card_marks);
    // 'count' reached zero and 'to' holds the end address; recover both
    // original values for the post-barrier.
1433    __ movl2ptr(count, length_arg);
1434    __ movptr(to, to_arg);                // reload
1435    gen_write_ref_array_post_barrier(to, count);
1436    __ xorptr(rax, rax);                  // return 0 on success
1437
1438    // Common exit point (success or failure).
1439    __ BIND(L_done);
1440    __ pop(rbx);
1441    __ pop(rdi);
1442    __ pop(rsi);
1443    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
1444    __ leave(); // required for proper stackwalking of RuntimeStub frame
1445    __ ret(0);
1446
1447    return start;
1448  }
1449
1450  //
1451  //  Generate 'unsafe' array copy stub
1452  //  Though just as safe as the other stubs, it takes an unscaled
1453  //  size_t argument instead of an element count.
1454  //
1455  //  Input:
1456  //    4(rsp)   - source array address
1457  //    8(rsp)   - destination array address
1458  //   12(rsp)   - byte count, can be zero
1459  //
1460  //  Output:
1461  //    rax, ==  0  -  success
1462  //    rax, == -1  -  need to call System.arraycopy
1463  //
1464  // Examines the alignment of the operands and dispatches
1465  // to a long, int, short, or byte copy loop.
1466  //
  // Arguments of the generator:
  //   name             - stub name handed to StubCodeMark
  //   byte_copy_entry  - inner entry point of the byte copy stub
  //   short_copy_entry - inner entry point of the short copy stub
  //   int_copy_entry   - inner entry point of the int copy stub
  //   long_copy_entry  - inner entry point of the long copy stub
  //
  // The copy itself and the return value are produced by the stub that is
  // jumped to; this stub only selects it.  The targets are entered with
  // from/to/count in rax/rdx/rcx and, except for the long copy, with rsi
  // and rdi pushed.
  //
  // Returns the address of the first instruction of the stub.
  //
1467  address generate_unsafe_copy(const char *name,
1468                               address byte_copy_entry,
1469                               address short_copy_entry,
1470                               address int_copy_entry,
1471                               address long_copy_entry) {
1472
1473    Label L_long_aligned, L_int_aligned, L_short_aligned;
1474
1475    __ align(CodeEntryAlignment);
1476    StubCodeMark mark(this, "StubRoutines", name);
1477    address start = __ pc();
1478
1479    const Register from       = rax;  // source array address
1480    const Register to         = rdx;  // destination array address
1481    const Register count      = rcx;  // byte count, scaled to elements below
1482
1483    __ enter(); // required for proper stackwalking of RuntimeStub frame
1484    __ push(rsi);
1485    __ push(rdi);
    // The "12+" skips the words pushed by enter() and the two pushes above.
1486    Address  from_arg(rsp, 12+ 4);      // from
1487    Address    to_arg(rsp, 12+ 8);      // to
1488    Address count_arg(rsp, 12+12);      // byte count
1489
1490    // Load up:
1491    __ movptr(from ,  from_arg);
1492    __ movptr(to   ,    to_arg);
1493    __ movl2ptr(count, count_arg);
1494
1495    // bump this on entry, not on exit:
1496    inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr);
1497
    // OR both addresses and the size together: a low bit that is clear in
    // the result is clear in all three, so a single test per element size
    // checks the alignment of every operand.
1498    const Register bits = rsi;
1499    __ mov(bits, from);
1500    __ orptr(bits, to);
1501    __ orptr(bits, count);
1502
1503    __ testl(bits, BytesPerLong-1);
1504    __ jccb(Assembler::zero, L_long_aligned);
1505
1506    __ testl(bits, BytesPerInt-1);
1507    __ jccb(Assembler::zero, L_int_aligned);
1508
    // Not even short aligned: copy bytes; the byte count already is the
    // element count.
1509    __ testl(bits, BytesPerShort-1);
1510    __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry));
1511
    // In each case below the scaled count is also written back to the
    // argument slot on the stack before the jump.
1512    __ BIND(L_short_aligned);
1513    __ shrptr(count, LogBytesPerShort); // size => short_count
1514    __ movl(count_arg, count);          // update 'count'
1515    __ jump(RuntimeAddress(short_copy_entry));
1516
1517    __ BIND(L_int_aligned);
1518    __ shrptr(count, LogBytesPerInt); // size => int_count
1519    __ movl(count_arg, count);          // update 'count'
1520    __ jump(RuntimeAddress(int_copy_entry));
1521
1522    __ BIND(L_long_aligned);
1523    __ shrptr(count, LogBytesPerLong); // size => qword_count
    // count_arg is rsp-relative and accounts for the two pushes, so the
    // store must happen before the pops below.
1524    __ movl(count_arg, count);          // update 'count'
1525    __ pop(rdi); // Do pops here since jlong_arraycopy stub does not do it.
1526    __ pop(rsi);
1527    __ jump(RuntimeAddress(long_copy_entry));
1528
1529    return start;
1530  }
1531
1532
1533  // Perform range checks on the proposed arraycopy.
1534  // Smashes src_pos and dst_pos.  (Uses them up for temps.)
  //
  //   src, dst         - registers holding the source / destination array
  //   src_pos, dst_pos - registers holding the start positions; on return
  //                      they hold src_pos + length and dst_pos + length
  //   length           - memory operand holding the element count
  //   L_failed         - branch target taken when either end position is
  //                      above (unsigned compare) the length of its array
  //
  // Falls through when both ranges fit.
1535  void arraycopy_range_checks(Register src,
1536                              Register src_pos,
1537                              Register dst,
1538                              Register dst_pos,
1539                              Address& length,
1540                              Label& L_failed) {
1541    BLOCK_COMMENT("arraycopy_range_checks:");
1542    const Register src_end = src_pos;   // source array end position
1543    const Register dst_end = dst_pos;   // destination array end position
1544    __ addl(src_end, length); // src_pos + length
1545    __ addl(dst_end, length); // dst_pos + length
1546
1547    //  if (src_pos + length > arrayOop(src)->length() ) FAIL;
1548    __ cmpl(src_end, Address(src, arrayOopDesc::length_offset_in_bytes()));
1549    __ jcc(Assembler::above, L_failed);
1550
1551    //  if (dst_pos + length > arrayOop(dst)->length() ) FAIL;
1552    __ cmpl(dst_end, Address(dst, arrayOopDesc::length_offset_in_bytes()));
1553    __ jcc(Assembler::above, L_failed);
1554
1555    BLOCK_COMMENT("arraycopy_range_checks done");
1556  }
1557
1558
1559  //
1560  //  Generate generic array copy stubs
1561  //
1562  //  Input:
1563  //     4(rsp)    -  src oop
1564  //     8(rsp)    -  src_pos
1565  //    12(rsp)    -  dst oop
1566  //    16(rsp)    -  dst_pos
1567  //    20(rsp)    -  element count
1568  //
1569  //  Output:
1570  //    rax, ==  0  -  success
1571  //    rax, == -1^K - failure, where K is partial transfer count
1572  //
1573  address generate_generic_copy(const char *name,
1574                                address entry_jbyte_arraycopy,
1575                                address entry_jshort_arraycopy,
1576                                address entry_jint_arraycopy,
1577                                address entry_oop_arraycopy,
1578                                address entry_jlong_arraycopy,
1579                                address entry_checkcast_arraycopy) {
1580    Label L_failed, L_failed_0, L_objArray;
1581
1582    { int modulus = CodeEntryAlignment;
1583      int target  = modulus - 5; // 5 = sizeof jmp(L_failed)
1584      int advance = target - (__ offset() % modulus);
1585      if (advance < 0)  advance += modulus;
1586      if (advance > 0)  __ nop(advance);
1587    }
1588    StubCodeMark mark(this, "StubRoutines", name);
1589
1590    // Short-hop target to L_failed.  Makes for denser prologue code.
1591    __ BIND(L_failed_0);
1592    __ jmp(L_failed);
1593    assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed");
1594
1595    __ align(CodeEntryAlignment);
1596    address start = __ pc();
1597
1598    __ enter(); // required for proper stackwalking of RuntimeStub frame
1599    __ push(rsi);
1600    __ push(rdi);
1601
1602    // bump this on entry, not on exit:
1603    inc_counter_np(SharedRuntime::_generic_array_copy_ctr);
1604
1605    // Input values
1606    Address SRC     (rsp, 12+ 4);
1607    Address SRC_POS (rsp, 12+ 8);
1608    Address DST     (rsp, 12+12);
1609    Address DST_POS (rsp, 12+16);
1610    Address LENGTH  (rsp, 12+20);
1611
1612    //-----------------------------------------------------------------------
1613    // Assembler stub will be used for this call to arraycopy
1614    // if the following conditions are met:
1615    //
1616    // (1) src and dst must not be null.
1617    // (2) src_pos must not be negative.
1618    // (3) dst_pos must not be negative.
1619    // (4) length  must not be negative.
1620    // (5) src klass and dst klass should be the same and not NULL.
1621    // (6) src and dst should be arrays.
1622    // (7) src_pos + length must not exceed length of src.
1623    // (8) dst_pos + length must not exceed length of dst.
1624    //
1625
1626    const Register src     = rax;       // source array oop
1627    const Register src_pos = rsi;
1628    const Register dst     = rdx;       // destination array oop
1629    const Register dst_pos = rdi;
1630    const Register length  = rcx;       // transfer count
1631
1632    //  if (src == NULL) return -1;
1633    __ movptr(src, SRC);      // src oop
1634    __ testptr(src, src);
1635    __ jccb(Assembler::zero, L_failed_0);
1636
1637    //  if (src_pos < 0) return -1;
1638    __ movl2ptr(src_pos, SRC_POS);  // src_pos
1639    __ testl(src_pos, src_pos);
1640    __ jccb(Assembler::negative, L_failed_0);
1641
1642    //  if (dst == NULL) return -1;
1643    __ movptr(dst, DST);      // dst oop
1644    __ testptr(dst, dst);
1645    __ jccb(Assembler::zero, L_failed_0);
1646
1647    //  if (dst_pos < 0) return -1;
1648    __ movl2ptr(dst_pos, DST_POS);  // dst_pos
1649    __ testl(dst_pos, dst_pos);
1650    __ jccb(Assembler::negative, L_failed_0);
1651
1652    //  if (length < 0) return -1;
1653    __ movl2ptr(length, LENGTH);   // length
1654    __ testl(length, length);
1655    __ jccb(Assembler::negative, L_failed_0);
1656
1657    //  if (src->klass() == NULL) return -1;
1658    Address src_klass_addr(src, oopDesc::klass_offset_in_bytes());
1659    Address dst_klass_addr(dst, oopDesc::klass_offset_in_bytes());
1660    const Register rcx_src_klass = rcx;    // array klass
1661    __ movptr(rcx_src_klass, Address(src, oopDesc::klass_offset_in_bytes()));
1662
1663#ifdef ASSERT
1664    //  assert(src->klass() != NULL);
1665    BLOCK_COMMENT("assert klasses not null");
1666    { Label L1, L2;
1667      __ testptr(rcx_src_klass, rcx_src_klass);
1668      __ jccb(Assembler::notZero, L2);   // it is broken if klass is NULL
1669      __ bind(L1);
1670      __ stop("broken null klass");
1671      __ bind(L2);
1672      __ cmpptr(dst_klass_addr, (int32_t)NULL_WORD);
1673      __ jccb(Assembler::equal, L1);      // this would be broken also
1674      BLOCK_COMMENT("assert done");
1675    }
1676#endif //ASSERT
1677
1678    // Load layout helper (32-bits)
1679    //
1680    //  |array_tag|     | header_size | element_type |     |log2_element_size|
1681    // 32        30    24            16              8     2                 0
1682    //
1683    //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
1684    //
1685
1686    int lh_offset = klassOopDesc::header_size() * HeapWordSize +
1687                    Klass::layout_helper_offset_in_bytes();
1688    Address src_klass_lh_addr(rcx_src_klass, lh_offset);
1689
1690    // Handle objArrays completely differently...
1691    jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
1692    __ cmpl(src_klass_lh_addr, objArray_lh);
1693    __ jcc(Assembler::equal, L_objArray);
1694
1695    //  if (src->klass() != dst->klass()) return -1;
1696    __ cmpptr(rcx_src_klass, dst_klass_addr);
1697    __ jccb(Assembler::notEqual, L_failed_0);
1698
1699    const Register rcx_lh = rcx;  // layout helper
1700    assert(rcx_lh == rcx_src_klass, "known alias");
1701    __ movl(rcx_lh, src_klass_lh_addr);
1702
1703    //  if (!src->is_Array()) return -1;
1704    __ cmpl(rcx_lh, Klass::_lh_neutral_value);
1705    __ jcc(Assembler::greaterEqual, L_failed_0); // signed cmp
1706
1707    // At this point, it is known to be a typeArray (array_tag 0x3).
1708#ifdef ASSERT
1709    { Label L;
1710      __ cmpl(rcx_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift));
1711      __ jcc(Assembler::greaterEqual, L); // signed cmp
1712      __ stop("must be a primitive array");
1713      __ bind(L);
1714    }
1715#endif
1716
1717    assert_different_registers(src, src_pos, dst, dst_pos, rcx_lh);
1718    arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed);
1719
1720    // typeArrayKlass
1721    //
1722    // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
1723    // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
1724    //
1725    const Register rsi_offset = rsi; // array offset
1726    const Register src_array  = src; // src array offset
1727    const Register dst_array  = dst; // dst array offset
1728    const Register rdi_elsize = rdi; // log2 element size
1729
1730    __ mov(rsi_offset, rcx_lh);
1731    __ shrptr(rsi_offset, Klass::_lh_header_size_shift);
1732    __ andptr(rsi_offset, Klass::_lh_header_size_mask);   // array_offset
1733    __ addptr(src_array, rsi_offset);  // src array offset
1734    __ addptr(dst_array, rsi_offset);  // dst array offset
1735    __ andptr(rcx_lh, Klass::_lh_log2_element_size_mask); // log2 elsize
1736
1737    // next registers should be set before the jump to corresponding stub
1738    const Register from       = src; // source array address
1739    const Register to         = dst; // destination array address
1740    const Register count      = rcx; // elements count
1741    // some of them should be duplicated on stack
1742#define FROM   Address(rsp, 12+ 4)
1743#define TO     Address(rsp, 12+ 8)   // Not used now
1744#define COUNT  Address(rsp, 12+12)   // Only for oop arraycopy
1745
1746    BLOCK_COMMENT("scale indexes to element size");
1747    __ movl2ptr(rsi, SRC_POS);  // src_pos
1748    __ shlptr(rsi);             // src_pos << rcx (log2 elsize)
1749    assert(src_array == from, "");
1750    __ addptr(from, rsi);       // from = src_array + SRC_POS << log2 elsize
1751    __ movl2ptr(rdi, DST_POS);  // dst_pos
1752    __ shlptr(rdi);             // dst_pos << rcx (log2 elsize)
1753    assert(dst_array == to, "");
1754    __ addptr(to,  rdi);        // to   = dst_array + DST_POS << log2 elsize
1755    __ movptr(FROM, from);      // src_addr
1756    __ mov(rdi_elsize, rcx_lh); // log2 elsize
1757    __ movl2ptr(count, LENGTH); // elements count
1758
1759    BLOCK_COMMENT("choose copy loop based on element size");
1760    __ cmpl(rdi_elsize, 0);
1761
1762    __ jump_cc(Assembler::equal, RuntimeAddress(entry_jbyte_arraycopy));
1763    __ cmpl(rdi_elsize, LogBytesPerShort);
1764    __ jump_cc(Assembler::equal, RuntimeAddress(entry_jshort_arraycopy));
1765    __ cmpl(rdi_elsize, LogBytesPerInt);
1766    __ jump_cc(Assembler::equal, RuntimeAddress(entry_jint_arraycopy));
1767#ifdef ASSERT
1768    __ cmpl(rdi_elsize, LogBytesPerLong);
1769    __ jccb(Assembler::notEqual, L_failed);
1770#endif
1771    __ pop(rdi); // Do pops here since jlong_arraycopy stub does not do it.
1772    __ pop(rsi);
1773    __ jump(RuntimeAddress(entry_jlong_arraycopy));
1774
1775  __ BIND(L_failed);
1776    __ xorptr(rax, rax);
1777    __ notptr(rax); // return -1
1778    __ pop(rdi);
1779    __ pop(rsi);
1780    __ leave(); // required for proper stackwalking of RuntimeStub frame
1781    __ ret(0);
1782
1783    // objArrayKlass
1784  __ BIND(L_objArray);
1785    // live at this point:  rcx_src_klass, src[_pos], dst[_pos]
1786
1787    Label L_plain_copy, L_checkcast_copy;
1788    //  test array classes for subtyping
1789    __ cmpptr(rcx_src_klass, dst_klass_addr); // usual case is exact equality
1790    __ jccb(Assembler::notEqual, L_checkcast_copy);
1791
1792    // Identically typed arrays can be copied without element-wise checks.
1793    assert_different_registers(src, src_pos, dst, dst_pos, rcx_src_klass);
1794    arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed);
1795
1796  __ BIND(L_plain_copy);
1797    __ movl2ptr(count, LENGTH); // elements count
1798    __ movl2ptr(src_pos, SRC_POS);  // reload src_pos
1799    __ lea(from, Address(src, src_pos, Address::times_ptr,
1800                 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr
1801    __ movl2ptr(dst_pos, DST_POS);  // reload dst_pos
1802    __ lea(to,   Address(dst, dst_pos, Address::times_ptr,
1803                 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr
1804    __ movptr(FROM,  from);   // src_addr
1805    __ movptr(TO,    to);     // dst_addr
1806    __ movl(COUNT, count);  // count
1807    __ jump(RuntimeAddress(entry_oop_arraycopy));
1808
1809  __ BIND(L_checkcast_copy);
1810    // live at this point:  rcx_src_klass, dst[_pos], src[_pos]
1811    {
1812      // Handy offsets:
1813      int  ek_offset = (klassOopDesc::header_size() * HeapWordSize +
1814                        objArrayKlass::element_klass_offset_in_bytes());
1815      int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
1816                        Klass::super_check_offset_offset_in_bytes());
1817
1818      Register rsi_dst_klass = rsi;
1819      Register rdi_temp      = rdi;
1820      assert(rsi_dst_klass == src_pos, "expected alias w/ src_pos");
1821      assert(rdi_temp      == dst_pos, "expected alias w/ dst_pos");
1822      Address dst_klass_lh_addr(rsi_dst_klass, lh_offset);
1823
1824      // Before looking at dst.length, make sure dst is also an objArray.
1825      __ movptr(rsi_dst_klass, dst_klass_addr);
1826      __ cmpl(dst_klass_lh_addr, objArray_lh);
1827      __ jccb(Assembler::notEqual, L_failed);
1828
1829      // It is safe to examine both src.length and dst.length.
1830      __ movl2ptr(src_pos, SRC_POS);        // reload rsi
1831      arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed);
1832      // (Now src_pos and dst_pos are killed, but not src and dst.)
1833
1834      // We'll need this temp (don't forget to pop it after the type check).
1835      __ push(rbx);
1836      Register rbx_src_klass = rbx;
1837
1838      __ mov(rbx_src_klass, rcx_src_klass); // spill away from rcx
1839      __ movptr(rsi_dst_klass, dst_klass_addr);
1840      Address super_check_offset_addr(rsi_dst_klass, sco_offset);
1841      Label L_fail_array_check;
1842      generate_type_check(rbx_src_klass,
1843                          super_check_offset_addr, dst_klass_addr,
1844                          rdi_temp, NULL, &L_fail_array_check);
1845      // (On fall-through, we have passed the array type check.)
1846      __ pop(rbx);
1847      __ jmp(L_plain_copy);
1848
1849      __ BIND(L_fail_array_check);
1850      // Reshuffle arguments so we can call checkcast_arraycopy:
1851
1852      // match initial saves for checkcast_arraycopy
1853      // push(rsi);    // already done; see above
1854      // push(rdi);    // already done; see above
1855      // push(rbx);    // already done; see above
1856
1857      // Marshal outgoing arguments now, freeing registers.
1858      Address   from_arg(rsp, 16+ 4);   // from
1859      Address     to_arg(rsp, 16+ 8);   // to
1860      Address length_arg(rsp, 16+12);   // elements count
1861      Address  ckoff_arg(rsp, 16+16);   // super_check_offset
1862      Address  ckval_arg(rsp, 16+20);   // super_klass
1863
1864      Address SRC_POS_arg(rsp, 16+ 8);
1865      Address DST_POS_arg(rsp, 16+16);
1866      Address  LENGTH_arg(rsp, 16+20);
1867      // push rbx, changed the incoming offsets (why not just use rbp,??)
1868      // assert(SRC_POS_arg.disp() == SRC_POS.disp() + 4, "");
1869
1870      __ movptr(rbx, Address(rsi_dst_klass, ek_offset));
1871      __ movl2ptr(length, LENGTH_arg);    // reload elements count
1872      __ movl2ptr(src_pos, SRC_POS_arg);  // reload src_pos
1873      __ movl2ptr(dst_pos, DST_POS_arg);  // reload dst_pos
1874
1875      __ movptr(ckval_arg, rbx);          // destination element type
1876      __ movl(rbx, Address(rbx, sco_offset));
1877      __ movl(ckoff_arg, rbx);          // corresponding class check offset
1878
1879      __ movl(length_arg, length);      // outgoing length argument
1880
1881      __ lea(from, Address(src, src_pos, Address::times_ptr,
1882                            arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
1883      __ movptr(from_arg, from);
1884
1885      __ lea(to, Address(dst, dst_pos, Address::times_ptr,
1886                          arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
1887      __ movptr(to_arg, to);
1888      __ jump(RuntimeAddress(entry_checkcast_arraycopy));
1889    }
1890
1891    return start;
1892  }
1893
1894  void generate_arraycopy_stubs() {
    // Emits the arraycopy stubs and stores each returned address in the
    // matching StubRoutines::_*_arraycopy field.
    //
    // Statement order is significant here:
    //  - `entry` is handed by address to each generate_disjoint_*copy call
    //    and then by value to the generate_conjoint_*copy call right after
    //    it, so every disjoint/conjoint pair must stay adjacent.
    //  - the entry_* locals are declared without an initializer, are handed
    //    by address to one generator each, and are finally passed by value
    //    to generate_unsafe_copy() / generate_generic_copy(), which must
    //    therefore remain the last two calls.
    // NOTE(review): the generators are defined outside this view; they are
    // presumably what stores an address through those pointers -- confirm.
1895    address entry;
1896    address entry_jbyte_arraycopy;
1897    address entry_jshort_arraycopy;
1898    address entry_jint_arraycopy;
1899    address entry_oop_arraycopy;
1900    address entry_jlong_arraycopy;
1901    address entry_checkcast_arraycopy;
1902
    // jbyte: the arrayof_* variants pass `true` and, for the conjoint stub,
    // NULL instead of an entry_* pointer; the plain variants pass `false`
    // and the plain conjoint stub receives &entry_jbyte_arraycopy.
    // NOTE(review): the bool is presumably an "aligned" flag -- confirm
    // against generate_disjoint_copy / generate_conjoint_copy.
1903    StubRoutines::_arrayof_jbyte_disjoint_arraycopy =
1904        generate_disjoint_copy(T_BYTE,  true, Address::times_1, &entry,
1905                               "arrayof_jbyte_disjoint_arraycopy");
1906    StubRoutines::_arrayof_jbyte_arraycopy =
1907        generate_conjoint_copy(T_BYTE,  true, Address::times_1,  entry,
1908                               NULL, "arrayof_jbyte_arraycopy");
1909    StubRoutines::_jbyte_disjoint_arraycopy =
1910        generate_disjoint_copy(T_BYTE, false, Address::times_1, &entry,
1911                               "jbyte_disjoint_arraycopy");
1912    StubRoutines::_jbyte_arraycopy =
1913        generate_conjoint_copy(T_BYTE, false, Address::times_1,  entry,
1914                               &entry_jbyte_arraycopy, "jbyte_arraycopy");
1915
    // jshort: same four-stub pattern as jbyte, with Address::times_2.
1916    StubRoutines::_arrayof_jshort_disjoint_arraycopy =
1917        generate_disjoint_copy(T_SHORT,  true, Address::times_2, &entry,
1918                               "arrayof_jshort_disjoint_arraycopy");
1919    StubRoutines::_arrayof_jshort_arraycopy =
1920        generate_conjoint_copy(T_SHORT,  true, Address::times_2,  entry,
1921                               NULL, "arrayof_jshort_arraycopy");
1922    StubRoutines::_jshort_disjoint_arraycopy =
1923        generate_disjoint_copy(T_SHORT, false, Address::times_2, &entry,
1924                               "jshort_disjoint_arraycopy");
1925    StubRoutines::_jshort_arraycopy =
1926        generate_conjoint_copy(T_SHORT, false, Address::times_2,  entry,
1927                               &entry_jshort_arraycopy, "jshort_arraycopy");
1928
1929    // Next arrays are always aligned on 4 bytes at least.
    // Hence only one disjoint/conjoint pair (bool argument `true`) is
    // generated for jint and for oop; the arrayof_* slots for these types,
    // and for jlong, are aliased to the plain stubs further down.
1930    StubRoutines::_jint_disjoint_arraycopy =
1931        generate_disjoint_copy(T_INT, true, Address::times_4, &entry,
1932                               "jint_disjoint_arraycopy");
1933    StubRoutines::_jint_arraycopy =
1934        generate_conjoint_copy(T_INT, true, Address::times_4,  entry,
1935                               &entry_jint_arraycopy, "jint_arraycopy");
1936
1937    StubRoutines::_oop_disjoint_arraycopy =
1938        generate_disjoint_copy(T_OBJECT, true, Address::times_ptr, &entry,
1939                               "oop_disjoint_arraycopy");
1940    StubRoutines::_oop_arraycopy =
1941        generate_conjoint_copy(T_OBJECT, true, Address::times_ptr,  entry,
1942                               &entry_oop_arraycopy, "oop_arraycopy");
1943
    // jlong uses its own pair of generators rather than the
    // generate_disjoint_copy / generate_conjoint_copy helpers used above.
1944    StubRoutines::_jlong_disjoint_arraycopy =
1945        generate_disjoint_long_copy(&entry, "jlong_disjoint_arraycopy");
1946    StubRoutines::_jlong_arraycopy =
1947        generate_conjoint_long_copy(entry, &entry_jlong_arraycopy,
1948                                    "jlong_arraycopy");
1949
    // No dedicated arrayof_* stubs exist for jint, oop and jlong: the
    // arrayof slots simply point at the stubs generated above.
1950    StubRoutines::_arrayof_jint_disjoint_arraycopy  =
1951        StubRoutines::_jint_disjoint_arraycopy;
1952    StubRoutines::_arrayof_oop_disjoint_arraycopy   =
1953        StubRoutines::_oop_disjoint_arraycopy;
1954    StubRoutines::_arrayof_jlong_disjoint_arraycopy =
1955        StubRoutines::_jlong_disjoint_arraycopy;
1956
1957    StubRoutines::_arrayof_jint_arraycopy  = StubRoutines::_jint_arraycopy;
1958    StubRoutines::_arrayof_oop_arraycopy   = StubRoutines::_oop_arraycopy;
1959    StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy;
1960
    // Must precede generate_generic_copy(): this call is the only one that
    // is given &entry_checkcast_arraycopy.
1961    StubRoutines::_checkcast_arraycopy =
1962        generate_checkcast_copy("checkcast_arraycopy",
1963                                  &entry_checkcast_arraycopy);
1964
    // The two dispatching stubs take the entry_* addresses collected above
    // by value (the unsafe copy takes the four primitive ones, the generic
    // copy takes all six).
1965    StubRoutines::_unsafe_arraycopy =
1966        generate_unsafe_copy("unsafe_arraycopy",
1967                               entry_jbyte_arraycopy,
1968                               entry_jshort_arraycopy,
1969                               entry_jint_arraycopy,
1970                               entry_jlong_arraycopy);
1971
1972    StubRoutines::_generic_arraycopy =
1973        generate_generic_copy("generic_arraycopy",
1974                               entry_jbyte_arraycopy,
1975                               entry_jshort_arraycopy,
1976                               entry_jint_arraycopy,
1977                               entry_oop_arraycopy,
1978                               entry_jlong_arraycopy,
1979                               entry_checkcast_arraycopy);
1980  }
1981
1982 public:
1983  // Information about frame layout at time of blocking runtime call.
1984  // Note that we only have to preserve callee-saved registers since
1985  // the compilers are responsible for supplying a continuation point
1986  // if they expect all registers to be preserved.
  //
  // The enumerators are word indices relative to rsp once the prolog of
  // generate_throw_exception() has run (they are multiplied by wordSize
  // where used); `framesize` is the resulting frame size in words.
1987  enum layout {
1988    thread_off,    // slot 0, at last_java_sp: thread pointer passed as the C argument
1989    rbp_off,       // callee saved register (rbp, pushed by enter())
1990    ret_pc,        // return pc; already on the stack before enter() runs
1991    framesize      // number of slots above; not a slot itself
1992  };
1993
1994 private:
1995
1996#undef  __
// `__` was defined as `_masm->` near the top of the file.  It is re-pointed
// here because generate_throw_exception() below assembles through a local
// variable named `masm` that it creates for its own CodeBuffer.
1997#define __ masm->
1998
1999  //------------------------------------------------------------------------------------------------------------------------
2000  // Continuation point for throwing of implicit exceptions that are not handled in
2001  // the current activation. Fabricates an exception oop and initiates normal
2002  // exception dispatching in this frame.
2003  //
2004  // Previously the compiler (c2) allowed for callee save registers on Java calls.
2005  // This is no longer true after adapter frames were removed but could possibly
2006  // be brought back in the future if the interpreter code was reworked and it
2007  // was deemed worthwhile. The comment below was left to describe what must
2008  // happen here if callee saves were resurrected. As it stands now this stub
2009  // could actually be a vanilla BufferBlob and have no oopMap at all.
2010  // Since it doesn't make much difference we've chosen to leave it the
2011  // way it was in the callee save days and keep the comment.
2012
2013  // If we need to preserve callee-saved values we need a callee-saved oop map and
2014  // therefore have to make these stubs into RuntimeStubs rather than BufferBlobs.
2015  // If the compiler needs all registers to be preserved between the fault
2016  // point and the exception handler then it must assume responsibility for that in
2017  // AbstractCompiler::continuation_for_implicit_null_exception or
2018  // continuation_for_implicit_division_by_zero_exception. All other implicit
2019  // exceptions (e.g., NullPointerException or AbstractMethodError on entry) are
2020  // either at call sites or otherwise assume that stack unwinding will be initiated,
2021  // so caller saved registers were assumed volatile in the compiler.
  //
  // Parameters:
  //   name                       - name given to the CodeBuffer and to the RuntimeStub
  //   runtime_entry              - address called from the stub, with the current
  //                                thread stored in the thread_off stack slot
  //   restore_saved_exception_pc - if true, JavaThread::saved_exception_pc is
  //                                loaded and pushed before the frame is built
  // Returns the entry point of the RuntimeStub created from the emitted code.
2022  address generate_throw_exception(const char* name, address runtime_entry,
2023                                   bool restore_saved_exception_pc) {
2024
    // Sizes handed to the CodeBuffer constructor below.
2025    int insts_size = 256;
2026    int locs_size  = 32;
2027
    // This stub is assembled into its own CodeBuffer via a local `masm`
    // (the `__` macro is redefined to `masm->` just above this function).
2028    CodeBuffer code(name, insts_size, locs_size);
2029    OopMapSet* oop_maps  = new OopMapSet();
2030    MacroAssembler* masm = new MacroAssembler(&code);
2031
2032    address start = __ pc();
2033
2034    // This is an inlined and slightly modified version of call_VM
2035    // which has the ability to fetch the return PC out of
2036    // thread-local storage and also sets up last_Java_sp slightly
2037    // differently than the real call_VM
2038    Register java_thread = rbx;
2039    __ get_thread(java_thread);
2040    if (restore_saved_exception_pc) {
      // Pushed before enter(), so this value ends up in the ret_pc slot of
      // the frame laid out by `enum layout`.
2041      __ movptr(rax, Address(java_thread, in_bytes(JavaThread::saved_exception_pc_offset())));
2042      __ push(rax);
2043    }
2044
2045    __ enter(); // required for proper stackwalking of RuntimeStub frame
2046
2047    // pc and rbp, already pushed
    // (so only framesize-2 further words need to be reserved)
2048    __ subptr(rsp, (framesize-2) * wordSize); // prolog
2049
2050    // Frame is now completed as far as size and linkage.
2051
    // Byte offset from `start` at which the frame is complete; passed to
    // RuntimeStub::new_runtime_stub below.
2052    int frame_complete = __ pc() - start;
2053
2054    // push java thread (becomes first argument of C function)
2055    __ movptr(Address(rsp, thread_off * wordSize), java_thread);
2056
2057    // Set up last_Java_sp and last_Java_fp
2058    __ set_last_Java_frame(java_thread, rsp, rbp, NULL);
2059
2060    // Call runtime
2061    BLOCK_COMMENT("call runtime_entry");
2062    __ call(RuntimeAddress(runtime_entry));
2063    // Generate oop map
    // Nothing is added to the map before it is registered; it is keyed by
    // the code offset immediately after the call (the call's return pc).
2064    OopMap* map =  new OopMap(framesize, 0);
2065    oop_maps->add_gc_map(__ pc() - start, map);
2066
2067    // restore the thread (cannot use the pushed argument since arguments
2068    // may be overwritten by C code generated by an optimizing compiler);
2069    // however can use the register value directly if it is callee saved.
2070    __ get_thread(java_thread);
2071
2072    __ reset_last_Java_frame(java_thread, true, false);
2073
2074    __ leave(); // required for proper stackwalking of RuntimeStub frame
2075
2076    // check for pending exceptions
    // Debug builds only: the emitted code jumps over should_not_reach_here()
    // when the thread's pending-exception field is non-NULL, i.e. it traps
    // if runtime_entry returned without leaving a pending exception.
2077#ifdef ASSERT
2078    Label L;
2079    __ cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
2080    __ jcc(Assembler::notEqual, L);
2081    __ should_not_reach_here();
2082    __ bind(L);
2083#endif /* ASSERT */
    // Control leaves the stub by jumping (not calling) to the shared
    // forward-exception entry.
2084    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2085
2086
2087    RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, framesize, oop_maps, false);
2088    return stub->entry_point();
2089  }
2090
2091
2092  void create_control_words() {
    // Fills in the constant x87 control words, the standard MXCSR value and
    // the two 80-bit subnormal bias constants held in StubRoutines.
    // No code is emitted here; only static data is assigned.
2093    // Round to nearest, 53-bit mode, exceptions masked
2094    StubRoutines::_fpu_cntrl_wrd_std   = 0x027F;
2095    // Round to zero, 53-bit mode, exceptions masked
    // NOTE(review): bits 9:8 (the x87 precision-control field) are 01b in
    // 0x0D7F but 10b in 0x027F above, so "53-bit mode" may not describe
    // this value -- confirm against the Intel SDM.
2096    StubRoutines::_fpu_cntrl_wrd_trunc = 0x0D7F;
2097    // Round to nearest, 24-bit mode, exceptions masked
2098    StubRoutines::_fpu_cntrl_wrd_24    = 0x007F;
2099    // Round to nearest, 64-bit mode, exceptions masked
2100    StubRoutines::_fpu_cntrl_wrd_64    = 0x037F;
2101    // MXCSR: round to nearest, all SIMD floating-point exceptions masked
    // (0x1F80 sets only bits 7..12, the six exception-mask bits; unlike the
    // x87 control word, MXCSR has no precision-control field.)
2102    StubRoutines::_mxcsr_std           = 0x1F80;
2103    // Note: the following two constants are 80-bit values
2104    //       layout is critical for correct loading by FPU.
    //       Element [0] holds the low 32 bits of the value, [1] the next
    //       32 bits and [2] the top 16 bits (the 0x03ff / 0x7bff part).
2105    // Bias for strict fp multiply/divide
2106    StubRoutines::_fpu_subnormal_bias1[0]= 0x00000000; // 2^(-15360) == 0x03ff 8000 0000 0000 0000
2107    StubRoutines::_fpu_subnormal_bias1[1]= 0x80000000;
2108    StubRoutines::_fpu_subnormal_bias1[2]= 0x03ff;
2109    // Un-Bias for strict fp multiply/divide
2110    StubRoutines::_fpu_subnormal_bias2[0]= 0x00000000; // 2^(+15360) == 0x7bff 8000 0000 0000 0000
2111    StubRoutines::_fpu_subnormal_bias2[1]= 0x80000000;
2112    StubRoutines::_fpu_subnormal_bias2[2]= 0x7bff;
2113  }
2114
2115  //---------------------------------------------------------------------------
2116  // Initialization
2117
2118  void generate_initial() {
2119    // Generates the initial group of stubs and initializes their entry points.
    // The constructor calls this when its `all` argument is false; the
    // remaining stubs are produced by generate_all().
2120
2121    //------------------------------------------------------------------------------------------------------------------------
2122    // entry points that exist in all platforms
2123    // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than
2124    //       the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp.
2125    StubRoutines::_forward_exception_entry      = generate_forward_exception();
2126
    // generate_call_stub also receives StubRoutines::_call_stub_return_address
    // as an argument.
    // NOTE(review): presumably an out-parameter filled in by the generator -- confirm.
2127    StubRoutines::_call_stub_entry              =
2128      generate_call_stub(StubRoutines::_call_stub_return_address);
2129    // is referenced by megamorphic call
2130    StubRoutines::_catch_exception_entry        = generate_catch_exception();
2131
2132    // These are currently used by Solaris/Intel
2133    StubRoutines::_atomic_xchg_entry            = generate_atomic_xchg();
2134
2135    StubRoutines::_handler_for_unsafe_access_entry =
2136      generate_handler_for_unsafe_access();
2137
2138    // platform dependent
    // (create_control_words only assigns constants; it emits no code.)
2139    create_control_words();
2140
2141    StubRoutines::x86::_verify_mxcsr_entry                 = generate_verify_mxcsr();
2142    StubRoutines::x86::_verify_fpu_cntrl_wrd_entry         = generate_verify_fpu_cntrl_wrd();
    // Both the int and the long wrapper come from generate_d2i_wrapper; they
    // differ only in the BasicType and the SharedRuntime function passed in.
2143    StubRoutines::_d2i_wrapper                              = generate_d2i_wrapper(T_INT,
2144                                                                                   CAST_FROM_FN_PTR(address, SharedRuntime::d2i));
2145    StubRoutines::_d2l_wrapper                              = generate_d2i_wrapper(T_LONG,
2146                                                                                   CAST_FROM_FN_PTR(address, SharedRuntime::d2l));
2147  }
2148
2149
2150  void generate_all() {
2151    // Generates the second group of stubs and initializes their entry points.
    // The constructor calls this when its `all` argument is true.  Despite
    // the name it does not call generate_initial(); the two groups are
    // generated by separate StubGenerator constructions.
2152
2153    // These entry points require SharedInfo::stack0 to be set up in non-core builds
2154    // and need to be relocatable, so they each fabricate a RuntimeStub internally.
    // The last argument is generate_throw_exception's
    // restore_saved_exception_pc flag: it is true only for the
    // ArithmeticException and NullPointerException stubs.
2155    StubRoutines::_throw_AbstractMethodError_entry         = generate_throw_exception("AbstractMethodError throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError),  false);
2156    StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError),  false);
2157    StubRoutines::_throw_ArithmeticException_entry         = generate_throw_exception("ArithmeticException throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_ArithmeticException),  true);
2158    StubRoutines::_throw_NullPointerException_entry        = generate_throw_exception("NullPointerException throw_exception",         CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException), true);
2159    StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
2160    StubRoutines::_throw_StackOverflowError_entry          = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError),   false);
2161
2162    //------------------------------------------------------------------------------------------------------------------------
2163    // entry points that are platform specific
2164
2165    // support for verify_oop (must happen after universe_init)
2166    StubRoutines::_verify_oop_subroutine_entry     = generate_verify_oop();
2167
2168    // arraycopy stubs used by compilers
2169    generate_arraycopy_stubs();
2170  }
2171
2172
2173 public:
2174  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
    // All stub generation happens as a side effect of construction.
    //   code - forwarded unchanged to the StubCodeGenerator base constructor
    //   all  - true:  run generate_all()     (throw stubs, verify_oop, arraycopy)
    //          false: run generate_initial() (the initial group of stubs)
    // Exactly one of the two groups is generated per construction.
2175    if (all) {
2176      generate_all();
2177    } else {
2178      generate_initial();
2179    }
2180  }
2181}; // end class declaration
2182
2183
2184void StubGenerator_generate(CodeBuffer* code, bool all) {
  // Free-function entry point for stub generation: constructing the local
  // StubGenerator runs generate_all() or generate_initial() (chosen by
  // `all`) from its constructor; `g` itself is not used afterwards.
2185  StubGenerator g(code, all);
2186}
2187