stubGenerator_x86_32.cpp revision 2721:f08d439fab8c
1/*
2 * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "asm/assembler.hpp"
27#include "assembler_x86.inline.hpp"
28#include "interpreter/interpreter.hpp"
29#include "nativeInst_x86.hpp"
30#include "oops/instanceOop.hpp"
31#include "oops/methodOop.hpp"
32#include "oops/objArrayKlass.hpp"
33#include "oops/oop.inline.hpp"
34#include "prims/methodHandles.hpp"
35#include "runtime/frame.inline.hpp"
36#include "runtime/handles.inline.hpp"
37#include "runtime/sharedRuntime.hpp"
38#include "runtime/stubCodeGenerator.hpp"
39#include "runtime/stubRoutines.hpp"
40#include "utilities/top.hpp"
41#ifdef TARGET_OS_FAMILY_linux
42# include "thread_linux.inline.hpp"
43#endif
44#ifdef TARGET_OS_FAMILY_solaris
45# include "thread_solaris.inline.hpp"
46#endif
47#ifdef TARGET_OS_FAMILY_windows
48# include "thread_windows.inline.hpp"
49#endif
50#ifdef TARGET_OS_FAMILY_bsd
51# include "thread_bsd.inline.hpp"
52#endif
53#ifdef COMPILER2
54#include "opto/runtime.hpp"
55#endif
56
57// Declaration and definition of StubGenerator (no .hpp file).
58// For a more detailed description of the stub routine structure
59// see the comment in stubRoutines.hpp
60
61#define __ _masm->
  // `__` (above) routes an emitted instruction through the current _masm;
  // `a__` (below) does the same after casting _masm to the Assembler base type.
62#define a__ ((Assembler*)_masm)->
63
  // BLOCK_COMMENT attaches a textual comment to the generated code in
  // non-PRODUCT builds and expands to nothing in PRODUCT builds.
64#ifdef PRODUCT
65#define BLOCK_COMMENT(str) /* nothing */
66#else
67#define BLOCK_COMMENT(str) __ block_comment(str)
68#endif
69
  // BIND binds a label and emits a block comment carrying the label's source
  // name followed by ':' (so renaming a label changes that comment text).
70#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
71
  // Bits kept by MXCSR_MASK are 6..15; the low six bits are dropped before
  // the value is compared against the standard MXCSR setting.
72const int MXCSR_MASK  = 0xFFC0;  // Mask out any pending exceptions
  // Mask applied to the FPU control word before comparing it with the
  // standard control word (keeps the low 16 bits).
73const int FPU_CNTRL_WRD_MASK = 0xFFFF;
74
75// -------------------------------------------------------------------------------------------------------------------------
76// Stub Code definitions
77
78static address handle_unsafe_access() {
79  JavaThread* thread = JavaThread::current();
80  address pc  = thread->saved_exception_pc();
81  // pc is the instruction which we must emulate
82  // doing a no-op is fine:  return garbage from the load
83  // therefore, compute npc
84  address npc = Assembler::locate_next_instruction(pc);
85
86  // request an async exception
87  thread->set_pending_unsafe_access_error();
88
89  // return address of next instruction to execute
90  return npc;
91}
92
93class StubGenerator: public StubCodeGenerator {
94 private:
95
96#ifdef PRODUCT
  // PRODUCT builds: counter instrumentation expands to the no-op expression (0).
97#define inc_counter_np(counter) (0)
98#else
  // Non-PRODUCT builds: emit an increment of the 32-bit counter located at
  // the address of `counter`.
99  void inc_counter_np_(int& counter) {
100    __ incrementl(ExternalAddress((address)&counter));
101  }
  // inc_counter_np emits a block comment naming the counter (the argument is
  // stringified, so its spelling is visible in the comment) followed by the
  // increment itself.
102#define inc_counter_np(counter) \
103  BLOCK_COMMENT("inc_counter " #counter); \
104  inc_counter_np_(counter);
105#endif //PRODUCT
106
107  void inc_copy_counter_np(BasicType t) {
108#ifndef PRODUCT
109    switch (t) {
110    case T_BYTE:    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); return;
111    case T_SHORT:   inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); return;
112    case T_INT:     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); return;
113    case T_LONG:    inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); return;
114    case T_OBJECT:  inc_counter_np(SharedRuntime::_oop_array_copy_ctr); return;
115    }
116    ShouldNotReachHere();
117#endif //PRODUCT
118  }
119
120  //------------------------------------------------------------------------------------------------------------------------
121  // Call stubs are used to call Java from C
122  //
123  //    [ return_from_Java     ] <--- rsp
124  //    [ argument word n      ]
125  //      ...
126  // -N [ argument word 1      ]
127  // -7 [ Possible padding for stack alignment ]
128  // -6 [ Possible padding for stack alignment ]
129  // -5 [ Possible padding for stack alignment ]
130  // -4 [ mxcsr save           ] <--- rsp_after_call
131  // -3 [ saved rbx            ]
132  // -2 [ saved rsi            ]
133  // -1 [ saved rdi            ]
134  //  0 [ saved rbp            ] <--- rbp
135  //  1 [ return address       ]
136  //  2 [ ptr. to call wrapper ]
137  //  3 [ result               ]
138  //  4 [ result_type          ]
139  //  5 [ method               ]
140  //  6 [ entry_point          ]
141  //  7 [ parameters           ]
142  //  8 [ parameter_size       ]
143  //  9 [ thread               ]
144
145
146  address generate_call_stub(address& return_address) {
       // Emits the stub used to call Java from C.
       //   return_address - out: set to the pc immediately following the
       //                    emitted `call rax` into the Java entry point.
       //   returns        - pc of the first instruction of the stub.
       // All frame slots below are addressed relative to rbp after enter().
147    StubCodeMark mark(this, "StubRoutines", "call_stub");
148    address start = __ pc();
149
150    // stub code parameters / addresses
151    assert(frame::entry_frame_call_wrapper_offset == 2, "adjust this code");
152    bool  sse_save = false;
       // rsp_after_call and mxcsr_save name the same slot (rbp - 4 words).
153    const Address rsp_after_call(rbp, -4 * wordSize); // same as in generate_catch_exception()!
154    const int     locals_count_in_bytes  (4*wordSize);
155    const Address mxcsr_save    (rbp, -4 * wordSize);
156    const Address saved_rbx     (rbp, -3 * wordSize);
157    const Address saved_rsi     (rbp, -2 * wordSize);
158    const Address saved_rdi     (rbp, -1 * wordSize);
159    const Address result        (rbp,  3 * wordSize);
160    const Address result_type   (rbp,  4 * wordSize);
161    const Address method        (rbp,  5 * wordSize);
162    const Address entry_point   (rbp,  6 * wordSize);
163    const Address parameters    (rbp,  7 * wordSize);
164    const Address parameter_size(rbp,  8 * wordSize);
165    const Address thread        (rbp,  9 * wordSize); // same as in generate_catch_exception()!
       // MXCSR is saved/restored only when SSE is in use.
166    sse_save =  UseSSE > 0;
167
168    // stub code
169    __ enter();
170    __ movptr(rcx, parameter_size);              // parameter counter
171    __ shlptr(rcx, Interpreter::logStackElementSize); // convert parameter count to bytes
172    __ addptr(rcx, locals_count_in_bytes);       // reserve space for register saves
173    __ subptr(rsp, rcx);
174    __ andptr(rsp, -(StackAlignmentInBytes));    // Align stack
175
176    // save rdi, rsi, & rbx, according to C calling conventions
177    __ movptr(saved_rdi, rdi);
178    __ movptr(saved_rsi, rsi);
179    __ movptr(saved_rbx, rbx);
180    // save and initialize %mxcsr
181    if (sse_save) {
182      Label skip_ldmx;
183      __ stmxcsr(mxcsr_save);
184      __ movl(rax, mxcsr_save);
185      __ andl(rax, MXCSR_MASK);    // Only check control and mask bits
186      ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
187      __ cmp32(rax, mxcsr_std);
         // Reload MXCSR only when the masked value differs from the standard one.
188      __ jcc(Assembler::equal, skip_ldmx);
189      __ ldmxcsr(mxcsr_std);
190      __ bind(skip_ldmx);
191    }
192
193    // make sure the control word is correct.
194    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
195
196#ifdef ASSERT
197    // make sure we have no pending exceptions
198    { Label L;
199      __ movptr(rcx, thread);
200      __ cmpptr(Address(rcx, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
201      __ jcc(Assembler::equal, L);
202      __ stop("StubRoutines::call_stub: entered with pending exception");
203      __ bind(L);
204    }
205#endif
206
207    // pass parameters if any
208    BLOCK_COMMENT("pass parameters if any");
209    Label parameters_done;
210    __ movl(rcx, parameter_size);  // parameter counter
211    __ testl(rcx, rcx);
212    __ jcc(Assembler::zero, parameters_done);
213
214    // parameter passing loop
215
216    Label loop;
217    // Copy Java parameters in reverse order (receiver last)
218    // Note that the argument order is inverted in the process
219    // source is rdx[rcx: N-1..0]
220    // dest   is rsp[rbx: 0..N-1]
221
222    __ movptr(rdx, parameters);          // parameter pointer
223    __ xorptr(rbx, rbx);
224
225    __ BIND(loop);
226
227    // get parameter
       // rcx counts down from N, so the -wordSize displacement selects element rcx-1.
228    __ movptr(rax, Address(rdx, rcx, Interpreter::stackElementScale(), -wordSize));
229    __ movptr(Address(rsp, rbx, Interpreter::stackElementScale(),
230                    Interpreter::expr_offset_in_bytes(0)), rax);          // store parameter
231    __ increment(rbx);
232    __ decrement(rcx);
233    __ jcc(Assembler::notZero, loop);
234
235    // call Java function
236    __ BIND(parameters_done);
237    __ movptr(rbx, method);           // get methodOop
238    __ movptr(rax, entry_point);      // get entry_point
239    __ mov(rsi, rsp);                 // set sender sp
240    BLOCK_COMMENT("call Java function");
241    __ call(rax);
242
243    BLOCK_COMMENT("call_stub_return_address:");
       // Publish the pc following the call; generate_catch_exception() jumps
       // to StubRoutines::_call_stub_return_address to rejoin the code below.
244    return_address = __ pc();
245
246#ifdef COMPILER2
       // Free FPU stack slots that do not hold a result: with UseSSE >= 2 the
       // stack is only verified; otherwise slots 1..7 are freed and slot 0 is
       // kept only for a double (or, with UseSSE == 0, a float) result.
247    {
248      Label L_skip;
249      if (UseSSE >= 2) {
250        __ verify_FPU(0, "call_stub_return");
251      } else {
252        for (int i = 1; i < 8; i++) {
253          __ ffree(i);
254        }
255
256        // UseSSE <= 1 so double result should be left on TOS
257        __ movl(rsi, result_type);
258        __ cmpl(rsi, T_DOUBLE);
259        __ jcc(Assembler::equal, L_skip);
260        if (UseSSE == 0) {
261          // UseSSE == 0 so float result should be left on TOS
262          __ cmpl(rsi, T_FLOAT);
263          __ jcc(Assembler::equal, L_skip);
264        }
265        __ ffree(0);
266      }
267      __ BIND(L_skip);
268    }
269#endif // COMPILER2
270
271    // store result depending on type
272    // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
273    __ movptr(rdi, result);
274    Label is_long, is_float, is_double, exit;
275    __ movl(rsi, result_type);
276    __ cmpl(rsi, T_LONG);
277    __ jcc(Assembler::equal, is_long);
278    __ cmpl(rsi, T_FLOAT);
279    __ jcc(Assembler::equal, is_float);
280    __ cmpl(rsi, T_DOUBLE);
281    __ jcc(Assembler::equal, is_double);
282
283    // handle T_INT case
284    __ movl(Address(rdi, 0), rax);
285    __ BIND(exit);
286
287    // check that FPU stack is empty
288    __ verify_FPU(0, "generate_call_stub");
289
290    // pop parameters
291    __ lea(rsp, rsp_after_call);
292
293    // restore %mxcsr
294    if (sse_save) {
295      __ ldmxcsr(mxcsr_save);
296    }
297
298    // restore rdi, rsi and rbx
299    __ movptr(rbx, saved_rbx);
300    __ movptr(rsi, saved_rsi);
301    __ movptr(rdi, saved_rdi);
       // Drop the four save slots (mxcsr, rbx, rsi, rdi) so rsp points at saved rbp.
302    __ addptr(rsp, 4*wordSize);
303
304    // return
305    __ pop(rbp);
306    __ ret(0);
307
308    // handle return types different from T_INT
       // These handlers are emitted after the ret and jump back to `exit`.
309    __ BIND(is_long);
       // 64-bit result: low word from rax, high word from rdx.
310    __ movl(Address(rdi, 0 * wordSize), rax);
311    __ movl(Address(rdi, 1 * wordSize), rdx);
312    __ jmp(exit);
313
314    __ BIND(is_float);
315    // interpreter uses xmm0 for return values
316    if (UseSSE >= 1) {
317      __ movflt(Address(rdi, 0), xmm0);
318    } else {
319      __ fstp_s(Address(rdi, 0));
320    }
321    __ jmp(exit);
322
323    __ BIND(is_double);
324    // interpreter uses xmm0 for return values
325    if (UseSSE >= 2) {
326      __ movdbl(Address(rdi, 0), xmm0);
327    } else {
328      __ fstp_d(Address(rdi, 0));
329    }
330    __ jmp(exit);
331
332    return start;
333  }
334
335
336  //------------------------------------------------------------------------------------------------------------------------
337  // Return point for a Java call if there's an exception thrown in Java code.
338  // The exception is caught and transformed into a pending exception stored in
339  // JavaThread that can be tested from within the VM.
340  //
341  // Note: Usually the parameters are removed by the callee. In case of an exception
342  //       crossing an activation frame boundary, that is not the case if the callee
343  //       is compiled code => need to setup the rsp.
344  //
345  // rax: exception oop
346
347  address generate_catch_exception() {
       // Emits the stub that stores the exception oop arriving in rax as the
       // thread's pending exception, records this source file and line as the
       // exception location, and then jumps to the call stub's return address.
       // Returns the pc of the first instruction of the stub.
348    StubCodeMark mark(this, "StubRoutines", "catch_exception");
       // NOTE(review): rsp_after_call is declared but not referenced in this
       // function; it only mirrors the layout used by generate_call_stub().
349    const Address rsp_after_call(rbp, -4 * wordSize); // same as in generate_call_stub()!
350    const Address thread        (rbp,  9 * wordSize); // same as in generate_call_stub()!
351    address start = __ pc();
352
353    // get thread directly
       // (read from the call stub frame slot at rbp + 9 words)
354    __ movptr(rcx, thread);
355#ifdef ASSERT
356    // verify that threads correspond
357    { Label L;
358      __ get_thread(rbx);
359      __ cmpptr(rbx, rcx);
360      __ jcc(Assembler::equal, L);
361      __ stop("StubRoutines::catch_exception: threads must correspond");
362      __ bind(L);
363    }
364#endif
365    // set pending exception
366    __ verify_oop(rax);
367    __ movptr(Address(rcx, Thread::pending_exception_offset()), rax          );
       // __FILE__ / __LINE__ are evaluated when this generator is compiled, so
       // the recorded location is this spot in the stub generator source.
368    __ lea(Address(rcx, Thread::exception_file_offset   ()),
369           ExternalAddress((address)__FILE__));
370    __ movl(Address(rcx, Thread::exception_line_offset   ()), __LINE__ );
371    // complete return to VM
372    assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before");
373    __ jump(RuntimeAddress(StubRoutines::_call_stub_return_address));
374
375    return start;
376  }
377
378
379  //------------------------------------------------------------------------------------------------------------------------
380  // Continuation point for runtime calls returning with a pending exception.
381  // The pending exception check happened in the runtime or native call stub.
382  // The pending exception in Thread is converted into a Java-level exception.
383  //
384  // Contract with Java-level exception handlers:
385  // rax: exception
386  // rdx: throwing pc
387  //
388  // NOTE: At entry of this stub, exception-pc must be on stack !!
389
390  address generate_forward_exception() {
       // Emits the stub that turns the thread's pending exception into a
       // Java-level exception dispatch: it looks up the handler for the return
       // address on top of the stack, loads the exception into rax and the
       // throwing pc into rdx, clears the pending exception and jumps to the
       // handler. Returns the pc of the first instruction of the stub.
391    StubCodeMark mark(this, "StubRoutines", "forward exception");
392    address start = __ pc();
393    const Register thread = rcx;
394
395    // other registers used in this stub
396    const Register exception_oop = rax;
397    const Register handler_addr  = rbx;
398    const Register exception_pc  = rdx;
399
400    // Upon entry, the sp points to the return address returning into Java
401    // (interpreted or compiled) code; i.e., the return address becomes the
402    // throwing pc.
403    //
404    // Arguments pushed before the runtime call are still on the stack but
405    // the exception handler will reset the stack pointer -> ignore them.
406    // A potential result in registers can be ignored as well.
407
408#ifdef ASSERT
409    // make sure this code is only executed if there is a pending exception
410    { Label L;
411      __ get_thread(thread);
412      __ cmpptr(Address(thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
413      __ jcc(Assembler::notEqual, L);
414      __ stop("StubRoutines::forward exception: no pending exception (1)");
415      __ bind(L);
416    }
417#endif
418
419    // compute exception handler into rbx
420    __ get_thread(thread);
       // Peek at the return address without popping it; it stays on the stack
       // across the runtime call below.
421    __ movptr(exception_pc, Address(rsp, 0));
422    BLOCK_COMMENT("call exception_handler_for_return_address");
423    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, exception_pc);
       // The handler address is taken from rax and parked in rbx because rax
       // is loaded with the exception oop next.
424    __ mov(handler_addr, rax);
425
426    // setup rax & rdx, remove return address & clear pending exception
       // The thread register is reloaded here; presumably rcx is not preserved
       // across the leaf call above -- TODO confirm.
427    __ get_thread(thread);
428    __ pop(exception_pc);
429    __ movptr(exception_oop, Address(thread, Thread::pending_exception_offset()));
430    __ movptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD);
431
432#ifdef ASSERT
433    // make sure exception is set
434    { Label L;
435      __ testptr(exception_oop, exception_oop);
436      __ jcc(Assembler::notEqual, L);
437      __ stop("StubRoutines::forward exception: no pending exception (2)");
438      __ bind(L);
439    }
440#endif
441
442    // Verify that there is really a valid exception in RAX.
443    __ verify_oop(exception_oop);
444
445    // continue at exception handler (return address removed)
446    // rax: exception
447    // rbx: exception handler
448    // rdx: throwing pc
449    __ jmp(handler_addr);
450
451    return start;
452  }
453
454
455  //----------------------------------------------------------------------------------------------------
456  // Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest)
457  //
458  // xchg exists as far back as 8086, lock needed for MP only
459  // Stack layout immediately after call:
460  //
461  // 0 [ret addr ] <--- rsp
462  // 1 [  ex     ]
463  // 2 [  dest   ]
464  //
465  // Result:   *dest <- ex, return (old *dest)
466  //
467  // Note: win32 does not currently use this code
468
469  address generate_atomic_xchg() {
470    StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
471    address start = __ pc();
472
473    __ push(rdx);
474    Address exchange(rsp, 2 * wordSize);
475    Address dest_addr(rsp, 3 * wordSize);
476    __ movl(rax, exchange);
477    __ movptr(rdx, dest_addr);
478    __ xchgl(rax, Address(rdx, 0));
479    __ pop(rdx);
480    __ ret(0);
481
482    return start;
483  }
484
485  //----------------------------------------------------------------------------------------------------
486  // Support for void verify_mxcsr()
487  //
488  // This routine is used with -Xcheck:jni to verify that native
489  // JNI code does not return to Java code without restoring the
490  // MXCSR register to our expected state.
491
492
493  address generate_verify_mxcsr() {
494    StubCodeMark mark(this, "StubRoutines", "verify_mxcsr");
495    address start = __ pc();
496
497    const Address mxcsr_save(rsp, 0);
498
499    if (CheckJNICalls && UseSSE > 0 ) {
500      Label ok_ret;
501      ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
502      __ push(rax);
503      __ subptr(rsp, wordSize);      // allocate a temp location
504      __ stmxcsr(mxcsr_save);
505      __ movl(rax, mxcsr_save);
506      __ andl(rax, MXCSR_MASK);
507      __ cmp32(rax, mxcsr_std);
508      __ jcc(Assembler::equal, ok_ret);
509
510      __ warn("MXCSR changed by native JNI code.");
511
512      __ ldmxcsr(mxcsr_std);
513
514      __ bind(ok_ret);
515      __ addptr(rsp, wordSize);
516      __ pop(rax);
517    }
518
519    __ ret(0);
520
521    return start;
522  }
523
524
525  //---------------------------------------------------------------------------
526  // Support for void verify_fpu_cntrl_wrd()
527  //
528  // This routine is used with -Xcheck:jni to verify that native
529  // JNI code does not return to Java code without restoring the
530  // FP control word to our expected state.
531
532  address generate_verify_fpu_cntrl_wrd() {
533    StubCodeMark mark(this, "StubRoutines", "verify_spcw");
534    address start = __ pc();
535
536    const Address fpu_cntrl_wrd_save(rsp, 0);
537
538    if (CheckJNICalls) {
539      Label ok_ret;
540      __ push(rax);
541      __ subptr(rsp, wordSize);      // allocate a temp location
542      __ fnstcw(fpu_cntrl_wrd_save);
543      __ movl(rax, fpu_cntrl_wrd_save);
544      __ andl(rax, FPU_CNTRL_WRD_MASK);
545      ExternalAddress fpu_std(StubRoutines::addr_fpu_cntrl_wrd_std());
546      __ cmp32(rax, fpu_std);
547      __ jcc(Assembler::equal, ok_ret);
548
549      __ warn("Floating point control word changed by native JNI code.");
550
551      __ fldcw(fpu_std);
552
553      __ bind(ok_ret);
554      __ addptr(rsp, wordSize);
555      __ pop(rax);
556    }
557
558    __ ret(0);
559
560    return start;
561  }
562
563  //---------------------------------------------------------------------------
564  // Wrapper for slow-case handling of double-to-integer conversion
565  // d2i or f2i fast case failed either because it is nan or because
566  // of under/overflow.
567  // Input:  FPU TOS: float value
568  // Output: rax, (rdx): integer (long) result
569
570  address generate_d2i_wrapper(BasicType t, address fcn) {
       // Emits the slow-path wrapper around the C conversion routine `fcn`.
       //   t   - selects only the block comment emitted before the call
       //         (T_INT -> "SharedRuntime::d2i", T_LONG -> "SharedRuntime::d2l").
       //   fcn - address of the C function called with the double argument.
       // The value on the FPU top of stack is spilled, CPU and FPU state are
       // saved around the call, and rax (and rdx) are left untouched by the
       // restore sequence so they carry the result.
       // Returns the pc of the first instruction of the stub.
571    StubCodeMark mark(this, "StubRoutines", "d2i_wrapper");
572    address start = __ pc();
573
574  // Capture info about frame layout
       // Offsets are in words from rsp after push_FPU_state(); the order is
       // the reverse of the push sequence below.
575  enum layout { FPUState_off         = 0,
576                rbp_off              = FPUStateSizeInWords,
577                rdi_off,
578                rsi_off,
579                rcx_off,
580                rbx_off,
581                saved_argument_off,
582                saved_argument_off2, // 2nd half of double
583                framesize
584  };
585
586  assert(FPUStateSizeInWords == 27, "update stack layout");
587
588    // Save outgoing argument to stack across push_FPU_state()
589    __ subptr(rsp, wordSize * 2);
590    __ fstp_d(Address(rsp, 0));
591
592    // Save CPU & FPU state
593    __ push(rbx);
594    __ push(rcx);
595    __ push(rsi);
596    __ push(rdi);
597    __ push(rbp);
598    __ push_FPU_state();
599
600    // push_FPU_state() resets the FP top of stack
601    // Load original double into FP top of stack
602    __ fld_d(Address(rsp, saved_argument_off * wordSize));
603    // Store double into stack as outgoing argument
604    __ subptr(rsp, wordSize*2);
605    __ fst_d(Address(rsp, 0));
606
607    // Prepare FPU for doing math in C-land
608    __ empty_FPU_stack();
609    // Call the C code to massage the double.  Result in EAX
610    if (t == T_INT)
611      { BLOCK_COMMENT("SharedRuntime::d2i"); }
612    else if (t == T_LONG)
613      { BLOCK_COMMENT("SharedRuntime::d2l"); }
       // NOTE(review): the two outgoing argument words pushed above are not
       // popped explicitly here; presumably call_VM_leaf(fcn, 2) removes
       // them -- confirm against MacroAssembler::call_VM_leaf.
614    __ call_VM_leaf( fcn, 2 );
615
616    // Restore CPU & FPU state
617    __ pop_FPU_state();
618    __ pop(rbp);
619    __ pop(rdi);
620    __ pop(rsi);
621    __ pop(rcx);
622    __ pop(rbx);
       // Release the two words that held the spilled double argument.
623    __ addptr(rsp, wordSize * 2);
624
625    __ ret(0);
626
627    return start;
628  }
629
630
631  //---------------------------------------------------------------------------
632  // The following routine generates a subroutine to throw an asynchronous
633  // UnknownError when an unsafe access gets a fault that could not be
634  // reasonably prevented by the programmer.  (Example: SIGBUS/OBJERR.)
635  address generate_handler_for_unsafe_access() {
636    StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
637    address start = __ pc();
638
639    __ push(0);                       // hole for return address-to-be
640    __ pusha();                       // push registers
641    Address next_pc(rsp, RegisterImpl::number_of_registers * BytesPerWord);
642    BLOCK_COMMENT("call handle_unsafe_access");
643    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, handle_unsafe_access)));
644    __ movptr(next_pc, rax);          // stuff next address
645    __ popa();
646    __ ret(0);                        // jump to next address
647
648    return start;
649  }
650
651
652  //----------------------------------------------------------------------------------------------------
653  // Non-destructive plausibility checks for oops
654
655  address generate_verify_oop() {
       // Emits the verify_oop stub. It checks that the object passed on the
       // stack is NULL or has address bits matching the Universe oop mask,
       // and that its klass and the klass' klass are non-NULL and match the
       // klass mask. On success it restores rax, rdx and EFLAGS and returns,
       // popping three words; on failure it calls MacroAssembler::debug32.
       // Returns the pc of the first instruction of the stub.
656    StubCodeMark mark(this, "StubRoutines", "verify_oop");
657    address start = __ pc();
658
659    // Incoming arguments on stack after saving rdx and EFLAGS:
660    //
661    // [tos    ]: saved rdx
662    // [tos + 1]: saved EFLAGS
663    // [tos + 2]: return address
664    // [tos + 3]: char* error message
665    // [tos + 4]: oop   object to verify
666    // [tos + 5]: saved rax - saved by caller and bashed
667
668    Label exit, error;
669    __ pushf();
       // Count every invocation of the stub.
670    __ incrementl(ExternalAddress((address) StubRoutines::verify_oop_count_addr()));
671    __ push(rdx);                                // save rdx
672    // make sure object is 'reasonable'
673    __ movptr(rax, Address(rsp, 4 * wordSize));    // get object
674    __ testptr(rax, rax);
675    __ jcc(Assembler::zero, exit);               // if obj is NULL it is ok
676
677    // Check if the oop is in the right area of memory
       // (mask and expected bits are read when the stub is generated)
678    const int oop_mask = Universe::verify_oop_mask();
679    const int oop_bits = Universe::verify_oop_bits();
680    __ mov(rdx, rax);
681    __ andptr(rdx, oop_mask);
682    __ cmpptr(rdx, oop_bits);
683    __ jcc(Assembler::notZero, error);
684
685    // make sure klass is 'reasonable'
686    __ movptr(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // get klass
687    __ testptr(rax, rax);
688    __ jcc(Assembler::zero, error);              // if klass is NULL it is broken
689
690    // Check if the klass is in the right area of memory
691    const int klass_mask = Universe::verify_klass_mask();
692    const int klass_bits = Universe::verify_klass_bits();
693    __ mov(rdx, rax);
694    __ andptr(rdx, klass_mask);
695    __ cmpptr(rdx, klass_bits);
696    __ jcc(Assembler::notZero, error);
697
698    // make sure klass' klass is 'reasonable'
699    __ movptr(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // get klass' klass
700    __ testptr(rax, rax);
701    __ jcc(Assembler::zero, error);              // if klass' klass is NULL it is broken
702
703    __ mov(rdx, rax);
704    __ andptr(rdx, klass_mask);
705    __ cmpptr(rdx, klass_bits);
706    __ jcc(Assembler::notZero, error);           // if klass not in right area
707                                                 // of memory it is broken too.
708
709    // return if everything seems ok
710    __ bind(exit);
711    __ movptr(rax, Address(rsp, 5 * wordSize));  // get saved rax back
712    __ pop(rdx);                                 // restore rdx
713    __ popf();                                   // restore EFLAGS
       // Pops the error message, the oop and the saved rax slot.
714    __ ret(3 * wordSize);                        // pop arguments
715
716    // handle errors
717    __ bind(error);
718    __ movptr(rax, Address(rsp, 5 * wordSize));  // get saved rax back
719    __ pop(rdx);                                 // get saved rdx back
720    __ popf();                                   // get saved EFLAGS off stack -- will be ignored
721    __ pusha();                                  // push registers (eip = return address & msg are already pushed)
722    BLOCK_COMMENT("call MacroAssembler::debug");
723    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
724    __ popa();
725    __ ret(3 * wordSize);                        // pop arguments
726    return start;
727  }
728
729  //
730  //  Generate pre-barrier for array stores
731  //
732  //  Input:
733  //     start   -  starting address
734  //     count   -  element count
735  void  gen_write_ref_array_pre_barrier(Register start, Register count, bool uninitialized_target) {
736    assert_different_registers(start, count);
737    BarrierSet* bs = Universe::heap()->barrier_set();
738    switch (bs->kind()) {
739      case BarrierSet::G1SATBCT:
740      case BarrierSet::G1SATBCTLogging:
741        // With G1, don't generate the call if we statically know that the target in uninitialized
742        if (!uninitialized_target) {
743           __ pusha();                      // push registers
744           __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre),
745                           start, count);
746           __ popa();
747         }
748        break;
749      case BarrierSet::CardTableModRef:
750      case BarrierSet::CardTableExtension:
751      case BarrierSet::ModRef:
752        break;
753      default      :
754        ShouldNotReachHere();
755
756    }
757  }
758
759
760  //
761  // Generate a post-barrier for an array store
762  //
763  //     start    -  starting address
764  //     count    -  element count
765  //
766  //  The two input registers are overwritten.
767  //
768  void  gen_write_ref_array_post_barrier(Register start, Register count) {
       // Emits the post-barrier for a reference array store, selected by the
       // kind of the heap's barrier set:
       //   G1 kinds         - call BarrierSet::static_write_ref_array_post
       //                      with all registers saved around the call;
       //   card table kinds - store 0 into every card table byte from the
       //                      card of the last element down to the card of
       //                      the first element;
       //   ModRef           - nothing is emitted.
769    BarrierSet* bs = Universe::heap()->barrier_set();
770    assert_different_registers(start, count);
771    switch (bs->kind()) {
772      case BarrierSet::G1SATBCT:
773      case BarrierSet::G1SATBCTLogging:
774        {
775          __ pusha();                      // push registers
776          __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post),
777                          start, count);
778          __ popa();
779        }
780        break;
781
782      case BarrierSet::CardTableModRef:
783      case BarrierSet::CardTableExtension:
784        {
785          CardTableModRefBS* ct = (CardTableModRefBS*)bs;
786          assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
787
788          Label L_loop;
             // `end` is an alias for the `count` register, not a new register.
789          const Register end = count;  // elements count; end == start+count-1
790          assert_different_registers(start, end);
791
             // end = address of the last element; then both addresses are
             // converted to card indices and end becomes (last card - first card).
792          __ lea(end,  Address(start, count, Address::times_ptr, -wordSize));
793          __ shrptr(start, CardTableModRefBS::card_shift);
794          __ shrptr(end,   CardTableModRefBS::card_shift);
795          __ subptr(end, start); // end --> count
796        __ BIND(L_loop);
             // Card byte address = byte_map_base + first card index + count.
797          intptr_t disp = (intptr_t) ct->byte_map_base;
798          Address cardtable(start, count, Address::times_1, disp);
799          __ movb(cardtable, 0);
800          __ decrement(count);
             // Loop while count >= 0 so the first card (count == 0) is also written.
801          __ jcc(Assembler::greaterEqual, L_loop);
802        }
803        break;
804      case BarrierSet::ModRef:
805        break;
806      default      :
807        ShouldNotReachHere();
808
809    }
810  }
811
812
813  // Copy 64 bytes chunks
814  //
815  // Inputs:
816  //   from        - source array address
817  //   to_from     - destination array address - from
818  //   qword_count - 8-bytes element count (positive; counted down to zero by the copy loops)
819  //
820  void xmm_copy_forward(Register from, Register to_from, Register qword_count) {
       // Emits a forward copy of qword_count 8-byte elements using XMM
       // registers: 64-byte chunks first, then the remaining qwords one by one.
       //   from        - register holding the source address (advanced as
       //                 the copy proceeds)
       //   to_from     - register holding (destination - source); each store
       //                 goes to from + to_from
       //   qword_count - register holding the number of 8-byte elements; it
       //                 is decremented by the loops
       // Requires UseSSE >= 2 (asserted below).
821    assert( UseSSE >= 2, "supported cpu only" );
822    Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
823    // Copy 64-byte chunks
       // Jump to the loop test first so counts below 8 skip the chunk loop.
824    __ jmpb(L_copy_64_bytes);
825    __ align(OptoLoopAlignment);
826  __ BIND(L_copy_64_bytes_loop);
827
828    if(UseUnalignedLoadStores) {
         // Four 16-byte unaligned moves per 64-byte chunk.
829      __ movdqu(xmm0, Address(from, 0));
830      __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0);
831      __ movdqu(xmm1, Address(from, 16));
832      __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1);
833      __ movdqu(xmm2, Address(from, 32));
834      __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2);
835      __ movdqu(xmm3, Address(from, 48));
836      __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3);
837
838    } else {
         // Eight 8-byte moves per 64-byte chunk.
839      __ movq(xmm0, Address(from, 0));
840      __ movq(Address(from, to_from, Address::times_1, 0), xmm0);
841      __ movq(xmm1, Address(from, 8));
842      __ movq(Address(from, to_from, Address::times_1, 8), xmm1);
843      __ movq(xmm2, Address(from, 16));
844      __ movq(Address(from, to_from, Address::times_1, 16), xmm2);
845      __ movq(xmm3, Address(from, 24));
846      __ movq(Address(from, to_from, Address::times_1, 24), xmm3);
847      __ movq(xmm4, Address(from, 32));
848      __ movq(Address(from, to_from, Address::times_1, 32), xmm4);
849      __ movq(xmm5, Address(from, 40));
850      __ movq(Address(from, to_from, Address::times_1, 40), xmm5);
851      __ movq(xmm6, Address(from, 48));
852      __ movq(Address(from, to_from, Address::times_1, 48), xmm6);
853      __ movq(xmm7, Address(from, 56));
854      __ movq(Address(from, to_from, Address::times_1, 56), xmm7);
855    }
856
857    __ addl(from, 64);
858  __ BIND(L_copy_64_bytes);
       // Loop while at least 8 qwords remain; afterwards undo the last
       // subtraction to recover the 0..7 qwords left over.
859    __ subl(qword_count, 8);
860    __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);
861    __ addl(qword_count, 8);
862    __ jccb(Assembler::zero, L_exit);
863    //
864    // length is too short, just copy qwords
865    //
866  __ BIND(L_copy_8_bytes);
867    __ movq(xmm0, Address(from, 0));
868    __ movq(Address(from, to_from, Address::times_1), xmm0);
869    __ addl(from, 8);
870    __ decrement(qword_count);
871    __ jcc(Assembler::greater, L_copy_8_bytes);
872  __ BIND(L_exit);
873  }
874
875  // Copy 64 bytes chunks
876  //
     // Emits a forward (ascending address) copy of 'qword_count' 8-byte units
     // through the MMX registers mmx0..mmx7: a 64-byte-per-iteration main loop
     // followed by a one-qword-per-iteration tail loop.  The emitted code
     // modifies 'from' and 'qword_count' and finishes with emms().
     //
877  // Inputs:
878  //   from        - source array address
879  //   to_from     - destination array address - from
880  //   qword_count - 8-bytes element count; counted down by the emitted loops, so expected to be >= 0
881  //
882  void mmx_copy_forward(Register from, Register to_from, Register qword_count) {
883    assert( VM_Version::supports_mmx(), "supported cpu only" );
884    Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
885    // Copy 64-byte chunks
       // Jump straight to the loop test so that counts below 8 skip the
       // 64-byte loop body entirely.
886    __ jmpb(L_copy_64_bytes);
887    __ align(OptoLoopAlignment);
888  __ BIND(L_copy_64_bytes_loop);
       // Eight 8-byte loads and eight 8-byte stores per iteration; the stores
       // are interleaved a few instructions behind the corresponding loads.
889    __ movq(mmx0, Address(from, 0));
890    __ movq(mmx1, Address(from, 8));
891    __ movq(mmx2, Address(from, 16));
892    __ movq(Address(from, to_from, Address::times_1, 0), mmx0);
893    __ movq(mmx3, Address(from, 24));
894    __ movq(Address(from, to_from, Address::times_1, 8), mmx1);
895    __ movq(mmx4, Address(from, 32));
896    __ movq(Address(from, to_from, Address::times_1, 16), mmx2);
897    __ movq(mmx5, Address(from, 40));
898    __ movq(Address(from, to_from, Address::times_1, 24), mmx3);
899    __ movq(mmx6, Address(from, 48));
900    __ movq(Address(from, to_from, Address::times_1, 32), mmx4);
901    __ movq(mmx7, Address(from, 56));
902    __ movq(Address(from, to_from, Address::times_1, 40), mmx5);
903    __ movq(Address(from, to_from, Address::times_1, 48), mmx6);
904    __ movq(Address(from, to_from, Address::times_1, 56), mmx7);
       // Only the source pointer is advanced; the destination follows because
       // stores are addressed as [from + to_from].
905    __ addptr(from, 64);
906  __ BIND(L_copy_64_bytes);
       // Loop test: take another 64-byte iteration while at least 8 qwords remain.
907    __ subl(qword_count, 8);
908    __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);
       // Undo the last subtraction; qword_count is now the number of qwords
       // (fewer than 8) that are still to be copied.
909    __ addl(qword_count, 8);
910    __ jccb(Assembler::zero, L_exit);
911    //
912    // length is too short, just copy qwords
913    //
914  __ BIND(L_copy_8_bytes);
915    __ movq(mmx0, Address(from, 0));
916    __ movq(Address(from, to_from, Address::times_1), mmx0);
917    __ addptr(from, 8);
918    __ decrement(qword_count);
919    __ jcc(Assembler::greater, L_copy_8_bytes);
920  __ BIND(L_exit);
       // Emitted on every path through this code (L_exit is bound just above);
       // presumably the EMMS instruction that releases the MMX state.
921    __ emms();
922  }
923
924  address generate_disjoint_copy(BasicType t, bool aligned,
925                                 Address::ScaleFactor sf,
926                                 address* entry, const char *name,
927                                 bool dest_uninitialized = false) {
       // Emits the stub that copies 'count' elements of type 't' from low to
       // high addresses and returns the stub's start address.
       //
       //   t                  - element type; selects which tail copies and which
       //                        GC barriers (T_OBJECT only) are emitted
       //   aligned            - when false (and UseUnalignedLoadStores is off) byte
       //                        and short copies first align the source address
       //   sf                 - element scale; 'shift' below is times_ptr - sf
       //   entry              - if non-NULL, receives the address just after the
       //                        argument loads (used by the conjoint stub)
       //   name               - stub name handed to StubCodeMark
       //   dest_uninitialized - forwarded to gen_write_ref_array_pre_barrier
       //
       // The emitted stub reads from, to and count from the stack and returns 0
       // in rax.
928    __ align(CodeEntryAlignment);
929    StubCodeMark mark(this, "StubRoutines", name);
930    address start = __ pc();
931
932    Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte;
933    Label L_copy_2_bytes, L_copy_4_bytes, L_copy_64_bytes;
934
       // (1 << shift) elements make up one 4-byte unit and (2 << shift) elements
       // make up 8 bytes -- see the dword tests and the "< 8 bytes" check below.
935    int shift = Address::times_ptr - sf;
936
937    const Register from     = rsi;  // source array address
938    const Register to       = rdi;  // destination array address
939    const Register count    = rcx;  // elements count
940    const Register to_from  = to;   // (to - from)
941    const Register saved_to = rdx;  // saved destination array address
942
943    __ enter(); // required for proper stackwalking of RuntimeStub frame
944    __ push(rsi);
945    __ push(rdi);
       // The constant 12 skips what enter() and the two pushes placed on the
       // stack (presumably saved rbp, rsi and rdi); +4/+8/+12 then select the
       // three incoming arguments.
946    __ movptr(from , Address(rsp, 12+ 4));
947    __ movptr(to   , Address(rsp, 12+ 8));
948    __ movl(count, Address(rsp, 12+ 12));
949
950    if (entry != NULL) {
951      *entry = __ pc(); // Entry point from conjoint arraycopy stub.
952      BLOCK_COMMENT("Entry:");
953    }
954
955    if (t == T_OBJECT) {
         // Oop arrays: skip everything (including both barriers) for an empty
         // copy, otherwise emit the pre-barrier and remember 'to', which is
         // turned into 'to_from' below.
956      __ testl(count, count);
957      __ jcc(Assembler::zero, L_0_count);
958      gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
959      __ mov(saved_to, to);          // save 'to'
960    }
961
962    __ subptr(to, from); // to --> to_from
963    __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
964    __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
965    if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
966      // align source address at 4 bytes address boundary
967      if (t == T_BYTE) {
968        // One byte misalignment happens only for byte arrays
969        __ testl(from, 1);
970        __ jccb(Assembler::zero, L_skip_align1);
971        __ movb(rax, Address(from, 0));
972        __ movb(Address(from, to_from, Address::times_1, 0), rax);
973        __ increment(from);
974        __ decrement(count);
975      __ BIND(L_skip_align1);
976      }
977      // Two bytes misalignment happens only for byte and short (char) arrays
978      __ testl(from, 2);
979      __ jccb(Assembler::zero, L_skip_align2);
980      __ movw(rax, Address(from, 0));
981      __ movw(Address(from, to_from, Address::times_1, 0), rax);
982      __ addptr(from, 2);
         // 1 << (shift-1) is the number of elements in the 2 bytes just copied.
983      __ subl(count, 1<<(shift-1));
984    __ BIND(L_skip_align2);
985    }
986    if (!VM_Version::supports_mmx()) {
         // No MMX: copy all whole dwords with rep_mov() (presumably REP MOVS,
         // which works on rsi/rdi/rcx -- hence 'to' is rebuilt in rdi first and
         // the element count is parked in rax).
987      __ mov(rax, count);      // save 'count'
988      __ shrl(count, shift); // dword count
989      __ addptr(to_from, from);// restore 'to'
990      __ rep_mov();
991      __ subptr(to_from, from);// restore 'to_from'
992      __ mov(count, rax);      // restore 'count'
993      __ jmpb(L_copy_2_bytes); // all dwords were copied
994    } else {
995      if (!UseUnalignedLoadStores) {
996        // align to 8 bytes, we know we are 4 byte aligned to start
997        __ testptr(from, 4);
998        __ jccb(Assembler::zero, L_copy_64_bytes);
999        __ movl(rax, Address(from, 0));
1000        __ movl(Address(from, to_from, Address::times_1, 0), rax);
1001        __ addptr(from, 4);
1002        __ subl(count, 1<<shift);
1003      }
1004    __ BIND(L_copy_64_bytes);
          // rax receives the number of whole 8-byte units; 'count' itself keeps
          // the element count so the tail tests below can inspect its low bits.
1005      __ mov(rax, count);
1006      __ shrl(rax, shift+1);  // 8 bytes chunk count
1007      //
1008      // Copy 8-byte chunks through MMX registers, 8 per iteration of the loop
1009      //
1010      if (UseXMMForArrayCopy) {
1011        xmm_copy_forward(from, to_from, rax);
1012      } else {
1013        mmx_copy_forward(from, to_from, rax);
1014      }
1015    }
1016    // copy tailing dword
1017  __ BIND(L_copy_4_bytes);
1018    __ testl(count, 1<<shift);
1019    __ jccb(Assembler::zero, L_copy_2_bytes);
1020    __ movl(rax, Address(from, 0));
1021    __ movl(Address(from, to_from, Address::times_1, 0), rax);
        // The remaining tail code is emitted only for element types that can
        // leave a 2-byte (byte/short) or 1-byte (byte) remainder.  For the other
        // types the labels are bound in the else branches so that jumps to them
        // simply fall through to the epilogue.
1022    if (t == T_BYTE || t == T_SHORT) {
1023      __ addptr(from, 4);
1024    __ BIND(L_copy_2_bytes);
1025      // copy tailing word
1026      __ testl(count, 1<<(shift-1));
1027      __ jccb(Assembler::zero, L_copy_byte);
1028      __ movw(rax, Address(from, 0));
1029      __ movw(Address(from, to_from, Address::times_1, 0), rax);
1030      if (t == T_BYTE) {
1031        __ addptr(from, 2);
1032      __ BIND(L_copy_byte);
1033        // copy tailing byte
1034        __ testl(count, 1);
1035        __ jccb(Assembler::zero, L_exit);
1036        __ movb(rax, Address(from, 0));
1037        __ movb(Address(from, to_from, Address::times_1, 0), rax);
1038      __ BIND(L_exit);
1039      } else {
1040      __ BIND(L_copy_byte);
1041      }
1042    } else {
1043    __ BIND(L_copy_2_bytes);
1044    }
1045
1046    if (t == T_OBJECT) {
          // The copy modified 'count' and turned 'to' into 'to_from'; reload and
          // restore both before emitting the post-barrier.
1047      __ movl(count, Address(rsp, 12+12)); // reread 'count'
1048      __ mov(to, saved_to); // restore 'to'
1049      gen_write_ref_array_post_barrier(to, count);
1050    __ BIND(L_0_count);
1051    }
1052    inc_copy_counter_np(t);
        // Epilogue: pops mirror the pushes of the prologue in reverse order.
1053    __ pop(rdi);
1054    __ pop(rsi);
1055    __ leave(); // required for proper stackwalking of RuntimeStub frame
1056    __ xorptr(rax, rax); // return 0
1057    __ ret(0);
1058    return start;
1059  }
1060
1061
1062  address generate_fill(BasicType t, bool aligned, const char *name) {
        // Emits an array-fill stub and returns its start address.  The stub
        // loads (to, value, count) from the stack and delegates the actual fill
        // code to the MacroAssembler's generate_fill(); 't' and 'aligned' are
        // forwarded to it unchanged.  rax and xmm0 are handed over as well,
        // presumably as scratch registers -- confirm against the MacroAssembler.
1063    __ align(CodeEntryAlignment);
1064    StubCodeMark mark(this, "StubRoutines", name);
1065    address start = __ pc();
1066
1067    BLOCK_COMMENT("Entry:");
1068
1069    const Register to       = rdi;  // address of the array to fill (first stack argument)
1070    const Register value    = rdx;  // value
1071    const Register count    = rsi;  // elements count
1072
1073    __ enter(); // required for proper stackwalking of RuntimeStub frame
1074    __ push(rsi);
1075    __ push(rdi);
        // The constant 12 skips what enter() and the two pushes placed on the
        // stack; +4/+8/+12 then select the three incoming arguments.
1076    __ movptr(to   , Address(rsp, 12+ 4));
1077    __ movl(value, Address(rsp, 12+ 8));
1078    __ movl(count, Address(rsp, 12+ 12));
1079
1080    __ generate_fill(t, aligned, to, value, count, rax, xmm0);
1081
        // Epilogue: restore the saved registers in reverse push order.  Unlike
        // the copy stubs in this file, rax is not cleared before returning.
1082    __ pop(rdi);
1083    __ pop(rsi);
1084    __ leave(); // required for proper stackwalking of RuntimeStub frame
1085    __ ret(0);
1086    return start;
1087  }
1088
1089  address generate_conjoint_copy(BasicType t, bool aligned,
1090                                 Address::ScaleFactor sf,
1091                                 address nooverlap_target,
1092                                 address* entry, const char *name,
1093                                 bool dest_uninitialized = false) {
        // Emits the stub that copies 'count' elements of type 't' when source
        // and destination may overlap, and returns the stub's start address.
        //
        // The emitted code first compares the two ranges and jumps to
        // 'nooverlap_target' when dst <= src or dst >= src + count*element_size
        // (unsigned compares); otherwise it copies from high to low addresses.
        //
        //   sf                 - element scale; 'shift' below is times_ptr - sf
        //   nooverlap_target   - entry of the forward-copy code, which expects its
        //                        arguments in rsi/rdi (see the moves below)
        //   entry              - if non-NULL, receives the address just after the
        //                        argument loads
        //   name               - stub name handed to StubCodeMark
        //   dest_uninitialized - forwarded to gen_write_ref_array_pre_barrier
        //
        // 'aligned' is not referenced by this function.  The stub returns 0 in rax.
1094    __ align(CodeEntryAlignment);
1095    StubCodeMark mark(this, "StubRoutines", name);
1096    address start = __ pc();
1097
1098    Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte;
1099    Label L_copy_2_bytes, L_copy_4_bytes, L_copy_8_bytes, L_copy_8_bytes_loop;
1100
        // (1 << shift) elements make up one 4-byte unit and (2 << shift) elements
        // make up 8 bytes -- see the dword tests and the "< 8 bytes" check below.
1101    int shift = Address::times_ptr - sf;
1102
        // Note the aliasing: 'src' and 'end' are both rax.  'dst' (rdx) is also
        // used as a scratch register once its value has been copied to 'to'.
1103    const Register src   = rax;  // source array address
1104    const Register dst   = rdx;  // destination array address
1105    const Register from  = rsi;  // source array address
1106    const Register to    = rdi;  // destination array address
1107    const Register count = rcx;  // elements count
1108    const Register end   = rax;  // array end address
1109
1110    __ enter(); // required for proper stackwalking of RuntimeStub frame
1111    __ push(rsi);
1112    __ push(rdi);
1113    __ movptr(src  , Address(rsp, 12+ 4));   // from
1114    __ movptr(dst  , Address(rsp, 12+ 8));   // to
1115    __ movl2ptr(count, Address(rsp, 12+12)); // count
1116
1117    if (entry != NULL) {
1118      *entry = __ pc(); // Entry point from generic arraycopy stub.
1119      BLOCK_COMMENT("Entry:");
1120    }
1121
1122    // nooverlap_target expects arguments in rsi and rdi.
1123    __ mov(from, src);
1124    __ mov(to  , dst);
1125
1126    // arrays overlap test: dispatch to disjoint stub if necessary.
1127    RuntimeAddress nooverlap(nooverlap_target);
        // The lea sits between the compare and the conditional jump; this relies
        // on lea leaving the flags of the compare intact.  It overwrites 'src'
        // (same register as 'end'), which is fine because 'from' holds a copy.
1128    __ cmpptr(dst, src);
1129    __ lea(end, Address(src, count, sf, 0)); // src + count * elem_size
1130    __ jump_cc(Assembler::belowEqual, nooverlap);
1131    __ cmpptr(dst, end);
1132    __ jump_cc(Assembler::aboveEqual, nooverlap);
1133
1134    if (t == T_OBJECT) {
          // Oop arrays: skip everything (including both barriers) when empty.
1135      __ testl(count, count);
1136      __ jcc(Assembler::zero, L_0_count);
1137      gen_write_ref_array_pre_barrier(dst, count, dest_uninitialized);
1138    }
1139
1140    // copy from high to low
1141    __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
1142    __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
1143    if (t == T_BYTE || t == T_SHORT) {
1144      // Align the end of destination array at 4 bytes address boundary
1145      __ lea(end, Address(dst, count, sf, 0));
1146      if (t == T_BYTE) {
1147        // One byte misalignment happens only for byte arrays
1148        __ testl(end, 1);
1149        __ jccb(Assembler::zero, L_skip_align1);
            // Copy the last element; 'count' is reduced first so that it indexes
            // that element.  rdx is only a scratch register from here on.
1150        __ decrement(count);
1151        __ movb(rdx, Address(from, count, sf, 0));
1152        __ movb(Address(to, count, sf, 0), rdx);
1153      __ BIND(L_skip_align1);
1154      }
1155      // Two bytes misalignment happens only for byte and short (char) arrays
1156      __ testl(end, 2);
1157      __ jccb(Assembler::zero, L_skip_align2);
1158      __ subptr(count, 1<<(shift-1));
1159      __ movw(rdx, Address(from, count, sf, 0));
1160      __ movw(Address(to, count, sf, 0), rdx);
1161    __ BIND(L_skip_align2);
          // The alignment copies may have dropped the count below 8 bytes.
1162      __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
1163      __ jcc(Assembler::below, L_copy_4_bytes);
1164    }
1165
1166    if (!VM_Version::supports_mmx()) {
          // No MMX: copy all whole dwords backwards with rep_mov() between std()
          // and cld() (presumably REP MOVS with the direction flag set).  rsi/rdi
          // are pointed at the last dword of each array; count and 'to' are
          // parked in rax/rdx and 'from' is reloaded from the stack afterwards.
1167      __ std();
1168      __ mov(rax, count); // Save 'count'
1169      __ mov(rdx, to);    // Save 'to'
1170      __ lea(rsi, Address(from, count, sf, -4));
1171      __ lea(rdi, Address(to  , count, sf, -4));
1172      __ shrptr(count, shift); // dword count
1173      __ rep_mov();
1174      __ cld();
1175      __ mov(count, rax); // restore 'count'
1176      __ andl(count, (1<<shift)-1);      // mask the number of rest elements
1177      __ movptr(from, Address(rsp, 12+4)); // reread 'from'
1178      __ mov(to, rdx);   // restore 'to'
1179      __ jmpb(L_copy_2_bytes); // all dword were copied
1180   } else {
1181      // Align to 8 bytes the end of array. It is aligned to 4 bytes already.
          // NOTE(review): for element types other than byte/short, 'end' still
          // holds the source end computed for the overlap test -- confirm intent.
1182      __ testptr(end, 4);
1183      __ jccb(Assembler::zero, L_copy_8_bytes);
1184      __ subl(count, 1<<shift);
1185      __ movl(rdx, Address(from, count, sf, 0));
1186      __ movl(Address(to, count, sf, 0), rdx);
1187      __ jmpb(L_copy_8_bytes);
1188
1189      __ align(OptoLoopAlignment);
1190      // Move 8 bytes
1191    __ BIND(L_copy_8_bytes_loop);
          // 'count' was already reduced by 8 bytes' worth of elements at the loop
          // test, so it indexes the 8-byte unit that is copied here.
1192      if (UseXMMForArrayCopy) {
1193        __ movq(xmm0, Address(from, count, sf, 0));
1194        __ movq(Address(to, count, sf, 0), xmm0);
1195      } else {
1196        __ movq(mmx0, Address(from, count, sf, 0));
1197        __ movq(Address(to, count, sf, 0), mmx0);
1198      }
1199    __ BIND(L_copy_8_bytes);
1200      __ subl(count, 2<<shift);
1201      __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
          // Undo the last subtraction; fewer than 8 bytes' worth of elements remain.
1202      __ addl(count, 2<<shift);
1203      if (!UseXMMForArrayCopy) {
1204        __ emms();
1205      }
1206    }
1207  __ BIND(L_copy_4_bytes);
1208    // copy prefix dword
        // The displacement -4 addresses the 4 bytes just below index 'count'.
1209    __ testl(count, 1<<shift);
1210    __ jccb(Assembler::zero, L_copy_2_bytes);
1211    __ movl(rdx, Address(from, count, sf, -4));
1212    __ movl(Address(to, count, sf, -4), rdx);
1213
        // The remaining prefix code is emitted only for element types that can
        // leave a 2-byte (byte/short) or 1-byte (byte) remainder.  For the other
        // types the labels are bound in the else branches so that jumps to them
        // simply fall through to the epilogue.
1214    if (t == T_BYTE || t == T_SHORT) {
1215        __ subl(count, (1<<shift));
1216      __ BIND(L_copy_2_bytes);
1217        // copy prefix word
1218        __ testl(count, 1<<(shift-1));
1219        __ jccb(Assembler::zero, L_copy_byte);
1220        __ movw(rdx, Address(from, count, sf, -2));
1221        __ movw(Address(to, count, sf, -2), rdx);
1222        if (t == T_BYTE) {
1223          __ subl(count, 1<<(shift-1));
1224        __ BIND(L_copy_byte);
1225          // copy prefix byte
              // Copying runs from high to low, so the last byte left over is the
              // one at offset 0 of both arrays.
1226          __ testl(count, 1);
1227          __ jccb(Assembler::zero, L_exit);
1228          __ movb(rdx, Address(from, 0));
1229          __ movb(Address(to, 0), rdx);
1230        __ BIND(L_exit);
1231        } else {
1232        __ BIND(L_copy_byte);
1233        }
1234    } else {
1235    __ BIND(L_copy_2_bytes);
1236    }
1237    if (t == T_OBJECT) {
          // 'count' was consumed by the copy; reload it for the post-barrier.
1238      __ movl2ptr(count, Address(rsp, 12+12)); // reread count
1239      gen_write_ref_array_post_barrier(to, count);
1240    __ BIND(L_0_count);
1241    }
1242    inc_copy_counter_np(t);
        // Epilogue: pops mirror the pushes of the prologue in reverse order.
1243    __ pop(rdi);
1244    __ pop(rsi);
1245    __ leave(); // required for proper stackwalking of RuntimeStub frame
1246    __ xorptr(rax, rax); // return 0
1247    __ ret(0);
1248    return start;
1249  }
1250
1251
1252  address generate_disjoint_long_copy(address* entry, const char *name) {
        // Emits the stub that copies 'count' 8-byte elements from low to high
        // addresses and returns the stub's start address.  '*entry' receives the
        // address just after the argument loads.
        // NOTE(review): unlike generate_disjoint_copy, 'entry' is written without
        // a NULL check, so callers must pass a valid pointer.
        // The stub saves no rsi/rdi, so its arguments are read at rsp+8.. right
        // after enter().  It returns 0 in rax.
1253    __ align(CodeEntryAlignment);
1254    StubCodeMark mark(this, "StubRoutines", name);
1255    address start = __ pc();
1256
1257    Label L_copy_8_bytes, L_copy_8_bytes_loop;
        // 'to' and 'to_from' name the same register (rdx): after the subptr
        // below it holds the destination-minus-source distance.
1258    const Register from       = rax;  // source array address
1259    const Register to         = rdx;  // destination array address
1260    const Register count      = rcx;  // elements count
1261    const Register to_from    = rdx;  // (to - from)
1262
1263    __ enter(); // required for proper stackwalking of RuntimeStub frame
1264    __ movptr(from , Address(rsp, 8+0));       // from
1265    __ movptr(to   , Address(rsp, 8+4));       // to
1266    __ movl2ptr(count, Address(rsp, 8+8));     // count
1267
1268    *entry = __ pc(); // Entry point from conjoint arraycopy stub.
1269    BLOCK_COMMENT("Entry:");
1270
1271    __ subptr(to, from); // to --> to_from
1272    if (VM_Version::supports_mmx()) {
          // The element count is already a qword count, so it is passed directly.
1273      if (UseXMMForArrayCopy) {
1274        xmm_copy_forward(from, to_from, count);
1275      } else {
1276        mmx_copy_forward(from, to_from, count);
1277      }
1278    } else {
          // No MMX: move one 8-byte element per iteration with fild_d/fistp_d
          // (presumably an x87 64-bit integer load followed by store-and-pop).
          // The loop is entered at its test, so a zero count copies nothing.
1279      __ jmpb(L_copy_8_bytes);
1280      __ align(OptoLoopAlignment);
1281    __ BIND(L_copy_8_bytes_loop);
1282      __ fild_d(Address(from, 0));
1283      __ fistp_d(Address(from, to_from, Address::times_1));
1284      __ addptr(from, 8);
1285    __ BIND(L_copy_8_bytes);
1286      __ decrement(count);
1287      __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
1288    }
1289    inc_copy_counter_np(T_LONG);
1290    __ leave(); // required for proper stackwalking of RuntimeStub frame
1291    __ xorptr(rax, rax); // return 0
1292    __ ret(0);
1293    return start;
1294  }
1295
1296  address generate_conjoint_long_copy(address nooverlap_target,
1297                                      address* entry, const char *name) {
        // Emits the stub that copies 'count' 8-byte elements when source and
        // destination may overlap, and returns the stub's start address.  The
        // emitted code jumps to 'nooverlap_target' when to <= from or
        // to >= from + count*8 (unsigned compares); otherwise it copies from the
        // highest element down to element 0.  '*entry' receives the address just
        // after the argument loads.
        // NOTE(review): unlike generate_conjoint_copy, 'entry' is written without
        // a NULL check, so callers must pass a valid pointer.
        // The stub returns 0 in rax.
1298    __ align(CodeEntryAlignment);
1299    StubCodeMark mark(this, "StubRoutines", name);
1300    address start = __ pc();
1301
1302    Label L_copy_8_bytes, L_copy_8_bytes_loop;
        // 'from' and 'end_from' name the same register (rax).
1303    const Register from       = rax;  // source array address
1304    const Register to         = rdx;  // destination array address
1305    const Register count      = rcx;  // elements count
1306    const Register end_from   = rax;  // source array end address
1307
1308    __ enter(); // required for proper stackwalking of RuntimeStub frame
1309    __ movptr(from , Address(rsp, 8+0));       // from
1310    __ movptr(to   , Address(rsp, 8+4));       // to
1311    __ movl2ptr(count, Address(rsp, 8+8));     // count
1312
1313    *entry = __ pc(); // Entry point from generic arraycopy stub.
1314    BLOCK_COMMENT("Entry:");
1315
1316    // arrays overlap test
1317    __ cmpptr(to, from);
1318    RuntimeAddress nooverlap(nooverlap_target);
1319    __ jump_cc(Assembler::belowEqual, nooverlap);
        // Computing the source end overwrites 'from' (same register), so 'from'
        // is reloaded from the stack.  The reload sits between the compare and
        // the conditional jump and relies on the move leaving the flags intact.
1320    __ lea(end_from, Address(from, count, Address::times_8, 0));
1321    __ cmpptr(to, end_from);
1322    __ movptr(from, Address(rsp, 8));  // from
1323    __ jump_cc(Assembler::aboveEqual, nooverlap);
1324
        // Enter the loop at its test so that a zero count copies nothing.
1325    __ jmpb(L_copy_8_bytes);
1326
1327    __ align(OptoLoopAlignment);
1328  __ BIND(L_copy_8_bytes_loop);
        // 'count' has already been decremented at the loop test, so it indexes
        // the element that is copied here.  The move instruction is chosen at
        // generation time: XMM movq, MMX movq, or fild_d/fistp_d without MMX.
1329    if (VM_Version::supports_mmx()) {
1330      if (UseXMMForArrayCopy) {
1331        __ movq(xmm0, Address(from, count, Address::times_8));
1332        __ movq(Address(to, count, Address::times_8), xmm0);
1333      } else {
1334        __ movq(mmx0, Address(from, count, Address::times_8));
1335        __ movq(Address(to, count, Address::times_8), mmx0);
1336      }
1337    } else {
1338      __ fild_d(Address(from, count, Address::times_8));
1339      __ fistp_d(Address(to, count, Address::times_8));
1340    }
1341  __ BIND(L_copy_8_bytes);
1342    __ decrement(count);
1343    __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
1344
        // emms() is emitted only when the MMX variant of the loop was generated.
1345    if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
1346      __ emms();
1347    }
1348    inc_copy_counter_np(T_LONG);
1349    __ leave(); // required for proper stackwalking of RuntimeStub frame
1350    __ xorptr(rax, rax); // return 0
1351    __ ret(0);
1352    return start;
1353  }
1354
1355
1356  // Helper for generating a dynamic type check.
1357  // The sub_klass must be one of {rbx, rdx, rsi}.
1358  // The temp is killed.
      //
      // Emits code that tests whether 'sub_klass' is a subtype of the klass
      // stored at 'super_klass_addr'; 'super_check_offset_addr' holds that
      // klass' super-check offset.  Both are memory operands, not registers.
      // Control is transferred to *L_success / *L_failure; passing NULL for
      // either one makes that outcome fall through to the code emitted after
      // this helper.
1359  void generate_type_check(Register sub_klass,
1360                           Address& super_check_offset_addr,
1361                           Address& super_klass_addr,
1362                           Register temp,
1363                           Label* L_success, Label* L_failure) {
1364    BLOCK_COMMENT("type_check:");
1365
1366    Label L_fallthrough;
        // LOCAL_JCC emits a conditional jump to *label_ptr, or to L_fallthrough
        // when label_ptr is NULL.
1367#define LOCAL_JCC(assembler_con, label_ptr)                             \
1368    if (label_ptr != NULL)  __ jcc(assembler_con, *(label_ptr));        \
1369    else                    __ jcc(assembler_con, L_fallthrough) /*omit semi*/
1370
1371    // The following is a strange variation of the fast path which requires
1372    // one less register, because needed values are on the argument stack.
1373    // __ check_klass_subtype_fast_path(sub_klass, *super_klass*, temp,
1374    //                                  L_success, L_failure, NULL);
1375    assert_different_registers(sub_klass, temp);
1376
        // Offset of the secondary super cache within a klass; a super-check
        // offset equal to this value is what selects the slow path below.
1377    int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
1378                     Klass::secondary_super_cache_offset_in_bytes());
1379
1380    // if the pointers are equal, we are done (e.g., String[] elements)
1381    __ cmpptr(sub_klass, super_klass_addr);
1382    LOCAL_JCC(Assembler::equal, L_success);
1383
1384    // check the supertype display:
        // 'temp' first holds the super-check offset, then the word loaded from
        // sub_klass at that offset.
1385    __ movl2ptr(temp, super_check_offset_addr);
1386    Address super_check_addr(sub_klass, temp, Address::times_1, 0);
1387    __ movptr(temp, super_check_addr); // load displayed supertype
1388    __ cmpptr(temp, super_klass_addr); // test the super type
1389    LOCAL_JCC(Assembler::equal, L_success);
1390
1391    // if it was a primary super, we can just fail immediately
1392    __ cmpl(super_check_offset_addr, sc_offset);
1393    LOCAL_JCC(Assembler::notEqual, L_failure);
1394
1395    // The repne_scan instruction uses fixed registers, which will get spilled.
1396    // We happen to know this works best when super_klass is in rax.
        // NOTE(review): super_klass aliases 'temp' here, so whether it really is
        // rax depends on the register the caller passes as 'temp'.
1397    Register super_klass = temp;
1398    __ movptr(super_klass, super_klass_addr);
1399    __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg,
1400                                     L_success, L_failure);
1401
1402    __ bind(L_fallthrough);
1403
1404    if (L_success == NULL) { BLOCK_COMMENT("L_success:"); }
1405    if (L_failure == NULL) { BLOCK_COMMENT("L_failure:"); }
1406
1407#undef LOCAL_JCC
1408  }
1409
1410  //
1411  //  Generate checkcasting array copy stub
1412  //
1413  //  Input:
1414  //    4(rsp)   - source array address
1415  //    8(rsp)   - destination array address
1416  //   12(rsp)   - element count, can be zero
1417  //   16(rsp)   - size_t ckoff (super_check_offset)
1418  //   20(rsp)   - oop ckval (super_klass)
1419  //
1420  //  Output:
1421  //    rax, ==  0  -  success
1422  //    rax, == -1^K - failure, where K is partial transfer count
      //                   (the code below produces this as the bitwise NOT of K)
      //
      //  Generator arguments:
      //    name               - stub name handed to StubCodeMark
      //    entry              - if non-NULL, receives the address just after the
      //                         argument loads
      //    dest_uninitialized - forwarded to gen_write_ref_array_pre_barrier
      //
      //  Returns the start address of the generated stub.
1423  //
1424  address generate_checkcast_copy(const char *name, address* entry, bool dest_uninitialized = false) {
1425    __ align(CodeEntryAlignment);
1426    StubCodeMark mark(this, "StubRoutines", name);
1427    address start = __ pc();
1428
1429    Label L_load_element, L_store_element, L_do_card_marks, L_done;
1430
1431    // register use:
1432    //  rax, rdx, rcx -- loop control (end_from, end_to, count)
1433    //  rdi, rsi      -- element access (oop, klass)
1434    //  rbx,           -- temp
1435    const Register from       = rax;    // source array address
1436    const Register to         = rdx;    // destination array address
1437    const Register length     = rcx;    // elements count
1438    const Register elem       = rdi;    // each oop copied
1439    const Register elem_klass = rsi;    // each elem._klass (sub_klass)
1440    const Register temp       = rbx;    // lone remaining temp
1441
1442    __ enter(); // required for proper stackwalking of RuntimeStub frame
1443
1444    __ push(rsi);
1445    __ push(rdi);
1446    __ push(rbx);
1447
        // The constant 16 skips what enter() and the three pushes placed on the
        // stack; the second term is the argument offset listed in the header.
1448    Address   from_arg(rsp, 16+ 4);     // from
1449    Address     to_arg(rsp, 16+ 8);     // to
1450    Address length_arg(rsp, 16+12);     // elements count
1451    Address  ckoff_arg(rsp, 16+16);     // super_check_offset
1452    Address  ckval_arg(rsp, 16+20);     // super_klass
1453
1454    // Load up:
1455    __ movptr(from,     from_arg);
1456    __ movptr(to,         to_arg);
1457    __ movl2ptr(length, length_arg);
1458
1459    if (entry != NULL) {
1460      *entry = __ pc(); // Entry point from generic arraycopy stub.
1461      BLOCK_COMMENT("Entry:");
1462    }
1463
1464    //---------------------------------------------------------------
1465    // Assembler stub will be used for this call to arraycopy
1466    // if the two arrays are subtypes of Object[] but the
1467    // destination array type is not equal to or a supertype
1468    // of the source type.  Each element must be separately
1469    // checked.
1470
1471    // Loop-invariant addresses.  They are exclusive end pointers.
1472    Address end_from_addr(from, length, Address::times_ptr, 0);
1473    Address   end_to_addr(to,   length, Address::times_ptr, 0);
1474
1475    Register end_from = from;           // re-use
1476    Register end_to   = to;             // re-use
1477    Register count    = length;         // re-use
1478
1479    // Loop-variant addresses.  They assume post-incremented count < 0.
1480    Address from_element_addr(end_from, count, Address::times_ptr, 0);
1481    Address   to_element_addr(end_to,   count, Address::times_ptr, 0);
1482    Address elem_klass_addr(elem, oopDesc::klass_offset_in_bytes());
1483
1484    // Copy from low to high addresses, indexed from the end of each array.
1485    gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
1486    __ lea(end_from, end_from_addr);
1487    __ lea(end_to,   end_to_addr);
1488    assert(length == count, "");        // else fix next line:
1489    __ negptr(count);                   // negate and test the length
1490    __ jccb(Assembler::notZero, L_load_element);
1491
1492    // Empty array:  Nothing to do.
1493    __ xorptr(rax, rax);                  // return 0 on (trivial) success
1494    __ jmp(L_done);
1495
1496    // ======== begin loop ========
1497    // (Loop is rotated; its entry is L_load_element.)
1498    // Loop control:
1499    //   for (count = -count; count != 0; count++)
1500    // Base pointers are the exclusive array ends (base + length scaled by Address::times_ptr), indexed with the negative count.
1501    __ align(OptoLoopAlignment);
1502
1503    __ BIND(L_store_element);
1504    __ movptr(to_element_addr, elem);     // store the oop
1505    __ increment(count);                // increment the count toward zero
1506    __ jccb(Assembler::zero, L_do_card_marks);
1507
1508    // ======== loop entry is here ========
1509    __ BIND(L_load_element);
1510    __ movptr(elem, from_element_addr);   // load the oop
        // A null element needs no type check and is stored right away.
1511    __ testptr(elem, elem);
1512    __ jccb(Assembler::zero, L_store_element);
1513
1514    // (Could do a trick here:  Remember last successful non-null
1515    // element stored and make a quick oop equality check on it.)
1516
1517    __ movptr(elem_klass, elem_klass_addr); // query the object klass
        // Success jumps back to L_store_element; NULL for the failure label makes
        // a failed check fall through to the code below.
1518    generate_type_check(elem_klass, ckoff_arg, ckval_arg, temp,
1519                        &L_store_element, NULL);
1520      // (On fall-through, we have failed the element type check.)
1521    // ======== end loop ========
1522
1523    // It was a real error; we must depend on the caller to finish the job.
1524    // Register "count" = -1 * number of *remaining* oops, length_arg = *total* oops.
1525    // Emit GC store barriers for the oops we have copied (length_arg + count),
1526    // and report their number to the caller.
1527    __ addl(count, length_arg);         // transfers = (length - remaining)
1528    __ movl2ptr(rax, count);            // save the value
1529    __ notptr(rax);                     // report (-1^K) to caller
1530    __ movptr(to, to_arg);              // reload
1531    assert_different_registers(to, count, rax);
1532    gen_write_ref_array_post_barrier(to, count);
1533    __ jmpb(L_done);
1534
1535    // Come here on success only.
1536    __ BIND(L_do_card_marks);
        // 'count' and 'to' were consumed by the loop; reload both from the stack
        // for the post-barrier over the whole destination range.
1537    __ movl2ptr(count, length_arg);
1538    __ movptr(to, to_arg);                // reload
1539    gen_write_ref_array_post_barrier(to, count);
1540    __ xorptr(rax, rax);                  // return 0 on success
1541
1542    // Common exit point (success or failure).
1543    __ BIND(L_done);
        // Pops mirror the three pushes of the prologue in reverse order.
1544    __ pop(rbx);
1545    __ pop(rdi);
1546    __ pop(rsi);
1547    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
1548    __ leave(); // required for proper stackwalking of RuntimeStub frame
1549    __ ret(0);
1550
1551    return start;
1552  }
1553
1554  //
1555  //  Generate 'unsafe' array copy stub
1556  //  Though just as safe as the other stubs, it takes an unscaled
1557  //  size_t argument instead of an element count.
1558  //
1559  //  Input:
1560  //    4(rsp)   - source array address
1561  //    8(rsp)   - destination array address
1562  //   12(rsp)   - byte count, can be zero
1563  //
1564  //  Output:
1565  //    rax, ==  0  -  success
1566  //    rax, == -1  -  need to call System.arraycopy
      //  NOTE(review): this stub sets no return value itself; it always ends in
      //  a jump to one of the four copy entries, which produce the result.
1567  //
1568  // Examines the alignment of the operands and dispatches
1569  // to a long, int, short, or byte copy loop.
      //
      // The four '*_copy_entry' arguments are the jump targets for the
      // respective element sizes.  Returns the start address of the stub.
1570  //
1571  address generate_unsafe_copy(const char *name,
1572                               address byte_copy_entry,
1573                               address short_copy_entry,
1574                               address int_copy_entry,
1575                               address long_copy_entry) {
1576
1577    Label L_long_aligned, L_int_aligned, L_short_aligned;
1578
1579    __ align(CodeEntryAlignment);
1580    StubCodeMark mark(this, "StubRoutines", name);
1581    address start = __ pc();
1582
1583    const Register from       = rax;  // source array address
1584    const Register to         = rdx;  // destination array address
1585    const Register count      = rcx;  // byte count on entry, element count after the shifts below
1586
1587    __ enter(); // required for proper stackwalking of RuntimeStub frame
1588    __ push(rsi);
1589    __ push(rdi);
1590    Address  from_arg(rsp, 12+ 4);      // from
1591    Address    to_arg(rsp, 12+ 8);      // to
1592    Address count_arg(rsp, 12+12);      // byte count
1593
1594    // Load up:
1595    __ movptr(from ,  from_arg);
1596    __ movptr(to   ,    to_arg);
1597    __ movl2ptr(count, count_arg);
1598
1599    // bump this on entry, not on exit:
1600    inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr);
1601
        // OR both addresses and the byte count together: the low bits of the
        // result are zero only if all three are multiples of the tested size.
1602    const Register bits = rsi;
1603    __ mov(bits, from);
1604    __ orptr(bits, to);
1605    __ orptr(bits, count);
1606
        // Try the widest element size first.
1607    __ testl(bits, BytesPerLong-1);
1608    __ jccb(Assembler::zero, L_long_aligned);
1609
1610    __ testl(bits, BytesPerInt-1);
1611    __ jccb(Assembler::zero, L_int_aligned);
1612
1613    __ testl(bits, BytesPerShort-1);
1614    __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry));
1615
        // Reached by fall-through only; no jump in this function targets the
        // label.  In each case below the byte count is converted to an element
        // count and also written back to the count slot on the stack.
1616    __ BIND(L_short_aligned);
1617    __ shrptr(count, LogBytesPerShort); // size => short_count
1618    __ movl(count_arg, count);          // update 'count'
1619    __ jump(RuntimeAddress(short_copy_entry));
1620
1621    __ BIND(L_int_aligned);
1622    __ shrptr(count, LogBytesPerInt); // size => int_count
1623    __ movl(count_arg, count);          // update 'count'
1624    __ jump(RuntimeAddress(int_copy_entry));
1625
1626    __ BIND(L_long_aligned);
1627    __ shrptr(count, LogBytesPerLong); // size => qword_count
1628    __ movl(count_arg, count);          // update 'count'
1629    __ pop(rdi); // Do pops here since jlong_arraycopy stub does not do it.
1630    __ pop(rsi);
1631    __ jump(RuntimeAddress(long_copy_entry));
1632
1633    return start;
1634  }
1635
1636
1637  // Perform range checks on the proposed arraycopy.
1638  // Smashes src_pos and dst_pos.  (Uses them up for temps.)
      //
      // Emits code that adds 'length' (a memory operand) to src_pos and dst_pos
      // and jumps to L_failed if either sum is above (unsigned compare) the
      // length field of the corresponding array.  On success execution falls
      // through with src_pos/dst_pos holding the end positions.
1639  void arraycopy_range_checks(Register src,
1640                              Register src_pos,
1641                              Register dst,
1642                              Register dst_pos,
1643                              Address& length,
1644                              Label& L_failed) {
1645    BLOCK_COMMENT("arraycopy_range_checks:");
        // The end positions reuse the position registers, which is why the
        // caller's src_pos and dst_pos values are lost.
1646    const Register src_end = src_pos;   // source array end position
1647    const Register dst_end = dst_pos;   // destination array end position
1648    __ addl(src_end, length); // src_pos + length
1649    __ addl(dst_end, length); // dst_pos + length
1650
1651    //  if (src_pos + length > arrayOop(src)->length() ) FAIL;
1652    __ cmpl(src_end, Address(src, arrayOopDesc::length_offset_in_bytes()));
1653    __ jcc(Assembler::above, L_failed);
1654
1655    //  if (dst_pos + length > arrayOop(dst)->length() ) FAIL;
1656    __ cmpl(dst_end, Address(dst, arrayOopDesc::length_offset_in_bytes()));
1657    __ jcc(Assembler::above, L_failed);
1658
1659    BLOCK_COMMENT("arraycopy_range_checks done");
1660  }
1661
1662
1663  //
1664  //  Generate generic array copy stubs
1665  //
1666  //  Input:
1667  //     4(rsp)    -  src oop
1668  //     8(rsp)    -  src_pos
1669  //    12(rsp)    -  dst oop
1670  //    16(rsp)    -  dst_pos
1671  //    20(rsp)    -  element count
1672  //
1673  //  Output:
1674  //    rax, ==  0  -  success
1675  //    rax, == -1^K - failure, where K is partial transfer count
1676  //
      //  Generator parameters:
      //    name               - stub name recorded by the StubCodeMark
      //    entry_*_arraycopy  - entry points of the type-specific copy stubs;
      //                         the generated code validates the arguments and
      //                         then jumps to one of them
      //
      //  Returns the (CodeEntryAlignment-aligned) entry address of the stub.
      //
1677  address generate_generic_copy(const char *name,
1678                                address entry_jbyte_arraycopy,
1679                                address entry_jshort_arraycopy,
1680                                address entry_jint_arraycopy,
1681                                address entry_oop_arraycopy,
1682                                address entry_jlong_arraycopy,
1683                                address entry_checkcast_arraycopy) {
1684    Label L_failed, L_failed_0, L_objArray;
1685
        // Pad with nops so that the 5-byte jmp emitted below ends exactly on a
        // CodeEntryAlignment boundary (checked by the assert after the jmp).
1686    { int modulus = CodeEntryAlignment;
1687      int target  = modulus - 5; // 5 = sizeof jmp(L_failed)
1688      int advance = target - (__ offset() % modulus);
1689      if (advance < 0)  advance += modulus;
1690      if (advance > 0)  __ nop(advance);
1691    }
1692    StubCodeMark mark(this, "StubRoutines", name);
1693
1694    // Short-hop target to L_failed.  Makes for denser prologue code.
1695    __ BIND(L_failed_0);
1696    __ jmp(L_failed);
1697    assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed");
1698
1699    __ align(CodeEntryAlignment);
1700    address start = __ pc();
1701
1702    __ enter(); // required for proper stackwalking of RuntimeStub frame
1703    __ push(rsi);
1704    __ push(rdi);
1705
1706    // bump this on entry, not on exit:
1707    inc_counter_np(SharedRuntime::_generic_array_copy_ctr);
1708
1709    // Input values
        // (The extra 12 accounts for the three words pushed since entry:
        //  rbp by enter(), then rsi and rdi.)
1710    Address SRC     (rsp, 12+ 4);
1711    Address SRC_POS (rsp, 12+ 8);
1712    Address DST     (rsp, 12+12);
1713    Address DST_POS (rsp, 12+16);
1714    Address LENGTH  (rsp, 12+20);
1715
1716    //-----------------------------------------------------------------------
1717    // Assembler stub will be used for this call to arraycopy
1718    // if the following conditions are met:
1719    //
1720    // (1) src and dst must not be null.
1721    // (2) src_pos must not be negative.
1722    // (3) dst_pos must not be negative.
1723    // (4) length  must not be negative.
1724    // (5) src klass and dst klass should be the same and not NULL.
1725    // (6) src and dst should be arrays.
1726    // (7) src_pos + length must not exceed length of src.
1727    // (8) dst_pos + length must not exceed length of dst.
1728    //
1729
1730    const Register src     = rax;       // source array oop
1731    const Register src_pos = rsi;
1732    const Register dst     = rdx;       // destination array oop
1733    const Register dst_pos = rdi;
1734    const Register length  = rcx;       // transfer count
1735
1736    //  if (src == NULL) return -1;
1737    __ movptr(src, SRC);      // src oop
1738    __ testptr(src, src);
1739    __ jccb(Assembler::zero, L_failed_0);
1740
1741    //  if (src_pos < 0) return -1;
1742    __ movl2ptr(src_pos, SRC_POS);  // src_pos
1743    __ testl(src_pos, src_pos);
1744    __ jccb(Assembler::negative, L_failed_0);
1745
1746    //  if (dst == NULL) return -1;
1747    __ movptr(dst, DST);      // dst oop
1748    __ testptr(dst, dst);
1749    __ jccb(Assembler::zero, L_failed_0);
1750
1751    //  if (dst_pos < 0) return -1;
1752    __ movl2ptr(dst_pos, DST_POS);  // dst_pos
1753    __ testl(dst_pos, dst_pos);
1754    __ jccb(Assembler::negative, L_failed_0);
1755
1756    //  if (length < 0) return -1;
1757    __ movl2ptr(length, LENGTH);   // length
1758    __ testl(length, length);
1759    __ jccb(Assembler::negative, L_failed_0);
1760
1761    //  Load src->klass(); a NULL klass is only asserted against (debug builds), not handled.
        //  Note: rcx is reused here, so 'length' is dead from this point and is
        //  reloaded from LENGTH where it is needed again.
1762    Address src_klass_addr(src, oopDesc::klass_offset_in_bytes());
1763    Address dst_klass_addr(dst, oopDesc::klass_offset_in_bytes());
1764    const Register rcx_src_klass = rcx;    // array klass
1765    __ movptr(rcx_src_klass, Address(src, oopDesc::klass_offset_in_bytes()));
1766
1767#ifdef ASSERT
1768    //  assert(src->klass() != NULL);
1769    BLOCK_COMMENT("assert klasses not null");
1770    { Label L1, L2;
1771      __ testptr(rcx_src_klass, rcx_src_klass);
1772      __ jccb(Assembler::notZero, L2);   // it is broken if klass is NULL
1773      __ bind(L1);
1774      __ stop("broken null klass");
1775      __ bind(L2);
1776      __ cmpptr(dst_klass_addr, (int32_t)NULL_WORD);
1777      __ jccb(Assembler::equal, L1);      // this would be broken also
1778      BLOCK_COMMENT("assert done");
1779    }
1780#endif //ASSERT
1781
1782    // Load layout helper (32-bits)
1783    //
1784    //  |array_tag|     | header_size | element_type |     |log2_element_size|
1785    // 32        30    24            16              8     2                 0
1786    //
1787    //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
1788    //
1789
1790    int lh_offset = klassOopDesc::header_size() * HeapWordSize +
1791                    Klass::layout_helper_offset_in_bytes();
1792    Address src_klass_lh_addr(rcx_src_klass, lh_offset);
1793
1794    // Handle objArrays completely differently...
1795    jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
1796    __ cmpl(src_klass_lh_addr, objArray_lh);
1797    __ jcc(Assembler::equal, L_objArray);
1798
1799    //  if (src->klass() != dst->klass()) return -1;
1800    __ cmpptr(rcx_src_klass, dst_klass_addr);
1801    __ jccb(Assembler::notEqual, L_failed_0);
1802
1803    const Register rcx_lh = rcx;  // layout helper
1804    assert(rcx_lh == rcx_src_klass, "known alias");
1805    __ movl(rcx_lh, src_klass_lh_addr);
1806
1807    //  if (!src->is_Array()) return -1;
1808    __ cmpl(rcx_lh, Klass::_lh_neutral_value);
1809    __ jcc(Assembler::greaterEqual, L_failed_0); // signed cmp
1810
1811    // At this point, it is known to be a typeArray (array_tag 0x3).
1812#ifdef ASSERT
1813    { Label L;
1814      __ cmpl(rcx_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift));
1815      __ jcc(Assembler::greaterEqual, L); // signed cmp
1816      __ stop("must be a primitive array");
1817      __ bind(L);
1818    }
1819#endif
1820
1821    assert_different_registers(src, src_pos, dst, dst_pos, rcx_lh);
1822    arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed);
1823
1824    // typeArrayKlass
1825    //
1826    // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
1827    // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
1828    //
1829    const Register rsi_offset = rsi; // array offset
1830    const Register src_array  = src; // src array offset
1831    const Register dst_array  = dst; // dst array offset
1832    const Register rdi_elsize = rdi; // log2 element size
1833
1834    __ mov(rsi_offset, rcx_lh);
1835    __ shrptr(rsi_offset, Klass::_lh_header_size_shift);
1836    __ andptr(rsi_offset, Klass::_lh_header_size_mask);   // array_offset
1837    __ addptr(src_array, rsi_offset);  // src array offset
1838    __ addptr(dst_array, rsi_offset);  // dst array offset
1839    __ andptr(rcx_lh, Klass::_lh_log2_element_size_mask); // log2 elsize
1840
1841    // next registers should be set before the jump to corresponding stub
1842    const Register from       = src; // source array address
1843    const Register to         = dst; // destination array address
1844    const Register count      = rcx; // elements count
1845    // some of them should be duplicated on stack
        // (These macros name the same stack slots as SRC, SRC_POS and DST above;
        //  the incoming argument area is reused for the outgoing arguments.)
1846#define FROM   Address(rsp, 12+ 4)
1847#define TO     Address(rsp, 12+ 8)   // Only for oop arraycopy
1848#define COUNT  Address(rsp, 12+12)   // Only for oop arraycopy
1849
1850    BLOCK_COMMENT("scale indexes to element size");
        // src_pos/dst_pos were smashed by the range checks, so reload them.
1851    __ movl2ptr(rsi, SRC_POS);  // src_pos
1852    __ shlptr(rsi);             // src_pos << rcx (log2 elsize)
1853    assert(src_array == from, "");
1854    __ addptr(from, rsi);       // from = src_array + SRC_POS << log2 elsize
1855    __ movl2ptr(rdi, DST_POS);  // dst_pos
1856    __ shlptr(rdi);             // dst_pos << rcx (log2 elsize)
1857    assert(dst_array == to, "");
1858    __ addptr(to,  rdi);        // to   = dst_array + DST_POS << log2 elsize
1859    __ movptr(FROM, from);      // src_addr
1860    __ mov(rdi_elsize, rcx_lh); // log2 elsize
1861    __ movl2ptr(count, LENGTH); // elements count
1862
1863    BLOCK_COMMENT("choose copy loop based on element size");
1864    __ cmpl(rdi_elsize, 0);
1865
1866    __ jump_cc(Assembler::equal, RuntimeAddress(entry_jbyte_arraycopy));
1867    __ cmpl(rdi_elsize, LogBytesPerShort);
1868    __ jump_cc(Assembler::equal, RuntimeAddress(entry_jshort_arraycopy));
1869    __ cmpl(rdi_elsize, LogBytesPerInt);
1870    __ jump_cc(Assembler::equal, RuntimeAddress(entry_jint_arraycopy));
        // Anything left is treated as a long array; debug builds verify that.
1871#ifdef ASSERT
1872    __ cmpl(rdi_elsize, LogBytesPerLong);
1873    __ jccb(Assembler::notEqual, L_failed);
1874#endif
1875    __ pop(rdi); // Do pops here since jlong_arraycopy stub does not do it.
1876    __ pop(rsi);
1877    __ jump(RuntimeAddress(entry_jlong_arraycopy));
1878
        // Common failure exit: rax = ~0 (-1), restore saved registers, return.
1879  __ BIND(L_failed);
1880    __ xorptr(rax, rax);
1881    __ notptr(rax); // return -1
1882    __ pop(rdi);
1883    __ pop(rsi);
1884    __ leave(); // required for proper stackwalking of RuntimeStub frame
1885    __ ret(0);
1886
1887    // objArrayKlass
1888  __ BIND(L_objArray);
1889    // live at this point:  rcx_src_klass, src[_pos], dst[_pos]
1890
1891    Label L_plain_copy, L_checkcast_copy;
1892    //  test array classes for subtyping
1893    __ cmpptr(rcx_src_klass, dst_klass_addr); // usual case is exact equality
1894    __ jccb(Assembler::notEqual, L_checkcast_copy);
1895
1896    // Identically typed arrays can be copied without element-wise checks.
1897    assert_different_registers(src, src_pos, dst, dst_pos, rcx_src_klass);
1898    arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed);
1899
1900  __ BIND(L_plain_copy);
1901    __ movl2ptr(count, LENGTH); // elements count
1902    __ movl2ptr(src_pos, SRC_POS);  // reload src_pos
1903    __ lea(from, Address(src, src_pos, Address::times_ptr,
1904                 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr
1905    __ movl2ptr(dst_pos, DST_POS);  // reload dst_pos
1906    __ lea(to,   Address(dst, dst_pos, Address::times_ptr,
1907                 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr
1908    __ movptr(FROM,  from);   // src_addr
1909    __ movptr(TO,    to);     // dst_addr
1910    __ movl(COUNT, count);  // count
1911    __ jump(RuntimeAddress(entry_oop_arraycopy));
1912
1913  __ BIND(L_checkcast_copy);
1914    // live at this point:  rcx_src_klass, dst[_pos], src[_pos]
1915    {
1916      // Handy offsets:
1917      int  ek_offset = (klassOopDesc::header_size() * HeapWordSize +
1918                        objArrayKlass::element_klass_offset_in_bytes());
1919      int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
1920                        Klass::super_check_offset_offset_in_bytes());
1921
1922      Register rsi_dst_klass = rsi;
1923      Register rdi_temp      = rdi;
1924      assert(rsi_dst_klass == src_pos, "expected alias w/ src_pos");
1925      assert(rdi_temp      == dst_pos, "expected alias w/ dst_pos");
1926      Address dst_klass_lh_addr(rsi_dst_klass, lh_offset);
1927
1928      // Before looking at dst.length, make sure dst is also an objArray.
1929      __ movptr(rsi_dst_klass, dst_klass_addr);
1930      __ cmpl(dst_klass_lh_addr, objArray_lh);
1931      __ jccb(Assembler::notEqual, L_failed);
1932
1933      // It is safe to examine both src.length and dst.length.
1934      __ movl2ptr(src_pos, SRC_POS);        // reload rsi
1935      arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed);
1936      // (Now src_pos and dst_pos are killed, but not src and dst.)
1937
1938      // We'll need this temp (don't forget to pop it after the type check).
1939      __ push(rbx);
1940      Register rbx_src_klass = rbx;
1941
1942      __ mov(rbx_src_klass, rcx_src_klass); // spill away from rcx
1943      __ movptr(rsi_dst_klass, dst_klass_addr);
1944      Address super_check_offset_addr(rsi_dst_klass, sco_offset);
1945      Label L_fail_array_check;
1946      generate_type_check(rbx_src_klass,
1947                          super_check_offset_addr, dst_klass_addr,
1948                          rdi_temp, NULL, &L_fail_array_check);
1949      // (On fall-through, we have passed the array type check.)
1950      __ pop(rbx);
1951      __ jmp(L_plain_copy);
1952
1953      __ BIND(L_fail_array_check);
1954      // Reshuffle arguments so we can call checkcast_arraycopy:
1955
1956      // match initial saves for checkcast_arraycopy
1957      // push(rsi);    // already done; see above
1958      // push(rdi);    // already done; see above
1959      // push(rbx);    // already done; see above
1960
1961      // Marshal outgoing arguments now, freeing registers.
          // (Bias is 16 here rather than 12 because rbx is still pushed.)
1962      Address   from_arg(rsp, 16+ 4);   // from
1963      Address     to_arg(rsp, 16+ 8);   // to
1964      Address length_arg(rsp, 16+12);   // elements count
1965      Address  ckoff_arg(rsp, 16+16);   // super_check_offset
1966      Address  ckval_arg(rsp, 16+20);   // super_klass
1967
          // Incoming values that share a slot with an outgoing argument
          // (SRC_POS/to, DST_POS/ckoff, LENGTH/ckval); they are reloaded into
          // registers below before those slots are overwritten.
1968      Address SRC_POS_arg(rsp, 16+ 8);
1969      Address DST_POS_arg(rsp, 16+16);
1970      Address  LENGTH_arg(rsp, 16+20);
1971      // push rbx, changed the incoming offsets (why not just use rbp,??)
1972      // assert(SRC_POS_arg.disp() == SRC_POS.disp() + 4, "");
1973
1974      __ movptr(rbx, Address(rsi_dst_klass, ek_offset));
1975      __ movl2ptr(length, LENGTH_arg);    // reload elements count
1976      __ movl2ptr(src_pos, SRC_POS_arg);  // reload src_pos
1977      __ movl2ptr(dst_pos, DST_POS_arg);  // reload dst_pos
1978
1979      __ movptr(ckval_arg, rbx);          // destination element type
1980      __ movl(rbx, Address(rbx, sco_offset));
1981      __ movl(ckoff_arg, rbx);          // corresponding class check offset
1982
1983      __ movl(length_arg, length);      // outgoing length argument
1984
1985      __ lea(from, Address(src, src_pos, Address::times_ptr,
1986                            arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
1987      __ movptr(from_arg, from);
1988
1989      __ lea(to, Address(dst, dst_pos, Address::times_ptr,
1990                          arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
1991      __ movptr(to_arg, to);
1992      __ jump(RuntimeAddress(entry_checkcast_arraycopy));
1993    }
1994
1995    return start;
1996  }
1997
1998  void generate_arraycopy_stubs() {
        // Generates every arraycopy and fill stub and publishes the resulting
        // addresses in StubRoutines.
        //
        // 'entry' is scratch: each disjoint generator stores an entry point
        // through &entry, and the conjoint generator called right after it
        // receives that value. The entry_*_arraycopy locals capture entry
        // points of selected conjoint/checkcast stubs; they are handed to
        // generate_unsafe_copy and generate_generic_copy at the end, so those
        // two calls must come after all the others.
1999    address entry;
2000    address entry_jbyte_arraycopy;
2001    address entry_jshort_arraycopy;
2002    address entry_jint_arraycopy;
2003    address entry_oop_arraycopy;
2004    address entry_jlong_arraycopy;
2005    address entry_checkcast_arraycopy;
2006
2007    StubRoutines::_arrayof_jbyte_disjoint_arraycopy =
2008        generate_disjoint_copy(T_BYTE,  true, Address::times_1, &entry,
2009                               "arrayof_jbyte_disjoint_arraycopy");
2010    StubRoutines::_arrayof_jbyte_arraycopy =
2011        generate_conjoint_copy(T_BYTE,  true, Address::times_1,  entry,
2012                               NULL, "arrayof_jbyte_arraycopy");
2013    StubRoutines::_jbyte_disjoint_arraycopy =
2014        generate_disjoint_copy(T_BYTE, false, Address::times_1, &entry,
2015                               "jbyte_disjoint_arraycopy");
2016    StubRoutines::_jbyte_arraycopy =
2017        generate_conjoint_copy(T_BYTE, false, Address::times_1,  entry,
2018                               &entry_jbyte_arraycopy, "jbyte_arraycopy");
2019
2020    StubRoutines::_arrayof_jshort_disjoint_arraycopy =
2021        generate_disjoint_copy(T_SHORT,  true, Address::times_2, &entry,
2022                               "arrayof_jshort_disjoint_arraycopy");
2023    StubRoutines::_arrayof_jshort_arraycopy =
2024        generate_conjoint_copy(T_SHORT,  true, Address::times_2,  entry,
2025                               NULL, "arrayof_jshort_arraycopy");
2026    StubRoutines::_jshort_disjoint_arraycopy =
2027        generate_disjoint_copy(T_SHORT, false, Address::times_2, &entry,
2028                               "jshort_disjoint_arraycopy");
2029    StubRoutines::_jshort_arraycopy =
2030        generate_conjoint_copy(T_SHORT, false, Address::times_2,  entry,
2031                               &entry_jshort_arraycopy, "jshort_arraycopy");
2032
2033    // Next arrays are always aligned on 4 bytes at least.
        // (Hence only one variant is generated for each, and the arrayof_*
        //  slots for these types are aliased to it further down.)
2034    StubRoutines::_jint_disjoint_arraycopy =
2035        generate_disjoint_copy(T_INT, true, Address::times_4, &entry,
2036                               "jint_disjoint_arraycopy");
2037    StubRoutines::_jint_arraycopy =
2038        generate_conjoint_copy(T_INT, true, Address::times_4,  entry,
2039                               &entry_jint_arraycopy, "jint_arraycopy");
2040
2041    StubRoutines::_oop_disjoint_arraycopy =
2042        generate_disjoint_copy(T_OBJECT, true, Address::times_ptr, &entry,
2043                               "oop_disjoint_arraycopy");
2044    StubRoutines::_oop_arraycopy =
2045        generate_conjoint_copy(T_OBJECT, true, Address::times_ptr,  entry,
2046                               &entry_oop_arraycopy, "oop_arraycopy");
2047
2048    StubRoutines::_oop_disjoint_arraycopy_uninit =
2049        generate_disjoint_copy(T_OBJECT, true, Address::times_ptr, &entry,
2050                               "oop_disjoint_arraycopy_uninit",
2051                               /*dest_uninitialized*/true);
2052    StubRoutines::_oop_arraycopy_uninit =
2053        generate_conjoint_copy(T_OBJECT, true, Address::times_ptr,  entry,
2054                               NULL, "oop_arraycopy_uninit",
2055                               /*dest_uninitialized*/true);
2056
2057    StubRoutines::_jlong_disjoint_arraycopy =
2058        generate_disjoint_long_copy(&entry, "jlong_disjoint_arraycopy");
2059    StubRoutines::_jlong_arraycopy =
2060        generate_conjoint_long_copy(entry, &entry_jlong_arraycopy,
2061                                    "jlong_arraycopy");
2062
        // Fill stubs: plain and arrayof_ (second argument true) variants.
2063    StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
2064    StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
2065    StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
2066    StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
2067    StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
2068    StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
2069
        // The arrayof_ entries for int, oop and long reuse the stubs above.
2070    StubRoutines::_arrayof_jint_disjoint_arraycopy       = StubRoutines::_jint_disjoint_arraycopy;
2071    StubRoutines::_arrayof_oop_disjoint_arraycopy        = StubRoutines::_oop_disjoint_arraycopy;
2072    StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit;
2073    StubRoutines::_arrayof_jlong_disjoint_arraycopy      = StubRoutines::_jlong_disjoint_arraycopy;
2074
2075    StubRoutines::_arrayof_jint_arraycopy       = StubRoutines::_jint_arraycopy;
2076    StubRoutines::_arrayof_oop_arraycopy        = StubRoutines::_oop_arraycopy;
2077    StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit;
2078    StubRoutines::_arrayof_jlong_arraycopy      = StubRoutines::_jlong_arraycopy;
2079
2080    StubRoutines::_checkcast_arraycopy =
2081        generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
2082    StubRoutines::_checkcast_arraycopy_uninit =
2083        generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, /*dest_uninitialized*/true);
2084
2085    StubRoutines::_unsafe_arraycopy =
2086        generate_unsafe_copy("unsafe_arraycopy",
2087                               entry_jbyte_arraycopy,
2088                               entry_jshort_arraycopy,
2089                               entry_jint_arraycopy,
2090                               entry_jlong_arraycopy);
2091
2092    StubRoutines::_generic_arraycopy =
2093        generate_generic_copy("generic_arraycopy",
2094                               entry_jbyte_arraycopy,
2095                               entry_jshort_arraycopy,
2096                               entry_jint_arraycopy,
2097                               entry_oop_arraycopy,
2098                               entry_jlong_arraycopy,
2099                               entry_checkcast_arraycopy);
2100  }
2101
2102  void generate_math_stubs() {
        // Generates the log, log10, sin, cos and tan stubs and records their
        // addresses in StubRoutines as double(*)(double) function pointers.
        // Every stub has the same shape: load the double found at 4(rsp) onto
        // the x87 stack, apply one operation, return.
        // NOTE(review): the result is presumably left in ST(0) for the caller,
        // as the 32-bit C calling convention expects -- confirm.
2103    {
2104      StubCodeMark mark(this, "StubRoutines", "log");
2105      StubRoutines::_intrinsic_log = (double (*)(double)) __ pc();
2106
2107      __ fld_d(Address(rsp, 4));
2108      __ flog();
2109      __ ret(0);
2110    }
2111    {
2112      StubCodeMark mark(this, "StubRoutines", "log10");
2113      StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc();
2114
2115      __ fld_d(Address(rsp, 4));
2116      __ flog10();
2117      __ ret(0);
2118    }
        // The three trig stubs differ only in the selector passed to trigfunc.
2119    {
2120      StubCodeMark mark(this, "StubRoutines", "sin");
2121      StubRoutines::_intrinsic_sin = (double (*)(double))  __ pc();
2122
2123      __ fld_d(Address(rsp, 4));
2124      __ trigfunc('s');
2125      __ ret(0);
2126    }
2127    {
2128      StubCodeMark mark(this, "StubRoutines", "cos");
2129      StubRoutines::_intrinsic_cos = (double (*)(double)) __ pc();
2130
2131      __ fld_d(Address(rsp, 4));
2132      __ trigfunc('c');
2133      __ ret(0);
2134    }
2135    {
2136      StubCodeMark mark(this, "StubRoutines", "tan");
2137      StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc();
2138
2139      __ fld_d(Address(rsp, 4));
2140      __ trigfunc('t');
2141      __ ret(0);
2142    }
2143
2144    // The intrinsic version of these seem to return the same value as
2145    // the strict version.
        // (So no code is generated for exp/pow; the SharedRuntime functions
        //  are installed directly.)
2146    StubRoutines::_intrinsic_exp = SharedRuntime::dexp;
2147    StubRoutines::_intrinsic_pow = SharedRuntime::dpow;
2148  }
2149
2150 public:
2151  // Information about frame layout at time of blocking runtime call.
2152  // Note that we only have to preserve callee-saved registers since
2153  // the compilers are responsible for supplying a continuation point
2154  // if they expect all registers to be preserved.
      //
      // The enumerators are word indices: generate_throw_exception multiplies
      // them by wordSize to address slots relative to rsp, and passes
      // framesize (the total number of words) to the OopMap and RuntimeStub
      // it creates.
2155  enum layout {
2156    thread_off,    // last_java_sp
2157    arg1_off,      // slot for the optional arg1 register
2158    arg2_off,      // slot for the optional arg2 register
2159    rbp_off,       // callee saved register
2160    ret_pc,        // return address
2161    framesize
2162  };
2163
2164 private:
2165
2166#undef  __
2167#define __ masm->
2168
2169  //------------------------------------------------------------------------------------------------------------------------
2170  // Continuation point for throwing of implicit exceptions that are not handled in
2171  // the current activation. Fabricates an exception oop and initiates normal
2172  // exception dispatching in this frame.
2173  //
2174  // Previously the compiler (c2) allowed for callee save registers on Java calls.
2175  // This is no longer true after adapter frames were removed but could possibly
2176  // be brought back in the future if the interpreter code was reworked and it
2177  // was deemed worthwhile. The comment below was left to describe what must
2178  // happen here if callee saves were resurrected. As it stands now this stub
2179  // could actually be a vanilla BufferBlob and have no oopMap at all.
2180  // Since it doesn't make much difference we've chosen to leave it the
2181  // way it was in the callee save days and keep the comment.
2182
2183  // If we need to preserve callee-saved values we need a callee-saved oop map and
2184  // therefore have to make these stubs into RuntimeStubs rather than BufferBlobs.
2185  // If the compiler needs all registers to be preserved between the fault
2186  // point and the exception handler then it must assume responsibility for that in
2187  // AbstractCompiler::continuation_for_implicit_null_exception or
2188  // continuation_for_implicit_division_by_zero_exception. All other implicit
2189  // exceptions (e.g., NullPointerException or AbstractMethodError on entry) are
2190  // either at call sites or otherwise assume that stack unwinding will be initiated,
2191  // so caller saved registers were assumed volatile in the compiler.
      //
      // Parameters:
      //   name          - name given to the private CodeBuffer and the RuntimeStub
      //   runtime_entry - address of the runtime function the stub calls
      //   arg1, arg2    - optional registers whose values are stored into the
      //                   arg1_off / arg2_off frame slots before the call
      //                   (arg2 requires arg1)
      //
      // Returns the entry point of the newly created RuntimeStub. Unlike the
      // other generators in this class, the code is emitted into its own
      // CodeBuffer through a local 'masm' (see the redefinition of '__' above).
2192  address generate_throw_exception(const char* name, address runtime_entry,
2193                                   Register arg1 = noreg, Register arg2 = noreg) {
2194
2195    int insts_size = 256;
2196    int locs_size  = 32;
2197
2198    CodeBuffer code(name, insts_size, locs_size);
2199    OopMapSet* oop_maps  = new OopMapSet();
2200    MacroAssembler* masm = new MacroAssembler(&code);
2201
2202    address start = __ pc();
2203
2204    // This is an inlined and slightly modified version of call_VM
2205    // which has the ability to fetch the return PC out of
2206    // thread-local storage and also sets up last_Java_sp slightly
2207    // differently than the real call_VM
2208    Register java_thread = rbx;
2209    __ get_thread(java_thread);
2210
2211    __ enter(); // required for proper stackwalking of RuntimeStub frame
2212
2213    // pc and rbp, already pushed
2214    __ subptr(rsp, (framesize-2) * wordSize); // prolog
2215
2216    // Frame is now completed as far as size and linkage.
2217
2218    int frame_complete = __ pc() - start;
2219
2220    // push java thread (becomes first argument of C function)
2221    __ movptr(Address(rsp, thread_off * wordSize), java_thread);
2222    if (arg1 != noreg) {
2223      __ movptr(Address(rsp, arg1_off * wordSize), arg1);
2224    }
2225    if (arg2 != noreg) {
2226      assert(arg1 != noreg, "missing reg arg");
2227      __ movptr(Address(rsp, arg2_off * wordSize), arg2);
2228    }
2229
2230    // Set up last_Java_sp and last_Java_fp
2231    __ set_last_Java_frame(java_thread, rsp, rbp, NULL);
2232
2233    // Call runtime
2234    BLOCK_COMMENT("call runtime_entry");
2235    __ call(RuntimeAddress(runtime_entry));
2236    // Generate oop map
        // (An empty map, registered at the return address of the call above.)
2237    OopMap* map =  new OopMap(framesize, 0);
2238    oop_maps->add_gc_map(__ pc() - start, map);
2239
2240    // restore the thread (cannot use the pushed argument since arguments
2241    // may be overwritten by C code generated by an optimizing compiler);
2242    // however can use the register value directly if it is callee saved.
2243    __ get_thread(java_thread);
2244
2245    __ reset_last_Java_frame(java_thread, true, false);
2246
2247    __ leave(); // required for proper stackwalking of RuntimeStub frame
2248
2249    // check for pending exceptions
        // (Debug builds only: the runtime call is expected to have left a
        //  pending exception; reaching here without one is a bug.)
2250#ifdef ASSERT
2251    Label L;
2252    __ cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
2253    __ jcc(Assembler::notEqual, L);
2254    __ should_not_reach_here();
2255    __ bind(L);
2256#endif /* ASSERT */
2257    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2258
2259
2260    RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, framesize, oop_maps, false);
2261    return stub->entry_point();
2262  }
2263
2264
2265  void create_control_words() {
        // Initializes the x87 control-word, MXCSR and subnormal-bias constants
        // stored in StubRoutines. No code is generated here.
2266    // Round to nearest, 53-bit mode, exceptions masked
2267    StubRoutines::_fpu_cntrl_wrd_std   = 0x027F;
2268    // Round to zero, 53-bit mode, exceptions masked
2269    StubRoutines::_fpu_cntrl_wrd_trunc = 0x0D7F;
2270    // Round to nearest, 24-bit mode, exceptions masked
2271    StubRoutines::_fpu_cntrl_wrd_24    = 0x007F;
2272    // Round to nearest, 64-bit mode, exceptions masked
2273    StubRoutines::_fpu_cntrl_wrd_64    = 0x037F;
2274    // MXCSR (SSE control/status): round to nearest, exceptions masked
2275    StubRoutines::_mxcsr_std           = 0x1F80;
2276    // Note: the following two constants are 80-bit values
2277    //       layout is critical for correct loading by FPU.
        //       Each is stored as three words, least significant first:
        //       [0],[1] hold the 64-bit significand, [2] the sign/exponent.
2278    // Bias for strict fp multiply/divide
2279    StubRoutines::_fpu_subnormal_bias1[0]= 0x00000000; // 2^(-15360) == 0x03ff 8000 0000 0000 0000
2280    StubRoutines::_fpu_subnormal_bias1[1]= 0x80000000;
2281    StubRoutines::_fpu_subnormal_bias1[2]= 0x03ff;
2282    // Un-Bias for strict fp multiply/divide
2283    StubRoutines::_fpu_subnormal_bias2[0]= 0x00000000; // 2^(+15360) == 0x7bff 8000 0000 0000 0000
2284    StubRoutines::_fpu_subnormal_bias2[1]= 0x80000000;
2285    StubRoutines::_fpu_subnormal_bias2[2]= 0x7bff;
2286  }
2287
2288  //---------------------------------------------------------------------------
2289  // Initialization
2290
2291  void generate_initial() {
2292    // Generates the initial set of stubs and initializes their entry points.
        // Run by the constructor when it is called with all == false.
2293
2294    //------------------------------------------------------------------------------------------------------------------------
2295    // entry points that exist in all platforms
2296    // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than
2297    //       the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp.
2298    StubRoutines::_forward_exception_entry      = generate_forward_exception();
2299
2300    StubRoutines::_call_stub_entry              =
2301      generate_call_stub(StubRoutines::_call_stub_return_address);
2302    // is referenced by megamorphic call
2303    StubRoutines::_catch_exception_entry        = generate_catch_exception();
2304
2305    // These are currently used by Solaris/Intel
2306    StubRoutines::_atomic_xchg_entry            = generate_atomic_xchg();
2307
2308    StubRoutines::_handler_for_unsafe_access_entry =
2309      generate_handler_for_unsafe_access();
2310
2311    // platform dependent
2312    create_control_words();
2313
2314    StubRoutines::x86::_verify_mxcsr_entry                 = generate_verify_mxcsr();
2315    StubRoutines::x86::_verify_fpu_cntrl_wrd_entry         = generate_verify_fpu_cntrl_wrd();
        // The same wrapper generator serves both d2i and d2l; the BasicType
        // argument and the runtime function distinguish them.
2316    StubRoutines::_d2i_wrapper                              = generate_d2i_wrapper(T_INT,
2317                                                                                   CAST_FROM_FN_PTR(address, SharedRuntime::d2i));
2318    StubRoutines::_d2l_wrapper                              = generate_d2i_wrapper(T_LONG,
2319                                                                                   CAST_FROM_FN_PTR(address, SharedRuntime::d2l));
2320
2321    // Build this early so it's available for the interpreter
        // (rax and rcx are stored into the stub's arg1/arg2 frame slots.)
2322    StubRoutines::_throw_WrongMethodTypeException_entry =
2323      generate_throw_exception("WrongMethodTypeException throw_exception",
2324                               CAST_FROM_FN_PTR(address, SharedRuntime::throw_WrongMethodTypeException),
2325                               rax, rcx);
2326  }
2327
2328
2329  void generate_all() {
2330    // Generates the remaining stubs and initializes their entry points.
        // Run by the constructor when it is called with all == true; it does
        // not itself invoke generate_initial().
2331
2332    // These entry points require SharedInfo::stack0 to be set up in non-core builds
2333    // and need to be relocatable, so they each fabricate a RuntimeStub internally.
2334    StubRoutines::_throw_AbstractMethodError_entry         = generate_throw_exception("AbstractMethodError throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError));
2335    StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError));
2336    StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call));
2337    StubRoutines::_throw_StackOverflowError_entry          = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
2338
2339    //------------------------------------------------------------------------------------------------------------------------
2340    // entry points that are platform specific
2341
2342    // support for verify_oop (must happen after universe_init)
2343    StubRoutines::_verify_oop_subroutine_entry     = generate_verify_oop();
2344
2345    // arraycopy stubs used by compilers
2346    generate_arraycopy_stubs();
2347
2348    generate_math_stubs();
2349  }
2350
2351
2352 public:
2353  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
        // Constructing the generator does all the work: 'code' is handed to
        // the StubCodeGenerator base, and 'all' selects which of the two
        // generation phases runs (exactly one of them per construction).
2354    if (all) {
2355      generate_all();
2356    } else {
2357      generate_initial();
2358    }
2359  }
2360}; // end class declaration
2361
2362
// Free-function entry point for stub generation: builds a temporary
// StubGenerator over 'code', whose constructor runs generate_all() when
// 'all' is true and generate_initial() otherwise.
2363void StubGenerator_generate(CodeBuffer* code, bool all) {
2364  StubGenerator g(code, all);
2365}
2366