// stubGenerator_x86_32.cpp revision 647:bd441136a5ce
1/*
2 * Copyright 1999-2009 Sun Microsystems, Inc.  All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 * have any questions.
22 *
23 */
24
25#include "incls/_precompiled.incl"
26#include "incls/_stubGenerator_x86_32.cpp.incl"
27
28// Declaration and definition of StubGenerator (no .hpp file).
29// For a more detailed description of the stub routine structure
30// see the comment in stubRoutines.hpp
31
// Shorthand for emitting instructions through the MacroAssembler (and, for
// a__, the raw Assembler) owned by the enclosing StubCodeGenerator.
#define __ _masm->
#define a__ ((Assembler*)_masm)->

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif

// Bind a label and, in debug builds, annotate the code stream with its name.
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

const int MXCSR_MASK  = 0xFFC0;  // Mask out any pending exceptions
const int FPU_CNTRL_WRD_MASK = 0xFFFF;  // full 16-bit x87 control word
45
46// -------------------------------------------------------------------------------------------------------------------------
47// Stub Code definitions
48
49static address handle_unsafe_access() {
50  JavaThread* thread = JavaThread::current();
51  address pc  = thread->saved_exception_pc();
52  // pc is the instruction which we must emulate
53  // doing a no-op is fine:  return garbage from the load
54  // therefore, compute npc
55  address npc = Assembler::locate_next_instruction(pc);
56
57  // request an async exception
58  thread->set_pending_unsafe_access_error();
59
60  // return address of next instruction to execute
61  return npc;
62}
63
64class StubGenerator: public StubCodeGenerator {
65 private:
66
#ifdef PRODUCT
#define inc_counter_np(counter) (0)
#else
  // Emit code that increments a native int counter at a fixed address
  // (debug builds only; compiled away in PRODUCT).
  void inc_counter_np_(int& counter) {
    __ incrementl(ExternalAddress((address)&counter));
  }
#define inc_counter_np(counter) \
  BLOCK_COMMENT("inc_counter " #counter); \
  inc_counter_np_(counter);
#endif //PRODUCT
77
  // Emit code that bumps the SharedRuntime arraycopy counter matching the
  // element type t.  No code is emitted in PRODUCT builds.
  void inc_copy_counter_np(BasicType t) {
#ifndef PRODUCT
    switch (t) {
    case T_BYTE:    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); return;
    case T_SHORT:   inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); return;
    case T_INT:     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); return;
    case T_LONG:    inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); return;
    case T_OBJECT:  inc_counter_np(SharedRuntime::_oop_array_copy_ctr); return;
    }
    // Any other BasicType has no dedicated counter.
    ShouldNotReachHere();
#endif //PRODUCT
  }
90
91  //------------------------------------------------------------------------------------------------------------------------
92  // Call stubs are used to call Java from C
93  //
94  //    [ return_from_Java     ] <--- rsp
95  //    [ argument word n      ]
96  //      ...
97  // -N [ argument word 1      ]
98  // -7 [ Possible padding for stack alignment ]
99  // -6 [ Possible padding for stack alignment ]
100  // -5 [ Possible padding for stack alignment ]
101  // -4 [ mxcsr save           ] <--- rsp_after_call
102  // -3 [ saved rbx,            ]
103  // -2 [ saved rsi            ]
104  // -1 [ saved rdi            ]
105  //  0 [ saved rbp,            ] <--- rbp,
106  //  1 [ return address       ]
107  //  2 [ ptr. to call wrapper ]
108  //  3 [ result               ]
109  //  4 [ result_type          ]
110  //  5 [ method               ]
111  //  6 [ entry_point          ]
112  //  7 [ parameters           ]
113  //  8 [ parameter_size       ]
114  //  9 [ thread               ]
115
116
  // Generate the stub used to call Java from C (layout diagram above).
  // On return, return_address holds the pc that Java code returns to;
  // a second return entry for calls into compiled code is registered via
  // StubRoutines::x86::set_call_stub_compiled_return().
  address generate_call_stub(address& return_address) {
    StubCodeMark mark(this, "StubRoutines", "call_stub");
    address start = __ pc();

    // stub code parameters / addresses
    assert(frame::entry_frame_call_wrapper_offset == 2, "adjust this code");
    bool  sse_save = false;
    const Address rsp_after_call(rbp, -4 * wordSize); // same as in generate_catch_exception()!
    const int     locals_count_in_bytes  (4*wordSize);
    const Address mxcsr_save    (rbp, -4 * wordSize);
    const Address saved_rbx     (rbp, -3 * wordSize);
    const Address saved_rsi     (rbp, -2 * wordSize);
    const Address saved_rdi     (rbp, -1 * wordSize);
    const Address result        (rbp,  3 * wordSize);
    const Address result_type   (rbp,  4 * wordSize);
    const Address method        (rbp,  5 * wordSize);
    const Address entry_point   (rbp,  6 * wordSize);
    const Address parameters    (rbp,  7 * wordSize);
    const Address parameter_size(rbp,  8 * wordSize);
    const Address thread        (rbp,  9 * wordSize); // same as in generate_catch_exception()!
    sse_save =  UseSSE > 0;   // mxcsr only exists (and needs saving) with SSE

    // stub code
    __ enter();
    __ movptr(rcx, parameter_size);              // parameter counter
    __ shlptr(rcx, Interpreter::logStackElementSize()); // convert parameter count to bytes
    __ addptr(rcx, locals_count_in_bytes);       // reserve space for register saves
    __ subptr(rsp, rcx);
    __ andptr(rsp, -(StackAlignmentInBytes));    // Align stack

    // save rdi, rsi, & rbx, according to C calling conventions
    __ movptr(saved_rdi, rdi);
    __ movptr(saved_rsi, rsi);
    __ movptr(saved_rbx, rbx);
    // save and initialize %mxcsr
    if (sse_save) {
      Label skip_ldmx;
      __ stmxcsr(mxcsr_save);
      __ movl(rax, mxcsr_save);
      __ andl(rax, MXCSR_MASK);    // Only check control and mask bits
      ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
      __ cmp32(rax, mxcsr_std);
      __ jcc(Assembler::equal, skip_ldmx);
      // mxcsr differs from the expected standard value: load the standard one
      __ ldmxcsr(mxcsr_std);
      __ bind(skip_ldmx);
    }

    // make sure the control word is correct.
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));

#ifdef ASSERT
    // make sure we have no pending exceptions
    { Label L;
      __ movptr(rcx, thread);
      __ cmpptr(Address(rcx, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
      __ jcc(Assembler::equal, L);
      __ stop("StubRoutines::call_stub: entered with pending exception");
      __ bind(L);
    }
#endif

    // pass parameters if any
    BLOCK_COMMENT("pass parameters if any");
    Label parameters_done;
    __ movl(rcx, parameter_size);  // parameter counter
    __ testl(rcx, rcx);
    __ jcc(Assembler::zero, parameters_done);

    // parameter passing loop

    Label loop;
    // Copy Java parameters in reverse order (receiver last)
    // Note that the argument order is inverted in the process
    // source is rdx[rcx: N-1..0]
    // dest   is rsp[rbx: 0..N-1]

    __ movptr(rdx, parameters);          // parameter pointer
    __ xorptr(rbx, rbx);                 // dest index starts at 0

    __ BIND(loop);
    if (TaggedStackInterpreter) {
      // copy the tag word alongside each value
      __ movptr(rax, Address(rdx, rcx, Interpreter::stackElementScale(),
                      -2*wordSize));                          // get tag
      __ movptr(Address(rsp, rbx, Interpreter::stackElementScale(),
                      Interpreter::expr_tag_offset_in_bytes(0)), rax);     // store tag
    }

    // get parameter
    __ movptr(rax, Address(rdx, rcx, Interpreter::stackElementScale(), -wordSize));
    __ movptr(Address(rsp, rbx, Interpreter::stackElementScale(),
                    Interpreter::expr_offset_in_bytes(0)), rax);          // store parameter
    __ increment(rbx);
    __ decrement(rcx);
    __ jcc(Assembler::notZero, loop);

    // call Java function
    __ BIND(parameters_done);
    __ movptr(rbx, method);           // get methodOop
    __ movptr(rax, entry_point);      // get entry_point
    __ mov(rsi, rsp);                 // set sender sp
    BLOCK_COMMENT("call Java function");
    __ call(rax);

    BLOCK_COMMENT("call_stub_return_address:");
    return_address = __ pc();

    Label common_return;

    // epilogue shared by the interpreted return (falls through) and the
    // compiled return entry generated below
    __ BIND(common_return);

    // store result depending on type
    // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
    __ movptr(rdi, result);
    Label is_long, is_float, is_double, exit;
    __ movl(rsi, result_type);
    __ cmpl(rsi, T_LONG);
    __ jcc(Assembler::equal, is_long);
    __ cmpl(rsi, T_FLOAT);
    __ jcc(Assembler::equal, is_float);
    __ cmpl(rsi, T_DOUBLE);
    __ jcc(Assembler::equal, is_double);

    // handle T_INT case
    __ movl(Address(rdi, 0), rax);
    __ BIND(exit);

    // check that FPU stack is empty
    __ verify_FPU(0, "generate_call_stub");

    // pop parameters
    __ lea(rsp, rsp_after_call);

    // restore %mxcsr
    if (sse_save) {
      __ ldmxcsr(mxcsr_save);
    }

    // restore rdi, rsi and rbx,
    __ movptr(rbx, saved_rbx);
    __ movptr(rsi, saved_rsi);
    __ movptr(rdi, saved_rdi);
    __ addptr(rsp, 4*wordSize);

    // return
    __ pop(rbp);
    __ ret(0);

    // handle return types different from T_INT
    __ BIND(is_long);
    __ movl(Address(rdi, 0 * wordSize), rax);   // low word
    __ movl(Address(rdi, 1 * wordSize), rdx);   // high word
    __ jmp(exit);

    __ BIND(is_float);
    // interpreter uses xmm0 for return values
    if (UseSSE >= 1) {
      __ movflt(Address(rdi, 0), xmm0);
    } else {
      __ fstp_s(Address(rdi, 0));
    }
    __ jmp(exit);

    __ BIND(is_double);
    // interpreter uses xmm0 for return values
    if (UseSSE >= 2) {
      __ movdbl(Address(rdi, 0), xmm0);
    } else {
      __ fstp_d(Address(rdi, 0));
    }
    __ jmp(exit);

    // If we call compiled code directly from the call stub we will
    // need to adjust the return back to the call stub to a specialized
    // piece of code that can handle compiled results and cleaning the fpu
    // stack. compiled code will be set to return here instead of the
    // return above that handles interpreter returns.

    BLOCK_COMMENT("call_stub_compiled_return:");
    StubRoutines::x86::set_call_stub_compiled_return( __ pc());

#ifdef COMPILER2
    if (UseSSE >= 2) {
      // compiled code leaves the FPU stack empty when SSE2 is used
      __ verify_FPU(0, "call_stub_compiled_return");
    } else {
      // drop everything above TOS; a float/double result may be on TOS
      for (int i = 1; i < 8; i++) {
        __ ffree(i);
      }

      // UseSSE <= 1 so double result should be left on TOS
      __ movl(rsi, result_type);
      __ cmpl(rsi, T_DOUBLE);
      __ jcc(Assembler::equal, common_return);
      if (UseSSE == 0) {
        // UseSSE == 0 so float result should be left on TOS
        __ cmpl(rsi, T_FLOAT);
        __ jcc(Assembler::equal, common_return);
      }
      // result is in rax/xmm0; free the stale TOS entry
      __ ffree(0);
    }
#endif /* COMPILER2 */
    __ jmp(common_return);

    return start;
  }
321
322
323  //------------------------------------------------------------------------------------------------------------------------
324  // Return point for a Java call if there's an exception thrown in Java code.
325  // The exception is caught and transformed into a pending exception stored in
326  // JavaThread that can be tested from within the VM.
327  //
328  // Note: Usually the parameters are removed by the callee. In case of an exception
329  //       crossing an activation frame boundary, that is not the case if the callee
330  //       is compiled code => need to setup the rsp.
331  //
332  // rax,: exception oop
333
  // Generate the return point used when a Java call made through the call
  // stub throws: the exception oop (in rax) is stored as the thread's
  // pending exception and control jumps back into the call stub's return
  // path, which unwinds via rsp_after_call.
  address generate_catch_exception() {
    StubCodeMark mark(this, "StubRoutines", "catch_exception");
    const Address rsp_after_call(rbp, -4 * wordSize); // same as in generate_call_stub()!
    const Address thread        (rbp,  9 * wordSize); // same as in generate_call_stub()!
    address start = __ pc();

    // get thread directly from the call stub's frame
    __ movptr(rcx, thread);
#ifdef ASSERT
    // verify that threads correspond
    { Label L;
      __ get_thread(rbx);
      __ cmpptr(rbx, rcx);
      __ jcc(Assembler::equal, L);
      __ stop("StubRoutines::catch_exception: threads must correspond");
      __ bind(L);
    }
#endif
    // set pending exception, recording where it was caught
    __ verify_oop(rax);
    __ movptr(Address(rcx, Thread::pending_exception_offset()), rax          );
    __ lea(Address(rcx, Thread::exception_file_offset   ()),
           ExternalAddress((address)__FILE__));
    __ movl(Address(rcx, Thread::exception_line_offset   ()), __LINE__ );
    // complete return to VM
    assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before");
    __ jump(RuntimeAddress(StubRoutines::_call_stub_return_address));

    return start;
  }
364
365
366  //------------------------------------------------------------------------------------------------------------------------
367  // Continuation point for runtime calls returning with a pending exception.
368  // The pending exception check happened in the runtime or native call stub.
369  // The pending exception in Thread is converted into a Java-level exception.
370  //
371  // Contract with Java-level exception handlers:
372  // rax,: exception
373  // rdx: throwing pc
374  //
375  // NOTE: At entry of this stub, exception-pc must be on stack !!
376
  // Generate the continuation used after a runtime/native call returns with
  // a pending exception: looks up the Java-level handler for the return
  // address on the stack and jumps to it with rax = exception, rdx =
  // throwing pc (see contract in the comment above).
  address generate_forward_exception() {
    StubCodeMark mark(this, "StubRoutines", "forward exception");
    address start = __ pc();

    // Upon entry, the sp points to the return address returning into Java
    // (interpreted or compiled) code; i.e., the return address becomes the
    // throwing pc.
    //
    // Arguments pushed before the runtime call are still on the stack but
    // the exception handler will reset the stack pointer -> ignore them.
    // A potential result in registers can be ignored as well.

#ifdef ASSERT
    // make sure this code is only executed if there is a pending exception
    { Label L;
      __ get_thread(rcx);
      __ cmpptr(Address(rcx, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
      __ jcc(Assembler::notEqual, L);
      __ stop("StubRoutines::forward exception: no pending exception (1)");
      __ bind(L);
    }
#endif

    // compute exception handler into rbx,
    __ movptr(rax, Address(rsp, 0));    // throwing pc = return address at tos
    BLOCK_COMMENT("call exception_handler_for_return_address");
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), rax);
    __ mov(rbx, rax);

    // setup rax, & rdx, remove return address & clear pending exception
    __ get_thread(rcx);
    __ pop(rdx);                        // rdx = throwing pc
    __ movptr(rax, Address(rcx, Thread::pending_exception_offset()));
    __ movptr(Address(rcx, Thread::pending_exception_offset()), NULL_WORD);

#ifdef ASSERT
    // make sure exception is set
    { Label L;
      __ testptr(rax, rax);
      __ jcc(Assembler::notEqual, L);
      __ stop("StubRoutines::forward exception: no pending exception (2)");
      __ bind(L);
    }
#endif

    // continue at exception handler (return address removed)
    // rax,: exception
    // rbx,: exception handler
    // rdx: throwing pc
    __ verify_oop(rax);
    __ jmp(rbx);

    return start;
  }
431
432
433  //----------------------------------------------------------------------------------------------------
434  // Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest)
435  //
436  // xchg exists as far back as 8086, lock needed for MP only
437  // Stack layout immediately after call:
438  //
439  // 0 [ret addr ] <--- rsp
440  // 1 [  ex     ]
441  // 2 [  dest   ]
442  //
443  // Result:   *dest <- ex, return (old *dest)
444  //
445  // Note: win32 does not currently use this code
446
  // Generate jint Atomic::xchg(jint exchange_value, volatile jint* dest):
  // stores exchange_value into *dest and returns the old *dest (stack
  // layout in the comment above; not used on win32).
  address generate_atomic_xchg() {
    StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
    address start = __ pc();

    __ push(rdx);                          // preserve rdx, used as scratch
    Address exchange(rsp, 2 * wordSize);   // arg 0 (offsets include pushed rdx)
    Address dest_addr(rsp, 3 * wordSize);  // arg 1
    __ movl(rax, exchange);                // rax = new value
    __ movptr(rdx, dest_addr);             // rdx = dest pointer
    __ xchgl(rax, Address(rdx, 0));        // swap; old *dest lands in rax
    __ pop(rdx);
    __ ret(0);

    return start;
  }
462
463  //----------------------------------------------------------------------------------------------------
464  // Support for void verify_mxcsr()
465  //
466  // This routine is used with -Xcheck:jni to verify that native
467  // JNI code does not return to Java code without restoring the
468  // MXCSR register to our expected state.
469
470
  // Generate the -Xcheck:jni helper that warns (and restores the standard
  // value) if native JNI code returned with a modified MXCSR.  Emits only
  // a ret when CheckJNICalls is off or SSE is unavailable.
  address generate_verify_mxcsr() {
    StubCodeMark mark(this, "StubRoutines", "verify_mxcsr");
    address start = __ pc();

    const Address mxcsr_save(rsp, 0);

    if (CheckJNICalls && UseSSE > 0 ) {
      Label ok_ret;
      ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
      __ push(rax);
      __ subptr(rsp, wordSize);      // allocate a temp location
      __ stmxcsr(mxcsr_save);
      __ movl(rax, mxcsr_save);
      __ andl(rax, MXCSR_MASK);      // compare control/mask bits only
      __ cmp32(rax, mxcsr_std);
      __ jcc(Assembler::equal, ok_ret);

      __ warn("MXCSR changed by native JNI code.");

      // restore the expected standard value before re-entering Java
      __ ldmxcsr(mxcsr_std);

      __ bind(ok_ret);
      __ addptr(rsp, wordSize);
      __ pop(rax);
    }

    __ ret(0);

    return start;
  }
501
502
503  //---------------------------------------------------------------------------
504  // Support for void verify_fpu_cntrl_wrd()
505  //
506  // This routine is used with -Xcheck:jni to verify that native
507  // JNI code does not return to Java code without restoring the
508  // FP control word to our expected state.
509
  // Generate the -Xcheck:jni helper that warns (and reloads the standard
  // value) if native JNI code returned with a modified x87 FPU control
  // word.  Emits only a ret when CheckJNICalls is off.
  address generate_verify_fpu_cntrl_wrd() {
    StubCodeMark mark(this, "StubRoutines", "verify_spcw");
    address start = __ pc();

    const Address fpu_cntrl_wrd_save(rsp, 0);

    if (CheckJNICalls) {
      Label ok_ret;
      __ push(rax);
      __ subptr(rsp, wordSize);      // allocate a temp location
      __ fnstcw(fpu_cntrl_wrd_save); // store current control word
      __ movl(rax, fpu_cntrl_wrd_save);
      __ andl(rax, FPU_CNTRL_WRD_MASK);
      ExternalAddress fpu_std(StubRoutines::addr_fpu_cntrl_wrd_std());
      __ cmp32(rax, fpu_std);
      __ jcc(Assembler::equal, ok_ret);

      __ warn("Floating point control word changed by native JNI code.");

      // restore the expected standard control word
      __ fldcw(fpu_std);

      __ bind(ok_ret);
      __ addptr(rsp, wordSize);
      __ pop(rax);
    }

    __ ret(0);

    return start;
  }
540
541  //---------------------------------------------------------------------------
542  // Wrapper for slow-case handling of double-to-integer conversion
543  // d2i or f2i fast case failed either because it is nan or because
544  // of under/overflow.
// Input:  FPU TOS: value to convert (spilled/reloaded as a double)
546  // Output: rax, (rdx): integer (long) result
547
  // Generate the slow-path wrapper for d2i/d2l (and the f2i/f2l slow cases):
  // saves CPU and FPU state, passes the x87 TOS value to the C helper fcn
  // (SharedRuntime::d2i or d2l, selected by t), and returns its result in
  // rax (rax:rdx for T_LONG — per the calling convention; not visible here).
  address generate_d2i_wrapper(BasicType t, address fcn) {
    StubCodeMark mark(this, "StubRoutines", "d2i_wrapper");
    address start = __ pc();

  // Capture info about frame layout (word offsets from rsp after all the
  // pushes below; FPU state sits lowest)
  enum layout { FPUState_off         = 0,
                rbp_off              = FPUStateSizeInWords,
                rdi_off,
                rsi_off,
                rcx_off,
                rbx_off,
                saved_argument_off,
                saved_argument_off2, // 2nd half of double
                framesize
  };

  assert(FPUStateSizeInWords == 27, "update stack layout");

    // Save outgoing argument to stack across push_FPU_state()
    __ subptr(rsp, wordSize * 2);
    __ fstp_d(Address(rsp, 0));

    // Save CPU & FPU state
    __ push(rbx);
    __ push(rcx);
    __ push(rsi);
    __ push(rdi);
    __ push(rbp);
    __ push_FPU_state();

    // push_FPU_state() resets the FP top of stack
    // Load original double into FP top of stack
    __ fld_d(Address(rsp, saved_argument_off * wordSize));
    // Store double into stack as outgoing argument
    __ subptr(rsp, wordSize*2);
    __ fst_d(Address(rsp, 0));

    // Prepare FPU for doing math in C-land
    __ empty_FPU_stack();
    // Call the C code to massage the double.  Result in EAX
    if (t == T_INT)
      { BLOCK_COMMENT("SharedRuntime::d2i"); }
    else if (t == T_LONG)
      { BLOCK_COMMENT("SharedRuntime::d2l"); }
    __ call_VM_leaf( fcn, 2 );   // 2 argument words (the double)

    // Restore CPU & FPU state
    __ pop_FPU_state();
    __ pop(rbp);
    __ pop(rdi);
    __ pop(rsi);
    __ pop(rcx);
    __ pop(rbx);
    __ addptr(rsp, wordSize * 2);   // discard the saved argument

    __ ret(0);

    return start;
  }
607
608
609  //---------------------------------------------------------------------------
610  // The following routine generates a subroutine to throw an asynchronous
611  // UnknownError when an unsafe access gets a fault that could not be
612  // reasonably prevented by the programmer.  (Example: SIGBUS/OBJERR.)
  // Generate the fault handler for unsafe accesses: calls the C helper
  // handle_unsafe_access() (above), which records an async error and
  // returns the address of the next instruction; the stub returns there.
  address generate_handler_for_unsafe_access() {
    StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
    address start = __ pc();

    __ push(0);                       // hole for return address-to-be
    __ pusha();                       // push registers
    // slot of the hole, above all the pusha'd registers
    Address next_pc(rsp, RegisterImpl::number_of_registers * BytesPerWord);
    BLOCK_COMMENT("call handle_unsafe_access");
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, handle_unsafe_access)));
    __ movptr(next_pc, rax);          // stuff next address
    __ popa();
    __ ret(0);                        // jump to next address

    return start;
  }
628
629
630  //----------------------------------------------------------------------------------------------------
631  // Non-destructive plausibility checks for oops
632
  // Generate the verify_oop subroutine: plausibility-checks the oop passed
  // on the stack (layout in the comment below) without destroying any
  // caller state; on failure it falls into MacroAssembler::debug32 with an
  // error message.
  address generate_verify_oop() {
    StubCodeMark mark(this, "StubRoutines", "verify_oop");
    address start = __ pc();

    // Incoming arguments on stack after saving rax,:
    //
    // [tos    ]: saved rdx
    // [tos + 1]: saved EFLAGS
    // [tos + 2]: return address
    // [tos + 3]: char* error message
    // [tos + 4]: oop   object to verify
    // [tos + 5]: saved rax, - saved by caller and bashed

    Label exit, error;
    __ pushf();                                  // preserve EFLAGS across the checks
    __ incrementl(ExternalAddress((address) StubRoutines::verify_oop_count_addr()));
    __ push(rdx);                                // save rdx
    // make sure object is 'reasonable'
    __ movptr(rax, Address(rsp, 4 * wordSize));    // get object
    __ testptr(rax, rax);
    __ jcc(Assembler::zero, exit);               // if obj is NULL it is ok

    // Check if the oop is in the right area of memory
    const int oop_mask = Universe::verify_oop_mask();
    const int oop_bits = Universe::verify_oop_bits();
    __ mov(rdx, rax);
    __ andptr(rdx, oop_mask);
    __ cmpptr(rdx, oop_bits);
    __ jcc(Assembler::notZero, error);

    // make sure klass is 'reasonable'
    __ movptr(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // get klass
    __ testptr(rax, rax);
    __ jcc(Assembler::zero, error);              // if klass is NULL it is broken

    // Check if the klass is in the right area of memory
    const int klass_mask = Universe::verify_klass_mask();
    const int klass_bits = Universe::verify_klass_bits();
    __ mov(rdx, rax);
    __ andptr(rdx, klass_mask);
    __ cmpptr(rdx, klass_bits);
    __ jcc(Assembler::notZero, error);

    // make sure klass' klass is 'reasonable'
    __ movptr(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // get klass' klass
    __ testptr(rax, rax);
    __ jcc(Assembler::zero, error);              // if klass' klass is NULL it is broken

    __ mov(rdx, rax);
    __ andptr(rdx, klass_mask);
    __ cmpptr(rdx, klass_bits);
    __ jcc(Assembler::notZero, error);           // if klass not in right area
                                                 // of memory it is broken too.

    // return if everything seems ok
    __ bind(exit);
    __ movptr(rax, Address(rsp, 5 * wordSize));  // get saved rax, back
    __ pop(rdx);                                 // restore rdx
    __ popf();                                   // restore EFLAGS
    __ ret(3 * wordSize);                        // pop arguments

    // handle errors
    __ bind(error);
    __ movptr(rax, Address(rsp, 5 * wordSize));  // get saved rax, back
    __ pop(rdx);                                 // get saved rdx back
    __ popf();                                   // get saved EFLAGS off stack -- will be ignored
    __ pusha();                                  // push registers (eip = return address & msg are already pushed)
    BLOCK_COMMENT("call MacroAssembler::debug");
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
    __ popa();
    __ ret(3 * wordSize);                        // pop arguments
    return start;
  }
706
707  //
708  //  Generate pre-barrier for array stores
709  //
710  //  Input:
711  //     start   -  starting address
  //     count   -  element count
  // Emit the GC pre-barrier for an oop-array store of `count` elements
  // beginning at `start`.  Only the G1 barriers need a pre-barrier; it is
  // done via a runtime call with all registers preserved.
  void  gen_write_ref_array_pre_barrier(Register start, Register count) {
    assert_different_registers(start, count);
    BarrierSet* bs = Universe::heap()->barrier_set();
    switch (bs->kind()) {
      case BarrierSet::G1SATBCT:
      case BarrierSet::G1SATBCTLogging:
        {
          __ pusha();                      // push registers
          __ push(count);                  // C args pushed right-to-left
          __ push(start);
          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre)));
          __ addptr(rsp, 2*wordSize);      // pop the two arguments
          __ popa();
        }
        break;
      case BarrierSet::CardTableModRef:
      case BarrierSet::CardTableExtension:
      case BarrierSet::ModRef:
        // no pre-barrier required for these barrier sets
        break;
      default      :
        ShouldNotReachHere();

    }
  }
737
738
739  //
740  // Generate a post-barrier for an array store
741  //
742  //     start    -  starting address
743  //     count    -  element count
744  //
745  //  The two input registers are overwritten.
746  //
  // Emit the GC post-barrier for an oop-array store of `count` elements
  // beginning at `start`.  G1 uses a register-preserving runtime call;
  // card-table collectors dirty the covered cards inline.  Both input
  // registers are clobbered (see comment above).
  void  gen_write_ref_array_post_barrier(Register start, Register count) {
    BarrierSet* bs = Universe::heap()->barrier_set();
    assert_different_registers(start, count);
    switch (bs->kind()) {
      case BarrierSet::G1SATBCT:
      case BarrierSet::G1SATBCTLogging:
        {
          __ pusha();                      // push registers
          __ push(count);                  // C args pushed right-to-left
          __ push(start);
          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)));
          __ addptr(rsp, 2*wordSize);      // pop the two arguments
          __ popa();

        }
        break;

      case BarrierSet::CardTableModRef:
      case BarrierSet::CardTableExtension:
        {
          CardTableModRefBS* ct = (CardTableModRefBS*)bs;
          assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

          Label L_loop;
          const Register end = count;  // elements count; end == start+count-1
          assert_different_registers(start, end);

          // compute card indices for first and last element, then loop
          // count-of-cards down to zero dirtying one card per iteration
          __ lea(end,  Address(start, count, Address::times_ptr, -wordSize));
          __ shrptr(start, CardTableModRefBS::card_shift);
          __ shrptr(end,   CardTableModRefBS::card_shift);
          __ subptr(end, start); // end --> count
        __ BIND(L_loop);
          intptr_t disp = (intptr_t) ct->byte_map_base;
          Address cardtable(start, count, Address::times_1, disp);
          __ movb(cardtable, 0);           // 0 marks the card dirty
          __ decrement(count);
          __ jcc(Assembler::greaterEqual, L_loop);
        }
        break;
      case BarrierSet::ModRef:
        // no post-barrier required
        break;
      default      :
        ShouldNotReachHere();

    }
  }
793
794
795  // Copy 64 bytes chunks
796  //
797  // Inputs:
798  //   from        - source array address
799  //   to_from     - destination array address - from
800  //   qword_count - 8-bytes element count, negative
801  //
  // Emit a forward copy loop using XMM registers: 64-byte chunks while at
  // least 8 qwords remain, then a qword-at-a-time tail loop.  Note that the
  // destination is addressed as from + to_from (to_from holds dest - src),
  // so advancing `from` advances both pointers.
  void xmm_copy_forward(Register from, Register to_from, Register qword_count) {
    assert( UseSSE >= 2, "supported cpu only" );
    Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
    // Copy 64-byte chunks
    __ jmpb(L_copy_64_bytes);        // enter at the count check
    __ align(16);
  __ BIND(L_copy_64_bytes_loop);

    if(UseUnalignedLoadStores) {
      // four 16-byte unaligned transfers per chunk
      __ movdqu(xmm0, Address(from, 0));
      __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0);
      __ movdqu(xmm1, Address(from, 16));
      __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1);
      __ movdqu(xmm2, Address(from, 32));
      __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2);
      __ movdqu(xmm3, Address(from, 48));
      __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3);

    } else {
      // eight 8-byte transfers per chunk
      __ movq(xmm0, Address(from, 0));
      __ movq(Address(from, to_from, Address::times_1, 0), xmm0);
      __ movq(xmm1, Address(from, 8));
      __ movq(Address(from, to_from, Address::times_1, 8), xmm1);
      __ movq(xmm2, Address(from, 16));
      __ movq(Address(from, to_from, Address::times_1, 16), xmm2);
      __ movq(xmm3, Address(from, 24));
      __ movq(Address(from, to_from, Address::times_1, 24), xmm3);
      __ movq(xmm4, Address(from, 32));
      __ movq(Address(from, to_from, Address::times_1, 32), xmm4);
      __ movq(xmm5, Address(from, 40));
      __ movq(Address(from, to_from, Address::times_1, 40), xmm5);
      __ movq(xmm6, Address(from, 48));
      __ movq(Address(from, to_from, Address::times_1, 48), xmm6);
      __ movq(xmm7, Address(from, 56));
      __ movq(Address(from, to_from, Address::times_1, 56), xmm7);
    }

    __ addl(from, 64);
  __ BIND(L_copy_64_bytes);
    __ subl(qword_count, 8);
    __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);
    __ addl(qword_count, 8);         // undo the speculative subtract
    __ jccb(Assembler::zero, L_exit);
    //
    // length is too short, just copy qwords
    //
  __ BIND(L_copy_8_bytes);
    __ movq(xmm0, Address(from, 0));
    __ movq(Address(from, to_from, Address::times_1), xmm0);
    __ addl(from, 8);
    __ decrement(qword_count);
    __ jcc(Assembler::greater, L_copy_8_bytes);
  __ BIND(L_exit);
  }
856
  // Copy 64 bytes chunks
  //
  // Inputs:
  //   from        - source array address
  //   to_from     - destination array address - from
  //   qword_count - 8-bytes element count, negative
  //                 NOTE(review): "negative" looks stale — the loop below
  //                 (subl/jcc greaterEqual, then decrement/jcc greater) only
  //                 terminates correctly for a non-negative count, and the
  //                 caller (generate_disjoint_copy) passes a shrl result.
  //                 Confirm and fix this comment.
  //
  // Clobbers mmx0-mmx7 and issues EMMS on exit so x87 FP code can follow.
  // 'from' is advanced past all data copied here.
  void mmx_copy_forward(Register from, Register to_from, Register qword_count) {
    assert( VM_Version::supports_mmx(), "supported cpu only" );
    Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
    // Copy 64-byte chunks
    __ jmpb(L_copy_64_bytes);
    __ align(16);
  __ BIND(L_copy_64_bytes_loop);
    // Loads and stores are interleaved to hide load latency.
    // Each store goes to from + to_from, i.e. the matching 'to' address.
    __ movq(mmx0, Address(from, 0));
    __ movq(mmx1, Address(from, 8));
    __ movq(mmx2, Address(from, 16));
    __ movq(Address(from, to_from, Address::times_1, 0), mmx0);
    __ movq(mmx3, Address(from, 24));
    __ movq(Address(from, to_from, Address::times_1, 8), mmx1);
    __ movq(mmx4, Address(from, 32));
    __ movq(Address(from, to_from, Address::times_1, 16), mmx2);
    __ movq(mmx5, Address(from, 40));
    __ movq(Address(from, to_from, Address::times_1, 24), mmx3);
    __ movq(mmx6, Address(from, 48));
    __ movq(Address(from, to_from, Address::times_1, 32), mmx4);
    __ movq(mmx7, Address(from, 56));
    __ movq(Address(from, to_from, Address::times_1, 40), mmx5);
    __ movq(Address(from, to_from, Address::times_1, 48), mmx6);
    __ movq(Address(from, to_from, Address::times_1, 56), mmx7);
    __ addptr(from, 64);
  __ BIND(L_copy_64_bytes);
    __ subl(qword_count, 8);
    __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);
    __ addl(qword_count, 8); // undo the subtract; remaining qwords in [0, 8)
    __ jccb(Assembler::zero, L_exit);
    //
    // length is too short, just copy qwords
    //
  __ BIND(L_copy_8_bytes);
    __ movq(mmx0, Address(from, 0));
    __ movq(Address(from, to_from, Address::times_1), mmx0);
    __ addptr(from, 8);
    __ decrement(qword_count);
    __ jcc(Assembler::greater, L_copy_8_bytes);
  __ BIND(L_exit);
    __ emms(); // clear MMX state so subsequent x87 FP instructions work
  }
905
  // Generate a stub that copies 'count' elements of BasicType 't' forward
  // (low to high addresses).  Also serves as the no-overlap target of the
  // matching conjoint stub via the secondary entry point stored in *entry.
  //
  // C stack arguments (after enter + push rsi + push rdi, so at rsp+12+...):
  //    4(rsp) - source array address
  //    8(rsp) - destination array address
  //   12(rsp) - element count, can be zero
  //
  // Output: rax == 0 (success).
  // Note: *entry is dereferenced unconditionally here (unlike the conjoint
  // generator, which checks for NULL), so callers must pass a valid slot.
  address generate_disjoint_copy(BasicType t, bool aligned,
                                 Address::ScaleFactor sf,
                                 address* entry, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte;
    Label L_copy_2_bytes, L_copy_4_bytes, L_copy_64_bytes;

    // log2 of elements per 32-bit word: 2 for T_BYTE, 1 for T_SHORT,
    // 0 for 4-byte elements (times_ptr == times_4 on x86_32).
    int shift = Address::times_ptr - sf;

    const Register from     = rsi;  // source array address
    const Register to       = rdi;  // destination array address
    const Register count    = rcx;  // elements count
    const Register to_from  = to;   // (to - from)
    const Register saved_to = rdx;  // saved destination array address

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    __ push(rsi);
    __ push(rdi);
    __ movptr(from , Address(rsp, 12+ 4));
    __ movptr(to   , Address(rsp, 12+ 8));
    __ movl(count, Address(rsp, 12+ 12));
    if (t == T_OBJECT) {
      // Skip both write barriers entirely for a zero-length oop copy.
      __ testl(count, count);
      __ jcc(Assembler::zero, L_0_count);
      gen_write_ref_array_pre_barrier(to, count);
      __ mov(saved_to, to);          // save 'to'
    }

    *entry = __ pc(); // Entry point from conjoint arraycopy stub.
    BLOCK_COMMENT("Entry:");

    // From here on 'to' holds (to - from); every store below addresses
    // Address(from, to_from) so one induction variable drives both arrays.
    __ subptr(to, from); // to --> to_from
    __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
    __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
    if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
      // align source address at 4 bytes address boundary
      if (t == T_BYTE) {
        // One byte misalignment happens only for byte arrays
        __ testl(from, 1);
        __ jccb(Assembler::zero, L_skip_align1);
        __ movb(rax, Address(from, 0));
        __ movb(Address(from, to_from, Address::times_1, 0), rax);
        __ increment(from);
        __ decrement(count);
      __ BIND(L_skip_align1);
      }
      // Two bytes misalignment happens only for byte and short (char) arrays
      __ testl(from, 2);
      __ jccb(Assembler::zero, L_skip_align2);
      __ movw(rax, Address(from, 0));
      __ movw(Address(from, to_from, Address::times_1, 0), rax);
      __ addptr(from, 2);
      __ subl(count, 1<<(shift-1));
    __ BIND(L_skip_align2);
    }
    if (!VM_Version::supports_mmx()) {
      // No MMX: bulk-copy whole 32-bit words with rep movs, then fall
      // through to the sub-word tail handling below.
      __ mov(rax, count);      // save 'count'
      __ shrl(count, shift); // bytes count
      __ addptr(to_from, from);// restore 'to'
      __ rep_mov();
      __ subptr(to_from, from);// restore 'to_from'
      __ mov(count, rax);      // restore 'count'
      __ jmpb(L_copy_2_bytes); // all dwords were copied
    } else {
      if (!UseUnalignedLoadStores) {
        // align to 8 bytes, we know we are 4 byte aligned to start
        __ testptr(from, 4);
        __ jccb(Assembler::zero, L_copy_64_bytes);
        __ movl(rax, Address(from, 0));
        __ movl(Address(from, to_from, Address::times_1, 0), rax);
        __ addptr(from, 4);
        __ subl(count, 1<<shift);
      }
    __ BIND(L_copy_64_bytes);
      __ mov(rax, count);
      __ shrl(rax, shift+1);  // 8 bytes chunk count
      //
      // Copy 8-byte chunks through MMX registers, 8 per iteration of the loop
      //
      if (UseXMMForArrayCopy) {
        xmm_copy_forward(from, to_from, rax);
      } else {
        mmx_copy_forward(from, to_from, rax);
      }
    }
    // copy tailing dword
  __ BIND(L_copy_4_bytes);
    __ testl(count, 1<<shift);
    __ jccb(Assembler::zero, L_copy_2_bytes);
    __ movl(rax, Address(from, 0));
    __ movl(Address(from, to_from, Address::times_1, 0), rax);
    if (t == T_BYTE || t == T_SHORT) {
      __ addptr(from, 4);
    __ BIND(L_copy_2_bytes);
      // copy tailing word
      __ testl(count, 1<<(shift-1));
      __ jccb(Assembler::zero, L_copy_byte);
      __ movw(rax, Address(from, 0));
      __ movw(Address(from, to_from, Address::times_1, 0), rax);
      if (t == T_BYTE) {
        __ addptr(from, 2);
      __ BIND(L_copy_byte);
        // copy tailing byte
        __ testl(count, 1);
        __ jccb(Assembler::zero, L_exit);
        __ movb(rax, Address(from, 0));
        __ movb(Address(from, to_from, Address::times_1, 0), rax);
      __ BIND(L_exit);
      } else {
      __ BIND(L_copy_byte);
      }
    } else {
    __ BIND(L_copy_2_bytes);
    }

    if (t == T_OBJECT) {
      __ movl(count, Address(rsp, 12+12)); // reread 'count'
      __ mov(to, saved_to); // restore 'to'
      gen_write_ref_array_post_barrier(to, count);
    __ BIND(L_0_count);
    }
    inc_copy_counter_np(t);
    __ pop(rdi);
    __ pop(rsi);
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ xorptr(rax, rax); // return 0
    __ ret(0);
    return start;
  }
1038
1039
  // Generate a stub that copies 'count' elements of BasicType 't' for
  // potentially overlapping arrays.  If the destination does not overlap
  // the source range (dst <= src, or dst >= src + size), control tail-jumps
  // to 'nooverlap_target' (the forward/disjoint stub); otherwise elements
  // are copied backward, from high to low addresses.
  //
  // C stack arguments (after enter + push rsi + push rdi, so at rsp+12+...):
  //    4(rsp) - source array address
  //    8(rsp) - destination array address
  //   12(rsp) - element count, can be zero
  //
  // Output: rax == 0 (success).
  address generate_conjoint_copy(BasicType t, bool aligned,
                                 Address::ScaleFactor sf,
                                 address nooverlap_target,
                                 address* entry, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte;
    Label L_copy_2_bytes, L_copy_4_bytes, L_copy_8_bytes, L_copy_8_bytes_loop;

    // log2 of elements per 32-bit word (see generate_disjoint_copy).
    int shift = Address::times_ptr - sf;

    const Register src   = rax;  // source array address
    const Register dst   = rdx;  // destination array address
    const Register from  = rsi;  // source array address
    const Register to    = rdi;  // destination array address
    const Register count = rcx;  // elements count
    const Register end   = rax;  // array end address (aliases 'src')

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    __ push(rsi);
    __ push(rdi);
    __ movptr(src  , Address(rsp, 12+ 4));   // from
    __ movptr(dst  , Address(rsp, 12+ 8));   // to
    __ movl2ptr(count, Address(rsp, 12+12)); // count
    if (t == T_OBJECT) {
       gen_write_ref_array_pre_barrier(dst, count);
    }

    if (entry != NULL) {
      *entry = __ pc(); // Entry point from generic arraycopy stub.
      BLOCK_COMMENT("Entry:");
    }

    if (t == T_OBJECT) {
      // Zero-length oop copy: skip the work and the post barrier.
      __ testl(count, count);
      __ jcc(Assembler::zero, L_0_count);
    }
    __ mov(from, src);
    __ mov(to  , dst);

    // arrays overlap test
    // Forward copy is safe when dst <= src or dst >= src + size; in those
    // cases tail-jump to the disjoint stub.  The lea clobbers 'src' (it
    // aliases 'end' in rax), but 'from'/'to' already hold copies.
    RuntimeAddress nooverlap(nooverlap_target);
    __ cmpptr(dst, src);
    __ lea(end, Address(src, count, sf, 0)); // src + count * elem_size
    __ jump_cc(Assembler::belowEqual, nooverlap);
    __ cmpptr(dst, end);
    __ jump_cc(Assembler::aboveEqual, nooverlap);

    // copy from high to low
    __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
    __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
    if (t == T_BYTE || t == T_SHORT) {
      // Align the end of destination array at 4 bytes address boundary
      __ lea(end, Address(dst, count, sf, 0));
      if (t == T_BYTE) {
        // One byte misalignment happens only for byte arrays
        __ testl(end, 1);
        __ jccb(Assembler::zero, L_skip_align1);
        __ decrement(count);
        __ movb(rdx, Address(from, count, sf, 0));
        __ movb(Address(to, count, sf, 0), rdx);
      __ BIND(L_skip_align1);
      }
      // Two bytes misalignment happens only for byte and short (char) arrays
      __ testl(end, 2);
      __ jccb(Assembler::zero, L_skip_align2);
      __ subptr(count, 1<<(shift-1));
      __ movw(rdx, Address(from, count, sf, 0));
      __ movw(Address(to, count, sf, 0), rdx);
    __ BIND(L_skip_align2);
      // Re-check: alignment may have dropped the count below 8 bytes.
      __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
      __ jcc(Assembler::below, L_copy_4_bytes);
    }

    if (!VM_Version::supports_mmx()) {
      // Backward rep movs: set the direction flag, point rsi/rdi at the
      // last dword of each range, copy, then clear the flag again.
      __ std();
      __ mov(rax, count); // Save 'count'
      __ mov(rdx, to);    // Save 'to'
      __ lea(rsi, Address(from, count, sf, -4));
      __ lea(rdi, Address(to  , count, sf, -4));
      __ shrptr(count, shift); // bytes count
      __ rep_mov();
      __ cld();
      __ mov(count, rax); // restore 'count'
      __ andl(count, (1<<shift)-1);      // mask the number of rest elements
      __ movptr(from, Address(rsp, 12+4)); // reread 'from'
      __ mov(to, rdx);   // restore 'to'
      __ jmpb(L_copy_2_bytes); // all dword were copied
   } else {
      // Align to 8 bytes the end of array. It is aligned to 4 bytes already.
      __ testptr(end, 4);
      __ jccb(Assembler::zero, L_copy_8_bytes);
      __ subl(count, 1<<shift);
      __ movl(rdx, Address(from, count, sf, 0));
      __ movl(Address(to, count, sf, 0), rdx);
      __ jmpb(L_copy_8_bytes);

      __ align(16);
      // Move 8 bytes
    __ BIND(L_copy_8_bytes_loop);
      if (UseXMMForArrayCopy) {
        __ movq(xmm0, Address(from, count, sf, 0));
        __ movq(Address(to, count, sf, 0), xmm0);
      } else {
        __ movq(mmx0, Address(from, count, sf, 0));
        __ movq(Address(to, count, sf, 0), mmx0);
      }
    __ BIND(L_copy_8_bytes);
      // Decrementing 'count' walks the qword loop from high to low.
      __ subl(count, 2<<shift);
      __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
      __ addl(count, 2<<shift);
      if (!UseXMMForArrayCopy) {
        __ emms(); // clear MMX state before any x87 FP use
      }
    }
  __ BIND(L_copy_4_bytes);
    // copy prefix qword
    __ testl(count, 1<<shift);
    __ jccb(Assembler::zero, L_copy_2_bytes);
    __ movl(rdx, Address(from, count, sf, -4));
    __ movl(Address(to, count, sf, -4), rdx);

    if (t == T_BYTE || t == T_SHORT) {
        __ subl(count, (1<<shift));
      __ BIND(L_copy_2_bytes);
        // copy prefix dword
        __ testl(count, 1<<(shift-1));
        __ jccb(Assembler::zero, L_copy_byte);
        __ movw(rdx, Address(from, count, sf, -2));
        __ movw(Address(to, count, sf, -2), rdx);
        if (t == T_BYTE) {
          __ subl(count, 1<<(shift-1));
        __ BIND(L_copy_byte);
          // copy prefix byte
          __ testl(count, 1);
          __ jccb(Assembler::zero, L_exit);
          __ movb(rdx, Address(from, 0));
          __ movb(Address(to, 0), rdx);
        __ BIND(L_exit);
        } else {
        __ BIND(L_copy_byte);
        }
    } else {
    __ BIND(L_copy_2_bytes);
    }
    if (t == T_OBJECT) {
      __ movl2ptr(count, Address(rsp, 12+12)); // reread count
      gen_write_ref_array_post_barrier(to, count);
    __ BIND(L_0_count);
    }
    inc_copy_counter_np(t);
    __ pop(rdi);
    __ pop(rsi);
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ xorptr(rax, rax); // return 0
    __ ret(0);
    return start;
  }
1200
1201
  // Generate a stub copying 'count' jlong (8-byte) elements forward.
  //
  // C stack arguments (only 'enter' has run — no rsi/rdi pushes here —
  // so the incoming args sit at rsp+8, rsp+12, rsp+16):
  //    4(rsp) - source array address
  //    8(rsp) - destination array address
  //   12(rsp) - element count
  //
  // Output: rax == 0 (success).
  // *entry is dereferenced unconditionally (callers must pass a slot).
  address generate_disjoint_long_copy(address* entry, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_copy_8_bytes, L_copy_8_bytes_loop;
    const Register from       = rax;  // source array address
    const Register to         = rdx;  // destination array address
    const Register count      = rcx;  // elements count
    const Register to_from    = rdx;  // (to - from), aliases 'to'

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    __ movptr(from , Address(rsp, 8+0));       // from
    __ movptr(to   , Address(rsp, 8+4));       // to
    __ movl2ptr(count, Address(rsp, 8+8));     // count

    *entry = __ pc(); // Entry point from conjoint arraycopy stub.
    BLOCK_COMMENT("Entry:");

    __ subptr(to, from); // to --> to_from
    if (VM_Version::supports_mmx()) {
      if (UseXMMForArrayCopy) {
        xmm_copy_forward(from, to_from, count);
      } else {
        mmx_copy_forward(from, to_from, count);
      }
    } else {
      // No MMX: move each qword atomically through the x87 FPU
      // (fild/fistp perform a 64-bit load and store).
      __ jmpb(L_copy_8_bytes);
      __ align(16);
    __ BIND(L_copy_8_bytes_loop);
      __ fild_d(Address(from, 0));
      __ fistp_d(Address(from, to_from, Address::times_1));
      __ addptr(from, 8);
    __ BIND(L_copy_8_bytes);
      __ decrement(count);
      __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
    }
    inc_copy_counter_np(T_LONG);
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ xorptr(rax, rax); // return 0
    __ ret(0);
    return start;
  }
1245
  // Generate a stub copying 'count' jlong (8-byte) elements with overlap
  // handling: tail-jumps to 'nooverlap_target' (the disjoint long copy)
  // when a forward copy is safe, otherwise copies backward by walking the
  // element index down from count-1 to 0.
  //
  // C stack arguments (only 'enter' has run, so args at rsp+8/+12/+16):
  //    4(rsp) - source array address
  //    8(rsp) - destination array address
  //   12(rsp) - element count
  //
  // Output: rax == 0 (success).
  address generate_conjoint_long_copy(address nooverlap_target,
                                      address* entry, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_copy_8_bytes, L_copy_8_bytes_loop;
    const Register from       = rax;  // source array address
    const Register to         = rdx;  // destination array address
    const Register count      = rcx;  // elements count
    const Register end_from   = rax;  // source array end address (aliases 'from')

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    __ movptr(from , Address(rsp, 8+0));       // from
    __ movptr(to   , Address(rsp, 8+4));       // to
    __ movl2ptr(count, Address(rsp, 8+8));     // count

    *entry = __ pc(); // Entry point from generic arraycopy stub.
    BLOCK_COMMENT("Entry:");

    // arrays overlap test
    // Forward copy is safe when to <= from or to >= from + count*8.
    __ cmpptr(to, from);
    RuntimeAddress nooverlap(nooverlap_target);
    __ jump_cc(Assembler::belowEqual, nooverlap);
    __ lea(end_from, Address(from, count, Address::times_8, 0));
    __ cmpptr(to, end_from);
    // The lea above clobbered 'from' (end_from aliases rax); reload it
    // from the stack before either continuing or tail-jumping.
    __ movptr(from, Address(rsp, 8));  // from
    __ jump_cc(Assembler::aboveEqual, nooverlap);

    __ jmpb(L_copy_8_bytes);

    __ align(16);
  __ BIND(L_copy_8_bytes);
    if (VM_Version::supports_mmx()) {
      if (UseXMMForArrayCopy) {
        __ movq(xmm0, Address(from, count, Address::times_8));
        __ movq(Address(to, count, Address::times_8), xmm0);
      } else {
        __ movq(mmx0, Address(from, count, Address::times_8));
        __ movq(Address(to, count, Address::times_8), mmx0);
      }
    } else {
      // No MMX: atomic 64-bit move through the x87 FPU.
      __ fild_d(Address(from, count, Address::times_8));
      __ fistp_d(Address(to, count, Address::times_8));
    }
  __ BIND(L_copy_8_bytes);
    // Pre-decrement turns 'count' into the index of the element to move;
    // loop runs while index >= 0, i.e. from the high end down.
    __ decrement(count);
    __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);

    if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
      __ emms(); // clear MMX state before any x87 FP use
    }
    inc_copy_counter_np(T_LONG);
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ xorptr(rax, rax); // return 0
    __ ret(0);
    return start;
  }
1304
1305
  // Helper for generating a dynamic type check.
  // The sub_klass must be one of {rbx, rdx, rsi}.
  // The temp is killed.
  //
  // Parameters:
  //   sub_klass               - register holding the klass to be tested
  //   super_check_offset_addr - stack slot holding super_check_offset
  //   super_klass_addr        - stack slot holding the super klass
  //   temp                    - scratch register, clobbered
  //   L_success / L_failure   - branch targets; either may be NULL, in
  //                             which case that outcome falls through
  //                             to the code following this helper.
  void generate_type_check(Register sub_klass,
                           Address& super_check_offset_addr,
                           Address& super_klass_addr,
                           Register temp,
                           Label* L_success, Label* L_failure) {
    BLOCK_COMMENT("type_check:");

    Label L_fallthrough;
    // Branch to the caller-supplied label, or fall through when it is NULL.
#define LOCAL_JCC(assembler_con, label_ptr)                             \
    if (label_ptr != NULL)  __ jcc(assembler_con, *(label_ptr));        \
    else                    __ jcc(assembler_con, L_fallthrough) /*omit semi*/

    // The following is a strange variation of the fast path which requires
    // one less register, because needed values are on the argument stack.
    // __ check_klass_subtype_fast_path(sub_klass, *super_klass*, temp,
    //                                  L_success, L_failure, NULL);
    assert_different_registers(sub_klass, temp);

    int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
                     Klass::secondary_super_cache_offset_in_bytes());

    // if the pointers are equal, we are done (e.g., String[] elements)
    __ cmpptr(sub_klass, super_klass_addr);
    LOCAL_JCC(Assembler::equal, L_success);

    // check the supertype display:
    __ movl2ptr(temp, super_check_offset_addr);
    Address super_check_addr(sub_klass, temp, Address::times_1, 0);
    __ movptr(temp, super_check_addr); // load displayed supertype
    __ cmpptr(temp, super_klass_addr); // test the super type
    LOCAL_JCC(Assembler::equal, L_success);

    // if it was a primary super, we can just fail immediately
    __ cmpl(super_check_offset_addr, sc_offset);
    LOCAL_JCC(Assembler::notEqual, L_failure);

    // The repne_scan instruction uses fixed registers, which will get spilled.
    // We happen to know this works best when super_klass is in rax.
    Register super_klass = temp;
    __ movptr(super_klass, super_klass_addr);
    __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg,
                                     L_success, L_failure);

    __ bind(L_fallthrough);

    // Emit markers so disassembly shows where NULL-label outcomes land.
    if (L_success == NULL) { BLOCK_COMMENT("L_success:"); }
    if (L_failure == NULL) { BLOCK_COMMENT("L_failure:"); }

#undef LOCAL_JCC
  }
1359
1360  //
1361  //  Generate checkcasting array copy stub
1362  //
1363  //  Input:
1364  //    4(rsp)   - source array address
1365  //    8(rsp)   - destination array address
1366  //   12(rsp)   - element count, can be zero
1367  //   16(rsp)   - size_t ckoff (super_check_offset)
1368  //   20(rsp)   - oop ckval (super_klass)
1369  //
1370  //  Output:
1371  //    rax, ==  0  -  success
1372  //    rax, == -1^K - failure, where K is partial transfer count
1373  //
1374  address generate_checkcast_copy(const char *name, address* entry) {
1375    __ align(CodeEntryAlignment);
1376    StubCodeMark mark(this, "StubRoutines", name);
1377    address start = __ pc();
1378
1379    Label L_load_element, L_store_element, L_do_card_marks, L_done;
1380
1381    // register use:
1382    //  rax, rdx, rcx -- loop control (end_from, end_to, count)
1383    //  rdi, rsi      -- element access (oop, klass)
1384    //  rbx,           -- temp
1385    const Register from       = rax;    // source array address
1386    const Register to         = rdx;    // destination array address
1387    const Register length     = rcx;    // elements count
1388    const Register elem       = rdi;    // each oop copied
1389    const Register elem_klass = rsi;    // each elem._klass (sub_klass)
1390    const Register temp       = rbx;    // lone remaining temp
1391
1392    __ enter(); // required for proper stackwalking of RuntimeStub frame
1393
1394    __ push(rsi);
1395    __ push(rdi);
1396    __ push(rbx);
1397
1398    Address   from_arg(rsp, 16+ 4);     // from
1399    Address     to_arg(rsp, 16+ 8);     // to
1400    Address length_arg(rsp, 16+12);     // elements count
1401    Address  ckoff_arg(rsp, 16+16);     // super_check_offset
1402    Address  ckval_arg(rsp, 16+20);     // super_klass
1403
1404    // Load up:
1405    __ movptr(from,     from_arg);
1406    __ movptr(to,         to_arg);
1407    __ movl2ptr(length, length_arg);
1408
1409    *entry = __ pc(); // Entry point from generic arraycopy stub.
1410    BLOCK_COMMENT("Entry:");
1411
1412    //---------------------------------------------------------------
1413    // Assembler stub will be used for this call to arraycopy
1414    // if the two arrays are subtypes of Object[] but the
1415    // destination array type is not equal to or a supertype
1416    // of the source type.  Each element must be separately
1417    // checked.
1418
1419    // Loop-invariant addresses.  They are exclusive end pointers.
1420    Address end_from_addr(from, length, Address::times_ptr, 0);
1421    Address   end_to_addr(to,   length, Address::times_ptr, 0);
1422
1423    Register end_from = from;           // re-use
1424    Register end_to   = to;             // re-use
1425    Register count    = length;         // re-use
1426
1427    // Loop-variant addresses.  They assume post-incremented count < 0.
1428    Address from_element_addr(end_from, count, Address::times_ptr, 0);
1429    Address   to_element_addr(end_to,   count, Address::times_ptr, 0);
1430    Address elem_klass_addr(elem, oopDesc::klass_offset_in_bytes());
1431
1432    // Copy from low to high addresses, indexed from the end of each array.
1433    gen_write_ref_array_pre_barrier(to, count);
1434    __ lea(end_from, end_from_addr);
1435    __ lea(end_to,   end_to_addr);
1436    assert(length == count, "");        // else fix next line:
1437    __ negptr(count);                   // negate and test the length
1438    __ jccb(Assembler::notZero, L_load_element);
1439
1440    // Empty array:  Nothing to do.
1441    __ xorptr(rax, rax);                  // return 0 on (trivial) success
1442    __ jmp(L_done);
1443
1444    // ======== begin loop ========
1445    // (Loop is rotated; its entry is L_load_element.)
1446    // Loop control:
1447    //   for (count = -count; count != 0; count++)
1448    // Base pointers src, dst are biased by 8*count,to last element.
1449    __ align(16);
1450
1451    __ BIND(L_store_element);
1452    __ movptr(to_element_addr, elem);     // store the oop
1453    __ increment(count);                // increment the count toward zero
1454    __ jccb(Assembler::zero, L_do_card_marks);
1455
1456    // ======== loop entry is here ========
1457    __ BIND(L_load_element);
1458    __ movptr(elem, from_element_addr);   // load the oop
1459    __ testptr(elem, elem);
1460    __ jccb(Assembler::zero, L_store_element);
1461
1462    // (Could do a trick here:  Remember last successful non-null
1463    // element stored and make a quick oop equality check on it.)
1464
1465    __ movptr(elem_klass, elem_klass_addr); // query the object klass
1466    generate_type_check(elem_klass, ckoff_arg, ckval_arg, temp,
1467                        &L_store_element, NULL);
1468      // (On fall-through, we have failed the element type check.)
1469    // ======== end loop ========
1470
1471    // It was a real error; we must depend on the caller to finish the job.
1472    // Register "count" = -1 * number of *remaining* oops, length_arg = *total* oops.
1473    // Emit GC store barriers for the oops we have copied (length_arg + count),
1474    // and report their number to the caller.
1475    __ addl(count, length_arg);         // transfers = (length - remaining)
1476    __ movl2ptr(rax, count);            // save the value
1477    __ notptr(rax);                     // report (-1^K) to caller
1478    __ movptr(to, to_arg);              // reload
1479    assert_different_registers(to, count, rax);
1480    gen_write_ref_array_post_barrier(to, count);
1481    __ jmpb(L_done);
1482
1483    // Come here on success only.
1484    __ BIND(L_do_card_marks);
1485    __ movl2ptr(count, length_arg);
1486    __ movptr(to, to_arg);                // reload
1487    gen_write_ref_array_post_barrier(to, count);
1488    __ xorptr(rax, rax);                  // return 0 on success
1489
1490    // Common exit point (success or failure).
1491    __ BIND(L_done);
1492    __ pop(rbx);
1493    __ pop(rdi);
1494    __ pop(rsi);
1495    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
1496    __ leave(); // required for proper stackwalking of RuntimeStub frame
1497    __ ret(0);
1498
1499    return start;
1500  }
1501
1502  //
1503  //  Generate 'unsafe' array copy stub
1504  //  Though just as safe as the other stubs, it takes an unscaled
1505  //  size_t argument instead of an element count.
1506  //
1507  //  Input:
1508  //    4(rsp)   - source array address
1509  //    8(rsp)   - destination array address
1510  //   12(rsp)   - byte count, can be zero
1511  //
1512  //  Output:
1513  //    rax, ==  0  -  success
1514  //    rax, == -1  -  need to call System.arraycopy
1515  //
1516  // Examines the alignment of the operands and dispatches
1517  // to a long, int, short, or byte copy loop.
1518  //
1519  address generate_unsafe_copy(const char *name,
1520                               address byte_copy_entry,
1521                               address short_copy_entry,
1522                               address int_copy_entry,
1523                               address long_copy_entry) {
1524
1525    Label L_long_aligned, L_int_aligned, L_short_aligned;
1526
1527    __ align(CodeEntryAlignment);
1528    StubCodeMark mark(this, "StubRoutines", name);
1529    address start = __ pc();
1530
1531    const Register from       = rax;  // source array address
1532    const Register to         = rdx;  // destination array address
1533    const Register count      = rcx;  // elements count
1534
1535    __ enter(); // required for proper stackwalking of RuntimeStub frame
1536    __ push(rsi);
1537    __ push(rdi);
1538    Address  from_arg(rsp, 12+ 4);      // from
1539    Address    to_arg(rsp, 12+ 8);      // to
1540    Address count_arg(rsp, 12+12);      // byte count
1541
1542    // Load up:
1543    __ movptr(from ,  from_arg);
1544    __ movptr(to   ,    to_arg);
1545    __ movl2ptr(count, count_arg);
1546
1547    // bump this on entry, not on exit:
1548    inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr);
1549
1550    const Register bits = rsi;
1551    __ mov(bits, from);
1552    __ orptr(bits, to);
1553    __ orptr(bits, count);
1554
1555    __ testl(bits, BytesPerLong-1);
1556    __ jccb(Assembler::zero, L_long_aligned);
1557
1558    __ testl(bits, BytesPerInt-1);
1559    __ jccb(Assembler::zero, L_int_aligned);
1560
1561    __ testl(bits, BytesPerShort-1);
1562    __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry));
1563
1564    __ BIND(L_short_aligned);
1565    __ shrptr(count, LogBytesPerShort); // size => short_count
1566    __ movl(count_arg, count);          // update 'count'
1567    __ jump(RuntimeAddress(short_copy_entry));
1568
1569    __ BIND(L_int_aligned);
1570    __ shrptr(count, LogBytesPerInt); // size => int_count
1571    __ movl(count_arg, count);          // update 'count'
1572    __ jump(RuntimeAddress(int_copy_entry));
1573
1574    __ BIND(L_long_aligned);
1575    __ shrptr(count, LogBytesPerLong); // size => qword_count
1576    __ movl(count_arg, count);          // update 'count'
1577    __ pop(rdi); // Do pops here since jlong_arraycopy stub does not do it.
1578    __ pop(rsi);
1579    __ jump(RuntimeAddress(long_copy_entry));
1580
1581    return start;
1582  }
1583
1584
  // Perform range checks on the proposed arraycopy.
  // Smashes src_pos and dst_pos.  (Uses them up for temps.)
  //
  //   src/dst   - array oops
  //   src_pos/dst_pos - start indices; clobbered (become end positions)
  //   length    - stack slot holding the element count
  //   L_failed  - taken (unsigned 'above' compare) when either
  //               src_pos+length or dst_pos+length exceeds the
  //               corresponding array's length field
  void arraycopy_range_checks(Register src,
                              Register src_pos,
                              Register dst,
                              Register dst_pos,
                              Address& length,
                              Label& L_failed) {
    BLOCK_COMMENT("arraycopy_range_checks:");
    const Register src_end = src_pos;   // source array end position
    const Register dst_end = dst_pos;   // destination array end position
    __ addl(src_end, length); // src_pos + length
    __ addl(dst_end, length); // dst_pos + length

    //  if (src_pos + length > arrayOop(src)->length() ) FAIL;
    __ cmpl(src_end, Address(src, arrayOopDesc::length_offset_in_bytes()));
    __ jcc(Assembler::above, L_failed);

    //  if (dst_pos + length > arrayOop(dst)->length() ) FAIL;
    __ cmpl(dst_end, Address(dst, arrayOopDesc::length_offset_in_bytes()));
    __ jcc(Assembler::above, L_failed);

    BLOCK_COMMENT("arraycopy_range_checks done");
  }
1609
1610
  //
  //  Generate generic array copy stubs
  //
  //  Performs the full set of System.arraycopy argument checks (null
  //  arrays, negative positions/length, array-ness, matching element
  //  types, bounds) and then tail-calls the matching specialized stub,
  //  passed in via the entry_* parameters.
  //
  //  Input:
  //     4(rsp)    -  src oop
  //     8(rsp)    -  src_pos
  //    12(rsp)    -  dst oop
  //    16(rsp)    -  dst_pos
  //    20(rsp)    -  element count
  //
  //  Output:
  //    rax, ==  0  -  success
  //    rax, == -1^K - failure, where K is partial transfer count
  //
  address generate_generic_copy(const char *name,
                                address entry_jbyte_arraycopy,
                                address entry_jshort_arraycopy,
                                address entry_jint_arraycopy,
                                address entry_oop_arraycopy,
                                address entry_jlong_arraycopy,
                                address entry_checkcast_arraycopy) {
    Label L_failed, L_failed_0, L_objArray;

    // Pad so that the 5-byte jmp(L_failed) emitted below ends exactly at
    // the next CodeEntryAlignment boundary.  The stub entry then needs no
    // extra padding, and the argument checks in the prologue can reach
    // L_failed through L_failed_0 with short (1-byte displacement) jccb
    // instructions.
    { int modulus = CodeEntryAlignment;
      int target  = modulus - 5; // 5 = sizeof jmp(L_failed)
      int advance = target - (__ offset() % modulus);
      if (advance < 0)  advance += modulus;
      if (advance > 0)  __ nop(advance);
    }
    StubCodeMark mark(this, "StubRoutines", name);

    // Short-hop target to L_failed.  Makes for denser prologue code.
    __ BIND(L_failed_0);
    __ jmp(L_failed);
    assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed");

    __ align(CodeEntryAlignment);
    address start = __ pc();

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    __ push(rsi);
    __ push(rdi);

    // bump this on entry, not on exit:
    inc_counter_np(SharedRuntime::_generic_array_copy_ctr);

    // Input values.  The displacement bias of 12 accounts for the saved
    // rbp (pushed by enter()) plus the rsi and rdi pushed above.
    Address SRC     (rsp, 12+ 4);
    Address SRC_POS (rsp, 12+ 8);
    Address DST     (rsp, 12+12);
    Address DST_POS (rsp, 12+16);
    Address LENGTH  (rsp, 12+20);

    //-----------------------------------------------------------------------
    // Assembler stub will be used for this call to arraycopy
    // if the following conditions are met:
    //
    // (1) src and dst must not be null.
    // (2) src_pos must not be negative.
    // (3) dst_pos must not be negative.
    // (4) length  must not be negative.
    // (5) src klass and dst klass should be the same and not NULL.
    // (6) src and dst should be arrays.
    // (7) src_pos + length must not exceed length of src.
    // (8) dst_pos + length must not exceed length of dst.
    //

    const Register src     = rax;       // source array oop
    const Register src_pos = rsi;
    const Register dst     = rdx;       // destination array oop
    const Register dst_pos = rdi;
    const Register length  = rcx;       // transfer count

    //  if (src == NULL) return -1;
    __ movptr(src, SRC);      // src oop
    __ testptr(src, src);
    __ jccb(Assembler::zero, L_failed_0);

    //  if (src_pos < 0) return -1;
    __ movl2ptr(src_pos, SRC_POS);  // src_pos
    __ testl(src_pos, src_pos);
    __ jccb(Assembler::negative, L_failed_0);

    //  if (dst == NULL) return -1;
    __ movptr(dst, DST);      // dst oop
    __ testptr(dst, dst);
    __ jccb(Assembler::zero, L_failed_0);

    //  if (dst_pos < 0) return -1;
    __ movl2ptr(dst_pos, DST_POS);  // dst_pos
    __ testl(dst_pos, dst_pos);
    __ jccb(Assembler::negative, L_failed_0);

    //  if (length < 0) return -1;
    __ movl2ptr(length, LENGTH);   // length
    __ testl(length, length);
    __ jccb(Assembler::negative, L_failed_0);

    //  if (src->klass() == NULL) return -1;
    Address src_klass_addr(src, oopDesc::klass_offset_in_bytes());
    Address dst_klass_addr(dst, oopDesc::klass_offset_in_bytes());
    const Register rcx_src_klass = rcx;    // array klass
    // NOTE: this clobbers 'length' (also rcx); length is reloaded from
    // the LENGTH stack slot wherever it is needed below.
    __ movptr(rcx_src_klass, Address(src, oopDesc::klass_offset_in_bytes()));

#ifdef ASSERT
    //  assert(src->klass() != NULL);
    BLOCK_COMMENT("assert klasses not null");
    { Label L1, L2;
      __ testptr(rcx_src_klass, rcx_src_klass);
      __ jccb(Assembler::notZero, L2);   // it is broken if klass is NULL
      __ bind(L1);
      __ stop("broken null klass");
      __ bind(L2);
      // dst klass must be non-null as well; jump back to the stop above.
      __ cmpptr(dst_klass_addr, (int32_t)NULL_WORD);
      __ jccb(Assembler::equal, L1);      // this would be broken also
      BLOCK_COMMENT("assert done");
    }
#endif //ASSERT

    // Load layout helper (32-bits)
    //
    //  |array_tag|     | header_size | element_type |     |log2_element_size|
    // 32        30    24            16              8     2                 0
    //
    //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
    //

    int lh_offset = klassOopDesc::header_size() * HeapWordSize +
                    Klass::layout_helper_offset_in_bytes();
    Address src_klass_lh_addr(rcx_src_klass, lh_offset);

    // Handle objArrays completely differently...
    jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
    __ cmpl(src_klass_lh_addr, objArray_lh);
    __ jcc(Assembler::equal, L_objArray);

    //  if (src->klass() != dst->klass()) return -1;
    __ cmpptr(rcx_src_klass, dst_klass_addr);
    __ jccb(Assembler::notEqual, L_failed_0);

    const Register rcx_lh = rcx;  // layout helper
    assert(rcx_lh == rcx_src_klass, "known alias");
    __ movl(rcx_lh, src_klass_lh_addr);

    //  if (!src->is_Array()) return -1;
    // Array layout helpers are negative; non-arrays are >= _lh_neutral_value.
    __ cmpl(rcx_lh, Klass::_lh_neutral_value);
    __ jcc(Assembler::greaterEqual, L_failed_0); // signed cmp

    // At this point, it is known to be a typeArray (array_tag 0x3).
#ifdef ASSERT
    { Label L;
      __ cmpl(rcx_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift));
      __ jcc(Assembler::greaterEqual, L); // signed cmp
      __ stop("must be a primitive array");
      __ bind(L);
    }
#endif

    assert_different_registers(src, src_pos, dst, dst_pos, rcx_lh);
    arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed);

    // typeArrayKlass
    //
    // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
    // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
    //
    const Register rsi_offset = rsi; // array offset
    const Register src_array  = src; // src array offset
    const Register dst_array  = dst; // dst array offset
    const Register rdi_elsize = rdi; // log2 element size

    // Extract the header size from the layout helper and advance both
    // array oops to the address of their first element.
    __ mov(rsi_offset, rcx_lh);
    __ shrptr(rsi_offset, Klass::_lh_header_size_shift);
    __ andptr(rsi_offset, Klass::_lh_header_size_mask);   // array_offset
    __ addptr(src_array, rsi_offset);  // src array offset
    __ addptr(dst_array, rsi_offset);  // dst array offset
    __ andptr(rcx_lh, Klass::_lh_log2_element_size_mask); // log2 elsize

    // next registers should be set before the jump to corresponding stub
    const Register from       = src; // source array address
    const Register to         = dst; // destination array address
    const Register count      = rcx; // elements count
    // some of them should be duplicated on stack
#define FROM   Address(rsp, 12+ 4)
#define TO     Address(rsp, 12+ 8)   // Not used now
#define COUNT  Address(rsp, 12+12)   // Only for oop arraycopy

    BLOCK_COMMENT("scale indexes to element size");
    __ movl2ptr(rsi, SRC_POS);  // src_pos
    __ shlptr(rsi);             // src_pos << rcx (log2 elsize)
    assert(src_array == from, "");
    __ addptr(from, rsi);       // from = src_array + SRC_POS << log2 elsize
    __ movl2ptr(rdi, DST_POS);  // dst_pos
    __ shlptr(rdi);             // dst_pos << rcx (log2 elsize)
    assert(dst_array == to, "");
    __ addptr(to,  rdi);        // to   = dst_array + DST_POS << log2 elsize
    __ movptr(FROM, from);      // src_addr
    __ mov(rdi_elsize, rcx_lh); // log2 elsize
    __ movl2ptr(count, LENGTH); // elements count

    // Dispatch on log2(element size): 0 -> byte, 1 -> short, 2 -> int,
    // 3 -> long.  Each target stub returns directly to our caller.
    BLOCK_COMMENT("choose copy loop based on element size");
    __ cmpl(rdi_elsize, 0);

    __ jump_cc(Assembler::equal, RuntimeAddress(entry_jbyte_arraycopy));
    __ cmpl(rdi_elsize, LogBytesPerShort);
    __ jump_cc(Assembler::equal, RuntimeAddress(entry_jshort_arraycopy));
    __ cmpl(rdi_elsize, LogBytesPerInt);
    __ jump_cc(Assembler::equal, RuntimeAddress(entry_jint_arraycopy));
#ifdef ASSERT
    __ cmpl(rdi_elsize, LogBytesPerLong);
    __ jccb(Assembler::notEqual, L_failed);
#endif
    __ pop(rdi); // Do pops here since jlong_arraycopy stub does not do it.
    __ pop(rsi);
    __ jump(RuntimeAddress(entry_jlong_arraycopy));

  __ BIND(L_failed);
    // Return -1 (0 elements copied, failure) per the stub contract above.
    __ xorptr(rax, rax);
    __ notptr(rax); // return -1
    __ pop(rdi);
    __ pop(rsi);
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    // objArrayKlass
  __ BIND(L_objArray);
    // live at this point:  rcx_src_klass, src[_pos], dst[_pos]

    Label L_plain_copy, L_checkcast_copy;
    //  test array classes for subtyping
    __ cmpptr(rcx_src_klass, dst_klass_addr); // usual case is exact equality
    __ jccb(Assembler::notEqual, L_checkcast_copy);

    // Identically typed arrays can be copied without element-wise checks.
    assert_different_registers(src, src_pos, dst, dst_pos, rcx_src_klass);
    arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed);

  __ BIND(L_plain_copy);
    __ movl2ptr(count, LENGTH); // elements count
    __ movl2ptr(src_pos, SRC_POS);  // reload src_pos
    __ lea(from, Address(src, src_pos, Address::times_ptr,
                 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr
    __ movl2ptr(dst_pos, DST_POS);  // reload dst_pos
    __ lea(to,   Address(dst, dst_pos, Address::times_ptr,
                 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr
    __ movptr(FROM,  from);   // src_addr
    __ movptr(TO,    to);     // dst_addr
    __ movl(COUNT, count);  // count
    __ jump(RuntimeAddress(entry_oop_arraycopy));

  __ BIND(L_checkcast_copy);
    // live at this point:  rcx_src_klass, dst[_pos], src[_pos]
    {
      // Handy offsets:
      int  ek_offset = (klassOopDesc::header_size() * HeapWordSize +
                        objArrayKlass::element_klass_offset_in_bytes());
      int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
                        Klass::super_check_offset_offset_in_bytes());

      Register rsi_dst_klass = rsi;
      Register rdi_temp      = rdi;
      assert(rsi_dst_klass == src_pos, "expected alias w/ src_pos");
      assert(rdi_temp      == dst_pos, "expected alias w/ dst_pos");
      Address dst_klass_lh_addr(rsi_dst_klass, lh_offset);

      // Before looking at dst.length, make sure dst is also an objArray.
      __ movptr(rsi_dst_klass, dst_klass_addr);
      __ cmpl(dst_klass_lh_addr, objArray_lh);
      __ jccb(Assembler::notEqual, L_failed);

      // It is safe to examine both src.length and dst.length.
      __ movl2ptr(src_pos, SRC_POS);        // reload rsi
      arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed);
      // (Now src_pos and dst_pos are killed, but not src and dst.)

      // We'll need this temp (don't forget to pop it after the type check).
      __ push(rbx);
      Register rbx_src_klass = rbx;

      __ mov(rbx_src_klass, rcx_src_klass); // spill away from rcx
      __ movptr(rsi_dst_klass, dst_klass_addr);
      Address super_check_offset_addr(rsi_dst_klass, sco_offset);
      Label L_fail_array_check;
      generate_type_check(rbx_src_klass,
                          super_check_offset_addr, dst_klass_addr,
                          rdi_temp, NULL, &L_fail_array_check);
      // (On fall-through, we have passed the array type check.)
      __ pop(rbx);
      __ jmp(L_plain_copy);

      __ BIND(L_fail_array_check);
      // Reshuffle arguments so we can call checkcast_arraycopy:

      // match initial saves for checkcast_arraycopy
      // push(rsi);    // already done; see above
      // push(rdi);    // already done; see above
      // push(rbx);    // already done; see above

      // Marshal outgoing arguments now, freeing registers.
      // Offsets are now 16+... because of the extra pushed rbx.
      Address   from_arg(rsp, 16+ 4);   // from
      Address     to_arg(rsp, 16+ 8);   // to
      Address length_arg(rsp, 16+12);   // elements count
      Address  ckoff_arg(rsp, 16+16);   // super_check_offset
      Address  ckval_arg(rsp, 16+20);   // super_klass

      Address SRC_POS_arg(rsp, 16+ 8);
      Address DST_POS_arg(rsp, 16+16);
      Address  LENGTH_arg(rsp, 16+20);
      // push rbx, changed the incoming offsets (why not just use rbp,??)
      // assert(SRC_POS_arg.disp() == SRC_POS.disp() + 4, "");

      __ movptr(rbx, Address(rsi_dst_klass, ek_offset));
      __ movl2ptr(length, LENGTH_arg);    // reload elements count
      __ movl2ptr(src_pos, SRC_POS_arg);  // reload src_pos
      __ movl2ptr(dst_pos, DST_POS_arg);  // reload dst_pos

      __ movptr(ckval_arg, rbx);          // destination element type
      __ movl(rbx, Address(rbx, sco_offset));
      __ movl(ckoff_arg, rbx);          // corresponding class check offset

      __ movl(length_arg, length);      // outgoing length argument

      __ lea(from, Address(src, src_pos, Address::times_ptr,
                            arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
      __ movptr(from_arg, from);

      __ lea(to, Address(dst, dst_pos, Address::times_ptr,
                          arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
      __ movptr(to_arg, to);
      __ jump(RuntimeAddress(entry_checkcast_arraycopy));
    }

    return start;
  }
1945
  // Generates every arraycopy stub and publishes their entry points via
  // StubRoutines.  The order of the calls below matters: 'entry' is a
  // scratch slot filled by each generate_disjoint_* call and consumed by
  // the generate_conjoint_* call that immediately follows it (presumably
  // so the conjoint stub can branch to the disjoint code when it is safe
  // -- confirm against the generate_*_copy definitions).
  void generate_arraycopy_stubs() {
    address entry;
    // "No-overlap-check" entry points of the element-typed stubs,
    // collected here and handed to the unsafe/generic composite stubs
    // generated at the bottom of this method.
    address entry_jbyte_arraycopy;
    address entry_jshort_arraycopy;
    address entry_jint_arraycopy;
    address entry_oop_arraycopy;
    address entry_jlong_arraycopy;
    address entry_checkcast_arraycopy;

    // Byte copies: aligned ("arrayof") and unaligned variants.
    StubRoutines::_arrayof_jbyte_disjoint_arraycopy =
        generate_disjoint_copy(T_BYTE,  true, Address::times_1, &entry,
                               "arrayof_jbyte_disjoint_arraycopy");
    StubRoutines::_arrayof_jbyte_arraycopy =
        generate_conjoint_copy(T_BYTE,  true, Address::times_1,  entry,
                               NULL, "arrayof_jbyte_arraycopy");
    StubRoutines::_jbyte_disjoint_arraycopy =
        generate_disjoint_copy(T_BYTE, false, Address::times_1, &entry,
                               "jbyte_disjoint_arraycopy");
    StubRoutines::_jbyte_arraycopy =
        generate_conjoint_copy(T_BYTE, false, Address::times_1,  entry,
                               &entry_jbyte_arraycopy, "jbyte_arraycopy");

    // Short copies: aligned and unaligned variants.
    StubRoutines::_arrayof_jshort_disjoint_arraycopy =
        generate_disjoint_copy(T_SHORT,  true, Address::times_2, &entry,
                               "arrayof_jshort_disjoint_arraycopy");
    StubRoutines::_arrayof_jshort_arraycopy =
        generate_conjoint_copy(T_SHORT,  true, Address::times_2,  entry,
                               NULL, "arrayof_jshort_arraycopy");
    StubRoutines::_jshort_disjoint_arraycopy =
        generate_disjoint_copy(T_SHORT, false, Address::times_2, &entry,
                               "jshort_disjoint_arraycopy");
    StubRoutines::_jshort_arraycopy =
        generate_conjoint_copy(T_SHORT, false, Address::times_2,  entry,
                               &entry_jshort_arraycopy, "jshort_arraycopy");

    // Next arrays are always aligned on 4 bytes at least.
    StubRoutines::_jint_disjoint_arraycopy =
        generate_disjoint_copy(T_INT, true, Address::times_4, &entry,
                               "jint_disjoint_arraycopy");
    StubRoutines::_jint_arraycopy =
        generate_conjoint_copy(T_INT, true, Address::times_4,  entry,
                               &entry_jint_arraycopy, "jint_arraycopy");

    StubRoutines::_oop_disjoint_arraycopy =
        generate_disjoint_copy(T_OBJECT, true, Address::times_ptr, &entry,
                               "oop_disjoint_arraycopy");
    StubRoutines::_oop_arraycopy =
        generate_conjoint_copy(T_OBJECT, true, Address::times_ptr,  entry,
                               &entry_oop_arraycopy, "oop_arraycopy");

    StubRoutines::_jlong_disjoint_arraycopy =
        generate_disjoint_long_copy(&entry, "jlong_disjoint_arraycopy");
    StubRoutines::_jlong_arraycopy =
        generate_conjoint_long_copy(entry, &entry_jlong_arraycopy,
                                    "jlong_arraycopy");

    // The int/oop/long stubs above were generated as aligned copies, so
    // the arrayof_ variants can simply share them.
    StubRoutines::_arrayof_jint_disjoint_arraycopy  =
        StubRoutines::_jint_disjoint_arraycopy;
    StubRoutines::_arrayof_oop_disjoint_arraycopy   =
        StubRoutines::_oop_disjoint_arraycopy;
    StubRoutines::_arrayof_jlong_disjoint_arraycopy =
        StubRoutines::_jlong_disjoint_arraycopy;

    StubRoutines::_arrayof_jint_arraycopy  = StubRoutines::_jint_arraycopy;
    StubRoutines::_arrayof_oop_arraycopy   = StubRoutines::_oop_arraycopy;
    StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy;

    // Composite stubs; these must come last because they tail-call the
    // entry points captured above.
    StubRoutines::_checkcast_arraycopy =
        generate_checkcast_copy("checkcast_arraycopy",
                                  &entry_checkcast_arraycopy);

    StubRoutines::_unsafe_arraycopy =
        generate_unsafe_copy("unsafe_arraycopy",
                               entry_jbyte_arraycopy,
                               entry_jshort_arraycopy,
                               entry_jint_arraycopy,
                               entry_jlong_arraycopy);

    StubRoutines::_generic_arraycopy =
        generate_generic_copy("generic_arraycopy",
                               entry_jbyte_arraycopy,
                               entry_jshort_arraycopy,
                               entry_jint_arraycopy,
                               entry_oop_arraycopy,
                               entry_jlong_arraycopy,
                               entry_checkcast_arraycopy);
  }
2033
2034 public:
2035  // Information about frame layout at time of blocking runtime call.
2036  // Note that we only have to preserve callee-saved registers since
2037  // the compilers are responsible for supplying a continuation point
2038  // if they expect all registers to be preserved.
2039  enum layout {
2040    thread_off,    // last_java_sp
2041    rbp_off,       // callee saved register
2042    ret_pc,
2043    framesize
2044  };
2045
2046 private:
2047
2048#undef  __
2049#define __ masm->
2050
  //------------------------------------------------------------------------------------------------------------------------
  // Continuation point for throwing of implicit exceptions that are not handled in
  // the current activation. Fabricates an exception oop and initiates normal
  // exception dispatching in this frame.
  //
  // Previously the compiler (c2) allowed for callee save registers on Java calls.
  // This is no longer true after adapter frames were removed but could possibly
  // be brought back in the future if the interpreter code was reworked and it
  // was deemed worthwhile. The comment below was left to describe what must
  // happen here if callee saves were resurrected. As it stands now this stub
  // could actually be a vanilla BufferBlob and have no oopMap at all.
  // Since it doesn't make much difference we've chosen to leave it the
  // way it was in the callee save days and keep the comment.

  // If we need to preserve callee-saved values we need a callee-saved oop map and
  // therefore have to make these stubs into RuntimeStubs rather than BufferBlobs.
  // If the compiler needs all registers to be preserved between the fault
  // point and the exception handler then it must assume responsibility for that in
  // AbstractCompiler::continuation_for_implicit_null_exception or
  // continuation_for_implicit_division_by_zero_exception. All other implicit
  // exceptions (e.g., NullPointerException or AbstractMethodError on entry) are
  // either at call sites or otherwise assume that stack unwinding will be initiated,
  // so caller saved registers were assumed volatile in the compiler.
  //
  //  name          - stub name; also labels the generated RuntimeStub
  //  runtime_entry - C++ runtime routine invoked with the JavaThread* as its
  //                  first (and only marshalled) argument
  //  restore_saved_exception_pc - when true, the pc stashed in
  //                  JavaThread::saved_exception_pc is re-pushed as the return
  //                  address before the frame is built, so stack walking
  //                  attributes the exception to the faulting instruction
  address generate_throw_exception(const char* name, address runtime_entry,
                                   bool restore_saved_exception_pc) {

    int insts_size = 256;
    int locs_size  = 32;

    CodeBuffer code(name, insts_size, locs_size);
    OopMapSet* oop_maps  = new OopMapSet();
    MacroAssembler* masm = new MacroAssembler(&code);

    address start = __ pc();

    // This is an inlined and slightly modified version of call_VM
    // which has the ability to fetch the return PC out of
    // thread-local storage and also sets up last_Java_sp slightly
    // differently than the real call_VM
    Register java_thread = rbx;
    __ get_thread(java_thread);
    if (restore_saved_exception_pc) {
      __ movptr(rax, Address(java_thread, in_bytes(JavaThread::saved_exception_pc_offset())));
      __ push(rax);
    }

    __ enter(); // required for proper stackwalking of RuntimeStub frame

    // pc and rbp, already pushed
    __ subptr(rsp, (framesize-2) * wordSize); // prolog

    // Frame is now completed as far as size and linkage.

    int frame_complete = __ pc() - start;

    // push java thread (becomes first argument of C function)
    __ movptr(Address(rsp, thread_off * wordSize), java_thread);

    // Set up last_Java_sp and last_Java_fp
    __ set_last_Java_frame(java_thread, rsp, rbp, NULL);

    // Call runtime
    BLOCK_COMMENT("call runtime_entry");
    __ call(RuntimeAddress(runtime_entry));
    // Generate oop map
    OopMap* map =  new OopMap(framesize, 0);
    oop_maps->add_gc_map(__ pc() - start, map);

    // restore the thread (cannot use the pushed argument since arguments
    // may be overwritten by C code generated by an optimizing compiler);
    // however can use the register value directly if it is callee saved.
    __ get_thread(java_thread);

    __ reset_last_Java_frame(java_thread, true, false);

    __ leave(); // required for proper stackwalking of RuntimeStub frame

    // check for pending exceptions
#ifdef ASSERT
    // The runtime call must have installed a pending exception.
    Label L;
    __ cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
    __ jcc(Assembler::notEqual, L);
    __ should_not_reach_here();
    __ bind(L);
#endif /* ASSERT */
    // Dispatch the pending exception via the shared forwarding stub.
    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));


    RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, framesize, oop_maps, false);
    return stub->entry_point();
  }
2142
2143
  // Initializes the x87 FPU control words, the SSE MXCSR value, and the
  // 80-bit bias constants used by the strict-fp stubs.
  void create_control_words() {
    // Round to nearest, 53-bit precision, exceptions masked
    StubRoutines::_fpu_cntrl_wrd_std   = 0x027F;
    // Round to zero, 53-bit precision, exceptions masked
    StubRoutines::_fpu_cntrl_wrd_trunc = 0x0D7F;
    // Round to nearest, 24-bit precision, exceptions masked
    StubRoutines::_fpu_cntrl_wrd_24    = 0x007F;
    // Round to nearest, 64-bit precision, exceptions masked
    StubRoutines::_fpu_cntrl_wrd_64    = 0x037F;
    // SSE MXCSR: round to nearest, all exceptions masked
    StubRoutines::_mxcsr_std           = 0x1F80;
    // Note: the following two constants are 80-bit values
    //       layout is critical for correct loading by FPU.
    // Bias for strict fp multiply/divide
    StubRoutines::_fpu_subnormal_bias1[0]= 0x00000000; // 2^(-15360) == 0x03ff 8000 0000 0000 0000
    StubRoutines::_fpu_subnormal_bias1[1]= 0x80000000;
    StubRoutines::_fpu_subnormal_bias1[2]= 0x03ff;
    // Un-Bias for strict fp multiply/divide
    StubRoutines::_fpu_subnormal_bias2[0]= 0x00000000; // 2^(+15360) == 0x7bff 8000 0000 0000 0000
    StubRoutines::_fpu_subnormal_bias2[1]= 0x80000000;
    StubRoutines::_fpu_subnormal_bias2[2]= 0x7bff;
  }
2166
2167  //---------------------------------------------------------------------------
2168  // Initialization
2169
  // Generates the initial subset of stubs needed early in VM startup and
  // initializes their entry points (the remainder is emitted later by
  // generate_all()).
  void generate_initial() {
    //------------------------------------------------------------------------------------------------------------------------
    // entry points that exist in all platforms
    // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than
    //       the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp.
    StubRoutines::_forward_exception_entry      = generate_forward_exception();

    StubRoutines::_call_stub_entry              =
      generate_call_stub(StubRoutines::_call_stub_return_address);
    // is referenced by megamorphic call
    StubRoutines::_catch_exception_entry        = generate_catch_exception();

    // These are currently used by Solaris/Intel
    StubRoutines::_atomic_xchg_entry            = generate_atomic_xchg();

    StubRoutines::_handler_for_unsafe_access_entry =
      generate_handler_for_unsafe_access();

    // platform dependent
    // Populate the FPU/SSE control-word constants first; the verify stubs
    // generated below presumably reference them -- see their definitions.
    create_control_words();

    StubRoutines::x86::_verify_mxcsr_entry                 = generate_verify_mxcsr();
    StubRoutines::x86::_verify_fpu_cntrl_wrd_entry         = generate_verify_fpu_cntrl_wrd();
    // d2i/d2l wrappers share one generator, parameterized by result type
    // and the SharedRuntime conversion routine.
    StubRoutines::_d2i_wrapper                              = generate_d2i_wrapper(T_INT,
                                                                                   CAST_FROM_FN_PTR(address, SharedRuntime::d2i));
    StubRoutines::_d2l_wrapper                              = generate_d2i_wrapper(T_LONG,
                                                                                   CAST_FROM_FN_PTR(address, SharedRuntime::d2l));
  }
2200
2201
  // Generates the remaining stubs (those that require a more fully
  // initialized VM than generate_initial()) and initializes their entry
  // points.
  void generate_all() {
    // These entry points require SharedInfo::stack0 to be set up in non-core builds
    // and need to be relocatable, so they each fabricate a RuntimeStub internally.
    // The boolean argument selects whether the saved exception pc is
    // restored as the return address (see generate_throw_exception).
    StubRoutines::_throw_AbstractMethodError_entry         = generate_throw_exception("AbstractMethodError throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError),  false);
    StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError),  false);
    StubRoutines::_throw_ArithmeticException_entry         = generate_throw_exception("ArithmeticException throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_ArithmeticException),  true);
    StubRoutines::_throw_NullPointerException_entry        = generate_throw_exception("NullPointerException throw_exception",         CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException), true);
    StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
    StubRoutines::_throw_StackOverflowError_entry          = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError),   false);

    //------------------------------------------------------------------------------------------------------------------------
    // entry points that are platform specific

    // support for verify_oop (must happen after universe_init)
    StubRoutines::_verify_oop_subroutine_entry     = generate_verify_oop();

    // arraycopy stubs used by compilers
    generate_arraycopy_stubs();
  }
2223
2224
2225 public:
2226  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
2227    if (all) {
2228      generate_all();
2229    } else {
2230      generate_initial();
2231    }
2232  }
2233}; // end class declaration
2234
2235
2236void StubGenerator_generate(CodeBuffer* code, bool all) {
2237  StubGenerator g(code, all);
2238}
2239