// stubGenerator_sparc.cpp revision 3864:f34d701e952e
/*
 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "assembler_sparc.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "nativeInst_sparc.hpp"
#include "oops/instanceOop.hpp"
#include "oops/method.hpp"
#include "oops/objArrayKlass.hpp"
#include "oops/oop.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/thread.inline.hpp"
#include "utilities/top.hpp"
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif

// Declaration and definition of StubGenerator (no .hpp file).
// For a more detailed description of the stub routine structure
// see the comment in stubRoutines.hpp.

#define __ _masm->

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Note: The register L7 is used as L7_thread_cache, and may not be used
// any other way within this module.
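// A note on the assembler shorthand used throughout this file: every line
// of the form `__ insn(...)` expands, via the macro above, to
// `_masm->insn(...)`.  For example, a hypothetical two-instruction sequence
//
//   __ mov(G0, O0);
//   __ retl();
//
// is simply
//
//   _masm->mov(G0, O0);
//   _masm->retl();
//
// emitted at the current position in the code buffer.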

static const Register& Lstub_temp = L2;

// -------------------------------------------------------------------------------------------------------------------------
// Stub Code definitions

static address handle_unsafe_access() {
  JavaThread* thread = JavaThread::current();
  address pc  = thread->saved_exception_pc();
  address npc = thread->saved_exception_npc();
  // pc is the instruction which we must emulate
  // doing a no-op is fine: return garbage from the load

  // request an async exception
  thread->set_pending_unsafe_access_error();

  // return address of next instruction to execute
  return npc;
}

class StubGenerator: public StubCodeGenerator {
 private:

#ifdef PRODUCT
#define inc_counter_np(a,b,c) (0)
#else
#define inc_counter_np(counter, t1, t2) \
  BLOCK_COMMENT("inc_counter " #counter); \
  __ inc_counter(&counter, t1, t2);
#endif

  //----------------------------------------------------------------------------------------------------
  // Call stubs are used to call Java from C

  address generate_call_stub(address& return_pc) {
    StubCodeMark mark(this, "StubRoutines", "call_stub");
    address start = __ pc();

    // Incoming arguments:
    //
    // o0         : call wrapper address
    // o1         : result (address)
    // o2         : result type
    // o3         : method
    // o4         : (interpreter) entry point
    // o5         : parameters (address)
    // [sp + 0x5c]: parameter size (in words)
    // [sp + 0x60]: thread
    //
    //     +---------------+ <--- sp + 0
    //     |               |
    //     . reg save area .
    //     |               |
    //     +---------------+ <--- sp + 0x40
    //     |               |
    //     . extra 7 slots .
    //     |               |
    //     +---------------+ <--- sp + 0x5c
    //     |  param. size  |
    //     +---------------+ <--- sp + 0x60
    //     |    thread     |
    //     +---------------+
    //     |               |

    // note: if the link argument position changes, adjust
    //       the code in frame::entry_frame_call_wrapper()

    const Argument link           = Argument(0, false); // used only for GC
    const Argument result         = Argument(1, false);
    const Argument result_type    = Argument(2, false);
    const Argument method         = Argument(3, false);
    const Argument entry_point    = Argument(4, false);
    const Argument parameters     = Argument(5, false);
    const Argument parameter_size = Argument(6, false);
    const Argument thread         = Argument(7, false);

    // setup thread register
    __ ld_ptr(thread.as_address(), G2_thread);
    __ reinit_heapbase();

#ifdef ASSERT
    // make sure we have no pending exceptions
    { const Register t = G3_scratch;
      Label L;
      __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t);
      __ br_null_short(t, Assembler::pt, L);
      __ stop("StubRoutines::call_stub: entered with pending exception");
      __ bind(L);
    }
#endif

    // create activation frame & allocate space for parameters
    { const Register t = G3_scratch;
      __ ld_ptr(parameter_size.as_address(), t);            // get parameter size (in words)
      __ add(t, frame::memory_parameter_word_sp_offset, t); // add space for save area (in words)
      __ round_to(t, WordsPerLong);                         // make sure it is multiple of 2 (in words)
      __ sll(t, Interpreter::logStackElementSize, t);       // compute number of bytes
      __ neg(t);                                            // negate so it can be used with save
      __ save(SP, t, SP);                                   // setup new frame
    }

    //     +---------------+ <--- sp + 0
    //     |               |
    //     . reg save area .
    //     |               |
    //     +---------------+ <--- sp + 0x40
    //     |               |
    //     . extra 7 slots .
    //     |               |
    //     +---------------+ <--- sp + 0x5c
    //     |  empty slot   |      (only if parameter size is even)
    //     +---------------+
    //     |               |
    //     .  parameters   .
    //     |               |
    //     +---------------+ <--- fp + 0
    //     |               |
    //     . reg save area .
    //     |               |
    //     +---------------+ <--- fp + 0x40
    //     |               |
    //     . extra 7 slots .
    //     |               |
    //     +---------------+ <--- fp + 0x5c
    //     |  param. size  |
    //     +---------------+ <--- fp + 0x60
    //     |    thread     |
    //     +---------------+
    //     |               |

    // pass parameters if any
    BLOCK_COMMENT("pass parameters if any");
    { const Register src = parameters.as_in().as_register();
      const Register dst = Lentry_args;
      const Register tmp = G3_scratch;
      const Register cnt = G4_scratch;

      // test if any parameters & setup of Lentry_args
      Label exit;
      __ ld_ptr(parameter_size.as_in().as_address(), cnt); // parameter counter
      __ add( FP, STACK_BIAS, dst );
      __ cmp_zero_and_br(Assembler::zero, cnt, exit);
      __ delayed()->sub(dst, BytesPerWord, dst);           // setup Lentry_args

      // copy parameters if any
      Label loop;
      __ BIND(loop);
      // Store parameter value
      __ ld_ptr(src, 0, tmp);
      __ add(src, BytesPerWord, src);
      __ st_ptr(tmp, dst, 0);
      __ deccc(cnt);
      __ br(Assembler::greater, false, Assembler::pt, loop);
      __ delayed()->sub(dst, Interpreter::stackElementSize, dst);

      // done
      __ BIND(exit);
    }

    // setup parameters, method & call Java function
#ifdef ASSERT
    // layout_activation_impl checks its notion of saved SP against
    // this register, so if this changes update it as well.
    const Register saved_SP = Lscratch;
    __ mov(SP, saved_SP);                               // keep track of SP before call
#endif

    // setup parameters
    const Register t = G3_scratch;
    __ ld_ptr(parameter_size.as_in().as_address(), t); // get parameter size (in words)
    __ sll(t, Interpreter::logStackElementSize, t);    // compute number of bytes
    __ sub(FP, t, Gargs);                              // setup parameter pointer
#ifdef _LP64
    __ add( Gargs, STACK_BIAS, Gargs );                // Account for LP64 stack bias
#endif
    __ mov(SP, O5_savedSP);


    // do the call
    //
    // the following registers must be set up:
    //
    // G2_thread
    // G5_method
    // Gargs
    BLOCK_COMMENT("call Java function");
    __ jmpl(entry_point.as_in().as_register(), G0, O7);
    __ delayed()->mov(method.as_in().as_register(), G5_method);   // setup method

    BLOCK_COMMENT("call_stub_return_address:");
    return_pc = __ pc();

    // The callee, if it wasn't interpreted, can return with SP changed so
    // we can no longer assert that SP is unchanged.

    // store result depending on type
    // (everything that is not T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE
    //  is treated as T_INT)
    { const Register addr = result     .as_in().as_register();
      const Register type = result_type.as_in().as_register();
      Label is_long, is_float, is_double, is_object, exit;
      __            cmp(type, T_OBJECT);  __ br(Assembler::equal, false, Assembler::pn, is_object);
      __ delayed()->cmp(type, T_FLOAT);   __ br(Assembler::equal, false, Assembler::pn, is_float);
      __ delayed()->cmp(type, T_DOUBLE);  __ br(Assembler::equal, false, Assembler::pn, is_double);
      __ delayed()->cmp(type, T_LONG);    __ br(Assembler::equal, false, Assembler::pn, is_long);
      __ delayed()->nop();

      // store int result
      __ st(O0, addr, G0);

      __ BIND(exit);
      __ ret();
      __ delayed()->restore();

      __ BIND(is_object);
      __ ba(exit);
      __ delayed()->st_ptr(O0, addr, G0);

      __ BIND(is_float);
      __ ba(exit);
      __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0);

      __ BIND(is_double);
      __ ba(exit);
      __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0);

      __ BIND(is_long);
#ifdef _LP64
      __ ba(exit);
      __ delayed()->st_long(O0, addr, G0);   // store entire long
#else
#if defined(COMPILER2)
      // All return values are where we want them, except for Longs.  C2 returns
      // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1.
      // Since the interpreter will return longs in G1 and O0/O1 in the 32bit
      // build we simply always use G1.
      // Note: I tried to make c2 return longs in O0/O1 and G1 so we wouldn't have to
      // do this here. Unfortunately if we did a rethrow we'd see a MachEpilog node
      // first which would move g1 -> O0/O1 and destroy the exception we were throwing.

      __ ba(exit);
      __ delayed()->stx(G1, addr, G0);       // store entire long
#else
      __ st(O1, addr, BytesPerInt);
      __ ba(exit);
      __ delayed()->st(O0, addr, G0);
#endif /* COMPILER2 */
#endif /* _LP64 */
    }
    return start;
  }

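  // For orientation (explanatory note, not generated code): from C++ the
  // stub above is reached through a function pointer whose shape is roughly
  //
  //   typedef void (*CallStub)(address   link,            // call wrapper
  //                            intptr_t* result,
  //                            BasicType result_type,
  //                            Method*   method,
  //                            address   entry_point,
  //                            intptr_t* parameters,
  //                            int       parameter_size,  // in words
  //                            Thread*   thread);
  //
  // a sketch inferred from the eight Argument slots read above; the exact
  // typedef lives with StubRoutines::call_stub(), not in this file.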

  //----------------------------------------------------------------------------------------------------
  // Return point for a Java call if there's an exception thrown in Java code.
  // The exception is caught and transformed into a pending exception stored in
  // JavaThread that can be tested from within the VM.
  //
  // Oexception: exception oop

  address generate_catch_exception() {
    StubCodeMark mark(this, "StubRoutines", "catch_exception");

    address start = __ pc();
    // verify that thread corresponds
    __ verify_thread();

    const Register& temp_reg = Gtemp;
    Address pending_exception_addr    (G2_thread, Thread::pending_exception_offset());
    Address exception_file_offset_addr(G2_thread, Thread::exception_file_offset   ());
    Address exception_line_offset_addr(G2_thread, Thread::exception_line_offset   ());

    // set pending exception
    __ verify_oop(Oexception);
    __ st_ptr(Oexception, pending_exception_addr);
    __ set((intptr_t)__FILE__, temp_reg);
    __ st_ptr(temp_reg, exception_file_offset_addr);
    __ set((intptr_t)__LINE__, temp_reg);
    __ st(temp_reg, exception_line_offset_addr);

    // complete return to VM
    assert(StubRoutines::_call_stub_return_address != NULL, "must have been generated before");

    AddressLiteral stub_ret(StubRoutines::_call_stub_return_address);
    __ jump_to(stub_ret, temp_reg);
    __ delayed()->nop();

    return start;
  }

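  // How the stubs around this point cooperate (explanatory note):
  // catch_exception parks the exception oop in Thread::_pending_exception
  // and resumes at the call stub's return address, so the C++ caller sees
  // a normal return and then polls the thread, roughly:
  //
  //   StubRoutines::call_stub()(...);          // returns normally
  //   if (thread->has_pending_exception()) {
  //     // handle or propagate the Java exception
  //   }
  //
  // forward_exception, below, goes the other direction: it turns a pending
  // exception back into Java-level exception dispatch.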

  //----------------------------------------------------------------------------------------------------
  // Continuation point for runtime calls returning with a pending exception
  // The pending exception check happened in the runtime or native call stub
  // The pending exception in Thread is converted into a Java-level exception
  //
  // Contract with Java-level exception handler: O0 = exception
  //                                             O1 = throwing pc

  address generate_forward_exception() {
    StubCodeMark mark(this, "StubRoutines", "forward_exception");
    address start = __ pc();

    // Upon entry, O7 has the return address returning into Java
    // (interpreted or compiled) code; i.e. the return address
    // becomes the throwing pc.

    const Register& handler_reg = Gtemp;

    Address exception_addr(G2_thread, Thread::pending_exception_offset());

#ifdef ASSERT
    // make sure that this code is only executed if there is a pending exception
    { Label L;
      __ ld_ptr(exception_addr, Gtemp);
      __ br_notnull_short(Gtemp, Assembler::pt, L);
      __ stop("StubRoutines::forward exception: no pending exception (1)");
      __ bind(L);
    }
#endif

    // compute exception handler into handler_reg
    __ get_thread();
    __ ld_ptr(exception_addr, Oexception);
    __ verify_oop(Oexception);
    __ save_frame(0);             // compensates for compiler weakness
    __ add(O7->after_save(), frame::pc_return_offset, Lscratch); // save the issuing PC
    BLOCK_COMMENT("call exception_handler_for_return_address");
    __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), G2_thread, Lscratch);
    __ mov(O0, handler_reg);
    __ restore();                 // compensates for compiler weakness

    __ ld_ptr(exception_addr, Oexception);
    __ add(O7, frame::pc_return_offset, Oissuing_pc); // save the issuing PC

#ifdef ASSERT
    // make sure exception is set
    { Label L;
      __ br_notnull_short(Oexception, Assembler::pt, L);
      __ stop("StubRoutines::forward exception: no pending exception (2)");
      __ bind(L);
    }
#endif
    // jump to exception handler
    __ jmp(handler_reg, 0);
    // clear pending exception
    __ delayed()->st_ptr(G0, exception_addr);

    return start;
  }


  //------------------------------------------------------------------------------------------------------------------------
  // Continuation point for throwing of implicit exceptions that are not handled in
  // the current activation. Fabricates an exception oop and initiates normal
  // exception dispatching in this frame. Only callee-saved registers are preserved
  // (through the normal register window / RegisterMap handling).
  // If the compiler needs all registers to be preserved between the fault
  // point and the exception handler then it must assume responsibility for that in
  // AbstractCompiler::continuation_for_implicit_null_exception or
  // continuation_for_implicit_division_by_zero_exception. All other implicit
  // exceptions (e.g., NullPointerException or AbstractMethodError on entry) are
  // either at call sites or otherwise assume that stack unwinding will be initiated,
  // so caller saved registers were assumed volatile in the compiler.

  // Note that we generate only this stub into a RuntimeStub, because it needs to be
  // properly traversed and ignored during GC, so we change the meaning of the "__"
  // macro within this method.
#undef __
#define __ masm->

  address generate_throw_exception(const char* name, address runtime_entry,
                                   Register arg1 = noreg, Register arg2 = noreg) {
#ifdef ASSERT
    int insts_size = VerifyThread ? 1 * K : 600;
#else
    int insts_size = VerifyThread ? 1 * K : 256;
#endif /* ASSERT */
    int locs_size  = 32;

    CodeBuffer      code(name, insts_size, locs_size);
    MacroAssembler* masm = new MacroAssembler(&code);

    __ verify_thread();

    // This is an inlined and slightly modified version of call_VM
    // which has the ability to fetch the return PC out of thread-local storage
    __ assert_not_delayed();

    // Note that we always push a frame because on the SPARC
    // architecture, for all of our implicit exception kinds at call
    // sites, the implicit exception is taken before the callee frame
    // is pushed.
    __ save_frame(0);

    int frame_complete = __ offset();

    // Note that we always have a runtime stub frame on the top of stack by this point
    Register last_java_sp = SP;
    // 64-bit last_java_sp is biased!
    __ set_last_Java_frame(last_java_sp, G0);
    if (VerifyThread)  __ mov(G2_thread, O0); // about to be smashed; pass early
    __ save_thread(noreg);
    if (arg1 != noreg) {
      assert(arg2 != O1, "clobbered");
      __ mov(arg1, O1);
    }
    if (arg2 != noreg) {
      __ mov(arg2, O2);
    }
    // do the call
    BLOCK_COMMENT("call runtime_entry");
    __ call(runtime_entry, relocInfo::runtime_call_type);
    if (!VerifyThread)
      __ delayed()->mov(G2_thread, O0);  // pass thread as first argument
    else
      __ delayed()->nop();               // (thread already passed)
    __ restore_thread(noreg);
    __ reset_last_Java_frame();

    // check for pending exceptions. use Gtemp as scratch register.
#ifdef ASSERT
    Label L;

    Address exception_addr(G2_thread, Thread::pending_exception_offset());
    Register scratch_reg = Gtemp;
    __ ld_ptr(exception_addr, scratch_reg);
    __ br_notnull_short(scratch_reg, Assembler::pt, L);
    __ should_not_reach_here();
    __ bind(L);
#endif // ASSERT
    BLOCK_COMMENT("call forward_exception_entry");
    __ call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
    // we use O7 linkage so that forward_exception_entry has the issuing PC
    __ delayed()->restore();

    RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, masm->total_frame_size_in_bytes(0), NULL, false);
    return stub->entry_point();
  }

#undef __
#define __ _masm->

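  // Usage note (illustrative, not generated code): during stub setup this
  // helper is invoked once per implicit-exception kind, along the lines of
  //
  //   StubRoutines::_throw_StackOverflowError_entry =
  //       generate_throw_exception("StackOverflowError throw_exception",
  //                                CAST_FROM_FN_PTR(address,
  //                                    SharedRuntime::throw_StackOverflowError));
  //
  // the exact entry names and runtime targets are defined where the stubs
  // are registered, not here.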

  // Generate a routine that sets all the registers so we
  // can tell if the stop routine prints them correctly.
  address generate_test_stop() {
    StubCodeMark mark(this, "StubRoutines", "test_stop");
    address start = __ pc();

    int i;

    __ save_frame(0);

    static jfloat zero = 0.0, one = 1.0;

    // put addr in L0, then load through L0 to F0
    __ set((intptr_t)&zero, L0);  __ ldf( FloatRegisterImpl::S, L0, 0, F0);
    __ set((intptr_t)&one,  L0);  __ ldf( FloatRegisterImpl::S, L0, 0, F1); // 1.0 to F1

    // use add to put 2..18 in F2..F18
    for ( i = 2;  i <= 18;  ++i ) {
      __ fadd( FloatRegisterImpl::S, F1, as_FloatRegister(i-1),  as_FloatRegister(i));
    }

    // Now put double 2 in F16, double 18 in F18
    __ ftof( FloatRegisterImpl::S, FloatRegisterImpl::D, F2,  F16 );
    __ ftof( FloatRegisterImpl::S, FloatRegisterImpl::D, F18, F18 );

    // use add to put 20..32 in F20..F32
    for (i = 20; i < 32; i += 2) {
      __ fadd( FloatRegisterImpl::D, F16, as_FloatRegister(i-2),  as_FloatRegister(i));
    }

    // put 0..7 in i's, 8..15 in l's, 16..23 in o's, 24..31 in g's
    for ( i = 0; i < 8; ++i ) {
      if (i < 6) {
        __ set(     i, as_iRegister(i));
        __ set(16 + i, as_oRegister(i));
        __ set(24 + i, as_gRegister(i));
      }
      __ set( 8 + i, as_lRegister(i));
    }

    __ stop("testing stop");


    __ ret();
    __ delayed()->restore();

    return start;
  }


  address generate_stop_subroutine() {
    StubCodeMark mark(this, "StubRoutines", "stop_subroutine");
    address start = __ pc();

    __ stop_subroutine();

    return start;
  }

  address generate_flush_callers_register_windows() {
    StubCodeMark mark(this, "StubRoutines", "flush_callers_register_windows");
    address start = __ pc();

    __ flush_windows();
    __ retl(false);
    __ delayed()->add( FP, STACK_BIAS, O0 );
    // The returned value must be a stack pointer whose register save area
    // is flushed, and will stay flushed while the caller executes.

    return start;
  }

  // Helper functions for v8 atomic operations.
  //
  void get_v8_oop_lock_ptr(Register lock_ptr_reg, Register mark_oop_reg, Register scratch_reg) {
    if (mark_oop_reg == noreg) {
      address lock_ptr = (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr();
      __ set((intptr_t)lock_ptr, lock_ptr_reg);
    } else {
      assert(scratch_reg != noreg, "just checking");
      address lock_ptr = (address)StubRoutines::Sparc::_v8_oop_lock_cache;
      __ set((intptr_t)lock_ptr, lock_ptr_reg);
      __ and3(mark_oop_reg, StubRoutines::Sparc::v8_oop_lock_mask_in_place, scratch_reg);
      __ add(lock_ptr_reg, scratch_reg, lock_ptr_reg);
    }
  }
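  // Explanatory note on the v8 lock table: when a mark oop is supplied, the
  // code above picks one lock out of a small cache of locks, conceptually
  //
  //   lock_ptr = (address)_v8_oop_lock_cache
  //            + (mark_oop & v8_oop_lock_mask_in_place);
  //
  // (the masked bits are used directly as a byte offset into the table),
  // which spreads unrelated oops across several locks instead of
  // serializing all atomics behind the single global
  // atomic_memory_operation_lock.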

  void generate_v8_lock_prologue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {

    get_v8_oop_lock_ptr(lock_ptr_reg, mark_oop_reg, scratch_reg);
    __ set(StubRoutines::Sparc::locked, lock_reg);
    // Initialize yield counter
    __ mov(G0,yield_reg);

    __ BIND(retry);
    __ cmp_and_br_short(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, Assembler::pt, dontyield);

    // This code can only be called from inside the VM, this
    // stub is only invoked from Atomic::add().  We do not
    // want to use call_VM, because _last_java_sp and such
    // must already be set.
    //
    // Save the regs and make space for a C call
    __ save(SP, -96, SP);
    __ save_all_globals_into_locals();
    BLOCK_COMMENT("call os::naked_sleep");
    __ call(CAST_FROM_FN_PTR(address, os::naked_sleep));
    __ delayed()->nop();
    __ restore_globals_from_locals();
    __ restore();
    // reset the counter
    __ mov(G0,yield_reg);

    __ BIND(dontyield);

    // try to get lock
    __ swap(lock_ptr_reg, 0, lock_reg);

    // did we get the lock?
    __ cmp(lock_reg, StubRoutines::Sparc::unlocked);
    __ br(Assembler::notEqual, true, Assembler::pn, retry);
    __ delayed()->add(yield_reg,1,yield_reg);

    // yes, got lock.  do the operation here.
  }

  void generate_v8_lock_epilogue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {
    __ st(lock_reg, lock_ptr_reg, 0); // unlock
  }

  // Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest).
  //
  // Arguments:
  //
  //      exchange_value: O0
  //      dest:           O1
  //
  // Results:
  //
  //      O0: the value previously stored in dest
  //
  address generate_atomic_xchg() {
    StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
    address start = __ pc();

    if (UseCASForSwap) {
      // Use CAS instead of swap, just in case the MP hardware
      // prefers to work with just one kind of synch. instruction.
      Label retry;
      __ BIND(retry);
      __ mov(O0, O3);       // scratch copy of exchange value
      __ ld(O1, 0, O2);     // observe the previous value
      // try to replace O2 with O3
      __ cas_under_lock(O1, O2, O3,
          (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false);
      __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pn, retry);

      __ retl(false);
      __ delayed()->mov(O2, O0);  // report previous value to caller

    } else {
      if (VM_Version::v9_instructions_work()) {
        __ retl(false);
        __ delayed()->swap(O1, 0, O0);
      } else {
        const Register& lock_reg     = O2;
        const Register& lock_ptr_reg = O3;
        const Register& yield_reg    = O4;

        Label retry;
        Label dontyield;

        generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
        // got the lock, do the swap
        __ swap(O1, 0, O0);

        generate_v8_lock_epilogue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
        __ retl(false);
        __ delayed()->nop();
      }
    }

    return start;
  }


  // Support for jint Atomic::cmpxchg(jint exchange_value, volatile jint* dest, jint compare_value)
  //
  // Arguments:
  //
  //      exchange_value: O0
  //      dest:           O1
  //      compare_value:  O2
  //
  // Results:
  //
  //      O0: the value previously stored in dest
  //
  // Overwrites (v8): O3,O4,O5
  //
  address generate_atomic_cmpxchg() {
    StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg");
    address start = __ pc();

    // cmpxchg(dest, compare_value, exchange_value)
    __ cas_under_lock(O1, O2, O0,
        (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false);
    __ retl(false);
    __ delayed()->nop();

    return start;
  }

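  // For reference, the contract of the two stubs above in plain C terms
  // (a sketch of the intended semantics, not generated code):
  //
  //   jint atomic_xchg(jint exchange_value, volatile jint* dest) {
  //     jint old = *dest;                          // atomically...
  //     *dest = exchange_value;
  //     return old;
  //   }
  //
  //   jint atomic_cmpxchg(jint exchange_value, volatile jint* dest,
  //                       jint compare_value) {
  //     jint old = *dest;                          // atomically...
  //     if (old == compare_value) *dest = exchange_value;
  //     return old;
  //   }
  //
  // The UseCASForSwap path implements xchg as a cmpxchg retry loop.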
  // Support for jlong Atomic::cmpxchg(jlong exchange_value, volatile jlong *dest, jlong compare_value)
  //
  // Arguments:
  //
  //      exchange_value: O1:O0
  //      dest:           O2
  //      compare_value:  O4:O3
  //
  // Results:
  //
  //      O1:O0: the value previously stored in dest
  //
  // This only works on V9, on V8 we don't generate any
  // code and just return NULL.
  //
  // Overwrites: G1,G2,G3
  //
  address generate_atomic_cmpxchg_long() {
    StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg_long");
    address start = __ pc();

    if (!VM_Version::supports_cx8())
      return NULL;
    __ sllx(O0, 32, O0);
    __ srl(O1, 0, O1);
    __ or3(O0,O1,O0);      // O0 holds 64-bit value from compare_value
    __ sllx(O3, 32, O3);
    __ srl(O4, 0, O4);
    __ or3(O3,O4,O3);      // O3 holds 64-bit value from exchange_value
    __ casx(O2, O3, O0);
    __ srl(O0, 0, O1);     // unpacked return value in O1:O0
    __ retl(false);
    __ delayed()->srlx(O0, 32, O0);

    return start;
  }


  // Support for jint Atomic::add(jint add_value, volatile jint* dest).
  //
  // Arguments:
  //
  //      add_value: O0   (e.g., +1 or -1)
  //      dest:      O1
  //
  // Results:
  //
  //      O0: the new value stored in dest
  //
  // Overwrites (v9): O3
  // Overwrites (v8): O3,O4,O5
  //
  address generate_atomic_add() {
    StubCodeMark mark(this, "StubRoutines", "atomic_add");
    address start = __ pc();
    __ BIND(_atomic_add_stub);

    if (VM_Version::v9_instructions_work()) {
      Label retry;
      __ BIND(retry);

      __ lduw(O1, 0, O2);
      __ add(O0, O2, O3);
      __ cas(O1, O2, O3);
      __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pn, retry);
      __ retl(false);
      __ delayed()->add(O0, O2, O0); // note that cas made O2==O3
    } else {
      const Register& lock_reg     = O2;
      const Register& lock_ptr_reg = O3;
      const Register& value_reg    = O4;
      const Register& yield_reg    = O5;

      Label retry;
      Label dontyield;

      generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
      // got lock, do the increment
      __ ld(O1, 0, value_reg);
      __ add(O0, value_reg, value_reg);
      __ st(value_reg, O1, 0);

      // %%% only for RMO and PSO
      __ membar(Assembler::StoreStore);

      generate_v8_lock_epilogue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);

      __ retl(false);
      __ delayed()->mov(value_reg, O0);
    }

    return start;
  }
  Label _atomic_add_stub;  // called from other stubs


  //------------------------------------------------------------------------------------------------------------------------
  // The following routine generates a subroutine to throw an asynchronous
  // UnknownError when an unsafe access gets a fault that could not be
  // reasonably prevented by the programmer.  (Example: SIGBUS/OBJERR.)
  //
  // Arguments:
  //
  //      trapping PC: O7
  //
  // Results:
  //      posts an asynchronous exception, skips the trapping instruction
  //

  address generate_handler_for_unsafe_access() {
    StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
    address start = __ pc();

    const int preserve_register_words = (64 * 2);
    Address preserve_addr(FP, (-preserve_register_words * wordSize) + STACK_BIAS);

    Register Lthread = L7_thread_cache;
    int i;

    __ save_frame(0);
    __ mov(G1, L1);
    __ mov(G2, L2);
    __ mov(G3, L3);
    __ mov(G4, L4);
    __ mov(G5, L5);
    for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) {
      __ stf(FloatRegisterImpl::D, as_FloatRegister(i), preserve_addr, i * wordSize);
    }

    address entry_point = CAST_FROM_FN_PTR(address, handle_unsafe_access);
    BLOCK_COMMENT("call handle_unsafe_access");
    __ call(entry_point, relocInfo::runtime_call_type);
    __ delayed()->nop();

    __ mov(L1, G1);
    __ mov(L2, G2);
    __ mov(L3, G3);
    __ mov(L4, G4);
    __ mov(L5, G5);
    for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) {
      __ ldf(FloatRegisterImpl::D, preserve_addr, as_FloatRegister(i), i * wordSize);
    }

    __ verify_thread();

    __ jmp(O0, 0);
    __ delayed()->restore();

    return start;
  }


  // Support for uint StubRoutines::Sparc::partial_subtype_check( Klass sub, Klass super );
  // Arguments:
  //
  //      ret    : O0, returned
  //      icc/xcc: set as O0 (depending on wordSize)
  //      sub    : O1, argument, not changed
  //      super  : O2, argument, not changed
  //      raddr  : O7, blown by call
  address generate_partial_subtype_check() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
    address start = __ pc();
    Label miss;

#if defined(COMPILER2) && !defined(_LP64)
    // Do not use a 'save' because it blows the 64-bit O registers.
    __ add(SP,-4*wordSize,SP);  // Make space for 4 temps (stack must be 2 words aligned)
    __ st_ptr(L0,SP,(frame::register_save_words+0)*wordSize);
    __ st_ptr(L1,SP,(frame::register_save_words+1)*wordSize);
    __ st_ptr(L2,SP,(frame::register_save_words+2)*wordSize);
    __ st_ptr(L3,SP,(frame::register_save_words+3)*wordSize);
    Register Rret   = O0;
    Register Rsub   = O1;
    Register Rsuper = O2;
#else
    __ save_frame(0);
    Register Rret   = I0;
    Register Rsub   = I1;
    Register Rsuper = I2;
#endif

    Register L0_ary_len = L0;
    Register L1_ary_ptr = L1;
    Register L2_super   = L2;
    Register L3_index   = L3;

    __ check_klass_subtype_slow_path(Rsub, Rsuper,
                                     L0, L1, L2, L3,
                                     NULL, &miss);

    // Match falls through here.
    __ addcc(G0,0,Rret);        // set Z flags, Z result

#if defined(COMPILER2) && !defined(_LP64)
    __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
    __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1);
    __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2);
    __ ld_ptr(SP,(frame::register_save_words+3)*wordSize,L3);
    __ retl();                  // Result in Rret is zero; flags set to Z
    __ delayed()->add(SP,4*wordSize,SP);
#else
    __ ret();                   // Result in Rret is zero; flags set to Z
    __ delayed()->restore();
#endif

    __ BIND(miss);
    __ addcc(G0,1,Rret);        // set NZ flags, NZ result

#if defined(COMPILER2) && !defined(_LP64)
    __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
    __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1);
    __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2);
    __ ld_ptr(SP,(frame::register_save_words+3)*wordSize,L3);
    __ retl();                  // Result in Rret is != 0; flags set to NZ
    __ delayed()->add(SP,4*wordSize,SP);
#else
    __ ret();                   // Result in Rret is != 0; flags set to NZ
    __ delayed()->restore();
#endif

    return start;
  }


  // Called from MacroAssembler::verify_oop
  //
  address generate_verify_oop_subroutine() {
    StubCodeMark mark(this, "StubRoutines", "verify_oop_stub");

    address start = __ pc();

    __ verify_oop_subroutine();

    return start;
  }


  //
  // Verify that a register contains clean 32-bits positive value
  // (high 32-bits are 0) so it could be used in 64-bits shifts (sllx, srax).
  //
  //  Input:
  //    Rint  -  32-bits value
  //    Rtmp  -  scratch
  //
  void assert_clean_int(Register Rint, Register Rtmp) {
#if defined(ASSERT) && defined(_LP64)
    __ signx(Rint, Rtmp);
    __ cmp(Rint, Rtmp);
    __ breakpoint_trap(Assembler::notEqual, Assembler::xcc);
#endif
  }

  //
  //  Generate overlap test for array copy stubs
  //
  //  Input:
  //    O0    -  array1
  //    O1    -  array2
  //    O2    -  element count
  //
  //  Kills temps:  O3, O4
  //
  void array_overlap_test(address no_overlap_target, int log2_elem_size) {
    assert(no_overlap_target != NULL, "must be generated");
    array_overlap_test(no_overlap_target, NULL, log2_elem_size);
  }
  void array_overlap_test(Label& L_no_overlap, int log2_elem_size) {
    array_overlap_test(NULL, &L_no_overlap, log2_elem_size);
  }
  void array_overlap_test(address no_overlap_target, Label* NOLp, int log2_elem_size) {
    const Register from       = O0;
    const Register to         = O1;
    const Register count      = O2;
    const Register to_from    = O3; // to - from
    const Register byte_count = O4; // count << log2_elem_size

    __ subcc(to, from, to_from);
    __ sll_ptr(count, log2_elem_size, byte_count);
    if (NOLp == NULL)
      __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, no_overlap_target);
    else
      __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, (*NOLp));
    __ delayed()->cmp(to_from, byte_count);
    if (NOLp == NULL)
      __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pt, no_overlap_target);
    else
      __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pt, (*NOLp));
    __ delayed()->nop();
  }
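  // Explanatory note on array_overlap_test(): computing (to - from) once
  // and comparing it as an unsigned number folds both safe cases into two
  // compares.  The branch to the no-overlap code is taken when
  //
  //   to <= from                               // unsigned flags from subcc
  //   || (uintptr_t)(to - from) >= byte_count  // dest begins past src end
  //
  // Only when the destination starts strictly inside the source range does
  // control fall through to the backward (descending) copy code.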
  //
  //  Generate pre-write barrier for array.
  //
  //  Input:
  //     addr     - register containing starting address
  //     count    - register containing element count
  //     tmp      - scratch register
  //
  //  The input registers are overwritten.
  //
  void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
    BarrierSet* bs = Universe::heap()->barrier_set();
    switch (bs->kind()) {
      case BarrierSet::G1SATBCT:
      case BarrierSet::G1SATBCTLogging:
        // With G1, don't generate the call if we statically know that the target is uninitialized
        if (!dest_uninitialized) {
          __ save_frame(0);
          // Save the necessary global regs... will be used after.
          if (addr->is_global()) {
            __ mov(addr, L0);
          }
          if (count->is_global()) {
            __ mov(count, L1);
          }
          __ mov(addr->after_save(), O0);
          // Get the count into O1
          __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
          __ delayed()->mov(count->after_save(), O1);
          if (addr->is_global()) {
            __ mov(L0, addr);
          }
          if (count->is_global()) {
            __ mov(L1, count);
          }
          __ restore();
        }
        break;
      case BarrierSet::CardTableModRef:
      case BarrierSet::CardTableExtension:
      case BarrierSet::ModRef:
        break;
      default:
        ShouldNotReachHere();
    }
  }
  //
  //  Generate post-write barrier for array.
  //
  //  Input:
  //     addr     - register containing starting address
  //     count    - register containing element count
  //     tmp      - scratch register
  //
  //  The input registers are overwritten.
  //
  void gen_write_ref_array_post_barrier(Register addr, Register count,
                                        Register tmp) {
    BarrierSet* bs = Universe::heap()->barrier_set();

    switch (bs->kind()) {
      case BarrierSet::G1SATBCT:
      case BarrierSet::G1SATBCTLogging:
        {
          // Get some new fresh output registers.
          __ save_frame(0);
          __ mov(addr->after_save(), O0);
          __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));
          __ delayed()->mov(count->after_save(), O1);
          __ restore();
        }
        break;
      case BarrierSet::CardTableModRef:
      case BarrierSet::CardTableExtension:
        {
          CardTableModRefBS* ct = (CardTableModRefBS*)bs;
          assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
          assert_different_registers(addr, count, tmp);

          Label L_loop;

          __ sll_ptr(count, LogBytesPerHeapOop, count);
          __ sub(count, BytesPerHeapOop, count);
          __ add(count, addr, count);
          // Use two shifts to clear out those low order two bits! (Cannot opt. into 1.)
          __ srl_ptr(addr, CardTableModRefBS::card_shift, addr);
          __ srl_ptr(count, CardTableModRefBS::card_shift, count);
          __ sub(count, addr, count);
          AddressLiteral rs(ct->byte_map_base);
          __ set(rs, tmp);
          __ BIND(L_loop);
          __ stb(G0, tmp, addr);
          __ subcc(count, 1, count);
          __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
          __ delayed()->add(addr, 1, addr);
        }
        break;
      case BarrierSet::ModRef:
        break;
      default:
        ShouldNotReachHere();
    }
  }

  //
  // Generate main code for disjoint arraycopy
  //
  typedef void (StubGenerator::*CopyLoopFunc)(Register from, Register to, Register count, int count_dec,
                                              Label& L_loop, bool use_prefetch, bool use_bis);

  void disjoint_copy_core(Register from, Register to, Register count, int log2_elem_size,
                          int iter_size, CopyLoopFunc copy_loop_func) {
    Label L_copy;

    assert(log2_elem_size <= 3, "the following code should be changed");
    int count_dec = 16>>log2_elem_size;

    int prefetch_dist = MAX2(ArraycopySrcPrefetchDistance, ArraycopyDstPrefetchDistance);
    assert(prefetch_dist < 4096, "invalid value");
    prefetch_dist = (prefetch_dist + (iter_size-1)) & (-iter_size); // round up to one iteration copy size
    int prefetch_count = (prefetch_dist >> log2_elem_size); // elements count

    if (UseBlockCopy) {
      Label L_block_copy, L_block_copy_prefetch, L_skip_block_copy;

      // 64 bytes tail + bytes copied in one loop iteration
      int tail_size = 64 + iter_size;
      int block_copy_count = (MAX2(tail_size, (int)BlockCopyLowLimit)) >> log2_elem_size;
      // Use BIS copy only for big arrays since it requires membar.
      __ set(block_copy_count, O4);
      __ cmp_and_br_short(count, O4, Assembler::lessUnsigned, Assembler::pt, L_skip_block_copy);
      // This code is for disjoint source and destination:
      //   to <= from || to >= from+count
      // but BIS will stomp over 'from' if (to > from-tail_size && to <= from)
      __ sub(from, to, O4);
      __ srax(O4, 4, O4); // divide by 16 since the following short branch has only 5 bits for imm.
      __ cmp_and_br_short(O4, (tail_size>>4), Assembler::lessEqualUnsigned, Assembler::pn, L_skip_block_copy);

      __ wrasi(G0, Assembler::ASI_ST_BLKINIT_PRIMARY);
      // BIS should not be used to copy tail (64 bytes+iter_size)
      // to avoid zeroing of following values.
      __ sub(count, (tail_size>>log2_elem_size), count); // count is still positive >= 0

      if (prefetch_count > 0) { // rounded up to one iteration count
        // Do prefetching only if copy size is bigger
        // than prefetch distance.
        __ set(prefetch_count, O4);
        __ cmp_and_brx_short(count, O4, Assembler::less, Assembler::pt, L_block_copy);
        __ sub(count, prefetch_count, count);

        (this->*copy_loop_func)(from, to, count, count_dec, L_block_copy_prefetch, true, true);
        __ add(count, prefetch_count, count); // restore count

      } // prefetch_count > 0

      (this->*copy_loop_func)(from, to, count, count_dec, L_block_copy, false, true);
      __ add(count, (tail_size>>log2_elem_size), count); // restore count

      __ wrasi(G0, Assembler::ASI_PRIMARY_NOFAULT);
      // BIS needs membar.
      __ membar(Assembler::StoreLoad);
      // Copy tail
      __ ba_short(L_copy);

      __ BIND(L_skip_block_copy);
    } // UseBlockCopy

    if (prefetch_count > 0) { // rounded up to one iteration count
      // Do prefetching only if copy size is bigger
      // than prefetch distance.
      __ set(prefetch_count, O4);
      __ cmp_and_brx_short(count, O4, Assembler::lessUnsigned, Assembler::pt, L_copy);
      __ sub(count, prefetch_count, count);

      Label L_copy_prefetch;
      (this->*copy_loop_func)(from, to, count, count_dec, L_copy_prefetch, true, false);
      __ add(count, prefetch_count, count); // restore count

    } // prefetch_count > 0

    (this->*copy_loop_func)(from, to, count, count_dec, L_copy, false, false);
  }



  //
  // Helper methods for copy_16_bytes_forward_with_shift()
  //
  void copy_16_bytes_shift_loop(Register from, Register to, Register count, int count_dec,
                                Label& L_loop, bool use_prefetch, bool use_bis) {

    const Register left_shift  = G1; // left  shift bit counter
    const Register right_shift = G5; // right shift bit counter

    __ align(OptoLoopAlignment);
    __ BIND(L_loop);
    if (use_prefetch) {
      if (ArraycopySrcPrefetchDistance > 0) {
        __ prefetch(from, ArraycopySrcPrefetchDistance, Assembler::severalReads);
      }
      if (ArraycopyDstPrefetchDistance > 0) {
        __ prefetch(to, ArraycopyDstPrefetchDistance, Assembler::severalWritesAndPossiblyReads);
      }
    }
    __ ldx(from, 0, O4);
    __ ldx(from, 8, G4);
    __ inc(to, 16);
    __ inc(from, 16);
    __ deccc(count, count_dec); // Can we do next iteration after this one?
    __ srlx(O4, right_shift, G3);
    __ bset(G3, O3);
    __ sllx(O4, left_shift,  O4);
    __ srlx(G4, right_shift, G3);
    __ bset(G3, O4);
    if (use_bis) {
      __ stxa(O3, to, -16);
      __ stxa(O4, to, -8);
    } else {
      __ stx(O3, to, -16);
      __ stx(O4, to, -8);
    }
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
    __ delayed()->sllx(G4, left_shift,  O3);
  }

  // Copy big chunks forward with shift
  //
  // Inputs:
  //   from      - source arrays
  //   to        - destination array aligned to 8-bytes
  //   count     - elements count to copy >= the count equivalent to 16 bytes
  //   count_dec - elements count's decrement equivalent to 16 bytes
  //   L_copy_bytes - copy exit label
  //
  void copy_16_bytes_forward_with_shift(Register from, Register to,
                     Register count, int log2_elem_size, Label& L_copy_bytes) {
    Label L_aligned_copy, L_copy_last_bytes;
    assert(log2_elem_size <= 3, "the following code should be changed");
    int count_dec = 16>>log2_elem_size;

    // if both arrays have the same alignment mod 8, do 8 bytes aligned copy
    __ andcc(from, 7, G1); // misaligned bytes
    __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
    __ delayed()->nop();

    const Register left_shift  = G1; // left  shift bit counter
    const Register right_shift = G5; // right shift bit counter

    __ sll(G1, LogBitsPerByte, left_shift);
    __ mov(64, right_shift);
    __ sub(right_shift, left_shift, right_shift);

    //
    // Load 2 aligned 8-bytes chunks and use one from previous iteration
    // to form 2 aligned 8-bytes chunks to store.
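    //
    // Explanatory sketch of the unaligned-copy trick used here and in the
    // shift loop above: with 'from' rounded down to an 8-byte boundary,
    // each aligned destination word is assembled from two adjacent aligned
    // source words (shift counts are in bits):
    //
    //   dst[i] = (src[i] << left_shift) | (src[i+1] >> right_shift);
    //
    // where left_shift = misalignment * 8 and right_shift = 64 - left_shift,
    // so every iteration needs only aligned 8-byte loads and stores.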
    //
    __ dec(count, count_dec);   // Pre-decrement 'count'
    __ andn(from, 7, from);     // Align address
    __ ldx(from, 0, O3);
    __ inc(from, 8);
    __ sllx(O3, left_shift,  O3);

    disjoint_copy_core(from, to, count, log2_elem_size, 16, &StubGenerator::copy_16_bytes_shift_loop);

    __ inccc(count, count_dec>>1 ); // + 8 bytes
    __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes);
    __ delayed()->inc(count, count_dec>>1); // restore 'count'

    // copy 8 bytes, part of them already loaded in O3
    __ ldx(from, 0, O4);
    __ inc(to, 8);
    __ inc(from, 8);
    __ srlx(O4, right_shift, G3);
    __ bset(O3, G3);
    __ stx(G3, to, -8);

    __ BIND(L_copy_last_bytes);
    __ srl(right_shift, LogBitsPerByte, right_shift); // misaligned bytes
    __ br(Assembler::always, false, Assembler::pt, L_copy_bytes);
    __ delayed()->sub(from, right_shift, from);       // restore address

    __ BIND(L_aligned_copy);
  }

  // Copy big chunks backward with shift
  //
  // Inputs:
  //   end_from  - source arrays end address
  //   end_to    - destination array end address aligned to 8-bytes
  //   count     - elements count to copy >= the count equivalent to 16 bytes
  //   count_dec - elements count's decrement equivalent to 16 bytes
  //   L_aligned_copy - aligned copy exit label
  //   L_copy_bytes   - copy exit label
  //
  void copy_16_bytes_backward_with_shift(Register end_from, Register end_to,
                     Register count, int count_dec,
                     Label& L_aligned_copy, Label& L_copy_bytes) {
    Label L_loop, L_copy_last_bytes;

    // if both arrays have the same alignment mod 8, do 8 bytes aligned copy
    __ andcc(end_from, 7, G1); // misaligned bytes
    __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
    __ delayed()->deccc(count, count_dec); // Pre-decrement 'count'

    const Register left_shift  = G1; // left  shift bit counter
    const Register right_shift = G5; // right shift bit counter

    __ sll(G1, LogBitsPerByte, left_shift);
    __ mov(64, right_shift);
    __ sub(right_shift, left_shift, right_shift);

    //
    // Load 2 aligned 8-bytes chunks and use one from previous iteration
    // to form 2 aligned 8-bytes chunks to store.
    //
    __ andn(end_from, 7, end_from);     // Align address
    __ ldx(end_from, 0, O3);
    __ align(OptoLoopAlignment);
    __ BIND(L_loop);
    __ ldx(end_from, -8, O4);
    __ deccc(count, count_dec); // Can we do next iteration after this one?
    __ ldx(end_from, -16, G4);
    __ dec(end_to, 16);
    __ dec(end_from, 16);
    __ srlx(O3, right_shift, O3);
    __ sllx(O4, left_shift,  G3);
    __ bset(G3, O3);
    __ stx(O3, end_to, 8);
    __ srlx(O4, right_shift, O4);
    __ sllx(G4, left_shift,  G3);
    __ bset(G3, O4);
    __ stx(O4, end_to, 0);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
    __ delayed()->mov(G4, O3);

    __ inccc(count, count_dec>>1 ); // + 8 bytes
    __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes);
    __ delayed()->inc(count, count_dec>>1); // restore 'count'

    // copy 8 bytes, part of them already loaded in O3
    __ ldx(end_from, -8, O4);
    __ dec(end_to, 8);
    __ dec(end_from, 8);
    __ srlx(O3, right_shift, O3);
    __ sllx(O4, left_shift,  G3);
    __ bset(O3, G3);
    __ stx(G3, end_to, 0);

    __ BIND(L_copy_last_bytes);
    __ srl(left_shift, LogBitsPerByte, left_shift); // misaligned bytes
    __ br(Assembler::always, false, Assembler::pt, L_copy_bytes);
    __ delayed()->add(end_from, left_shift, end_from); // restore address
  }

  //
  //  Generate stub for disjoint byte copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_disjoint_byte_copy(bool aligned, address *entry, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_skip_alignment, L_align;
    Label L_copy_byte, L_copy_byte_loop, L_exit;

    const Register from   = O0;   // source array address
    const Register to     = O1;   // destination array address
    const Register count  = O2;   // elements count
    const Register offset = O5;   // offset from start of arrays
    // O3, O4, G3, G4 are used as temp registers

    assert_clean_int(count, O3);  // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    // for short arrays, just do single element copy
    __ cmp(count, 23); // 16 + 7
    __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
    __ delayed()->mov(G0, offset);

    if (aligned) {
      // 'aligned' == true when it is known statically during compilation
      // of this arraycopy call site that both 'from' and 'to' addresses
      // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
      //
      // Aligned arrays have 4 bytes alignment in 32-bits VM
      // and 8 bytes - in 64-bits VM.  So we do it only for 32-bits VM.
So we do it only for 32-bits VM 1410 // 1411#ifndef _LP64 1412 // copy a 4-bytes word if necessary to align 'to' to 8 bytes 1413 __ andcc(to, 7, G0); 1414 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment); 1415 __ delayed()->ld(from, 0, O3); 1416 __ inc(from, 4); 1417 __ inc(to, 4); 1418 __ dec(count, 4); 1419 __ st(O3, to, -4); 1420 __ BIND(L_skip_alignment); 1421#endif 1422 } else { 1423 // copy bytes to align 'to' on 8 byte boundary 1424 __ andcc(to, 7, G1); // misaligned bytes 1425 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); 1426 __ delayed()->neg(G1); 1427 __ inc(G1, 8); // bytes need to copy to next 8-bytes alignment 1428 __ sub(count, G1, count); 1429 __ BIND(L_align); 1430 __ ldub(from, 0, O3); 1431 __ deccc(G1); 1432 __ inc(from); 1433 __ stb(O3, to, 0); 1434 __ br(Assembler::notZero, false, Assembler::pt, L_align); 1435 __ delayed()->inc(to); 1436 __ BIND(L_skip_alignment); 1437 } 1438#ifdef _LP64 1439 if (!aligned) 1440#endif 1441 { 1442 // Copy with shift 16 bytes per iteration if arrays do not have 1443 // the same alignment mod 8, otherwise fall through to the next 1444 // code for aligned copy. 1445 // The compare above (count >= 23) guarantes 'count' >= 16 bytes. 1446 // Also jump over aligned copy after the copy with shift completed. 1447 1448 copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte); 1449 } 1450 1451 // Both array are 8 bytes aligned, copy 16 bytes at a time 1452 __ and3(count, 7, G4); // Save count 1453 __ srl(count, 3, count); 1454 generate_disjoint_long_copy_core(aligned); 1455 __ mov(G4, count); // Restore count 1456 1457 // copy tailing bytes 1458 __ BIND(L_copy_byte); 1459 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); 1460 __ align(OptoLoopAlignment); 1461 __ BIND(L_copy_byte_loop); 1462 __ ldub(from, offset, O3); 1463 __ deccc(count); 1464 __ stb(O3, to, offset); 1465 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop); 1466 __ delayed()->inc(offset); 1467 1468 __ BIND(L_exit); 1469 // O3, O4 are used as temp registers 1470 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4); 1471 __ retl(); 1472 __ delayed()->mov(G0, O0); // return 0 1473 return start; 1474 } 1475 1476 // 1477 // Generate stub for conjoint byte copy. If "aligned" is true, the 1478 // "from" and "to" addresses are assumed to be heapword aligned. 1479 // 1480 // Arguments for generated stub: 1481 // from: O0 1482 // to: O1 1483 // count: O2 treated as signed 1484 // 1485 address generate_conjoint_byte_copy(bool aligned, address nooverlap_target, 1486 address *entry, const char *name) { 1487 // Do reverse copy. 1488 1489 __ align(CodeEntryAlignment); 1490 StubCodeMark mark(this, "StubRoutines", name); 1491 address start = __ pc(); 1492 1493 Label L_skip_alignment, L_align, L_aligned_copy; 1494 Label L_copy_byte, L_copy_byte_loop, L_exit; 1495 1496 const Register from = O0; // source array address 1497 const Register to = O1; // destination array address 1498 const Register count = O2; // elements count 1499 const Register end_from = from; // source array end address 1500 const Register end_to = to; // destination array end address 1501 1502 assert_clean_int(count, O3); // Make sure 'count' is clean int. 
  //
  //  Generate stub for conjoint byte copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
                                      address *entry, const char *name) {
    // Do reverse copy.

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_skip_alignment, L_align, L_aligned_copy;
    Label L_copy_byte, L_copy_byte_loop, L_exit;

    const Register from     = O0;   // source array address
    const Register to       = O1;   // destination array address
    const Register count    = O2;   // elements count
    const Register end_from = from; // source array end address
    const Register end_to   = to;   // destination array end address

    assert_clean_int(count, O3);    // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    array_overlap_test(nooverlap_target, 0);

    __ add(to, count, end_to);      // offset after last copied element

    // for short arrays, just do single element copy
    __ cmp(count, 23); // 16 + 7
    __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
    __ delayed()->add(from, count, end_from);

    {
      // Align end of arrays since they could be not aligned even
      // when the arrays themselves are aligned.

      // copy bytes to align 'end_to' on 8 byte boundary
      __ andcc(end_to, 7, G1); // misaligned bytes
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->nop();
      __ sub(count, G1, count);
      __ BIND(L_align);
      __ dec(end_from);
      __ dec(end_to);
      __ ldub(end_from, 0, O3);
      __ deccc(G1);
      __ brx(Assembler::notZero, false, Assembler::pt, L_align);
      __ delayed()->stb(O3, end_to, 0);
      __ BIND(L_skip_alignment);
    }
#ifdef _LP64
    if (aligned) {
      // Both arrays are aligned to 8-bytes in 64-bits VM.
      // The 'count' is decremented in copy_16_bytes_backward_with_shift()
      // in unaligned case.
      __ dec(count, 16);
    } else
#endif
    {
      // Copy with shift 16 bytes per iteration if arrays do not have
      // the same alignment mod 8, otherwise jump to the next
      // code for aligned copy (and subtracting 16 from 'count' before jump).
      // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
      // Also jump over aligned copy after the copy with shift completed.

      copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
                                        L_aligned_copy, L_copy_byte);
    }
    // copy 16 bytes at a time
    __ align(OptoLoopAlignment);
    __ BIND(L_aligned_copy);
    __ dec(end_from, 16);
    __ ldx(end_from, 8, O3);
    __ ldx(end_from, 0, O4);
    __ dec(end_to, 16);
    __ deccc(count, 16);
    __ stx(O3, end_to, 8);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
    __ delayed()->stx(O4, end_to, 0);
    __ inc(count, 16);

    // copy 1 byte at a time
    __ BIND(L_copy_byte);
    __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
    __ align(OptoLoopAlignment);
    __ BIND(L_copy_byte_loop);
    __ dec(end_from);
    __ dec(end_to);
    __ ldub(end_from, 0, O4);
    __ deccc(count);
    __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop);
    __ delayed()->stb(O4, end_to, 0);

    __ BIND(L_exit);
    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

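  // Implementation note (explanatory): the byte and short copy stubs both
  // reduce the well-aligned middle of the array to the same operation: the
  // leftover element count is parked in G4, 'count' is scaled down to a
  // number of 8-byte words, and generate_disjoint_long_copy_core() (defined
  // elsewhere in this file) performs the bulk move; only the tail is then
  // finished one element at a time.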
  //
  //  Generate stub for disjoint short copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_disjoint_short_copy(bool aligned, address *entry, const char * name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_skip_alignment, L_skip_alignment2;
    Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit;

    const Register from   = O0;   // source array address
    const Register to     = O1;   // destination array address
    const Register count  = O2;   // elements count
    const Register offset = O5;   // offset from start of arrays
    // O3, O4, G3, G4 are used as temp registers

    assert_clean_int(count, O3);  // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    // for short arrays, just do single element copy
    __ cmp(count, 11); // 8 + 3  (22 bytes)
    __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
    __ delayed()->mov(G0, offset);

    if (aligned) {
      // 'aligned' == true when it is known statically during compilation
      // of this arraycopy call site that both 'from' and 'to' addresses
      // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
      //
      // Aligned arrays have 4 bytes alignment in 32-bits VM
      // and 8 bytes - in 64-bits VM.
      //
#ifndef _LP64
      // copy a 2-elements word if necessary to align 'to' to 8 bytes
      __ andcc(to, 7, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->ld(from, 0, O3);
      __ inc(from, 4);
      __ inc(to, 4);
      __ dec(count, 2);
      __ st(O3, to, -4);
      __ BIND(L_skip_alignment);
#endif
    } else {
      // copy 1 element if necessary to align 'to' on a 4-byte boundary
      __ andcc(to, 3, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->lduh(from, 0, O3);
      __ inc(from, 2);
      __ inc(to, 2);
      __ dec(count);
      __ sth(O3, to, -2);
      __ BIND(L_skip_alignment);

      // copy 2 elements to align 'to' on an 8 byte boundary
      __ andcc(to, 7, G0);
      __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
      __ delayed()->lduh(from, 0, O3);
      __ dec(count, 2);
      __ lduh(from, 2, O4);
      __ inc(from, 4);
      __ inc(to, 4);
      __ sth(O3, to, -4);
      __ sth(O4, to, -2);
      __ BIND(L_skip_alignment2);
    }
#ifdef _LP64
    if (!aligned)
#endif
    {
      // Copy with shift 16 bytes per iteration if arrays do not have
      // the same alignment mod 8, otherwise fall through to the next
      // code for aligned copy.
      // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
      // Also jump over aligned copy after the copy with shift completed.
      copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes);
    }

    // Both arrays are 8-byte aligned here; copy 16 bytes at a time
      __ and3(count, 3, G4); // Save
      __ srl(count, 2, count);
     generate_disjoint_long_copy_core(aligned);
      __ mov(G4, count);     // restore

    // copy 1 element at a time
    __ BIND(L_copy_2_bytes);
      __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
      __ align(OptoLoopAlignment);
    __ BIND(L_copy_2_bytes_loop);
      __ lduh(from, offset, O3);
      __ deccc(count);
      __ sth(O3, to, offset);
      __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop);
      __ delayed()->inc(offset, 2);

    __ BIND(L_exit);
    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  //
  //  Generate stub for array fill.  If "aligned" is true, the
  //  "to" address is assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      to:    O0
  //      value: O1
  //      count: O2 treated as signed
  //
  address generate_fill(BasicType t, bool aligned, const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    const Register to        = O0;   // destination array address
    const Register value     = O1;   // fill value
    const Register count     = O2;   // elements count
    // O3 is used as a temp register

    assert_clean_int(count, O3);     // Make sure 'count' is clean int.

    Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
    Label L_fill_2_bytes, L_fill_elements, L_fill_32_bytes;

    int shift = -1;
    switch (t) {
      case T_BYTE:
        shift = 2;
        break;
      case T_SHORT:
        shift = 1;
        break;
      case T_INT:
        shift = 0;
        break;
      default: ShouldNotReachHere();
    }

    BLOCK_COMMENT("Entry:");

    if (t == T_BYTE) {
      // Zero extend value and replicate it into the low 16 bits
      __ and3(value, 0xff, value);
      __ sllx(value, 8, O3);
      __ or3(value, O3, value);
    }
    if (t == T_SHORT) {
      // Zero extend value
      __ sllx(value, 48, value);
      __ srlx(value, 48, value);
    }
    if (t == T_BYTE || t == T_SHORT) {
      // replicate the 16-bit pattern into the low 32 bits
      __ sllx(value, 16, O3);
      __ or3(value, O3, value);
    }

    __ cmp(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
    __ brx(Assembler::lessUnsigned, false, Assembler::pn, L_fill_elements); // use unsigned cmp
    __ delayed()->andcc(count, 1, G0);

    if (!aligned && (t == T_BYTE || t == T_SHORT)) {
      // align destination address on a 4-byte boundary
      if (t == T_BYTE) {
        // One byte misalignment happens only for byte arrays
        __ andcc(to, 1, G0);
        __ br(Assembler::zero, false, Assembler::pt, L_skip_align1);
        __ delayed()->nop();
        __ stb(value, to, 0);
        __ inc(to, 1);
        __ dec(count, 1);
      __ BIND(L_skip_align1);
      }
      // Two bytes misalignment happens only for byte and short (char) arrays
      __ andcc(to, 2, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_skip_align2);
      __ delayed()->nop();
      __ sth(value, to, 0);
      __ inc(to, 2);
      __ dec(count, 1 << (shift - 1));
    __ BIND(L_skip_align2);
    }
#ifdef _LP64
    if (!aligned) {
#endif
    // align to 8 bytes; we know we are 4-byte aligned to start
    __ andcc(to, 7, G0);
    __ br(Assembler::zero, false, Assembler::pt, L_fill_32_bytes);
    __ delayed()->nop();
    __ stw(value, to, 0);
    __ inc(to, 4);
    __ dec(count, 1 << shift);
    __ BIND(L_fill_32_bytes);
#ifdef _LP64
    }
#endif

    if (t == T_INT) {
      // Zero extend value
      __ srl(value, 0, value);
    }
    if (t == T_BYTE || t == T_SHORT || t == T_INT) {
      // replicate the 32-bit pattern into all 64 bits
      __ sllx(value, 32, O3);
      __ or3(value, O3, value);
    }

    Label L_check_fill_8_bytes;
    // Fill 32-byte chunks
    __ subcc(count, 8 << shift, count);
    __ brx(Assembler::less, false, Assembler::pt, L_check_fill_8_bytes);
    __ delayed()->nop();

    Label L_fill_32_bytes_loop, L_fill_4_bytes;
    __ align(16);
    __ BIND(L_fill_32_bytes_loop);

    __ stx(value, to, 0);
    __ stx(value, to, 8);
    __ stx(value, to, 16);
    __ stx(value, to, 24);

    __ subcc(count, 8 << shift, count);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_fill_32_bytes_loop);
    __ delayed()->add(to, 32, to);

    __ BIND(L_check_fill_8_bytes);
    __ addcc(count, 8 << shift, count);
    __ brx(Assembler::zero, false, Assembler::pn, L_exit);
    __ delayed()->subcc(count, 1 << (shift + 1), count);
    __ brx(Assembler::less, false, Assembler::pn, L_fill_4_bytes);
    __ delayed()->andcc(count, 1<<shift, G0);

    //
    // length is too short, just fill 8 bytes at a time
    //
    Label L_fill_8_bytes_loop;
    __ BIND(L_fill_8_bytes_loop);
    __ stx(value, to, 0);
    __ subcc(count, 1 << (shift + 1), count);
    __ brx(Assembler::greaterEqual, false, Assembler::pn, L_fill_8_bytes_loop);
    __ delayed()->add(to, 8, to);

    // fill trailing 4 bytes
    __ andcc(count, 1<<shift, G0);  // in delay slot of branches
    if (t == T_INT) {
      __ BIND(L_fill_elements);
    }
    __ BIND(L_fill_4_bytes);
    __ brx(Assembler::zero, false, Assembler::pt, L_fill_2_bytes);
    if (t == T_BYTE || t == T_SHORT) {
      __ delayed()->andcc(count, 1<<(shift-1), G0);
    } else {
      __ delayed()->nop();
    }
    __ stw(value, to, 0);
    if (t == T_BYTE || t == T_SHORT) {
      __ inc(to, 4);
      // fill trailing 2 bytes
      __ andcc(count, 1<<(shift-1), G0); // in delay slot of branches
      __ BIND(L_fill_2_bytes);
      __ brx(Assembler::zero, false, Assembler::pt, L_fill_byte);
      __ delayed()->andcc(count, 1, count);
      __ sth(value, to, 0);
      if (t == T_BYTE) {
        __ inc(to, 2);
        // fill trailing byte
        __ andcc(count, 1, count);  // in delay slot of branches
        __ BIND(L_fill_byte);
        __ brx(Assembler::zero, false, Assembler::pt, L_exit);
        __ delayed()->nop();
        __ stb(value, to, 0);
      } else {
        __ BIND(L_fill_byte);
      }
    } else {
      __ BIND(L_fill_2_bytes);
    }
    __ BIND(L_exit);
    __ retl();
    __ delayed()->nop();
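
    // Illustrative sketch only (not part of the VM): a C outline of the fast
    // path above, shown for T_BYTE with an unsigned char* destination.  The
    // value is replicated into all 8 bytes of a 64-bit pattern, the
    // destination is stepped up to an 8-byte boundary element by element, and
    // the bulk of the fill is done with wide stores.  Names are hypothetical.
    //
    //   void fill_bytes_sketch(unsigned char* to, unsigned char v, size_t count) {
    //     uint64_t pat = v;
    //     pat |= pat << 8;  pat |= pat << 16;  pat |= pat << 32;  // broadcast
    //     while (count > 0 && ((uintptr_t)to & 7) != 0) {         // align to 8
    //       *to++ = v;  count--;
    //     }
    //     for (; count >= 8; count -= 8, to += 8) {               // wide stores
    //       *(uint64_t*)to = pat;
    //     }
    //     while (count-- > 0) *to++ = v;                          // tail
    //   }
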
    // Handle fills of fewer than 8 bytes.  Int is handled elsewhere.
    if (t == T_BYTE) {
      __ BIND(L_fill_elements);
      Label L_fill_2, L_fill_4;
      // in delay slot __ andcc(count, 1, G0);
      __ brx(Assembler::zero, false, Assembler::pt, L_fill_2);
      __ delayed()->andcc(count, 2, G0);
      __ stb(value, to, 0);
      __ inc(to, 1);
      __ BIND(L_fill_2);
      __ brx(Assembler::zero, false, Assembler::pt, L_fill_4);
      __ delayed()->andcc(count, 4, G0);
      __ stb(value, to, 0);
      __ stb(value, to, 1);
      __ inc(to, 2);
      __ BIND(L_fill_4);
      __ brx(Assembler::zero, false, Assembler::pt, L_exit);
      __ delayed()->nop();
      __ stb(value, to, 0);
      __ stb(value, to, 1);
      __ stb(value, to, 2);
      __ retl();
      __ delayed()->stb(value, to, 3);
    }

    if (t == T_SHORT) {
      Label L_fill_2;
      __ BIND(L_fill_elements);
      // in delay slot __ andcc(count, 1, G0);
      __ brx(Assembler::zero, false, Assembler::pt, L_fill_2);
      __ delayed()->andcc(count, 2, G0);
      __ sth(value, to, 0);
      __ inc(to, 2);
      __ BIND(L_fill_2);
      __ brx(Assembler::zero, false, Assembler::pt, L_exit);
      __ delayed()->nop();
      __ sth(value, to, 0);
      __ retl();
      __ delayed()->sth(value, to, 2);
    }
    return start;
  }

  //
  //  Generate stub for conjoint short copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_conjoint_short_copy(bool aligned, address nooverlap_target,
                                       address *entry, const char *name) {
    // Do reverse copy.

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_skip_alignment, L_skip_alignment2, L_aligned_copy;
    Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit;

    const Register from      = O0;   // source array address
    const Register to        = O1;   // destination array address
    const Register count     = O2;   // elements count
    const Register end_from  = from; // source array end address
    const Register end_to    = to;   // destination array end address

    const Register byte_count = O3;  // bytes count to copy

    assert_clean_int(count, O3);     // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    array_overlap_test(nooverlap_target, 1);

    __ sllx(count, LogBytesPerShort, byte_count);
    __ add(to, byte_count, end_to);  // offset after last copied element

    // for short arrays, just do single element copy
    __ cmp(count, 11); // 8 + 3  (22 bytes)
    __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
    __ delayed()->add(from, byte_count, end_from);

    {
      // Align the end of the arrays: the ends may be misaligned
      // even when the arrays themselves are aligned.

      // copy 1 element if necessary to align 'end_to' on a 4-byte boundary
      __ andcc(end_to, 3, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->lduh(end_from, -2, O3);
      __ dec(end_from, 2);
      __ dec(end_to, 2);
      __ dec(count);
      __ sth(O3, end_to, 0);
    __ BIND(L_skip_alignment);

      // copy 2 elements to align 'end_to' on an 8-byte boundary
      __ andcc(end_to, 7, G0);
      __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
      __ delayed()->lduh(end_from, -2, O3);
      __ dec(count, 2);
      __ lduh(end_from, -4, O4);
      __ dec(end_from, 4);
      __ dec(end_to, 4);
      __ sth(O3, end_to, 2);
      __ sth(O4, end_to, 0);
    __ BIND(L_skip_alignment2);
    }
#ifdef _LP64
    if (aligned) {
      // Both arrays are aligned to 8 bytes in the 64-bit VM.
      // The 'count' is decremented in copy_16_bytes_backward_with_shift()
      // in the unaligned case.
      __ dec(count, 8);
    } else
#endif
    {
      // Copy with shift 16 bytes per iteration if the arrays do not have
      // the same alignment mod 8, otherwise jump to the next
      // code for aligned copy (subtracting 8 from 'count' before the jump).
      // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
      // Also jump over the aligned copy after the copy with shift completes.

      copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
                                        L_aligned_copy, L_copy_2_bytes);
    }
    // copy 8 elements (16 bytes) at a time
      __ align(OptoLoopAlignment);
    __ BIND(L_aligned_copy);
      __ dec(end_from, 16);
      __ ldx(end_from, 8, O3);
      __ ldx(end_from, 0, O4);
      __ dec(end_to, 16);
      __ deccc(count, 8);
      __ stx(O3, end_to, 8);
      __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
      __ delayed()->stx(O4, end_to, 0);
      __ inc(count, 8);

    // copy 1 element (2 bytes) at a time
    __ BIND(L_copy_2_bytes);
      __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
    __ BIND(L_copy_2_bytes_loop);
      __ dec(end_from, 2);
      __ dec(end_to, 2);
      __ lduh(end_from, 0, O4);
      __ deccc(count);
      __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop);
      __ delayed()->sth(O4, end_to, 0);

    __ BIND(L_exit);
    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  //
  // Helper methods for generate_disjoint_int_copy_core()
  //
  void copy_16_bytes_loop(Register from, Register to, Register count, int count_dec,
                          Label& L_loop, bool use_prefetch, bool use_bis) {

    __ align(OptoLoopAlignment);
    __ BIND(L_loop);
    if (use_prefetch) {
      if (ArraycopySrcPrefetchDistance > 0) {
        __ prefetch(from, ArraycopySrcPrefetchDistance, Assembler::severalReads);
      }
      if (ArraycopyDstPrefetchDistance > 0) {
        __ prefetch(to, ArraycopyDstPrefetchDistance, Assembler::severalWritesAndPossiblyReads);
      }
    }
    // In this path 'from' and 'to' differ mod 8 and 'to' is 8-byte aligned,
    // so these 8-byte loads are aligned.
    __ ldx(from, 4, O4);
    __ ldx(from, 12, G4);
    __ inc(to, 16);
    __ inc(from, 16);
    __ deccc(count, 4); // Can we do next iteration after this one?

    // Merge the carried word in O3 with the halves of O4/G4 to form two
    // aligned 8-byte words to store (big-endian shift-and-or).
    __ srlx(O4, 32, G3);
    __ bset(G3, O3);
    __ sllx(O4, 32, O4);
    __ srlx(G4, 32, G3);
    __ bset(G3, O4);
    if (use_bis) {
      __ stxa(O3, to, -16);
      __ stxa(O4, to, -8);
    } else {
      __ stx(O3, to, -16);
      __ stx(O4, to, -8);
    }
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
    __ delayed()->sllx(G4, 32, O3); // carry the last word into the next iteration

  }

  //
  //  Generate core code for disjoint int copy (and oop copy on 32-bit).
  //  If "aligned" is true, the "from" and "to" addresses are assumed
  //  to be heapword aligned.
  //
  // Arguments:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  void generate_disjoint_int_copy_core(bool aligned) {

    Label L_skip_alignment, L_aligned_copy;
    Label L_copy_4_bytes, L_copy_4_bytes_loop, L_exit;

    const Register from      = O0;   // source array address
    const Register to        = O1;   // destination array address
    const Register count     = O2;   // elements count
    const Register offset    = O5;   // offset from start of arrays
    // O3, O4, G3, G4 are used as temp registers

    // 'aligned' == true when it is known statically during compilation
    // of this arraycopy call site that both 'from' and 'to' addresses
    // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
    //
    // Aligned arrays have 4-byte alignment in the 32-bit VM
    // and 8-byte alignment in the 64-bit VM.
    //
#ifdef _LP64
    if (!aligned)
#endif
    {
      // The next check could be put under 'ifndef' since the code in
      // generate_disjoint_long_copy_core() has its own checks and sets 'offset'.

      // for short arrays, just do single element copy
      __ cmp(count, 5); // 4 + 1 (20 bytes)
      __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes);
      __ delayed()->mov(G0, offset);

      // copy 1 element to align 'to' on an 8-byte boundary
      __ andcc(to, 7, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->ld(from, 0, O3);
      __ inc(from, 4);
      __ inc(to, 4);
      __ dec(count);
      __ st(O3, to, -4);
    __ BIND(L_skip_alignment);

      // if arrays have the same alignment mod 8, do a 4-elements-at-a-time copy
      __ andcc(from, 7, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
      __ delayed()->ld(from, 0, O3);

      //
      // Load 2 aligned 8-byte chunks and use one from the previous iteration
      // to form 2 aligned 8-byte chunks to store.
      //
      // copy_16_bytes_forward_with_shift() is not used here since this
      // code is more efficient.

      // copy with shift 4 elements (16 bytes) at a time
      __ dec(count, 4);   // The cmp at the beginning guarantees count >= 4
      __ sllx(O3, 32, O3);

      disjoint_copy_core(from, to, count, 2, 16, copy_16_bytes_loop);

      __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
      __ delayed()->inc(count, 4); // restore 'count'

    __ BIND(L_aligned_copy);
    } // !aligned

    // copy 4 elements (16 bytes) at a time
      __ and3(count, 1, G4); // Save
      __ srl(count, 1, count);
     generate_disjoint_long_copy_core(aligned);
      __ mov(G4, count);     // Restore

    // copy 1 element at a time
    __ BIND(L_copy_4_bytes);
      __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
    __ BIND(L_copy_4_bytes_loop);
      __ ld(from, offset, O3);
      __ deccc(count);
      __ st(O3, to, offset);
      __ brx(Assembler::notZero, false, Assembler::pt, L_copy_4_bytes_loop);
      __ delayed()->inc(offset, 4);
    __ BIND(L_exit);
  }

  //
  //  Generate stub for disjoint int copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_disjoint_int_copy(bool aligned, address *entry, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    const Register count = O2;
    assert_clean_int(count, O3);     // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    generate_disjoint_int_copy_core(aligned);

    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }
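
  // Illustrative sketch only (not part of the VM): the structure of the
  // disjoint int copy core in C, assuming jint is a 4-byte int32_t.  Once
  // 'to' is 8-byte aligned and 'from' has the same alignment mod 8, pairs of
  // elements are moved as single 8-byte words (the hand-off to
  // generate_disjoint_long_copy_core); the shifted path for mismatched
  // alignment is elided here.  Names are hypothetical.
  //
  //   void disjoint_int_copy_sketch(const int32_t* from, int32_t* to,
  //                                 size_t count) {
  //     if (count > 0 && ((uintptr_t)to & 7) != 0) {          // align 'to'
  //       *to++ = *from++;  count--;
  //     }
  //     if ((((uintptr_t)from ^ (uintptr_t)to) & 7) == 0) {   // same mod 8
  //       for (; count >= 2; count -= 2, from += 2, to += 2) {
  //         *(uint64_t*)to = *(const uint64_t*)from;          // 2 ints at once
  //       }
  //     }
  //     while (count-- > 0) *to++ = *from++;                  // tail
  //   }
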
  //
  //  Generate core code for conjoint int copy (and oop copy on 32-bit).
  //  If "aligned" is true, the "from" and "to" addresses are assumed
  //  to be heapword aligned.
  //
  // Arguments:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  void generate_conjoint_int_copy_core(bool aligned) {
    // Do reverse copy.

    Label L_skip_alignment, L_aligned_copy;
    Label L_copy_16_bytes, L_copy_4_bytes, L_copy_4_bytes_loop, L_exit;

    const Register from      = O0;   // source array address
    const Register to        = O1;   // destination array address
    const Register count     = O2;   // elements count
    const Register end_from  = from; // source array end address
    const Register end_to    = to;   // destination array end address
    // O3, O4, O5, G3 are used as temp registers

    const Register byte_count = O3;  // bytes count to copy

    __ sllx(count, LogBytesPerInt, byte_count);
    __ add(to, byte_count, end_to); // offset after last copied element

    __ cmp(count, 5); // for short arrays, just do single element copy
    __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes);
    __ delayed()->add(from, byte_count, end_from);

    // copy 1 element to align 'end_to' on an 8-byte boundary
    __ andcc(end_to, 7, G0);
    __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
    __ delayed()->nop();
    __ dec(count);
    __ dec(end_from, 4);
    __ dec(end_to, 4);
    __ ld(end_from, 0, O4);
    __ st(O4, end_to, 0);
    __ BIND(L_skip_alignment);

    // Check if 'end_from' and 'end_to' have the same alignment.
    __ andcc(end_from, 7, G0);
    __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
    __ delayed()->dec(count, 4); // The cmp at the start guarantees count >= 4

    // copy with shift 4 elements (16 bytes) at a time
    //
    // Load 2 aligned 8-byte chunks and use one from the previous iteration
    // to form 2 aligned 8-byte chunks to store.
    //
    __ ldx(end_from, -4, O3);
    __ align(OptoLoopAlignment);
    __ BIND(L_copy_16_bytes);
    __ ldx(end_from, -12, O4);
    __ deccc(count, 4);
    __ ldx(end_from, -20, O5);
    __ dec(end_to, 16);
    __ dec(end_from, 16);
    __ srlx(O3, 32, O3);
    __ sllx(O4, 32, G3);
    __ bset(G3, O3);
    __ stx(O3, end_to, 8);
    __ srlx(O4, 32, O4);
    __ sllx(O5, 32, G3);
    __ bset(O4, G3);
    __ stx(G3, end_to, 0);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
    __ delayed()->mov(O5, O3);

    __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
    __ delayed()->inc(count, 4);

    // copy 4 elements (16 bytes) at a time
      __ align(OptoLoopAlignment);
    __ BIND(L_aligned_copy);
      __ dec(end_from, 16);
      __ ldx(end_from, 8, O3);
      __ ldx(end_from, 0, O4);
      __ dec(end_to, 16);
      __ deccc(count, 4);
      __ stx(O3, end_to, 8);
      __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
      __ delayed()->stx(O4, end_to, 0);
      __ inc(count, 4);

    // copy 1 element (4 bytes) at a time
    __ BIND(L_copy_4_bytes);
      __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
    __ BIND(L_copy_4_bytes_loop);
      __ dec(end_from, 4);
      __ dec(end_to, 4);
      __ ld(end_from, 0, O4);
      __ deccc(count);
      __ brx(Assembler::greater, false, Assembler::pt, L_copy_4_bytes_loop);
      __ delayed()->st(O4, end_to, 0);
    __ BIND(L_exit);
  }

  //
  //  Generate stub for conjoint int copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_conjoint_int_copy(bool aligned, address nooverlap_target,
                                     address *entry, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    assert_clean_int(O2, O3);     // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    array_overlap_test(nooverlap_target, 2);

    generate_conjoint_int_copy_core(aligned);

    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  //
  // Helper methods for generate_disjoint_long_copy_core()
  //
  void copy_64_bytes_loop(Register from, Register to, Register count, int count_dec,
                          Label& L_loop, bool use_prefetch, bool use_bis) {
    __ align(OptoLoopAlignment);
    __ BIND(L_loop);
    for (int off = 0; off < 64; off += 16) {
      if (use_prefetch && (off & 31) == 0) {
        if (ArraycopySrcPrefetchDistance > 0) {
          __ prefetch(from, ArraycopySrcPrefetchDistance+off, Assembler::severalReads);
        }
        if (ArraycopyDstPrefetchDistance > 0) {
          __ prefetch(to, ArraycopyDstPrefetchDistance+off, Assembler::severalWritesAndPossiblyReads);
        }
      }
      __ ldx(from,  off+0, O4);
      __ ldx(from,  off+8, O5);
      if (use_bis) {
        __ stxa(O4, to,  off+0);
        __ stxa(O5, to,  off+8);
      } else {
        __ stx(O4, to,  off+0);
        __ stx(O5, to,  off+8);
      }
    }
    __ deccc(count, 8);
    __ inc(from, 64);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
    __ delayed()->inc(to, 64);
  }
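
  // Illustrative sketch only (not part of the VM): one iteration of the
  // 64-byte loop above in C, with a GCC-style prefetch builtin standing in
  // for the SPARC prefetch instruction.  Eight 8-byte words move per
  // iteration; the prefetches run a configurable distance ahead
  // (ArraycopySrcPrefetchDistance / ArraycopyDstPrefetchDistance).
  //
  //   // uint64_t* from, * to; count in 8-byte elements
  //   while (count >= 8) {
  //     __builtin_prefetch((const char*)from + prefetch_dist, 0 /* read  */);
  //     __builtin_prefetch((char*)to         + prefetch_dist, 1 /* write */);
  //     for (int i = 0; i < 8; i++) {
  //       to[i] = from[i];
  //     }
  //     from += 8;  to += 8;  count -= 8;
  //   }
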
  //
  //  Generate core code for disjoint long copy (and oop copy on 64-bit).
  //  "aligned" is ignored, because we must make the stronger
  //  assumption that both addresses are always 64-bit aligned.
  //
  // Arguments:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  // count -= 2;
  // if ( count >= 0 ) { // >= 2 elements
  //   if ( count >= 6) { // >= 8 elements
  //     count -= 6; // original count - 8
  //     do {
  //       copy_8_elements;
  //       count -= 8;
  //     } while ( count >= 0 );
  //     count += 6;
  //   }
  //   if ( count >= 0 ) { // >= 2 elements
  //     do {
  //       copy_2_elements;
  //     } while ( (count=count-2) >= 0 );
  //   }
  // }
  // count += 2;
  // if ( count != 0 ) { // 1 element left
  //   copy_1_element;
  // }
  //
  void generate_disjoint_long_copy_core(bool aligned) {
    Label L_copy_8_bytes, L_copy_16_bytes, L_exit;
    const Register from    = O0;  // source array address
    const Register to      = O1;  // destination array address
    const Register count   = O2;  // elements count
    const Register offset0 = O4;  // element offset
    const Register offset8 = O5;  // next element offset

    __ deccc(count, 2);
    __ mov(G0, offset0);   // offset from start of arrays (0)
    __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes );
    __ delayed()->add(offset0, 8, offset8);

    // Copy in 64-byte chunks

    const Register from64 = O3;  // source address
    const Register to64   = G3;  // destination address
    __ subcc(count, 6, O3);
    __ brx(Assembler::negative, false, Assembler::pt, L_copy_16_bytes );
    __ delayed()->mov(to, to64);
    // Now we can use O4(offset0), O5(offset8) as temps
    __ mov(O3, count);
    // count >= 0 (original count - 8)
    __ mov(from, from64);

    disjoint_copy_core(from64, to64, count, 3, 64, copy_64_bytes_loop);

      // Restore O4(offset0), O5(offset8)
      __ sub(from64, from, offset0);
      __ inccc(count, 6); // restore count
      __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes );
      __ delayed()->add(offset0, 8, offset8);

      // Copy in 16-byte chunks
      __ align(OptoLoopAlignment);
    __ BIND(L_copy_16_bytes);
      __ ldx(from, offset0, O3);
      __ ldx(from, offset8, G3);
      __ deccc(count, 2);
      __ stx(O3, to, offset0);
      __ inc(offset0, 16);
      __ stx(G3, to, offset8);
      __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
      __ delayed()->inc(offset8, 16);

      // Copy last 8 bytes
    __ BIND(L_copy_8_bytes);
      __ inccc(count, 2);
      __ brx(Assembler::zero, true, Assembler::pn, L_exit );
      __ delayed()->mov(offset0, offset8); // Set O5 used by other stubs
      __ ldx(from, offset0, O3);
      __ stx(O3, to, offset0);
    __ BIND(L_exit);
  }

  //
  //  Generate stub for disjoint long copy.
  //  "aligned" is ignored, because we must make the stronger
  //  assumption that both addresses are always 64-bit aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_disjoint_long_copy(bool aligned, address *entry, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    assert_clean_int(O2, O3);     // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    generate_disjoint_long_copy_core(aligned);

    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  //
  //  Generate core code for conjoint long copy (and oop copy on 64-bit).
  //  "aligned" is ignored, because we must make the stronger
  //  assumption that both addresses are always 64-bit aligned.
  //
  // Arguments:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  void generate_conjoint_long_copy_core(bool aligned) {
    // Do reverse copy.
    Label L_copy_8_bytes, L_copy_16_bytes, L_exit;
    const Register from    = O0;  // source array address
    const Register to      = O1;  // destination array address
    const Register count   = O2;  // elements count
    const Register offset8 = O4;  // element offset
    const Register offset0 = O5;  // previous element offset

    __ subcc(count, 1, count);
    __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_8_bytes );
    __ delayed()->sllx(count, LogBytesPerLong, offset8);
    __ sub(offset8, 8, offset0);
    __ align(OptoLoopAlignment);
    __ BIND(L_copy_16_bytes);
    __ ldx(from, offset8, O2);
    __ ldx(from, offset0, O3);
    __ stx(O2, to, offset8);
    __ deccc(offset8, 16);      // use offset8 as counter
    __ stx(O3, to, offset0);
    __ brx(Assembler::greater, false, Assembler::pt, L_copy_16_bytes);
    __ delayed()->dec(offset0, 16);

    __ BIND(L_copy_8_bytes);
    __ brx(Assembler::negative, false, Assembler::pn, L_exit );
    __ delayed()->nop();
    __ ldx(from, 0, O3);
    __ stx(O3, to, 0);
    __ BIND(L_exit);
  }

  //  Generate stub for conjoint long copy.
  //  "aligned" is ignored, because we must make the stronger
  //  assumption that both addresses are always 64-bit aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_conjoint_long_copy(bool aligned, address nooverlap_target,
                                      address *entry, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    assert(aligned, "Should always be aligned");

    assert_clean_int(O2, O3);     // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    array_overlap_test(nooverlap_target, 3);

    generate_conjoint_long_copy_core(aligned);

    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  //  Generate stub for disjoint oop copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_disjoint_oop_copy(bool aligned, address *entry, const char *name,
                                     bool dest_uninitialized = false) {

    const Register from  = O0;  // source array address
    const Register to    = O1;  // destination array address
    const Register count = O2;  // elements count

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    assert_clean_int(count, O3);     // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here
      BLOCK_COMMENT("Entry:");
    }

    // save arguments for barrier generation
    __ mov(to, G1);
    __ mov(count, G5);
    gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized);
#ifdef _LP64
    assert_clean_int(count, O3);     // Make sure 'count' is clean int.
    if (UseCompressedOops) {
      generate_disjoint_int_copy_core(aligned);
    } else {
      generate_disjoint_long_copy_core(aligned);
    }
#else
    generate_disjoint_int_copy_core(aligned);
#endif
    // O0 is used as temp register
    gen_write_ref_array_post_barrier(G1, G5, O0);

    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  // Generate stub for conjoint oop copy.  If "aligned" is true, the
  // "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_conjoint_oop_copy(bool aligned, address nooverlap_target,
                                     address *entry, const char *name,
                                     bool dest_uninitialized = false) {

    const Register from  = O0;  // source array address
    const Register to    = O1;  // destination array address
    const Register count = O2;  // elements count

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    assert_clean_int(count, O3);     // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here
      BLOCK_COMMENT("Entry:");
    }

    array_overlap_test(nooverlap_target, LogBytesPerHeapOop);

    // save arguments for barrier generation
    __ mov(to, G1);
    __ mov(count, G5);
    gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized);

#ifdef _LP64
    if (UseCompressedOops) {
      generate_conjoint_int_copy_core(aligned);
    } else {
      generate_conjoint_long_copy_core(aligned);
    }
#else
    generate_conjoint_int_copy_core(aligned);
#endif

    // O0 is used as temp register
    gen_write_ref_array_post_barrier(G1, G5, O0);

    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }
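
  // Illustrative sketch only (not part of the VM): the barrier bracketing
  // shared by both oop copy stubs above, in pseudo-C.  The payload move is
  // the int copy core (compressed oops) or the long copy core (uncompressed);
  // which GC barriers are actually emitted depends on the collector in use.
  //
  //   pre_barrier(to, count, dest_uninitialized);  // may be skipped when the
  //                                                // destination is uninitialized
  //   copy_payload(from, to, count);               // 4- or 8-byte elements
  //   post_barrier(to, count);                     // e.g. card marking
  //
  // Keeping the barriers inside the stub, rather than at every call site, is
  // what lets the same copy cores serve both primitive and oop arrays.
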

  // Helper for generating a dynamic type check.
  // Smashes only the given temp registers.
  void generate_type_check(Register sub_klass,
                           Register super_check_offset,
                           Register super_klass,
                           Register temp,
                           Label& L_success) {
    assert_different_registers(sub_klass, super_check_offset, super_klass, temp);

    BLOCK_COMMENT("type_check:");

    Label L_miss, L_pop_to_miss;

    assert_clean_int(super_check_offset, temp);

    __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg,
                                     &L_success, &L_miss, NULL,
                                     super_check_offset);

    BLOCK_COMMENT("type_check_slow_path:");
    __ save_frame(0);
    __ check_klass_subtype_slow_path(sub_klass->after_save(),
                                     super_klass->after_save(),
                                     L0, L1, L2, L4,
                                     NULL, &L_pop_to_miss);
    __ ba(L_success);
    __ delayed()->restore();

    __ bind(L_pop_to_miss);
    __ restore();

    // Fall through on failure!
    __ BIND(L_miss);
  }


  //  Generate stub for checked oop copy.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //      ckoff: O3 (super_check_offset)
  //      ckval: O4 (super_klass)
  //      ret:   O0 zero for success; (-1^K) where K is partial transfer count
  //
  address generate_checkcast_copy(const char *name, address *entry, bool dest_uninitialized = false) {

    const Register O0_from   = O0;      // source array address
    const Register O1_to     = O1;      // destination array address
    const Register O2_count  = O2;      // elements count
    const Register O3_ckoff  = O3;      // super_check_offset
    const Register O4_ckval  = O4;      // super_klass

    const Register O5_offset = O5;      // loop var, with stride wordSize
    const Register G1_remain = G1;      // loop var, with stride -1
    const Register G3_oop    = G3;      // actual oop copied
    const Register G4_klass  = G4;      // oop._klass
    const Register G5_super  = G5;      // oop._klass._primary_supers[ckval]

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

#ifdef ASSERT
    // We sometimes save a frame (see generate_type_check below).
    // If this will cause trouble, let's fail now instead of later.
    __ save_frame(0);
    __ restore();
#endif

    assert_clean_int(O2_count, G1);     // Make sure 'count' is clean int.

#ifdef ASSERT
    // The caller guarantees that the arrays really are different;
    // otherwise, we would have to make conjoint checks.
    { Label L;
      __ mov(O3, G1);           // spill: overlap test smashes O3
      __ mov(O4, G4);           // spill: overlap test smashes O4
      array_overlap_test(L, LogBytesPerHeapOop);
      __ stop("checkcast_copy within a single array");
      __ bind(L);
      __ mov(G1, O3);
      __ mov(G4, O4);
    }
#endif //ASSERT

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from generic stub)
      BLOCK_COMMENT("Entry:");
    }
    gen_write_ref_array_pre_barrier(O1_to, O2_count, dest_uninitialized);

    Label load_element, store_element, do_card_marks, fail, done;
    __ addcc(O2_count, 0, G1_remain);   // initialize loop index, and test it
    __ brx(Assembler::notZero, false, Assembler::pt, load_element);
    __ delayed()->mov(G0, O5_offset);   // offset from start of arrays

    // Empty array:  Nothing to do.
    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->set(0, O0);           // return 0 on (trivial) success

    // ======== begin loop ========
    // (Loop is rotated; its entry is load_element.)
    // Loop variables:
    //   (O5 = 0; ; O5 += wordSize) --- offset from src, dest arrays
    //   (O2 = len; O2 != 0; O2--) --- number of oops *remaining*
    //   G3, G4, G5 --- current oop, oop.klass, oop.klass.super
    __ align(OptoLoopAlignment);

    __ BIND(store_element);
    __ deccc(G1_remain);                // decrement the count
    __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop
    __ inc(O5_offset, heapOopSize);     // step to next offset
    __ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
    __ delayed()->set(0, O0);           // return 0 on success

    // ======== loop entry is here ========
    __ BIND(load_element);
    __ load_heap_oop(O0_from, O5_offset, G3_oop);  // load the oop
    __ br_null_short(G3_oop, Assembler::pt, store_element);

    __ load_klass(G3_oop, G4_klass); // query the object klass

    generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
                        // branch to this on success:
                        store_element);
    // ======== end loop ========

    // It was a real error; we must depend on the caller to finish the job.
    // Register G1 has number of *remaining* oops, O2 number of *total* oops.
    // Emit GC store barriers for the oops we have copied (O2 minus G1),
    // and report their number to the caller.
    __ BIND(fail);
    __ subcc(O2_count, G1_remain, O2_count);
    __ brx(Assembler::zero, false, Assembler::pt, done);
    __ delayed()->not1(O2_count, O0);   // report (-1^K) to caller

    __ BIND(do_card_marks);
    gen_write_ref_array_post_barrier(O1_to, O2_count, O3);   // store check on O1[0..O2]

    __ BIND(done);
    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->nop();                // return value in O0
    return start;
  }
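
  // Illustrative sketch only (not part of the VM): the contract of the
  // checkcast copy above in pseudo-C.  Elements are stored one at a time
  // until one fails the subtype check against the destination element klass;
  // on failure the stub returns the bitwise complement of the number already
  // copied, so the caller can distinguish partial transfers.  The helper
  // names are hypothetical, and the real loop also interleaves GC barriers.
  //
  //   ssize_t checkcast_copy_sketch(oop* from, oop* to, size_t count,
  //                                 Klass* dst_elem_klass) {
  //     for (size_t k = 0; k < count; k++) {
  //       oop o = from[k];
  //       if (o != NULL && !klass_of(o)->is_subtype_of(dst_elem_klass))
  //         return ~(ssize_t)k;      // (-1 ^ K): K elements were copied
  //       to[k] = o;                 // NULLs are stored without a check
  //     }
  //     return 0;                    // all elements transferred
  //   }
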

  //  Generate 'unsafe' array copy stub.
  //  Though just as safe as the other stubs, it takes an unscaled
  //  size_t argument instead of an element count.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 byte count, treated as ssize_t, can be zero
  //
  // Examines the alignment of the operands and dispatches
  // to a long, int, short, or byte copy loop.
  //
  address generate_unsafe_copy(const char* name,
                               address byte_copy_entry,
                               address short_copy_entry,
                               address int_copy_entry,
                               address long_copy_entry) {

    const Register O0_from   = O0;      // source array address
    const Register O1_to     = O1;      // destination array address
    const Register O2_count  = O2;      // byte count (unscaled)

    const Register G1_bits   = G1;      // test copy of low bits

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    // bump this on entry, not on exit:
    inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr, G1, G3);

    __ or3(O0_from, O1_to, G1_bits);
    __ or3(O2_count, G1_bits, G1_bits);

    __ btst(BytesPerLong-1, G1_bits);
    __ br(Assembler::zero, true, Assembler::pt,
          long_copy_entry, relocInfo::runtime_call_type);
    // scale the count on the way out:
    __ delayed()->srax(O2_count, LogBytesPerLong, O2_count);

    __ btst(BytesPerInt-1, G1_bits);
    __ br(Assembler::zero, true, Assembler::pt,
          int_copy_entry, relocInfo::runtime_call_type);
    // scale the count on the way out:
    __ delayed()->srax(O2_count, LogBytesPerInt, O2_count);

    __ btst(BytesPerShort-1, G1_bits);
    __ br(Assembler::zero, true, Assembler::pt,
          short_copy_entry, relocInfo::runtime_call_type);
    // scale the count on the way out:
    __ delayed()->srax(O2_count, LogBytesPerShort, O2_count);

    __ br(Assembler::always, false, Assembler::pt,
          byte_copy_entry, relocInfo::runtime_call_type);
    __ delayed()->nop();

    return start;
  }
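
  // Illustrative sketch only (not part of the VM): the dispatch rule above
  // in C.  The byte count is rerouted to a wider copy loop only when source,
  // destination, and count are all aligned for that width, and the count is
  // scaled from bytes to elements on the way out.  Names are hypothetical.
  //
  //   uintptr_t bits = (uintptr_t)from | (uintptr_t)to | (uintptr_t)count;
  //   if      ((bits & 7) == 0) long_copy (from, to, count >> 3);
  //   else if ((bits & 3) == 0) int_copy  (from, to, count >> 2);
  //   else if ((bits & 1) == 0) short_copy(from, to, count >> 1);
  //   else                      byte_copy (from, to, count);
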

  // Perform range checks on the proposed arraycopy.
  // Kills the two temps, but nothing else.
  // Also, clean the sign bits of src_pos and dst_pos.
  void arraycopy_range_checks(Register src,     // source array oop (O0)
                              Register src_pos, // source position (O1)
                              Register dst,     // destination array oop (O2)
                              Register dst_pos, // destination position (O3)
                              Register length,  // length of copy (O4)
                              Register temp1, Register temp2,
                              Label& L_failed) {
    BLOCK_COMMENT("arraycopy_range_checks:");

    //  if (src_pos + length > arrayOop(src)->length() ) FAIL;

    const Register array_length = temp1;  // scratch
    const Register end_pos      = temp2;  // scratch

    // Note:  This next instruction may be in the delay slot of a branch:
    __ add(length, src_pos, end_pos);  // src_pos + length
    __ lduw(src, arrayOopDesc::length_offset_in_bytes(), array_length);
    __ cmp(end_pos, array_length);
    __ br(Assembler::greater, false, Assembler::pn, L_failed);

    //  if (dst_pos + length > arrayOop(dst)->length() ) FAIL;
    __ delayed()->add(length, dst_pos, end_pos); // dst_pos + length
    __ lduw(dst, arrayOopDesc::length_offset_in_bytes(), array_length);
    __ cmp(end_pos, array_length);
    __ br(Assembler::greater, false, Assembler::pn, L_failed);

    // Have to clean up the high 32 bits of 'src_pos' and 'dst_pos'.
    // Move with sign extension can be used since they are positive.
    __ delayed()->signx(src_pos, src_pos);
    __ signx(dst_pos, dst_pos);

    BLOCK_COMMENT("arraycopy_range_checks done");
  }


  //
  //  Generate generic array copy stubs
  //
  //  Input:
  //    O0    -  src oop
  //    O1    -  src_pos
  //    O2    -  dst oop
  //    O3    -  dst_pos
  //    O4    -  element count
  //
  //  Output:
  //    O0 ==  0  -  success
  //    O0 == -1  -  need to call System.arraycopy
  //
  address generate_generic_copy(const char *name,
                                address entry_jbyte_arraycopy,
                                address entry_jshort_arraycopy,
                                address entry_jint_arraycopy,
                                address entry_oop_arraycopy,
                                address entry_jlong_arraycopy,
                                address entry_checkcast_arraycopy) {
    Label L_failed, L_objArray;

    // Input registers
    const Register src      = O0;  // source array oop
    const Register src_pos  = O1;  // source position
    const Register dst      = O2;  // destination array oop
    const Register dst_pos  = O3;  // destination position
    const Register length   = O4;  // elements count

    // registers used as temp
    const Register G3_src_klass = G3; // source array klass
    const Register G4_dst_klass = G4; // destination array klass
    const Register G5_lh        = G5; // layout helper
    const Register O5_temp      = O5;

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    // bump this on entry, not on exit:
    inc_counter_np(SharedRuntime::_generic_array_copy_ctr, G1, G3);

    // In principle, the int arguments could be dirty.
    //assert_clean_int(src_pos, G1);
    //assert_clean_int(dst_pos, G1);
    //assert_clean_int(length, G1);

    //-----------------------------------------------------------------------
    // Assembler stubs will be used for this call to arraycopy
    // if the following conditions are met:
    //
    // (1) src and dst must not be null.
    // (2) src_pos must not be negative.
    // (3) dst_pos must not be negative.
    // (4) length  must not be negative.
    // (5) src klass and dst klass should be the same and not NULL.
    // (6) src and dst should be arrays.
    // (7) src_pos + length must not exceed length of src.
    // (8) dst_pos + length must not exceed length of dst.
    BLOCK_COMMENT("arraycopy initial argument checks");

    //  if (src == NULL) return -1;
    __ br_null(src, false, Assembler::pn, L_failed);

    //  if (src_pos < 0) return -1;
    __ delayed()->tst(src_pos);
    __ br(Assembler::negative, false, Assembler::pn, L_failed);
    __ delayed()->nop();

    //  if (dst == NULL) return -1;
    __ br_null(dst, false, Assembler::pn, L_failed);

    //  if (dst_pos < 0) return -1;
    __ delayed()->tst(dst_pos);
    __ br(Assembler::negative, false, Assembler::pn, L_failed);

    //  if (length < 0) return -1;
    __ delayed()->tst(length);
    __ br(Assembler::negative, false, Assembler::pn, L_failed);

    BLOCK_COMMENT("arraycopy argument klass checks");
    //  get src->klass()
    if (UseCompressedKlassPointers) {
      __ delayed()->nop(); // ??? not good
      __ load_klass(src, G3_src_klass);
    } else {
      __ delayed()->ld_ptr(src, oopDesc::klass_offset_in_bytes(), G3_src_klass);
    }

#ifdef ASSERT
    //  assert(src->klass() != NULL);
    BLOCK_COMMENT("assert klasses not null");
    { Label L_a, L_b;
      __ br_notnull_short(G3_src_klass, Assembler::pt, L_b); // it is broken if klass is NULL
      __ bind(L_a);
      __ stop("broken null klass");
      __ bind(L_b);
      __ load_klass(dst, G4_dst_klass);
      __ br_null(G4_dst_klass, false, Assembler::pn, L_a); // this would be broken also
      __ delayed()->mov(G0, G4_dst_klass);      // scribble the temp
      BLOCK_COMMENT("assert done");
    }
#endif

    // Load layout helper
    //
    //  |array_tag|     | header_size | element_type |     |log2_element_size|
    // 32        30    24            16              8     2                 0
    //
    //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
    //
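
    // Illustrative sketch only (not part of the VM): decoding the layout
    // helper fields shown in the diagram above, in C, for a value 'lh'
    // already known to describe an array:
    //
    //   int tag       = ((juint)lh) >> Klass::_lh_array_tag_shift;  // 0x3 or 0x2
    //   int hdr_size  = (lh >> Klass::_lh_header_size_shift) & Klass::_lh_header_size_mask;
    //   int log2_size = lh & Klass::_lh_log2_element_size_mask;
    //
    // so an element address is computed as
    //
    //   addr = (char*)array + hdr_size + ((size_t)pos << log2_size);
    //
    // which is exactly what the TypeArrayKlass path below does in registers.
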
    int lh_offset = in_bytes(Klass::layout_helper_offset());

    // Load the 32-bit signed value.  Use br() instruction with it to check icc.
    __ lduw(G3_src_klass, lh_offset, G5_lh);

    if (UseCompressedKlassPointers) {
      __ load_klass(dst, G4_dst_klass);
    }
    // Handle objArrays completely differently...
    juint objArray_lh = Klass::array_layout_helper(T_OBJECT);
    __ set(objArray_lh, O5_temp);
    __ cmp(G5_lh, O5_temp);
    __ br(Assembler::equal, false, Assembler::pt, L_objArray);
    if (UseCompressedKlassPointers) {
      __ delayed()->nop();
    } else {
      __ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass);
    }

    //  if (src->klass() != dst->klass()) return -1;
    __ cmp_and_brx_short(G3_src_klass, G4_dst_klass, Assembler::notEqual, Assembler::pn, L_failed);

    //  if (!src->is_Array()) return -1;
    __ cmp(G5_lh, Klass::_lh_neutral_value); // < 0
    __ br(Assembler::greaterEqual, false, Assembler::pn, L_failed);

    // At this point, it is known to be a typeArray (array_tag 0x3).
#ifdef ASSERT
    __ delayed()->nop();
    { Label L;
      jint lh_prim_tag_in_place = (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
      __ set(lh_prim_tag_in_place, O5_temp);
      __ cmp(G5_lh, O5_temp);
      __ br(Assembler::greaterEqual, false, Assembler::pt, L);
      __ delayed()->nop();
      __ stop("must be a primitive array");
      __ bind(L);
    }
#else
    __ delayed();                               // match next insn to prev branch
#endif

    arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
                           O5_temp, G4_dst_klass, L_failed);

    // TypeArrayKlass
    //
    // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
    // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
    //

    const Register G4_offset = G4_dst_klass;    // array offset
    const Register G3_elsize = G3_src_klass;    // log2 element size

    __ srl(G5_lh, Klass::_lh_header_size_shift, G4_offset);
    __ and3(G4_offset, Klass::_lh_header_size_mask, G4_offset); // array_offset
    __ add(src, G4_offset, src);       // src array offset
    __ add(dst, G4_offset, dst);       // dst array offset
    __ and3(G5_lh, Klass::_lh_log2_element_size_mask, G3_elsize); // log2 element size

    // The next registers should be set before the jump to the corresponding stub.
    const Register from     = O0;  // source array address
    const Register to       = O1;  // destination array address
    const Register count    = O2;  // elements count

    // 'from', 'to', 'count' registers should be set in this order
    // since they are the same as 'src', 'src_pos', 'dst'.

    BLOCK_COMMENT("scale indexes to element size");
    __ sll_ptr(src_pos, G3_elsize, src_pos);
    __ sll_ptr(dst_pos, G3_elsize, dst_pos);
    __ add(src, src_pos, from);       // src_addr
    __ add(dst, dst_pos, to);         // dst_addr

    BLOCK_COMMENT("choose copy loop based on element size");
    __ cmp(G3_elsize, 0);
    __ br(Assembler::equal, true, Assembler::pt, entry_jbyte_arraycopy);
    __ delayed()->signx(length, count); // length

    __ cmp(G3_elsize, LogBytesPerShort);
    __ br(Assembler::equal, true, Assembler::pt, entry_jshort_arraycopy);
    __ delayed()->signx(length, count); // length

    __ cmp(G3_elsize, LogBytesPerInt);
    __ br(Assembler::equal, true, Assembler::pt, entry_jint_arraycopy);
    __ delayed()->signx(length, count); // length
#ifdef ASSERT
    { Label L;
      __ cmp_and_br_short(G3_elsize, LogBytesPerLong, Assembler::equal, Assembler::pt, L);
      __ stop("must be long copy, but elsize is wrong");
      __ bind(L);
    }
#endif
    __ br(Assembler::always, false, Assembler::pt, entry_jlong_arraycopy);
    __ delayed()->signx(length, count); // length

    // ObjArrayKlass
  __ BIND(L_objArray);
    // live at this point:  G3_src_klass, G4_dst_klass, src[_pos], dst[_pos], length

    Label L_plain_copy, L_checkcast_copy;
    //  test array classes for subtyping
    __ cmp(G3_src_klass, G4_dst_klass);         // usual case is exact equality
    __ brx(Assembler::notEqual, true, Assembler::pn, L_checkcast_copy);
    __ delayed()->lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted from below

    // Identically typed arrays can be copied without element-wise checks.
    arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
                           O5_temp, G5_lh, L_failed);

    __ add(src, arrayOopDesc::base_offset_in_bytes(T_OBJECT), src); // src offset
    __ add(dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT), dst); // dst offset
    __ sll_ptr(src_pos, LogBytesPerHeapOop, src_pos);
    __ sll_ptr(dst_pos, LogBytesPerHeapOop, dst_pos);
    __ add(src, src_pos, from);       // src_addr
    __ add(dst, dst_pos, to);         // dst_addr
  __ BIND(L_plain_copy);
    __ br(Assembler::always, false, Assembler::pt, entry_oop_arraycopy);
    __ delayed()->signx(length, count); // length

  __ BIND(L_checkcast_copy);
    // live at this point:  G3_src_klass, G4_dst_klass
    {
      // Before looking at dst.length, make sure dst is also an objArray.
      // lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted to delay slot
      __ cmp(G5_lh, O5_temp);
      __ br(Assembler::notEqual, false, Assembler::pn, L_failed);

      // It is safe to examine both src.length and dst.length.
      __ delayed();                             // match next insn to prev branch
      arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
                             O5_temp, G5_lh, L_failed);

      // Marshal the base address arguments now, freeing registers.
      __ add(src, arrayOopDesc::base_offset_in_bytes(T_OBJECT), src); // src offset
      __ add(dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT), dst); // dst offset
      __ sll_ptr(src_pos, LogBytesPerHeapOop, src_pos);
      __ sll_ptr(dst_pos, LogBytesPerHeapOop, dst_pos);
      __ add(src, src_pos, from);               // src_addr
      __ add(dst, dst_pos, to);                 // dst_addr
      __ signx(length, count);                  // length (reloaded)

      Register sco_temp = O3;                   // this register is free now
      assert_different_registers(from, to, count, sco_temp,
                                 G4_dst_klass, G3_src_klass);

      // Generate the type check.
      int sco_offset = in_bytes(Klass::super_check_offset_offset());
      __ lduw(G4_dst_klass, sco_offset, sco_temp);
      generate_type_check(G3_src_klass, sco_temp, G4_dst_klass,
                          O5_temp, L_plain_copy);

      // Fetch destination element klass from the ObjArrayKlass header.
      int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());

      // the checkcast_copy loop needs two extra arguments:
      __ ld_ptr(G4_dst_klass, ek_offset, O4);   // dest elem klass
      // lduw(O4, sco_offset, O3);              // sco of elem klass

      __ br(Assembler::always, false, Assembler::pt, entry_checkcast_arraycopy);
      __ delayed()->lduw(O4, sco_offset, O3);
    }

  __ BIND(L_failed);
    __ retl();
    __ delayed()->sub(G0, 1, O0); // return -1
    return start;
  }

  //
  //  Generate stub for heap zeroing.
  //  "to" address is aligned to jlong (8 bytes).
  //
  // Arguments for generated stub:
  //      to:    O0
  //      count: O1 treated as signed (count of HeapWords)
  //             count could be 0
  //
  address generate_zero_aligned_words(const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    const Register to    = O0;   // destination address
    const Register count = O1;   // HeapWords count
    const Register temp  = O2;   // scratch

    Label Ldone;
    __ sllx(count, LogHeapWordSize, count); // to bytes count
    // Use BIS for zeroing
    __ bis_zeroing(to, count, temp, Ldone);
    __ bind(Ldone);
    __ retl();
    __ delayed()->nop();
    return start;
  }
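
  // Illustrative sketch only (not part of the VM): the net effect of the
  // stub above, assuming an 8-byte-aligned HeapWord* destination.
  //
  //   void zero_aligned_words_sketch(HeapWord* to, size_t count) {
  //     memset(to, 0, count << LogHeapWordSize);  // bytes = words * HeapWordSize
  //   }
  //
  // The real stub uses SPARC block-initializing stores (BIS), which clear a
  // cache line without first reading it from memory.
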
  void generate_arraycopy_stubs() {
    address entry;
    address entry_jbyte_arraycopy;
    address entry_jshort_arraycopy;
    address entry_jint_arraycopy;
    address entry_oop_arraycopy;
    address entry_jlong_arraycopy;
    address entry_checkcast_arraycopy;

    //*** jbyte
    // Always need aligned and unaligned versions
    StubRoutines::_jbyte_disjoint_arraycopy         = generate_disjoint_byte_copy(false, &entry,
                                                                                  "jbyte_disjoint_arraycopy");
    StubRoutines::_jbyte_arraycopy                  = generate_conjoint_byte_copy(false, entry,
                                                                                  &entry_jbyte_arraycopy,
                                                                                  "jbyte_arraycopy");
    StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry,
                                                                                  "arrayof_jbyte_disjoint_arraycopy");
    StubRoutines::_arrayof_jbyte_arraycopy          = generate_conjoint_byte_copy(true, entry, NULL,
                                                                                  "arrayof_jbyte_arraycopy");

    //*** jshort
    // Always need aligned and unaligned versions
    StubRoutines::_jshort_disjoint_arraycopy         = generate_disjoint_short_copy(false, &entry,
                                                                                    "jshort_disjoint_arraycopy");
    StubRoutines::_jshort_arraycopy                  = generate_conjoint_short_copy(false, entry,
                                                                                    &entry_jshort_arraycopy,
                                                                                    "jshort_arraycopy");
    StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry,
                                                                                    "arrayof_jshort_disjoint_arraycopy");
    StubRoutines::_arrayof_jshort_arraycopy          = generate_conjoint_short_copy(true, entry, NULL,
                                                                                    "arrayof_jshort_arraycopy");

    //*** jint
    // Aligned versions
    StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry,
                                                                                "arrayof_jint_disjoint_arraycopy");
    StubRoutines::_arrayof_jint_arraycopy          = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy,
                                                                                "arrayof_jint_arraycopy");
#ifdef _LP64
    // In 64-bit we need both aligned and unaligned versions of jint arraycopy.
    // entry_jint_arraycopy always points to the unaligned version (notice that we overwrite it).
    StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry,
                                                                        "jint_disjoint_arraycopy");
    StubRoutines::_jint_arraycopy          = generate_conjoint_int_copy(false, entry,
                                                                        &entry_jint_arraycopy,
                                                                        "jint_arraycopy");
#else
    // In 32-bit, jints are always HeapWordSize aligned, so always use the aligned version
    // (in fact in 32-bit we always have a pre-loop part even in the aligned version,
    // because it uses 64-bit loads/stores, so the aligned flag is actually ignored).
    StubRoutines::_jint_disjoint_arraycopy = StubRoutines::_arrayof_jint_disjoint_arraycopy;
    StubRoutines::_jint_arraycopy          = StubRoutines::_arrayof_jint_arraycopy;
#endif


    //*** jlong
    // It is always aligned
    StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry,
                                                                                  "arrayof_jlong_disjoint_arraycopy");
    StubRoutines::_arrayof_jlong_arraycopy          = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy,
                                                                                  "arrayof_jlong_arraycopy");
    StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy;
    StubRoutines::_jlong_arraycopy          = StubRoutines::_arrayof_jlong_arraycopy;


    //*** oops
    // Aligned versions
    StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_oop_copy(true, &entry,
                                                                               "arrayof_oop_disjoint_arraycopy");
    StubRoutines::_arrayof_oop_arraycopy          = generate_conjoint_oop_copy(true, entry, &entry_oop_arraycopy,
                                                                               "arrayof_oop_arraycopy");
    // Aligned versions without pre-barriers
    StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(true, &entry,
                                                                                      "arrayof_oop_disjoint_arraycopy_uninit",
                                                                                      /*dest_uninitialized*/true);
    StubRoutines::_arrayof_oop_arraycopy_uninit          = generate_conjoint_oop_copy(true, entry, NULL,
                                                                                      "arrayof_oop_arraycopy_uninit",
                                                                                      /*dest_uninitialized*/true);
#ifdef _LP64
    if (UseCompressedOops) {
      // With compressed oops we need unaligned versions; notice that we overwrite entry_oop_arraycopy.
      StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_oop_copy(false, &entry,
                                                                         "oop_disjoint_arraycopy");
      StubRoutines::_oop_arraycopy          = generate_conjoint_oop_copy(false, entry, &entry_oop_arraycopy,
                                                                         "oop_arraycopy");
      // Unaligned versions without pre-barriers
      StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(false, &entry,
                                                                                "oop_disjoint_arraycopy_uninit",
                                                                                /*dest_uninitialized*/true);
      StubRoutines::_oop_arraycopy_uninit          = generate_conjoint_oop_copy(false, entry, NULL,
                                                                                "oop_arraycopy_uninit",
                                                                                /*dest_uninitialized*/true);
    } else
#endif
    {
      // oop arraycopy is always aligned on 32-bit, and on 64-bit without compressed oops
      StubRoutines::_oop_disjoint_arraycopy        = StubRoutines::_arrayof_oop_disjoint_arraycopy;
      StubRoutines::_oop_arraycopy                 = StubRoutines::_arrayof_oop_arraycopy;
      StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
      StubRoutines::_oop_arraycopy_uninit          = StubRoutines::_arrayof_oop_arraycopy_uninit;
    }

    StubRoutines::_checkcast_arraycopy        = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
    StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
                                                                        /*dest_uninitialized*/true);

    StubRoutines::_unsafe_arraycopy  = generate_unsafe_copy("unsafe_arraycopy",
                                                            entry_jbyte_arraycopy,
                                                            entry_jshort_arraycopy,
                                                            entry_jint_arraycopy,
                                                            entry_jlong_arraycopy);
    StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy",
                                                             entry_jbyte_arraycopy,
                                                             entry_jshort_arraycopy,
                                                             entry_jint_arraycopy,
                                                             entry_oop_arraycopy,
                                                             entry_jlong_arraycopy,
                                                             entry_checkcast_arraycopy);

    StubRoutines::_jbyte_fill  = generate_fill(T_BYTE,  false, "jbyte_fill");
    StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
  void generate_initial() {
    // Generates the stubs that are needed early and initializes their entry points

    //------------------------------------------------------------------------------------------------------------------------
    // entry points that exist on all platforms
    // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than
    // the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp.
    StubRoutines::_forward_exception_entry = generate_forward_exception();

    StubRoutines::_call_stub_entry         = generate_call_stub(StubRoutines::_call_stub_return_address);
    StubRoutines::_catch_exception_entry   = generate_catch_exception();

    //------------------------------------------------------------------------------------------------------------------------
    // entry points that are platform specific
    StubRoutines::Sparc::_test_stop_entry = generate_test_stop();

    StubRoutines::Sparc::_stop_subroutine_entry                = generate_stop_subroutine();
    StubRoutines::Sparc::_flush_callers_register_windows_entry = generate_flush_callers_register_windows();

#if !defined(COMPILER2) && !defined(_LP64)
    StubRoutines::_atomic_xchg_entry         = generate_atomic_xchg();
    StubRoutines::_atomic_cmpxchg_entry      = generate_atomic_cmpxchg();
    StubRoutines::_atomic_add_entry          = generate_atomic_add();
    StubRoutines::_atomic_xchg_ptr_entry     = StubRoutines::_atomic_xchg_entry;
    StubRoutines::_atomic_cmpxchg_ptr_entry  = StubRoutines::_atomic_cmpxchg_entry;
    StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long();
    StubRoutines::_atomic_add_ptr_entry      = StubRoutines::_atomic_add_entry;
#endif  // COMPILER2 !=> _LP64

    // Build this early so it's available for the interpreter.
    StubRoutines::_throw_StackOverflowError_entry =
      generate_throw_exception("StackOverflowError throw_exception",
                               CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
  }
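  // For context, a sketch of the two-phase bootstrap that drives the split
  // between generate_initial() and generate_all() (the buffer names below are
  // illustrative; see stubRoutines.cpp for the actual init calls):
  //
  //   StubGenerator_generate(code1, /*all=*/false);  // early, before universe
  //                                                  // init: call stub, etc.
  //   ... universe / heap initialization ...
  //   StubGenerator_generate(code2, /*all=*/true);   // late: arraycopy, throw
  //                                                  // and verify_oop stubs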
  void generate_all() {
    // Generates all remaining stubs and initializes their entry points

    // Generate partial_subtype_check first here since its code depends on
    // UseZeroBaseCompressedOops which is defined after heap initialization.
    StubRoutines::Sparc::_partial_subtype_check = generate_partial_subtype_check();
    // These entry points require SharedInfo::stack0 to be set up in non-core builds
    StubRoutines::_throw_AbstractMethodError_entry =
      generate_throw_exception("AbstractMethodError throw_exception",
                               CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError));
    StubRoutines::_throw_IncompatibleClassChangeError_entry =
      generate_throw_exception("IncompatibleClassChangeError throw_exception",
                               CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError));
    StubRoutines::_throw_NullPointerException_at_call_entry =
      generate_throw_exception("NullPointerException at call throw_exception",
                               CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call));

    StubRoutines::_handler_for_unsafe_access_entry =
      generate_handler_for_unsafe_access();

    // support for verify_oop (must happen after universe_init)
    StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop_subroutine();

    // arraycopy stubs used by compilers
    generate_arraycopy_stubs();

    // Don't initialize the platform math functions since sparc
    // doesn't have intrinsics for these operations.
  }


 public:
  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
    // replace the standard masm with a special one:
    _masm = new MacroAssembler(code);

    _stub_count = !all ? 0x100 : 0x200;
    if (all) {
      generate_all();
    } else {
      generate_initial();
    }

    // make sure this stub is available for all local calls
    if (_atomic_add_stub.is_unbound()) {
      // generate a second time, if necessary
      (void) generate_atomic_add();
    }
  }


 private:
  int _stub_count;
  void stub_prolog(StubCodeDesc* cdesc) {
# ifdef ASSERT
    // put extra information in the stub code, to make it more readable
#ifdef _LP64
    // Write the high part of the address
    // [RGV] Check if there is a dependency on the size of this prolog
    __ emit_data((intptr_t)cdesc >> 32, relocInfo::none);
#endif
    __ emit_data((intptr_t)cdesc, relocInfo::none);
    __ emit_data(++_stub_count, relocInfo::none);
# endif
    align(true);
  }

  void align(bool at_header = false) {
    // %%%%% move this constant somewhere else
    // UltraSPARC cache line size is 8 instructions:
    const unsigned int icache_line_size = 32;
    const unsigned int icache_half_line_size = 16;

    if (at_header) {
      while ((intptr_t)(__ pc()) % icache_line_size != 0) {
        __ emit_data(0, relocInfo::none);
      }
    } else {
      while ((intptr_t)(__ pc()) % icache_half_line_size != 0) {
        __ nop();
      }
    }
  }

}; // end class declaration

void StubGenerator_generate(CodeBuffer* code, bool all) {
  StubGenerator g(code, all);
}
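// Worked example for align() above, with illustrative values: SPARC
// instructions are 4 bytes, so an 8-instruction icache line is 32 bytes.
// If __ pc() were 0x1004, then 0x1004 % 32 == 4, and align(true) would emit
// seven zero words (28 bytes) so the next stub header starts on the 32-byte
// line boundary 0x1020; align(false) would instead pad with three nops
// (12 bytes) to reach the 16-byte half-line boundary 0x1010.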