Lines Matching defs:to

16  * 2 along with this work; if not, write to the Free Software Foundation,
65 // from platform to platform depending on the configuration.
68 // unfortunately have to be done in the shared file and cannot appear
72 // Currently, this 'platform' is hardcoded to a value that is a good
73 // enough trade-off. However, one can easily modify this file to test
75 // significant, we could decide to either add command line options or
76 // add code to automatically choose a configuration.
83 // Hard coded choices (XXX: could be changed to a command line option)
89 #define ArmCopyCacheLineSize 32 // not worth optimizing to 64 according to measured gains
125 // results, minimizing overhead with respect to best results on the
139 // - up to 40% from optimal configuration for backward shifted and backward align for tegra2
152 // copy. However, this can lead to a 300% loss on nv-tegra and has
164 // - a smaller prefetch distance is sufficient to get good results and might be more stable
298 // Would be better with respect to native tools if the following
299 // setting of FP was changed to conform to the native ABI, with FP
300 // pointing to the saved FP slot (and the corresponding modifications
321 __ add(SP, FP, wordSize); // Skip link to JavaCallWrapper
436 // Save return address on stack to free one extra register
443 // Jump to the appropriate place in the unrolled loop below
465 // Find the combination of argument signs and jump to corresponding handler
474 // Safer to save R9 here since callers may have been written
535 // no load/store can be reordered with respect to the initial load-linked, so we have:
546 // code below allows for it to be otherwise. The else clause indicates an ARMv5 system
688 // reordered before by a wrapper to (jlong compare_value, jlong exchange_value, volatile jlong *dest)
753 __ clrex(); // FIXME: safe to remove?
779 __ mov(Rtemp, dest); // get dest to Rtemp
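
The fragments above come from the atomic stubs, which build an exclusive load/store (ldrex/strex) retry loop and use clrex to drop a stale reservation. As a portable illustration only, with std::atomic standing in for the hand-written exclusive-access sequence and a function name of my own (not a HotSpot API), the contract follows the (jlong compare_value, jlong exchange_value, volatile jlong *dest) order quoted above:

    #include <atomic>
    #include <cstdint>

    // Portable analogue of the cmpxchg stub contract (illustrative only).
    int64_t atomic_cmpxchg_long(int64_t compare_value, int64_t exchange_value,
                                std::atomic<int64_t>* dest) {
        int64_t expected = compare_value;
        // compare_exchange_strong plays the role of the ldrexd/strexd retry loop;
        // the stub's clrex merely abandons an exclusive reservation that is no longer needed.
        dest->compare_exchange_strong(expected, exchange_value);
        return expected;   // old value: equals compare_value iff the exchange happened
    }
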
814 // R0 used as tmp_reg (in addition to return reg)
861 // Load next super to check
867 __ subs(R0, R0, search_key); // set R0 to 0 on success (and flags to eq)
869 // A miss means we are NOT a subtype and need to keep looping
878 // R0 is already 0 and flags are already set to eq
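
The lines above belong to the partial subtype check: a linear scan of the secondary supers array where "subs R0, R0, search_key" makes a hit leave R0 == 0 with the flags set to eq. A rough C++ rendering of that convention (names are mine, not HotSpot's):

    #include <cstddef>
    #include <cstdint>

    // Returns 0 on success, mirroring "R0 set to 0 (and flags to eq)" in the stub.
    intptr_t partial_subtype_scan(const intptr_t* supers, size_t len, intptr_t search_key) {
        for (size_t i = 0; i < len; i++) {
            if (supers[i] - search_key == 0) {   // mirrors subs R0, R0, search_key
                return 0;                        // hit: the result is already 0
            }
            // a miss means we are NOT (yet) proven a subtype and keep looping
        }
        return 1;                                // exhausted the array: not a subtype
    }
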
910 // R2: oop to verify
1004 const Register to = R1;
1006 const Register to_from = tmp1; // to - from
1010 assert_different_registers(from, to, count, tmp1, tmp2);
1012 // no_overlap version works if 'to' is lower (unsigned) than 'from'
1013 // or if 'to' is more than (count*size) above 'from'
1016 __ subs(to_from, to, from);
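
The two comment lines and the subs(to_from, to, from) above encode the disjointness test as a single unsigned subtraction. An illustrative C++ version, assuming count*size is small relative to the address space (true for real Java arrays); the function name is mine:

    #include <cstddef>
    #include <cstdint>

    // A forward (no_overlap) copy is safe either when 'to' is below 'from' (the unsigned
    // difference wraps to a huge value) or when 'to' starts at least count*size bytes
    // above 'from', so one compare covers both cases.
    bool forward_copy_is_safe(const void* from, const void* to, size_t count, size_t size) {
        uintptr_t to_from = (uintptr_t)to - (uintptr_t)from;   // mirrors subs(to_from, to, from)
        return to_from >= count * size;
    }
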
1058 // Stores regs[0], regs[1], ..., regs[count-1] to [to, to + count*wordSize)
1059 // and increases 'to' by count*wordSize.
1060 void bulk_store_forward(Register to, const Register regs[], int count) {
1065 __ stp(regs[0], regs[1], Address(to, bytes, post_indexed));
1069 __ stp(regs[i], regs[i+1], Address(to, -bytes + offset));
1078 // Note that the word with the lowest address goes to regs[0].
1096 // Stores regs[0], regs[1], ..., regs[count-1] into [to - count*wordSize, to)
1097 // and decreases 'to' by count*wordSize.
1099 void bulk_store_backward(Register to, const Register regs[], int count) {
1107 __ stp(regs[i], regs[i+1], Address(to, -offset));
1111 __ stp(regs[0], regs[1], Address(to, -bytes, pre_indexed));
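
bulk_store_forward and bulk_store_backward above write register pairs with stp, using post-indexed writeback to advance 'to' in the forward case and pre-indexed writeback to decrease it in the backward case. A rough C++ analogue of the pointer bookkeeping (not the generated AArch64 code; helper names are mine):

    #include <cstddef>
    #include <cstdint>

    // Forward: store regs[0..count-1] at [to, to + count*wordSize), then advance 'to'.
    void bulk_store_forward_c(uintptr_t*& to, const uintptr_t* regs, int count) {
        for (int i = 0; i < count; i++) {
            *to++ = regs[i];             // the word with the lowest address gets regs[0]
        }
    }

    // Backward: decrease 'to' first, then fill [to - count*wordSize, to) below the old 'to'.
    void bulk_store_backward_c(uintptr_t*& to, const uintptr_t* regs, int count) {
        to -= count;                     // like the pre-indexed stp that decreases 'to'
        for (int i = 0; i < count; i++) {
            to[i] = regs[i];             // regs[0] still ends up at the lowest address
        }
    }
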
1119 void prefetch(Register from, Register to, int offset, int to_delta = 0) {
1122 // Next line commented out to avoid significant loss of performance in memory copy - JDK-8078120
1123 // __ prfm(pstl1keep, Address(to, offset + to_delta));
1131 // to: dst address, wordSize aligned
1139 // - 'to' aligned on wordSize
1142 // Increases 'from' and 'to' by count*bytes_per_count.
1147 int generate_forward_aligned_copy_loop(Register from, Register to, Register count, int bytes_per_count) {
1148 assert (from == R0 && to == R1 && count == R2, "adjust the implementation below");
1175 // predecrease to exit when there is less than count_per_loop
1181 prefetch(from, to, 0);
1186 // by the prefetch distance to optimize the inner loop and the
1196 prefetch(from, to, offset);
1207 // 32-bit ARM note: we have tried implementing loop unrolling to skip one
1215 prefetch(from, to, bytes_per_loop + pld_offset);
1237 prefetch(from, to, pld_offset, bytes_per_loop);
1241 bulk_store_forward(to, data_regs, 8);
1244 __ stmia(to, RegisterSet(R3, R6), writeback);
1245 __ stmia(to, RegisterSet(R7, R10), writeback);
1247 __ stmia(to, RegisterSet(R3, R10), writeback);
1254 // the inner loop may end earlier, allowing PLD to be skipped for the last iterations
1260 // still 0..bytes_per_loop-1 aligned bytes to copy, count already decreased by (at least) bytes_per_loop bytes
1274 bulk_store_forward(to, data_regs, 4);
1284 bulk_store_forward(to, data_regs, 2);
1294 __ str(R3, Address(to, 8, post_indexed));
1304 __ str_w(R3, Address(to, 4, post_indexed));
1314 __ strh(R3, Address(to, 2, post_indexed));
1324 __ strb(R3, Address(to, 1, post_indexed));
1331 __ stmia(to, RegisterSet(R3, R6), writeback, ne);
1335 __ stmia(to, RegisterSet(R3, R4), writeback, ne);
1340 __ str(R3, Address(to, 4, post_indexed), ne);
1346 __ strh(R3, Address(to, 2, post_indexed), ne);
1352 __ strb(R3, Address(to, 1, post_indexed), ne);
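
generate_forward_aligned_copy_loop above copies bytes_per_loop bytes per iteration (eight registers on 32-bit ARM), issues prefetches a configurable distance ahead, and then peels 16/8/4/2/1-byte stores for the remaining 0..bytes_per_loop-1 bytes. A simplified, prefetch-free C++ sketch of that shape (memcpy stands in for the bulk load/store pairs; this is not the stub itself):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    void forward_aligned_copy(const uint8_t* from, uint8_t* to, size_t count_bytes) {
        const size_t bytes_per_loop = 32;            // 8 registers * 4-byte words on 32-bit ARM
        while (count_bytes >= bytes_per_loop) {      // main loop: one full block per iteration
            std::memcpy(to, from, bytes_per_loop);   // stands in for the ldm/stm (or ldp/stp) bulk copy
            from += bytes_per_loop;
            to   += bytes_per_loop;
            count_bytes -= bytes_per_loop;
        }
        for (size_t chunk = 16; chunk >= 1; chunk /= 2) {   // tail: 16, 8, 4, 2, then 1 bytes
            if (count_bytes & chunk) {
                std::memcpy(to, from, chunk);
                from += chunk;
                to   += chunk;
            }
        }
    }
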
1430 // 32-bit ARM note: we have tried implementing loop unrolling to skip one
1478 // still 0..bytes_per_loop-1 aligned bytes to copy, count already decreased by (at least) bytes_per_loop bytes
1586 // to: start dst address, (now) wordSize aligned
1589 // lsr_shift: shift applied to 'old' value to skip already written bytes
1590 // lsl_shift: shift applied to 'new' value to set the high bytes of the next write
1596 // - 'to' aligned on wordSize
1601 // Increases 'to' by count*bytes_per_count.
1607 // - (R12 >> lsr_shift) is the part not yet written (just before 'to')
1608 // --> (*to) = (R12 >> lsr_shift) | ((*from) << lsl_shift); ...
1614 int generate_forward_shifted_copy_loop(Register from, Register to, Register count, int bytes_per_count, int lsr_shift, int lsl_shift) {
1615 assert (from == R0 && to == R1 && count == R2, "adjust the implementation below");
1635 prefetch(from, to, 0);
1641 // warning: count is predecreased by the prefetch distance to optimize the inner loop
1648 prefetch(from, to, offset);
1660 prefetch(from, to, bytes_per_loop + pld_offset);
1675 // if write is not split, use less registers in first set to reduce locking
1691 prefetch(from, to, pld_offset, bytes_per_loop);
1704 // write the first half as soon as possible to reduce stm locking
1705 __ stmia(to, RegisterSet(R3, R6), writeback, prefetch_before ? gt : ge);
1718 bulk_store_forward(to, data_regs, 8);
1721 __ stmia(to, RegisterSet(R7, R10), writeback, prefetch_before ? gt : ge);
1723 __ stmia(to, RegisterSet(R3, R10), writeback, prefetch_before ? gt : ge);
1726 __ b(L_shifted_loop, gt); // no need to loop if 0 (when count need not be precise modulo bytes_per_loop)
1729 // the first loop may end earlier, allowing the pld at the end to be skipped
1732 __ stmia(to, RegisterSet(R3, R10), writeback); // stmia was skipped
1757 bulk_store_forward(to, data_regs, 4);
1769 bulk_store_forward(to, data_regs, 2);
1779 __ str(R3, Address(to, 8, post_indexed));
1786 // Less than wordSize remains to be written.
1791 __ cmp_32(count, have_bytes/bytes_per_count); // do we have enough bytes to store?
1801 __ str_w(R3, Address(to, 4, post_indexed));
1811 __ strh(R3, Address(to, 2, post_indexed));
1821 __ strb(R3, Address(to, 1, post_indexed));
1837 __ stmia(to, RegisterSet(R3, R6), writeback, ne);
1845 __ stmia(to, RegisterSet(R3, R4), writeback, ne);
1851 __ str(R3, Address(to, 4, post_indexed), ne);
1855 __ strh(R3, Address(to, 2, post_indexed), ne); // one last short
1869 __ stmia(to, RegisterSet(R3, R6), writeback, ne);
1877 __ stmia(to, RegisterSet(R3, R4), writeback, ne);
1883 __ str(R3, Address(to, 4, post_indexed), ne);
1889 // Note: R3 might contain enough bytes ready to write (3 needed at most),
1900 __ strh(R3, Address(to, 2, post_indexed), ge); // two last bytes
1904 __ strb(R3, Address(to, 1, post_indexed), ne); // one last byte
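
generate_forward_shifted_copy_loop above handles a word-aligned destination with a misaligned source: each stored word combines the unwritten part of the previously loaded word (shifted by lsr_shift) with the newly loaded word (shifted by lsl_shift), per the (*to) = (R12 >> lsr_shift) | ((*from) << lsl_shift) comment. A little-endian C++ illustration, assuming a source misalignment of 1..3 bytes and one extra readable source word (function and parameter names are mine):

    #include <cstddef>
    #include <cstdint>

    void forward_shifted_copy(const uint32_t* from_aligned, uint32_t* to,
                              size_t count_words, int misalign_bytes /* 1..3 */) {
        const int lsr_shift = 8 * misalign_bytes;    // bits of 'old' already consumed
        const int lsl_shift = 32 - lsr_shift;        // bits contributed by the new word
        uint32_t old_word = *from_aligned++;         // preloaded partial word (the R12 role)
        for (size_t i = 0; i < count_words; i++) {
            uint32_t new_word = *from_aligned++;
            to[i] = (old_word >> lsr_shift) | (new_word << lsl_shift);
            old_word = new_word;                     // its unwritten high bytes carry over
        }
    }
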
1922 // lsl_shift: shift applied to 'old' value to skip already written bytes
1923 // lsr_shift: shift applied to 'new' value to set the low bytes of the next write
1941 // --> (*--to) = (R3 << lsl_shift) | ((*--from) >> lsr_shift); ...
1977 // warning: count is predecreased by the prefetch distance to optimize the inner loop
2037 // store early to reduce locking issues
2056 __ b(L_shifted_loop, gt); // no need to loop if 0 (when count need not be precise modulo bytes_per_loop)
2059 // the first loop may end earlier, allowing the pld at the end to be skipped
2120 // Less than wordSize remains to be written.
2125 __ cmp_32(count, have_bytes/bytes_per_count); // do we have enough bytes to store?
2140 __ logical_shift_left(R12, R12, 4*BitsPerByte); // Promote remaining bytes to MSB
2151 __ logical_shift_left(R12, R12, 2*BitsPerByte); // Promote remaining bytes to MSB
2233 // Note: R12 contains enough bytes ready to write (3 needed at most)
2237 // promote remaining bytes to MSB
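
The backward variant walks down from the upper bounds and mirrors the formula as (*--to) = (R3 << lsl_shift) | ((*--from) >> lsr_shift). A compact little-endian mirror of the sketch above, under the same assumptions (one extra source word readable below the bound; names are mine):

    #include <cstddef>
    #include <cstdint>

    void backward_shifted_copy(const uint32_t* from_aligned_end, uint32_t* to_end,
                               size_t count_words, int misalign_bytes /* 1..3 */) {
        const int lsr_shift = 8 * misalign_bytes;
        const int lsl_shift = 32 - lsr_shift;
        uint32_t old_word = *--from_aligned_end;     // preloaded partial word (the R3 role)
        while (count_words-- > 0) {
            uint32_t new_word = *--from_aligned_end;
            *--to_end = (old_word << lsl_shift) | (new_word >> lsr_shift);
            old_word = new_word;                     // its unwritten low bytes carry over
        }
    }
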
2272 // Stores one 'size_in_bytes'-sized value to 'to' in given direction (see load_one)
2273 void store_one(Register rd, Register to, int size_in_bytes, bool forward) {
2274 assert_different_registers(to, rd);
2275 Address addr = get_addr_with_indexing(to, size_in_bytes, forward);
2300 void store_one(Register rd, Register to, int size_in_bytes, bool forward, AsmCondition cond = al, Register rd2 = noreg) {
2301 assert_different_registers(to, rd, rd2);
2303 Address addr = get_addr_with_indexing(to, size_in_bytes, forward);
2310 __ stmia(to, RegisterSet(rd) | rd2, writeback, cond);
2312 __ stmdb(to, RegisterSet(rd) | rd2, writeback, cond);
2318 // Copies data from 'from' to 'to' in specified direction to align 'from' by 64 bits.
2322 // from: beginning (if forward) or upper bound (if !forward) of the region to be read
2323 // to: beginning (if forward) or upper bound (if !forward) of the region to be written
2329 // 'from' and 'to' must be aligned by 'bytes_per_count'
2331 // shifts 'from' and 'to' by the number of copied bytes in corresponding direction
2335 int align_src(Register from, Register to, Register count, Register tmp, int bytes_per_count, bool forward) {
2336 assert_different_registers(from, to, count, tmp);
2345 store_one(tmp, to, 1, forward);
2354 store_one(tmp, to, 2, forward);
2363 store_one(tmp, to, 4, forward);
2374 store_one(tmp, to, bytes_per_count, forward, ne);
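
align_src above peels a 1-, 2- and then 4-byte copy as needed until 'from' reaches 64-bit alignment, moving both pointers and reducing the count. A forward-only C++ sketch of the idea (the real helper works at bytes_per_count granularity and in both directions; names are mine):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    void align_src_forward(const uint8_t*& from, uint8_t*& to, size_t& count_bytes) {
        for (size_t chunk = 1; chunk <= 4; chunk *= 2) {
            if (((uintptr_t)from & chunk) && count_bytes >= chunk) {
                std::memcpy(to, from, chunk);    // stands in for load_one/store_one
                from += chunk;
                to   += chunk;
                count_bytes -= chunk;
            }
        }
        // 'from' is now 8-byte aligned, provided enough bytes were available
    }
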
2386 // from: beginning (if forward) or upper bound (if !forward) of the region to be read
2387 // to: beginning (if forward) or upper bound (if !forward) of the region to be written
2388 // count: 32-bit int, number of elements to be copied
2394 // shifts 'from' and 'to'
2395 void copy_small_array(Register from, Register to, Register count, Register tmp, Register tmp2, int bytes_per_count, bool forward, Label & entry) {
2396 assert_different_registers(from, to, count, tmp);
2407 store_one(tmp, to, bytes_per_count, forward);
2414 store_one(tmp, to, bytes_per_count, forward, al, tmp2);
2422 // Aligns 'to' by reading one word from 'from' and writing part of it to 'to'.
2425 // to: beginning (if forward) or upper bound (if !forward) of the region to be written
2426 // count: 32-bit int, number of elements allowed to be copied
2427 // to_remainder: remainder of dividing 'to' by wordSize
2431 // its LSBs (if forward) or MSBs (if !forward) are to be written to align 'to'.
2435 // 'to' must be aligned by bytes_per_count but must not be aligned by wordSize
2436 // shifts 'to' by the number of written bytes (so that it becomes the bound of memory to be written)
2438 // Rval's MSBs or LSBs remain to be written further by generate_{forward,backward}_shifted_copy_loop
2439 int align_dst(Register to, Register count, Register Rval, Register tmp,
2441 assert_different_registers(to, count, tmp, Rval);
2444 assert (to_remainder % bytes_per_count == 0, "to must be aligned by bytes_per_count");
2456 store_one(Rval, to, s, forward);
2459 store_one(tmp, to, s, forward);
2463 store_one(tmp, to, s, forward);
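
align_dst above has already loaded one source word into Rval; in the forward case it stores that word's LSBs to bring 'to' up to word alignment and leaves the MSBs in Rval for the shifted copy loop. A little-endian, forward-only sketch (32-bit word assumed, to_remainder in 1..3; helper name is mine):

    #include <cstdint>
    #include <cstring>

    // Returns rval unchanged: its not-yet-written high bytes are consumed later
    // by the shifted copy loop via the lsr/lsl shifts.
    uint32_t align_dst_forward(uint8_t*& to, uint32_t rval, int to_remainder /* 1..3 */) {
        int bytes_to_write = 4 - to_remainder;    // bytes needed to reach the next word boundary
        std::memcpy(to, &rval, bytes_to_write);   // store the LSBs of the preloaded word
        to += bytes_to_write;                     // 'to' is now word aligned
        return rval;
    }
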
2480 // from: beginning (if forward) or upper bound (if !forward) of the region to be read
2481 // to: beginning (if forward) or upper bound (if !forward) of the region to be written
2482 // count: 32-bit int, number of elements to be copied
2483 // to_remainder: remainder of dividing 'to' by wordSize
2492 // 'to' must be aligned by bytes_per_count but must not be aligned by wordSize
2493 // shifts 'to' by the number of copied bytes
2496 int align_dst_and_generate_shifted_copy_loop(Register from, Register to, Register count, Register Rval,
2502 assert_different_registers(from, to, count, Rval, tmp);
2504 int required_to_align = align_dst(to, count, Rval, tmp, to_remainder, bytes_per_count, forward);
2511 min_copy = generate_forward_shifted_copy_loop(from, to, count, bytes_per_count, lsr_shift, lsl_shift);
2513 min_copy = generate_backward_shifted_copy_loop(from, to, count, bytes_per_count, lsr_shift, lsl_shift);
2522 // from: beginning (if forward) or upper bound (if !forward) of the region to be read
2523 // to: beginning (if forward) or upper bound (if !forward) of the region to be written
2524 // count: 32-bit int, number of elements to be copied
2531 // 'to' must be aligned by bytes_per_count but must not be aligned by wordSize
2532 // shifts 'to' by the number of copied bytes
2535 // On AArch64 also scratches R4-R10; on 32-bit ARM saves them before use.
2536 int align_dst_and_generate_shifted_copy_loop(Register from, Register to, Register count, int bytes_per_count, bool forward) {
2543 // then the remainder of 'to' divided by wordSize is one of the elements of {seq}.
2554 min_copy = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 4, bytes_per_count, forward);
2560 __ tbz(to, 1, L4);
2561 __ tbz(to, 2, L2);
2564 int min_copy6 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 6, bytes_per_count, forward);
2568 int min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
2572 int min_copy4 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 4, bytes_per_count, forward);
2583 __ tbz(to, 0, L246);
2584 __ tbz(to, 1, L15);
2585 __ tbz(to, 2, L3);
2588 int min_copy7 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 7, bytes_per_count, forward);
2592 __ tbnz(to, 1, L26);
2595 int min_copy4 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 4, bytes_per_count, forward);
2599 __ tbz(to, 2, L1);
2602 int min_copy5 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 5, bytes_per_count, forward);
2606 int min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward);
2610 __ tbz(to, 2, L2);
2613 int min_copy6 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 6, bytes_per_count, forward);
2617 int min_copy1 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 1, bytes_per_count, forward);
2621 int min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
2644 min_copy = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
2654 __ tbz(to, 0, L2);
2655 __ tbz(to, 1, L1);
2658 min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward);
2662 min_copy1 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 1, bytes_per_count, forward);
2666 min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
2668 __ tbz(to, 0, L2);
2669 __ tbnz(to, 1, L3);
2672 min_copy1 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 1, bytes_per_count, forward);
2676 min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward);
2680 min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
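
align_dst_and_generate_shifted_copy_loop above emits one copy loop per possible remainder of 'to' modulo wordSize and selects between them at run time with tbz/tbnz on the low bits of 'to'. The same "one specialization per remainder" pattern can be illustrated in C++ with templates and a small dispatch table (my own construction, not the stub; each specialization here is just a plain byte copy):

    #include <cstddef>
    #include <cstdint>

    template <int TO_REMAINDER>
    void shifted_copy_specialized(const uint8_t* from, uint8_t* to, size_t count) {
        // a real specialization would hard-code shifts derived from TO_REMAINDER
        for (size_t i = 0; i < count; i++) to[i] = from[i];
    }

    void copy_with_remainder_dispatch(const uint8_t* from, uint8_t* to, size_t count) {
        using CopyFn = void (*)(const uint8_t*, uint8_t*, size_t);
        static const CopyFn table[4] = {
            shifted_copy_specialized<0>, shifted_copy_specialized<1>,
            shifted_copy_specialized<2>, shifted_copy_specialized<3>,
        };
        table[(uintptr_t)to & 3](from, to, count);   // low bits of 'to' pick the specialization
    }
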
2720 // "from" and "to" addresses are assumed to be heapword aligned.
2722 // If "disjoint" is true, arrays are assumed to be disjoint, otherwise they may overlap and
2723 // "nooverlap_target" must be specified as the address to jump if they don't.
2727 // to: R1
2736 const Register to = R1; // destination array address
2760 // Set 'from' and 'to' to upper bounds
2762 __ add_ptr_scaled_int32(to, to, count, log_bytes_per_count);
2788 int count_required_to_align = from_is_aligned ? 0 : align_src(from, to, count, tmp1, bytes_per_count, forward);
2796 // 'to' is aligned by bytes_per_count, so it is aligned by wordSize
2800 // Originally 'from' and 'to' were heapword aligned;
2801 // (from - to) has not been changed, so since now 'from' is 8-byte aligned, then it is also heapword aligned,
2802 // so 'to' is also heapword aligned and thus aligned by wordSize.
2811 __ tst(to, wordSize - 1);
2812 __ b(L_unaligned_dst, ne); // 'to' is not aligned
2815 // 'from' and 'to' are properly aligned
2819 min_copy = generate_forward_aligned_copy_loop (from, to, count, bytes_per_count);
2821 min_copy = generate_backward_aligned_copy_loop(from, to, count, bytes_per_count);
2832 copy_small_array(from, to, count, tmp1, tmp2, bytes_per_count, forward, L_small_array /* entry */);
2843 int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward);
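
The primitive arraycopy stub above first decides the copy direction (jumping to nooverlap_target when a forward copy is safe), and for a backward copy sets 'from' and 'to' to the upper bounds before aligning and choosing a loop. A minimal working C++ illustration of that direction choice (byte granularity only; not the stub):

    #include <cstddef>
    #include <cstdint>

    void conjoint_byte_copy(const uint8_t* from, uint8_t* to, size_t count) {
        if ((uintptr_t)to - (uintptr_t)from >= count) {
            for (size_t i = 0; i < count; i++) to[i] = from[i];   // forward copy is safe
        } else {
            from += count;                                        // set 'from' and 'to' to upper bounds
            to   += count;
            while (count-- > 0) *--to = *--from;                  // copy backward
        }
    }
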
2922 // Note: LR can be scratched but might be equal to addr, count or tmp
2943 // Safer to save R9 here since callers may have been written
2946 // is scratched. Note that the optimization might not be to
2994 // Generates pattern of code to be placed after raw data copying in generate_oop_copy
2998 // to: destination pointer after copying.
2999 // if 'forward' then 'to' == upper bound, else 'to' == beginning of the modified region
3002 // Blows all volatile (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR) and 'to', 'count', 'tmp' registers.
3003 void oop_arraycopy_stub_epilogue_helper(Register to, Register count, Register tmp, bool status, bool forward) {
3004 assert_different_registers(to, count, tmp);
3007 // 'to' is upper bound of the modified region
3009 __ sub_ptr_scaled_int32(to, to, count, LogBytesPerHeapOop);
3012 // 'to' is the beginning of the region
3014 gen_write_ref_array_post_barrier(to, count, tmp);
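
oop_arraycopy_stub_epilogue_helper above recovers the start of the modified region (for a forward copy, 'to' ends up past the last element) before running the post barrier over it. The pointer arithmetic, with stand-in names for the barrier and the oop size constant:

    #include <cstddef>
    #include <cstdint>

    void oop_copy_epilogue(uint8_t* to, size_t count, bool forward,
                           int log_bytes_per_heap_oop,
                           void (*post_barrier)(void* start, size_t count)) {
        uint8_t* start = forward ? to - (count << log_bytes_per_heap_oop)  // back up over what was written
                                 : to;                                     // backward copy: 'to' is already the start
        post_barrier(start, count);   // card-mark the region of 'count' copied oops
    }
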
3030 // "from" and "to" addresses are assumed to be heapword aligned.
3032 // If "disjoint" is true, arrays are assumed to be disjoint, otherwise they may overlap and
3033 // "nooverlap_target" must be specified as the address to jump if they don't.
3037 // to: R1
3046 Register to = R1;
3075 // LR is used later to save barrier args
3083 gen_write_ref_array_pre_barrier(to, count, callee_saved_regs);
3090 __ add_ptr_scaled_int32(to, to, count, log_bytes_per_count);
3107 int count_required_to_align = from_is_aligned ? 0 : align_src(from, to, count, tmp1, bytes_per_count, forward);
3115 // 'to' is aligned by bytes_per_count, so it is aligned by wordSize
3119 // Originally 'from' and 'to' were heapword aligned;
3120 // (from - to) has not been changed, so since now 'from' is 8-byte aligned, then it is also heapword aligned,
3121 // so 'to' is also heapword aligned and thus aligned by wordSize.
3130 __ tst(to, wordSize - 1);
3131 __ b(L_unaligned_dst, ne); // 'to' is not aligned
3136 min_copy = generate_forward_aligned_copy_loop(from, to, count, bytes_per_count);
3138 min_copy = generate_backward_aligned_copy_loop(from, to, count, bytes_per_count);
3142 oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward);
3145 copy_small_array(from, to, count, tmp1, noreg, bytes_per_count, forward, L_small_array);
3147 oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward);
3158 int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward);
3161 oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward);
3173 // to: R1
3177 // to a long, int, short, or byte copy loop.
3285 // Load next super to check
3293 // A miss means we are NOT a subtype and need to keep looping
3301 // Jump to success
3312 // to: R1
3324 const Register to = R1; // destination array address
3348 gen_write_ref_array_pre_barrier(to, count, callee_saved_regs);
3372 // nothing to copy
3389 __ store_heap_oop(R5, Address(to, BytesPerHeapOop, post_indexed)); // store the oop, changes flags
3393 __ str(R5, Address(to, BytesPerHeapOop, post_indexed)); // store the oop
3405 // branch to this on success:
3409 // It was a real error; we must depend on the caller to finish the job.
3412 // and report their number to the caller (0 or (-1^n))
3424 __ sub(to, to, AsmOperand(copied, lsl, LogBytesPerHeapOop)); // initial to value
3427 gen_write_ref_array_post_barrier(to, R12, R3);
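
The checkcast arraycopy above copies element by element, stops at the first element failing the type check, re-derives the initial 'to' from the number of elements copied, applies the post barrier to the copied prefix, and reports 0 on success or (-1 ^ n) otherwise. A sketch of that contract, with is_assignable standing in for the subtype check (not a HotSpot function):

    #include <cstddef>
    #include <cstdint>

    intptr_t checkcast_copy(void** from, void** to, size_t count,
                            bool (*is_assignable)(void* elem)) {
        size_t copied = 0;
        for (; copied < count; copied++) {
            void* elem = from[copied];
            if (elem != nullptr && !is_assignable(elem)) break;   // type check failed: stop here
            to[copied] = elem;                                    // store the oop (post-indexed in the stub)
        }
        // a post barrier would be applied to the 'copied' elements actually written
        return (copied == count) ? 0 : ~(intptr_t)copied;         // 0 or (-1 ^ n)
    }
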
3493 // R0 < 0 - need to call System.arraycopy
3536 // Assembler stubs will be used for this call to arraycopy
3617 // next registers should be set before the jump to corresponding stub
3619 const Register to = R1; // destination array address
3622 // 'from', 'to', 'count' registers should be set in this order
3639 __ add_ptr_scaled_int32(to, dst, dst_pos, LogBytesPerShort);
3645 __ add_ptr_scaled_int32(to, dst, dst_pos, LogBytesPerInt);
3651 __ add_ptr_scaled_int32(to, dst, dst_pos, 0);
3657 __ add_ptr_scaled_int32(to, dst, dst_pos, LogBytesPerLong);
3663 BLOCK_COMMENT("scale indexes to element size");
3665 __ add(to, dst, AsmOperand(dst_pos, lsl, R12_elsize)); // dst_addr
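
The add_ptr_scaled_int32 calls and the "scale indexes to element size" block above add a 32-bit position scaled by the log of the element size to a base pointer to form the copy addresses. The same arithmetic in plain C++ (header_bytes is a placeholder for any array-header offset applied separately; the function name is mine):

    #include <cstddef>
    #include <cstdint>

    void* element_address(void* array_base, size_t header_bytes,
                          uint32_t index, int log_elem_size) {
        return (uint8_t*)array_base + header_bytes + ((size_t)index << log_elem_size);
    }
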
3703 // next registers should be set before the jump to corresponding stub
3705 const Register to = R1; // destination array address
3711 __ add_ptr_scaled_int32(to, dst, dst_pos, LogBytesPerHeapOop); // dst_addr
3730 // It is safe to examine both src.length and dst.length.
3735 // next registers should be set before the jump to corresponding stub
3737 const Register to = R1; // destination array address
3744 __ add_ptr_scaled_int32(to, dst, dst_pos, LogBytesPerHeapOop); // dst_addr
3749 assert_different_registers(from, to, count, sco_temp,
3837 // With this flag, the C2 stubs are tested by generating calls to
3841 // and the result is tested to see whether the arraycopy stub should
3845 // arraycopy methods callable from C2 generic_arraycopy to set the
3846 // status to 0 for those that always succeed (calling the slow path stub might
3847 // lead to errors since the copy has already been performed).
3914 const Register to = c_rarg1; // destination array address
3994 __ vst1(V0, Address(to), MacroAssembler::VELEM_SIZE_8, 128);
4019 const Register to = c_rarg1; // destination array address
4100 __ vst1(V0, Address(to), MacroAssembler::VELEM_SIZE_8, 128);
4132 const Register to = c_rarg1; // destination array address
4215 __ vst1(V0, Address(to, 16, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
4250 const Register to = c_rarg1; // destination array address
4336 __ vst1(V0, Address(to, 16, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
4384 // Any extra arguments are already supposed to be in R1 and R2
4415 // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than
4448 // These entry points require SharedInfo::stack0 to be set up in non-core builds
4449 // and need to be relocatable, so they each fabricate a RuntimeStub internally.