Lines Matching defs:to

16  * 2 along with this work; if not, write to the Free Software Foundation,
65 // from platform to platform depending on the configuration.
68 // unfortunately have to be done in the shared file and cannot appear
72 // Currently, this 'platform' is hardcoded to a value that is a good
73 // enough trade-off. However, one can easily modify this file to test
75 // significant, we could decide to either add command line options or
76 // add code to automatically choose a configuration.
83 // Hard coded choices (XXX: could be changed to a command line option)
89 #define ArmCopyCacheLineSize 32 // not worth optimizing to 64 according to measured gains
125 // results, minimizing overhead with respect to best results on the
139 // - up to 40% from optimal configuration for backward shifted and backward align for tegra2
152 // copy. However, this can lead to a 300% loss on nv-tegra and has
164 // - a smaller prefetch distance is sufficient to get good results and might be more stable
298 // Would be better with respect to native tools if the following
299 // setting of FP was changed to conform to the native ABI, with FP
300 // pointing to the saved FP slot (and the corresponding modifications
321 __ add(SP, FP, wordSize); // Skip link to JavaCallWrapper
436 // Save return address on stack to free one extra register
443 // Jump to the appropriate place in the unrolled loop below
465 // Find the combination of argument signs and jump to corresponding handler
474 // Safer to save R9 here since callers may have been written
535 // no load/store can be reordered with respect to the initial load-linked, so we have:
546 // code below allows for it to be otherwise. The else clause indicates an ARMv5 system
688 // reordered before by a wrapper to (jlong compare_value, jlong exchange_value, volatile jlong *dest)
753 __ clrex(); // FIXME: safe to remove?
779 __ mov(Rtemp, dest); // get dest to Rtemp
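
The fragments above come from the atomic stubs, which build an exclusive load/store (ldrex/strex) retry loop and use clrex to drop a stale reservation. As a portable illustration only, with std::atomic standing in for the hand-written exclusive-access sequence and a function name of my own (not a HotSpot API), the contract follows the (jlong compare_value, jlong exchange_value, volatile jlong *dest) order quoted above:

    #include <atomic>
    #include <cstdint>

    // Portable analogue of the cmpxchg stub contract (illustrative only).
    int64_t atomic_cmpxchg_long(int64_t compare_value, int64_t exchange_value,
                                std::atomic<int64_t>* dest) {
        int64_t expected = compare_value;
        // compare_exchange_strong plays the role of the ldrexd/strexd retry loop;
        // the stub's clrex merely abandons an exclusive reservation that is no longer needed.
        dest->compare_exchange_strong(expected, exchange_value);
        return expected;   // old value: equals compare_value iff the exchange happened
    }
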
814 // R0 used as tmp_reg (in addition to return reg)
861 // Load next super to check
867 __ subs(R0, R0, search_key); // set R0 to 0 on success (and flags to eq)
869 // A miss means we are NOT a subtype and need to keep looping
878 // R0 is already 0 and flags are already set to eq
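
The lines above belong to the partial subtype check: a linear scan of the secondary supers array where "subs R0, R0, search_key" makes a hit leave R0 == 0 with the flags set to eq. A rough C++ rendering of that convention (names are mine, not HotSpot's):

    #include <cstddef>
    #include <cstdint>

    // Returns 0 on success, mirroring "R0 set to 0 (and flags to eq)" in the stub.
    intptr_t partial_subtype_scan(const intptr_t* supers, size_t len, intptr_t search_key) {
        for (size_t i = 0; i < len; i++) {
            if (supers[i] - search_key == 0) {   // mirrors subs R0, R0, search_key
                return 0;                        // hit: the result is already 0
            }
            // a miss means we are NOT (yet) proven a subtype and keep looping
        }
        return 1;                                // exhausted the array: not a subtype
    }
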
910 // R2: oop to verify
1004 const Register to = R1;
1006 const Register to_from = tmp1; // to - from
1010 assert_different_registers(from, to, count, tmp1, tmp2);
1012 // no_overlap version works if 'to' is lower (unsigned) than 'from'
1013 // or if 'to' is more than (count*size) above 'from'
1016 __ subs(to_from, to, from);
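
The two comment lines and the subs(to_from, to, from) above encode the disjointness test as a single unsigned subtraction. An illustrative C++ version, assuming count*size is small relative to the address space (true for real Java arrays); the function name is mine:

    #include <cstddef>
    #include <cstdint>

    // A forward (no_overlap) copy is safe either when 'to' is below 'from' (the unsigned
    // difference wraps to a huge value) or when 'to' starts at least count*size bytes
    // above 'from', so one compare covers both cases.
    bool forward_copy_is_safe(const void* from, const void* to, size_t count, size_t size) {
        uintptr_t to_from = (uintptr_t)to - (uintptr_t)from;   // mirrors subs(to_from, to, from)
        return to_from >= count * size;
    }
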
1058 // Stores regs[0], regs[1], ..., regs[count-1] to [to, to + count*wordSize)
1059 // and increases 'to' by count*wordSize.
1060 void bulk_store_forward(Register to, const Register regs[], int count) {
1065 __ stp(regs[0], regs[1], Address(to, bytes, post_indexed));
1069 __ stp(regs[i], regs[i+1], Address(to, -bytes + offset));
1078 // Note that the word with the lowest address goes to regs[0].
1096 // Stores regs[0], regs[1], ..., regs[count-1] into [to - count*wordSize, to)
1097 // and decreases 'to' by count*wordSize.
1099 void bulk_store_backward(Register to, const Register regs[], int count) {
1107 __ stp(regs[i], regs[i+1], Address(to, -offset));
1111 __ stp(regs[0], regs[1], Address(to, -bytes, pre_indexed));
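
bulk_store_forward and bulk_store_backward above write register pairs with stp, using post-indexed writeback to advance 'to' in the forward case and pre-indexed writeback to decrease it in the backward case. A rough C++ analogue of the pointer bookkeeping (not the generated AArch64 code; helper names are mine):

    #include <cstddef>
    #include <cstdint>

    // Forward: store regs[0..count-1] at [to, to + count*wordSize), then advance 'to'.
    void bulk_store_forward_c(uintptr_t*& to, const uintptr_t* regs, int count) {
        for (int i = 0; i < count; i++) {
            *to++ = regs[i];             // the word with the lowest address gets regs[0]
        }
    }

    // Backward: decrease 'to' first, then fill [to - count*wordSize, to) below the old 'to'.
    void bulk_store_backward_c(uintptr_t*& to, const uintptr_t* regs, int count) {
        to -= count;                     // like the pre-indexed stp that decreases 'to'
        for (int i = 0; i < count; i++) {
            to[i] = regs[i];             // regs[0] still ends up at the lowest address
        }
    }
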
1119 void prefetch(Register from, Register to, int offset, int to_delta = 0) {
1122 // Next line commented out to avoid significant loss of performance in memory copy - JDK-8078120
1123 // __ prfm(pstl1keep, Address(to, offset + to_delta));
1131 // to: dst address, wordSize aligned
1139 // - 'to' aligned on wordSize
1142 // Increases 'from' and 'to' by count*bytes_per_count.
1147 int generate_forward_aligned_copy_loop(Register from, Register to, Register count, int bytes_per_count) {
1148 assert (from == R0 && to == R1 && count == R2, "adjust the implementation below");
1175 // predecrease to exit when there is less than count_per_loop
1181 prefetch(from, to, 0);
1186 // by the prefetch distance to optimize the inner loop and the
1196 prefetch(from, to, offset);
1207 // 32-bit ARM note: we have tried implementing loop unrolling to skip one
1215 prefetch(from, to, bytes_per_loop + pld_offset);
1237 prefetch(from, to, pld_offset, bytes_per_loop);
1241 bulk_store_forward(to, data_regs, 8);
1244 __ stmia(to, RegisterSet(R3, R6), writeback);
1245 __ stmia(to, RegisterSet(R7, R10), writeback);
1247 __ stmia(to, RegisterSet(R3, R10), writeback);
1254 // the inner loop may end earlier, allowing PLD to be skipped for the last iterations
1260 // still 0..bytes_per_loop-1 aligned bytes to copy, count already decreased by (at least) bytes_per_loop bytes
1274 bulk_store_forward(to, data_regs, 4);
1284 bulk_store_forward(to, data_regs, 2);
1294 __ str(R3, Address(to, 8, post_indexed));
1304 __ str_w(R3, Address(to, 4, post_indexed));
1314 __ strh(R3, Address(to, 2, post_indexed));
1324 __ strb(R3, Address(to, 1, post_indexed));
1331 __ stmia(to, RegisterSet(R3, R6), writeback, ne);
1335 __ stmia(to, RegisterSet(R3, R4), writeback, ne);
1340 __ str(R3, Address(to, 4, post_indexed), ne);
1346 __ strh(R3, Address(to, 2, post_indexed), ne);
1352 __ strb(R3, Address(to, 1, post_indexed), ne);
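
generate_forward_aligned_copy_loop above copies bytes_per_loop bytes per iteration (eight registers on 32-bit ARM), issues prefetches a configurable distance ahead, and then peels 16/8/4/2/1-byte stores for the remaining 0..bytes_per_loop-1 bytes. A simplified, prefetch-free C++ sketch of that shape (memcpy stands in for the bulk load/store pairs; this is not the stub itself):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    void forward_aligned_copy(const uint8_t* from, uint8_t* to, size_t count_bytes) {
        const size_t bytes_per_loop = 32;            // 8 registers * 4-byte words on 32-bit ARM
        while (count_bytes >= bytes_per_loop) {      // main loop: one full block per iteration
            std::memcpy(to, from, bytes_per_loop);   // stands in for the ldm/stm (or ldp/stp) bulk copy
            from += bytes_per_loop;
            to   += bytes_per_loop;
            count_bytes -= bytes_per_loop;
        }
        for (size_t chunk = 16; chunk >= 1; chunk /= 2) {   // tail: 16, 8, 4, 2, then 1 bytes
            if (count_bytes & chunk) {
                std::memcpy(to, from, chunk);
                from += chunk;
                to   += chunk;
            }
        }
    }
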
1430 // 32-bit ARM note: we have tried implementing loop unrolling to skip one
1478 // still 0..bytes_per_loop-1 aligned bytes to copy, count already decreased by (at least) bytes_per_loop bytes
1586 // to: start dst address, (now) wordSize aligned
1589 // lsr_shift: shift applied to 'old' value to skip already written bytes
1590 // lsl_shift: shift applied to 'new' value to set the high bytes of the next write
1596 // - 'to' aligned on wordSize
1601 // Increases 'to' by count*bytes_per_count.
1607 // - (R12 >> lsr_shift) is the part not yet written (just before 'to')
1608 // --> (*to) = (R12 >> lsr_shift) | ((*from) << lsl_shift); ...
1614 int generate_forward_shifted_copy_loop(Register from, Register to, Register count, int bytes_per_count, int lsr_shift, int lsl_shift) {
1615 assert (from == R0 && to == R1 && count == R2, "adjust the implementation below");
1635 prefetch(from, to, 0);
1641 // warning: count is predecreased by the prefetch distance to optimize the inner loop
1648 prefetch(from, to, offset);
1660 prefetch(from, to, bytes_per_loop + pld_offset);
1675 // if write is not split, use less registers in first set to reduce locking
1691 prefetch(from, to, pld_offset, bytes_per_loop);
1704 // write the first half as soon as possible to reduce stm locking
1705 __ stmia(to, RegisterSet(R3, R6), writeback, prefetch_before ? gt : ge);
1718 bulk_store_forward(to, data_regs, 8);
1721 __ stmia(to, RegisterSet(R7, R10), writeback, prefetch_before ? gt : ge);
1723 __ stmia(to, RegisterSet(R3, R10), writeback, prefetch_before ? gt : ge);
1726 __ b(L_shifted_loop, gt); // no need to loop if 0 (when count need not be precise modulo bytes_per_loop)
1729 // the first loop may end earlier, allowing the pld at the end to be skipped
1732 __ stmia(to, RegisterSet(R3, R10), writeback); // stmia was skipped
1757 bulk_store_forward(to, data_regs, 4);
1769 bulk_store_forward(to, data_regs, 2);
1779 __ str(R3, Address(to, 8, post_indexed));
1786 // Less than wordSize remains to be written.
1791 __ cmp_32(count, have_bytes/bytes_per_count); // do we have enough bytes to store?
1801 __ str_w(R3, Address(to, 4, post_indexed));
1811 __ strh(R3, Address(to, 2, post_indexed));
1821 __ strb(R3, Address(to, 1, post_indexed));
1837 __ stmia(to, RegisterSet(R3, R6), writeback, ne);
1845 __ stmia(to, RegisterSet(R3, R4), writeback, ne);
1851 __ str(R3, Address(to, 4, post_indexed), ne);
1855 __ strh(R3, Address(to, 2, post_indexed), ne); // one last short
1869 __ stmia(to, RegisterSet(R3, R6), writeback, ne);
1877 __ stmia(to, RegisterSet(R3, R4), writeback, ne);
1883 __ str(R3, Address(to, 4, post_indexed), ne);
1889 // Note: R3 might contain enough bytes ready to write (3 needed at most),
1900 __ strh(R3, Address(to, 2, post_indexed), ge); // two last bytes
1904 __ strb(R3, Address(to, 1, post_indexed), ne); // one last byte
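
generate_forward_shifted_copy_loop above handles a word-aligned destination with a misaligned source: each stored word combines the unwritten part of the previously loaded word (shifted by lsr_shift) with the newly loaded word (shifted by lsl_shift), per the (*to) = (R12 >> lsr_shift) | ((*from) << lsl_shift) comment. A little-endian C++ illustration, assuming a source misalignment of 1..3 bytes and one extra readable source word (function and parameter names are mine):

    #include <cstddef>
    #include <cstdint>

    void forward_shifted_copy(const uint32_t* from_aligned, uint32_t* to,
                              size_t count_words, int misalign_bytes /* 1..3 */) {
        const int lsr_shift = 8 * misalign_bytes;    // bits of 'old' already consumed
        const int lsl_shift = 32 - lsr_shift;        // bits contributed by the new word
        uint32_t old_word = *from_aligned++;         // preloaded partial word (the R12 role)
        for (size_t i = 0; i < count_words; i++) {
            uint32_t new_word = *from_aligned++;
            to[i] = (old_word >> lsr_shift) | (new_word << lsl_shift);
            old_word = new_word;                     // its unwritten high bytes carry over
        }
    }
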
1922 // lsl_shift: shift applied to 'old' value to skip already written bytes
1923 // lsr_shift: shift applied to 'new' value to set the low bytes of the next write
1941 // --> (*--to) = (R3 << lsl_shift) | ((*--from) >> lsr_shift); ...
1977 // warning: count is predecreased by the prefetch distance to optimize the inner loop
2037 // store early to reduce locking issues
2056 __ b(L_shifted_loop, gt); // no need to loop if 0 (when count need not be precise modulo bytes_per_loop)
2059 // the first loop may end earlier, allowing the pld at the end to be skipped
2120 // Less than wordSize remains to be written.
2125 __ cmp_32(count, have_bytes/bytes_per_count); // do we have enough bytes to store?
2140 __ logical_shift_left(R12, R12, 4*BitsPerByte); // Promote remaining bytes to MSB
2151 __ logical_shift_left(R12, R12, 2*BitsPerByte); // Promote remaining bytes to MSB
2233 // Note: R12 contains enough bytes ready to write (3 needed at most)
2237 // promote remaining bytes to MSB
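
The backward variant walks down from the upper bounds and mirrors the formula as (*--to) = (R3 << lsl_shift) | ((*--from) >> lsr_shift). A compact little-endian mirror of the sketch above, under the same assumptions (one extra source word readable below the bound; names are mine):

    #include <cstddef>
    #include <cstdint>

    void backward_shifted_copy(const uint32_t* from_aligned_end, uint32_t* to_end,
                               size_t count_words, int misalign_bytes /* 1..3 */) {
        const int lsr_shift = 8 * misalign_bytes;
        const int lsl_shift = 32 - lsr_shift;
        uint32_t old_word = *--from_aligned_end;     // preloaded partial word (the R3 role)
        while (count_words-- > 0) {
            uint32_t new_word = *--from_aligned_end;
            *--to_end = (old_word << lsl_shift) | (new_word >> lsr_shift);
            old_word = new_word;                     // its unwritten low bytes carry over
        }
    }
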
2272 // Stores one 'size_in_bytes'-sized value to 'to' in given direction (see load_one)
2273 void store_one(Register rd, Register to, int size_in_bytes, bool forward) {
2274 assert_different_registers(to, rd);
2275 Address addr = get_addr_with_indexing(to, size_in_bytes, forward);
2300 void store_one(Register rd, Register to, int size_in_bytes, bool forward, AsmCondition cond = al, Register rd2 = noreg) {
2301 assert_different_registers(to, rd, rd2);
2303 Address addr = get_addr_with_indexing(to, size_in_bytes, forward);
2310 __ stmia(to, RegisterSet(rd) | rd2, writeback, cond);
2312 __ stmdb(to, RegisterSet(rd) | rd2, writeback, cond);
2318 // Copies data from 'from' to 'to' in specified direction to align 'from' by 64 bits.
2322 // from: beginning (if forward) or upper bound (if !forward) of the region to be read
2323 // to: beginning (if forward) or upper bound (if !forward) of the region to be written
2329 // 'from' and 'to' must be aligned by 'bytes_per_count'
2331 // shifts 'from' and 'to' by the number of copied bytes in corresponding direction
2335 int align_src(Register from, Register to, Register count, Register tmp, int bytes_per_count, bool forward) {
2336 assert_different_registers(from, to, count, tmp);
2345 store_one(tmp, to, 1, forward);
2354 store_one(tmp, to, 2, forward);
2363 store_one(tmp, to, 4, forward);
2374 store_one(tmp, to, bytes_per_count, forward, ne);
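
align_src above peels a 1-, 2- and then 4-byte copy as needed until 'from' reaches 64-bit alignment, moving both pointers and reducing the count. A forward-only C++ sketch of the idea (the real helper works at bytes_per_count granularity and in both directions; names are mine):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    void align_src_forward(const uint8_t*& from, uint8_t*& to, size_t& count_bytes) {
        for (size_t chunk = 1; chunk <= 4; chunk *= 2) {
            if (((uintptr_t)from & chunk) && count_bytes >= chunk) {
                std::memcpy(to, from, chunk);    // stands in for load_one/store_one
                from += chunk;
                to   += chunk;
                count_bytes -= chunk;
            }
        }
        // 'from' is now 8-byte aligned, provided enough bytes were available
    }
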
2386 // from: beginning (if forward) or upper bound (if !forward) of the region to be read
2387 // to: beginning (if forward) or upper bound (if !forward) of the region to be written
2388 // count: 32-bit int, number of elements to be copied
2394 // shifts 'from' and 'to'
2395 void copy_small_array(Register from, Register to, Register count, Register tmp, Register tmp2, int bytes_per_count, bool forward, Label & entry) {
2396 assert_different_registers(from, to, count, tmp);
2407 store_one(tmp, to, bytes_per_count, forward);
2414 store_one(tmp, to, bytes_per_count, forward, al, tmp2);
2422 // Aligns 'to' by reading one word from 'from' and writing part of it to 'to'.
2425 // to: beginning (if forward) or upper bound (if !forward) of the region to be written
2426 // count: 32-bit int, number of elements allowed to be copied
2427 // to_remainder: remainder of dividing 'to' by wordSize
2431 // its LSBs (if forward) or MSBs (if !forward) are to be written to align 'to'.
2435 // 'to' must be aligned by bytes_per_count but must not be aligned by wordSize
2436 // shifts 'to' by the number of written bytes (so that it becomes the bound of memory to be written)
2438 // Rval's MSBs or LSBs remain to be written further by generate_{forward,backward}_shifted_copy_loop
2439 int align_dst(Register to, Register count, Register Rval, Register tmp,
2441 assert_different_registers(to, count, tmp, Rval);
2444 assert (to_remainder % bytes_per_count == 0, "to must be aligned by bytes_per_count");
2456 store_one(Rval, to, s, forward);
2459 store_one(tmp, to, s, forward);
2463 store_one(tmp, to, s, forward);
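
align_dst above has already loaded one source word into Rval; in the forward case it stores that word's LSBs to bring 'to' up to word alignment and leaves the MSBs in Rval for the shifted copy loop. A little-endian, forward-only sketch (32-bit word assumed, to_remainder in 1..3; helper name is mine):

    #include <cstdint>
    #include <cstring>

    // Returns rval unchanged: its not-yet-written high bytes are consumed later
    // by the shifted copy loop via the lsr/lsl shifts.
    uint32_t align_dst_forward(uint8_t*& to, uint32_t rval, int to_remainder /* 1..3 */) {
        int bytes_to_write = 4 - to_remainder;    // bytes needed to reach the next word boundary
        std::memcpy(to, &rval, bytes_to_write);   // store the LSBs of the preloaded word
        to += bytes_to_write;                     // 'to' is now word aligned
        return rval;
    }
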
2480 // from: beginning (if forward) or upper bound (if !forward) of the region to be read
2481 // to: beginning (if forward) or upper bound (if !forward) of the region to be written
2482 // count: 32-bit int, number of elements to be copied
2483 // to_remainder: remainder of dividing 'to' by wordSize
2492 // 'to' must be aligned by bytes_per_count but must not be aligned by wordSize
2493 // shifts 'to' by the number of copied bytes
2496 int align_dst_and_generate_shifted_copy_loop(Register from, Register to, Register count, Register Rval,
2502 assert_different_registers(from, to, count, Rval, tmp);
2504 int required_to_align = align_dst(to, count, Rval, tmp, to_remainder, bytes_per_count, forward);
2511 min_copy = generate_forward_shifted_copy_loop(from, to, count, bytes_per_count, lsr_shift, lsl_shift);
2513 min_copy = generate_backward_shifted_copy_loop(from, to, count, bytes_per_count, lsr_shift, lsl_shift);
2522 // from: beginning (if forward) or upper bound (if !forward) of the region to be read
2523 // to: beginning (if forward) or upper bound (if !forward) of the region to be written
2524 // count: 32-bit int, number of elements to be copied
2531 // 'to' must be aligned by bytes_per_count but must not be aligned by wordSize
2532 // shifts 'to' by the number of copied bytes
2535 // On AArch64 also scratches R4-R10; on 32-bit ARM saves them before use.
2536 int align_dst_and_generate_shifted_copy_loop(Register from, Register to, Register count, int bytes_per_count, bool forward) {
2543 // then the remainder of 'to' divided by wordSize is one of the elements of {seq}.
2554 min_copy = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 4, bytes_per_count, forward);
2560 __ tbz(to, 1, L4);
2561 __ tbz(to, 2, L2);
2564 int min_copy6 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 6, bytes_per_count, forward);
2568 int min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
2572 int min_copy4 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 4, bytes_per_count, forward);
2583 __ tbz(to, 0, L246);
2584 __ tbz(to, 1, L15);
2585 __ tbz(to, 2, L3);
2588 int min_copy7 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 7, bytes_per_count, forward);
2592 __ tbnz(to, 1, L26);
2595 int min_copy4 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 4, bytes_per_count, forward);
2599 __ tbz(to, 2, L1);
2602 int min_copy5 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 5, bytes_per_count, forward);
2606 int min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward);
2610 __ tbz(to, 2, L2);
2613 int min_copy6 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 6, bytes_per_count, forward);
2617 int min_copy1 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 1, bytes_per_count, forward);
2621 int min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
2644 min_copy = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
2654 __ tbz(to, 0, L2);
2655 __ tbz(to, 1, L1);
2658 min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward);
2662 min_copy1 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 1, bytes_per_count, forward);
2666 min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
2668 __ tbz(to, 0, L2);
2669 __ tbnz(to, 1, L3);
2672 min_copy1 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 1, bytes_per_count, forward);
2676 min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward);
2680 min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
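
align_dst_and_generate_shifted_copy_loop above emits one copy loop per possible remainder of 'to' modulo wordSize and selects between them at run time with tbz/tbnz on the low bits of 'to'. The same "one specialization per remainder" pattern can be illustrated in C++ with templates and a small dispatch table (my own construction, not the stub; each specialization here is just a plain byte copy):

    #include <cstddef>
    #include <cstdint>

    template <int TO_REMAINDER>
    void shifted_copy_specialized(const uint8_t* from, uint8_t* to, size_t count) {
        // a real specialization would hard-code shifts derived from TO_REMAINDER
        for (size_t i = 0; i < count; i++) to[i] = from[i];
    }

    void copy_with_remainder_dispatch(const uint8_t* from, uint8_t* to, size_t count) {
        using CopyFn = void (*)(const uint8_t*, uint8_t*, size_t);
        static const CopyFn table[4] = {
            shifted_copy_specialized<0>, shifted_copy_specialized<1>,
            shifted_copy_specialized<2>, shifted_copy_specialized<3>,
        };
        table[(uintptr_t)to & 3](from, to, count);   // low bits of 'to' pick the specialization
    }
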
2720 // "from" and "to" addresses are assumed to be heapword aligned.
2722 // If "disjoint" is true, arrays are assumed to be disjoint, otherwise they may overlap and
2723 // "nooverlap_target" must be specified as the address to jump if they don't.
2727 // to: R1
2736 const Register to = R1; // destination array address
2760 // Set 'from' and 'to' to upper bounds
2762 __ add_ptr_scaled_int32(to, to, count, log_bytes_per_count);
2788 int count_required_to_align = from_is_aligned ? 0 : align_src(from, to, count, tmp1, bytes_per_count, forward);
2796 // 'to' is aligned by bytes_per_count, so it is aligned by wordSize
2800 // Originally 'from' and 'to' were heapword aligned;
2801 // (from - to) has not been changed, so since now 'from' is 8-byte aligned, then it is also heapword aligned,
2802 // so 'to' is also heapword aligned and thus aligned by wordSize.
2811 __ tst(to, wordSize - 1);
2812 __ b(L_unaligned_dst, ne); // 'to' is not aligned
2815 // 'from' and 'to' are properly aligned
2819 min_copy = generate_forward_aligned_copy_loop (from, to, count, bytes_per_count);
2821 min_copy = generate_backward_aligned_copy_loop(from, to, count, bytes_per_count);
2832 copy_small_array(from, to, count, tmp1, tmp2, bytes_per_count, forward, L_small_array /* entry */);
2843 int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward);
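
The primitive arraycopy stub above first decides the copy direction (jumping to nooverlap_target when a forward copy is safe), and for a backward copy sets 'from' and 'to' to the upper bounds before aligning and choosing a loop. A minimal working C++ illustration of that direction choice (byte granularity only; not the stub):

    #include <cstddef>
    #include <cstdint>

    void conjoint_byte_copy(const uint8_t* from, uint8_t* to, size_t count) {
        if ((uintptr_t)to - (uintptr_t)from >= count) {
            for (size_t i = 0; i < count; i++) to[i] = from[i];   // forward copy is safe
        } else {
            from += count;                                        // set 'from' and 'to' to upper bounds
            to   += count;
            while (count-- > 0) *--to = *--from;                  // copy backward
        }
    }
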
2922 // Note: LR can be scratched but might be equal to addr, count or tmp
2943 // Safer to save R9 here since callers may have been written
2946 // is scratched. Note that the optimization might not be to
2994 // Generates pattern of code to be placed after raw data copying in generate_oop_copy
2998 // to: destination pointer after copying.
2999 // if 'forward' then 'to' == upper bound, else 'to' == beginning of the modified region
3002 // Blows all volatile (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR) and 'to', 'count', 'tmp' registers.
3003 void oop_arraycopy_stub_epilogue_helper(Register to, Register count, Register tmp, bool status, bool forward) {
3004 assert_different_registers(to, count, tmp);
3007 // 'to' is upper bound of the modified region
3009 __ sub_ptr_scaled_int32(to, to, count, LogBytesPerHeapOop);
3012 // 'to' is the beginning of the region
3014 gen_write_ref_array_post_barrier(to, count, tmp);
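
oop_arraycopy_stub_epilogue_helper above recovers the start of the modified region (for a forward copy, 'to' ends up past the last element) before running the post barrier over it. The pointer arithmetic, with stand-in names for the barrier and the oop size constant:

    #include <cstddef>
    #include <cstdint>

    void oop_copy_epilogue(uint8_t* to, size_t count, bool forward,
                           int log_bytes_per_heap_oop,
                           void (*post_barrier)(void* start, size_t count)) {
        uint8_t* start = forward ? to - (count << log_bytes_per_heap_oop)  // back up over what was written
                                 : to;                                     // backward copy: 'to' is already the start
        post_barrier(start, count);   // card-mark the region of 'count' copied oops
    }
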
3030 // "from" and "to" addresses are assumed to be heapword aligned.
3032 // If "disjoint" is true, arrays are assumed to be disjoint, otherwise they may overlap and
3033 // "nooverlap_target" must be specified as the address to jump if they don't.
3037 // to: R1
3046 Register to = R1;
3075 // LR is used later to save barrier args
3083 gen_write_ref_array_pre_barrier(to, count, callee_saved_regs);
3090 __ add_ptr_scaled_int32(to, to, count, log_bytes_per_count);
3107 int count_required_to_align = from_is_aligned ? 0 : align_src(from, to, count, tmp1, bytes_per_count, forward);
3115 // 'to' is aligned by bytes_per_count, so it is aligned by wordSize
3119 // Originally 'from' and 'to' were heapword aligned;
3120 // (from - to) has not been changed, so since now 'from' is 8-byte aligned, then it is also heapword aligned,
3121 // so 'to' is also heapword aligned and thus aligned by wordSize.
3130 __ tst(to, wordSize - 1);
3131 __ b(L_unaligned_dst, ne); // 'to' is not aligned
3136 min_copy = generate_forward_aligned_copy_loop(from, to, count, bytes_per_count);
3138 min_copy = generate_backward_aligned_copy_loop(from, to, count, bytes_per_count);
3142 oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward);
3145 copy_small_array(from, to, count, tmp1, noreg, bytes_per_count, forward, L_small_array);
3147 oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward);
3158 int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward);
3161 oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward);
3173 // to: R1
3177 // to a long, int, short, or byte copy loop.
3285 // Load next super to check
3293 // A miss means we are NOT a subtype and need to keep looping
3301 // Jump to success
3312 // to: R1
3324 const Register to = R1; // destination array address
3348 gen_write_ref_array_pre_barrier(to, count, callee_saved_regs);
3372 // nothing to copy
3389 __ store_heap_oop(R5, Address(to, BytesPerHeapOop, post_indexed)); // store the oop, changes flags
3393 __ str(R5, Address(to, BytesPerHeapOop, post_indexed)); // store the oop
3405 // branch to this on success:
3409 // It was a real error; we must depend on the caller to finish the job.
3412 // and report their number to the caller (0 or (-1^n))
3424 __ sub(to, to, AsmOperand(copied, lsl, LogBytesPerHeapOop)); // initial to value
3427 gen_write_ref_array_post_barrier(to, R12, R3);
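
The checkcast arraycopy above copies element by element, stops at the first element failing the type check, re-derives the initial 'to' from the number of elements copied, applies the post barrier to the copied prefix, and reports 0 on success or (-1 ^ n) otherwise. A sketch of that contract, with is_assignable standing in for the subtype check (not a HotSpot function):

    #include <cstddef>
    #include <cstdint>

    intptr_t checkcast_copy(void** from, void** to, size_t count,
                            bool (*is_assignable)(void* elem)) {
        size_t copied = 0;
        for (; copied < count; copied++) {
            void* elem = from[copied];
            if (elem != nullptr && !is_assignable(elem)) break;   // type check failed: stop here
            to[copied] = elem;                                    // store the oop (post-indexed in the stub)
        }
        // a post barrier would be applied to the 'copied' elements actually written
        return (copied == count) ? 0 : ~(intptr_t)copied;         // 0 or (-1 ^ n)
    }
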
3493 // R0 < 0 - need to call System.arraycopy
3536 // Assembler stubs will be used for this call to arraycopy
3617 // next registers should be set before the jump to corresponding stub
3619 const Register to = R1; // destination array address
3622 // 'from', 'to', 'count' registers should be set in this order
3639 __ add_ptr_scaled_int32(to, dst, dst_pos, LogBytesPerShort);
3645 __ add_ptr_scaled_int32(to, dst, dst_pos, LogBytesPerInt);
3651 __ add_ptr_scaled_int32(to, dst, dst_pos, 0);
3657 __ add_ptr_scaled_int32(to, dst, dst_pos, LogBytesPerLong);
3663 BLOCK_COMMENT("scale indexes to element size");
3665 __ add(to, dst, AsmOperand(dst_pos, lsl, R12_elsize)); // dst_addr
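
The add_ptr_scaled_int32 calls and the "scale indexes to element size" block above add a 32-bit position scaled by the log of the element size to a base pointer to form the copy addresses. The same arithmetic in plain C++ (header_bytes is a placeholder for any array-header offset applied separately; the function name is mine):

    #include <cstddef>
    #include <cstdint>

    void* element_address(void* array_base, size_t header_bytes,
                          uint32_t index, int log_elem_size) {
        return (uint8_t*)array_base + header_bytes + ((size_t)index << log_elem_size);
    }
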
3703 // next registers should be set before the jump to corresponding stub
3705 const Register to = R1; // destination array address
3711 __ add_ptr_scaled_int32(to, dst, dst_pos, LogBytesPerHeapOop); // dst_addr
3730 // It is safe to examine both src.length and dst.length.
3735 // next registers should be set before the jump to corresponding stub
3737 const Register to = R1; // destination array address
3744 __ add_ptr_scaled_int32(to, dst, dst_pos, LogBytesPerHeapOop); // dst_addr
3749 assert_different_registers(from, to, count, sco_temp,
3837 // With this flag, the C2 stubs are tested by generating calls to
3841 // and the result is tested to see whether the arraycopy stub should
3845 // arraycopy methods callable from C2 generic_arraycopy to set the
3846 // status to 0 for those that always succeed (calling the slow path stub might
3847 // lead to errors since the copy has already been performed).
3914 const Register to = c_rarg1; // destination array address
3994 __ vst1(V0, Address(to), MacroAssembler::VELEM_SIZE_8, 128);
4019 const Register to = c_rarg1; // destination array address
4100 __ vst1(V0, Address(to), MacroAssembler::VELEM_SIZE_8, 128);
4132 const Register to = c_rarg1; // destination array address
4215 __ vst1(V0, Address(to, 16, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
4250 const Register to = c_rarg1; // destination array address
4336 __ vst1(V0, Address(to, 16, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
4384 // Any extra arguments are already supposed to be in R1 and R2
4415 // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than
4448 // These entry points require SharedInfo::stack0 to be set up in non-core builds
4449 // and need to be relocatable, so they each fabricate a RuntimeStub internally.