1/*
2 * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26#include "precompiled.hpp"
27#include "asm/macroAssembler.inline.hpp"
28#include "code/debugInfoRec.hpp"
29#include "code/icBuffer.hpp"
30#include "code/vtableStubs.hpp"
31#include "interpreter/interpreter.hpp"
32#include "interpreter/interp_masm.hpp"
33#include "memory/resourceArea.hpp"
34#include "oops/compiledICHolder.hpp"
35#include "registerSaver_s390.hpp"
36#include "runtime/sharedRuntime.hpp"
37#include "runtime/vframeArray.hpp"
38#include "utilities/align.hpp"
39#include "vmreg_s390.inline.hpp"
40#ifdef COMPILER1
41#include "c1/c1_Runtime1.hpp"
42#endif
43#ifdef COMPILER2
44#include "opto/ad.hpp"
45#include "opto/runtime.hpp"
46#endif
47
48#ifdef PRODUCT
49#define __ masm->
50#else
51#define __ (Verbose ? (masm->block_comment(FILE_AND_LINE),masm):masm)->
52#endif
53
54#define BLOCK_COMMENT(str) __ block_comment(str)
55#define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")
56
57#define RegisterSaver_LiveIntReg(regname) \
58  { RegisterSaver::int_reg,   regname->encoding(), regname->as_VMReg() }
59
60#define RegisterSaver_LiveFloatReg(regname) \
61  { RegisterSaver::float_reg, regname->encoding(), regname->as_VMReg() }
62
63// Registers which are not saved/restored, but still they have got a frame slot.
64// Used to get same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2
65#define RegisterSaver_ExcludedIntReg(regname) \
66  { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }
67
68// Registers which are not saved/restored, but still they have got a frame slot.
69// Used to get same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
70#define RegisterSaver_ExcludedFloatReg(regname) \
71  { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }
72
73static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = {
74  // Live registers which get spilled to the stack. Register positions
75  // in this array correspond directly to the stack layout.
76  //
77  // live float registers:
78  //
79  RegisterSaver_LiveFloatReg(Z_F0 ),
80  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
81  RegisterSaver_LiveFloatReg(Z_F2 ),
82  RegisterSaver_LiveFloatReg(Z_F3 ),
83  RegisterSaver_LiveFloatReg(Z_F4 ),
84  RegisterSaver_LiveFloatReg(Z_F5 ),
85  RegisterSaver_LiveFloatReg(Z_F6 ),
86  RegisterSaver_LiveFloatReg(Z_F7 ),
87  RegisterSaver_LiveFloatReg(Z_F8 ),
88  RegisterSaver_LiveFloatReg(Z_F9 ),
89  RegisterSaver_LiveFloatReg(Z_F10),
90  RegisterSaver_LiveFloatReg(Z_F11),
91  RegisterSaver_LiveFloatReg(Z_F12),
92  RegisterSaver_LiveFloatReg(Z_F13),
93  RegisterSaver_LiveFloatReg(Z_F14),
94  RegisterSaver_LiveFloatReg(Z_F15),
95  //
96  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
97  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
98  RegisterSaver_LiveIntReg(Z_R2 ),
99  RegisterSaver_LiveIntReg(Z_R3 ),
100  RegisterSaver_LiveIntReg(Z_R4 ),
101  RegisterSaver_LiveIntReg(Z_R5 ),
102  RegisterSaver_LiveIntReg(Z_R6 ),
103  RegisterSaver_LiveIntReg(Z_R7 ),
104  RegisterSaver_LiveIntReg(Z_R8 ),
105  RegisterSaver_LiveIntReg(Z_R9 ),
106  RegisterSaver_LiveIntReg(Z_R10),
107  RegisterSaver_LiveIntReg(Z_R11),
108  RegisterSaver_LiveIntReg(Z_R12),
109  RegisterSaver_LiveIntReg(Z_R13),
110  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
111  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
112};
113
114static const RegisterSaver::LiveRegType RegisterSaver_LiveIntRegs[] = {
115  // Live registers which get spilled to the stack. Register positions
116  // in this array correspond directly to the stack layout.
117  //
118  // live float registers: All excluded, but still they get a stack slot to get same frame size.
119  //
120  RegisterSaver_ExcludedFloatReg(Z_F0 ),
121  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
122  RegisterSaver_ExcludedFloatReg(Z_F2 ),
123  RegisterSaver_ExcludedFloatReg(Z_F3 ),
124  RegisterSaver_ExcludedFloatReg(Z_F4 ),
125  RegisterSaver_ExcludedFloatReg(Z_F5 ),
126  RegisterSaver_ExcludedFloatReg(Z_F6 ),
127  RegisterSaver_ExcludedFloatReg(Z_F7 ),
128  RegisterSaver_ExcludedFloatReg(Z_F8 ),
129  RegisterSaver_ExcludedFloatReg(Z_F9 ),
130  RegisterSaver_ExcludedFloatReg(Z_F10),
131  RegisterSaver_ExcludedFloatReg(Z_F11),
132  RegisterSaver_ExcludedFloatReg(Z_F12),
133  RegisterSaver_ExcludedFloatReg(Z_F13),
134  RegisterSaver_ExcludedFloatReg(Z_F14),
135  RegisterSaver_ExcludedFloatReg(Z_F15),
136  //
137  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
138  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
139  RegisterSaver_LiveIntReg(Z_R2 ),
140  RegisterSaver_LiveIntReg(Z_R3 ),
141  RegisterSaver_LiveIntReg(Z_R4 ),
142  RegisterSaver_LiveIntReg(Z_R5 ),
143  RegisterSaver_LiveIntReg(Z_R6 ),
144  RegisterSaver_LiveIntReg(Z_R7 ),
145  RegisterSaver_LiveIntReg(Z_R8 ),
146  RegisterSaver_LiveIntReg(Z_R9 ),
147  RegisterSaver_LiveIntReg(Z_R10),
148  RegisterSaver_LiveIntReg(Z_R11),
149  RegisterSaver_LiveIntReg(Z_R12),
150  RegisterSaver_LiveIntReg(Z_R13),
151  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
152  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
153};
154
155static const RegisterSaver::LiveRegType RegisterSaver_LiveRegsWithoutR2[] = {
156  // Live registers which get spilled to the stack. Register positions
157  // in this array correspond directly to the stack layout.
158  //
159  // live float registers:
160  //
161  RegisterSaver_LiveFloatReg(Z_F0 ),
162  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
163  RegisterSaver_LiveFloatReg(Z_F2 ),
164  RegisterSaver_LiveFloatReg(Z_F3 ),
165  RegisterSaver_LiveFloatReg(Z_F4 ),
166  RegisterSaver_LiveFloatReg(Z_F5 ),
167  RegisterSaver_LiveFloatReg(Z_F6 ),
168  RegisterSaver_LiveFloatReg(Z_F7 ),
169  RegisterSaver_LiveFloatReg(Z_F8 ),
170  RegisterSaver_LiveFloatReg(Z_F9 ),
171  RegisterSaver_LiveFloatReg(Z_F10),
172  RegisterSaver_LiveFloatReg(Z_F11),
173  RegisterSaver_LiveFloatReg(Z_F12),
174  RegisterSaver_LiveFloatReg(Z_F13),
175  RegisterSaver_LiveFloatReg(Z_F14),
176  RegisterSaver_LiveFloatReg(Z_F15),
177  //
178  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
179  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
180  RegisterSaver_ExcludedIntReg(Z_R2), // Omit saving R2.
181  RegisterSaver_LiveIntReg(Z_R3 ),
182  RegisterSaver_LiveIntReg(Z_R4 ),
183  RegisterSaver_LiveIntReg(Z_R5 ),
184  RegisterSaver_LiveIntReg(Z_R6 ),
185  RegisterSaver_LiveIntReg(Z_R7 ),
186  RegisterSaver_LiveIntReg(Z_R8 ),
187  RegisterSaver_LiveIntReg(Z_R9 ),
188  RegisterSaver_LiveIntReg(Z_R10),
189  RegisterSaver_LiveIntReg(Z_R11),
190  RegisterSaver_LiveIntReg(Z_R12),
191  RegisterSaver_LiveIntReg(Z_R13),
192  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
193  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
194};
195
196// Live argument registers which get spilled to the stack.
197static const RegisterSaver::LiveRegType RegisterSaver_LiveArgRegs[] = {
198  RegisterSaver_LiveFloatReg(Z_FARG1),
199  RegisterSaver_LiveFloatReg(Z_FARG2),
200  RegisterSaver_LiveFloatReg(Z_FARG3),
201  RegisterSaver_LiveFloatReg(Z_FARG4),
202  RegisterSaver_LiveIntReg(Z_ARG1),
203  RegisterSaver_LiveIntReg(Z_ARG2),
204  RegisterSaver_LiveIntReg(Z_ARG3),
205  RegisterSaver_LiveIntReg(Z_ARG4),
206  RegisterSaver_LiveIntReg(Z_ARG5)
207};
208
209static const RegisterSaver::LiveRegType RegisterSaver_LiveVolatileRegs[] = {
210  // Live registers which get spilled to the stack. Register positions
211  // in this array correspond directly to the stack layout.
212  //
213  // live float registers:
214  //
215  RegisterSaver_LiveFloatReg(Z_F0 ),
216  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
217  RegisterSaver_LiveFloatReg(Z_F2 ),
218  RegisterSaver_LiveFloatReg(Z_F3 ),
219  RegisterSaver_LiveFloatReg(Z_F4 ),
220  RegisterSaver_LiveFloatReg(Z_F5 ),
221  RegisterSaver_LiveFloatReg(Z_F6 ),
222  RegisterSaver_LiveFloatReg(Z_F7 ),
223  // RegisterSaver_LiveFloatReg(Z_F8 ), // non-volatile
224  // RegisterSaver_LiveFloatReg(Z_F9 ), // non-volatile
225  // RegisterSaver_LiveFloatReg(Z_F10), // non-volatile
226  // RegisterSaver_LiveFloatReg(Z_F11), // non-volatile
227  // RegisterSaver_LiveFloatReg(Z_F12), // non-volatile
228  // RegisterSaver_LiveFloatReg(Z_F13), // non-volatile
229  // RegisterSaver_LiveFloatReg(Z_F14), // non-volatile
230  // RegisterSaver_LiveFloatReg(Z_F15), // non-volatile
231  //
232  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
233  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
234  RegisterSaver_LiveIntReg(Z_R2 ),
235  RegisterSaver_LiveIntReg(Z_R3 ),
236  RegisterSaver_LiveIntReg(Z_R4 ),
237  RegisterSaver_LiveIntReg(Z_R5 ),
238  // RegisterSaver_LiveIntReg(Z_R6 ), // non-volatile
239  // RegisterSaver_LiveIntReg(Z_R7 ), // non-volatile
240  // RegisterSaver_LiveIntReg(Z_R8 ), // non-volatile
241  // RegisterSaver_LiveIntReg(Z_R9 ), // non-volatile
242  // RegisterSaver_LiveIntReg(Z_R10), // non-volatile
243  // RegisterSaver_LiveIntReg(Z_R11), // non-volatile
244  // RegisterSaver_LiveIntReg(Z_R12), // non-volatile
245  // RegisterSaver_LiveIntReg(Z_R13), // non-volatile
246  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
247  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
248};
249
250int RegisterSaver::live_reg_save_size(RegisterSet reg_set) {
251  int reg_space = -1;
252  switch (reg_set) {
253    case all_registers:           reg_space = sizeof(RegisterSaver_LiveRegs); break;
254    case all_registers_except_r2: reg_space = sizeof(RegisterSaver_LiveRegsWithoutR2); break;
255    case all_integer_registers:   reg_space = sizeof(RegisterSaver_LiveIntRegs); break;
256    case all_volatile_registers:  reg_space = sizeof(RegisterSaver_LiveVolatileRegs); break;
257    case arg_registers:           reg_space = sizeof(RegisterSaver_LiveArgRegs); break;
258    default: ShouldNotReachHere();
259  }
260  return (reg_space / sizeof(RegisterSaver::LiveRegType)) * reg_size;
261}
262
263
264int RegisterSaver::live_reg_frame_size(RegisterSet reg_set) {
265  return live_reg_save_size(reg_set) + frame::z_abi_160_size;
266}
267
268
269// return_pc: Specify the register that should be stored as the return pc in the current frame.
270OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc) {
271  // Record volatile registers as callee-save values in an OopMap so
272  // their save locations will be propagated to the caller frame's
273  // RegisterMap during StackFrameStream construction (needed for
274  // deoptimization; see compiledVFrame::create_stack_value).
275
276  // Calculate frame size.
277  const int frame_size_in_bytes  = live_reg_frame_size(reg_set);
278  const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
279  const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);
280
281  // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
282  OopMap* map = new OopMap(frame_size_in_slots, 0);
283
284  int regstosave_num = 0;
285  const RegisterSaver::LiveRegType* live_regs = NULL;
286
287  switch (reg_set) {
288    case all_registers:
289      regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
290      live_regs      = RegisterSaver_LiveRegs;
291      break;
292    case all_registers_except_r2:
293      regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);;
294      live_regs      = RegisterSaver_LiveRegsWithoutR2;
295      break;
296    case all_integer_registers:
297      regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
298      live_regs      = RegisterSaver_LiveIntRegs;
299      break;
300    case all_volatile_registers:
301      regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
302      live_regs      = RegisterSaver_LiveVolatileRegs;
303      break;
304    case arg_registers:
305      regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);;
306      live_regs      = RegisterSaver_LiveArgRegs;
307      break;
308    default: ShouldNotReachHere();
309  }
310
311  // Save return pc in old frame.
312  __ save_return_pc(return_pc);
313
314  // Push a new frame (includes stack linkage).
315  // use return_pc as scratch for push_frame. Z_R0_scratch (the default) and Z_R1_scratch are
316  // illegally used to pass parameters (SAPJVM extension) by RangeCheckStub::emit_code().
317  __ push_frame(frame_size_in_bytes, return_pc);
318  // We have to restore return_pc right away.
319  // Nobody else will. Furthermore, return_pc isn't necessarily the default (Z_R14).
320  // Nobody else knows which register we saved.
321  __ z_lg(return_pc, _z_abi16(return_pc) + frame_size_in_bytes, Z_SP);
322
323  // Register save area in new frame starts above z_abi_160 area.
324  int offset = register_save_offset;
325
326  Register first = noreg;
327  Register last  = noreg;
328  int      first_offset = -1;
329  bool     float_spilled = false;
330
331  for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
332    int reg_num  = live_regs[i].reg_num;
333    int reg_type = live_regs[i].reg_type;
334
335    switch (reg_type) {
336      case RegisterSaver::int_reg: {
337        Register reg = as_Register(reg_num);
338        if (last != reg->predecessor()) {
339          if (first != noreg) {
340            __ z_stmg(first, last, first_offset, Z_SP);
341          }
342          first = reg;
343          first_offset = offset;
344          DEBUG_ONLY(float_spilled = false);
345        }
346        last = reg;
347        assert(last != Z_R0, "r0 would require special treatment");
348        assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
349        break;
350      }
351
352      case RegisterSaver::excluded_reg: // Not saved/restored, but with dedicated slot.
353        continue; // Continue with next loop iteration.
354
355      case RegisterSaver::float_reg: {
356        FloatRegister freg = as_FloatRegister(reg_num);
357        __ z_std(freg, offset, Z_SP);
358        DEBUG_ONLY(float_spilled = true);
359        break;
360      }
361
362      default:
363        ShouldNotReachHere();
364        break;
365    }
366
367    // Second set_callee_saved is really a waste but we'll keep things as they were for now
368    map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), live_regs[i].vmreg);
369    map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size) >> 2), live_regs[i].vmreg->next());
370  }
371  assert(first != noreg, "Should spill at least one int reg.");
372  __ z_stmg(first, last, first_offset, Z_SP);
373
374  // And we're done.
375  return map;
376}
377
378
379// Generate the OopMap (again, regs where saved before).
380OopMap* RegisterSaver::generate_oop_map(MacroAssembler* masm, RegisterSet reg_set) {
381  // Calculate frame size.
382  const int frame_size_in_bytes  = live_reg_frame_size(reg_set);
383  const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
384  const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);
385
386  // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
387  OopMap* map = new OopMap(frame_size_in_slots, 0);
388
389  int regstosave_num = 0;
390  const RegisterSaver::LiveRegType* live_regs = NULL;
391
392  switch (reg_set) {
393    case all_registers:
394      regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
395      live_regs      = RegisterSaver_LiveRegs;
396      break;
397    case all_registers_except_r2:
398      regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);;
399      live_regs      = RegisterSaver_LiveRegsWithoutR2;
400      break;
401    case all_integer_registers:
402      regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
403      live_regs      = RegisterSaver_LiveIntRegs;
404      break;
405    case all_volatile_registers:
406      regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
407      live_regs      = RegisterSaver_LiveVolatileRegs;
408      break;
409    case arg_registers:
410      regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);;
411      live_regs      = RegisterSaver_LiveArgRegs;
412      break;
413    default: ShouldNotReachHere();
414  }
415
416  // Register save area in new frame starts above z_abi_160 area.
417  int offset = register_save_offset;
418  for (int i = 0; i < regstosave_num; i++) {
419    if (live_regs[i].reg_type < RegisterSaver::excluded_reg) {
420      map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), live_regs[i].vmreg);
421      map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size)>>2), live_regs[i].vmreg->next());
422    }
423    offset += reg_size;
424  }
425  return map;
426}
427
428
429// Pop the current frame and restore all the registers that we saved.
430void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg_set) {
431  int offset;
432  const int register_save_offset = live_reg_frame_size(reg_set) - live_reg_save_size(reg_set);
433
434  Register first = noreg;
435  Register last = noreg;
436  int      first_offset = -1;
437  bool     float_spilled = false;
438
439  int regstosave_num = 0;
440  const RegisterSaver::LiveRegType* live_regs = NULL;
441
442  switch (reg_set) {
443    case all_registers:
444      regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);;
445      live_regs      = RegisterSaver_LiveRegs;
446      break;
447    case all_registers_except_r2:
448      regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);;
449      live_regs      = RegisterSaver_LiveRegsWithoutR2;
450      break;
451    case all_integer_registers:
452      regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
453      live_regs      = RegisterSaver_LiveIntRegs;
454      break;
455    case all_volatile_registers:
456      regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);;
457      live_regs      = RegisterSaver_LiveVolatileRegs;
458      break;
459    case arg_registers:
460      regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);;
461      live_regs      = RegisterSaver_LiveArgRegs;
462      break;
463    default: ShouldNotReachHere();
464  }
465
466  // Restore all registers (ints and floats).
467
468  // Register save area in new frame starts above z_abi_160 area.
469  offset = register_save_offset;
470
471  for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
472    int reg_num  = live_regs[i].reg_num;
473    int reg_type = live_regs[i].reg_type;
474
475    switch (reg_type) {
476      case RegisterSaver::excluded_reg:
477        continue; // Continue with next loop iteration.
478
479      case RegisterSaver::int_reg: {
480        Register reg = as_Register(reg_num);
481        if (last != reg->predecessor()) {
482          if (first != noreg) {
483            __ z_lmg(first, last, first_offset, Z_SP);
484          }
485          first = reg;
486          first_offset = offset;
487          DEBUG_ONLY(float_spilled = false);
488        }
489        last = reg;
490        assert(last != Z_R0, "r0 would require special treatment");
491        assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
492        break;
493      }
494
495      case RegisterSaver::float_reg: {
496        FloatRegister freg = as_FloatRegister(reg_num);
497        __ z_ld(freg, offset, Z_SP);
498        DEBUG_ONLY(float_spilled = true);
499        break;
500      }
501
502      default:
503        ShouldNotReachHere();
504    }
505  }
506  assert(first != noreg, "Should spill at least one int reg.");
507  __ z_lmg(first, last, first_offset, Z_SP);
508
509  // Pop the frame.
510  __ pop_frame();
511
512  // Restore the flags.
513  __ restore_return_pc();
514}
515
516
517// Pop the current frame and restore the registers that might be holding a result.
518void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
519  int i;
520  int offset;
521  const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
522                                   sizeof(RegisterSaver::LiveRegType);
523  const int register_save_offset = live_reg_frame_size(all_registers) - live_reg_save_size(all_registers);
524
525  // Restore all result registers (ints and floats).
526  offset = register_save_offset;
527  for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
528    int reg_num = RegisterSaver_LiveRegs[i].reg_num;
529    int reg_type = RegisterSaver_LiveRegs[i].reg_type;
530    switch (reg_type) {
531      case RegisterSaver::excluded_reg:
532        continue; // Continue with next loop iteration.
533      case RegisterSaver::int_reg: {
534        if (as_Register(reg_num) == Z_RET) { // int result_reg
535          __ z_lg(as_Register(reg_num), offset, Z_SP);
536        }
537        break;
538      }
539      case RegisterSaver::float_reg: {
540        if (as_FloatRegister(reg_num) == Z_FRET) { // float result_reg
541          __ z_ld(as_FloatRegister(reg_num), offset, Z_SP);
542        }
543        break;
544      }
545      default:
546        ShouldNotReachHere();
547    }
548  }
549}
550
551size_t SharedRuntime::trampoline_size() {
552  return MacroAssembler::load_const_size() + 2;
553}
554
555void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
556  // Think about using pc-relative branch.
557  __ load_const(Z_R1_scratch, destination);
558  __ z_br(Z_R1_scratch);
559}
560
561// ---------------------------------------------------------------------------
562void SharedRuntime::save_native_result(MacroAssembler * masm,
563                                       BasicType ret_type,
564                                       int frame_slots) {
565  Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);
566
567  switch (ret_type) {
568    case T_BOOLEAN:  // Save shorter types as int. Do we need sign extension at restore??
569    case T_BYTE:
570    case T_CHAR:
571    case T_SHORT:
572    case T_INT:
573      __ reg2mem_opt(Z_RET, memaddr, false);
574      break;
575    case T_OBJECT:   // Save pointer types as long.
576    case T_ARRAY:
577    case T_ADDRESS:
578    case T_VOID:
579    case T_LONG:
580      __ reg2mem_opt(Z_RET, memaddr);
581      break;
582    case T_FLOAT:
583      __ freg2mem_opt(Z_FRET, memaddr, false);
584      break;
585    case T_DOUBLE:
586      __ freg2mem_opt(Z_FRET, memaddr);
587      break;
588  }
589}
590
591void SharedRuntime::restore_native_result(MacroAssembler *masm,
592                                          BasicType       ret_type,
593                                          int             frame_slots) {
594  Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);
595
596  switch (ret_type) {
597    case T_BOOLEAN:  // Restore shorter types as int. Do we need sign extension at restore??
598    case T_BYTE:
599    case T_CHAR:
600    case T_SHORT:
601    case T_INT:
602      __ mem2reg_opt(Z_RET, memaddr, false);
603      break;
604    case T_OBJECT:   // Restore pointer types as long.
605    case T_ARRAY:
606    case T_ADDRESS:
607    case T_VOID:
608    case T_LONG:
609      __ mem2reg_opt(Z_RET, memaddr);
610      break;
611    case T_FLOAT:
612      __ mem2freg_opt(Z_FRET, memaddr, false);
613      break;
614    case T_DOUBLE:
615      __ mem2freg_opt(Z_FRET, memaddr);
616      break;
617  }
618}
619
620// ---------------------------------------------------------------------------
621// Read the array of BasicTypes from a signature, and compute where the
622// arguments should go. Values in the VMRegPair regs array refer to 4-byte
623// quantities. Values less than VMRegImpl::stack0 are registers, those above
624// refer to 4-byte stack slots. All stack slots are based off of the stack pointer
625// as framesizes are fixed.
626// VMRegImpl::stack0 refers to the first slot 0(sp).
627// VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Registers
628// up to RegisterImpl::number_of_registers are the 64-bit integer registers.
629
630// Note: the INPUTS in sig_bt are in units of Java argument words, which are
631// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
632// units regardless of build.
633
634// The Java calling convention is a "shifted" version of the C ABI.
635// By skipping the first C ABI register we can call non-static jni methods
636// with small numbers of arguments without having to shuffle the arguments
637// at all. Since we control the java ABI we ought to at least get some
638// advantage out of it.
639int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
640                                           VMRegPair *regs,
641                                           int total_args_passed,
642                                           int is_outgoing) {
643  // c2c calling conventions for compiled-compiled calls.
644
645  // An int/float occupies 1 slot here.
646  const int inc_stk_for_intfloat   = 1; // 1 slots for ints and floats.
647  const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.
648
649  const VMReg z_iarg_reg[5] = {
650    Z_R2->as_VMReg(),
651    Z_R3->as_VMReg(),
652    Z_R4->as_VMReg(),
653    Z_R5->as_VMReg(),
654    Z_R6->as_VMReg()
655  };
656  const VMReg z_farg_reg[4] = {
657    Z_F0->as_VMReg(),
658    Z_F2->as_VMReg(),
659    Z_F4->as_VMReg(),
660    Z_F6->as_VMReg()
661  };
662  const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
663  const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
664
665  assert(RegisterImpl::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
666  assert(FloatRegisterImpl::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");
667
668  int i;
669  int stk = 0;
670  int ireg = 0;
671  int freg = 0;
672
673  for (int i = 0; i < total_args_passed; ++i) {
674    switch (sig_bt[i]) {
675      case T_BOOLEAN:
676      case T_CHAR:
677      case T_BYTE:
678      case T_SHORT:
679      case T_INT:
680        if (ireg < z_num_iarg_registers) {
681          // Put int/ptr in register.
682          regs[i].set1(z_iarg_reg[ireg]);
683          ++ireg;
684        } else {
685          // Put int/ptr on stack.
686          regs[i].set1(VMRegImpl::stack2reg(stk));
687          stk += inc_stk_for_intfloat;
688        }
689        break;
690      case T_LONG:
691        assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
692        if (ireg < z_num_iarg_registers) {
693          // Put long in register.
694          regs[i].set2(z_iarg_reg[ireg]);
695          ++ireg;
696        } else {
697          // Put long on stack and align to 2 slots.
698          if (stk & 0x1) { ++stk; }
699          regs[i].set2(VMRegImpl::stack2reg(stk));
700          stk += inc_stk_for_longdouble;
701        }
702        break;
703      case T_OBJECT:
704      case T_ARRAY:
705      case T_ADDRESS:
706        if (ireg < z_num_iarg_registers) {
707          // Put ptr in register.
708          regs[i].set2(z_iarg_reg[ireg]);
709          ++ireg;
710        } else {
711          // Put ptr on stack and align to 2 slots, because
712          // "64-bit pointers record oop-ishness on 2 aligned adjacent
713          // registers." (see OopFlow::build_oop_map).
714          if (stk & 0x1) { ++stk; }
715          regs[i].set2(VMRegImpl::stack2reg(stk));
716          stk += inc_stk_for_longdouble;
717        }
718        break;
719      case T_FLOAT:
720        if (freg < z_num_farg_registers) {
721          // Put float in register.
722          regs[i].set1(z_farg_reg[freg]);
723          ++freg;
724        } else {
725          // Put float on stack.
726          regs[i].set1(VMRegImpl::stack2reg(stk));
727          stk += inc_stk_for_intfloat;
728        }
729        break;
730      case T_DOUBLE:
731        assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
732        if (freg < z_num_farg_registers) {
733          // Put double in register.
734          regs[i].set2(z_farg_reg[freg]);
735          ++freg;
736        } else {
737          // Put double on stack and align to 2 slots.
738          if (stk & 0x1) { ++stk; }
739          regs[i].set2(VMRegImpl::stack2reg(stk));
740          stk += inc_stk_for_longdouble;
741        }
742        break;
743      case T_VOID:
744        assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
745        // Do not count halves.
746        regs[i].set_bad();
747        break;
748      default:
749        ShouldNotReachHere();
750    }
751  }
752  return align_up(stk, 2);
753}
754
755int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
756                                        VMRegPair *regs,
757                                        VMRegPair *regs2,
758                                        int total_args_passed) {
759  assert(regs2 == NULL, "second VMRegPair array not used on this platform");
760
761  // Calling conventions for C runtime calls and calls to JNI native methods.
762  const VMReg z_iarg_reg[5] = {
763    Z_R2->as_VMReg(),
764    Z_R3->as_VMReg(),
765    Z_R4->as_VMReg(),
766    Z_R5->as_VMReg(),
767    Z_R6->as_VMReg()
768  };
769  const VMReg z_farg_reg[4] = {
770    Z_F0->as_VMReg(),
771    Z_F2->as_VMReg(),
772    Z_F4->as_VMReg(),
773    Z_F6->as_VMReg()
774  };
775  const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
776  const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
777
778  // Check calling conventions consistency.
779  assert(RegisterImpl::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
780  assert(FloatRegisterImpl::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");
781
782  // Avoid passing C arguments in the wrong stack slots.
783
784  // 'Stk' counts stack slots. Due to alignment, 32 bit values occupy
785  // 2 such slots, like 64 bit values do.
786  const int inc_stk_for_intfloat   = 2; // 2 slots for ints and floats.
787  const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.
788
789  int i;
790  // Leave room for C-compatible ABI
791  int stk = (frame::z_abi_160_size - frame::z_jit_out_preserve_size) / VMRegImpl::stack_slot_size;
792  int freg = 0;
793  int ireg = 0;
794
795  // We put the first 5 arguments into registers and the rest on the
796  // stack. Float arguments are already in their argument registers
797  // due to c2c calling conventions (see calling_convention).
798  for (int i = 0; i < total_args_passed; ++i) {
799    switch (sig_bt[i]) {
800      case T_BOOLEAN:
801      case T_CHAR:
802      case T_BYTE:
803      case T_SHORT:
804      case T_INT:
805        // Fall through, handle as long.
806      case T_LONG:
807      case T_OBJECT:
808      case T_ARRAY:
809      case T_ADDRESS:
810      case T_METADATA:
811        // Oops are already boxed if required (JNI).
812        if (ireg < z_num_iarg_registers) {
813          regs[i].set2(z_iarg_reg[ireg]);
814          ++ireg;
815        } else {
816          regs[i].set2(VMRegImpl::stack2reg(stk));
817          stk += inc_stk_for_longdouble;
818        }
819        break;
820      case T_FLOAT:
821        if (freg < z_num_farg_registers) {
822          regs[i].set1(z_farg_reg[freg]);
823          ++freg;
824        } else {
825          regs[i].set1(VMRegImpl::stack2reg(stk+1));
826          stk +=  inc_stk_for_intfloat;
827        }
828        break;
829      case T_DOUBLE:
830        assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
831        if (freg < z_num_farg_registers) {
832          regs[i].set2(z_farg_reg[freg]);
833          ++freg;
834        } else {
835          // Put double on stack.
836          regs[i].set2(VMRegImpl::stack2reg(stk));
837          stk += inc_stk_for_longdouble;
838        }
839        break;
840      case T_VOID:
841        // Do not count halves.
842        regs[i].set_bad();
843        break;
844      default:
845        ShouldNotReachHere();
846    }
847  }
848  return align_up(stk, 2);
849}
850
851////////////////////////////////////////////////////////////////////////
852//
853//  Argument shufflers
854//
855////////////////////////////////////////////////////////////////////////
856
857//----------------------------------------------------------------------
858// The java_calling_convention describes stack locations as ideal slots on
859// a frame with no abi restrictions. Since we must observe abi restrictions
860// (like the placement of the register window) the slots must be biased by
861// the following value.
862//----------------------------------------------------------------------
863static int reg2slot(VMReg r) {
864  return r->reg2stack() + SharedRuntime::out_preserve_stack_slots();
865}
866
867static int reg2offset(VMReg r) {
868  return reg2slot(r) * VMRegImpl::stack_slot_size;
869}
870
871static void verify_oop_args(MacroAssembler *masm,
872                            int total_args_passed,
873                            const BasicType *sig_bt,
874                            const VMRegPair *regs) {
875  if (!VerifyOops) { return; }
876
877  for (int i = 0; i < total_args_passed; i++) {
878    if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
879      VMReg r = regs[i].first();
880      assert(r->is_valid(), "bad oop arg");
881
882      if (r->is_stack()) {
883        __ z_lg(Z_R0_scratch,
884                Address(Z_SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
885        __ verify_oop(Z_R0_scratch);
886      } else {
887        __ verify_oop(r->as_Register());
888      }
889    }
890  }
891}
892
893static void gen_special_dispatch(MacroAssembler *masm,
894                                 int total_args_passed,
895                                 vmIntrinsics::ID special_dispatch,
896                                 const BasicType *sig_bt,
897                                 const VMRegPair *regs) {
898  verify_oop_args(masm, total_args_passed, sig_bt, regs);
899
900  // Now write the args into the outgoing interpreter space.
901  bool     has_receiver   = false;
902  Register receiver_reg   = noreg;
903  int      member_arg_pos = -1;
904  Register member_reg     = noreg;
905  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch);
906
907  if (ref_kind != 0) {
908    member_arg_pos = total_args_passed - 1;  // trailing MemberName argument
909    member_reg = Z_R9;                       // Known to be free at this point.
910    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
911  } else {
912    guarantee(special_dispatch == vmIntrinsics::_invokeBasic, "special_dispatch=%d", special_dispatch);
913    has_receiver = true;
914  }
915
916  if (member_reg != noreg) {
917    // Load the member_arg into register, if necessary.
918    assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
919    assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");
920
921    VMReg r = regs[member_arg_pos].first();
922    assert(r->is_valid(), "bad member arg");
923
924    if (r->is_stack()) {
925      __ z_lg(member_reg, Address(Z_SP, reg2offset(r)));
926    } else {
927      // No data motion is needed.
928      member_reg = r->as_Register();
929    }
930  }
931
932  if (has_receiver) {
933    // Make sure the receiver is loaded into a register.
934    assert(total_args_passed > 0, "oob");
935    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
936
937    VMReg r = regs[0].first();
938    assert(r->is_valid(), "bad receiver arg");
939
940    if (r->is_stack()) {
941      // Porting note: This assumes that compiled calling conventions always
942      // pass the receiver oop in a register. If this is not true on some
943      // platform, pick a temp and load the receiver from stack.
944      assert(false, "receiver always in a register");
945      receiver_reg = Z_R13;  // Known to be free at this point.
946      __ z_lg(receiver_reg, Address(Z_SP, reg2offset(r)));
947    } else {
948      // No data motion is needed.
949      receiver_reg = r->as_Register();
950    }
951  }
952
953  // Figure out which address we are really jumping to:
954  MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
955                                                 receiver_reg, member_reg,
956                                                 /*for_compiler_entry:*/ true);
957}
958
959////////////////////////////////////////////////////////////////////////
960//
961//  Argument shufflers
962//
963////////////////////////////////////////////////////////////////////////
964
965// Is the size of a vector size (in bytes) bigger than a size saved by default?
966// 8 bytes registers are saved by default on z/Architecture.
967bool SharedRuntime::is_wide_vector(int size) {
968  // Note, MaxVectorSize == 8 on this platform.
969  assert(size <= 8, "%d bytes vectors are not supported", size);
970  return size > 8;
971}
972
973//----------------------------------------------------------------------
974// An oop arg. Must pass a handle not the oop itself
975//----------------------------------------------------------------------
976static void object_move(MacroAssembler *masm,
977                        OopMap *map,
978                        int oop_handle_offset,
979                        int framesize_in_slots,
980                        VMRegPair src,
981                        VMRegPair dst,
982                        bool is_receiver,
983                        int *receiver_offset) {
984  int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
985
986  assert(!is_receiver || (is_receiver && (*receiver_offset == -1)), "only one receiving object per call, please.");
987
988  // Must pass a handle. First figure out the location we use as a handle.
989
990  if (src.first()->is_stack()) {
991    // Oop is already on the stack, put handle on stack or in register
992    // If handle will be on the stack, use temp reg to calculate it.
993    Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
994    Label    skip;
995    int      slot_in_older_frame = reg2slot(src.first());
996
997    guarantee(!is_receiver, "expecting receiver in register");
998    map->set_oop(VMRegImpl::stack2reg(slot_in_older_frame + framesize_in_slots));
999
1000    __ add2reg(rHandle, reg2offset(src.first())+frame_offset, Z_SP);
1001    __ load_and_test_long(Z_R0, Address(rHandle));
1002    __ z_brne(skip);
1003    // Use a NULL handle if oop is NULL.
1004    __ clear_reg(rHandle, true, false);
1005    __ bind(skip);
1006
1007    // Copy handle to the right place (register or stack).
1008    if (dst.first()->is_stack()) {
1009      __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
1010    } // else
1011      // nothing to do. rHandle uses the correct register
1012  } else {
1013    // Oop is passed in an input register. We must flush it to the stack.
1014    const Register rOop = src.first()->as_Register();
1015    const Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
1016    int            oop_slot = (rOop->encoding()-Z_ARG1->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset;
1017    int            oop_slot_offset = oop_slot*VMRegImpl::stack_slot_size;
1018    NearLabel skip;
1019
1020    if (is_receiver) {
1021      *receiver_offset = oop_slot_offset;
1022    }
1023    map->set_oop(VMRegImpl::stack2reg(oop_slot));
1024
1025    // Flush Oop to stack, calculate handle.
1026    __ z_stg(rOop, oop_slot_offset, Z_SP);
1027    __ add2reg(rHandle, oop_slot_offset, Z_SP);
1028
1029    // If Oop == NULL, use a NULL handle.
1030    __ compare64_and_branch(rOop, (RegisterOrConstant)0L, Assembler::bcondNotEqual, skip);
1031    __ clear_reg(rHandle, true, false);
1032    __ bind(skip);
1033
1034    // Copy handle to the right place (register or stack).
1035    if (dst.first()->is_stack()) {
1036      __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
1037    } // else
1038      // nothing to do here, since rHandle = dst.first()->as_Register in this case.
1039  }
1040}
1041
1042//----------------------------------------------------------------------
1043// A float arg. May have to do float reg to int reg conversion
1044//----------------------------------------------------------------------
1045static void float_move(MacroAssembler *masm,
1046                       VMRegPair src,
1047                       VMRegPair dst,
1048                       int framesize_in_slots,
1049                       int workspace_slot_offset) {
1050  int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1051  int workspace_offset = workspace_slot_offset * VMRegImpl::stack_slot_size;
1052
1053  // We do not accept an argument in a VMRegPair to be spread over two slots,
1054  // no matter what physical location (reg or stack) the slots may have.
1055  // We just check for the unaccepted slot to be invalid.
1056  assert(!src.second()->is_valid(), "float in arg spread over two slots");
1057  assert(!dst.second()->is_valid(), "float out arg spread over two slots");
1058
1059  if (src.first()->is_stack()) {
1060    if (dst.first()->is_stack()) {
1061      // stack -> stack. The easiest of the bunch.
1062      __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1063               Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(float));
1064    } else {
1065      // stack to reg
1066      Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1067      if (dst.first()->is_Register()) {
1068        __ mem2reg_opt(dst.first()->as_Register(), memaddr, false);
1069      } else {
1070        __ mem2freg_opt(dst.first()->as_FloatRegister(), memaddr, false);
1071      }
1072    }
1073  } else if (src.first()->is_Register()) {
1074    if (dst.first()->is_stack()) {
1075      // gpr -> stack
1076      __ reg2mem_opt(src.first()->as_Register(),
1077                     Address(Z_SP, reg2offset(dst.first()), false ));
1078    } else {
1079      if (dst.first()->is_Register()) {
1080        // gpr -> gpr
1081        __ move_reg_if_needed(dst.first()->as_Register(), T_INT,
1082                              src.first()->as_Register(), T_INT);
1083      } else {
1084        if (VM_Version::has_FPSupportEnhancements()) {
1085          // gpr -> fpr. Exploit z10 capability of direct transfer.
1086          __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
1087        } else {
1088          // gpr -> fpr. Use work space on stack to transfer data.
1089          Address   stackaddr(Z_SP, workspace_offset);
1090
1091          __ reg2mem_opt(src.first()->as_Register(), stackaddr, false);
1092          __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr, false);
1093        }
1094      }
1095    }
1096  } else {
1097    if (dst.first()->is_stack()) {
1098      // fpr -> stack
1099      __ freg2mem_opt(src.first()->as_FloatRegister(),
1100                      Address(Z_SP, reg2offset(dst.first())), false);
1101    } else {
1102      if (dst.first()->is_Register()) {
1103        if (VM_Version::has_FPSupportEnhancements()) {
1104          // fpr -> gpr.
1105          __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
1106        } else {
1107          // fpr -> gpr. Use work space on stack to transfer data.
1108          Address   stackaddr(Z_SP, workspace_offset);
1109
1110          __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr, false);
1111          __ mem2reg_opt(dst.first()->as_Register(), stackaddr, false);
1112        }
1113      } else {
1114        // fpr -> fpr
1115        __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_FLOAT,
1116                               src.first()->as_FloatRegister(), T_FLOAT);
1117      }
1118    }
1119  }
1120}
1121
1122//----------------------------------------------------------------------
1123// A double arg. May have to do double reg to long reg conversion
1124//----------------------------------------------------------------------
1125static void double_move(MacroAssembler *masm,
1126                        VMRegPair src,
1127                        VMRegPair dst,
1128                        int framesize_in_slots,
1129                        int workspace_slot_offset) {
1130  int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
1131  int workspace_offset = workspace_slot_offset*VMRegImpl::stack_slot_size;
1132
1133  // Since src is always a java calling convention we know that the
1134  // src pair is always either all registers or all stack (and aligned?)
1135
1136  if (src.first()->is_stack()) {
1137    if (dst.first()->is_stack()) {
1138      // stack -> stack. The easiest of the bunch.
1139      __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1140               Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(double));
1141    } else {
1142      // stack to reg
1143      Address stackaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1144
1145      if (dst.first()->is_Register()) {
1146        __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
1147      } else {
1148        __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
1149      }
1150    }
1151  } else if (src.first()->is_Register()) {
1152    if (dst.first()->is_stack()) {
1153      // gpr -> stack
1154      __ reg2mem_opt(src.first()->as_Register(),
1155                     Address(Z_SP, reg2offset(dst.first())));
1156    } else {
1157      if (dst.first()->is_Register()) {
1158        // gpr -> gpr
1159        __ move_reg_if_needed(dst.first()->as_Register(), T_LONG,
1160                              src.first()->as_Register(), T_LONG);
1161      } else {
1162        if (VM_Version::has_FPSupportEnhancements()) {
1163          // gpr -> fpr. Exploit z10 capability of direct transfer.
1164          __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
1165        } else {
1166          // gpr -> fpr. Use work space on stack to transfer data.
1167          Address stackaddr(Z_SP, workspace_offset);
1168          __ reg2mem_opt(src.first()->as_Register(), stackaddr);
1169          __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
1170        }
1171      }
1172    }
1173  } else {
1174    if (dst.first()->is_stack()) {
1175      // fpr -> stack
1176      __ freg2mem_opt(src.first()->as_FloatRegister(),
1177                      Address(Z_SP, reg2offset(dst.first())));
1178    } else {
1179      if (dst.first()->is_Register()) {
1180        if (VM_Version::has_FPSupportEnhancements()) {
1181          // fpr -> gpr. Exploit z10 capability of direct transfer.
1182          __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
1183        } else {
1184          // fpr -> gpr. Use work space on stack to transfer data.
1185          Address stackaddr(Z_SP, workspace_offset);
1186
1187          __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr);
1188          __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
1189        }
1190      } else {
1191        // fpr -> fpr
1192        // In theory these overlap but the ordering is such that this is likely a nop.
1193        __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_DOUBLE,
1194                               src.first()->as_FloatRegister(), T_DOUBLE);
1195      }
1196    }
1197  }
1198}
1199
1200//----------------------------------------------------------------------
1201// A long arg.
1202//----------------------------------------------------------------------
1203static void long_move(MacroAssembler *masm,
1204                      VMRegPair src,
1205                      VMRegPair dst,
1206                      int framesize_in_slots) {
1207  int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
1208
1209  if (src.first()->is_stack()) {
1210    if (dst.first()->is_stack()) {
1211      // stack -> stack. The easiest of the bunch.
1212      __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1213               Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(long));
1214    } else {
1215      // stack to reg
1216      assert(dst.first()->is_Register(), "long dst value must be in GPR");
1217      __ mem2reg_opt(dst.first()->as_Register(),
1218                      Address(Z_SP, reg2offset(src.first()) + frame_offset));
1219    }
1220  } else {
1221    // reg to reg
1222    assert(src.first()->is_Register(), "long src value must be in GPR");
1223    if (dst.first()->is_stack()) {
1224      // reg -> stack
1225      __ reg2mem_opt(src.first()->as_Register(),
1226                     Address(Z_SP, reg2offset(dst.first())));
1227    } else {
1228      // reg -> reg
1229      assert(dst.first()->is_Register(), "long dst value must be in GPR");
1230      __ move_reg_if_needed(dst.first()->as_Register(),
1231                            T_LONG, src.first()->as_Register(), T_LONG);
1232    }
1233  }
1234}
1235
1236
1237//----------------------------------------------------------------------
1238// A int-like arg.
1239//----------------------------------------------------------------------
1240// On z/Architecture we will store integer like items to the stack as 64 bit
1241// items, according to the z/Architecture ABI, even though Java would only store
1242// 32 bits for a parameter.
1243// We do sign extension for all base types. That is ok since the only
1244// unsigned base type is T_CHAR, and T_CHAR uses only 16 bits of an int.
1245// Sign extension 32->64 bit will thus not affect the value.
1246//----------------------------------------------------------------------
1247static void move32_64(MacroAssembler *masm,
1248                      VMRegPair src,
1249                      VMRegPair dst,
1250                      int framesize_in_slots) {
1251  int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1252
1253  if (src.first()->is_stack()) {
1254    Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1255    if (dst.first()->is_stack()) {
1256      // stack -> stack. MVC not posible due to sign extension.
1257      Address firstaddr(Z_SP, reg2offset(dst.first()));
1258      __ mem2reg_signed_opt(Z_R0_scratch, memaddr);
1259      __ reg2mem_opt(Z_R0_scratch, firstaddr);
1260    } else {
1261      // stack -> reg, sign extended
1262      __ mem2reg_signed_opt(dst.first()->as_Register(), memaddr);
1263    }
1264  } else {
1265    if (dst.first()->is_stack()) {
1266      // reg -> stack, sign extended
1267      Address firstaddr(Z_SP, reg2offset(dst.first()));
1268      __ z_lgfr(src.first()->as_Register(), src.first()->as_Register());
1269      __ reg2mem_opt(src.first()->as_Register(), firstaddr);
1270    } else {
1271      // reg -> reg, sign extended
1272      __ z_lgfr(dst.first()->as_Register(), src.first()->as_Register());
1273    }
1274  }
1275}
1276
1277static void save_or_restore_arguments(MacroAssembler *masm,
1278                                      const int stack_slots,
1279                                      const int total_in_args,
1280                                      const int arg_save_area,
1281                                      OopMap *map,
1282                                      VMRegPair *in_regs,
1283                                      BasicType *in_sig_bt) {
1284
1285  // If map is non-NULL then the code should store the values,
1286  // otherwise it should load them.
1287  int slot = arg_save_area;
1288  // Handle double words first.
1289  for (int i = 0; i < total_in_args; i++) {
1290    if (in_regs[i].first()->is_FloatRegister() && in_sig_bt[i] == T_DOUBLE) {
1291      int offset = slot * VMRegImpl::stack_slot_size;
1292      slot += VMRegImpl::slots_per_word;
1293      assert(slot <= stack_slots, "overflow (after DOUBLE stack slot)");
1294      const FloatRegister   freg = in_regs[i].first()->as_FloatRegister();
1295      Address   stackaddr(Z_SP, offset);
1296      if (map != NULL) {
1297        __ freg2mem_opt(freg, stackaddr);
1298      } else {
1299        __ mem2freg_opt(freg, stackaddr);
1300      }
1301    } else if (in_regs[i].first()->is_Register() &&
1302               (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) {
1303      int offset = slot * VMRegImpl::stack_slot_size;
1304      const Register   reg = in_regs[i].first()->as_Register();
1305      if (map != NULL) {
1306        __ z_stg(reg, offset, Z_SP);
1307        if (in_sig_bt[i] == T_ARRAY) {
1308          map->set_oop(VMRegImpl::stack2reg(slot));
1309        }
1310      } else {
1311        __ z_lg(reg, offset, Z_SP);
1312        slot += VMRegImpl::slots_per_word;
1313        assert(slot <= stack_slots, "overflow (after LONG/ARRAY stack slot)");
1314      }
1315    }
1316  }
1317
1318  // Save or restore single word registers.
1319  for (int i = 0; i < total_in_args; i++) {
1320    if (in_regs[i].first()->is_FloatRegister()) {
1321      if (in_sig_bt[i] == T_FLOAT) {
1322        int offset = slot * VMRegImpl::stack_slot_size;
1323        slot++;
1324        assert(slot <= stack_slots, "overflow (after FLOAT stack slot)");
1325        const FloatRegister   freg = in_regs[i].first()->as_FloatRegister();
1326        Address   stackaddr(Z_SP, offset);
1327        if (map != NULL) {
1328          __ freg2mem_opt(freg, stackaddr, false);
1329        } else {
1330          __ mem2freg_opt(freg, stackaddr, false);
1331        }
1332      }
1333    } else if (in_regs[i].first()->is_stack() &&
1334               in_sig_bt[i] == T_ARRAY && map != NULL) {
1335      int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1336      map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
1337    }
1338  }
1339}
1340
1341// Check GCLocker::needs_gc and enter the runtime if it's true. This
1342// keeps a new JNI critical region from starting until a GC has been
1343// forced. Save down any oops in registers and describe them in an OopMap.
1344static void check_needs_gc_for_critical_native(MacroAssembler   *masm,
1345                                                const int stack_slots,
1346                                                const int total_in_args,
1347                                                const int arg_save_area,
1348                                                OopMapSet *oop_maps,
1349                                                VMRegPair *in_regs,
1350                                                BasicType *in_sig_bt) {
1351  __ block_comment("check GCLocker::needs_gc");
1352  Label cont;
1353
1354  // Check GCLocker::_needs_gc flag.
1355  __ load_const_optimized(Z_R1_scratch, (long) GCLocker::needs_gc_address());
1356  __ z_cli(0, Z_R1_scratch, 0);
1357  __ z_bre(cont);
1358
1359  // Save down any values that are live in registers and call into the
1360  // runtime to halt for a GC.
1361  OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1362
1363  save_or_restore_arguments(masm, stack_slots, total_in_args,
1364                            arg_save_area, map, in_regs, in_sig_bt);
1365  address the_pc = __ pc();
1366  __ set_last_Java_frame(Z_SP, noreg);
1367
1368  __ block_comment("block_for_jni_critical");
1369  __ z_lgr(Z_ARG1, Z_thread);
1370
1371  address entry_point = CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical);
1372  __ call_c(entry_point);
1373  oop_maps->add_gc_map(__ offset(), map);
1374
1375  __ reset_last_Java_frame();
1376
1377  // Reload all the register arguments.
1378  save_or_restore_arguments(masm, stack_slots, total_in_args,
1379                            arg_save_area, NULL, in_regs, in_sig_bt);
1380
1381  __ bind(cont);
1382
1383  if (StressCriticalJNINatives) {
1384    // Stress register saving
1385    OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1386    save_or_restore_arguments(masm, stack_slots, total_in_args,
1387                              arg_save_area, map, in_regs, in_sig_bt);
1388
1389    // Destroy argument registers.
1390    for (int i = 0; i < total_in_args; i++) {
1391      if (in_regs[i].first()->is_Register()) {
1392        // Don't set CC.
1393        __ clear_reg(in_regs[i].first()->as_Register(), true, false);
1394      } else {
1395        if (in_regs[i].first()->is_FloatRegister()) {
1396          FloatRegister fr = in_regs[i].first()->as_FloatRegister();
1397          __ z_lcdbr(fr, fr);
1398        }
1399      }
1400    }
1401
1402    save_or_restore_arguments(masm, stack_slots, total_in_args,
1403                              arg_save_area, NULL, in_regs, in_sig_bt);
1404  }
1405}
1406
1407static void move_ptr(MacroAssembler *masm,
1408                     VMRegPair src,
1409                     VMRegPair dst,
1410                     int framesize_in_slots) {
1411  int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1412
1413  if (src.first()->is_stack()) {
1414    if (dst.first()->is_stack()) {
1415      // stack to stack
1416      __ mem2reg_opt(Z_R0_scratch, Address(Z_SP, reg2offset(src.first()) + frame_offset));
1417      __ reg2mem_opt(Z_R0_scratch, Address(Z_SP, reg2offset(dst.first())));
1418    } else {
1419      // stack to reg
1420      __ mem2reg_opt(dst.first()->as_Register(),
1421                     Address(Z_SP, reg2offset(src.first()) + frame_offset));
1422    }
1423  } else {
1424    if (dst.first()->is_stack()) {
1425      // reg to stack
1426    __ reg2mem_opt(src.first()->as_Register(), Address(Z_SP, reg2offset(dst.first())));
1427    } else {
1428    __ lgr_if_needed(dst.first()->as_Register(), src.first()->as_Register());
1429    }
1430  }
1431}
1432
1433// Unpack an array argument into a pointer to the body and the length
1434// if the array is non-null, otherwise pass 0 for both.
1435static void unpack_array_argument(MacroAssembler *masm,
1436                                   VMRegPair reg,
1437                                   BasicType in_elem_type,
1438                                   VMRegPair body_arg,
1439                                   VMRegPair length_arg,
1440                                   int framesize_in_slots) {
1441  Register tmp_reg = Z_tmp_2;
1442  Register tmp2_reg = Z_tmp_1;
1443
1444  assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg,
1445         "possible collision");
1446  assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg,
1447         "possible collision");
1448
1449  // Pass the length, ptr pair.
1450  NearLabel set_out_args;
1451  VMRegPair tmp, tmp2;
1452
1453  tmp.set_ptr(tmp_reg->as_VMReg());
1454  tmp2.set_ptr(tmp2_reg->as_VMReg());
1455  if (reg.first()->is_stack()) {
1456    // Load the arg up from the stack.
1457    move_ptr(masm, reg, tmp, framesize_in_slots);
1458    reg = tmp;
1459  }
1460
1461  const Register first = reg.first()->as_Register();
1462
1463  // Don't set CC, indicate unused result.
1464  (void) __ clear_reg(tmp2_reg, true, false);
1465  if (tmp_reg != first) {
1466    __ clear_reg(tmp_reg, true, false);  // Don't set CC.
1467  }
1468  __ compare64_and_branch(first, (RegisterOrConstant)0L, Assembler::bcondEqual, set_out_args);
1469  __ z_lgf(tmp2_reg, Address(first, arrayOopDesc::length_offset_in_bytes()));
1470  __ add2reg(tmp_reg, arrayOopDesc::base_offset_in_bytes(in_elem_type), first);
1471
1472  __ bind(set_out_args);
1473  move_ptr(masm, tmp, body_arg, framesize_in_slots);
1474  move32_64(masm, tmp2, length_arg, framesize_in_slots);
1475}
1476
1477//----------------------------------------------------------------------
1478// Wrap a JNI call.
1479//----------------------------------------------------------------------
1480#undef USE_RESIZE_FRAME
1481nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
1482                                                const methodHandle& method,
1483                                                int compile_id,
1484                                                BasicType *in_sig_bt,
1485                                                VMRegPair *in_regs,
1486                                                BasicType ret_type) {
1487#ifdef COMPILER2
1488  int total_in_args = method->size_of_parameters();
1489  if (method->is_method_handle_intrinsic()) {
1490    vmIntrinsics::ID iid = method->intrinsic_id();
1491    intptr_t start = (intptr_t) __ pc();
1492    int vep_offset = ((intptr_t) __ pc()) - start;
1493
1494    gen_special_dispatch(masm, total_in_args,
1495                         method->intrinsic_id(), in_sig_bt, in_regs);
1496
1497    int frame_complete = ((intptr_t)__ pc()) - start; // Not complete, period.
1498
1499    __ flush();
1500
1501    int stack_slots = SharedRuntime::out_preserve_stack_slots();  // No out slots at all, actually.
1502
1503    return nmethod::new_native_nmethod(method,
1504                                       compile_id,
1505                                       masm->code(),
1506                                       vep_offset,
1507                                       frame_complete,
1508                                       stack_slots / VMRegImpl::slots_per_word,
1509                                       in_ByteSize(-1),
1510                                       in_ByteSize(-1),
1511                                       (OopMapSet *) NULL);
1512  }
1513
1514
1515  ///////////////////////////////////////////////////////////////////////
1516  //
1517  //  Precalculations before generating any code
1518  //
1519  ///////////////////////////////////////////////////////////////////////
1520
1521  bool is_critical_native = true;
1522  address native_func = method->critical_native_function();
1523  if (native_func == NULL) {
1524    native_func = method->native_function();
1525    is_critical_native = false;
1526  }
1527  assert(native_func != NULL, "must have function");
1528
1529  //---------------------------------------------------------------------
1530  // We have received a description of where all the java args are located
1531  // on entry to the wrapper. We need to convert these args to where
1532  // the jni function will expect them. To figure out where they go
1533  // we convert the java signature to a C signature by inserting
1534  // the hidden arguments as arg[0] and possibly arg[1] (static method).
1535  //
1536  // The first hidden argument arg[0] is a pointer to the JNI environment.
1537  // It is generated for every call.
1538  // The second argument arg[1] to the JNI call, which is hidden for static
1539  // methods, is the boxed lock object. For static calls, the lock object
1540  // is the static method itself. The oop is constructed here. for instance
1541  // calls, the lock is performed on the object itself, the pointer of
1542  // which is passed as the first visible argument.
1543  //---------------------------------------------------------------------
1544
1545  // Additionally, on z/Architecture we must convert integers
1546  // to longs in the C signature. We do this in advance in order to have
1547  // no trouble with indexes into the bt-arrays.
1548  // So convert the signature and registers now, and adjust the total number
1549  // of in-arguments accordingly.
1550  bool method_is_static = method->is_static();
1551  int  total_c_args     = total_in_args;
1552
1553  if (!is_critical_native) {
1554    int n_hidden_args = method_is_static ? 2 : 1;
1555    total_c_args += n_hidden_args;
1556  } else {
1557    // No JNIEnv*, no this*, but unpacked arrays (base+length).
1558    for (int i = 0; i < total_in_args; i++) {
1559      if (in_sig_bt[i] == T_ARRAY) {
1560        total_c_args ++;
1561      }
1562    }
1563  }
1564
1565  BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1566  VMRegPair *out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1567  BasicType* in_elem_bt = NULL;
1568
1569  // Create the signature for the C call:
1570  //   1) add the JNIEnv*
1571  //   2) add the class if the method is static
1572  //   3) copy the rest of the incoming signature (shifted by the number of
1573  //      hidden arguments)
1574
1575  int argc = 0;
1576  if (!is_critical_native) {
1577    out_sig_bt[argc++] = T_ADDRESS;
1578    if (method->is_static()) {
1579      out_sig_bt[argc++] = T_OBJECT;
1580    }
1581
1582    for (int i = 0; i < total_in_args; i++) {
1583      out_sig_bt[argc++] = in_sig_bt[i];
1584    }
1585  } else {
1586    Thread* THREAD = Thread::current();
1587    in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
1588    SignatureStream ss(method->signature());
1589    int o = 0;
1590    for (int i = 0; i < total_in_args; i++, o++) {
1591      if (in_sig_bt[i] == T_ARRAY) {
1592        // Arrays are passed as tuples (int, elem*).
1593        Symbol* atype = ss.as_symbol(CHECK_NULL);
1594        const char* at = atype->as_C_string();
1595        if (strlen(at) == 2) {
1596          assert(at[0] == '[', "must be");
1597          switch (at[1]) {
1598            case 'B': in_elem_bt[o]  = T_BYTE; break;
1599            case 'C': in_elem_bt[o]  = T_CHAR; break;
1600            case 'D': in_elem_bt[o]  = T_DOUBLE; break;
1601            case 'F': in_elem_bt[o]  = T_FLOAT; break;
1602            case 'I': in_elem_bt[o]  = T_INT; break;
1603            case 'J': in_elem_bt[o]  = T_LONG; break;
1604            case 'S': in_elem_bt[o]  = T_SHORT; break;
1605            case 'Z': in_elem_bt[o]  = T_BOOLEAN; break;
1606            default: ShouldNotReachHere();
1607          }
1608        }
1609      } else {
1610        in_elem_bt[o] = T_VOID;
1611      }
1612      if (in_sig_bt[i] != T_VOID) {
1613        assert(in_sig_bt[i] == ss.type(), "must match");
1614        ss.next();
1615      }
1616    }
1617    assert(total_in_args == o, "must match");
1618
1619    for (int i = 0; i < total_in_args; i++) {
1620      if (in_sig_bt[i] == T_ARRAY) {
1621        // Arrays are passed as tuples (int, elem*).
1622        out_sig_bt[argc++] = T_INT;
1623        out_sig_bt[argc++] = T_ADDRESS;
1624      } else {
1625        out_sig_bt[argc++] = in_sig_bt[i];
1626      }
1627    }
1628  }
1629
1630  ///////////////////////////////////////////////////////////////////////
1631  // Now figure out where the args must be stored and how much stack space
1632  // they require (neglecting out_preserve_stack_slots but providing space
1633  // for storing the first five register arguments).
1634  // It's weird, see int_stk_helper.
1635  ///////////////////////////////////////////////////////////////////////
1636
1637  //---------------------------------------------------------------------
1638  // Compute framesize for the wrapper.
1639  //
1640  // - We need to handlize all oops passed in registers.
1641  // - We must create space for them here that is disjoint from the save area.
1642  // - We always just allocate 5 words for storing down these object.
1643  //   This allows us to simply record the base and use the Ireg number to
1644  //   decide which slot to use.
1645  // - Note that the reg number used to index the stack slot is the inbound
1646  //   number, not the outbound number.
1647  // - We must shuffle args to match the native convention,
1648  //   and to include var-args space.
1649  //---------------------------------------------------------------------
1650
1651  //---------------------------------------------------------------------
1652  // Calculate the total number of stack slots we will need:
1653  // - 1) abi requirements
1654  // - 2) outgoing args
1655  // - 3) space for inbound oop handle area
1656  // - 4) space for handlizing a klass if static method
1657  // - 5) space for a lock if synchronized method
1658  // - 6) workspace (save rtn value, int<->float reg moves, ...)
1659  // - 7) filler slots for alignment
1660  //---------------------------------------------------------------------
1661  // Here is how the space we have allocated will look like.
1662  // Since we use resize_frame, we do not create a new stack frame,
1663  // but just extend the one we got with our own data area.
1664  //
1665  // If an offset or pointer name points to a separator line, it is
1666  // assumed that addressing with offset 0 selects storage starting
1667  // at the first byte above the separator line.
1668  //
1669  //
1670  //     ...                   ...
1671  //      | caller's frame      |
1672  // FP-> |---------------------|
1673  //      | filler slots, if any|
1674  //     7| #slots == mult of 2 |
1675  //      |---------------------|
1676  //      | work space          |
1677  //     6| 2 slots = 8 bytes   |
1678  //      |---------------------|
1679  //     5| lock box (if sync)  |
1680  //      |---------------------| <- lock_slot_offset
1681  //     4| klass (if static)   |
1682  //      |---------------------| <- klass_slot_offset
1683  //     3| oopHandle area      |
1684  //      | (save area for      |
1685  //      |  critical natives)  |
1686  //      |                     |
1687  //      |                     |
1688  //      |---------------------| <- oop_handle_offset
1689  //     2| outbound memory     |
1690  //     ...                   ...
1691  //      | based arguments     |
1692  //      |---------------------|
1693  //      | vararg              |
1694  //     ...                   ...
1695  //      | area                |
1696  //      |---------------------| <- out_arg_slot_offset
1697  //     1| out_preserved_slots |
1698  //     ...                   ...
1699  //      | (z_abi spec)        |
1700  // SP-> |---------------------| <- FP_slot_offset (back chain)
1701  //     ...                   ...
1702  //
1703  //---------------------------------------------------------------------
1704
1705  // *_slot_offset indicates offset from SP in #stack slots
1706  // *_offset      indicates offset from SP in #bytes
1707
1708  int stack_slots = c_calling_convention(out_sig_bt, out_regs, /*regs2=*/NULL, total_c_args) + // 1+2
1709                    SharedRuntime::out_preserve_stack_slots(); // see c_calling_convention
1710
1711  // Now the space for the inbound oop handle area.
1712  int total_save_slots = RegisterImpl::number_of_arg_registers * VMRegImpl::slots_per_word;
1713  if (is_critical_native) {
1714    // Critical natives may have to call out so they need a save area
1715    // for register arguments.
1716    int double_slots = 0;
1717    int single_slots = 0;
1718    for (int i = 0; i < total_in_args; i++) {
1719      if (in_regs[i].first()->is_Register()) {
1720        const Register reg = in_regs[i].first()->as_Register();
1721        switch (in_sig_bt[i]) {
1722          case T_BOOLEAN:
1723          case T_BYTE:
1724          case T_SHORT:
1725          case T_CHAR:
1726          case T_INT:
1727          // Fall through.
1728          case T_ARRAY:
1729          case T_LONG: double_slots++; break;
1730          default:  ShouldNotReachHere();
1731        }
1732      } else {
1733        if (in_regs[i].first()->is_FloatRegister()) {
1734          switch (in_sig_bt[i]) {
1735            case T_FLOAT:  single_slots++; break;
1736            case T_DOUBLE: double_slots++; break;
1737            default:  ShouldNotReachHere();
1738          }
1739        }
1740      }
1741    }  // for
1742    total_save_slots = double_slots * 2 + align_up(single_slots, 2); // Round to even.
1743  }
1744
1745  int oop_handle_slot_offset = stack_slots;
1746  stack_slots += total_save_slots;                                        // 3)
1747
1748  int klass_slot_offset = 0;
1749  int klass_offset      = -1;
1750  if (method_is_static && !is_critical_native) {                          // 4)
1751    klass_slot_offset  = stack_slots;
1752    klass_offset       = klass_slot_offset * VMRegImpl::stack_slot_size;
1753    stack_slots       += VMRegImpl::slots_per_word;
1754  }
1755
1756  int lock_slot_offset = 0;
1757  int lock_offset      = -1;
1758  if (method->is_synchronized()) {                                        // 5)
1759    lock_slot_offset   = stack_slots;
1760    lock_offset        = lock_slot_offset * VMRegImpl::stack_slot_size;
1761    stack_slots       += VMRegImpl::slots_per_word;
1762  }
1763
1764  int workspace_slot_offset= stack_slots;                                 // 6)
1765  stack_slots         += 2;
1766
1767  // Now compute actual number of stack words we need.
1768  // Round to align stack properly.
1769  stack_slots = align_up(stack_slots,                                     // 7)
1770                         frame::alignment_in_bytes / VMRegImpl::stack_slot_size);
1771  int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size;
1772
1773
1774  ///////////////////////////////////////////////////////////////////////
1775  // Now we can start generating code
1776  ///////////////////////////////////////////////////////////////////////
1777
1778  unsigned int wrapper_CodeStart  = __ offset();
1779  unsigned int wrapper_UEPStart;
1780  unsigned int wrapper_VEPStart;
1781  unsigned int wrapper_FrameDone;
1782  unsigned int wrapper_CRegsSet;
1783  Label     handle_pending_exception;
1784  Label     ic_miss;
1785
1786  //---------------------------------------------------------------------
1787  // Unverified entry point (UEP)
1788  //---------------------------------------------------------------------
1789  wrapper_UEPStart = __ offset();
1790
1791  // check ic: object class <-> cached class
1792  if (!method_is_static) __ nmethod_UEP(ic_miss);
1793  // Fill with nops (alignment of verified entry point).
1794  __ align(CodeEntryAlignment);
1795
1796  //---------------------------------------------------------------------
1797  // Verified entry point (VEP)
1798  //---------------------------------------------------------------------
1799  wrapper_VEPStart = __ offset();
1800
1801  __ save_return_pc();
1802  __ generate_stack_overflow_check(frame_size_in_bytes);  // Check before creating frame.
1803#ifndef USE_RESIZE_FRAME
1804  __ push_frame(frame_size_in_bytes);                     // Create a new frame for the wrapper.
1805#else
1806  __ resize_frame(-frame_size_in_bytes, Z_R0_scratch);    // No new frame for the wrapper.
1807                                                          // Just resize the existing one.
1808#endif
1809
1810  wrapper_FrameDone = __ offset();
1811
1812  __ verify_thread();
1813
1814  // Native nmethod wrappers never take possession of the oop arguments.
1815  // So the caller will gc the arguments.
1816  // The only thing we need an oopMap for is if the call is static.
1817  //
1818  // An OopMap for lock (and class if static), and one for the VM call itself
1819  OopMapSet  *oop_maps        = new OopMapSet();
1820  OopMap     *map             = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1821
1822  if (is_critical_native) {
1823    check_needs_gc_for_critical_native(masm, stack_slots, total_in_args,
1824                                       oop_handle_slot_offset, oop_maps, in_regs, in_sig_bt);
1825  }
1826
1827
1828  //////////////////////////////////////////////////////////////////////
1829  //
1830  // The Grand Shuffle
1831  //
1832  //////////////////////////////////////////////////////////////////////
1833  //
1834  // We immediately shuffle the arguments so that for any vm call we have
1835  // to make from here on out (sync slow path, jvmti, etc.) we will have
1836  // captured the oops from our caller and have a valid oopMap for them.
1837  //
1838  //--------------------------------------------------------------------
1839  // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
1840  // (derived from JavaThread* which is in Z_thread) and, if static,
1841  // the class mirror instead of a receiver. This pretty much guarantees that
1842  // register layout will not match. We ignore these extra arguments during
1843  // the shuffle. The shuffle is described by the two calling convention
1844  // vectors we have in our possession. We simply walk the java vector to
1845  // get the source locations and the c vector to get the destinations.
1846  //
1847  // This is a trick. We double the stack slots so we can claim
1848  // the oops in the caller's frame. Since we are sure to have
1849  // more args than the caller doubling is enough to make
1850  // sure we can capture all the incoming oop args from the caller.
1851  //--------------------------------------------------------------------
1852
1853  // Record sp-based slot for receiver on stack for non-static methods.
1854  int receiver_offset = -1;
1855
1856  //--------------------------------------------------------------------
1857  // We move the arguments backwards because the floating point registers
1858  // destination will always be to a register with a greater or equal
1859  // register number or the stack.
1860  //   jix is the index of the incoming Java arguments.
1861  //   cix is the index of the outgoing C arguments.
1862  //--------------------------------------------------------------------
1863
1864#ifdef ASSERT
1865  bool reg_destroyed[RegisterImpl::number_of_registers];
1866  bool freg_destroyed[FloatRegisterImpl::number_of_registers];
1867  for (int r = 0; r < RegisterImpl::number_of_registers; r++) {
1868    reg_destroyed[r] = false;
1869  }
1870  for (int f = 0; f < FloatRegisterImpl::number_of_registers; f++) {
1871    freg_destroyed[f] = false;
1872  }
1873#endif // ASSERT
1874
1875  for (int jix = total_in_args - 1, cix = total_c_args - 1; jix >= 0; jix--, cix--) {
1876#ifdef ASSERT
1877    if (in_regs[jix].first()->is_Register()) {
1878      assert(!reg_destroyed[in_regs[jix].first()->as_Register()->encoding()], "ack!");
1879    } else {
1880      if (in_regs[jix].first()->is_FloatRegister()) {
1881        assert(!freg_destroyed[in_regs[jix].first()->as_FloatRegister()->encoding()], "ack!");
1882      }
1883    }
1884    if (out_regs[cix].first()->is_Register()) {
1885      reg_destroyed[out_regs[cix].first()->as_Register()->encoding()] = true;
1886    } else {
1887      if (out_regs[cix].first()->is_FloatRegister()) {
1888        freg_destroyed[out_regs[cix].first()->as_FloatRegister()->encoding()] = true;
1889      }
1890    }
1891#endif // ASSERT
1892
1893    switch (in_sig_bt[jix]) {
1894      // Due to casting, small integers should only occur in pairs with type T_LONG.
1895      case T_BOOLEAN:
1896      case T_CHAR:
1897      case T_BYTE:
1898      case T_SHORT:
1899      case T_INT:
1900        // Move int and do sign extension.
1901        move32_64(masm, in_regs[jix], out_regs[cix], stack_slots);
1902        break;
1903
1904      case T_LONG :
1905        long_move(masm, in_regs[jix], out_regs[cix], stack_slots);
1906        break;
1907
1908      case T_ARRAY:
1909        if (is_critical_native) {
1910          int body_arg = cix;
1911          cix -= 2; // Point to length arg.
1912          unpack_array_argument(masm, in_regs[jix], in_elem_bt[jix], out_regs[body_arg], out_regs[cix], stack_slots);
1913          break;
1914        }
1915        // else fallthrough
1916      case T_OBJECT:
1917        assert(!is_critical_native, "no oop arguments");
1918        object_move(masm, map, oop_handle_slot_offset, stack_slots, in_regs[jix], out_regs[cix],
1919                    ((jix == 0) && (!method_is_static)),
1920                    &receiver_offset);
1921        break;
1922      case T_VOID:
1923        break;
1924
1925      case T_FLOAT:
1926        float_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
1927        break;
1928
1929      case T_DOUBLE:
1930        assert(jix+1 <  total_in_args && in_sig_bt[jix+1]  == T_VOID && out_sig_bt[cix+1] == T_VOID, "bad arg list");
1931        double_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
1932        break;
1933
1934      case T_ADDRESS:
1935        assert(false, "found T_ADDRESS in java args");
1936        break;
1937
1938      default:
1939        ShouldNotReachHere();
1940    }
1941  }
1942
1943  //--------------------------------------------------------------------
1944  // Pre-load a static method's oop into ARG2.
1945  // Used both by locking code and the normal JNI call code.
1946  //--------------------------------------------------------------------
1947  if (method_is_static && !is_critical_native) {
1948    __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), Z_ARG2);
1949
1950    // Now handlize the static class mirror in ARG2. It's known not-null.
1951    __ z_stg(Z_ARG2, klass_offset, Z_SP);
1952    map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
1953    __ add2reg(Z_ARG2, klass_offset, Z_SP);
1954  }
1955
1956  // Get JNIEnv* which is first argument to native.
1957  if (!is_critical_native) {
1958    __ add2reg(Z_ARG1, in_bytes(JavaThread::jni_environment_offset()), Z_thread);
1959  }
1960
1961  //////////////////////////////////////////////////////////////////////
1962  // We have all of the arguments setup at this point.
1963  // We MUST NOT touch any outgoing regs from this point on.
1964  // So if we must call out we must push a new frame.
1965  //////////////////////////////////////////////////////////////////////
1966
1967
1968  // Calc the current pc into Z_R10 and into wrapper_CRegsSet.
1969  // Both values represent the same position.
1970  __ get_PC(Z_R10);                // PC into register
1971  wrapper_CRegsSet = __ offset();  // and into into variable.
1972
1973  // Z_R10 now has the pc loaded that we will use when we finally call to native.
1974
1975  // We use the same pc/oopMap repeatedly when we call out.
1976  oop_maps->add_gc_map((int)(wrapper_CRegsSet-wrapper_CodeStart), map);
1977
1978  // Lock a synchronized method.
1979
1980  if (method->is_synchronized()) {
1981    assert(!is_critical_native, "unhandled");
1982
1983    // ATTENTION: args and Z_R10 must be preserved.
1984    Register r_oop  = Z_R11;
1985    Register r_box  = Z_R12;
1986    Register r_tmp1 = Z_R13;
1987    Register r_tmp2 = Z_R7;
1988    Label done;
1989
1990    // Load the oop for the object or class. R_carg2_classorobject contains
1991    // either the handlized oop from the incoming arguments or the handlized
1992    // class mirror (if the method is static).
1993    __ z_lg(r_oop, 0, Z_ARG2);
1994
1995    lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size);
1996    // Get the lock box slot's address.
1997    __ add2reg(r_box, lock_offset, Z_SP);
1998
1999#ifdef ASSERT
2000    if (UseBiasedLocking)
2001      // Making the box point to itself will make it clear it went unused
2002      // but also be obviously invalid.
2003      __ z_stg(r_box, 0, r_box);
2004#endif // ASSERT
2005
2006    // Try fastpath for locking.
2007    // Fast_lock kills r_temp_1, r_temp_2. (Don't use R1 as temp, won't work!)
2008    __ compiler_fast_lock_object(r_oop, r_box, r_tmp1, r_tmp2);
2009    __ z_bre(done);
2010
2011    //-------------------------------------------------------------------------
2012    // None of the above fast optimizations worked so we have to get into the
2013    // slow case of monitor enter. Inline a special case of call_VM that
2014    // disallows any pending_exception.
2015    //-------------------------------------------------------------------------
2016
2017    Register oldSP = Z_R11;
2018
2019    __ z_lgr(oldSP, Z_SP);
2020
2021    RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
2022
2023    // Prepare arguments for call.
2024    __ z_lg(Z_ARG1, 0, Z_ARG2); // Ynboxed class mirror or unboxed object.
2025    __ add2reg(Z_ARG2, lock_offset, oldSP);
2026    __ z_lgr(Z_ARG3, Z_thread);
2027
2028    __ set_last_Java_frame(oldSP, Z_R10 /* gc map pc */);
2029
2030    // Do the call.
2031    __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C));
2032    __ call(Z_R1_scratch);
2033
2034    __ reset_last_Java_frame();
2035
2036    RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
2037#ifdef ASSERT
2038    { Label L;
2039      __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2040      __ z_bre(L);
2041      __ stop("no pending exception allowed on exit from IR::monitorenter");
2042      __ bind(L);
2043    }
2044#endif
2045    __ bind(done);
2046  } // lock for synchronized methods
2047
2048
2049  //////////////////////////////////////////////////////////////////////
2050  // Finally just about ready to make the JNI call.
2051  //////////////////////////////////////////////////////////////////////
2052
2053  // Use that pc we placed in Z_R10 a while back as the current frame anchor.
2054  __ set_last_Java_frame(Z_SP, Z_R10);
2055
2056  // Transition from _thread_in_Java to _thread_in_native.
2057  __ set_thread_state(_thread_in_native);
2058
2059
2060  //////////////////////////////////////////////////////////////////////
2061  // This is the JNI call.
2062  //////////////////////////////////////////////////////////////////////
2063
2064  __ call_c(native_func);
2065
2066
2067  //////////////////////////////////////////////////////////////////////
2068  // We have survived the call once we reach here.
2069  //////////////////////////////////////////////////////////////////////
2070
2071
2072  //--------------------------------------------------------------------
2073  // Unpack native results.
2074  //--------------------------------------------------------------------
2075  // For int-types, we do any needed sign-extension required.
2076  // Care must be taken that the return value (in Z_ARG1 = Z_RET = Z_R2
2077  // or in Z_FARG0 = Z_FRET = Z_F0) will survive any VM calls for
2078  // blocking or unlocking.
2079  // An OOP result (handle) is done specially in the slow-path code.
2080  //--------------------------------------------------------------------
2081  switch (ret_type) {  //GLGLGL
2082    case T_VOID:    break;         // Nothing to do!
2083    case T_FLOAT:   break;         // Got it where we want it (unless slow-path)
2084    case T_DOUBLE:  break;         // Got it where we want it (unless slow-path)
2085    case T_LONG:    break;         // Got it where we want it (unless slow-path)
2086    case T_OBJECT:  break;         // Really a handle.
2087                                   // Cannot de-handlize until after reclaiming jvm_lock.
2088    case T_ARRAY:   break;
2089
2090    case T_BOOLEAN:                // 0 -> false(0); !0 -> true(1)
2091      __ z_lngfr(Z_RET, Z_RET);    // Force sign bit on except for zero.
2092      __ z_srlg(Z_RET, Z_RET, 63); // Shift sign bit into least significant pos.
2093      break;
2094    case T_BYTE:    __ z_lgbr(Z_RET, Z_RET);  break; // sign extension
2095    case T_CHAR:    __ z_llghr(Z_RET, Z_RET); break; // unsigned result
2096    case T_SHORT:   __ z_lghr(Z_RET, Z_RET);  break; // sign extension
2097    case T_INT:     __ z_lgfr(Z_RET, Z_RET);  break; // sign-extend for beauty.
2098
2099    default:
2100      ShouldNotReachHere();
2101      break;
2102  }
2103
2104
2105  // Switch thread to "native transition" state before reading the synchronization state.
2106  // This additional state is necessary because reading and testing the synchronization
2107  // state is not atomic w.r.t. GC, as this scenario demonstrates:
2108  //   - Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2109  //   - VM thread changes sync state to synchronizing and suspends threads for GC.
2110  //   - Thread A is resumed to finish this native method, but doesn't block here since it
2111  //     didn't see any synchronization in progress, and escapes.
2112
2113  // Transition from _thread_in_native to _thread_in_native_trans.
2114  __ set_thread_state(_thread_in_native_trans);
2115
2116  // Safepoint synchronization
2117  //--------------------------------------------------------------------
2118  // Must we block?
2119  //--------------------------------------------------------------------
2120  // Block, if necessary, before resuming in _thread_in_Java state.
2121  // In order for GC to work, don't clear the last_Java_sp until after blocking.
2122  //--------------------------------------------------------------------
2123  Label after_transition;
2124  {
2125    Label no_block, sync;
2126
2127    save_native_result(masm, ret_type, workspace_slot_offset); // Make Z_R2 available as work reg.
2128
2129    if (os::is_MP()) {
2130      if (UseMembar) {
2131        // Force this write out before the read below.
2132        __ z_fence();
2133      } else {
2134        // Write serialization page so VM thread can do a pseudo remote membar.
2135        // We use the current thread pointer to calculate a thread specific
2136        // offset to write to within the page. This minimizes bus traffic
2137        // due to cache line collision.
2138        __ serialize_memory(Z_thread, Z_R1, Z_R2);
2139      }
2140    }
2141    __ generate_safepoint_check(sync, Z_R1, true);
2142
2143    __ load_and_test_int(Z_R0, Address(Z_thread, JavaThread::suspend_flags_offset()));
2144    __ z_bre(no_block);
2145
2146    // Block. Save any potential method result value before the operation and
2147    // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
2148    // lets us share the oopMap we used when we went native rather than create
2149    // a distinct one for this pc.
2150    //
2151    __ bind(sync);
2152    __ z_acquire();
2153
2154    address entry_point = is_critical_native ? CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)
2155                                             : CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
2156
2157    __ call_VM_leaf(entry_point, Z_thread);
2158
2159    if (is_critical_native) {
2160      restore_native_result(masm, ret_type, workspace_slot_offset);
2161      __ z_bru(after_transition); // No thread state transition here.
2162    }
2163    __ bind(no_block);
2164    restore_native_result(masm, ret_type, workspace_slot_offset);
2165  }
2166
2167  //--------------------------------------------------------------------
2168  // Thread state is thread_in_native_trans. Any safepoint blocking has
2169  // already happened so we can now change state to _thread_in_Java.
2170  //--------------------------------------------------------------------
2171  // Transition from _thread_in_native_trans to _thread_in_Java.
2172  __ set_thread_state(_thread_in_Java);
2173  __ bind(after_transition);
2174
2175
2176  //--------------------------------------------------------------------
2177  // Reguard any pages if necessary.
2178  // Protect native result from being destroyed.
2179  //--------------------------------------------------------------------
2180
2181  Label no_reguard;
2182
2183  __ z_cli(Address(Z_thread, JavaThread::stack_guard_state_offset() + in_ByteSize(sizeof(JavaThread::StackGuardState) - 1)),
2184           JavaThread::stack_guard_yellow_reserved_disabled);
2185
2186  __ z_bre(no_reguard);
2187
2188  save_native_result(masm, ret_type, workspace_slot_offset);
2189  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), Z_method);
2190  restore_native_result(masm, ret_type, workspace_slot_offset);
2191
2192  __ bind(no_reguard);
2193
2194
2195  // Synchronized methods (slow path only)
2196  // No pending exceptions for now.
2197  //--------------------------------------------------------------------
2198  // Handle possibly pending exception (will unlock if necessary).
2199  // Native result is, if any is live, in Z_FRES or Z_RES.
2200  //--------------------------------------------------------------------
2201  // Unlock
2202  //--------------------------------------------------------------------
2203  if (method->is_synchronized()) {
2204    const Register r_oop        = Z_R11;
2205    const Register r_box        = Z_R12;
2206    const Register r_tmp1       = Z_R13;
2207    const Register r_tmp2       = Z_R7;
2208    Label done;
2209
2210    // Get unboxed oop of class mirror or object ...
2211    int   offset = method_is_static ? klass_offset : receiver_offset;
2212
2213    assert(offset != -1, "");
2214    __ z_lg(r_oop, offset, Z_SP);
2215
2216    // ... and address of lock object box.
2217    __ add2reg(r_box, lock_offset, Z_SP);
2218
2219    // Try fastpath for unlocking.
2220    __ compiler_fast_unlock_object(r_oop, r_box, r_tmp1, r_tmp2); // Don't use R1 as temp.
2221    __ z_bre(done);
2222
2223    // Slow path for unlocking.
2224    // Save and restore any potential method result value around the unlocking operation.
2225    const Register R_exc = Z_R11;
2226
2227    save_native_result(masm, ret_type, workspace_slot_offset);
2228
2229    // Must save pending exception around the slow-path VM call. Since it's a
2230    // leaf call, the pending exception (if any) can be kept in a register.
2231    __ z_lg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
2232    assert(R_exc->is_nonvolatile(), "exception register must be non-volatile");
2233
2234    // Must clear pending-exception before re-entering the VM. Since this is
2235    // a leaf call, pending-exception-oop can be safely kept in a register.
2236    __ clear_mem(Address(Z_thread, Thread::pending_exception_offset()), sizeof(intptr_t));
2237
2238    // Inline a special case of call_VM that disallows any pending_exception.
2239
2240    // Get locked oop from the handle we passed to jni.
2241    __ z_lg(Z_ARG1, offset, Z_SP);
2242    __ add2reg(Z_ARG2, lock_offset, Z_SP);
2243    __ z_lgr(Z_ARG3, Z_thread);
2244
2245    __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
2246
2247    __ call(Z_R1_scratch);
2248
2249#ifdef ASSERT
2250    {
2251      Label L;
2252      __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2253      __ z_bre(L);
2254      __ stop("no pending exception allowed on exit from IR::monitorexit");
2255      __ bind(L);
2256    }
2257#endif
2258
2259    // Check_forward_pending_exception jump to forward_exception if any pending
2260    // exception is set. The forward_exception routine expects to see the
2261    // exception in pending_exception and not in a register. Kind of clumsy,
2262    // since all folks who branch to forward_exception must have tested
2263    // pending_exception first and hence have it in a register already.
2264    __ z_stg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
2265    restore_native_result(masm, ret_type, workspace_slot_offset);
2266    __ z_bru(done);
2267    __ z_illtrap(0x66);
2268
2269    __ bind(done);
2270  }
2271
2272
2273  //--------------------------------------------------------------------
2274  // Clear "last Java frame" SP and PC.
2275  //--------------------------------------------------------------------
2276  __ verify_thread(); // Z_thread must be correct.
2277
2278  __ reset_last_Java_frame();
2279
2280  // Unpack oop result, e.g. JNIHandles::resolve result.
2281  if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
2282    __ resolve_jobject(Z_RET, /* tmp1 */ Z_R13, /* tmp2 */ Z_R7);
2283  }
2284
2285  if (CheckJNICalls) {
2286    // clear_pending_jni_exception_check
2287    __ clear_mem(Address(Z_thread, JavaThread::pending_jni_exception_check_fn_offset()), sizeof(oop));
2288  }
2289
2290  // Reset handle block.
2291  if (!is_critical_native) {
2292    __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::active_handles_offset()));
2293    __ clear_mem(Address(Z_R1_scratch, JNIHandleBlock::top_offset_in_bytes()), 4);
2294
2295    // Check for pending exceptions.
2296    __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2297    __ z_brne(handle_pending_exception);
2298  }
2299
2300
2301  //////////////////////////////////////////////////////////////////////
2302  // Return
2303  //////////////////////////////////////////////////////////////////////
2304
2305
2306#ifndef USE_RESIZE_FRAME
2307  __ pop_frame();                     // Pop wrapper frame.
2308#else
2309  __ resize_frame(frame_size_in_bytes, Z_R0_scratch);  // Revert stack extension.
2310#endif
2311  __ restore_return_pc();             // This is the way back to the caller.
2312  __ z_br(Z_R14);
2313
2314
2315  //////////////////////////////////////////////////////////////////////
2316  // Out-of-line calls to the runtime.
2317  //////////////////////////////////////////////////////////////////////
2318
2319
2320  if (!is_critical_native) {
2321
2322    //---------------------------------------------------------------------
2323    // Handler for pending exceptions (out-of-line).
2324    //---------------------------------------------------------------------
2325    // Since this is a native call, we know the proper exception handler
2326    // is the empty function. We just pop this frame and then jump to
2327    // forward_exception_entry. Z_R14 will contain the native caller's
2328    // return PC.
2329    __ bind(handle_pending_exception);
2330    __ pop_frame();
2331    __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
2332    __ restore_return_pc();
2333    __ z_br(Z_R1_scratch);
2334
2335    //---------------------------------------------------------------------
2336    // Handler for a cache miss (out-of-line)
2337    //---------------------------------------------------------------------
2338    __ call_ic_miss_handler(ic_miss, 0x77, 0, Z_R1_scratch);
2339  }
2340  __ flush();
2341
2342
2343  //////////////////////////////////////////////////////////////////////
2344  // end of code generation
2345  //////////////////////////////////////////////////////////////////////
2346
2347
2348  nmethod *nm = nmethod::new_native_nmethod(method,
2349                                            compile_id,
2350                                            masm->code(),
2351                                            (int)(wrapper_VEPStart-wrapper_CodeStart),
2352                                            (int)(wrapper_FrameDone-wrapper_CodeStart),
2353                                            stack_slots / VMRegImpl::slots_per_word,
2354                                            (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2355                                            in_ByteSize(lock_offset),
2356                                            oop_maps);
2357
2358  if (is_critical_native) {
2359    nm->set_lazy_critical_native(true);
2360  }
2361
2362  return nm;
2363#else
2364  ShouldNotReachHere();
2365  return NULL;
2366#endif // COMPILER2
2367}
2368
2369static address gen_c2i_adapter(MacroAssembler  *masm,
2370                               int total_args_passed,
2371                               int comp_args_on_stack,
2372                               const BasicType *sig_bt,
2373                               const VMRegPair *regs,
2374                               Label &skip_fixup) {
2375  // Before we get into the guts of the C2I adapter, see if we should be here
2376  // at all. We've come from compiled code and are attempting to jump to the
2377  // interpreter, which means the caller made a static call to get here
2378  // (vcalls always get a compiled target if there is one). Check for a
2379  // compiled target. If there is one, we need to patch the caller's call.
2380
2381  // These two defs MUST MATCH code in gen_i2c2i_adapter!
2382  const Register ientry = Z_R11;
2383  const Register code   = Z_R11;
2384
2385  address c2i_entrypoint;
2386  Label   patch_callsite;
2387
2388  // Regular (verified) c2i entry point.
2389  c2i_entrypoint = __ pc();
2390
2391  // Call patching needed?
2392  __ load_and_test_long(Z_R0_scratch, method_(code));
2393  __ z_lg(ientry, method_(interpreter_entry));  // Preload interpreter entry (also if patching).
2394  __ z_brne(patch_callsite);                    // Patch required if code != NULL (compiled target exists).
2395
2396  __ bind(skip_fixup);  // Return point from patch_callsite.
2397
2398  // Since all args are passed on the stack, total_args_passed*wordSize is the
2399  // space we need. We need ABI scratch area but we use the caller's since
2400  // it has already been allocated.
2401
2402  const int abi_scratch = frame::z_top_ijava_frame_abi_size;
2403  int       extraspace  = align_up(total_args_passed, 2)*wordSize + abi_scratch;
2404  Register  sender_SP   = Z_R10;
2405  Register  value       = Z_R12;
2406
2407  // Remember the senderSP so we can pop the interpreter arguments off of the stack.
2408  // In addition, frame manager expects initial_caller_sp in Z_R10.
2409  __ z_lgr(sender_SP, Z_SP);
2410
2411  // This should always fit in 14 bit immediate.
2412  __ resize_frame(-extraspace, Z_R0_scratch);
2413
2414  // We use the caller's ABI scratch area (out_preserved_stack_slots) for the initial
2415  // args. This essentially moves the callers ABI scratch area from the top to the
2416  // bottom of the arg area.
2417
2418  int st_off =  extraspace - wordSize;
2419
2420  // Now write the args into the outgoing interpreter space.
2421  for (int i = 0; i < total_args_passed; i++) {
2422    VMReg r_1 = regs[i].first();
2423    VMReg r_2 = regs[i].second();
2424    if (!r_1->is_valid()) {
2425      assert(!r_2->is_valid(), "");
2426      continue;
2427    }
2428    if (r_1->is_stack()) {
2429      // The calling convention produces OptoRegs that ignore the preserve area (abi scratch).
2430      // We must account for it here.
2431      int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
2432
2433      if (!r_2->is_valid()) {
2434        __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
2435      } else {
2436        // longs are given 2 64-bit slots in the interpreter,
2437        // but the data is passed in only 1 slot.
2438        if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2439#ifdef ASSERT
2440          __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
2441#endif
2442          st_off -= wordSize;
2443        }
2444        __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
2445      }
2446    } else {
2447      if (r_1->is_Register()) {
2448        if (!r_2->is_valid()) {
2449          __ z_st(r_1->as_Register(), st_off, Z_SP);
2450        } else {
2451          // longs are given 2 64-bit slots in the interpreter, but the
2452          // data is passed in only 1 slot.
2453          if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2454#ifdef ASSERT
2455            __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
2456#endif
2457            st_off -= wordSize;
2458          }
2459          __ z_stg(r_1->as_Register(), st_off, Z_SP);
2460        }
2461      } else {
2462        assert(r_1->is_FloatRegister(), "");
2463        if (!r_2->is_valid()) {
2464          __ z_ste(r_1->as_FloatRegister(), st_off, Z_SP);
2465        } else {
2466          // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the
2467          // data is passed in only 1 slot.
2468          // One of these should get known junk...
2469#ifdef ASSERT
2470          __ z_lzdr(Z_F1);
2471          __ z_std(Z_F1, st_off, Z_SP);
2472#endif
2473          st_off-=wordSize;
2474          __ z_std(r_1->as_FloatRegister(), st_off, Z_SP);
2475        }
2476      }
2477    }
2478    st_off -= wordSize;
2479  }
2480
2481
2482  // Jump to the interpreter just as if interpreter was doing it.
2483  __ add2reg(Z_esp, st_off, Z_SP);
2484
2485  // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in Z_R10.
2486  __ z_br(ientry);
2487
2488
2489  // Prevent illegal entry to out-of-line code.
2490  __ z_illtrap(0x22);
2491
2492  // Generate out-of-line runtime call to patch caller,
2493  // then continue as interpreted.
2494
2495  // IF you lose the race you go interpreted.
2496  // We don't see any possible endless c2i -> i2c -> c2i ...
2497  // transitions no matter how rare.
2498  __ bind(patch_callsite);
2499
2500  RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
2501  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), Z_method, Z_R14);
2502  RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
2503  __ z_bru(skip_fixup);
2504
2505  // end of out-of-line code
2506
2507  return c2i_entrypoint;
2508}
2509
2510// On entry, the following registers are set
2511//
2512//    Z_thread  r8  - JavaThread*
2513//    Z_method  r9  - callee's method (method to be invoked)
2514//    Z_esp     r7  - operand (or expression) stack pointer of caller. one slot above last arg.
2515//    Z_SP      r15 - SP prepared by call stub such that caller's outgoing args are near top
2516//
2517void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
2518                                    int total_args_passed,
2519                                    int comp_args_on_stack,
2520                                    const BasicType *sig_bt,
2521                                    const VMRegPair *regs) {
2522  const Register value = Z_R12;
2523  const Register ld_ptr= Z_esp;
2524
2525  int ld_offset = total_args_passed * wordSize;
2526
2527  // Cut-out for having no stack args.
2528  if (comp_args_on_stack) {
2529    // Sig words on the stack are greater than VMRegImpl::stack0. Those in
2530    // registers are below. By subtracting stack0, we either get a negative
2531    // number (all values in registers) or the maximum stack slot accessed.
2532    // Convert VMRegImpl (4 byte) stack slots to words.
2533    int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
2534    // Round up to miminum stack alignment, in wordSize
2535    comp_words_on_stack = align_up(comp_words_on_stack, 2);
2536
2537    __ resize_frame(-comp_words_on_stack*wordSize, Z_R0_scratch);
2538  }
2539
2540  // Now generate the shuffle code. Pick up all register args and move the
2541  // rest through register value=Z_R12.
2542  for (int i = 0; i < total_args_passed; i++) {
2543    if (sig_bt[i] == T_VOID) {
2544      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
2545      continue;
2546    }
2547
2548    // Pick up 0, 1 or 2 words from ld_ptr.
2549    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
2550           "scrambled load targets?");
2551    VMReg r_1 = regs[i].first();
2552    VMReg r_2 = regs[i].second();
2553    if (!r_1->is_valid()) {
2554      assert(!r_2->is_valid(), "");
2555      continue;
2556    }
2557    if (r_1->is_FloatRegister()) {
2558      if (!r_2->is_valid()) {
2559        __ z_le(r_1->as_FloatRegister(), ld_offset, ld_ptr);
2560        ld_offset-=wordSize;
2561      } else {
2562        // Skip the unused interpreter slot.
2563        __ z_ld(r_1->as_FloatRegister(), ld_offset - wordSize, ld_ptr);
2564        ld_offset -= 2 * wordSize;
2565      }
2566    } else {
2567      if (r_1->is_stack()) {
2568        // Must do a memory to memory move.
2569        int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
2570
2571        if (!r_2->is_valid()) {
2572          __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
2573        } else {
2574          // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
2575          // data is passed in only 1 slot.
2576          if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2577            ld_offset -= wordSize;
2578          }
2579          __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
2580        }
2581      } else {
2582        if (!r_2->is_valid()) {
2583          // Not sure we need to do this but it shouldn't hurt.
2584          if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ADDRESS || sig_bt[i] == T_ARRAY) {
2585            __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
2586          } else {
2587            __ z_l(r_1->as_Register(), ld_offset, ld_ptr);
2588          }
2589        } else {
2590          // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
2591          // data is passed in only 1 slot.
2592          if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2593            ld_offset -= wordSize;
2594          }
2595          __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
2596        }
2597      }
2598      ld_offset -= wordSize;
2599    }
2600  }
2601
2602  // Jump to the compiled code just as if compiled code was doing it.
2603  // load target address from method oop:
2604  __ z_lg(Z_R1_scratch, Address(Z_method, Method::from_compiled_offset()));
2605
2606  // Store method oop into thread->callee_target.
2607  // 6243940: We might end up in handle_wrong_method if
2608  // the callee is deoptimized as we race thru here. If that
2609  // happens we don't want to take a safepoint because the
2610  // caller frame will look interpreted and arguments are now
2611  // "compiled" so it is much better to make this transition
2612  // invisible to the stack walking code. Unfortunately, if
2613  // we try and find the callee by normal means a safepoint
2614  // is possible. So we stash the desired callee in the thread
2615  // and the vm will find it there should this case occur.
2616  __ z_stg(Z_method, thread_(callee_target));
2617
2618  __ z_br(Z_R1_scratch);
2619}
2620
2621AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
2622                                                            int total_args_passed,
2623                                                            int comp_args_on_stack,
2624                                                            const BasicType *sig_bt,
2625                                                            const VMRegPair *regs,
2626                                                            AdapterFingerPrint* fingerprint) {
2627  __ align(CodeEntryAlignment);
2628  address i2c_entry = __ pc();
2629  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
2630
2631  address c2i_unverified_entry;
2632
2633  Label skip_fixup;
2634  {
2635    Label ic_miss;
2636    const int klass_offset         = oopDesc::klass_offset_in_bytes();
2637    const int holder_klass_offset  = CompiledICHolder::holder_klass_offset();
2638    const int holder_method_offset = CompiledICHolder::holder_method_offset();
2639
2640    // Out-of-line call to ic_miss handler.
2641    __ call_ic_miss_handler(ic_miss, 0x11, 0, Z_R1_scratch);
2642
2643    // Unverified Entry Point UEP
2644    __ align(CodeEntryAlignment);
2645    c2i_unverified_entry = __ pc();
2646
2647    // Check the pointers.
2648    if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) {
2649      __ z_ltgr(Z_ARG1, Z_ARG1);
2650      __ z_bre(ic_miss);
2651    }
2652    __ verify_oop(Z_ARG1);
2653
2654    // Check ic: object class <-> cached class
2655    // Compress cached class for comparison. That's more efficient.
2656    if (UseCompressedClassPointers) {
2657      __ z_lg(Z_R11, holder_klass_offset, Z_method);             // Z_R11 is overwritten a few instructions down anyway.
2658      __ compare_klass_ptr(Z_R11, klass_offset, Z_ARG1, false); // Cached class can't be zero.
2659    } else {
2660      __ z_clc(klass_offset, sizeof(void *)-1, Z_ARG1, holder_klass_offset, Z_method);
2661    }
2662    __ z_brne(ic_miss);  // Cache miss: call runtime to handle this.
2663
2664    // This def MUST MATCH code in gen_c2i_adapter!
2665    const Register code = Z_R11;
2666
2667    __ z_lg(Z_method, holder_method_offset, Z_method);
2668    __ load_and_test_long(Z_R0, method_(code));
2669    __ z_brne(ic_miss);  // Cache miss: call runtime to handle this.
2670
2671    // Fallthru to VEP. Duplicate LTG, but saved taken branch.
2672  }
2673
2674  address c2i_entry;
2675  c2i_entry = gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
2676
2677  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
2678}
2679
2680// This function returns the adjust size (in number of words) to a c2i adapter
2681// activation for use during deoptimization.
2682//
2683// Actually only compiled frames need to be adjusted, but it
2684// doesn't harm to adjust entry and interpreter frames, too.
2685//
2686int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
2687  assert(callee_locals >= callee_parameters,
2688          "test and remove; got more parms than locals");
2689  // Handle the abi adjustment here instead of doing it in push_skeleton_frames.
2690  return (callee_locals - callee_parameters) * Interpreter::stackElementWords +
2691         frame::z_parent_ijava_frame_abi_size / BytesPerWord;
2692}
2693
2694uint SharedRuntime::out_preserve_stack_slots() {
2695  return frame::z_jit_out_preserve_size/VMRegImpl::stack_slot_size;
2696}
2697
2698//
2699// Frame generation for deopt and uncommon trap blobs.
2700//
2701static void push_skeleton_frame(MacroAssembler* masm,
2702                          /* Unchanged */
2703                          Register frame_sizes_reg,
2704                          Register pcs_reg,
2705                          /* Invalidate */
2706                          Register frame_size_reg,
2707                          Register pc_reg) {
2708  BLOCK_COMMENT("  push_skeleton_frame {");
2709   __ z_lg(pc_reg, 0, pcs_reg);
2710   __ z_lg(frame_size_reg, 0, frame_sizes_reg);
2711   __ z_stg(pc_reg, _z_abi(return_pc), Z_SP);
2712   Register fp = pc_reg;
2713   __ push_frame(frame_size_reg, fp);
2714#ifdef ASSERT
2715   // The magic is required for successful walking skeletal frames.
2716   __ load_const_optimized(frame_size_reg/*tmp*/, frame::z_istate_magic_number);
2717   __ z_stg(frame_size_reg, _z_ijava_state_neg(magic), fp);
2718   // Fill other slots that are supposedly not necessary with eye catchers.
2719   __ load_const_optimized(frame_size_reg/*use as tmp*/, 0xdeadbad1);
2720   __ z_stg(frame_size_reg, _z_ijava_state_neg(top_frame_sp), fp);
2721   // The sender_sp of the bottom frame is set before pushing it.
2722   // The sender_sp of non bottom frames is their caller's top_frame_sp, which
2723   // is unknown here. Luckily it is not needed before filling the frame in
2724   // layout_activation(), we assert this by setting an eye catcher (see
2725   // comments on sender_sp in frame_s390.hpp).
2726   __ z_stg(frame_size_reg, _z_ijava_state_neg(sender_sp), Z_SP);
2727#endif // ASSERT
2728  BLOCK_COMMENT("  } push_skeleton_frame");
2729}
2730
2731// Loop through the UnrollBlock info and create new frames.
2732static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
2733                            /* read */
2734                            Register unroll_block_reg,
2735                            /* invalidate */
2736                            Register frame_sizes_reg,
2737                            Register number_of_frames_reg,
2738                            Register pcs_reg,
2739                            Register tmp1,
2740                            Register tmp2) {
2741  BLOCK_COMMENT("push_skeleton_frames {");
2742  // _number_of_frames is of type int (deoptimization.hpp).
2743  __ z_lgf(number_of_frames_reg,
2744           Address(unroll_block_reg, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2745  __ z_lg(pcs_reg,
2746          Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2747  __ z_lg(frame_sizes_reg,
2748          Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
2749
2750  // stack: (caller_of_deoptee, ...).
2751
2752  // If caller_of_deoptee is a compiled frame, then we extend it to make
2753  // room for the callee's locals and the frame::z_parent_ijava_frame_abi.
2754  // See also Deoptimization::last_frame_adjust() above.
2755  // Note: entry and interpreted frames are adjusted, too. But this doesn't harm.
2756
2757  __ z_lgf(Z_R1_scratch,
2758           Address(unroll_block_reg, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
2759  __ z_lgr(tmp1, Z_SP);  // Save the sender sp before extending the frame.
2760  __ resize_frame_sub(Z_R1_scratch, tmp2/*tmp*/);
2761  // The oldest skeletal frame requires a valid sender_sp to make it walkable
2762  // (it is required to find the original pc of caller_of_deoptee if it is marked
2763  // for deoptimization - see nmethod::orig_pc_addr()).
2764  __ z_stg(tmp1, _z_ijava_state_neg(sender_sp), Z_SP);
2765
2766  // Now push the new interpreter frames.
2767  Label loop, loop_entry;
2768
2769  // Make sure that there is at least one entry in the array.
2770  DEBUG_ONLY(__ z_ltgr(number_of_frames_reg, number_of_frames_reg));
2771  __ asm_assert_ne("array_size must be > 0", 0x205);
2772
2773  __ z_bru(loop_entry);
2774
2775  __ bind(loop);
2776
2777  __ add2reg(frame_sizes_reg, wordSize);
2778  __ add2reg(pcs_reg, wordSize);
2779
2780  __ bind(loop_entry);
2781
2782  // Allocate a new frame, fill in the pc.
2783  push_skeleton_frame(masm, frame_sizes_reg, pcs_reg, tmp1, tmp2);
2784
2785  __ z_aghi(number_of_frames_reg, -1);  // Emit AGHI, because it sets the condition code
2786  __ z_brne(loop);
2787
2788  // Set the top frame's return pc.
2789  __ add2reg(pcs_reg, wordSize);
2790  __ z_lg(Z_R0_scratch, 0, pcs_reg);
2791  __ z_stg(Z_R0_scratch, _z_abi(return_pc), Z_SP);
2792  BLOCK_COMMENT("} push_skeleton_frames");
2793}
2794
2795//------------------------------generate_deopt_blob----------------------------
2796void SharedRuntime::generate_deopt_blob() {
2797  // Allocate space for the code.
2798  ResourceMark rm;
2799  // Setup code generation tools.
2800  CodeBuffer buffer("deopt_blob", 2048, 1024);
2801  InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
2802  Label exec_mode_initialized;
2803  OopMap* map = NULL;
2804  OopMapSet *oop_maps = new OopMapSet();
2805
2806  unsigned int start_off = __ offset();
2807  Label cont;
2808
2809  // --------------------------------------------------------------------------
2810  // Normal entry (non-exception case)
2811  //
2812  // We have been called from the deopt handler of the deoptee.
2813  // Z_R14 points behind the call in the deopt handler. We adjust
2814  // it such that it points to the start of the deopt handler.
2815  // The return_pc has been stored in the frame of the deoptee and
2816  // will replace the address of the deopt_handler in the call
2817  // to Deoptimization::fetch_unroll_info below.
2818  // The (int) cast is necessary, because -((unsigned int)14)
2819  // is an unsigned int.
2820  __ add2reg(Z_R14, -(int)HandlerImpl::size_deopt_handler());
2821
2822  const Register   exec_mode_reg = Z_tmp_1;
2823
2824  // stack: (deoptee, caller of deoptee, ...)
2825
2826  // pushes an "unpack" frame
2827  // R14 contains the return address pointing into the deoptimized
2828  // nmethod that was valid just before the nmethod was deoptimized.
2829  // save R14 into the deoptee frame.  the `fetch_unroll_info'
2830  // procedure called below will read it from there.
2831  map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
2832
2833  // note the entry point.
2834  __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_deopt);
2835  __ z_bru(exec_mode_initialized);
2836
2837#ifndef COMPILER1
2838  int reexecute_offset = 1; // odd offset will produce odd pc, which triggers an hardware trap
2839#else
2840  // --------------------------------------------------------------------------
2841  // Reexecute entry
2842  // - Z_R14 = Deopt Handler in nmethod
2843
2844  int reexecute_offset = __ offset() - start_off;
2845
2846  // No need to update map as each call to save_live_registers will produce identical oopmap
2847  (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
2848
2849  __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_reexecute);
2850  __ z_bru(exec_mode_initialized);
2851#endif
2852
2853
2854  // --------------------------------------------------------------------------
2855  // Exception entry. We reached here via a branch. Registers on entry:
2856  // - Z_EXC_OOP (Z_ARG1) = exception oop
2857  // - Z_EXC_PC  (Z_ARG2) = the exception pc.
2858
2859  int exception_offset = __ offset() - start_off;
2860
2861  // all registers are dead at this entry point, except for Z_EXC_OOP, and
2862  // Z_EXC_PC which contain the exception oop and exception pc
2863  // respectively.  Set them in TLS and fall thru to the
2864  // unpack_with_exception_in_tls entry point.
2865
2866  // Store exception oop and pc in thread (location known to GC).
2867  // Need this since the call to "fetch_unroll_info()" may safepoint.
2868  __ z_stg(Z_EXC_OOP, Address(Z_thread, JavaThread::exception_oop_offset()));
2869  __ z_stg(Z_EXC_PC,  Address(Z_thread, JavaThread::exception_pc_offset()));
2870
2871  // fall through
2872
2873  int exception_in_tls_offset = __ offset() - start_off;
2874
2875  // new implementation because exception oop is now passed in JavaThread
2876
2877  // Prolog for exception case
2878  // All registers must be preserved because they might be used by LinearScan
2879  // Exceptiop oop and throwing PC are passed in JavaThread
2880
2881  // load throwing pc from JavaThread and us it as the return address of the current frame.
2882  __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::exception_pc_offset()));
2883
2884  // Save everything in sight.
2885  (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R1_scratch);
2886
2887  // Now it is safe to overwrite any register
2888
2889  // Clear the exception pc field in JavaThread
2890  __ clear_mem(Address(Z_thread, JavaThread::exception_pc_offset()), 8);
2891
2892  // Deopt during an exception.  Save exec mode for unpack_frames.
2893  __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_exception);
2894
2895
2896#ifdef ASSERT
2897  // verify that there is really an exception oop in JavaThread
2898  __ z_lg(Z_ARG1, Address(Z_thread, JavaThread::exception_oop_offset()));
2899  __ verify_oop(Z_ARG1);
2900
2901  // verify that there is no pending exception
2902  __ asm_assert_mem8_is_zero(in_bytes(Thread::pending_exception_offset()), Z_thread,
2903                             "must not have pending exception here", __LINE__);
2904#endif
2905
2906  // --------------------------------------------------------------------------
2907  // At this point, the live registers are saved and
2908  // the exec_mode_reg has been set up correctly.
2909  __ bind(exec_mode_initialized);
2910
2911  // stack: ("unpack" frame, deoptee, caller_of_deoptee, ...).
2912
2913  {
2914  const Register unroll_block_reg  = Z_tmp_2;
2915
2916  // we need to set `last_Java_frame' because `fetch_unroll_info' will
2917  // call `last_Java_frame()'.  however we can't block and no gc will
2918  // occur so we don't need an oopmap. the value of the pc in the
2919  // frame is not particularly important.  it just needs to identify the blob.
2920
2921  // Don't set last_Java_pc anymore here (is implicitly NULL then).
2922  // the correct PC is retrieved in pd_last_frame() in that case.
2923  __ set_last_Java_frame(/*sp*/Z_SP, noreg);
2924  // With EscapeAnalysis turned on, this call may safepoint
2925  // despite it's marked as "leaf call"!
2926  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), Z_thread, exec_mode_reg);
2927  // Set an oopmap for the call site this describes all our saved volatile registers
2928  int offs = __ offset();
2929  oop_maps->add_gc_map(offs, map);
2930
2931  __ reset_last_Java_frame();
2932  // save the return value.
2933  __ z_lgr(unroll_block_reg, Z_RET);
2934  // restore the return registers that have been saved
2935  // (among other registers) by save_live_registers(...).
2936  RegisterSaver::restore_result_registers(masm);
2937
2938  // reload the exec mode from the UnrollBlock (it might have changed)
2939  __ z_llgf(exec_mode_reg, Address(unroll_block_reg, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
2940
2941  // In excp_deopt_mode, restore and clear exception oop which we
2942  // stored in the thread during exception entry above. The exception
2943  // oop will be the return value of this stub.
2944  NearLabel skip_restore_excp;
2945  __ compare64_and_branch(exec_mode_reg, Deoptimization::Unpack_exception, Assembler::bcondNotEqual, skip_restore_excp);
2946  __ z_lg(Z_RET, thread_(exception_oop));
2947  __ clear_mem(thread_(exception_oop), 8);
2948  __ bind(skip_restore_excp);
2949
2950  // remove the "unpack" frame
2951  __ pop_frame();
2952
2953  // stack: (deoptee, caller of deoptee, ...).
2954
2955  // pop the deoptee's frame
2956  __ pop_frame();
2957
2958  // stack: (caller_of_deoptee, ...).
2959
2960  // loop through the `UnrollBlock' info and create interpreter frames.
2961  push_skeleton_frames(masm, true/*deopt*/,
2962                  unroll_block_reg,
2963                  Z_tmp_3,
2964                  Z_tmp_4,
2965                  Z_ARG5,
2966                  Z_ARG4,
2967                  Z_ARG3);
2968
2969  // stack: (skeletal interpreter frame, ..., optional skeletal
2970  // interpreter frame, caller of deoptee, ...).
2971  }
2972
2973  // push an "unpack" frame taking care of float / int return values.
2974  __ push_frame(RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers));
2975
2976  // stack: (unpack frame, skeletal interpreter frame, ..., optional
2977  // skeletal interpreter frame, caller of deoptee, ...).
2978
2979  // spill live volatile registers since we'll do a call.
2980  __ z_stg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
2981  __ z_std(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);
2982
2983  // let the unpacker layout information in the skeletal frames just allocated.
2984  __ get_PC(Z_RET);
2985  __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_RET);
2986  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
2987                  Z_thread/*thread*/, exec_mode_reg/*exec_mode*/);
2988
2989  __ reset_last_Java_frame();
2990
2991  // restore the volatiles saved above.
2992  __ z_lg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
2993  __ z_ld(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);
2994
2995  // pop the "unpack" frame.
2996  __ pop_frame();
2997  __ restore_return_pc();
2998
2999  // stack: (top interpreter frame, ..., optional interpreter frame,
3000  // caller of deoptee, ...).
3001
3002  __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
3003  __ restore_bcp();
3004  __ restore_locals();
3005  __ restore_esp();
3006
3007  // return to the interpreter entry point.
3008  __ z_br(Z_R14);
3009
3010  // Make sure all code is generated
3011  masm->flush();
3012
3013  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
3014  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
3015}
3016
3017
3018#ifdef COMPILER2
3019//------------------------------generate_uncommon_trap_blob--------------------
3020void SharedRuntime::generate_uncommon_trap_blob() {
3021  // Allocate space for the code
3022  ResourceMark rm;
3023  // Setup code generation tools
3024  CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
3025  InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
3026
3027  Register unroll_block_reg = Z_tmp_1;
3028  Register klass_index_reg  = Z_ARG2;
3029  Register unc_trap_reg     = Z_ARG2;
3030
3031  // stack: (deoptee, caller_of_deoptee, ...).
3032
3033  // push a dummy "unpack" frame and call
3034  // `Deoptimization::uncommon_trap' to pack the compiled frame into a
3035  // vframe array and return the `UnrollBlock' information.
3036
3037  // save R14 to compiled frame.
3038  __ save_return_pc();
3039  // push the "unpack_frame".
3040  __ push_frame_abi160(0);
3041
3042  // stack: (unpack frame, deoptee, caller_of_deoptee, ...).
3043
3044  // set the "unpack" frame as last_Java_frame.
3045  // `Deoptimization::uncommon_trap' expects it and considers its
3046  // sender frame as the deoptee frame.
3047  __ get_PC(Z_R1_scratch);
3048  __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);
3049
3050  __ z_lgr(klass_index_reg, Z_ARG1);  // passed implicitly as ARG2
3051  __ z_lghi(Z_ARG3, Deoptimization::Unpack_uncommon_trap);  // passed implicitly as ARG3
3052  BLOCK_COMMENT("call Deoptimization::uncommon_trap()");
3053  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), Z_thread);
3054
3055  __ reset_last_Java_frame();
3056
3057  // pop the "unpack" frame
3058  __ pop_frame();
3059
3060  // stack: (deoptee, caller_of_deoptee, ...).
3061
3062  // save the return value.
3063  __ z_lgr(unroll_block_reg, Z_RET);
3064
3065  // pop the deoptee frame.
3066  __ pop_frame();
3067
3068  // stack: (caller_of_deoptee, ...).
3069
3070#ifdef ASSERT
3071  assert(Immediate::is_uimm8(Deoptimization::Unpack_LIMIT), "Code not fit for larger immediates");
3072  assert(Immediate::is_uimm8(Deoptimization::Unpack_uncommon_trap), "Code not fit for larger immediates");
3073  const int unpack_kind_byte_offset = Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()
3074#ifndef VM_LITTLE_ENDIAN
3075  + 3
3076#endif
3077  ;
3078  if (Displacement::is_shortDisp(unpack_kind_byte_offset)) {
3079    __ z_cli(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
3080  } else {
3081    __ z_cliy(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
3082  }
3083  __ asm_assert_eq("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap", 0);
3084#endif
3085
3086  __ zap_from_to(Z_SP, Z_SP, Z_R0_scratch, Z_R1, 500, -1);
3087
3088  // allocate new interpreter frame(s) and possibly resize the caller's frame
3089  // (no more adapters !)
3090  push_skeleton_frames(masm, false/*deopt*/,
3091                  unroll_block_reg,
3092                  Z_tmp_2,
3093                  Z_tmp_3,
3094                  Z_tmp_4,
3095                  Z_ARG5,
3096                  Z_ARG4);
3097
3098  // stack: (skeletal interpreter frame, ..., optional skeletal
3099  // interpreter frame, (resized) caller of deoptee, ...).
3100
3101  // push a dummy "unpack" frame taking care of float return values.
3102  // call `Deoptimization::unpack_frames' to layout information in the
3103  // interpreter frames just created
3104
3105  // push the "unpack" frame
3106   const unsigned int framesize_in_bytes = __ push_frame_abi160(0);
3107
3108  // stack: (unpack frame, skeletal interpreter frame, ..., optional
3109  // skeletal interpreter frame, (resized) caller of deoptee, ...).
3110
3111  // set the "unpack" frame as last_Java_frame
3112  __ get_PC(Z_R1_scratch);
3113  __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);
3114
3115  // indicate it is the uncommon trap case
3116  BLOCK_COMMENT("call Deoptimization::Unpack_uncommon_trap()");
3117  __ load_const_optimized(unc_trap_reg, Deoptimization::Unpack_uncommon_trap);
3118  // let the unpacker layout information in the skeletal frames just allocated.
3119  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), Z_thread);
3120
3121  __ reset_last_Java_frame();
3122  // pop the "unpack" frame
3123  __ pop_frame();
3124  // restore LR from top interpreter frame
3125  __ restore_return_pc();
3126
3127  // stack: (top interpreter frame, ..., optional interpreter frame,
3128  // (resized) caller of deoptee, ...).
3129
3130  __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
3131  __ restore_bcp();
3132  __ restore_locals();
3133  __ restore_esp();
3134
3135  // return to the interpreter entry point
3136  __ z_br(Z_R14);
3137
3138  masm->flush();
3139  _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, framesize_in_bytes/wordSize);
3140}
3141#endif // COMPILER2
3142
3143
3144//------------------------------generate_handler_blob------
3145//
3146// Generate a special Compile2Runtime blob that saves all registers,
3147// and setup oopmap.
3148SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
3149  assert(StubRoutines::forward_exception_entry() != NULL,
3150         "must be generated before");
3151
3152  ResourceMark rm;
3153  OopMapSet *oop_maps = new OopMapSet();
3154  OopMap* map;
3155
3156  // Allocate space for the code. Setup code generation tools.
3157  CodeBuffer buffer("handler_blob", 2048, 1024);
3158  MacroAssembler* masm = new MacroAssembler(&buffer);
3159
3160  unsigned int start_off = __ offset();
3161  address call_pc = NULL;
3162  int frame_size_in_bytes;
3163
3164  bool cause_return = (poll_type == POLL_AT_RETURN);
3165  // Make room for return address (or push it again)
3166  if (!cause_return)
3167    __ z_lg(Z_R14, Address(Z_thread, JavaThread::saved_exception_pc_offset()));
3168
3169  // Save registers, fpu state, and flags
3170  map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
3171
3172  // The following is basically a call_VM. However, we need the precise
3173  // address of the call in order to generate an oopmap. Hence, we do all the
3174  // work outselves.
3175  __ set_last_Java_frame(Z_SP, noreg);
3176
3177  // call into the runtime to handle the safepoint poll
3178  __ call_VM_leaf(call_ptr, Z_thread);
3179
3180
3181  // Set an oopmap for the call site. This oopmap will map all
3182  // oop-registers and debug-info registers as callee-saved. This
3183  // will allow deoptimization at this safepoint to find all possible
3184  // debug-info recordings, as well as let GC find all oops.
3185
3186  oop_maps->add_gc_map((int)(__ offset()-start_off), map);
3187
3188  Label noException;
3189
3190  __ reset_last_Java_frame();
3191
3192  __ load_and_test_long(Z_R1, thread_(pending_exception));
3193  __ z_bre(noException);
3194
3195  // Pending exception case, used (sporadically) by
3196  // api/java_lang/Thread.State/index#ThreadState et al.
3197  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3198
3199  // Jump to forward_exception_entry, with the issuing PC in Z_R14
3200  // so it looks like the original nmethod called forward_exception_entry.
3201  __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
3202  __ z_br(Z_R1_scratch);
3203
3204  // No exception case
3205  __ bind(noException);
3206
3207  // Normal exit, restore registers and exit.
3208  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3209
3210  __ z_br(Z_R14);
3211
3212  // Make sure all code is generated
3213  masm->flush();
3214
3215  // Fill-out other meta info
3216  return SafepointBlob::create(&buffer, oop_maps, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
3217}
3218
3219
3220//
3221// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss
3222//
3223// Generate a stub that calls into vm to find out the proper destination
3224// of a Java call. All the argument registers are live at this point
3225// but since this is generic code we don't know what they are and the caller
3226// must do any gc of the args.
3227//
3228RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
3229  assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
3230
3231  // allocate space for the code
3232  ResourceMark rm;
3233
3234  CodeBuffer buffer(name, 1000, 512);
3235  MacroAssembler* masm                = new MacroAssembler(&buffer);
3236
3237  OopMapSet *oop_maps = new OopMapSet();
3238  OopMap* map = NULL;
3239
3240  unsigned int start_off = __ offset();
3241
3242  map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
3243
3244  // We must save a PC from within the stub as return PC
3245  // C code doesn't store the LR where we expect the PC,
3246  // so we would run into trouble upon stack walking.
3247  __ get_PC(Z_R1_scratch);
3248
3249  unsigned int frame_complete = __ offset();
3250
3251  __ set_last_Java_frame(/*sp*/Z_SP, Z_R1_scratch);
3252
3253  __ call_VM_leaf(destination, Z_thread, Z_method);
3254
3255
3256  // Set an oopmap for the call site.
3257  // We need this not only for callee-saved registers, but also for volatile
3258  // registers that the compiler might be keeping live across a safepoint.
3259
3260  oop_maps->add_gc_map((int)(frame_complete-start_off), map);
3261
3262  // clear last_Java_sp
3263  __ reset_last_Java_frame();
3264
3265  // check for pending exceptions
3266  Label pending;
3267  __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
3268  __ z_brne(pending);
3269
3270  __ z_lgr(Z_R1_scratch, Z_R2); // r1 is neither saved nor restored, r2 contains the continuation.
3271  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3272
3273  // get the returned method
3274  __ get_vm_result_2(Z_method);
3275
3276  // We are back the the original state on entry and ready to go.
3277  __ z_br(Z_R1_scratch);
3278
3279  // Pending exception after the safepoint
3280
3281  __ bind(pending);
3282
3283  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3284
3285  // exception pending => remove activation and forward to exception handler
3286
3287  __ z_lgr(Z_R2, Z_R0); // pending_exception
3288  __ clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(jlong));
3289  __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
3290  __ z_br(Z_R1_scratch);
3291
3292  // -------------
3293  // make sure all code is generated
3294  masm->flush();
3295
3296  // return the blob
3297  // frame_size_words or bytes??
3298  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize,
3299                                       oop_maps, true);
3300
3301}
3302
3303//------------------------------Montgomery multiplication------------------------
3304//
3305
3306// Subtract 0:b from carry:a. Return carry.
3307static unsigned long
3308sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
3309  unsigned long i, c = 8 * (unsigned long)(len - 1);
3310  __asm__ __volatile__ (
3311    "SLGR   %[i], %[i]         \n" // initialize to 0 and pre-set carry
3312    "LGHI   0, 8               \n" // index increment (for BRXLG)
3313    "LGR    1, %[c]            \n" // index limit (for BRXLG)
3314    "0:                        \n"
3315    "LG     %[c], 0(%[i],%[a]) \n"
3316    "SLBG   %[c], 0(%[i],%[b]) \n" // subtract with borrow
3317    "STG    %[c], 0(%[i],%[a]) \n"
3318    "BRXLG  %[i], 0, 0b        \n" // while ((i+=8)<limit);
3319    "SLBGR  %[c], %[c]         \n" // save carry - 1
3320    : [i]"=&a"(i), [c]"+r"(c)
3321    : [a]"a"(a), [b]"a"(b)
3322    : "cc", "memory", "r0", "r1"
3323 );
3324  return carry + c;
3325}
3326
3327// Multiply (unsigned) Long A by Long B, accumulating the double-
3328// length result into the accumulator formed of T0, T1, and T2.
3329inline void MACC(unsigned long A[], long A_ind,
3330                 unsigned long B[], long B_ind,
3331                 unsigned long &T0, unsigned long &T1, unsigned long &T2) {
3332  long A_si = 8 * A_ind,
3333       B_si = 8 * B_ind;
3334  __asm__ __volatile__ (
3335    "LG     1, 0(%[A_si],%[A]) \n"
3336    "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
3337    "ALGR   %[T0], 1           \n"
3338    "LGHI   1, 0               \n" // r1 = 0
3339    "ALCGR  %[T1], 0           \n"
3340    "ALCGR  %[T2], 1           \n"
3341    : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
3342    : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si)
3343    : "cc", "r0", "r1"
3344 );
3345}
3346
3347// As above, but add twice the double-length result into the
3348// accumulator.
3349inline void MACC2(unsigned long A[], long A_ind,
3350                  unsigned long B[], long B_ind,
3351                  unsigned long &T0, unsigned long &T1, unsigned long &T2) {
3352  const unsigned long zero = 0;
3353  long A_si = 8 * A_ind,
3354       B_si = 8 * B_ind;
3355  __asm__ __volatile__ (
3356    "LG     1, 0(%[A_si],%[A]) \n"
3357    "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
3358    "ALGR   %[T0], 1           \n"
3359    "ALCGR  %[T1], 0           \n"
3360    "ALCGR  %[T2], %[zero]     \n"
3361    "ALGR   %[T0], 1           \n"
3362    "ALCGR  %[T1], 0           \n"
3363    "ALCGR  %[T2], %[zero]     \n"
3364    : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
3365    : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si), [zero]"r"(zero)
3366    : "cc", "r0", "r1"
3367 );
3368}
3369
3370// Fast Montgomery multiplication. The derivation of the algorithm is
3371// in "A Cryptographic Library for the Motorola DSP56000,
3372// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237".
3373static void
3374montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
3375                    unsigned long m[], unsigned long inv, int len) {
3376  unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3377  int i;
3378
3379  assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3380
3381  for (i = 0; i < len; i++) {
3382    int j;
3383    for (j = 0; j < i; j++) {
3384      MACC(a, j, b, i-j, t0, t1, t2);
3385      MACC(m, j, n, i-j, t0, t1, t2);
3386    }
3387    MACC(a, i, b, 0, t0, t1, t2);
3388    m[i] = t0 * inv;
3389    MACC(m, i, n, 0, t0, t1, t2);
3390
3391    assert(t0 == 0, "broken Montgomery multiply");
3392
3393    t0 = t1; t1 = t2; t2 = 0;
3394  }
3395
3396  for (i = len; i < 2 * len; i++) {
3397    int j;
3398    for (j = i - len + 1; j < len; j++) {
3399      MACC(a, j, b, i-j, t0, t1, t2);
3400      MACC(m, j, n, i-j, t0, t1, t2);
3401    }
3402    m[i-len] = t0;
3403    t0 = t1; t1 = t2; t2 = 0;
3404  }
3405
3406  while (t0) {
3407    t0 = sub(m, n, t0, len);
3408  }
3409}
3410
3411// Fast Montgomery squaring. This uses asymptotically 25% fewer
3412// multiplies so it should be up to 25% faster than Montgomery
3413// multiplication. However, its loop control is more complex and it
3414// may actually run slower on some machines.
3415static void
3416montgomery_square(unsigned long a[], unsigned long n[],
3417                  unsigned long m[], unsigned long inv, int len) {
3418  unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3419  int i;
3420
3421  assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3422
3423  for (i = 0; i < len; i++) {
3424    int j;
3425    int end = (i+1)/2;
3426    for (j = 0; j < end; j++) {
3427      MACC2(a, j, a, i-j, t0, t1, t2);
3428      MACC(m, j, n, i-j, t0, t1, t2);
3429    }
3430    if ((i & 1) == 0) {
3431      MACC(a, j, a, j, t0, t1, t2);
3432    }
3433    for (; j < i; j++) {
3434      MACC(m, j, n, i-j, t0, t1, t2);
3435    }
3436    m[i] = t0 * inv;
3437    MACC(m, i, n, 0, t0, t1, t2);
3438
3439    assert(t0 == 0, "broken Montgomery square");
3440
3441    t0 = t1; t1 = t2; t2 = 0;
3442  }
3443
3444  for (i = len; i < 2*len; i++) {
3445    int start = i-len+1;
3446    int end = start + (len - start)/2;
3447    int j;
3448    for (j = start; j < end; j++) {
3449      MACC2(a, j, a, i-j, t0, t1, t2);
3450      MACC(m, j, n, i-j, t0, t1, t2);
3451    }
3452    if ((i & 1) == 0) {
3453      MACC(a, j, a, j, t0, t1, t2);
3454    }
3455    for (; j < len; j++) {
3456      MACC(m, j, n, i-j, t0, t1, t2);
3457    }
3458    m[i-len] = t0;
3459    t0 = t1; t1 = t2; t2 = 0;
3460  }
3461
3462  while (t0) {
3463    t0 = sub(m, n, t0, len);
3464  }
3465}
3466
3467// The threshold at which squaring is advantageous was determined
3468// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
3469// Value seems to be ok for other platforms, too.
3470#define MONTGOMERY_SQUARING_THRESHOLD 64
3471
3472// Copy len longwords from s to d, word-swapping as we go. The
3473// destination array is reversed.
3474static void reverse_words(unsigned long *s, unsigned long *d, int len) {
3475  d += len;
3476  while(len-- > 0) {
3477    d--;
3478    unsigned long s_val = *s;
3479    // Swap words in a longword on little endian machines.
3480#ifdef VM_LITTLE_ENDIAN
3481     Unimplemented();
3482#endif
3483    *d = s_val;
3484    s++;
3485  }
3486}
3487
3488void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
3489                                        jint len, jlong inv,
3490                                        jint *m_ints) {
3491  len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3492  assert(len % 2 == 0, "array length in montgomery_multiply must be even");
3493  int longwords = len/2;
3494
3495  // Make very sure we don't use so much space that the stack might
3496  // overflow. 512 jints corresponds to an 16384-bit integer and
3497  // will use here a total of 8k bytes of stack space.
3498  int total_allocation = longwords * sizeof (unsigned long) * 4;
3499  guarantee(total_allocation <= 8192, "must be");
3500  unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3501
3502  // Local scratch arrays
3503  unsigned long
3504    *a = scratch + 0 * longwords,
3505    *b = scratch + 1 * longwords,
3506    *n = scratch + 2 * longwords,
3507    *m = scratch + 3 * longwords;
3508
3509  reverse_words((unsigned long *)a_ints, a, longwords);
3510  reverse_words((unsigned long *)b_ints, b, longwords);
3511  reverse_words((unsigned long *)n_ints, n, longwords);
3512
3513  ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
3514
3515  reverse_words(m, (unsigned long *)m_ints, longwords);
3516}
3517
3518void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
3519                                      jint len, jlong inv,
3520                                      jint *m_ints) {
3521  len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3522  assert(len % 2 == 0, "array length in montgomery_square must be even");
3523  int longwords = len/2;
3524
3525  // Make very sure we don't use so much space that the stack might
3526  // overflow. 512 jints corresponds to an 16384-bit integer and
3527  // will use here a total of 6k bytes of stack space.
3528  int total_allocation = longwords * sizeof (unsigned long) * 3;
3529  guarantee(total_allocation <= 8192, "must be");
3530  unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3531
3532  // Local scratch arrays
3533  unsigned long
3534    *a = scratch + 0 * longwords,
3535    *n = scratch + 1 * longwords,
3536    *m = scratch + 2 * longwords;
3537
3538  reverse_words((unsigned long *)a_ints, a, longwords);
3539  reverse_words((unsigned long *)n_ints, n, longwords);
3540
3541  if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
3542    ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
3543  } else {
3544    ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
3545  }
3546
3547  reverse_words(m, (unsigned long *)m_ints, longwords);
3548}
3549
3550extern "C"
3551int SpinPause() {
3552  return 0;
3553}
3554