assembler_x86.cpp revision 1426:2338d41fbd81
1/*
2 * Copyright 1997-2010 Sun Microsystems, Inc.  All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 * have any questions.
22 *
23 */
24
25#include "incls/_precompiled.incl"
26#include "incls/_assembler_x86.cpp.incl"
27
28// Implementation of AddressLiteral
29
30AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
31  _is_lval = false;
32  _target = target;
33  switch (rtype) {
34  case relocInfo::oop_type:
35    // Oops are a special case. Normally they would be their own section
36    // but in cases like icBuffer they are literals in the code stream that
37    // we don't have a section for. We use none so that we get a literal address
38    // which is always patchable.
39    break;
40  case relocInfo::external_word_type:
41    _rspec = external_word_Relocation::spec(target);
42    break;
43  case relocInfo::internal_word_type:
44    _rspec = internal_word_Relocation::spec(target);
45    break;
46  case relocInfo::opt_virtual_call_type:
47    _rspec = opt_virtual_call_Relocation::spec();
48    break;
49  case relocInfo::static_call_type:
50    _rspec = static_call_Relocation::spec();
51    break;
52  case relocInfo::runtime_call_type:
53    _rspec = runtime_call_Relocation::spec();
54    break;
55  case relocInfo::poll_type:
56  case relocInfo::poll_return_type:
57    _rspec = Relocation::spec_simple(rtype);
58    break;
59  case relocInfo::none:
60    break;
61  default:
62    ShouldNotReachHere();
63    break;
64  }
65}
66
67// Implementation of Address
68
69#ifdef _LP64
70
71Address Address::make_array(ArrayAddress adr) {
72  // Not implementable on 64bit machines
73  // Should have been handled higher up the call chain.
74  ShouldNotReachHere();
75  return Address();
76}
77
78// exceedingly dangerous constructor
79Address::Address(int disp, address loc, relocInfo::relocType rtype) {
80  _base  = noreg;
81  _index = noreg;
82  _scale = no_scale;
83  _disp  = disp;
84  switch (rtype) {
85    case relocInfo::external_word_type:
86      _rspec = external_word_Relocation::spec(loc);
87      break;
88    case relocInfo::internal_word_type:
89      _rspec = internal_word_Relocation::spec(loc);
90      break;
91    case relocInfo::runtime_call_type:
92      // HMM
93      _rspec = runtime_call_Relocation::spec();
94      break;
95    case relocInfo::poll_type:
96    case relocInfo::poll_return_type:
97      _rspec = Relocation::spec_simple(rtype);
98      break;
99    case relocInfo::none:
100      break;
101    default:
102      ShouldNotReachHere();
103  }
104}
105#else // LP64
106
107Address Address::make_array(ArrayAddress adr) {
108  AddressLiteral base = adr.base();
109  Address index = adr.index();
110  assert(index._disp == 0, "must not have disp"); // maybe it can?
111  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
112  array._rspec = base._rspec;
113  return array;
114}
115
116// exceedingly dangerous constructor
117Address::Address(address loc, RelocationHolder spec) {
118  _base  = noreg;
119  _index = noreg;
120  _scale = no_scale;
121  _disp  = (intptr_t) loc;
122  _rspec = spec;
123}
124
125#endif // _LP64
126
127
128
129// Convert the raw encoding form into the form expected by the constructor for
130// Address.  An index of 4 (rsp) corresponds to having no index, so convert
131// that to noreg for the Address constructor.
132Address Address::make_raw(int base, int index, int scale, int disp, bool disp_is_oop) {
133  RelocationHolder rspec;
134  if (disp_is_oop) {
135    rspec = Relocation::spec_simple(relocInfo::oop_type);
136  }
137  bool valid_index = index != rsp->encoding();
138  if (valid_index) {
139    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
140    madr._rspec = rspec;
141    return madr;
142  } else {
143    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
144    madr._rspec = rspec;
145    return madr;
146  }
147}
148
149// Implementation of Assembler
150
151int AbstractAssembler::code_fill_byte() {
152  return (u_char)'\xF4'; // hlt
153}
154
155// make this go away someday
156void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
157  if (rtype == relocInfo::none)
158        emit_long(data);
159  else  emit_data(data, Relocation::spec_simple(rtype), format);
160}
161
162void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
163  assert(imm_operand == 0, "default format must be immediate in this file");
164  assert(inst_mark() != NULL, "must be inside InstructionMark");
165  if (rspec.type() !=  relocInfo::none) {
166    #ifdef ASSERT
167      check_relocation(rspec, format);
168    #endif
169    // Do not use AbstractAssembler::relocate, which is not intended for
170    // embedded words.  Instead, relocate to the enclosing instruction.
171
172    // hack. call32 is too wide for mask so use disp32
173    if (format == call32_operand)
174      code_section()->relocate(inst_mark(), rspec, disp32_operand);
175    else
176      code_section()->relocate(inst_mark(), rspec, format);
177  }
178  emit_long(data);
179}
180
181static int encode(Register r) {
182  int enc = r->encoding();
183  if (enc >= 8) {
184    enc -= 8;
185  }
186  return enc;
187}
188
189static int encode(XMMRegister r) {
190  int enc = r->encoding();
191  if (enc >= 8) {
192    enc -= 8;
193  }
194  return enc;
195}
196
197void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
198  assert(dst->has_byte_register(), "must have byte register");
199  assert(isByte(op1) && isByte(op2), "wrong opcode");
200  assert(isByte(imm8), "not a byte");
201  assert((op1 & 0x01) == 0, "should be 8bit operation");
202  emit_byte(op1);
203  emit_byte(op2 | encode(dst));
204  emit_byte(imm8);
205}
206
207
208void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
209  assert(isByte(op1) && isByte(op2), "wrong opcode");
210  assert((op1 & 0x01) == 1, "should be 32bit operation");
211  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
212  if (is8bit(imm32)) {
213    emit_byte(op1 | 0x02); // set sign bit
214    emit_byte(op2 | encode(dst));
215    emit_byte(imm32 & 0xFF);
216  } else {
217    emit_byte(op1);
218    emit_byte(op2 | encode(dst));
219    emit_long(imm32);
220  }
221}
222
223// immediate-to-memory forms
224void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
225  assert((op1 & 0x01) == 1, "should be 32bit operation");
226  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
227  if (is8bit(imm32)) {
228    emit_byte(op1 | 0x02); // set sign bit
229    emit_operand(rm, adr, 1);
230    emit_byte(imm32 & 0xFF);
231  } else {
232    emit_byte(op1);
233    emit_operand(rm, adr, 4);
234    emit_long(imm32);
235  }
236}
237
238void Assembler::emit_arith(int op1, int op2, Register dst, jobject obj) {
239  LP64_ONLY(ShouldNotReachHere());
240  assert(isByte(op1) && isByte(op2), "wrong opcode");
241  assert((op1 & 0x01) == 1, "should be 32bit operation");
242  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
243  InstructionMark im(this);
244  emit_byte(op1);
245  emit_byte(op2 | encode(dst));
246  emit_data((intptr_t)obj, relocInfo::oop_type, 0);
247}
248
249
250void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
251  assert(isByte(op1) && isByte(op2), "wrong opcode");
252  emit_byte(op1);
253  emit_byte(op2 | encode(dst) << 3 | encode(src));
254}
255
256
257void Assembler::emit_operand(Register reg, Register base, Register index,
258                             Address::ScaleFactor scale, int disp,
259                             RelocationHolder const& rspec,
260                             int rip_relative_correction) {
261  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();
262
263  // Encode the registers as needed in the fields they are used in
264
265  int regenc = encode(reg) << 3;
266  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
267  int baseenc = base->is_valid() ? encode(base) : 0;
268
269  if (base->is_valid()) {
270    if (index->is_valid()) {
271      assert(scale != Address::no_scale, "inconsistent address");
272      // [base + index*scale + disp]
273      if (disp == 0 && rtype == relocInfo::none  &&
274          base != rbp LP64_ONLY(&& base != r13)) {
275        // [base + index*scale]
276        // [00 reg 100][ss index base]
277        assert(index != rsp, "illegal addressing mode");
278        emit_byte(0x04 | regenc);
279        emit_byte(scale << 6 | indexenc | baseenc);
280      } else if (is8bit(disp) && rtype == relocInfo::none) {
281        // [base + index*scale + imm8]
282        // [01 reg 100][ss index base] imm8
283        assert(index != rsp, "illegal addressing mode");
284        emit_byte(0x44 | regenc);
285        emit_byte(scale << 6 | indexenc | baseenc);
286        emit_byte(disp & 0xFF);
287      } else {
288        // [base + index*scale + disp32]
289        // [10 reg 100][ss index base] disp32
290        assert(index != rsp, "illegal addressing mode");
291        emit_byte(0x84 | regenc);
292        emit_byte(scale << 6 | indexenc | baseenc);
293        emit_data(disp, rspec, disp32_operand);
294      }
295    } else if (base == rsp LP64_ONLY(|| base == r12)) {
296      // [rsp + disp]
297      if (disp == 0 && rtype == relocInfo::none) {
298        // [rsp]
299        // [00 reg 100][00 100 100]
300        emit_byte(0x04 | regenc);
301        emit_byte(0x24);
302      } else if (is8bit(disp) && rtype == relocInfo::none) {
303        // [rsp + imm8]
304        // [01 reg 100][00 100 100] disp8
305        emit_byte(0x44 | regenc);
306        emit_byte(0x24);
307        emit_byte(disp & 0xFF);
308      } else {
309        // [rsp + imm32]
310        // [10 reg 100][00 100 100] disp32
311        emit_byte(0x84 | regenc);
312        emit_byte(0x24);
313        emit_data(disp, rspec, disp32_operand);
314      }
315    } else {
316      // [base + disp]
317      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
318      if (disp == 0 && rtype == relocInfo::none &&
319          base != rbp LP64_ONLY(&& base != r13)) {
320        // [base]
321        // [00 reg base]
322        emit_byte(0x00 | regenc | baseenc);
323      } else if (is8bit(disp) && rtype == relocInfo::none) {
324        // [base + disp8]
325        // [01 reg base] disp8
326        emit_byte(0x40 | regenc | baseenc);
327        emit_byte(disp & 0xFF);
328      } else {
329        // [base + disp32]
330        // [10 reg base] disp32
331        emit_byte(0x80 | regenc | baseenc);
332        emit_data(disp, rspec, disp32_operand);
333      }
334    }
335  } else {
336    if (index->is_valid()) {
337      assert(scale != Address::no_scale, "inconsistent address");
338      // [index*scale + disp]
339      // [00 reg 100][ss index 101] disp32
340      assert(index != rsp, "illegal addressing mode");
341      emit_byte(0x04 | regenc);
342      emit_byte(scale << 6 | indexenc | 0x05);
343      emit_data(disp, rspec, disp32_operand);
344    } else if (rtype != relocInfo::none ) {
345      // [disp] (64bit) RIP-RELATIVE (32bit) abs
346      // [00 000 101] disp32
347
348      emit_byte(0x05 | regenc);
349      // Note that the RIP-rel. correction applies to the generated
350      // disp field, but _not_ to the target address in the rspec.
351
352      // disp was created by converting the target address minus the pc
353      // at the start of the instruction. That needs more correction here.
354      // intptr_t disp = target - next_ip;
355      assert(inst_mark() != NULL, "must be inside InstructionMark");
356      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
357      int64_t adjusted = disp;
358      // Do rip-rel adjustment for 64bit
359      LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
360      assert(is_simm32(adjusted),
361             "must be 32bit offset (RIP relative address)");
362      emit_data((int32_t) adjusted, rspec, disp32_operand);
363
364    } else {
365      // 32bit never did this, did everything as the rip-rel/disp code above
366      // [disp] ABSOLUTE
367      // [00 reg 100][00 100 101] disp32
368      emit_byte(0x04 | regenc);
369      emit_byte(0x25);
370      emit_data(disp, rspec, disp32_operand);
371    }
372  }
373}
374
375void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
376                             Address::ScaleFactor scale, int disp,
377                             RelocationHolder const& rspec) {
378  emit_operand((Register)reg, base, index, scale, disp, rspec);
379}
380
381// Secret local extension to Assembler::WhichOperand:
382#define end_pc_operand (_WhichOperand_limit)
383
384address Assembler::locate_operand(address inst, WhichOperand which) {
385  // Decode the given instruction, and return the address of
386  // an embedded 32-bit operand word.
387
388  // If "which" is disp32_operand, selects the displacement portion
389  // of an effective address specifier.
390  // If "which" is imm64_operand, selects the trailing immediate constant.
391  // If "which" is call32_operand, selects the displacement of a call or jump.
392  // Caller is responsible for ensuring that there is such an operand,
393  // and that it is 32/64 bits wide.
394
395  // If "which" is end_pc_operand, find the end of the instruction.
396
397  address ip = inst;
398  bool is_64bit = false;
399
400  debug_only(bool has_disp32 = false);
401  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn
402
403  again_after_prefix:
404  switch (0xFF & *ip++) {
405
406  // These convenience macros generate groups of "case" labels for the switch.
407#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
408#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
409             case (x)+4: case (x)+5: case (x)+6: case (x)+7
410#define REP16(x) REP8((x)+0): \
411              case REP8((x)+8)
412
413  case CS_segment:
414  case SS_segment:
415  case DS_segment:
416  case ES_segment:
417  case FS_segment:
418  case GS_segment:
419    // Seems dubious
420    LP64_ONLY(assert(false, "shouldn't have that prefix"));
421    assert(ip == inst+1, "only one prefix allowed");
422    goto again_after_prefix;
423
424  case 0x67:
425  case REX:
426  case REX_B:
427  case REX_X:
428  case REX_XB:
429  case REX_R:
430  case REX_RB:
431  case REX_RX:
432  case REX_RXB:
433    NOT_LP64(assert(false, "64bit prefixes"));
434    goto again_after_prefix;
435
436  case REX_W:
437  case REX_WB:
438  case REX_WX:
439  case REX_WXB:
440  case REX_WR:
441  case REX_WRB:
442  case REX_WRX:
443  case REX_WRXB:
444    NOT_LP64(assert(false, "64bit prefixes"));
445    is_64bit = true;
446    goto again_after_prefix;
447
448  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
449  case 0x88: // movb a, r
450  case 0x89: // movl a, r
451  case 0x8A: // movb r, a
452  case 0x8B: // movl r, a
453  case 0x8F: // popl a
454    debug_only(has_disp32 = true);
455    break;
456
457  case 0x68: // pushq #32
458    if (which == end_pc_operand) {
459      return ip + 4;
460    }
461    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
462    return ip;                  // not produced by emit_operand
463
464  case 0x66: // movw ... (size prefix)
465    again_after_size_prefix2:
466    switch (0xFF & *ip++) {
467    case REX:
468    case REX_B:
469    case REX_X:
470    case REX_XB:
471    case REX_R:
472    case REX_RB:
473    case REX_RX:
474    case REX_RXB:
475    case REX_W:
476    case REX_WB:
477    case REX_WX:
478    case REX_WXB:
479    case REX_WR:
480    case REX_WRB:
481    case REX_WRX:
482    case REX_WRXB:
483      NOT_LP64(assert(false, "64bit prefix found"));
484      goto again_after_size_prefix2;
485    case 0x8B: // movw r, a
486    case 0x89: // movw a, r
487      debug_only(has_disp32 = true);
488      break;
489    case 0xC7: // movw a, #16
490      debug_only(has_disp32 = true);
491      tail_size = 2;  // the imm16
492      break;
493    case 0x0F: // several SSE/SSE2 variants
494      ip--;    // reparse the 0x0F
495      goto again_after_prefix;
496    default:
497      ShouldNotReachHere();
498    }
499    break;
500
501  case REP8(0xB8): // movl/q r, #32/#64(oop?)
502    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
503    // these asserts are somewhat nonsensical
504#ifndef _LP64
505    assert(which == imm_operand || which == disp32_operand, "");
506#else
507    assert((which == call32_operand || which == imm_operand) && is_64bit ||
508           which == narrow_oop_operand && !is_64bit, "");
509#endif // _LP64
510    return ip;
511
512  case 0x69: // imul r, a, #32
513  case 0xC7: // movl a, #32(oop?)
514    tail_size = 4;
515    debug_only(has_disp32 = true); // has both kinds of operands!
516    break;
517
518  case 0x0F: // movx..., etc.
519    switch (0xFF & *ip++) {
520    case 0x12: // movlps
521    case 0x28: // movaps
522    case 0x2E: // ucomiss
523    case 0x2F: // comiss
524    case 0x54: // andps
525    case 0x55: // andnps
526    case 0x56: // orps
527    case 0x57: // xorps
528    case 0x6E: // movd
529    case 0x7E: // movd
530    case 0xAE: // ldmxcsr   a
531      // 64bit side says it these have both operands but that doesn't
532      // appear to be true
533      debug_only(has_disp32 = true);
534      break;
535
536    case 0xAD: // shrd r, a, %cl
537    case 0xAF: // imul r, a
538    case 0xBE: // movsbl r, a (movsxb)
539    case 0xBF: // movswl r, a (movsxw)
540    case 0xB6: // movzbl r, a (movzxb)
541    case 0xB7: // movzwl r, a (movzxw)
542    case REP16(0x40): // cmovl cc, r, a
543    case 0xB0: // cmpxchgb
544    case 0xB1: // cmpxchg
545    case 0xC1: // xaddl
546    case 0xC7: // cmpxchg8
547    case REP16(0x90): // setcc a
548      debug_only(has_disp32 = true);
549      // fall out of the switch to decode the address
550      break;
551
552    case 0xAC: // shrd r, a, #8
553      debug_only(has_disp32 = true);
554      tail_size = 1;  // the imm8
555      break;
556
557    case REP16(0x80): // jcc rdisp32
558      if (which == end_pc_operand)  return ip + 4;
559      assert(which == call32_operand, "jcc has no disp32 or imm");
560      return ip;
561    default:
562      ShouldNotReachHere();
563    }
564    break;
565
566  case 0x81: // addl a, #32; addl r, #32
567    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
568    // on 32bit in the case of cmpl, the imm might be an oop
569    tail_size = 4;
570    debug_only(has_disp32 = true); // has both kinds of operands!
571    break;
572
573  case 0x83: // addl a, #8; addl r, #8
574    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
575    debug_only(has_disp32 = true); // has both kinds of operands!
576    tail_size = 1;
577    break;
578
579  case 0x9B:
580    switch (0xFF & *ip++) {
581    case 0xD9: // fnstcw a
582      debug_only(has_disp32 = true);
583      break;
584    default:
585      ShouldNotReachHere();
586    }
587    break;
588
589  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
590  case REP4(0x10): // adc...
591  case REP4(0x20): // and...
592  case REP4(0x30): // xor...
593  case REP4(0x08): // or...
594  case REP4(0x18): // sbb...
595  case REP4(0x28): // sub...
596  case 0xF7: // mull a
597  case 0x8D: // lea r, a
598  case 0x87: // xchg r, a
599  case REP4(0x38): // cmp...
600  case 0x85: // test r, a
601    debug_only(has_disp32 = true); // has both kinds of operands!
602    break;
603
604  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
605  case 0xC6: // movb a, #8
606  case 0x80: // cmpb a, #8
607  case 0x6B: // imul r, a, #8
608    debug_only(has_disp32 = true); // has both kinds of operands!
609    tail_size = 1; // the imm8
610    break;
611
612  case 0xE8: // call rdisp32
613  case 0xE9: // jmp  rdisp32
614    if (which == end_pc_operand)  return ip + 4;
615    assert(which == call32_operand, "call has no disp32 or imm");
616    return ip;
617
618  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
619  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
620  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
621  case 0xDD: // fld_d a; fst_d a; fstp_d a
622  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
623  case 0xDF: // fild_d a; fistp_d a
624  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
625  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
626  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
627    debug_only(has_disp32 = true);
628    break;
629
630  case 0xF0:                    // Lock
631    assert(os::is_MP(), "only on MP");
632    goto again_after_prefix;
633
634  case 0xF3:                    // For SSE
635  case 0xF2:                    // For SSE2
636    switch (0xFF & *ip++) {
637    case REX:
638    case REX_B:
639    case REX_X:
640    case REX_XB:
641    case REX_R:
642    case REX_RB:
643    case REX_RX:
644    case REX_RXB:
645    case REX_W:
646    case REX_WB:
647    case REX_WX:
648    case REX_WXB:
649    case REX_WR:
650    case REX_WRB:
651    case REX_WRX:
652    case REX_WRXB:
653      NOT_LP64(assert(false, "found 64bit prefix"));
654      ip++;
655    default:
656      ip++;
657    }
658    debug_only(has_disp32 = true); // has both kinds of operands!
659    break;
660
661  default:
662    ShouldNotReachHere();
663
664#undef REP8
665#undef REP16
666  }
667
668  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
669#ifdef _LP64
670  assert(which != imm_operand, "instruction is not a movq reg, imm64");
671#else
672  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
673  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
674#endif // LP64
675  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");
676
677  // parse the output of emit_operand
678  int op2 = 0xFF & *ip++;
679  int base = op2 & 0x07;
680  int op3 = -1;
681  const int b100 = 4;
682  const int b101 = 5;
683  if (base == b100 && (op2 >> 6) != 3) {
684    op3 = 0xFF & *ip++;
685    base = op3 & 0x07;   // refetch the base
686  }
687  // now ip points at the disp (if any)
688
689  switch (op2 >> 6) {
690  case 0:
691    // [00 reg  100][ss index base]
692    // [00 reg  100][00   100  esp]
693    // [00 reg base]
694    // [00 reg  100][ss index  101][disp32]
695    // [00 reg  101]               [disp32]
696
697    if (base == b101) {
698      if (which == disp32_operand)
699        return ip;              // caller wants the disp32
700      ip += 4;                  // skip the disp32
701    }
702    break;
703
704  case 1:
705    // [01 reg  100][ss index base][disp8]
706    // [01 reg  100][00   100  esp][disp8]
707    // [01 reg base]               [disp8]
708    ip += 1;                    // skip the disp8
709    break;
710
711  case 2:
712    // [10 reg  100][ss index base][disp32]
713    // [10 reg  100][00   100  esp][disp32]
714    // [10 reg base]               [disp32]
715    if (which == disp32_operand)
716      return ip;                // caller wants the disp32
717    ip += 4;                    // skip the disp32
718    break;
719
720  case 3:
721    // [11 reg base]  (not a memory addressing mode)
722    break;
723  }
724
725  if (which == end_pc_operand) {
726    return ip + tail_size;
727  }
728
729#ifdef _LP64
730  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
731#else
732  assert(which == imm_operand, "instruction has only an imm field");
733#endif // LP64
734  return ip;
735}
736
737address Assembler::locate_next_instruction(address inst) {
738  // Secretly share code with locate_operand:
739  return locate_operand(inst, end_pc_operand);
740}
741
742
743#ifdef ASSERT
744void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
745  address inst = inst_mark();
746  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
747  address opnd;
748
749  Relocation* r = rspec.reloc();
750  if (r->type() == relocInfo::none) {
751    return;
752  } else if (r->is_call() || format == call32_operand) {
753    // assert(format == imm32_operand, "cannot specify a nonzero format");
754    opnd = locate_operand(inst, call32_operand);
755  } else if (r->is_data()) {
756    assert(format == imm_operand || format == disp32_operand
757           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
758    opnd = locate_operand(inst, (WhichOperand)format);
759  } else {
760    assert(format == imm_operand, "cannot specify a format");
761    return;
762  }
763  assert(opnd == pc(), "must put operand where relocs can find it");
764}
765#endif // ASSERT
766
767void Assembler::emit_operand32(Register reg, Address adr) {
768  assert(reg->encoding() < 8, "no extended registers");
769  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
770  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
771               adr._rspec);
772}
773
774void Assembler::emit_operand(Register reg, Address adr,
775                             int rip_relative_correction) {
776  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
777               adr._rspec,
778               rip_relative_correction);
779}
780
781void Assembler::emit_operand(XMMRegister reg, Address adr) {
782  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
783               adr._rspec);
784}
785
786// MMX operations
787void Assembler::emit_operand(MMXRegister reg, Address adr) {
788  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
789  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
790}
791
792// work around gcc (3.2.1-7a) bug
793void Assembler::emit_operand(Address adr, MMXRegister reg) {
794  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
795  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
796}
797
798
799void Assembler::emit_farith(int b1, int b2, int i) {
800  assert(isByte(b1) && isByte(b2), "wrong opcode");
801  assert(0 <= i &&  i < 8, "illegal stack offset");
802  emit_byte(b1);
803  emit_byte(b2 + i);
804}
805
806
807// Now the Assembler instruction (identical for 32/64 bits)
808
809void Assembler::adcl(Register dst, int32_t imm32) {
810  prefix(dst);
811  emit_arith(0x81, 0xD0, dst, imm32);
812}
813
814void Assembler::adcl(Register dst, Address src) {
815  InstructionMark im(this);
816  prefix(src, dst);
817  emit_byte(0x13);
818  emit_operand(dst, src);
819}
820
821void Assembler::adcl(Register dst, Register src) {
822  (void) prefix_and_encode(dst->encoding(), src->encoding());
823  emit_arith(0x13, 0xC0, dst, src);
824}
825
826void Assembler::addl(Address dst, int32_t imm32) {
827  InstructionMark im(this);
828  prefix(dst);
829  emit_arith_operand(0x81, rax, dst, imm32);
830}
831
832void Assembler::addl(Address dst, Register src) {
833  InstructionMark im(this);
834  prefix(dst, src);
835  emit_byte(0x01);
836  emit_operand(src, dst);
837}
838
839void Assembler::addl(Register dst, int32_t imm32) {
840  prefix(dst);
841  emit_arith(0x81, 0xC0, dst, imm32);
842}
843
844void Assembler::addl(Register dst, Address src) {
845  InstructionMark im(this);
846  prefix(src, dst);
847  emit_byte(0x03);
848  emit_operand(dst, src);
849}
850
851void Assembler::addl(Register dst, Register src) {
852  (void) prefix_and_encode(dst->encoding(), src->encoding());
853  emit_arith(0x03, 0xC0, dst, src);
854}
855
856void Assembler::addr_nop_4() {
857  // 4 bytes: NOP DWORD PTR [EAX+0]
858  emit_byte(0x0F);
859  emit_byte(0x1F);
860  emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
861  emit_byte(0);    // 8-bits offset (1 byte)
862}
863
864void Assembler::addr_nop_5() {
865  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
866  emit_byte(0x0F);
867  emit_byte(0x1F);
868  emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
869  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
870  emit_byte(0);    // 8-bits offset (1 byte)
871}
872
873void Assembler::addr_nop_7() {
874  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
875  emit_byte(0x0F);
876  emit_byte(0x1F);
877  emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
878  emit_long(0);    // 32-bits offset (4 bytes)
879}
880
881void Assembler::addr_nop_8() {
882  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
883  emit_byte(0x0F);
884  emit_byte(0x1F);
885  emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
886  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
887  emit_long(0);    // 32-bits offset (4 bytes)
888}
889
890void Assembler::addsd(XMMRegister dst, XMMRegister src) {
891  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
892  emit_byte(0xF2);
893  int encode = prefix_and_encode(dst->encoding(), src->encoding());
894  emit_byte(0x0F);
895  emit_byte(0x58);
896  emit_byte(0xC0 | encode);
897}
898
899void Assembler::addsd(XMMRegister dst, Address src) {
900  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
901  InstructionMark im(this);
902  emit_byte(0xF2);
903  prefix(src, dst);
904  emit_byte(0x0F);
905  emit_byte(0x58);
906  emit_operand(dst, src);
907}
908
909void Assembler::addss(XMMRegister dst, XMMRegister src) {
910  NOT_LP64(assert(VM_Version::supports_sse(), ""));
911  emit_byte(0xF3);
912  int encode = prefix_and_encode(dst->encoding(), src->encoding());
913  emit_byte(0x0F);
914  emit_byte(0x58);
915  emit_byte(0xC0 | encode);
916}
917
918void Assembler::addss(XMMRegister dst, Address src) {
919  NOT_LP64(assert(VM_Version::supports_sse(), ""));
920  InstructionMark im(this);
921  emit_byte(0xF3);
922  prefix(src, dst);
923  emit_byte(0x0F);
924  emit_byte(0x58);
925  emit_operand(dst, src);
926}
927
928void Assembler::andl(Register dst, int32_t imm32) {
929  prefix(dst);
930  emit_arith(0x81, 0xE0, dst, imm32);
931}
932
933void Assembler::andl(Register dst, Address src) {
934  InstructionMark im(this);
935  prefix(src, dst);
936  emit_byte(0x23);
937  emit_operand(dst, src);
938}
939
940void Assembler::andl(Register dst, Register src) {
941  (void) prefix_and_encode(dst->encoding(), src->encoding());
942  emit_arith(0x23, 0xC0, dst, src);
943}
944
945void Assembler::andpd(XMMRegister dst, Address src) {
946  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
947  InstructionMark im(this);
948  emit_byte(0x66);
949  prefix(src, dst);
950  emit_byte(0x0F);
951  emit_byte(0x54);
952  emit_operand(dst, src);
953}
954
955void Assembler::bsfl(Register dst, Register src) {
956  int encode = prefix_and_encode(dst->encoding(), src->encoding());
957  emit_byte(0x0F);
958  emit_byte(0xBC);
959  emit_byte(0xC0 | encode);
960}
961
962void Assembler::bsrl(Register dst, Register src) {
963  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
964  int encode = prefix_and_encode(dst->encoding(), src->encoding());
965  emit_byte(0x0F);
966  emit_byte(0xBD);
967  emit_byte(0xC0 | encode);
968}
969
970void Assembler::bswapl(Register reg) { // bswap
971  int encode = prefix_and_encode(reg->encoding());
972  emit_byte(0x0F);
973  emit_byte(0xC8 | encode);
974}
975
976void Assembler::call(Label& L, relocInfo::relocType rtype) {
977  // suspect disp32 is always good
978  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);
979
980  if (L.is_bound()) {
981    const int long_size = 5;
982    int offs = (int)( target(L) - pc() );
983    assert(offs <= 0, "assembler error");
984    InstructionMark im(this);
985    // 1110 1000 #32-bit disp
986    emit_byte(0xE8);
987    emit_data(offs - long_size, rtype, operand);
988  } else {
989    InstructionMark im(this);
990    // 1110 1000 #32-bit disp
991    L.add_patch_at(code(), locator());
992
993    emit_byte(0xE8);
994    emit_data(int(0), rtype, operand);
995  }
996}
997
998void Assembler::call(Register dst) {
999  // This was originally using a 32bit register encoding
1000  // and surely we want 64bit!
1001  // this is a 32bit encoding but in 64bit mode the default
1002  // operand size is 64bit so there is no need for the
1003  // wide prefix. So prefix only happens if we use the
1004  // new registers. Much like push/pop.
1005  int x = offset();
1006  // this may be true but dbx disassembles it as if it
1007  // were 32bits...
1008  // int encode = prefix_and_encode(dst->encoding());
1009  // if (offset() != x) assert(dst->encoding() >= 8, "what?");
1010  int encode = prefixq_and_encode(dst->encoding());
1011
1012  emit_byte(0xFF);
1013  emit_byte(0xD0 | encode);
1014}
1015
1016
1017void Assembler::call(Address adr) {
1018  InstructionMark im(this);
1019  prefix(adr);
1020  emit_byte(0xFF);
1021  emit_operand(rdx, adr);
1022}
1023
1024void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
1025  assert(entry != NULL, "call most probably wrong");
1026  InstructionMark im(this);
1027  emit_byte(0xE8);
1028  intptr_t disp = entry - (_code_pos + sizeof(int32_t));
1029  assert(is_simm32(disp), "must be 32bit offset (call2)");
1030  // Technically, should use call32_operand, but this format is
1031  // implied by the fact that we're emitting a call instruction.
1032
1033  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
1034  emit_data((int) disp, rspec, operand);
1035}
1036
1037void Assembler::cdql() {
1038  emit_byte(0x99);
1039}
1040
1041void Assembler::cmovl(Condition cc, Register dst, Register src) {
1042  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1043  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1044  emit_byte(0x0F);
1045  emit_byte(0x40 | cc);
1046  emit_byte(0xC0 | encode);
1047}
1048
1049
1050void Assembler::cmovl(Condition cc, Register dst, Address src) {
1051  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1052  prefix(src, dst);
1053  emit_byte(0x0F);
1054  emit_byte(0x40 | cc);
1055  emit_operand(dst, src);
1056}
1057
1058void Assembler::cmpb(Address dst, int imm8) {
1059  InstructionMark im(this);
1060  prefix(dst);
1061  emit_byte(0x80);
1062  emit_operand(rdi, dst, 1);
1063  emit_byte(imm8);
1064}
1065
1066void Assembler::cmpl(Address dst, int32_t imm32) {
1067  InstructionMark im(this);
1068  prefix(dst);
1069  emit_byte(0x81);
1070  emit_operand(rdi, dst, 4);
1071  emit_long(imm32);
1072}
1073
1074void Assembler::cmpl(Register dst, int32_t imm32) {
1075  prefix(dst);
1076  emit_arith(0x81, 0xF8, dst, imm32);
1077}
1078
1079void Assembler::cmpl(Register dst, Register src) {
1080  (void) prefix_and_encode(dst->encoding(), src->encoding());
1081  emit_arith(0x3B, 0xC0, dst, src);
1082}
1083
1084
1085void Assembler::cmpl(Register dst, Address  src) {
1086  InstructionMark im(this);
1087  prefix(src, dst);
1088  emit_byte(0x3B);
1089  emit_operand(dst, src);
1090}
1091
1092void Assembler::cmpw(Address dst, int imm16) {
1093  InstructionMark im(this);
1094  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
1095  emit_byte(0x66);
1096  emit_byte(0x81);
1097  emit_operand(rdi, dst, 2);
1098  emit_word(imm16);
1099}
1100
1101// The 32-bit cmpxchg compares the value at adr with the contents of rax,
1102// and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
1103// The ZF is set if the compared values were equal, and cleared otherwise.
1104void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
1105  if (Atomics & 2) {
1106     // caveat: no instructionmark, so this isn't relocatable.
1107     // Emit a synthetic, non-atomic, CAS equivalent.
1108     // Beware.  The synthetic form sets all ICCs, not just ZF.
1109     // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r)
1110     cmpl(rax, adr);
1111     movl(rax, adr);
1112     if (reg != rax) {
1113        Label L ;
1114        jcc(Assembler::notEqual, L);
1115        movl(adr, reg);
1116        bind(L);
1117     }
1118  } else {
1119     InstructionMark im(this);
1120     prefix(adr, reg);
1121     emit_byte(0x0F);
1122     emit_byte(0xB1);
1123     emit_operand(reg, adr);
1124  }
1125}
1126
1127void Assembler::comisd(XMMRegister dst, Address src) {
1128  // NOTE: dbx seems to decode this as comiss even though the
1129  // 0x66 is there. Strangly ucomisd comes out correct
1130  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1131  emit_byte(0x66);
1132  comiss(dst, src);
1133}
1134
1135void Assembler::comiss(XMMRegister dst, Address src) {
1136  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1137
1138  InstructionMark im(this);
1139  prefix(src, dst);
1140  emit_byte(0x0F);
1141  emit_byte(0x2F);
1142  emit_operand(dst, src);
1143}
1144
1145void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1146  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1147  emit_byte(0xF3);
1148  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1149  emit_byte(0x0F);
1150  emit_byte(0xE6);
1151  emit_byte(0xC0 | encode);
1152}
1153
1154void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1155  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1156  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1157  emit_byte(0x0F);
1158  emit_byte(0x5B);
1159  emit_byte(0xC0 | encode);
1160}
1161
1162void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1163  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1164  emit_byte(0xF2);
1165  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1166  emit_byte(0x0F);
1167  emit_byte(0x5A);
1168  emit_byte(0xC0 | encode);
1169}
1170
1171void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1172  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1173  emit_byte(0xF2);
1174  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1175  emit_byte(0x0F);
1176  emit_byte(0x2A);
1177  emit_byte(0xC0 | encode);
1178}
1179
1180void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1181  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1182  emit_byte(0xF3);
1183  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1184  emit_byte(0x0F);
1185  emit_byte(0x2A);
1186  emit_byte(0xC0 | encode);
1187}
1188
1189void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1190  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1191  emit_byte(0xF3);
1192  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1193  emit_byte(0x0F);
1194  emit_byte(0x5A);
1195  emit_byte(0xC0 | encode);
1196}
1197
1198void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1199  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1200  emit_byte(0xF2);
1201  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1202  emit_byte(0x0F);
1203  emit_byte(0x2C);
1204  emit_byte(0xC0 | encode);
1205}
1206
1207void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1208  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1209  emit_byte(0xF3);
1210  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1211  emit_byte(0x0F);
1212  emit_byte(0x2C);
1213  emit_byte(0xC0 | encode);
1214}
1215
1216void Assembler::decl(Address dst) {
1217  // Don't use it directly. Use MacroAssembler::decrement() instead.
1218  InstructionMark im(this);
1219  prefix(dst);
1220  emit_byte(0xFF);
1221  emit_operand(rcx, dst);
1222}
1223
1224void Assembler::divsd(XMMRegister dst, Address src) {
1225  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1226  InstructionMark im(this);
1227  emit_byte(0xF2);
1228  prefix(src, dst);
1229  emit_byte(0x0F);
1230  emit_byte(0x5E);
1231  emit_operand(dst, src);
1232}
1233
1234void Assembler::divsd(XMMRegister dst, XMMRegister src) {
1235  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1236  emit_byte(0xF2);
1237  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1238  emit_byte(0x0F);
1239  emit_byte(0x5E);
1240  emit_byte(0xC0 | encode);
1241}
1242
1243void Assembler::divss(XMMRegister dst, Address src) {
1244  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1245  InstructionMark im(this);
1246  emit_byte(0xF3);
1247  prefix(src, dst);
1248  emit_byte(0x0F);
1249  emit_byte(0x5E);
1250  emit_operand(dst, src);
1251}
1252
1253void Assembler::divss(XMMRegister dst, XMMRegister src) {
1254  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1255  emit_byte(0xF3);
1256  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1257  emit_byte(0x0F);
1258  emit_byte(0x5E);
1259  emit_byte(0xC0 | encode);
1260}
1261
1262void Assembler::emms() {
1263  NOT_LP64(assert(VM_Version::supports_mmx(), ""));
1264  emit_byte(0x0F);
1265  emit_byte(0x77);
1266}
1267
1268void Assembler::hlt() {
1269  emit_byte(0xF4);
1270}
1271
1272void Assembler::idivl(Register src) {
1273  int encode = prefix_and_encode(src->encoding());
1274  emit_byte(0xF7);
1275  emit_byte(0xF8 | encode);
1276}
1277
1278void Assembler::imull(Register dst, Register src) {
1279  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1280  emit_byte(0x0F);
1281  emit_byte(0xAF);
1282  emit_byte(0xC0 | encode);
1283}
1284
1285
1286void Assembler::imull(Register dst, Register src, int value) {
1287  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1288  if (is8bit(value)) {
1289    emit_byte(0x6B);
1290    emit_byte(0xC0 | encode);
1291    emit_byte(value);
1292  } else {
1293    emit_byte(0x69);
1294    emit_byte(0xC0 | encode);
1295    emit_long(value);
1296  }
1297}
1298
1299void Assembler::incl(Address dst) {
1300  // Don't use it directly. Use MacroAssembler::increment() instead.
1301  InstructionMark im(this);
1302  prefix(dst);
1303  emit_byte(0xFF);
1304  emit_operand(rax, dst);
1305}
1306
1307void Assembler::jcc(Condition cc, Label& L, relocInfo::relocType rtype) {
1308  InstructionMark im(this);
1309  relocate(rtype);
1310  assert((0 <= cc) && (cc < 16), "illegal cc");
1311  if (L.is_bound()) {
1312    address dst = target(L);
1313    assert(dst != NULL, "jcc most probably wrong");
1314
1315    const int short_size = 2;
1316    const int long_size = 6;
1317    intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;
1318    if (rtype == relocInfo::none && is8bit(offs - short_size)) {
1319      // 0111 tttn #8-bit disp
1320      emit_byte(0x70 | cc);
1321      emit_byte((offs - short_size) & 0xFF);
1322    } else {
1323      // 0000 1111 1000 tttn #32-bit disp
1324      assert(is_simm32(offs - long_size),
1325             "must be 32bit offset (call4)");
1326      emit_byte(0x0F);
1327      emit_byte(0x80 | cc);
1328      emit_long(offs - long_size);
1329    }
1330  } else {
1331    // Note: could eliminate cond. jumps to this jump if condition
1332    //       is the same however, seems to be rather unlikely case.
1333    // Note: use jccb() if label to be bound is very close to get
1334    //       an 8-bit displacement
1335    L.add_patch_at(code(), locator());
1336    emit_byte(0x0F);
1337    emit_byte(0x80 | cc);
1338    emit_long(0);
1339  }
1340}
1341
1342void Assembler::jccb(Condition cc, Label& L) {
1343  if (L.is_bound()) {
1344    const int short_size = 2;
1345    address entry = target(L);
1346    assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)),
1347           "Dispacement too large for a short jmp");
1348    intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
1349    // 0111 tttn #8-bit disp
1350    emit_byte(0x70 | cc);
1351    emit_byte((offs - short_size) & 0xFF);
1352  } else {
1353    InstructionMark im(this);
1354    L.add_patch_at(code(), locator());
1355    emit_byte(0x70 | cc);
1356    emit_byte(0);
1357  }
1358}
1359
1360void Assembler::jmp(Address adr) {
1361  InstructionMark im(this);
1362  prefix(adr);
1363  emit_byte(0xFF);
1364  emit_operand(rsp, adr);
1365}
1366
1367void Assembler::jmp(Label& L, relocInfo::relocType rtype) {
1368  if (L.is_bound()) {
1369    address entry = target(L);
1370    assert(entry != NULL, "jmp most probably wrong");
1371    InstructionMark im(this);
1372    const int short_size = 2;
1373    const int long_size = 5;
1374    intptr_t offs = entry - _code_pos;
1375    if (rtype == relocInfo::none && is8bit(offs - short_size)) {
1376      emit_byte(0xEB);
1377      emit_byte((offs - short_size) & 0xFF);
1378    } else {
1379      emit_byte(0xE9);
1380      emit_long(offs - long_size);
1381    }
1382  } else {
1383    // By default, forward jumps are always 32-bit displacements, since
1384    // we can't yet know where the label will be bound.  If you're sure that
1385    // the forward jump will not run beyond 256 bytes, use jmpb to
1386    // force an 8-bit displacement.
1387    InstructionMark im(this);
1388    relocate(rtype);
1389    L.add_patch_at(code(), locator());
1390    emit_byte(0xE9);
1391    emit_long(0);
1392  }
1393}
1394
1395void Assembler::jmp(Register entry) {
1396  int encode = prefix_and_encode(entry->encoding());
1397  emit_byte(0xFF);
1398  emit_byte(0xE0 | encode);
1399}
1400
1401void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
1402  InstructionMark im(this);
1403  emit_byte(0xE9);
1404  assert(dest != NULL, "must have a target");
1405  intptr_t disp = dest - (_code_pos + sizeof(int32_t));
1406  assert(is_simm32(disp), "must be 32bit offset (jmp)");
1407  emit_data(disp, rspec.reloc(), call32_operand);
1408}
1409
1410void Assembler::jmpb(Label& L) {
1411  if (L.is_bound()) {
1412    const int short_size = 2;
1413    address entry = target(L);
1414    assert(is8bit((entry - _code_pos) + short_size),
1415           "Dispacement too large for a short jmp");
1416    assert(entry != NULL, "jmp most probably wrong");
1417    intptr_t offs = entry - _code_pos;
1418    emit_byte(0xEB);
1419    emit_byte((offs - short_size) & 0xFF);
1420  } else {
1421    InstructionMark im(this);
1422    L.add_patch_at(code(), locator());
1423    emit_byte(0xEB);
1424    emit_byte(0);
1425  }
1426}
1427
1428void Assembler::ldmxcsr( Address src) {
1429  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1430  InstructionMark im(this);
1431  prefix(src);
1432  emit_byte(0x0F);
1433  emit_byte(0xAE);
1434  emit_operand(as_Register(2), src);
1435}
1436
1437void Assembler::leal(Register dst, Address src) {
1438  InstructionMark im(this);
1439#ifdef _LP64
1440  emit_byte(0x67); // addr32
1441  prefix(src, dst);
1442#endif // LP64
1443  emit_byte(0x8D);
1444  emit_operand(dst, src);
1445}
1446
1447void Assembler::lock() {
1448  if (Atomics & 1) {
1449     // Emit either nothing, a NOP, or a NOP: prefix
1450     emit_byte(0x90) ;
1451  } else {
1452     emit_byte(0xF0);
1453  }
1454}
1455
1456void Assembler::lzcntl(Register dst, Register src) {
1457  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
1458  emit_byte(0xF3);
1459  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1460  emit_byte(0x0F);
1461  emit_byte(0xBD);
1462  emit_byte(0xC0 | encode);
1463}
1464
1465// Emit mfence instruction
1466void Assembler::mfence() {
1467  NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
1468  emit_byte( 0x0F );
1469  emit_byte( 0xAE );
1470  emit_byte( 0xF0 );
1471}
1472
1473void Assembler::mov(Register dst, Register src) {
1474  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
1475}
1476
1477void Assembler::movapd(XMMRegister dst, XMMRegister src) {
1478  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1479  int dstenc = dst->encoding();
1480  int srcenc = src->encoding();
1481  emit_byte(0x66);
1482  if (dstenc < 8) {
1483    if (srcenc >= 8) {
1484      prefix(REX_B);
1485      srcenc -= 8;
1486    }
1487  } else {
1488    if (srcenc < 8) {
1489      prefix(REX_R);
1490    } else {
1491      prefix(REX_RB);
1492      srcenc -= 8;
1493    }
1494    dstenc -= 8;
1495  }
1496  emit_byte(0x0F);
1497  emit_byte(0x28);
1498  emit_byte(0xC0 | dstenc << 3 | srcenc);
1499}
1500
1501void Assembler::movaps(XMMRegister dst, XMMRegister src) {
1502  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1503  int dstenc = dst->encoding();
1504  int srcenc = src->encoding();
1505  if (dstenc < 8) {
1506    if (srcenc >= 8) {
1507      prefix(REX_B);
1508      srcenc -= 8;
1509    }
1510  } else {
1511    if (srcenc < 8) {
1512      prefix(REX_R);
1513    } else {
1514      prefix(REX_RB);
1515      srcenc -= 8;
1516    }
1517    dstenc -= 8;
1518  }
1519  emit_byte(0x0F);
1520  emit_byte(0x28);
1521  emit_byte(0xC0 | dstenc << 3 | srcenc);
1522}
1523
1524void Assembler::movb(Register dst, Address src) {
1525  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
1526  InstructionMark im(this);
1527  prefix(src, dst, true);
1528  emit_byte(0x8A);
1529  emit_operand(dst, src);
1530}
1531
1532
1533void Assembler::movb(Address dst, int imm8) {
1534  InstructionMark im(this);
1535   prefix(dst);
1536  emit_byte(0xC6);
1537  emit_operand(rax, dst, 1);
1538  emit_byte(imm8);
1539}
1540
1541
1542void Assembler::movb(Address dst, Register src) {
1543  assert(src->has_byte_register(), "must have byte register");
1544  InstructionMark im(this);
1545  prefix(dst, src, true);
1546  emit_byte(0x88);
1547  emit_operand(src, dst);
1548}
1549
1550void Assembler::movdl(XMMRegister dst, Register src) {
1551  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1552  emit_byte(0x66);
1553  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1554  emit_byte(0x0F);
1555  emit_byte(0x6E);
1556  emit_byte(0xC0 | encode);
1557}
1558
1559void Assembler::movdl(Register dst, XMMRegister src) {
1560  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1561  emit_byte(0x66);
1562  // swap src/dst to get correct prefix
1563  int encode = prefix_and_encode(src->encoding(), dst->encoding());
1564  emit_byte(0x0F);
1565  emit_byte(0x7E);
1566  emit_byte(0xC0 | encode);
1567}
1568
1569void Assembler::movdqa(XMMRegister dst, Address src) {
1570  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1571  InstructionMark im(this);
1572  emit_byte(0x66);
1573  prefix(src, dst);
1574  emit_byte(0x0F);
1575  emit_byte(0x6F);
1576  emit_operand(dst, src);
1577}
1578
1579void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
1580  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1581  emit_byte(0x66);
1582  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
1583  emit_byte(0x0F);
1584  emit_byte(0x6F);
1585  emit_byte(0xC0 | encode);
1586}
1587
1588void Assembler::movdqa(Address dst, XMMRegister src) {
1589  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1590  InstructionMark im(this);
1591  emit_byte(0x66);
1592  prefix(dst, src);
1593  emit_byte(0x0F);
1594  emit_byte(0x7F);
1595  emit_operand(src, dst);
1596}
1597
1598void Assembler::movdqu(XMMRegister dst, Address src) {
1599  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1600  InstructionMark im(this);
1601  emit_byte(0xF3);
1602  prefix(src, dst);
1603  emit_byte(0x0F);
1604  emit_byte(0x6F);
1605  emit_operand(dst, src);
1606}
1607
1608void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
1609  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1610  emit_byte(0xF3);
1611  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
1612  emit_byte(0x0F);
1613  emit_byte(0x6F);
1614  emit_byte(0xC0 | encode);
1615}
1616
1617void Assembler::movdqu(Address dst, XMMRegister src) {
1618  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1619  InstructionMark im(this);
1620  emit_byte(0xF3);
1621  prefix(dst, src);
1622  emit_byte(0x0F);
1623  emit_byte(0x7F);
1624  emit_operand(src, dst);
1625}
1626
1627// Uses zero extension on 64bit
1628
1629void Assembler::movl(Register dst, int32_t imm32) {
1630  int encode = prefix_and_encode(dst->encoding());
1631  emit_byte(0xB8 | encode);
1632  emit_long(imm32);
1633}
1634
1635void Assembler::movl(Register dst, Register src) {
1636  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1637  emit_byte(0x8B);
1638  emit_byte(0xC0 | encode);
1639}
1640
1641void Assembler::movl(Register dst, Address src) {
1642  InstructionMark im(this);
1643  prefix(src, dst);
1644  emit_byte(0x8B);
1645  emit_operand(dst, src);
1646}
1647
1648void Assembler::movl(Address dst, int32_t imm32) {
1649  InstructionMark im(this);
1650  prefix(dst);
1651  emit_byte(0xC7);
1652  emit_operand(rax, dst, 4);
1653  emit_long(imm32);
1654}
1655
1656void Assembler::movl(Address dst, Register src) {
1657  InstructionMark im(this);
1658  prefix(dst, src);
1659  emit_byte(0x89);
1660  emit_operand(src, dst);
1661}
1662
1663// New cpus require to use movsd and movss to avoid partial register stall
1664// when loading from memory. But for old Opteron use movlpd instead of movsd.
1665// The selection is done in MacroAssembler::movdbl() and movflt().
1666void Assembler::movlpd(XMMRegister dst, Address src) {
1667  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1668  InstructionMark im(this);
1669  emit_byte(0x66);
1670  prefix(src, dst);
1671  emit_byte(0x0F);
1672  emit_byte(0x12);
1673  emit_operand(dst, src);
1674}
1675
1676void Assembler::movq( MMXRegister dst, Address src ) {
1677  assert( VM_Version::supports_mmx(), "" );
1678  emit_byte(0x0F);
1679  emit_byte(0x6F);
1680  emit_operand(dst, src);
1681}
1682
1683void Assembler::movq( Address dst, MMXRegister src ) {
1684  assert( VM_Version::supports_mmx(), "" );
1685  emit_byte(0x0F);
1686  emit_byte(0x7F);
1687  // workaround gcc (3.2.1-7a) bug
1688  // In that version of gcc with only an emit_operand(MMX, Address)
1689  // gcc will tail jump and try and reverse the parameters completely
1690  // obliterating dst in the process. By having a version available
1691  // that doesn't need to swap the args at the tail jump the bug is
1692  // avoided.
1693  emit_operand(dst, src);
1694}
1695
1696void Assembler::movq(XMMRegister dst, Address src) {
1697  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1698  InstructionMark im(this);
1699  emit_byte(0xF3);
1700  prefix(src, dst);
1701  emit_byte(0x0F);
1702  emit_byte(0x7E);
1703  emit_operand(dst, src);
1704}
1705
1706void Assembler::movq(Address dst, XMMRegister src) {
1707  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1708  InstructionMark im(this);
1709  emit_byte(0x66);
1710  prefix(dst, src);
1711  emit_byte(0x0F);
1712  emit_byte(0xD6);
1713  emit_operand(src, dst);
1714}
1715
1716void Assembler::movsbl(Register dst, Address src) { // movsxb
1717  InstructionMark im(this);
1718  prefix(src, dst);
1719  emit_byte(0x0F);
1720  emit_byte(0xBE);
1721  emit_operand(dst, src);
1722}
1723
1724void Assembler::movsbl(Register dst, Register src) { // movsxb
1725  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
1726  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1727  emit_byte(0x0F);
1728  emit_byte(0xBE);
1729  emit_byte(0xC0 | encode);
1730}
1731
1732void Assembler::movsd(XMMRegister dst, XMMRegister src) {
1733  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1734  emit_byte(0xF2);
1735  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1736  emit_byte(0x0F);
1737  emit_byte(0x10);
1738  emit_byte(0xC0 | encode);
1739}
1740
1741void Assembler::movsd(XMMRegister dst, Address src) {
1742  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1743  InstructionMark im(this);
1744  emit_byte(0xF2);
1745  prefix(src, dst);
1746  emit_byte(0x0F);
1747  emit_byte(0x10);
1748  emit_operand(dst, src);
1749}
1750
1751void Assembler::movsd(Address dst, XMMRegister src) {
1752  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1753  InstructionMark im(this);
1754  emit_byte(0xF2);
1755  prefix(dst, src);
1756  emit_byte(0x0F);
1757  emit_byte(0x11);
1758  emit_operand(src, dst);
1759}
1760
1761void Assembler::movss(XMMRegister dst, XMMRegister src) {
1762  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1763  emit_byte(0xF3);
1764  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1765  emit_byte(0x0F);
1766  emit_byte(0x10);
1767  emit_byte(0xC0 | encode);
1768}
1769
1770void Assembler::movss(XMMRegister dst, Address src) {
1771  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1772  InstructionMark im(this);
1773  emit_byte(0xF3);
1774  prefix(src, dst);
1775  emit_byte(0x0F);
1776  emit_byte(0x10);
1777  emit_operand(dst, src);
1778}
1779
1780void Assembler::movss(Address dst, XMMRegister src) {
1781  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1782  InstructionMark im(this);
1783  emit_byte(0xF3);
1784  prefix(dst, src);
1785  emit_byte(0x0F);
1786  emit_byte(0x11);
1787  emit_operand(src, dst);
1788}
1789
1790void Assembler::movswl(Register dst, Address src) { // movsxw
1791  InstructionMark im(this);
1792  prefix(src, dst);
1793  emit_byte(0x0F);
1794  emit_byte(0xBF);
1795  emit_operand(dst, src);
1796}
1797
1798void Assembler::movswl(Register dst, Register src) { // movsxw
1799  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1800  emit_byte(0x0F);
1801  emit_byte(0xBF);
1802  emit_byte(0xC0 | encode);
1803}
1804
1805void Assembler::movw(Address dst, int imm16) {
1806  InstructionMark im(this);
1807
1808  emit_byte(0x66); // switch to 16-bit mode
1809  prefix(dst);
1810  emit_byte(0xC7);
1811  emit_operand(rax, dst, 2);
1812  emit_word(imm16);
1813}
1814
1815void Assembler::movw(Register dst, Address src) {
1816  InstructionMark im(this);
1817  emit_byte(0x66);
1818  prefix(src, dst);
1819  emit_byte(0x8B);
1820  emit_operand(dst, src);
1821}
1822
1823void Assembler::movw(Address dst, Register src) {
1824  InstructionMark im(this);
1825  emit_byte(0x66);
1826  prefix(dst, src);
1827  emit_byte(0x89);
1828  emit_operand(src, dst);
1829}
1830
1831void Assembler::movzbl(Register dst, Address src) { // movzxb
1832  InstructionMark im(this);
1833  prefix(src, dst);
1834  emit_byte(0x0F);
1835  emit_byte(0xB6);
1836  emit_operand(dst, src);
1837}
1838
1839void Assembler::movzbl(Register dst, Register src) { // movzxb
1840  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
1841  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1842  emit_byte(0x0F);
1843  emit_byte(0xB6);
1844  emit_byte(0xC0 | encode);
1845}
1846
1847void Assembler::movzwl(Register dst, Address src) { // movzxw
1848  InstructionMark im(this);
1849  prefix(src, dst);
1850  emit_byte(0x0F);
1851  emit_byte(0xB7);
1852  emit_operand(dst, src);
1853}
1854
1855void Assembler::movzwl(Register dst, Register src) { // movzxw
1856  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1857  emit_byte(0x0F);
1858  emit_byte(0xB7);
1859  emit_byte(0xC0 | encode);
1860}
1861
1862void Assembler::mull(Address src) {
1863  InstructionMark im(this);
1864  prefix(src);
1865  emit_byte(0xF7);
1866  emit_operand(rsp, src);
1867}
1868
1869void Assembler::mull(Register src) {
1870  int encode = prefix_and_encode(src->encoding());
1871  emit_byte(0xF7);
1872  emit_byte(0xE0 | encode);
1873}
1874
1875void Assembler::mulsd(XMMRegister dst, Address src) {
1876  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1877  InstructionMark im(this);
1878  emit_byte(0xF2);
1879  prefix(src, dst);
1880  emit_byte(0x0F);
1881  emit_byte(0x59);
1882  emit_operand(dst, src);
1883}
1884
1885void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
1886  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1887  emit_byte(0xF2);
1888  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1889  emit_byte(0x0F);
1890  emit_byte(0x59);
1891  emit_byte(0xC0 | encode);
1892}
1893
1894void Assembler::mulss(XMMRegister dst, Address src) {
1895  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1896  InstructionMark im(this);
1897  emit_byte(0xF3);
1898  prefix(src, dst);
1899  emit_byte(0x0F);
1900  emit_byte(0x59);
1901  emit_operand(dst, src);
1902}
1903
1904void Assembler::mulss(XMMRegister dst, XMMRegister src) {
1905  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1906  emit_byte(0xF3);
1907  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1908  emit_byte(0x0F);
1909  emit_byte(0x59);
1910  emit_byte(0xC0 | encode);
1911}
1912
1913void Assembler::negl(Register dst) {
1914  int encode = prefix_and_encode(dst->encoding());
1915  emit_byte(0xF7);
1916  emit_byte(0xD8 | encode);
1917}
1918
1919void Assembler::nop(int i) {
1920#ifdef ASSERT
1921  assert(i > 0, " ");
1922  // The fancy nops aren't currently recognized by debuggers making it a
1923  // pain to disassemble code while debugging. If asserts are on clearly
1924  // speed is not an issue so simply use the single byte traditional nop
1925  // to do alignment.
1926
1927  for (; i > 0 ; i--) emit_byte(0x90);
1928  return;
1929
1930#endif // ASSERT
1931
1932  if (UseAddressNop && VM_Version::is_intel()) {
1933    //
1934    // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
1935    //  1: 0x90
1936    //  2: 0x66 0x90
1937    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
1938    //  4: 0x0F 0x1F 0x40 0x00
1939    //  5: 0x0F 0x1F 0x44 0x00 0x00
1940    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
1941    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
1942    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1943    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1944    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1945    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1946
1947    // The rest coding is Intel specific - don't use consecutive address nops
1948
1949    // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
1950    // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
1951    // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
1952    // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
1953
1954    while(i >= 15) {
1955      // For Intel don't generate consecutive addess nops (mix with regular nops)
1956      i -= 15;
1957      emit_byte(0x66);   // size prefix
1958      emit_byte(0x66);   // size prefix
1959      emit_byte(0x66);   // size prefix
1960      addr_nop_8();
1961      emit_byte(0x66);   // size prefix
1962      emit_byte(0x66);   // size prefix
1963      emit_byte(0x66);   // size prefix
1964      emit_byte(0x90);   // nop
1965    }
1966    switch (i) {
1967      case 14:
1968        emit_byte(0x66); // size prefix
1969      case 13:
1970        emit_byte(0x66); // size prefix
1971      case 12:
1972        addr_nop_8();
1973        emit_byte(0x66); // size prefix
1974        emit_byte(0x66); // size prefix
1975        emit_byte(0x66); // size prefix
1976        emit_byte(0x90); // nop
1977        break;
1978      case 11:
1979        emit_byte(0x66); // size prefix
1980      case 10:
1981        emit_byte(0x66); // size prefix
1982      case 9:
1983        emit_byte(0x66); // size prefix
1984      case 8:
1985        addr_nop_8();
1986        break;
1987      case 7:
1988        addr_nop_7();
1989        break;
1990      case 6:
1991        emit_byte(0x66); // size prefix
1992      case 5:
1993        addr_nop_5();
1994        break;
1995      case 4:
1996        addr_nop_4();
1997        break;
1998      case 3:
1999        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2000        emit_byte(0x66); // size prefix
2001      case 2:
2002        emit_byte(0x66); // size prefix
2003      case 1:
2004        emit_byte(0x90); // nop
2005        break;
2006      default:
2007        assert(i == 0, " ");
2008    }
2009    return;
2010  }
2011  if (UseAddressNop && VM_Version::is_amd()) {
2012    //
2013    // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
2014    //  1: 0x90
2015    //  2: 0x66 0x90
2016    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2017    //  4: 0x0F 0x1F 0x40 0x00
2018    //  5: 0x0F 0x1F 0x44 0x00 0x00
2019    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2020    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2021    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2022    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2023    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2024    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2025
2026    // The rest coding is AMD specific - use consecutive address nops
2027
2028    // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2029    // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2030    // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2031    // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2032    // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2033    //     Size prefixes (0x66) are added for larger sizes
2034
2035    while(i >= 22) {
2036      i -= 11;
2037      emit_byte(0x66); // size prefix
2038      emit_byte(0x66); // size prefix
2039      emit_byte(0x66); // size prefix
2040      addr_nop_8();
2041    }
2042    // Generate first nop for size between 21-12
2043    switch (i) {
2044      case 21:
2045        i -= 1;
2046        emit_byte(0x66); // size prefix
2047      case 20:
2048      case 19:
2049        i -= 1;
2050        emit_byte(0x66); // size prefix
2051      case 18:
2052      case 17:
2053        i -= 1;
2054        emit_byte(0x66); // size prefix
2055      case 16:
2056      case 15:
2057        i -= 8;
2058        addr_nop_8();
2059        break;
2060      case 14:
2061      case 13:
2062        i -= 7;
2063        addr_nop_7();
2064        break;
2065      case 12:
2066        i -= 6;
2067        emit_byte(0x66); // size prefix
2068        addr_nop_5();
2069        break;
2070      default:
2071        assert(i < 12, " ");
2072    }
2073
2074    // Generate second nop for size between 11-1
2075    switch (i) {
2076      case 11:
2077        emit_byte(0x66); // size prefix
2078      case 10:
2079        emit_byte(0x66); // size prefix
2080      case 9:
2081        emit_byte(0x66); // size prefix
2082      case 8:
2083        addr_nop_8();
2084        break;
2085      case 7:
2086        addr_nop_7();
2087        break;
2088      case 6:
2089        emit_byte(0x66); // size prefix
2090      case 5:
2091        addr_nop_5();
2092        break;
2093      case 4:
2094        addr_nop_4();
2095        break;
2096      case 3:
2097        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2098        emit_byte(0x66); // size prefix
2099      case 2:
2100        emit_byte(0x66); // size prefix
2101      case 1:
2102        emit_byte(0x90); // nop
2103        break;
2104      default:
2105        assert(i == 0, " ");
2106    }
2107    return;
2108  }
2109
2110  // Using nops with size prefixes "0x66 0x90".
2111  // From AMD Optimization Guide:
2112  //  1: 0x90
2113  //  2: 0x66 0x90
2114  //  3: 0x66 0x66 0x90
2115  //  4: 0x66 0x66 0x66 0x90
2116  //  5: 0x66 0x66 0x90 0x66 0x90
2117  //  6: 0x66 0x66 0x90 0x66 0x66 0x90
2118  //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
2119  //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
2120  //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2121  // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2122  //
2123  while(i > 12) {
2124    i -= 4;
2125    emit_byte(0x66); // size prefix
2126    emit_byte(0x66);
2127    emit_byte(0x66);
2128    emit_byte(0x90); // nop
2129  }
2130  // 1 - 12 nops
2131  if(i > 8) {
2132    if(i > 9) {
2133      i -= 1;
2134      emit_byte(0x66);
2135    }
2136    i -= 3;
2137    emit_byte(0x66);
2138    emit_byte(0x66);
2139    emit_byte(0x90);
2140  }
2141  // 1 - 8 nops
2142  if(i > 4) {
2143    if(i > 6) {
2144      i -= 1;
2145      emit_byte(0x66);
2146    }
2147    i -= 3;
2148    emit_byte(0x66);
2149    emit_byte(0x66);
2150    emit_byte(0x90);
2151  }
2152  switch (i) {
2153    case 4:
2154      emit_byte(0x66);
2155    case 3:
2156      emit_byte(0x66);
2157    case 2:
2158      emit_byte(0x66);
2159    case 1:
2160      emit_byte(0x90);
2161      break;
2162    default:
2163      assert(i == 0, " ");
2164  }
2165}
2166
2167void Assembler::notl(Register dst) {
2168  int encode = prefix_and_encode(dst->encoding());
2169  emit_byte(0xF7);
2170  emit_byte(0xD0 | encode );
2171}
2172
2173void Assembler::orl(Address dst, int32_t imm32) {
2174  InstructionMark im(this);
2175  prefix(dst);
2176  emit_byte(0x81);
2177  emit_operand(rcx, dst, 4);
2178  emit_long(imm32);
2179}
2180
2181void Assembler::orl(Register dst, int32_t imm32) {
2182  prefix(dst);
2183  emit_arith(0x81, 0xC8, dst, imm32);
2184}
2185
2186
2187void Assembler::orl(Register dst, Address src) {
2188  InstructionMark im(this);
2189  prefix(src, dst);
2190  emit_byte(0x0B);
2191  emit_operand(dst, src);
2192}
2193
2194
2195void Assembler::orl(Register dst, Register src) {
2196  (void) prefix_and_encode(dst->encoding(), src->encoding());
2197  emit_arith(0x0B, 0xC0, dst, src);
2198}
2199
2200void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
2201  assert(VM_Version::supports_sse4_2(), "");
2202
2203  InstructionMark im(this);
2204  emit_byte(0x66);
2205  prefix(src, dst);
2206  emit_byte(0x0F);
2207  emit_byte(0x3A);
2208  emit_byte(0x61);
2209  emit_operand(dst, src);
2210  emit_byte(imm8);
2211}
2212
2213void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
2214  assert(VM_Version::supports_sse4_2(), "");
2215
2216  emit_byte(0x66);
2217  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
2218  emit_byte(0x0F);
2219  emit_byte(0x3A);
2220  emit_byte(0x61);
2221  emit_byte(0xC0 | encode);
2222  emit_byte(imm8);
2223}
2224
2225// generic
2226void Assembler::pop(Register dst) {
2227  int encode = prefix_and_encode(dst->encoding());
2228  emit_byte(0x58 | encode);
2229}
2230
2231void Assembler::popcntl(Register dst, Address src) {
2232  assert(VM_Version::supports_popcnt(), "must support");
2233  InstructionMark im(this);
2234  emit_byte(0xF3);
2235  prefix(src, dst);
2236  emit_byte(0x0F);
2237  emit_byte(0xB8);
2238  emit_operand(dst, src);
2239}
2240
2241void Assembler::popcntl(Register dst, Register src) {
2242  assert(VM_Version::supports_popcnt(), "must support");
2243  emit_byte(0xF3);
2244  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2245  emit_byte(0x0F);
2246  emit_byte(0xB8);
2247  emit_byte(0xC0 | encode);
2248}
2249
2250void Assembler::popf() {
2251  emit_byte(0x9D);
2252}
2253
2254#ifndef _LP64 // no 32bit push/pop on amd64
2255void Assembler::popl(Address dst) {
2256  // NOTE: this will adjust stack by 8byte on 64bits
2257  InstructionMark im(this);
2258  prefix(dst);
2259  emit_byte(0x8F);
2260  emit_operand(rax, dst);
2261}
2262#endif
2263
2264void Assembler::prefetch_prefix(Address src) {
2265  prefix(src);
2266  emit_byte(0x0F);
2267}
2268
2269void Assembler::prefetchnta(Address src) {
2270  NOT_LP64(assert(VM_Version::supports_sse2(), "must support"));
2271  InstructionMark im(this);
2272  prefetch_prefix(src);
2273  emit_byte(0x18);
2274  emit_operand(rax, src); // 0, src
2275}
2276
2277void Assembler::prefetchr(Address src) {
2278  NOT_LP64(assert(VM_Version::supports_3dnow(), "must support"));
2279  InstructionMark im(this);
2280  prefetch_prefix(src);
2281  emit_byte(0x0D);
2282  emit_operand(rax, src); // 0, src
2283}
2284
2285void Assembler::prefetcht0(Address src) {
2286  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2287  InstructionMark im(this);
2288  prefetch_prefix(src);
2289  emit_byte(0x18);
2290  emit_operand(rcx, src); // 1, src
2291}
2292
2293void Assembler::prefetcht1(Address src) {
2294  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2295  InstructionMark im(this);
2296  prefetch_prefix(src);
2297  emit_byte(0x18);
2298  emit_operand(rdx, src); // 2, src
2299}
2300
2301void Assembler::prefetcht2(Address src) {
2302  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2303  InstructionMark im(this);
2304  prefetch_prefix(src);
2305  emit_byte(0x18);
2306  emit_operand(rbx, src); // 3, src
2307}
2308
2309void Assembler::prefetchw(Address src) {
2310  NOT_LP64(assert(VM_Version::supports_3dnow(), "must support"));
2311  InstructionMark im(this);
2312  prefetch_prefix(src);
2313  emit_byte(0x0D);
2314  emit_operand(rcx, src); // 1, src
2315}
2316
2317void Assembler::prefix(Prefix p) {
2318  a_byte(p);
2319}
2320
2321void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
2322  assert(isByte(mode), "invalid value");
2323  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2324
2325  emit_byte(0x66);
2326  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2327  emit_byte(0x0F);
2328  emit_byte(0x70);
2329  emit_byte(0xC0 | encode);
2330  emit_byte(mode & 0xFF);
2331
2332}
2333
2334void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
2335  assert(isByte(mode), "invalid value");
2336  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2337
2338  InstructionMark im(this);
2339  emit_byte(0x66);
2340  prefix(src, dst);
2341  emit_byte(0x0F);
2342  emit_byte(0x70);
2343  emit_operand(dst, src);
2344  emit_byte(mode & 0xFF);
2345}
2346
2347void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
2348  assert(isByte(mode), "invalid value");
2349  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2350
2351  emit_byte(0xF2);
2352  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2353  emit_byte(0x0F);
2354  emit_byte(0x70);
2355  emit_byte(0xC0 | encode);
2356  emit_byte(mode & 0xFF);
2357}
2358
2359void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
2360  assert(isByte(mode), "invalid value");
2361  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2362
2363  InstructionMark im(this);
2364  emit_byte(0xF2);
2365  prefix(src, dst); // QQ new
2366  emit_byte(0x0F);
2367  emit_byte(0x70);
2368  emit_operand(dst, src);
2369  emit_byte(mode & 0xFF);
2370}
2371
2372void Assembler::psrlq(XMMRegister dst, int shift) {
2373  // HMM Table D-1 says sse2 or mmx
2374  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2375
2376  int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding());
2377  emit_byte(0x66);
2378  emit_byte(0x0F);
2379  emit_byte(0x73);
2380  emit_byte(0xC0 | encode);
2381  emit_byte(shift);
2382}
2383
2384void Assembler::ptest(XMMRegister dst, Address src) {
2385  assert(VM_Version::supports_sse4_1(), "");
2386
2387  InstructionMark im(this);
2388  emit_byte(0x66);
2389  prefix(src, dst);
2390  emit_byte(0x0F);
2391  emit_byte(0x38);
2392  emit_byte(0x17);
2393  emit_operand(dst, src);
2394}
2395
2396void Assembler::ptest(XMMRegister dst, XMMRegister src) {
2397  assert(VM_Version::supports_sse4_1(), "");
2398
2399  emit_byte(0x66);
2400  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
2401  emit_byte(0x0F);
2402  emit_byte(0x38);
2403  emit_byte(0x17);
2404  emit_byte(0xC0 | encode);
2405}
2406
2407void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
2408  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2409  emit_byte(0x66);
2410  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2411  emit_byte(0x0F);
2412  emit_byte(0x60);
2413  emit_byte(0xC0 | encode);
2414}
2415
2416void Assembler::push(int32_t imm32) {
2417  // in 64bits we push 64bits onto the stack but only
2418  // take a 32bit immediate
2419  emit_byte(0x68);
2420  emit_long(imm32);
2421}
2422
2423void Assembler::push(Register src) {
2424  int encode = prefix_and_encode(src->encoding());
2425
2426  emit_byte(0x50 | encode);
2427}
2428
2429void Assembler::pushf() {
2430  emit_byte(0x9C);
2431}
2432
2433#ifndef _LP64 // no 32bit push/pop on amd64
2434void Assembler::pushl(Address src) {
2435  // Note this will push 64bit on 64bit
2436  InstructionMark im(this);
2437  prefix(src);
2438  emit_byte(0xFF);
2439  emit_operand(rsi, src);
2440}
2441#endif
2442
2443void Assembler::pxor(XMMRegister dst, Address src) {
2444  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2445  InstructionMark im(this);
2446  emit_byte(0x66);
2447  prefix(src, dst);
2448  emit_byte(0x0F);
2449  emit_byte(0xEF);
2450  emit_operand(dst, src);
2451}
2452
2453void Assembler::pxor(XMMRegister dst, XMMRegister src) {
2454  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2455  InstructionMark im(this);
2456  emit_byte(0x66);
2457  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2458  emit_byte(0x0F);
2459  emit_byte(0xEF);
2460  emit_byte(0xC0 | encode);
2461}
2462
2463void Assembler::rcll(Register dst, int imm8) {
2464  assert(isShiftCount(imm8), "illegal shift count");
2465  int encode = prefix_and_encode(dst->encoding());
2466  if (imm8 == 1) {
2467    emit_byte(0xD1);
2468    emit_byte(0xD0 | encode);
2469  } else {
2470    emit_byte(0xC1);
2471    emit_byte(0xD0 | encode);
2472    emit_byte(imm8);
2473  }
2474}
2475
2476// copies data from [esi] to [edi] using rcx pointer sized words
2477// generic
2478void Assembler::rep_mov() {
2479  emit_byte(0xF3);
2480  // MOVSQ
2481  LP64_ONLY(prefix(REX_W));
2482  emit_byte(0xA5);
2483}
2484
2485// sets rcx pointer sized words with rax, value at [edi]
2486// generic
2487void Assembler::rep_set() { // rep_set
2488  emit_byte(0xF3);
2489  // STOSQ
2490  LP64_ONLY(prefix(REX_W));
2491  emit_byte(0xAB);
2492}
2493
2494// scans rcx pointer sized words at [edi] for occurance of rax,
2495// generic
2496void Assembler::repne_scan() { // repne_scan
2497  emit_byte(0xF2);
2498  // SCASQ
2499  LP64_ONLY(prefix(REX_W));
2500  emit_byte(0xAF);
2501}
2502
2503#ifdef _LP64
2504// scans rcx 4 byte words at [edi] for occurance of rax,
2505// generic
2506void Assembler::repne_scanl() { // repne_scan
2507  emit_byte(0xF2);
2508  // SCASL
2509  emit_byte(0xAF);
2510}
2511#endif
2512
2513void Assembler::ret(int imm16) {
2514  if (imm16 == 0) {
2515    emit_byte(0xC3);
2516  } else {
2517    emit_byte(0xC2);
2518    emit_word(imm16);
2519  }
2520}
2521
2522void Assembler::sahf() {
2523#ifdef _LP64
2524  // Not supported in 64bit mode
2525  ShouldNotReachHere();
2526#endif
2527  emit_byte(0x9E);
2528}
2529
2530void Assembler::sarl(Register dst, int imm8) {
2531  int encode = prefix_and_encode(dst->encoding());
2532  assert(isShiftCount(imm8), "illegal shift count");
2533  if (imm8 == 1) {
2534    emit_byte(0xD1);
2535    emit_byte(0xF8 | encode);
2536  } else {
2537    emit_byte(0xC1);
2538    emit_byte(0xF8 | encode);
2539    emit_byte(imm8);
2540  }
2541}
2542
2543void Assembler::sarl(Register dst) {
2544  int encode = prefix_and_encode(dst->encoding());
2545  emit_byte(0xD3);
2546  emit_byte(0xF8 | encode);
2547}
2548
2549void Assembler::sbbl(Address dst, int32_t imm32) {
2550  InstructionMark im(this);
2551  prefix(dst);
2552  emit_arith_operand(0x81, rbx, dst, imm32);
2553}
2554
2555void Assembler::sbbl(Register dst, int32_t imm32) {
2556  prefix(dst);
2557  emit_arith(0x81, 0xD8, dst, imm32);
2558}
2559
2560
2561void Assembler::sbbl(Register dst, Address src) {
2562  InstructionMark im(this);
2563  prefix(src, dst);
2564  emit_byte(0x1B);
2565  emit_operand(dst, src);
2566}
2567
2568void Assembler::sbbl(Register dst, Register src) {
2569  (void) prefix_and_encode(dst->encoding(), src->encoding());
2570  emit_arith(0x1B, 0xC0, dst, src);
2571}
2572
2573void Assembler::setb(Condition cc, Register dst) {
2574  assert(0 <= cc && cc < 16, "illegal cc");
2575  int encode = prefix_and_encode(dst->encoding(), true);
2576  emit_byte(0x0F);
2577  emit_byte(0x90 | cc);
2578  emit_byte(0xC0 | encode);
2579}
2580
2581void Assembler::shll(Register dst, int imm8) {
2582  assert(isShiftCount(imm8), "illegal shift count");
2583  int encode = prefix_and_encode(dst->encoding());
2584  if (imm8 == 1 ) {
2585    emit_byte(0xD1);
2586    emit_byte(0xE0 | encode);
2587  } else {
2588    emit_byte(0xC1);
2589    emit_byte(0xE0 | encode);
2590    emit_byte(imm8);
2591  }
2592}
2593
2594void Assembler::shll(Register dst) {
2595  int encode = prefix_and_encode(dst->encoding());
2596  emit_byte(0xD3);
2597  emit_byte(0xE0 | encode);
2598}
2599
2600void Assembler::shrl(Register dst, int imm8) {
2601  assert(isShiftCount(imm8), "illegal shift count");
2602  int encode = prefix_and_encode(dst->encoding());
2603  emit_byte(0xC1);
2604  emit_byte(0xE8 | encode);
2605  emit_byte(imm8);
2606}
2607
2608void Assembler::shrl(Register dst) {
2609  int encode = prefix_and_encode(dst->encoding());
2610  emit_byte(0xD3);
2611  emit_byte(0xE8 | encode);
2612}
2613
2614// copies a single word from [esi] to [edi]
2615void Assembler::smovl() {
2616  emit_byte(0xA5);
2617}
2618
2619void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
2620  // HMM Table D-1 says sse2
2621  // NOT_LP64(assert(VM_Version::supports_sse(), ""));
2622  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2623  emit_byte(0xF2);
2624  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2625  emit_byte(0x0F);
2626  emit_byte(0x51);
2627  emit_byte(0xC0 | encode);
2628}
2629
2630void Assembler::stmxcsr( Address dst) {
2631  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2632  InstructionMark im(this);
2633  prefix(dst);
2634  emit_byte(0x0F);
2635  emit_byte(0xAE);
2636  emit_operand(as_Register(3), dst);
2637}
2638
2639void Assembler::subl(Address dst, int32_t imm32) {
2640  InstructionMark im(this);
2641  prefix(dst);
2642  if (is8bit(imm32)) {
2643    emit_byte(0x83);
2644    emit_operand(rbp, dst, 1);
2645    emit_byte(imm32 & 0xFF);
2646  } else {
2647    emit_byte(0x81);
2648    emit_operand(rbp, dst, 4);
2649    emit_long(imm32);
2650  }
2651}
2652
2653void Assembler::subl(Register dst, int32_t imm32) {
2654  prefix(dst);
2655  emit_arith(0x81, 0xE8, dst, imm32);
2656}
2657
2658void Assembler::subl(Address dst, Register src) {
2659  InstructionMark im(this);
2660  prefix(dst, src);
2661  emit_byte(0x29);
2662  emit_operand(src, dst);
2663}
2664
2665void Assembler::subl(Register dst, Address src) {
2666  InstructionMark im(this);
2667  prefix(src, dst);
2668  emit_byte(0x2B);
2669  emit_operand(dst, src);
2670}
2671
2672void Assembler::subl(Register dst, Register src) {
2673  (void) prefix_and_encode(dst->encoding(), src->encoding());
2674  emit_arith(0x2B, 0xC0, dst, src);
2675}
2676
2677void Assembler::subsd(XMMRegister dst, XMMRegister src) {
2678  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2679  emit_byte(0xF2);
2680  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2681  emit_byte(0x0F);
2682  emit_byte(0x5C);
2683  emit_byte(0xC0 | encode);
2684}
2685
2686void Assembler::subsd(XMMRegister dst, Address src) {
2687  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2688  InstructionMark im(this);
2689  emit_byte(0xF2);
2690  prefix(src, dst);
2691  emit_byte(0x0F);
2692  emit_byte(0x5C);
2693  emit_operand(dst, src);
2694}
2695
2696void Assembler::subss(XMMRegister dst, XMMRegister src) {
2697  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2698  emit_byte(0xF3);
2699  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2700  emit_byte(0x0F);
2701  emit_byte(0x5C);
2702  emit_byte(0xC0 | encode);
2703}
2704
2705void Assembler::subss(XMMRegister dst, Address src) {
2706  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2707  InstructionMark im(this);
2708  emit_byte(0xF3);
2709  prefix(src, dst);
2710  emit_byte(0x0F);
2711  emit_byte(0x5C);
2712  emit_operand(dst, src);
2713}
2714
2715void Assembler::testb(Register dst, int imm8) {
2716  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2717  (void) prefix_and_encode(dst->encoding(), true);
2718  emit_arith_b(0xF6, 0xC0, dst, imm8);
2719}
2720
2721void Assembler::testl(Register dst, int32_t imm32) {
2722  // not using emit_arith because test
2723  // doesn't support sign-extension of
2724  // 8bit operands
2725  int encode = dst->encoding();
2726  if (encode == 0) {
2727    emit_byte(0xA9);
2728  } else {
2729    encode = prefix_and_encode(encode);
2730    emit_byte(0xF7);
2731    emit_byte(0xC0 | encode);
2732  }
2733  emit_long(imm32);
2734}
2735
2736void Assembler::testl(Register dst, Register src) {
2737  (void) prefix_and_encode(dst->encoding(), src->encoding());
2738  emit_arith(0x85, 0xC0, dst, src);
2739}
2740
2741void Assembler::testl(Register dst, Address  src) {
2742  InstructionMark im(this);
2743  prefix(src, dst);
2744  emit_byte(0x85);
2745  emit_operand(dst, src);
2746}
2747
2748void Assembler::ucomisd(XMMRegister dst, Address src) {
2749  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2750  emit_byte(0x66);
2751  ucomiss(dst, src);
2752}
2753
2754void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
2755  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2756  emit_byte(0x66);
2757  ucomiss(dst, src);
2758}
2759
2760void Assembler::ucomiss(XMMRegister dst, Address src) {
2761  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2762
2763  InstructionMark im(this);
2764  prefix(src, dst);
2765  emit_byte(0x0F);
2766  emit_byte(0x2E);
2767  emit_operand(dst, src);
2768}
2769
2770void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
2771  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2772  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2773  emit_byte(0x0F);
2774  emit_byte(0x2E);
2775  emit_byte(0xC0 | encode);
2776}
2777
2778
2779void Assembler::xaddl(Address dst, Register src) {
2780  InstructionMark im(this);
2781  prefix(dst, src);
2782  emit_byte(0x0F);
2783  emit_byte(0xC1);
2784  emit_operand(src, dst);
2785}
2786
2787void Assembler::xchgl(Register dst, Address src) { // xchg
2788  InstructionMark im(this);
2789  prefix(src, dst);
2790  emit_byte(0x87);
2791  emit_operand(dst, src);
2792}
2793
2794void Assembler::xchgl(Register dst, Register src) {
2795  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2796  emit_byte(0x87);
2797  emit_byte(0xc0 | encode);
2798}
2799
2800void Assembler::xorl(Register dst, int32_t imm32) {
2801  prefix(dst);
2802  emit_arith(0x81, 0xF0, dst, imm32);
2803}
2804
2805void Assembler::xorl(Register dst, Address src) {
2806  InstructionMark im(this);
2807  prefix(src, dst);
2808  emit_byte(0x33);
2809  emit_operand(dst, src);
2810}
2811
2812void Assembler::xorl(Register dst, Register src) {
2813  (void) prefix_and_encode(dst->encoding(), src->encoding());
2814  emit_arith(0x33, 0xC0, dst, src);
2815}
2816
2817void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
2818  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2819  emit_byte(0x66);
2820  xorps(dst, src);
2821}
2822
2823void Assembler::xorpd(XMMRegister dst, Address src) {
2824  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2825  InstructionMark im(this);
2826  emit_byte(0x66);
2827  prefix(src, dst);
2828  emit_byte(0x0F);
2829  emit_byte(0x57);
2830  emit_operand(dst, src);
2831}
2832
2833
2834void Assembler::xorps(XMMRegister dst, XMMRegister src) {
2835  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2836  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2837  emit_byte(0x0F);
2838  emit_byte(0x57);
2839  emit_byte(0xC0 | encode);
2840}
2841
2842void Assembler::xorps(XMMRegister dst, Address src) {
2843  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2844  InstructionMark im(this);
2845  prefix(src, dst);
2846  emit_byte(0x0F);
2847  emit_byte(0x57);
2848  emit_operand(dst, src);
2849}
2850
2851#ifndef _LP64
2852// 32bit only pieces of the assembler
2853
2854void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
2855  // NO PREFIX AS NEVER 64BIT
2856  InstructionMark im(this);
2857  emit_byte(0x81);
2858  emit_byte(0xF8 | src1->encoding());
2859  emit_data(imm32, rspec, 0);
2860}
2861
2862void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
2863  // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs
2864  InstructionMark im(this);
2865  emit_byte(0x81);
2866  emit_operand(rdi, src1);
2867  emit_data(imm32, rspec, 0);
2868}
2869
2870// The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax,
2871// and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded
2872// into rdx:rax.  The ZF is set if the compared values were equal, and cleared otherwise.
2873void Assembler::cmpxchg8(Address adr) {
2874  InstructionMark im(this);
2875  emit_byte(0x0F);
2876  emit_byte(0xc7);
2877  emit_operand(rcx, adr);
2878}
2879
2880void Assembler::decl(Register dst) {
2881  // Don't use it directly. Use MacroAssembler::decrementl() instead.
2882 emit_byte(0x48 | dst->encoding());
2883}
2884
2885#endif // _LP64
2886
2887// 64bit typically doesn't use the x87 but needs to for the trig funcs
2888
2889void Assembler::fabs() {
2890  emit_byte(0xD9);
2891  emit_byte(0xE1);
2892}
2893
2894void Assembler::fadd(int i) {
2895  emit_farith(0xD8, 0xC0, i);
2896}
2897
2898void Assembler::fadd_d(Address src) {
2899  InstructionMark im(this);
2900  emit_byte(0xDC);
2901  emit_operand32(rax, src);
2902}
2903
2904void Assembler::fadd_s(Address src) {
2905  InstructionMark im(this);
2906  emit_byte(0xD8);
2907  emit_operand32(rax, src);
2908}
2909
2910void Assembler::fadda(int i) {
2911  emit_farith(0xDC, 0xC0, i);
2912}
2913
2914void Assembler::faddp(int i) {
2915  emit_farith(0xDE, 0xC0, i);
2916}
2917
2918void Assembler::fchs() {
2919  emit_byte(0xD9);
2920  emit_byte(0xE0);
2921}
2922
2923void Assembler::fcom(int i) {
2924  emit_farith(0xD8, 0xD0, i);
2925}
2926
2927void Assembler::fcomp(int i) {
2928  emit_farith(0xD8, 0xD8, i);
2929}
2930
2931void Assembler::fcomp_d(Address src) {
2932  InstructionMark im(this);
2933  emit_byte(0xDC);
2934  emit_operand32(rbx, src);
2935}
2936
2937void Assembler::fcomp_s(Address src) {
2938  InstructionMark im(this);
2939  emit_byte(0xD8);
2940  emit_operand32(rbx, src);
2941}
2942
2943void Assembler::fcompp() {
2944  emit_byte(0xDE);
2945  emit_byte(0xD9);
2946}
2947
2948void Assembler::fcos() {
2949  emit_byte(0xD9);
2950  emit_byte(0xFF);
2951}
2952
2953void Assembler::fdecstp() {
2954  emit_byte(0xD9);
2955  emit_byte(0xF6);
2956}
2957
2958void Assembler::fdiv(int i) {
2959  emit_farith(0xD8, 0xF0, i);
2960}
2961
2962void Assembler::fdiv_d(Address src) {
2963  InstructionMark im(this);
2964  emit_byte(0xDC);
2965  emit_operand32(rsi, src);
2966}
2967
2968void Assembler::fdiv_s(Address src) {
2969  InstructionMark im(this);
2970  emit_byte(0xD8);
2971  emit_operand32(rsi, src);
2972}
2973
2974void Assembler::fdiva(int i) {
2975  emit_farith(0xDC, 0xF8, i);
2976}
2977
2978// Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
2979//       is erroneous for some of the floating-point instructions below.
2980
2981void Assembler::fdivp(int i) {
2982  emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
2983}
2984
2985void Assembler::fdivr(int i) {
2986  emit_farith(0xD8, 0xF8, i);
2987}
2988
2989void Assembler::fdivr_d(Address src) {
2990  InstructionMark im(this);
2991  emit_byte(0xDC);
2992  emit_operand32(rdi, src);
2993}
2994
2995void Assembler::fdivr_s(Address src) {
2996  InstructionMark im(this);
2997  emit_byte(0xD8);
2998  emit_operand32(rdi, src);
2999}
3000
3001void Assembler::fdivra(int i) {
3002  emit_farith(0xDC, 0xF0, i);
3003}
3004
3005void Assembler::fdivrp(int i) {
3006  emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
3007}
3008
3009void Assembler::ffree(int i) {
3010  emit_farith(0xDD, 0xC0, i);
3011}
3012
3013void Assembler::fild_d(Address adr) {
3014  InstructionMark im(this);
3015  emit_byte(0xDF);
3016  emit_operand32(rbp, adr);
3017}
3018
3019void Assembler::fild_s(Address adr) {
3020  InstructionMark im(this);
3021  emit_byte(0xDB);
3022  emit_operand32(rax, adr);
3023}
3024
3025void Assembler::fincstp() {
3026  emit_byte(0xD9);
3027  emit_byte(0xF7);
3028}
3029
3030void Assembler::finit() {
3031  emit_byte(0x9B);
3032  emit_byte(0xDB);
3033  emit_byte(0xE3);
3034}
3035
3036void Assembler::fist_s(Address adr) {
3037  InstructionMark im(this);
3038  emit_byte(0xDB);
3039  emit_operand32(rdx, adr);
3040}
3041
3042void Assembler::fistp_d(Address adr) {
3043  InstructionMark im(this);
3044  emit_byte(0xDF);
3045  emit_operand32(rdi, adr);
3046}
3047
3048void Assembler::fistp_s(Address adr) {
3049  InstructionMark im(this);
3050  emit_byte(0xDB);
3051  emit_operand32(rbx, adr);
3052}
3053
3054void Assembler::fld1() {
3055  emit_byte(0xD9);
3056  emit_byte(0xE8);
3057}
3058
3059void Assembler::fld_d(Address adr) {
3060  InstructionMark im(this);
3061  emit_byte(0xDD);
3062  emit_operand32(rax, adr);
3063}
3064
3065void Assembler::fld_s(Address adr) {
3066  InstructionMark im(this);
3067  emit_byte(0xD9);
3068  emit_operand32(rax, adr);
3069}
3070
3071
3072void Assembler::fld_s(int index) {
3073  emit_farith(0xD9, 0xC0, index);
3074}
3075
3076void Assembler::fld_x(Address adr) {
3077  InstructionMark im(this);
3078  emit_byte(0xDB);
3079  emit_operand32(rbp, adr);
3080}
3081
3082void Assembler::fldcw(Address src) {
3083  InstructionMark im(this);
3084  emit_byte(0xd9);
3085  emit_operand32(rbp, src);
3086}
3087
3088void Assembler::fldenv(Address src) {
3089  InstructionMark im(this);
3090  emit_byte(0xD9);
3091  emit_operand32(rsp, src);
3092}
3093
3094void Assembler::fldlg2() {
3095  emit_byte(0xD9);
3096  emit_byte(0xEC);
3097}
3098
3099void Assembler::fldln2() {
3100  emit_byte(0xD9);
3101  emit_byte(0xED);
3102}
3103
3104void Assembler::fldz() {
3105  emit_byte(0xD9);
3106  emit_byte(0xEE);
3107}
3108
3109void Assembler::flog() {
3110  fldln2();
3111  fxch();
3112  fyl2x();
3113}
3114
3115void Assembler::flog10() {
3116  fldlg2();
3117  fxch();
3118  fyl2x();
3119}
3120
3121void Assembler::fmul(int i) {
3122  emit_farith(0xD8, 0xC8, i);
3123}
3124
3125void Assembler::fmul_d(Address src) {
3126  InstructionMark im(this);
3127  emit_byte(0xDC);
3128  emit_operand32(rcx, src);
3129}
3130
3131void Assembler::fmul_s(Address src) {
3132  InstructionMark im(this);
3133  emit_byte(0xD8);
3134  emit_operand32(rcx, src);
3135}
3136
3137void Assembler::fmula(int i) {
3138  emit_farith(0xDC, 0xC8, i);
3139}
3140
3141void Assembler::fmulp(int i) {
3142  emit_farith(0xDE, 0xC8, i);
3143}
3144
3145void Assembler::fnsave(Address dst) {
3146  InstructionMark im(this);
3147  emit_byte(0xDD);
3148  emit_operand32(rsi, dst);
3149}
3150
3151void Assembler::fnstcw(Address src) {
3152  InstructionMark im(this);
3153  emit_byte(0x9B);
3154  emit_byte(0xD9);
3155  emit_operand32(rdi, src);
3156}
3157
3158void Assembler::fnstsw_ax() {
3159  emit_byte(0xdF);
3160  emit_byte(0xE0);
3161}
3162
3163void Assembler::fprem() {
3164  emit_byte(0xD9);
3165  emit_byte(0xF8);
3166}
3167
3168void Assembler::fprem1() {
3169  emit_byte(0xD9);
3170  emit_byte(0xF5);
3171}
3172
3173void Assembler::frstor(Address src) {
3174  InstructionMark im(this);
3175  emit_byte(0xDD);
3176  emit_operand32(rsp, src);
3177}
3178
3179void Assembler::fsin() {
3180  emit_byte(0xD9);
3181  emit_byte(0xFE);
3182}
3183
3184void Assembler::fsqrt() {
3185  emit_byte(0xD9);
3186  emit_byte(0xFA);
3187}
3188
3189void Assembler::fst_d(Address adr) {
3190  InstructionMark im(this);
3191  emit_byte(0xDD);
3192  emit_operand32(rdx, adr);
3193}
3194
3195void Assembler::fst_s(Address adr) {
3196  InstructionMark im(this);
3197  emit_byte(0xD9);
3198  emit_operand32(rdx, adr);
3199}
3200
3201void Assembler::fstp_d(Address adr) {
3202  InstructionMark im(this);
3203  emit_byte(0xDD);
3204  emit_operand32(rbx, adr);
3205}
3206
3207void Assembler::fstp_d(int index) {
3208  emit_farith(0xDD, 0xD8, index);
3209}
3210
3211void Assembler::fstp_s(Address adr) {
3212  InstructionMark im(this);
3213  emit_byte(0xD9);
3214  emit_operand32(rbx, adr);
3215}
3216
3217void Assembler::fstp_x(Address adr) {
3218  InstructionMark im(this);
3219  emit_byte(0xDB);
3220  emit_operand32(rdi, adr);
3221}
3222
3223void Assembler::fsub(int i) {
3224  emit_farith(0xD8, 0xE0, i);
3225}
3226
3227void Assembler::fsub_d(Address src) {
3228  InstructionMark im(this);
3229  emit_byte(0xDC);
3230  emit_operand32(rsp, src);
3231}
3232
3233void Assembler::fsub_s(Address src) {
3234  InstructionMark im(this);
3235  emit_byte(0xD8);
3236  emit_operand32(rsp, src);
3237}
3238
3239void Assembler::fsuba(int i) {
3240  emit_farith(0xDC, 0xE8, i);
3241}
3242
3243void Assembler::fsubp(int i) {
3244  emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
3245}
3246
3247void Assembler::fsubr(int i) {
3248  emit_farith(0xD8, 0xE8, i);
3249}
3250
3251void Assembler::fsubr_d(Address src) {
3252  InstructionMark im(this);
3253  emit_byte(0xDC);
3254  emit_operand32(rbp, src);
3255}
3256
3257void Assembler::fsubr_s(Address src) {
3258  InstructionMark im(this);
3259  emit_byte(0xD8);
3260  emit_operand32(rbp, src);
3261}
3262
3263void Assembler::fsubra(int i) {
3264  emit_farith(0xDC, 0xE0, i);
3265}
3266
3267void Assembler::fsubrp(int i) {
3268  emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
3269}
3270
3271void Assembler::ftan() {
3272  emit_byte(0xD9);
3273  emit_byte(0xF2);
3274  emit_byte(0xDD);
3275  emit_byte(0xD8);
3276}
3277
3278void Assembler::ftst() {
3279  emit_byte(0xD9);
3280  emit_byte(0xE4);
3281}
3282
3283void Assembler::fucomi(int i) {
3284  // make sure the instruction is supported (introduced for P6, together with cmov)
3285  guarantee(VM_Version::supports_cmov(), "illegal instruction");
3286  emit_farith(0xDB, 0xE8, i);
3287}
3288
3289void Assembler::fucomip(int i) {
3290  // make sure the instruction is supported (introduced for P6, together with cmov)
3291  guarantee(VM_Version::supports_cmov(), "illegal instruction");
3292  emit_farith(0xDF, 0xE8, i);
3293}
3294
3295void Assembler::fwait() {
3296  emit_byte(0x9B);
3297}
3298
3299void Assembler::fxch(int i) {
3300  emit_farith(0xD9, 0xC8, i);
3301}
3302
3303void Assembler::fyl2x() {
3304  emit_byte(0xD9);
3305  emit_byte(0xF1);
3306}
3307
3308
3309#ifndef _LP64
3310
3311void Assembler::incl(Register dst) {
3312  // Don't use it directly. Use MacroAssembler::incrementl() instead.
3313 emit_byte(0x40 | dst->encoding());
3314}
3315
3316void Assembler::lea(Register dst, Address src) {
3317  leal(dst, src);
3318}
3319
3320void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
3321  InstructionMark im(this);
3322  emit_byte(0xC7);
3323  emit_operand(rax, dst);
3324  emit_data((int)imm32, rspec, 0);
3325}
3326
3327void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
3328  InstructionMark im(this);
3329  int encode = prefix_and_encode(dst->encoding());
3330  emit_byte(0xB8 | encode);
3331  emit_data((int)imm32, rspec, 0);
3332}
3333
3334void Assembler::popa() { // 32bit
3335  emit_byte(0x61);
3336}
3337
3338void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
3339  InstructionMark im(this);
3340  emit_byte(0x68);
3341  emit_data(imm32, rspec, 0);
3342}
3343
3344void Assembler::pusha() { // 32bit
3345  emit_byte(0x60);
3346}
3347
3348void Assembler::set_byte_if_not_zero(Register dst) {
3349  emit_byte(0x0F);
3350  emit_byte(0x95);
3351  emit_byte(0xE0 | dst->encoding());
3352}
3353
3354void Assembler::shldl(Register dst, Register src) {
3355  emit_byte(0x0F);
3356  emit_byte(0xA5);
3357  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
3358}
3359
3360void Assembler::shrdl(Register dst, Register src) {
3361  emit_byte(0x0F);
3362  emit_byte(0xAD);
3363  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
3364}
3365
3366#else // LP64
3367
3368void Assembler::set_byte_if_not_zero(Register dst) {
3369  int enc = prefix_and_encode(dst->encoding(), true);
3370  emit_byte(0x0F);
3371  emit_byte(0x95);
3372  emit_byte(0xE0 | enc);
3373}
3374
// 64-bit-only pieces of the assembler.
// This should only be used by 64-bit instructions that can use rip-relative
// addressing; it cannot be used by instructions that want an immediate value.
3378
// Returns true when 'adr' can safely be addressed through a signed 32-bit
// displacement. The decision is driven first by the relocation type of the
// literal and then, for external/runtime targets, by the distance from the
// code cache bounds and from the current code position.
bool Assembler::reachable(AddressLiteral adr) {
  int64_t disp;
  // None will force a 64bit literal to the code stream. Likely a placeholder
  // for something that will be patched later and we need to be certain it
  // will always be reachable.
  if (adr.reloc() == relocInfo::none) {
    return false;
  }
  if (adr.reloc() == relocInfo::internal_word_type) {
    // This should be rip relative and easily reachable.
    return true;
  }
  if (adr.reloc() == relocInfo::virtual_call_type ||
      adr.reloc() == relocInfo::opt_virtual_call_type ||
      adr.reloc() == relocInfo::static_call_type ||
      adr.reloc() == relocInfo::static_stub_type ) {
    // This should be rip relative within the code cache and easily
    // reachable until we get huge code caches. (At which point
    // ic code is going to have issues).
    return true;
  }
  // Any remaining relocation type other than the four listed here is treated
  // as unreachable.
  if (adr.reloc() != relocInfo::external_word_type &&
      adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
      adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
      adr.reloc() != relocInfo::runtime_call_type ) {
    return false;
  }

  // Stress the correction code
  if (ForceUnreachable) {
    // Must be runtimecall reloc, see if it is in the codecache
    // Flipping stuff in the codecache to be unreachable causes issues
    // with things like inline caches where the additional instructions
    // are not handled.
    if (CodeCache::find_blob(adr._target) == NULL) {
      return false;
    }
  }
  // For external_word_type/runtime_call_type if it is reachable from where we
  // are now (possibly a temp buffer) and where we might end up
  // anywhere in the codeCache then we are always reachable.
  // This would have to change if we ever save/restore shared code
  // to be more pessimistic.

  // Check the displacement from both ends of the code cache.
  disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;
  disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;

  // Displacement from the current emission position.
  disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int));

  // Because rip relative is a disp + address_of_next_instruction and we
  // don't know the value of address_of_next_instruction we apply a fudge factor
  // to make sure we will be ok no matter the size of the instruction we get placed into.
  // We don't have to fudge the checks above here because they are already worst case.

  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
  // + 4 because better safe than sorry.
  const int fudge = 12 + 4;
  // Widen the displacement away from zero so the check is pessimistic.
  if (disp < 0) {
    disp -= fudge;
  } else {
    disp += fudge;
  }
  return is_simm32(disp);
}
3445
3446void Assembler::emit_data64(jlong data,
3447                            relocInfo::relocType rtype,
3448                            int format) {
3449  if (rtype == relocInfo::none) {
3450    emit_long64(data);
3451  } else {
3452    emit_data64(data, Relocation::spec_simple(rtype), format);
3453  }
3454}
3455
3456void Assembler::emit_data64(jlong data,
3457                            RelocationHolder const& rspec,
3458                            int format) {
3459  assert(imm_operand == 0, "default format must be immediate in this file");
3460  assert(imm_operand == format, "must be immediate");
3461  assert(inst_mark() != NULL, "must be inside InstructionMark");
3462  // Do not use AbstractAssembler::relocate, which is not intended for
3463  // embedded words.  Instead, relocate to the enclosing instruction.
3464  code_section()->relocate(inst_mark(), rspec, format);
3465#ifdef ASSERT
3466  check_relocation(rspec, format);
3467#endif
3468  emit_long64(data);
3469}
3470
3471int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
3472  if (reg_enc >= 8) {
3473    prefix(REX_B);
3474    reg_enc -= 8;
3475  } else if (byteinst && reg_enc >= 4) {
3476    prefix(REX);
3477  }
3478  return reg_enc;
3479}
3480
3481int Assembler::prefixq_and_encode(int reg_enc) {
3482  if (reg_enc < 8) {
3483    prefix(REX_W);
3484  } else {
3485    prefix(REX_WB);
3486    reg_enc -= 8;
3487  }
3488  return reg_enc;
3489}
3490
3491int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
3492  if (dst_enc < 8) {
3493    if (src_enc >= 8) {
3494      prefix(REX_B);
3495      src_enc -= 8;
3496    } else if (byteinst && src_enc >= 4) {
3497      prefix(REX);
3498    }
3499  } else {
3500    if (src_enc < 8) {
3501      prefix(REX_R);
3502    } else {
3503      prefix(REX_RB);
3504      src_enc -= 8;
3505    }
3506    dst_enc -= 8;
3507  }
3508  return dst_enc << 3 | src_enc;
3509}
3510
3511int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
3512  if (dst_enc < 8) {
3513    if (src_enc < 8) {
3514      prefix(REX_W);
3515    } else {
3516      prefix(REX_WB);
3517      src_enc -= 8;
3518    }
3519  } else {
3520    if (src_enc < 8) {
3521      prefix(REX_WR);
3522    } else {
3523      prefix(REX_WRB);
3524      src_enc -= 8;
3525    }
3526    dst_enc -= 8;
3527  }
3528  return dst_enc << 3 | src_enc;
3529}
3530
3531void Assembler::prefix(Register reg) {
3532  if (reg->encoding() >= 8) {
3533    prefix(REX_B);
3534  }
3535}
3536
3537void Assembler::prefix(Address adr) {
3538  if (adr.base_needs_rex()) {
3539    if (adr.index_needs_rex()) {
3540      prefix(REX_XB);
3541    } else {
3542      prefix(REX_B);
3543    }
3544  } else {
3545    if (adr.index_needs_rex()) {
3546      prefix(REX_X);
3547    }
3548  }
3549}
3550
3551void Assembler::prefixq(Address adr) {
3552  if (adr.base_needs_rex()) {
3553    if (adr.index_needs_rex()) {
3554      prefix(REX_WXB);
3555    } else {
3556      prefix(REX_WB);
3557    }
3558  } else {
3559    if (adr.index_needs_rex()) {
3560      prefix(REX_WX);
3561    } else {
3562      prefix(REX_W);
3563    }
3564  }
3565}
3566
3567
3568void Assembler::prefix(Address adr, Register reg, bool byteinst) {
3569  if (reg->encoding() < 8) {
3570    if (adr.base_needs_rex()) {
3571      if (adr.index_needs_rex()) {
3572        prefix(REX_XB);
3573      } else {
3574        prefix(REX_B);
3575      }
3576    } else {
3577      if (adr.index_needs_rex()) {
3578        prefix(REX_X);
3579      } else if (reg->encoding() >= 4 ) {
3580        prefix(REX);
3581      }
3582    }
3583  } else {
3584    if (adr.base_needs_rex()) {
3585      if (adr.index_needs_rex()) {
3586        prefix(REX_RXB);
3587      } else {
3588        prefix(REX_RB);
3589      }
3590    } else {
3591      if (adr.index_needs_rex()) {
3592        prefix(REX_RX);
3593      } else {
3594        prefix(REX_R);
3595      }
3596    }
3597  }
3598}
3599
3600void Assembler::prefixq(Address adr, Register src) {
3601  if (src->encoding() < 8) {
3602    if (adr.base_needs_rex()) {
3603      if (adr.index_needs_rex()) {
3604        prefix(REX_WXB);
3605      } else {
3606        prefix(REX_WB);
3607      }
3608    } else {
3609      if (adr.index_needs_rex()) {
3610        prefix(REX_WX);
3611      } else {
3612        prefix(REX_W);
3613      }
3614    }
3615  } else {
3616    if (adr.base_needs_rex()) {
3617      if (adr.index_needs_rex()) {
3618        prefix(REX_WRXB);
3619      } else {
3620        prefix(REX_WRB);
3621      }
3622    } else {
3623      if (adr.index_needs_rex()) {
3624        prefix(REX_WRX);
3625      } else {
3626        prefix(REX_WR);
3627      }
3628    }
3629  }
3630}
3631
3632void Assembler::prefix(Address adr, XMMRegister reg) {
3633  if (reg->encoding() < 8) {
3634    if (adr.base_needs_rex()) {
3635      if (adr.index_needs_rex()) {
3636        prefix(REX_XB);
3637      } else {
3638        prefix(REX_B);
3639      }
3640    } else {
3641      if (adr.index_needs_rex()) {
3642        prefix(REX_X);
3643      }
3644    }
3645  } else {
3646    if (adr.base_needs_rex()) {
3647      if (adr.index_needs_rex()) {
3648        prefix(REX_RXB);
3649      } else {
3650        prefix(REX_RB);
3651      }
3652    } else {
3653      if (adr.index_needs_rex()) {
3654        prefix(REX_RX);
3655      } else {
3656        prefix(REX_R);
3657      }
3658    }
3659  }
3660}
3661
3662void Assembler::adcq(Register dst, int32_t imm32) {
3663  (void) prefixq_and_encode(dst->encoding());
3664  emit_arith(0x81, 0xD0, dst, imm32);
3665}
3666
3667void Assembler::adcq(Register dst, Address src) {
3668  InstructionMark im(this);
3669  prefixq(src, dst);
3670  emit_byte(0x13);
3671  emit_operand(dst, src);
3672}
3673
3674void Assembler::adcq(Register dst, Register src) {
3675  (int) prefixq_and_encode(dst->encoding(), src->encoding());
3676  emit_arith(0x13, 0xC0, dst, src);
3677}
3678
3679void Assembler::addq(Address dst, int32_t imm32) {
3680  InstructionMark im(this);
3681  prefixq(dst);
3682  emit_arith_operand(0x81, rax, dst,imm32);
3683}
3684
3685void Assembler::addq(Address dst, Register src) {
3686  InstructionMark im(this);
3687  prefixq(dst, src);
3688  emit_byte(0x01);
3689  emit_operand(src, dst);
3690}
3691
3692void Assembler::addq(Register dst, int32_t imm32) {
3693  (void) prefixq_and_encode(dst->encoding());
3694  emit_arith(0x81, 0xC0, dst, imm32);
3695}
3696
3697void Assembler::addq(Register dst, Address src) {
3698  InstructionMark im(this);
3699  prefixq(src, dst);
3700  emit_byte(0x03);
3701  emit_operand(dst, src);
3702}
3703
3704void Assembler::addq(Register dst, Register src) {
3705  (void) prefixq_and_encode(dst->encoding(), src->encoding());
3706  emit_arith(0x03, 0xC0, dst, src);
3707}
3708
3709void Assembler::andq(Register dst, int32_t imm32) {
3710  (void) prefixq_and_encode(dst->encoding());
3711  emit_arith(0x81, 0xE0, dst, imm32);
3712}
3713
3714void Assembler::andq(Register dst, Address src) {
3715  InstructionMark im(this);
3716  prefixq(src, dst);
3717  emit_byte(0x23);
3718  emit_operand(dst, src);
3719}
3720
3721void Assembler::andq(Register dst, Register src) {
3722  (int) prefixq_and_encode(dst->encoding(), src->encoding());
3723  emit_arith(0x23, 0xC0, dst, src);
3724}
3725
3726void Assembler::bsfq(Register dst, Register src) {
3727  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3728  emit_byte(0x0F);
3729  emit_byte(0xBC);
3730  emit_byte(0xC0 | encode);
3731}
3732
3733void Assembler::bsrq(Register dst, Register src) {
3734  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
3735  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3736  emit_byte(0x0F);
3737  emit_byte(0xBD);
3738  emit_byte(0xC0 | encode);
3739}
3740
3741void Assembler::bswapq(Register reg) {
3742  int encode = prefixq_and_encode(reg->encoding());
3743  emit_byte(0x0F);
3744  emit_byte(0xC8 | encode);
3745}
3746
3747void Assembler::cdqq() {
3748  prefix(REX_W);
3749  emit_byte(0x99);
3750}
3751
3752void Assembler::clflush(Address adr) {
3753  prefix(adr);
3754  emit_byte(0x0F);
3755  emit_byte(0xAE);
3756  emit_operand(rdi, adr);
3757}
3758
3759void Assembler::cmovq(Condition cc, Register dst, Register src) {
3760  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3761  emit_byte(0x0F);
3762  emit_byte(0x40 | cc);
3763  emit_byte(0xC0 | encode);
3764}
3765
3766void Assembler::cmovq(Condition cc, Register dst, Address src) {
3767  InstructionMark im(this);
3768  prefixq(src, dst);
3769  emit_byte(0x0F);
3770  emit_byte(0x40 | cc);
3771  emit_operand(dst, src);
3772}
3773
3774void Assembler::cmpq(Address dst, int32_t imm32) {
3775  InstructionMark im(this);
3776  prefixq(dst);
3777  emit_byte(0x81);
3778  emit_operand(rdi, dst, 4);
3779  emit_long(imm32);
3780}
3781
3782void Assembler::cmpq(Register dst, int32_t imm32) {
3783  (void) prefixq_and_encode(dst->encoding());
3784  emit_arith(0x81, 0xF8, dst, imm32);
3785}
3786
3787void Assembler::cmpq(Address dst, Register src) {
3788  InstructionMark im(this);
3789  prefixq(dst, src);
3790  emit_byte(0x3B);
3791  emit_operand(src, dst);
3792}
3793
3794void Assembler::cmpq(Register dst, Register src) {
3795  (void) prefixq_and_encode(dst->encoding(), src->encoding());
3796  emit_arith(0x3B, 0xC0, dst, src);
3797}
3798
3799void Assembler::cmpq(Register dst, Address  src) {
3800  InstructionMark im(this);
3801  prefixq(src, dst);
3802  emit_byte(0x3B);
3803  emit_operand(dst, src);
3804}
3805
3806void Assembler::cmpxchgq(Register reg, Address adr) {
3807  InstructionMark im(this);
3808  prefixq(adr, reg);
3809  emit_byte(0x0F);
3810  emit_byte(0xB1);
3811  emit_operand(reg, adr);
3812}
3813
3814void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
3815  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3816  emit_byte(0xF2);
3817  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3818  emit_byte(0x0F);
3819  emit_byte(0x2A);
3820  emit_byte(0xC0 | encode);
3821}
3822
3823void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
3824  NOT_LP64(assert(VM_Version::supports_sse(), ""));
3825  emit_byte(0xF3);
3826  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3827  emit_byte(0x0F);
3828  emit_byte(0x2A);
3829  emit_byte(0xC0 | encode);
3830}
3831
3832void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
3833  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3834  emit_byte(0xF2);
3835  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3836  emit_byte(0x0F);
3837  emit_byte(0x2C);
3838  emit_byte(0xC0 | encode);
3839}
3840
3841void Assembler::cvttss2siq(Register dst, XMMRegister src) {
3842  NOT_LP64(assert(VM_Version::supports_sse(), ""));
3843  emit_byte(0xF3);
3844  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3845  emit_byte(0x0F);
3846  emit_byte(0x2C);
3847  emit_byte(0xC0 | encode);
3848}
3849
3850void Assembler::decl(Register dst) {
3851  // Don't use it directly. Use MacroAssembler::decrementl() instead.
3852  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
3853  int encode = prefix_and_encode(dst->encoding());
3854  emit_byte(0xFF);
3855  emit_byte(0xC8 | encode);
3856}
3857
3858void Assembler::decq(Register dst) {
3859  // Don't use it directly. Use MacroAssembler::decrementq() instead.
3860  // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
3861  int encode = prefixq_and_encode(dst->encoding());
3862  emit_byte(0xFF);
3863  emit_byte(0xC8 | encode);
3864}
3865
3866void Assembler::decq(Address dst) {
3867  // Don't use it directly. Use MacroAssembler::decrementq() instead.
3868  InstructionMark im(this);
3869  prefixq(dst);
3870  emit_byte(0xFF);
3871  emit_operand(rcx, dst);
3872}
3873
3874void Assembler::fxrstor(Address src) {
3875  prefixq(src);
3876  emit_byte(0x0F);
3877  emit_byte(0xAE);
3878  emit_operand(as_Register(1), src);
3879}
3880
3881void Assembler::fxsave(Address dst) {
3882  prefixq(dst);
3883  emit_byte(0x0F);
3884  emit_byte(0xAE);
3885  emit_operand(as_Register(0), dst);
3886}
3887
3888void Assembler::idivq(Register src) {
3889  int encode = prefixq_and_encode(src->encoding());
3890  emit_byte(0xF7);
3891  emit_byte(0xF8 | encode);
3892}
3893
3894void Assembler::imulq(Register dst, Register src) {
3895  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3896  emit_byte(0x0F);
3897  emit_byte(0xAF);
3898  emit_byte(0xC0 | encode);
3899}
3900
3901void Assembler::imulq(Register dst, Register src, int value) {
3902  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3903  if (is8bit(value)) {
3904    emit_byte(0x6B);
3905    emit_byte(0xC0 | encode);
3906    emit_byte(value);
3907  } else {
3908    emit_byte(0x69);
3909    emit_byte(0xC0 | encode);
3910    emit_long(value);
3911  }
3912}
3913
3914void Assembler::incl(Register dst) {
3915  // Don't use it directly. Use MacroAssembler::incrementl() instead.
3916  // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
3917  int encode = prefix_and_encode(dst->encoding());
3918  emit_byte(0xFF);
3919  emit_byte(0xC0 | encode);
3920}
3921
3922void Assembler::incq(Register dst) {
3923  // Don't use it directly. Use MacroAssembler::incrementq() instead.
3924  // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
3925  int encode = prefixq_and_encode(dst->encoding());
3926  emit_byte(0xFF);
3927  emit_byte(0xC0 | encode);
3928}
3929
3930void Assembler::incq(Address dst) {
3931  // Don't use it directly. Use MacroAssembler::incrementq() instead.
3932  InstructionMark im(this);
3933  prefixq(dst);
3934  emit_byte(0xFF);
3935  emit_operand(rax, dst);
3936}
3937
3938void Assembler::lea(Register dst, Address src) {
3939  leaq(dst, src);
3940}
3941
3942void Assembler::leaq(Register dst, Address src) {
3943  InstructionMark im(this);
3944  prefixq(src, dst);
3945  emit_byte(0x8D);
3946  emit_operand(dst, src);
3947}
3948
3949void Assembler::mov64(Register dst, int64_t imm64) {
3950  InstructionMark im(this);
3951  int encode = prefixq_and_encode(dst->encoding());
3952  emit_byte(0xB8 | encode);
3953  emit_long64(imm64);
3954}
3955
3956void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
3957  InstructionMark im(this);
3958  int encode = prefixq_and_encode(dst->encoding());
3959  emit_byte(0xB8 | encode);
3960  emit_data64(imm64, rspec);
3961}
3962
3963void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
3964  InstructionMark im(this);
3965  int encode = prefix_and_encode(dst->encoding());
3966  emit_byte(0xB8 | encode);
3967  emit_data((int)imm32, rspec, narrow_oop_operand);
3968}
3969
3970void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
3971  InstructionMark im(this);
3972  prefix(dst);
3973  emit_byte(0xC7);
3974  emit_operand(rax, dst, 4);
3975  emit_data((int)imm32, rspec, narrow_oop_operand);
3976}
3977
3978void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
3979  InstructionMark im(this);
3980  int encode = prefix_and_encode(src1->encoding());
3981  emit_byte(0x81);
3982  emit_byte(0xF8 | encode);
3983  emit_data((int)imm32, rspec, narrow_oop_operand);
3984}
3985
3986void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
3987  InstructionMark im(this);
3988  prefix(src1);
3989  emit_byte(0x81);
3990  emit_operand(rax, src1, 4);
3991  emit_data((int)imm32, rspec, narrow_oop_operand);
3992}
3993
3994void Assembler::lzcntq(Register dst, Register src) {
3995  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
3996  emit_byte(0xF3);
3997  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3998  emit_byte(0x0F);
3999  emit_byte(0xBD);
4000  emit_byte(0xC0 | encode);
4001}
4002
4003void Assembler::movdq(XMMRegister dst, Register src) {
4004  // table D-1 says MMX/SSE2
4005  NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
4006  emit_byte(0x66);
4007  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4008  emit_byte(0x0F);
4009  emit_byte(0x6E);
4010  emit_byte(0xC0 | encode);
4011}
4012
4013void Assembler::movdq(Register dst, XMMRegister src) {
4014  // table D-1 says MMX/SSE2
4015  NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
4016  emit_byte(0x66);
4017  // swap src/dst to get correct prefix
4018  int encode = prefixq_and_encode(src->encoding(), dst->encoding());
4019  emit_byte(0x0F);
4020  emit_byte(0x7E);
4021  emit_byte(0xC0 | encode);
4022}
4023
4024void Assembler::movq(Register dst, Register src) {
4025  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4026  emit_byte(0x8B);
4027  emit_byte(0xC0 | encode);
4028}
4029
4030void Assembler::movq(Register dst, Address src) {
4031  InstructionMark im(this);
4032  prefixq(src, dst);
4033  emit_byte(0x8B);
4034  emit_operand(dst, src);
4035}
4036
4037void Assembler::movq(Address dst, Register src) {
4038  InstructionMark im(this);
4039  prefixq(dst, src);
4040  emit_byte(0x89);
4041  emit_operand(src, dst);
4042}
4043
4044void Assembler::movsbq(Register dst, Address src) {
4045  InstructionMark im(this);
4046  prefixq(src, dst);
4047  emit_byte(0x0F);
4048  emit_byte(0xBE);
4049  emit_operand(dst, src);
4050}
4051
4052void Assembler::movsbq(Register dst, Register src) {
4053  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4054  emit_byte(0x0F);
4055  emit_byte(0xBE);
4056  emit_byte(0xC0 | encode);
4057}
4058
4059void Assembler::movslq(Register dst, int32_t imm32) {
4060  // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
4061  // and movslq(r8, 3); as movl     $0x0000000048000000,(%rbx)
4062  // as a result we shouldn't use until tested at runtime...
4063  ShouldNotReachHere();
4064  InstructionMark im(this);
4065  int encode = prefixq_and_encode(dst->encoding());
4066  emit_byte(0xC7 | encode);
4067  emit_long(imm32);
4068}
4069
4070void Assembler::movslq(Address dst, int32_t imm32) {
4071  assert(is_simm32(imm32), "lost bits");
4072  InstructionMark im(this);
4073  prefixq(dst);
4074  emit_byte(0xC7);
4075  emit_operand(rax, dst, 4);
4076  emit_long(imm32);
4077}
4078
4079void Assembler::movslq(Register dst, Address src) {
4080  InstructionMark im(this);
4081  prefixq(src, dst);
4082  emit_byte(0x63);
4083  emit_operand(dst, src);
4084}
4085
4086void Assembler::movslq(Register dst, Register src) {
4087  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4088  emit_byte(0x63);
4089  emit_byte(0xC0 | encode);
4090}
4091
4092void Assembler::movswq(Register dst, Address src) {
4093  InstructionMark im(this);
4094  prefixq(src, dst);
4095  emit_byte(0x0F);
4096  emit_byte(0xBF);
4097  emit_operand(dst, src);
4098}
4099
4100void Assembler::movswq(Register dst, Register src) {
4101  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4102  emit_byte(0x0F);
4103  emit_byte(0xBF);
4104  emit_byte(0xC0 | encode);
4105}
4106
4107void Assembler::movzbq(Register dst, Address src) {
4108  InstructionMark im(this);
4109  prefixq(src, dst);
4110  emit_byte(0x0F);
4111  emit_byte(0xB6);
4112  emit_operand(dst, src);
4113}
4114
4115void Assembler::movzbq(Register dst, Register src) {
4116  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4117  emit_byte(0x0F);
4118  emit_byte(0xB6);
4119  emit_byte(0xC0 | encode);
4120}
4121
4122void Assembler::movzwq(Register dst, Address src) {
4123  InstructionMark im(this);
4124  prefixq(src, dst);
4125  emit_byte(0x0F);
4126  emit_byte(0xB7);
4127  emit_operand(dst, src);
4128}
4129
4130void Assembler::movzwq(Register dst, Register src) {
4131  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4132  emit_byte(0x0F);
4133  emit_byte(0xB7);
4134  emit_byte(0xC0 | encode);
4135}
4136
4137void Assembler::negq(Register dst) {
4138  int encode = prefixq_and_encode(dst->encoding());
4139  emit_byte(0xF7);
4140  emit_byte(0xD8 | encode);
4141}
4142
4143void Assembler::notq(Register dst) {
4144  int encode = prefixq_and_encode(dst->encoding());
4145  emit_byte(0xF7);
4146  emit_byte(0xD0 | encode);
4147}
4148
4149void Assembler::orq(Address dst, int32_t imm32) {
4150  InstructionMark im(this);
4151  prefixq(dst);
4152  emit_byte(0x81);
4153  emit_operand(rcx, dst, 4);
4154  emit_long(imm32);
4155}
4156
4157void Assembler::orq(Register dst, int32_t imm32) {
4158  (void) prefixq_and_encode(dst->encoding());
4159  emit_arith(0x81, 0xC8, dst, imm32);
4160}
4161
4162void Assembler::orq(Register dst, Address src) {
4163  InstructionMark im(this);
4164  prefixq(src, dst);
4165  emit_byte(0x0B);
4166  emit_operand(dst, src);
4167}
4168
4169void Assembler::orq(Register dst, Register src) {
4170  (void) prefixq_and_encode(dst->encoding(), src->encoding());
4171  emit_arith(0x0B, 0xC0, dst, src);
4172}
4173
4174void Assembler::popa() { // 64bit
4175  movq(r15, Address(rsp, 0));
4176  movq(r14, Address(rsp, wordSize));
4177  movq(r13, Address(rsp, 2 * wordSize));
4178  movq(r12, Address(rsp, 3 * wordSize));
4179  movq(r11, Address(rsp, 4 * wordSize));
4180  movq(r10, Address(rsp, 5 * wordSize));
4181  movq(r9,  Address(rsp, 6 * wordSize));
4182  movq(r8,  Address(rsp, 7 * wordSize));
4183  movq(rdi, Address(rsp, 8 * wordSize));
4184  movq(rsi, Address(rsp, 9 * wordSize));
4185  movq(rbp, Address(rsp, 10 * wordSize));
4186  // skip rsp
4187  movq(rbx, Address(rsp, 12 * wordSize));
4188  movq(rdx, Address(rsp, 13 * wordSize));
4189  movq(rcx, Address(rsp, 14 * wordSize));
4190  movq(rax, Address(rsp, 15 * wordSize));
4191
4192  addq(rsp, 16 * wordSize);
4193}
4194
4195void Assembler::popcntq(Register dst, Address src) {
4196  assert(VM_Version::supports_popcnt(), "must support");
4197  InstructionMark im(this);
4198  emit_byte(0xF3);
4199  prefixq(src, dst);
4200  emit_byte(0x0F);
4201  emit_byte(0xB8);
4202  emit_operand(dst, src);
4203}
4204
4205void Assembler::popcntq(Register dst, Register src) {
4206  assert(VM_Version::supports_popcnt(), "must support");
4207  emit_byte(0xF3);
4208  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4209  emit_byte(0x0F);
4210  emit_byte(0xB8);
4211  emit_byte(0xC0 | encode);
4212}
4213
4214void Assembler::popq(Address dst) {
4215  InstructionMark im(this);
4216  prefixq(dst);
4217  emit_byte(0x8F);
4218  emit_operand(rax, dst);
4219}
4220
4221void Assembler::pusha() { // 64bit
4222  // we have to store original rsp.  ABI says that 128 bytes
4223  // below rsp are local scratch.
4224  movq(Address(rsp, -5 * wordSize), rsp);
4225
4226  subq(rsp, 16 * wordSize);
4227
4228  movq(Address(rsp, 15 * wordSize), rax);
4229  movq(Address(rsp, 14 * wordSize), rcx);
4230  movq(Address(rsp, 13 * wordSize), rdx);
4231  movq(Address(rsp, 12 * wordSize), rbx);
4232  // skip rsp
4233  movq(Address(rsp, 10 * wordSize), rbp);
4234  movq(Address(rsp, 9 * wordSize), rsi);
4235  movq(Address(rsp, 8 * wordSize), rdi);
4236  movq(Address(rsp, 7 * wordSize), r8);
4237  movq(Address(rsp, 6 * wordSize), r9);
4238  movq(Address(rsp, 5 * wordSize), r10);
4239  movq(Address(rsp, 4 * wordSize), r11);
4240  movq(Address(rsp, 3 * wordSize), r12);
4241  movq(Address(rsp, 2 * wordSize), r13);
4242  movq(Address(rsp, wordSize), r14);
4243  movq(Address(rsp, 0), r15);
4244}
4245
4246void Assembler::pushq(Address src) {
4247  InstructionMark im(this);
4248  prefixq(src);
4249  emit_byte(0xFF);
4250  emit_operand(rsi, src);
4251}
4252
4253void Assembler::rclq(Register dst, int imm8) {
4254  assert(isShiftCount(imm8 >> 1), "illegal shift count");
4255  int encode = prefixq_and_encode(dst->encoding());
4256  if (imm8 == 1) {
4257    emit_byte(0xD1);
4258    emit_byte(0xD0 | encode);
4259  } else {
4260    emit_byte(0xC1);
4261    emit_byte(0xD0 | encode);
4262    emit_byte(imm8);
4263  }
4264}
4265void Assembler::sarq(Register dst, int imm8) {
4266  assert(isShiftCount(imm8 >> 1), "illegal shift count");
4267  int encode = prefixq_and_encode(dst->encoding());
4268  if (imm8 == 1) {
4269    emit_byte(0xD1);
4270    emit_byte(0xF8 | encode);
4271  } else {
4272    emit_byte(0xC1);
4273    emit_byte(0xF8 | encode);
4274    emit_byte(imm8);
4275  }
4276}
4277
4278void Assembler::sarq(Register dst) {
4279  int encode = prefixq_and_encode(dst->encoding());
4280  emit_byte(0xD3);
4281  emit_byte(0xF8 | encode);
4282}
4283void Assembler::sbbq(Address dst, int32_t imm32) {
4284  InstructionMark im(this);
4285  prefixq(dst);
4286  emit_arith_operand(0x81, rbx, dst, imm32);
4287}
4288
4289void Assembler::sbbq(Register dst, int32_t imm32) {
4290  (void) prefixq_and_encode(dst->encoding());
4291  emit_arith(0x81, 0xD8, dst, imm32);
4292}
4293
4294void Assembler::sbbq(Register dst, Address src) {
4295  InstructionMark im(this);
4296  prefixq(src, dst);
4297  emit_byte(0x1B);
4298  emit_operand(dst, src);
4299}
4300
4301void Assembler::sbbq(Register dst, Register src) {
4302  (void) prefixq_and_encode(dst->encoding(), src->encoding());
4303  emit_arith(0x1B, 0xC0, dst, src);
4304}
4305
4306void Assembler::shlq(Register dst, int imm8) {
4307  assert(isShiftCount(imm8 >> 1), "illegal shift count");
4308  int encode = prefixq_and_encode(dst->encoding());
4309  if (imm8 == 1) {
4310    emit_byte(0xD1);
4311    emit_byte(0xE0 | encode);
4312  } else {
4313    emit_byte(0xC1);
4314    emit_byte(0xE0 | encode);
4315    emit_byte(imm8);
4316  }
4317}
4318
4319void Assembler::shlq(Register dst) {
4320  int encode = prefixq_and_encode(dst->encoding());
4321  emit_byte(0xD3);
4322  emit_byte(0xE0 | encode);
4323}
4324
4325void Assembler::shrq(Register dst, int imm8) {
4326  assert(isShiftCount(imm8 >> 1), "illegal shift count");
4327  int encode = prefixq_and_encode(dst->encoding());
4328  emit_byte(0xC1);
4329  emit_byte(0xE8 | encode);
4330  emit_byte(imm8);
4331}
4332
4333void Assembler::shrq(Register dst) {
4334  int encode = prefixq_and_encode(dst->encoding());
4335  emit_byte(0xD3);
4336  emit_byte(0xE8 | encode);
4337}
4338
4339void Assembler::sqrtsd(XMMRegister dst, Address src) {
4340  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4341  InstructionMark im(this);
4342  emit_byte(0xF2);
4343  prefix(src, dst);
4344  emit_byte(0x0F);
4345  emit_byte(0x51);
4346  emit_operand(dst, src);
4347}
4348
4349void Assembler::subq(Address dst, int32_t imm32) {
4350  InstructionMark im(this);
4351  prefixq(dst);
4352  if (is8bit(imm32)) {
4353    emit_byte(0x83);
4354    emit_operand(rbp, dst, 1);
4355    emit_byte(imm32 & 0xFF);
4356  } else {
4357    emit_byte(0x81);
4358    emit_operand(rbp, dst, 4);
4359    emit_long(imm32);
4360  }
4361}
4362
4363void Assembler::subq(Register dst, int32_t imm32) {
4364  (void) prefixq_and_encode(dst->encoding());
4365  emit_arith(0x81, 0xE8, dst, imm32);
4366}
4367
4368void Assembler::subq(Address dst, Register src) {
4369  InstructionMark im(this);
4370  prefixq(dst, src);
4371  emit_byte(0x29);
4372  emit_operand(src, dst);
4373}
4374
4375void Assembler::subq(Register dst, Address src) {
4376  InstructionMark im(this);
4377  prefixq(src, dst);
4378  emit_byte(0x2B);
4379  emit_operand(dst, src);
4380}
4381
4382void Assembler::subq(Register dst, Register src) {
4383  (void) prefixq_and_encode(dst->encoding(), src->encoding());
4384  emit_arith(0x2B, 0xC0, dst, src);
4385}
4386
4387void Assembler::testq(Register dst, int32_t imm32) {
4388  // not using emit_arith because test
4389  // doesn't support sign-extension of
4390  // 8bit operands
4391  int encode = dst->encoding();
4392  if (encode == 0) {
4393    prefix(REX_W);
4394    emit_byte(0xA9);
4395  } else {
4396    encode = prefixq_and_encode(encode);
4397    emit_byte(0xF7);
4398    emit_byte(0xC0 | encode);
4399  }
4400  emit_long(imm32);
4401}
4402
4403void Assembler::testq(Register dst, Register src) {
4404  (void) prefixq_and_encode(dst->encoding(), src->encoding());
4405  emit_arith(0x85, 0xC0, dst, src);
4406}
4407
4408void Assembler::xaddq(Address dst, Register src) {
4409  InstructionMark im(this);
4410  prefixq(dst, src);
4411  emit_byte(0x0F);
4412  emit_byte(0xC1);
4413  emit_operand(src, dst);
4414}
4415
4416void Assembler::xchgq(Register dst, Address src) {
4417  InstructionMark im(this);
4418  prefixq(src, dst);
4419  emit_byte(0x87);
4420  emit_operand(dst, src);
4421}
4422
4423void Assembler::xchgq(Register dst, Register src) {
4424  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4425  emit_byte(0x87);
4426  emit_byte(0xc0 | encode);
4427}
4428
4429void Assembler::xorq(Register dst, Register src) {
4430  (void) prefixq_and_encode(dst->encoding(), src->encoding());
4431  emit_arith(0x33, 0xC0, dst, src);
4432}
4433
4434void Assembler::xorq(Register dst, Address src) {
4435  InstructionMark im(this);
4436  prefixq(src, dst);
4437  emit_byte(0x33);
4438  emit_operand(dst, src);
4439}
4440
4441#endif // !LP64
4442
4443static Assembler::Condition reverse[] = {
4444    Assembler::noOverflow     /* overflow      = 0x0 */ ,
4445    Assembler::overflow       /* noOverflow    = 0x1 */ ,
4446    Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
4447    Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,
4448    Assembler::notZero        /* zero          = 0x4, equal         = 0x4 */ ,
4449    Assembler::zero           /* notZero       = 0x5, notEqual      = 0x5 */ ,
4450    Assembler::above          /* belowEqual    = 0x6 */ ,
4451    Assembler::belowEqual     /* above         = 0x7 */ ,
4452    Assembler::positive       /* negative      = 0x8 */ ,
4453    Assembler::negative       /* positive      = 0x9 */ ,
4454    Assembler::noParity       /* parity        = 0xa */ ,
4455    Assembler::parity         /* noParity      = 0xb */ ,
4456    Assembler::greaterEqual   /* less          = 0xc */ ,
4457    Assembler::less           /* greaterEqual  = 0xd */ ,
4458    Assembler::greater        /* lessEqual     = 0xe */ ,
4459    Assembler::lessEqual      /* greater       = 0xf, */
4460
4461};
4462
4463
4464// Implementation of MacroAssembler
4465
4466// First all the versions that have distinct versions depending on 32/64 bit
4467// Unless the difference is trivial (1 line or so).
4468
4469#ifndef _LP64
4470
4471// 32bit versions
4472
4473Address MacroAssembler::as_Address(AddressLiteral adr) {
4474  return Address(adr.target(), adr.rspec());
4475}
4476
4477Address MacroAssembler::as_Address(ArrayAddress adr) {
4478  return Address::make_array(adr);
4479}
4480
4481int MacroAssembler::biased_locking_enter(Register lock_reg,
4482                                         Register obj_reg,
4483                                         Register swap_reg,
4484                                         Register tmp_reg,
4485                                         bool swap_reg_contains_mark,
4486                                         Label& done,
4487                                         Label* slow_case,
4488                                         BiasedLockingCounters* counters) {
4489  assert(UseBiasedLocking, "why call this otherwise?");
4490  assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
4491  assert_different_registers(lock_reg, obj_reg, swap_reg);
4492
4493  if (PrintBiasedLockingStatistics && counters == NULL)
4494    counters = BiasedLocking::counters();
4495
4496  bool need_tmp_reg = false;
4497  if (tmp_reg == noreg) {
4498    need_tmp_reg = true;
4499    tmp_reg = lock_reg;
4500  } else {
4501    assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
4502  }
4503  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
4504  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
4505  Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
4506  Address saved_mark_addr(lock_reg, 0);
4507
4508  // Biased locking
4509  // See whether the lock is currently biased toward our thread and
4510  // whether the epoch is still valid
4511  // Note that the runtime guarantees sufficient alignment of JavaThread
4512  // pointers to allow age to be placed into low bits
4513  // First check to see whether biasing is even enabled for this object
4514  Label cas_label;
4515  int null_check_offset = -1;
4516  if (!swap_reg_contains_mark) {
4517    null_check_offset = offset();
4518    movl(swap_reg, mark_addr);
4519  }
4520  if (need_tmp_reg) {
4521    push(tmp_reg);
4522  }
4523  movl(tmp_reg, swap_reg);
4524  andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
4525  cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
4526  if (need_tmp_reg) {
4527    pop(tmp_reg);
4528  }
4529  jcc(Assembler::notEqual, cas_label);
4530  // The bias pattern is present in the object's header. Need to check
4531  // whether the bias owner and the epoch are both still current.
4532  // Note that because there is no current thread register on x86 we
4533  // need to store off the mark word we read out of the object to
4534  // avoid reloading it and needing to recheck invariants below. This
4535  // store is unfortunate but it makes the overall code shorter and
4536  // simpler.
4537  movl(saved_mark_addr, swap_reg);
4538  if (need_tmp_reg) {
4539    push(tmp_reg);
4540  }
4541  get_thread(tmp_reg);
4542  xorl(swap_reg, tmp_reg);
4543  if (swap_reg_contains_mark) {
4544    null_check_offset = offset();
4545  }
4546  movl(tmp_reg, klass_addr);
4547  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
4548  andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
4549  if (need_tmp_reg) {
4550    pop(tmp_reg);
4551  }
4552  if (counters != NULL) {
4553    cond_inc32(Assembler::zero,
4554               ExternalAddress((address)counters->biased_lock_entry_count_addr()));
4555  }
4556  jcc(Assembler::equal, done);
4557
4558  Label try_revoke_bias;
4559  Label try_rebias;
4560
4561  // At this point we know that the header has the bias pattern and
4562  // that we are not the bias owner in the current epoch. We need to
4563  // figure out more details about the state of the header in order to
4564  // know what operations can be legally performed on the object's
4565  // header.
4566
4567  // If the low three bits in the xor result aren't clear, that means
4568  // the prototype header is no longer biased and we have to revoke
4569  // the bias on this object.
4570  testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
4571  jcc(Assembler::notZero, try_revoke_bias);
4572
4573  // Biasing is still enabled for this data type. See whether the
4574  // epoch of the current bias is still valid, meaning that the epoch
4575  // bits of the mark word are equal to the epoch bits of the
4576  // prototype header. (Note that the prototype header's epoch bits
4577  // only change at a safepoint.) If not, attempt to rebias the object
4578  // toward the current thread. Note that we must be absolutely sure
4579  // that the current epoch is invalid in order to do this because
4580  // otherwise the manipulations it performs on the mark word are
4581  // illegal.
4582  testl(swap_reg, markOopDesc::epoch_mask_in_place);
4583  jcc(Assembler::notZero, try_rebias);
4584
4585  // The epoch of the current bias is still valid but we know nothing
4586  // about the owner; it might be set or it might be clear. Try to
4587  // acquire the bias of the object using an atomic operation. If this
4588  // fails we will go in to the runtime to revoke the object's bias.
4589  // Note that we first construct the presumed unbiased header so we
4590  // don't accidentally blow away another thread's valid bias.
4591  movl(swap_reg, saved_mark_addr);
4592  andl(swap_reg,
4593       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
4594  if (need_tmp_reg) {
4595    push(tmp_reg);
4596  }
4597  get_thread(tmp_reg);
4598  orl(tmp_reg, swap_reg);
4599  if (os::is_MP()) {
4600    lock();
4601  }
4602  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
4603  if (need_tmp_reg) {
4604    pop(tmp_reg);
4605  }
4606  // If the biasing toward our thread failed, this means that
4607  // another thread succeeded in biasing it toward itself and we
4608  // need to revoke that bias. The revocation will occur in the
4609  // interpreter runtime in the slow case.
4610  if (counters != NULL) {
4611    cond_inc32(Assembler::zero,
4612               ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
4613  }
4614  if (slow_case != NULL) {
4615    jcc(Assembler::notZero, *slow_case);
4616  }
4617  jmp(done);
4618
4619  bind(try_rebias);
4620  // At this point we know the epoch has expired, meaning that the
4621  // current "bias owner", if any, is actually invalid. Under these
4622  // circumstances _only_, we are allowed to use the current header's
4623  // value as the comparison value when doing the cas to acquire the
4624  // bias in the current epoch. In other words, we allow transfer of
4625  // the bias from one thread to another directly in this situation.
4626  //
4627  // FIXME: due to a lack of registers we currently blow away the age
4628  // bits in this situation. Should attempt to preserve them.
4629  if (need_tmp_reg) {
4630    push(tmp_reg);
4631  }
4632  get_thread(tmp_reg);
4633  movl(swap_reg, klass_addr);
4634  orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
4635  movl(swap_reg, saved_mark_addr);
4636  if (os::is_MP()) {
4637    lock();
4638  }
4639  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
4640  if (need_tmp_reg) {
4641    pop(tmp_reg);
4642  }
4643  // If the biasing toward our thread failed, then another thread
4644  // succeeded in biasing it toward itself and we need to revoke that
4645  // bias. The revocation will occur in the runtime in the slow case.
4646  if (counters != NULL) {
4647    cond_inc32(Assembler::zero,
4648               ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
4649  }
4650  if (slow_case != NULL) {
4651    jcc(Assembler::notZero, *slow_case);
4652  }
4653  jmp(done);
4654
4655  bind(try_revoke_bias);
4656  // The prototype mark in the klass doesn't have the bias bit set any
4657  // more, indicating that objects of this data type are not supposed
4658  // to be biased any more. We are going to try to reset the mark of
4659  // this object to the prototype value and fall through to the
4660  // CAS-based locking scheme. Note that if our CAS fails, it means
4661  // that another thread raced us for the privilege of revoking the
4662  // bias of this particular object, so it's okay to continue in the
4663  // normal locking code.
4664  //
4665  // FIXME: due to a lack of registers we currently blow away the age
4666  // bits in this situation. Should attempt to preserve them.
4667  movl(swap_reg, saved_mark_addr);
4668  if (need_tmp_reg) {
4669    push(tmp_reg);
4670  }
4671  movl(tmp_reg, klass_addr);
4672  movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
4673  if (os::is_MP()) {
4674    lock();
4675  }
4676  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
4677  if (need_tmp_reg) {
4678    pop(tmp_reg);
4679  }
4680  // Fall through to the normal CAS-based lock, because no matter what
4681  // the result of the above CAS, some thread must have succeeded in
4682  // removing the bias bit from the object's header.
4683  if (counters != NULL) {
4684    cond_inc32(Assembler::zero,
4685               ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
4686  }
4687
4688  bind(cas_label);
4689
4690  return null_check_offset;
4691}
4692void MacroAssembler::call_VM_leaf_base(address entry_point,
4693                                       int number_of_arguments) {
4694  call(RuntimeAddress(entry_point));
4695  increment(rsp, number_of_arguments * wordSize);
4696}
4697
4698void MacroAssembler::cmpoop(Address src1, jobject obj) {
4699  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
4700}
4701
4702void MacroAssembler::cmpoop(Register src1, jobject obj) {
4703  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
4704}
4705
4706void MacroAssembler::extend_sign(Register hi, Register lo) {
4707  // According to Intel Doc. AP-526, "Integer Divide", p.18.
4708  if (VM_Version::is_P6() && hi == rdx && lo == rax) {
4709    cdql();
4710  } else {
4711    movl(hi, lo);
4712    sarl(hi, 31);
4713  }
4714}
4715
4716void MacroAssembler::fat_nop() {
4717  // A 5 byte nop that is safe for patching (see patch_verified_entry)
4718  emit_byte(0x26); // es:
4719  emit_byte(0x2e); // cs:
4720  emit_byte(0x64); // fs:
4721  emit_byte(0x65); // gs:
4722  emit_byte(0x90);
4723}
4724
4725void MacroAssembler::jC2(Register tmp, Label& L) {
4726  // set parity bit if FPU flag C2 is set (via rax)
4727  save_rax(tmp);
4728  fwait(); fnstsw_ax();
4729  sahf();
4730  restore_rax(tmp);
4731  // branch
4732  jcc(Assembler::parity, L);
4733}
4734
4735void MacroAssembler::jnC2(Register tmp, Label& L) {
4736  // set parity bit if FPU flag C2 is set (via rax)
4737  save_rax(tmp);
4738  fwait(); fnstsw_ax();
4739  sahf();
4740  restore_rax(tmp);
4741  // branch
4742  jcc(Assembler::noParity, L);
4743}
4744
4745// 32bit can do a case table jump in one instruction but we no longer allow the base
4746// to be installed in the Address class
4747void MacroAssembler::jump(ArrayAddress entry) {
4748  jmp(as_Address(entry));
4749}
4750
4751// Note: y_lo will be destroyed
4752void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
4753  // Long compare for Java (semantics as described in JVM spec.)
4754  Label high, low, done;
4755
4756  cmpl(x_hi, y_hi);
4757  jcc(Assembler::less, low);
4758  jcc(Assembler::greater, high);
4759  // x_hi is the return register
4760  xorl(x_hi, x_hi);
4761  cmpl(x_lo, y_lo);
4762  jcc(Assembler::below, low);
4763  jcc(Assembler::equal, done);
4764
4765  bind(high);
4766  xorl(x_hi, x_hi);
4767  increment(x_hi);
4768  jmp(done);
4769
4770  bind(low);
4771  xorl(x_hi, x_hi);
4772  decrementl(x_hi);
4773
4774  bind(done);
4775}
4776
4777void MacroAssembler::lea(Register dst, AddressLiteral src) {
4778    mov_literal32(dst, (int32_t)src.target(), src.rspec());
4779}
4780
4781void MacroAssembler::lea(Address dst, AddressLiteral adr) {
4782  // leal(dst, as_Address(adr));
4783  // see note in movl as to why we must use a move
4784  mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
4785}
4786
4787void MacroAssembler::leave() {
4788  mov(rsp, rbp);
4789  pop(rbp);
4790}
4791
4792void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
4793  // Multiplication of two Java long values stored on the stack
4794  // as illustrated below. Result is in rdx:rax.
4795  //
4796  // rsp ---> [  ??  ] \               \
4797  //            ....    | y_rsp_offset  |
4798  //          [ y_lo ] /  (in bytes)    | x_rsp_offset
4799  //          [ y_hi ]                  | (in bytes)
4800  //            ....                    |
4801  //          [ x_lo ]                 /
4802  //          [ x_hi ]
4803  //            ....
4804  //
4805  // Basic idea: lo(result) = lo(x_lo * y_lo)
4806  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
4807  Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
4808  Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
4809  Label quick;
4810  // load x_hi, y_hi and check if quick
4811  // multiplication is possible
4812  movl(rbx, x_hi);
4813  movl(rcx, y_hi);
4814  movl(rax, rbx);
4815  orl(rbx, rcx);                                 // rbx, = 0 <=> x_hi = 0 and y_hi = 0
4816  jcc(Assembler::zero, quick);                   // if rbx, = 0 do quick multiply
4817  // do full multiplication
4818  // 1st step
4819  mull(y_lo);                                    // x_hi * y_lo
4820  movl(rbx, rax);                                // save lo(x_hi * y_lo) in rbx,
4821  // 2nd step
4822  movl(rax, x_lo);
4823  mull(rcx);                                     // x_lo * y_hi
4824  addl(rbx, rax);                                // add lo(x_lo * y_hi) to rbx,
4825  // 3rd step
4826  bind(quick);                                   // note: rbx, = 0 if quick multiply!
4827  movl(rax, x_lo);
4828  mull(y_lo);                                    // x_lo * y_lo
4829  addl(rdx, rbx);                                // correct hi(x_lo * y_lo)
4830}
4831
4832void MacroAssembler::lneg(Register hi, Register lo) {
4833  negl(lo);
4834  adcl(hi, 0);
4835  negl(hi);
4836}
4837
4838void MacroAssembler::lshl(Register hi, Register lo) {
4839  // Java shift left long support (semantics as described in JVM spec., p.305)
4840  // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
4841  // shift value is in rcx !
4842  assert(hi != rcx, "must not use rcx");
4843  assert(lo != rcx, "must not use rcx");
4844  const Register s = rcx;                        // shift count
4845  const int      n = BitsPerWord;
4846  Label L;
4847  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
4848  cmpl(s, n);                                    // if (s < n)
4849  jcc(Assembler::less, L);                       // else (s >= n)
4850  movl(hi, lo);                                  // x := x << n
4851  xorl(lo, lo);
4852  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
4853  bind(L);                                       // s (mod n) < n
4854  shldl(hi, lo);                                 // x := x << s
4855  shll(lo);
4856}
4857
4858
4859void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
4860  // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
4861  // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
4862  assert(hi != rcx, "must not use rcx");
4863  assert(lo != rcx, "must not use rcx");
4864  const Register s = rcx;                        // shift count
4865  const int      n = BitsPerWord;
4866  Label L;
4867  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
4868  cmpl(s, n);                                    // if (s < n)
4869  jcc(Assembler::less, L);                       // else (s >= n)
4870  movl(lo, hi);                                  // x := x >> n
4871  if (sign_extension) sarl(hi, 31);
4872  else                xorl(hi, hi);
4873  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
4874  bind(L);                                       // s (mod n) < n
4875  shrdl(lo, hi);                                 // x := x >> s
4876  if (sign_extension) sarl(hi);
4877  else                shrl(hi);
4878}
4879
4880void MacroAssembler::movoop(Register dst, jobject obj) {
4881  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
4882}
4883
4884void MacroAssembler::movoop(Address dst, jobject obj) {
4885  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
4886}
4887
4888void MacroAssembler::movptr(Register dst, AddressLiteral src) {
4889  if (src.is_lval()) {
4890    mov_literal32(dst, (intptr_t)src.target(), src.rspec());
4891  } else {
4892    movl(dst, as_Address(src));
4893  }
4894}
4895
4896void MacroAssembler::movptr(ArrayAddress dst, Register src) {
4897  movl(as_Address(dst), src);
4898}
4899
4900void MacroAssembler::movptr(Register dst, ArrayAddress src) {
4901  movl(dst, as_Address(src));
4902}
4903
4904// src should NEVER be a real pointer. Use AddressLiteral for true pointers
4905void MacroAssembler::movptr(Address dst, intptr_t src) {
4906  movl(dst, src);
4907}
4908
4909
4910void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
4911  movsd(dst, as_Address(src));
4912}
4913
4914void MacroAssembler::pop_callee_saved_registers() {
4915  pop(rcx);
4916  pop(rdx);
4917  pop(rdi);
4918  pop(rsi);
4919}
4920
4921void MacroAssembler::pop_fTOS() {
4922  fld_d(Address(rsp, 0));
4923  addl(rsp, 2 * wordSize);
4924}
4925
4926void MacroAssembler::push_callee_saved_registers() {
4927  push(rsi);
4928  push(rdi);
4929  push(rdx);
4930  push(rcx);
4931}
4932
4933void MacroAssembler::push_fTOS() {
4934  subl(rsp, 2 * wordSize);
4935  fstp_d(Address(rsp, 0));
4936}
4937
4938
4939void MacroAssembler::pushoop(jobject obj) {
4940  push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
4941}
4942
4943
4944void MacroAssembler::pushptr(AddressLiteral src) {
4945  if (src.is_lval()) {
4946    push_literal32((int32_t)src.target(), src.rspec());
4947  } else {
4948    pushl(as_Address(src));
4949  }
4950}
4951
4952void MacroAssembler::set_word_if_not_zero(Register dst) {
4953  xorl(dst, dst);
4954  set_byte_if_not_zero(dst);
4955}
4956
4957static void pass_arg0(MacroAssembler* masm, Register arg) {
4958  masm->push(arg);
4959}
4960
4961static void pass_arg1(MacroAssembler* masm, Register arg) {
4962  masm->push(arg);
4963}
4964
4965static void pass_arg2(MacroAssembler* masm, Register arg) {
4966  masm->push(arg);
4967}
4968
4969static void pass_arg3(MacroAssembler* masm, Register arg) {
4970  masm->push(arg);
4971}
4972
4973#ifndef PRODUCT
4974extern "C" void findpc(intptr_t x);
4975#endif
4976
4977void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
4978  // In order to get locks to work, we need to fake a in_VM state
4979  JavaThread* thread = JavaThread::current();
4980  JavaThreadState saved_state = thread->thread_state();
4981  thread->set_thread_state(_thread_in_vm);
4982  if (ShowMessageBoxOnError) {
4983    JavaThread* thread = JavaThread::current();
4984    JavaThreadState saved_state = thread->thread_state();
4985    thread->set_thread_state(_thread_in_vm);
4986    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
4987      ttyLocker ttyl;
4988      BytecodeCounter::print();
4989    }
4990    // To see where a verify_oop failed, get $ebx+40/X for this frame.
4991    // This is the value of eip which points to where verify_oop will return.
4992    if (os::message_box(msg, "Execution stopped, print registers?")) {
4993      ttyLocker ttyl;
4994      tty->print_cr("eip = 0x%08x", eip);
4995#ifndef PRODUCT
4996      tty->cr();
4997      findpc(eip);
4998      tty->cr();
4999#endif
5000      tty->print_cr("rax, = 0x%08x", rax);
5001      tty->print_cr("rbx, = 0x%08x", rbx);
5002      tty->print_cr("rcx = 0x%08x", rcx);
5003      tty->print_cr("rdx = 0x%08x", rdx);
5004      tty->print_cr("rdi = 0x%08x", rdi);
5005      tty->print_cr("rsi = 0x%08x", rsi);
5006      tty->print_cr("rbp, = 0x%08x", rbp);
5007      tty->print_cr("rsp = 0x%08x", rsp);
5008      BREAKPOINT;
5009    }
5010  } else {
5011    ttyLocker ttyl;
5012    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
5013    assert(false, "DEBUG MESSAGE");
5014  }
5015  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
5016}
5017
5018void MacroAssembler::stop(const char* msg) {
5019  ExternalAddress message((address)msg);
5020  // push address of message
5021  pushptr(message.addr());
5022  { Label L; call(L, relocInfo::none); bind(L); }     // push eip
5023  pusha();                                           // push registers
5024  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
5025  hlt();
5026}
5027
5028void MacroAssembler::warn(const char* msg) {
5029  push_CPU_state();
5030
5031  ExternalAddress message((address) msg);
5032  // push address of message
5033  pushptr(message.addr());
5034
5035  call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
5036  addl(rsp, wordSize);       // discard argument
5037  pop_CPU_state();
5038}
5039
5040#else // _LP64
5041
5042// 64 bit versions
5043
5044Address MacroAssembler::as_Address(AddressLiteral adr) {
5045  // amd64 always does this as a pc-rel
5046  // we can be absolute or disp based on the instruction type
5047  // jmp/call are displacements others are absolute
5048  assert(!adr.is_lval(), "must be rval");
5049  assert(reachable(adr), "must be");
5050  return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());
5051
5052}
5053
5054Address MacroAssembler::as_Address(ArrayAddress adr) {
5055  AddressLiteral base = adr.base();
5056  lea(rscratch1, base);
5057  Address index = adr.index();
5058  assert(index._disp == 0, "must not have disp"); // maybe it can?
5059  Address array(rscratch1, index._index, index._scale, index._disp);
5060  return array;
5061}
5062
5063int MacroAssembler::biased_locking_enter(Register lock_reg,
5064                                         Register obj_reg,
5065                                         Register swap_reg,
5066                                         Register tmp_reg,
5067                                         bool swap_reg_contains_mark,
5068                                         Label& done,
5069                                         Label* slow_case,
5070                                         BiasedLockingCounters* counters) {
5071  assert(UseBiasedLocking, "why call this otherwise?");
5072  assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
5073  assert(tmp_reg != noreg, "tmp_reg must be supplied");
5074  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
5075  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
5076  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
5077  Address saved_mark_addr(lock_reg, 0);
5078
5079  if (PrintBiasedLockingStatistics && counters == NULL)
5080    counters = BiasedLocking::counters();
5081
5082  // Biased locking
5083  // See whether the lock is currently biased toward our thread and
5084  // whether the epoch is still valid
5085  // Note that the runtime guarantees sufficient alignment of JavaThread
5086  // pointers to allow age to be placed into low bits
5087  // First check to see whether biasing is even enabled for this object
5088  Label cas_label;
5089  int null_check_offset = -1;
5090  if (!swap_reg_contains_mark) {
5091    null_check_offset = offset();
5092    movq(swap_reg, mark_addr);
5093  }
5094  movq(tmp_reg, swap_reg);
5095  andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
5096  cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
5097  jcc(Assembler::notEqual, cas_label);
5098  // The bias pattern is present in the object's header. Need to check
5099  // whether the bias owner and the epoch are both still current.
5100  load_prototype_header(tmp_reg, obj_reg);
5101  orq(tmp_reg, r15_thread);
5102  xorq(tmp_reg, swap_reg);
5103  andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
5104  if (counters != NULL) {
5105    cond_inc32(Assembler::zero,
5106               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
5107  }
5108  jcc(Assembler::equal, done);
5109
5110  Label try_revoke_bias;
5111  Label try_rebias;
5112
5113  // At this point we know that the header has the bias pattern and
5114  // that we are not the bias owner in the current epoch. We need to
5115  // figure out more details about the state of the header in order to
5116  // know what operations can be legally performed on the object's
5117  // header.
5118
5119  // If the low three bits in the xor result aren't clear, that means
5120  // the prototype header is no longer biased and we have to revoke
5121  // the bias on this object.
5122  testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
5123  jcc(Assembler::notZero, try_revoke_bias);
5124
5125  // Biasing is still enabled for this data type. See whether the
5126  // epoch of the current bias is still valid, meaning that the epoch
5127  // bits of the mark word are equal to the epoch bits of the
5128  // prototype header. (Note that the prototype header's epoch bits
5129  // only change at a safepoint.) If not, attempt to rebias the object
5130  // toward the current thread. Note that we must be absolutely sure
5131  // that the current epoch is invalid in order to do this because
5132  // otherwise the manipulations it performs on the mark word are
5133  // illegal.
5134  testq(tmp_reg, markOopDesc::epoch_mask_in_place);
5135  jcc(Assembler::notZero, try_rebias);
5136
5137  // The epoch of the current bias is still valid but we know nothing
5138  // about the owner; it might be set or it might be clear. Try to
5139  // acquire the bias of the object using an atomic operation. If this
5140  // fails we will go in to the runtime to revoke the object's bias.
5141  // Note that we first construct the presumed unbiased header so we
5142  // don't accidentally blow away another thread's valid bias.
5143  andq(swap_reg,
5144       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
5145  movq(tmp_reg, swap_reg);
5146  orq(tmp_reg, r15_thread);
5147  if (os::is_MP()) {
5148    lock();
5149  }
5150  cmpxchgq(tmp_reg, Address(obj_reg, 0));
5151  // If the biasing toward our thread failed, this means that
5152  // another thread succeeded in biasing it toward itself and we
5153  // need to revoke that bias. The revocation will occur in the
5154  // interpreter runtime in the slow case.
5155  if (counters != NULL) {
5156    cond_inc32(Assembler::zero,
5157               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
5158  }
5159  if (slow_case != NULL) {
5160    jcc(Assembler::notZero, *slow_case);
5161  }
5162  jmp(done);
5163
5164  bind(try_rebias);
5165  // At this point we know the epoch has expired, meaning that the
5166  // current "bias owner", if any, is actually invalid. Under these
5167  // circumstances _only_, we are allowed to use the current header's
5168  // value as the comparison value when doing the cas to acquire the
5169  // bias in the current epoch. In other words, we allow transfer of
5170  // the bias from one thread to another directly in this situation.
5171  //
5172  // FIXME: due to a lack of registers we currently blow away the age
5173  // bits in this situation. Should attempt to preserve them.
5174  load_prototype_header(tmp_reg, obj_reg);
5175  orq(tmp_reg, r15_thread);
5176  if (os::is_MP()) {
5177    lock();
5178  }
5179  cmpxchgq(tmp_reg, Address(obj_reg, 0));
5180  // If the biasing toward our thread failed, then another thread
5181  // succeeded in biasing it toward itself and we need to revoke that
5182  // bias. The revocation will occur in the runtime in the slow case.
5183  if (counters != NULL) {
5184    cond_inc32(Assembler::zero,
5185               ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
5186  }
5187  if (slow_case != NULL) {
5188    jcc(Assembler::notZero, *slow_case);
5189  }
5190  jmp(done);
5191
5192  bind(try_revoke_bias);
5193  // The prototype mark in the klass doesn't have the bias bit set any
5194  // more, indicating that objects of this data type are not supposed
5195  // to be biased any more. We are going to try to reset the mark of
5196  // this object to the prototype value and fall through to the
5197  // CAS-based locking scheme. Note that if our CAS fails, it means
5198  // that another thread raced us for the privilege of revoking the
5199  // bias of this particular object, so it's okay to continue in the
5200  // normal locking code.
5201  //
5202  // FIXME: due to a lack of registers we currently blow away the age
5203  // bits in this situation. Should attempt to preserve them.
5204  load_prototype_header(tmp_reg, obj_reg);
5205  if (os::is_MP()) {
5206    lock();
5207  }
5208  cmpxchgq(tmp_reg, Address(obj_reg, 0));
5209  // Fall through to the normal CAS-based lock, because no matter what
5210  // the result of the above CAS, some thread must have succeeded in
5211  // removing the bias bit from the object's header.
5212  if (counters != NULL) {
5213    cond_inc32(Assembler::zero,
5214               ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
5215  }
5216
5217  bind(cas_label);
5218
5219  return null_check_offset;
5220}
5221
5222void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
5223  Label L, E;
5224
5225#ifdef _WIN64
5226  // Windows always allocates space for it's register args
5227  assert(num_args <= 4, "only register arguments supported");
5228  subq(rsp,  frame::arg_reg_save_area_bytes);
5229#endif
5230
5231  // Align stack if necessary
5232  testl(rsp, 15);
5233  jcc(Assembler::zero, L);
5234
5235  subq(rsp, 8);
5236  {
5237    call(RuntimeAddress(entry_point));
5238  }
5239  addq(rsp, 8);
5240  jmp(E);
5241
5242  bind(L);
5243  {
5244    call(RuntimeAddress(entry_point));
5245  }
5246
5247  bind(E);
5248
5249#ifdef _WIN64
5250  // restore stack pointer
5251  addq(rsp, frame::arg_reg_save_area_bytes);
5252#endif
5253
5254}
5255
5256void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
5257  assert(!src2.is_lval(), "should use cmpptr");
5258
5259  if (reachable(src2)) {
5260    cmpq(src1, as_Address(src2));
5261  } else {
5262    lea(rscratch1, src2);
5263    Assembler::cmpq(src1, Address(rscratch1, 0));
5264  }
5265}
5266
5267int MacroAssembler::corrected_idivq(Register reg) {
5268  // Full implementation of Java ldiv and lrem; checks for special
5269  // case as described in JVM spec., p.243 & p.271.  The function
5270  // returns the (pc) offset of the idivl instruction - may be needed
5271  // for implicit exceptions.
5272  //
5273  //         normal case                           special case
5274  //
5275  // input : rax: dividend                         min_long
5276  //         reg: divisor   (may not be eax/edx)   -1
5277  //
5278  // output: rax: quotient  (= rax idiv reg)       min_long
5279  //         rdx: remainder (= rax irem reg)       0
5280  assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
5281  static const int64_t min_long = 0x8000000000000000;
5282  Label normal_case, special_case;
5283
5284  // check for special case
5285  cmp64(rax, ExternalAddress((address) &min_long));
5286  jcc(Assembler::notEqual, normal_case);
5287  xorl(rdx, rdx); // prepare rdx for possible special case (where
5288                  // remainder = 0)
5289  cmpq(reg, -1);
5290  jcc(Assembler::equal, special_case);
5291
5292  // handle normal case
5293  bind(normal_case);
5294  cdqq();
5295  int idivq_offset = offset();
5296  idivq(reg);
5297
5298  // normal and special case exit
5299  bind(special_case);
5300
5301  return idivq_offset;
5302}
5303
5304void MacroAssembler::decrementq(Register reg, int value) {
5305  if (value == min_jint) { subq(reg, value); return; }
5306  if (value <  0) { incrementq(reg, -value); return; }
5307  if (value == 0) {                        ; return; }
5308  if (value == 1 && UseIncDec) { decq(reg) ; return; }
5309  /* else */      { subq(reg, value)       ; return; }
5310}
5311
5312void MacroAssembler::decrementq(Address dst, int value) {
5313  if (value == min_jint) { subq(dst, value); return; }
5314  if (value <  0) { incrementq(dst, -value); return; }
5315  if (value == 0) {                        ; return; }
5316  if (value == 1 && UseIncDec) { decq(dst) ; return; }
5317  /* else */      { subq(dst, value)       ; return; }
5318}
5319
5320void MacroAssembler::fat_nop() {
5321  // A 5 byte nop that is safe for patching (see patch_verified_entry)
5322  // Recommened sequence from 'Software Optimization Guide for the AMD
5323  // Hammer Processor'
5324  emit_byte(0x66);
5325  emit_byte(0x66);
5326  emit_byte(0x90);
5327  emit_byte(0x66);
5328  emit_byte(0x90);
5329}
5330
5331void MacroAssembler::incrementq(Register reg, int value) {
5332  if (value == min_jint) { addq(reg, value); return; }
5333  if (value <  0) { decrementq(reg, -value); return; }
5334  if (value == 0) {                        ; return; }
5335  if (value == 1 && UseIncDec) { incq(reg) ; return; }
5336  /* else */      { addq(reg, value)       ; return; }
5337}
5338
5339void MacroAssembler::incrementq(Address dst, int value) {
5340  if (value == min_jint) { addq(dst, value); return; }
5341  if (value <  0) { decrementq(dst, -value); return; }
5342  if (value == 0) {                        ; return; }
5343  if (value == 1 && UseIncDec) { incq(dst) ; return; }
5344  /* else */      { addq(dst, value)       ; return; }
5345}
5346
5347// 32bit can do a case table jump in one instruction but we no longer allow the base
5348// to be installed in the Address class
5349void MacroAssembler::jump(ArrayAddress entry) {
5350  lea(rscratch1, entry.base());
5351  Address dispatch = entry.index();
5352  assert(dispatch._base == noreg, "must be");
5353  dispatch._base = rscratch1;
5354  jmp(dispatch);
5355}
5356
5357void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
5358  ShouldNotReachHere(); // 64bit doesn't use two regs
5359  cmpq(x_lo, y_lo);
5360}
5361
5362void MacroAssembler::lea(Register dst, AddressLiteral src) {
5363    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
5364}
5365
5366void MacroAssembler::lea(Address dst, AddressLiteral adr) {
5367  mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
5368  movptr(dst, rscratch1);
5369}
5370
5371void MacroAssembler::leave() {
5372  // %%% is this really better? Why not on 32bit too?
5373  emit_byte(0xC9); // LEAVE
5374}
5375
5376void MacroAssembler::lneg(Register hi, Register lo) {
5377  ShouldNotReachHere(); // 64bit doesn't use two regs
5378  negq(lo);
5379}
5380
5381void MacroAssembler::movoop(Register dst, jobject obj) {
5382  mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
5383}
5384
5385void MacroAssembler::movoop(Address dst, jobject obj) {
5386  mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
5387  movq(dst, rscratch1);
5388}
5389
5390void MacroAssembler::movptr(Register dst, AddressLiteral src) {
5391  if (src.is_lval()) {
5392    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
5393  } else {
5394    if (reachable(src)) {
5395      movq(dst, as_Address(src));
5396    } else {
5397      lea(rscratch1, src);
5398      movq(dst, Address(rscratch1,0));
5399    }
5400  }
5401}
5402
5403void MacroAssembler::movptr(ArrayAddress dst, Register src) {
5404  movq(as_Address(dst), src);
5405}
5406
5407void MacroAssembler::movptr(Register dst, ArrayAddress src) {
5408  movq(dst, as_Address(src));
5409}
5410
5411// src should NEVER be a real pointer. Use AddressLiteral for true pointers
5412void MacroAssembler::movptr(Address dst, intptr_t src) {
5413  mov64(rscratch1, src);
5414  movq(dst, rscratch1);
5415}
5416
5417// These are mostly for initializing NULL
5418void MacroAssembler::movptr(Address dst, int32_t src) {
5419  movslq(dst, src);
5420}
5421
5422void MacroAssembler::movptr(Register dst, int32_t src) {
5423  mov64(dst, (intptr_t)src);
5424}
5425
5426void MacroAssembler::pushoop(jobject obj) {
5427  movoop(rscratch1, obj);
5428  push(rscratch1);
5429}
5430
5431void MacroAssembler::pushptr(AddressLiteral src) {
5432  lea(rscratch1, src);
5433  if (src.is_lval()) {
5434    push(rscratch1);
5435  } else {
5436    pushq(Address(rscratch1, 0));
5437  }
5438}
5439
5440void MacroAssembler::reset_last_Java_frame(bool clear_fp,
5441                                           bool clear_pc) {
5442  // we must set sp to zero to clear frame
5443  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
5444  // must clear fp, so that compiled frames are not confused; it is
5445  // possible that we need it only for debugging
5446  if (clear_fp) {
5447    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
5448  }
5449
5450  if (clear_pc) {
5451    movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
5452  }
5453}
5454
5455void MacroAssembler::set_last_Java_frame(Register last_java_sp,
5456                                         Register last_java_fp,
5457                                         address  last_java_pc) {
5458  // determine last_java_sp register
5459  if (!last_java_sp->is_valid()) {
5460    last_java_sp = rsp;
5461  }
5462
5463  // last_java_fp is optional
5464  if (last_java_fp->is_valid()) {
5465    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
5466           last_java_fp);
5467  }
5468
5469  // last_java_pc is optional
5470  if (last_java_pc != NULL) {
5471    Address java_pc(r15_thread,
5472                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
5473    lea(rscratch1, InternalAddress(last_java_pc));
5474    movptr(java_pc, rscratch1);
5475  }
5476
5477  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
5478}
5479
5480static void pass_arg0(MacroAssembler* masm, Register arg) {
5481  if (c_rarg0 != arg ) {
5482    masm->mov(c_rarg0, arg);
5483  }
5484}
5485
5486static void pass_arg1(MacroAssembler* masm, Register arg) {
5487  if (c_rarg1 != arg ) {
5488    masm->mov(c_rarg1, arg);
5489  }
5490}
5491
5492static void pass_arg2(MacroAssembler* masm, Register arg) {
5493  if (c_rarg2 != arg ) {
5494    masm->mov(c_rarg2, arg);
5495  }
5496}
5497
5498static void pass_arg3(MacroAssembler* masm, Register arg) {
5499  if (c_rarg3 != arg ) {
5500    masm->mov(c_rarg3, arg);
5501  }
5502}
5503
5504void MacroAssembler::stop(const char* msg) {
5505  address rip = pc();
5506  pusha(); // get regs on stack
5507  lea(c_rarg0, ExternalAddress((address) msg));
5508  lea(c_rarg1, InternalAddress(rip));
5509  movq(c_rarg2, rsp); // pass pointer to regs array
5510  andq(rsp, -16); // align stack as required by ABI
5511  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
5512  hlt();
5513}
5514
5515void MacroAssembler::warn(const char* msg) {
5516  push(r12);
5517  movq(r12, rsp);
5518  andq(rsp, -16);     // align stack as required by push_CPU_state and call
5519
5520  push_CPU_state();   // keeps alignment at 16 bytes
5521  lea(c_rarg0, ExternalAddress((address) msg));
5522  call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
5523  pop_CPU_state();
5524
5525  movq(rsp, r12);
5526  pop(r12);
5527}
5528
5529#ifndef PRODUCT
5530extern "C" void findpc(intptr_t x);
5531#endif
5532
5533void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
5534  // In order to get locks to work, we need to fake a in_VM state
5535  if (ShowMessageBoxOnError ) {
5536    JavaThread* thread = JavaThread::current();
5537    JavaThreadState saved_state = thread->thread_state();
5538    thread->set_thread_state(_thread_in_vm);
5539#ifndef PRODUCT
5540    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
5541      ttyLocker ttyl;
5542      BytecodeCounter::print();
5543    }
5544#endif
5545    // To see where a verify_oop failed, get $ebx+40/X for this frame.
5546    // XXX correct this offset for amd64
5547    // This is the value of eip which points to where verify_oop will return.
5548    if (os::message_box(msg, "Execution stopped, print registers?")) {
5549      ttyLocker ttyl;
5550      tty->print_cr("rip = 0x%016lx", pc);
5551#ifndef PRODUCT
5552      tty->cr();
5553      findpc(pc);
5554      tty->cr();
5555#endif
5556      tty->print_cr("rax = 0x%016lx", regs[15]);
5557      tty->print_cr("rbx = 0x%016lx", regs[12]);
5558      tty->print_cr("rcx = 0x%016lx", regs[14]);
5559      tty->print_cr("rdx = 0x%016lx", regs[13]);
5560      tty->print_cr("rdi = 0x%016lx", regs[8]);
5561      tty->print_cr("rsi = 0x%016lx", regs[9]);
5562      tty->print_cr("rbp = 0x%016lx", regs[10]);
5563      tty->print_cr("rsp = 0x%016lx", regs[11]);
5564      tty->print_cr("r8  = 0x%016lx", regs[7]);
5565      tty->print_cr("r9  = 0x%016lx", regs[6]);
5566      tty->print_cr("r10 = 0x%016lx", regs[5]);
5567      tty->print_cr("r11 = 0x%016lx", regs[4]);
5568      tty->print_cr("r12 = 0x%016lx", regs[3]);
5569      tty->print_cr("r13 = 0x%016lx", regs[2]);
5570      tty->print_cr("r14 = 0x%016lx", regs[1]);
5571      tty->print_cr("r15 = 0x%016lx", regs[0]);
5572      BREAKPOINT;
5573    }
5574    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
5575  } else {
5576    ttyLocker ttyl;
5577    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
5578                    msg);
5579  }
5580}
5581
5582#endif // _LP64
5583
5584// Now versions that are common to 32/64 bit
5585
5586void MacroAssembler::addptr(Register dst, int32_t imm32) {
5587  LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
5588}
5589
5590void MacroAssembler::addptr(Register dst, Register src) {
5591  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
5592}
5593
5594void MacroAssembler::addptr(Address dst, Register src) {
5595  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
5596}
5597
5598void MacroAssembler::align(int modulus) {
5599  if (offset() % modulus != 0) {
5600    nop(modulus - (offset() % modulus));
5601  }
5602}
5603
5604void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
5605  if (reachable(src)) {
5606    andpd(dst, as_Address(src));
5607  } else {
5608    lea(rscratch1, src);
5609    andpd(dst, Address(rscratch1, 0));
5610  }
5611}
5612
5613void MacroAssembler::andptr(Register dst, int32_t imm32) {
5614  LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
5615}
5616
5617void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
5618  pushf();
5619  if (os::is_MP())
5620    lock();
5621  incrementl(counter_addr);
5622  popf();
5623}
5624
5625// Writes to stack successive pages until offset reached to check for
5626// stack overflow + shadow pages.  This clobbers tmp.
5627void MacroAssembler::bang_stack_size(Register size, Register tmp) {
5628  movptr(tmp, rsp);
5629  // Bang stack for total size given plus shadow page size.
5630  // Bang one page at a time because large size can bang beyond yellow and
5631  // red zones.
5632  Label loop;
5633  bind(loop);
5634  movl(Address(tmp, (-os::vm_page_size())), size );
5635  subptr(tmp, os::vm_page_size());
5636  subl(size, os::vm_page_size());
5637  jcc(Assembler::greater, loop);
5638
5639  // Bang down shadow pages too.
5640  // The -1 because we already subtracted 1 page.
5641  for (int i = 0; i< StackShadowPages-1; i++) {
5642    // this could be any sized move but this is can be a debugging crumb
5643    // so the bigger the better.
5644    movptr(Address(tmp, (-i*os::vm_page_size())), size );
5645  }
5646}
5647
5648void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
5649  assert(UseBiasedLocking, "why call this otherwise?");
5650
5651  // Check for biased locking unlock case, which is a no-op
5652  // Note: we do not have to check the thread ID for two reasons.
5653  // First, the interpreter checks for IllegalMonitorStateException at
5654  // a higher level. Second, if the bias was revoked while we held the
5655  // lock, the object could not be rebiased toward another thread, so
5656  // the bias bit would be clear.
5657  movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
5658  andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
5659  cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
5660  jcc(Assembler::equal, done);
5661}
5662
5663void MacroAssembler::c2bool(Register x) {
5664  // implements x == 0 ? 0 : 1
5665  // note: must only look at least-significant byte of x
5666  //       since C-style booleans are stored in one byte
5667  //       only! (was bug)
5668  andl(x, 0xFF);
5669  setb(Assembler::notZero, x);
5670}
5671
5672// Wouldn't need if AddressLiteral version had new name
5673void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
5674  Assembler::call(L, rtype);
5675}
5676
5677void MacroAssembler::call(Register entry) {
5678  Assembler::call(entry);
5679}
5680
5681void MacroAssembler::call(AddressLiteral entry) {
5682  if (reachable(entry)) {
5683    Assembler::call_literal(entry.target(), entry.rspec());
5684  } else {
5685    lea(rscratch1, entry);
5686    Assembler::call(rscratch1);
5687  }
5688}
5689
5690// Implementation of call_VM versions
5691
5692void MacroAssembler::call_VM(Register oop_result,
5693                             address entry_point,
5694                             bool check_exceptions) {
5695  Label C, E;
5696  call(C, relocInfo::none);
5697  jmp(E);
5698
5699  bind(C);
5700  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
5701  ret(0);
5702
5703  bind(E);
5704}
5705
5706void MacroAssembler::call_VM(Register oop_result,
5707                             address entry_point,
5708                             Register arg_1,
5709                             bool check_exceptions) {
5710  Label C, E;
5711  call(C, relocInfo::none);
5712  jmp(E);
5713
5714  bind(C);
5715  pass_arg1(this, arg_1);
5716  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
5717  ret(0);
5718
5719  bind(E);
5720}
5721
5722void MacroAssembler::call_VM(Register oop_result,
5723                             address entry_point,
5724                             Register arg_1,
5725                             Register arg_2,
5726                             bool check_exceptions) {
5727  Label C, E;
5728  call(C, relocInfo::none);
5729  jmp(E);
5730
5731  bind(C);
5732
5733  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
5734
5735  pass_arg2(this, arg_2);
5736  pass_arg1(this, arg_1);
5737  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
5738  ret(0);
5739
5740  bind(E);
5741}
5742
5743void MacroAssembler::call_VM(Register oop_result,
5744                             address entry_point,
5745                             Register arg_1,
5746                             Register arg_2,
5747                             Register arg_3,
5748                             bool check_exceptions) {
5749  Label C, E;
5750  call(C, relocInfo::none);
5751  jmp(E);
5752
5753  bind(C);
5754
5755  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
5756  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
5757  pass_arg3(this, arg_3);
5758
5759  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
5760  pass_arg2(this, arg_2);
5761
5762  pass_arg1(this, arg_1);
5763  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
5764  ret(0);
5765
5766  bind(E);
5767}
5768
5769void MacroAssembler::call_VM(Register oop_result,
5770                             Register last_java_sp,
5771                             address entry_point,
5772                             int number_of_arguments,
5773                             bool check_exceptions) {
5774  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
5775  call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
5776}
5777
5778void MacroAssembler::call_VM(Register oop_result,
5779                             Register last_java_sp,
5780                             address entry_point,
5781                             Register arg_1,
5782                             bool check_exceptions) {
5783  pass_arg1(this, arg_1);
5784  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
5785}
5786
5787void MacroAssembler::call_VM(Register oop_result,
5788                             Register last_java_sp,
5789                             address entry_point,
5790                             Register arg_1,
5791                             Register arg_2,
5792                             bool check_exceptions) {
5793
5794  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
5795  pass_arg2(this, arg_2);
5796  pass_arg1(this, arg_1);
5797  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
5798}
5799
5800void MacroAssembler::call_VM(Register oop_result,
5801                             Register last_java_sp,
5802                             address entry_point,
5803                             Register arg_1,
5804                             Register arg_2,
5805                             Register arg_3,
5806                             bool check_exceptions) {
5807  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
5808  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
5809  pass_arg3(this, arg_3);
5810  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
5811  pass_arg2(this, arg_2);
5812  pass_arg1(this, arg_1);
5813  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
5814}
5815
5816void MacroAssembler::call_VM_base(Register oop_result,
5817                                  Register java_thread,
5818                                  Register last_java_sp,
5819                                  address  entry_point,
5820                                  int      number_of_arguments,
5821                                  bool     check_exceptions) {
5822  // determine java_thread register
5823  if (!java_thread->is_valid()) {
5824#ifdef _LP64
5825    java_thread = r15_thread;
5826#else
5827    java_thread = rdi;
5828    get_thread(java_thread);
5829#endif // LP64
5830  }
5831  // determine last_java_sp register
5832  if (!last_java_sp->is_valid()) {
5833    last_java_sp = rsp;
5834  }
5835  // debugging support
5836  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
5837  LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
5838  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
5839  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
5840
5841  // push java thread (becomes first argument of C function)
5842
5843  NOT_LP64(push(java_thread); number_of_arguments++);
5844  LP64_ONLY(mov(c_rarg0, r15_thread));
5845
5846  // set last Java frame before call
5847  assert(last_java_sp != rbp, "can't use ebp/rbp");
5848
5849  // Only interpreter should have to set fp
5850  set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);
5851
5852  // do the call, remove parameters
5853  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);
5854
5855  // restore the thread (cannot use the pushed argument since arguments
5856  // may be overwritten by C code generated by an optimizing compiler);
5857  // however can use the register value directly if it is callee saved.
5858  if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
5859    // rdi & rsi (also r15) are callee saved -> nothing to do
5860#ifdef ASSERT
5861    guarantee(java_thread != rax, "change this code");
5862    push(rax);
5863    { Label L;
5864      get_thread(rax);
5865      cmpptr(java_thread, rax);
5866      jcc(Assembler::equal, L);
5867      stop("MacroAssembler::call_VM_base: rdi not callee saved?");
5868      bind(L);
5869    }
5870    pop(rax);
5871#endif
5872  } else {
5873    get_thread(java_thread);
5874  }
5875  // reset last Java frame
5876  // Only interpreter should have to clear fp
5877  reset_last_Java_frame(java_thread, true, false);
5878
5879#ifndef CC_INTERP
5880   // C++ interp handles this in the interpreter
5881  check_and_handle_popframe(java_thread);
5882  check_and_handle_earlyret(java_thread);
5883#endif /* CC_INTERP */
5884
5885  if (check_exceptions) {
5886    // check for pending exceptions (java_thread is set upon return)
5887    cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
5888#ifndef _LP64
5889    jump_cc(Assembler::notEqual,
5890            RuntimeAddress(StubRoutines::forward_exception_entry()));
5891#else
5892    // This used to conditionally jump to forward_exception however it is
5893    // possible if we relocate that the branch will not reach. So we must jump
5894    // around so we can always reach
5895
5896    Label ok;
5897    jcc(Assembler::equal, ok);
5898    jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
5899    bind(ok);
5900#endif // LP64
5901  }
5902
5903  // get oop result if there is one and reset the value in the thread
5904  if (oop_result->is_valid()) {
5905    movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
5906    movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
5907    verify_oop(oop_result, "broken oop in call_VM_base");
5908  }
5909}
5910
5911void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
5912
5913  // Calculate the value for last_Java_sp
5914  // somewhat subtle. call_VM does an intermediate call
5915  // which places a return address on the stack just under the
5916  // stack pointer as the user finsihed with it. This allows
5917  // use to retrieve last_Java_pc from last_Java_sp[-1].
5918  // On 32bit we then have to push additional args on the stack to accomplish
5919  // the actual requested call. On 64bit call_VM only can use register args
5920  // so the only extra space is the return address that call_VM created.
5921  // This hopefully explains the calculations here.
5922
5923#ifdef _LP64
5924  // We've pushed one address, correct last_Java_sp
5925  lea(rax, Address(rsp, wordSize));
5926#else
5927  lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
5928#endif // LP64
5929
5930  call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);
5931
5932}
5933
5934void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
5935  call_VM_leaf_base(entry_point, number_of_arguments);
5936}
5937
5938void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
5939  pass_arg0(this, arg_0);
5940  call_VM_leaf(entry_point, 1);
5941}
5942
5943void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
5944
5945  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
5946  pass_arg1(this, arg_1);
5947  pass_arg0(this, arg_0);
5948  call_VM_leaf(entry_point, 2);
5949}
5950
5951void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
5952  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
5953  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
5954  pass_arg2(this, arg_2);
5955  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
5956  pass_arg1(this, arg_1);
5957  pass_arg0(this, arg_0);
5958  call_VM_leaf(entry_point, 3);
5959}
5960
5961void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
5962}
5963
5964void MacroAssembler::check_and_handle_popframe(Register java_thread) {
5965}
5966
5967void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
5968  if (reachable(src1)) {
5969    cmpl(as_Address(src1), imm);
5970  } else {
5971    lea(rscratch1, src1);
5972    cmpl(Address(rscratch1, 0), imm);
5973  }
5974}
5975
5976void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
5977  assert(!src2.is_lval(), "use cmpptr");
5978  if (reachable(src2)) {
5979    cmpl(src1, as_Address(src2));
5980  } else {
5981    lea(rscratch1, src2);
5982    cmpl(src1, Address(rscratch1, 0));
5983  }
5984}
5985
5986void MacroAssembler::cmp32(Register src1, int32_t imm) {
5987  Assembler::cmpl(src1, imm);
5988}
5989
5990void MacroAssembler::cmp32(Register src1, Address src2) {
5991  Assembler::cmpl(src1, src2);
5992}
5993
5994void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
5995  ucomisd(opr1, opr2);
5996
5997  Label L;
5998  if (unordered_is_less) {
5999    movl(dst, -1);
6000    jcc(Assembler::parity, L);
6001    jcc(Assembler::below , L);
6002    movl(dst, 0);
6003    jcc(Assembler::equal , L);
6004    increment(dst);
6005  } else { // unordered is greater
6006    movl(dst, 1);
6007    jcc(Assembler::parity, L);
6008    jcc(Assembler::above , L);
6009    movl(dst, 0);
6010    jcc(Assembler::equal , L);
6011    decrementl(dst);
6012  }
6013  bind(L);
6014}
6015
6016void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
6017  ucomiss(opr1, opr2);
6018
6019  Label L;
6020  if (unordered_is_less) {
6021    movl(dst, -1);
6022    jcc(Assembler::parity, L);
6023    jcc(Assembler::below , L);
6024    movl(dst, 0);
6025    jcc(Assembler::equal , L);
6026    increment(dst);
6027  } else { // unordered is greater
6028    movl(dst, 1);
6029    jcc(Assembler::parity, L);
6030    jcc(Assembler::above , L);
6031    movl(dst, 0);
6032    jcc(Assembler::equal , L);
6033    decrementl(dst);
6034  }
6035  bind(L);
6036}
6037
6038
6039void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
6040  if (reachable(src1)) {
6041    cmpb(as_Address(src1), imm);
6042  } else {
6043    lea(rscratch1, src1);
6044    cmpb(Address(rscratch1, 0), imm);
6045  }
6046}
6047
6048void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
6049#ifdef _LP64
6050  if (src2.is_lval()) {
6051    movptr(rscratch1, src2);
6052    Assembler::cmpq(src1, rscratch1);
6053  } else if (reachable(src2)) {
6054    cmpq(src1, as_Address(src2));
6055  } else {
6056    lea(rscratch1, src2);
6057    Assembler::cmpq(src1, Address(rscratch1, 0));
6058  }
6059#else
6060  if (src2.is_lval()) {
6061    cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
6062  } else {
6063    cmpl(src1, as_Address(src2));
6064  }
6065#endif // _LP64
6066}
6067
6068void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
6069  assert(src2.is_lval(), "not a mem-mem compare");
6070#ifdef _LP64
6071  // moves src2's literal address
6072  movptr(rscratch1, src2);
6073  Assembler::cmpq(src1, rscratch1);
6074#else
6075  cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
6076#endif // _LP64
6077}
6078
6079void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
6080  if (reachable(adr)) {
6081    if (os::is_MP())
6082      lock();
6083    cmpxchgptr(reg, as_Address(adr));
6084  } else {
6085    lea(rscratch1, adr);
6086    if (os::is_MP())
6087      lock();
6088    cmpxchgptr(reg, Address(rscratch1, 0));
6089  }
6090}
6091
6092void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
6093  LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
6094}
6095
6096void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
6097  if (reachable(src)) {
6098    comisd(dst, as_Address(src));
6099  } else {
6100    lea(rscratch1, src);
6101    comisd(dst, Address(rscratch1, 0));
6102  }
6103}
6104
6105void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
6106  if (reachable(src)) {
6107    comiss(dst, as_Address(src));
6108  } else {
6109    lea(rscratch1, src);
6110    comiss(dst, Address(rscratch1, 0));
6111  }
6112}
6113
6114
6115void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
6116  Condition negated_cond = negate_condition(cond);
6117  Label L;
6118  jcc(negated_cond, L);
6119  atomic_incl(counter_addr);
6120  bind(L);
6121}
6122
6123int MacroAssembler::corrected_idivl(Register reg) {
6124  // Full implementation of Java idiv and irem; checks for
6125  // special case as described in JVM spec., p.243 & p.271.
6126  // The function returns the (pc) offset of the idivl
6127  // instruction - may be needed for implicit exceptions.
6128  //
6129  //         normal case                           special case
6130  //
6131  // input : rax,: dividend                         min_int
6132  //         reg: divisor   (may not be rax,/rdx)   -1
6133  //
6134  // output: rax,: quotient  (= rax, idiv reg)       min_int
6135  //         rdx: remainder (= rax, irem reg)       0
6136  assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
6137  const int min_int = 0x80000000;
6138  Label normal_case, special_case;
6139
6140  // check for special case
6141  cmpl(rax, min_int);
6142  jcc(Assembler::notEqual, normal_case);
6143  xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
6144  cmpl(reg, -1);
6145  jcc(Assembler::equal, special_case);
6146
6147  // handle normal case
6148  bind(normal_case);
6149  cdql();
6150  int idivl_offset = offset();
6151  idivl(reg);
6152
6153  // normal and special case exit
6154  bind(special_case);
6155
6156  return idivl_offset;
6157}
6158
6159
6160
6161void MacroAssembler::decrementl(Register reg, int value) {
6162  if (value == min_jint) {subl(reg, value) ; return; }
6163  if (value <  0) { incrementl(reg, -value); return; }
6164  if (value == 0) {                        ; return; }
6165  if (value == 1 && UseIncDec) { decl(reg) ; return; }
6166  /* else */      { subl(reg, value)       ; return; }
6167}
6168
6169void MacroAssembler::decrementl(Address dst, int value) {
6170  if (value == min_jint) {subl(dst, value) ; return; }
6171  if (value <  0) { incrementl(dst, -value); return; }
6172  if (value == 0) {                        ; return; }
6173  if (value == 1 && UseIncDec) { decl(dst) ; return; }
6174  /* else */      { subl(dst, value)       ; return; }
6175}
6176
6177void MacroAssembler::division_with_shift (Register reg, int shift_value) {
6178  assert (shift_value > 0, "illegal shift value");
6179  Label _is_positive;
6180  testl (reg, reg);
6181  jcc (Assembler::positive, _is_positive);
6182  int offset = (1 << shift_value) - 1 ;
6183
6184  if (offset == 1) {
6185    incrementl(reg);
6186  } else {
6187    addl(reg, offset);
6188  }
6189
6190  bind (_is_positive);
6191  sarl(reg, shift_value);
6192}
6193
6194// !defined(COMPILER2) is because of stupid core builds
6195#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
6196void MacroAssembler::empty_FPU_stack() {
6197  if (VM_Version::supports_mmx()) {
6198    emms();
6199  } else {
6200    for (int i = 8; i-- > 0; ) ffree(i);
6201  }
6202}
6203#endif // !LP64 || C1 || !C2
6204
6205
6206// Defines obj, preserves var_size_in_bytes
6207void MacroAssembler::eden_allocate(Register obj,
6208                                   Register var_size_in_bytes,
6209                                   int con_size_in_bytes,
6210                                   Register t1,
6211                                   Label& slow_case) {
6212  assert(obj == rax, "obj must be in rax, for cmpxchg");
6213  assert_different_registers(obj, var_size_in_bytes, t1);
6214  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
6215    jmp(slow_case);
6216  } else {
6217    Register end = t1;
6218    Label retry;
6219    bind(retry);
6220    ExternalAddress heap_top((address) Universe::heap()->top_addr());
6221    movptr(obj, heap_top);
6222    if (var_size_in_bytes == noreg) {
6223      lea(end, Address(obj, con_size_in_bytes));
6224    } else {
6225      lea(end, Address(obj, var_size_in_bytes, Address::times_1));
6226    }
6227    // if end < obj then we wrapped around => object too long => slow case
6228    cmpptr(end, obj);
6229    jcc(Assembler::below, slow_case);
6230    cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
6231    jcc(Assembler::above, slow_case);
6232    // Compare obj with the top addr, and if still equal, store the new top addr in
6233    // end at the address of the top addr pointer. Sets ZF if was equal, and clears
6234    // it otherwise. Use lock prefix for atomicity on MPs.
6235    locked_cmpxchgptr(end, heap_top);
6236    jcc(Assembler::notEqual, retry);
6237  }
6238}
6239
6240void MacroAssembler::enter() {
6241  push(rbp);
6242  mov(rbp, rsp);
6243}
6244
6245void MacroAssembler::fcmp(Register tmp) {
6246  fcmp(tmp, 1, true, true);
6247}
6248
6249void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
6250  assert(!pop_right || pop_left, "usage error");
6251  if (VM_Version::supports_cmov()) {
6252    assert(tmp == noreg, "unneeded temp");
6253    if (pop_left) {
6254      fucomip(index);
6255    } else {
6256      fucomi(index);
6257    }
6258    if (pop_right) {
6259      fpop();
6260    }
6261  } else {
6262    assert(tmp != noreg, "need temp");
6263    if (pop_left) {
6264      if (pop_right) {
6265        fcompp();
6266      } else {
6267        fcomp(index);
6268      }
6269    } else {
6270      fcom(index);
6271    }
6272    // convert FPU condition into eflags condition via rax,
6273    save_rax(tmp);
6274    fwait(); fnstsw_ax();
6275    sahf();
6276    restore_rax(tmp);
6277  }
6278  // condition codes set as follows:
6279  //
6280  // CF (corresponds to C0) if x < y
6281  // PF (corresponds to C2) if unordered
6282  // ZF (corresponds to C3) if x = y
6283}
6284
6285void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
6286  fcmp2int(dst, unordered_is_less, 1, true, true);
6287}
6288
6289void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
6290  fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
6291  Label L;
6292  if (unordered_is_less) {
6293    movl(dst, -1);
6294    jcc(Assembler::parity, L);
6295    jcc(Assembler::below , L);
6296    movl(dst, 0);
6297    jcc(Assembler::equal , L);
6298    increment(dst);
6299  } else { // unordered is greater
6300    movl(dst, 1);
6301    jcc(Assembler::parity, L);
6302    jcc(Assembler::above , L);
6303    movl(dst, 0);
6304    jcc(Assembler::equal , L);
6305    decrementl(dst);
6306  }
6307  bind(L);
6308}
6309
6310void MacroAssembler::fld_d(AddressLiteral src) {
6311  fld_d(as_Address(src));
6312}
6313
6314void MacroAssembler::fld_s(AddressLiteral src) {
6315  fld_s(as_Address(src));
6316}
6317
6318void MacroAssembler::fld_x(AddressLiteral src) {
6319  Assembler::fld_x(as_Address(src));
6320}
6321
6322void MacroAssembler::fldcw(AddressLiteral src) {
6323  Assembler::fldcw(as_Address(src));
6324}
6325
6326void MacroAssembler::fpop() {
6327  ffree();
6328  fincstp();
6329}
6330
6331void MacroAssembler::fremr(Register tmp) {
6332  save_rax(tmp);
6333  { Label L;
6334    bind(L);
6335    fprem();
6336    fwait(); fnstsw_ax();
6337#ifdef _LP64
6338    testl(rax, 0x400);
6339    jcc(Assembler::notEqual, L);
6340#else
6341    sahf();
6342    jcc(Assembler::parity, L);
6343#endif // _LP64
6344  }
6345  restore_rax(tmp);
6346  // Result is in ST0.
6347  // Note: fxch & fpop to get rid of ST1
6348  // (otherwise FPU stack could overflow eventually)
6349  fxch(1);
6350  fpop();
6351}
6352
6353
6354void MacroAssembler::incrementl(AddressLiteral dst) {
6355  if (reachable(dst)) {
6356    incrementl(as_Address(dst));
6357  } else {
6358    lea(rscratch1, dst);
6359    incrementl(Address(rscratch1, 0));
6360  }
6361}
6362
6363void MacroAssembler::incrementl(ArrayAddress dst) {
6364  incrementl(as_Address(dst));
6365}
6366
6367void MacroAssembler::incrementl(Register reg, int value) {
6368  if (value == min_jint) {addl(reg, value) ; return; }
6369  if (value <  0) { decrementl(reg, -value); return; }
6370  if (value == 0) {                        ; return; }
6371  if (value == 1 && UseIncDec) { incl(reg) ; return; }
6372  /* else */      { addl(reg, value)       ; return; }
6373}
6374
6375void MacroAssembler::incrementl(Address dst, int value) {
6376  if (value == min_jint) {addl(dst, value) ; return; }
6377  if (value <  0) { decrementl(dst, -value); return; }
6378  if (value == 0) {                        ; return; }
6379  if (value == 1 && UseIncDec) { incl(dst) ; return; }
6380  /* else */      { addl(dst, value)       ; return; }
6381}
6382
6383void MacroAssembler::jump(AddressLiteral dst) {
6384  if (reachable(dst)) {
6385    jmp_literal(dst.target(), dst.rspec());
6386  } else {
6387    lea(rscratch1, dst);
6388    jmp(rscratch1);
6389  }
6390}
6391
6392void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
6393  if (reachable(dst)) {
6394    InstructionMark im(this);
6395    relocate(dst.reloc());
6396    const int short_size = 2;
6397    const int long_size = 6;
6398    int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);
6399    if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
6400      // 0111 tttn #8-bit disp
6401      emit_byte(0x70 | cc);
6402      emit_byte((offs - short_size) & 0xFF);
6403    } else {
6404      // 0000 1111 1000 tttn #32-bit disp
6405      emit_byte(0x0F);
6406      emit_byte(0x80 | cc);
6407      emit_long(offs - long_size);
6408    }
6409  } else {
6410#ifdef ASSERT
6411    warning("reversing conditional branch");
6412#endif /* ASSERT */
6413    Label skip;
6414    jccb(reverse[cc], skip);
6415    lea(rscratch1, dst);
6416    Assembler::jmp(rscratch1);
6417    bind(skip);
6418  }
6419}
6420
6421void MacroAssembler::ldmxcsr(AddressLiteral src) {
6422  if (reachable(src)) {
6423    Assembler::ldmxcsr(as_Address(src));
6424  } else {
6425    lea(rscratch1, src);
6426    Assembler::ldmxcsr(Address(rscratch1, 0));
6427  }
6428}
6429
6430int MacroAssembler::load_signed_byte(Register dst, Address src) {
6431  int off;
6432  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
6433    off = offset();
6434    movsbl(dst, src); // movsxb
6435  } else {
6436    off = load_unsigned_byte(dst, src);
6437    shll(dst, 24);
6438    sarl(dst, 24);
6439  }
6440  return off;
6441}
6442
6443// Note: load_signed_short used to be called load_signed_word.
6444// Although the 'w' in x86 opcodes refers to the term "word" in the assembler
6445// manual, which means 16 bits, that usage is found nowhere in HotSpot code.
6446// The term "word" in HotSpot means a 32- or 64-bit machine word.
6447int MacroAssembler::load_signed_short(Register dst, Address src) {
6448  int off;
6449  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
6450    // This is dubious to me since it seems safe to do a signed 16 => 64 bit
6451    // version but this is what 64bit has always done. This seems to imply
6452    // that users are only using 32bits worth.
6453    off = offset();
6454    movswl(dst, src); // movsxw
6455  } else {
6456    off = load_unsigned_short(dst, src);
6457    shll(dst, 16);
6458    sarl(dst, 16);
6459  }
6460  return off;
6461}
6462
6463int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
6464  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
6465  // and "3.9 Partial Register Penalties", p. 22).
6466  int off;
6467  if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
6468    off = offset();
6469    movzbl(dst, src); // movzxb
6470  } else {
6471    xorl(dst, dst);
6472    off = offset();
6473    movb(dst, src);
6474  }
6475  return off;
6476}
6477
6478// Note: load_unsigned_short used to be called load_unsigned_word.
6479int MacroAssembler::load_unsigned_short(Register dst, Address src) {
6480  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
6481  // and "3.9 Partial Register Penalties", p. 22).
6482  int off;
6483  if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
6484    off = offset();
6485    movzwl(dst, src); // movzxw
6486  } else {
6487    xorl(dst, dst);
6488    off = offset();
6489    movw(dst, src);
6490  }
6491  return off;
6492}
6493
6494void MacroAssembler::load_sized_value(Register dst, Address src,
6495                                      size_t size_in_bytes, bool is_signed) {
6496  switch (size_in_bytes) {
6497#ifndef _LP64
6498  // For case 8, caller is responsible for manually loading
6499  // the second word into another register.
6500  case  8: movl(dst, src); break;
6501#else
6502  case  8: movq(dst, src); break;
6503#endif
6504  case  4: movl(dst, src); break;
6505  case  2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
6506  case  1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
6507  default: ShouldNotReachHere();
6508  }
6509}
6510
6511void MacroAssembler::mov32(AddressLiteral dst, Register src) {
6512  if (reachable(dst)) {
6513    movl(as_Address(dst), src);
6514  } else {
6515    lea(rscratch1, dst);
6516    movl(Address(rscratch1, 0), src);
6517  }
6518}
6519
6520void MacroAssembler::mov32(Register dst, AddressLiteral src) {
6521  if (reachable(src)) {
6522    movl(dst, as_Address(src));
6523  } else {
6524    lea(rscratch1, src);
6525    movl(dst, Address(rscratch1, 0));
6526  }
6527}
6528
6529// C++ bool manipulation
6530
6531void MacroAssembler::movbool(Register dst, Address src) {
6532  if(sizeof(bool) == 1)
6533    movb(dst, src);
6534  else if(sizeof(bool) == 2)
6535    movw(dst, src);
6536  else if(sizeof(bool) == 4)
6537    movl(dst, src);
6538  else
6539    // unsupported
6540    ShouldNotReachHere();
6541}
6542
6543void MacroAssembler::movbool(Address dst, bool boolconst) {
6544  if(sizeof(bool) == 1)
6545    movb(dst, (int) boolconst);
6546  else if(sizeof(bool) == 2)
6547    movw(dst, (int) boolconst);
6548  else if(sizeof(bool) == 4)
6549    movl(dst, (int) boolconst);
6550  else
6551    // unsupported
6552    ShouldNotReachHere();
6553}
6554
6555void MacroAssembler::movbool(Address dst, Register src) {
6556  if(sizeof(bool) == 1)
6557    movb(dst, src);
6558  else if(sizeof(bool) == 2)
6559    movw(dst, src);
6560  else if(sizeof(bool) == 4)
6561    movl(dst, src);
6562  else
6563    // unsupported
6564    ShouldNotReachHere();
6565}
6566
6567void MacroAssembler::movbyte(ArrayAddress dst, int src) {
6568  movb(as_Address(dst), src);
6569}
6570
6571void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
6572  if (reachable(src)) {
6573    if (UseXmmLoadAndClearUpper) {
6574      movsd (dst, as_Address(src));
6575    } else {
6576      movlpd(dst, as_Address(src));
6577    }
6578  } else {
6579    lea(rscratch1, src);
6580    if (UseXmmLoadAndClearUpper) {
6581      movsd (dst, Address(rscratch1, 0));
6582    } else {
6583      movlpd(dst, Address(rscratch1, 0));
6584    }
6585  }
6586}
6587
6588void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
6589  if (reachable(src)) {
6590    movss(dst, as_Address(src));
6591  } else {
6592    lea(rscratch1, src);
6593    movss(dst, Address(rscratch1, 0));
6594  }
6595}
6596
6597void MacroAssembler::movptr(Register dst, Register src) {
6598  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6599}
6600
6601void MacroAssembler::movptr(Register dst, Address src) {
6602  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6603}
6604
6605// src should NEVER be a real pointer. Use AddressLiteral for true pointers
6606void MacroAssembler::movptr(Register dst, intptr_t src) {
6607  LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
6608}
6609
6610void MacroAssembler::movptr(Address dst, Register src) {
6611  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6612}
6613
6614void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
6615  if (reachable(src)) {
6616    movss(dst, as_Address(src));
6617  } else {
6618    lea(rscratch1, src);
6619    movss(dst, Address(rscratch1, 0));
6620  }
6621}
6622
6623void MacroAssembler::null_check(Register reg, int offset) {
6624  if (needs_explicit_null_check(offset)) {
6625    // provoke OS NULL exception if reg = NULL by
6626    // accessing M[reg] w/o changing any (non-CC) registers
6627    // NOTE: cmpl is plenty here to provoke a segv
6628    cmpptr(rax, Address(reg, 0));
6629    // Note: should probably use testl(rax, Address(reg, 0));
6630    //       may be shorter code (however, this version of
6631    //       testl needs to be implemented first)
6632  } else {
6633    // nothing to do, (later) access of M[reg + offset]
6634    // will provoke OS NULL exception if reg = NULL
6635  }
6636}
6637
6638void MacroAssembler::os_breakpoint() {
6639  // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
6640  // (e.g., MSVC can't call ps() otherwise)
6641  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
6642}
6643
6644void MacroAssembler::pop_CPU_state() {
6645  pop_FPU_state();
6646  pop_IU_state();
6647}
6648
6649void MacroAssembler::pop_FPU_state() {
6650  NOT_LP64(frstor(Address(rsp, 0));)
6651  LP64_ONLY(fxrstor(Address(rsp, 0));)
6652  addptr(rsp, FPUStateSizeInWords * wordSize);
6653}
6654
6655void MacroAssembler::pop_IU_state() {
6656  popa();
6657  LP64_ONLY(addq(rsp, 8));
6658  popf();
6659}
6660
6661// Save Integer and Float state
6662// Warning: Stack must be 16 byte aligned (64bit)
6663void MacroAssembler::push_CPU_state() {
6664  push_IU_state();
6665  push_FPU_state();
6666}
6667
6668void MacroAssembler::push_FPU_state() {
6669  subptr(rsp, FPUStateSizeInWords * wordSize);
6670#ifndef _LP64
6671  fnsave(Address(rsp, 0));
6672  fwait();
6673#else
6674  fxsave(Address(rsp, 0));
6675#endif // LP64
6676}
6677
6678void MacroAssembler::push_IU_state() {
6679  // Push flags first because pusha kills them
6680  pushf();
6681  // Make sure rsp stays 16-byte aligned
6682  LP64_ONLY(subq(rsp, 8));
6683  pusha();
6684}
6685
6686void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
6687  // determine java_thread register
6688  if (!java_thread->is_valid()) {
6689    java_thread = rdi;
6690    get_thread(java_thread);
6691  }
6692  // we must set sp to zero to clear frame
6693  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
6694  if (clear_fp) {
6695    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
6696  }
6697
6698  if (clear_pc)
6699    movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
6700
6701}
6702
6703void MacroAssembler::restore_rax(Register tmp) {
6704  if (tmp == noreg) pop(rax);
6705  else if (tmp != rax) mov(rax, tmp);
6706}
6707
6708void MacroAssembler::round_to(Register reg, int modulus) {
6709  addptr(reg, modulus - 1);
6710  andptr(reg, -modulus);
6711}
6712
6713void MacroAssembler::save_rax(Register tmp) {
6714  if (tmp == noreg) push(rax);
6715  else if (tmp != rax) mov(tmp, rax);
6716}
6717
6718// Write serialization page so VM thread can do a pseudo remote membar.
6719// We use the current thread pointer to calculate a thread specific
6720// offset to write to within the page. This minimizes bus traffic
6721// due to cache line collision.
6722void MacroAssembler::serialize_memory(Register thread, Register tmp) {
6723  movl(tmp, thread);
6724  shrl(tmp, os::get_serialize_page_shift_count());
6725  andl(tmp, (os::vm_page_size() - sizeof(int)));
6726
6727  Address index(noreg, tmp, Address::times_1);
6728  ExternalAddress page(os::get_memory_serialize_page());
6729
6730  // Size of store must match masking code above
6731  movl(as_Address(ArrayAddress(page, index)), tmp);
6732}
6733
6734// Calls to C land
6735//
6736// When entering C land, the rbp, & rsp of the last Java frame have to be recorded
6737// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
6738// has to be reset to 0. This is required to allow proper stack traversal.
6739void MacroAssembler::set_last_Java_frame(Register java_thread,
6740                                         Register last_java_sp,
6741                                         Register last_java_fp,
6742                                         address  last_java_pc) {
6743  // determine java_thread register
6744  if (!java_thread->is_valid()) {
6745    java_thread = rdi;
6746    get_thread(java_thread);
6747  }
6748  // determine last_java_sp register
6749  if (!last_java_sp->is_valid()) {
6750    last_java_sp = rsp;
6751  }
6752
6753  // last_java_fp is optional
6754
6755  if (last_java_fp->is_valid()) {
6756    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
6757  }
6758
6759  // last_java_pc is optional
6760
6761  if (last_java_pc != NULL) {
6762    lea(Address(java_thread,
6763                 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
6764        InternalAddress(last_java_pc));
6765
6766  }
6767  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
6768}
6769
6770void MacroAssembler::shlptr(Register dst, int imm8) {
6771  LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
6772}
6773
6774void MacroAssembler::shrptr(Register dst, int imm8) {
6775  LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
6776}
6777
6778void MacroAssembler::sign_extend_byte(Register reg) {
6779  if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
6780    movsbl(reg, reg); // movsxb
6781  } else {
6782    shll(reg, 24);
6783    sarl(reg, 24);
6784  }
6785}
6786
6787void MacroAssembler::sign_extend_short(Register reg) {
6788  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
6789    movswl(reg, reg); // movsxw
6790  } else {
6791    shll(reg, 16);
6792    sarl(reg, 16);
6793  }
6794}
6795
6796//////////////////////////////////////////////////////////////////////////////////
6797#ifndef SERIALGC
6798
6799void MacroAssembler::g1_write_barrier_pre(Register obj,
6800#ifndef _LP64
6801                                          Register thread,
6802#endif
6803                                          Register tmp,
6804                                          Register tmp2,
6805                                          bool tosca_live) {
6806  LP64_ONLY(Register thread = r15_thread;)
6807  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
6808                                       PtrQueue::byte_offset_of_active()));
6809
6810  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
6811                                       PtrQueue::byte_offset_of_index()));
6812  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
6813                                       PtrQueue::byte_offset_of_buf()));
6814
6815
6816  Label done;
6817  Label runtime;
6818
6819  // if (!marking_in_progress) goto done;
6820  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
6821    cmpl(in_progress, 0);
6822  } else {
6823    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
6824    cmpb(in_progress, 0);
6825  }
6826  jcc(Assembler::equal, done);
6827
6828  // if (x.f == NULL) goto done;
6829#ifdef _LP64
6830  load_heap_oop(tmp2, Address(obj, 0));
6831#else
6832  movptr(tmp2, Address(obj, 0));
6833#endif
6834  cmpptr(tmp2, (int32_t) NULL_WORD);
6835  jcc(Assembler::equal, done);
6836
6837  // Can we store original value in the thread's buffer?
6838
6839#ifdef _LP64
6840  movslq(tmp, index);
6841  cmpq(tmp, 0);
6842#else
6843  cmpl(index, 0);
6844#endif
6845  jcc(Assembler::equal, runtime);
6846#ifdef _LP64
6847  subq(tmp, wordSize);
6848  movl(index, tmp);
6849  addq(tmp, buffer);
6850#else
6851  subl(index, wordSize);
6852  movl(tmp, buffer);
6853  addl(tmp, index);
6854#endif
6855  movptr(Address(tmp, 0), tmp2);
6856  jmp(done);
6857  bind(runtime);
6858  // save the live input values
6859  if(tosca_live) push(rax);
6860  push(obj);
6861#ifdef _LP64
6862  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, r15_thread);
6863#else
6864  push(thread);
6865  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread);
6866  pop(thread);
6867#endif
6868  pop(obj);
6869  if(tosca_live) pop(rax);
6870  bind(done);
6871
6872}
6873
6874void MacroAssembler::g1_write_barrier_post(Register store_addr,
6875                                           Register new_val,
6876#ifndef _LP64
6877                                           Register thread,
6878#endif
6879                                           Register tmp,
6880                                           Register tmp2) {
6881
6882  LP64_ONLY(Register thread = r15_thread;)
6883  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
6884                                       PtrQueue::byte_offset_of_index()));
6885  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
6886                                       PtrQueue::byte_offset_of_buf()));
6887  BarrierSet* bs = Universe::heap()->barrier_set();
6888  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
6889  Label done;
6890  Label runtime;
6891
6892  // Does store cross heap regions?
6893
6894  movptr(tmp, store_addr);
6895  xorptr(tmp, new_val);
6896  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
6897  jcc(Assembler::equal, done);
6898
6899  // crosses regions, storing NULL?
6900
6901  cmpptr(new_val, (int32_t) NULL_WORD);
6902  jcc(Assembler::equal, done);
6903
6904  // storing region crossing non-NULL, is card already dirty?
6905
6906  ExternalAddress cardtable((address) ct->byte_map_base);
6907  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
6908#ifdef _LP64
6909  const Register card_addr = tmp;
6910
6911  movq(card_addr, store_addr);
6912  shrq(card_addr, CardTableModRefBS::card_shift);
6913
6914  lea(tmp2, cardtable);
6915
6916  // get the address of the card
6917  addq(card_addr, tmp2);
6918#else
6919  const Register card_index = tmp;
6920
6921  movl(card_index, store_addr);
6922  shrl(card_index, CardTableModRefBS::card_shift);
6923
6924  Address index(noreg, card_index, Address::times_1);
6925  const Register card_addr = tmp;
6926  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
6927#endif
6928  cmpb(Address(card_addr, 0), 0);
6929  jcc(Assembler::equal, done);
6930
6931  // storing a region crossing, non-NULL oop, card is clean.
6932  // dirty card and log.
6933
6934  movb(Address(card_addr, 0), 0);
6935
6936  cmpl(queue_index, 0);
6937  jcc(Assembler::equal, runtime);
6938  subl(queue_index, wordSize);
6939  movptr(tmp2, buffer);
6940#ifdef _LP64
6941  movslq(rscratch1, queue_index);
6942  addq(tmp2, rscratch1);
6943  movq(Address(tmp2, 0), card_addr);
6944#else
6945  addl(tmp2, queue_index);
6946  movl(Address(tmp2, 0), card_index);
6947#endif
6948  jmp(done);
6949
6950  bind(runtime);
6951  // save the live input values
6952  push(store_addr);
6953  push(new_val);
6954#ifdef _LP64
6955  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
6956#else
6957  push(thread);
6958  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
6959  pop(thread);
6960#endif
6961  pop(new_val);
6962  pop(store_addr);
6963
6964  bind(done);
6965
6966}
6967
6968#endif // SERIALGC
6969//////////////////////////////////////////////////////////////////////////////////
6970
6971
6972void MacroAssembler::store_check(Register obj) {
6973  // Does a store check for the oop in register obj. The content of
6974  // register obj is destroyed afterwards.
6975  store_check_part_1(obj);
6976  store_check_part_2(obj);
6977}
6978
6979void MacroAssembler::store_check(Register obj, Address dst) {
6980  store_check(obj);
6981}
6982
6983
6984// split the store check operation so that other instructions can be scheduled inbetween
6985void MacroAssembler::store_check_part_1(Register obj) {
6986  BarrierSet* bs = Universe::heap()->barrier_set();
6987  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
6988  shrptr(obj, CardTableModRefBS::card_shift);
6989}
6990
6991void MacroAssembler::store_check_part_2(Register obj) {
6992  BarrierSet* bs = Universe::heap()->barrier_set();
6993  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
6994  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
6995  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
6996
6997  // The calculation for byte_map_base is as follows:
6998  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
6999  // So this essentially converts an address to a displacement and
7000  // it will never need to be relocated. On 64bit however the value may be too
7001  // large for a 32bit displacement
7002
7003  intptr_t disp = (intptr_t) ct->byte_map_base;
7004  if (is_simm32(disp)) {
7005    Address cardtable(noreg, obj, Address::times_1, disp);
7006    movb(cardtable, 0);
7007  } else {
7008    // By doing it as an ExternalAddress disp could be converted to a rip-relative
7009    // displacement and done in a single instruction given favorable mapping and
7010    // a smarter version of as_Address. Worst case it is two instructions which
7011    // is no worse off then loading disp into a register and doing as a simple
7012    // Address() as above.
7013    // We can't do as ExternalAddress as the only style since if disp == 0 we'll
7014    // assert since NULL isn't acceptable in a reloci (see 6644928). In any case
7015    // in some cases we'll get a single instruction version.
7016
7017    ExternalAddress cardtable((address)disp);
7018    Address index(noreg, obj, Address::times_1);
7019    movb(as_Address(ArrayAddress(cardtable, index)), 0);
7020  }
7021}
7022
7023void MacroAssembler::subptr(Register dst, int32_t imm32) {
7024  LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
7025}
7026
7027void MacroAssembler::subptr(Register dst, Register src) {
7028  LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
7029}
7030
7031void MacroAssembler::test32(Register src1, AddressLiteral src2) {
7032  // src2 must be rval
7033
7034  if (reachable(src2)) {
7035    testl(src1, as_Address(src2));
7036  } else {
7037    lea(rscratch1, src2);
7038    testl(src1, Address(rscratch1, 0));
7039  }
7040}
7041
7042// C++ bool manipulation
7043void MacroAssembler::testbool(Register dst) {
7044  if(sizeof(bool) == 1)
7045    testb(dst, 0xff);
7046  else if(sizeof(bool) == 2) {
7047    // testw implementation needed for two byte bools
7048    ShouldNotReachHere();
7049  } else if(sizeof(bool) == 4)
7050    testl(dst, dst);
7051  else
7052    // unsupported
7053    ShouldNotReachHere();
7054}
7055
7056void MacroAssembler::testptr(Register dst, Register src) {
7057  LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
7058}
7059
7060// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
7061void MacroAssembler::tlab_allocate(Register obj,
7062                                   Register var_size_in_bytes,
7063                                   int con_size_in_bytes,
7064                                   Register t1,
7065                                   Register t2,
7066                                   Label& slow_case) {
7067  assert_different_registers(obj, t1, t2);
7068  assert_different_registers(obj, var_size_in_bytes, t1);
7069  Register end = t2;
7070  Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);
7071
7072  verify_tlab();
7073
7074  NOT_LP64(get_thread(thread));
7075
7076  movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
7077  if (var_size_in_bytes == noreg) {
7078    lea(end, Address(obj, con_size_in_bytes));
7079  } else {
7080    lea(end, Address(obj, var_size_in_bytes, Address::times_1));
7081  }
7082  cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
7083  jcc(Assembler::above, slow_case);
7084
7085  // update the tlab top pointer
7086  movptr(Address(thread, JavaThread::tlab_top_offset()), end);
7087
7088  // recover var_size_in_bytes if necessary
7089  if (var_size_in_bytes == end) {
7090    subptr(var_size_in_bytes, obj);
7091  }
7092  verify_tlab();
7093}
7094
7095// Preserves rbx, and rdx.
7096void MacroAssembler::tlab_refill(Label& retry,
7097                                 Label& try_eden,
7098                                 Label& slow_case) {
7099  Register top = rax;
7100  Register t1  = rcx;
7101  Register t2  = rsi;
7102  Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
7103  assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
7104  Label do_refill, discard_tlab;
7105
7106  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
7107    // No allocation in the shared eden.
7108    jmp(slow_case);
7109  }
7110
7111  NOT_LP64(get_thread(thread_reg));
7112
7113  movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
7114  movptr(t1,  Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
7115
7116  // calculate amount of free space
7117  subptr(t1, top);
7118  shrptr(t1, LogHeapWordSize);
7119
7120  // Retain tlab and allocate object in shared space if
7121  // the amount free in the tlab is too large to discard.
7122  cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
7123  jcc(Assembler::lessEqual, discard_tlab);
7124
7125  // Retain
7126  // %%% yuck as movptr...
7127  movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
7128  addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
7129  if (TLABStats) {
7130    // increment number of slow_allocations
7131    addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
7132  }
7133  jmp(try_eden);
7134
7135  bind(discard_tlab);
7136  if (TLABStats) {
7137    // increment number of refills
7138    addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
7139    // accumulate wastage -- t1 is amount free in tlab
7140    addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
7141  }
7142
7143  // if tlab is currently allocated (top or end != null) then
7144  // fill [top, end + alignment_reserve) with array object
7145  testptr (top, top);
7146  jcc(Assembler::zero, do_refill);
7147
7148  // set up the mark word
7149  movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
7150  // set the length to the remaining space
7151  subptr(t1, typeArrayOopDesc::header_size(T_INT));
7152  addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
7153  shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));
7154  movptr(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
7155  // set klass to intArrayKlass
7156  // dubious reloc why not an oop reloc?
7157  movptr(t1, ExternalAddress((address) Universe::intArrayKlassObj_addr()));
7158  // store klass last.  concurrent gcs assumes klass length is valid if
7159  // klass field is not null.
7160  store_klass(top, t1);
7161
7162  // refill the tlab with an eden allocation
7163  bind(do_refill);
7164  movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
7165  shlptr(t1, LogHeapWordSize);
7166  // add object_size ??
7167  eden_allocate(top, t1, 0, t2, slow_case);
7168
7169  // Check that t1 was preserved in eden_allocate.
7170#ifdef ASSERT
7171  if (UseTLAB) {
7172    Label ok;
7173    Register tsize = rsi;
7174    assert_different_registers(tsize, thread_reg, t1);
7175    push(tsize);
7176    movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
7177    shlptr(tsize, LogHeapWordSize);
7178    cmpptr(t1, tsize);
7179    jcc(Assembler::equal, ok);
7180    stop("assert(t1 != tlab size)");
7181    should_not_reach_here();
7182
7183    bind(ok);
7184    pop(tsize);
7185  }
7186#endif
7187  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
7188  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
7189  addptr(top, t1);
7190  subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
7191  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
7192  verify_tlab();
7193  jmp(retry);
7194}
7195
7196static const double     pi_4 =  0.7853981633974483;
7197
7198void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
7199  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
7200  // was attempted in this code; unfortunately it appears that the
7201  // switch to 80-bit precision and back causes this to be
7202  // unprofitable compared with simply performing a runtime call if
7203  // the argument is out of the (-pi/4, pi/4) range.
7204
7205  Register tmp = noreg;
7206  if (!VM_Version::supports_cmov()) {
7207    // fcmp needs a temporary so preserve rbx,
7208    tmp = rbx;
7209    push(tmp);
7210  }
7211
7212  Label slow_case, done;
7213
7214  ExternalAddress pi4_adr = (address)&pi_4;
7215  if (reachable(pi4_adr)) {
7216    // x ?<= pi/4
7217    fld_d(pi4_adr);
7218    fld_s(1);                // Stack:  X  PI/4  X
7219    fabs();                  // Stack: |X| PI/4  X
7220    fcmp(tmp);
7221    jcc(Assembler::above, slow_case);
7222
7223    // fastest case: -pi/4 <= x <= pi/4
7224    switch(trig) {
7225    case 's':
7226      fsin();
7227      break;
7228    case 'c':
7229      fcos();
7230      break;
7231    case 't':
7232      ftan();
7233      break;
7234    default:
7235      assert(false, "bad intrinsic");
7236      break;
7237    }
7238    jmp(done);
7239  }
7240
7241  // slow case: runtime call
7242  bind(slow_case);
7243  // Preserve registers across runtime call
7244  pusha();
7245  int incoming_argument_and_return_value_offset = -1;
7246  if (num_fpu_regs_in_use > 1) {
7247    // Must preserve all other FPU regs (could alternatively convert
7248    // SharedRuntime::dsin and dcos into assembly routines known not to trash
7249    // FPU state, but can not trust C compiler)
7250    NEEDS_CLEANUP;
7251    // NOTE that in this case we also push the incoming argument to
7252    // the stack and restore it later; we also use this stack slot to
7253    // hold the return value from dsin or dcos.
7254    for (int i = 0; i < num_fpu_regs_in_use; i++) {
7255      subptr(rsp, sizeof(jdouble));
7256      fstp_d(Address(rsp, 0));
7257    }
7258    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
7259    fld_d(Address(rsp, incoming_argument_and_return_value_offset));
7260  }
7261  subptr(rsp, sizeof(jdouble));
7262  fstp_d(Address(rsp, 0));
7263#ifdef _LP64
7264  movdbl(xmm0, Address(rsp, 0));
7265#endif // _LP64
7266
7267  // NOTE: we must not use call_VM_leaf here because that requires a
7268  // complete interpreter frame in debug mode -- same bug as 4387334
7269  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
7270  // do proper 64bit abi
7271
7272  NEEDS_CLEANUP;
7273  // Need to add stack banging before this runtime call if it needs to
7274  // be taken; however, there is no generic stack banging routine at
7275  // the MacroAssembler level
7276  switch(trig) {
7277  case 's':
7278    {
7279      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 0);
7280    }
7281    break;
7282  case 'c':
7283    {
7284      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 0);
7285    }
7286    break;
7287  case 't':
7288    {
7289      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 0);
7290    }
7291    break;
7292  default:
7293    assert(false, "bad intrinsic");
7294    break;
7295  }
7296#ifdef _LP64
7297    movsd(Address(rsp, 0), xmm0);
7298    fld_d(Address(rsp, 0));
7299#endif // _LP64
7300  addptr(rsp, sizeof(jdouble));
7301  if (num_fpu_regs_in_use > 1) {
7302    // Must save return value to stack and then restore entire FPU stack
7303    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
7304    for (int i = 0; i < num_fpu_regs_in_use; i++) {
7305      fld_d(Address(rsp, 0));
7306      addptr(rsp, sizeof(jdouble));
7307    }
7308  }
7309  popa();
7310
7311  // Come here with result in F-TOS
7312  bind(done);
7313
7314  if (tmp != noreg) {
7315    pop(tmp);
7316  }
7317}
7318
7319
7320// Look up the method for a megamorphic invokeinterface call.
7321// The target method is determined by <intf_klass, itable_index>.
7322// The receiver klass is in recv_klass.
7323// On success, the result will be in method_result, and execution falls through.
7324// On failure, execution transfers to the given label.
7325void MacroAssembler::lookup_interface_method(Register recv_klass,
7326                                             Register intf_klass,
7327                                             RegisterOrConstant itable_index,
7328                                             Register method_result,
7329                                             Register scan_temp,
7330                                             Label& L_no_such_interface) {
7331  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
7332  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
7333         "caller must use same register for non-constant itable index as for method");
7334
7335  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
7336  int vtable_base = instanceKlass::vtable_start_offset() * wordSize;
7337  int itentry_off = itableMethodEntry::method_offset_in_bytes();
7338  int scan_step   = itableOffsetEntry::size() * wordSize;
7339  int vte_size    = vtableEntry::size() * wordSize;
7340  Address::ScaleFactor times_vte_scale = Address::times_ptr;
7341  assert(vte_size == wordSize, "else adjust times_vte_scale");
7342
7343  movl(scan_temp, Address(recv_klass, instanceKlass::vtable_length_offset() * wordSize));
7344
7345  // %%% Could store the aligned, prescaled offset in the klassoop.
7346  lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
7347  if (HeapWordsPerLong > 1) {
7348    // Round up to align_object_offset boundary
7349    // see code for instanceKlass::start_of_itable!
7350    round_to(scan_temp, BytesPerLong);
7351  }
7352
7353  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
7354  assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
7355  lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
7356
7357  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
7358  //   if (scan->interface() == intf) {
7359  //     result = (klass + scan->offset() + itable_index);
7360  //   }
7361  // }
7362  Label search, found_method;
7363
7364  for (int peel = 1; peel >= 0; peel--) {
7365    movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
7366    cmpptr(intf_klass, method_result);
7367
7368    if (peel) {
7369      jccb(Assembler::equal, found_method);
7370    } else {
7371      jccb(Assembler::notEqual, search);
7372      // (invert the test to fall through to found_method...)
7373    }
7374
7375    if (!peel)  break;
7376
7377    bind(search);
7378
7379    // Check that the previous entry is non-null.  A null entry means that
7380    // the receiver class doesn't implement the interface, and wasn't the
7381    // same as when the caller was compiled.
7382    testptr(method_result, method_result);
7383    jcc(Assembler::zero, L_no_such_interface);
7384    addptr(scan_temp, scan_step);
7385  }
7386
7387  bind(found_method);
7388
7389  // Got a hit.
7390  movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
7391  movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
7392}
7393
7394
7395void MacroAssembler::check_klass_subtype(Register sub_klass,
7396                           Register super_klass,
7397                           Register temp_reg,
7398                           Label& L_success) {
7399  Label L_failure;
7400  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
7401  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
7402  bind(L_failure);
7403}
7404
7405
7406void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
7407                                                   Register super_klass,
7408                                                   Register temp_reg,
7409                                                   Label* L_success,
7410                                                   Label* L_failure,
7411                                                   Label* L_slow_path,
7412                                        RegisterOrConstant super_check_offset) {
7413  assert_different_registers(sub_klass, super_klass, temp_reg);
7414  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
7415  if (super_check_offset.is_register()) {
7416    assert_different_registers(sub_klass, super_klass,
7417                               super_check_offset.as_register());
7418  } else if (must_load_sco) {
7419    assert(temp_reg != noreg, "supply either a temp or a register offset");
7420  }
7421
7422  Label L_fallthrough;
7423  int label_nulls = 0;
7424  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
7425  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
7426  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
7427  assert(label_nulls <= 1, "at most one NULL in the batch");
7428
7429  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
7430                   Klass::secondary_super_cache_offset_in_bytes());
7431  int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
7432                    Klass::super_check_offset_offset_in_bytes());
7433  Address super_check_offset_addr(super_klass, sco_offset);
7434
7435  // Hacked jcc, which "knows" that L_fallthrough, at least, is in
7436  // range of a jccb.  If this routine grows larger, reconsider at
7437  // least some of these.
7438#define local_jcc(assembler_cond, label)                                \
7439  if (&(label) == &L_fallthrough)  jccb(assembler_cond, label);         \
7440  else                             jcc( assembler_cond, label) /*omit semi*/
7441
7442  // Hacked jmp, which may only be used just before L_fallthrough.
7443#define final_jmp(label)                                                \
7444  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
7445  else                            jmp(label)                /*omit semi*/
7446
7447  // If the pointers are equal, we are done (e.g., String[] elements).
7448  // This self-check enables sharing of secondary supertype arrays among
7449  // non-primary types such as array-of-interface.  Otherwise, each such
7450  // type would need its own customized SSA.
7451  // We move this check to the front of the fast path because many
7452  // type checks are in fact trivially successful in this manner,
7453  // so we get a nicely predicted branch right at the start of the check.
7454  cmpptr(sub_klass, super_klass);
7455  local_jcc(Assembler::equal, *L_success);
7456
7457  // Check the supertype display:
7458  if (must_load_sco) {
7459    // Positive movl does right thing on LP64.
7460    movl(temp_reg, super_check_offset_addr);
7461    super_check_offset = RegisterOrConstant(temp_reg);
7462  }
7463  Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
7464  cmpptr(super_klass, super_check_addr); // load displayed supertype
7465
7466  // This check has worked decisively for primary supers.
7467  // Secondary supers are sought in the super_cache ('super_cache_addr').
7468  // (Secondary supers are interfaces and very deeply nested subtypes.)
7469  // This works in the same check above because of a tricky aliasing
7470  // between the super_cache and the primary super display elements.
7471  // (The 'super_check_addr' can address either, as the case requires.)
7472  // Note that the cache is updated below if it does not help us find
7473  // what we need immediately.
7474  // So if it was a primary super, we can just fail immediately.
7475  // Otherwise, it's the slow path for us (no success at this point).
7476
7477  if (super_check_offset.is_register()) {
7478    local_jcc(Assembler::equal, *L_success);
7479    cmpl(super_check_offset.as_register(), sc_offset);
7480    if (L_failure == &L_fallthrough) {
7481      local_jcc(Assembler::equal, *L_slow_path);
7482    } else {
7483      local_jcc(Assembler::notEqual, *L_failure);
7484      final_jmp(*L_slow_path);
7485    }
7486  } else if (super_check_offset.as_constant() == sc_offset) {
7487    // Need a slow path; fast failure is impossible.
7488    if (L_slow_path == &L_fallthrough) {
7489      local_jcc(Assembler::equal, *L_success);
7490    } else {
7491      local_jcc(Assembler::notEqual, *L_slow_path);
7492      final_jmp(*L_success);
7493    }
7494  } else {
7495    // No slow path; it's a fast decision.
7496    if (L_failure == &L_fallthrough) {
7497      local_jcc(Assembler::equal, *L_success);
7498    } else {
7499      local_jcc(Assembler::notEqual, *L_failure);
7500      final_jmp(*L_success);
7501    }
7502  }
7503
7504  bind(L_fallthrough);
7505
7506#undef local_jcc
7507#undef final_jmp
7508}
7509
7510
7511void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
7512                                                   Register super_klass,
7513                                                   Register temp_reg,
7514                                                   Register temp2_reg,
7515                                                   Label* L_success,
7516                                                   Label* L_failure,
7517                                                   bool set_cond_codes) {
7518  assert_different_registers(sub_klass, super_klass, temp_reg);
7519  if (temp2_reg != noreg)
7520    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
7521#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
7522
7523  Label L_fallthrough;
7524  int label_nulls = 0;
7525  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
7526  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
7527  assert(label_nulls <= 1, "at most one NULL in the batch");
7528
7529  // a couple of useful fields in sub_klass:
7530  int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
7531                   Klass::secondary_supers_offset_in_bytes());
7532  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
7533                   Klass::secondary_super_cache_offset_in_bytes());
7534  Address secondary_supers_addr(sub_klass, ss_offset);
7535  Address super_cache_addr(     sub_klass, sc_offset);
7536
7537  // Do a linear scan of the secondary super-klass chain.
7538  // This code is rarely used, so simplicity is a virtue here.
7539  // The repne_scan instruction uses fixed registers, which we must spill.
7540  // Don't worry too much about pre-existing connections with the input regs.
7541
7542  assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
7543  assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)
7544
7545  // Get super_klass value into rax (even if it was in rdi or rcx).
7546  bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
7547  if (super_klass != rax || UseCompressedOops) {
7548    if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
7549    mov(rax, super_klass);
7550  }
7551  if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
7552  if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }
7553
7554#ifndef PRODUCT
7555  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
7556  ExternalAddress pst_counter_addr((address) pst_counter);
7557  NOT_LP64(  incrementl(pst_counter_addr) );
7558  LP64_ONLY( lea(rcx, pst_counter_addr) );
7559  LP64_ONLY( incrementl(Address(rcx, 0)) );
7560#endif //PRODUCT
7561
7562  // We will consult the secondary-super array.
7563  movptr(rdi, secondary_supers_addr);
7564  // Load the array length.  (Positive movl does right thing on LP64.)
7565  movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
7566  // Skip to start of data.
7567  addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
7568
7569  // Scan RCX words at [RDI] for an occurrence of RAX.
7570  // Set NZ/Z based on last compare.
7571#ifdef _LP64
7572  // This part is tricky, as values in supers array could be 32 or 64 bit wide
7573  // and we store values in objArrays always encoded, thus we need to encode
7574  // the value of rax before repne.  Note that rax is dead after the repne.
7575  if (UseCompressedOops) {
7576    encode_heap_oop_not_null(rax);
7577    // The superclass is never null; it would be a basic system error if a null
7578    // pointer were to sneak in here.  Note that we have already loaded the
7579    // Klass::super_check_offset from the super_klass in the fast path,
7580    // so if there is a null in that register, we are already in the afterlife.
7581    repne_scanl();
7582  } else
7583#endif // _LP64
7584    repne_scan();
7585
7586  // Unspill the temp. registers:
7587  if (pushed_rdi)  pop(rdi);
7588  if (pushed_rcx)  pop(rcx);
7589  if (pushed_rax)  pop(rax);
7590
7591  if (set_cond_codes) {
7592    // Special hack for the AD files:  rdi is guaranteed non-zero.
7593    assert(!pushed_rdi, "rdi must be left non-NULL");
7594    // Also, the condition codes are properly set Z/NZ on succeed/failure.
7595  }
7596
7597  if (L_failure == &L_fallthrough)
7598        jccb(Assembler::notEqual, *L_failure);
7599  else  jcc(Assembler::notEqual, *L_failure);
7600
7601  // Success.  Cache the super we found and proceed in triumph.
7602  movptr(super_cache_addr, super_klass);
7603
7604  if (L_success != &L_fallthrough) {
7605    jmp(*L_success);
7606  }
7607
7608#undef IS_A_TEMP
7609
7610  bind(L_fallthrough);
7611}
7612
7613
7614void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
7615  ucomisd(dst, as_Address(src));
7616}
7617
7618void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
7619  ucomiss(dst, as_Address(src));
7620}
7621
7622void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
7623  if (reachable(src)) {
7624    xorpd(dst, as_Address(src));
7625  } else {
7626    lea(rscratch1, src);
7627    xorpd(dst, Address(rscratch1, 0));
7628  }
7629}
7630
7631void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
7632  if (reachable(src)) {
7633    xorps(dst, as_Address(src));
7634  } else {
7635    lea(rscratch1, src);
7636    xorps(dst, Address(rscratch1, 0));
7637  }
7638}
7639
7640void MacroAssembler::verify_oop(Register reg, const char* s) {
7641  if (!VerifyOops) return;
7642
7643  // Pass register number to verify_oop_subroutine
7644  char* b = new char[strlen(s) + 50];
7645  sprintf(b, "verify_oop: %s: %s", reg->name(), s);
7646  push(rax);                          // save rax,
7647  push(reg);                          // pass register argument
7648  ExternalAddress buffer((address) b);
7649  // avoid using pushptr, as it modifies scratch registers
7650  // and our contract is not to modify anything
7651  movptr(rax, buffer.addr());
7652  push(rax);
7653  // call indirectly to solve generation ordering problem
7654  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
7655  call(rax);
7656}
7657
7658
7659RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
7660                                                      Register tmp,
7661                                                      int offset) {
7662  intptr_t value = *delayed_value_addr;
7663  if (value != 0)
7664    return RegisterOrConstant(value + offset);
7665
7666  // load indirectly to solve generation ordering problem
7667  movptr(tmp, ExternalAddress((address) delayed_value_addr));
7668
7669#ifdef ASSERT
7670  Label L;
7671  testptr(tmp, tmp);
7672  jccb(Assembler::notZero, L);
7673  hlt();
7674  bind(L);
7675#endif
7676
7677  if (offset != 0)
7678    addptr(tmp, offset);
7679
7680  return RegisterOrConstant(tmp);
7681}
7682
7683
7684// registers on entry:
7685//  - rax ('check' register): required MethodType
7686//  - rcx: method handle
7687//  - rdx, rsi, or ?: killable temp
7688void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,
7689                                              Register temp_reg,
7690                                              Label& wrong_method_type) {
7691  if (UseCompressedOops)  unimplemented();  // field accesses must decode
7692  // compare method type against that of the receiver
7693  cmpptr(mtype_reg, Address(mh_reg, delayed_value(java_dyn_MethodHandle::type_offset_in_bytes, temp_reg)));
7694  jcc(Assembler::notEqual, wrong_method_type);
7695}
7696
7697
7698// A method handle has a "vmslots" field which gives the size of its
7699// argument list in JVM stack slots.  This field is either located directly
7700// in every method handle, or else is indirectly accessed through the
7701// method handle's MethodType.  This macro hides the distinction.
7702void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
7703                                                Register temp_reg) {
7704  assert_different_registers(vmslots_reg, mh_reg, temp_reg);
7705  if (UseCompressedOops)  unimplemented();  // field accesses must decode
7706  // load mh.type.form.vmslots
7707  if (java_dyn_MethodHandle::vmslots_offset_in_bytes() != 0) {
7708    // hoist vmslots into every mh to avoid dependent load chain
7709    movl(vmslots_reg, Address(mh_reg, delayed_value(java_dyn_MethodHandle::vmslots_offset_in_bytes, temp_reg)));
7710  } else {
7711    Register temp2_reg = vmslots_reg;
7712    movptr(temp2_reg, Address(mh_reg,    delayed_value(java_dyn_MethodHandle::type_offset_in_bytes, temp_reg)));
7713    movptr(temp2_reg, Address(temp2_reg, delayed_value(java_dyn_MethodType::form_offset_in_bytes, temp_reg)));
7714    movl(vmslots_reg, Address(temp2_reg, delayed_value(java_dyn_MethodTypeForm::vmslots_offset_in_bytes, temp_reg)));
7715  }
7716}
7717
7718
7719// registers on entry:
7720//  - rcx: method handle
7721//  - rdx: killable temp (interpreted only)
7722//  - rax: killable temp (compiled only)
7723void MacroAssembler::jump_to_method_handle_entry(Register mh_reg, Register temp_reg) {
7724  assert(mh_reg == rcx, "caller must put MH object in rcx");
7725  assert_different_registers(mh_reg, temp_reg);
7726
7727  if (UseCompressedOops)  unimplemented();  // field accesses must decode
7728
7729  // pick out the interpreted side of the handler
7730  movptr(temp_reg, Address(mh_reg, delayed_value(java_dyn_MethodHandle::vmentry_offset_in_bytes, temp_reg)));
7731
7732  // off we go...
7733  jmp(Address(temp_reg, MethodHandleEntry::from_interpreted_entry_offset_in_bytes()));
7734
7735  // for the various stubs which take control at this point,
7736  // see MethodHandles::generate_method_handle_stub
7737}
7738
7739
7740Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
7741                                         int extra_slot_offset) {
7742  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
7743  int stackElementSize = Interpreter::stackElementSize;
7744  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
7745#ifdef ASSERT
7746  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
7747  assert(offset1 - offset == stackElementSize, "correct arithmetic");
7748#endif
7749  Register             scale_reg    = noreg;
7750  Address::ScaleFactor scale_factor = Address::no_scale;
7751  if (arg_slot.is_constant()) {
7752    offset += arg_slot.as_constant() * stackElementSize;
7753  } else {
7754    scale_reg    = arg_slot.as_register();
7755    scale_factor = Address::times(stackElementSize);
7756  }
7757  offset += wordSize;           // return PC is on stack
7758  return Address(rsp, scale_reg, scale_factor, offset);
7759}
7760
7761
7762void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
7763  if (!VerifyOops) return;
7764
7765  // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
7766  // Pass register number to verify_oop_subroutine
7767  char* b = new char[strlen(s) + 50];
7768  sprintf(b, "verify_oop_addr: %s", s);
7769
7770  push(rax);                          // save rax,
7771  // addr may contain rsp so we will have to adjust it based on the push
7772  // we just did
7773  // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
7774  // stores rax into addr which is backwards of what was intended.
7775  if (addr.uses(rsp)) {
7776    lea(rax, addr);
7777    pushptr(Address(rax, BytesPerWord));
7778  } else {
7779    pushptr(addr);
7780  }
7781
7782  ExternalAddress buffer((address) b);
7783  // pass msg argument
7784  // avoid using pushptr, as it modifies scratch registers
7785  // and our contract is not to modify anything
7786  movptr(rax, buffer.addr());
7787  push(rax);
7788
7789  // call indirectly to solve generation ordering problem
7790  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
7791  call(rax);
7792  // Caller pops the arguments and restores rax, from the stack
7793}
7794
7795void MacroAssembler::verify_tlab() {
7796#ifdef ASSERT
7797  if (UseTLAB && VerifyOops) {
7798    Label next, ok;
7799    Register t1 = rsi;
7800    Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);
7801
7802    push(t1);
7803    NOT_LP64(push(thread_reg));
7804    NOT_LP64(get_thread(thread_reg));
7805
7806    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
7807    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
7808    jcc(Assembler::aboveEqual, next);
7809    stop("assert(top >= start)");
7810    should_not_reach_here();
7811
7812    bind(next);
7813    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
7814    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
7815    jcc(Assembler::aboveEqual, ok);
7816    stop("assert(top <= end)");
7817    should_not_reach_here();
7818
7819    bind(ok);
7820    NOT_LP64(pop(thread_reg));
7821    pop(t1);
7822  }
7823#endif
7824}
7825
// Decoder for an FPU control word held in the low 16 bits of _value.
// Bits 0..5 are reported by print() as "masks"; bits 8..9 select the
// precision and bits 10..11 the rounding mode.
class ControlWord {
 public:
  int32_t _value;

  // Two-bit fields.
  int  rounding_control() const        { return (_value & 0x0C00) >> 10; }
  int  precision_control() const       { return (_value & 0x0300) >>  8; }

  // Single-bit fields.
  bool precision() const               { return (_value & 0x20) != 0; }
  bool underflow() const               { return (_value & 0x10) != 0; }
  bool overflow() const                { return (_value & 0x08) != 0; }
  bool zero_divide() const             { return (_value & 0x04) != 0; }
  bool denormalized() const            { return (_value & 0x02) != 0; }
  bool invalid() const                 { return (_value & 0x01) != 0; }

  // Print the raw word followed by the decoded mask bits, rounding mode
  // and precision.  No trailing newline is written.
  void print() const {
    static const char* const rounding_names[4] = {
      "round near", "round down", "round up  ", "chop      "
    };
    static const char* const precision_names[4] = {
      "24 bits ", "reserved", "53 bits ", "64 bits "
    };
    const char* rc = rounding_names[rounding_control()];
    const char* pc = precision_names[precision_control()];

    // Two leading blanks, then one letter per bit from bit 5 down to bit 0:
    // upper case when the bit is set, lower case when it is clear.
    static const char set_letters[]   = "PUOZDI";
    static const char clear_letters[] = "puozdi";
    char f[9];
    f[0] = ' ';
    f[1] = ' ';
    for (int k = 0; k < 6; k++) {
      const int bit = 5 - k;
      f[2 + k] = (((_value >> bit) & 1) != 0) ? set_letters[k] : clear_letters[k];
    }
    f[8] = '\0';

    printf("%04x  masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
  }

};
7872
// Decoder for an FPU status word held in the low 16 bits of _value.
class StatusWord {
 public:
  int32_t _value;

  bool busy() const                    { return (_value & 0x8000) != 0; }

  // Condition code bits.
  bool C3() const                      { return (_value & 0x4000) != 0; }
  bool C2() const                      { return (_value & 0x0400) != 0; }
  bool C1() const                      { return (_value & 0x0200) != 0; }
  bool C0() const                      { return (_value & 0x0100) != 0; }

  // Three-bit top-of-stack field (bits 11..13).
  int  top() const                     { return (_value & 0x3800) >> 11; }

  // Flag bits 7..0.
  bool error_status() const            { return (_value & 0x80) != 0; }
  bool stack_fault() const             { return (_value & 0x40) != 0; }
  bool precision() const               { return (_value & 0x20) != 0; }
  bool underflow() const               { return (_value & 0x10) != 0; }
  bool overflow() const                { return (_value & 0x08) != 0; }
  bool zero_divide() const             { return (_value & 0x04) != 0; }
  bool denormalized() const            { return (_value & 0x02) != 0; }
  bool invalid() const                 { return (_value & 0x01) != 0; }

  // Print the raw word followed by the decoded flags, condition codes and
  // top-of-stack index.  No trailing newline is written.
  void print() const {
    // Condition codes: the digit when set, '-' when clear.
    char c[5];
    c[0] = C3() ? '3' : '-';
    c[1] = C2() ? '2' : '-';
    c[2] = C1() ? '1' : '-';
    c[3] = C0() ? '0' : '-';
    c[4] = '\0';

    // Flags: one letter per bit from bit 7 down to bit 0, '-' when clear.
    static const char flag_letters[] = "ESPUOZDI";
    char f[9];
    for (int k = 0; k < 8; k++) {
      const int bit = 7 - k;
      f[k] = (((_value >> bit) & 1) != 0) ? flag_letters[k] : '-';
    }
    f[8] = '\0';

    printf("%04x  flags = %s, cc =  %s, top = %d", _value & 0xFFFF, f, c, top());
  }

};
7916
// FPU tag word: a packed array of two-bit tags, one per register.
class TagWord {
 public:
  int32_t _value;

  // Two-bit tag stored for register index i.
  int tag_at(int i) const {
    const int shift = 2 * i;
    return (_value >> shift) & 3;
  }

  // Print the low 16 bits as four hex digits, without a newline.
  void print() const {
    const int low_half = _value & 0xFFFF;
    printf("%04x", low_half);
  }

};
7928
// View of one saved FPU register: two 32-bit words (_m0 low, _m1 high, as
// printed below) followed by a 16-bit word holding sign and exponent.
class FPU_Register {
 public:
  int32_t _m0;
  int32_t _m1;
  int16_t _ex;

  // True for exactly one bit pattern: _ex all ones, _m1 == 0xC0000000 and
  // _m0 == 0.
  bool is_indefinite() const {
    if (_ex != -1) return false;
    if (_m0 != 0)  return false;
    return _m1 == (int32_t)0xC0000000;
  }

  // Print sign, exponent word and both mantissa words in hex, followed by
  // "NaN" when the exponent word is 0x7FFF or all ones.  No newline.
  void print() const {
    const bool negative     = (_ex < 0);
    const bool max_exponent = (_ex == 0x7FFF) || (_ex == (int16_t)-1);
    printf("%c%04hx.%08x%08x  %s",
           negative ? '-' : '+', _ex, _m1, _m0,
           max_exponent ? "NaN" : "   ");
  }

};
7946
7947class FPU_State {
7948 public:
7949  enum {
7950    register_size       = 10,
7951    number_of_registers =  8,
7952    register_mask       =  7
7953  };
7954
7955  ControlWord  _control_word;
7956  StatusWord   _status_word;
7957  TagWord      _tag_word;
7958  int32_t      _error_offset;
7959  int32_t      _error_selector;
7960  int32_t      _data_offset;
7961  int32_t      _data_selector;
7962  int8_t       _register[register_size * number_of_registers];
7963
7964  int tag_for_st(int i) const          { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
7965  FPU_Register* st(int i) const        { return (FPU_Register*)&_register[register_size * i]; }
7966
7967  const char* tag_as_string(int tag) const {
7968    switch (tag) {
7969      case 0: return "valid";
7970      case 1: return "zero";
7971      case 2: return "special";
7972      case 3: return "empty";
7973    }
7974    ShouldNotReachHere()
7975    return NULL;
7976  }
7977
7978  void print() const {
7979    // print computation registers
7980    { int t = _status_word.top();
7981      for (int i = 0; i < number_of_registers; i++) {
7982        int j = (i - t) & register_mask;
7983        printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
7984        st(j)->print();
7985        printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
7986      }
7987    }
7988    printf("\n");
7989    // print control registers
7990    printf("ctrl = "); _control_word.print(); printf("\n");
7991    printf("stat = "); _status_word .print(); printf("\n");
7992    printf("tags = "); _tag_word    .print(); printf("\n");
7993  }
7994
7995};
7996
// Decoder for a saved flags register value.
class Flag_Register {
 public:
  int32_t _value;

  bool overflow() const                { return (_value & 0x800) != 0; }
  bool direction() const               { return (_value & 0x400) != 0; }
  bool sign() const                    { return (_value & 0x080) != 0; }
  bool zero() const                    { return (_value & 0x040) != 0; }
  bool auxiliary_carry() const         { return (_value & 0x010) != 0; }
  bool parity() const                  { return (_value & 0x004) != 0; }
  bool carry() const                   { return (_value & 0x001) != 0; }

  // Print the raw value in hex followed by one letter per decoded flag
  // ('-' when the flag is clear).  No trailing newline is written.
  void print() const {
    // Bit position and letter of each flag, in output order.
    static const int  flag_bits[7]   = { 11, 10, 7, 6, 4, 2, 0 };
    static const char flag_letters[] = "ODSZAPC";

    char f[8];
    for (int k = 0; k < 7; k++) {
      const bool is_set = ((_value >> flag_bits[k]) & 1) != 0;
      f[k] = is_set ? flag_letters[k] : '-';
    }
    f[7] = '\0';

    printf("%08x  flags = %s", _value, f);
  }

};
8025
// One saved 32-bit integer register.
class IU_Register {
 public:
  int32_t _value;

  // Print the value in hex, then right-aligned in decimal.  No newline.
  void print() const {
    const int32_t v = _value;
    printf("%08x  %11d", v, v);
  }

};
8035
8036class IU_State {
8037 public:
8038  Flag_Register _eflags;
8039  IU_Register   _rdi;
8040  IU_Register   _rsi;
8041  IU_Register   _rbp;
8042  IU_Register   _rsp;
8043  IU_Register   _rbx;
8044  IU_Register   _rdx;
8045  IU_Register   _rcx;
8046  IU_Register   _rax;
8047
8048  void print() const {
8049    // computation registers
8050    printf("rax,  = "); _rax.print(); printf("\n");
8051    printf("rbx,  = "); _rbx.print(); printf("\n");
8052    printf("rcx  = "); _rcx.print(); printf("\n");
8053    printf("rdx  = "); _rdx.print(); printf("\n");
8054    printf("rdi  = "); _rdi.print(); printf("\n");
8055    printf("rsi  = "); _rsi.print(); printf("\n");
8056    printf("rbp,  = "); _rbp.print(); printf("\n");
8057    printf("rsp  = "); _rsp.print(); printf("\n");
8058    printf("\n");
8059    // control registers
8060    printf("flgs = "); _eflags.print(); printf("\n");
8061  }
8062};
8063
8064
8065class CPU_State {
8066 public:
8067  FPU_State _fpu_state;
8068  IU_State  _iu_state;
8069
8070  void print() const {
8071    printf("--------------------------------------------------\n");
8072    _iu_state .print();
8073    printf("\n");
8074    _fpu_state.print();
8075    printf("--------------------------------------------------\n");
8076  }
8077
8078};
8079
8080
8081static void _print_CPU_state(CPU_State* state) {
8082  state->print();
8083};
8084
8085
8086void MacroAssembler::print_CPU_state() {
8087  push_CPU_state();
8088  push(rsp);                // pass CPU state
8089  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
8090  addptr(rsp, wordSize);       // discard argument
8091  pop_CPU_state();
8092}
8093
8094
8095static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
8096  static int counter = 0;
8097  FPU_State* fs = &state->_fpu_state;
8098  counter++;
8099  // For leaf calls, only verify that the top few elements remain empty.
8100  // We only need 1 empty at the top for C2 code.
8101  if( stack_depth < 0 ) {
8102    if( fs->tag_for_st(7) != 3 ) {
8103      printf("FPR7 not empty\n");
8104      state->print();
8105      assert(false, "error");
8106      return false;
8107    }
8108    return true;                // All other stack states do not matter
8109  }
8110
8111  assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
8112         "bad FPU control word");
8113
8114  // compute stack depth
8115  int i = 0;
8116  while (i < FPU_State::number_of_registers && fs->tag_for_st(i)  < 3) i++;
8117  int d = i;
8118  while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
8119  // verify findings
8120  if (i != FPU_State::number_of_registers) {
8121    // stack not contiguous
8122    printf("%s: stack not contiguous at ST%d\n", s, i);
8123    state->print();
8124    assert(false, "error");
8125    return false;
8126  }
8127  // check if computed stack depth corresponds to expected stack depth
8128  if (stack_depth < 0) {
8129    // expected stack depth is -stack_depth or less
8130    if (d > -stack_depth) {
8131      // too many elements on the stack
8132      printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
8133      state->print();
8134      assert(false, "error");
8135      return false;
8136    }
8137  } else {
8138    // expected stack depth is stack_depth
8139    if (d != stack_depth) {
8140      // wrong stack depth
8141      printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
8142      state->print();
8143      assert(false, "error");
8144      return false;
8145    }
8146  }
8147  // everything is cool
8148  return true;
8149}
8150
8151
8152void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
8153  if (!VerifyFPU) return;
8154  push_CPU_state();
8155  push(rsp);                // pass CPU state
8156  ExternalAddress msg((address) s);
8157  // pass message string s
8158  pushptr(msg.addr());
8159  push(stack_depth);        // pass stack depth
8160  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
8161  addptr(rsp, 3 * wordSize);   // discard arguments
8162  // check for error
8163  { Label L;
8164    testl(rax, rax);
8165    jcc(Assembler::notZero, L);
8166    int3();                  // break if error condition
8167    bind(L);
8168  }
8169  pop_CPU_state();
8170}
8171
8172void MacroAssembler::load_klass(Register dst, Register src) {
8173#ifdef _LP64
8174  if (UseCompressedOops) {
8175    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
8176    decode_heap_oop_not_null(dst);
8177  } else
8178#endif
8179    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
8180}
8181
8182void MacroAssembler::load_prototype_header(Register dst, Register src) {
8183#ifdef _LP64
8184  if (UseCompressedOops) {
8185    assert (Universe::heap() != NULL, "java heap should be initialized");
8186    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
8187    if (Universe::narrow_oop_shift() != 0) {
8188      assert(Address::times_8 == LogMinObjAlignmentInBytes &&
8189             Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
8190      movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
8191    } else {
8192      movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
8193    }
8194  } else
8195#endif
8196  {
8197    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
8198    movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
8199  }
8200}
8201
8202void MacroAssembler::store_klass(Register dst, Register src) {
8203#ifdef _LP64
8204  if (UseCompressedOops) {
8205    encode_heap_oop_not_null(src);
8206    movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
8207  } else
8208#endif
8209    movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
8210}
8211
8212#ifdef _LP64
8213void MacroAssembler::store_klass_gap(Register dst, Register src) {
8214  if (UseCompressedOops) {
8215    // Store to klass gap in destination
8216    movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
8217  }
8218}
8219
8220void MacroAssembler::load_heap_oop(Register dst, Address src) {
8221  if (UseCompressedOops) {
8222    movl(dst, src);
8223    decode_heap_oop(dst);
8224  } else {
8225    movq(dst, src);
8226  }
8227}
8228
8229void MacroAssembler::store_heap_oop(Address dst, Register src) {
8230  if (UseCompressedOops) {
8231    assert(!dst.uses(src), "not enough registers");
8232    encode_heap_oop(src);
8233    movl(dst, src);
8234  } else {
8235    movq(dst, src);
8236  }
8237}
8238
8239// Used for storing NULLs.
8240void MacroAssembler::store_heap_oop_null(Address dst) {
8241  if (UseCompressedOops) {
8242    movl(dst, (int32_t)NULL_WORD);
8243  } else {
8244    movslq(dst, (int32_t)NULL_WORD);
8245  }
8246}
8247
8248// Algorithm must match oop.inline.hpp encode_heap_oop.
8249void MacroAssembler::encode_heap_oop(Register r) {
8250  assert (UseCompressedOops, "should be compressed");
8251  assert (Universe::heap() != NULL, "java heap should be initialized");
8252  if (Universe::narrow_oop_base() == NULL) {
8253    verify_oop(r, "broken oop in encode_heap_oop");
8254    if (Universe::narrow_oop_shift() != 0) {
8255      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
8256      shrq(r, LogMinObjAlignmentInBytes);
8257    }
8258    return;
8259  }
8260#ifdef ASSERT
8261  if (CheckCompressedOops) {
8262    Label ok;
8263    push(rscratch1); // cmpptr trashes rscratch1
8264    cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
8265    jcc(Assembler::equal, ok);
8266    stop("MacroAssembler::encode_heap_oop: heap base corrupted?");
8267    bind(ok);
8268    pop(rscratch1);
8269  }
8270#endif
8271  verify_oop(r, "broken oop in encode_heap_oop");
8272  testq(r, r);
8273  cmovq(Assembler::equal, r, r12_heapbase);
8274  subq(r, r12_heapbase);
8275  shrq(r, LogMinObjAlignmentInBytes);
8276}
8277
8278void MacroAssembler::encode_heap_oop_not_null(Register r) {
8279  assert (UseCompressedOops, "should be compressed");
8280  assert (Universe::heap() != NULL, "java heap should be initialized");
8281#ifdef ASSERT
8282  if (CheckCompressedOops) {
8283    Label ok;
8284    testq(r, r);
8285    jcc(Assembler::notEqual, ok);
8286    stop("null oop passed to encode_heap_oop_not_null");
8287    bind(ok);
8288  }
8289#endif
8290  verify_oop(r, "broken oop in encode_heap_oop_not_null");
8291  if (Universe::narrow_oop_base() != NULL) {
8292    subq(r, r12_heapbase);
8293  }
8294  if (Universe::narrow_oop_shift() != 0) {
8295    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
8296    shrq(r, LogMinObjAlignmentInBytes);
8297  }
8298}
8299
8300void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
8301  assert (UseCompressedOops, "should be compressed");
8302  assert (Universe::heap() != NULL, "java heap should be initialized");
8303#ifdef ASSERT
8304  if (CheckCompressedOops) {
8305    Label ok;
8306    testq(src, src);
8307    jcc(Assembler::notEqual, ok);
8308    stop("null oop passed to encode_heap_oop_not_null2");
8309    bind(ok);
8310  }
8311#endif
8312  verify_oop(src, "broken oop in encode_heap_oop_not_null2");
8313  if (dst != src) {
8314    movq(dst, src);
8315  }
8316  if (Universe::narrow_oop_base() != NULL) {
8317    subq(dst, r12_heapbase);
8318  }
8319  if (Universe::narrow_oop_shift() != 0) {
8320    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
8321    shrq(dst, LogMinObjAlignmentInBytes);
8322  }
8323}
8324
8325void  MacroAssembler::decode_heap_oop(Register r) {
8326  assert (UseCompressedOops, "should be compressed");
8327  assert (Universe::heap() != NULL, "java heap should be initialized");
8328  if (Universe::narrow_oop_base() == NULL) {
8329    if (Universe::narrow_oop_shift() != 0) {
8330      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
8331      shlq(r, LogMinObjAlignmentInBytes);
8332    }
8333    verify_oop(r, "broken oop in decode_heap_oop");
8334    return;
8335  }
8336#ifdef ASSERT
8337  if (CheckCompressedOops) {
8338    Label ok;
8339    push(rscratch1);
8340    cmpptr(r12_heapbase,
8341           ExternalAddress((address)Universe::narrow_oop_base_addr()));
8342    jcc(Assembler::equal, ok);
8343    stop("MacroAssembler::decode_heap_oop: heap base corrupted?");
8344    bind(ok);
8345    pop(rscratch1);
8346  }
8347#endif
8348
8349  Label done;
8350  shlq(r, LogMinObjAlignmentInBytes);
8351  jccb(Assembler::equal, done);
8352  addq(r, r12_heapbase);
8353#if 0
8354   // alternate decoding probably a wash.
8355   testq(r, r);
8356   jccb(Assembler::equal, done);
8357   leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
8358#endif
8359  bind(done);
8360  verify_oop(r, "broken oop in decode_heap_oop");
8361}
8362
8363void  MacroAssembler::decode_heap_oop_not_null(Register r) {
8364  assert (UseCompressedOops, "should only be used for compressed headers");
8365  assert (Universe::heap() != NULL, "java heap should be initialized");
8366  // Cannot assert, unverified entry point counts instructions (see .ad file)
8367  // vtableStubs also counts instructions in pd_code_size_limit.
8368  // Also do not verify_oop as this is called by verify_oop.
8369  if (Universe::narrow_oop_shift() != 0) {
8370    assert (Address::times_8 == LogMinObjAlignmentInBytes &&
8371            Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
8372    // Don't use Shift since it modifies flags.
8373    leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
8374  } else {
8375    assert (Universe::narrow_oop_base() == NULL, "sanity");
8376  }
8377}
8378
8379void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
8380  assert (UseCompressedOops, "should only be used for compressed headers");
8381  assert (Universe::heap() != NULL, "java heap should be initialized");
8382  // Cannot assert, unverified entry point counts instructions (see .ad file)
8383  // vtableStubs also counts instructions in pd_code_size_limit.
8384  // Also do not verify_oop as this is called by verify_oop.
8385  if (Universe::narrow_oop_shift() != 0) {
8386    assert (Address::times_8 == LogMinObjAlignmentInBytes &&
8387            Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
8388    leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
8389  } else if (dst != src) {
8390    assert (Universe::narrow_oop_base() == NULL, "sanity");
8391    movq(dst, src);
8392  }
8393}
8394
8395void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
8396  assert (UseCompressedOops, "should only be used for compressed headers");
8397  assert (Universe::heap() != NULL, "java heap should be initialized");
8398  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
8399  int oop_index = oop_recorder()->find_index(obj);
8400  RelocationHolder rspec = oop_Relocation::spec(oop_index);
8401  mov_narrow_oop(dst, oop_index, rspec);
8402}
8403
8404void  MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
8405  assert (UseCompressedOops, "should only be used for compressed headers");
8406  assert (Universe::heap() != NULL, "java heap should be initialized");
8407  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
8408  int oop_index = oop_recorder()->find_index(obj);
8409  RelocationHolder rspec = oop_Relocation::spec(oop_index);
8410  mov_narrow_oop(dst, oop_index, rspec);
8411}
8412
8413void  MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
8414  assert (UseCompressedOops, "should only be used for compressed headers");
8415  assert (Universe::heap() != NULL, "java heap should be initialized");
8416  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
8417  int oop_index = oop_recorder()->find_index(obj);
8418  RelocationHolder rspec = oop_Relocation::spec(oop_index);
8419  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
8420}
8421
8422void  MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {
8423  assert (UseCompressedOops, "should only be used for compressed headers");
8424  assert (Universe::heap() != NULL, "java heap should be initialized");
8425  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
8426  int oop_index = oop_recorder()->find_index(obj);
8427  RelocationHolder rspec = oop_Relocation::spec(oop_index);
8428  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
8429}
8430
8431void MacroAssembler::reinit_heapbase() {
8432  if (UseCompressedOops) {
8433    movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
8434  }
8435}
8436#endif // _LP64
8437
8438// IndexOf substring.
8439void MacroAssembler::string_indexof(Register str1, Register str2,
8440                                    Register cnt1, Register cnt2, Register result,
8441                                    XMMRegister vec, Register tmp) {
8442  assert(UseSSE42Intrinsics, "SSE4.2 is required");
8443
8444  Label RELOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR,
8445        SCAN_SUBSTR, RET_NOT_FOUND, CLEANUP;
8446
8447  push(str1); // string addr
8448  push(str2); // substr addr
8449  push(cnt2); // substr count
8450  jmpb(PREP_FOR_SCAN);
8451
8452  // Substr count saved at sp
8453  // Substr saved at sp+1*wordSize
8454  // String saved at sp+2*wordSize
8455
8456  // Reload substr for rescan
8457  bind(RELOAD_SUBSTR);
8458  movl(cnt2, Address(rsp, 0));
8459  movptr(str2, Address(rsp, wordSize));
8460  // We came here after the beginninig of the substring was
8461  // matched but the rest of it was not so we need to search
8462  // again. Start from the next element after the previous match.
8463  subptr(str1, result); // Restore counter
8464  shrl(str1, 1);
8465  addl(cnt1, str1);
8466  decrementl(cnt1);
8467  lea(str1, Address(result, 2)); // Reload string
8468
8469  // Load substr
8470  bind(PREP_FOR_SCAN);
8471  movdqu(vec, Address(str2, 0));
8472  addl(cnt1, 8);  // prime the loop
8473  subptr(str1, 16);
8474
8475  // Scan string for substr in 16-byte vectors
8476  bind(SCAN_TO_SUBSTR);
8477  subl(cnt1, 8);
8478  addptr(str1, 16);
8479
8480  // pcmpestri
8481  //   inputs:
8482  //     xmm - substring
8483  //     rax - substring length (elements count)
8484  //     mem - scaned string
8485  //     rdx - string length (elements count)
8486  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
8487  //   outputs:
8488  //     rcx - matched index in string
8489  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
8490
8491  pcmpestri(vec, Address(str1, 0), 0x0d);
8492  jcc(Assembler::above, SCAN_TO_SUBSTR);      // CF == 0 && ZF == 0
8493  jccb(Assembler::aboveEqual, RET_NOT_FOUND); // CF == 0
8494
8495  // Fallthrough: found a potential substr
8496
8497  // Make sure string is still long enough
8498  subl(cnt1, tmp);
8499  cmpl(cnt1, cnt2);
8500  jccb(Assembler::negative, RET_NOT_FOUND);
8501  // Compute start addr of substr
8502  lea(str1, Address(str1, tmp, Address::times_2));
8503  movptr(result, str1); // save
8504
8505  // Compare potential substr
8506  addl(cnt1, 8);     // prime the loop
8507  addl(cnt2, 8);
8508  subptr(str1, 16);
8509  subptr(str2, 16);
8510
8511  // Scan 16-byte vectors of string and substr
8512  bind(SCAN_SUBSTR);
8513  subl(cnt1, 8);
8514  subl(cnt2, 8);
8515  addptr(str1, 16);
8516  addptr(str2, 16);
8517  movdqu(vec, Address(str2, 0));
8518  pcmpestri(vec, Address(str1, 0), 0x0d);
8519  jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
8520  jcc(Assembler::positive, SCAN_SUBSTR);     // SF == 0
8521
8522  // Compute substr offset
8523  subptr(result, Address(rsp, 2*wordSize));
8524  shrl(result, 1); // index
8525  jmpb(CLEANUP);
8526
8527  bind(RET_NOT_FOUND);
8528  movl(result, -1);
8529
8530  bind(CLEANUP);
8531  addptr(rsp, 3*wordSize);
8532}
8533
8534// Compare strings.
8535void MacroAssembler::string_compare(Register str1, Register str2,
8536                                    Register cnt1, Register cnt2, Register result,
8537                                    XMMRegister vec1, XMMRegister vec2) {
8538  Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
8539
8540  // Compute the minimum of the string lengths and the
8541  // difference of the string lengths (stack).
8542  // Do the conditional move stuff
8543  movl(result, cnt1);
8544  subl(cnt1, cnt2);
8545  push(cnt1);
8546  if (VM_Version::supports_cmov()) {
8547    cmovl(Assembler::lessEqual, cnt2, result);
8548  } else {
8549    Label GT_LABEL;
8550    jccb(Assembler::greater, GT_LABEL);
8551    movl(cnt2, result);
8552    bind(GT_LABEL);
8553  }
8554
8555  // Is the minimum length zero?
8556  testl(cnt2, cnt2);
8557  jcc(Assembler::zero, LENGTH_DIFF_LABEL);
8558
8559  // Load first characters
8560  load_unsigned_short(result, Address(str1, 0));
8561  load_unsigned_short(cnt1, Address(str2, 0));
8562
8563  // Compare first characters
8564  subl(result, cnt1);
8565  jcc(Assembler::notZero,  POP_LABEL);
8566  decrementl(cnt2);
8567  jcc(Assembler::zero, LENGTH_DIFF_LABEL);
8568
8569  {
8570    // Check after comparing first character to see if strings are equivalent
8571    Label LSkip2;
8572    // Check if the strings start at same location
8573    cmpptr(str1, str2);
8574    jccb(Assembler::notEqual, LSkip2);
8575
8576    // Check if the length difference is zero (from stack)
8577    cmpl(Address(rsp, 0), 0x0);
8578    jcc(Assembler::equal,  LENGTH_DIFF_LABEL);
8579
8580    // Strings might not be equivalent
8581    bind(LSkip2);
8582  }
8583
8584  // Advance to next character
8585  addptr(str1, 2);
8586  addptr(str2, 2);
8587
8588  if (UseSSE42Intrinsics) {
8589    // With SSE4.2, use double quad vector compare
8590    Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
8591    // Setup to compare 16-byte vectors
8592    movl(cnt1, cnt2);
8593    andl(cnt2, 0xfffffff8); // cnt2 holds the vector count
8594    andl(cnt1, 0x00000007); // cnt1 holds the tail count
8595    testl(cnt2, cnt2);
8596    jccb(Assembler::zero, COMPARE_TAIL);
8597
8598    lea(str2, Address(str2, cnt2, Address::times_2));
8599    lea(str1, Address(str1, cnt2, Address::times_2));
8600    negptr(cnt2);
8601
8602    bind(COMPARE_VECTORS);
8603    movdqu(vec1, Address(str1, cnt2, Address::times_2));
8604    movdqu(vec2, Address(str2, cnt2, Address::times_2));
8605    pxor(vec1, vec2);
8606    ptest(vec1, vec1);
8607    jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
8608    addptr(cnt2, 8);
8609    jcc(Assembler::notZero, COMPARE_VECTORS);
8610    jmpb(COMPARE_TAIL);
8611
8612    // Mismatched characters in the vectors
8613    bind(VECTOR_NOT_EQUAL);
8614    lea(str1, Address(str1, cnt2, Address::times_2));
8615    lea(str2, Address(str2, cnt2, Address::times_2));
8616    movl(cnt1, 8);
8617
8618    // Compare tail (< 8 chars), or rescan last vectors to
8619    // find 1st mismatched characters
8620    bind(COMPARE_TAIL);
8621    testl(cnt1, cnt1);
8622    jccb(Assembler::zero, LENGTH_DIFF_LABEL);
8623    movl(cnt2, cnt1);
8624    // Fallthru to tail compare
8625  }
8626
8627  // Shift str2 and str1 to the end of the arrays, negate min
8628  lea(str1, Address(str1, cnt2, Address::times_2, 0));
8629  lea(str2, Address(str2, cnt2, Address::times_2, 0));
8630  negptr(cnt2);
8631
8632    // Compare the rest of the characters
8633  bind(WHILE_HEAD_LABEL);
8634  load_unsigned_short(result, Address(str1, cnt2, Address::times_2, 0));
8635  load_unsigned_short(cnt1, Address(str2, cnt2, Address::times_2, 0));
8636  subl(result, cnt1);
8637  jccb(Assembler::notZero, POP_LABEL);
8638  increment(cnt2);
8639  jcc(Assembler::notZero, WHILE_HEAD_LABEL);
8640
8641  // Strings are equal up to min length.  Return the length difference.
8642  bind(LENGTH_DIFF_LABEL);
8643  pop(result);
8644  jmpb(DONE_LABEL);
8645
8646  // Discard the stored length difference
8647  bind(POP_LABEL);
8648  addptr(rsp, wordSize);
8649
8650  // That's it
8651  bind(DONE_LABEL);
8652}
8653
8654// Compare char[] arrays aligned to 4 bytes or substrings.
8655void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
8656                                        Register limit, Register result, Register chr,
8657                                        XMMRegister vec1, XMMRegister vec2) {
8658  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
8659
8660  int length_offset  = arrayOopDesc::length_offset_in_bytes();
8661  int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
8662
8663  // Check the input args
8664  cmpptr(ary1, ary2);
8665  jcc(Assembler::equal, TRUE_LABEL);
8666
8667  if (is_array_equ) {
8668    // Need additional checks for arrays_equals.
8669    testptr(ary1, ary1);
8670    jcc(Assembler::zero, FALSE_LABEL);
8671    testptr(ary2, ary2);
8672    jcc(Assembler::zero, FALSE_LABEL);
8673
8674    // Check the lengths
8675    movl(limit, Address(ary1, length_offset));
8676    cmpl(limit, Address(ary2, length_offset));
8677    jcc(Assembler::notEqual, FALSE_LABEL);
8678  }
8679
8680  // count == 0
8681  testl(limit, limit);
8682  jcc(Assembler::zero, TRUE_LABEL);
8683
8684  if (is_array_equ) {
8685    // Load array address
8686    lea(ary1, Address(ary1, base_offset));
8687    lea(ary2, Address(ary2, base_offset));
8688  }
8689
8690  shll(limit, 1);      // byte count != 0
8691  movl(result, limit); // copy
8692
8693  if (UseSSE42Intrinsics) {
8694    // With SSE4.2, use double quad vector compare
8695    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
8696    // Compare 16-byte vectors
8697    andl(result, 0x0000000e);  //   tail count (in bytes)
8698    andl(limit, 0xfffffff0);   // vector count (in bytes)
8699    jccb(Assembler::zero, COMPARE_TAIL);
8700
8701    lea(ary1, Address(ary1, limit, Address::times_1));
8702    lea(ary2, Address(ary2, limit, Address::times_1));
8703    negptr(limit);
8704
8705    bind(COMPARE_WIDE_VECTORS);
8706    movdqu(vec1, Address(ary1, limit, Address::times_1));
8707    movdqu(vec2, Address(ary2, limit, Address::times_1));
8708    pxor(vec1, vec2);
8709    ptest(vec1, vec1);
8710    jccb(Assembler::notZero, FALSE_LABEL);
8711    addptr(limit, 16);
8712    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
8713
8714    bind(COMPARE_TAIL); // limit is zero
8715    movl(limit, result);
8716    // Fallthru to tail compare
8717  }
8718
8719  // Compare 4-byte vectors
8720  andl(limit, 0xfffffffc); // vector count (in bytes)
8721  jccb(Assembler::zero, COMPARE_CHAR);
8722
8723  lea(ary1, Address(ary1, limit, Address::times_1));
8724  lea(ary2, Address(ary2, limit, Address::times_1));
8725  negptr(limit);
8726
8727  bind(COMPARE_VECTORS);
8728  movl(chr, Address(ary1, limit, Address::times_1));
8729  cmpl(chr, Address(ary2, limit, Address::times_1));
8730  jccb(Assembler::notEqual, FALSE_LABEL);
8731  addptr(limit, 4);
8732  jcc(Assembler::notZero, COMPARE_VECTORS);
8733
8734  // Compare trailing char (final 2 bytes), if any
8735  bind(COMPARE_CHAR);
8736  testl(result, 0x2);   // tail  char
8737  jccb(Assembler::zero, TRUE_LABEL);
8738  load_unsigned_short(chr, Address(ary1, 0));
8739  load_unsigned_short(limit, Address(ary2, 0));
8740  cmpl(chr, limit);
8741  jccb(Assembler::notEqual, FALSE_LABEL);
8742
8743  bind(TRUE_LABEL);
8744  movl(result, 1);   // return true
8745  jmpb(DONE);
8746
8747  bind(FALSE_LABEL);
8748  xorl(result, result); // return false
8749
8750  // That's it
8751  bind(DONE);
8752}
8753
8754Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
8755  switch (cond) {
8756    // Note some conditions are synonyms for others
8757    case Assembler::zero:         return Assembler::notZero;
8758    case Assembler::notZero:      return Assembler::zero;
8759    case Assembler::less:         return Assembler::greaterEqual;
8760    case Assembler::lessEqual:    return Assembler::greater;
8761    case Assembler::greater:      return Assembler::lessEqual;
8762    case Assembler::greaterEqual: return Assembler::less;
8763    case Assembler::below:        return Assembler::aboveEqual;
8764    case Assembler::belowEqual:   return Assembler::above;
8765    case Assembler::above:        return Assembler::belowEqual;
8766    case Assembler::aboveEqual:   return Assembler::below;
8767    case Assembler::overflow:     return Assembler::noOverflow;
8768    case Assembler::noOverflow:   return Assembler::overflow;
8769    case Assembler::negative:     return Assembler::positive;
8770    case Assembler::positive:     return Assembler::negative;
8771    case Assembler::parity:       return Assembler::noParity;
8772    case Assembler::noParity:     return Assembler::parity;
8773  }
8774  ShouldNotReachHere(); return Assembler::overflow;
8775}
8776
8777SkipIfEqual::SkipIfEqual(
8778    MacroAssembler* masm, const bool* flag_addr, bool value) {
8779  _masm = masm;
8780  _masm->cmp8(ExternalAddress((address)flag_addr), value);
8781  _masm->jcc(Assembler::equal, _label);
8782}
8783
8784SkipIfEqual::~SkipIfEqual() {
8785  _masm->bind(_label);
8786}
8787