assembler_x86.cpp revision 647:bd441136a5ce
1132451Sroberto/*
2132451Sroberto * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
3132451Sroberto * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4132451Sroberto *
5182007Sroberto * This code is free software; you can redistribute it and/or modify it
6182007Sroberto * under the terms of the GNU General Public License version 2 only, as
7182007Sroberto * published by the Free Software Foundation.
8182007Sroberto *
9182007Sroberto * This code is distributed in the hope that it will be useful, but WITHOUT
10132451Sroberto * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11182007Sroberto * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12182007Sroberto * version 2 for more details (a copy is included in the LICENSE file that
13290000Sglebius * accompanied this code).
14290000Sglebius *
15290000Sglebius * You should have received a copy of the GNU General Public License version
16182007Sroberto * 2 along with this work; if not, write to the Free Software Foundation,
17182007Sroberto * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18182007Sroberto *
19182007Sroberto * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20182007Sroberto * CA 95054 USA or visit www.sun.com if you need additional information or
21182007Sroberto * have any questions.
22182007Sroberto *
23182007Sroberto */
24182007Sroberto
25182007Sroberto#include "incls/_precompiled.incl"
26182007Sroberto#include "incls/_assembler_x86.cpp.incl"
27182007Sroberto
28182007Sroberto// Implementation of AddressLiteral
29182007Sroberto
30182007SrobertoAddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
31182007Sroberto  _is_lval = false;
32182007Sroberto  _target = target;
33182007Sroberto  switch (rtype) {
34182007Sroberto  case relocInfo::oop_type:
35182007Sroberto    // Oops are a special case. Normally they would be their own section
36182007Sroberto    // but in cases like icBuffer they are literals in the code stream that
37182007Sroberto    // we don't have a section for. We use none so that we get a literal address
38182007Sroberto    // which is always patchable.
39182007Sroberto    break;
40182007Sroberto  case relocInfo::external_word_type:
41182007Sroberto    _rspec = external_word_Relocation::spec(target);
42182007Sroberto    break;
43182007Sroberto  case relocInfo::internal_word_type:
44182007Sroberto    _rspec = internal_word_Relocation::spec(target);
45182007Sroberto    break;
46182007Sroberto  case relocInfo::opt_virtual_call_type:
47182007Sroberto    _rspec = opt_virtual_call_Relocation::spec();
48182007Sroberto    break;
49182007Sroberto  case relocInfo::static_call_type:
50182007Sroberto    _rspec = static_call_Relocation::spec();
51182007Sroberto    break;
52182007Sroberto  case relocInfo::runtime_call_type:
53182007Sroberto    _rspec = runtime_call_Relocation::spec();
54182007Sroberto    break;
55182007Sroberto  case relocInfo::poll_type:
56182007Sroberto  case relocInfo::poll_return_type:
57182007Sroberto    _rspec = Relocation::spec_simple(rtype);
58182007Sroberto    break;
59182007Sroberto  case relocInfo::none:
60182007Sroberto    break;
61182007Sroberto  default:
62182007Sroberto    ShouldNotReachHere();
63182007Sroberto    break;
64182007Sroberto  }
65182007Sroberto}
66182007Sroberto
67182007Sroberto// Implementation of Address
68182007Sroberto
69182007Sroberto#ifdef _LP64
70182007Sroberto
71182007SrobertoAddress Address::make_array(ArrayAddress adr) {
72182007Sroberto  // Not implementable on 64bit machines
73182007Sroberto  // Should have been handled higher up the call chain.
74182007Sroberto  ShouldNotReachHere();
75182007Sroberto  return Address();
76182007Sroberto}
77182007Sroberto
78182007Sroberto// exceedingly dangerous constructor
79182007SrobertoAddress::Address(int disp, address loc, relocInfo::relocType rtype) {
80182007Sroberto  _base  = noreg;
81182007Sroberto  _index = noreg;
82182007Sroberto  _scale = no_scale;
83182007Sroberto  _disp  = disp;
84182007Sroberto  switch (rtype) {
85182007Sroberto    case relocInfo::external_word_type:
86182007Sroberto      _rspec = external_word_Relocation::spec(loc);
87182007Sroberto      break;
88182007Sroberto    case relocInfo::internal_word_type:
89182007Sroberto      _rspec = internal_word_Relocation::spec(loc);
90182007Sroberto      break;
91182007Sroberto    case relocInfo::runtime_call_type:
92182007Sroberto      // HMM
93182007Sroberto      _rspec = runtime_call_Relocation::spec();
94182007Sroberto      break;
95182007Sroberto    case relocInfo::poll_type:
96182007Sroberto    case relocInfo::poll_return_type:
97182007Sroberto      _rspec = Relocation::spec_simple(rtype);
98182007Sroberto      break;
99182007Sroberto    case relocInfo::none:
100182007Sroberto      break;
101182007Sroberto    default:
102182007Sroberto      ShouldNotReachHere();
103182007Sroberto  }
104182007Sroberto}
105182007Sroberto#else // LP64
106182007Sroberto
107182007SrobertoAddress Address::make_array(ArrayAddress adr) {
108182007Sroberto  AddressLiteral base = adr.base();
109182007Sroberto  Address index = adr.index();
110182007Sroberto  assert(index._disp == 0, "must not have disp"); // maybe it can?
111182007Sroberto  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
112182007Sroberto  array._rspec = base._rspec;
113182007Sroberto  return array;
114182007Sroberto}
115182007Sroberto
116290000Sglebius// exceedingly dangerous constructor
117182007SrobertoAddress::Address(address loc, RelocationHolder spec) {
118182007Sroberto  _base  = noreg;
119182007Sroberto  _index = noreg;
120182007Sroberto  _scale = no_scale;
121182007Sroberto  _disp  = (intptr_t) loc;
122182007Sroberto  _rspec = spec;
123182007Sroberto}
124182007Sroberto
125182007Sroberto#endif // _LP64
126182007Sroberto
127182007Sroberto
128182007Sroberto
129182007Sroberto// Convert the raw encoding form into the form expected by the constructor for
130182007Sroberto// Address.  An index of 4 (rsp) corresponds to having no index, so convert
131182007Sroberto// that to noreg for the Address constructor.
132132451SrobertoAddress Address::make_raw(int base, int index, int scale, int disp, bool disp_is_oop) {
133290000Sglebius  RelocationHolder rspec;
134  if (disp_is_oop) {
135    rspec = Relocation::spec_simple(relocInfo::oop_type);
136  }
137  bool valid_index = index != rsp->encoding();
138  if (valid_index) {
139    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
140    madr._rspec = rspec;
141    return madr;
142  } else {
143    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
144    madr._rspec = rspec;
145    return madr;
146  }
147}
148
149// Implementation of Assembler
150
151int AbstractAssembler::code_fill_byte() {
152  return (u_char)'\xF4'; // hlt
153}
154
155// make this go away someday
156void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
157  if (rtype == relocInfo::none)
158        emit_long(data);
159  else  emit_data(data, Relocation::spec_simple(rtype), format);
160}
161
162void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
163  assert(imm_operand == 0, "default format must be immediate in this file");
164  assert(inst_mark() != NULL, "must be inside InstructionMark");
165  if (rspec.type() !=  relocInfo::none) {
166    #ifdef ASSERT
167      check_relocation(rspec, format);
168    #endif
169    // Do not use AbstractAssembler::relocate, which is not intended for
170    // embedded words.  Instead, relocate to the enclosing instruction.
171
172    // hack. call32 is too wide for mask so use disp32
173    if (format == call32_operand)
174      code_section()->relocate(inst_mark(), rspec, disp32_operand);
175    else
176      code_section()->relocate(inst_mark(), rspec, format);
177  }
178  emit_long(data);
179}
180
181static int encode(Register r) {
182  int enc = r->encoding();
183  if (enc >= 8) {
184    enc -= 8;
185  }
186  return enc;
187}
188
189static int encode(XMMRegister r) {
190  int enc = r->encoding();
191  if (enc >= 8) {
192    enc -= 8;
193  }
194  return enc;
195}
196
197void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
198  assert(dst->has_byte_register(), "must have byte register");
199  assert(isByte(op1) && isByte(op2), "wrong opcode");
200  assert(isByte(imm8), "not a byte");
201  assert((op1 & 0x01) == 0, "should be 8bit operation");
202  emit_byte(op1);
203  emit_byte(op2 | encode(dst));
204  emit_byte(imm8);
205}
206
207
208void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
209  assert(isByte(op1) && isByte(op2), "wrong opcode");
210  assert((op1 & 0x01) == 1, "should be 32bit operation");
211  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
212  if (is8bit(imm32)) {
213    emit_byte(op1 | 0x02); // set sign bit
214    emit_byte(op2 | encode(dst));
215    emit_byte(imm32 & 0xFF);
216  } else {
217    emit_byte(op1);
218    emit_byte(op2 | encode(dst));
219    emit_long(imm32);
220  }
221}
222
223// immediate-to-memory forms
224void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
225  assert((op1 & 0x01) == 1, "should be 32bit operation");
226  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
227  if (is8bit(imm32)) {
228    emit_byte(op1 | 0x02); // set sign bit
229    emit_operand(rm, adr, 1);
230    emit_byte(imm32 & 0xFF);
231  } else {
232    emit_byte(op1);
233    emit_operand(rm, adr, 4);
234    emit_long(imm32);
235  }
236}
237
238void Assembler::emit_arith(int op1, int op2, Register dst, jobject obj) {
239  LP64_ONLY(ShouldNotReachHere());
240  assert(isByte(op1) && isByte(op2), "wrong opcode");
241  assert((op1 & 0x01) == 1, "should be 32bit operation");
242  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
243  InstructionMark im(this);
244  emit_byte(op1);
245  emit_byte(op2 | encode(dst));
246  emit_data((intptr_t)obj, relocInfo::oop_type, 0);
247}
248
249
250void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
251  assert(isByte(op1) && isByte(op2), "wrong opcode");
252  emit_byte(op1);
253  emit_byte(op2 | encode(dst) << 3 | encode(src));
254}
255
256
// Emits the addressing bytes of a memory operand: the ModRM byte, the SIB
// byte when one is needed, and the displacement (if any).
//
//   reg    - register whose encoding goes into bits 5..3 of the ModRM byte
//   base   - base register; a register for which is_valid() is false means
//            "no base"
//   index  - index register; a register for which is_valid() is false means
//            "no index"
//   scale  - scale for the index; must not be no_scale when index is valid
//   disp   - displacement
//   rspec  - relocation for the displacement. Any type other than
//            relocInfo::none rules out the no-displacement and disp8 forms,
//            so the displacement is always emitted as a disp32 via emit_data.
//   rip_relative_correction
//          - only used on the register-less, relocated [disp] path: it is
//            added when computing the address of the next instruction
//            (emit_arith_operand passes the size of its trailing immediate).
//
// For each addressing shape the shortest usable form is picked in the order
// no displacement, disp8, disp32.
void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;                               // ModRM bits 5..3
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;   // SIB bits 5..3
  int baseenc = base->is_valid() ? encode(base) : 0;           // low 3 bits

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      // rbp (and r13 on 64-bit) is excluded from the no-displacement form
      // and falls through to the disp8 form with a zero displacement.
      if (disp == 0 && rtype == relocInfo::none  &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x04 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x44 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x84 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // [rsp + disp]
      // rsp (and r12 on 64-bit) as base is always emitted with the fixed
      // SIB byte 0x24.
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_byte(0x04 | regenc);
        emit_byte(0x24);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_byte(0x44 | regenc);
        emit_byte(0x24);
        emit_byte(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_byte(0x84 | regenc);
        emit_byte(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      // As above, rbp/r13 never use the no-displacement form.
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_byte(0x00 | regenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_byte(0x40 | regenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_byte(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    // No base register.
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_byte(0x04 | regenc);
      emit_byte(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none ) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_byte(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      // next_ip = current pc + the 4 displacement bytes about to be emitted
      //           + whatever the caller says follows the displacement.
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this, did everything as the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_byte(0x04 | regenc);
      emit_byte(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}
374
375void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
376                             Address::ScaleFactor scale, int disp,
377                             RelocationHolder const& rspec) {
378  emit_operand((Register)reg, base, index, scale, disp, rspec);
379}
380
381// Secret local extension to Assembler::WhichOperand:
382#define end_pc_operand (_WhichOperand_limit)
383
384address Assembler::locate_operand(address inst, WhichOperand which) {
385  // Decode the given instruction, and return the address of
386  // an embedded 32-bit operand word.
387
388  // If "which" is disp32_operand, selects the displacement portion
389  // of an effective address specifier.
390  // If "which" is imm64_operand, selects the trailing immediate constant.
391  // If "which" is call32_operand, selects the displacement of a call or jump.
392  // Caller is responsible for ensuring that there is such an operand,
393  // and that it is 32/64 bits wide.
394
395  // If "which" is end_pc_operand, find the end of the instruction.
396
397  address ip = inst;
398  bool is_64bit = false;
399
400  debug_only(bool has_disp32 = false);
401  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn
402
403  again_after_prefix:
404  switch (0xFF & *ip++) {
405
406  // These convenience macros generate groups of "case" labels for the switch.
407#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
408#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
409             case (x)+4: case (x)+5: case (x)+6: case (x)+7
410#define REP16(x) REP8((x)+0): \
411              case REP8((x)+8)
412
413  case CS_segment:
414  case SS_segment:
415  case DS_segment:
416  case ES_segment:
417  case FS_segment:
418  case GS_segment:
419    // Seems dubious
420    LP64_ONLY(assert(false, "shouldn't have that prefix"));
421    assert(ip == inst+1, "only one prefix allowed");
422    goto again_after_prefix;
423
424  case 0x67:
425  case REX:
426  case REX_B:
427  case REX_X:
428  case REX_XB:
429  case REX_R:
430  case REX_RB:
431  case REX_RX:
432  case REX_RXB:
433    NOT_LP64(assert(false, "64bit prefixes"));
434    goto again_after_prefix;
435
436  case REX_W:
437  case REX_WB:
438  case REX_WX:
439  case REX_WXB:
440  case REX_WR:
441  case REX_WRB:
442  case REX_WRX:
443  case REX_WRXB:
444    NOT_LP64(assert(false, "64bit prefixes"));
445    is_64bit = true;
446    goto again_after_prefix;
447
448  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
449  case 0x88: // movb a, r
450  case 0x89: // movl a, r
451  case 0x8A: // movb r, a
452  case 0x8B: // movl r, a
453  case 0x8F: // popl a
454    debug_only(has_disp32 = true);
455    break;
456
457  case 0x68: // pushq #32
458    if (which == end_pc_operand) {
459      return ip + 4;
460    }
461    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
462    return ip;                  // not produced by emit_operand
463
464  case 0x66: // movw ... (size prefix)
465    again_after_size_prefix2:
466    switch (0xFF & *ip++) {
467    case REX:
468    case REX_B:
469    case REX_X:
470    case REX_XB:
471    case REX_R:
472    case REX_RB:
473    case REX_RX:
474    case REX_RXB:
475    case REX_W:
476    case REX_WB:
477    case REX_WX:
478    case REX_WXB:
479    case REX_WR:
480    case REX_WRB:
481    case REX_WRX:
482    case REX_WRXB:
483      NOT_LP64(assert(false, "64bit prefix found"));
484      goto again_after_size_prefix2;
485    case 0x8B: // movw r, a
486    case 0x89: // movw a, r
487      debug_only(has_disp32 = true);
488      break;
489    case 0xC7: // movw a, #16
490      debug_only(has_disp32 = true);
491      tail_size = 2;  // the imm16
492      break;
493    case 0x0F: // several SSE/SSE2 variants
494      ip--;    // reparse the 0x0F
495      goto again_after_prefix;
496    default:
497      ShouldNotReachHere();
498    }
499    break;
500
501  case REP8(0xB8): // movl/q r, #32/#64(oop?)
502    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
503    // these asserts are somewhat nonsensical
504#ifndef _LP64
505    assert(which == imm_operand || which == disp32_operand, "");
506#else
507    assert((which == call32_operand || which == imm_operand) && is_64bit ||
508           which == narrow_oop_operand && !is_64bit, "");
509#endif // _LP64
510    return ip;
511
512  case 0x69: // imul r, a, #32
513  case 0xC7: // movl a, #32(oop?)
514    tail_size = 4;
515    debug_only(has_disp32 = true); // has both kinds of operands!
516    break;
517
518  case 0x0F: // movx..., etc.
519    switch (0xFF & *ip++) {
520    case 0x12: // movlps
521    case 0x28: // movaps
522    case 0x2E: // ucomiss
523    case 0x2F: // comiss
524    case 0x54: // andps
525    case 0x55: // andnps
526    case 0x56: // orps
527    case 0x57: // xorps
528    case 0x6E: // movd
529    case 0x7E: // movd
530    case 0xAE: // ldmxcsr   a
531      // 64bit side says it these have both operands but that doesn't
532      // appear to be true
533      debug_only(has_disp32 = true);
534      break;
535
536    case 0xAD: // shrd r, a, %cl
537    case 0xAF: // imul r, a
538    case 0xBE: // movsbl r, a (movsxb)
539    case 0xBF: // movswl r, a (movsxw)
540    case 0xB6: // movzbl r, a (movzxb)
541    case 0xB7: // movzwl r, a (movzxw)
542    case REP16(0x40): // cmovl cc, r, a
543    case 0xB0: // cmpxchgb
544    case 0xB1: // cmpxchg
545    case 0xC1: // xaddl
546    case 0xC7: // cmpxchg8
547    case REP16(0x90): // setcc a
548      debug_only(has_disp32 = true);
549      // fall out of the switch to decode the address
550      break;
551
552    case 0xAC: // shrd r, a, #8
553      debug_only(has_disp32 = true);
554      tail_size = 1;  // the imm8
555      break;
556
557    case REP16(0x80): // jcc rdisp32
558      if (which == end_pc_operand)  return ip + 4;
559      assert(which == call32_operand, "jcc has no disp32 or imm");
560      return ip;
561    default:
562      ShouldNotReachHere();
563    }
564    break;
565
566  case 0x81: // addl a, #32; addl r, #32
567    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
568    // on 32bit in the case of cmpl, the imm might be an oop
569    tail_size = 4;
570    debug_only(has_disp32 = true); // has both kinds of operands!
571    break;
572
573  case 0x83: // addl a, #8; addl r, #8
574    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
575    debug_only(has_disp32 = true); // has both kinds of operands!
576    tail_size = 1;
577    break;
578
579  case 0x9B:
580    switch (0xFF & *ip++) {
581    case 0xD9: // fnstcw a
582      debug_only(has_disp32 = true);
583      break;
584    default:
585      ShouldNotReachHere();
586    }
587    break;
588
589  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
590  case REP4(0x10): // adc...
591  case REP4(0x20): // and...
592  case REP4(0x30): // xor...
593  case REP4(0x08): // or...
594  case REP4(0x18): // sbb...
595  case REP4(0x28): // sub...
596  case 0xF7: // mull a
597  case 0x8D: // lea r, a
598  case 0x87: // xchg r, a
599  case REP4(0x38): // cmp...
600  case 0x85: // test r, a
601    debug_only(has_disp32 = true); // has both kinds of operands!
602    break;
603
604  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
605  case 0xC6: // movb a, #8
606  case 0x80: // cmpb a, #8
607  case 0x6B: // imul r, a, #8
608    debug_only(has_disp32 = true); // has both kinds of operands!
609    tail_size = 1; // the imm8
610    break;
611
612  case 0xE8: // call rdisp32
613  case 0xE9: // jmp  rdisp32
614    if (which == end_pc_operand)  return ip + 4;
615    assert(which == call32_operand, "call has no disp32 or imm");
616    return ip;
617
618  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
619  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
620  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
621  case 0xDD: // fld_d a; fst_d a; fstp_d a
622  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
623  case 0xDF: // fild_d a; fistp_d a
624  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
625  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
626  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
627    debug_only(has_disp32 = true);
628    break;
629
630  case 0xF0:                    // Lock
631    assert(os::is_MP(), "only on MP");
632    goto again_after_prefix;
633
634  case 0xF3:                    // For SSE
635  case 0xF2:                    // For SSE2
636    switch (0xFF & *ip++) {
637    case REX:
638    case REX_B:
639    case REX_X:
640    case REX_XB:
641    case REX_R:
642    case REX_RB:
643    case REX_RX:
644    case REX_RXB:
645    case REX_W:
646    case REX_WB:
647    case REX_WX:
648    case REX_WXB:
649    case REX_WR:
650    case REX_WRB:
651    case REX_WRX:
652    case REX_WRXB:
653      NOT_LP64(assert(false, "found 64bit prefix"));
654      ip++;
655    default:
656      ip++;
657    }
658    debug_only(has_disp32 = true); // has both kinds of operands!
659    break;
660
661  default:
662    ShouldNotReachHere();
663
664#undef REP8
665#undef REP16
666  }
667
668  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
669#ifdef _LP64
670  assert(which != imm_operand, "instruction is not a movq reg, imm64");
671#else
672  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
673  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
674#endif // LP64
675  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");
676
677  // parse the output of emit_operand
678  int op2 = 0xFF & *ip++;
679  int base = op2 & 0x07;
680  int op3 = -1;
681  const int b100 = 4;
682  const int b101 = 5;
683  if (base == b100 && (op2 >> 6) != 3) {
684    op3 = 0xFF & *ip++;
685    base = op3 & 0x07;   // refetch the base
686  }
687  // now ip points at the disp (if any)
688
689  switch (op2 >> 6) {
690  case 0:
691    // [00 reg  100][ss index base]
692    // [00 reg  100][00   100  esp]
693    // [00 reg base]
694    // [00 reg  100][ss index  101][disp32]
695    // [00 reg  101]               [disp32]
696
697    if (base == b101) {
698      if (which == disp32_operand)
699        return ip;              // caller wants the disp32
700      ip += 4;                  // skip the disp32
701    }
702    break;
703
704  case 1:
705    // [01 reg  100][ss index base][disp8]
706    // [01 reg  100][00   100  esp][disp8]
707    // [01 reg base]               [disp8]
708    ip += 1;                    // skip the disp8
709    break;
710
711  case 2:
712    // [10 reg  100][ss index base][disp32]
713    // [10 reg  100][00   100  esp][disp32]
714    // [10 reg base]               [disp32]
715    if (which == disp32_operand)
716      return ip;                // caller wants the disp32
717    ip += 4;                    // skip the disp32
718    break;
719
720  case 3:
721    // [11 reg base]  (not a memory addressing mode)
722    break;
723  }
724
725  if (which == end_pc_operand) {
726    return ip + tail_size;
727  }
728
729#ifdef _LP64
730  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
731#else
732  assert(which == imm_operand, "instruction has only an imm field");
733#endif // LP64
734  return ip;
735}
736
737address Assembler::locate_next_instruction(address inst) {
738  // Secretly share code with locate_operand:
739  return locate_operand(inst, end_pc_operand);
740}
741
742
#ifdef ASSERT
// Debug-only check that the operand about to be emitted sits exactly where
// locate_operand will later find it for the given relocation and format.
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");

  Relocation* reloc = rspec.reloc();
  if (reloc->type() == relocInfo::none) {
    return;   // nothing to verify
  }

  address opnd;
  if (reloc->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (reloc->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    // Neither a call nor a data relocation: only the format is checked.
    assert(format == imm_operand, "cannot specify a format");
    return;
  }

  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT
766
767void Assembler::emit_operand32(Register reg, Address adr) {
768  assert(reg->encoding() < 8, "no extended registers");
769  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
770  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
771               adr._rspec);
772}
773
774void Assembler::emit_operand(Register reg, Address adr,
775                             int rip_relative_correction) {
776  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
777               adr._rspec,
778               rip_relative_correction);
779}
780
781void Assembler::emit_operand(XMMRegister reg, Address adr) {
782  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
783               adr._rspec);
784}
785
786// MMX operations
787void Assembler::emit_operand(MMXRegister reg, Address adr) {
788  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
789  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
790}
791
792// work around gcc (3.2.1-7a) bug
793void Assembler::emit_operand(Address adr, MMXRegister reg) {
794  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
795  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
796}
797
798
799void Assembler::emit_farith(int b1, int b2, int i) {
800  assert(isByte(b1) && isByte(b2), "wrong opcode");
801  assert(0 <= i &&  i < 8, "illegal stack offset");
802  emit_byte(b1);
803  emit_byte(b2 + i);
804}
805
806
// Now the Assembler instructions (identical for 32/64 bits)
808
809void Assembler::adcl(Register dst, int32_t imm32) {
810  prefix(dst);
811  emit_arith(0x81, 0xD0, dst, imm32);
812}
813
814void Assembler::adcl(Register dst, Address src) {
815  InstructionMark im(this);
816  prefix(src, dst);
817  emit_byte(0x13);
818  emit_operand(dst, src);
819}
820
821void Assembler::adcl(Register dst, Register src) {
822  (void) prefix_and_encode(dst->encoding(), src->encoding());
823  emit_arith(0x13, 0xC0, dst, src);
824}
825
826void Assembler::addl(Address dst, int32_t imm32) {
827  InstructionMark im(this);
828  prefix(dst);
829  emit_arith_operand(0x81, rax, dst, imm32);
830}
831
832void Assembler::addl(Address dst, Register src) {
833  InstructionMark im(this);
834  prefix(dst, src);
835  emit_byte(0x01);
836  emit_operand(src, dst);
837}
838
839void Assembler::addl(Register dst, int32_t imm32) {
840  prefix(dst);
841  emit_arith(0x81, 0xC0, dst, imm32);
842}
843
844void Assembler::addl(Register dst, Address src) {
845  InstructionMark im(this);
846  prefix(src, dst);
847  emit_byte(0x03);
848  emit_operand(dst, src);
849}
850
851void Assembler::addl(Register dst, Register src) {
852  (void) prefix_and_encode(dst->encoding(), src->encoding());
853  emit_arith(0x03, 0xC0, dst, src);
854}
855
856void Assembler::addr_nop_4() {
857  // 4 bytes: NOP DWORD PTR [EAX+0]
858  emit_byte(0x0F);
859  emit_byte(0x1F);
860  emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
861  emit_byte(0);    // 8-bits offset (1 byte)
862}
863
864void Assembler::addr_nop_5() {
865  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
866  emit_byte(0x0F);
867  emit_byte(0x1F);
868  emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
869  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
870  emit_byte(0);    // 8-bits offset (1 byte)
871}
872
873void Assembler::addr_nop_7() {
874  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
875  emit_byte(0x0F);
876  emit_byte(0x1F);
877  emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
878  emit_long(0);    // 32-bits offset (4 bytes)
879}
880
881void Assembler::addr_nop_8() {
882  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
883  emit_byte(0x0F);
884  emit_byte(0x1F);
885  emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
886  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
887  emit_long(0);    // 32-bits offset (4 bytes)
888}
889
890void Assembler::addsd(XMMRegister dst, XMMRegister src) {
891  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
892  emit_byte(0xF2);
893  int encode = prefix_and_encode(dst->encoding(), src->encoding());
894  emit_byte(0x0F);
895  emit_byte(0x58);
896  emit_byte(0xC0 | encode);
897}
898
899void Assembler::addsd(XMMRegister dst, Address src) {
900  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
901  InstructionMark im(this);
902  emit_byte(0xF2);
903  prefix(src, dst);
904  emit_byte(0x0F);
905  emit_byte(0x58);
906  emit_operand(dst, src);
907}
908
909void Assembler::addss(XMMRegister dst, XMMRegister src) {
910  NOT_LP64(assert(VM_Version::supports_sse(), ""));
911  emit_byte(0xF3);
912  int encode = prefix_and_encode(dst->encoding(), src->encoding());
913  emit_byte(0x0F);
914  emit_byte(0x58);
915  emit_byte(0xC0 | encode);
916}
917
918void Assembler::addss(XMMRegister dst, Address src) {
919  NOT_LP64(assert(VM_Version::supports_sse(), ""));
920  InstructionMark im(this);
921  emit_byte(0xF3);
922  prefix(src, dst);
923  emit_byte(0x0F);
924  emit_byte(0x58);
925  emit_operand(dst, src);
926}
927
928void Assembler::andl(Register dst, int32_t imm32) {
929  prefix(dst);
930  emit_arith(0x81, 0xE0, dst, imm32);
931}
932
933void Assembler::andl(Register dst, Address src) {
934  InstructionMark im(this);
935  prefix(src, dst);
936  emit_byte(0x23);
937  emit_operand(dst, src);
938}
939
940void Assembler::andl(Register dst, Register src) {
941  (void) prefix_and_encode(dst->encoding(), src->encoding());
942  emit_arith(0x23, 0xC0, dst, src);
943}
944
945void Assembler::andpd(XMMRegister dst, Address src) {
946  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
947  InstructionMark im(this);
948  emit_byte(0x66);
949  prefix(src, dst);
950  emit_byte(0x0F);
951  emit_byte(0x54);
952  emit_operand(dst, src);
953}
954
955void Assembler::bswapl(Register reg) { // bswap
956  int encode = prefix_and_encode(reg->encoding());
957  emit_byte(0x0F);
958  emit_byte(0xC8 | encode);
959}
960
961void Assembler::call(Label& L, relocInfo::relocType rtype) {
962  // suspect disp32 is always good
963  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);
964
965  if (L.is_bound()) {
966    const int long_size = 5;
967    int offs = (int)( target(L) - pc() );
968    assert(offs <= 0, "assembler error");
969    InstructionMark im(this);
970    // 1110 1000 #32-bit disp
971    emit_byte(0xE8);
972    emit_data(offs - long_size, rtype, operand);
973  } else {
974    InstructionMark im(this);
975    // 1110 1000 #32-bit disp
976    L.add_patch_at(code(), locator());
977
978    emit_byte(0xE8);
979    emit_data(int(0), rtype, operand);
980  }
981}
982
983void Assembler::call(Register dst) {
984  // This was originally using a 32bit register encoding
985  // and surely we want 64bit!
986  // this is a 32bit encoding but in 64bit mode the default
987  // operand size is 64bit so there is no need for the
988  // wide prefix. So prefix only happens if we use the
989  // new registers. Much like push/pop.
990  int x = offset();
991  // this may be true but dbx disassembles it as if it
992  // were 32bits...
993  // int encode = prefix_and_encode(dst->encoding());
994  // if (offset() != x) assert(dst->encoding() >= 8, "what?");
995  int encode = prefixq_and_encode(dst->encoding());
996
997  emit_byte(0xFF);
998  emit_byte(0xD0 | encode);
999}
1000
1001
1002void Assembler::call(Address adr) {
1003  InstructionMark im(this);
1004  prefix(adr);
1005  emit_byte(0xFF);
1006  emit_operand(rdx, adr);
1007}
1008
1009void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
1010  assert(entry != NULL, "call most probably wrong");
1011  InstructionMark im(this);
1012  emit_byte(0xE8);
1013  intptr_t disp = entry - (_code_pos + sizeof(int32_t));
1014  assert(is_simm32(disp), "must be 32bit offset (call2)");
1015  // Technically, should use call32_operand, but this format is
1016  // implied by the fact that we're emitting a call instruction.
1017
1018  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
1019  emit_data((int) disp, rspec, operand);
1020}
1021
1022void Assembler::cdql() {
1023  emit_byte(0x99);
1024}
1025
1026void Assembler::cmovl(Condition cc, Register dst, Register src) {
1027  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1028  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1029  emit_byte(0x0F);
1030  emit_byte(0x40 | cc);
1031  emit_byte(0xC0 | encode);
1032}
1033
1034
1035void Assembler::cmovl(Condition cc, Register dst, Address src) {
1036  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1037  prefix(src, dst);
1038  emit_byte(0x0F);
1039  emit_byte(0x40 | cc);
1040  emit_operand(dst, src);
1041}
1042
1043void Assembler::cmpb(Address dst, int imm8) {
1044  InstructionMark im(this);
1045  prefix(dst);
1046  emit_byte(0x80);
1047  emit_operand(rdi, dst, 1);
1048  emit_byte(imm8);
1049}
1050
1051void Assembler::cmpl(Address dst, int32_t imm32) {
1052  InstructionMark im(this);
1053  prefix(dst);
1054  emit_byte(0x81);
1055  emit_operand(rdi, dst, 4);
1056  emit_long(imm32);
1057}
1058
1059void Assembler::cmpl(Register dst, int32_t imm32) {
1060  prefix(dst);
1061  emit_arith(0x81, 0xF8, dst, imm32);
1062}
1063
1064void Assembler::cmpl(Register dst, Register src) {
1065  (void) prefix_and_encode(dst->encoding(), src->encoding());
1066  emit_arith(0x3B, 0xC0, dst, src);
1067}
1068
1069
1070void Assembler::cmpl(Register dst, Address  src) {
1071  InstructionMark im(this);
1072  prefix(src, dst);
1073  emit_byte(0x3B);
1074  emit_operand(dst, src);
1075}
1076
1077void Assembler::cmpw(Address dst, int imm16) {
1078  InstructionMark im(this);
1079  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
1080  emit_byte(0x66);
1081  emit_byte(0x81);
1082  emit_operand(rdi, dst, 2);
1083  emit_word(imm16);
1084}
1085
// The 32-bit cmpxchg compares the value at adr with the contents of rax,
// and stores reg into adr if they are equal; otherwise, the value at adr
// is loaded into rax.
// The ZF is set if the compared values were equal, and cleared otherwise.
1089void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
1090  if (Atomics & 2) {
1091     // caveat: no instructionmark, so this isn't relocatable.
1092     // Emit a synthetic, non-atomic, CAS equivalent.
1093     // Beware.  The synthetic form sets all ICCs, not just ZF.
1094     // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r)
1095     cmpl(rax, adr);
1096     movl(rax, adr);
1097     if (reg != rax) {
1098        Label L ;
1099        jcc(Assembler::notEqual, L);
1100        movl(adr, reg);
1101        bind(L);
1102     }
1103  } else {
1104     InstructionMark im(this);
1105     prefix(adr, reg);
1106     emit_byte(0x0F);
1107     emit_byte(0xB1);
1108     emit_operand(reg, adr);
1109  }
1110}
1111
1112void Assembler::comisd(XMMRegister dst, Address src) {
1113  // NOTE: dbx seems to decode this as comiss even though the
1114  // 0x66 is there. Strangly ucomisd comes out correct
1115  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1116  emit_byte(0x66);
1117  comiss(dst, src);
1118}
1119
1120void Assembler::comiss(XMMRegister dst, Address src) {
1121  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1122
1123  InstructionMark im(this);
1124  prefix(src, dst);
1125  emit_byte(0x0F);
1126  emit_byte(0x2F);
1127  emit_operand(dst, src);
1128}
1129
1130void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1131  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1132  emit_byte(0xF3);
1133  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1134  emit_byte(0x0F);
1135  emit_byte(0xE6);
1136  emit_byte(0xC0 | encode);
1137}
1138
1139void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1140  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1141  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1142  emit_byte(0x0F);
1143  emit_byte(0x5B);
1144  emit_byte(0xC0 | encode);
1145}
1146
1147void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1148  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1149  emit_byte(0xF2);
1150  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1151  emit_byte(0x0F);
1152  emit_byte(0x5A);
1153  emit_byte(0xC0 | encode);
1154}
1155
1156void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1157  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1158  emit_byte(0xF2);
1159  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1160  emit_byte(0x0F);
1161  emit_byte(0x2A);
1162  emit_byte(0xC0 | encode);
1163}
1164
1165void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1166  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1167  emit_byte(0xF3);
1168  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1169  emit_byte(0x0F);
1170  emit_byte(0x2A);
1171  emit_byte(0xC0 | encode);
1172}
1173
1174void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1175  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1176  emit_byte(0xF3);
1177  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1178  emit_byte(0x0F);
1179  emit_byte(0x5A);
1180  emit_byte(0xC0 | encode);
1181}
1182
1183void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1184  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1185  emit_byte(0xF2);
1186  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1187  emit_byte(0x0F);
1188  emit_byte(0x2C);
1189  emit_byte(0xC0 | encode);
1190}
1191
1192void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1193  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1194  emit_byte(0xF3);
1195  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1196  emit_byte(0x0F);
1197  emit_byte(0x2C);
1198  emit_byte(0xC0 | encode);
1199}
1200
1201void Assembler::decl(Address dst) {
1202  // Don't use it directly. Use MacroAssembler::decrement() instead.
1203  InstructionMark im(this);
1204  prefix(dst);
1205  emit_byte(0xFF);
1206  emit_operand(rcx, dst);
1207}
1208
1209void Assembler::divsd(XMMRegister dst, Address src) {
1210  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1211  InstructionMark im(this);
1212  emit_byte(0xF2);
1213  prefix(src, dst);
1214  emit_byte(0x0F);
1215  emit_byte(0x5E);
1216  emit_operand(dst, src);
1217}
1218
1219void Assembler::divsd(XMMRegister dst, XMMRegister src) {
1220  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1221  emit_byte(0xF2);
1222  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1223  emit_byte(0x0F);
1224  emit_byte(0x5E);
1225  emit_byte(0xC0 | encode);
1226}
1227
1228void Assembler::divss(XMMRegister dst, Address src) {
1229  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1230  InstructionMark im(this);
1231  emit_byte(0xF3);
1232  prefix(src, dst);
1233  emit_byte(0x0F);
1234  emit_byte(0x5E);
1235  emit_operand(dst, src);
1236}
1237
1238void Assembler::divss(XMMRegister dst, XMMRegister src) {
1239  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1240  emit_byte(0xF3);
1241  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1242  emit_byte(0x0F);
1243  emit_byte(0x5E);
1244  emit_byte(0xC0 | encode);
1245}
1246
1247void Assembler::emms() {
1248  NOT_LP64(assert(VM_Version::supports_mmx(), ""));
1249  emit_byte(0x0F);
1250  emit_byte(0x77);
1251}
1252
1253void Assembler::hlt() {
1254  emit_byte(0xF4);
1255}
1256
1257void Assembler::idivl(Register src) {
1258  int encode = prefix_and_encode(src->encoding());
1259  emit_byte(0xF7);
1260  emit_byte(0xF8 | encode);
1261}
1262
1263void Assembler::imull(Register dst, Register src) {
1264  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1265  emit_byte(0x0F);
1266  emit_byte(0xAF);
1267  emit_byte(0xC0 | encode);
1268}
1269
1270
1271void Assembler::imull(Register dst, Register src, int value) {
1272  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1273  if (is8bit(value)) {
1274    emit_byte(0x6B);
1275    emit_byte(0xC0 | encode);
1276    emit_byte(value);
1277  } else {
1278    emit_byte(0x69);
1279    emit_byte(0xC0 | encode);
1280    emit_long(value);
1281  }
1282}
1283
1284void Assembler::incl(Address dst) {
1285  // Don't use it directly. Use MacroAssembler::increment() instead.
1286  InstructionMark im(this);
1287  prefix(dst);
1288  emit_byte(0xFF);
1289  emit_operand(rax, dst);
1290}
1291
1292void Assembler::jcc(Condition cc, Label& L, relocInfo::relocType rtype) {
1293  InstructionMark im(this);
1294  relocate(rtype);
1295  assert((0 <= cc) && (cc < 16), "illegal cc");
1296  if (L.is_bound()) {
1297    address dst = target(L);
1298    assert(dst != NULL, "jcc most probably wrong");
1299
1300    const int short_size = 2;
1301    const int long_size = 6;
1302    intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;
1303    if (rtype == relocInfo::none && is8bit(offs - short_size)) {
1304      // 0111 tttn #8-bit disp
1305      emit_byte(0x70 | cc);
1306      emit_byte((offs - short_size) & 0xFF);
1307    } else {
1308      // 0000 1111 1000 tttn #32-bit disp
1309      assert(is_simm32(offs - long_size),
1310             "must be 32bit offset (call4)");
1311      emit_byte(0x0F);
1312      emit_byte(0x80 | cc);
1313      emit_long(offs - long_size);
1314    }
1315  } else {
1316    // Note: could eliminate cond. jumps to this jump if condition
1317    //       is the same however, seems to be rather unlikely case.
1318    // Note: use jccb() if label to be bound is very close to get
1319    //       an 8-bit displacement
1320    L.add_patch_at(code(), locator());
1321    emit_byte(0x0F);
1322    emit_byte(0x80 | cc);
1323    emit_long(0);
1324  }
1325}
1326
1327void Assembler::jccb(Condition cc, Label& L) {
1328  if (L.is_bound()) {
1329    const int short_size = 2;
1330    address entry = target(L);
1331    assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)),
1332           "Dispacement too large for a short jmp");
1333    intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
1334    // 0111 tttn #8-bit disp
1335    emit_byte(0x70 | cc);
1336    emit_byte((offs - short_size) & 0xFF);
1337  } else {
1338    InstructionMark im(this);
1339    L.add_patch_at(code(), locator());
1340    emit_byte(0x70 | cc);
1341    emit_byte(0);
1342  }
1343}
1344
1345void Assembler::jmp(Address adr) {
1346  InstructionMark im(this);
1347  prefix(adr);
1348  emit_byte(0xFF);
1349  emit_operand(rsp, adr);
1350}
1351
1352void Assembler::jmp(Label& L, relocInfo::relocType rtype) {
1353  if (L.is_bound()) {
1354    address entry = target(L);
1355    assert(entry != NULL, "jmp most probably wrong");
1356    InstructionMark im(this);
1357    const int short_size = 2;
1358    const int long_size = 5;
1359    intptr_t offs = entry - _code_pos;
1360    if (rtype == relocInfo::none && is8bit(offs - short_size)) {
1361      emit_byte(0xEB);
1362      emit_byte((offs - short_size) & 0xFF);
1363    } else {
1364      emit_byte(0xE9);
1365      emit_long(offs - long_size);
1366    }
1367  } else {
1368    // By default, forward jumps are always 32-bit displacements, since
1369    // we can't yet know where the label will be bound.  If you're sure that
1370    // the forward jump will not run beyond 256 bytes, use jmpb to
1371    // force an 8-bit displacement.
1372    InstructionMark im(this);
1373    relocate(rtype);
1374    L.add_patch_at(code(), locator());
1375    emit_byte(0xE9);
1376    emit_long(0);
1377  }
1378}
1379
1380void Assembler::jmp(Register entry) {
1381  int encode = prefix_and_encode(entry->encoding());
1382  emit_byte(0xFF);
1383  emit_byte(0xE0 | encode);
1384}
1385
1386void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
1387  InstructionMark im(this);
1388  emit_byte(0xE9);
1389  assert(dest != NULL, "must have a target");
1390  intptr_t disp = dest - (_code_pos + sizeof(int32_t));
1391  assert(is_simm32(disp), "must be 32bit offset (jmp)");
1392  emit_data(disp, rspec.reloc(), call32_operand);
1393}
1394
1395void Assembler::jmpb(Label& L) {
1396  if (L.is_bound()) {
1397    const int short_size = 2;
1398    address entry = target(L);
1399    assert(is8bit((entry - _code_pos) + short_size),
1400           "Dispacement too large for a short jmp");
1401    assert(entry != NULL, "jmp most probably wrong");
1402    intptr_t offs = entry - _code_pos;
1403    emit_byte(0xEB);
1404    emit_byte((offs - short_size) & 0xFF);
1405  } else {
1406    InstructionMark im(this);
1407    L.add_patch_at(code(), locator());
1408    emit_byte(0xEB);
1409    emit_byte(0);
1410  }
1411}
1412
1413void Assembler::ldmxcsr( Address src) {
1414  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1415  InstructionMark im(this);
1416  prefix(src);
1417  emit_byte(0x0F);
1418  emit_byte(0xAE);
1419  emit_operand(as_Register(2), src);
1420}
1421
1422void Assembler::leal(Register dst, Address src) {
1423  InstructionMark im(this);
1424#ifdef _LP64
1425  emit_byte(0x67); // addr32
1426  prefix(src, dst);
1427#endif // LP64
1428  emit_byte(0x8D);
1429  emit_operand(dst, src);
1430}
1431
1432void Assembler::lock() {
1433  if (Atomics & 1) {
1434     // Emit either nothing, a NOP, or a NOP: prefix
1435     emit_byte(0x90) ;
1436  } else {
1437     emit_byte(0xF0);
1438  }
1439}
1440
1441// Serializes memory.
1442void Assembler::mfence() {
1443    // Memory barriers are only needed on multiprocessors
1444  if (os::is_MP()) {
1445    if( LP64_ONLY(true ||) VM_Version::supports_sse2() ) {
1446      emit_byte( 0x0F );                // MFENCE; faster blows no regs
1447      emit_byte( 0xAE );
1448      emit_byte( 0xF0 );
1449    } else {
1450      // All usable chips support "locked" instructions which suffice
1451      // as barriers, and are much faster than the alternative of
1452      // using cpuid instruction. We use here a locked add [esp],0.
1453      // This is conveniently otherwise a no-op except for blowing
1454      // flags (which we save and restore.)
1455      pushf();                // Save eflags register
1456      lock();
1457      addl(Address(rsp, 0), 0);// Assert the lock# signal here
1458      popf();                 // Restore eflags register
1459    }
1460  }
1461}
1462
1463void Assembler::mov(Register dst, Register src) {
1464  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
1465}
1466
1467void Assembler::movapd(XMMRegister dst, XMMRegister src) {
1468  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1469  int dstenc = dst->encoding();
1470  int srcenc = src->encoding();
1471  emit_byte(0x66);
1472  if (dstenc < 8) {
1473    if (srcenc >= 8) {
1474      prefix(REX_B);
1475      srcenc -= 8;
1476    }
1477  } else {
1478    if (srcenc < 8) {
1479      prefix(REX_R);
1480    } else {
1481      prefix(REX_RB);
1482      srcenc -= 8;
1483    }
1484    dstenc -= 8;
1485  }
1486  emit_byte(0x0F);
1487  emit_byte(0x28);
1488  emit_byte(0xC0 | dstenc << 3 | srcenc);
1489}
1490
1491void Assembler::movaps(XMMRegister dst, XMMRegister src) {
1492  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1493  int dstenc = dst->encoding();
1494  int srcenc = src->encoding();
1495  if (dstenc < 8) {
1496    if (srcenc >= 8) {
1497      prefix(REX_B);
1498      srcenc -= 8;
1499    }
1500  } else {
1501    if (srcenc < 8) {
1502      prefix(REX_R);
1503    } else {
1504      prefix(REX_RB);
1505      srcenc -= 8;
1506    }
1507    dstenc -= 8;
1508  }
1509  emit_byte(0x0F);
1510  emit_byte(0x28);
1511  emit_byte(0xC0 | dstenc << 3 | srcenc);
1512}
1513
1514void Assembler::movb(Register dst, Address src) {
1515  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
1516  InstructionMark im(this);
1517  prefix(src, dst, true);
1518  emit_byte(0x8A);
1519  emit_operand(dst, src);
1520}
1521
1522
1523void Assembler::movb(Address dst, int imm8) {
1524  InstructionMark im(this);
1525   prefix(dst);
1526  emit_byte(0xC6);
1527  emit_operand(rax, dst, 1);
1528  emit_byte(imm8);
1529}
1530
1531
1532void Assembler::movb(Address dst, Register src) {
1533  assert(src->has_byte_register(), "must have byte register");
1534  InstructionMark im(this);
1535  prefix(dst, src, true);
1536  emit_byte(0x88);
1537  emit_operand(src, dst);
1538}
1539
1540void Assembler::movdl(XMMRegister dst, Register src) {
1541  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1542  emit_byte(0x66);
1543  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1544  emit_byte(0x0F);
1545  emit_byte(0x6E);
1546  emit_byte(0xC0 | encode);
1547}
1548
1549void Assembler::movdl(Register dst, XMMRegister src) {
1550  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1551  emit_byte(0x66);
1552  // swap src/dst to get correct prefix
1553  int encode = prefix_and_encode(src->encoding(), dst->encoding());
1554  emit_byte(0x0F);
1555  emit_byte(0x7E);
1556  emit_byte(0xC0 | encode);
1557}
1558
1559void Assembler::movdqa(XMMRegister dst, Address src) {
1560  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1561  InstructionMark im(this);
1562  emit_byte(0x66);
1563  prefix(src, dst);
1564  emit_byte(0x0F);
1565  emit_byte(0x6F);
1566  emit_operand(dst, src);
1567}
1568
1569void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
1570  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1571  emit_byte(0x66);
1572  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
1573  emit_byte(0x0F);
1574  emit_byte(0x6F);
1575  emit_byte(0xC0 | encode);
1576}
1577
1578void Assembler::movdqa(Address dst, XMMRegister src) {
1579  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1580  InstructionMark im(this);
1581  emit_byte(0x66);
1582  prefix(dst, src);
1583  emit_byte(0x0F);
1584  emit_byte(0x7F);
1585  emit_operand(src, dst);
1586}
1587
1588void Assembler::movdqu(XMMRegister dst, Address src) {
1589  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1590  InstructionMark im(this);
1591  emit_byte(0xF3);
1592  prefix(src, dst);
1593  emit_byte(0x0F);
1594  emit_byte(0x6F);
1595  emit_operand(dst, src);
1596}
1597
1598void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
1599  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1600  emit_byte(0xF3);
1601  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
1602  emit_byte(0x0F);
1603  emit_byte(0x6F);
1604  emit_byte(0xC0 | encode);
1605}
1606
1607void Assembler::movdqu(Address dst, XMMRegister src) {
1608  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1609  InstructionMark im(this);
1610  emit_byte(0xF3);
1611  prefix(dst, src);
1612  emit_byte(0x0F);
1613  emit_byte(0x7F);
1614  emit_operand(src, dst);
1615}
1616
1617// Uses zero extension on 64bit
1618
1619void Assembler::movl(Register dst, int32_t imm32) {
1620  int encode = prefix_and_encode(dst->encoding());
1621  emit_byte(0xB8 | encode);
1622  emit_long(imm32);
1623}
1624
1625void Assembler::movl(Register dst, Register src) {
1626  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1627  emit_byte(0x8B);
1628  emit_byte(0xC0 | encode);
1629}
1630
1631void Assembler::movl(Register dst, Address src) {
1632  InstructionMark im(this);
1633  prefix(src, dst);
1634  emit_byte(0x8B);
1635  emit_operand(dst, src);
1636}
1637
1638void Assembler::movl(Address dst, int32_t imm32) {
1639  InstructionMark im(this);
1640  prefix(dst);
1641  emit_byte(0xC7);
1642  emit_operand(rax, dst, 4);
1643  emit_long(imm32);
1644}
1645
1646void Assembler::movl(Address dst, Register src) {
1647  InstructionMark im(this);
1648  prefix(dst, src);
1649  emit_byte(0x89);
1650  emit_operand(src, dst);
1651}
1652
// Newer CPUs require the use of movsd and movss to avoid a partial register
// stall when loading from memory. On old Opterons, however, movlpd should be
// used instead of movsd.
// The selection is done in MacroAssembler::movdbl() and movflt().
1656void Assembler::movlpd(XMMRegister dst, Address src) {
1657  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1658  InstructionMark im(this);
1659  emit_byte(0x66);
1660  prefix(src, dst);
1661  emit_byte(0x0F);
1662  emit_byte(0x12);
1663  emit_operand(dst, src);
1664}
1665
1666void Assembler::movq( MMXRegister dst, Address src ) {
1667  assert( VM_Version::supports_mmx(), "" );
1668  emit_byte(0x0F);
1669  emit_byte(0x6F);
1670  emit_operand(dst, src);
1671}
1672
1673void Assembler::movq( Address dst, MMXRegister src ) {
1674  assert( VM_Version::supports_mmx(), "" );
1675  emit_byte(0x0F);
1676  emit_byte(0x7F);
1677  // workaround gcc (3.2.1-7a) bug
1678  // In that version of gcc with only an emit_operand(MMX, Address)
1679  // gcc will tail jump and try and reverse the parameters completely
1680  // obliterating dst in the process. By having a version available
1681  // that doesn't need to swap the args at the tail jump the bug is
1682  // avoided.
1683  emit_operand(dst, src);
1684}
1685
1686void Assembler::movq(XMMRegister dst, Address src) {
1687  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1688  InstructionMark im(this);
1689  emit_byte(0xF3);
1690  prefix(src, dst);
1691  emit_byte(0x0F);
1692  emit_byte(0x7E);
1693  emit_operand(dst, src);
1694}
1695
1696void Assembler::movq(Address dst, XMMRegister src) {
1697  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1698  InstructionMark im(this);
1699  emit_byte(0x66);
1700  prefix(dst, src);
1701  emit_byte(0x0F);
1702  emit_byte(0xD6);
1703  emit_operand(src, dst);
1704}
1705
1706void Assembler::movsbl(Register dst, Address src) { // movsxb
1707  InstructionMark im(this);
1708  prefix(src, dst);
1709  emit_byte(0x0F);
1710  emit_byte(0xBE);
1711  emit_operand(dst, src);
1712}
1713
1714void Assembler::movsbl(Register dst, Register src) { // movsxb
1715  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
1716  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1717  emit_byte(0x0F);
1718  emit_byte(0xBE);
1719  emit_byte(0xC0 | encode);
1720}
1721
1722void Assembler::movsd(XMMRegister dst, XMMRegister src) {
1723  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1724  emit_byte(0xF2);
1725  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1726  emit_byte(0x0F);
1727  emit_byte(0x10);
1728  emit_byte(0xC0 | encode);
1729}
1730
1731void Assembler::movsd(XMMRegister dst, Address src) {
1732  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1733  InstructionMark im(this);
1734  emit_byte(0xF2);
1735  prefix(src, dst);
1736  emit_byte(0x0F);
1737  emit_byte(0x10);
1738  emit_operand(dst, src);
1739}
1740
1741void Assembler::movsd(Address dst, XMMRegister src) {
1742  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1743  InstructionMark im(this);
1744  emit_byte(0xF2);
1745  prefix(dst, src);
1746  emit_byte(0x0F);
1747  emit_byte(0x11);
1748  emit_operand(src, dst);
1749}
1750
1751void Assembler::movss(XMMRegister dst, XMMRegister src) {
1752  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1753  emit_byte(0xF3);
1754  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1755  emit_byte(0x0F);
1756  emit_byte(0x10);
1757  emit_byte(0xC0 | encode);
1758}
1759
1760void Assembler::movss(XMMRegister dst, Address src) {
1761  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1762  InstructionMark im(this);
1763  emit_byte(0xF3);
1764  prefix(src, dst);
1765  emit_byte(0x0F);
1766  emit_byte(0x10);
1767  emit_operand(dst, src);
1768}
1769
1770void Assembler::movss(Address dst, XMMRegister src) {
1771  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1772  InstructionMark im(this);
1773  emit_byte(0xF3);
1774  prefix(dst, src);
1775  emit_byte(0x0F);
1776  emit_byte(0x11);
1777  emit_operand(src, dst);
1778}
1779
1780void Assembler::movswl(Register dst, Address src) { // movsxw
1781  InstructionMark im(this);
1782  prefix(src, dst);
1783  emit_byte(0x0F);
1784  emit_byte(0xBF);
1785  emit_operand(dst, src);
1786}
1787
1788void Assembler::movswl(Register dst, Register src) { // movsxw
1789  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1790  emit_byte(0x0F);
1791  emit_byte(0xBF);
1792  emit_byte(0xC0 | encode);
1793}
1794
1795void Assembler::movw(Address dst, int imm16) {
1796  InstructionMark im(this);
1797
1798  emit_byte(0x66); // switch to 16-bit mode
1799  prefix(dst);
1800  emit_byte(0xC7);
1801  emit_operand(rax, dst, 2);
1802  emit_word(imm16);
1803}
1804
1805void Assembler::movw(Register dst, Address src) {
1806  InstructionMark im(this);
1807  emit_byte(0x66);
1808  prefix(src, dst);
1809  emit_byte(0x8B);
1810  emit_operand(dst, src);
1811}
1812
1813void Assembler::movw(Address dst, Register src) {
1814  InstructionMark im(this);
1815  emit_byte(0x66);
1816  prefix(dst, src);
1817  emit_byte(0x89);
1818  emit_operand(src, dst);
1819}
1820
1821void Assembler::movzbl(Register dst, Address src) { // movzxb
1822  InstructionMark im(this);
1823  prefix(src, dst);
1824  emit_byte(0x0F);
1825  emit_byte(0xB6);
1826  emit_operand(dst, src);
1827}
1828
1829void Assembler::movzbl(Register dst, Register src) { // movzxb
1830  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
1831  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1832  emit_byte(0x0F);
1833  emit_byte(0xB6);
1834  emit_byte(0xC0 | encode);
1835}
1836
1837void Assembler::movzwl(Register dst, Address src) { // movzxw
1838  InstructionMark im(this);
1839  prefix(src, dst);
1840  emit_byte(0x0F);
1841  emit_byte(0xB7);
1842  emit_operand(dst, src);
1843}
1844
1845void Assembler::movzwl(Register dst, Register src) { // movzxw
1846  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1847  emit_byte(0x0F);
1848  emit_byte(0xB7);
1849  emit_byte(0xC0 | encode);
1850}
1851
1852void Assembler::mull(Address src) {
1853  InstructionMark im(this);
1854  prefix(src);
1855  emit_byte(0xF7);
1856  emit_operand(rsp, src);
1857}
1858
1859void Assembler::mull(Register src) {
1860  int encode = prefix_and_encode(src->encoding());
1861  emit_byte(0xF7);
1862  emit_byte(0xE0 | encode);
1863}
1864
1865void Assembler::mulsd(XMMRegister dst, Address src) {
1866  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1867  InstructionMark im(this);
1868  emit_byte(0xF2);
1869  prefix(src, dst);
1870  emit_byte(0x0F);
1871  emit_byte(0x59);
1872  emit_operand(dst, src);
1873}
1874
1875void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
1876  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1877  emit_byte(0xF2);
1878  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1879  emit_byte(0x0F);
1880  emit_byte(0x59);
1881  emit_byte(0xC0 | encode);
1882}
1883
1884void Assembler::mulss(XMMRegister dst, Address src) {
1885  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1886  InstructionMark im(this);
1887  emit_byte(0xF3);
1888  prefix(src, dst);
1889  emit_byte(0x0F);
1890  emit_byte(0x59);
1891  emit_operand(dst, src);
1892}
1893
1894void Assembler::mulss(XMMRegister dst, XMMRegister src) {
1895  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1896  emit_byte(0xF3);
1897  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1898  emit_byte(0x0F);
1899  emit_byte(0x59);
1900  emit_byte(0xC0 | encode);
1901}
1902
1903void Assembler::negl(Register dst) {
1904  int encode = prefix_and_encode(dst->encoding());
1905  emit_byte(0xF7);
1906  emit_byte(0xD8 | encode);
1907}
1908
1909void Assembler::nop(int i) {
1910#ifdef ASSERT
1911  assert(i > 0, " ");
1912  // The fancy nops aren't currently recognized by debuggers making it a
1913  // pain to disassemble code while debugging. If asserts are on clearly
1914  // speed is not an issue so simply use the single byte traditional nop
1915  // to do alignment.
1916
1917  for (; i > 0 ; i--) emit_byte(0x90);
1918  return;
1919
1920#endif // ASSERT
1921
1922  if (UseAddressNop && VM_Version::is_intel()) {
1923    //
1924    // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
1925    //  1: 0x90
1926    //  2: 0x66 0x90
1927    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
1928    //  4: 0x0F 0x1F 0x40 0x00
1929    //  5: 0x0F 0x1F 0x44 0x00 0x00
1930    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
1931    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
1932    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1933    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1934    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1935    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1936
1937    // The rest coding is Intel specific - don't use consecutive address nops
1938
1939    // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
1940    // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
1941    // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
1942    // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
1943
1944    while(i >= 15) {
1945      // For Intel don't generate consecutive addess nops (mix with regular nops)
1946      i -= 15;
1947      emit_byte(0x66);   // size prefix
1948      emit_byte(0x66);   // size prefix
1949      emit_byte(0x66);   // size prefix
1950      addr_nop_8();
1951      emit_byte(0x66);   // size prefix
1952      emit_byte(0x66);   // size prefix
1953      emit_byte(0x66);   // size prefix
1954      emit_byte(0x90);   // nop
1955    }
1956    switch (i) {
1957      case 14:
1958        emit_byte(0x66); // size prefix
1959      case 13:
1960        emit_byte(0x66); // size prefix
1961      case 12:
1962        addr_nop_8();
1963        emit_byte(0x66); // size prefix
1964        emit_byte(0x66); // size prefix
1965        emit_byte(0x66); // size prefix
1966        emit_byte(0x90); // nop
1967        break;
1968      case 11:
1969        emit_byte(0x66); // size prefix
1970      case 10:
1971        emit_byte(0x66); // size prefix
1972      case 9:
1973        emit_byte(0x66); // size prefix
1974      case 8:
1975        addr_nop_8();
1976        break;
1977      case 7:
1978        addr_nop_7();
1979        break;
1980      case 6:
1981        emit_byte(0x66); // size prefix
1982      case 5:
1983        addr_nop_5();
1984        break;
1985      case 4:
1986        addr_nop_4();
1987        break;
1988      case 3:
1989        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
1990        emit_byte(0x66); // size prefix
1991      case 2:
1992        emit_byte(0x66); // size prefix
1993      case 1:
1994        emit_byte(0x90); // nop
1995        break;
1996      default:
1997        assert(i == 0, " ");
1998    }
1999    return;
2000  }
2001  if (UseAddressNop && VM_Version::is_amd()) {
2002    //
2003    // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
2004    //  1: 0x90
2005    //  2: 0x66 0x90
2006    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2007    //  4: 0x0F 0x1F 0x40 0x00
2008    //  5: 0x0F 0x1F 0x44 0x00 0x00
2009    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2010    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2011    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2012    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2013    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2014    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2015
2016    // The rest coding is AMD specific - use consecutive address nops
2017
2018    // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2019    // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2020    // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2021    // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2022    // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2023    //     Size prefixes (0x66) are added for larger sizes
2024
2025    while(i >= 22) {
2026      i -= 11;
2027      emit_byte(0x66); // size prefix
2028      emit_byte(0x66); // size prefix
2029      emit_byte(0x66); // size prefix
2030      addr_nop_8();
2031    }
2032    // Generate first nop for size between 21-12
2033    switch (i) {
2034      case 21:
2035        i -= 1;
2036        emit_byte(0x66); // size prefix
2037      case 20:
2038      case 19:
2039        i -= 1;
2040        emit_byte(0x66); // size prefix
2041      case 18:
2042      case 17:
2043        i -= 1;
2044        emit_byte(0x66); // size prefix
2045      case 16:
2046      case 15:
2047        i -= 8;
2048        addr_nop_8();
2049        break;
2050      case 14:
2051      case 13:
2052        i -= 7;
2053        addr_nop_7();
2054        break;
2055      case 12:
2056        i -= 6;
2057        emit_byte(0x66); // size prefix
2058        addr_nop_5();
2059        break;
2060      default:
2061        assert(i < 12, " ");
2062    }
2063
2064    // Generate second nop for size between 11-1
2065    switch (i) {
2066      case 11:
2067        emit_byte(0x66); // size prefix
2068      case 10:
2069        emit_byte(0x66); // size prefix
2070      case 9:
2071        emit_byte(0x66); // size prefix
2072      case 8:
2073        addr_nop_8();
2074        break;
2075      case 7:
2076        addr_nop_7();
2077        break;
2078      case 6:
2079        emit_byte(0x66); // size prefix
2080      case 5:
2081        addr_nop_5();
2082        break;
2083      case 4:
2084        addr_nop_4();
2085        break;
2086      case 3:
2087        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2088        emit_byte(0x66); // size prefix
2089      case 2:
2090        emit_byte(0x66); // size prefix
2091      case 1:
2092        emit_byte(0x90); // nop
2093        break;
2094      default:
2095        assert(i == 0, " ");
2096    }
2097    return;
2098  }
2099
2100  // Using nops with size prefixes "0x66 0x90".
2101  // From AMD Optimization Guide:
2102  //  1: 0x90
2103  //  2: 0x66 0x90
2104  //  3: 0x66 0x66 0x90
2105  //  4: 0x66 0x66 0x66 0x90
2106  //  5: 0x66 0x66 0x90 0x66 0x90
2107  //  6: 0x66 0x66 0x90 0x66 0x66 0x90
2108  //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
2109  //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
2110  //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2111  // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2112  //
2113  while(i > 12) {
2114    i -= 4;
2115    emit_byte(0x66); // size prefix
2116    emit_byte(0x66);
2117    emit_byte(0x66);
2118    emit_byte(0x90); // nop
2119  }
2120  // 1 - 12 nops
2121  if(i > 8) {
2122    if(i > 9) {
2123      i -= 1;
2124      emit_byte(0x66);
2125    }
2126    i -= 3;
2127    emit_byte(0x66);
2128    emit_byte(0x66);
2129    emit_byte(0x90);
2130  }
2131  // 1 - 8 nops
2132  if(i > 4) {
2133    if(i > 6) {
2134      i -= 1;
2135      emit_byte(0x66);
2136    }
2137    i -= 3;
2138    emit_byte(0x66);
2139    emit_byte(0x66);
2140    emit_byte(0x90);
2141  }
2142  switch (i) {
2143    case 4:
2144      emit_byte(0x66);
2145    case 3:
2146      emit_byte(0x66);
2147    case 2:
2148      emit_byte(0x66);
2149    case 1:
2150      emit_byte(0x90);
2151      break;
2152    default:
2153      assert(i == 0, " ");
2154  }
2155}
2156
2157void Assembler::notl(Register dst) {
2158  int encode = prefix_and_encode(dst->encoding());
2159  emit_byte(0xF7);
2160  emit_byte(0xD0 | encode );
2161}
2162
2163void Assembler::orl(Address dst, int32_t imm32) {
2164  InstructionMark im(this);
2165  prefix(dst);
2166  emit_byte(0x81);
2167  emit_operand(rcx, dst, 4);
2168  emit_long(imm32);
2169}
2170
2171void Assembler::orl(Register dst, int32_t imm32) {
2172  prefix(dst);
2173  emit_arith(0x81, 0xC8, dst, imm32);
2174}
2175
2176
2177void Assembler::orl(Register dst, Address src) {
2178  InstructionMark im(this);
2179  prefix(src, dst);
2180  emit_byte(0x0B);
2181  emit_operand(dst, src);
2182}
2183
2184
2185void Assembler::orl(Register dst, Register src) {
2186  (void) prefix_and_encode(dst->encoding(), src->encoding());
2187  emit_arith(0x0B, 0xC0, dst, src);
2188}
2189
2190// generic
2191void Assembler::pop(Register dst) {
2192  int encode = prefix_and_encode(dst->encoding());
2193  emit_byte(0x58 | encode);
2194}
2195
2196void Assembler::popcntl(Register dst, Address src) {
2197  assert(VM_Version::supports_popcnt(), "must support");
2198  InstructionMark im(this);
2199  emit_byte(0xF3);
2200  prefix(src, dst);
2201  emit_byte(0x0F);
2202  emit_byte(0xB8);
2203  emit_operand(dst, src);
2204}
2205
2206void Assembler::popcntl(Register dst, Register src) {
2207  assert(VM_Version::supports_popcnt(), "must support");
2208  emit_byte(0xF3);
2209  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2210  emit_byte(0x0F);
2211  emit_byte(0xB8);
2212  emit_byte(0xC0 | encode);
2213}
2214
2215void Assembler::popf() {
2216  emit_byte(0x9D);
2217}
2218
2219void Assembler::popl(Address dst) {
2220  // NOTE: this will adjust stack by 8byte on 64bits
2221  InstructionMark im(this);
2222  prefix(dst);
2223  emit_byte(0x8F);
2224  emit_operand(rax, dst);
2225}
2226
2227void Assembler::prefetch_prefix(Address src) {
2228  prefix(src);
2229  emit_byte(0x0F);
2230}
2231
2232void Assembler::prefetchnta(Address src) {
2233  NOT_LP64(assert(VM_Version::supports_sse2(), "must support"));
2234  InstructionMark im(this);
2235  prefetch_prefix(src);
2236  emit_byte(0x18);
2237  emit_operand(rax, src); // 0, src
2238}
2239
2240void Assembler::prefetchr(Address src) {
2241  NOT_LP64(assert(VM_Version::supports_3dnow(), "must support"));
2242  InstructionMark im(this);
2243  prefetch_prefix(src);
2244  emit_byte(0x0D);
2245  emit_operand(rax, src); // 0, src
2246}
2247
2248void Assembler::prefetcht0(Address src) {
2249  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2250  InstructionMark im(this);
2251  prefetch_prefix(src);
2252  emit_byte(0x18);
2253  emit_operand(rcx, src); // 1, src
2254}
2255
2256void Assembler::prefetcht1(Address src) {
2257  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2258  InstructionMark im(this);
2259  prefetch_prefix(src);
2260  emit_byte(0x18);
2261  emit_operand(rdx, src); // 2, src
2262}
2263
2264void Assembler::prefetcht2(Address src) {
2265  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2266  InstructionMark im(this);
2267  prefetch_prefix(src);
2268  emit_byte(0x18);
2269  emit_operand(rbx, src); // 3, src
2270}
2271
2272void Assembler::prefetchw(Address src) {
2273  NOT_LP64(assert(VM_Version::supports_3dnow(), "must support"));
2274  InstructionMark im(this);
2275  prefetch_prefix(src);
2276  emit_byte(0x0D);
2277  emit_operand(rcx, src); // 1, src
2278}
2279
2280void Assembler::prefix(Prefix p) {
2281  a_byte(p);
2282}
2283
2284void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
2285  assert(isByte(mode), "invalid value");
2286  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2287
2288  emit_byte(0x66);
2289  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2290  emit_byte(0x0F);
2291  emit_byte(0x70);
2292  emit_byte(0xC0 | encode);
2293  emit_byte(mode & 0xFF);
2294
2295}
2296
2297void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
2298  assert(isByte(mode), "invalid value");
2299  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2300
2301  InstructionMark im(this);
2302  emit_byte(0x66);
2303  prefix(src, dst);
2304  emit_byte(0x0F);
2305  emit_byte(0x70);
2306  emit_operand(dst, src);
2307  emit_byte(mode & 0xFF);
2308}
2309
2310void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
2311  assert(isByte(mode), "invalid value");
2312  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2313
2314  emit_byte(0xF2);
2315  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2316  emit_byte(0x0F);
2317  emit_byte(0x70);
2318  emit_byte(0xC0 | encode);
2319  emit_byte(mode & 0xFF);
2320}
2321
2322void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
2323  assert(isByte(mode), "invalid value");
2324  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2325
2326  InstructionMark im(this);
2327  emit_byte(0xF2);
2328  prefix(src, dst); // QQ new
2329  emit_byte(0x0F);
2330  emit_byte(0x70);
2331  emit_operand(dst, src);
2332  emit_byte(mode & 0xFF);
2333}
2334
2335void Assembler::psrlq(XMMRegister dst, int shift) {
2336  // HMM Table D-1 says sse2 or mmx
2337  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2338
2339  int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding());
2340  emit_byte(0x66);
2341  emit_byte(0x0F);
2342  emit_byte(0x73);
2343  emit_byte(0xC0 | encode);
2344  emit_byte(shift);
2345}
2346
2347void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
2348  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2349  emit_byte(0x66);
2350  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2351  emit_byte(0x0F);
2352  emit_byte(0x60);
2353  emit_byte(0xC0 | encode);
2354}
2355
2356void Assembler::push(int32_t imm32) {
2357  // in 64bits we push 64bits onto the stack but only
2358  // take a 32bit immediate
2359  emit_byte(0x68);
2360  emit_long(imm32);
2361}
2362
2363void Assembler::push(Register src) {
2364  int encode = prefix_and_encode(src->encoding());
2365
2366  emit_byte(0x50 | encode);
2367}
2368
2369void Assembler::pushf() {
2370  emit_byte(0x9C);
2371}
2372
2373void Assembler::pushl(Address src) {
2374  // Note this will push 64bit on 64bit
2375  InstructionMark im(this);
2376  prefix(src);
2377  emit_byte(0xFF);
2378  emit_operand(rsi, src);
2379}
2380
2381void Assembler::pxor(XMMRegister dst, Address src) {
2382  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2383  InstructionMark im(this);
2384  emit_byte(0x66);
2385  prefix(src, dst);
2386  emit_byte(0x0F);
2387  emit_byte(0xEF);
2388  emit_operand(dst, src);
2389}
2390
2391void Assembler::pxor(XMMRegister dst, XMMRegister src) {
2392  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2393  InstructionMark im(this);
2394  emit_byte(0x66);
2395  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2396  emit_byte(0x0F);
2397  emit_byte(0xEF);
2398  emit_byte(0xC0 | encode);
2399}
2400
2401void Assembler::rcll(Register dst, int imm8) {
2402  assert(isShiftCount(imm8), "illegal shift count");
2403  int encode = prefix_and_encode(dst->encoding());
2404  if (imm8 == 1) {
2405    emit_byte(0xD1);
2406    emit_byte(0xD0 | encode);
2407  } else {
2408    emit_byte(0xC1);
2409    emit_byte(0xD0 | encode);
2410    emit_byte(imm8);
2411  }
2412}
2413
2414// copies data from [esi] to [edi] using rcx pointer sized words
2415// generic
2416void Assembler::rep_mov() {
2417  emit_byte(0xF3);
2418  // MOVSQ
2419  LP64_ONLY(prefix(REX_W));
2420  emit_byte(0xA5);
2421}
2422
2423// sets rcx pointer sized words with rax, value at [edi]
2424// generic
2425void Assembler::rep_set() { // rep_set
2426  emit_byte(0xF3);
2427  // STOSQ
2428  LP64_ONLY(prefix(REX_W));
2429  emit_byte(0xAB);
2430}
2431
2432// scans rcx pointer sized words at [edi] for occurance of rax,
2433// generic
2434void Assembler::repne_scan() { // repne_scan
2435  emit_byte(0xF2);
2436  // SCASQ
2437  LP64_ONLY(prefix(REX_W));
2438  emit_byte(0xAF);
2439}
2440
2441#ifdef _LP64
2442// scans rcx 4 byte words at [edi] for occurance of rax,
2443// generic
2444void Assembler::repne_scanl() { // repne_scan
2445  emit_byte(0xF2);
2446  // SCASL
2447  emit_byte(0xAF);
2448}
2449#endif
2450
2451void Assembler::ret(int imm16) {
2452  if (imm16 == 0) {
2453    emit_byte(0xC3);
2454  } else {
2455    emit_byte(0xC2);
2456    emit_word(imm16);
2457  }
2458}
2459
2460void Assembler::sahf() {
2461#ifdef _LP64
2462  // Not supported in 64bit mode
2463  ShouldNotReachHere();
2464#endif
2465  emit_byte(0x9E);
2466}
2467
2468void Assembler::sarl(Register dst, int imm8) {
2469  int encode = prefix_and_encode(dst->encoding());
2470  assert(isShiftCount(imm8), "illegal shift count");
2471  if (imm8 == 1) {
2472    emit_byte(0xD1);
2473    emit_byte(0xF8 | encode);
2474  } else {
2475    emit_byte(0xC1);
2476    emit_byte(0xF8 | encode);
2477    emit_byte(imm8);
2478  }
2479}
2480
2481void Assembler::sarl(Register dst) {
2482  int encode = prefix_and_encode(dst->encoding());
2483  emit_byte(0xD3);
2484  emit_byte(0xF8 | encode);
2485}
2486
2487void Assembler::sbbl(Address dst, int32_t imm32) {
2488  InstructionMark im(this);
2489  prefix(dst);
2490  emit_arith_operand(0x81, rbx, dst, imm32);
2491}
2492
2493void Assembler::sbbl(Register dst, int32_t imm32) {
2494  prefix(dst);
2495  emit_arith(0x81, 0xD8, dst, imm32);
2496}
2497
2498
2499void Assembler::sbbl(Register dst, Address src) {
2500  InstructionMark im(this);
2501  prefix(src, dst);
2502  emit_byte(0x1B);
2503  emit_operand(dst, src);
2504}
2505
2506void Assembler::sbbl(Register dst, Register src) {
2507  (void) prefix_and_encode(dst->encoding(), src->encoding());
2508  emit_arith(0x1B, 0xC0, dst, src);
2509}
2510
2511void Assembler::setb(Condition cc, Register dst) {
2512  assert(0 <= cc && cc < 16, "illegal cc");
2513  int encode = prefix_and_encode(dst->encoding(), true);
2514  emit_byte(0x0F);
2515  emit_byte(0x90 | cc);
2516  emit_byte(0xC0 | encode);
2517}
2518
2519void Assembler::shll(Register dst, int imm8) {
2520  assert(isShiftCount(imm8), "illegal shift count");
2521  int encode = prefix_and_encode(dst->encoding());
2522  if (imm8 == 1 ) {
2523    emit_byte(0xD1);
2524    emit_byte(0xE0 | encode);
2525  } else {
2526    emit_byte(0xC1);
2527    emit_byte(0xE0 | encode);
2528    emit_byte(imm8);
2529  }
2530}
2531
2532void Assembler::shll(Register dst) {
2533  int encode = prefix_and_encode(dst->encoding());
2534  emit_byte(0xD3);
2535  emit_byte(0xE0 | encode);
2536}
2537
2538void Assembler::shrl(Register dst, int imm8) {
2539  assert(isShiftCount(imm8), "illegal shift count");
2540  int encode = prefix_and_encode(dst->encoding());
2541  emit_byte(0xC1);
2542  emit_byte(0xE8 | encode);
2543  emit_byte(imm8);
2544}
2545
2546void Assembler::shrl(Register dst) {
2547  int encode = prefix_and_encode(dst->encoding());
2548  emit_byte(0xD3);
2549  emit_byte(0xE8 | encode);
2550}
2551
2552// copies a single word from [esi] to [edi]
2553void Assembler::smovl() {
2554  emit_byte(0xA5);
2555}
2556
2557void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
2558  // HMM Table D-1 says sse2
2559  // NOT_LP64(assert(VM_Version::supports_sse(), ""));
2560  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2561  emit_byte(0xF2);
2562  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2563  emit_byte(0x0F);
2564  emit_byte(0x51);
2565  emit_byte(0xC0 | encode);
2566}
2567
2568void Assembler::stmxcsr( Address dst) {
2569  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2570  InstructionMark im(this);
2571  prefix(dst);
2572  emit_byte(0x0F);
2573  emit_byte(0xAE);
2574  emit_operand(as_Register(3), dst);
2575}
2576
2577void Assembler::subl(Address dst, int32_t imm32) {
2578  InstructionMark im(this);
2579  prefix(dst);
2580  if (is8bit(imm32)) {
2581    emit_byte(0x83);
2582    emit_operand(rbp, dst, 1);
2583    emit_byte(imm32 & 0xFF);
2584  } else {
2585    emit_byte(0x81);
2586    emit_operand(rbp, dst, 4);
2587    emit_long(imm32);
2588  }
2589}
2590
2591void Assembler::subl(Register dst, int32_t imm32) {
2592  prefix(dst);
2593  emit_arith(0x81, 0xE8, dst, imm32);
2594}
2595
2596void Assembler::subl(Address dst, Register src) {
2597  InstructionMark im(this);
2598  prefix(dst, src);
2599  emit_byte(0x29);
2600  emit_operand(src, dst);
2601}
2602
2603void Assembler::subl(Register dst, Address src) {
2604  InstructionMark im(this);
2605  prefix(src, dst);
2606  emit_byte(0x2B);
2607  emit_operand(dst, src);
2608}
2609
2610void Assembler::subl(Register dst, Register src) {
2611  (void) prefix_and_encode(dst->encoding(), src->encoding());
2612  emit_arith(0x2B, 0xC0, dst, src);
2613}
2614
2615void Assembler::subsd(XMMRegister dst, XMMRegister src) {
2616  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2617  emit_byte(0xF2);
2618  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2619  emit_byte(0x0F);
2620  emit_byte(0x5C);
2621  emit_byte(0xC0 | encode);
2622}
2623
2624void Assembler::subsd(XMMRegister dst, Address src) {
2625  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2626  InstructionMark im(this);
2627  emit_byte(0xF2);
2628  prefix(src, dst);
2629  emit_byte(0x0F);
2630  emit_byte(0x5C);
2631  emit_operand(dst, src);
2632}
2633
2634void Assembler::subss(XMMRegister dst, XMMRegister src) {
2635  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2636  emit_byte(0xF3);
2637  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2638  emit_byte(0x0F);
2639  emit_byte(0x5C);
2640  emit_byte(0xC0 | encode);
2641}
2642
2643void Assembler::subss(XMMRegister dst, Address src) {
2644  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2645  InstructionMark im(this);
2646  emit_byte(0xF3);
2647  prefix(src, dst);
2648  emit_byte(0x0F);
2649  emit_byte(0x5C);
2650  emit_operand(dst, src);
2651}
2652
2653void Assembler::testb(Register dst, int imm8) {
2654  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2655  (void) prefix_and_encode(dst->encoding(), true);
2656  emit_arith_b(0xF6, 0xC0, dst, imm8);
2657}
2658
2659void Assembler::testl(Register dst, int32_t imm32) {
2660  // not using emit_arith because test
2661  // doesn't support sign-extension of
2662  // 8bit operands
2663  int encode = dst->encoding();
2664  if (encode == 0) {
2665    emit_byte(0xA9);
2666  } else {
2667    encode = prefix_and_encode(encode);
2668    emit_byte(0xF7);
2669    emit_byte(0xC0 | encode);
2670  }
2671  emit_long(imm32);
2672}
2673
2674void Assembler::testl(Register dst, Register src) {
2675  (void) prefix_and_encode(dst->encoding(), src->encoding());
2676  emit_arith(0x85, 0xC0, dst, src);
2677}
2678
2679void Assembler::testl(Register dst, Address  src) {
2680  InstructionMark im(this);
2681  prefix(src, dst);
2682  emit_byte(0x85);
2683  emit_operand(dst, src);
2684}
2685
2686void Assembler::ucomisd(XMMRegister dst, Address src) {
2687  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2688  emit_byte(0x66);
2689  ucomiss(dst, src);
2690}
2691
2692void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
2693  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2694  emit_byte(0x66);
2695  ucomiss(dst, src);
2696}
2697
2698void Assembler::ucomiss(XMMRegister dst, Address src) {
2699  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2700
2701  InstructionMark im(this);
2702  prefix(src, dst);
2703  emit_byte(0x0F);
2704  emit_byte(0x2E);
2705  emit_operand(dst, src);
2706}
2707
2708void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
2709  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2710  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2711  emit_byte(0x0F);
2712  emit_byte(0x2E);
2713  emit_byte(0xC0 | encode);
2714}
2715
2716
2717void Assembler::xaddl(Address dst, Register src) {
2718  InstructionMark im(this);
2719  prefix(dst, src);
2720  emit_byte(0x0F);
2721  emit_byte(0xC1);
2722  emit_operand(src, dst);
2723}
2724
2725void Assembler::xchgl(Register dst, Address src) { // xchg
2726  InstructionMark im(this);
2727  prefix(src, dst);
2728  emit_byte(0x87);
2729  emit_operand(dst, src);
2730}
2731
2732void Assembler::xchgl(Register dst, Register src) {
2733  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2734  emit_byte(0x87);
2735  emit_byte(0xc0 | encode);
2736}
2737
2738void Assembler::xorl(Register dst, int32_t imm32) {
2739  prefix(dst);
2740  emit_arith(0x81, 0xF0, dst, imm32);
2741}
2742
2743void Assembler::xorl(Register dst, Address src) {
2744  InstructionMark im(this);
2745  prefix(src, dst);
2746  emit_byte(0x33);
2747  emit_operand(dst, src);
2748}
2749
2750void Assembler::xorl(Register dst, Register src) {
2751  (void) prefix_and_encode(dst->encoding(), src->encoding());
2752  emit_arith(0x33, 0xC0, dst, src);
2753}
2754
2755void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
2756  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2757  emit_byte(0x66);
2758  xorps(dst, src);
2759}
2760
2761void Assembler::xorpd(XMMRegister dst, Address src) {
2762  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2763  InstructionMark im(this);
2764  emit_byte(0x66);
2765  prefix(src, dst);
2766  emit_byte(0x0F);
2767  emit_byte(0x57);
2768  emit_operand(dst, src);
2769}
2770
2771
2772void Assembler::xorps(XMMRegister dst, XMMRegister src) {
2773  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2774  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2775  emit_byte(0x0F);
2776  emit_byte(0x57);
2777  emit_byte(0xC0 | encode);
2778}
2779
2780void Assembler::xorps(XMMRegister dst, Address src) {
2781  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2782  InstructionMark im(this);
2783  prefix(src, dst);
2784  emit_byte(0x0F);
2785  emit_byte(0x57);
2786  emit_operand(dst, src);
2787}
2788
2789#ifndef _LP64
2790// 32bit only pieces of the assembler
2791
2792void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
2793  // NO PREFIX AS NEVER 64BIT
2794  InstructionMark im(this);
2795  emit_byte(0x81);
2796  emit_byte(0xF8 | src1->encoding());
2797  emit_data(imm32, rspec, 0);
2798}
2799
2800void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
2801  // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs
2802  InstructionMark im(this);
2803  emit_byte(0x81);
2804  emit_operand(rdi, src1);
2805  emit_data(imm32, rspec, 0);
2806}
2807
2808// The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax,
2809// and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded
2810// into rdx:rax.  The ZF is set if the compared values were equal, and cleared otherwise.
2811void Assembler::cmpxchg8(Address adr) {
2812  InstructionMark im(this);
2813  emit_byte(0x0F);
2814  emit_byte(0xc7);
2815  emit_operand(rcx, adr);
2816}
2817
2818void Assembler::decl(Register dst) {
2819  // Don't use it directly. Use MacroAssembler::decrementl() instead.
2820 emit_byte(0x48 | dst->encoding());
2821}
2822
2823#endif // _LP64
2824
2825// 64bit typically doesn't use the x87 but needs to for the trig funcs
2826
2827void Assembler::fabs() {
2828  emit_byte(0xD9);
2829  emit_byte(0xE1);
2830}
2831
2832void Assembler::fadd(int i) {
2833  emit_farith(0xD8, 0xC0, i);
2834}
2835
2836void Assembler::fadd_d(Address src) {
2837  InstructionMark im(this);
2838  emit_byte(0xDC);
2839  emit_operand32(rax, src);
2840}
2841
2842void Assembler::fadd_s(Address src) {
2843  InstructionMark im(this);
2844  emit_byte(0xD8);
2845  emit_operand32(rax, src);
2846}
2847
2848void Assembler::fadda(int i) {
2849  emit_farith(0xDC, 0xC0, i);
2850}
2851
2852void Assembler::faddp(int i) {
2853  emit_farith(0xDE, 0xC0, i);
2854}
2855
2856void Assembler::fchs() {
2857  emit_byte(0xD9);
2858  emit_byte(0xE0);
2859}
2860
2861void Assembler::fcom(int i) {
2862  emit_farith(0xD8, 0xD0, i);
2863}
2864
2865void Assembler::fcomp(int i) {
2866  emit_farith(0xD8, 0xD8, i);
2867}
2868
2869void Assembler::fcomp_d(Address src) {
2870  InstructionMark im(this);
2871  emit_byte(0xDC);
2872  emit_operand32(rbx, src);
2873}
2874
2875void Assembler::fcomp_s(Address src) {
2876  InstructionMark im(this);
2877  emit_byte(0xD8);
2878  emit_operand32(rbx, src);
2879}
2880
2881void Assembler::fcompp() {
2882  emit_byte(0xDE);
2883  emit_byte(0xD9);
2884}
2885
2886void Assembler::fcos() {
2887  emit_byte(0xD9);
2888  emit_byte(0xFF);
2889}
2890
2891void Assembler::fdecstp() {
2892  emit_byte(0xD9);
2893  emit_byte(0xF6);
2894}
2895
2896void Assembler::fdiv(int i) {
2897  emit_farith(0xD8, 0xF0, i);
2898}
2899
2900void Assembler::fdiv_d(Address src) {
2901  InstructionMark im(this);
2902  emit_byte(0xDC);
2903  emit_operand32(rsi, src);
2904}
2905
2906void Assembler::fdiv_s(Address src) {
2907  InstructionMark im(this);
2908  emit_byte(0xD8);
2909  emit_operand32(rsi, src);
2910}
2911
2912void Assembler::fdiva(int i) {
2913  emit_farith(0xDC, 0xF8, i);
2914}
2915
2916// Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
2917//       is erroneous for some of the floating-point instructions below.
2918
2919void Assembler::fdivp(int i) {
2920  emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
2921}
2922
2923void Assembler::fdivr(int i) {
2924  emit_farith(0xD8, 0xF8, i);
2925}
2926
2927void Assembler::fdivr_d(Address src) {
2928  InstructionMark im(this);
2929  emit_byte(0xDC);
2930  emit_operand32(rdi, src);
2931}
2932
2933void Assembler::fdivr_s(Address src) {
2934  InstructionMark im(this);
2935  emit_byte(0xD8);
2936  emit_operand32(rdi, src);
2937}
2938
2939void Assembler::fdivra(int i) {
2940  emit_farith(0xDC, 0xF0, i);
2941}
2942
2943void Assembler::fdivrp(int i) {
2944  emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
2945}
2946
2947void Assembler::ffree(int i) {
2948  emit_farith(0xDD, 0xC0, i);
2949}
2950
2951void Assembler::fild_d(Address adr) {
2952  InstructionMark im(this);
2953  emit_byte(0xDF);
2954  emit_operand32(rbp, adr);
2955}
2956
2957void Assembler::fild_s(Address adr) {
2958  InstructionMark im(this);
2959  emit_byte(0xDB);
2960  emit_operand32(rax, adr);
2961}
2962
2963void Assembler::fincstp() {
2964  emit_byte(0xD9);
2965  emit_byte(0xF7);
2966}
2967
2968void Assembler::finit() {
2969  emit_byte(0x9B);
2970  emit_byte(0xDB);
2971  emit_byte(0xE3);
2972}
2973
2974void Assembler::fist_s(Address adr) {
2975  InstructionMark im(this);
2976  emit_byte(0xDB);
2977  emit_operand32(rdx, adr);
2978}
2979
2980void Assembler::fistp_d(Address adr) {
2981  InstructionMark im(this);
2982  emit_byte(0xDF);
2983  emit_operand32(rdi, adr);
2984}
2985
2986void Assembler::fistp_s(Address adr) {
2987  InstructionMark im(this);
2988  emit_byte(0xDB);
2989  emit_operand32(rbx, adr);
2990}
2991
2992void Assembler::fld1() {
2993  emit_byte(0xD9);
2994  emit_byte(0xE8);
2995}
2996
2997void Assembler::fld_d(Address adr) {
2998  InstructionMark im(this);
2999  emit_byte(0xDD);
3000  emit_operand32(rax, adr);
3001}
3002
3003void Assembler::fld_s(Address adr) {
3004  InstructionMark im(this);
3005  emit_byte(0xD9);
3006  emit_operand32(rax, adr);
3007}
3008
3009
3010void Assembler::fld_s(int index) {
3011  emit_farith(0xD9, 0xC0, index);
3012}
3013
3014void Assembler::fld_x(Address adr) {
3015  InstructionMark im(this);
3016  emit_byte(0xDB);
3017  emit_operand32(rbp, adr);
3018}
3019
3020void Assembler::fldcw(Address src) {
3021  InstructionMark im(this);
3022  emit_byte(0xd9);
3023  emit_operand32(rbp, src);
3024}
3025
3026void Assembler::fldenv(Address src) {
3027  InstructionMark im(this);
3028  emit_byte(0xD9);
3029  emit_operand32(rsp, src);
3030}
3031
3032void Assembler::fldlg2() {
3033  emit_byte(0xD9);
3034  emit_byte(0xEC);
3035}
3036
3037void Assembler::fldln2() {
3038  emit_byte(0xD9);
3039  emit_byte(0xED);
3040}
3041
3042void Assembler::fldz() {
3043  emit_byte(0xD9);
3044  emit_byte(0xEE);
3045}
3046
3047void Assembler::flog() {
3048  fldln2();
3049  fxch();
3050  fyl2x();
3051}
3052
3053void Assembler::flog10() {
3054  fldlg2();
3055  fxch();
3056  fyl2x();
3057}
3058
3059void Assembler::fmul(int i) {
3060  emit_farith(0xD8, 0xC8, i);
3061}
3062
3063void Assembler::fmul_d(Address src) {
3064  InstructionMark im(this);
3065  emit_byte(0xDC);
3066  emit_operand32(rcx, src);
3067}
3068
3069void Assembler::fmul_s(Address src) {
3070  InstructionMark im(this);
3071  emit_byte(0xD8);
3072  emit_operand32(rcx, src);
3073}
3074
3075void Assembler::fmula(int i) {
3076  emit_farith(0xDC, 0xC8, i);
3077}
3078
3079void Assembler::fmulp(int i) {
3080  emit_farith(0xDE, 0xC8, i);
3081}
3082
3083void Assembler::fnsave(Address dst) {
3084  InstructionMark im(this);
3085  emit_byte(0xDD);
3086  emit_operand32(rsi, dst);
3087}
3088
3089void Assembler::fnstcw(Address src) {
3090  InstructionMark im(this);
3091  emit_byte(0x9B);
3092  emit_byte(0xD9);
3093  emit_operand32(rdi, src);
3094}
3095
3096void Assembler::fnstsw_ax() {
3097  emit_byte(0xdF);
3098  emit_byte(0xE0);
3099}
3100
3101void Assembler::fprem() {
3102  emit_byte(0xD9);
3103  emit_byte(0xF8);
3104}
3105
3106void Assembler::fprem1() {
3107  emit_byte(0xD9);
3108  emit_byte(0xF5);
3109}
3110
3111void Assembler::frstor(Address src) {
3112  InstructionMark im(this);
3113  emit_byte(0xDD);
3114  emit_operand32(rsp, src);
3115}
3116
3117void Assembler::fsin() {
3118  emit_byte(0xD9);
3119  emit_byte(0xFE);
3120}
3121
3122void Assembler::fsqrt() {
3123  emit_byte(0xD9);
3124  emit_byte(0xFA);
3125}
3126
3127void Assembler::fst_d(Address adr) {
3128  InstructionMark im(this);
3129  emit_byte(0xDD);
3130  emit_operand32(rdx, adr);
3131}
3132
3133void Assembler::fst_s(Address adr) {
3134  InstructionMark im(this);
3135  emit_byte(0xD9);
3136  emit_operand32(rdx, adr);
3137}
3138
3139void Assembler::fstp_d(Address adr) {
3140  InstructionMark im(this);
3141  emit_byte(0xDD);
3142  emit_operand32(rbx, adr);
3143}
3144
3145void Assembler::fstp_d(int index) {
3146  emit_farith(0xDD, 0xD8, index);
3147}
3148
3149void Assembler::fstp_s(Address adr) {
3150  InstructionMark im(this);
3151  emit_byte(0xD9);
3152  emit_operand32(rbx, adr);
3153}
3154
3155void Assembler::fstp_x(Address adr) {
3156  InstructionMark im(this);
3157  emit_byte(0xDB);
3158  emit_operand32(rdi, adr);
3159}
3160
3161void Assembler::fsub(int i) {
3162  emit_farith(0xD8, 0xE0, i);
3163}
3164
3165void Assembler::fsub_d(Address src) {
3166  InstructionMark im(this);
3167  emit_byte(0xDC);
3168  emit_operand32(rsp, src);
3169}
3170
3171void Assembler::fsub_s(Address src) {
3172  InstructionMark im(this);
3173  emit_byte(0xD8);
3174  emit_operand32(rsp, src);
3175}
3176
3177void Assembler::fsuba(int i) {
3178  emit_farith(0xDC, 0xE8, i);
3179}
3180
3181void Assembler::fsubp(int i) {
3182  emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
3183}
3184
3185void Assembler::fsubr(int i) {
3186  emit_farith(0xD8, 0xE8, i);
3187}
3188
3189void Assembler::fsubr_d(Address src) {
3190  InstructionMark im(this);
3191  emit_byte(0xDC);
3192  emit_operand32(rbp, src);
3193}
3194
3195void Assembler::fsubr_s(Address src) {
3196  InstructionMark im(this);
3197  emit_byte(0xD8);
3198  emit_operand32(rbp, src);
3199}
3200
3201void Assembler::fsubra(int i) {
3202  emit_farith(0xDC, 0xE0, i);
3203}
3204
3205void Assembler::fsubrp(int i) {
3206  emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
3207}
3208
3209void Assembler::ftan() {
3210  emit_byte(0xD9);
3211  emit_byte(0xF2);
3212  emit_byte(0xDD);
3213  emit_byte(0xD8);
3214}
3215
3216void Assembler::ftst() {
3217  emit_byte(0xD9);
3218  emit_byte(0xE4);
3219}
3220
3221void Assembler::fucomi(int i) {
3222  // make sure the instruction is supported (introduced for P6, together with cmov)
3223  guarantee(VM_Version::supports_cmov(), "illegal instruction");
3224  emit_farith(0xDB, 0xE8, i);
3225}
3226
3227void Assembler::fucomip(int i) {
3228  // make sure the instruction is supported (introduced for P6, together with cmov)
3229  guarantee(VM_Version::supports_cmov(), "illegal instruction");
3230  emit_farith(0xDF, 0xE8, i);
3231}
3232
3233void Assembler::fwait() {
3234  emit_byte(0x9B);
3235}
3236
3237void Assembler::fxch(int i) {
3238  emit_farith(0xD9, 0xC8, i);
3239}
3240
3241void Assembler::fyl2x() {
3242  emit_byte(0xD9);
3243  emit_byte(0xF1);
3244}
3245
3246
3247#ifndef _LP64
3248
3249void Assembler::incl(Register dst) {
3250  // Don't use it directly. Use MacroAssembler::incrementl() instead.
3251 emit_byte(0x40 | dst->encoding());
3252}
3253
3254void Assembler::lea(Register dst, Address src) {
3255  leal(dst, src);
3256}
3257
3258void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
3259  InstructionMark im(this);
3260  emit_byte(0xC7);
3261  emit_operand(rax, dst);
3262  emit_data((int)imm32, rspec, 0);
3263}
3264
3265void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
3266  InstructionMark im(this);
3267  int encode = prefix_and_encode(dst->encoding());
3268  emit_byte(0xB8 | encode);
3269  emit_data((int)imm32, rspec, 0);
3270}
3271
3272void Assembler::popa() { // 32bit
3273  emit_byte(0x61);
3274}
3275
3276void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
3277  InstructionMark im(this);
3278  emit_byte(0x68);
3279  emit_data(imm32, rspec, 0);
3280}
3281
3282void Assembler::pusha() { // 32bit
3283  emit_byte(0x60);
3284}
3285
3286void Assembler::set_byte_if_not_zero(Register dst) {
3287  emit_byte(0x0F);
3288  emit_byte(0x95);
3289  emit_byte(0xE0 | dst->encoding());
3290}
3291
3292void Assembler::shldl(Register dst, Register src) {
3293  emit_byte(0x0F);
3294  emit_byte(0xA5);
3295  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
3296}
3297
3298void Assembler::shrdl(Register dst, Register src) {
3299  emit_byte(0x0F);
3300  emit_byte(0xAD);
3301  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
3302}
3303
3304#else // LP64
3305
3306// 64bit only pieces of the assembler
// reachable() should only be consulted for 64bit instructions that can use
// rip-relative addressing; it cannot be used by instructions that want an
// immediate value.
3309
3310bool Assembler::reachable(AddressLiteral adr) {
3311  int64_t disp;
3312  // None will force a 64bit literal to the code stream. Likely a placeholder
3313  // for something that will be patched later and we need to certain it will
3314  // always be reachable.
3315  if (adr.reloc() == relocInfo::none) {
3316    return false;
3317  }
3318  if (adr.reloc() == relocInfo::internal_word_type) {
3319    // This should be rip relative and easily reachable.
3320    return true;
3321  }
3322  if (adr.reloc() == relocInfo::virtual_call_type ||
3323      adr.reloc() == relocInfo::opt_virtual_call_type ||
3324      adr.reloc() == relocInfo::static_call_type ||
3325      adr.reloc() == relocInfo::static_stub_type ) {
3326    // This should be rip relative within the code cache and easily
3327    // reachable until we get huge code caches. (At which point
3328    // ic code is going to have issues).
3329    return true;
3330  }
3331  if (adr.reloc() != relocInfo::external_word_type &&
3332      adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
3333      adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
3334      adr.reloc() != relocInfo::runtime_call_type ) {
3335    return false;
3336  }
3337
3338  // Stress the correction code
3339  if (ForceUnreachable) {
3340    // Must be runtimecall reloc, see if it is in the codecache
3341    // Flipping stuff in the codecache to be unreachable causes issues
3342    // with things like inline caches where the additional instructions
3343    // are not handled.
3344    if (CodeCache::find_blob(adr._target) == NULL) {
3345      return false;
3346    }
3347  }
3348  // For external_word_type/runtime_call_type if it is reachable from where we
3349  // are now (possibly a temp buffer) and where we might end up
3350  // anywhere in the codeCache then we are always reachable.
3351  // This would have to change if we ever save/restore shared code
3352  // to be more pessimistic.
3353
3354  disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
3355  if (!is_simm32(disp)) return false;
3356  disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
3357  if (!is_simm32(disp)) return false;
3358
3359  disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int));
3360
3361  // Because rip relative is a disp + address_of_next_instruction and we
3362  // don't know the value of address_of_next_instruction we apply a fudge factor
3363  // to make sure we will be ok no matter the size of the instruction we get placed into.
3364  // We don't have to fudge the checks above here because they are already worst case.
3365
3366  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
3367  // + 4 because better safe than sorry.
3368  const int fudge = 12 + 4;
3369  if (disp < 0) {
3370    disp -= fudge;
3371  } else {
3372    disp += fudge;
3373  }
3374  return is_simm32(disp);
3375}
3376
3377void Assembler::emit_data64(jlong data,
3378                            relocInfo::relocType rtype,
3379                            int format) {
3380  if (rtype == relocInfo::none) {
3381    emit_long64(data);
3382  } else {
3383    emit_data64(data, Relocation::spec_simple(rtype), format);
3384  }
3385}
3386
3387void Assembler::emit_data64(jlong data,
3388                            RelocationHolder const& rspec,
3389                            int format) {
3390  assert(imm_operand == 0, "default format must be immediate in this file");
3391  assert(imm_operand == format, "must be immediate");
3392  assert(inst_mark() != NULL, "must be inside InstructionMark");
3393  // Do not use AbstractAssembler::relocate, which is not intended for
3394  // embedded words.  Instead, relocate to the enclosing instruction.
3395  code_section()->relocate(inst_mark(), rspec, format);
3396#ifdef ASSERT
3397  check_relocation(rspec, format);
3398#endif
3399  emit_long64(data);
3400}
3401
3402int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
3403  if (reg_enc >= 8) {
3404    prefix(REX_B);
3405    reg_enc -= 8;
3406  } else if (byteinst && reg_enc >= 4) {
3407    prefix(REX);
3408  }
3409  return reg_enc;
3410}
3411
3412int Assembler::prefixq_and_encode(int reg_enc) {
3413  if (reg_enc < 8) {
3414    prefix(REX_W);
3415  } else {
3416    prefix(REX_WB);
3417    reg_enc -= 8;
3418  }
3419  return reg_enc;
3420}
3421
3422int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
3423  if (dst_enc < 8) {
3424    if (src_enc >= 8) {
3425      prefix(REX_B);
3426      src_enc -= 8;
3427    } else if (byteinst && src_enc >= 4) {
3428      prefix(REX);
3429    }
3430  } else {
3431    if (src_enc < 8) {
3432      prefix(REX_R);
3433    } else {
3434      prefix(REX_RB);
3435      src_enc -= 8;
3436    }
3437    dst_enc -= 8;
3438  }
3439  return dst_enc << 3 | src_enc;
3440}
3441
3442int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
3443  if (dst_enc < 8) {
3444    if (src_enc < 8) {
3445      prefix(REX_W);
3446    } else {
3447      prefix(REX_WB);
3448      src_enc -= 8;
3449    }
3450  } else {
3451    if (src_enc < 8) {
3452      prefix(REX_WR);
3453    } else {
3454      prefix(REX_WRB);
3455      src_enc -= 8;
3456    }
3457    dst_enc -= 8;
3458  }
3459  return dst_enc << 3 | src_enc;
3460}
3461
3462void Assembler::prefix(Register reg) {
3463  if (reg->encoding() >= 8) {
3464    prefix(REX_B);
3465  }
3466}
3467
3468void Assembler::prefix(Address adr) {
3469  if (adr.base_needs_rex()) {
3470    if (adr.index_needs_rex()) {
3471      prefix(REX_XB);
3472    } else {
3473      prefix(REX_B);
3474    }
3475  } else {
3476    if (adr.index_needs_rex()) {
3477      prefix(REX_X);
3478    }
3479  }
3480}
3481
3482void Assembler::prefixq(Address adr) {
3483  if (adr.base_needs_rex()) {
3484    if (adr.index_needs_rex()) {
3485      prefix(REX_WXB);
3486    } else {
3487      prefix(REX_WB);
3488    }
3489  } else {
3490    if (adr.index_needs_rex()) {
3491      prefix(REX_WX);
3492    } else {
3493      prefix(REX_W);
3494    }
3495  }
3496}
3497
3498
3499void Assembler::prefix(Address adr, Register reg, bool byteinst) {
3500  if (reg->encoding() < 8) {
3501    if (adr.base_needs_rex()) {
3502      if (adr.index_needs_rex()) {
3503        prefix(REX_XB);
3504      } else {
3505        prefix(REX_B);
3506      }
3507    } else {
3508      if (adr.index_needs_rex()) {
3509        prefix(REX_X);
3510      } else if (reg->encoding() >= 4 ) {
3511        prefix(REX);
3512      }
3513    }
3514  } else {
3515    if (adr.base_needs_rex()) {
3516      if (adr.index_needs_rex()) {
3517        prefix(REX_RXB);
3518      } else {
3519        prefix(REX_RB);
3520      }
3521    } else {
3522      if (adr.index_needs_rex()) {
3523        prefix(REX_RX);
3524      } else {
3525        prefix(REX_R);
3526      }
3527    }
3528  }
3529}
3530
3531void Assembler::prefixq(Address adr, Register src) {
3532  if (src->encoding() < 8) {
3533    if (adr.base_needs_rex()) {
3534      if (adr.index_needs_rex()) {
3535        prefix(REX_WXB);
3536      } else {
3537        prefix(REX_WB);
3538      }
3539    } else {
3540      if (adr.index_needs_rex()) {
3541        prefix(REX_WX);
3542      } else {
3543        prefix(REX_W);
3544      }
3545    }
3546  } else {
3547    if (adr.base_needs_rex()) {
3548      if (adr.index_needs_rex()) {
3549        prefix(REX_WRXB);
3550      } else {
3551        prefix(REX_WRB);
3552      }
3553    } else {
3554      if (adr.index_needs_rex()) {
3555        prefix(REX_WRX);
3556      } else {
3557        prefix(REX_WR);
3558      }
3559    }
3560  }
3561}
3562
3563void Assembler::prefix(Address adr, XMMRegister reg) {
3564  if (reg->encoding() < 8) {
3565    if (adr.base_needs_rex()) {
3566      if (adr.index_needs_rex()) {
3567        prefix(REX_XB);
3568      } else {
3569        prefix(REX_B);
3570      }
3571    } else {
3572      if (adr.index_needs_rex()) {
3573        prefix(REX_X);
3574      }
3575    }
3576  } else {
3577    if (adr.base_needs_rex()) {
3578      if (adr.index_needs_rex()) {
3579        prefix(REX_RXB);
3580      } else {
3581        prefix(REX_RB);
3582      }
3583    } else {
3584      if (adr.index_needs_rex()) {
3585        prefix(REX_RX);
3586      } else {
3587        prefix(REX_R);
3588      }
3589    }
3590  }
3591}
3592
3593void Assembler::adcq(Register dst, int32_t imm32) {
3594  (void) prefixq_and_encode(dst->encoding());
3595  emit_arith(0x81, 0xD0, dst, imm32);
3596}
3597
3598void Assembler::adcq(Register dst, Address src) {
3599  InstructionMark im(this);
3600  prefixq(src, dst);
3601  emit_byte(0x13);
3602  emit_operand(dst, src);
3603}
3604
3605void Assembler::adcq(Register dst, Register src) {
3606  (int) prefixq_and_encode(dst->encoding(), src->encoding());
3607  emit_arith(0x13, 0xC0, dst, src);
3608}
3609
3610void Assembler::addq(Address dst, int32_t imm32) {
3611  InstructionMark im(this);
3612  prefixq(dst);
3613  emit_arith_operand(0x81, rax, dst,imm32);
3614}
3615
3616void Assembler::addq(Address dst, Register src) {
3617  InstructionMark im(this);
3618  prefixq(dst, src);
3619  emit_byte(0x01);
3620  emit_operand(src, dst);
3621}
3622
3623void Assembler::addq(Register dst, int32_t imm32) {
3624  (void) prefixq_and_encode(dst->encoding());
3625  emit_arith(0x81, 0xC0, dst, imm32);
3626}
3627
3628void Assembler::addq(Register dst, Address src) {
3629  InstructionMark im(this);
3630  prefixq(src, dst);
3631  emit_byte(0x03);
3632  emit_operand(dst, src);
3633}
3634
3635void Assembler::addq(Register dst, Register src) {
3636  (void) prefixq_and_encode(dst->encoding(), src->encoding());
3637  emit_arith(0x03, 0xC0, dst, src);
3638}
3639
3640void Assembler::andq(Register dst, int32_t imm32) {
3641  (void) prefixq_and_encode(dst->encoding());
3642  emit_arith(0x81, 0xE0, dst, imm32);
3643}
3644
3645void Assembler::andq(Register dst, Address src) {
3646  InstructionMark im(this);
3647  prefixq(src, dst);
3648  emit_byte(0x23);
3649  emit_operand(dst, src);
3650}
3651
3652void Assembler::andq(Register dst, Register src) {
3653  (int) prefixq_and_encode(dst->encoding(), src->encoding());
3654  emit_arith(0x23, 0xC0, dst, src);
3655}
3656
3657void Assembler::bswapq(Register reg) {
3658  int encode = prefixq_and_encode(reg->encoding());
3659  emit_byte(0x0F);
3660  emit_byte(0xC8 | encode);
3661}
3662
3663void Assembler::cdqq() {
3664  prefix(REX_W);
3665  emit_byte(0x99);
3666}
3667
3668void Assembler::clflush(Address adr) {
3669  prefix(adr);
3670  emit_byte(0x0F);
3671  emit_byte(0xAE);
3672  emit_operand(rdi, adr);
3673}
3674
3675void Assembler::cmovq(Condition cc, Register dst, Register src) {
3676  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3677  emit_byte(0x0F);
3678  emit_byte(0x40 | cc);
3679  emit_byte(0xC0 | encode);
3680}
3681
3682void Assembler::cmovq(Condition cc, Register dst, Address src) {
3683  InstructionMark im(this);
3684  prefixq(src, dst);
3685  emit_byte(0x0F);
3686  emit_byte(0x40 | cc);
3687  emit_operand(dst, src);
3688}
3689
3690void Assembler::cmpq(Address dst, int32_t imm32) {
3691  InstructionMark im(this);
3692  prefixq(dst);
3693  emit_byte(0x81);
3694  emit_operand(rdi, dst, 4);
3695  emit_long(imm32);
3696}
3697
3698void Assembler::cmpq(Register dst, int32_t imm32) {
3699  (void) prefixq_and_encode(dst->encoding());
3700  emit_arith(0x81, 0xF8, dst, imm32);
3701}
3702
3703void Assembler::cmpq(Address dst, Register src) {
3704  InstructionMark im(this);
3705  prefixq(dst, src);
3706  emit_byte(0x3B);
3707  emit_operand(src, dst);
3708}
3709
3710void Assembler::cmpq(Register dst, Register src) {
3711  (void) prefixq_and_encode(dst->encoding(), src->encoding());
3712  emit_arith(0x3B, 0xC0, dst, src);
3713}
3714
3715void Assembler::cmpq(Register dst, Address  src) {
3716  InstructionMark im(this);
3717  prefixq(src, dst);
3718  emit_byte(0x3B);
3719  emit_operand(dst, src);
3720}
3721
3722void Assembler::cmpxchgq(Register reg, Address adr) {
3723  InstructionMark im(this);
3724  prefixq(adr, reg);
3725  emit_byte(0x0F);
3726  emit_byte(0xB1);
3727  emit_operand(reg, adr);
3728}
3729
3730void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
3731  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3732  emit_byte(0xF2);
3733  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3734  emit_byte(0x0F);
3735  emit_byte(0x2A);
3736  emit_byte(0xC0 | encode);
3737}
3738
3739void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
3740  NOT_LP64(assert(VM_Version::supports_sse(), ""));
3741  emit_byte(0xF3);
3742  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3743  emit_byte(0x0F);
3744  emit_byte(0x2A);
3745  emit_byte(0xC0 | encode);
3746}
3747
3748void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
3749  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3750  emit_byte(0xF2);
3751  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3752  emit_byte(0x0F);
3753  emit_byte(0x2C);
3754  emit_byte(0xC0 | encode);
3755}
3756
3757void Assembler::cvttss2siq(Register dst, XMMRegister src) {
3758  NOT_LP64(assert(VM_Version::supports_sse(), ""));
3759  emit_byte(0xF3);
3760  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3761  emit_byte(0x0F);
3762  emit_byte(0x2C);
3763  emit_byte(0xC0 | encode);
3764}
3765
3766void Assembler::decl(Register dst) {
3767  // Don't use it directly. Use MacroAssembler::decrementl() instead.
3768  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
3769  int encode = prefix_and_encode(dst->encoding());
3770  emit_byte(0xFF);
3771  emit_byte(0xC8 | encode);
3772}
3773
3774void Assembler::decq(Register dst) {
3775  // Don't use it directly. Use MacroAssembler::decrementq() instead.
3776  // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
3777  int encode = prefixq_and_encode(dst->encoding());
3778  emit_byte(0xFF);
3779  emit_byte(0xC8 | encode);
3780}
3781
3782void Assembler::decq(Address dst) {
3783  // Don't use it directly. Use MacroAssembler::decrementq() instead.
3784  InstructionMark im(this);
3785  prefixq(dst);
3786  emit_byte(0xFF);
3787  emit_operand(rcx, dst);
3788}
3789
3790void Assembler::fxrstor(Address src) {
3791  prefixq(src);
3792  emit_byte(0x0F);
3793  emit_byte(0xAE);
3794  emit_operand(as_Register(1), src);
3795}
3796
3797void Assembler::fxsave(Address dst) {
3798  prefixq(dst);
3799  emit_byte(0x0F);
3800  emit_byte(0xAE);
3801  emit_operand(as_Register(0), dst);
3802}
3803
3804void Assembler::idivq(Register src) {
3805  int encode = prefixq_and_encode(src->encoding());
3806  emit_byte(0xF7);
3807  emit_byte(0xF8 | encode);
3808}
3809
3810void Assembler::imulq(Register dst, Register src) {
3811  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3812  emit_byte(0x0F);
3813  emit_byte(0xAF);
3814  emit_byte(0xC0 | encode);
3815}
3816
3817void Assembler::imulq(Register dst, Register src, int value) {
3818  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3819  if (is8bit(value)) {
3820    emit_byte(0x6B);
3821    emit_byte(0xC0 | encode);
3822    emit_byte(value);
3823  } else {
3824    emit_byte(0x69);
3825    emit_byte(0xC0 | encode);
3826    emit_long(value);
3827  }
3828}
3829
3830void Assembler::incl(Register dst) {
3831  // Don't use it directly. Use MacroAssembler::incrementl() instead.
3832  // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
3833  int encode = prefix_and_encode(dst->encoding());
3834  emit_byte(0xFF);
3835  emit_byte(0xC0 | encode);
3836}
3837
3838void Assembler::incq(Register dst) {
3839  // Don't use it directly. Use MacroAssembler::incrementq() instead.
3840  // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
3841  int encode = prefixq_and_encode(dst->encoding());
3842  emit_byte(0xFF);
3843  emit_byte(0xC0 | encode);
3844}
3845
3846void Assembler::incq(Address dst) {
3847  // Don't use it directly. Use MacroAssembler::incrementq() instead.
3848  InstructionMark im(this);
3849  prefixq(dst);
3850  emit_byte(0xFF);
3851  emit_operand(rax, dst);
3852}
3853
3854void Assembler::lea(Register dst, Address src) {
3855  leaq(dst, src);
3856}
3857
3858void Assembler::leaq(Register dst, Address src) {
3859  InstructionMark im(this);
3860  prefixq(src, dst);
3861  emit_byte(0x8D);
3862  emit_operand(dst, src);
3863}
3864
3865void Assembler::mov64(Register dst, int64_t imm64) {
3866  InstructionMark im(this);
3867  int encode = prefixq_and_encode(dst->encoding());
3868  emit_byte(0xB8 | encode);
3869  emit_long64(imm64);
3870}
3871
3872void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
3873  InstructionMark im(this);
3874  int encode = prefixq_and_encode(dst->encoding());
3875  emit_byte(0xB8 | encode);
3876  emit_data64(imm64, rspec);
3877}
3878
3879void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
3880  InstructionMark im(this);
3881  int encode = prefix_and_encode(dst->encoding());
3882  emit_byte(0xB8 | encode);
3883  emit_data((int)imm32, rspec, narrow_oop_operand);
3884}
3885
3886void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
3887  InstructionMark im(this);
3888  prefix(dst);
3889  emit_byte(0xC7);
3890  emit_operand(rax, dst, 4);
3891  emit_data((int)imm32, rspec, narrow_oop_operand);
3892}
3893
3894void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
3895  InstructionMark im(this);
3896  int encode = prefix_and_encode(src1->encoding());
3897  emit_byte(0x81);
3898  emit_byte(0xF8 | encode);
3899  emit_data((int)imm32, rspec, narrow_oop_operand);
3900}
3901
3902void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
3903  InstructionMark im(this);
3904  prefix(src1);
3905  emit_byte(0x81);
3906  emit_operand(rax, src1, 4);
3907  emit_data((int)imm32, rspec, narrow_oop_operand);
3908}
3909
3910void Assembler::movdq(XMMRegister dst, Register src) {
3911  // table D-1 says MMX/SSE2
3912  NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
3913  emit_byte(0x66);
3914  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3915  emit_byte(0x0F);
3916  emit_byte(0x6E);
3917  emit_byte(0xC0 | encode);
3918}
3919
3920void Assembler::movdq(Register dst, XMMRegister src) {
3921  // table D-1 says MMX/SSE2
3922  NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
3923  emit_byte(0x66);
3924  // swap src/dst to get correct prefix
3925  int encode = prefixq_and_encode(src->encoding(), dst->encoding());
3926  emit_byte(0x0F);
3927  emit_byte(0x7E);
3928  emit_byte(0xC0 | encode);
3929}
3930
3931void Assembler::movq(Register dst, Register src) {
3932  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3933  emit_byte(0x8B);
3934  emit_byte(0xC0 | encode);
3935}
3936
3937void Assembler::movq(Register dst, Address src) {
3938  InstructionMark im(this);
3939  prefixq(src, dst);
3940  emit_byte(0x8B);
3941  emit_operand(dst, src);
3942}
3943
3944void Assembler::movq(Address dst, Register src) {
3945  InstructionMark im(this);
3946  prefixq(dst, src);
3947  emit_byte(0x89);
3948  emit_operand(src, dst);
3949}
3950
3951void Assembler::movsbq(Register dst, Address src) {
3952  InstructionMark im(this);
3953  prefixq(src, dst);
3954  emit_byte(0x0F);
3955  emit_byte(0xBE);
3956  emit_operand(dst, src);
3957}
3958
3959void Assembler::movsbq(Register dst, Register src) {
3960  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3961  emit_byte(0x0F);
3962  emit_byte(0xBE);
3963  emit_byte(0xC0 | encode);
3964}
3965
3966void Assembler::movslq(Register dst, int32_t imm32) {
3967  // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
3968  // and movslq(r8, 3); as movl     $0x0000000048000000,(%rbx)
3969  // as a result we shouldn't use until tested at runtime...
3970  ShouldNotReachHere();
3971  InstructionMark im(this);
3972  int encode = prefixq_and_encode(dst->encoding());
3973  emit_byte(0xC7 | encode);
3974  emit_long(imm32);
3975}
3976
3977void Assembler::movslq(Address dst, int32_t imm32) {
3978  assert(is_simm32(imm32), "lost bits");
3979  InstructionMark im(this);
3980  prefixq(dst);
3981  emit_byte(0xC7);
3982  emit_operand(rax, dst, 4);
3983  emit_long(imm32);
3984}
3985
3986void Assembler::movslq(Register dst, Address src) {
3987  InstructionMark im(this);
3988  prefixq(src, dst);
3989  emit_byte(0x63);
3990  emit_operand(dst, src);
3991}
3992
3993void Assembler::movslq(Register dst, Register src) {
3994  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3995  emit_byte(0x63);
3996  emit_byte(0xC0 | encode);
3997}
3998
3999void Assembler::movswq(Register dst, Address src) {
4000  InstructionMark im(this);
4001  prefixq(src, dst);
4002  emit_byte(0x0F);
4003  emit_byte(0xBF);
4004  emit_operand(dst, src);
4005}
4006
4007void Assembler::movswq(Register dst, Register src) {
4008  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4009  emit_byte(0x0F);
4010  emit_byte(0xBF);
4011  emit_byte(0xC0 | encode);
4012}
4013
4014void Assembler::movzbq(Register dst, Address src) {
4015  InstructionMark im(this);
4016  prefixq(src, dst);
4017  emit_byte(0x0F);
4018  emit_byte(0xB6);
4019  emit_operand(dst, src);
4020}
4021
4022void Assembler::movzbq(Register dst, Register src) {
4023  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4024  emit_byte(0x0F);
4025  emit_byte(0xB6);
4026  emit_byte(0xC0 | encode);
4027}
4028
4029void Assembler::movzwq(Register dst, Address src) {
4030  InstructionMark im(this);
4031  prefixq(src, dst);
4032  emit_byte(0x0F);
4033  emit_byte(0xB7);
4034  emit_operand(dst, src);
4035}
4036
4037void Assembler::movzwq(Register dst, Register src) {
4038  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4039  emit_byte(0x0F);
4040  emit_byte(0xB7);
4041  emit_byte(0xC0 | encode);
4042}
4043
4044void Assembler::negq(Register dst) {
4045  int encode = prefixq_and_encode(dst->encoding());
4046  emit_byte(0xF7);
4047  emit_byte(0xD8 | encode);
4048}
4049
4050void Assembler::notq(Register dst) {
4051  int encode = prefixq_and_encode(dst->encoding());
4052  emit_byte(0xF7);
4053  emit_byte(0xD0 | encode);
4054}
4055
4056void Assembler::orq(Address dst, int32_t imm32) {
4057  InstructionMark im(this);
4058  prefixq(dst);
4059  emit_byte(0x81);
4060  emit_operand(rcx, dst, 4);
4061  emit_long(imm32);
4062}
4063
4064void Assembler::orq(Register dst, int32_t imm32) {
4065  (void) prefixq_and_encode(dst->encoding());
4066  emit_arith(0x81, 0xC8, dst, imm32);
4067}
4068
4069void Assembler::orq(Register dst, Address src) {
4070  InstructionMark im(this);
4071  prefixq(src, dst);
4072  emit_byte(0x0B);
4073  emit_operand(dst, src);
4074}
4075
4076void Assembler::orq(Register dst, Register src) {
4077  (void) prefixq_and_encode(dst->encoding(), src->encoding());
4078  emit_arith(0x0B, 0xC0, dst, src);
4079}
4080
4081void Assembler::popa() { // 64bit
4082  movq(r15, Address(rsp, 0));
4083  movq(r14, Address(rsp, wordSize));
4084  movq(r13, Address(rsp, 2 * wordSize));
4085  movq(r12, Address(rsp, 3 * wordSize));
4086  movq(r11, Address(rsp, 4 * wordSize));
4087  movq(r10, Address(rsp, 5 * wordSize));
4088  movq(r9,  Address(rsp, 6 * wordSize));
4089  movq(r8,  Address(rsp, 7 * wordSize));
4090  movq(rdi, Address(rsp, 8 * wordSize));
4091  movq(rsi, Address(rsp, 9 * wordSize));
4092  movq(rbp, Address(rsp, 10 * wordSize));
4093  // skip rsp
4094  movq(rbx, Address(rsp, 12 * wordSize));
4095  movq(rdx, Address(rsp, 13 * wordSize));
4096  movq(rcx, Address(rsp, 14 * wordSize));
4097  movq(rax, Address(rsp, 15 * wordSize));
4098
4099  addq(rsp, 16 * wordSize);
4100}
4101
4102void Assembler::popcntq(Register dst, Address src) {
4103  assert(VM_Version::supports_popcnt(), "must support");
4104  InstructionMark im(this);
4105  emit_byte(0xF3);
4106  prefixq(src, dst);
4107  emit_byte(0x0F);
4108  emit_byte(0xB8);
4109  emit_operand(dst, src);
4110}
4111
4112void Assembler::popcntq(Register dst, Register src) {
4113  assert(VM_Version::supports_popcnt(), "must support");
4114  emit_byte(0xF3);
4115  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4116  emit_byte(0x0F);
4117  emit_byte(0xB8);
4118  emit_byte(0xC0 | encode);
4119}
4120
4121void Assembler::popq(Address dst) {
4122  InstructionMark im(this);
4123  prefixq(dst);
4124  emit_byte(0x8F);
4125  emit_operand(rax, dst);
4126}
4127
4128void Assembler::pusha() { // 64bit
4129  // we have to store original rsp.  ABI says that 128 bytes
4130  // below rsp are local scratch.
4131  movq(Address(rsp, -5 * wordSize), rsp);
4132
4133  subq(rsp, 16 * wordSize);
4134
4135  movq(Address(rsp, 15 * wordSize), rax);
4136  movq(Address(rsp, 14 * wordSize), rcx);
4137  movq(Address(rsp, 13 * wordSize), rdx);
4138  movq(Address(rsp, 12 * wordSize), rbx);
4139  // skip rsp
4140  movq(Address(rsp, 10 * wordSize), rbp);
4141  movq(Address(rsp, 9 * wordSize), rsi);
4142  movq(Address(rsp, 8 * wordSize), rdi);
4143  movq(Address(rsp, 7 * wordSize), r8);
4144  movq(Address(rsp, 6 * wordSize), r9);
4145  movq(Address(rsp, 5 * wordSize), r10);
4146  movq(Address(rsp, 4 * wordSize), r11);
4147  movq(Address(rsp, 3 * wordSize), r12);
4148  movq(Address(rsp, 2 * wordSize), r13);
4149  movq(Address(rsp, wordSize), r14);
4150  movq(Address(rsp, 0), r15);
4151}
4152
4153void Assembler::pushq(Address src) {
4154  InstructionMark im(this);
4155  prefixq(src);
4156  emit_byte(0xFF);
4157  emit_operand(rsi, src);
4158}
4159
4160void Assembler::rclq(Register dst, int imm8) {
4161  assert(isShiftCount(imm8 >> 1), "illegal shift count");
4162  int encode = prefixq_and_encode(dst->encoding());
4163  if (imm8 == 1) {
4164    emit_byte(0xD1);
4165    emit_byte(0xD0 | encode);
4166  } else {
4167    emit_byte(0xC1);
4168    emit_byte(0xD0 | encode);
4169    emit_byte(imm8);
4170  }
4171}
4172void Assembler::sarq(Register dst, int imm8) {
4173  assert(isShiftCount(imm8 >> 1), "illegal shift count");
4174  int encode = prefixq_and_encode(dst->encoding());
4175  if (imm8 == 1) {
4176    emit_byte(0xD1);
4177    emit_byte(0xF8 | encode);
4178  } else {
4179    emit_byte(0xC1);
4180    emit_byte(0xF8 | encode);
4181    emit_byte(imm8);
4182  }
4183}
4184
4185void Assembler::sarq(Register dst) {
4186  int encode = prefixq_and_encode(dst->encoding());
4187  emit_byte(0xD3);
4188  emit_byte(0xF8 | encode);
4189}
4190void Assembler::sbbq(Address dst, int32_t imm32) {
4191  InstructionMark im(this);
4192  prefixq(dst);
4193  emit_arith_operand(0x81, rbx, dst, imm32);
4194}
4195
4196void Assembler::sbbq(Register dst, int32_t imm32) {
4197  (void) prefixq_and_encode(dst->encoding());
4198  emit_arith(0x81, 0xD8, dst, imm32);
4199}
4200
4201void Assembler::sbbq(Register dst, Address src) {
4202  InstructionMark im(this);
4203  prefixq(src, dst);
4204  emit_byte(0x1B);
4205  emit_operand(dst, src);
4206}
4207
4208void Assembler::sbbq(Register dst, Register src) {
4209  (void) prefixq_and_encode(dst->encoding(), src->encoding());
4210  emit_arith(0x1B, 0xC0, dst, src);
4211}
4212
4213void Assembler::shlq(Register dst, int imm8) {
4214  assert(isShiftCount(imm8 >> 1), "illegal shift count");
4215  int encode = prefixq_and_encode(dst->encoding());
4216  if (imm8 == 1) {
4217    emit_byte(0xD1);
4218    emit_byte(0xE0 | encode);
4219  } else {
4220    emit_byte(0xC1);
4221    emit_byte(0xE0 | encode);
4222    emit_byte(imm8);
4223  }
4224}
4225
4226void Assembler::shlq(Register dst) {
4227  int encode = prefixq_and_encode(dst->encoding());
4228  emit_byte(0xD3);
4229  emit_byte(0xE0 | encode);
4230}
4231
4232void Assembler::shrq(Register dst, int imm8) {
4233  assert(isShiftCount(imm8 >> 1), "illegal shift count");
4234  int encode = prefixq_and_encode(dst->encoding());
4235  emit_byte(0xC1);
4236  emit_byte(0xE8 | encode);
4237  emit_byte(imm8);
4238}
4239
4240void Assembler::shrq(Register dst) {
4241  int encode = prefixq_and_encode(dst->encoding());
4242  emit_byte(0xD3);
4243  emit_byte(0xE8 | encode);
4244}
4245
4246void Assembler::sqrtsd(XMMRegister dst, Address src) {
4247  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4248  InstructionMark im(this);
4249  emit_byte(0xF2);
4250  prefix(src, dst);
4251  emit_byte(0x0F);
4252  emit_byte(0x51);
4253  emit_operand(dst, src);
4254}
4255
4256void Assembler::subq(Address dst, int32_t imm32) {
4257  InstructionMark im(this);
4258  prefixq(dst);
4259  if (is8bit(imm32)) {
4260    emit_byte(0x83);
4261    emit_operand(rbp, dst, 1);
4262    emit_byte(imm32 & 0xFF);
4263  } else {
4264    emit_byte(0x81);
4265    emit_operand(rbp, dst, 4);
4266    emit_long(imm32);
4267  }
4268}
4269
4270void Assembler::subq(Register dst, int32_t imm32) {
4271  (void) prefixq_and_encode(dst->encoding());
4272  emit_arith(0x81, 0xE8, dst, imm32);
4273}
4274
4275void Assembler::subq(Address dst, Register src) {
4276  InstructionMark im(this);
4277  prefixq(dst, src);
4278  emit_byte(0x29);
4279  emit_operand(src, dst);
4280}
4281
4282void Assembler::subq(Register dst, Address src) {
4283  InstructionMark im(this);
4284  prefixq(src, dst);
4285  emit_byte(0x2B);
4286  emit_operand(dst, src);
4287}
4288
4289void Assembler::subq(Register dst, Register src) {
4290  (void) prefixq_and_encode(dst->encoding(), src->encoding());
4291  emit_arith(0x2B, 0xC0, dst, src);
4292}
4293
4294void Assembler::testq(Register dst, int32_t imm32) {
4295  // not using emit_arith because test
4296  // doesn't support sign-extension of
4297  // 8bit operands
4298  int encode = dst->encoding();
4299  if (encode == 0) {
4300    prefix(REX_W);
4301    emit_byte(0xA9);
4302  } else {
4303    encode = prefixq_and_encode(encode);
4304    emit_byte(0xF7);
4305    emit_byte(0xC0 | encode);
4306  }
4307  emit_long(imm32);
4308}
4309
4310void Assembler::testq(Register dst, Register src) {
4311  (void) prefixq_and_encode(dst->encoding(), src->encoding());
4312  emit_arith(0x85, 0xC0, dst, src);
4313}
4314
4315void Assembler::xaddq(Address dst, Register src) {
4316  InstructionMark im(this);
4317  prefixq(dst, src);
4318  emit_byte(0x0F);
4319  emit_byte(0xC1);
4320  emit_operand(src, dst);
4321}
4322
4323void Assembler::xchgq(Register dst, Address src) {
4324  InstructionMark im(this);
4325  prefixq(src, dst);
4326  emit_byte(0x87);
4327  emit_operand(dst, src);
4328}
4329
4330void Assembler::xchgq(Register dst, Register src) {
4331  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4332  emit_byte(0x87);
4333  emit_byte(0xc0 | encode);
4334}
4335
4336void Assembler::xorq(Register dst, Register src) {
4337  (void) prefixq_and_encode(dst->encoding(), src->encoding());
4338  emit_arith(0x33, 0xC0, dst, src);
4339}
4340
4341void Assembler::xorq(Register dst, Address src) {
4342  InstructionMark im(this);
4343  prefixq(src, dst);
4344  emit_byte(0x33);
4345  emit_operand(dst, src);
4346}
4347
4348#endif // !LP64
4349
4350static Assembler::Condition reverse[] = {
4351    Assembler::noOverflow     /* overflow      = 0x0 */ ,
4352    Assembler::overflow       /* noOverflow    = 0x1 */ ,
4353    Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
4354    Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,
4355    Assembler::notZero        /* zero          = 0x4, equal         = 0x4 */ ,
4356    Assembler::zero           /* notZero       = 0x5, notEqual      = 0x5 */ ,
4357    Assembler::above          /* belowEqual    = 0x6 */ ,
4358    Assembler::belowEqual     /* above         = 0x7 */ ,
4359    Assembler::positive       /* negative      = 0x8 */ ,
4360    Assembler::negative       /* positive      = 0x9 */ ,
4361    Assembler::noParity       /* parity        = 0xa */ ,
4362    Assembler::parity         /* noParity      = 0xb */ ,
4363    Assembler::greaterEqual   /* less          = 0xc */ ,
4364    Assembler::less           /* greaterEqual  = 0xd */ ,
4365    Assembler::greater        /* lessEqual     = 0xe */ ,
4366    Assembler::lessEqual      /* greater       = 0xf, */
4367
4368};
4369
4370
4371// Implementation of MacroAssembler
4372
// First, all the functions whose implementations differ between 32 and 64 bit,
// unless the difference is trivial (one line or so).
4375
4376#ifndef _LP64
4377
4378// 32bit versions
4379
4380Address MacroAssembler::as_Address(AddressLiteral adr) {
4381  return Address(adr.target(), adr.rspec());
4382}
4383
4384Address MacroAssembler::as_Address(ArrayAddress adr) {
4385  return Address::make_array(adr);
4386}
4387
4388int MacroAssembler::biased_locking_enter(Register lock_reg,
4389                                         Register obj_reg,
4390                                         Register swap_reg,
4391                                         Register tmp_reg,
4392                                         bool swap_reg_contains_mark,
4393                                         Label& done,
4394                                         Label* slow_case,
4395                                         BiasedLockingCounters* counters) {
4396  assert(UseBiasedLocking, "why call this otherwise?");
4397  assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
4398  assert_different_registers(lock_reg, obj_reg, swap_reg);
4399
4400  if (PrintBiasedLockingStatistics && counters == NULL)
4401    counters = BiasedLocking::counters();
4402
4403  bool need_tmp_reg = false;
4404  if (tmp_reg == noreg) {
4405    need_tmp_reg = true;
4406    tmp_reg = lock_reg;
4407  } else {
4408    assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
4409  }
4410  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
4411  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
4412  Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
4413  Address saved_mark_addr(lock_reg, 0);
4414
4415  // Biased locking
4416  // See whether the lock is currently biased toward our thread and
4417  // whether the epoch is still valid
4418  // Note that the runtime guarantees sufficient alignment of JavaThread
4419  // pointers to allow age to be placed into low bits
4420  // First check to see whether biasing is even enabled for this object
4421  Label cas_label;
4422  int null_check_offset = -1;
4423  if (!swap_reg_contains_mark) {
4424    null_check_offset = offset();
4425    movl(swap_reg, mark_addr);
4426  }
4427  if (need_tmp_reg) {
4428    push(tmp_reg);
4429  }
4430  movl(tmp_reg, swap_reg);
4431  andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
4432  cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
4433  if (need_tmp_reg) {
4434    pop(tmp_reg);
4435  }
4436  jcc(Assembler::notEqual, cas_label);
4437  // The bias pattern is present in the object's header. Need to check
4438  // whether the bias owner and the epoch are both still current.
4439  // Note that because there is no current thread register on x86 we
4440  // need to store off the mark word we read out of the object to
4441  // avoid reloading it and needing to recheck invariants below. This
4442  // store is unfortunate but it makes the overall code shorter and
4443  // simpler.
4444  movl(saved_mark_addr, swap_reg);
4445  if (need_tmp_reg) {
4446    push(tmp_reg);
4447  }
4448  get_thread(tmp_reg);
4449  xorl(swap_reg, tmp_reg);
4450  if (swap_reg_contains_mark) {
4451    null_check_offset = offset();
4452  }
4453  movl(tmp_reg, klass_addr);
4454  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
4455  andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
4456  if (need_tmp_reg) {
4457    pop(tmp_reg);
4458  }
4459  if (counters != NULL) {
4460    cond_inc32(Assembler::zero,
4461               ExternalAddress((address)counters->biased_lock_entry_count_addr()));
4462  }
4463  jcc(Assembler::equal, done);
4464
4465  Label try_revoke_bias;
4466  Label try_rebias;
4467
4468  // At this point we know that the header has the bias pattern and
4469  // that we are not the bias owner in the current epoch. We need to
4470  // figure out more details about the state of the header in order to
4471  // know what operations can be legally performed on the object's
4472  // header.
4473
4474  // If the low three bits in the xor result aren't clear, that means
4475  // the prototype header is no longer biased and we have to revoke
4476  // the bias on this object.
4477  testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
4478  jcc(Assembler::notZero, try_revoke_bias);
4479
4480  // Biasing is still enabled for this data type. See whether the
4481  // epoch of the current bias is still valid, meaning that the epoch
4482  // bits of the mark word are equal to the epoch bits of the
4483  // prototype header. (Note that the prototype header's epoch bits
4484  // only change at a safepoint.) If not, attempt to rebias the object
4485  // toward the current thread. Note that we must be absolutely sure
4486  // that the current epoch is invalid in order to do this because
4487  // otherwise the manipulations it performs on the mark word are
4488  // illegal.
4489  testl(swap_reg, markOopDesc::epoch_mask_in_place);
4490  jcc(Assembler::notZero, try_rebias);
4491
4492  // The epoch of the current bias is still valid but we know nothing
4493  // about the owner; it might be set or it might be clear. Try to
4494  // acquire the bias of the object using an atomic operation. If this
4495  // fails we will go in to the runtime to revoke the object's bias.
4496  // Note that we first construct the presumed unbiased header so we
4497  // don't accidentally blow away another thread's valid bias.
4498  movl(swap_reg, saved_mark_addr);
4499  andl(swap_reg,
4500       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
4501  if (need_tmp_reg) {
4502    push(tmp_reg);
4503  }
4504  get_thread(tmp_reg);
4505  orl(tmp_reg, swap_reg);
4506  if (os::is_MP()) {
4507    lock();
4508  }
4509  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
4510  if (need_tmp_reg) {
4511    pop(tmp_reg);
4512  }
4513  // If the biasing toward our thread failed, this means that
4514  // another thread succeeded in biasing it toward itself and we
4515  // need to revoke that bias. The revocation will occur in the
4516  // interpreter runtime in the slow case.
4517  if (counters != NULL) {
4518    cond_inc32(Assembler::zero,
4519               ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
4520  }
4521  if (slow_case != NULL) {
4522    jcc(Assembler::notZero, *slow_case);
4523  }
4524  jmp(done);
4525
4526  bind(try_rebias);
4527  // At this point we know the epoch has expired, meaning that the
4528  // current "bias owner", if any, is actually invalid. Under these
4529  // circumstances _only_, we are allowed to use the current header's
4530  // value as the comparison value when doing the cas to acquire the
4531  // bias in the current epoch. In other words, we allow transfer of
4532  // the bias from one thread to another directly in this situation.
4533  //
4534  // FIXME: due to a lack of registers we currently blow away the age
4535  // bits in this situation. Should attempt to preserve them.
4536  if (need_tmp_reg) {
4537    push(tmp_reg);
4538  }
4539  get_thread(tmp_reg);
4540  movl(swap_reg, klass_addr);
4541  orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
4542  movl(swap_reg, saved_mark_addr);
4543  if (os::is_MP()) {
4544    lock();
4545  }
4546  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
4547  if (need_tmp_reg) {
4548    pop(tmp_reg);
4549  }
4550  // If the biasing toward our thread failed, then another thread
4551  // succeeded in biasing it toward itself and we need to revoke that
4552  // bias. The revocation will occur in the runtime in the slow case.
4553  if (counters != NULL) {
4554    cond_inc32(Assembler::zero,
4555               ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
4556  }
4557  if (slow_case != NULL) {
4558    jcc(Assembler::notZero, *slow_case);
4559  }
4560  jmp(done);
4561
4562  bind(try_revoke_bias);
4563  // The prototype mark in the klass doesn't have the bias bit set any
4564  // more, indicating that objects of this data type are not supposed
4565  // to be biased any more. We are going to try to reset the mark of
4566  // this object to the prototype value and fall through to the
4567  // CAS-based locking scheme. Note that if our CAS fails, it means
4568  // that another thread raced us for the privilege of revoking the
4569  // bias of this particular object, so it's okay to continue in the
4570  // normal locking code.
4571  //
4572  // FIXME: due to a lack of registers we currently blow away the age
4573  // bits in this situation. Should attempt to preserve them.
4574  movl(swap_reg, saved_mark_addr);
4575  if (need_tmp_reg) {
4576    push(tmp_reg);
4577  }
4578  movl(tmp_reg, klass_addr);
4579  movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
4580  if (os::is_MP()) {
4581    lock();
4582  }
4583  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
4584  if (need_tmp_reg) {
4585    pop(tmp_reg);
4586  }
4587  // Fall through to the normal CAS-based lock, because no matter what
4588  // the result of the above CAS, some thread must have succeeded in
4589  // removing the bias bit from the object's header.
4590  if (counters != NULL) {
4591    cond_inc32(Assembler::zero,
4592               ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
4593  }
4594
4595  bind(cas_label);
4596
4597  return null_check_offset;
4598}
4599void MacroAssembler::call_VM_leaf_base(address entry_point,
4600                                       int number_of_arguments) {
4601  call(RuntimeAddress(entry_point));
4602  increment(rsp, number_of_arguments * wordSize);
4603}
4604
4605void MacroAssembler::cmpoop(Address src1, jobject obj) {
4606  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
4607}
4608
4609void MacroAssembler::cmpoop(Register src1, jobject obj) {
4610  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
4611}
4612
4613void MacroAssembler::extend_sign(Register hi, Register lo) {
4614  // According to Intel Doc. AP-526, "Integer Divide", p.18.
4615  if (VM_Version::is_P6() && hi == rdx && lo == rax) {
4616    cdql();
4617  } else {
4618    movl(hi, lo);
4619    sarl(hi, 31);
4620  }
4621}
4622
4623void MacroAssembler::fat_nop() {
4624  // A 5 byte nop that is safe for patching (see patch_verified_entry)
4625  emit_byte(0x26); // es:
4626  emit_byte(0x2e); // cs:
4627  emit_byte(0x64); // fs:
4628  emit_byte(0x65); // gs:
4629  emit_byte(0x90);
4630}
4631
4632void MacroAssembler::jC2(Register tmp, Label& L) {
4633  // set parity bit if FPU flag C2 is set (via rax)
4634  save_rax(tmp);
4635  fwait(); fnstsw_ax();
4636  sahf();
4637  restore_rax(tmp);
4638  // branch
4639  jcc(Assembler::parity, L);
4640}
4641
4642void MacroAssembler::jnC2(Register tmp, Label& L) {
4643  // set parity bit if FPU flag C2 is set (via rax)
4644  save_rax(tmp);
4645  fwait(); fnstsw_ax();
4646  sahf();
4647  restore_rax(tmp);
4648  // branch
4649  jcc(Assembler::noParity, L);
4650}
4651
4652// 32bit can do a case table jump in one instruction but we no longer allow the base
4653// to be installed in the Address class
4654void MacroAssembler::jump(ArrayAddress entry) {
4655  jmp(as_Address(entry));
4656}
4657
4658// Note: y_lo will be destroyed
4659void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
4660  // Long compare for Java (semantics as described in JVM spec.)
4661  Label high, low, done;
4662
4663  cmpl(x_hi, y_hi);
4664  jcc(Assembler::less, low);
4665  jcc(Assembler::greater, high);
4666  // x_hi is the return register
4667  xorl(x_hi, x_hi);
4668  cmpl(x_lo, y_lo);
4669  jcc(Assembler::below, low);
4670  jcc(Assembler::equal, done);
4671
4672  bind(high);
4673  xorl(x_hi, x_hi);
4674  increment(x_hi);
4675  jmp(done);
4676
4677  bind(low);
4678  xorl(x_hi, x_hi);
4679  decrementl(x_hi);
4680
4681  bind(done);
4682}
4683
4684void MacroAssembler::lea(Register dst, AddressLiteral src) {
4685    mov_literal32(dst, (int32_t)src.target(), src.rspec());
4686}
4687
4688void MacroAssembler::lea(Address dst, AddressLiteral adr) {
4689  // leal(dst, as_Address(adr));
4690  // see note in movl as to why we must use a move
4691  mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
4692}
4693
4694void MacroAssembler::leave() {
4695  mov(rsp, rbp);
4696  pop(rbp);
4697}
4698
4699void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
4700  // Multiplication of two Java long values stored on the stack
4701  // as illustrated below. Result is in rdx:rax.
4702  //
4703  // rsp ---> [  ??  ] \               \
4704  //            ....    | y_rsp_offset  |
4705  //          [ y_lo ] /  (in bytes)    | x_rsp_offset
4706  //          [ y_hi ]                  | (in bytes)
4707  //            ....                    |
4708  //          [ x_lo ]                 /
4709  //          [ x_hi ]
4710  //            ....
4711  //
4712  // Basic idea: lo(result) = lo(x_lo * y_lo)
4713  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
4714  Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
4715  Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
4716  Label quick;
4717  // load x_hi, y_hi and check if quick
4718  // multiplication is possible
4719  movl(rbx, x_hi);
4720  movl(rcx, y_hi);
4721  movl(rax, rbx);
4722  orl(rbx, rcx);                                 // rbx, = 0 <=> x_hi = 0 and y_hi = 0
4723  jcc(Assembler::zero, quick);                   // if rbx, = 0 do quick multiply
4724  // do full multiplication
4725  // 1st step
4726  mull(y_lo);                                    // x_hi * y_lo
4727  movl(rbx, rax);                                // save lo(x_hi * y_lo) in rbx,
4728  // 2nd step
4729  movl(rax, x_lo);
4730  mull(rcx);                                     // x_lo * y_hi
4731  addl(rbx, rax);                                // add lo(x_lo * y_hi) to rbx,
4732  // 3rd step
4733  bind(quick);                                   // note: rbx, = 0 if quick multiply!
4734  movl(rax, x_lo);
4735  mull(y_lo);                                    // x_lo * y_lo
4736  addl(rdx, rbx);                                // correct hi(x_lo * y_lo)
4737}
4738
4739void MacroAssembler::lneg(Register hi, Register lo) {
4740  negl(lo);
4741  adcl(hi, 0);
4742  negl(hi);
4743}
4744
4745void MacroAssembler::lshl(Register hi, Register lo) {
4746  // Java shift left long support (semantics as described in JVM spec., p.305)
4747  // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
4748  // shift value is in rcx !
4749  assert(hi != rcx, "must not use rcx");
4750  assert(lo != rcx, "must not use rcx");
4751  const Register s = rcx;                        // shift count
4752  const int      n = BitsPerWord;
4753  Label L;
4754  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
4755  cmpl(s, n);                                    // if (s < n)
4756  jcc(Assembler::less, L);                       // else (s >= n)
4757  movl(hi, lo);                                  // x := x << n
4758  xorl(lo, lo);
4759  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
4760  bind(L);                                       // s (mod n) < n
4761  shldl(hi, lo);                                 // x := x << s
4762  shll(lo);
4763}
4764
4765
4766void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
4767  // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
4768  // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
4769  assert(hi != rcx, "must not use rcx");
4770  assert(lo != rcx, "must not use rcx");
4771  const Register s = rcx;                        // shift count
4772  const int      n = BitsPerWord;
4773  Label L;
4774  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
4775  cmpl(s, n);                                    // if (s < n)
4776  jcc(Assembler::less, L);                       // else (s >= n)
4777  movl(lo, hi);                                  // x := x >> n
4778  if (sign_extension) sarl(hi, 31);
4779  else                xorl(hi, hi);
4780  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
4781  bind(L);                                       // s (mod n) < n
4782  shrdl(lo, hi);                                 // x := x >> s
4783  if (sign_extension) sarl(hi);
4784  else                shrl(hi);
4785}
4786
4787void MacroAssembler::movoop(Register dst, jobject obj) {
4788  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
4789}
4790
4791void MacroAssembler::movoop(Address dst, jobject obj) {
4792  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
4793}
4794
4795void MacroAssembler::movptr(Register dst, AddressLiteral src) {
4796  if (src.is_lval()) {
4797    mov_literal32(dst, (intptr_t)src.target(), src.rspec());
4798  } else {
4799    movl(dst, as_Address(src));
4800  }
4801}
4802
4803void MacroAssembler::movptr(ArrayAddress dst, Register src) {
4804  movl(as_Address(dst), src);
4805}
4806
4807void MacroAssembler::movptr(Register dst, ArrayAddress src) {
4808  movl(dst, as_Address(src));
4809}
4810
4811// src should NEVER be a real pointer. Use AddressLiteral for true pointers
4812void MacroAssembler::movptr(Address dst, intptr_t src) {
4813  movl(dst, src);
4814}
4815
4816
4817void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
4818  movsd(dst, as_Address(src));
4819}
4820
4821void MacroAssembler::pop_callee_saved_registers() {
4822  pop(rcx);
4823  pop(rdx);
4824  pop(rdi);
4825  pop(rsi);
4826}
4827
4828void MacroAssembler::pop_fTOS() {
4829  fld_d(Address(rsp, 0));
4830  addl(rsp, 2 * wordSize);
4831}
4832
4833void MacroAssembler::push_callee_saved_registers() {
4834  push(rsi);
4835  push(rdi);
4836  push(rdx);
4837  push(rcx);
4838}
4839
4840void MacroAssembler::push_fTOS() {
4841  subl(rsp, 2 * wordSize);
4842  fstp_d(Address(rsp, 0));
4843}
4844
4845
4846void MacroAssembler::pushoop(jobject obj) {
4847  push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
4848}
4849
4850
4851void MacroAssembler::pushptr(AddressLiteral src) {
4852  if (src.is_lval()) {
4853    push_literal32((int32_t)src.target(), src.rspec());
4854  } else {
4855    pushl(as_Address(src));
4856  }
4857}
4858
4859void MacroAssembler::set_word_if_not_zero(Register dst) {
4860  xorl(dst, dst);
4861  set_byte_if_not_zero(dst);
4862}
4863
4864static void pass_arg0(MacroAssembler* masm, Register arg) {
4865  masm->push(arg);
4866}
4867
4868static void pass_arg1(MacroAssembler* masm, Register arg) {
4869  masm->push(arg);
4870}
4871
4872static void pass_arg2(MacroAssembler* masm, Register arg) {
4873  masm->push(arg);
4874}
4875
4876static void pass_arg3(MacroAssembler* masm, Register arg) {
4877  masm->push(arg);
4878}
4879
4880#ifndef PRODUCT
4881extern "C" void findpc(intptr_t x);
4882#endif
4883
4884void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
4885  // In order to get locks to work, we need to fake a in_VM state
4886  JavaThread* thread = JavaThread::current();
4887  JavaThreadState saved_state = thread->thread_state();
4888  thread->set_thread_state(_thread_in_vm);
4889  if (ShowMessageBoxOnError) {
4890    JavaThread* thread = JavaThread::current();
4891    JavaThreadState saved_state = thread->thread_state();
4892    thread->set_thread_state(_thread_in_vm);
4893    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
4894      ttyLocker ttyl;
4895      BytecodeCounter::print();
4896    }
4897    // To see where a verify_oop failed, get $ebx+40/X for this frame.
4898    // This is the value of eip which points to where verify_oop will return.
4899    if (os::message_box(msg, "Execution stopped, print registers?")) {
4900      ttyLocker ttyl;
4901      tty->print_cr("eip = 0x%08x", eip);
4902#ifndef PRODUCT
4903      tty->cr();
4904      findpc(eip);
4905      tty->cr();
4906#endif
4907      tty->print_cr("rax, = 0x%08x", rax);
4908      tty->print_cr("rbx, = 0x%08x", rbx);
4909      tty->print_cr("rcx = 0x%08x", rcx);
4910      tty->print_cr("rdx = 0x%08x", rdx);
4911      tty->print_cr("rdi = 0x%08x", rdi);
4912      tty->print_cr("rsi = 0x%08x", rsi);
4913      tty->print_cr("rbp, = 0x%08x", rbp);
4914      tty->print_cr("rsp = 0x%08x", rsp);
4915      BREAKPOINT;
4916    }
4917  } else {
4918    ttyLocker ttyl;
4919    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
4920    assert(false, "DEBUG MESSAGE");
4921  }
4922  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
4923}
4924
4925void MacroAssembler::stop(const char* msg) {
4926  ExternalAddress message((address)msg);
4927  // push address of message
4928  pushptr(message.addr());
4929  { Label L; call(L, relocInfo::none); bind(L); }     // push eip
4930  pusha();                                           // push registers
4931  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
4932  hlt();
4933}
4934
4935void MacroAssembler::warn(const char* msg) {
4936  push_CPU_state();
4937
4938  ExternalAddress message((address) msg);
4939  // push address of message
4940  pushptr(message.addr());
4941
4942  call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
4943  addl(rsp, wordSize);       // discard argument
4944  pop_CPU_state();
4945}
4946
4947#else // _LP64
4948
4949// 64 bit versions
4950
4951Address MacroAssembler::as_Address(AddressLiteral adr) {
4952  // amd64 always does this as a pc-rel
4953  // we can be absolute or disp based on the instruction type
4954  // jmp/call are displacements others are absolute
4955  assert(!adr.is_lval(), "must be rval");
4956  assert(reachable(adr), "must be");
4957  return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());
4958
4959}
4960
4961Address MacroAssembler::as_Address(ArrayAddress adr) {
4962  AddressLiteral base = adr.base();
4963  lea(rscratch1, base);
4964  Address index = adr.index();
4965  assert(index._disp == 0, "must not have disp"); // maybe it can?
4966  Address array(rscratch1, index._index, index._scale, index._disp);
4967  return array;
4968}
4969
4970int MacroAssembler::biased_locking_enter(Register lock_reg,
4971                                         Register obj_reg,
4972                                         Register swap_reg,
4973                                         Register tmp_reg,
4974                                         bool swap_reg_contains_mark,
4975                                         Label& done,
4976                                         Label* slow_case,
4977                                         BiasedLockingCounters* counters) {
4978  assert(UseBiasedLocking, "why call this otherwise?");
4979  assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
4980  assert(tmp_reg != noreg, "tmp_reg must be supplied");
4981  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
4982  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
4983  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
4984  Address saved_mark_addr(lock_reg, 0);
4985
4986  if (PrintBiasedLockingStatistics && counters == NULL)
4987    counters = BiasedLocking::counters();
4988
4989  // Biased locking
4990  // See whether the lock is currently biased toward our thread and
4991  // whether the epoch is still valid
4992  // Note that the runtime guarantees sufficient alignment of JavaThread
4993  // pointers to allow age to be placed into low bits
4994  // First check to see whether biasing is even enabled for this object
4995  Label cas_label;
4996  int null_check_offset = -1;
4997  if (!swap_reg_contains_mark) {
4998    null_check_offset = offset();
4999    movq(swap_reg, mark_addr);
5000  }
5001  movq(tmp_reg, swap_reg);
5002  andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
5003  cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
5004  jcc(Assembler::notEqual, cas_label);
5005  // The bias pattern is present in the object's header. Need to check
5006  // whether the bias owner and the epoch are both still current.
5007  load_prototype_header(tmp_reg, obj_reg);
5008  orq(tmp_reg, r15_thread);
5009  xorq(tmp_reg, swap_reg);
5010  andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
5011  if (counters != NULL) {
5012    cond_inc32(Assembler::zero,
5013               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
5014  }
5015  jcc(Assembler::equal, done);
5016
5017  Label try_revoke_bias;
5018  Label try_rebias;
5019
5020  // At this point we know that the header has the bias pattern and
5021  // that we are not the bias owner in the current epoch. We need to
5022  // figure out more details about the state of the header in order to
5023  // know what operations can be legally performed on the object's
5024  // header.
5025
5026  // If the low three bits in the xor result aren't clear, that means
5027  // the prototype header is no longer biased and we have to revoke
5028  // the bias on this object.
5029  testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
5030  jcc(Assembler::notZero, try_revoke_bias);
5031
5032  // Biasing is still enabled for this data type. See whether the
5033  // epoch of the current bias is still valid, meaning that the epoch
5034  // bits of the mark word are equal to the epoch bits of the
5035  // prototype header. (Note that the prototype header's epoch bits
5036  // only change at a safepoint.) If not, attempt to rebias the object
5037  // toward the current thread. Note that we must be absolutely sure
5038  // that the current epoch is invalid in order to do this because
5039  // otherwise the manipulations it performs on the mark word are
5040  // illegal.
5041  testq(tmp_reg, markOopDesc::epoch_mask_in_place);
5042  jcc(Assembler::notZero, try_rebias);
5043
5044  // The epoch of the current bias is still valid but we know nothing
5045  // about the owner; it might be set or it might be clear. Try to
5046  // acquire the bias of the object using an atomic operation. If this
5047  // fails we will go in to the runtime to revoke the object's bias.
5048  // Note that we first construct the presumed unbiased header so we
5049  // don't accidentally blow away another thread's valid bias.
5050  andq(swap_reg,
5051       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
5052  movq(tmp_reg, swap_reg);
5053  orq(tmp_reg, r15_thread);
5054  if (os::is_MP()) {
5055    lock();
5056  }
5057  cmpxchgq(tmp_reg, Address(obj_reg, 0));
5058  // If the biasing toward our thread failed, this means that
5059  // another thread succeeded in biasing it toward itself and we
5060  // need to revoke that bias. The revocation will occur in the
5061  // interpreter runtime in the slow case.
5062  if (counters != NULL) {
5063    cond_inc32(Assembler::zero,
5064               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
5065  }
5066  if (slow_case != NULL) {
5067    jcc(Assembler::notZero, *slow_case);
5068  }
5069  jmp(done);
5070
5071  bind(try_rebias);
5072  // At this point we know the epoch has expired, meaning that the
5073  // current "bias owner", if any, is actually invalid. Under these
5074  // circumstances _only_, we are allowed to use the current header's
5075  // value as the comparison value when doing the cas to acquire the
5076  // bias in the current epoch. In other words, we allow transfer of
5077  // the bias from one thread to another directly in this situation.
5078  //
5079  // FIXME: due to a lack of registers we currently blow away the age
5080  // bits in this situation. Should attempt to preserve them.
5081  load_prototype_header(tmp_reg, obj_reg);
5082  orq(tmp_reg, r15_thread);
5083  if (os::is_MP()) {
5084    lock();
5085  }
5086  cmpxchgq(tmp_reg, Address(obj_reg, 0));
5087  // If the biasing toward our thread failed, then another thread
5088  // succeeded in biasing it toward itself and we need to revoke that
5089  // bias. The revocation will occur in the runtime in the slow case.
5090  if (counters != NULL) {
5091    cond_inc32(Assembler::zero,
5092               ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
5093  }
5094  if (slow_case != NULL) {
5095    jcc(Assembler::notZero, *slow_case);
5096  }
5097  jmp(done);
5098
5099  bind(try_revoke_bias);
5100  // The prototype mark in the klass doesn't have the bias bit set any
5101  // more, indicating that objects of this data type are not supposed
5102  // to be biased any more. We are going to try to reset the mark of
5103  // this object to the prototype value and fall through to the
5104  // CAS-based locking scheme. Note that if our CAS fails, it means
5105  // that another thread raced us for the privilege of revoking the
5106  // bias of this particular object, so it's okay to continue in the
5107  // normal locking code.
5108  //
5109  // FIXME: due to a lack of registers we currently blow away the age
5110  // bits in this situation. Should attempt to preserve them.
5111  load_prototype_header(tmp_reg, obj_reg);
5112  if (os::is_MP()) {
5113    lock();
5114  }
5115  cmpxchgq(tmp_reg, Address(obj_reg, 0));
5116  // Fall through to the normal CAS-based lock, because no matter what
5117  // the result of the above CAS, some thread must have succeeded in
5118  // removing the bias bit from the object's header.
5119  if (counters != NULL) {
5120    cond_inc32(Assembler::zero,
5121               ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
5122  }
5123
5124  bind(cas_label);
5125
5126  return null_check_offset;
5127}
5128
5129void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
5130  Label L, E;
5131
5132#ifdef _WIN64
5133  // Windows always allocates space for it's register args
5134  assert(num_args <= 4, "only register arguments supported");
5135  subq(rsp,  frame::arg_reg_save_area_bytes);
5136#endif
5137
5138  // Align stack if necessary
5139  testl(rsp, 15);
5140  jcc(Assembler::zero, L);
5141
5142  subq(rsp, 8);
5143  {
5144    call(RuntimeAddress(entry_point));
5145  }
5146  addq(rsp, 8);
5147  jmp(E);
5148
5149  bind(L);
5150  {
5151    call(RuntimeAddress(entry_point));
5152  }
5153
5154  bind(E);
5155
5156#ifdef _WIN64
5157  // restore stack pointer
5158  addq(rsp, frame::arg_reg_save_area_bytes);
5159#endif
5160
5161}
5162
5163void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
5164  assert(!src2.is_lval(), "should use cmpptr");
5165
5166  if (reachable(src2)) {
5167    cmpq(src1, as_Address(src2));
5168  } else {
5169    lea(rscratch1, src2);
5170    Assembler::cmpq(src1, Address(rscratch1, 0));
5171  }
5172}
5173
int MacroAssembler::corrected_idivq(Register reg) {
  // Full implementation of Java ldiv and lrem; checks for special
  // case as described in JVM spec., p.243 & p.271.  The function
  // returns the (pc) offset of the idivq instruction - may be needed
  // for implicit exceptions.
  //
  //         normal case                           special case
  //
  // input : rax: dividend                         min_long
  //         reg: divisor   (may not be rax/rdx)   -1
  //
  // output: rax: quotient  (= rax idiv reg)       min_long
  //         rdx: remainder (= rax irem reg)       0
  assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
  // Kept in static storage because the generated code compares rax against
  // it through its address.
  static const int64_t min_long = 0x8000000000000000;
  Label normal_case, special_case;

  // check for special case
  cmp64(rax, ExternalAddress((address) &min_long));
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where
                  // remainder = 0)
  cmpq(reg, -1);
  // min_long / -1: skip the idivq entirely; rax already holds min_long and
  // rdx was cleared above.
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdqq();
  // record the offset of the idivq itself (this is the value returned)
  int idivq_offset = offset();
  idivq(reg);

  // normal and special case exit
  bind(special_case);

  return idivq_offset;
}
5210
5211void MacroAssembler::decrementq(Register reg, int value) {
5212  if (value == min_jint) { subq(reg, value); return; }
5213  if (value <  0) { incrementq(reg, -value); return; }
5214  if (value == 0) {                        ; return; }
5215  if (value == 1 && UseIncDec) { decq(reg) ; return; }
5216  /* else */      { subq(reg, value)       ; return; }
5217}
5218
5219void MacroAssembler::decrementq(Address dst, int value) {
5220  if (value == min_jint) { subq(dst, value); return; }
5221  if (value <  0) { incrementq(dst, -value); return; }
5222  if (value == 0) {                        ; return; }
5223  if (value == 1 && UseIncDec) { decq(dst) ; return; }
5224  /* else */      { subq(dst, value)       ; return; }
5225}
5226
5227void MacroAssembler::fat_nop() {
5228  // A 5 byte nop that is safe for patching (see patch_verified_entry)
5229  // Recommened sequence from 'Software Optimization Guide for the AMD
5230  // Hammer Processor'
5231  emit_byte(0x66);
5232  emit_byte(0x66);
5233  emit_byte(0x90);
5234  emit_byte(0x66);
5235  emit_byte(0x90);
5236}
5237
5238void MacroAssembler::incrementq(Register reg, int value) {
5239  if (value == min_jint) { addq(reg, value); return; }
5240  if (value <  0) { decrementq(reg, -value); return; }
5241  if (value == 0) {                        ; return; }
5242  if (value == 1 && UseIncDec) { incq(reg) ; return; }
5243  /* else */      { addq(reg, value)       ; return; }
5244}
5245
5246void MacroAssembler::incrementq(Address dst, int value) {
5247  if (value == min_jint) { addq(dst, value); return; }
5248  if (value <  0) { decrementq(dst, -value); return; }
5249  if (value == 0) {                        ; return; }
5250  if (value == 1 && UseIncDec) { incq(dst) ; return; }
5251  /* else */      { addq(dst, value)       ; return; }
5252}
5253
5254// 32bit can do a case table jump in one instruction but we no longer allow the base
5255// to be installed in the Address class
5256void MacroAssembler::jump(ArrayAddress entry) {
5257  lea(rscratch1, entry.base());
5258  Address dispatch = entry.index();
5259  assert(dispatch._base == noreg, "must be");
5260  dispatch._base = rscratch1;
5261  jmp(dispatch);
5262}
5263
// Two-register long compare. Not expected to be called in this (64-bit)
// section: the body starts with ShouldNotReachHere().
void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  cmpq(x_lo, y_lo);
}
5268
// Loads the target address of the literal `src` into `dst` as a 64-bit
// immediate, carrying along the literal's relocation spec.
void MacroAssembler::lea(Register dst, AddressLiteral src) {
    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
}
5272
// Stores the target address of the literal `adr` into the memory location
// `dst`. The address is staged in rscratch1, which is clobbered.
void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
  movptr(dst, rscratch1);
}
5277
// Emits the single-byte LEAVE instruction (opcode 0xC9).
void MacroAssembler::leave() {
  // %%% is this really better? Why not on 32bit too?
  emit_byte(0xC9); // LEAVE
}
5282
// Two-register long negate. Not expected to be called in this (64-bit)
// section: the body starts with ShouldNotReachHere().
void MacroAssembler::lneg(Register hi, Register lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  negq(lo);
}
5287
// Loads the jobject `obj` into `dst` as a 64-bit immediate tagged with an
// immediate oop relocation.
void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
}
5291
// Stores the jobject `obj` into the memory location `dst`. The value is
// staged in rscratch1 (clobbered) as a 64-bit immediate tagged with an
// immediate oop relocation.
void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
  movq(dst, rscratch1);
}
5296
5297void MacroAssembler::movptr(Register dst, AddressLiteral src) {
5298  if (src.is_lval()) {
5299    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
5300  } else {
5301    if (reachable(src)) {
5302      movq(dst, as_Address(src));
5303    } else {
5304      lea(rscratch1, src);
5305      movq(dst, Address(rscratch1,0));
5306    }
5307  }
5308}
5309
// Stores the 64-bit register `src` to the location described by the
// ArrayAddress `dst` (converted with as_Address).
void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movq(as_Address(dst), src);
}
5313
// Loads the 64-bit register `dst` from the location described by the
// ArrayAddress `src` (converted with as_Address).
void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movq(dst, as_Address(src));
}
5317
// Stores the pointer-sized constant `src` to `dst`, staging it in rscratch1
// (clobbered).
// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Address dst, intptr_t src) {
  mov64(rscratch1, src);
  movq(dst, rscratch1);
}
5323
// These are mostly for initializing NULL
// Stores the 32-bit immediate `src` to the 64-bit location `dst` via movslq.
void MacroAssembler::movptr(Address dst, int32_t src) {
  movslq(dst, src);
}
5328
// Loads the 32-bit constant `src`, widened to intptr_t, into `dst`.
void MacroAssembler::movptr(Register dst, int32_t src) {
  mov64(dst, (intptr_t)src);
}
5332
// Pushes the jobject `obj` on the stack, staging it in rscratch1 (clobbered).
void MacroAssembler::pushoop(jobject obj) {
  movoop(rscratch1, obj);
  push(rscratch1);
}
5337
5338void MacroAssembler::pushptr(AddressLiteral src) {
5339  lea(rscratch1, src);
5340  if (src.is_lval()) {
5341    push(rscratch1);
5342  } else {
5343    pushq(Address(rscratch1, 0));
5344  }
5345}
5346
5347void MacroAssembler::reset_last_Java_frame(bool clear_fp,
5348                                           bool clear_pc) {
5349  // we must set sp to zero to clear frame
5350  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
5351  // must clear fp, so that compiled frames are not confused; it is
5352  // possible that we need it only for debugging
5353  if (clear_fp) {
5354    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
5355  }
5356
5357  if (clear_pc) {
5358    movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
5359  }
5360}
5361
5362void MacroAssembler::set_last_Java_frame(Register last_java_sp,
5363                                         Register last_java_fp,
5364                                         address  last_java_pc) {
5365  // determine last_java_sp register
5366  if (!last_java_sp->is_valid()) {
5367    last_java_sp = rsp;
5368  }
5369
5370  // last_java_fp is optional
5371  if (last_java_fp->is_valid()) {
5372    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
5373           last_java_fp);
5374  }
5375
5376  // last_java_pc is optional
5377  if (last_java_pc != NULL) {
5378    Address java_pc(r15_thread,
5379                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
5380    lea(rscratch1, InternalAddress(last_java_pc));
5381    movptr(java_pc, rscratch1);
5382  }
5383
5384  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
5385}
5386
5387static void pass_arg0(MacroAssembler* masm, Register arg) {
5388  if (c_rarg0 != arg ) {
5389    masm->mov(c_rarg0, arg);
5390  }
5391}
5392
5393static void pass_arg1(MacroAssembler* masm, Register arg) {
5394  if (c_rarg1 != arg ) {
5395    masm->mov(c_rarg1, arg);
5396  }
5397}
5398
5399static void pass_arg2(MacroAssembler* masm, Register arg) {
5400  if (c_rarg2 != arg ) {
5401    masm->mov(c_rarg2, arg);
5402  }
5403}
5404
5405static void pass_arg3(MacroAssembler* masm, Register arg) {
5406  if (c_rarg3 != arg ) {
5407    masm->mov(c_rarg3, arg);
5408  }
5409}
5410
// Emits code that reports `msg` through MacroAssembler::debug64 and then
// executes hlt. debug64 receives the message, the code address at which this
// sequence begins, and a pointer to the registers saved by pusha.
void MacroAssembler::stop(const char* msg) {
  // capture the current code position before anything is emitted
  address rip = pc();
  pusha(); // get regs on stack
  lea(c_rarg0, ExternalAddress((address) msg));
  lea(c_rarg1, InternalAddress(rip));
  movq(c_rarg2, rsp); // pass pointer to regs array
  andq(rsp, -16); // align stack as required by ABI
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
  hlt();
}
5421
// Emits code that calls warning(msg) and then continues. r12 is saved on the
// stack and used to remember the original rsp across the 16-byte realignment;
// push_CPU_state/pop_CPU_state bracket the call.
void MacroAssembler::warn(const char* msg) {
  push(r12);
  movq(r12, rsp);     // remember the unaligned rsp so it can be restored below
  andq(rsp, -16);     // align stack as required by push_CPU_state and call

  push_CPU_state();   // keeps alignment at 16 bytes
  lea(c_rarg0, ExternalAddress((address) msg));
  call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
  pop_CPU_state();

  movq(rsp, r12);
  pop(r12);
}
5435
#ifndef PRODUCT
// Defined elsewhere; called from debug64 in non-PRODUCT builds with the
// reported pc.
extern "C" void findpc(intptr_t x);
#endif
5439
// Runtime target of the code emitted by stop().
//   msg  - message to report
//   pc   - code address passed by the emitted stop sequence
//   regs - pointer to the saved registers; regs[0] is printed as r15 and
//          regs[15] as rax (see the index mapping below)
// With ShowMessageBoxOnError, the thread is temporarily put in _thread_in_vm,
// a message box is shown and, on confirmation, pc and the registers are
// printed before BREAKPOINT. Otherwise only the message is printed.
// NOTE(review): pc and regs[] are int64_t but are formatted with %lx, which
// assumes long is 64 bits wide - confirm for targets such as _WIN64.
void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
  // In order to get locks to work, we need to fake a in_VM state
  if (ShowMessageBoxOnError ) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // XXX correct this offset for amd64
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("rip = 0x%016lx", pc);
#ifndef PRODUCT
      tty->cr();
      findpc(pc);
      tty->cr();
#endif
      tty->print_cr("rax = 0x%016lx", regs[15]);
      tty->print_cr("rbx = 0x%016lx", regs[12]);
      tty->print_cr("rcx = 0x%016lx", regs[14]);
      tty->print_cr("rdx = 0x%016lx", regs[13]);
      tty->print_cr("rdi = 0x%016lx", regs[8]);
      tty->print_cr("rsi = 0x%016lx", regs[9]);
      tty->print_cr("rbp = 0x%016lx", regs[10]);
      tty->print_cr("rsp = 0x%016lx", regs[11]);
      tty->print_cr("r8  = 0x%016lx", regs[7]);
      tty->print_cr("r9  = 0x%016lx", regs[6]);
      tty->print_cr("r10 = 0x%016lx", regs[5]);
      tty->print_cr("r11 = 0x%016lx", regs[4]);
      tty->print_cr("r12 = 0x%016lx", regs[3]);
      tty->print_cr("r13 = 0x%016lx", regs[2]);
      tty->print_cr("r14 = 0x%016lx", regs[1]);
      tty->print_cr("r15 = 0x%016lx", regs[0]);
      BREAKPOINT;
    }
    // go back to the thread state that was saved on entry
    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
                    msg);
  }
}
5488
5489#endif // _LP64
5490
5491// Now versions that are common to 32/64 bit
5492
5493void MacroAssembler::addptr(Register dst, int32_t imm32) {
5494  LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
5495}
5496
5497void MacroAssembler::addptr(Register dst, Register src) {
5498  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
5499}
5500
5501void MacroAssembler::addptr(Address dst, Register src) {
5502  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
5503}
5504
5505void MacroAssembler::align(int modulus) {
5506  if (offset() % modulus != 0) {
5507    nop(modulus - (offset() % modulus));
5508  }
5509}
5510
5511void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
5512  andpd(dst, as_Address(src));
5513}
5514
5515void MacroAssembler::andptr(Register dst, int32_t imm32) {
5516  LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
5517}
5518
5519void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
5520  pushf();
5521  if (os::is_MP())
5522    lock();
5523  incrementl(counter_addr);
5524  popf();
5525}
5526
// Writes to stack successive pages until offset reached to check for
// stack overflow + shadow pages.  This clobbers tmp.
// `size` is also modified: it is decremented by one page per loop iteration.
void MacroAssembler::bang_stack_size(Register size, Register tmp) {
  movptr(tmp, rsp);
  // Bang stack for total size given plus shadow page size.
  // Bang one page at a time because large size can bang beyond yellow and
  // red zones.
  Label loop;
  bind(loop);
  movl(Address(tmp, (-os::vm_page_size())), size );
  subptr(tmp, os::vm_page_size());
  subl(size, os::vm_page_size());
  // keep going while the remaining size is still positive
  jcc(Assembler::greater, loop);

  // Bang down shadow pages too.
  // The -1 because we already subtracted 1 page.
  for (int i = 0; i< StackShadowPages-1; i++) {
    // This could be a move of any size, but it can serve as a debugging
    // crumb, so the bigger the better.
    movptr(Address(tmp, (-i*os::vm_page_size())), size );
  }
}
5549
5550void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
5551  assert(UseBiasedLocking, "why call this otherwise?");
5552
5553  // Check for biased locking unlock case, which is a no-op
5554  // Note: we do not have to check the thread ID for two reasons.
5555  // First, the interpreter checks for IllegalMonitorStateException at
5556  // a higher level. Second, if the bias was revoked while we held the
5557  // lock, the object could not be rebiased toward another thread, so
5558  // the bias bit would be clear.
5559  movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
5560  andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
5561  cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
5562  jcc(Assembler::equal, done);
5563}
5564
// Normalizes a C-style boolean held in `x` to 0 or 1.
void MacroAssembler::c2bool(Register x) {
  // implements x == 0 ? 0 : 1
  // note: must only look at least-significant byte of x
  //       since C-style booleans are stored in one byte
  //       only! (was bug)
  andl(x, 0xFF);
  // the flags set by the andl above decide the byte written by setb
  setb(Assembler::notZero, x);
}
5573
// Forwards to Assembler::call(Label&, relocType).
// Wouldn't need if AddressLiteral version had new name
void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
  Assembler::call(L, rtype);
}
5578
// Forwards to Assembler::call(Register): indirect call through `entry`.
void MacroAssembler::call(Register entry) {
  Assembler::call(entry);
}
5582
5583void MacroAssembler::call(AddressLiteral entry) {
5584  if (reachable(entry)) {
5585    Assembler::call_literal(entry.target(), entry.rspec());
5586  } else {
5587    lea(rscratch1, entry);
5588    Assembler::call(rscratch1);
5589  }
5590}
5591
5592// Implementation of call_VM versions
5593
5594void MacroAssembler::call_VM(Register oop_result,
5595                             address entry_point,
5596                             bool check_exceptions) {
5597  Label C, E;
5598  call(C, relocInfo::none);
5599  jmp(E);
5600
5601  bind(C);
5602  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
5603  ret(0);
5604
5605  bind(E);
5606}
5607
5608void MacroAssembler::call_VM(Register oop_result,
5609                             address entry_point,
5610                             Register arg_1,
5611                             bool check_exceptions) {
5612  Label C, E;
5613  call(C, relocInfo::none);
5614  jmp(E);
5615
5616  bind(C);
5617  pass_arg1(this, arg_1);
5618  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
5619  ret(0);
5620
5621  bind(E);
5622}
5623
5624void MacroAssembler::call_VM(Register oop_result,
5625                             address entry_point,
5626                             Register arg_1,
5627                             Register arg_2,
5628                             bool check_exceptions) {
5629  Label C, E;
5630  call(C, relocInfo::none);
5631  jmp(E);
5632
5633  bind(C);
5634
5635  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
5636
5637  pass_arg2(this, arg_2);
5638  pass_arg1(this, arg_1);
5639  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
5640  ret(0);
5641
5642  bind(E);
5643}
5644
5645void MacroAssembler::call_VM(Register oop_result,
5646                             address entry_point,
5647                             Register arg_1,
5648                             Register arg_2,
5649                             Register arg_3,
5650                             bool check_exceptions) {
5651  Label C, E;
5652  call(C, relocInfo::none);
5653  jmp(E);
5654
5655  bind(C);
5656
5657  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
5658  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
5659  pass_arg3(this, arg_3);
5660
5661  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
5662  pass_arg2(this, arg_2);
5663
5664  pass_arg1(this, arg_1);
5665  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
5666  ret(0);
5667
5668  bind(E);
5669}
5670
5671void MacroAssembler::call_VM(Register oop_result,
5672                             Register last_java_sp,
5673                             address entry_point,
5674                             int number_of_arguments,
5675                             bool check_exceptions) {
5676  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
5677  call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
5678}
5679
// call_VM with an explicit last_java_sp and one register argument:
// moves arg_1 into c_rarg1, then delegates to the argument-count overload.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}
5688
// call_VM with an explicit last_java_sp and two register arguments.
// Arguments are placed highest-numbered first; the assert rejects an arg_1
// that lives in c_rarg2, since placing arg_2 would overwrite it.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}
5701
// call_VM with an explicit last_java_sp and three register arguments.
// Arguments are placed highest-numbered first; each assert rejects a
// not-yet-placed argument living in the register about to be written.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}
5717
// Common back end of the call_VM family. Emits, in order:
//   1. selection of the thread and last_java_sp registers (defaults are used
//      when the supplied registers are not valid),
//   2. passing of the thread as first C argument (pushed on 32-bit, moved to
//      c_rarg0 on 64-bit),
//   3. set_last_Java_frame, the call itself via call_VM_leaf_base, and
//      reset_last_Java_frame,
//   4. popframe/earlyret checks (unless CC_INTERP),
//   5. when check_exceptions is set, a jump to
//      StubRoutines::forward_exception_entry() if the thread has a pending
//      exception,
//   6. when oop_result is valid, a load of the thread's vm_result into it,
//      followed by clearing that field and verifying the oop.
void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address  entry_point,
                                  int      number_of_arguments,
                                  bool     check_exceptions) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifdef _LP64
    java_thread = r15_thread;
#else
    java_thread = rdi;
    get_thread(java_thread);
#endif // LP64
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }
  // debugging support
  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  // push java thread (becomes first argument of C function)

  NOT_LP64(push(java_thread); number_of_arguments++);
  LP64_ONLY(mov(c_rarg0, r15_thread));

  // set last Java frame before call
  assert(last_java_sp != rbp, "can't use ebp/rbp");

  // Only interpreter should have to set fp
  set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);

  // do the call, remove parameters
  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
  if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
    // rdi & rsi (also r15) are callee saved -> nothing to do
#ifdef ASSERT
    // debug builds: re-fetch the thread into rax and stop if it differs from
    // java_thread
    guarantee(java_thread != rax, "change this code");
    push(rax);
    { Label L;
      get_thread(rax);
      cmpptr(java_thread, rax);
      jcc(Assembler::equal, L);
      stop("MacroAssembler::call_VM_base: rdi not callee saved?");
      bind(L);
    }
    pop(rax);
#endif
  } else {
    get_thread(java_thread);
  }
  // reset last Java frame
  // Only interpreter should have to clear fp
  reset_last_Java_frame(java_thread, true, false);

#ifndef CC_INTERP
   // C++ interp handles this in the interpreter
  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
#endif /* CC_INTERP */

  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
#ifndef _LP64
    jump_cc(Assembler::notEqual,
            RuntimeAddress(StubRoutines::forward_exception_entry()));
#else
    // This used to conditionally jump to forward_exception however it is
    // possible if we relocate that the branch will not reach. So we must jump
    // around so we can always reach

    Label ok;
    jcc(Assembler::equal, ok);
    jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
    bind(ok);
#endif // LP64
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
    movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
    verify_oop(oop_result, "broken oop in call_VM_base");
  }
}
5812
// Computes last_Java_sp into rax (clobbered) and delegates to call_VM_base
// with no explicit thread register.
void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  // Calculating the value for last_Java_sp is somewhat subtle.
  // call_VM does an intermediate call which places a return address on
  // the stack just under the stack pointer as the user finished with it.
  // This allows us to retrieve last_Java_pc from last_Java_sp[-1].
  // On 32bit we then have to push additional args on the stack to accomplish
  // the actual requested call. On 64bit call_VM only can use register args
  // so the only extra space is the return address that call_VM created.
  // This hopefully explains the calculations here.

#ifdef _LP64
  // We've pushed one address, correct last_Java_sp
  lea(rax, Address(rsp, wordSize));
#else
  lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
#endif // LP64

  call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);

}
5835
// Leaf call with an argument count only: forwards to call_VM_leaf_base.
void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}
5839
// Leaf call with one register argument: moves arg_0 into c_rarg0, then calls.
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 1);
}
5844
// Leaf call with two register arguments.
// Arguments are placed highest-numbered first; the assert rejects an arg_0
// that lives in c_rarg1, since placing arg_1 would overwrite it.
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 2);
}
5852
// Leaf call with three register arguments.
// Arguments are placed highest-numbered first; each assert rejects a
// not-yet-placed argument living in the register about to be written.
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 3);
}
5862
// Intentionally empty here: emits nothing. Called from call_VM_base when
// CC_INTERP is not defined.
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}
5865
// Intentionally empty here: emits nothing. Called from call_VM_base when
// CC_INTERP is not defined.
void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}
5868
5869void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
5870  if (reachable(src1)) {
5871    cmpl(as_Address(src1), imm);
5872  } else {
5873    lea(rscratch1, src1);
5874    cmpl(Address(rscratch1, 0), imm);
5875  }
5876}
5877
5878void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
5879  assert(!src2.is_lval(), "use cmpptr");
5880  if (reachable(src2)) {
5881    cmpl(src1, as_Address(src2));
5882  } else {
5883    lea(rscratch1, src2);
5884    cmpl(src1, Address(rscratch1, 0));
5885  }
5886}
5887
// 32-bit compare of a register with an immediate; forwards to
// Assembler::cmpl.
void MacroAssembler::cmp32(Register src1, int32_t imm) {
  Assembler::cmpl(src1, imm);
}
5891
// 32-bit compare of a register with a memory operand; forwards to
// Assembler::cmpl.
void MacroAssembler::cmp32(Register src1, Address src2) {
  Assembler::cmpl(src1, src2);
}
5895
5896void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
5897  ucomisd(opr1, opr2);
5898
5899  Label L;
5900  if (unordered_is_less) {
5901    movl(dst, -1);
5902    jcc(Assembler::parity, L);
5903    jcc(Assembler::below , L);
5904    movl(dst, 0);
5905    jcc(Assembler::equal , L);
5906    increment(dst);
5907  } else { // unordered is greater
5908    movl(dst, 1);
5909    jcc(Assembler::parity, L);
5910    jcc(Assembler::above , L);
5911    movl(dst, 0);
5912    jcc(Assembler::equal , L);
5913    decrementl(dst);
5914  }
5915  bind(L);
5916}
5917
5918void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
5919  ucomiss(opr1, opr2);
5920
5921  Label L;
5922  if (unordered_is_less) {
5923    movl(dst, -1);
5924    jcc(Assembler::parity, L);
5925    jcc(Assembler::below , L);
5926    movl(dst, 0);
5927    jcc(Assembler::equal , L);
5928    increment(dst);
5929  } else { // unordered is greater
5930    movl(dst, 1);
5931    jcc(Assembler::parity, L);
5932    jcc(Assembler::above , L);
5933    movl(dst, 0);
5934    jcc(Assembler::equal , L);
5935    decrementl(dst);
5936  }
5937  bind(L);
5938}
5939
5940
5941void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
5942  if (reachable(src1)) {
5943    cmpb(as_Address(src1), imm);
5944  } else {
5945    lea(rscratch1, src1);
5946    cmpb(Address(rscratch1, 0), imm);
5947  }
5948}
5949
5950void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
5951#ifdef _LP64
5952  if (src2.is_lval()) {
5953    movptr(rscratch1, src2);
5954    Assembler::cmpq(src1, rscratch1);
5955  } else if (reachable(src2)) {
5956    cmpq(src1, as_Address(src2));
5957  } else {
5958    lea(rscratch1, src2);
5959    Assembler::cmpq(src1, Address(rscratch1, 0));
5960  }
5961#else
5962  if (src2.is_lval()) {
5963    cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
5964  } else {
5965    cmpl(src1, as_Address(src2));
5966  }
5967#endif // _LP64
5968}
5969
5970void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
5971  assert(src2.is_lval(), "not a mem-mem compare");
5972#ifdef _LP64
5973  // moves src2's literal address
5974  movptr(rscratch1, src2);
5975  Assembler::cmpq(src1, rscratch1);
5976#else
5977  cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
5978#endif // _LP64
5979}
5980
5981void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
5982  if (reachable(adr)) {
5983    if (os::is_MP())
5984      lock();
5985    cmpxchgptr(reg, as_Address(adr));
5986  } else {
5987    lea(rscratch1, adr);
5988    if (os::is_MP())
5989      lock();
5990    cmpxchgptr(reg, Address(rscratch1, 0));
5991  }
5992}
5993
5994void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
5995  LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
5996}
5997
5998void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
5999  comisd(dst, as_Address(src));
6000}
6001
6002void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
6003  comiss(dst, as_Address(src));
6004}
6005
6006
6007void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
6008  Condition negated_cond = negate_condition(cond);
6009  Label L;
6010  jcc(negated_cond, L);
6011  atomic_incl(counter_addr);
6012  bind(L);
6013}
6014
6015int MacroAssembler::corrected_idivl(Register reg) {
6016  // Full implementation of Java idiv and irem; checks for
6017  // special case as described in JVM spec., p.243 & p.271.
6018  // The function returns the (pc) offset of the idivl
6019  // instruction - may be needed for implicit exceptions.
6020  //
6021  //         normal case                           special case
6022  //
6023  // input : rax,: dividend                         min_int
6024  //         reg: divisor   (may not be rax,/rdx)   -1
6025  //
6026  // output: rax,: quotient  (= rax, idiv reg)       min_int
6027  //         rdx: remainder (= rax, irem reg)       0
6028  assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
6029  const int min_int = 0x80000000;
6030  Label normal_case, special_case;
6031
6032  // check for special case
6033  cmpl(rax, min_int);
6034  jcc(Assembler::notEqual, normal_case);
6035  xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
6036  cmpl(reg, -1);
6037  jcc(Assembler::equal, special_case);
6038
6039  // handle normal case
6040  bind(normal_case);
6041  cdql();
6042  int idivl_offset = offset();
6043  idivl(reg);
6044
6045  // normal and special case exit
6046  bind(special_case);
6047
6048  return idivl_offset;
6049}
6050
6051
6052
6053void MacroAssembler::decrementl(Register reg, int value) {
6054  if (value == min_jint) {subl(reg, value) ; return; }
6055  if (value <  0) { incrementl(reg, -value); return; }
6056  if (value == 0) {                        ; return; }
6057  if (value == 1 && UseIncDec) { decl(reg) ; return; }
6058  /* else */      { subl(reg, value)       ; return; }
6059}
6060
6061void MacroAssembler::decrementl(Address dst, int value) {
6062  if (value == min_jint) {subl(dst, value) ; return; }
6063  if (value <  0) { incrementl(dst, -value); return; }
6064  if (value == 0) {                        ; return; }
6065  if (value == 1 && UseIncDec) { decl(dst) ; return; }
6066  /* else */      { subl(dst, value)       ; return; }
6067}
6068
6069void MacroAssembler::division_with_shift (Register reg, int shift_value) {
6070  assert (shift_value > 0, "illegal shift value");
6071  Label _is_positive;
6072  testl (reg, reg);
6073  jcc (Assembler::positive, _is_positive);
6074  int offset = (1 << shift_value) - 1 ;
6075
6076  if (offset == 1) {
6077    incrementl(reg);
6078  } else {
6079    addl(reg, offset);
6080  }
6081
6082  bind (_is_positive);
6083  sarl(reg, shift_value);
6084}
6085
6086// !defined(COMPILER2) is because of stupid core builds
6087#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
6088void MacroAssembler::empty_FPU_stack() {
6089  if (VM_Version::supports_mmx()) {
6090    emms();
6091  } else {
6092    for (int i = 8; i-- > 0; ) ffree(i);
6093  }
6094}
6095#endif // !LP64 || C1 || !C2
6096
6097
6098// Defines obj, preserves var_size_in_bytes
6099void MacroAssembler::eden_allocate(Register obj,
6100                                   Register var_size_in_bytes,
6101                                   int con_size_in_bytes,
6102                                   Register t1,
6103                                   Label& slow_case) {
6104  assert(obj == rax, "obj must be in rax, for cmpxchg");
6105  assert_different_registers(obj, var_size_in_bytes, t1);
6106  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
6107    jmp(slow_case);
6108  } else {
6109    Register end = t1;
6110    Label retry;
6111    bind(retry);
6112    ExternalAddress heap_top((address) Universe::heap()->top_addr());
6113    movptr(obj, heap_top);
6114    if (var_size_in_bytes == noreg) {
6115      lea(end, Address(obj, con_size_in_bytes));
6116    } else {
6117      lea(end, Address(obj, var_size_in_bytes, Address::times_1));
6118    }
6119    // if end < obj then we wrapped around => object too long => slow case
6120    cmpptr(end, obj);
6121    jcc(Assembler::below, slow_case);
6122    cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
6123    jcc(Assembler::above, slow_case);
6124    // Compare obj with the top addr, and if still equal, store the new top addr in
6125    // end at the address of the top addr pointer. Sets ZF if was equal, and clears
6126    // it otherwise. Use lock prefix for atomicity on MPs.
6127    locked_cmpxchgptr(end, heap_top);
6128    jcc(Assembler::notEqual, retry);
6129  }
6130}
6131
6132void MacroAssembler::enter() {
6133  push(rbp);
6134  mov(rbp, rsp);
6135}
6136
6137void MacroAssembler::fcmp(Register tmp) {
6138  fcmp(tmp, 1, true, true);
6139}
6140
6141void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
6142  assert(!pop_right || pop_left, "usage error");
6143  if (VM_Version::supports_cmov()) {
6144    assert(tmp == noreg, "unneeded temp");
6145    if (pop_left) {
6146      fucomip(index);
6147    } else {
6148      fucomi(index);
6149    }
6150    if (pop_right) {
6151      fpop();
6152    }
6153  } else {
6154    assert(tmp != noreg, "need temp");
6155    if (pop_left) {
6156      if (pop_right) {
6157        fcompp();
6158      } else {
6159        fcomp(index);
6160      }
6161    } else {
6162      fcom(index);
6163    }
6164    // convert FPU condition into eflags condition via rax,
6165    save_rax(tmp);
6166    fwait(); fnstsw_ax();
6167    sahf();
6168    restore_rax(tmp);
6169  }
6170  // condition codes set as follows:
6171  //
6172  // CF (corresponds to C0) if x < y
6173  // PF (corresponds to C2) if unordered
6174  // ZF (corresponds to C3) if x = y
6175}
6176
6177void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
6178  fcmp2int(dst, unordered_is_less, 1, true, true);
6179}
6180
6181void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
6182  fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
6183  Label L;
6184  if (unordered_is_less) {
6185    movl(dst, -1);
6186    jcc(Assembler::parity, L);
6187    jcc(Assembler::below , L);
6188    movl(dst, 0);
6189    jcc(Assembler::equal , L);
6190    increment(dst);
6191  } else { // unordered is greater
6192    movl(dst, 1);
6193    jcc(Assembler::parity, L);
6194    jcc(Assembler::above , L);
6195    movl(dst, 0);
6196    jcc(Assembler::equal , L);
6197    decrementl(dst);
6198  }
6199  bind(L);
6200}
6201
6202void MacroAssembler::fld_d(AddressLiteral src) {
6203  fld_d(as_Address(src));
6204}
6205
6206void MacroAssembler::fld_s(AddressLiteral src) {
6207  fld_s(as_Address(src));
6208}
6209
6210void MacroAssembler::fld_x(AddressLiteral src) {
6211  Assembler::fld_x(as_Address(src));
6212}
6213
6214void MacroAssembler::fldcw(AddressLiteral src) {
6215  Assembler::fldcw(as_Address(src));
6216}
6217
6218void MacroAssembler::fpop() {
6219  ffree();
6220  fincstp();
6221}
6222
6223void MacroAssembler::fremr(Register tmp) {
6224  save_rax(tmp);
6225  { Label L;
6226    bind(L);
6227    fprem();
6228    fwait(); fnstsw_ax();
6229#ifdef _LP64
6230    testl(rax, 0x400);
6231    jcc(Assembler::notEqual, L);
6232#else
6233    sahf();
6234    jcc(Assembler::parity, L);
6235#endif // _LP64
6236  }
6237  restore_rax(tmp);
6238  // Result is in ST0.
6239  // Note: fxch & fpop to get rid of ST1
6240  // (otherwise FPU stack could overflow eventually)
6241  fxch(1);
6242  fpop();
6243}
6244
6245
6246void MacroAssembler::incrementl(AddressLiteral dst) {
6247  if (reachable(dst)) {
6248    incrementl(as_Address(dst));
6249  } else {
6250    lea(rscratch1, dst);
6251    incrementl(Address(rscratch1, 0));
6252  }
6253}
6254
6255void MacroAssembler::incrementl(ArrayAddress dst) {
6256  incrementl(as_Address(dst));
6257}
6258
6259void MacroAssembler::incrementl(Register reg, int value) {
6260  if (value == min_jint) {addl(reg, value) ; return; }
6261  if (value <  0) { decrementl(reg, -value); return; }
6262  if (value == 0) {                        ; return; }
6263  if (value == 1 && UseIncDec) { incl(reg) ; return; }
6264  /* else */      { addl(reg, value)       ; return; }
6265}
6266
6267void MacroAssembler::incrementl(Address dst, int value) {
6268  if (value == min_jint) {addl(dst, value) ; return; }
6269  if (value <  0) { decrementl(dst, -value); return; }
6270  if (value == 0) {                        ; return; }
6271  if (value == 1 && UseIncDec) { incl(dst) ; return; }
6272  /* else */      { addl(dst, value)       ; return; }
6273}
6274
6275void MacroAssembler::jump(AddressLiteral dst) {
6276  if (reachable(dst)) {
6277    jmp_literal(dst.target(), dst.rspec());
6278  } else {
6279    lea(rscratch1, dst);
6280    jmp(rscratch1);
6281  }
6282}
6283
6284void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
6285  if (reachable(dst)) {
6286    InstructionMark im(this);
6287    relocate(dst.reloc());
6288    const int short_size = 2;
6289    const int long_size = 6;
6290    int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);
6291    if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
6292      // 0111 tttn #8-bit disp
6293      emit_byte(0x70 | cc);
6294      emit_byte((offs - short_size) & 0xFF);
6295    } else {
6296      // 0000 1111 1000 tttn #32-bit disp
6297      emit_byte(0x0F);
6298      emit_byte(0x80 | cc);
6299      emit_long(offs - long_size);
6300    }
6301  } else {
6302#ifdef ASSERT
6303    warning("reversing conditional branch");
6304#endif /* ASSERT */
6305    Label skip;
6306    jccb(reverse[cc], skip);
6307    lea(rscratch1, dst);
6308    Assembler::jmp(rscratch1);
6309    bind(skip);
6310  }
6311}
6312
6313void MacroAssembler::ldmxcsr(AddressLiteral src) {
6314  if (reachable(src)) {
6315    Assembler::ldmxcsr(as_Address(src));
6316  } else {
6317    lea(rscratch1, src);
6318    Assembler::ldmxcsr(Address(rscratch1, 0));
6319  }
6320}
6321
6322int MacroAssembler::load_signed_byte(Register dst, Address src) {
6323  int off;
6324  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
6325    off = offset();
6326    movsbl(dst, src); // movsxb
6327  } else {
6328    off = load_unsigned_byte(dst, src);
6329    shll(dst, 24);
6330    sarl(dst, 24);
6331  }
6332  return off;
6333}
6334
6335// Note: load_signed_short used to be called load_signed_word.
6336// Although the 'w' in x86 opcodes refers to the term "word" in the assembler
6337// manual, which means 16 bits, that usage is found nowhere in HotSpot code.
6338// The term "word" in HotSpot means a 32- or 64-bit machine word.
6339int MacroAssembler::load_signed_short(Register dst, Address src) {
6340  int off;
6341  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
6342    // This is dubious to me since it seems safe to do a signed 16 => 64 bit
6343    // version but this is what 64bit has always done. This seems to imply
6344    // that users are only using 32bits worth.
6345    off = offset();
6346    movswl(dst, src); // movsxw
6347  } else {
6348    off = load_unsigned_short(dst, src);
6349    shll(dst, 16);
6350    sarl(dst, 16);
6351  }
6352  return off;
6353}
6354
6355int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
6356  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
6357  // and "3.9 Partial Register Penalties", p. 22).
6358  int off;
6359  if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
6360    off = offset();
6361    movzbl(dst, src); // movzxb
6362  } else {
6363    xorl(dst, dst);
6364    off = offset();
6365    movb(dst, src);
6366  }
6367  return off;
6368}
6369
6370// Note: load_unsigned_short used to be called load_unsigned_word.
6371int MacroAssembler::load_unsigned_short(Register dst, Address src) {
6372  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
6373  // and "3.9 Partial Register Penalties", p. 22).
6374  int off;
6375  if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
6376    off = offset();
6377    movzwl(dst, src); // movzxw
6378  } else {
6379    xorl(dst, dst);
6380    off = offset();
6381    movw(dst, src);
6382  }
6383  return off;
6384}
6385
6386void MacroAssembler::load_sized_value(Register dst, Address src,
6387                                      int size_in_bytes, bool is_signed) {
6388  switch (size_in_bytes ^ (is_signed ? -1 : 0)) {
6389#ifndef _LP64
6390  // For case 8, caller is responsible for manually loading
6391  // the second word into another register.
6392  case ~8:  // fall through:
6393  case  8:  movl(                dst, src ); break;
6394#else
6395  case ~8:  // fall through:
6396  case  8:  movq(                dst, src ); break;
6397#endif
6398  case ~4:  // fall through:
6399  case  4:  movl(                dst, src ); break;
6400  case ~2:  load_signed_short(   dst, src ); break;
6401  case  2:  load_unsigned_short( dst, src ); break;
6402  case ~1:  load_signed_byte(    dst, src ); break;
6403  case  1:  load_unsigned_byte(  dst, src ); break;
6404  default:  ShouldNotReachHere();
6405  }
6406}
6407
6408void MacroAssembler::mov32(AddressLiteral dst, Register src) {
6409  if (reachable(dst)) {
6410    movl(as_Address(dst), src);
6411  } else {
6412    lea(rscratch1, dst);
6413    movl(Address(rscratch1, 0), src);
6414  }
6415}
6416
6417void MacroAssembler::mov32(Register dst, AddressLiteral src) {
6418  if (reachable(src)) {
6419    movl(dst, as_Address(src));
6420  } else {
6421    lea(rscratch1, src);
6422    movl(dst, Address(rscratch1, 0));
6423  }
6424}
6425
6426// C++ bool manipulation
6427
6428void MacroAssembler::movbool(Register dst, Address src) {
6429  if(sizeof(bool) == 1)
6430    movb(dst, src);
6431  else if(sizeof(bool) == 2)
6432    movw(dst, src);
6433  else if(sizeof(bool) == 4)
6434    movl(dst, src);
6435  else
6436    // unsupported
6437    ShouldNotReachHere();
6438}
6439
6440void MacroAssembler::movbool(Address dst, bool boolconst) {
6441  if(sizeof(bool) == 1)
6442    movb(dst, (int) boolconst);
6443  else if(sizeof(bool) == 2)
6444    movw(dst, (int) boolconst);
6445  else if(sizeof(bool) == 4)
6446    movl(dst, (int) boolconst);
6447  else
6448    // unsupported
6449    ShouldNotReachHere();
6450}
6451
6452void MacroAssembler::movbool(Address dst, Register src) {
6453  if(sizeof(bool) == 1)
6454    movb(dst, src);
6455  else if(sizeof(bool) == 2)
6456    movw(dst, src);
6457  else if(sizeof(bool) == 4)
6458    movl(dst, src);
6459  else
6460    // unsupported
6461    ShouldNotReachHere();
6462}
6463
6464void MacroAssembler::movbyte(ArrayAddress dst, int src) {
6465  movb(as_Address(dst), src);
6466}
6467
6468void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
6469  if (reachable(src)) {
6470    if (UseXmmLoadAndClearUpper) {
6471      movsd (dst, as_Address(src));
6472    } else {
6473      movlpd(dst, as_Address(src));
6474    }
6475  } else {
6476    lea(rscratch1, src);
6477    if (UseXmmLoadAndClearUpper) {
6478      movsd (dst, Address(rscratch1, 0));
6479    } else {
6480      movlpd(dst, Address(rscratch1, 0));
6481    }
6482  }
6483}
6484
6485void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
6486  if (reachable(src)) {
6487    movss(dst, as_Address(src));
6488  } else {
6489    lea(rscratch1, src);
6490    movss(dst, Address(rscratch1, 0));
6491  }
6492}
6493
6494void MacroAssembler::movptr(Register dst, Register src) {
6495  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6496}
6497
6498void MacroAssembler::movptr(Register dst, Address src) {
6499  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6500}
6501
6502// src should NEVER be a real pointer. Use AddressLiteral for true pointers
6503void MacroAssembler::movptr(Register dst, intptr_t src) {
6504  LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
6505}
6506
6507void MacroAssembler::movptr(Address dst, Register src) {
6508  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6509}
6510
6511void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
6512  if (reachable(src)) {
6513    movss(dst, as_Address(src));
6514  } else {
6515    lea(rscratch1, src);
6516    movss(dst, Address(rscratch1, 0));
6517  }
6518}
6519
6520void MacroAssembler::null_check(Register reg, int offset) {
6521  if (needs_explicit_null_check(offset)) {
6522    // provoke OS NULL exception if reg = NULL by
6523    // accessing M[reg] w/o changing any (non-CC) registers
6524    // NOTE: cmpl is plenty here to provoke a segv
6525    cmpptr(rax, Address(reg, 0));
6526    // Note: should probably use testl(rax, Address(reg, 0));
6527    //       may be shorter code (however, this version of
6528    //       testl needs to be implemented first)
6529  } else {
6530    // nothing to do, (later) access of M[reg + offset]
6531    // will provoke OS NULL exception if reg = NULL
6532  }
6533}
6534
6535void MacroAssembler::os_breakpoint() {
6536  // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
6537  // (e.g., MSVC can't call ps() otherwise)
6538  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
6539}
6540
6541void MacroAssembler::pop_CPU_state() {
6542  pop_FPU_state();
6543  pop_IU_state();
6544}
6545
6546void MacroAssembler::pop_FPU_state() {
6547  NOT_LP64(frstor(Address(rsp, 0));)
6548  LP64_ONLY(fxrstor(Address(rsp, 0));)
6549  addptr(rsp, FPUStateSizeInWords * wordSize);
6550}
6551
6552void MacroAssembler::pop_IU_state() {
6553  popa();
6554  LP64_ONLY(addq(rsp, 8));
6555  popf();
6556}
6557
6558// Save Integer and Float state
6559// Warning: Stack must be 16 byte aligned (64bit)
6560void MacroAssembler::push_CPU_state() {
6561  push_IU_state();
6562  push_FPU_state();
6563}
6564
6565void MacroAssembler::push_FPU_state() {
6566  subptr(rsp, FPUStateSizeInWords * wordSize);
6567#ifndef _LP64
6568  fnsave(Address(rsp, 0));
6569  fwait();
6570#else
6571  fxsave(Address(rsp, 0));
6572#endif // LP64
6573}
6574
6575void MacroAssembler::push_IU_state() {
6576  // Push flags first because pusha kills them
6577  pushf();
6578  // Make sure rsp stays 16-byte aligned
6579  LP64_ONLY(subq(rsp, 8));
6580  pusha();
6581}
6582
6583void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
6584  // determine java_thread register
6585  if (!java_thread->is_valid()) {
6586    java_thread = rdi;
6587    get_thread(java_thread);
6588  }
6589  // we must set sp to zero to clear frame
6590  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
6591  if (clear_fp) {
6592    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
6593  }
6594
6595  if (clear_pc)
6596    movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
6597
6598}
6599
6600void MacroAssembler::restore_rax(Register tmp) {
6601  if (tmp == noreg) pop(rax);
6602  else if (tmp != rax) mov(rax, tmp);
6603}
6604
6605void MacroAssembler::round_to(Register reg, int modulus) {
6606  addptr(reg, modulus - 1);
6607  andptr(reg, -modulus);
6608}
6609
6610void MacroAssembler::save_rax(Register tmp) {
6611  if (tmp == noreg) push(rax);
6612  else if (tmp != rax) mov(tmp, rax);
6613}
6614
6615// Write serialization page so VM thread can do a pseudo remote membar.
6616// We use the current thread pointer to calculate a thread specific
6617// offset to write to within the page. This minimizes bus traffic
6618// due to cache line collision.
6619void MacroAssembler::serialize_memory(Register thread, Register tmp) {
6620  movl(tmp, thread);
6621  shrl(tmp, os::get_serialize_page_shift_count());
6622  andl(tmp, (os::vm_page_size() - sizeof(int)));
6623
6624  Address index(noreg, tmp, Address::times_1);
6625  ExternalAddress page(os::get_memory_serialize_page());
6626
6627  // Size of store must match masking code above
6628  movl(as_Address(ArrayAddress(page, index)), tmp);
6629}
6630
6631// Calls to C land
6632//
6633// When entering C land, the rbp, & rsp of the last Java frame have to be recorded
6634// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
6635// has to be reset to 0. This is required to allow proper stack traversal.
6636void MacroAssembler::set_last_Java_frame(Register java_thread,
6637                                         Register last_java_sp,
6638                                         Register last_java_fp,
6639                                         address  last_java_pc) {
6640  // determine java_thread register
6641  if (!java_thread->is_valid()) {
6642    java_thread = rdi;
6643    get_thread(java_thread);
6644  }
6645  // determine last_java_sp register
6646  if (!last_java_sp->is_valid()) {
6647    last_java_sp = rsp;
6648  }
6649
6650  // last_java_fp is optional
6651
6652  if (last_java_fp->is_valid()) {
6653    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
6654  }
6655
6656  // last_java_pc is optional
6657
6658  if (last_java_pc != NULL) {
6659    lea(Address(java_thread,
6660                 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
6661        InternalAddress(last_java_pc));
6662
6663  }
6664  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
6665}
6666
6667void MacroAssembler::shlptr(Register dst, int imm8) {
6668  LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
6669}
6670
6671void MacroAssembler::shrptr(Register dst, int imm8) {
6672  LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
6673}
6674
6675void MacroAssembler::sign_extend_byte(Register reg) {
6676  if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
6677    movsbl(reg, reg); // movsxb
6678  } else {
6679    shll(reg, 24);
6680    sarl(reg, 24);
6681  }
6682}
6683
6684void MacroAssembler::sign_extend_short(Register reg) {
6685  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
6686    movswl(reg, reg); // movsxw
6687  } else {
6688    shll(reg, 16);
6689    sarl(reg, 16);
6690  }
6691}
6692
6693//////////////////////////////////////////////////////////////////////////////////
6694#ifndef SERIALGC
6695
6696void MacroAssembler::g1_write_barrier_pre(Register obj,
6697#ifndef _LP64
6698                                          Register thread,
6699#endif
6700                                          Register tmp,
6701                                          Register tmp2,
6702                                          bool tosca_live) {
6703  LP64_ONLY(Register thread = r15_thread;)
6704  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
6705                                       PtrQueue::byte_offset_of_active()));
6706
6707  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
6708                                       PtrQueue::byte_offset_of_index()));
6709  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
6710                                       PtrQueue::byte_offset_of_buf()));
6711
6712
6713  Label done;
6714  Label runtime;
6715
6716  // if (!marking_in_progress) goto done;
6717  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
6718    cmpl(in_progress, 0);
6719  } else {
6720    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
6721    cmpb(in_progress, 0);
6722  }
6723  jcc(Assembler::equal, done);
6724
6725  // if (x.f == NULL) goto done;
6726  cmpptr(Address(obj, 0), NULL_WORD);
6727  jcc(Assembler::equal, done);
6728
6729  // Can we store original value in the thread's buffer?
6730
6731  LP64_ONLY(movslq(tmp, index);)
6732  movptr(tmp2, Address(obj, 0));
6733#ifdef _LP64
6734  cmpq(tmp, 0);
6735#else
6736  cmpl(index, 0);
6737#endif
6738  jcc(Assembler::equal, runtime);
6739#ifdef _LP64
6740  subq(tmp, wordSize);
6741  movl(index, tmp);
6742  addq(tmp, buffer);
6743#else
6744  subl(index, wordSize);
6745  movl(tmp, buffer);
6746  addl(tmp, index);
6747#endif
6748  movptr(Address(tmp, 0), tmp2);
6749  jmp(done);
6750  bind(runtime);
6751  // save the live input values
6752  if(tosca_live) push(rax);
6753  push(obj);
6754#ifdef _LP64
6755  movq(c_rarg0, Address(obj, 0));
6756  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), c_rarg0, r15_thread);
6757#else
6758  push(thread);
6759  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread);
6760  pop(thread);
6761#endif
6762  pop(obj);
6763  if(tosca_live) pop(rax);
6764  bind(done);
6765
6766}
6767
6768void MacroAssembler::g1_write_barrier_post(Register store_addr,
6769                                           Register new_val,
6770#ifndef _LP64
6771                                           Register thread,
6772#endif
6773                                           Register tmp,
6774                                           Register tmp2) {
6775
6776  LP64_ONLY(Register thread = r15_thread;)
6777  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
6778                                       PtrQueue::byte_offset_of_index()));
6779  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
6780                                       PtrQueue::byte_offset_of_buf()));
6781  BarrierSet* bs = Universe::heap()->barrier_set();
6782  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
6783  Label done;
6784  Label runtime;
6785
6786  // Does store cross heap regions?
6787
6788  movptr(tmp, store_addr);
6789  xorptr(tmp, new_val);
6790  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
6791  jcc(Assembler::equal, done);
6792
6793  // crosses regions, storing NULL?
6794
6795  cmpptr(new_val, (int32_t) NULL_WORD);
6796  jcc(Assembler::equal, done);
6797
6798  // storing region crossing non-NULL, is card already dirty?
6799
6800  ExternalAddress cardtable((address) ct->byte_map_base);
6801  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
6802#ifdef _LP64
6803  const Register card_addr = tmp;
6804
6805  movq(card_addr, store_addr);
6806  shrq(card_addr, CardTableModRefBS::card_shift);
6807
6808  lea(tmp2, cardtable);
6809
6810  // get the address of the card
6811  addq(card_addr, tmp2);
6812#else
6813  const Register card_index = tmp;
6814
6815  movl(card_index, store_addr);
6816  shrl(card_index, CardTableModRefBS::card_shift);
6817
6818  Address index(noreg, card_index, Address::times_1);
6819  const Register card_addr = tmp;
6820  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
6821#endif
6822  cmpb(Address(card_addr, 0), 0);
6823  jcc(Assembler::equal, done);
6824
6825  // storing a region crossing, non-NULL oop, card is clean.
6826  // dirty card and log.
6827
6828  movb(Address(card_addr, 0), 0);
6829
6830  cmpl(queue_index, 0);
6831  jcc(Assembler::equal, runtime);
6832  subl(queue_index, wordSize);
6833  movptr(tmp2, buffer);
6834#ifdef _LP64
6835  movslq(rscratch1, queue_index);
6836  addq(tmp2, rscratch1);
6837  movq(Address(tmp2, 0), card_addr);
6838#else
6839  addl(tmp2, queue_index);
6840  movl(Address(tmp2, 0), card_index);
6841#endif
6842  jmp(done);
6843
6844  bind(runtime);
6845  // save the live input values
6846  push(store_addr);
6847  push(new_val);
6848#ifdef _LP64
6849  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
6850#else
6851  push(thread);
6852  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
6853  pop(thread);
6854#endif
6855  pop(new_val);
6856  pop(store_addr);
6857
6858  bind(done);
6859
6860}
6861
6862#endif // SERIALGC
6863//////////////////////////////////////////////////////////////////////////////////
6864
6865
6866void MacroAssembler::store_check(Register obj) {
6867  // Does a store check for the oop in register obj. The content of
6868  // register obj is destroyed afterwards.
6869  store_check_part_1(obj);
6870  store_check_part_2(obj);
6871}
6872
6873void MacroAssembler::store_check(Register obj, Address dst) {
6874  store_check(obj);
6875}
6876
6877
6878// split the store check operation so that other instructions can be scheduled inbetween
6879void MacroAssembler::store_check_part_1(Register obj) {
6880  BarrierSet* bs = Universe::heap()->barrier_set();
6881  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
6882  shrptr(obj, CardTableModRefBS::card_shift);
6883}
6884
6885void MacroAssembler::store_check_part_2(Register obj) {
6886  BarrierSet* bs = Universe::heap()->barrier_set();
6887  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
6888  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
6889  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
6890
6891  // The calculation for byte_map_base is as follows:
6892  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
6893  // So this essentially converts an address to a displacement and
6894  // it will never need to be relocated. On 64bit however the value may be too
6895  // large for a 32bit displacement
6896
6897  intptr_t disp = (intptr_t) ct->byte_map_base;
6898  if (is_simm32(disp)) {
6899    Address cardtable(noreg, obj, Address::times_1, disp);
6900    movb(cardtable, 0);
6901  } else {
6902    // By doing it as an ExternalAddress disp could be converted to a rip-relative
6903    // displacement and done in a single instruction given favorable mapping and
6904    // a smarter version of as_Address. Worst case it is two instructions which
6905    // is no worse off then loading disp into a register and doing as a simple
6906    // Address() as above.
6907    // We can't do as ExternalAddress as the only style since if disp == 0 we'll
6908    // assert since NULL isn't acceptable in a reloci (see 6644928). In any case
6909    // in some cases we'll get a single instruction version.
6910
6911    ExternalAddress cardtable((address)disp);
6912    Address index(noreg, obj, Address::times_1);
6913    movb(as_Address(ArrayAddress(cardtable, index)), 0);
6914  }
6915}
6916
6917void MacroAssembler::subptr(Register dst, int32_t imm32) {
6918  LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
6919}
6920
6921void MacroAssembler::subptr(Register dst, Register src) {
6922  LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
6923}
6924
6925void MacroAssembler::test32(Register src1, AddressLiteral src2) {
6926  // src2 must be rval
6927
6928  if (reachable(src2)) {
6929    testl(src1, as_Address(src2));
6930  } else {
6931    lea(rscratch1, src2);
6932    testl(src1, Address(rscratch1, 0));
6933  }
6934}
6935
6936// C++ bool manipulation
6937void MacroAssembler::testbool(Register dst) {
6938  if(sizeof(bool) == 1)
6939    testb(dst, 0xff);
6940  else if(sizeof(bool) == 2) {
6941    // testw implementation needed for two byte bools
6942    ShouldNotReachHere();
6943  } else if(sizeof(bool) == 4)
6944    testl(dst, dst);
6945  else
6946    // unsupported
6947    ShouldNotReachHere();
6948}
6949
6950void MacroAssembler::testptr(Register dst, Register src) {
6951  LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
6952}
6953
6954// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
6955void MacroAssembler::tlab_allocate(Register obj,
6956                                   Register var_size_in_bytes,
6957                                   int con_size_in_bytes,
6958                                   Register t1,
6959                                   Register t2,
6960                                   Label& slow_case) {
6961  assert_different_registers(obj, t1, t2);
6962  assert_different_registers(obj, var_size_in_bytes, t1);
6963  Register end = t2;
6964  Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);
6965
6966  verify_tlab();
6967
6968  NOT_LP64(get_thread(thread));
6969
6970  movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
6971  if (var_size_in_bytes == noreg) {
6972    lea(end, Address(obj, con_size_in_bytes));
6973  } else {
6974    lea(end, Address(obj, var_size_in_bytes, Address::times_1));
6975  }
6976  cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
6977  jcc(Assembler::above, slow_case);
6978
6979  // update the tlab top pointer
6980  movptr(Address(thread, JavaThread::tlab_top_offset()), end);
6981
6982  // recover var_size_in_bytes if necessary
6983  if (var_size_in_bytes == end) {
6984    subptr(var_size_in_bytes, obj);
6985  }
6986  verify_tlab();
6987}
6988
6989// Preserves rbx, and rdx.
6990void MacroAssembler::tlab_refill(Label& retry,
6991                                 Label& try_eden,
6992                                 Label& slow_case) {
6993  Register top = rax;
6994  Register t1  = rcx;
6995  Register t2  = rsi;
6996  Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
6997  assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
6998  Label do_refill, discard_tlab;
6999
7000  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
7001    // No allocation in the shared eden.
7002    jmp(slow_case);
7003  }
7004
7005  NOT_LP64(get_thread(thread_reg));
7006
7007  movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
7008  movptr(t1,  Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
7009
7010  // calculate amount of free space
7011  subptr(t1, top);
7012  shrptr(t1, LogHeapWordSize);
7013
7014  // Retain tlab and allocate object in shared space if
7015  // the amount free in the tlab is too large to discard.
7016  cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
7017  jcc(Assembler::lessEqual, discard_tlab);
7018
7019  // Retain
7020  // %%% yuck as movptr...
7021  movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
7022  addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
7023  if (TLABStats) {
7024    // increment number of slow_allocations
7025    addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
7026  }
7027  jmp(try_eden);
7028
7029  bind(discard_tlab);
7030  if (TLABStats) {
7031    // increment number of refills
7032    addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
7033    // accumulate wastage -- t1 is amount free in tlab
7034    addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
7035  }
7036
7037  // if tlab is currently allocated (top or end != null) then
7038  // fill [top, end + alignment_reserve) with array object
7039  testptr (top, top);
7040  jcc(Assembler::zero, do_refill);
7041
7042  // set up the mark word
7043  movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
7044  // set the length to the remaining space
7045  subptr(t1, typeArrayOopDesc::header_size(T_INT));
7046  addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
7047  shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));
7048  movptr(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
7049  // set klass to intArrayKlass
7050  // dubious reloc why not an oop reloc?
7051  movptr(t1, ExternalAddress((address) Universe::intArrayKlassObj_addr()));
7052  // store klass last.  concurrent gcs assumes klass length is valid if
7053  // klass field is not null.
7054  store_klass(top, t1);
7055
7056  // refill the tlab with an eden allocation
7057  bind(do_refill);
7058  movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
7059  shlptr(t1, LogHeapWordSize);
7060  // add object_size ??
7061  eden_allocate(top, t1, 0, t2, slow_case);
7062
7063  // Check that t1 was preserved in eden_allocate.
7064#ifdef ASSERT
7065  if (UseTLAB) {
7066    Label ok;
7067    Register tsize = rsi;
7068    assert_different_registers(tsize, thread_reg, t1);
7069    push(tsize);
7070    movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
7071    shlptr(tsize, LogHeapWordSize);
7072    cmpptr(t1, tsize);
7073    jcc(Assembler::equal, ok);
7074    stop("assert(t1 != tlab size)");
7075    should_not_reach_here();
7076
7077    bind(ok);
7078    pop(tsize);
7079  }
7080#endif
7081  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
7082  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
7083  addptr(top, t1);
7084  subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
7085  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
7086  verify_tlab();
7087  jmp(retry);
7088}
7089
7090static const double     pi_4 =  0.7853981633974483;
7091
7092void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
7093  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
7094  // was attempted in this code; unfortunately it appears that the
7095  // switch to 80-bit precision and back causes this to be
7096  // unprofitable compared with simply performing a runtime call if
7097  // the argument is out of the (-pi/4, pi/4) range.
7098
7099  Register tmp = noreg;
7100  if (!VM_Version::supports_cmov()) {
7101    // fcmp needs a temporary so preserve rbx,
7102    tmp = rbx;
7103    push(tmp);
7104  }
7105
7106  Label slow_case, done;
7107
7108  ExternalAddress pi4_adr = (address)&pi_4;
7109  if (reachable(pi4_adr)) {
7110    // x ?<= pi/4
7111    fld_d(pi4_adr);
7112    fld_s(1);                // Stack:  X  PI/4  X
7113    fabs();                  // Stack: |X| PI/4  X
7114    fcmp(tmp);
7115    jcc(Assembler::above, slow_case);
7116
7117    // fastest case: -pi/4 <= x <= pi/4
7118    switch(trig) {
7119    case 's':
7120      fsin();
7121      break;
7122    case 'c':
7123      fcos();
7124      break;
7125    case 't':
7126      ftan();
7127      break;
7128    default:
7129      assert(false, "bad intrinsic");
7130      break;
7131    }
7132    jmp(done);
7133  }
7134
7135  // slow case: runtime call
7136  bind(slow_case);
7137  // Preserve registers across runtime call
7138  pusha();
7139  int incoming_argument_and_return_value_offset = -1;
7140  if (num_fpu_regs_in_use > 1) {
7141    // Must preserve all other FPU regs (could alternatively convert
7142    // SharedRuntime::dsin and dcos into assembly routines known not to trash
7143    // FPU state, but can not trust C compiler)
7144    NEEDS_CLEANUP;
7145    // NOTE that in this case we also push the incoming argument to
7146    // the stack and restore it later; we also use this stack slot to
7147    // hold the return value from dsin or dcos.
7148    for (int i = 0; i < num_fpu_regs_in_use; i++) {
7149      subptr(rsp, sizeof(jdouble));
7150      fstp_d(Address(rsp, 0));
7151    }
7152    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
7153    fld_d(Address(rsp, incoming_argument_and_return_value_offset));
7154  }
7155  subptr(rsp, sizeof(jdouble));
7156  fstp_d(Address(rsp, 0));
7157#ifdef _LP64
7158  movdbl(xmm0, Address(rsp, 0));
7159#endif // _LP64
7160
7161  // NOTE: we must not use call_VM_leaf here because that requires a
7162  // complete interpreter frame in debug mode -- same bug as 4387334
7163  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
7164  // do proper 64bit abi
7165
7166  NEEDS_CLEANUP;
7167  // Need to add stack banging before this runtime call if it needs to
7168  // be taken; however, there is no generic stack banging routine at
7169  // the MacroAssembler level
7170  switch(trig) {
7171  case 's':
7172    {
7173      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 0);
7174    }
7175    break;
7176  case 'c':
7177    {
7178      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 0);
7179    }
7180    break;
7181  case 't':
7182    {
7183      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 0);
7184    }
7185    break;
7186  default:
7187    assert(false, "bad intrinsic");
7188    break;
7189  }
7190#ifdef _LP64
7191    movsd(Address(rsp, 0), xmm0);
7192    fld_d(Address(rsp, 0));
7193#endif // _LP64
7194  addptr(rsp, sizeof(jdouble));
7195  if (num_fpu_regs_in_use > 1) {
7196    // Must save return value to stack and then restore entire FPU stack
7197    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
7198    for (int i = 0; i < num_fpu_regs_in_use; i++) {
7199      fld_d(Address(rsp, 0));
7200      addptr(rsp, sizeof(jdouble));
7201    }
7202  }
7203  popa();
7204
7205  // Come here with result in F-TOS
7206  bind(done);
7207
7208  if (tmp != noreg) {
7209    pop(tmp);
7210  }
7211}
7212
7213
7214// Look up the method for a megamorphic invokeinterface call.
7215// The target method is determined by <intf_klass, itable_index>.
7216// The receiver klass is in recv_klass.
7217// On success, the result will be in method_result, and execution falls through.
7218// On failure, execution transfers to the given label.
7219void MacroAssembler::lookup_interface_method(Register recv_klass,
7220                                             Register intf_klass,
7221                                             RegisterConstant itable_index,
7222                                             Register method_result,
7223                                             Register scan_temp,
7224                                             Label& L_no_such_interface) {
7225  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
7226  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
7227         "caller must use same register for non-constant itable index as for method");
7228
7229  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
7230  int vtable_base = instanceKlass::vtable_start_offset() * wordSize;
7231  int itentry_off = itableMethodEntry::method_offset_in_bytes();
7232  int scan_step   = itableOffsetEntry::size() * wordSize;
7233  int vte_size    = vtableEntry::size() * wordSize;
7234  Address::ScaleFactor times_vte_scale = Address::times_ptr;
7235  assert(vte_size == wordSize, "else adjust times_vte_scale");
7236
7237  movl(scan_temp, Address(recv_klass, instanceKlass::vtable_length_offset() * wordSize));
7238
7239  // %%% Could store the aligned, prescaled offset in the klassoop.
7240  lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
7241  if (HeapWordsPerLong > 1) {
7242    // Round up to align_object_offset boundary
7243    // see code for instanceKlass::start_of_itable!
7244    round_to(scan_temp, BytesPerLong);
7245  }
7246
7247  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
7248  assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
7249  lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
7250
7251  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
7252  //   if (scan->interface() == intf) {
7253  //     result = (klass + scan->offset() + itable_index);
7254  //   }
7255  // }
7256  Label search, found_method;
7257
7258  for (int peel = 1; peel >= 0; peel--) {
7259    movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
7260    cmpptr(intf_klass, method_result);
7261
7262    if (peel) {
7263      jccb(Assembler::equal, found_method);
7264    } else {
7265      jccb(Assembler::notEqual, search);
7266      // (invert the test to fall through to found_method...)
7267    }
7268
7269    if (!peel)  break;
7270
7271    bind(search);
7272
7273    // Check that the previous entry is non-null.  A null entry means that
7274    // the receiver class doesn't implement the interface, and wasn't the
7275    // same as when the caller was compiled.
7276    testptr(method_result, method_result);
7277    jcc(Assembler::zero, L_no_such_interface);
7278    addptr(scan_temp, scan_step);
7279  }
7280
7281  bind(found_method);
7282
7283  // Got a hit.
7284  movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
7285  movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
7286}
7287
7288
7289void MacroAssembler::check_klass_subtype(Register sub_klass,
7290                           Register super_klass,
7291                           Register temp_reg,
7292                           Label& L_success) {
7293  Label L_failure;
7294  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
7295  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
7296  bind(L_failure);
7297}
7298
7299
7300void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
7301                                                   Register super_klass,
7302                                                   Register temp_reg,
7303                                                   Label* L_success,
7304                                                   Label* L_failure,
7305                                                   Label* L_slow_path,
7306                                        RegisterConstant super_check_offset) {
7307  assert_different_registers(sub_klass, super_klass, temp_reg);
7308  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
7309  if (super_check_offset.is_register()) {
7310    assert_different_registers(sub_klass, super_klass,
7311                               super_check_offset.as_register());
7312  } else if (must_load_sco) {
7313    assert(temp_reg != noreg, "supply either a temp or a register offset");
7314  }
7315
7316  Label L_fallthrough;
7317  int label_nulls = 0;
7318  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
7319  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
7320  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
7321  assert(label_nulls <= 1, "at most one NULL in the batch");
7322
7323  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
7324                   Klass::secondary_super_cache_offset_in_bytes());
7325  int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
7326                    Klass::super_check_offset_offset_in_bytes());
7327  Address super_check_offset_addr(super_klass, sco_offset);
7328
7329  // Hacked jcc, which "knows" that L_fallthrough, at least, is in
7330  // range of a jccb.  If this routine grows larger, reconsider at
7331  // least some of these.
7332#define local_jcc(assembler_cond, label)                                \
7333  if (&(label) == &L_fallthrough)  jccb(assembler_cond, label);         \
7334  else                             jcc( assembler_cond, label) /*omit semi*/
7335
7336  // Hacked jmp, which may only be used just before L_fallthrough.
7337#define final_jmp(label)                                                \
7338  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
7339  else                            jmp(label)                /*omit semi*/
7340
7341  // If the pointers are equal, we are done (e.g., String[] elements).
7342  // This self-check enables sharing of secondary supertype arrays among
7343  // non-primary types such as array-of-interface.  Otherwise, each such
7344  // type would need its own customized SSA.
7345  // We move this check to the front of the fast path because many
7346  // type checks are in fact trivially successful in this manner,
7347  // so we get a nicely predicted branch right at the start of the check.
7348  cmpptr(sub_klass, super_klass);
7349  local_jcc(Assembler::equal, *L_success);
7350
7351  // Check the supertype display:
7352  if (must_load_sco) {
7353    // Positive movl does right thing on LP64.
7354    movl(temp_reg, super_check_offset_addr);
7355    super_check_offset = RegisterConstant(temp_reg);
7356  }
7357  Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
7358  cmpptr(super_klass, super_check_addr); // load displayed supertype
7359
7360  // This check has worked decisively for primary supers.
7361  // Secondary supers are sought in the super_cache ('super_cache_addr').
7362  // (Secondary supers are interfaces and very deeply nested subtypes.)
7363  // This works in the same check above because of a tricky aliasing
7364  // between the super_cache and the primary super display elements.
7365  // (The 'super_check_addr' can address either, as the case requires.)
7366  // Note that the cache is updated below if it does not help us find
7367  // what we need immediately.
7368  // So if it was a primary super, we can just fail immediately.
7369  // Otherwise, it's the slow path for us (no success at this point).
7370
7371  if (super_check_offset.is_register()) {
7372    local_jcc(Assembler::equal, *L_success);
7373    cmpl(super_check_offset.as_register(), sc_offset);
7374    if (L_failure == &L_fallthrough) {
7375      local_jcc(Assembler::equal, *L_slow_path);
7376    } else {
7377      local_jcc(Assembler::notEqual, *L_failure);
7378      final_jmp(*L_slow_path);
7379    }
7380  } else if (super_check_offset.as_constant() == sc_offset) {
7381    // Need a slow path; fast failure is impossible.
7382    if (L_slow_path == &L_fallthrough) {
7383      local_jcc(Assembler::equal, *L_success);
7384    } else {
7385      local_jcc(Assembler::notEqual, *L_slow_path);
7386      final_jmp(*L_success);
7387    }
7388  } else {
7389    // No slow path; it's a fast decision.
7390    if (L_failure == &L_fallthrough) {
7391      local_jcc(Assembler::equal, *L_success);
7392    } else {
7393      local_jcc(Assembler::notEqual, *L_failure);
7394      final_jmp(*L_success);
7395    }
7396  }
7397
7398  bind(L_fallthrough);
7399
7400#undef local_jcc
7401#undef final_jmp
7402}
7403
7404
7405void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
7406                                                   Register super_klass,
7407                                                   Register temp_reg,
7408                                                   Register temp2_reg,
7409                                                   Label* L_success,
7410                                                   Label* L_failure,
7411                                                   bool set_cond_codes) {
7412  assert_different_registers(sub_klass, super_klass, temp_reg);
7413  if (temp2_reg != noreg)
7414    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
7415#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
7416
7417  Label L_fallthrough;
7418  int label_nulls = 0;
7419  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
7420  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
7421  assert(label_nulls <= 1, "at most one NULL in the batch");
7422
7423  // a couple of useful fields in sub_klass:
7424  int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
7425                   Klass::secondary_supers_offset_in_bytes());
7426  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
7427                   Klass::secondary_super_cache_offset_in_bytes());
7428  Address secondary_supers_addr(sub_klass, ss_offset);
7429  Address super_cache_addr(     sub_klass, sc_offset);
7430
7431  // Do a linear scan of the secondary super-klass chain.
7432  // This code is rarely used, so simplicity is a virtue here.
7433  // The repne_scan instruction uses fixed registers, which we must spill.
7434  // Don't worry too much about pre-existing connections with the input regs.
7435
7436  assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
7437  assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)
7438
7439  // Get super_klass value into rax (even if it was in rdi or rcx).
7440  bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
7441  if (super_klass != rax || UseCompressedOops) {
7442    if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
7443    mov(rax, super_klass);
7444  }
7445  if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
7446  if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }
7447
7448#ifndef PRODUCT
7449  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
7450  ExternalAddress pst_counter_addr((address) pst_counter);
7451  NOT_LP64(  incrementl(pst_counter_addr) );
7452  LP64_ONLY( lea(rcx, pst_counter_addr) );
7453  LP64_ONLY( incrementl(Address(rcx, 0)) );
7454#endif //PRODUCT
7455
7456  // We will consult the secondary-super array.
7457  movptr(rdi, secondary_supers_addr);
7458  // Load the array length.  (Positive movl does right thing on LP64.)
7459  movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
7460  // Skip to start of data.
7461  addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
7462
7463  // Scan RCX words at [RDI] for an occurrence of RAX.
7464  // Set NZ/Z based on last compare.
7465#ifdef _LP64
7466  // This part is tricky, as values in supers array could be 32 or 64 bit wide
7467  // and we store values in objArrays always encoded, thus we need to encode
7468  // the value of rax before repne.  Note that rax is dead after the repne.
7469  if (UseCompressedOops) {
7470    encode_heap_oop_not_null(rax);
7471    // The superclass is never null; it would be a basic system error if a null
7472    // pointer were to sneak in here.  Note that we have already loaded the
7473    // Klass::super_check_offset from the super_klass in the fast path,
7474    // so if there is a null in that register, we are already in the afterlife.
7475    repne_scanl();
7476  } else
7477#endif // _LP64
7478    repne_scan();
7479
7480  // Unspill the temp. registers:
7481  if (pushed_rdi)  pop(rdi);
7482  if (pushed_rcx)  pop(rcx);
7483  if (pushed_rax)  pop(rax);
7484
7485  if (set_cond_codes) {
7486    // Special hack for the AD files:  rdi is guaranteed non-zero.
7487    assert(!pushed_rdi, "rdi must be left non-NULL");
7488    // Also, the condition codes are properly set Z/NZ on succeed/failure.
7489  }
7490
7491  if (L_failure == &L_fallthrough)
7492        jccb(Assembler::notEqual, *L_failure);
7493  else  jcc(Assembler::notEqual, *L_failure);
7494
7495  // Success.  Cache the super we found and proceed in triumph.
7496  movptr(super_cache_addr, super_klass);
7497
7498  if (L_success != &L_fallthrough) {
7499    jmp(*L_success);
7500  }
7501
7502#undef IS_A_TEMP
7503
7504  bind(L_fallthrough);
7505}
7506
7507
7508void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
7509  ucomisd(dst, as_Address(src));
7510}
7511
7512void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
7513  ucomiss(dst, as_Address(src));
7514}
7515
7516void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
7517  if (reachable(src)) {
7518    xorpd(dst, as_Address(src));
7519  } else {
7520    lea(rscratch1, src);
7521    xorpd(dst, Address(rscratch1, 0));
7522  }
7523}
7524
7525void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
7526  if (reachable(src)) {
7527    xorps(dst, as_Address(src));
7528  } else {
7529    lea(rscratch1, src);
7530    xorps(dst, Address(rscratch1, 0));
7531  }
7532}
7533
7534void MacroAssembler::verify_oop(Register reg, const char* s) {
7535  if (!VerifyOops) return;
7536
7537  // Pass register number to verify_oop_subroutine
7538  char* b = new char[strlen(s) + 50];
7539  sprintf(b, "verify_oop: %s: %s", reg->name(), s);
7540  push(rax);                          // save rax,
7541  push(reg);                          // pass register argument
7542  ExternalAddress buffer((address) b);
7543  // avoid using pushptr, as it modifies scratch registers
7544  // and our contract is not to modify anything
7545  movptr(rax, buffer.addr());
7546  push(rax);
7547  // call indirectly to solve generation ordering problem
7548  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
7549  call(rax);
7550}
7551
7552
7553RegisterConstant MacroAssembler::delayed_value(intptr_t* delayed_value_addr,
7554                                               Register tmp,
7555                                               int offset) {
7556  intptr_t value = *delayed_value_addr;
7557  if (value != 0)
7558    return RegisterConstant(value + offset);
7559
7560  // load indirectly to solve generation ordering problem
7561  movptr(tmp, ExternalAddress((address) delayed_value_addr));
7562
7563#ifdef ASSERT
7564  Label L;
7565  testl(tmp, tmp);
7566  jccb(Assembler::notZero, L);
7567  hlt();
7568  bind(L);
7569#endif
7570
7571  if (offset != 0)
7572    addptr(tmp, offset);
7573
7574  return RegisterConstant(tmp);
7575}
7576
7577
7578void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
7579  if (!VerifyOops) return;
7580
7581  // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
7582  // Pass register number to verify_oop_subroutine
7583  char* b = new char[strlen(s) + 50];
7584  sprintf(b, "verify_oop_addr: %s", s);
7585
7586  push(rax);                          // save rax,
7587  // addr may contain rsp so we will have to adjust it based on the push
7588  // we just did
7589  // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
7590  // stores rax into addr which is backwards of what was intended.
7591  if (addr.uses(rsp)) {
7592    lea(rax, addr);
7593    pushptr(Address(rax, BytesPerWord));
7594  } else {
7595    pushptr(addr);
7596  }
7597
7598  ExternalAddress buffer((address) b);
7599  // pass msg argument
7600  // avoid using pushptr, as it modifies scratch registers
7601  // and our contract is not to modify anything
7602  movptr(rax, buffer.addr());
7603  push(rax);
7604
7605  // call indirectly to solve generation ordering problem
7606  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
7607  call(rax);
7608  // Caller pops the arguments and restores rax, from the stack
7609}
7610
7611void MacroAssembler::verify_tlab() {
7612#ifdef ASSERT
7613  if (UseTLAB && VerifyOops) {
7614    Label next, ok;
7615    Register t1 = rsi;
7616    Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);
7617
7618    push(t1);
7619    NOT_LP64(push(thread_reg));
7620    NOT_LP64(get_thread(thread_reg));
7621
7622    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
7623    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
7624    jcc(Assembler::aboveEqual, next);
7625    stop("assert(top >= start)");
7626    should_not_reach_here();
7627
7628    bind(next);
7629    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
7630    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
7631    jcc(Assembler::aboveEqual, ok);
7632    stop("assert(top <= end)");
7633    should_not_reach_here();
7634
7635    bind(ok);
7636    NOT_LP64(pop(thread_reg));
7637    pop(t1);
7638  }
7639#endif
7640}
7641
// Decoder for an x87 control word image held in _value.
// Bits 0-5 are read as the six exception flags, bits 8-9 as the precision
// control field and bits 10-11 as the rounding control field.
class ControlWord {
 public:
  int32_t _value;

  // Two-bit fields.
  int  rounding_control() const        { return (_value & 0x0C00) >> 10; }
  int  precision_control() const       { return (_value & 0x0300) >>  8; }

  // Single-bit flags.
  bool precision() const               { return (_value & (1 << 5)) != 0; }
  bool underflow() const               { return (_value & (1 << 4)) != 0; }
  bool overflow() const                { return (_value & (1 << 3)) != 0; }
  bool zero_divide() const             { return (_value & (1 << 2)) != 0; }
  bool denormalized() const            { return (_value & (1 << 1)) != 0; }
  bool invalid() const                 { return (_value & (1 << 0)) != 0; }

  // Prints the low 16 bits in hex followed by the decoded flags (upper case
  // when the bit is set, lower case otherwise), rounding mode and precision.
  void print() const {
    // rounding control
    static const char* const rc_text[4] = {
      "round near", "round down", "round up  ", "chop      "
    };
    // precision control
    static const char* const pc_text[4] = {
      "24 bits ", "reserved", "53 bits ", "64 bits "
    };
    const char* rc = rc_text[rounding_control()];
    const char* pc = pc_text[precision_control()];

    // flags
    static const char set_chars[]   = "PUOZDI";
    static const char clear_chars[] = "puozdi";
    const bool bits[6] = {
      precision(), underflow(), overflow(), zero_divide(), denormalized(), invalid()
    };
    char f[9];
    f[0] = ' ';
    f[1] = ' ';
    for (int i = 0; i < 6; i++) {
      f[2 + i] = bits[i] ? set_chars[i] : clear_chars[i];
    }
    f[8] = '\x0';

    // output
    printf("%04x  masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
  }

};
7688
// Decoder for an x87 status word image held in _value.
// Bits 0-7 are read as exception/status flags, bits 8-10 and 14 as the
// condition codes C0-C3, bits 11-13 as the stack top and bit 15 as busy.
class StatusWord {
 public:
  int32_t _value;

  bool busy() const                    { return (_value & (1 << 15)) != 0; }
  bool C3() const                      { return (_value & (1 << 14)) != 0; }
  bool C2() const                      { return (_value & (1 << 10)) != 0; }
  bool C1() const                      { return (_value & (1 <<  9)) != 0; }
  bool C0() const                      { return (_value & (1 <<  8)) != 0; }
  int  top() const                     { return (_value & 0x3800) >> 11;   }
  bool error_status() const            { return (_value & (1 <<  7)) != 0; }
  bool stack_fault() const             { return (_value & (1 <<  6)) != 0; }
  bool precision() const               { return (_value & (1 <<  5)) != 0; }
  bool underflow() const               { return (_value & (1 <<  4)) != 0; }
  bool overflow() const                { return (_value & (1 <<  3)) != 0; }
  bool zero_divide() const             { return (_value & (1 <<  2)) != 0; }
  bool denormalized() const            { return (_value & (1 <<  1)) != 0; }
  bool invalid() const                 { return (_value & (1 <<  0)) != 0; }

  // Prints the low 16 bits in hex followed by the decoded flags, condition
  // codes and stack top; a '-' stands for a bit that is clear.
  void print() const {
    // condition codes
    static const char cc_chars[] = "3210";
    const bool cc_bits[4] = { C3(), C2(), C1(), C0() };
    char c[5];
    for (int i = 0; i < 4; i++) {
      c[i] = cc_bits[i] ? cc_chars[i] : '-';
    }
    c[4] = '\x0';

    // flags
    static const char flag_chars[] = "ESPUOZDI";
    const bool flag_bits[8] = {
      error_status(), stack_fault(), precision(), underflow(),
      overflow(), zero_divide(), denormalized(), invalid()
    };
    char f[9];
    for (int i = 0; i < 8; i++) {
      f[i] = flag_bits[i] ? flag_chars[i] : '-';
    }
    f[8] = '\x0';

    // output
    printf("%04x  flags = %s, cc =  %s, top = %d", _value & 0xFFFF, f, c, top());
  }

};
7732
// FPU tag word: a sequence of two-bit tags, tag i living in bits 2*i and
// 2*i+1 of _value.
class TagWord {
 public:
  int32_t _value;

  // Returns the two-bit tag stored at slot i.
  int tag_at(int i) const {
    const int shift = 2 * i;
    return (_value >> shift) & 0x3;
  }

  // Prints the low 16 bits as four hex digits, without a newline.
  void print() const {
    const int low16 = _value & 0xFFFF;
    printf("%04x", low16);
  }

};
7744
// One FPU data register split into two 32-bit mantissa halves (_m0 low,
// _m1 high, as printed) and a 16-bit sign/exponent field (_ex).
class FPU_Register {
 public:
  int32_t _m0;
  int32_t _m1;
  int16_t _ex;

  // True only for the exact bit pattern _ex == all ones, _m1 == 0xC0000000
  // and _m0 == 0.
  bool is_indefinite() const {
    if (_ex != -1)                  return false;
    if (_m1 != (int32_t)0xC0000000) return false;
    return _m0 == 0;
  }

  // Prints sign, exponent field and mantissa in hex, followed by "NaN" when
  // the exponent field is 0x7FFF or 0xFFFF and by three blanks otherwise.
  // No newline is emitted.
  void print() const {
    const char sign = (_ex >= 0) ? '+' : '-';
    const char* kind = "   ";
    if (_ex == 0x7FFF || _ex == (int16_t)-1) {
      kind = "NaN";
    }
    printf("%c%04hx.%08x%08x  %s", sign, _ex, _m1, _m0, kind);
  }

};
7762
7763class FPU_State {
7764 public:
7765  enum {
7766    register_size       = 10,
7767    number_of_registers =  8,
7768    register_mask       =  7
7769  };
7770
7771  ControlWord  _control_word;
7772  StatusWord   _status_word;
7773  TagWord      _tag_word;
7774  int32_t      _error_offset;
7775  int32_t      _error_selector;
7776  int32_t      _data_offset;
7777  int32_t      _data_selector;
7778  int8_t       _register[register_size * number_of_registers];
7779
7780  int tag_for_st(int i) const          { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
7781  FPU_Register* st(int i) const        { return (FPU_Register*)&_register[register_size * i]; }
7782
7783  const char* tag_as_string(int tag) const {
7784    switch (tag) {
7785      case 0: return "valid";
7786      case 1: return "zero";
7787      case 2: return "special";
7788      case 3: return "empty";
7789    }
7790    ShouldNotReachHere()
7791    return NULL;
7792  }
7793
7794  void print() const {
7795    // print computation registers
7796    { int t = _status_word.top();
7797      for (int i = 0; i < number_of_registers; i++) {
7798        int j = (i - t) & register_mask;
7799        printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
7800        st(j)->print();
7801        printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
7802      }
7803    }
7804    printf("\n");
7805    // print control registers
7806    printf("ctrl = "); _control_word.print(); printf("\n");
7807    printf("stat = "); _status_word .print(); printf("\n");
7808    printf("tags = "); _tag_word    .print(); printf("\n");
7809  }
7810
7811};
7812
// Bit-level view of the integer-unit flags register.
class Flag_Register {
 public:
  int32_t _value;

  bool overflow() const                { return (_value & (1 << 11)) != 0; }
  bool direction() const               { return (_value & (1 << 10)) != 0; }
  bool sign() const                    { return (_value & (1 <<  7)) != 0; }
  bool zero() const                    { return (_value & (1 <<  6)) != 0; }
  bool auxiliary_carry() const         { return (_value & (1 <<  4)) != 0; }
  bool parity() const                  { return (_value & (1 <<  2)) != 0; }
  bool carry() const                   { return (_value & (1 <<  0)) != 0; }

  // Prints the raw value in hex followed by one letter per decoded flag;
  // a '-' stands for a clear flag. No newline is emitted.
  void print() const {
    // flags, highest bit first
    const char f[8] = {
      overflow       () ? 'O' : '-',
      direction      () ? 'D' : '-',
      sign           () ? 'S' : '-',
      zero           () ? 'Z' : '-',
      auxiliary_carry() ? 'A' : '-',
      parity         () ? 'P' : '-',
      carry          () ? 'C' : '-',
      '\x0'
    };
    // output
    printf("%08x  flags = %s", _value, f);
  }

};
7841
// One saved 32-bit integer-unit register value.
class IU_Register {
 public:
  int32_t _value;

  // Prints the value as 8 hex digits and again as a right-aligned signed
  // decimal (field width 11). No newline is emitted.
  void print() const {
    const int32_t v = _value;
    printf("%08x  %11d", v, v);
  }

};
7851
7852class IU_State {
7853 public:
7854  Flag_Register _eflags;
7855  IU_Register   _rdi;
7856  IU_Register   _rsi;
7857  IU_Register   _rbp;
7858  IU_Register   _rsp;
7859  IU_Register   _rbx;
7860  IU_Register   _rdx;
7861  IU_Register   _rcx;
7862  IU_Register   _rax;
7863
7864  void print() const {
7865    // computation registers
7866    printf("rax,  = "); _rax.print(); printf("\n");
7867    printf("rbx,  = "); _rbx.print(); printf("\n");
7868    printf("rcx  = "); _rcx.print(); printf("\n");
7869    printf("rdx  = "); _rdx.print(); printf("\n");
7870    printf("rdi  = "); _rdi.print(); printf("\n");
7871    printf("rsi  = "); _rsi.print(); printf("\n");
7872    printf("rbp,  = "); _rbp.print(); printf("\n");
7873    printf("rsp  = "); _rsp.print(); printf("\n");
7874    printf("\n");
7875    // control registers
7876    printf("flgs = "); _eflags.print(); printf("\n");
7877  }
7878};
7879
7880
7881class CPU_State {
7882 public:
7883  FPU_State _fpu_state;
7884  IU_State  _iu_state;
7885
7886  void print() const {
7887    printf("--------------------------------------------------\n");
7888    _iu_state .print();
7889    printf("\n");
7890    _fpu_state.print();
7891    printf("--------------------------------------------------\n");
7892  }
7893
7894};
7895
7896
7897static void _print_CPU_state(CPU_State* state) {
7898  state->print();
7899};
7900
7901
7902void MacroAssembler::print_CPU_state() {
7903  push_CPU_state();
7904  push(rsp);                // pass CPU state
7905  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
7906  addptr(rsp, wordSize);       // discard argument
7907  pop_CPU_state();
7908}
7909
7910
7911static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
7912  static int counter = 0;
7913  FPU_State* fs = &state->_fpu_state;
7914  counter++;
7915  // For leaf calls, only verify that the top few elements remain empty.
7916  // We only need 1 empty at the top for C2 code.
7917  if( stack_depth < 0 ) {
7918    if( fs->tag_for_st(7) != 3 ) {
7919      printf("FPR7 not empty\n");
7920      state->print();
7921      assert(false, "error");
7922      return false;
7923    }
7924    return true;                // All other stack states do not matter
7925  }
7926
7927  assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
7928         "bad FPU control word");
7929
7930  // compute stack depth
7931  int i = 0;
7932  while (i < FPU_State::number_of_registers && fs->tag_for_st(i)  < 3) i++;
7933  int d = i;
7934  while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
7935  // verify findings
7936  if (i != FPU_State::number_of_registers) {
7937    // stack not contiguous
7938    printf("%s: stack not contiguous at ST%d\n", s, i);
7939    state->print();
7940    assert(false, "error");
7941    return false;
7942  }
7943  // check if computed stack depth corresponds to expected stack depth
7944  if (stack_depth < 0) {
7945    // expected stack depth is -stack_depth or less
7946    if (d > -stack_depth) {
7947      // too many elements on the stack
7948      printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
7949      state->print();
7950      assert(false, "error");
7951      return false;
7952    }
7953  } else {
7954    // expected stack depth is stack_depth
7955    if (d != stack_depth) {
7956      // wrong stack depth
7957      printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
7958      state->print();
7959      assert(false, "error");
7960      return false;
7961    }
7962  }
7963  // everything is cool
7964  return true;
7965}
7966
7967
7968void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
7969  if (!VerifyFPU) return;
7970  push_CPU_state();
7971  push(rsp);                // pass CPU state
7972  ExternalAddress msg((address) s);
7973  // pass message string s
7974  pushptr(msg.addr());
7975  push(stack_depth);        // pass stack depth
7976  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
7977  addptr(rsp, 3 * wordSize);   // discard arguments
7978  // check for error
7979  { Label L;
7980    testl(rax, rax);
7981    jcc(Assembler::notZero, L);
7982    int3();                  // break if error condition
7983    bind(L);
7984  }
7985  pop_CPU_state();
7986}
7987
7988void MacroAssembler::load_klass(Register dst, Register src) {
7989#ifdef _LP64
7990  if (UseCompressedOops) {
7991    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
7992    decode_heap_oop_not_null(dst);
7993  } else
7994#endif
7995    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
7996}
7997
7998void MacroAssembler::load_prototype_header(Register dst, Register src) {
7999#ifdef _LP64
8000  if (UseCompressedOops) {
8001    assert (Universe::heap() != NULL, "java heap should be initialized");
8002    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
8003    if (Universe::narrow_oop_shift() != 0) {
8004      assert(Address::times_8 == LogMinObjAlignmentInBytes &&
8005             Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
8006      movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
8007    } else {
8008      movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
8009    }
8010  } else
8011#endif
8012  {
8013    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
8014    movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
8015  }
8016}
8017
8018void MacroAssembler::store_klass(Register dst, Register src) {
8019#ifdef _LP64
8020  if (UseCompressedOops) {
8021    encode_heap_oop_not_null(src);
8022    movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
8023  } else
8024#endif
8025    movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
8026}
8027
8028#ifdef _LP64
8029void MacroAssembler::store_klass_gap(Register dst, Register src) {
8030  if (UseCompressedOops) {
8031    // Store to klass gap in destination
8032    movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
8033  }
8034}
8035
8036void MacroAssembler::load_heap_oop(Register dst, Address src) {
8037  if (UseCompressedOops) {
8038    movl(dst, src);
8039    decode_heap_oop(dst);
8040  } else {
8041    movq(dst, src);
8042  }
8043}
8044
8045void MacroAssembler::store_heap_oop(Address dst, Register src) {
8046  if (UseCompressedOops) {
8047    assert(!dst.uses(src), "not enough registers");
8048    encode_heap_oop(src);
8049    movl(dst, src);
8050  } else {
8051    movq(dst, src);
8052  }
8053}
8054
8055// Algorithm must match oop.inline.hpp encode_heap_oop.
8056void MacroAssembler::encode_heap_oop(Register r) {
8057  assert (UseCompressedOops, "should be compressed");
8058  assert (Universe::heap() != NULL, "java heap should be initialized");
8059  if (Universe::narrow_oop_base() == NULL) {
8060    verify_oop(r, "broken oop in encode_heap_oop");
8061    if (Universe::narrow_oop_shift() != 0) {
8062      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
8063      shrq(r, LogMinObjAlignmentInBytes);
8064    }
8065    return;
8066  }
8067#ifdef ASSERT
8068  if (CheckCompressedOops) {
8069    Label ok;
8070    push(rscratch1); // cmpptr trashes rscratch1
8071    cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
8072    jcc(Assembler::equal, ok);
8073    stop("MacroAssembler::encode_heap_oop: heap base corrupted?");
8074    bind(ok);
8075    pop(rscratch1);
8076  }
8077#endif
8078  verify_oop(r, "broken oop in encode_heap_oop");
8079  testq(r, r);
8080  cmovq(Assembler::equal, r, r12_heapbase);
8081  subq(r, r12_heapbase);
8082  shrq(r, LogMinObjAlignmentInBytes);
8083}
8084
8085void MacroAssembler::encode_heap_oop_not_null(Register r) {
8086  assert (UseCompressedOops, "should be compressed");
8087  assert (Universe::heap() != NULL, "java heap should be initialized");
8088#ifdef ASSERT
8089  if (CheckCompressedOops) {
8090    Label ok;
8091    testq(r, r);
8092    jcc(Assembler::notEqual, ok);
8093    stop("null oop passed to encode_heap_oop_not_null");
8094    bind(ok);
8095  }
8096#endif
8097  verify_oop(r, "broken oop in encode_heap_oop_not_null");
8098  if (Universe::narrow_oop_base() != NULL) {
8099    subq(r, r12_heapbase);
8100  }
8101  if (Universe::narrow_oop_shift() != 0) {
8102    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
8103    shrq(r, LogMinObjAlignmentInBytes);
8104  }
8105}
8106
8107void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
8108  assert (UseCompressedOops, "should be compressed");
8109  assert (Universe::heap() != NULL, "java heap should be initialized");
8110#ifdef ASSERT
8111  if (CheckCompressedOops) {
8112    Label ok;
8113    testq(src, src);
8114    jcc(Assembler::notEqual, ok);
8115    stop("null oop passed to encode_heap_oop_not_null2");
8116    bind(ok);
8117  }
8118#endif
8119  verify_oop(src, "broken oop in encode_heap_oop_not_null2");
8120  if (dst != src) {
8121    movq(dst, src);
8122  }
8123  if (Universe::narrow_oop_base() != NULL) {
8124    subq(dst, r12_heapbase);
8125  }
8126  if (Universe::narrow_oop_shift() != 0) {
8127    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
8128    shrq(dst, LogMinObjAlignmentInBytes);
8129  }
8130}
8131
8132void  MacroAssembler::decode_heap_oop(Register r) {
8133  assert (UseCompressedOops, "should be compressed");
8134  assert (Universe::heap() != NULL, "java heap should be initialized");
8135  if (Universe::narrow_oop_base() == NULL) {
8136    if (Universe::narrow_oop_shift() != 0) {
8137      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
8138      shlq(r, LogMinObjAlignmentInBytes);
8139    }
8140    verify_oop(r, "broken oop in decode_heap_oop");
8141    return;
8142  }
8143#ifdef ASSERT
8144  if (CheckCompressedOops) {
8145    Label ok;
8146    push(rscratch1);
8147    cmpptr(r12_heapbase,
8148           ExternalAddress((address)Universe::narrow_oop_base_addr()));
8149    jcc(Assembler::equal, ok);
8150    stop("MacroAssembler::decode_heap_oop: heap base corrupted?");
8151    bind(ok);
8152    pop(rscratch1);
8153  }
8154#endif
8155
8156  Label done;
8157  shlq(r, LogMinObjAlignmentInBytes);
8158  jccb(Assembler::equal, done);
8159  addq(r, r12_heapbase);
8160#if 0
8161   // alternate decoding probably a wash.
8162   testq(r, r);
8163   jccb(Assembler::equal, done);
8164   leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
8165#endif
8166  bind(done);
8167  verify_oop(r, "broken oop in decode_heap_oop");
8168}
8169
8170void  MacroAssembler::decode_heap_oop_not_null(Register r) {
8171  assert (UseCompressedOops, "should only be used for compressed headers");
8172  assert (Universe::heap() != NULL, "java heap should be initialized");
8173  // Cannot assert, unverified entry point counts instructions (see .ad file)
8174  // vtableStubs also counts instructions in pd_code_size_limit.
8175  // Also do not verify_oop as this is called by verify_oop.
8176  if (Universe::narrow_oop_base() == NULL) {
8177    if (Universe::narrow_oop_shift() != 0) {
8178      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
8179      shlq(r, LogMinObjAlignmentInBytes);
8180    }
8181  } else {
8182      assert (Address::times_8 == LogMinObjAlignmentInBytes &&
8183              Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
8184    leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
8185  }
8186}
8187
8188void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
8189  assert (UseCompressedOops, "should only be used for compressed headers");
8190  assert (Universe::heap() != NULL, "java heap should be initialized");
8191  // Cannot assert, unverified entry point counts instructions (see .ad file)
8192  // vtableStubs also counts instructions in pd_code_size_limit.
8193  // Also do not verify_oop as this is called by verify_oop.
8194  if (Universe::narrow_oop_shift() != 0) {
8195    assert (Address::times_8 == LogMinObjAlignmentInBytes &&
8196            Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
8197    leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
8198  } else if (dst != src) {
8199    movq(dst, src);
8200  }
8201}
8202
8203void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
8204  assert (UseCompressedOops, "should only be used for compressed headers");
8205  assert (Universe::heap() != NULL, "java heap should be initialized");
8206  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
8207  int oop_index = oop_recorder()->find_index(obj);
8208  RelocationHolder rspec = oop_Relocation::spec(oop_index);
8209  mov_narrow_oop(dst, oop_index, rspec);
8210}
8211
8212void  MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
8213  assert (UseCompressedOops, "should only be used for compressed headers");
8214  assert (Universe::heap() != NULL, "java heap should be initialized");
8215  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
8216  int oop_index = oop_recorder()->find_index(obj);
8217  RelocationHolder rspec = oop_Relocation::spec(oop_index);
8218  mov_narrow_oop(dst, oop_index, rspec);
8219}
8220
8221void  MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
8222  assert (UseCompressedOops, "should only be used for compressed headers");
8223  assert (Universe::heap() != NULL, "java heap should be initialized");
8224  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
8225  int oop_index = oop_recorder()->find_index(obj);
8226  RelocationHolder rspec = oop_Relocation::spec(oop_index);
8227  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
8228}
8229
8230void  MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {
8231  assert (UseCompressedOops, "should only be used for compressed headers");
8232  assert (Universe::heap() != NULL, "java heap should be initialized");
8233  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
8234  int oop_index = oop_recorder()->find_index(obj);
8235  RelocationHolder rspec = oop_Relocation::spec(oop_index);
8236  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
8237}
8238
8239void MacroAssembler::reinit_heapbase() {
8240  if (UseCompressedOops) {
8241    movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
8242  }
8243}
8244#endif // _LP64
8245
8246Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
8247  switch (cond) {
8248    // Note some conditions are synonyms for others
8249    case Assembler::zero:         return Assembler::notZero;
8250    case Assembler::notZero:      return Assembler::zero;
8251    case Assembler::less:         return Assembler::greaterEqual;
8252    case Assembler::lessEqual:    return Assembler::greater;
8253    case Assembler::greater:      return Assembler::lessEqual;
8254    case Assembler::greaterEqual: return Assembler::less;
8255    case Assembler::below:        return Assembler::aboveEqual;
8256    case Assembler::belowEqual:   return Assembler::above;
8257    case Assembler::above:        return Assembler::belowEqual;
8258    case Assembler::aboveEqual:   return Assembler::below;
8259    case Assembler::overflow:     return Assembler::noOverflow;
8260    case Assembler::noOverflow:   return Assembler::overflow;
8261    case Assembler::negative:     return Assembler::positive;
8262    case Assembler::positive:     return Assembler::negative;
8263    case Assembler::parity:       return Assembler::noParity;
8264    case Assembler::noParity:     return Assembler::parity;
8265  }
8266  ShouldNotReachHere(); return Assembler::overflow;
8267}
8268
8269SkipIfEqual::SkipIfEqual(
8270    MacroAssembler* masm, const bool* flag_addr, bool value) {
8271  _masm = masm;
8272  _masm->cmp8(ExternalAddress((address)flag_addr), value);
8273  _masm->jcc(Assembler::equal, _label);
8274}
8275
8276SkipIfEqual::~SkipIfEqual() {
8277  _masm->bind(_label);
8278}
8279