// assembler_x86.cpp revision 579:0fbdb4381b99
/*
 * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */

#include "incls/_precompiled.incl"
#include "incls/_assembler_x86.cpp.incl"
27
// Implementation of AddressLiteral

// Construct a literal address plus the relocation record appropriate to
// 'rtype', so the emitted address can be found and patched if code moves.
AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  switch (rtype) {
  case relocInfo::oop_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    // No relocation record at all: a plain, unpatched literal.
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}
66
// Implementation of Address
68
#ifdef _LP64
70
// 64-bit stub: array addresses cannot be folded into a single Address here.
Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();  // unreachable; keeps the compiler happy
}
77
// exceedingly dangerous constructor
// Builds a base/index-less Address from a raw displacement, attaching the
// relocation record selected by 'rtype' (with 'loc' as the reloc target).
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // HMM
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      // No relocation attached.
      break;
    default:
      ShouldNotReachHere();
  }
}
#else // LP64
106
// 32-bit only: fold an ArrayAddress (literal base + scaled index) into one
// Address whose displacement is the literal base, keeping base's reloc spec.
Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}
115
// exceedingly dangerous constructor
// 32-bit only: treat a raw code/data address as an absolute displacement,
// carrying the caller-supplied relocation spec verbatim.
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}
124
#endif // _LP64
126
127
128
129// Convert the raw encoding form into the form expected by the constructor for
130// Address.  An index of 4 (rsp) corresponds to having no index, so convert
131// that to noreg for the Address constructor.
132Address Address::make_raw(int base, int index, int scale, int disp) {
133  bool valid_index = index != rsp->encoding();
134  if (valid_index) {
135    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
136    return madr;
137  } else {
138    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
139    return madr;
140  }
141}
142
143// Implementation of Assembler
144
145int AbstractAssembler::code_fill_byte() {
146  return (u_char)'\xF4'; // hlt
147}
148
149// make this go away someday
150void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
151  if (rtype == relocInfo::none)
152        emit_long(data);
153  else  emit_data(data, Relocation::spec_simple(rtype), format);
154}
155
// Emit a 32-bit data word with an attached relocation.  The reloc is
// registered against the enclosing instruction start (inst_mark()), not the
// word itself, so locate_operand() can later find the embedded operand.
void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() !=  relocInfo::none) {
    #ifdef ASSERT
      check_relocation(rspec, format);
    #endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words.  Instead, relocate to the enclosing instruction.

    // hack. call32 is too wide for mask so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_long(data);
}
174
175static int encode(Register r) {
176  int enc = r->encoding();
177  if (enc >= 8) {
178    enc -= 8;
179  }
180  return enc;
181}
182
183static int encode(XMMRegister r) {
184  int enc = r->encoding();
185  if (enc >= 8) {
186    enc -= 8;
187  }
188  return enc;
189}
190
// Emit a byte-sized arithmetic op: opcode, ModRM (op2 | reg), imm8.
void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_byte(imm8);
}
200
201
// Emit a 32-bit arithmetic op with immediate; uses the shorter sign-extended
// imm8 form (opcode bit 0x02 set) when the immediate fits in a byte.
void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_byte(op1 | 0x02); // set sign bit
    emit_byte(op2 | encode(dst));
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(op1);
    emit_byte(op2 | encode(dst));
    emit_long(imm32);
  }
}
216
// immediate-to-memory forms
// Like emit_arith above, but the destination is a memory operand; the second
// argument to emit_operand is the immediate size, needed for rip-relative
// displacement correction.
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_byte(op1 | 0x02); // set sign bit
    emit_operand(rm, adr, 1);
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(op1);
    emit_operand(rm, adr, 4);
    emit_long(imm32);
  }
}
231
// 32-bit only: arithmetic op whose immediate is an oop, emitted with an
// oop_type relocation so the GC can find/patch it.
void Assembler::emit_arith(int op1, int op2, Register dst, jobject obj) {
  LP64_ONLY(ShouldNotReachHere());
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  InstructionMark im(this);
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_data((intptr_t)obj, relocInfo::oop_type, 0);
}
242
243
// Register-register arithmetic: opcode then ModRM with dst in the reg field
// (bits 5:3) and src in the r/m field (bits 2:0).
void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  emit_byte(op1);
  emit_byte(op2 | encode(dst) << 3 | encode(src));
}
249
250
// Emit the ModRM byte, optional SIB byte, and displacement for a memory
// operand [base + index*scale + disp], choosing the shortest encoding that
// is legal (no disp / disp8 / disp32) unless a relocation forces disp32.
// Comments of the form [mm reg rrr] show the bit layout of emitted bytes.
void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      // rbp/r13 as base cannot use the no-displacement form (that encoding
      // means rip-relative / disp32-only), hence the extra exclusion below.
      if (disp == 0 && rtype == relocInfo::none  &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x04 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x44 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x84 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // [rsp + disp]  (rsp/r12 as base always needs a SIB byte)
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_byte(0x04 | regenc);
        emit_byte(0x24);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_byte(0x44 | regenc);
        emit_byte(0x24);
        emit_byte(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_byte(0x84 | regenc);
        emit_byte(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_byte(0x00 | regenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_byte(0x40 | regenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_byte(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_byte(0x04 | regenc);
      emit_byte(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none ) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_byte(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this, did everything as the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_byte(0x04 | regenc);
      emit_byte(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}
368
// XMM variant: the ModRM reg field encodes identically for XMM and GP
// registers, so just forward through the Register overload.
void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  emit_operand((Register)reg, base, index, scale, disp, rspec);
}
374
// Secret local extension to Assembler::WhichOperand:
#define end_pc_operand (_WhichOperand_limit)
377
// Disassemble just enough of the instruction at 'inst' to find the requested
// 32-bit operand (or, for end_pc_operand, the instruction's end).  The big
// switch classifies the opcode (after consuming any prefixes); the tail then
// parses the ModRM/SIB/displacement bytes produced by emit_operand.
address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
  // NOTE(review): REP4 is never #undef'd below (only REP8/REP16 are), so it
  // leaks to the rest of the translation unit — presumably harmless; verify.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;       // REX.W widens the immediate of movq (see 0xB8 below)
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip;                  // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2;  // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand, "");
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit, "");
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr   a
      // 64bit side says it these have both operands but that doesn't
      // appear to be true
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1;  // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp  rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xF0:                    // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3:                    // For SSE
  case 0xF2:                    // For SSE2
    // Skip the (possibly REX-prefixed) two-byte 0x0F opcode, then decode
    // the memory operand below.
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
      // deliberate fall-through: also consume the opcode byte
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    op3 = 0xFF & *ip++;
    base = op3 & 0x07;   // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip;              // caller wants the disp32
      ip += 4;                  // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1;                    // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip;                // caller wants the disp32
    ip += 4;                    // skip the disp32
    break;

  case 3:
    // [11 reg base]  (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(false, "fix locate_operand");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}
730
// Return the address of the instruction following the one at 'inst'.
address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}
735
736
#ifdef ASSERT
// Debug-only sanity check: verify that the relocation just recorded points
// exactly at the operand bytes locate_operand finds for this instruction.
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT
760
// Emit a memory operand restricted to the eight low (non-REX) registers.
void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}
767
// Unpack an Address into the full emit_operand; rip_relative_correction is
// the number of instruction bytes that follow the disp32 field.
void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}
774
// XMM convenience wrapper over the full emit_operand.
void Assembler::emit_operand(XMMRegister reg, Address adr) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}
779
// MMX operations
// MMX registers never need REX bits, hence the extra asserts.
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}
785
// work around gcc (3.2.1-7a) bug
// Argument-order variant of the MMX overload above; identical emission.
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}
791
792
// Emit an x87 instruction addressing FPU stack slot st(i): opcode b1, b2+i.
void Assembler::emit_farith(int b1, int b2, int i) {
  assert(isByte(b1) && isByte(b2), "wrong opcode");
  assert(0 <= i &&  i < 8, "illegal stack offset");
  emit_byte(b1);
  emit_byte(b2 + i);
}
799
800
// Now the Assembler instruction (identical for 32/64 bits)

// adc dst, imm32  (0x81 group, ModRM extension /2 via 0xD0)
void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}
807
// adc dst, [src]  (opcode 0x13)
void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x13);
  emit_operand(dst, src);
}
814
// adc dst, src  (opcode 0x13, register form)
void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}
819
// add [dst], imm32  (0x81 group, /0 = add via rax in the reg field)
void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}
825
// add [dst], src  (opcode 0x01)
void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x01);
  emit_operand(src, dst);
}
832
// add dst, imm32  (0x81 group, /0)
void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}
837
// add dst, [src]  (opcode 0x03)
void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x03);
  emit_operand(dst, src);
}
844
// add dst, src  (opcode 0x03, register form)
void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}
849
// Multi-byte no-op for code alignment (0F 1F /0 form).
void Assembler::addr_nop_4() {
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_byte(0);    // 8-bits offset (1 byte)
}
857
// Multi-byte no-op for code alignment (0F 1F with SIB byte).
void Assembler::addr_nop_5() {
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_byte(0);    // 8-bits offset (1 byte)
}
866
// Multi-byte no-op for code alignment (0F 1F with disp32).
void Assembler::addr_nop_7() {
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}
874
// Multi-byte no-op for code alignment (0F 1F with SIB byte and disp32).
void Assembler::addr_nop_8() {
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}
883
// addsd dst, src  (F2 0F 58, register form; scalar double add)
void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_byte(0xC0 | encode);
}
892
// addsd dst, [src]  (F2 0F 58, memory form)
void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_operand(dst, src);
}
902
// addss dst, src  (F3 0F 58, register form; scalar float add)
void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_byte(0xC0 | encode);
}
911
// addss dst, [src]  (F3 0F 58, memory form)
void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_operand(dst, src);
}
921
// and dst, imm32  (0x81 group, /4 via 0xE0)
void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}
926
// and dst, [src]  (opcode 0x23)
void Assembler::andl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x23);
  emit_operand(dst, src);
}
933
// and dst, src  (opcode 0x23, register form)
void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}
938
// andpd dst, [src]  (66 0F 54; packed-double bitwise AND)
void Assembler::andpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x54);
  emit_operand(dst, src);
}
948
// bswap reg  (0F C8+r; reverse byte order of a 32-bit register)
void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_byte(0x0F);
  emit_byte(0xC8 | encode);
}
954
// call Label: emit E8 rel32.  For a bound label the (backward) offset is
// computed now; an unbound label records a patch site and emits zero.
void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5;  // total length of E8 + rel32
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_byte(0xE8);
    emit_data(offs - long_size, rtype, operand);
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_byte(0xE8);
    emit_data(int(0), rtype, operand);  // placeholder, patched when L binds
  }
}
976
// CALL r64/r32 (indirect through register). Encoding: [REX] FF /2.
// In 64-bit mode the operand size defaults to 64 bits, so REX.W is not
// strictly required; prefixq is kept deliberately (see comments below).
void Assembler::call(Register dst) {
  // This was originally using a 32bit register encoding
  // and surely we want 64bit!
  // this is a 32bit encoding but in 64bit mode the default
  // operand size is 64bit so there is no need for the
  // wide prefix. So prefix only happens if we use the
  // new registers. Much like push/pop.
  int x = offset();
  // this may be true but dbx disassembles it as if it
  // were 32bits...
  // int encode = prefix_and_encode(dst->encoding());
  // if (offset() != x) assert(dst->encoding() >= 8, "what?");
  int encode = prefixq_and_encode(dst->encoding());

  emit_byte(0xFF);
  emit_byte(0xD0 | encode);  // ModRM: mod=11, reg=/2 (CALL), rm=dst
}
994
995
// CALL m (indirect through memory). Encoding: [REX] FF /2.
void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  emit_operand(rdx, adr);  // rdx->encoding() == 2 supplies the /2 opcode extension
}
1002
// CALL rel32 to an absolute address with relocation info (E8 cd).
// The target must be reachable with a signed 32-bit displacement.
void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  emit_byte(0xE8);
  // _code_pos already points past the opcode; displacement is relative
  // to the end of the 4-byte immediate that follows.
  intptr_t disp = entry - (_code_pos + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (call2)");
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}
1015
// CDQ: sign-extend eax into edx:eax. Encoding: 99.
void Assembler::cdql() {
  emit_byte(0x99);
}
1019
// CMOVcc r32, r32: conditional move. Encoding: [REX] 0F 40+cc /r.
void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc);  // condition code folded into the second opcode byte
  emit_byte(0xC0 | encode);
}
1027
1028
1029void Assembler::cmovl(Condition cc, Register dst, Address src) {
1030  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1031  prefix(src, dst);
1032  emit_byte(0x0F);
1033  emit_byte(0x40 | cc);
1034  emit_operand(dst, src);
1035}
1036
// CMP m8, imm8. Encoding: [REX] 80 /7 ib.
void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x80);
  emit_operand(rdi, dst, 1);  // rdi->encoding() == 7 supplies /7 (CMP); 1 imm byte follows
  emit_byte(imm8);
}
1044
// CMP m32, imm32. Encoding: [REX] 81 /7 id.
void Assembler::cmpl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  emit_operand(rdi, dst, 4);  // /7 opcode extension; 4 immediate bytes follow the operand
  emit_long(imm32);
}
1052
// CMP r32, imm32. emit_arith picks 81 /7 id or the short sign-extended form.
void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);  // 0xF8: ModRM base with reg field /7 (CMP)
}
1057
// CMP r32, r32. Encoding: [REX] 3B /r.
void Assembler::cmpl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}
1062
1063
// CMP r32, m32. Encoding: [REX] 3B /r.
void Assembler::cmpl(Register dst, Address  src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}
1070
// CMP m16, imm16. Encoding: 66 81 /7 iw. No REX prefix is emitted, so
// extended (r8-r15) base/index registers are rejected by the assert.
void Assembler::cmpw(Address dst, int imm16) {
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_byte(0x66);  // operand-size override: 16-bit compare
  emit_byte(0x81);
  emit_operand(rdi, dst, 2);  // /7 opcode extension; 2 immediate bytes follow
  emit_word(imm16);
}
1079
// The 32-bit cmpxchg compares the value at adr with the contents of rax,
// and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
// The ZF is set if the compared values were equal, and cleared otherwise.
// Real encoding: [REX] 0F B1 /r (typically preceded by a LOCK prefix emitted
// by the caller). The (Atomics & 2) path is a diagnostic mode that replaces
// the instruction with a NON-atomic cmp/load/store sequence.
void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
  if (Atomics & 2) {
     // caveat: no instructionmark, so this isn't relocatable.
     // Emit a synthetic, non-atomic, CAS equivalent.
     // Beware.  The synthetic form sets all ICCs, not just ZF.
     // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r)
     cmpl(rax, adr);
     movl(rax, adr);
     if (reg != rax) {
        Label L ;
        jcc(Assembler::notEqual, L);
        movl(adr, reg);
        bind(L);
     }
  } else {
     InstructionMark im(this);
     prefix(adr, reg);
     emit_byte(0x0F);
     emit_byte(0xB1);
     emit_operand(reg, adr);
  }
}
1105
// COMISD xmm, m64: ordered compare scalar doubles, setting EFLAGS.
// Encoding: 66 [REX] 0F 2F /r — i.e. COMISS with a mandatory 66 prefix.
// NOTE(review): the 0x66 is emitted before comiss() creates its
// InstructionMark, so the recorded instruction start excludes the prefix
// byte — confirm no caller relies on relocating this operand.
void Assembler::comisd(XMMRegister dst, Address src) {
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 is there. Strangely ucomisd comes out correct
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  comiss(dst, src);
}
1113
// COMISS xmm, m32: ordered compare scalar singles, setting EFLAGS.
// Encoding: [REX] 0F 2F /r.
void Assembler::comiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));

  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x2F);
  emit_operand(dst, src);
}
1123
// CVTDQ2PD xmm, xmm: packed int32 -> packed double. Encoding: F3 [REX] 0F E6 /r.
void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xE6);
  emit_byte(0xC0 | encode);
}
1132
// CVTDQ2PS xmm, xmm: packed int32 -> packed single. Encoding: [REX] 0F 5B /r.
void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5B);
  emit_byte(0xC0 | encode);
}
1140
// CVTSD2SS xmm, xmm: scalar double -> scalar single. Encoding: F2 [REX] 0F 5A /r.
void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5A);
  emit_byte(0xC0 | encode);
}
1149
// CVTSI2SD xmm, r32: int32 -> scalar double. Encoding: F2 [REX] 0F 2A /r.
void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}
1158
// CVTSI2SS xmm, r32: int32 -> scalar single. Encoding: F3 [REX] 0F 2A /r.
void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}
1167
// CVTSS2SD xmm, xmm: scalar single -> scalar double. Encoding: F3 [REX] 0F 5A /r.
void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5A);
  emit_byte(0xC0 | encode);
}
1176
// CVTTSD2SI r32, xmm: truncating scalar double -> int32. Encoding: F2 [REX] 0F 2C /r.
void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}
1185
// CVTTSS2SI r32, xmm: truncating scalar single -> int32. Encoding: F3 [REX] 0F 2C /r.
void Assembler::cvttss2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}
1194
// DEC m32. Encoding: [REX] FF /1.
void Assembler::decl(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrement() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  emit_operand(rcx, dst);  // rcx->encoding() == 1 supplies the /1 opcode extension
}
1202
// DIVSD xmm, m64: divide scalar double by memory operand. Encoding: F2 [REX] 0F 5E /r.
void Assembler::divsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_operand(dst, src);
}
1212
// DIVSD xmm, xmm: divide scalar doubles. Encoding: F2 [REX] 0F 5E /r.
void Assembler::divsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_byte(0xC0 | encode);
}
1221
// DIVSS xmm, m32: divide scalar single by memory operand. Encoding: F3 [REX] 0F 5E /r.
void Assembler::divss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_operand(dst, src);
}
1231
// DIVSS xmm, xmm: divide scalar singles. Encoding: F3 [REX] 0F 5E /r.
void Assembler::divss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_byte(0xC0 | encode);
}
1240
// EMMS: empty MMX state (re-enable x87 use). Encoding: 0F 77.
void Assembler::emms() {
  NOT_LP64(assert(VM_Version::supports_mmx(), ""));
  emit_byte(0x0F);
  emit_byte(0x77);
}
1246
// HLT: halt the processor (privileged; used for padding/unreachable code). Encoding: F4.
void Assembler::hlt() {
  emit_byte(0xF4);
}
1250
// IDIV r32: signed divide edx:eax by src. Encoding: [REX] F7 /7.
void Assembler::idivl(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF8 | encode);  // ModRM: mod=11, reg=/7 (IDIV), rm=src
}
1256
// IMUL r32, r32: two-operand signed multiply. Encoding: [REX] 0F AF /r.
void Assembler::imull(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xAF);
  emit_byte(0xC0 | encode);
}
1263
1264
// IMUL r32, r32, imm: dst = src * value. Uses the short 6B /r ib form
// when the immediate fits in a signed byte, otherwise 69 /r id.
void Assembler::imull(Register dst, Register src, int value) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_byte(0x6B);  // IMUL r32, r/m32, imm8 (sign-extended)
    emit_byte(0xC0 | encode);
    emit_byte(value);
  } else {
    emit_byte(0x69);  // IMUL r32, r/m32, imm32
    emit_byte(0xC0 | encode);
    emit_long(value);
  }
}
1277
// INC m32. Encoding: [REX] FF /0.
void Assembler::incl(Address dst) {
  // Don't use it directly. Use MacroAssembler::increment() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  emit_operand(rax, dst);  // rax->encoding() == 0 supplies the /0 opcode extension
}
1285
// Jcc to a label. Bound targets use the short 70+cc rel8 form when the
// displacement fits in a byte and no relocation is required, otherwise
// the near 0F 80+cc rel32 form. Unbound targets always reserve the
// 6-byte near form and register a patch site.
void Assembler::jcc(Condition cc, Label& L, relocInfo::relocType rtype) {
  InstructionMark im(this);
  relocate(rtype);
  assert((0 <= cc) && (cc < 16), "illegal cc");
  if (L.is_bound()) {
    address dst = target(L);
    assert(dst != NULL, "jcc most probably wrong");

    const int short_size = 2;  // opcode + rel8
    const int long_size = 6;   // 0F, opcode, rel32
    intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;
    // Displacements are relative to the end of the instruction, hence
    // the subtraction of the instruction size in both forms.
    if (rtype == relocInfo::none && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      assert(is_simm32(offs - long_size),
             "must be 32bit offset (call4)");
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
    // Note: could eliminate cond. jumps to this jump if condition
    //       is the same however, seems to be rather unlikely case.
    // Note: use jccb() if label to be bound is very close to get
    //       an 8-bit displacement
    L.add_patch_at(code(), locator());
    emit_byte(0x0F);
    emit_byte(0x80 | cc);
    emit_long(0);  // placeholder rel32, patched when the label binds
  }
}
1320
// Jcc with a forced short (rel8) encoding: 70+cc cb. The caller promises
// the target is within reach of a signed byte displacement; the bound
// case asserts it, the unbound case cannot check until the patch.
void Assembler::jccb(Condition cc, Label& L) {
  if (L.is_bound()) {
    const int short_size = 2;  // opcode + rel8
    address entry = target(L);
    // The rel8 is relative to the end of this 2-byte instruction.
    assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)),
           "Dispacement too large for a short jmp");
    intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
    // 0111 tttn #8-bit disp
    emit_byte(0x70 | cc);
    emit_byte((offs - short_size) & 0xFF);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_byte(0x70 | cc);
    emit_byte(0);  // placeholder rel8, patched when the label binds
  }
}
1338
// JMP m (indirect through memory). Encoding: [REX] FF /4.
void Assembler::jmp(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  emit_operand(rsp, adr);  // rsp->encoding() == 4 supplies the /4 opcode extension
}
1345
// JMP to a label. Bound targets use the short EB rel8 form when in range
// and no relocation is required, otherwise near E9 rel32. Unbound targets
// always reserve the 5-byte near form and register a patch site.
void Assembler::jmp(Label& L, relocInfo::relocType rtype) {
  if (L.is_bound()) {
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
    InstructionMark im(this);
    const int short_size = 2;  // EB + rel8
    const int long_size = 5;   // E9 + rel32
    intptr_t offs = entry - _code_pos;
    // Displacements are relative to the end of the instruction.
    if (rtype == relocInfo::none && is8bit(offs - short_size)) {
      emit_byte(0xEB);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      emit_byte(0xE9);
      emit_long(offs - long_size);
    }
  } else {
    // By default, forward jumps are always 32-bit displacements, since
    // we can't yet know where the label will be bound.  If you're sure that
    // the forward jump will not run beyond 256 bytes, use jmpb to
    // force an 8-bit displacement.
    InstructionMark im(this);
    relocate(rtype);
    L.add_patch_at(code(), locator());
    emit_byte(0xE9);
    emit_long(0);  // placeholder rel32, patched when the label binds
  }
}
1373
// JMP r (indirect through register). Encoding: [REX] FF /4.
void Assembler::jmp(Register entry) {
  int encode = prefix_and_encode(entry->encoding());
  emit_byte(0xFF);
  emit_byte(0xE0 | encode);  // ModRM: mod=11, reg=/4 (JMP), rm=entry
}
1379
// JMP rel32 to an absolute address with relocation info (E9 cd).
// The target must be reachable with a signed 32-bit displacement.
void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0xE9);
  assert(dest != NULL, "must have a target");
  // _code_pos already points past the opcode; displacement is relative
  // to the end of the 4-byte immediate that follows.
  intptr_t disp = dest - (_code_pos + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (jmp)");
  emit_data(disp, rspec.reloc(), call32_operand);
}
1388
1389void Assembler::jmpb(Label& L) {
1390  if (L.is_bound()) {
1391    const int short_size = 2;
1392    address entry = target(L);
1393    assert(is8bit((entry - _code_pos) + short_size),
1394           "Dispacement too large for a short jmp");
1395    assert(entry != NULL, "jmp most probably wrong");
1396    intptr_t offs = entry - _code_pos;
1397    emit_byte(0xEB);
1398    emit_byte((offs - short_size) & 0xFF);
1399  } else {
1400    InstructionMark im(this);
1401    L.add_patch_at(code(), locator());
1402    emit_byte(0xEB);
1403    emit_byte(0);
1404  }
1405}
1406
// LDMXCSR m32: load the MXCSR control/status register. Encoding: [REX] 0F AE /2.
void Assembler::ldmxcsr( Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(src);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(2), src);  // register #2 supplies the /2 opcode extension
}
1415
// LEA r32, m. Encoding: [67] [REX] 8D /r. On LP64 the 0x67 address-size
// override forces 32-bit address arithmetic so the result is truncated
// to 32 bits, matching the 32-bit semantics of "leal".
void Assembler::leal(Register dst, Address src) {
  InstructionMark im(this);
#ifdef _LP64
  emit_byte(0x67); // addr32
  prefix(src, dst);
#endif // LP64
  emit_byte(0x8D);
  emit_operand(dst, src);
}
1425
// Emit a LOCK prefix (F0) for the following instruction. When the Atomics
// diagnostic flag has bit 0 set, a NOP (90) is substituted so the next
// instruction runs unlocked — same length, no atomicity.
void Assembler::lock() {
  if (Atomics & 1) {
     // Emit either nothing, a NOP, or a NOP: prefix
     emit_byte(0x90) ;
  } else {
     emit_byte(0xF0);
  }
}
1434
// Serializes memory: emits a full memory barrier, but only on MP systems.
// Uses MFENCE (0F AE F0) when SSE2 is available (always on LP64);
// otherwise falls back to a locked add of 0 to the top of stack.
void Assembler::mfence() {
    // Memory barriers are only needed on multiprocessors
  if (os::is_MP()) {
    if( LP64_ONLY(true ||) VM_Version::supports_sse2() ) {
      emit_byte( 0x0F );                // MFENCE; faster blows no regs
      emit_byte( 0xAE );
      emit_byte( 0xF0 );
    } else {
      // All usable chips support "locked" instructions which suffice
      // as barriers, and are much faster than the alternative of
      // using cpuid instruction. We use here a locked add [esp],0.
      // This is conveniently otherwise a no-op except for blowing
      // flags (which we save and restore.)
      pushf();                // Save eflags register
      lock();
      addl(Address(rsp, 0), 0);// Assert the lock# signal here
      popf();                 // Restore eflags register
    }
  }
}
1456
// Pointer-sized register move: movq on LP64, movl on 32-bit.
void Assembler::mov(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}
1460
// MOVAPD xmm, xmm: move aligned packed doubles. Encoding: 66 [REX] 0F 28 /r.
// The REX prefix is built by hand here (rather than via prefix_and_encode)
// because only the R and B extension bits are needed; encodings are
// reduced modulo 8 as the extension bits are emitted.
void Assembler::movapd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int dstenc = dst->encoding();
  int srcenc = src->encoding();
  emit_byte(0x66);
  if (dstenc < 8) {
    if (srcenc >= 8) {
      prefix(REX_B);      // extend rm (src)
      srcenc -= 8;
    }
  } else {
    if (srcenc < 8) {
      prefix(REX_R);      // extend reg (dst)
    } else {
      prefix(REX_RB);     // extend both
      srcenc -= 8;
    }
    dstenc -= 8;
  }
  emit_byte(0x0F);
  emit_byte(0x28);
  emit_byte(0xC0 | dstenc << 3 | srcenc);  // ModRM register-direct
}
1484
// MOVAPS xmm, xmm: move aligned packed singles. Encoding: [REX] 0F 28 /r.
// Same hand-built REX logic as movapd above, minus the 66 prefix.
void Assembler::movaps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int dstenc = dst->encoding();
  int srcenc = src->encoding();
  if (dstenc < 8) {
    if (srcenc >= 8) {
      prefix(REX_B);      // extend rm (src)
      srcenc -= 8;
    }
  } else {
    if (srcenc < 8) {
      prefix(REX_R);      // extend reg (dst)
    } else {
      prefix(REX_RB);     // extend both
      srcenc -= 8;
    }
    dstenc -= 8;
  }
  emit_byte(0x0F);
  emit_byte(0x28);
  emit_byte(0xC0 | dstenc << 3 | srcenc);  // ModRM register-direct
}
1507
// MOV r8, m8. Encoding: [REX] 8A /r. The byte_instr flag to prefix()
// forces a REX on LP64 when needed to reach spl/bpl/sil/dil.
void Assembler::movb(Register dst, Address src) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  InstructionMark im(this);
  prefix(src, dst, true);
  emit_byte(0x8A);
  emit_operand(dst, src);
}
1515
1516
// MOV m8, imm8. Encoding: [REX] C6 /0 ib.
void Assembler::movb(Address dst, int imm8) {
  InstructionMark im(this);
   prefix(dst);
  emit_byte(0xC6);
  emit_operand(rax, dst, 1);  // /0 opcode extension; 1 immediate byte follows
  emit_byte(imm8);
}
1524
1525
// MOV m8, r8. Encoding: [REX] 88 /r.
void Assembler::movb(Address dst, Register src) {
  assert(src->has_byte_register(), "must have byte register");
  InstructionMark im(this);
  prefix(dst, src, true);  // byte-register form; may need REX for spl/bpl/sil/dil
  emit_byte(0x88);
  emit_operand(src, dst);
}
1533
// MOVD xmm, r32. Encoding: 66 [REX] 0F 6E /r.
void Assembler::movdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x6E);
  emit_byte(0xC0 | encode);
}
1542
// MOVD r32, xmm. Encoding: 66 [REX] 0F 7E /r. In this form the xmm
// register sits in the ModRM reg field, hence the swapped arguments.
void Assembler::movdl(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  // swap src/dst to get correct prefix
  int encode = prefix_and_encode(src->encoding(), dst->encoding());
  emit_byte(0x0F);
  emit_byte(0x7E);
  emit_byte(0xC0 | encode);
}
1552
// MOVDQA xmm, m128: move aligned double quadword. Encoding: 66 [REX] 0F 6F /r.
void Assembler::movdqa(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}
1562
1563void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
1564  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1565  emit_byte(0x66);
1566  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
1567  emit_byte(0x0F);
1568  emit_byte(0x6F);
1569  emit_byte(0xC0 | encode);
1570}
1571
// MOVDQA m128, xmm: store aligned double quadword. Encoding: 66 [REX] 0F 7F /r.
void Assembler::movdqa(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x7F);
  emit_operand(src, dst);
}
1581
// MOVDQU xmm, m128: move unaligned double quadword. Encoding: F3 [REX] 0F 6F /r.
void Assembler::movdqu(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}
1591
1592void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
1593  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1594  emit_byte(0xF3);
1595  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
1596  emit_byte(0x0F);
1597  emit_byte(0x6F);
1598  emit_byte(0xC0 | encode);
1599}
1600
// MOVDQU m128, xmm: store unaligned double quadword. Encoding: F3 [REX] 0F 7F /r.
void Assembler::movdqu(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x7F);
  emit_operand(src, dst);
}
1610
// Uses zero extension on 64bit (32-bit writes clear the upper half)

// MOV r32, imm32. Encoding: [REX] B8+rd id.
void Assembler::movl(Register dst, int32_t imm32) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);  // register number folded into the opcode byte
  emit_long(imm32);
}
1618
// MOV r32, r32. Encoding: [REX] 8B /r.
void Assembler::movl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x8B);
  emit_byte(0xC0 | encode);
}
1624
// MOV r32, m32. Encoding: [REX] 8B /r.
void Assembler::movl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}
1631
// MOV m32, imm32. Encoding: [REX] C7 /0 id.
void Assembler::movl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);  // /0 opcode extension; 4 immediate bytes follow
  emit_long(imm32);
}
1639
// MOV m32, r32. Encoding: [REX] 89 /r.
void Assembler::movl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}
1646
// New cpus require to use movsd and movss to avoid partial register stall
// when loading from memory. But for old Opteron use movlpd instead of movsd.
// The selection is done in MacroAssembler::movdbl() and movflt().

// MOVLPD xmm, m64: load low packed double (upper half preserved).
// Encoding: 66 [REX] 0F 12 /r.
void Assembler::movlpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x12);
  emit_operand(dst, src);
}
1659
// MOVQ mm, m64 (MMX load). Encoding: 0F 6F /r. MMX registers have no REX
// extension, so no prefix is emitted.
void Assembler::movq( MMXRegister dst, Address src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}
1666
// MOVQ m64, mm (MMX store). Encoding: 0F 7F /r.
void Assembler::movq( Address dst, MMXRegister src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x7F);
  // workaround gcc (3.2.1-7a) bug
  // In that version of gcc with only an emit_operand(MMX, Address)
  // gcc will tail jump and try and reverse the parameters completely
  // obliterating dst in the process. By having a version available
  // that doesn't need to swap the args at the tail jump the bug is
  // avoided.
  emit_operand(dst, src);
}
1679
// MOVQ xmm, m64: load quadword, zeroing the upper half. Encoding: F3 [REX] 0F 7E /r.
void Assembler::movq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x7E);
  emit_operand(dst, src);
}
1689
// MOVQ m64, xmm: store low quadword. Encoding: 66 [REX] 0F D6 /r.
void Assembler::movq(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0xD6);
  emit_operand(src, dst);
}
1699
// MOVSX r32, m8: sign-extending byte load. Encoding: [REX] 0F BE /r.
void Assembler::movsbl(Register dst, Address src) { // movsxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_operand(dst, src);
}
1707
// MOVSX r32, r8: sign-extend byte register. Encoding: [REX] 0F BE /r.
// The byte_instr flag forces a REX on LP64 when needed for spl/bpl/sil/dil.
void Assembler::movsbl(Register dst, Register src) { // movsxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_byte(0xC0 | encode);
}
1715
// MOVSD xmm, xmm: move scalar double (upper half of dst preserved).
// Encoding: F2 [REX] 0F 10 /r.
void Assembler::movsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_byte(0xC0 | encode);
}
1724
// MOVSD xmm, m64: load scalar double (upper half zeroed). Encoding: F2 [REX] 0F 10 /r.
void Assembler::movsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_operand(dst, src);
}
1734
// MOVSD m64, xmm: store scalar double. Encoding: F2 [REX] 0F 11 /r.
void Assembler::movsd(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x11);
  emit_operand(src, dst);
}
1744
// MOVSS xmm, xmm: move scalar single (upper bits of dst preserved).
// Encoding: F3 [REX] 0F 10 /r.
void Assembler::movss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_byte(0xC0 | encode);
}
1753
// MOVSS xmm, m32: load scalar single (upper bits zeroed). Encoding: F3 [REX] 0F 10 /r.
void Assembler::movss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_operand(dst, src);
}
1763
// MOVSS m32, xmm: store scalar single. Encoding: F3 [REX] 0F 11 /r.
void Assembler::movss(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x11);
  emit_operand(src, dst);
}
1773
// MOVSX r32, m16: sign-extending word load. Encoding: [REX] 0F BF /r.
void Assembler::movswl(Register dst, Address src) { // movsxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_operand(dst, src);
}
1781
// MOVSX r32, r16: sign-extend word register. Encoding: [REX] 0F BF /r.
void Assembler::movswl(Register dst, Register src) { // movsxw
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_byte(0xC0 | encode);
}
1788
// MOV m16, imm16. Encoding: 66 [REX] C7 /0 iw.
void Assembler::movw(Address dst, int imm16) {
  InstructionMark im(this);

  emit_byte(0x66); // switch to 16-bit mode
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 2);  // /0 opcode extension; 2 immediate bytes follow
  emit_word(imm16);
}
1798
// MOV r16, m16. Encoding: 66 [REX] 8B /r.
void Assembler::movw(Register dst, Address src) {
  InstructionMark im(this);
  emit_byte(0x66);  // operand-size override: 16-bit move
  prefix(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}
1806
// MOV m16, r16. Encoding: 66 [REX] 89 /r.
void Assembler::movw(Address dst, Register src) {
  InstructionMark im(this);
  emit_byte(0x66);  // operand-size override: 16-bit move
  prefix(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}
1814
// MOVZX r32, m8: zero-extending byte load. Encoding: [REX] 0F B6 /r.
void Assembler::movzbl(Register dst, Address src) { // movzxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_operand(dst, src);
}
1822
// MOVZX r32, r8: zero-extend byte register. Encoding: [REX] 0F B6 /r.
// The byte_instr flag forces a REX on LP64 when needed for spl/bpl/sil/dil.
void Assembler::movzbl(Register dst, Register src) { // movzxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_byte(0xC0 | encode);
}
1830
// MOVZX r32, m16: zero-extending word load. Encoding: [REX] 0F B7 /r.
void Assembler::movzwl(Register dst, Address src) { // movzxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_operand(dst, src);
}
1838
// MOVZX r32, r16: zero-extend word register. Encoding: [REX] 0F B7 /r.
void Assembler::movzwl(Register dst, Register src) { // movzxw
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_byte(0xC0 | encode);
}
1845
// MUL m32: unsigned multiply eax by memory operand into edx:eax. Encoding: [REX] F7 /4.
void Assembler::mull(Address src) {
  InstructionMark im(this);
  prefix(src);
  emit_byte(0xF7);
  emit_operand(rsp, src);  // rsp->encoding() == 4 supplies the /4 opcode extension
}
1852
// MUL r32: unsigned multiply eax by src into edx:eax. Encoding: [REX] F7 /4.
void Assembler::mull(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xE0 | encode);  // ModRM: mod=11, reg=/4 (MUL), rm=src
}
1858
// MULSD xmm, m64: multiply scalar double by memory operand. Encoding: F2 [REX] 0F 59 /r.
void Assembler::mulsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x59);
  emit_operand(dst, src);
}
1868
// MULSD xmm, xmm: multiply scalar doubles. Encoding: F2 [REX] 0F 59 /r.
void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x59);
  emit_byte(0xC0 | encode);
}
1877
// MULSS xmm, m32: multiply scalar single by memory operand. Encoding: F3 [REX] 0F 59 /r.
void Assembler::mulss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x59);
  emit_operand(dst, src);
}
1887
// MULSS xmm, xmm: multiply scalar singles. Encoding: F3 [REX] 0F 59 /r.
void Assembler::mulss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x59);
  emit_byte(0xC0 | encode);
}
1896
// NEG r32: two's-complement negate. Encoding: [REX] F7 /3.
void Assembler::negl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD8 | encode);  // ModRM: mod=11, reg=/3 (NEG), rm=dst
}
1902
1903void Assembler::nop(int i) {
1904#ifdef ASSERT
1905  assert(i > 0, " ");
1906  // The fancy nops aren't currently recognized by debuggers making it a
1907  // pain to disassemble code while debugging. If asserts are on clearly
1908  // speed is not an issue so simply use the single byte traditional nop
1909  // to do alignment.
1910
1911  for (; i > 0 ; i--) emit_byte(0x90);
1912  return;
1913
1914#endif // ASSERT
1915
1916  if (UseAddressNop && VM_Version::is_intel()) {
1917    //
1918    // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
1919    //  1: 0x90
1920    //  2: 0x66 0x90
1921    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
1922    //  4: 0x0F 0x1F 0x40 0x00
1923    //  5: 0x0F 0x1F 0x44 0x00 0x00
1924    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
1925    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
1926    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1927    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1928    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1929    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1930
1931    // The rest coding is Intel specific - don't use consecutive address nops
1932
1933    // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
1934    // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
1935    // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
1936    // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
1937
1938    while(i >= 15) {
1939      // For Intel don't generate consecutive addess nops (mix with regular nops)
1940      i -= 15;
1941      emit_byte(0x66);   // size prefix
1942      emit_byte(0x66);   // size prefix
1943      emit_byte(0x66);   // size prefix
1944      addr_nop_8();
1945      emit_byte(0x66);   // size prefix
1946      emit_byte(0x66);   // size prefix
1947      emit_byte(0x66);   // size prefix
1948      emit_byte(0x90);   // nop
1949    }
1950    switch (i) {
1951      case 14:
1952        emit_byte(0x66); // size prefix
1953      case 13:
1954        emit_byte(0x66); // size prefix
1955      case 12:
1956        addr_nop_8();
1957        emit_byte(0x66); // size prefix
1958        emit_byte(0x66); // size prefix
1959        emit_byte(0x66); // size prefix
1960        emit_byte(0x90); // nop
1961        break;
1962      case 11:
1963        emit_byte(0x66); // size prefix
1964      case 10:
1965        emit_byte(0x66); // size prefix
1966      case 9:
1967        emit_byte(0x66); // size prefix
1968      case 8:
1969        addr_nop_8();
1970        break;
1971      case 7:
1972        addr_nop_7();
1973        break;
1974      case 6:
1975        emit_byte(0x66); // size prefix
1976      case 5:
1977        addr_nop_5();
1978        break;
1979      case 4:
1980        addr_nop_4();
1981        break;
1982      case 3:
1983        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
1984        emit_byte(0x66); // size prefix
1985      case 2:
1986        emit_byte(0x66); // size prefix
1987      case 1:
1988        emit_byte(0x90); // nop
1989        break;
1990      default:
1991        assert(i == 0, " ");
1992    }
1993    return;
1994  }
1995  if (UseAddressNop && VM_Version::is_amd()) {
1996    //
1997    // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
1998    //  1: 0x90
1999    //  2: 0x66 0x90
2000    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2001    //  4: 0x0F 0x1F 0x40 0x00
2002    //  5: 0x0F 0x1F 0x44 0x00 0x00
2003    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2004    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2005    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2006    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2007    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2008    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2009
2010    // The rest coding is AMD specific - use consecutive address nops
2011
2012    // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2013    // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2014    // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2015    // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2016    // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2017    //     Size prefixes (0x66) are added for larger sizes
2018
2019    while(i >= 22) {
2020      i -= 11;
2021      emit_byte(0x66); // size prefix
2022      emit_byte(0x66); // size prefix
2023      emit_byte(0x66); // size prefix
2024      addr_nop_8();
2025    }
2026    // Generate first nop for size between 21-12
2027    switch (i) {
2028      case 21:
2029        i -= 1;
2030        emit_byte(0x66); // size prefix
2031      case 20:
2032      case 19:
2033        i -= 1;
2034        emit_byte(0x66); // size prefix
2035      case 18:
2036      case 17:
2037        i -= 1;
2038        emit_byte(0x66); // size prefix
2039      case 16:
2040      case 15:
2041        i -= 8;
2042        addr_nop_8();
2043        break;
2044      case 14:
2045      case 13:
2046        i -= 7;
2047        addr_nop_7();
2048        break;
2049      case 12:
2050        i -= 6;
2051        emit_byte(0x66); // size prefix
2052        addr_nop_5();
2053        break;
2054      default:
2055        assert(i < 12, " ");
2056    }
2057
2058    // Generate second nop for size between 11-1
2059    switch (i) {
2060      case 11:
2061        emit_byte(0x66); // size prefix
2062      case 10:
2063        emit_byte(0x66); // size prefix
2064      case 9:
2065        emit_byte(0x66); // size prefix
2066      case 8:
2067        addr_nop_8();
2068        break;
2069      case 7:
2070        addr_nop_7();
2071        break;
2072      case 6:
2073        emit_byte(0x66); // size prefix
2074      case 5:
2075        addr_nop_5();
2076        break;
2077      case 4:
2078        addr_nop_4();
2079        break;
2080      case 3:
2081        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2082        emit_byte(0x66); // size prefix
2083      case 2:
2084        emit_byte(0x66); // size prefix
2085      case 1:
2086        emit_byte(0x90); // nop
2087        break;
2088      default:
2089        assert(i == 0, " ");
2090    }
2091    return;
2092  }
2093
2094  // Using nops with size prefixes "0x66 0x90".
2095  // From AMD Optimization Guide:
2096  //  1: 0x90
2097  //  2: 0x66 0x90
2098  //  3: 0x66 0x66 0x90
2099  //  4: 0x66 0x66 0x66 0x90
2100  //  5: 0x66 0x66 0x90 0x66 0x90
2101  //  6: 0x66 0x66 0x90 0x66 0x66 0x90
2102  //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
2103  //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
2104  //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2105  // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2106  //
2107  while(i > 12) {
2108    i -= 4;
2109    emit_byte(0x66); // size prefix
2110    emit_byte(0x66);
2111    emit_byte(0x66);
2112    emit_byte(0x90); // nop
2113  }
2114  // 1 - 12 nops
2115  if(i > 8) {
2116    if(i > 9) {
2117      i -= 1;
2118      emit_byte(0x66);
2119    }
2120    i -= 3;
2121    emit_byte(0x66);
2122    emit_byte(0x66);
2123    emit_byte(0x90);
2124  }
2125  // 1 - 8 nops
2126  if(i > 4) {
2127    if(i > 6) {
2128      i -= 1;
2129      emit_byte(0x66);
2130    }
2131    i -= 3;
2132    emit_byte(0x66);
2133    emit_byte(0x66);
2134    emit_byte(0x90);
2135  }
2136  switch (i) {
2137    case 4:
2138      emit_byte(0x66);
2139    case 3:
2140      emit_byte(0x66);
2141    case 2:
2142      emit_byte(0x66);
2143    case 1:
2144      emit_byte(0x90);
2145      break;
2146    default:
2147      assert(i == 0, " ");
2148  }
2149}
2150
void Assembler::notl(Register dst) {
  // NOT r32: F7 /2 (one's complement of dst).
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD0 | encode );
}

void Assembler::orl(Address dst, int32_t imm32) {
  // OR m32, imm32: 81 /1 id.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  emit_operand(rcx, dst, 4);  // rcx encodes the /1 opcode extension, not a register operand
  emit_long(imm32);
}

void Assembler::orl(Register dst, int32_t imm32) {
  // OR r32, imm32 (emit_arith picks the sign-extended 8-bit form when possible).
  prefix(dst);
  emit_arith(0x81, 0xC8, dst, imm32);
}


void Assembler::orl(Register dst, Address src) {
  // OR r32, m32: 0B /r.
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0B);
  emit_operand(dst, src);
}


void Assembler::orl(Register dst, Register src) {
  // OR r32, r32: 0B /r.
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x0B, 0xC0, dst, src);
}

// generic
void Assembler::pop(Register dst) {
  // POP r: 58+rd (pops a full pointer-sized word).
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0x58 | encode);
}

void Assembler::popf() {
  // POPF/POPFQ: 9D.
  emit_byte(0x9D);
}

void Assembler::popl(Address dst) {
  // POP m: 8F /0 (rax encodes the /0 opcode extension).
  // NOTE: this will adjust stack by 8byte on 64bits
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x8F);
  emit_operand(rax, dst);
}

void Assembler::prefetch_prefix(Address src) {
  // Shared helper for the prefetch family: operand prefixes plus the 0F escape.
  prefix(src);
  emit_byte(0x0F);
}

void Assembler::prefetchnta(Address src) {
  // PREFETCHNTA m8: 0F 18 /0.
  NOT_LP64(assert(VM_Version::supports_sse2(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rax, src); // 0, src
}

void Assembler::prefetchr(Address src) {
  // PREFETCH m8 (3DNow!): 0F 0D /0.
  NOT_LP64(assert(VM_Version::supports_3dnow(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x0D);
  emit_operand(rax, src); // 0, src
}

void Assembler::prefetcht0(Address src) {
  // PREFETCHT0 m8: 0F 18 /1.
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rcx, src); // 1, src
}

void Assembler::prefetcht1(Address src) {
  // PREFETCHT1 m8: 0F 18 /2.
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rdx, src); // 2, src
}

void Assembler::prefetcht2(Address src) {
  // PREFETCHT2 m8: 0F 18 /3.
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rbx, src); // 3, src
}

void Assembler::prefetchw(Address src) {
  // PREFETCHW m8 (3DNow!): 0F 0D /1.
  NOT_LP64(assert(VM_Version::supports_3dnow(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x0D);
  emit_operand(rcx, src); // 1, src
}

void Assembler::prefix(Prefix p) {
  // Emit a raw prefix byte verbatim.
  a_byte(p);
}

void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
  // PSHUFD xmm, xmm, imm8: 66 [REX] 0F 70 /r ib.
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  emit_byte(0x66);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x70);
  emit_byte(0xC0 | encode);
  emit_byte(mode & 0xFF);

}

void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
  // PSHUFD xmm, m128, imm8: 66 [REX] 0F 70 /r ib.
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x70);
  emit_operand(dst, src);
  emit_byte(mode & 0xFF);
}

void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
  // PSHUFLW xmm, xmm, imm8: F2 [REX] 0F 70 /r ib.
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x70);
  emit_byte(0xC0 | encode);
  emit_byte(mode & 0xFF);
}

void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
  // PSHUFLW xmm, m128, imm8: F2 [REX] 0F 70 /r ib.
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst); // QQ new
  emit_byte(0x0F);
  emit_byte(0x70);
  emit_operand(dst, src);
  emit_byte(mode & 0xFF);
}
2309
2310void Assembler::psrlq(XMMRegister dst, int shift) {
2311  // HMM Table D-1 says sse2 or mmx
2312  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2313
2314  int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding());
2315  emit_byte(0x66);
2316  emit_byte(0x0F);
2317  emit_byte(0x73);
2318  emit_byte(0xC0 | encode);
2319  emit_byte(shift);
2320}
2321
void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
  // PUNPCKLBW xmm, xmm: 66 [REX] 0F 60 /r.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x60);
  emit_byte(0xC0 | encode);
}

void Assembler::push(int32_t imm32) {
  // PUSH imm32: 68 id.
  // in 64bits we push 64bits onto the stack but only
  // take a 32bit immediate
  emit_byte(0x68);
  emit_long(imm32);
}

void Assembler::push(Register src) {
  // PUSH r: 50+rd.
  int encode = prefix_and_encode(src->encoding());

  emit_byte(0x50 | encode);
}

void Assembler::pushf() {
  // PUSHF/PUSHFQ: 9C.
  emit_byte(0x9C);
}

void Assembler::pushl(Address src) {
  // PUSH m: FF /6 (rsi encodes the /6 opcode extension).
  // Note this will push 64bit on 64bit
  InstructionMark im(this);
  prefix(src);
  emit_byte(0xFF);
  emit_operand(rsi, src);
}

void Assembler::pxor(XMMRegister dst, Address src) {
  // PXOR xmm, m128: 66 [REX] 0F EF /r.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xEF);
  emit_operand(dst, src);
}
2365
2366void Assembler::pxor(XMMRegister dst, XMMRegister src) {
2367  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2368  InstructionMark im(this);
2369  emit_byte(0x66);
2370  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2371  emit_byte(0x0F);
2372  emit_byte(0xEF);
2373  emit_byte(0xC0 | encode);
2374}
2375
void Assembler::rcll(Register dst, int imm8) {
  // RCL r32, imm8: D1 /2 for a count of 1 (one byte shorter), else C1 /2 ib.
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xD0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xD0 | encode);
    emit_byte(imm8);
  }
}

// copies data from [esi] to [edi] using rcx pointer sized words
// generic
void Assembler::rep_mov() {
  // REP MOVS{D|Q}: F3 [REX.W] A5.
  emit_byte(0xF3);
  // MOVSQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xA5);
}

// sets rcx pointer sized words at [edi] to the value in rax
// generic
void Assembler::rep_set() { // rep_set
  // REP STOS{D|Q}: F3 [REX.W] AB.
  emit_byte(0xF3);
  // STOSQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xAB);
}

// scans rcx pointer sized words at [edi] for occurrence of rax,
// generic
void Assembler::repne_scan() { // repne_scan
  // REPNE SCAS{D|Q}: F2 [REX.W] AF.
  emit_byte(0xF2);
  // SCASQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xAF);
}

#ifdef _LP64
// scans rcx 4 byte words at [edi] for occurrence of rax,
// generic
void Assembler::repne_scanl() { // repne_scan
  // REPNE SCASD: F2 AF (no REX.W, so a 32-bit element scan even on 64-bit).
  emit_byte(0xF2);
  // SCASL
  emit_byte(0xAF);
}
#endif

void Assembler::ret(int imm16) {
  // RET: C3, or RET imm16 (C2 iw) which also pops imm16 bytes of arguments.
  if (imm16 == 0) {
    emit_byte(0xC3);
  } else {
    emit_byte(0xC2);
    emit_word(imm16);
  }
}
2434
void Assembler::sahf() {
  // SAHF: 9E (store AH into the low byte of the flags register).
#ifdef _LP64
  // Not supported in 64bit mode
  ShouldNotReachHere();
#endif
  emit_byte(0x9E);
}

void Assembler::sarl(Register dst, int imm8) {
  // SAR r32, imm8: D1 /7 for a count of 1 (one byte shorter), else C1 /7 ib.
  int encode = prefix_and_encode(dst->encoding());
  assert(isShiftCount(imm8), "illegal shift count");
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xF8 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xF8 | encode);
    emit_byte(imm8);
  }
}

void Assembler::sarl(Register dst) {
  // SAR r32, CL: D3 /7.
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xF8 | encode);
}
2461
void Assembler::sbbl(Address dst, int32_t imm32) {
  // SBB m32, imm32: 81 /3 (rbx encodes the /3 opcode extension).
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);
}

void Assembler::sbbl(Register dst, int32_t imm32) {
  // SBB r32, imm32.
  prefix(dst);
  emit_arith(0x81, 0xD8, dst, imm32);
}


void Assembler::sbbl(Register dst, Address src) {
  // SBB r32, m32: 1B /r.
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x1B);
  emit_operand(dst, src);
}

void Assembler::sbbl(Register dst, Register src) {
  // SBB r32, r32: 1B /r.
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}

void Assembler::setb(Condition cc, Register dst) {
  // SETcc r8: 0F 90+cc, register form (byte-register prefix rules apply).
  assert(0 <= cc && cc < 16, "illegal cc");
  int encode = prefix_and_encode(dst->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0x90 | cc);
  emit_byte(0xC0 | encode);
}

void Assembler::shll(Register dst, int imm8) {
  // SHL r32, imm8: D1 /4 for a count of 1 (one byte shorter), else C1 /4 ib.
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  if (imm8 == 1 ) {
    emit_byte(0xD1);
    emit_byte(0xE0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xE0 | encode);
    emit_byte(imm8);
  }
}

void Assembler::shll(Register dst) {
  // SHL r32, CL: D3 /4.
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE0 | encode);
}
2512
2513void Assembler::shrl(Register dst, int imm8) {
2514  assert(isShiftCount(imm8), "illegal shift count");
2515  int encode = prefix_and_encode(dst->encoding());
2516  emit_byte(0xC1);
2517  emit_byte(0xE8 | encode);
2518  emit_byte(imm8);
2519}
2520
void Assembler::shrl(Register dst) {
  // SHR r32, CL: D3 /5.
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE8 | encode);
}

// copies a single word from [esi] to [edi]
void Assembler::smovl() {
  // MOVSD (string move): A5, without a REP prefix.
  emit_byte(0xA5);
}
2531
void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
  // SQRTSD xmm, xmm: F2 [REX] 0F 51 /r.
  // HMM Table D-1 says sse2
  // NOT_LP64(assert(VM_Version::supports_sse(), ""));
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x51);
  emit_byte(0xC0 | encode);
}

void Assembler::stmxcsr( Address dst) {
  // STMXCSR m32: 0F AE /3 (as_Register(3) supplies the /3 opcode extension).
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(3), dst);
}

void Assembler::subl(Address dst, int32_t imm32) {
  // SUB m32, imm: 83 /5 ib when imm fits a signed byte, else 81 /5 id.
  InstructionMark im(this);
  prefix(dst);
  if (is8bit(imm32)) {
    emit_byte(0x83);
    emit_operand(rbp, dst, 1);
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(0x81);
    emit_operand(rbp, dst, 4);
    emit_long(imm32);
  }
}

void Assembler::subl(Register dst, int32_t imm32) {
  // SUB r32, imm32.
  prefix(dst);
  emit_arith(0x81, 0xE8, dst, imm32);
}

void Assembler::subl(Address dst, Register src) {
  // SUB m32, r32: 29 /r.
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x29);
  emit_operand(src, dst);
}

void Assembler::subl(Register dst, Address src) {
  // SUB r32, m32: 2B /r.
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x2B);
  emit_operand(dst, src);
}

void Assembler::subl(Register dst, Register src) {
  // SUB r32, r32: 2B /r.
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x2B, 0xC0, dst, src);
}

void Assembler::subsd(XMMRegister dst, XMMRegister src) {
  // SUBSD xmm, xmm: F2 [REX] 0F 5C /r.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5C);
  emit_byte(0xC0 | encode);
}

void Assembler::subsd(XMMRegister dst, Address src) {
  // SUBSD xmm, m64: F2 [REX] 0F 5C /r.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5C);
  emit_operand(dst, src);
}

void Assembler::subss(XMMRegister dst, XMMRegister src) {
  // SUBSS xmm, xmm: F3 [REX] 0F 5C /r.
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5C);
  emit_byte(0xC0 | encode);
}

void Assembler::subss(XMMRegister dst, Address src) {
  // SUBSS xmm, m32: F3 [REX] 0F 5C /r.
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5C);
  emit_operand(dst, src);
}

void Assembler::testb(Register dst, int imm8) {
  // TEST r8, imm8: F6 /0 ib.
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  (void) prefix_and_encode(dst->encoding(), true);
  emit_arith_b(0xF6, 0xC0, dst, imm8);
}

void Assembler::testl(Register dst, int32_t imm32) {
  // TEST r32, imm32: short form A9 id for eax (encoding 0), else F7 /0 id.
  // not using emit_arith because test
  // doesn't support sign-extension of
  // 8bit operands
  int encode = dst->encoding();
  if (encode == 0) {
    emit_byte(0xA9);
  } else {
    encode = prefix_and_encode(encode);
    emit_byte(0xF7);
    emit_byte(0xC0 | encode);
  }
  emit_long(imm32);
}

void Assembler::testl(Register dst, Register src) {
  // TEST r32, r32: 85 /r.
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}

void Assembler::testl(Register dst, Address  src) {
  // TEST r32, m32: 85 /r.
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x85);
  emit_operand(dst, src);
}
2660
void Assembler::ucomisd(XMMRegister dst, Address src) {
  // UCOMISD xmm, m64: the 66 prefix on top of the ucomiss encoding.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  ucomiss(dst, src);
}

void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
  // UCOMISD xmm, xmm: the 66 prefix on top of the ucomiss encoding.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  ucomiss(dst, src);
}

void Assembler::ucomiss(XMMRegister dst, Address src) {
  // UCOMISS xmm, m32: [REX] 0F 2E /r.
  NOT_LP64(assert(VM_Version::supports_sse(), ""));

  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x2E);
  emit_operand(dst, src);
}

void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
  // UCOMISS xmm, xmm: [REX] 0F 2E /r.
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2E);
  emit_byte(0xC0 | encode);
}


void Assembler::xaddl(Address dst, Register src) {
  // XADD m32, r32: 0F C1 /r.
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0xC1);
  emit_operand(src, dst);
}

void Assembler::xchgl(Register dst, Address src) { // xchg
  // XCHG r32, m32: 87 /r.
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x87);
  emit_operand(dst, src);
}

void Assembler::xchgl(Register dst, Register src) {
  // XCHG r32, r32: 87 /r.
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x87);
  emit_byte(0xc0 | encode);
}

void Assembler::xorl(Register dst, int32_t imm32) {
  // XOR r32, imm32.
  prefix(dst);
  emit_arith(0x81, 0xF0, dst, imm32);
}

void Assembler::xorl(Register dst, Address src) {
  // XOR r32, m32: 33 /r.
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x33);
  emit_operand(dst, src);
}

void Assembler::xorl(Register dst, Register src) {
  // XOR r32, r32: 33 /r.
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}

void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
  // XORPD xmm, xmm: the 66 prefix on top of the xorps encoding.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  xorps(dst, src);
}

void Assembler::xorpd(XMMRegister dst, Address src) {
  // XORPD xmm, m128: 66 [REX] 0F 57 /r.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x57);
  emit_operand(dst, src);
}


void Assembler::xorps(XMMRegister dst, XMMRegister src) {
  // XORPS xmm, xmm: [REX] 0F 57 /r.
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x57);
  emit_byte(0xC0 | encode);
}

void Assembler::xorps(XMMRegister dst, Address src) {
  // XORPS xmm, m128: [REX] 0F 57 /r.
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x57);
  emit_operand(dst, src);
}
2763
2764#ifndef _LP64
2765// 32bit only pieces of the assembler
2766
void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  // CMP r32, imm32 (81 /7 id) with relocated immediate data.
  // NO PREFIX AS NEVER 64BIT
  InstructionMark im(this);
  emit_byte(0x81);
  emit_byte(0xF8 | src1->encoding());
  emit_data(imm32, rspec, 0);
}

void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  // CMP m32, imm32 (81 /7 id) with relocated immediate data
  // (rdi encodes the /7 opcode extension).
  // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs)
  InstructionMark im(this);
  emit_byte(0x81);
  emit_operand(rdi, src1);
  emit_data(imm32, rspec, 0);
}

// The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax,
// and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded
// into rdx:rax.  The ZF is set if the compared values were equal, and cleared otherwise.
void Assembler::cmpxchg8(Address adr) {
  // CMPXCHG8B m64: 0F C7 /1 (rcx encodes the /1 opcode extension).
  InstructionMark im(this);
  emit_byte(0x0F);
  emit_byte(0xc7);
  emit_operand(rcx, adr);
}

void Assembler::decl(Register dst) {
  // DEC r32: single-byte 48+rd form (only valid in 32-bit mode; on 64-bit
  // those bytes are REX prefixes, hence the _LP64 guard around this code).
  // Don't use it directly. Use MacroAssembler::decrementl() instead.
 emit_byte(0x48 | dst->encoding());
}
2797
2798#endif // _LP64
2799
// 64bit typically doesn't use the x87 but needs to for the trig funcs

void Assembler::fabs() {
  // FABS: D9 E1.
  emit_byte(0xD9);
  emit_byte(0xE1);
}

void Assembler::fadd(int i) {
  // FADD ST(0), ST(i): D8 C0+i.
  emit_farith(0xD8, 0xC0, i);
}

void Assembler::fadd_d(Address src) {
  // FADD m64real: DC /0.
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rax, src);
}

void Assembler::fadd_s(Address src) {
  // FADD m32real: D8 /0.
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rax, src);
}

void Assembler::fadda(int i) {
  // FADD ST(i), ST(0): DC C0+i.
  emit_farith(0xDC, 0xC0, i);
}

void Assembler::faddp(int i) {
  // FADDP ST(i), ST(0): DE C0+i.
  emit_farith(0xDE, 0xC0, i);
}

void Assembler::fchs() {
  // FCHS: D9 E0.
  emit_byte(0xD9);
  emit_byte(0xE0);
}

void Assembler::fcom(int i) {
  // FCOM ST(i): D8 D0+i.
  emit_farith(0xD8, 0xD0, i);
}

void Assembler::fcomp(int i) {
  // FCOMP ST(i): D8 D8+i.
  emit_farith(0xD8, 0xD8, i);
}

void Assembler::fcomp_d(Address src) {
  // FCOMP m64real: DC /3.
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rbx, src);
}

void Assembler::fcomp_s(Address src) {
  // FCOMP m32real: D8 /3.
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rbx, src);
}

void Assembler::fcompp() {
  // FCOMPP: DE D9.
  emit_byte(0xDE);
  emit_byte(0xD9);
}

void Assembler::fcos() {
  // FCOS: D9 FF.
  emit_byte(0xD9);
  emit_byte(0xFF);
}

void Assembler::fdecstp() {
  // FDECSTP: D9 F6.
  emit_byte(0xD9);
  emit_byte(0xF6);
}

void Assembler::fdiv(int i) {
  // FDIV ST(0), ST(i): D8 F0+i.
  emit_farith(0xD8, 0xF0, i);
}

void Assembler::fdiv_d(Address src) {
  // FDIV m64real: DC /6.
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rsi, src);
}

void Assembler::fdiv_s(Address src) {
  // FDIV m32real: D8 /6.
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rsi, src);
}

void Assembler::fdiva(int i) {
  // DC F8+i form (see the manual-errata note below).
  emit_farith(0xDC, 0xF8, i);
}

// Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
//       is erroneous for some of the floating-point instructions below.

void Assembler::fdivp(int i) {
  emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
}
2897
void Assembler::fdivr(int i) {
  // D8 F8+i form (see the manual-errata note above fdivp).
  emit_farith(0xD8, 0xF8, i);
}

void Assembler::fdivr_d(Address src) {
  // FDIVR m64real: DC /7.
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rdi, src);
}

void Assembler::fdivr_s(Address src) {
  // FDIVR m32real: D8 /7.
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rdi, src);
}

void Assembler::fdivra(int i) {
  // DC F0+i form (see the manual-errata note above fdivp).
  emit_farith(0xDC, 0xF0, i);
}

void Assembler::fdivrp(int i) {
  emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
}

void Assembler::ffree(int i) {
  // FFREE ST(i): DD C0+i.
  emit_farith(0xDD, 0xC0, i);
}

void Assembler::fild_d(Address adr) {
  // FILD m64int: DF /5.
  InstructionMark im(this);
  emit_byte(0xDF);
  emit_operand32(rbp, adr);
}

void Assembler::fild_s(Address adr) {
  // FILD m32int: DB /0.
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rax, adr);
}

void Assembler::fincstp() {
  // FINCSTP: D9 F7.
  emit_byte(0xD9);
  emit_byte(0xF7);
}

void Assembler::finit() {
  // FINIT: 9B (FWAIT) followed by FNINIT (DB E3).
  emit_byte(0x9B);
  emit_byte(0xDB);
  emit_byte(0xE3);
}

void Assembler::fist_s(Address adr) {
  // FIST m32int: DB /2.
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rdx, adr);
}

void Assembler::fistp_d(Address adr) {
  // FISTP m64int: DF /7.
  InstructionMark im(this);
  emit_byte(0xDF);
  emit_operand32(rdi, adr);
}

void Assembler::fistp_s(Address adr) {
  // FISTP m32int: DB /3.
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rbx, adr);
}

void Assembler::fld1() {
  // FLD1: D9 E8 (push +1.0).
  emit_byte(0xD9);
  emit_byte(0xE8);
}

void Assembler::fld_d(Address adr) {
  // FLD m64real: DD /0.
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rax, adr);
}

void Assembler::fld_s(Address adr) {
  // FLD m32real: D9 /0.
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rax, adr);
}


void Assembler::fld_s(int index) {
  // FLD ST(index): D9 C0+index.
  emit_farith(0xD9, 0xC0, index);
}

void Assembler::fld_x(Address adr) {
  // FLD m80real: DB /5.
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rbp, adr);
}

void Assembler::fldcw(Address src) {
  // FLDCW m2byte: D9 /5.
  InstructionMark im(this);
  emit_byte(0xd9);
  emit_operand32(rbp, src);
}

void Assembler::fldenv(Address src) {
  // FLDENV m: D9 /4.
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rsp, src);
}

void Assembler::fldlg2() {
  // FLDLG2: D9 EC (push log10(2)).
  emit_byte(0xD9);
  emit_byte(0xEC);
}

void Assembler::fldln2() {
  // FLDLN2: D9 ED (push ln(2)).
  emit_byte(0xD9);
  emit_byte(0xED);
}

void Assembler::fldz() {
  // FLDZ: D9 EE (push +0.0).
  emit_byte(0xD9);
  emit_byte(0xEE);
}

void Assembler::flog() {
  // ln(x) = ln(2) * log2(x), computed via FYL2X.
  fldln2();
  fxch();
  fyl2x();
}

void Assembler::flog10() {
  // log10(x) = log10(2) * log2(x), computed via FYL2X.
  fldlg2();
  fxch();
  fyl2x();
}
3033
void Assembler::fmul(int i) {
  // FMUL ST(0), ST(i): D8 C8+i.
  emit_farith(0xD8, 0xC8, i);
}

void Assembler::fmul_d(Address src) {
  // FMUL m64real: DC /1.
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rcx, src);
}

void Assembler::fmul_s(Address src) {
  // FMUL m32real: D8 /1.
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rcx, src);
}

void Assembler::fmula(int i) {
  // FMUL ST(i), ST(0): DC C8+i.
  emit_farith(0xDC, 0xC8, i);
}

void Assembler::fmulp(int i) {
  // FMULP ST(i), ST(0): DE C8+i.
  emit_farith(0xDE, 0xC8, i);
}

void Assembler::fnsave(Address dst) {
  // FNSAVE m: DD /6.
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rsi, dst);
}

void Assembler::fnstcw(Address src) {
  // 9B D9 /7: an FWAIT followed by FNSTCW, i.e. the waiting FSTCW form
  // despite this method's "fnstcw" name.
  InstructionMark im(this);
  emit_byte(0x9B);
  emit_byte(0xD9);
  emit_operand32(rdi, src);
}

void Assembler::fnstsw_ax() {
  // FNSTSW AX: DF E0.
  emit_byte(0xdF);
  emit_byte(0xE0);
}

void Assembler::fprem() {
  // FPREM: D9 F8.
  emit_byte(0xD9);
  emit_byte(0xF8);
}

void Assembler::fprem1() {
  // FPREM1: D9 F5.
  emit_byte(0xD9);
  emit_byte(0xF5);
}

void Assembler::frstor(Address src) {
  // FRSTOR m: DD /4.
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rsp, src);
}

void Assembler::fsin() {
  // FSIN: D9 FE.
  emit_byte(0xD9);
  emit_byte(0xFE);
}

void Assembler::fsqrt() {
  // FSQRT: D9 FA.
  emit_byte(0xD9);
  emit_byte(0xFA);
}
3101
void Assembler::fst_d(Address adr) {
  // FST m64real: DD /2.
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rdx, adr);
}

void Assembler::fst_s(Address adr) {
  // FST m32real: D9 /2.
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rdx, adr);
}

void Assembler::fstp_d(Address adr) {
  // FSTP m64real: DD /3.
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rbx, adr);
}

void Assembler::fstp_d(int index) {
  // FSTP ST(index): DD D8+index.
  emit_farith(0xDD, 0xD8, index);
}

void Assembler::fstp_s(Address adr) {
  // FSTP m32real: D9 /3.
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rbx, adr);
}

void Assembler::fstp_x(Address adr) {
  // FSTP m80real: DB /7.
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rdi, adr);
}

void Assembler::fsub(int i) {
  // FSUB ST(0), ST(i): D8 E0+i.
  emit_farith(0xD8, 0xE0, i);
}

void Assembler::fsub_d(Address src) {
  // FSUB m64real: DC /4.
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rsp, src);
}

void Assembler::fsub_s(Address src) {
  // FSUB m32real: D8 /4.
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rsp, src);
}

void Assembler::fsuba(int i) {
  // DC E8+i form (see the manual-errata note above fdivp).
  emit_farith(0xDC, 0xE8, i);
}

void Assembler::fsubp(int i) {
  emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
}

void Assembler::fsubr(int i) {
  // D8 E8+i form (see the manual-errata note above fdivp).
  emit_farith(0xD8, 0xE8, i);
}

void Assembler::fsubr_d(Address src) {
  // FSUBR m64real: DC /5.
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rbp, src);
}

void Assembler::fsubr_s(Address src) {
  // FSUBR m32real: D8 /5.
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rbp, src);
}

void Assembler::fsubra(int i) {
  // DC E0+i form (see the manual-errata note above fdivp).
  emit_farith(0xDC, 0xE0, i);
}

void Assembler::fsubrp(int i) {
  emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
}

void Assembler::ftan() {
  // FPTAN (D9 F2) pushes the tangent and then 1.0;
  // FSTP ST(0) (DD D8) drops the 1.0 again.
  emit_byte(0xD9);
  emit_byte(0xF2);
  emit_byte(0xDD);
  emit_byte(0xD8);
}
3190
void Assembler::ftst() {
  // FTST: D9 E4 (compare ST(0) with +0.0).
  emit_byte(0xD9);
  emit_byte(0xE4);
}

void Assembler::fucomi(int i) {
  // FUCOMI ST(0), ST(i): DB E8+i.
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDB, 0xE8, i);
}

void Assembler::fucomip(int i) {
  // FUCOMIP ST(0), ST(i): DF E8+i.
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDF, 0xE8, i);
}

void Assembler::fwait() {
  // FWAIT: 9B.
  emit_byte(0x9B);
}

void Assembler::fxch(int i) {
  // FXCH ST(i): D9 C8+i.
  emit_farith(0xD9, 0xC8, i);
}

void Assembler::fyl2x() {
  // FYL2X: D9 F1 (computes ST(1) * log2(ST(0)) and pops).
  emit_byte(0xD9);
  emit_byte(0xF1);
}
3220
// MOV r32, imm32 (B8+rd id) where the immediate is emitted via emit_data so it
// carries the given relocation/format and can be patched later.
void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec, int format) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data((int)imm32, rspec, format);
}
3227
3228#ifndef _LP64
3229
3230void Assembler::incl(Register dst) {
3231  // Don't use it directly. Use MacroAssembler::incrementl() instead.
3232 emit_byte(0x40 | dst->encoding());
3233}
3234
// 32-bit lea: delegate to the 32-bit form.
void Assembler::lea(Register dst, Address src) {
  leal(dst, src);
}

// MOV r/m32, imm32 (C7 /0, rax encodes /0) with a relocated immediate.
void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0xC7);
  emit_operand(rax, dst);
  emit_data((int)imm32, rspec, 0);
}


// POPA (0x61): restore all eight 32-bit GP registers from the stack.
void Assembler::popa() { // 32bit
  emit_byte(0x61);
}

// PUSH imm32 (0x68 id) with a relocated immediate.
void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0x68);
  emit_data(imm32, rspec, 0);
}

// PUSHA (0x60): push all eight 32-bit GP registers.
void Assembler::pusha() { // 32bit
  emit_byte(0x60);
}

// SETNE r/m8 (0F 95). SETcc ignores the ModRM reg field, so the 0xE0 bits
// in the ModRM byte are harmless; only the low 3 bits (dst) matter.
void Assembler::set_byte_if_not_zero(Register dst) {
  emit_byte(0x0F);
  emit_byte(0x95);
  emit_byte(0xE0 | dst->encoding());
}

// SHLD r/m32, r32, CL (0F A5 /r): dst is the r/m operand, src the reg operand.
void Assembler::shldl(Register dst, Register src) {
  emit_byte(0x0F);
  emit_byte(0xA5);
  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
}

// SHRD r/m32, r32, CL (0F AD /r).
void Assembler::shrdl(Register dst, Register src) {
  emit_byte(0x0F);
  emit_byte(0xAD);
  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
}
3278
3279#else // LP64
3280
3281// 64bit only pieces of the assembler
3282// This should only be used by 64bit instructions that can use rip-relative
3283// it cannot be used by instructions that want an immediate value.
3284
// Decide whether the target of 'adr' can be reached with a 32-bit
// rip-relative displacement from anywhere code might be placed, or whether
// the caller must materialize a full 64-bit address instead.
bool Assembler::reachable(AddressLiteral adr) {
  int64_t disp;
  // None will force a 64bit literal to the code stream. Likely a placeholder
  // for something that will be patched later and we need to be certain it will
  // always be reachable.
  if (adr.reloc() == relocInfo::none) {
    return false;
  }
  if (adr.reloc() == relocInfo::internal_word_type) {
    // This should be rip relative and easily reachable.
    return true;
  }
  if (adr.reloc() == relocInfo::virtual_call_type ||
      adr.reloc() == relocInfo::opt_virtual_call_type ||
      adr.reloc() == relocInfo::static_call_type ||
      adr.reloc() == relocInfo::static_stub_type ) {
    // This should be rip relative within the code cache and easily
    // reachable until we get huge code caches. (At which point
    // ic code is going to have issues).
    return true;
  }
  if (adr.reloc() != relocInfo::external_word_type &&
      adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
      adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
      adr.reloc() != relocInfo::runtime_call_type ) {
    return false;
  }

  // Stress the correction code
  if (ForceUnreachable) {
    // Must be runtimecall reloc, see if it is in the codecache
    // Flipping stuff in the codecache to be unreachable causes issues
    // with things like inline caches where the additional instructions
    // are not handled.
    if (CodeCache::find_blob(adr._target) == NULL) {
      return false;
    }
  }
  // For external_word_type/runtime_call_type if it is reachable from where we
  // are now (possibly a temp buffer) and where we might end up
  // anywhere in the codeCache then we are always reachable.
  // This would have to change if we ever save/restore shared code
  // to be more pessimistic.

  // Check reachability from both extremes of the code cache; the +sizeof(int)
  // accounts for the 4-byte displacement field itself.
  disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;
  disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;

  disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int));

  // Because rip relative is a disp + address_of_next_instruction and we
  // don't know the value of address_of_next_instruction we apply a fudge factor
  // to make sure we will be ok no matter the size of the instruction we get placed into.
  // We don't have to fudge the checks above here because they are already worst case.

  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
  // + 4 because better safe than sorry.
  const int fudge = 12 + 4;
  if (disp < 0) {
    disp -= fudge;
  } else {
    disp += fudge;
  }
  return is_simm32(disp);
}
3351
// Emit a 64-bit datum, wrapping a bare reloc type in a simple spec when one
// is needed; relocInfo::none bypasses relocation entirely.
void Assembler::emit_data64(jlong data,
                            relocInfo::relocType rtype,
                            int format) {
  if (rtype == relocInfo::none) {
    emit_long64(data);
  } else {
    emit_data64(data, Relocation::spec_simple(rtype), format);
  }
}

// Emit a relocated 64-bit datum. The relocation is recorded against the
// enclosing instruction's start (inst_mark), not the datum itself.
void Assembler::emit_data64(jlong data,
                            RelocationHolder const& rspec,
                            int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(imm_operand == format, "must be immediate");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  // Do not use AbstractAssembler::relocate, which is not intended for
  // embedded words.  Instead, relocate to the enclosing instruction.
  code_section()->relocate(inst_mark(), rspec, format);
#ifdef ASSERT
  check_relocation(rspec, format);
#endif
  emit_long64(data);
}
3376
// Emit any REX prefix needed for a single-register operand and return the
// low-3-bit encoding to place in the opcode/ModRM. Registers >= 8 (r8-r15)
// need REX.B; for byte instructions, encodings 4-7 (spl/bpl/sil/dil) need a
// plain REX so they are not interpreted as ah/ch/dh/bh.
int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
  if (reg_enc >= 8) {
    prefix(REX_B);
    reg_enc -= 8;
  } else if (byteinst && reg_enc >= 4) {
    prefix(REX);
  }
  return reg_enc;
}

// 64-bit single-register variant: always emits REX.W (plus B for r8-r15).
int Assembler::prefixq_and_encode(int reg_enc) {
  if (reg_enc < 8) {
    prefix(REX_W);
  } else {
    prefix(REX_WB);
    reg_enc -= 8;
  }
  return reg_enc;
}
3396
// Emit the REX prefix for a reg,reg instruction and return the combined
// ModRM bits (dst in reg field, src in r/m field). REX.R extends the reg
// field, REX.B the r/m field; byte instructions need bare REX for
// encodings 4-7 in src.
int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
  if (dst_enc < 8) {
    if (src_enc >= 8) {
      prefix(REX_B);
      src_enc -= 8;
    } else if (byteinst && src_enc >= 4) {
      prefix(REX);
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_R);
    } else {
      prefix(REX_RB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc;
}

// 64-bit reg,reg variant: same as above but every path carries REX.W.
int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
  if (dst_enc < 8) {
    if (src_enc < 8) {
      prefix(REX_W);
    } else {
      prefix(REX_WB);
      src_enc -= 8;
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_WR);
    } else {
      prefix(REX_WRB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc;
}
3436
// REX.B for a register used in the opcode's r/m (or opcode-embedded) slot.
void Assembler::prefix(Register reg) {
  if (reg->encoding() >= 8) {
    prefix(REX_B);
  }
}

// REX prefix for a memory operand with no register operand:
// base register needs REX.B, index register needs REX.X.
void Assembler::prefix(Address adr) {
  if (adr.base_needs_rex()) {
    if (adr.index_needs_rex()) {
      prefix(REX_XB);
    } else {
      prefix(REX_B);
    }
  } else {
    if (adr.index_needs_rex()) {
      prefix(REX_X);
    }
  }
}

// 64-bit variant of the above: always includes REX.W.
void Assembler::prefixq(Address adr) {
  if (adr.base_needs_rex()) {
    if (adr.index_needs_rex()) {
      prefix(REX_WXB);
    } else {
      prefix(REX_WB);
    }
  } else {
    if (adr.index_needs_rex()) {
      prefix(REX_WX);
    } else {
      prefix(REX_W);
    }
  }
}
3472
3473
// REX prefix for a memory operand plus a register operand: the register
// contributes REX.R, the address base/index contribute REX.B/REX.X. For
// byte instructions, register encodings 4-7 need bare REX (spl/bpl/sil/dil).
void Assembler::prefix(Address adr, Register reg, bool byteinst) {
  if (reg->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_XB);
      } else {
        prefix(REX_B);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_X);
      } else if (reg->encoding() >= 4 ) {
        prefix(REX);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_RXB);
      } else {
        prefix(REX_RB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_RX);
      } else {
        prefix(REX_R);
      }
    }
  }
}
3505
// 64-bit REX prefix for a memory operand plus register operand: every path
// carries REX.W, with R/X/B added as the register and address demand.
void Assembler::prefixq(Address adr, Register src) {
  if (src->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WXB);
      } else {
        prefix(REX_WB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WX);
      } else {
        prefix(REX_W);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WRXB);
      } else {
        prefix(REX_WRB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WRX);
      } else {
        prefix(REX_WR);
      }
    }
  }
}
3537
// REX prefix for a memory operand plus an XMM register operand. Like the
// Register overload but without the byte-instruction special case (XMM
// registers have no high-byte aliasing issue).
void Assembler::prefix(Address adr, XMMRegister reg) {
  if (reg->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_XB);
      } else {
        prefix(REX_B);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_X);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_RXB);
      } else {
        prefix(REX_RB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_RX);
      } else {
        prefix(REX_R);
      }
    }
  }
}
3567
// ADC r64, imm32 (REX.W + 81 /2). emit_arith re-derives the register
// encoding, so prefixq_and_encode's return value is discarded.
void Assembler::adcq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD0, dst, imm32);
}

// ADC r64, m64 (REX.W + 13 /r).
void Assembler::adcq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x13);
  emit_operand(dst, src);
}
3579
3580void Assembler::adcq(Register dst, Register src) {
3581  (int) prefixq_and_encode(dst->encoding(), src->encoding());
3582  emit_arith(0x13, 0xC0, dst, src);
3583}
3584
// ADD m64, imm (REX.W + 81 /0 or sign-extended 83 /0, chosen by
// emit_arith_operand; rax encodes the /0 extension).
void Assembler::addq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rax, dst,imm32);
}

// ADD m64, r64 (REX.W + 01 /r).
void Assembler::addq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x01);
  emit_operand(src, dst);
}

// ADD r64, imm32 (REX.W + 81 /0).
void Assembler::addq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xC0, dst, imm32);
}

// ADD r64, m64 (REX.W + 03 /r).
void Assembler::addq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x03);
  emit_operand(dst, src);
}

// ADD r64, r64 (REX.W + 03 /r).
void Assembler::addq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}

// AND r64, imm32 (REX.W + 81 /4).
void Assembler::andq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE0, dst, imm32);
}

// AND r64, m64 (REX.W + 23 /r).
void Assembler::andq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x23);
  emit_operand(dst, src);
}
3626
3627void Assembler::andq(Register dst, Register src) {
3628  (int) prefixq_and_encode(dst->encoding(), src->encoding());
3629  emit_arith(0x23, 0xC0, dst, src);
3630}
3631
// BSWAP r64 (REX.W + 0F C8+rd).
void Assembler::bswapq(Register reg) {
  int encode = prefixq_and_encode(reg->encoding());
  emit_byte(0x0F);
  emit_byte(0xC8 | encode);
}

// CQO (REX.W + 99): sign-extend rax into rdx:rax.
void Assembler::cdqq() {
  prefix(REX_W);
  emit_byte(0x99);
}

// CLFLUSH m8 (0F AE /7; rdi encodes /7).
void Assembler::clflush(Address adr) {
  prefix(adr);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(rdi, adr);
}

// CMOVcc r64, r64 (REX.W + 0F 40+cc /r).
void Assembler::cmovq(Condition cc, Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_byte(0xC0 | encode);
}

// CMOVcc r64, m64 (REX.W + 0F 40+cc /r).
void Assembler::cmovq(Condition cc, Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_operand(dst, src);
}
3664
// CMP m64, imm32 (REX.W + 81 /7; rdi encodes /7). The trailing 4 tells
// emit_operand the immediate's size so rip-relative fixups stay correct.
void Assembler::cmpq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x81);
  emit_operand(rdi, dst, 4);
  emit_long(imm32);
}

// CMP r64, imm32 (REX.W + 81 /7).
void Assembler::cmpq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xF8, dst, imm32);
}
3677
3678void Assembler::cmpq(Address dst, Register src) {
3679  InstructionMark im(this);
3680  prefixq(dst, src);
3681  emit_byte(0x3B);
3682  emit_operand(src, dst);
3683}
3684
// CMP r64, r64 (REX.W + 3B /r).
void Assembler::cmpq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}

// CMP r64, m64 (REX.W + 3B /r): flags reflect (dst - src).
void Assembler::cmpq(Register dst, Address  src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}

// CMPXCHG m64, r64 (REX.W + 0F B1 /r). Callers add LOCK separately if needed.
void Assembler::cmpxchgq(Register reg, Address adr) {
  InstructionMark im(this);
  prefixq(adr, reg);
  emit_byte(0x0F);
  emit_byte(0xB1);
  emit_operand(reg, adr);
}
3704
// CVTSI2SD xmm, r64 (F2 REX.W 0F 2A /r). The F2/F3 mandatory prefix must be
// emitted before the REX prefix, hence the ordering below.
void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

// CVTSI2SS xmm, r64 (F3 REX.W 0F 2A /r).
void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

// CVTTSD2SI r64, xmm (F2 REX.W 0F 2C /r): truncating double -> long.
void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

// CVTTSS2SI r64, xmm (F3 REX.W 0F 2C /r): truncating float -> long.
void Assembler::cvttss2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}
3740
// DEC r32 via FF /1 (two-byte form).
void Assembler::decl(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC8 | encode);
}

// DEC r64 (REX.W + FF /1).
void Assembler::decq(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC8 | encode);
}

// DEC m64 (REX.W + FF /1; rcx encodes /1).
void Assembler::decq(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xFF);
  emit_operand(rcx, dst);
}
3764
// FXRSTOR m512byte (0F AE /1): restore x87/MMX/SSE state.
void Assembler::fxrstor(Address src) {
  prefixq(src);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(1), src);
}

// FXSAVE m512byte (0F AE /0): save x87/MMX/SSE state.
void Assembler::fxsave(Address dst) {
  prefixq(dst);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(0), dst);
}
3778
// IDIV r64 (REX.W + F7 /7): rdx:rax / src -> rax (quot), rdx (rem).
void Assembler::idivq(Register src) {
  int encode = prefixq_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF8 | encode);
}

// IMUL r64, r64 (REX.W + 0F AF /r).
void Assembler::imulq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xAF);
  emit_byte(0xC0 | encode);
}

// IMUL r64, r64, imm: use the short sign-extended imm8 form (6B /r ib)
// when the constant fits, otherwise the imm32 form (69 /r id).
void Assembler::imulq(Register dst, Register src, int value) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_byte(0x6B);
    emit_byte(0xC0 | encode);
    emit_byte(value);
  } else {
    emit_byte(0x69);
    emit_byte(0xC0 | encode);
    emit_long(value);
  }
}
3804
// INC r32 via FF /0 (two-byte form; 64-bit build).
void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC0 | encode);
}

// INC r64 (REX.W + FF /0).
void Assembler::incq(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC0 | encode);
}

// INC m64 (REX.W + FF /0; rax encodes /0).
void Assembler::incq(Address dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xFF);
  emit_operand(rax, dst);
}
3828
// 64-bit lea: delegate to the 64-bit form.
void Assembler::lea(Register dst, Address src) {
  leaq(dst, src);
}

// LEA r64, m (REX.W + 8D /r).
void Assembler::leaq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x8D);
  emit_operand(dst, src);
}

// MOV r64, imm64 (REX.W + B8+rd io): full 64-bit immediate, no relocation.
void Assembler::mov64(Register dst, int64_t imm64) {
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_long64(imm64);
}

// MOV r64, imm64 with the immediate emitted via emit_data64 so it carries
// the given relocation and can be patched later.
void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data64(imm64, rspec);
}
3853
// MOVQ xmm, r64 (66 REX.W 0F 6E /r). The 66 prefix precedes the REX prefix.
void Assembler::movdq(XMMRegister dst, Register src) {
  // table D-1 says MMX/SSE2
  NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
  emit_byte(0x66);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x6E);
  emit_byte(0xC0 | encode);
}

// MOVQ r64, xmm (66 REX.W 0F 7E /r). For 0x7E the xmm register is the
// ModRM reg field, so the operands are swapped when building the prefix.
void Assembler::movdq(Register dst, XMMRegister src) {
  // table D-1 says MMX/SSE2
  NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
  emit_byte(0x66);
  // swap src/dst to get correct prefix
  int encode = prefixq_and_encode(src->encoding(), dst->encoding());
  emit_byte(0x0F);
  emit_byte(0x7E);
  emit_byte(0xC0 | encode);
}

// MOV r64, r64 (REX.W + 8B /r).
void Assembler::movq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x8B);
  emit_byte(0xC0 | encode);
}

// MOV r64, m64 (REX.W + 8B /r).
void Assembler::movq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}

// MOV m64, r64 (REX.W + 89 /r).
void Assembler::movq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}
3894
// Disabled: the encoding below is broken (0xC7 needs a separate ModRM byte,
// 0xC7 | encode corrupts the opcode) and dbx disassembly confirmed wrong
// output, hence the ShouldNotReachHere() guard.
void Assembler::movslq(Register dst, int32_t imm32) {
  // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
  // and movslq(r8, 3); as movl     $0x0000000048000000,(%rbx)
  // as a result we shouldn't use until tested at runtime...
  ShouldNotReachHere();
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xC7 | encode);
  emit_long(imm32);
}

// MOV m64, imm32 sign-extended (REX.W + C7 /0). Named movslq because the
// 32-bit immediate is sign-extended to 64 bits by the hardware.
void Assembler::movslq(Address dst, int32_t imm32) {
  assert(is_simm32(imm32), "lost bits");
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);
  emit_long(imm32);
}

// MOVSXD r64, m32 (REX.W + 63 /r).
void Assembler::movslq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x63);
  emit_operand(dst, src);
}

// MOVSXD r64, r32 (REX.W + 63 /r).
void Assembler::movslq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x63);
  emit_byte(0xC0 | encode);
}
3927
// NEG r64 (REX.W + F7 /3).
void Assembler::negq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD8 | encode);
}

// NOT r64 (REX.W + F7 /2).
void Assembler::notq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD0 | encode);
}

// OR m64, imm32 (REX.W + 81 /1; rcx encodes /1).
void Assembler::orq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x81);
  emit_operand(rcx, dst, 4);
  emit_long(imm32);
}

// OR r64, imm32 (REX.W + 81 /1).
void Assembler::orq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xC8, dst, imm32);
}

// OR r64, m64 (REX.W + 0B /r).
void Assembler::orq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0B);
  emit_operand(dst, src);
}

// OR r64, r64 (REX.W + 0B /r).
void Assembler::orq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x0B, 0xC0, dst, src);
}
3964
// Software POPA for 64-bit mode (the PUSHA/POPA opcodes are invalid in
// 64-bit mode): reload the 15 GP registers saved by pusha() below from a
// 16-slot frame, skipping slot 11 which corresponds to rsp, then release
// the frame. The slot layout must mirror pusha() exactly.
void Assembler::popa() { // 64bit
  movq(r15, Address(rsp, 0));
  movq(r14, Address(rsp, wordSize));
  movq(r13, Address(rsp, 2 * wordSize));
  movq(r12, Address(rsp, 3 * wordSize));
  movq(r11, Address(rsp, 4 * wordSize));
  movq(r10, Address(rsp, 5 * wordSize));
  movq(r9,  Address(rsp, 6 * wordSize));
  movq(r8,  Address(rsp, 7 * wordSize));
  movq(rdi, Address(rsp, 8 * wordSize));
  movq(rsi, Address(rsp, 9 * wordSize));
  movq(rbp, Address(rsp, 10 * wordSize));
  // skip rsp
  movq(rbx, Address(rsp, 12 * wordSize));
  movq(rdx, Address(rsp, 13 * wordSize));
  movq(rcx, Address(rsp, 14 * wordSize));
  movq(rax, Address(rsp, 15 * wordSize));

  addq(rsp, 16 * wordSize);
}
3985
// POP m64 (8F /0; rax encodes /0). Operand size defaults to 64-bit, so
// prefixq only contributes B/X bits for extended base/index registers.
void Assembler::popq(Address dst) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x8F);
  emit_operand(rax, dst);
}
3992
// Software PUSHA for 64-bit mode: save all 15 GP registers plus the
// original rsp into a 16-slot frame. rsp is written first at -5*wordSize,
// which after the subq lands in slot 11 (16 - 5), the slot popa() skips.
void Assembler::pusha() { // 64bit
  // we have to store original rsp.  ABI says that 128 bytes
  // below rsp are local scratch.
  movq(Address(rsp, -5 * wordSize), rsp);

  subq(rsp, 16 * wordSize);

  movq(Address(rsp, 15 * wordSize), rax);
  movq(Address(rsp, 14 * wordSize), rcx);
  movq(Address(rsp, 13 * wordSize), rdx);
  movq(Address(rsp, 12 * wordSize), rbx);
  // skip rsp
  movq(Address(rsp, 10 * wordSize), rbp);
  movq(Address(rsp, 9 * wordSize), rsi);
  movq(Address(rsp, 8 * wordSize), rdi);
  movq(Address(rsp, 7 * wordSize), r8);
  movq(Address(rsp, 6 * wordSize), r9);
  movq(Address(rsp, 5 * wordSize), r10);
  movq(Address(rsp, 4 * wordSize), r11);
  movq(Address(rsp, 3 * wordSize), r12);
  movq(Address(rsp, 2 * wordSize), r13);
  movq(Address(rsp, wordSize), r14);
  movq(Address(rsp, 0), r15);
}
4017
// PUSH m64 (FF /6; rsi encodes /6).
void Assembler::pushq(Address src) {
  InstructionMark im(this);
  prefixq(src);
  emit_byte(0xFF);
  emit_operand(rsi, src);
}
4024
// RCL r64, imm8 (REX.W + C1 /2 ib), with the short D1 /2 form for a count
// of 1. The >> 1 in the assert admits 64-bit shift counts up to 63.
void Assembler::rclq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xD0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xD0 | encode);
    emit_byte(imm8);
  }
}
// SAR r64, imm8 (REX.W + C1 /7 ib), short D1 /7 form for a count of 1.
void Assembler::sarq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xF8 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xF8 | encode);
    emit_byte(imm8);
  }
}

// SAR r64, CL (REX.W + D3 /7).
void Assembler::sarq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xF8 | encode);
}
// SBB m64, imm (REX.W + 81 /3 or 83 /3 via emit_arith_operand; rbx = /3).
void Assembler::sbbq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);
}

// SBB r64, imm32 (REX.W + 81 /3).
void Assembler::sbbq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD8, dst, imm32);
}

// SBB r64, m64 (REX.W + 1B /r).
void Assembler::sbbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x1B);
  emit_operand(dst, src);
}

// SBB r64, r64 (REX.W + 1B /r).
void Assembler::sbbq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}
4077
// SHL r64, imm8 (REX.W + C1 /4 ib), short D1 /4 form for a count of 1.
void Assembler::shlq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xE0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xE0 | encode);
    emit_byte(imm8);
  }
}

// SHL r64, CL (REX.W + D3 /4).
void Assembler::shlq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE0 | encode);
}
4096
4097void Assembler::shrq(Register dst, int imm8) {
4098  assert(isShiftCount(imm8 >> 1), "illegal shift count");
4099  int encode = prefixq_and_encode(dst->encoding());
4100  emit_byte(0xC1);
4101  emit_byte(0xE8 | encode);
4102  emit_byte(imm8);
4103}
4104
// SHR r64, CL (REX.W + D3 /5).
void Assembler::shrq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE8 | encode);
}
4110
// SQRTSD xmm, m64 (F2 0F 51 /r). The mandatory F2 prefix is emitted before
// any REX prefix produced by prefix(src, dst), as required.
void Assembler::sqrtsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x51);
  emit_operand(dst, src);
}
4120
// SUB m64, imm: short sign-extended form (REX.W + 83 /5 ib) when the
// constant fits in 8 bits, otherwise REX.W + 81 /5 id (rbp encodes /5).
void Assembler::subq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  if (is8bit(imm32)) {
    emit_byte(0x83);
    emit_operand(rbp, dst, 1);
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(0x81);
    emit_operand(rbp, dst, 4);
    emit_long(imm32);
  }
}

// SUB r64, imm32 (REX.W + 81 /5).
void Assembler::subq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE8, dst, imm32);
}

// SUB m64, r64 (REX.W + 29 /r).
void Assembler::subq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x29);
  emit_operand(src, dst);
}

// SUB r64, m64 (REX.W + 2B /r).
void Assembler::subq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x2B);
  emit_operand(dst, src);
}

// SUB r64, r64 (REX.W + 2B /r).
void Assembler::subq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x2B, 0xC0, dst, src);
}
4158
// TEST r64, imm32: uses the short rax-only form (REX.W + A9 id) when dst is
// rax (encoding 0), otherwise REX.W + F7 /0 id.
void Assembler::testq(Register dst, int32_t imm32) {
  // not using emit_arith because test
  // doesn't support sign-extension of
  // 8bit operands
  int encode = dst->encoding();
  if (encode == 0) {
    prefix(REX_W);
    emit_byte(0xA9);
  } else {
    encode = prefixq_and_encode(encode);
    emit_byte(0xF7);
    emit_byte(0xC0 | encode);
  }
  emit_long(imm32);
}

// TEST r64, r64 (REX.W + 85 /r).
void Assembler::testq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}
4179
// XADD m64, r64 (REX.W + 0F C1 /r). Callers add LOCK separately if needed.
void Assembler::xaddq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x0F);
  emit_byte(0xC1);
  emit_operand(src, dst);
}

// XCHG r64, m64 (REX.W + 87 /r). XCHG with memory is implicitly locked.
void Assembler::xchgq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x87);
  emit_operand(dst, src);
}

// XCHG r64, r64 (REX.W + 87 /r).
void Assembler::xchgq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x87);
  emit_byte(0xc0 | encode);
}

// XOR r64, r64 (REX.W + 33 /r).
void Assembler::xorq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}

// XOR r64, m64 (REX.W + 33 /r).
void Assembler::xorq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x33);
  emit_operand(dst, src);
}
4212
#endif // _LP64
4214
// Maps each condition code to its logical negation, indexed by the
// Condition's numeric value (which matches the hardware cc encoding).
static Assembler::Condition reverse[] = {
    Assembler::noOverflow     /* overflow      = 0x0 */ ,
    Assembler::overflow       /* noOverflow    = 0x1 */ ,
    Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
    Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,
    Assembler::notZero        /* zero          = 0x4, equal         = 0x4 */ ,
    Assembler::zero           /* notZero       = 0x5, notEqual      = 0x5 */ ,
    Assembler::above          /* belowEqual    = 0x6 */ ,
    Assembler::belowEqual     /* above         = 0x7 */ ,
    Assembler::positive       /* negative      = 0x8 */ ,
    Assembler::negative       /* positive      = 0x9 */ ,
    Assembler::noParity       /* parity        = 0xa */ ,
    Assembler::parity         /* noParity      = 0xb */ ,
    Assembler::greaterEqual   /* less          = 0xc */ ,
    Assembler::less           /* greaterEqual  = 0xd */ ,
    Assembler::greater        /* lessEqual     = 0xe */ ,
    Assembler::lessEqual      /* greater       = 0xf, */

};
4234
4235
4236// Implementation of MacroAssembler
4237
4238// First all the versions that have distinct versions depending on 32/64 bit
4239// Unless the difference is trivial (1 line or so).
4240
4241#ifndef _LP64
4242
4243// 32bit versions
4244
// 32-bit: an absolute address literal can be used directly as an Address,
// carrying its relocation spec along.
Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}

// 32-bit: build an Address from an ArrayAddress (base literal + index).
Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}
4252
// Emit the biased-locking fast path (32-bit flavor).
// On success, control reaches 'done' with the object biased toward the
// current thread; otherwise it jumps to *slow_case (when supplied) or
// falls through to the internal cas_label for the ordinary CAS-based
// lock. swap_reg must be rax (cmpxchg requirement). If tmp_reg is
// noreg, lock_reg is borrowed as the temporary and preserved with
// push/pop around each use. Returns the code offset of the instruction
// that can take the implicit null check on obj_reg.
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
  assert_different_registers(lock_reg, obj_reg, swap_reg);

  if (PrintBiasedLockingStatistics && counters == NULL)
    counters = BiasedLocking::counters();

  // No tmp_reg supplied: reuse lock_reg as the temporary and save/restore
  // it with push/pop around each use below.
  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    need_tmp_reg = true;
    tmp_reg = lock_reg;
  } else {
    assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  }
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    movl(swap_reg, mark_addr);
  }
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  movl(tmp_reg, swap_reg);
  andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  jcc(Assembler::notEqual, cas_label);
  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on x86 we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  movl(saved_mark_addr, swap_reg);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  xorl(swap_reg, tmp_reg);
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  movl(tmp_reg, klass_addr);
  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  // Age bits are allowed to differ; mask them out of the comparison.
  andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->biased_lock_entry_count_addr()));
  }
  jcc(Assembler::equal, done);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
  jcc(Assembler::notZero, try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  testl(swap_reg, markOopDesc::epoch_mask_in_place);
  jcc(Assembler::notZero, try_rebias);

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  movl(swap_reg, saved_mark_addr);
  andl(swap_reg,
       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orl(tmp_reg, swap_reg);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  movl(swap_reg, klass_addr);
  orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  movl(swap_reg, saved_mark_addr);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  movl(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  movl(tmp_reg, klass_addr);
  movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
  }

  bind(cas_label);

  return null_check_offset;
}
// Call a runtime leaf entry point directly, then discard the arguments
// the caller pushed on the stack (32-bit C calling convention).
void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments) {
  call(RuntimeAddress(entry_point));
  increment(rsp, number_of_arguments * wordSize);
}
4469
4470void MacroAssembler::cmpoop(Address src1, jobject obj) {
4471  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
4472}
4473
4474void MacroAssembler::cmpoop(Register src1, jobject obj) {
4475  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
4476}
4477
4478void MacroAssembler::extend_sign(Register hi, Register lo) {
4479  // According to Intel Doc. AP-526, "Integer Divide", p.18.
4480  if (VM_Version::is_P6() && hi == rdx && lo == rax) {
4481    cdql();
4482  } else {
4483    movl(hi, lo);
4484    sarl(hi, 31);
4485  }
4486}
4487
void MacroAssembler::fat_nop() {
  // A 5 byte nop that is safe for patching (see patch_verified_entry)
  // Four segment-override prefixes plus a nop: the five bytes decode as
  // a single instruction.
  emit_byte(0x26); // es:
  emit_byte(0x2e); // cs:
  emit_byte(0x64); // fs:
  emit_byte(0x65); // gs:
  emit_byte(0x90);
}
4496
// Jump to L if FPU condition flag C2 is set. The FPU status word is
// fetched through rax (fnstsw) and transferred to EFLAGS with sahf,
// where C2 lands in the parity flag; tmp preserves rax if needed.
void MacroAssembler::jC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::parity, L);
}
4506
// Jump to L if FPU condition flag C2 is clear (inverse of jC2): C2 is
// mapped onto the parity flag via fnstsw/sahf, then tested with jnp.
void MacroAssembler::jnC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::noParity, L);
}
4516
// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
void MacroAssembler::jump(ArrayAddress entry) {
  // Indirect jump through the case-table slot described by 'entry'.
  jmp(as_Address(entry));
}
4522
// Note: y_lo will be destroyed
void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  // Long compare for Java (semantics as described in JVM spec.)
  // Leaves -1, 0 or +1 in x_hi for x < y, x == y, x > y respectively.
  Label high, low, done;

  // Compare the high halves first; only if they are equal does the
  // (unsigned) comparison of the low halves decide.
  cmpl(x_hi, y_hi);
  jcc(Assembler::less, low);
  jcc(Assembler::greater, high);
  // x_hi is the return register
  xorl(x_hi, x_hi);
  cmpl(x_lo, y_lo);
  jcc(Assembler::below, low);
  jcc(Assembler::equal, done);

  bind(high);
  xorl(x_hi, x_hi);
  increment(x_hi);
  jmp(done);

  bind(low);
  xorl(x_hi, x_hi);
  decrementl(x_hi);

  bind(done);
}
4548
4549void MacroAssembler::lea(Register dst, AddressLiteral src) {
4550    mov_literal32(dst, (int32_t)src.target(), src.rspec());
4551}
4552
// Store the literal's address into memory at dst as a relocatable
// 32-bit immediate.
void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  // leal(dst, as_Address(adr));
  // see note in movl as to why we must use a move
  mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
}
4558
// Tear down the current frame: restore rsp from rbp and pop the saved
// rbp (same effect as the 'leave' instruction).
void MacroAssembler::leave() {
  mov(rsp, rbp);
  pop(rbp);
}
4563
// Clobbers rax, rbx, rcx and rdx.
void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
  // Multiplication of two Java long values stored on the stack
  // as illustrated below. Result is in rdx:rax.
  //
  // rsp ---> [  ??  ] \               \
  //            ....    | y_rsp_offset  |
  //          [ y_lo ] /  (in bytes)    | x_rsp_offset
  //          [ y_hi ]                  | (in bytes)
  //            ....                    |
  //          [ x_lo ]                 /
  //          [ x_hi ]
  //            ....
  //
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
  Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
  Label quick;
  // load x_hi, y_hi and check if quick
  // multiplication is possible
  movl(rbx, x_hi);
  movl(rcx, y_hi);
  movl(rax, rbx);
  orl(rbx, rcx);                                 // rbx, = 0 <=> x_hi = 0 and y_hi = 0
  jcc(Assembler::zero, quick);                   // if rbx, = 0 do quick multiply
  // do full multiplication
  // 1st step
  mull(y_lo);                                    // x_hi * y_lo
  movl(rbx, rax);                                // save lo(x_hi * y_lo) in rbx,
  // 2nd step
  movl(rax, x_lo);
  mull(rcx);                                     // x_lo * y_hi
  addl(rbx, rax);                                // add lo(x_lo * y_hi) to rbx,
  // 3rd step
  bind(quick);                                   // note: rbx, = 0 if quick multiply!
  movl(rax, x_lo);
  mull(y_lo);                                    // x_lo * y_lo
  addl(rdx, rbx);                                // correct hi(x_lo * y_lo)
}
4603
// Two's-complement negation of the long held in hi:lo.
void MacroAssembler::lneg(Register hi, Register lo) {
  negl(lo);
  adcl(hi, 0);   // propagate the borrow from the low word
  negl(hi);
}
4609
// Shift-left of the long in hi:lo; the shift count comes from rcx and
// is masked to 0x3f (rcx is modified).
void MacroAssembler::lshl(Register hi, Register lo) {
  // Java shift left long support (semantics as described in JVM spec., p.305)
  // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
  // shift value is in rcx !
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(hi, lo);                                  // x := x << n
  xorl(lo, lo);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shldl(hi, lo);                                 // x := x << s
  shll(lo);
}
4629
4630
// Shift-right of the long in hi:lo, arithmetic when sign_extension is
// true, logical otherwise; shift count comes from rcx, masked to 0x3f
// (rcx is modified).
void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
  // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
  // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(lo, hi);                                  // x := x >> n
  if (sign_extension) sarl(hi, 31);
  else                xorl(hi, hi);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shrdl(lo, hi);                                 // x := x >> s
  if (sign_extension) sarl(hi);
  else                shrl(hi);
}
4651
4652void MacroAssembler::movoop(Register dst, jobject obj) {
4653  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
4654}
4655
4656void MacroAssembler::movoop(Address dst, jobject obj) {
4657  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
4658}
4659
4660void MacroAssembler::movptr(Register dst, AddressLiteral src) {
4661  if (src.is_lval()) {
4662    mov_literal32(dst, (intptr_t)src.target(), src.rspec());
4663  } else {
4664    movl(dst, as_Address(src));
4665  }
4666}
4667
4668void MacroAssembler::movptr(ArrayAddress dst, Register src) {
4669  movl(as_Address(dst), src);
4670}
4671
4672void MacroAssembler::movptr(Register dst, ArrayAddress src) {
4673  movl(dst, as_Address(src));
4674}
4675
// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Address dst, intptr_t src) {
  // On 32-bit a pointer-sized store is just a 32-bit move.
  movl(dst, src);
}
4680
4681
4682void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
4683  movsd(dst, as_Address(src));
4684}
4685
// Restore the registers in the reverse order of
// push_callee_saved_registers.
void MacroAssembler::pop_callee_saved_registers() {
  pop(rcx);
  pop(rdx);
  pop(rdi);
  pop(rsi);
}
4692
// Pop a double from the stack onto the FPU stack and release the two
// words reserved by push_fTOS.
void MacroAssembler::pop_fTOS() {
  fld_d(Address(rsp, 0));
  addl(rsp, 2 * wordSize);
}
4697
// Save rsi, rdi, rdx and rcx on the stack; undone by
// pop_callee_saved_registers.
void MacroAssembler::push_callee_saved_registers() {
  push(rsi);
  push(rdi);
  push(rdx);
  push(rcx);
}
4704
// Reserve two words on the stack and spill the FPU top-of-stack double
// into them (popping the FPU stack); reversed by pop_fTOS.
void MacroAssembler::push_fTOS() {
  subl(rsp, 2 * wordSize);
  fstp_d(Address(rsp, 0));
}
4709
4710
4711void MacroAssembler::pushoop(jobject obj) {
4712  push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
4713}
4714
4715
4716void MacroAssembler::pushptr(AddressLiteral src) {
4717  if (src.is_lval()) {
4718    push_literal32((int32_t)src.target(), src.rspec());
4719  } else {
4720    pushl(as_Address(src));
4721  }
4722}
4723
// Clear dst, then set its low byte from the current "not zero"
// condition (see set_byte_if_not_zero), producing 0 or 1 in the whole
// word.
void MacroAssembler::set_word_if_not_zero(Register dst) {
  xorl(dst, dst);
  set_byte_if_not_zero(dst);
}
4728
// 32-bit: outgoing VM-call argument 0 is passed on the stack.
static void pass_arg0(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}
4732
// 32-bit: outgoing VM-call argument 1 is passed on the stack.
static void pass_arg1(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}
4736
// 32-bit: outgoing VM-call argument 2 is passed on the stack.
static void pass_arg2(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}
4740
// 32-bit: outgoing VM-call argument 3 is passed on the stack.
static void pass_arg3(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}
4744
4745#ifndef PRODUCT
4746extern "C" void findpc(intptr_t x);
4747#endif
4748
4749void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
4750  // In order to get locks to work, we need to fake a in_VM state
4751  JavaThread* thread = JavaThread::current();
4752  JavaThreadState saved_state = thread->thread_state();
4753  thread->set_thread_state(_thread_in_vm);
4754  if (ShowMessageBoxOnError) {
4755    JavaThread* thread = JavaThread::current();
4756    JavaThreadState saved_state = thread->thread_state();
4757    thread->set_thread_state(_thread_in_vm);
4758    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
4759      ttyLocker ttyl;
4760      BytecodeCounter::print();
4761    }
4762    // To see where a verify_oop failed, get $ebx+40/X for this frame.
4763    // This is the value of eip which points to where verify_oop will return.
4764    if (os::message_box(msg, "Execution stopped, print registers?")) {
4765      ttyLocker ttyl;
4766      tty->print_cr("eip = 0x%08x", eip);
4767#ifndef PRODUCT
4768      tty->cr();
4769      findpc(eip);
4770      tty->cr();
4771#endif
4772      tty->print_cr("rax, = 0x%08x", rax);
4773      tty->print_cr("rbx, = 0x%08x", rbx);
4774      tty->print_cr("rcx = 0x%08x", rcx);
4775      tty->print_cr("rdx = 0x%08x", rdx);
4776      tty->print_cr("rdi = 0x%08x", rdi);
4777      tty->print_cr("rsi = 0x%08x", rsi);
4778      tty->print_cr("rbp, = 0x%08x", rbp);
4779      tty->print_cr("rsp = 0x%08x", rsp);
4780      BREAKPOINT;
4781    }
4782  } else {
4783    ttyLocker ttyl;
4784    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
4785    assert(false, "DEBUG MESSAGE");
4786  }
4787  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
4788}
4789
// Emit code that halts execution with message 'msg': push the message
// address and the current eip, save all registers, then call debug32
// (which prints state). hlt() traps if debug32 returns.
void MacroAssembler::stop(const char* msg) {
  ExternalAddress message((address)msg);
  // push address of message
  pushptr(message.addr());
  { Label L; call(L, relocInfo::none); bind(L); }     // push eip
  pusha();                                           // push registers
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
  hlt();
}
4799
// Emit code that prints a warning via the 'warning' runtime entry and
// continues; the full CPU state is saved and restored around the call.
void MacroAssembler::warn(const char* msg) {
  push_CPU_state();

  ExternalAddress message((address) msg);
  // push address of message
  pushptr(message.addr());

  call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
  addl(rsp, wordSize);       // discard argument
  pop_CPU_state();
}
4811
4812#else // _LP64
4813
4814// 64 bit versions
4815
4816Address MacroAssembler::as_Address(AddressLiteral adr) {
4817  // amd64 always does this as a pc-rel
4818  // we can be absolute or disp based on the instruction type
4819  // jmp/call are displacements others are absolute
4820  assert(!adr.is_lval(), "must be rval");
4821  assert(reachable(adr), "must be");
4822  return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());
4823
4824}
4825
// Materialize an ArrayAddress: the base is loaded into rscratch1
// (clobbering it) and combined with the index's index/scale.
Address MacroAssembler::as_Address(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  lea(rscratch1, base);
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(rscratch1, index._index, index._scale, index._disp);
  return array;
}
4834
4835int MacroAssembler::biased_locking_enter(Register lock_reg,
4836                                         Register obj_reg,
4837                                         Register swap_reg,
4838                                         Register tmp_reg,
4839                                         bool swap_reg_contains_mark,
4840                                         Label& done,
4841                                         Label* slow_case,
4842                                         BiasedLockingCounters* counters) {
4843  assert(UseBiasedLocking, "why call this otherwise?");
4844  assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
4845  assert(tmp_reg != noreg, "tmp_reg must be supplied");
4846  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
4847  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
4848  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
4849  Address saved_mark_addr(lock_reg, 0);
4850
4851  if (PrintBiasedLockingStatistics && counters == NULL)
4852    counters = BiasedLocking::counters();
4853
4854  // Biased locking
4855  // See whether the lock is currently biased toward our thread and
4856  // whether the epoch is still valid
4857  // Note that the runtime guarantees sufficient alignment of JavaThread
4858  // pointers to allow age to be placed into low bits
4859  // First check to see whether biasing is even enabled for this object
4860  Label cas_label;
4861  int null_check_offset = -1;
4862  if (!swap_reg_contains_mark) {
4863    null_check_offset = offset();
4864    movq(swap_reg, mark_addr);
4865  }
4866  movq(tmp_reg, swap_reg);
4867  andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
4868  cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
4869  jcc(Assembler::notEqual, cas_label);
4870  // The bias pattern is present in the object's header. Need to check
4871  // whether the bias owner and the epoch are both still current.
4872  load_prototype_header(tmp_reg, obj_reg);
4873  orq(tmp_reg, r15_thread);
4874  xorq(tmp_reg, swap_reg);
4875  andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
4876  if (counters != NULL) {
4877    cond_inc32(Assembler::zero,
4878               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
4879  }
4880  jcc(Assembler::equal, done);
4881
4882  Label try_revoke_bias;
4883  Label try_rebias;
4884
4885  // At this point we know that the header has the bias pattern and
4886  // that we are not the bias owner in the current epoch. We need to
4887  // figure out more details about the state of the header in order to
4888  // know what operations can be legally performed on the object's
4889  // header.
4890
4891  // If the low three bits in the xor result aren't clear, that means
4892  // the prototype header is no longer biased and we have to revoke
4893  // the bias on this object.
4894  testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
4895  jcc(Assembler::notZero, try_revoke_bias);
4896
4897  // Biasing is still enabled for this data type. See whether the
4898  // epoch of the current bias is still valid, meaning that the epoch
4899  // bits of the mark word are equal to the epoch bits of the
4900  // prototype header. (Note that the prototype header's epoch bits
4901  // only change at a safepoint.) If not, attempt to rebias the object
4902  // toward the current thread. Note that we must be absolutely sure
4903  // that the current epoch is invalid in order to do this because
4904  // otherwise the manipulations it performs on the mark word are
4905  // illegal.
4906  testq(tmp_reg, markOopDesc::epoch_mask_in_place);
4907  jcc(Assembler::notZero, try_rebias);
4908
4909  // The epoch of the current bias is still valid but we know nothing
4910  // about the owner; it might be set or it might be clear. Try to
4911  // acquire the bias of the object using an atomic operation. If this
4912  // fails we will go in to the runtime to revoke the object's bias.
4913  // Note that we first construct the presumed unbiased header so we
4914  // don't accidentally blow away another thread's valid bias.
4915  andq(swap_reg,
4916       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
4917  movq(tmp_reg, swap_reg);
4918  orq(tmp_reg, r15_thread);
4919  if (os::is_MP()) {
4920    lock();
4921  }
4922  cmpxchgq(tmp_reg, Address(obj_reg, 0));
4923  // If the biasing toward our thread failed, this means that
4924  // another thread succeeded in biasing it toward itself and we
4925  // need to revoke that bias. The revocation will occur in the
4926  // interpreter runtime in the slow case.
4927  if (counters != NULL) {
4928    cond_inc32(Assembler::zero,
4929               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
4930  }
4931  if (slow_case != NULL) {
4932    jcc(Assembler::notZero, *slow_case);
4933  }
4934  jmp(done);
4935
4936  bind(try_rebias);
4937  // At this point we know the epoch has expired, meaning that the
4938  // current "bias owner", if any, is actually invalid. Under these
4939  // circumstances _only_, we are allowed to use the current header's
4940  // value as the comparison value when doing the cas to acquire the
4941  // bias in the current epoch. In other words, we allow transfer of
4942  // the bias from one thread to another directly in this situation.
4943  //
4944  // FIXME: due to a lack of registers we currently blow away the age
4945  // bits in this situation. Should attempt to preserve them.
4946  load_prototype_header(tmp_reg, obj_reg);
4947  orq(tmp_reg, r15_thread);
4948  if (os::is_MP()) {
4949    lock();
4950  }
4951  cmpxchgq(tmp_reg, Address(obj_reg, 0));
4952  // If the biasing toward our thread failed, then another thread
4953  // succeeded in biasing it toward itself and we need to revoke that
4954  // bias. The revocation will occur in the runtime in the slow case.
4955  if (counters != NULL) {
4956    cond_inc32(Assembler::zero,
4957               ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
4958  }
4959  if (slow_case != NULL) {
4960    jcc(Assembler::notZero, *slow_case);
4961  }
4962  jmp(done);
4963
4964  bind(try_revoke_bias);
4965  // The prototype mark in the klass doesn't have the bias bit set any
4966  // more, indicating that objects of this data type are not supposed
4967  // to be biased any more. We are going to try to reset the mark of
4968  // this object to the prototype value and fall through to the
4969  // CAS-based locking scheme. Note that if our CAS fails, it means
4970  // that another thread raced us for the privilege of revoking the
4971  // bias of this particular object, so it's okay to continue in the
4972  // normal locking code.
4973  //
4974  // FIXME: due to a lack of registers we currently blow away the age
4975  // bits in this situation. Should attempt to preserve them.
4976  load_prototype_header(tmp_reg, obj_reg);
4977  if (os::is_MP()) {
4978    lock();
4979  }
4980  cmpxchgq(tmp_reg, Address(obj_reg, 0));
4981  // Fall through to the normal CAS-based lock, because no matter what
4982  // the result of the above CAS, some thread must have succeeded in
4983  // removing the bias bit from the object's header.
4984  if (counters != NULL) {
4985    cond_inc32(Assembler::zero,
4986               ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
4987  }
4988
4989  bind(cas_label);
4990
4991  return null_check_offset;
4992}
4993
// Call a runtime leaf entry point with up to 4 register arguments,
// adjusting rsp so the call is made with 16-byte stack alignment.
// NOTE(review): the single 8-byte adjustment presumes rsp is always
// 8-byte aligned here — confirm against callers.
void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
  Label L, E;

#ifdef _WIN64
  // Windows always allocates space for its register args
  assert(num_args <= 4, "only register arguments supported");
  subq(rsp,  frame::arg_reg_save_area_bytes);
#endif

  // Align stack if necessary
  testl(rsp, 15);
  jcc(Assembler::zero, L);

  subq(rsp, 8);
  {
    call(RuntimeAddress(entry_point));
  }
  addq(rsp, 8);
  jmp(E);

  bind(L);
  {
    call(RuntimeAddress(entry_point));
  }

  bind(E);

#ifdef _WIN64
  // restore stack pointer
  addq(rsp, frame::arg_reg_save_area_bytes);
#endif

}
5027
// Compare src1 with the 64-bit value at the literal address src2,
// going through rscratch1 (clobbered) when src2 is not RIP-reachable.
void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
  assert(!src2.is_lval(), "should use cmpptr");

  if (reachable(src2)) {
    cmpq(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    Assembler::cmpq(src1, Address(rscratch1, 0));
  }
}
5038
int MacroAssembler::corrected_idivq(Register reg) {
  // Full implementation of Java ldiv and lrem; checks for special
  // case as described in JVM spec., p.243 & p.271.  The function
  // returns the (pc) offset of the idivl instruction - may be needed
  // for implicit exceptions.
  //
  //         normal case                           special case
  //
  // input : rax: dividend                         min_long
  //         reg: divisor   (may not be eax/edx)   -1
  //
  // output: rax: quotient  (= rax idiv reg)       min_long
  //         rdx: remainder (= rax irem reg)       0
  //
  // Clobbers rdx. The min_long/-1 case is special because idivq would
  // fault on it (quotient overflows).
  assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
  static const int64_t min_long = 0x8000000000000000;
  Label normal_case, special_case;

  // check for special case
  cmp64(rax, ExternalAddress((address) &min_long));
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where
                  // remainder = 0)
  cmpq(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdqq();
  int idivq_offset = offset();
  idivq(reg);

  // normal and special case exit
  bind(special_case);

  return idivq_offset;
}
5075
5076void MacroAssembler::decrementq(Register reg, int value) {
5077  if (value == min_jint) { subq(reg, value); return; }
5078  if (value <  0) { incrementq(reg, -value); return; }
5079  if (value == 0) {                        ; return; }
5080  if (value == 1 && UseIncDec) { decq(reg) ; return; }
5081  /* else */      { subq(reg, value)       ; return; }
5082}
5083
5084void MacroAssembler::decrementq(Address dst, int value) {
5085  if (value == min_jint) { subq(dst, value); return; }
5086  if (value <  0) { incrementq(dst, -value); return; }
5087  if (value == 0) {                        ; return; }
5088  if (value == 1 && UseIncDec) { decq(dst) ; return; }
5089  /* else */      { subq(dst, value)       ; return; }
5090}
5091
void MacroAssembler::fat_nop() {
  // A 5 byte nop that is safe for patching (see patch_verified_entry)
  // Recommended sequence from 'Software Optimization Guide for the AMD
  // Hammer Processor'
  // Encodes as: 66 66 90 (three-byte nop) followed by 66 90 (two-byte nop).
  emit_byte(0x66);
  emit_byte(0x66);
  emit_byte(0x90);
  emit_byte(0x66);
  emit_byte(0x90);
}
5102
5103void MacroAssembler::incrementq(Register reg, int value) {
5104  if (value == min_jint) { addq(reg, value); return; }
5105  if (value <  0) { decrementq(reg, -value); return; }
5106  if (value == 0) {                        ; return; }
5107  if (value == 1 && UseIncDec) { incq(reg) ; return; }
5108  /* else */      { addq(reg, value)       ; return; }
5109}
5110
5111void MacroAssembler::incrementq(Address dst, int value) {
5112  if (value == min_jint) { addq(dst, value); return; }
5113  if (value <  0) { decrementq(dst, -value); return; }
5114  if (value == 0) {                        ; return; }
5115  if (value == 1 && UseIncDec) { incq(dst) ; return; }
5116  /* else */      { addq(dst, value)       ; return; }
5117}
5118
// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
// On 64-bit the table base is materialized in rscratch1 and patched into the
// index Address before the indirect jmp.
void MacroAssembler::jump(ArrayAddress entry) {
  lea(rscratch1, entry.base());
  Address dispatch = entry.index();
  assert(dispatch._base == noreg, "must be");
  dispatch._base = rscratch1; // 'entry' is by-value, so mutating the copy is safe
  jmp(dispatch);
}
5128
// 32-bit-only long compare. Must never be reached in this (64-bit) build,
// where longs live in a single register.
void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  cmpq(x_lo, y_lo);
}
5133
// Load the literal address (with its relocation) into dst.
void MacroAssembler::lea(Register dst, AddressLiteral src) {
    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
}
5137
// Store the literal address into memory at dst, going through rscratch1.
void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
  movptr(dst, rscratch1);
}
5142
// Emit a single LEAVE (restores rsp from rbp, pops rbp).
void MacroAssembler::leave() {
  // %%% is this really better? Why not on 32bit too?
  emit_byte(0xC9); // LEAVE
}
5147
// 32-bit-only long negate. Must never be reached in this (64-bit) build.
void MacroAssembler::lneg(Register hi, Register lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  negq(lo);
}
5152
// Load an oop constant into dst, recording an immediate-oop relocation so
// the GC can find and update the embedded pointer.
void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
}
5156
// Store an oop constant to memory at dst, staged through rscratch1 (64-bit
// immediates cannot be stored to memory directly).
void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
  movq(dst, rscratch1);
}
5161
// Load from/of an AddressLiteral into dst:
//  - lval: load the literal's address itself (with relocation);
//  - otherwise: load the value stored at the literal's address, via a
//    RIP-relative access when reachable, else through rscratch1.
void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  if (src.is_lval()) {
    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
  } else {
    if (reachable(src)) {
      movq(dst, as_Address(src));
    } else {
      lea(rscratch1, src);
      movq(dst, Address(rscratch1,0));
    }
  }
}
5174
// Store src into the array slot described by dst.
void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movq(as_Address(dst), src);
}
5178
// Load the array slot described by src into dst.
void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movq(dst, as_Address(src));
}
5182
// src should NEVER be a real pointer. Use AddressLiteral for true pointers
// (those need relocation info, which this raw-immediate path does not record).
void MacroAssembler::movptr(Address dst, intptr_t src) {
  mov64(rscratch1, src); // 64-bit immediates must be staged through a register
  movq(dst, rscratch1);
}
5188
// These are mostly for initializing NULL
// Stores src to dst; presumably sign-extended to 64 bits by movslq — confirm
// against Assembler::movslq(Address, int32_t).
void MacroAssembler::movptr(Address dst, int32_t src) {
  movslq(dst, src);
}
5193
// Load a 32-bit constant into dst, sign-extended to pointer width.
void MacroAssembler::movptr(Register dst, int32_t src) {
  mov64(dst, (intptr_t)src);
}
5197
// Push an oop constant on the stack (staged through rscratch1 with an
// immediate-oop relocation; see movoop).
void MacroAssembler::pushoop(jobject obj) {
  movoop(rscratch1, obj);
  push(rscratch1);
}
5202
// Push either the literal's address (lval) or the value stored at that
// address. rscratch1 always receives the address first.
void MacroAssembler::pushptr(AddressLiteral src) {
  lea(rscratch1, src);
  if (src.is_lval()) {
    push(rscratch1);
  } else {
    pushq(Address(rscratch1, 0));
  }
}
5211
// Clear the thread's last-Java-frame anchor fields in r15_thread.
// clear_fp / clear_pc select whether the fp and pc slots are also zeroed.
void MacroAssembler::reset_last_Java_frame(bool clear_fp,
                                           bool clear_pc) {
  // we must set sp to zero to clear frame
  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  }

  if (clear_pc) {
    movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
  }
}
5226
// Record the last Java frame in the thread anchor (r15_thread) so the VM can
// walk the stack across a native/VM call. last_java_sp defaults to rsp when
// invalid; fp and pc are only stored when supplied. The sp slot is written
// last: a non-null sp is what marks the anchor as set.
void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
           last_java_fp);
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    Address java_pc(r15_thread,
                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
    lea(rscratch1, InternalAddress(last_java_pc));
    movptr(java_pc, rscratch1);
  }

  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}
5251
5252static void pass_arg0(MacroAssembler* masm, Register arg) {
5253  if (c_rarg0 != arg ) {
5254    masm->mov(c_rarg0, arg);
5255  }
5256}
5257
5258static void pass_arg1(MacroAssembler* masm, Register arg) {
5259  if (c_rarg1 != arg ) {
5260    masm->mov(c_rarg1, arg);
5261  }
5262}
5263
5264static void pass_arg2(MacroAssembler* masm, Register arg) {
5265  if (c_rarg2 != arg ) {
5266    masm->mov(c_rarg2, arg);
5267  }
5268}
5269
5270static void pass_arg3(MacroAssembler* masm, Register arg) {
5271  if (c_rarg3 != arg ) {
5272    masm->mov(c_rarg3, arg);
5273  }
5274}
5275
// Emit code that halts the VM with a diagnostic: saves all registers with
// pusha, then calls debug64(msg, rip, regs) where regs points at the saved
// register block on the stack.
void MacroAssembler::stop(const char* msg) {
  address rip = pc(); // code address of this stop site, reported as 'rip'
  pusha(); // get regs on stack
  lea(c_rarg0, ExternalAddress((address) msg));
  lea(c_rarg1, InternalAddress(rip));
  movq(c_rarg2, rsp); // pass pointer to regs array
  andq(rsp, -16); // align stack as required by ABI
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
  hlt(); // never returns; trap if it somehow does
}
5286
// Emit code that prints a warning message and continues. Saves/restores the
// full CPU state around the call so the surrounding code is unaffected; r12
// holds the original rsp across the 16-byte realignment.
void MacroAssembler::warn(const char* msg) {
  push(r12);
  movq(r12, rsp);
  andq(rsp, -16);     // align stack as required by push_CPU_state and call

  push_CPU_state();   // keeps alignment at 16 bytes
  lea(c_rarg0, ExternalAddress((address) msg));
  call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
  pop_CPU_state();

  movq(rsp, r12);
  pop(r12);
}
5300
5301#ifndef PRODUCT
5302extern "C" void findpc(intptr_t x);
5303#endif
5304
// Runtime target of stop(): optionally pops a message box and dumps the
// saved register block. 'regs' points at the registers pushed by pusha() in
// stop(); the indices below presumably reflect that push order (rax pushed
// first -> regs[15], r15 last -> regs[0]) — confirm against pusha().
void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
  // In order to get locks to work, we need to fake a in_VM state
  if (ShowMessageBoxOnError ) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // XXX correct this offset for amd64
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("rip = 0x%016lx", pc);
#ifndef PRODUCT
      tty->cr();
      findpc(pc);
      tty->cr();
#endif
      tty->print_cr("rax = 0x%016lx", regs[15]);
      tty->print_cr("rbx = 0x%016lx", regs[12]);
      tty->print_cr("rcx = 0x%016lx", regs[14]);
      tty->print_cr("rdx = 0x%016lx", regs[13]);
      tty->print_cr("rdi = 0x%016lx", regs[8]);
      tty->print_cr("rsi = 0x%016lx", regs[9]);
      tty->print_cr("rbp = 0x%016lx", regs[10]);
      tty->print_cr("rsp = 0x%016lx", regs[11]);
      tty->print_cr("r8  = 0x%016lx", regs[7]);
      tty->print_cr("r9  = 0x%016lx", regs[6]);
      tty->print_cr("r10 = 0x%016lx", regs[5]);
      tty->print_cr("r11 = 0x%016lx", regs[4]);
      tty->print_cr("r12 = 0x%016lx", regs[3]);
      tty->print_cr("r13 = 0x%016lx", regs[2]);
      tty->print_cr("r14 = 0x%016lx", regs[1]);
      tty->print_cr("r15 = 0x%016lx", regs[0]);
      BREAKPOINT;
    }
    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
                    msg);
  }
}
5353
5354#endif // _LP64
5355
5356// Now versions that are common to 32/64 bit
5357
// Pointer-sized add: addq on LP64, addl on 32-bit.
void MacroAssembler::addptr(Register dst, int32_t imm32) {
  LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
}
5361
// Pointer-sized register-register add.
void MacroAssembler::addptr(Register dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}
5365
// Pointer-sized add of src into memory at dst.
void MacroAssembler::addptr(Address dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}
5369
5370void MacroAssembler::align(int modulus) {
5371  if (offset() % modulus != 0) {
5372    nop(modulus - (offset() % modulus));
5373  }
5374}
5375
// Packed-double AND with a memory operand.
// NOTE(review): unlike cmp32/cmp64 etc., no reachable(src) fallback here —
// as_Address(src) presumably requires the literal to be RIP-reachable; verify
// callers only pass reachable addresses.
void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
  andpd(dst, as_Address(src));
}
5379
// Pointer-sized AND with a 32-bit immediate (sign-extended on LP64).
void MacroAssembler::andptr(Register dst, int32_t imm32) {
  LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
}
5383
// Atomically increment the 32-bit counter at counter_addr. Flags are saved
// and restored (pushf/popf) because lock;incl clobbers them and callers may
// be in the middle of a compare.
void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
  pushf();
  if (os::is_MP())
    lock(); // lock prefix only needed for multiprocessor atomicity
  incrementl(counter_addr);
  popf();
}
5391
// Writes to stack successive pages until offset reached to check for
// stack overflow + shadow pages.  This clobbers tmp.
void MacroAssembler::bang_stack_size(Register size, Register tmp) {
  movptr(tmp, rsp);
  // Bang stack for total size given plus shadow page size.
  // Bang one page at a time because large size can bang beyond yellow and
  // red zones.
  Label loop;
  bind(loop);
  movl(Address(tmp, (-os::vm_page_size())), size );
  subptr(tmp, os::vm_page_size());
  subl(size, os::vm_page_size());
  jcc(Assembler::greater, loop);

  // Bang down shadow pages too.
  // The -1 because we already subtracted 1 page.
  // NOTE(review): the i == 0 iteration writes Address(tmp, 0), which is the
  // page the loop above touched last — presumably a harmless re-bang; confirm.
  for (int i = 0; i< StackShadowPages-1; i++) {
    // this could be any sized move but this can be a debugging crumb
    // so the bigger the better.
    movptr(Address(tmp, (-i*os::vm_page_size())), size );
  }
}
5414
// Biased-locking unlock fast path: if the mark word still carries the biased
// pattern, unlocking is a no-op and we jump to 'done'. temp_reg is clobbered.
void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
  jcc(Assembler::equal, done);
}
5429
void MacroAssembler::c2bool(Register x) {
  // implements x == 0 ? 0 : 1
  // note: must only look at least-significant byte of x
  //       since C-style booleans are stored in one byte
  //       only! (was bug)
  andl(x, 0xFF);                 // isolate the low byte, setting ZF
  setb(Assembler::notZero, x);   // x = (low byte != 0) ? 1 : 0
}
5438
// Wouldn't need if AddressLiteral version had new name
// Plain forwarder: call to a label within this code buffer.
void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
  Assembler::call(L, rtype);
}
5443
// Indirect call through a register.
void MacroAssembler::call(Register entry) {
  Assembler::call(entry);
}
5447
// Call an absolute target: direct rel32 call when the target is reachable,
// otherwise materialize the address in rscratch1 and call indirectly.
void MacroAssembler::call(AddressLiteral entry) {
  if (reachable(entry)) {
    Assembler::call_literal(entry.target(), entry.rspec());
  } else {
    lea(rscratch1, entry);
    Assembler::call(rscratch1);
  }
}
5456
5457// Implementation of call_VM versions
5458
// call_VM with no arguments. The call-over-jump trampoline (call C; jmp E)
// plants a return address on the stack so call_VM_helper can derive
// last_Java_sp/pc from it; execution resumes at E afterwards.
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
  ret(0);

  bind(E);
}
5472
// call_VM with one register argument (see the 0-arg overload for the
// call-over-jump trampoline idiom).
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
  ret(0);

  bind(E);
}
5488
// call_VM with two register arguments. Args are placed last-to-first so an
// earlier arg cannot be clobbered by moving a later one into its c_rarg slot;
// the assert guards the one remaining hazard.
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));

  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
  ret(0);

  bind(E);
}
5509
// call_VM with three register arguments. Args are placed last-to-first; the
// asserts check that placing a later arg cannot smash an earlier one.
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);

  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
  ret(0);

  bind(E);
}
5535
// call_VM variant with an explicit last_java_sp; delegates to call_VM_base
// with the platform thread register (r15 on LP64, looked up on 32-bit).
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}
5544
// Explicit-last_java_sp call_VM with one register argument.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}
5553
// Explicit-last_java_sp call_VM with two register arguments; args placed
// last-to-first to avoid clobbering (see asserts).
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}
5566
// Explicit-last_java_sp call_VM with three register arguments; args placed
// last-to-first to avoid clobbering (see asserts).
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}
5582
// Workhorse behind all call_VM variants: sets the last-Java-frame anchor,
// calls the VM entry point with the thread as the first C argument, restores
// the thread register, clears the anchor, and (optionally) checks for a
// pending exception and fetches an oop result from the thread.
void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address  entry_point,
                                  int      number_of_arguments,
                                  bool     check_exceptions) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifdef _LP64
    java_thread = r15_thread;
#else
    java_thread = rdi;
    get_thread(java_thread);
#endif // LP64
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }
  // debugging support
  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  // push java thread (becomes first argument of C function)

  NOT_LP64(push(java_thread); number_of_arguments++);
  LP64_ONLY(mov(c_rarg0, r15_thread));

  // set last Java frame before call
  assert(last_java_sp != rbp, "can't use ebp/rbp");

  // Only interpreter should have to set fp
  set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);

  // do the call, remove parameters
  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
  if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
    // rdi & rsi (also r15) are callee saved -> nothing to do
#ifdef ASSERT
    // Debug-only sanity check: the value left in java_thread still matches
    // the real current thread.
    guarantee(java_thread != rax, "change this code");
    push(rax);
    { Label L;
      get_thread(rax);
      cmpptr(java_thread, rax);
      jcc(Assembler::equal, L);
      stop("MacroAssembler::call_VM_base: rdi not callee saved?");
      bind(L);
    }
    pop(rax);
#endif
  } else {
    get_thread(java_thread);
  }
  // reset last Java frame
  // Only interpreter should have to clear fp
  reset_last_Java_frame(java_thread, true, false);

#ifndef CC_INTERP
   // C++ interp handles this in the interpreter
  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
#endif /* CC_INTERP */

  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
#ifndef _LP64
    jump_cc(Assembler::notEqual,
            RuntimeAddress(StubRoutines::forward_exception_entry()));
#else
    // This used to conditionally jump to forward_exception however it is
    // possible if we relocate that the branch will not reach. So we must jump
    // around so we can always reach

    Label ok;
    jcc(Assembler::equal, ok);
    jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
    bind(ok);
#endif // LP64
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
    movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
    verify_oop(oop_result, "broken oop in call_VM_base");
  }
}
5677
void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  // Calculate the value for last_Java_sp
  // somewhat subtle. call_VM does an intermediate call
  // which places a return address on the stack just under the
  // stack pointer as the user finished with it. This allows
  // us to retrieve last_Java_pc from last_Java_sp[-1].
  // On 32bit we then have to push additional args on the stack to accomplish
  // the actual requested call. On 64bit call_VM only can use register args
  // so the only extra space is the return address that call_VM created.
  // This hopefully explains the calculations here.

#ifdef _LP64
  // We've pushed one address, correct last_Java_sp
  lea(rax, Address(rsp, wordSize));
#else
  lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
#endif // LP64

  call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);

}
5700
// Leaf (no last-Java-frame bookkeeping) VM call with no arguments.
void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}
5704
// Leaf VM call with one register argument.
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 1);
}
5709
// Leaf VM call with two register arguments; args placed last-to-first so a
// later move cannot smash an earlier arg (see assert).
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 2);
}
5717
// Leaf VM call with three register arguments; args placed last-to-first so a
// later move cannot smash an earlier arg (see asserts).
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 3);
}
5727
// Intentionally empty in this base implementation; specialized assemblers
// presumably override/replace this hook — see callers in call_VM_base.
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}
5730
// Intentionally empty in this base implementation; specialized assemblers
// presumably override/replace this hook — see callers in call_VM_base.
void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}
5733
// 32-bit compare of the value at src1 against an immediate; falls back to
// rscratch1 addressing when src1 is not RIP-reachable.
void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
  if (reachable(src1)) {
    cmpl(as_Address(src1), imm);
  } else {
    lea(rscratch1, src1);
    cmpl(Address(rscratch1, 0), imm);
  }
}
5742
// 32-bit compare of src1 against the value at src2 (never an lval); falls
// back to rscratch1 addressing when src2 is not RIP-reachable.
void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
  assert(!src2.is_lval(), "use cmpptr");
  if (reachable(src2)) {
    cmpl(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    cmpl(src1, Address(rscratch1, 0));
  }
}
5752
// 32-bit register-immediate compare (plain forwarder).
void MacroAssembler::cmp32(Register src1, int32_t imm) {
  Assembler::cmpl(src1, imm);
}
5756
// 32-bit register-memory compare (plain forwarder).
void MacroAssembler::cmp32(Register src1, Address src2) {
  Assembler::cmpl(src1, src2);
}
5760
// Double compare producing an int in dst: -1 (less), 0 (equal), 1 (greater).
// ucomisd sets PF on an unordered (NaN) operand; unordered_is_less selects
// whether NaN maps to -1 or +1. The code seeds dst with the unordered result
// and then overwrites it as the ordered conditions are excluded.
void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomisd(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);   // unordered -> -1
    jcc(Assembler::below , L);   // opr1 < opr2 -> -1
    movl(dst, 0);
    jcc(Assembler::equal , L);   // opr1 == opr2 -> 0
    increment(dst);              // otherwise -> 1
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);   // unordered -> 1
    jcc(Assembler::above , L);   // opr1 > opr2 -> 1
    movl(dst, 0);
    jcc(Assembler::equal , L);   // opr1 == opr2 -> 0
    decrementl(dst);             // otherwise -> -1
  }
  bind(L);
}
5782
// Float compare producing an int in dst: -1 (less), 0 (equal), 1 (greater).
// Same scheme as cmpsd2int, using ucomiss; PF signals an unordered (NaN)
// operand and unordered_is_less selects which extreme NaN maps to.
void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomiss(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);   // unordered -> -1
    jcc(Assembler::below , L);   // opr1 < opr2 -> -1
    movl(dst, 0);
    jcc(Assembler::equal , L);   // opr1 == opr2 -> 0
    increment(dst);              // otherwise -> 1
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);   // unordered -> 1
    jcc(Assembler::above , L);   // opr1 > opr2 -> 1
    movl(dst, 0);
    jcc(Assembler::equal , L);   // opr1 == opr2 -> 0
    decrementl(dst);             // otherwise -> -1
  }
  bind(L);
}
5804
5805
// Byte compare of the value at src1 against an immediate; falls back to
// rscratch1 addressing when src1 is not RIP-reachable.
void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
  if (reachable(src1)) {
    cmpb(as_Address(src1), imm);
  } else {
    lea(rscratch1, src1);
    cmpb(Address(rscratch1, 0), imm);
  }
}
5814
// Pointer compare of src1 against an AddressLiteral.
// lval: compare against the literal's address itself (64-bit literals must be
// staged in rscratch1). Otherwise compare against the value stored at the
// address, with the usual reachability fallback on 64-bit.
void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
#ifdef _LP64
  if (src2.is_lval()) {
    movptr(rscratch1, src2);
    Assembler::cmpq(src1, rscratch1);
  } else if (reachable(src2)) {
    cmpq(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    Assembler::cmpq(src1, Address(rscratch1, 0));
  }
#else
  if (src2.is_lval()) {
    cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
  } else {
    cmpl(src1, as_Address(src2));
  }
#endif // _LP64
}
5834
// Compare the pointer stored at src1 against the literal address src2.
// Only lvals make sense here (a mem-mem value compare is not encodable).
void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
  assert(src2.is_lval(), "not a mem-mem compare");
#ifdef _LP64
  // moves src2's literal address
  movptr(rscratch1, src2);
  Assembler::cmpq(src1, rscratch1);
#else
  cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
#endif // _LP64
}
5845
// Pointer-sized compare-and-exchange at adr with a lock prefix on MP systems;
// falls back to rscratch1 addressing when adr is not RIP-reachable.
// (cmpxchg implicitly uses rax as the compare value.)
void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
  if (reachable(adr)) {
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, as_Address(adr));
  } else {
    lea(rscratch1, adr);
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, Address(rscratch1, 0));
  }
}
5858
// Pointer-sized cmpxchg: 64-bit on LP64, 32-bit otherwise.
void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
  LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
}
5862
// Scalar-double compare with a memory operand.
// NOTE(review): no reachable() fallback here (cf. cmp32/cmp64) — presumably
// callers guarantee the literal is RIP-reachable; verify.
void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
  comisd(dst, as_Address(src));
}
5866
// Scalar-float compare with a memory operand.
// NOTE(review): no reachable() fallback here (cf. cmp32/cmp64) — presumably
// callers guarantee the literal is RIP-reachable; verify.
void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
  comiss(dst, as_Address(src));
}
5870
5871
// Atomically increment the counter at counter_addr iff 'cond' holds:
// jump over the increment on the negated condition.
void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
  Condition negated_cond = negate_condition(cond);
  Label L;
  jcc(negated_cond, L);
  atomic_incl(counter_addr);
  bind(L);
}
5879
int MacroAssembler::corrected_idivl(Register reg) {
  // Full implementation of Java idiv and irem; checks for
  // special case as described in JVM spec., p.243 & p.271.
  // The function returns the (pc) offset of the idivl
  // instruction - may be needed for implicit exceptions.
  //
  //         normal case                           special case
  //
  // input : rax,: dividend                         min_int
  //         reg: divisor   (may not be rax,/rdx)   -1
  //
  // output: rax,: quotient  (= rax, idiv reg)       min_int
  //         rdx: remainder (= rax, irem reg)       0
  assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
  const int min_int = 0x80000000;
  Label normal_case, special_case;

  // check for special case: min_int / -1 would overflow idivl (raises #DE),
  // so it is filtered out up front.
  cmpl(rax, min_int);
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
  cmpl(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdql(); // sign-extend eax into edx:eax before the 64/32-bit divide
  int idivl_offset = offset();
  idivl(reg);

  // normal and special case exit
  bind(special_case);

  return idivl_offset;
}
5915
5916
5917
5918void MacroAssembler::decrementl(Register reg, int value) {
5919  if (value == min_jint) {subl(reg, value) ; return; }
5920  if (value <  0) { incrementl(reg, -value); return; }
5921  if (value == 0) {                        ; return; }
5922  if (value == 1 && UseIncDec) { decl(reg) ; return; }
5923  /* else */      { subl(reg, value)       ; return; }
5924}
5925
5926void MacroAssembler::decrementl(Address dst, int value) {
5927  if (value == min_jint) {subl(dst, value) ; return; }
5928  if (value <  0) { incrementl(dst, -value); return; }
5929  if (value == 0) {                        ; return; }
5930  if (value == 1 && UseIncDec) { decl(dst) ; return; }
5931  /* else */      { subl(dst, value)       ; return; }
5932}
5933
// Signed division by a power of two via arithmetic shift. A plain sar
// rounds toward negative infinity, so negative dividends are biased by
// (2^shift - 1) first to get round-toward-zero semantics.
void MacroAssembler::division_with_shift (Register reg, int shift_value) {
  assert (shift_value > 0, "illegal shift value");
  Label _is_positive;
  testl (reg, reg);
  jcc (Assembler::positive, _is_positive);
  int offset = (1 << shift_value) - 1 ;

  if (offset == 1) {
    incrementl(reg);
  } else {
    addl(reg, offset);
  }

  bind (_is_positive);
  sarl(reg, shift_value);
}
5950
// !defined(COMPILER2) is because of stupid core builds
#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
// Clear the x87 FPU register stack: one emms when MMX is available,
// otherwise free each of the 8 stack slots individually.
void MacroAssembler::empty_FPU_stack() {
  if (VM_Version::supports_mmx()) {
    emms();
  } else {
    for (int i = 8; i-- > 0; ) ffree(i);
  }
}
#endif // !LP64 || C1 || !C2
5961
5962
// Defines obj, preserves var_size_in_bytes
// Lock-free eden bump-pointer allocation: load top, compute end, check heap
// bounds, then CAS the new top; retry on contention. Jumps to slow_case when
// inline contiguous allocation is unavailable or the object does not fit.
void MacroAssembler::eden_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Label& slow_case) {
  assert(obj == rax, "obj must be in rax, for cmpxchg");
  assert_different_registers(obj, var_size_in_bytes, t1);
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    jmp(slow_case);
  } else {
    Register end = t1;
    Label retry;
    bind(retry);
    ExternalAddress heap_top((address) Universe::heap()->top_addr());
    movptr(obj, heap_top);
    // size is either a compile-time constant or a register value
    if (var_size_in_bytes == noreg) {
      lea(end, Address(obj, con_size_in_bytes));
    } else {
      lea(end, Address(obj, var_size_in_bytes, Address::times_1));
    }
    // if end < obj then we wrapped around => object too long => slow case
    cmpptr(end, obj);
    jcc(Assembler::below, slow_case);
    cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
    jcc(Assembler::above, slow_case);
    // Compare obj with the top addr, and if still equal, store the new top addr in
    // end at the address of the top addr pointer. Sets ZF if was equal, and clears
    // it otherwise. Use lock prefix for atomicity on MPs.
    locked_cmpxchgptr(end, heap_top);
    jcc(Assembler::notEqual, retry);
  }
}
5996
// Set up a standard stack frame: save the caller's frame pointer and make
// rbp point at the new frame.
void MacroAssembler::enter() {
  push(rbp);
  mov(rbp, rsp);
}
6001
// Compare ST(0) with ST(1) and pop both (the common case of the general fcmp).
void MacroAssembler::fcmp(Register tmp) {
  fcmp(tmp, 1, true, true);
}
6005
// Compare ST(0) against ST(index) and leave the result in the eflags
// condition codes (see the table at the end). pop_left/pop_right control
// whether the compared operands are popped off the x87 stack. tmp is a
// scratch register needed only on pre-cmov CPUs (where rax must be used
// to transfer the FPU status word); pass noreg when cmov is supported.
void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
  assert(!pop_right || pop_left, "usage error");
  if (VM_Version::supports_cmov()) {
    // fucomi/fucomip set eflags directly.
    assert(tmp == noreg, "unneeded temp");
    if (pop_left) {
      fucomip(index);
    } else {
      fucomi(index);
    }
    if (pop_right) {
      fpop();
    }
  } else {
    assert(tmp != noreg, "need temp");
    if (pop_left) {
      if (pop_right) {
        fcompp();
      } else {
        fcomp(index);
      }
    } else {
      fcom(index);
    }
    // convert FPU condition into eflags condition via rax,
    save_rax(tmp);
    fwait(); fnstsw_ax();  // store FPU status word into ax
    sahf();                // load ah into the eflags SF/ZF/AF/PF/CF bits
    restore_rax(tmp);
  }
  // condition codes set as follows:
  //
  // CF (corresponds to C0) if x < y
  // PF (corresponds to C2) if unordered
  // ZF (corresponds to C3) if x = y
}
6041
// Compare ST(0) with ST(1), popping both, and materialize the result as an
// int in dst (-1/0/+1); see the general overload for details.
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
  fcmp2int(dst, unordered_is_less, 1, true, true);
}
6045
// Compare ST(0) with ST(index) via fcmp, then convert the resulting eflags
// into an integer in dst: -1 if less, 0 if equal, +1 if greater. Unordered
// results map to -1 or +1 depending on unordered_is_less.
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
  fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
  Label L;
  if (unordered_is_less) {
    // Start from -1; fall through to 0 / +1 as the flags rule each case out.
    movl(dst, -1);
    jcc(Assembler::parity, L);   // unordered => -1
    jcc(Assembler::below , L);   // less => -1
    movl(dst, 0);
    jcc(Assembler::equal , L);   // equal => 0
    increment(dst);              // greater => +1
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);   // unordered => +1
    jcc(Assembler::above , L);   // greater => +1
    movl(dst, 0);
    jcc(Assembler::equal , L);   // equal => 0
    decrementl(dst);             // less => -1
  }
  bind(L);
}
6066
6067void MacroAssembler::fld_d(AddressLiteral src) {
6068  fld_d(as_Address(src));
6069}
6070
6071void MacroAssembler::fld_s(AddressLiteral src) {
6072  fld_s(as_Address(src));
6073}
6074
6075void MacroAssembler::fld_x(AddressLiteral src) {
6076  Assembler::fld_x(as_Address(src));
6077}
6078
6079void MacroAssembler::fldcw(AddressLiteral src) {
6080  Assembler::fldcw(as_Address(src));
6081}
6082
// Pop ST(0) off the x87 stack: mark it free, then advance the stack top.
void MacroAssembler::fpop() {
  ffree();
  fincstp();
}
6087
// IEEE remainder of ST(0) by ST(1), looping on fprem until the partial
// remainder is complete (fprem may need multiple iterations for large
// exponent differences). tmp is scratch for saving rax around fnstsw_ax.
void MacroAssembler::fremr(Register tmp) {
  save_rax(tmp);
  { Label L;
    bind(L);
    fprem();
    fwait(); fnstsw_ax();  // status word -> ax; C2 set means "incomplete"
#ifdef _LP64
    testl(rax, 0x400);     // test C2 directly (bit 10 of the status word)
    jcc(Assembler::notEqual, L);
#else
    sahf();                // C2 lands in PF after sahf
    jcc(Assembler::parity, L);
#endif // _LP64
  }
  restore_rax(tmp);
  // Result is in ST0.
  // Note: fxch & fpop to get rid of ST1
  // (otherwise FPU stack could overflow eventually)
  fxch(1);
  fpop();
}
6109
6110
6111void MacroAssembler::incrementl(AddressLiteral dst) {
6112  if (reachable(dst)) {
6113    incrementl(as_Address(dst));
6114  } else {
6115    lea(rscratch1, dst);
6116    incrementl(Address(rscratch1, 0));
6117  }
6118}
6119
// Increment the 32-bit value at an array-element address.
void MacroAssembler::incrementl(ArrayAddress dst) {
  incrementl(as_Address(dst));
}
6123
6124void MacroAssembler::incrementl(Register reg, int value) {
6125  if (value == min_jint) {addl(reg, value) ; return; }
6126  if (value <  0) { decrementl(reg, -value); return; }
6127  if (value == 0) {                        ; return; }
6128  if (value == 1 && UseIncDec) { incl(reg) ; return; }
6129  /* else */      { addl(reg, value)       ; return; }
6130}
6131
6132void MacroAssembler::incrementl(Address dst, int value) {
6133  if (value == min_jint) {addl(dst, value) ; return; }
6134  if (value <  0) { decrementl(dst, -value); return; }
6135  if (value == 0) {                        ; return; }
6136  if (value == 1 && UseIncDec) { incl(dst) ; return; }
6137  /* else */      { addl(dst, value)       ; return; }
6138}
6139
6140void MacroAssembler::jump(AddressLiteral dst) {
6141  if (reachable(dst)) {
6142    jmp_literal(dst.target(), dst.rspec());
6143  } else {
6144    lea(rscratch1, dst);
6145    jmp(rscratch1);
6146  }
6147}
6148
// Conditional jump to a literal address. When reachable, emits the Jcc
// opcode bytes directly (short rel8 form if possible, otherwise the
// 0F 8x rel32 form). When unreachable, inverts the condition to skip an
// indirect jump through rscratch1.
void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
  if (reachable(dst)) {
    InstructionMark im(this);
    relocate(dst.reloc());
    const int short_size = 2;   // size of the rel8 encoding
    const int long_size = 6;    // size of the rel32 encoding
    int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);
    // Only use the short form for non-relocatable targets: a patched
    // target might not fit in 8 bits.
    if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
#ifdef ASSERT
    warning("reversing conditional branch");
#endif /* ASSERT */
    // jcc has no indirect form, so branch around an unconditional
    // register-indirect jump using the inverted condition.
    Label skip;
    jccb(reverse[cc], skip);
    lea(rscratch1, dst);
    Assembler::jmp(rscratch1);
    bind(skip);
  }
}
6177
6178void MacroAssembler::ldmxcsr(AddressLiteral src) {
6179  if (reachable(src)) {
6180    Assembler::ldmxcsr(as_Address(src));
6181  } else {
6182    lea(rscratch1, src);
6183    Assembler::ldmxcsr(Address(rscratch1, 0));
6184  }
6185}
6186
// Load a sign-extended byte from src into dst. Returns the code offset of
// the load instruction itself (used by callers that need to patch or
// identify the faulting instruction).
int MacroAssembler::load_signed_byte(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    off = offset();
    movsbl(dst, src); // movsxb
  } else {
    // Pre-P6: movsx is slow, so load unsigned then shift to sign-extend.
    off = load_unsigned_byte(dst, src);
    shll(dst, 24);
    sarl(dst, 24);
  }
  return off;
}
6199
// word => int32 which seems bad for 64bit
// Load a sign-extended 16-bit word from src into dst. Returns the code
// offset of the load instruction itself.
int MacroAssembler::load_signed_word(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    // This is dubious to me since it seems safe to do a signed 16 => 64 bit
    // version but this is what 64bit has always done. This seems to imply
    // that users are only using 32bits worth.
    off = offset();
    movswl(dst, src); // movsxw
  } else {
    // Pre-P6: load unsigned then shift to sign-extend.
    off = load_unsigned_word(dst, src);
    shll(dst, 16);
    sarl(dst, 16);
  }
  return off;
}
6216
// Load a zero-extended byte from src into dst. Returns the code offset of
// the load instruction itself.
int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22).
  int off;
  if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzbl(dst, src); // movzxb
  } else {
    // Pre-P6 without address/dest overlap: clear then byte-load (avoids
    // the slow movzx on older CPUs).
    xorl(dst, dst);
    off = offset();
    movb(dst, src);
  }
  return off;
}
6231
// Load a zero-extended 16-bit word from src into dst. Returns the code
// offset of the load instruction itself.
int MacroAssembler::load_unsigned_word(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22).
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzwl(dst, src); // movzxw
  } else {
    // Pre-P6 without address/dest overlap: clear then word-load.
    xorl(dst, dst);
    off = offset();
    movw(dst, src);
  }
  return off;
}
6246
6247void MacroAssembler::mov32(AddressLiteral dst, Register src) {
6248  if (reachable(dst)) {
6249    movl(as_Address(dst), src);
6250  } else {
6251    lea(rscratch1, dst);
6252    movl(Address(rscratch1, 0), src);
6253  }
6254}
6255
6256void MacroAssembler::mov32(Register dst, AddressLiteral src) {
6257  if (reachable(src)) {
6258    movl(dst, as_Address(src));
6259  } else {
6260    lea(rscratch1, src);
6261    movl(dst, Address(rscratch1, 0));
6262  }
6263}
6264
6265// C++ bool manipulation
6266
6267void MacroAssembler::movbool(Register dst, Address src) {
6268  if(sizeof(bool) == 1)
6269    movb(dst, src);
6270  else if(sizeof(bool) == 2)
6271    movw(dst, src);
6272  else if(sizeof(bool) == 4)
6273    movl(dst, src);
6274  else
6275    // unsupported
6276    ShouldNotReachHere();
6277}
6278
6279void MacroAssembler::movbool(Address dst, bool boolconst) {
6280  if(sizeof(bool) == 1)
6281    movb(dst, (int) boolconst);
6282  else if(sizeof(bool) == 2)
6283    movw(dst, (int) boolconst);
6284  else if(sizeof(bool) == 4)
6285    movl(dst, (int) boolconst);
6286  else
6287    // unsupported
6288    ShouldNotReachHere();
6289}
6290
6291void MacroAssembler::movbool(Address dst, Register src) {
6292  if(sizeof(bool) == 1)
6293    movb(dst, src);
6294  else if(sizeof(bool) == 2)
6295    movw(dst, src);
6296  else if(sizeof(bool) == 4)
6297    movl(dst, src);
6298  else
6299    // unsupported
6300    ShouldNotReachHere();
6301}
6302
// Store a byte constant at an array-element address.
void MacroAssembler::movbyte(ArrayAddress dst, int src) {
  movb(as_Address(dst), src);
}
6306
6307void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
6308  if (reachable(src)) {
6309    if (UseXmmLoadAndClearUpper) {
6310      movsd (dst, as_Address(src));
6311    } else {
6312      movlpd(dst, as_Address(src));
6313    }
6314  } else {
6315    lea(rscratch1, src);
6316    if (UseXmmLoadAndClearUpper) {
6317      movsd (dst, Address(rscratch1, 0));
6318    } else {
6319      movlpd(dst, Address(rscratch1, 0));
6320    }
6321  }
6322}
6323
6324void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
6325  if (reachable(src)) {
6326    movss(dst, as_Address(src));
6327  } else {
6328    lea(rscratch1, src);
6329    movss(dst, Address(rscratch1, 0));
6330  }
6331}
6332
6333void MacroAssembler::movptr(Register dst, Register src) {
6334  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6335}
6336
6337void MacroAssembler::movptr(Register dst, Address src) {
6338  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6339}
6340
6341// src should NEVER be a real pointer. Use AddressLiteral for true pointers
6342void MacroAssembler::movptr(Register dst, intptr_t src) {
6343  LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
6344}
6345
6346void MacroAssembler::movptr(Address dst, Register src) {
6347  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6348}
6349
6350void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
6351  if (reachable(src)) {
6352    movss(dst, as_Address(src));
6353  } else {
6354    lea(rscratch1, src);
6355    movss(dst, Address(rscratch1, 0));
6356  }
6357}
6358
// Emit an explicit null check for reg when a subsequent access at the
// given offset would not fault on its own (e.g. the offset is too large
// for the OS's protected page); otherwise emit nothing and rely on the
// implicit fault from the later access.
void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke OS NULL exception if reg = NULL by
    // accessing M[reg] w/o changing any (non-CC) registers
    // NOTE: cmpl is plenty here to provoke a segv
    cmpptr(rax, Address(reg, 0));
    // Note: should probably use testl(rax, Address(reg, 0));
    //       may be shorter code (however, this version of
    //       testl needs to be implemented first)
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg = NULL
  }
}
6373
// Emit a call into the VM's breakpoint routine rather than an inline
// breakpoint instruction.
void MacroAssembler::os_breakpoint() {
  // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
  // (e.g., MSVC can't call ps() otherwise)
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
}
6379
// Restore FPU then integer state, mirroring push_CPU_state in reverse.
void MacroAssembler::pop_CPU_state() {
  pop_FPU_state();
  pop_IU_state();
}
6384
6385void MacroAssembler::pop_FPU_state() {
6386  NOT_LP64(frstor(Address(rsp, 0));)
6387  LP64_ONLY(fxrstor(Address(rsp, 0));)
6388  addptr(rsp, FPUStateSizeInWords * wordSize);
6389}
6390
// Restore integer registers and flags saved by push_IU_state. The extra
// addq on LP64 removes the alignment slot pushed there.
void MacroAssembler::pop_IU_state() {
  popa();
  LP64_ONLY(addq(rsp, 8));
  popf();
}
6396
// Save Integer and Float state
// Warning: Stack must be 16 byte aligned (64bit)
void MacroAssembler::push_CPU_state() {
  push_IU_state();
  push_FPU_state();
}
6403
// Save the full FPU/SSE state onto the stack (fnsave on 32-bit, fxsave on
// 64-bit). Paired with pop_FPU_state.
void MacroAssembler::push_FPU_state() {
  subptr(rsp, FPUStateSizeInWords * wordSize);
#ifndef _LP64
  fnsave(Address(rsp, 0));
  fwait();  // ensure any pending FPU exception is taken before proceeding
#else
  fxsave(Address(rsp, 0));
#endif // LP64
}
6413
// Save flags and all integer registers. Paired with pop_IU_state.
void MacroAssembler::push_IU_state() {
  // Push flags first because pusha kills them
  pushf();
  // Make sure rsp stays 16-byte aligned
  LP64_ONLY(subq(rsp, 8));
  pusha();
}
6421
// Clear the thread's last-Java-frame anchor (sp always; fp and pc
// optionally), so stack walkers no longer see a Java frame recorded.
// If java_thread is invalid, rdi is clobbered to hold the thread.
void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // we must set sp to zero to clear frame
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  if (clear_fp) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  }

  if (clear_pc)
    movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);

}
6438
6439void MacroAssembler::restore_rax(Register tmp) {
6440  if (tmp == noreg) pop(rax);
6441  else if (tmp != rax) mov(rax, tmp);
6442}
6443
// Round reg up to the next multiple of modulus (modulus must be a power
// of two for the mask trick to be correct).
void MacroAssembler::round_to(Register reg, int modulus) {
  addptr(reg, modulus - 1);
  andptr(reg, -modulus);
}
6448
6449void MacroAssembler::save_rax(Register tmp) {
6450  if (tmp == noreg) push(rax);
6451  else if (tmp != rax) mov(tmp, rax);
6452}
6453
// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  // Derive a per-thread, int-aligned offset into the serialization page
  // from the thread pointer.
  movl(tmp, thread);
  shrl(tmp, os::get_serialize_page_shift_count());
  andl(tmp, (os::vm_page_size() - sizeof(int)));

  Address index(noreg, tmp, Address::times_1);
  ExternalAddress page(os::get_memory_serialize_page());

  movptr(ArrayAddress(page, index), tmp);
}
6468
// Calls to C land
//
// When entering C land, the rbp, & rsp of the last Java frame have to be recorded
// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
// has to be reset to 0. This is required to allow proper stack traversal.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional

  if (last_java_fp->is_valid()) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
  }

  // last_java_pc is optional

  if (last_java_pc != NULL) {
    lea(Address(java_thread,
                 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
        InternalAddress(last_java_pc));

  }
  // sp is written last; fp/pc must already be in place when it becomes visible.
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}
6504
6505void MacroAssembler::shlptr(Register dst, int imm8) {
6506  LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
6507}
6508
6509void MacroAssembler::shrptr(Register dst, int imm8) {
6510  LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
6511}
6512
// Sign-extend the low byte of reg in place.
void MacroAssembler::sign_extend_byte(Register reg) {
  if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
    movsbl(reg, reg); // movsxb
  } else {
    // Pre-P6 or no byte-addressable encoding: shift-pair sign extension.
    shll(reg, 24);
    sarl(reg, 24);
  }
}
6521
// Sign-extend the low 16 bits of reg in place.
void MacroAssembler::sign_extend_short(Register reg) {
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    movswl(reg, reg); // movsxw
  } else {
    // Pre-P6: shift-pair sign extension instead of the slow movsx.
    shll(reg, 16);
    sarl(reg, 16);
  }
}
6530
6531//////////////////////////////////////////////////////////////////////////////////
6532#ifndef SERIALGC
6533
// G1 SATB pre-barrier: before overwriting the oop field at [obj], record
// the field's previous value in the thread's SATB mark queue (when
// concurrent marking is active and the previous value is non-NULL).
// Falls back to a runtime call when the queue buffer is full.
// Clobbers tmp and tmp2; obj holds the field address on entry.
void MacroAssembler::g1_write_barrier_pre(Register obj,
#ifndef _LP64
                                          Register thread,
#endif
                                          Register tmp,
                                          Register tmp2,
                                          bool tosca_live) {
  LP64_ONLY(Register thread = r15_thread;)
  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));

  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));


  Label done;
  Label runtime;

  // if (!marking_in_progress) goto done;
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    cmpl(in_progress, 0);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    cmpb(in_progress, 0);
  }
  jcc(Assembler::equal, done);

  // if (x.f == NULL) goto done;
  cmpptr(Address(obj, 0), NULL_WORD);
  jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?

  LP64_ONLY(movslq(tmp, index);)
  movptr(tmp2, Address(obj, 0));   // tmp2 = previous field value
#ifdef _LP64
  cmpq(tmp, 0);
#else
  cmpl(index, 0);
#endif
  jcc(Assembler::equal, runtime);  // index == 0 => buffer full
  // Bump the index down one word and store the previous value at
  // buffer[index].
#ifdef _LP64
  subq(tmp, wordSize);
  movl(index, tmp);
  addq(tmp, buffer);
#else
  subl(index, wordSize);
  movl(tmp, buffer);
  addl(tmp, index);
#endif
  movptr(Address(tmp, 0), tmp2);
  jmp(done);
  bind(runtime);
  // save the live input values
  if(tosca_live) push(rax);
  push(obj);
#ifdef _LP64
  movq(c_rarg0, Address(obj, 0));
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), c_rarg0, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread);
  pop(thread);
#endif
  pop(obj);
  if(tosca_live) pop(rax);
  bind(done);

}
6605
// G1 post-barrier: after storing new_val at store_addr, if the store
// crosses heap regions and new_val is non-NULL, dirty the card for
// store_addr and enqueue it on the thread's dirty-card queue. Falls back
// to a runtime call when the queue buffer is full. Clobbers tmp and tmp2.
void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
#ifndef _LP64
                                           Register thread,
#endif
                                           Register tmp,
                                           Register tmp2) {

  LP64_ONLY(Register thread = r15_thread;)
  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));
  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  Label done;
  Label runtime;

  // Does store cross heap regions?

  // xor of the two addresses has the region-size bits clear iff both lie
  // in the same region.
  movptr(tmp, store_addr);
  xorptr(tmp, new_val);
  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  jcc(Assembler::equal, done);

  // crosses regions, storing NULL?

  cmpptr(new_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // storing region crossing non-NULL, is card already dirty?

  ExternalAddress cardtable((address) ct->byte_map_base);
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
#ifdef _LP64
  const Register card_addr = tmp;

  movq(card_addr, store_addr);
  shrq(card_addr, CardTableModRefBS::card_shift);

  lea(tmp2, cardtable);

  // get the address of the card
  addq(card_addr, tmp2);
#else
  const Register card_index = tmp;

  movl(card_index, store_addr);
  shrl(card_index, CardTableModRefBS::card_shift);

  Address index(noreg, card_index, Address::times_1);
  const Register card_addr = tmp;
  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
#endif
  cmpb(Address(card_addr, 0), 0);
  jcc(Assembler::equal, done);

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  movb(Address(card_addr, 0), 0);  // 0 is the dirty-card value here

  // Enqueue the card address if the dirty-card queue has room.
  cmpl(queue_index, 0);
  jcc(Assembler::equal, runtime);
  subl(queue_index, wordSize);
  movptr(tmp2, buffer);
#ifdef _LP64
  movslq(rscratch1, queue_index);
  addq(tmp2, rscratch1);
  movq(Address(tmp2, 0), card_addr);
#else
  addl(tmp2, queue_index);
  movl(Address(tmp2, 0), card_index);
#endif
  jmp(done);

  bind(runtime);
  // save the live input values
  push(store_addr);
  push(new_val);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  pop(thread);
#endif
  pop(new_val);
  pop(store_addr);

  bind(done);

}
6699
6700#endif // SERIALGC
6701//////////////////////////////////////////////////////////////////////////////////
6702
6703
// Card-table store check for the oop in obj (marks the card covering obj).
void MacroAssembler::store_check(Register obj) {
  // Does a store check for the oop in register obj. The content of
  // register obj is destroyed afterwards.
  store_check_part_1(obj);
  store_check_part_2(obj);
}
6710
// Store check variant taking the destination address; dst is unused here —
// the check is keyed off obj only.
void MacroAssembler::store_check(Register obj, Address dst) {
  store_check(obj);
}
6714
6715
// split the store check operation so that other instructions can be scheduled inbetween
// Part 1: turn the object address in obj into a card-table index.
void MacroAssembler::store_check_part_1(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  shrptr(obj, CardTableModRefBS::card_shift);
}
6722
// Part 2: mark the card at byte_map_base[index] (index already in obj from
// part 1) as dirty by storing 0.
void MacroAssembler::store_check_part_2(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  // The calculation for byte_map_base is as follows:
  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
  // So this essentially converts an address to a displacement and
  // it will never need to be relocated. On 64bit however the value may be too
  // large for a 32bit displacement

  intptr_t disp = (intptr_t) ct->byte_map_base;
  if (is_simm32(disp)) {
    // Fits in a 32-bit displacement: one store with an absolute base.
    Address cardtable(noreg, obj, Address::times_1, disp);
    movb(cardtable, 0);
  } else {
    // By doing it as an ExternalAddress disp could be converted to a rip-relative
    // displacement and done in a single instruction given favorable mapping and
    // a smarter version of as_Address. Worst case it is two instructions which
    // is no worse off then loading disp into a register and doing as a simple
    // Address() as above.
    // We can't do as ExternalAddress as the only style since if disp == 0 we'll
    // assert since NULL isn't acceptable in a reloci (see 6644928). In any case
    // in some cases we'll get a single instruction version.

    ExternalAddress cardtable((address)disp);
    Address index(noreg, obj, Address::times_1);
    movb(as_Address(ArrayAddress(cardtable, index)), 0);
  }
}
6754
6755void MacroAssembler::subptr(Register dst, int32_t imm32) {
6756  LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
6757}
6758
6759void MacroAssembler::subptr(Register dst, Register src) {
6760  LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
6761}
6762
6763void MacroAssembler::test32(Register src1, AddressLiteral src2) {
6764  // src2 must be rval
6765
6766  if (reachable(src2)) {
6767    testl(src1, as_Address(src2));
6768  } else {
6769    lea(rscratch1, src2);
6770    testl(src1, Address(rscratch1, 0));
6771  }
6772}
6773
6774// C++ bool manipulation
6775void MacroAssembler::testbool(Register dst) {
6776  if(sizeof(bool) == 1)
6777    testb(dst, 0xff);
6778  else if(sizeof(bool) == 2) {
6779    // testw implementation needed for two byte bools
6780    ShouldNotReachHere();
6781  } else if(sizeof(bool) == 4)
6782    testl(dst, dst);
6783  else
6784    // unsupported
6785    ShouldNotReachHere();
6786}
6787
6788void MacroAssembler::testptr(Register dst, Register src) {
6789  LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
6790}
6791
// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
// Bump-pointer allocation inside the current thread's TLAB. obj receives
// the object start; size comes from var_size_in_bytes (if valid) or
// con_size_in_bytes. Jumps to slow_case when the TLAB has no room.
void MacroAssembler::tlab_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Register t2,
                                   Label& slow_case) {
  assert_different_registers(obj, t1, t2);
  assert_different_registers(obj, var_size_in_bytes, t1);
  Register end = t2;
  // On 32-bit, t1 doubles as the thread register.
  Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);

  verify_tlab();

  NOT_LP64(get_thread(thread));

  movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
  if (var_size_in_bytes == noreg) {
    lea(end, Address(obj, con_size_in_bytes));
  } else {
    lea(end, Address(obj, var_size_in_bytes, Address::times_1));
  }
  cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
  jcc(Assembler::above, slow_case);

  // update the tlab top pointer
  movptr(Address(thread, JavaThread::tlab_top_offset()), end);

  // recover var_size_in_bytes if necessary
  if (var_size_in_bytes == end) {
    subptr(var_size_in_bytes, obj);
  }
  verify_tlab();
}
6826
// Preserves rbx, and rdx.
// Refill the current thread's TLAB from eden, or decide to allocate the
// object directly in the shared space. Either discards the old TLAB
// (filling its leftover space with a dummy int array so the heap stays
// parseable) and refills it, jumping back to retry, or — when the TLAB's
// free space is worth keeping — bumps the refill-waste limit and jumps to
// try_eden. Jumps to slow_case when inline eden allocation is unavailable.
void MacroAssembler::tlab_refill(Label& retry,
                                 Label& try_eden,
                                 Label& slow_case) {
  Register top = rax;
  Register t1  = rcx;
  Register t2  = rsi;
  Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
  assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
  Label do_refill, discard_tlab;

  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    jmp(slow_case);
  }

  NOT_LP64(get_thread(thread_reg));

  movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  movptr(t1,  Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));

  // calculate amount of free space
  subptr(t1, top);
  shrptr(t1, LogHeapWordSize);   // t1 = free space in heap words

  // Retain tlab and allocate object in shared space if
  // the amount free in the tlab is too large to discard.
  cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
  jcc(Assembler::lessEqual, discard_tlab);

  // Retain
  // %%% yuck as movptr...
  movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
  addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
  if (TLABStats) {
    // increment number of slow_allocations
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
  }
  jmp(try_eden);

  bind(discard_tlab);
  if (TLABStats) {
    // increment number of refills
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
    // accumulate wastage -- t1 is amount free in tlab
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
  }

  // if tlab is currently allocated (top or end != null) then
  // fill [top, end + alignment_reserve) with array object
  testptr (top, top);
  jcc(Assembler::zero, do_refill);

  // set up the mark word
  movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
  // set the length to the remaining space
  subptr(t1, typeArrayOopDesc::header_size(T_INT));
  addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
  shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));  // heap words -> jint elements
  movptr(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
  // set klass to intArrayKlass
  // dubious reloc why not an oop reloc?
  movptr(t1, ExternalAddress((address) Universe::intArrayKlassObj_addr()));
  // store klass last.  concurrent gcs assumes klass length is valid if
  // klass field is not null.
  store_klass(top, t1);

  // refill the tlab with an eden allocation
  bind(do_refill);
  movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
  shlptr(t1, LogHeapWordSize);   // size in heap words -> bytes
  // add object_size ??
  eden_allocate(top, t1, 0, t2, slow_case);

  // Check that t1 was preserved in eden_allocate.
#ifdef ASSERT
  if (UseTLAB) {
    Label ok;
    Register tsize = rsi;
    assert_different_registers(tsize, thread_reg, t1);
    push(tsize);
    movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
    shlptr(tsize, LogHeapWordSize);
    cmpptr(t1, tsize);
    jcc(Assembler::equal, ok);
    stop("assert(t1 != tlab size)");
    should_not_reach_here();

    bind(ok);
    pop(tsize);
  }
#endif
  // Install the new TLAB bounds; end is pulled back by the alignment
  // reserve so the filler array always fits.
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
  addptr(top, t1);
  subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
  verify_tlab();
  jmp(retry);
}
6927
// pi/4: fast-path argument-range bound used by trigfunc() below (|x| <= pi/4).
static const double     pi_4 =  0.7853981633974483;
6929
// Emits code computing sin/cos/tan (trig = 's'/'c'/'t') of the double
// on top of the x87 stack, in place.  Arguments in (-pi/4, pi/4) use
// the raw x87 instruction; anything else goes through a runtime call
// to the matching SharedRuntime function.  num_fpu_regs_in_use is the
// number of live x87 stack slots (including the argument) that must
// survive the runtime call.
void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
  // was attempted in this code; unfortunately it appears that the
  // switch to 80-bit precision and back causes this to be
  // unprofitable compared with simply performing a runtime call if
  // the argument is out of the (-pi/4, pi/4) range.

  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {
    // fcmp needs a temporary so preserve rbx,
    tmp = rbx;
    push(tmp);
  }

  Label slow_case, done;

  ExternalAddress pi4_adr = (address)&pi_4;
  if (reachable(pi4_adr)) {
    // x ?<= pi/4
    fld_d(pi4_adr);
    fld_s(1);                // Stack:  X  PI/4  X
    fabs();                  // Stack: |X| PI/4  X
    fcmp(tmp);               // pops |X| and PI/4, leaving X on top
    jcc(Assembler::above, slow_case);

    // fastest case: -pi/4 <= x <= pi/4
    switch(trig) {
    case 's':
      fsin();
      break;
    case 'c':
      fcos();
      break;
    case 't':
      ftan();
      break;
    default:
      assert(false, "bad intrinsic");
      break;
    }
    jmp(done);
  }
  // NOTE: if pi4_adr was not reachable we fall straight through into
  // the slow case, i.e. every call becomes a runtime call.

  // slow case: runtime call
  bind(slow_case);
  // Preserve registers across runtime call
  pusha();
  int incoming_argument_and_return_value_offset = -1;
  if (num_fpu_regs_in_use > 1) {
    // Must preserve all other FPU regs (could alternatively convert
    // SharedRuntime::dsin and dcos into assembly routines known not to trash
    // FPU state, but can not trust C compiler)
    NEEDS_CLEANUP;
    // NOTE that in this case we also push the incoming argument to
    // the stack and restore it later; we also use this stack slot to
    // hold the return value from dsin or dcos.
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      subptr(rsp, sizeof(jdouble));
      fstp_d(Address(rsp, 0));
    }
    // the argument was pushed first, so it sits at the highest offset
    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
    fld_d(Address(rsp, incoming_argument_and_return_value_offset));
  }
  // pass the argument on the C stack
  subptr(rsp, sizeof(jdouble));
  fstp_d(Address(rsp, 0));
#ifdef _LP64
  // 64-bit C ABI passes the double argument in xmm0
  movdbl(xmm0, Address(rsp, 0));
#endif // _LP64

  // NOTE: we must not use call_VM_leaf here because that requires a
  // complete interpreter frame in debug mode -- same bug as 4387334
  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
  // do proper 64bit abi

  NEEDS_CLEANUP;
  // Need to add stack banging before this runtime call if it needs to
  // be taken; however, there is no generic stack banging routine at
  // the MacroAssembler level
  switch(trig) {
  case 's':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 0);
    }
    break;
  case 'c':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 0);
    }
    break;
  case 't':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 0);
    }
    break;
  default:
    assert(false, "bad intrinsic");
    break;
  }
#ifdef _LP64
    // 64-bit C ABI returns the result in xmm0; push it back onto the
    // x87 stack via memory
    movsd(Address(rsp, 0), xmm0);
    fld_d(Address(rsp, 0));
#endif // _LP64
  addptr(rsp, sizeof(jdouble));
  if (num_fpu_regs_in_use > 1) {
    // Must save return value to stack and then restore entire FPU stack
    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      fld_d(Address(rsp, 0));
      addptr(rsp, sizeof(jdouble));
    }
  }
  popa();

  // Come here with result in F-TOS
  bind(done);

  if (tmp != noreg) {
    pop(tmp);
  }
}
7050
7051
7052void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
7053  ucomisd(dst, as_Address(src));
7054}
7055
7056void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
7057  ucomiss(dst, as_Address(src));
7058}
7059
7060void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
7061  if (reachable(src)) {
7062    xorpd(dst, as_Address(src));
7063  } else {
7064    lea(rscratch1, src);
7065    xorpd(dst, Address(rscratch1, 0));
7066  }
7067}
7068
7069void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
7070  if (reachable(src)) {
7071    xorps(dst, as_Address(src));
7072  } else {
7073    lea(rscratch1, src);
7074    xorps(dst, Address(rscratch1, 0));
7075  }
7076}
7077
// Emits a call to the verify_oop stub checking that reg holds a valid
// oop; s is included in the failure message.  No-op unless VerifyOops.
// The stub's calling convention: message, register value and saved rax
// are pushed; the callee pops them and restores rax.
void MacroAssembler::verify_oop(Register reg, const char* s) {
  if (!VerifyOops) return;

  // Pass register number to verify_oop_subroutine
  // NOTE: this buffer is deliberately never freed — its address is
  // baked into the generated code below.
  char* b = new char[strlen(s) + 50];
  sprintf(b, "verify_oop: %s: %s", reg->name(), s);
  push(rax);                          // save rax,
  push(reg);                          // pass register argument
  ExternalAddress buffer((address) b);
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);
  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
}
7095
7096
// Like verify_oop(), but verifies the oop stored at a memory address
// rather than in a register.  No-op unless VerifyOops.
void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  if (!VerifyOops) return;

  // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
  // Pass register number to verify_oop_subroutine
  // NOTE: never freed — the buffer's address is embedded in the code.
  char* b = new char[strlen(s) + 50];
  sprintf(b, "verify_oop_addr: %s", s);

  push(rax);                          // save rax,
  // addr may contain rsp so we will have to adjust it based on the push
  // we just did
  // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
  // stores rax into addr which is backwards of what was intended.
  if (addr.uses(rsp)) {
    // the push above moved rsp down one word, so read one word higher
    lea(rax, addr);
    pushptr(Address(rax, BytesPerWord));
  } else {
    pushptr(addr);
  }

  ExternalAddress buffer((address) b);
  // pass msg argument
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);

  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  // Caller pops the arguments and restores rax, from the stack
}
7129
// Debug-only: emits checks that the current thread's TLAB pointers are
// consistent (start <= top <= end).  No code is emitted unless both
// UseTLAB and VerifyOops are set.
void MacroAssembler::verify_tlab() {
#ifdef ASSERT
  if (UseTLAB && VerifyOops) {
    Label next, ok;
    Register t1 = rsi;
    // 64-bit keeps the thread in r15; 32-bit must fetch it into rbx
    Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);

    push(t1);
    NOT_LP64(push(thread_reg));
    NOT_LP64(get_thread(thread_reg));

    // check start <= top
    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
    jcc(Assembler::aboveEqual, next);
    stop("assert(top >= start)");
    should_not_reach_here();

    // check top <= end
    bind(next);
    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    jcc(Assembler::aboveEqual, ok);
    stop("assert(top <= end)");
    should_not_reach_here();

    bind(ok);
    NOT_LP64(pop(thread_reg));
    pop(t1);
  }
#endif
}
7160
// Models the x87 FPU control word: rounding/precision control fields
// and the exception mask bits.  Used by the CPU-state dumping code.
class ControlWord {
 public:
  int32_t _value;

  int  rounding_control() const        { return (_value & 0x0C00) >> 10; }
  int  precision_control() const       { return (_value & 0x0300) >>  8; }
  bool precision() const               { return (_value & (1 <<  5)) != 0; }
  bool underflow() const               { return (_value & (1 <<  4)) != 0; }
  bool overflow() const                { return (_value & (1 <<  3)) != 0; }
  bool zero_divide() const             { return (_value & (1 <<  2)) != 0; }
  bool denormalized() const            { return (_value & (1 <<  1)) != 0; }
  bool invalid() const                 { return (_value & (1 <<  0)) != 0; }

  // Renders the word as "vvvv  masks = ..PUOZDI, <rounding>, <precision>";
  // an upper-case letter means the corresponding mask bit is set.
  void print() const {
    static const char* rc_name[4] = { "round near", "round down", "round up  ", "chop      " };
    static const char* pc_name[4] = { "24 bits ", "reserved", "53 bits ", "64 bits " };
    const char* rc = rc_name[rounding_control() & 3];
    const char* pc = pc_name[precision_control() & 3];
    // mask flags
    char f[9];
    f[0] = ' ';
    f[1] = ' ';
    f[2] = precision()    ? 'P' : 'p';
    f[3] = underflow()    ? 'U' : 'u';
    f[4] = overflow()     ? 'O' : 'o';
    f[5] = zero_divide()  ? 'Z' : 'z';
    f[6] = denormalized() ? 'D' : 'd';
    f[7] = invalid()      ? 'I' : 'i';
    f[8] = '\x0';
    // output
    printf("%04x  masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
  }
};
7207
// Models the x87 FPU status word: condition codes C0-C3, the
// top-of-stack field, and the sticky exception flags.
class StatusWord {
 public:
  int32_t _value;

  bool busy() const                    { return (_value & (1 << 15)) != 0; }
  bool C3() const                      { return (_value & (1 << 14)) != 0; }
  bool C2() const                      { return (_value & (1 << 10)) != 0; }
  bool C1() const                      { return (_value & (1 <<  9)) != 0; }
  bool C0() const                      { return (_value & (1 <<  8)) != 0; }
  int  top() const                     { return (_value >> 11) & 7; }
  bool error_status() const            { return (_value & (1 <<  7)) != 0; }
  bool stack_fault() const             { return (_value & (1 <<  6)) != 0; }
  bool precision() const               { return (_value & (1 <<  5)) != 0; }
  bool underflow() const               { return (_value & (1 <<  4)) != 0; }
  bool overflow() const                { return (_value & (1 <<  3)) != 0; }
  bool zero_divide() const             { return (_value & (1 <<  2)) != 0; }
  bool denormalized() const            { return (_value & (1 <<  1)) != 0; }
  bool invalid() const                 { return (_value & (1 <<  0)) != 0; }

  // Renders "vvvv  flags = ESPUOZDI, cc =  3210, top = n"; a letter or
  // digit is shown only when the corresponding bit is set.
  void print() const {
    // condition codes
    char c[5];
    c[0] = C3() ? '3' : '-';
    c[1] = C2() ? '2' : '-';
    c[2] = C1() ? '1' : '-';
    c[3] = C0() ? '0' : '-';
    c[4] = '\x0';
    // exception/status flags, most significant first
    char f[9];
    const char names[8]  = { 'E', 'S', 'P', 'U', 'O', 'Z', 'D', 'I' };
    const bool bits[8]   = { error_status(), stack_fault(), precision(), underflow(),
                             overflow(), zero_divide(), denormalized(), invalid() };
    for (int i = 0; i < 8; i++) {
      f[i] = bits[i] ? names[i] : '-';
    }
    f[8] = '\x0';
    // output
    printf("%04x  flags = %s, cc =  %s, top = %d", _value & 0xFFFF, f, c, top());
  }
};
7251
// Models the x87 FPU tag word: a 2-bit tag per physical register
// (see FPU_State::tag_as_string for the tag encoding).
class TagWord {
 public:
  int32_t _value;

  // Tag of physical register i (not stack-relative).
  int tag_at(int i) const              { return (_value >> (2 * i)) & 0x3; }

  void print() const {
    printf("%04x", _value & 0xFFFF);
  }
};
7263
// One 80-bit x87 data register: 64-bit mantissa split into two 32-bit
// halves plus the 16-bit sign/exponent field.
class FPU_Register {
 public:
  int32_t _m0;
  int32_t _m1;
  int16_t _ex;

  // True for the "indefinite" QNaN the FPU produces for invalid ops:
  // all-ones exponent, mantissa 0xC0000000_00000000.
  bool is_indefinite() const {
    return _ex == -1 && _m0 == 0 && _m1 == (int32_t)0xC0000000;
  }

  void print() const {
    const char  sign_ch = (_ex < 0) ? '-' : '+';
    const bool  is_nan  = (_ex == 0x7FFF) || (_ex == (int16_t)-1);
    printf("%c%04hx.%08x%08x  %s", sign_ch, _ex, _m1, _m0, is_nan ? "NaN" : "   ");
  }
};
7281
7282class FPU_State {
7283 public:
7284  enum {
7285    register_size       = 10,
7286    number_of_registers =  8,
7287    register_mask       =  7
7288  };
7289
7290  ControlWord  _control_word;
7291  StatusWord   _status_word;
7292  TagWord      _tag_word;
7293  int32_t      _error_offset;
7294  int32_t      _error_selector;
7295  int32_t      _data_offset;
7296  int32_t      _data_selector;
7297  int8_t       _register[register_size * number_of_registers];
7298
7299  int tag_for_st(int i) const          { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
7300  FPU_Register* st(int i) const        { return (FPU_Register*)&_register[register_size * i]; }
7301
7302  const char* tag_as_string(int tag) const {
7303    switch (tag) {
7304      case 0: return "valid";
7305      case 1: return "zero";
7306      case 2: return "special";
7307      case 3: return "empty";
7308    }
7309    ShouldNotReachHere()
7310    return NULL;
7311  }
7312
7313  void print() const {
7314    // print computation registers
7315    { int t = _status_word.top();
7316      for (int i = 0; i < number_of_registers; i++) {
7317        int j = (i - t) & register_mask;
7318        printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
7319        st(j)->print();
7320        printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
7321      }
7322    }
7323    printf("\n");
7324    // print control registers
7325    printf("ctrl = "); _control_word.print(); printf("\n");
7326    printf("stat = "); _status_word .print(); printf("\n");
7327    printf("tags = "); _tag_word    .print(); printf("\n");
7328  }
7329
7330};
7331
// Models the integer-unit EFLAGS register for the debug dump.
class Flag_Register {
 public:
  int32_t _value;

  bool overflow() const                { return (_value & (1 << 11)) != 0; }
  bool direction() const               { return (_value & (1 << 10)) != 0; }
  bool sign() const                    { return (_value & (1 <<  7)) != 0; }
  bool zero() const                    { return (_value & (1 <<  6)) != 0; }
  bool auxiliary_carry() const         { return (_value & (1 <<  4)) != 0; }
  bool parity() const                  { return (_value & (1 <<  2)) != 0; }
  bool carry() const                   { return (_value & (1 <<  0)) != 0; }

  // Renders "vvvvvvvv  flags = ODSZAPC"; a letter is shown only when
  // the corresponding flag is set.
  void print() const {
    char f[8];
    const char names[7] = { 'O', 'D', 'S', 'Z', 'A', 'P', 'C' };
    const bool bits[7]  = { overflow(), direction(), sign(), zero(),
                            auxiliary_carry(), parity(), carry() };
    for (int i = 0; i < 7; i++) {
      f[i] = bits[i] ? names[i] : '-';
    }
    f[7] = '\x0';
    // output
    printf("%08x  flags = %s", _value, f);
  }
};
7360
// One general-purpose integer register in the debug dump.
class IU_Register {
 public:
  int32_t _value;

  // Prints the value as zero-padded hex followed by signed decimal.
  void print() const {
    const int32_t v = _value;
    printf("%08x  %11d", v, v);
  }
};
7370
7371class IU_State {
7372 public:
7373  Flag_Register _eflags;
7374  IU_Register   _rdi;
7375  IU_Register   _rsi;
7376  IU_Register   _rbp;
7377  IU_Register   _rsp;
7378  IU_Register   _rbx;
7379  IU_Register   _rdx;
7380  IU_Register   _rcx;
7381  IU_Register   _rax;
7382
7383  void print() const {
7384    // computation registers
7385    printf("rax,  = "); _rax.print(); printf("\n");
7386    printf("rbx,  = "); _rbx.print(); printf("\n");
7387    printf("rcx  = "); _rcx.print(); printf("\n");
7388    printf("rdx  = "); _rdx.print(); printf("\n");
7389    printf("rdi  = "); _rdi.print(); printf("\n");
7390    printf("rsi  = "); _rsi.print(); printf("\n");
7391    printf("rbp,  = "); _rbp.print(); printf("\n");
7392    printf("rsp  = "); _rsp.print(); printf("\n");
7393    printf("\n");
7394    // control registers
7395    printf("flgs = "); _eflags.print(); printf("\n");
7396  }
7397};
7398
7399
// Complete CPU register dump as laid out by push_CPU_state():
// FPU state first (pushed last), then the integer registers.
class CPU_State {
 public:
  FPU_State _fpu_state;
  IU_State  _iu_state;

  // Dumps both units between separator lines.
  void print() const {
    printf("--------------------------------------------------\n");
    _iu_state .print();
    printf("\n");
    _fpu_state.print();
    printf("--------------------------------------------------\n");
  }

};
7414
7415
// C entry point called from generated code by print_CPU_state();
// receives a pointer to the dump created by push_CPU_state().
static void _print_CPU_state(CPU_State* state) {
  state->print();
};
7419
7420
// Emits code that dumps all CPU registers to stdout: pushes the whole
// register state, passes its stack address to _print_CPU_state, then
// restores the state.
void MacroAssembler::print_CPU_state() {
  push_CPU_state();
  push(rsp);                // pass CPU state
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
  addptr(rsp, wordSize);       // discard argument
  pop_CPU_state();
}
7428
7429
7430static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
7431  static int counter = 0;
7432  FPU_State* fs = &state->_fpu_state;
7433  counter++;
7434  // For leaf calls, only verify that the top few elements remain empty.
7435  // We only need 1 empty at the top for C2 code.
7436  if( stack_depth < 0 ) {
7437    if( fs->tag_for_st(7) != 3 ) {
7438      printf("FPR7 not empty\n");
7439      state->print();
7440      assert(false, "error");
7441      return false;
7442    }
7443    return true;                // All other stack states do not matter
7444  }
7445
7446  assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
7447         "bad FPU control word");
7448
7449  // compute stack depth
7450  int i = 0;
7451  while (i < FPU_State::number_of_registers && fs->tag_for_st(i)  < 3) i++;
7452  int d = i;
7453  while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
7454  // verify findings
7455  if (i != FPU_State::number_of_registers) {
7456    // stack not contiguous
7457    printf("%s: stack not contiguous at ST%d\n", s, i);
7458    state->print();
7459    assert(false, "error");
7460    return false;
7461  }
7462  // check if computed stack depth corresponds to expected stack depth
7463  if (stack_depth < 0) {
7464    // expected stack depth is -stack_depth or less
7465    if (d > -stack_depth) {
7466      // too many elements on the stack
7467      printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
7468      state->print();
7469      assert(false, "error");
7470      return false;
7471    }
7472  } else {
7473    // expected stack depth is stack_depth
7474    if (d != stack_depth) {
7475      // wrong stack depth
7476      printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
7477      state->print();
7478      assert(false, "error");
7479      return false;
7480    }
7481  }
7482  // everything is cool
7483  return true;
7484}
7485
7486
// Emits a runtime check (via _verify_FPU above) that the x87 stack has
// the expected depth; breaks into the debugger on failure.  No-op
// unless VerifyFPU.  s is included in the failure message.
void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  if (!VerifyFPU) return;
  push_CPU_state();
  push(rsp);                // pass CPU state
  ExternalAddress msg((address) s);
  // pass message string s
  pushptr(msg.addr());
  push(stack_depth);        // pass stack depth
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
  addptr(rsp, 3 * wordSize);   // discard arguments
  // check for error
  { Label L;
    testl(rax, rax);
    jcc(Assembler::notZero, L);
    int3();                  // break if error condition
    bind(L);
  }
  pop_CPU_state();
}
7506
// Loads the klass of the object in src into dst.  With compressed oops
// (64-bit) the narrow klass is loaded and decoded; otherwise a plain
// pointer-sized load.
void MacroAssembler::load_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    decode_heap_oop_not_null(dst);
  } else
#endif
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
}
7516
// Loads the prototype mark word of src's klass into dst (used for
// biased locking).  In the compressed case the narrow klass is used
// directly as a scaled index off the heap base, avoiding a separate
// decode step.
void MacroAssembler::load_prototype_header(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  } else
#endif
    {
      movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
      movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
    }
}
7529
// Stores the klass in src into the object in dst.  With compressed
// oops the klass is encoded first — NOTE: src is clobbered in that
// case (encoded in place).
void MacroAssembler::store_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    encode_heap_oop_not_null(src);
    movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
  } else
#endif
    movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
}
7539
7540#ifdef _LP64
// Stores src into the 32-bit klass-gap field of the object in dst;
// only meaningful (and only emitted) with compressed oops.
void MacroAssembler::store_klass_gap(Register dst, Register src) {
  if (UseCompressedOops) {
    // Store to klass gap in destination
    movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
  }
}
7547
// Loads an oop from src into dst, decoding it if the heap stores
// compressed (32-bit) oops.
void MacroAssembler::load_heap_oop(Register dst, Address src) {
  if (UseCompressedOops) {
    movl(dst, src);
    decode_heap_oop(dst);
  } else {
    movq(dst, src);
  }
}
7556
// Stores the oop in src to dst, encoding it first if the heap stores
// compressed oops.  NOTE: in the compressed case src is clobbered
// (encoded in place), which is why dst must not use src.
void MacroAssembler::store_heap_oop(Address dst, Register src) {
  if (UseCompressedOops) {
    assert(!dst.uses(src), "not enough registers");
    encode_heap_oop(src);
    movl(dst, src);
  } else {
    movq(dst, src);
  }
}
7566
// Algorithm must match oop.inline.hpp encode_heap_oop.
// Compresses the (possibly null) oop in r in place:
// null -> 0, otherwise (r - heap_base) >> LogMinObjAlignmentInBytes.
void MacroAssembler::encode_heap_oop(Register r) {
  assert (UseCompressedOops, "should be compressed");
#ifdef ASSERT
  if (CheckCompressedOops) {
    Label ok;
    push(rscratch1); // cmpptr trashes rscratch1
    cmpptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr()));
    jcc(Assembler::equal, ok);
    stop("MacroAssembler::encode_heap_oop: heap base corrupted?");
    bind(ok);
    pop(rscratch1);
  }
#endif
  verify_oop(r, "broken oop in encode_heap_oop");
  testq(r, r);
  // if r is null, substitute the heap base so the subtraction below
  // produces a zero narrow oop
  cmovq(Assembler::equal, r, r12_heapbase);
  subq(r, r12_heapbase);
  shrq(r, LogMinObjAlignmentInBytes);
}
7587
// Compresses the oop in r in place; r must not be null (no null check
// beyond the optional CheckCompressedOops assert).
void MacroAssembler::encode_heap_oop_not_null(Register r) {
  assert (UseCompressedOops, "should be compressed");
#ifdef ASSERT
  if (CheckCompressedOops) {
    Label ok;
    testq(r, r);
    jcc(Assembler::notEqual, ok);
    stop("null oop passed to encode_heap_oop_not_null");
    bind(ok);
  }
#endif
  verify_oop(r, "broken oop in encode_heap_oop_not_null");
  subq(r, r12_heapbase);
  shrq(r, LogMinObjAlignmentInBytes);
}
7603
// Two-register variant: compresses the non-null oop in src into dst,
// leaving src unchanged (unless dst == src).
void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
  assert (UseCompressedOops, "should be compressed");
#ifdef ASSERT
  if (CheckCompressedOops) {
    Label ok;
    testq(src, src);
    jcc(Assembler::notEqual, ok);
    stop("null oop passed to encode_heap_oop_not_null2");
    bind(ok);
  }
#endif
  verify_oop(src, "broken oop in encode_heap_oop_not_null2");
  if (dst != src) {
    movq(dst, src);
  }
  subq(dst, r12_heapbase);
  shrq(dst, LogMinObjAlignmentInBytes);
}
7622
// Decompresses the (possibly null) narrow oop in r in place:
// 0 stays 0, otherwise (r << LogMinObjAlignmentInBytes) + heap_base.
void  MacroAssembler::decode_heap_oop(Register r) {
  assert (UseCompressedOops, "should be compressed");
#ifdef ASSERT
  if (CheckCompressedOops) {
    Label ok;
    push(rscratch1);
    cmpptr(r12_heapbase,
           ExternalAddress((address)Universe::heap_base_addr()));
    jcc(Assembler::equal, ok);
    stop("MacroAssembler::decode_heap_oop: heap base corrupted?");
    bind(ok);
    pop(rscratch1);
  }
#endif

  Label done;
  // shlq sets ZF from its result: a null narrow oop skips the
  // heap-base add and stays null
  shlq(r, LogMinObjAlignmentInBytes);
  jccb(Assembler::equal, done);
  addq(r, r12_heapbase);
#if 0
   // alternate decoding probably a wash.
   testq(r, r);
   jccb(Assembler::equal, done);
   leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
#endif
  bind(done);
  verify_oop(r, "broken oop in decode_heap_oop");
}
7651
// Decompresses a known non-null narrow oop in place with a single lea;
// must stay a fixed, minimal instruction sequence (see comments).
void  MacroAssembler::decode_heap_oop_not_null(Register r) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong");
  leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
}
7660
// Two-register variant: decompresses the non-null narrow oop in src
// into dst with a single lea; src is left unchanged.
void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong");
  leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
}
7669
// Loads dst with the narrow-oop immediate for obj, emitting an
// oop relocation so the GC can patch the 32-bit literal.
void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  mov_literal32(dst, oop_index, rspec, narrow_oop_operand);
}
7676
// Reloads r12 with the compressed-oop heap base, e.g. after a call
// that may have clobbered it.  No-op without compressed oops.
void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops) {
    movptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr()));
  }
}
7682#endif // _LP64
7683
7684Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
7685  switch (cond) {
7686    // Note some conditions are synonyms for others
7687    case Assembler::zero:         return Assembler::notZero;
7688    case Assembler::notZero:      return Assembler::zero;
7689    case Assembler::less:         return Assembler::greaterEqual;
7690    case Assembler::lessEqual:    return Assembler::greater;
7691    case Assembler::greater:      return Assembler::lessEqual;
7692    case Assembler::greaterEqual: return Assembler::less;
7693    case Assembler::below:        return Assembler::aboveEqual;
7694    case Assembler::belowEqual:   return Assembler::above;
7695    case Assembler::above:        return Assembler::belowEqual;
7696    case Assembler::aboveEqual:   return Assembler::below;
7697    case Assembler::overflow:     return Assembler::noOverflow;
7698    case Assembler::noOverflow:   return Assembler::overflow;
7699    case Assembler::negative:     return Assembler::positive;
7700    case Assembler::positive:     return Assembler::negative;
7701    case Assembler::parity:       return Assembler::noParity;
7702    case Assembler::noParity:     return Assembler::parity;
7703  }
7704  ShouldNotReachHere(); return Assembler::overflow;
7705}
7706
// Emits a compare of the byte flag at flag_addr against value and a
// jump over everything emitted until the destructor runs (which binds
// the skip-target label).
SkipIfEqual::SkipIfEqual(
    MacroAssembler* masm, const bool* flag_addr, bool value) {
  _masm = masm;
  _masm->cmp8(ExternalAddress((address)flag_addr), value);
  _masm->jcc(Assembler::equal, _label);
}
7713
// Binds the skip-target label at the current code position, ending the
// conditionally-skipped region started by the constructor.
SkipIfEqual::~SkipIfEqual() {
  _masm->bind(_label);
}
7717