// assembler_x86.cpp revision 2614:95134e034042
/*
 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */
24
25#include "precompiled.hpp"
26#include "assembler_x86.inline.hpp"
27#include "gc_interface/collectedHeap.inline.hpp"
28#include "interpreter/interpreter.hpp"
29#include "memory/cardTableModRefBS.hpp"
30#include "memory/resourceArea.hpp"
31#include "prims/methodHandles.hpp"
32#include "runtime/biasedLocking.hpp"
33#include "runtime/interfaceSupport.hpp"
34#include "runtime/objectMonitor.hpp"
35#include "runtime/os.hpp"
36#include "runtime/sharedRuntime.hpp"
37#include "runtime/stubRoutines.hpp"
38#ifndef SERIALGC
39#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
40#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
41#include "gc_implementation/g1/heapRegion.hpp"
42#endif
43
44// Implementation of AddressLiteral
45
46AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
47  _is_lval = false;
48  _target = target;
49  switch (rtype) {
50  case relocInfo::oop_type:
51    // Oops are a special case. Normally they would be their own section
52    // but in cases like icBuffer they are literals in the code stream that
53    // we don't have a section for. We use none so that we get a literal address
54    // which is always patchable.
55    break;
56  case relocInfo::external_word_type:
57    _rspec = external_word_Relocation::spec(target);
58    break;
59  case relocInfo::internal_word_type:
60    _rspec = internal_word_Relocation::spec(target);
61    break;
62  case relocInfo::opt_virtual_call_type:
63    _rspec = opt_virtual_call_Relocation::spec();
64    break;
65  case relocInfo::static_call_type:
66    _rspec = static_call_Relocation::spec();
67    break;
68  case relocInfo::runtime_call_type:
69    _rspec = runtime_call_Relocation::spec();
70    break;
71  case relocInfo::poll_type:
72  case relocInfo::poll_return_type:
73    _rspec = Relocation::spec_simple(rtype);
74    break;
75  case relocInfo::none:
76    break;
77  default:
78    ShouldNotReachHere();
79    break;
80  }
81}
82
83// Implementation of Address
84
85#ifdef _LP64
86
87Address Address::make_array(ArrayAddress adr) {
88  // Not implementable on 64bit machines
89  // Should have been handled higher up the call chain.
90  ShouldNotReachHere();
91  return Address();
92}
93
94// exceedingly dangerous constructor
95Address::Address(int disp, address loc, relocInfo::relocType rtype) {
96  _base  = noreg;
97  _index = noreg;
98  _scale = no_scale;
99  _disp  = disp;
100  switch (rtype) {
101    case relocInfo::external_word_type:
102      _rspec = external_word_Relocation::spec(loc);
103      break;
104    case relocInfo::internal_word_type:
105      _rspec = internal_word_Relocation::spec(loc);
106      break;
107    case relocInfo::runtime_call_type:
108      // HMM
109      _rspec = runtime_call_Relocation::spec();
110      break;
111    case relocInfo::poll_type:
112    case relocInfo::poll_return_type:
113      _rspec = Relocation::spec_simple(rtype);
114      break;
115    case relocInfo::none:
116      break;
117    default:
118      ShouldNotReachHere();
119  }
120}
121#else // LP64
122
123Address Address::make_array(ArrayAddress adr) {
124  AddressLiteral base = adr.base();
125  Address index = adr.index();
126  assert(index._disp == 0, "must not have disp"); // maybe it can?
127  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
128  array._rspec = base._rspec;
129  return array;
130}
131
// exceedingly dangerous constructor
// 32-bit only: builds an absolute-address operand (no base/index); the
// raw code address becomes the displacement and `spec` supplies its
// relocation info.
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;   // absolute address stored directly in disp
  _rspec = spec;
}
140
141#endif // _LP64
142
143
144
145// Convert the raw encoding form into the form expected by the constructor for
146// Address.  An index of 4 (rsp) corresponds to having no index, so convert
147// that to noreg for the Address constructor.
148Address Address::make_raw(int base, int index, int scale, int disp, bool disp_is_oop) {
149  RelocationHolder rspec;
150  if (disp_is_oop) {
151    rspec = Relocation::spec_simple(relocInfo::oop_type);
152  }
153  bool valid_index = index != rsp->encoding();
154  if (valid_index) {
155    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
156    madr._rspec = rspec;
157    return madr;
158  } else {
159    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
160    madr._rspec = rspec;
161    return madr;
162  }
163}
164
165// Implementation of Assembler
166
167int AbstractAssembler::code_fill_byte() {
168  return (u_char)'\xF4'; // hlt
169}
170
171// make this go away someday
172void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
173  if (rtype == relocInfo::none)
174        emit_long(data);
175  else  emit_data(data, Relocation::spec_simple(rtype), format);
176}
177
// Emit a 32-bit data word, recording its relocation (if any) against the
// start of the enclosing instruction (inst_mark), not the word itself.
void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() !=  relocInfo::none) {
    #ifdef ASSERT
      check_relocation(rspec, format);
    #endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words.  Instead, relocate to the enclosing instruction.

    // hack. call32 is too wide for mask so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_long(data);
}
196
197static int encode(Register r) {
198  int enc = r->encoding();
199  if (enc >= 8) {
200    enc -= 8;
201  }
202  return enc;
203}
204
205static int encode(XMMRegister r) {
206  int enc = r->encoding();
207  if (enc >= 8) {
208    enc -= 8;
209  }
210  return enc;
211}
212
213void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
214  assert(dst->has_byte_register(), "must have byte register");
215  assert(isByte(op1) && isByte(op2), "wrong opcode");
216  assert(isByte(imm8), "not a byte");
217  assert((op1 & 0x01) == 0, "should be 8bit operation");
218  emit_byte(op1);
219  emit_byte(op2 | encode(dst));
220  emit_byte(imm8);
221}
222
223
224void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
225  assert(isByte(op1) && isByte(op2), "wrong opcode");
226  assert((op1 & 0x01) == 1, "should be 32bit operation");
227  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
228  if (is8bit(imm32)) {
229    emit_byte(op1 | 0x02); // set sign bit
230    emit_byte(op2 | encode(dst));
231    emit_byte(imm32 & 0xFF);
232  } else {
233    emit_byte(op1);
234    emit_byte(op2 | encode(dst));
235    emit_long(imm32);
236  }
237}
238
239// immediate-to-memory forms
240void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
241  assert((op1 & 0x01) == 1, "should be 32bit operation");
242  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
243  if (is8bit(imm32)) {
244    emit_byte(op1 | 0x02); // set sign bit
245    emit_operand(rm, adr, 1);
246    emit_byte(imm32 & 0xFF);
247  } else {
248    emit_byte(op1);
249    emit_operand(rm, adr, 4);
250    emit_long(imm32);
251  }
252}
253
// 32-bit only: register-form arithmetic whose 32-bit immediate is an oop.
// The oop is emitted via emit_data with an oop relocation so it can be
// found and patched later.
void Assembler::emit_arith(int op1, int op2, Register dst, jobject obj) {
  LP64_ONLY(ShouldNotReachHere());
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  InstructionMark im(this);  // emit_data requires an enclosing InstructionMark
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_data((intptr_t)obj, relocInfo::oop_type, 0);
}
264
265
266void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
267  assert(isByte(op1) && isByte(op2), "wrong opcode");
268  emit_byte(op1);
269  emit_byte(op2 | encode(dst) << 3 | encode(src));
270}
271
272
// Emit the ModRM byte (plus SIB byte and displacement as needed) for a
// memory operand, placing `reg` in the ModRM reg field.  Covers all x86
// addressing modes; with no base/index and a relocated displacement the
// operand is emitted RIP-relative on 64-bit.  rip_relative_correction
// accounts for trailing immediate bytes that follow the displacement
// within the same instruction.
void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      // rbp/r13 cannot use the mod=00 (no-disp) forms below: base=101 with
      // mod=00 means disp32-only in the encoding, so force a disp byte.
      if (disp == 0 && rtype == relocInfo::none  &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x04 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x44 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x84 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // [rsp + disp]
      // rsp/r12 as base always require a SIB byte (0x24 = none-index, rsp base).
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_byte(0x04 | regenc);
        emit_byte(0x24);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_byte(0x44 | regenc);
        emit_byte(0x24);
        emit_byte(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_byte(0x84 | regenc);
        emit_byte(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_byte(0x00 | regenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_byte(0x40 | regenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_byte(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_byte(0x04 | regenc);
      emit_byte(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none ) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_byte(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this, did everything as the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_byte(0x04 | regenc);
      emit_byte(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}
390
391void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
392                             Address::ScaleFactor scale, int disp,
393                             RelocationHolder const& rspec) {
394  emit_operand((Register)reg, base, index, scale, disp, rspec);
395}
396
397// Secret local extension to Assembler::WhichOperand:
398#define end_pc_operand (_WhichOperand_limit)
399
// Hand-written partial x86 decoder covering only the instruction shapes
// this assembler emits.  Walks prefixes and the opcode, then parses the
// ModRM/SIB bytes emitted by emit_operand to find the requested operand.
address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;     // set when a REX.W prefix is seen

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip;                  // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2;  // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand, "");
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit, "");
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr   a
      // 64bit side says these have both operands but that doesn't
      // appear to be true
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1;  // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp  rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xF0:                    // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3:                    // For SSE
  case 0xF2:                    // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

  // NOTE(review): REP4 is left #defined here while REP8/REP16 are undefined
  // -- confirm whether later code in this file relies on that before changing.
#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;                // SIB byte, if present
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    op3 = 0xFF & *ip++;
    base = op3 & 0x07;   // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip;              // caller wants the disp32
      ip += 4;                  // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1;                    // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip;                // caller wants the disp32
    ip += 4;                    // skip the disp32
    break;

  case 3:
    // [11 reg base]  (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}
752
753address Assembler::locate_next_instruction(address inst) {
754  // Secretly share code with locate_operand:
755  return locate_operand(inst, end_pc_operand);
756}
757
758
#ifdef ASSERT
// Debug-only sanity check: verify that the relocation about to be recorded
// points at operand bytes that locate_operand can actually find in the
// enclosing instruction (the located operand must end at the current pc).
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT
782
// Emit a memory operand restricted to the 32-bit register file: asserts
// that neither the reg field nor the address needs a REX extension bit.
void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}
789
// Convenience wrapper: unpack an Address into the field-level emit_operand.
void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}
796
// XMM convenience wrapper: unpack an Address into the field-level form.
void Assembler::emit_operand(XMMRegister reg, Address adr) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}
801
// MMX operations
// MMX registers share the general-register ModRM encoding; no REX support.
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}
807
// work around gcc (3.2.1-7a) bug
// Same as emit_operand(MMXRegister, Address) with swapped parameter order;
// exists only to dodge the compiler bug noted above.
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}
813
814
// x87 stack-register arithmetic: two opcode bytes, the second adjusted by
// the FP stack slot number i (0..7).
void Assembler::emit_farith(int b1, int b2, int i) {
  assert(isByte(b1) && isByte(b2), "wrong opcode");
  assert(0 <= i &&  i < 8, "illegal stack offset");
  emit_byte(b1);
  emit_byte(b2 + i);
}
821
822
823// Now the Assembler instructions (identical for 32/64 bits)
824
// adc dword ptr [dst], imm32 -- opcode 0x81, reg-field digit 2 (rdx).
void Assembler::adcl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rdx, dst, imm32);
}
830
// adc dword ptr [dst], src -- opcode 0x11 /r.
void Assembler::adcl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x11);
  emit_operand(src, dst);
}
837
// adc dst, imm32 -- opcode 0x81, ModRM base 0xD0 (reg-field digit 2).
void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}
842
// adc dst, dword ptr [src] -- opcode 0x13 /r.
void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x13);
  emit_operand(dst, src);
}
849
// adc dst, src -- register form of opcode 0x13.
void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}
854
// add dword ptr [dst], imm32 -- opcode 0x81, reg-field digit 0 (rax).
void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}
860
// add dword ptr [dst], src -- opcode 0x01 /r.
void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x01);
  emit_operand(src, dst);
}
867
// add dst, imm32 -- opcode 0x81, ModRM base 0xC0 (reg-field digit 0).
void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}
872
// add dst, dword ptr [src] -- opcode 0x03 /r.
void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x03);
  emit_operand(dst, src);
}
879
// add dst, src -- register form of opcode 0x03.
void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}
884
// Multi-byte NOP used for code alignment padding (4-byte variant).
void Assembler::addr_nop_4() {
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_byte(0);    // 8-bits offset (1 byte)
}
892
// Multi-byte NOP used for code alignment padding (5-byte variant, with SIB).
void Assembler::addr_nop_5() {
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_byte(0);    // 8-bits offset (1 byte)
}
901
// Multi-byte NOP used for code alignment padding (7-byte variant, disp32).
void Assembler::addr_nop_7() {
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}
909
// Multi-byte NOP used for code alignment padding (8-byte variant, SIB+disp32).
void Assembler::addr_nop_8() {
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}
918
// addsd dst, src (scalar double add) -- F2 0F 58, register form.
void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_byte(0xC0 | encode);
}
927
// addsd dst, qword ptr [src] -- F2 0F 58 with a memory operand.
void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_operand(dst, src);
}
937
// addss dst, src (scalar float add) -- F3 0F 58, register form.
void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_byte(0xC0 | encode);
}
946
// addss dst, dword ptr [src] -- F3 0F 58 with a memory operand.
void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_operand(dst, src);
}
956
// and dst, imm32 -- opcode 0x81, ModRM base 0xE0 (reg-field digit 4).
void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}
961
962void Assembler::andl(Register dst, Address src) {
963  InstructionMark im(this);
964  prefix(src, dst);
965  emit_byte(0x23);
966  emit_operand(dst, src);
967}
968
969void Assembler::andl(Register dst, Register src) {
970  (void) prefix_and_encode(dst->encoding(), src->encoding());
971  emit_arith(0x23, 0xC0, dst, src);
972}
973
974void Assembler::andpd(XMMRegister dst, Address src) {
975  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
976  InstructionMark im(this);
977  emit_byte(0x66);
978  prefix(src, dst);
979  emit_byte(0x0F);
980  emit_byte(0x54);
981  emit_operand(dst, src);
982}
983
// BSF r32, r32 (0F BC /r): bit scan forward.
void Assembler::bsfl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBC);
  emit_byte(0xC0 | encode);
}

// BSR r32, r32 (0F BD /r): bit scan reverse.  On CPUs supporting LZCNT,
// code should use lzcntl() instead (F3 0F BD decodes as LZCNT there);
// the assert guards against mixing the two encodings.
void Assembler::bsrl(Register dst, Register src) {
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

// BSWAP r32 (0F C8+rd): byte-swap a 32-bit register.
void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_byte(0x0F);
  emit_byte(0xC8 | encode);
}
1004
// CALL rel32 (E8) to a label.  A bound label must lie behind the current
// position (offs <= 0 assert); an unbound label records a patch site and
// emits a zero displacement to be fixed up when the label binds.
void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5; // E8 + 4-byte displacement
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_byte(0xE8);
    emit_data(offs - long_size, rtype, operand); // disp relative to end of instruction
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_byte(0xE8);
    emit_data(int(0), rtype, operand);
  }
}

// CALL r (FF /2): indirect call through a register.
void Assembler::call(Register dst) {
  // This was originally using a 32bit register encoding
  // and surely we want 64bit!
  // this is a 32bit encoding but in 64bit mode the default
  // operand size is 64bit so there is no need for the
  // wide prefix. So prefix only happens if we use the
  // new registers. Much like push/pop.
  int x = offset();
  // this may be true but dbx disassembles it as if it
  // were 32bits...
  // int encode = prefix_and_encode(dst->encoding());
  // if (offset() != x) assert(dst->encoding() >= 8, "what?");
  int encode = prefixq_and_encode(dst->encoding());

  emit_byte(0xFF);
  emit_byte(0xD0 | encode);
}


// CALL m (FF /2): indirect call through memory; rdx supplies the /2
// opcode-extension field of the ModRM byte, it is not an operand.
void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  emit_operand(rdx, adr);
}

// CALL rel32 to an absolute address with relocation info; the target must
// be reachable with a signed 32-bit displacement.
void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  emit_byte(0xE8);
  intptr_t disp = entry - (_code_pos + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (call2)");
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}
1065
// CDQ (99): sign-extend EAX into EDX:EAX.
void Assembler::cdql() {
  emit_byte(0x99);
}

// CMOVcc r32, r32 (0F 40+cc /r): conditional move, register form.
void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc); // condition code selects the exact opcode
  emit_byte(0xC0 | encode);
}


// CMOVcc r32, m32: conditional move, memory form.
void Assembler::cmovl(Condition cc, Register dst, Address src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_operand(dst, src);
}
1086
// CMP m8, imm8 (80 /7); rdi supplies the /7 opcode extension.  The extra
// argument to emit_operand is the trailing-immediate size for relocation.
void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x80);
  emit_operand(rdi, dst, 1);
  emit_byte(imm8);
}

// CMP m32, imm32 (81 /7).
void Assembler::cmpl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  emit_operand(rdi, dst, 4);
  emit_long(imm32);
}

// CMP r32, imm32 (81 /7); emit_arith may shorten to the imm8 form.
void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);
}

// CMP r32, r32 (3B /r).
void Assembler::cmpl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}


// CMP r32, m32 (3B /r).
void Assembler::cmpl(Register dst, Address  src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}

// CMP m16, imm16 (66 81 /7).  No REX support here: extended registers in
// the address would need a REX prefix between 0x66 and the opcode.
void Assembler::cmpw(Address dst, int imm16) {
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_byte(0x66); // operand-size override: 16-bit
  emit_byte(0x81);
  emit_operand(rdi, dst, 2);
  emit_word(imm16);
}
1129
// The 32-bit cmpxchg compares the value at adr with the contents of rax,
// and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
// The ZF is set if the compared values were equal, and cleared otherwise.
void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
  // Atomics bit 1 is a diagnostic switch: when set, substitute a NON-atomic
  // cmp/load/branch/store sequence for the real CMPXCHG instruction.
  if (Atomics & 2) {
     // caveat: no instructionmark, so this isn't relocatable.
     // Emit a synthetic, non-atomic, CAS equivalent.
     // Beware.  The synthetic form sets all ICCs, not just ZF.
     // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r)
     cmpl(rax, adr);
     movl(rax, adr);
     if (reg != rax) {
        Label L ;
        jcc(Assembler::notEqual, L);
        movl(adr, reg);
        bind(L);
     }
  } else {
     // CMPXCHG m32, r32 (0F B1 /r).  NB: callers emit lock() separately.
     InstructionMark im(this);
     prefix(adr, reg);
     emit_byte(0x0F);
     emit_byte(0xB1);
     emit_operand(reg, adr);
  }
}
1155
// COMISD xmm, m64 (66 0F 2F /r): ordered compare of scalar doubles,
// setting EFLAGS.  Implemented as 0x66 prefix + comiss; the prefix is
// emitted before comiss's own REX prefix, as the ISA requires.
void Assembler::comisd(XMMRegister dst, Address src) {
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 is there. Strangely ucomisd comes out correct
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  comiss(dst, src);
}

// COMISS xmm, m32 (0F 2F /r): ordered compare of scalar floats.
void Assembler::comiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));

  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x2F);
  emit_operand(dst, src);
}
1173
// CVTDQ2PD xmm, xmm (F3 0F E6 /r): packed int32 -> packed double.
void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xE6);
  emit_byte(0xC0 | encode);
}

// CVTDQ2PS xmm, xmm (0F 5B /r): packed int32 -> packed float.
void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5B);
  emit_byte(0xC0 | encode);
}

// CVTSD2SS xmm, xmm (F2 0F 5A /r): scalar double -> scalar float.
void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5A);
  emit_byte(0xC0 | encode);
}

// CVTSI2SD xmm, r32 (F2 0F 2A /r): int32 -> scalar double.
void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

// CVTSI2SS xmm, r32 (F3 0F 2A /r): int32 -> scalar float.
void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

// CVTSS2SD xmm, xmm (F3 0F 5A /r): scalar float -> scalar double.
void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5A);
  emit_byte(0xC0 | encode);
}

// CVTTSD2SI r32, xmm (F2 0F 2C /r): truncating double -> int32.
void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

// CVTTSS2SI r32, xmm (F3 0F 2C /r): truncating float -> int32.
void Assembler::cvttss2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}
1244
// DEC m32 (FF /1); rcx supplies the /1 opcode extension.
void Assembler::decl(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrement() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  emit_operand(rcx, dst);
}

// DIVSD xmm, m64 (F2 0F 5E /r): scalar double divide, memory form.
void Assembler::divsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_operand(dst, src);
}

// DIVSD xmm, xmm: register form.
void Assembler::divsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_byte(0xC0 | encode);
}

// DIVSS xmm, m32 (F3 0F 5E /r): scalar float divide, memory form.
void Assembler::divss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_operand(dst, src);
}

// DIVSS xmm, xmm: register form.
void Assembler::divss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_byte(0xC0 | encode);
}

// EMMS (0F 77): clear MMX state so the x87 stack is usable again.
void Assembler::emms() {
  NOT_LP64(assert(VM_Version::supports_mmx(), ""));
  emit_byte(0x0F);
  emit_byte(0x77);
}
1296
// HLT (F4): halt; used as unreachable-code filler.
void Assembler::hlt() {
  emit_byte(0xF4);
}

// IDIV r32 (F7 /7): signed divide of EDX:EAX by src.
void Assembler::idivl(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF8 | encode);
}

// DIV r32 (F7 /6): unsigned divide of EDX:EAX by src.
void Assembler::divl(Register src) { // Unsigned
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF0 | encode);
}
1312
// IMUL r32, r32 (0F AF /r): two-operand signed multiply.
void Assembler::imull(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xAF);
  emit_byte(0xC0 | encode);
}


// IMUL r32, r32, imm: three-operand signed multiply; picks the short
// imm8 encoding (6B) when the constant fits, else the imm32 form (69).
void Assembler::imull(Register dst, Register src, int value) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_byte(0x6B);
    emit_byte(0xC0 | encode);
    emit_byte(value & 0xFF);
  } else {
    emit_byte(0x69);
    emit_byte(0xC0 | encode);
    emit_long(value);
  }
}

// INC m32 (FF /0); rax supplies the /0 opcode extension.
void Assembler::incl(Address dst) {
  // Don't use it directly. Use MacroAssembler::increment() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  emit_operand(rax, dst);
}
1341
// Jcc: conditional jump to a label.  A bound target gets the short
// (70+cc disp8) form when it fits and maybe_short allows it, otherwise
// the long (0F 80+cc disp32) form; an unbound target always reserves the
// long form and records a patch site.
void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
  InstructionMark im(this);
  assert((0 <= cc) && (cc < 16), "illegal cc");
  if (L.is_bound()) {
    address dst = target(L);
    assert(dst != NULL, "jcc most probably wrong");

    const int short_size = 2;
    const int long_size = 6;
    intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;
    // Displacements are relative to the end of the instruction, hence
    // the subtraction of the instruction size below.
    if (maybe_short && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      assert(is_simm32(offs - long_size),
             "must be 32bit offset (call4)");
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
    // Note: could eliminate cond. jumps to this jump if condition
    //       is the same however, seems to be rather unlikely case.
    // Note: use jccb() if label to be bound is very close to get
    //       an 8-bit displacement
    L.add_patch_at(code(), locator());
    emit_byte(0x0F);
    emit_byte(0x80 | cc);
    emit_long(0);
  }
}

// Jcc with a forced 8-bit displacement; the target must stay within
// short-jump range or the assert fires.
void Assembler::jccb(Condition cc, Label& L) {
  if (L.is_bound()) {
    const int short_size = 2;
    address entry = target(L);
    assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)),
           "Dispacement too large for a short jmp");
    intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
    // 0111 tttn #8-bit disp
    emit_byte(0x70 | cc);
    emit_byte((offs - short_size) & 0xFF);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_byte(0x70 | cc);
    emit_byte(0);
  }
}
1393
// JMP m (FF /4): indirect jump through memory; rsp supplies the /4
// opcode extension of the ModRM byte, it is not an operand.
void Assembler::jmp(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  emit_operand(rsp, adr);
}

// JMP to a label: short (EB disp8) when bound, in range, and allowed,
// otherwise long (E9 disp32); unbound targets reserve the long form.
void Assembler::jmp(Label& L, bool maybe_short) {
  if (L.is_bound()) {
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
    InstructionMark im(this);
    const int short_size = 2;
    const int long_size = 5;
    intptr_t offs = entry - _code_pos;
    if (maybe_short && is8bit(offs - short_size)) {
      emit_byte(0xEB);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      emit_byte(0xE9);
      emit_long(offs - long_size);
    }
  } else {
    // By default, forward jumps are always 32-bit displacements, since
    // we can't yet know where the label will be bound.  If you're sure that
    // the forward jump will not run beyond 256 bytes, use jmpb to
    // force an 8-bit displacement.
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_byte(0xE9);
    emit_long(0);
  }
}

// JMP r (FF /4): indirect jump through a register.
void Assembler::jmp(Register entry) {
  int encode = prefix_and_encode(entry->encoding());
  emit_byte(0xFF);
  emit_byte(0xE0 | encode);
}

// JMP rel32 to an absolute address with relocation info; must be
// reachable with a signed 32-bit displacement.
void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0xE9);
  assert(dest != NULL, "must have a target");
  intptr_t disp = dest - (_code_pos + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (jmp)");
  emit_data(disp, rspec.reloc(), call32_operand);
}
1442
1443void Assembler::jmpb(Label& L) {
1444  if (L.is_bound()) {
1445    const int short_size = 2;
1446    address entry = target(L);
1447    assert(is8bit((entry - _code_pos) + short_size),
1448           "Dispacement too large for a short jmp");
1449    assert(entry != NULL, "jmp most probably wrong");
1450    intptr_t offs = entry - _code_pos;
1451    emit_byte(0xEB);
1452    emit_byte((offs - short_size) & 0xFF);
1453  } else {
1454    InstructionMark im(this);
1455    L.add_patch_at(code(), locator());
1456    emit_byte(0xEB);
1457    emit_byte(0);
1458  }
1459}
1460
// LDMXCSR m32 (0F AE /2): load the SSE control/status register;
// as_Register(2) supplies the /2 opcode extension.
void Assembler::ldmxcsr( Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(src);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(2), src);
}

// LEA r32, m (8D /r).  On LP64 a 0x67 address-size prefix forces 32-bit
// address arithmetic so the result is truncated like the 32-bit op.
void Assembler::leal(Register dst, Address src) {
  InstructionMark im(this);
#ifdef _LP64
  emit_byte(0x67); // addr32
  prefix(src, dst);
#endif // LP64
  emit_byte(0x8D);
  emit_operand(dst, src);
}

// LOCK prefix (F0).  Atomics bit 0 is a diagnostic switch that replaces
// the lock prefix with a plain NOP (making the following op non-atomic).
void Assembler::lock() {
  if (Atomics & 1) {
     // Emit either nothing, a NOP, or a NOP: prefix
     emit_byte(0x90) ;
  } else {
     emit_byte(0xF0);
  }
}
1488
// LZCNT r32, r32 (F3 0F BD /r): count leading zeros.  On CPUs without
// LZCNT this encoding silently decodes as BSR, hence the assert.
void Assembler::lzcntl(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

// Emit mfence instruction
// MFENCE (0F AE F0): full memory fence.
void Assembler::mfence() {
  NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
  emit_byte( 0x0F );
  emit_byte( 0xAE );
  emit_byte( 0xF0 );
}
1505
// Word-size register move: movq on LP64, movl on 32-bit.
void Assembler::mov(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

// MOVAPD xmm, xmm (66 0F 28 /r): aligned packed-double move.  The REX
// prefix (if any) is computed by hand here so it lands after the
// mandatory 0x66 prefix; encodings >= 8 are the REX-extended registers.
void Assembler::movapd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int dstenc = dst->encoding();
  int srcenc = src->encoding();
  emit_byte(0x66);
  if (dstenc < 8) {
    if (srcenc >= 8) {
      prefix(REX_B);      // extend the ModRM r/m field
      srcenc -= 8;
    }
  } else {
    if (srcenc < 8) {
      prefix(REX_R);      // extend the ModRM reg field
    } else {
      prefix(REX_RB);     // extend both fields
      srcenc -= 8;
    }
    dstenc -= 8;
  }
  emit_byte(0x0F);
  emit_byte(0x28);
  emit_byte(0xC0 | dstenc << 3 | srcenc); // register-direct ModRM
}

// MOVAPS xmm, xmm (0F 28 /r): same as movapd but without the 0x66 prefix.
void Assembler::movaps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int dstenc = dst->encoding();
  int srcenc = src->encoding();
  if (dstenc < 8) {
    if (srcenc >= 8) {
      prefix(REX_B);
      srcenc -= 8;
    }
  } else {
    if (srcenc < 8) {
      prefix(REX_R);
    } else {
      prefix(REX_RB);
      srcenc -= 8;
    }
    dstenc -= 8;
  }
  emit_byte(0x0F);
  emit_byte(0x28);
  emit_byte(0xC0 | dstenc << 3 | srcenc);
}
1556
// MOV r8, m8 (8A /r); the 'true' flag requests byte-operand prefixing.
void Assembler::movb(Register dst, Address src) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  InstructionMark im(this);
  prefix(src, dst, true);
  emit_byte(0x8A);
  emit_operand(dst, src);
}


// MOV m8, imm8 (C6 /0); rax supplies the /0 opcode extension.
void Assembler::movb(Address dst, int imm8) {
  InstructionMark im(this);
   prefix(dst);
  emit_byte(0xC6);
  emit_operand(rax, dst, 1);
  emit_byte(imm8);
}


// MOV m8, r8 (88 /r).
void Assembler::movb(Address dst, Register src) {
  assert(src->has_byte_register(), "must have byte register");
  InstructionMark im(this);
  prefix(dst, src, true);
  emit_byte(0x88);
  emit_operand(src, dst);
}
1582
// MOVD xmm, r32 (66 0F 6E /r).
void Assembler::movdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x6E);
  emit_byte(0xC0 | encode);
}

// MOVD r32, xmm (66 0F 7E /r).  For this opcode the xmm source sits in
// the ModRM reg field, so the encodings are passed swapped.
void Assembler::movdl(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  // swap src/dst to get correct prefix
  int encode = prefix_and_encode(src->encoding(), dst->encoding());
  emit_byte(0x0F);
  emit_byte(0x7E);
  emit_byte(0xC0 | encode);
}

// MOVD xmm, m32 (66 0F 6E /r): memory form.
void Assembler::movdl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x6E);
  emit_operand(dst, src);
}
1611
1612
1613void Assembler::movdqa(XMMRegister dst, Address src) {
1614  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1615  InstructionMark im(this);
1616  emit_byte(0x66);
1617  prefix(src, dst);
1618  emit_byte(0x0F);
1619  emit_byte(0x6F);
1620  emit_operand(dst, src);
1621}
1622
1623void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
1624  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1625  emit_byte(0x66);
1626  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
1627  emit_byte(0x0F);
1628  emit_byte(0x6F);
1629  emit_byte(0xC0 | encode);
1630}
1631
1632void Assembler::movdqa(Address dst, XMMRegister src) {
1633  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1634  InstructionMark im(this);
1635  emit_byte(0x66);
1636  prefix(dst, src);
1637  emit_byte(0x0F);
1638  emit_byte(0x7F);
1639  emit_operand(src, dst);
1640}
1641
1642void Assembler::movdqu(XMMRegister dst, Address src) {
1643  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1644  InstructionMark im(this);
1645  emit_byte(0xF3);
1646  prefix(src, dst);
1647  emit_byte(0x0F);
1648  emit_byte(0x6F);
1649  emit_operand(dst, src);
1650}
1651
1652void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
1653  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1654  emit_byte(0xF3);
1655  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
1656  emit_byte(0x0F);
1657  emit_byte(0x6F);
1658  emit_byte(0xC0 | encode);
1659}
1660
1661void Assembler::movdqu(Address dst, XMMRegister src) {
1662  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1663  InstructionMark im(this);
1664  emit_byte(0xF3);
1665  prefix(dst, src);
1666  emit_byte(0x0F);
1667  emit_byte(0x7F);
1668  emit_operand(src, dst);
1669}
1670
// Uses zero extension on 64bit

// MOV r32, imm32 (B8+rd id).
void Assembler::movl(Register dst, int32_t imm32) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode); // register is encoded in the opcode byte
  emit_long(imm32);
}

// MOV r32, r32 (8B /r).
void Assembler::movl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x8B);
  emit_byte(0xC0 | encode);
}

// MOV r32, m32 (8B /r).
void Assembler::movl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}

// MOV m32, imm32 (C7 /0); rax supplies the /0 opcode extension.
void Assembler::movl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);
  emit_long(imm32);
}

// MOV m32, r32 (89 /r).
void Assembler::movl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}
1705}
1706
// New cpus require to use movsd and movss to avoid partial register stall
// when loading from memory. But for old Opteron use movlpd instead of movsd.
// The selection is done in MacroAssembler::movdbl() and movflt().

// MOVLPD xmm, m64 (66 0F 12 /r): load low quadword, upper half untouched.
void Assembler::movlpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x12);
  emit_operand(dst, src);
}

// MOVQ mm, m64 (0F 6F /r): MMX load.  No InstructionMark/prefix here;
// this 32-bit-only path has no REX to emit.
void Assembler::movq( MMXRegister dst, Address src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}

// MOVQ m64, mm (0F 7F /r): MMX store.
void Assembler::movq( Address dst, MMXRegister src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x7F);
  // workaround gcc (3.2.1-7a) bug
  // In that version of gcc with only an emit_operand(MMX, Address)
  // gcc will tail jump and try and reverse the parameters completely
  // obliterating dst in the process. By having a version available
  // that doesn't need to swap the args at the tail jump the bug is
  // avoided.
  emit_operand(dst, src);
}

// MOVQ xmm, m64 (F3 0F 7E /r): zero-extending 64-bit load.
void Assembler::movq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x7E);
  emit_operand(dst, src);
}

// MOVQ m64, xmm (66 0F D6 /r): 64-bit store.
void Assembler::movq(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0xD6);
  emit_operand(src, dst);
}
1758}
1759
1760void Assembler::movsbl(Register dst, Address src) { // movsxb
1761  InstructionMark im(this);
1762  prefix(src, dst);
1763  emit_byte(0x0F);
1764  emit_byte(0xBE);
1765  emit_operand(dst, src);
1766}
1767
1768void Assembler::movsbl(Register dst, Register src) { // movsxb
1769  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
1770  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1771  emit_byte(0x0F);
1772  emit_byte(0xBE);
1773  emit_byte(0xC0 | encode);
1774}
1775
1776void Assembler::movsd(XMMRegister dst, XMMRegister src) {
1777  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1778  emit_byte(0xF2);
1779  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1780  emit_byte(0x0F);
1781  emit_byte(0x10);
1782  emit_byte(0xC0 | encode);
1783}
1784
1785void Assembler::movsd(XMMRegister dst, Address src) {
1786  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1787  InstructionMark im(this);
1788  emit_byte(0xF2);
1789  prefix(src, dst);
1790  emit_byte(0x0F);
1791  emit_byte(0x10);
1792  emit_operand(dst, src);
1793}
1794
1795void Assembler::movsd(Address dst, XMMRegister src) {
1796  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1797  InstructionMark im(this);
1798  emit_byte(0xF2);
1799  prefix(dst, src);
1800  emit_byte(0x0F);
1801  emit_byte(0x11);
1802  emit_operand(src, dst);
1803}
1804
1805void Assembler::movss(XMMRegister dst, XMMRegister src) {
1806  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1807  emit_byte(0xF3);
1808  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1809  emit_byte(0x0F);
1810  emit_byte(0x10);
1811  emit_byte(0xC0 | encode);
1812}
1813
1814void Assembler::movss(XMMRegister dst, Address src) {
1815  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1816  InstructionMark im(this);
1817  emit_byte(0xF3);
1818  prefix(src, dst);
1819  emit_byte(0x0F);
1820  emit_byte(0x10);
1821  emit_operand(dst, src);
1822}
1823
1824void Assembler::movss(Address dst, XMMRegister src) {
1825  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1826  InstructionMark im(this);
1827  emit_byte(0xF3);
1828  prefix(dst, src);
1829  emit_byte(0x0F);
1830  emit_byte(0x11);
1831  emit_operand(src, dst);
1832}
1833
1834void Assembler::movswl(Register dst, Address src) { // movsxw
1835  InstructionMark im(this);
1836  prefix(src, dst);
1837  emit_byte(0x0F);
1838  emit_byte(0xBF);
1839  emit_operand(dst, src);
1840}
1841
1842void Assembler::movswl(Register dst, Register src) { // movsxw
1843  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1844  emit_byte(0x0F);
1845  emit_byte(0xBF);
1846  emit_byte(0xC0 | encode);
1847}
1848
1849void Assembler::movw(Address dst, int imm16) {
1850  InstructionMark im(this);
1851
1852  emit_byte(0x66); // switch to 16-bit mode
1853  prefix(dst);
1854  emit_byte(0xC7);
1855  emit_operand(rax, dst, 2);
1856  emit_word(imm16);
1857}
1858
1859void Assembler::movw(Register dst, Address src) {
1860  InstructionMark im(this);
1861  emit_byte(0x66);
1862  prefix(src, dst);
1863  emit_byte(0x8B);
1864  emit_operand(dst, src);
1865}
1866
1867void Assembler::movw(Address dst, Register src) {
1868  InstructionMark im(this);
1869  emit_byte(0x66);
1870  prefix(dst, src);
1871  emit_byte(0x89);
1872  emit_operand(src, dst);
1873}
1874
1875void Assembler::movzbl(Register dst, Address src) { // movzxb
1876  InstructionMark im(this);
1877  prefix(src, dst);
1878  emit_byte(0x0F);
1879  emit_byte(0xB6);
1880  emit_operand(dst, src);
1881}
1882
1883void Assembler::movzbl(Register dst, Register src) { // movzxb
1884  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
1885  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1886  emit_byte(0x0F);
1887  emit_byte(0xB6);
1888  emit_byte(0xC0 | encode);
1889}
1890
1891void Assembler::movzwl(Register dst, Address src) { // movzxw
1892  InstructionMark im(this);
1893  prefix(src, dst);
1894  emit_byte(0x0F);
1895  emit_byte(0xB7);
1896  emit_operand(dst, src);
1897}
1898
1899void Assembler::movzwl(Register dst, Register src) { // movzxw
1900  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1901  emit_byte(0x0F);
1902  emit_byte(0xB7);
1903  emit_byte(0xC0 | encode);
1904}
1905
1906void Assembler::mull(Address src) {
1907  InstructionMark im(this);
1908  prefix(src);
1909  emit_byte(0xF7);
1910  emit_operand(rsp, src);
1911}
1912
1913void Assembler::mull(Register src) {
1914  int encode = prefix_and_encode(src->encoding());
1915  emit_byte(0xF7);
1916  emit_byte(0xE0 | encode);
1917}
1918
1919void Assembler::mulsd(XMMRegister dst, Address src) {
1920  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1921  InstructionMark im(this);
1922  emit_byte(0xF2);
1923  prefix(src, dst);
1924  emit_byte(0x0F);
1925  emit_byte(0x59);
1926  emit_operand(dst, src);
1927}
1928
1929void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
1930  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1931  emit_byte(0xF2);
1932  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1933  emit_byte(0x0F);
1934  emit_byte(0x59);
1935  emit_byte(0xC0 | encode);
1936}
1937
1938void Assembler::mulss(XMMRegister dst, Address src) {
1939  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1940  InstructionMark im(this);
1941  emit_byte(0xF3);
1942  prefix(src, dst);
1943  emit_byte(0x0F);
1944  emit_byte(0x59);
1945  emit_operand(dst, src);
1946}
1947
1948void Assembler::mulss(XMMRegister dst, XMMRegister src) {
1949  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1950  emit_byte(0xF3);
1951  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1952  emit_byte(0x0F);
1953  emit_byte(0x59);
1954  emit_byte(0xC0 | encode);
1955}
1956
1957void Assembler::negl(Register dst) {
1958  int encode = prefix_and_encode(dst->encoding());
1959  emit_byte(0xF7);
1960  emit_byte(0xD8 | encode);
1961}
1962
// Emit 'i' bytes of padding as no-op instructions.  Uses multi-byte
// address-form nops (0F 1F /0 with varying ModRM/SIB/displacement) when
// UseAddressNop is on, picking vendor-preferred patterns; otherwise falls
// back to 0x66-prefixed 0x90 nops.  The switch statements below fall
// through intentionally to stack up 0x66 size prefixes.
void Assembler::nop(int i) {
#ifdef ASSERT
  assert(i > 0, " ");
  // The fancy nops aren't currently recognized by debuggers making it a
  // pain to disassemble code while debugging. If asserts are on clearly
  // speed is not an issue so simply use the single byte traditional nop
  // to do alignment.

  for (; i > 0 ; i--) emit_byte(0x90);
  return;

#endif // ASSERT

  if (UseAddressNop && VM_Version::is_intel()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is Intel specific - don't use consecutive address nops

    // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90

    while(i >= 15) {
      // For Intel don't generate consecutive address nops (mix with regular nops)
      i -= 15;
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      addr_nop_8();
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x90);   // nop
    }
    // Cases fall through on purpose: each larger remainder adds one more
    // 0x66 prefix in front of the same base nop.
    switch (i) {
      case 14:
        emit_byte(0x66); // size prefix
      case 13:
        emit_byte(0x66); // size prefix
      case 12:
        addr_nop_8();
        emit_byte(0x66); // size prefix
        emit_byte(0x66); // size prefix
        emit_byte(0x66); // size prefix
        emit_byte(0x90); // nop
        break;
      case 11:
        emit_byte(0x66); // size prefix
      case 10:
        emit_byte(0x66); // size prefix
      case 9:
        emit_byte(0x66); // size prefix
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_byte(0x66); // size prefix
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_byte(0x66); // size prefix
      case 2:
        emit_byte(0x66); // size prefix
      case 1:
        emit_byte(0x90); // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }
  if (UseAddressNop && VM_Version::is_amd()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is AMD specific - use consecutive address nops

    // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //     Size prefixes (0x66) are added for larger sizes

    while(i >= 22) {
      i -= 11;
      emit_byte(0x66); // size prefix
      emit_byte(0x66); // size prefix
      emit_byte(0x66); // size prefix
      addr_nop_8();
    }
    // Generate first nop for size between 21-12
    // (fall-through cases both decrement i and add a 0x66 prefix)
    switch (i) {
      case 21:
        i -= 1;
        emit_byte(0x66); // size prefix
      case 20:
      case 19:
        i -= 1;
        emit_byte(0x66); // size prefix
      case 18:
      case 17:
        i -= 1;
        emit_byte(0x66); // size prefix
      case 16:
      case 15:
        i -= 8;
        addr_nop_8();
        break;
      case 14:
      case 13:
        i -= 7;
        addr_nop_7();
        break;
      case 12:
        i -= 6;
        emit_byte(0x66); // size prefix
        addr_nop_5();
        break;
      default:
        assert(i < 12, " ");
    }

    // Generate second nop for size between 11-1
    switch (i) {
      case 11:
        emit_byte(0x66); // size prefix
      case 10:
        emit_byte(0x66); // size prefix
      case 9:
        emit_byte(0x66); // size prefix
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_byte(0x66); // size prefix
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_byte(0x66); // size prefix
      case 2:
        emit_byte(0x66); // size prefix
      case 1:
        emit_byte(0x90); // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }

  // Fallback for other vendors / UseAddressNop off.
  // Using nops with size prefixes "0x66 0x90".
  // From AMD Optimization Guide:
  //  1: 0x90
  //  2: 0x66 0x90
  //  3: 0x66 0x66 0x90
  //  4: 0x66 0x66 0x66 0x90
  //  5: 0x66 0x66 0x90 0x66 0x90
  //  6: 0x66 0x66 0x90 0x66 0x66 0x90
  //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
  //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
  //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  //
  while(i > 12) {
    i -= 4;
    emit_byte(0x66); // size prefix
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90); // nop
  }
  // 1 - 12 nops
  if(i > 8) {
    if(i > 9) {
      i -= 1;
      emit_byte(0x66);
    }
    i -= 3;
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90);
  }
  // 1 - 8 nops
  if(i > 4) {
    if(i > 6) {
      i -= 1;
      emit_byte(0x66);
    }
    i -= 3;
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90);
  }
  switch (i) {
    case 4:
      emit_byte(0x66);
    case 3:
      emit_byte(0x66);
    case 2:
      emit_byte(0x66);
    case 1:
      emit_byte(0x90);
      break;
    default:
      assert(i == 0, " ");
  }
}
2210
// NOT r32: F7 /2 — one's-complement negate dst.
void Assembler::notl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD0 | encode );
}
2216
// OR m32, imm32: 81 /1 id (emit_arith_operand may shorten to 83 /1 ib).
void Assembler::orl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rcx, dst, imm32);
}
2222
// OR r32, imm32: 81 /1 id (emit_arith may shorten to the sign-extended imm8 form).
void Assembler::orl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC8, dst, imm32);
}
2227
// OR r32, m32: 0B /r.
void Assembler::orl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0B);
  emit_operand(dst, src);
}
2234
// OR r32, r32: 0B /r.
void Assembler::orl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x0B, 0xC0, dst, src);
}
2239
// PCMPESTRI xmm, m128, imm8: 66 0F 3A 61 /r ib (SSE4.2 explicit-length
// string compare; index result in ECX).
void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
  assert(VM_Version::supports_sse4_2(), "");

  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x3A);
  emit_byte(0x61);
  emit_operand(dst, src);
  emit_byte(imm8);
}
2252
// PCMPESTRI xmm, xmm, imm8: 66 0F 3A 61 /r ib (SSE4.2).
// NOTE(review): this reg-reg form uses prefixq_and_encode (REX.W) while the
// memory form above uses the plain prefix — per the SDM, REX.W selects
// 64-bit length registers (RAX/RDX) for this instruction; verify the
// asymmetry with the memory form is intended.
void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_2(), "");

  emit_byte(0x66);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x3A);
  emit_byte(0x61);
  emit_byte(0xC0 | encode);
  emit_byte(imm8);
}
2264
2265// generic
// POP r: 58+rd — pop top of stack into dst (pointer-sized on both 32/64 bit).
void Assembler::pop(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0x58 | encode);
}
2270
// POPCNT r32, m32: F3 0F B8 /r — population count of the 32-bit operand.
void Assembler::popcntl(Register dst, Address src) {
  assert(VM_Version::supports_popcnt(), "must support");
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB8);
  emit_operand(dst, src);
}
2280
// POPCNT r32, r32: F3 0F B8 /r.
void Assembler::popcntl(Register dst, Register src) {
  assert(VM_Version::supports_popcnt(), "must support");
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB8);
  emit_byte(0xC0 | encode);
}
2289
// POPF: 9D — pop flags register from the stack.
void Assembler::popf() {
  emit_byte(0x9D);
}
2293
2294#ifndef _LP64 // no 32bit push/pop on amd64
// POP m32: 8F /0 — 32-bit only (guarded by the surrounding #ifndef _LP64).
void Assembler::popl(Address dst) {
  // NOTE: this will adjust stack by 8byte on 64bits
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x8F);
  emit_operand(rax, dst);
}
2302#endif
2303
// Shared helper for the prefetch* emitters: address-size/REX prefix plus
// the 0F escape byte; the caller emits the opcode and ModRM.
void Assembler::prefetch_prefix(Address src) {
  prefix(src);
  emit_byte(0x0F);
}
2308
// PREFETCHNTA m8: 0F 18 /0 — prefetch with non-temporal hint.
void Assembler::prefetchnta(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rax, src); // 0, src
}
2316
// PREFETCH m8 (3DNow!): 0F 0D /0 — prefetch for read.
void Assembler::prefetchr(Address src) {
  NOT_LP64(assert(VM_Version::supports_3dnow_prefetch(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x0D);
  emit_operand(rax, src); // 0, src
}
2324
// PREFETCHT0 m8: 0F 18 /1 — prefetch into all cache levels.
void Assembler::prefetcht0(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rcx, src); // 1, src
}
2332
// PREFETCHT1 m8: 0F 18 /2 — prefetch into L2 and higher.
void Assembler::prefetcht1(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rdx, src); // 2, src
}
2340
// PREFETCHT2 m8: 0F 18 /3 — prefetch into L3/outermost cache.
void Assembler::prefetcht2(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rbx, src); // 3, src
}
2348
// PREFETCHW m8 (3DNow!): 0F 0D /1 — prefetch with intent to write.
void Assembler::prefetchw(Address src) {
  NOT_LP64(assert(VM_Version::supports_3dnow_prefetch(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x0D);
  emit_operand(rcx, src); // 1, src
}
2356
// Emit a raw prefix byte (e.g. REX_W, lock, segment override).
void Assembler::prefix(Prefix p) {
  a_byte(p);
}
2360
// POR xmm, xmm: 66 0F EB /r — bitwise OR of the 128-bit registers.
void Assembler::por(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  emit_byte(0x66);
  int  encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);

  emit_byte(0xEB);
  emit_byte(0xC0 | encode);
}
2371
// PSHUFD xmm, xmm, imm8: 66 0F 70 /r ib — shuffle the four dwords of src
// into dst according to the 2-bit fields of 'mode'.
void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  emit_byte(0x66);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x70);
  emit_byte(0xC0 | encode);
  emit_byte(mode & 0xFF);

}
2384
// PSHUFD xmm, m128, imm8: 66 0F 70 /r ib.
void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x70);
  emit_operand(dst, src);
  emit_byte(mode & 0xFF);
}
2397
// PSHUFLW xmm, xmm, imm8: F2 0F 70 /r ib — shuffle the low four words;
// the high quadword is copied through unchanged.
void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x70);
  emit_byte(0xC0 | encode);
  emit_byte(mode & 0xFF);
}
2409
// PSHUFLW xmm, m128, imm8: F2 0F 70 /r ib.
void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst); // QQ new
  emit_byte(0x0F);
  emit_byte(0x70);
  emit_operand(dst, src);
  emit_byte(mode & 0xFF);
}
2422
2423void Assembler::psrlq(XMMRegister dst, int shift) {
2424  // Shift 64 bit value logically right by specified number of bits.
2425  // HMM Table D-1 says sse2 or mmx.
2426  // Do not confuse it with psrldq SSE2 instruction which
2427  // shifts 128 bit value in xmm register by number of bytes.
2428  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2429
2430  int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding());
2431  emit_byte(0x66);
2432  emit_byte(0x0F);
2433  emit_byte(0x73);
2434  emit_byte(0xC0 | encode);
2435  emit_byte(shift);
2436}
2437
2438void Assembler::psrldq(XMMRegister dst, int shift) {
2439  // Shift 128 bit value in xmm register by number of bytes.
2440  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2441
2442  int encode = prefixq_and_encode(xmm3->encoding(), dst->encoding());
2443  emit_byte(0x66);
2444  emit_byte(0x0F);
2445  emit_byte(0x73);
2446  emit_byte(0xC0 | encode);
2447  emit_byte(shift);
2448}
2449
// PTEST xmm, m128: 66 0F 38 17 /r (SSE4.1) — set ZF/CF from AND/ANDN of
// the operands; no destination write.
void Assembler::ptest(XMMRegister dst, Address src) {
  assert(VM_Version::supports_sse4_1(), "");

  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x38);
  emit_byte(0x17);
  emit_operand(dst, src);
}
2461
// PTEST xmm, xmm: 66 0F 38 17 /r (SSE4.1).
// NOTE(review): uses prefixq_and_encode (REX.W) while the memory form uses
// the plain prefix; REX.W appears to have no effect for this instruction —
// confirm the asymmetry is intentional.
void Assembler::ptest(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");

  emit_byte(0x66);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x38);
  emit_byte(0x17);
  emit_byte(0xC0 | encode);
}
2472
// PUNPCKLBW xmm, xmm: 66 0F 60 /r — interleave low-order bytes of dst and src.
void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x60);
  emit_byte(0xC0 | encode);
}
2481
// PUSH imm32: 68 id.
void Assembler::push(int32_t imm32) {
  // in 64bits we push 64bits onto the stack but only
  // take a 32bit immediate
  emit_byte(0x68);
  emit_long(imm32);
}
2488
// PUSH r: 50+rd — push src onto the stack (pointer-sized).
void Assembler::push(Register src) {
  int encode = prefix_and_encode(src->encoding());

  emit_byte(0x50 | encode);
}
2494
// PUSHF: 9C — push flags register onto the stack.
void Assembler::pushf() {
  emit_byte(0x9C);
}
2498
2499#ifndef _LP64 // no 32bit push/pop on amd64
// PUSH m32: FF /6 — 32-bit only (guarded by the surrounding #ifndef _LP64).
void Assembler::pushl(Address src) {
  // Note this will push 64bit on 64bit
  InstructionMark im(this);
  prefix(src);
  emit_byte(0xFF);
  emit_operand(rsi, src);
}
2507#endif
2508
// PXOR xmm, m128: 66 0F EF /r — bitwise XOR into dst.
void Assembler::pxor(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xEF);
  emit_operand(dst, src);
}
2518
2519void Assembler::pxor(XMMRegister dst, XMMRegister src) {
2520  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2521  InstructionMark im(this);
2522  emit_byte(0x66);
2523  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2524  emit_byte(0x0F);
2525  emit_byte(0xEF);
2526  emit_byte(0xC0 | encode);
2527}
2528
// RCL r32, imm8 — rotate left through carry.  Uses the one-byte-shorter
// D1 /2 form when the count is 1, otherwise C1 /2 ib.
void Assembler::rcll(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xD0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xD0 | encode);
    emit_byte(imm8);
  }
}
2541
2542// copies data from [esi] to [edi] using rcx pointer sized words
2543// generic
// REP MOVS: F3 (REX.W) A5 — copy rcx pointer-sized words from [rsi] to [rdi].
void Assembler::rep_mov() {
  emit_byte(0xF3);
  // MOVSQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xA5);
}
2550
2551// sets rcx pointer sized words with rax, value at [edi]
2552// generic
// REP STOS: F3 (REX.W) AB — store rax into rcx pointer-sized words at [rdi].
void Assembler::rep_set() { // rep_set
  emit_byte(0xF3);
  // STOSQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xAB);
}
2559
2560// scans rcx pointer sized words at [edi] for occurance of rax,
2561// generic
// REPNE SCAS: F2 (REX.W) AF — scan rcx pointer-sized words at [rdi] for rax.
void Assembler::repne_scan() { // repne_scan
  emit_byte(0xF2);
  // SCASQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xAF);
}
2568
2569#ifdef _LP64
2570// scans rcx 4 byte words at [edi] for occurance of rax,
2571// generic
// REPNE SCASD: F2 AF — 32-bit-word scan; 64-bit build only (no REX.W).
void Assembler::repne_scanl() { // repne_scan
  emit_byte(0xF2);
  // SCASL
  emit_byte(0xAF);
}
2577#endif
2578
2579void Assembler::ret(int imm16) {
2580  if (imm16 == 0) {
2581    emit_byte(0xC3);
2582  } else {
2583    emit_byte(0xC2);
2584    emit_word(imm16);
2585  }
2586}
2587
// SAHF: 9E — load AH into the low flags byte; guarded out on 64-bit, where
// this code never emits it.
void Assembler::sahf() {
#ifdef _LP64
  // Not supported in 64bit mode
  ShouldNotReachHere();
#endif
  emit_byte(0x9E);
}
2595
2596void Assembler::sarl(Register dst, int imm8) {
2597  int encode = prefix_and_encode(dst->encoding());
2598  assert(isShiftCount(imm8), "illegal shift count");
2599  if (imm8 == 1) {
2600    emit_byte(0xD1);
2601    emit_byte(0xF8 | encode);
2602  } else {
2603    emit_byte(0xC1);
2604    emit_byte(0xF8 | encode);
2605    emit_byte(imm8);
2606  }
2607}
2608
// SAR r32, cl: D3 /7 — arithmetic right shift by the count in CL.
void Assembler::sarl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xF8 | encode);
}
2614
// SBB m32, imm32: 81 /3 id — subtract with borrow.
void Assembler::sbbl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);
}
2620
// SBB r32, imm32: 81 /3 id (emit_arith may shorten to the imm8 form).
void Assembler::sbbl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD8, dst, imm32);
}
2625
2626
// SBB r32, m32: 1B /r.
void Assembler::sbbl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x1B);
  emit_operand(dst, src);
}
2633
// SBB r32, r32: 1B /r.
void Assembler::sbbl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}
2638
// SETcc r8: 0F 90+cc /0 — set dst's low byte to 1 if condition cc holds,
// else 0 (byte-register prefix handling via the 'true' argument).
void Assembler::setb(Condition cc, Register dst) {
  assert(0 <= cc && cc < 16, "illegal cc");
  int encode = prefix_and_encode(dst->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0x90 | cc);
  emit_byte(0xC0 | encode);
}
2646
// SHL r32, imm8 — logical left shift.  Shorter D1 /4 form for count 1,
// otherwise C1 /4 ib.
void Assembler::shll(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  if (imm8 == 1 ) {
    emit_byte(0xD1);
    emit_byte(0xE0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xE0 | encode);
    emit_byte(imm8);
  }
}
2659
// SHL r32, cl: D3 /4 — logical left shift by the count in CL.
void Assembler::shll(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE0 | encode);
}
2665
// SHR r32, imm8: C1 /5 ib — logical right shift.
// NOTE(review): unlike shll/sarl/rcll this never uses the shorter D1 /5
// form for a count of 1 — possibly deliberate (fixed instruction size),
// confirm before "optimizing".
void Assembler::shrl(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xC1);
  emit_byte(0xE8 | encode);
  emit_byte(imm8);
}
2673
// SHR r32, cl: D3 /5 — logical right shift by the count in CL.
void Assembler::shrl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE8 | encode);
}
2679
2680// copies a single word from [esi] to [edi]
// MOVS: A5 — copy one word-sized element from [rsi] to [rdi] (no REP).
void Assembler::smovl() {
  emit_byte(0xA5);
}
2684
// SQRTSD xmm, xmm: F2 0F 51 /r — scalar double-precision square root.
void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
  // HMM Table D-1 says sse2
  // NOT_LP64(assert(VM_Version::supports_sse(), ""));
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x51);
  emit_byte(0xC0 | encode);
}
2695
// SQRTSD xmm, m64: F2 0F 51 /r.
void Assembler::sqrtsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x51);
  emit_operand(dst, src);
}
2705
// SQRTSS xmm, xmm: F3 0F 51 /r — scalar single-precision square root.
void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
  // HMM Table D-1 says sse2
  // NOT_LP64(assert(VM_Version::supports_sse(), ""));
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x51);
  emit_byte(0xC0 | encode);
}
2716
// SQRTSS xmm, m32: F3 0F 51 /r.
void Assembler::sqrtss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x51);
  emit_operand(dst, src);
}
2726
// STMXCSR m32: 0F AE /3 — store the MXCSR control/status register.
void Assembler::stmxcsr( Address dst) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(3), dst);
}
2735
// SUB m32, imm32: 81 /5 id.
void Assembler::subl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rbp, dst, imm32);
}
2741
// SUB m32, r32: 29 /r.
void Assembler::subl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x29);
  emit_operand(src, dst);
}
2748
// SUB r32, imm32: 81 /5 id (emit_arith may shorten to the imm8 form).
void Assembler::subl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE8, dst, imm32);
}
2753
// SUB r32, m32: 2B /r.
void Assembler::subl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x2B);
  emit_operand(dst, src);
}
2760
// SUB r32, r32: 2B /r.
void Assembler::subl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x2B, 0xC0, dst, src);
}
2765
// SUBSD xmm, xmm: F2 0F 5C /r — scalar double-precision subtract.
void Assembler::subsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5C);
  emit_byte(0xC0 | encode);
}
2774
// SUBSD xmm, m64: F2 0F 5C /r.
void Assembler::subsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5C);
  emit_operand(dst, src);
}
2784
// SUBSS xmm, xmm: F3 0F 5C /r — scalar single-precision subtract.
void Assembler::subss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5C);
  emit_byte(0xC0 | encode);
}
2793
// SUBSS xmm, m32: F3 0F 5C /r.
void Assembler::subss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5C);
  emit_operand(dst, src);
}
2803
// TEST r8, imm8: F6 /0 ib — AND operands and set flags; no result written.
void Assembler::testb(Register dst, int imm8) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  (void) prefix_and_encode(dst->encoding(), true);
  emit_arith_b(0xF6, 0xC0, dst, imm8);
}
2809
2810void Assembler::testl(Register dst, int32_t imm32) {
2811  // not using emit_arith because test
2812  // doesn't support sign-extension of
2813  // 8bit operands
2814  int encode = dst->encoding();
2815  if (encode == 0) {
2816    emit_byte(0xA9);
2817  } else {
2818    encode = prefix_and_encode(encode);
2819    emit_byte(0xF7);
2820    emit_byte(0xC0 | encode);
2821  }
2822  emit_long(imm32);
2823}
2824
// TEST r32, r32: 85 /r.
void Assembler::testl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}
2829
// TEST r32, m32: 85 /r.
void Assembler::testl(Register dst, Address  src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x85);
  emit_operand(dst, src);
}
2836
// UCOMISD xmm, m64: 66 0F 2E /r — delegates to ucomiss after the 66 prefix.
void Assembler::ucomisd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  ucomiss(dst, src);
}
2842
// UCOMISD xmm, xmm: 66 0F 2E /r — delegates to ucomiss after the 66 prefix.
void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  ucomiss(dst, src);
}
2848
// UCOMISS xmm, m32: 0F 2E /r — unordered scalar compare, sets EFLAGS.
void Assembler::ucomiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));

  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x2E);
  emit_operand(dst, src);
}
2858
// UCOMISS xmm, xmm: 0F 2E /r.
void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2E);
  emit_byte(0xC0 | encode);
}
2866
2867
// XADD m32, r32: 0F C1 /r — exchange and add (callers add LOCK separately).
void Assembler::xaddl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0xC1);
  emit_operand(src, dst);
}
2875
// XCHG r32, m32: 87 /r (implicitly locked when a memory operand is used).
void Assembler::xchgl(Register dst, Address src) { // xchg
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x87);
  emit_operand(dst, src);
}
2882
// XCHG r32, r32: 87 /r.
void Assembler::xchgl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x87);
  emit_byte(0xc0 | encode);
}
2888
// XOR r32, imm32: 81 /6 id (emit_arith may shorten to the imm8 form).
void Assembler::xorl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF0, dst, imm32);
}
2893
// XOR r32, m32: 33 /r.
void Assembler::xorl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x33);
  emit_operand(dst, src);
}
2900
// XOR r32, r32: 33 /r.
void Assembler::xorl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}
2905
// XORPD xmm, xmm: 66 0F 57 /r — delegates to xorps after the 66 prefix.
void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  xorps(dst, src);
}
2911
// XORPD xmm, m128: 66 0F 57 /r — bitwise XOR of packed doubles.
void Assembler::xorpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x57);
  emit_operand(dst, src);
}
2921
2922
// XORPS xmm, xmm: 0F 57 /r — bitwise XOR of packed singles.
void Assembler::xorps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x57);
  emit_byte(0xC0 | encode);
}
2930
// XORPS xmm, m128: 0F 57 /r.
void Assembler::xorps(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x57);
  emit_operand(dst, src);
}
2939
2940#ifndef _LP64
2941// 32bit only pieces of the assembler
2942
// 32-bit only: CMP r32, imm32 (81 /7 id) where the immediate carries
// relocation info (emit_data applies rspec).
void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT
  InstructionMark im(this);
  emit_byte(0x81);
  emit_byte(0xF8 | src1->encoding());
  emit_data(imm32, rspec, 0);
}
2950
// 32-bit only: CMP m32, imm32 (81 /7 id) with a relocated immediate.
void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs
  InstructionMark im(this);
  emit_byte(0x81);
  emit_operand(rdi, src1);
  emit_data(imm32, rspec, 0);
}
2958
2959// The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax,
2960// and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded
2961// into rdx:rax.  The ZF is set if the compared values were equal, and cleared otherwise.
// CMPXCHG8B m64: 0F C7 /1 — compare edx:eax with m64; if equal store
// ecx:ebx, else load m64 into edx:eax.  ZF reflects the comparison.
void Assembler::cmpxchg8(Address adr) {
  InstructionMark im(this);
  emit_byte(0x0F);
  emit_byte(0xc7);
  emit_operand(rcx, adr);
}
2968
// DEC r32: 48+rd — 32-bit-only short form (0x48-0x4F are REX prefixes on
// x86_64, hence the #ifndef _LP64 guard around this definition).
void Assembler::decl(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementl() instead.
 emit_byte(0x48 | dst->encoding());
}
2973
2974#endif // _LP64
2975
2976// 64bit typically doesn't use the x87 but needs to for the trig funcs
2977
// FABS: D9 E1 — ST(0) = |ST(0)|.
void Assembler::fabs() {
  emit_byte(0xD9);
  emit_byte(0xE1);
}
2982
// FADD ST(0), ST(i): D8 C0+i.
void Assembler::fadd(int i) {
  emit_farith(0xD8, 0xC0, i);
}
2986
// FADD m64real: DC /0.
void Assembler::fadd_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rax, src);
}
2992
// FADD m32real: D8 /0.
void Assembler::fadd_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rax, src);
}
2998
// FADD ST(i), ST(0): DC C0+i.
void Assembler::fadda(int i) {
  emit_farith(0xDC, 0xC0, i);
}
3002
// FADDP ST(i), ST(0): DE C0+i — add and pop.
void Assembler::faddp(int i) {
  emit_farith(0xDE, 0xC0, i);
}
3006
// FCHS: D9 E0 — negate ST(0).
void Assembler::fchs() {
  emit_byte(0xD9);
  emit_byte(0xE0);
}
3011
// FCOM ST(i): D8 D0+i — compare ST(0) with ST(i).
void Assembler::fcom(int i) {
  emit_farith(0xD8, 0xD0, i);
}
3015
// FCOMP ST(i): D8 D8+i — compare and pop.
void Assembler::fcomp(int i) {
  emit_farith(0xD8, 0xD8, i);
}
3019
// FCOMP m64real: DC /3 — compare ST(0) with memory operand and pop.
void Assembler::fcomp_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rbx, src);
}
3025
// FCOMP m32real: D8 /3.
void Assembler::fcomp_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rbx, src);
}
3031
// FCOMPP: DE D9 — compare ST(0) with ST(1) and pop both.
void Assembler::fcompp() {
  emit_byte(0xDE);
  emit_byte(0xD9);
}
3036
// FCOS: D9 FF — ST(0) = cos(ST(0)).
void Assembler::fcos() {
  emit_byte(0xD9);
  emit_byte(0xFF);
}
3041
// FDECSTP: D9 F6 — decrement the x87 stack-top pointer.
void Assembler::fdecstp() {
  emit_byte(0xD9);
  emit_byte(0xF6);
}
3046
// FDIV ST(0), ST(i): D8 F0+i.
void Assembler::fdiv(int i) {
  emit_farith(0xD8, 0xF0, i);
}
3050
// FDIV m64real: DC /6.
void Assembler::fdiv_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rsi, src);
}
3056
// FDIV m32real: D8 /6.
void Assembler::fdiv_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rsi, src);
}
3062
// FDIV ST(i), ST(0): DC F8+i.
void Assembler::fdiva(int i) {
  emit_farith(0xDC, 0xF8, i);
}
3066
3067// Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
3068//       is erroneous for some of the floating-point instructions below.
3069
// FDIVP: DE F8+i.
void Assembler::fdivp(int i) {
  emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
}
3073
// FDIVR ST(0), ST(i): D8 F8+i — reversed divide.
void Assembler::fdivr(int i) {
  emit_farith(0xD8, 0xF8, i);
}
3077
// FDIVR m64real: DC /7.
void Assembler::fdivr_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rdi, src);
}
3083
// FDIVR m32real: D8 /7.
void Assembler::fdivr_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rdi, src);
}
3089
// FDIVR ST(i), ST(0): DC F0+i.
void Assembler::fdivra(int i) {
  emit_farith(0xDC, 0xF0, i);
}
3093
// FDIVRP: DE F0+i.
void Assembler::fdivrp(int i) {
  emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
}
3097
// FFREE ST(i): DD C0+i — mark the register as empty.
void Assembler::ffree(int i) {
  emit_farith(0xDD, 0xC0, i);
}
3101
// FILD m64int: DF /5 — load 64-bit integer and push.
void Assembler::fild_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDF);
  emit_operand32(rbp, adr);
}
3107
// FILD m32int: DB /0 — load 32-bit integer and push.
void Assembler::fild_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rax, adr);
}
3113
// FINCSTP: D9 F7 — increment the x87 stack-top pointer.
void Assembler::fincstp() {
  emit_byte(0xD9);
  emit_byte(0xF7);
}
3118
// FINIT: 9B DB E3 (wait prefix + FNINIT) — initialize the FPU.
void Assembler::finit() {
  emit_byte(0x9B);
  emit_byte(0xDB);
  emit_byte(0xE3);
}
3124
// FIST m32int: DB /2 — store ST(0) as 32-bit integer (no pop).
void Assembler::fist_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rdx, adr);
}
3130
// FISTP m64int (DF /7): store ST(0) as a 64-bit integer and pop.
void Assembler::fistp_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDF);
  emit_operand32(rdi, adr);
}
3136
// FISTP m32int (DB /3): store ST(0) as a 32-bit integer and pop.
void Assembler::fistp_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rbx, adr);
}
3142
// FLD1 (D9 E8): push +1.0 onto the FPU stack.
void Assembler::fld1() {
  emit_byte(0xD9);
  emit_byte(0xE8);
}
3147
// FLD m64fp (DD /0): push 64-bit float at adr.
void Assembler::fld_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rax, adr);
}
3153
// FLD m32fp (D9 /0): push 32-bit float at adr.
void Assembler::fld_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rax, adr);
}
3159
3160
// FLD ST(index) (D9 C0+index): push a copy of stack register index.
void Assembler::fld_s(int index) {
  emit_farith(0xD9, 0xC0, index);
}
3164
// FLD m80fp (DB /5): push 80-bit extended float at adr.
void Assembler::fld_x(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rbp, adr);
}
3170
// FLDCW m16 (D9 /5): load the FPU control word from src.
void Assembler::fldcw(Address src) {
  InstructionMark im(this);
  emit_byte(0xd9);
  emit_operand32(rbp, src);
}
3176
// FLDENV (D9 /4): load the FPU environment from src.
void Assembler::fldenv(Address src) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rsp, src);
}
3182
// FLDLG2 (D9 EC): push log10(2).
void Assembler::fldlg2() {
  emit_byte(0xD9);
  emit_byte(0xEC);
}
3187
// FLDLN2 (D9 ED): push ln(2).
void Assembler::fldln2() {
  emit_byte(0xD9);
  emit_byte(0xED);
}
3192
// FLDZ (D9 EE): push +0.0.
void Assembler::fldz() {
  emit_byte(0xD9);
  emit_byte(0xEE);
}
3197
// Natural log of ST(0), as ln(x) = ln(2) * log2(x): push ln2, swap, then fyl2x.
void Assembler::flog() {
  fldln2();
  fxch();
  fyl2x();
}
3203
// Base-10 log of ST(0), as log10(x) = log10(2) * log2(x).
void Assembler::flog10() {
  fldlg2();
  fxch();
  fyl2x();
}
3209
// FMUL ST(0), ST(i) (D8 C8+i).
void Assembler::fmul(int i) {
  emit_farith(0xD8, 0xC8, i);
}
3213
// FMUL m64fp (DC /1): ST(0) <- ST(0) * 64-bit float at src.
void Assembler::fmul_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rcx, src);
}
3219
// FMUL m32fp (D8 /1): ST(0) <- ST(0) * 32-bit float at src.
void Assembler::fmul_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rcx, src);
}
3225
// FMUL ST(i), ST(0) (DC C8+i).
void Assembler::fmula(int i) {
  emit_farith(0xDC, 0xC8, i);
}
3229
// FMULP (DE C8+i): multiply into ST(i) and pop.
void Assembler::fmulp(int i) {
  emit_farith(0xDE, 0xC8, i);
}
3233
// FNSAVE (DD /6): store FPU state to dst without checking pending exceptions.
void Assembler::fnsave(Address dst) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rsi, dst);
}
3239
// Store the FPU control word to src. NOTE: despite the "fnstcw" name this
// emits the wait form (FWAIT 9B + D9 /7, i.e. FSTCW), not the no-wait form.
void Assembler::fnstcw(Address src) {
  InstructionMark im(this);
  emit_byte(0x9B);
  emit_byte(0xD9);
  emit_operand32(rdi, src);
}
3246
3247void Assembler::fnstsw_ax() {
3248  emit_byte(0xdF);
3249  emit_byte(0xE0);
3250}
3251
// FPREM (D9 F8): partial remainder (truncating, x87 semantics).
void Assembler::fprem() {
  emit_byte(0xD9);
  emit_byte(0xF8);
}
3256
// FPREM1 (D9 F5): IEEE-754 partial remainder.
void Assembler::fprem1() {
  emit_byte(0xD9);
  emit_byte(0xF5);
}
3261
// FRSTOR (DD /4): restore FPU state previously saved with fnsave.
void Assembler::frstor(Address src) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rsp, src);
}
3267
// FSIN (D9 FE): ST(0) <- sin(ST(0)).
void Assembler::fsin() {
  emit_byte(0xD9);
  emit_byte(0xFE);
}
3272
// FSQRT (D9 FA): ST(0) <- sqrt(ST(0)).
void Assembler::fsqrt() {
  emit_byte(0xD9);
  emit_byte(0xFA);
}
3277
// FST m64fp (DD /2): store ST(0) as 64-bit float (no pop).
void Assembler::fst_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rdx, adr);
}
3283
// FST m32fp (D9 /2): store ST(0) as 32-bit float (no pop).
void Assembler::fst_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rdx, adr);
}
3289
// FSTP m64fp (DD /3): store ST(0) as 64-bit float and pop.
void Assembler::fstp_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rbx, adr);
}
3295
// FSTP ST(index) (DD D8+index): copy ST(0) to stack register index and pop.
void Assembler::fstp_d(int index) {
  emit_farith(0xDD, 0xD8, index);
}
3299
// FSTP m32fp (D9 /3): store ST(0) as 32-bit float and pop.
void Assembler::fstp_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rbx, adr);
}
3305
// FSTP m80fp (DB /7): store ST(0) as 80-bit extended float and pop.
void Assembler::fstp_x(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rdi, adr);
}
3311
// FSUB ST(0), ST(i) (D8 E0+i).
void Assembler::fsub(int i) {
  emit_farith(0xD8, 0xE0, i);
}
3315
// FSUB m64fp (DC /4): ST(0) <- ST(0) - 64-bit float at src.
void Assembler::fsub_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rsp, src);
}
3321
// FSUB m32fp (D8 /4): ST(0) <- ST(0) - 32-bit float at src.
void Assembler::fsub_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rsp, src);
}
3327
// Subtract targeting ST(i) (DC E8+i).
void Assembler::fsuba(int i) {
  emit_farith(0xDC, 0xE8, i);
}
3331
// Subtract-and-pop (DE E8+i).
void Assembler::fsubp(int i) {
  emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
}
3335
// Reverse subtract (D8 E8+i): ST(0) <- ST(i) - ST(0).
void Assembler::fsubr(int i) {
  emit_farith(0xD8, 0xE8, i);
}
3339
// FSUBR m64fp (DC /5): ST(0) <- m64 - ST(0).
void Assembler::fsubr_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rbp, src);
}
3345
// FSUBR m32fp (D8 /5): ST(0) <- m32 - ST(0).
void Assembler::fsubr_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rbp, src);
}
3351
// Reverse subtract targeting ST(i) (DC E0+i).
void Assembler::fsubra(int i) {
  emit_farith(0xDC, 0xE0, i);
}
3355
// Reverse subtract-and-pop (DE E0+i).
void Assembler::fsubrp(int i) {
  emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
}
3359
// tan(ST(0)) via FPTAN (D9 F2), which pushes an extra 1.0; the following
// FSTP ST(0) (DD D8) pops that 1.0, leaving only the tangent on the stack.
void Assembler::ftan() {
  emit_byte(0xD9);
  emit_byte(0xF2);
  emit_byte(0xDD);
  emit_byte(0xD8);
}
3366
// FTST (D9 E4): compare ST(0) with +0.0, setting FPU condition codes.
void Assembler::ftst() {
  emit_byte(0xD9);
  emit_byte(0xE4);
}
3371
// FUCOMI ST, ST(i) (DB E8+i): unordered compare setting EFLAGS directly (P6+).
void Assembler::fucomi(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDB, 0xE8, i);
}
3377
// FUCOMIP ST, ST(i) (DF E8+i): like fucomi but also pops ST(0) (P6+).
void Assembler::fucomip(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDF, 0xE8, i);
}
3383
// FWAIT (9B): wait for pending unmasked FPU exceptions.
void Assembler::fwait() {
  emit_byte(0x9B);
}
3387
// FXCH ST(i) (D9 C8+i): exchange ST(0) with stack register i.
void Assembler::fxch(int i) {
  emit_farith(0xD9, 0xC8, i);
}
3391
// FYL2X (D9 F1): ST(1) <- ST(1) * log2(ST(0)), then pop.
void Assembler::fyl2x() {
  emit_byte(0xD9);
  emit_byte(0xF1);
}
3396
3397
3398#ifndef _LP64
3399
3400void Assembler::incl(Register dst) {
3401  // Don't use it directly. Use MacroAssembler::incrementl() instead.
3402 emit_byte(0x40 | dst->encoding());
3403}
3404
// 32-bit lea is just leal.
void Assembler::lea(Register dst, Address src) {
  leal(dst, src);
}
3408
// MOV m32, imm32 (C7 /0) with relocation info attached to the immediate.
void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0xC7);
  emit_operand(rax, dst);
  emit_data((int)imm32, rspec, 0);
}
3415
// MOV r32, imm32 (B8+reg) with relocation info attached to the immediate.
void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data((int)imm32, rspec, 0);
}
3422
// POPA (61): pop all eight 32-bit GPRs (32-bit mode only).
void Assembler::popa() { // 32bit
  emit_byte(0x61);
}
3426
// PUSH imm32 (68) with relocation info attached to the immediate.
void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0x68);
  emit_data(imm32, rspec, 0);
}
3432
// PUSHA (60): push all eight 32-bit GPRs (32-bit mode only).
void Assembler::pusha() { // 32bit
  emit_byte(0x60);
}
3436
// SETNZ dst-low-byte (0F 95, mod=11): dst byte <- 1 if ZF==0 else 0 (32-bit build).
void Assembler::set_byte_if_not_zero(Register dst) {
  emit_byte(0x0F);
  emit_byte(0x95);
  emit_byte(0xE0 | dst->encoding());
}
3442
// SHLD r32, r32, CL (0F A5): double-precision shift left by CL.
void Assembler::shldl(Register dst, Register src) {
  emit_byte(0x0F);
  emit_byte(0xA5);
  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
}
3448
// SHRD r32, r32, CL (0F AD): double-precision shift right by CL.
void Assembler::shrdl(Register dst, Register src) {
  emit_byte(0x0F);
  emit_byte(0xAD);
  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
}
3454
3455#else // LP64
3456
// SETNZ dst-low-byte, 64-bit build: needs a REX prefix for spl/bpl/sil/dil
// and r8b-r15b, hence prefix_and_encode with byteinst=true.
void Assembler::set_byte_if_not_zero(Register dst) {
  int enc = prefix_and_encode(dst->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0x95);
  emit_byte(0xE0 | enc);
}
3463
3464// 64bit only pieces of the assembler
3465// This should only be used by 64bit instructions that can use rip-relative
3466// it cannot be used by instructions that want an immediate value.
3467
// Returns true if adr can be addressed rip-relatively (signed 32-bit
// displacement) from anywhere code may be generated or copied to within
// the code cache; false forces callers to materialize a 64-bit literal.
bool Assembler::reachable(AddressLiteral adr) {
  int64_t disp;
  // None will force a 64bit literal to the code stream. Likely a placeholder
  // for something that will be patched later and we need to certain it will
  // always be reachable.
  if (adr.reloc() == relocInfo::none) {
    return false;
  }
  if (adr.reloc() == relocInfo::internal_word_type) {
    // This should be rip relative and easily reachable.
    return true;
  }
  if (adr.reloc() == relocInfo::virtual_call_type ||
      adr.reloc() == relocInfo::opt_virtual_call_type ||
      adr.reloc() == relocInfo::static_call_type ||
      adr.reloc() == relocInfo::static_stub_type ) {
    // This should be rip relative within the code cache and easily
    // reachable until we get huge code caches. (At which point
    // ic code is going to have issues).
    return true;
  }
  if (adr.reloc() != relocInfo::external_word_type &&
      adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
      adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
      adr.reloc() != relocInfo::runtime_call_type ) {
    return false;
  }

  // Stress the correction code
  if (ForceUnreachable) {
    // Must be runtimecall reloc, see if it is in the codecache
    // Flipping stuff in the codecache to be unreachable causes issues
    // with things like inline caches where the additional instructions
    // are not handled.
    if (CodeCache::find_blob(adr._target) == NULL) {
      return false;
    }
  }
  // For external_word_type/runtime_call_type if it is reachable from where we
  // are now (possibly a temp buffer) and where we might end up
  // anywhere in the codeCache then we are always reachable.
  // This would have to change if we ever save/restore shared code
  // to be more pessimistic.
  disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;
  disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;

  disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int));

  // Because rip relative is a disp + address_of_next_instruction and we
  // don't know the value of address_of_next_instruction we apply a fudge factor
  // to make sure we will be ok no matter the size of the instruction we get placed into.
  // We don't have to fudge the checks above here because they are already worst case.

  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
  // + 4 because better safe than sorry.
  const int fudge = 12 + 4;
  if (disp < 0) {
    disp -= fudge;
  } else {
    disp += fudge;
  }
  return is_simm32(disp);
}
3533
3534// Check if the polling page is not reachable from the code cache using rip-relative
3535// addressing.
// True when the safepoint polling page is outside rip-relative (simm32)
// range of either end of the code cache, so polls need an absolute address.
bool Assembler::is_polling_page_far() {
  intptr_t addr = (intptr_t)os::get_polling_page();
  return !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
         !is_simm32(addr - (intptr_t)CodeCache::high_bound());
}
3541
// Emit a 64-bit datum; wraps a bare reloc type into a RelocationHolder,
// or emits the raw quadword when no relocation is needed.
void Assembler::emit_data64(jlong data,
                            relocInfo::relocType rtype,
                            int format) {
  if (rtype == relocInfo::none) {
    emit_long64(data);
  } else {
    emit_data64(data, Relocation::spec_simple(rtype), format);
  }
}
3551
// Emit a relocated 64-bit immediate. The relocation is recorded at the
// enclosing instruction's mark (set by InstructionMark), not at the datum.
void Assembler::emit_data64(jlong data,
                            RelocationHolder const& rspec,
                            int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(imm_operand == format, "must be immediate");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  // Do not use AbstractAssembler::relocate, which is not intended for
  // embedded words.  Instead, relocate to the enclosing instruction.
  code_section()->relocate(inst_mark(), rspec, format);
#ifdef ASSERT
  check_relocation(rspec, format);
#endif
  emit_long64(data);
}
3566
// Emit a REX prefix if required for reg_enc and return the low 3-bit
// encoding. byteinst: registers 4-7 need a plain REX so the byte forms
// address spl/bpl/sil/dil instead of ah/ch/dh/bh.
int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
  if (reg_enc >= 8) {
    prefix(REX_B);
    reg_enc -= 8;
  } else if (byteinst && reg_enc >= 4) {
    prefix(REX);
  }
  return reg_enc;
}
3576
// Emit REX.W (plus REX.B for r8-r15) for a 64-bit single-register operand
// and return the low 3-bit encoding.
int Assembler::prefixq_and_encode(int reg_enc) {
  if (reg_enc < 8) {
    prefix(REX_W);
  } else {
    prefix(REX_WB);
    reg_enc -= 8;
  }
  return reg_enc;
}
3586
// Emit the REX prefix needed for a reg/reg operand pair (dst in the ModRM
// reg field, src in r/m) and return the packed ModRM low bits (reg<<3 | rm).
int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
  if (dst_enc < 8) {
    if (src_enc >= 8) {
      prefix(REX_B);
      src_enc -= 8;
    } else if (byteinst && src_enc >= 4) {
      prefix(REX);
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_R);
    } else {
      prefix(REX_RB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc;
}
3606
// 64-bit variant of prefix_and_encode(dst, src): always emits REX.W plus
// whatever R/B bits the pair needs; returns the packed ModRM low bits.
int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
  if (dst_enc < 8) {
    if (src_enc < 8) {
      prefix(REX_W);
    } else {
      prefix(REX_WB);
      src_enc -= 8;
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_WR);
    } else {
      prefix(REX_WRB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc;
}
3626
// Emit REX.B when reg is one of r8-r15 (register encoded in an opcode or r/m field).
void Assembler::prefix(Register reg) {
  if (reg->encoding() >= 8) {
    prefix(REX_B);
  }
}
3632
// Emit the REX prefix (X/B bits) a memory operand needs for extended
// base/index registers; nothing when neither needs it.
void Assembler::prefix(Address adr) {
  if (adr.base_needs_rex()) {
    if (adr.index_needs_rex()) {
      prefix(REX_XB);
    } else {
      prefix(REX_B);
    }
  } else {
    if (adr.index_needs_rex()) {
      prefix(REX_X);
    }
  }
}
3646
// 64-bit memory-operand prefix: always REX.W, plus X/B bits as required.
void Assembler::prefixq(Address adr) {
  if (adr.base_needs_rex()) {
    if (adr.index_needs_rex()) {
      prefix(REX_WXB);
    } else {
      prefix(REX_WB);
    }
  } else {
    if (adr.index_needs_rex()) {
      prefix(REX_WX);
    } else {
      prefix(REX_W);
    }
  }
}
3662
3663
// Emit the REX prefix for a reg + memory operand pair: R bit for reg,
// X/B bits for the address, and a bare REX for byte ops on encodings 4-7.
void Assembler::prefix(Address adr, Register reg, bool byteinst) {
  if (reg->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_XB);
      } else {
        prefix(REX_B);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_X);
      } else if (reg->encoding() >= 4 ) {
        prefix(REX);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_RXB);
      } else {
        prefix(REX_RB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_RX);
      } else {
        prefix(REX_R);
      }
    }
  }
}
3695
// 64-bit reg + memory pair: REX.W always, plus R (for src) and X/B (for adr).
void Assembler::prefixq(Address adr, Register src) {
  if (src->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WXB);
      } else {
        prefix(REX_WB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WX);
      } else {
        prefix(REX_W);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WRXB);
      } else {
        prefix(REX_WRB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WRX);
      } else {
        prefix(REX_WR);
      }
    }
  }
}
3727
// XMM reg + memory pair: like the Register form but XMM registers never
// need the byteinst special case.
void Assembler::prefix(Address adr, XMMRegister reg) {
  if (reg->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_XB);
      } else {
        prefix(REX_B);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_X);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_RXB);
      } else {
        prefix(REX_RB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_RX);
      } else {
        prefix(REX_R);
      }
    }
  }
}
3757
// ADC r64, imm32 (81 /2): prefix only; emit_arith re-derives the encoding.
void Assembler::adcq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD0, dst, imm32);
}
3762
// ADC r64, m64 (13 /r).
void Assembler::adcq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x13);
  emit_operand(dst, src);
}
3769
3770void Assembler::adcq(Register dst, Register src) {
3771  (int) prefixq_and_encode(dst->encoding(), src->encoding());
3772  emit_arith(0x13, 0xC0, dst, src);
3773}
3774
// ADD m64, imm32/imm8 (81/83 /0); emit_arith_operand picks the short form.
void Assembler::addq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rax, dst,imm32);
}
3780
// ADD m64, r64 (01 /r).
void Assembler::addq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x01);
  emit_operand(src, dst);
}
3787
// ADD r64, imm32 (81 /0).
void Assembler::addq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xC0, dst, imm32);
}
3792
// ADD r64, m64 (03 /r).
void Assembler::addq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x03);
  emit_operand(dst, src);
}
3799
// ADD r64, r64 (03 /r).
void Assembler::addq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}
3804
// AND m64, imm32 (81 /4); rsp encodes the /4 opcode extension.
void Assembler::andq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x81);
  emit_operand(rsp, dst, 4);
  emit_long(imm32);
}
3812
// AND r64, imm32 (81 /4).
void Assembler::andq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE0, dst, imm32);
}
3817
// AND r64, m64 (23 /r).
void Assembler::andq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x23);
  emit_operand(dst, src);
}
3824
3825void Assembler::andq(Register dst, Register src) {
3826  (int) prefixq_and_encode(dst->encoding(), src->encoding());
3827  emit_arith(0x23, 0xC0, dst, src);
3828}
3829
// BSF r64, r64 (0F BC): bit-scan-forward (index of lowest set bit).
void Assembler::bsfq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBC);
  emit_byte(0xC0 | encode);
}
3836
// BSR r64, r64 (0F BD). With an F3 prefix this encoding becomes LZCNT on
// capable CPUs, hence the guard against supports_lzcnt.
void Assembler::bsrq(Register dst, Register src) {
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}
3844
// BSWAP r64 (0F C8+reg): byte-swap the 64-bit register.
void Assembler::bswapq(Register reg) {
  int encode = prefixq_and_encode(reg->encoding());
  emit_byte(0x0F);
  emit_byte(0xC8 | encode);
}
3850
// CQO (REX.W 99): sign-extend RAX into RDX:RAX.
void Assembler::cdqq() {
  prefix(REX_W);
  emit_byte(0x99);
}
3855
// CLFLUSH m8 (0F AE /7): flush the cache line containing adr.
void Assembler::clflush(Address adr) {
  prefix(adr);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(rdi, adr);
}
3862
// CMOVcc r64, r64 (0F 40+cc /r).
void Assembler::cmovq(Condition cc, Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_byte(0xC0 | encode);
}
3869
// CMOVcc r64, m64 (0F 40+cc /r).
void Assembler::cmovq(Condition cc, Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_operand(dst, src);
}
3877
// CMP m64, imm32 (81 /7); rdi encodes the /7 opcode extension.
void Assembler::cmpq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x81);
  emit_operand(rdi, dst, 4);
  emit_long(imm32);
}
3885
// CMP r64, imm32 (81 /7).
void Assembler::cmpq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xF8, dst, imm32);
}
3890
3891void Assembler::cmpq(Address dst, Register src) {
3892  InstructionMark im(this);
3893  prefixq(dst, src);
3894  emit_byte(0x3B);
3895  emit_operand(src, dst);
3896}
3897
// CMP r64, r64 (3B /r).
void Assembler::cmpq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}
3902
// CMP r64, m64 (3B /r).
void Assembler::cmpq(Register dst, Address  src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}
3909
// CMPXCHG m64, r64 (0F B1 /r); callers add LOCK separately when needed.
void Assembler::cmpxchgq(Register reg, Address adr) {
  InstructionMark im(this);
  prefixq(adr, reg);
  emit_byte(0x0F);
  emit_byte(0xB1);
  emit_operand(reg, adr);
}
3917
// CVTSI2SD xmm, r64 (F2 REX.W 0F 2A); F2 must precede the REX prefix.
void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}
3926
// CVTSI2SS xmm, r64 (F3 REX.W 0F 2A).
void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}
3935
// CVTTSD2SI r64, xmm (F2 REX.W 0F 2C): truncating double-to-long.
void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}
3944
// CVTTSS2SI r64, xmm (F3 REX.W 0F 2C): truncating float-to-long.
void Assembler::cvttss2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}
3953
// DEC r32 via FF /1 (mod=11, so 0xC8|enc); the one-byte 0x48+reg form is a
// REX prefix in 64-bit mode.
void Assembler::decl(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC8 | encode);
}
3961
// DEC r64 (REX.W FF /1).
void Assembler::decq(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC8 | encode);
}
3969
// DEC m64 (REX.W FF /1); rcx encodes the /1 opcode extension.
void Assembler::decq(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xFF);
  emit_operand(rcx, dst);
}
3977
// FXRSTOR (0F AE /1): restore FPU/SSE state saved by fxsave.
void Assembler::fxrstor(Address src) {
  prefixq(src);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(1), src);
}
3984
// FXSAVE (0F AE /0): save FPU/SSE state to dst.
void Assembler::fxsave(Address dst) {
  prefixq(dst);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(0), dst);
}
3991
// IDIV r64 (REX.W F7 /7): signed divide RDX:RAX by src.
void Assembler::idivq(Register src) {
  int encode = prefixq_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF8 | encode);
}
3997
// IMUL r64, r64 (0F AF /r).
void Assembler::imulq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xAF);
  emit_byte(0xC0 | encode);
}
4004
// IMUL r64, r64, imm: picks the short imm8 form (6B) when value fits,
// otherwise the imm32 form (69).
void Assembler::imulq(Register dst, Register src, int value) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_byte(0x6B);
    emit_byte(0xC0 | encode);
    emit_byte(value & 0xFF);
  } else {
    emit_byte(0x69);
    emit_byte(0xC0 | encode);
    emit_long(value);
  }
}
4017
// INC r32, 64-bit build (FF /0): the one-byte 0x40+reg form is a REX prefix here.
void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC0 | encode);
}
4025
// INC r64 (REX.W FF /0).
void Assembler::incq(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC0 | encode);
}
4033
// INC m64 (REX.W FF /0); rax encodes the /0 opcode extension.
void Assembler::incq(Address dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xFF);
  emit_operand(rax, dst);
}
4041
// 64-bit lea is just leaq.
void Assembler::lea(Register dst, Address src) {
  leaq(dst, src);
}
4045
// LEA r64, m (8D /r): load the effective address of src into dst.
void Assembler::leaq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x8D);
  emit_operand(dst, src);
}
4052
// MOV r64, imm64 (REX.W B8+reg): full 64-bit immediate, no relocation.
void Assembler::mov64(Register dst, int64_t imm64) {
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_long64(imm64);
}
4059
// MOV r64, imm64 with relocation info attached to the 64-bit immediate.
void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data64(imm64, rspec);
}
4066
// MOV r32, imm32 carrying a compressed (narrow) oop, relocated as such.
void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
4073
// MOV m32, imm32 (C7 /0) carrying a compressed (narrow) oop.
void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
4081
// CMP r32, imm32 (81 /7) against a relocated compressed oop.
void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(src1->encoding());
  emit_byte(0x81);
  emit_byte(0xF8 | encode);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
4089
// CMP m32, imm32 (81 /7) against a relocated compressed oop.
void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  prefix(src1);
  emit_byte(0x81);
  emit_operand(rax, src1, 4);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
4097
// LZCNT r64, r64 (F3 REX.W 0F BD): falls back to BSR semantics on older CPUs,
// hence the capability guard.
void Assembler::lzcntq(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  emit_byte(0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}
4106
// MOVQ xmm, r64 (66 REX.W 0F 6E).
void Assembler::movdq(XMMRegister dst, Register src) {
  // table D-1 says MMX/SSE2
  NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
  emit_byte(0x66);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x6E);
  emit_byte(0xC0 | encode);
}
4116
// MOVQ r64, xmm (66 REX.W 0F 7E): the xmm register sits in the ModRM reg
// field for this opcode, so the prefix arguments are swapped.
void Assembler::movdq(Register dst, XMMRegister src) {
  // table D-1 says MMX/SSE2
  NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
  emit_byte(0x66);
  // swap src/dst to get correct prefix
  int encode = prefixq_and_encode(src->encoding(), dst->encoding());
  emit_byte(0x0F);
  emit_byte(0x7E);
  emit_byte(0xC0 | encode);
}
4127
// MOV r64, r64 (8B /r).
void Assembler::movq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x8B);
  emit_byte(0xC0 | encode);
}
4133
// MOV r64, m64 (8B /r).
void Assembler::movq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}
4140
// MOV m64, r64 (89 /r).
void Assembler::movq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}
4147
// MOVSX r64, m8 (0F BE): sign-extend byte to quadword.
void Assembler::movsbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_operand(dst, src);
}
4155
// MOVSX r64, r8 (0F BE).
void Assembler::movsbq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_byte(0xC0 | encode);
}
4162
// Deliberately disabled: the emitted encoding (0xC7 | encode, no ModRM byte)
// is not a valid MOV form, and observed disassembly was wrong; guarded by
// ShouldNotReachHere() until verified.
void Assembler::movslq(Register dst, int32_t imm32) {
  // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
  // and movslq(r8, 3); as movl     $0x0000000048000000,(%rbx)
  // as a result we shouldn't use until tested at runtime...
  ShouldNotReachHere();
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xC7 | encode);
  emit_long(imm32);
}
4173
// MOV m64, imm32 sign-extended (REX.W C7 /0).
void Assembler::movslq(Address dst, int32_t imm32) {
  assert(is_simm32(imm32), "lost bits");
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);
  emit_long(imm32);
}
4182
// MOVSXD r64, m32 (REX.W 63 /r): sign-extend dword to quadword.
void Assembler::movslq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x63);
  emit_operand(dst, src);
}
4189
// MOVSXD r64, r32 (REX.W 63 /r).
void Assembler::movslq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x63);
  emit_byte(0xC0 | encode);
}
4195
// MOVSX r64, m16 (0F BF): sign-extend word to quadword.
void Assembler::movswq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_operand(dst, src);
}
4203
// MOVSX r64, r16 (0F BF).
void Assembler::movswq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_byte(0xC0 | encode);
}
4210
// MOVZX r64, m8 (0F B6): zero-extend byte to quadword.
void Assembler::movzbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_operand(dst, src);
}
4218
// MOVZX r64, r8 (0F B6).
void Assembler::movzbq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_byte(0xC0 | encode);
}
4225
// MOVZX r64, m16 (0F B7): zero-extend word to quadword.
void Assembler::movzwq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_operand(dst, src);
}
4233
// MOVZX r64, r16 (0F B7).
void Assembler::movzwq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_byte(0xC0 | encode);
}
4240
// NEG r64 (REX.W F7 /3): two's-complement negate.
void Assembler::negq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD8 | encode);
}
4246
// NOT r64 (REX.W F7 /2): one's-complement negate.
void Assembler::notq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD0 | encode);
}
4252
// OR m64, imm32 (81 /1); rcx encodes the /1 opcode extension.
void Assembler::orq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x81);
  emit_operand(rcx, dst, 4);
  emit_long(imm32);
}
4260
// OR r64, imm32 (81 /1).
void Assembler::orq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xC8, dst, imm32);
}
4265
// orq dst, [src] -- OR a 64-bit memory operand into a register
// (REX.W + 0B /r).
void Assembler::orq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0B);
  emit_operand(dst, src);
}
4272
// orq dst, src -- register-register 64-bit OR (REX.W + 0B /r).
void Assembler::orq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x0B, 0xC0, dst, src);
}
4277
// Restore all 15 general-purpose registers saved by pusha() below and pop
// the 16-slot frame.  Slot 11 (the saved rsp) is deliberately skipped; rsp
// is instead restored by the final addq.  Load order must mirror pusha().
void Assembler::popa() { // 64bit
  movq(r15, Address(rsp, 0));
  movq(r14, Address(rsp, wordSize));
  movq(r13, Address(rsp, 2 * wordSize));
  movq(r12, Address(rsp, 3 * wordSize));
  movq(r11, Address(rsp, 4 * wordSize));
  movq(r10, Address(rsp, 5 * wordSize));
  movq(r9,  Address(rsp, 6 * wordSize));
  movq(r8,  Address(rsp, 7 * wordSize));
  movq(rdi, Address(rsp, 8 * wordSize));
  movq(rsi, Address(rsp, 9 * wordSize));
  movq(rbp, Address(rsp, 10 * wordSize));
  // skip rsp
  movq(rbx, Address(rsp, 12 * wordSize));
  movq(rdx, Address(rsp, 13 * wordSize));
  movq(rcx, Address(rsp, 14 * wordSize));
  movq(rax, Address(rsp, 15 * wordSize));

  addq(rsp, 16 * wordSize);
}
4298
// popcntq dst, [src] -- population count of a 64-bit memory operand.
// The mandatory F3 prefix must precede the REX prefix (F3 REX.W 0F B8 /r).
void Assembler::popcntq(Register dst, Address src) {
  assert(VM_Version::supports_popcnt(), "must support");
  InstructionMark im(this);
  emit_byte(0xF3);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB8);
  emit_operand(dst, src);
}
4308
4309void Assembler::popcntq(Register dst, Register src) {
4310  assert(VM_Version::supports_popcnt(), "must support");
4311  emit_byte(0xF3);
4312  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4313  emit_byte(0x0F);
4314  emit_byte(0xB8);
4315  emit_byte(0xC0 | encode);
4316}
4317
// popq [dst] -- pop the 64-bit top of stack into memory (8F /0).
void Assembler::popq(Address dst) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x8F);
  emit_operand(rax, dst);  // rax (encoding 0) supplies the /0 extension
}
4324
// Save all 15 general-purpose registers (rsp's own slot records the
// pre-pusha rsp) into a 16-slot frame.  popa() above must mirror this
// layout exactly.
void Assembler::pusha() { // 64bit
  // we have to store original rsp.  ABI says that 128 bytes
  // below rsp are local scratch.
  // -5 * wordSize lands in slot 11 once the 16-slot subq below is done.
  movq(Address(rsp, -5 * wordSize), rsp);

  subq(rsp, 16 * wordSize);

  movq(Address(rsp, 15 * wordSize), rax);
  movq(Address(rsp, 14 * wordSize), rcx);
  movq(Address(rsp, 13 * wordSize), rdx);
  movq(Address(rsp, 12 * wordSize), rbx);
  // skip rsp
  movq(Address(rsp, 10 * wordSize), rbp);
  movq(Address(rsp, 9 * wordSize), rsi);
  movq(Address(rsp, 8 * wordSize), rdi);
  movq(Address(rsp, 7 * wordSize), r8);
  movq(Address(rsp, 6 * wordSize), r9);
  movq(Address(rsp, 5 * wordSize), r10);
  movq(Address(rsp, 4 * wordSize), r11);
  movq(Address(rsp, 3 * wordSize), r12);
  movq(Address(rsp, 2 * wordSize), r13);
  movq(Address(rsp, wordSize), r14);
  movq(Address(rsp, 0), r15);
}
4349
// pushq [src] -- push a 64-bit memory operand (FF /6).
void Assembler::pushq(Address src) {
  InstructionMark im(this);
  prefixq(src);
  emit_byte(0xFF);
  emit_operand(rsi, src);  // rsi (encoding 6) supplies the /6 extension
}
4356
4357void Assembler::rclq(Register dst, int imm8) {
4358  assert(isShiftCount(imm8 >> 1), "illegal shift count");
4359  int encode = prefixq_and_encode(dst->encoding());
4360  if (imm8 == 1) {
4361    emit_byte(0xD1);
4362    emit_byte(0xD0 | encode);
4363  } else {
4364    emit_byte(0xC1);
4365    emit_byte(0xD0 | encode);
4366    emit_byte(imm8);
4367  }
4368}
4369void Assembler::sarq(Register dst, int imm8) {
4370  assert(isShiftCount(imm8 >> 1), "illegal shift count");
4371  int encode = prefixq_and_encode(dst->encoding());
4372  if (imm8 == 1) {
4373    emit_byte(0xD1);
4374    emit_byte(0xF8 | encode);
4375  } else {
4376    emit_byte(0xC1);
4377    emit_byte(0xF8 | encode);
4378    emit_byte(imm8);
4379  }
4380}
4381
4382void Assembler::sarq(Register dst) {
4383  int encode = prefixq_and_encode(dst->encoding());
4384  emit_byte(0xD3);
4385  emit_byte(0xF8 | encode);
4386}
4387
// sbbq [dst], imm32 -- subtract-with-borrow an immediate from a 64-bit
// memory operand; rbx (encoding 3) supplies the /3 opcode extension.
void Assembler::sbbq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);
}
4393
// sbbq dst, imm32 -- subtract-with-borrow an immediate from a 64-bit
// register; 0xD8 is the mod=11 | /3 ModRM base for SBB.
void Assembler::sbbq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD8, dst, imm32);
}
4398
// sbbq dst, [src] -- subtract-with-borrow a 64-bit memory operand from a
// register (REX.W + 1B /r).
void Assembler::sbbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x1B);
  emit_operand(dst, src);
}
4405
// sbbq dst, src -- register-register 64-bit subtract-with-borrow
// (REX.W + 1B /r).
void Assembler::sbbq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}
4410
4411void Assembler::shlq(Register dst, int imm8) {
4412  assert(isShiftCount(imm8 >> 1), "illegal shift count");
4413  int encode = prefixq_and_encode(dst->encoding());
4414  if (imm8 == 1) {
4415    emit_byte(0xD1);
4416    emit_byte(0xE0 | encode);
4417  } else {
4418    emit_byte(0xC1);
4419    emit_byte(0xE0 | encode);
4420    emit_byte(imm8);
4421  }
4422}
4423
4424void Assembler::shlq(Register dst) {
4425  int encode = prefixq_and_encode(dst->encoding());
4426  emit_byte(0xD3);
4427  emit_byte(0xE0 | encode);
4428}
4429
4430void Assembler::shrq(Register dst, int imm8) {
4431  assert(isShiftCount(imm8 >> 1), "illegal shift count");
4432  int encode = prefixq_and_encode(dst->encoding());
4433  emit_byte(0xC1);
4434  emit_byte(0xE8 | encode);
4435  emit_byte(imm8);
4436}
4437
4438void Assembler::shrq(Register dst) {
4439  int encode = prefixq_and_encode(dst->encoding());
4440  emit_byte(0xD3);
4441  emit_byte(0xE8 | encode);
4442}
4443
// subq [dst], imm32 -- subtract an immediate from a 64-bit memory operand;
// rbp (encoding 5) supplies the /5 opcode extension.
void Assembler::subq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbp, dst, imm32);
}
4449
// subq [dst], src -- subtract a 64-bit register from memory
// (REX.W + 29 /r).
void Assembler::subq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x29);
  emit_operand(src, dst);
}
4456
// subq dst, imm32 -- subtract an immediate from a 64-bit register;
// 0xE8 is the mod=11 | /5 ModRM base for SUB.
void Assembler::subq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE8, dst, imm32);
}
4461
// subq dst, [src] -- subtract a 64-bit memory operand from a register
// (REX.W + 2B /r).
void Assembler::subq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x2B);
  emit_operand(dst, src);
}
4468
// subq dst, src -- register-register 64-bit subtract (REX.W + 2B /r).
void Assembler::subq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x2B, 0xC0, dst, src);
}
4473
// testq dst, imm32 -- AND-and-set-flags without storing the result.
// Uses the short rax-only opcode (REX.W + A9 id) when dst is rax,
// otherwise the general F7 /0 form.
void Assembler::testq(Register dst, int32_t imm32) {
  // not using emit_arith because test
  // doesn't support sign-extension of
  // 8bit operands
  int encode = dst->encoding();
  if (encode == 0) {
    // encoding 0 is rax: dedicated TEST rax, imm32 opcode
    prefix(REX_W);
    emit_byte(0xA9);
  } else {
    encode = prefixq_and_encode(encode);
    emit_byte(0xF7);
    emit_byte(0xC0 | encode);
  }
  emit_long(imm32);
}
4489
// testq dst, src -- register-register 64-bit TEST (REX.W + 85 /r).
void Assembler::testq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}
4494
// xaddq [dst], src -- exchange-and-add src with the 64-bit memory operand
// (REX.W + 0F C1 /r).  Callers add any LOCK prefix themselves.
void Assembler::xaddq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x0F);
  emit_byte(0xC1);
  emit_operand(src, dst);
}
4502
// xchgq dst, [src] -- exchange a 64-bit register with memory
// (REX.W + 87 /r; implicitly locked by the processor).
void Assembler::xchgq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x87);
  emit_operand(dst, src);
}
4509
4510void Assembler::xchgq(Register dst, Register src) {
4511  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4512  emit_byte(0x87);
4513  emit_byte(0xc0 | encode);
4514}
4515
// xorq dst, src -- register-register 64-bit XOR (REX.W + 33 /r).
void Assembler::xorq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}
4520
// xorq dst, [src] -- XOR a 64-bit memory operand into a register
// (REX.W + 33 /r).
void Assembler::xorq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x33);
  emit_operand(dst, src);
}
4527
#endif // _LP64
4529
// Maps each Assembler::Condition to its logical negation; indexed by the
// condition-code value (see the per-entry comments for the index each
// entry corresponds to).
static Assembler::Condition reverse[] = {
    Assembler::noOverflow     /* overflow      = 0x0 */ ,
    Assembler::overflow       /* noOverflow    = 0x1 */ ,
    Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
    Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,
    Assembler::notZero        /* zero          = 0x4, equal         = 0x4 */ ,
    Assembler::zero           /* notZero       = 0x5, notEqual      = 0x5 */ ,
    Assembler::above          /* belowEqual    = 0x6 */ ,
    Assembler::belowEqual     /* above         = 0x7 */ ,
    Assembler::positive       /* negative      = 0x8 */ ,
    Assembler::negative       /* positive      = 0x9 */ ,
    Assembler::noParity       /* parity        = 0xa */ ,
    Assembler::parity         /* noParity      = 0xb */ ,
    Assembler::greaterEqual   /* less          = 0xc */ ,
    Assembler::less           /* greaterEqual  = 0xd */ ,
    Assembler::greater        /* lessEqual     = 0xe */ ,
    Assembler::lessEqual      /* greater       = 0xf, */

};
4549
4550
4551// Implementation of MacroAssembler
4552
4553// First all the versions that have distinct versions depending on 32/64 bit
4554// Unless the difference is trivial (1 line or so).
4555
4556#ifndef _LP64
4557
4558// 32bit versions
4559
// 32-bit: an AddressLiteral is just an absolute address plus reloc info.
Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}
4563
// 32-bit: array addressing can be expressed directly as base+index*scale.
Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}
4567
// 32-bit biased-locking fast path.  Tries to acquire (or re-acquire) the
// bias of obj_reg's header for the current thread.  swap_reg must be rax
// (it is the cmpxchg comparand); tmp_reg may be noreg, in which case
// lock_reg is borrowed via push/pop.  On successful bias acquisition,
// control transfers to 'done'; on a failed CAS, 'slow_case' (if supplied)
// is taken; otherwise control reaches 'cas_label' so the caller can fall
// back to the ordinary CAS-based lock.  Returns the code offset of the
// instruction usable for an implicit null check of obj_reg (-1 is the
// initial value; both the !swap_reg_contains_mark and the
// swap_reg_contains_mark paths below set it before use).
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
  assert_different_registers(lock_reg, obj_reg, swap_reg);

  if (PrintBiasedLockingStatistics && counters == NULL)
    counters = BiasedLocking::counters();

  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    need_tmp_reg = true;
    tmp_reg = lock_reg;
  } else {
    assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  }
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    movl(swap_reg, mark_addr);
  }
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  movl(tmp_reg, swap_reg);
  andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  jcc(Assembler::notEqual, cas_label);
  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on x86 we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  movl(saved_mark_addr, swap_reg);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  xorl(swap_reg, tmp_reg);
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  movl(tmp_reg, klass_addr);
  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->biased_lock_entry_count_addr()));
  }
  jcc(Assembler::equal, done);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
  jcc(Assembler::notZero, try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  testl(swap_reg, markOopDesc::epoch_mask_in_place);
  jcc(Assembler::notZero, try_rebias);

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  movl(swap_reg, saved_mark_addr);
  andl(swap_reg,
       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orl(tmp_reg, swap_reg);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  movl(swap_reg, klass_addr);
  orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  movl(swap_reg, saved_mark_addr);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  movl(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  movl(tmp_reg, klass_addr);
  movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
  }

  bind(cas_label);

  return null_check_offset;
}
// 32-bit leaf call: arguments were pushed by pass_argN below; call the
// entry point and pop the arguments off the caller's stack afterwards.
void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments) {
  call(RuntimeAddress(entry_point));
  increment(rsp, number_of_arguments * wordSize);
}
4784
// Compare a memory word against an oop constant, recording an immediate
// oop relocation so the GC can update the embedded pointer (32-bit only).
void MacroAssembler::cmpoop(Address src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}
4788
// Compare a register against an oop constant, recording an immediate
// oop relocation so the GC can update the embedded pointer (32-bit only).
void MacroAssembler::cmpoop(Register src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}
4792
4793void MacroAssembler::extend_sign(Register hi, Register lo) {
4794  // According to Intel Doc. AP-526, "Integer Divide", p.18.
4795  if (VM_Version::is_P6() && hi == rdx && lo == rax) {
4796    cdql();
4797  } else {
4798    movl(hi, lo);
4799    sarl(hi, 31);
4800  }
4801}
4802
// Emit a single 5-byte no-op (four segment-override prefixes + nop) so a
// verified entry point can later be patched atomically.
void MacroAssembler::fat_nop() {
  // A 5 byte nop that is safe for patching (see patch_verified_entry)
  emit_byte(0x26); // es:
  emit_byte(0x2e); // cs:
  emit_byte(0x64); // fs:
  emit_byte(0x65); // gs:
  emit_byte(0x90);
}
4811
// Branch to L if FPU status flag C2 is set; the status word is moved
// through ax into EFLAGS (C2 lands in the parity bit via sahf).
void MacroAssembler::jC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::parity, L);
}
4821
// Branch to L if FPU status flag C2 is clear; mirror image of jC2 above.
void MacroAssembler::jnC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::noParity, L);
}
4831
4832// 32bit can do a case table jump in one instruction but we no longer allow the base
4833// to be installed in the Address class
// Indirect jump through an array entry (e.g. a switch table); on 32-bit
// this is a single jmp with a base+index*scale operand.
void MacroAssembler::jump(ArrayAddress entry) {
  jmp(as_Address(entry));
}
4837
4838// Note: y_lo will be destroyed
// Compare the 64-bit values x (x_hi:x_lo) and y (y_hi:y_lo) and leave
// -1 / 0 / +1 in x_hi, Java lcmp semantics.  y_lo is clobbered per the
// note above; the high halves compare signed, the low halves unsigned.
void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  // Long compare for Java (semantics as described in JVM spec.)
  Label high, low, done;

  cmpl(x_hi, y_hi);
  jcc(Assembler::less, low);
  jcc(Assembler::greater, high);
  // x_hi is the return register
  xorl(x_hi, x_hi);
  cmpl(x_lo, y_lo);
  jcc(Assembler::below, low);
  jcc(Assembler::equal, done);

  bind(high);
  xorl(x_hi, x_hi);
  increment(x_hi);
  jmp(done);

  bind(low);
  xorl(x_hi, x_hi);
  decrementl(x_hi);

  bind(done);
}
4863
// Load the literal's address (not its contents) into dst, carrying the
// relocation info (32-bit: a relocated 32-bit immediate move).
void MacroAssembler::lea(Register dst, AddressLiteral src) {
    mov_literal32(dst, (int32_t)src.target(), src.rspec());
}
4867
// Store the literal's address into a memory word, carrying the relocation.
void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  // leal(dst, as_Address(adr));
  // see note in movl as to why we must use a move
  mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
}
4873
// Tear down the current frame: restore rsp from rbp, then pop saved rbp.
void MacroAssembler::leave() {
  mov(rsp, rbp);
  pop(rbp);
}
4878
// 64-bit multiply of two stack-resident longs (32-bit VM); clobbers
// rax, rbx, rcx, rdx and leaves the product in rdx:rax.
void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
  // Multiplication of two Java long values stored on the stack
  // as illustrated below. Result is in rdx:rax.
  //
  // rsp ---> [  ??  ] \               \
  //            ....    | y_rsp_offset  |
  //          [ y_lo ] /  (in bytes)    | x_rsp_offset
  //          [ y_hi ]                  | (in bytes)
  //            ....                    |
  //          [ x_lo ]                 /
  //          [ x_hi ]
  //            ....
  //
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
  Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
  Label quick;
  // load x_hi, y_hi and check if quick
  // multiplication is possible
  movl(rbx, x_hi);
  movl(rcx, y_hi);
  movl(rax, rbx);
  orl(rbx, rcx);                                 // rbx, = 0 <=> x_hi = 0 and y_hi = 0
  jcc(Assembler::zero, quick);                   // if rbx, = 0 do quick multiply
  // do full multiplication
  // 1st step
  mull(y_lo);                                    // x_hi * y_lo
  movl(rbx, rax);                                // save lo(x_hi * y_lo) in rbx,
  // 2nd step
  movl(rax, x_lo);
  mull(rcx);                                     // x_lo * y_hi
  addl(rbx, rax);                                // add lo(x_lo * y_hi) to rbx,
  // 3rd step
  bind(quick);                                   // note: rbx, = 0 if quick multiply!
  movl(rax, x_lo);
  mull(y_lo);                                    // x_lo * y_lo
  addl(rdx, rbx);                                // correct hi(x_lo * y_lo)
}
4918
// Negate the 64-bit value hi:lo in place: negate lo, propagate the borrow
// into hi via adc, then negate hi.
void MacroAssembler::lneg(Register hi, Register lo) {
  negl(lo);
  adcl(hi, 0);
  negl(hi);
}
4924
// 64-bit left shift of hi:lo by the count in rcx (32-bit VM), Java
// semantics: the count is masked to 0..63; counts >= 32 move lo into hi.
void MacroAssembler::lshl(Register hi, Register lo) {
  // Java shift left long support (semantics as described in JVM spec., p.305)
  // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
  // shift value is in rcx !
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(hi, lo);                                  // x := x << n
  xorl(lo, lo);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shldl(hi, lo);                                 // x := x << s
  shll(lo);
}
4944
4945
// 64-bit right shift of hi:lo by the count in rcx (32-bit VM):
// arithmetic (Java >>) when sign_extension is true, logical (>>>)
// otherwise.  The count is masked to 0..63.
void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
  // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
  // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(lo, hi);                                  // x := x >> n
  if (sign_extension) sarl(hi, 31);
  else                xorl(hi, hi);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shrdl(lo, hi);                                 // x := x >> s
  if (sign_extension) sarl(hi);
  else                shrl(hi);
}
4966
// Load an oop constant into a register with an immediate-oop relocation
// so the GC can find and update the embedded pointer (32-bit only).
void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}
4970
// Store an oop constant into memory with an immediate-oop relocation
// (32-bit only).
void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}
4974
4975void MacroAssembler::movptr(Register dst, AddressLiteral src) {
4976  if (src.is_lval()) {
4977    mov_literal32(dst, (intptr_t)src.target(), src.rspec());
4978  } else {
4979    movl(dst, as_Address(src));
4980  }
4981}
4982
// Store a register into an array slot (32-bit: plain movl).
void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movl(as_Address(dst), src);
}
4986
// Load an array slot into a register (32-bit: plain movl).
void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movl(dst, as_Address(src));
}
4990
// src should NEVER be a real pointer. Use AddressLiteral for true pointers
// (a raw pointer would need relocation, which this plain movl omits).
void MacroAssembler::movptr(Address dst, intptr_t src) {
  movl(dst, src);
}
4995
4996
// Pop the registers saved by push_callee_saved_registers(), in the
// reverse order they were pushed.
void MacroAssembler::pop_callee_saved_registers() {
  pop(rcx);
  pop(rdx);
  pop(rdi);
  pop(rsi);
}
5003
// Reload the FPU top-of-stack from the two stack words pushed by
// push_fTOS() and release them.
void MacroAssembler::pop_fTOS() {
  fld_d(Address(rsp, 0));
  addl(rsp, 2 * wordSize);
}
5008
// Save rsi/rdi/rdx/rcx; order must be the exact reverse of
// pop_callee_saved_registers() above.
void MacroAssembler::push_callee_saved_registers() {
  push(rsi);
  push(rdi);
  push(rdx);
  push(rcx);
}
5015
// Spill the FPU top-of-stack (a double, two words) onto the stack;
// paired with pop_fTOS() above.
void MacroAssembler::push_fTOS() {
  subl(rsp, 2 * wordSize);
  fstp_d(Address(rsp, 0));
}
5020
5021
// Push an oop constant with an immediate-oop relocation (32-bit only).
void MacroAssembler::pushoop(jobject obj) {
  push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
}
5025
5026
5027void MacroAssembler::pushptr(AddressLiteral src) {
5028  if (src.is_lval()) {
5029    push_literal32((int32_t)src.target(), src.rspec());
5030  } else {
5031    pushl(as_Address(src));
5032  }
5033}
5034
// dst := (ZF clear ? 1 : 0); zeroes dst first so the setcc byte result
// fills the whole word.  Consumes the current flags.
void MacroAssembler::set_word_if_not_zero(Register dst) {
  xorl(dst, dst);
  set_byte_if_not_zero(dst);
}
5039
// 32-bit calling convention helpers: each argument is simply pushed on
// the stack (callers push in reverse argument order; see call_VM_leaf_base
// for the matching pop).
static void pass_arg0(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}
5055
5056#ifndef PRODUCT
5057extern "C" void findpc(intptr_t x);
5058#endif
5059
5060void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
5061  // In order to get locks to work, we need to fake a in_VM state
5062  JavaThread* thread = JavaThread::current();
5063  JavaThreadState saved_state = thread->thread_state();
5064  thread->set_thread_state(_thread_in_vm);
5065  if (ShowMessageBoxOnError) {
5066    JavaThread* thread = JavaThread::current();
5067    JavaThreadState saved_state = thread->thread_state();
5068    thread->set_thread_state(_thread_in_vm);
5069    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
5070      ttyLocker ttyl;
5071      BytecodeCounter::print();
5072    }
5073    // To see where a verify_oop failed, get $ebx+40/X for this frame.
5074    // This is the value of eip which points to where verify_oop will return.
5075    if (os::message_box(msg, "Execution stopped, print registers?")) {
5076      ttyLocker ttyl;
5077      tty->print_cr("eip = 0x%08x", eip);
5078#ifndef PRODUCT
5079      if ((WizardMode || Verbose) && PrintMiscellaneous) {
5080        tty->cr();
5081        findpc(eip);
5082        tty->cr();
5083      }
5084#endif
5085      tty->print_cr("rax = 0x%08x", rax);
5086      tty->print_cr("rbx = 0x%08x", rbx);
5087      tty->print_cr("rcx = 0x%08x", rcx);
5088      tty->print_cr("rdx = 0x%08x", rdx);
5089      tty->print_cr("rdi = 0x%08x", rdi);
5090      tty->print_cr("rsi = 0x%08x", rsi);
5091      tty->print_cr("rbp = 0x%08x", rbp);
5092      tty->print_cr("rsp = 0x%08x", rsp);
5093      BREAKPOINT;
5094      assert(false, "start up GDB");
5095    }
5096  } else {
5097    ttyLocker ttyl;
5098    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
5099    assert(false, err_msg("DEBUG MESSAGE: %s", msg));
5100  }
5101  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
5102}
5103
// Emit code that halts the VM with a message: push msg and the current
// eip, save all registers, and call debug32 (whose parameter list matches
// this push order); hlt() is never reached in the normal case.
void MacroAssembler::stop(const char* msg) {
  ExternalAddress message((address)msg);
  // push address of message
  pushptr(message.addr());
  { Label L; call(L, relocInfo::none); bind(L); }     // push eip
  pusha();                                           // push registers
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
  hlt();
}
5113
// Emit code that prints a warning message at runtime without stopping:
// preserves the full CPU state around the call to the VM's warning().
void MacroAssembler::warn(const char* msg) {
  push_CPU_state();

  ExternalAddress message((address) msg);
  // push address of message
  pushptr(message.addr());

  call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
  addl(rsp, wordSize);       // discard argument
  pop_CPU_state();
}
5125
5126#else // _LP64
5127
5128// 64 bit versions
5129
// 64-bit: convert a reachable rval literal into a pc-relative Address
// (disp32 computed against the current pc).
Address MacroAssembler::as_Address(AddressLiteral adr) {
  // amd64 always does this as a pc-rel
  // we can be absolute or disp based on the instruction type
  // jmp/call are displacements others are absolute
  assert(!adr.is_lval(), "must be rval");
  assert(reachable(adr), "must be");
  return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());

}
5139
// 64-bit: array bases may be outside disp32 range, so materialize the
// base into rscratch1 (clobbered!) and address relative to it.
Address MacroAssembler::as_Address(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  lea(rscratch1, base);
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(rscratch1, index._index, index._scale, index._disp);
  return array;
}
5148
// Emit the fast path for acquiring a biased lock on the object in obj_reg.
// On success control reaches 'done' (or *slow_case on a failed CAS); if the
// object is not bias-eligible control falls through at the internal
// cas_label so the caller can continue with the normal CAS-based locking.
// swap_reg must be rax (cmpxchgq) and is clobbered, as is tmp_reg.
// Returns the code offset of the mark-word load (for implicit null-check
// bookkeeping), or -1 when swap_reg_contains_mark and no load was emitted.
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
  assert(tmp_reg != noreg, "tmp_reg must be supplied");
  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  if (PrintBiasedLockingStatistics && counters == NULL)
    counters = BiasedLocking::counters();

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    movq(swap_reg, mark_addr);
  }
  movq(tmp_reg, swap_reg);
  andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
  jcc(Assembler::notEqual, cas_label);
  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // tmp_reg := (prototype | thread) XOR mark; any set bit outside the age
  // field then indicates a mismatch in owner, epoch, or bias pattern.
  load_prototype_header(tmp_reg, obj_reg);
  orq(tmp_reg, r15_thread);
  xorq(tmp_reg, swap_reg);
  andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
  }
  jcc(Assembler::equal, done);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  jcc(Assembler::notZero, try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  testq(tmp_reg, markOopDesc::epoch_mask_in_place);
  jcc(Assembler::notZero, try_rebias);

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  andq(swap_reg,
       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  movq(tmp_reg, swap_reg);
  orq(tmp_reg, r15_thread);
  if (os::is_MP()) {
    lock();
  }
  // CAS: mark word := unbiased header | thread, expecting the unbiased
  // header (in rax/swap_reg); ZF set on success.
  cmpxchgq(tmp_reg, Address(obj_reg, 0));
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  load_prototype_header(tmp_reg, obj_reg);
  orq(tmp_reg, r15_thread);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgq(tmp_reg, Address(obj_reg, 0));
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  load_prototype_header(tmp_reg, obj_reg);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgq(tmp_reg, Address(obj_reg, 0));
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
  }

  bind(cas_label);

  return null_check_offset;
}
5307
// Call a leaf (no Java frame) VM entry point with the arguments already in
// the C calling-convention registers.  Ensures the 16-byte stack alignment
// required by the AMD64 ABI at the call site.
void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
  Label L, E;

#ifdef _WIN64
  // Windows always allocates space for it's register args
  assert(num_args <= 4, "only register arguments supported");
  subq(rsp,  frame::arg_reg_save_area_bytes);
#endif

  // Align stack if necessary
  testl(rsp, 15);
  jcc(Assembler::zero, L);

  // rsp is 8-byte but not 16-byte aligned here: drop 8 more bytes so the
  // callee sees an aligned stack, and restore after the call.
  subq(rsp, 8);
  {
    call(RuntimeAddress(entry_point));
  }
  addq(rsp, 8);
  jmp(E);

  bind(L);
  {
    call(RuntimeAddress(entry_point));
  }

  bind(E);

#ifdef _WIN64
  // restore stack pointer
  addq(rsp, frame::arg_reg_save_area_bytes);
#endif

}
5341
5342void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
5343  assert(!src2.is_lval(), "should use cmpptr");
5344
5345  if (reachable(src2)) {
5346    cmpq(src1, as_Address(src2));
5347  } else {
5348    lea(rscratch1, src2);
5349    Assembler::cmpq(src1, Address(rscratch1, 0));
5350  }
5351}
5352
int MacroAssembler::corrected_idivq(Register reg) {
  // Full implementation of Java ldiv and lrem; checks for special
  // case as described in JVM spec., p.243 & p.271.  The function
  // returns the (pc) offset of the idivl instruction - may be needed
  // for implicit exceptions.
  //
  //         normal case                           special case
  //
  // input : rax: dividend                         min_long
  //         reg: divisor   (may not be eax/edx)   -1
  //
  // output: rax: quotient  (= rax idiv reg)       min_long
  //         rdx: remainder (= rax irem reg)       0
  assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
  static const int64_t min_long = 0x8000000000000000;
  Label normal_case, special_case;

  // check for special case: min_long / -1 would trap in hardware, so it is
  // filtered out and answered directly (quotient = min_long, remainder = 0).
  cmp64(rax, ExternalAddress((address) &min_long));
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where
                  // remainder = 0)
  cmpq(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdqq();         // sign-extend rax into rdx:rax for the 128/64 divide
  int idivq_offset = offset();
  idivq(reg);

  // normal and special case exit
  bind(special_case);

  return idivq_offset;
}
5389
5390void MacroAssembler::decrementq(Register reg, int value) {
5391  if (value == min_jint) { subq(reg, value); return; }
5392  if (value <  0) { incrementq(reg, -value); return; }
5393  if (value == 0) {                        ; return; }
5394  if (value == 1 && UseIncDec) { decq(reg) ; return; }
5395  /* else */      { subq(reg, value)       ; return; }
5396}
5397
5398void MacroAssembler::decrementq(Address dst, int value) {
5399  if (value == min_jint) { subq(dst, value); return; }
5400  if (value <  0) { incrementq(dst, -value); return; }
5401  if (value == 0) {                        ; return; }
5402  if (value == 1 && UseIncDec) { decq(dst) ; return; }
5403  /* else */      { subq(dst, value)       ; return; }
5404}
5405
// Emit a single 5-byte nop (66 66 90 66 90) that can later be atomically
// overwritten when patching a verified entry point.
void MacroAssembler::fat_nop() {
  // A 5 byte nop that is safe for patching (see patch_verified_entry)
  // Recommened sequence from 'Software Optimization Guide for the AMD
  // Hammer Processor'
  emit_byte(0x66);
  emit_byte(0x66);
  emit_byte(0x90);
  emit_byte(0x66);
  emit_byte(0x90);
}
5416
5417void MacroAssembler::incrementq(Register reg, int value) {
5418  if (value == min_jint) { addq(reg, value); return; }
5419  if (value <  0) { decrementq(reg, -value); return; }
5420  if (value == 0) {                        ; return; }
5421  if (value == 1 && UseIncDec) { incq(reg) ; return; }
5422  /* else */      { addq(reg, value)       ; return; }
5423}
5424
5425void MacroAssembler::incrementq(Address dst, int value) {
5426  if (value == min_jint) { addq(dst, value); return; }
5427  if (value <  0) { decrementq(dst, -value); return; }
5428  if (value == 0) {                        ; return; }
5429  if (value == 1 && UseIncDec) { incq(dst) ; return; }
5430  /* else */      { addq(dst, value)       ; return; }
5431}
5432
5433// 32bit can do a case table jump in one instruction but we no longer allow the base
5434// to be installed in the Address class
5435void MacroAssembler::jump(ArrayAddress entry) {
5436  lea(rscratch1, entry.base());
5437  Address dispatch = entry.index();
5438  assert(dispatch._base == noreg, "must be");
5439  dispatch._base = rscratch1;
5440  jmp(dispatch);
5441}
5442
// 32-bit-only long compare; never reached on amd64 where longs fit one register.
void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  cmpq(x_lo, y_lo);
}
5447
// Load the literal's address (with its relocation) into dst.
void MacroAssembler::lea(Register dst, AddressLiteral src) {
    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
}
5451
// Store the literal's address into memory at dst; clobbers rscratch1.
void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
  movptr(dst, rscratch1);
}
5456
// Emit a one-byte LEAVE (mov rsp,rbp; pop rbp) to tear down the frame.
void MacroAssembler::leave() {
  // %%% is this really better? Why not on 32bit too?
  emit_byte(0xC9); // LEAVE
}
5461
// 32-bit-only long negate; never reached on amd64 where longs fit one register.
void MacroAssembler::lneg(Register hi, Register lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  negq(lo);
}
5466
// Load an oop constant into dst with an immediate-oop relocation so the GC
// can find and update the embedded pointer.
void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
}
5470
// Store an oop constant to memory at dst; clobbers rscratch1.
void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
  movq(dst, rscratch1);
}
5475
5476void MacroAssembler::movptr(Register dst, AddressLiteral src) {
5477  if (src.is_lval()) {
5478    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
5479  } else {
5480    if (reachable(src)) {
5481      movq(dst, as_Address(src));
5482    } else {
5483      lea(rscratch1, src);
5484      movq(dst, Address(rscratch1,0));
5485    }
5486  }
5487}
5488
// Store src into the indexed array slot; clobbers rscratch1 (via as_Address).
void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movq(as_Address(dst), src);
}
5492
// Load the indexed array slot into dst; clobbers rscratch1 (via as_Address).
void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movq(dst, as_Address(src));
}
5496
// src should NEVER be a real pointer. Use AddressLiteral for true pointers
// (an unrelocated raw pointer would break if the code is ever moved).
void MacroAssembler::movptr(Address dst, intptr_t src) {
  mov64(rscratch1, src);    // 64-bit immediate needs a register staging step
  movq(dst, rscratch1);
}
5502
// These are mostly for initializing NULL
// (the 32-bit immediate is sign-extended to 64 bits in memory).
void MacroAssembler::movptr(Address dst, int32_t src) {
  movslq(dst, src);
}
5507
// Load a sign-extended 32-bit constant into dst (mostly for NULL).
void MacroAssembler::movptr(Register dst, int32_t src) {
  mov64(dst, (intptr_t)src);
}
5511
// Push an oop constant (relocated) onto the stack; clobbers rscratch1.
void MacroAssembler::pushoop(jobject obj) {
  movoop(rscratch1, obj);
  push(rscratch1);
}
5516
5517void MacroAssembler::pushptr(AddressLiteral src) {
5518  lea(rscratch1, src);
5519  if (src.is_lval()) {
5520    push(rscratch1);
5521  } else {
5522    pushq(Address(rscratch1, 0));
5523  }
5524}
5525
// Clear the current thread's last-Java-frame anchor.  sp is always cleared;
// fp and pc only on request.
void MacroAssembler::reset_last_Java_frame(bool clear_fp,
                                           bool clear_pc) {
  // we must set sp to zero to clear frame
  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  }

  if (clear_pc) {
    movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
  }
}
5540
// Record the current frame in the thread's anchor so stack walkers can find
// it.  fp and pc are optional; sp is stored last — NOTE(review): presumably
// so the anchor only becomes "set" once all fields are valid; confirm against
// JavaFrameAnchor's reader-side assumptions.
void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
           last_java_fp);
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    Address java_pc(r15_thread,
                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
    lea(rscratch1, InternalAddress(last_java_pc));
    movptr(java_pc, rscratch1);
  }

  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}
5565
// Move arg into the first C argument register, skipping a no-op mov.
static void pass_arg0(MacroAssembler* masm, Register arg) {
  if (c_rarg0 != arg ) {
    masm->mov(c_rarg0, arg);
  }
}
5571
// Move arg into the second C argument register, skipping a no-op mov.
static void pass_arg1(MacroAssembler* masm, Register arg) {
  if (c_rarg1 != arg ) {
    masm->mov(c_rarg1, arg);
  }
}
5577
// Move arg into the third C argument register, skipping a no-op mov.
static void pass_arg2(MacroAssembler* masm, Register arg) {
  if (c_rarg2 != arg ) {
    masm->mov(c_rarg2, arg);
  }
}
5583
// Move arg into the fourth C argument register, skipping a no-op mov.
static void pass_arg3(MacroAssembler* masm, Register arg) {
  if (c_rarg3 != arg ) {
    masm->mov(c_rarg3, arg);
  }
}
5589
// Emit code that aborts with 'msg' via debug64(msg, rip, regs[]).
void MacroAssembler::stop(const char* msg) {
  address rip = pc();  // capture before pusha so debug64 reports this site
  pusha(); // get regs on stack
  lea(c_rarg0, ExternalAddress((address) msg));
  lea(c_rarg1, InternalAddress(rip));
  movq(c_rarg2, rsp); // pass pointer to regs array
  andq(rsp, -16); // align stack as required by ABI
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
  hlt();
}
5600
// Emit a call to the VM's warning(msg) that preserves all CPU state.
void MacroAssembler::warn(const char* msg) {
  push(rsp);          // save original rsp so alignment can be undone
  andq(rsp, -16);     // align stack as required by push_CPU_state and call

  push_CPU_state();   // keeps alignment at 16 bytes
  lea(c_rarg0, ExternalAddress((address) msg));
  call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
  pop_CPU_state();
  pop(rsp);
}
5611
5612#ifndef PRODUCT
5613extern "C" void findpc(intptr_t x);
5614#endif
5615
// Runtime target of stop() on amd64: show the message and, optionally, the
// register state captured by pusha() in stop().  regs[] holds the 16 GPRs in
// push order, so rax is at regs[15] down to r15 at regs[0].
// NOTE(review): "%016lx" assumes a 64-bit 'long' (LP64); confirm for LLP64
// (Win64) builds where long is 32 bits.
void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
  // In order to get locks to work, we need to fake a in_VM state
  if (ShowMessageBoxOnError ) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // XXX correct this offset for amd64
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("rip = 0x%016lx", pc);
#ifndef PRODUCT
      tty->cr();
      findpc(pc);
      tty->cr();
#endif
      tty->print_cr("rax = 0x%016lx", regs[15]);
      tty->print_cr("rbx = 0x%016lx", regs[12]);
      tty->print_cr("rcx = 0x%016lx", regs[14]);
      tty->print_cr("rdx = 0x%016lx", regs[13]);
      tty->print_cr("rdi = 0x%016lx", regs[8]);
      tty->print_cr("rsi = 0x%016lx", regs[9]);
      tty->print_cr("rbp = 0x%016lx", regs[10]);
      tty->print_cr("rsp = 0x%016lx", regs[11]);
      tty->print_cr("r8  = 0x%016lx", regs[7]);
      tty->print_cr("r9  = 0x%016lx", regs[6]);
      tty->print_cr("r10 = 0x%016lx", regs[5]);
      tty->print_cr("r11 = 0x%016lx", regs[4]);
      tty->print_cr("r12 = 0x%016lx", regs[3]);
      tty->print_cr("r13 = 0x%016lx", regs[2]);
      tty->print_cr("r14 = 0x%016lx", regs[1]);
      tty->print_cr("r15 = 0x%016lx", regs[0]);
      BREAKPOINT;
    }
    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
                    msg);
    assert(false, err_msg("DEBUG MESSAGE: %s", msg));
  }
}
5665
5666#endif // _LP64
5667
5668// Now versions that are common to 32/64 bit
5669
// Pointer-width add of an immediate: addq on LP64, addl on 32-bit.
void MacroAssembler::addptr(Register dst, int32_t imm32) {
  LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
}
5673
// Pointer-width register add: addq on LP64, addl on 32-bit.
void MacroAssembler::addptr(Register dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}
5677
// Pointer-width add of a register into memory: addq on LP64, addl on 32-bit.
void MacroAssembler::addptr(Address dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}
5681
5682void MacroAssembler::align(int modulus) {
5683  if (offset() % modulus != 0) {
5684    nop(modulus - (offset() % modulus));
5685  }
5686}
5687
5688void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
5689  if (reachable(src)) {
5690    andpd(dst, as_Address(src));
5691  } else {
5692    lea(rscratch1, src);
5693    andpd(dst, Address(rscratch1, 0));
5694  }
5695}
5696
// Pointer-width AND with an immediate: andq on LP64, andl on 32-bit.
void MacroAssembler::andptr(Register dst, int32_t imm32) {
  LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
}
5700
// Atomically increment the 32-bit counter at counter_addr, preserving the
// flags register around the locked add.
void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
  pushf();                  // incrementl clobbers flags; save them
  if (os::is_MP())
    lock();                 // make the increment atomic across CPUs
  incrementl(counter_addr);
  popf();
}
5708
// Writes to stack successive pages until offset reached to check for
// stack overflow + shadow pages.  This clobbers tmp.
void MacroAssembler::bang_stack_size(Register size, Register tmp) {
  movptr(tmp, rsp);
  // Bang stack for total size given plus shadow page size.
  // Bang one page at a time because large size can bang beyond yellow and
  // red zones.
  Label loop;
  bind(loop);
  movl(Address(tmp, (-os::vm_page_size())), size );
  subptr(tmp, os::vm_page_size());
  subl(size, os::vm_page_size());
  jcc(Assembler::greater, loop);

  // Bang down shadow pages too.
  // The -1 because we already subtracted 1 page.
  for (int i = 0; i< StackShadowPages-1; i++) {
    // this could be any sized move but this is can be a debugging crumb
    // so the bigger the better.
    movptr(Address(tmp, (-i*os::vm_page_size())), size );
  }
}
5731
// Fast-path exit for a biased lock: if the mark word still carries the bias
// pattern, unlocking is a no-op and control jumps to 'done'.
void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
  jcc(Assembler::equal, done);
}
5746
// Normalize a C-style boolean in x to exactly 0 or 1.
void MacroAssembler::c2bool(Register x) {
  // implements x == 0 ? 0 : 1
  // note: must only look at least-significant byte of x
  //       since C-style booleans are stored in one byte
  //       only! (was bug)
  andl(x, 0xFF);
  setb(Assembler::notZero, x);
}
5755
// Wouldn't need if AddressLiteral version had new name
// (thin forwarder to the Assembler label-call).
void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
  Assembler::call(L, rtype);
}
5760
// Indirect call through a register; thin forwarder to the Assembler.
void MacroAssembler::call(Register entry) {
  Assembler::call(entry);
}
5764
5765void MacroAssembler::call(AddressLiteral entry) {
5766  if (reachable(entry)) {
5767    Assembler::call_literal(entry.target(), entry.rspec());
5768  } else {
5769    lea(rscratch1, entry);
5770    Assembler::call(rscratch1);
5771  }
5772}
5773
5774// Implementation of call_VM versions
5775
// call_VM with no arguments.  The call/jmp pair creates an out-of-line
// trampoline: the near call pushes a return pc that call_VM_helper uses
// as last_Java_pc, then execution resumes at E.
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
  ret(0);

  bind(E);
}
5789
// call_VM with one register argument (same trampoline pattern as the
// zero-argument overload above).
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
  ret(0);

  bind(E);
}
5805
// call_VM with two register arguments.  Arguments are marshalled last-first
// so an earlier mov cannot clobber a later source register.
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));

  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
  ret(0);

  bind(E);
}
5826
// call_VM with three register arguments, marshalled last-first to avoid
// clobbering a yet-to-be-moved source register.
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);

  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
  ret(0);

  bind(E);
}
5852
// call_VM variant with an explicit last_java_sp; forwards to call_VM_base
// with the platform thread register.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}
5861
// One-argument variant of the explicit-sp call_VM above.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}
5870
// Two-argument variant; arguments are marshalled last-first so an earlier
// mov cannot clobber a later source register.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}
5883
// Three-argument variant; arguments are marshalled last-first so an earlier
// mov cannot clobber a later source register.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}
5899
// Like call_VM but always dispatches to MacroAssembler::call_VM_base,
// bypassing any subclass override of call_VM_base.
void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   int number_of_arguments,
                                   bool check_exceptions) {
  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}
5908
// One-argument variant of super_call_VM.
void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   bool check_exceptions) {
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}
5917
// Two-argument variant of super_call_VM; arguments marshalled last-first.
void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   Register arg_2,
                                   bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}
5930
// Three-argument variant of super_call_VM; arguments marshalled last-first.
void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   Register arg_2,
                                   Register arg_3,
                                   bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}
5946
// The workhorse behind every call_VM variant: sets the last-Java-frame
// anchor, calls the VM entry point, restores the thread register, clears
// the anchor, and optionally forwards pending exceptions and fetches the
// oop result from the thread.
void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address  entry_point,
                                  int      number_of_arguments,
                                  bool     check_exceptions) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifdef _LP64
    java_thread = r15_thread;
#else
    java_thread = rdi;
    get_thread(java_thread);
#endif // LP64
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }
  // debugging support
  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
#ifdef ASSERT
  LP64_ONLY(if (UseCompressedOops) verify_heapbase("call_VM_base");)
#endif // ASSERT

  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  // push java thread (becomes first argument of C function)

  NOT_LP64(push(java_thread); number_of_arguments++);
  LP64_ONLY(mov(c_rarg0, r15_thread));

  // set last Java frame before call
  assert(last_java_sp != rbp, "can't use ebp/rbp");

  // Only interpreter should have to set fp
  set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);

  // do the call, remove parameters
  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
  if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
    // rdi & rsi (also r15) are callee saved -> nothing to do
#ifdef ASSERT
    guarantee(java_thread != rax, "change this code");
    push(rax);
    { Label L;
      get_thread(rax);
      cmpptr(java_thread, rax);
      jcc(Assembler::equal, L);
      stop("MacroAssembler::call_VM_base: rdi not callee saved?");
      bind(L);
    }
    pop(rax);
#endif
  } else {
    get_thread(java_thread);
  }
  // reset last Java frame
  // Only interpreter should have to clear fp
  reset_last_Java_frame(java_thread, true, false);

#ifndef CC_INTERP
   // C++ interp handles this in the interpreter
  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
#endif /* CC_INTERP */

  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
#ifndef _LP64
    jump_cc(Assembler::notEqual,
            RuntimeAddress(StubRoutines::forward_exception_entry()));
#else
    // This used to conditionally jump to forward_exception however it is
    // possible if we relocate that the branch will not reach. So we must jump
    // around so we can always reach
    Label ok;
    jcc(Assembler::equal, ok);
    jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
    bind(ok);
#endif // LP64
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
    movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
    verify_oop(oop_result, "broken oop in call_VM_base");
  }
}
6045
// Shared helper for the call_VM entry points: computes the last_Java_sp
// value (in rax) that call_VM_base should record, then delegates to it.
void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  // Calculate the value for last_Java_sp
  // somewhat subtle. call_VM does an intermediate call
  // which places a return address on the stack just under the
  // stack pointer as the user finished with it. This allows
  // us to retrieve last_Java_pc from last_Java_sp[-1].
  // On 32bit we then have to push additional args on the stack to accomplish
  // the actual requested call. On 64bit call_VM only can use register args
  // so the only extra space is the return address that call_VM created.
  // This hopefully explains the calculations here.

#ifdef _LP64
  // We've pushed one address, correct last_Java_sp
  lea(rax, Address(rsp, wordSize));
#else
  // Skip the pushed outgoing arguments plus the return address.
  lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
#endif // LP64

  call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);

}
6068
// Leaf call into the VM: no Java frame bookkeeping and no pending-exception
// check.  Arguments (if any) have already been passed via pass_argN.
void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}
6072
// One-argument leaf call: place arg_0 in the first C argument slot, then call.
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 1);
}
6077
// Two-argument leaf call.  Arguments are passed last-to-first so that moving
// a later argument cannot clobber the source register of an earlier one; the
// assert catches the remaining hazard (arg_0 already living in c_rarg1).
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 2);
}
6085
// Three-argument leaf call.  Same last-to-first discipline as the two-arg
// form; each assert verifies a not-yet-passed source register has not been
// overwritten by a previous pass_argN.
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 3);
}
6095
// Like call_VM_leaf, but the explicitly qualified call bypasses any subclass
// override of call_VM_leaf_base (e.g. extra interpreter-state checks).
void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}
6100
// Two-argument variant of super_call_VM_leaf; arguments passed last-to-first
// to avoid clobbering (see call_VM_leaf).
void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}
6108
// Three-argument variant of super_call_VM_leaf; arguments passed
// last-to-first to avoid clobbering (see call_VM_leaf).
void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}
6118
// Four-argument variant of super_call_VM_leaf; arguments passed
// last-to-first to avoid clobbering (see call_VM_leaf).
void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
  LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 4);
}
6132
// Post-VM-call hook invoked from call_VM_base.  Deliberately a no-op at this
// level; subclasses that support early return override it (override not
// visible in this file — confirm against InterpreterMacroAssembler).
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}
6135
// Post-VM-call hook invoked from call_VM_base.  Deliberately a no-op at this
// level; subclasses that support frame popping override it (override not
// visible in this file — confirm against InterpreterMacroAssembler).
void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}
6138
6139void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
6140  if (reachable(src1)) {
6141    cmpl(as_Address(src1), imm);
6142  } else {
6143    lea(rscratch1, src1);
6144    cmpl(Address(rscratch1, 0), imm);
6145  }
6146}
6147
6148void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
6149  assert(!src2.is_lval(), "use cmpptr");
6150  if (reachable(src2)) {
6151    cmpl(src1, as_Address(src2));
6152  } else {
6153    lea(rscratch1, src2);
6154    cmpl(src1, Address(rscratch1, 0));
6155  }
6156}
6157
// 32-bit register/immediate compare; thin forward to the base assembler.
void MacroAssembler::cmp32(Register src1, int32_t imm) {
  Assembler::cmpl(src1, imm);
}
6161
// 32-bit register/memory compare; thin forward to the base assembler.
void MacroAssembler::cmp32(Register src1, Address src2) {
  Assembler::cmpl(src1, src2);
}
6165
// Double compare producing a three-way int result in dst:
//   dst = -1 if opr1 < opr2, 0 if equal, +1 if opr1 > opr2.
// ucomisd sets PF on an unordered (NaN) comparison; unordered_is_less
// selects whether NaN maps to -1 or +1 (Java's fcmpl/fcmpg semantics).
void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomisd(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);                 // default: less (also the NaN result)
    jcc(Assembler::parity, L);     // unordered -> -1
    jcc(Assembler::below , L);     // opr1 < opr2 -> -1
    movl(dst, 0);
    jcc(Assembler::equal , L);     // equal -> 0
    increment(dst);                // otherwise greater -> +1
  } else { // unordered is greater
    movl(dst, 1);                  // default: greater (also the NaN result)
    jcc(Assembler::parity, L);     // unordered -> +1
    jcc(Assembler::above , L);     // opr1 > opr2 -> +1
    movl(dst, 0);
    jcc(Assembler::equal , L);     // equal -> 0
    decrementl(dst);               // otherwise less -> -1
  }
  bind(L);
}
6187
// Float compare producing a three-way int result in dst; identical flag
// logic to cmpsd2int but using the single-precision ucomiss.
// unordered_is_less selects whether NaN yields -1 or +1.
void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomiss(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);                 // default: less (also the NaN result)
    jcc(Assembler::parity, L);     // unordered -> -1
    jcc(Assembler::below , L);     // opr1 < opr2 -> -1
    movl(dst, 0);
    jcc(Assembler::equal , L);     // equal -> 0
    increment(dst);                // otherwise greater -> +1
  } else { // unordered is greater
    movl(dst, 1);                  // default: greater (also the NaN result)
    jcc(Assembler::parity, L);     // unordered -> +1
    jcc(Assembler::above , L);     // opr1 > opr2 -> +1
    movl(dst, 0);
    jcc(Assembler::equal , L);     // equal -> 0
    decrementl(dst);               // otherwise less -> -1
  }
  bind(L);
}
6209
6210
6211void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
6212  if (reachable(src1)) {
6213    cmpb(as_Address(src1), imm);
6214  } else {
6215    lea(rscratch1, src1);
6216    cmpb(Address(rscratch1, 0), imm);
6217  }
6218}
6219
// Pointer-sized compare of a register against an AddressLiteral.
// An lval literal means "compare against the address itself"; otherwise
// the literal's memory contents are compared.
void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
#ifdef _LP64
  if (src2.is_lval()) {
    // Compare against the 64-bit address value, materialized in rscratch1.
    movptr(rscratch1, src2);
    Assembler::cmpq(src1, rscratch1);
  } else if (reachable(src2)) {
    cmpq(src1, as_Address(src2));
  } else {
    // Target memory not directly addressable: indirect through rscratch1.
    lea(rscratch1, src2);
    Assembler::cmpq(src1, Address(rscratch1, 0));
  }
#else
  if (src2.is_lval()) {
    // 32-bit: the address fits in an immediate; emit with relocation info.
    cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
  } else {
    cmpl(src1, as_Address(src2));
  }
#endif // _LP64
}
6239
// Compare the pointer stored at src1 against the literal address src2.
// Only lval literals make sense here (x86 has no mem-mem compare).
void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
  assert(src2.is_lval(), "not a mem-mem compare");
#ifdef _LP64
  // moves src2's literal address
  movptr(rscratch1, src2);
  Assembler::cmpq(src1, rscratch1);
#else
  // 32-bit: address fits in an immediate; emit with relocation info.
  cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
#endif // _LP64
}
6250
6251void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
6252  if (reachable(adr)) {
6253    if (os::is_MP())
6254      lock();
6255    cmpxchgptr(reg, as_Address(adr));
6256  } else {
6257    lea(rscratch1, adr);
6258    if (os::is_MP())
6259      lock();
6260    cmpxchgptr(reg, Address(rscratch1, 0));
6261  }
6262}
6263
// Pointer-width compare-and-exchange: cmpxchgq on 64-bit, cmpxchgl on
// 32-bit (compares rax/eax with adr and stores reg into adr on match).
void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
  LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
}
6267
6268void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
6269  if (reachable(src)) {
6270    comisd(dst, as_Address(src));
6271  } else {
6272    lea(rscratch1, src);
6273    comisd(dst, Address(rscratch1, 0));
6274  }
6275}
6276
6277void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
6278  if (reachable(src)) {
6279    comiss(dst, as_Address(src));
6280  } else {
6281    lea(rscratch1, src);
6282    comiss(dst, Address(rscratch1, 0));
6283  }
6284}
6285
6286
6287void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
6288  Condition negated_cond = negate_condition(cond);
6289  Label L;
6290  jcc(negated_cond, L);
6291  atomic_incl(counter_addr);
6292  bind(L);
6293}
6294
6295int MacroAssembler::corrected_idivl(Register reg) {
6296  // Full implementation of Java idiv and irem; checks for
6297  // special case as described in JVM spec., p.243 & p.271.
6298  // The function returns the (pc) offset of the idivl
6299  // instruction - may be needed for implicit exceptions.
6300  //
6301  //         normal case                           special case
6302  //
6303  // input : rax,: dividend                         min_int
6304  //         reg: divisor   (may not be rax,/rdx)   -1
6305  //
6306  // output: rax,: quotient  (= rax, idiv reg)       min_int
6307  //         rdx: remainder (= rax, irem reg)       0
6308  assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
6309  const int min_int = 0x80000000;
6310  Label normal_case, special_case;
6311
6312  // check for special case
6313  cmpl(rax, min_int);
6314  jcc(Assembler::notEqual, normal_case);
6315  xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
6316  cmpl(reg, -1);
6317  jcc(Assembler::equal, special_case);
6318
6319  // handle normal case
6320  bind(normal_case);
6321  cdql();
6322  int idivl_offset = offset();
6323  idivl(reg);
6324
6325  // normal and special case exit
6326  bind(special_case);
6327
6328  return idivl_offset;
6329}
6330
6331
6332
6333void MacroAssembler::decrementl(Register reg, int value) {
6334  if (value == min_jint) {subl(reg, value) ; return; }
6335  if (value <  0) { incrementl(reg, -value); return; }
6336  if (value == 0) {                        ; return; }
6337  if (value == 1 && UseIncDec) { decl(reg) ; return; }
6338  /* else */      { subl(reg, value)       ; return; }
6339}
6340
6341void MacroAssembler::decrementl(Address dst, int value) {
6342  if (value == min_jint) {subl(dst, value) ; return; }
6343  if (value <  0) { incrementl(dst, -value); return; }
6344  if (value == 0) {                        ; return; }
6345  if (value == 1 && UseIncDec) { decl(dst) ; return; }
6346  /* else */      { subl(dst, value)       ; return; }
6347}
6348
6349void MacroAssembler::division_with_shift (Register reg, int shift_value) {
6350  assert (shift_value > 0, "illegal shift value");
6351  Label _is_positive;
6352  testl (reg, reg);
6353  jcc (Assembler::positive, _is_positive);
6354  int offset = (1 << shift_value) - 1 ;
6355
6356  if (offset == 1) {
6357    incrementl(reg);
6358  } else {
6359    addl(reg, offset);
6360  }
6361
6362  bind (_is_positive);
6363  sarl(reg, shift_value);
6364}
6365
6366// !defined(COMPILER2) is because of stupid core builds
6367#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
// Clear the entire x87 register stack: emms does it in one instruction
// when MMX is available; otherwise free all eight slots one by one.
void MacroAssembler::empty_FPU_stack() {
  if (VM_Version::supports_mmx()) {
    emms();
  } else {
    for (int i = 8; i-- > 0; ) ffree(i);
  }
}
6375#endif // !LP64 || C1 || !C2
6376
6377
// Inline (lock-free) allocation from the shared eden space.
// Defines obj, preserves var_size_in_bytes.
//   obj               - result register; must be rax (cmpxchg implicitly
//                       uses rax as the compare value)
//   var_size_in_bytes - size register, or noreg to use con_size_in_bytes
//   t1                - temp; holds the proposed new top pointer
//   slow_case         - taken when inline allocation is unavailable or fails
void MacroAssembler::eden_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Label& slow_case) {
  assert(obj == rax, "obj must be in rax, for cmpxchg");
  assert_different_registers(obj, var_size_in_bytes, t1);
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // Heap configuration forbids inline contiguous allocation.
    jmp(slow_case);
  } else {
    Register end = t1;
    Label retry;
    bind(retry);
    // Load the current top and compute the would-be new top.
    ExternalAddress heap_top((address) Universe::heap()->top_addr());
    movptr(obj, heap_top);
    if (var_size_in_bytes == noreg) {
      lea(end, Address(obj, con_size_in_bytes));
    } else {
      lea(end, Address(obj, var_size_in_bytes, Address::times_1));
    }
    // if end < obj then we wrapped around => object too long => slow case
    cmpptr(end, obj);
    jcc(Assembler::below, slow_case);
    // New top past the heap end => not enough space => slow case.
    cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
    jcc(Assembler::above, slow_case);
    // Compare obj with the top addr, and if still equal, store the new top addr in
    // end at the address of the top addr pointer. Sets ZF if was equal, and clears
    // it otherwise. Use lock prefix for atomicity on MPs.
    locked_cmpxchgptr(end, heap_top);
    jcc(Assembler::notEqual, retry);   // lost the race: reload top and retry
  }
}
6411
// Standard x86 frame prologue: save the caller's frame pointer and make
// rbp the base of the new frame.
void MacroAssembler::enter() {
  push(rbp);
  mov(rbp, rsp);
}
6416
// Convenience form of fcmp: compare ST(0) with ST(1) and pop both.
void MacroAssembler::fcmp(Register tmp) {
  fcmp(tmp, 1, true, true);
}
6420
// Compare x = ST(0) against y = ST(index) and leave the result in eflags.
// pop_left pops ST(0); pop_right additionally pops the other operand
// (allowed only together with pop_left).  With cmov/fucomi support the
// FPU writes eflags directly and no temp is needed; otherwise the status
// word is routed through ax/sahf, requiring tmp to preserve rax.
void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
  assert(!pop_right || pop_left, "usage error");
  if (VM_Version::supports_cmov()) {
    assert(tmp == noreg, "unneeded temp");
    if (pop_left) {
      fucomip(index);
    } else {
      fucomi(index);
    }
    if (pop_right) {
      fpop();
    }
  } else {
    assert(tmp != noreg, "need temp");
    if (pop_left) {
      if (pop_right) {
        fcompp();
      } else {
        fcomp(index);
      }
    } else {
      fcom(index);
    }
    // convert FPU condition into eflags condition via rax,
    save_rax(tmp);
    fwait(); fnstsw_ax();
    sahf();
    restore_rax(tmp);
  }
  // condition codes set as follows:
  //
  // CF (corresponds to C0) if x < y
  // PF (corresponds to C2) if unordered
  // ZF (corresponds to C3) if x = y
}
6456
// Convenience form: compare ST(0) with ST(1), pop both, and produce a
// -1/0/+1 result in dst.
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
  fcmp2int(dst, unordered_is_less, 1, true, true);
}
6460
// x87 compare producing a three-way int result in dst, mirroring
// cmpsd2int/cmpss2int: -1 if less, 0 if equal, +1 if greater, with
// unordered (NaN) mapped to -1 or +1 per unordered_is_less.  On pre-cmov
// hardware dst doubles as the temp register for fcmp's sahf sequence.
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
  fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
  Label L;
  if (unordered_is_less) {
    movl(dst, -1);                 // default: less (also the NaN result)
    jcc(Assembler::parity, L);     // unordered -> -1
    jcc(Assembler::below , L);     // x < y -> -1
    movl(dst, 0);
    jcc(Assembler::equal , L);     // equal -> 0
    increment(dst);                // otherwise greater -> +1
  } else { // unordered is greater
    movl(dst, 1);                  // default: greater (also the NaN result)
    jcc(Assembler::parity, L);     // unordered -> +1
    jcc(Assembler::above , L);     // x > y -> +1
    movl(dst, 0);
    jcc(Assembler::equal , L);     // equal -> 0
    decrementl(dst);               // otherwise less -> -1
  }
  bind(L);
}
6481
// Push the double at a literal address onto the x87 stack.
void MacroAssembler::fld_d(AddressLiteral src) {
  fld_d(as_Address(src));
}
6485
// Push the float at a literal address onto the x87 stack.
void MacroAssembler::fld_s(AddressLiteral src) {
  fld_s(as_Address(src));
}
6489
// Push the 80-bit extended value at a literal address onto the x87 stack.
void MacroAssembler::fld_x(AddressLiteral src) {
  Assembler::fld_x(as_Address(src));
}
6493
// Load the x87 control word from a literal address.
void MacroAssembler::fldcw(AddressLiteral src) {
  Assembler::fldcw(as_Address(src));
}
6497
// Pop ST(0) from the x87 stack: free the slot, then advance the
// top-of-stack pointer.
void MacroAssembler::fpop() {
  ffree();
  fincstp();
}
6502
// IEEE remainder of ST(0) by ST(1) via fprem.  fprem computes a partial
// remainder and sets C2 in the status word while reduction is incomplete,
// so loop until C2 clears.  tmp preserves rax around the fnstsw sequence.
void MacroAssembler::fremr(Register tmp) {
  save_rax(tmp);
  { Label L;
    bind(L);
    fprem();
    fwait(); fnstsw_ax();
#ifdef _LP64
    // C2 is bit 0x400 of the status word now in ax.
    testl(rax, 0x400);
    jcc(Assembler::notEqual, L);
#else
    // sahf puts C2 into PF; loop while set.
    sahf();
    jcc(Assembler::parity, L);
#endif // _LP64
  }
  restore_rax(tmp);
  // Result is in ST0.
  // Note: fxch & fpop to get rid of ST1
  // (otherwise FPU stack could overflow eventually)
  fxch(1);
  fpop();
}
6524
6525
6526void MacroAssembler::incrementl(AddressLiteral dst) {
6527  if (reachable(dst)) {
6528    incrementl(as_Address(dst));
6529  } else {
6530    lea(rscratch1, dst);
6531    incrementl(Address(rscratch1, 0));
6532  }
6533}
6534
// Increment the 32-bit value at an array-style address.
void MacroAssembler::incrementl(ArrayAddress dst) {
  incrementl(as_Address(dst));
}
6538
6539void MacroAssembler::incrementl(Register reg, int value) {
6540  if (value == min_jint) {addl(reg, value) ; return; }
6541  if (value <  0) { decrementl(reg, -value); return; }
6542  if (value == 0) {                        ; return; }
6543  if (value == 1 && UseIncDec) { incl(reg) ; return; }
6544  /* else */      { addl(reg, value)       ; return; }
6545}
6546
6547void MacroAssembler::incrementl(Address dst, int value) {
6548  if (value == min_jint) {addl(dst, value) ; return; }
6549  if (value <  0) { decrementl(dst, -value); return; }
6550  if (value == 0) {                        ; return; }
6551  if (value == 1 && UseIncDec) { incl(dst) ; return; }
6552  /* else */      { addl(dst, value)       ; return; }
6553}
6554
6555void MacroAssembler::jump(AddressLiteral dst) {
6556  if (reachable(dst)) {
6557    jmp_literal(dst.target(), dst.rspec());
6558  } else {
6559    lea(rscratch1, dst);
6560    jmp(rscratch1);
6561  }
6562}
6563
// Conditional jump to a literal address.  When the target is reachable the
// jcc is emitted by hand, choosing the 2-byte short form when there is no
// relocation and the displacement fits in 8 bits, else the 6-byte near
// form.  Otherwise the condition is reversed and a short branch skips an
// indirect jump through rscratch1.
void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
  if (reachable(dst)) {
    InstructionMark im(this);
    relocate(dst.reloc());
    const int short_size = 2;
    const int long_size = 6;
    // Displacement is measured from the end of the instruction, hence the
    // short_size/long_size corrections below.
    int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);
    if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
#ifdef ASSERT
    warning("reversing conditional branch");
#endif /* ASSERT */
    Label skip;
    jccb(reverse[cc], skip);
    lea(rscratch1, dst);
    Assembler::jmp(rscratch1);
    bind(skip);
  }
}
6592
6593void MacroAssembler::ldmxcsr(AddressLiteral src) {
6594  if (reachable(src)) {
6595    Assembler::ldmxcsr(as_Address(src));
6596  } else {
6597    lea(rscratch1, src);
6598    Assembler::ldmxcsr(Address(rscratch1, 0));
6599  }
6600}
6601
// Load a sign-extended byte into dst.  Returns the code offset of the
// actual load instruction (used by callers for implicit-exception
// bookkeeping — NOTE(review): inferred from similar offset uses in this
// file; confirm at call sites).  On pre-P6 32-bit hardware movsx is
// avoided in favor of a zero-extending load plus shift pair.
int MacroAssembler::load_signed_byte(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    off = offset();
    movsbl(dst, src); // movsxb
  } else {
    off = load_unsigned_byte(dst, src);
    shll(dst, 24);
    sarl(dst, 24);
  }
  return off;
}
6614
6615// Note: load_signed_short used to be called load_signed_word.
6616// Although the 'w' in x86 opcodes refers to the term "word" in the assembler
6617// manual, which means 16 bits, that usage is found nowhere in HotSpot code.
6618// The term "word" in HotSpot means a 32- or 64-bit machine word.
// Load a sign-extended 16-bit value into dst; returns the code offset of
// the load instruction.  Pre-P6 32-bit hardware uses a zero-extending
// load plus shift pair instead of movsx.
int MacroAssembler::load_signed_short(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    // This is dubious to me since it seems safe to do a signed 16 => 64 bit
    // version but this is what 64bit has always done. This seems to imply
    // that users are only using 32bits worth.
    off = offset();
    movswl(dst, src); // movsxw
  } else {
    off = load_unsigned_short(dst, src);
    shll(dst, 16);
    sarl(dst, 16);
  }
  return off;
}
6634
// Load a zero-extended byte into dst; returns the code offset of the load
// instruction.  On P6+ (and always on 64-bit) movzx is preferred per
// Intel Doc. AP-526, "Zero-Extension of Short", p.16, and "3.9 Partial
// Register Penalties", p. 22; older chips use xor + partial-register
// movb instead.  The movzx path is forced when src uses dst (the xor
// would destroy the address).
int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzbl(dst, src); // movzxb
  } else {
    xorl(dst, dst);
    off = offset();
    movb(dst, src);
  }
  return off;
}
6649
// Note: load_unsigned_short used to be called load_unsigned_word.
// Load a zero-extended 16-bit value into dst; returns the code offset of
// the load instruction.  Same P6/partial-register rationale as
// load_unsigned_byte (Intel Doc. AP-526, "Zero-Extension of Short",
// p.16, and "3.9 Partial Register Penalties", p. 22).
int MacroAssembler::load_unsigned_short(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzwl(dst, src); // movzxw
  } else {
    xorl(dst, dst);
    off = offset();
    movw(dst, src);
  }
  return off;
}
6665
// Load a value of 1, 2, 4, or 8 bytes from src into dst, sign- or
// zero-extending sub-word sizes per is_signed.  On 32-bit, an 8-byte
// value needs a second register (dst2) for the high word.
void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case  8:
    assert(dst2 != noreg, "second dest register required");
    movl(dst,  src);
    movl(dst2, src.plus_disp(BytesPerInt));
    break;
#else
  case  8:  movq(dst, src); break;
#endif
  case  4:  movl(dst, src); break;
  case  2:  is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
  case  1:  is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
  default:  ShouldNotReachHere();
  }
}
6683
// Store a value of 1, 2, 4, or 8 bytes from src to dst.  On 32-bit, an
// 8-byte store takes the high word from src2.
void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case  8:
    assert(src2 != noreg, "second source register required");
    movl(dst,                        src);
    movl(dst.plus_disp(BytesPerInt), src2);
    break;
#else
  case  8:  movq(dst, src); break;
#endif
  case  4:  movl(dst, src); break;
  case  2:  movw(dst, src); break;
  case  1:  movb(dst, src); break;
  default:  ShouldNotReachHere();
  }
}
6701
6702void MacroAssembler::mov32(AddressLiteral dst, Register src) {
6703  if (reachable(dst)) {
6704    movl(as_Address(dst), src);
6705  } else {
6706    lea(rscratch1, dst);
6707    movl(Address(rscratch1, 0), src);
6708  }
6709}
6710
6711void MacroAssembler::mov32(Register dst, AddressLiteral src) {
6712  if (reachable(src)) {
6713    movl(dst, as_Address(src));
6714  } else {
6715    lea(rscratch1, src);
6716    movl(dst, Address(rscratch1, 0));
6717  }
6718}
6719
6720// C++ bool manipulation
6721
6722void MacroAssembler::movbool(Register dst, Address src) {
6723  if(sizeof(bool) == 1)
6724    movb(dst, src);
6725  else if(sizeof(bool) == 2)
6726    movw(dst, src);
6727  else if(sizeof(bool) == 4)
6728    movl(dst, src);
6729  else
6730    // unsupported
6731    ShouldNotReachHere();
6732}
6733
6734void MacroAssembler::movbool(Address dst, bool boolconst) {
6735  if(sizeof(bool) == 1)
6736    movb(dst, (int) boolconst);
6737  else if(sizeof(bool) == 2)
6738    movw(dst, (int) boolconst);
6739  else if(sizeof(bool) == 4)
6740    movl(dst, (int) boolconst);
6741  else
6742    // unsupported
6743    ShouldNotReachHere();
6744}
6745
6746void MacroAssembler::movbool(Address dst, Register src) {
6747  if(sizeof(bool) == 1)
6748    movb(dst, src);
6749  else if(sizeof(bool) == 2)
6750    movw(dst, src);
6751  else if(sizeof(bool) == 4)
6752    movl(dst, src);
6753  else
6754    // unsupported
6755    ShouldNotReachHere();
6756}
6757
// Store the immediate byte src at an array-style address.
void MacroAssembler::movbyte(ArrayAddress dst, int src) {
  movb(as_Address(dst), src);
}
6761
6762void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
6763  if (reachable(src)) {
6764    if (UseXmmLoadAndClearUpper) {
6765      movsd (dst, as_Address(src));
6766    } else {
6767      movlpd(dst, as_Address(src));
6768    }
6769  } else {
6770    lea(rscratch1, src);
6771    if (UseXmmLoadAndClearUpper) {
6772      movsd (dst, Address(rscratch1, 0));
6773    } else {
6774      movlpd(dst, Address(rscratch1, 0));
6775    }
6776  }
6777}
6778
6779void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
6780  if (reachable(src)) {
6781    movss(dst, as_Address(src));
6782  } else {
6783    lea(rscratch1, src);
6784    movss(dst, Address(rscratch1, 0));
6785  }
6786}
6787
// Pointer-width register-to-register move (movq on 64-bit, movl on 32-bit).
void MacroAssembler::movptr(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}
6791
// Pointer-width load from memory (movq on 64-bit, movl on 32-bit).
void MacroAssembler::movptr(Register dst, Address src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}
6795
// Pointer-width immediate move.
// src should NEVER be a real pointer. Use AddressLiteral for true pointers
// (those need relocation info, which this raw form does not emit).
void MacroAssembler::movptr(Register dst, intptr_t src) {
  LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
}
6800
// Pointer-width store to memory (movq on 64-bit, movl on 32-bit).
void MacroAssembler::movptr(Address dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}
6804
6805void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
6806  if (reachable(src)) {
6807    movss(dst, as_Address(src));
6808  } else {
6809    lea(rscratch1, src);
6810    movss(dst, Address(rscratch1, 0));
6811  }
6812}
6813
// Null check of reg.  If a later access at `offset` would not fault on a
// NULL base (offset too large for the OS guard page), emit an explicit
// touch of M[reg] now so the segv happens here; otherwise rely on the
// later implicit null check.
void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke OS NULL exception if reg = NULL by
    // accessing M[reg] w/o changing any (non-CC) registers
    // NOTE: cmpl is plenty here to provoke a segv
    cmpptr(rax, Address(reg, 0));
    // Note: should probably use testl(rax, Address(reg, 0));
    //       may be shorter code (however, this version of
    //       testl needs to be implemented first)
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg = NULL
  }
}
6828
// Emit a call to os::breakpoint rather than a raw int3.
void MacroAssembler::os_breakpoint() {
  // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
  // (e.g., MSVC can't call ps() otherwise)
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
}
6834
// Restore FPU then integer state; mirrors push_CPU_state's order in reverse.
void MacroAssembler::pop_CPU_state() {
  pop_FPU_state();
  pop_IU_state();
}
6839
// Restore FPU state saved by push_FPU_state (frstor on 32-bit, fxrstor
// on 64-bit) and release the stack area.
void MacroAssembler::pop_FPU_state() {
  NOT_LP64(frstor(Address(rsp, 0));)
  LP64_ONLY(fxrstor(Address(rsp, 0));)
  addptr(rsp, FPUStateSizeInWords * wordSize);
}
6845
// Restore integer registers and flags; undoes push_IU_state including
// its 8-byte alignment pad on 64-bit.
void MacroAssembler::pop_IU_state() {
  popa();
  LP64_ONLY(addq(rsp, 8));
  popf();
}
6851
// Save Integer and Float state
// Warning: Stack must be 16 byte aligned (64bit)
// Integer state goes first so pop_CPU_state restores in reverse.
void MacroAssembler::push_CPU_state() {
  push_IU_state();
  push_FPU_state();
}
6858
// Save FPU state into a freshly reserved stack area: fnsave (plus fwait)
// on 32-bit, fxsave on 64-bit.
void MacroAssembler::push_FPU_state() {
  subptr(rsp, FPUStateSizeInWords * wordSize);
#ifndef _LP64
  fnsave(Address(rsp, 0));
  fwait();
#else
  fxsave(Address(rsp, 0));
#endif // LP64
}
6868
// Save flags and all integer registers.
void MacroAssembler::push_IU_state() {
  // Push flags first because pusha kills them
  pushf();
  // Make sure rsp stays 16-byte aligned
  LP64_ONLY(subq(rsp, 8));
  pusha();
}
6876
// Clear the last-Java-frame anchor in the thread after returning from a
// VM call: sp is always zeroed; fp and pc only on request.  If no thread
// register is supplied, the thread is loaded into rdi.
void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // we must set sp to zero to clear frame
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  if (clear_fp) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  }

  if (clear_pc)
    movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);

}
6893
// Counterpart of save_rax: pop rax if it was pushed (tmp == noreg),
// otherwise copy it back from tmp (no-op when tmp is rax itself).
void MacroAssembler::restore_rax(Register tmp) {
  if (tmp == noreg) pop(rax);
  else if (tmp != rax) mov(rax, tmp);
}
6898
// Round reg up to the next multiple of `modulus` (must be a power of two
// for the and-mask to be valid).
void MacroAssembler::round_to(Register reg, int modulus) {
  addptr(reg, modulus - 1);
  andptr(reg, -modulus);
}
6903
// Preserve rax: push it when no temp is given (tmp == noreg), otherwise
// stash it in tmp (no-op when tmp is rax itself).  Paired with restore_rax.
void MacroAssembler::save_rax(Register tmp) {
  if (tmp == noreg) push(rax);
  else if (tmp != rax) mov(tmp, rax);
}
6908
// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
//   thread - register holding the current JavaThread pointer
//   tmp    - temp register, clobbered with the derived page offset
void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  // Derive a per-thread, int-aligned offset within the serialization page.
  movl(tmp, thread);
  shrl(tmp, os::get_serialize_page_shift_count());
  andl(tmp, (os::vm_page_size() - sizeof(int)));

  Address index(noreg, tmp, Address::times_1);
  ExternalAddress page(os::get_memory_serialize_page());

  // Size of store must match masking code above
  movl(as_Address(ArrayAddress(page, index)), tmp);
}
6924
6925// Calls to C land
6926//
6927// When entering C land, the rbp, & rsp of the last Java frame have to be recorded
6928// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
6929// has to be reset to 0. This is required to allow proper stack traversal.
// Record the last Java frame (sp, optional fp and pc) in the thread's
// frame anchor before calling into C land.  sp is stored last so the
// anchor only becomes "set" once the other fields are in place.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional

  if (last_java_fp->is_valid()) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
  }

  // last_java_pc is optional

  if (last_java_pc != NULL) {
    lea(Address(java_thread,
                 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
        InternalAddress(last_java_pc));

  }
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}
6960
// Pointer-width left shift (shlq on 64-bit, shll on 32-bit).
void MacroAssembler::shlptr(Register dst, int imm8) {
  LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
}
6964
// Pointer-width logical right shift (shrq on 64-bit, shrl on 32-bit).
void MacroAssembler::shrptr(Register dst, int imm8) {
  LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
}
6968
// Sign-extend the low byte of reg in place.  movsx needs a byte-addressable
// register on 32-bit P6; otherwise fall back to a shift pair.
void MacroAssembler::sign_extend_byte(Register reg) {
  if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
    movsbl(reg, reg); // movsxb
  } else {
    shll(reg, 24);
    sarl(reg, 24);
  }
}
6977
// Sign-extend the low 16 bits of reg in place; shift-pair fallback for
// pre-P6 32-bit hardware.
void MacroAssembler::sign_extend_short(Register reg) {
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    movswl(reg, reg); // movsxw
  } else {
    shll(reg, 16);
    sarl(reg, 16);
  }
}
6986
// testl of dst against an AddressLiteral operand; the literal must be
// reachable as a direct address (no scratch-register indirection here).
void MacroAssembler::testl(Register dst, AddressLiteral src) {
  assert(reachable(src), "Address should be reachable");
  testl(dst, as_Address(src));
}
6991
6992//////////////////////////////////////////////////////////////////////////////////
6993#ifndef SERIALGC
6994
// G1 SATB pre-barrier: if concurrent marking is active, record the
// previous value of the field about to be overwritten in the thread's
// SATB mark queue.  If obj != noreg the previous value is loaded from
// (obj, 0) into pre_val; otherwise pre_val must already hold it.
// Falls into the g1_wb_pre runtime stub when the queue is full,
// saving/restoring the live input registers around the call.
void MacroAssembler::g1_write_barrier_pre(Register obj,
                                          Register pre_val,
                                          Register thread,
                                          Register tmp,
                                          bool tosca_live,
                                          bool expand_call) {

  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != rax, "check this code");
  }

  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));
  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));


  // Is marking active?  The active flag's width is configuration
  // dependent, so compare with the matching operand size.
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    cmpl(in_progress, 0);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    cmpb(in_progress, 0);
  }
  jcc(Assembler::equal, done);

  // Do we need to load the previous value?
  if (obj != noreg) {
    load_heap_oop(pre_val, Address(obj, 0));
  }

  // Is the previous value null?  Null previous values need no logging.
  cmpptr(pre_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  movptr(tmp, index);                   // tmp := *index_adr
  cmpptr(tmp, 0);                       // tmp == 0?
  jcc(Assembler::equal, runtime);       // If yes, goto runtime

  subptr(tmp, wordSize);                // tmp := tmp - wordSize
  movptr(index, tmp);                   // *index_adr := tmp
  addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr

  // Record the previous value
  movptr(Address(tmp, 0), pre_val);
  jmp(done);

  bind(runtime);
  // save the live input values
  if(tosca_live) push(rax);

  if (obj != noreg && obj != rax)
    push(obj);

  if (pre_val != rax)
    push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we are generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64( push(thread); )

  if (expand_call) {
    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
    pass_arg1(this, thread);
    pass_arg0(this, pre_val);
    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  } else {
    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  }

  NOT_LP64( pop(thread); )

  // restore the live input values
  if (pre_val != rax)
    pop(pre_val);

  if (obj != noreg && obj != rax)
    pop(obj);

  if(tosca_live) pop(rax);

  bind(done);
}
7108
// G1 post-barrier: after storing new_val at store_addr, dirty the
// corresponding card and enqueue its address on the thread's dirty
// card queue -- but only if the store crossed a heap-region boundary,
// stored a non-NULL value, and the card was not already dirty.
// Calls the g1_wb_post runtime stub when the queue is full.
// Clobbers tmp and tmp2 (and rscratch1 on LP64).
void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
                                           Register thread,
                                           Register tmp,
                                           Register tmp2) {
#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));

  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  Label done;
  Label runtime;

  // Does store cross heap regions?
  // (xor of the two addresses clears all bits below the region grain
  // iff both lie in the same region.)

  movptr(tmp, store_addr);
  xorptr(tmp, new_val);
  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  jcc(Assembler::equal, done);

  // crosses regions, storing NULL?

  cmpptr(new_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // storing region crossing non-NULL, is card already dirty?

  ExternalAddress cardtable((address) ct->byte_map_base);
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
#ifdef _LP64
  const Register card_addr = tmp;

  movq(card_addr, store_addr);
  shrq(card_addr, CardTableModRefBS::card_shift);

  lea(tmp2, cardtable);

  // get the address of the card
  addq(card_addr, tmp2);
#else
  const Register card_index = tmp;

  movl(card_index, store_addr);
  shrl(card_index, CardTableModRefBS::card_shift);

  Address index(noreg, card_index, Address::times_1);
  const Register card_addr = tmp;
  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
#endif
  cmpb(Address(card_addr, 0), 0);
  jcc(Assembler::equal, done);

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.
  // NOTE(review): 0 is used as the dirty-card value here and in the
  // cmpb above -- confirm against CardTableModRefBS::dirty_card_val().

  movb(Address(card_addr, 0), 0);

  // Room left in the dirty card queue?  index == 0 means full.
  cmpl(queue_index, 0);
  jcc(Assembler::equal, runtime);
  subl(queue_index, wordSize);
  movptr(tmp2, buffer);
#ifdef _LP64
  movslq(rscratch1, queue_index);
  addq(tmp2, rscratch1);
  movq(Address(tmp2, 0), card_addr);
#else
  addl(tmp2, queue_index);
  movl(Address(tmp2, 0), card_index);
#endif
  jmp(done);

  bind(runtime);
  // save the live input values
  push(store_addr);
  push(new_val);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  pop(thread);
#endif
  pop(new_val);
  pop(store_addr);

  bind(done);
}
7202
7203#endif // SERIALGC
7204//////////////////////////////////////////////////////////////////////////////////
7205
7206
void MacroAssembler::store_check(Register obj) {
  // Does a store check for the oop in register obj. The content of
  // register obj is destroyed afterwards.
  // (Split into two parts so the scheduler can interleave other code.)
  store_check_part_1(obj);
  store_check_part_2(obj);
}
7213
// Store check variant that also receives the destination address.
// NOTE(review): dst is intentionally unused -- only obj is needed for
// card marking; obj is destroyed.
void MacroAssembler::store_check(Register obj, Address dst) {
  store_check(obj);
}
7217
7218
7219// split the store check operation so that other instructions can be scheduled inbetween
void MacroAssembler::store_check_part_1(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  // Convert the address in obj into a card index (destroys obj).
  shrptr(obj, CardTableModRefBS::card_shift);
}
7225
// Second half of the store check: obj already holds the card index
// (see store_check_part_1); mark that card dirty (0) in the byte map.
void MacroAssembler::store_check_part_2(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  // The calculation for byte_map_base is as follows:
  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
  // So this essentially converts an address to a displacement and
  // it will never need to be relocated. On 64bit however the value may be too
  // large for a 32bit displacement

  intptr_t disp = (intptr_t) ct->byte_map_base;
  if (is_simm32(disp)) {
    // Base fits a 32-bit displacement: one instruction.
    Address cardtable(noreg, obj, Address::times_1, disp);
    movb(cardtable, 0);
  } else {
    // By doing it as an ExternalAddress disp could be converted to a rip-relative
    // displacement and done in a single instruction given favorable mapping and
    // a smarter version of as_Address. Worst case it is two instructions which
    // is no worse off then loading disp into a register and doing as a simple
    // Address() as above.
    // We can't do as ExternalAddress as the only style since if disp == 0 we'll
    // assert since NULL isn't acceptable in a reloci (see 6644928). In any case
    // in some cases we'll get a single instruction version.

    ExternalAddress cardtable((address)disp);
    Address index(noreg, obj, Address::times_1);
    movb(as_Address(ArrayAddress(cardtable, index)), 0);
  }
}
7257
7258void MacroAssembler::subptr(Register dst, int32_t imm32) {
7259  LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
7260}
7261
7262void MacroAssembler::subptr(Register dst, Register src) {
7263  LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
7264}
7265
7266// C++ bool manipulation
7267void MacroAssembler::testbool(Register dst) {
7268  if(sizeof(bool) == 1)
7269    testb(dst, 0xff);
7270  else if(sizeof(bool) == 2) {
7271    // testw implementation needed for two byte bools
7272    ShouldNotReachHere();
7273  } else if(sizeof(bool) == 4)
7274    testl(dst, dst);
7275  else
7276    // unsupported
7277    ShouldNotReachHere();
7278}
7279
7280void MacroAssembler::testptr(Register dst, Register src) {
7281  LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
7282}
7283
7284// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
// Bump-pointer allocate from the current thread's TLAB.  The size is
// either the constant con_size_in_bytes or the register
// var_size_in_bytes (when valid).  On success obj holds the new
// object's address; on overflow control transfers to slow_case.
void MacroAssembler::tlab_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Register t2,
                                   Label& slow_case) {
  assert_different_registers(obj, t1, t2);
  assert_different_registers(obj, var_size_in_bytes, t1);
  Register end = t2;
  // On 32-bit the thread pointer lives in t1; on 64-bit it is r15.
  Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);

  verify_tlab();

  NOT_LP64(get_thread(thread));

  movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
  // end := obj + size (lea avoids clobbering flags or the size reg).
  if (var_size_in_bytes == noreg) {
    lea(end, Address(obj, con_size_in_bytes));
  } else {
    lea(end, Address(obj, var_size_in_bytes, Address::times_1));
  }
  cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
  jcc(Assembler::above, slow_case);

  // update the tlab top pointer
  movptr(Address(thread, JavaThread::tlab_top_offset()), end);

  // recover var_size_in_bytes if necessary (caller allowed t2 == size reg)
  if (var_size_in_bytes == end) {
    subptr(var_size_in_bytes, obj);
  }
  verify_tlab();
}
7318
7319// Preserves rbx, and rdx.
// Refill the current thread's TLAB from eden, or decide to allocate
// directly in the shared space.  If the TLAB's remaining free space is
// above the refill-waste limit the TLAB is retained and control goes
// to try_eden; otherwise the old TLAB is discarded (filled with a
// dummy int array so the heap stays parseable), a new one is allocated
// from eden, and control jumps back to retry.  Returns the register
// holding the thread pointer for use by the caller.
Register MacroAssembler::tlab_refill(Label& retry,
                                     Label& try_eden,
                                     Label& slow_case) {
  Register top = rax;
  Register t1  = rcx;
  Register t2  = rsi;
  Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
  assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
  Label do_refill, discard_tlab;

  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    jmp(slow_case);
  }

  NOT_LP64(get_thread(thread_reg));

  movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  movptr(t1,  Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));

  // calculate amount of free space (in heap words)
  subptr(t1, top);
  shrptr(t1, LogHeapWordSize);

  // Retain tlab and allocate object in shared space if
  // the amount free in the tlab is too large to discard.
  cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
  jcc(Assembler::lessEqual, discard_tlab);

  // Retain
  // %%% yuck as movptr...
  movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
  addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
  if (TLABStats) {
    // increment number of slow_allocations
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
  }
  jmp(try_eden);

  bind(discard_tlab);
  if (TLABStats) {
    // increment number of refills
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
    // accumulate wastage -- t1 is amount free in tlab
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
  }

  // if tlab is currently allocated (top or end != null) then
  // fill [top, end + alignment_reserve) with array object
  testptr(top, top);
  jcc(Assembler::zero, do_refill);

  // set up the mark word
  movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
  // set the length to the remaining space
  subptr(t1, typeArrayOopDesc::header_size(T_INT));
  addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
  // convert heap words to jint element count
  shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
  // set klass to intArrayKlass
  // dubious reloc why not an oop reloc?
  movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr()));
  // store klass last.  concurrent gcs assumes klass length is valid if
  // klass field is not null.
  store_klass(top, t1);

  // account the whole old TLAB (top - start) as allocated bytes
  movptr(t1, top);
  subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
  incr_allocated_bytes(thread_reg, t1, 0);

  // refill the tlab with an eden allocation
  bind(do_refill);
  movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
  shlptr(t1, LogHeapWordSize);
  // allocate new tlab, address returned in top
  eden_allocate(top, t1, 0, t2, slow_case);

  // Check that t1 was preserved in eden_allocate.
#ifdef ASSERT
  if (UseTLAB) {
    Label ok;
    Register tsize = rsi;
    assert_different_registers(tsize, thread_reg, t1);
    push(tsize);
    movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
    shlptr(tsize, LogHeapWordSize);
    cmpptr(t1, tsize);
    jcc(Assembler::equal, ok);
    stop("assert(t1 != tlab size)");
    should_not_reach_here();

    bind(ok);
    pop(tsize);
  }
#endif
  // install the new TLAB: start = top, end = top + size - reserve
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
  addptr(top, t1);
  subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
  verify_tlab();
  jmp(retry);

  return thread_reg; // for use by caller
}
7425
// Add an allocation size (register var_size_in_bytes if valid, else
// constant con_size_in_bytes) to the thread's allocated_bytes counter.
// On 32-bit, t1 is used as a temp when thread is not supplied.
void MacroAssembler::incr_allocated_bytes(Register thread,
                                          Register var_size_in_bytes,
                                          int con_size_in_bytes,
                                          Register t1) {
#ifdef _LP64
  // 64-bit: a single addq covers the whole counter; t1 is unused.
  if (var_size_in_bytes->is_valid()) {
    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  } else {
    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  }
#else
  if (!thread->is_valid()) {
    assert(t1->is_valid(), "need temp reg");
    thread = t1;
    get_thread(thread);
  }

  if (var_size_in_bytes->is_valid()) {
    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  } else {
    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  }
  // 32-bit: the counter is wider than a register -- the addl above
  // updated the low word; propagate the carry into the high word.
  adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
#endif
}
7451
// pi/4, used below in trigfunc() as the fast-path argument-range bound.
static const double     pi_4 =  0.7853981633974483;
7453
// Compute sin ('s'), cos ('c') or tan ('t') of the value on the x87
// stack top, leaving the result in F-TOS.  Arguments with |x| <= pi/4
// use the FPU instruction directly; larger arguments fall back to a
// runtime call (SharedRuntime::dsin/dcos/dtan), preserving the
// caller's num_fpu_regs_in_use live FPU registers across the call.
void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
  // was attempted in this code; unfortunately it appears that the
  // switch to 80-bit precision and back causes this to be
  // unprofitable compared with simply performing a runtime call if
  // the argument is out of the (-pi/4, pi/4) range.

  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {
    // fcmp needs a temporary so preserve rbx,
    tmp = rbx;
    push(tmp);
  }

  Label slow_case, done;

  ExternalAddress pi4_adr = (address)&pi_4;
  if (reachable(pi4_adr)) {
    // x ?<= pi/4
    fld_d(pi4_adr);
    fld_s(1);                // Stack:  X  PI/4  X
    fabs();                  // Stack: |X| PI/4  X
    fcmp(tmp);
    jcc(Assembler::above, slow_case);

    // fastest case: -pi/4 <= x <= pi/4
    switch(trig) {
    case 's':
      fsin();
      break;
    case 'c':
      fcos();
      break;
    case 't':
      ftan();
      break;
    default:
      assert(false, "bad intrinsic");
      break;
    }
    jmp(done);
  }

  // slow case: runtime call
  bind(slow_case);
  // Preserve registers across runtime call
  pusha();
  int incoming_argument_and_return_value_offset = -1;
  if (num_fpu_regs_in_use > 1) {
    // Must preserve all other FPU regs (could alternatively convert
    // SharedRuntime::dsin and dcos into assembly routines known not to trash
    // FPU state, but can not trust C compiler)
    NEEDS_CLEANUP;
    // NOTE that in this case we also push the incoming argument to
    // the stack and restore it later; we also use this stack slot to
    // hold the return value from dsin or dcos.
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      subptr(rsp, sizeof(jdouble));
      fstp_d(Address(rsp, 0));
    }
    // The incoming argument is the first value spilled, i.e. the
    // highest-addressed of the slots just pushed.
    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
    fld_d(Address(rsp, incoming_argument_and_return_value_offset));
  }
  // Spill the argument for the call (and move it to xmm0 on 64-bit ABI).
  subptr(rsp, sizeof(jdouble));
  fstp_d(Address(rsp, 0));
#ifdef _LP64
  movdbl(xmm0, Address(rsp, 0));
#endif // _LP64

  // NOTE: we must not use call_VM_leaf here because that requires a
  // complete interpreter frame in debug mode -- same bug as 4387334
  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
  // do proper 64bit abi

  NEEDS_CLEANUP;
  // Need to add stack banging before this runtime call if it needs to
  // be taken; however, there is no generic stack banging routine at
  // the MacroAssembler level
  switch(trig) {
  case 's':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 0);
    }
    break;
  case 'c':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 0);
    }
    break;
  case 't':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 0);
    }
    break;
  default:
    assert(false, "bad intrinsic");
    break;
  }
#ifdef _LP64
    // 64-bit returns in xmm0; route it back through memory to F-TOS.
    movsd(Address(rsp, 0), xmm0);
    fld_d(Address(rsp, 0));
#endif // _LP64
  addptr(rsp, sizeof(jdouble));
  if (num_fpu_regs_in_use > 1) {
    // Must save return value to stack and then restore entire FPU stack
    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      fld_d(Address(rsp, 0));
      addptr(rsp, sizeof(jdouble));
    }
  }
  popa();

  // Come here with result in F-TOS
  bind(done);

  if (tmp != noreg) {
    pop(tmp);
  }
}
7574
7575
7576// Look up the method for a megamorphic invokeinterface call.
7577// The target method is determined by <intf_klass, itable_index>.
7578// The receiver klass is in recv_klass.
7579// On success, the result will be in method_result, and execution falls through.
7580// On failure, execution transfers to the given label.
void MacroAssembler::lookup_interface_method(Register recv_klass,
                                             Register intf_klass,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register scan_temp,
                                             Label& L_no_such_interface) {
  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
         "caller must use same register for non-constant itable index as for method");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  int vtable_base = instanceKlass::vtable_start_offset() * wordSize;
  int itentry_off = itableMethodEntry::method_offset_in_bytes();
  int scan_step   = itableOffsetEntry::size() * wordSize;
  int vte_size    = vtableEntry::size() * wordSize;
  Address::ScaleFactor times_vte_scale = Address::times_ptr;
  assert(vte_size == wordSize, "else adjust times_vte_scale");

  movl(scan_temp, Address(recv_klass, instanceKlass::vtable_length_offset() * wordSize));

  // %%% Could store the aligned, prescaled offset in the klassoop.
  lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
  if (HeapWordsPerLong > 1) {
    // Round up to align_object_offset boundary
    // see code for instanceKlass::start_of_itable!
    round_to(scan_temp, BytesPerLong);
  }

  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
  assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));

  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
  //   if (scan->interface() == intf) {
  //     result = (klass + scan->offset() + itable_index);
  //   }
  // }
  Label search, found_method;

  // The loop below is emitted twice (peel == 1 then 0): the first copy
  // handles the common hit-on-first-entry case with a short forward
  // branch; the second copy is the actual scan loop.
  for (int peel = 1; peel >= 0; peel--) {
    movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
    cmpptr(intf_klass, method_result);

    if (peel) {
      jccb(Assembler::equal, found_method);
    } else {
      jccb(Assembler::notEqual, search);
      // (invert the test to fall through to found_method...)
    }

    if (!peel)  break;

    bind(search);

    // Check that the previous entry is non-null.  A null entry means that
    // the receiver class doesn't implement the interface, and wasn't the
    // same as when the caller was compiled.
    testptr(method_result, method_result);
    jcc(Assembler::zero, L_no_such_interface);
    addptr(scan_temp, scan_step);
  }

  bind(found_method);

  // Got a hit.
  movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
  movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
}
7649
7650
// Full sub/supertype check: branch to L_success if sub_klass is a
// subtype of super_klass; fall through otherwise.  Combines the fast
// path (display check) with the slow path (secondary-supers scan).
void MacroAssembler::check_klass_subtype(Register sub_klass,
                           Register super_klass,
                           Register temp_reg,
                           Label& L_success) {
  Label L_failure;
  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  bind(L_failure);
}
7660
7661
// Fast path of the sub/supertype check: a pointer self-check followed
// by a probe of the supertype display at super_check_offset.  At most
// one of L_success / L_failure / L_slow_path may be NULL, meaning
// "fall through" for that outcome.  super_check_offset may be a
// register, a constant, or -1 (load it from super_klass; needs temp_reg).
void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path,
                                        RegisterOrConstant super_check_offset) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  if (super_check_offset.is_register()) {
    assert_different_registers(sub_klass, super_klass,
                               super_check_offset.as_register());
  } else if (must_load_sco) {
    assert(temp_reg != noreg, "supply either a temp or a register offset");
  }

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
                   Klass::secondary_super_cache_offset_in_bytes());
  int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
                    Klass::super_check_offset_offset_in_bytes());
  Address super_check_offset_addr(super_klass, sco_offset);

  // Hacked jcc, which "knows" that L_fallthrough, at least, is in
  // range of a jccb.  If this routine grows larger, reconsider at
  // least some of these.
#define local_jcc(assembler_cond, label)                                \
  if (&(label) == &L_fallthrough)  jccb(assembler_cond, label);         \
  else                             jcc( assembler_cond, label) /*omit semi*/

  // Hacked jmp, which may only be used just before L_fallthrough.
#define final_jmp(label)                                                \
  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
  else                            jmp(label)                /*omit semi*/

  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface.  Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  cmpptr(sub_klass, super_klass);
  local_jcc(Assembler::equal, *L_success);

  // Check the supertype display:
  if (must_load_sco) {
    // Positive movl does right thing on LP64.
    movl(temp_reg, super_check_offset_addr);
    super_check_offset = RegisterOrConstant(temp_reg);
  }
  Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
  cmpptr(super_klass, super_check_addr); // load displayed supertype

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  if (super_check_offset.is_register()) {
    local_jcc(Assembler::equal, *L_success);
    cmpl(super_check_offset.as_register(), sc_offset);
    if (L_failure == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_slow_path);
    } else {
      local_jcc(Assembler::notEqual, *L_failure);
      final_jmp(*L_slow_path);
    }
  } else if (super_check_offset.as_constant() == sc_offset) {
    // Need a slow path; fast failure is impossible.
    if (L_slow_path == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_success);
    } else {
      local_jcc(Assembler::notEqual, *L_slow_path);
      final_jmp(*L_success);
    }
  } else {
    // No slow path; it's a fast decision.
    if (L_failure == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_success);
    } else {
      local_jcc(Assembler::notEqual, *L_failure);
      final_jmp(*L_success);
    }
  }

  bind(L_fallthrough);

#undef local_jcc
#undef final_jmp
}
7765
7766
// The slow part of check_klass_subtype: linearly scan sub_klass's
// secondary-supers array for super_klass using repne scas.  On a hit the
// result is written into sub_klass's secondary-super-cache so the next
// fast-path check succeeds.  At most one of L_success/L_failure may be
// NULL, meaning "fall through".  On exit the condition codes are
// Z (found) / NZ (not found); see set_cond_codes below.
void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  if (temp2_reg != noreg)
    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
                   Klass::secondary_supers_offset_in_bytes());
  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
                   Klass::secondary_super_cache_offset_in_bytes());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(     sub_klass, sc_offset);

  // Do a linear scan of the secondary super-klass chain.
  // This code is rarely used, so simplicity is a virtue here.
  // The repne_scan instruction uses fixed registers, which we must spill.
  // Don't worry too much about pre-existing connections with the input regs.

  assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
  assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)

  // Get super_klass value into rax (even if it was in rdi or rcx).
  // With compressed oops rax is always needed because it gets encoded below.
  bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
  if (super_klass != rax || UseCompressedOops) {
    if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
    mov(rax, super_klass);
  }
  if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
  if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }

#ifndef PRODUCT
  // Count slow-path entries for diagnostics.
  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  ExternalAddress pst_counter_addr((address) pst_counter);
  NOT_LP64(  incrementl(pst_counter_addr) );
  LP64_ONLY( lea(rcx, pst_counter_addr) );
  LP64_ONLY( incrementl(Address(rcx, 0)) );
#endif //PRODUCT

  // We will consult the secondary-super array.
  movptr(rdi, secondary_supers_addr);
  // Load the array length.  (Positive movl does right thing on LP64.)
  movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
  // Skip to start of data.
  addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));

  // Scan RCX words at [RDI] for an occurrence of RAX.
  // Set NZ/Z based on last compare.
  // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
  // not change flags (only scas instruction which is repeated sets flags).
  // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.
#ifdef _LP64
  // This part is tricky, as values in supers array could be 32 or 64 bit wide
  // and we store values in objArrays always encoded, thus we need to encode
  // the value of rax before repne.  Note that rax is dead after the repne.
  if (UseCompressedOops) {
    encode_heap_oop_not_null(rax); // Changes flags.
    // The superclass is never null; it would be a basic system error if a null
    // pointer were to sneak in here.  Note that we have already loaded the
    // Klass::super_check_offset from the super_klass in the fast path,
    // so if there is a null in that register, we are already in the afterlife.
    testl(rax,rax); // Set Z = 0
    repne_scanl();
  } else
#endif // _LP64
  {
    testptr(rax,rax); // Set Z = 0
    repne_scan();
  }
  // Unspill the temp. registers:
  if (pushed_rdi)  pop(rdi);
  if (pushed_rcx)  pop(rcx);
  if (pushed_rax)  pop(rax);

  if (set_cond_codes) {
    // Special hack for the AD files:  rdi is guaranteed non-zero.
    assert(!pushed_rdi, "rdi must be left non-NULL");
    // Also, the condition codes are properly set Z/NZ on succeed/failure.
  }

  // Use the short branch form when the target is the nearby fallthrough.
  if (L_failure == &L_fallthrough)
        jccb(Assembler::notEqual, *L_failure);
  else  jcc(Assembler::notEqual, *L_failure);

  // Success.  Cache the super we found and proceed in triumph.
  movptr(super_cache_addr, super_klass);

  if (L_success != &L_fallthrough) {
    jmp(*L_success);
  }

#undef IS_A_TEMP

  bind(L_fallthrough);
}
7874
7875
7876void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
7877  ucomisd(dst, as_Address(src));
7878}
7879
7880void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
7881  ucomiss(dst, as_Address(src));
7882}
7883
7884void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
7885  if (reachable(src)) {
7886    xorpd(dst, as_Address(src));
7887  } else {
7888    lea(rscratch1, src);
7889    xorpd(dst, Address(rscratch1, 0));
7890  }
7891}
7892
7893void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
7894  if (reachable(src)) {
7895    xorps(dst, as_Address(src));
7896  } else {
7897    lea(rscratch1, src);
7898    xorps(dst, Address(rscratch1, 0));
7899  }
7900}
7901
7902void MacroAssembler::cmov32(Condition cc, Register dst, Address src) {
7903  if (VM_Version::supports_cmov()) {
7904    cmovl(cc, dst, src);
7905  } else {
7906    Label L;
7907    jccb(negate_condition(cc), L);
7908    movl(dst, src);
7909    bind(L);
7910  }
7911}
7912
7913void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
7914  if (VM_Version::supports_cmov()) {
7915    cmovl(cc, dst, src);
7916  } else {
7917    Label L;
7918    jccb(negate_condition(cc), L);
7919    movl(dst, src);
7920    bind(L);
7921  }
7922}
7923
// Emit a call to the verify-oop stub for the oop in 'reg'; a no-op
// unless -XX:+VerifyOops.  The message buffer is intentionally never
// freed: its address is baked into the generated code.
void MacroAssembler::verify_oop(Register reg, const char* s) {
  if (!VerifyOops) return;

  // Pass register number to verify_oop_subroutine
  char* b = new char[strlen(s) + 50];
  sprintf(b, "verify_oop: %s: %s", reg->name(), s);
#ifdef _LP64
  push(rscratch1);                    // save r10, trashed by movptr()
#endif
  push(rax);                          // save rax,
  push(reg);                          // pass register argument
  ExternalAddress buffer((address) b);
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);
  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  // Caller pops the arguments (oop, message) and restores rax, r10
}
7945
7946
// Resolve a lazily-computed constant (e.g. a field offset filled in
// later, see the delayed_value uses below): if *delayed_value_addr is
// already non-zero, fold it into a constant operand now; otherwise emit
// code that loads it through 'tmp' at run time.
RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp,
                                                      int offset) {
  intptr_t value = *delayed_value_addr;
  if (value != 0)
    return RegisterOrConstant(value + offset);

  // load indirectly to solve generation ordering problem
  movptr(tmp, ExternalAddress((address) delayed_value_addr));

#ifdef ASSERT
  // The delayed value must have been filled in by the time this code runs.
  { Label L;
    testptr(tmp, tmp);
    if (WizardMode) {
      jcc(Assembler::notZero, L);
      char* buf = new char[40];
      sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]);
      stop(buf);
    } else {
      jccb(Assembler::notZero, L);
      hlt();
    }
    bind(L);
  }
#endif

  if (offset != 0)
    addptr(tmp, offset);

  return RegisterOrConstant(tmp);
}
7978
7979
7980// registers on entry:
7981//  - rax ('check' register): required MethodType
7982//  - rcx: method handle
7983//  - rdx, rsi, or ?: killable temp
// Compare the required MethodType in mtype_reg against the type of the
// method handle in mh_reg; jump to wrong_method_type on mismatch.
// temp_reg is clobbered (holds the delayed offset and, with compressed
// oops, the decoded type oop).
void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,
                                              Register temp_reg,
                                              Label& wrong_method_type) {
  Address type_addr(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg));
  // compare method type against that of the receiver
  if (UseCompressedOops) {
    // Narrow oop in memory: decode it first, then compare full-width.
    load_heap_oop(temp_reg, type_addr);
    cmpptr(mtype_reg, temp_reg);
  } else {
    cmpptr(mtype_reg, type_addr);
  }
  jcc(Assembler::notEqual, wrong_method_type);
}
7997
7998
7999// A method handle has a "vmslots" field which gives the size of its
8000// argument list in JVM stack slots.  This field is either located directly
8001// in every method handle, or else is indirectly accessed through the
8002// method handle's MethodType.  This macro hides the distinction.
// Load the argument-list size (in JVM stack slots) of the method handle
// in mh_reg into vmslots_reg.  temp_reg is clobbered (delayed offsets).
void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
                                                Register temp_reg) {
  assert_different_registers(vmslots_reg, mh_reg, temp_reg);
  // load mh.type.form.vmslots
  if (java_lang_invoke_MethodHandle::vmslots_offset_in_bytes() != 0) {
    // hoist vmslots into every mh to avoid dependent load chain
    movl(vmslots_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmslots_offset_in_bytes, temp_reg)));
  } else {
    // Walk mh.type.form, reusing vmslots_reg for the intermediate oops.
    Register temp2_reg = vmslots_reg;
    load_heap_oop(temp2_reg, Address(mh_reg,    delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg)));
    load_heap_oop(temp2_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, temp_reg)));
    movl(vmslots_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, temp_reg)));
  }
}
8017
8018
8019// registers on entry:
8020//  - rcx: method handle
8021//  - rdx: killable temp (interpreted only)
8022//  - rax: killable temp (compiled only)
// Tail-jump into the interpreted entry point of the method handle in
// mh_reg (must be rcx by convention).  temp_reg is clobbered.
void MacroAssembler::jump_to_method_handle_entry(Register mh_reg, Register temp_reg) {
  assert(mh_reg == rcx, "caller must put MH object in rcx");
  assert_different_registers(mh_reg, temp_reg);

  // pick out the interpreted side of the handler
  // NOTE: vmentry is not an oop!
  movptr(temp_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes, temp_reg)));

  // off we go...
  jmp(Address(temp_reg, MethodHandleEntry::from_interpreted_entry_offset_in_bytes()));

  // for the various stubs which take control at this point,
  // see MethodHandles::generate_method_handle_stub
}
8037
8038
8039Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
8040                                         int extra_slot_offset) {
8041  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
8042  int stackElementSize = Interpreter::stackElementSize;
8043  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
8044#ifdef ASSERT
8045  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
8046  assert(offset1 - offset == stackElementSize, "correct arithmetic");
8047#endif
8048  Register             scale_reg    = noreg;
8049  Address::ScaleFactor scale_factor = Address::no_scale;
8050  if (arg_slot.is_constant()) {
8051    offset += arg_slot.as_constant() * stackElementSize;
8052  } else {
8053    scale_reg    = arg_slot.as_register();
8054    scale_factor = Address::times(stackElementSize);
8055  }
8056  offset += wordSize;           // return PC is on stack
8057  return Address(rsp, scale_reg, scale_factor, offset);
8058}
8059
8060
// Emit a call to the verify-oop stub for an oop located in memory at
// 'addr'; a no-op unless -XX:+VerifyOops.  The message buffer is
// intentionally never freed: its address is baked into the code.
void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  if (!VerifyOops) return;

  // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
  // Pass register number to verify_oop_subroutine
  char* b = new char[strlen(s) + 50];
  sprintf(b, "verify_oop_addr: %s", s);

#ifdef _LP64
  push(rscratch1);                    // save r10, trashed by movptr()
#endif
  push(rax);                          // save rax,
  // addr may contain rsp so we will have to adjust it based on the push
  // we just did (and on 64 bit we do two pushes)
  // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
  // stores rax into addr which is backwards of what was intended.
  if (addr.uses(rsp)) {
    lea(rax, addr);
    pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord));
  } else {
    pushptr(addr);
  }

  ExternalAddress buffer((address) b);
  // pass msg argument
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);

  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  // Caller pops the arguments (addr, message) and restores rax, r10.
}
8096
// Debug-only: emit code checking the current thread's TLAB invariants
// (start <= top <= end); stops the VM if they are violated.
void MacroAssembler::verify_tlab() {
#ifdef ASSERT
  if (UseTLAB && VerifyOops) {
    Label next, ok;
    Register t1 = rsi;
    // On LP64 the thread is always in r15; on 32-bit it must be loaded.
    Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);

    push(t1);
    NOT_LP64(push(thread_reg));
    NOT_LP64(get_thread(thread_reg));

    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
    jcc(Assembler::aboveEqual, next);
    stop("assert(top >= start)");
    should_not_reach_here();

    bind(next);
    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    jcc(Assembler::aboveEqual, ok);
    stop("assert(top <= end)");
    should_not_reach_here();

    bind(ok);
    NOT_LP64(pop(thread_reg));
    pop(t1);
  }
#endif
}
8127
// Decoded view of the x87 FPU control word: rounding/precision control
// plus the exception mask bits (see the IA-32 SDM, FPU control word).
class ControlWord {
 public:
  int32_t _value;

  int  rounding_control() const        { return  (_value >> 10) & 3      ; }
  int  precision_control() const       { return  (_value >>  8) & 3      ; }
  bool precision() const               { return ((_value >>  5) & 1) != 0; }
  bool underflow() const               { return ((_value >>  4) & 1) != 0; }
  bool overflow() const                { return ((_value >>  3) & 1) != 0; }
  bool zero_divide() const             { return ((_value >>  2) & 1) != 0; }
  bool denormalized() const            { return ((_value >>  1) & 1) != 0; }
  bool invalid() const                 { return ((_value >>  0) & 1) != 0; }

  // Print the low 16 bits: exception masks, rounding and precision modes.
  void print() const {
    // rounding control -- the 2-bit field makes all four cases exhaustive;
    // the default arms below only exist so 'rc'/'pc' are never read
    // uninitialized (and to silence compiler warnings to that effect).
    const char* rc;
    switch (rounding_control()) {
      case 0: rc = "round near"; break;
      case 1: rc = "round down"; break;
      case 2: rc = "round up  "; break;
      case 3: rc = "chop      "; break;
      default: rc = "?";         break;
    }
    // precision control
    const char* pc;
    switch (precision_control()) {
      case 0: pc = "24 bits "; break;
      case 1: pc = "reserved"; break;
      case 2: pc = "53 bits "; break;
      case 3: pc = "64 bits "; break;
      default: pc = "?";       break;
    }
    // flags: upper-case letter means the corresponding mask bit is set
    char f[9];
    f[0] = ' ';
    f[1] = ' ';
    f[2] = (precision   ()) ? 'P' : 'p';
    f[3] = (underflow   ()) ? 'U' : 'u';
    f[4] = (overflow    ()) ? 'O' : 'o';
    f[5] = (zero_divide ()) ? 'Z' : 'z';
    f[6] = (denormalized()) ? 'D' : 'd';
    f[7] = (invalid     ()) ? 'I' : 'i';
    f[8] = '\x0';
    // output
    printf("%04x  masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
  }

};
8174
// Decoded view of the x87 FPU status word: busy bit, condition codes,
// top-of-stack index, and the exception status bits.
class StatusWord {
 public:
  int32_t _value;

  bool busy() const                    { return (_value & (1 << 15)) != 0; }
  bool C3() const                      { return (_value & (1 << 14)) != 0; }
  bool C2() const                      { return (_value & (1 << 10)) != 0; }
  bool C1() const                      { return (_value & (1 <<  9)) != 0; }
  bool C0() const                      { return (_value & (1 <<  8)) != 0; }
  int  top() const                     { return (_value >> 11) & 7; }
  bool error_status() const            { return (_value & (1 <<  7)) != 0; }
  bool stack_fault() const             { return (_value & (1 <<  6)) != 0; }
  bool precision() const               { return (_value & (1 <<  5)) != 0; }
  bool underflow() const               { return (_value & (1 <<  4)) != 0; }
  bool overflow() const                { return (_value & (1 <<  3)) != 0; }
  bool zero_divide() const             { return (_value & (1 <<  2)) != 0; }
  bool denormalized() const            { return (_value & (1 <<  1)) != 0; }
  bool invalid() const                 { return (_value & 1) != 0; }

  // Print the low 16 bits: exception flags, condition codes, stack top.
  void print() const {
    // condition-code summary, C3..C0
    char cc_buf[5];
    cc_buf[0] = C3() ? '3' : '-';
    cc_buf[1] = C2() ? '2' : '-';
    cc_buf[2] = C1() ? '1' : '-';
    cc_buf[3] = C0() ? '0' : '-';
    cc_buf[4] = '\0';
    // exception/status flag summary, one letter per set bit
    char flag_buf[9];
    flag_buf[0] = error_status() ? 'E' : '-';
    flag_buf[1] = stack_fault () ? 'S' : '-';
    flag_buf[2] = precision   () ? 'P' : '-';
    flag_buf[3] = underflow   () ? 'U' : '-';
    flag_buf[4] = overflow    () ? 'O' : '-';
    flag_buf[5] = zero_divide () ? 'Z' : '-';
    flag_buf[6] = denormalized() ? 'D' : '-';
    flag_buf[7] = invalid     () ? 'I' : '-';
    flag_buf[8] = '\0';
    printf("%04x  flags = %s, cc =  %s, top = %d", _value & 0xFFFF, flag_buf, cc_buf, top());
  }

};
8218
// Decoded view of the x87 tag word: two tag bits per physical register
// (0 = valid, 1 = zero, 2 = special, 3 = empty).
class TagWord {
 public:
  int32_t _value;

  // Tag bits for physical register i (0..7).
  int tag_at(int i) const              { return (_value >> (2 * i)) & 3; }

  void print() const {
    printf("%04x", _value & 0xFFFF);
  }

};
8230
// One 80-bit x87 register image: 64-bit mantissa in two 32-bit halves
// plus a 16-bit sign/exponent field.
class FPU_Register {
 public:
  int32_t _m0;
  int32_t _m1;
  int16_t _ex;

  // True for the x87 "indefinite" QNaN bit pattern.
  bool is_indefinite() const           {
    return _m0 == 0 && _m1 == (int32_t)0xC0000000 && _ex == -1;
  }

  // Print as sign, hex exponent, hex mantissa, and a NaN marker.
  void print() const {
    const char sign = (_ex < 0) ? '-' : '+';
    const bool nan_like = (_ex == 0x7FFF || _ex == (int16_t)-1);
    printf("%c%04hx.%08x%08x  %s", sign, _ex, _m1, _m0, nan_like ? "NaN" : "   ");
  }

};
8248
// Snapshot of the full x87 FPU state (control/status/tag words, error
// and data pointers, and the eight 10-byte registers).  The field order
// matters: generated code writes this structure to the stack and passes
// a pointer here -- presumably in fsave layout; confirm against
// push_CPU_state before changing anything.
class FPU_State {
 public:
  enum {
    register_size       = 10,
    number_of_registers =  8,
    register_mask       =  7
  };

  ControlWord  _control_word;
  StatusWord   _status_word;
  TagWord      _tag_word;
  int32_t      _error_offset;
  int32_t      _error_selector;
  int32_t      _data_offset;
  int32_t      _data_selector;
  int8_t       _register[register_size * number_of_registers];

  // Tag of the element i positions down from the current top of stack.
  int tag_for_st(int i) const          { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
  // Raw 10-byte register slot i (not rotated by top-of-stack).
  FPU_Register* st(int i) const        { return (FPU_Register*)&_register[register_size * i]; }

  const char* tag_as_string(int tag) const {
    switch (tag) {
      case 0: return "valid";
      case 1: return "zero";
      case 2: return "special";
      case 3: return "empty";
    }
    ShouldNotReachHere();
    return NULL;
  }

  void print() const {
    // print computation registers
    { int t = _status_word.top();
      for (int i = 0; i < number_of_registers; i++) {
        int j = (i - t) & register_mask;
        printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
        st(j)->print();
        printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
      }
    }
    printf("\n");
    // print control registers
    printf("ctrl = "); _control_word.print(); printf("\n");
    printf("stat = "); _status_word .print(); printf("\n");
    printf("tags = "); _tag_word    .print(); printf("\n");
  }

};
8298
// Decoded view of the saved EFLAGS register.
class Flag_Register {
 public:
  int32_t _value;

  bool overflow() const                { return (_value & (1 << 11)) != 0; }
  bool direction() const               { return (_value & (1 << 10)) != 0; }
  bool sign() const                    { return (_value & (1 <<  7)) != 0; }
  bool zero() const                    { return (_value & (1 <<  6)) != 0; }
  bool auxiliary_carry() const         { return (_value & (1 <<  4)) != 0; }
  bool parity() const                  { return (_value & (1 <<  2)) != 0; }
  bool carry() const                   { return (_value & 1) != 0; }

  // Print the raw value plus a one-letter-per-flag summary.
  void print() const {
    char buf[8];
    buf[0] = overflow       () ? 'O' : '-';
    buf[1] = direction      () ? 'D' : '-';
    buf[2] = sign           () ? 'S' : '-';
    buf[3] = zero           () ? 'Z' : '-';
    buf[4] = auxiliary_carry() ? 'A' : '-';
    buf[5] = parity         () ? 'P' : '-';
    buf[6] = carry          () ? 'C' : '-';
    buf[7] = '\0';
    printf("%08x  flags = %s", _value, buf);
  }

};
8327
// A single 32-bit integer register value.
class IU_Register {
 public:
  int32_t _value;

  // Print as zero-padded hex followed by the signed decimal value.
  void print() const {
    const int32_t v = _value;
    printf("%08x  %11d", v, v);
  }

};
8337
8338class IU_State {
8339 public:
8340  Flag_Register _eflags;
8341  IU_Register   _rdi;
8342  IU_Register   _rsi;
8343  IU_Register   _rbp;
8344  IU_Register   _rsp;
8345  IU_Register   _rbx;
8346  IU_Register   _rdx;
8347  IU_Register   _rcx;
8348  IU_Register   _rax;
8349
8350  void print() const {
8351    // computation registers
8352    printf("rax,  = "); _rax.print(); printf("\n");
8353    printf("rbx,  = "); _rbx.print(); printf("\n");
8354    printf("rcx  = "); _rcx.print(); printf("\n");
8355    printf("rdx  = "); _rdx.print(); printf("\n");
8356    printf("rdi  = "); _rdi.print(); printf("\n");
8357    printf("rsi  = "); _rsi.print(); printf("\n");
8358    printf("rbp,  = "); _rbp.print(); printf("\n");
8359    printf("rsp  = "); _rsp.print(); printf("\n");
8360    printf("\n");
8361    // control registers
8362    printf("flgs = "); _eflags.print(); printf("\n");
8363  }
8364};
8365
8366
8367class CPU_State {
8368 public:
8369  FPU_State _fpu_state;
8370  IU_State  _iu_state;
8371
8372  void print() const {
8373    printf("--------------------------------------------------\n");
8374    _iu_state .print();
8375    printf("\n");
8376    _fpu_state.print();
8377    printf("--------------------------------------------------\n");
8378  }
8379
8380};
8381
8382
8383static void _print_CPU_state(CPU_State* state) {
8384  state->print();
8385};
8386
8387
// Emit code that dumps the full CPU state via _print_CPU_state.  The
// state is saved and restored around the call (push/pop_CPU_state), so
// this can be inserted into generated code for debugging.
void MacroAssembler::print_CPU_state() {
  push_CPU_state();
  push(rsp);                // pass CPU state
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
  addptr(rsp, wordSize);       // discard argument
  pop_CPU_state();
}
8395
8396
8397static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
8398  static int counter = 0;
8399  FPU_State* fs = &state->_fpu_state;
8400  counter++;
8401  // For leaf calls, only verify that the top few elements remain empty.
8402  // We only need 1 empty at the top for C2 code.
8403  if( stack_depth < 0 ) {
8404    if( fs->tag_for_st(7) != 3 ) {
8405      printf("FPR7 not empty\n");
8406      state->print();
8407      assert(false, "error");
8408      return false;
8409    }
8410    return true;                // All other stack states do not matter
8411  }
8412
8413  assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
8414         "bad FPU control word");
8415
8416  // compute stack depth
8417  int i = 0;
8418  while (i < FPU_State::number_of_registers && fs->tag_for_st(i)  < 3) i++;
8419  int d = i;
8420  while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
8421  // verify findings
8422  if (i != FPU_State::number_of_registers) {
8423    // stack not contiguous
8424    printf("%s: stack not contiguous at ST%d\n", s, i);
8425    state->print();
8426    assert(false, "error");
8427    return false;
8428  }
8429  // check if computed stack depth corresponds to expected stack depth
8430  if (stack_depth < 0) {
8431    // expected stack depth is -stack_depth or less
8432    if (d > -stack_depth) {
8433      // too many elements on the stack
8434      printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
8435      state->print();
8436      assert(false, "error");
8437      return false;
8438    }
8439  } else {
8440    // expected stack depth is stack_depth
8441    if (d != stack_depth) {
8442      // wrong stack depth
8443      printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
8444      state->print();
8445      assert(false, "error");
8446      return false;
8447    }
8448  }
8449  // everything is cool
8450  return true;
8451}
8452
8453
// Emit a call to _verify_FPU checking the x87 stack against stack_depth;
// a no-op unless -XX:+VerifyFPU.  Breaks into the debugger (int3) when
// the helper reports an inconsistent state.
void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  if (!VerifyFPU) return;
  push_CPU_state();
  push(rsp);                // pass CPU state
  ExternalAddress msg((address) s);
  // pass message string s
  pushptr(msg.addr());
  push(stack_depth);        // pass stack depth
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
  addptr(rsp, 3 * wordSize);   // discard arguments
  // check for error
  { Label L;
    testl(rax, rax);
    jcc(Assembler::notZero, L);
    int3();                  // break if error condition
    bind(L);
  }
  pop_CPU_state();
}
8473
// Load the klass pointer of the object in src into dst, decoding the
// narrow klass reference when compressed oops are enabled (LP64 only).
void MacroAssembler::load_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    decode_heap_oop_not_null(dst);
  } else
#endif
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
}
8483
// Load the prototype mark word of src's klass into dst.  With compressed
// oops the narrow klass reference is decoded inline, folding the shift
// into the addressing mode when the alignment allows.
void MacroAssembler::load_prototype_header(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    assert (Universe::heap() != NULL, "java heap should be initialized");
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    if (Universe::narrow_oop_shift() != 0) {
      assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      if (LogMinObjAlignmentInBytes == Address::times_8) {
        // Shift folds into the scaled-index addressing mode.
        movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
      } else {
        // OK to use shift since we don't need to preserve flags.
        shlq(dst, LogMinObjAlignmentInBytes);
        movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
      }
    } else {
      // Zero-based, unshifted narrow oops: the value is the address.
      movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
    }
  } else
#endif
  {
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  }
}
8508
// Store the klass pointer src into the object header of dst.  With
// compressed oops src is encoded in place first, so src is clobbered.
void MacroAssembler::store_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    encode_heap_oop_not_null(src);
    movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
  } else
#endif
    movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
}
8518
// Load a (possibly NULL) oop from src into dst, decoding the narrow
// form when compressed oops are enabled.
void MacroAssembler::load_heap_oop(Register dst, Address src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, src);
    decode_heap_oop(dst);
  } else
#endif
    movptr(dst, src);
}
8528
// Doesn't do verification, generates fixed size code
// (the not-null decode is branch-free, unlike decode_heap_oop).
void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, src);
    decode_heap_oop_not_null(dst);
  } else
#endif
    movptr(dst, src);
}
8539
// Store the oop in src to dst, encoding it first when compressed oops
// are enabled.  In that case src is clobbered and dst's addressing mode
// must not use src.
void MacroAssembler::store_heap_oop(Address dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    assert(!dst.uses(src), "not enough registers");
    encode_heap_oop(src);
    movl(dst, src);
  } else
#endif
    movptr(dst, src);
}
8550
// Used for storing NULLs.
// Stores a NULL oop without needing a scratch register: a 32-bit store
// for compressed oops, otherwise a sign-extended 32-bit immediate store
// that clears the full word on LP64.
void MacroAssembler::store_heap_oop_null(Address dst) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, (int32_t)NULL_WORD);
  } else {
    movslq(dst, (int32_t)NULL_WORD);
  }
#else
  movl(dst, (int32_t)NULL_WORD);
#endif
}
8563
8564#ifdef _LP64
// Fill the 32-bit gap next to a compressed klass field in dst's header
// with src; emits nothing when compressed oops are disabled.
void MacroAssembler::store_klass_gap(Register dst, Register src) {
  if (UseCompressedOops) {
    // Store to klass gap in destination
    movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
  }
}
8571
8572#ifdef ASSERT
// Debug check for compressed oops: verify that r12 still holds the
// current narrow-oop base.  Code is emitted only when
// -XX:+CheckCompressedOops is set.
void MacroAssembler::verify_heapbase(const char* msg) {
  assert (UseCompressedOops, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  if (CheckCompressedOops) {
    Label ok;
    push(rscratch1); // cmpptr trashes rscratch1
    cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
    jcc(Assembler::equal, ok);
    stop(msg);
    bind(ok);
    pop(rscratch1);
  }
}
8586#endif
8587
// Algorithm must match oop.inline.hpp encode_heap_oop.
// Compress the (possibly NULL) oop in r, in place.  NULL is preserved:
// a NULL input is replaced by the heap base via cmov so the subtraction
// yields 0.
void MacroAssembler::encode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
#endif
  verify_oop(r, "broken oop in encode_heap_oop");
  if (Universe::narrow_oop_base() == NULL) {
    // Zero-based encoding: just shift, if there is an alignment shift.
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shrq(r, LogMinObjAlignmentInBytes);
    }
    return;
  }
  // Branch-free NULL handling: substitute the base for NULL so that
  // base - base == 0 encodes NULL correctly.
  testq(r, r);
  cmovq(Assembler::equal, r, r12_heapbase);
  subq(r, r12_heapbase);
  shrq(r, LogMinObjAlignmentInBytes);
}
8606
// Compress a known-non-NULL oop in r, in place.  Cheaper than
// encode_heap_oop: no NULL-preserving cmov is needed.
void MacroAssembler::encode_heap_oop_not_null(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?");
  if (CheckCompressedOops) {
    Label ok;
    testq(r, r);
    jcc(Assembler::notEqual, ok);
    stop("null oop passed to encode_heap_oop_not_null");
    bind(ok);
  }
#endif
  verify_oop(r, "broken oop in encode_heap_oop_not_null");
  if (Universe::narrow_oop_base() != NULL) {
    subq(r, r12_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shrq(r, LogMinObjAlignmentInBytes);
  }
}
8627
// Compress a known-non-NULL oop from src into dst (src is preserved
// unless dst == src).
void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?");
  if (CheckCompressedOops) {
    Label ok;
    testq(src, src);
    jcc(Assembler::notEqual, ok);
    stop("null oop passed to encode_heap_oop_not_null2");
    bind(ok);
  }
#endif
  verify_oop(src, "broken oop in encode_heap_oop_not_null2");
  if (dst != src) {
    movq(dst, src);
  }
  if (Universe::narrow_oop_base() != NULL) {
    subq(dst, r12_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shrq(dst, LogMinObjAlignmentInBytes);
  }
}
8651
// Decompress a (possibly NULL) narrow oop in r, in place.  NULL stays
// NULL: shlq leaves ZF set for a zero input, so the heap-base add is
// skipped via the short branch.
void  MacroAssembler::decode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
#endif
  if (Universe::narrow_oop_base() == NULL) {
    // Zero-based encoding: decoding is just the alignment shift (if any).
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shlq(r, LogMinObjAlignmentInBytes);
    }
  } else {
    Label done;
    shlq(r, LogMinObjAlignmentInBytes);
    jccb(Assembler::equal, done);   // ZF from shlq: NULL skips the base add
    addq(r, r12_heapbase);
    bind(done);
  }
  verify_oop(r, "broken oop in decode_heap_oop");
}
8670
// Decompress a known-non-NULL narrow oop in r, in place.  Branch-free
// and fixed-size on purpose: callers count the emitted instructions.
void  MacroAssembler::decode_heap_oop_not_null(Register r) {
  // Note: it will change flags
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shlq(r, LogMinObjAlignmentInBytes);
    if (Universe::narrow_oop_base() != NULL) {
      addq(r, r12_heapbase);
    }
  } else {
    assert (Universe::narrow_oop_base() == NULL, "sanity");
  }
}
8688
// Decode a narrow oop known to be non-null from 'src' into 'dst'.
// When the shift equals times_8 (3), base + (src << 3) is computed with a
// single leaq; otherwise it falls back to explicit move/shift/add.
void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  // Note: it will change flags
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    if (LogMinObjAlignmentInBytes == Address::times_8) {
      // Single-instruction decode: dst = r12_heapbase + src * 8.
      leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
    } else {
      if (dst != src) {
        movq(dst, src);
      }
      shlq(dst, LogMinObjAlignmentInBytes);
      if (Universe::narrow_oop_base() != NULL) {
        addq(dst, r12_heapbase);
      }
    }
  } else {
    // With no shift there can be no base either; a plain move suffices.
    assert (Universe::narrow_oop_base() == NULL, "sanity");
    if (dst != src) {
      movq(dst, src);
    }
  }
}
8716
8717void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
8718  assert (UseCompressedOops, "should only be used for compressed headers");
8719  assert (Universe::heap() != NULL, "java heap should be initialized");
8720  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
8721  int oop_index = oop_recorder()->find_index(obj);
8722  RelocationHolder rspec = oop_Relocation::spec(oop_index);
8723  mov_narrow_oop(dst, oop_index, rspec);
8724}
8725
8726void  MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
8727  assert (UseCompressedOops, "should only be used for compressed headers");
8728  assert (Universe::heap() != NULL, "java heap should be initialized");
8729  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
8730  int oop_index = oop_recorder()->find_index(obj);
8731  RelocationHolder rspec = oop_Relocation::spec(oop_index);
8732  mov_narrow_oop(dst, oop_index, rspec);
8733}
8734
8735void  MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
8736  assert (UseCompressedOops, "should only be used for compressed headers");
8737  assert (Universe::heap() != NULL, "java heap should be initialized");
8738  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
8739  int oop_index = oop_recorder()->find_index(obj);
8740  RelocationHolder rspec = oop_Relocation::spec(oop_index);
8741  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
8742}
8743
8744void  MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {
8745  assert (UseCompressedOops, "should only be used for compressed headers");
8746  assert (Universe::heap() != NULL, "java heap should be initialized");
8747  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
8748  int oop_index = oop_recorder()->find_index(obj);
8749  RelocationHolder rspec = oop_Relocation::spec(oop_index);
8750  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
8751}
8752
8753void MacroAssembler::reinit_heapbase() {
8754  if (UseCompressedOops) {
8755    movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
8756  }
8757}
8758#endif // _LP64
8759
8760// IndexOf for constant substrings with size >= 8 chars
8761// which don't need to be loaded through stack.
void MacroAssembler::string_indexofC8(Register str1, Register str2,
                                      Register cnt1, Register cnt2,
                                      int int_cnt2,  Register result,
                                      XMMRegister vec, Register tmp) {
  assert(UseSSE42Intrinsics, "SSE4.2 is required");

  // This method uses pcmpestri instruction with bound registers
  //   inputs:
  //     xmm - substring
  //     rax - substring length (elements count)
  //     mem - scanned string
  //     rdx - string length (elements count)
  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  //   outputs:
  //     rcx - matched index in string
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
        RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
        MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;

  // Note, inline_string_indexOf() generates checks:
  // if (substr.count > string.count) return -1;
  // if (substr.count == 0) return 0;
  assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars");

  // Load substring.
  movdqu(vec, Address(str2, 0));
  movl(cnt2, int_cnt2);
  movptr(result, str1); // string addr

  if (int_cnt2 > 8) {
    jmpb(SCAN_TO_SUBSTR);

    // Reload substr for rescan, this code
    // is executed only for large substrings (> 8 chars)
    bind(RELOAD_SUBSTR);
    movdqu(vec, Address(str2, 0));
    negptr(cnt2); // Jumped here with negative cnt2, convert to positive

    bind(RELOAD_STR);
    // We came here after the beginning of the substring was
    // matched but the rest of it was not so we need to search
    // again. Start from the next element after the previous match.

    // cnt2 is number of substring remaining elements and
    // cnt1 is number of string remaining elements when cmp failed.
    // Restored cnt1 = cnt1 - cnt2 + int_cnt2
    subl(cnt1, cnt2);
    addl(cnt1, int_cnt2);
    movl(cnt2, int_cnt2); // Now restore cnt2

    decrementl(cnt1);     // Shift to next element
    cmpl(cnt1, cnt2);
    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring

    addptr(result, 2);

  } // (int_cnt2 > 8)

  // Scan string for start of substr in 16-byte vectors
  bind(SCAN_TO_SUBSTR);
  pcmpestri(vec, Address(result, 0), 0x0d);
  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  subl(cnt1, 8);
  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  cmpl(cnt1, cnt2);
  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  addptr(result, 16);
  jmpb(SCAN_TO_SUBSTR);

  // Found a potential substr
  bind(FOUND_CANDIDATE);
  // Matched whole vector if first element matched (tmp(rcx) == 0).
  if (int_cnt2 == 8) {
    jccb(Assembler::overflow, RET_FOUND);    // OF == 1
  } else { // int_cnt2 > 8
    jccb(Assembler::overflow, FOUND_SUBSTR);
  }
  // After pcmpestri tmp(rcx) contains matched element index
  // Compute start addr of substr
  lea(result, Address(result, tmp, Address::times_2));

  // Make sure string is still long enough
  subl(cnt1, tmp);
  cmpl(cnt1, cnt2);
  if (int_cnt2 == 8) {
    jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  } else { // int_cnt2 > 8
    jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
  }
  // Left less than substring.

  bind(RET_NOT_FOUND);
  movl(result, -1);
  jmpb(EXIT);

  if (int_cnt2 > 8) {
    // This code is optimized for the case when whole substring
    // is matched if its head is matched.
    bind(MATCH_SUBSTR_HEAD);
    pcmpestri(vec, Address(result, 0), 0x0d);
    // Reload only string if does not match
    jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0

    Label CONT_SCAN_SUBSTR;
    // Compare the rest of substring (> 8 chars).
    bind(FOUND_SUBSTR);
    // First 8 chars are already matched.
    negptr(cnt2);
    addptr(cnt2, 8);

    bind(SCAN_SUBSTR);
    subl(cnt1, 8);
    cmpl(cnt2, -8); // Do not read beyond substring
    jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
    // Back-up strings to avoid reading beyond substring:
    // cnt1 = cnt1 - cnt2 + 8
    addl(cnt1, cnt2); // cnt2 is negative
    addl(cnt1, 8);
    movl(cnt2, 8); negptr(cnt2);
    bind(CONT_SCAN_SUBSTR);
    if (int_cnt2 < (int)G) {
      // Substring offset fits into the 32-bit displacement field.
      movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
      pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
    } else {
      // calculate index in register to avoid integer overflow (int_cnt2*2)
      movl(tmp, int_cnt2);
      addptr(tmp, cnt2);
      movdqu(vec, Address(str2, tmp, Address::times_2, 0));
      pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
    }
    // Need to reload strings pointers if not matched whole vector
    jccb(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
    addptr(cnt2, 8);
    jccb(Assembler::negative, SCAN_SUBSTR);
    // Fall through if found full substring

  } // (int_cnt2 > 8)

  bind(RET_FOUND);
  // Found result if we matched full small substring.
  // Compute substr offset
  subptr(result, str1);
  shrl(result, 1); // index
  bind(EXIT);

} // string_indexofC8
8910
8911// Small strings are loaded through stack if they cross page boundary.
void MacroAssembler::string_indexof(Register str1, Register str2,
                                    Register cnt1, Register cnt2,
                                    int int_cnt2,  Register result,
                                    XMMRegister vec, Register tmp) {
  assert(UseSSE42Intrinsics, "SSE4.2 is required");
  //
  // int_cnt2 is length of small (< 8 chars) constant substring
  // or (-1) for non constant substring in which case its length
  // is in cnt2 register.
  //
  // Note, inline_string_indexOf() generates checks:
  // if (substr.count > string.count) return -1;
  // if (substr.count == 0) return 0;
  //
  assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");

  // This method uses pcmpestri instruction with bound registers
  //   inputs:
  //     xmm - substring
  //     rax - substring length (elements count)
  //     mem - scanned string
  //     rdx - string length (elements count)
  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  //   outputs:
  //     rcx - matched index in string
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
        RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
        FOUND_CANDIDATE;

  { //========================================================
    // We don't know where these strings are located
    // and we can't read beyond them. Load them through stack.
    Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;

    movptr(tmp, rsp); // save old SP

    if (int_cnt2 > 0) {     // small (< 8 chars) constant substring
      if (int_cnt2 == 1) {  // One char
        load_unsigned_short(result, Address(str2, 0));
        movdl(vec, result); // move 32 bits
      } else if (int_cnt2 == 2) { // Two chars
        movdl(vec, Address(str2, 0)); // move 32 bits
      } else if (int_cnt2 == 4) { // Four chars
        movq(vec, Address(str2, 0));  // move 64 bits
      } else { // cnt2 = { 3, 5, 6, 7 }
        // Array header size is 12 bytes in 32-bit VM
        // + 6 bytes for 3 chars == 18 bytes,
        // enough space to load vec and shift.
        assert(HeapWordSize*typeArrayKlass::header_size() >= 12,"sanity");
        movdqu(vec, Address(str2, (int_cnt2*2)-16));
        psrldq(vec, 16-(int_cnt2*2));
      }
    } else { // not constant substring
      cmpl(cnt2, 8);
      jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough

      // We can read beyond string if str+16 does not cross page boundary
      // since heaps are aligned and mapped by pages.
      assert(os::vm_page_size() < (int)G, "default page should be small");
      movl(result, str2); // We need only low 32 bits
      andl(result, (os::vm_page_size()-1));
      cmpl(result, (os::vm_page_size()-16));
      jccb(Assembler::belowEqual, CHECK_STR);

      // Move small strings to stack to allow load 16 bytes into vec.
      subptr(rsp, 16);
      int stk_offset = wordSize-2;
      push(cnt2);

      // Copy the substring to the stack one char at a time, back to front.
      bind(COPY_SUBSTR);
      load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
      movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
      decrement(cnt2);
      jccb(Assembler::notZero, COPY_SUBSTR);

      pop(cnt2);
      movptr(str2, rsp);  // New substring address
    } // non constant

    bind(CHECK_STR);
    cmpl(cnt1, 8);
    jccb(Assembler::aboveEqual, BIG_STRINGS);

    // Check cross page boundary.
    movl(result, str1); // We need only low 32 bits
    andl(result, (os::vm_page_size()-1));
    cmpl(result, (os::vm_page_size()-16));
    jccb(Assembler::belowEqual, BIG_STRINGS);

    subptr(rsp, 16);
    int stk_offset = -2;
    if (int_cnt2 < 0) { // not constant
      push(cnt2);
      stk_offset += wordSize;
    }
    movl(cnt2, cnt1);

    // Copy the string to the stack one char at a time, back to front.
    bind(COPY_STR);
    load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
    movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
    decrement(cnt2);
    jccb(Assembler::notZero, COPY_STR);

    if (int_cnt2 < 0) { // not constant
      pop(cnt2);
    }
    movptr(str1, rsp);  // New string address

    bind(BIG_STRINGS);
    // Load substring.
    if (int_cnt2 < 0) { // -1
      movdqu(vec, Address(str2, 0));
      push(cnt2);       // substr count
      push(str2);       // substr addr
      push(str1);       // string addr
    } else {
      // Small (< 8 chars) constant substrings are loaded already.
      movl(cnt2, int_cnt2);
    }
    push(tmp);  // original SP

  } // Finished loading

  //========================================================
  // Start search
  //

  movptr(result, str1); // string addr

  if (int_cnt2  < 0) {  // Only for non constant substring
    jmpb(SCAN_TO_SUBSTR);

    // SP saved at sp+0
    // String saved at sp+1*wordSize
    // Substr saved at sp+2*wordSize
    // Substr count saved at sp+3*wordSize

    // Reload substr for rescan, this code
    // is executed only for large substrings (> 8 chars)
    bind(RELOAD_SUBSTR);
    movptr(str2, Address(rsp, 2*wordSize));
    movl(cnt2, Address(rsp, 3*wordSize));
    movdqu(vec, Address(str2, 0));
    // We came here after the beginning of the substring was
    // matched but the rest of it was not so we need to search
    // again. Start from the next element after the previous match.
    subptr(str1, result); // Restore counter
    shrl(str1, 1);
    addl(cnt1, str1);
    decrementl(cnt1);   // Shift to next element
    cmpl(cnt1, cnt2);
    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring

    addptr(result, 2);
  } // non constant

  // Scan string for start of substr in 16-byte vectors
  bind(SCAN_TO_SUBSTR);
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
  pcmpestri(vec, Address(result, 0), 0x0d);
  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  subl(cnt1, 8);
  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  cmpl(cnt1, cnt2);
  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  addptr(result, 16);

  bind(ADJUST_STR);
  cmpl(cnt1, 8); // Do not read beyond string
  jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  // Back-up string to avoid reading beyond string.
  lea(result, Address(result, cnt1, Address::times_2, -16));
  movl(cnt1, 8);
  jmpb(SCAN_TO_SUBSTR);

  // Found a potential substr
  bind(FOUND_CANDIDATE);
  // After pcmpestri tmp(rcx) contains matched element index

  // Make sure string is still long enough
  subl(cnt1, tmp);
  cmpl(cnt1, cnt2);
  jccb(Assembler::greaterEqual, FOUND_SUBSTR);
  // Left less than substring.

  bind(RET_NOT_FOUND);
  movl(result, -1);
  jmpb(CLEANUP);

  bind(FOUND_SUBSTR);
  // Compute start addr of substr
  lea(result, Address(result, tmp, Address::times_2));

  if (int_cnt2 > 0) { // Constant substring
    // Repeat search for small substring (< 8 chars)
    // from new point without reloading substring.
    // Have to check that we don't read beyond string.
    cmpl(tmp, 8-int_cnt2);
    jccb(Assembler::greater, ADJUST_STR);
    // Fall through if matched whole substring.
  } else { // non constant
    assert(int_cnt2 == -1, "should be != 0");

    addl(tmp, cnt2);
    // Found result if we matched whole substring.
    cmpl(tmp, 8);
    jccb(Assembler::lessEqual, RET_FOUND);

    // Repeat search for small substring (<= 8 chars)
    // from new point 'str1' without reloading substring.
    cmpl(cnt2, 8);
    // Have to check that we don't read beyond string.
    jccb(Assembler::lessEqual, ADJUST_STR);

    Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
    // Compare the rest of substring (> 8 chars).
    movptr(str1, result);

    cmpl(tmp, cnt2);
    // First 8 chars are already matched.
    jccb(Assembler::equal, CHECK_NEXT);

    bind(SCAN_SUBSTR);
    pcmpestri(vec, Address(str1, 0), 0x0d);
    // Need to reload strings pointers if not matched whole vector
    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0

    bind(CHECK_NEXT);
    subl(cnt2, 8);
    jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
    addptr(str1, 16);
    addptr(str2, 16);
    subl(cnt1, 8);
    cmpl(cnt2, 8); // Do not read beyond substring
    jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
    // Back-up strings to avoid reading beyond substring.
    lea(str2, Address(str2, cnt2, Address::times_2, -16));
    lea(str1, Address(str1, cnt2, Address::times_2, -16));
    subl(cnt1, cnt2);
    movl(cnt2, 8);
    addl(cnt1, 8);
    bind(CONT_SCAN_SUBSTR);
    movdqu(vec, Address(str2, 0));
    jmpb(SCAN_SUBSTR);

    bind(RET_FOUND_LONG);
    // Restore the original string address saved on the stack above.
    movptr(str1, Address(rsp, wordSize));
  } // non constant

  bind(RET_FOUND);
  // Compute substr offset
  subptr(result, str1);
  shrl(result, 1); // index

  bind(CLEANUP);
  pop(rsp); // restore SP

} // string_indexof
9172
9173// Compare strings.
// Lexicographic compare of two char[] strings; 'result' receives a value
// whose sign encodes the comparison (char difference at the first mismatch,
// or the length difference when one string is a prefix of the other).
void MacroAssembler::string_compare(Register str1, Register str2,
                                    Register cnt1, Register cnt2, Register result,
                                    XMMRegister vec1) {
  Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;

  // Compute the minimum of the string lengths and the
  // difference of the string lengths (stack).
  // Do the conditional move stuff
  movl(result, cnt1);
  subl(cnt1, cnt2);
  push(cnt1);
  cmov32(Assembler::lessEqual, cnt2, result);

  // Is the minimum length zero?
  testl(cnt2, cnt2);
  jcc(Assembler::zero, LENGTH_DIFF_LABEL);

  // Load first characters
  load_unsigned_short(result, Address(str1, 0));
  load_unsigned_short(cnt1, Address(str2, 0));

  // Compare first characters
  subl(result, cnt1);
  jcc(Assembler::notZero,  POP_LABEL);
  decrementl(cnt2);
  jcc(Assembler::zero, LENGTH_DIFF_LABEL);

  {
    // Check after comparing first character to see if strings are equivalent
    Label LSkip2;
    // Check if the strings start at same location
    cmpptr(str1, str2);
    jccb(Assembler::notEqual, LSkip2);

    // Check if the length difference is zero (from stack)
    cmpl(Address(rsp, 0), 0x0);
    jcc(Assembler::equal,  LENGTH_DIFF_LABEL);

    // Strings might not be equivalent
    bind(LSkip2);
  }

  Address::ScaleFactor scale = Address::times_2;
  int stride = 8;

  // Advance to next element
  addptr(str1, 16/stride);
  addptr(str2, 16/stride);

  if (UseSSE42Intrinsics) {
    Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
    int pcmpmask = 0x19;
    // Setup to compare 16-byte vectors
    movl(result, cnt2);
    andl(cnt2, ~(stride - 1));   // cnt2 holds the vector count
    jccb(Assembler::zero, COMPARE_TAIL);

    lea(str1, Address(str1, result, scale));
    lea(str2, Address(str2, result, scale));
    negptr(result);

    // pcmpestri
    //   inputs:
    //     vec1- substring
    //     rax - negative string length (elements count)
    //     mem - scanned string
    //     rdx - string length (elements count)
    //     pcmpmask - cmp mode: 11000 (string compare with negated result)
    //               + 00 (unsigned bytes) or  + 01 (unsigned shorts)
    //   outputs:
    //     rcx - first mismatched element index
    assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(str1, result, scale));
    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
    // After pcmpestri cnt1(rcx) contains mismatched element index

    jccb(Assembler::below, VECTOR_NOT_EQUAL);  // CF==1
    addptr(result, stride);
    subptr(cnt2, stride);
    jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);

    // compare wide vectors tail
    testl(result, result);
    jccb(Assembler::zero, LENGTH_DIFF_LABEL);

    movl(cnt2, stride);
    movl(result, stride);
    negptr(result);
    movdqu(vec1, Address(str1, result, scale));
    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
    jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);

    // Mismatched characters in the vectors
    bind(VECTOR_NOT_EQUAL);
    addptr(result, cnt1);
    movptr(cnt2, result);
    load_unsigned_short(result, Address(str1, cnt2, scale));
    load_unsigned_short(cnt1, Address(str2, cnt2, scale));
    subl(result, cnt1);
    jmpb(POP_LABEL);

    bind(COMPARE_TAIL); // limit is zero
    movl(cnt2, result);
    // Fallthru to tail compare
  }

  // Shift str2 and str1 to the end of the arrays, negate min
  lea(str1, Address(str1, cnt2, scale, 0));
  lea(str2, Address(str2, cnt2, scale, 0));
  negptr(cnt2);

  // Compare the rest of the elements
  bind(WHILE_HEAD_LABEL);
  load_unsigned_short(result, Address(str1, cnt2, scale, 0));
  load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
  subl(result, cnt1);
  jccb(Assembler::notZero, POP_LABEL);
  increment(cnt2);
  jccb(Assembler::notZero, WHILE_HEAD_LABEL);

  // Strings are equal up to min length.  Return the length difference.
  bind(LENGTH_DIFF_LABEL);
  pop(result);
  jmpb(DONE_LABEL);

  // Discard the stored length difference
  bind(POP_LABEL);
  pop(cnt1);

  // That's it
  bind(DONE_LABEL);
}
9308
9309// Compare char[] arrays aligned to 4 bytes or substrings.
// Equality check for two char arrays (is_array_equ) or substrings.
// Sets 'result' to 1 when equal, 0 otherwise.  'limit' holds the element
// count (for substrings) or is loaded from the array length headers.
void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
                                        Register limit, Register result, Register chr,
                                        XMMRegister vec1, XMMRegister vec2) {
  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;

  int length_offset  = arrayOopDesc::length_offset_in_bytes();
  int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);

  // Check the input args
  cmpptr(ary1, ary2);
  jcc(Assembler::equal, TRUE_LABEL);

  if (is_array_equ) {
    // Need additional checks for arrays_equals.
    testptr(ary1, ary1);
    jcc(Assembler::zero, FALSE_LABEL);
    testptr(ary2, ary2);
    jcc(Assembler::zero, FALSE_LABEL);

    // Check the lengths
    movl(limit, Address(ary1, length_offset));
    cmpl(limit, Address(ary2, length_offset));
    jcc(Assembler::notEqual, FALSE_LABEL);
  }

  // count == 0
  testl(limit, limit);
  jcc(Assembler::zero, TRUE_LABEL);

  if (is_array_equ) {
    // Load array address
    lea(ary1, Address(ary1, base_offset));
    lea(ary2, Address(ary2, base_offset));
  }

  // Convert element count to a byte count for the loops below.
  shll(limit, 1);      // byte count != 0
  movl(result, limit); // copy

  if (UseSSE42Intrinsics) {
    // With SSE4.2, use double quad vector compare
    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;

    // Compare 16-byte vectors
    andl(result, 0x0000000e);  //   tail count (in bytes)
    andl(limit, 0xfffffff0);   // vector count (in bytes)
    jccb(Assembler::zero, COMPARE_TAIL);

    lea(ary1, Address(ary1, limit, Address::times_1));
    lea(ary2, Address(ary2, limit, Address::times_1));
    negptr(limit);

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(ary1, limit, Address::times_1));
    movdqu(vec2, Address(ary2, limit, Address::times_1));
    pxor(vec1, vec2);

    // XOR of equal vectors is all-zero; ptest sets ZF in that case.
    ptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    addptr(limit, 16);
    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);

    testl(result, result);
    jccb(Assembler::zero, TRUE_LABEL);

    movdqu(vec1, Address(ary1, result, Address::times_1, -16));
    movdqu(vec2, Address(ary2, result, Address::times_1, -16));
    pxor(vec1, vec2);

    ptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    jmpb(TRUE_LABEL);

    bind(COMPARE_TAIL); // limit is zero
    movl(limit, result);
    // Fallthru to tail compare
  }

  // Compare 4-byte vectors
  andl(limit, 0xfffffffc); // vector count (in bytes)
  jccb(Assembler::zero, COMPARE_CHAR);

  lea(ary1, Address(ary1, limit, Address::times_1));
  lea(ary2, Address(ary2, limit, Address::times_1));
  negptr(limit);

  bind(COMPARE_VECTORS);
  movl(chr, Address(ary1, limit, Address::times_1));
  cmpl(chr, Address(ary2, limit, Address::times_1));
  jccb(Assembler::notEqual, FALSE_LABEL);
  addptr(limit, 4);
  jcc(Assembler::notZero, COMPARE_VECTORS);

  // Compare trailing char (final 2 bytes), if any
  bind(COMPARE_CHAR);
  testl(result, 0x2);   // tail  char
  jccb(Assembler::zero, TRUE_LABEL);
  load_unsigned_short(chr, Address(ary1, 0));
  load_unsigned_short(limit, Address(ary2, 0));
  cmpl(chr, limit);
  jccb(Assembler::notEqual, FALSE_LABEL);

  bind(TRUE_LABEL);
  movl(result, 1);   // return true
  jmpb(DONE);

  bind(FALSE_LABEL);
  xorl(result, result); // return false

  // That's it
  bind(DONE);
}
9421
9422#ifdef PRODUCT
9423#define BLOCK_COMMENT(str) /* nothing */
9424#else
9425#define BLOCK_COMMENT(str) block_comment(str)
9426#endif
9427
9428#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
// Fill 'count' elements of type 't' (byte/short/int) starting at 'to' with
// 'value'.  The value is first replicated across a 32-bit word; 'shift'
// converts between element counts and the 8-byte/32-byte chunk counts used
// by the loops (byte: 2, short: 1, int: 0).
void MacroAssembler::generate_fill(BasicType t, bool aligned,
                                   Register to, Register value, Register count,
                                   Register rtmp, XMMRegister xtmp) {
  assert_different_registers(to, value, count, rtmp);
  Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
  Label L_fill_2_bytes, L_fill_4_bytes;

  int shift = -1;
  switch (t) {
    case T_BYTE:
      shift = 2;
      break;
    case T_SHORT:
      shift = 1;
      break;
    case T_INT:
      shift = 0;
      break;
    default: ShouldNotReachHere();
  }

  // Replicate the fill value across all 32 bits of 'value'.
  if (t == T_BYTE) {
    andl(value, 0xff);
    movl(rtmp, value);
    shll(rtmp, 8);
    orl(value, rtmp);
  }
  if (t == T_SHORT) {
    andl(value, 0xffff);
  }
  if (t == T_BYTE || t == T_SHORT) {
    movl(rtmp, value);
    shll(rtmp, 16);
    orl(value, rtmp);
  }

  cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
  jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
  if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
    // align source address at 4 bytes address boundary
    if (t == T_BYTE) {
      // One byte misalignment happens only for byte arrays
      testptr(to, 1);
      jccb(Assembler::zero, L_skip_align1);
      movb(Address(to, 0), value);
      increment(to);
      decrement(count);
      BIND(L_skip_align1);
    }
    // Two bytes misalignment happens only for byte and short (char) arrays
    testptr(to, 2);
    jccb(Assembler::zero, L_skip_align2);
    movw(Address(to, 0), value);
    addptr(to, 2);
    subl(count, 1<<(shift-1));
    BIND(L_skip_align2);
  }
  if (UseSSE < 2) {
    Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
    // Fill 32-byte chunks
    subl(count, 8 << shift);
    jcc(Assembler::less, L_check_fill_8_bytes);
    align(16);

    BIND(L_fill_32_bytes_loop);

    for (int i = 0; i < 32; i += 4) {
      movl(Address(to, i), value);
    }

    addptr(to, 32);
    subl(count, 8 << shift);
    jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
    BIND(L_check_fill_8_bytes);
    addl(count, 8 << shift);
    jccb(Assembler::zero, L_exit);
    jmpb(L_fill_8_bytes);

    //
    // length is too short, just fill qwords
    //
    BIND(L_fill_8_bytes_loop);
    movl(Address(to, 0), value);
    movl(Address(to, 4), value);
    addptr(to, 8);
    BIND(L_fill_8_bytes);
    subl(count, 1 << (shift + 1));
    jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    // fall through to fill 4 bytes
  } else {
    Label L_fill_32_bytes;
    if (!UseUnalignedLoadStores) {
      // align to 8 bytes, we know we are 4 byte aligned to start
      testptr(to, 4);
      jccb(Assembler::zero, L_fill_32_bytes);
      movl(Address(to, 0), value);
      addptr(to, 4);
      subl(count, 1<<shift);
    }
    BIND(L_fill_32_bytes);
    {
      assert( UseSSE >= 2, "supported cpu only" );
      Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
      // Fill 32-byte chunks
      // Broadcast the 32-bit fill value into all four lanes of xtmp.
      movdl(xtmp, value);
      pshufd(xtmp, xtmp, 0);

      subl(count, 8 << shift);
      jcc(Assembler::less, L_check_fill_8_bytes);
      align(16);

      BIND(L_fill_32_bytes_loop);

      if (UseUnalignedLoadStores) {
        movdqu(Address(to, 0), xtmp);
        movdqu(Address(to, 16), xtmp);
      } else {
        movq(Address(to, 0), xtmp);
        movq(Address(to, 8), xtmp);
        movq(Address(to, 16), xtmp);
        movq(Address(to, 24), xtmp);
      }

      addptr(to, 32);
      subl(count, 8 << shift);
      jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
      BIND(L_check_fill_8_bytes);
      addl(count, 8 << shift);
      jccb(Assembler::zero, L_exit);
      jmpb(L_fill_8_bytes);

      //
      // length is too short, just fill qwords
      //
      BIND(L_fill_8_bytes_loop);
      movq(Address(to, 0), xtmp);
      addptr(to, 8);
      BIND(L_fill_8_bytes);
      subl(count, 1 << (shift + 1));
      jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    }
  }
  // fill trailing 4 bytes
  BIND(L_fill_4_bytes);
  testl(count, 1<<shift);
  jccb(Assembler::zero, L_fill_2_bytes);
  movl(Address(to, 0), value);
  if (t == T_BYTE || t == T_SHORT) {
    addptr(to, 4);
    BIND(L_fill_2_bytes);
    // fill trailing 2 bytes
    testl(count, 1<<(shift-1));
    jccb(Assembler::zero, L_fill_byte);
    movw(Address(to, 0), value);
    if (t == T_BYTE) {
      addptr(to, 2);
      BIND(L_fill_byte);
      // fill trailing byte
      testl(count, 1);
      jccb(Assembler::zero, L_exit);
      movb(Address(to, 0), value);
    } else {
      BIND(L_fill_byte);
    }
  } else {
    BIND(L_fill_2_bytes);
  }
  BIND(L_exit);
}
9598#undef BIND
9599#undef BLOCK_COMMENT
9600
9601
9602Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
9603  switch (cond) {
9604    // Note some conditions are synonyms for others
9605    case Assembler::zero:         return Assembler::notZero;
9606    case Assembler::notZero:      return Assembler::zero;
9607    case Assembler::less:         return Assembler::greaterEqual;
9608    case Assembler::lessEqual:    return Assembler::greater;
9609    case Assembler::greater:      return Assembler::lessEqual;
9610    case Assembler::greaterEqual: return Assembler::less;
9611    case Assembler::below:        return Assembler::aboveEqual;
9612    case Assembler::belowEqual:   return Assembler::above;
9613    case Assembler::above:        return Assembler::belowEqual;
9614    case Assembler::aboveEqual:   return Assembler::below;
9615    case Assembler::overflow:     return Assembler::noOverflow;
9616    case Assembler::noOverflow:   return Assembler::overflow;
9617    case Assembler::negative:     return Assembler::positive;
9618    case Assembler::positive:     return Assembler::negative;
9619    case Assembler::parity:       return Assembler::noParity;
9620    case Assembler::noParity:     return Assembler::parity;
9621  }
9622  ShouldNotReachHere(); return Assembler::overflow;
9623}
9624
9625SkipIfEqual::SkipIfEqual(
9626    MacroAssembler* masm, const bool* flag_addr, bool value) {
9627  _masm = masm;
9628  _masm->cmp8(ExternalAddress((address)flag_addr), value);
9629  _masm->jcc(Assembler::equal, _label);
9630}
9631
// Bind the skip target here, ending the region guarded by the
// constructor's conditional branch.
SkipIfEqual::~SkipIfEqual() {
  _masm->bind(_label);
}
9635