assembler_x86.cpp revision 6412:53a41e7cbe05
1/*
2 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "asm/assembler.hpp"
27#include "asm/assembler.inline.hpp"
28#include "gc_interface/collectedHeap.inline.hpp"
29#include "interpreter/interpreter.hpp"
30#include "memory/cardTableModRefBS.hpp"
31#include "memory/resourceArea.hpp"
32#include "prims/methodHandles.hpp"
33#include "runtime/biasedLocking.hpp"
34#include "runtime/interfaceSupport.hpp"
35#include "runtime/objectMonitor.hpp"
36#include "runtime/os.hpp"
37#include "runtime/sharedRuntime.hpp"
38#include "runtime/stubRoutines.hpp"
39#include "utilities/macros.hpp"
40#if INCLUDE_ALL_GCS
41#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
42#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
43#include "gc_implementation/g1/heapRegion.hpp"
44#endif // INCLUDE_ALL_GCS
45
// Helper macros for annotating and aborting generated code.
//   PRODUCT builds: BLOCK_COMMENT expands to nothing; STOP only calls stop().
//   other builds:   BLOCK_COMMENT forwards to block_comment(); STOP first
//                   calls block_comment() with the message, then stop().
// NOTE(review): the non-PRODUCT STOP, and BIND, expand to two separate
// statements that are not wrapped in a block, so using them as the unbraced
// body of an if/else/loop would only guard the first statement.
46#ifdef PRODUCT
47#define BLOCK_COMMENT(str) /* nothing */
48#define STOP(error) stop(error)
49#else
50#define BLOCK_COMMENT(str) block_comment(str)
51#define STOP(error) block_comment(error); stop(error)
52#endif
53
// BIND calls bind() on the label and then BLOCK_COMMENT with the stringified
// label name followed by ":".
54#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
55// Implementation of AddressLiteral
56
57AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
58  _is_lval = false;
59  _target = target;
60  switch (rtype) {
61  case relocInfo::oop_type:
62  case relocInfo::metadata_type:
63    // Oops are a special case. Normally they would be their own section
64    // but in cases like icBuffer they are literals in the code stream that
65    // we don't have a section for. We use none so that we get a literal address
66    // which is always patchable.
67    break;
68  case relocInfo::external_word_type:
69    _rspec = external_word_Relocation::spec(target);
70    break;
71  case relocInfo::internal_word_type:
72    _rspec = internal_word_Relocation::spec(target);
73    break;
74  case relocInfo::opt_virtual_call_type:
75    _rspec = opt_virtual_call_Relocation::spec();
76    break;
77  case relocInfo::static_call_type:
78    _rspec = static_call_Relocation::spec();
79    break;
80  case relocInfo::runtime_call_type:
81    _rspec = runtime_call_Relocation::spec();
82    break;
83  case relocInfo::poll_type:
84  case relocInfo::poll_return_type:
85    _rspec = Relocation::spec_simple(rtype);
86    break;
87  case relocInfo::none:
88    break;
89  default:
90    ShouldNotReachHere();
91    break;
92  }
93}
94
95// Implementation of Address
96
97#ifdef _LP64
98
99Address Address::make_array(ArrayAddress adr) {
100  // Not implementable on 64bit machines
101  // Should have been handled higher up the call chain.
102  ShouldNotReachHere();
103  return Address();
104}
105
106// exceedingly dangerous constructor
107Address::Address(int disp, address loc, relocInfo::relocType rtype) {
108  _base  = noreg;
109  _index = noreg;
110  _scale = no_scale;
111  _disp  = disp;
112  switch (rtype) {
113    case relocInfo::external_word_type:
114      _rspec = external_word_Relocation::spec(loc);
115      break;
116    case relocInfo::internal_word_type:
117      _rspec = internal_word_Relocation::spec(loc);
118      break;
119    case relocInfo::runtime_call_type:
120      // HMM
121      _rspec = runtime_call_Relocation::spec();
122      break;
123    case relocInfo::poll_type:
124    case relocInfo::poll_return_type:
125      _rspec = Relocation::spec_simple(rtype);
126      break;
127    case relocInfo::none:
128      break;
129    default:
130      ShouldNotReachHere();
131  }
132}
133#else // LP64
134
135Address Address::make_array(ArrayAddress adr) {
136  AddressLiteral base = adr.base();
137  Address index = adr.index();
138  assert(index._disp == 0, "must not have disp"); // maybe it can?
139  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
140  array._rspec = base._rspec;
141  return array;
142}
143
144// exceedingly dangerous constructor
145Address::Address(address loc, RelocationHolder spec) {
146  _base  = noreg;
147  _index = noreg;
148  _scale = no_scale;
149  _disp  = (intptr_t) loc;
150  _rspec = spec;
151}
152
153#endif // _LP64
154
155
156
157// Convert the raw encoding form into the form expected by the constructor for
158// Address.  An index of 4 (rsp) corresponds to having no index, so convert
159// that to noreg for the Address constructor.
160Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
161  RelocationHolder rspec;
162  if (disp_reloc != relocInfo::none) {
163    rspec = Relocation::spec_simple(disp_reloc);
164  }
165  bool valid_index = index != rsp->encoding();
166  if (valid_index) {
167    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
168    madr._rspec = rspec;
169    return madr;
170  } else {
171    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
172    madr._rspec = rspec;
173    return madr;
174  }
175}
176
177// Implementation of Assembler
178
179int AbstractAssembler::code_fill_byte() {
180  return (u_char)'\xF4'; // hlt
181}
182
183// make this go away someday
184void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
185  if (rtype == relocInfo::none)
186        emit_int32(data);
187  else  emit_data(data, Relocation::spec_simple(rtype), format);
188}
189
190void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
191  assert(imm_operand == 0, "default format must be immediate in this file");
192  assert(inst_mark() != NULL, "must be inside InstructionMark");
193  if (rspec.type() !=  relocInfo::none) {
194    #ifdef ASSERT
195      check_relocation(rspec, format);
196    #endif
197    // Do not use AbstractAssembler::relocate, which is not intended for
198    // embedded words.  Instead, relocate to the enclosing instruction.
199
200    // hack. call32 is too wide for mask so use disp32
201    if (format == call32_operand)
202      code_section()->relocate(inst_mark(), rspec, disp32_operand);
203    else
204      code_section()->relocate(inst_mark(), rspec, format);
205  }
206  emit_int32(data);
207}
208
209static int encode(Register r) {
210  int enc = r->encoding();
211  if (enc >= 8) {
212    enc -= 8;
213  }
214  return enc;
215}
216
217void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
218  assert(dst->has_byte_register(), "must have byte register");
219  assert(isByte(op1) && isByte(op2), "wrong opcode");
220  assert(isByte(imm8), "not a byte");
221  assert((op1 & 0x01) == 0, "should be 8bit operation");
222  emit_int8(op1);
223  emit_int8(op2 | encode(dst));
224  emit_int8(imm8);
225}
226
227
228void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
229  assert(isByte(op1) && isByte(op2), "wrong opcode");
230  assert((op1 & 0x01) == 1, "should be 32bit operation");
231  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
232  if (is8bit(imm32)) {
233    emit_int8(op1 | 0x02); // set sign bit
234    emit_int8(op2 | encode(dst));
235    emit_int8(imm32 & 0xFF);
236  } else {
237    emit_int8(op1);
238    emit_int8(op2 | encode(dst));
239    emit_int32(imm32);
240  }
241}
242
243// Force generation of a 4 byte immediate value even if it fits into 8bit
244void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
245  assert(isByte(op1) && isByte(op2), "wrong opcode");
246  assert((op1 & 0x01) == 1, "should be 32bit operation");
247  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
248  emit_int8(op1);
249  emit_int8(op2 | encode(dst));
250  emit_int32(imm32);
251}
252
253// immediate-to-memory forms
254void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
255  assert((op1 & 0x01) == 1, "should be 32bit operation");
256  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
257  if (is8bit(imm32)) {
258    emit_int8(op1 | 0x02); // set sign bit
259    emit_operand(rm, adr, 1);
260    emit_int8(imm32 & 0xFF);
261  } else {
262    emit_int8(op1);
263    emit_operand(rm, adr, 4);
264    emit_int32(imm32);
265  }
266}
267
268
269void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
270  assert(isByte(op1) && isByte(op2), "wrong opcode");
271  emit_int8(op1);
272  emit_int8(op2 | encode(dst) << 3 | encode(src));
273}
274
275
276void Assembler::emit_operand(Register reg, Register base, Register index,
277                             Address::ScaleFactor scale, int disp,
278                             RelocationHolder const& rspec,
279                             int rip_relative_correction) {
280  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();
281
282  // Encode the registers as needed in the fields they are used in
283
284  int regenc = encode(reg) << 3;
285  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
286  int baseenc = base->is_valid() ? encode(base) : 0;
287
288  if (base->is_valid()) {
289    if (index->is_valid()) {
290      assert(scale != Address::no_scale, "inconsistent address");
291      // [base + index*scale + disp]
292      if (disp == 0 && rtype == relocInfo::none  &&
293          base != rbp LP64_ONLY(&& base != r13)) {
294        // [base + index*scale]
295        // [00 reg 100][ss index base]
296        assert(index != rsp, "illegal addressing mode");
297        emit_int8(0x04 | regenc);
298        emit_int8(scale << 6 | indexenc | baseenc);
299      } else if (is8bit(disp) && rtype == relocInfo::none) {
300        // [base + index*scale + imm8]
301        // [01 reg 100][ss index base] imm8
302        assert(index != rsp, "illegal addressing mode");
303        emit_int8(0x44 | regenc);
304        emit_int8(scale << 6 | indexenc | baseenc);
305        emit_int8(disp & 0xFF);
306      } else {
307        // [base + index*scale + disp32]
308        // [10 reg 100][ss index base] disp32
309        assert(index != rsp, "illegal addressing mode");
310        emit_int8(0x84 | regenc);
311        emit_int8(scale << 6 | indexenc | baseenc);
312        emit_data(disp, rspec, disp32_operand);
313      }
314    } else if (base == rsp LP64_ONLY(|| base == r12)) {
315      // [rsp + disp]
316      if (disp == 0 && rtype == relocInfo::none) {
317        // [rsp]
318        // [00 reg 100][00 100 100]
319        emit_int8(0x04 | regenc);
320        emit_int8(0x24);
321      } else if (is8bit(disp) && rtype == relocInfo::none) {
322        // [rsp + imm8]
323        // [01 reg 100][00 100 100] disp8
324        emit_int8(0x44 | regenc);
325        emit_int8(0x24);
326        emit_int8(disp & 0xFF);
327      } else {
328        // [rsp + imm32]
329        // [10 reg 100][00 100 100] disp32
330        emit_int8(0x84 | regenc);
331        emit_int8(0x24);
332        emit_data(disp, rspec, disp32_operand);
333      }
334    } else {
335      // [base + disp]
336      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
337      if (disp == 0 && rtype == relocInfo::none &&
338          base != rbp LP64_ONLY(&& base != r13)) {
339        // [base]
340        // [00 reg base]
341        emit_int8(0x00 | regenc | baseenc);
342      } else if (is8bit(disp) && rtype == relocInfo::none) {
343        // [base + disp8]
344        // [01 reg base] disp8
345        emit_int8(0x40 | regenc | baseenc);
346        emit_int8(disp & 0xFF);
347      } else {
348        // [base + disp32]
349        // [10 reg base] disp32
350        emit_int8(0x80 | regenc | baseenc);
351        emit_data(disp, rspec, disp32_operand);
352      }
353    }
354  } else {
355    if (index->is_valid()) {
356      assert(scale != Address::no_scale, "inconsistent address");
357      // [index*scale + disp]
358      // [00 reg 100][ss index 101] disp32
359      assert(index != rsp, "illegal addressing mode");
360      emit_int8(0x04 | regenc);
361      emit_int8(scale << 6 | indexenc | 0x05);
362      emit_data(disp, rspec, disp32_operand);
363    } else if (rtype != relocInfo::none ) {
364      // [disp] (64bit) RIP-RELATIVE (32bit) abs
365      // [00 000 101] disp32
366
367      emit_int8(0x05 | regenc);
368      // Note that the RIP-rel. correction applies to the generated
369      // disp field, but _not_ to the target address in the rspec.
370
371      // disp was created by converting the target address minus the pc
372      // at the start of the instruction. That needs more correction here.
373      // intptr_t disp = target - next_ip;
374      assert(inst_mark() != NULL, "must be inside InstructionMark");
375      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
376      int64_t adjusted = disp;
377      // Do rip-rel adjustment for 64bit
378      LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
379      assert(is_simm32(adjusted),
380             "must be 32bit offset (RIP relative address)");
381      emit_data((int32_t) adjusted, rspec, disp32_operand);
382
383    } else {
384      // 32bit never did this, did everything as the rip-rel/disp code above
385      // [disp] ABSOLUTE
386      // [00 reg 100][00 100 101] disp32
387      emit_int8(0x04 | regenc);
388      emit_int8(0x25);
389      emit_data(disp, rspec, disp32_operand);
390    }
391  }
392}
393
394void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
395                             Address::ScaleFactor scale, int disp,
396                             RelocationHolder const& rspec) {
397  emit_operand((Register)reg, base, index, scale, disp, rspec);
398}
399
400// Secret local extension to Assembler::WhichOperand:
// end_pc_operand is defined as _WhichOperand_limit and is passed as the
// 'which' argument of locate_operand() to ask for the end of the
// instruction instead of the address of an operand.
401#define end_pc_operand (_WhichOperand_limit)
402
403address Assembler::locate_operand(address inst, WhichOperand which) {
404  // Decode the given instruction, and return the address of
405  // an embedded 32-bit operand word.
406
407  // If "which" is disp32_operand, selects the displacement portion
408  // of an effective address specifier.
409  // If "which" is imm64_operand, selects the trailing immediate constant.
410  // If "which" is call32_operand, selects the displacement of a call or jump.
411  // Caller is responsible for ensuring that there is such an operand,
412  // and that it is 32/64 bits wide.
413
414  // If "which" is end_pc_operand, find the end of the instruction.
415
416  address ip = inst;
417  bool is_64bit = false;
418
419  debug_only(bool has_disp32 = false);
420  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn
421
422  again_after_prefix:
423  switch (0xFF & *ip++) {
424
425  // These convenience macros generate groups of "case" labels for the switch.
426#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
427#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
428             case (x)+4: case (x)+5: case (x)+6: case (x)+7
429#define REP16(x) REP8((x)+0): \
430              case REP8((x)+8)
431
432  case CS_segment:
433  case SS_segment:
434  case DS_segment:
435  case ES_segment:
436  case FS_segment:
437  case GS_segment:
438    // Seems dubious
439    LP64_ONLY(assert(false, "shouldn't have that prefix"));
440    assert(ip == inst+1, "only one prefix allowed");
441    goto again_after_prefix;
442
443  case 0x67:
444  case REX:
445  case REX_B:
446  case REX_X:
447  case REX_XB:
448  case REX_R:
449  case REX_RB:
450  case REX_RX:
451  case REX_RXB:
452    NOT_LP64(assert(false, "64bit prefixes"));
453    goto again_after_prefix;
454
455  case REX_W:
456  case REX_WB:
457  case REX_WX:
458  case REX_WXB:
459  case REX_WR:
460  case REX_WRB:
461  case REX_WRX:
462  case REX_WRXB:
463    NOT_LP64(assert(false, "64bit prefixes"));
464    is_64bit = true;
465    goto again_after_prefix;
466
467  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
468  case 0x88: // movb a, r
469  case 0x89: // movl a, r
470  case 0x8A: // movb r, a
471  case 0x8B: // movl r, a
472  case 0x8F: // popl a
473    debug_only(has_disp32 = true);
474    break;
475
476  case 0x68: // pushq #32
477    if (which == end_pc_operand) {
478      return ip + 4;
479    }
480    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
481    return ip;                  // not produced by emit_operand
482
483  case 0x66: // movw ... (size prefix)
484    again_after_size_prefix2:
485    switch (0xFF & *ip++) {
486    case REX:
487    case REX_B:
488    case REX_X:
489    case REX_XB:
490    case REX_R:
491    case REX_RB:
492    case REX_RX:
493    case REX_RXB:
494    case REX_W:
495    case REX_WB:
496    case REX_WX:
497    case REX_WXB:
498    case REX_WR:
499    case REX_WRB:
500    case REX_WRX:
501    case REX_WRXB:
502      NOT_LP64(assert(false, "64bit prefix found"));
503      goto again_after_size_prefix2;
504    case 0x8B: // movw r, a
505    case 0x89: // movw a, r
506      debug_only(has_disp32 = true);
507      break;
508    case 0xC7: // movw a, #16
509      debug_only(has_disp32 = true);
510      tail_size = 2;  // the imm16
511      break;
512    case 0x0F: // several SSE/SSE2 variants
513      ip--;    // reparse the 0x0F
514      goto again_after_prefix;
515    default:
516      ShouldNotReachHere();
517    }
518    break;
519
520  case REP8(0xB8): // movl/q r, #32/#64(oop?)
521    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
522    // these asserts are somewhat nonsensical
523#ifndef _LP64
524    assert(which == imm_operand || which == disp32_operand,
525           err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip)));
526#else
527    assert((which == call32_operand || which == imm_operand) && is_64bit ||
528           which == narrow_oop_operand && !is_64bit,
529           err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip)));
530#endif // _LP64
531    return ip;
532
533  case 0x69: // imul r, a, #32
534  case 0xC7: // movl a, #32(oop?)
535    tail_size = 4;
536    debug_only(has_disp32 = true); // has both kinds of operands!
537    break;
538
539  case 0x0F: // movx..., etc.
540    switch (0xFF & *ip++) {
541    case 0x3A: // pcmpestri
542      tail_size = 1;
543    case 0x38: // ptest, pmovzxbw
544      ip++; // skip opcode
545      debug_only(has_disp32 = true); // has both kinds of operands!
546      break;
547
548    case 0x70: // pshufd r, r/a, #8
549      debug_only(has_disp32 = true); // has both kinds of operands!
550    case 0x73: // psrldq r, #8
551      tail_size = 1;
552      break;
553
554    case 0x12: // movlps
555    case 0x28: // movaps
556    case 0x2E: // ucomiss
557    case 0x2F: // comiss
558    case 0x54: // andps
559    case 0x55: // andnps
560    case 0x56: // orps
561    case 0x57: // xorps
562    case 0x6E: // movd
563    case 0x7E: // movd
564    case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
565      debug_only(has_disp32 = true);
566      break;
567
568    case 0xAD: // shrd r, a, %cl
569    case 0xAF: // imul r, a
570    case 0xBE: // movsbl r, a (movsxb)
571    case 0xBF: // movswl r, a (movsxw)
572    case 0xB6: // movzbl r, a (movzxb)
573    case 0xB7: // movzwl r, a (movzxw)
574    case REP16(0x40): // cmovl cc, r, a
575    case 0xB0: // cmpxchgb
576    case 0xB1: // cmpxchg
577    case 0xC1: // xaddl
578    case 0xC7: // cmpxchg8
579    case REP16(0x90): // setcc a
580      debug_only(has_disp32 = true);
581      // fall out of the switch to decode the address
582      break;
583
584    case 0xC4: // pinsrw r, a, #8
585      debug_only(has_disp32 = true);
586    case 0xC5: // pextrw r, r, #8
587      tail_size = 1;  // the imm8
588      break;
589
590    case 0xAC: // shrd r, a, #8
591      debug_only(has_disp32 = true);
592      tail_size = 1;  // the imm8
593      break;
594
595    case REP16(0x80): // jcc rdisp32
596      if (which == end_pc_operand)  return ip + 4;
597      assert(which == call32_operand, "jcc has no disp32 or imm");
598      return ip;
599    default:
600      ShouldNotReachHere();
601    }
602    break;
603
604  case 0x81: // addl a, #32; addl r, #32
605    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
606    // on 32bit in the case of cmpl, the imm might be an oop
607    tail_size = 4;
608    debug_only(has_disp32 = true); // has both kinds of operands!
609    break;
610
611  case 0x83: // addl a, #8; addl r, #8
612    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
613    debug_only(has_disp32 = true); // has both kinds of operands!
614    tail_size = 1;
615    break;
616
617  case 0x9B:
618    switch (0xFF & *ip++) {
619    case 0xD9: // fnstcw a
620      debug_only(has_disp32 = true);
621      break;
622    default:
623      ShouldNotReachHere();
624    }
625    break;
626
627  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
628  case REP4(0x10): // adc...
629  case REP4(0x20): // and...
630  case REP4(0x30): // xor...
631  case REP4(0x08): // or...
632  case REP4(0x18): // sbb...
633  case REP4(0x28): // sub...
634  case 0xF7: // mull a
635  case 0x8D: // lea r, a
636  case 0x87: // xchg r, a
637  case REP4(0x38): // cmp...
638  case 0x85: // test r, a
639    debug_only(has_disp32 = true); // has both kinds of operands!
640    break;
641
642  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
643  case 0xC6: // movb a, #8
644  case 0x80: // cmpb a, #8
645  case 0x6B: // imul r, a, #8
646    debug_only(has_disp32 = true); // has both kinds of operands!
647    tail_size = 1; // the imm8
648    break;
649
650  case 0xC4: // VEX_3bytes
651  case 0xC5: // VEX_2bytes
652    assert((UseAVX > 0), "shouldn't have VEX prefix");
653    assert(ip == inst+1, "no prefixes allowed");
654    // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions
655    // but they have prefix 0x0F and processed when 0x0F processed above.
656    //
657    // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES
658    // instructions (these instructions are not supported in 64-bit mode).
659    // To distinguish them bits [7:6] are set in the VEX second byte since
660    // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set
661    // those VEX bits REX and vvvv bits are inverted.
662    //
663    // Fortunately C2 doesn't generate these instructions so we don't need
664    // to check for them in product version.
665
666    // Check second byte
667    NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));
668
669    // First byte
670    if ((0xFF & *inst) == VEX_3bytes) {
671      ip++; // third byte
672      is_64bit = ((VEX_W & *ip) == VEX_W);
673    }
674    ip++; // opcode
675    // To find the end of instruction (which == end_pc_operand).
676    switch (0xFF & *ip) {
677    case 0x61: // pcmpestri r, r/a, #8
678    case 0x70: // pshufd r, r/a, #8
679    case 0x73: // psrldq r, #8
680      tail_size = 1;  // the imm8
681      break;
682    default:
683      break;
684    }
685    ip++; // skip opcode
686    debug_only(has_disp32 = true); // has both kinds of operands!
687    break;
688
689  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
690  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
691  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
692  case 0xDD: // fld_d a; fst_d a; fstp_d a
693  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
694  case 0xDF: // fild_d a; fistp_d a
695  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
696  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
697  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
698    debug_only(has_disp32 = true);
699    break;
700
701  case 0xE8: // call rdisp32
702  case 0xE9: // jmp  rdisp32
703    if (which == end_pc_operand)  return ip + 4;
704    assert(which == call32_operand, "call has no disp32 or imm");
705    return ip;
706
707  case 0xF0:                    // Lock
708    assert(os::is_MP(), "only on MP");
709    goto again_after_prefix;
710
711  case 0xF3:                    // For SSE
712  case 0xF2:                    // For SSE2
713    switch (0xFF & *ip++) {
714    case REX:
715    case REX_B:
716    case REX_X:
717    case REX_XB:
718    case REX_R:
719    case REX_RB:
720    case REX_RX:
721    case REX_RXB:
722    case REX_W:
723    case REX_WB:
724    case REX_WX:
725    case REX_WXB:
726    case REX_WR:
727    case REX_WRB:
728    case REX_WRX:
729    case REX_WRXB:
730      NOT_LP64(assert(false, "found 64bit prefix"));
731      ip++;
732    default:
733      ip++;
734    }
735    debug_only(has_disp32 = true); // has both kinds of operands!
736    break;
737
738  default:
739    ShouldNotReachHere();
740
741#undef REP8
742#undef REP16
743  }
744
745  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
746#ifdef _LP64
747  assert(which != imm_operand, "instruction is not a movq reg, imm64");
748#else
749  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
750  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
751#endif // LP64
752  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");
753
754  // parse the output of emit_operand
755  int op2 = 0xFF & *ip++;
756  int base = op2 & 0x07;
757  int op3 = -1;
758  const int b100 = 4;
759  const int b101 = 5;
760  if (base == b100 && (op2 >> 6) != 3) {
761    op3 = 0xFF & *ip++;
762    base = op3 & 0x07;   // refetch the base
763  }
764  // now ip points at the disp (if any)
765
766  switch (op2 >> 6) {
767  case 0:
768    // [00 reg  100][ss index base]
769    // [00 reg  100][00   100  esp]
770    // [00 reg base]
771    // [00 reg  100][ss index  101][disp32]
772    // [00 reg  101]               [disp32]
773
774    if (base == b101) {
775      if (which == disp32_operand)
776        return ip;              // caller wants the disp32
777      ip += 4;                  // skip the disp32
778    }
779    break;
780
781  case 1:
782    // [01 reg  100][ss index base][disp8]
783    // [01 reg  100][00   100  esp][disp8]
784    // [01 reg base]               [disp8]
785    ip += 1;                    // skip the disp8
786    break;
787
788  case 2:
789    // [10 reg  100][ss index base][disp32]
790    // [10 reg  100][00   100  esp][disp32]
791    // [10 reg base]               [disp32]
792    if (which == disp32_operand)
793      return ip;                // caller wants the disp32
794    ip += 4;                    // skip the disp32
795    break;
796
797  case 3:
798    // [11 reg base]  (not a memory addressing mode)
799    break;
800  }
801
802  if (which == end_pc_operand) {
803    return ip + tail_size;
804  }
805
806#ifdef _LP64
807  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
808#else
809  assert(which == imm_operand, "instruction has only an imm field");
810#endif // LP64
811  return ip;
812}
813
814address Assembler::locate_next_instruction(address inst) {
815  // Secretly share code with locate_operand:
816  return locate_operand(inst, end_pc_operand);
817}
818
819
820#ifdef ASSERT
821void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
822  address inst = inst_mark();
823  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
824  address opnd;
825
826  Relocation* r = rspec.reloc();
827  if (r->type() == relocInfo::none) {
828    return;
829  } else if (r->is_call() || format == call32_operand) {
830    // assert(format == imm32_operand, "cannot specify a nonzero format");
831    opnd = locate_operand(inst, call32_operand);
832  } else if (r->is_data()) {
833    assert(format == imm_operand || format == disp32_operand
834           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
835    opnd = locate_operand(inst, (WhichOperand)format);
836  } else {
837    assert(format == imm_operand, "cannot specify a format");
838    return;
839  }
840  assert(opnd == pc(), "must put operand where relocs can find it");
841}
842#endif // ASSERT
843
844void Assembler::emit_operand32(Register reg, Address adr) {
845  assert(reg->encoding() < 8, "no extended registers");
846  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
847  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
848               adr._rspec);
849}
850
851void Assembler::emit_operand(Register reg, Address adr,
852                             int rip_relative_correction) {
853  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
854               adr._rspec,
855               rip_relative_correction);
856}
857
858void Assembler::emit_operand(XMMRegister reg, Address adr) {
859  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
860               adr._rspec);
861}
862
863// MMX operations
864void Assembler::emit_operand(MMXRegister reg, Address adr) {
865  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
866  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
867}
868
869// work around gcc (3.2.1-7a) bug
870void Assembler::emit_operand(Address adr, MMXRegister reg) {
871  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
872  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
873}
874
875
876void Assembler::emit_farith(int b1, int b2, int i) {
877  assert(isByte(b1) && isByte(b2), "wrong opcode");
878  assert(0 <= i &&  i < 8, "illegal stack offset");
879  emit_int8(b1);
880  emit_int8(b2 + i);
881}
882
883
884// Now the Assembler instructions (identical for 32/64 bits)
885
886void Assembler::adcl(Address dst, int32_t imm32) {
887  InstructionMark im(this);
888  prefix(dst);
889  emit_arith_operand(0x81, rdx, dst, imm32);
890}
891
892void Assembler::adcl(Address dst, Register src) {
893  InstructionMark im(this);
894  prefix(dst, src);
895  emit_int8(0x11);
896  emit_operand(src, dst);
897}
898
899void Assembler::adcl(Register dst, int32_t imm32) {
900  prefix(dst);
901  emit_arith(0x81, 0xD0, dst, imm32);
902}
903
904void Assembler::adcl(Register dst, Address src) {
905  InstructionMark im(this);
906  prefix(src, dst);
907  emit_int8(0x13);
908  emit_operand(dst, src);
909}
910
911void Assembler::adcl(Register dst, Register src) {
912  (void) prefix_and_encode(dst->encoding(), src->encoding());
913  emit_arith(0x13, 0xC0, dst, src);
914}
915
916void Assembler::addl(Address dst, int32_t imm32) {
917  InstructionMark im(this);
918  prefix(dst);
919  emit_arith_operand(0x81, rax, dst, imm32);
920}
921
922void Assembler::addl(Address dst, Register src) {
923  InstructionMark im(this);
924  prefix(dst, src);
925  emit_int8(0x01);
926  emit_operand(src, dst);
927}
928
929void Assembler::addl(Register dst, int32_t imm32) {
930  prefix(dst);
931  emit_arith(0x81, 0xC0, dst, imm32);
932}
933
934void Assembler::addl(Register dst, Address src) {
935  InstructionMark im(this);
936  prefix(src, dst);
937  emit_int8(0x03);
938  emit_operand(dst, src);
939}
940
941void Assembler::addl(Register dst, Register src) {
942  (void) prefix_and_encode(dst->encoding(), src->encoding());
943  emit_arith(0x03, 0xC0, dst, src);
944}
945
946void Assembler::addr_nop_4() {
947  assert(UseAddressNop, "no CPU support");
948  // 4 bytes: NOP DWORD PTR [EAX+0]
949  emit_int8(0x0F);
950  emit_int8(0x1F);
951  emit_int8(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
952  emit_int8(0);    // 8-bits offset (1 byte)
953}
954
955void Assembler::addr_nop_5() {
956  assert(UseAddressNop, "no CPU support");
957  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
958  emit_int8(0x0F);
959  emit_int8(0x1F);
960  emit_int8(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
961  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
962  emit_int8(0);    // 8-bits offset (1 byte)
963}
964
965void Assembler::addr_nop_7() {
966  assert(UseAddressNop, "no CPU support");
967  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
968  emit_int8(0x0F);
969  emit_int8(0x1F);
970  emit_int8((unsigned char)0x80);
971                   // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
972  emit_int32(0);   // 32-bits offset (4 bytes)
973}
974
975void Assembler::addr_nop_8() {
976  assert(UseAddressNop, "no CPU support");
977  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
978  emit_int8(0x0F);
979  emit_int8(0x1F);
980  emit_int8((unsigned char)0x84);
981                   // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
982  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
983  emit_int32(0);   // 32-bits offset (4 bytes)
984}
985
986void Assembler::addsd(XMMRegister dst, XMMRegister src) {
987  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
988  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
989}
990
991void Assembler::addsd(XMMRegister dst, Address src) {
992  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
993  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
994}
995
996void Assembler::addss(XMMRegister dst, XMMRegister src) {
997  NOT_LP64(assert(VM_Version::supports_sse(), ""));
998  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
999}
1000
1001void Assembler::addss(XMMRegister dst, Address src) {
1002  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1003  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
1004}
1005
1006void Assembler::aesdec(XMMRegister dst, Address src) {
1007  assert(VM_Version::supports_aes(), "");
1008  InstructionMark im(this);
1009  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
1010  emit_int8((unsigned char)0xDE);
1011  emit_operand(dst, src);
1012}
1013
1014void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
1015  assert(VM_Version::supports_aes(), "");
1016  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
1017  emit_int8((unsigned char)0xDE);
1018  emit_int8(0xC0 | encode);
1019}
1020
1021void Assembler::aesdeclast(XMMRegister dst, Address src) {
1022  assert(VM_Version::supports_aes(), "");
1023  InstructionMark im(this);
1024  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
1025  emit_int8((unsigned char)0xDF);
1026  emit_operand(dst, src);
1027}
1028
1029void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
1030  assert(VM_Version::supports_aes(), "");
1031  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
1032  emit_int8((unsigned char)0xDF);
1033  emit_int8((unsigned char)(0xC0 | encode));
1034}
1035
1036void Assembler::aesenc(XMMRegister dst, Address src) {
1037  assert(VM_Version::supports_aes(), "");
1038  InstructionMark im(this);
1039  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
1040  emit_int8((unsigned char)0xDC);
1041  emit_operand(dst, src);
1042}
1043
1044void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
1045  assert(VM_Version::supports_aes(), "");
1046  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
1047  emit_int8((unsigned char)0xDC);
1048  emit_int8(0xC0 | encode);
1049}
1050
1051void Assembler::aesenclast(XMMRegister dst, Address src) {
1052  assert(VM_Version::supports_aes(), "");
1053  InstructionMark im(this);
1054  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
1055  emit_int8((unsigned char)0xDD);
1056  emit_operand(dst, src);
1057}
1058
1059void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
1060  assert(VM_Version::supports_aes(), "");
1061  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
1062  emit_int8((unsigned char)0xDD);
1063  emit_int8((unsigned char)(0xC0 | encode));
1064}
1065
1066
1067void Assembler::andl(Address dst, int32_t imm32) {
1068  InstructionMark im(this);
1069  prefix(dst);
1070  emit_int8((unsigned char)0x81);
1071  emit_operand(rsp, dst, 4);
1072  emit_int32(imm32);
1073}
1074
1075void Assembler::andl(Register dst, int32_t imm32) {
1076  prefix(dst);
1077  emit_arith(0x81, 0xE0, dst, imm32);
1078}
1079
1080void Assembler::andl(Register dst, Address src) {
1081  InstructionMark im(this);
1082  prefix(src, dst);
1083  emit_int8(0x23);
1084  emit_operand(dst, src);
1085}
1086
1087void Assembler::andl(Register dst, Register src) {
1088  (void) prefix_and_encode(dst->encoding(), src->encoding());
1089  emit_arith(0x23, 0xC0, dst, src);
1090}
1091
1092void Assembler::andnl(Register dst, Register src1, Register src2) {
1093  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1094  int encode = vex_prefix_0F38_and_encode(dst, src1, src2);
1095  emit_int8((unsigned char)0xF2);
1096  emit_int8((unsigned char)(0xC0 | encode));
1097}
1098
1099void Assembler::andnl(Register dst, Register src1, Address src2) {
1100  InstructionMark im(this);
1101  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1102  vex_prefix_0F38(dst, src1, src2);
1103  emit_int8((unsigned char)0xF2);
1104  emit_operand(dst, src2);
1105}
1106
1107void Assembler::bsfl(Register dst, Register src) {
1108  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1109  emit_int8(0x0F);
1110  emit_int8((unsigned char)0xBC);
1111  emit_int8((unsigned char)(0xC0 | encode));
1112}
1113
1114void Assembler::bsrl(Register dst, Register src) {
1115  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1116  emit_int8(0x0F);
1117  emit_int8((unsigned char)0xBD);
1118  emit_int8((unsigned char)(0xC0 | encode));
1119}
1120
1121void Assembler::bswapl(Register reg) { // bswap
1122  int encode = prefix_and_encode(reg->encoding());
1123  emit_int8(0x0F);
1124  emit_int8((unsigned char)(0xC8 | encode));
1125}
1126
1127void Assembler::blsil(Register dst, Register src) {
1128  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1129  int encode = vex_prefix_0F38_and_encode(rbx, dst, src);
1130  emit_int8((unsigned char)0xF3);
1131  emit_int8((unsigned char)(0xC0 | encode));
1132}
1133
1134void Assembler::blsil(Register dst, Address src) {
1135  InstructionMark im(this);
1136  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1137  vex_prefix_0F38(rbx, dst, src);
1138  emit_int8((unsigned char)0xF3);
1139  emit_operand(rbx, src);
1140}
1141
1142void Assembler::blsmskl(Register dst, Register src) {
1143  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1144  int encode = vex_prefix_0F38_and_encode(rdx, dst, src);
1145  emit_int8((unsigned char)0xF3);
1146  emit_int8((unsigned char)(0xC0 | encode));
1147}
1148
1149void Assembler::blsmskl(Register dst, Address src) {
1150  InstructionMark im(this);
1151  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1152  vex_prefix_0F38(rdx, dst, src);
1153  emit_int8((unsigned char)0xF3);
1154  emit_operand(rdx, src);
1155}
1156
1157void Assembler::blsrl(Register dst, Register src) {
1158  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1159  int encode = vex_prefix_0F38_and_encode(rcx, dst, src);
1160  emit_int8((unsigned char)0xF3);
1161  emit_int8((unsigned char)(0xC0 | encode));
1162}
1163
1164void Assembler::blsrl(Register dst, Address src) {
1165  InstructionMark im(this);
1166  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1167  vex_prefix_0F38(rcx, dst, src);
1168  emit_int8((unsigned char)0xF3);
1169  emit_operand(rcx, src);
1170}
1171
1172void Assembler::call(Label& L, relocInfo::relocType rtype) {
1173  // suspect disp32 is always good
1174  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);
1175
1176  if (L.is_bound()) {
1177    const int long_size = 5;
1178    int offs = (int)( target(L) - pc() );
1179    assert(offs <= 0, "assembler error");
1180    InstructionMark im(this);
1181    // 1110 1000 #32-bit disp
1182    emit_int8((unsigned char)0xE8);
1183    emit_data(offs - long_size, rtype, operand);
1184  } else {
1185    InstructionMark im(this);
1186    // 1110 1000 #32-bit disp
1187    L.add_patch_at(code(), locator());
1188
1189    emit_int8((unsigned char)0xE8);
1190    emit_data(int(0), rtype, operand);
1191  }
1192}
1193
1194void Assembler::call(Register dst) {
1195  int encode = prefix_and_encode(dst->encoding());
1196  emit_int8((unsigned char)0xFF);
1197  emit_int8((unsigned char)(0xD0 | encode));
1198}
1199
1200
1201void Assembler::call(Address adr) {
1202  InstructionMark im(this);
1203  prefix(adr);
1204  emit_int8((unsigned char)0xFF);
1205  emit_operand(rdx, adr);
1206}
1207
1208void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
1209  assert(entry != NULL, "call most probably wrong");
1210  InstructionMark im(this);
1211  emit_int8((unsigned char)0xE8);
1212  intptr_t disp = entry - (pc() + sizeof(int32_t));
1213  assert(is_simm32(disp), "must be 32bit offset (call2)");
1214  // Technically, should use call32_operand, but this format is
1215  // implied by the fact that we're emitting a call instruction.
1216
1217  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
1218  emit_data((int) disp, rspec, operand);
1219}
1220
1221void Assembler::cdql() {
1222  emit_int8((unsigned char)0x99);
1223}
1224
1225void Assembler::cld() {
1226  emit_int8((unsigned char)0xFC);
1227}
1228
1229void Assembler::cmovl(Condition cc, Register dst, Register src) {
1230  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1231  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1232  emit_int8(0x0F);
1233  emit_int8(0x40 | cc);
1234  emit_int8((unsigned char)(0xC0 | encode));
1235}
1236
1237
1238void Assembler::cmovl(Condition cc, Register dst, Address src) {
1239  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1240  prefix(src, dst);
1241  emit_int8(0x0F);
1242  emit_int8(0x40 | cc);
1243  emit_operand(dst, src);
1244}
1245
1246void Assembler::cmpb(Address dst, int imm8) {
1247  InstructionMark im(this);
1248  prefix(dst);
1249  emit_int8((unsigned char)0x80);
1250  emit_operand(rdi, dst, 1);
1251  emit_int8(imm8);
1252}
1253
1254void Assembler::cmpl(Address dst, int32_t imm32) {
1255  InstructionMark im(this);
1256  prefix(dst);
1257  emit_int8((unsigned char)0x81);
1258  emit_operand(rdi, dst, 4);
1259  emit_int32(imm32);
1260}
1261
1262void Assembler::cmpl(Register dst, int32_t imm32) {
1263  prefix(dst);
1264  emit_arith(0x81, 0xF8, dst, imm32);
1265}
1266
1267void Assembler::cmpl(Register dst, Register src) {
1268  (void) prefix_and_encode(dst->encoding(), src->encoding());
1269  emit_arith(0x3B, 0xC0, dst, src);
1270}
1271
1272
1273void Assembler::cmpl(Register dst, Address  src) {
1274  InstructionMark im(this);
1275  prefix(src, dst);
1276  emit_int8((unsigned char)0x3B);
1277  emit_operand(dst, src);
1278}
1279
1280void Assembler::cmpw(Address dst, int imm16) {
1281  InstructionMark im(this);
1282  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
1283  emit_int8(0x66);
1284  emit_int8((unsigned char)0x81);
1285  emit_operand(rdi, dst, 2);
1286  emit_int16(imm16);
1287}
1288
1289// The 32-bit cmpxchg compares the value at adr with the contents of rax,
1290// and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
1291// The ZF is set if the compared values were equal, and cleared otherwise.
1292void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
1293  InstructionMark im(this);
1294  prefix(adr, reg);
1295  emit_int8(0x0F);
1296  emit_int8((unsigned char)0xB1);
1297  emit_operand(reg, adr);
1298}
1299
1300void Assembler::comisd(XMMRegister dst, Address src) {
1301  // NOTE: dbx seems to decode this as comiss even though the
1302  // 0x66 is there. Strangly ucomisd comes out correct
1303  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1304  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
1305}
1306
1307void Assembler::comisd(XMMRegister dst, XMMRegister src) {
1308  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1309  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
1310}
1311
1312void Assembler::comiss(XMMRegister dst, Address src) {
1313  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1314  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
1315}
1316
1317void Assembler::comiss(XMMRegister dst, XMMRegister src) {
1318  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1319  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
1320}
1321
1322void Assembler::cpuid() {
1323  emit_int8(0x0F);
1324  emit_int8((unsigned char)0xA2);
1325}
1326
1327void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1328  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1329  emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3);
1330}
1331
1332void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1333  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1334  emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE);
1335}
1336
1337void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1338  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1339  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
1340}
1341
1342void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
1343  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1344  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
1345}
1346
1347void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1348  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1349  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
1350  emit_int8(0x2A);
1351  emit_int8((unsigned char)(0xC0 | encode));
1352}
1353
1354void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
1355  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1356  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
1357}
1358
1359void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1360  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1361  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
1362  emit_int8(0x2A);
1363  emit_int8((unsigned char)(0xC0 | encode));
1364}
1365
1366void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
1367  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1368  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3);
1369}
1370
1371void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1372  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1373  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
1374}
1375
1376void Assembler::cvtss2sd(XMMRegister dst, Address src) {
1377  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1378  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
1379}
1380
1381
1382void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1383  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1384  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
1385  emit_int8(0x2C);
1386  emit_int8((unsigned char)(0xC0 | encode));
1387}
1388
1389void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1390  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1391  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
1392  emit_int8(0x2C);
1393  emit_int8((unsigned char)(0xC0 | encode));
1394}
1395
1396void Assembler::decl(Address dst) {
1397  // Don't use it directly. Use MacroAssembler::decrement() instead.
1398  InstructionMark im(this);
1399  prefix(dst);
1400  emit_int8((unsigned char)0xFF);
1401  emit_operand(rcx, dst);
1402}
1403
1404void Assembler::divsd(XMMRegister dst, Address src) {
1405  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1406  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
1407}
1408
1409void Assembler::divsd(XMMRegister dst, XMMRegister src) {
1410  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1411  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
1412}
1413
1414void Assembler::divss(XMMRegister dst, Address src) {
1415  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1416  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
1417}
1418
1419void Assembler::divss(XMMRegister dst, XMMRegister src) {
1420  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1421  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
1422}
1423
1424void Assembler::emms() {
1425  NOT_LP64(assert(VM_Version::supports_mmx(), ""));
1426  emit_int8(0x0F);
1427  emit_int8(0x77);
1428}
1429
1430void Assembler::hlt() {
1431  emit_int8((unsigned char)0xF4);
1432}
1433
1434void Assembler::idivl(Register src) {
1435  int encode = prefix_and_encode(src->encoding());
1436  emit_int8((unsigned char)0xF7);
1437  emit_int8((unsigned char)(0xF8 | encode));
1438}
1439
1440void Assembler::divl(Register src) { // Unsigned
1441  int encode = prefix_and_encode(src->encoding());
1442  emit_int8((unsigned char)0xF7);
1443  emit_int8((unsigned char)(0xF0 | encode));
1444}
1445
1446void Assembler::imull(Register dst, Register src) {
1447  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1448  emit_int8(0x0F);
1449  emit_int8((unsigned char)0xAF);
1450  emit_int8((unsigned char)(0xC0 | encode));
1451}
1452
1453
1454void Assembler::imull(Register dst, Register src, int value) {
1455  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1456  if (is8bit(value)) {
1457    emit_int8(0x6B);
1458    emit_int8((unsigned char)(0xC0 | encode));
1459    emit_int8(value & 0xFF);
1460  } else {
1461    emit_int8(0x69);
1462    emit_int8((unsigned char)(0xC0 | encode));
1463    emit_int32(value);
1464  }
1465}
1466
1467void Assembler::imull(Register dst, Address src) {
1468  InstructionMark im(this);
1469  prefix(src, dst);
1470  emit_int8(0x0F);
1471  emit_int8((unsigned char) 0xAF);
1472  emit_operand(dst, src);
1473}
1474
1475
1476void Assembler::incl(Address dst) {
1477  // Don't use it directly. Use MacroAssembler::increment() instead.
1478  InstructionMark im(this);
1479  prefix(dst);
1480  emit_int8((unsigned char)0xFF);
1481  emit_operand(rax, dst);
1482}
1483
1484void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
1485  InstructionMark im(this);
1486  assert((0 <= cc) && (cc < 16), "illegal cc");
1487  if (L.is_bound()) {
1488    address dst = target(L);
1489    assert(dst != NULL, "jcc most probably wrong");
1490
1491    const int short_size = 2;
1492    const int long_size = 6;
1493    intptr_t offs = (intptr_t)dst - (intptr_t)pc();
1494    if (maybe_short && is8bit(offs - short_size)) {
1495      // 0111 tttn #8-bit disp
1496      emit_int8(0x70 | cc);
1497      emit_int8((offs - short_size) & 0xFF);
1498    } else {
1499      // 0000 1111 1000 tttn #32-bit disp
1500      assert(is_simm32(offs - long_size),
1501             "must be 32bit offset (call4)");
1502      emit_int8(0x0F);
1503      emit_int8((unsigned char)(0x80 | cc));
1504      emit_int32(offs - long_size);
1505    }
1506  } else {
1507    // Note: could eliminate cond. jumps to this jump if condition
1508    //       is the same however, seems to be rather unlikely case.
1509    // Note: use jccb() if label to be bound is very close to get
1510    //       an 8-bit displacement
1511    L.add_patch_at(code(), locator());
1512    emit_int8(0x0F);
1513    emit_int8((unsigned char)(0x80 | cc));
1514    emit_int32(0);
1515  }
1516}
1517
1518void Assembler::jccb(Condition cc, Label& L) {
1519  if (L.is_bound()) {
1520    const int short_size = 2;
1521    address entry = target(L);
1522#ifdef ASSERT
1523    intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
1524    intptr_t delta = short_branch_delta();
1525    if (delta != 0) {
1526      dist += (dist < 0 ? (-delta) :delta);
1527    }
1528    assert(is8bit(dist), "Dispacement too large for a short jmp");
1529#endif
1530    intptr_t offs = (intptr_t)entry - (intptr_t)pc();
1531    // 0111 tttn #8-bit disp
1532    emit_int8(0x70 | cc);
1533    emit_int8((offs - short_size) & 0xFF);
1534  } else {
1535    InstructionMark im(this);
1536    L.add_patch_at(code(), locator());
1537    emit_int8(0x70 | cc);
1538    emit_int8(0);
1539  }
1540}
1541
1542void Assembler::jmp(Address adr) {
1543  InstructionMark im(this);
1544  prefix(adr);
1545  emit_int8((unsigned char)0xFF);
1546  emit_operand(rsp, adr);
1547}
1548
1549void Assembler::jmp(Label& L, bool maybe_short) {
1550  if (L.is_bound()) {
1551    address entry = target(L);
1552    assert(entry != NULL, "jmp most probably wrong");
1553    InstructionMark im(this);
1554    const int short_size = 2;
1555    const int long_size = 5;
1556    intptr_t offs = entry - pc();
1557    if (maybe_short && is8bit(offs - short_size)) {
1558      emit_int8((unsigned char)0xEB);
1559      emit_int8((offs - short_size) & 0xFF);
1560    } else {
1561      emit_int8((unsigned char)0xE9);
1562      emit_int32(offs - long_size);
1563    }
1564  } else {
1565    // By default, forward jumps are always 32-bit displacements, since
1566    // we can't yet know where the label will be bound.  If you're sure that
1567    // the forward jump will not run beyond 256 bytes, use jmpb to
1568    // force an 8-bit displacement.
1569    InstructionMark im(this);
1570    L.add_patch_at(code(), locator());
1571    emit_int8((unsigned char)0xE9);
1572    emit_int32(0);
1573  }
1574}
1575
1576void Assembler::jmp(Register entry) {
1577  int encode = prefix_and_encode(entry->encoding());
1578  emit_int8((unsigned char)0xFF);
1579  emit_int8((unsigned char)(0xE0 | encode));
1580}
1581
1582void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
1583  InstructionMark im(this);
1584  emit_int8((unsigned char)0xE9);
1585  assert(dest != NULL, "must have a target");
1586  intptr_t disp = dest - (pc() + sizeof(int32_t));
1587  assert(is_simm32(disp), "must be 32bit offset (jmp)");
1588  emit_data(disp, rspec.reloc(), call32_operand);
1589}
1590
1591void Assembler::jmpb(Label& L) {
1592  if (L.is_bound()) {
1593    const int short_size = 2;
1594    address entry = target(L);
1595    assert(entry != NULL, "jmp most probably wrong");
1596#ifdef ASSERT
1597    intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
1598    intptr_t delta = short_branch_delta();
1599    if (delta != 0) {
1600      dist += (dist < 0 ? (-delta) :delta);
1601    }
1602    assert(is8bit(dist), "Dispacement too large for a short jmp");
1603#endif
1604    intptr_t offs = entry - pc();
1605    emit_int8((unsigned char)0xEB);
1606    emit_int8((offs - short_size) & 0xFF);
1607  } else {
1608    InstructionMark im(this);
1609    L.add_patch_at(code(), locator());
1610    emit_int8((unsigned char)0xEB);
1611    emit_int8(0);
1612  }
1613}
1614
1615void Assembler::ldmxcsr( Address src) {
1616  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1617  InstructionMark im(this);
1618  prefix(src);
1619  emit_int8(0x0F);
1620  emit_int8((unsigned char)0xAE);
1621  emit_operand(as_Register(2), src);
1622}
1623
1624void Assembler::leal(Register dst, Address src) {
1625  InstructionMark im(this);
1626#ifdef _LP64
1627  emit_int8(0x67); // addr32
1628  prefix(src, dst);
1629#endif // LP64
1630  emit_int8((unsigned char)0x8D);
1631  emit_operand(dst, src);
1632}
1633
1634void Assembler::lfence() {
1635  emit_int8(0x0F);
1636  emit_int8((unsigned char)0xAE);
1637  emit_int8((unsigned char)0xE8);
1638}
1639
1640void Assembler::lock() {
1641  emit_int8((unsigned char)0xF0);
1642}
1643
1644void Assembler::lzcntl(Register dst, Register src) {
1645  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
1646  emit_int8((unsigned char)0xF3);
1647  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1648  emit_int8(0x0F);
1649  emit_int8((unsigned char)0xBD);
1650  emit_int8((unsigned char)(0xC0 | encode));
1651}
1652
1653// Emit mfence instruction
1654void Assembler::mfence() {
1655  NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
1656  emit_int8(0x0F);
1657  emit_int8((unsigned char)0xAE);
1658  emit_int8((unsigned char)0xF0);
1659}
1660
1661void Assembler::mov(Register dst, Register src) {
1662  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
1663}
1664
1665void Assembler::movapd(XMMRegister dst, XMMRegister src) {
1666  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1667  emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
1668}
1669
1670void Assembler::movaps(XMMRegister dst, XMMRegister src) {
1671  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1672  emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE);
1673}
1674
1675void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
1676  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1677  int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE);
1678  emit_int8(0x16);
1679  emit_int8((unsigned char)(0xC0 | encode));
1680}
1681
1682void Assembler::movb(Register dst, Address src) {
1683  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
1684  InstructionMark im(this);
1685  prefix(src, dst, true);
1686  emit_int8((unsigned char)0x8A);
1687  emit_operand(dst, src);
1688}
1689
1690
1691void Assembler::movb(Address dst, int imm8) {
1692  InstructionMark im(this);
1693   prefix(dst);
1694  emit_int8((unsigned char)0xC6);
1695  emit_operand(rax, dst, 1);
1696  emit_int8(imm8);
1697}
1698
1699
1700void Assembler::movb(Address dst, Register src) {
1701  assert(src->has_byte_register(), "must have byte register");
1702  InstructionMark im(this);
1703  prefix(dst, src, true);
1704  emit_int8((unsigned char)0x88);
1705  emit_operand(src, dst);
1706}
1707
1708void Assembler::movdl(XMMRegister dst, Register src) {
1709  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1710  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
1711  emit_int8(0x6E);
1712  emit_int8((unsigned char)(0xC0 | encode));
1713}
1714
1715void Assembler::movdl(Register dst, XMMRegister src) {
1716  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1717  // swap src/dst to get correct prefix
1718  int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66);
1719  emit_int8(0x7E);
1720  emit_int8((unsigned char)(0xC0 | encode));
1721}
1722
1723void Assembler::movdl(XMMRegister dst, Address src) {
1724  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1725  InstructionMark im(this);
1726  simd_prefix(dst, src, VEX_SIMD_66);
1727  emit_int8(0x6E);
1728  emit_operand(dst, src);
1729}
1730
1731void Assembler::movdl(Address dst, XMMRegister src) {
1732  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1733  InstructionMark im(this);
1734  simd_prefix(dst, src, VEX_SIMD_66);
1735  emit_int8(0x7E);
1736  emit_operand(src, dst);
1737}
1738
1739void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
1740  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1741  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
1742}
1743
1744void Assembler::movdqa(XMMRegister dst, Address src) {
1745  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1746  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
1747}
1748
1749void Assembler::movdqu(XMMRegister dst, Address src) {
1750  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1751  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
1752}
1753
1754void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
1755  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1756  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
1757}
1758
1759void Assembler::movdqu(Address dst, XMMRegister src) {
1760  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1761  InstructionMark im(this);
1762  simd_prefix(dst, src, VEX_SIMD_F3);
1763  emit_int8(0x7F);
1764  emit_operand(src, dst);
1765}
1766
1767// Move Unaligned 256bit Vector
1768void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
1769  assert(UseAVX > 0, "");
1770  bool vector256 = true;
1771  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256);
1772  emit_int8(0x6F);
1773  emit_int8((unsigned char)(0xC0 | encode));
1774}
1775
1776void Assembler::vmovdqu(XMMRegister dst, Address src) {
1777  assert(UseAVX > 0, "");
1778  InstructionMark im(this);
1779  bool vector256 = true;
1780  vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256);
1781  emit_int8(0x6F);
1782  emit_operand(dst, src);
1783}
1784
1785void Assembler::vmovdqu(Address dst, XMMRegister src) {
1786  assert(UseAVX > 0, "");
1787  InstructionMark im(this);
1788  bool vector256 = true;
1789  // swap src<->dst for encoding
1790  assert(src != xnoreg, "sanity");
1791  vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector256);
1792  emit_int8(0x7F);
1793  emit_operand(src, dst);
1794}
1795
1796// Uses zero extension on 64bit
1797
1798void Assembler::movl(Register dst, int32_t imm32) {
1799  int encode = prefix_and_encode(dst->encoding());
1800  emit_int8((unsigned char)(0xB8 | encode));
1801  emit_int32(imm32);
1802}
1803
1804void Assembler::movl(Register dst, Register src) {
1805  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1806  emit_int8((unsigned char)0x8B);
1807  emit_int8((unsigned char)(0xC0 | encode));
1808}
1809
1810void Assembler::movl(Register dst, Address src) {
1811  InstructionMark im(this);
1812  prefix(src, dst);
1813  emit_int8((unsigned char)0x8B);
1814  emit_operand(dst, src);
1815}
1816
1817void Assembler::movl(Address dst, int32_t imm32) {
1818  InstructionMark im(this);
1819  prefix(dst);
1820  emit_int8((unsigned char)0xC7);
1821  emit_operand(rax, dst, 4);
1822  emit_int32(imm32);
1823}
1824
1825void Assembler::movl(Address dst, Register src) {
1826  InstructionMark im(this);
1827  prefix(dst, src);
1828  emit_int8((unsigned char)0x89);
1829  emit_operand(src, dst);
1830}
1831
1832// New cpus require to use movsd and movss to avoid partial register stall
1833// when loading from memory. But for old Opteron use movlpd instead of movsd.
1834// The selection is done in MacroAssembler::movdbl() and movflt().
1835void Assembler::movlpd(XMMRegister dst, Address src) {
1836  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1837  emit_simd_arith(0x12, dst, src, VEX_SIMD_66);
1838}
1839
1840void Assembler::movq( MMXRegister dst, Address src ) {
1841  assert( VM_Version::supports_mmx(), "" );
1842  emit_int8(0x0F);
1843  emit_int8(0x6F);
1844  emit_operand(dst, src);
1845}
1846
1847void Assembler::movq( Address dst, MMXRegister src ) {
1848  assert( VM_Version::supports_mmx(), "" );
1849  emit_int8(0x0F);
1850  emit_int8(0x7F);
1851  // workaround gcc (3.2.1-7a) bug
1852  // In that version of gcc with only an emit_operand(MMX, Address)
1853  // gcc will tail jump and try and reverse the parameters completely
1854  // obliterating dst in the process. By having a version available
1855  // that doesn't need to swap the args at the tail jump the bug is
1856  // avoided.
1857  emit_operand(dst, src);
1858}
1859
1860void Assembler::movq(XMMRegister dst, Address src) {
1861  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1862  InstructionMark im(this);
1863  simd_prefix(dst, src, VEX_SIMD_F3);
1864  emit_int8(0x7E);
1865  emit_operand(dst, src);
1866}
1867
1868void Assembler::movq(Address dst, XMMRegister src) {
1869  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1870  InstructionMark im(this);
1871  simd_prefix(dst, src, VEX_SIMD_66);
1872  emit_int8((unsigned char)0xD6);
1873  emit_operand(src, dst);
1874}
1875
1876void Assembler::movsbl(Register dst, Address src) { // movsxb
1877  InstructionMark im(this);
1878  prefix(src, dst);
1879  emit_int8(0x0F);
1880  emit_int8((unsigned char)0xBE);
1881  emit_operand(dst, src);
1882}
1883
1884void Assembler::movsbl(Register dst, Register src) { // movsxb
1885  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
1886  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1887  emit_int8(0x0F);
1888  emit_int8((unsigned char)0xBE);
1889  emit_int8((unsigned char)(0xC0 | encode));
1890}
1891
1892void Assembler::movsd(XMMRegister dst, XMMRegister src) {
1893  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1894  emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
1895}
1896
1897void Assembler::movsd(XMMRegister dst, Address src) {
1898  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1899  emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
1900}
1901
1902void Assembler::movsd(Address dst, XMMRegister src) {
1903  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1904  InstructionMark im(this);
1905  simd_prefix(dst, src, VEX_SIMD_F2);
1906  emit_int8(0x11);
1907  emit_operand(src, dst);
1908}
1909
1910void Assembler::movss(XMMRegister dst, XMMRegister src) {
1911  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1912  emit_simd_arith(0x10, dst, src, VEX_SIMD_F3);
1913}
1914
1915void Assembler::movss(XMMRegister dst, Address src) {
1916  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1917  emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3);
1918}
1919
1920void Assembler::movss(Address dst, XMMRegister src) {
1921  NOT_LP64(assert(VM_Version::supports_sse(), ""));
1922  InstructionMark im(this);
1923  simd_prefix(dst, src, VEX_SIMD_F3);
1924  emit_int8(0x11);
1925  emit_operand(src, dst);
1926}
1927
1928void Assembler::movswl(Register dst, Address src) { // movsxw
1929  InstructionMark im(this);
1930  prefix(src, dst);
1931  emit_int8(0x0F);
1932  emit_int8((unsigned char)0xBF);
1933  emit_operand(dst, src);
1934}
1935
1936void Assembler::movswl(Register dst, Register src) { // movsxw
1937  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1938  emit_int8(0x0F);
1939  emit_int8((unsigned char)0xBF);
1940  emit_int8((unsigned char)(0xC0 | encode));
1941}
1942
1943void Assembler::movw(Address dst, int imm16) {
1944  InstructionMark im(this);
1945
1946  emit_int8(0x66); // switch to 16-bit mode
1947  prefix(dst);
1948  emit_int8((unsigned char)0xC7);
1949  emit_operand(rax, dst, 2);
1950  emit_int16(imm16);
1951}
1952
1953void Assembler::movw(Register dst, Address src) {
1954  InstructionMark im(this);
1955  emit_int8(0x66);
1956  prefix(src, dst);
1957  emit_int8((unsigned char)0x8B);
1958  emit_operand(dst, src);
1959}
1960
1961void Assembler::movw(Address dst, Register src) {
1962  InstructionMark im(this);
1963  emit_int8(0x66);
1964  prefix(dst, src);
1965  emit_int8((unsigned char)0x89);
1966  emit_operand(src, dst);
1967}
1968
1969void Assembler::movzbl(Register dst, Address src) { // movzxb
1970  InstructionMark im(this);
1971  prefix(src, dst);
1972  emit_int8(0x0F);
1973  emit_int8((unsigned char)0xB6);
1974  emit_operand(dst, src);
1975}
1976
1977void Assembler::movzbl(Register dst, Register src) { // movzxb
1978  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
1979  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1980  emit_int8(0x0F);
1981  emit_int8((unsigned char)0xB6);
1982  emit_int8(0xC0 | encode);
1983}
1984
1985void Assembler::movzwl(Register dst, Address src) { // movzxw
1986  InstructionMark im(this);
1987  prefix(src, dst);
1988  emit_int8(0x0F);
1989  emit_int8((unsigned char)0xB7);
1990  emit_operand(dst, src);
1991}
1992
1993void Assembler::movzwl(Register dst, Register src) { // movzxw
1994  int encode = prefix_and_encode(dst->encoding(), src->encoding());
1995  emit_int8(0x0F);
1996  emit_int8((unsigned char)0xB7);
1997  emit_int8(0xC0 | encode);
1998}
1999
2000void Assembler::mull(Address src) {
2001  InstructionMark im(this);
2002  prefix(src);
2003  emit_int8((unsigned char)0xF7);
2004  emit_operand(rsp, src);
2005}
2006
2007void Assembler::mull(Register src) {
2008  int encode = prefix_and_encode(src->encoding());
2009  emit_int8((unsigned char)0xF7);
2010  emit_int8((unsigned char)(0xE0 | encode));
2011}
2012
2013void Assembler::mulsd(XMMRegister dst, Address src) {
2014  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2015  emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
2016}
2017
2018void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
2019  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2020  emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
2021}
2022
2023void Assembler::mulss(XMMRegister dst, Address src) {
2024  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2025  emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
2026}
2027
2028void Assembler::mulss(XMMRegister dst, XMMRegister src) {
2029  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2030  emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
2031}
2032
2033void Assembler::negl(Register dst) {
2034  int encode = prefix_and_encode(dst->encoding());
2035  emit_int8((unsigned char)0xF7);
2036  emit_int8((unsigned char)(0xD8 | encode));
2037}
2038
// Emits i bytes of no-op padding.
//
// In ASSERT builds the padding is always i single-byte 0x90 nops (see the
// comment below) and the function returns before the selection logic is
// reached. Otherwise one of three encodings is chosen:
//   - UseAddressNop on Intel: multi-byte address nops mixed with 0x66-prefixed
//                             0x90 nops
//   - UseAddressNop on AMD:   consecutive multi-byte address nops
//   - everything else:        0x90 nops carrying 0x66 size prefixes
// The switch statements rely on deliberate case fall-through: each case that
// falls through contributes one extra 0x66 prefix byte.
void Assembler::nop(int i) {
#ifdef ASSERT
  assert(i > 0, " ");
  // The fancy nops aren't currently recognized by debuggers, making it a
  // pain to disassemble code while debugging. If asserts are on, speed is
  // clearly not an issue, so simply use the traditional single-byte nop
  // to do alignment.

  for (; i > 0 ; i--) emit_int8((unsigned char)0x90);
  return;

#endif // ASSERT

  if (UseAddressNop && VM_Version::is_intel()) {
    //
    // Using multi-byte nops "0x0F 0x1F [address]" for Intel
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The remaining encodings are Intel specific - don't use consecutive address nops

    // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90

    // Consume the request in 15-byte units (the "15:" pattern above).
    while(i >= 15) {
      // For Intel don't generate consecutive address nops (mix with regular nops)
      i -= 15;
      emit_int8(0x66);   // size prefix
      emit_int8(0x66);   // size prefix
      emit_int8(0x66);   // size prefix
      addr_nop_8();
      emit_int8(0x66);   // size prefix
      emit_int8(0x66);   // size prefix
      emit_int8(0x66);   // size prefix
      emit_int8((unsigned char)0x90);
                         // nop
    }
    // Remaining 0-14 bytes; cases fall through to accumulate size prefixes.
    switch (i) {
      case 14:
        emit_int8(0x66); // size prefix
      case 13:
        emit_int8(0x66); // size prefix
      case 12:
        addr_nop_8();
        emit_int8(0x66); // size prefix
        emit_int8(0x66); // size prefix
        emit_int8(0x66); // size prefix
        emit_int8((unsigned char)0x90);
                         // nop
        break;
      case 11:
        emit_int8(0x66); // size prefix
      case 10:
        emit_int8(0x66); // size prefix
      case 9:
        emit_int8(0x66); // size prefix
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_int8(0x66); // size prefix
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_int8(0x66); // size prefix
      case 2:
        emit_int8(0x66); // size prefix
      case 1:
        emit_int8((unsigned char)0x90);
                         // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }
  if (UseAddressNop && VM_Version::is_amd()) {
    //
    // Using multi-byte nops "0x0F 0x1F [address]" for AMD.
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The remaining encodings are AMD specific - use consecutive address nops

    // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //     Size prefixes (0x66) are added for larger sizes

    // Consume the request in 11-byte units until at most 21 bytes remain.
    while(i >= 22) {
      i -= 11;
      emit_int8(0x66); // size prefix
      emit_int8(0x66); // size prefix
      emit_int8(0x66); // size prefix
      addr_nop_8();
    }
    // Generate first nop for size between 21-12
    // (i is decremented by exactly the number of bytes emitted here, so the
    // second switch below handles whatever is left, which is at most 11).
    switch (i) {
      case 21:
        i -= 1;
        emit_int8(0x66); // size prefix
      case 20:
      case 19:
        i -= 1;
        emit_int8(0x66); // size prefix
      case 18:
      case 17:
        i -= 1;
        emit_int8(0x66); // size prefix
      case 16:
      case 15:
        i -= 8;
        addr_nop_8();
        break;
      case 14:
      case 13:
        i -= 7;
        addr_nop_7();
        break;
      case 12:
        i -= 6;
        emit_int8(0x66); // size prefix
        addr_nop_5();
        break;
      default:
        assert(i < 12, " ");
    }

    // Generate second nop for size between 11-1
    switch (i) {
      case 11:
        emit_int8(0x66); // size prefix
      case 10:
        emit_int8(0x66); // size prefix
      case 9:
        emit_int8(0x66); // size prefix
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_int8(0x66); // size prefix
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_int8(0x66); // size prefix
      case 2:
        emit_int8(0x66); // size prefix
      case 1:
        emit_int8((unsigned char)0x90);
                         // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }

  // Using nops with size prefixes "0x66 0x90".
  // From AMD Optimization Guide:
  //  1: 0x90
  //  2: 0x66 0x90
  //  3: 0x66 0x66 0x90
  //  4: 0x66 0x66 0x66 0x90
  //  5: 0x66 0x66 0x90 0x66 0x90
  //  6: 0x66 0x66 0x90 0x66 0x66 0x90
  //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
  //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
  //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  //
  // Emit 4-byte nops until at most 12 bytes remain.
  while(i > 12) {
    i -= 4;
    emit_int8(0x66); // size prefix
    emit_int8(0x66);
    emit_int8(0x66);
    emit_int8((unsigned char)0x90);
                     // nop
  }
  // 1 - 12 nops
  // (emits a 3- or 4-byte nop when more than 8 bytes remain)
  if(i > 8) {
    if(i > 9) {
      i -= 1;
      emit_int8(0x66);
    }
    i -= 3;
    emit_int8(0x66);
    emit_int8(0x66);
    emit_int8((unsigned char)0x90);
  }
  // 1 - 8 nops
  // (emits a 3- or 4-byte nop when more than 4 bytes remain)
  if(i > 4) {
    if(i > 6) {
      i -= 1;
      emit_int8(0x66);
    }
    i -= 3;
    emit_int8(0x66);
    emit_int8(0x66);
    emit_int8((unsigned char)0x90);
  }
  // Final 0-4 bytes: one nop with up to three size prefixes (fall-through).
  switch (i) {
    case 4:
      emit_int8(0x66);
    case 3:
      emit_int8(0x66);
    case 2:
      emit_int8(0x66);
    case 1:
      emit_int8((unsigned char)0x90);
      break;
    default:
      assert(i == 0, " ");
  }
}
2291
2292void Assembler::notl(Register dst) {
2293  int encode = prefix_and_encode(dst->encoding());
2294  emit_int8((unsigned char)0xF7);
2295  emit_int8((unsigned char)(0xD0 | encode));
2296}
2297
2298void Assembler::orl(Address dst, int32_t imm32) {
2299  InstructionMark im(this);
2300  prefix(dst);
2301  emit_arith_operand(0x81, rcx, dst, imm32);
2302}
2303
2304void Assembler::orl(Register dst, int32_t imm32) {
2305  prefix(dst);
2306  emit_arith(0x81, 0xC8, dst, imm32);
2307}
2308
2309void Assembler::orl(Register dst, Address src) {
2310  InstructionMark im(this);
2311  prefix(src, dst);
2312  emit_int8(0x0B);
2313  emit_operand(dst, src);
2314}
2315
2316void Assembler::orl(Register dst, Register src) {
2317  (void) prefix_and_encode(dst->encoding(), src->encoding());
2318  emit_arith(0x0B, 0xC0, dst, src);
2319}
2320
2321void Assembler::packuswb(XMMRegister dst, Address src) {
2322  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2323  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2324  emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
2325}
2326
2327void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
2328  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2329  emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
2330}
2331
2332void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
2333  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
2334  emit_vex_arith(0x67, dst, nds, src, VEX_SIMD_66, vector256);
2335}
2336
2337void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256) {
2338  assert(VM_Version::supports_avx2(), "");
2339  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, true, vector256);
2340  emit_int8(0x00);
2341  emit_int8(0xC0 | encode);
2342  emit_int8(imm8);
2343}
2344
2345void Assembler::pause() {
2346  emit_int8((unsigned char)0xF3);
2347  emit_int8((unsigned char)0x90);
2348}
2349
2350void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
2351  assert(VM_Version::supports_sse4_2(), "");
2352  InstructionMark im(this);
2353  simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
2354  emit_int8(0x61);
2355  emit_operand(dst, src);
2356  emit_int8(imm8);
2357}
2358
2359void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
2360  assert(VM_Version::supports_sse4_2(), "");
2361  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
2362  emit_int8(0x61);
2363  emit_int8((unsigned char)(0xC0 | encode));
2364  emit_int8(imm8);
2365}
2366
2367void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
2368  assert(VM_Version::supports_sse4_1(), "");
2369  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, false);
2370  emit_int8(0x16);
2371  emit_int8((unsigned char)(0xC0 | encode));
2372  emit_int8(imm8);
2373}
2374
2375void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
2376  assert(VM_Version::supports_sse4_1(), "");
2377  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, true);
2378  emit_int8(0x16);
2379  emit_int8((unsigned char)(0xC0 | encode));
2380  emit_int8(imm8);
2381}
2382
2383void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
2384  assert(VM_Version::supports_sse4_1(), "");
2385  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, false);
2386  emit_int8(0x22);
2387  emit_int8((unsigned char)(0xC0 | encode));
2388  emit_int8(imm8);
2389}
2390
2391void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
2392  assert(VM_Version::supports_sse4_1(), "");
2393  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, true);
2394  emit_int8(0x22);
2395  emit_int8((unsigned char)(0xC0 | encode));
2396  emit_int8(imm8);
2397}
2398
2399void Assembler::pmovzxbw(XMMRegister dst, Address src) {
2400  assert(VM_Version::supports_sse4_1(), "");
2401  InstructionMark im(this);
2402  simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2403  emit_int8(0x30);
2404  emit_operand(dst, src);
2405}
2406
2407void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
2408  assert(VM_Version::supports_sse4_1(), "");
2409  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2410  emit_int8(0x30);
2411  emit_int8((unsigned char)(0xC0 | encode));
2412}
2413
2414// generic
2415void Assembler::pop(Register dst) {
2416  int encode = prefix_and_encode(dst->encoding());
2417  emit_int8(0x58 | encode);
2418}
2419
2420void Assembler::popcntl(Register dst, Address src) {
2421  assert(VM_Version::supports_popcnt(), "must support");
2422  InstructionMark im(this);
2423  emit_int8((unsigned char)0xF3);
2424  prefix(src, dst);
2425  emit_int8(0x0F);
2426  emit_int8((unsigned char)0xB8);
2427  emit_operand(dst, src);
2428}
2429
2430void Assembler::popcntl(Register dst, Register src) {
2431  assert(VM_Version::supports_popcnt(), "must support");
2432  emit_int8((unsigned char)0xF3);
2433  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2434  emit_int8(0x0F);
2435  emit_int8((unsigned char)0xB8);
2436  emit_int8((unsigned char)(0xC0 | encode));
2437}
2438
2439void Assembler::popf() {
2440  emit_int8((unsigned char)0x9D);
2441}
2442
#ifndef _LP64 // no 32bit push/pop on amd64
// popl [mem]: prefix, opcode 0x8F, then the operand bytes with rax in the
// register slot.
void Assembler::popl(Address dst) {
  // NOTE (kept from the original): this would adjust the stack by 8 bytes
  // on 64 bits.
  InstructionMark mark(this);
  prefix(dst);
  const int opcode = 0x8F;
  emit_int8((unsigned char)opcode);
  emit_operand(rax, dst);
}
#endif
2452
2453void Assembler::prefetch_prefix(Address src) {
2454  prefix(src);
2455  emit_int8(0x0F);
2456}
2457
2458void Assembler::prefetchnta(Address src) {
2459  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2460  InstructionMark im(this);
2461  prefetch_prefix(src);
2462  emit_int8(0x18);
2463  emit_operand(rax, src); // 0, src
2464}
2465
2466void Assembler::prefetchr(Address src) {
2467  assert(VM_Version::supports_3dnow_prefetch(), "must support");
2468  InstructionMark im(this);
2469  prefetch_prefix(src);
2470  emit_int8(0x0D);
2471  emit_operand(rax, src); // 0, src
2472}
2473
2474void Assembler::prefetcht0(Address src) {
2475  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2476  InstructionMark im(this);
2477  prefetch_prefix(src);
2478  emit_int8(0x18);
2479  emit_operand(rcx, src); // 1, src
2480}
2481
2482void Assembler::prefetcht1(Address src) {
2483  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2484  InstructionMark im(this);
2485  prefetch_prefix(src);
2486  emit_int8(0x18);
2487  emit_operand(rdx, src); // 2, src
2488}
2489
2490void Assembler::prefetcht2(Address src) {
2491  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2492  InstructionMark im(this);
2493  prefetch_prefix(src);
2494  emit_int8(0x18);
2495  emit_operand(rbx, src); // 3, src
2496}
2497
2498void Assembler::prefetchw(Address src) {
2499  assert(VM_Version::supports_3dnow_prefetch(), "must support");
2500  InstructionMark im(this);
2501  prefetch_prefix(src);
2502  emit_int8(0x0D);
2503  emit_operand(rcx, src); // 1, src
2504}
2505
2506void Assembler::prefix(Prefix p) {
2507  emit_int8(p);
2508}
2509
2510void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
2511  assert(VM_Version::supports_ssse3(), "");
2512  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2513  emit_int8(0x00);
2514  emit_int8((unsigned char)(0xC0 | encode));
2515}
2516
2517void Assembler::pshufb(XMMRegister dst, Address src) {
2518  assert(VM_Version::supports_ssse3(), "");
2519  InstructionMark im(this);
2520  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2521  emit_int8(0x00);
2522  emit_operand(dst, src);
2523}
2524
2525void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
2526  assert(isByte(mode), "invalid value");
2527  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2528  emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66);
2529  emit_int8(mode & 0xFF);
2530
2531}
2532
2533void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
2534  assert(isByte(mode), "invalid value");
2535  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2536  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2537  InstructionMark im(this);
2538  simd_prefix(dst, src, VEX_SIMD_66);
2539  emit_int8(0x70);
2540  emit_operand(dst, src);
2541  emit_int8(mode & 0xFF);
2542}
2543
2544void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
2545  assert(isByte(mode), "invalid value");
2546  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2547  emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2);
2548  emit_int8(mode & 0xFF);
2549}
2550
2551void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
2552  assert(isByte(mode), "invalid value");
2553  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2554  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2555  InstructionMark im(this);
2556  simd_prefix(dst, src, VEX_SIMD_F2);
2557  emit_int8(0x70);
2558  emit_operand(dst, src);
2559  emit_int8(mode & 0xFF);
2560}
2561
2562void Assembler::psrldq(XMMRegister dst, int shift) {
2563  // Shift 128 bit value in xmm register by number of bytes.
2564  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2565  int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66);
2566  emit_int8(0x73);
2567  emit_int8((unsigned char)(0xC0 | encode));
2568  emit_int8(shift);
2569}
2570
2571void Assembler::ptest(XMMRegister dst, Address src) {
2572  assert(VM_Version::supports_sse4_1(), "");
2573  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2574  InstructionMark im(this);
2575  simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2576  emit_int8(0x17);
2577  emit_operand(dst, src);
2578}
2579
2580void Assembler::ptest(XMMRegister dst, XMMRegister src) {
2581  assert(VM_Version::supports_sse4_1(), "");
2582  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2583  emit_int8(0x17);
2584  emit_int8((unsigned char)(0xC0 | encode));
2585}
2586
2587void Assembler::vptest(XMMRegister dst, Address src) {
2588  assert(VM_Version::supports_avx(), "");
2589  InstructionMark im(this);
2590  bool vector256 = true;
2591  assert(dst != xnoreg, "sanity");
2592  int dst_enc = dst->encoding();
2593  // swap src<->dst for encoding
2594  vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
2595  emit_int8(0x17);
2596  emit_operand(dst, src);
2597}
2598
2599void Assembler::vptest(XMMRegister dst, XMMRegister src) {
2600  assert(VM_Version::supports_avx(), "");
2601  bool vector256 = true;
2602  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
2603  emit_int8(0x17);
2604  emit_int8((unsigned char)(0xC0 | encode));
2605}
2606
2607void Assembler::punpcklbw(XMMRegister dst, Address src) {
2608  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2609  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2610  emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
2611}
2612
2613void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
2614  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2615  emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
2616}
2617
2618void Assembler::punpckldq(XMMRegister dst, Address src) {
2619  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2620  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2621  emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
2622}
2623
2624void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
2625  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2626  emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
2627}
2628
2629void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
2630  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2631  emit_simd_arith(0x6C, dst, src, VEX_SIMD_66);
2632}
2633
2634void Assembler::push(int32_t imm32) {
2635  // in 64bits we push 64bits onto the stack but only
2636  // take a 32bit immediate
2637  emit_int8(0x68);
2638  emit_int32(imm32);
2639}
2640
2641void Assembler::push(Register src) {
2642  int encode = prefix_and_encode(src->encoding());
2643
2644  emit_int8(0x50 | encode);
2645}
2646
2647void Assembler::pushf() {
2648  emit_int8((unsigned char)0x9C);
2649}
2650
#ifndef _LP64 // no 32bit push/pop on amd64
// pushl [mem]: prefix, opcode 0xFF, then the operand bytes with rsi in the
// register slot.
void Assembler::pushl(Address src) {
  // Note (kept from the original): this would push 64 bits on 64-bit.
  InstructionMark mark(this);
  prefix(src);
  const int opcode = 0xFF;
  emit_int8((unsigned char)opcode);
  emit_operand(rsi, src);
}
#endif
2660
2661void Assembler::rcll(Register dst, int imm8) {
2662  assert(isShiftCount(imm8), "illegal shift count");
2663  int encode = prefix_and_encode(dst->encoding());
2664  if (imm8 == 1) {
2665    emit_int8((unsigned char)0xD1);
2666    emit_int8((unsigned char)(0xD0 | encode));
2667  } else {
2668    emit_int8((unsigned char)0xC1);
2669    emit_int8((unsigned char)0xD0 | encode);
2670    emit_int8(imm8);
2671  }
2672}
2673
2674void Assembler::rdtsc() {
2675  emit_int8((unsigned char)0x0F);
2676  emit_int8((unsigned char)0x31);
2677}
2678
2679// copies data from [esi] to [edi] using rcx pointer sized words
2680// generic
2681void Assembler::rep_mov() {
2682  emit_int8((unsigned char)0xF3);
2683  // MOVSQ
2684  LP64_ONLY(prefix(REX_W));
2685  emit_int8((unsigned char)0xA5);
2686}
2687
2688// sets rcx bytes with rax, value at [edi]
2689void Assembler::rep_stosb() {
2690  emit_int8((unsigned char)0xF3); // REP
2691  LP64_ONLY(prefix(REX_W));
2692  emit_int8((unsigned char)0xAA); // STOSB
2693}
2694
2695// sets rcx pointer sized words with rax, value at [edi]
2696// generic
2697void Assembler::rep_stos() {
2698  emit_int8((unsigned char)0xF3); // REP
2699  LP64_ONLY(prefix(REX_W));       // LP64:STOSQ, LP32:STOSD
2700  emit_int8((unsigned char)0xAB);
2701}
2702
2703// scans rcx pointer sized words at [edi] for occurance of rax,
2704// generic
2705void Assembler::repne_scan() { // repne_scan
2706  emit_int8((unsigned char)0xF2);
2707  // SCASQ
2708  LP64_ONLY(prefix(REX_W));
2709  emit_int8((unsigned char)0xAF);
2710}
2711
2712#ifdef _LP64
2713// scans rcx 4 byte words at [edi] for occurance of rax,
2714// generic
2715void Assembler::repne_scanl() { // repne_scan
2716  emit_int8((unsigned char)0xF2);
2717  // SCASL
2718  emit_int8((unsigned char)0xAF);
2719}
2720#endif
2721
2722void Assembler::ret(int imm16) {
2723  if (imm16 == 0) {
2724    emit_int8((unsigned char)0xC3);
2725  } else {
2726    emit_int8((unsigned char)0xC2);
2727    emit_int16(imm16);
2728  }
2729}
2730
2731void Assembler::sahf() {
2732#ifdef _LP64
2733  // Not supported in 64bit mode
2734  ShouldNotReachHere();
2735#endif
2736  emit_int8((unsigned char)0x9E);
2737}
2738
2739void Assembler::sarl(Register dst, int imm8) {
2740  int encode = prefix_and_encode(dst->encoding());
2741  assert(isShiftCount(imm8), "illegal shift count");
2742  if (imm8 == 1) {
2743    emit_int8((unsigned char)0xD1);
2744    emit_int8((unsigned char)(0xF8 | encode));
2745  } else {
2746    emit_int8((unsigned char)0xC1);
2747    emit_int8((unsigned char)(0xF8 | encode));
2748    emit_int8(imm8);
2749  }
2750}
2751
2752void Assembler::sarl(Register dst) {
2753  int encode = prefix_and_encode(dst->encoding());
2754  emit_int8((unsigned char)0xD3);
2755  emit_int8((unsigned char)(0xF8 | encode));
2756}
2757
2758void Assembler::sbbl(Address dst, int32_t imm32) {
2759  InstructionMark im(this);
2760  prefix(dst);
2761  emit_arith_operand(0x81, rbx, dst, imm32);
2762}
2763
2764void Assembler::sbbl(Register dst, int32_t imm32) {
2765  prefix(dst);
2766  emit_arith(0x81, 0xD8, dst, imm32);
2767}
2768
2769
2770void Assembler::sbbl(Register dst, Address src) {
2771  InstructionMark im(this);
2772  prefix(src, dst);
2773  emit_int8(0x1B);
2774  emit_operand(dst, src);
2775}
2776
2777void Assembler::sbbl(Register dst, Register src) {
2778  (void) prefix_and_encode(dst->encoding(), src->encoding());
2779  emit_arith(0x1B, 0xC0, dst, src);
2780}
2781
2782void Assembler::setb(Condition cc, Register dst) {
2783  assert(0 <= cc && cc < 16, "illegal cc");
2784  int encode = prefix_and_encode(dst->encoding(), true);
2785  emit_int8(0x0F);
2786  emit_int8((unsigned char)0x90 | cc);
2787  emit_int8((unsigned char)(0xC0 | encode));
2788}
2789
2790void Assembler::shll(Register dst, int imm8) {
2791  assert(isShiftCount(imm8), "illegal shift count");
2792  int encode = prefix_and_encode(dst->encoding());
2793  if (imm8 == 1 ) {
2794    emit_int8((unsigned char)0xD1);
2795    emit_int8((unsigned char)(0xE0 | encode));
2796  } else {
2797    emit_int8((unsigned char)0xC1);
2798    emit_int8((unsigned char)(0xE0 | encode));
2799    emit_int8(imm8);
2800  }
2801}
2802
2803void Assembler::shll(Register dst) {
2804  int encode = prefix_and_encode(dst->encoding());
2805  emit_int8((unsigned char)0xD3);
2806  emit_int8((unsigned char)(0xE0 | encode));
2807}
2808
2809void Assembler::shrl(Register dst, int imm8) {
2810  assert(isShiftCount(imm8), "illegal shift count");
2811  int encode = prefix_and_encode(dst->encoding());
2812  emit_int8((unsigned char)0xC1);
2813  emit_int8((unsigned char)(0xE8 | encode));
2814  emit_int8(imm8);
2815}
2816
2817void Assembler::shrl(Register dst) {
2818  int encode = prefix_and_encode(dst->encoding());
2819  emit_int8((unsigned char)0xD3);
2820  emit_int8((unsigned char)(0xE8 | encode));
2821}
2822
2823// copies a single word from [esi] to [edi]
2824void Assembler::smovl() {
2825  emit_int8((unsigned char)0xA5);
2826}
2827
2828void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
2829  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2830  emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
2831}
2832
2833void Assembler::sqrtsd(XMMRegister dst, Address src) {
2834  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2835  emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
2836}
2837
2838void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
2839  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2840  emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
2841}
2842
2843void Assembler::std() {
2844  emit_int8((unsigned char)0xFD);
2845}
2846
2847void Assembler::sqrtss(XMMRegister dst, Address src) {
2848  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2849  emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
2850}
2851
2852void Assembler::stmxcsr( Address dst) {
2853  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2854  InstructionMark im(this);
2855  prefix(dst);
2856  emit_int8(0x0F);
2857  emit_int8((unsigned char)0xAE);
2858  emit_operand(as_Register(3), dst);
2859}
2860
2861void Assembler::subl(Address dst, int32_t imm32) {
2862  InstructionMark im(this);
2863  prefix(dst);
2864  emit_arith_operand(0x81, rbp, dst, imm32);
2865}
2866
2867void Assembler::subl(Address dst, Register src) {
2868  InstructionMark im(this);
2869  prefix(dst, src);
2870  emit_int8(0x29);
2871  emit_operand(src, dst);
2872}
2873
2874void Assembler::subl(Register dst, int32_t imm32) {
2875  prefix(dst);
2876  emit_arith(0x81, 0xE8, dst, imm32);
2877}
2878
2879// Force generation of a 4 byte immediate value even if it fits into 8bit
2880void Assembler::subl_imm32(Register dst, int32_t imm32) {
2881  prefix(dst);
2882  emit_arith_imm32(0x81, 0xE8, dst, imm32);
2883}
2884
2885void Assembler::subl(Register dst, Address src) {
2886  InstructionMark im(this);
2887  prefix(src, dst);
2888  emit_int8(0x2B);
2889  emit_operand(dst, src);
2890}
2891
2892void Assembler::subl(Register dst, Register src) {
2893  (void) prefix_and_encode(dst->encoding(), src->encoding());
2894  emit_arith(0x2B, 0xC0, dst, src);
2895}
2896
2897void Assembler::subsd(XMMRegister dst, XMMRegister src) {
2898  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2899  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
2900}
2901
2902void Assembler::subsd(XMMRegister dst, Address src) {
2903  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2904  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
2905}
2906
2907void Assembler::subss(XMMRegister dst, XMMRegister src) {
2908  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2909  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
2910}
2911
2912void Assembler::subss(XMMRegister dst, Address src) {
2913  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2914  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
2915}
2916
2917void Assembler::testb(Register dst, int imm8) {
2918  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2919  (void) prefix_and_encode(dst->encoding(), true);
2920  emit_arith_b(0xF6, 0xC0, dst, imm8);
2921}
2922
2923void Assembler::testl(Register dst, int32_t imm32) {
2924  // not using emit_arith because test
2925  // doesn't support sign-extension of
2926  // 8bit operands
2927  int encode = dst->encoding();
2928  if (encode == 0) {
2929    emit_int8((unsigned char)0xA9);
2930  } else {
2931    encode = prefix_and_encode(encode);
2932    emit_int8((unsigned char)0xF7);
2933    emit_int8((unsigned char)(0xC0 | encode));
2934  }
2935  emit_int32(imm32);
2936}
2937
2938void Assembler::testl(Register dst, Register src) {
2939  (void) prefix_and_encode(dst->encoding(), src->encoding());
2940  emit_arith(0x85, 0xC0, dst, src);
2941}
2942
2943void Assembler::testl(Register dst, Address  src) {
2944  InstructionMark im(this);
2945  prefix(src, dst);
2946  emit_int8((unsigned char)0x85);
2947  emit_operand(dst, src);
2948}
2949
2950void Assembler::tzcntl(Register dst, Register src) {
2951  assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
2952  emit_int8((unsigned char)0xF3);
2953  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2954  emit_int8(0x0F);
2955  emit_int8((unsigned char)0xBC);
2956  emit_int8((unsigned char)0xC0 | encode);
2957}
2958
2959void Assembler::tzcntq(Register dst, Register src) {
2960  assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
2961  emit_int8((unsigned char)0xF3);
2962  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
2963  emit_int8(0x0F);
2964  emit_int8((unsigned char)0xBC);
2965  emit_int8((unsigned char)(0xC0 | encode));
2966}
2967
2968void Assembler::ucomisd(XMMRegister dst, Address src) {
2969  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2970  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
2971}
2972
2973void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
2974  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2975  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
2976}
2977
2978void Assembler::ucomiss(XMMRegister dst, Address src) {
2979  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2980  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
2981}
2982
2983void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
2984  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2985  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
2986}
2987
2988void Assembler::xabort(int8_t imm8) {
2989  emit_int8((unsigned char)0xC6);
2990  emit_int8((unsigned char)0xF8);
2991  emit_int8((unsigned char)(imm8 & 0xFF));
2992}
2993
2994void Assembler::xaddl(Address dst, Register src) {
2995  InstructionMark im(this);
2996  prefix(dst, src);
2997  emit_int8(0x0F);
2998  emit_int8((unsigned char)0xC1);
2999  emit_operand(src, dst);
3000}
3001
3002void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {
3003  InstructionMark im(this);
3004  relocate(rtype);
3005  if (abort.is_bound()) {
3006    address entry = target(abort);
3007    assert(entry != NULL, "abort entry NULL");
3008    intptr_t offset = entry - pc();
3009    emit_int8((unsigned char)0xC7);
3010    emit_int8((unsigned char)0xF8);
3011    emit_int32(offset - 6); // 2 opcode + 4 address
3012  } else {
3013    abort.add_patch_at(code(), locator());
3014    emit_int8((unsigned char)0xC7);
3015    emit_int8((unsigned char)0xF8);
3016    emit_int32(0);
3017  }
3018}
3019
3020void Assembler::xchgl(Register dst, Address src) { // xchg
3021  InstructionMark im(this);
3022  prefix(src, dst);
3023  emit_int8((unsigned char)0x87);
3024  emit_operand(dst, src);
3025}
3026
3027void Assembler::xchgl(Register dst, Register src) {
3028  int encode = prefix_and_encode(dst->encoding(), src->encoding());
3029  emit_int8((unsigned char)0x87);
3030  emit_int8((unsigned char)(0xC0 | encode));
3031}
3032
3033void Assembler::xend() {
3034  emit_int8((unsigned char)0x0F);
3035  emit_int8((unsigned char)0x01);
3036  emit_int8((unsigned char)0xD5);
3037}
3038
3039void Assembler::xgetbv() {
3040  emit_int8(0x0F);
3041  emit_int8(0x01);
3042  emit_int8((unsigned char)0xD0);
3043}
3044
3045void Assembler::xorl(Register dst, int32_t imm32) {
3046  prefix(dst);
3047  emit_arith(0x81, 0xF0, dst, imm32);
3048}
3049
3050void Assembler::xorl(Register dst, Address src) {
3051  InstructionMark im(this);
3052  prefix(src, dst);
3053  emit_int8(0x33);
3054  emit_operand(dst, src);
3055}
3056
3057void Assembler::xorl(Register dst, Register src) {
3058  (void) prefix_and_encode(dst->encoding(), src->encoding());
3059  emit_arith(0x33, 0xC0, dst, src);
3060}
3061
3062
// AVX 3-operand scalar floating-point arithmetic instructions
3064
3065void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
3066  assert(VM_Version::supports_avx(), "");
3067  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
3068}
3069
3070void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3071  assert(VM_Version::supports_avx(), "");
3072  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
3073}
3074
3075void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
3076  assert(VM_Version::supports_avx(), "");
3077  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3078}
3079
3080void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3081  assert(VM_Version::supports_avx(), "");
3082  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3083}
3084
3085void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
3086  assert(VM_Version::supports_avx(), "");
3087  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
3088}
3089
3090void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3091  assert(VM_Version::supports_avx(), "");
3092  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
3093}
3094
3095void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
3096  assert(VM_Version::supports_avx(), "");
3097  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3098}
3099
3100void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3101  assert(VM_Version::supports_avx(), "");
3102  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3103}
3104
3105void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
3106  assert(VM_Version::supports_avx(), "");
3107  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
3108}
3109
3110void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3111  assert(VM_Version::supports_avx(), "");
3112  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
3113}
3114
3115void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
3116  assert(VM_Version::supports_avx(), "");
3117  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3118}
3119
3120void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3121  assert(VM_Version::supports_avx(), "");
3122  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3123}
3124
3125void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
3126  assert(VM_Version::supports_avx(), "");
3127  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
3128}
3129
3130void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3131  assert(VM_Version::supports_avx(), "");
3132  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
3133}
3134
3135void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
3136  assert(VM_Version::supports_avx(), "");
3137  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3138}
3139
3140void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3141  assert(VM_Version::supports_avx(), "");
3142  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3143}
3144
3145//====================VECTOR ARITHMETIC=====================================
3146
// Floating-point vector arithmetic
3148
3149void Assembler::addpd(XMMRegister dst, XMMRegister src) {
3150  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3151  emit_simd_arith(0x58, dst, src, VEX_SIMD_66);
3152}
3153
3154void Assembler::addps(XMMRegister dst, XMMRegister src) {
3155  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3156  emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE);
3157}
3158
3159void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3160  assert(VM_Version::supports_avx(), "");
3161  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
3162}
3163
3164void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3165  assert(VM_Version::supports_avx(), "");
3166  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
3167}
3168
3169void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3170  assert(VM_Version::supports_avx(), "");
3171  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
3172}
3173
3174void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3175  assert(VM_Version::supports_avx(), "");
3176  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
3177}
3178
3179void Assembler::subpd(XMMRegister dst, XMMRegister src) {
3180  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3181  emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);
3182}
3183
3184void Assembler::subps(XMMRegister dst, XMMRegister src) {
3185  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3186  emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE);
3187}
3188
3189void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3190  assert(VM_Version::supports_avx(), "");
3191  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
3192}
3193
3194void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3195  assert(VM_Version::supports_avx(), "");
3196  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
3197}
3198
3199void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3200  assert(VM_Version::supports_avx(), "");
3201  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
3202}
3203
3204void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3205  assert(VM_Version::supports_avx(), "");
3206  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
3207}
3208
3209void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
3210  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3211  emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
3212}
3213
3214void Assembler::mulps(XMMRegister dst, XMMRegister src) {
3215  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3216  emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE);
3217}
3218
3219void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3220  assert(VM_Version::supports_avx(), "");
3221  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
3222}
3223
3224void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3225  assert(VM_Version::supports_avx(), "");
3226  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
3227}
3228
3229void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3230  assert(VM_Version::supports_avx(), "");
3231  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
3232}
3233
3234void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3235  assert(VM_Version::supports_avx(), "");
3236  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
3237}
3238
3239void Assembler::divpd(XMMRegister dst, XMMRegister src) {
3240  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3241  emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);
3242}
3243
3244void Assembler::divps(XMMRegister dst, XMMRegister src) {
3245  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3246  emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE);
3247}
3248
3249void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3250  assert(VM_Version::supports_avx(), "");
3251  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
3252}
3253
3254void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3255  assert(VM_Version::supports_avx(), "");
3256  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
3257}
3258
3259void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3260  assert(VM_Version::supports_avx(), "");
3261  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
3262}
3263
3264void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3265  assert(VM_Version::supports_avx(), "");
3266  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
3267}
3268
3269void Assembler::andpd(XMMRegister dst, XMMRegister src) {
3270  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3271  emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
3272}
3273
3274void Assembler::andps(XMMRegister dst, XMMRegister src) {
3275  NOT_LP64(assert(VM_Version::supports_sse(), ""));
3276  emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
3277}
3278
3279void Assembler::andps(XMMRegister dst, Address src) {
3280  NOT_LP64(assert(VM_Version::supports_sse(), ""));
3281  emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
3282}
3283
3284void Assembler::andpd(XMMRegister dst, Address src) {
3285  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3286  emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
3287}
3288
3289void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3290  assert(VM_Version::supports_avx(), "");
3291  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
3292}
3293
3294void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3295  assert(VM_Version::supports_avx(), "");
3296  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
3297}
3298
3299void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3300  assert(VM_Version::supports_avx(), "");
3301  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
3302}
3303
3304void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3305  assert(VM_Version::supports_avx(), "");
3306  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
3307}
3308
3309void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
3310  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3311  emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
3312}
3313
3314void Assembler::xorps(XMMRegister dst, XMMRegister src) {
3315  NOT_LP64(assert(VM_Version::supports_sse(), ""));
3316  emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
3317}
3318
3319void Assembler::xorpd(XMMRegister dst, Address src) {
3320  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3321  emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
3322}
3323
3324void Assembler::xorps(XMMRegister dst, Address src) {
3325  NOT_LP64(assert(VM_Version::supports_sse(), ""));
3326  emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
3327}
3328
3329void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3330  assert(VM_Version::supports_avx(), "");
3331  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
3332}
3333
3334void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3335  assert(VM_Version::supports_avx(), "");
3336  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
3337}
3338
3339void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3340  assert(VM_Version::supports_avx(), "");
3341  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
3342}
3343
3344void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3345  assert(VM_Version::supports_avx(), "");
3346  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
3347}
3348
3349
3350// Integer vector arithmetic
3351void Assembler::paddb(XMMRegister dst, XMMRegister src) {
3352  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3353  emit_simd_arith(0xFC, dst, src, VEX_SIMD_66);
3354}
3355
3356void Assembler::paddw(XMMRegister dst, XMMRegister src) {
3357  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3358  emit_simd_arith(0xFD, dst, src, VEX_SIMD_66);
3359}
3360
3361void Assembler::paddd(XMMRegister dst, XMMRegister src) {
3362  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3363  emit_simd_arith(0xFE, dst, src, VEX_SIMD_66);
3364}
3365
3366void Assembler::paddq(XMMRegister dst, XMMRegister src) {
3367  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3368  emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
3369}
3370
3371void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3372  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3373  emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
3374}
3375
3376void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3377  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3378  emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
3379}
3380
3381void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3382  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3383  emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
3384}
3385
3386void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3387  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3388  emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
3389}
3390
3391void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3392  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3393  emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
3394}
3395
3396void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3397  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3398  emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
3399}
3400
3401void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3402  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3403  emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
3404}
3405
3406void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3407  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3408  emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
3409}
3410
3411void Assembler::psubb(XMMRegister dst, XMMRegister src) {
3412  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3413  emit_simd_arith(0xF8, dst, src, VEX_SIMD_66);
3414}
3415
3416void Assembler::psubw(XMMRegister dst, XMMRegister src) {
3417  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3418  emit_simd_arith(0xF9, dst, src, VEX_SIMD_66);
3419}
3420
3421void Assembler::psubd(XMMRegister dst, XMMRegister src) {
3422  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3423  emit_simd_arith(0xFA, dst, src, VEX_SIMD_66);
3424}
3425
3426void Assembler::psubq(XMMRegister dst, XMMRegister src) {
3427  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3428  emit_simd_arith(0xFB, dst, src, VEX_SIMD_66);
3429}
3430
3431void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3432  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3433  emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
3434}
3435
3436void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3437  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3438  emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
3439}
3440
3441void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3442  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3443  emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
3444}
3445
3446void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3447  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3448  emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);
3449}
3450
3451void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3452  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3453  emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
3454}
3455
3456void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3457  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3458  emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
3459}
3460
3461void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3462  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3463  emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
3464}
3465
3466void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3467  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3468  emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);
3469}
3470
3471void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
3472  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3473  emit_simd_arith(0xD5, dst, src, VEX_SIMD_66);
3474}
3475
3476void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
3477  assert(VM_Version::supports_sse4_1(), "");
3478  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
3479  emit_int8(0x40);
3480  emit_int8((unsigned char)(0xC0 | encode));
3481}
3482
3483void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3484  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3485  emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
3486}
3487
3488void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3489  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3490  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
3491  emit_int8(0x40);
3492  emit_int8((unsigned char)(0xC0 | encode));
3493}
3494
3495void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3496  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3497  emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
3498}
3499
3500void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3501  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3502  InstructionMark im(this);
3503  int dst_enc = dst->encoding();
3504  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
3505  vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
3506  emit_int8(0x40);
3507  emit_operand(dst, src);
3508}
3509
3510// Shift packed integers left by specified number of bits.
3511void Assembler::psllw(XMMRegister dst, int shift) {
3512  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3513  // XMM6 is for /6 encoding: 66 0F 71 /6 ib
3514  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
3515  emit_int8(0x71);
3516  emit_int8((unsigned char)(0xC0 | encode));
3517  emit_int8(shift & 0xFF);
3518}
3519
3520void Assembler::pslld(XMMRegister dst, int shift) {
3521  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3522  // XMM6 is for /6 encoding: 66 0F 72 /6 ib
3523  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
3524  emit_int8(0x72);
3525  emit_int8((unsigned char)(0xC0 | encode));
3526  emit_int8(shift & 0xFF);
3527}
3528
3529void Assembler::psllq(XMMRegister dst, int shift) {
3530  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3531  // XMM6 is for /6 encoding: 66 0F 73 /6 ib
3532  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
3533  emit_int8(0x73);
3534  emit_int8((unsigned char)(0xC0 | encode));
3535  emit_int8(shift & 0xFF);
3536}
3537
3538void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
3539  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3540  emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66);
3541}
3542
3543void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
3544  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3545  emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66);
3546}
3547
3548void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
3549  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3550  emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66);
3551}
3552
3553void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3554  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3555  // XMM6 is for /6 encoding: 66 0F 71 /6 ib
3556  emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector256);
3557  emit_int8(shift & 0xFF);
3558}
3559
3560void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3561  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3562  // XMM6 is for /6 encoding: 66 0F 72 /6 ib
3563  emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector256);
3564  emit_int8(shift & 0xFF);
3565}
3566
3567void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3568  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3569  // XMM6 is for /6 encoding: 66 0F 73 /6 ib
3570  emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector256);
3571  emit_int8(shift & 0xFF);
3572}
3573
3574void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3575  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3576  emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector256);
3577}
3578
3579void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3580  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3581  emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector256);
3582}
3583
3584void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3585  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3586  emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector256);
3587}
3588
3589// Shift packed integers logically right by specified number of bits.
3590void Assembler::psrlw(XMMRegister dst, int shift) {
3591  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3592  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
3593  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
3594  emit_int8(0x71);
3595  emit_int8((unsigned char)(0xC0 | encode));
3596  emit_int8(shift & 0xFF);
3597}
3598
3599void Assembler::psrld(XMMRegister dst, int shift) {
3600  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3601  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
3602  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
3603  emit_int8(0x72);
3604  emit_int8((unsigned char)(0xC0 | encode));
3605  emit_int8(shift & 0xFF);
3606}
3607
3608void Assembler::psrlq(XMMRegister dst, int shift) {
3609  // Do not confuse it with psrldq SSE2 instruction which
3610  // shifts 128 bit value in xmm register by number of bytes.
3611  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3612  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
3613  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
3614  emit_int8(0x73);
3615  emit_int8((unsigned char)(0xC0 | encode));
3616  emit_int8(shift & 0xFF);
3617}
3618
3619void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
3620  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3621  emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66);
3622}
3623
3624void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
3625  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3626  emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66);
3627}
3628
3629void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
3630  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3631  emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
3632}
3633
3634void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3635  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3636  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
3637  emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector256);
3638  emit_int8(shift & 0xFF);
3639}
3640
3641void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3642  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3643  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
3644  emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector256);
3645  emit_int8(shift & 0xFF);
3646}
3647
3648void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3649  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3650  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
3651  emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector256);
3652  emit_int8(shift & 0xFF);
3653}
3654
3655void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3656  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3657  emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector256);
3658}
3659
3660void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3661  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3662  emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector256);
3663}
3664
3665void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3666  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3667  emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector256);
3668}
3669
3670// Shift packed integers arithmetically right by specified number of bits.
3671void Assembler::psraw(XMMRegister dst, int shift) {
3672  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3673  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
3674  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
3675  emit_int8(0x71);
3676  emit_int8((unsigned char)(0xC0 | encode));
3677  emit_int8(shift & 0xFF);
3678}
3679
3680void Assembler::psrad(XMMRegister dst, int shift) {
3681  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3682  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
3683  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
3684  emit_int8(0x72);
3685  emit_int8((unsigned char)(0xC0 | encode));
3686  emit_int8(shift & 0xFF);
3687}
3688
3689void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
3690  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3691  emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66);
3692}
3693
3694void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
3695  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3696  emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66);
3697}
3698
3699void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3700  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3701  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
3702  emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector256);
3703  emit_int8(shift & 0xFF);
3704}
3705
3706void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3707  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3708  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
3709  emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector256);
3710  emit_int8(shift & 0xFF);
3711}
3712
3713void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3714  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3715  emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector256);
3716}
3717
3718void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3719  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3720  emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector256);
3721}
3722
3723
3724// AND packed integers
3725void Assembler::pand(XMMRegister dst, XMMRegister src) {
3726  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3727  emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
3728}
3729
3730void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3731  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3732  emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
3733}
3734
3735void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3736  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3737  emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
3738}
3739
3740void Assembler::por(XMMRegister dst, XMMRegister src) {
3741  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3742  emit_simd_arith(0xEB, dst, src, VEX_SIMD_66);
3743}
3744
3745void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3746  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3747  emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
3748}
3749
3750void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3751  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3752  emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
3753}
3754
3755void Assembler::pxor(XMMRegister dst, XMMRegister src) {
3756  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3757  emit_simd_arith(0xEF, dst, src, VEX_SIMD_66);
3758}
3759
3760void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3761  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3762  emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
3763}
3764
3765void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3766  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3767  emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
3768}
3769
3770
3771void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3772  assert(VM_Version::supports_avx(), "");
3773  bool vector256 = true;
3774  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
3775  emit_int8(0x18);
3776  emit_int8((unsigned char)(0xC0 | encode));
3777  // 0x00 - insert into lower 128 bits
3778  // 0x01 - insert into upper 128 bits
3779  emit_int8(0x01);
3780}
3781
3782void Assembler::vinsertf128h(XMMRegister dst, Address src) {
3783  assert(VM_Version::supports_avx(), "");
3784  InstructionMark im(this);
3785  bool vector256 = true;
3786  assert(dst != xnoreg, "sanity");
3787  int dst_enc = dst->encoding();
3788  // swap src<->dst for encoding
3789  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
3790  emit_int8(0x18);
3791  emit_operand(dst, src);
3792  // 0x01 - insert into upper 128 bits
3793  emit_int8(0x01);
3794}
3795
3796void Assembler::vextractf128h(Address dst, XMMRegister src) {
3797  assert(VM_Version::supports_avx(), "");
3798  InstructionMark im(this);
3799  bool vector256 = true;
3800  assert(src != xnoreg, "sanity");
3801  int src_enc = src->encoding();
3802  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
3803  emit_int8(0x19);
3804  emit_operand(src, dst);
3805  // 0x01 - extract from upper 128 bits
3806  emit_int8(0x01);
3807}
3808
3809void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3810  assert(VM_Version::supports_avx2(), "");
3811  bool vector256 = true;
3812  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
3813  emit_int8(0x38);
3814  emit_int8((unsigned char)(0xC0 | encode));
3815  // 0x00 - insert into lower 128 bits
3816  // 0x01 - insert into upper 128 bits
3817  emit_int8(0x01);
3818}
3819
3820void Assembler::vinserti128h(XMMRegister dst, Address src) {
3821  assert(VM_Version::supports_avx2(), "");
3822  InstructionMark im(this);
3823  bool vector256 = true;
3824  assert(dst != xnoreg, "sanity");
3825  int dst_enc = dst->encoding();
3826  // swap src<->dst for encoding
3827  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
3828  emit_int8(0x38);
3829  emit_operand(dst, src);
3830  // 0x01 - insert into upper 128 bits
3831  emit_int8(0x01);
3832}
3833
3834void Assembler::vextracti128h(Address dst, XMMRegister src) {
3835  assert(VM_Version::supports_avx2(), "");
3836  InstructionMark im(this);
3837  bool vector256 = true;
3838  assert(src != xnoreg, "sanity");
3839  int src_enc = src->encoding();
3840  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
3841  emit_int8(0x39);
3842  emit_operand(src, dst);
3843  // 0x01 - extract from upper 128 bits
3844  emit_int8(0x01);
3845}
3846
3847// duplicate 4-bytes integer data from src into 8 locations in dest
3848void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
3849  assert(VM_Version::supports_avx2(), "");
3850  bool vector256 = true;
3851  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
3852  emit_int8(0x58);
3853  emit_int8((unsigned char)(0xC0 | encode));
3854}
3855
3856// Carry-Less Multiplication Quadword
3857void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
3858  assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
3859  bool vector256 = false;
3860  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
3861  emit_int8(0x44);
3862  emit_int8((unsigned char)(0xC0 | encode));
3863  emit_int8((unsigned char)mask);
3864}
3865
3866void Assembler::vzeroupper() {
3867  assert(VM_Version::supports_avx(), "");
3868  (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
3869  emit_int8(0x77);
3870}
3871
3872
3873#ifndef _LP64
3874// 32bit only pieces of the assembler
3875
3876void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
3877  // NO PREFIX AS NEVER 64BIT
3878  InstructionMark im(this);
3879  emit_int8((unsigned char)0x81);
3880  emit_int8((unsigned char)(0xF8 | src1->encoding()));
3881  emit_data(imm32, rspec, 0);
3882}
3883
3884void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
3885  // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs
3886  InstructionMark im(this);
3887  emit_int8((unsigned char)0x81);
3888  emit_operand(rdi, src1);
3889  emit_data(imm32, rspec, 0);
3890}
3891
3892// The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax,
3893// and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded
3894// into rdx:rax.  The ZF is set if the compared values were equal, and cleared otherwise.
3895void Assembler::cmpxchg8(Address adr) {
3896  InstructionMark im(this);
3897  emit_int8(0x0F);
3898  emit_int8((unsigned char)0xC7);
3899  emit_operand(rcx, adr);
3900}
3901
3902void Assembler::decl(Register dst) {
3903  // Don't use it directly. Use MacroAssembler::decrementl() instead.
3904 emit_int8(0x48 | dst->encoding());
3905}
3906
3907#endif // _LP64
3908
3909// 64bit typically doesn't use the x87 but needs to for the trig funcs
3910
3911void Assembler::fabs() {
3912  emit_int8((unsigned char)0xD9);
3913  emit_int8((unsigned char)0xE1);
3914}
3915
3916void Assembler::fadd(int i) {
3917  emit_farith(0xD8, 0xC0, i);
3918}
3919
3920void Assembler::fadd_d(Address src) {
3921  InstructionMark im(this);
3922  emit_int8((unsigned char)0xDC);
3923  emit_operand32(rax, src);
3924}
3925
3926void Assembler::fadd_s(Address src) {
3927  InstructionMark im(this);
3928  emit_int8((unsigned char)0xD8);
3929  emit_operand32(rax, src);
3930}
3931
3932void Assembler::fadda(int i) {
3933  emit_farith(0xDC, 0xC0, i);
3934}
3935
3936void Assembler::faddp(int i) {
3937  emit_farith(0xDE, 0xC0, i);
3938}
3939
3940void Assembler::fchs() {
3941  emit_int8((unsigned char)0xD9);
3942  emit_int8((unsigned char)0xE0);
3943}
3944
3945void Assembler::fcom(int i) {
3946  emit_farith(0xD8, 0xD0, i);
3947}
3948
3949void Assembler::fcomp(int i) {
3950  emit_farith(0xD8, 0xD8, i);
3951}
3952
3953void Assembler::fcomp_d(Address src) {
3954  InstructionMark im(this);
3955  emit_int8((unsigned char)0xDC);
3956  emit_operand32(rbx, src);
3957}
3958
3959void Assembler::fcomp_s(Address src) {
3960  InstructionMark im(this);
3961  emit_int8((unsigned char)0xD8);
3962  emit_operand32(rbx, src);
3963}
3964
3965void Assembler::fcompp() {
3966  emit_int8((unsigned char)0xDE);
3967  emit_int8((unsigned char)0xD9);
3968}
3969
3970void Assembler::fcos() {
3971  emit_int8((unsigned char)0xD9);
3972  emit_int8((unsigned char)0xFF);
3973}
3974
3975void Assembler::fdecstp() {
3976  emit_int8((unsigned char)0xD9);
3977  emit_int8((unsigned char)0xF6);
3978}
3979
3980void Assembler::fdiv(int i) {
3981  emit_farith(0xD8, 0xF0, i);
3982}
3983
3984void Assembler::fdiv_d(Address src) {
3985  InstructionMark im(this);
3986  emit_int8((unsigned char)0xDC);
3987  emit_operand32(rsi, src);
3988}
3989
3990void Assembler::fdiv_s(Address src) {
3991  InstructionMark im(this);
3992  emit_int8((unsigned char)0xD8);
3993  emit_operand32(rsi, src);
3994}
3995
3996void Assembler::fdiva(int i) {
3997  emit_farith(0xDC, 0xF8, i);
3998}
3999
4000// Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
4001//       is erroneous for some of the floating-point instructions below.
4002
4003void Assembler::fdivp(int i) {
4004  emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
4005}
4006
4007void Assembler::fdivr(int i) {
4008  emit_farith(0xD8, 0xF8, i);
4009}
4010
4011void Assembler::fdivr_d(Address src) {
4012  InstructionMark im(this);
4013  emit_int8((unsigned char)0xDC);
4014  emit_operand32(rdi, src);
4015}
4016
4017void Assembler::fdivr_s(Address src) {
4018  InstructionMark im(this);
4019  emit_int8((unsigned char)0xD8);
4020  emit_operand32(rdi, src);
4021}
4022
4023void Assembler::fdivra(int i) {
4024  emit_farith(0xDC, 0xF0, i);
4025}
4026
4027void Assembler::fdivrp(int i) {
4028  emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
4029}
4030
4031void Assembler::ffree(int i) {
4032  emit_farith(0xDD, 0xC0, i);
4033}
4034
4035void Assembler::fild_d(Address adr) {
4036  InstructionMark im(this);
4037  emit_int8((unsigned char)0xDF);
4038  emit_operand32(rbp, adr);
4039}
4040
4041void Assembler::fild_s(Address adr) {
4042  InstructionMark im(this);
4043  emit_int8((unsigned char)0xDB);
4044  emit_operand32(rax, adr);
4045}
4046
4047void Assembler::fincstp() {
4048  emit_int8((unsigned char)0xD9);
4049  emit_int8((unsigned char)0xF7);
4050}
4051
4052void Assembler::finit() {
4053  emit_int8((unsigned char)0x9B);
4054  emit_int8((unsigned char)0xDB);
4055  emit_int8((unsigned char)0xE3);
4056}
4057
4058void Assembler::fist_s(Address adr) {
4059  InstructionMark im(this);
4060  emit_int8((unsigned char)0xDB);
4061  emit_operand32(rdx, adr);
4062}
4063
4064void Assembler::fistp_d(Address adr) {
4065  InstructionMark im(this);
4066  emit_int8((unsigned char)0xDF);
4067  emit_operand32(rdi, adr);
4068}
4069
4070void Assembler::fistp_s(Address adr) {
4071  InstructionMark im(this);
4072  emit_int8((unsigned char)0xDB);
4073  emit_operand32(rbx, adr);
4074}
4075
4076void Assembler::fld1() {
4077  emit_int8((unsigned char)0xD9);
4078  emit_int8((unsigned char)0xE8);
4079}
4080
4081void Assembler::fld_d(Address adr) {
4082  InstructionMark im(this);
4083  emit_int8((unsigned char)0xDD);
4084  emit_operand32(rax, adr);
4085}
4086
4087void Assembler::fld_s(Address adr) {
4088  InstructionMark im(this);
4089  emit_int8((unsigned char)0xD9);
4090  emit_operand32(rax, adr);
4091}
4092
4093
4094void Assembler::fld_s(int index) {
4095  emit_farith(0xD9, 0xC0, index);
4096}
4097
4098void Assembler::fld_x(Address adr) {
4099  InstructionMark im(this);
4100  emit_int8((unsigned char)0xDB);
4101  emit_operand32(rbp, adr);
4102}
4103
4104void Assembler::fldcw(Address src) {
4105  InstructionMark im(this);
4106  emit_int8((unsigned char)0xD9);
4107  emit_operand32(rbp, src);
4108}
4109
4110void Assembler::fldenv(Address src) {
4111  InstructionMark im(this);
4112  emit_int8((unsigned char)0xD9);
4113  emit_operand32(rsp, src);
4114}
4115
4116void Assembler::fldlg2() {
4117  emit_int8((unsigned char)0xD9);
4118  emit_int8((unsigned char)0xEC);
4119}
4120
4121void Assembler::fldln2() {
4122  emit_int8((unsigned char)0xD9);
4123  emit_int8((unsigned char)0xED);
4124}
4125
4126void Assembler::fldz() {
4127  emit_int8((unsigned char)0xD9);
4128  emit_int8((unsigned char)0xEE);
4129}
4130
4131void Assembler::flog() {
4132  fldln2();
4133  fxch();
4134  fyl2x();
4135}
4136
4137void Assembler::flog10() {
4138  fldlg2();
4139  fxch();
4140  fyl2x();
4141}
4142
4143void Assembler::fmul(int i) {
4144  emit_farith(0xD8, 0xC8, i);
4145}
4146
4147void Assembler::fmul_d(Address src) {
4148  InstructionMark im(this);
4149  emit_int8((unsigned char)0xDC);
4150  emit_operand32(rcx, src);
4151}
4152
4153void Assembler::fmul_s(Address src) {
4154  InstructionMark im(this);
4155  emit_int8((unsigned char)0xD8);
4156  emit_operand32(rcx, src);
4157}
4158
4159void Assembler::fmula(int i) {
4160  emit_farith(0xDC, 0xC8, i);
4161}
4162
4163void Assembler::fmulp(int i) {
4164  emit_farith(0xDE, 0xC8, i);
4165}
4166
4167void Assembler::fnsave(Address dst) {
4168  InstructionMark im(this);
4169  emit_int8((unsigned char)0xDD);
4170  emit_operand32(rsi, dst);
4171}
4172
4173void Assembler::fnstcw(Address src) {
4174  InstructionMark im(this);
4175  emit_int8((unsigned char)0x9B);
4176  emit_int8((unsigned char)0xD9);
4177  emit_operand32(rdi, src);
4178}
4179
4180void Assembler::fnstsw_ax() {
4181  emit_int8((unsigned char)0xDF);
4182  emit_int8((unsigned char)0xE0);
4183}
4184
4185void Assembler::fprem() {
4186  emit_int8((unsigned char)0xD9);
4187  emit_int8((unsigned char)0xF8);
4188}
4189
4190void Assembler::fprem1() {
4191  emit_int8((unsigned char)0xD9);
4192  emit_int8((unsigned char)0xF5);
4193}
4194
4195void Assembler::frstor(Address src) {
4196  InstructionMark im(this);
4197  emit_int8((unsigned char)0xDD);
4198  emit_operand32(rsp, src);
4199}
4200
4201void Assembler::fsin() {
4202  emit_int8((unsigned char)0xD9);
4203  emit_int8((unsigned char)0xFE);
4204}
4205
4206void Assembler::fsqrt() {
4207  emit_int8((unsigned char)0xD9);
4208  emit_int8((unsigned char)0xFA);
4209}
4210
4211void Assembler::fst_d(Address adr) {
4212  InstructionMark im(this);
4213  emit_int8((unsigned char)0xDD);
4214  emit_operand32(rdx, adr);
4215}
4216
4217void Assembler::fst_s(Address adr) {
4218  InstructionMark im(this);
4219  emit_int8((unsigned char)0xD9);
4220  emit_operand32(rdx, adr);
4221}
4222
4223void Assembler::fstp_d(Address adr) {
4224  InstructionMark im(this);
4225  emit_int8((unsigned char)0xDD);
4226  emit_operand32(rbx, adr);
4227}
4228
4229void Assembler::fstp_d(int index) {
4230  emit_farith(0xDD, 0xD8, index);
4231}
4232
4233void Assembler::fstp_s(Address adr) {
4234  InstructionMark im(this);
4235  emit_int8((unsigned char)0xD9);
4236  emit_operand32(rbx, adr);
4237}
4238
4239void Assembler::fstp_x(Address adr) {
4240  InstructionMark im(this);
4241  emit_int8((unsigned char)0xDB);
4242  emit_operand32(rdi, adr);
4243}
4244
4245void Assembler::fsub(int i) {
4246  emit_farith(0xD8, 0xE0, i);
4247}
4248
4249void Assembler::fsub_d(Address src) {
4250  InstructionMark im(this);
4251  emit_int8((unsigned char)0xDC);
4252  emit_operand32(rsp, src);
4253}
4254
4255void Assembler::fsub_s(Address src) {
4256  InstructionMark im(this);
4257  emit_int8((unsigned char)0xD8);
4258  emit_operand32(rsp, src);
4259}
4260
4261void Assembler::fsuba(int i) {
4262  emit_farith(0xDC, 0xE8, i);
4263}
4264
4265void Assembler::fsubp(int i) {
4266  emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
4267}
4268
4269void Assembler::fsubr(int i) {
4270  emit_farith(0xD8, 0xE8, i);
4271}
4272
4273void Assembler::fsubr_d(Address src) {
4274  InstructionMark im(this);
4275  emit_int8((unsigned char)0xDC);
4276  emit_operand32(rbp, src);
4277}
4278
4279void Assembler::fsubr_s(Address src) {
4280  InstructionMark im(this);
4281  emit_int8((unsigned char)0xD8);
4282  emit_operand32(rbp, src);
4283}
4284
4285void Assembler::fsubra(int i) {
4286  emit_farith(0xDC, 0xE0, i);
4287}
4288
4289void Assembler::fsubrp(int i) {
4290  emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
4291}
4292
4293void Assembler::ftan() {
4294  emit_int8((unsigned char)0xD9);
4295  emit_int8((unsigned char)0xF2);
4296  emit_int8((unsigned char)0xDD);
4297  emit_int8((unsigned char)0xD8);
4298}
4299
4300void Assembler::ftst() {
4301  emit_int8((unsigned char)0xD9);
4302  emit_int8((unsigned char)0xE4);
4303}
4304
4305void Assembler::fucomi(int i) {
4306  // make sure the instruction is supported (introduced for P6, together with cmov)
4307  guarantee(VM_Version::supports_cmov(), "illegal instruction");
4308  emit_farith(0xDB, 0xE8, i);
4309}
4310
4311void Assembler::fucomip(int i) {
4312  // make sure the instruction is supported (introduced for P6, together with cmov)
4313  guarantee(VM_Version::supports_cmov(), "illegal instruction");
4314  emit_farith(0xDF, 0xE8, i);
4315}
4316
4317void Assembler::fwait() {
4318  emit_int8((unsigned char)0x9B);
4319}
4320
4321void Assembler::fxch(int i) {
4322  emit_farith(0xD9, 0xC8, i);
4323}
4324
4325void Assembler::fyl2x() {
4326  emit_int8((unsigned char)0xD9);
4327  emit_int8((unsigned char)0xF1);
4328}
4329
4330void Assembler::frndint() {
4331  emit_int8((unsigned char)0xD9);
4332  emit_int8((unsigned char)0xFC);
4333}
4334
4335void Assembler::f2xm1() {
4336  emit_int8((unsigned char)0xD9);
4337  emit_int8((unsigned char)0xF0);
4338}
4339
4340void Assembler::fldl2e() {
4341  emit_int8((unsigned char)0xD9);
4342  emit_int8((unsigned char)0xEA);
4343}
4344
// SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
// Entry 0 means "no prefix byte": callers only emit a prefix when the index is > 0.
// The table is read-only, hence const.
static const int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
// SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
// A 0 entry means "no second opcode byte": callers only emit values that are > 0.
// The table is read-only, hence const.
static const int simd_opc[4] = { 0,    0, 0x38, 0x3A };
4349
4350// Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
4351void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
4352  if (pre > 0) {
4353    emit_int8(simd_pre[pre]);
4354  }
4355  if (rex_w) {
4356    prefixq(adr, xreg);
4357  } else {
4358    prefix(adr, xreg);
4359  }
4360  if (opc > 0) {
4361    emit_int8(0x0F);
4362    int opc2 = simd_opc[opc];
4363    if (opc2 > 0) {
4364      emit_int8(opc2);
4365    }
4366  }
4367}
4368
4369int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
4370  if (pre > 0) {
4371    emit_int8(simd_pre[pre]);
4372  }
4373  int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
4374                          prefix_and_encode(dst_enc, src_enc);
4375  if (opc > 0) {
4376    emit_int8(0x0F);
4377    int opc2 = simd_opc[opc];
4378    if (opc2 > 0) {
4379      emit_int8(opc2);
4380    }
4381  }
4382  return encode;
4383}
4384
4385
4386void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) {
4387  if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
4388    prefix(VEX_3bytes);
4389
4390    int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
4391    byte1 = (~byte1) & 0xE0;
4392    byte1 |= opc;
4393    emit_int8(byte1);
4394
4395    int byte2 = ((~nds_enc) & 0xf) << 3;
4396    byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre;
4397    emit_int8(byte2);
4398  } else {
4399    prefix(VEX_2bytes);
4400
4401    int byte1 = vex_r ? VEX_R : 0;
4402    byte1 = (~byte1) & 0x80;
4403    byte1 |= ((~nds_enc) & 0xf) << 3;
4404    byte1 |= (vector256 ? 4 : 0) | pre;
4405    emit_int8(byte1);
4406  }
4407}
4408
4409void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){
4410  bool vex_r = (xreg_enc >= 8);
4411  bool vex_b = adr.base_needs_rex();
4412  bool vex_x = adr.index_needs_rex();
4413  vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
4414}
4415
4416int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) {
4417  bool vex_r = (dst_enc >= 8);
4418  bool vex_b = (src_enc >= 8);
4419  bool vex_x = false;
4420  vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
4421  return (((dst_enc & 7) << 3) | (src_enc & 7));
4422}
4423
4424
4425void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
4426  if (UseAVX > 0) {
4427    int xreg_enc = xreg->encoding();
4428    int  nds_enc = nds->is_valid() ? nds->encoding() : 0;
4429    vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256);
4430  } else {
4431    assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
4432    rex_prefix(adr, xreg, pre, opc, rex_w);
4433  }
4434}
4435
4436int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
4437  int dst_enc = dst->encoding();
4438  int src_enc = src->encoding();
4439  if (UseAVX > 0) {
4440    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4441    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256);
4442  } else {
4443    assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
4444    return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
4445  }
4446}
4447
4448void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
4449  InstructionMark im(this);
4450  simd_prefix(dst, dst, src, pre);
4451  emit_int8(opcode);
4452  emit_operand(dst, src);
4453}
4454
4455void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
4456  int encode = simd_prefix_and_encode(dst, dst, src, pre);
4457  emit_int8(opcode);
4458  emit_int8((unsigned char)(0xC0 | encode));
4459}
4460
4461// Versions with no second source register (non-destructive source).
4462void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
4463  InstructionMark im(this);
4464  simd_prefix(dst, xnoreg, src, pre);
4465  emit_int8(opcode);
4466  emit_operand(dst, src);
4467}
4468
4469void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
4470  int encode = simd_prefix_and_encode(dst, xnoreg, src, pre);
4471  emit_int8(opcode);
4472  emit_int8((unsigned char)(0xC0 | encode));
4473}
4474
4475// 3-operands AVX instructions
4476void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
4477                               Address src, VexSimdPrefix pre, bool vector256) {
4478  InstructionMark im(this);
4479  vex_prefix(dst, nds, src, pre, vector256);
4480  emit_int8(opcode);
4481  emit_operand(dst, src);
4482}
4483
4484void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
4485                               XMMRegister src, VexSimdPrefix pre, bool vector256) {
4486  int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256);
4487  emit_int8(opcode);
4488  emit_int8((unsigned char)(0xC0 | encode));
4489}
4490
4491#ifndef _LP64
4492
4493void Assembler::incl(Register dst) {
4494  // Don't use it directly. Use MacroAssembler::incrementl() instead.
4495  emit_int8(0x40 | dst->encoding());
4496}
4497
4498void Assembler::lea(Register dst, Address src) {
4499  leal(dst, src);
4500}
4501
4502void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
4503  InstructionMark im(this);
4504  emit_int8((unsigned char)0xC7);
4505  emit_operand(rax, dst);
4506  emit_data((int)imm32, rspec, 0);
4507}
4508
4509void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
4510  InstructionMark im(this);
4511  int encode = prefix_and_encode(dst->encoding());
4512  emit_int8((unsigned char)(0xB8 | encode));
4513  emit_data((int)imm32, rspec, 0);
4514}
4515
4516void Assembler::popa() { // 32bit
4517  emit_int8(0x61);
4518}
4519
4520void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
4521  InstructionMark im(this);
4522  emit_int8(0x68);
4523  emit_data(imm32, rspec, 0);
4524}
4525
4526void Assembler::pusha() { // 32bit
4527  emit_int8(0x60);
4528}
4529
4530void Assembler::set_byte_if_not_zero(Register dst) {
4531  emit_int8(0x0F);
4532  emit_int8((unsigned char)0x95);
4533  emit_int8((unsigned char)(0xE0 | dst->encoding()));
4534}
4535
4536void Assembler::shldl(Register dst, Register src) {
4537  emit_int8(0x0F);
4538  emit_int8((unsigned char)0xA5);
4539  emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
4540}
4541
4542void Assembler::shrdl(Register dst, Register src) {
4543  emit_int8(0x0F);
4544  emit_int8((unsigned char)0xAD);
4545  emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
4546}
4547
4548#else // LP64
4549
4550void Assembler::set_byte_if_not_zero(Register dst) {
4551  int enc = prefix_and_encode(dst->encoding(), true);
4552  emit_int8(0x0F);
4553  emit_int8((unsigned char)0x95);
4554  emit_int8((unsigned char)(0xE0 | enc));
4555}
4556
4557// 64bit only pieces of the assembler
4558// This should only be used by 64bit instructions that can use rip-relative
4559// it cannot be used by instructions that want an immediate value.
4560
4561bool Assembler::reachable(AddressLiteral adr) {
4562  int64_t disp;
4563  // None will force a 64bit literal to the code stream. Likely a placeholder
4564  // for something that will be patched later and we need to certain it will
4565  // always be reachable.
4566  if (adr.reloc() == relocInfo::none) {
4567    return false;
4568  }
4569  if (adr.reloc() == relocInfo::internal_word_type) {
4570    // This should be rip relative and easily reachable.
4571    return true;
4572  }
4573  if (adr.reloc() == relocInfo::virtual_call_type ||
4574      adr.reloc() == relocInfo::opt_virtual_call_type ||
4575      adr.reloc() == relocInfo::static_call_type ||
4576      adr.reloc() == relocInfo::static_stub_type ) {
4577    // This should be rip relative within the code cache and easily
4578    // reachable until we get huge code caches. (At which point
4579    // ic code is going to have issues).
4580    return true;
4581  }
4582  if (adr.reloc() != relocInfo::external_word_type &&
4583      adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
4584      adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
4585      adr.reloc() != relocInfo::runtime_call_type ) {
4586    return false;
4587  }
4588
4589  // Stress the correction code
4590  if (ForceUnreachable) {
4591    // Must be runtimecall reloc, see if it is in the codecache
4592    // Flipping stuff in the codecache to be unreachable causes issues
4593    // with things like inline caches where the additional instructions
4594    // are not handled.
4595    if (CodeCache::find_blob(adr._target) == NULL) {
4596      return false;
4597    }
4598  }
4599  // For external_word_type/runtime_call_type if it is reachable from where we
4600  // are now (possibly a temp buffer) and where we might end up
4601  // anywhere in the codeCache then we are always reachable.
4602  // This would have to change if we ever save/restore shared code
4603  // to be more pessimistic.
4604  disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
4605  if (!is_simm32(disp)) return false;
4606  disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
4607  if (!is_simm32(disp)) return false;
4608
4609  disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int));
4610
4611  // Because rip relative is a disp + address_of_next_instruction and we
4612  // don't know the value of address_of_next_instruction we apply a fudge factor
4613  // to make sure we will be ok no matter the size of the instruction we get placed into.
4614  // We don't have to fudge the checks above here because they are already worst case.
4615
4616  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
4617  // + 4 because better safe than sorry.
4618  const int fudge = 12 + 4;
4619  if (disp < 0) {
4620    disp -= fudge;
4621  } else {
4622    disp += fudge;
4623  }
4624  return is_simm32(disp);
4625}
4626
4627// Check if the polling page is not reachable from the code cache using rip-relative
4628// addressing.
4629bool Assembler::is_polling_page_far() {
4630  intptr_t addr = (intptr_t)os::get_polling_page();
4631  return ForceUnreachable ||
4632         !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
4633         !is_simm32(addr - (intptr_t)CodeCache::high_bound());
4634}
4635
4636void Assembler::emit_data64(jlong data,
4637                            relocInfo::relocType rtype,
4638                            int format) {
4639  if (rtype == relocInfo::none) {
4640    emit_int64(data);
4641  } else {
4642    emit_data64(data, Relocation::spec_simple(rtype), format);
4643  }
4644}
4645
4646void Assembler::emit_data64(jlong data,
4647                            RelocationHolder const& rspec,
4648                            int format) {
4649  assert(imm_operand == 0, "default format must be immediate in this file");
4650  assert(imm_operand == format, "must be immediate");
4651  assert(inst_mark() != NULL, "must be inside InstructionMark");
4652  // Do not use AbstractAssembler::relocate, which is not intended for
4653  // embedded words.  Instead, relocate to the enclosing instruction.
4654  code_section()->relocate(inst_mark(), rspec, format);
4655#ifdef ASSERT
4656  check_relocation(rspec, format);
4657#endif
4658  emit_int64(data);
4659}
4660
4661int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
4662  if (reg_enc >= 8) {
4663    prefix(REX_B);
4664    reg_enc -= 8;
4665  } else if (byteinst && reg_enc >= 4) {
4666    prefix(REX);
4667  }
4668  return reg_enc;
4669}
4670
4671int Assembler::prefixq_and_encode(int reg_enc) {
4672  if (reg_enc < 8) {
4673    prefix(REX_W);
4674  } else {
4675    prefix(REX_WB);
4676    reg_enc -= 8;
4677  }
4678  return reg_enc;
4679}
4680
4681int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
4682  if (dst_enc < 8) {
4683    if (src_enc >= 8) {
4684      prefix(REX_B);
4685      src_enc -= 8;
4686    } else if (byteinst && src_enc >= 4) {
4687      prefix(REX);
4688    }
4689  } else {
4690    if (src_enc < 8) {
4691      prefix(REX_R);
4692    } else {
4693      prefix(REX_RB);
4694      src_enc -= 8;
4695    }
4696    dst_enc -= 8;
4697  }
4698  return dst_enc << 3 | src_enc;
4699}
4700
4701int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
4702  if (dst_enc < 8) {
4703    if (src_enc < 8) {
4704      prefix(REX_W);
4705    } else {
4706      prefix(REX_WB);
4707      src_enc -= 8;
4708    }
4709  } else {
4710    if (src_enc < 8) {
4711      prefix(REX_WR);
4712    } else {
4713      prefix(REX_WRB);
4714      src_enc -= 8;
4715    }
4716    dst_enc -= 8;
4717  }
4718  return dst_enc << 3 | src_enc;
4719}
4720
4721void Assembler::prefix(Register reg) {
4722  if (reg->encoding() >= 8) {
4723    prefix(REX_B);
4724  }
4725}
4726
4727void Assembler::prefix(Address adr) {
4728  if (adr.base_needs_rex()) {
4729    if (adr.index_needs_rex()) {
4730      prefix(REX_XB);
4731    } else {
4732      prefix(REX_B);
4733    }
4734  } else {
4735    if (adr.index_needs_rex()) {
4736      prefix(REX_X);
4737    }
4738  }
4739}
4740
4741void Assembler::prefixq(Address adr) {
4742  if (adr.base_needs_rex()) {
4743    if (adr.index_needs_rex()) {
4744      prefix(REX_WXB);
4745    } else {
4746      prefix(REX_WB);
4747    }
4748  } else {
4749    if (adr.index_needs_rex()) {
4750      prefix(REX_WX);
4751    } else {
4752      prefix(REX_W);
4753    }
4754  }
4755}
4756
4757
4758void Assembler::prefix(Address adr, Register reg, bool byteinst) {
4759  if (reg->encoding() < 8) {
4760    if (adr.base_needs_rex()) {
4761      if (adr.index_needs_rex()) {
4762        prefix(REX_XB);
4763      } else {
4764        prefix(REX_B);
4765      }
4766    } else {
4767      if (adr.index_needs_rex()) {
4768        prefix(REX_X);
4769      } else if (byteinst && reg->encoding() >= 4 ) {
4770        prefix(REX);
4771      }
4772    }
4773  } else {
4774    if (adr.base_needs_rex()) {
4775      if (adr.index_needs_rex()) {
4776        prefix(REX_RXB);
4777      } else {
4778        prefix(REX_RB);
4779      }
4780    } else {
4781      if (adr.index_needs_rex()) {
4782        prefix(REX_RX);
4783      } else {
4784        prefix(REX_R);
4785      }
4786    }
4787  }
4788}
4789
4790void Assembler::prefixq(Address adr, Register src) {
4791  if (src->encoding() < 8) {
4792    if (adr.base_needs_rex()) {
4793      if (adr.index_needs_rex()) {
4794        prefix(REX_WXB);
4795      } else {
4796        prefix(REX_WB);
4797      }
4798    } else {
4799      if (adr.index_needs_rex()) {
4800        prefix(REX_WX);
4801      } else {
4802        prefix(REX_W);
4803      }
4804    }
4805  } else {
4806    if (adr.base_needs_rex()) {
4807      if (adr.index_needs_rex()) {
4808        prefix(REX_WRXB);
4809      } else {
4810        prefix(REX_WRB);
4811      }
4812    } else {
4813      if (adr.index_needs_rex()) {
4814        prefix(REX_WRX);
4815      } else {
4816        prefix(REX_WR);
4817      }
4818    }
4819  }
4820}
4821
4822void Assembler::prefix(Address adr, XMMRegister reg) {
4823  if (reg->encoding() < 8) {
4824    if (adr.base_needs_rex()) {
4825      if (adr.index_needs_rex()) {
4826        prefix(REX_XB);
4827      } else {
4828        prefix(REX_B);
4829      }
4830    } else {
4831      if (adr.index_needs_rex()) {
4832        prefix(REX_X);
4833      }
4834    }
4835  } else {
4836    if (adr.base_needs_rex()) {
4837      if (adr.index_needs_rex()) {
4838        prefix(REX_RXB);
4839      } else {
4840        prefix(REX_RB);
4841      }
4842    } else {
4843      if (adr.index_needs_rex()) {
4844        prefix(REX_RX);
4845      } else {
4846        prefix(REX_R);
4847      }
4848    }
4849  }
4850}
4851
4852void Assembler::prefixq(Address adr, XMMRegister src) {
4853  if (src->encoding() < 8) {
4854    if (adr.base_needs_rex()) {
4855      if (adr.index_needs_rex()) {
4856        prefix(REX_WXB);
4857      } else {
4858        prefix(REX_WB);
4859      }
4860    } else {
4861      if (adr.index_needs_rex()) {
4862        prefix(REX_WX);
4863      } else {
4864        prefix(REX_W);
4865      }
4866    }
4867  } else {
4868    if (adr.base_needs_rex()) {
4869      if (adr.index_needs_rex()) {
4870        prefix(REX_WRXB);
4871      } else {
4872        prefix(REX_WRB);
4873      }
4874    } else {
4875      if (adr.index_needs_rex()) {
4876        prefix(REX_WRX);
4877      } else {
4878        prefix(REX_WR);
4879      }
4880    }
4881  }
4882}
4883
4884void Assembler::adcq(Register dst, int32_t imm32) {
4885  (void) prefixq_and_encode(dst->encoding());
4886  emit_arith(0x81, 0xD0, dst, imm32);
4887}
4888
4889void Assembler::adcq(Register dst, Address src) {
4890  InstructionMark im(this);
4891  prefixq(src, dst);
4892  emit_int8(0x13);
4893  emit_operand(dst, src);
4894}
4895
4896void Assembler::adcq(Register dst, Register src) {
4897  (void) prefixq_and_encode(dst->encoding(), src->encoding());
4898  emit_arith(0x13, 0xC0, dst, src);
4899}
4900
4901void Assembler::addq(Address dst, int32_t imm32) {
4902  InstructionMark im(this);
4903  prefixq(dst);
4904  emit_arith_operand(0x81, rax, dst,imm32);
4905}
4906
4907void Assembler::addq(Address dst, Register src) {
4908  InstructionMark im(this);
4909  prefixq(dst, src);
4910  emit_int8(0x01);
4911  emit_operand(src, dst);
4912}
4913
4914void Assembler::addq(Register dst, int32_t imm32) {
4915  (void) prefixq_and_encode(dst->encoding());
4916  emit_arith(0x81, 0xC0, dst, imm32);
4917}
4918
4919void Assembler::addq(Register dst, Address src) {
4920  InstructionMark im(this);
4921  prefixq(src, dst);
4922  emit_int8(0x03);
4923  emit_operand(dst, src);
4924}
4925
4926void Assembler::addq(Register dst, Register src) {
4927  (void) prefixq_and_encode(dst->encoding(), src->encoding());
4928  emit_arith(0x03, 0xC0, dst, src);
4929}
4930
4931void Assembler::andq(Address dst, int32_t imm32) {
4932  InstructionMark im(this);
4933  prefixq(dst);
4934  emit_int8((unsigned char)0x81);
4935  emit_operand(rsp, dst, 4);
4936  emit_int32(imm32);
4937}
4938
4939void Assembler::andq(Register dst, int32_t imm32) {
4940  (void) prefixq_and_encode(dst->encoding());
4941  emit_arith(0x81, 0xE0, dst, imm32);
4942}
4943
4944void Assembler::andq(Register dst, Address src) {
4945  InstructionMark im(this);
4946  prefixq(src, dst);
4947  emit_int8(0x23);
4948  emit_operand(dst, src);
4949}
4950
4951void Assembler::andq(Register dst, Register src) {
4952  (void) prefixq_and_encode(dst->encoding(), src->encoding());
4953  emit_arith(0x23, 0xC0, dst, src);
4954}
4955
4956void Assembler::andnq(Register dst, Register src1, Register src2) {
4957  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
4958  int encode = vex_prefix_0F38_and_encode_q(dst, src1, src2);
4959  emit_int8((unsigned char)0xF2);
4960  emit_int8((unsigned char)(0xC0 | encode));
4961}
4962
4963void Assembler::andnq(Register dst, Register src1, Address src2) {
4964  InstructionMark im(this);
4965  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
4966  vex_prefix_0F38_q(dst, src1, src2);
4967  emit_int8((unsigned char)0xF2);
4968  emit_operand(dst, src2);
4969}
4970
4971void Assembler::bsfq(Register dst, Register src) {
4972  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4973  emit_int8(0x0F);
4974  emit_int8((unsigned char)0xBC);
4975  emit_int8((unsigned char)(0xC0 | encode));
4976}
4977
4978void Assembler::bsrq(Register dst, Register src) {
4979  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4980  emit_int8(0x0F);
4981  emit_int8((unsigned char)0xBD);
4982  emit_int8((unsigned char)(0xC0 | encode));
4983}
4984
4985void Assembler::bswapq(Register reg) {
4986  int encode = prefixq_and_encode(reg->encoding());
4987  emit_int8(0x0F);
4988  emit_int8((unsigned char)(0xC8 | encode));
4989}
4990
4991void Assembler::blsiq(Register dst, Register src) {
4992  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
4993  int encode = vex_prefix_0F38_and_encode_q(rbx, dst, src);
4994  emit_int8((unsigned char)0xF3);
4995  emit_int8((unsigned char)(0xC0 | encode));
4996}
4997
4998void Assembler::blsiq(Register dst, Address src) {
4999  InstructionMark im(this);
5000  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
5001  vex_prefix_0F38_q(rbx, dst, src);
5002  emit_int8((unsigned char)0xF3);
5003  emit_operand(rbx, src);
5004}
5005
5006void Assembler::blsmskq(Register dst, Register src) {
5007  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
5008  int encode = vex_prefix_0F38_and_encode_q(rdx, dst, src);
5009  emit_int8((unsigned char)0xF3);
5010  emit_int8((unsigned char)(0xC0 | encode));
5011}
5012
5013void Assembler::blsmskq(Register dst, Address src) {
5014  InstructionMark im(this);
5015  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
5016  vex_prefix_0F38_q(rdx, dst, src);
5017  emit_int8((unsigned char)0xF3);
5018  emit_operand(rdx, src);
5019}
5020
5021void Assembler::blsrq(Register dst, Register src) {
5022  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
5023  int encode = vex_prefix_0F38_and_encode_q(rcx, dst, src);
5024  emit_int8((unsigned char)0xF3);
5025  emit_int8((unsigned char)(0xC0 | encode));
5026}
5027
5028void Assembler::blsrq(Register dst, Address src) {
5029  InstructionMark im(this);
5030  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
5031  vex_prefix_0F38_q(rcx, dst, src);
5032  emit_int8((unsigned char)0xF3);
5033  emit_operand(rcx, src);
5034}
5035
5036void Assembler::cdqq() {
5037  prefix(REX_W);
5038  emit_int8((unsigned char)0x99);
5039}
5040
5041void Assembler::clflush(Address adr) {
5042  prefix(adr);
5043  emit_int8(0x0F);
5044  emit_int8((unsigned char)0xAE);
5045  emit_operand(rdi, adr);
5046}
5047
5048void Assembler::cmovq(Condition cc, Register dst, Register src) {
5049  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5050  emit_int8(0x0F);
5051  emit_int8(0x40 | cc);
5052  emit_int8((unsigned char)(0xC0 | encode));
5053}
5054
5055void Assembler::cmovq(Condition cc, Register dst, Address src) {
5056  InstructionMark im(this);
5057  prefixq(src, dst);
5058  emit_int8(0x0F);
5059  emit_int8(0x40 | cc);
5060  emit_operand(dst, src);
5061}
5062
5063void Assembler::cmpq(Address dst, int32_t imm32) {
5064  InstructionMark im(this);
5065  prefixq(dst);
5066  emit_int8((unsigned char)0x81);
5067  emit_operand(rdi, dst, 4);
5068  emit_int32(imm32);
5069}
5070
5071void Assembler::cmpq(Register dst, int32_t imm32) {
5072  (void) prefixq_and_encode(dst->encoding());
5073  emit_arith(0x81, 0xF8, dst, imm32);
5074}
5075
5076void Assembler::cmpq(Address dst, Register src) {
5077  InstructionMark im(this);
5078  prefixq(dst, src);
5079  emit_int8(0x3B);
5080  emit_operand(src, dst);
5081}
5082
5083void Assembler::cmpq(Register dst, Register src) {
5084  (void) prefixq_and_encode(dst->encoding(), src->encoding());
5085  emit_arith(0x3B, 0xC0, dst, src);
5086}
5087
5088void Assembler::cmpq(Register dst, Address  src) {
5089  InstructionMark im(this);
5090  prefixq(src, dst);
5091  emit_int8(0x3B);
5092  emit_operand(dst, src);
5093}
5094
5095void Assembler::cmpxchgq(Register reg, Address adr) {
5096  InstructionMark im(this);
5097  prefixq(adr, reg);
5098  emit_int8(0x0F);
5099  emit_int8((unsigned char)0xB1);
5100  emit_operand(reg, adr);
5101}
5102
5103void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
5104  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5105  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2);
5106  emit_int8(0x2A);
5107  emit_int8((unsigned char)(0xC0 | encode));
5108}
5109
5110void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
5111  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5112  InstructionMark im(this);
5113  simd_prefix_q(dst, dst, src, VEX_SIMD_F2);
5114  emit_int8(0x2A);
5115  emit_operand(dst, src);
5116}
5117
5118void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
5119  NOT_LP64(assert(VM_Version::supports_sse(), ""));
5120  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3);
5121  emit_int8(0x2A);
5122  emit_int8((unsigned char)(0xC0 | encode));
5123}
5124
5125void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
5126  NOT_LP64(assert(VM_Version::supports_sse(), ""));
5127  InstructionMark im(this);
5128  simd_prefix_q(dst, dst, src, VEX_SIMD_F3);
5129  emit_int8(0x2A);
5130  emit_operand(dst, src);
5131}
5132
5133void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
5134  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5135  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2);
5136  emit_int8(0x2C);
5137  emit_int8((unsigned char)(0xC0 | encode));
5138}
5139
5140void Assembler::cvttss2siq(Register dst, XMMRegister src) {
5141  NOT_LP64(assert(VM_Version::supports_sse(), ""));
5142  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3);
5143  emit_int8(0x2C);
5144  emit_int8((unsigned char)(0xC0 | encode));
5145}
5146
5147void Assembler::decl(Register dst) {
5148  // Don't use it directly. Use MacroAssembler::decrementl() instead.
5149  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
5150  int encode = prefix_and_encode(dst->encoding());
5151  emit_int8((unsigned char)0xFF);
5152  emit_int8((unsigned char)(0xC8 | encode));
5153}
5154
5155void Assembler::decq(Register dst) {
5156  // Don't use it directly. Use MacroAssembler::decrementq() instead.
5157  // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
5158  int encode = prefixq_and_encode(dst->encoding());
5159  emit_int8((unsigned char)0xFF);
5160  emit_int8(0xC8 | encode);
5161}
5162
5163void Assembler::decq(Address dst) {
5164  // Don't use it directly. Use MacroAssembler::decrementq() instead.
5165  InstructionMark im(this);
5166  prefixq(dst);
5167  emit_int8((unsigned char)0xFF);
5168  emit_operand(rcx, dst);
5169}
5170
5171void Assembler::fxrstor(Address src) {
5172  prefixq(src);
5173  emit_int8(0x0F);
5174  emit_int8((unsigned char)0xAE);
5175  emit_operand(as_Register(1), src);
5176}
5177
5178void Assembler::fxsave(Address dst) {
5179  prefixq(dst);
5180  emit_int8(0x0F);
5181  emit_int8((unsigned char)0xAE);
5182  emit_operand(as_Register(0), dst);
5183}
5184
5185void Assembler::idivq(Register src) {
5186  int encode = prefixq_and_encode(src->encoding());
5187  emit_int8((unsigned char)0xF7);
5188  emit_int8((unsigned char)(0xF8 | encode));
5189}
5190
5191void Assembler::imulq(Register dst, Register src) {
5192  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5193  emit_int8(0x0F);
5194  emit_int8((unsigned char)0xAF);
5195  emit_int8((unsigned char)(0xC0 | encode));
5196}
5197
5198void Assembler::imulq(Register dst, Register src, int value) {
5199  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5200  if (is8bit(value)) {
5201    emit_int8(0x6B);
5202    emit_int8((unsigned char)(0xC0 | encode));
5203    emit_int8(value & 0xFF);
5204  } else {
5205    emit_int8(0x69);
5206    emit_int8((unsigned char)(0xC0 | encode));
5207    emit_int32(value);
5208  }
5209}
5210
5211void Assembler::imulq(Register dst, Address src) {
5212  InstructionMark im(this);
5213  prefixq(src, dst);
5214  emit_int8(0x0F);
5215  emit_int8((unsigned char) 0xAF);
5216  emit_operand(dst, src);
5217}
5218
5219void Assembler::incl(Register dst) {
5220  // Don't use it directly. Use MacroAssembler::incrementl() instead.
5221  // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
5222  int encode = prefix_and_encode(dst->encoding());
5223  emit_int8((unsigned char)0xFF);
5224  emit_int8((unsigned char)(0xC0 | encode));
5225}
5226
5227void Assembler::incq(Register dst) {
5228  // Don't use it directly. Use MacroAssembler::incrementq() instead.
5229  // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
5230  int encode = prefixq_and_encode(dst->encoding());
5231  emit_int8((unsigned char)0xFF);
5232  emit_int8((unsigned char)(0xC0 | encode));
5233}
5234
5235void Assembler::incq(Address dst) {
5236  // Don't use it directly. Use MacroAssembler::incrementq() instead.
5237  InstructionMark im(this);
5238  prefixq(dst);
5239  emit_int8((unsigned char)0xFF);
5240  emit_operand(rax, dst);
5241}
5242
5243void Assembler::lea(Register dst, Address src) {
5244  leaq(dst, src);
5245}
5246
5247void Assembler::leaq(Register dst, Address src) {
5248  InstructionMark im(this);
5249  prefixq(src, dst);
5250  emit_int8((unsigned char)0x8D);
5251  emit_operand(dst, src);
5252}
5253
5254void Assembler::mov64(Register dst, int64_t imm64) {
5255  InstructionMark im(this);
5256  int encode = prefixq_and_encode(dst->encoding());
5257  emit_int8((unsigned char)(0xB8 | encode));
5258  emit_int64(imm64);
5259}
5260
5261void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
5262  InstructionMark im(this);
5263  int encode = prefixq_and_encode(dst->encoding());
5264  emit_int8(0xB8 | encode);
5265  emit_data64(imm64, rspec);
5266}
5267
5268void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
5269  InstructionMark im(this);
5270  int encode = prefix_and_encode(dst->encoding());
5271  emit_int8((unsigned char)(0xB8 | encode));
5272  emit_data((int)imm32, rspec, narrow_oop_operand);
5273}
5274
5275void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
5276  InstructionMark im(this);
5277  prefix(dst);
5278  emit_int8((unsigned char)0xC7);
5279  emit_operand(rax, dst, 4);
5280  emit_data((int)imm32, rspec, narrow_oop_operand);
5281}
5282
5283void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
5284  InstructionMark im(this);
5285  int encode = prefix_and_encode(src1->encoding());
5286  emit_int8((unsigned char)0x81);
5287  emit_int8((unsigned char)(0xF8 | encode));
5288  emit_data((int)imm32, rspec, narrow_oop_operand);
5289}
5290
5291void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
5292  InstructionMark im(this);
5293  prefix(src1);
5294  emit_int8((unsigned char)0x81);
5295  emit_operand(rax, src1, 4);
5296  emit_data((int)imm32, rspec, narrow_oop_operand);
5297}
5298
5299void Assembler::lzcntq(Register dst, Register src) {
5300  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
5301  emit_int8((unsigned char)0xF3);
5302  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5303  emit_int8(0x0F);
5304  emit_int8((unsigned char)0xBD);
5305  emit_int8((unsigned char)(0xC0 | encode));
5306}
5307
5308void Assembler::movdq(XMMRegister dst, Register src) {
5309  // table D-1 says MMX/SSE2
5310  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5311  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66);
5312  emit_int8(0x6E);
5313  emit_int8((unsigned char)(0xC0 | encode));
5314}
5315
5316void Assembler::movdq(Register dst, XMMRegister src) {
5317  // table D-1 says MMX/SSE2
5318  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5319  // swap src/dst to get correct prefix
5320  int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66);
5321  emit_int8(0x7E);
5322  emit_int8((unsigned char)(0xC0 | encode));
5323}
5324
5325void Assembler::movq(Register dst, Register src) {
5326  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5327  emit_int8((unsigned char)0x8B);
5328  emit_int8((unsigned char)(0xC0 | encode));
5329}
5330
5331void Assembler::movq(Register dst, Address src) {
5332  InstructionMark im(this);
5333  prefixq(src, dst);
5334  emit_int8((unsigned char)0x8B);
5335  emit_operand(dst, src);
5336}
5337
5338void Assembler::movq(Address dst, Register src) {
5339  InstructionMark im(this);
5340  prefixq(dst, src);
5341  emit_int8((unsigned char)0x89);
5342  emit_operand(src, dst);
5343}
5344
5345void Assembler::movsbq(Register dst, Address src) {
5346  InstructionMark im(this);
5347  prefixq(src, dst);
5348  emit_int8(0x0F);
5349  emit_int8((unsigned char)0xBE);
5350  emit_operand(dst, src);
5351}
5352
5353void Assembler::movsbq(Register dst, Register src) {
5354  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5355  emit_int8(0x0F);
5356  emit_int8((unsigned char)0xBE);
5357  emit_int8((unsigned char)(0xC0 | encode));
5358}
5359
5360void Assembler::movslq(Register dst, int32_t imm32) {
5361  // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
5362  // and movslq(r8, 3); as movl     $0x0000000048000000,(%rbx)
5363  // as a result we shouldn't use until tested at runtime...
5364  ShouldNotReachHere();
5365  InstructionMark im(this);
5366  int encode = prefixq_and_encode(dst->encoding());
5367  emit_int8((unsigned char)(0xC7 | encode));
5368  emit_int32(imm32);
5369}
5370
5371void Assembler::movslq(Address dst, int32_t imm32) {
5372  assert(is_simm32(imm32), "lost bits");
5373  InstructionMark im(this);
5374  prefixq(dst);
5375  emit_int8((unsigned char)0xC7);
5376  emit_operand(rax, dst, 4);
5377  emit_int32(imm32);
5378}
5379
5380void Assembler::movslq(Register dst, Address src) {
5381  InstructionMark im(this);
5382  prefixq(src, dst);
5383  emit_int8(0x63);
5384  emit_operand(dst, src);
5385}
5386
5387void Assembler::movslq(Register dst, Register src) {
5388  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5389  emit_int8(0x63);
5390  emit_int8((unsigned char)(0xC0 | encode));
5391}
5392
5393void Assembler::movswq(Register dst, Address src) {
5394  InstructionMark im(this);
5395  prefixq(src, dst);
5396  emit_int8(0x0F);
5397  emit_int8((unsigned char)0xBF);
5398  emit_operand(dst, src);
5399}
5400
5401void Assembler::movswq(Register dst, Register src) {
5402  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5403  emit_int8((unsigned char)0x0F);
5404  emit_int8((unsigned char)0xBF);
5405  emit_int8((unsigned char)(0xC0 | encode));
5406}
5407
5408void Assembler::movzbq(Register dst, Address src) {
5409  InstructionMark im(this);
5410  prefixq(src, dst);
5411  emit_int8((unsigned char)0x0F);
5412  emit_int8((unsigned char)0xB6);
5413  emit_operand(dst, src);
5414}
5415
5416void Assembler::movzbq(Register dst, Register src) {
5417  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5418  emit_int8(0x0F);
5419  emit_int8((unsigned char)0xB6);
5420  emit_int8(0xC0 | encode);
5421}
5422
5423void Assembler::movzwq(Register dst, Address src) {
5424  InstructionMark im(this);
5425  prefixq(src, dst);
5426  emit_int8((unsigned char)0x0F);
5427  emit_int8((unsigned char)0xB7);
5428  emit_operand(dst, src);
5429}
5430
5431void Assembler::movzwq(Register dst, Register src) {
5432  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5433  emit_int8((unsigned char)0x0F);
5434  emit_int8((unsigned char)0xB7);
5435  emit_int8((unsigned char)(0xC0 | encode));
5436}
5437
5438void Assembler::negq(Register dst) {
5439  int encode = prefixq_and_encode(dst->encoding());
5440  emit_int8((unsigned char)0xF7);
5441  emit_int8((unsigned char)(0xD8 | encode));
5442}
5443
5444void Assembler::notq(Register dst) {
5445  int encode = prefixq_and_encode(dst->encoding());
5446  emit_int8((unsigned char)0xF7);
5447  emit_int8((unsigned char)(0xD0 | encode));
5448}
5449
5450void Assembler::orq(Address dst, int32_t imm32) {
5451  InstructionMark im(this);
5452  prefixq(dst);
5453  emit_int8((unsigned char)0x81);
5454  emit_operand(rcx, dst, 4);
5455  emit_int32(imm32);
5456}
5457
5458void Assembler::orq(Register dst, int32_t imm32) {
5459  (void) prefixq_and_encode(dst->encoding());
5460  emit_arith(0x81, 0xC8, dst, imm32);
5461}
5462
5463void Assembler::orq(Register dst, Address src) {
5464  InstructionMark im(this);
5465  prefixq(src, dst);
5466  emit_int8(0x0B);
5467  emit_operand(dst, src);
5468}
5469
5470void Assembler::orq(Register dst, Register src) {
5471  (void) prefixq_and_encode(dst->encoding(), src->encoding());
5472  emit_arith(0x0B, 0xC0, dst, src);
5473}
5474
5475void Assembler::popa() { // 64bit
5476  movq(r15, Address(rsp, 0));
5477  movq(r14, Address(rsp, wordSize));
5478  movq(r13, Address(rsp, 2 * wordSize));
5479  movq(r12, Address(rsp, 3 * wordSize));
5480  movq(r11, Address(rsp, 4 * wordSize));
5481  movq(r10, Address(rsp, 5 * wordSize));
5482  movq(r9,  Address(rsp, 6 * wordSize));
5483  movq(r8,  Address(rsp, 7 * wordSize));
5484  movq(rdi, Address(rsp, 8 * wordSize));
5485  movq(rsi, Address(rsp, 9 * wordSize));
5486  movq(rbp, Address(rsp, 10 * wordSize));
5487  // skip rsp
5488  movq(rbx, Address(rsp, 12 * wordSize));
5489  movq(rdx, Address(rsp, 13 * wordSize));
5490  movq(rcx, Address(rsp, 14 * wordSize));
5491  movq(rax, Address(rsp, 15 * wordSize));
5492
5493  addq(rsp, 16 * wordSize);
5494}
5495
5496void Assembler::popcntq(Register dst, Address src) {
5497  assert(VM_Version::supports_popcnt(), "must support");
5498  InstructionMark im(this);
5499  emit_int8((unsigned char)0xF3);
5500  prefixq(src, dst);
5501  emit_int8((unsigned char)0x0F);
5502  emit_int8((unsigned char)0xB8);
5503  emit_operand(dst, src);
5504}
5505
5506void Assembler::popcntq(Register dst, Register src) {
5507  assert(VM_Version::supports_popcnt(), "must support");
5508  emit_int8((unsigned char)0xF3);
5509  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5510  emit_int8((unsigned char)0x0F);
5511  emit_int8((unsigned char)0xB8);
5512  emit_int8((unsigned char)(0xC0 | encode));
5513}
5514
5515void Assembler::popq(Address dst) {
5516  InstructionMark im(this);
5517  prefixq(dst);
5518  emit_int8((unsigned char)0x8F);
5519  emit_operand(rax, dst);
5520}
5521
5522void Assembler::pusha() { // 64bit
5523  // we have to store original rsp.  ABI says that 128 bytes
5524  // below rsp are local scratch.
5525  movq(Address(rsp, -5 * wordSize), rsp);
5526
5527  subq(rsp, 16 * wordSize);
5528
5529  movq(Address(rsp, 15 * wordSize), rax);
5530  movq(Address(rsp, 14 * wordSize), rcx);
5531  movq(Address(rsp, 13 * wordSize), rdx);
5532  movq(Address(rsp, 12 * wordSize), rbx);
5533  // skip rsp
5534  movq(Address(rsp, 10 * wordSize), rbp);
5535  movq(Address(rsp, 9 * wordSize), rsi);
5536  movq(Address(rsp, 8 * wordSize), rdi);
5537  movq(Address(rsp, 7 * wordSize), r8);
5538  movq(Address(rsp, 6 * wordSize), r9);
5539  movq(Address(rsp, 5 * wordSize), r10);
5540  movq(Address(rsp, 4 * wordSize), r11);
5541  movq(Address(rsp, 3 * wordSize), r12);
5542  movq(Address(rsp, 2 * wordSize), r13);
5543  movq(Address(rsp, wordSize), r14);
5544  movq(Address(rsp, 0), r15);
5545}
5546
5547void Assembler::pushq(Address src) {
5548  InstructionMark im(this);
5549  prefixq(src);
5550  emit_int8((unsigned char)0xFF);
5551  emit_operand(rsi, src);
5552}
5553
5554void Assembler::rclq(Register dst, int imm8) {
5555  assert(isShiftCount(imm8 >> 1), "illegal shift count");
5556  int encode = prefixq_and_encode(dst->encoding());
5557  if (imm8 == 1) {
5558    emit_int8((unsigned char)0xD1);
5559    emit_int8((unsigned char)(0xD0 | encode));
5560  } else {
5561    emit_int8((unsigned char)0xC1);
5562    emit_int8((unsigned char)(0xD0 | encode));
5563    emit_int8(imm8);
5564  }
5565}
5566void Assembler::sarq(Register dst, int imm8) {
5567  assert(isShiftCount(imm8 >> 1), "illegal shift count");
5568  int encode = prefixq_and_encode(dst->encoding());
5569  if (imm8 == 1) {
5570    emit_int8((unsigned char)0xD1);
5571    emit_int8((unsigned char)(0xF8 | encode));
5572  } else {
5573    emit_int8((unsigned char)0xC1);
5574    emit_int8((unsigned char)(0xF8 | encode));
5575    emit_int8(imm8);
5576  }
5577}
5578
5579void Assembler::sarq(Register dst) {
5580  int encode = prefixq_and_encode(dst->encoding());
5581  emit_int8((unsigned char)0xD3);
5582  emit_int8((unsigned char)(0xF8 | encode));
5583}
5584
5585void Assembler::sbbq(Address dst, int32_t imm32) {
5586  InstructionMark im(this);
5587  prefixq(dst);
5588  emit_arith_operand(0x81, rbx, dst, imm32);
5589}
5590
5591void Assembler::sbbq(Register dst, int32_t imm32) {
5592  (void) prefixq_and_encode(dst->encoding());
5593  emit_arith(0x81, 0xD8, dst, imm32);
5594}
5595
5596void Assembler::sbbq(Register dst, Address src) {
5597  InstructionMark im(this);
5598  prefixq(src, dst);
5599  emit_int8(0x1B);
5600  emit_operand(dst, src);
5601}
5602
5603void Assembler::sbbq(Register dst, Register src) {
5604  (void) prefixq_and_encode(dst->encoding(), src->encoding());
5605  emit_arith(0x1B, 0xC0, dst, src);
5606}
5607
5608void Assembler::shlq(Register dst, int imm8) {
5609  assert(isShiftCount(imm8 >> 1), "illegal shift count");
5610  int encode = prefixq_and_encode(dst->encoding());
5611  if (imm8 == 1) {
5612    emit_int8((unsigned char)0xD1);
5613    emit_int8((unsigned char)(0xE0 | encode));
5614  } else {
5615    emit_int8((unsigned char)0xC1);
5616    emit_int8((unsigned char)(0xE0 | encode));
5617    emit_int8(imm8);
5618  }
5619}
5620
5621void Assembler::shlq(Register dst) {
5622  int encode = prefixq_and_encode(dst->encoding());
5623  emit_int8((unsigned char)0xD3);
5624  emit_int8((unsigned char)(0xE0 | encode));
5625}
5626
5627void Assembler::shrq(Register dst, int imm8) {
5628  assert(isShiftCount(imm8 >> 1), "illegal shift count");
5629  int encode = prefixq_and_encode(dst->encoding());
5630  emit_int8((unsigned char)0xC1);
5631  emit_int8((unsigned char)(0xE8 | encode));
5632  emit_int8(imm8);
5633}
5634
5635void Assembler::shrq(Register dst) {
5636  int encode = prefixq_and_encode(dst->encoding());
5637  emit_int8((unsigned char)0xD3);
5638  emit_int8(0xE8 | encode);
5639}
5640
5641void Assembler::subq(Address dst, int32_t imm32) {
5642  InstructionMark im(this);
5643  prefixq(dst);
5644  emit_arith_operand(0x81, rbp, dst, imm32);
5645}
5646
5647void Assembler::subq(Address dst, Register src) {
5648  InstructionMark im(this);
5649  prefixq(dst, src);
5650  emit_int8(0x29);
5651  emit_operand(src, dst);
5652}
5653
5654void Assembler::subq(Register dst, int32_t imm32) {
5655  (void) prefixq_and_encode(dst->encoding());
5656  emit_arith(0x81, 0xE8, dst, imm32);
5657}
5658
5659// Force generation of a 4 byte immediate value even if it fits into 8bit
5660void Assembler::subq_imm32(Register dst, int32_t imm32) {
5661  (void) prefixq_and_encode(dst->encoding());
5662  emit_arith_imm32(0x81, 0xE8, dst, imm32);
5663}
5664
5665void Assembler::subq(Register dst, Address src) {
5666  InstructionMark im(this);
5667  prefixq(src, dst);
5668  emit_int8(0x2B);
5669  emit_operand(dst, src);
5670}
5671
5672void Assembler::subq(Register dst, Register src) {
5673  (void) prefixq_and_encode(dst->encoding(), src->encoding());
5674  emit_arith(0x2B, 0xC0, dst, src);
5675}
5676
5677void Assembler::testq(Register dst, int32_t imm32) {
5678  // not using emit_arith because test
5679  // doesn't support sign-extension of
5680  // 8bit operands
5681  int encode = dst->encoding();
5682  if (encode == 0) {
5683    prefix(REX_W);
5684    emit_int8((unsigned char)0xA9);
5685  } else {
5686    encode = prefixq_and_encode(encode);
5687    emit_int8((unsigned char)0xF7);
5688    emit_int8((unsigned char)(0xC0 | encode));
5689  }
5690  emit_int32(imm32);
5691}
5692
5693void Assembler::testq(Register dst, Register src) {
5694  (void) prefixq_and_encode(dst->encoding(), src->encoding());
5695  emit_arith(0x85, 0xC0, dst, src);
5696}
5697
5698void Assembler::xaddq(Address dst, Register src) {
5699  InstructionMark im(this);
5700  prefixq(dst, src);
5701  emit_int8(0x0F);
5702  emit_int8((unsigned char)0xC1);
5703  emit_operand(src, dst);
5704}
5705
5706void Assembler::xchgq(Register dst, Address src) {
5707  InstructionMark im(this);
5708  prefixq(src, dst);
5709  emit_int8((unsigned char)0x87);
5710  emit_operand(dst, src);
5711}
5712
5713void Assembler::xchgq(Register dst, Register src) {
5714  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5715  emit_int8((unsigned char)0x87);
5716  emit_int8((unsigned char)(0xc0 | encode));
5717}
5718
5719void Assembler::xorq(Register dst, Register src) {
5720  (void) prefixq_and_encode(dst->encoding(), src->encoding());
5721  emit_arith(0x33, 0xC0, dst, src);
5722}
5723
5724void Assembler::xorq(Register dst, Address src) {
5725  InstructionMark im(this);
5726  prefixq(src, dst);
5727  emit_int8(0x33);
5728  emit_operand(dst, src);
5729}
5730
5731#endif // !LP64
5732