// assembler_x86.cpp revision 8413:92457dfb91bd
1/*
2 * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "asm/assembler.hpp"
27#include "asm/assembler.inline.hpp"
28#include "gc/shared/cardTableModRefBS.hpp"
29#include "gc/shared/collectedHeap.inline.hpp"
30#include "interpreter/interpreter.hpp"
31#include "memory/resourceArea.hpp"
32#include "prims/methodHandles.hpp"
33#include "runtime/biasedLocking.hpp"
34#include "runtime/interfaceSupport.hpp"
35#include "runtime/objectMonitor.hpp"
36#include "runtime/os.hpp"
37#include "runtime/sharedRuntime.hpp"
38#include "runtime/stubRoutines.hpp"
39#include "utilities/macros.hpp"
40#if INCLUDE_ALL_GCS
41#include "gc/g1/g1CollectedHeap.inline.hpp"
42#include "gc/g1/g1SATBCardTableModRefBS.hpp"
43#include "gc/g1/heapRegion.hpp"
44#endif // INCLUDE_ALL_GCS
45
// Debug-build helpers: BLOCK_COMMENT annotates the generated code stream and
// STOP emits an annotated breakpoint; in PRODUCT builds the annotations
// compile away.
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
// Wrapped in do { ... } while (0) so that STOP(...) behaves as a single
// statement.  The original two-statement expansion
// (block_comment(error); stop(error)) left stop() outside the conditional
// when used unbraced, e.g. `if (cond) STOP("msg");` would always stop.
#define STOP(error) do { block_comment(error); stop(error); } while (0)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
55// Implementation of AddressLiteral
56
57AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
58  _is_lval = false;
59  _target = target;
60  switch (rtype) {
61  case relocInfo::oop_type:
62  case relocInfo::metadata_type:
63    // Oops are a special case. Normally they would be their own section
64    // but in cases like icBuffer they are literals in the code stream that
65    // we don't have a section for. We use none so that we get a literal address
66    // which is always patchable.
67    break;
68  case relocInfo::external_word_type:
69    _rspec = external_word_Relocation::spec(target);
70    break;
71  case relocInfo::internal_word_type:
72    _rspec = internal_word_Relocation::spec(target);
73    break;
74  case relocInfo::opt_virtual_call_type:
75    _rspec = opt_virtual_call_Relocation::spec();
76    break;
77  case relocInfo::static_call_type:
78    _rspec = static_call_Relocation::spec();
79    break;
80  case relocInfo::runtime_call_type:
81    _rspec = runtime_call_Relocation::spec();
82    break;
83  case relocInfo::poll_type:
84  case relocInfo::poll_return_type:
85    _rspec = Relocation::spec_simple(rtype);
86    break;
87  case relocInfo::none:
88    break;
89  default:
90    ShouldNotReachHere();
91    break;
92  }
93}
94
95// Implementation of Address
96
97#ifdef _LP64
98
99Address Address::make_array(ArrayAddress adr) {
100  // Not implementable on 64bit machines
101  // Should have been handled higher up the call chain.
102  ShouldNotReachHere();
103  return Address();
104}
105
106// exceedingly dangerous constructor
107Address::Address(int disp, address loc, relocInfo::relocType rtype) {
108  _base  = noreg;
109  _index = noreg;
110  _scale = no_scale;
111  _disp  = disp;
112  switch (rtype) {
113    case relocInfo::external_word_type:
114      _rspec = external_word_Relocation::spec(loc);
115      break;
116    case relocInfo::internal_word_type:
117      _rspec = internal_word_Relocation::spec(loc);
118      break;
119    case relocInfo::runtime_call_type:
120      // HMM
121      _rspec = runtime_call_Relocation::spec();
122      break;
123    case relocInfo::poll_type:
124    case relocInfo::poll_return_type:
125      _rspec = Relocation::spec_simple(rtype);
126      break;
127    case relocInfo::none:
128      break;
129    default:
130      ShouldNotReachHere();
131  }
132}
133#else // LP64
134
135Address Address::make_array(ArrayAddress adr) {
136  AddressLiteral base = adr.base();
137  Address index = adr.index();
138  assert(index._disp == 0, "must not have disp"); // maybe it can?
139  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
140  array._rspec = base._rspec;
141  return array;
142}
143
144// exceedingly dangerous constructor
145Address::Address(address loc, RelocationHolder spec) {
146  _base  = noreg;
147  _index = noreg;
148  _scale = no_scale;
149  _disp  = (intptr_t) loc;
150  _rspec = spec;
151}
152
153#endif // _LP64
154
155
156
157// Convert the raw encoding form into the form expected by the constructor for
158// Address.  An index of 4 (rsp) corresponds to having no index, so convert
159// that to noreg for the Address constructor.
160Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
161  RelocationHolder rspec;
162  if (disp_reloc != relocInfo::none) {
163    rspec = Relocation::spec_simple(disp_reloc);
164  }
165  bool valid_index = index != rsp->encoding();
166  if (valid_index) {
167    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
168    madr._rspec = rspec;
169    return madr;
170  } else {
171    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
172    madr._rspec = rspec;
173    return madr;
174  }
175}
176
177// Implementation of Assembler
178
179int AbstractAssembler::code_fill_byte() {
180  return (u_char)'\xF4'; // hlt
181}
182
183// make this go away someday
184void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
185  if (rtype == relocInfo::none)
186        emit_int32(data);
187  else  emit_data(data, Relocation::spec_simple(rtype), format);
188}
189
// Emit a 32-bit word with an attached relocation record.  The relocation is
// registered against the enclosing instruction's start (inst_mark), not the
// word itself, so callers must be inside an InstructionMark scope.
void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() !=  relocInfo::none) {
    #ifdef ASSERT
      check_relocation(rspec, format);
    #endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words.  Instead, relocate to the enclosing instruction.

    // hack. call32 is too wide for mask so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_int32(data);
}
208
209static int encode(Register r) {
210  int enc = r->encoding();
211  if (enc >= 8) {
212    enc -= 8;
213  }
214  return enc;
215}
216
// Emit a byte-sized arithmetic op with an imm8: opcode, ModRM (op2 carries
// the mod bits and opcode digit; the low bits get the register), then imm8.
void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_int8(op1);
  emit_int8(op2 | encode(dst));
  emit_int8(imm8);
}
226
227
228void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
229  assert(isByte(op1) && isByte(op2), "wrong opcode");
230  assert((op1 & 0x01) == 1, "should be 32bit operation");
231  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
232  if (is8bit(imm32)) {
233    emit_int8(op1 | 0x02); // set sign bit
234    emit_int8(op2 | encode(dst));
235    emit_int8(imm32 & 0xFF);
236  } else {
237    emit_int8(op1);
238    emit_int8(op2 | encode(dst));
239    emit_int32(imm32);
240  }
241}
242
// Force generation of a 4 byte immediate value even if it fits into 8bit.
// Used when the immediate field must later be patched in place.
void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  emit_int8(op1);
  emit_int8(op2 | encode(dst));
  emit_int32(imm32);
}
252
// immediate-to-memory forms
// Emit opcode + memory operand + immediate; 'rm' is the opcode-extension
// digit placed in the ModRM reg field, not a real register.  The trailing
// operand-size argument to emit_operand (1 or 4) is the number of immediate
// bytes that follow, needed for RIP-relative displacement correction.
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int8(op1 | 0x02); // set sign bit
    emit_operand(rm, adr, 1);
    emit_int8(imm32 & 0xFF);
  } else {
    emit_int8(op1);
    emit_operand(rm, adr, 4);
    emit_int32(imm32);
  }
}
267
268
269void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
270  assert(isByte(op1) && isByte(op2), "wrong opcode");
271  emit_int8(op1);
272  emit_int8(op2 | encode(dst) << 3 | encode(src));
273}
274
275
276void Assembler::emit_operand(Register reg, Register base, Register index,
277                             Address::ScaleFactor scale, int disp,
278                             RelocationHolder const& rspec,
279                             int rip_relative_correction) {
280  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();
281
282  // Encode the registers as needed in the fields they are used in
283
284  int regenc = encode(reg) << 3;
285  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
286  int baseenc = base->is_valid() ? encode(base) : 0;
287
288  if (base->is_valid()) {
289    if (index->is_valid()) {
290      assert(scale != Address::no_scale, "inconsistent address");
291      // [base + index*scale + disp]
292      if (disp == 0 && rtype == relocInfo::none  &&
293          base != rbp LP64_ONLY(&& base != r13)) {
294        // [base + index*scale]
295        // [00 reg 100][ss index base]
296        assert(index != rsp, "illegal addressing mode");
297        emit_int8(0x04 | regenc);
298        emit_int8(scale << 6 | indexenc | baseenc);
299      } else if (is8bit(disp) && rtype == relocInfo::none) {
300        // [base + index*scale + imm8]
301        // [01 reg 100][ss index base] imm8
302        assert(index != rsp, "illegal addressing mode");
303        emit_int8(0x44 | regenc);
304        emit_int8(scale << 6 | indexenc | baseenc);
305        emit_int8(disp & 0xFF);
306      } else {
307        // [base + index*scale + disp32]
308        // [10 reg 100][ss index base] disp32
309        assert(index != rsp, "illegal addressing mode");
310        emit_int8(0x84 | regenc);
311        emit_int8(scale << 6 | indexenc | baseenc);
312        emit_data(disp, rspec, disp32_operand);
313      }
314    } else if (base == rsp LP64_ONLY(|| base == r12)) {
315      // [rsp + disp]
316      if (disp == 0 && rtype == relocInfo::none) {
317        // [rsp]
318        // [00 reg 100][00 100 100]
319        emit_int8(0x04 | regenc);
320        emit_int8(0x24);
321      } else if (is8bit(disp) && rtype == relocInfo::none) {
322        // [rsp + imm8]
323        // [01 reg 100][00 100 100] disp8
324        emit_int8(0x44 | regenc);
325        emit_int8(0x24);
326        emit_int8(disp & 0xFF);
327      } else {
328        // [rsp + imm32]
329        // [10 reg 100][00 100 100] disp32
330        emit_int8(0x84 | regenc);
331        emit_int8(0x24);
332        emit_data(disp, rspec, disp32_operand);
333      }
334    } else {
335      // [base + disp]
336      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
337      if (disp == 0 && rtype == relocInfo::none &&
338          base != rbp LP64_ONLY(&& base != r13)) {
339        // [base]
340        // [00 reg base]
341        emit_int8(0x00 | regenc | baseenc);
342      } else if (is8bit(disp) && rtype == relocInfo::none) {
343        // [base + disp8]
344        // [01 reg base] disp8
345        emit_int8(0x40 | regenc | baseenc);
346        emit_int8(disp & 0xFF);
347      } else {
348        // [base + disp32]
349        // [10 reg base] disp32
350        emit_int8(0x80 | regenc | baseenc);
351        emit_data(disp, rspec, disp32_operand);
352      }
353    }
354  } else {
355    if (index->is_valid()) {
356      assert(scale != Address::no_scale, "inconsistent address");
357      // [index*scale + disp]
358      // [00 reg 100][ss index 101] disp32
359      assert(index != rsp, "illegal addressing mode");
360      emit_int8(0x04 | regenc);
361      emit_int8(scale << 6 | indexenc | 0x05);
362      emit_data(disp, rspec, disp32_operand);
363    } else if (rtype != relocInfo::none ) {
364      // [disp] (64bit) RIP-RELATIVE (32bit) abs
365      // [00 000 101] disp32
366
367      emit_int8(0x05 | regenc);
368      // Note that the RIP-rel. correction applies to the generated
369      // disp field, but _not_ to the target address in the rspec.
370
371      // disp was created by converting the target address minus the pc
372      // at the start of the instruction. That needs more correction here.
373      // intptr_t disp = target - next_ip;
374      assert(inst_mark() != NULL, "must be inside InstructionMark");
375      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
376      int64_t adjusted = disp;
377      // Do rip-rel adjustment for 64bit
378      LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
379      assert(is_simm32(adjusted),
380             "must be 32bit offset (RIP relative address)");
381      emit_data((int32_t) adjusted, rspec, disp32_operand);
382
383    } else {
384      // 32bit never did this, did everything as the rip-rel/disp code above
385      // [disp] ABSOLUTE
386      // [00 reg 100][00 100 101] disp32
387      emit_int8(0x04 | regenc);
388      emit_int8(0x25);
389      emit_data(disp, rspec, disp32_operand);
390    }
391  }
392}
393
394void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
395                             Address::ScaleFactor scale, int disp,
396                             RelocationHolder const& rspec) {
397  emit_operand((Register)reg, base, index, scale, disp, rspec);
398}
399
// Secret local extension to Assembler::WhichOperand:
#define end_pc_operand (_WhichOperand_limit)

// Hand-written x86 instruction-length/operand decoder.  It understands only
// the subset of encodings this assembler emits; anything else trips
// ShouldNotReachHere().  Many case groups below rely on intentional
// fallthrough — noted inline where it occurs.
address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip;                  // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2;  // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand,
           err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip)));
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit,
           err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip)));
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x3A: // pcmpestri
      tail_size = 1;
      // fall through (intentional): 0x0F 0x3A and 0x0F 0x38 are both 3-byte
      // opcode prefixes; the extra opcode byte is skipped below.
    case 0x38: // ptest, pmovzxbw
      ip++; // skip opcode
      debug_only(has_disp32 = true); // has both kinds of operands!
      break;

    case 0x70: // pshufd r, r/a, #8
      debug_only(has_disp32 = true); // has both kinds of operands!
      // fall through (intentional): both forms carry a trailing imm8.
    case 0x73: // psrldq r, #8
      tail_size = 1;
      break;

    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xC4: // pinsrw r, a, #8
      debug_only(has_disp32 = true);
      // fall through (intentional): both pinsrw and pextrw end with an imm8.
    case 0xC5: // pextrw r, r, #8
      tail_size = 1;  // the imm8
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1;  // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xC4: // VEX_3bytes
  case 0xC5: // VEX_2bytes
    assert((UseAVX > 0), "shouldn't have VEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions
    // but they have prefix 0x0F and processed when 0x0F processed above.
    //
    // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES
    // instructions (these instructions are not supported in 64-bit mode).
    // To distinguish them bits [7:6] are set in the VEX second byte since
    // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set
    // those VEX bits REX and vvvv bits are inverted.
    //
    // Fortunately C2 doesn't generate these instructions so we don't need
    // to check for them in product version.

    // Check second byte
    NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));

    // First byte
    if ((0xFF & *inst) == VEX_3bytes) {
      ip++; // third byte
      is_64bit = ((VEX_W & *ip) == VEX_W);
    }
    ip++; // opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (0xFF & *ip) {
    case 0x61: // pcmpestri r, r/a, #8
    case 0x70: // pshufd r, r/a, #8
    case 0x73: // psrldq r, #8
      tail_size = 1;  // the imm8
      break;
    default:
      break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp  rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xF0:                    // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3:                    // For SSE
  case 0xF2:                    // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
      // fall through (intentional): with a REX prefix the sequence is
      // F3/F2 REX 0F opc, so two more bytes precede the ModRM; without
      // REX only one (the opcode after 0F) remains to be skipped.
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

// NOTE(review): REP4 is never #undef'd here, unlike REP8/REP16 — harmless
// today since nothing later in the file redefines it, but asymmetric.
#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    op3 = 0xFF & *ip++;
    base = op3 & 0x07;   // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip;              // caller wants the disp32
      ip += 4;                  // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1;                    // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip;                // caller wants the disp32
    ip += 4;                    // skip the disp32
    break;

  case 3:
    // [11 reg base]  (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}
813
814address Assembler::locate_next_instruction(address inst) {
815  // Secretly share code with locate_operand:
816  return locate_operand(inst, end_pc_operand);
817}
818
819
820#ifdef ASSERT
// Debug-only sanity check: verify that a relocation registered for the
// current instruction actually refers to an operand that ends exactly at the
// current pc, by re-decoding the instruction with locate_operand.
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
842#endif // ASSERT
843
844void Assembler::emit_operand32(Register reg, Address adr) {
845  assert(reg->encoding() < 8, "no extended registers");
846  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
847  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
848               adr._rspec);
849}
850
851void Assembler::emit_operand(Register reg, Address adr,
852                             int rip_relative_correction) {
853  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
854               adr._rspec,
855               rip_relative_correction);
856}
857
858void Assembler::emit_operand(XMMRegister reg, Address adr) {
859  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
860               adr._rspec);
861}
862
863// MMX operations
864void Assembler::emit_operand(MMXRegister reg, Address adr) {
865  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
866  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
867}
868
// work around gcc (3.2.1-7a) bug
// Same body as the (MMXRegister, Address) overload above; this argument
// order exists only to sidestep the compiler bug, so keep it byte-identical.
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}
874
875
876void Assembler::emit_farith(int b1, int b2, int i) {
877  assert(isByte(b1) && isByte(b2), "wrong opcode");
878  assert(0 <= i &&  i < 8, "illegal stack offset");
879  emit_int8(b1);
880  emit_int8(b2 + i);
881}
882
883
884// Now the Assembler instructions (identical for 32/64 bits)
885
886void Assembler::adcl(Address dst, int32_t imm32) {
887  InstructionMark im(this);
888  prefix(dst);
889  emit_arith_operand(0x81, rdx, dst, imm32);
890}
891
// adc [mem], reg — opcode 0x11 (ADC r/m32, r32).
void Assembler::adcl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x11);
  emit_operand(src, dst);
}
898
899void Assembler::adcl(Register dst, int32_t imm32) {
900  prefix(dst);
901  emit_arith(0x81, 0xD0, dst, imm32);
902}
903
904void Assembler::adcl(Register dst, Address src) {
905  InstructionMark im(this);
906  prefix(src, dst);
907  emit_int8(0x13);
908  emit_operand(dst, src);
909}
910
911void Assembler::adcl(Register dst, Register src) {
912  (void) prefix_and_encode(dst->encoding(), src->encoding());
913  emit_arith(0x13, 0xC0, dst, src);
914}
915
916void Assembler::addl(Address dst, int32_t imm32) {
917  InstructionMark im(this);
918  prefix(dst);
919  emit_arith_operand(0x81, rax, dst, imm32);
920}
921
922void Assembler::addl(Address dst, Register src) {
923  InstructionMark im(this);
924  prefix(dst, src);
925  emit_int8(0x01);
926  emit_operand(src, dst);
927}
928
929void Assembler::addl(Register dst, int32_t imm32) {
930  prefix(dst);
931  emit_arith(0x81, 0xC0, dst, imm32);
932}
933
934void Assembler::addl(Register dst, Address src) {
935  InstructionMark im(this);
936  prefix(src, dst);
937  emit_int8(0x03);
938  emit_operand(dst, src);
939}
940
941void Assembler::addl(Register dst, Register src) {
942  (void) prefix_and_encode(dst->encoding(), src->encoding());
943  emit_arith(0x03, 0xC0, dst, src);
944}
945
// Multi-byte NOPs built from "0F 1F /0" (NOP r/m32) with dummy
// addressing modes. Used to pad code to a desired alignment with a
// single instruction instead of a run of one-byte NOPs.

void Assembler::addr_nop_4() {
  assert(UseAddressNop, "no CPU support");
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_5() {
  assert(UseAddressNop, "no CPU support");
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_7() {
  assert(UseAddressNop, "no CPU support");
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x80);
                   // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bits offset (4 bytes)
}

void Assembler::addr_nop_8() {
  assert(UseAddressNop, "no CPU support");
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x84);
                   // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bits offset (4 bytes)
}
985
// Scalar SSE adds: ADDSD (double, F2 prefix) and ADDSS (float, F3 prefix),
// both opcode 0x58.

void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
}

void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
}

void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}

void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}
1005
// AESDEC xmm, m128: perform one round of AES decryption (66 0F 38 DE).
void Assembler::aesdec(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8((unsigned char)0xDE);
  emit_operand(dst, src);
}
1013
1014void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
1015  assert(VM_Version::supports_aes(), "");
1016  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
1017  emit_int8((unsigned char)0xDE);
1018  emit_int8(0xC0 | encode);
1019}
1020
// AESDECLAST: final round of AES decryption (66 0F 38 DF).

void Assembler::aesdeclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8((unsigned char)0xDF);
  emit_operand(dst, src);
}

void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8((unsigned char)0xDF);
  emit_int8((unsigned char)(0xC0 | encode));
}
1035
// AESENC xmm, m128: perform one round of AES encryption (66 0F 38 DC).
void Assembler::aesenc(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8((unsigned char)0xDC);
  emit_operand(dst, src);
}
1043
1044void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
1045  assert(VM_Version::supports_aes(), "");
1046  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
1047  emit_int8((unsigned char)0xDC);
1048  emit_int8(0xC0 | encode);
1049}
1050
// AESENCLAST: final round of AES encryption (66 0F 38 DD).

void Assembler::aesenclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8((unsigned char)0xDD);
  emit_operand(dst, src);
}

void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8((unsigned char)0xDD);
  emit_int8((unsigned char)(0xC0 | encode));
}
1065
1066
1067void Assembler::andl(Address dst, int32_t imm32) {
1068  InstructionMark im(this);
1069  prefix(dst);
1070  emit_int8((unsigned char)0x81);
1071  emit_operand(rsp, dst, 4);
1072  emit_int32(imm32);
1073}
1074
// AND, remaining 32-bit forms.

void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32); // 0xE0 = ModRM base for /4 (AND) register form
}

void Assembler::andl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x23); // AND r32, r/m32
  emit_operand(dst, src);
}

void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}
1091
// ANDN (BMI1): dst = ~src1 & src2, VEX.0F38 opcode 0xF2.

void Assembler::andnl(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode(dst, src1, src2);
  emit_int8((unsigned char)0xF2);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::andnl(Register dst, Register src1, Address src2) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38(dst, src1, src2);
  emit_int8((unsigned char)0xF2);
  emit_operand(dst, src2);
}
1106
// Bit scan forward (0F BC), bit scan reverse (0F BD), and byte swap
// (0F C8+r; register encoding folded into the opcode byte).

void Assembler::bsfl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBC);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::bsrl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBD);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)(0xC8 | encode));
}
1126
// BMI1 group-17 instructions (VEX.0F38 opcode 0xF3). The first register
// argument passed to the vex helpers is not an operand: its encoding
// selects the opcode extension — rbx=/3 (BLSI), rdx=/2 (BLSMSK),
// rcx=/1 (BLSR).

void Assembler::blsil(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode(rbx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsil(Register dst, Address src) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38(rbx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_operand(rbx, src);
}

void Assembler::blsmskl(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode(rdx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsmskl(Register dst, Address src) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38(rdx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_operand(rdx, src);
}

void Assembler::blsrl(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode(rcx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsrl(Register dst, Address src) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38(rcx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_operand(rcx, src);
}
1171
// CALL to a label: E8 + rel32. A bound label emits the final
// displacement; an unbound one records a patch site and emits 0.
void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5;
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error"); // a bound target is always behind us
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_int8((unsigned char)0xE8);
    emit_data(offs - long_size, rtype, operand); // disp is relative to end of instruction
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator()); // displacement filled in when L is bound

    emit_int8((unsigned char)0xE8);
    emit_data(int(0), rtype, operand);
  }
}

// CALL through a register: FF /2.
void Assembler::call(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xD0 | encode));
}


// CALL through memory: FF /2 (rdx encodes the /2 opcode extension).
void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_int8((unsigned char)0xFF);
  emit_operand(rdx, adr);
}

// CALL to an absolute address with relocation info; the target must be
// reachable with a 32-bit pc-relative displacement.
void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  emit_int8((unsigned char)0xE8);
  intptr_t disp = entry - (pc() + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (call2)");
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}
1220
// CDQ: sign-extend eax into edx:eax.
void Assembler::cdql() {
  emit_int8((unsigned char)0x99);
}

// CLD: clear the direction flag (forward string operations).
void Assembler::cld() {
  emit_int8((unsigned char)0xFC);
}
1228
// CMOVcc r32, r32 (0F 40+cc): conditional register-to-register move.
void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8(0x40 | cc); // condition code folded into the opcode byte
  emit_int8((unsigned char)(0xC0 | encode));
}
1236
1237
1238void Assembler::cmovl(Condition cc, Register dst, Address src) {
1239  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1240  prefix(src, dst);
1241  emit_int8(0x0F);
1242  emit_int8(0x40 | cc);
1243  emit_operand(dst, src);
1244}
1245
// CMP with immediate against memory: byte (0x80 /7), dword (0x81 /7),
// word (0x66 0x81 /7). rdi encodes the /7 opcode extension (CMP).
// The extra argument to emit_operand is the number of immediate bytes
// that still follow, so rip-relative displacements are computed correctly.

void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x80);
  emit_operand(rdi, dst, 1);
  emit_int8(imm8);
}

void Assembler::cmpl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rdi, dst, 4);
  emit_int32(imm32);
}

void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32); // 0xF8 = ModRM base for /7 (CMP) register form
}

void Assembler::cmpl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}


void Assembler::cmpl(Register dst, Address  src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8((unsigned char)0x3B); // CMP r32, r/m32
  emit_operand(dst, src);
}

// 16-bit compare: 0x66 operand-size override; no REX support, hence the assert.
void Assembler::cmpw(Address dst, int imm16) {
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_int8(0x66);
  emit_int8((unsigned char)0x81);
  emit_operand(rdi, dst, 2);
  emit_int16(imm16);
}
1288
// The 32-bit cmpxchg compares the value at adr with the contents of rax,
// and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
// The ZF is set if the compared values were equal, and cleared otherwise.
void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
  InstructionMark im(this);
  prefix(adr, reg);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB1);
  emit_operand(reg, adr);
}

// The 8-bit cmpxchg compares the value at adr with the contents of rax,
// and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
// The ZF is set if the compared values were equal, and cleared otherwise.
void Assembler::cmpxchgb(Register reg, Address adr) { // cmpxchg
  InstructionMark im(this);
  prefix(adr, reg, true); // byte operand: may need REX to reach byte registers
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB0);
  emit_operand(reg, adr);
}
1310
// Ordered scalar compares setting EFLAGS: COMISD (66 prefix) and
// COMISS (no prefix), opcode 0x2F.

void Assembler::comisd(XMMRegister dst, Address src) {
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 is there. Strangly ucomisd comes out correct
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
}

void Assembler::comisd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
}

void Assembler::comiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
}

void Assembler::comiss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
}

// CPUID (0F A2): processor identification / feature query.
void Assembler::cpuid() {
  emit_int8(0x0F);
  emit_int8((unsigned char)0xA2);
}
1337
// SSE conversion instructions.

// CVTDQ2PD: packed int32 -> packed double.
void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3);
}

// CVTDQ2PS: packed int32 -> packed float.
void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE);
}

// CVTSD2SS: scalar double -> scalar float.
void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
}

void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
}

// CVTSI2SD: 32-bit integer -> scalar double.
void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
  emit_int8(0x2A);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
}

// CVTSI2SS: 32-bit integer -> scalar float.
void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
  emit_int8(0x2A);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3);
}

// CVTSS2SD: scalar float -> scalar double.
void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
}

void Assembler::cvtss2sd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
}


// CVTTSD2SI / CVTTSS2SI: scalar double/float -> 32-bit integer, truncating.
void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
  emit_int8(0x2C);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::cvttss2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
  emit_int8(0x2C);
  emit_int8((unsigned char)(0xC0 | encode));
}
1405}
1406
// DEC dword at memory: FF /1 (rcx encodes the /1 opcode extension).
void Assembler::decl(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrement() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0xFF);
  emit_operand(rcx, dst);
}
1414
// Scalar SSE divides: DIVSD (double, F2) and DIVSS (float, F3), opcode 0x5E.

void Assembler::divsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
}

void Assembler::divsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
}

void Assembler::divss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
}

void Assembler::divss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
}
1434
// EMMS (0F 77): exit MMX state, making the FPU usable again.
void Assembler::emms() {
  NOT_LP64(assert(VM_Version::supports_mmx(), ""));
  emit_int8(0x0F);
  emit_int8(0x77);
}

// HLT: halt; used to fill unreachable code.
void Assembler::hlt() {
  emit_int8((unsigned char)0xF4);
}

// IDIV (F7 /7): signed divide of edx:eax by src.
void Assembler::idivl(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xF8 | encode));
}

// DIV (F7 /6): unsigned divide of edx:eax by src.
void Assembler::divl(Register src) { // Unsigned
  int encode = prefix_and_encode(src->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xF0 | encode));
}
1456
// IMUL, two-operand and immediate forms.

void Assembler::imull(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAF); // IMUL r32, r/m32
  emit_int8((unsigned char)(0xC0 | encode));
}


// dst = src * value; picks the short sign-extended imm8 form (0x6B)
// when the constant fits in a byte, else the imm32 form (0x69).
void Assembler::imull(Register dst, Register src, int value) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_int8(0x6B);
    emit_int8((unsigned char)(0xC0 | encode));
    emit_int8(value & 0xFF);
  } else {
    emit_int8(0x69);
    emit_int8((unsigned char)(0xC0 | encode));
    emit_int32(value);
  }
}

void Assembler::imull(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char) 0xAF);
  emit_operand(dst, src);
}
1485
1486
// INC dword at memory: FF /0 (rax encodes the /0 opcode extension).
void Assembler::incl(Address dst) {
  // Don't use it directly. Use MacroAssembler::increment() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0xFF);
  emit_operand(rax, dst);
}
1494
// Conditional jump to a label. For a bound label, chooses between the
// 2-byte short form (70+cc rel8) and the 6-byte long form
// (0F 80+cc rel32) based on 'maybe_short' and the distance; an unbound
// label always gets the long form, patched when the label is bound.
void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
  InstructionMark im(this);
  assert((0 <= cc) && (cc < 16), "illegal cc");
  if (L.is_bound()) {
    address dst = target(L);
    assert(dst != NULL, "jcc most probably wrong");

    const int short_size = 2;
    const int long_size = 6;
    intptr_t offs = (intptr_t)dst - (intptr_t)pc();
    if (maybe_short && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_int8(0x70 | cc);
      emit_int8((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      assert(is_simm32(offs - long_size),
             "must be 32bit offset (call4)");
      emit_int8(0x0F);
      emit_int8((unsigned char)(0x80 | cc));
      emit_int32(offs - long_size);
    }
  } else {
    // Note: could eliminate cond. jumps to this jump if condition
    //       is the same however, seems to be rather unlikely case.
    // Note: use jccb() if label to be bound is very close to get
    //       an 8-bit displacement
    L.add_patch_at(code(), locator());
    emit_int8(0x0F);
    emit_int8((unsigned char)(0x80 | cc));
    emit_int32(0);
  }
}
1528
// Conditional jump with a forced 8-bit displacement (70+cc rel8).
// Caller guarantees the target is within range; debug builds verify it,
// padding the distance by short_branch_delta() to stay safe under
// branch shortening.
void Assembler::jccb(Condition cc, Label& L) {
  if (L.is_bound()) {
    const int short_size = 2;
    address entry = target(L);
#ifdef ASSERT
    intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
    intptr_t delta = short_branch_delta();
    if (delta != 0) {
      dist += (dist < 0 ? (-delta) :delta);
    }
    assert(is8bit(dist), "Dispacement too large for a short jmp");
#endif
    intptr_t offs = (intptr_t)entry - (intptr_t)pc();
    // 0111 tttn #8-bit disp
    emit_int8(0x70 | cc);
    emit_int8((offs - short_size) & 0xFF);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_int8(0x70 | cc);
    emit_int8(0);
  }
}
1552
// JMP through memory: FF /4 (rsp encodes the /4 opcode extension).
void Assembler::jmp(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_int8((unsigned char)0xFF);
  emit_operand(rsp, adr);
}

// JMP to a label: short form (EB rel8) when allowed and in range,
// otherwise long form (E9 rel32); unbound labels always get rel32
// and are patched on bind.
void Assembler::jmp(Label& L, bool maybe_short) {
  if (L.is_bound()) {
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
    InstructionMark im(this);
    const int short_size = 2;
    const int long_size = 5;
    intptr_t offs = entry - pc();
    if (maybe_short && is8bit(offs - short_size)) {
      emit_int8((unsigned char)0xEB);
      emit_int8((offs - short_size) & 0xFF);
    } else {
      emit_int8((unsigned char)0xE9);
      emit_int32(offs - long_size);
    }
  } else {
    // By default, forward jumps are always 32-bit displacements, since
    // we can't yet know where the label will be bound.  If you're sure that
    // the forward jump will not run beyond 256 bytes, use jmpb to
    // force an 8-bit displacement.
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_int8((unsigned char)0xE9);
    emit_int32(0);
  }
}

// JMP through a register: FF /4.
void Assembler::jmp(Register entry) {
  int encode = prefix_and_encode(entry->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xE0 | encode));
}

// JMP to an absolute address with relocation info; target must be
// reachable with a 32-bit pc-relative displacement.
void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xE9);
  assert(dest != NULL, "must have a target");
  intptr_t disp = dest - (pc() + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (jmp)");
  emit_data(disp, rspec.reloc(), call32_operand);
}

// JMP with a forced 8-bit displacement (EB rel8); range verified in
// debug builds, padded by short_branch_delta() as in jccb.
void Assembler::jmpb(Label& L) {
  if (L.is_bound()) {
    const int short_size = 2;
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
#ifdef ASSERT
    intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
    intptr_t delta = short_branch_delta();
    if (delta != 0) {
      dist += (dist < 0 ? (-delta) :delta);
    }
    assert(is8bit(dist), "Dispacement too large for a short jmp");
#endif
    intptr_t offs = entry - pc();
    emit_int8((unsigned char)0xEB);
    emit_int8((offs - short_size) & 0xFF);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_int8((unsigned char)0xEB);
    emit_int8(0);
  }
}
1625
// LDMXCSR (0F AE /2): load the SSE control/status register from memory.
void Assembler::ldmxcsr( Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(src);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  emit_operand(as_Register(2), src); // register field 2 = /2 opcode extension
}
1634
// LEA r32, m (8D): load effective address. On 64-bit, the 0x67
// address-size override forces a 32-bit effective-address computation.
void Assembler::leal(Register dst, Address src) {
  InstructionMark im(this);
#ifdef _LP64
  emit_int8(0x67); // addr32
  prefix(src, dst);
#endif // LP64
  emit_int8((unsigned char)0x8D);
  emit_operand(dst, src);
}
1644
// LFENCE (0F AE E8): load fence.
void Assembler::lfence() {
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  emit_int8((unsigned char)0xE8);
}

// LOCK prefix for the following instruction.
void Assembler::lock() {
  emit_int8((unsigned char)0xF0);
}

// LZCNT (F3 0F BD): count leading zeros. The F3 prefix must precede
// any REX byte, so it is emitted before prefix_and_encode. On CPUs
// without LZCNT this byte pattern decodes as BSR, hence the assert.
void Assembler::lzcntl(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  emit_int8((unsigned char)0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBD);
  emit_int8((unsigned char)(0xC0 | encode));
}

// Emit mfence instruction (0F AE F0): full memory fence.
void Assembler::mfence() {
  NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  emit_int8((unsigned char)0xF0);
}
1671
// Pointer-sized register move: movq on 64-bit, movl on 32-bit.
void Assembler::mov(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

// MOVAPD / MOVAPS (0x28): aligned packed moves, double/float flavors.
void Assembler::movapd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
}

void Assembler::movaps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE);
}

// MOVLHPS (0F 16): copy low quadword of src to high quadword of dst.
void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE);
  emit_int8(0x16);
  emit_int8((unsigned char)(0xC0 | encode));
}
1692
// Byte moves. The 'true' passed to prefix requests byte-register
// handling (REX may be needed to address sil/dil/spl/bpl on 64-bit).

void Assembler::movb(Register dst, Address src) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  InstructionMark im(this);
  prefix(src, dst, true);
  emit_int8((unsigned char)0x8A); // MOV r8, r/m8
  emit_operand(dst, src);
}


void Assembler::movb(Address dst, int imm8) {
  InstructionMark im(this);
   prefix(dst);
  emit_int8((unsigned char)0xC6); // MOV r/m8, imm8 (/0)
  emit_operand(rax, dst, 1);
  emit_int8(imm8);
}


void Assembler::movb(Address dst, Register src) {
  assert(src->has_byte_register(), "must have byte register");
  InstructionMark im(this);
  prefix(dst, src, true);
  emit_int8((unsigned char)0x88); // MOV r/m8, r8
  emit_operand(src, dst);
}
1718
// MOVD (66 0F 6E / 66 0F 7E): move 32 bits between a GPR or memory
// and the low dword of an XMM register.

void Assembler::movdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
  emit_int8(0x6E);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::movdl(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // swap src/dst to get correct prefix
  int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66);
  emit_int8(0x7E);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::movdl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66);
  emit_int8(0x6E);
  emit_operand(dst, src);
}

void Assembler::movdl(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66);
  emit_int8(0x7E);
  emit_operand(src, dst);
}
1749
// MOVDQA (aligned, 66 prefix) and MOVDQU (unaligned, F3 prefix)
// 128-bit vector moves; load opcode 0x6F, store opcode 0x7F.

void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
}

void Assembler::movdqa(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
}

void Assembler::movdqu(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
}

void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
}

void Assembler::movdqu(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F3);
  emit_int8(0x7F);
  emit_operand(src, dst);
}
1777
// Move Unaligned 256bit Vector (VMOVDQU with VEX.256).
void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
  assert(UseAVX > 0, "");
  bool vector256 = true;
  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256);
  emit_int8(0x6F);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vmovdqu(XMMRegister dst, Address src) {
  assert(UseAVX > 0, "");
  InstructionMark im(this);
  bool vector256 = true;
  vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256);
  emit_int8(0x6F);
  emit_operand(dst, src);
}

void Assembler::vmovdqu(Address dst, XMMRegister src) {
  assert(UseAVX > 0, "");
  InstructionMark im(this);
  bool vector256 = true;
  // swap src<->dst for encoding
  assert(src != xnoreg, "sanity");
  vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector256);
  emit_int8(0x7F); // store form
  emit_operand(src, dst);
}
1806
1807// Uses zero extension on 64bit
1808
// MOV, 32-bit forms (on 64-bit these zero-extend into the full register).

void Assembler::movl(Register dst, int32_t imm32) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)(0xB8 | encode)); // MOV r32, imm32; register folded into opcode
  emit_int32(imm32);
}

void Assembler::movl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x8B);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::movl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8((unsigned char)0x8B); // MOV r32, r/m32
  emit_operand(dst, src);
}

void Assembler::movl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0xC7); // MOV r/m32, imm32 (/0)
  emit_operand(rax, dst, 4);
  emit_int32(imm32);
}

void Assembler::movl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8((unsigned char)0x89); // MOV r/m32, r32
  emit_operand(src, dst);
}
1842
// New cpus require to use movsd and movss to avoid partial register stall
// when loading from memory. But for old Opteron use movlpd instead of movsd.
// The selection is done in MacroAssembler::movdbl() and movflt().
void Assembler::movlpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x12, dst, src, VEX_SIMD_66);
}
1850
// MOVQ, MMX forms (0F 6F load / 0F 7F store).
void Assembler::movq( MMXRegister dst, Address src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_int8(0x0F);
  emit_int8(0x6F);
  emit_operand(dst, src);
}

void Assembler::movq( Address dst, MMXRegister src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_int8(0x0F);
  emit_int8(0x7F);
  // workaround gcc (3.2.1-7a) bug
  // In that version of gcc with only an emit_operand(MMX, Address)
  // gcc will tail jump and try and reverse the parameters completely
  // obliterating dst in the process. By having a version available
  // that doesn't need to swap the args at the tail jump the bug is
  // avoided.
  emit_operand(dst, src);
}

// MOVQ, XMM forms: F3 0F 7E loads (zeroing the upper half),
// 66 0F D6 stores the low quadword.
void Assembler::movq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F3);
  emit_int8(0x7E);
  emit_operand(dst, src);
}

void Assembler::movq(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66);
  emit_int8((unsigned char)0xD6);
  emit_operand(src, dst);
}
1886
// MOVSX r32, m8 — sign-extend a byte from memory (0F BE).
void Assembler::movsbl(Register dst, Address src) { // movsxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBE);
  emit_operand(dst, src);
}

// MOVSX r32, r8 — sign-extend a byte register (0F BE). The `true` argument
// asks for a REX prefix where needed so spl/bpl/sil/dil encode correctly.
void Assembler::movsbl(Register dst, Register src) { // movsxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBE);
  emit_int8((unsigned char)(0xC0 | encode));
}
1902
// MOVSD xmm, xmm — copy scalar double (F2 0F 10).
void Assembler::movsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
}

// MOVSD xmm, m64 — load scalar double from memory (F2 0F 10).
void Assembler::movsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
}

// MOVSD m64, xmm — store scalar double to memory (F2 0F 11).
void Assembler::movsd(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F2);
  emit_int8(0x11);
  emit_operand(src, dst);
}
1920
// MOVSS xmm, xmm — copy scalar float (F3 0F 10).
void Assembler::movss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x10, dst, src, VEX_SIMD_F3);
}

// MOVSS xmm, m32 — load scalar float from memory (F3 0F 10).
void Assembler::movss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3);
}

// MOVSS m32, xmm — store scalar float to memory (F3 0F 11).
void Assembler::movss(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F3);
  emit_int8(0x11);
  emit_operand(src, dst);
}
1938
// MOVSX r32, m16 — sign-extend a word from memory (0F BF).
void Assembler::movswl(Register dst, Address src) { // movsxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBF);
  emit_operand(dst, src);
}

// MOVSX r32, r16 — sign-extend a word register (0F BF).
void Assembler::movswl(Register dst, Register src) { // movsxw
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBF);
  emit_int8((unsigned char)(0xC0 | encode));
}
1953
// MOV r/m16, imm16 — store a 16-bit immediate to memory (66 C7 /0).
void Assembler::movw(Address dst, int imm16) {
  InstructionMark im(this);

  emit_int8(0x66); // switch to 16-bit mode
  prefix(dst);
  emit_int8((unsigned char)0xC7);
  // rax supplies the /0 ModRM extension; 2 = immediate bytes following.
  emit_operand(rax, dst, 2);
  emit_int16(imm16);
}

// MOV r16, m16 — load 16 bits from memory (66 8B).
void Assembler::movw(Register dst, Address src) {
  InstructionMark im(this);
  emit_int8(0x66);
  prefix(src, dst);
  emit_int8((unsigned char)0x8B);
  emit_operand(dst, src);
}

// MOV m16, r16 — store 16 bits to memory (66 89).
void Assembler::movw(Address dst, Register src) {
  InstructionMark im(this);
  emit_int8(0x66);
  prefix(dst, src);
  emit_int8((unsigned char)0x89);
  emit_operand(src, dst);
}
1979
1980void Assembler::movzbl(Register dst, Address src) { // movzxb
1981  InstructionMark im(this);
1982  prefix(src, dst);
1983  emit_int8(0x0F);
1984  emit_int8((unsigned char)0xB6);
1985  emit_operand(dst, src);
1986}
1987
1988void Assembler::movzbl(Register dst, Register src) { // movzxb
1989  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
1990  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1991  emit_int8(0x0F);
1992  emit_int8((unsigned char)0xB6);
1993  emit_int8(0xC0 | encode);
1994}
1995
1996void Assembler::movzwl(Register dst, Address src) { // movzxw
1997  InstructionMark im(this);
1998  prefix(src, dst);
1999  emit_int8(0x0F);
2000  emit_int8((unsigned char)0xB7);
2001  emit_operand(dst, src);
2002}
2003
2004void Assembler::movzwl(Register dst, Register src) { // movzxw
2005  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2006  emit_int8(0x0F);
2007  emit_int8((unsigned char)0xB7);
2008  emit_int8(0xC0 | encode);
2009}
2010
// MUL r/m32 (memory form) — unsigned multiply EDX:EAX = EAX * [src]
// (F7 /4; rsp encodes the /4 ModRM extension).
void Assembler::mull(Address src) {
  InstructionMark im(this);
  prefix(src);
  emit_int8((unsigned char)0xF7);
  emit_operand(rsp, src);
}

// MUL r32 — unsigned multiply EDX:EAX = EAX * src (F7 /4, ModRM 0xE0 | reg).
void Assembler::mull(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xE0 | encode));
}
2023
// MULSD xmm, m64 — scalar double multiply (F2 0F 59).
void Assembler::mulsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
}

// MULSD xmm, xmm — scalar double multiply (F2 0F 59).
void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
}

// MULSS xmm, m32 — scalar float multiply (F3 0F 59).
void Assembler::mulss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
}

// MULSS xmm, xmm — scalar float multiply (F3 0F 59).
void Assembler::mulss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
}
2043
// NEG r32 — two's-complement negate (F7 /3, ModRM 0xD8 | reg).
void Assembler::negl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xD8 | encode));
}
2049
// Emit `i` bytes of padding. When UseAddressNop is set, uses the multi-byte
// "address NOP" encodings (0x0F 0x1F [addr]) with CPU-vendor-specific
// sequences; otherwise falls back to 0x66-prefixed single-byte NOPs.
// The switch statements below rely on intentional case fall-through to
// accumulate 0x66 size prefixes before the final NOP encoding.
void Assembler::nop(int i) {
#ifdef ASSERT
  assert(i > 0, " ");
  // The fancy nops aren't currently recognized by debuggers making it a
  // pain to disassemble code while debugging. If asserts are on clearly
  // speed is not an issue so simply use the single byte traditional nop
  // to do alignment.

  for (; i > 0 ; i--) emit_int8((unsigned char)0x90);
  return;

#endif // ASSERT

  if (UseAddressNop && VM_Version::is_intel()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is Intel specific - don't use consecutive address nops

    // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90

    while(i >= 15) {
      // For Intel don't generate consecutive addess nops (mix with regular nops)
      i -= 15;
      emit_int8(0x66);   // size prefix
      emit_int8(0x66);   // size prefix
      emit_int8(0x66);   // size prefix
      addr_nop_8();
      emit_int8(0x66);   // size prefix
      emit_int8(0x66);   // size prefix
      emit_int8(0x66);   // size prefix
      emit_int8((unsigned char)0x90);
                         // nop
    }
    // Remaining 0-14 bytes; cases fall through to build up 0x66 prefixes.
    switch (i) {
      case 14:
        emit_int8(0x66); // size prefix
      case 13:
        emit_int8(0x66); // size prefix
      case 12:
        addr_nop_8();
        emit_int8(0x66); // size prefix
        emit_int8(0x66); // size prefix
        emit_int8(0x66); // size prefix
        emit_int8((unsigned char)0x90);
                         // nop
        break;
      case 11:
        emit_int8(0x66); // size prefix
      case 10:
        emit_int8(0x66); // size prefix
      case 9:
        emit_int8(0x66); // size prefix
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_int8(0x66); // size prefix
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_int8(0x66); // size prefix
      case 2:
        emit_int8(0x66); // size prefix
      case 1:
        emit_int8((unsigned char)0x90);
                         // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }
  if (UseAddressNop && VM_Version::is_amd()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is AMD specific - use consecutive address nops

    // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //     Size prefixes (0x66) are added for larger sizes

    while(i >= 22) {
      i -= 11;
      emit_int8(0x66); // size prefix
      emit_int8(0x66); // size prefix
      emit_int8(0x66); // size prefix
      addr_nop_8();
    }
    // Generate first nop for size between 21-12
    // (cases fall through; `i` is decremented as prefixes/nops are emitted,
    // leaving the residue for the second switch below)
    switch (i) {
      case 21:
        i -= 1;
        emit_int8(0x66); // size prefix
      case 20:
      case 19:
        i -= 1;
        emit_int8(0x66); // size prefix
      case 18:
      case 17:
        i -= 1;
        emit_int8(0x66); // size prefix
      case 16:
      case 15:
        i -= 8;
        addr_nop_8();
        break;
      case 14:
      case 13:
        i -= 7;
        addr_nop_7();
        break;
      case 12:
        i -= 6;
        emit_int8(0x66); // size prefix
        addr_nop_5();
        break;
      default:
        assert(i < 12, " ");
    }

    // Generate second nop for size between 11-1
    switch (i) {
      case 11:
        emit_int8(0x66); // size prefix
      case 10:
        emit_int8(0x66); // size prefix
      case 9:
        emit_int8(0x66); // size prefix
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_int8(0x66); // size prefix
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_int8(0x66); // size prefix
      case 2:
        emit_int8(0x66); // size prefix
      case 1:
        emit_int8((unsigned char)0x90);
                         // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }

  // Fallback for CPUs where address nops are not used.
  // Using nops with size prefixes "0x66 0x90".
  // From AMD Optimization Guide:
  //  1: 0x90
  //  2: 0x66 0x90
  //  3: 0x66 0x66 0x90
  //  4: 0x66 0x66 0x66 0x90
  //  5: 0x66 0x66 0x90 0x66 0x90
  //  6: 0x66 0x66 0x90 0x66 0x66 0x90
  //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
  //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
  //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  //
  while(i > 12) {
    i -= 4;
    emit_int8(0x66); // size prefix
    emit_int8(0x66);
    emit_int8(0x66);
    emit_int8((unsigned char)0x90);
                     // nop
  }
  // 1 - 12 nops
  if(i > 8) {
    if(i > 9) {
      i -= 1;
      emit_int8(0x66);
    }
    i -= 3;
    emit_int8(0x66);
    emit_int8(0x66);
    emit_int8((unsigned char)0x90);
  }
  // 1 - 8 nops
  if(i > 4) {
    if(i > 6) {
      i -= 1;
      emit_int8(0x66);
    }
    i -= 3;
    emit_int8(0x66);
    emit_int8(0x66);
    emit_int8((unsigned char)0x90);
  }
  // 1 - 4 remaining bytes; cases fall through.
  switch (i) {
    case 4:
      emit_int8(0x66);
    case 3:
      emit_int8(0x66);
    case 2:
      emit_int8(0x66);
    case 1:
      emit_int8((unsigned char)0x90);
      break;
    default:
      assert(i == 0, " ");
  }
}
2302
// NOT r32 — one's-complement negate (F7 /2, ModRM 0xD0 | reg).
void Assembler::notl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xD0 | encode));
}
2308
// OR m32, imm32 (0x81 /1; rcx encodes the /1 extension).
void Assembler::orl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rcx, dst, imm32);
}

// OR r32, imm32 (0x81 /1, register form).
void Assembler::orl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC8, dst, imm32);
}

// OR r32, m32 (opcode 0x0B).
void Assembler::orl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x0B);
  emit_operand(dst, src);
}

// OR r32, r32 (opcode 0x0B, register form).
void Assembler::orl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x0B, 0xC0, dst, src);
}
2331
// PACKUSWB xmm, m128 — pack words to unsigned bytes with saturation (66 0F 67).
void Assembler::packuswb(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
}

// PACKUSWB xmm, xmm (66 0F 67, register form).
void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
}

// VPACKUSWB — AVX three-operand form; 256-bit integer ops need AVX2.
void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0x67, dst, nds, src, VEX_SIMD_66, vector256);
}
2347
2348void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256) {
2349  assert(VM_Version::supports_avx2(), "");
2350  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, true, vector256);
2351  emit_int8(0x00);
2352  emit_int8(0xC0 | encode);
2353  emit_int8(imm8);
2354}
2355
// PAUSE — spin-wait hint (F3 90).
void Assembler::pause() {
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)0x90);
}
2360
// PCMPESTRI xmm, m128, imm8 — packed string compare with explicit lengths,
// result index in ECX (SSE4.2; 66 0F 3A 61).
void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
  assert(VM_Version::supports_sse4_2(), "");
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
  emit_int8(0x61);
  emit_operand(dst, src);
  emit_int8(imm8);
}

// PCMPESTRI xmm, xmm, imm8 — register form.
void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_2(), "");
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
  emit_int8(0x61);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}
2377
// PEXTRD r32, xmm, imm8 — extract dword (SSE4.1; 66 0F 3A 16, no REX.W).
// The GPR is re-encoded as an XMM register number for the SIMD prefix helper.
void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, false);
  emit_int8(0x16);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}

// PEXTRQ r64, xmm, imm8 — extract qword (same encoding with REX.W = `true`).
void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, true);
  emit_int8(0x16);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}
2393
// PINSRD xmm, r32, imm8 — insert dword (SSE4.1; 66 0F 3A 22, no REX.W).
void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, false);
  emit_int8(0x22);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}

// PINSRQ xmm, r64, imm8 — insert qword (same encoding with REX.W = `true`).
void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, true);
  emit_int8(0x22);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}
2409
// PMOVZXBW xmm, m64 — zero-extend 8 bytes to 8 words (SSE4.1; 66 0F 38 30).
void Assembler::pmovzxbw(XMMRegister dst, Address src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8(0x30);
  emit_operand(dst, src);
}

// PMOVZXBW xmm, xmm — register form.
void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8(0x30);
  emit_int8((unsigned char)(0xC0 | encode));
}
2424
// generic
// POP r — pop pointer-sized value from the stack (0x58 + reg).
void Assembler::pop(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8(0x58 | encode);
}
2430
// POPCNT r32, m32 — population count (F3 0F B8). Note the mandatory F3
// prefix is emitted before any REX prefix, as the encoding requires.
void Assembler::popcntl(Register dst, Address src) {
  assert(VM_Version::supports_popcnt(), "must support");
  InstructionMark im(this);
  emit_int8((unsigned char)0xF3);
  prefix(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB8);
  emit_operand(dst, src);
}

// POPCNT r32, r32 — register form (F3 0F B8).
void Assembler::popcntl(Register dst, Register src) {
  assert(VM_Version::supports_popcnt(), "must support");
  emit_int8((unsigned char)0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB8);
  emit_int8((unsigned char)(0xC0 | encode));
}
2449
// POPF — pop flags register from the stack (0x9D).
void Assembler::popf() {
  emit_int8((unsigned char)0x9D);
}
2453
#ifndef _LP64 // no 32bit push/pop on amd64
// POP m32 — pop from the stack into memory (0x8F /0; rax encodes /0).
void Assembler::popl(Address dst) {
  // NOTE: this will adjust stack by 8byte on 64bits
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x8F);
  emit_operand(rax, dst);
}
#endif
2463
// Shared helper: emit address prefixes plus the 0x0F escape byte that all
// prefetch encodings below start with.
void Assembler::prefetch_prefix(Address src) {
  prefix(src);
  emit_int8(0x0F);
}
2468
// PREFETCHNTA m8 — prefetch, non-temporal (0F 18 /0).
void Assembler::prefetchnta(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_int8(0x18);
  emit_operand(rax, src); // 0, src
}

// PREFETCH m8 — 3DNow! prefetch for read (0F 0D /0).
void Assembler::prefetchr(Address src) {
  assert(VM_Version::supports_3dnow_prefetch(), "must support");
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_int8(0x0D);
  emit_operand(rax, src); // 0, src
}

// PREFETCHT0 m8 — prefetch into all cache levels (0F 18 /1).
void Assembler::prefetcht0(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_int8(0x18);
  emit_operand(rcx, src); // 1, src
}

// PREFETCHT1 m8 — prefetch into L2 and up (0F 18 /2).
void Assembler::prefetcht1(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_int8(0x18);
  emit_operand(rdx, src); // 2, src
}

// PREFETCHT2 m8 — prefetch into L3 and up (0F 18 /3).
void Assembler::prefetcht2(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_int8(0x18);
  emit_operand(rbx, src); // 3, src
}

// PREFETCHW m8 — prefetch with intent to write (0F 0D /1).
void Assembler::prefetchw(Address src) {
  assert(VM_Version::supports_3dnow_prefetch(), "must support");
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_int8(0x0D);
  emit_operand(rcx, src); // 1, src
}
2516
// Emit a raw prefix byte (e.g. REX, segment override) verbatim.
void Assembler::prefix(Prefix p) {
  emit_int8(p);
}
2520
// PSHUFB xmm, xmm — byte shuffle by control mask (SSSE3; 66 0F 38 00).
void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_ssse3(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8(0x00);
  emit_int8((unsigned char)(0xC0 | encode));
}

// PSHUFB xmm, m128 — memory form.
void Assembler::pshufb(XMMRegister dst, Address src) {
  assert(VM_Version::supports_ssse3(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8(0x00);
  emit_operand(dst, src);
}
2535
// PSHUFD xmm, xmm, imm8 — dword shuffle (66 0F 70).
void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66);
  emit_int8(mode & 0xFF);

}

// PSHUFD xmm, m128, imm8 — memory form.
void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66);
  emit_int8(0x70);
  emit_operand(dst, src);
  emit_int8(mode & 0xFF);
}
2554
// PSHUFLW xmm, xmm, imm8 — shuffle low words (F2 0F 70).
void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2);
  emit_int8(mode & 0xFF);
}

// PSHUFLW xmm, m128, imm8 — memory form.
void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F2);
  emit_int8(0x70);
  emit_operand(dst, src);
  emit_int8(mode & 0xFF);
}
2572
// PSRLDQ xmm, imm8 — logical right shift of the whole 128-bit register by
// `shift` bytes (66 0F 73 /3; xmm3 supplies the /3 ModRM extension).
void Assembler::psrldq(XMMRegister dst, int shift) {
  // Shift 128 bit value in xmm register by number of bytes.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66);
  emit_int8(0x73);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift);
}
2581
// PTEST xmm, m128 — logical compare setting ZF/CF (SSE4.1; 66 0F 38 17).
void Assembler::ptest(XMMRegister dst, Address src) {
  assert(VM_Version::supports_sse4_1(), "");
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8(0x17);
  emit_operand(dst, src);
}

// PTEST xmm, xmm — register form.
void Assembler::ptest(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8(0x17);
  emit_int8((unsigned char)(0xC0 | encode));
}
2597
// VPTEST ymm, m256 — 256-bit logical compare (AVX; VEX.256 66 0F 38 17).
void Assembler::vptest(XMMRegister dst, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  bool vector256 = true;
  assert(dst != xnoreg, "sanity");
  int dst_enc = dst->encoding();
  // swap src<->dst for encoding
  vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
  emit_int8(0x17);
  emit_operand(dst, src);
}

// VPTEST ymm, ymm — register form.
void Assembler::vptest(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  bool vector256 = true;
  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
  emit_int8(0x17);
  emit_int8((unsigned char)(0xC0 | encode));
}
2617
// PUNPCKLBW xmm, m128 — interleave low bytes (66 0F 60).
void Assembler::punpcklbw(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
}

// PUNPCKLBW xmm, xmm — register form.
void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
}

// PUNPCKLDQ xmm, m128 — interleave low dwords (66 0F 62).
void Assembler::punpckldq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
}

// PUNPCKLDQ xmm, xmm — register form.
void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
}

// PUNPCKLQDQ xmm, xmm — interleave low quadwords (66 0F 6C).
void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x6C, dst, src, VEX_SIMD_66);
}
2644
// PUSH imm32 — push a sign-extended 32-bit immediate (0x68).
void Assembler::push(int32_t imm32) {
  // in 64bits we push 64bits onto the stack but only
  // take a 32bit immediate
  emit_int8(0x68);
  emit_int32(imm32);
}

// PUSH r — push a pointer-sized register (0x50 + reg).
void Assembler::push(Register src) {
  int encode = prefix_and_encode(src->encoding());

  emit_int8(0x50 | encode);
}
2657
// PUSHF — push flags register onto the stack (0x9C).
void Assembler::pushf() {
  emit_int8((unsigned char)0x9C);
}
2661
#ifndef _LP64 // no 32bit push/pop on amd64
// PUSH m32 — push from memory (0xFF /6; rsi encodes the /6 extension).
void Assembler::pushl(Address src) {
  // Note this will push 64bit on 64bit
  InstructionMark im(this);
  prefix(src);
  emit_int8((unsigned char)0xFF);
  emit_operand(rsi, src);
}
#endif
2671
2672void Assembler::rcll(Register dst, int imm8) {
2673  assert(isShiftCount(imm8), "illegal shift count");
2674  int encode = prefix_and_encode(dst->encoding());
2675  if (imm8 == 1) {
2676    emit_int8((unsigned char)0xD1);
2677    emit_int8((unsigned char)(0xD0 | encode));
2678  } else {
2679    emit_int8((unsigned char)0xC1);
2680    emit_int8((unsigned char)0xD0 | encode);
2681    emit_int8(imm8);
2682  }
2683}
2684
// RDTSC — read time-stamp counter into EDX:EAX (0F 31).
void Assembler::rdtsc() {
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0x31);
}
2689
// copies data from [esi] to [edi] using rcx pointer sized words
// generic
// REP MOVS — F3 (REX.W) A5; MOVSQ on 64-bit, MOVSD on 32-bit.
void Assembler::rep_mov() {
  emit_int8((unsigned char)0xF3);
  // MOVSQ
  LP64_ONLY(prefix(REX_W));
  emit_int8((unsigned char)0xA5);
}

// sets rcx bytes with rax, value at [edi]
void Assembler::rep_stosb() {
  emit_int8((unsigned char)0xF3); // REP
  LP64_ONLY(prefix(REX_W));
  emit_int8((unsigned char)0xAA); // STOSB
}

// sets rcx pointer sized words with rax, value at [edi]
// generic
void Assembler::rep_stos() {
  emit_int8((unsigned char)0xF3); // REP
  LP64_ONLY(prefix(REX_W));       // LP64:STOSQ, LP32:STOSD
  emit_int8((unsigned char)0xAB);
}

// scans rcx pointer sized words at [edi] for occurance of rax,
// generic
// REPNE SCAS — F2 (REX.W) AF.
void Assembler::repne_scan() { // repne_scan
  emit_int8((unsigned char)0xF2);
  // SCASQ
  LP64_ONLY(prefix(REX_W));
  emit_int8((unsigned char)0xAF);
}
2722
#ifdef _LP64
// scans rcx 4 byte words at [edi] for occurance of rax,
// generic
// REPNE SCASD — F2 AF without REX.W (dword elements even on 64-bit).
void Assembler::repne_scanl() { // repne_scan
  emit_int8((unsigned char)0xF2);
  // SCASL
  emit_int8((unsigned char)0xAF);
}
#endif
2732
// RET / RET imm16 — near return; the imm16 form (C2) additionally pops
// imm16 bytes of arguments, the plain form (C3) pops nothing.
void Assembler::ret(int imm16) {
  if (imm16 == 0) {
    emit_int8((unsigned char)0xC3);
  } else {
    emit_int8((unsigned char)0xC2);
    emit_int16(imm16);
  }
}
2741
// SAHF — store AH into flags (0x9E); invalid to use in 64-bit mode here.
void Assembler::sahf() {
#ifdef _LP64
  // Not supported in 64bit mode
  ShouldNotReachHere();
#endif
  emit_int8((unsigned char)0x9E);
}
2749
// SAR r32, imm8 — arithmetic right shift. Short form (D1 /7) when the
// count is 1, imm8 form (C1 /7) otherwise.
void Assembler::sarl(Register dst, int imm8) {
  int encode = prefix_and_encode(dst->encoding());
  assert(isShiftCount(imm8), "illegal shift count");
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xF8 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xF8 | encode));
    emit_int8(imm8);
  }
}

// SAR r32, CL — shift count taken from CL (D3 /7).
void Assembler::sarl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xF8 | encode));
}
2768
// SBB m32, imm32 — subtract with borrow (0x81 /3; rbx encodes /3).
void Assembler::sbbl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);
}

// SBB r32, imm32 (0x81 /3, register form).
void Assembler::sbbl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD8, dst, imm32);
}


// SBB r32, m32 (opcode 0x1B).
void Assembler::sbbl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x1B);
  emit_operand(dst, src);
}

// SBB r32, r32 (opcode 0x1B, register form).
void Assembler::sbbl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}
2792
2793void Assembler::setb(Condition cc, Register dst) {
2794  assert(0 <= cc && cc < 16, "illegal cc");
2795  int encode = prefix_and_encode(dst->encoding(), true);
2796  emit_int8(0x0F);
2797  emit_int8((unsigned char)0x90 | cc);
2798  emit_int8((unsigned char)(0xC0 | encode));
2799}
2800
// SHL r32, imm8 — left shift. Short form (D1 /4) when the count is 1,
// imm8 form (C1 /4) otherwise.
void Assembler::shll(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  if (imm8 == 1 ) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xE0 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xE0 | encode));
    emit_int8(imm8);
  }
}

// SHL r32, CL — shift count taken from CL (D3 /4).
void Assembler::shll(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xE0 | encode));
}

// SHR r32, imm8 — logical right shift (C1 /5).
void Assembler::shrl(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xC1);
  emit_int8((unsigned char)(0xE8 | encode));
  emit_int8(imm8);
}

// SHR r32, CL — shift count taken from CL (D3 /5).
void Assembler::shrl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xE8 | encode));
}
2833
// copies a single word from [esi] to [edi]
// MOVS — string move, one element (0xA5).
void Assembler::smovl() {
  emit_int8((unsigned char)0xA5);
}
2838
// SQRTSD xmm, xmm — scalar double square root (F2 0F 51).
void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
}

// SQRTSD xmm, m64 — memory form.
void Assembler::sqrtsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
}

// SQRTSS xmm, xmm — scalar float square root (F3 0F 51).
void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
}
2853
2854void Assembler::std() {
2855  emit_int8((unsigned char)0xFD);
2856}
2857
2858void Assembler::sqrtss(XMMRegister dst, Address src) {
2859  NOT_LP64(assert(VM_Version::supports_sse(), ""));
2860  emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
2861}
2862
// STMXCSR m32 — store the MXCSR control/status register: 0F AE /3.
void Assembler::stmxcsr( Address dst) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  // as_Register(3) supplies the /3 opcode extension in the ModRM reg field.
  emit_operand(as_Register(3), dst);
}
2871
// SUB m32, imm32.  rbp's encoding (5) supplies the /5 SUB opcode
// extension in the ModRM reg field; emit_arith_operand picks the 81 or
// sign-extended-imm8 form.
void Assembler::subl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rbp, dst, imm32);
}

// SUB m32, r32 — 29 /r.
void Assembler::subl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x29);
  emit_operand(src, dst);
}

// SUB r32, imm32 — 81 /5 (0xE8 = ModRM mod=11, reg=/5).
void Assembler::subl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE8, dst, imm32);
}

// Force generation of a 4 byte immediate value even if it fits into 8bit
// (needed when the instruction length must be predictable, e.g. for patching).
void Assembler::subl_imm32(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith_imm32(0x81, 0xE8, dst, imm32);
}

// SUB r32, m32 — 2B /r.
void Assembler::subl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x2B);
  emit_operand(dst, src);
}

// SUB r32, r32 — 2B /r register form.
void Assembler::subl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x2B, 0xC0, dst, src);
}

// subsd — scalar double-precision subtract: F2 0F 5C /r.
void Assembler::subsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
}

void Assembler::subsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
}

// subss — scalar single-precision subtract: F3 0F 5C /r.
void Assembler::subss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
}

void Assembler::subss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
}
2927
// TEST r8, imm8 — F6 /0 ib on the low byte register.
void Assembler::testb(Register dst, int imm8) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  // byteinst=true forces a REX prefix where needed for SPL/BPL/SIL/DIL.
  (void) prefix_and_encode(dst->encoding(), true);
  emit_arith_b(0xF6, 0xC0, dst, imm8);
}

// TEST r32, imm32.  Uses the short rax-only form (A9 id) when dst is rax,
// otherwise F7 /0 id.
void Assembler::testl(Register dst, int32_t imm32) {
  // not using emit_arith because test
  // doesn't support sign-extension of
  // 8bit operands
  int encode = dst->encoding();
  if (encode == 0) {
    emit_int8((unsigned char)0xA9);  // rax short form, no ModRM
  } else {
    encode = prefix_and_encode(encode);
    emit_int8((unsigned char)0xF7);
    emit_int8((unsigned char)(0xC0 | encode));
  }
  emit_int32(imm32);
}

// TEST r32, r32 — 85 /r.
void Assembler::testl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}

// TEST r32, m32 — 85 /r with a memory operand.
void Assembler::testl(Register dst, Address  src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8((unsigned char)0x85);
  emit_operand(dst, src);
}
2960
2961void Assembler::tzcntl(Register dst, Register src) {
2962  assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
2963  emit_int8((unsigned char)0xF3);
2964  int encode = prefix_and_encode(dst->encoding(), src->encoding());
2965  emit_int8(0x0F);
2966  emit_int8((unsigned char)0xBC);
2967  emit_int8((unsigned char)0xC0 | encode);
2968}
2969
2970void Assembler::tzcntq(Register dst, Register src) {
2971  assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
2972  emit_int8((unsigned char)0xF3);
2973  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
2974  emit_int8(0x0F);
2975  emit_int8((unsigned char)0xBC);
2976  emit_int8((unsigned char)(0xC0 | encode));
2977}
2978
// ucomisd — unordered scalar double compare, sets EFLAGS: 66 0F 2E /r.
// "nonds" variant: no implicit destination-source operand merging.
void Assembler::ucomisd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
}

void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
}

// ucomiss — unordered scalar single compare: 0F 2E /r (no prefix).
void Assembler::ucomiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
}

void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
}
2998
// XABORT imm8 (RTM) — C6 F8 ib: abort the transaction, imm8 goes to EAX.
void Assembler::xabort(int8_t imm8) {
  emit_int8((unsigned char)0xC6);
  emit_int8((unsigned char)0xF8);
  emit_int8((unsigned char)(imm8 & 0xFF));
}

// XADD m32, r32 — 0F C1 /r: exchange and add.
void Assembler::xaddl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xC1);
  emit_operand(src, dst);
}

// XBEGIN rel32 (RTM) — C7 F8 cd: start a transaction, branching to the
// abort handler on failure.  If the label is unbound, a zero displacement
// is emitted and a patch location recorded for later back-patching.
void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {
  InstructionMark im(this);
  relocate(rtype);
  if (abort.is_bound()) {
    address entry = target(abort);
    assert(entry != NULL, "abort entry NULL");
    // Displacement is relative to the end of the instruction.
    intptr_t offset = entry - pc();
    emit_int8((unsigned char)0xC7);
    emit_int8((unsigned char)0xF8);
    emit_int32(offset - 6); // 2 opcode + 4 address
  } else {
    abort.add_patch_at(code(), locator());
    emit_int8((unsigned char)0xC7);
    emit_int8((unsigned char)0xF8);
    emit_int32(0);
  }
}
3029}
3030
// XCHG r32, m32 — 87 /r (implicitly locked when a memory operand is used).
void Assembler::xchgl(Register dst, Address src) { // xchg
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8((unsigned char)0x87);
  emit_operand(dst, src);
}

// XCHG r32, r32 — 87 /r register form.
void Assembler::xchgl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x87);
  emit_int8((unsigned char)(0xC0 | encode));
}

// XEND (RTM) — 0F 01 D5: commit the current transaction.
void Assembler::xend() {
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0x01);
  emit_int8((unsigned char)0xD5);
}

// XGETBV — 0F 01 D0: read extended control register (XCR) selected by ECX
// into EDX:EAX.
void Assembler::xgetbv() {
  emit_int8(0x0F);
  emit_int8(0x01);
  emit_int8((unsigned char)0xD0);
}
3055
// XOR r32, imm32 — 81 /6 (0xF0 = ModRM mod=11, reg=/6).
void Assembler::xorl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF0, dst, imm32);
}

// XOR r32, m32 — 33 /r.
void Assembler::xorl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x33);
  emit_operand(dst, src);
}

// XOR r32, r32 — 33 /r register form.
void Assembler::xorl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}
3072
3073
3074// AVX 3-operands scalar float-point arithmetic instructions
3075
// VEX-encoded 3-operand scalar FP: dst = nds OP src.  SIMD prefix selects
// precision: F2 = scalar double (sd), F3 = scalar single (ss).  Opcodes:
// 58 = add, 5C = sub, 59 = mul, 5E = div.  Scalars always use 128-bit VEX.L.
void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}

void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}

void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}

void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}

void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}

void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}

void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}

void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}

void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}

void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}

void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}

void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}

void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}

void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}

void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}

void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}
3155
3156//====================VECTOR ARITHMETIC=====================================
3157
3158// Float-point vector arithmetic
3159
// Packed FP arithmetic.  SIMD prefix selects element width: 66 = packed
// double (pd), none = packed single (ps).  Opcodes: 58 = add, 5C = sub,
// 59 = mul.  v-prefixed forms are VEX 3-operand; vector256 selects VEX.L=1.
void Assembler::addpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_66);
}

void Assembler::addps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE);
}

void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
}

void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
}

void Assembler::subpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);
}

void Assembler::subps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE);
}

void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
}

void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
}

void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
}

void Assembler::mulps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE);
}

void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
}

void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
}
3249
// Packed FP divide and bitwise ops.  Opcodes: 5E = div, 54 = and, 57 = xor.
// 66 prefix = pd, none = ps.  v-prefixed forms are VEX 3-operand;
// vector256 selects VEX.L=1.
void Assembler::divpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);
}

void Assembler::divps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE);
}

void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
}

void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
}

void Assembler::andpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
}

void Assembler::andps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
}

void Assembler::andps(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
}

void Assembler::andpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
}

void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
}

void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
}

void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
}

void Assembler::xorps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
}

void Assembler::xorpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
}

void Assembler::xorps(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
}

void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
}

void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
}
3359
3360
3361// Integer vector arithmetic
// Integer horizontal add / packed add.  The integer-vector assert reads as
// (avx && !vector256) || avx2 (&& binds tighter than ||): 128-bit forms
// need AVX, 256-bit forms need AVX2.
// vphaddw/vphaddd — VEX 0F 38 01/02: horizontal add of adjacent word/dword pairs.
void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
  emit_int8(0x01);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
  emit_int8(0x02);
  emit_int8((unsigned char)(0xC0 | encode));
}

// paddb/w/d/q — packed integer add: 66 0F FC/FD/FE/D4.
void Assembler::paddb(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xFC, dst, src, VEX_SIMD_66);
}

void Assembler::paddw(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xFD, dst, src, VEX_SIMD_66);
}

void Assembler::paddd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xFE, dst, src, VEX_SIMD_66);
}

void Assembler::paddq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
}

// phaddw/phaddd (SSSE3) — 66 0F 38 01/02: non-destructive horizontal add.
void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse3(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8(0x01);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse3(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8(0x02);
  emit_int8((unsigned char)(0xC0 | encode));
}
3409
// VEX 3-operand packed integer add (dst = nds + src), register and memory
// source forms.  Opcodes: FC = byte, FD = word, FE = dword, D4 = qword.
void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
}
3449
// Packed integer subtract.  Opcodes: F8 = byte, F9 = word, FA = dword,
// FB = qword.  SSE2 forms first, then the VEX 3-operand (register and
// memory source) forms.
void Assembler::psubb(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xF8, dst, src, VEX_SIMD_66);
}

void Assembler::psubw(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xF9, dst, src, VEX_SIMD_66);
}

void Assembler::psubd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xFA, dst, src, VEX_SIMD_66);
}

void Assembler::psubq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xFB, dst, src, VEX_SIMD_66);
}

void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);
}
3509
// Packed integer multiply.  pmullw: 66 0F D5 (low 16-bit products);
// pmulld: 66 0F 38 40 (SSE4.1, low 32-bit products).
void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xD5, dst, src, VEX_SIMD_66);
}

void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8(0x40);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
}

// vpmulld needs the 0F 38 opcode map, so it hand-rolls the VEX prefix
// instead of going through emit_vex_arith.
void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
  emit_int8(0x40);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
}

// Memory-source vpmulld: same hand-rolled 0F 38 VEX path with a ModRM
// memory operand.
void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  InstructionMark im(this);
  int dst_enc = dst->encoding();
  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
  vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
  emit_int8(0x40);
  emit_operand(dst, src);
}
3548
// Shift packed integers left by specified number of bits.
// Immediate forms use opcode-extension encoding (66 0F 71/72/73 /6 ib),
// where passing xmm6 as the "reg" operand supplies the /6 extension.
// Register-count forms (F1/F2/F3) shift by the count in the low qword of
// the shift register.
void Assembler::psllw(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM6 is for /6 encoding: 66 0F 71 /6 ib
  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
  emit_int8(0x71);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}

void Assembler::pslld(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM6 is for /6 encoding: 66 0F 72 /6 ib
  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
  emit_int8(0x72);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}

void Assembler::psllq(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM6 is for /6 encoding: 66 0F 73 /6 ib
  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
  emit_int8(0x73);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}

// Shift-by-xmm forms: 66 0F F1/F2/F3.
void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66);
}

void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66);
}

void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66);
}

// VEX immediate forms: dst receives src shifted; xmm6 again carries /6.
void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM6 is for /6 encoding: 66 0F 71 /6 ib
  emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector256);
  emit_int8(shift & 0xFF);
}

void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM6 is for /6 encoding: 66 0F 72 /6 ib
  emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector256);
  emit_int8(shift & 0xFF);
}

void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM6 is for /6 encoding: 66 0F 73 /6 ib
  emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector256);
  emit_int8(shift & 0xFF);
}

// VEX shift-by-xmm forms.
void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector256);
}

void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector256);
}

void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector256);
}
3627
// Shift packed integers logically right by specified number of bits.

// SSE2 PSRLW xmm, imm8: logical right shift of packed 16-bit words.
void Assembler::psrlw(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
  emit_int8(0x71);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}
3637
// SSE2 PSRLD xmm, imm8: logical right shift of packed 32-bit dwords.
void Assembler::psrld(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
  emit_int8(0x72);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}
3646
// SSE2 PSRLQ xmm, imm8: logical right shift of packed 64-bit qwords.
void Assembler::psrlq(XMMRegister dst, int shift) {
  // Do not confuse it with psrldq SSE2 instruction which
  // shifts 128 bit value in xmm register by number of bytes.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
  emit_int8(0x73);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}
3657
// SSE2 PSRLW xmm, xmm: shift packed words right by the count held in 'shift'.
void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66);
}
3662
// SSE2 PSRLD xmm, xmm: shift packed dwords right by the count held in 'shift'.
void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66);
}
3667
// SSE2 PSRLQ xmm, xmm: shift packed qwords right by the count held in 'shift'.
void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
}
3672
// AVX: logical right shift of packed 16-bit words by an immediate bit count.
void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
  emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector256);
  emit_int8(shift & 0xFF);
}
3679
// AVX: logical right shift of packed 32-bit dwords by an immediate bit count.
void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
  emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector256);
  emit_int8(shift & 0xFF);
}
3686
// AVX: logical right shift of packed 64-bit qwords by an immediate bit count.
void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
  emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector256);
  emit_int8(shift & 0xFF);
}
3693
// AVX: logical right shift of packed words by the count held in xmm 'shift'.
void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector256);
}
3698
// AVX: logical right shift of packed dwords by the count held in xmm 'shift'.
void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector256);
}
3703
// AVX: logical right shift of packed qwords by the count held in xmm 'shift'.
void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector256);
}
3708
// Shift packed integers arithmetically right by specified number of bits.

// SSE2 PSRAW xmm, imm8: arithmetic right shift of packed 16-bit words.
void Assembler::psraw(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
  emit_int8(0x71);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}
3718
// SSE2 PSRAD xmm, imm8: arithmetic right shift of packed 32-bit dwords.
void Assembler::psrad(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
  emit_int8(0x72);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}
3727
// SSE2 PSRAW xmm, xmm: arithmetic right shift by the count held in 'shift'.
void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66);
}
3732
// SSE2 PSRAD xmm, xmm: arithmetic right shift by the count held in 'shift'.
void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66);
}
3737
// AVX: arithmetic right shift of packed 16-bit words by an immediate bit count.
void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
  emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector256);
  emit_int8(shift & 0xFF);
}
3744
// AVX: arithmetic right shift of packed 32-bit dwords by an immediate bit count.
void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
  emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector256);
  emit_int8(shift & 0xFF);
}
3751
// AVX: arithmetic right shift of packed words by the count held in xmm 'shift'.
void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector256);
}
3756
// AVX: arithmetic right shift of packed dwords by the count held in xmm 'shift'.
void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector256);
}
3761
3762
// AND packed integers

// SSE2 PAND: dst &= src (bitwise, whole xmm).
void Assembler::pand(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
}
3768
// AVX VPAND: dst = nds & src (register form).
void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
}
3773
// AVX VPAND: dst = nds & [src] (memory form).
void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
}
3778
// SSE2 POR: dst |= src (bitwise, whole xmm).
void Assembler::por(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xEB, dst, src, VEX_SIMD_66);
}
3783
// AVX VPOR: dst = nds | src (register form).
void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
}
3788
// AVX VPOR: dst = nds | [src] (memory form).
void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
}
3793
// SSE2 PXOR: dst ^= src (bitwise, whole xmm).
void Assembler::pxor(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xEF, dst, src, VEX_SIMD_66);
}
3798
// AVX VPXOR: dst = nds ^ src (register form).
void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
}
3803
// AVX VPXOR: dst = nds ^ [src] (memory form).
void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
}
3808
3809
// VINSERTF128 (VEX.256 66 0F3A 18 /r ib): insert the 128-bit 'src' into the
// upper half of 'dst', lower half taken from 'nds'.
void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  bool vector256 = true;
  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
  emit_int8(0x18);
  emit_int8((unsigned char)(0xC0 | encode));
  // 0x00 - insert into lower 128 bits
  // 0x01 - insert into upper 128 bits
  emit_int8(0x01);
}
3820
// VINSERTF128 memory form: insert 128 bits loaded from 'src' into the upper
// half of 'dst' (dst also serves as the nds source for the lower half).
void Assembler::vinsertf128h(XMMRegister dst, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  bool vector256 = true;
  assert(dst != xnoreg, "sanity");
  int dst_enc = dst->encoding();
  // swap src<->dst for encoding
  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
  emit_int8(0x18);
  emit_operand(dst, src);
  // 0x01 - insert into upper 128 bits
  emit_int8(0x01);
}
3834
// VEXTRACTF128 (VEX.256 66 0F3A 19 /r ib): extract the upper 128 bits of
// 'src' into 'dst'. Note src/dst are swapped in the encoding (src is the
// ModRM reg operand).
void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  bool vector256 = true;
  int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
  emit_int8(0x19);
  emit_int8((unsigned char)(0xC0 | encode));
  // 0x00 - extract from lower 128 bits
  // 0x01 - extract from upper 128 bits
  emit_int8(0x01);
}
3845
// VEXTRACTF128 memory form: store the upper 128 bits of 'src' to 'dst'.
void Assembler::vextractf128h(Address dst, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  bool vector256 = true;
  assert(src != xnoreg, "sanity");
  int src_enc = src->encoding();
  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
  emit_int8(0x19);
  emit_operand(src, dst);
  // 0x01 - extract from upper 128 bits
  emit_int8(0x01);
}
3858
// VINSERTI128 (AVX2 integer variant of vinsertf128h): insert 'src' into the
// upper half of 'dst', lower half taken from 'nds'.
void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx2(), "");
  bool vector256 = true;
  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
  emit_int8(0x38);
  emit_int8((unsigned char)(0xC0 | encode));
  // 0x00 - insert into lower 128 bits
  // 0x01 - insert into upper 128 bits
  emit_int8(0x01);
}
3869
// VINSERTI128 memory form: insert 128 bits loaded from 'src' into the upper
// half of 'dst'.
void Assembler::vinserti128h(XMMRegister dst, Address src) {
  assert(VM_Version::supports_avx2(), "");
  InstructionMark im(this);
  bool vector256 = true;
  assert(dst != xnoreg, "sanity");
  int dst_enc = dst->encoding();
  // swap src<->dst for encoding
  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
  emit_int8(0x38);
  emit_operand(dst, src);
  // 0x01 - insert into upper 128 bits
  emit_int8(0x01);
}
3883
// VEXTRACTI128 memory form (AVX2): store the upper 128 bits of 'src' to 'dst'.
void Assembler::vextracti128h(Address dst, XMMRegister src) {
  assert(VM_Version::supports_avx2(), "");
  InstructionMark im(this);
  bool vector256 = true;
  assert(src != xnoreg, "sanity");
  int src_enc = src->encoding();
  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
  emit_int8(0x39);
  emit_operand(src, dst);
  // 0x01 - extract from upper 128 bits
  emit_int8(0x01);
}
3896
// duplicate 4-bytes integer data from src into 8 locations in dest
// VPBROADCASTD (AVX2, VEX.256 66 0F38 58 /r).
void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_avx2(), "");
  bool vector256 = true;
  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}
3905
// Carry-Less Multiplication Quadword
// PCLMULQDQ (66 0F3A 44 /r ib): 'mask' selects which qword of each operand
// participates in the multiply.
void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
  assert(VM_Version::supports_clmul(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
  emit_int8(0x44);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8((unsigned char)mask);
}
3914
// Carry-Less Multiplication Quadword
// VPCLMULQDQ (VEX.128 66 0F3A 44 /r ib): three-operand AVX form.
void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
  assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
  bool vector256 = false;
  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
  emit_int8(0x44);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8((unsigned char)mask);
}
3924
// VZEROUPPER (VEX 0F 77): zero the upper 128 bits of all ymm registers.
// The encode result is unused; the prefix call only emits the VEX bytes.
void Assembler::vzeroupper() {
  assert(VM_Version::supports_avx(), "");
  (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
  emit_int8(0x77);
}
3930
3931
3932#ifndef _LP64
3933// 32bit only pieces of the assembler
3934
// CMP r32, imm32 (81 /7 id) where the immediate carries a relocation.
void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT
  InstructionMark im(this);
  emit_int8((unsigned char)0x81);
  emit_int8((unsigned char)(0xF8 | src1->encoding()));  // ModRM: 11 111 reg => /7
  emit_data(imm32, rspec, 0);
}
3942
// CMP m32, imm32 (81 /7 id) where the immediate carries a relocation.
void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs)
  InstructionMark im(this);
  emit_int8((unsigned char)0x81);
  emit_operand(rdi, src1);  // rdi (encoding 7) supplies the /7 reg field
  emit_data(imm32, rspec, 0);
}
3950
// The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax,
// and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded
// into rdx:rax.  The ZF is set if the compared values were equal, and cleared otherwise.
// Encoding: CMPXCHG8B m64 = 0F C7 /1 (rcx supplies the /1 reg field).
void Assembler::cmpxchg8(Address adr) {
  InstructionMark im(this);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xC7);
  emit_operand(rcx, adr);
}
3960
3961void Assembler::decl(Register dst) {
3962  // Don't use it directly. Use MacroAssembler::decrementl() instead.
3963 emit_int8(0x48 | dst->encoding());
3964}
3965
3966#endif // _LP64
3967
3968// 64bit typically doesn't use the x87 but needs to for the trig funcs
3969
// x87 FABS (D9 E1): ST(0) = |ST(0)|.
void Assembler::fabs() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xE1);
}
3974
// x87 FADD ST(0), ST(i) (D8 C0+i).
void Assembler::fadd(int i) {
  emit_farith(0xD8, 0xC0, i);
}
3978
// x87 FADD m64fp (DC /0; rax supplies the reg field).
void Assembler::fadd_d(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDC);
  emit_operand32(rax, src);
}
3984
// x87 FADD m32fp (D8 /0; rax supplies the reg field).
void Assembler::fadd_s(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD8);
  emit_operand32(rax, src);
}
3990
// x87 FADD ST(i), ST(0) (DC C0+i).
void Assembler::fadda(int i) {
  emit_farith(0xDC, 0xC0, i);
}
3994
// x87 FADDP ST(i), ST(0) and pop (DE C0+i).
void Assembler::faddp(int i) {
  emit_farith(0xDE, 0xC0, i);
}
3998
// x87 FCHS (D9 E0): negate ST(0).
void Assembler::fchs() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xE0);
}
4003
// x87 FCOM ST(i) (D8 D0+i): compare ST(0) with ST(i).
void Assembler::fcom(int i) {
  emit_farith(0xD8, 0xD0, i);
}
4007
// x87 FCOMP ST(i) (D8 D8+i): compare ST(0) with ST(i), then pop.
void Assembler::fcomp(int i) {
  emit_farith(0xD8, 0xD8, i);
}
4011
// x87 FCOMP m64fp (DC /3; rbx supplies the reg field).
void Assembler::fcomp_d(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDC);
  emit_operand32(rbx, src);
}
4017
// x87 FCOMP m32fp (D8 /3; rbx supplies the reg field).
void Assembler::fcomp_s(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD8);
  emit_operand32(rbx, src);
}
4023
// x87 FCOMPP (DE D9): compare ST(0) with ST(1) and pop both.
void Assembler::fcompp() {
  emit_int8((unsigned char)0xDE);
  emit_int8((unsigned char)0xD9);
}
4028
// x87 FCOS (D9 FF): ST(0) = cos(ST(0)).
void Assembler::fcos() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xFF);
}
4033
// x87 FDECSTP (D9 F6): decrement the FPU register stack pointer.
void Assembler::fdecstp() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xF6);
}
4038
// x87 FDIV ST(0), ST(i) (D8 F0+i).
void Assembler::fdiv(int i) {
  emit_farith(0xD8, 0xF0, i);
}
4042
// x87 FDIV m64fp (DC /6; rsi supplies the reg field).
void Assembler::fdiv_d(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDC);
  emit_operand32(rsi, src);
}
4048
// x87 FDIV m32fp (D8 /6; rsi supplies the reg field).
void Assembler::fdiv_s(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD8);
  emit_operand32(rsi, src);
}
4054
// x87 register-form divide into ST(i) (DC F8+i); see the note below on
// Intel manual discrepancies for these encodings.
void Assembler::fdiva(int i) {
  emit_farith(0xDC, 0xF8, i);
}
4058
// Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
//       is erroneous for some of the floating-point instructions below.

void Assembler::fdivp(int i) {
  emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
}
4065
// x87 FDIVR ST(0), ST(i) (D8 F8+i): reversed-operand divide.
void Assembler::fdivr(int i) {
  emit_farith(0xD8, 0xF8, i);
}
4069
// x87 FDIVR m64fp (DC /7; rdi supplies the reg field).
void Assembler::fdivr_d(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDC);
  emit_operand32(rdi, src);
}
4075
// x87 FDIVR m32fp (D8 /7; rdi supplies the reg field).
void Assembler::fdivr_s(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD8);
  emit_operand32(rdi, src);
}
4081
// x87 register-form reversed divide into ST(i) (DC F0+i).
void Assembler::fdivra(int i) {
  emit_farith(0xDC, 0xF0, i);
}
4085
void Assembler::fdivrp(int i) {
  emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
}
4089
// x87 FFREE ST(i) (DD C0+i): mark stack register i as empty.
void Assembler::ffree(int i) {
  emit_farith(0xDD, 0xC0, i);
}
4093
// x87 FILD m64int (DF /5; rbp supplies the reg field).
void Assembler::fild_d(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDF);
  emit_operand32(rbp, adr);
}
4099
// x87 FILD m32int (DB /0; rax supplies the reg field).
void Assembler::fild_s(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDB);
  emit_operand32(rax, adr);
}
4105
// x87 FINCSTP (D9 F7): increment the FPU register stack pointer.
void Assembler::fincstp() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xF7);
}
4110
// x87 FINIT: wait prefix (9B) followed by FNINIT (DB E3).
void Assembler::finit() {
  emit_int8((unsigned char)0x9B);
  emit_int8((unsigned char)0xDB);
  emit_int8((unsigned char)0xE3);
}
4116
// x87 FIST m32int (DB /2; rdx supplies the reg field).
void Assembler::fist_s(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDB);
  emit_operand32(rdx, adr);
}
4122
// x87 FISTP m64int (DF /7; rdi supplies the reg field).
void Assembler::fistp_d(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDF);
  emit_operand32(rdi, adr);
}
4128
// x87 FISTP m32int (DB /3; rbx supplies the reg field).
void Assembler::fistp_s(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDB);
  emit_operand32(rbx, adr);
}
4134
// x87 FLD1 (D9 E8): push +1.0 onto the FPU stack.
void Assembler::fld1() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xE8);
}
4139
// x87 FLD m64fp (DD /0; rax supplies the reg field).
void Assembler::fld_d(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDD);
  emit_operand32(rax, adr);
}
4145
// x87 FLD m32fp (D9 /0; rax supplies the reg field).
void Assembler::fld_s(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD9);
  emit_operand32(rax, adr);
}
4151
4152
// x87 FLD ST(index) (D9 C0+index): push a copy of stack register 'index'.
void Assembler::fld_s(int index) {
  emit_farith(0xD9, 0xC0, index);
}
4156
// x87 FLD m80fp (DB /5; rbp supplies the reg field): load 80-bit extended.
void Assembler::fld_x(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDB);
  emit_operand32(rbp, adr);
}
4162
// x87 FLDCW m2byte (D9 /5; rbp supplies the reg field): load control word.
void Assembler::fldcw(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD9);
  emit_operand32(rbp, src);
}
4168
// x87 FLDENV m (D9 /4; rsp supplies the reg field): load FPU environment.
void Assembler::fldenv(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD9);
  emit_operand32(rsp, src);
}
4174
// x87 FLDLG2 (D9 EC): push log10(2).
void Assembler::fldlg2() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xEC);
}
4179
// x87 FLDLN2 (D9 ED): push ln(2).
void Assembler::fldln2() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xED);
}
4184
// x87 FLDZ (D9 EE): push +0.0.
void Assembler::fldz() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xEE);
}
4189
// Natural log via ln(x) = ln(2) * log2(x): push ln(2), swap so x is in
// ST(0), then FYL2X computes ST(1) * log2(ST(0)).
void Assembler::flog() {
  fldln2();
  fxch();
  fyl2x();
}
4195
// Base-10 log via log10(x) = log10(2) * log2(x), same shape as flog().
void Assembler::flog10() {
  fldlg2();
  fxch();
  fyl2x();
}
4201
// x87 FMUL ST(0), ST(i) (D8 C8+i).
void Assembler::fmul(int i) {
  emit_farith(0xD8, 0xC8, i);
}
4205
// x87 FMUL m64fp (DC /1; rcx supplies the reg field).
void Assembler::fmul_d(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDC);
  emit_operand32(rcx, src);
}
4211
// x87 FMUL m32fp (D8 /1; rcx supplies the reg field).
void Assembler::fmul_s(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD8);
  emit_operand32(rcx, src);
}
4217
// x87 FMUL ST(i), ST(0) (DC C8+i).
void Assembler::fmula(int i) {
  emit_farith(0xDC, 0xC8, i);
}
4221
// x87 FMULP ST(i), ST(0) and pop (DE C8+i).
void Assembler::fmulp(int i) {
  emit_farith(0xDE, 0xC8, i);
}
4225
// x87 FNSAVE m (DD /6; rsi supplies the reg field): save FPU state, no wait.
void Assembler::fnsave(Address dst) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDD);
  emit_operand32(rsi, dst);
}
4231
// Store FPU control word: wait prefix (9B) then FNSTCW m2byte (D9 /7; rdi
// supplies the reg field) — i.e. the waiting FSTCW form.
void Assembler::fnstcw(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0x9B);
  emit_int8((unsigned char)0xD9);
  emit_operand32(rdi, src);
}
4238
// x87 FNSTSW AX (DF E0): store FPU status word into AX, no wait.
void Assembler::fnstsw_ax() {
  emit_int8((unsigned char)0xDF);
  emit_int8((unsigned char)0xE0);
}
4243
// x87 FPREM (D9 F8): partial remainder of ST(0) / ST(1) (truncating).
void Assembler::fprem() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xF8);
}
4248
// x87 FPREM1 (D9 F5): IEEE partial remainder of ST(0) / ST(1).
void Assembler::fprem1() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xF5);
}
4253
// x87 FRSTOR m (DD /4; rsp supplies the reg field): restore FPU state.
void Assembler::frstor(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDD);
  emit_operand32(rsp, src);
}
4259
// x87 FSIN (D9 FE): ST(0) = sin(ST(0)).
void Assembler::fsin() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xFE);
}
4264
// x87 FSQRT (D9 FA): ST(0) = sqrt(ST(0)).
void Assembler::fsqrt() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xFA);
}
4269
// x87 FST m64fp (DD /2; rdx supplies the reg field).
void Assembler::fst_d(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDD);
  emit_operand32(rdx, adr);
}
4275
// x87 FST m32fp (D9 /2; rdx supplies the reg field).
void Assembler::fst_s(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD9);
  emit_operand32(rdx, adr);
}
4281
// x87 FSTP m64fp (DD /3; rbx supplies the reg field): store and pop.
void Assembler::fstp_d(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDD);
  emit_operand32(rbx, adr);
}
4287
// x87 FSTP ST(index) (DD D8+index): copy ST(0) to ST(index) and pop.
void Assembler::fstp_d(int index) {
  emit_farith(0xDD, 0xD8, index);
}
4291
// x87 FSTP m32fp (D9 /3; rbx supplies the reg field): store and pop.
void Assembler::fstp_s(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD9);
  emit_operand32(rbx, adr);
}
4297
// x87 FSTP m80fp (DB /7; rdi supplies the reg field): store 80-bit and pop.
void Assembler::fstp_x(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDB);
  emit_operand32(rdi, adr);
}
4303
// x87 FSUB ST(0), ST(i) (D8 E0+i).
void Assembler::fsub(int i) {
  emit_farith(0xD8, 0xE0, i);
}
4307
// x87 FSUB m64fp (DC /4; rsp supplies the reg field).
void Assembler::fsub_d(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDC);
  emit_operand32(rsp, src);
}
4313
// x87 FSUB m32fp (D8 /4; rsp supplies the reg field).
void Assembler::fsub_s(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD8);
  emit_operand32(rsp, src);
}
4319
// x87 register-form subtract into ST(i) (DC E8+i).
void Assembler::fsuba(int i) {
  emit_farith(0xDC, 0xE8, i);
}
4323
void Assembler::fsubp(int i) {
  emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
}
4327
// x87 FSUBR ST(0), ST(i) (D8 E8+i): reversed-operand subtract.
void Assembler::fsubr(int i) {
  emit_farith(0xD8, 0xE8, i);
}
4331
// x87 FSUBR m64fp (DC /5; rbp supplies the reg field).
void Assembler::fsubr_d(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDC);
  emit_operand32(rbp, src);
}
4337
// x87 FSUBR m32fp (D8 /5; rbp supplies the reg field).
void Assembler::fsubr_s(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD8);
  emit_operand32(rbp, src);
}
4343
// x87 register-form reversed subtract into ST(i) (DC E0+i).
void Assembler::fsubra(int i) {
  emit_farith(0xDC, 0xE0, i);
}
4347
void Assembler::fsubrp(int i) {
  emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
}
4351
// tan(ST(0)): FPTAN (D9 F2) leaves tan in ST(1) and pushes 1.0; the
// following FSTP ST(0) (DD D8) pops that 1.0 again.
void Assembler::ftan() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xF2);
  emit_int8((unsigned char)0xDD);
  emit_int8((unsigned char)0xD8);
}
4358
// x87 FTST (D9 E4): compare ST(0) with 0.0.
void Assembler::ftst() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xE4);
}
4363
// x87 FUCOMI ST(0), ST(i) (DB E8+i): unordered compare setting EFLAGS.
void Assembler::fucomi(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDB, 0xE8, i);
}
4369
// x87 FUCOMIP ST(0), ST(i) (DF E8+i): as fucomi, then pop.
void Assembler::fucomip(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDF, 0xE8, i);
}
4375
// FWAIT (9B): wait for pending unmasked FPU exceptions.
void Assembler::fwait() {
  emit_int8((unsigned char)0x9B);
}
4379
// x87 FXCH ST(i) (D9 C8+i): exchange ST(0) with ST(i).
void Assembler::fxch(int i) {
  emit_farith(0xD9, 0xC8, i);
}
4383
// x87 FYL2X (D9 F1): ST(1) = ST(1) * log2(ST(0)), then pop.
void Assembler::fyl2x() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xF1);
}
4388
// x87 FRNDINT (D9 FC): round ST(0) to integer per the rounding mode.
void Assembler::frndint() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xFC);
}
4393
// x87 F2XM1 (D9 F0): ST(0) = 2^ST(0) - 1.
void Assembler::f2xm1() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xF0);
}
4398
// x87 FLDL2E (D9 EA): push log2(e).
void Assembler::fldl2e() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xEA);
}
4403
// SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
// Indexed by VEX_SIMD_NONE/66/F3/F2.
static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
// SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
// Indexed by VEX_OPCODE_NONE/0F/0F_38/0F_3A.
static int simd_opc[4] = { 0,    0, 0x38, 0x3A };
4408
// Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
// Memory-operand form: emits (optional) mandatory prefix, REX, and the
// 0F / 0F 38 / 0F 3A opcode escape bytes.
void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
  if (pre > 0) {
    emit_int8(simd_pre[pre]);   // mandatory SIMD prefix (66/F3/F2) precedes REX
  }
  if (rex_w) {
    prefixq(adr, xreg);         // REX.W form
  } else {
    prefix(adr, xreg);
  }
  if (opc > 0) {
    emit_int8(0x0F);            // opcode escape
    int opc2 = simd_opc[opc];
    if (opc2 > 0) {
      emit_int8(opc2);          // second escape byte (0x38 or 0x3A)
    }
  }
}
4427
// Register-register form of rex_prefix(); returns the low-3-bit ModRM
// reg/rm encoding produced by prefix(q)_and_encode.
int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
  if (pre > 0) {
    emit_int8(simd_pre[pre]);   // mandatory SIMD prefix (66/F3/F2) precedes REX
  }
  int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
                          prefix_and_encode(dst_enc, src_enc);
  if (opc > 0) {
    emit_int8(0x0F);            // opcode escape
    int opc2 = simd_opc[opc];
    if (opc2 > 0) {
      emit_int8(opc2);          // second escape byte (0x38 or 0x3A)
    }
  }
  return encode;
}
4443
4444
// Emit a VEX prefix from its raw fields. The 3-byte form is required
// whenever a field the 2-byte form cannot express is needed: B, X or W
// bits, or an opcode map other than 0F.
void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) {
  if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
    prefix(VEX_3bytes);

    int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
    byte1 = (~byte1) & 0xE0;          // R/X/B are stored inverted
    byte1 |= opc;                     // m-mmmm opcode map select
    emit_int8(byte1);

    int byte2 = ((~nds_enc) & 0xf) << 3;  // vvvv: inverted extra source register
    byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre;  // W, L, pp
    emit_int8(byte2);
  } else {
    prefix(VEX_2bytes);

    int byte1 = vex_r ? VEX_R : 0;
    byte1 = (~byte1) & 0x80;          // R stored inverted
    byte1 |= ((~nds_enc) & 0xf) << 3; // vvvv: inverted extra source register
    byte1 |= (vector256 ? 4 : 0) | pre;  // L, pp
    emit_int8(byte1);
  }
}
4467
// Memory-operand VEX prefix: derive R/B/X extension bits from the xmm
// register encoding and the address's base/index registers.
void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){
  bool vex_r = (xreg_enc >= 8);
  bool vex_b = adr.base_needs_rex();
  bool vex_x = adr.index_needs_rex();
  vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
}
4474
// Register-register VEX prefix; returns the ModRM reg/rm byte contents
// (low 3 bits of each encoding — the high bits went into VEX.R/VEX.B).
int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) {
  bool vex_r = (dst_enc >= 8);
  bool vex_b = (src_enc >= 8);
  bool vex_x = false;
  vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
  return (((dst_enc & 7) << 3) | (src_enc & 7));
}
4482
4483
// Emit either a VEX prefix (when AVX is in use) or the legacy SSE
// prefix/escape bytes, for a memory-operand instruction.
void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
  if (UseAVX > 0) {
    int xreg_enc = xreg->encoding();
    int  nds_enc = nds->is_valid() ? nds->encoding() : 0;
    vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256);
  } else {
    // SSE has no third operand; nds must coincide with xreg or be absent.
    assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
    rex_prefix(adr, xreg, pre, opc, rex_w);
  }
}
4494
// Register-register counterpart of simd_prefix(); returns the ModRM
// reg/rm encoding for the caller to OR into 0xC0.
int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
  int dst_enc = dst->encoding();
  int src_enc = src->encoding();
  if (UseAVX > 0) {
    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256);
  } else {
    // SSE is two-operand; nds must alias one of them or be absent.
    assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
    return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
  }
}
4506
// Two-operand SIMD arithmetic, memory source: dst = dst op [src].
void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
  InstructionMark im(this);
  simd_prefix(dst, dst, src, pre);
  emit_int8(opcode);
  emit_operand(dst, src);
}
4513
// Two-operand SIMD arithmetic, register source: dst = dst op src.
void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
  int encode = simd_prefix_and_encode(dst, dst, src, pre);
  emit_int8(opcode);
  emit_int8((unsigned char)(0xC0 | encode));
}
4519
// Versions with no second source register (non-destructive source).
void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
  InstructionMark im(this);
  simd_prefix(dst, xnoreg, src, pre);
  emit_int8(opcode);
  emit_operand(dst, src);
}
4527
// Non-destructive-source form, register operand: dst = op(src).
void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
  int encode = simd_prefix_and_encode(dst, xnoreg, src, pre);
  emit_int8(opcode);
  emit_int8((unsigned char)(0xC0 | encode));
}
4533
// 3-operands AVX instructions
// Memory-source form: dst = nds op [src].
void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
                               Address src, VexSimdPrefix pre, bool vector256) {
  InstructionMark im(this);
  vex_prefix(dst, nds, src, pre, vector256);
  emit_int8(opcode);
  emit_operand(dst, src);
}
4542
// 3-operand AVX arithmetic, register form: dst = nds op src.
void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
                               XMMRegister src, VexSimdPrefix pre, bool vector256) {
  int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256);
  emit_int8(opcode);
  emit_int8((unsigned char)(0xC0 | encode));
}
4549
4550#ifndef _LP64
4551
// INC r32, short form (40+rd). 32-bit only: these opcodes are REX prefixes
// on x86_64.
void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  emit_int8(0x40 | dst->encoding());
}
4556
// 32-bit lea is simply leal.
void Assembler::lea(Register dst, Address src) {
  leal(dst, src);
}
4560
// MOV m32, imm32 (C7 /0 id) with a relocated immediate.
void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xC7);
  emit_operand(rax, dst);  // rax (encoding 0) supplies the /0 reg field
  emit_data((int)imm32, rspec, 0);
}
4567
// MOV r32, imm32 (B8+rd id) with a relocated immediate.
void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)(0xB8 | encode));
  emit_data((int)imm32, rspec, 0);
}
4574
void Assembler::popa() { // 32bit
  // POPAD: restore all eight 32-bit GPRs from the stack.
  emit_int8(0x61);
}

void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
  // PUSH imm32 (0x68) with a relocated immediate.
  InstructionMark im(this);
  emit_int8(0x68);
  emit_data(imm32, rspec, 0);
}

void Assembler::pusha() { // 32bit
  // PUSHAD: push all eight 32-bit GPRs onto the stack.
  emit_int8(0x60);
}
4588
void Assembler::set_byte_if_not_zero(Register dst) {
  // SETNE r8 (0F 95): dst's low byte = 1 if ZF == 0, else 0.
  emit_int8(0x0F);
  emit_int8((unsigned char)0x95);
  emit_int8((unsigned char)(0xE0 | dst->encoding()));
}

void Assembler::shldl(Register dst, Register src) {
  // SHLD r/m32, r32, CL (0F A5): src goes in the ModRM reg field, dst in r/m.
  emit_int8(0x0F);
  emit_int8((unsigned char)0xA5);
  emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
}

void Assembler::shrdl(Register dst, Register src) {
  // SHRD r/m32, r32, CL (0F AD).
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAD);
  emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
}
4606
4607#else // LP64
4608
void Assembler::set_byte_if_not_zero(Register dst) {
  // SETNE r8 (0F 95). byteinst=true so spl/bpl/sil/dil get a REX prefix.
  int enc = prefix_and_encode(dst->encoding(), true);
  emit_int8(0x0F);
  emit_int8((unsigned char)0x95);
  emit_int8((unsigned char)(0xE0 | enc));
}
4615
4616// 64bit only pieces of the assembler
4617// This should only be used by 64bit instructions that can use rip-relative
4618// it cannot be used by instructions that want an immediate value.
4619
// Decide whether 'adr' can be addressed rip-relatively (32-bit signed
// displacement) from anywhere code might end up in the code cache.
// Returns false when a 64-bit literal must be materialized instead.
bool Assembler::reachable(AddressLiteral adr) {
  int64_t disp;
  // None will force a 64bit literal to the code stream. Likely a placeholder
  // for something that will be patched later and we need to be certain it will
  // always be reachable.
  if (adr.reloc() == relocInfo::none) {
    return false;
  }
  if (adr.reloc() == relocInfo::internal_word_type) {
    // This should be rip relative and easily reachable.
    return true;
  }
  if (adr.reloc() == relocInfo::virtual_call_type ||
      adr.reloc() == relocInfo::opt_virtual_call_type ||
      adr.reloc() == relocInfo::static_call_type ||
      adr.reloc() == relocInfo::static_stub_type ) {
    // This should be rip relative within the code cache and easily
    // reachable until we get huge code caches. (At which point
    // ic code is going to have issues).
    return true;
  }
  if (adr.reloc() != relocInfo::external_word_type &&
      adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
      adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
      adr.reloc() != relocInfo::runtime_call_type ) {
    return false;
  }

  // Stress the correction code
  if (ForceUnreachable) {
    // Must be runtimecall reloc, see if it is in the codecache
    // Flipping stuff in the codecache to be unreachable causes issues
    // with things like inline caches where the additional instructions
    // are not handled.
    if (CodeCache::find_blob(adr._target) == NULL) {
      return false;
    }
  }
  // For external_word_type/runtime_call_type if it is reachable from where we
  // are now (possibly a temp buffer) and where we might end up
  // anywhere in the codeCache then we are always reachable.
  // This would have to change if we ever save/restore shared code
  // to be more pessimistic.
  // Check against both ends of the code cache so the answer holds no
  // matter where within it this code finally lands.
  disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;
  disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;

  disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int));

  // Because rip relative is a disp + address_of_next_instruction and we
  // don't know the value of address_of_next_instruction we apply a fudge factor
  // to make sure we will be ok no matter the size of the instruction we get placed into.
  // We don't have to fudge the checks above here because they are already worst case.

  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
  // + 4 because better safe than sorry.
  const int fudge = 12 + 4;
  if (disp < 0) {
    disp -= fudge;
  } else {
    disp += fudge;
  }
  return is_simm32(disp);
}
4685
4686// Check if the polling page is not reachable from the code cache using rip-relative
4687// addressing.
bool Assembler::is_polling_page_far() {
  // The page is "far" if either extreme of the code cache cannot reach it
  // with a 32-bit displacement (ForceUnreachable stresses the far path).
  intptr_t addr = (intptr_t)os::get_polling_page();
  return ForceUnreachable ||
         !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
         !is_simm32(addr - (intptr_t)CodeCache::high_bound());
}
4694
// Emit a 64-bit datum, wrapping a bare reloc type into a simple
// RelocationHolder when one is needed.
void Assembler::emit_data64(jlong data,
                            relocInfo::relocType rtype,
                            int format) {
  if (rtype == relocInfo::none) {
    emit_int64(data);
  } else {
    emit_data64(data, Relocation::spec_simple(rtype), format);
  }
}

// Emit a relocated 64-bit datum. The relocation is recorded against the
// enclosing instruction's start (inst_mark), not the datum itself.
void Assembler::emit_data64(jlong data,
                            RelocationHolder const& rspec,
                            int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(imm_operand == format, "must be immediate");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  // Do not use AbstractAssembler::relocate, which is not intended for
  // embedded words.  Instead, relocate to the enclosing instruction.
  code_section()->relocate(inst_mark(), rspec, format);
#ifdef ASSERT
  check_relocation(rspec, format);
#endif
  emit_int64(data);
}
4719
4720int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
4721  if (reg_enc >= 8) {
4722    prefix(REX_B);
4723    reg_enc -= 8;
4724  } else if (byteinst && reg_enc >= 4) {
4725    prefix(REX);
4726  }
4727  return reg_enc;
4728}
4729
4730int Assembler::prefixq_and_encode(int reg_enc) {
4731  if (reg_enc < 8) {
4732    prefix(REX_W);
4733  } else {
4734    prefix(REX_WB);
4735    reg_enc -= 8;
4736  }
4737  return reg_enc;
4738}
4739
4740int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
4741  if (dst_enc < 8) {
4742    if (src_enc >= 8) {
4743      prefix(REX_B);
4744      src_enc -= 8;
4745    } else if (byteinst && src_enc >= 4) {
4746      prefix(REX);
4747    }
4748  } else {
4749    if (src_enc < 8) {
4750      prefix(REX_R);
4751    } else {
4752      prefix(REX_RB);
4753      src_enc -= 8;
4754    }
4755    dst_enc -= 8;
4756  }
4757  return dst_enc << 3 | src_enc;
4758}
4759
4760int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
4761  if (dst_enc < 8) {
4762    if (src_enc < 8) {
4763      prefix(REX_W);
4764    } else {
4765      prefix(REX_WB);
4766      src_enc -= 8;
4767    }
4768  } else {
4769    if (src_enc < 8) {
4770      prefix(REX_WR);
4771    } else {
4772      prefix(REX_WRB);
4773      src_enc -= 8;
4774    }
4775    dst_enc -= 8;
4776  }
4777  return dst_enc << 3 | src_enc;
4778}
4779
4780void Assembler::prefix(Register reg) {
4781  if (reg->encoding() >= 8) {
4782    prefix(REX_B);
4783  }
4784}
4785
4786void Assembler::prefix(Address adr) {
4787  if (adr.base_needs_rex()) {
4788    if (adr.index_needs_rex()) {
4789      prefix(REX_XB);
4790    } else {
4791      prefix(REX_B);
4792    }
4793  } else {
4794    if (adr.index_needs_rex()) {
4795      prefix(REX_X);
4796    }
4797  }
4798}
4799
4800void Assembler::prefixq(Address adr) {
4801  if (adr.base_needs_rex()) {
4802    if (adr.index_needs_rex()) {
4803      prefix(REX_WXB);
4804    } else {
4805      prefix(REX_WB);
4806    }
4807  } else {
4808    if (adr.index_needs_rex()) {
4809      prefix(REX_WX);
4810    } else {
4811      prefix(REX_W);
4812    }
4813  }
4814}
4815
4816
4817void Assembler::prefix(Address adr, Register reg, bool byteinst) {
4818  if (reg->encoding() < 8) {
4819    if (adr.base_needs_rex()) {
4820      if (adr.index_needs_rex()) {
4821        prefix(REX_XB);
4822      } else {
4823        prefix(REX_B);
4824      }
4825    } else {
4826      if (adr.index_needs_rex()) {
4827        prefix(REX_X);
4828      } else if (byteinst && reg->encoding() >= 4 ) {
4829        prefix(REX);
4830      }
4831    }
4832  } else {
4833    if (adr.base_needs_rex()) {
4834      if (adr.index_needs_rex()) {
4835        prefix(REX_RXB);
4836      } else {
4837        prefix(REX_RB);
4838      }
4839    } else {
4840      if (adr.index_needs_rex()) {
4841        prefix(REX_RX);
4842      } else {
4843        prefix(REX_R);
4844      }
4845    }
4846  }
4847}
4848
4849void Assembler::prefixq(Address adr, Register src) {
4850  if (src->encoding() < 8) {
4851    if (adr.base_needs_rex()) {
4852      if (adr.index_needs_rex()) {
4853        prefix(REX_WXB);
4854      } else {
4855        prefix(REX_WB);
4856      }
4857    } else {
4858      if (adr.index_needs_rex()) {
4859        prefix(REX_WX);
4860      } else {
4861        prefix(REX_W);
4862      }
4863    }
4864  } else {
4865    if (adr.base_needs_rex()) {
4866      if (adr.index_needs_rex()) {
4867        prefix(REX_WRXB);
4868      } else {
4869        prefix(REX_WRB);
4870      }
4871    } else {
4872      if (adr.index_needs_rex()) {
4873        prefix(REX_WRX);
4874      } else {
4875        prefix(REX_WR);
4876      }
4877    }
4878  }
4879}
4880
4881void Assembler::prefix(Address adr, XMMRegister reg) {
4882  if (reg->encoding() < 8) {
4883    if (adr.base_needs_rex()) {
4884      if (adr.index_needs_rex()) {
4885        prefix(REX_XB);
4886      } else {
4887        prefix(REX_B);
4888      }
4889    } else {
4890      if (adr.index_needs_rex()) {
4891        prefix(REX_X);
4892      }
4893    }
4894  } else {
4895    if (adr.base_needs_rex()) {
4896      if (adr.index_needs_rex()) {
4897        prefix(REX_RXB);
4898      } else {
4899        prefix(REX_RB);
4900      }
4901    } else {
4902      if (adr.index_needs_rex()) {
4903        prefix(REX_RX);
4904      } else {
4905        prefix(REX_R);
4906      }
4907    }
4908  }
4909}
4910
4911void Assembler::prefixq(Address adr, XMMRegister src) {
4912  if (src->encoding() < 8) {
4913    if (adr.base_needs_rex()) {
4914      if (adr.index_needs_rex()) {
4915        prefix(REX_WXB);
4916      } else {
4917        prefix(REX_WB);
4918      }
4919    } else {
4920      if (adr.index_needs_rex()) {
4921        prefix(REX_WX);
4922      } else {
4923        prefix(REX_W);
4924      }
4925    }
4926  } else {
4927    if (adr.base_needs_rex()) {
4928      if (adr.index_needs_rex()) {
4929        prefix(REX_WRXB);
4930      } else {
4931        prefix(REX_WRB);
4932      }
4933    } else {
4934      if (adr.index_needs_rex()) {
4935        prefix(REX_WRX);
4936      } else {
4937        prefix(REX_WR);
4938      }
4939    }
4940  }
4941}
4942
// ADC r64, imm32 (0x81 /2; 0xD0 supplies the /2 extension).
void Assembler::adcq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD0, dst, imm32);
}

// ADC r64, m64 (0x13).
void Assembler::adcq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x13);
  emit_operand(dst, src);
}

// ADC r64, r64 (0x13).
void Assembler::adcq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}
4959
// ADD m64, imm32 (0x81 /0; rax supplies the /0 reg field).
void Assembler::addq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rax, dst,imm32);
}

// ADD m64, r64 (0x01).
void Assembler::addq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_int8(0x01);
  emit_operand(src, dst);
}

// ADD r64, imm32 (0x81 /0).
void Assembler::addq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xC0, dst, imm32);
}

// ADD r64, m64 (0x03).
void Assembler::addq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x03);
  emit_operand(dst, src);
}

// ADD r64, r64 (0x03).
void Assembler::addq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}
4989
// ADCX r64, r64 (66 REX.W 0F 38 F6): add with carry through CF only.
// The 0x66 mandatory prefix must precede the REX prefix.
void Assembler::adcxq(Register dst, Register src) {
  //assert(VM_Version::supports_adx(), "adx instructions not supported");
  emit_int8((unsigned char)0x66);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8(0x38);
  emit_int8((unsigned char)0xF6);
  emit_int8((unsigned char)(0xC0 | encode));
}

// ADOX r64, r64 (F3 REX.W 0F 38 F6): add with carry through OF only.
void Assembler::adoxq(Register dst, Register src) {
  //assert(VM_Version::supports_adx(), "adx instructions not supported");
  emit_int8((unsigned char)0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8(0x38);
  emit_int8((unsigned char)0xF6);
  emit_int8((unsigned char)(0xC0 | encode));
}
5009
// AND m64, imm32 (0x81 /4; rsp supplies the /4 reg field,
// the trailing 4 accounts for the imm32 when fixing up rip-relative disp).
void Assembler::andq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rsp, dst, 4);
  emit_int32(imm32);
}

// AND r64, imm32 (0x81 /4).
void Assembler::andq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE0, dst, imm32);
}

// AND r64, m64 (0x23).
void Assembler::andq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x23);
  emit_operand(dst, src);
}

// AND r64, r64 (0x23).
void Assembler::andq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}
5034
// ANDN r64, r64, r64 (VEX.0F38 F2): dst = ~src1 & src2 (BMI1).
void Assembler::andnq(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode_q(dst, src1, src2);
  emit_int8((unsigned char)0xF2);
  emit_int8((unsigned char)(0xC0 | encode));
}

// ANDN r64, r64, m64: dst = ~src1 & [src2].
void Assembler::andnq(Register dst, Register src1, Address src2) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38_q(dst, src1, src2);
  emit_int8((unsigned char)0xF2);
  emit_operand(dst, src2);
}
5049
// BSF r64, r64 (0F BC): bit scan forward.
void Assembler::bsfq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBC);
  emit_int8((unsigned char)(0xC0 | encode));
}

// BSR r64, r64 (0F BD): bit scan reverse.
void Assembler::bsrq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBD);
  emit_int8((unsigned char)(0xC0 | encode));
}

// BSWAP r64 (0F C8+rd): byte-swap the full 64-bit register.
void Assembler::bswapq(Register reg) {
  int encode = prefixq_and_encode(reg->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)(0xC8 | encode));
}
5069
// BLSI r64, r64 (VEX.0F38 F3 /3; rbx supplies the /3 opcode extension):
// isolate lowest set bit (BMI1).
void Assembler::blsiq(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode_q(rbx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

// BLSI r64, m64.
void Assembler::blsiq(Register dst, Address src) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38_q(rbx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_operand(rbx, src);
}

// BLSMSK r64, r64 (VEX.0F38 F3 /2; rdx supplies /2): mask up to lowest set bit.
void Assembler::blsmskq(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode_q(rdx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

// BLSMSK r64, m64.
void Assembler::blsmskq(Register dst, Address src) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38_q(rdx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_operand(rdx, src);
}

// BLSR r64, r64 (VEX.0F38 F3 /1; rcx supplies /1): reset lowest set bit.
void Assembler::blsrq(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode_q(rcx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

// BLSR r64, m64.
void Assembler::blsrq(Register dst, Address src) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38_q(rcx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_operand(rcx, src);
}
5114
// CQO (REX.W 99): sign-extend rax into rdx:rax.
void Assembler::cdqq() {
  prefix(REX_W);
  emit_int8((unsigned char)0x99);
}

// CLFLUSH m8 (0F AE /7; rdi supplies the /7 reg field).
void Assembler::clflush(Address adr) {
  prefix(adr);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  emit_operand(rdi, adr);
}
5126
// CMOVcc r64, r64 (0F 40+cc): conditional move.
void Assembler::cmovq(Condition cc, Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8(0x40 | cc);
  emit_int8((unsigned char)(0xC0 | encode));
}

// CMOVcc r64, m64.
void Assembler::cmovq(Condition cc, Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x0F);
  emit_int8(0x40 | cc);
  emit_operand(dst, src);
}
5141
// CMP m64, imm32 (0x81 /7; rdi supplies the /7 reg field, the trailing 4
// accounts for the imm32 when fixing up rip-relative disp).
void Assembler::cmpq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rdi, dst, 4);
  emit_int32(imm32);
}

// CMP r64, imm32 (0x81 /7).
void Assembler::cmpq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xF8, dst, imm32);
}
5154
5155void Assembler::cmpq(Address dst, Register src) {
5156  InstructionMark im(this);
5157  prefixq(dst, src);
5158  emit_int8(0x3B);
5159  emit_operand(src, dst);
5160}
5161
// CMP r64, r64 (0x3B).
void Assembler::cmpq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}

// CMP r64, m64 (0x3B).
void Assembler::cmpq(Register dst, Address  src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x3B);
  emit_operand(dst, src);
}
5173
// CMPXCHG m64, r64 (0F B1). Callers add any LOCK prefix separately.
void Assembler::cmpxchgq(Register reg, Address adr) {
  InstructionMark im(this);
  prefixq(adr, reg);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB1);
  emit_operand(reg, adr);
}
5181
// CVTSI2SD xmm, r64 (F2 REX.W 0F 2A): convert signed 64-bit int to double.
void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2);
  emit_int8(0x2A);
  emit_int8((unsigned char)(0xC0 | encode));
}

// CVTSI2SD xmm, m64.
void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix_q(dst, dst, src, VEX_SIMD_F2);
  emit_int8(0x2A);
  emit_operand(dst, src);
}

// CVTSI2SS xmm, r64 (F3 REX.W 0F 2A): convert signed 64-bit int to float.
void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3);
  emit_int8(0x2A);
  emit_int8((unsigned char)(0xC0 | encode));
}

// CVTSI2SS xmm, m64.
void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  simd_prefix_q(dst, dst, src, VEX_SIMD_F3);
  emit_int8(0x2A);
  emit_operand(dst, src);
}
5211
// CVTTSD2SI r64, xmm (F2 REX.W 0F 2C): truncating double-to-int64 convert.
void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2);
  emit_int8(0x2C);
  emit_int8((unsigned char)(0xC0 | encode));
}

// CVTTSS2SI r64, xmm (F3 REX.W 0F 2C): truncating float-to-int64 convert.
void Assembler::cvttss2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3);
  emit_int8(0x2C);
  emit_int8((unsigned char)(0xC0 | encode));
}
5225
void Assembler::decl(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  // DEC r32 (0xFF /1; 0xC8 encodes mod=11, /1).
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xC8 | encode));
}
5233
5234void Assembler::decq(Register dst) {
5235  // Don't use it directly. Use MacroAssembler::decrementq() instead.
5236  // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
5237  int encode = prefixq_and_encode(dst->encoding());
5238  emit_int8((unsigned char)0xFF);
5239  emit_int8(0xC8 | encode);
5240}
5241
void Assembler::decq(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  // DEC m64 (0xFF /1; rcx supplies the /1 reg field).
  InstructionMark im(this);
  prefixq(dst);
  emit_int8((unsigned char)0xFF);
  emit_operand(rcx, dst);
}
5249
// FXRSTOR (0F AE /1), emitted with REX.W (64-bit form): restore x87/SSE state.
void Assembler::fxrstor(Address src) {
  prefixq(src);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  emit_operand(as_Register(1), src);
}

// FXSAVE (0F AE /0), emitted with REX.W (64-bit form): save x87/SSE state.
void Assembler::fxsave(Address dst) {
  prefixq(dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  emit_operand(as_Register(0), dst);
}
5263
// IDIV r64 (0xF7 /7): signed divide rdx:rax by src.
void Assembler::idivq(Register src) {
  int encode = prefixq_and_encode(src->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xF8 | encode));
}

// IMUL r64, r64 (0F AF).
void Assembler::imulq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAF);
  emit_int8((unsigned char)(0xC0 | encode));
}
5276
5277void Assembler::imulq(Register dst, Register src, int value) {
5278  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5279  if (is8bit(value)) {
5280    emit_int8(0x6B);
5281    emit_int8((unsigned char)(0xC0 | encode));
5282    emit_int8(value & 0xFF);
5283  } else {
5284    emit_int8(0x69);
5285    emit_int8((unsigned char)(0xC0 | encode));
5286    emit_int32(value);
5287  }
5288}
5289
// IMUL r64, m64 (0F AF).
void Assembler::imulq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char) 0xAF);
  emit_operand(dst, src);
}
5297
void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  // INC r32 (0xFF /0).
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::incq(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  // INC r64 (0xFF /0).
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::incq(Address dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  // INC m64 (0xFF /0; rax supplies the /0 reg field).
  InstructionMark im(this);
  prefixq(dst);
  emit_int8((unsigned char)0xFF);
  emit_operand(rax, dst);
}
5321
void Assembler::lea(Register dst, Address src) {
  // 64-bit lea is just leaq.
  leaq(dst, src);
}

// LEA r64, m (0x8D): load effective address.
void Assembler::leaq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8((unsigned char)0x8D);
  emit_operand(dst, src);
}
5332
// MOV r64, imm64 (REX.W B8+rd): load a full 64-bit immediate.
void Assembler::mov64(Register dst, int64_t imm64) {
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)(0xB8 | encode));
  emit_int64(imm64);
}
5339
5340void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
5341  InstructionMark im(this);
5342  int encode = prefixq_and_encode(dst->encoding());
5343  emit_int8(0xB8 | encode);
5344  emit_data64(imm64, rspec);
5345}
5346
// MOV r32, imm32 (B8+rd) where the immediate is a compressed (narrow) oop;
// relocated with narrow_oop_operand format so the GC can patch it.
void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)(0xB8 | encode));
  emit_data((int)imm32, rspec, narrow_oop_operand);
}

// MOV m32, imm32 (C7 /0) with a narrow-oop immediate.
void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0xC7);
  emit_operand(rax, dst, 4);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}

// CMP r32, imm32 (0x81 /7) against a narrow-oop immediate.
void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(src1->encoding());
  emit_int8((unsigned char)0x81);
  emit_int8((unsigned char)(0xF8 | encode));
  emit_data((int)imm32, rspec, narrow_oop_operand);
}

// CMP m32, imm32 (0x81 /7) against a narrow-oop immediate.
void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  prefix(src1);
  emit_int8((unsigned char)0x81);
  emit_operand(rax, src1, 4);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
5377
// LZCNT r64, r64 (F3 REX.W 0F BD): count leading zeros. The F3 prefix
// must precede REX; without LZCNT support the CPU decodes this as BSR.
void Assembler::lzcntq(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  emit_int8((unsigned char)0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBD);
  emit_int8((unsigned char)(0xC0 | encode));
}
5386
// MOVQ xmm, r64 (66 REX.W 0F 6E).
void Assembler::movdq(XMMRegister dst, Register src) {
  // table D-1 says MMX/SSE2
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66);
  emit_int8(0x6E);
  emit_int8((unsigned char)(0xC0 | encode));
}

// MOVQ r64, xmm (66 REX.W 0F 7E).
void Assembler::movdq(Register dst, XMMRegister src) {
  // table D-1 says MMX/SSE2
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // swap src/dst to get correct prefix
  int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66);
  emit_int8(0x7E);
  emit_int8((unsigned char)(0xC0 | encode));
}
5403
// MOV r64, r64 (0x8B).
void Assembler::movq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x8B);
  emit_int8((unsigned char)(0xC0 | encode));
}

// MOV r64, m64 (0x8B).
void Assembler::movq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8((unsigned char)0x8B);
  emit_operand(dst, src);
}

// MOV m64, r64 (0x89).
void Assembler::movq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_int8((unsigned char)0x89);
  emit_operand(src, dst);
}
5423
// MOVSX r64, m8 (0F BE): sign-extend a byte from memory.
void Assembler::movsbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBE);
  emit_operand(dst, src);
}

// MOVSX r64, r8 (0F BE): sign-extend a byte register.
void Assembler::movsbq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBE);
  emit_int8((unsigned char)(0xC0 | encode));
}
5438
// Intentionally disabled: the emitted encoding is wrong (0xC7 | encode
// corrupts the opcode byte), as the dbx disassembly below shows.
void Assembler::movslq(Register dst, int32_t imm32) {
  // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
  // and movslq(r8, 3); as movl     $0x0000000048000000,(%rbx)
  // as a result we shouldn't use until tested at runtime...
  ShouldNotReachHere();
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)(0xC7 | encode));
  emit_int32(imm32);
}
5449
// MOV m64, imm32 (REX.W C7 /0): stores imm32 sign-extended to 64 bits.
void Assembler::movslq(Address dst, int32_t imm32) {
  assert(is_simm32(imm32), "lost bits");
  InstructionMark im(this);
  prefixq(dst);
  emit_int8((unsigned char)0xC7);
  emit_operand(rax, dst, 4);
  emit_int32(imm32);
}

// MOVSXD r64, m32 (0x63): sign-extend a 32-bit memory operand.
void Assembler::movslq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x63);
  emit_operand(dst, src);
}

// MOVSXD r64, r32 (0x63).
void Assembler::movslq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x63);
  emit_int8((unsigned char)(0xC0 | encode));
}
5471
// MOVSX r64, m16 (0F BF): sign-extend a 16-bit memory operand.
void Assembler::movswq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBF);
  emit_operand(dst, src);
}

// MOVSX r64, r16 (0F BF).
void Assembler::movswq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0xBF);
  emit_int8((unsigned char)(0xC0 | encode));
}
5486
// MOVZX r64, m8 (0F B6): zero-extend a byte from memory.
void Assembler::movzbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0xB6);
  emit_operand(dst, src);
}
5494
5495void Assembler::movzbq(Register dst, Register src) {
5496  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5497  emit_int8(0x0F);
5498  emit_int8((unsigned char)0xB6);
5499  emit_int8(0xC0 | encode);
5500}
5501
// MOVZX r64, m16 (0F B7): zero-extend a 16-bit memory operand.
void Assembler::movzwq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0xB7);
  emit_operand(dst, src);
}

// MOVZX r64, r16 (0F B7).
void Assembler::movzwq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0xB7);
  emit_int8((unsigned char)(0xC0 | encode));
}
5516
// MUL m64 (0xF7 /4; rsp supplies the /4 reg field): rdx:rax = rax * [src].
void Assembler::mulq(Address src) {
  InstructionMark im(this);
  prefixq(src);
  emit_int8((unsigned char)0xF7);
  emit_operand(rsp, src);
}

// MUL r64 (0xF7 /4): rdx:rax = rax * src.
void Assembler::mulq(Register src) {
  int encode = prefixq_and_encode(src->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xE0 | encode));
}
5529
// MULX r64, r64, r64 (VEX.F2.0F38 F6, BMI2): dst1:dst2 = rdx * src,
// without touching the flags.
void Assembler::mulxq(Register dst1, Register dst2, Register src) {
  assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
  int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, true, false);
  emit_int8((unsigned char)0xF6);
  emit_int8((unsigned char)(0xC0 | encode));
}
5536
// NEG r64 (0xF7 /3): two's-complement negate.
void Assembler::negq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xD8 | encode));
}

// NOT r64 (0xF7 /2): one's-complement invert.
void Assembler::notq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xD0 | encode));
}
5548
5549void Assembler::orq(Address dst, int32_t imm32) {
5550  InstructionMark im(this);
5551  prefixq(dst);
5552  emit_int8((unsigned char)0x81);
5553  emit_operand(rcx, dst, 4);
5554  emit_int32(imm32);
5555}
5556
5557void Assembler::orq(Register dst, int32_t imm32) {
5558  (void) prefixq_and_encode(dst->encoding());
5559  emit_arith(0x81, 0xC8, dst, imm32);
5560}
5561
void Assembler::orq(Register dst, Address src) {
  // OR r64, r/m64: REX.W + 0x0B /r.
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x0B);
  emit_operand(dst, src);
}
5568
void Assembler::orq(Register dst, Register src) {
  // OR r64, r64: REX.W + 0x0B /r, register-direct (ModRM base 0xC0).
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x0B, 0xC0, dst, src);
}
5573
// Restore all 15 general registers (rsp excluded) from the 16-slot frame
// laid out by pusha() below, then release the frame. Slot 11 (rsp's slot)
// is deliberately skipped so the stack pointer itself is not overwritten.
void Assembler::popa() { // 64bit
  movq(r15, Address(rsp, 0));
  movq(r14, Address(rsp, wordSize));
  movq(r13, Address(rsp, 2 * wordSize));
  movq(r12, Address(rsp, 3 * wordSize));
  movq(r11, Address(rsp, 4 * wordSize));
  movq(r10, Address(rsp, 5 * wordSize));
  movq(r9,  Address(rsp, 6 * wordSize));
  movq(r8,  Address(rsp, 7 * wordSize));
  movq(rdi, Address(rsp, 8 * wordSize));
  movq(rsi, Address(rsp, 9 * wordSize));
  movq(rbp, Address(rsp, 10 * wordSize));
  // skip rsp
  movq(rbx, Address(rsp, 12 * wordSize));
  movq(rdx, Address(rsp, 13 * wordSize));
  movq(rcx, Address(rsp, 14 * wordSize));
  movq(rax, Address(rsp, 15 * wordSize));

  addq(rsp, 16 * wordSize);
}
5594
void Assembler::popcntq(Register dst, Address src) {
  // POPCNT r64, r/m64: F3-prefixed 0x0F 0xB8 /r. The mandatory 0xF3 prefix
  // must precede the REX prefix emitted by prefixq.
  assert(VM_Version::supports_popcnt(), "must support");
  InstructionMark im(this);
  emit_int8((unsigned char)0xF3);
  prefixq(src, dst);
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0xB8);
  emit_operand(dst, src);
}
5604
void Assembler::popcntq(Register dst, Register src) {
  // POPCNT r64, r64: F3-prefixed 0x0F 0xB8 /r, register-direct form.
  // 0xF3 must come before the REX prefix produced by prefixq_and_encode.
  assert(VM_Version::supports_popcnt(), "must support");
  emit_int8((unsigned char)0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0xB8);
  emit_int8((unsigned char)(0xC0 | encode));
}
5613
void Assembler::popq(Address dst) {
  // POP r/m64: 0x8F /0 (64-bit operand size is the default for POP).
  InstructionMark im(this);
  prefixq(dst);
  emit_int8((unsigned char)0x8F);
  emit_operand(rax, dst);  // rax (encoding 0) supplies the /0 opcode extension
}
5620
// Save all 15 general registers into a 16-slot frame (mirrored by popa()).
// The original rsp is written first, before the frame is allocated; after
// the subq, old_rsp - 5*wordSize is the same location as slot 11 — the slot
// popa() skips.
void Assembler::pusha() { // 64bit
  // we have to store original rsp.  ABI says that 128 bytes
  // below rsp are local scratch.
  movq(Address(rsp, -5 * wordSize), rsp);

  subq(rsp, 16 * wordSize);

  movq(Address(rsp, 15 * wordSize), rax);
  movq(Address(rsp, 14 * wordSize), rcx);
  movq(Address(rsp, 13 * wordSize), rdx);
  movq(Address(rsp, 12 * wordSize), rbx);
  // skip rsp
  movq(Address(rsp, 10 * wordSize), rbp);
  movq(Address(rsp, 9 * wordSize), rsi);
  movq(Address(rsp, 8 * wordSize), rdi);
  movq(Address(rsp, 7 * wordSize), r8);
  movq(Address(rsp, 6 * wordSize), r9);
  movq(Address(rsp, 5 * wordSize), r10);
  movq(Address(rsp, 4 * wordSize), r11);
  movq(Address(rsp, 3 * wordSize), r12);
  movq(Address(rsp, 2 * wordSize), r13);
  movq(Address(rsp, wordSize), r14);
  movq(Address(rsp, 0), r15);
}
5645
void Assembler::pushq(Address src) {
  // PUSH r/m64: 0xFF /6 (64-bit operand size is the default for PUSH).
  InstructionMark im(this);
  prefixq(src);
  emit_int8((unsigned char)0xFF);
  emit_operand(rsi, src);  // rsi (encoding 6) supplies the /6 opcode extension
}
5652
void Assembler::rclq(Register dst, int imm8) {
  // RCL r64, imm8 (rotate through carry left): /2 extension.
  // A count of 1 uses the shorter 0xD1 encoding; otherwise 0xC1 ib.
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xD0 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xD0 | encode));
    emit_int8(imm8);
  }
}
5665
5666void Assembler::rorq(Register dst, int imm8) {
5667  assert(isShiftCount(imm8 >> 1), "illegal shift count");
5668  int encode = prefixq_and_encode(dst->encoding());
5669  if (imm8 == 1) {
5670    emit_int8((unsigned char)0xD1);
5671    emit_int8((unsigned char)(0xC8 | encode));
5672  } else {
5673    emit_int8((unsigned char)0xC1);
5674    emit_int8((unsigned char)(0xc8 | encode));
5675    emit_int8(imm8);
5676  }
5677}
5678
// RORX (BMI2): flagless rotate right by immediate.
// VEX-encoded: F2-prefixed 0F 3A map, opcode 0xF0, followed by imm8 count.
void Assembler::rorxq(Register dst, Register src, int imm8) {
  assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, true, false);
  emit_int8((unsigned char)0xF0);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}
5686
void Assembler::sarq(Register dst, int imm8) {
  // SAR r64, imm8 (arithmetic shift right): /7 extension (ModRM base 0xF8).
  // A count of 1 uses the shorter 0xD1 encoding; otherwise 0xC1 ib.
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xF8 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xF8 | encode));
    emit_int8(imm8);
  }
}
5699
void Assembler::sarq(Register dst) {
  // SAR r64, CL (shift count taken from CL): REX.W + 0xD3 /7.
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xF8 | encode));
}
5705
void Assembler::sbbq(Address dst, int32_t imm32) {
  // SBB r/m64, imm (subtract with borrow): 0x81 (or short-form) with /3.
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);  // rbx (encoding 3) = /3 extension
}
5711
void Assembler::sbbq(Register dst, int32_t imm32) {
  // SBB r64, imm: REX.W prefix, then emit_arith with /3 (base 0xD8).
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD8, dst, imm32);
}
5716
void Assembler::sbbq(Register dst, Address src) {
  // SBB r64, r/m64: REX.W + 0x1B /r.
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x1B);
  emit_operand(dst, src);
}
5723
void Assembler::sbbq(Register dst, Register src) {
  // SBB r64, r64: REX.W + 0x1B /r, register-direct (ModRM base 0xC0).
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}
5728
void Assembler::shlq(Register dst, int imm8) {
  // SHL r64, imm8: /4 extension (ModRM base 0xE0).
  // A count of 1 uses the shorter 0xD1 encoding; otherwise 0xC1 ib.
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xE0 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xE0 | encode));
    emit_int8(imm8);
  }
}
5741
void Assembler::shlq(Register dst) {
  // SHL r64, CL (shift count taken from CL): REX.W + 0xD3 /4.
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xE0 | encode));
}
5747
5748void Assembler::shrq(Register dst, int imm8) {
5749  assert(isShiftCount(imm8 >> 1), "illegal shift count");
5750  int encode = prefixq_and_encode(dst->encoding());
5751  emit_int8((unsigned char)0xC1);
5752  emit_int8((unsigned char)(0xE8 | encode));
5753  emit_int8(imm8);
5754}
5755
5756void Assembler::shrq(Register dst) {
5757  int encode = prefixq_and_encode(dst->encoding());
5758  emit_int8((unsigned char)0xD3);
5759  emit_int8(0xE8 | encode);
5760}
5761
void Assembler::subq(Address dst, int32_t imm32) {
  // SUB r/m64, imm: 0x81 (or short-form) with /5.
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbp, dst, imm32);  // rbp (encoding 5) = /5 extension
}
5767
void Assembler::subq(Address dst, Register src) {
  // SUB r/m64, r64: REX.W + 0x29 /r.
  InstructionMark im(this);
  prefixq(dst, src);
  emit_int8(0x29);
  emit_operand(src, dst);
}
5774
void Assembler::subq(Register dst, int32_t imm32) {
  // SUB r64, imm: REX.W prefix, then emit_arith with /5 (base 0xE8).
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE8, dst, imm32);
}
5779
// Force generation of a 4 byte immediate value even if it fits into 8bit.
// Used where the instruction length must be predictable (e.g. patching).
void Assembler::subq_imm32(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith_imm32(0x81, 0xE8, dst, imm32);  // always 0x81 /5 id, never 0x83
}
5785
void Assembler::subq(Register dst, Address src) {
  // SUB r64, r/m64: REX.W + 0x2B /r.
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x2B);
  emit_operand(dst, src);
}
5792
void Assembler::subq(Register dst, Register src) {
  // SUB r64, r64: REX.W + 0x2B /r, register-direct (ModRM base 0xC0).
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x2B, 0xC0, dst, src);
}
5797
void Assembler::testq(Register dst, int32_t imm32) {
  // TEST r64, imm32.
  // not using emit_arith because test
  // doesn't support sign-extension of
  // 8bit operands
  int encode = dst->encoding();
  if (encode == 0) {
    // rax has a dedicated short form: REX.W + 0xA9 id (no ModRM byte)
    prefix(REX_W);
    emit_int8((unsigned char)0xA9);
  } else {
    // general form: REX.W + 0xF7 /0 id, register-direct
    encode = prefixq_and_encode(encode);
    emit_int8((unsigned char)0xF7);
    emit_int8((unsigned char)(0xC0 | encode));
  }
  emit_int32(imm32);
}
5813
void Assembler::testq(Register dst, Register src) {
  // TEST r64, r64: REX.W + 0x85 /r, register-direct (ModRM base 0xC0).
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}
5818
void Assembler::xaddq(Address dst, Register src) {
  // XADD r/m64, r64 (exchange and add): REX.W + 0x0F 0xC1 /r.
  // Note: no LOCK prefix is emitted here; callers add it if atomicity is needed.
  InstructionMark im(this);
  prefixq(dst, src);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xC1);
  emit_operand(src, dst);
}
5826
void Assembler::xchgq(Register dst, Address src) {
  // XCHG r64, r/m64: REX.W + 0x87 /r (implicitly locked with a memory operand).
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8((unsigned char)0x87);
  emit_operand(dst, src);
}
5833
5834void Assembler::xchgq(Register dst, Register src) {
5835  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5836  emit_int8((unsigned char)0x87);
5837  emit_int8((unsigned char)(0xc0 | encode));
5838}
5839
void Assembler::xorq(Register dst, Register src) {
  // XOR r64, r64: REX.W + 0x33 /r, register-direct (ModRM base 0xC0).
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}
5844
void Assembler::xorq(Register dst, Address src) {
  // XOR r64, r/m64: REX.W + 0x33 /r.
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x33);
  emit_operand(dst, src);
}
5851
5852#endif // !LP64
5853