assembler_x86.cpp revision 3883:cd3d6a6b95d9
1/*
2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "asm/assembler.hpp"
27#include "asm/assembler.inline.hpp"
28#include "gc_interface/collectedHeap.inline.hpp"
29#include "interpreter/interpreter.hpp"
30#include "memory/cardTableModRefBS.hpp"
31#include "memory/resourceArea.hpp"
32#include "prims/methodHandles.hpp"
33#include "runtime/biasedLocking.hpp"
34#include "runtime/interfaceSupport.hpp"
35#include "runtime/objectMonitor.hpp"
36#include "runtime/os.hpp"
37#include "runtime/sharedRuntime.hpp"
38#include "runtime/stubRoutines.hpp"
39#ifndef SERIALGC
40#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
41#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
42#include "gc_implementation/g1/heapRegion.hpp"
43#endif
44
45#ifdef PRODUCT
46#define BLOCK_COMMENT(str) /* nothing */
47#define STOP(error) stop(error)
48#else
49#define BLOCK_COMMENT(str) block_comment(str)
50#define STOP(error) block_comment(error); stop(error)
51#endif
52
53#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
54// Implementation of AddressLiteral
55
// Construct an AddressLiteral for 'target', deriving the relocation spec
// from 'rtype'.  The literal is a plain value, not an lvalue (_is_lval).
AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  switch (rtype) {
  case relocInfo::oop_type:
  case relocInfo::metadata_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    // Polls carry no target; a simple spec of the reloc type suffices.
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    // No relocation: _rspec stays default-constructed.
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}
93
94// Implementation of Address
95
#ifdef _LP64

// 64-bit: an array address built from an absolute base literal is not
// expressible (no abs32 addressing mode); callers must lower it earlier.
Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}

// exceedingly dangerous constructor
// Builds a base/index-less address whose disp32 is 'disp' and whose
// relocation (derived from 'rtype' against 'loc') patches that disp.
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // HMM
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
#else // LP64

// 32-bit: fold the array base literal into the index expression's
// displacement, yielding [index*scale + base] carrying base's relocation.
Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor
// 32-bit absolute address: the location itself becomes the displacement.
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}

#endif // _LP64
153
154
155
156// Convert the raw encoding form into the form expected by the constructor for
157// Address.  An index of 4 (rsp) corresponds to having no index, so convert
158// that to noreg for the Address constructor.
159Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
160  RelocationHolder rspec;
161  if (disp_reloc != relocInfo::none) {
162    rspec = Relocation::spec_simple(disp_reloc);
163  }
164  bool valid_index = index != rsp->encoding();
165  if (valid_index) {
166    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
167    madr._rspec = rspec;
168    return madr;
169  } else {
170    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
171    madr._rspec = rspec;
172    return madr;
173  }
174}
175
176// Implementation of Assembler
177
178int AbstractAssembler::code_fill_byte() {
179  return (u_char)'\xF4'; // hlt
180}
181
182// make this go away someday
183void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
184  if (rtype == relocInfo::none)
185        emit_long(data);
186  else  emit_data(data, Relocation::spec_simple(rtype), format);
187}
188
// Emit a 32-bit data word carrying an explicit relocation spec.  The
// relocation is recorded at the enclosing instruction's start (inst_mark),
// not at the embedded word itself.
void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() !=  relocInfo::none) {
    #ifdef ASSERT
      check_relocation(rspec, format);
    #endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words.  Instead, relocate to the enclosing instruction.

    // hack. call32 is too wide for mask so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_long(data);
}
207
208static int encode(Register r) {
209  int enc = r->encoding();
210  if (enc >= 8) {
211    enc -= 8;
212  }
213  return enc;
214}
215
216static int encode(XMMRegister r) {
217  int enc = r->encoding();
218  if (enc >= 8) {
219    enc -= 8;
220  }
221  return enc;
222}
223
// Emit a byte-sized register/immediate arithmetic instruction:
// opcode op1, then ModRM (op2 with dst in the r/m field), then imm8.
void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_byte(imm8);
}
233
234
// Emit a 32-bit register/immediate arithmetic instruction, using the
// shorter sign-extended imm8 form when the immediate fits in one byte.
void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_byte(op1 | 0x02); // set sign bit
    emit_byte(op2 | encode(dst));
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(op1);
    emit_byte(op2 | encode(dst));
    emit_long(imm32);
  }
}
249
250// Force generation of a 4 byte immediate value even if it fits into 8bit
// Like emit_arith, but always emits the full 4-byte immediate even when
// imm32 would fit in 8 bits — presumably so callers can patch the
// immediate in place later (TODO confirm against callers).
void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_long(imm32);
}
259
260// immediate-to-memory forms
// Emit an arithmetic instruction with a memory destination and an
// immediate source, preferring the sign-extended imm8 form.  'rm' supplies
// the ModRM reg-field bits (the opcode extension) for this operation.
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_byte(op1 | 0x02); // set sign bit
    emit_operand(rm, adr, 1);  // 1 trailing immediate byte follows the operand
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(op1);
    emit_operand(rm, adr, 4);  // 4 trailing immediate bytes follow the operand
    emit_long(imm32);
  }
}
274
275
276void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
277  assert(isByte(op1) && isByte(op2), "wrong opcode");
278  emit_byte(op1);
279  emit_byte(op2 | encode(dst) << 3 | encode(src));
280}
281
282
// Emit the ModRM byte, optional SIB byte, and any displacement bytes for a
// memory operand [base + index*scale + disp], with 'reg' placed in the
// ModRM reg field.  'rip_relative_correction' accounts for immediate bytes
// that follow the disp32 when computing the RIP-relative offset (64-bit).
void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      // rbp/r13 cannot be encoded with mod=00 (that pattern means
      // disp32-only), so they fall through to an explicit-disp form.
      if (disp == 0 && rtype == relocInfo::none  &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x04 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x44 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x84 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // rsp/r12 as base require a SIB byte (their r/m encoding selects SIB).
      // [rsp + disp]
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_byte(0x04 | regenc);
        emit_byte(0x24);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_byte(0x44 | regenc);
        emit_byte(0x24);
        emit_byte(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_byte(0x84 | regenc);
        emit_byte(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_byte(0x00 | regenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_byte(0x40 | regenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_byte(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_byte(0x04 | regenc);
      emit_byte(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none ) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_byte(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this, did everything as the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_byte(0x04 | regenc);
      emit_byte(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}
400
// XMM registers share the ModRM reg-field encoding with general registers,
// so delegate to the Register version via a cast.
void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  emit_operand((Register)reg, base, index, scale, disp, rspec);
}
406
// Secret local extension to Assembler::WhichOperand:
#define end_pc_operand (_WhichOperand_limit)

// Decode the x86 instruction at 'inst' and return the address of the
// operand selected by 'which' (or the instruction's end for
// end_pc_operand).  Used to find where a relocation's data word lives.
address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    // REX.W set: the operand (e.g. a following immediate) is 64-bit wide.
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip;                  // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2;  // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand,
           err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, ip));
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit,
           err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, ip));
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x3A: // pcmpestri
      tail_size = 1;
      // fall through: shares the opcode-byte skip with the 0x38 family
    case 0x38: // ptest, pmovzxbw
      ip++; // skip opcode
      debug_only(has_disp32 = true); // has both kinds of operands!
      break;

    case 0x70: // pshufd r, r/a, #8
      debug_only(has_disp32 = true); // has both kinds of operands!
      // fall through: both carry a trailing imm8
    case 0x73: // psrldq r, #8
      tail_size = 1;
      break;

    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xC4: // pinsrw r, a, #8
      debug_only(has_disp32 = true);
      // fall through: both carry a trailing imm8
    case 0xC5: // pextrw r, r, #8
      tail_size = 1;  // the imm8
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1;  // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xC4: // VEX_3bytes
  case 0xC5: // VEX_2bytes
    assert((UseAVX > 0), "shouldn't have VEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions
    // but they have prefix 0x0F and processed when 0x0F processed above.
    //
    // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES
    // instructions (these instructions are not supported in 64-bit mode).
    // To distinguish them bits [7:6] are set in the VEX second byte since
    // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set
    // those VEX bits REX and vvvv bits are inverted.
    //
    // Fortunately C2 doesn't generate these instructions so we don't need
    // to check for them in product version.

    // Check second byte
    NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));

    // First byte
    if ((0xFF & *inst) == VEX_3bytes) {
      ip++; // third byte
      is_64bit = ((VEX_W & *ip) == VEX_W);
    }
    ip++; // opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (0xFF & *ip) {
    case 0x61: // pcmpestri r, r/a, #8
    case 0x70: // pshufd r, r/a, #8
    case 0x73: // psrldq r, #8
      tail_size = 1;  // the imm8
      break;
    default:
      break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp  rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xF0:                    // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3:                    // For SSE
  case 0xF2:                    // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
      // fall through: the default also advances past the opcode byte
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
    // NOTE(review): REP4 is left defined here, unlike REP8/REP16 —
    // confirm nothing later in the file depends on it before adding #undef.
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  // r/m == 100 with mod != 11 means a SIB byte follows.
  if (base == b100 && (op2 >> 6) != 3) {
    op3 = 0xFF & *ip++;
    base = op3 & 0x07;   // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip;              // caller wants the disp32
      ip += 4;                  // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1;                    // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip;                // caller wants the disp32
    ip += 4;                    // skip the disp32
    break;

  case 3:
    // [11 reg base]  (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}
820
// Return the address immediately past the instruction at 'inst'.
address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}
825
826
827#ifdef ASSERT
// Debug-only consistency check: verify that a relocation attached at
// inst_mark() selects an operand that ends exactly at the current pc.
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    // Other relocation kinds carry no embedded operand to check.
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
849#endif // ASSERT
850
// Emit a memory operand restricted to the low eight registers (no REX
// extension allowed on reg, base, or index).
void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}
857
// Convenience wrapper: unpack an Address into the field-wise emit_operand.
void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}
864
// Convenience wrapper for XMM destinations: unpack the Address fields.
void Assembler::emit_operand(XMMRegister reg, Address adr) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}
869
870// MMX operations
// MMX memory operand; MMX registers never need REX, so extended base/index
// registers are disallowed.
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}
875
876// work around gcc (3.2.1-7a) bug
// Identical to emit_operand(MMXRegister, Address); the swapped parameter
// order exists only to dodge a gcc 3.2.1 overload-resolution bug.
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}
881
882
// Emit a two-byte x87 instruction; 'i' selects FP stack slot ST(i) by
// being added into the second opcode byte.
void Assembler::emit_farith(int b1, int b2, int i) {
  assert(isByte(b1) && isByte(b2), "wrong opcode");
  assert(0 <= i &&  i < 8, "illegal stack offset");
  emit_byte(b1);
  emit_byte(b2 + i);
}
889
890
891// Now the Assembler instructions (identical for 32/64 bits)
892
// adcl mem, imm32 — add-with-carry an immediate into a memory operand.
void Assembler::adcl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  // rdx supplies the ModRM reg-field bits selecting the adc operation.
  emit_arith_operand(0x81, rdx, dst, imm32);
}
898
// adcl mem, reg — add-with-carry a register into a memory operand.
void Assembler::adcl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x11);
  emit_operand(src, dst);
}
905
// adcl reg, imm32 — add-with-carry an immediate into a register.
void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}
910
// adcl reg, mem — add-with-carry a memory operand into a register.
void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x13);
  emit_operand(dst, src);
}
917
// adcl reg, reg — register-to-register add-with-carry.
void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}
922
// addl mem, imm32 — add an immediate into a memory operand.
void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  // rax supplies the ModRM reg-field bits selecting the add operation.
  emit_arith_operand(0x81, rax, dst, imm32);
}
928
// addl mem, reg — add a register into a memory operand.
void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x01);
  emit_operand(src, dst);
}
935
// addl reg, imm32 — add an immediate into a register.
void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}
940
// addl reg, mem — add a memory operand into a register.
void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x03);
  emit_operand(dst, src);
}
947
// ADD r32, r32 (0x03 /r): dst += src.
void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}
952
// Emit a 4-byte multi-byte NOP (0F 1F /0 with an 8-bit displacement).
void Assembler::addr_nop_4() {
  assert(UseAddressNop, "no CPU support");
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_byte(0);    // 8-bits offset (1 byte)
}
961
// Emit a 5-byte multi-byte NOP (0F 1F with SIB and an 8-bit displacement).
void Assembler::addr_nop_5() {
  assert(UseAddressNop, "no CPU support");
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_byte(0);    // 8-bits offset (1 byte)
}
971
// Emit a 7-byte multi-byte NOP (0F 1F with a 32-bit displacement).
void Assembler::addr_nop_7() {
  assert(UseAddressNop, "no CPU support");
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}
980
// Emit an 8-byte multi-byte NOP (0F 1F with SIB and a 32-bit displacement).
void Assembler::addr_nop_8() {
  assert(UseAddressNop, "no CPU support");
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}
990
// ADDSD xmm, xmm (F2 0F 58): scalar double-precision add.
void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
}
995
// ADDSD xmm, m64 (F2 0F 58): scalar double-precision add from memory.
void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
}
1000
// ADDSS xmm, xmm (F3 0F 58): scalar single-precision add.
void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}
1005
// ADDSS xmm, m32 (F3 0F 58): scalar single-precision add from memory.
void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}
1010
// AESDEC xmm, m128 (66 0F 38 DE): one AES decryption round, state in dst.
void Assembler::aesdec(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0xde);
  emit_operand(dst, src);
}
1018
// AESDEC xmm, xmm (66 0F 38 DE): one AES decryption round.
void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0xde);
  emit_byte(0xC0 | encode);
}
1025
// AESDECLAST xmm, m128 (66 0F 38 DF): final AES decryption round.
void Assembler::aesdeclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0xdf);
  emit_operand(dst, src);
}
1033
// AESDECLAST xmm, xmm (66 0F 38 DF): final AES decryption round.
void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0xdf);
  emit_byte(0xC0 | encode);
}
1040
// AESENC xmm, m128 (66 0F 38 DC): one AES encryption round, state in dst.
void Assembler::aesenc(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0xdc);
  emit_operand(dst, src);
}
1048
// AESENC xmm, xmm (66 0F 38 DC): one AES encryption round.
void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0xdc);
  emit_byte(0xC0 | encode);
}
1055
// AESENCLAST xmm, m128 (66 0F 38 DD): final AES encryption round.
void Assembler::aesenclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0xdd);
  emit_operand(dst, src);
}
1063
// AESENCLAST xmm, xmm (66 0F 38 DD): final AES encryption round.
void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0xdd);
  emit_byte(0xC0 | encode);
}
1070
1071
// AND m32, imm32 (0x81 /4 — rsp encodes digit 4): [dst] &= imm32.
// Note: always uses the 4-byte immediate form (no 8-bit shortening here).
void Assembler::andl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  emit_operand(rsp, dst, 4);
  emit_long(imm32);
}
1079
// AND r32, imm32 (0x81, ModRM base 0xE0 = /4): dst &= imm32.
void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}
1084
// AND r32, m32 (0x23): dst &= [src].
void Assembler::andl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x23);
  emit_operand(dst, src);
}
1091
// AND r32, r32 (0x23 /r): dst &= src.
void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}
1096
// BSF r32, r32 (0F BC): bit scan forward — index of lowest set bit of src.
void Assembler::bsfl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBC);
  emit_byte(0xC0 | encode);
}
1103
// BSR r32, r32 (0F BD): bit scan reverse — index of highest set bit of src.
// On LZCNT-capable CPUs this encoding (with F3) means LZCNT, hence the guard.
void Assembler::bsrl(Register dst, Register src) {
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}
1111
// BSWAP r32 (0F C8+r): reverse byte order of reg.
void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_byte(0x0F);
  emit_byte(0xC8 | encode);
}
1117
// CALL rel32 (0xE8) to a label. A bound label must be backward (offs <= 0);
// an unbound label records a patch site and emits a zero displacement.
void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5;
    // Displacement is relative to the end of the 5-byte instruction.
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_byte(0xE8);
    emit_data(offs - long_size, rtype, operand);
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_byte(0xE8);
    emit_data(int(0), rtype, operand);
  }
}
1139
// CALL r32/r64 (FF /2): indirect call through a register.
void Assembler::call(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xD0 | encode);
}
1145
1146
// CALL m (FF /2 — rdx encodes digit 2): indirect call through memory.
void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  emit_operand(rdx, adr);
}
1153
// CALL rel32 (0xE8) to an absolute address, emitted with relocation info.
// The target must be reachable with a signed 32-bit displacement from here.
void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  emit_byte(0xE8);
  // Displacement is relative to the end of the instruction (pc + 4 here,
  // since the opcode byte has already been emitted).
  intptr_t disp = entry - (pc() + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (call2)");
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}
1166
// CDQ (0x99): sign-extend EAX into EDX:EAX.
void Assembler::cdql() {
  emit_byte(0x99);
}
1170
// CLD (0xFC): clear the direction flag (string ops count upward).
void Assembler::cld() {
  emit_byte(0xfc);
}
1174
// CMOVcc r32, r32 (0F 40+cc): conditional move if cc holds.
void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_byte(0xC0 | encode);
}
1182
1183
// CMOVcc r32, m32 (0F 40+cc): conditional load if cc holds.
// NOTE(review): no InstructionMark here, unlike most memory-operand emitters
// in this file — confirm whether that is intentional.
void Assembler::cmovl(Condition cc, Register dst, Address src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_operand(dst, src);
}
1191
// CMP m8, imm8 (0x80 /7 — rdi encodes digit 7): compare byte at dst with imm8.
void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x80);
  emit_operand(rdi, dst, 1);
  emit_byte(imm8);
}
1199
// CMP m32, imm32 (0x81 /7): compare dword at dst with imm32.
void Assembler::cmpl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  emit_operand(rdi, dst, 4);
  emit_long(imm32);
}
1207
// CMP r32, imm32 (0x81, ModRM base 0xF8 = /7).
void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);
}
1212
// CMP r32, r32 (0x3B /r).
void Assembler::cmpl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}
1217
1218
// CMP r32, m32 (0x3B): compare dst with dword at src.
void Assembler::cmpl(Register dst, Address  src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}
1225
// CMP m16, imm16 (66 81 /7): 16-bit compare via the operand-size prefix.
// Extended (REX) base/index registers are not supported by this emitter.
void Assembler::cmpw(Address dst, int imm16) {
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_byte(0x66);
  emit_byte(0x81);
  emit_operand(rdi, dst, 2);
  emit_word(imm16);
}
1234
1235// The 32-bit cmpxchg compares the value at adr with the contents of rax,
1236// and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
1237// The ZF is set if the compared values were equal, and cleared otherwise.
// CMPXCHG m32, r32 (0F B1). No LOCK prefix is emitted here; callers that
// need atomicity must emit lock() first.
void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
  InstructionMark im(this);
  prefix(adr, reg);
  emit_byte(0x0F);
  emit_byte(0xB1);
  emit_operand(reg, adr);
}
1245
// COMISD xmm, m64 (66 0F 2F): ordered scalar double compare, sets EFLAGS.
void Assembler::comisd(XMMRegister dst, Address src) {
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 is there. Strangely ucomisd comes out correct
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
}
1252
// COMISD xmm, xmm (66 0F 2F): ordered scalar double compare, sets EFLAGS.
void Assembler::comisd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
}
1257
// COMISS xmm, m32 (0F 2F): ordered scalar single compare, sets EFLAGS.
void Assembler::comiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
}
1262
// COMISS xmm, xmm (0F 2F): ordered scalar single compare, sets EFLAGS.
void Assembler::comiss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
}
1267
// CPUID (0F A2).
void Assembler::cpuid() {
  emit_byte(0x0F);
  emit_byte(0xA2);
}
1272
// CVTDQ2PD xmm, xmm (F3 0F E6): packed int32 -> packed double.
void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3);
}
1277
// CVTDQ2PS xmm, xmm (0F 5B): packed int32 -> packed single.
void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE);
}
1282
// CVTSD2SS xmm, xmm (F2 0F 5A): scalar double -> single.
void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
}
1287
// CVTSD2SS xmm, m64 (F2 0F 5A): scalar double in memory -> single.
void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
}
1292
// CVTSI2SD xmm, r32 (F2 0F 2A): signed int32 -> scalar double.
void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}
1299
// CVTSI2SD xmm, m32 (F2 0F 2A): signed int32 in memory -> scalar double.
void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
}
1304
// CVTSI2SS xmm, r32 (F3 0F 2A): signed int32 -> scalar single.
void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}
1311
// CVTSI2SS xmm, m32 (F3 0F 2A): signed int32 in memory -> scalar single.
void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3);
}
1316
// CVTSS2SD xmm, xmm (F3 0F 5A): scalar single -> double.
void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
}
1321
// CVTSS2SD xmm, m32 (F3 0F 5A): scalar single in memory -> double.
void Assembler::cvtss2sd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
}
1326
1327
// CVTTSD2SI r32, xmm (F2 0F 2C): scalar double -> int32 with truncation.
void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}
1334
// CVTTSS2SI r32, xmm (F3 0F 2C): scalar single -> int32 with truncation.
void Assembler::cvttss2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}
1341
// DEC m32 (FF /1 — rcx encodes digit 1): decrement dword at dst.
void Assembler::decl(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrement() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  emit_operand(rcx, dst);
}
1349
// DIVSD xmm, m64 (F2 0F 5E): scalar double-precision divide by memory operand.
void Assembler::divsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
}
1354
// DIVSD xmm, xmm (F2 0F 5E): scalar double-precision divide.
void Assembler::divsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
}
1359
// DIVSS xmm, m32 (F3 0F 5E): scalar single-precision divide by memory operand.
void Assembler::divss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
}
1364
// DIVSS xmm, xmm (F3 0F 5E): scalar single-precision divide.
void Assembler::divss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
}
1369
// EMMS (0F 77): empty MMX state, making the x87 stack usable again.
void Assembler::emms() {
  NOT_LP64(assert(VM_Version::supports_mmx(), ""));
  emit_byte(0x0F);
  emit_byte(0x77);
}
1375
// HLT (0xF4): halt the processor (privileged; used as a trap filler).
void Assembler::hlt() {
  emit_byte(0xF4);
}
1379
// IDIV r32 (F7 /7): signed divide EDX:EAX by src; quotient in EAX, remainder in EDX.
void Assembler::idivl(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF8 | encode);
}
1385
// DIV r32 (F7 /6): unsigned divide EDX:EAX by src.
void Assembler::divl(Register src) { // Unsigned
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF0 | encode);
}
1391
// IMUL r32, r32 (0F AF): dst *= src (signed).
void Assembler::imull(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xAF);
  emit_byte(0xC0 | encode);
}
1398
1399
// IMUL r32, r32, imm (signed): dst = src * value. Picks the short form
// 0x6B with imm8 when the constant fits, else 0x69 with imm32.
void Assembler::imull(Register dst, Register src, int value) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_byte(0x6B);
    emit_byte(0xC0 | encode);
    emit_byte(value & 0xFF);
  } else {
    emit_byte(0x69);
    emit_byte(0xC0 | encode);
    emit_long(value);
  }
}
1412
// INC m32 (FF /0 — rax encodes digit 0): increment dword at dst.
void Assembler::incl(Address dst) {
  // Don't use it directly. Use MacroAssembler::increment() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  emit_operand(rax, dst);
}
1420
// Jcc to a label. For bound labels, emits the 2-byte short form (0x70|cc rel8)
// when maybe_short allows and the offset fits, else the 6-byte long form
// (0F 80|cc rel32). Unbound labels get a patchable long form with zero disp.
void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
  InstructionMark im(this);
  assert((0 <= cc) && (cc < 16), "illegal cc");
  if (L.is_bound()) {
    address dst = target(L);
    assert(dst != NULL, "jcc most probably wrong");

    const int short_size = 2;
    const int long_size = 6;
    // Displacements are relative to the end of the chosen encoding.
    intptr_t offs = (intptr_t)dst - (intptr_t)pc();
    if (maybe_short && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      assert(is_simm32(offs - long_size),
             "must be 32bit offset (call4)");
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
    // Note: could eliminate cond. jumps to this jump if condition
    //       is the same however, seems to be rather unlikely case.
    // Note: use jccb() if label to be bound is very close to get
    //       an 8-bit displacement
    L.add_patch_at(code(), locator());
    emit_byte(0x0F);
    emit_byte(0x80 | cc);
    emit_long(0);
  }
}
1454
1455void Assembler::jccb(Condition cc, Label& L) {
1456  if (L.is_bound()) {
1457    const int short_size = 2;
1458    address entry = target(L);
1459#ifdef ASSERT
1460    intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
1461    intptr_t delta = short_branch_delta();
1462    if (delta != 0) {
1463      dist += (dist < 0 ? (-delta) :delta);
1464    }
1465    assert(is8bit(dist), "Dispacement too large for a short jmp");
1466#endif
1467    intptr_t offs = (intptr_t)entry - (intptr_t)pc();
1468    // 0111 tttn #8-bit disp
1469    emit_byte(0x70 | cc);
1470    emit_byte((offs - short_size) & 0xFF);
1471  } else {
1472    InstructionMark im(this);
1473    L.add_patch_at(code(), locator());
1474    emit_byte(0x70 | cc);
1475    emit_byte(0);
1476  }
1477}
1478
// JMP m (FF /4 — rsp encodes digit 4): indirect jump through memory.
void Assembler::jmp(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  emit_operand(rsp, adr);
}
1485
// JMP to a label. Bound labels use the 2-byte short form (0xEB rel8) when
// maybe_short allows and the offset fits, else the 5-byte form (0xE9 rel32).
void Assembler::jmp(Label& L, bool maybe_short) {
  if (L.is_bound()) {
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
    InstructionMark im(this);
    const int short_size = 2;
    const int long_size = 5;
    // Displacements are relative to the end of the chosen encoding.
    intptr_t offs = entry - pc();
    if (maybe_short && is8bit(offs - short_size)) {
      emit_byte(0xEB);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      emit_byte(0xE9);
      emit_long(offs - long_size);
    }
  } else {
    // By default, forward jumps are always 32-bit displacements, since
    // we can't yet know where the label will be bound.  If you're sure that
    // the forward jump will not run beyond 256 bytes, use jmpb to
    // force an 8-bit displacement.
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_byte(0xE9);
    emit_long(0);
  }
}
1512
// JMP r32/r64 (FF /4): indirect jump through a register.
void Assembler::jmp(Register entry) {
  int encode = prefix_and_encode(entry->encoding());
  emit_byte(0xFF);
  emit_byte(0xE0 | encode);
}
1518
// JMP rel32 (0xE9) to an absolute address, emitted with relocation info.
// The target must be reachable with a signed 32-bit displacement.
void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0xE9);
  assert(dest != NULL, "must have a target");
  // Displacement is relative to the end of the instruction.
  intptr_t disp = dest - (pc() + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (jmp)");
  emit_data(disp, rspec.reloc(), call32_operand);
}
1527
1528void Assembler::jmpb(Label& L) {
1529  if (L.is_bound()) {
1530    const int short_size = 2;
1531    address entry = target(L);
1532    assert(entry != NULL, "jmp most probably wrong");
1533#ifdef ASSERT
1534    intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
1535    intptr_t delta = short_branch_delta();
1536    if (delta != 0) {
1537      dist += (dist < 0 ? (-delta) :delta);
1538    }
1539    assert(is8bit(dist), "Dispacement too large for a short jmp");
1540#endif
1541    intptr_t offs = entry - pc();
1542    emit_byte(0xEB);
1543    emit_byte((offs - short_size) & 0xFF);
1544  } else {
1545    InstructionMark im(this);
1546    L.add_patch_at(code(), locator());
1547    emit_byte(0xEB);
1548    emit_byte(0);
1549  }
1550}
1551
// LDMXCSR m32 (0F AE /2): load the SSE control/status register MXCSR.
void Assembler::ldmxcsr( Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(src);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(2), src); // register field 2 selects LDMXCSR
}
1560
// LEA r32, m (0x8D): load effective address. On 64-bit an address-size
// override (0x67) forces 32-bit address computation.
void Assembler::leal(Register dst, Address src) {
  InstructionMark im(this);
#ifdef _LP64
  emit_byte(0x67); // addr32
  prefix(src, dst);
#endif // _LP64
  emit_byte(0x8D);
  emit_operand(dst, src);
}
1570
// LFENCE (0F AE E8): load fence.
void Assembler::lfence() {
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_byte(0xE8);
}
1576
// LOCK prefix (0xF0): makes the following instruction atomic.
void Assembler::lock() {
  emit_byte(0xF0);
}
1580
// LZCNT r32, r32 (F3 0F BD): count leading zeros. Without LZCNT support the
// CPU decodes this encoding as BSR, hence the guard.
void Assembler::lzcntl(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  emit_byte(0xF3); // F3 must precede any REX prefix
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}
1589
1590// Emit mfence instruction
// MFENCE (0F AE F0): full memory fence.
void Assembler::mfence() {
  NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
  emit_byte( 0x0F );
  emit_byte( 0xAE );
  emit_byte( 0xF0 );
}
1597
// Pointer-sized register move: movq on 64-bit, movl on 32-bit.
void Assembler::mov(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}
1601
// MOVAPD xmm, xmm (66 0F 28): move aligned packed doubles.
void Assembler::movapd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
}
1606
// MOVAPS xmm, xmm (0F 28): move aligned packed singles.
void Assembler::movaps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE);
}
1611
// MOVLHPS xmm, xmm (0F 16): copy low quadword of src to high quadword of dst.
void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE);
  emit_byte(0x16);
  emit_byte(0xC0 | encode);
}
1618
// MOV r8, m8 (0x8A): load a byte; dst must be byte-addressable on 32-bit.
void Assembler::movb(Register dst, Address src) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  InstructionMark im(this);
  prefix(src, dst, true); // byte_instr = true
  emit_byte(0x8A);
  emit_operand(dst, src);
}
1626
1627
// MOV m8, imm8 (0xC6 /0): store an immediate byte.
void Assembler::movb(Address dst, int imm8) {
  InstructionMark im(this);
   prefix(dst);
  emit_byte(0xC6);
  emit_operand(rax, dst, 1);
  emit_byte(imm8);
}
1635
1636
// MOV m8, r8 (0x88): store a byte; src must be byte-addressable.
void Assembler::movb(Address dst, Register src) {
  assert(src->has_byte_register(), "must have byte register");
  InstructionMark im(this);
  prefix(dst, src, true); // byte_instr = true
  emit_byte(0x88);
  emit_operand(src, dst);
}
1644
// MOVD xmm, r32 (66 0F 6E): move dword from GPR to XMM.
void Assembler::movdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
  emit_byte(0x6E);
  emit_byte(0xC0 | encode);
}
1651
// MOVD r32, xmm (66 0F 7E): move dword from XMM to GPR.
void Assembler::movdl(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // swap src/dst to get correct prefix
  int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66);
  emit_byte(0x7E);
  emit_byte(0xC0 | encode);
}
1659
// MOVD xmm, m32 (66 0F 6E): load dword into XMM.
void Assembler::movdl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66);
  emit_byte(0x6E);
  emit_operand(dst, src);
}
1667
// MOVD m32, xmm (66 0F 7E): store low dword of XMM.
void Assembler::movdl(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66);
  emit_byte(0x7E);
  emit_operand(src, dst);
}
1675
// MOVDQA xmm, xmm (66 0F 6F): move aligned 128-bit integer data.
void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
}
1680
// MOVDQU xmm, m128 (F3 0F 6F): load unaligned 128-bit integer data.
void Assembler::movdqu(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
}
1685
// MOVDQU xmm, xmm (F3 0F 6F): register-to-register 128-bit move.
void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
}
1690
// MOVDQU m128, xmm (F3 0F 7F): store unaligned 128-bit integer data.
void Assembler::movdqu(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F3);
  emit_byte(0x7F);
  emit_operand(src, dst);
}
1698
1699// Move Unaligned 256bit Vector
// VMOVDQU ymm, ymm (VEX.256 F3 0F 6F): 256-bit register-to-register move.
void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
  assert(UseAVX, "");
  bool vector256 = true;
  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256);
  emit_byte(0x6F);
  emit_byte(0xC0 | encode);
}
1707
// VMOVDQU ymm, m256 (VEX.256 F3 0F 6F): load unaligned 256-bit vector.
void Assembler::vmovdqu(XMMRegister dst, Address src) {
  assert(UseAVX, "");
  InstructionMark im(this);
  bool vector256 = true;
  vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256);
  emit_byte(0x6F);
  emit_operand(dst, src);
}
1716
// VMOVDQU m256, ymm (VEX.256 F3 0F 7F): store unaligned 256-bit vector.
void Assembler::vmovdqu(Address dst, XMMRegister src) {
  assert(UseAVX, "");
  InstructionMark im(this);
  bool vector256 = true;
  // swap src<->dst for encoding
  assert(src != xnoreg, "sanity");
  vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector256);
  emit_byte(0x7F);
  emit_operand(src, dst);
}
1727
1728// Uses zero extension on 64bit
1729
// MOV r32, imm32 (B8+r): load immediate (zero-extends on 64-bit).
void Assembler::movl(Register dst, int32_t imm32) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_long(imm32);
}
1735
// MOV r32, r32 (0x8B /r).
void Assembler::movl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x8B);
  emit_byte(0xC0 | encode);
}
1741
// MOV r32, m32 (0x8B): load dword.
void Assembler::movl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}
1748
// MOV m32, imm32 (0xC7 /0): store an immediate dword.
void Assembler::movl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);
  emit_long(imm32);
}
1756
// MOV m32, r32 (0x89): store dword.
void Assembler::movl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}
1763
1764// New cpus require to use movsd and movss to avoid partial register stall
1765// when loading from memory. But for old Opteron use movlpd instead of movsd.
1766// The selection is done in MacroAssembler::movdbl() and movflt().
// MOVLPD xmm, m64 (66 0F 12): load 64 bits into the low half of dst,
// leaving the high half unchanged (see selection note above).
void Assembler::movlpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x12, dst, src, VEX_SIMD_66);
}
1771
// MOVQ mm, m64 (0F 6F): load 64 bits into an MMX register.
// NOTE(review): no InstructionMark/prefix here, unlike the XMM forms —
// presumably fine for MMX (no REX operands); confirm if relocations can occur.
void Assembler::movq( MMXRegister dst, Address src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}
1778
// MOVQ m64, mm (0F 7F): store an MMX register to memory.
void Assembler::movq( Address dst, MMXRegister src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x7F);
  // workaround gcc (3.2.1-7a) bug
  // In that version of gcc with only an emit_operand(MMX, Address)
  // gcc will tail jump and try and reverse the parameters completely
  // obliterating dst in the process. By having a version available
  // that doesn't need to swap the args at the tail jump the bug is
  // avoided.
  emit_operand(dst, src);
}
1791
// MOVQ xmm, m64 (F3 0F 7E): load 64 bits, zeroing the upper half of dst.
void Assembler::movq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F3);
  emit_byte(0x7E);
  emit_operand(dst, src);
}
1799
// MOVQ m64, xmm (66 0F D6): store the low 64 bits of src.
void Assembler::movq(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66);
  emit_byte(0xD6);
  emit_operand(src, dst);
}
1807
// MOVSX r32, m8 (0F BE): sign-extend a byte from memory.
void Assembler::movsbl(Register dst, Address src) { // movsxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_operand(dst, src);
}
1815
// MOVSX r32, r8 (0F BE): sign-extend a byte register.
void Assembler::movsbl(Register dst, Register src) { // movsxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_byte(0xC0 | encode);
}
1823
// MOVSD xmm, xmm (F2 0F 10): move scalar double.
void Assembler::movsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
}
1828
// MOVSD xmm, m64 (F2 0F 10): load scalar double (non-destructive-source form).
void Assembler::movsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
}
1833
// MOVSD m64, xmm (F2 0F 11): store scalar double.
void Assembler::movsd(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F2);
  emit_byte(0x11);
  emit_operand(src, dst);
}
1841
// MOVSS xmm, xmm (F3 0F 10): move scalar single.
void Assembler::movss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x10, dst, src, VEX_SIMD_F3);
}
1846
// MOVSS xmm, m32 (F3 0F 10): load scalar single (non-destructive-source form).
void Assembler::movss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3);
}
1851
// MOVSS m32, xmm (F3 0F 11): store scalar single.
void Assembler::movss(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F3);
  emit_byte(0x11);
  emit_operand(src, dst);
}
1859
// MOVSX r32, m16 (0F BF): sign-extend a word from memory.
void Assembler::movswl(Register dst, Address src) { // movsxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_operand(dst, src);
}
1867
// MOVSX r32, r16 (0F BF): sign-extend a word register.
void Assembler::movswl(Register dst, Register src) { // movsxw
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_byte(0xC0 | encode);
}
1874
// MOV m16, imm16 (66 C7 /0): store an immediate word.
void Assembler::movw(Address dst, int imm16) {
  InstructionMark im(this);

  emit_byte(0x66); // switch to 16-bit mode
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 2);
  emit_word(imm16);
}
1884
// MOV r16, m16 (66 8B): load a word.
void Assembler::movw(Register dst, Address src) {
  InstructionMark im(this);
  emit_byte(0x66); // operand-size prefix
  prefix(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}
1892
// MOV m16, r16 (66 89): store a word.
void Assembler::movw(Address dst, Register src) {
  InstructionMark im(this);
  emit_byte(0x66); // operand-size prefix
  prefix(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}
1900
// MOVZX r32, m8 (0F B6): zero-extend a byte from memory.
void Assembler::movzbl(Register dst, Address src) { // movzxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_operand(dst, src);
}
1908
// MOVZX r32, r8 (0F B6): zero-extend a byte register.
void Assembler::movzbl(Register dst, Register src) { // movzxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_byte(0xC0 | encode);
}
1916
// MOVZX r32, m16 (0F B7): zero-extend a word from memory.
void Assembler::movzwl(Register dst, Address src) { // movzxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_operand(dst, src);
}
1924
// MOVZX r32, r16 (0F B7): zero-extend a word register.
void Assembler::movzwl(Register dst, Register src) { // movzxw
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_byte(0xC0 | encode);
}
1931
// MUL m32 (F7 /4 — rsp encodes digit 4): unsigned EDX:EAX = EAX * [src].
void Assembler::mull(Address src) {
  InstructionMark im(this);
  prefix(src);
  emit_byte(0xF7);
  emit_operand(rsp, src);
}
1938
// MUL r32 (F7 /4): unsigned EDX:EAX = EAX * src.
void Assembler::mull(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xE0 | encode);
}
1944
// MULSD xmm, m64 — multiply scalar double (SSE2; F2-prefixed 0F 59).
void Assembler::mulsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
}
1949
// MULSD xmm, xmm — multiply scalar double (SSE2; F2-prefixed 0F 59).
void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
}
1954
// MULSS xmm, m32 — multiply scalar single (SSE; F3-prefixed 0F 59).
void Assembler::mulss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
}
1959
// MULSS xmm, xmm — multiply scalar single (SSE; F3-prefixed 0F 59).
void Assembler::mulss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
}
1964
// Two's-complement negate: NEG r/m32 (F7 /3; 0xD8 = reg-direct, /3).
void Assembler::negl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD8 | encode);
}
1970
// Emit 'i' bytes of padding using the most efficient nop encodings the
// current CPU likes. Three strategies: multi-byte address nops for Intel,
// a different mix for AMD, and plain 0x66-prefixed nops otherwise.
// The exact byte patterns matter for patching safety — do not reorder.
void Assembler::nop(int i) {
#ifdef ASSERT
  assert(i > 0, " ");
  // The fancy nops aren't currently recognized by debuggers making it a
  // pain to disassemble code while debugging. If asserts are on clearly
  // speed is not an issue so simply use the single byte traditional nop
  // to do alignment.

  for (; i > 0 ; i--) emit_byte(0x90);
  return;

#endif // ASSERT

  if (UseAddressNop && VM_Version::is_intel()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is Intel specific - don't use consecutive address nops

    // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90

    // Consume 15 bytes at a time: an 11-byte address nop + a 4-byte prefixed nop.
    while(i >= 15) {
      // For Intel don't generate consecutive addess nops (mix with regular nops)
      i -= 15;
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      addr_nop_8();
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x90);   // nop
    }
    // Remaining 0..14 bytes; cases deliberately fall through to stack prefixes.
    switch (i) {
      case 14:
        emit_byte(0x66); // size prefix
      case 13:
        emit_byte(0x66); // size prefix
      case 12:
        addr_nop_8();
        emit_byte(0x66); // size prefix
        emit_byte(0x66); // size prefix
        emit_byte(0x66); // size prefix
        emit_byte(0x90); // nop
        break;
      case 11:
        emit_byte(0x66); // size prefix
      case 10:
        emit_byte(0x66); // size prefix
      case 9:
        emit_byte(0x66); // size prefix
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_byte(0x66); // size prefix
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_byte(0x66); // size prefix
      case 2:
        emit_byte(0x66); // size prefix
      case 1:
        emit_byte(0x90); // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }
  if (UseAddressNop && VM_Version::is_amd()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is AMD specific - use consecutive address nops

    // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //     Size prefixes (0x66) are added for larger sizes

    // Consume 11 bytes at a time until fewer than 22 remain, so the two
    // switches below can always finish the job.
    while(i >= 22) {
      i -= 11;
      emit_byte(0x66); // size prefix
      emit_byte(0x66); // size prefix
      emit_byte(0x66); // size prefix
      addr_nop_8();
    }
    // Generate first nop for size between 21-12
    switch (i) {
      case 21:
        i -= 1;
        emit_byte(0x66); // size prefix
      case 20:
      case 19:
        i -= 1;
        emit_byte(0x66); // size prefix
      case 18:
      case 17:
        i -= 1;
        emit_byte(0x66); // size prefix
      case 16:
      case 15:
        i -= 8;
        addr_nop_8();
        break;
      case 14:
      case 13:
        i -= 7;
        addr_nop_7();
        break;
      case 12:
        i -= 6;
        emit_byte(0x66); // size prefix
        addr_nop_5();
        break;
      default:
        assert(i < 12, " ");
    }

    // Generate second nop for size between 11-1
    switch (i) {
      case 11:
        emit_byte(0x66); // size prefix
      case 10:
        emit_byte(0x66); // size prefix
      case 9:
        emit_byte(0x66); // size prefix
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_byte(0x66); // size prefix
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_byte(0x66); // size prefix
      case 2:
        emit_byte(0x66); // size prefix
      case 1:
        emit_byte(0x90); // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }

  // Using nops with size prefixes "0x66 0x90".
  // From AMD Optimization Guide:
  //  1: 0x90
  //  2: 0x66 0x90
  //  3: 0x66 0x66 0x90
  //  4: 0x66 0x66 0x66 0x90
  //  5: 0x66 0x66 0x90 0x66 0x90
  //  6: 0x66 0x66 0x90 0x66 0x66 0x90
  //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
  //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
  //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  //
  while(i > 12) {
    i -= 4;
    emit_byte(0x66); // size prefix
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90); // nop
  }
  // 1 - 12 nops
  if(i > 8) {
    if(i > 9) {
      i -= 1;
      emit_byte(0x66);
    }
    i -= 3;
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90);
  }
  // 1 - 8 nops
  if(i > 4) {
    if(i > 6) {
      i -= 1;
      emit_byte(0x66);
    }
    i -= 3;
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90);
  }
  switch (i) {
    case 4:
      emit_byte(0x66);
    case 3:
      emit_byte(0x66);
    case 2:
      emit_byte(0x66);
    case 1:
      emit_byte(0x90);
      break;
    default:
      assert(i == 0, " ");
  }
}
2218
// One's-complement invert: NOT r/m32 (F7 /2; 0xD0 = reg-direct, /2).
void Assembler::notl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD0 | encode );
}
2224
// OR r/m32, imm32 (81 /1); rcx (encoding 1) supplies the /1 extension.
void Assembler::orl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rcx, dst, imm32);
}
2230
// OR r32, imm32 (81 /1; 0xC8 = reg-direct form of /1).
void Assembler::orl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC8, dst, imm32);
}
2235
// OR r32, r/m32 (0B /r).
void Assembler::orl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0B);
  emit_operand(dst, src);
}
2242
// OR r32, r32 (0B /r, register-direct).
void Assembler::orl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x0B, 0xC0, dst, src);
}
2247
// PACKUSWB xmm, m128 (66 0F 67). Legacy SSE form requires a 16-byte
// aligned memory operand, hence the UseAVX assert for the memory variant.
void Assembler::packuswb(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
}
2253
// PACKUSWB xmm, xmm (66 0F 67) — pack words to unsigned bytes with saturation.
void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
}
2258
// PCMPESTRI xmm, m128, imm8 (66 0F 3A 61) — SSE4.2 explicit-length string compare.
void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
  assert(VM_Version::supports_sse4_2(), "");
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
  emit_byte(0x61);
  emit_operand(dst, src);
  emit_byte(imm8); // comparison-mode control byte
}
2267
// PCMPESTRI xmm, xmm, imm8 (66 0F 3A 61) — SSE4.2 explicit-length string compare.
void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_2(), "");
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
  emit_byte(0x61);
  emit_byte(0xC0 | encode);
  emit_byte(imm8); // comparison-mode control byte
}
2275
// PMOVZXBW xmm, m64 (66 0F 38 30) — zero-extend 8 bytes to 8 words (SSE4.1).
void Assembler::pmovzxbw(XMMRegister dst, Address src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0x30);
  emit_operand(dst, src);
}
2283
// PMOVZXBW xmm, xmm (66 0F 38 30) — zero-extend 8 bytes to 8 words (SSE4.1).
void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0x30);
  emit_byte(0xC0 | encode);
}
2290
// generic
// POP r64/r32 — single-byte opcode 0x58 + register encoding.
void Assembler::pop(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0x58 | encode);
}
2296
// POPCNT r32, r/m32 (F3 0F B8). The F3 prefix must precede any REX prefix,
// hence it is emitted before prefix().
void Assembler::popcntl(Register dst, Address src) {
  assert(VM_Version::supports_popcnt(), "must support");
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB8);
  emit_operand(dst, src);
}
2306
// POPCNT r32, r32 (F3 0F B8). F3 is emitted before the (possible) REX prefix.
void Assembler::popcntl(Register dst, Register src) {
  assert(VM_Version::supports_popcnt(), "must support");
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB8);
  emit_byte(0xC0 | encode);
}
2315
// POPF — pop flags register from the stack (0x9D).
void Assembler::popf() {
  emit_byte(0x9D);
}
2319
#ifndef _LP64 // no 32bit push/pop on amd64
// POP r/m32 (8F /0); rax supplies the /0 opcode extension.
void Assembler::popl(Address dst) {
  // NOTE: this will adjust stack by 8byte on 64bits
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x8F);
  emit_operand(rax, dst);
}
#endif
2329
// Common prefix for the prefetch family: address prefix + 0x0F escape byte.
void Assembler::prefetch_prefix(Address src) {
  prefix(src);
  emit_byte(0x0F);
}
2334
// PREFETCHNTA m8 (0F 18 /0) — non-temporal prefetch hint.
void Assembler::prefetchnta(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rax, src); // 0, src
}
2342
// PREFETCH m8 (3DNow! 0F 0D /0) — prefetch for read.
void Assembler::prefetchr(Address src) {
  assert(VM_Version::supports_3dnow_prefetch(), "must support");
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x0D);
  emit_operand(rax, src); // 0, src
}
2350
// PREFETCHT0 m8 (0F 18 /1) — prefetch into all cache levels.
void Assembler::prefetcht0(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rcx, src); // 1, src
}
2358
// PREFETCHT1 m8 (0F 18 /2) — prefetch into L2 and up.
void Assembler::prefetcht1(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rdx, src); // 2, src
}
2366
// PREFETCHT2 m8 (0F 18 /3) — prefetch into L3 and up.
void Assembler::prefetcht2(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rbx, src); // 3, src
}
2374
// PREFETCHW m8 (3DNow! 0F 0D /1) — prefetch with intent to write.
void Assembler::prefetchw(Address src) {
  assert(VM_Version::supports_3dnow_prefetch(), "must support");
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x0D);
  emit_operand(rcx, src); // 1, src
}
2382
// Emit a raw prefix byte (REX, segment override, etc.).
void Assembler::prefix(Prefix p) {
  a_byte(p);
}
2386
// PSHUFB xmm, xmm (66 0F 38 00) — byte shuffle by control mask (SSSE3).
void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_ssse3(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0x00);
  emit_byte(0xC0 | encode);
}
2393
// PSHUFB xmm, m128 (66 0F 38 00); legacy SSE form needs a 16-byte-aligned address.
void Assembler::pshufb(XMMRegister dst, Address src) {
  assert(VM_Version::supports_ssse3(), "");
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0x00);
  emit_operand(dst, src);
}
2402
// PSHUFD xmm, xmm, imm8 (66 0F 70) — shuffle dwords by the 8-bit mode mask.
void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66);
  emit_byte(mode & 0xFF);

}
2410
// PSHUFD xmm, m128, imm8 (66 0F 70); legacy SSE form needs 16-byte alignment.
void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66);
  emit_byte(0x70);
  emit_operand(dst, src);
  emit_byte(mode & 0xFF);
}
2421
// PSHUFLW xmm, xmm, imm8 (F2 0F 70) — shuffle low words by the mode mask.
void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2);
  emit_byte(mode & 0xFF);
}
2428
// PSHUFLW xmm, m128, imm8 (F2 0F 70); legacy SSE form needs 16-byte alignment.
void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F2);
  emit_byte(0x70);
  emit_operand(dst, src);
  emit_byte(mode & 0xFF);
}
2439
// PSRLDQ xmm, imm8 (66 0F 73 /3) — byte-wise logical right shift of the
// whole 128-bit register.
void Assembler::psrldq(XMMRegister dst, int shift) {
  // Shift 128 bit value in xmm register by number of bytes.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // xmm3 supplies the /3 opcode-extension field of the ModRM byte
  int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66);
  emit_byte(0x73);
  emit_byte(0xC0 | encode);
  emit_byte(shift);
}
2448
// PTEST xmm, m128 (66 0F 38 17) — logical compare, sets ZF/CF (SSE4.1).
// Legacy SSE form needs a 16-byte-aligned memory operand.
void Assembler::ptest(XMMRegister dst, Address src) {
  assert(VM_Version::supports_sse4_1(), "");
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0x17);
  emit_operand(dst, src);
}
2457
// PTEST xmm, xmm (66 0F 38 17) — logical compare, sets ZF/CF (SSE4.1).
void Assembler::ptest(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0x17);
  emit_byte(0xC0 | encode);
}
2464
// PUNPCKLBW xmm, m128 (66 0F 60) — interleave low bytes; SSE form needs alignment.
void Assembler::punpcklbw(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
}
2470
// PUNPCKLBW xmm, xmm (66 0F 60) — interleave low bytes.
void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
}
2475
// PUNPCKLDQ xmm, m128 (66 0F 62) — interleave low dwords; SSE form needs alignment.
void Assembler::punpckldq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
}
2481
// PUNPCKLDQ xmm, xmm (66 0F 62) — interleave low dwords.
void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
}
2486
// PUNPCKLQDQ xmm, xmm (66 0F 6C) — interleave low quadwords.
void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x6C, dst, src, VEX_SIMD_66);
}
2491
// PUSH imm32 (0x68 id).
void Assembler::push(int32_t imm32) {
  // in 64bits we push 64bits onto the stack but only
  // take a 32bit immediate
  emit_byte(0x68);
  emit_long(imm32);
}
2498
// PUSH r64/r32 — single-byte opcode 0x50 + register encoding.
void Assembler::push(Register src) {
  int encode = prefix_and_encode(src->encoding());

  emit_byte(0x50 | encode);
}
2504
// PUSHF — push flags register onto the stack (0x9C).
void Assembler::pushf() {
  emit_byte(0x9C);
}
2508
#ifndef _LP64 // no 32bit push/pop on amd64
// PUSH r/m32 (FF /6); rsi (encoding 6) supplies the /6 opcode extension.
void Assembler::pushl(Address src) {
  // Note this will push 64bit on 64bit
  InstructionMark im(this);
  prefix(src);
  emit_byte(0xFF);
  emit_operand(rsi, src);
}
#endif
2518
// Rotate through carry left: RCL r/m32, imm8 (C1 /2), with the shorter
// single-byte-count form D1 /2 when the count is 1.
void Assembler::rcll(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xD0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xD0 | encode);
    emit_byte(imm8);
  }
}
2531
// copies data from [esi] to [edi] using rcx pointer sized words
// generic
// REP MOVS — F3 (REP) then MOVSD/MOVSQ (A5); REX.W on 64-bit for 8-byte words.
void Assembler::rep_mov() {
  emit_byte(0xF3);
  // MOVSQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xA5);
}
2540
// sets rcx pointer sized words with rax, value at [edi]
// generic
// REP STOS — F3 (REP) then STOSD/STOSQ (AB); REX.W on 64-bit for 8-byte words.
void Assembler::rep_set() { // rep_set
  emit_byte(0xF3);
  // STOSQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xAB);
}
2549
// scans rcx pointer sized words at [edi] for occurrence of rax,
// generic
// REPNE SCAS — F2 (REPNE) then SCASD/SCASQ (AF); REX.W on 64-bit.
void Assembler::repne_scan() { // repne_scan
  emit_byte(0xF2);
  // SCASQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xAF);
}
2558
#ifdef _LP64
// scans rcx 4 byte words at [edi] for occurrence of rax,
// generic
// 32-bit element variant: no REX.W, so SCASL scans 4-byte words.
void Assembler::repne_scanl() { // repne_scan
  emit_byte(0xF2);
  // SCASL
  emit_byte(0xAF);
}
#endif
2568
// Near return: RET (C3), or RET imm16 (C2 iw) to also pop imm16 stack bytes.
void Assembler::ret(int imm16) {
  if (imm16 == 0) {
    emit_byte(0xC3);
  } else {
    emit_byte(0xC2);
    emit_word(imm16);
  }
}
2577
// SAHF — store AH into flags (0x9E); deliberately disallowed in 64-bit mode here.
void Assembler::sahf() {
#ifdef _LP64
  // Not supported in 64bit mode
  ShouldNotReachHere();
#endif
  emit_byte(0x9E);
}
2585
2586void Assembler::sarl(Register dst, int imm8) {
2587  int encode = prefix_and_encode(dst->encoding());
2588  assert(isShiftCount(imm8), "illegal shift count");
2589  if (imm8 == 1) {
2590    emit_byte(0xD1);
2591    emit_byte(0xF8 | encode);
2592  } else {
2593    emit_byte(0xC1);
2594    emit_byte(0xF8 | encode);
2595    emit_byte(imm8);
2596  }
2597}
2598
// Arithmetic shift right by CL: SAR r/m32, CL (D3 /7).
void Assembler::sarl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xF8 | encode);
}
2604
// Subtract with borrow: SBB r/m32, imm32 (81 /3); rbx supplies the /3 extension.
void Assembler::sbbl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);
}
2610
// SBB r32, imm32 (81 /3; 0xD8 = reg-direct form of /3).
void Assembler::sbbl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD8, dst, imm32);
}
2615
2616
// SBB r32, r/m32 (1B /r).
void Assembler::sbbl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x1B);
  emit_operand(dst, src);
}
2623
// SBB r32, r32 (1B /r, register-direct).
void Assembler::sbbl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}
2628
// SETcc r/m8 (0F 90+cc) — set byte register to 1 if condition holds, else 0.
void Assembler::setb(Condition cc, Register dst) {
  assert(0 <= cc && cc < 16, "illegal cc");
  // 'true' marks a byte-register operand (forces REX where needed on 64-bit)
  int encode = prefix_and_encode(dst->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0x90 | cc);
  emit_byte(0xC0 | encode);
}
2636
// Logical shift left: SHL r/m32, imm8 (C1 /4), with the shorter
// single-byte-count form D1 /4 when the count is 1.
void Assembler::shll(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  if (imm8 == 1 ) {
    emit_byte(0xD1);
    emit_byte(0xE0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xE0 | encode);
    emit_byte(imm8);
  }
}
2649
// Logical shift left by CL: SHL r/m32, CL (D3 /4).
void Assembler::shll(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE0 | encode);
}
2655
2656void Assembler::shrl(Register dst, int imm8) {
2657  assert(isShiftCount(imm8), "illegal shift count");
2658  int encode = prefix_and_encode(dst->encoding());
2659  emit_byte(0xC1);
2660  emit_byte(0xE8 | encode);
2661  emit_byte(imm8);
2662}
2663
// Logical shift right by CL: SHR r/m32, CL (D3 /5).
void Assembler::shrl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE8 | encode);
}
2669
// copies a single word from [esi] to [edi]
// MOVS (A5) — one string-move without a REP prefix.
void Assembler::smovl() {
  emit_byte(0xA5);
}
2674
// SQRTSD xmm, xmm — scalar double square root (SSE2; F2-prefixed 0F 51).
void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
}
2679
// SQRTSD xmm, m64 — scalar double square root (SSE2; F2-prefixed 0F 51).
void Assembler::sqrtsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
}
2684
// SQRTSS xmm, xmm — scalar single square root (SSE; F3-prefixed 0F 51).
void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
}
2689
// STD — set the direction flag (0xFD); string ops then auto-decrement.
void Assembler::std() {
  emit_byte(0xfd);
}
2693
// SQRTSS xmm, m32 — scalar single square root (SSE; F3-prefixed 0F 51).
void Assembler::sqrtss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
}
2698
// STMXCSR m32 (0F AE /3) — store the MXCSR control/status register.
void Assembler::stmxcsr( Address dst) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(3), dst); // register 3 supplies the /3 opcode extension
}
2707
// SUB r/m32, imm32 (81 /5); rbp (encoding 5) supplies the /5 extension.
void Assembler::subl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rbp, dst, imm32);
}
2713
// SUB r/m32, r32 (29 /r).
void Assembler::subl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x29);
  emit_operand(src, dst);
}
2720
// SUB r32, imm32 (81 /5; 0xE8 = reg-direct form of /5).
void Assembler::subl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE8, dst, imm32);
}
2725
// Force generation of a 4 byte immediate value even if it fits into 8bit
// (needed where the instruction is later patched and must keep its size).
void Assembler::subl_imm32(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith_imm32(0x81, 0xE8, dst, imm32);
}
2731
// SUB r32, r/m32 (2B /r).
void Assembler::subl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x2B);
  emit_operand(dst, src);
}
2738
// SUB r32, r32 (2B /r, register-direct).
void Assembler::subl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x2B, 0xC0, dst, src);
}
2743
// SUBSD xmm, xmm — subtract scalar double (SSE2; F2-prefixed 0F 5C).
void Assembler::subsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
}
2748
// SUBSD xmm, m64 — subtract scalar double (SSE2; F2-prefixed 0F 5C).
void Assembler::subsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
}
2753
// SUBSS xmm, xmm — subtract scalar single (SSE; F3-prefixed 0F 5C).
void Assembler::subss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
}
2758
// SUBSS xmm, m32 — subtract scalar single (SSE; F3-prefixed 0F 5C).
void Assembler::subss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
}
2763
// TEST r/m8, imm8 (F6 /0) — byte-register AND with immediate, flags only.
void Assembler::testb(Register dst, int imm8) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  // 'true' marks a byte-register operand (forces REX where needed on 64-bit)
  (void) prefix_and_encode(dst->encoding(), true);
  emit_arith_b(0xF6, 0xC0, dst, imm8);
}
2769
// TEST r32, imm32 — uses the short EAX-specific form A9 when dst is
// eax/rax (encoding 0), otherwise the general F7 /0 form.
void Assembler::testl(Register dst, int32_t imm32) {
  // not using emit_arith because test
  // doesn't support sign-extension of
  // 8bit operands
  int encode = dst->encoding();
  if (encode == 0) {
    emit_byte(0xA9);
  } else {
    encode = prefix_and_encode(encode);
    emit_byte(0xF7);
    emit_byte(0xC0 | encode);
  }
  emit_long(imm32);
}
2784
// TEST r32, r32 (85 /r) — AND without writing the result, flags only.
void Assembler::testl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}
2789
// TEST r32, r/m32 (85 /r).
void Assembler::testl(Register dst, Address  src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x85);
  emit_operand(dst, src);
}
2796
// UCOMISD xmm, m64 — unordered scalar double compare, sets EFLAGS (66 0F 2E).
void Assembler::ucomisd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
}
2801
// UCOMISD xmm, xmm — unordered scalar double compare, sets EFLAGS (66 0F 2E).
void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
}
2806
// UCOMISS xmm, m32 — unordered scalar single compare, sets EFLAGS (0F 2E).
void Assembler::ucomiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
}
2811
// UCOMISS xmm, xmm — unordered scalar single compare, sets EFLAGS (0F 2E).
void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
}
2816
2817
// XADD r/m32, r32 (0F C1 /r) — exchange and add (no LOCK prefix here;
// callers add it if atomicity is required).
void Assembler::xaddl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0xC1);
  emit_operand(src, dst);
}
2825
// XCHG r32, r/m32 (87 /r); implicitly locked when a memory operand is involved.
void Assembler::xchgl(Register dst, Address src) { // xchg
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x87);
  emit_operand(dst, src);
}
2832
// XCHG r32, r32 (87 /r, register-direct).
void Assembler::xchgl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x87);
  emit_byte(0xc0 | encode);
}
2838
// XGETBV (0F 01 D0) — read extended control register XCR[ECX] into EDX:EAX.
void Assembler::xgetbv() {
  emit_byte(0x0F);
  emit_byte(0x01);
  emit_byte(0xD0);
}
2844
// XOR r32, imm32 (81 /6; 0xF0 = reg-direct form of /6).
void Assembler::xorl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF0, dst, imm32);
}
2849
// XOR r32, r/m32 (33 /r).
void Assembler::xorl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x33);
  emit_operand(dst, src);
}
2856
// XOR r32, r32 (33 /r, register-direct).
void Assembler::xorl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}
2861
2862
2863// AVX 3-operands scalar float-point arithmetic instructions
2864
// VADDSD xmm, xmm, m64 — 3-operand AVX scalar double add (VEX.F2 0x58).
void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}
2869
// VADDSD xmm, xmm, xmm — 3-operand AVX scalar double add (VEX.F2 0x58).
void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}
2874
// VADDSS xmm, xmm, m32 — 3-operand AVX scalar single add (VEX.F3 0x58).
void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}
2879
// VADDSS xmm, xmm, xmm — 3-operand AVX scalar single add (VEX.F3 0x58).
void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}
2884
// VDIVSD xmm, xmm, m64 — 3-operand AVX scalar double divide (VEX.F2 0x5E).
void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}
2889
// VDIVSD xmm, xmm, xmm — 3-operand AVX scalar double divide (VEX.F2 0x5E).
void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}
2894
// VDIVSS xmm, xmm, m32 — 3-operand AVX scalar single divide (VEX.F3 0x5E).
void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}
2899
// VDIVSS xmm, xmm, xmm — 3-operand AVX scalar single divide (VEX.F3 0x5E).
void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}
2904
// VMULSD xmm, xmm, m64 — 3-operand AVX scalar double multiply (VEX.F2 0x59).
void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}
2909
// VMULSD xmm, xmm, xmm — 3-operand AVX scalar double multiply (VEX.F2 0x59).
void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}
2914
// VMULSS xmm, xmm, m32 — 3-operand AVX scalar single multiply (VEX.F3 0x59).
void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}
2919
// VMULSS xmm, xmm, xmm — 3-operand AVX scalar single multiply (VEX.F3 0x59).
void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}
2924
// VSUBSD xmm, xmm, m64 — 3-operand AVX scalar double subtract (VEX.F2 0x5C).
void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}
2929
// VSUBSD xmm, xmm, xmm — 3-operand AVX scalar double subtract (VEX.F2 0x5C).
void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}
2934
// VSUBSS xmm, xmm, m32 — 3-operand AVX scalar single subtract (VEX.F3 0x5C).
void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}
2939
// VSUBSS xmm, xmm, xmm — 3-operand AVX scalar single subtract (VEX.F3 0x5C).
void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}
2944
2945//====================VECTOR ARITHMETIC=====================================
2946
2947// Float-point vector arithmetic
2948
// ADDPD xmm, xmm — packed double add (66 0F 58).
void Assembler::addpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_66);
}
2953
2954void Assembler::addps(XMMRegister dst, XMMRegister src) {
2955  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2956  emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE);
2957}
2958
2959void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
2960  assert(VM_Version::supports_avx(), "");
2961  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
2962}
2963
2964void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
2965  assert(VM_Version::supports_avx(), "");
2966  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
2967}
2968
2969void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
2970  assert(VM_Version::supports_avx(), "");
2971  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
2972}
2973
2974void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
2975  assert(VM_Version::supports_avx(), "");
2976  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
2977}
2978
2979void Assembler::subpd(XMMRegister dst, XMMRegister src) {
2980  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2981  emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);
2982}
2983
2984void Assembler::subps(XMMRegister dst, XMMRegister src) {
2985  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2986  emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE);
2987}
2988
2989void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
2990  assert(VM_Version::supports_avx(), "");
2991  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
2992}
2993
2994void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
2995  assert(VM_Version::supports_avx(), "");
2996  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
2997}
2998
2999void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3000  assert(VM_Version::supports_avx(), "");
3001  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
3002}
3003
3004void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3005  assert(VM_Version::supports_avx(), "");
3006  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
3007}
3008
3009void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
3010  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3011  emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
3012}
3013
3014void Assembler::mulps(XMMRegister dst, XMMRegister src) {
3015  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3016  emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE);
3017}
3018
3019void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3020  assert(VM_Version::supports_avx(), "");
3021  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
3022}
3023
3024void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3025  assert(VM_Version::supports_avx(), "");
3026  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
3027}
3028
3029void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3030  assert(VM_Version::supports_avx(), "");
3031  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
3032}
3033
3034void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3035  assert(VM_Version::supports_avx(), "");
3036  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
3037}
3038
3039void Assembler::divpd(XMMRegister dst, XMMRegister src) {
3040  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3041  emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);
3042}
3043
3044void Assembler::divps(XMMRegister dst, XMMRegister src) {
3045  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3046  emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE);
3047}
3048
3049void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3050  assert(VM_Version::supports_avx(), "");
3051  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
3052}
3053
3054void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3055  assert(VM_Version::supports_avx(), "");
3056  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
3057}
3058
3059void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3060  assert(VM_Version::supports_avx(), "");
3061  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
3062}
3063
3064void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3065  assert(VM_Version::supports_avx(), "");
3066  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
3067}
3068
3069void Assembler::andpd(XMMRegister dst, XMMRegister src) {
3070  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3071  emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
3072}
3073
3074void Assembler::andps(XMMRegister dst, XMMRegister src) {
3075  NOT_LP64(assert(VM_Version::supports_sse(), ""));
3076  emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
3077}
3078
3079void Assembler::andps(XMMRegister dst, Address src) {
3080  NOT_LP64(assert(VM_Version::supports_sse(), ""));
3081  emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
3082}
3083
3084void Assembler::andpd(XMMRegister dst, Address src) {
3085  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3086  emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
3087}
3088
3089void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3090  assert(VM_Version::supports_avx(), "");
3091  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
3092}
3093
3094void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3095  assert(VM_Version::supports_avx(), "");
3096  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
3097}
3098
3099void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3100  assert(VM_Version::supports_avx(), "");
3101  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
3102}
3103
3104void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3105  assert(VM_Version::supports_avx(), "");
3106  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
3107}
3108
3109void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
3110  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3111  emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
3112}
3113
3114void Assembler::xorps(XMMRegister dst, XMMRegister src) {
3115  NOT_LP64(assert(VM_Version::supports_sse(), ""));
3116  emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
3117}
3118
3119void Assembler::xorpd(XMMRegister dst, Address src) {
3120  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3121  emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
3122}
3123
3124void Assembler::xorps(XMMRegister dst, Address src) {
3125  NOT_LP64(assert(VM_Version::supports_sse(), ""));
3126  emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
3127}
3128
3129void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3130  assert(VM_Version::supports_avx(), "");
3131  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
3132}
3133
3134void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3135  assert(VM_Version::supports_avx(), "");
3136  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
3137}
3138
3139void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3140  assert(VM_Version::supports_avx(), "");
3141  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
3142}
3143
3144void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3145  assert(VM_Version::supports_avx(), "");
3146  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
3147}
3148
3149
3150// Integer vector arithmetic
// Packed integer add/sub/multiply.  The AVX asserts below rely on '&&'
// binding tighter than '||', i.e. they read as
//   (supports_avx() && !vector256) || supports_avx2()
// so 128-bit forms need AVX while 256-bit integer forms need AVX2.
void Assembler::paddb(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xFC, dst, src, VEX_SIMD_66);
}

void Assembler::paddw(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xFD, dst, src, VEX_SIMD_66);
}

void Assembler::paddd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xFE, dst, src, VEX_SIMD_66);
}

void Assembler::paddq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
}

void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::psubb(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xF8, dst, src, VEX_SIMD_66);
}

void Assembler::psubw(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xF9, dst, src, VEX_SIMD_66);
}

void Assembler::psubd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xFA, dst, src, VEX_SIMD_66);
}

void Assembler::psubq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xFB, dst, src, VEX_SIMD_66);
}

void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xD5, dst, src, VEX_SIMD_66);
}

// PMULLD lives in the 0F 38 opcode map (66 0F 38 40 /r), so the prefix and
// ModRM are emitted explicitly rather than through emit_simd_arith.
void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0x40);
  emit_byte(0xC0 | encode);
}

void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
}

// VPMULLD register form: VEX.66.0F38 0x40 /r, hand-encoded for the same
// opcode-map reason as pmulld above.
void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
  emit_byte(0x40);
  emit_byte(0xC0 | encode);
}

void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
}

// VPMULLD memory form: prefix is built by hand so the VEX_OPCODE_0F_38
// opcode map can be specified (presumably emit_vex_arith only covers the
// default 0F map -- note it takes no map argument above).
void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  InstructionMark im(this);
  int dst_enc = dst->encoding();
  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
  vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
  emit_byte(0x40);
  emit_operand(dst, src);
}
3309
3310// Shift packed integers left by specified number of bits.
// Shift packed integers left by an immediate.  The 66 0F 71/72/73 group
// encodings put the sub-opcode in the ModRM reg field; passing xmm6 as the
// "register" operand supplies the required /6 value.
void Assembler::psllw(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM6 is for /6 encoding: 66 0F 71 /6 ib
  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
  emit_byte(0x71);
  emit_byte(0xC0 | encode);
  emit_byte(shift & 0xFF);
}

void Assembler::pslld(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM6 is for /6 encoding: 66 0F 72 /6 ib
  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
  emit_byte(0x72);
  emit_byte(0xC0 | encode);
  emit_byte(shift & 0xFF);
}

void Assembler::psllq(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM6 is for /6 encoding: 66 0F 73 /6 ib
  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
  emit_byte(0x73);
  emit_byte(0xC0 | encode);
  emit_byte(shift & 0xFF);
}

// Shift packed integers left by a count held in an XMM register.
void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66);
}

void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66);
}

void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66);
}

// AVX immediate forms: xmm6 again supplies the /6 sub-opcode; dst acts as
// the VEX nds operand and src is shifted into it.
void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM6 is for /6 encoding: 66 0F 71 /6 ib
  emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector256);
  emit_byte(shift & 0xFF);
}

void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM6 is for /6 encoding: 66 0F 72 /6 ib
  emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector256);
  emit_byte(shift & 0xFF);
}

void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM6 is for /6 encoding: 66 0F 73 /6 ib
  emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector256);
  emit_byte(shift & 0xFF);
}

// AVX register-count forms.
void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector256);
}

void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector256);
}

void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector256);
}
3388
3389// Shift packed integers logically right by specified number of bits.
// Shift packed integers logically right by an immediate.  xmm2 supplies
// the /2 sub-opcode in the ModRM reg field.
void Assembler::psrlw(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
  emit_byte(0x71);
  emit_byte(0xC0 | encode);
  emit_byte(shift & 0xFF);
}

void Assembler::psrld(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
  emit_byte(0x72);
  emit_byte(0xC0 | encode);
  emit_byte(shift & 0xFF);
}

void Assembler::psrlq(XMMRegister dst, int shift) {
  // Do not confuse it with psrldq SSE2 instruction which
  // shifts 128 bit value in xmm register by number of bytes.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
  emit_byte(0x73);
  emit_byte(0xC0 | encode);
  emit_byte(shift & 0xFF);
}

// Shift logically right by a count held in an XMM register.
void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66);
}

void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66);
}

void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
}

// AVX immediate forms (dst = src >>> shift).
void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
  emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector256);
  emit_byte(shift & 0xFF);
}

void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
  emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector256);
  emit_byte(shift & 0xFF);
}

void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
  emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector256);
  emit_byte(shift & 0xFF);
}

// AVX register-count forms.
void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector256);
}

void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector256);
}

void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector256);
}
3468}
3469
3470// Shift packed integers arithmetically right by specified number of bits.
// Shift packed integers arithmetically right by an immediate.  xmm4
// supplies the /4 sub-opcode.  Only word and dword element sizes are
// provided here (no quadword variant appears in this group).
void Assembler::psraw(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
  emit_byte(0x71);
  emit_byte(0xC0 | encode);
  emit_byte(shift & 0xFF);
}

void Assembler::psrad(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
  emit_byte(0x72);
  emit_byte(0xC0 | encode);
  emit_byte(shift & 0xFF);
}

// Shift arithmetically right by a count held in an XMM register.
void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66);
}

void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66);
}

// AVX immediate forms.
void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
  emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector256);
  emit_byte(shift & 0xFF);
}

void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
  emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector256);
  emit_byte(shift & 0xFF);
}

// AVX register-count forms.
void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector256);
}

void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector256);
}
3522
3523
// Bitwise logical operations (AND / OR / XOR) on packed integers
// Opcodes: PAND 0xDB, POR 0xEB, PXOR 0xEF; legacy destructive form plus
// AVX three-operand register/memory forms.
void Assembler::pand(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
}

void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::por(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xEB, dst, src, VEX_SIMD_66);
}

void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::pxor(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xEF, dst, src, VEX_SIMD_66);
}

void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
}
3569
3570
// Insert a 128-bit value into the upper half of a 256-bit register.
// VINSERTF128 (VEX.256.66.0F3A 0x18 /r ib); the trailing immediate 0x01
// selects the upper lane.
void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  bool vector256 = true;
  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
  emit_byte(0x18);
  emit_byte(0xC0 | encode);
  // 0x00 - insert into lower 128 bits
  // 0x01 - insert into upper 128 bits
  emit_byte(0x01);
}

// Memory-source VINSERTF128 into the upper half; dst doubles as the nds
// operand since the lower half of dst is preserved.
void Assembler::vinsertf128h(XMMRegister dst, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  bool vector256 = true;
  assert(dst != xnoreg, "sanity");
  int dst_enc = dst->encoding();
  // swap src<->dst for encoding
  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
  emit_byte(0x18);
  emit_operand(dst, src);
  // 0x01 - insert into upper 128 bits
  emit_byte(0x01);
}

// Store the upper 128 bits of src to memory: VEXTRACTF128 (0x19 /r ib).
void Assembler::vextractf128h(Address dst, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  bool vector256 = true;
  assert(src != xnoreg, "sanity");
  int src_enc = src->encoding();
  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
  emit_byte(0x19);
  emit_operand(src, dst);
  // 0x01 - extract from upper 128 bits
  emit_byte(0x01);
}

// AVX2 integer counterparts: VINSERTI128 (0x38) / VEXTRACTI128 (0x39).
void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx2(), "");
  bool vector256 = true;
  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
  emit_byte(0x38);
  emit_byte(0xC0 | encode);
  // 0x00 - insert into lower 128 bits
  // 0x01 - insert into upper 128 bits
  emit_byte(0x01);
}

void Assembler::vinserti128h(XMMRegister dst, Address src) {
  assert(VM_Version::supports_avx2(), "");
  InstructionMark im(this);
  bool vector256 = true;
  assert(dst != xnoreg, "sanity");
  int dst_enc = dst->encoding();
  // swap src<->dst for encoding
  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
  emit_byte(0x38);
  emit_operand(dst, src);
  // 0x01 - insert into upper 128 bits
  emit_byte(0x01);
}

void Assembler::vextracti128h(Address dst, XMMRegister src) {
  assert(VM_Version::supports_avx2(), "");
  InstructionMark im(this);
  bool vector256 = true;
  assert(src != xnoreg, "sanity");
  int src_enc = src->encoding();
  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
  emit_byte(0x39);
  emit_operand(src, dst);
  // 0x01 - extract from upper 128 bits
  emit_byte(0x01);
}

// VZEROUPPER (VEX.NONE.0F 0x77): clears the upper bits of the YMM state to
// avoid AVX<->SSE transition penalties.  Encode is discarded -- only the
// VEX prefix bytes matter here.
void Assembler::vzeroupper() {
  assert(VM_Version::supports_avx(), "");
  (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
  emit_byte(0x77);
}
3652
3653
3654#ifndef _LP64
3655// 32bit only pieces of the assembler
3656
// CMP r32, imm32 with relocated immediate: 0x81 /7 id.  The ModRM byte
// 0xF8 | reg encodes mod=11, reg=/7 (the CMP sub-opcode), rm=src1.
void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT
  InstructionMark im(this);
  emit_byte(0x81);
  emit_byte(0xF8 | src1->encoding());
  emit_data(imm32, rspec, 0);
}

// CMP m32, imm32 with relocated immediate; rdi (encoding 7) supplies the
// /7 sub-opcode in the ModRM reg field.
void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs)
  InstructionMark im(this);
  emit_byte(0x81);
  emit_operand(rdi, src1);
  emit_data(imm32, rspec, 0);
}

// The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax,
// and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded
// into rdx:rax.  The ZF is set if the compared values were equal, and cleared otherwise.
// Encoding: CMPXCHG8B m64 = 0F C7 /1 (rcx, encoding 1, supplies the /1).
void Assembler::cmpxchg8(Address adr) {
  InstructionMark im(this);
  emit_byte(0x0F);
  emit_byte(0xc7);
  emit_operand(rcx, adr);
}

// One-byte DEC r32 (0x48 + reg).  IA32-only: in 64-bit mode these bytes
// are REX prefixes, which is why this lives under #ifndef _LP64.
void Assembler::decl(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementl() instead.
 emit_byte(0x48 | dst->encoding());
}
3687
3688#endif // _LP64
3689
3690// 64bit typically doesn't use the x87 but needs to for the trig funcs
3691
// ----- x87 floating-point instructions -----
// In the memory-operand forms below the dummy Register argument (rax, rbx,
// ...) is not a real operand: its encoding fills the ModRM reg field, i.e.
// the /digit opcode extension that selects the operation.

void Assembler::fabs() {
  // FABS: ST(0) = |ST(0)|
  emit_byte(0xD9);
  emit_byte(0xE1);
}

void Assembler::fadd(int i) {
  // FADD ST(0), ST(i)
  emit_farith(0xD8, 0xC0, i);
}

void Assembler::fadd_d(Address src) {
  // FADD m64fp (DC /0)
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rax, src);
}

void Assembler::fadd_s(Address src) {
  // FADD m32fp (D8 /0)
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rax, src);
}

void Assembler::fadda(int i) {
  // FADD ST(i), ST(0) ("alternate" destination form)
  emit_farith(0xDC, 0xC0, i);
}

void Assembler::faddp(int i) {
  // FADDP ST(i), ST(0) and pop
  emit_farith(0xDE, 0xC0, i);
}

void Assembler::fchs() {
  // FCHS: negate ST(0)
  emit_byte(0xD9);
  emit_byte(0xE0);
}

void Assembler::fcom(int i) {
  // FCOM ST(i)
  emit_farith(0xD8, 0xD0, i);
}

void Assembler::fcomp(int i) {
  // FCOMP ST(i) and pop
  emit_farith(0xD8, 0xD8, i);
}

void Assembler::fcomp_d(Address src) {
  // FCOMP m64fp (DC /3)
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rbx, src);
}

void Assembler::fcomp_s(Address src) {
  // FCOMP m32fp (D8 /3)
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rbx, src);
}

void Assembler::fcompp() {
  // FCOMPP: compare ST(0) with ST(1) and pop both
  emit_byte(0xDE);
  emit_byte(0xD9);
}

void Assembler::fcos() {
  // FCOS: ST(0) = cos(ST(0))
  emit_byte(0xD9);
  emit_byte(0xFF);
}

void Assembler::fdecstp() {
  // FDECSTP: decrement the x87 stack-top pointer
  emit_byte(0xD9);
  emit_byte(0xF6);
}

void Assembler::fdiv(int i) {
  // FDIV ST(0), ST(i)
  emit_farith(0xD8, 0xF0, i);
}

void Assembler::fdiv_d(Address src) {
  // FDIV m64fp (DC /6)
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rsi, src);
}

void Assembler::fdiv_s(Address src) {
  // FDIV m32fp (D8 /6)
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rsi, src);
}

void Assembler::fdiva(int i) {
  emit_farith(0xDC, 0xF8, i);
}

// Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
//       is erroneous for some of the floating-point instructions below.

void Assembler::fdivp(int i) {
  emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
}

void Assembler::fdivr(int i) {
  // FDIVR ST(0), ST(i)
  emit_farith(0xD8, 0xF8, i);
}
3791
void Assembler::fdivr_d(Address src) {
  // FDIVR m64fp (DC /7)
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rdi, src);
}

void Assembler::fdivr_s(Address src) {
  // FDIVR m32fp (D8 /7)
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rdi, src);
}

void Assembler::fdivra(int i) {
  emit_farith(0xDC, 0xF0, i);
}

void Assembler::fdivrp(int i) {
  emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
}

void Assembler::ffree(int i) {
  // FFREE ST(i): mark register as empty
  emit_farith(0xDD, 0xC0, i);
}

void Assembler::fild_d(Address adr) {
  // FILD m64int (DF /5)
  InstructionMark im(this);
  emit_byte(0xDF);
  emit_operand32(rbp, adr);
}

void Assembler::fild_s(Address adr) {
  // FILD m32int (DB /0)
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rax, adr);
}

void Assembler::fincstp() {
  // FINCSTP: increment the x87 stack-top pointer
  emit_byte(0xD9);
  emit_byte(0xF7);
}

void Assembler::finit() {
  // 0x9B is FWAIT, so this is the waiting form (FINIT rather than FNINIT).
  emit_byte(0x9B);
  emit_byte(0xDB);
  emit_byte(0xE3);
}

void Assembler::fist_s(Address adr) {
  // FIST m32int (DB /2)
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rdx, adr);
}

void Assembler::fistp_d(Address adr) {
  // FISTP m64int (DF /7)
  InstructionMark im(this);
  emit_byte(0xDF);
  emit_operand32(rdi, adr);
}

void Assembler::fistp_s(Address adr) {
  // FISTP m32int (DB /3)
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rbx, adr);
}

void Assembler::fld1() {
  // FLD1: push +1.0
  emit_byte(0xD9);
  emit_byte(0xE8);
}

void Assembler::fld_d(Address adr) {
  // FLD m64fp (DD /0)
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rax, adr);
}

void Assembler::fld_s(Address adr) {
  // FLD m32fp (D9 /0)
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rax, adr);
}


void Assembler::fld_s(int index) {
  // FLD ST(index)
  emit_farith(0xD9, 0xC0, index);
}

void Assembler::fld_x(Address adr) {
  // FLD m80fp (DB /5)
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rbp, adr);
}

void Assembler::fldcw(Address src) {
  // FLDCW m2byte (D9 /5): load x87 control word
  InstructionMark im(this);
  emit_byte(0xd9);
  emit_operand32(rbp, src);
}

void Assembler::fldenv(Address src) {
  // FLDENV (D9 /4): load x87 environment
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rsp, src);
}

void Assembler::fldlg2() {
  // FLDLG2: push log10(2)
  emit_byte(0xD9);
  emit_byte(0xEC);
}

void Assembler::fldln2() {
  // FLDLN2: push ln(2)
  emit_byte(0xD9);
  emit_byte(0xED);
}

void Assembler::fldz() {
  // FLDZ: push +0.0
  emit_byte(0xD9);
  emit_byte(0xEE);
}

void Assembler::flog() {
  // ln(x) = ln(2) * log2(x): push ln(2), swap so x is on top, then FYL2X.
  fldln2();
  fxch();
  fyl2x();
}

void Assembler::flog10() {
  // log10(x) = log10(2) * log2(x)
  fldlg2();
  fxch();
  fyl2x();
}
3923
void Assembler::fmul(int i) {
  // FMUL ST(0), ST(i)
  emit_farith(0xD8, 0xC8, i);
}

void Assembler::fmul_d(Address src) {
  // FMUL m64fp (DC /1)
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rcx, src);
}

void Assembler::fmul_s(Address src) {
  // FMUL m32fp (D8 /1)
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rcx, src);
}

void Assembler::fmula(int i) {
  // FMUL ST(i), ST(0)
  emit_farith(0xDC, 0xC8, i);
}

void Assembler::fmulp(int i) {
  // FMULP ST(i), ST(0) and pop
  emit_farith(0xDE, 0xC8, i);
}

void Assembler::fnsave(Address dst) {
  // FNSAVE (DD /6): save x87 state without a preceding FWAIT
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rsi, dst);
}

void Assembler::fnstcw(Address src) {
  // 0x9B is FWAIT, so despite the name this emits the waiting form (FSTCW,
  // i.e. FWAIT + D9 /7).
  InstructionMark im(this);
  emit_byte(0x9B);
  emit_byte(0xD9);
  emit_operand32(rdi, src);
}

void Assembler::fnstsw_ax() {
  // FNSTSW AX: store x87 status word into AX
  emit_byte(0xdF);
  emit_byte(0xE0);
}

void Assembler::fprem() {
  // FPREM: partial remainder (truncating)
  emit_byte(0xD9);
  emit_byte(0xF8);
}

void Assembler::fprem1() {
  // FPREM1: IEEE partial remainder
  emit_byte(0xD9);
  emit_byte(0xF5);
}

void Assembler::frstor(Address src) {
  // FRSTOR (DD /4): restore x87 state
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rsp, src);
}

void Assembler::fsin() {
  // FSIN: ST(0) = sin(ST(0))
  emit_byte(0xD9);
  emit_byte(0xFE);
}

void Assembler::fsqrt() {
  // FSQRT: ST(0) = sqrt(ST(0))
  emit_byte(0xD9);
  emit_byte(0xFA);
}

void Assembler::fst_d(Address adr) {
  // FST m64fp (DD /2)
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rdx, adr);
}

void Assembler::fst_s(Address adr) {
  // FST m32fp (D9 /2)
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rdx, adr);
}

void Assembler::fstp_d(Address adr) {
  // FSTP m64fp (DD /3): store and pop
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rbx, adr);
}

void Assembler::fstp_d(int index) {
  // FSTP ST(index)
  emit_farith(0xDD, 0xD8, index);
}

void Assembler::fstp_s(Address adr) {
  // FSTP m32fp (D9 /3)
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rbx, adr);
}

void Assembler::fstp_x(Address adr) {
  // FSTP m80fp (DB /7)
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rdi, adr);
}
4025
void Assembler::fsub(int i) {
  // FSUB ST(0), ST(i)
  emit_farith(0xD8, 0xE0, i);
}

void Assembler::fsub_d(Address src) {
  // FSUB m64fp (DC /4)
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rsp, src);
}

void Assembler::fsub_s(Address src) {
  // FSUB m32fp (D8 /4)
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rsp, src);
}

void Assembler::fsuba(int i) {
  emit_farith(0xDC, 0xE8, i);
}

void Assembler::fsubp(int i) {
  emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
}

void Assembler::fsubr(int i) {
  // FSUBR ST(0), ST(i)
  emit_farith(0xD8, 0xE8, i);
}

void Assembler::fsubr_d(Address src) {
  // FSUBR m64fp (DC /5)
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rbp, src);
}

void Assembler::fsubr_s(Address src) {
  // FSUBR m32fp (D8 /5)
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rbp, src);
}

void Assembler::fsubra(int i) {
  emit_farith(0xDC, 0xE0, i);
}

void Assembler::fsubrp(int i) {
  emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
}

void Assembler::ftan() {
  // FPTAN pushes 1.0 after the result; FSTP ST(0) (DD D8) pops it so only
  // tan(ST(0)) remains on the stack.
  emit_byte(0xD9);
  emit_byte(0xF2);
  emit_byte(0xDD);
  emit_byte(0xD8);
}

void Assembler::ftst() {
  // FTST: compare ST(0) with 0.0
  emit_byte(0xD9);
  emit_byte(0xE4);
}

void Assembler::fucomi(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  // FUCOMI ST(0), ST(i): unordered compare, results in EFLAGS
  emit_farith(0xDB, 0xE8, i);
}

void Assembler::fucomip(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  // FUCOMIP ST(0), ST(i): as fucomi, then pop
  emit_farith(0xDF, 0xE8, i);
}

void Assembler::fwait() {
  // FWAIT: wait for pending unmasked x87 exceptions
  emit_byte(0x9B);
}

void Assembler::fxch(int i) {
  // FXCH ST(i): exchange with ST(0)
  emit_farith(0xD9, 0xC8, i);
}

void Assembler::fyl2x() {
  // FYL2X: ST(1) = ST(1) * log2(ST(0)), pop
  emit_byte(0xD9);
  emit_byte(0xF1);
}

void Assembler::frndint() {
  // FRNDINT: round ST(0) to integer (current rounding mode)
  emit_byte(0xD9);
  emit_byte(0xFC);
}

void Assembler::f2xm1() {
  // F2XM1: ST(0) = 2^ST(0) - 1
  emit_byte(0xD9);
  emit_byte(0xF0);
}

void Assembler::fldl2e() {
  // FLDL2E: push log2(e)
  emit_byte(0xD9);
  emit_byte(0xEA);
}
4125
// SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
// SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
static int simd_opc[4] = { 0,    0, 0x38, 0x3A };

// Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
// Memory-operand form: emits [mandatory 66/F3/F2 prefix] [REX] 0F [38|3A].
void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
  if (pre > 0) {
    emit_byte(simd_pre[pre]);
  }
  if (rex_w) {
    prefixq(adr, xreg);
  } else {
    prefix(adr, xreg);
  }
  if (opc > 0) {
    emit_byte(0x0F);
    int opc2 = simd_opc[opc];
    if (opc2 > 0) {
      emit_byte(opc2);
    }
  }
}

// Register-register form of the above; returns the two 3-bit register fields
// (already reduced mod 8 by the REX computation) for the caller's ModRM byte.
int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
  if (pre > 0) {
    emit_byte(simd_pre[pre]);
  }
  int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
                          prefix_and_encode(dst_enc, src_enc);
  if (opc > 0) {
    emit_byte(0x0F);
    int opc2 = simd_opc[opc];
    if (opc2 > 0) {
      emit_byte(opc2);
    }
  }
  return encode;
}
4165
4166
// Emit a VEX prefix.  The 3-byte form (C4) is required whenever B, X or W
// must be expressed, or the opcode lives in the 0F_38/0F_3A maps; otherwise
// the shorter 2-byte form (C5) suffices.  Note that R/X/B and vvvv are
// stored inverted in the encoding, hence the bitwise NOTs below.
void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) {
  if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
    prefix(VEX_3bytes);

    // byte1: inverted R/X/B in the top bits, opcode map select in the low bits
    int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
    byte1 = (~byte1) & 0xE0;
    byte1 |= opc;
    a_byte(byte1);

    // byte2: W | inverted vvvv (nds) | L (vector256) | pp (simd prefix)
    int byte2 = ((~nds_enc) & 0xf) << 3;
    byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre;
    emit_byte(byte2);
  } else {
    prefix(VEX_2bytes);

    // single payload byte: inverted R | inverted vvvv | L | pp
    int byte1 = vex_r ? VEX_R : 0;
    byte1 = (~byte1) & 0x80;
    byte1 |= ((~nds_enc) & 0xf) << 3;
    byte1 |= (vector256 ? 4 : 0) | pre;
    emit_byte(byte1);
  }
}

// Memory-operand convenience wrapper: derives R from the XMM register and
// B/X from the address expression.
void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){
  bool vex_r = (xreg_enc >= 8);
  bool vex_b = adr.base_needs_rex();
  bool vex_x = adr.index_needs_rex();
  vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
}

// Register-register wrapper; returns the ModRM register fields (mod 8) so
// the caller can emit 0xC0 | encode.
int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) {
  bool vex_r = (dst_enc >= 8);
  bool vex_b = (src_enc >= 8);
  bool vex_x = false;
  vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
  return (((dst_enc & 7) << 3) | (src_enc & 7));
}
4204
4205
// Emit either a VEX prefix (when AVX is enabled) or a legacy SSE
// prefix+escape for an XMM instruction with a memory operand.
void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
  if (UseAVX > 0) {
    int xreg_enc = xreg->encoding();
    int  nds_enc = nds->is_valid() ? nds->encoding() : 0;
    vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256);
  } else {
    // legacy SSE has no third operand: nds must coincide with xreg (or be absent)
    assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
    rex_prefix(adr, xreg, pre, opc, rex_w);
  }
}

// Register-register variant; returns the ModRM register fields.
int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
  int dst_enc = dst->encoding();
  int src_enc = src->encoding();
  if (UseAVX > 0) {
    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256);
  } else {
    assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
    return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
  }
}

// Two-operand SIMD arithmetic, destructive form (dst is also the first
// source): prefix, opcode, then ModRM/SIB for the memory operand.
void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
  InstructionMark im(this);
  simd_prefix(dst, dst, src, pre);
  emit_byte(opcode);
  emit_operand(dst, src);
}

void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
  int encode = simd_prefix_and_encode(dst, dst, src, pre);
  emit_byte(opcode);
  emit_byte(0xC0 | encode);
}

// Versions with no second source register (non-destructive source).
void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
  InstructionMark im(this);
  simd_prefix(dst, xnoreg, src, pre);
  emit_byte(opcode);
  emit_operand(dst, src);
}

void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
  int encode = simd_prefix_and_encode(dst, xnoreg, src, pre);
  emit_byte(opcode);
  emit_byte(0xC0 | encode);
}

// 3-operands AVX instructions
void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
                               Address src, VexSimdPrefix pre, bool vector256) {
  InstructionMark im(this);
  vex_prefix(dst, nds, src, pre, vector256);
  emit_byte(opcode);
  emit_operand(dst, src);
}

void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
                               XMMRegister src, VexSimdPrefix pre, bool vector256) {
  int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256);
  emit_byte(opcode);
  emit_byte(0xC0 | encode);
}
4271
4272#ifndef _LP64
4273
void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  // 32-bit only: one-byte INC r32 (0x40 | reg); these bytes are REX in 64-bit mode.
  emit_byte(0x40 | dst->encoding());
}

// On 32-bit, lea is just leal.
void Assembler::lea(Register dst, Address src) {
  leal(dst, src);
}

// MOV m32, imm32 with relocated immediate (0xC7 /0; rax supplies /0).
void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0xC7);
  emit_operand(rax, dst);
  emit_data((int)imm32, rspec, 0);
}

// MOV r32, imm32 with relocated immediate (0xB8+reg).
void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data((int)imm32, rspec, 0);
}

void Assembler::popa() { // 32bit
  // POPAD: pop all general-purpose registers
  emit_byte(0x61);
}

// PUSH imm32 with relocation (0x68).
void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0x68);
  emit_data(imm32, rspec, 0);
}

void Assembler::pusha() { // 32bit
  // PUSHAD: push all general-purpose registers
  emit_byte(0x60);
}

void Assembler::set_byte_if_not_zero(Register dst) {
  // SETNE dst (0F 95): dst's low byte = (ZF == 0)
  emit_byte(0x0F);
  emit_byte(0x95);
  emit_byte(0xE0 | dst->encoding());
}

void Assembler::shldl(Register dst, Register src) {
  // SHLD dst, src, CL (0F A5): double-precision shift left
  emit_byte(0x0F);
  emit_byte(0xA5);
  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
}

void Assembler::shrdl(Register dst, Register src) {
  // SHRD dst, src, CL (0F AD): double-precision shift right
  emit_byte(0x0F);
  emit_byte(0xAD);
  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
}
4328
4329#else // LP64
4330
void Assembler::set_byte_if_not_zero(Register dst) {
  // SETNE dst (0F 95).  byteinst=true so a plain REX is emitted when needed
  // to reach the low byte of rsp/rbp/rsi/rdi (spl/bpl/sil/dil).
  int enc = prefix_and_encode(dst->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0x95);
  emit_byte(0xE0 | enc);
}
4337
4338// 64bit only pieces of the assembler
4339// This should only be used by 64bit instructions that can use rip-relative
4340// it cannot be used by instructions that want an immediate value.
4341
// Returns true if adr can always be addressed rip-relatively (32-bit signed
// displacement) from anywhere in the code cache; false forces a 64-bit
// literal instead.
bool Assembler::reachable(AddressLiteral adr) {
  int64_t disp;
  // None will force a 64bit literal to the code stream. Likely a placeholder
  // for something that will be patched later and we need to be certain it will
  // always be reachable.
  if (adr.reloc() == relocInfo::none) {
    return false;
  }
  if (adr.reloc() == relocInfo::internal_word_type) {
    // This should be rip relative and easily reachable.
    return true;
  }
  if (adr.reloc() == relocInfo::virtual_call_type ||
      adr.reloc() == relocInfo::opt_virtual_call_type ||
      adr.reloc() == relocInfo::static_call_type ||
      adr.reloc() == relocInfo::static_stub_type ) {
    // This should be rip relative within the code cache and easily
    // reachable until we get huge code caches. (At which point
    // ic code is going to have issues).
    return true;
  }
  if (adr.reloc() != relocInfo::external_word_type &&
      adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
      adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
      adr.reloc() != relocInfo::runtime_call_type ) {
    return false;
  }

  // Stress the correction code
  if (ForceUnreachable) {
    // Must be runtimecall reloc, see if it is in the codecache
    // Flipping stuff in the codecache to be unreachable causes issues
    // with things like inline caches where the additional instructions
    // are not handled.
    if (CodeCache::find_blob(adr._target) == NULL) {
      return false;
    }
  }
  // For external_word_type/runtime_call_type if it is reachable from where we
  // are now (possibly a temp buffer) and where we might end up
  // anywhere in the codeCache then we are always reachable.
  // This would have to change if we ever save/restore shared code
  // to be more pessimistic.
  disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;
  disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;

  disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int));

  // Because rip relative is a disp + address_of_next_instruction and we
  // don't know the value of address_of_next_instruction we apply a fudge factor
  // to make sure we will be ok no matter the size of the instruction we get placed into.
  // We don't have to fudge the checks above here because they are already worst case.

  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
  // + 4 because better safe than sorry.
  const int fudge = 12 + 4;
  if (disp < 0) {
    disp -= fudge;
  } else {
    disp += fudge;
  }
  return is_simm32(disp);
}
4407
// Check if the polling page is not reachable from the code cache using rip-relative
// addressing.
bool Assembler::is_polling_page_far() {
  intptr_t addr = (intptr_t)os::get_polling_page();
  return ForceUnreachable ||
         !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
         !is_simm32(addr - (intptr_t)CodeCache::high_bound());
}

// Emit a 64-bit data word, attaching a simple relocation when rtype is set.
void Assembler::emit_data64(jlong data,
                            relocInfo::relocType rtype,
                            int format) {
  if (rtype == relocInfo::none) {
    emit_int64(data);
  } else {
    emit_data64(data, Relocation::spec_simple(rtype), format);
  }
}

// Emit a relocated 64-bit data word.  The relocation is recorded at the
// enclosing instruction's mark, not at the word itself.
void Assembler::emit_data64(jlong data,
                            RelocationHolder const& rspec,
                            int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(imm_operand == format, "must be immediate");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  // Do not use AbstractAssembler::relocate, which is not intended for
  // embedded words.  Instead, relocate to the enclosing instruction.
  code_section()->relocate(inst_mark(), rspec, format);
#ifdef ASSERT
  check_relocation(rspec, format);
#endif
  emit_int64(data);
}
4441
// Emit a REX prefix for a single register operand if needed and return the
// 3-bit register encoding.  byteinst: byte-register instructions need a
// plain REX to address spl/bpl/sil/dil (encodings 4-7).
int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
  if (reg_enc >= 8) {
    prefix(REX_B);
    reg_enc -= 8;
  } else if (byteinst && reg_enc >= 4) {
    prefix(REX);
  }
  return reg_enc;
}

// As above, but always emits REX.W (64-bit operand size).
int Assembler::prefixq_and_encode(int reg_enc) {
  if (reg_enc < 8) {
    prefix(REX_W);
  } else {
    prefix(REX_WB);
    reg_enc -= 8;
  }
  return reg_enc;
}

// Two-register form: dst goes in the ModRM reg field (REX.R), src in the
// rm field (REX.B).  Returns (dst << 3) | src with both reduced mod 8.
int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
  if (dst_enc < 8) {
    if (src_enc >= 8) {
      prefix(REX_B);
      src_enc -= 8;
    } else if (byteinst && src_enc >= 4) {
      prefix(REX);
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_R);
    } else {
      prefix(REX_RB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc;
}

// Two-register form with REX.W always set.
int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
  if (dst_enc < 8) {
    if (src_enc < 8) {
      prefix(REX_W);
    } else {
      prefix(REX_WB);
      src_enc -= 8;
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_WR);
    } else {
      prefix(REX_WRB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc;
}
4501
// REX.B for a register in the ModRM rm field (no-op for r0-r7).
void Assembler::prefix(Register reg) {
  if (reg->encoding() >= 8) {
    prefix(REX_B);
  }
}

// REX for a memory operand: B covers the base register, X the index.
void Assembler::prefix(Address adr) {
  if (adr.base_needs_rex()) {
    if (adr.index_needs_rex()) {
      prefix(REX_XB);
    } else {
      prefix(REX_B);
    }
  } else {
    if (adr.index_needs_rex()) {
      prefix(REX_X);
    }
  }
}

// As above with REX.W always set (64-bit operand size).
void Assembler::prefixq(Address adr) {
  if (adr.base_needs_rex()) {
    if (adr.index_needs_rex()) {
      prefix(REX_WXB);
    } else {
      prefix(REX_WB);
    }
  } else {
    if (adr.index_needs_rex()) {
      prefix(REX_WX);
    } else {
      prefix(REX_W);
    }
  }
}
4537
4538
// REX for memory operand + register: R covers reg (ModRM reg field),
// B/X cover the address base/index.  byteinst: a plain REX is needed to
// address spl/bpl/sil/dil in byte instructions.
void Assembler::prefix(Address adr, Register reg, bool byteinst) {
  if (reg->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_XB);
      } else {
        prefix(REX_B);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_X);
      } else if (byteinst && reg->encoding() >= 4 ) {
        prefix(REX);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_RXB);
      } else {
        prefix(REX_RB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_RX);
      } else {
        prefix(REX_R);
      }
    }
  }
}

// As above with REX.W always set.
void Assembler::prefixq(Address adr, Register src) {
  if (src->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WXB);
      } else {
        prefix(REX_WB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WX);
      } else {
        prefix(REX_W);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WRXB);
      } else {
        prefix(REX_WRB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WRX);
      } else {
        prefix(REX_WR);
      }
    }
  }
}

// XMM variant: no byte-register special case, otherwise like
// prefix(Address, Register).
void Assembler::prefix(Address adr, XMMRegister reg) {
  if (reg->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_XB);
      } else {
        prefix(REX_B);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_X);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_RXB);
      } else {
        prefix(REX_RB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_RX);
      } else {
        prefix(REX_R);
      }
    }
  }
}

// XMM variant with REX.W always set.
void Assembler::prefixq(Address adr, XMMRegister src) {
  if (src->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WXB);
      } else {
        prefix(REX_WB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WX);
      } else {
        prefix(REX_W);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WRXB);
      } else {
        prefix(REX_WRB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WRX);
      } else {
        prefix(REX_WR);
      }
    }
  }
}
4664
// ADC r64, imm32 (0x81 /2, via emit_arith's 0xD0 base).
void Assembler::adcq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD0, dst, imm32);
}

// ADC r64, m64 (0x13 /r).
void Assembler::adcq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x13);
  emit_operand(dst, src);
}
4676
4677void Assembler::adcq(Register dst, Register src) {
4678  (int) prefixq_and_encode(dst->encoding(), src->encoding());
4679  emit_arith(0x13, 0xC0, dst, src);
4680}
4681
// ADD m64, imm32/imm8 (emit_arith_operand picks 0x81 /0 or the sign-extended
// 0x83 /0 form; rax supplies the /0 extension).
void Assembler::addq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rax, dst,imm32);
}

// ADD m64, r64 (0x01 /r).
void Assembler::addq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x01);
  emit_operand(src, dst);
}

// ADD r64, imm32 (0x81 /0, base 0xC0).
void Assembler::addq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xC0, dst, imm32);
}

// ADD r64, m64 (0x03 /r).
void Assembler::addq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x03);
  emit_operand(dst, src);
}

// ADD r64, r64 (0x03 /r).
void Assembler::addq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}
4711
4712void Assembler::andq(Address dst, int32_t imm32) {
4713  InstructionMark im(this);
4714  prefixq(dst);
4715  emit_byte(0x81);
4716  emit_operand(rsp, dst, 4);
4717  emit_long(imm32);
4718}
4719
// AND r64, imm32 (0x81 /4, base 0xE0).
void Assembler::andq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE0, dst, imm32);
}

// AND r64, m64 (0x23 /r).
void Assembler::andq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x23);
  emit_operand(dst, src);
}
4731
4732void Assembler::andq(Register dst, Register src) {
4733  (int) prefixq_and_encode(dst->encoding(), src->encoding());
4734  emit_arith(0x23, 0xC0, dst, src);
4735}
4736
// BSF r64, r64 (0F BC): bit scan forward.
void Assembler::bsfq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBC);
  emit_byte(0xC0 | encode);
}

// BSR r64, r64 (0F BD): bit scan reverse.  With LZCNT support the same
// byte sequence plus an F3 prefix decodes as LZCNT, hence the assert.
void Assembler::bsrq(Register dst, Register src) {
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

// BSWAP r64 (0F C8+r).
void Assembler::bswapq(Register reg) {
  int encode = prefixq_and_encode(reg->encoding());
  emit_byte(0x0F);
  emit_byte(0xC8 | encode);
}

// CQO: sign-extend RAX into RDX:RAX (REX.W + 0x99).
void Assembler::cdqq() {
  prefix(REX_W);
  emit_byte(0x99);
}

// CLFLUSH m8 (0F AE /7): flush cache line.
void Assembler::clflush(Address adr) {
  prefix(adr);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(rdi, adr);
}
4769
// CMOVcc r64, r64 (0F 40+cc).
void Assembler::cmovq(Condition cc, Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_byte(0xC0 | encode);
}

// CMOVcc r64, m64.
void Assembler::cmovq(Condition cc, Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_operand(dst, src);
}

// CMP m64, imm32 (0x81 /7; rdi supplies /7).  The trailing 4 tells
// emit_operand that 4 bytes of immediate follow the operand bytes.
void Assembler::cmpq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x81);
  emit_operand(rdi, dst, 4);
  emit_long(imm32);
}

// CMP r64, imm32 (0x81 /7, base 0xF8).
void Assembler::cmpq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xF8, dst, imm32);
}
4797
// NOTE(review): opcode 0x3B is CMP r64, r/m64, so with emit_operand(src, dst)
// this emits exactly the same bytes as cmpq(Register, Address) with the
// arguments swapped -- i.e. it computes src - [dst], not [dst] - src as the
// argument names suggest.  ZF (equal/notEqual) is unaffected, but signed and
// unsigned ordered conditions are reversed relative to the names; the
// memory-destination form would be opcode 0x39.  Verify callers rely only on
// equality before changing this encoding.
void Assembler::cmpq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x3B);
  emit_operand(src, dst);
}
4804
// CMP r64, r64 (0x3B /r).
void Assembler::cmpq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}

// CMP r64, m64 (0x3B /r): computes dst - [src].
void Assembler::cmpq(Register dst, Address  src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}

// CMPXCHG m64, r64 (0F B1): compare RAX with [adr]; if equal store reg,
// else load [adr] into RAX.  Callers add the LOCK prefix separately.
void Assembler::cmpxchgq(Register reg, Address adr) {
  InstructionMark im(this);
  prefixq(adr, reg);
  emit_byte(0x0F);
  emit_byte(0xB1);
  emit_operand(reg, adr);
}
4824
// cvtsi2sdq -- convert a signed 64-bit integer register to a double
// (F2-prefixed 0F 2A with REX.W via the _q SIMD prefix helper).
void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

// cvtsi2sdq -- convert a signed 64-bit integer in memory to a double.
void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix_q(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x2A);
  emit_operand(dst, src);
}

// cvtsi2ssq -- convert a signed 64-bit integer register to a float (F3 0F 2A).
void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

// cvtsi2ssq -- convert a signed 64-bit integer in memory to a float.
void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  simd_prefix_q(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x2A);
  emit_operand(dst, src);
}

// cvttsd2siq -- truncating convert of a double to a signed 64-bit integer
// (F2 0F 2C).
void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

// cvttss2siq -- truncating convert of a float to a signed 64-bit integer
// (F3 0F 2C).
void Assembler::cvttss2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}
4868
// decl -- 32-bit register decrement (FF /1, register-direct form).
void Assembler::decl(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC8 | encode);
}

// decq -- 64-bit register decrement.
void Assembler::decq(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC8 | encode);
}

// decq -- 64-bit memory decrement (FF /1; rcx supplies the /1 digit).
void Assembler::decq(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xFF);
  emit_operand(rcx, dst);
}

// fxrstor -- restore x87/MMX/SSE state from a memory image (0F AE /1).
// as_Register(1) supplies the /1 opcode-extension digit, not an operand.
void Assembler::fxrstor(Address src) {
  prefixq(src);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(1), src);
}

// fxsave -- save x87/MMX/SSE state to a memory image (0F AE /0).
void Assembler::fxsave(Address dst) {
  prefixq(dst);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(0), dst);
}
4906
// idivq -- signed 64-bit divide of rdx:rax by src (F7 /7); quotient in rax,
// remainder in rdx.
void Assembler::idivq(Register src) {
  int encode = prefixq_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF8 | encode);
}

// imulq -- 64-bit two-operand signed multiply: dst *= src (0F AF /r).
void Assembler::imulq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xAF);
  emit_byte(0xC0 | encode);
}

// imulq -- three-operand signed multiply: dst = src * value.
// Uses the short 6B /r ib form when the immediate fits in 8 bits,
// otherwise 69 /r id with a full 32-bit immediate.
void Assembler::imulq(Register dst, Register src, int value) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_byte(0x6B);
    emit_byte(0xC0 | encode);
    emit_byte(value & 0xFF);
  } else {
    emit_byte(0x69);
    emit_byte(0xC0 | encode);
    emit_long(value);
  }
}
4932
// incl -- 32-bit register increment (FF /0, register-direct form).
void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC0 | encode);
}

// incq -- 64-bit register increment.
void Assembler::incq(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC0 | encode);
}

// incq -- 64-bit memory increment (FF /0; rax supplies the /0 digit).
void Assembler::incq(Address dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xFF);
  emit_operand(rax, dst);
}
4956
// lea -- pointer-sized load-effective-address; on 64-bit this is leaq.
void Assembler::lea(Register dst, Address src) {
  leaq(dst, src);
}

// leaq -- 64-bit load effective address (8D /r): dst = &src, no memory access.
void Assembler::leaq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x8D);
  emit_operand(dst, src);
}

// mov64 -- load a full 64-bit immediate into a register (REX.W B8+rd io).
void Assembler::mov64(Register dst, int64_t imm64) {
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_int64(imm64);
}

// mov_literal64 -- same encoding as mov64, but the 64-bit payload is emitted
// with relocation info (rspec) so the runtime can patch it.
void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data64(imm64, rspec);
}
4981
// mov_narrow_oop -- load a 32-bit compressed-oop immediate into a register
// (B8+rd id, 32-bit operand size -- note prefix_and_encode, not the q form).
// The immediate carries narrow_oop relocation info so GC can patch it.
void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}

// mov_narrow_oop -- store a 32-bit compressed-oop immediate to memory
// (C7 /0 id; rax supplies the /0 digit, 4 = immediate size for emit_operand).
void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}

// cmp_narrow_oop -- compare a register with a 32-bit compressed-oop immediate
// (81 /7 id; 0xF8 is ModRM with the /7 digit and register-direct mode).
void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(src1->encoding());
  emit_byte(0x81);
  emit_byte(0xF8 | encode);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}

// cmp_narrow_oop -- compare a 32-bit memory operand with a compressed-oop
// immediate (81 /7 id; rax here is... NOTE(review): /7 is rdi's encoding, rax
// encodes /0 (ADD) -- looks suspicious, confirm against callers before use.
void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  prefix(src1);
  emit_byte(0x81);
  emit_operand(rax, src1, 4);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
5012
// lzcntq -- count leading zeros of a 64-bit register (F3 REX.W 0F BD).
// The mandatory F3 prefix is emitted before the REX prefix.  On CPUs without
// LZCNT the same bytes decode as BSR, hence the assert's wording.
void Assembler::lzcntq(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  emit_byte(0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

// movdq -- move 64 bits from a general register into an XMM register
// (66 REX.W 0F 6E).
void Assembler::movdq(XMMRegister dst, Register src) {
  // table D-1 says MMX/SSE2
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66);
  emit_byte(0x6E);
  emit_byte(0xC0 | encode);
}

// movdq -- move 64 bits from an XMM register into a general register
// (66 REX.W 0F 7E; operands swapped when building the prefix, see below).
void Assembler::movdq(Register dst, XMMRegister src) {
  // table D-1 says MMX/SSE2
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // swap src/dst to get correct prefix
  int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66);
  emit_byte(0x7E);
  emit_byte(0xC0 | encode);
}
5038
// movq -- 64-bit register-register move (8B /r).
void Assembler::movq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x8B);
  emit_byte(0xC0 | encode);
}

// movq -- 64-bit load from memory (8B /r).
void Assembler::movq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}

// movq -- 64-bit store to memory (89 /r).
void Assembler::movq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}

// movsbq -- sign-extending load of a byte into a 64-bit register (0F BE /r).
void Assembler::movsbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_operand(dst, src);
}

// movsbq -- sign-extend the low byte of src into dst (register form).
void Assembler::movsbq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_byte(0xC0 | encode);
}
5073
5074void Assembler::movslq(Register dst, int32_t imm32) {
5075  // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
5076  // and movslq(r8, 3); as movl     $0x0000000048000000,(%rbx)
5077  // as a result we shouldn't use until tested at runtime...
5078  ShouldNotReachHere();
5079  InstructionMark im(this);
5080  int encode = prefixq_and_encode(dst->encoding());
5081  emit_byte(0xC7 | encode);
5082  emit_long(imm32);
5083}
5084
// movslq -- store a sign-extended 32-bit immediate into a 64-bit memory slot
// (REX.W C7 /0 id; rax supplies the /0 digit, 4 = trailing immediate size).
void Assembler::movslq(Address dst, int32_t imm32) {
  assert(is_simm32(imm32), "lost bits");
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);
  emit_long(imm32);
}

// movslq -- MOVSXD: sign-extending load of a 32-bit memory operand into a
// 64-bit register (REX.W 63 /r).
void Assembler::movslq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x63);
  emit_operand(dst, src);
}

// movslq -- MOVSXD register form: dst = sign_extend(src.lo32).
void Assembler::movslq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x63);
  emit_byte(0xC0 | encode);
}
5106
// movswq -- sign-extending load of a 16-bit memory operand (0F BF /r).
void Assembler::movswq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_operand(dst, src);
}

// movswq -- sign-extend the low word of src into dst (register form).
void Assembler::movswq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_byte(0xC0 | encode);
}

// movzbq -- zero-extending load of a byte from memory (0F B6 /r).
void Assembler::movzbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_operand(dst, src);
}

// movzbq -- zero-extend the low byte of src into dst (register form).
void Assembler::movzbq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_byte(0xC0 | encode);
}

// movzwq -- zero-extending load of a 16-bit memory operand (0F B7 /r).
void Assembler::movzwq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_operand(dst, src);
}

// movzwq -- zero-extend the low word of src into dst (register form).
void Assembler::movzwq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_byte(0xC0 | encode);
}
5151
// negq -- 64-bit two's-complement negate (F7 /3).
void Assembler::negq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD8 | encode);
}

// notq -- 64-bit bitwise complement (F7 /2); does not affect flags.
void Assembler::notq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD0 | encode);
}

// orq -- OR a 64-bit memory operand with a sign-extended 32-bit immediate
// (81 /1 id; rcx supplies the /1 digit).
void Assembler::orq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x81);
  emit_operand(rcx, dst, 4);
  emit_long(imm32);
}

// orq -- OR a 64-bit register with a sign-extended 32-bit immediate.
void Assembler::orq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xC8, dst, imm32);
}

// orq -- dst |= [src] (0B /r).
void Assembler::orq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0B);
  emit_operand(dst, src);
}

// orq -- dst |= src, register form.
void Assembler::orq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x0B, 0xC0, dst, src);
}
5188
// popa (64-bit) -- restore all general registers from the 16-slot frame laid
// out by pusha(), then release the frame.  Slot 11 (pusha's saved rsp) is
// never reloaded; rsp is recovered implicitly by the final addq.
void Assembler::popa() { // 64bit
  movq(r15, Address(rsp, 0));
  movq(r14, Address(rsp, wordSize));
  movq(r13, Address(rsp, 2 * wordSize));
  movq(r12, Address(rsp, 3 * wordSize));
  movq(r11, Address(rsp, 4 * wordSize));
  movq(r10, Address(rsp, 5 * wordSize));
  movq(r9,  Address(rsp, 6 * wordSize));
  movq(r8,  Address(rsp, 7 * wordSize));
  movq(rdi, Address(rsp, 8 * wordSize));
  movq(rsi, Address(rsp, 9 * wordSize));
  movq(rbp, Address(rsp, 10 * wordSize));
  // skip rsp (slot 11)
  movq(rbx, Address(rsp, 12 * wordSize));
  movq(rdx, Address(rsp, 13 * wordSize));
  movq(rcx, Address(rsp, 14 * wordSize));
  movq(rax, Address(rsp, 15 * wordSize));

  addq(rsp, 16 * wordSize);
}
5209
// popcntq -- population count of a 64-bit memory operand (F3 REX.W 0F B8).
// The mandatory F3 prefix is emitted before the REX prefix.
void Assembler::popcntq(Register dst, Address src) {
  assert(VM_Version::supports_popcnt(), "must support");
  InstructionMark im(this);
  emit_byte(0xF3);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB8);
  emit_operand(dst, src);
}

// popcntq -- population count, register-register form.
void Assembler::popcntq(Register dst, Register src) {
  assert(VM_Version::supports_popcnt(), "must support");
  emit_byte(0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB8);
  emit_byte(0xC0 | encode);
}

// popq -- pop the stack top into a 64-bit memory operand (8F /0;
// rax supplies the /0 digit).
void Assembler::popq(Address dst) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x8F);
  emit_operand(rax, dst);
}
5235
// pusha (64-bit) -- save all general registers in a 16-slot stack frame,
// mirroring popa().  The original rsp is written first, at rsp - 5*wordSize;
// after the subsequent subq of 16*wordSize that location is slot 11
// (16 - 5), i.e. exactly the slot popa() skips.
void Assembler::pusha() { // 64bit
  // we have to store original rsp.  ABI says that 128 bytes
  // below rsp are local scratch.
  movq(Address(rsp, -5 * wordSize), rsp);

  subq(rsp, 16 * wordSize);

  movq(Address(rsp, 15 * wordSize), rax);
  movq(Address(rsp, 14 * wordSize), rcx);
  movq(Address(rsp, 13 * wordSize), rdx);
  movq(Address(rsp, 12 * wordSize), rbx);
  // skip rsp (slot 11, already written above)
  movq(Address(rsp, 10 * wordSize), rbp);
  movq(Address(rsp, 9 * wordSize), rsi);
  movq(Address(rsp, 8 * wordSize), rdi);
  movq(Address(rsp, 7 * wordSize), r8);
  movq(Address(rsp, 6 * wordSize), r9);
  movq(Address(rsp, 5 * wordSize), r10);
  movq(Address(rsp, 4 * wordSize), r11);
  movq(Address(rsp, 3 * wordSize), r12);
  movq(Address(rsp, 2 * wordSize), r13);
  movq(Address(rsp, wordSize), r14);
  movq(Address(rsp, 0), r15);
}

// pushq -- push a 64-bit memory operand (FF /6; rsi supplies the /6 digit).
void Assembler::pushq(Address src) {
  InstructionMark im(this);
  prefixq(src);
  emit_byte(0xFF);
  emit_operand(rsi, src);
}
5267
// rclq -- 64-bit rotate-through-carry left by an immediate count.
// Uses the one-byte-shorter D1 /2 form for count 1, C1 /2 ib otherwise.
void Assembler::rclq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xD0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xD0 | encode);
    emit_byte(imm8);
  }
}

// sarq -- 64-bit arithmetic right shift by an immediate count
// (D1 /7 for count 1, C1 /7 ib otherwise).
void Assembler::sarq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xF8 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xF8 | encode);
    emit_byte(imm8);
  }
}

// sarq -- 64-bit arithmetic right shift by cl (D3 /7).
void Assembler::sarq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xF8 | encode);
}
5298
// sbbq -- subtract-with-borrow of an immediate from a 64-bit memory operand
// (81 /3; rbx supplies the /3 digit via emit_arith_operand).
void Assembler::sbbq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);
}

// sbbq -- dst = dst - imm32 - CF, register form.
void Assembler::sbbq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD8, dst, imm32);
}

// sbbq -- dst = dst - [src] - CF (1B /r).
void Assembler::sbbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x1B);
  emit_operand(dst, src);
}

// sbbq -- dst = dst - src - CF, register-register form.
void Assembler::sbbq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}
5321
// shlq -- 64-bit left shift by an immediate count
// (D1 /4 for count 1, C1 /4 ib otherwise).
void Assembler::shlq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xE0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xE0 | encode);
    emit_byte(imm8);
  }
}

// shlq -- 64-bit left shift by cl (D3 /4).
void Assembler::shlq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE0 | encode);
}
5340
5341void Assembler::shrq(Register dst, int imm8) {
5342  assert(isShiftCount(imm8 >> 1), "illegal shift count");
5343  int encode = prefixq_and_encode(dst->encoding());
5344  emit_byte(0xC1);
5345  emit_byte(0xE8 | encode);
5346  emit_byte(imm8);
5347}
5348
// shrq -- 64-bit logical right shift by cl (D3 /5).
void Assembler::shrq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE8 | encode);
}
5354
// subq -- subtract an immediate from a 64-bit memory operand
// (81 /5; rbp supplies the /5 digit via emit_arith_operand).
void Assembler::subq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbp, dst, imm32);
}

// subq -- [dst] -= src (29 /r).
void Assembler::subq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x29);
  emit_operand(src, dst);
}

// subq -- dst -= imm32 (sign-extended), register form.
void Assembler::subq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE8, dst, imm32);
}

// Force generation of a 4 byte immediate value even if it fits into 8bit.
// Used where the instruction length must be fixed (e.g. for later patching).
void Assembler::subq_imm32(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith_imm32(0x81, 0xE8, dst, imm32);
}

// subq -- dst -= [src] (2B /r).
void Assembler::subq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x2B);
  emit_operand(dst, src);
}

// subq -- dst -= src, register-register form.
void Assembler::subq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x2B, 0xC0, dst, src);
}
5390
// testq -- 64-bit AND of register and immediate, setting flags only.
void Assembler::testq(Register dst, int32_t imm32) {
  // not using emit_arith because test
  // doesn't support sign-extension of
  // 8bit operands
  int encode = dst->encoding();
  if (encode == 0) {
    // encoding 0 before REX adjustment is rax, which has the dedicated
    // short form TEST rax, imm32 (REX.W A9 id)
    prefix(REX_W);
    emit_byte(0xA9);
  } else {
    // general form: F7 /0 id
    encode = prefixq_and_encode(encode);
    emit_byte(0xF7);
    emit_byte(0xC0 | encode);
  }
  emit_long(imm32);
}

// testq -- flags = dst & src, register-register form (85 /r).
void Assembler::testq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}
5411
// xaddq -- 64-bit exchange-and-add (0F C1 /r): temp = [dst]; [dst] += src;
// src = temp.  No LOCK prefix is emitted here.
void Assembler::xaddq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x0F);
  emit_byte(0xC1);
  emit_operand(src, dst);
}

// xchgq -- exchange a register with a 64-bit memory operand (87 /r).
void Assembler::xchgq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x87);
  emit_operand(dst, src);
}

// xchgq -- exchange two 64-bit registers (87 /r, register-direct form).
void Assembler::xchgq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x87);
  emit_byte(0xc0 | encode);
}

// xorq -- dst ^= src, register-register form (33 /r).
void Assembler::xorq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}

// xorq -- dst ^= [src] (33 /r, memory form).
void Assembler::xorq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x33);
  emit_operand(dst, src);
}
5444
5445#endif // !LP64
5446